1 | /* |
2 | * Copyright (c) 2000-2018 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | /* |
29 | * Copyright (c) 1982, 1986, 1991, 1993, 1995 |
30 | * The Regents of the University of California. All rights reserved. |
31 | * |
32 | * Redistribution and use in source and binary forms, with or without |
33 | * modification, are permitted provided that the following conditions |
34 | * are met: |
35 | * 1. Redistributions of source code must retain the above copyright |
36 | * notice, this list of conditions and the following disclaimer. |
37 | * 2. Redistributions in binary form must reproduce the above copyright |
38 | * notice, this list of conditions and the following disclaimer in the |
39 | * documentation and/or other materials provided with the distribution. |
40 | * 3. All advertising materials mentioning features or use of this software |
41 | * must display the following acknowledgement: |
42 | * This product includes software developed by the University of |
43 | * California, Berkeley and its contributors. |
44 | * 4. Neither the name of the University nor the names of its contributors |
45 | * may be used to endorse or promote products derived from this software |
46 | * without specific prior written permission. |
47 | * |
48 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
49 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
50 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
51 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
52 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
53 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
54 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
55 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
56 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
57 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
58 | * SUCH DAMAGE. |
59 | * |
60 | * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95 |
61 | * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.17 2001/08/13 16:26:17 ume Exp $ |
62 | */ |
63 | |
64 | #include <sys/param.h> |
65 | #include <sys/systm.h> |
66 | #include <sys/malloc.h> |
67 | #include <sys/mbuf.h> |
68 | #include <sys/domain.h> |
69 | #include <sys/protosw.h> |
70 | #include <sys/socket.h> |
71 | #include <sys/socketvar.h> |
72 | #include <sys/proc.h> |
73 | #include <sys/kernel.h> |
74 | #include <sys/sysctl.h> |
75 | #include <sys/mcache.h> |
76 | #include <sys/kauth.h> |
77 | #include <sys/priv.h> |
78 | #include <sys/proc_uuid_policy.h> |
79 | #include <sys/syslog.h> |
80 | #include <sys/priv.h> |
81 | #include <net/dlil.h> |
82 | |
83 | #include <libkern/OSAtomic.h> |
84 | #include <kern/locks.h> |
85 | |
86 | #include <machine/limits.h> |
87 | |
88 | #include <kern/zalloc.h> |
89 | |
90 | #include <net/if.h> |
91 | #include <net/if_types.h> |
92 | #include <net/route.h> |
93 | #include <net/flowhash.h> |
94 | #include <net/flowadv.h> |
95 | #include <net/nat464_utils.h> |
96 | #include <net/ntstat.h> |
97 | |
98 | #include <netinet/in.h> |
99 | #include <netinet/in_pcb.h> |
100 | #include <netinet/in_var.h> |
101 | #include <netinet/ip_var.h> |
102 | #if INET6 |
103 | #include <netinet/ip6.h> |
104 | #include <netinet6/ip6_var.h> |
105 | #endif /* INET6 */ |
106 | |
107 | #include <sys/kdebug.h> |
108 | #include <sys/random.h> |
109 | |
110 | #include <dev/random/randomdev.h> |
111 | #include <mach/boolean.h> |
112 | |
113 | #include <pexpert/pexpert.h> |
114 | |
115 | #if NECP |
116 | #include <net/necp.h> |
117 | #endif |
118 | |
119 | #include <sys/stat.h> |
120 | #include <sys/ubc.h> |
121 | #include <sys/vnode.h> |
122 | |
123 | static lck_grp_t *inpcb_lock_grp; |
124 | static lck_attr_t *inpcb_lock_attr; |
125 | static lck_grp_attr_t *inpcb_lock_grp_attr; |
126 | decl_lck_mtx_data(static, inpcb_lock); /* global INPCB lock */ |
127 | decl_lck_mtx_data(static, inpcb_timeout_lock); |
128 | |
129 | static TAILQ_HEAD(, inpcbinfo) inpcb_head = TAILQ_HEAD_INITIALIZER(inpcb_head); |
130 | |
131 | static u_int16_t inpcb_timeout_run = 0; /* INPCB timer is scheduled to run */ |
132 | static boolean_t inpcb_garbage_collecting = FALSE; /* gc timer is scheduled */ |
133 | static boolean_t inpcb_ticking = FALSE; /* "slow" timer is scheduled */ |
134 | static boolean_t inpcb_fast_timer_on = FALSE; |
135 | |
136 | #define INPCB_GCREQ_THRESHOLD 50000 |
137 | |
138 | static thread_call_t inpcb_thread_call, inpcb_fast_thread_call; |
139 | static void inpcb_sched_timeout(void); |
140 | static void inpcb_sched_lazy_timeout(void); |
141 | static void _inpcb_sched_timeout(unsigned int); |
142 | static void inpcb_timeout(void *, void *); |
143 | const int inpcb_timeout_lazy = 10; /* 10 seconds leeway for lazy timers */ |
144 | extern int tvtohz(struct timeval *); |
145 | |
/*
 * Forward declarations for the per-process UUID policy helpers; these are
 * only compiled in when CONFIG_PROC_UUID_POLICY is set.
 */
#if CONFIG_PROC_UUID_POLICY
static void inp_update_cellular_policy(struct inpcb *, boolean_t);
#if NECP
static void inp_update_necp_want_app_policy(struct inpcb *, boolean_t);
#endif /* NECP */
#endif /* CONFIG_PROC_UUID_POLICY */
152 | |
/* kdebug codes for the PCB lookup paths */
#define DBG_FNC_PCB_LOOKUP NETDBG_CODE(DBG_NETTCP, (6 << 8))
#define DBG_FNC_PCB_HLOOKUP NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1))

/*
 * These configure the range of local port addresses assigned to
 * "unspecified" outgoing connections/packets/whatever.
 *
 * Each range is a first/last pair.  The low range is initialized with
 * first > last, which in_pcbbind() handles as "counting down".  All six
 * values are exported and clamped through sysctl_net_ipport_check().
 */
int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */
int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */
int ipport_firstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
int ipport_lastauto = IPPORT_HILASTAUTO; /* 65535 */
int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */
166 | |
167 | #define RANGECHK(var, min, max) \ |
168 | if ((var) < (min)) { (var) = (min); } \ |
169 | else if ((var) > (max)) { (var) = (max); } |
170 | |
171 | static int |
172 | sysctl_net_ipport_check SYSCTL_HANDLER_ARGS |
173 | { |
174 | #pragma unused(arg1, arg2) |
175 | int error; |
176 | |
177 | error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); |
178 | if (!error) { |
179 | RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1); |
180 | RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1); |
181 | RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX); |
182 | RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX); |
183 | RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX); |
184 | RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX); |
185 | } |
186 | return (error); |
187 | } |
188 | |
189 | #undef RANGECHK |
190 | |
/*
 * "portrange" node under _net_inet_ip.  Each leaf below exposes one of the
 * ipport_* integers and routes accesses through sysctl_net_ipport_check(),
 * which clamps the values after a successful access.
 */
SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange,
	CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IP Ports");

/* low (reserved) port range */
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst,
	CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
	&ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast,
	CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
	&ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
/* default port range */
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first,
	CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
	&ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last,
	CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
	&ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
/* high port range */
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst,
	CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
	&ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast,
	CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
	&ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
212 | |
/* APN fallback debug level; logging is enabled when this is >= 1. */
static uint32_t apn_fallbk_debug = 0;
/*
 * Conditional log wrapper.  "x" must be a complete parenthesized argument
 * list for log(), i.e. apn_fallbk_log((LOG_INFO, "fmt", ...)).
 */
#define apn_fallbk_log(x) do { if (apn_fallbk_debug >= 1) log x; } while (0)

/*
 * APN fallback defaults to enabled on CONFIG_EMBEDDED builds, where it and
 * the debug level are also exposed as sysctls; on other builds it defaults
 * to disabled and no sysctl is registered here.
 */
#if CONFIG_EMBEDDED
static boolean_t apn_fallbk_enabled = TRUE;

SYSCTL_DECL(_net_inet);
SYSCTL_NODE(_net_inet, OID_AUTO, apn_fallback, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "APN Fallback");
SYSCTL_UINT(_net_inet_apn_fallback, OID_AUTO, enable, CTLFLAG_RW | CTLFLAG_LOCKED,
	&apn_fallbk_enabled, 0, "APN fallback enable");
SYSCTL_UINT(_net_inet_apn_fallback, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
	&apn_fallbk_debug, 0, "APN fallback debug enable");
#else
static boolean_t apn_fallbk_enabled = FALSE;
#endif

/* Defined elsewhere; consulted by in_pcbbind() when picking an anonymous port. */
extern int udp_use_randomport;
extern int tcp_use_randomport;
231 | |
/* Structs used for flowhash computation */

/*
 * One address slot of the flowhash key.  The union is 16 bytes wide so it
 * can hold either an IPv4 or an IPv6 address; the addr8/addr16/addr32
 * members give array views of the same storage.
 */
struct inp_flowhash_key_addr {
	union {
		struct in_addr v4;
		struct in6_addr v6;
		u_int8_t addr8[16];
		u_int16_t addr16[8];
		u_int32_t addr32[4];
	} infha;
};

/*
 * Flowhash key: local/foreign address and port, address family, protocol
 * and two random words.
 * NOTE(review): the member order presumably matters to whatever hashes this
 * structure as raw bytes; confirm before rearranging.
 */
struct inp_flowhash_key {
	struct inp_flowhash_key_addr infh_laddr;
	struct inp_flowhash_key_addr infh_faddr;
	u_int32_t infh_lport;
	u_int32_t infh_fport;
	u_int32_t infh_af;
	u_int32_t infh_proto;
	u_int32_t infh_rand1;
	u_int32_t infh_rand2;
};
253 | |
/* NOTE(review): presumably the seed for the flow hash; it is set outside this part of the file. */
static u_int32_t inp_hash_seed = 0;

/* Comparison function for the inp_fc_tree red-black tree declared below. */
static int infc_cmp(const struct inpcb *, const struct inpcb *);

/* Flags used by inp_fc_getinp */
#define INPFC_SOLOCKED 0x1
#define INPFC_REMOVE 0x2
static struct inpcb *inp_fc_getinp(u_int32_t, u_int32_t);

static void inp_fc_feedback(struct inpcb *);
extern void tcp_remove_from_time_wait(struct inpcb *inp);

/* Mutex for the flow-advisory tree and key_inp; initialized in in_pcbinit(). */
decl_lck_mtx_data(static, inp_fc_lck);

/* Red-black tree of inpcbs linked through infc_link and ordered by infc_cmp(). */
RB_HEAD(inp_fc_tree, inpcb) inp_fc_tree;
RB_PROTOTYPE(inp_fc_tree, inpcb, infc_link, infc_cmp);
RB_GENERATE(inp_fc_tree, inpcb, infc_link, infc_cmp);

/*
 * Use this inp as a key to find an inp in the flowhash tree.
 * Accesses to it are protected by inp_fc_lck.
 */
struct inpcb key_inp;
277 | |
278 | /* |
279 | * in_pcb.c: manage the Protocol Control Blocks. |
280 | */ |
281 | |
282 | void |
283 | in_pcbinit(void) |
284 | { |
285 | static int inpcb_initialized = 0; |
286 | |
287 | VERIFY(!inpcb_initialized); |
288 | inpcb_initialized = 1; |
289 | |
290 | inpcb_lock_grp_attr = lck_grp_attr_alloc_init(); |
291 | inpcb_lock_grp = lck_grp_alloc_init("inpcb" , inpcb_lock_grp_attr); |
292 | inpcb_lock_attr = lck_attr_alloc_init(); |
293 | lck_mtx_init(&inpcb_lock, inpcb_lock_grp, inpcb_lock_attr); |
294 | lck_mtx_init(&inpcb_timeout_lock, inpcb_lock_grp, inpcb_lock_attr); |
295 | inpcb_thread_call = thread_call_allocate_with_priority(inpcb_timeout, |
296 | NULL, THREAD_CALL_PRIORITY_KERNEL); |
297 | inpcb_fast_thread_call = thread_call_allocate_with_priority( |
298 | inpcb_timeout, NULL, THREAD_CALL_PRIORITY_KERNEL); |
299 | if (inpcb_thread_call == NULL || inpcb_fast_thread_call == NULL) |
300 | panic("unable to alloc the inpcb thread call" ); |
301 | |
302 | /* |
303 | * Initialize data structures required to deliver |
304 | * flow advisories. |
305 | */ |
306 | lck_mtx_init(&inp_fc_lck, inpcb_lock_grp, inpcb_lock_attr); |
307 | lck_mtx_lock(&inp_fc_lck); |
308 | RB_INIT(&inp_fc_tree); |
309 | bzero(&key_inp, sizeof(key_inp)); |
310 | lck_mtx_unlock(&inp_fc_lck); |
311 | } |
312 | |
313 | #define INPCB_HAVE_TIMER_REQ(req) (((req).intimer_lazy > 0) || \ |
314 | ((req).intimer_fast > 0) || ((req).intimer_nodelay > 0)) |
315 | static void |
316 | inpcb_timeout(void *arg0, void *arg1) |
317 | { |
318 | #pragma unused(arg0, arg1) |
319 | struct inpcbinfo *ipi; |
320 | boolean_t t, gc; |
321 | struct intimercount gccnt, tmcnt; |
322 | |
323 | /* |
324 | * Update coarse-grained networking timestamp (in sec.); the idea |
325 | * is to piggy-back on the timeout callout to update the counter |
326 | * returnable via net_uptime(). |
327 | */ |
328 | net_update_uptime(); |
329 | |
330 | bzero(&gccnt, sizeof(gccnt)); |
331 | bzero(&tmcnt, sizeof(tmcnt)); |
332 | |
333 | lck_mtx_lock_spin(&inpcb_timeout_lock); |
334 | gc = inpcb_garbage_collecting; |
335 | inpcb_garbage_collecting = FALSE; |
336 | |
337 | t = inpcb_ticking; |
338 | inpcb_ticking = FALSE; |
339 | |
340 | if (gc || t) { |
341 | lck_mtx_unlock(&inpcb_timeout_lock); |
342 | |
343 | lck_mtx_lock(&inpcb_lock); |
344 | TAILQ_FOREACH(ipi, &inpcb_head, ipi_entry) { |
345 | if (INPCB_HAVE_TIMER_REQ(ipi->ipi_gc_req)) { |
346 | bzero(&ipi->ipi_gc_req, |
347 | sizeof(ipi->ipi_gc_req)); |
348 | if (gc && ipi->ipi_gc != NULL) { |
349 | ipi->ipi_gc(ipi); |
350 | gccnt.intimer_lazy += |
351 | ipi->ipi_gc_req.intimer_lazy; |
352 | gccnt.intimer_fast += |
353 | ipi->ipi_gc_req.intimer_fast; |
354 | gccnt.intimer_nodelay += |
355 | ipi->ipi_gc_req.intimer_nodelay; |
356 | } |
357 | } |
358 | if (INPCB_HAVE_TIMER_REQ(ipi->ipi_timer_req)) { |
359 | bzero(&ipi->ipi_timer_req, |
360 | sizeof(ipi->ipi_timer_req)); |
361 | if (t && ipi->ipi_timer != NULL) { |
362 | ipi->ipi_timer(ipi); |
363 | tmcnt.intimer_lazy += |
364 | ipi->ipi_timer_req.intimer_lazy; |
365 | tmcnt.intimer_fast += |
366 | ipi->ipi_timer_req.intimer_fast; |
367 | tmcnt.intimer_nodelay += |
368 | ipi->ipi_timer_req.intimer_nodelay; |
369 | } |
370 | } |
371 | } |
372 | lck_mtx_unlock(&inpcb_lock); |
373 | lck_mtx_lock_spin(&inpcb_timeout_lock); |
374 | } |
375 | |
376 | /* lock was dropped above, so check first before overriding */ |
377 | if (!inpcb_garbage_collecting) |
378 | inpcb_garbage_collecting = INPCB_HAVE_TIMER_REQ(gccnt); |
379 | if (!inpcb_ticking) |
380 | inpcb_ticking = INPCB_HAVE_TIMER_REQ(tmcnt); |
381 | |
382 | /* re-arm the timer if there's work to do */ |
383 | inpcb_timeout_run--; |
384 | VERIFY(inpcb_timeout_run >= 0 && inpcb_timeout_run < 2); |
385 | |
386 | if (gccnt.intimer_nodelay > 0 || tmcnt.intimer_nodelay > 0) |
387 | inpcb_sched_timeout(); |
388 | else if ((gccnt.intimer_fast + tmcnt.intimer_fast) <= 5) |
389 | /* be lazy when idle with little activity */ |
390 | inpcb_sched_lazy_timeout(); |
391 | else |
392 | inpcb_sched_timeout(); |
393 | |
394 | lck_mtx_unlock(&inpcb_timeout_lock); |
395 | } |
396 | |
/*
 * Request a timer callout with no leeway (offset 0).  The caller must hold
 * inpcb_timeout_lock, which _inpcb_sched_timeout() asserts.
 */
static void
inpcb_sched_timeout(void)
{
	_inpcb_sched_timeout(0);
}
402 | |
/*
 * Request a timer callout that may be delayed by up to inpcb_timeout_lazy
 * seconds of leeway.  The caller must hold inpcb_timeout_lock, which
 * _inpcb_sched_timeout() asserts.
 */
static void
inpcb_sched_lazy_timeout(void)
{
	_inpcb_sched_timeout(inpcb_timeout_lazy);
}
408 | |
409 | static void |
410 | _inpcb_sched_timeout(unsigned int offset) |
411 | { |
412 | uint64_t deadline, leeway; |
413 | |
414 | clock_interval_to_deadline(1, NSEC_PER_SEC, &deadline); |
415 | LCK_MTX_ASSERT(&inpcb_timeout_lock, LCK_MTX_ASSERT_OWNED); |
416 | if (inpcb_timeout_run == 0 && |
417 | (inpcb_garbage_collecting || inpcb_ticking)) { |
418 | lck_mtx_convert_spin(&inpcb_timeout_lock); |
419 | inpcb_timeout_run++; |
420 | if (offset == 0) { |
421 | inpcb_fast_timer_on = TRUE; |
422 | thread_call_enter_delayed(inpcb_thread_call, |
423 | deadline); |
424 | } else { |
425 | inpcb_fast_timer_on = FALSE; |
426 | clock_interval_to_absolutetime_interval(offset, |
427 | NSEC_PER_SEC, &leeway); |
428 | thread_call_enter_delayed_with_leeway( |
429 | inpcb_thread_call, NULL, deadline, leeway, |
430 | THREAD_CALL_DELAY_LEEWAY); |
431 | } |
432 | } else if (inpcb_timeout_run == 1 && |
433 | offset == 0 && !inpcb_fast_timer_on) { |
434 | /* |
435 | * Since the request was for a fast timer but the |
436 | * scheduled timer is a lazy timer, try to schedule |
437 | * another instance of fast timer also. |
438 | */ |
439 | lck_mtx_convert_spin(&inpcb_timeout_lock); |
440 | inpcb_timeout_run++; |
441 | inpcb_fast_timer_on = TRUE; |
442 | thread_call_enter_delayed(inpcb_fast_thread_call, deadline); |
443 | } |
444 | } |
445 | |
446 | void |
447 | inpcb_gc_sched(struct inpcbinfo *ipi, u_int32_t type) |
448 | { |
449 | u_int32_t gccnt; |
450 | |
451 | lck_mtx_lock_spin(&inpcb_timeout_lock); |
452 | inpcb_garbage_collecting = TRUE; |
453 | gccnt = ipi->ipi_gc_req.intimer_nodelay + |
454 | ipi->ipi_gc_req.intimer_fast; |
455 | |
456 | if (gccnt > INPCB_GCREQ_THRESHOLD) { |
457 | type = INPCB_TIMER_FAST; |
458 | } |
459 | |
460 | switch (type) { |
461 | case INPCB_TIMER_NODELAY: |
462 | atomic_add_32(&ipi->ipi_gc_req.intimer_nodelay, 1); |
463 | inpcb_sched_timeout(); |
464 | break; |
465 | case INPCB_TIMER_FAST: |
466 | atomic_add_32(&ipi->ipi_gc_req.intimer_fast, 1); |
467 | inpcb_sched_timeout(); |
468 | break; |
469 | default: |
470 | atomic_add_32(&ipi->ipi_gc_req.intimer_lazy, 1); |
471 | inpcb_sched_lazy_timeout(); |
472 | break; |
473 | } |
474 | lck_mtx_unlock(&inpcb_timeout_lock); |
475 | } |
476 | |
477 | void |
478 | inpcb_timer_sched(struct inpcbinfo *ipi, u_int32_t type) |
479 | { |
480 | |
481 | lck_mtx_lock_spin(&inpcb_timeout_lock); |
482 | inpcb_ticking = TRUE; |
483 | switch (type) { |
484 | case INPCB_TIMER_NODELAY: |
485 | atomic_add_32(&ipi->ipi_timer_req.intimer_nodelay, 1); |
486 | inpcb_sched_timeout(); |
487 | break; |
488 | case INPCB_TIMER_FAST: |
489 | atomic_add_32(&ipi->ipi_timer_req.intimer_fast, 1); |
490 | inpcb_sched_timeout(); |
491 | break; |
492 | default: |
493 | atomic_add_32(&ipi->ipi_timer_req.intimer_lazy, 1); |
494 | inpcb_sched_lazy_timeout(); |
495 | break; |
496 | } |
497 | lck_mtx_unlock(&inpcb_timeout_lock); |
498 | } |
499 | |
500 | void |
501 | in_pcbinfo_attach(struct inpcbinfo *ipi) |
502 | { |
503 | struct inpcbinfo *ipi0; |
504 | |
505 | lck_mtx_lock(&inpcb_lock); |
506 | TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) { |
507 | if (ipi0 == ipi) { |
508 | panic("%s: ipi %p already in the list\n" , |
509 | __func__, ipi); |
510 | /* NOTREACHED */ |
511 | } |
512 | } |
513 | TAILQ_INSERT_TAIL(&inpcb_head, ipi, ipi_entry); |
514 | lck_mtx_unlock(&inpcb_lock); |
515 | } |
516 | |
517 | int |
518 | in_pcbinfo_detach(struct inpcbinfo *ipi) |
519 | { |
520 | struct inpcbinfo *ipi0; |
521 | int error = 0; |
522 | |
523 | lck_mtx_lock(&inpcb_lock); |
524 | TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) { |
525 | if (ipi0 == ipi) |
526 | break; |
527 | } |
528 | if (ipi0 != NULL) |
529 | TAILQ_REMOVE(&inpcb_head, ipi0, ipi_entry); |
530 | else |
531 | error = ENXIO; |
532 | lck_mtx_unlock(&inpcb_lock); |
533 | |
534 | return (error); |
535 | } |
536 | |
537 | /* |
538 | * Allocate a PCB and associate it with the socket. |
539 | * |
540 | * Returns: 0 Success |
541 | * ENOBUFS |
542 | * ENOMEM |
543 | */ |
544 | int |
545 | in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p) |
546 | { |
547 | #pragma unused(p) |
548 | struct inpcb *inp; |
549 | caddr_t temp; |
550 | #if CONFIG_MACF_NET |
551 | int mac_error; |
552 | #endif /* CONFIG_MACF_NET */ |
553 | |
554 | if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) { |
555 | inp = (struct inpcb *)zalloc(pcbinfo->ipi_zone); |
556 | if (inp == NULL) |
557 | return (ENOBUFS); |
558 | bzero((caddr_t)inp, sizeof (*inp)); |
559 | } else { |
560 | inp = (struct inpcb *)(void *)so->so_saved_pcb; |
561 | temp = inp->inp_saved_ppcb; |
562 | bzero((caddr_t)inp, sizeof (*inp)); |
563 | inp->inp_saved_ppcb = temp; |
564 | } |
565 | |
566 | inp->inp_gencnt = ++pcbinfo->ipi_gencnt; |
567 | inp->inp_pcbinfo = pcbinfo; |
568 | inp->inp_socket = so; |
569 | #if CONFIG_MACF_NET |
570 | mac_error = mac_inpcb_label_init(inp, M_WAITOK); |
571 | if (mac_error != 0) { |
572 | if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) |
573 | zfree(pcbinfo->ipi_zone, inp); |
574 | return (mac_error); |
575 | } |
576 | mac_inpcb_label_associate(so, inp); |
577 | #endif /* CONFIG_MACF_NET */ |
578 | /* make sure inp_stat is always 64-bit aligned */ |
579 | inp->inp_stat = (struct inp_stat *)P2ROUNDUP(inp->inp_stat_store, |
580 | sizeof (u_int64_t)); |
581 | if (((uintptr_t)inp->inp_stat - (uintptr_t)inp->inp_stat_store) + |
582 | sizeof (*inp->inp_stat) > sizeof (inp->inp_stat_store)) { |
583 | panic("%s: insufficient space to align inp_stat" , __func__); |
584 | /* NOTREACHED */ |
585 | } |
586 | |
587 | /* make sure inp_cstat is always 64-bit aligned */ |
588 | inp->inp_cstat = (struct inp_stat *)P2ROUNDUP(inp->inp_cstat_store, |
589 | sizeof (u_int64_t)); |
590 | if (((uintptr_t)inp->inp_cstat - (uintptr_t)inp->inp_cstat_store) + |
591 | sizeof (*inp->inp_cstat) > sizeof (inp->inp_cstat_store)) { |
592 | panic("%s: insufficient space to align inp_cstat" , __func__); |
593 | /* NOTREACHED */ |
594 | } |
595 | |
596 | /* make sure inp_wstat is always 64-bit aligned */ |
597 | inp->inp_wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_wstat_store, |
598 | sizeof (u_int64_t)); |
599 | if (((uintptr_t)inp->inp_wstat - (uintptr_t)inp->inp_wstat_store) + |
600 | sizeof (*inp->inp_wstat) > sizeof (inp->inp_wstat_store)) { |
601 | panic("%s: insufficient space to align inp_wstat" , __func__); |
602 | /* NOTREACHED */ |
603 | } |
604 | |
605 | /* make sure inp_Wstat is always 64-bit aligned */ |
606 | inp->inp_Wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_Wstat_store, |
607 | sizeof (u_int64_t)); |
608 | if (((uintptr_t)inp->inp_Wstat - (uintptr_t)inp->inp_Wstat_store) + |
609 | sizeof (*inp->inp_Wstat) > sizeof (inp->inp_Wstat_store)) { |
610 | panic("%s: insufficient space to align inp_Wstat" , __func__); |
611 | /* NOTREACHED */ |
612 | } |
613 | |
614 | so->so_pcb = (caddr_t)inp; |
615 | |
616 | if (so->so_proto->pr_flags & PR_PCBLOCK) { |
617 | lck_mtx_init(&inp->inpcb_mtx, pcbinfo->ipi_lock_grp, |
618 | pcbinfo->ipi_lock_attr); |
619 | } |
620 | |
621 | #if INET6 |
622 | if (SOCK_DOM(so) == PF_INET6 && !ip6_mapped_addr_on) |
623 | inp->inp_flags |= IN6P_IPV6_V6ONLY; |
624 | |
625 | if (ip6_auto_flowlabel) |
626 | inp->inp_flags |= IN6P_AUTOFLOWLABEL; |
627 | #endif /* INET6 */ |
628 | if (intcoproc_unrestricted) |
629 | inp->inp_flags2 |= INP2_INTCOPROC_ALLOWED; |
630 | |
631 | (void) inp_update_policy(inp); |
632 | |
633 | lck_rw_lock_exclusive(pcbinfo->ipi_lock); |
634 | inp->inp_gencnt = ++pcbinfo->ipi_gencnt; |
635 | LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list); |
636 | pcbinfo->ipi_count++; |
637 | lck_rw_done(pcbinfo->ipi_lock); |
638 | return (0); |
639 | } |
640 | |
641 | /* |
642 | * in_pcblookup_local_and_cleanup does everything |
643 | * in_pcblookup_local does but it checks for a socket |
644 | * that's going away. Since we know that the lock is |
645 | * held read+write when this funciton is called, we |
646 | * can safely dispose of this socket like the slow |
647 | * timer would usually do and return NULL. This is |
648 | * great for bind. |
649 | */ |
650 | struct inpcb * |
651 | in_pcblookup_local_and_cleanup(struct inpcbinfo *pcbinfo, struct in_addr laddr, |
652 | u_int lport_arg, int wild_okay) |
653 | { |
654 | struct inpcb *inp; |
655 | |
656 | /* Perform normal lookup */ |
657 | inp = in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay); |
658 | |
659 | /* Check if we found a match but it's waiting to be disposed */ |
660 | if (inp != NULL && inp->inp_wantcnt == WNT_STOPUSING) { |
661 | struct socket *so = inp->inp_socket; |
662 | |
663 | socket_lock(so, 0); |
664 | |
665 | if (so->so_usecount == 0) { |
666 | if (inp->inp_state != INPCB_STATE_DEAD) |
667 | in_pcbdetach(inp); |
668 | in_pcbdispose(inp); /* will unlock & destroy */ |
669 | inp = NULL; |
670 | } else { |
671 | socket_unlock(so, 0); |
672 | } |
673 | } |
674 | |
675 | return (inp); |
676 | } |
677 | |
678 | static void |
679 | in_pcb_conflict_post_msg(u_int16_t port) |
680 | { |
681 | /* |
682 | * Radar 5523020 send a kernel event notification if a |
683 | * non-participating socket tries to bind the port a socket |
684 | * who has set SOF_NOTIFYCONFLICT owns. |
685 | */ |
686 | struct kev_msg ev_msg; |
687 | struct kev_in_portinuse in_portinuse; |
688 | |
689 | bzero(&in_portinuse, sizeof (struct kev_in_portinuse)); |
690 | bzero(&ev_msg, sizeof (struct kev_msg)); |
691 | in_portinuse.port = ntohs(port); /* port in host order */ |
692 | in_portinuse.req_pid = proc_selfpid(); |
693 | ev_msg.vendor_code = KEV_VENDOR_APPLE; |
694 | ev_msg.kev_class = KEV_NETWORK_CLASS; |
695 | ev_msg.kev_subclass = KEV_INET_SUBCLASS; |
696 | ev_msg.event_code = KEV_INET_PORTINUSE; |
697 | ev_msg.dv[0].data_ptr = &in_portinuse; |
698 | ev_msg.dv[0].data_length = sizeof (struct kev_in_portinuse); |
699 | ev_msg.dv[1].data_length = 0; |
700 | dlil_post_complete_msg(NULL, &ev_msg); |
701 | } |
702 | |
703 | /* |
704 | * Bind an INPCB to an address and/or port. This routine should not alter |
705 | * the caller-supplied local address "nam". |
706 | * |
707 | * Returns: 0 Success |
708 | * EADDRNOTAVAIL Address not available. |
709 | * EINVAL Invalid argument |
710 | * EAFNOSUPPORT Address family not supported [notdef] |
711 | * EACCES Permission denied |
712 | * EADDRINUSE Address in use |
713 | * EAGAIN Resource unavailable, try again |
714 | * priv_check_cred:EPERM Operation not permitted |
715 | */ |
716 | int |
717 | in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) |
718 | { |
719 | struct socket *so = inp->inp_socket; |
720 | unsigned short *lastport; |
721 | struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; |
722 | u_short lport = 0, rand_port = 0; |
723 | int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); |
724 | int error, randomport, conflict = 0; |
725 | boolean_t anonport = FALSE; |
726 | kauth_cred_t cred; |
727 | struct in_addr laddr; |
728 | struct ifnet *outif = NULL; |
729 | |
730 | if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */ |
731 | return (EADDRNOTAVAIL); |
732 | if (!(so->so_options & (SO_REUSEADDR|SO_REUSEPORT))) |
733 | wild = 1; |
734 | |
735 | bzero(&laddr, sizeof(laddr)); |
736 | |
737 | socket_unlock(so, 0); /* keep reference on socket */ |
738 | lck_rw_lock_exclusive(pcbinfo->ipi_lock); |
739 | if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) { |
740 | /* another thread completed the bind */ |
741 | lck_rw_done(pcbinfo->ipi_lock); |
742 | socket_lock(so, 0); |
743 | return (EINVAL); |
744 | } |
745 | |
746 | if (nam != NULL) { |
747 | if (nam->sa_len != sizeof (struct sockaddr_in)) { |
748 | lck_rw_done(pcbinfo->ipi_lock); |
749 | socket_lock(so, 0); |
750 | return (EINVAL); |
751 | } |
752 | #if 0 |
753 | /* |
754 | * We should check the family, but old programs |
755 | * incorrectly fail to initialize it. |
756 | */ |
757 | if (nam->sa_family != AF_INET) { |
758 | lck_rw_done(pcbinfo->ipi_lock); |
759 | socket_lock(so, 0); |
760 | return (EAFNOSUPPORT); |
761 | } |
762 | #endif /* 0 */ |
763 | lport = SIN(nam)->sin_port; |
764 | |
765 | if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr))) { |
766 | /* |
767 | * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; |
768 | * allow complete duplication of binding if |
769 | * SO_REUSEPORT is set, or if SO_REUSEADDR is set |
770 | * and a multicast address is bound on both |
771 | * new and duplicated sockets. |
772 | */ |
773 | if (so->so_options & SO_REUSEADDR) |
774 | reuseport = SO_REUSEADDR|SO_REUSEPORT; |
775 | } else if (SIN(nam)->sin_addr.s_addr != INADDR_ANY) { |
776 | struct sockaddr_in sin; |
777 | struct ifaddr *ifa; |
778 | |
779 | /* Sanitized for interface address searches */ |
780 | bzero(&sin, sizeof (sin)); |
781 | sin.sin_family = AF_INET; |
782 | sin.sin_len = sizeof (struct sockaddr_in); |
783 | sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr; |
784 | |
785 | ifa = ifa_ifwithaddr(SA(&sin)); |
786 | if (ifa == NULL) { |
787 | lck_rw_done(pcbinfo->ipi_lock); |
788 | socket_lock(so, 0); |
789 | return (EADDRNOTAVAIL); |
790 | } else { |
791 | /* |
792 | * Opportunistically determine the outbound |
793 | * interface that may be used; this may not |
794 | * hold true if we end up using a route |
795 | * going over a different interface, e.g. |
796 | * when sending to a local address. This |
797 | * will get updated again after sending. |
798 | */ |
799 | IFA_LOCK(ifa); |
800 | outif = ifa->ifa_ifp; |
801 | IFA_UNLOCK(ifa); |
802 | IFA_REMREF(ifa); |
803 | } |
804 | } |
805 | if (lport != 0) { |
806 | struct inpcb *t; |
807 | uid_t u; |
808 | |
809 | #if !CONFIG_EMBEDDED |
810 | if (ntohs(lport) < IPPORT_RESERVED && |
811 | SIN(nam)->sin_addr.s_addr != 0) { |
812 | cred = kauth_cred_proc_ref(p); |
813 | error = priv_check_cred(cred, |
814 | PRIV_NETINET_RESERVEDPORT, 0); |
815 | kauth_cred_unref(&cred); |
816 | if (error != 0) { |
817 | lck_rw_done(pcbinfo->ipi_lock); |
818 | socket_lock(so, 0); |
819 | return (EACCES); |
820 | } |
821 | } |
822 | #endif /* !CONFIG_EMBEDDED */ |
823 | if (!IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) && |
824 | (u = kauth_cred_getuid(so->so_cred)) != 0 && |
825 | (t = in_pcblookup_local_and_cleanup( |
826 | inp->inp_pcbinfo, SIN(nam)->sin_addr, lport, |
827 | INPLOOKUP_WILDCARD)) != NULL && |
828 | (SIN(nam)->sin_addr.s_addr != INADDR_ANY || |
829 | t->inp_laddr.s_addr != INADDR_ANY || |
830 | !(t->inp_socket->so_options & SO_REUSEPORT)) && |
831 | (u != kauth_cred_getuid(t->inp_socket->so_cred)) && |
832 | !(t->inp_socket->so_flags & SOF_REUSESHAREUID) && |
833 | (SIN(nam)->sin_addr.s_addr != INADDR_ANY || |
834 | t->inp_laddr.s_addr != INADDR_ANY)) { |
835 | if ((t->inp_socket->so_flags & |
836 | SOF_NOTIFYCONFLICT) && |
837 | !(so->so_flags & SOF_NOTIFYCONFLICT)) |
838 | conflict = 1; |
839 | |
840 | lck_rw_done(pcbinfo->ipi_lock); |
841 | |
842 | if (conflict) |
843 | in_pcb_conflict_post_msg(lport); |
844 | |
845 | socket_lock(so, 0); |
846 | return (EADDRINUSE); |
847 | } |
848 | t = in_pcblookup_local_and_cleanup(pcbinfo, |
849 | SIN(nam)->sin_addr, lport, wild); |
850 | if (t != NULL && |
851 | (reuseport & t->inp_socket->so_options) == 0) { |
852 | #if INET6 |
853 | if (SIN(nam)->sin_addr.s_addr != INADDR_ANY || |
854 | t->inp_laddr.s_addr != INADDR_ANY || |
855 | SOCK_DOM(so) != PF_INET6 || |
856 | SOCK_DOM(t->inp_socket) != PF_INET6) |
857 | #endif /* INET6 */ |
858 | { |
859 | |
860 | if ((t->inp_socket->so_flags & |
861 | SOF_NOTIFYCONFLICT) && |
862 | !(so->so_flags & SOF_NOTIFYCONFLICT)) |
863 | conflict = 1; |
864 | |
865 | lck_rw_done(pcbinfo->ipi_lock); |
866 | |
867 | if (conflict) |
868 | in_pcb_conflict_post_msg(lport); |
869 | socket_lock(so, 0); |
870 | return (EADDRINUSE); |
871 | } |
872 | } |
873 | } |
874 | laddr = SIN(nam)->sin_addr; |
875 | } |
876 | if (lport == 0) { |
877 | u_short first, last; |
878 | int count; |
879 | bool found; |
880 | |
881 | randomport = (so->so_flags & SOF_BINDRANDOMPORT) || |
882 | (so->so_type == SOCK_STREAM ? tcp_use_randomport : |
883 | udp_use_randomport); |
884 | |
885 | /* |
886 | * Even though this looks similar to the code in |
887 | * in6_pcbsetport, the v6 vs v4 checks are different. |
888 | */ |
889 | anonport = TRUE; |
890 | if (inp->inp_flags & INP_HIGHPORT) { |
891 | first = ipport_hifirstauto; /* sysctl */ |
892 | last = ipport_hilastauto; |
893 | lastport = &pcbinfo->ipi_lasthi; |
894 | } else if (inp->inp_flags & INP_LOWPORT) { |
895 | cred = kauth_cred_proc_ref(p); |
896 | error = priv_check_cred(cred, |
897 | PRIV_NETINET_RESERVEDPORT, 0); |
898 | kauth_cred_unref(&cred); |
899 | if (error != 0) { |
900 | lck_rw_done(pcbinfo->ipi_lock); |
901 | socket_lock(so, 0); |
902 | return (error); |
903 | } |
904 | first = ipport_lowfirstauto; /* 1023 */ |
905 | last = ipport_lowlastauto; /* 600 */ |
906 | lastport = &pcbinfo->ipi_lastlow; |
907 | } else { |
908 | first = ipport_firstauto; /* sysctl */ |
909 | last = ipport_lastauto; |
910 | lastport = &pcbinfo->ipi_lastport; |
911 | } |
912 | /* No point in randomizing if only one port is available */ |
913 | |
914 | if (first == last) |
915 | randomport = 0; |
916 | /* |
917 | * Simple check to ensure all ports are not used up causing |
918 | * a deadlock here. |
919 | * |
920 | * We split the two cases (up and down) so that the direction |
921 | * is not being tested on each round of the loop. |
922 | */ |
923 | if (first > last) { |
924 | struct in_addr lookup_addr; |
925 | |
926 | /* |
927 | * counting down |
928 | */ |
929 | if (randomport) { |
930 | read_frandom(&rand_port, sizeof (rand_port)); |
931 | *lastport = |
932 | first - (rand_port % (first - last)); |
933 | } |
934 | count = first - last; |
935 | |
936 | lookup_addr = (laddr.s_addr != INADDR_ANY) ? laddr : |
937 | inp->inp_laddr; |
938 | |
939 | found = false; |
940 | do { |
941 | if (count-- < 0) { /* completely used? */ |
942 | lck_rw_done(pcbinfo->ipi_lock); |
943 | socket_lock(so, 0); |
944 | return (EADDRNOTAVAIL); |
945 | } |
946 | --*lastport; |
947 | if (*lastport > first || *lastport < last) |
948 | *lastport = first; |
949 | lport = htons(*lastport); |
950 | |
951 | found = in_pcblookup_local_and_cleanup(pcbinfo, |
952 | lookup_addr, lport, wild) == NULL; |
953 | } while (!found); |
954 | } else { |
955 | struct in_addr lookup_addr; |
956 | |
957 | /* |
958 | * counting up |
959 | */ |
960 | if (randomport) { |
961 | read_frandom(&rand_port, sizeof (rand_port)); |
962 | *lastport = |
963 | first + (rand_port % (first - last)); |
964 | } |
965 | count = last - first; |
966 | |
967 | lookup_addr = (laddr.s_addr != INADDR_ANY) ? laddr : |
968 | inp->inp_laddr; |
969 | |
970 | found = false; |
971 | do { |
972 | if (count-- < 0) { /* completely used? */ |
973 | lck_rw_done(pcbinfo->ipi_lock); |
974 | socket_lock(so, 0); |
975 | return (EADDRNOTAVAIL); |
976 | } |
977 | ++*lastport; |
978 | if (*lastport < first || *lastport > last) |
979 | *lastport = first; |
980 | lport = htons(*lastport); |
981 | |
982 | found = in_pcblookup_local_and_cleanup(pcbinfo, |
983 | lookup_addr, lport, wild) == NULL; |
984 | } while (!found); |
985 | } |
986 | } |
987 | socket_lock(so, 0); |
988 | |
989 | /* |
990 | * We unlocked socket's protocol lock for a long time. |
991 | * The socket might have been dropped/defuncted. |
992 | * Checking if world has changed since. |
993 | */ |
994 | if (inp->inp_state == INPCB_STATE_DEAD) { |
995 | lck_rw_done(pcbinfo->ipi_lock); |
996 | return (ECONNABORTED); |
997 | } |
998 | |
999 | if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) { |
1000 | lck_rw_done(pcbinfo->ipi_lock); |
1001 | return (EINVAL); |
1002 | } |
1003 | |
1004 | if (laddr.s_addr != INADDR_ANY) { |
1005 | inp->inp_laddr = laddr; |
1006 | inp->inp_last_outifp = outif; |
1007 | } |
1008 | inp->inp_lport = lport; |
1009 | if (anonport) |
1010 | inp->inp_flags |= INP_ANONPORT; |
1011 | |
1012 | if (in_pcbinshash(inp, 1) != 0) { |
1013 | inp->inp_laddr.s_addr = INADDR_ANY; |
1014 | inp->inp_last_outifp = NULL; |
1015 | |
1016 | inp->inp_lport = 0; |
1017 | if (anonport) |
1018 | inp->inp_flags &= ~INP_ANONPORT; |
1019 | lck_rw_done(pcbinfo->ipi_lock); |
1020 | return (EAGAIN); |
1021 | } |
1022 | lck_rw_done(pcbinfo->ipi_lock); |
1023 | sflt_notify(so, sock_evt_bound, NULL); |
1024 | return (0); |
1025 | } |
1026 | |
/*
 * Destination filter used by apn_fallback_required(): evaluates to non-zero
 * when the IPv4 address stored in the sockaddr_in pointed to by (a) matches
 * any of IN_LINKLOCAL, IN_LOOPBACK, IN_ZERONET, IN_MULTICAST or IN_PRIVATE.
 * The address is converted with ntohl() for every test, so (a) is expanded
 * several times and must not have side effects.
 */
#define APN_FALLBACK_IP_FILTER(a) \
	(IN_LINKLOCAL(ntohl((a)->sin_addr.s_addr)) || \
	IN_LOOPBACK(ntohl((a)->sin_addr.s_addr)) || \
	IN_ZERONET(ntohl((a)->sin_addr.s_addr)) || \
	IN_MULTICAST(ntohl((a)->sin_addr.s_addr)) || \
	IN_PRIVATE(ntohl((a)->sin_addr.s_addr)))

/*
 * Minimum difference between two net_uptime() readings before another APN
 * fallback notification may be raised (presumably seconds -- confirm the
 * unit of net_uptime()).  The value itself is an arbitrary choice.
 */
#define APN_FALLBACK_NOTIF_INTERVAL 2 /* Magic Number */

/*
 * net_uptime() reading taken when the last notification was posted; written
 * by apn_fallback_trigger() and compared in apn_fallback_required().
 */
static uint64_t last_apn_fallback = 0;
1036 | |
1037 | static boolean_t |
1038 | apn_fallback_required (proc_t proc, struct socket *so, struct sockaddr_in *p_dstv4) |
1039 | { |
1040 | uint64_t timenow; |
1041 | struct sockaddr_storage lookup_default_addr; |
1042 | struct rtentry *rt = NULL; |
1043 | |
1044 | VERIFY(proc != NULL); |
1045 | |
1046 | if (apn_fallbk_enabled == FALSE) |
1047 | return FALSE; |
1048 | |
1049 | if (proc == kernproc) |
1050 | return FALSE; |
1051 | |
1052 | if (so && (so->so_options & SO_NOAPNFALLBK)) |
1053 | return FALSE; |
1054 | |
1055 | timenow = net_uptime(); |
1056 | if ((timenow - last_apn_fallback) < APN_FALLBACK_NOTIF_INTERVAL) { |
1057 | apn_fallbk_log((LOG_INFO, "APN fallback notification throttled.\n" )); |
1058 | return FALSE; |
1059 | } |
1060 | |
1061 | if (p_dstv4 && APN_FALLBACK_IP_FILTER(p_dstv4)) |
1062 | return FALSE; |
1063 | |
1064 | /* Check if we have unscoped IPv6 default route through cellular */ |
1065 | bzero(&lookup_default_addr, sizeof(lookup_default_addr)); |
1066 | lookup_default_addr.ss_family = AF_INET6; |
1067 | lookup_default_addr.ss_len = sizeof(struct sockaddr_in6); |
1068 | |
1069 | rt = rtalloc1((struct sockaddr *)&lookup_default_addr, 0, 0); |
1070 | if (NULL == rt) { |
1071 | apn_fallbk_log((LOG_INFO, "APN fallback notification could not find " |
1072 | "unscoped default IPv6 route.\n" )); |
1073 | return FALSE; |
1074 | } |
1075 | |
1076 | if (!IFNET_IS_CELLULAR(rt->rt_ifp)) { |
1077 | rtfree(rt); |
1078 | apn_fallbk_log((LOG_INFO, "APN fallback notification could not find " |
1079 | "unscoped default IPv6 route through cellular interface.\n" )); |
1080 | return FALSE; |
1081 | } |
1082 | |
1083 | /* |
1084 | * We have a default IPv6 route, ensure that |
1085 | * we do not have IPv4 default route before triggering |
1086 | * the event |
1087 | */ |
1088 | rtfree(rt); |
1089 | rt = NULL; |
1090 | |
1091 | bzero(&lookup_default_addr, sizeof(lookup_default_addr)); |
1092 | lookup_default_addr.ss_family = AF_INET; |
1093 | lookup_default_addr.ss_len = sizeof(struct sockaddr_in); |
1094 | |
1095 | rt = rtalloc1((struct sockaddr *)&lookup_default_addr, 0, 0); |
1096 | |
1097 | if (rt) { |
1098 | rtfree(rt); |
1099 | rt = NULL; |
1100 | apn_fallbk_log((LOG_INFO, "APN fallback notification found unscoped " |
1101 | "IPv4 default route!\n" )); |
1102 | return FALSE; |
1103 | } |
1104 | |
1105 | { |
1106 | /* |
1107 | * We disable APN fallback if the binary is not a third-party app. |
1108 | * Note that platform daemons use their process name as a |
1109 | * bundle ID so we filter out bundle IDs without dots. |
1110 | */ |
1111 | const char *bundle_id = cs_identity_get(proc); |
1112 | if (bundle_id == NULL || |
1113 | bundle_id[0] == '\0' || |
1114 | strchr(bundle_id, '.') == NULL || |
1115 | strncmp(bundle_id, "com.apple." , sizeof("com.apple." ) - 1) == 0) { |
1116 | apn_fallbk_log((LOG_INFO, "Abort: APN fallback notification found first-" |
1117 | "party bundle ID \"%s\"!\n" , (bundle_id ? bundle_id : "NULL" ))); |
1118 | return FALSE; |
1119 | } |
1120 | } |
1121 | |
1122 | { |
1123 | /* |
1124 | * The Apple App Store IPv6 requirement started on |
1125 | * June 1st, 2016 at 12:00:00 AM PDT. |
1126 | * We disable APN fallback if the binary is more recent than that. |
1127 | * We check both atime and birthtime since birthtime is not always supported. |
1128 | */ |
1129 | static const long ipv6_start_date = 1464764400L; |
1130 | vfs_context_t context; |
1131 | struct stat64 sb; |
1132 | int vn_stat_error; |
1133 | |
1134 | bzero(&sb, sizeof(struct stat64)); |
1135 | context = vfs_context_create(NULL); |
1136 | vn_stat_error = vn_stat(proc->p_textvp, &sb, NULL, 1, context); |
1137 | (void)vfs_context_rele(context); |
1138 | |
1139 | if (vn_stat_error != 0 || |
1140 | sb.st_atimespec.tv_sec >= ipv6_start_date || |
1141 | sb.st_birthtimespec.tv_sec >= ipv6_start_date) { |
1142 | apn_fallbk_log((LOG_INFO, "Abort: APN fallback notification found binary " |
1143 | "too recent! (err %d atime %ld mtime %ld ctime %ld birthtime %ld)\n" , |
1144 | vn_stat_error, sb.st_atimespec.tv_sec, sb.st_mtimespec.tv_sec, |
1145 | sb.st_ctimespec.tv_sec, sb.st_birthtimespec.tv_sec)); |
1146 | return FALSE; |
1147 | } |
1148 | } |
1149 | return TRUE; |
1150 | } |
1151 | |
1152 | static void |
1153 | apn_fallback_trigger(proc_t proc, struct socket *so) |
1154 | { |
1155 | pid_t pid = 0; |
1156 | struct kev_msg ev_msg; |
1157 | struct kev_netevent_apnfallbk_data apnfallbk_data; |
1158 | |
1159 | last_apn_fallback = net_uptime(); |
1160 | pid = proc_pid(proc); |
1161 | uuid_t application_uuid; |
1162 | uuid_clear(application_uuid); |
1163 | proc_getexecutableuuid(proc, application_uuid, |
1164 | sizeof(application_uuid)); |
1165 | |
1166 | bzero(&ev_msg, sizeof (struct kev_msg)); |
1167 | ev_msg.vendor_code = KEV_VENDOR_APPLE; |
1168 | ev_msg.kev_class = KEV_NETWORK_CLASS; |
1169 | ev_msg.kev_subclass = KEV_NETEVENT_SUBCLASS; |
1170 | ev_msg.event_code = KEV_NETEVENT_APNFALLBACK; |
1171 | |
1172 | bzero(&apnfallbk_data, sizeof(apnfallbk_data)); |
1173 | |
1174 | if (so->so_flags & SOF_DELEGATED) { |
1175 | apnfallbk_data.epid = so->e_pid; |
1176 | uuid_copy(apnfallbk_data.euuid, so->e_uuid); |
1177 | } else { |
1178 | apnfallbk_data.epid = so->last_pid; |
1179 | uuid_copy(apnfallbk_data.euuid, so->last_uuid); |
1180 | } |
1181 | |
1182 | ev_msg.dv[0].data_ptr = &apnfallbk_data; |
1183 | ev_msg.dv[0].data_length = sizeof(apnfallbk_data); |
1184 | kev_post_msg(&ev_msg); |
1185 | apn_fallbk_log((LOG_INFO, "APN fallback notification issued.\n" )); |
1186 | } |
1187 | |
/*
 * Transform old in_pcbconnect() into an inner subroutine for new
 * in_pcbconnect(); do some validity-checking on the remote address
 * (in "nam") and then determine local host address (i.e., which
 * interface) to use to access that remote host.
 *
 * This routine may alter the caller-supplied remote address "nam".
 *
 * The caller may override the bound-to-interface setting of the socket
 * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
 *
 * This routine might return an ifp with a reference held if the caller
 * provides a non-NULL outif, even in the error case.  The caller is
 * responsible for releasing its reference.
 *
 * Parameters:
 *	inp	PCB being connected; its cached route (inp_route) is
 *		consulted and may be released or re-allocated here.
 *	nam	Remote address; must be an AF_INET sockaddr_in.  When raw
 *		is 0, INADDR_ANY / INADDR_BROADCAST may be rewritten.
 *	laddr	Out: the local address selected (only written on success).
 *	ifscope	Interface scope for the route/address lookups; when it is
 *		IFSCOPE_NONE and the PCB is bound to an interface, that
 *		interface's index is used instead.
 *	outif	Optional out: set to NULL on entry, and to the outgoing
 *		interface (with a reference) when a source address is
 *		chosen via an interface address.  It stays NULL when the
 *		PCB already has a bound local address.
 *	raw	Non-zero skips the zero-port check and the rewriting of
 *		INADDR_ANY / INADDR_BROADCAST destinations.
 *
 * Returns:	0			Success
 *		EINVAL			Invalid argument
 *		EAFNOSUPPORT		Address family not supported
 *		EADDRNOTAVAIL		Address not available
 *		ENETUNREACH		No interface address found for dest
 *		EHOSTUNREACH		Interface use denied for this PCB
 */
int
in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr,
    unsigned int ifscope, struct ifnet **outif, int raw)
{
	struct route *ro = &inp->inp_route;
	struct in_ifaddr *ia = NULL;
	struct sockaddr_in sin;
	int error = 0;
	boolean_t restricted = FALSE;

	/* Start from a known state so callers can test *outif on any return */
	if (outif != NULL)
		*outif = NULL;
	if (nam->sa_len != sizeof (struct sockaddr_in))
		return (EINVAL);
	if (SIN(nam)->sin_family != AF_INET)
		return (EAFNOSUPPORT);
	if (raw == 0 && SIN(nam)->sin_port == 0)
		return (EADDRNOTAVAIL);

	/*
	 * If the destination address is INADDR_ANY,
	 * use the primary local address.
	 * If the supplied address is INADDR_BROADCAST,
	 * and the primary interface supports broadcast,
	 * choose the broadcast address for that interface.
	 */
	if (raw == 0 && (SIN(nam)->sin_addr.s_addr == INADDR_ANY ||
	    SIN(nam)->sin_addr.s_addr == (u_int32_t)INADDR_BROADCAST)) {
		lck_rw_lock_shared(in_ifaddr_rwlock);
		if (!TAILQ_EMPTY(&in_ifaddrhead)) {
			/* "Primary" is the first entry of the address list */
			ia = TAILQ_FIRST(&in_ifaddrhead);
			IFA_LOCK_SPIN(&ia->ia_ifa);
			if (SIN(nam)->sin_addr.s_addr == INADDR_ANY) {
				SIN(nam)->sin_addr = IA_SIN(ia)->sin_addr;
			} else if (ia->ia_ifp->if_flags & IFF_BROADCAST) {
				SIN(nam)->sin_addr =
				    SIN(&ia->ia_broadaddr)->sin_addr;
			}
			IFA_UNLOCK(&ia->ia_ifa);
			/* No reference was taken; do not carry ia further */
			ia = NULL;
		}
		lck_rw_done(in_ifaddr_rwlock);
	}
	/*
	 * Otherwise, if the socket has already bound the source, just use it.
	 */
	if (inp->inp_laddr.s_addr != INADDR_ANY) {
		VERIFY(ia == NULL);
		*laddr = inp->inp_laddr;
		return (0);
	}

	/*
	 * If the ifscope is specified by the caller (e.g. IP_PKTINFO)
	 * then it overrides the sticky ifscope set for the socket.
	 */
	if (ifscope == IFSCOPE_NONE && (inp->inp_flags & INP_BOUND_IF))
		ifscope = inp->inp_boundifp->if_index;

	/*
	 * If route is known or can be allocated now,
	 * our src addr is taken from the i/f, else punt.
	 * Note that we should check the address family of the cached
	 * destination, in case of sharing the cache with IPv6.
	 */
	if (ro->ro_rt != NULL)
		RT_LOCK_SPIN(ro->ro_rt);
	/*
	 * Discard the cached route when it is unusable, was cached for a
	 * different family or destination, or when routing is bypassed.
	 */
	if (ROUTE_UNUSABLE(ro) || ro->ro_dst.sa_family != AF_INET ||
	    SIN(&ro->ro_dst)->sin_addr.s_addr != SIN(nam)->sin_addr.s_addr ||
	    (inp->inp_socket->so_options & SO_DONTROUTE)) {
		if (ro->ro_rt != NULL)
			RT_UNLOCK(ro->ro_rt);
		ROUTE_RELEASE(ro);
	}
	if (!(inp->inp_socket->so_options & SO_DONTROUTE) &&
	    (ro->ro_rt == NULL || ro->ro_rt->rt_ifp == NULL)) {
		if (ro->ro_rt != NULL)
			RT_UNLOCK(ro->ro_rt);
		ROUTE_RELEASE(ro);
		/* No route yet, so try to acquire one */
		bzero(&ro->ro_dst, sizeof (struct sockaddr_in));
		ro->ro_dst.sa_family = AF_INET;
		ro->ro_dst.sa_len = sizeof (struct sockaddr_in);
		SIN(&ro->ro_dst)->sin_addr = SIN(nam)->sin_addr;
		rtalloc_scoped(ro, ifscope);
		/* From here on a non-NULL ro_rt is held locked */
		if (ro->ro_rt != NULL)
			RT_LOCK_SPIN(ro->ro_rt);
	}
	/* Sanitized local copy for interface address searches */
	bzero(&sin, sizeof (sin));
	sin.sin_family = AF_INET;
	sin.sin_len = sizeof (struct sockaddr_in);
	sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;
	/*
	 * If we did not find (or use) a route, assume dest is reachable
	 * on a directly connected network and try to find a corresponding
	 * interface to take the source address from.
	 */
	if (ro->ro_rt == NULL) {
		proc_t proc = current_proc();

		VERIFY(ia == NULL);
		/*
		 * NOTE(review): these lookups presumably return the address
		 * with a reference held, since it is dropped with
		 * IFA_REMREF() under "done" -- confirm.
		 */
		ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
		if (ia == NULL)
			ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
		error = ((ia == NULL) ? ENETUNREACH : 0);

		/*
		 * No route to the destination: check whether this warrants
		 * an APN fallback notification (independent of "error").
		 */
		if (apn_fallback_required(proc, inp->inp_socket,
		    (void *)nam))
			apn_fallback_trigger(proc, inp->inp_socket);

		goto done;
	}
	RT_LOCK_ASSERT_HELD(ro->ro_rt);
	/*
	 * If the outgoing interface on the route found is not
	 * a loopback interface, use the address from that interface.
	 */
	if (!(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) {
		VERIFY(ia == NULL);
		/*
		 * If the route points to a cellular interface and the
		 * caller forbids our using interfaces of such type,
		 * pretend that there is no route.
		 * Apply the same logic for expensive interfaces.
		 */
		if (inp_restricted_send(inp, ro->ro_rt->rt_ifp)) {
			RT_UNLOCK(ro->ro_rt);
			ROUTE_RELEASE(ro);
			error = EHOSTUNREACH;
			/* Remembered so the socket is notified before return */
			restricted = TRUE;
		} else {
			/* Become a regular mutex */
			RT_CONVERT_LOCK(ro->ro_rt);
			ia = ifatoia(ro->ro_rt->rt_ifa);
			/* Reference dropped by IFA_REMREF() under "done" */
			IFA_ADDREF(&ia->ia_ifa);

			/*
			 * Mark the control block for notification of
			 * a possible flow that might undergo clat46
			 * translation.
			 *
			 * We defer the decision to a later point when
			 * inpcb is being disposed of.
			 * The reason is that we only want to send notification
			 * if the flow was ever used to send data.
			 */
			if (IS_INTF_CLAT46(ro->ro_rt->rt_ifp))
				inp->inp_flags2 |= INP2_CLAT46_FLOW;

			RT_UNLOCK(ro->ro_rt);
			error = 0;
		}
		goto done;
	}
	VERIFY(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK);
	/* The address lookups below run without the route lock held */
	RT_UNLOCK(ro->ro_rt);
	/*
	 * The outgoing interface is marked with 'loopback net', so a route
	 * to ourselves is here.
	 * Try to find the interface of the destination address and then
	 * take the address from there.  That interface is not necessarily
	 * a loopback interface.
	 */
	VERIFY(ia == NULL);
	ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
	if (ia == NULL)
		ia = ifatoia(ifa_ifwithaddr_scoped(SA(&sin), ifscope));
	if (ia == NULL)
		ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
	if (ia == NULL) {
		/* Last resort: the address attached to the route itself */
		RT_LOCK(ro->ro_rt);
		ia = ifatoia(ro->ro_rt->rt_ifa);
		if (ia != NULL)
			IFA_ADDREF(&ia->ia_ifa);
		RT_UNLOCK(ro->ro_rt);
	}
	error = ((ia == NULL) ? ENETUNREACH : 0);

done:
	/*
	 * If the destination address is multicast and an outgoing
	 * interface has been set as a multicast option, use the
	 * address of that interface as our source address.
	 */
	if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
	    inp->inp_moptions != NULL) {
		struct ip_moptions *imo;
		struct ifnet *ifp;

		imo = inp->inp_moptions;
		IMO_LOCK(imo);
		if (imo->imo_multicast_ifp != NULL && (ia == NULL ||
		    ia->ia_ifp != imo->imo_multicast_ifp)) {
			ifp = imo->imo_multicast_ifp;
			/* Replace whatever address was picked above */
			if (ia != NULL)
				IFA_REMREF(&ia->ia_ifa);
			lck_rw_lock_shared(in_ifaddr_rwlock);
			/* First address configured on the multicast interface */
			TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
				if (ia->ia_ifp == ifp)
					break;
			}
			if (ia != NULL)
				IFA_ADDREF(&ia->ia_ifa);
			lck_rw_done(in_ifaddr_rwlock);
			/* This overrides any error recorded before "done" */
			if (ia == NULL)
				error = EADDRNOTAVAIL;
			else
				error = 0;
		}
		IMO_UNLOCK(imo);
	}
	/*
	 * Don't do pcblookup call here; return interface in laddr
	 * and exit to caller, that will do the lookup.
	 */
	if (ia != NULL) {
		/*
		 * If the source address belongs to a cellular interface
		 * and the socket forbids our using interfaces of such
		 * type, pretend that there is no source address.
		 * Apply the same logic for expensive interfaces.
		 */
		IFA_LOCK_SPIN(&ia->ia_ifa);
		if (inp_restricted_send(inp, ia->ia_ifa.ifa_ifp)) {
			IFA_UNLOCK(&ia->ia_ifa);
			error = EHOSTUNREACH;
			restricted = TRUE;
		} else if (error == 0) {
			*laddr = ia->ia_addr.sin_addr;
			if (outif != NULL) {
				struct ifnet *ifp;

				/* Prefer the route's interface when there is one */
				if (ro->ro_rt != NULL)
					ifp = ro->ro_rt->rt_ifp;
				else
					ifp = ia->ia_ifp;

				VERIFY(ifp != NULL);
				IFA_CONVERT_LOCK(&ia->ia_ifa);
				ifnet_reference(ifp);	/* for caller */
				if (*outif != NULL)
					ifnet_release(*outif);
				*outif = ifp;
			}
			IFA_UNLOCK(&ia->ia_ifa);
		} else {
			IFA_UNLOCK(&ia->ia_ifa);
		}
		/* Done with the address; only *laddr / *outif survive */
		IFA_REMREF(&ia->ia_ifa);
		ia = NULL;
	}

	/* Tell the socket that the interface was denied to it */
	if (restricted && error == EHOSTUNREACH) {
		soevent(inp->inp_socket, (SO_FILT_HINT_LOCKED |
		    SO_FILT_HINT_IFDENIED));
	}

	return (error);
}
1468 | |
1469 | /* |
1470 | * Outer subroutine: |
1471 | * Connect from a socket to a specified address. |
1472 | * Both address and port must be specified in argument sin. |
1473 | * If don't have a local address for this socket yet, |
1474 | * then pick one. |
1475 | * |
1476 | * The caller may override the bound-to-interface setting of the socket |
1477 | * by specifying the ifscope parameter (e.g. from IP_PKTINFO.) |
1478 | */ |
1479 | int |
1480 | in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p, |
1481 | unsigned int ifscope, struct ifnet **outif) |
1482 | { |
1483 | struct in_addr laddr; |
1484 | struct sockaddr_in *sin = (struct sockaddr_in *)(void *)nam; |
1485 | struct inpcb *pcb; |
1486 | int error; |
1487 | struct socket *so = inp->inp_socket; |
1488 | |
1489 | #if CONTENT_FILTER |
1490 | if (so) |
1491 | so->so_state_change_cnt++; |
1492 | #endif |
1493 | |
1494 | /* |
1495 | * Call inner routine, to assign local interface address. |
1496 | */ |
1497 | if ((error = in_pcbladdr(inp, nam, &laddr, ifscope, outif, 0)) != 0) |
1498 | return (error); |
1499 | |
1500 | socket_unlock(so, 0); |
1501 | pcb = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port, |
1502 | inp->inp_laddr.s_addr ? inp->inp_laddr : laddr, |
1503 | inp->inp_lport, 0, NULL); |
1504 | socket_lock(so, 0); |
1505 | |
1506 | /* |
1507 | * Check if the socket is still in a valid state. When we unlock this |
1508 | * embryonic socket, it can get aborted if another thread is closing |
1509 | * the listener (radar 7947600). |
1510 | */ |
1511 | if ((so->so_flags & SOF_ABORTED) != 0) |
1512 | return (ECONNREFUSED); |
1513 | |
1514 | if (pcb != NULL) { |
1515 | in_pcb_checkstate(pcb, WNT_RELEASE, pcb == inp ? 1 : 0); |
1516 | return (EADDRINUSE); |
1517 | } |
1518 | if (inp->inp_laddr.s_addr == INADDR_ANY) { |
1519 | if (inp->inp_lport == 0) { |
1520 | error = in_pcbbind(inp, NULL, p); |
1521 | if (error) |
1522 | return (error); |
1523 | } |
1524 | if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) { |
1525 | /* |
1526 | * Lock inversion issue, mostly with udp |
1527 | * multicast packets. |
1528 | */ |
1529 | socket_unlock(so, 0); |
1530 | lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock); |
1531 | socket_lock(so, 0); |
1532 | } |
1533 | inp->inp_laddr = laddr; |
1534 | /* no reference needed */ |
1535 | inp->inp_last_outifp = (outif != NULL) ? *outif : NULL; |
1536 | inp->inp_flags |= INP_INADDR_ANY; |
1537 | } else { |
1538 | /* |
1539 | * Usage of IP_PKTINFO, without local port already |
1540 | * speficified will cause kernel to panic, |
1541 | * see rdar://problem/18508185. |
1542 | * For now returning error to avoid a kernel panic |
1543 | * This routines can be refactored and handle this better |
1544 | * in future. |
1545 | */ |
1546 | if (inp->inp_lport == 0) |
1547 | return (EINVAL); |
1548 | if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) { |
1549 | /* |
1550 | * Lock inversion issue, mostly with udp |
1551 | * multicast packets. |
1552 | */ |
1553 | socket_unlock(so, 0); |
1554 | lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock); |
1555 | socket_lock(so, 0); |
1556 | } |
1557 | } |
1558 | inp->inp_faddr = sin->sin_addr; |
1559 | inp->inp_fport = sin->sin_port; |
1560 | if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP) |
1561 | nstat_pcb_invalidate_cache(inp); |
1562 | in_pcbrehash(inp); |
1563 | lck_rw_done(inp->inp_pcbinfo->ipi_lock); |
1564 | return (0); |
1565 | } |
1566 | |
1567 | void |
1568 | in_pcbdisconnect(struct inpcb *inp) |
1569 | { |
1570 | struct socket *so = inp->inp_socket; |
1571 | |
1572 | if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP) |
1573 | nstat_pcb_cache(inp); |
1574 | |
1575 | inp->inp_faddr.s_addr = INADDR_ANY; |
1576 | inp->inp_fport = 0; |
1577 | |
1578 | #if CONTENT_FILTER |
1579 | if (so) |
1580 | so->so_state_change_cnt++; |
1581 | #endif |
1582 | |
1583 | if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) { |
1584 | /* lock inversion issue, mostly with udp multicast packets */ |
1585 | socket_unlock(so, 0); |
1586 | lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock); |
1587 | socket_lock(so, 0); |
1588 | } |
1589 | |
1590 | in_pcbrehash(inp); |
1591 | lck_rw_done(inp->inp_pcbinfo->ipi_lock); |
1592 | /* |
1593 | * A multipath subflow socket would have its SS_NOFDREF set by default, |
1594 | * so check for SOF_MP_SUBFLOW socket flag before detaching the PCB; |
1595 | * when the socket is closed for real, SOF_MP_SUBFLOW would be cleared. |
1596 | */ |
1597 | if (!(so->so_flags & SOF_MP_SUBFLOW) && (so->so_state & SS_NOFDREF)) |
1598 | in_pcbdetach(inp); |
1599 | } |
1600 | |
1601 | void |
1602 | in_pcbdetach(struct inpcb *inp) |
1603 | { |
1604 | struct socket *so = inp->inp_socket; |
1605 | |
1606 | if (so->so_pcb == NULL) { |
1607 | /* PCB has been disposed */ |
1608 | panic("%s: inp=%p so=%p proto=%d so_pcb is null!\n" , __func__, |
1609 | inp, so, SOCK_PROTO(so)); |
1610 | /* NOTREACHED */ |
1611 | } |
1612 | |
1613 | #if IPSEC |
1614 | if (inp->inp_sp != NULL) { |
1615 | (void) ipsec4_delete_pcbpolicy(inp); |
1616 | } |
1617 | #endif /* IPSEC */ |
1618 | |
1619 | if (inp->inp_stat != NULL && SOCK_PROTO(so) == IPPROTO_UDP) { |
1620 | if (inp->inp_stat->rxpackets == 0 && inp->inp_stat->txpackets == 0) { |
1621 | INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet_dgram_no_data); |
1622 | } |
1623 | } |
1624 | |
1625 | /* |
1626 | * Let NetworkStatistics know this PCB is going away |
1627 | * before we detach it. |
1628 | */ |
1629 | if (nstat_collect && |
1630 | (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP)) |
1631 | nstat_pcb_detach(inp); |
1632 | |
1633 | /* Free memory buffer held for generating keep alives */ |
1634 | if (inp->inp_keepalive_data != NULL) { |
1635 | FREE(inp->inp_keepalive_data, M_TEMP); |
1636 | inp->inp_keepalive_data = NULL; |
1637 | } |
1638 | |
1639 | /* mark socket state as dead */ |
1640 | if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) { |
1641 | panic("%s: so=%p proto=%d couldn't set to STOPUSING\n" , |
1642 | __func__, so, SOCK_PROTO(so)); |
1643 | /* NOTREACHED */ |
1644 | } |
1645 | |
1646 | if (!(so->so_flags & SOF_PCBCLEARING)) { |
1647 | struct ip_moptions *imo; |
1648 | |
1649 | inp->inp_vflag = 0; |
1650 | if (inp->inp_options != NULL) { |
1651 | (void) m_free(inp->inp_options); |
1652 | inp->inp_options = NULL; |
1653 | } |
1654 | ROUTE_RELEASE(&inp->inp_route); |
1655 | imo = inp->inp_moptions; |
1656 | inp->inp_moptions = NULL; |
1657 | sofreelastref(so, 0); |
1658 | inp->inp_state = INPCB_STATE_DEAD; |
1659 | |
1660 | /* |
1661 | * Enqueue an event to send kernel event notification |
1662 | * if the flow has to CLAT46 for data packets |
1663 | */ |
1664 | if (inp->inp_flags2 & INP2_CLAT46_FLOW) { |
1665 | /* |
1666 | * If there has been any exchange of data bytes |
1667 | * over this flow. |
1668 | * Schedule a notification to report that flow is |
1669 | * using client side translation. |
1670 | */ |
1671 | if (inp->inp_stat != NULL && |
1672 | (inp->inp_stat->txbytes != 0 || |
1673 | inp->inp_stat->rxbytes !=0)) { |
1674 | if (so->so_flags & SOF_DELEGATED) { |
1675 | in6_clat46_event_enqueue_nwk_wq_entry( |
1676 | IN6_CLAT46_EVENT_V4_FLOW, |
1677 | so->e_pid, |
1678 | so->e_uuid); |
1679 | } else { |
1680 | in6_clat46_event_enqueue_nwk_wq_entry( |
1681 | IN6_CLAT46_EVENT_V4_FLOW, |
1682 | so->last_pid, |
1683 | so->last_uuid); |
1684 | } |
1685 | } |
1686 | } |
1687 | |
1688 | /* makes sure we're not called twice from so_close */ |
1689 | so->so_flags |= SOF_PCBCLEARING; |
1690 | |
1691 | inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST); |
1692 | |
1693 | /* |
1694 | * See inp_join_group() for why we need to unlock |
1695 | */ |
1696 | if (imo != NULL) { |
1697 | socket_unlock(so, 0); |
1698 | IMO_REMREF(imo); |
1699 | socket_lock(so, 0); |
1700 | } |
1701 | } |
1702 | } |
1703 | |
1704 | |
/*
 * Final disposal of a PCB: unlink it from the pcbinfo lists, sever the
 * socket <-> PCB association and free both the PCB and the socket.
 *
 * The pcbinfo lock must be held exclusively (asserted below).  Panics if
 * the socket is still in use (so_usecount != 0) or if the PCB has not been
 * marked WNT_STOPUSING.  When the PCB has no socket, only the list removal
 * and generation-count update are performed here.
 */
void
in_pcbdispose(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;
	struct inpcbinfo *ipi = inp->inp_pcbinfo;

	/* Refuse to dispose of anything that may still be referenced */
	if (so != NULL && so->so_usecount != 0) {
		panic("%s: so %p [%d,%d] usecount %d lockhistory %s\n",
		    __func__, so, SOCK_DOM(so), SOCK_TYPE(so), so->so_usecount,
		    solockhistory_nr(so));
		/* NOTREACHED */
	} else if (inp->inp_wantcnt != WNT_STOPUSING) {
		if (so != NULL) {
			panic_plain("%s: inp %p invalid wantcnt %d, so %p "
			    "[%d,%d] usecount %d retaincnt %d state 0x%x "
			    "flags 0x%x lockhistory %s\n", __func__, inp,
			    inp->inp_wantcnt, so, SOCK_DOM(so), SOCK_TYPE(so),
			    so->so_usecount, so->so_retaincnt, so->so_state,
			    so->so_flags, solockhistory_nr(so));
			/* NOTREACHED */
		} else {
			panic("%s: inp %p invalid wantcnt %d no socket\n",
			    __func__, inp, inp->inp_wantcnt);
			/* NOTREACHED */
		}
	}

	LCK_RW_ASSERT(ipi->ipi_lock, LCK_RW_ASSERT_EXCLUSIVE);

	/* Stamp the PCB with a fresh generation count before unlinking */
	inp->inp_gencnt = ++ipi->ipi_gencnt;
	/* access ipi in in_pcbremlists */
	in_pcbremlists(inp);

	if (so != NULL) {
		/* Protocols flagged PR_PCBLOCK use the per-PCB mutex inpcb_mtx */
		if (so->so_proto->pr_flags & PR_PCBLOCK) {
			sofreelastref(so, 0);
			if (so->so_rcv.sb_cc > 0 || so->so_snd.sb_cc > 0) {
				/*
				 * selthreadclear() already called
				 * during sofreelastref() above.
				 */
				sbrelease(&so->so_rcv);
				sbrelease(&so->so_snd);
			}
			if (so->so_head != NULL) {
				panic("%s: so=%p head still exist\n",
				    __func__, so);
				/* NOTREACHED */
			}
			/* The mutex is unlocked here and destroyed just below */
			lck_mtx_unlock(&inp->inpcb_mtx);

#if NECP
			necp_inpcb_remove_cb(inp);
#endif /* NECP */

			lck_mtx_destroy(&inp->inpcb_mtx, ipi->ipi_lock_grp);
		}
		/* makes sure we're not called twice from so_close */
		so->so_flags |= SOF_PCBCLEARING;
		/* Keep the old PCB pointer in so_saved_pcb, then unlink both ways */
		so->so_saved_pcb = (caddr_t)inp;
		so->so_pcb = NULL;
		inp->inp_socket = NULL;
#if CONFIG_MACF_NET
		mac_inpcb_label_destroy(inp);
#endif /* CONFIG_MACF_NET */
#if NECP
		necp_inpcb_dispose(inp);
#endif /* NECP */
		/*
		 * In case there is a route cached after a detach (possible
		 * in the tcp case), make sure that it is freed before
		 * we deallocate the structure.
		 */
		ROUTE_RELEASE(&inp->inp_route);
		/*
		 * The PCB goes back to its zone only when the flag
		 * SOF1_CACHED_IN_SOCK_LAYER is clear.  so_flags1 is read
		 * before sodealloc() releases the socket.
		 */
		if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) {
			zfree(ipi->ipi_zone, inp);
		}
		sodealloc(so);
	}
}
1785 | |
1786 | /* |
1787 | * The calling convention of in_getsockaddr() and in_getpeeraddr() was |
1788 | * modified to match the pru_sockaddr() and pru_peeraddr() entry points |
1789 | * in struct pr_usrreqs, so that protocols can just reference then directly |
1790 | * without the need for a wrapper function. |
1791 | */ |
1792 | int |
1793 | in_getsockaddr(struct socket *so, struct sockaddr **nam) |
1794 | { |
1795 | struct inpcb *inp; |
1796 | struct sockaddr_in *sin; |
1797 | |
1798 | /* |
1799 | * Do the malloc first in case it blocks. |
1800 | */ |
1801 | MALLOC(sin, struct sockaddr_in *, sizeof (*sin), M_SONAME, M_WAITOK); |
1802 | if (sin == NULL) |
1803 | return (ENOBUFS); |
1804 | bzero(sin, sizeof (*sin)); |
1805 | sin->sin_family = AF_INET; |
1806 | sin->sin_len = sizeof (*sin); |
1807 | |
1808 | if ((inp = sotoinpcb(so)) == NULL) { |
1809 | FREE(sin, M_SONAME); |
1810 | return (EINVAL); |
1811 | } |
1812 | sin->sin_port = inp->inp_lport; |
1813 | sin->sin_addr = inp->inp_laddr; |
1814 | |
1815 | *nam = (struct sockaddr *)sin; |
1816 | return (0); |
1817 | } |
1818 | |
1819 | int |
1820 | in_getsockaddr_s(struct socket *so, struct sockaddr_in *ss) |
1821 | { |
1822 | struct sockaddr_in *sin = ss; |
1823 | struct inpcb *inp; |
1824 | |
1825 | VERIFY(ss != NULL); |
1826 | bzero(ss, sizeof (*ss)); |
1827 | |
1828 | sin->sin_family = AF_INET; |
1829 | sin->sin_len = sizeof (*sin); |
1830 | |
1831 | if ((inp = sotoinpcb(so)) == NULL) |
1832 | return (EINVAL); |
1833 | |
1834 | sin->sin_port = inp->inp_lport; |
1835 | sin->sin_addr = inp->inp_laddr; |
1836 | return (0); |
1837 | } |
1838 | |
1839 | int |
1840 | in_getpeeraddr(struct socket *so, struct sockaddr **nam) |
1841 | { |
1842 | struct inpcb *inp; |
1843 | struct sockaddr_in *sin; |
1844 | |
1845 | /* |
1846 | * Do the malloc first in case it blocks. |
1847 | */ |
1848 | MALLOC(sin, struct sockaddr_in *, sizeof (*sin), M_SONAME, M_WAITOK); |
1849 | if (sin == NULL) |
1850 | return (ENOBUFS); |
1851 | bzero((caddr_t)sin, sizeof (*sin)); |
1852 | sin->sin_family = AF_INET; |
1853 | sin->sin_len = sizeof (*sin); |
1854 | |
1855 | if ((inp = sotoinpcb(so)) == NULL) { |
1856 | FREE(sin, M_SONAME); |
1857 | return (EINVAL); |
1858 | } |
1859 | sin->sin_port = inp->inp_fport; |
1860 | sin->sin_addr = inp->inp_faddr; |
1861 | |
1862 | *nam = (struct sockaddr *)sin; |
1863 | return (0); |
1864 | } |
1865 | |
1866 | void |
1867 | in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr, |
1868 | int errno, void (*notify)(struct inpcb *, int)) |
1869 | { |
1870 | struct inpcb *inp; |
1871 | |
1872 | lck_rw_lock_shared(pcbinfo->ipi_lock); |
1873 | |
1874 | LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) { |
1875 | #if INET6 |
1876 | if (!(inp->inp_vflag & INP_IPV4)) |
1877 | continue; |
1878 | #endif /* INET6 */ |
1879 | if (inp->inp_faddr.s_addr != faddr.s_addr || |
1880 | inp->inp_socket == NULL) |
1881 | continue; |
1882 | if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) |
1883 | continue; |
1884 | socket_lock(inp->inp_socket, 1); |
1885 | (*notify)(inp, errno); |
1886 | (void) in_pcb_checkstate(inp, WNT_RELEASE, 1); |
1887 | socket_unlock(inp->inp_socket, 1); |
1888 | } |
1889 | lck_rw_done(pcbinfo->ipi_lock); |
1890 | } |
1891 | |
1892 | /* |
1893 | * Check for alternatives when higher level complains |
1894 | * about service problems. For now, invalidate cached |
1895 | * routing information. If the route was created dynamically |
1896 | * (by a redirect), time to try a default gateway again. |
1897 | */ |
1898 | void |
1899 | in_losing(struct inpcb *inp) |
1900 | { |
1901 | boolean_t release = FALSE; |
1902 | struct rtentry *rt; |
1903 | |
1904 | if ((rt = inp->inp_route.ro_rt) != NULL) { |
1905 | struct in_ifaddr *ia = NULL; |
1906 | |
1907 | RT_LOCK(rt); |
1908 | if (rt->rt_flags & RTF_DYNAMIC) { |
1909 | /* |
1910 | * Prevent another thread from modifying rt_key, |
1911 | * rt_gateway via rt_setgate() after rt_lock is |
1912 | * dropped by marking the route as defunct. |
1913 | */ |
1914 | rt->rt_flags |= RTF_CONDEMNED; |
1915 | RT_UNLOCK(rt); |
1916 | (void) rtrequest(RTM_DELETE, rt_key(rt), |
1917 | rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL); |
1918 | } else { |
1919 | RT_UNLOCK(rt); |
1920 | } |
1921 | /* if the address is gone keep the old route in the pcb */ |
1922 | if (inp->inp_laddr.s_addr != INADDR_ANY && |
1923 | (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) { |
1924 | /* |
1925 | * Address is around; ditch the route. A new route |
1926 | * can be allocated the next time output is attempted. |
1927 | */ |
1928 | release = TRUE; |
1929 | } |
1930 | if (ia != NULL) |
1931 | IFA_REMREF(&ia->ia_ifa); |
1932 | } |
1933 | if (rt == NULL || release) |
1934 | ROUTE_RELEASE(&inp->inp_route); |
1935 | } |
1936 | |
1937 | /* |
1938 | * After a routing change, flush old routing |
1939 | * and allocate a (hopefully) better one. |
1940 | */ |
1941 | void |
1942 | in_rtchange(struct inpcb *inp, int errno) |
1943 | { |
1944 | #pragma unused(errno) |
1945 | boolean_t release = FALSE; |
1946 | struct rtentry *rt; |
1947 | |
1948 | if ((rt = inp->inp_route.ro_rt) != NULL) { |
1949 | struct in_ifaddr *ia = NULL; |
1950 | |
1951 | /* if address is gone, keep the old route */ |
1952 | if (inp->inp_laddr.s_addr != INADDR_ANY && |
1953 | (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) { |
1954 | /* |
1955 | * Address is around; ditch the route. A new route |
1956 | * can be allocated the next time output is attempted. |
1957 | */ |
1958 | release = TRUE; |
1959 | } |
1960 | if (ia != NULL) |
1961 | IFA_REMREF(&ia->ia_ifa); |
1962 | } |
1963 | if (rt == NULL || release) |
1964 | ROUTE_RELEASE(&inp->inp_route); |
1965 | } |
1966 | |
1967 | /* |
1968 | * Lookup a PCB based on the local address and port. |
1969 | */ |
1970 | struct inpcb * |
1971 | in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr, |
1972 | unsigned int lport_arg, int wild_okay) |
1973 | { |
1974 | struct inpcb *inp; |
1975 | int matchwild = 3, wildcard; |
1976 | u_short lport = lport_arg; |
1977 | |
1978 | KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_START, 0, 0, 0, 0, 0); |
1979 | |
1980 | if (!wild_okay) { |
1981 | struct inpcbhead *head; |
1982 | /* |
1983 | * Look for an unconnected (wildcard foreign addr) PCB that |
1984 | * matches the local address and port we're looking for. |
1985 | */ |
1986 | head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, |
1987 | pcbinfo->ipi_hashmask)]; |
1988 | LIST_FOREACH(inp, head, inp_hash) { |
1989 | #if INET6 |
1990 | if (!(inp->inp_vflag & INP_IPV4)) |
1991 | continue; |
1992 | #endif /* INET6 */ |
1993 | if (inp->inp_faddr.s_addr == INADDR_ANY && |
1994 | inp->inp_laddr.s_addr == laddr.s_addr && |
1995 | inp->inp_lport == lport) { |
1996 | /* |
1997 | * Found. |
1998 | */ |
1999 | return (inp); |
2000 | } |
2001 | } |
2002 | /* |
2003 | * Not found. |
2004 | */ |
2005 | KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, 0, 0, 0, 0, 0); |
2006 | return (NULL); |
2007 | } else { |
2008 | struct inpcbporthead *porthash; |
2009 | struct inpcbport *phd; |
2010 | struct inpcb *match = NULL; |
2011 | /* |
2012 | * Best fit PCB lookup. |
2013 | * |
2014 | * First see if this local port is in use by looking on the |
2015 | * port hash list. |
2016 | */ |
2017 | porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport, |
2018 | pcbinfo->ipi_porthashmask)]; |
2019 | LIST_FOREACH(phd, porthash, phd_hash) { |
2020 | if (phd->phd_port == lport) |
2021 | break; |
2022 | } |
2023 | if (phd != NULL) { |
2024 | /* |
2025 | * Port is in use by one or more PCBs. Look for best |
2026 | * fit. |
2027 | */ |
2028 | LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { |
2029 | wildcard = 0; |
2030 | #if INET6 |
2031 | if (!(inp->inp_vflag & INP_IPV4)) |
2032 | continue; |
2033 | #endif /* INET6 */ |
2034 | if (inp->inp_faddr.s_addr != INADDR_ANY) |
2035 | wildcard++; |
2036 | if (inp->inp_laddr.s_addr != INADDR_ANY) { |
2037 | if (laddr.s_addr == INADDR_ANY) |
2038 | wildcard++; |
2039 | else if (inp->inp_laddr.s_addr != |
2040 | laddr.s_addr) |
2041 | continue; |
2042 | } else { |
2043 | if (laddr.s_addr != INADDR_ANY) |
2044 | wildcard++; |
2045 | } |
2046 | if (wildcard < matchwild) { |
2047 | match = inp; |
2048 | matchwild = wildcard; |
2049 | if (matchwild == 0) { |
2050 | break; |
2051 | } |
2052 | } |
2053 | } |
2054 | } |
2055 | KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, match, |
2056 | 0, 0, 0, 0); |
2057 | return (match); |
2058 | } |
2059 | } |
2060 | |
2061 | /* |
2062 | * Check if PCB exists in hash list. |
2063 | */ |
2064 | int |
2065 | in_pcblookup_hash_exists(struct inpcbinfo *pcbinfo, struct in_addr faddr, |
2066 | u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard, |
2067 | uid_t *uid, gid_t *gid, struct ifnet *ifp) |
2068 | { |
2069 | struct inpcbhead *head; |
2070 | struct inpcb *inp; |
2071 | u_short fport = fport_arg, lport = lport_arg; |
2072 | int found = 0; |
2073 | struct inpcb *local_wild = NULL; |
2074 | #if INET6 |
2075 | struct inpcb *local_wild_mapped = NULL; |
2076 | #endif /* INET6 */ |
2077 | |
2078 | *uid = UID_MAX; |
2079 | *gid = GID_MAX; |
2080 | |
2081 | /* |
2082 | * We may have found the pcb in the last lookup - check this first. |
2083 | */ |
2084 | |
2085 | lck_rw_lock_shared(pcbinfo->ipi_lock); |
2086 | |
2087 | /* |
2088 | * First look for an exact match. |
2089 | */ |
2090 | head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, |
2091 | pcbinfo->ipi_hashmask)]; |
2092 | LIST_FOREACH(inp, head, inp_hash) { |
2093 | #if INET6 |
2094 | if (!(inp->inp_vflag & INP_IPV4)) |
2095 | continue; |
2096 | #endif /* INET6 */ |
2097 | if (inp_restricted_recv(inp, ifp)) |
2098 | continue; |
2099 | |
2100 | if (inp->inp_faddr.s_addr == faddr.s_addr && |
2101 | inp->inp_laddr.s_addr == laddr.s_addr && |
2102 | inp->inp_fport == fport && |
2103 | inp->inp_lport == lport) { |
2104 | if ((found = (inp->inp_socket != NULL))) { |
2105 | /* |
2106 | * Found. |
2107 | */ |
2108 | *uid = kauth_cred_getuid( |
2109 | inp->inp_socket->so_cred); |
2110 | *gid = kauth_cred_getgid( |
2111 | inp->inp_socket->so_cred); |
2112 | } |
2113 | lck_rw_done(pcbinfo->ipi_lock); |
2114 | return (found); |
2115 | } |
2116 | } |
2117 | |
2118 | if (!wildcard) { |
2119 | /* |
2120 | * Not found. |
2121 | */ |
2122 | lck_rw_done(pcbinfo->ipi_lock); |
2123 | return (0); |
2124 | } |
2125 | |
2126 | head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, |
2127 | pcbinfo->ipi_hashmask)]; |
2128 | LIST_FOREACH(inp, head, inp_hash) { |
2129 | #if INET6 |
2130 | if (!(inp->inp_vflag & INP_IPV4)) |
2131 | continue; |
2132 | #endif /* INET6 */ |
2133 | if (inp_restricted_recv(inp, ifp)) |
2134 | continue; |
2135 | |
2136 | if (inp->inp_faddr.s_addr == INADDR_ANY && |
2137 | inp->inp_lport == lport) { |
2138 | if (inp->inp_laddr.s_addr == laddr.s_addr) { |
2139 | if ((found = (inp->inp_socket != NULL))) { |
2140 | *uid = kauth_cred_getuid( |
2141 | inp->inp_socket->so_cred); |
2142 | *gid = kauth_cred_getgid( |
2143 | inp->inp_socket->so_cred); |
2144 | } |
2145 | lck_rw_done(pcbinfo->ipi_lock); |
2146 | return (found); |
2147 | } else if (inp->inp_laddr.s_addr == INADDR_ANY) { |
2148 | #if INET6 |
2149 | if (inp->inp_socket && |
2150 | SOCK_CHECK_DOM(inp->inp_socket, PF_INET6)) |
2151 | local_wild_mapped = inp; |
2152 | else |
2153 | #endif /* INET6 */ |
2154 | local_wild = inp; |
2155 | } |
2156 | } |
2157 | } |
2158 | if (local_wild == NULL) { |
2159 | #if INET6 |
2160 | if (local_wild_mapped != NULL) { |
2161 | if ((found = (local_wild_mapped->inp_socket != NULL))) { |
2162 | *uid = kauth_cred_getuid( |
2163 | local_wild_mapped->inp_socket->so_cred); |
2164 | *gid = kauth_cred_getgid( |
2165 | local_wild_mapped->inp_socket->so_cred); |
2166 | } |
2167 | lck_rw_done(pcbinfo->ipi_lock); |
2168 | return (found); |
2169 | } |
2170 | #endif /* INET6 */ |
2171 | lck_rw_done(pcbinfo->ipi_lock); |
2172 | return (0); |
2173 | } |
2174 | if ((found = (local_wild->inp_socket != NULL))) { |
2175 | *uid = kauth_cred_getuid( |
2176 | local_wild->inp_socket->so_cred); |
2177 | *gid = kauth_cred_getgid( |
2178 | local_wild->inp_socket->so_cred); |
2179 | } |
2180 | lck_rw_done(pcbinfo->ipi_lock); |
2181 | return (found); |
2182 | } |
2183 | |
2184 | /* |
2185 | * Lookup PCB in hash list. |
2186 | */ |
2187 | struct inpcb * |
2188 | in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr, |
2189 | u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard, |
2190 | struct ifnet *ifp) |
2191 | { |
2192 | struct inpcbhead *head; |
2193 | struct inpcb *inp; |
2194 | u_short fport = fport_arg, lport = lport_arg; |
2195 | struct inpcb *local_wild = NULL; |
2196 | #if INET6 |
2197 | struct inpcb *local_wild_mapped = NULL; |
2198 | #endif /* INET6 */ |
2199 | |
2200 | /* |
2201 | * We may have found the pcb in the last lookup - check this first. |
2202 | */ |
2203 | |
2204 | lck_rw_lock_shared(pcbinfo->ipi_lock); |
2205 | |
2206 | /* |
2207 | * First look for an exact match. |
2208 | */ |
2209 | head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, |
2210 | pcbinfo->ipi_hashmask)]; |
2211 | LIST_FOREACH(inp, head, inp_hash) { |
2212 | #if INET6 |
2213 | if (!(inp->inp_vflag & INP_IPV4)) |
2214 | continue; |
2215 | #endif /* INET6 */ |
2216 | if (inp_restricted_recv(inp, ifp)) |
2217 | continue; |
2218 | |
2219 | if (inp->inp_faddr.s_addr == faddr.s_addr && |
2220 | inp->inp_laddr.s_addr == laddr.s_addr && |
2221 | inp->inp_fport == fport && |
2222 | inp->inp_lport == lport) { |
2223 | /* |
2224 | * Found. |
2225 | */ |
2226 | if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != |
2227 | WNT_STOPUSING) { |
2228 | lck_rw_done(pcbinfo->ipi_lock); |
2229 | return (inp); |
2230 | } else { |
2231 | /* it's there but dead, say it isn't found */ |
2232 | lck_rw_done(pcbinfo->ipi_lock); |
2233 | return (NULL); |
2234 | } |
2235 | } |
2236 | } |
2237 | |
2238 | if (!wildcard) { |
2239 | /* |
2240 | * Not found. |
2241 | */ |
2242 | lck_rw_done(pcbinfo->ipi_lock); |
2243 | return (NULL); |
2244 | } |
2245 | |
2246 | head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, |
2247 | pcbinfo->ipi_hashmask)]; |
2248 | LIST_FOREACH(inp, head, inp_hash) { |
2249 | #if INET6 |
2250 | if (!(inp->inp_vflag & INP_IPV4)) |
2251 | continue; |
2252 | #endif /* INET6 */ |
2253 | if (inp_restricted_recv(inp, ifp)) |
2254 | continue; |
2255 | |
2256 | if (inp->inp_faddr.s_addr == INADDR_ANY && |
2257 | inp->inp_lport == lport) { |
2258 | if (inp->inp_laddr.s_addr == laddr.s_addr) { |
2259 | if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != |
2260 | WNT_STOPUSING) { |
2261 | lck_rw_done(pcbinfo->ipi_lock); |
2262 | return (inp); |
2263 | } else { |
2264 | /* it's dead; say it isn't found */ |
2265 | lck_rw_done(pcbinfo->ipi_lock); |
2266 | return (NULL); |
2267 | } |
2268 | } else if (inp->inp_laddr.s_addr == INADDR_ANY) { |
2269 | #if INET6 |
2270 | if (SOCK_CHECK_DOM(inp->inp_socket, PF_INET6)) |
2271 | local_wild_mapped = inp; |
2272 | else |
2273 | #endif /* INET6 */ |
2274 | local_wild = inp; |
2275 | } |
2276 | } |
2277 | } |
2278 | if (local_wild == NULL) { |
2279 | #if INET6 |
2280 | if (local_wild_mapped != NULL) { |
2281 | if (in_pcb_checkstate(local_wild_mapped, |
2282 | WNT_ACQUIRE, 0) != WNT_STOPUSING) { |
2283 | lck_rw_done(pcbinfo->ipi_lock); |
2284 | return (local_wild_mapped); |
2285 | } else { |
2286 | /* it's dead; say it isn't found */ |
2287 | lck_rw_done(pcbinfo->ipi_lock); |
2288 | return (NULL); |
2289 | } |
2290 | } |
2291 | #endif /* INET6 */ |
2292 | lck_rw_done(pcbinfo->ipi_lock); |
2293 | return (NULL); |
2294 | } |
2295 | if (in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) { |
2296 | lck_rw_done(pcbinfo->ipi_lock); |
2297 | return (local_wild); |
2298 | } |
2299 | /* |
2300 | * It's either not found or is already dead. |
2301 | */ |
2302 | lck_rw_done(pcbinfo->ipi_lock); |
2303 | return (NULL); |
2304 | } |
2305 | |
2306 | /* |
2307 | * @brief Insert PCB onto various hash lists. |
2308 | * |
2309 | * @param inp Pointer to internet protocol control block |
2310 | * @param locked Implies if ipi_lock (protecting pcb list) |
2311 | * is already locked or not. |
2312 | * |
2313 | * @return int error on failure and 0 on success |
2314 | */ |
2315 | int |
2316 | in_pcbinshash(struct inpcb *inp, int locked) |
2317 | { |
2318 | struct inpcbhead *pcbhash; |
2319 | struct inpcbporthead *pcbporthash; |
2320 | struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; |
2321 | struct inpcbport *phd; |
2322 | u_int32_t hashkey_faddr; |
2323 | |
2324 | if (!locked) { |
2325 | if (!lck_rw_try_lock_exclusive(pcbinfo->ipi_lock)) { |
2326 | /* |
2327 | * Lock inversion issue, mostly with udp |
2328 | * multicast packets |
2329 | */ |
2330 | socket_unlock(inp->inp_socket, 0); |
2331 | lck_rw_lock_exclusive(pcbinfo->ipi_lock); |
2332 | socket_lock(inp->inp_socket, 0); |
2333 | } |
2334 | } |
2335 | |
2336 | /* |
2337 | * This routine or its caller may have given up |
2338 | * socket's protocol lock briefly. |
2339 | * During that time the socket may have been dropped. |
2340 | * Safe-guarding against that. |
2341 | */ |
2342 | if (inp->inp_state == INPCB_STATE_DEAD) { |
2343 | if (!locked) { |
2344 | lck_rw_done(pcbinfo->ipi_lock); |
2345 | } |
2346 | return (ECONNABORTED); |
2347 | } |
2348 | |
2349 | |
2350 | #if INET6 |
2351 | if (inp->inp_vflag & INP_IPV6) |
2352 | hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; |
2353 | else |
2354 | #endif /* INET6 */ |
2355 | hashkey_faddr = inp->inp_faddr.s_addr; |
2356 | |
2357 | inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport, |
2358 | inp->inp_fport, pcbinfo->ipi_hashmask); |
2359 | |
2360 | pcbhash = &pcbinfo->ipi_hashbase[inp->inp_hash_element]; |
2361 | |
2362 | pcbporthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(inp->inp_lport, |
2363 | pcbinfo->ipi_porthashmask)]; |
2364 | |
2365 | /* |
2366 | * Go through port list and look for a head for this lport. |
2367 | */ |
2368 | LIST_FOREACH(phd, pcbporthash, phd_hash) { |
2369 | if (phd->phd_port == inp->inp_lport) |
2370 | break; |
2371 | } |
2372 | |
2373 | /* |
2374 | * If none exists, malloc one and tack it on. |
2375 | */ |
2376 | if (phd == NULL) { |
2377 | MALLOC(phd, struct inpcbport *, sizeof (struct inpcbport), |
2378 | M_PCB, M_WAITOK); |
2379 | if (phd == NULL) { |
2380 | if (!locked) |
2381 | lck_rw_done(pcbinfo->ipi_lock); |
2382 | return (ENOBUFS); /* XXX */ |
2383 | } |
2384 | phd->phd_port = inp->inp_lport; |
2385 | LIST_INIT(&phd->phd_pcblist); |
2386 | LIST_INSERT_HEAD(pcbporthash, phd, phd_hash); |
2387 | } |
2388 | |
2389 | VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST)); |
2390 | |
2391 | |
2392 | inp->inp_phd = phd; |
2393 | LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); |
2394 | LIST_INSERT_HEAD(pcbhash, inp, inp_hash); |
2395 | inp->inp_flags2 |= INP2_INHASHLIST; |
2396 | |
2397 | if (!locked) |
2398 | lck_rw_done(pcbinfo->ipi_lock); |
2399 | |
2400 | #if NECP |
2401 | // This call catches the original setting of the local address |
2402 | inp_update_necp_policy(inp, NULL, NULL, 0); |
2403 | #endif /* NECP */ |
2404 | |
2405 | return (0); |
2406 | } |
2407 | |
2408 | /* |
2409 | * Move PCB to the proper hash bucket when { faddr, fport } have been |
2410 | * changed. NOTE: This does not handle the case of the lport changing (the |
2411 | * hashed port list would have to be updated as well), so the lport must |
2412 | * not change after in_pcbinshash() has been called. |
2413 | */ |
2414 | void |
2415 | in_pcbrehash(struct inpcb *inp) |
2416 | { |
2417 | struct inpcbhead *head; |
2418 | u_int32_t hashkey_faddr; |
2419 | |
2420 | #if INET6 |
2421 | if (inp->inp_vflag & INP_IPV6) |
2422 | hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; |
2423 | else |
2424 | #endif /* INET6 */ |
2425 | hashkey_faddr = inp->inp_faddr.s_addr; |
2426 | |
2427 | inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport, |
2428 | inp->inp_fport, inp->inp_pcbinfo->ipi_hashmask); |
2429 | head = &inp->inp_pcbinfo->ipi_hashbase[inp->inp_hash_element]; |
2430 | |
2431 | if (inp->inp_flags2 & INP2_INHASHLIST) { |
2432 | LIST_REMOVE(inp, inp_hash); |
2433 | inp->inp_flags2 &= ~INP2_INHASHLIST; |
2434 | } |
2435 | |
2436 | VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST)); |
2437 | LIST_INSERT_HEAD(head, inp, inp_hash); |
2438 | inp->inp_flags2 |= INP2_INHASHLIST; |
2439 | |
2440 | #if NECP |
2441 | // This call catches updates to the remote addresses |
2442 | inp_update_necp_policy(inp, NULL, NULL, 0); |
2443 | #endif /* NECP */ |
2444 | } |
2445 | |
2446 | /* |
2447 | * Remove PCB from various lists. |
 * Must be called with the pcbinfo lock held in exclusive mode.
2449 | */ |
2450 | void |
2451 | in_pcbremlists(struct inpcb *inp) |
2452 | { |
2453 | inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt; |
2454 | |
2455 | /* |
2456 | * Check if it's in hashlist -- an inp is placed in hashlist when |
2457 | * it's local port gets assigned. So it should also be present |
2458 | * in the port list. |
2459 | */ |
2460 | if (inp->inp_flags2 & INP2_INHASHLIST) { |
2461 | struct inpcbport *phd = inp->inp_phd; |
2462 | |
2463 | VERIFY(phd != NULL && inp->inp_lport > 0); |
2464 | |
2465 | LIST_REMOVE(inp, inp_hash); |
2466 | inp->inp_hash.le_next = NULL; |
2467 | inp->inp_hash.le_prev = NULL; |
2468 | |
2469 | LIST_REMOVE(inp, inp_portlist); |
2470 | inp->inp_portlist.le_next = NULL; |
2471 | inp->inp_portlist.le_prev = NULL; |
2472 | if (LIST_EMPTY(&phd->phd_pcblist)) { |
2473 | LIST_REMOVE(phd, phd_hash); |
2474 | FREE(phd, M_PCB); |
2475 | } |
2476 | inp->inp_phd = NULL; |
2477 | inp->inp_flags2 &= ~INP2_INHASHLIST; |
2478 | } |
2479 | VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST)); |
2480 | |
2481 | if (inp->inp_flags2 & INP2_TIMEWAIT) { |
2482 | /* Remove from time-wait queue */ |
2483 | tcp_remove_from_time_wait(inp); |
2484 | inp->inp_flags2 &= ~INP2_TIMEWAIT; |
2485 | VERIFY(inp->inp_pcbinfo->ipi_twcount != 0); |
2486 | inp->inp_pcbinfo->ipi_twcount--; |
2487 | } else { |
2488 | /* Remove from global inp list if it is not time-wait */ |
2489 | LIST_REMOVE(inp, inp_list); |
2490 | } |
2491 | |
2492 | if (inp->inp_flags2 & INP2_IN_FCTREE) { |
2493 | inp_fc_getinp(inp->inp_flowhash, (INPFC_SOLOCKED|INPFC_REMOVE)); |
2494 | VERIFY(!(inp->inp_flags2 & INP2_IN_FCTREE)); |
2495 | } |
2496 | |
2497 | inp->inp_pcbinfo->ipi_count--; |
2498 | } |
2499 | |
2500 | /* |
2501 | * Mechanism used to defer the memory release of PCBs |
2502 | * The pcb list will contain the pcb until the reaper can clean it up if |
2503 | * the following conditions are met: |
2504 | * 1) state "DEAD", |
2505 | * 2) wantcnt is STOPUSING |
2506 | * 3) usecount is 0 |
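 * This function will be called to either mark the pcb as dead and ready
 * for recycling (WNT_STOPUSING), or to acquire (WNT_ACQUIRE) or release
 * (WNT_RELEASE) a reference on its want count.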
2508 | */ |
int
in_pcb_checkstate(struct inpcb *pcb, int mode, int locked)
{
	/*
	 * pcb    - PCB whose want count (inp_wantcnt) is manipulated.
	 * mode   - WNT_STOPUSING, WNT_ACQUIRE or WNT_RELEASE; anything
	 *          else panics.
	 * locked - zero if this routine must take (and drop) the socket
	 *          lock itself in the STOPUSING and RELEASE paths.
	 *
	 * Returns WNT_STOPUSING, WNT_ACQUIRE or WNT_RELEASE as described
	 * per case below.  The low 16 bits of the want count holding
	 * 0xffff is the "stop using" marker.
	 */
	volatile UInt32 *wantcnt = (volatile UInt32 *)&pcb->inp_wantcnt;
	UInt32 origwant;
	UInt32 newwant;

	switch (mode) {
	case WNT_STOPUSING:
		/*
		 * Try to mark the pcb as ready for recycling.  CAS with
		 * STOPUSING, if success we're good, if it's in use, will
		 * be marked later
		 */
		if (locked == 0)
			socket_lock(pcb->inp_socket, 1);
		pcb->inp_state = INPCB_STATE_DEAD;

	/*
	 * Also entered from WNT_RELEASE (with the socket lock held) when
	 * the pcb is found to be in the DEAD state after dropping a
	 * reference.
	 */
stopusing:
		if (pcb->inp_socket->so_usecount < 0) {
			panic("%s: pcb=%p so=%p usecount is negative\n" ,
			    __func__, pcb, pcb->inp_socket);
			/* NOTREACHED */
		}
		if (locked == 0)
			socket_unlock(pcb->inp_socket, 1);

		/* Schedule the pcb garbage collector on the fast timer. */
		inpcb_gc_sched(pcb->inp_pcbinfo, INPCB_TIMER_FAST);

		origwant = *wantcnt;
		if ((UInt16) origwant == 0xffff) /* should stop using */
			return (WNT_STOPUSING);
		newwant = 0xffff;
		if ((UInt16) origwant == 0) {
			/*
			 * try to mark it as unusable now; the result of the
			 * compare-and-swap is not checked
			 */
			OSCompareAndSwap(origwant, newwant, wantcnt);
		}
		/* WNT_STOPUSING is returned whether or not the swap happened. */
		return (WNT_STOPUSING);

	case WNT_ACQUIRE:
		/*
		 * Try to increase reference to pcb.  If the want count
		 * already carries the STOPUSING marker, bail out with
		 * WNT_STOPUSING; otherwise increment the count, retrying
		 * the compare-and-swap until it succeeds.
		 */
		do {
			origwant = *wantcnt;
			if ((UInt16) origwant == 0xffff) {
				/* should stop using */
				return (WNT_STOPUSING);
			}
			newwant = origwant + 1;
		} while (!OSCompareAndSwap(origwant, newwant, wantcnt));
		return (WNT_ACQUIRE);

	case WNT_RELEASE:
		/*
		 * Release reference.  If the pcb state is DEAD after the
		 * decrement, continue at the stopusing label to try to set
		 * the STOPUSING marker.
		 */
		if (locked == 0)
			socket_lock(pcb->inp_socket, 1);

		do {
			origwant = *wantcnt;
			/* Releasing a reference that was never taken is fatal. */
			if ((UInt16) origwant == 0x0) {
				panic("%s: pcb=%p release with zero count" ,
				    __func__, pcb);
				/* NOTREACHED */
			}
			if ((UInt16) origwant == 0xffff) {
				/* should stop using */
				if (locked == 0)
					socket_unlock(pcb->inp_socket, 1);
				return (WNT_STOPUSING);
			}
			newwant = origwant - 1;
		} while (!OSCompareAndSwap(origwant, newwant, wantcnt));

		/* The stopusing path drops the socket lock if taken above. */
		if (pcb->inp_state == INPCB_STATE_DEAD)
			goto stopusing;
		if (pcb->inp_socket->so_usecount < 0) {
			panic("%s: RELEASE pcb=%p so=%p usecount is negative\n" ,
			    __func__, pcb, pcb->inp_socket);
			/* NOTREACHED */
		}

		if (locked == 0)
			socket_unlock(pcb->inp_socket, 1);
		return (WNT_RELEASE);

	default:
		panic("%s: so=%p not a valid state =%x\n" , __func__,
		    pcb->inp_socket, mode);
		/* NOTREACHED */
	}

	/* NOTREACHED */
	return (mode);
}
2609 | |
2610 | /* |
2611 | * inpcb_to_compat copies specific bits of an inpcb to a inpcb_compat. |
2612 | * The inpcb_compat data structure is passed to user space and must |
2613 | * not change. We intentionally avoid copying pointers. |
2614 | */ |
2615 | void |
2616 | inpcb_to_compat(struct inpcb *inp, struct inpcb_compat *inp_compat) |
2617 | { |
2618 | bzero(inp_compat, sizeof (*inp_compat)); |
2619 | inp_compat->inp_fport = inp->inp_fport; |
2620 | inp_compat->inp_lport = inp->inp_lport; |
2621 | inp_compat->nat_owner = 0; |
2622 | inp_compat->nat_cookie = 0; |
2623 | inp_compat->inp_gencnt = inp->inp_gencnt; |
2624 | inp_compat->inp_flags = inp->inp_flags; |
2625 | inp_compat->inp_flow = inp->inp_flow; |
2626 | inp_compat->inp_vflag = inp->inp_vflag; |
2627 | inp_compat->inp_ip_ttl = inp->inp_ip_ttl; |
2628 | inp_compat->inp_ip_p = inp->inp_ip_p; |
2629 | inp_compat->inp_dependfaddr.inp6_foreign = |
2630 | inp->inp_dependfaddr.inp6_foreign; |
2631 | inp_compat->inp_dependladdr.inp6_local = |
2632 | inp->inp_dependladdr.inp6_local; |
2633 | inp_compat->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos; |
2634 | inp_compat->inp_depend6.inp6_hlim = 0; |
2635 | inp_compat->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum; |
2636 | inp_compat->inp_depend6.inp6_ifindex = 0; |
2637 | inp_compat->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops; |
2638 | } |
2639 | |
2640 | #if !CONFIG_EMBEDDED |
2641 | void |
2642 | inpcb_to_xinpcb64(struct inpcb *inp, struct xinpcb64 *xinp) |
2643 | { |
2644 | xinp->inp_fport = inp->inp_fport; |
2645 | xinp->inp_lport = inp->inp_lport; |
2646 | xinp->inp_gencnt = inp->inp_gencnt; |
2647 | xinp->inp_flags = inp->inp_flags; |
2648 | xinp->inp_flow = inp->inp_flow; |
2649 | xinp->inp_vflag = inp->inp_vflag; |
2650 | xinp->inp_ip_ttl = inp->inp_ip_ttl; |
2651 | xinp->inp_ip_p = inp->inp_ip_p; |
2652 | xinp->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign; |
2653 | xinp->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local; |
2654 | xinp->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos; |
2655 | xinp->inp_depend6.inp6_hlim = 0; |
2656 | xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum; |
2657 | xinp->inp_depend6.inp6_ifindex = 0; |
2658 | xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops; |
2659 | } |
2660 | #endif /* !CONFIG_EMBEDDED */ |
2661 | |
2662 | /* |
2663 | * The following routines implement this scheme: |
2664 | * |
2665 | * Callers of ip_output() that intend to cache the route in the inpcb pass |
2666 | * a local copy of the struct route to ip_output(). Using a local copy of |
2667 | * the cached route significantly simplifies things as IP no longer has to |
2668 | * worry about having exclusive access to the passed in struct route, since |
2669 | * it's defined in the caller's stack; in essence, this allows for a lock- |
2670 | * less operation when updating the struct route at the IP level and below, |
2671 | * whenever necessary. The scheme works as follows: |
2672 | * |
2673 | * Prior to dropping the socket's lock and calling ip_output(), the caller |
2674 | * copies the struct route from the inpcb into its stack, and adds a reference |
2675 | * to the cached route entry, if there was any. The socket's lock is then |
2676 | * dropped and ip_output() is called with a pointer to the copy of struct |
2677 | * route defined on the stack (not to the one in the inpcb.) |
2678 | * |
2679 | * Upon returning from ip_output(), the caller then acquires the socket's |
2680 | * lock and synchronizes the cache; if there is no route cached in the inpcb, |
2681 | * it copies the local copy of struct route (which may or may not contain any |
2682 | * route) back into the cache; otherwise, if the inpcb has a route cached in |
2683 | * it, the one in the local copy will be freed, if there's any. Trashing the |
2684 | * cached route in the inpcb can be avoided because ip_output() is single- |
2685 | * threaded per-PCB (i.e. multiple transmits on a PCB are always serialized |
2686 | * by the socket/transport layer.) |
2687 | */ |
2688 | void |
2689 | inp_route_copyout(struct inpcb *inp, struct route *dst) |
2690 | { |
2691 | struct route *src = &inp->inp_route; |
2692 | |
2693 | socket_lock_assert_owned(inp->inp_socket); |
2694 | |
2695 | /* |
2696 | * If the route in the PCB is stale or not for IPv4, blow it away; |
2697 | * this is possible in the case of IPv4-mapped address case. |
2698 | */ |
2699 | if (ROUTE_UNUSABLE(src) || rt_key(src->ro_rt)->sa_family != AF_INET) |
2700 | ROUTE_RELEASE(src); |
2701 | |
2702 | route_copyout(dst, src, sizeof (*dst)); |
2703 | } |
2704 | |
2705 | void |
2706 | inp_route_copyin(struct inpcb *inp, struct route *src) |
2707 | { |
2708 | struct route *dst = &inp->inp_route; |
2709 | |
2710 | socket_lock_assert_owned(inp->inp_socket); |
2711 | |
2712 | /* Minor sanity check */ |
2713 | if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET) |
2714 | panic("%s: wrong or corrupted route: %p" , __func__, src); |
2715 | |
2716 | route_copyin(src, dst, sizeof (*src)); |
2717 | } |
2718 | |
2719 | /* |
2720 | * Handler for setting IP_BOUND_IF/IPV6_BOUND_IF socket option. |
2721 | */ |
2722 | int |
2723 | inp_bindif(struct inpcb *inp, unsigned int ifscope, struct ifnet **pifp) |
2724 | { |
2725 | struct ifnet *ifp = NULL; |
2726 | |
2727 | ifnet_head_lock_shared(); |
2728 | if ((ifscope > (unsigned)if_index) || (ifscope != IFSCOPE_NONE && |
2729 | (ifp = ifindex2ifnet[ifscope]) == NULL)) { |
2730 | ifnet_head_done(); |
2731 | return (ENXIO); |
2732 | } |
2733 | ifnet_head_done(); |
2734 | |
2735 | VERIFY(ifp != NULL || ifscope == IFSCOPE_NONE); |
2736 | |
2737 | /* |
2738 | * A zero interface scope value indicates an "unbind". |
2739 | * Otherwise, take in whatever value the app desires; |
2740 | * the app may already know the scope (or force itself |
2741 | * to such a scope) ahead of time before the interface |
2742 | * gets attached. It doesn't matter either way; any |
2743 | * route lookup from this point on will require an |
2744 | * exact match for the embedded interface scope. |
2745 | */ |
2746 | inp->inp_boundifp = ifp; |
2747 | if (inp->inp_boundifp == NULL) |
2748 | inp->inp_flags &= ~INP_BOUND_IF; |
2749 | else |
2750 | inp->inp_flags |= INP_BOUND_IF; |
2751 | |
2752 | /* Blow away any cached route in the PCB */ |
2753 | ROUTE_RELEASE(&inp->inp_route); |
2754 | |
2755 | if (pifp != NULL) |
2756 | *pifp = ifp; |
2757 | |
2758 | return (0); |
2759 | } |
2760 | |
2761 | /* |
2762 | * Handler for setting IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option, |
2763 | * as well as for setting PROC_UUID_NO_CELLULAR policy. |
2764 | */ |
2765 | void |
2766 | inp_set_nocellular(struct inpcb *inp) |
2767 | { |
2768 | inp->inp_flags |= INP_NO_IFT_CELLULAR; |
2769 | |
2770 | /* Blow away any cached route in the PCB */ |
2771 | ROUTE_RELEASE(&inp->inp_route); |
2772 | } |
2773 | |
2774 | /* |
2775 | * Handler for clearing IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option, |
2776 | * as well as for clearing PROC_UUID_NO_CELLULAR policy. |
2777 | */ |
2778 | void |
2779 | inp_clear_nocellular(struct inpcb *inp) |
2780 | { |
2781 | struct socket *so = inp->inp_socket; |
2782 | |
2783 | /* |
2784 | * SO_RESTRICT_DENY_CELLULAR socket restriction issued on the socket |
2785 | * has a higher precendence than INP_NO_IFT_CELLULAR. Clear the flag |
2786 | * if and only if the socket is unrestricted. |
2787 | */ |
2788 | if (so != NULL && !(so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) { |
2789 | inp->inp_flags &= ~INP_NO_IFT_CELLULAR; |
2790 | |
2791 | /* Blow away any cached route in the PCB */ |
2792 | ROUTE_RELEASE(&inp->inp_route); |
2793 | } |
2794 | } |
2795 | |
2796 | void |
2797 | inp_set_noexpensive(struct inpcb *inp) |
2798 | { |
2799 | inp->inp_flags2 |= INP2_NO_IFF_EXPENSIVE; |
2800 | |
2801 | /* Blow away any cached route in the PCB */ |
2802 | ROUTE_RELEASE(&inp->inp_route); |
2803 | } |
2804 | |
2805 | void |
2806 | inp_set_awdl_unrestricted(struct inpcb *inp) |
2807 | { |
2808 | inp->inp_flags2 |= INP2_AWDL_UNRESTRICTED; |
2809 | |
2810 | /* Blow away any cached route in the PCB */ |
2811 | ROUTE_RELEASE(&inp->inp_route); |
2812 | } |
2813 | |
2814 | boolean_t |
2815 | inp_get_awdl_unrestricted(struct inpcb *inp) |
2816 | { |
2817 | return (inp->inp_flags2 & INP2_AWDL_UNRESTRICTED) ? TRUE : FALSE; |
2818 | } |
2819 | |
2820 | void |
2821 | inp_clear_awdl_unrestricted(struct inpcb *inp) |
2822 | { |
2823 | inp->inp_flags2 &= ~INP2_AWDL_UNRESTRICTED; |
2824 | |
2825 | /* Blow away any cached route in the PCB */ |
2826 | ROUTE_RELEASE(&inp->inp_route); |
2827 | } |
2828 | |
2829 | void |
2830 | inp_set_intcoproc_allowed(struct inpcb *inp) |
2831 | { |
2832 | inp->inp_flags2 |= INP2_INTCOPROC_ALLOWED; |
2833 | |
2834 | /* Blow away any cached route in the PCB */ |
2835 | ROUTE_RELEASE(&inp->inp_route); |
2836 | } |
2837 | |
2838 | boolean_t |
2839 | inp_get_intcoproc_allowed(struct inpcb *inp) |
2840 | { |
2841 | return (inp->inp_flags2 & INP2_INTCOPROC_ALLOWED) ? TRUE : FALSE; |
2842 | } |
2843 | |
2844 | void |
2845 | inp_clear_intcoproc_allowed(struct inpcb *inp) |
2846 | { |
2847 | inp->inp_flags2 &= ~INP2_INTCOPROC_ALLOWED; |
2848 | |
2849 | /* Blow away any cached route in the PCB */ |
2850 | ROUTE_RELEASE(&inp->inp_route); |
2851 | } |
2852 | |
2853 | #if NECP |
2854 | /* |
2855 | * Called when PROC_UUID_NECP_APP_POLICY is set. |
2856 | */ |
2857 | void |
2858 | inp_set_want_app_policy(struct inpcb *inp) |
2859 | { |
2860 | inp->inp_flags2 |= INP2_WANT_APP_POLICY; |
2861 | } |
2862 | |
2863 | /* |
2864 | * Called when PROC_UUID_NECP_APP_POLICY is cleared. |
2865 | */ |
2866 | void |
2867 | inp_clear_want_app_policy(struct inpcb *inp) |
2868 | { |
2869 | inp->inp_flags2 &= ~INP2_WANT_APP_POLICY; |
2870 | } |
2871 | #endif /* NECP */ |
2872 | |
2873 | /* |
2874 | * Calculate flow hash for an inp, used by an interface to identify a |
2875 | * flow. When an interface provides flow control advisory, this flow |
2876 | * hash is used as an identifier. |
2877 | */ |
2878 | u_int32_t |
2879 | inp_calc_flowhash(struct inpcb *inp) |
2880 | { |
2881 | struct inp_flowhash_key fh __attribute__((aligned(8))); |
2882 | u_int32_t flowhash = 0; |
2883 | struct inpcb *tmp_inp = NULL; |
2884 | |
2885 | if (inp_hash_seed == 0) |
2886 | inp_hash_seed = RandomULong(); |
2887 | |
2888 | bzero(&fh, sizeof (fh)); |
2889 | |
2890 | bcopy(&inp->inp_dependladdr, &fh.infh_laddr, sizeof (fh.infh_laddr)); |
2891 | bcopy(&inp->inp_dependfaddr, &fh.infh_faddr, sizeof (fh.infh_faddr)); |
2892 | |
2893 | fh.infh_lport = inp->inp_lport; |
2894 | fh.infh_fport = inp->inp_fport; |
2895 | fh.infh_af = (inp->inp_vflag & INP_IPV6) ? AF_INET6 : AF_INET; |
2896 | fh.infh_proto = inp->inp_ip_p; |
2897 | fh.infh_rand1 = RandomULong(); |
2898 | fh.infh_rand2 = RandomULong(); |
2899 | |
2900 | try_again: |
2901 | flowhash = net_flowhash(&fh, sizeof (fh), inp_hash_seed); |
2902 | if (flowhash == 0) { |
2903 | /* try to get a non-zero flowhash */ |
2904 | inp_hash_seed = RandomULong(); |
2905 | goto try_again; |
2906 | } |
2907 | |
2908 | inp->inp_flowhash = flowhash; |
2909 | |
2910 | /* Insert the inp into inp_fc_tree */ |
2911 | lck_mtx_lock_spin(&inp_fc_lck); |
2912 | tmp_inp = RB_FIND(inp_fc_tree, &inp_fc_tree, inp); |
2913 | if (tmp_inp != NULL) { |
2914 | /* |
2915 | * There is a different inp with the same flowhash. |
2916 | * There can be a collision on flow hash but the |
2917 | * probability is low. Let's recompute the |
2918 | * flowhash. |
2919 | */ |
2920 | lck_mtx_unlock(&inp_fc_lck); |
2921 | /* recompute hash seed */ |
2922 | inp_hash_seed = RandomULong(); |
2923 | goto try_again; |
2924 | } |
2925 | |
2926 | RB_INSERT(inp_fc_tree, &inp_fc_tree, inp); |
2927 | inp->inp_flags2 |= INP2_IN_FCTREE; |
2928 | lck_mtx_unlock(&inp_fc_lck); |
2929 | |
2930 | return (flowhash); |
2931 | } |
2932 | |
2933 | void |
2934 | inp_flowadv(uint32_t flowhash) |
2935 | { |
2936 | struct inpcb *inp; |
2937 | |
2938 | inp = inp_fc_getinp(flowhash, 0); |
2939 | |
2940 | if (inp == NULL) |
2941 | return; |
2942 | inp_fc_feedback(inp); |
2943 | } |
2944 | |
2945 | /* |
2946 | * Function to compare inp_fc_entries in inp flow control tree |
2947 | */ |
2948 | static inline int |
2949 | infc_cmp(const struct inpcb *inp1, const struct inpcb *inp2) |
2950 | { |
2951 | return (memcmp(&(inp1->inp_flowhash), &(inp2->inp_flowhash), |
2952 | sizeof(inp1->inp_flowhash))); |
2953 | } |
2954 | |
2955 | static struct inpcb * |
2956 | inp_fc_getinp(u_int32_t flowhash, u_int32_t flags) |
2957 | { |
2958 | struct inpcb *inp = NULL; |
2959 | int locked = (flags & INPFC_SOLOCKED) ? 1 : 0; |
2960 | |
2961 | lck_mtx_lock_spin(&inp_fc_lck); |
2962 | key_inp.inp_flowhash = flowhash; |
2963 | inp = RB_FIND(inp_fc_tree, &inp_fc_tree, &key_inp); |
2964 | if (inp == NULL) { |
2965 | /* inp is not present, return */ |
2966 | lck_mtx_unlock(&inp_fc_lck); |
2967 | return (NULL); |
2968 | } |
2969 | |
2970 | if (flags & INPFC_REMOVE) { |
2971 | RB_REMOVE(inp_fc_tree, &inp_fc_tree, inp); |
2972 | lck_mtx_unlock(&inp_fc_lck); |
2973 | |
2974 | bzero(&(inp->infc_link), sizeof (inp->infc_link)); |
2975 | inp->inp_flags2 &= ~INP2_IN_FCTREE; |
2976 | return (NULL); |
2977 | } |
2978 | |
2979 | if (in_pcb_checkstate(inp, WNT_ACQUIRE, locked) == WNT_STOPUSING) |
2980 | inp = NULL; |
2981 | lck_mtx_unlock(&inp_fc_lck); |
2982 | |
2983 | return (inp); |
2984 | } |
2985 | |
2986 | static void |
2987 | inp_fc_feedback(struct inpcb *inp) |
2988 | { |
2989 | struct socket *so = inp->inp_socket; |
2990 | |
2991 | /* we already hold a want_cnt on this inp, socket can't be null */ |
2992 | VERIFY(so != NULL); |
2993 | socket_lock(so, 1); |
2994 | |
2995 | if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) { |
2996 | socket_unlock(so, 1); |
2997 | return; |
2998 | } |
2999 | |
3000 | if (inp->inp_sndinprog_cnt > 0) |
3001 | inp->inp_flags |= INP_FC_FEEDBACK; |
3002 | |
3003 | /* |
3004 | * Return if the connection is not in flow-controlled state. |
3005 | * This can happen if the connection experienced |
3006 | * loss while it was in flow controlled state |
3007 | */ |
3008 | if (!INP_WAIT_FOR_IF_FEEDBACK(inp)) { |
3009 | socket_unlock(so, 1); |
3010 | return; |
3011 | } |
3012 | inp_reset_fc_state(inp); |
3013 | |
3014 | if (SOCK_TYPE(so) == SOCK_STREAM) |
3015 | inp_fc_unthrottle_tcp(inp); |
3016 | |
3017 | socket_unlock(so, 1); |
3018 | } |
3019 | |
3020 | void |
3021 | inp_reset_fc_state(struct inpcb *inp) |
3022 | { |
3023 | struct socket *so = inp->inp_socket; |
3024 | int suspended = (INP_IS_FLOW_SUSPENDED(inp)) ? 1 : 0; |
3025 | int needwakeup = (INP_WAIT_FOR_IF_FEEDBACK(inp)) ? 1 : 0; |
3026 | |
3027 | inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED); |
3028 | |
3029 | if (suspended) { |
3030 | so->so_flags &= ~(SOF_SUSPENDED); |
3031 | soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_RESUME)); |
3032 | } |
3033 | |
3034 | /* Give a write wakeup to unblock the socket */ |
3035 | if (needwakeup) |
3036 | sowwakeup(so); |
3037 | } |
3038 | |
3039 | int |
3040 | inp_set_fc_state(struct inpcb *inp, int advcode) |
3041 | { |
3042 | struct inpcb *tmp_inp = NULL; |
3043 | /* |
3044 | * If there was a feedback from the interface when |
3045 | * send operation was in progress, we should ignore |
3046 | * this flow advisory to avoid a race between setting |
3047 | * flow controlled state and receiving feedback from |
3048 | * the interface |
3049 | */ |
3050 | if (inp->inp_flags & INP_FC_FEEDBACK) |
3051 | return (0); |
3052 | |
3053 | inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED); |
3054 | if ((tmp_inp = inp_fc_getinp(inp->inp_flowhash, |
3055 | INPFC_SOLOCKED)) != NULL) { |
3056 | if (in_pcb_checkstate(tmp_inp, WNT_RELEASE, 1) == WNT_STOPUSING) |
3057 | return (0); |
3058 | VERIFY(tmp_inp == inp); |
3059 | switch (advcode) { |
3060 | case FADV_FLOW_CONTROLLED: |
3061 | inp->inp_flags |= INP_FLOW_CONTROLLED; |
3062 | break; |
3063 | case FADV_SUSPENDED: |
3064 | inp->inp_flags |= INP_FLOW_SUSPENDED; |
3065 | soevent(inp->inp_socket, |
3066 | (SO_FILT_HINT_LOCKED | SO_FILT_HINT_SUSPEND)); |
3067 | |
3068 | /* Record the fact that suspend event was sent */ |
3069 | inp->inp_socket->so_flags |= SOF_SUSPENDED; |
3070 | break; |
3071 | } |
3072 | return (1); |
3073 | } |
3074 | return (0); |
3075 | } |
3076 | |
3077 | /* |
3078 | * Handler for SO_FLUSH socket option. |
3079 | */ |
3080 | int |
3081 | inp_flush(struct inpcb *inp, int optval) |
3082 | { |
3083 | u_int32_t flowhash = inp->inp_flowhash; |
3084 | struct ifnet *rtifp, *oifp; |
3085 | |
3086 | /* Either all classes or one of the valid ones */ |
3087 | if (optval != SO_TC_ALL && !SO_VALID_TC(optval)) |
3088 | return (EINVAL); |
3089 | |
3090 | /* We need a flow hash for identification */ |
3091 | if (flowhash == 0) |
3092 | return (0); |
3093 | |
3094 | /* Grab the interfaces from the route and pcb */ |
3095 | rtifp = ((inp->inp_route.ro_rt != NULL) ? |
3096 | inp->inp_route.ro_rt->rt_ifp : NULL); |
3097 | oifp = inp->inp_last_outifp; |
3098 | |
3099 | if (rtifp != NULL) |
3100 | if_qflush_sc(rtifp, so_tc2msc(optval), flowhash, NULL, NULL, 0); |
3101 | if (oifp != NULL && oifp != rtifp) |
3102 | if_qflush_sc(oifp, so_tc2msc(optval), flowhash, NULL, NULL, 0); |
3103 | |
3104 | return (0); |
3105 | } |
3106 | |
3107 | /* |
3108 | * Clear the INP_INADDR_ANY flag (special case for PPP only) |
3109 | */ |
3110 | void |
3111 | inp_clear_INP_INADDR_ANY(struct socket *so) |
3112 | { |
3113 | struct inpcb *inp = NULL; |
3114 | |
3115 | socket_lock(so, 1); |
3116 | inp = sotoinpcb(so); |
3117 | if (inp) { |
3118 | inp->inp_flags &= ~INP_INADDR_ANY; |
3119 | } |
3120 | socket_unlock(so, 1); |
3121 | } |
3122 | |
3123 | void |
3124 | inp_get_soprocinfo(struct inpcb *inp, struct so_procinfo *soprocinfo) |
3125 | { |
3126 | struct socket *so = inp->inp_socket; |
3127 | |
3128 | soprocinfo->spi_pid = so->last_pid; |
3129 | if (so->last_pid != 0) |
3130 | uuid_copy(soprocinfo->spi_uuid, so->last_uuid); |
3131 | /* |
3132 | * When not delegated, the effective pid is the same as the real pid |
3133 | */ |
3134 | if (so->so_flags & SOF_DELEGATED) { |
3135 | soprocinfo->spi_delegated = 1; |
3136 | soprocinfo->spi_epid = so->e_pid; |
3137 | uuid_copy(soprocinfo->spi_euuid, so->e_uuid); |
3138 | } else { |
3139 | soprocinfo->spi_delegated = 0; |
3140 | soprocinfo->spi_epid = so->last_pid; |
3141 | } |
3142 | } |
3143 | |
3144 | int |
3145 | inp_findinpcb_procinfo(struct inpcbinfo *pcbinfo, uint32_t flowhash, |
3146 | struct so_procinfo *soprocinfo) |
3147 | { |
3148 | struct inpcb *inp = NULL; |
3149 | int found = 0; |
3150 | |
3151 | bzero(soprocinfo, sizeof (struct so_procinfo)); |
3152 | |
3153 | if (!flowhash) |
3154 | return (-1); |
3155 | |
3156 | lck_rw_lock_shared(pcbinfo->ipi_lock); |
3157 | LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) { |
3158 | if (inp->inp_state != INPCB_STATE_DEAD && |
3159 | inp->inp_socket != NULL && |
3160 | inp->inp_flowhash == flowhash) { |
3161 | found = 1; |
3162 | inp_get_soprocinfo(inp, soprocinfo); |
3163 | break; |
3164 | } |
3165 | } |
3166 | lck_rw_done(pcbinfo->ipi_lock); |
3167 | |
3168 | return (found); |
3169 | } |
3170 | |
3171 | #if CONFIG_PROC_UUID_POLICY |
3172 | static void |
3173 | inp_update_cellular_policy(struct inpcb *inp, boolean_t set) |
3174 | { |
3175 | struct socket *so = inp->inp_socket; |
3176 | int before, after; |
3177 | |
3178 | VERIFY(so != NULL); |
3179 | VERIFY(inp->inp_state != INPCB_STATE_DEAD); |
3180 | |
3181 | before = INP_NO_CELLULAR(inp); |
3182 | if (set) { |
3183 | inp_set_nocellular(inp); |
3184 | } else { |
3185 | inp_clear_nocellular(inp); |
3186 | } |
3187 | after = INP_NO_CELLULAR(inp); |
3188 | if (net_io_policy_log && (before != after)) { |
3189 | static const char *ok = "OK" ; |
3190 | static const char *nok = "NOACCESS" ; |
3191 | uuid_string_t euuid_buf; |
3192 | pid_t epid; |
3193 | |
3194 | if (so->so_flags & SOF_DELEGATED) { |
3195 | uuid_unparse(so->e_uuid, euuid_buf); |
3196 | epid = so->e_pid; |
3197 | } else { |
3198 | uuid_unparse(so->last_uuid, euuid_buf); |
3199 | epid = so->last_pid; |
3200 | } |
3201 | |
3202 | /* allow this socket to generate another notification event */ |
3203 | so->so_ifdenied_notifies = 0; |
3204 | |
3205 | log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d " |
3206 | "euuid %s%s %s->%s\n" , __func__, |
3207 | (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so), |
3208 | SOCK_TYPE(so), epid, euuid_buf, |
3209 | (so->so_flags & SOF_DELEGATED) ? |
3210 | " [delegated]" : "" , |
3211 | ((before < after) ? ok : nok), |
3212 | ((before < after) ? nok : ok)); |
3213 | } |
3214 | } |
3215 | |
3216 | #if NECP |
3217 | static void |
3218 | inp_update_necp_want_app_policy(struct inpcb *inp, boolean_t set) |
3219 | { |
3220 | struct socket *so = inp->inp_socket; |
3221 | int before, after; |
3222 | |
3223 | VERIFY(so != NULL); |
3224 | VERIFY(inp->inp_state != INPCB_STATE_DEAD); |
3225 | |
3226 | before = (inp->inp_flags2 & INP2_WANT_APP_POLICY); |
3227 | if (set) { |
3228 | inp_set_want_app_policy(inp); |
3229 | } else { |
3230 | inp_clear_want_app_policy(inp); |
3231 | } |
3232 | after = (inp->inp_flags2 & INP2_WANT_APP_POLICY); |
3233 | if (net_io_policy_log && (before != after)) { |
3234 | static const char *wanted = "WANTED" ; |
3235 | static const char *unwanted = "UNWANTED" ; |
3236 | uuid_string_t euuid_buf; |
3237 | pid_t epid; |
3238 | |
3239 | if (so->so_flags & SOF_DELEGATED) { |
3240 | uuid_unparse(so->e_uuid, euuid_buf); |
3241 | epid = so->e_pid; |
3242 | } else { |
3243 | uuid_unparse(so->last_uuid, euuid_buf); |
3244 | epid = so->last_pid; |
3245 | } |
3246 | |
3247 | log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d " |
3248 | "euuid %s%s %s->%s\n" , __func__, |
3249 | (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so), |
3250 | SOCK_TYPE(so), epid, euuid_buf, |
3251 | (so->so_flags & SOF_DELEGATED) ? |
3252 | " [delegated]" : "" , |
3253 | ((before < after) ? unwanted : wanted), |
3254 | ((before < after) ? wanted : unwanted)); |
3255 | } |
3256 | } |
3257 | #endif /* NECP */ |
3258 | #endif /* !CONFIG_PROC_UUID_POLICY */ |
3259 | |
3260 | #if NECP |
3261 | void |
3262 | inp_update_necp_policy(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, u_int override_bound_interface) |
3263 | { |
3264 | necp_socket_find_policy_match(inp, override_local_addr, override_remote_addr, override_bound_interface); |
3265 | if (necp_socket_should_rescope(inp) && |
3266 | inp->inp_lport == 0 && |
3267 | inp->inp_laddr.s_addr == INADDR_ANY && |
3268 | IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { |
3269 | // If we should rescope, and the socket is not yet bound |
3270 | inp_bindif(inp, necp_socket_get_rescope_if_index(inp), NULL); |
3271 | } |
3272 | } |
3273 | #endif /* NECP */ |
3274 | |
/*
 * Refresh the per-process UUID policy state of a PCB.
 *
 * With CONFIG_PROC_UUID_POLICY, the policy flags for the socket's
 * effective UUID (e_uuid when delegated, otherwise last_uuid) are looked
 * up with proc_uuid_policy_lookup().  When the socket's cached policy
 * generation count changes, the cellular policy and (with NECP) the
 * want-app-policy flag are brought in line with the returned flags.
 *
 * Returns 0 when nothing needs to be done or the lookup reports ENOENT;
 * otherwise returns the lookup's error code.  Without
 * CONFIG_PROC_UUID_POLICY this is a no-op returning 0.
 */
int
inp_update_policy(struct inpcb *inp)
{
#if CONFIG_PROC_UUID_POLICY
	struct socket *so = inp->inp_socket;
	uint32_t pflags = 0;
	int32_t ogencnt;
	int err = 0;

	/* Nothing to do if UUID policy is off or the PCB is unusable */
	if (!net_io_policy_uuid ||
	    so == NULL || inp->inp_state == INPCB_STATE_DEAD)
		return (0);

	/*
	 * Kernel-created sockets that aren't delegating other sockets
	 * are currently exempted from UUID policy checks.
	 */
	if (so->last_pid == 0 && !(so->so_flags & SOF_DELEGATED))
		return (0);

	/* Remember the old generation count; the lookup may update it */
	ogencnt = so->so_policy_gencnt;
	err = proc_uuid_policy_lookup(((so->so_flags & SOF_DELEGATED) ?
	    so->e_uuid : so->last_uuid), &pflags, &so->so_policy_gencnt);

	/*
	 * Discard cached generation count if the entry is gone (ENOENT),
	 * so that we go thru the checks below.
	 */
	if (err == ENOENT && ogencnt != 0)
		so->so_policy_gencnt = 0;

	/*
	 * If the generation count has changed, inspect the policy flags
	 * and act accordingly.  If a policy flag was previously set and
	 * the UUID is no longer present in the table (ENOENT), treat it
	 * as if the flag has been cleared.
	 */
	if ((err == 0 || err == ENOENT) && ogencnt != so->so_policy_gencnt) {
		/* update cellular policy for this socket */
		if (err == 0 && (pflags & PROC_UUID_NO_CELLULAR)) {
			inp_update_cellular_policy(inp, TRUE);
		} else if (!(pflags & PROC_UUID_NO_CELLULAR)) {
			inp_update_cellular_policy(inp, FALSE);
		}
#if NECP
		/* update necp want app policy for this socket */
		if (err == 0 && (pflags & PROC_UUID_NECP_APP_POLICY)) {
			inp_update_necp_want_app_policy(inp, TRUE);
		} else if (!(pflags & PROC_UUID_NECP_APP_POLICY)) {
			inp_update_necp_want_app_policy(inp, FALSE);
		}
#endif /* NECP */
	}

	/* A missing table entry is not an error for the caller */
	return ((err == ENOENT) ? 0 : err);
#else /* !CONFIG_PROC_UUID_POLICY */
#pragma unused(inp)
	return (0);
#endif /* !CONFIG_PROC_UUID_POLICY */
}
3335 | |
/*
 * When non-zero, inp_restricted_recv() and inp_restricted_send() print a
 * message each time they report a restriction.  Registered as a
 * read-write integer sysctl named log_restricted under the _net_inet
 * node, with an initial value of 0.
 */
static unsigned int log_restricted;
SYSCTL_DECL(_net_inet);
SYSCTL_INT(_net_inet, OID_AUTO, log_restricted,
    CTLFLAG_RW | CTLFLAG_LOCKED, &log_restricted, 0,
    "Log network restrictions");
3341 | /* |
3342 | * Called when we need to enforce policy restrictions in the input path. |
3343 | * |
3344 | * Returns TRUE if we're not allowed to receive data, otherwise FALSE. |
3345 | */ |
3346 | static boolean_t |
3347 | _inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp) |
3348 | { |
3349 | VERIFY(inp != NULL); |
3350 | |
3351 | /* |
3352 | * Inbound restrictions. |
3353 | */ |
3354 | if (!sorestrictrecv) |
3355 | return (FALSE); |
3356 | |
3357 | if (ifp == NULL) |
3358 | return (FALSE); |
3359 | |
3360 | if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp)) |
3361 | return (TRUE); |
3362 | |
3363 | if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp)) |
3364 | return (TRUE); |
3365 | |
3366 | if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp)) |
3367 | return (TRUE); |
3368 | |
3369 | if (!(ifp->if_eflags & IFEF_RESTRICTED_RECV)) |
3370 | return (FALSE); |
3371 | |
3372 | if (inp->inp_flags & INP_RECV_ANYIF) |
3373 | return (FALSE); |
3374 | |
3375 | if ((inp->inp_flags & INP_BOUND_IF) && inp->inp_boundifp == ifp) |
3376 | return (FALSE); |
3377 | |
3378 | if (IFNET_IS_INTCOPROC(ifp) && !INP_INTCOPROC_ALLOWED(inp)) |
3379 | return (TRUE); |
3380 | |
3381 | return (TRUE); |
3382 | } |
3383 | |
3384 | boolean_t |
3385 | inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp) |
3386 | { |
3387 | boolean_t ret; |
3388 | |
3389 | ret = _inp_restricted_recv(inp, ifp); |
3390 | if (ret == TRUE && log_restricted) { |
3391 | printf("pid %d (%s) is unable to receive packets on %s\n" , |
3392 | current_proc()->p_pid, proc_best_name(current_proc()), |
3393 | ifp->if_xname); |
3394 | } |
3395 | return (ret); |
3396 | } |
3397 | |
3398 | /* |
3399 | * Called when we need to enforce policy restrictions in the output path. |
3400 | * |
3401 | * Returns TRUE if we're not allowed to send data out, otherwise FALSE. |
3402 | */ |
3403 | static boolean_t |
3404 | _inp_restricted_send(struct inpcb *inp, struct ifnet *ifp) |
3405 | { |
3406 | VERIFY(inp != NULL); |
3407 | |
3408 | /* |
3409 | * Outbound restrictions. |
3410 | */ |
3411 | if (!sorestrictsend) |
3412 | return (FALSE); |
3413 | |
3414 | if (ifp == NULL) |
3415 | return (FALSE); |
3416 | |
3417 | if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp)) |
3418 | return (TRUE); |
3419 | |
3420 | if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp)) |
3421 | return (TRUE); |
3422 | |
3423 | if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp)) |
3424 | return (TRUE); |
3425 | |
3426 | if (IFNET_IS_INTCOPROC(ifp) && !INP_INTCOPROC_ALLOWED(inp)) |
3427 | return (TRUE); |
3428 | |
3429 | return (FALSE); |
3430 | } |
3431 | |
3432 | boolean_t |
3433 | inp_restricted_send(struct inpcb *inp, struct ifnet *ifp) |
3434 | { |
3435 | boolean_t ret; |
3436 | |
3437 | ret = _inp_restricted_send(inp, ifp); |
3438 | if (ret == TRUE && log_restricted) { |
3439 | printf("pid %d (%s) is unable to transmit packets on %s\n" , |
3440 | current_proc()->p_pid, proc_best_name(current_proc()), |
3441 | ifp->if_xname); |
3442 | } |
3443 | return (ret); |
3444 | } |
3445 | |
3446 | inline void |
3447 | inp_count_sndbytes(struct inpcb *inp, u_int32_t th_ack) |
3448 | { |
3449 | struct ifnet *ifp = inp->inp_last_outifp; |
3450 | struct socket *so = inp->inp_socket; |
3451 | if (ifp != NULL && !(so->so_flags & SOF_MP_SUBFLOW) && |
3452 | (ifp->if_type == IFT_CELLULAR || |
3453 | ifp->if_subfamily == IFNET_SUBFAMILY_WIFI)) { |
3454 | int32_t unsent; |
3455 | |
3456 | so->so_snd.sb_flags |= SB_SNDBYTE_CNT; |
3457 | |
3458 | /* |
3459 | * There can be data outstanding before the connection |
3460 | * becomes established -- TFO case |
3461 | */ |
3462 | if (so->so_snd.sb_cc > 0) |
3463 | inp_incr_sndbytes_total(so, so->so_snd.sb_cc); |
3464 | |
3465 | unsent = inp_get_sndbytes_allunsent(so, th_ack); |
3466 | if (unsent > 0) |
3467 | inp_incr_sndbytes_unsent(so, unsent); |
3468 | } |
3469 | } |
3470 | |
3471 | inline void |
3472 | inp_incr_sndbytes_total(struct socket *so, int32_t len) |
3473 | { |
3474 | struct inpcb *inp = (struct inpcb *)so->so_pcb; |
3475 | struct ifnet *ifp = inp->inp_last_outifp; |
3476 | |
3477 | if (ifp != NULL) { |
3478 | VERIFY(ifp->if_sndbyte_total >= 0); |
3479 | OSAddAtomic64(len, &ifp->if_sndbyte_total); |
3480 | } |
3481 | } |
3482 | |
3483 | inline void |
3484 | inp_decr_sndbytes_total(struct socket *so, int32_t len) |
3485 | { |
3486 | struct inpcb *inp = (struct inpcb *)so->so_pcb; |
3487 | struct ifnet *ifp = inp->inp_last_outifp; |
3488 | |
3489 | if (ifp != NULL) { |
3490 | VERIFY(ifp->if_sndbyte_total >= len); |
3491 | OSAddAtomic64(-len, &ifp->if_sndbyte_total); |
3492 | } |
3493 | } |
3494 | |
3495 | inline void |
3496 | inp_incr_sndbytes_unsent(struct socket *so, int32_t len) |
3497 | { |
3498 | struct inpcb *inp = (struct inpcb *)so->so_pcb; |
3499 | struct ifnet *ifp = inp->inp_last_outifp; |
3500 | |
3501 | if (ifp != NULL) { |
3502 | VERIFY(ifp->if_sndbyte_unsent >= 0); |
3503 | OSAddAtomic64(len, &ifp->if_sndbyte_unsent); |
3504 | } |
3505 | } |
3506 | |
3507 | inline void |
3508 | inp_decr_sndbytes_unsent(struct socket *so, int32_t len) |
3509 | { |
3510 | struct inpcb *inp = (struct inpcb *)so->so_pcb; |
3511 | struct ifnet *ifp = inp->inp_last_outifp; |
3512 | |
3513 | if (so == NULL || !(so->so_snd.sb_flags & SB_SNDBYTE_CNT)) |
3514 | return; |
3515 | |
3516 | if (ifp != NULL) { |
3517 | if (ifp->if_sndbyte_unsent >= len) |
3518 | OSAddAtomic64(-len, &ifp->if_sndbyte_unsent); |
3519 | else |
3520 | ifp->if_sndbyte_unsent = 0; |
3521 | } |
3522 | } |
3523 | |
3524 | inline void |
3525 | inp_decr_sndbytes_allunsent(struct socket *so, u_int32_t th_ack) |
3526 | { |
3527 | int32_t len; |
3528 | |
3529 | if (so == NULL || !(so->so_snd.sb_flags & SB_SNDBYTE_CNT)) |
3530 | return; |
3531 | |
3532 | len = inp_get_sndbytes_allunsent(so, th_ack); |
3533 | inp_decr_sndbytes_unsent(so, len); |
3534 | } |
3535 | |
3536 | |
/*
 * Record activity for this PCB: pass its inp_nw_activity bitmap and the
 * current net_uptime() value to in_stat_set_activity_bitmap().
 */
inline void
inp_set_activity_bitmap(struct inpcb *inp)
{
	in_stat_set_activity_bitmap(&inp->inp_nw_activity, net_uptime());
}
3542 | |
3543 | inline void |
3544 | inp_get_activity_bitmap(struct inpcb *inp, activity_bitmap_t *ab) |
3545 | { |
3546 | bcopy(&inp->inp_nw_activity, ab, sizeof (*ab)); |
3547 | } |
3548 | |