1/*
2 * Copyright (c) 1998-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29/*
30 * Copyright (c) 1982, 1986, 1993
31 * The Regents of the University of California. All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 * 3. All advertising materials mentioning features or use of this software
42 * must display the following acknowledgement:
43 * This product includes software developed by the University of
44 * California, Berkeley and its contributors.
45 * 4. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * @(#)uipc_domain.c 8.3 (Berkeley) 2/14/95
62 */
63
64#include <sys/param.h>
65#include <sys/socket.h>
66#include <sys/protosw.h>
67#include <sys/domain.h>
68#include <sys/mcache.h>
69#include <sys/mbuf.h>
70#include <sys/time.h>
71#include <sys/kernel.h>
72#include <sys/systm.h>
73#include <sys/proc_internal.h>
74#include <sys/sysctl.h>
75#include <sys/syslog.h>
76#include <sys/queue.h>
77
78#include <net/dlil.h>
79#include <net/nwk_wq.h>
80#include <net/sockaddr_utils.h>
81
82#include <mach/boolean.h>
83#include <pexpert/pexpert.h>
84
85#include <net/sockaddr_utils.h>
86
87#if __has_ptrcheck
88#include <machine/trap.h> /* Needed by bound-checks-soft when enabled. */
89#endif /* __has_ptrcheck */
90
91/* Eventhandler context for protocol events */
92struct eventhandler_lists_ctxt protoctl_evhdlr_ctxt;
93
94static void pr_init_old(struct protosw *, struct domain *);
95static void init_proto(struct protosw *, struct domain *);
96static void attach_proto(struct protosw *, struct domain *);
97static void detach_proto(struct protosw *, struct domain *);
98static void dom_init_old(struct domain *);
99static void init_domain(struct domain *);
100static void attach_domain(struct domain *);
101static void detach_domain(struct domain *);
102static struct protosw *pffindprotonotype_locked(int, int, int);
103static struct domain *pffinddomain_locked(int);
104
105static boolean_t domain_timeout_run; /* domain timer is scheduled to run */
106static boolean_t domain_draining;
107static void domain_sched_timeout(void);
108static void domain_timeout(void *);
109
110static LCK_GRP_DECLARE(domain_proto_mtx_grp, "domain");
111static LCK_ATTR_DECLARE(domain_proto_mtx_attr, 0, 0);
112static LCK_MTX_DECLARE_ATTR(domain_proto_mtx,
113 &domain_proto_mtx_grp, &domain_proto_mtx_attr);
114static LCK_MTX_DECLARE_ATTR(domain_timeout_mtx,
115 &domain_proto_mtx_grp, &domain_proto_mtx_attr);
116
117uint64_t _net_uptime;
118uint64_t _net_uptime_ms;
119uint64_t _net_uptime_us;
120
#if (DEVELOPMENT || DEBUG)

SYSCTL_DECL(_kern_ipc);

static int sysctl_do_drain_domains SYSCTL_HANDLER_ARGS;

/*
 * Writable integer node under _kern_ipc, available on DEVELOPMENT and
 * DEBUG builds only; handled by sysctl_do_drain_domains().
 */
SYSCTL_PROC(_kern_ipc, OID_AUTO, do_drain_domains,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
    sysctl_do_drain_domains, "I", "force manual drain domains");

#endif /* DEVELOPMENT || DEBUG */
133
134static void
135pr_init_old(struct protosw *pp, struct domain *dp)
136{
137#pragma unused(dp)
138 VERIFY(pp->pr_flags & PR_OLD);
139 VERIFY(pp->pr_old != NULL);
140
141 if (pp->pr_old->pr_init != NULL) {
142 pp->pr_old->pr_init();
143 }
144}
145
146static void
147init_proto(struct protosw *pp, struct domain *dp)
148{
149 VERIFY(pp->pr_flags & PR_ATTACHED);
150
151 if (!(pp->pr_flags & PR_INITIALIZED)) {
152 TAILQ_INIT(&pp->pr_filter_head);
153 if (pp->pr_init != NULL) {
154 pp->pr_init(pp, dp);
155 }
156 pp->pr_flags |= PR_INITIALIZED;
157 }
158}
159
160static void
161attach_proto(struct protosw *pp, struct domain *dp)
162{
163 domain_proto_mtx_lock_assert_held();
164 VERIFY(!(pp->pr_flags & PR_ATTACHED));
165 VERIFY(pp->pr_domain == NULL);
166 VERIFY(pp->pr_protosw == NULL);
167
168 TAILQ_INSERT_TAIL(&dp->dom_protosw, pp, pr_entry);
169 pp->pr_flags |= PR_ATTACHED;
170 pp->pr_domain = dp;
171 pp->pr_protosw = pp;
172
173 /* do some cleaning up on user request callbacks */
174 pru_sanitize(pp->pr_usrreqs);
175}
176
177static void
178detach_proto(struct protosw *pp, struct domain *dp)
179{
180 domain_proto_mtx_lock_assert_held();
181 VERIFY(pp->pr_flags & PR_ATTACHED);
182 VERIFY(pp->pr_domain == dp);
183 VERIFY(pp->pr_protosw == pp);
184
185 TAILQ_REMOVE(&dp->dom_protosw, pp, pr_entry);
186 pp->pr_flags &= ~PR_ATTACHED;
187 pp->pr_domain = NULL;
188 pp->pr_protosw = NULL;
189}
190
191static void
192dom_init_old(struct domain *dp)
193{
194 VERIFY(dp->dom_flags & DOM_OLD);
195 VERIFY(dp->dom_old != NULL);
196
197 if (dp->dom_old->dom_init != NULL) {
198 dp->dom_old->dom_init();
199 }
200}
201
202static void
203init_domain(struct domain *dp)
204{
205 VERIFY(dp->dom_flags & DOM_ATTACHED);
206
207 if (!(dp->dom_flags & DOM_INITIALIZED)) {
208 lck_mtx_init(lck: &dp->dom_mtx_s, grp: &domain_proto_mtx_grp,
209 attr: &domain_proto_mtx_attr);
210 dp->dom_mtx = &dp->dom_mtx_s;
211 TAILQ_INIT(&dp->dom_protosw);
212 if (dp->dom_init != NULL) {
213 dp->dom_init(dp);
214 }
215 dp->dom_flags |= DOM_INITIALIZED;
216 }
217
218 /* Recompute for new protocol */
219 if (max_linkhdr < 16) { /* XXX - Sheesh; everything's ether? */
220 max_linkhdr = 16;
221 }
222 max_linkhdr = (int)P2ROUNDUP(max_linkhdr, sizeof(uint32_t));
223
224 if (dp->dom_protohdrlen > max_protohdr) {
225 max_protohdr = dp->dom_protohdrlen;
226 }
227 max_protohdr = (int)P2ROUNDUP(max_protohdr, sizeof(uint32_t));
228
229 max_hdr = max_linkhdr + max_protohdr;
230 max_datalen = MHLEN - max_hdr;
231}
232
233static void
234attach_domain(struct domain *dp)
235{
236 domain_proto_mtx_lock_assert_held();
237 VERIFY(!(dp->dom_flags & DOM_ATTACHED));
238
239 TAILQ_INSERT_TAIL(&domains, dp, dom_entry);
240 dp->dom_flags |= DOM_ATTACHED;
241}
242
243static void
244detach_domain(struct domain *dp)
245{
246 domain_proto_mtx_lock_assert_held();
247 VERIFY(dp->dom_flags & DOM_ATTACHED);
248
249 TAILQ_REMOVE(&domains, dp, dom_entry);
250 dp->dom_flags &= ~DOM_ATTACHED;
251
252 if (dp->dom_flags & DOM_OLD) {
253 struct domain_old *odp = dp->dom_old;
254
255 VERIFY(odp != NULL);
256 odp->dom_next = NULL;
257 odp->dom_mtx = NULL;
258 }
259}
260
261/*
262 * Exported (private) routine, indirection of net_add_domain.
263 */
264void
265net_add_domain_old(struct domain_old *odp)
266{
267 struct domain *dp;
268 domain_guard_t guard __single;
269
270 VERIFY(odp != NULL);
271
272 guard = domain_guard_deploy();
273 if ((dp = pffinddomain_locked(odp->dom_family)) != NULL) {
274 /*
275 * There is really nothing better than to panic here,
276 * as the caller would not have been able to handle
277 * any failures otherwise.
278 */
279 panic("%s: domain (%d,%s) already exists for %s", __func__,
280 dp->dom_family, dp->dom_name, odp->dom_name);
281 /* NOTREACHED */
282 }
283
284 /* Make sure nothing is currently pointing to the odp. */
285 TAILQ_FOREACH(dp, &domains, dom_entry) {
286 if (dp->dom_old == odp) {
287 panic("%s: domain %p (%d,%s) is already "
288 "associated with %p (%d,%s)\n", __func__,
289 odp, odp->dom_family, odp->dom_name, dp,
290 dp->dom_family, dp->dom_name);
291 /* NOTREACHED */
292 }
293 }
294
295 if (odp->dom_protosw != NULL) {
296 panic("%s: domain (%d,%s) protocols need to added "
297 "via net_add_proto\n", __func__, odp->dom_family,
298 odp->dom_name);
299 /* NOTREACHED */
300 }
301
302 dp = kalloc_type(struct domain, Z_WAITOK | Z_ZERO | Z_NOFAIL);
303
304 /* Copy everything but dom_init, dom_mtx, dom_next and dom_refs */
305 dp->dom_family = odp->dom_family;
306 dp->dom_flags = (odp->dom_flags & DOMF_USERFLAGS) | DOM_OLD;
307 dp->dom_name = odp->dom_name;
308 dp->dom_init = dom_init_old;
309 dp->dom_externalize = odp->dom_externalize;
310 dp->dom_dispose = odp->dom_dispose;
311 dp->dom_rtattach = odp->dom_rtattach;
312 dp->dom_rtoffset = odp->dom_rtoffset;
313 dp->dom_maxrtkey = odp->dom_maxrtkey;
314 dp->dom_protohdrlen = odp->dom_protohdrlen;
315 dp->dom_old = odp;
316
317 attach_domain(dp);
318 init_domain(dp);
319
320 /* Point the mutex back to the internal structure's */
321 odp->dom_mtx = dp->dom_mtx;
322 domain_guard_release(guard);
323}
324
325/*
326 * Exported (private) routine, indirection of net_del_domain.
327 */
328int
329net_del_domain_old(struct domain_old *odp)
330{
331 struct domain *dp1 __single, *dp2 __single;
332 int error = 0;
333 domain_guard_t guard __single;
334
335 VERIFY(odp != NULL);
336
337 guard = domain_guard_deploy();
338 if (odp->dom_refs != 0) {
339 error = EBUSY;
340 goto done;
341 }
342
343 TAILQ_FOREACH_SAFE(dp1, &domains, dom_entry, dp2) {
344 if (!(dp1->dom_flags & DOM_OLD)) {
345 continue;
346 }
347 VERIFY(dp1->dom_old != NULL);
348 if (odp == dp1->dom_old) {
349 break;
350 }
351 }
352 if (dp1 != NULL) {
353 struct protosw *pp1 __single, *pp2 __single;
354
355 VERIFY(dp1->dom_flags & DOM_OLD);
356 VERIFY(dp1->dom_old == odp);
357
358 /* Remove all protocols attached to this domain */
359 TAILQ_FOREACH_SAFE(pp1, &dp1->dom_protosw, pr_entry, pp2) {
360 detach_proto(pp: pp1, dp: dp1);
361 if (pp1->pr_usrreqs->pru_flags & PRUF_OLD) {
362 kfree_type(struct pr_usrreqs, pp1->pr_usrreqs);
363 }
364 if (pp1->pr_flags & PR_OLD) {
365 kfree_type(struct protosw, pp1);
366 }
367 }
368
369 detach_domain(dp: dp1);
370 kfree_type(struct domain, dp1);
371 } else {
372 error = EPFNOSUPPORT;
373 }
374done:
375 domain_guard_release(guard);
376 return error;
377}
378
379/*
380 * Internal routine, not exported.
381 *
382 * net_add_proto - link a protosw into a domain's protosw chain
383 *
384 * NOTE: Caller must have acquired domain_proto_mtx
385 */
386int
387net_add_proto(struct protosw *pp, struct domain *dp, int doinit)
388{
389 struct protosw *pp1;
390
391 /*
392 * This could be called as part of initializing the domain,
393 * and thus DOM_INITIALIZED may not be set (yet).
394 */
395 domain_proto_mtx_lock_assert_held();
396 VERIFY(!(pp->pr_flags & PR_ATTACHED));
397
398 /* pr_domain is set only after the protocol is attached */
399 if (pp->pr_domain != NULL) {
400 panic("%s: domain (%d,%s), proto %d has non-NULL pr_domain!",
401 __func__, dp->dom_family, dp->dom_name, pp->pr_protocol);
402 /* NOTREACHED */
403 }
404
405 if (pp->pr_usrreqs == NULL) {
406 panic("%s: domain (%d,%s), proto %d has no usrreqs!",
407 __func__, dp->dom_family, dp->dom_name, pp->pr_protocol);
408 /* NOTREACHED */
409 }
410
411 TAILQ_FOREACH(pp1, &dp->dom_protosw, pr_entry) {
412 if (pp1->pr_type == pp->pr_type &&
413 pp1->pr_protocol == pp->pr_protocol) {
414 return EEXIST;
415 }
416 }
417
418 attach_proto(pp, dp);
419 if (doinit) {
420 net_init_proto(pp, dp);
421 }
422
423 return 0;
424}
425
426void
427net_init_proto(struct protosw *pp, struct domain *dp)
428{
429 /*
430 * This could be called as part of initializing the domain,
431 * and thus DOM_INITIALIZED may not be set (yet). The protocol
432 * must have been attached via net_addr_protosw() by now.
433 */
434 domain_proto_mtx_lock_assert_held();
435 VERIFY(pp->pr_flags & PR_ATTACHED);
436
437 init_proto(pp, dp);
438}
439
440/*
441 * Exported (private) routine, indirection of net_add_proto.
442 */
443int
444net_add_proto_old(struct protosw_old *opp, struct domain_old *odp)
445{
446 struct pr_usrreqs_old *opru;
447 struct pr_usrreqs *pru __single = NULL;
448 struct protosw *pp __single = NULL, *pp1;
449 int error = 0;
450 struct domain *dp;
451 domain_guard_t guard __single;
452
453 /*
454 * This could be called as part of initializing the domain,
455 * and thus DOM_INITIALIZED may not be set (yet).
456 */
457 guard = domain_guard_deploy();
458
459 /* Make sure the domain has been added via net_add_domain */
460 TAILQ_FOREACH(dp, &domains, dom_entry) {
461 if (!(dp->dom_flags & DOM_OLD)) {
462 continue;
463 }
464 if (dp->dom_old == odp) {
465 break;
466 }
467 }
468 if (dp == NULL) {
469 error = EINVAL;
470 goto done;
471 }
472
473 TAILQ_FOREACH(pp1, &dp->dom_protosw, pr_entry) {
474 if (pp1->pr_type == opp->pr_type &&
475 pp1->pr_protocol == opp->pr_protocol) {
476 error = EEXIST;
477 goto done;
478 }
479 }
480
481 if ((opru = opp->pr_usrreqs) == NULL) {
482 panic("%s: domain (%d,%s), proto %d has no usrreqs!",
483 __func__, odp->dom_family, odp->dom_name, opp->pr_protocol);
484 /* NOTREACHED */
485 }
486
487 pru = kalloc_type(struct pr_usrreqs, Z_WAITOK | Z_ZERO | Z_NOFAIL);
488
489 pru->pru_flags = PRUF_OLD;
490 pru->pru_abort = opru->pru_abort;
491 pru->pru_accept = opru->pru_accept;
492 pru->pru_attach = opru->pru_attach;
493 pru->pru_bind = opru->pru_bind;
494 pru->pru_connect = opru->pru_connect;
495 pru->pru_connect2 = opru->pru_connect2;
496 pru->pru_control = opru->pru_control;
497 pru->pru_detach = opru->pru_detach;
498 pru->pru_disconnect = opru->pru_disconnect;
499 pru->pru_listen = opru->pru_listen;
500 pru->pru_peeraddr = opru->pru_peeraddr;
501 pru->pru_rcvd = opru->pru_rcvd;
502 pru->pru_rcvoob = opru->pru_rcvoob;
503 pru->pru_send = opru->pru_send;
504 pru->pru_sense = opru->pru_sense;
505 pru->pru_shutdown = opru->pru_shutdown;
506 pru->pru_sockaddr = opru->pru_sockaddr;
507 pru->pru_sosend = opru->pru_sosend;
508 pru->pru_soreceive = opru->pru_soreceive;
509 pru->pru_sopoll = opru->pru_sopoll;
510
511 pp = kalloc_type(struct protosw, Z_WAITOK | Z_ZERO | Z_NOFAIL);
512
513 /*
514 * Protocol fast and slow timers are now deprecated.
515 */
516 if (opp->pr_unused != NULL) {
517 printf("%s: domain (%d,%s), proto %d: pr_fasttimo is "
518 "deprecated and won't be called\n", __func__,
519 odp->dom_family, odp->dom_name, opp->pr_protocol);
520 }
521 if (opp->pr_unused2 != NULL) {
522 printf("%s: domain (%d,%s), proto %d: pr_slowtimo is "
523 "deprecated and won't be called\n", __func__,
524 odp->dom_family, odp->dom_name, opp->pr_protocol);
525 }
526
527 /* Copy everything but pr_init, pr_next, pr_domain, pr_protosw */
528 pp->pr_type = opp->pr_type;
529 pp->pr_protocol = opp->pr_protocol;
530 pp->pr_flags = (opp->pr_flags & PRF_USERFLAGS) | PR_OLD;
531 pp->pr_input = opp->pr_input;
532 pp->pr_output = opp->pr_output;
533 pp->pr_ctlinput = opp->pr_ctlinput;
534 pp->pr_ctloutput = opp->pr_ctloutput;
535 pp->pr_usrreqs = pru;
536 pp->pr_init = pr_init_old;
537 pp->pr_drain = opp->pr_drain;
538 pp->pr_sysctl = opp->pr_sysctl;
539 pp->pr_lock = opp->pr_lock;
540 pp->pr_unlock = opp->pr_unlock;
541 pp->pr_getlock = opp->pr_getlock;
542 pp->pr_old = opp;
543
544 /* attach as well as initialize */
545 attach_proto(pp, dp);
546 net_init_proto(pp, dp);
547done:
548 if (error != 0) {
549 printf("%s: domain (%d,%s), proto %d: failed to attach, "
550 "error %d\n", __func__, odp->dom_family,
551 odp->dom_name, opp->pr_protocol, error);
552
553 kfree_type(struct pr_usrreqs, pru);
554 kfree_type(struct protosw, pp);
555 }
556
557 domain_guard_release(guard);
558 return error;
559}
560
561/*
562 * Internal routine, not exported.
563 *
564 * net_del_proto - remove a protosw from a domain's protosw chain.
565 * Search the protosw chain for the element with matching data.
566 * Then unlink and return.
567 *
568 * NOTE: Caller must have acquired domain_proto_mtx
569 */
570int
571net_del_proto(int type, int protocol, struct domain *dp)
572{
573 struct protosw *pp __single;
574
575 /*
576 * This could be called as part of initializing the domain,
577 * and thus DOM_INITIALIZED may not be set (yet).
578 */
579 domain_proto_mtx_lock_assert_held();
580
581 TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) {
582 if (pp->pr_type == type && pp->pr_protocol == protocol) {
583 break;
584 }
585 }
586 if (pp == NULL) {
587 return ENXIO;
588 }
589
590 detach_proto(pp, dp);
591 if (pp->pr_usrreqs->pru_flags & PRUF_OLD) {
592 kfree_type(struct pr_usrreqs, pp->pr_usrreqs);
593 }
594 if (pp->pr_flags & PR_OLD) {
595 kfree_type(struct protosw, pp);
596 }
597
598 return 0;
599}
600
601/*
602 * Exported (private) routine, indirection of net_del_proto.
603 */
604int
605net_del_proto_old(int type, int protocol, struct domain_old *odp)
606{
607 int error = 0;
608 struct protosw *pp __single;
609 struct domain *dp;
610 domain_guard_t guard __single;
611
612 /*
613 * This could be called as part of initializing the domain,
614 * and thus DOM_INITIALIZED may not be set (yet).
615 */
616 guard = domain_guard_deploy();
617
618 /* Make sure the domain has been added via net_add_domain */
619 TAILQ_FOREACH(dp, &domains, dom_entry) {
620 if (!(dp->dom_flags & DOM_OLD)) {
621 continue;
622 }
623 if (dp->dom_old == odp) {
624 break;
625 }
626 }
627 if (dp == NULL) {
628 error = ENXIO;
629 goto done;
630 }
631
632 TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) {
633 if (pp->pr_type == type && pp->pr_protocol == protocol) {
634 break;
635 }
636 }
637 if (pp == NULL) {
638 error = ENXIO;
639 goto done;
640 }
641 detach_proto(pp, dp);
642 if (pp->pr_usrreqs->pru_flags & PRUF_OLD) {
643 kfree_type(struct pr_usrreqs, pp->pr_usrreqs);
644 }
645 if (pp->pr_flags & PR_OLD) {
646 kfree_type(struct protosw, pp);
647 }
648
649done:
650 domain_guard_release(guard);
651 return error;
652}
653
654static void
655domain_sched_timeout(void)
656{
657 LCK_MTX_ASSERT(&domain_timeout_mtx, LCK_MTX_ASSERT_OWNED);
658
659 if (!domain_timeout_run && domain_draining) {
660 domain_timeout_run = TRUE;
661 timeout(domain_timeout, NULL, ticks: hz);
662 }
663}
664
665void
666net_drain_domains(void)
667{
668 lck_mtx_lock(lck: &domain_timeout_mtx);
669 domain_draining = TRUE;
670 domain_sched_timeout();
671 lck_mtx_unlock(lck: &domain_timeout_mtx);
672}
673
/* Statically defined domains that domaininit() attaches at startup. */
extern struct domain inetdomain_s;
extern struct domain inet6domain_s;
#if MULTIPATH
extern struct domain mpdomain_s;
#endif /* MULTIPATH */
extern struct domain systemdomain_s;
extern struct domain localdomain_s;
#if IPSEC
extern struct domain keydomain_s;
#endif /* IPSEC */
extern struct domain ndrvdomain_s;
extern struct domain vsockdomain_s;
extern struct domain routedomain_s;
686
687static void
688domain_timeout(void *arg)
689{
690#pragma unused(arg)
691 struct protosw *pp;
692 struct domain *dp;
693 domain_guard_t guard __single;
694
695 lck_mtx_lock(lck: &domain_timeout_mtx);
696 if (domain_draining) {
697 domain_draining = FALSE;
698 lck_mtx_unlock(lck: &domain_timeout_mtx);
699
700 guard = domain_guard_deploy();
701 TAILQ_FOREACH(dp, &domains, dom_entry) {
702 TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) {
703 if (pp->pr_drain != NULL) {
704 (*pp->pr_drain)();
705 }
706 }
707 }
708 domain_guard_release(guard);
709
710 lck_mtx_lock(lck: &domain_timeout_mtx);
711 }
712
713 /* re-arm the timer if there's work to do */
714 domain_timeout_run = FALSE;
715 domain_sched_timeout();
716 lck_mtx_unlock(lck: &domain_timeout_mtx);
717}
718
719void
720domaininit(void)
721{
722 struct domain *dp;
723 domain_guard_t guard __single;
724
725 eventhandler_lists_ctxt_init(evthdlr_lists_ctxt: &protoctl_evhdlr_ctxt);
726
727 guard = domain_guard_deploy();
728 /*
729 * Add all the static domains to the domains list. route domain
730 * gets added and initialized last, since we need it to attach
731 * rt_tables[] to everything that's already there. This also
732 * means that domains added after this point won't get their
733 * dom_rtattach() called on rt_tables[].
734 */
735 attach_domain(dp: &inetdomain_s);
736 attach_domain(dp: &inet6domain_s);
737#if MULTIPATH
738 attach_domain(dp: &mpdomain_s);
739#endif /* MULTIPATH */
740 attach_domain(dp: &systemdomain_s);
741 attach_domain(dp: &localdomain_s);
742#if IPSEC
743 attach_domain(dp: &keydomain_s);
744#endif /* IPSEC */
745 attach_domain(dp: &ndrvdomain_s);
746 attach_domain(dp: &vsockdomain_s);
747 attach_domain(dp: &routedomain_s); /* must be last domain */
748
749 /*
750 * Now ask them all to init (XXX including the routing domain,
751 * see above)
752 */
753 TAILQ_FOREACH(dp, &domains, dom_entry)
754 init_domain(dp);
755
756 domain_guard_release(guard);
757}
758
759static __inline__ struct domain *
760pffinddomain_locked(int pf)
761{
762 struct domain *dp;
763
764 domain_proto_mtx_lock_assert_held();
765
766 TAILQ_FOREACH(dp, &domains, dom_entry) {
767 if (dp->dom_family == pf) {
768 break;
769 }
770 }
771 return dp;
772}
773
774struct protosw *
775pffindtype(int family, int type)
776{
777 struct protosw *pp = NULL;
778 struct domain *dp;
779 domain_guard_t guard __single;
780
781 guard = domain_guard_deploy();
782 if ((dp = pffinddomain_locked(pf: family)) == NULL) {
783 goto done;
784 }
785
786 TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) {
787 if (pp->pr_type != 0 && pp->pr_type == type) {
788 goto done;
789 }
790 }
791done:
792 domain_guard_release(guard);
793 return pp;
794}
795
796/*
797 * Internal routine, not exported.
798 */
799struct domain *
800pffinddomain(int pf)
801{
802 struct domain *dp;
803 domain_guard_t guard __single;
804
805 guard = domain_guard_deploy();
806 dp = pffinddomain_locked(pf);
807 domain_guard_release(guard);
808 return dp;
809}
810
811/*
812 * Exported (private) routine, indirection of pffinddomain.
813 */
814struct domain_old *
815pffinddomain_old(int pf)
816{
817 struct domain_old *odp = NULL;
818 struct domain *dp;
819 domain_guard_t guard __single;
820
821 guard = domain_guard_deploy();
822 if ((dp = pffinddomain_locked(pf)) != NULL && (dp->dom_flags & DOM_OLD)) {
823 odp = dp->dom_old;
824 }
825 domain_guard_release(guard);
826 return odp;
827}
828
829/*
830 * Internal routine, not exported.
831 */
832struct protosw *
833pffindproto(int family, int protocol, int type)
834{
835 struct protosw *pp;
836 domain_guard_t guard __single;
837
838 guard = domain_guard_deploy();
839 pp = pffindproto_locked(family, protocol, type);
840 domain_guard_release(guard);
841 return pp;
842}
843
844struct protosw *
845pffindproto_locked(int family, int protocol, int type)
846{
847 struct protosw *maybe = NULL;
848 struct protosw *pp;
849 struct domain *dp;
850
851 domain_proto_mtx_lock_assert_held();
852
853 if (family == 0) {
854 return 0;
855 }
856
857 dp = pffinddomain_locked(pf: family);
858 if (dp == NULL) {
859 return NULL;
860 }
861
862 TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) {
863 if ((pp->pr_protocol == protocol) && (pp->pr_type == type)) {
864 return pp;
865 }
866
867 if (type == SOCK_RAW && pp->pr_type == SOCK_RAW &&
868 pp->pr_protocol == 0 && maybe == NULL) {
869 maybe = pp;
870 }
871 }
872 return maybe;
873}
874
875/*
876 * Exported (private) routine, indirection of pffindproto.
877 */
878struct protosw_old *
879pffindproto_old(int family, int protocol, int type)
880{
881 struct protosw_old *opr = NULL;
882 struct protosw *pp;
883 domain_guard_t guard __single;
884
885 guard = domain_guard_deploy();
886 if ((pp = pffindproto_locked(family, protocol, type)) != NULL &&
887 (pp->pr_flags & PR_OLD)) {
888 opr = pp->pr_old;
889 }
890 domain_guard_release(guard);
891 return opr;
892}
893
894static struct protosw *
895pffindprotonotype_locked(int family, int protocol, int type)
896{
897#pragma unused(type)
898 struct domain *dp;
899 struct protosw *pp;
900
901 domain_proto_mtx_lock_assert_held();
902
903 if (family == 0) {
904 return 0;
905 }
906
907 dp = pffinddomain_locked(pf: family);
908 if (dp == NULL) {
909 return NULL;
910 }
911
912 TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) {
913 if (pp->pr_protocol == protocol) {
914 return pp;
915 }
916 }
917 return NULL;
918}
919
920struct protosw *
921pffindprotonotype(int family, int protocol)
922{
923 struct protosw *pp;
924 domain_guard_t guard __single;
925
926 if (protocol == 0) {
927 return NULL;
928 }
929
930 guard = domain_guard_deploy();
931 pp = pffindprotonotype_locked(family, protocol, type: 0);
932 domain_guard_release(guard);
933 return pp;
934}
935
936void
937pfctlinput(int cmd, struct sockaddr *sa)
938{
939 pfctlinput2(cmd, sa, NULL);
940}
941
942void
943pfctlinput2(int cmd, struct sockaddr *sa, void *ctlparam)
944{
945 struct domain *dp;
946 struct protosw *pp;
947 domain_guard_t guard __single;
948
949 if (sa == NULL) {
950 return;
951 }
952
953 guard = domain_guard_deploy();
954 TAILQ_FOREACH(dp, &domains, dom_entry) {
955 TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) {
956 if (pp->pr_ctlinput != NULL) {
957 (*pp->pr_ctlinput)(cmd, sa, ctlparam, NULL);
958 }
959 }
960 }
961 domain_guard_release(guard);
962}
963
964void
965net_update_uptime_with_time(const struct timeval *tvp)
966{
967 uint64_t tmp;
968 uint64_t seconds = tvp->tv_sec;;
969 uint64_t milliseconds = ((uint64_t)tvp->tv_sec * 1000) + ((uint64_t)tvp->tv_usec / 1000);
970 uint64_t microseconds = ((uint64_t)tvp->tv_sec * USEC_PER_SEC) + (uint64_t)tvp->tv_usec;
971
972 /*
973 * Round up the timer to the nearest integer value because otherwise
974 * we might setup networking timers that are off by almost 1 second.
975 */
976 if (tvp->tv_usec > 500000) {
977 seconds++;
978 }
979
980 tmp = os_atomic_load(&_net_uptime, relaxed);
981 if (tmp < seconds) {
982 os_atomic_cmpxchg(&_net_uptime, tmp, seconds, relaxed);
983
984 /*
985 * No loop needed. If we are racing with another thread, let's give
986 * the other one the priority.
987 */
988 }
989
990 /* update milliseconds variant */
991 tmp = os_atomic_load(&_net_uptime_ms, relaxed);
992 if (tmp < milliseconds) {
993 os_atomic_cmpxchg(&_net_uptime_ms, tmp, milliseconds, relaxed);
994 }
995
996 /* update microseconds variant */
997 tmp = os_atomic_load(&_net_uptime_us, relaxed);
998 if (tmp < microseconds) {
999 os_atomic_cmpxchg(&_net_uptime_us, tmp, microseconds, relaxed);
1000 }
1001}
1002
1003void
1004net_update_uptime(void)
1005{
1006 struct timeval tv;
1007
1008 microuptime(tv: &tv);
1009
1010 net_update_uptime_with_time(tvp: &tv);
1011}
1012
1013/*
1014 * Convert our uin64_t net_uptime to a struct timeval.
1015 */
1016void
1017net_uptime2timeval(struct timeval *tv)
1018{
1019 if (tv == NULL) {
1020 return;
1021 }
1022
1023 tv->tv_usec = 0;
1024 tv->tv_sec = (time_t)net_uptime();
1025}
1026
1027/*
1028 * An alternative way to obtain the coarse-grained uptime (in seconds)
1029 * for networking code which do not require high-precision timestamp,
1030 * as this is significantly cheaper than microuptime().
1031 */
1032uint64_t
1033net_uptime(void)
1034{
1035 if (_net_uptime == 0) {
1036 net_update_uptime();
1037 }
1038
1039 return _net_uptime;
1040}
1041
1042uint64_t
1043net_uptime_ms(void)
1044{
1045 if (_net_uptime_ms == 0) {
1046 net_update_uptime();
1047 }
1048
1049 return _net_uptime_ms;
1050}
1051
1052uint64_t
1053net_uptime_us(void)
1054{
1055 if (_net_uptime_us == 0) {
1056 net_update_uptime();
1057 }
1058
1059 return _net_uptime_us;
1060}
1061
1062void
1063domain_proto_mtx_lock_assert_held(void)
1064{
1065 LCK_MTX_ASSERT(&domain_proto_mtx, LCK_MTX_ASSERT_OWNED);
1066}
1067
1068void
1069domain_proto_mtx_lock_assert_notheld(void)
1070{
1071 LCK_MTX_ASSERT(&domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED);
1072}
1073
1074domain_guard_t
1075domain_guard_deploy(void)
1076{
1077 net_thread_marks_t marks __single;
1078
1079 marks = net_thread_marks_push(NET_THREAD_HELD_DOMAIN);
1080 if (marks != net_thread_marks_none) {
1081 LCK_MTX_ASSERT(&domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED);
1082 lck_mtx_lock(lck: &domain_proto_mtx);
1083 } else {
1084 LCK_MTX_ASSERT(&domain_proto_mtx, LCK_MTX_ASSERT_OWNED);
1085 }
1086
1087 return (domain_guard_t)(const void*)marks;
1088}
1089
1090void
1091domain_guard_release(domain_guard_t guard)
1092{
1093 net_thread_marks_t marks __single = (net_thread_marks_t)(const void*)guard;
1094
1095 if (marks != net_thread_marks_none) {
1096 LCK_MTX_ASSERT(&domain_proto_mtx, LCK_MTX_ASSERT_OWNED);
1097 lck_mtx_unlock(lck: &domain_proto_mtx);
1098 net_thread_marks_pop(marks);
1099 } else {
1100 LCK_MTX_ASSERT(&domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED);
1101 }
1102}
1103
1104domain_unguard_t
1105domain_unguard_deploy(void)
1106{
1107 net_thread_marks_t marks __single;
1108
1109 marks = net_thread_unmarks_push(NET_THREAD_HELD_DOMAIN);
1110 if (marks != net_thread_marks_none) {
1111 LCK_MTX_ASSERT(&domain_proto_mtx, LCK_MTX_ASSERT_OWNED);
1112 lck_mtx_unlock(lck: &domain_proto_mtx);
1113 } else {
1114 LCK_MTX_ASSERT(&domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED);
1115 }
1116
1117 return (domain_unguard_t)(const void*)marks;
1118}
1119
1120void
1121domain_unguard_release(domain_unguard_t unguard)
1122{
1123 net_thread_marks_t marks __single = (net_thread_marks_t)(const void*)unguard;
1124
1125 if (marks != net_thread_marks_none) {
1126 LCK_MTX_ASSERT(&domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED);
1127 lck_mtx_lock(lck: &domain_proto_mtx);
1128 net_thread_unmarks_pop(marks);
1129 } else {
1130 LCK_MTX_ASSERT(&domain_proto_mtx, LCK_MTX_ASSERT_OWNED);
1131 }
1132}
1133
1134#if SKYWALK
1135/* The following is used to enqueue work items for interface events */
1136struct protoctl_event {
1137 struct ifnet *ifp;
1138 union sockaddr_in_4_6 laddr;
1139 union sockaddr_in_4_6 raddr;
1140 uint32_t protoctl_event_code;
1141 struct protoctl_ev_val val;
1142 uint16_t lport;
1143 uint16_t rport;
1144 uint8_t protocol;
1145};
1146
1147struct protoctl_event_nwk_wq_entry {
1148 struct nwk_wq_entry nwk_wqe;
1149 struct protoctl_event protoctl_ev_arg;
1150};
1151
1152static void
1153protoctl_event_callback(struct nwk_wq_entry *nwk_item)
1154{
1155 struct protoctl_event_nwk_wq_entry *p_ev __single = NULL;
1156
1157 p_ev = __unsafe_forge_single(struct protoctl_event_nwk_wq_entry *,
1158 __container_of(nwk_item, struct protoctl_event_nwk_wq_entry, nwk_wqe));
1159
1160 /* Call this before we walk the tree */
1161 EVENTHANDLER_INVOKE(&protoctl_evhdlr_ctxt, protoctl_event,
1162 p_ev->protoctl_ev_arg.ifp, SA(&p_ev->protoctl_ev_arg.laddr),
1163 SA(&p_ev->protoctl_ev_arg.raddr),
1164 p_ev->protoctl_ev_arg.lport, p_ev->protoctl_ev_arg.rport,
1165 p_ev->protoctl_ev_arg.protocol, p_ev->protoctl_ev_arg.protoctl_event_code,
1166 &p_ev->protoctl_ev_arg.val);
1167
1168 kfree_type(struct protoctl_event_nwk_wq_entry, p_ev);
1169}
1170
1171/* XXX Some PRC events needs extra verification like sequence number checking */
1172void
1173protoctl_event_enqueue_nwk_wq_entry(struct ifnet *ifp, struct sockaddr *p_laddr,
1174 struct sockaddr *p_raddr, uint16_t lport, uint16_t rport, uint8_t protocol,
1175 uint32_t protoctl_event_code, struct protoctl_ev_val *p_protoctl_ev_val)
1176{
1177 struct protoctl_event_nwk_wq_entry *p_protoctl_ev = NULL;
1178
1179 p_protoctl_ev = kalloc_type(struct protoctl_event_nwk_wq_entry,
1180 Z_WAITOK | Z_ZERO | Z_NOFAIL);
1181
1182 p_protoctl_ev->protoctl_ev_arg.ifp = ifp;
1183
1184 if (p_laddr != NULL) {
1185 VERIFY(p_laddr->sa_len <= sizeof(p_protoctl_ev->protoctl_ev_arg.laddr));
1186 struct sockaddr_in6 *dst __single = &p_protoctl_ev->protoctl_ev_arg.laddr.sin6;
1187 SOCKADDR_COPY(SIN6(p_laddr), dst, p_laddr->sa_len);
1188 }
1189
1190 if (p_raddr != NULL) {
1191 VERIFY(p_raddr->sa_len <= sizeof(p_protoctl_ev->protoctl_ev_arg.raddr));
1192 struct sockaddr_in6 *dst __single = &p_protoctl_ev->protoctl_ev_arg.raddr.sin6;
1193 SOCKADDR_COPY(SIN6(p_raddr), dst, p_raddr->sa_len);
1194 }
1195
1196 p_protoctl_ev->protoctl_ev_arg.lport = lport;
1197 p_protoctl_ev->protoctl_ev_arg.rport = rport;
1198 p_protoctl_ev->protoctl_ev_arg.protocol = protocol;
1199 p_protoctl_ev->protoctl_ev_arg.protoctl_event_code = protoctl_event_code;
1200
1201 if (p_protoctl_ev_val != NULL) {
1202 bcopy(src: p_protoctl_ev_val, dst: &(p_protoctl_ev->protoctl_ev_arg.val),
1203 n: sizeof(*p_protoctl_ev_val));
1204 }
1205 p_protoctl_ev->nwk_wqe.func = protoctl_event_callback;
1206
1207 nwk_wq_enqueue(nwk_item: &p_protoctl_ev->nwk_wqe);
1208}
1209#endif /* SKYWALK */
1210
1211#if (DEVELOPMENT || DEBUG)
1212
1213static int
1214sysctl_do_drain_domains SYSCTL_HANDLER_ARGS
1215{
1216#pragma unused(arg1, arg2)
1217 int error;
1218 int dummy = 0;
1219
1220 error = sysctl_handle_int(oidp, &dummy, 0, req);
1221 if (error || req->newptr == USER_ADDR_NULL) {
1222 return error;
1223 }
1224
1225 net_drain_domains();
1226
1227 return 0;
1228}
1229
1230#endif /* DEVELOPMENT || DEBUG */
1231