1/*
2 * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <kern/assert.h>
30#include <kern/locks.h>
31#include <kern/zalloc.h>
32#include <libkern/tree.h>
33#include <sys/kernel.h>
34#include <sys/sysctl.h>
35#include <sys/bitstring.h>
36#include <net/if.h>
37#include <net/kpi_interface.h>
38#include <net/restricted_in_port.h>
39
40#include <netinet/in.h>
41#include <netinet/in_pcb.h>
42#include <netinet/tcp_fsm.h>
43#include <netinet/tcp_var.h>
44
45#include <netinet6/in6_var.h>
46#include <string.h>
47
48#include <skywalk/os_skywalk.h>
49#include <skywalk/os_skywalk_private.h>
50#include <skywalk/os_stats_private.h>
51#include <skywalk/nexus/flowswitch/flow/flow_var.h>
52#include <skywalk/nexus/flowswitch/nx_flowswitch.h>
53
54#include <net/if_ports_used.h>
55
/* Initialization guard: netns_init() verifies this is still 0 on entry. */
static int __netns_inited = 0;

/*
 * Logging
 *
 * Helpers that translate a protocol number or an address length into the
 * matching verbosity flag, printable protocol name, or address family.
 * Anything that is not IPPROTO_TCP is treated as UDP, and any length other
 * than sizeof(struct in_addr) is treated as IPv6.  Every macro argument is
 * parenthesized so that a compound argument expression (e.g. "a | b") is
 * compared as a whole instead of being re-associated by operator precedence.
 */

#define NS_VERB_PROTO(proto)    (((proto) == IPPROTO_TCP) ? SK_VERB_NS_TCP : \
	                        SK_VERB_NS_UDP)
#define NS_VERB_IP(addr_len)    (((addr_len) == sizeof (struct in_addr)) ? \
	                        SK_VERB_NS_IPV4 : SK_VERB_NS_IPV6)
#define PROTO_STR(proto)        (((proto) == IPPROTO_TCP) ? "tcp" : "udp")
#define LEN_TO_AF(len)          (((len) == sizeof (struct in_addr)) ? \
	                        AF_INET : AF_INET6)
69/*
70 * Locking
71 * Netns is currently protected by a global mutex, NETNS_LOCK. This lock is
72 * aquired at the entry of every kernel-facing function, and released at the
73 * end. Data within netns_token structures is also protected under this lock.
74 */
75
76#define NETNS_LOCK() \
77 lck_mtx_lock(&netns_lock)
78#define NETNS_LOCK_SPIN() \
79 lck_mtx_lock_spin(&netns_lock)
80#define NETNS_LOCK_CONVERT() do { \
81 NETNS_LOCK_ASSERT_HELD(); \
82 lck_mtx_convert_spin(&netns_lock); \
83} while (0)
84#define NETNS_UNLOCK() \
85 lck_mtx_unlock(&netns_lock)
86#define NETNS_LOCK_ASSERT_HELD() \
87 LCK_MTX_ASSERT(&netns_lock, LCK_MTX_ASSERT_OWNED)
88#define NETNS_LOCK_ASSERT_NOTHELD() \
89 LCK_MTX_ASSERT(&netns_lock, LCK_MTX_ASSERT_NOTOWNED)
90
91static LCK_GRP_DECLARE(netns_lock_group, "netns_lock");
92static LCK_MTX_DECLARE(netns_lock, &netns_lock_group);
93
94/*
95 * Internal data structures and parameters
96 */
97
98/*
99 * Local ports are kept track of by reference counts kept in a tree specific to
100 * an <IP, protocol> tuple (see struct ns).
101 *
102 * Note: port numbers are stored in host byte order.
103 */
104struct ns_reservation {
105 RB_ENTRY(ns_reservation) nsr_link;
106 uint32_t nsr_refs[NETNS_OWNER_MAX + 1];
107 in_port_t nsr_port;
108 bool nsr_reuseport:1;
109};
110
111#define NETNS_REF_COUNT(nsr, flags) \
112 (nsr)->nsr_refs[((flags) & NETNS_OWNER_MASK)]
113
114static inline int nsr_cmp(const struct ns_reservation *,
115 const struct ns_reservation *);
116
117RB_HEAD(ns_reservation_tree, ns_reservation);
118RB_PROTOTYPE(ns_reservation_tree, ns_reservation, nsr_link, nsr_cmp);
119RB_GENERATE(ns_reservation_tree, ns_reservation, nsr_link, nsr_cmp);
120
121static inline struct ns_reservation *ns_reservation_tree_find(
122 struct ns_reservation_tree *, const in_port_t);
123
124/*
125 * A namespace keeps track of the local port numbers in use for a given
126 * <IP, protocol> tuple. There are also global namespaces for each
127 * protocol to accomodate INADDR_ANY behavior and diagnostics.
128 */
129struct ns {
130 RB_ENTRY(ns) ns_link;
131
132 void *ns_addr_key;
133
134 union {
135 uint32_t ns_addr[4];
136 struct in_addr ns_inaddr;
137 struct in6_addr ns_in6addr;
138 };
139 uint8_t ns_addr_len;
140 uint8_t ns_proto;
141
142 in_port_t ns_last_ephemeral_port_down;
143 in_port_t ns_last_ephemeral_port_up;
144
145 uint8_t ns_is_freeable;
146
147 uint32_t ns_n_reservations;
148 struct ns_reservation_tree ns_reservations;
149};
150
151static uint32_t netns_n_namespaces;
152
153static inline int ns_cmp(const struct ns *, const struct ns *);
154
155RB_HEAD(netns_namespaces_tree, ns) netns_namespaces =
156 RB_INITIALIZER(netns_namespaces);
157RB_PROTOTYPE_PREV(netns_namespaces_tree, ns, ns_link, ns_cmp);
158RB_GENERATE_PREV(netns_namespaces_tree, ns, ns_link, ns_cmp);
159
/*
 * Global namespaces, one slot per <protocol, address family> combination.
 * All non-wildcard reservations will have an entry in the non-wild table.
 *
 * A slot index is built by OR-ing a protocol bit (TCP = 0, UDP = 1) with an
 * address family bit (IPv4 = 0, IPv6 = 2); any protocol other than
 * IPPROTO_TCP selects the UDP slot and any length other than
 * NETNS_ADDRLEN_V4 selects the IPv6 slot.
 */
#define NETNS_N_GLOBAL          4

#define NETNS_ADDRLEN_V4        (sizeof(struct in_addr))
#define NETNS_ADDRLEN_V6        (sizeof(struct in6_addr))

#define NETNS_NS_TCP            0
#define NETNS_NS_UDP            1
#define NETNS_NS_V4             0
#define NETNS_NS_V6             2

#define NETNS_NS_GLOBAL_IDX(proto, addrlen)                             \
	((((proto) == IPPROTO_TCP) ? NETNS_NS_TCP : NETNS_NS_UDP) |     \
	(((addrlen) == NETNS_ADDRLEN_V4) ? NETNS_NS_V4 : NETNS_NS_V6))

static struct ns *netns_global_non_wild[NETNS_N_GLOBAL];
static struct ns *netns_global_wild[NETNS_N_GLOBAL];

#define NETNS_NS_UDP_EPHEMERAL_RESERVE  4096
178
179/*
180 * Internal token structure
181 *
182 * Note: port numbers are stored in host byte order.
183 */
184struct ns_token {
185 /* Reservation state */
186 ifnet_t nt_ifp;
187 SLIST_ENTRY(ns_token) nt_ifp_link;
188 SLIST_ENTRY(ns_token) nt_all_link;
189 uint32_t nt_state; /* NETNS_STATE_* */
190
191 /* Reservation context */
192 union {
193 uint32_t nt_addr[4];
194 struct in_addr nt_inaddr;
195 struct in6_addr nt_in6addr;
196 };
197 uint8_t nt_addr_len;
198 uint8_t nt_proto;
199 in_port_t nt_port;
200 uint32_t nt_flags;
201
202 /* Optional information about the flow */
203 struct ns_flow_info *nt_flow_info;
204};
205
206/* Valid values for nt_state */
207#define NETNS_STATE_HALFCLOSED 0x1 /* half closed */
208#define NETNS_STATE_WITHDRAWN 0x2 /* withdrawn; not offloadable */
209
210#define NETNS_STATE_BITS "\020\01HALFCLOSED\02WITHDRAWN"
211
212/* List of tokens not bound to an ifnet */
213SLIST_HEAD(, ns_token) netns_unbound_tokens = SLIST_HEAD_INITIALIZER(
214 netns_unbound_tokens);
215
216/* List of all tokens currently allocated in the system */
217SLIST_HEAD(, ns_token) netns_all_tokens = SLIST_HEAD_INITIALIZER(
218 netns_all_tokens);
219
220/*
221 * Memory management
222 */
223static SKMEM_TYPE_DEFINE(netns_ns_zone, struct ns);
224
225#define NETNS_NS_TOKEN_ZONE_NAME "netns.ns_token"
226static unsigned int netns_ns_token_size; /* size of zone element */
227static struct skmem_cache *netns_ns_token_cache; /* for ns_token */
228
229#define NETNS_NS_FLOW_INFO_ZONE_NAME "netns.ns_flow_info"
230static unsigned int netns_ns_flow_info_size; /* size of zone element */
231static struct skmem_cache *netns_ns_flow_info_cache; /* for ns_flow_info */
232
233#define NETNS_NS_RESERVATION_ZONE_NAME "netns.ns_reservation"
234static unsigned int netns_ns_reservation_size; /* size of zone element */
235static struct skmem_cache *netns_ns_reservation_cache; /* for ns_reservation */
236
237static struct ns_reservation *netns_ns_reservation_alloc(in_port_t, uint32_t);
238static void netns_ns_reservation_free(struct ns_reservation *);
239static struct ns *netns_ns_alloc(zalloc_flags_t);
240static void netns_ns_free(struct ns *);
241static void netns_ns_cleanup(struct ns *);
242static struct ns_token *netns_ns_token_alloc(boolean_t);
243static void netns_ns_token_free(struct ns_token *);
244
245/*
246 * Utility/internal code
247 */
248static struct ns *_netns_get_ns(uint32_t *, uint8_t, uint8_t, bool);
249static inline boolean_t _netns_is_wildcard_addr(const uint32_t *, uint8_t);
250static int _netns_reserve_common(struct ns *, in_port_t, uint32_t);
251static void _netns_release_common(struct ns *, in_port_t, uint32_t);
252static inline void netns_clear_ifnet(struct ns_token *);
253static int _netns_reserve_kpi_common(struct ns *, netns_token *, uint32_t *,
254 uint8_t, uint8_t, in_port_t *, uint32_t, struct ns_flow_info *);
255static void _netns_set_ifnet_internal(struct ns_token *, struct ifnet *);
256
257static struct ns_reservation *
258netns_ns_reservation_alloc(in_port_t port, uint32_t flags)
259{
260 struct ns_reservation *res;
261
262 VERIFY(port != 0);
263
264 res = skmem_cache_alloc(netns_ns_reservation_cache, SKMEM_SLEEP);
265 ASSERT(res != NULL);
266
267 bzero(s: res, n: netns_ns_reservation_size);
268 res->nsr_port = port;
269 res->nsr_reuseport = ((flags & NETNS_REUSEPORT) != 0);
270 return res;
271}
272
273static void
274netns_ns_reservation_free(struct ns_reservation *res)
275{
276 skmem_cache_free(netns_ns_reservation_cache, res);
277}
278
279static struct ns *
280netns_ns_alloc(zalloc_flags_t how)
281{
282 struct ns *namespace;
283 in_port_t first = (in_port_t)ipport_firstauto;
284 in_port_t last = (in_port_t)ipport_lastauto;
285 in_port_t rand_port;
286
287 namespace = zalloc_flags(netns_ns_zone, how | Z_ZERO);
288 if (namespace == NULL) {
289 return NULL;
290 }
291
292 namespace->ns_is_freeable = 1;
293
294 RB_INIT(&namespace->ns_reservations);
295
296 /*
297 * Randomize the initial ephemeral port starting point, just in case
298 * this namespace is for an ipv6 address which gets brought up and
299 * down often.
300 */
301 if (first == last) {
302 rand_port = first;
303 } else {
304 read_frandom(buffer: &rand_port, numBytes: sizeof(rand_port));
305
306 if (first > last) {
307 rand_port = last + (rand_port % (first - last));
308 } else {
309 rand_port = first + (rand_port % (last - first));
310 }
311 }
312 namespace->ns_last_ephemeral_port_down = rand_port;
313 namespace->ns_last_ephemeral_port_up = rand_port;
314
315 return namespace;
316}
317
318static void
319netns_ns_free(struct ns *namespace)
320{
321 struct ns_reservation *res;
322 struct ns_reservation *tmp_res;
323#if SK_LOG
324 char tmp_ip_str[MAX_IPv6_STR_LEN];
325#endif /* SK_LOG */
326
327 SK_DF(NS_VERB_IP(namespace->ns_addr_len) |
328 NS_VERB_PROTO(namespace->ns_proto),
329 "freeing %s ns for IP %s",
330 PROTO_STR(namespace->ns_proto),
331 inet_ntop(LEN_TO_AF(namespace->ns_addr_len),
332 namespace->ns_addr, tmp_ip_str, sizeof(tmp_ip_str)));
333
334 RB_FOREACH_SAFE(res, ns_reservation_tree, &namespace->ns_reservations,
335 tmp_res) {
336 netns_ns_reservation_free(res);
337 namespace->ns_n_reservations--;
338 RB_REMOVE(ns_reservation_tree, &namespace->ns_reservations,
339 res);
340 }
341
342 VERIFY(RB_EMPTY(&namespace->ns_reservations));
343
344 if (netns_global_wild[NETNS_NS_GLOBAL_IDX(namespace->ns_proto,
345 namespace->ns_addr_len)] == namespace) {
346 netns_global_wild[NETNS_NS_GLOBAL_IDX(namespace->ns_proto,
347 namespace->ns_addr_len)] = NULL;
348 }
349 if (netns_global_non_wild[NETNS_NS_GLOBAL_IDX(namespace->ns_proto,
350 namespace->ns_addr_len)] == namespace) {
351 netns_global_non_wild[NETNS_NS_GLOBAL_IDX(namespace->ns_proto,
352 namespace->ns_addr_len)] = NULL;
353 }
354
355 zfree(netns_ns_zone, namespace);
356}
357
358static void
359netns_ns_cleanup(struct ns *namespace)
360{
361 if (namespace->ns_is_freeable &&
362 RB_EMPTY(&namespace->ns_reservations)) {
363 RB_REMOVE(netns_namespaces_tree, &netns_namespaces, namespace);
364 netns_n_namespaces--;
365 netns_ns_free(namespace);
366 }
367}
368
369static struct ns_token *
370netns_ns_token_alloc(boolean_t with_nfi)
371{
372 struct ns_token *token;
373
374 NETNS_LOCK_ASSERT_HELD();
375 NETNS_LOCK_CONVERT();
376
377 token = skmem_cache_alloc(netns_ns_token_cache, SKMEM_SLEEP);
378 ASSERT(token != NULL);
379
380 bzero(s: token, n: netns_ns_token_size);
381
382 if (with_nfi) {
383 token->nt_flow_info = skmem_cache_alloc(netns_ns_flow_info_cache,
384 SKMEM_SLEEP);
385 ASSERT(token->nt_flow_info != NULL);
386 }
387 SLIST_INSERT_HEAD(&netns_all_tokens, token, nt_all_link);
388
389 return token;
390}
391
392static void
393netns_ns_token_free(struct ns_token *token)
394{
395 NETNS_LOCK_ASSERT_HELD();
396 NETNS_LOCK_CONVERT();
397 SLIST_REMOVE(&netns_all_tokens, token, ns_token, nt_all_link);
398
399 if (token->nt_flow_info != NULL) {
400 skmem_cache_free(netns_ns_flow_info_cache, token->nt_flow_info);
401 }
402 skmem_cache_free(netns_ns_token_cache, token);
403}
404
405__attribute__((always_inline))
406static inline int
407nsr_cmp(const struct ns_reservation *nsr1, const struct ns_reservation *nsr2)
408{
409#define NSR_COMPARE(r1, r2) ((int)(r1)->nsr_port - (int)(r2)->nsr_port)
410 return NSR_COMPARE(nsr1, nsr2);
411}
412
413__attribute__((always_inline))
414static inline int
415ns_cmp(const struct ns *a, const struct ns *b)
416{
417 int d;
418
419 if ((d = (a->ns_addr_len - b->ns_addr_len)) != 0) {
420 return d;
421 }
422 if ((d = (a->ns_proto - b->ns_proto)) != 0) {
423 return d;
424 }
425 if ((d = flow_ip_cmp(a0: a->ns_addr_key, b0: b->ns_addr_key,
426 alen: b->ns_addr_len)) != 0) {
427 return d;
428 }
429
430 return 0;
431}
432
433/*
434 * Common routine to look up a reservation.
435 *
436 * NOTE: Assumes the caller holds the NETNS global lock
437 */
438__attribute__((always_inline))
439static inline struct ns_reservation *
440ns_reservation_tree_find(struct ns_reservation_tree *tree, const in_port_t port)
441{
442 struct ns_reservation res;
443 res.nsr_port = port;
444 return RB_FIND(ns_reservation_tree, tree, &res);
445}
446
447/*
448 * Retrieve the namespace for the supplied <address, protocol> tuple.
449 * If create is set and such a namespace doesn't already exist, one will be
450 * created.
451 */
452static struct ns *
453_netns_get_ns(uint32_t *addr, uint8_t addr_len, uint8_t proto, bool create)
454{
455 struct ns *namespace = NULL;
456 struct ns find = {
457 .ns_addr_key = addr,
458 .ns_addr_len = addr_len,
459 .ns_proto = proto,
460 };
461#if SK_LOG
462 char tmp_ip_str[MAX_IPv6_STR_LEN];
463#endif /* SK_LOG */
464
465 VERIFY(addr_len == sizeof(struct in_addr) ||
466 addr_len == sizeof(struct in6_addr));
467
468 NETNS_LOCK_ASSERT_HELD();
469
470 namespace = RB_FIND(netns_namespaces_tree, &netns_namespaces, &find);
471
472 if (create && namespace == NULL) {
473 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
474 "allocating %s ns for IP %s",
475 PROTO_STR(proto), inet_ntop(LEN_TO_AF(addr_len), addr,
476 tmp_ip_str, sizeof(tmp_ip_str)));
477 NETNS_LOCK_CONVERT();
478 namespace = netns_ns_alloc(how: Z_WAITOK | Z_NOFAIL);
479 __builtin_assume(namespace != NULL);
480 memcpy(dst: namespace->ns_addr, src: addr, n: addr_len);
481 namespace->ns_addr_key = &namespace->ns_addr;
482 namespace->ns_addr_len = addr_len;
483 namespace->ns_proto = proto;
484 RB_INSERT(netns_namespaces_tree, &netns_namespaces, namespace);
485 netns_n_namespaces++;
486
487 if (_netns_is_wildcard_addr(addr, addr_len) &&
488 netns_global_wild[NETNS_NS_GLOBAL_IDX(proto,
489 addr_len)] == NULL) {
490 netns_global_wild[NETNS_NS_GLOBAL_IDX(proto,
491 addr_len)] = namespace;
492 }
493 }
494
495 return namespace;
496}
497
498/*
499 * Return true if the supplied address is a wildcard (INADDR_ANY)
500 */
501__attribute__((always_inline))
502static boolean_t
503_netns_is_wildcard_addr(const uint32_t *addr, uint8_t addr_len)
504{
505 boolean_t wildcard;
506
507 switch (addr_len) {
508 case sizeof(struct in_addr):
509 wildcard = (addr[0] == 0);
510 break;
511
512 case sizeof(struct in6_addr):
513 wildcard = (addr[0] == 0 && addr[1] == 0 &&
514 addr[2] == 0 && addr[3] == 0);
515 break;
516
517 default:
518 wildcard = FALSE;
519 break;
520 }
521
522 return wildcard;
523}
524
525__attribute__((always_inline))
526static boolean_t
527_netns_is_port_used(struct ns * gns, struct ns_reservation *curr_res, in_port_t port)
528{
529 struct ns_reservation *res = NULL;
530
531 if (gns == NULL) {
532 return FALSE;
533 }
534
535 res = ns_reservation_tree_find(tree: &gns->ns_reservations, port);
536 if (res != NULL && res != curr_res) {
537 if (!res->nsr_reuseport) {
538 return TRUE;
539 }
540 }
541
542 return FALSE;
543}
544
545/*
546 * Internal shared code to reserve ports within a specific namespace.
547 *
548 * Note: port numbers are in host byte-order here.
549 */
550static int
551_netns_reserve_common(struct ns *namespace, in_port_t port, uint32_t flags)
552{
553 struct ns_reservation *res = NULL, *exist = NULL;
554 uint8_t proto, addr_len;
555 int err = 0;
556#if SK_LOG
557 char tmp_ip_str[MAX_IPv6_STR_LEN];
558#endif /* SK_LOG */
559
560 VERIFY(port != 0);
561 proto = namespace->ns_proto;
562 addr_len = namespace->ns_addr_len;
563 NETNS_LOCK_CONVERT();
564 res = netns_ns_reservation_alloc(port, flags);
565 if (res == NULL) {
566 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
567 "ERROR %s:%s:%d // flags 0x%x // OUT OF MEMORY",
568 inet_ntop(LEN_TO_AF(namespace->ns_addr_len),
569 namespace->ns_addr, tmp_ip_str,
570 sizeof(tmp_ip_str)), PROTO_STR(proto), port, flags);
571 return ENOMEM;
572 }
573 exist = RB_INSERT(ns_reservation_tree, &namespace->ns_reservations,
574 res);
575 if (__probable(exist == NULL)) {
576 namespace->ns_n_reservations++;
577 } else {
578 netns_ns_reservation_free(res);
579 res = exist;
580 }
581
582 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
583 "pre: %s:%s:%d // flags 0x%x // refs %d sky, %d ls, "
584 "%d bsd %d pf", inet_ntop(LEN_TO_AF(namespace->ns_addr_len),
585 namespace->ns_addr, tmp_ip_str, sizeof(tmp_ip_str)),
586 PROTO_STR(proto), port, flags,
587 NETNS_REF_COUNT(res, NETNS_SKYWALK),
588 NETNS_REF_COUNT(res, NETNS_LISTENER),
589 NETNS_REF_COUNT(res, NETNS_BSD),
590 NETNS_REF_COUNT(res, NETNS_PF));
591
592 /* Make reservation */
593 /*
594 * Bypass collision detection for reservations in the global non-wild
595 * namespace. We use that namespace for reference counts only.
596 */
597 if (namespace !=
598 netns_global_non_wild[NETNS_NS_GLOBAL_IDX(proto, addr_len)]) {
599 struct ns_reservation *skres;
600 boolean_t is_wild = _netns_is_wildcard_addr(addr: namespace->ns_addr,
601 addr_len);
602 struct ns *gns =
603 netns_global_wild[NETNS_NS_GLOBAL_IDX(proto, addr_len)];
604
605 if (NETNS_IS_SKYWALK(flags)) {
606 if ((!is_wild || exist != NULL) && gns != NULL &&
607 (skres = ns_reservation_tree_find(
608 tree: &gns->ns_reservations, port)) != NULL &&
609 NETNS_REF_COUNT(skres, NETNS_LISTENER) == 0) {
610 /*
611 * The mere existence of any non-skywalk
612 * listener wildcard entry for this
613 * protocol/port number means this must fail.
614 */
615 SK_ERR("ADDRINUSE: Duplicate wildcard");
616 err = EADDRINUSE;
617 goto done;
618 }
619
620 if (is_wild) {
621 gns = netns_global_non_wild[
622 NETNS_NS_GLOBAL_IDX(proto, addr_len)];
623 VERIFY(gns != NULL);
624
625 if (_netns_is_port_used(gns: netns_global_non_wild[
626 NETNS_NS_GLOBAL_IDX(proto, NETNS_ADDRLEN_V4)], curr_res: res, port) ||
627 _netns_is_port_used(gns: netns_global_non_wild[
628 NETNS_NS_GLOBAL_IDX(proto, NETNS_ADDRLEN_V6)], curr_res: res, port)) {
629 /*
630 * If Skywalk is trying to reserve a
631 * wildcard, then the mere existance of
632 * any entry in either v4/v6 non-wild
633 * namespace for this port means this
634 * must fail.
635 */
636 SK_ERR("ADDRINUSE: Wildcard with non-wild.");
637 err = EADDRINUSE;
638 goto done;
639 }
640 }
641 } else {
642 /*
643 * Check if Skywalk has reserved a wildcard entry.
644 * Note that the arithmetic OR here is intentional.
645 */
646 if ((!is_wild || exist != NULL) && gns != NULL &&
647 (skres = ns_reservation_tree_find(
648 tree: &gns->ns_reservations, port)) != NULL &&
649 (NETNS_REF_COUNT(skres, NETNS_SKYWALK) |
650 NETNS_REF_COUNT(skres, NETNS_LISTENER)) != 0) {
651 /*
652 * BSD is trying to reserve a proto/port for
653 * which Skywalk already has a wildcard
654 * reservation.
655 */
656 SK_ERR("ADDRINUSE: BSD requesting Skywalk port");
657 err = EADDRINUSE;
658 goto done;
659 }
660
661 /*
662 * If BSD is trying to reserve a wildcard,
663 * ensure Skywalk has not already reserved
664 * a non-wildcard.
665 */
666 if (is_wild) {
667 gns = netns_global_non_wild[
668 NETNS_NS_GLOBAL_IDX(proto, addr_len)];
669 VERIFY(gns != NULL);
670
671 /*
672 * Note that the arithmetic OR here is
673 * intentional.
674 */
675 if ((skres = ns_reservation_tree_find(
676 tree: &gns->ns_reservations, port)) != NULL &&
677 (NETNS_REF_COUNT(skres, NETNS_SKYWALK) |
678 NETNS_REF_COUNT(skres,
679 NETNS_LISTENER)) != 0) {
680 SK_ERR("ADDRINUSE: BSD wildcard with non-wild.");
681 err = EADDRINUSE;
682 goto done;
683 }
684 }
685 }
686
687 switch (flags & NETNS_OWNER_MASK) {
688 case NETNS_SKYWALK:
689 /* check collision w/ BSD */
690 if (NETNS_REF_COUNT(res, NETNS_BSD) > 0 ||
691 NETNS_REF_COUNT(res, NETNS_PF) > 0) {
692 SK_ERR("ERROR - Skywalk got ADDRINUSE (w/ BSD)");
693 err = EADDRINUSE;
694 goto done;
695 }
696
697 /* BEGIN CSTYLED */
698 /*
699 * Scenarios with new Skywalk connected flow:
700 * 1. With existing Skywalk connected flow,
701 * NETNS_REF_COUNT(res, NETNS_LISTENER) == 0 &&
702 * NETNS_REF_COUNT(res, NETNS_SKYWALK) == 1
703 * reject by failing the wild gns lookup below.
704 * 2. With existing Skywalk 3-tuple listener,
705 * NETNS_REF_COUNT(res, NETNS_LISTENER) == 1
706 * bypass the check below.
707 * 3. With existing Skywalk 2-tuple listener,
708 * NETNS_REF_COUNT(res, NETNS_LISTENER) == 0 &&
709 * NETNS_REF_COUNT(res, NETNS_SKYWALK) == 0
710 * pass with successful wild gns lookup.
711 */
712 /* END CSTYLED */
713 if (NETNS_REF_COUNT(res, NETNS_LISTENER) == 0 &&
714 NETNS_REF_COUNT(res, NETNS_SKYWALK) > 0) {
715 /* check if covered by wild Skywalk listener */
716 gns = netns_global_wild[
717 NETNS_NS_GLOBAL_IDX(proto, addr_len)];
718 if (gns != NULL &&
719 (skres = ns_reservation_tree_find(
720 tree: &gns->ns_reservations, port)) != NULL &&
721 NETNS_REF_COUNT(skres, NETNS_LISTENER)
722 != 0) {
723 err = 0;
724 goto done;
725 }
726 if (addr_len == sizeof(struct in_addr)) {
727 /* If address is IPv4, also check for wild IPv6 registration */
728 gns = netns_global_wild[
729 NETNS_NS_GLOBAL_IDX(proto, NETNS_ADDRLEN_V6)];
730 if (gns != NULL &&
731 (skres = ns_reservation_tree_find(
732 tree: &gns->ns_reservations, port)) != NULL &&
733 NETNS_REF_COUNT(skres, NETNS_LISTENER)
734 != 0) {
735 err = 0;
736 goto done;
737 }
738 }
739 SK_ERR("ERROR - Skywalk got ADDRINUSE (w/ SK connected flow)");
740 err = EADDRINUSE;
741 }
742 /*
743 * XXX: Duplicate 5-tuple flows under a Skywalk
744 * listener are currently detected by flow manager,
745 * till we implement 5-tuple-aware netns.
746 */
747 break;
748
749 case NETNS_LISTENER:
750 if (NETNS_REF_COUNT(res, NETNS_BSD) > 0 ||
751 NETNS_REF_COUNT(res, NETNS_PF) > 0 ||
752 NETNS_REF_COUNT(res, NETNS_LISTENER) > 0 ||
753 _netns_is_port_used(gns: netns_global_wild[
754 NETNS_NS_GLOBAL_IDX(proto, NETNS_ADDRLEN_V4)], curr_res: res, port) ||
755 _netns_is_port_used(gns: netns_global_wild[
756 NETNS_NS_GLOBAL_IDX(proto, NETNS_ADDRLEN_V6)], curr_res: res, port) ||
757 _netns_is_port_used(gns: netns_global_non_wild[
758 NETNS_NS_GLOBAL_IDX(proto, NETNS_ADDRLEN_V4)], curr_res: res, port) ||
759 _netns_is_port_used(gns: netns_global_non_wild[
760 NETNS_NS_GLOBAL_IDX(proto, NETNS_ADDRLEN_V6)], curr_res: res, port)) {
761 SK_ERR("ERROR - Listener got ADDRINUSE");
762 err = EADDRINUSE;
763 }
764 break;
765
766 case NETNS_BSD:
767 case NETNS_PF:
768 if (NETNS_REF_COUNT(res, NETNS_SKYWALK) > 0 ||
769 NETNS_REF_COUNT(res, NETNS_LISTENER) > 0) {
770 SK_ERR("ERROR - %s got ADDRINUSE",
771 ((flags & NETNS_OWNER_MASK) == NETNS_PF) ?
772 "PF" : "BSD");
773 err = EADDRINUSE;
774 }
775 break;
776
777 default:
778 panic("_netns_reserve_common: invalid owner 0x%x",
779 flags & NETNS_OWNER_MASK);
780 /* NOTREACHED */
781 __builtin_unreachable();
782 }
783 }
784
785done:
786 ASSERT(res != NULL);
787 if (__probable(err == 0)) {
788 NETNS_REF_COUNT(res, flags)++;
789 /* Check for wrap around */
790 VERIFY(NETNS_REF_COUNT(res, flags) != 0);
791 SK_DF(NS_VERB_IP(namespace->ns_addr_len) |
792 NS_VERB_PROTO(namespace->ns_proto),
793 "post: %s:%s:%d err %d // flags 0x%x // refs %d sky, "
794 "%d ls, %d bsd %d pf",
795 inet_ntop(LEN_TO_AF(namespace->ns_addr_len),
796 namespace->ns_addr, tmp_ip_str, sizeof(tmp_ip_str)),
797 PROTO_STR(namespace->ns_proto), port, err, flags,
798 NETNS_REF_COUNT(res, NETNS_SKYWALK),
799 NETNS_REF_COUNT(res, NETNS_LISTENER),
800 NETNS_REF_COUNT(res, NETNS_BSD),
801 NETNS_REF_COUNT(res, NETNS_PF));
802 } else {
803 if (exist == NULL) {
804 RB_REMOVE(ns_reservation_tree,
805 &namespace->ns_reservations, res);
806 namespace->ns_n_reservations--;
807 netns_ns_reservation_free(res);
808 }
809 }
810 return err;
811}
812
813/*
814 * Internal shared code to release ports within a specific namespace.
815 */
816static void
817_netns_release_common(struct ns *namespace, in_port_t port, uint32_t flags)
818{
819 struct ns_reservation *res;
820 uint32_t refs;
821 int i;
822#if SK_LOG
823 char tmp_ip_str[MAX_IPv6_STR_LEN];
824#endif /* SK_LOG */
825
826 NETNS_LOCK_ASSERT_HELD();
827
828 res = ns_reservation_tree_find(tree: &namespace->ns_reservations, port);
829 if (res == NULL) {
830 SK_DF(NS_VERB_IP(namespace->ns_addr_len) |
831 NS_VERB_PROTO(namespace->ns_proto),
832 "ERROR %s:%s:%d // flags 0x%x // not found",
833 inet_ntop(LEN_TO_AF(namespace->ns_addr_len),
834 namespace->ns_addr, tmp_ip_str, sizeof(tmp_ip_str)),
835 PROTO_STR(namespace->ns_proto), port, flags);
836 VERIFY(res != NULL);
837 }
838
839 SK_DF(NS_VERB_IP(namespace->ns_addr_len) |
840 NS_VERB_PROTO(namespace->ns_proto),
841 "%s:%s:%d // flags 0x%x // refs %d sky, %d ls, %d bsd, %d pf",
842 inet_ntop(LEN_TO_AF(namespace->ns_addr_len),
843 namespace->ns_addr, tmp_ip_str, sizeof(tmp_ip_str)),
844 PROTO_STR(namespace->ns_proto), port, flags,
845 NETNS_REF_COUNT(res, NETNS_SKYWALK),
846 NETNS_REF_COUNT(res, NETNS_LISTENER),
847 NETNS_REF_COUNT(res, NETNS_BSD),
848 NETNS_REF_COUNT(res, NETNS_PF));
849
850 /* Release reservation */
851 VERIFY(NETNS_REF_COUNT(res, flags) > 0);
852 NETNS_REF_COUNT(res, flags) -= 1;
853
854 /* Clean up memory, if appropriate */
855 for (i = 0, refs = 0; i <= NETNS_OWNER_MAX && refs == 0; i++) {
856 refs |= res->nsr_refs[i];
857 }
858 if (refs == 0) {
859 RB_REMOVE(ns_reservation_tree, &namespace->ns_reservations,
860 res);
861 namespace->ns_n_reservations--;
862 NETNS_LOCK_CONVERT();
863 netns_ns_reservation_free(res);
864 netns_ns_cleanup(namespace);
865 }
866}
867
868__attribute__((always_inline))
869static inline void
870netns_init_global_ns(struct ns **global_ptr, uint8_t proto, uint8_t addrlen)
871{
872 struct ns *namespace;
873
874 namespace = *global_ptr = netns_ns_alloc(how: Z_WAITOK);
875 memset(s: namespace->ns_addr, c: 0xFF, n: addrlen);
876 namespace->ns_addr_len = addrlen;
877 namespace->ns_proto = proto;
878 namespace->ns_is_freeable = 0;
879}
880
881__attribute__((always_inline))
882static inline void
883netns_clear_ifnet(struct ns_token *nstoken)
884{
885#if SK_LOG
886 char tmp_ip_str[MAX_IPv6_STR_LEN];
887#endif /* SK_LOG */
888
889 NETNS_LOCK_ASSERT_HELD();
890
891 if (nstoken->nt_ifp != NULL) {
892 SLIST_REMOVE(&nstoken->nt_ifp->if_netns_tokens, nstoken,
893 ns_token, nt_ifp_link);
894
895 SK_DF(NS_VERB_IP(nstoken->nt_addr_len) |
896 NS_VERB_PROTO(nstoken->nt_proto),
897 "%s:%s:%d // removed from ifnet %d",
898 inet_ntop(LEN_TO_AF(nstoken->nt_addr_len),
899 nstoken->nt_addr, tmp_ip_str, sizeof(tmp_ip_str)),
900 PROTO_STR(nstoken->nt_proto), nstoken->nt_port,
901 nstoken->nt_ifp->if_index);
902
903 NETNS_LOCK_CONVERT();
904 ifnet_decr_iorefcnt(nstoken->nt_ifp);
905 nstoken->nt_ifp = NULL;
906 } else {
907 SLIST_REMOVE(&netns_unbound_tokens, nstoken, ns_token,
908 nt_ifp_link);
909 }
910}
911
912/*
913 * Internal shared code to perform a port[-range] reservation, along with all
914 * the boilerplate and sanity checks expected for a call coming in from the
915 * surrounding kernel code.
916 */
917static int
918_netns_reserve_kpi_common(struct ns *ns, netns_token *token, uint32_t *addr,
919 uint8_t addr_len, uint8_t proto, in_port_t *port, uint32_t flags,
920 struct ns_flow_info *nfi)
921{
922 boolean_t ns_want_cleanup = (ns == NULL);
923 struct ns_token *nt;
924 int err = 0;
925 in_port_t hport;
926#if SK_LOG
927 char tmp_ip_str[MAX_IPv6_STR_LEN];
928#endif /* SK_LOG */
929 struct ifnet *ifp = (nfi != NULL) ? nfi->nfi_ifp : NULL;
930
931 NETNS_LOCK_ASSERT_HELD();
932
933 hport = ntohs(*port);
934
935 VERIFY((flags & NETNS_OWNER_MASK) <= NETNS_OWNER_MAX);
936 VERIFY(addr_len == sizeof(struct in_addr) ||
937 addr_len == sizeof(struct in6_addr));
938 VERIFY(proto == IPPROTO_TCP || proto == IPPROTO_UDP);
939 VERIFY(hport != 0);
940
941 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
942 "reserving %s:%s:%d // flags 0x%x // token %svalid",
943 inet_ntop(LEN_TO_AF(addr_len), addr, tmp_ip_str,
944 sizeof(tmp_ip_str)), PROTO_STR(proto), hport, flags,
945 NETNS_TOKEN_VALID(token) ? "" : "in");
946
947 /*
948 * See the documentation for NETNS_PRERESERVED in netns.h for an
949 * explanation of this block.
950 */
951 if (NETNS_TOKEN_VALID(token)) {
952 if (flags & NETNS_PRERESERVED) {
953 nt = *token;
954 VERIFY(nt->nt_addr_len == addr_len);
955 VERIFY(memcmp(nt->nt_addr, addr, addr_len) == 0);
956 VERIFY(nt->nt_proto == proto);
957 VERIFY(nt->nt_port == hport);
958 VERIFY((nt->nt_flags &
959 NETNS_RESERVATION_FLAGS | NETNS_PRERESERVED) ==
960 (flags & NETNS_RESERVATION_FLAGS));
961
962 if ((nt->nt_flags & NETNS_CONFIGURATION_FLAGS) ==
963 (flags & NETNS_CONFIGURATION_FLAGS)) {
964 SK_DF(NS_VERB_IP(nt->nt_addr_len) |
965 NS_VERB_PROTO(nt->nt_proto),
966 "%s:%s:%d // flags 0x%x -> 0x%x",
967 inet_ntop(LEN_TO_AF(nt->nt_addr_len),
968 nt->nt_addr, tmp_ip_str,
969 sizeof(tmp_ip_str)),
970 PROTO_STR(nt->nt_proto),
971 nt->nt_port, nt->nt_flags, flags);
972 nt->nt_flags &= ~NETNS_CONFIGURATION_FLAGS;
973 nt->nt_flags |=
974 flags & NETNS_CONFIGURATION_FLAGS;
975 }
976 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
977 "token was prereserved");
978 goto done;
979 } else {
980 panic("Request to overwrite valid netns token");
981 /* NOTREACHED */
982 __builtin_unreachable();
983 }
984 }
985
986 /*
987 * TODO: Check range against bitmap
988 */
989 if (hport == 0) {
990 /*
991 * Caller request an arbitrary range of ports
992 * TODO: Need to figure out how to allocate
993 * emphemeral ports only.
994 */
995 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
996 "ERROR - wildcard port not yet supported");
997 err = ENOMEM;
998 goto done;
999 }
1000
1001 /*
1002 * Fetch namespace for the specified address/protocol, creating
1003 * a new namespace if necessary.
1004 */
1005 if (ns == NULL) {
1006 ASSERT(ns_want_cleanup);
1007 ns = _netns_get_ns(addr, addr_len, proto, true);
1008 }
1009 if (__improbable(ns == NULL)) {
1010 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
1011 "ERROR - couldn't create namespace");
1012 err = ENOMEM;
1013 goto done;
1014 }
1015
1016 /*
1017 * Make a reservation in the namespace
1018 * This will return an error if an incompatible reservation
1019 * already exists.
1020 */
1021 err = _netns_reserve_common(namespace: ns, port: hport, flags);
1022 if (__improbable(err != 0)) {
1023 NETNS_LOCK_CONVERT();
1024 if (ns_want_cleanup) {
1025 netns_ns_cleanup(namespace: ns);
1026 }
1027 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
1028 "ERROR - reservation collision");
1029 goto done;
1030 }
1031
1032 if (!_netns_is_wildcard_addr(addr: ns->ns_addr, addr_len)) {
1033 /* Record the reservation in the non-wild namespace */
1034 struct ns *nwns;
1035
1036 nwns = netns_global_non_wild[NETNS_NS_GLOBAL_IDX(proto,
1037 addr_len)];
1038 err = _netns_reserve_common(namespace: nwns, port: hport, flags);
1039 if (__improbable(err != 0)) {
1040 /* Need to free the specific namespace entry */
1041 NETNS_LOCK_CONVERT();
1042 _netns_release_common(namespace: ns, port: hport, flags);
1043 if (ns_want_cleanup) {
1044 netns_ns_cleanup(namespace: ns);
1045 }
1046 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
1047 "ERROR - reservation collision");
1048 goto done;
1049 }
1050 }
1051
1052 nt = netns_ns_token_alloc(with_nfi: nfi != NULL ? true : false);
1053 ASSERT(nt->nt_ifp == NULL);
1054 _netns_set_ifnet_internal(nt, ifp);
1055
1056 memcpy(dst: nt->nt_addr, src: addr, n: addr_len);
1057 nt->nt_addr_len = addr_len;
1058 nt->nt_proto = proto;
1059 nt->nt_port = hport;
1060 nt->nt_flags = flags;
1061
1062 if (nfi != NULL) {
1063 VERIFY(nt->nt_flow_info != NULL);
1064
1065 memcpy(dst: nt->nt_flow_info, src: nfi, n: sizeof(struct ns_flow_info));
1066 /*
1067 * The local port is passed as a separate argument
1068 */
1069 if (nfi->nfi_laddr.sa.sa_family == AF_INET) {
1070 nt->nt_flow_info->nfi_laddr.sin.sin_port = *port;
1071 } else if (nfi->nfi_laddr.sa.sa_family == AF_INET6) {
1072 nt->nt_flow_info->nfi_laddr.sin6.sin6_port = *port;
1073 }
1074 }
1075 *token = nt;
1076
1077done:
1078 return err;
1079}
1080
1081/*
1082 * Kernel-facing functions
1083 */
1084
1085int
1086netns_init(void)
1087{
1088 VERIFY(__netns_inited == 0);
1089
1090 netns_ns_reservation_size = sizeof(struct ns_reservation);
1091 netns_ns_reservation_cache = skmem_cache_create(NETNS_NS_RESERVATION_ZONE_NAME,
1092 netns_ns_reservation_size, sizeof(uint64_t), NULL, NULL, NULL,
1093 NULL, NULL, 0);
1094 if (netns_ns_reservation_cache == NULL) {
1095 panic("%s: skmem_cache create failed (%s)", __func__,
1096 NETNS_NS_RESERVATION_ZONE_NAME);
1097 /* NOTREACHED */
1098 __builtin_unreachable();
1099 }
1100
1101 netns_ns_token_size = sizeof(struct ns_token);
1102 netns_ns_token_cache = skmem_cache_create(NETNS_NS_TOKEN_ZONE_NAME,
1103 netns_ns_token_size, sizeof(uint64_t), NULL, NULL, NULL, NULL,
1104 NULL, 0);
1105 if (netns_ns_token_cache == NULL) {
1106 panic("%s: skmem_cache create failed (%s)", __func__,
1107 NETNS_NS_TOKEN_ZONE_NAME);
1108 /* NOTREACHED */
1109 __builtin_unreachable();
1110 }
1111
1112 netns_ns_flow_info_size = sizeof(struct ns_flow_info);
1113 netns_ns_flow_info_cache = skmem_cache_create(NETNS_NS_FLOW_INFO_ZONE_NAME,
1114 netns_ns_flow_info_size, sizeof(uint64_t), NULL, NULL, NULL,
1115 NULL, NULL, 0);
1116 if (netns_ns_flow_info_cache == NULL) {
1117 panic("%s: skmem_cache create failed (%s)", __func__,
1118 NETNS_NS_FLOW_INFO_ZONE_NAME);
1119 /* NOTREACHED */
1120 __builtin_unreachable();
1121 }
1122
1123 SLIST_INIT(&netns_unbound_tokens);
1124 SLIST_INIT(&netns_all_tokens);
1125
1126 netns_n_namespaces = 0;
1127 RB_INIT(&netns_namespaces);
1128
1129 SK_D("initializing global namespaces");
1130
1131 netns_init_global_ns(
1132 global_ptr: &netns_global_non_wild[NETNS_NS_GLOBAL_IDX(IPPROTO_TCP,
1133 NETNS_ADDRLEN_V4)], IPPROTO_TCP, addrlen: sizeof(struct in_addr));
1134
1135 netns_init_global_ns(
1136 global_ptr: &netns_global_non_wild[NETNS_NS_GLOBAL_IDX(IPPROTO_UDP,
1137 NETNS_ADDRLEN_V4)], IPPROTO_UDP, addrlen: sizeof(struct in_addr));
1138
1139 netns_init_global_ns(
1140 global_ptr: &netns_global_non_wild[NETNS_NS_GLOBAL_IDX(IPPROTO_TCP,
1141 NETNS_ADDRLEN_V6)], IPPROTO_TCP, addrlen: sizeof(struct in6_addr));
1142
1143 netns_init_global_ns(
1144 global_ptr: &netns_global_non_wild[NETNS_NS_GLOBAL_IDX(IPPROTO_UDP,
1145 NETNS_ADDRLEN_V6)], IPPROTO_UDP, addrlen: sizeof(struct in6_addr));
1146
1147 /* Done */
1148
1149 __netns_inited = 1;
1150 sk_features |= SK_FEATURE_NETNS;
1151
1152 SK_D("initialized netns");
1153
1154 return 0;
1155}
1156
1157void
1158netns_uninit(void)
1159{
1160 if (__netns_inited == 1) {
1161 struct ns *namespace;
1162 struct ns *temp_namespace;
1163 int i;
1164
1165 RB_FOREACH_SAFE(namespace, netns_namespaces_tree,
1166 &netns_namespaces, temp_namespace) {
1167 RB_REMOVE(netns_namespaces_tree, &netns_namespaces,
1168 namespace);
1169 netns_n_namespaces--;
1170 netns_ns_free(namespace);
1171 }
1172
1173 for (i = 0; i < NETNS_N_GLOBAL; i++) {
1174 netns_ns_free(namespace: netns_global_non_wild[i]);
1175 }
1176
1177 if (netns_ns_flow_info_cache != NULL) {
1178 skmem_cache_destroy(netns_ns_flow_info_cache);
1179 netns_ns_flow_info_cache = NULL;
1180 }
1181 if (netns_ns_token_cache != NULL) {
1182 skmem_cache_destroy(netns_ns_token_cache);
1183 netns_ns_token_cache = NULL;
1184 }
1185 if (netns_ns_reservation_cache != NULL) {
1186 skmem_cache_destroy(netns_ns_reservation_cache);
1187 netns_ns_reservation_cache = NULL;
1188 }
1189
1190 __netns_inited = 0;
1191 sk_features &= ~SK_FEATURE_NETNS;
1192
1193 SK_D("uninitialized netns");
1194 }
1195}
1196
1197void
1198netns_reap_caches(boolean_t purge)
1199{
1200 /* these aren't created unless netns is enabled */
1201 if (netns_ns_token_cache != NULL) {
1202 skmem_cache_reap_now(netns_ns_token_cache, purge);
1203 }
1204 if (netns_ns_reservation_cache != NULL) {
1205 skmem_cache_reap_now(netns_ns_reservation_cache, purge);
1206 }
1207 if (netns_ns_flow_info_cache != NULL) {
1208 skmem_cache_reap_now(netns_ns_flow_info_cache, purge);
1209 }
1210}
1211
1212boolean_t
1213netns_is_enabled(void)
1214{
1215 return __netns_inited == 1;
1216}
1217
1218int
1219netns_reserve(netns_token *token, uint32_t *addr, uint8_t addr_len,
1220 uint8_t proto, in_port_t port, uint32_t flags, struct ns_flow_info *nfi)
1221{
1222 int err = 0;
1223#if SK_LOG
1224 char tmp_ip_str[MAX_IPv6_STR_LEN];
1225#endif /* SK_LOG */
1226
1227 if (__netns_inited == 0) {
1228 *token = NULL;
1229 return err;
1230 }
1231
1232 if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) {
1233 SK_ERR("netns doesn't support non TCP/UDP protocol");
1234 return ENOTSUP;
1235 }
1236
1237 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
1238 "%s:%s:%d // flags 0x%x", inet_ntop(LEN_TO_AF(addr_len), addr,
1239 tmp_ip_str, sizeof(tmp_ip_str)), PROTO_STR(proto), ntohs(port),
1240 flags);
1241
1242 /*
1243 * Check wether the process is allowed to bind to a restricted port
1244 */
1245 if (!current_task_can_use_restricted_in_port(port,
1246 protocol: proto, port_flags: flags)) {
1247 *token = NULL;
1248 return EADDRINUSE;
1249 }
1250
1251 NETNS_LOCK_SPIN();
1252 err = _netns_reserve_kpi_common(NULL, token, addr, addr_len,
1253 proto, port: &port, flags, nfi);
1254 NETNS_UNLOCK();
1255
1256 return err;
1257}
1258
/*
 * Import the net.inet.{tcp,udp}.randomize_ports sysctls; they select
 * between random and sequential port search in netns_reserve_ephemeral().
 */
extern int tcp_use_randomport;
extern int udp_use_randomport;
1262
1263int
1264netns_reserve_ephemeral(netns_token *token, uint32_t *addr, uint8_t addr_len,
1265 uint8_t proto, in_port_t *port, uint32_t flags, struct ns_flow_info *nfi)
1266{
1267 int err = 0;
1268 in_port_t first = (in_port_t)ipport_firstauto;
1269 in_port_t last = (in_port_t)ipport_lastauto;
1270 in_port_t rand_port;
1271 in_port_t last_port;
1272 in_port_t n_last_port;
1273 struct ns *namespace;
1274 boolean_t count_up = true;
1275 boolean_t use_randomport = (proto == IPPROTO_TCP) ?
1276 tcp_use_randomport : udp_use_randomport;
1277#if SK_LOG
1278 char tmp_ip_str[MAX_IPv6_STR_LEN];
1279#endif /* SK_LOG */
1280
1281 if (__netns_inited == 0) {
1282 *token = NULL;
1283 return err;
1284 }
1285
1286 if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) {
1287 SK_ERR("netns doesn't support non TCP/UDP protocol");
1288 return ENOTSUP;
1289 }
1290
1291 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
1292 "%s:%s:%d // flags 0x%x", inet_ntop(LEN_TO_AF(addr_len), addr,
1293 tmp_ip_str, sizeof(tmp_ip_str)), PROTO_STR(proto), ntohs(*port),
1294 flags);
1295
1296 NETNS_LOCK_SPIN();
1297
1298 namespace = _netns_get_ns(addr, addr_len, proto, true);
1299 if (namespace == NULL) {
1300 err = ENOMEM;
1301 NETNS_UNLOCK();
1302 return err;
1303 }
1304
1305 if (proto == IPPROTO_UDP) {
1306 if (UINT16_MAX - namespace->ns_n_reservations <
1307 NETNS_NS_UDP_EPHEMERAL_RESERVE) {
1308 SK_ERR("UDP ephemeral port not available"
1309 "(less than 4096 UDP ports left)");
1310 err = EADDRNOTAVAIL;
1311 NETNS_UNLOCK();
1312 return err;
1313 }
1314 }
1315
1316 if (first == last) {
1317 rand_port = first;
1318 } else {
1319 if (use_randomport) {
1320 NETNS_LOCK_CONVERT();
1321 read_frandom(buffer: &rand_port, numBytes: sizeof(rand_port));
1322
1323 if (first > last) {
1324 rand_port = last + (rand_port %
1325 (first - last));
1326 count_up = false;
1327 } else {
1328 rand_port = first + (rand_port %
1329 (last - first));
1330 }
1331 } else {
1332 if (first > last) {
1333 rand_port =
1334 namespace->ns_last_ephemeral_port_down - 1;
1335 if (rand_port < last || rand_port > first) {
1336 rand_port = last;
1337 }
1338 count_up = false;
1339 } else {
1340 rand_port =
1341 namespace->ns_last_ephemeral_port_up + 1;
1342 if (rand_port < first || rand_port > last) {
1343 rand_port = first;
1344 }
1345 }
1346 }
1347 }
1348 last_port = rand_port;
1349 n_last_port = htons(last_port);
1350
1351 while (true) {
1352 if (n_last_port == 0) {
1353 SK_ERR("ephemeral port search range includes 0");
1354 err = EINVAL;
1355 break;
1356 }
1357
1358 /*
1359 * Skip if this is a restricted port as we do not want to
1360 * restricted ports as ephemeral
1361 */
1362 if (!IS_RESTRICTED_IN_PORT(n_last_port)) {
1363 err = _netns_reserve_kpi_common(ns: namespace, token, addr,
1364 addr_len, proto, port: &n_last_port, flags, nfi);
1365 if (err == 0 || err != EADDRINUSE) {
1366 break;
1367 }
1368 }
1369 if (count_up) {
1370 last_port++;
1371 if (last_port < first || last_port > last) {
1372 last_port = first;
1373 }
1374 } else {
1375 last_port--;
1376 if (last_port < last || last_port > first) {
1377 last_port = last;
1378 }
1379 }
1380 n_last_port = htons(last_port);
1381
1382 if (last_port == rand_port || first == last) {
1383 SK_ERR("couldn't find free ephemeral port");
1384 err = EADDRNOTAVAIL;
1385 break;
1386 }
1387 }
1388
1389 if (err == 0) {
1390 *port = n_last_port;
1391 if (count_up) {
1392 namespace->ns_last_ephemeral_port_up = last_port;
1393 } else {
1394 namespace->ns_last_ephemeral_port_down = last_port;
1395 }
1396 } else {
1397 netns_ns_cleanup(namespace);
1398 }
1399
1400 NETNS_UNLOCK();
1401
1402 return err;
1403}
1404
1405void
1406netns_release(netns_token *token)
1407{
1408 struct ns *ns;
1409 struct ns_token *nt;
1410 uint8_t proto, addr_len;
1411#if SK_LOG
1412 char tmp_ip_str[MAX_IPv6_STR_LEN];
1413#endif /* SK_LOG */
1414
1415 if (!NETNS_TOKEN_VALID(token)) {
1416 return;
1417 }
1418
1419 if (__netns_inited == 0) {
1420 *token = NULL;
1421 return;
1422 }
1423
1424 NETNS_LOCK_SPIN();
1425
1426 nt = *token;
1427 *token = NULL;
1428
1429 VERIFY((nt->nt_flags & NETNS_OWNER_MASK) <= NETNS_OWNER_MAX);
1430 VERIFY(nt->nt_addr_len == sizeof(struct in_addr) ||
1431 nt->nt_addr_len == sizeof(struct in6_addr));
1432 VERIFY(nt->nt_proto == IPPROTO_TCP || nt->nt_proto == IPPROTO_UDP);
1433
1434 addr_len = nt->nt_addr_len;
1435 proto = nt->nt_proto;
1436
1437 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
1438 "releasing %s:%s:%d",
1439 inet_ntop(LEN_TO_AF(nt->nt_addr_len), nt->nt_addr,
1440 tmp_ip_str, sizeof(tmp_ip_str)), PROTO_STR(proto),
1441 nt->nt_port);
1442
1443 if (!_netns_is_wildcard_addr(addr: nt->nt_addr, addr_len)) {
1444 /* Remove from global non-wild namespace */
1445
1446 ns = netns_global_non_wild[NETNS_NS_GLOBAL_IDX(proto,
1447 addr_len)];
1448 VERIFY(ns != NULL);
1449
1450 _netns_release_common(namespace: ns, port: nt->nt_port, flags: nt->nt_flags);
1451 }
1452
1453 ns = _netns_get_ns(addr: nt->nt_addr, addr_len, proto, false);
1454 VERIFY(ns != NULL);
1455 _netns_release_common(namespace: ns, port: nt->nt_port, flags: nt->nt_flags);
1456
1457 netns_clear_ifnet(nstoken: nt);
1458 netns_ns_token_free(token: nt);
1459
1460 NETNS_UNLOCK();
1461}
1462
1463int
1464netns_change_addr(netns_token *token, uint32_t *addr, uint8_t addr_len)
1465{
1466 int err = 0;
1467 struct ns *old_namespace;
1468 struct ns *new_namespace;
1469 struct ns *global_namespace;
1470 struct ns_token *nt;
1471 uint8_t proto;
1472#if SK_LOG
1473 char tmp_ip_str_1[MAX_IPv6_STR_LEN];
1474 char tmp_ip_str_2[MAX_IPv6_STR_LEN];
1475#endif /* SK_LOG */
1476
1477 if (__netns_inited == 0) {
1478 return 0;
1479 }
1480
1481 NETNS_LOCK();
1482
1483 VERIFY(NETNS_TOKEN_VALID(token));
1484
1485 nt = *token;
1486
1487 VERIFY((nt->nt_flags & NETNS_OWNER_MASK) == NETNS_BSD);
1488 VERIFY(nt->nt_addr_len == sizeof(struct in_addr) ||
1489 nt->nt_addr_len == sizeof(struct in6_addr));
1490 VERIFY(nt->nt_proto == IPPROTO_TCP || nt->nt_proto == IPPROTO_UDP);
1491
1492 proto = nt->nt_proto;
1493
1494#if SK_LOG
1495 inet_ntop(LEN_TO_AF(nt->nt_addr_len), nt->nt_addr,
1496 tmp_ip_str_1, sizeof(tmp_ip_str_1));
1497 inet_ntop(LEN_TO_AF(addr_len), addr, tmp_ip_str_2,
1498 sizeof(tmp_ip_str_2));
1499#endif /* SK_LOG */
1500 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
1501 "changing address for %s:%d from %s to %s",
1502 PROTO_STR(proto), nt->nt_port, tmp_ip_str_1,
1503 tmp_ip_str_2);
1504
1505 if (nt->nt_addr_len == addr_len &&
1506 memcmp(s1: nt->nt_addr, s2: addr, n: nt->nt_addr_len) == 0) {
1507 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
1508 "address didn't change, exiting early");
1509 goto done;
1510 }
1511
1512 old_namespace = _netns_get_ns(addr: nt->nt_addr, addr_len: nt->nt_addr_len, proto,
1513 false);
1514 VERIFY(old_namespace != NULL);
1515
1516 new_namespace = _netns_get_ns(addr, addr_len, proto, true);
1517 if (new_namespace == NULL) {
1518 err = ENOMEM;
1519 goto done;
1520 }
1521
1522 /* Acquire reservation in new namespace */
1523 if ((err = _netns_reserve_common(namespace: new_namespace, port: nt->nt_port,
1524 flags: nt->nt_flags))) {
1525 NETNS_LOCK_CONVERT();
1526 netns_ns_cleanup(namespace: new_namespace);
1527 SK_ERR("ERROR - reservation collision under new namespace");
1528 goto done;
1529 }
1530
1531 /* Release from old namespace */
1532 _netns_release_common(namespace: old_namespace, port: nt->nt_port, flags: nt->nt_flags);
1533
1534 if (!_netns_is_wildcard_addr(addr: nt->nt_addr, addr_len: nt->nt_addr_len)) {
1535 /*
1536 * Old address is non-wildcard.
1537 * Remove old reservation from global non-wild namespace
1538 */
1539 global_namespace = netns_global_non_wild[
1540 NETNS_NS_GLOBAL_IDX(proto, nt->nt_addr_len)];
1541 VERIFY(global_namespace != NULL);
1542
1543 _netns_release_common(namespace: global_namespace, port: nt->nt_port,
1544 flags: nt->nt_flags);
1545 }
1546
1547 if (!_netns_is_wildcard_addr(addr, addr_len)) {
1548 /*
1549 * New address is non-wildcard.
1550 * Record new reservation in global non-wild namespace
1551 */
1552 global_namespace = netns_global_non_wild[
1553 NETNS_NS_GLOBAL_IDX(proto, addr_len)];
1554 VERIFY(global_namespace != NULL);
1555
1556 if ((err = _netns_reserve_common(namespace: global_namespace,
1557 port: nt->nt_port, flags: nt->nt_flags)) != 0) {
1558 SK_ERR("ERROR - reservation collision under new global namespace");
1559 /* XXX: Should not fail. Maybe assert instead */
1560 goto done;
1561 }
1562 }
1563
1564 memcpy(dst: nt->nt_addr, src: addr, n: addr_len);
1565 nt->nt_addr_len = addr_len;
1566
1567done:
1568 NETNS_UNLOCK();
1569 return err;
1570}
1571
1572static void
1573_netns_set_ifnet_internal(struct ns_token *nt, struct ifnet *ifp)
1574{
1575#if SK_LOG
1576 char tmp_ip_str[MAX_IPv6_STR_LEN];
1577#endif /* SK_LOG */
1578
1579 NETNS_LOCK_ASSERT_HELD();
1580
1581 if (ifp != NULL && ifnet_is_attached(ifp, refio: 1)) {
1582 nt->nt_ifp = ifp;
1583 SLIST_INSERT_HEAD(&ifp->if_netns_tokens, nt, nt_ifp_link);
1584
1585 SK_DF(NS_VERB_IP(nt->nt_addr_len) | NS_VERB_PROTO(nt->nt_proto),
1586 "%s:%s:%d // added to ifnet %d",
1587 inet_ntop(LEN_TO_AF(nt->nt_addr_len),
1588 nt->nt_addr, tmp_ip_str, sizeof(tmp_ip_str)),
1589 PROTO_STR(nt->nt_proto), nt->nt_port,
1590 ifp->if_index);
1591 } else {
1592 SLIST_INSERT_HEAD(&netns_unbound_tokens, nt, nt_ifp_link);
1593 }
1594}
1595
1596void
1597netns_set_ifnet(netns_token *token, ifnet_t ifp)
1598{
1599 struct ns_token *nt;
1600#if SK_LOG
1601 char tmp_ip_str[MAX_IPv6_STR_LEN];
1602#endif /* SK_LOG */
1603
1604 if (__netns_inited == 0) {
1605 return;
1606 }
1607
1608 NETNS_LOCK();
1609
1610 VERIFY(NETNS_TOKEN_VALID(token));
1611
1612 nt = *token;
1613
1614 if (nt->nt_ifp == ifp) {
1615 SK_DF(NS_VERB_IP(nt->nt_addr_len) | NS_VERB_PROTO(nt->nt_proto),
1616 "%s:%s:%d // ifnet already %d, exiting early",
1617 inet_ntop(LEN_TO_AF(nt->nt_addr_len),
1618 nt->nt_addr, tmp_ip_str, sizeof(tmp_ip_str)),
1619 PROTO_STR(nt->nt_proto), nt->nt_port,
1620 ifp ? ifp->if_index : -1);
1621 NETNS_UNLOCK();
1622 return;
1623 }
1624
1625 netns_clear_ifnet(nstoken: nt);
1626
1627 _netns_set_ifnet_internal(nt, ifp);
1628
1629 NETNS_UNLOCK();
1630}
1631
1632void
1633netns_ifnet_detach(ifnet_t ifp)
1634{
1635 struct ns_token *token, *tmp_token;
1636
1637 if (__netns_inited == 0) {
1638 return;
1639 }
1640
1641 NETNS_LOCK();
1642
1643 SLIST_FOREACH_SAFE(token, &ifp->if_netns_tokens, nt_ifp_link,
1644 tmp_token) {
1645 netns_clear_ifnet(nstoken: token);
1646 SLIST_INSERT_HEAD(&netns_unbound_tokens, token, nt_ifp_link);
1647 }
1648
1649 NETNS_UNLOCK();
1650}
1651
1652static void
1653_netns_set_state(netns_token *token, uint32_t state)
1654{
1655 struct ns_token *nt;
1656#if SK_LOG
1657 char tmp_ip_str[MAX_IPv6_STR_LEN];
1658#endif /* SK_LOG */
1659
1660 if (__netns_inited == 0) {
1661 return;
1662 }
1663
1664 NETNS_LOCK();
1665 VERIFY(NETNS_TOKEN_VALID(token));
1666
1667 nt = *token;
1668 nt->nt_state |= state;
1669
1670 SK_DF(NS_VERB_IP(nt->nt_addr_len) | NS_VERB_PROTO(nt->nt_proto),
1671 "%s:%s:%d // state 0x%b",
1672 inet_ntop(LEN_TO_AF(nt->nt_addr_len), nt->nt_addr,
1673 tmp_ip_str, sizeof(tmp_ip_str)),
1674 PROTO_STR(nt->nt_proto), nt->nt_port, state, NETNS_STATE_BITS);
1675
1676 NETNS_UNLOCK();
1677}
1678
1679void
1680netns_half_close(netns_token *token)
1681{
1682 _netns_set_state(token, NETNS_STATE_HALFCLOSED);
1683}
1684
1685void
1686netns_withdraw(netns_token *token)
1687{
1688 _netns_set_state(token, NETNS_STATE_WITHDRAWN);
1689}
1690
1691int
1692netns_get_flow_info(netns_token *token,
1693 struct ns_flow_info *nfi)
1694{
1695 if (__netns_inited == 0) {
1696 return ENOTSUP;
1697 }
1698
1699 NETNS_LOCK();
1700 if (!NETNS_TOKEN_VALID(token) ||
1701 nfi == NULL) {
1702 NETNS_UNLOCK();
1703 return EINVAL;
1704 }
1705
1706 struct ns_token *nt = *token;
1707 if (nt->nt_flow_info == NULL) {
1708 NETNS_UNLOCK();
1709 return ENOENT;
1710 }
1711
1712 memcpy(dst: nfi, src: nt->nt_flow_info, n: sizeof(struct ns_flow_info));
1713 NETNS_UNLOCK();
1714
1715 return 0;
1716}
1717
1718void
1719netns_change_flags(netns_token *token, uint32_t set_flags,
1720 uint32_t clear_flags)
1721{
1722 struct ns_token *nt;
1723#if SK_LOG
1724 char tmp_ip_str[MAX_IPv6_STR_LEN];
1725#endif /* SK_LOG */
1726
1727 if (__netns_inited == 0) {
1728 return;
1729 }
1730
1731 NETNS_LOCK();
1732
1733 VERIFY(NETNS_TOKEN_VALID(token));
1734
1735 nt = *token;
1736
1737 VERIFY(!((set_flags | clear_flags) & NETNS_RESERVATION_FLAGS));
1738 /* TODO: verify set and clear flags don't overlap? */
1739
1740 SK_DF(NS_VERB_IP(nt->nt_addr_len) | NS_VERB_PROTO(nt->nt_proto),
1741 "%s:%s:%d // flags 0x%x -> 0x%x",
1742 inet_ntop(LEN_TO_AF(nt->nt_addr_len), nt->nt_addr,
1743 tmp_ip_str, sizeof(tmp_ip_str)),
1744 PROTO_STR(nt->nt_proto), nt->nt_port, nt->nt_flags,
1745 nt->nt_flags | set_flags & ~clear_flags);
1746
1747 nt->nt_flags |= set_flags;
1748 nt->nt_flags &= ~clear_flags;
1749
1750 NETNS_UNLOCK();
1751}
1752
1753/*
1754 * Port offloading KPI
1755 */
1756static inline void
1757netns_local_port_scan_flow_entry(struct flow_entry *fe, protocol_family_t protocol,
1758 u_int32_t flags, u_int8_t *bitfield)
1759{
1760 struct ns_token *token;
1761 boolean_t iswildcard = false;
1762
1763 if (fe == NULL) {
1764 return;
1765 }
1766
1767 if (fe->fe_flags & FLOWENTF_EXTRL_PORT) {
1768 return;
1769 }
1770
1771 token = fe->fe_port_reservation;
1772 if (token == NULL) {
1773 return;
1774 }
1775
1776 /*
1777 * We are only interested in active flows over skywalk channels
1778 */
1779 if ((token->nt_flags & NETNS_OWNER_MASK) != NETNS_SKYWALK) {
1780 return;
1781 }
1782
1783 if (token->nt_state & NETNS_STATE_WITHDRAWN) {
1784 return;
1785 }
1786
1787 if (!(flags & IFNET_GET_LOCAL_PORTS_ANYTCPSTATEOK) &&
1788 (flags & IFNET_GET_LOCAL_PORTS_ACTIVEONLY) &&
1789 (token->nt_state & NETNS_STATE_HALFCLOSED)) {
1790 return;
1791 }
1792
1793 VERIFY(token->nt_addr_len == sizeof(struct in_addr) ||
1794 token->nt_addr_len == sizeof(struct in6_addr));
1795
1796 if (token->nt_addr_len == sizeof(struct in_addr)) {
1797 if (protocol == PF_INET6) {
1798 return;
1799 }
1800
1801 iswildcard = token->nt_inaddr.s_addr == INADDR_ANY;
1802 } else if (token->nt_addr_len == sizeof(struct in6_addr)) {
1803 if (protocol == PF_INET) {
1804 return;
1805 }
1806
1807 iswildcard = IN6_IS_ADDR_UNSPECIFIED(
1808 &token->nt_in6addr);
1809 }
1810 if (!(flags & IFNET_GET_LOCAL_PORTS_WILDCARDOK) && iswildcard) {
1811 return;
1812 }
1813
1814 if ((flags & IFNET_GET_LOCAL_PORTS_TCPONLY) &&
1815 token->nt_proto == IPPROTO_UDP) {
1816 return;
1817 }
1818 if ((flags & IFNET_GET_LOCAL_PORTS_UDPONLY) &&
1819 token->nt_proto == IPPROTO_TCP) {
1820 return;
1821 }
1822
1823 if ((flags & IFNET_GET_LOCAL_PORTS_RECVANYIFONLY) &&
1824 !(token->nt_flags & NETNS_RECVANYIF)) {
1825 return;
1826 }
1827
1828 if ((flags & IFNET_GET_LOCAL_PORTS_EXTBGIDLEONLY) &&
1829 !(token->nt_flags & NETNS_EXTBGIDLE)) {
1830 return;
1831 }
1832
1833 if (token->nt_ifp != NULL && (token->nt_ifp->if_eflags & IFEF_AWDL) != 0) {
1834 struct flow_route *fr = fe->fe_route;
1835
1836 if (fr == NULL || fr->fr_rt_dst == NULL ||
1837 (fr->fr_rt_dst->rt_flags & (RTF_UP | RTF_CONDEMNED)) != RTF_UP) {
1838#if DEBUG || DEVELOPMENT
1839 char lbuf[MAX_IPv6_STR_LEN + 6] = {};
1840 char fbuf[MAX_IPv6_STR_LEN + 6] = {};
1841 in_port_t lport;
1842 in_port_t fport;
1843 char pname[MAXCOMLEN + 1];
1844 const struct ns_flow_info *nfi = token->nt_flow_info;
1845
1846 proc_name(nfi->nfi_owner_pid, pname, sizeof(pname));
1847
1848 if (protocol == PF_INET) {
1849 inet_ntop(PF_INET, &nfi->nfi_laddr.sin.sin_addr,
1850 lbuf, sizeof(lbuf));
1851 inet_ntop(PF_INET, &nfi->nfi_faddr.sin.sin_addr,
1852 fbuf, sizeof(fbuf));
1853 lport = nfi->nfi_laddr.sin.sin_port;
1854 fport = nfi->nfi_faddr.sin.sin_port;
1855 } else {
1856 inet_ntop(PF_INET6, &nfi->nfi_laddr.sin6.sin6_addr.s6_addr,
1857 lbuf, sizeof(lbuf));
1858 inet_ntop(PF_INET6, &nfi->nfi_faddr.sin6.sin6_addr,
1859 fbuf, sizeof(fbuf));
1860 lport = nfi->nfi_laddr.sin6.sin6_port;
1861 fport = nfi->nfi_faddr.sin6.sin6_port;
1862 }
1863
1864 os_log(OS_LOG_DEFAULT,
1865 "netns_local_port_scan_flow_entry: route is down %s %s:%u %s:%u ifp %s proc %s:%d",
1866 token->nt_proto == IPPROTO_TCP ? "tcp" : "udp",
1867 lbuf, ntohs(lport), fbuf, ntohs(fport),
1868 token->nt_ifp->if_xname, pname, nfi->nfi_owner_pid);
1869#endif /* DEBUG || DEVELOPMENT */
1870
1871 return;
1872 }
1873 }
1874
1875#if DEBUG || DEVELOPMENT
1876 if (!(flags & IFNET_GET_LOCAL_PORTS_NOWAKEUPOK) &&
1877 (token->nt_flags & NETNS_NOWAKEFROMSLEEP)) {
1878 char lbuf[MAX_IPv6_STR_LEN + 6] = {};
1879 char fbuf[MAX_IPv6_STR_LEN + 6] = {};
1880 in_port_t lport;
1881 in_port_t fport;
1882 char pname[MAXCOMLEN + 1];
1883 const struct ns_flow_info *nfi = token->nt_flow_info;
1884
1885 proc_name(nfi->nfi_owner_pid, pname, sizeof(pname));
1886
1887 if (protocol == PF_INET) {
1888 inet_ntop(PF_INET, &nfi->nfi_laddr.sin.sin_addr,
1889 lbuf, sizeof(lbuf));
1890 inet_ntop(PF_INET, &nfi->nfi_faddr.sin.sin_addr,
1891 fbuf, sizeof(fbuf));
1892 lport = nfi->nfi_laddr.sin.sin_port;
1893 fport = nfi->nfi_faddr.sin.sin_port;
1894 } else {
1895 inet_ntop(PF_INET6, &nfi->nfi_laddr.sin6.sin6_addr.s6_addr,
1896 lbuf, sizeof(lbuf));
1897 inet_ntop(PF_INET6, &nfi->nfi_faddr.sin6.sin6_addr,
1898 fbuf, sizeof(fbuf));
1899 lport = nfi->nfi_laddr.sin6.sin6_port;
1900 fport = nfi->nfi_faddr.sin6.sin6_port;
1901 }
1902
1903 os_log(OS_LOG_DEFAULT,
1904 "netns_local_port_scan_flow_entry: no wake from sleep %s %s:%u %s:%u ifp %s proc %s:%d",
1905 token->nt_proto == IPPROTO_TCP ? "tcp" : "udp",
1906 lbuf, ntohs(lport), fbuf, ntohs(fport),
1907 token->nt_ifp != NULL ? token->nt_ifp->if_xname : "",
1908 pname, nfi->nfi_owner_pid);
1909 }
1910#endif /* DEBUG || DEVELOPMENT */
1911
1912 if (token->nt_ifp != NULL && token->nt_flow_info != NULL) {
1913 /*
1914 * When the flow has "no wake from sleep" option, do not set the port in the bitmap
1915 * except if explicetely requested by the driver.
1916 * We always add the flow to the list of port in order to report spurious wakes
1917 */
1918 if ((flags & IFNET_GET_LOCAL_PORTS_NOWAKEUPOK) ||
1919 (token->nt_flags & NETNS_NOWAKEFROMSLEEP) == 0) {
1920 bitstr_set(bitfield, token->nt_port);
1921 }
1922 (void) if_ports_used_add_flow_entry(fe, ifindex: token->nt_ifp->if_index,
1923 nfi: token->nt_flow_info, ns_flags: token->nt_flags);
1924 } else {
1925 SK_ERR("%s: unknown owner port %u"
1926 " nt_flags 0x%x ifindex %u nt_flow_info %p\n",
1927 __func__, token->nt_port,
1928 token->nt_flags,
1929 token->nt_ifp != NULL ? token->nt_ifp->if_index : 0,
1930 token->nt_flow_info);
1931 }
1932}
1933
1934static void
1935netns_get_if_local_ports(ifnet_t ifp, protocol_family_t protocol,
1936 u_int32_t flags, u_int8_t *bitfield)
1937{
1938 struct nx_flowswitch *fsw = NULL;
1939
1940 if (ifp == NULL || ifp->if_na == NULL) {
1941 return;
1942 }
1943 /* Ensure that the interface is attached and won't detach */
1944 if (!ifnet_is_attached(ifp, refio: 1)) {
1945 return;
1946 }
1947 fsw = fsw_ifp_to_fsw(ifp);
1948 if (fsw == NULL) {
1949 goto done;
1950 }
1951 FSW_RLOCK(fsw);
1952 NETNS_LOCK();
1953 flow_mgr_foreach_flow(fm: fsw->fsw_flow_mgr, flow_handler: ^(struct flow_entry *_fe) {
1954 netns_local_port_scan_flow_entry(fe: _fe, protocol, flags,
1955 bitfield);
1956 });
1957 NETNS_UNLOCK();
1958 FSW_UNLOCK(fsw);
1959done:
1960 ifnet_decr_iorefcnt(ifp);
1961}
1962
1963errno_t
1964netns_get_local_ports(ifnet_t ifp, protocol_family_t protocol,
1965 u_int32_t flags, u_int8_t *bitfield)
1966{
1967 if (__netns_inited == 0) {
1968 return 0;
1969 }
1970 if (ifp != NULL) {
1971 netns_get_if_local_ports(ifp, protocol, flags, bitfield);
1972 } else {
1973 errno_t error;
1974 ifnet_t *ifp_list;
1975 uint32_t count, i;
1976
1977 error = ifnet_list_get_all(family: IFNET_FAMILY_ANY, interfaces: &ifp_list, count: &count);
1978 if (error != 0) {
1979 os_log_error(OS_LOG_DEFAULT,
1980 "%s: ifnet_list_get_all() failed %d",
1981 __func__, error);
1982 return error;
1983 }
1984 for (i = 0; i < count; i++) {
1985 if (TAILQ_EMPTY(&ifp_list[i]->if_addrhead)) {
1986 continue;
1987 }
1988 netns_get_if_local_ports(ifp: ifp_list[i], protocol, flags,
1989 bitfield);
1990 }
1991 ifnet_list_free(interfaces: ifp_list);
1992 }
1993
1994 return 0;
1995}
1996
1997uint32_t
1998netns_find_anyres_byaddr(struct ifaddr *ifa, uint8_t proto)
1999{
2000 int result = 0;
2001 int ifa_addr_len;
2002 struct ns_token *token;
2003 struct ifnet *ifp = ifa->ifa_ifp;
2004 struct sockaddr *ifa_addr = ifa->ifa_addr;
2005
2006 if (__netns_inited == 0) {
2007 return ENOTSUP;
2008 }
2009
2010 if ((ifa_addr->sa_family != AF_INET) &&
2011 (ifa_addr->sa_family != AF_INET6)) {
2012 return 0;
2013 }
2014
2015 ifa_addr_len = (ifa_addr->sa_family == AF_INET) ?
2016 sizeof(struct in_addr) : sizeof(struct in6_addr);
2017
2018 NETNS_LOCK();
2019
2020 SLIST_FOREACH(token, &ifp->if_netns_tokens, nt_ifp_link) {
2021 if ((token->nt_flags & NETNS_OWNER_MASK) == NETNS_PF) {
2022 continue;
2023 }
2024 if (token->nt_addr_len != ifa_addr_len) {
2025 continue;
2026 }
2027 if (token->nt_proto != proto) {
2028 continue;
2029 }
2030 if (ifa_addr->sa_family == AF_INET) {
2031 if (token->nt_inaddr.s_addr ==
2032 (satosin(ifa->ifa_addr))->sin_addr.s_addr) {
2033 result = 1;
2034 break;
2035 }
2036 } else if (ifa_addr->sa_family == AF_INET6) {
2037 if (IN6_ARE_ADDR_EQUAL(IFA_IN6(ifa),
2038 &token->nt_in6addr)) {
2039 result = 1;
2040 break;
2041 }
2042 }
2043 }
2044
2045 NETNS_UNLOCK();
2046 return result;
2047}
2048
2049static uint32_t
2050_netns_lookup_ns_n_reservations(uint32_t *addr, uint8_t addr_len, uint8_t proto)
2051{
2052 uint32_t ns_n_reservations = 0;
2053 NETNS_LOCK_SPIN();
2054 struct ns *namespace = _netns_get_ns(addr, addr_len, proto, true);
2055 if (namespace != NULL) {
2056 ns_n_reservations = namespace->ns_n_reservations;
2057 }
2058 NETNS_UNLOCK();
2059 return ns_n_reservations;
2060}
2061
2062uint32_t
2063netns_lookup_reservations_count_in(struct in_addr addr, uint8_t proto)
2064{
2065 return _netns_lookup_ns_n_reservations(addr: &addr.s_addr, addr_len: sizeof(struct in_addr), proto);
2066}
2067
2068uint32_t
2069netns_lookup_reservations_count_in6(struct in6_addr addr, uint8_t proto)
2070{
2071 if (IN6_IS_SCOPE_EMBED(&addr)) {
2072 addr.s6_addr16[1] = 0;
2073 }
2074 return _netns_lookup_ns_n_reservations(addr: &addr.s6_addr32[0], addr_len: sizeof(struct in6_addr), proto);
2075}
2076
2077/*
2078 * Sysctl interface
2079 */
2080
2081static int netns_ctl_dump_all SYSCTL_HANDLER_ARGS;
2082
2083SYSCTL_NODE(_kern_skywalk, OID_AUTO, netns, CTLFLAG_RW | CTLFLAG_LOCKED,
2084 0, "Netns interface");
2085
2086SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, netns,
2087 CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_LOCKED,
2088 0, 0, netns_ctl_dump_all, "-",
2089 "Namespace contents (struct netns_ctl_dump_header, "
2090 "skywalk/os_stats_private.h)");
2091
2092static int
2093netns_ctl_write_ns(struct sysctl_req *req, struct ns *namespace,
2094 boolean_t is_global)
2095{
2096 struct ns_reservation *res;
2097 struct netns_ctl_dump_header response_header;
2098 struct netns_ctl_dump_record response_record;
2099 int err;
2100
2101 /* Fill out header */
2102 memset(s: &response_header, c: 0, n: sizeof(response_header));
2103 response_header.ncdh_n_records = namespace->ns_n_reservations;
2104 response_header.ncdh_proto = namespace->ns_proto;
2105
2106 if (is_global) {
2107 response_header.ncdh_addr_len = 0;
2108 } else {
2109 response_header.ncdh_addr_len = namespace->ns_addr_len;
2110 }
2111 memcpy(dst: response_header.ncdh_addr, src: namespace->ns_addr,
2112 n: namespace->ns_addr_len);
2113
2114 err = SYSCTL_OUT(req, &response_header, sizeof(response_header));
2115 if (err) {
2116 return err;
2117 }
2118
2119 /* Fill out records */
2120 RB_FOREACH(res, ns_reservation_tree, &namespace->ns_reservations) {
2121 memset(s: &response_record, c: 0, n: sizeof(response_record));
2122 response_record.ncdr_port = res->nsr_port;
2123 response_record.ncdr_port_end = 0;
2124 response_record.ncdr_listener_refs =
2125 NETNS_REF_COUNT(res, NETNS_LISTENER);
2126 response_record.ncdr_skywalk_refs =
2127 NETNS_REF_COUNT(res, NETNS_SKYWALK);
2128 response_record.ncdr_bsd_refs =
2129 NETNS_REF_COUNT(res, NETNS_BSD);
2130 response_record.ncdr_pf_refs =
2131 NETNS_REF_COUNT(res, NETNS_PF);
2132 err = SYSCTL_OUT(req, &response_record,
2133 sizeof(response_record));
2134 if (err) {
2135 return err;
2136 }
2137 }
2138
2139 return 0;
2140}
2141
2142static int
2143netns_ctl_dump_all SYSCTL_HANDLER_ARGS
2144{
2145#pragma unused(oidp, arg1, arg2)
2146 struct ns *namespace;
2147 int i, err = 0;
2148
2149 if (!kauth_cred_issuser(cred: kauth_cred_get())) {
2150 return EPERM;
2151 }
2152
2153 if (__netns_inited == 0) {
2154 return ENOTSUP;
2155 }
2156
2157 NETNS_LOCK();
2158
2159 for (i = 0; i < NETNS_N_GLOBAL; i++) {
2160 err = netns_ctl_write_ns(req, namespace: netns_global_non_wild[i], true);
2161 if (err) {
2162 goto done;
2163 }
2164 }
2165
2166 RB_FOREACH(namespace, netns_namespaces_tree, &netns_namespaces) {
2167 err = netns_ctl_write_ns(req, namespace, false);
2168 if (err) {
2169 goto done;
2170 }
2171 }
2172
2173 /*
2174 * If this is just a request for length, add slop because
2175 * this is dynamically changing data
2176 */
2177 if (req->oldptr == USER_ADDR_NULL) {
2178 req->oldidx += 20 * sizeof(struct netns_ctl_dump_record);
2179 }
2180
2181done:
2182 NETNS_UNLOCK();
2183 return err;
2184}
2185/* CSTYLED */
2186