/*
 * Copyright (c) 2019-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <skywalk/os_skywalk_private.h>
#include <skywalk/nexus/netif/nx_netif.h>
#include <netinet/ip6.h>
#include <netinet6/in6_var.h>
#include <net/pktap.h>
#include <sys/sdt.h>
#include <os/log.h>

/* This is just a list for now, for simplicity. */
struct netif_list_flowtable {
	struct netif_flow_head lft_flow_list;
};

static netif_flow_lookup_t netif_flow_list_lookup;
static netif_flow_insert_t netif_flow_list_insert;
static netif_flow_remove_t netif_flow_list_remove;
static netif_flow_table_alloc_t netif_flow_list_table_alloc;
static netif_flow_table_free_t netif_flow_list_table_free;

static netif_flow_match_t netif_flow_ethertype_match;
static netif_flow_info_t netif_flow_ethertype_info;
static netif_flow_match_t netif_flow_ipv6_ula_match;
static netif_flow_info_t netif_flow_ipv6_ula_info;

/*
 * The two flow table types can share the same internal implementation;
 * a list is used for now for simplicity.
 */
static struct netif_flowtable_ops netif_ethertype_ops = {
	.nfo_lookup = netif_flow_list_lookup,
	.nfo_match = netif_flow_ethertype_match,
	.nfo_info = netif_flow_ethertype_info,
	.nfo_insert = netif_flow_list_insert,
	.nfo_remove = netif_flow_list_remove,
	.nfo_table_alloc = netif_flow_list_table_alloc,
	.nfo_table_free = netif_flow_list_table_free
};

static struct netif_flowtable_ops netif_ipv6_ula_ops = {
	.nfo_lookup = netif_flow_list_lookup,
	.nfo_match = netif_flow_ipv6_ula_match,
	.nfo_info = netif_flow_ipv6_ula_info,
	.nfo_insert = netif_flow_list_insert,
	.nfo_remove = netif_flow_list_remove,
	.nfo_table_alloc = netif_flow_list_table_alloc,
	.nfo_table_free = netif_flow_list_table_free
};

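/*
 * Illustrative sketch (comment only): every consumer reaches a flow table
 * through its ops vector, so the list-backed implementation could be swapped
 * for another data structure simply by providing a different
 * netif_flowtable_ops. A caller resolving a packet's flow does roughly:
 *
 *	struct netif_flowtable *ft = nif->nif_flow_table;
 *	struct netif_flow *nf = NULL;
 *
 *	if (ft->ft_ops->nfo_lookup(ft, pkt, flags, &nf) == 0) {
 *		// nf is the matching flow
 *	}
 *
 * which is what nx_netif_flow_classify() below does under nif_flow_lock.
 */
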
static int
netif_flow_get_buf_pkt(struct __kern_packet *pkt, size_t minlen,
    uint8_t **buf, uint32_t *len)
{
	uint8_t *baddr;

	if (pkt->pkt_length < minlen) {
		return EINVAL;
	}
	MD_BUFLET_ADDR_ABS(pkt, baddr);
	baddr += pkt->pkt_headroom;

	*buf = baddr;
	*len = pkt->pkt_length;
	return 0;
}

static int
netif_flow_get_buf_mbuf(struct mbuf *m, size_t minlen,
    uint8_t **buf, uint32_t *len)
{
	/*
	 * XXX
	 * Not pulling up here if the mbuf is not contiguous.
	 * This does not impact the current use case (ethertype
	 * demux).
	 */
	if (mbuf_len(m) < minlen) {
		return EINVAL;
	}
	*buf = mbuf_data(m);
	*len = m_pktlen(m);
	return 0;
}

static int
netif_flow_get_buf(struct __kern_packet *pkt, size_t minlen,
    uint8_t **buf, uint32_t *len)
{
	ASSERT((pkt->pkt_pflags & PKT_F_PKT_DATA) == 0);
	if ((pkt->pkt_pflags & PKT_F_MBUF_DATA) != 0) {
		ASSERT(pkt->pkt_mbuf != NULL);
		return netif_flow_get_buf_mbuf(pkt->pkt_mbuf, minlen, buf, len);
	}
	return netif_flow_get_buf_pkt(pkt, minlen, buf, len);
}

static int
netif_flow_ethertype_info(struct __kern_packet *pkt,
    struct netif_flow_desc *fd, uint32_t flags)
{
#pragma unused (flags)
	ether_header_t *eh;
	uint32_t len;
	uint16_t etype;
	uint16_t tag;
	uint8_t *buf;
	int err;

	err = netif_flow_get_buf(pkt, sizeof(ether_header_t), &buf,
	    &len);
	if (err != 0) {
		DTRACE_SKYWALK2(get__buf__failed, struct __kern_packet *,
		    pkt, int, err);
		return err;
	}
	eh = (ether_header_t *)(void *)buf;
	if (__probable((((uintptr_t)buf) & 1) == 0)) {
		etype = eh->ether_type;
	} else {
		bcopy(&eh->ether_type, &etype, sizeof(etype));
	}
	etype = ntohs(etype);

	if (kern_packet_get_vlan_tag(SK_PKT2PH(pkt), &tag, NULL) == 0) {
		DTRACE_SKYWALK2(hw__vlan, struct __kern_packet *, pkt,
		    uint16_t, tag);
	} else if (etype == ETHERTYPE_VLAN) {
		struct ether_vlan_header *evh;

		DTRACE_SKYWALK2(encap__vlan, struct __kern_packet *, pkt,
		    uint8_t *, buf);
		if ((pkt->pkt_pflags & PKT_F_MBUF_DATA) != 0) {
			struct mbuf *m = pkt->pkt_mbuf;

			if (mbuf_len(m) < sizeof(*evh)) {
				DTRACE_SKYWALK1(mbuf__too__small,
				    struct mbuf *, m);
				return EINVAL;
			}
		} else {
			if (len < sizeof(*evh)) {
				DTRACE_SKYWALK2(pkt__too__small,
				    struct __kern_packet *, pkt,
				    uint32_t, len);
				return EINVAL;
			}
		}
		evh = (struct ether_vlan_header *)eh;
		if (__probable((((uintptr_t)evh) & 1) == 0)) {
			tag = evh->evl_tag;
			etype = evh->evl_proto;
		} else {
			bcopy(&evh->evl_tag, &tag, sizeof(tag));
			bcopy(&evh->evl_proto, &etype, sizeof(etype));
		}
		tag = ntohs(tag);
		etype = ntohs(etype);
	} else {
		tag = 0;
	}
	/* Only accept untagged or priority-tagged (VLAN ID 0) packets */
	if (EVL_VLANOFTAG(tag) != 0) {
		DTRACE_SKYWALK2(vlan__non__zero,
		    struct __kern_packet *, pkt, uint16_t, tag);
		return ENOTSUP;
	}
	DTRACE_SKYWALK4(extracted__info, struct __kern_packet *, pkt,
	    uint8_t *, buf, uint16_t, tag, uint16_t, etype);
	fd->fd_ethertype = etype;
	return 0;
}

static boolean_t
netif_flow_ethertype_match(struct netif_flow_desc *fd1,
    struct netif_flow_desc *fd2)
{
	return fd1->fd_ethertype == fd2->fd_ethertype;
}
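
/*
 * Illustrative sketch (comment only): for ethertype demux, the flow
 * descriptor carries nothing but the 16-bit ethertype in host byte order.
 * A hypothetical consumer interested in IPv6 frames would register:
 *
 *	struct netif_flow_desc fd = { .fd_ethertype = ETHERTYPE_IPV6 };
 *
 * netif_flow_ethertype_info() above extracts the same value from each
 * packet, looking through an 802.1Q header when present but rejecting
 * anything that carries a non-zero VLAN ID.
 */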

static int
netif_flow_ipv6_ula_info(struct __kern_packet *pkt,
    struct netif_flow_desc *fd, uint32_t flags)
{
	ether_header_t *eh;
	uint32_t len;
	uint8_t *buf;
	struct ip6_hdr *ip6h;
	void *laddr, *raddr;
	uint16_t etype;
	int err;

	err = netif_flow_get_buf(pkt, sizeof(*eh) + sizeof(*ip6h),
	    &buf, &len);
	if (err != 0) {
		DTRACE_SKYWALK2(get__buf__failed, struct __kern_packet *,
		    pkt, int, err);
		return err;
	}
	eh = (ether_header_t *)(void *)buf;
	ip6h = (struct ip6_hdr *)(eh + 1);

	bcopy(&eh->ether_type, &etype, sizeof(etype));
	etype = ntohs(etype);
	if (etype != ETHERTYPE_IPV6) {
		return ENOENT;
	}
	if (len < sizeof(*eh) + sizeof(*ip6h)) {
		return EINVAL;
	}
	if ((flags & NETIF_FLOW_OUTBOUND) != 0) {
		laddr = &ip6h->ip6_src;
		raddr = &ip6h->ip6_dst;
	} else {
		laddr = &ip6h->ip6_dst;
		raddr = &ip6h->ip6_src;
	}
	bcopy(laddr, &fd->fd_laddr, sizeof(struct in6_addr));
	bcopy(raddr, &fd->fd_raddr, sizeof(struct in6_addr));
	return 0;
}

static boolean_t
netif_flow_ipv6_ula_match(struct netif_flow_desc *fd1, struct netif_flow_desc *fd2)
{
	return IN6_ARE_ADDR_EQUAL(&fd1->fd_laddr, &fd2->fd_laddr) &&
	       IN6_ARE_ADDR_EQUAL(&fd1->fd_raddr, &fd2->fd_raddr);
}
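
/*
 * Illustrative sketch (comment only): the ULA classifier above keys a flow
 * purely on the local/remote IPv6 address pair, where "local" is the source
 * address of outbound packets and the destination address of inbound ones
 * (NETIF_FLOW_OUTBOUND selects which). A matching descriptor is simply:
 *
 *	struct netif_flow_desc fd;
 *	bcopy(&our_ula_addr, &fd.fd_laddr, sizeof(struct in6_addr));
 *	bcopy(&peer_ula_addr, &fd.fd_raddr, sizeof(struct in6_addr));
 *
 * where our_ula_addr and peer_ula_addr are hypothetical in6_addr values
 * owned by the caller, used here only for illustration.
 */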

static int
netif_flow_list_lookup(struct netif_flowtable *ft, struct __kern_packet *pkt,
    uint32_t flags, struct netif_flow **f)
{
	struct netif_list_flowtable *lft = ft->ft_internal;
	struct netif_flowtable_ops *fops = ft->ft_ops;
	struct netif_flow *nf;
	struct netif_flow_desc fd;
	int err;

	/* XXX returns the first flow if "accept all" is on */
	if (nx_netif_vp_accept_all != 0) {
		nf = SLIST_FIRST(&lft->lft_flow_list);
		goto done;
	}
	err = fops->nfo_info(pkt, &fd, flags);
	if (err != 0) {
		return err;
	}
	SLIST_FOREACH(nf, &lft->lft_flow_list, nf_table_link) {
		if (fops->nfo_match(&nf->nf_desc, &fd)) {
			break;
		}
	}
done:
	if (nf == NULL) {
		return ENOENT;
	}
	*f = nf;
	return 0;
}

static int
netif_flow_list_insert(struct netif_flowtable *ft, struct netif_flow *f)
{
	struct netif_list_flowtable *lft = ft->ft_internal;
	struct netif_flow *nf;

	SLIST_FOREACH(nf, &lft->lft_flow_list, nf_table_link) {
		if (nf->nf_port == f->nf_port ||
		    ft->ft_ops->nfo_match(&nf->nf_desc, &f->nf_desc)) {
			break;
		}
	}
	if (nf != NULL) {
		return EEXIST;
	}
	SLIST_INSERT_HEAD(&lft->lft_flow_list, f, nf_table_link);
	return 0;
}

static void
netif_flow_list_remove(struct netif_flowtable *ft, struct netif_flow *f)
{
	struct netif_list_flowtable *lft = ft->ft_internal;

	SLIST_REMOVE(&lft->lft_flow_list, f, netif_flow, nf_table_link);
}

static struct netif_flowtable *
netif_flow_list_table_alloc(struct netif_flowtable_ops *ops)
{
	struct netif_flowtable *ft;
	struct netif_list_flowtable *lft;

	ft = skn_alloc_type(flowtable, struct netif_flowtable,
	    Z_WAITOK | Z_NOFAIL, skmem_tag_netif_flow);
	lft = skn_alloc_type(list_flowtable, struct netif_list_flowtable,
	    Z_WAITOK | Z_NOFAIL, skmem_tag_netif_flow);
	/*
	 * For now lft just holds a list. We can use any data structure here.
	 */
	SLIST_INIT(&lft->lft_flow_list);
	ft->ft_internal = lft;
	ft->ft_ops = ops;
	return ft;
}

static void
netif_flow_list_table_free(struct netif_flowtable *ft)
{
	struct netif_list_flowtable *lft;

	ASSERT(ft->ft_ops != NULL);
	ft->ft_ops = NULL;

	ASSERT(ft->ft_internal != NULL);
	lft = ft->ft_internal;
	ASSERT(SLIST_EMPTY(&lft->lft_flow_list));

	skn_free_type(list_flowtable, struct netif_list_flowtable, lft);
	ft->ft_internal = NULL;

	skn_free_type(flowtable, struct netif_flowtable, ft);
}
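
/*
 * Note on the list implementation above: lookup and insert are O(n) in the
 * number of flows, which is acceptable because these tables are expected to
 * stay tiny (a low-latency interface has at most one flow at a time, per the
 * comment near nx_netif_flow_log() below). Z_NOFAIL also means the
 * skn_alloc_type() calls above cannot return NULL, which is why the results
 * are used without NULL checks.
 */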

static void
nx_netif_flow_deliver(struct nx_netif *nif, struct netif_flow *f,
    void *data, uint32_t flags)
{
#pragma unused(nif)
	f->nf_cb_func(f->nf_cb_arg, data, flags);
}

void
nx_netif_snoop(struct nx_netif *nif, struct __kern_packet *pkt,
    boolean_t inbound)
{
	/* pktap only supports IPv4 or IPv6 packets */
	if (!NETIF_IS_LOW_LATENCY(nif)) {
		return;
	}
	if (inbound) {
		pktap_input_packet(nif->nif_ifp, AF_INET6, DLT_EN10MB,
		    -1, NULL, -1, NULL, SK_PKT2PH(pkt), NULL, 0, 0, 0,
		    PTH_FLAG_NEXUS_CHAN);
	} else {
		pktap_output_packet(nif->nif_ifp, AF_INET6, DLT_EN10MB,
		    -1, NULL, -1, NULL, SK_PKT2PH(pkt), NULL, 0, 0, 0,
		    PTH_FLAG_NEXUS_CHAN);
	}
}

/*
 * This function ensures that the interface's MAC address matches:
 *  - the destination MAC address of inbound packets
 *  - the source MAC address of outbound packets
 */
boolean_t
nx_netif_validate_macaddr(struct nx_netif *nif, struct __kern_packet *pkt,
    uint32_t flags)
{
	struct netif_stats *nifs = &nif->nif_stats;
	struct ifnet *ifp = nif->nif_ifp;
	uint8_t local_addr[ETHER_ADDR_LEN], *addr;
	boolean_t valid = FALSE, outbound, mbcast;
	ether_header_t *eh;
	uint32_t len;
	uint8_t *buf;

	/*
	 * No need to hold any lock for the checks below because we are not
	 * accessing any shared state.
	 */
	if (netif_flow_get_buf(pkt, sizeof(ether_header_t), &buf, &len) != 0) {
		STATS_INC(nifs, NETIF_STATS_VP_BAD_PKT_LEN);
		DTRACE_SKYWALK2(bad__pkt__sz, struct nx_netif *, nif,
		    struct __kern_packet *, pkt);
		return FALSE;
	}
	DTRACE_SKYWALK4(dump__buf, struct nx_netif *, nif,
	    struct __kern_packet *, pkt, void *, buf, uint32_t, len);

	eh = (ether_header_t *)(void *)buf;
	outbound = ((flags & NETIF_FLOW_OUTBOUND) != 0);
	addr = outbound ? eh->ether_shost : eh->ether_dhost;
	mbcast = ((addr[0] & 1) != 0);

	if (NETIF_IS_LOW_LATENCY(nif)) {
		/* disallow multicast/broadcast as either src or dest macaddr */
		if (mbcast) {
			DTRACE_SKYWALK4(mbcast__pkt__llw,
			    struct nx_netif *, nif, struct __kern_packet *, pkt,
			    void *, buf, uint32_t, len);
			goto done;
		}
		/* only validate the macaddr of outbound packets */
		if (!outbound) {
			DTRACE_SKYWALK4(skip__check__llw,
			    struct nx_netif *, nif, struct __kern_packet *, pkt,
			    void *, buf, uint32_t, len);
			return TRUE;
		}
	} else {
		if (mbcast) {
			if (outbound) {
				/* disallow multicast/broadcast as src macaddr */
				DTRACE_SKYWALK4(mbcast__src,
				    struct nx_netif *, nif,
				    struct __kern_packet *, pkt,
				    void *, buf, uint32_t, len);
				goto done;
			} else {
				/* allow multicast/broadcast as dest macaddr */
				DTRACE_SKYWALK4(mbcast__dest,
				    struct nx_netif *, nif,
				    struct __kern_packet *, pkt,
				    void *, buf, uint32_t, len);
				return TRUE;
			}
		}
	}
	if (ifnet_lladdr_copy_bytes(ifp, local_addr, sizeof(local_addr)) != 0) {
		STATS_INC(nifs, NETIF_STATS_VP_BAD_MADDR_LEN);
		DTRACE_SKYWALK2(bad__addr__len, struct nx_netif *, nif,
		    struct ifnet *, ifp);
		return FALSE;
	}
	valid = (_ether_cmp(local_addr, addr) == 0);
done:
	if (!valid) {
		/*
		 * A non-matching MAC address is not an error on the input
		 * path because we are expected to receive such packets.
		 * These packets are already counted as
		 * NETIF_STATS_FLOW_NOT_FOUND.
		 */
		if (outbound) {
			STATS_INC(nifs, NETIF_STATS_VP_BAD_MADDR);
		}
		DTRACE_SKYWALK2(bad__addr, struct nx_netif *, nif,
		    struct __kern_packet *, pkt);
	}
	return valid;
}
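
/*
 * Summary of the checks above (flow_classify() below skips this function
 * entirely when nx_netif_vp_accept_all is set):
 *
 *	low-latency, multicast/broadcast addr	-> reject (either direction)
 *	low-latency, unicast, inbound		-> accept without comparing
 *	low-latency, unicast, outbound		-> accept iff src == if addr
 *	regular, multicast/broadcast, outbound	-> reject
 *	regular, multicast/broadcast, inbound	-> accept
 *	regular, unicast, inbound		-> accept iff dst == if addr
 *	regular, unicast, outbound		-> accept iff src == if addr
 */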

/*
 * Checks whether a packet matches the specified flow's description.
 * This is used for validating outbound packets.
 */
boolean_t
nx_netif_flow_match(struct nx_netif *nif, struct __kern_packet *pkt,
    struct netif_flow *f, uint32_t flags)
{
	struct netif_stats *nifs = &nif->nif_stats;
	struct netif_flowtable *ft;
	struct netif_flowtable_ops *fops;
	struct netif_flow_desc fd;
	boolean_t match = FALSE;
	int err;

	/*
	 * Unlike the lookup case, ft cannot be NULL here because there
	 * should be a table to hold our flow. No locking is needed because
	 * no one can close our channel while we have ongoing syncs.
	 */
	VERIFY((ft = nif->nif_flow_table) != NULL);
	fops = ft->ft_ops;

	/*
	 * We increment error stats here but not when we classify because in
	 * this case a match is expected.
	 */
	err = fops->nfo_info(pkt, &fd, flags);
	if (err != 0) {
		STATS_INC(nifs, NETIF_STATS_VP_FLOW_INFO_ERR);
		DTRACE_SKYWALK3(info__err, struct nx_netif *, nif, int, err,
		    struct __kern_packet *, pkt);
		return FALSE;
	}
	match = fops->nfo_match(&f->nf_desc, &fd);
	if (!match) {
		STATS_INC(nifs, NETIF_STATS_VP_FLOW_NOT_MATCH);
		DTRACE_SKYWALK3(not__match, struct nx_netif *, nif,
		    struct netif_flow *, f, struct __kern_packet *, pkt);
	}
	return match;
}

struct netif_flow *
nx_netif_flow_classify(struct nx_netif *nif, struct __kern_packet *pkt,
    uint32_t flags)
{
	struct netif_stats *nifs = &nif->nif_stats;
	struct netif_flow *f = NULL;
	struct netif_flowtable *ft;
	int err;

	lck_mtx_lock(&nif->nif_flow_lock);
	if ((nif->nif_flow_flags & NETIF_FLOW_FLAG_ENABLED) == 0) {
		STATS_INC(nifs, NETIF_STATS_VP_FLOW_DISABLED);
		DTRACE_SKYWALK1(disabled, struct nx_netif *, nif);
		goto fail;
	}
	if ((ft = nif->nif_flow_table) == NULL) {
		STATS_INC(nifs, NETIF_STATS_VP_FLOW_EMPTY_TABLE);
		DTRACE_SKYWALK1(empty__flowtable, struct nx_netif *, nif);
		goto fail;
	}
	err = ft->ft_ops->nfo_lookup(ft, pkt, flags, &f);
	if (err != 0) {
		/* caller increments counter */
		DTRACE_SKYWALK1(not__found, struct nx_netif *, nif);
		goto fail;
	}
	f->nf_refcnt++;
	lck_mtx_unlock(&nif->nif_flow_lock);
	return f;

fail:
	lck_mtx_unlock(&nif->nif_flow_lock);
	return NULL;
}

void
nx_netif_flow_release(struct nx_netif *nif, struct netif_flow *nf)
{
	lck_mtx_lock(&nif->nif_flow_lock);
	if (--nf->nf_refcnt == 0) {
		wakeup(&nf->nf_refcnt);
	}
	lck_mtx_unlock(&nif->nif_flow_lock);
}
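
/*
 * Illustrative sketch (comment only): nx_netif_flow_classify() and
 * nx_netif_flow_release() must always be used as a pair, because the
 * reference they manage is what nx_netif_flow_remove() waits on before
 * tearing a flow down:
 *
 *	struct netif_flow *f;
 *
 *	if ((f = nx_netif_flow_classify(nif, pkt, flags)) != NULL) {
 *		// ... deliver pkt to the flow's callback ...
 *		nx_netif_flow_release(nif, f);
 *	}
 *
 * nx_netif_demux() below follows this pattern, holding a single reference
 * for an entire run of packets that classify to the same flow.
 */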

static struct netif_flow *
flow_classify(struct nx_netif *nif, struct __kern_packet *pkt, uint32_t flags)
{
	if (nx_netif_vp_accept_all == 0 &&
	    !nx_netif_validate_macaddr(nif, pkt, flags)) {
		return NULL;
	}
	return nx_netif_flow_classify(nif, pkt, flags);
}

errno_t
nx_netif_demux(struct nexus_netif_adapter *nifna,
    struct __kern_packet *pkt_chain, struct __kern_packet **remain,
    uint32_t flags)
{
	struct __kern_packet *pkt = pkt_chain, *next;
	struct __kern_packet *head = NULL, **tailp = &head;
	struct __kern_packet *rhead = NULL, **rtailp = &rhead;
	struct netif_flow *nf, *prev_nf = NULL;
	struct nx_netif *nif = nifna->nifna_netif;
	struct netif_stats *nifs = &nif->nif_stats;
	int c = 0, r = 0, delivered = 0, bytes = 0, rbytes = 0, plen = 0;

	while (pkt != NULL) {
		next = pkt->pkt_nextpkt;
		pkt->pkt_nextpkt = NULL;

		ASSERT((pkt->pkt_pflags & PKT_F_PKT_DATA) == 0);
		plen = ((pkt->pkt_pflags & PKT_F_MBUF_DATA) != 0) ?
		    m_pktlen(pkt->pkt_mbuf) : pkt->pkt_length;

		/*
		 * The returned nf is refcounted to ensure it doesn't
		 * disappear while packets are being delivered.
		 */
		nf = flow_classify(nif, pkt, flags);
		if (nf != NULL) {
			nx_netif_snoop(nif, pkt, TRUE);

			/*
			 * Keep growing the chain until we classify to a
			 * different nf.
			 */
			if (prev_nf != NULL) {
				if (prev_nf != nf) {
					DTRACE_SKYWALK5(deliver,
					    struct nx_netif *, nif,
					    struct netif_flow *, prev_nf,
					    struct __kern_packet *, head,
					    int, c, uint32_t, flags);

					nx_netif_flow_deliver(nif,
					    prev_nf, head, flags);
					nx_netif_flow_release(nif, prev_nf);
					prev_nf = nf;
					head = NULL;
					tailp = &head;
					delivered += c;
					c = 0;
				} else {
					/*
					 * One reference is enough.
					 */
					nx_netif_flow_release(nif, nf);
				}
			} else {
				prev_nf = nf;
			}
			c++;
			bytes += plen;
			*tailp = pkt;
			tailp = &pkt->pkt_nextpkt;
		} else {
			r++;
			rbytes += plen;
			*rtailp = pkt;
			rtailp = &pkt->pkt_nextpkt;
		}
		pkt = next;
	}
	if (head != NULL) {
		ASSERT(prev_nf != NULL);
		DTRACE_SKYWALK5(deliver__last, struct nx_netif *,
		    nif, struct netif_flow *, prev_nf, struct __kern_packet *,
		    pkt, int, c, uint32_t, flags);

		nx_netif_flow_deliver(nif, prev_nf, head, flags);
		nx_netif_flow_release(nif, prev_nf);
		prev_nf = NULL;
		head = NULL;
		tailp = &head;
		delivered += c;
	}
	if (rhead != NULL) {
		if (remain != NULL) {
			*remain = rhead;
		} else {
			nx_netif_free_packet_chain(rhead, NULL);
		}
	}
	STATS_ADD(nifs, NETIF_STATS_VP_FLOW_FOUND, delivered);
	STATS_ADD(nifs, NETIF_STATS_VP_FLOW_NOT_FOUND, r);
	DTRACE_SKYWALK5(demux__delivered, struct nx_netif *,
	    nif, int, delivered, int, r, int, bytes, int, rbytes);
	return 0;
}
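
/*
 * Note on nx_netif_demux() above: consecutive packets that classify to the
 * same flow are coalesced into a single chain and handed to the flow's
 * callback in one nx_netif_flow_deliver() call, so only one classify/release
 * and one callback invocation is paid per run of same-flow packets. Packets
 * that do not classify are either returned to the caller through *remain or
 * freed if the caller did not ask for them back.
 */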

SK_NO_INLINE_ATTRIBUTE
static errno_t
nx_netif_flowtable_init(struct nx_netif *nif, netif_flowtable_type_t type)
{
	struct netif_flowtable *ft;
	struct netif_flowtable_ops *fops;

	switch (type) {
	case FT_TYPE_ETHERTYPE:
		fops = &netif_ethertype_ops;
		break;
	case FT_TYPE_IPV6_ULA:
		fops = &netif_ipv6_ula_ops;
		break;
	default:
		return ENOTSUP;
	}
	ft = fops->nfo_table_alloc(fops);
	if (ft == NULL) {
		return ENOMEM;
	}
	nif->nif_flow_table = ft;
	return 0;
}

SK_NO_INLINE_ATTRIBUTE
static void
nx_netif_flowtable_fini(struct nx_netif *nif)
{
	struct netif_flowtable *ft = nif->nif_flow_table;

	ASSERT(ft != NULL);
	ft->ft_ops->nfo_table_free(ft);
	nif->nif_flow_table = NULL;
}

/*
 * netif does not keep per-flow statistics, so this log message prints a
 * snapshot of the current netif stats at the time of flow creation and
 * removal. For a netif on interfaces like "llwX", the difference between
 * these stats at creation and removal is analogous to flow stats, as there
 * will be at most one flow active at any given time.
 */
static inline void
nx_netif_flow_log(struct nx_netif *nif, struct netif_flow *nf, boolean_t add)
{
	int i;
	struct netif_stats *nifs = &nif->nif_stats;

	os_log(OS_LOG_DEFAULT, "netif flowstats (%s): if %s, nx_port %d, "
	    "ethertype 0x%x, src %s, dst %s", add ? "add" : "remove",
	    if_name(nif->nif_ifp), nf->nf_port, nf->nf_desc.fd_ethertype,
	    ip6_sprintf(&nf->nf_desc.fd_laddr),
	    ip6_sprintf(&nf->nf_desc.fd_raddr));
	for (i = 0; i < __NETIF_STATS_MAX; i++) {
		if (STATS_VAL(nifs, i) == 0) {
			continue;
		}
		os_log(OS_LOG_DEFAULT, "%s: %llu", netif_stats_str(i),
		    STATS_VAL(nifs, i));
	}
}

errno_t
nx_netif_flow_add(struct nx_netif *nif, nexus_port_t port,
    struct netif_flow_desc *desc, void *cb_arg,
    errno_t (*cb_func)(void *, void *, uint32_t),
    struct netif_flow **nfp)
{
	struct netif_flow *nf = NULL;
	struct netif_flowtable *ft;
	struct netif_stats *nifs = &nif->nif_stats;
	boolean_t refcnt_incr = FALSE, new_table = FALSE;
	errno_t err = 0;

	lck_mtx_lock(&nif->nif_flow_lock);
	nf = sk_alloc_type(struct netif_flow, Z_WAITOK | Z_NOFAIL,
	    skmem_tag_netif_flow);
	bcopy(desc, &nf->nf_desc, sizeof(*desc));
	nf->nf_port = port;
	nf->nf_refcnt = 0;
	nf->nf_cb_arg = cb_arg;
	nf->nf_cb_func = cb_func;

	if (++nif->nif_flow_cnt == 1) {
		netif_flowtable_type_t ft_type;

		ft_type = NETIF_IS_LOW_LATENCY(nif) ? FT_TYPE_IPV6_ULA :
		    FT_TYPE_ETHERTYPE;

		err = nx_netif_flowtable_init(nif, ft_type);
		if (err != 0) {
			STATS_INC(nifs, NETIF_STATS_VP_FLOW_TABLE_INIT_FAIL);
			DTRACE_SKYWALK1(flowtable__init__fail,
			    struct nx_netif *, nif);
			goto fail;
		}
		new_table = TRUE;
	}
	refcnt_incr = TRUE;
	ft = nif->nif_flow_table;
	err = ft->ft_ops->nfo_insert(ft, nf);
	if (err != 0) {
		STATS_INC(nifs, NETIF_STATS_VP_FLOW_INSERT_FAIL);
		DTRACE_SKYWALK1(insert__fail, struct nx_netif *, nif);
		goto fail;
	}
	SLIST_INSERT_HEAD(&nif->nif_flow_list, nf, nf_link);
	if (nfp != NULL) {
		*nfp = nf;
	}
	STATS_INC(nifs, NETIF_STATS_VP_FLOW_ADD);
	lck_mtx_unlock(&nif->nif_flow_lock);
	SK_DF(SK_VERB_VP, "flow add successful: if %s, nif 0x%llx",
	    if_name(nif->nif_ifp), SK_KVA(nif));
	nx_netif_flow_log(nif, nf, TRUE);
	return 0;

fail:
	if (nf != NULL) {
		sk_free_type(struct netif_flow, nf);
	}
	if (refcnt_incr && --nif->nif_flow_cnt == 0) {
		if (new_table) {
			nx_netif_flowtable_fini(nif);
		}
	}
	lck_mtx_unlock(&nif->nif_flow_lock);
	SK_ERR("flow add failed: if %s, nif 0x%llx, err %d",
	    if_name(nif->nif_ifp), SK_KVA(nif), err);
	return err;
}

errno_t
nx_netif_flow_remove(struct nx_netif *nif, struct netif_flow *nf)
{
	struct netif_flowtable_ops *fops;
	struct netif_flowtable *ft;
	struct netif_stats *nifs = &nif->nif_stats;

	lck_mtx_lock(&nif->nif_flow_lock);
	SLIST_REMOVE(&nif->nif_flow_list, nf, netif_flow, nf_link);
	ft = nif->nif_flow_table;
	ASSERT(ft != NULL);
	fops = ft->ft_ops;
	fops->nfo_remove(ft, nf);

	while (nf->nf_refcnt > 0) {
		DTRACE_SKYWALK1(wait__refcnt, struct netif_flow *, nf);
		(void) msleep(&nf->nf_refcnt,
		    &nif->nif_flow_lock, (PZERO + 1),
		    __FUNCTION__, NULL);
	}
	if (--nif->nif_flow_cnt == 0) {
		nx_netif_flowtable_fini(nif);
	}
	STATS_INC(nifs, NETIF_STATS_VP_FLOW_REMOVE);
	lck_mtx_unlock(&nif->nif_flow_lock);

	SK_DF(SK_VERB_VP, "flow remove: if %s, nif 0x%llx",
	    if_name(nif->nif_ifp), SK_KVA(nif));
	nx_netif_flow_log(nif, nf, FALSE);
	sk_free_type(struct netif_flow, nf);
	return 0;
}
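
/*
 * Illustrative sketch (comment only): a hypothetical client of the flow API
 * above registers a callback for its nexus port and later tears the flow
 * down; my_flow_cb and my_ctx are placeholders, not real symbols:
 *
 *	static errno_t
 *	my_flow_cb(void *arg, void *pkt_chain, uint32_t flags)
 *	{
 *		// consume the chain handed over by nx_netif_demux()
 *		return 0;
 *	}
 *
 *	struct netif_flow *nf;
 *	err = nx_netif_flow_add(nif, port, &fd, my_ctx, my_flow_cb, &nf);
 *	...
 *	err = nx_netif_flow_remove(nif, nf);
 *
 * nx_netif_flow_remove() sleeps until all classify references have been
 * released, so the callback is never invoked on a flow that has been freed.
 */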

void
nx_netif_flow_init(struct nx_netif *nif)
{
	ifnet_t ifp = nif->nif_ifp;

	if (!ifnet_needs_netif_netagent(ifp) && !NETIF_IS_LOW_LATENCY(nif)) {
		SK_DF(SK_VERB_VP, "%s: flows not supported due to missing "
		    "if_attach_nx flag or invalid interface type",
		    if_name(ifp));
		return;
	}
	if (ifp->if_family != IFNET_FAMILY_ETHERNET) {
		SK_DF(SK_VERB_VP, "%s: flows not supported on "
		    "interface family %d", if_name(ifp), ifp->if_family);
		return;
	}
	ASSERT(nif->nif_flow_flags == 0);
	lck_mtx_init(&nif->nif_flow_lock, &nexus_lock_group,
	    &nexus_lock_attr);

	SLIST_INIT(&nif->nif_flow_list);
	nif->nif_flow_table = NULL;
	nif->nif_flow_cnt = 0;
	nif->nif_flow_flags |= NETIF_FLOW_FLAG_INITIALIZED;

	SK_DF(SK_VERB_VP, "%s: flows initialized", if_name(ifp));
}

void
nx_netif_flow_fini(struct nx_netif *nif)
{
	if ((nif->nif_flow_flags & NETIF_FLOW_FLAG_INITIALIZED) == 0) {
		SK_DF(SK_VERB_VP, "%s: flows not initialized",
		    if_name(nif->nif_ifp));
		return;
	}
	nif->nif_flow_flags &= ~NETIF_FLOW_FLAG_INITIALIZED;

	/* This should've been cleared before we get to this point */
	ASSERT((nif->nif_flow_flags & NETIF_FLOW_FLAG_ENABLED) == 0);
	ASSERT(nif->nif_flow_cnt == 0);
	ASSERT(nif->nif_flow_table == NULL);
	ASSERT(SLIST_EMPTY(&nif->nif_flow_list));

	lck_mtx_destroy(&nif->nif_flow_lock, &nexus_lock_group);

	SK_DF(SK_VERB_VP, "%s: flows uninitialized",
	    if_name(nif->nif_ifp));
}

static void
nx_netif_flow_set_enable(struct nx_netif *nif, boolean_t set)
{
	/*
	 * No locking is needed while checking the initialized bit because
	 * if it were not set, no other flag would be modified.
	 */
	if ((nif->nif_flow_flags & NETIF_FLOW_FLAG_INITIALIZED) == 0) {
		return;
	}
	lck_mtx_lock(&nif->nif_flow_lock);
	if (set) {
		SK_DF(SK_VERB_VP, "%s: flow enable, nif 0x%llx",
		    if_name(nif->nif_ifp), SK_KVA(nif));
		nif->nif_flow_flags |= NETIF_FLOW_FLAG_ENABLED;
	} else {
		SK_DF(SK_VERB_VP, "%s: flow disable, nif 0x%llx",
		    if_name(nif->nif_ifp), SK_KVA(nif));
		nif->nif_flow_flags &= ~NETIF_FLOW_FLAG_ENABLED;
	}
	lck_mtx_unlock(&nif->nif_flow_lock);
}

void
nx_netif_flow_enable(struct nx_netif *nif)
{
	nx_netif_flow_set_enable(nif, TRUE);
}

void
nx_netif_flow_disable(struct nx_netif *nif)
{
	nx_netif_flow_set_enable(nif, FALSE);
}
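
/*
 * Lifecycle note for the API in this file: nx_netif_flow_init() sets up the
 * lock and flow list at attach time, nx_netif_flow_enable()/_disable() gate
 * whether nx_netif_flow_classify() will hand out flows, nx_netif_flow_add()
 * and nx_netif_flow_remove() manage individual flows (the table itself is
 * created with the first flow and destroyed with the last), and
 * nx_netif_flow_fini() tears everything down once all flows are gone.
 */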