/*
 * Copyright (c) 2011-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/mbuf.h>
#include <sys/errno.h>
#include <sys/random.h>
#include <sys/kernel_types.h>
#include <sys/sysctl.h>

#include <kern/zalloc.h>

#include <net/if.h>
#include <net/net_osdep.h>
#include <net/classq/classq.h>
#include <pexpert/pexpert.h>
#include <net/classq/classq_sfb.h>
#include <net/classq/classq_fq_codel.h>
#include <net/pktsched/pktsched.h>
#include <net/pktsched/pktsched_fq_codel.h>
#include <net/flowadv.h>

#include <libkern/libkern.h>

#if SKYWALK
#include <skywalk/os_skywalk_private.h>
#include <skywalk/nexus/netif/nx_netif.h>
#endif /* SKYWALK */

static errno_t ifclassq_dequeue_common(struct ifclassq *, mbuf_svc_class_t,
    u_int32_t, u_int32_t, classq_pkt_t *, classq_pkt_t *, u_int32_t *,
    u_int32_t *, boolean_t, u_int8_t);
static void ifclassq_tbr_dequeue_common(struct ifclassq *, mbuf_svc_class_t,
    boolean_t, classq_pkt_t *, u_int8_t);

static uint64_t ifclassq_def_c_target_qdelay = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, def_c_target_qdelay, CTLFLAG_RW | CTLFLAG_LOCKED,
    &ifclassq_def_c_target_qdelay, "def classic target queue delay in nanoseconds");

static uint64_t ifclassq_def_c_update_interval = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, def_c_update_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_def_c_update_interval,
    "def classic update interval in nanoseconds");

static uint64_t ifclassq_def_l4s_target_qdelay = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, def_l4s_target_qdelay, CTLFLAG_RW | CTLFLAG_LOCKED,
    &ifclassq_def_l4s_target_qdelay, "def L4S target queue delay in nanoseconds");

static uint64_t ifclassq_def_l4s_update_interval = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, def_l4s_update_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_def_l4s_update_interval,
    "def L4S update interval in nanoseconds");

static uint64_t ifclassq_ll_c_target_qdelay = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, ll_c_target_qdelay, CTLFLAG_RW | CTLFLAG_LOCKED,
    &ifclassq_ll_c_target_qdelay, "low latency classic target queue delay in nanoseconds");

static uint64_t ifclassq_ll_c_update_interval = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, ll_c_update_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_ll_c_update_interval,
    "low latency classic update interval in nanoseconds");

static uint64_t ifclassq_ll_l4s_target_qdelay = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, ll_l4s_target_qdelay, CTLFLAG_RW | CTLFLAG_LOCKED,
    &ifclassq_ll_l4s_target_qdelay, "low latency L4S target queue delay in nanoseconds");

static uint64_t ifclassq_ll_l4s_update_interval = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, ll_l4s_update_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_ll_l4s_update_interval,
    "low latency L4S update interval in nanoseconds");

uint32_t ifclassq_enable_l4s = 1;
SYSCTL_UINT(_net_classq, OID_AUTO, enable_l4s,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_enable_l4s, 0,
    "enable/disable L4S");
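
/*
 * The tunables above are published under the net.classq sysctl node (for
 * example net.classq.def_c_target_qdelay or net.classq.enable_l4s) and are
 * writable at runtime (CTLFLAG_RW).  For the delay/interval knobs a value
 * of 0 means "use the built-in default", as resolved later in
 * ifclassq_calc_target_qdelay() and ifclassq_calc_update_interval().
 */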

#if DEBUG || DEVELOPMENT
uint32_t ifclassq_flow_control_adv = 1; /* flow control advisory */
SYSCTL_UINT(_net_classq, OID_AUTO, flow_control_adv,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_flow_control_adv, 1,
    "enable/disable flow control advisory");

uint16_t fq_codel_quantum = 0;
#endif /* DEBUG || DEVELOPMENT */

static KALLOC_TYPE_DEFINE(ifcq_zone, struct ifclassq, NET_KT_DEFAULT);
LCK_ATTR_DECLARE(ifcq_lock_attr, 0, 0);
static LCK_GRP_DECLARE(ifcq_lock_group, "ifclassq locks");

void
classq_init(void)
{
	_CASSERT(MBUF_TC_BE == 0);
	_CASSERT(MBUF_SC_BE == 0);
	_CASSERT(IFCQ_SC_MAX == MBUF_SC_MAX_CLASSES);
#if DEBUG || DEVELOPMENT
	PE_parse_boot_argn("fq_codel_quantum", &fq_codel_quantum,
	    sizeof(fq_codel_quantum));
	PE_parse_boot_argn("ifclassq_def_c_target_qdelay", &ifclassq_def_c_target_qdelay,
	    sizeof(ifclassq_def_c_target_qdelay));
	PE_parse_boot_argn("ifclassq_def_c_update_interval",
	    &ifclassq_def_c_update_interval, sizeof(ifclassq_def_c_update_interval));
	PE_parse_boot_argn("ifclassq_def_l4s_target_qdelay", &ifclassq_def_l4s_target_qdelay,
	    sizeof(ifclassq_def_l4s_target_qdelay));
	PE_parse_boot_argn("ifclassq_def_l4s_update_interval",
	    &ifclassq_def_l4s_update_interval, sizeof(ifclassq_def_l4s_update_interval));
	PE_parse_boot_argn("ifclassq_ll_c_target_qdelay", &ifclassq_ll_c_target_qdelay,
	    sizeof(ifclassq_ll_c_target_qdelay));
	PE_parse_boot_argn("ifclassq_ll_c_update_interval",
	    &ifclassq_ll_c_update_interval, sizeof(ifclassq_ll_c_update_interval));
	PE_parse_boot_argn("ifclassq_ll_l4s_target_qdelay", &ifclassq_ll_l4s_target_qdelay,
	    sizeof(ifclassq_ll_l4s_target_qdelay));
	PE_parse_boot_argn("ifclassq_ll_l4s_update_interval",
	    &ifclassq_ll_l4s_update_interval, sizeof(ifclassq_ll_l4s_update_interval));
#endif /* DEBUG || DEVELOPMENT */
	fq_codel_init();
}
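
/*
 * Illustrative only (DEBUG || DEVELOPMENT kernels): the PE_parse_boot_argn()
 * calls above allow the same tunables to be seeded from boot-args, e.g.
 *
 *	fq_codel_quantum=1514 ifclassq_def_c_target_qdelay=10000000
 *
 * where the values shown are hypothetical examples (bytes and nanoseconds,
 * respectively), not recommendations.
 */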

int
ifclassq_setup(struct ifclassq *ifq, struct ifnet *ifp, uint32_t sflags)
{
	int err = 0;

	IFCQ_LOCK(ifq);
	VERIFY(IFCQ_IS_EMPTY(ifq));
	ifq->ifcq_ifp = ifp;
	IFCQ_LEN(ifq) = 0;
	IFCQ_BYTES(ifq) = 0;
	bzero(&ifq->ifcq_xmitcnt, sizeof(ifq->ifcq_xmitcnt));
	bzero(&ifq->ifcq_dropcnt, sizeof(ifq->ifcq_dropcnt));

	VERIFY(!IFCQ_TBR_IS_ENABLED(ifq));
	VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
	VERIFY(ifq->ifcq_flags == 0);
	VERIFY(ifq->ifcq_sflags == 0);
	VERIFY(ifq->ifcq_disc == NULL);

	if (ifp->if_eflags & IFEF_TXSTART) {
		u_int32_t maxlen = 0;

		if ((maxlen = IFCQ_MAXLEN(ifq)) == 0) {
			maxlen = if_sndq_maxlen;
		}
		IFCQ_SET_MAXLEN(ifq, maxlen);

		if (IFCQ_MAXLEN(ifq) != if_sndq_maxlen &&
		    IFCQ_TARGET_QDELAY(ifq) == 0) {
			/*
			 * Choose static queues because the interface has
			 * a maximum queue size set.
			 */
			sflags &= ~PKTSCHEDF_QALG_DELAYBASED;
		}
		ifq->ifcq_sflags = sflags;
		err = ifclassq_pktsched_setup(ifq);
		if (err == 0) {
			ifq->ifcq_flags = (IFCQF_READY | IFCQF_ENABLED);
		}
	}
	IFCQ_UNLOCK(ifq);
	return err;
}

void
ifclassq_teardown(struct ifclassq *ifq)
{
	IFCQ_LOCK(ifq);
	if (IFCQ_IS_DESTROYED(ifq)) {
		ASSERT((ifq->ifcq_flags & ~IFCQF_DESTROYED) == 0);
		goto done;
	}
	if (IFCQ_IS_READY(ifq)) {
		if (IFCQ_TBR_IS_ENABLED(ifq)) {
			struct tb_profile tb =
			    { .rate = 0, .percent = 0, .depth = 0 };
			(void) ifclassq_tbr_set(ifq, &tb, FALSE);
		}
		pktsched_teardown(ifq);
		ifq->ifcq_flags &= ~IFCQF_READY;
	}
	ifq->ifcq_sflags = 0;
	VERIFY(IFCQ_IS_EMPTY(ifq));
	VERIFY(!IFCQ_TBR_IS_ENABLED(ifq));
	VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
	VERIFY(ifq->ifcq_flags == 0);
	VERIFY(ifq->ifcq_sflags == 0);
	VERIFY(ifq->ifcq_disc == NULL);
	IFCQ_LEN(ifq) = 0;
	IFCQ_BYTES(ifq) = 0;
	IFCQ_MAXLEN(ifq) = 0;
	bzero(&ifq->ifcq_xmitcnt, sizeof(ifq->ifcq_xmitcnt));
	bzero(&ifq->ifcq_dropcnt, sizeof(ifq->ifcq_dropcnt));
	ifq->ifcq_flags |= IFCQF_DESTROYED;
done:
	IFCQ_UNLOCK(ifq);
}

int
ifclassq_pktsched_setup(struct ifclassq *ifq)
{
	struct ifnet *ifp = ifq->ifcq_ifp;
	classq_pkt_type_t ptype = QP_MBUF;
	int err = 0;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(ifp->if_eflags & IFEF_TXSTART);
#if SKYWALK
	ptype = ((ifp->if_eflags & IFEF_SKYWALK_NATIVE) != 0) ? QP_PACKET :
	    QP_MBUF;
#endif /* SKYWALK */

	err = pktsched_setup(ifq, PKTSCHEDT_FQ_CODEL, ifq->ifcq_sflags, ptype);

	return err;
}

void
ifclassq_set_maxlen(struct ifclassq *ifq, u_int32_t maxqlen)
{
	IFCQ_LOCK(ifq);
	if (maxqlen == 0) {
		maxqlen = if_sndq_maxlen;
	}
	IFCQ_SET_MAXLEN(ifq, maxqlen);
	IFCQ_UNLOCK(ifq);
}

u_int32_t
ifclassq_get_maxlen(struct ifclassq *ifq)
{
	return IFCQ_MAXLEN(ifq);
}
int
ifclassq_get_len(struct ifclassq *ifq, mbuf_svc_class_t sc, u_int8_t grp_idx,
    u_int32_t *packets, u_int32_t *bytes)
{
	int err = 0;

	IFCQ_LOCK(ifq);
	if ((ifq->ifcq_flags & (IFCQF_READY | IFCQF_ENABLED)) !=
	    (IFCQF_READY | IFCQF_ENABLED)) {
		IFCQ_UNLOCK(ifq);
		return ENXIO;
	}
	if (sc == MBUF_SC_UNSPEC && grp_idx == IF_CLASSQ_ALL_GRPS) {
		VERIFY(packets != NULL);
		if (fq_if_is_all_paced(ifq)) {
			*packets = 0;
		} else {
			*packets = IFCQ_LEN(ifq);
		}
	} else {
		cqrq_stat_sc_t req = { sc, grp_idx, 0, 0 };

		VERIFY(MBUF_VALID_SC(sc) || sc == MBUF_SC_UNSPEC);

		err = fq_if_request_classq(ifq, CLASSQRQ_STAT_SC, &req);
		if (packets != NULL) {
			*packets = req.packets;
		}
		if (bytes != NULL) {
			*bytes = req.bytes;
		}
	}
	KDBG(AQM_KTRACE_STATS_GET_QLEN, ifq->ifcq_ifp->if_index,
	    packets ? *packets : 0, bytes ? *bytes : 0, fq_if_is_all_paced(ifq));

	IFCQ_UNLOCK(ifq);

#if SKYWALK
	struct ifnet *ifp = ifq->ifcq_ifp;

	if (__improbable(ifp->if_na_ops != NULL &&
	    ifp->if_na_ops->ni_get_len != NULL)) {
		err = ifp->if_na_ops->ni_get_len(ifp->if_na, sc, packets,
		    bytes, err);
	}
#endif /* SKYWALK */

	return err;
}

inline void
ifclassq_set_packet_metadata(struct ifclassq *ifq, struct ifnet *ifp,
    classq_pkt_t *p)
{
	if (!IFNET_IS_CELLULAR(ifp)) {
		return;
	}

	switch (p->cp_ptype) {
	case QP_MBUF: {
		struct mbuf *m = p->cp_mbuf;
		m->m_pkthdr.pkt_flags |= PKTF_VALID_UNSENT_DATA;
		m->m_pkthdr.bufstatus_if = IFCQ_BYTES(ifq);
		m->m_pkthdr.bufstatus_sndbuf = (uint32_t)ifp->if_sndbyte_unsent;
		break;
	}

#if SKYWALK
	case QP_PACKET:
		/*
		 * Support for equivalent of mbuf_get_unsent_data_bytes()
		 * is not needed in the Skywalk architecture.
		 */
		break;
#endif /* SKYWALK */

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
}

errno_t
ifclassq_enqueue(struct ifclassq *ifq, classq_pkt_t *head, classq_pkt_t *tail,
    u_int32_t cnt, u_int32_t bytes, boolean_t *pdrop)
{
	return fq_if_enqueue_classq(ifq, head, tail, cnt, bytes, pdrop);
}

errno_t
ifclassq_dequeue(struct ifclassq *ifq, u_int32_t pkt_limit,
    u_int32_t byte_limit, classq_pkt_t *head, classq_pkt_t *tail,
    u_int32_t *cnt, u_int32_t *len, u_int8_t grp_idx)
{
	return ifclassq_dequeue_common(ifq, MBUF_SC_UNSPEC, pkt_limit,
	    byte_limit, head, tail, cnt, len, FALSE, grp_idx);
}

errno_t
ifclassq_dequeue_sc(struct ifclassq *ifq, mbuf_svc_class_t sc,
    u_int32_t pkt_limit, u_int32_t byte_limit, classq_pkt_t *head,
    classq_pkt_t *tail, u_int32_t *cnt, u_int32_t *len, u_int8_t grp_idx)
{
	return ifclassq_dequeue_common(ifq, sc, pkt_limit, byte_limit,
	    head, tail, cnt, len, TRUE, grp_idx);
}
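
/*
 * Illustrative caller, e.g. a hypothetical driver start routine that was
 * handed a struct ifclassq *ifq; the 32-packet/64 KB limits, group index 0
 * and tx_one_mbuf() are made-up example values and names.  On success the
 * packets form a chain linked through m_nextpkt (cp_kpkt for Skywalk-native
 * queues), and EAGAIN means nothing was eligible for dequeue:
 *
 *	classq_pkt_t head = CLASSQ_PKT_INITIALIZER(head);
 *	classq_pkt_t tail = CLASSQ_PKT_INITIALIZER(tail);
 *	u_int32_t cnt = 0, len = 0;
 *
 *	if (ifclassq_dequeue(ifq, 32, 64 * 1024, &head, &tail,
 *	    &cnt, &len, 0) == 0) {
 *		struct mbuf *m = head.cp_mbuf;
 *		while (m != NULL) {
 *			struct mbuf *next = m->m_nextpkt;
 *			m->m_nextpkt = NULL;
 *			tx_one_mbuf(m);
 *			m = next;
 *		}
 *	}
 */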

static errno_t
ifclassq_dequeue_common_default(struct ifclassq *ifq, mbuf_svc_class_t sc,
    u_int32_t pkt_limit, u_int32_t byte_limit, classq_pkt_t *head,
    classq_pkt_t *tail, u_int32_t *cnt, u_int32_t *len, boolean_t drvmgt,
    u_int8_t grp_idx)
{
	struct ifnet *ifp = ifq->ifcq_ifp;
	u_int32_t i = 0, l = 0;
	classq_pkt_t first = CLASSQ_PKT_INITIALIZER(first);
	classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);

	VERIFY(!drvmgt || MBUF_VALID_SC(sc));

	if (IFCQ_TBR_IS_ENABLED(ifq)) {
		goto dequeue_loop;
	}

	/*
	 * If the scheduler supports dequeuing multiple packets at the
	 * same time, call that entry point instead.
	 */
	if (drvmgt) {
		int err;

		IFCQ_LOCK_SPIN(ifq);
		err = fq_if_dequeue_sc_classq_multi(ifq, sc, pkt_limit,
		    byte_limit, head, tail, cnt, len, grp_idx);
		IFCQ_UNLOCK(ifq);

		if (err == 0 && head->cp_mbuf == NULL) {
			err = EAGAIN;
		}
		return err;
	} else {
		int err;

		IFCQ_LOCK_SPIN(ifq);
		err = fq_if_dequeue_classq_multi(ifq, pkt_limit, byte_limit,
		    head, tail, cnt, len, grp_idx);
		IFCQ_UNLOCK(ifq);

		if (err == 0 && head->cp_mbuf == NULL) {
			err = EAGAIN;
		}
		return err;
	}

dequeue_loop:
	VERIFY(IFCQ_TBR_IS_ENABLED(ifq));
	IFCQ_LOCK_SPIN(ifq);

	while (i < pkt_limit && l < byte_limit) {
		if (drvmgt) {
			IFCQ_TBR_DEQUEUE_SC(ifq, sc, head, grp_idx);
		} else {
			IFCQ_TBR_DEQUEUE(ifq, head, grp_idx);
		}

		if (head->cp_mbuf == NULL) {
			break;
		}

		if (first.cp_mbuf == NULL) {
			first = *head;
		}

		switch (head->cp_ptype) {
		case QP_MBUF:
			head->cp_mbuf->m_nextpkt = NULL;
			l += head->cp_mbuf->m_pkthdr.len;
			ifclassq_set_packet_metadata(ifq, ifp, head);
			if (last.cp_mbuf != NULL) {
				last.cp_mbuf->m_nextpkt = head->cp_mbuf;
			}
			break;

#if SKYWALK
		case QP_PACKET:
			head->cp_kpkt->pkt_nextpkt = NULL;
			l += head->cp_kpkt->pkt_length;
			ifclassq_set_packet_metadata(ifq, ifp, head);
			if (last.cp_kpkt != NULL) {
				last.cp_kpkt->pkt_nextpkt = head->cp_kpkt;
			}
			break;
#endif /* SKYWALK */

		default:
			VERIFY(0);
			/* NOTREACHED */
			__builtin_unreachable();
		}

		last = *head;
		i++;
	}

	IFCQ_UNLOCK(ifq);

	if (tail != NULL) {
		*tail = last;
	}
	if (cnt != NULL) {
		*cnt = i;
	}
	if (len != NULL) {
		*len = l;
	}

	*head = first;
	return (first.cp_mbuf != NULL) ? 0 : EAGAIN;
}

static errno_t
ifclassq_dequeue_common(struct ifclassq *ifq, mbuf_svc_class_t sc,
    u_int32_t pkt_limit, u_int32_t byte_limit, classq_pkt_t *head,
    classq_pkt_t *tail, u_int32_t *cnt, u_int32_t *len, boolean_t drvmgt,
    u_int8_t grp_idx)
{
#if SKYWALK
	struct ifnet *ifp = ifq->ifcq_ifp;

	if (__improbable(ifp->if_na_ops != NULL &&
	    ifp->if_na_ops->ni_dequeue != NULL)) {
		/*
		 * TODO:
		 * We should be changing the pkt/byte limit to the
		 * available space in the next filter. But this is not
		 * useful until we can flow control the whole chain of
		 * filters.
		 */
		errno_t err = ifclassq_dequeue_common_default(ifq, sc,
		    pkt_limit, byte_limit, head, tail, cnt, len, drvmgt, grp_idx);

		return ifp->if_na_ops->ni_dequeue(ifp->if_na, sc, pkt_limit,
		    byte_limit, head, tail, cnt, len, drvmgt, err);
	}
#endif /* SKYWALK */
	return ifclassq_dequeue_common_default(ifq, sc,
	    pkt_limit, byte_limit, head, tail, cnt, len, drvmgt, grp_idx);
}

void
ifclassq_update(struct ifclassq *ifq, cqev_t ev)
{
	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(IFCQ_IS_READY(ifq));
	fq_if_request_classq(ifq, CLASSQRQ_EVENT, (void *)ev);
}

int
ifclassq_attach(struct ifclassq *ifq, u_int32_t type, void *discipline)
{
	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(ifq->ifcq_disc == NULL);
	ifq->ifcq_type = type;
	ifq->ifcq_disc = discipline;
	return 0;
}

void
ifclassq_detach(struct ifclassq *ifq)
{
	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(ifq->ifcq_disc == NULL);
	ifq->ifcq_type = PKTSCHEDT_NONE;
}

int
ifclassq_getqstats(struct ifclassq *ifq, u_int8_t gid, u_int32_t qid, void *ubuf,
    u_int32_t *nbytes)
{
	struct if_ifclassq_stats *ifqs;
	int err;

	if (*nbytes < sizeof(*ifqs)) {
		return EINVAL;
	}

	ifqs = kalloc_type(struct if_ifclassq_stats,
	    Z_WAITOK | Z_ZERO | Z_NOFAIL);

	IFCQ_LOCK(ifq);
	if (!IFCQ_IS_READY(ifq)) {
		IFCQ_UNLOCK(ifq);
		kfree_type(struct if_ifclassq_stats, ifqs);
		return ENXIO;
	}

	ifqs->ifqs_len = IFCQ_LEN(ifq);
	ifqs->ifqs_maxlen = IFCQ_MAXLEN(ifq);
	*(&ifqs->ifqs_xmitcnt) = *(&ifq->ifcq_xmitcnt);
	*(&ifqs->ifqs_dropcnt) = *(&ifq->ifcq_dropcnt);
	ifqs->ifqs_scheduler = ifq->ifcq_type;
	ifqs->ifqs_doorbells = ifq->ifcq_doorbells;

	err = pktsched_getqstats(ifq, gid, qid, ifqs);
	IFCQ_UNLOCK(ifq);

	if (err == 0 && (err = copyout((caddr_t)ifqs,
	    (user_addr_t)(uintptr_t)ubuf, sizeof(*ifqs))) == 0) {
		*nbytes = sizeof(*ifqs);
	}

	kfree_type(struct if_ifclassq_stats, ifqs);

	return err;
}

const char *
ifclassq_ev2str(cqev_t ev)
{
	const char *c;

	switch (ev) {
	case CLASSQ_EV_LINK_BANDWIDTH:
		c = "LINK_BANDWIDTH";
		break;

	case CLASSQ_EV_LINK_LATENCY:
		c = "LINK_LATENCY";
		break;

	case CLASSQ_EV_LINK_MTU:
		c = "LINK_MTU";
		break;

	case CLASSQ_EV_LINK_UP:
		c = "LINK_UP";
		break;

	case CLASSQ_EV_LINK_DOWN:
		c = "LINK_DOWN";
		break;

	default:
		c = "UNKNOWN";
		break;
	}

	return c;
}

/*
 * Internal representation of the token bucket parameters:
 *	rate:	bytes per machine-clock tick, scaled by 2^32, i.e.
 *		(((bits_per_sec) / 8) << 32) / machclk_freq
 *	depth:	bytes << 32
 */
#define TBR_SHIFT 32
#define TBR_SCALE(x) ((int64_t)(x) << TBR_SHIFT)
#define TBR_UNSCALE(x) ((x) >> TBR_SHIFT)
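
/*
 * Worked example (the machclk_freq value is hypothetical): for a profile
 * rate of 100 Mbit/s and machclk_freq of 24 MHz, ifclassq_tbr_set() below
 * computes
 *
 *	tbr_rate = TBR_SCALE(100000000 / 8) / 24000000
 *	         = (12500000 << 32) / 24000000	(~2.24e9)
 *
 * so TBR_UNSCALE(tbr_rate) is roughly 0.52 bytes of credit earned per
 * machine-clock tick, while a dequeued 1500-byte packet debits
 * TBR_SCALE(1500) tokens.
 */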

void
ifclassq_tbr_dequeue(struct ifclassq *ifq, classq_pkt_t *pkt, u_int8_t grp_idx)
{
	ifclassq_tbr_dequeue_common(ifq, MBUF_SC_UNSPEC, FALSE, pkt, grp_idx);
}

void
ifclassq_tbr_dequeue_sc(struct ifclassq *ifq, mbuf_svc_class_t sc,
    classq_pkt_t *pkt, u_int8_t grp_idx)
{
	ifclassq_tbr_dequeue_common(ifq, sc, TRUE, pkt, grp_idx);
}

static void
ifclassq_tbr_dequeue_common(struct ifclassq *ifq, mbuf_svc_class_t sc,
    boolean_t drvmgt, classq_pkt_t *pkt, u_int8_t grp_idx)
{
	struct tb_regulator *tbr;
	int64_t interval;
	u_int64_t now;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	VERIFY(!drvmgt || MBUF_VALID_SC(sc));
	VERIFY(IFCQ_TBR_IS_ENABLED(ifq));

	*pkt = CLASSQ_PKT_INITIALIZER(*pkt);
	tbr = &ifq->ifcq_tbr;
	/* update token only when it is negative */
	if (tbr->tbr_token <= 0) {
		now = read_machclk();
		interval = now - tbr->tbr_last;
		if (interval >= tbr->tbr_filluptime) {
			tbr->tbr_token = tbr->tbr_depth;
		} else {
			tbr->tbr_token += interval * tbr->tbr_rate;
			if (tbr->tbr_token > tbr->tbr_depth) {
				tbr->tbr_token = tbr->tbr_depth;
			}
		}
		tbr->tbr_last = now;
	}
	/* if token is still negative, don't allow dequeue */
	if (tbr->tbr_token <= 0) {
		return;
	}

	/*
	 * ifclassq takes precedence over ALTQ queue;
	 * ifcq_drain count is adjusted by the caller.
	 */
	if (drvmgt) {
		fq_if_dequeue_sc_classq(ifq, sc, pkt, grp_idx);
	} else {
		fq_if_dequeue_classq(ifq, pkt, grp_idx);
	}

	if (pkt->cp_mbuf != NULL) {
		switch (pkt->cp_ptype) {
		case QP_MBUF:
			tbr->tbr_token -= TBR_SCALE(m_pktlen(pkt->cp_mbuf));
			break;

#if SKYWALK
		case QP_PACKET:
			tbr->tbr_token -=
			    TBR_SCALE(pkt->cp_kpkt->pkt_length);
			break;
#endif /* SKYWALK */

		default:
			VERIFY(0);
			/* NOTREACHED */
		}
	}
}

/*
 * set a token bucket regulator.
 * if the specified rate is zero, the token bucket regulator is deleted.
 */
int
ifclassq_tbr_set(struct ifclassq *ifq, struct tb_profile *profile,
    boolean_t update)
{
	struct tb_regulator *tbr;
	struct ifnet *ifp = ifq->ifcq_ifp;
	u_int64_t rate, old_rate;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(IFCQ_IS_READY(ifq));

	VERIFY(machclk_freq != 0);

	tbr = &ifq->ifcq_tbr;
	old_rate = tbr->tbr_rate_raw;

	rate = profile->rate;
	if (profile->percent > 0) {
		u_int64_t eff_rate;

		if (profile->percent > 100) {
			return EINVAL;
		}
		if ((eff_rate = ifp->if_output_bw.eff_bw) == 0) {
			return ENODEV;
		}
		rate = (eff_rate * profile->percent) / 100;
	}

	if (rate == 0) {
		if (!IFCQ_TBR_IS_ENABLED(ifq)) {
			return 0;
		}

		if (pktsched_verbose) {
			printf("%s: TBR disabled\n", if_name(ifp));
		}

		/* disable this TBR */
		ifq->ifcq_flags &= ~IFCQF_TBR;
		bzero(tbr, sizeof(*tbr));
		ifnet_set_start_cycle(ifp, NULL);
		if (update) {
			ifclassq_update(ifq, CLASSQ_EV_LINK_BANDWIDTH);
		}
		return 0;
	}

	if (pktsched_verbose) {
		printf("%s: TBR %s (rate %llu bps depth %u)\n", if_name(ifp),
		    (ifq->ifcq_flags & IFCQF_TBR) ? "reconfigured" :
		    "enabled", rate, profile->depth);
	}

	/* set the new TBR */
	bzero(tbr, sizeof(*tbr));
	tbr->tbr_rate_raw = rate;
	tbr->tbr_percent = profile->percent;
	ifq->ifcq_flags |= IFCQF_TBR;

	/*
	 * Note that the TBR fill up time (hence the ifnet restart time)
	 * is directly related to the specified TBR depth. The ideal
	 * depth value should be computed such that the interval time
	 * between each successive wakeup is adequately spaced apart,
	 * in order to reduce scheduling overheads. A target interval
	 * of 10 ms seems to provide good performance balance. This can be
	 * overridden by specifying the depth profile. Values smaller than
	 * the ideal depth will reduce delay at the expense of CPU cycles.
	 */
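	/*
	 * Worked example (hypothetical numbers: rate 100 Mbit/s, MTU 1500):
	 * the loop below grows the candidate depth in MTU-sized steps until
	 * draining it takes longer than the ~10 ms target interval, i.e.
	 * until i * 1500 bytes exceeds 12.5 MB/s * 10 ms = 125000 bytes,
	 * which happens at i = 84, giving idepth = TBR_SCALE(126000).
	 */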
	tbr->tbr_rate = TBR_SCALE(rate / 8) / machclk_freq;
	if (tbr->tbr_rate > 0) {
		u_int32_t mtu = ifp->if_mtu;
		int64_t ival, idepth = 0;
		int i;

		if (mtu < IF_MINMTU) {
			mtu = IF_MINMTU;
		}

		ival = pktsched_nsecs_to_abstime(10 * NSEC_PER_MSEC); /* 10ms */

		for (i = 1;; i++) {
			idepth = TBR_SCALE(i * mtu);
			if ((idepth / tbr->tbr_rate) > ival) {
				break;
			}
		}
		VERIFY(idepth > 0);

		tbr->tbr_depth = TBR_SCALE(profile->depth);
		if (tbr->tbr_depth == 0) {
			tbr->tbr_filluptime = idepth / tbr->tbr_rate;
			/* a little fudge factor to get closer to rate */
			tbr->tbr_depth = idepth + (idepth >> 3);
		} else {
			tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
		}
	} else {
		tbr->tbr_depth = TBR_SCALE(profile->depth);
		tbr->tbr_filluptime = 0xffffffffffffffffLL;
	}
	tbr->tbr_token = tbr->tbr_depth;
	tbr->tbr_last = read_machclk();

	if (tbr->tbr_rate > 0 && (ifp->if_flags & IFF_UP)) {
		struct timespec ts =
		    { 0, (long)pktsched_abs_to_nsecs(tbr->tbr_filluptime) };
		if (pktsched_verbose) {
			printf("%s: TBR calculated tokens %lld "
			    "filluptime %llu ns\n", if_name(ifp),
			    TBR_UNSCALE(tbr->tbr_token),
			    pktsched_abs_to_nsecs(tbr->tbr_filluptime));
		}
		ifnet_set_start_cycle(ifp, &ts);
	} else {
		if (pktsched_verbose) {
			if (tbr->tbr_rate == 0) {
				printf("%s: TBR calculated tokens %lld "
				    "infinite filluptime\n", if_name(ifp),
				    TBR_UNSCALE(tbr->tbr_token));
			} else if (!(ifp->if_flags & IFF_UP)) {
				printf("%s: TBR suspended (link is down)\n",
				    if_name(ifp));
			}
		}
		ifnet_set_start_cycle(ifp, NULL);
	}
	if (update && tbr->tbr_rate_raw != old_rate) {
		ifclassq_update(ifq, CLASSQ_EV_LINK_BANDWIDTH);
	}

	return 0;
}

void
ifclassq_calc_target_qdelay(struct ifnet *ifp, uint64_t *if_target_qdelay,
    uint32_t flags)
{
	uint64_t qdelay = 0, qdelay_configed = 0, qdely_default = 0;

	if (flags == IF_CLASSQ_DEF) {
		qdelay = IFCQ_TARGET_QDELAY(ifp->if_snd);
	}

	switch (flags) {
	case IF_CLASSQ_DEF:
		qdelay_configed = ifclassq_def_c_target_qdelay;
		qdely_default = IFQ_DEF_C_TARGET_DELAY;
		break;
	case IF_CLASSQ_L4S:
		qdelay_configed = ifclassq_def_l4s_target_qdelay;
		if (ifp->if_subfamily == IFNET_SUBFAMILY_WIFI ||
		    ifp->if_family == IFNET_FAMILY_CELLULAR) {
			qdely_default = IFQ_DEF_L4S_WIRELESS_TARGET_DELAY;
		} else {
			qdely_default = IFQ_DEF_L4S_TARGET_DELAY;
		}
		break;
	case IF_CLASSQ_LOW_LATENCY:
		qdelay_configed = ifclassq_ll_c_target_qdelay;
		qdely_default = IFQ_LL_C_TARGET_DELAY;
		break;
	case (IF_CLASSQ_LOW_LATENCY | IF_CLASSQ_L4S):
		qdelay_configed = ifclassq_ll_l4s_target_qdelay;
		if (ifp->if_subfamily == IFNET_SUBFAMILY_WIFI ||
		    ifp->if_family == IFNET_FAMILY_CELLULAR) {
			qdely_default = IFQ_LL_L4S_WIRELESS_TARGET_DELAY;
		} else {
			qdely_default = IFQ_LL_L4S_TARGET_DELAY;
		}
		break;
	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	if (qdelay_configed != 0) {
		qdelay = qdelay_configed;
	}

	/*
	 * If no target queue delay has been configured, either on the
	 * interface or through the sysctl/boot-arg overrides above, fall
	 * back to the built-in default for this class.
	 */
	if (qdelay == 0) {
		qdelay = qdely_default;
	}

	/*
	 * If a delay has been added to the ifnet start callback for
	 * coalescing, add it to the pre-set target delay because packets
	 * can sit in the queue that much longer.
	 */
	if ((ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
	    ifp->if_start_delay_timeout > 0) {
		qdelay += ifp->if_start_delay_timeout;
	}

	*(if_target_qdelay) = qdelay;
}

void
ifclassq_calc_update_interval(uint64_t *update_interval, uint32_t flags)
{
	uint64_t interval = 0, interval_configed = 0, interval_default = 0;

	switch (flags) {
	case IF_CLASSQ_DEF:
		interval_configed = ifclassq_def_c_update_interval;
		interval_default = IFQ_DEF_C_UPDATE_INTERVAL;
		break;
	case IF_CLASSQ_L4S:
		interval_configed = ifclassq_def_l4s_update_interval;
		interval_default = IFQ_DEF_L4S_UPDATE_INTERVAL;
		break;
	case IF_CLASSQ_LOW_LATENCY:
		interval_configed = ifclassq_ll_c_update_interval;
		interval_default = IFQ_LL_C_UPDATE_INTERVAL;
		break;
	case (IF_CLASSQ_LOW_LATENCY | IF_CLASSQ_L4S):
		interval_configed = ifclassq_ll_l4s_update_interval;
		interval_default = IFQ_LL_L4S_UPDATE_INTERVAL;
		break;
	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* If the system level override is set, use it */
	if (interval_configed != 0) {
		interval = interval_configed;
	}

	/* Otherwise use the default value */
	if (interval == 0) {
		interval = interval_default;
	}

	*update_interval = interval;
}

struct ifclassq *
ifclassq_alloc(void)
{
	struct ifclassq *ifcq;

	ifcq = zalloc_flags(ifcq_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	os_ref_init(&ifcq->ifcq_refcnt, NULL);
	os_ref_retain(&ifcq->ifcq_refcnt);
	lck_mtx_init(&ifcq->ifcq_lock, &ifcq_lock_group, &ifcq_lock_attr);
	return ifcq;
}

void
ifclassq_retain(struct ifclassq *ifcq)
{
	os_ref_retain(&ifcq->ifcq_refcnt);
}

void
ifclassq_release(struct ifclassq **pifcq)
{
	struct ifclassq *ifcq = *pifcq;

	*pifcq = NULL;
	if (os_ref_release(&ifcq->ifcq_refcnt) == 0) {
		ifclassq_teardown(ifcq);
		zfree(ifcq_zone, ifcq);
	}
}
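
/*
 * Lifetime sketch (illustrative): ifclassq_alloc() hands back an object that
 * already holds two references (os_ref_init() plus the explicit retain
 * above).  Each additional long-lived pointer should take ifclassq_retain()
 * and later drop it with ifclassq_release(); the release that brings the
 * count to zero tears the queue down and frees it, and every release NULLs
 * the caller's pointer:
 *
 *	struct ifclassq *ifq = ifclassq_alloc();
 *	...
 *	ifclassq_release(&ifq);		// ifq is NULL afterwards
 */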

int
ifclassq_setup_group(struct ifclassq *ifcq, uint8_t grp_idx, uint8_t flags)
{
	int err;

	IFCQ_LOCK(ifcq);
	VERIFY(ifcq->ifcq_disc != NULL);
	VERIFY(ifcq->ifcq_type == PKTSCHEDT_FQ_CODEL);

	err = fq_if_create_grp(ifcq, grp_idx, flags);
	IFCQ_UNLOCK(ifcq);

	return err;
}

void
ifclassq_set_grp_combined(struct ifclassq *ifcq, uint8_t grp_idx)
{
	IFCQ_LOCK(ifcq);
	VERIFY(ifcq->ifcq_disc != NULL);
	VERIFY(ifcq->ifcq_type == PKTSCHEDT_FQ_CODEL);

	fq_if_set_grp_combined(ifcq, grp_idx);
	IFCQ_UNLOCK(ifcq);
}

void
ifclassq_set_grp_separated(struct ifclassq *ifcq, uint8_t grp_idx)
{
	IFCQ_LOCK(ifcq);
	VERIFY(ifcq->ifcq_disc != NULL);
	VERIFY(ifcq->ifcq_type == PKTSCHEDT_FQ_CODEL);

	fq_if_set_grp_separated(ifcq, grp_idx);
	IFCQ_UNLOCK(ifcq);
}