1 | /* |
2 | * Copyright (c) 2011-2021 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | |
29 | #include <sys/cdefs.h> |
30 | #include <sys/param.h> |
31 | #include <sys/mbuf.h> |
32 | #include <sys/errno.h> |
33 | #include <sys/random.h> |
34 | #include <sys/kernel_types.h> |
35 | #include <sys/sysctl.h> |
36 | |
37 | #include <kern/zalloc.h> |
38 | |
39 | #include <net/if.h> |
40 | #include <net/net_osdep.h> |
41 | #include <net/classq/classq.h> |
42 | #include <pexpert/pexpert.h> |
43 | #include <net/classq/classq_sfb.h> |
44 | #include <net/classq/classq_fq_codel.h> |
45 | #include <net/pktsched/pktsched.h> |
46 | #include <net/pktsched/pktsched_fq_codel.h> |
47 | #include <net/flowadv.h> |
48 | |
49 | #include <libkern/libkern.h> |
50 | |
51 | #if SKYWALK |
52 | #include <skywalk/os_skywalk_private.h> |
53 | #include <skywalk/nexus/netif/nx_netif.h> |
54 | #endif /* SKYWALK */ |
55 | |
/*
 * Forward declarations for the shared dequeue paths used by both the
 * per-service-class and unclassified entry points below.
 */
static errno_t ifclassq_dequeue_common(struct ifclassq *, mbuf_svc_class_t,
    u_int32_t, u_int32_t, classq_pkt_t *, classq_pkt_t *, u_int32_t *,
    u_int32_t *, boolean_t, u_int8_t);
static void ifclassq_tbr_dequeue_common(struct ifclassq *, mbuf_svc_class_t,
    boolean_t, classq_pkt_t *, u_int8_t);

/*
 * AQM tunables, all in nanoseconds; 0 means "use the built-in default".
 * "def" applies to the default queues, "ll" to the low-latency queues;
 * each comes in a classic (c) and an L4S flavor.
 */
static uint64_t ifclassq_def_c_target_qdelay = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, def_c_target_qdelay, CTLFLAG_RW | CTLFLAG_LOCKED,
    &ifclassq_def_c_target_qdelay, "def classic target queue delay in nanoseconds" );

static uint64_t ifclassq_def_c_update_interval = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, def_c_update_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_def_c_update_interval,
    "def classic update interval in nanoseconds" );

static uint64_t ifclassq_def_l4s_target_qdelay = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, def_l4s_target_qdelay, CTLFLAG_RW | CTLFLAG_LOCKED,
    &ifclassq_def_l4s_target_qdelay, "def L4S target queue delay in nanoseconds" );

static uint64_t ifclassq_def_l4s_update_interval = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, def_l4s_update_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_def_l4s_update_interval,
    "def L4S update interval in nanoseconds" );

static uint64_t ifclassq_ll_c_target_qdelay = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, ll_c_target_qdelay, CTLFLAG_RW | CTLFLAG_LOCKED,
    &ifclassq_ll_c_target_qdelay, "low latency classic target queue delay in nanoseconds" );

static uint64_t ifclassq_ll_c_update_interval = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, ll_c_update_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_ll_c_update_interval,
    "low latency classic update interval in nanoseconds" );

static uint64_t ifclassq_ll_l4s_target_qdelay = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, ll_l4s_target_qdelay, CTLFLAG_RW | CTLFLAG_LOCKED,
    &ifclassq_ll_l4s_target_qdelay, "low latency L4S target queue delay in nanoseconds" );

static uint64_t ifclassq_ll_l4s_update_interval = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, ll_l4s_update_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_ll_l4s_update_interval,
    "low latency L4S update interval in nanoseconds" );

/* Global L4S kill switch (read by the scheduler; default on). */
uint32_t ifclassq_enable_l4s = 1;
SYSCTL_UINT(_net_classq, OID_AUTO, enable_l4s,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_enable_l4s, 0,
    "enable/disable L4S" );

#if DEBUG || DEVELOPMENT
uint32_t ifclassq_flow_control_adv = 1; /* flow control advisory */
SYSCTL_UINT(_net_classq, OID_AUTO, flow_control_adv,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_flow_control_adv, 1,
    "enable/disable flow control advisory" );

/* FQ-CoDel quantum override; 0 leaves the scheduler default in place */
uint16_t fq_codel_quantum = 0;
#endif /* DEBUG || DEVELOPMENT */

/* Typed zone and lock group/attr backing every struct ifclassq. */
static KALLOC_TYPE_DEFINE(ifcq_zone, struct ifclassq, NET_KT_DEFAULT);
LCK_ATTR_DECLARE(ifcq_lock_attr, 0, 0);
static LCK_GRP_DECLARE(ifcq_lock_group, "ifclassq locks" );
115 | |
116 | void |
117 | classq_init(void) |
118 | { |
119 | _CASSERT(MBUF_TC_BE == 0); |
120 | _CASSERT(MBUF_SC_BE == 0); |
121 | _CASSERT(IFCQ_SC_MAX == MBUF_SC_MAX_CLASSES); |
122 | #if DEBUG || DEVELOPMENT |
123 | PE_parse_boot_argn("fq_codel_quantum" , &fq_codel_quantum, |
124 | sizeof(fq_codel_quantum)); |
125 | PE_parse_boot_argn("ifclassq_def_c_target_qdelay" , &ifclassq_def_c_target_qdelay, |
126 | sizeof(ifclassq_def_c_target_qdelay)); |
127 | PE_parse_boot_argn("ifclassq_def_c_update_interval" , |
128 | &ifclassq_def_c_update_interval, sizeof(ifclassq_def_c_update_interval)); |
129 | PE_parse_boot_argn("ifclassq_def_l4s_target_qdelay" , &ifclassq_def_l4s_target_qdelay, |
130 | sizeof(ifclassq_def_l4s_target_qdelay)); |
131 | PE_parse_boot_argn("ifclassq_def_l4s_update_interval" , |
132 | &ifclassq_def_l4s_update_interval, sizeof(ifclassq_def_l4s_update_interval)); |
133 | PE_parse_boot_argn("ifclassq_ll_c_target_qdelay" , &ifclassq_ll_c_target_qdelay, |
134 | sizeof(ifclassq_ll_c_target_qdelay)); |
135 | PE_parse_boot_argn("ifclassq_ll_c_update_interval" , |
136 | &ifclassq_ll_c_update_interval, sizeof(ifclassq_ll_c_update_interval)); |
137 | PE_parse_boot_argn("ifclassq_ll_l4s_target_qdelay" , &ifclassq_ll_l4s_target_qdelay, |
138 | sizeof(ifclassq_ll_l4s_target_qdelay)); |
139 | PE_parse_boot_argn("ifclassq_ll_l4s_update_interval" , |
140 | &ifclassq_ll_l4s_update_interval, sizeof(ifclassq_ll_l4s_update_interval)); |
141 | #endif /* DEBUG || DEVELOPMENT */ |
142 | fq_codel_init(); |
143 | } |
144 | |
/*
 * Bind a classq to its interface and bring up the packet scheduler.
 *
 * The queue must be freshly allocated (empty, no scheduler, no flags
 * set).  For transmit-capable interfaces (IFEF_TXSTART) this sizes the
 * send queue and instantiates FQ-CoDel; on success the queue is marked
 * ready and enabled.  Returns 0 or an error from pktsched setup.
 */
int
ifclassq_setup(struct ifclassq *ifq, struct ifnet *ifp, uint32_t sflags)
{
    int err = 0;

    IFCQ_LOCK(ifq);
    VERIFY(IFCQ_IS_EMPTY(ifq));
    ifq->ifcq_ifp = ifp;
    IFCQ_LEN(ifq) = 0;
    IFCQ_BYTES(ifq) = 0;
    bzero(s: &ifq->ifcq_xmitcnt, n: sizeof(ifq->ifcq_xmitcnt));
    bzero(s: &ifq->ifcq_dropcnt, n: sizeof(ifq->ifcq_dropcnt));

    /* a fresh classq must carry no stale scheduler state */
    VERIFY(!IFCQ_TBR_IS_ENABLED(ifq));
    VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
    VERIFY(ifq->ifcq_flags == 0);
    VERIFY(ifq->ifcq_sflags == 0);
    VERIFY(ifq->ifcq_disc == NULL);

    if (ifp->if_eflags & IFEF_TXSTART) {
        u_int32_t maxlen = 0;

        /* zero maxlen means "use the global default" */
        if ((maxlen = IFCQ_MAXLEN(ifq)) == 0) {
            maxlen = if_sndq_maxlen;
        }
        IFCQ_SET_MAXLEN(ifq, maxlen);

        if (IFCQ_MAXLEN(ifq) != if_sndq_maxlen &&
            IFCQ_TARGET_QDELAY(ifq) == 0) {
            /*
             * Choose static queues because the interface has
             * maximum queue size set
             */
            sflags &= ~PKTSCHEDF_QALG_DELAYBASED;
        }
        ifq->ifcq_sflags = sflags;
        err = ifclassq_pktsched_setup(ifq);
        if (err == 0) {
            ifq->ifcq_flags = (IFCQF_READY | IFCQF_ENABLED);
        }
    }
    IFCQ_UNLOCK(ifq);
    return err;
}
189 | |
/*
 * Tear down a classq: remove any token bucket regulator, detach the
 * scheduler, and reset counters and limits.  Idempotent — a queue
 * already marked IFCQF_DESTROYED is left untouched.
 */
void
ifclassq_teardown(struct ifclassq *ifq)
{
    IFCQ_LOCK(ifq);
    if (IFCQ_IS_DESTROYED(ifq)) {
        ASSERT((ifq->ifcq_flags & ~IFCQF_DESTROYED) == 0);
        goto done;
    }
    if (IFCQ_IS_READY(ifq)) {
        if (IFCQ_TBR_IS_ENABLED(ifq)) {
            /* a zero-rate profile deletes the regulator */
            struct tb_profile tb =
            { .rate = 0, .percent = 0, .depth = 0 };
            (void) ifclassq_tbr_set(ifq, &tb, FALSE);
        }
        pktsched_teardown(ifq);
        ifq->ifcq_flags &= ~IFCQF_READY;
    }
    ifq->ifcq_sflags = 0;
    /* after teardown the queue must be empty and scheduler-free */
    VERIFY(IFCQ_IS_EMPTY(ifq));
    VERIFY(!IFCQ_TBR_IS_ENABLED(ifq));
    VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
    VERIFY(ifq->ifcq_flags == 0);
    VERIFY(ifq->ifcq_sflags == 0);
    VERIFY(ifq->ifcq_disc == NULL);
    IFCQ_LEN(ifq) = 0;
    IFCQ_BYTES(ifq) = 0;
    IFCQ_MAXLEN(ifq) = 0;
    bzero(s: &ifq->ifcq_xmitcnt, n: sizeof(ifq->ifcq_xmitcnt));
    bzero(s: &ifq->ifcq_dropcnt, n: sizeof(ifq->ifcq_dropcnt));
    ifq->ifcq_flags |= IFCQF_DESTROYED;
done:
    IFCQ_UNLOCK(ifq);
}
223 | |
224 | int |
225 | ifclassq_pktsched_setup(struct ifclassq *ifq) |
226 | { |
227 | struct ifnet *ifp = ifq->ifcq_ifp; |
228 | classq_pkt_type_t ptype = QP_MBUF; |
229 | int err = 0; |
230 | |
231 | IFCQ_LOCK_ASSERT_HELD(ifq); |
232 | VERIFY(ifp->if_eflags & IFEF_TXSTART); |
233 | #if SKYWALK |
234 | ptype = ((ifp->if_eflags & IFEF_SKYWALK_NATIVE) != 0) ? QP_PACKET : |
235 | QP_MBUF; |
236 | #endif /* SKYWALK */ |
237 | |
238 | err = pktsched_setup(ifq, PKTSCHEDT_FQ_CODEL, ifq->ifcq_sflags, ptype); |
239 | |
240 | return err; |
241 | } |
242 | |
243 | void |
244 | ifclassq_set_maxlen(struct ifclassq *ifq, u_int32_t maxqlen) |
245 | { |
246 | IFCQ_LOCK(ifq); |
247 | if (maxqlen == 0) { |
248 | maxqlen = if_sndq_maxlen; |
249 | } |
250 | IFCQ_SET_MAXLEN(ifq, maxqlen); |
251 | IFCQ_UNLOCK(ifq); |
252 | } |
253 | |
254 | u_int32_t |
255 | ifclassq_get_maxlen(struct ifclassq *ifq) |
256 | { |
257 | return IFCQ_MAXLEN(ifq); |
258 | } |
259 | |
260 | int |
261 | ifclassq_get_len(struct ifclassq *ifq, mbuf_svc_class_t sc, u_int8_t grp_idx, |
262 | u_int32_t *packets, u_int32_t *bytes) |
263 | { |
264 | int err = 0; |
265 | |
266 | IFCQ_LOCK(ifq); |
267 | if ((ifq->ifcq_flags & (IFCQF_READY | IFCQF_ENABLED)) != |
268 | (IFCQF_READY | IFCQF_ENABLED)) { |
269 | return ENXIO; |
270 | } |
271 | if (sc == MBUF_SC_UNSPEC && grp_idx == IF_CLASSQ_ALL_GRPS) { |
272 | VERIFY(packets != NULL); |
273 | if (fq_if_is_all_paced(ifq)) { |
274 | *packets = 0; |
275 | } else { |
276 | *packets = IFCQ_LEN(ifq); |
277 | } |
278 | } else { |
279 | cqrq_stat_sc_t req = { sc, grp_idx, 0, 0 }; |
280 | |
281 | VERIFY(MBUF_VALID_SC(sc) || sc == MBUF_SC_UNSPEC); |
282 | |
283 | err = fq_if_request_classq(ifq, rq: CLASSQRQ_STAT_SC, arg: &req); |
284 | if (packets != NULL) { |
285 | *packets = req.packets; |
286 | } |
287 | if (bytes != NULL) { |
288 | *bytes = req.bytes; |
289 | } |
290 | } |
291 | KDBG(AQM_KTRACE_STATS_GET_QLEN, ifq->ifcq_ifp->if_index, |
292 | packets ? *packets : 0, bytes ? *bytes : 0, fq_if_is_all_paced(ifq)); |
293 | |
294 | IFCQ_UNLOCK(ifq); |
295 | |
296 | #if SKYWALK |
297 | struct ifnet *ifp = ifq->ifcq_ifp; |
298 | |
299 | if (__improbable(ifp->if_na_ops != NULL && |
300 | ifp->if_na_ops->ni_get_len != NULL)) { |
301 | err = ifp->if_na_ops->ni_get_len(ifp->if_na, sc, packets, |
302 | bytes, err); |
303 | } |
304 | #endif /* SKYWALK */ |
305 | |
306 | return err; |
307 | } |
308 | |
/*
 * Tag an outbound packet with queue occupancy data used by the
 * unsent-data accounting (the mbuf_get_unsent_data_bytes() facility).
 * Only cellular interfaces receive this metadata; all others return
 * immediately.
 */
inline void
ifclassq_set_packet_metadata(struct ifclassq *ifq, struct ifnet *ifp,
    classq_pkt_t *p)
{
    if (!IFNET_IS_CELLULAR(ifp)) {
        return;
    }

    switch (p->cp_ptype) {
    case QP_MBUF: {
        struct mbuf *m = p->cp_mbuf;
        m->m_pkthdr.pkt_flags |= PKTF_VALID_UNSENT_DATA;
        /* snapshot of bytes currently held in classq and sndbuf */
        m->m_pkthdr.bufstatus_if = IFCQ_BYTES(ifq);
        m->m_pkthdr.bufstatus_sndbuf = (uint32_t)ifp->if_sndbyte_unsent;
        break;
    }

#if SKYWALK
    case QP_PACKET:
        /*
         * Support for equivalent of mbuf_get_unsent_data_bytes()
         * is not needed in the Skywalk architecture.
         */
        break;
#endif /* SKYWALK */

    default:
        VERIFY(0);
        /* NOTREACHED */
        __builtin_unreachable();
    }
}
341 | |
342 | errno_t |
343 | ifclassq_enqueue(struct ifclassq *ifq, classq_pkt_t *head, classq_pkt_t *tail, |
344 | u_int32_t cnt, u_int32_t bytes, boolean_t *pdrop) |
345 | { |
346 | return fq_if_enqueue_classq(ifq, h: head, t: tail, cnt, bytes, pdrop); |
347 | } |
348 | |
349 | errno_t |
350 | ifclassq_dequeue(struct ifclassq *ifq, u_int32_t pkt_limit, |
351 | u_int32_t byte_limit, classq_pkt_t *head, classq_pkt_t *tail, |
352 | u_int32_t *cnt, u_int32_t *len, u_int8_t grp_idx) |
353 | { |
354 | return ifclassq_dequeue_common(ifq, MBUF_SC_UNSPEC, pkt_limit, |
355 | byte_limit, head, tail, cnt, len, FALSE, grp_idx); |
356 | } |
357 | |
358 | errno_t |
359 | ifclassq_dequeue_sc(struct ifclassq *ifq, mbuf_svc_class_t sc, |
360 | u_int32_t pkt_limit, u_int32_t byte_limit, classq_pkt_t *head, |
361 | classq_pkt_t *tail, u_int32_t *cnt, u_int32_t *len, u_int8_t grp_idx) |
362 | { |
363 | return ifclassq_dequeue_common(ifq, sc, pkt_limit, byte_limit, |
364 | head, tail, cnt, len, TRUE, grp_idx); |
365 | } |
366 | |
/*
 * Workhorse behind ifclassq_dequeue{,_sc}(): pulls up to pkt_limit
 * packets / byte_limit bytes off the scheduler.  When a token bucket
 * regulator is active, packets are dequeued one at a time so each can
 * be charged against the bucket; otherwise the whole batch is handed
 * to the scheduler's multi-packet dequeue entry point.
 *
 * On success *head/*tail bracket the dequeued chain and *cnt/*len
 * report its size; returns EAGAIN when nothing was available.
 */
static errno_t
ifclassq_dequeue_common_default(struct ifclassq *ifq, mbuf_svc_class_t sc,
    u_int32_t pkt_limit, u_int32_t byte_limit, classq_pkt_t *head,
    classq_pkt_t *tail, u_int32_t *cnt, u_int32_t *len, boolean_t drvmgt,
    u_int8_t grp_idx)
{
    struct ifnet *ifp = ifq->ifcq_ifp;
    u_int32_t i = 0, l = 0;
    classq_pkt_t first = CLASSQ_PKT_INITIALIZER(first);
    classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);

    /* driver-managed dequeues must carry a valid service class */
    VERIFY(!drvmgt || MBUF_VALID_SC(sc));

    /* TBR forces the one-packet-at-a-time path below */
    if (IFCQ_TBR_IS_ENABLED(ifq)) {
        goto dequeue_loop;
    }

    /*
     * If the scheduler supports dequeueing multiple packets at the
     * same time, call that one instead.
     */
    if (drvmgt) {
        int err;

        IFCQ_LOCK_SPIN(ifq);
        err = fq_if_dequeue_sc_classq_multi(ifq, svc: sc, maxpktcnt: pkt_limit,
            maxbytecnt: byte_limit, first_packet: head, last_packet: tail, retpktcnt: cnt, retbytecnt: len, grp_idx);
        IFCQ_UNLOCK(ifq);

        /* success with an empty chain means "nothing to send" */
        if (err == 0 && head->cp_mbuf == NULL) {
            err = EAGAIN;
        }
        return err;
    } else {
        int err;

        IFCQ_LOCK_SPIN(ifq);
        err = fq_if_dequeue_classq_multi(ifq, maxpktcnt: pkt_limit, maxbytecnt: byte_limit,
            first_packet: head, last_packet: tail, retpktcnt: cnt, retbytecnt: len, grp_idx);
        IFCQ_UNLOCK(ifq);

        if (err == 0 && head->cp_mbuf == NULL) {
            err = EAGAIN;
        }
        return err;
    }

dequeue_loop:
    VERIFY(IFCQ_TBR_IS_ENABLED(ifq));
    IFCQ_LOCK_SPIN(ifq);

    /* dequeue one packet per iteration, linking them into a chain */
    while (i < pkt_limit && l < byte_limit) {
        if (drvmgt) {
            IFCQ_TBR_DEQUEUE_SC(ifq, sc, head, grp_idx);
        } else {
            IFCQ_TBR_DEQUEUE(ifq, head, grp_idx);
        }

        /* TBR exhausted or queue empty */
        if (head->cp_mbuf == NULL) {
            break;
        }

        if (first.cp_mbuf == NULL) {
            first = *head;
        }

        switch (head->cp_ptype) {
        case QP_MBUF:
            head->cp_mbuf->m_nextpkt = NULL;
            l += head->cp_mbuf->m_pkthdr.len;
            ifclassq_set_packet_metadata(ifq, ifp, p: head);
            if (last.cp_mbuf != NULL) {
                last.cp_mbuf->m_nextpkt = head->cp_mbuf;
            }
            break;

#if SKYWALK
        case QP_PACKET:
            head->cp_kpkt->pkt_nextpkt = NULL;
            l += head->cp_kpkt->pkt_length;
            ifclassq_set_packet_metadata(ifq, ifp, p: head);
            if (last.cp_kpkt != NULL) {
                last.cp_kpkt->pkt_nextpkt = head->cp_kpkt;
            }
            break;
#endif /* SKYWALK */

        default:
            VERIFY(0);
            /* NOTREACHED */
            __builtin_unreachable();
        }

        last = *head;
        i++;
    }

    IFCQ_UNLOCK(ifq);

    if (tail != NULL) {
        *tail = last;
    }
    if (cnt != NULL) {
        *cnt = i;
    }
    if (len != NULL) {
        *len = l;
    }

    *head = first;
    return (first.cp_mbuf != NULL) ? 0 : EAGAIN;
}
479 | |
/*
 * Dequeue entry point shared by ifclassq_dequeue() and
 * ifclassq_dequeue_sc().  With Skywalk, a netif filter may interpose
 * on the dequeue path via if_na_ops->ni_dequeue; the default dequeue
 * result (chain and error code) is then passed through the filter.
 */
static errno_t
ifclassq_dequeue_common(struct ifclassq *ifq, mbuf_svc_class_t sc,
    u_int32_t pkt_limit, u_int32_t byte_limit, classq_pkt_t *head,
    classq_pkt_t *tail, u_int32_t *cnt, u_int32_t *len, boolean_t drvmgt,
    u_int8_t grp_idx)
{
#if SKYWALK
    struct ifnet *ifp = ifq->ifcq_ifp;

    if (__improbable(ifp->if_na_ops != NULL &&
        ifp->if_na_ops->ni_dequeue != NULL)) {
        /*
         * TODO:
         * We should be changing the pkt/byte limit to the
         * available space in the next filter. But this is not
         * useful until we can flow control the whole chain of
         * filters.
         */
        errno_t err = ifclassq_dequeue_common_default(ifq, sc,
            pkt_limit, byte_limit, head, tail, cnt, len, drvmgt, grp_idx);

        return ifp->if_na_ops->ni_dequeue(ifp->if_na, sc, pkt_limit,
            byte_limit, head, tail, cnt, len, drvmgt, err);
    }
#endif /* SKYWALK */
    return ifclassq_dequeue_common_default(ifq, sc,
        pkt_limit, byte_limit, head, tail, cnt, len, drvmgt, grp_idx);
}
508 | |
/*
 * Forward a link event (bandwidth/latency/MTU/up/down) to the
 * scheduler as a CLASSQRQ_EVENT request so it can re-tune itself.
 * Caller holds the classq lock; the queue must be ready.
 */
void
ifclassq_update(struct ifclassq *ifq, cqev_t ev)
{
    IFCQ_LOCK_ASSERT_HELD(ifq);
    VERIFY(IFCQ_IS_READY(ifq));
    fq_if_request_classq(ifq, rq: CLASSQRQ_EVENT, arg: (void *)ev);
}
516 | |
517 | int |
518 | ifclassq_attach(struct ifclassq *ifq, u_int32_t type, void *discipline) |
519 | { |
520 | IFCQ_LOCK_ASSERT_HELD(ifq); |
521 | VERIFY(ifq->ifcq_disc == NULL); |
522 | ifq->ifcq_type = type; |
523 | ifq->ifcq_disc = discipline; |
524 | return 0; |
525 | } |
526 | |
527 | void |
528 | ifclassq_detach(struct ifclassq *ifq) |
529 | { |
530 | IFCQ_LOCK_ASSERT_HELD(ifq); |
531 | VERIFY(ifq->ifcq_disc == NULL); |
532 | ifq->ifcq_type = PKTSCHEDT_NONE; |
533 | } |
534 | |
535 | int |
536 | ifclassq_getqstats(struct ifclassq *ifq, u_int8_t gid, u_int32_t qid, void *ubuf, |
537 | u_int32_t *nbytes) |
538 | { |
539 | struct if_ifclassq_stats *ifqs; |
540 | int err; |
541 | |
542 | if (*nbytes < sizeof(*ifqs)) { |
543 | return EINVAL; |
544 | } |
545 | |
546 | ifqs = kalloc_type(struct if_ifclassq_stats, |
547 | Z_WAITOK | Z_ZERO | Z_NOFAIL); |
548 | |
549 | IFCQ_LOCK(ifq); |
550 | if (!IFCQ_IS_READY(ifq)) { |
551 | IFCQ_UNLOCK(ifq); |
552 | kfree_type(struct if_ifclassq_stats, ifqs); |
553 | return ENXIO; |
554 | } |
555 | |
556 | ifqs->ifqs_len = IFCQ_LEN(ifq); |
557 | ifqs->ifqs_maxlen = IFCQ_MAXLEN(ifq); |
558 | *(&ifqs->ifqs_xmitcnt) = *(&ifq->ifcq_xmitcnt); |
559 | *(&ifqs->ifqs_dropcnt) = *(&ifq->ifcq_dropcnt); |
560 | ifqs->ifqs_scheduler = ifq->ifcq_type; |
561 | ifqs->ifqs_doorbells = ifq->ifcq_doorbells; |
562 | |
563 | err = pktsched_getqstats(ifq, gid, qid, ifqs); |
564 | IFCQ_UNLOCK(ifq); |
565 | |
566 | if (err == 0 && (err = copyout((caddr_t)ifqs, |
567 | (user_addr_t)(uintptr_t)ubuf, sizeof(*ifqs))) == 0) { |
568 | *nbytes = sizeof(*ifqs); |
569 | } |
570 | |
571 | kfree_type(struct if_ifclassq_stats, ifqs); |
572 | |
573 | return err; |
574 | } |
575 | |
576 | const char * |
577 | ifclassq_ev2str(cqev_t ev) |
578 | { |
579 | const char *c; |
580 | |
581 | switch (ev) { |
582 | case CLASSQ_EV_LINK_BANDWIDTH: |
583 | c = "LINK_BANDWIDTH" ; |
584 | break; |
585 | |
586 | case CLASSQ_EV_LINK_LATENCY: |
587 | c = "LINK_LATENCY" ; |
588 | break; |
589 | |
590 | case CLASSQ_EV_LINK_MTU: |
591 | c = "LINK_MTU" ; |
592 | break; |
593 | |
594 | case CLASSQ_EV_LINK_UP: |
595 | c = "LINK_UP" ; |
596 | break; |
597 | |
598 | case CLASSQ_EV_LINK_DOWN: |
599 | c = "LINK_DOWN" ; |
600 | break; |
601 | |
602 | default: |
603 | c = "UNKNOWN" ; |
604 | break; |
605 | } |
606 | |
607 | return c; |
608 | } |
609 | |
610 | /* |
611 | * internal representation of token bucket parameters |
612 | * rate: byte_per_unittime << 32 |
613 | * (((bits_per_sec) / 8) << 32) / machclk_freq |
614 | * depth: byte << 32 |
615 | * |
616 | */ |
617 | #define TBR_SHIFT 32 |
618 | #define TBR_SCALE(x) ((int64_t)(x) << TBR_SHIFT) |
619 | #define TBR_UNSCALE(x) ((x) >> TBR_SHIFT) |
620 | |
621 | void |
622 | ifclassq_tbr_dequeue(struct ifclassq *ifq, classq_pkt_t *pkt, u_int8_t grp_idx) |
623 | { |
624 | ifclassq_tbr_dequeue_common(ifq, MBUF_SC_UNSPEC, FALSE, pkt, grp_idx); |
625 | } |
626 | |
627 | void |
628 | ifclassq_tbr_dequeue_sc(struct ifclassq *ifq, mbuf_svc_class_t sc, |
629 | classq_pkt_t *pkt, u_int8_t grp_idx) |
630 | { |
631 | ifclassq_tbr_dequeue_common(ifq, sc, TRUE, pkt, grp_idx); |
632 | } |
633 | |
/*
 * Token-bucket regulated dequeue.  Refills the bucket from the elapsed
 * machclk time, refuses to dequeue while the token count is not
 * positive, and charges the dequeued packet's length (TBR_SCALE'd)
 * against the bucket.  *pkt is left empty when nothing may be sent.
 * Caller holds the classq lock.
 */
static void
ifclassq_tbr_dequeue_common(struct ifclassq *ifq, mbuf_svc_class_t sc,
    boolean_t drvmgt, classq_pkt_t *pkt, u_int8_t grp_idx)
{
    struct tb_regulator *tbr;
    int64_t interval;
    u_int64_t now;

    IFCQ_LOCK_ASSERT_HELD(ifq);

    VERIFY(!drvmgt || MBUF_VALID_SC(sc));
    VERIFY(IFCQ_TBR_IS_ENABLED(ifq));

    *pkt = CLASSQ_PKT_INITIALIZER(*pkt);
    tbr = &ifq->ifcq_tbr;
    /* update token only when it is negative */
    if (tbr->tbr_token <= 0) {
        now = read_machclk();
        interval = now - tbr->tbr_last;
        if (interval >= tbr->tbr_filluptime) {
            /* enough time elapsed to fill the bucket completely */
            tbr->tbr_token = tbr->tbr_depth;
        } else {
            /* partial refill, capped at the bucket depth */
            tbr->tbr_token += interval * tbr->tbr_rate;
            if (tbr->tbr_token > tbr->tbr_depth) {
                tbr->tbr_token = tbr->tbr_depth;
            }
        }
        tbr->tbr_last = now;
    }
    /* if token is still negative, don't allow dequeue */
    if (tbr->tbr_token <= 0) {
        return;
    }

    /*
     * ifclassq takes precedence over ALTQ queue;
     * ifcq_drain count is adjusted by the caller.
     */
    if (drvmgt) {
        fq_if_dequeue_sc_classq(ifq, svc: sc, pkt, grp_idx);
    } else {
        fq_if_dequeue_classq(ifq, pkt, grp_idx);
    }

    if (pkt->cp_mbuf != NULL) {
        /* charge the packet's length against the bucket */
        switch (pkt->cp_ptype) {
        case QP_MBUF:
            tbr->tbr_token -= TBR_SCALE(m_pktlen(pkt->cp_mbuf));
            break;

#if SKYWALK
        case QP_PACKET:
            tbr->tbr_token -=
                TBR_SCALE(pkt->cp_kpkt->pkt_length);
            break;
#endif /* SKYWALK */

        default:
            VERIFY(0);
            /* NOTREACHED */
        }
    }
}
697 | |
698 | /* |
699 | * set a token bucket regulator. |
700 | * if the specified rate is zero, the token bucket regulator is deleted. |
701 | */ |
702 | int |
703 | ifclassq_tbr_set(struct ifclassq *ifq, struct tb_profile *profile, |
704 | boolean_t update) |
705 | { |
706 | struct tb_regulator *tbr; |
707 | struct ifnet *ifp = ifq->ifcq_ifp; |
708 | u_int64_t rate, old_rate; |
709 | |
710 | IFCQ_LOCK_ASSERT_HELD(ifq); |
711 | VERIFY(IFCQ_IS_READY(ifq)); |
712 | |
713 | VERIFY(machclk_freq != 0); |
714 | |
715 | tbr = &ifq->ifcq_tbr; |
716 | old_rate = tbr->tbr_rate_raw; |
717 | |
718 | rate = profile->rate; |
719 | if (profile->percent > 0) { |
720 | u_int64_t eff_rate; |
721 | |
722 | if (profile->percent > 100) { |
723 | return EINVAL; |
724 | } |
725 | if ((eff_rate = ifp->if_output_bw.eff_bw) == 0) { |
726 | return ENODEV; |
727 | } |
728 | rate = (eff_rate * profile->percent) / 100; |
729 | } |
730 | |
731 | if (rate == 0) { |
732 | if (!IFCQ_TBR_IS_ENABLED(ifq)) { |
733 | return 0; |
734 | } |
735 | |
736 | if (pktsched_verbose) { |
737 | printf("%s: TBR disabled\n" , if_name(ifp)); |
738 | } |
739 | |
740 | /* disable this TBR */ |
741 | ifq->ifcq_flags &= ~IFCQF_TBR; |
742 | bzero(s: tbr, n: sizeof(*tbr)); |
743 | ifnet_set_start_cycle(ifp, NULL); |
744 | if (update) { |
745 | ifclassq_update(ifq, ev: CLASSQ_EV_LINK_BANDWIDTH); |
746 | } |
747 | return 0; |
748 | } |
749 | |
750 | if (pktsched_verbose) { |
751 | printf("%s: TBR %s (rate %llu bps depth %u)\n" , if_name(ifp), |
752 | (ifq->ifcq_flags & IFCQF_TBR) ? "reconfigured" : |
753 | "enabled" , rate, profile->depth); |
754 | } |
755 | |
756 | /* set the new TBR */ |
757 | bzero(s: tbr, n: sizeof(*tbr)); |
758 | tbr->tbr_rate_raw = rate; |
759 | tbr->tbr_percent = profile->percent; |
760 | ifq->ifcq_flags |= IFCQF_TBR; |
761 | |
762 | /* |
763 | * Note that the TBR fill up time (hence the ifnet restart time) |
764 | * is directly related to the specified TBR depth. The ideal |
765 | * depth value should be computed such that the interval time |
766 | * between each successive wakeup is adequately spaced apart, |
767 | * in order to reduce scheduling overheads. A target interval |
768 | * of 10 ms seems to provide good performance balance. This can be |
769 | * overridden by specifying the depth profile. Values smaller than |
770 | * the ideal depth will reduce delay at the expense of CPU cycles. |
771 | */ |
772 | tbr->tbr_rate = TBR_SCALE(rate / 8) / machclk_freq; |
773 | if (tbr->tbr_rate > 0) { |
774 | u_int32_t mtu = ifp->if_mtu; |
775 | int64_t ival, idepth = 0; |
776 | int i; |
777 | |
778 | if (mtu < IF_MINMTU) { |
779 | mtu = IF_MINMTU; |
780 | } |
781 | |
782 | ival = pktsched_nsecs_to_abstime(10 * NSEC_PER_MSEC); /* 10ms */ |
783 | |
784 | for (i = 1;; i++) { |
785 | idepth = TBR_SCALE(i * mtu); |
786 | if ((idepth / tbr->tbr_rate) > ival) { |
787 | break; |
788 | } |
789 | } |
790 | VERIFY(idepth > 0); |
791 | |
792 | tbr->tbr_depth = TBR_SCALE(profile->depth); |
793 | if (tbr->tbr_depth == 0) { |
794 | tbr->tbr_filluptime = idepth / tbr->tbr_rate; |
795 | /* a little fudge factor to get closer to rate */ |
796 | tbr->tbr_depth = idepth + (idepth >> 3); |
797 | } else { |
798 | tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate; |
799 | } |
800 | } else { |
801 | tbr->tbr_depth = TBR_SCALE(profile->depth); |
802 | tbr->tbr_filluptime = 0xffffffffffffffffLL; |
803 | } |
804 | tbr->tbr_token = tbr->tbr_depth; |
805 | tbr->tbr_last = read_machclk(); |
806 | |
807 | if (tbr->tbr_rate > 0 && (ifp->if_flags & IFF_UP)) { |
808 | struct timespec ts = |
809 | { 0, (long)pktsched_abs_to_nsecs(tbr->tbr_filluptime) }; |
810 | if (pktsched_verbose) { |
811 | printf("%s: TBR calculated tokens %lld " |
812 | "filluptime %llu ns\n" , if_name(ifp), |
813 | TBR_UNSCALE(tbr->tbr_token), |
814 | pktsched_abs_to_nsecs(tbr->tbr_filluptime)); |
815 | } |
816 | ifnet_set_start_cycle(ifp, &ts); |
817 | } else { |
818 | if (pktsched_verbose) { |
819 | if (tbr->tbr_rate == 0) { |
820 | printf("%s: TBR calculated tokens %lld " |
821 | "infinite filluptime\n" , if_name(ifp), |
822 | TBR_UNSCALE(tbr->tbr_token)); |
823 | } else if (!(ifp->if_flags & IFF_UP)) { |
824 | printf("%s: TBR suspended (link is down)\n" , |
825 | if_name(ifp)); |
826 | } |
827 | } |
828 | ifnet_set_start_cycle(ifp, NULL); |
829 | } |
830 | if (update && tbr->tbr_rate_raw != old_rate) { |
831 | ifclassq_update(ifq, ev: CLASSQ_EV_LINK_BANDWIDTH); |
832 | } |
833 | |
834 | return 0; |
835 | } |
836 | |
837 | void |
838 | ifclassq_calc_target_qdelay(struct ifnet *ifp, uint64_t *if_target_qdelay, |
839 | uint32_t flags) |
840 | { |
841 | uint64_t qdelay = 0, qdelay_configed = 0, qdely_default = 0; |
842 | if (flags == IF_CLASSQ_DEF) { |
843 | qdelay = IFCQ_TARGET_QDELAY(ifp->if_snd); |
844 | } |
845 | |
846 | switch (flags) { |
847 | case IF_CLASSQ_DEF: |
848 | qdelay_configed = ifclassq_def_c_target_qdelay; |
849 | qdely_default = IFQ_DEF_C_TARGET_DELAY; |
850 | break; |
851 | case IF_CLASSQ_L4S: |
852 | qdelay_configed = ifclassq_def_l4s_target_qdelay; |
853 | if (ifp->if_subfamily == IFNET_SUBFAMILY_WIFI || |
854 | ifp->if_family == IFNET_FAMILY_CELLULAR) { |
855 | qdely_default = IFQ_DEF_L4S_WIRELESS_TARGET_DELAY; |
856 | } else { |
857 | qdely_default = IFQ_DEF_L4S_TARGET_DELAY; |
858 | } |
859 | break; |
860 | case IF_CLASSQ_LOW_LATENCY: |
861 | qdelay_configed = ifclassq_ll_c_target_qdelay; |
862 | qdely_default = IFQ_LL_C_TARGET_DELAY; |
863 | break; |
864 | case (IF_CLASSQ_LOW_LATENCY | IF_CLASSQ_L4S): |
865 | qdelay_configed = ifclassq_ll_l4s_target_qdelay; |
866 | if (ifp->if_subfamily == IFNET_SUBFAMILY_WIFI || |
867 | ifp->if_family == IFNET_FAMILY_CELLULAR) { |
868 | qdely_default = IFQ_LL_L4S_WIRELESS_TARGET_DELAY; |
869 | } else { |
870 | qdely_default = IFQ_LL_L4S_TARGET_DELAY; |
871 | } |
872 | break; |
873 | default: |
874 | VERIFY(0); |
875 | /* NOTREACHED */ |
876 | __builtin_unreachable(); |
877 | } |
878 | |
879 | if (qdelay_configed != 0) { |
880 | qdelay = qdelay_configed; |
881 | } |
882 | |
883 | /* |
884 | * If we do not know the effective bandwidth, use the default |
885 | * target queue delay. |
886 | */ |
887 | if (qdelay == 0) { |
888 | qdelay = qdely_default; |
889 | } |
890 | |
891 | /* |
892 | * If a delay has been added to ifnet start callback for |
893 | * coalescing, we have to add that to the pre-set target delay |
894 | * because the packets can be in the queue longer. |
895 | */ |
896 | if ((ifp->if_eflags & IFEF_ENQUEUE_MULTI) && |
897 | ifp->if_start_delay_timeout > 0) { |
898 | qdelay += ifp->if_start_delay_timeout; |
899 | } |
900 | |
901 | *(if_target_qdelay) = qdelay; |
902 | } |
903 | |
904 | void |
905 | ifclassq_calc_update_interval(uint64_t *update_interval, uint32_t flags) |
906 | { |
907 | uint64_t interval = 0, interval_configed = 0, interval_default = 0; |
908 | |
909 | switch (flags) { |
910 | case IF_CLASSQ_DEF: |
911 | interval_configed = ifclassq_def_c_update_interval; |
912 | interval_default = IFQ_DEF_C_UPDATE_INTERVAL; |
913 | break; |
914 | case IF_CLASSQ_L4S: |
915 | interval_configed = ifclassq_def_l4s_update_interval; |
916 | interval_default = IFQ_DEF_L4S_UPDATE_INTERVAL; |
917 | break; |
918 | case IF_CLASSQ_LOW_LATENCY: |
919 | interval_configed = ifclassq_ll_c_update_interval; |
920 | interval_default = IFQ_LL_C_UPDATE_INTERVAL; |
921 | break; |
922 | case (IF_CLASSQ_LOW_LATENCY | IF_CLASSQ_L4S): |
923 | interval_configed = ifclassq_ll_l4s_update_interval; |
924 | interval_default = IFQ_LL_L4S_UPDATE_INTERVAL; |
925 | break; |
926 | default: |
927 | VERIFY(0); |
928 | /* NOTREACHED */ |
929 | __builtin_unreachable(); |
930 | } |
931 | |
932 | /* If the system level override is set, use it */ |
933 | if (interval_configed != 0) { |
934 | interval = interval_configed; |
935 | } |
936 | |
937 | /* Otherwise use the default value */ |
938 | if (interval == 0) { |
939 | interval = interval_default; |
940 | } |
941 | |
942 | *update_interval = interval; |
943 | } |
944 | |
945 | struct ifclassq * |
946 | ifclassq_alloc(void) |
947 | { |
948 | struct ifclassq *ifcq; |
949 | |
950 | ifcq = zalloc_flags(ifcq_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL); |
951 | os_ref_init(&ifcq->ifcq_refcnt, NULL); |
952 | os_ref_retain(rc: &ifcq->ifcq_refcnt); |
953 | lck_mtx_init(lck: &ifcq->ifcq_lock, grp: &ifcq_lock_group, attr: &ifcq_lock_attr); |
954 | return ifcq; |
955 | } |
956 | |
957 | void |
958 | ifclassq_retain(struct ifclassq *ifcq) |
959 | { |
960 | os_ref_retain(rc: &ifcq->ifcq_refcnt); |
961 | } |
962 | |
963 | void |
964 | ifclassq_release(struct ifclassq **pifcq) |
965 | { |
966 | struct ifclassq *ifcq = *pifcq; |
967 | |
968 | *pifcq = NULL; |
969 | if (os_ref_release(rc: &ifcq->ifcq_refcnt) == 0) { |
970 | ifclassq_teardown(ifq: ifcq); |
971 | zfree(ifcq_zone, ifcq); |
972 | } |
973 | } |
974 | |
975 | int |
976 | ifclassq_setup_group(struct ifclassq *ifcq, uint8_t grp_idx, uint8_t flags) |
977 | { |
978 | int err; |
979 | |
980 | IFCQ_LOCK(ifcq); |
981 | VERIFY(ifcq->ifcq_disc != NULL); |
982 | VERIFY(ifcq->ifcq_type == PKTSCHEDT_FQ_CODEL); |
983 | |
984 | err = fq_if_create_grp(ifcq, qset_idx: grp_idx, flags); |
985 | IFCQ_UNLOCK(ifcq); |
986 | |
987 | return err; |
988 | } |
989 | |
990 | void |
991 | ifclassq_set_grp_combined(struct ifclassq *ifcq, uint8_t grp_idx) |
992 | { |
993 | IFCQ_LOCK(ifcq); |
994 | VERIFY(ifcq->ifcq_disc != NULL); |
995 | VERIFY(ifcq->ifcq_type == PKTSCHEDT_FQ_CODEL); |
996 | |
997 | fq_if_set_grp_combined(ifcq, qset_idx: grp_idx); |
998 | IFCQ_UNLOCK(ifcq); |
999 | } |
1000 | |
1001 | void |
1002 | ifclassq_set_grp_separated(struct ifclassq *ifcq, uint8_t grp_idx) |
1003 | { |
1004 | IFCQ_LOCK(ifcq); |
1005 | VERIFY(ifcq->ifcq_disc != NULL); |
1006 | VERIFY(ifcq->ifcq_type == PKTSCHEDT_FQ_CODEL); |
1007 | |
1008 | fq_if_set_grp_separated(ifcq, qset_idx: grp_idx); |
1009 | IFCQ_UNLOCK(ifcq); |
1010 | } |
1011 | |