/*
 * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <sys/types.h>
#include <sys/param.h>
#include <kern/zalloc.h>
#include <net/ethernet.h>
#include <net/if_var.h>
#include <net/if.h>
#include <net/classq/classq.h>
#include <net/classq/classq_fq_codel.h>
#include <net/pktsched/pktsched_fq_codel.h>
#include <os/log.h>
#include <pexpert/pexpert.h> /* for PE_parse_boot_argn */
#include <mach/thread_act.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>

#define FQ_CODEL_DEFAULT_QUANTUM 1500

#define FQ_CODEL_QUANTUM_BK_SYS(_q) (_q)
#define FQ_CODEL_QUANTUM_BK(_q) (_q)
#define FQ_CODEL_QUANTUM_BE(_q) (_q)
#define FQ_CODEL_QUANTUM_RD(_q) (_q)
#define FQ_CODEL_QUANTUM_OAM(_q) (_q)
#define FQ_CODEL_QUANTUM_AV(_q) (_q * 2)
#define FQ_CODEL_QUANTUM_RV(_q) (_q * 2)
#define FQ_CODEL_QUANTUM_VI(_q) (_q * 2)
#define FQ_CODEL_QUANTUM_VO(_q) ((_q * 2) / 5)
#define FQ_CODEL_QUANTUM_CTL(_q) ((_q * 2) / 5)
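
/*
 * For illustration (derived from the macros above, assuming the default
 * 1500-byte quantum): BK_SYS/BK/BE/RD/OAM use 1500-byte quanta, AV/RV/VI
 * use 3000 (2x), and VO/CTL use 600 ((2 * 1500) / 5). A smaller quantum
 * means a class dequeues fewer bytes per DRR round before yielding to the
 * next class.
 */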

static KALLOC_TYPE_DEFINE(fq_if_zone, fq_if_t, NET_KT_DEFAULT);
static KALLOC_TYPE_DEFINE(fq_if_grp_zone, fq_if_group_t, NET_KT_DEFAULT);

SYSCTL_NODE(_net_classq, OID_AUTO, fq_codel, CTLFLAG_RW | CTLFLAG_LOCKED,
    0, "FQ-CODEL parameters");

SYSCTL_INT(_net_classq_fq_codel, OID_AUTO, fq_enable_pacing, CTLFLAG_RW | CTLFLAG_LOCKED,
    &ifclassq_enable_pacing, 0, "Enable pacing");

static uint64_t fq_empty_purge_delay = FQ_EMPTY_PURGE_DELAY;
#if (DEVELOPMENT || DEBUG)
SYSCTL_QUAD(_net_classq_fq_codel, OID_AUTO, fq_empty_purge_delay, CTLFLAG_RW |
    CTLFLAG_LOCKED, &fq_empty_purge_delay, "Empty flow queue purge delay (ns)");
#endif /* DEVELOPMENT || DEBUG */

unsigned int ifclassq_enable_pacing = 1;

typedef STAILQ_HEAD(, flowq) flowq_dqlist_t;

static fq_if_t *fq_if_alloc(struct ifclassq *, classq_pkt_type_t);
static void fq_if_destroy(fq_if_t *fqs);
static void fq_if_classq_init(fq_if_group_t *fqg, uint32_t priority,
    uint32_t quantum, uint32_t drr_max, uint32_t svc_class);
static void fq_if_dequeue(fq_if_t *, fq_if_classq_t *, uint32_t,
    int64_t, classq_pkt_t *, classq_pkt_t *, uint32_t *,
    uint32_t *, flowq_dqlist_t *, bool, uint64_t, bool*, uint64_t*);
void fq_if_stat_sc(fq_if_t *fqs, cqrq_stat_sc_t *stat);
static void fq_if_purge(fq_if_t *);
static void fq_if_purge_classq(fq_if_t *, fq_if_classq_t *);
static void fq_if_purge_flow(fq_if_t *, fq_t *, uint32_t *, uint32_t *,
    uint64_t);
static void fq_if_empty_new_flow(fq_t *fq, fq_if_classq_t *fq_cl);
static void fq_if_empty_old_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl,
    fq_t *fq, uint64_t now);
static void fq_if_purge_empty_flow(fq_if_t *fqs, fq_t *fq);
static void fq_if_purge_empty_flow_list(fq_if_t *fqs, uint64_t now,
    bool purge_all);
static inline void fq_if_reuse_empty_flow(fq_if_t *fqs, fq_t *fq, uint64_t now);
static int fq_if_dequeue_sc_classq_multi_separate(struct ifclassq *ifq,
    mbuf_svc_class_t svc, u_int32_t maxpktcnt, u_int32_t maxbytecnt,
    classq_pkt_t *first_packet, classq_pkt_t *last_packet, u_int32_t *retpktcnt,
    u_int32_t *retbytecnt, uint8_t grp_idx);
static void fq_if_grp_stat_sc(fq_if_t *fqs, fq_if_group_t *grp,
    cqrq_stat_sc_t *stat, uint64_t now);
static void fq_if_purge_grp(fq_if_t *fqs, fq_if_group_t *grp);
static inline boolean_t fq_if_is_grp_combined(fq_if_t *fqs, uint8_t grp_idx);
static void fq_if_destroy_grps(fq_if_t *fqs);

uint32_t fq_codel_drr_max_values[FQ_IF_MAX_CLASSES] = {
    [FQ_IF_CTL_INDEX] = 8,
    [FQ_IF_VO_INDEX] = 8,
    [FQ_IF_VI_INDEX] = 6,
    [FQ_IF_RV_INDEX] = 6,
    [FQ_IF_AV_INDEX] = 6,
    [FQ_IF_OAM_INDEX] = 4,
    [FQ_IF_RD_INDEX] = 4,
    [FQ_IF_BE_INDEX] = 4,
    [FQ_IF_BK_INDEX] = 2,
    [FQ_IF_BK_SYS_INDEX] = 2,
};

#define FQ_CODEL_DRR_MAX(_s) fq_codel_drr_max_values[FQ_IF_##_s##_INDEX]
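
/*
 * Illustrative budget math (see the refill in
 * fq_if_dequeue_classq_multi_common below): a class is refilled with
 * min(fcl_drr_max, fcl_flows_cnt) * fcl_quantum bytes per round, so a BE
 * class (drr_max 4) with 10 active flows and a 1514-byte quantum would
 * receive 4 * 1514 = 6056 bytes of budget.
 */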

static boolean_t fq_if_grps_bitmap_zeros(fq_grp_tailq_t *grp_list, int pri,
    fq_if_state state);
static void fq_if_grps_bitmap_cpy(fq_grp_tailq_t *grp_list, int pri,
    fq_if_state dst_state, fq_if_state src_state);
static void fq_if_grps_bitmap_clr(fq_grp_tailq_t *grp_list, int pri,
    fq_if_state state);
static int fq_if_grps_bitmap_ffs(fq_grp_tailq_t *grp_list, int pri,
    fq_if_state state, fq_if_group_t **selected_grp);
static void fq_if_grps_bitmap_move(fq_grp_tailq_t *grp_list, int pri,
    fq_if_state dst_state, fq_if_state src_state);

static boolean_t fq_if_grps_sc_bitmap_zeros(fq_grp_tailq_t *grp_list, int pri,
    fq_if_state state);
static void fq_if_grps_sc_bitmap_cpy(fq_grp_tailq_t *grp_list, int pri,
    fq_if_state dst_state, fq_if_state src_state);
static void fq_if_grps_sc_bitmap_clr(fq_grp_tailq_t *grp_list, int pri,
    fq_if_state state);
static int fq_if_grps_sc_bitmap_ffs(fq_grp_tailq_t *grp_list, int pri,
    fq_if_state state, fq_if_group_t **selected_grp);
static void fq_if_grps_sc_bitmap_move(fq_grp_tailq_t *grp_list, int pri,
    fq_if_state dst_state, fq_if_state src_state);

bitmap_ops_t fq_if_grps_bitmap_ops =
{
    .ffs = fq_if_grps_bitmap_ffs,
    .zeros = fq_if_grps_bitmap_zeros,
    .cpy = fq_if_grps_bitmap_cpy,
    .clr = fq_if_grps_bitmap_clr,
    .move = fq_if_grps_bitmap_move,
};

bitmap_ops_t fq_if_grps_sc_bitmap_ops =
{
    .ffs = fq_if_grps_sc_bitmap_ffs,
    .zeros = fq_if_grps_sc_bitmap_zeros,
    .cpy = fq_if_grps_sc_bitmap_cpy,
    .clr = fq_if_grps_sc_bitmap_clr,
    .move = fq_if_grps_sc_bitmap_move,
};

void
pktsched_fq_init(void)
{
    PE_parse_boot_argn("ifclassq_enable_pacing", &ifclassq_enable_pacing,
        sizeof(ifclassq_enable_pacing));

    /* format looks like ifcq_drr_max=8,8,6 */
    char buf[(FQ_IF_MAX_CLASSES) * 3];
    size_t i, len, pri_index = 0;
    uint32_t drr = 0;
    if (!PE_parse_boot_arg_str("ifcq_drr_max", buf, sizeof(buf))) {
        return;
    }

    len = strlen(buf);
    for (i = 0; i < len + 1 && pri_index < FQ_IF_MAX_CLASSES; i++) {
        if (buf[i] != ',' && buf[i] != '\0') {
            VERIFY(buf[i] >= '0' && buf[i] <= '9');
            drr = drr * 10 + buf[i] - '0';
            continue;
        }
        fq_codel_drr_max_values[pri_index] = drr;
        pri_index += 1;
        drr = 0;
    }
}
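
/*
 * Illustrative boot-args usage (assuming the priority indices run 0..9 in
 * the order listed in fq_codel_drr_max_values above):
 *   ifcq_drr_max=8,8,6,6,6,4,4,4,2,2
 * overrides all ten per-class DRR maximums; a shorter list overrides only
 * the leading entries.
 */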

#define FQ_IF_FLOW_HASH_ID(_flowid_) \
    (((_flowid_) >> FQ_IF_HASH_TAG_SHIFT) & FQ_IF_HASH_TAG_MASK)

#define FQ_IF_CLASSQ_IDLE(_fcl_) \
    (STAILQ_EMPTY(&(_fcl_)->fcl_new_flows) && \
    STAILQ_EMPTY(&(_fcl_)->fcl_old_flows))

typedef void (* fq_if_append_pkt_t)(classq_pkt_t *, classq_pkt_t *);
typedef boolean_t (* fq_getq_flow_t)(fq_if_t *, fq_if_classq_t *, fq_t *,
    int64_t, uint32_t, classq_pkt_t *, classq_pkt_t *, uint32_t *,
    uint32_t *, boolean_t *, uint64_t);

static void
fq_if_append_mbuf(classq_pkt_t *pkt, classq_pkt_t *next_pkt)
{
    pkt->cp_mbuf->m_nextpkt = next_pkt->cp_mbuf;
}

static inline uint64_t
fq_codel_get_time(void)
{
    struct timespec ts;
    uint64_t now;

    nanouptime(&ts);
    now = ((uint64_t)ts.tv_sec * NSEC_PER_SEC) + ts.tv_nsec;
    return now;
}

#if SKYWALK
static void
fq_if_append_pkt(classq_pkt_t *pkt, classq_pkt_t *next_pkt)
{
    pkt->cp_kpkt->pkt_nextpkt = next_pkt->cp_kpkt;
}
#endif /* SKYWALK */

#if SKYWALK
static boolean_t
fq_getq_flow_kpkt(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
    int64_t byte_limit, uint32_t pkt_limit, classq_pkt_t *head,
    classq_pkt_t *tail, uint32_t *byte_cnt, uint32_t *pkt_cnt,
    boolean_t *qempty, uint64_t now)
{
    uint32_t plen;
    pktsched_pkt_t pkt;
    boolean_t limit_reached = FALSE;
    struct ifclassq *ifq = fqs->fqs_ifq;
    struct ifnet *ifp = ifq->ifcq_ifp;

    /*
     * Assert to make sure pflags is part of PKT_F_COMMON_MASK;
     * all common flags need to be declared in that mask.
     */
    while (fq->fq_deficit > 0 && limit_reached == FALSE &&
        !KPKTQ_EMPTY(&fq->fq_kpktq) && fq_tx_time_ready(fqs, fq, now, NULL)) {
        _PKTSCHED_PKT_INIT(&pkt);
        fq_getq_flow(fqs, fq, &pkt, now);
        ASSERT(pkt.pktsched_ptype == QP_PACKET);

        plen = pktsched_get_pkt_len(&pkt);
        fq->fq_deficit -= plen;
        if (__improbable((fq->fq_flags & FQF_FRESH_FLOW) != 0)) {
            pkt.pktsched_pkt_kpkt->pkt_pflags |= PKT_F_NEW_FLOW;
            fq->fq_flags &= ~FQF_FRESH_FLOW;
        }

        if (head->cp_kpkt == NULL) {
            *head = pkt.pktsched_pkt;
        } else {
            ASSERT(tail->cp_kpkt != NULL);
            ASSERT(tail->cp_kpkt->pkt_nextpkt == NULL);
            tail->cp_kpkt->pkt_nextpkt = pkt.pktsched_pkt_kpkt;
        }
        *tail = pkt.pktsched_pkt;
        tail->cp_kpkt->pkt_nextpkt = NULL;
        fq_cl->fcl_stat.fcl_dequeue++;
        fq_cl->fcl_stat.fcl_dequeue_bytes += plen;
        *pkt_cnt += 1;
        *byte_cnt += plen;

        ifclassq_set_packet_metadata(ifq, ifp, &pkt.pktsched_pkt);

        /* Check if the limit is reached */
        if (*pkt_cnt >= pkt_limit || *byte_cnt >= byte_limit) {
            limit_reached = TRUE;
        }
    }
    KDBG(AQM_KTRACE_STATS_FLOW_DEQUEUE, fq->fq_flowhash,
        AQM_KTRACE_FQ_GRP_SC_IDX(fq),
        fq->fq_bytes, fq->fq_min_qdelay);

    *qempty = KPKTQ_EMPTY(&fq->fq_kpktq);
    return limit_reached;
}
#endif /* SKYWALK */

static boolean_t
fq_getq_flow_mbuf(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
    int64_t byte_limit, uint32_t pkt_limit, classq_pkt_t *head,
    classq_pkt_t *tail, uint32_t *byte_cnt, uint32_t *pkt_cnt,
    boolean_t *qempty, uint64_t now)
{
    u_int32_t plen;
    pktsched_pkt_t pkt;
    boolean_t limit_reached = FALSE;
    struct ifclassq *ifq = fqs->fqs_ifq;
    struct ifnet *ifp = ifq->ifcq_ifp;

    while (fq->fq_deficit > 0 && limit_reached == FALSE &&
        !MBUFQ_EMPTY(&fq->fq_mbufq) && fq_tx_time_ready(fqs, fq, now, NULL)) {
        _PKTSCHED_PKT_INIT(&pkt);
        fq_getq_flow(fqs, fq, &pkt, now);
        ASSERT(pkt.pktsched_ptype == QP_MBUF);

        plen = pktsched_get_pkt_len(&pkt);
        fq->fq_deficit -= plen;

        if (__improbable((fq->fq_flags & FQF_FRESH_FLOW) != 0)) {
            pkt.pktsched_pkt_mbuf->m_pkthdr.pkt_flags |= PKTF_NEW_FLOW;
            fq->fq_flags &= ~FQF_FRESH_FLOW;
        }

        if (head->cp_mbuf == NULL) {
            *head = pkt.pktsched_pkt;
        } else {
            ASSERT(tail->cp_mbuf != NULL);
            ASSERT(tail->cp_mbuf->m_nextpkt == NULL);
            tail->cp_mbuf->m_nextpkt = pkt.pktsched_pkt_mbuf;
        }
        *tail = pkt.pktsched_pkt;
        tail->cp_mbuf->m_nextpkt = NULL;
        fq_cl->fcl_stat.fcl_dequeue++;
        fq_cl->fcl_stat.fcl_dequeue_bytes += plen;
        *pkt_cnt += 1;
        *byte_cnt += plen;

        ifclassq_set_packet_metadata(ifq, ifp, &pkt.pktsched_pkt);

        /* Check if the limit is reached */
        if (*pkt_cnt >= pkt_limit || *byte_cnt >= byte_limit) {
            limit_reached = TRUE;
        }
    }
    KDBG(AQM_KTRACE_STATS_FLOW_DEQUEUE, fq->fq_flowhash,
        AQM_KTRACE_FQ_GRP_SC_IDX(fq),
        fq->fq_bytes, fq->fq_min_qdelay);

    *qempty = MBUFQ_EMPTY(&fq->fq_mbufq);
    return limit_reached;
}

static void
fq_if_pacemaker_tcall(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
    struct ifnet* ifp = (struct ifnet*)arg0;
    ASSERT(ifp != NULL);

    ifnet_start_ignore_delay(ifp);
}

fq_if_t *
fq_if_alloc(struct ifclassq *ifq, classq_pkt_type_t ptype)
{
    fq_if_t *fqs;

    ASSERT(ifq->ifcq_ifp != NULL);
    fqs = zalloc_flags(fq_if_zone, Z_WAITOK | Z_ZERO);
    fqs->fqs_ifq = ifq;
    fqs->fqs_ptype = ptype;

    /* Configure packet drop limit across all queues */
    fqs->fqs_pkt_droplimit = IFCQ_PKT_DROP_LIMIT(ifq);
    STAILQ_INIT(&fqs->fqs_fclist);
    TAILQ_INIT(&fqs->fqs_empty_list);
    TAILQ_INIT(&fqs->fqs_combined_grp_list);
    fqs->fqs_pacemaker_tcall = thread_call_allocate_with_options(fq_if_pacemaker_tcall,
        (thread_call_param_t)(ifq->ifcq_ifp), THREAD_CALL_PRIORITY_KERNEL,
        THREAD_CALL_OPTIONS_ONCE);
    ASSERT(fqs->fqs_pacemaker_tcall != NULL);

    return fqs;
}

void
fq_if_destroy(fq_if_t *fqs)
{
    struct ifnet *ifp = fqs->fqs_ifq->ifcq_ifp;
    thread_call_t tcall = fqs->fqs_pacemaker_tcall;

    VERIFY(ifp != NULL);
    ASSERT(tcall != NULL);
    IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);
    LCK_MTX_ASSERT(&ifp->if_start_lock, LCK_MTX_ASSERT_NOTOWNED);
    IFCQ_CONVERT_LOCK(fqs->fqs_ifq);

    /*
     * Since we are holding the IFCQ lock here, another thread cannot enter AQM
     * and schedule a pacemaker call. So we do not need a sleep-wait loop here;
     * cancel-wait and free should succeed in one call.
     */
    thread_call_cancel_wait(tcall);
    ASSERT(thread_call_free(tcall));

    fq_if_purge(fqs);
    fq_if_destroy_grps(fqs);

    fqs->fqs_ifq = NULL;
    zfree(fq_if_zone, fqs);
}

static inline uint8_t
fq_if_service_to_priority(fq_if_t *fqs, mbuf_svc_class_t svc)
{
    uint8_t pri;

    if (fqs->fqs_flags & FQS_DRIVER_MANAGED) {
        switch (svc) {
        case MBUF_SC_BK_SYS:
        case MBUF_SC_BK:
            pri = FQ_IF_BK_INDEX;
            break;
        case MBUF_SC_BE:
        case MBUF_SC_RD:
        case MBUF_SC_OAM:
            pri = FQ_IF_BE_INDEX;
            break;
        case MBUF_SC_AV:
        case MBUF_SC_RV:
        case MBUF_SC_VI:
        case MBUF_SC_SIG:
            pri = FQ_IF_VI_INDEX;
            break;
        case MBUF_SC_VO:
        case MBUF_SC_CTL:
            pri = FQ_IF_VO_INDEX;
            break;
        default:
            pri = FQ_IF_BE_INDEX; /* Use best effort by default */
            break;
        }
        return pri;
    }

    /* scheduler is not managed by the driver */
    switch (svc) {
    case MBUF_SC_BK_SYS:
        pri = FQ_IF_BK_SYS_INDEX;
        break;
    case MBUF_SC_BK:
        pri = FQ_IF_BK_INDEX;
        break;
    case MBUF_SC_BE:
        pri = FQ_IF_BE_INDEX;
        break;
    case MBUF_SC_RD:
        pri = FQ_IF_RD_INDEX;
        break;
    case MBUF_SC_OAM:
        pri = FQ_IF_OAM_INDEX;
        break;
    case MBUF_SC_AV:
        pri = FQ_IF_AV_INDEX;
        break;
    case MBUF_SC_RV:
        pri = FQ_IF_RV_INDEX;
        break;
    case MBUF_SC_VI:
        pri = FQ_IF_VI_INDEX;
        break;
    case MBUF_SC_SIG:
        pri = FQ_IF_SIG_INDEX;
        break;
    case MBUF_SC_VO:
        pri = FQ_IF_VO_INDEX;
        break;
    case MBUF_SC_CTL:
        pri = FQ_IF_CTL_INDEX;
        break;
    default:
        pri = FQ_IF_BE_INDEX; /* Use best effort by default */
        break;
    }
    return pri;
}
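
/*
 * Note (illustrative summary of the mapping above): a driver-managed
 * scheduler collapses the mbuf service classes into four priorities
 * (BK, BE, VI, VO); otherwise each service class maps to its own
 * priority index.
 */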

void
fq_if_classq_init(fq_if_group_t *fqg, uint32_t pri, uint32_t quantum,
    uint32_t drr_max, uint32_t svc_class)
{
    fq_if_classq_t *fq_cl;
    VERIFY(pri < FQ_IF_MAX_CLASSES);
    fq_cl = &fqg->fqg_classq[pri];

    VERIFY(fq_cl->fcl_quantum == 0);
    VERIFY(quantum != 0);
    fq_cl->fcl_quantum = quantum;
    fq_cl->fcl_pri = pri;
    fq_cl->fcl_drr_max = drr_max;
    fq_cl->fcl_service_class = svc_class;
    fq_cl->fcl_next_tx_time = 0;
    fq_cl->fcl_flags = 0;
    STAILQ_INIT(&fq_cl->fcl_new_flows);
    STAILQ_INIT(&fq_cl->fcl_old_flows);
}

int
fq_if_enqueue_classq(struct ifclassq *ifq, classq_pkt_t *head,
    classq_pkt_t *tail, uint32_t cnt, uint32_t bytes, boolean_t *pdrop)
{
    uint8_t pri, grp_idx = 0;
    fq_if_t *fqs;
    fq_if_classq_t *fq_cl;
    fq_if_group_t *fq_group;
    int ret;
    mbuf_svc_class_t svc;
    pktsched_pkt_t pkt;

    pktsched_pkt_encap_chain(&pkt, head, tail, cnt, bytes);

    fqs = (fq_if_t *)ifq->ifcq_disc;
    svc = pktsched_get_pkt_svc(&pkt);
#if SKYWALK
    if (head->cp_ptype == QP_PACKET) {
        grp_idx = head->cp_kpkt->pkt_qset_idx;
    }
#endif /* SKYWALK */
    pri = fq_if_service_to_priority(fqs, svc);
    VERIFY(pri < FQ_IF_MAX_CLASSES);

    IFCQ_LOCK_SPIN(ifq);
    fq_group = fq_if_find_grp(fqs, grp_idx);
    fq_cl = &fq_group->fqg_classq[pri];

    if (__improbable(svc == MBUF_SC_BK_SYS && fqs->fqs_throttle == 1)) {
        IFCQ_UNLOCK(ifq);
        /* BK_SYS is currently throttled */
        os_atomic_inc(&fq_cl->fcl_stat.fcl_throttle_drops, relaxed);
        pktsched_free_pkt(&pkt);
        *pdrop = TRUE;
        ret = EQSUSPENDED;
        goto done;
    }

    ASSERT(pkt.pktsched_ptype == fqs->fqs_ptype);
    ret = fq_addq(fqs, fq_group, &pkt, fq_cl);
    if (!FQ_IF_CLASSQ_IDLE(fq_cl)) {
        if (((fq_group->fqg_bitmaps[FQ_IF_ER] | fq_group->fqg_bitmaps[FQ_IF_EB]) &
            (1 << pri)) == 0) {
            /*
             * this class is not in the ER or EB bitmaps,
             * mark it in IB
             */
            pktsched_bit_set(pri, &fq_group->fqg_bitmaps[FQ_IF_IB]);
        }
    }

    if (__improbable(ret != 0)) {
        if (ret == CLASSQEQ_SUCCESS_FC) {
            /* packet enqueued, return advisory feedback */
            ret = EQFULL;
            *pdrop = FALSE;
        } else if (ret == CLASSQEQ_COMPRESSED) {
            ret = 0;
            *pdrop = FALSE;
        } else {
            IFCQ_UNLOCK(ifq);
            *pdrop = TRUE;
            pktsched_free_pkt(&pkt);
            switch (ret) {
            case CLASSQEQ_DROP:
                ret = ENOBUFS;
                goto done;
            case CLASSQEQ_DROP_FC:
                ret = EQFULL;
                goto done;
            case CLASSQEQ_DROP_SP:
                ret = EQSUSPENDED;
                goto done;
            default:
                VERIFY(0);
                /* NOTREACHED */
                __builtin_unreachable();
            }
            /* NOTREACHED */
            __builtin_unreachable();
        }
    } else {
        *pdrop = FALSE;
    }
    IFCQ_ADD_LEN(ifq, cnt);
    IFCQ_INC_BYTES(ifq, bytes);


    FQS_GRP_ADD_LEN(fqs, grp_idx, cnt);
    FQS_GRP_INC_BYTES(fqs, grp_idx, bytes);

    IFCQ_UNLOCK(ifq);
done:
#if DEBUG || DEVELOPMENT
    if (__improbable((ret == EQFULL) && (ifclassq_flow_control_adv == 0))) {
        ret = 0;
    }
#endif /* DEBUG || DEVELOPMENT */
    return ret;
}

void
fq_if_dequeue_classq(struct ifclassq *ifq, classq_pkt_t *pkt, uint8_t grp_idx)
{
    (void) fq_if_dequeue_classq_multi(ifq, 1,
        CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, pkt, NULL, NULL, NULL, grp_idx);
}

void
fq_if_dequeue_sc_classq(struct ifclassq *ifq, mbuf_svc_class_t svc,
    classq_pkt_t *pkt, uint8_t grp_idx)
{
    (void) fq_if_dequeue_sc_classq_multi(ifq, svc, 1,
        CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, pkt, NULL, NULL, NULL, grp_idx);
}

static inline void
fq_dqlist_add(flowq_dqlist_t *fq_dqlist_head, fq_t *fq)
{
    ASSERT(fq->fq_dq_head.cp_mbuf == NULL);
    ASSERT(!fq->fq_in_dqlist);
    STAILQ_INSERT_TAIL(fq_dqlist_head, fq, fq_dqlink);
    fq->fq_in_dqlist = true;
}

static inline void
fq_dqlist_remove(flowq_dqlist_t *fq_dqlist_head, fq_t *fq, classq_pkt_t *head,
    classq_pkt_t *tail, classq_pkt_type_t ptype)
{
    ASSERT(fq->fq_in_dqlist);
    if (fq->fq_dq_head.cp_mbuf == NULL) {
        goto done;
    }

    if (head->cp_mbuf == NULL) {
        *head = fq->fq_dq_head;
    } else {
        ASSERT(tail->cp_mbuf != NULL);

        switch (ptype) {
        case QP_MBUF:
            ASSERT(tail->cp_mbuf->m_nextpkt == NULL);
            tail->cp_mbuf->m_nextpkt = fq->fq_dq_head.cp_mbuf;
            ASSERT(fq->fq_dq_tail.cp_mbuf->m_nextpkt == NULL);
            break;
#if SKYWALK
        case QP_PACKET:
            ASSERT(tail->cp_kpkt->pkt_nextpkt == NULL);
            tail->cp_kpkt->pkt_nextpkt = fq->fq_dq_head.cp_kpkt;
            ASSERT(fq->fq_dq_tail.cp_kpkt->pkt_nextpkt == NULL);
            break;
#endif /* SKYWALK */
        default:
            VERIFY(0);
            /* NOTREACHED */
            __builtin_unreachable();
        }
    }
    *tail = fq->fq_dq_tail;
done:
    STAILQ_REMOVE(fq_dqlist_head, fq, flowq, fq_dqlink);
    CLASSQ_PKT_INIT(&fq->fq_dq_head);
    CLASSQ_PKT_INIT(&fq->fq_dq_tail);
    fq->fq_in_dqlist = false;
}

static inline void
fq_dqlist_get_packet_list(flowq_dqlist_t *fq_dqlist_head, classq_pkt_t *head,
    classq_pkt_t *tail, classq_pkt_type_t ptype)
{
    fq_t *fq, *tfq;

    STAILQ_FOREACH_SAFE(fq, fq_dqlist_head, fq_dqlink, tfq) {
        fq_dqlist_remove(fq_dqlist_head, fq, head, tail, ptype);
    }
}

static int
fq_if_grps_bitmap_ffs(fq_grp_tailq_t *grp_list, int pri, fq_if_state state,
    fq_if_group_t **selected_grp)
{
#pragma unused(pri)

    fq_if_group_t *grp;
    uint32_t highest_pri = FQ_IF_MAX_CLASSES;
    int ret_pri = 0;

    TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
        uint32_t cur_pri = pktsched_ffs(grp->fqg_bitmaps[state]);
        /* bitmap is empty in this case */
        if (cur_pri == 0) {
            continue;
        }
        if (cur_pri <= highest_pri) {
            highest_pri = cur_pri;
            ret_pri = cur_pri;
            *selected_grp = grp;
        }
    }
    return ret_pri;
}

static boolean_t
fq_if_grps_bitmap_zeros(fq_grp_tailq_t *grp_list, int pri, fq_if_state state)
{
#pragma unused(pri)

    fq_if_group_t *grp;

    TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
        if (grp->fqg_bitmaps[state] != 0) {
            return FALSE;
        }
    }
    return TRUE;
}

static void
fq_if_grps_bitmap_cpy(fq_grp_tailq_t *grp_list, int pri, fq_if_state dst_state,
    fq_if_state src_state)
{
#pragma unused(pri)

    fq_if_group_t *grp;
    TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
        grp->fqg_bitmaps[dst_state] = grp->fqg_bitmaps[src_state];
    }
}

static void
fq_if_grps_bitmap_clr(fq_grp_tailq_t *grp_list, int pri, fq_if_state state)
{
#pragma unused(pri)

    fq_if_group_t *grp;
    TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
        grp->fqg_bitmaps[state] = 0;
    }
}

static void
fq_if_grps_bitmap_move(fq_grp_tailq_t *grp_list, int pri, fq_if_state dst_state,
    fq_if_state src_state)
{
#pragma unused(pri)

    fq_if_group_t *grp;
    TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
        grp->fqg_bitmaps[dst_state] =
            grp->fqg_bitmaps[dst_state] | grp->fqg_bitmaps[src_state];
        grp->fqg_bitmaps[src_state] = 0;
    }
}

static int
fq_if_grps_sc_bitmap_ffs(fq_grp_tailq_t *grp_list, int pri, fq_if_state state,
    fq_if_group_t **selected_grp)
{
    fq_if_group_t *grp;
    int ret_pri = 0;

    TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
        if (pktsched_bit_tst(pri, &grp->fqg_bitmaps[state])) {
            /* +1 to match the semantics of pktsched_ffs */
            ret_pri = pri + 1;
            *selected_grp = grp;
            break;
        }
    }

    return ret_pri;
}

static boolean_t
fq_if_grps_sc_bitmap_zeros(fq_grp_tailq_t *grp_list, int pri, fq_if_state state)
{
    fq_if_group_t *grp;

    TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
        if (pktsched_bit_tst(pri, &grp->fqg_bitmaps[state])) {
            return FALSE;
        }
    }
    return TRUE;
}

static void
fq_if_grps_sc_bitmap_cpy(fq_grp_tailq_t *grp_list, int pri, fq_if_state dst_state,
    fq_if_state src_state)
{
    fq_if_group_t *grp;

    TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
        pktsched_bit_cpy(pri, &grp->fqg_bitmaps[dst_state],
            &grp->fqg_bitmaps[src_state]);
    }
}

static void
fq_if_grps_sc_bitmap_clr(fq_grp_tailq_t *grp_list, int pri, fq_if_state state)
{
    fq_if_group_t *grp;

    TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
        pktsched_bit_clr(pri, &grp->fqg_bitmaps[state]);
    }
}

static void
fq_if_grps_sc_bitmap_move(fq_grp_tailq_t *grp_list, int pri, fq_if_state dst_state,
    fq_if_state src_state)
{
    fq_if_group_t *grp;

    TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
        pktsched_bit_move(pri, &grp->fqg_bitmaps[dst_state],
            &grp->fqg_bitmaps[src_state]);
        pktsched_bit_clr(pri, &grp->fqg_bitmaps[src_state]);
    }
}

/*
 * Pacemaker is only scheduled when no packet can be dequeued from AQM
 * due to pacing. Pacemaker will doorbell the driver when current >= next_tx_time.
 * This only applies to L4S traffic at this moment.
 */
static void
fq_if_schedule_pacemaker(fq_if_t *fqs, uint64_t now, uint64_t next_tx_time)
{
    uint64_t deadline = 0;
    if (!ifclassq_enable_pacing || !ifclassq_enable_l4s) {
        return;
    }
    ASSERT(next_tx_time != FQ_INVALID_TX_TS);
    ASSERT(fqs->fqs_pacemaker_tcall != NULL);
    ASSERT(now < next_tx_time);

    DTRACE_SKYWALK2(pacemaker__schedule, struct ifnet*, fqs->fqs_ifq->ifcq_ifp,
        uint64_t, next_tx_time - now);
    KDBG(AQM_KTRACE_TX_PACEMAKER, fqs->fqs_ifq->ifcq_ifp->if_index, now,
        next_tx_time, next_tx_time - now);

    clock_interval_to_deadline((uint32_t)(next_tx_time - now), 1, &deadline);
    thread_call_enter_delayed(fqs->fqs_pacemaker_tcall, deadline);
}
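
/*
 * Illustrative timing: with a scale factor of 1, the remaining pacing delay
 * (next_tx_time - now) above is interpreted in nanoseconds, so a flow paced
 * 2ms into the future yields a thread-call deadline roughly 2,000,000 ns
 * from now.
 */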

static int
fq_if_dequeue_classq_multi_common(struct ifclassq *ifq, mbuf_svc_class_t svc,
    u_int32_t maxpktcnt, u_int32_t maxbytecnt, classq_pkt_t *first_packet,
    classq_pkt_t *last_packet, u_int32_t *retpktcnt, u_int32_t *retbytecnt,
    uint8_t grp_idx)
{
    uint32_t total_pktcnt = 0, total_bytecnt = 0;
    classq_pkt_t first = CLASSQ_PKT_INITIALIZER(first);
    classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);
    classq_pkt_t tmp = CLASSQ_PKT_INITIALIZER(tmp);
    fq_if_append_pkt_t append_pkt;
    flowq_dqlist_t fq_dqlist_head;
    fq_if_classq_t *fq_cl;
    fq_grp_tailq_t *grp_list, tmp_grp_list;
    fq_if_group_t *fq_grp = NULL;
    fq_if_t *fqs;
    uint64_t now, next_tx_time = FQ_INVALID_TX_TS;
    int pri = 0, svc_pri = 0;
    bool all_paced = true;

    IFCQ_LOCK_ASSERT_HELD(ifq);

    fqs = (fq_if_t *)ifq->ifcq_disc;
    STAILQ_INIT(&fq_dqlist_head);

    switch (fqs->fqs_ptype) {
    case QP_MBUF:
        append_pkt = fq_if_append_mbuf;
        break;

#if SKYWALK
    case QP_PACKET:
        append_pkt = fq_if_append_pkt;
        break;
#endif /* SKYWALK */

    default:
        VERIFY(0);
        /* NOTREACHED */
        __builtin_unreachable();
    }

    now = fq_codel_get_time();
    if (fqs->fqs_flags & FQS_DRIVER_MANAGED) {
        svc_pri = fq_if_service_to_priority(fqs, svc);
    } else {
        VERIFY(svc == MBUF_SC_UNSPEC);
    }

    if (fq_if_is_grp_combined(fqs, grp_idx)) {
        grp_list = &fqs->fqs_combined_grp_list;
        VERIFY(!TAILQ_EMPTY(grp_list));
    } else {
        grp_list = &tmp_grp_list;
        fq_grp = fq_if_find_grp(fqs, grp_idx);
        TAILQ_INIT(grp_list);
        TAILQ_INSERT_TAIL(grp_list, fq_grp, fqg_grp_link);
    }

    for (;;) {
        uint32_t pktcnt = 0, bytecnt = 0;
        classq_pkt_t head = CLASSQ_PKT_INITIALIZER(head);
        classq_pkt_t tail = CLASSQ_PKT_INITIALIZER(tail);
        bool fq_cl_all_paced = false;
        uint64_t fq_cl_next_tx_time = FQ_INVALID_TX_TS;

        if (fqs->grp_bitmaps_zeros(grp_list, svc_pri, FQ_IF_ER) &&
            fqs->grp_bitmaps_zeros(grp_list, svc_pri, FQ_IF_EB)) {
            fqs->grp_bitmaps_cpy(grp_list, svc_pri, FQ_IF_EB, FQ_IF_IB);
            fqs->grp_bitmaps_clr(grp_list, svc_pri, FQ_IF_IB);
            if (fqs->grp_bitmaps_zeros(grp_list, svc_pri, FQ_IF_EB)) {
                if (ifclassq_enable_pacing && ifclassq_enable_l4s) {
                    /*
                     * Move fq_cl in IR back to ER, so that they will be inspected
                     * with priority the next time the driver dequeues
                     */
                    fqs->grp_bitmaps_cpy(grp_list, svc_pri, FQ_IF_ER, FQ_IF_IR);
                    fqs->grp_bitmaps_clr(grp_list, svc_pri, FQ_IF_IR);
                }
                break;
            }
        }
        pri = fqs->grp_bitmaps_ffs(grp_list, svc_pri, FQ_IF_ER, &fq_grp);
        if (pri == 0) {
            /*
             * There are no ER flows, move the highest
             * priority one from EB if there are any in that
             * category
             */
            pri = fqs->grp_bitmaps_ffs(grp_list, svc_pri, FQ_IF_EB, &fq_grp);
            VERIFY(pri > 0);
            VERIFY(fq_grp != NULL);
            pktsched_bit_clr((pri - 1), &fq_grp->fqg_bitmaps[FQ_IF_EB]);
            pktsched_bit_set((pri - 1), &fq_grp->fqg_bitmaps[FQ_IF_ER]);
        }
        VERIFY(fq_grp != NULL);
        pri--; /* index starts at 0 */
        fq_cl = &fq_grp->fqg_classq[pri];

        if (fq_cl->fcl_budget <= 0) {
            /* Update the budget */
            fq_cl->fcl_budget += (min(fq_cl->fcl_drr_max,
                fq_cl->fcl_stat.fcl_flows_cnt) *
                fq_cl->fcl_quantum);
            if (fq_cl->fcl_budget <= 0) {
                goto state_change;
            }
        }
        fq_if_dequeue(fqs, fq_cl, (maxpktcnt - total_pktcnt),
            (maxbytecnt - total_bytecnt), &head, &tail, &pktcnt,
            &bytecnt, &fq_dqlist_head, true, now, &fq_cl_all_paced,
            &fq_cl_next_tx_time);
        if (head.cp_mbuf != NULL) {
            ASSERT(STAILQ_EMPTY(&fq_dqlist_head));
            if (first.cp_mbuf == NULL) {
                first = head;
            } else {
                ASSERT(last.cp_mbuf != NULL);
                append_pkt(&last, &head);
            }
            last = tail;
            append_pkt(&last, &tmp);
        }
        if (fq_cl_all_paced && fq_cl_next_tx_time < next_tx_time) {
            fq_cl->fcl_stat.fcl_fcl_pacemaker_needed++;
            next_tx_time = fq_cl_next_tx_time;
        }
        fq_cl->fcl_budget -= bytecnt;
        total_pktcnt += pktcnt;
        total_bytecnt += bytecnt;

        /*
         * If the class has exceeded the budget but still has data
         * to send, move it to IB
         */
state_change:
        VERIFY(fq_grp != NULL);
        all_paced &= fq_cl_all_paced;
        if (!FQ_IF_CLASSQ_IDLE(fq_cl)) {
            if (fq_cl->fcl_budget <= 0) {
                pktsched_bit_set(pri, &fq_grp->fqg_bitmaps[FQ_IF_IB]);
                pktsched_bit_clr(pri, &fq_grp->fqg_bitmaps[FQ_IF_ER]);
            } else if (fq_cl_all_paced) {
                if (ifclassq_enable_pacing && ifclassq_enable_l4s) {
                    /*
                     * If a fq_cl still has budget but only paced queues, park it
                     * in IR so that we will not keep looping over it
                     */
                    pktsched_bit_set(pri, &fq_grp->fqg_bitmaps[FQ_IF_IR]);
                    pktsched_bit_clr(pri, &fq_grp->fqg_bitmaps[FQ_IF_ER]);
                }
            }
        } else {
            pktsched_bit_clr(pri, &fq_grp->fqg_bitmaps[FQ_IF_ER]);
            VERIFY(((fq_grp->fqg_bitmaps[FQ_IF_ER] |
                fq_grp->fqg_bitmaps[FQ_IF_EB] |
                fq_grp->fqg_bitmaps[FQ_IF_IB]) & (1 << pri)) == 0);
            fq_cl->fcl_budget = 0;
        }
        if (total_pktcnt >= maxpktcnt || total_bytecnt >= maxbytecnt) {
            if (ifclassq_enable_pacing && ifclassq_enable_l4s) {
                /*
                 * Move fq_cl in IR back to ER, so that they will be inspected
                 * with priority the next time the driver dequeues
                 */
                fqs->grp_bitmaps_move(grp_list, svc_pri, FQ_IF_ER, FQ_IF_IR);
            }
            break;
        }
    }

    if (!fq_if_is_grp_combined(fqs, grp_idx)) {
        TAILQ_REMOVE(grp_list, fq_grp, fqg_grp_link);
        VERIFY(TAILQ_EMPTY(grp_list));
    }

    fq_dqlist_get_packet_list(&fq_dqlist_head, &first, &last,
        fqs->fqs_ptype);

    if (__probable(first_packet != NULL)) {
        *first_packet = first;
    }
    if (last_packet != NULL) {
        *last_packet = last;
    }
    if (retpktcnt != NULL) {
        *retpktcnt = total_pktcnt;
    }
    if (retbytecnt != NULL) {
        *retbytecnt = total_bytecnt;
    }
    if (next_tx_time != FQ_INVALID_TX_TS) {
        ASSERT(next_tx_time > now);
        fq_if_schedule_pacemaker(fqs, now, next_tx_time);
    }

    IFCQ_XMIT_ADD(ifq, total_pktcnt, total_bytecnt);
    fq_if_purge_empty_flow_list(fqs, now, false);
    return 0;
}

int
fq_if_dequeue_classq_multi(struct ifclassq *ifq, u_int32_t maxpktcnt,
    u_int32_t maxbytecnt, classq_pkt_t *first_packet,
    classq_pkt_t *last_packet, u_int32_t *retpktcnt,
    u_int32_t *retbytecnt, uint8_t grp_idx)
{
    return fq_if_dequeue_classq_multi_common(ifq, MBUF_SC_UNSPEC, maxpktcnt, maxbytecnt,
        first_packet, last_packet, retpktcnt, retbytecnt, grp_idx);
}

int
fq_if_dequeue_sc_classq_multi(struct ifclassq *ifq, mbuf_svc_class_t svc,
    u_int32_t maxpktcnt, u_int32_t maxbytecnt, classq_pkt_t *first_packet,
    classq_pkt_t *last_packet, u_int32_t *retpktcnt, u_int32_t *retbytecnt,
    uint8_t grp_idx)
{
    fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;

    if (fq_if_is_grp_combined(fqs, grp_idx)) {
        return fq_if_dequeue_classq_multi_common(ifq, svc, maxpktcnt, maxbytecnt,
            first_packet, last_packet, retpktcnt, retbytecnt, grp_idx);
    } else {
        /*
         * take a shortcut here since dequeueing a single
         * service class does not need the full scheduler.
         */
        return fq_if_dequeue_sc_classq_multi_separate(ifq, svc, maxpktcnt, maxbytecnt,
            first_packet, last_packet, retpktcnt, retbytecnt, grp_idx);
    }
}

static int
fq_if_dequeue_sc_classq_multi_separate(struct ifclassq *ifq, mbuf_svc_class_t svc,
    u_int32_t maxpktcnt, u_int32_t maxbytecnt, classq_pkt_t *first_packet,
    classq_pkt_t *last_packet, u_int32_t *retpktcnt, u_int32_t *retbytecnt,
    uint8_t grp_idx)
{
    fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;
    uint8_t pri;
    u_int32_t total_pktcnt = 0, total_bytecnt = 0;
    fq_if_classq_t *fq_cl;
    classq_pkt_t first = CLASSQ_PKT_INITIALIZER(first);
    classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);
    fq_if_append_pkt_t append_pkt;
    flowq_dqlist_t fq_dqlist_head;
    fq_if_group_t *fq_grp;
    uint64_t now;

    switch (fqs->fqs_ptype) {
    case QP_MBUF:
        append_pkt = fq_if_append_mbuf;
        break;

#if SKYWALK
    case QP_PACKET:
        append_pkt = fq_if_append_pkt;
        break;
#endif /* SKYWALK */

    default:
        VERIFY(0);
        /* NOTREACHED */
        __builtin_unreachable();
    }

    STAILQ_INIT(&fq_dqlist_head);
    now = fq_codel_get_time();

    pri = fq_if_service_to_priority(fqs, svc);
    fq_grp = fq_if_find_grp(fqs, grp_idx);
    fq_cl = &fq_grp->fqg_classq[pri];

    /*
     * Now we have the queue for a particular service class. We need
     * to dequeue as many packets as needed, first from the new flows
     * and then from the old flows.
     */
    while (total_pktcnt < maxpktcnt && total_bytecnt < maxbytecnt &&
        fq_cl->fcl_stat.fcl_pkt_cnt > 0) {
        classq_pkt_t head = CLASSQ_PKT_INITIALIZER(head);
        classq_pkt_t tail = CLASSQ_PKT_INITIALIZER(tail);
        u_int32_t pktcnt = 0, bytecnt = 0;
        bool all_paced = false;
        uint64_t next_tx_time = FQ_INVALID_TX_TS;

        fq_if_dequeue(fqs, fq_cl, (maxpktcnt - total_pktcnt),
            (maxbytecnt - total_bytecnt), &head, &tail, &pktcnt,
            &bytecnt, &fq_dqlist_head, false, now, &all_paced, &next_tx_time);
        if (head.cp_mbuf != NULL) {
            if (first.cp_mbuf == NULL) {
                first = head;
            } else {
                ASSERT(last.cp_mbuf != NULL);
                append_pkt(&last, &head);
            }
            last = tail;
        }
        total_pktcnt += pktcnt;
        total_bytecnt += bytecnt;

        if (next_tx_time != FQ_INVALID_TX_TS) {
            ASSERT(next_tx_time > now);
            fq_cl->fcl_stat.fcl_fcl_pacemaker_needed++;
            fq_if_schedule_pacemaker(fqs, now, next_tx_time);
            break;
        }
    }

    /*
     * Mark the classq as IB if it's not idle, so that we can
     * start without re-initializing the bitmaps when it's switched
     * to combined mode.
     */
    if (!FQ_IF_CLASSQ_IDLE(fq_cl)) {
        pktsched_bit_set(pri, &fq_grp->fqg_bitmaps[FQ_IF_IB]);
        pktsched_bit_clr(pri, &fq_grp->fqg_bitmaps[FQ_IF_ER]);
        pktsched_bit_clr(pri, &fq_grp->fqg_bitmaps[FQ_IF_EB]);
    } else {
        pktsched_bit_clr(pri, &fq_grp->fqg_bitmaps[FQ_IF_IB]);
        VERIFY(((fq_grp->fqg_bitmaps[FQ_IF_ER] |
            fq_grp->fqg_bitmaps[FQ_IF_EB] |
            fq_grp->fqg_bitmaps[FQ_IF_IB]) & (1 << pri)) == 0);
    }

    fq_dqlist_get_packet_list(&fq_dqlist_head, &first, &last, fqs->fqs_ptype);

    if (__probable(first_packet != NULL)) {
        *first_packet = first;
    }
    if (last_packet != NULL) {
        *last_packet = last;
    }
    if (retpktcnt != NULL) {
        *retpktcnt = total_pktcnt;
    }
    if (retbytecnt != NULL) {
        *retbytecnt = total_bytecnt;
    }

    IFCQ_XMIT_ADD(ifq, total_pktcnt, total_bytecnt);
    fq_if_purge_empty_flow_list(fqs, now, false);
    return 0;
}

static void
fq_if_purge_flow(fq_if_t *fqs, fq_t *fq, uint32_t *pktsp,
    uint32_t *bytesp, uint64_t now)
{
    fq_if_classq_t *fq_cl;
    u_int32_t pkts, bytes;
    pktsched_pkt_t pkt;
    fq_if_group_t *grp;

    fq_cl = &FQ_CLASSQ(fq);
    grp = FQ_GROUP(fq);
    pkts = bytes = 0;
    _PKTSCHED_PKT_INIT(&pkt);
    for (;;) {
        fq_getq_flow(fqs, fq, &pkt, now);
        if (pkt.pktsched_pkt_mbuf == NULL) {
            VERIFY(pkt.pktsched_ptype == QP_INVALID);
            break;
        }
        pkts++;
        bytes += pktsched_get_pkt_len(&pkt);
        pktsched_free_pkt(&pkt);
    }
    KDBG(AQM_KTRACE_STATS_FLOW_DEQUEUE, fq->fq_flowhash,
        AQM_KTRACE_FQ_GRP_SC_IDX(fq), fq->fq_bytes, fq->fq_min_qdelay);

    IFCQ_DROP_ADD(fqs->fqs_ifq, pkts, bytes);

    /* move through the flow queue states */
    VERIFY((fq->fq_flags & (FQF_NEW_FLOW | FQF_OLD_FLOW | FQF_EMPTY_FLOW)));
    if (fq->fq_flags & FQF_NEW_FLOW) {
        fq_if_empty_new_flow(fq, fq_cl);
    }
    if (fq->fq_flags & FQF_OLD_FLOW) {
        fq_if_empty_old_flow(fqs, fq_cl, fq, now);
    }
    if (fq->fq_flags & FQF_EMPTY_FLOW) {
        fq_if_purge_empty_flow(fqs, fq);
        fq = NULL;
    }

    if (FQ_IF_CLASSQ_IDLE(fq_cl)) {
        int i;
        for (i = FQ_IF_ER; i < FQ_IF_MAX_STATE; i++) {
            pktsched_bit_clr(fq_cl->fcl_pri, &grp->fqg_bitmaps[i]);
        }
    }

    if (pktsp != NULL) {
        *pktsp = pkts;
    }
    if (bytesp != NULL) {
        *bytesp = bytes;
    }
}

static void
fq_if_purge_classq(fq_if_t *fqs, fq_if_classq_t *fq_cl)
{
    fq_t *fq, *tfq;
    uint64_t now;

    now = fq_codel_get_time();
    /*
     * Take each flow from the new/old flow lists and flush the
     * packets in that flow
     */
    STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_new_flows, fq_actlink, tfq) {
        fq_if_purge_flow(fqs, fq, NULL, NULL, now);
    }
    STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_old_flows, fq_actlink, tfq) {
        fq_if_purge_flow(fqs, fq, NULL, NULL, now);
    }
    VERIFY(STAILQ_EMPTY(&fq_cl->fcl_new_flows));
    VERIFY(STAILQ_EMPTY(&fq_cl->fcl_old_flows));

    STAILQ_INIT(&fq_cl->fcl_new_flows);
    STAILQ_INIT(&fq_cl->fcl_old_flows);
    fq_cl->fcl_budget = 0;
}

static void
fq_if_purge(fq_if_t *fqs)
{
    uint64_t now;
    fq_if_group_t *grp;
    int i;

    IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
    for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
        if (fqs->fqs_classq_groups[grp_idx] == NULL) {
            continue;
        }

        grp = fq_if_find_grp(fqs, grp_idx);
        fq_if_purge_grp(fqs, grp);
    }

    now = fq_codel_get_time();
    fq_if_purge_empty_flow_list(fqs, now, true);

    VERIFY(STAILQ_EMPTY(&fqs->fqs_fclist));
    VERIFY(TAILQ_EMPTY(&fqs->fqs_empty_list));

    fqs->fqs_large_flow = NULL;
    for (i = 0; i < FQ_IF_HASH_TABLE_SIZE; i++) {
        VERIFY(SLIST_EMPTY(&fqs->fqs_flows[i]));
    }

    IFCQ_LEN(fqs->fqs_ifq) = 0;
    IFCQ_BYTES(fqs->fqs_ifq) = 0;
}

static void
fq_if_purge_sc(fq_if_t *fqs, cqrq_purge_sc_t *req)
{
    fq_t *fq;
    uint64_t now;
    fq_if_group_t *grp;

    IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);
    req->packets = req->bytes = 0;
    VERIFY(req->flow != 0);

    now = fq_codel_get_time();

    for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
        if (fqs->fqs_classq_groups[grp_idx] == NULL) {
            continue;
        }
        uint32_t bytes = 0, pkts = 0;

        grp = fq_if_find_grp(fqs, grp_idx);
        /*
         * Packet and traffic type are needed only if we want
         * to create a flow queue.
         */
        fq = fq_if_hash_pkt(fqs, grp, req->flow, req->sc, 0, false, FQ_TFC_C);
        if (fq != NULL) {
            fq_if_purge_flow(fqs, fq, &pkts, &bytes, now);
            req->bytes += bytes;
            req->packets += pkts;
        }
    }
}

static uint16_t
fq_if_calc_quantum(struct ifnet *ifp)
{
    uint16_t quantum;

    switch (ifp->if_family) {
    case IFNET_FAMILY_ETHERNET:
        VERIFY((ifp->if_mtu + ETHER_HDR_LEN) <= UINT16_MAX);
        quantum = (uint16_t)ifp->if_mtu + ETHER_HDR_LEN;
        break;

    case IFNET_FAMILY_CELLULAR:
    case IFNET_FAMILY_IPSEC:
    case IFNET_FAMILY_UTUN:
        VERIFY(ifp->if_mtu <= UINT16_MAX);
        quantum = (uint16_t)ifp->if_mtu;
        break;

    default:
        quantum = FQ_CODEL_DEFAULT_QUANTUM;
        break;
    }

    if ((ifp->if_hwassist & IFNET_TSOF) != 0) {
        VERIFY(ifp->if_tso_v4_mtu <= UINT16_MAX);
        VERIFY(ifp->if_tso_v6_mtu <= UINT16_MAX);
        quantum = (uint16_t)MAX(ifp->if_tso_v4_mtu, ifp->if_tso_v6_mtu);
        quantum = (quantum != 0) ? quantum : IF_MAXMTU;
    }

    quantum = MAX(FQ_CODEL_DEFAULT_QUANTUM, quantum);
#if DEBUG || DEVELOPMENT
    quantum = (fq_codel_quantum != 0) ? fq_codel_quantum : quantum;
#endif /* DEBUG || DEVELOPMENT */
    VERIFY(quantum != 0);
    return quantum;
}
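
/*
 * Worked examples (illustrative): an Ethernet interface with a 1500-byte
 * MTU yields a 1514-byte quantum (MTU plus Ethernet header); a cellular
 * interface uses its MTU directly; with TSO advertised and a 65535-byte
 * if_tso_v4_mtu/if_tso_v6_mtu, the larger TSO MTU (65535) becomes the
 * quantum instead.
 */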

static void
fq_if_mtu_update(fq_if_t *fqs)
{
#define _FQ_CLASSQ_UPDATE_QUANTUM(_grp, _s, _q) \
    (_grp)->fqg_classq[FQ_IF_ ## _s ## _INDEX].fcl_quantum = \
    FQ_CODEL_QUANTUM_ ## _s(_q) \

    uint32_t quantum;
    fq_if_group_t *grp;

    quantum = fq_if_calc_quantum(fqs->fqs_ifq->ifcq_ifp);

    for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
        if (fqs->fqs_classq_groups[grp_idx] == NULL) {
            continue;
        }

        grp = fq_if_find_grp(fqs, grp_idx);

        if ((fqs->fqs_flags & FQS_DRIVER_MANAGED) != 0) {
            _FQ_CLASSQ_UPDATE_QUANTUM(grp, BK, quantum);
            _FQ_CLASSQ_UPDATE_QUANTUM(grp, BE, quantum);
            _FQ_CLASSQ_UPDATE_QUANTUM(grp, VI, quantum);
            _FQ_CLASSQ_UPDATE_QUANTUM(grp, VO, quantum);
        } else {
            _FQ_CLASSQ_UPDATE_QUANTUM(grp, BK_SYS, quantum);
            _FQ_CLASSQ_UPDATE_QUANTUM(grp, BK, quantum);
            _FQ_CLASSQ_UPDATE_QUANTUM(grp, BE, quantum);
            _FQ_CLASSQ_UPDATE_QUANTUM(grp, RD, quantum);
            _FQ_CLASSQ_UPDATE_QUANTUM(grp, OAM, quantum);
            _FQ_CLASSQ_UPDATE_QUANTUM(grp, AV, quantum);
            _FQ_CLASSQ_UPDATE_QUANTUM(grp, RV, quantum);
            _FQ_CLASSQ_UPDATE_QUANTUM(grp, VI, quantum);
            _FQ_CLASSQ_UPDATE_QUANTUM(grp, VO, quantum);
            _FQ_CLASSQ_UPDATE_QUANTUM(grp, CTL, quantum);
        }
    }
#undef _FQ_CLASSQ_UPDATE_QUANTUM
}

static void
fq_if_event(fq_if_t *fqs, cqev_t ev)
{
    IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);

    switch (ev) {
    case CLASSQ_EV_LINK_UP:
    case CLASSQ_EV_LINK_DOWN:
        fq_if_purge(fqs);
        break;
    case CLASSQ_EV_LINK_MTU:
        fq_if_mtu_update(fqs);
        break;
    default:
        break;
    }
}

static void
fq_if_classq_suspend(fq_if_t *fqs, fq_if_classq_t *fq_cl)
{
    fq_if_purge_classq(fqs, fq_cl);
    fqs->fqs_throttle = 1;
    fq_cl->fcl_stat.fcl_throttle_on++;
    KDBG(AQM_KTRACE_AON_THROTTLE | DBG_FUNC_START,
        fqs->fqs_ifq->ifcq_ifp->if_index, 0, 0, 0);
}

static void
fq_if_classq_resume(fq_if_t *fqs, fq_if_classq_t *fq_cl)
{
    VERIFY(FQ_IF_CLASSQ_IDLE(fq_cl));
    fqs->fqs_throttle = 0;
    fq_cl->fcl_stat.fcl_throttle_off++;
    KDBG(AQM_KTRACE_AON_THROTTLE | DBG_FUNC_END,
        fqs->fqs_ifq->ifcq_ifp->if_index, 0, 0, 0);
}


static int
fq_if_throttle(fq_if_t *fqs, cqrq_throttle_t *tr)
{
    struct ifclassq *ifq = fqs->fqs_ifq;
    uint8_t index;
    fq_if_group_t *grp;

#if !MACH_ASSERT
#pragma unused(ifq)
#endif
    IFCQ_LOCK_ASSERT_HELD(ifq);

    if (!tr->set) {
        tr->level = fqs->fqs_throttle;
        return 0;
    }

    if (tr->level == fqs->fqs_throttle) {
        return EALREADY;
    }

    /* Throttling is allowed on BK_SYS class only */
    index = fq_if_service_to_priority(fqs, MBUF_SC_BK_SYS);

    for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
        if (fqs->fqs_classq_groups[grp_idx] == NULL) {
            continue;
        }
        grp = fq_if_find_grp(fqs, grp_idx);
        switch (tr->level) {
        case IFNET_THROTTLE_OFF:
            fq_if_classq_resume(fqs, &grp->fqg_classq[index]);
            break;
        case IFNET_THROTTLE_OPPORTUNISTIC:
            fq_if_classq_suspend(fqs, &grp->fqg_classq[index]);
            break;
        default:
            break;
        }
    }
    return 0;
}

static inline boolean_t
fq_if_is_fq_cl_paced(fq_if_classq_t *fq_cl, uint64_t now)
{
    if ((fq_cl->fcl_flags & FCL_PACED) != 0 && fq_cl->fcl_next_tx_time > now) {
        return true;
    }

    fq_cl->fcl_flags &= ~FCL_PACED;
    fq_cl->fcl_next_tx_time = 0;
    return false;
}

static void
fq_if_grp_stat_sc(fq_if_t *fqs, fq_if_group_t *grp, cqrq_stat_sc_t *stat, uint64_t now)
{
    uint8_t pri;
    fq_if_classq_t *fq_cl;

    ASSERT(stat != NULL);
    pri = fq_if_service_to_priority(fqs, stat->sc);

    fq_cl = &grp->fqg_classq[pri];
    stat->packets = (uint32_t)fq_cl->fcl_stat.fcl_pkt_cnt;
    stat->bytes = (uint32_t)fq_cl->fcl_stat.fcl_byte_cnt;

    if (ifclassq_enable_pacing && ifclassq_enable_l4s &&
        fq_if_is_fq_cl_paced(fq_cl, now)) {
        stat->packets = 0;
        stat->bytes = 0;
    }
}

static boolean_t
fq_if_is_grp_all_paced(fq_if_group_t *grp)
{
    fq_if_classq_t *fq_cl;
    uint64_t now;

    if (!ifclassq_enable_pacing || !ifclassq_enable_l4s) {
        return false;
    }

    now = fq_codel_get_time();
    for (uint8_t fq_cl_idx = 0; fq_cl_idx < FQ_IF_MAX_CLASSES; fq_cl_idx++) {
        fq_cl = &grp->fqg_classq[fq_cl_idx];
        if (fq_cl == NULL || FQ_IF_CLASSQ_IDLE(fq_cl)) {
            continue;
        }
        if (!fq_if_is_fq_cl_paced(fq_cl, now)) {
            return false;
        }
    }

    return true;
}

boolean_t
fq_if_is_all_paced(struct ifclassq *ifq)
{
    fq_if_group_t *grp;
    fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;

    IFCQ_LOCK_ASSERT_HELD(ifq);

    if (!ifclassq_enable_pacing || !ifclassq_enable_l4s) {
        return false;
    }

    for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
        grp = fqs->fqs_classq_groups[grp_idx];
        if (grp == NULL || FQG_BYTES(grp) == 0) {
            continue;
        }

        if (!fq_if_is_grp_all_paced(grp)) {
            return false;
        }
    }

    return true;
}

void
fq_if_stat_sc(fq_if_t *fqs, cqrq_stat_sc_t *stat)
{
    cqrq_stat_sc_t grp_sc_stat;
    fq_if_group_t *grp;
    uint64_t now = fq_codel_get_time();

    if (stat == NULL) {
        return;
    }
    grp_sc_stat.sc = stat->sc;
    stat->packets = 0;
    stat->bytes = 0;

    if (stat->grp_idx == IF_CLASSQ_ALL_GRPS) {
        if (stat->sc == MBUF_SC_UNSPEC) {
            if (!fq_if_is_all_paced(fqs->fqs_ifq)) {
                stat->packets = IFCQ_LEN(fqs->fqs_ifq);
                stat->bytes = IFCQ_BYTES(fqs->fqs_ifq);
            }
        } else {
            for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
                grp = fqs->fqs_classq_groups[grp_idx];
                if (grp == NULL) {
                    continue;
                }

                fq_if_grp_stat_sc(fqs, grp, &grp_sc_stat, now);
                stat->packets += grp_sc_stat.packets;
                stat->bytes += grp_sc_stat.bytes;
            }
        }
        return;
    }

    if (stat->sc == MBUF_SC_UNSPEC) {
        if (fq_if_is_grp_combined(fqs, stat->grp_idx)) {
            TAILQ_FOREACH(grp, &fqs->fqs_combined_grp_list, fqg_grp_link) {
                if (fq_if_is_grp_all_paced(grp)) {
                    continue;
                }
                stat->packets += FQG_LEN(grp);
                stat->bytes += FQG_BYTES(grp);
            }
        } else {
            grp = fq_if_find_grp(fqs, stat->grp_idx);
            if (!fq_if_is_grp_all_paced(grp)) {
                stat->packets = FQG_LEN(grp);
                stat->bytes = FQG_BYTES(grp);
            }
        }
    } else {
        if (fq_if_is_grp_combined(fqs, stat->grp_idx)) {
            TAILQ_FOREACH(grp, &fqs->fqs_combined_grp_list, fqg_grp_link) {
                if (fq_if_is_grp_all_paced(grp)) {
                    continue;
                }
                fq_if_grp_stat_sc(fqs, grp, &grp_sc_stat, now);
                stat->packets += grp_sc_stat.packets;
                stat->bytes += grp_sc_stat.bytes;
            }
        } else {
            grp = fq_if_find_grp(fqs, stat->grp_idx);
            fq_if_grp_stat_sc(fqs, grp, stat, now);
        }
    }
}

int
fq_if_request_classq(struct ifclassq *ifq, cqrq_t rq, void *arg)
{
    int err = 0;
    fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;

    IFCQ_LOCK_ASSERT_HELD(ifq);

    /*
     * These are usually slow operations; convert the lock ahead of time
1651 | */ |
1652 | IFCQ_CONVERT_LOCK(fqs->fqs_ifq); |
1653 | switch (rq) { |
1654 | case CLASSQRQ_PURGE: |
1655 | fq_if_purge(fqs); |
1656 | break; |
1657 | case CLASSQRQ_PURGE_SC: |
1658 | fq_if_purge_sc(fqs, req: (cqrq_purge_sc_t *)arg); |
1659 | break; |
1660 | case CLASSQRQ_EVENT: |
1661 | fq_if_event(fqs, ev: (cqev_t)arg); |
1662 | break; |
1663 | case CLASSQRQ_THROTTLE: |
1664 | fq_if_throttle(fqs, tr: (cqrq_throttle_t *)arg); |
1665 | break; |
1666 | case CLASSQRQ_STAT_SC: |
1667 | fq_if_stat_sc(fqs, stat: (cqrq_stat_sc_t *)arg); |
1668 | break; |
1669 | } |
1670 | return err; |
1671 | } |
1672 | |
int
fq_if_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags,
    classq_pkt_type_t ptype)
{
	fq_if_t *fqs = NULL;
	int err = 0;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(ifq->ifcq_disc == NULL);
	VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);

	fqs = fq_if_alloc(ifq, ptype);
	if (fqs == NULL) {
		return ENOMEM;
	}
	if (flags & PKTSCHEDF_QALG_DRIVER_MANAGED) {
		fqs->fqs_flags |= FQS_DRIVER_MANAGED;
		fqs->fqs_bm_ops = &fq_if_grps_sc_bitmap_ops;
	} else {
		fqs->fqs_bm_ops = &fq_if_grps_bitmap_ops;
	}

	err = ifclassq_attach(ifq, PKTSCHEDT_FQ_CODEL, fqs);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s: error from ifclassq_attach, "
		    "failed to attach fq_if: %d\n", __func__, err);
		fq_if_destroy(fqs);
		return err;
	}

	/*
	 * Always create one group. If qset 0 is added later,
	 * this group will be updated.
	 */
	err = fq_if_create_grp(ifq, 0, IF_CLASSQ_DEF);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s: error from fq_if_create_grp, "
		    "failed to create a fq group: %d\n", __func__, err);
		fq_if_destroy(fqs);
	}

	return err;
}

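/*
 * Look up the flow queue for a packet, keyed by (flow hash, service
 * class index, traffic class type, group). When "create" is set and no
 * match exists, a new flow queue is allocated, marked fresh and
 * flow-control capable, and inserted at the head of its hash bucket.
 * A matching flow parked on the empty list is recycled instead.
 */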
fq_t *
fq_if_hash_pkt(fq_if_t *fqs, fq_if_group_t *fq_grp, u_int32_t flowid,
    mbuf_svc_class_t svc_class, u_int64_t now, bool create,
    fq_tfc_type_t tfc_type)
{
	fq_t *fq = NULL;
	flowq_list_t *fq_list;
	fq_if_classq_t *fq_cl;
	u_int8_t fqs_hash_id;
	u_int8_t scidx;

	scidx = fq_if_service_to_priority(fqs, svc_class);

	fqs_hash_id = FQ_IF_FLOW_HASH_ID(flowid);

	fq_list = &fqs->fqs_flows[fqs_hash_id];

	SLIST_FOREACH(fq, fq_list, fq_hashlink) {
		if (fq->fq_flowhash == flowid &&
		    fq->fq_sc_index == scidx &&
		    fq->fq_tfc_type == tfc_type &&
		    fq->fq_group == fq_grp) {
			break;
		}
	}
	if (fq == NULL && create) {
		/* If the flow is not already on the list, allocate it */
		IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
		fq = fq_alloc(fqs->fqs_ptype);
		if (fq != NULL) {
			fq->fq_flowhash = flowid;
			fq->fq_sc_index = scidx;
			fq->fq_group = fq_grp;
			fq->fq_tfc_type = tfc_type;
			fq_cl = &FQ_CLASSQ(fq);
			fq->fq_flags = (FQF_FLOWCTL_CAPABLE | FQF_FRESH_FLOW);
			fq->fq_updatetime = now + FQ_UPDATE_INTERVAL(fq);
			fq->fq_next_tx_time = FQ_INVALID_TX_TS;
			SLIST_INSERT_HEAD(fq_list, fq, fq_hashlink);
			fq_cl->fcl_stat.fcl_flows_cnt++;
			/*
			 * Trace only on success; fq may be NULL here if
			 * fq_alloc failed.
			 */
			KDBG(AQM_KTRACE_STATS_FLOW_ALLOC,
			    fqs->fqs_ifq->ifcq_ifp->if_index, fq->fq_flowhash,
			    AQM_KTRACE_FQ_GRP_SC_IDX(fq), 0);
		}
	} else if ((fq != NULL) && (fq->fq_flags & FQF_EMPTY_FLOW)) {
		fq_if_reuse_empty_flow(fqs, fq, now);
	}

	/*
	 * If getq time is not set because this is the first packet or after
	 * idle time, set it now so that we can detect a stall.
	 */
	if (fq != NULL && fq->fq_getqtime == 0) {
		fq->fq_getqtime = now;
	}

	return fq;
}

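/*
 * Unlink a flow queue from its hash bucket and free it, releasing any
 * outstanding flow-control entry first so that a suspended sender is
 * not left stalled.
 */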
void
fq_if_destroy_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq)
{
	u_int8_t hash_id;

	ASSERT((fq->fq_flags & FQF_EMPTY_FLOW) == 0);
	hash_id = FQ_IF_FLOW_HASH_ID(fq->fq_flowhash);
	SLIST_REMOVE(&fqs->fqs_flows[hash_id], fq, flowq,
	    fq_hashlink);
	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	if (__improbable(fq->fq_flags & FQF_FLOWCTL_ON)) {
		fq_if_flow_feedback(fqs, fq, fq_cl);
	}
	KDBG(AQM_KTRACE_STATS_FLOW_DESTROY,
	    fqs->fqs_ifq->ifcq_ifp->if_index, fq->fq_flowhash,
	    AQM_KTRACE_FQ_GRP_SC_IDX(fq), 0);
	fq_destroy(fq, fqs->fqs_ptype);
}

inline boolean_t
fq_if_at_drop_limit(fq_if_t *fqs)
{
	return (IFCQ_LEN(fqs->fqs_ifq) >= fqs->fqs_pkt_droplimit) ?
	       TRUE : FALSE;
}

inline boolean_t
fq_if_almost_at_drop_limit(fq_if_t *fqs)
{
	/*
	 * Whether we are above 90% of the queue limit. This is used to tell
	 * if we can stop flow controlling the largest flow.
	 */
	return IFCQ_LEN(fqs->fqs_ifq) >= fqs->fqs_pkt_droplimit * 9 / 10;
}

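/*
 * Empty-flow management. A flow queue that has drained is not freed
 * immediately; it is parked on fqs_empty_list for fq_empty_purge_delay
 * nanoseconds (fq_if_move_to_empty_flow) so that a quickly returning
 * flow can be revived cheaply (fq_if_reuse_empty_flow). Flows that stay
 * idle past their purge time are destroyed in batches of at most
 * FQ_EMPTY_PURGE_MAX (fq_if_purge_empty_flow_list).
 */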
static inline void
fq_if_reuse_empty_flow(fq_if_t *fqs, fq_t *fq, uint64_t now)
{
	ASSERT(fq->fq_flags & FQF_EMPTY_FLOW);
	TAILQ_REMOVE(&fqs->fqs_empty_list, fq, fq_empty_link);
	STAILQ_NEXT(fq, fq_actlink) = NULL;
	fq->fq_flags &= ~FQF_FLOW_STATE_MASK;
	fq->fq_empty_purge_time = 0;
	fq->fq_getqtime = 0;
	fq->fq_updatetime = now + FQ_UPDATE_INTERVAL(fq);
	fqs->fqs_empty_list_cnt--;
	fq_if_classq_t *fq_cl = &FQ_CLASSQ(fq);
	fq_cl->fcl_stat.fcl_flows_cnt++;
}

inline void
fq_if_move_to_empty_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
    uint64_t now)
{
	ASSERT((fq->fq_flags & (FQF_NEW_FLOW | FQF_OLD_FLOW |
	    FQF_FLOWCTL_ON)) == 0);
	fq->fq_empty_purge_time = now + fq_empty_purge_delay;
	TAILQ_INSERT_TAIL(&fqs->fqs_empty_list, fq, fq_empty_link);
	fq->fq_flags |= FQF_EMPTY_FLOW;
	FQ_CLEAR_OVERWHELMING(fq);
	fqs->fqs_empty_list_cnt++;
	/*
	 * fcl_flows_cnt is used in budget determination for the class;
	 * an empty flow shouldn't contribute to the budget.
	 */
	fq_cl->fcl_stat.fcl_flows_cnt--;
}

static void
fq_if_purge_empty_flow(fq_if_t *fqs, fq_t *fq)
{
	fq_if_classq_t *fq_cl;
	fq_cl = &FQ_CLASSQ(fq);

	ASSERT((fq->fq_flags & FQF_EMPTY_FLOW) != 0);
	TAILQ_REMOVE(&fqs->fqs_empty_list, fq, fq_empty_link);
	fq->fq_flags &= ~FQF_EMPTY_FLOW;
	fqs->fqs_empty_list_cnt--;
	/* Remove from the hash list and free the flow queue */
	fq_if_destroy_flow(fqs, fq_cl, fq);
}

static void
fq_if_purge_empty_flow_list(fq_if_t *fqs, uint64_t now, bool purge_all)
{
	fq_t *fq, *tmp;
	int i = 0;

	if (fqs->fqs_empty_list_cnt == 0) {
		ASSERT(TAILQ_EMPTY(&fqs->fqs_empty_list));
		return;
	}

	TAILQ_FOREACH_SAFE(fq, &fqs->fqs_empty_list, fq_empty_link, tmp) {
		if (!purge_all && ((now < fq->fq_empty_purge_time) ||
		    (i++ == FQ_EMPTY_PURGE_MAX))) {
			break;
		}
		fq_if_purge_empty_flow(fqs, fq);
	}

	if (__improbable(purge_all)) {
		VERIFY(fqs->fqs_empty_list_cnt == 0);
		VERIFY(TAILQ_EMPTY(&fqs->fqs_empty_list));
	}
}

static void
fq_if_empty_old_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
    uint64_t now)
{
	/*
	 * Remove the flow queue from the old flows list.
	 */
	STAILQ_REMOVE(&fq_cl->fcl_old_flows, fq, flowq, fq_actlink);
	fq->fq_flags &= ~FQF_OLD_FLOW;
	fq_cl->fcl_stat.fcl_oldflows_cnt--;
	VERIFY(fq->fq_bytes == 0);

	/* release any flow control */
	if (__improbable(fq->fq_flags & FQF_FLOWCTL_ON)) {
		fq_if_flow_feedback(fqs, fq, fq_cl);
	}

	/* move the flow queue to empty flows list */
	fq_if_move_to_empty_flow(fqs, fq_cl, fq, now);
}

static void
fq_if_empty_new_flow(fq_t *fq, fq_if_classq_t *fq_cl)
{
	/* Move to the end of old queue list */
	STAILQ_REMOVE(&fq_cl->fcl_new_flows, fq,
	    flowq, fq_actlink);
	fq->fq_flags &= ~FQF_NEW_FLOW;
	fq_cl->fcl_stat.fcl_newflows_cnt--;

	STAILQ_INSERT_TAIL(&fq_cl->fcl_old_flows, fq, fq_actlink);
	fq->fq_flags |= FQF_OLD_FLOW;
	fq_cl->fcl_stat.fcl_oldflows_cnt++;
}

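/*
 * Overflow handling: when the scheduler is at its drop limit, shed one
 * packet from the head of the largest flow (fqs_large_flow). Dropping
 * from the fattest queue penalizes the flow responsible for the buildup
 * rather than a well-behaved one.
 */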
inline void
fq_if_drop_packet(fq_if_t *fqs, uint64_t now)
{
	fq_t *fq = fqs->fqs_large_flow;
	fq_if_classq_t *fq_cl;
	pktsched_pkt_t pkt;
	volatile uint32_t *pkt_flags;
	uint64_t *pkt_timestamp;

	if (fq == NULL) {
		return;
	}
	/* queue cannot be empty on the largest flow */
	VERIFY(!fq_empty(fq, fqs->fqs_ptype));

	fq_cl = &FQ_CLASSQ(fq);
	_PKTSCHED_PKT_INIT(&pkt);
	fq_getq_flow_internal(fqs, fq, &pkt);
	ASSERT(pkt.pktsched_ptype != QP_INVALID);

	pktsched_get_pkt_vars(&pkt, &pkt_flags, &pkt_timestamp, NULL, NULL,
	    NULL, NULL, NULL);

	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	*pkt_timestamp = 0;
	switch (pkt.pktsched_ptype) {
	case QP_MBUF:
		*pkt_flags &= ~PKTF_PRIV_GUARDED;
		break;
#if SKYWALK
	case QP_PACKET:
		/* sanity check */
		ASSERT((*pkt_flags & ~PKT_F_COMMON_MASK) == 0);
		break;
#endif /* SKYWALK */
	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	if (fq_empty(fq, fqs->fqs_ptype)) {
		fqs->fqs_large_flow = NULL;
		if (fq->fq_flags & FQF_OLD_FLOW) {
			fq_if_empty_old_flow(fqs, fq_cl, fq, now);
		} else {
			VERIFY(fq->fq_flags & FQF_NEW_FLOW);
			fq_if_empty_new_flow(fq, fq_cl);
		}
	}
	IFCQ_DROP_ADD(fqs->fqs_ifq, 1, pktsched_get_pkt_len(&pkt));

	pktsched_free_pkt(&pkt);
	fq_cl->fcl_stat.fcl_drop_overflow++;
}

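/*
 * Track the largest backlogged flow. A flow is a candidate once it
 * holds at least FQ_IF_LARGE_FLOW_BYTE_LIMIT bytes; the current holder
 * is dethroned when it drains below that limit or when a bigger flow
 * shows up.
 */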
inline void
fq_if_is_flow_heavy(fq_if_t *fqs, fq_t *fq)
{
	fq_t *prev_fq;

	if (fqs->fqs_large_flow != NULL &&
	    fqs->fqs_large_flow->fq_bytes < FQ_IF_LARGE_FLOW_BYTE_LIMIT) {
		fqs->fqs_large_flow = NULL;
	}

	if (fq == NULL || fq->fq_bytes < FQ_IF_LARGE_FLOW_BYTE_LIMIT) {
		return;
	}

	prev_fq = fqs->fqs_large_flow;
	if (prev_fq == NULL) {
		if (!fq_empty(fq, fqs->fqs_ptype)) {
			fqs->fqs_large_flow = fq;
		}
		return;
	} else if (fq->fq_bytes > prev_fq->fq_bytes) {
		fqs->fqs_large_flow = fq;
	}
}

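/*
 * Put a flow under flow control: allocate a flow advisory entry for the
 * packet's flow and queue it on fqs_fclist, unless one is already
 * pending for the same flow and source. Returns TRUE if the flow is
 * (or already was) under flow control.
 */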
boolean_t
fq_if_add_fcentry(fq_if_t *fqs, pktsched_pkt_t *pkt, uint8_t flowsrc,
    fq_t *fq, fq_if_classq_t *fq_cl)
{
	struct flowadv_fcentry *fce;

#if DEBUG || DEVELOPMENT
	if (__improbable(ifclassq_flow_control_adv == 0)) {
		os_log(OS_LOG_DEFAULT, "%s: skipped flow control", __func__);
		return TRUE;
	}
#endif /* DEBUG || DEVELOPMENT */

	STAILQ_FOREACH(fce, &fqs->fqs_fclist, fce_link) {
		if ((uint8_t)fce->fce_flowsrc_type == flowsrc &&
		    fce->fce_flowid == fq->fq_flowhash) {
			/* Already on flowcontrol list */
			return TRUE;
		}
	}
	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	fce = pktsched_alloc_fcentry(pkt, fqs->fqs_ifq->ifcq_ifp, M_WAITOK);
	if (fce != NULL) {
		/* XXX Add number of bytes in the queue */
		STAILQ_INSERT_TAIL(&fqs->fqs_fclist, fce, fce_link);
		fq_cl->fcl_stat.fcl_flow_control++;
		os_log(OS_LOG_DEFAULT, "%s: num: %d, scidx: %d, flowsrc: %d, "
		    "flow: 0x%x, iface: %s, B:%u\n", __func__,
		    fq_cl->fcl_stat.fcl_flow_control,
		    fq->fq_sc_index, fce->fce_flowsrc_type, fq->fq_flowhash,
		    if_name(fqs->fqs_ifq->ifcq_ifp), fq->fq_bytes);
		KDBG(AQM_KTRACE_STATS_FLOW_CTL | DBG_FUNC_START,
		    fq->fq_flowhash, AQM_KTRACE_FQ_GRP_SC_IDX(fq),
		    fq->fq_bytes, fq->fq_min_qdelay);
	}
	return (fce != NULL) ? TRUE : FALSE;
}

static void
fq_if_remove_fcentry(fq_if_t *fqs, struct flowadv_fcentry *fce)
{
	STAILQ_REMOVE(&fqs->fqs_fclist, fce, flowadv_fcentry, fce_link);
	STAILQ_NEXT(fce, fce_link) = NULL;
	flowadv_add_entry(fce);
}

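/*
 * Release flow control for a flow: find its pending advisory entry,
 * convert it into a feedback event for the flow advisory module, and
 * clear FQF_FLOWCTL_ON so the flow can be controlled again later.
 */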
void
fq_if_flow_feedback(fq_if_t *fqs, fq_t *fq, fq_if_classq_t *fq_cl)
{
	struct flowadv_fcentry *fce = NULL;

	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	STAILQ_FOREACH(fce, &fqs->fqs_fclist, fce_link) {
		if (fce->fce_flowid == fq->fq_flowhash) {
			break;
		}
	}
	if (fce != NULL) {
		fq_cl->fcl_stat.fcl_flow_feedback++;
		fce->fce_event_type = FCE_EVENT_TYPE_FLOW_CONTROL_FEEDBACK;
		os_log(OS_LOG_DEFAULT, "%s: num: %d, scidx: %d, flowsrc: %d, "
		    "flow: 0x%x, iface: %s grp: %hhu, B:%u\n", __func__,
		    fq_cl->fcl_stat.fcl_flow_feedback, fq->fq_sc_index,
		    fce->fce_flowsrc_type, fce->fce_flowid,
		    if_name(fqs->fqs_ifq->ifcq_ifp), FQ_GROUP(fq)->fqg_index,
		    fq->fq_bytes);
		fq_if_remove_fcentry(fqs, fce);
		KDBG(AQM_KTRACE_STATS_FLOW_CTL | DBG_FUNC_END,
		    fq->fq_flowhash, AQM_KTRACE_FQ_GRP_SC_IDX(fq),
		    fq->fq_bytes, fq->fq_min_qdelay);
	}
	fq->fq_flags &= ~FQF_FLOWCTL_ON;
}

boolean_t
fq_if_report_ce(fq_if_t *fqs, pktsched_pkt_t *pkt, uint32_t ce_cnt,
    uint32_t pkt_cnt)
{
	struct flowadv_fcentry *fce;

#if DEBUG || DEVELOPMENT
	if (__improbable(ifclassq_flow_control_adv == 0)) {
		os_log(OS_LOG_DEFAULT, "%s: skipped flow control", __func__);
		return TRUE;
	}
#endif /* DEBUG || DEVELOPMENT */

	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	fce = pktsched_alloc_fcentry(pkt, fqs->fqs_ifq->ifcq_ifp, M_WAITOK);
	if (fce != NULL) {
		fce->fce_event_type = FCE_EVENT_TYPE_CONGESTION_EXPERIENCED;
		fce->fce_ce_cnt = ce_cnt;
		fce->fce_pkts_since_last_report = pkt_cnt;

		flowadv_add_entry(fce);
	}
	return (fce != NULL) ? TRUE : FALSE;
}

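/*
 * Dequeue up to pktlimit packets / bytelimit bytes from one class,
 * implementing the DRR scan from RFC 8290: new flows are serviced
 * first, then old flows; a flow that exhausts its deficit (or empties)
 * is topped up by one quantum and moved to the tail of the old list.
 * Flows whose next transmit timestamp lies in the future are skipped,
 * and the earliest such timestamp is reported through next_tx_time so
 * the caller can schedule a pacemaker wakeup. If every flow in the
 * class is paced, FCL_PACED is set on the class.
 */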
void
fq_if_dequeue(fq_if_t *fqs, fq_if_classq_t *fq_cl, uint32_t pktlimit,
    int64_t bytelimit, classq_pkt_t *top, classq_pkt_t *bottom,
    uint32_t *retpktcnt, uint32_t *retbytecnt, flowq_dqlist_t *fq_dqlist,
    bool budget_restricted, uint64_t now, bool *fq_cl_paced,
    uint64_t *next_tx_time)
{
	fq_t *fq = NULL, *tfq = NULL;
	flowq_stailq_t temp_stailq;
	uint32_t pktcnt, bytecnt;
	boolean_t qempty, limit_reached = FALSE;
	bool all_paced = true;
	classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);
	fq_getq_flow_t fq_getq_flow_fn;
	classq_pkt_t *head, *tail;
	uint64_t fq_cl_tx_time = FQ_INVALID_TX_TS;

	switch (fqs->fqs_ptype) {
	case QP_MBUF:
		fq_getq_flow_fn = fq_getq_flow_mbuf;
		break;

#if SKYWALK
	case QP_PACKET:
		fq_getq_flow_fn = fq_getq_flow_kpkt;
		break;
#endif /* SKYWALK */

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/*
	 * The maximum byte limit should not be greater than the budget for
	 * this class.
	 */
	if (bytelimit > fq_cl->fcl_budget && budget_restricted) {
		bytelimit = fq_cl->fcl_budget;
	}

	VERIFY(pktlimit > 0 && bytelimit > 0 && top != NULL);
	pktcnt = bytecnt = 0;
	STAILQ_INIT(&temp_stailq);

	STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_new_flows, fq_actlink, tfq) {
		ASSERT((fq->fq_flags & (FQF_NEW_FLOW | FQF_OLD_FLOW)) ==
		    FQF_NEW_FLOW);
		uint64_t fq_tx_time;
		if (__improbable(!fq_tx_time_ready(fqs, fq, now, &fq_tx_time))) {
			ASSERT(fq_tx_time != FQ_INVALID_TX_TS);
			if (fq_tx_time < fq_cl_tx_time) {
				fq_cl_tx_time = fq_tx_time;
			}
			continue;
		}
		all_paced = false;

		if (fq_dqlist != NULL) {
			if (!fq->fq_in_dqlist) {
				fq_dqlist_add(fq_dqlist, fq);
			}
			head = &fq->fq_dq_head;
			tail = &fq->fq_dq_tail;
		} else {
			ASSERT(!fq->fq_in_dqlist);
			head = top;
			tail = &last;
		}

		limit_reached = fq_getq_flow_fn(fqs, fq_cl, fq, bytelimit,
		    pktlimit, head, tail, &bytecnt, &pktcnt, &qempty, now);

		/*
		 * From RFC 8290:
		 * if that queue has a negative number of credits (i.e., it has already
		 * dequeued at least a quantum of bytes), it is given an additional
		 * quantum of credits, the queue is put onto _the end of_ the list of
		 * old queues, and the routine selects the next queue and starts again.
		 */
		if (fq->fq_deficit <= 0 || qempty) {
			fq->fq_deficit += fq_cl->fcl_quantum;
			fq_if_empty_new_flow(fq, fq_cl);
		}
		/*
		 * TODO: should a paced flow also get credit here, so that it
		 * is treated the same as an empty one?
		 */

		if (!fq_tx_time_ready(fqs, fq, now, &fq_tx_time)) {
			ASSERT(fq_tx_time != FQ_INVALID_TX_TS);
			if (fq_tx_time < fq_cl_tx_time) {
				fq_cl_tx_time = fq_tx_time;
			}
		}

		if (limit_reached) {
			goto done;
		}
	}

	STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_old_flows, fq_actlink, tfq) {
		VERIFY((fq->fq_flags & (FQF_NEW_FLOW | FQF_OLD_FLOW)) ==
		    FQF_OLD_FLOW);
		bool destroy = true;
		uint64_t fq_tx_time;

		if (__improbable(!fq_tx_time_ready(fqs, fq, now, &fq_tx_time))) {
			ASSERT(fq_tx_time != FQ_INVALID_TX_TS);
			if (fq_tx_time < fq_cl_tx_time) {
				fq_cl_tx_time = fq_tx_time;
			}
			continue;
		}
		all_paced = false;

		if (fq_dqlist != NULL) {
			if (!fq->fq_in_dqlist) {
				fq_dqlist_add(fq_dqlist, fq);
			}
			head = &fq->fq_dq_head;
			tail = &fq->fq_dq_tail;
			destroy = false;
		} else {
			ASSERT(!fq->fq_in_dqlist);
			head = top;
			tail = &last;
		}

		limit_reached = fq_getq_flow_fn(fqs, fq_cl, fq, bytelimit,
		    pktlimit, head, tail, &bytecnt, &pktcnt, &qempty, now);

		if (!fq_tx_time_ready(fqs, fq, now, &fq_tx_time)) {
			ASSERT(fq_tx_time != FQ_INVALID_TX_TS);
			if (fq_tx_time < fq_cl_tx_time) {
				fq_cl_tx_time = fq_tx_time;
			}
		}

		if (qempty) {
			fq_if_empty_old_flow(fqs, fq_cl, fq, now);
		} else if (fq->fq_deficit <= 0) {
			STAILQ_REMOVE(&fq_cl->fcl_old_flows, fq,
			    flowq, fq_actlink);
			/*
			 * Move to the end of the old queues list. We do not
			 * need to update the flow count since this flow
			 * will be added to the tail again.
			 */
			STAILQ_INSERT_TAIL(&temp_stailq, fq, fq_actlink);
			fq->fq_deficit += fq_cl->fcl_quantum;
		}
		if (limit_reached) {
			break;
		}
	}

done:
	if (all_paced) {
		fq_cl->fcl_flags |= FCL_PACED;
		fq_cl->fcl_next_tx_time = fq_cl_tx_time;
	}
	if (!STAILQ_EMPTY(&fq_cl->fcl_old_flows)) {
		STAILQ_CONCAT(&fq_cl->fcl_old_flows, &temp_stailq);
	} else if (!STAILQ_EMPTY(&temp_stailq)) {
		fq_cl->fcl_old_flows = temp_stailq;
	}
	if (last.cp_mbuf != NULL) {
		VERIFY(top->cp_mbuf != NULL);
		if (bottom != NULL) {
			*bottom = last;
		}
	}
	if (retpktcnt != NULL) {
		*retpktcnt = pktcnt;
	}
	if (retbytecnt != NULL) {
		*retbytecnt = bytecnt;
	}
	if (fq_cl_paced != NULL) {
		*fq_cl_paced = all_paced;
	}
	if (next_tx_time != NULL) {
		*next_tx_time = fq_cl_tx_time;
	}
}

void
fq_if_teardown_ifclassq(struct ifclassq *ifq)
{
	fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(fqs != NULL && ifq->ifcq_type == PKTSCHEDT_FQ_CODEL);
	fq_if_destroy(fqs);
	ifq->ifcq_disc = NULL;
	ifclassq_detach(ifq);
}

static void
fq_export_flowstats(fq_if_t *fqs, fq_t *fq,
    struct fq_codel_flowstats *flowstat)
{
	bzero(flowstat, sizeof(*flowstat));
	flowstat->fqst_min_qdelay = (uint32_t)fq->fq_min_qdelay;
	flowstat->fqst_bytes = fq->fq_bytes;
	flowstat->fqst_flowhash = fq->fq_flowhash;
	if (fq->fq_flags & FQF_NEW_FLOW) {
		flowstat->fqst_flags |= FQ_FLOWSTATS_NEW_FLOW;
	}
	if (fq->fq_flags & FQF_OLD_FLOW) {
		flowstat->fqst_flags |= FQ_FLOWSTATS_OLD_FLOW;
	}
	if (fq->fq_flags & FQF_DELAY_HIGH) {
		flowstat->fqst_flags |= FQ_FLOWSTATS_DELAY_HIGH;
	}
	if (fq->fq_flags & FQF_FLOWCTL_ON) {
		flowstat->fqst_flags |= FQ_FLOWSTATS_FLOWCTL_ON;
	}
	if (fqs->fqs_large_flow == fq) {
		flowstat->fqst_flags |= FQ_FLOWSTATS_LARGE_FLOW;
	}
}

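/*
 * Export per-class statistics for the given group/class pair, plus up
 * to FQ_IF_MAX_FLOWSTATS per-flow snapshots. New flows are sampled
 * first, but their sampling stops early once fewer slots remain than
 * there are old flows, so that old flows are also represented.
 */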
int
fq_if_getqstats_ifclassq(struct ifclassq *ifq, uint8_t gid, u_int32_t qid,
    struct if_ifclassq_stats *ifqs)
{
	struct fq_codel_classstats *fcls;
	fq_if_classq_t *fq_cl;
	fq_if_t *fqs;
	fq_t *fq = NULL;
	fq_if_group_t *grp;
	u_int32_t i, flowstat_cnt;

	if (qid >= FQ_IF_MAX_CLASSES || gid >= FQ_IF_MAX_GROUPS) {
		return EINVAL;
	}

	fqs = (fq_if_t *)ifq->ifcq_disc;
	if (fqs->fqs_classq_groups[gid] == NULL) {
		return ENXIO;
	}

	fcls = &ifqs->ifqs_fq_codel_stats;

	fq_cl = &FQS_CLASSQ(fqs, gid, qid);
	grp = fq_if_find_grp(fqs, gid);

	fcls->fcls_pri = fq_cl->fcl_pri;
	fcls->fcls_service_class = fq_cl->fcl_service_class;
	fcls->fcls_quantum = fq_cl->fcl_quantum;
	fcls->fcls_drr_max = fq_cl->fcl_drr_max;
	fcls->fcls_budget = fq_cl->fcl_budget;
	fcls->fcls_l4s_target_qdelay = grp->fqg_target_qdelays[FQ_TFC_L4S];
	fcls->fcls_target_qdelay = grp->fqg_target_qdelays[FQ_TFC_C];
	fcls->fcls_update_interval = grp->fqg_update_intervals[FQ_TFC_C];
	fcls->fcls_flow_control = fq_cl->fcl_stat.fcl_flow_control;
	fcls->fcls_flow_feedback = fq_cl->fcl_stat.fcl_flow_feedback;
	fcls->fcls_dequeue_stall = fq_cl->fcl_stat.fcl_dequeue_stall;
	fcls->fcls_drop_overflow = fq_cl->fcl_stat.fcl_drop_overflow;
	fcls->fcls_drop_early = fq_cl->fcl_stat.fcl_drop_early;
	fcls->fcls_drop_memfailure = fq_cl->fcl_stat.fcl_drop_memfailure;
	fcls->fcls_flows_cnt = fq_cl->fcl_stat.fcl_flows_cnt;
	fcls->fcls_newflows_cnt = fq_cl->fcl_stat.fcl_newflows_cnt;
	fcls->fcls_oldflows_cnt = fq_cl->fcl_stat.fcl_oldflows_cnt;
	fcls->fcls_pkt_cnt = fq_cl->fcl_stat.fcl_pkt_cnt;
	fcls->fcls_flow_control_fail = fq_cl->fcl_stat.fcl_flow_control_fail;
	fcls->fcls_dequeue = fq_cl->fcl_stat.fcl_dequeue;
	fcls->fcls_dequeue_bytes = fq_cl->fcl_stat.fcl_dequeue_bytes;
	fcls->fcls_byte_cnt = fq_cl->fcl_stat.fcl_byte_cnt;
	fcls->fcls_throttle_on = fq_cl->fcl_stat.fcl_throttle_on;
	fcls->fcls_throttle_off = fq_cl->fcl_stat.fcl_throttle_off;
	fcls->fcls_throttle_drops = fq_cl->fcl_stat.fcl_throttle_drops;
	fcls->fcls_dup_rexmts = fq_cl->fcl_stat.fcl_dup_rexmts;
	fcls->fcls_pkts_compressible = fq_cl->fcl_stat.fcl_pkts_compressible;
	fcls->fcls_pkts_compressed = fq_cl->fcl_stat.fcl_pkts_compressed;
	fcls->fcls_min_qdelay = fq_cl->fcl_stat.fcl_min_qdelay;
	fcls->fcls_max_qdelay = fq_cl->fcl_stat.fcl_max_qdelay;
	fcls->fcls_avg_qdelay = fq_cl->fcl_stat.fcl_avg_qdelay;
	fcls->fcls_overwhelming = fq_cl->fcl_stat.fcl_overwhelming;
	fcls->fcls_ce_marked = fq_cl->fcl_stat.fcl_ce_marked;
	fcls->fcls_ce_reported = fq_cl->fcl_stat.fcl_ce_reported;
	fcls->fcls_ce_mark_failures = fq_cl->fcl_stat.fcl_ce_mark_failures;
	fcls->fcls_l4s_pkts = fq_cl->fcl_stat.fcl_l4s_pkts;
	fcls->fcls_ignore_tx_time = fq_cl->fcl_stat.fcl_ignore_tx_time;
	fcls->fcls_paced_pkts = fq_cl->fcl_stat.fcl_paced_pkts;
	fcls->fcls_fcl_pacing_needed = fq_cl->fcl_stat.fcl_fcl_pacemaker_needed;

	/* Gather per flow stats */
	flowstat_cnt = min((fcls->fcls_newflows_cnt +
	    fcls->fcls_oldflows_cnt), FQ_IF_MAX_FLOWSTATS);
	i = 0;
	STAILQ_FOREACH(fq, &fq_cl->fcl_new_flows, fq_actlink) {
		if (i >= fcls->fcls_newflows_cnt || i >= flowstat_cnt) {
			break;
		}

		/* leave space for a few old flows */
		if ((flowstat_cnt - i) < fcls->fcls_oldflows_cnt &&
		    i >= (FQ_IF_MAX_FLOWSTATS >> 1)) {
			break;
		}
		fq_export_flowstats(fqs, fq, &fcls->fcls_flowstats[i]);
		i++;
	}
	STAILQ_FOREACH(fq, &fq_cl->fcl_old_flows, fq_actlink) {
		if (i >= flowstat_cnt) {
			break;
		}
		fq_export_flowstats(fqs, fq, &fcls->fcls_flowstats[i]);
		i++;
	}
	VERIFY(i <= flowstat_cnt);
	fcls->fcls_flowstats_cnt = i;
	return 0;
}

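/*
 * Create (or, for group 0, update) a classq group. Each class within
 * the group is initialized through _FQ_CLASSQ_INIT, which pastes the
 * service-class name into the index, quantum, DRR-max and MBUF_SC_*
 * identifiers; for example, _FQ_CLASSQ_INIT(grp, BK, quantum) expands
 * (roughly) to:
 *
 *	fq_if_classq_init(grp, FQ_IF_BK_INDEX,
 *	    FQ_CODEL_QUANTUM_BK(quantum), FQ_CODEL_DRR_MAX(BK),
 *	    MBUF_SC_BK);
 *
 * Driver-managed schedulers only get the four driver-visible classes;
 * everything else gets the full ten-class hierarchy.
 */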
int
fq_if_create_grp(struct ifclassq *ifcq, uint8_t grp_idx, uint8_t flags)
{
#define _FQ_CLASSQ_INIT(_grp, _s, _q) \
	fq_if_classq_init(_grp, FQ_IF_ ## _s ##_INDEX, \
	    FQ_CODEL_QUANTUM_ ## _s(_q), FQ_CODEL_DRR_MAX(_s), \
	    MBUF_SC_ ## _s);

	fq_if_group_t *grp;
	fq_if_t *fqs;
	uint32_t quantum, calc_flags = IF_CLASSQ_DEF;
	struct ifnet *ifp = ifcq->ifcq_ifp;

	VERIFY(grp_idx < FQ_IF_MAX_GROUPS);

	fqs = (fq_if_t *)ifcq->ifcq_disc;

	if (grp_idx == 0 && fqs->fqs_classq_groups[grp_idx] != NULL) {
		grp = fqs->fqs_classq_groups[grp_idx];
		goto update;
	}

	if (fqs->fqs_classq_groups[grp_idx] != NULL) {
		return EINVAL;
	}

	grp = zalloc_flags(fq_if_grp_zone, Z_WAITOK | Z_ZERO);
	if (grp == NULL) {
		return ENOMEM;
	}

	fqs->fqs_classq_groups[grp_idx] = grp;
	grp->fqg_index = grp_idx;

	quantum = fq_if_calc_quantum(ifp);
	if (fqs->fqs_flags & FQS_DRIVER_MANAGED) {
		_FQ_CLASSQ_INIT(grp, BK, quantum);
		_FQ_CLASSQ_INIT(grp, BE, quantum);
		_FQ_CLASSQ_INIT(grp, VI, quantum);
		_FQ_CLASSQ_INIT(grp, VO, quantum);
	} else {
		/* SIG shares same INDEX with VI */
		_CASSERT(SCIDX_SIG == SCIDX_VI);
		_CASSERT(FQ_IF_SIG_INDEX == FQ_IF_VI_INDEX);

		_FQ_CLASSQ_INIT(grp, BK_SYS, quantum);
		_FQ_CLASSQ_INIT(grp, BK, quantum);
		_FQ_CLASSQ_INIT(grp, BE, quantum);
		_FQ_CLASSQ_INIT(grp, RD, quantum);
		_FQ_CLASSQ_INIT(grp, OAM, quantum);
		_FQ_CLASSQ_INIT(grp, AV, quantum);
		_FQ_CLASSQ_INIT(grp, RV, quantum);
		_FQ_CLASSQ_INIT(grp, VI, quantum);
		_FQ_CLASSQ_INIT(grp, VO, quantum);
		_FQ_CLASSQ_INIT(grp, CTL, quantum);
	}

update:
	if (flags & IF_DEFAULT_GRP) {
		fq_if_set_grp_combined(ifcq, grp_idx);
		grp->fqg_flags |= FQ_IF_DEFAULT_GRP;
	} else {
		fq_if_set_grp_separated(ifcq, grp_idx);
		grp->fqg_flags &= ~FQ_IF_DEFAULT_GRP;
	}

	calc_flags |= (flags & IF_CLASSQ_LOW_LATENCY);
	ifclassq_calc_target_qdelay(ifp, &grp->fqg_target_qdelays[FQ_TFC_C],
	    calc_flags);
	ifclassq_calc_target_qdelay(ifp, &grp->fqg_target_qdelays[FQ_TFC_L4S],
	    calc_flags | IF_CLASSQ_L4S);

	ifclassq_calc_update_interval(&grp->fqg_update_intervals[FQ_TFC_C],
	    calc_flags);
	ifclassq_calc_update_interval(&grp->fqg_update_intervals[FQ_TFC_L4S],
	    calc_flags | IF_CLASSQ_L4S);

	return 0;
#undef _FQ_CLASSQ_INIT
}

fq_if_group_t *
fq_if_find_grp(fq_if_t *fqs, uint8_t grp_idx)
{
	fq_if_group_t *grp;

	IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);
	VERIFY(grp_idx < FQ_IF_MAX_GROUPS);

	grp = fqs->fqs_classq_groups[grp_idx];
	VERIFY(grp != NULL);

	return grp;
}

static void
fq_if_purge_grp(fq_if_t *fqs, fq_if_group_t *grp)
{
	for (uint8_t i = 0; i < FQ_IF_MAX_CLASSES; i++) {
		fq_if_purge_classq(fqs, &grp->fqg_classq[i]);
	}

	bzero(&grp->fqg_bitmaps, sizeof(grp->fqg_bitmaps));
	grp->fqg_len = 0;
	grp->fqg_bytes = 0;
	fq_if_set_grp_separated(fqs->fqs_ifq, grp->fqg_index);
}

void
fq_if_destroy_grps(fq_if_t *fqs)
{
	fq_if_group_t *grp;

	IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);

	for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
		if (fqs->fqs_classq_groups[grp_idx] == NULL) {
			continue;
		}

		grp = fq_if_find_grp(fqs, grp_idx);
		fq_if_purge_grp(fqs, grp);
		zfree(fq_if_grp_zone, grp);
		fqs->fqs_classq_groups[grp_idx] = NULL;
	}
}

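/*
 * A group is either "combined" (linked on fqs_combined_grp_list and
 * scheduled together with the other combined groups, tracked by
 * fqs_combined_grp_bitmap) or "separated" (dequeued on its own).
 * Both transitions below are idempotent.
 */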
static inline boolean_t
fq_if_is_grp_combined(fq_if_t *fqs, uint8_t grp_idx)
{
	return pktsched_bit_tst(grp_idx, &fqs->fqs_combined_grp_bitmap);
}

void
fq_if_set_grp_combined(struct ifclassq *ifcq, uint8_t grp_idx)
{
	fq_if_t *fqs;
	fq_if_group_t *grp;

	IFCQ_LOCK_ASSERT_HELD(ifcq);

	fqs = (fq_if_t *)ifcq->ifcq_disc;
	grp = fq_if_find_grp(fqs, grp_idx);

	if (fq_if_is_grp_combined(fqs, grp_idx)) {
		return;
	}

	/*
	 * We keep the current fq_deficit and fcl_budget when combining a
	 * group. That might disrupt the AQM, but only for a moment.
	 */
	pktsched_bit_set(grp_idx, &fqs->fqs_combined_grp_bitmap);
	TAILQ_INSERT_TAIL(&fqs->fqs_combined_grp_list, grp, fqg_grp_link);
}

void
fq_if_set_grp_separated(struct ifclassq *ifcq, uint8_t grp_idx)
{
	fq_if_t *fqs;
	fq_if_group_t *grp;

	IFCQ_LOCK_ASSERT_HELD(ifcq);

	fqs = (fq_if_t *)ifcq->ifcq_disc;
	grp = fq_if_find_grp(fqs, grp_idx);

	if (!fq_if_is_grp_combined(fqs, grp_idx)) {
		return;
	}

	pktsched_bit_clr(grp_idx, &fqs->fqs_combined_grp_bitmap);
	TAILQ_REMOVE(&fqs->fqs_combined_grp_list, grp, fqg_grp_link);
}
