1 | /* |
2 | * Copyright (c) 2016-2017 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | |
29 | #include <sys/types.h> |
30 | #include <sys/param.h> |
31 | #include <kern/zalloc.h> |
32 | #include <net/if_var.h> |
33 | #include <net/if.h> |
34 | #include <net/classq/classq.h> |
35 | #include <net/classq/classq_fq_codel.h> |
36 | #include <net/pktsched/pktsched_fq_codel.h> |
37 | |
38 | static size_t fq_if_size; |
39 | static struct zone *fq_if_zone; |
40 | |
41 | static fq_if_t *fq_if_alloc(struct ifnet *, classq_pkt_type_t); |
42 | static void fq_if_destroy(fq_if_t *fqs); |
43 | static void fq_if_classq_init(fq_if_t *fqs, u_int32_t priority, |
44 | u_int32_t quantum, u_int32_t drr_max, u_int32_t svc_class); |
45 | static int fq_if_enqueue_classq(struct ifclassq *ifq, void *p, |
46 | classq_pkt_type_t ptype, boolean_t *pdrop); |
47 | static void *fq_if_dequeue_classq(struct ifclassq *, classq_pkt_type_t *); |
48 | static int fq_if_dequeue_classq_multi(struct ifclassq *, u_int32_t, |
49 | u_int32_t, void **, void **, u_int32_t *, u_int32_t *, classq_pkt_type_t *); |
50 | static void *fq_if_dequeue_sc_classq(struct ifclassq *, mbuf_svc_class_t, |
51 | classq_pkt_type_t *); |
52 | static int fq_if_dequeue_sc_classq_multi(struct ifclassq *, |
53 | mbuf_svc_class_t, u_int32_t, u_int32_t, void **, |
54 | void **, u_int32_t *, u_int32_t *, classq_pkt_type_t *); |
55 | static void fq_if_dequeue(fq_if_t *, fq_if_classq_t *, u_int32_t, |
56 | u_int32_t, void **, void **, u_int32_t *, u_int32_t *, |
57 | boolean_t drvmgmt, classq_pkt_type_t *); |
58 | static int fq_if_request_classq(struct ifclassq *ifq, cqrq_t op, void *arg); |
59 | void fq_if_stat_sc(fq_if_t *fqs, cqrq_stat_sc_t *stat); |
60 | static void fq_if_purge(fq_if_t *); |
61 | static void fq_if_purge_classq(fq_if_t *, fq_if_classq_t *); |
62 | static void fq_if_purge_flow(fq_if_t *, fq_t *, u_int32_t *, u_int32_t *); |
63 | static void fq_if_empty_new_flow(fq_t *fq, fq_if_classq_t *fq_cl, |
64 | bool add_to_old); |
65 | static void fq_if_empty_old_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, |
66 | fq_t *fq, bool remove_hash); |
67 | |
68 | #define FQ_IF_ZONE_MAX 32 /* Maximum elements in zone */ |
69 | #define FQ_IF_ZONE_NAME "pktsched_fq_if" /* zone for fq_if class */ |
70 | |
71 | #define FQ_IF_FLOW_HASH_ID(_flowid_) \ |
72 | (((_flowid_) >> FQ_IF_HASH_TAG_SHIFT) & FQ_IF_HASH_TAG_MASK) |
73 | |
74 | #define FQ_IF_CLASSQ_IDLE(_fcl_) \ |
75 | (STAILQ_EMPTY(&(_fcl_)->fcl_new_flows) && \ |
76 | STAILQ_EMPTY(&(_fcl_)->fcl_old_flows)) |
77 | |
78 | typedef void (* fq_if_append_pkt_t)(void *, void *); |
79 | typedef boolean_t (* fq_getq_flow_t)(fq_if_t *, fq_if_classq_t *, fq_t *, |
80 | u_int32_t, u_int32_t, void **, void **, u_int32_t *, u_int32_t *, |
81 | boolean_t *, u_int32_t); |
82 | |
83 | static void |
84 | fq_if_append_mbuf(void *pkt, void *next_pkt) |
85 | { |
86 | ((mbuf_t)pkt)->m_nextpkt = (mbuf_t)next_pkt; |
87 | } |
88 | |
89 | |
90 | |
/*
 * Dequeue mbufs from flow queue 'fq' onto the caller's chain until the
 * flow's DRR deficit is exhausted, the pkt/byte limits are hit, or the
 * flow queue runs empty.
 *
 * top/last track head and tail of the chain being built; *pkt_cnt and
 * *byte_cnt are incremented per packet taken; pflags is OR'ed into each
 * packet's pkt_flags.  On return *qempty says whether the flow queue
 * was drained.  Returns TRUE iff a pkt/byte limit was reached.
 */
static boolean_t
fq_getq_flow_mbuf(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
    u_int32_t byte_limit, u_int32_t pkt_limit, void **top, void **last,
    u_int32_t *byte_cnt, u_int32_t *pkt_cnt, boolean_t *qempty,
    u_int32_t pflags)
{
	struct mbuf *m;
	u_int32_t plen;
	pktsched_pkt_t pkt;
	boolean_t limit_reached = FALSE;
	struct ifclassq *ifq = fqs->fqs_ifq;
	struct ifnet *ifp = ifq->ifcq_ifp;

	while (fq->fq_deficit > 0 && limit_reached == FALSE &&
	    !MBUFQ_EMPTY(&fq->fq_mbufq)) {

		_PKTSCHED_PKT_INIT(&pkt);
		m = fq_getq_flow(fqs, fq, &pkt);
		ASSERT(pkt.pktsched_ptype == QP_MBUF);

		/* charge the packet's length against the DRR deficit */
		plen = pktsched_get_pkt_len(&pkt);
		fq->fq_deficit -= plen;
		m->m_pkthdr.pkt_flags |= pflags;

		/* append to the caller's chain, keeping it NULL-terminated */
		if (*top == NULL) {
			*top = m;
		} else {
			ASSERT(*last != NULL);
			ASSERT((*(struct mbuf **)last)->m_nextpkt == NULL);
			(*(struct mbuf **)last)->m_nextpkt = m;
		}
		*last = m;
		(*(mbuf_t *)last)->m_nextpkt = NULL;
		fq_cl->fcl_stat.fcl_dequeue++;
		fq_cl->fcl_stat.fcl_dequeue_bytes += plen;
		*pkt_cnt += 1;
		*byte_cnt += plen;

		ifclassq_set_packet_metadata(ifq, ifp, m, QP_MBUF);

		/* Check if the limit is reached */
		if (*pkt_cnt >= pkt_limit || *byte_cnt >= byte_limit)
			limit_reached = TRUE;
	}

	*qempty = MBUFQ_EMPTY(&fq->fq_mbufq);
	return (limit_reached);
}
139 | |
140 | void |
141 | fq_codel_scheduler_init(void) |
142 | { |
143 | /* Initialize the zone for flow queue structures */ |
144 | fq_codel_init(); |
145 | |
146 | fq_if_size = sizeof (fq_if_t); |
147 | fq_if_zone = zinit(fq_if_size, (FQ_IF_ZONE_MAX * fq_if_size), 0, |
148 | FQ_IF_ZONE_NAME); |
149 | if (fq_if_zone == NULL) { |
150 | panic("%s: failed allocating from %s" , __func__, |
151 | (FQ_IF_ZONE_NAME)); |
152 | } |
153 | zone_change(fq_if_zone, Z_EXPAND, TRUE); |
154 | zone_change(fq_if_zone, Z_CALLERACCT, TRUE); |
155 | |
156 | } |
157 | |
158 | fq_if_t * |
159 | fq_if_alloc(struct ifnet *ifp, classq_pkt_type_t ptype) |
160 | { |
161 | fq_if_t *fqs; |
162 | fqs = zalloc(fq_if_zone); |
163 | if (fqs == NULL) |
164 | return (NULL); |
165 | |
166 | bzero(fqs, fq_if_size); |
167 | fqs->fqs_ifq = &ifp->if_snd; |
168 | fqs->fqs_ptype = ptype; |
169 | |
170 | /* Calculate target queue delay */ |
171 | ifclassq_calc_target_qdelay(ifp, &fqs->fqs_target_qdelay); |
172 | |
173 | /* Calculate update interval */ |
174 | ifclassq_calc_update_interval(&fqs->fqs_update_interval); |
175 | |
176 | /* Configure packet drop limit across all queues */ |
177 | fqs->fqs_pkt_droplimit = IFCQ_PKT_DROP_LIMIT(&ifp->if_snd); |
178 | STAILQ_INIT(&fqs->fqs_fclist); |
179 | return (fqs); |
180 | } |
181 | |
/*
 * Tear down a scheduler instance: drain every queued packet, detach
 * it from the interface class queue and return it to the zone.
 */
void
fq_if_destroy(fq_if_t *fqs)
{
	fq_if_purge(fqs);
	fqs->fqs_ifq = NULL;
	zfree(fq_if_zone, fqs);
}
189 | |
190 | static inline u_int32_t |
191 | fq_if_service_to_priority(fq_if_t *fqs, mbuf_svc_class_t svc) |
192 | { |
193 | u_int32_t pri; |
194 | |
195 | if (fqs->fqs_flags & FQS_DRIVER_MANAGED) { |
196 | switch (svc) { |
197 | case MBUF_SC_BK_SYS: |
198 | case MBUF_SC_BK: |
199 | pri = FQ_IF_BK_INDEX; |
200 | break; |
201 | case MBUF_SC_BE: |
202 | case MBUF_SC_RD: |
203 | case MBUF_SC_OAM: |
204 | pri = FQ_IF_BE_INDEX; |
205 | break; |
206 | case MBUF_SC_AV: |
207 | case MBUF_SC_RV: |
208 | case MBUF_SC_VI: |
209 | case MBUF_SC_SIG: |
210 | pri = FQ_IF_VI_INDEX; |
211 | break; |
212 | case MBUF_SC_VO: |
213 | case MBUF_SC_CTL: |
214 | pri = FQ_IF_VO_INDEX; |
215 | break; |
216 | default: |
217 | pri = FQ_IF_BE_INDEX; /* Use best effort by default */ |
218 | break; |
219 | } |
220 | return (pri); |
221 | } |
222 | |
223 | /* scheduler is not managed by the driver */ |
224 | switch (svc) { |
225 | case MBUF_SC_BK_SYS: |
226 | pri = FQ_IF_BK_SYS_INDEX; |
227 | break; |
228 | case MBUF_SC_BK: |
229 | pri = FQ_IF_BK_INDEX; |
230 | break; |
231 | case MBUF_SC_BE: |
232 | pri = FQ_IF_BE_INDEX; |
233 | break; |
234 | case MBUF_SC_RD: |
235 | pri = FQ_IF_RD_INDEX; |
236 | break; |
237 | case MBUF_SC_OAM: |
238 | pri = FQ_IF_OAM_INDEX; |
239 | break; |
240 | case MBUF_SC_AV: |
241 | pri = FQ_IF_AV_INDEX; |
242 | break; |
243 | case MBUF_SC_RV: |
244 | pri = FQ_IF_RV_INDEX; |
245 | break; |
246 | case MBUF_SC_VI: |
247 | pri = FQ_IF_VI_INDEX; |
248 | break; |
249 | case MBUF_SC_SIG: |
250 | pri = FQ_IF_SIG_INDEX; |
251 | break; |
252 | case MBUF_SC_VO: |
253 | pri = FQ_IF_VO_INDEX; |
254 | break; |
255 | case MBUF_SC_CTL: |
256 | pri = FQ_IF_CTL_INDEX; |
257 | break; |
258 | default: |
259 | pri = FQ_IF_BE_INDEX; /* Use best effort by default */ |
260 | break; |
261 | } |
262 | return (pri); |
263 | } |
264 | |
265 | void |
266 | fq_if_classq_init(fq_if_t *fqs, u_int32_t pri, u_int32_t quantum, |
267 | u_int32_t drr_max, u_int32_t svc_class) |
268 | { |
269 | fq_if_classq_t *fq_cl; |
270 | |
271 | fq_cl = &fqs->fqs_classq[pri]; |
272 | |
273 | VERIFY(pri >= 0 && pri < FQ_IF_MAX_CLASSES && |
274 | fq_cl->fcl_quantum == 0); |
275 | fq_cl->fcl_quantum = quantum; |
276 | fq_cl->fcl_pri = pri; |
277 | fq_cl->fcl_drr_max = drr_max; |
278 | fq_cl->fcl_service_class = svc_class; |
279 | STAILQ_INIT(&fq_cl->fcl_new_flows); |
280 | STAILQ_INIT(&fq_cl->fcl_old_flows); |
281 | } |
282 | |
283 | int |
284 | fq_if_enqueue_classq(struct ifclassq *ifq, void *p, classq_pkt_type_t ptype, |
285 | boolean_t *pdrop) |
286 | { |
287 | u_int32_t pri; |
288 | fq_if_t *fqs; |
289 | fq_if_classq_t *fq_cl; |
290 | int ret, len; |
291 | mbuf_svc_class_t svc; |
292 | pktsched_pkt_t pkt; |
293 | |
294 | IFCQ_LOCK_ASSERT_HELD(ifq); |
295 | if ((ptype == QP_MBUF) && !(((mbuf_t)p)->m_flags & M_PKTHDR)) { |
296 | IFCQ_CONVERT_LOCK(ifq); |
297 | m_freem((mbuf_t)p); |
298 | *pdrop = TRUE; |
299 | return (ENOBUFS); |
300 | } |
301 | pktsched_pkt_encap(&pkt, ptype, p); |
302 | |
303 | fqs = (fq_if_t *)ifq->ifcq_disc; |
304 | svc = pktsched_get_pkt_svc(&pkt); |
305 | pri = fq_if_service_to_priority(fqs, svc); |
306 | VERIFY(pri >= 0 && pri < FQ_IF_MAX_CLASSES); |
307 | fq_cl = &fqs->fqs_classq[pri]; |
308 | |
309 | if (svc == MBUF_SC_BK_SYS && fqs->fqs_throttle == 1) { |
310 | /* BK_SYS is currently throttled */ |
311 | fq_cl->fcl_stat.fcl_throttle_drops++; |
312 | IFCQ_CONVERT_LOCK(ifq); |
313 | pktsched_free_pkt(&pkt); |
314 | *pdrop = TRUE; |
315 | return (EQSUSPENDED); |
316 | } |
317 | |
318 | len = pktsched_get_pkt_len(&pkt); |
319 | ret = fq_addq(fqs, &pkt, fq_cl); |
320 | if (!(fqs->fqs_flags & FQS_DRIVER_MANAGED) && |
321 | !FQ_IF_CLASSQ_IDLE(fq_cl)) { |
322 | if (((fqs->fqs_bitmaps[FQ_IF_ER] | fqs->fqs_bitmaps[FQ_IF_EB]) & |
323 | (1 << pri)) == 0) { |
324 | /* |
325 | * this group is not in ER or EB groups, |
326 | * mark it as IB |
327 | */ |
328 | pktsched_bit_set(pri, &fqs->fqs_bitmaps[FQ_IF_IB]); |
329 | } |
330 | } |
331 | |
332 | if (ret != 0) { |
333 | if (ret == CLASSQEQ_SUCCESS_FC) { |
334 | /* packet enqueued, return advisory feedback */ |
335 | ret = EQFULL; |
336 | *pdrop = FALSE; |
337 | } else { |
338 | *pdrop = TRUE; |
339 | VERIFY(ret == CLASSQEQ_DROP || |
340 | ret == CLASSQEQ_DROP_FC || |
341 | ret == CLASSQEQ_DROP_SP); |
342 | pktsched_free_pkt(&pkt); |
343 | switch (ret) { |
344 | case CLASSQEQ_DROP: |
345 | return (ENOBUFS); |
346 | case CLASSQEQ_DROP_FC: |
347 | return (EQFULL); |
348 | case CLASSQEQ_DROP_SP: |
349 | return (EQSUSPENDED); |
350 | } |
351 | } |
352 | } else { |
353 | *pdrop = FALSE; |
354 | } |
355 | IFCQ_INC_LEN(ifq); |
356 | IFCQ_INC_BYTES(ifq, len); |
357 | return (ret); |
358 | } |
359 | |
360 | static void * |
361 | fq_if_dequeue_classq(struct ifclassq *ifq, classq_pkt_type_t *ptype) |
362 | { |
363 | void *top; |
364 | |
365 | (void) fq_if_dequeue_classq_multi(ifq, 1, |
366 | CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &top, NULL, NULL, NULL, ptype); |
367 | return (top); |
368 | } |
369 | |
370 | static void * |
371 | fq_if_dequeue_sc_classq(struct ifclassq *ifq, mbuf_svc_class_t svc, |
372 | classq_pkt_type_t *ptype) |
373 | { |
374 | void *top; |
375 | fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc; |
376 | fq_if_classq_t *fq_cl; |
377 | u_int32_t pri; |
378 | |
379 | pri = fq_if_service_to_priority(fqs, svc); |
380 | fq_cl = &fqs->fqs_classq[pri]; |
381 | |
382 | fq_if_dequeue(fqs, fq_cl, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, |
383 | &top, NULL, NULL, NULL, TRUE, ptype); |
384 | return (top); |
385 | } |
386 | |
/*
 * Dequeue up to 'maxpktcnt' packets / 'maxbytecnt' bytes across all
 * class queues, honoring the ER/EB/IB class-state bitmaps and each
 * class's DRR byte budget.  The dequeued chain is returned through
 * first_packet/last_packet and its totals through retpktcnt/retbytecnt
 * (each out parameter may be NULL).  Always returns 0.
 */
int
fq_if_dequeue_classq_multi(struct ifclassq *ifq, u_int32_t maxpktcnt,
    u_int32_t maxbytecnt, void **first_packet,
    void **last_packet, u_int32_t *retpktcnt, u_int32_t *retbytecnt,
    classq_pkt_type_t *ptype)
{
	void *top = NULL, *tail = NULL, *first, *last;
	u_int32_t pktcnt = 0, bytecnt = 0, total_pktcnt, total_bytecnt;
	fq_if_t *fqs;
	fq_if_classq_t *fq_cl;
	int pri;
	fq_if_append_pkt_t append_pkt;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	fqs = (fq_if_t *)ifq->ifcq_disc;

	/* pick the chain-append routine for this scheduler's packet type */
	switch (fqs->fqs_ptype) {
	case QP_MBUF:
		append_pkt = fq_if_append_mbuf;
		break;


	default:
		VERIFY(0);
		/* NOTREACHED */
	}

	first = last = NULL;
	total_pktcnt = total_bytecnt = 0;
	*ptype = fqs->fqs_ptype;

	for (;;) {
		classq_pkt_type_t tmp_ptype;
		/* when both ER and EB are empty, promote IB to EB */
		if (fqs->fqs_bitmaps[FQ_IF_ER] == 0 &&
		    fqs->fqs_bitmaps[FQ_IF_EB] == 0) {
			fqs->fqs_bitmaps[FQ_IF_EB] = fqs->fqs_bitmaps[FQ_IF_IB];
			fqs->fqs_bitmaps[FQ_IF_IB] = 0;
			if (fqs->fqs_bitmaps[FQ_IF_EB] == 0)
				break;
		}
		pri = pktsched_ffs(fqs->fqs_bitmaps[FQ_IF_ER]);
		if (pri == 0) {
			/*
			 * There are no ER flows, move the highest
			 * priority one from EB if there are any in that
			 * category
			 */
			pri = pktsched_ffs(fqs->fqs_bitmaps[FQ_IF_EB]);
			VERIFY(pri > 0);
			pktsched_bit_clr((pri - 1),
			    &fqs->fqs_bitmaps[FQ_IF_EB]);
			pktsched_bit_set((pri - 1),
			    &fqs->fqs_bitmaps[FQ_IF_ER]);
		}
		pri--; /* index starts at 0 */
		fq_cl = &fqs->fqs_classq[pri];

		if (fq_cl->fcl_budget <= 0) {
			/* Update the budget */
			fq_cl->fcl_budget += (min(fq_cl->fcl_drr_max,
			    fq_cl->fcl_stat.fcl_flows_cnt) *
			    fq_cl->fcl_quantum);
			if (fq_cl->fcl_budget <= 0)
				goto state_change;
		}
		fq_if_dequeue(fqs, fq_cl, (maxpktcnt - total_pktcnt),
		    (maxbytecnt - total_bytecnt), &top, &tail, &pktcnt,
		    &bytecnt, FALSE, &tmp_ptype);
		if (top != NULL) {
			ASSERT(tmp_ptype == *ptype);
			ASSERT(pktcnt > 0 && bytecnt > 0);
			/* splice this class's chain onto the aggregate chain */
			if (first == NULL) {
				first = top;
				last = tail;
				total_pktcnt = pktcnt;
				total_bytecnt = bytecnt;
			} else {
				append_pkt(last, top);
				last = tail;
				total_pktcnt += pktcnt;
				total_bytecnt += bytecnt;
			}
			append_pkt(last, NULL);
			fq_cl->fcl_budget -= bytecnt;
			pktcnt = 0;
			bytecnt = 0;
		}

		/*
		 * If the class has exceeded the budget but still has data
		 * to send, move it to IB
		 */
state_change:
		if (!FQ_IF_CLASSQ_IDLE(fq_cl)) {
			if (fq_cl->fcl_budget <= 0) {
				pktsched_bit_set(pri,
				    &fqs->fqs_bitmaps[FQ_IF_IB]);
				pktsched_bit_clr(pri,
				    &fqs->fqs_bitmaps[FQ_IF_ER]);
			}
		} else {
			/* idle class must leave all three state bitmaps */
			pktsched_bit_clr(pri, &fqs->fqs_bitmaps[FQ_IF_ER]);
			VERIFY(((fqs->fqs_bitmaps[FQ_IF_ER] |
			    fqs->fqs_bitmaps[FQ_IF_EB] |
			    fqs->fqs_bitmaps[FQ_IF_IB])&(1 << pri)) == 0);
			fq_cl->fcl_budget = 0;
		}
		if (total_pktcnt >= maxpktcnt || total_bytecnt >= maxbytecnt)
			break;
	}
	if (first != NULL) {
		if (first_packet != NULL)
			*first_packet = first;
		if (last_packet != NULL)
			*last_packet = last;
		if (retpktcnt != NULL)
			*retpktcnt = total_pktcnt;
		if (retbytecnt != NULL)
			*retbytecnt = total_bytecnt;
		IFCQ_XMIT_ADD(ifq, total_pktcnt, total_bytecnt);
	} else {
		if (first_packet != NULL)
			*first_packet = NULL;
		if (last_packet != NULL)
			*last_packet = NULL;
		if (retpktcnt != NULL)
			*retpktcnt = 0;
		if (retbytecnt != NULL)
			*retbytecnt = 0;
	}
	return (0);
}
520 | |
521 | int |
522 | fq_if_dequeue_sc_classq_multi(struct ifclassq *ifq, mbuf_svc_class_t svc, |
523 | u_int32_t maxpktcnt, u_int32_t maxbytecnt, void **first_packet, |
524 | void **last_packet, u_int32_t *retpktcnt, u_int32_t *retbytecnt, |
525 | classq_pkt_type_t *ptype) |
526 | { |
527 | #pragma unused(maxpktcnt, maxbytecnt, first_packet, last_packet, retpktcnt, retbytecnt) |
528 | fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc; |
529 | u_int32_t pri; |
530 | u_int32_t total_pktcnt = 0, total_bytecnt = 0; |
531 | fq_if_classq_t *fq_cl; |
532 | void *first = NULL, *last = NULL; |
533 | fq_if_append_pkt_t append_pkt; |
534 | |
535 | switch (fqs->fqs_ptype) { |
536 | case QP_MBUF: |
537 | append_pkt = fq_if_append_mbuf; |
538 | break; |
539 | |
540 | |
541 | default: |
542 | VERIFY(0); |
543 | /* NOTREACHED */ |
544 | } |
545 | |
546 | pri = fq_if_service_to_priority(fqs, svc); |
547 | fq_cl = &fqs->fqs_classq[pri]; |
548 | |
549 | /* |
550 | * Now we have the queue for a particular service class. We need |
551 | * to dequeue as many packets as needed, first from the new flows |
552 | * and then from the old flows. |
553 | */ |
554 | while (total_pktcnt < maxpktcnt && total_bytecnt < maxbytecnt && |
555 | fq_cl->fcl_stat.fcl_pkt_cnt > 0) { |
556 | void *top, *tail; |
557 | u_int32_t pktcnt = 0, bytecnt = 0; |
558 | fq_if_dequeue(fqs, fq_cl, (maxpktcnt - total_pktcnt), |
559 | (maxbytecnt - total_bytecnt), &top, &tail, &pktcnt, |
560 | &bytecnt, TRUE, ptype); |
561 | if (first == NULL) { |
562 | first = top; |
563 | total_pktcnt = pktcnt; |
564 | total_bytecnt = bytecnt; |
565 | } else { |
566 | append_pkt(last, top); |
567 | total_pktcnt += pktcnt; |
568 | total_bytecnt += bytecnt; |
569 | } |
570 | last = tail; |
571 | } |
572 | if (first != NULL) { |
573 | if (first_packet != NULL) |
574 | *first_packet = first; |
575 | if (last_packet != NULL) |
576 | *last_packet = last; |
577 | if (retpktcnt != NULL) |
578 | *retpktcnt = total_pktcnt; |
579 | if (retbytecnt != NULL) |
580 | *retbytecnt = total_bytecnt; |
581 | } else { |
582 | if (first_packet != NULL) |
583 | *first_packet = NULL; |
584 | if (last_packet != NULL) |
585 | *last_packet = NULL; |
586 | if (retpktcnt != NULL) |
587 | *retpktcnt = 0; |
588 | if (retbytecnt != NULL) |
589 | *retbytecnt = 0; |
590 | } |
591 | return (0); |
592 | } |
593 | |
/*
 * Drop every packet queued on flow 'fq', unlink the flow from its
 * new/old flow list and destroy it.  The number of packets and bytes
 * freed is returned through pktsp/bytesp (either may be NULL).
 */
static void
fq_if_purge_flow(fq_if_t *fqs, fq_t *fq, u_int32_t *pktsp,
    u_int32_t *bytesp)
{
	fq_if_classq_t *fq_cl;
	u_int32_t pkts, bytes;
	pktsched_pkt_t pkt;

	fq_cl = &fqs->fqs_classq[fq->fq_sc_index];
	pkts = bytes = 0;
	_PKTSCHED_PKT_INIT(&pkt);
	/* drain and free every packet still sitting on the flow queue */
	while (fq_getq_flow(fqs, fq, &pkt) != NULL) {
		pkts++;
		bytes += pktsched_get_pkt_len(&pkt);
		pktsched_free_pkt(&pkt);
	}
	IFCQ_DROP_ADD(fqs->fqs_ifq, pkts, bytes);

	/* unlink from whichever active-flow list the flow is on */
	if (fq->fq_flags & FQF_NEW_FLOW) {
		fq_if_empty_new_flow(fq, fq_cl, false);
	} else if (fq->fq_flags & FQF_OLD_FLOW) {
		fq_if_empty_old_flow(fqs, fq_cl, fq, false);
	}

	fq_if_destroy_flow(fqs, fq_cl, fq);

	/* an idle class must not remain in any scheduler state bitmap */
	if (FQ_IF_CLASSQ_IDLE(fq_cl)) {
		int i;
		for (i = FQ_IF_ER; i < FQ_IF_MAX_STATE; i++) {
			pktsched_bit_clr(fq_cl->fcl_pri,
			    &fqs->fqs_bitmaps[i]);
		}
	}
	if (pktsp != NULL)
		*pktsp = pkts;
	if (bytesp != NULL)
		*bytesp = bytes;
}
632 | |
633 | static void |
634 | fq_if_purge_classq(fq_if_t *fqs, fq_if_classq_t *fq_cl) |
635 | { |
636 | fq_t *fq, *tfq; |
637 | /* |
638 | * Take each flow from new/old flow list and flush mbufs |
639 | * in that flow |
640 | */ |
641 | STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_new_flows, fq_actlink, tfq) { |
642 | fq_if_purge_flow(fqs, fq, NULL, NULL); |
643 | } |
644 | STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_old_flows, fq_actlink, tfq) { |
645 | fq_if_purge_flow(fqs, fq, NULL, NULL); |
646 | } |
647 | VERIFY(STAILQ_EMPTY(&fq_cl->fcl_new_flows)); |
648 | VERIFY(STAILQ_EMPTY(&fq_cl->fcl_old_flows)); |
649 | |
650 | STAILQ_INIT(&fq_cl->fcl_new_flows); |
651 | STAILQ_INIT(&fq_cl->fcl_old_flows); |
652 | fq_cl->fcl_budget = 0; |
653 | } |
654 | |
655 | static void |
656 | fq_if_purge(fq_if_t *fqs) |
657 | { |
658 | int i; |
659 | |
660 | IFCQ_CONVERT_LOCK(fqs->fqs_ifq); |
661 | for (i = 0; i < FQ_IF_MAX_CLASSES; i++) { |
662 | fq_if_purge_classq(fqs, &fqs->fqs_classq[i]); |
663 | } |
664 | |
665 | VERIFY(STAILQ_EMPTY(&fqs->fqs_fclist)); |
666 | |
667 | fqs->fqs_large_flow = NULL; |
668 | for (i = 0; i < FQ_IF_HASH_TABLE_SIZE; i++) { |
669 | VERIFY(SLIST_EMPTY(&fqs->fqs_flows[i])); |
670 | } |
671 | |
672 | bzero(&fqs->fqs_bitmaps, sizeof (fqs->fqs_bitmaps)); |
673 | |
674 | IFCQ_LEN(fqs->fqs_ifq) = 0; |
675 | IFCQ_BYTES(fqs->fqs_ifq) = 0; |
676 | } |
677 | |
678 | static void |
679 | fq_if_purge_sc(fq_if_t *fqs, cqrq_purge_sc_t *req) |
680 | { |
681 | fq_t *fq; |
682 | |
683 | IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq); |
684 | req->packets = req->bytes = 0; |
685 | VERIFY(req->flow != 0); |
686 | |
687 | /* packet type is needed only if we want to create a flow queue */ |
688 | fq = fq_if_hash_pkt(fqs, req->flow, req->sc, 0, FALSE, QP_INVALID); |
689 | |
690 | if (fq != NULL) |
691 | fq_if_purge_flow(fqs, fq, &req->packets, &req->bytes); |
692 | } |
693 | |
694 | static void |
695 | fq_if_event(fq_if_t *fqs, cqev_t ev) |
696 | { |
697 | IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq); |
698 | |
699 | switch (ev) { |
700 | case CLASSQ_EV_LINK_UP: |
701 | case CLASSQ_EV_LINK_DOWN: |
702 | fq_if_purge(fqs); |
703 | break; |
704 | default: |
705 | break; |
706 | } |
707 | } |
708 | |
/*
 * Suspend (throttle) a class queue: drop everything it has queued
 * and mark the scheduler as throttled.
 */
static void
fq_if_classq_suspend(fq_if_t *fqs, fq_if_classq_t *fq_cl)
{
	fq_if_purge_classq(fqs, fq_cl);
	fqs->fqs_throttle = 1;
	fq_cl->fcl_stat.fcl_throttle_on++;
}
716 | |
/*
 * Resume a previously suspended class queue; the queue must be idle
 * (it was purged when suspended).
 */
static void
fq_if_classq_resume(fq_if_t *fqs, fq_if_classq_t *fq_cl)
{
	VERIFY(FQ_IF_CLASSQ_IDLE(fq_cl));
	fqs->fqs_throttle = 0;
	fq_cl->fcl_stat.fcl_throttle_off++;
}
724 | |
725 | |
/*
 * Get or set the scheduler's throttling level.  When tr->set is FALSE
 * the current level is reported through tr->level.  Setting applies to
 * the BK_SYS class only: OPPORTUNISTIC suspends (and purges) it, OFF
 * resumes it.  Returns 0, or EALREADY if the level is unchanged.
 */
static int
fq_if_throttle(fq_if_t *fqs, cqrq_throttle_t *tr)
{
	struct ifclassq *ifq = fqs->fqs_ifq;
	int index;
#if !MACH_ASSERT
#pragma unused(ifq)
#endif
	IFCQ_LOCK_ASSERT_HELD(ifq);

	if (!tr->set) {
		/* query: report the current throttling level */
		tr->level = fqs->fqs_throttle;
		return (0);
	}

	if (tr->level == fqs->fqs_throttle)
		return (EALREADY);

	/* Throttling is allowed on BK_SYS class only */
	index = fq_if_service_to_priority(fqs, MBUF_SC_BK_SYS);
	switch (tr->level) {
	case IFNET_THROTTLE_OFF:
		fq_if_classq_resume(fqs, &fqs->fqs_classq[index]);
		break;
	case IFNET_THROTTLE_OPPORTUNISTIC:
		fq_if_classq_suspend(fqs, &fqs->fqs_classq[index]);
		break;
	default:
		break;
	}
	return (0);
}
758 | |
759 | void |
760 | fq_if_stat_sc(fq_if_t *fqs, cqrq_stat_sc_t *stat) |
761 | { |
762 | u_int32_t pri; |
763 | fq_if_classq_t *fq_cl; |
764 | |
765 | if (stat == NULL) |
766 | return; |
767 | |
768 | pri = fq_if_service_to_priority(fqs, stat->sc); |
769 | fq_cl = &fqs->fqs_classq[pri]; |
770 | stat->packets = fq_cl->fcl_stat.fcl_pkt_cnt; |
771 | stat->bytes = fq_cl->fcl_stat.fcl_byte_cnt; |
772 | } |
773 | |
774 | int |
775 | fq_if_request_classq(struct ifclassq *ifq, cqrq_t rq, void *arg) |
776 | { |
777 | int err = 0; |
778 | fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc; |
779 | |
780 | IFCQ_LOCK_ASSERT_HELD(ifq); |
781 | |
782 | /* |
783 | * These are usually slow operations, convert the lock ahead of time |
784 | */ |
785 | IFCQ_CONVERT_LOCK(fqs->fqs_ifq); |
786 | switch (rq) { |
787 | case CLASSQRQ_PURGE: |
788 | fq_if_purge(fqs); |
789 | break; |
790 | case CLASSQRQ_PURGE_SC: |
791 | fq_if_purge_sc(fqs, (cqrq_purge_sc_t *)arg); |
792 | break; |
793 | case CLASSQRQ_EVENT: |
794 | fq_if_event(fqs, (cqev_t)arg); |
795 | break; |
796 | case CLASSQRQ_THROTTLE: |
797 | fq_if_throttle(fqs, (cqrq_throttle_t *)arg); |
798 | break; |
799 | case CLASSQRQ_STAT_SC: |
800 | fq_if_stat_sc(fqs, (cqrq_stat_sc_t *)arg); |
801 | break; |
802 | } |
803 | return (err); |
804 | } |
805 | |
806 | int |
807 | fq_if_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags, |
808 | classq_pkt_type_t ptype) |
809 | { |
810 | #pragma unused(flags) |
811 | struct ifnet *ifp = ifq->ifcq_ifp; |
812 | fq_if_t *fqs = NULL; |
813 | int err = 0; |
814 | |
815 | IFCQ_LOCK_ASSERT_HELD(ifq); |
816 | VERIFY(ifq->ifcq_disc == NULL); |
817 | VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE); |
818 | |
819 | fqs = fq_if_alloc(ifp, ptype); |
820 | if (fqs == NULL) |
821 | return (ENOMEM); |
822 | |
823 | if (flags & PKTSCHEDF_QALG_DRIVER_MANAGED) { |
824 | fqs->fqs_flags |= FQS_DRIVER_MANAGED; |
825 | fq_if_classq_init(fqs, FQ_IF_BK_INDEX, 1500, |
826 | 2, MBUF_SC_BK); |
827 | fq_if_classq_init(fqs, FQ_IF_BE_INDEX, 1500, |
828 | 4, MBUF_SC_BE); |
829 | fq_if_classq_init(fqs, FQ_IF_VI_INDEX, 3000, |
830 | 6, MBUF_SC_VI); |
831 | fq_if_classq_init(fqs, FQ_IF_VO_INDEX, 600, |
832 | 8, MBUF_SC_VO); |
833 | } else { |
834 | /* SIG shares same INDEX with VI */ |
835 | _CASSERT(SCIDX_SIG == SCIDX_VI); |
836 | _CASSERT(FQ_IF_SIG_INDEX == FQ_IF_VI_INDEX); |
837 | |
838 | fq_if_classq_init(fqs, FQ_IF_BK_SYS_INDEX, 1500, |
839 | 2, MBUF_SC_BK_SYS); |
840 | fq_if_classq_init(fqs, FQ_IF_BK_INDEX, 1500, |
841 | 2, MBUF_SC_BK); |
842 | fq_if_classq_init(fqs, FQ_IF_BE_INDEX, 1500, |
843 | 4, MBUF_SC_BE); |
844 | fq_if_classq_init(fqs, FQ_IF_RD_INDEX, 1500, |
845 | 4, MBUF_SC_RD); |
846 | fq_if_classq_init(fqs, FQ_IF_OAM_INDEX, 1500, |
847 | 4, MBUF_SC_OAM); |
848 | fq_if_classq_init(fqs, FQ_IF_AV_INDEX, 3000, |
849 | 6, MBUF_SC_AV); |
850 | fq_if_classq_init(fqs, FQ_IF_RV_INDEX, 3000, |
851 | 6, MBUF_SC_RV); |
852 | fq_if_classq_init(fqs, FQ_IF_VI_INDEX, 3000, |
853 | 6, MBUF_SC_VI); |
854 | fq_if_classq_init(fqs, FQ_IF_VO_INDEX, 600, |
855 | 8, MBUF_SC_VO); |
856 | fq_if_classq_init(fqs, FQ_IF_CTL_INDEX, 600, |
857 | 8, MBUF_SC_CTL); |
858 | } |
859 | |
860 | err = ifclassq_attach(ifq, PKTSCHEDT_FQ_CODEL, fqs, |
861 | fq_if_enqueue_classq, fq_if_dequeue_classq, |
862 | fq_if_dequeue_sc_classq, fq_if_dequeue_classq_multi, |
863 | fq_if_dequeue_sc_classq_multi, fq_if_request_classq); |
864 | |
865 | if (err != 0) { |
866 | printf("%s: error from ifclassq_attach, " |
867 | "failed to attach fq_if: %d\n" , __func__, err); |
868 | fq_if_destroy(fqs); |
869 | } |
870 | return (err); |
871 | } |
872 | |
/*
 * Find the flow queue for (flowid, svc_class) in the flow hash table;
 * when 'create' is TRUE and no queue exists, allocate and insert one.
 * 'ptype' is only consulted when a new flow queue is created; 'now'
 * seeds the new queue's update time and the stall-detection getq time.
 * Returns the flow queue, or NULL when not found or allocation failed.
 */
fq_t *
fq_if_hash_pkt(fq_if_t *fqs, u_int32_t flowid, mbuf_svc_class_t svc_class,
    u_int64_t now, boolean_t create, classq_pkt_type_t ptype)
{
	fq_t *fq = NULL;
	flowq_list_t *fq_list;
	fq_if_classq_t *fq_cl;
	u_int8_t fqs_hash_id;
	u_int8_t scidx;

	scidx = fq_if_service_to_priority(fqs, svc_class);

	fqs_hash_id = FQ_IF_FLOW_HASH_ID(flowid);

	fq_list = &fqs->fqs_flows[fqs_hash_id];

	/* linear search within the hash bucket */
	SLIST_FOREACH(fq, fq_list, fq_hashlink) {
		if (fq->fq_flowhash == flowid &&
		    fq->fq_sc_index == scidx)
			break;
	}
	if (fq == NULL && create == TRUE) {
		ASSERT(ptype == QP_MBUF);

		/* If the flow is not already on the list, allocate it */
		IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
		fq = fq_alloc(ptype);
		if (fq != NULL) {
			fq->fq_flowhash = flowid;
			fq->fq_sc_index = scidx;
			fq->fq_updatetime = now + fqs->fqs_update_interval;
			fq_cl = &fqs->fqs_classq[scidx];
			fq->fq_flags = FQF_FLOWCTL_CAPABLE;
			SLIST_INSERT_HEAD(fq_list, fq, fq_hashlink);
			fq_cl->fcl_stat.fcl_flows_cnt++;
		}
	}

	/*
	 * If getq time is not set because this is the first packet or after
	 * idle time, set it now so that we can detect a stall.
	 */
	if (fq != NULL && fq->fq_getqtime == 0)
		fq->fq_getqtime = now;

	return (fq);
}
920 | |
921 | void |
922 | fq_if_destroy_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq) |
923 | { |
924 | u_int8_t hash_id; |
925 | hash_id = FQ_IF_FLOW_HASH_ID(fq->fq_flowhash); |
926 | SLIST_REMOVE(&fqs->fqs_flows[hash_id], fq, flowq, |
927 | fq_hashlink); |
928 | fq_cl->fcl_stat.fcl_flows_cnt--; |
929 | IFCQ_CONVERT_LOCK(fqs->fqs_ifq); |
930 | fq_destroy(fq); |
931 | |
932 | } |
933 | |
934 | inline boolean_t |
935 | fq_if_at_drop_limit(fq_if_t *fqs) |
936 | { |
937 | return (((IFCQ_LEN(fqs->fqs_ifq) >= fqs->fqs_pkt_droplimit) ? |
938 | TRUE : FALSE)); |
939 | } |
940 | |
941 | static void |
942 | fq_if_empty_old_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq, |
943 | bool remove_hash) |
944 | { |
945 | /* |
946 | * Remove the flow queue if it is empty |
947 | * and delete it |
948 | */ |
949 | STAILQ_REMOVE(&fq_cl->fcl_old_flows, fq, flowq, |
950 | fq_actlink); |
951 | fq->fq_flags &= ~FQF_OLD_FLOW; |
952 | fq_cl->fcl_stat.fcl_oldflows_cnt--; |
953 | VERIFY(fq->fq_bytes == 0); |
954 | |
955 | if (remove_hash) { |
956 | /* Remove from the hash list */ |
957 | fq_if_destroy_flow(fqs, fq_cl, fq); |
958 | } |
959 | } |
960 | |
961 | static void |
962 | fq_if_empty_new_flow(fq_t *fq, fq_if_classq_t *fq_cl, bool add_to_old) |
963 | { |
964 | /* Move to the end of old queue list */ |
965 | STAILQ_REMOVE(&fq_cl->fcl_new_flows, fq, |
966 | flowq, fq_actlink); |
967 | fq->fq_flags &= ~FQF_NEW_FLOW; |
968 | fq_cl->fcl_stat.fcl_newflows_cnt--; |
969 | |
970 | if (add_to_old) { |
971 | STAILQ_INSERT_TAIL(&fq_cl->fcl_old_flows, fq, |
972 | fq_actlink); |
973 | fq->fq_flags |= FQF_OLD_FLOW; |
974 | fq_cl->fcl_stat.fcl_oldflows_cnt++; |
975 | } |
976 | } |
977 | |
/*
 * Drop one packet from the head of the cached largest flow
 * (fqs_large_flow) to relieve queue overflow.  No-op when no large
 * flow is cached.
 */
inline void
fq_if_drop_packet(fq_if_t *fqs)
{
	fq_t *fq = fqs->fqs_large_flow;
	fq_if_classq_t *fq_cl;
	pktsched_pkt_t pkt;
	uint32_t *pkt_flags;
	uint64_t *pkt_timestamp;

	if (fq == NULL)
		return;
	/* queue can not be empty on the largest flow */
	VERIFY(!fq_empty(fq));

	fq_cl = &fqs->fqs_classq[fq->fq_sc_index];
	_PKTSCHED_PKT_INIT(&pkt);
	(void)fq_getq_flow_internal(fqs, fq, &pkt);

	pktsched_get_pkt_vars(&pkt, &pkt_flags, &pkt_timestamp, NULL, NULL,
	    NULL, NULL);

	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	/* scrub scheduler-private metadata before freeing the packet */
	*pkt_timestamp = 0;
	if (pkt.pktsched_ptype == QP_MBUF)
		*pkt_flags &= ~PKTF_PRIV_GUARDED;

	/* if the flow drained, take it off its new/old flow list */
	if (fq_empty(fq)) {
		fqs->fqs_large_flow = NULL;
		if (fq->fq_flags & FQF_OLD_FLOW) {
			fq_if_empty_old_flow(fqs, fq_cl, fq, true);
		} else {
			VERIFY(fq->fq_flags & FQF_NEW_FLOW);
			fq_if_empty_new_flow(fq, fq_cl, true);
		}
	}
	IFCQ_DROP_ADD(fqs->fqs_ifq, 1, pktsched_get_pkt_len(&pkt));

	pktsched_free_pkt(&pkt);
	fq_cl->fcl_stat.fcl_drop_overflow++;
}
1018 | |
1019 | inline void |
1020 | fq_if_is_flow_heavy(fq_if_t *fqs, fq_t *fq) |
1021 | { |
1022 | fq_t *prev_fq; |
1023 | |
1024 | if (fqs->fqs_large_flow != NULL && |
1025 | fqs->fqs_large_flow->fq_bytes < FQ_IF_LARGE_FLOW_BYTE_LIMIT) |
1026 | fqs->fqs_large_flow = NULL; |
1027 | |
1028 | if (fq == NULL || fq->fq_bytes < FQ_IF_LARGE_FLOW_BYTE_LIMIT) |
1029 | return; |
1030 | |
1031 | prev_fq = fqs->fqs_large_flow; |
1032 | if (prev_fq == NULL) { |
1033 | if (!fq_empty(fq)) |
1034 | fqs->fqs_large_flow = fq; |
1035 | return; |
1036 | } else if (fq->fq_bytes > prev_fq->fq_bytes) { |
1037 | fqs->fqs_large_flow = fq; |
1038 | } |
1039 | } |
1040 | |
1041 | boolean_t |
1042 | fq_if_add_fcentry(fq_if_t *fqs, pktsched_pkt_t *pkt, uint32_t flowid, |
1043 | uint8_t flowsrc, fq_if_classq_t *fq_cl) |
1044 | { |
1045 | struct flowadv_fcentry *fce; |
1046 | |
1047 | STAILQ_FOREACH(fce, &fqs->fqs_fclist, fce_link) { |
1048 | if ((uint8_t)fce->fce_flowsrc_type == flowsrc && |
1049 | fce->fce_flowid == flowid) { |
1050 | /* Already on flowcontrol list */ |
1051 | return (TRUE); |
1052 | } |
1053 | } |
1054 | IFCQ_CONVERT_LOCK(fqs->fqs_ifq); |
1055 | fce = pktsched_alloc_fcentry(pkt, fqs->fqs_ifq->ifcq_ifp, M_WAITOK); |
1056 | if (fce != NULL) { |
1057 | /* XXX Add number of bytes in the queue */ |
1058 | STAILQ_INSERT_TAIL(&fqs->fqs_fclist, fce, fce_link); |
1059 | fq_cl->fcl_stat.fcl_flow_control++; |
1060 | } |
1061 | return ((fce != NULL) ? TRUE : FALSE); |
1062 | } |
1063 | |
1064 | void |
1065 | fq_if_flow_feedback(fq_if_t *fqs, fq_t *fq, fq_if_classq_t *fq_cl) |
1066 | { |
1067 | struct flowadv_fcentry *fce = NULL; |
1068 | |
1069 | IFCQ_CONVERT_LOCK(fqs->fqs_ifq); |
1070 | STAILQ_FOREACH(fce, &fqs->fqs_fclist, fce_link) { |
1071 | if (fce->fce_flowid == fq->fq_flowhash) |
1072 | break; |
1073 | } |
1074 | if (fce != NULL) { |
1075 | STAILQ_REMOVE(&fqs->fqs_fclist, fce, flowadv_fcentry, |
1076 | fce_link); |
1077 | STAILQ_NEXT(fce, fce_link) = NULL; |
1078 | flowadv_add_entry(fce); |
1079 | fq_cl->fcl_stat.fcl_flow_feedback++; |
1080 | } |
1081 | fq->fq_flags &= ~FQF_FLOWCTL_ON; |
1082 | } |
1083 | |
/*
 * Dequeue a batch of packets from one service class using deficit
 * round-robin across its flow queues: new flows are serviced first,
 * then old flows.  Dequeuing stops when pktlimit/bytelimit is reached
 * or the class runs dry.
 *
 * Returns the packet chain via *top/*tail and the counts via
 * *retpktcnt/*retbytecnt (tail/counts only written when something was
 * dequeued).  When drvmgmt is FALSE, bytelimit is additionally capped
 * by the class' DRR budget.  *ptype reports the packet type of the
 * chain.
 */
void
fq_if_dequeue(fq_if_t *fqs, fq_if_classq_t *fq_cl, u_int32_t pktlimit,
    u_int32_t bytelimit, void **top, void **tail,
    u_int32_t *retpktcnt, u_int32_t *retbytecnt, boolean_t drvmgmt,
    classq_pkt_type_t *ptype)
{
	fq_t *fq = NULL, *tfq = NULL;
	flowq_stailq_t temp_stailq;
	u_int32_t pktcnt, bytecnt;
	boolean_t qempty, limit_reached = FALSE;
	void *last = NULL;
	fq_getq_flow_t fq_getq_flow_fn;

	/* Pick the per-packet-type flow dequeue routine */
	switch (fqs->fqs_ptype) {
	case QP_MBUF:
		fq_getq_flow_fn = fq_getq_flow_mbuf;
		break;


	default:
		VERIFY(0);
		/* NOTREACHED */
	}

	/*
	 * maximum byte limit should not be greater than the budget for
	 * this class
	 */
	if ((int32_t)bytelimit > fq_cl->fcl_budget && !drvmgmt)
		bytelimit = fq_cl->fcl_budget;

	VERIFY(pktlimit > 0 && bytelimit > 0 && top != NULL);

	*top = NULL;
	*ptype = fqs->fqs_ptype;
	pktcnt = bytecnt = 0;
	/* Holds old flows that exhausted their deficit, to be re-queued */
	STAILQ_INIT(&temp_stailq);

	/* Pass 1: new flows get strict priority over old flows */
	STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_new_flows, fq_actlink, tfq) {
		ASSERT((fq->fq_flags & (FQF_NEW_FLOW|FQF_OLD_FLOW)) ==
		    FQF_NEW_FLOW);

		limit_reached = fq_getq_flow_fn(fqs, fq_cl, fq, bytelimit,
		    pktlimit, top, &last, &bytecnt, &pktcnt, &qempty,
		    PKTF_NEW_FLOW);

		/* Demote a drained or deficit-exhausted new flow to old */
		if (fq->fq_deficit <= 0 || qempty)
			fq_if_empty_new_flow(fq, fq_cl, true);
		fq->fq_deficit += fq_cl->fcl_quantum;
		if (limit_reached)
			goto done;
	}

	/* Pass 2: service old flows with classic DRR */
	STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_old_flows, fq_actlink, tfq) {
		VERIFY((fq->fq_flags & (FQF_NEW_FLOW|FQF_OLD_FLOW)) ==
		    FQF_OLD_FLOW);

		limit_reached = fq_getq_flow_fn(fqs, fq_cl, fq, bytelimit,
		    pktlimit, top, &last, &bytecnt, &pktcnt, &qempty, 0);

		if (qempty) {
			fq_if_empty_old_flow(fqs, fq_cl, fq, true);
		} else if (fq->fq_deficit <= 0) {
			STAILQ_REMOVE(&fq_cl->fcl_old_flows, fq,
			    flowq, fq_actlink);
			/*
			 * Move to the end of the old queues list. We do not
			 * need to update the flow count since this flow
			 * will be added to the tail again
			 */
			STAILQ_INSERT_TAIL(&temp_stailq, fq, fq_actlink);
			fq->fq_deficit += fq_cl->fcl_quantum;
		}
		if (limit_reached)
			break;
	}

done:
	/* Re-append deficit-exhausted flows to the tail of the old list */
	if (!STAILQ_EMPTY(&fq_cl->fcl_old_flows)) {
		STAILQ_CONCAT(&fq_cl->fcl_old_flows, &temp_stailq);
	} else if (!STAILQ_EMPTY(&temp_stailq)) {
		fq_cl->fcl_old_flows = temp_stailq;
	}

	/* Report tail and counts only if something was dequeued */
	if (last != NULL) {
		VERIFY(*top != NULL);
		if (tail != NULL)
			*tail = last;
		if (retpktcnt != NULL)
			*retpktcnt = pktcnt;
		if (retbytecnt != NULL)
			*retbytecnt = bytecnt;
	}
}
1178 | |
/*
 * Tear down the FQ-CoDel discipline attached to an interface class
 * queue: destroy the scheduler state, clear the discipline pointer and
 * detach the discipline from the queue.  Caller must hold the IFCQ
 * lock.  Returns the result of ifclassq_detach().
 */
int
fq_if_teardown_ifclassq(struct ifclassq *ifq)
{
	fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(fqs != NULL && ifq->ifcq_type == PKTSCHEDT_FQ_CODEL);

	fq_if_destroy(fqs);
	ifq->ifcq_disc = NULL;
	return (ifclassq_detach(ifq));
}
1191 | |
1192 | static void |
1193 | fq_export_flowstats(fq_if_t *fqs, fq_t *fq, |
1194 | struct fq_codel_flowstats *flowstat) |
1195 | { |
1196 | bzero(flowstat, sizeof (*flowstat)); |
1197 | flowstat->fqst_min_qdelay = fq->fq_min_qdelay; |
1198 | flowstat->fqst_bytes = fq->fq_bytes; |
1199 | flowstat->fqst_flowhash = fq->fq_flowhash; |
1200 | if (fq->fq_flags & FQF_NEW_FLOW) |
1201 | flowstat->fqst_flags |= FQ_FLOWSTATS_NEW_FLOW; |
1202 | if (fq->fq_flags & FQF_OLD_FLOW) |
1203 | flowstat->fqst_flags |= FQ_FLOWSTATS_OLD_FLOW; |
1204 | if (fq->fq_flags & FQF_DELAY_HIGH) |
1205 | flowstat->fqst_flags |= FQ_FLOWSTATS_DELAY_HIGH; |
1206 | if (fq->fq_flags & FQF_FLOWCTL_ON) |
1207 | flowstat->fqst_flags |= FQ_FLOWSTATS_FLOWCTL_ON; |
1208 | if (fqs->fqs_large_flow == fq) |
1209 | flowstat->fqst_flags |= FQ_FLOWSTATS_LARGE_FLOW; |
1210 | } |
1211 | |
1212 | int |
1213 | fq_if_getqstats_ifclassq(struct ifclassq *ifq, u_int32_t qid, |
1214 | struct if_ifclassq_stats *ifqs) |
1215 | { |
1216 | struct fq_codel_classstats *fcls; |
1217 | fq_if_classq_t *fq_cl; |
1218 | fq_if_t *fqs; |
1219 | fq_t *fq = NULL; |
1220 | u_int32_t i, flowstat_cnt; |
1221 | |
1222 | if (qid >= FQ_IF_MAX_CLASSES) |
1223 | return (EINVAL); |
1224 | |
1225 | fqs = (fq_if_t *)ifq->ifcq_disc; |
1226 | fcls = &ifqs->ifqs_fq_codel_stats; |
1227 | |
1228 | fq_cl = &fqs->fqs_classq[qid]; |
1229 | |
1230 | fcls->fcls_pri = fq_cl->fcl_pri; |
1231 | fcls->fcls_service_class = fq_cl->fcl_service_class; |
1232 | fcls->fcls_quantum = fq_cl->fcl_quantum; |
1233 | fcls->fcls_drr_max = fq_cl->fcl_drr_max; |
1234 | fcls->fcls_budget = fq_cl->fcl_budget; |
1235 | fcls->fcls_target_qdelay = fqs->fqs_target_qdelay; |
1236 | fcls->fcls_update_interval = fqs->fqs_update_interval; |
1237 | fcls->fcls_flow_control = fq_cl->fcl_stat.fcl_flow_control; |
1238 | fcls->fcls_flow_feedback = fq_cl->fcl_stat.fcl_flow_feedback; |
1239 | fcls->fcls_dequeue_stall = fq_cl->fcl_stat.fcl_dequeue_stall; |
1240 | fcls->fcls_drop_overflow = fq_cl->fcl_stat.fcl_drop_overflow; |
1241 | fcls->fcls_drop_early = fq_cl->fcl_stat.fcl_drop_early; |
1242 | fcls->fcls_drop_memfailure = fq_cl->fcl_stat.fcl_drop_memfailure; |
1243 | fcls->fcls_flows_cnt = fq_cl->fcl_stat.fcl_flows_cnt; |
1244 | fcls->fcls_newflows_cnt = fq_cl->fcl_stat.fcl_newflows_cnt; |
1245 | fcls->fcls_oldflows_cnt = fq_cl->fcl_stat.fcl_oldflows_cnt; |
1246 | fcls->fcls_pkt_cnt = fq_cl->fcl_stat.fcl_pkt_cnt; |
1247 | fcls->fcls_flow_control_fail = fq_cl->fcl_stat.fcl_flow_control_fail; |
1248 | fcls->fcls_flow_control_fail = fq_cl->fcl_stat.fcl_flow_control_fail; |
1249 | fcls->fcls_dequeue = fq_cl->fcl_stat.fcl_dequeue; |
1250 | fcls->fcls_dequeue_bytes = fq_cl->fcl_stat.fcl_dequeue_bytes; |
1251 | fcls->fcls_byte_cnt = fq_cl->fcl_stat.fcl_byte_cnt; |
1252 | fcls->fcls_throttle_on = fq_cl->fcl_stat.fcl_throttle_on; |
1253 | fcls->fcls_throttle_off = fq_cl->fcl_stat.fcl_throttle_off; |
1254 | fcls->fcls_throttle_drops = fq_cl->fcl_stat.fcl_throttle_drops; |
1255 | fcls->fcls_dup_rexmts = fq_cl->fcl_stat.fcl_dup_rexmts; |
1256 | |
1257 | /* Gather per flow stats */ |
1258 | flowstat_cnt = min((fcls->fcls_newflows_cnt + |
1259 | fcls->fcls_oldflows_cnt), FQ_IF_MAX_FLOWSTATS); |
1260 | i = 0; |
1261 | STAILQ_FOREACH(fq, &fq_cl->fcl_new_flows, fq_actlink) { |
1262 | if (i >= fcls->fcls_newflows_cnt || i >= flowstat_cnt) |
1263 | break; |
1264 | |
1265 | /* leave space for a few old flows */ |
1266 | if ((flowstat_cnt - i) < fcls->fcls_oldflows_cnt && |
1267 | i >= (FQ_IF_MAX_FLOWSTATS >> 1)) |
1268 | break; |
1269 | fq_export_flowstats(fqs, fq, &fcls->fcls_flowstats[i]); |
1270 | i++; |
1271 | } |
1272 | STAILQ_FOREACH(fq, &fq_cl->fcl_old_flows, fq_actlink) { |
1273 | if (i >= flowstat_cnt) |
1274 | break; |
1275 | fq_export_flowstats(fqs, fq, &fcls->fcls_flowstats[i]); |
1276 | i++; |
1277 | } |
1278 | VERIFY(i <= flowstat_cnt); |
1279 | fcls->fcls_flowstats_cnt = i; |
1280 | return (0); |
1281 | } |
1282 | |