1 | /* |
2 | * Copyright (c) 2016-2021 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | |
29 | #ifndef _NET_PKTSCHED_FQ_CODEL_H_ |
30 | #define _NET_PKTSCHED_FQ_CODEL_H_ |
31 | |
32 | #ifdef PRIVATE |
33 | #include <sys/types.h> |
34 | #include <sys/param.h> |
35 | |
36 | #ifdef BSD_KERNEL_PRIVATE |
37 | #include <net/flowadv.h> |
38 | #include <net/pktsched/pktsched.h> |
39 | #endif /* BSD_KERNEL_PRIVATE */ |
40 | |
41 | #ifdef __cplusplus |
42 | extern "C" { |
43 | #endif |
44 | |
45 | #ifdef BSD_KERNEL_PRIVATE |
/*
 * Per-class (fq_if_classq) statistics counters, updated by the FQ-CoDel
 * scheduler and exported via struct fq_codel_classstats below, which
 * mirrors these fields one for one.  Comments are derived from the
 * field names; exact update points live in the scheduler implementation
 * (not visible in this header).
 */
struct fcl_stat {
	u_int32_t fcl_flow_control;        /* flow-control-on events */
	u_int32_t fcl_flow_feedback;       /* flow advisory feedback events */
	u_int32_t fcl_dequeue_stall;       /* dequeue stalls detected */
	u_int32_t fcl_flow_control_fail;   /* failed flow control attempts */
	u_int64_t fcl_drop_overflow;       /* drops due to queue overflow */
	u_int64_t fcl_drop_early;          /* early (AQM) drops */
	u_int32_t fcl_drop_memfailure;     /* drops due to allocation failure */
	u_int32_t fcl_flows_cnt;           /* number of flows */
	u_int32_t fcl_newflows_cnt;        /* flows on the new-flows list */
	u_int32_t fcl_oldflows_cnt;        /* flows on the old-flows list */
	u_int64_t fcl_pkt_cnt;             /* packets currently queued */
	u_int64_t fcl_dequeue;             /* packets dequeued */
	u_int64_t fcl_dequeue_bytes;       /* bytes dequeued */
	u_int64_t fcl_byte_cnt;            /* bytes currently queued */
	u_int32_t fcl_throttle_on;         /* throttle turned on */
	u_int32_t fcl_throttle_off;        /* throttle turned off */
	u_int32_t fcl_throttle_drops;      /* drops while throttled */
	u_int32_t fcl_dup_rexmts;          /* duplicate retransmissions seen */
	u_int32_t fcl_pkts_compressible;   /* packets eligible for compression */
	u_int32_t fcl_pkts_compressed;     /* packets actually compressed */
	uint64_t fcl_min_qdelay;           /* minimum queue delay observed */
	uint64_t fcl_max_qdelay;           /* maximum queue delay observed */
	uint64_t fcl_avg_qdelay;           /* average queue delay */
	uint32_t fcl_overwhelming;         /* class-overwhelmed events */
	uint64_t fcl_ce_marked;            /* packets ECN CE-marked */
	uint64_t fcl_ce_reported;          /* CE marks reported back */
	uint64_t fcl_ce_mark_failures;     /* failed CE mark attempts */
	uint64_t fcl_l4s_pkts;             /* L4S traffic packets */
	uint64_t fcl_ignore_tx_time;       /* packets whose tx time was ignored */
	uint64_t fcl_paced_pkts;           /* packets that were paced */
	/* NOTE(review): doubled "fcl_" prefix is historical; kept for ABI */
	uint64_t fcl_fcl_pacemaker_needed; /* pacemaker activations needed */
};
79 | |
80 | /* |
81 | * Use 8 bits from the flow id as the tag for set associative |
82 | * hashing |
83 | * NOTE: The first 2 bits of the flow id is being used to encode the flow |
84 | * domain information, so don't use the top 8 bits as it won't have a uniform |
85 | * distribution. |
86 | */ |
87 | |
/* width of the set-associative hash tag, in bits */
#define FQ_IF_HASH_TAG_SIZE 8
/* bit offset within the flow id from which the tag is extracted */
#define FQ_IF_HASH_TAG_SHIFT 16
/* mask matching FQ_IF_HASH_TAG_SIZE bits */
#define FQ_IF_HASH_TAG_MASK 0xFF
/* one bucket per possible tag value (256) */
#define FQ_IF_HASH_TABLE_SIZE (1 << FQ_IF_HASH_TAG_SIZE)

/* Set the quantum to be one MTU */
#define FQ_IF_DEFAULT_QUANTUM 1500

/* Max number of service classes currently supported */
#define FQ_IF_MAX_CLASSES 10
_Static_assert(FQ_IF_MAX_CLASSES < 127,
    "maximum number of classes needs to fit in a single byte" );

/* byte threshold used when looking for the "large" (heaviest) flow */
#define FQ_IF_LARGE_FLOW_BYTE_LIMIT 15000

/* Max number of classq groups currently supported */
#define FQ_IF_MAX_GROUPS 16
105 | |
/*
 * Traffic type of a flow; selects which entry of the per-group AQM
 * parameter arrays (fqg_target_qdelays / fqg_update_intervals) applies.
 */
typedef enum : uint8_t {
	FQ_TFC_C = 0,           /* classic traffic */
	FQ_TFC_L4S = 1,         /* L4S traffic */
	FQ_TFC_CNT = 2,         /* count of types; sizes the arrays above */
} fq_tfc_type_t;
111 | |
struct flowq;                           /* defined by the scheduler implementation */
typedef u_int32_t pktsched_bitmap_t;    /* bitmap indexed by priority */
struct if_ifclassq_stats;

/*
 * Scheduling state of a class queue; indexes the per-group state
 * bitmaps (fqg_bitmaps), cf. the QFQ-style bitmaps described below.
 */
typedef enum : uint8_t {
	FQ_IF_ER = 0,           /* eligible, ready */
	FQ_IF_IR = 1,           /* ineligible, ready */
	FQ_IF_EB = 2,           /* eligible blocked */
	FQ_IF_IB = 3,           /* ineligible, blocked */
	FQ_IF_MAX_STATE         /* number of states; sizes fqg_bitmaps */
} fq_if_state;
123 | |
/*
 * This priority index is used for QFQ state bitmaps, lower index gets
 * higher priority
 */
#define FQ_IF_BK_SYS_INDEX 9    /* background, system-initiated */
#define FQ_IF_BK_INDEX 8        /* background */
#define FQ_IF_BE_INDEX 7        /* best effort */
#define FQ_IF_RD_INDEX 6
#define FQ_IF_OAM_INDEX 5
#define FQ_IF_AV_INDEX 4        /* A/V streaming */
#define FQ_IF_RV_INDEX 3
#define FQ_IF_VI_INDEX 2        /* interactive video */
#define FQ_IF_SIG_INDEX 2       /* signaling; intentionally shares VI's slot */
#define FQ_IF_VO_INDEX 1        /* interactive voice */
#define FQ_IF_CTL_INDEX 0       /* network control, highest priority */
139 | |
typedef SLIST_HEAD(, flowq) flowq_list_t;       /* hash-bucket chain of flows */
typedef STAILQ_HEAD(, flowq) flowq_stailq_t;    /* FIFO list of flows */

/*
 * Per-service-class scheduler state within a group: DRR budget and
 * quantum, plus the FQ-CoDel new/old flow lists and statistics.
 */
typedef struct fq_if_classq {
	uint32_t fcl_pri;               /* class priority, lower the better */
	uint32_t fcl_service_class;     /* service class */
	uint32_t fcl_quantum;           /* quantum in bytes */
	uint32_t fcl_drr_max;           /* max flows per class for DRR */
	int64_t fcl_budget;             /* budget for this classq */
	uint64_t fcl_next_tx_time;      /* next time a packet is ready */
	flowq_stailq_t fcl_new_flows;   /* List of new flows */
	flowq_stailq_t fcl_old_flows;   /* List of old flows */
	struct fcl_stat fcl_stat;       /* counters; see struct fcl_stat */
/* NOTE(review): appears to mark a class whose packets are all paced
 * (next tx time in the future) — confirm in implementation */
#define FCL_PACED 0x1
	uint8_t fcl_flags;              /* FCL_* flags */
} fq_if_classq_t;
/*
 * A classq group: an independent set of class queues with its own AQM
 * parameters and state bitmaps.  Groups may run combined or separated
 * (see fq_if_set_grp_combined()/fq_if_set_grp_separated()).
 */
typedef struct fq_codel_classq_group {
	/* Target queue delays (ns), indexed by fq_tfc_type_t */
	uint64_t fqg_target_qdelays[FQ_TFC_CNT];
	/* update intervals (ns), indexed by fq_tfc_type_t */
	uint64_t fqg_update_intervals[FQ_TFC_CNT];
	/* classq bitmaps, one per fq_if_state */
	pktsched_bitmap_t fqg_bitmaps[FQ_IF_MAX_STATE];
	/* linkage on fqs_combined_grp_list */
	TAILQ_ENTRY(fq_codel_classq_group) fqg_grp_link;
	uint32_t fqg_bytes;             /* bytes count */
	uint32_t fqg_len;               /* pkts count */
	uint8_t fqg_flags;              /* flags */
#define FQ_IF_DEFAULT_GRP 0x1
	uint8_t fqg_index;              /* group index */
	fq_if_classq_t fqg_classq[FQ_IF_MAX_CLASSES]; /* class queues */
	struct flowq *fqg_large_flow;   /* flow has highest number of bytes */
} fq_if_group_t;
171 | |
/*
 * Packet/byte accounting on a classq group.  These are plain macros
 * over struct fields; arguments may be evaluated more than once, so
 * do not pass expressions with side effects.
 */
#define FQG_LEN(_fqg)           ((_fqg)->fqg_len)
#define FQG_IS_EMPTY(_fqg)      (FQG_LEN(_fqg) == 0)
#define FQG_INC_LEN(_fqg)       (FQG_LEN(_fqg)++)
#define FQG_DEC_LEN(_fqg)       (FQG_LEN(_fqg)--)
#define FQG_ADD_LEN(_fqg, _len) (FQG_LEN(_fqg) += (_len))
#define FQG_SUB_LEN(_fqg, _len) (FQG_LEN(_fqg) -= (_len))
#define FQG_BYTES(_fqg)         ((_fqg)->fqg_bytes)

/*
 * Compound assignment for consistency with the FQG_*_LEN macros above;
 * also evaluates _fqg once instead of twice.
 */
#define FQG_INC_BYTES(_fqg, _len)       (FQG_BYTES(_fqg) += (_len))
#define FQG_DEC_BYTES(_fqg, _len)       (FQG_BYTES(_fqg) -= (_len))
184 | |
typedef TAILQ_HEAD(, fq_codel_classq_group) fq_grp_tailq_t;

/*
 * Operations over the groups' state bitmaps.  Each takes the list of
 * groups in combined mode, a priority index, and the state(s) involved.
 */
typedef int (* fq_if_bitmaps_ffs)(fq_grp_tailq_t *, int, fq_if_state, fq_if_group_t **); /* find first set */
typedef boolean_t (* fq_if_bitmaps_zeros)(fq_grp_tailq_t *, int, fq_if_state); /* all-zero test */
typedef void (* fq_if_bitmaps_cpy)(fq_grp_tailq_t *, int, fq_if_state, fq_if_state); /* copy state -> state */
typedef void (* fq_if_bitmaps_clr)(fq_grp_tailq_t *, int, fq_if_state); /* clear */
typedef void (* fq_if_bitmaps_move)(fq_grp_tailq_t *, int, fq_if_state, fq_if_state); /* move state -> state */

/*
 * Functions that are used to look at groups'
 * bitmaps and decide which pri and group are the
 * next one to dequeue from.
 */
typedef struct fq_if_bitmap_ops {
	fq_if_bitmaps_ffs ffs;
	fq_if_bitmaps_zeros zeros;
	fq_if_bitmaps_cpy cpy;
	fq_if_bitmaps_clr clr;
	fq_if_bitmaps_move move;
} bitmap_ops_t;
205 | |
/*
 * FQ-CoDel scheduler instance, one per ifclassq using this discipline.
 */
typedef struct fq_codel_sched_data {
	struct ifclassq *fqs_ifq;       /* back pointer to ifclassq */
	flowq_list_t fqs_flows[FQ_IF_HASH_TABLE_SIZE]; /* flows table */
	uint32_t fqs_pkt_droplimit;     /* drop limit */
	uint8_t fqs_throttle;           /* throttle on or off */
	uint8_t fqs_flags;              /* FQS_* flags */
#define FQS_DRIVER_MANAGED 0x1
	struct flowadv_fclist fqs_fclist; /* flow control state */
	struct flowq *fqs_large_flow;   /* flow has highest number of bytes */
	TAILQ_HEAD(, flowq) fqs_empty_list; /* list of empty flows */
	/* list of groups in combined mode */
	fq_grp_tailq_t fqs_combined_grp_list;
	uint32_t fqs_empty_list_cnt;    /* number of flows on fqs_empty_list */
	/* bitmap indicating which grp is in combined mode */
	pktsched_bitmap_t fqs_combined_grp_bitmap;
	classq_pkt_type_t fqs_ptype;    /* packet type handled by this instance */
	thread_call_t fqs_pacemaker_tcall; /* pacemaker thread call */
	bitmap_ops_t *fqs_bm_ops;       /* bitmap ops (combined vs. separated) */
	/*
	 * Shorthands for indirect calls through fqs_bm_ops, e.g.
	 * fqs->grp_bitmaps_ffs(...).
	 * NOTE(review): these #defines are unscoped and expand anywhere
	 * after inclusion; use with care.
	 */
#define grp_bitmaps_ffs fqs_bm_ops->ffs
#define grp_bitmaps_zeros fqs_bm_ops->zeros
#define grp_bitmaps_cpy fqs_bm_ops->cpy
#define grp_bitmaps_clr fqs_bm_ops->clr
#define grp_bitmaps_move fqs_bm_ops->move
	fq_if_group_t *fqs_classq_groups[FQ_IF_MAX_GROUPS]; /* groups by index */
} fq_if_t;
231 | |
/*
 * Accessor macros over the scheduler/group/flow-queue structures.
 * These are plain macros; arguments may be evaluated more than once,
 * so do not pass expressions with side effects.
 */

/* Look up a group of a scheduler instance by index */
#define FQS_GROUP(_fqs, _group_idx) \
	(fq_if_find_grp((_fqs), (_group_idx)))

#define FQS_CLASSQ(_fqs, _group_idx, _sc_idx) \
	(FQS_GROUP((_fqs), (_group_idx))->fqg_classq[(_sc_idx)])

/* The group a flow queue belongs to */
#define FQ_GROUP(_fq) \
	((_fq)->fq_group)

/* Packet-count accounting on a flow queue's group */
#define FQ_GRP_LEN(_fq) \
	(FQ_GROUP((_fq))->fqg_len)
#define FQ_GRP_IS_EMPTY(_fq) \
	(FQ_GRP_LEN((_fq)) == 0)
#define FQ_GRP_INC_LEN(_fq) \
	(FQ_GRP_LEN((_fq))++)
#define FQ_GRP_DEC_LEN(_fq) \
	(FQ_GRP_LEN((_fq))--)
#define FQ_GRP_ADD_LEN(_fq, _len) \
	(FQ_GRP_LEN((_fq)) += (_len))
#define FQ_GRP_SUB_LEN(_fq, _len) \
	(FQ_GRP_LEN((_fq)) -= (_len))

/*
 * Fixed: the body previously referenced "grp_idx" instead of the macro
 * parameter "_grp_idx", so it only compiled when the caller happened to
 * have a variable named grp_idx in scope.
 */
#define FQS_GRP_ADD_LEN(_fqs, _grp_idx, _len) \
	(FQS_GROUP((_fqs), (_grp_idx))->fqg_len += (_len))

/* Byte-count accounting on a flow queue's group */
#define FQ_GRP_BYTES(_fq) \
	(FQ_GROUP((_fq))->fqg_bytes)
#define FQ_GRP_INC_BYTES(_fq, _len) \
	(FQ_GRP_BYTES((_fq)) += (_len))
#define FQ_GRP_DEC_BYTES(_fq, _len) \
	(FQ_GRP_BYTES((_fq)) -= (_len))

/* Parameter renamed grp_idx -> _grp_idx for convention consistency */
#define FQS_GRP_INC_BYTES(_fqs, _grp_idx, _len) \
	(FQS_GROUP((_fqs), (_grp_idx))->fqg_bytes += (_len))

/* The class queue a flow queue belongs to within its group */
#define FQ_CLASSQ(_fq) \
	(FQ_GROUP((_fq))->fqg_classq[(_fq)->fq_sc_index])

/* AQM parameters for a flow queue, selected by its traffic type */
#define FQ_TARGET_DELAY(_fq) \
	(FQ_GROUP((_fq))->fqg_target_qdelays[(_fq)->fq_tfc_type])
#define FQ_UPDATE_INTERVAL(_fq) \
	(FQ_GROUP((_fq))->fqg_update_intervals[(_fq)->fq_tfc_type])
275 | |
276 | #endif /* BSD_KERNEL_PRIVATE */ |
277 | |
/*
 * Per-flow statistics exported to userland; up to FQ_IF_MAX_FLOWSTATS
 * of these are embedded in struct fq_codel_classstats.
 */
struct fq_codel_flowstats {
	u_int32_t fqst_min_qdelay;      /* minimum queue delay observed */
/* fqst_flags bits */
#define FQ_FLOWSTATS_OLD_FLOW 0x1
#define FQ_FLOWSTATS_NEW_FLOW 0x2
#define FQ_FLOWSTATS_LARGE_FLOW 0x4
#define FQ_FLOWSTATS_DELAY_HIGH 0x8
#define FQ_FLOWSTATS_FLOWCTL_ON 0x10
	u_int32_t fqst_flags;           /* FQ_FLOWSTATS_* */
	u_int32_t fqst_bytes;           /* bytes queued for this flow */
	u_int32_t fqst_flowhash;        /* flow hash/identifier */
};

/* cap on flow stats entries reported per class */
#define FQ_IF_MAX_FLOWSTATS 20
/* userland-visible copy of the group count; must equal FQ_IF_MAX_GROUPS
 * (enforced by the _Static_assert in the kernel-private section) */
#define FQ_IF_STATS_MAX_GROUPS 16
292 | |
/*
 * Per-class statistics exported to userland.  The fcls_* counters
 * mirror the kernel-private struct fcl_stat field for field (see that
 * structure for individual counter meanings), with class configuration
 * (priority, quantum, AQM targets) and a sample of per-flow stats added.
 */
struct fq_codel_classstats {
	u_int32_t fcls_pri;                     /* class priority */
	u_int32_t fcls_service_class;           /* service class */
	u_int32_t fcls_quantum;                 /* DRR quantum, bytes */
	u_int32_t fcls_drr_max;                 /* max flows for DRR */
	int64_t fcls_budget;                    /* current DRR budget */
	u_int64_t fcls_target_qdelay;           /* classic target delay (ns) */
	u_int64_t fcls_l4s_target_qdelay;       /* L4S target delay (ns) */
	u_int64_t fcls_update_interval;         /* AQM update interval (ns) */
	u_int32_t fcls_flow_control;
	u_int32_t fcls_flow_feedback;
	u_int32_t fcls_dequeue_stall;
	u_int32_t fcls_flow_control_fail;
	u_int64_t fcls_drop_overflow;
	u_int64_t fcls_drop_early;
	u_int32_t fcls_drop_memfailure;
	u_int32_t fcls_flows_cnt;
	u_int32_t fcls_newflows_cnt;
	u_int32_t fcls_oldflows_cnt;
	u_int64_t fcls_pkt_cnt;
	u_int64_t fcls_dequeue;
	u_int64_t fcls_dequeue_bytes;
	u_int64_t fcls_byte_cnt;
	u_int32_t fcls_throttle_on;
	u_int32_t fcls_throttle_off;
	u_int32_t fcls_throttle_drops;
	u_int32_t fcls_dup_rexmts;
	u_int32_t fcls_flowstats_cnt;           /* valid entries below */
	struct fq_codel_flowstats fcls_flowstats[FQ_IF_MAX_FLOWSTATS];
	u_int32_t fcls_pkts_compressible;
	u_int32_t fcls_pkts_compressed;
	uint64_t fcls_min_qdelay;
	uint64_t fcls_max_qdelay;
	uint64_t fcls_avg_qdelay;
	uint32_t fcls_overwhelming;
	uint64_t fcls_ce_marked;
	uint64_t fcls_ce_reported;
	uint64_t fcls_ce_mark_failures;
	uint64_t fcls_l4s_pkts;
	uint64_t fcls_ignore_tx_time;
	uint64_t fcls_paced_pkts;
	/* mirrors fcl_fcl_pacemaker_needed (different spelling is historical) */
	uint64_t fcls_fcl_pacing_needed;
};
336 | |
337 | #ifdef BSD_KERNEL_PRIVATE |
338 | |
_Static_assert(FQ_IF_STATS_MAX_GROUPS == FQ_IF_MAX_GROUPS,
    "max group counts do not match" );

/* One-time initialization */
extern void pktsched_fq_init(void);
extern void fq_codel_scheduler_init(void);

/* ifclassq enqueue/dequeue entry points */
extern int fq_if_enqueue_classq(struct ifclassq *ifq, classq_pkt_t *h,
    classq_pkt_t *t, uint32_t cnt, uint32_t bytes, boolean_t *pdrop);
extern void fq_if_dequeue_classq(struct ifclassq *ifq, classq_pkt_t *pkt,
    uint8_t grp_idx);
extern void fq_if_dequeue_sc_classq(struct ifclassq *ifq, mbuf_svc_class_t svc,
    classq_pkt_t *pkt, uint8_t grp_idx);
extern int fq_if_dequeue_classq_multi(struct ifclassq *ifq, u_int32_t maxpktcnt,
    u_int32_t maxbytecnt, classq_pkt_t *first_packet, classq_pkt_t *last_packet,
    u_int32_t *retpktcnt, u_int32_t *retbytecnt, uint8_t grp_idx);
extern int fq_if_dequeue_sc_classq_multi(struct ifclassq *ifq,
    mbuf_svc_class_t svc, u_int32_t maxpktcnt, u_int32_t maxbytecnt,
    classq_pkt_t *first_packet, classq_pkt_t *last_packet, u_int32_t *retpktcnt,
    u_int32_t *retbytecnt, uint8_t grp_idx);
extern int fq_if_request_classq(struct ifclassq *ifq, cqrq_t rq, void *arg);

/* Flow management and flow-control helpers */
extern struct flowq *fq_if_hash_pkt(fq_if_t *, fq_if_group_t *,
    u_int32_t, mbuf_svc_class_t, u_int64_t, bool, fq_tfc_type_t);
extern boolean_t fq_if_at_drop_limit(fq_if_t *);
extern boolean_t fq_if_almost_at_drop_limit(fq_if_t *fqs);
extern void fq_if_drop_packet(fq_if_t *, uint64_t);
extern void fq_if_is_flow_heavy(fq_if_t *, struct flowq *);
extern boolean_t fq_if_add_fcentry(fq_if_t *, pktsched_pkt_t *, uint8_t,
    struct flowq *, fq_if_classq_t *);
extern void fq_if_flow_feedback(fq_if_t *, struct flowq *, fq_if_classq_t *);
extern boolean_t fq_if_report_ce(fq_if_t *, pktsched_pkt_t *, uint32_t, uint32_t);

/* Scheduler setup/teardown and stats export */
extern int fq_if_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags,
    classq_pkt_type_t ptype);
extern void fq_if_teardown_ifclassq(struct ifclassq *ifq);
extern int fq_if_getqstats_ifclassq(struct ifclassq *ifq, uint8_t gid,
    u_int32_t qid, struct if_ifclassq_stats *ifqs);
extern void fq_if_destroy_flow(fq_if_t *, fq_if_classq_t *, struct flowq *);
extern void fq_if_move_to_empty_flow(fq_if_t *, fq_if_classq_t *,
    struct flowq *, uint64_t);

/* Classq group management */
extern int fq_if_create_grp(struct ifclassq *ifcq, uint8_t qset_idx, uint8_t flags);
extern void fq_if_set_grp_combined(struct ifclassq *ifcq, uint8_t qset_idx);
extern void fq_if_set_grp_separated(struct ifclassq *ifcq, uint8_t qset_idx);
extern fq_if_group_t *fq_if_find_grp(fq_if_t *fqs, uint8_t grp_idx);
extern boolean_t fq_if_is_all_paced(struct ifclassq *ifq);
381 | #endif /* BSD_KERNEL_PRIVATE */ |
382 | |
383 | #ifdef __cplusplus |
384 | } |
385 | #endif |
386 | |
387 | #endif /* PRIVATE */ |
#endif /* _NET_PKTSCHED_FQ_CODEL_H_ */
389 | |