1/*
2 * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#ifndef _NET_PKTSCHED_FQ_CODEL_H_
30#define _NET_PKTSCHED_FQ_CODEL_H_
31
32#ifdef PRIVATE
33#include <sys/types.h>
34#include <sys/param.h>
35
36#ifdef BSD_KERNEL_PRIVATE
37#include <net/flowadv.h>
38#include <net/pktsched/pktsched.h>
39#endif /* BSD_KERNEL_PRIVATE */
40
41#ifdef __cplusplus
42extern "C" {
43#endif
44
45#ifdef BSD_KERNEL_PRIVATE
/*
 * Per-class counters.  One instance is embedded in every fq_if_classq_t
 * (member fcl_stat), and most fields have a similarly named fcls_*
 * counterpart in struct fq_codel_classstats.
 */
struct fcl_stat {
	/* flow control / feedback */
	uint32_t        fcl_flow_control;
	uint32_t        fcl_flow_feedback;
	uint32_t        fcl_dequeue_stall;
	uint32_t        fcl_flow_control_fail;

	/* drops */
	uint64_t        fcl_drop_overflow;
	uint64_t        fcl_drop_early;
	uint32_t        fcl_drop_memfailure;

	/* flow counts */
	uint32_t        fcl_flows_cnt;
	uint32_t        fcl_newflows_cnt;
	uint32_t        fcl_oldflows_cnt;

	/* packet / byte counts */
	uint64_t        fcl_pkt_cnt;
	uint64_t        fcl_dequeue;
	uint64_t        fcl_dequeue_bytes;
	uint64_t        fcl_byte_cnt;

	/* throttling */
	uint32_t        fcl_throttle_on;
	uint32_t        fcl_throttle_off;
	uint32_t        fcl_throttle_drops;

	uint32_t        fcl_dup_rexmts;
	uint32_t        fcl_pkts_compressible;
	uint32_t        fcl_pkts_compressed;

	/* queue delay */
	uint64_t        fcl_min_qdelay;
	uint64_t        fcl_max_qdelay;
	uint64_t        fcl_avg_qdelay;

	uint32_t        fcl_overwhelming;

	/* CE marking / L4S */
	uint64_t        fcl_ce_marked;
	uint64_t        fcl_ce_reported;
	uint64_t        fcl_ce_mark_failures;
	uint64_t        fcl_l4s_pkts;

	/* pacing */
	uint64_t        fcl_ignore_tx_time;
	uint64_t        fcl_paced_pkts;
	uint64_t        fcl_fcl_pacemaker_needed;
};
79
/*
 * Use 8 bits from the flow id as the tag for set associative
 * hashing.
 * NOTE: The first 2 bits of the flow id are used to encode the flow
 * domain information, so don't use the top 8 bits as they won't have a
 * uniform distribution.
 */
87
/* Tag width in bits, tag shift, tag mask, and the number of table buckets */
#define FQ_IF_HASH_TAG_SIZE     8
#define FQ_IF_HASH_TAG_SHIFT    16
#define FQ_IF_HASH_TAG_MASK     ((1 << FQ_IF_HASH_TAG_SIZE) - 1)
#define FQ_IF_HASH_TABLE_SIZE   (1 << FQ_IF_HASH_TAG_SIZE)
92
/* Default quantum: one MTU */
#define FQ_IF_DEFAULT_QUANTUM           1500

/* Number of service classes currently supported (upper bound) */
#define FQ_IF_MAX_CLASSES               10
_Static_assert(FQ_IF_MAX_CLASSES < 127,
    "maximum number of classes needs to fit in a single byte");

#define FQ_IF_LARGE_FLOW_BYTE_LIMIT     15000

/* Number of classq groups currently supported (upper bound) */
#define FQ_IF_MAX_GROUPS                16
105
106typedef enum : uint8_t {
107 FQ_TFC_C = 0, /* classic traffic */
108 FQ_TFC_L4S = 1, /* L4S traffic */
109 FQ_TFC_CNT = 2,
110} fq_tfc_type_t;
111
/* Only referenced through pointers in this header */
struct flowq;
struct if_ifclassq_stats;

/* Word type of the scheduler bitmaps (fqg_bitmaps[], fqs_combined_grp_bitmap) */
typedef uint32_t pktsched_bitmap_t;
115
116typedef enum : uint8_t {
117 FQ_IF_ER = 0, /* eligible, ready */
118 FQ_IF_IR = 1, /* ineligible, ready */
119 FQ_IF_EB = 2, /* eligible blocked */
120 FQ_IF_IB = 3, /* ineligible, blocked */
121 FQ_IF_MAX_STATE
122} fq_if_state;
123
/*
 * Priority indices used for the QFQ state bitmaps; a lower index means a
 * higher priority.  Listed from highest to lowest priority.
 * Note that VI and SIG share index 2.
 */
#define FQ_IF_CTL_INDEX         0
#define FQ_IF_VO_INDEX          1
#define FQ_IF_SIG_INDEX         2
#define FQ_IF_VI_INDEX          2
#define FQ_IF_RV_INDEX          3
#define FQ_IF_AV_INDEX          4
#define FQ_IF_OAM_INDEX         5
#define FQ_IF_RD_INDEX          6
#define FQ_IF_BE_INDEX          7
#define FQ_IF_BK_INDEX          8
#define FQ_IF_BK_SYS_INDEX      9
139
/* Tail-queue head of flows; type of fcl_new_flows / fcl_old_flows */
typedef STAILQ_HEAD(, flowq) flowq_stailq_t;
/* Singly-linked list head of flows; type of each fqs_flows[] bucket */
typedef SLIST_HEAD(, flowq) flowq_list_t;
142typedef struct fq_if_classq {
143 uint32_t fcl_pri; /* class priority, lower the better */
144 uint32_t fcl_service_class; /* service class */
145 uint32_t fcl_quantum; /* quantum in bytes */
146 uint32_t fcl_drr_max; /* max flows per class for DRR */
147 int64_t fcl_budget; /* budget for this classq */
148 uint64_t fcl_next_tx_time; /* next time a packet is ready */
149 flowq_stailq_t fcl_new_flows; /* List of new flows */
150 flowq_stailq_t fcl_old_flows; /* List of old flows */
151 struct fcl_stat fcl_stat;
152#define FCL_PACED 0x1
153 uint8_t fcl_flags;
154} fq_if_classq_t;
155typedef struct fq_codel_classq_group {
156 /* Target queue delays (ns) */
157 uint64_t fqg_target_qdelays[FQ_TFC_CNT];
158 /* update intervals (ns) */
159 uint64_t fqg_update_intervals[FQ_TFC_CNT];
160 /* classq bitmaps */
161 pktsched_bitmap_t fqg_bitmaps[FQ_IF_MAX_STATE];
162 TAILQ_ENTRY(fq_codel_classq_group) fqg_grp_link;
163 uint32_t fqg_bytes; /* bytes count */
164 uint32_t fqg_len; /* pkts count */
165 uint8_t fqg_flags; /* flags */
166#define FQ_IF_DEFAULT_GRP 0x1
167 uint8_t fqg_index; /* group index */
168 fq_if_classq_t fqg_classq[FQ_IF_MAX_CLASSES]; /* class queues */
169 struct flowq *fqg_large_flow; /* flow has highest number of bytes */
170} fq_if_group_t;
171
/*
 * Packet and byte accounting on a group.  _fqg is a pointer to a structure
 * with fqg_len / fqg_bytes members (fq_if_group_t).  Every macro evaluates
 * each of its arguments exactly once, so an argument with side effects is
 * safe to pass.
 */
#define FQG_LEN(_fqg)                   ((_fqg)->fqg_len)
#define FQG_IS_EMPTY(_fqg)              (FQG_LEN(_fqg) == 0)
#define FQG_INC_LEN(_fqg)               (FQG_LEN(_fqg)++)
#define FQG_DEC_LEN(_fqg)               (FQG_LEN(_fqg)--)
#define FQG_ADD_LEN(_fqg, _len)         (FQG_LEN(_fqg) += (_len))
#define FQG_SUB_LEN(_fqg, _len)         (FQG_LEN(_fqg) -= (_len))

#define FQG_BYTES(_fqg)                 ((_fqg)->fqg_bytes)
/* Compound assignment keeps _fqg from being expanded (and evaluated) twice */
#define FQG_INC_BYTES(_fqg, _len)       (FQG_BYTES(_fqg) += (_len))
#define FQG_DEC_BYTES(_fqg, _len)       (FQG_BYTES(_fqg) -= (_len))
184
185typedef TAILQ_HEAD(, fq_codel_classq_group) fq_grp_tailq_t;
186
187typedef int (* fq_if_bitmaps_ffs)(fq_grp_tailq_t *, int, fq_if_state, fq_if_group_t **);
188typedef boolean_t (* fq_if_bitmaps_zeros)(fq_grp_tailq_t *, int, fq_if_state);
189typedef void (* fq_if_bitmaps_cpy)(fq_grp_tailq_t *, int, fq_if_state, fq_if_state);
190typedef void (* fq_if_bitmaps_clr)(fq_grp_tailq_t *, int, fq_if_state);
191typedef void (* fq_if_bitmaps_move)(fq_grp_tailq_t *, int, fq_if_state, fq_if_state);
192
193/*
194 * Functions that are used to look at groups'
195 * bitmaps and decide which pri and group are the
196 * next one to dequeue from.
197 */
198typedef struct fq_if_bitmap_ops {
199 fq_if_bitmaps_ffs ffs;
200 fq_if_bitmaps_zeros zeros;
201 fq_if_bitmaps_cpy cpy;
202 fq_if_bitmaps_clr clr;
203 fq_if_bitmaps_move move;
204} bitmap_ops_t;
205
206typedef struct fq_codel_sched_data {
207 struct ifclassq *fqs_ifq; /* back pointer to ifclassq */
208 flowq_list_t fqs_flows[FQ_IF_HASH_TABLE_SIZE]; /* flows table */
209 uint32_t fqs_pkt_droplimit; /* drop limit */
210 uint8_t fqs_throttle; /* throttle on or off */
211 uint8_t fqs_flags; /* flags */
212#define FQS_DRIVER_MANAGED 0x1
213 struct flowadv_fclist fqs_fclist; /* flow control state */
214 struct flowq *fqs_large_flow; /* flow has highest number of bytes */
215 TAILQ_HEAD(, flowq) fqs_empty_list; /* list of empty flows */
216 /* list of groups in combined mode */
217 fq_grp_tailq_t fqs_combined_grp_list;
218 uint32_t fqs_empty_list_cnt;
219 /* bitmap indicating which grp is in combined mode */
220 pktsched_bitmap_t fqs_combined_grp_bitmap;
221 classq_pkt_type_t fqs_ptype;
222 thread_call_t fqs_pacemaker_tcall;
223 bitmap_ops_t *fqs_bm_ops;
224#define grp_bitmaps_ffs fqs_bm_ops->ffs
225#define grp_bitmaps_zeros fqs_bm_ops->zeros
226#define grp_bitmaps_cpy fqs_bm_ops->cpy
227#define grp_bitmaps_clr fqs_bm_ops->clr
228#define grp_bitmaps_move fqs_bm_ops->move
229 fq_if_group_t *fqs_classq_groups[FQ_IF_MAX_GROUPS];
230} fq_if_t;
231
/* Group _grp_idx of scheduler _fqs, looked up with fq_if_find_grp() */
#define FQS_GROUP(_fqs, _grp_idx)       (fq_if_find_grp((_fqs), (_grp_idx)))

/* Class queue _sc_idx (an lvalue) inside group _grp_idx of scheduler _fqs */
#define FQS_CLASSQ(_fqs, _grp_idx, _sc_idx) \
	(FQS_GROUP((_fqs), (_grp_idx))->fqg_classq[(_sc_idx)])
237
/* Group that flow _fq points to through its fq_group member */
#define FQ_GROUP(_fq)                   ((_fq)->fq_group)

/* Packet-count accounting on the group of flow _fq */
#define FQ_GRP_LEN(_fq)                 (FQ_GROUP(_fq)->fqg_len)
#define FQ_GRP_IS_EMPTY(_fq)            (FQ_GRP_LEN(_fq) == 0)
#define FQ_GRP_INC_LEN(_fq)             (FQ_GRP_LEN(_fq)++)
#define FQ_GRP_DEC_LEN(_fq)             (FQ_GRP_LEN(_fq)--)
#define FQ_GRP_ADD_LEN(_fq, _len)       (FQ_GRP_LEN(_fq) += (_len))
#define FQ_GRP_SUB_LEN(_fq, _len)       (FQ_GRP_LEN(_fq) -= (_len))
253
/*
 * Add _len to the packet count (fqg_len) of group _grp_idx of scheduler
 * _fqs.  The group is looked up with FQS_GROUP().
 *
 * The body uses the _grp_idx parameter itself; it must not depend on the
 * caller having a variable named grp_idx in scope.
 */
#define FQS_GRP_ADD_LEN(_fqs, _grp_idx, _len) \
	(FQS_GROUP((_fqs), (_grp_idx))->fqg_len += (_len))
256
257
/* Byte-count accounting on the group of flow _fq */
#define FQ_GRP_BYTES(_fq)               (FQ_GROUP(_fq)->fqg_bytes)
#define FQ_GRP_INC_BYTES(_fq, _len)     (FQ_GRP_BYTES(_fq) += (_len))
#define FQ_GRP_DEC_BYTES(_fq, _len)     (FQ_GRP_BYTES(_fq) -= (_len))

/* Same, addressing the group by scheduler and group index */
#define FQS_GRP_INC_BYTES(_fqs, _grp_idx, _len) \
	(FQS_GROUP((_fqs), (_grp_idx))->fqg_bytes += (_len))
267
/* Class queue of flow _fq: its group's fqg_classq[] entry at fq_sc_index */
#define FQ_CLASSQ(_fq) \
	(FQ_GROUP(_fq)->fqg_classq[(_fq)->fq_sc_index])

/* Target delay / update interval of _fq's group, selected by fq_tfc_type */
#define FQ_TARGET_DELAY(_fq) \
	(FQ_GROUP(_fq)->fqg_target_qdelays[(_fq)->fq_tfc_type])
#define FQ_UPDATE_INTERVAL(_fq) \
	(FQ_GROUP(_fq)->fqg_update_intervals[(_fq)->fq_tfc_type])
275
276#endif /* BSD_KERNEL_PRIVATE */
277
/*
 * Statistics record for one flow; struct fq_codel_classstats carries an
 * array of these (fcls_flowstats[]).
 */
struct fq_codel_flowstats {
	uint32_t        fqst_min_qdelay;
	uint32_t        fqst_flags;
#define FQ_FLOWSTATS_OLD_FLOW           0x01
#define FQ_FLOWSTATS_NEW_FLOW           0x02
#define FQ_FLOWSTATS_LARGE_FLOW         0x04
#define FQ_FLOWSTATS_DELAY_HIGH         0x08
#define FQ_FLOWSTATS_FLOWCTL_ON         0x10
	uint32_t        fqst_bytes;
	uint32_t        fqst_flowhash;
};
289
/* Number of fcls_flowstats[] entries in struct fq_codel_classstats */
#define FQ_IF_MAX_FLOWSTATS             20
/* Must equal FQ_IF_MAX_GROUPS; checked by a _Static_assert in kernel builds */
#define FQ_IF_STATS_MAX_GROUPS          16
292
293struct fq_codel_classstats {
294 u_int32_t fcls_pri;
295 u_int32_t fcls_service_class;
296 u_int32_t fcls_quantum;
297 u_int32_t fcls_drr_max;
298 int64_t fcls_budget;
299 u_int64_t fcls_target_qdelay;
300 u_int64_t fcls_l4s_target_qdelay;
301 u_int64_t fcls_update_interval;
302 u_int32_t fcls_flow_control;
303 u_int32_t fcls_flow_feedback;
304 u_int32_t fcls_dequeue_stall;
305 u_int32_t fcls_flow_control_fail;
306 u_int64_t fcls_drop_overflow;
307 u_int64_t fcls_drop_early;
308 u_int32_t fcls_drop_memfailure;
309 u_int32_t fcls_flows_cnt;
310 u_int32_t fcls_newflows_cnt;
311 u_int32_t fcls_oldflows_cnt;
312 u_int64_t fcls_pkt_cnt;
313 u_int64_t fcls_dequeue;
314 u_int64_t fcls_dequeue_bytes;
315 u_int64_t fcls_byte_cnt;
316 u_int32_t fcls_throttle_on;
317 u_int32_t fcls_throttle_off;
318 u_int32_t fcls_throttle_drops;
319 u_int32_t fcls_dup_rexmts;
320 u_int32_t fcls_flowstats_cnt;
321 struct fq_codel_flowstats fcls_flowstats[FQ_IF_MAX_FLOWSTATS];
322 u_int32_t fcls_pkts_compressible;
323 u_int32_t fcls_pkts_compressed;
324 uint64_t fcls_min_qdelay;
325 uint64_t fcls_max_qdelay;
326 uint64_t fcls_avg_qdelay;
327 uint32_t fcls_overwhelming;
328 uint64_t fcls_ce_marked;
329 uint64_t fcls_ce_reported;
330 uint64_t fcls_ce_mark_failures;
331 uint64_t fcls_l4s_pkts;
332 uint64_t fcls_ignore_tx_time;
333 uint64_t fcls_paced_pkts;
334 uint64_t fcls_fcl_pacing_needed;
335};
336
337#ifdef BSD_KERNEL_PRIVATE
338
339_Static_assert(FQ_IF_STATS_MAX_GROUPS == FQ_IF_MAX_GROUPS,
340 "max group counts do not match");
341
342extern void pktsched_fq_init(void);
343extern void fq_codel_scheduler_init(void);
344extern int fq_if_enqueue_classq(struct ifclassq *ifq, classq_pkt_t *h,
345 classq_pkt_t *t, uint32_t cnt, uint32_t bytes, boolean_t *pdrop);
346extern void fq_if_dequeue_classq(struct ifclassq *ifq, classq_pkt_t *pkt,
347 uint8_t grp_idx);
348extern void fq_if_dequeue_sc_classq(struct ifclassq *ifq, mbuf_svc_class_t svc,
349 classq_pkt_t *pkt, uint8_t grp_idx);
350extern int fq_if_dequeue_classq_multi(struct ifclassq *ifq, u_int32_t maxpktcnt,
351 u_int32_t maxbytecnt, classq_pkt_t *first_packet, classq_pkt_t *last_packet,
352 u_int32_t *retpktcnt, u_int32_t *retbytecnt, uint8_t grp_idx);
353extern int fq_if_dequeue_sc_classq_multi(struct ifclassq *ifq,
354 mbuf_svc_class_t svc, u_int32_t maxpktcnt, u_int32_t maxbytecnt,
355 classq_pkt_t *first_packet, classq_pkt_t *last_packet, u_int32_t *retpktcnt,
356 u_int32_t *retbytecnt, uint8_t grp_idx);
357extern int fq_if_request_classq(struct ifclassq *ifq, cqrq_t rq, void *arg);
358extern struct flowq *fq_if_hash_pkt(fq_if_t *, fq_if_group_t *,
359 u_int32_t, mbuf_svc_class_t, u_int64_t, bool, fq_tfc_type_t);
360extern boolean_t fq_if_at_drop_limit(fq_if_t *);
361extern boolean_t fq_if_almost_at_drop_limit(fq_if_t *fqs);
362extern void fq_if_drop_packet(fq_if_t *, uint64_t);
363extern void fq_if_is_flow_heavy(fq_if_t *, struct flowq *);
364extern boolean_t fq_if_add_fcentry(fq_if_t *, pktsched_pkt_t *, uint8_t,
365 struct flowq *, fq_if_classq_t *);
366extern void fq_if_flow_feedback(fq_if_t *, struct flowq *, fq_if_classq_t *);
367extern boolean_t fq_if_report_ce(fq_if_t *, pktsched_pkt_t *, uint32_t, uint32_t);
368extern int fq_if_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags,
369 classq_pkt_type_t ptype);
370extern void fq_if_teardown_ifclassq(struct ifclassq *ifq);
371extern int fq_if_getqstats_ifclassq(struct ifclassq *ifq, uint8_t gid,
372 u_int32_t qid, struct if_ifclassq_stats *ifqs);
373extern void fq_if_destroy_flow(fq_if_t *, fq_if_classq_t *, struct flowq *);
374extern void fq_if_move_to_empty_flow(fq_if_t *, fq_if_classq_t *,
375 struct flowq *, uint64_t);
376extern int fq_if_create_grp(struct ifclassq *ifcq, uint8_t qset_idx, uint8_t flags);
377extern void fq_if_set_grp_combined(struct ifclassq *ifcq, uint8_t qset_idx);
378extern void fq_if_set_grp_separated(struct ifclassq *ifcq, uint8_t qset_idx);
379extern fq_if_group_t *fq_if_find_grp(fq_if_t *fqs, uint8_t grp_idx);
380extern boolean_t fq_if_is_all_paced(struct ifclassq *ifq);
381#endif /* BSD_KERNEL_PRIVATE */
382
383#ifdef __cplusplus
384}
385#endif
386
387#endif /* PRIVATE */
#endif /* _NET_PKTSCHED_FQ_CODEL_H_ */
389