1/*
2 * Copyright (c) 2009-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <sys/systm.h>
30#include <sys/kernel.h>
31#include <sys/types.h>
32#include <sys/filedesc.h>
33#include <sys/file_internal.h>
34#include <sys/proc.h>
35#include <sys/socket.h>
36#include <sys/socketvar.h>
37#include <sys/errno.h>
38#include <sys/protosw.h>
39#include <sys/domain.h>
40#include <sys/mbuf.h>
41#include <sys/queue.h>
42#include <sys/sysctl.h>
43#include <sys/sysproto.h>
44
45#include <net/if.h>
46#include <net/if_var.h>
47#include <net/route.h>
48
49#include <netinet/in.h>
50#include <netinet/in_var.h>
51#include <netinet/in_pcb.h>
52#include <netinet/ip.h>
53#include <netinet/ip_var.h>
54#include <netinet/ip6.h>
55#include <netinet6/ip6_var.h>
56#include <netinet/udp.h>
57#include <netinet/udp_var.h>
58#include <netinet/tcp.h>
59#include <netinet/tcp_var.h>
60#include <netinet/tcp_cc.h>
61#include <netinet/in_tclass.h>
62
63#include <os/log.h>
64
/*
 * Compile-time check that _SO_TC_MAX and SO_TC_STATS_MAX stay equal;
 * the so_tc_stats[] accessors further down clamp their index against
 * SO_TC_STATS_MAX.
 */
static_assert(_SO_TC_MAX == SO_TC_STATS_MAX);
66
/*
 * A pair of DSCP lookup tables: one with SO_TC_MAX slots (socket traffic
 * class) and one with _NET_SERVICE_TYPE_COUNT slots (network service type).
 */
struct net_qos_dscp_map {
	uint8_t sotc_to_dscp[SO_TC_MAX];                        /* DSCP per socket traffic class slot */
	uint8_t netsvctype_to_dscp[_NET_SERVICE_TYPE_COUNT];    /* DSCP per network service type */
};
71
/*
 * Associates one DSCP value with one mbuf service class.
 */
struct dcsp_msc_map {
	uint8_t                 dscp;   /* DSCP code point */
	mbuf_svc_class_t        msc;    /* mbuf service class assigned to that code point */
};
/* Forward declarations of file-local (static) helpers. */
static inline int so_throttle_best_effort(struct socket *, struct ifnet *);
static void set_dscp_to_wifi_ac_map(const struct dcsp_msc_map *, int);
static errno_t dscp_msc_map_from_netsvctype_dscp_map(struct netsvctype_dscp_map *, size_t,
    struct dcsp_msc_map *);
80
/*
 * Lock group and mutex for this file.  In DEVELOPMENT/DEBUG builds
 * tclass_lock is taken around accesses to the per-process traffic class
 * list (tfp_head / tfp_count).
 */
static LCK_GRP_DECLARE(tclass_lck_grp, "tclass");
static LCK_MTX_DECLARE(tclass_lock, &tclass_lck_grp);
83
/* Root of the net.qos sysctl subtree. */
SYSCTL_NODE(_net, OID_AUTO, qos,
    CTLFLAG_RW | CTLFLAG_LOCKED, 0, "QoS");

/* net.qos.default_netsvctype_to_dscp_map: read/write struct served by a handler. */
static int sysctl_default_netsvctype_to_dscp_map SYSCTL_HANDLER_ARGS;
SYSCTL_PROC(_net_qos, OID_AUTO, default_netsvctype_to_dscp_map,
    CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, sysctl_default_netsvctype_to_dscp_map, "S", "");

/* net.qos.dscp_to_wifi_ac_map: read/write struct served by a handler. */
static int sysctl_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS;
SYSCTL_PROC(_net_qos, OID_AUTO, dscp_to_wifi_ac_map,
    CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, sysctl_dscp_to_wifi_ac_map, "S", "");

/* net.qos.reset_dscp_to_wifi_ac_map: write-only integer served by a handler. */
static int sysctl_reset_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS;
SYSCTL_PROC(_net_qos, OID_AUTO, reset_dscp_to_wifi_ac_map,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
    0, 0, sysctl_reset_dscp_to_wifi_ac_map, "I", "");

/* Integer tunable exported read/write as net.qos.verbose; starts at 0. */
int net_qos_verbose = 0;
SYSCTL_INT(_net_qos, OID_AUTO, verbose,
    CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_verbose, 0, "");
105
/*
 * Fastlane QoS policy:
 * By default allow all apps to get traffic class to DSCP mapping
 *
 * All four knobs below are read/write integers under net.qos.policy and
 * start at 0.
 */
SYSCTL_NODE(_net_qos, OID_AUTO, policy,
    CTLFLAG_RW | CTLFLAG_LOCKED, 0, "");

/*
 * While this is 0, so_set_default_traffic_class() sets
 * SOF1_QOSMARKING_ALLOWED on new PF_INET/PF_INET6 sockets.
 */
int net_qos_policy_restricted = 0;
SYSCTL_INT(_net_qos_policy, OID_AUTO, restricted,
    CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_restricted, 0, "");

/* net.qos.policy.restrict_avapps */
int net_qos_policy_restrict_avapps = 0;
SYSCTL_INT(_net_qos_policy, OID_AUTO, restrict_avapps,
    CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_restrict_avapps, 0, "");

/* net.qos.policy.wifi_enabled */
int net_qos_policy_wifi_enabled = 0;
SYSCTL_INT(_net_qos_policy, OID_AUTO, wifi_enabled,
    CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_wifi_enabled, 0, "");

/* net.qos.policy.capable_enabled */
int net_qos_policy_capable_enabled = 0;
SYSCTL_INT(_net_qos_policy, OID_AUTO, capable_enabled,
    CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_capable_enabled, 0, "");
128
/*
 * Socket traffic class from network service type.
 *
 * Indexed by the network service type value; consulted by
 * so_set_net_service_type() and so_tc_from_control() after the index has
 * been checked with IS_VALID_NET_SERVICE_TYPE().  Note that both the SIG
 * and VI service types map to SO_TC_VI.
 */
const int sotc_by_netservicetype[_NET_SERVICE_TYPE_COUNT] = {
	SO_TC_BE,       /* NET_SERVICE_TYPE_BE */
	SO_TC_BK,       /* NET_SERVICE_TYPE_BK */
	SO_TC_VI,       /* NET_SERVICE_TYPE_SIG */
	SO_TC_VI,       /* NET_SERVICE_TYPE_VI */
	SO_TC_VO,       /* NET_SERVICE_TYPE_VO */
	SO_TC_RV,       /* NET_SERVICE_TYPE_RV */
	SO_TC_AV,       /* NET_SERVICE_TYPE_AV */
	SO_TC_OAM,      /* NET_SERVICE_TYPE_OAM */
	SO_TC_RD        /* NET_SERVICE_TYPE_RD */
};
143
/*
 * DSCP mappings for QoS Fastlane, keyed by network service type.
 * One entry per network service type.
 */
static const
struct netsvctype_dscp_map fastlane_netsvctype_dscp_map[_NET_SERVICE_TYPE_COUNT] = {
	{ .netsvctype = NET_SERVICE_TYPE_BE, .dscp = _DSCP_DF },
	{ .netsvctype = NET_SERVICE_TYPE_BK, .dscp = _DSCP_AF11 },
	{ .netsvctype = NET_SERVICE_TYPE_SIG, .dscp = _DSCP_CS3 },
	{ .netsvctype = NET_SERVICE_TYPE_VI, .dscp = _DSCP_AF41 },
	{ .netsvctype = NET_SERVICE_TYPE_VO, .dscp = _DSCP_EF },
	{ .netsvctype = NET_SERVICE_TYPE_RV, .dscp = _DSCP_CS4 },
	{ .netsvctype = NET_SERVICE_TYPE_AV, .dscp = _DSCP_AF31 },
	{ .netsvctype = NET_SERVICE_TYPE_OAM, .dscp = _DSCP_CS2 },
	{ .netsvctype = NET_SERVICE_TYPE_RD, .dscp = _DSCP_AF21 },
};
159
/*
 * DSCP mappings for QoS RFC 4594, keyed by network service type.
 * Differs from the Fastlane table only for the BK (CS1) and SIG (CS5)
 * entries.
 */
static const
struct netsvctype_dscp_map rfc4594_netsvctype_dscp_map[_NET_SERVICE_TYPE_COUNT] = {
	{ .netsvctype = NET_SERVICE_TYPE_BE, .dscp = _DSCP_DF },
	{ .netsvctype = NET_SERVICE_TYPE_BK, .dscp = _DSCP_CS1 },
	{ .netsvctype = NET_SERVICE_TYPE_SIG, .dscp = _DSCP_CS5 },
	{ .netsvctype = NET_SERVICE_TYPE_VI, .dscp = _DSCP_AF41 },
	{ .netsvctype = NET_SERVICE_TYPE_VO, .dscp = _DSCP_EF },
	{ .netsvctype = NET_SERVICE_TYPE_RV, .dscp = _DSCP_CS4 },
	{ .netsvctype = NET_SERVICE_TYPE_AV, .dscp = _DSCP_AF31 },
	{ .netsvctype = NET_SERVICE_TYPE_OAM, .dscp = _DSCP_CS2 },
	{ .netsvctype = NET_SERVICE_TYPE_RD, .dscp = _DSCP_AF21 },
};
175
/* DSCP lookup tables for the Fastlane and RFC 4594 schemes. */
static struct net_qos_dscp_map fastlane_net_qos_dscp_map;
static struct net_qos_dscp_map rfc4594_net_qos_dscp_map;
#if (DEBUG || DEVELOPMENT)
/* Extra lookup table that only exists on DEBUG/DEVELOPMENT kernels. */
static struct net_qos_dscp_map custom_net_qos_dscp_map;
#endif /* (DEBUG || DEVELOPMENT) */
181
/*
 * The size is one more than the maximum DSCP value because DSCP values
 * start at zero.
 */
#define DSCP_ARRAY_SIZE (_MAX_DSCP + 1)
186
/*
 * Default DSCP to UP mapping (via mbuf service class) for Wi-Fi.  It follows
 * the mapping that is implemented at the 802.11 driver level when the mbuf
 * service class is MBUF_SC_BE.
 *
 * This clashes with the recommended mapping documented by the IETF document
 * draft-szigeti-tsvwg-ieee-802-11e-01.txt but we keep the mapping to maintain
 * binary compatibility. Applications should use the network service type socket
 * option to select L2 QoS marking instead of IP_TOS or IPV6_TCLASS.
 *
 * The table has one entry per DSCP value from 0 through 63 and ends with a
 * sentinel entry whose dscp is 255.
 */
static const struct dcsp_msc_map default_dscp_to_wifi_ac_map[] = {
	{ .dscp = _DSCP_DF, .msc = MBUF_SC_BE },        /* RFC 2474 Standard */
	{ .dscp = 1, .msc = MBUF_SC_BE },               /* */
	{ .dscp = 2, .msc = MBUF_SC_BE },               /* */
	{ .dscp = 3, .msc = MBUF_SC_BE },               /* */
	{ .dscp = 4, .msc = MBUF_SC_BE },               /* */
	{ .dscp = 5, .msc = MBUF_SC_BE },               /* */
	{ .dscp = 6, .msc = MBUF_SC_BE },               /* */
	{ .dscp = 7, .msc = MBUF_SC_BE },               /* */

	{ .dscp = _DSCP_CS1, .msc = MBUF_SC_BK },       /* RFC 3662 Low-Priority Data */
	{ .dscp = 9, .msc = MBUF_SC_BK },               /* */
	{ .dscp = _DSCP_AF11, .msc = MBUF_SC_BK },      /* RFC 2597 High-Throughput Data */
	{ .dscp = 11, .msc = MBUF_SC_BK },              /* */
	{ .dscp = _DSCP_AF12, .msc = MBUF_SC_BK },      /* RFC 2597 High-Throughput Data */
	{ .dscp = 13, .msc = MBUF_SC_BK },              /* */
	{ .dscp = _DSCP_AF13, .msc = MBUF_SC_BK },      /* RFC 2597 High-Throughput Data */
	{ .dscp = 15, .msc = MBUF_SC_BK },              /* */

	{ .dscp = _DSCP_CS2, .msc = MBUF_SC_BK },       /* RFC 4594 OAM */
	{ .dscp = 17, .msc = MBUF_SC_BK },              /* */
	{ .dscp = _DSCP_AF21, .msc = MBUF_SC_BK },      /* RFC 2597 Low-Latency Data */
	{ .dscp = 19, .msc = MBUF_SC_BK },              /* */
	{ .dscp = _DSCP_AF22, .msc = MBUF_SC_BK },      /* RFC 2597 Low-Latency Data */
	{ .dscp = 21, .msc = MBUF_SC_BK },              /* */
	{ .dscp = _DSCP_AF23, .msc = MBUF_SC_BK },      /* RFC 2597 Low-Latency Data */
	{ .dscp = 23, .msc = MBUF_SC_BK },              /* */

	{ .dscp = _DSCP_CS3, .msc = MBUF_SC_BE },       /* RFC 2474 Broadcast Video */
	{ .dscp = 25, .msc = MBUF_SC_BE },              /* */
	{ .dscp = _DSCP_AF31, .msc = MBUF_SC_BE },      /* RFC 2597 Multimedia Streaming */
	{ .dscp = 27, .msc = MBUF_SC_BE },              /* */
	{ .dscp = _DSCP_AF32, .msc = MBUF_SC_BE },      /* RFC 2597 Multimedia Streaming */
	{ .dscp = 29, .msc = MBUF_SC_BE },              /* */
	{ .dscp = _DSCP_AF33, .msc = MBUF_SC_BE },      /* RFC 2597 Multimedia Streaming */
	{ .dscp = 31, .msc = MBUF_SC_BE },              /* */

	{ .dscp = _DSCP_CS4, .msc = MBUF_SC_VI },       /* RFC 2474 Real-Time Interactive */
	{ .dscp = 33, .msc = MBUF_SC_VI },              /* */
	{ .dscp = _DSCP_AF41, .msc = MBUF_SC_VI },      /* RFC 2597 Multimedia Conferencing */
	{ .dscp = 35, .msc = MBUF_SC_VI },              /* */
	{ .dscp = _DSCP_AF42, .msc = MBUF_SC_VI },      /* RFC 2597 Multimedia Conferencing */
	{ .dscp = 37, .msc = MBUF_SC_VI },              /* */
	{ .dscp = _DSCP_AF43, .msc = MBUF_SC_VI },      /* RFC 2597 Multimedia Conferencing */
	{ .dscp = 39, .msc = MBUF_SC_VI },              /* */

	{ .dscp = _DSCP_CS5, .msc = MBUF_SC_VI },       /* RFC 2474 Signaling */
	{ .dscp = 41, .msc = MBUF_SC_VI },              /* */
	{ .dscp = 42, .msc = MBUF_SC_VI },              /* */
	{ .dscp = 43, .msc = MBUF_SC_VI },              /* */
	{ .dscp = _DSCP_VA, .msc = MBUF_SC_VI },        /* RFC 5865 VOICE-ADMIT */
	{ .dscp = 45, .msc = MBUF_SC_VI },              /* */
	{ .dscp = _DSCP_EF, .msc = MBUF_SC_VI },        /* RFC 3246 Telephony */
	{ .dscp = 47, .msc = MBUF_SC_VI },              /* */

	{ .dscp = _DSCP_CS6, .msc = MBUF_SC_VO },       /* Wi-Fi WMM Certification: Chariot */
	{ .dscp = 49, .msc = MBUF_SC_VO },              /* */
	{ .dscp = 50, .msc = MBUF_SC_VO },              /* */
	{ .dscp = 51, .msc = MBUF_SC_VO },              /* */
	{ .dscp = 52, .msc = MBUF_SC_VO },              /* Wi-Fi WMM Certification: Sigma */
	{ .dscp = 53, .msc = MBUF_SC_VO },              /* */
	{ .dscp = 54, .msc = MBUF_SC_VO },              /* */
	{ .dscp = 55, .msc = MBUF_SC_VO },              /* */

	{ .dscp = _DSCP_CS7, .msc = MBUF_SC_VO },       /* Wi-Fi WMM Certification: Chariot */
	{ .dscp = 57, .msc = MBUF_SC_VO },              /* */
	{ .dscp = 58, .msc = MBUF_SC_VO },              /* */
	{ .dscp = 59, .msc = MBUF_SC_VO },              /* */
	{ .dscp = 60, .msc = MBUF_SC_VO },              /* */
	{ .dscp = 61, .msc = MBUF_SC_VO },              /* */
	{ .dscp = 62, .msc = MBUF_SC_VO },              /* */
	{ .dscp = 63, .msc = MBUF_SC_VO },              /* */

	{ .dscp = 255, .msc = MBUF_SC_UNSPEC }          /* invalid DSCP to mark last entry */
};
272
/*
 * mbuf service class per DSCP value, with DSCP_ARRAY_SIZE slots.
 * NOTE(review): presumably filled in by set_dscp_to_wifi_ac_map() -- confirm.
 */
mbuf_svc_class_t wifi_dscp_to_msc_array[DSCP_ARRAY_SIZE];
274
/*
 * If there is no foreground activity on the interface for
 * TCP_BG_SWITCH_TIME seconds, the background connections can switch to
 * foreground TCP congestion control.  The same window is used by
 * so_throttle_best_effort() when checking for recent realtime sends.
 */
#define TCP_BG_SWITCH_TIME 2 /* seconds */
281
282#if (DEVELOPMENT || DEBUG)
283
/* Number of entries currently linked on tfp_head. */
static int tfp_count = 0;

/*
 * List of per-process traffic class overrides.  Per-pid entries are
 * inserted at the head and per-name entries at the tail (see
 * alloc_tclass_for_proc()).
 */
static TAILQ_HEAD(, tclass_for_proc) tfp_head =
    TAILQ_HEAD_INITIALIZER(tfp_head);
288
/*
 * One traffic class / QoS marking override, keyed either by pid or by
 * process name.
 */
struct tclass_for_proc {
	TAILQ_ENTRY(tclass_for_proc)    tfp_link;       /* linkage on tfp_head */
	int             tfp_class;                      /* traffic class; SO_TC_UNSPEC means no override */
	pid_t           tfp_pid;                        /* target pid, or -1 for a name-based entry */
	char            tfp_pname[(2 * MAXCOMLEN) + 1]; /* process name for name-based entries */
	uint32_t        tfp_qos_mode;                   /* QOS_MODE_MARKING_POLICY_* value */
};
296
/*
 * Forward declarations for the DEVELOPMENT/DEBUG traffic class helpers.
 * NOTE(review): set_tclass_for_curr_proc() is declared static here while
 * its definition below is tagged __private_extern__ -- confirm the
 * intended linkage.
 */
static int get_pid_tclass(struct so_tcdbg *);
static int get_pname_tclass(struct so_tcdbg *);
static int set_pid_tclass(struct so_tcdbg *);
static int set_pname_tclass(struct so_tcdbg *);
static int flush_pid_tclass(struct so_tcdbg *);
static int purge_tclass_for_proc(void);
static int flush_tclass_for_proc(void);
static void set_tclass_for_curr_proc(struct socket *);
305
306/*
307 * Must be called with tclass_lock held
308 */
309static struct tclass_for_proc *
310find_tfp_by_pid(pid_t pid)
311{
312 struct tclass_for_proc *tfp;
313
314 TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
315 if (tfp->tfp_pid == pid) {
316 break;
317 }
318 }
319 return tfp;
320}
321
322/*
323 * Must be called with tclass_lock held
324 */
325static struct tclass_for_proc *
326find_tfp_by_pname(const char *pname)
327{
328 struct tclass_for_proc *tfp;
329
330 TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
331 if (strncmp(pname, tfp->tfp_pname,
332 sizeof(tfp->tfp_pname)) == 0) {
333 break;
334 }
335 }
336 return tfp;
337}
338
339__private_extern__ void
340set_tclass_for_curr_proc(struct socket *so)
341{
342 struct tclass_for_proc *tfp = NULL;
343 proc_t p = current_proc(); /* Not ref counted */
344 pid_t pid = proc_pid(p);
345 char *pname = proc_best_name(p);
346
347 lck_mtx_lock(&tclass_lock);
348
349 TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
350 if ((tfp->tfp_pid == pid) || (tfp->tfp_pid == -1 &&
351 strncmp(pname, tfp->tfp_pname,
352 sizeof(tfp->tfp_pname)) == 0)) {
353 if (tfp->tfp_class != SO_TC_UNSPEC) {
354 so->so_traffic_class = (uint16_t)tfp->tfp_class;
355 }
356
357 if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_ENABLE) {
358 so->so_flags1 |= SOF1_QOSMARKING_ALLOWED;
359 } else if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_DISABLE) {
360 so->so_flags1 &= ~SOF1_QOSMARKING_ALLOWED;
361 }
362 break;
363 }
364 }
365
366 lck_mtx_unlock(&tclass_lock);
367}
368
369/*
370 * Purge entries with PIDs of exited processes
371 */
372int
373purge_tclass_for_proc(void)
374{
375 int error = 0;
376 struct tclass_for_proc *tfp, *tvar;
377
378 lck_mtx_lock(&tclass_lock);
379
380 TAILQ_FOREACH_SAFE(tfp, &tfp_head, tfp_link, tvar) {
381 proc_t p;
382
383 if (tfp->tfp_pid == -1) {
384 continue;
385 }
386 if ((p = proc_find(tfp->tfp_pid)) == NULL) {
387 tfp_count--;
388 TAILQ_REMOVE(&tfp_head, tfp, tfp_link);
389
390 kfree_type(struct tclass_for_proc, tfp);
391 } else {
392 proc_rele(p);
393 }
394 }
395
396 lck_mtx_unlock(&tclass_lock);
397
398 return error;
399}
400
401/*
402 * Remove one entry
403 * Must be called with tclass_lock held
404 */
405static void
406free_tclass_for_proc(struct tclass_for_proc *tfp)
407{
408 if (tfp == NULL) {
409 return;
410 }
411 tfp_count--;
412 TAILQ_REMOVE(&tfp_head, tfp, tfp_link);
413 kfree_type(struct tclass_for_proc, tfp);
414}
415
416/*
417 * Remove all entries
418 */
419int
420flush_tclass_for_proc(void)
421{
422 int error = 0;
423 struct tclass_for_proc *tfp, *tvar;
424
425 lck_mtx_lock(&tclass_lock);
426
427 TAILQ_FOREACH_SAFE(tfp, &tfp_head, tfp_link, tvar) {
428 free_tclass_for_proc(tfp);
429 }
430
431 lck_mtx_unlock(&tclass_lock);
432
433 return error;
434}
435
436/*
437 * Must be called with tclass_lock held
438 */
439static struct tclass_for_proc *
440alloc_tclass_for_proc(pid_t pid, const char *pname, size_t pnamelen)
441{
442 struct tclass_for_proc *tfp;
443
444 if (pid == -1 && pname == NULL) {
445 return NULL;
446 }
447
448 tfp = kalloc_type(struct tclass_for_proc, Z_NOWAIT | Z_ZERO);
449 if (tfp == NULL) {
450 return NULL;
451 }
452
453 tfp->tfp_pid = pid;
454 /*
455 * Add per pid entries before per proc name so we can find
456 * a specific instance of a process before the general name base entry.
457 */
458 if (pid != -1) {
459 TAILQ_INSERT_HEAD(&tfp_head, tfp, tfp_link);
460 } else {
461 if (pname != NULL) {
462 strncpy(tfp->tfp_pname, pname, pnamelen);
463 tfp->tfp_pname[sizeof(tfp->tfp_pname) - 1] = '\0';
464 } else {
465 tfp->tfp_pname[0] = '\0';
466 }
467 TAILQ_INSERT_TAIL(&tfp_head, tfp, tfp_link);
468 }
469
470 tfp_count++;
471
472 return tfp;
473}
474
475/*
476 * SO_TC_UNSPEC for tclass means to remove the entry
477 */
478int
479set_pid_tclass(struct so_tcdbg *so_tcdbg)
480{
481 int error = EINVAL;
482 proc_t p = NULL;
483 struct tclass_for_proc *tfp;
484 pid_t pid = so_tcdbg->so_tcdbg_pid;
485 int tclass = so_tcdbg->so_tcdbg_tclass;
486 int netsvctype = so_tcdbg->so_tcdbg_netsvctype;
487 uint8_t ecn_val = so_tcdbg->so_tcdbg_ecn_val;
488
489 p = proc_find(pid);
490 if (p == NULL) {
491 printf("%s proc_find(%d) failed\n", __func__, pid);
492 goto done;
493 }
494
495 /* Need a tfp */
496 lck_mtx_lock(&tclass_lock);
497
498 tfp = find_tfp_by_pid(pid);
499 if (tfp == NULL) {
500 tfp = alloc_tclass_for_proc(pid, NULL, 0);
501 if (tfp == NULL) {
502 lck_mtx_unlock(&tclass_lock);
503 error = ENOBUFS;
504 goto done;
505 }
506 }
507 tfp->tfp_class = tclass;
508 tfp->tfp_qos_mode = so_tcdbg->so_tcbbg_qos_mode;
509
510 lck_mtx_unlock(&tclass_lock);
511
512 if (tfp != NULL) {
513 struct fileproc *fp;
514 proc_fdlock(p);
515 fdt_foreach(fp, p) {
516 struct socket *so;
517
518 if (FILEGLOB_DTYPE(fp->fp_glob) != DTYPE_SOCKET) {
519 continue;
520 }
521
522 so = (struct socket *)fp_get_data(fp);
523 if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
524 continue;
525 }
526
527 socket_lock(so, 1);
528 if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_ENABLE) {
529 so->so_flags1 |= SOF1_QOSMARKING_ALLOWED;
530 } else if (tfp->tfp_qos_mode == QOS_MODE_MARKING_POLICY_DISABLE) {
531 so->so_flags1 &= ~SOF1_QOSMARKING_ALLOWED;
532 }
533
534 struct inpcb *inp = so ? sotoinpcb(so) : NULL;
535 struct tcpcb *tp = inp ? intotcpcb(inp) : NULL;
536
537 if (tp != NULL) {
538 if (ecn_val == IPTOS_ECN_ECT1 || ecn_val == IPTOS_ECN_ECT0) {
539 tp->ecn_flags |= (ecn_val == IPTOS_ECN_ECT1) ?
540 TE_FORCE_ECT1 : TE_FORCE_ECT0;
541 } else {
542 tp->ecn_flags &= ~(TE_FORCE_ECT1 | TE_FORCE_ECT0);
543 }
544 }
545 socket_unlock(so, 1);
546
547 if (netsvctype != _NET_SERVICE_TYPE_UNSPEC) {
548 error = sock_setsockopt(so, SOL_SOCKET,
549 SO_NET_SERVICE_TYPE, &netsvctype, sizeof(int));
550 }
551 if (tclass != SO_TC_UNSPEC) {
552 error = sock_setsockopt(so, SOL_SOCKET,
553 SO_TRAFFIC_CLASS, &tclass, sizeof(int));
554 }
555 }
556
557 proc_fdunlock(p);
558 }
559
560 error = 0;
561done:
562 if (p != NULL) {
563 proc_rele(p);
564 }
565
566 return error;
567}
568
569int
570set_pname_tclass(struct so_tcdbg *so_tcdbg)
571{
572 int error = EINVAL;
573 struct tclass_for_proc *tfp;
574
575 lck_mtx_lock(&tclass_lock);
576
577 tfp = find_tfp_by_pname(so_tcdbg->so_tcdbg_pname);
578 if (tfp == NULL) {
579 tfp = alloc_tclass_for_proc(-1, so_tcdbg->so_tcdbg_pname,
580 sizeof(so_tcdbg->so_tcdbg_pname));
581 if (tfp == NULL) {
582 lck_mtx_unlock(&tclass_lock);
583 error = ENOBUFS;
584 goto done;
585 }
586 }
587 tfp->tfp_class = so_tcdbg->so_tcdbg_tclass;
588 tfp->tfp_qos_mode = so_tcdbg->so_tcbbg_qos_mode;
589
590 lck_mtx_unlock(&tclass_lock);
591
592 error = 0;
593done:
594
595 return error;
596}
597
598static int
599flush_pid_tclass(struct so_tcdbg *so_tcdbg)
600{
601 pid_t pid = so_tcdbg->so_tcdbg_pid;
602 int tclass = so_tcdbg->so_tcdbg_tclass;
603 struct fileproc *fp;
604 proc_t p;
605 int error;
606
607 p = proc_find(pid);
608 if (p == PROC_NULL) {
609 printf("%s proc_find(%d) failed\n", __func__, pid);
610 return EINVAL;
611 }
612
613 proc_fdlock(p);
614
615 fdt_foreach(fp, p) {
616 struct socket *so;
617
618 if (FILEGLOB_DTYPE(fp->fp_glob) != DTYPE_SOCKET) {
619 continue;
620 }
621
622 so = (struct socket *)fp_get_data(fp);
623 error = sock_setsockopt(so, SOL_SOCKET, SO_FLUSH, &tclass,
624 sizeof(tclass));
625 if (error != 0) {
626 printf("%s: setsockopt(SO_FLUSH) (so=0x%llx, fd=%d, "
627 "tclass=%d) failed %d\n", __func__,
628 (uint64_t)VM_KERNEL_ADDRPERM(so), fdt_foreach_fd(), tclass,
629 error);
630 }
631 }
632
633 proc_fdunlock(p);
634
635 proc_rele(p);
636 return 0;
637}
638
639int
640get_pid_tclass(struct so_tcdbg *so_tcdbg)
641{
642 int error = EINVAL;
643 proc_t p = NULL;
644 struct tclass_for_proc *tfp;
645 pid_t pid = so_tcdbg->so_tcdbg_pid;
646
647 so_tcdbg->so_tcdbg_tclass = SO_TC_UNSPEC; /* Means not set */
648
649 p = proc_find(pid);
650 if (p == NULL) {
651 printf("%s proc_find(%d) failed\n", __func__, pid);
652 goto done;
653 }
654
655 /* Need a tfp */
656 lck_mtx_lock(&tclass_lock);
657
658 tfp = find_tfp_by_pid(pid);
659 if (tfp != NULL) {
660 so_tcdbg->so_tcdbg_tclass = tfp->tfp_class;
661 so_tcdbg->so_tcbbg_qos_mode = tfp->tfp_qos_mode;
662 error = 0;
663 }
664 lck_mtx_unlock(&tclass_lock);
665done:
666 if (p != NULL) {
667 proc_rele(p);
668 }
669
670 return error;
671}
672
673int
674get_pname_tclass(struct so_tcdbg *so_tcdbg)
675{
676 int error = EINVAL;
677 struct tclass_for_proc *tfp;
678
679 so_tcdbg->so_tcdbg_tclass = SO_TC_UNSPEC; /* Means not set */
680
681 /* Need a tfp */
682 lck_mtx_lock(&tclass_lock);
683
684 tfp = find_tfp_by_pname(so_tcdbg->so_tcdbg_pname);
685 if (tfp != NULL) {
686 so_tcdbg->so_tcdbg_tclass = tfp->tfp_class;
687 so_tcdbg->so_tcbbg_qos_mode = tfp->tfp_qos_mode;
688 error = 0;
689 }
690 lck_mtx_unlock(&tclass_lock);
691
692 return error;
693}
694
695static int
696delete_tclass_for_pid_pname(struct so_tcdbg *so_tcdbg)
697{
698 int error = EINVAL;
699 pid_t pid = so_tcdbg->so_tcdbg_pid;
700 struct tclass_for_proc *tfp = NULL;
701
702 lck_mtx_lock(&tclass_lock);
703
704 if (pid != -1) {
705 tfp = find_tfp_by_pid(pid);
706 } else {
707 tfp = find_tfp_by_pname(so_tcdbg->so_tcdbg_pname);
708 }
709
710 if (tfp != NULL) {
711 free_tclass_for_proc(tfp);
712 error = 0;
713 }
714
715 lck_mtx_unlock(&tclass_lock);
716
717 return error;
718}
719
720/*
721 * Setting options requires privileges
722 */
723__private_extern__ int
724so_set_tcdbg(struct socket *so, struct so_tcdbg *so_tcdbg)
725{
726 int error = 0;
727
728 if ((so->so_state & SS_PRIV) == 0) {
729 return EPERM;
730 }
731
732 socket_unlock(so, 0);
733
734 switch (so_tcdbg->so_tcdbg_cmd) {
735 case SO_TCDBG_PID:
736 error = set_pid_tclass(so_tcdbg);
737 break;
738
739 case SO_TCDBG_PNAME:
740 error = set_pname_tclass(so_tcdbg);
741 break;
742
743 case SO_TCDBG_PURGE:
744 error = purge_tclass_for_proc();
745 break;
746
747 case SO_TCDBG_FLUSH:
748 error = flush_tclass_for_proc();
749 break;
750
751 case SO_TCDBG_DELETE:
752 error = delete_tclass_for_pid_pname(so_tcdbg);
753 break;
754
755 case SO_TCDBG_TCFLUSH_PID:
756 error = flush_pid_tclass(so_tcdbg);
757 break;
758
759 default:
760 error = EINVAL;
761 break;
762 }
763
764 socket_lock(so, 0);
765
766 return error;
767}
768
769/*
770 * Not required to be privileged to get
771 */
772__private_extern__ int
773sogetopt_tcdbg(struct socket *so, struct sockopt *sopt)
774{
775 int error = 0;
776 struct so_tcdbg so_tcdbg;
777 void *buf = NULL;
778 size_t len = sopt->sopt_valsize;
779
780 error = sooptcopyin(sopt, &so_tcdbg, sizeof(struct so_tcdbg),
781 sizeof(struct so_tcdbg));
782 if (error != 0) {
783 return error;
784 }
785
786 sopt->sopt_valsize = len;
787
788 socket_unlock(so, 0);
789
790 switch (so_tcdbg.so_tcdbg_cmd) {
791 case SO_TCDBG_PID:
792 error = get_pid_tclass(&so_tcdbg);
793 break;
794
795 case SO_TCDBG_PNAME:
796 error = get_pname_tclass(&so_tcdbg);
797 break;
798
799 case SO_TCDBG_COUNT:
800 lck_mtx_lock(&tclass_lock);
801 so_tcdbg.so_tcdbg_count = tfp_count;
802 lck_mtx_unlock(&tclass_lock);
803 break;
804
805 case SO_TCDBG_LIST: {
806 struct tclass_for_proc *tfp;
807 int n, alloc_count;
808 struct so_tcdbg *ptr;
809
810 lck_mtx_lock(&tclass_lock);
811 if ((alloc_count = tfp_count) == 0) {
812 lck_mtx_unlock(&tclass_lock);
813 error = EINVAL;
814 break;
815 }
816 len = alloc_count * sizeof(struct so_tcdbg);
817 lck_mtx_unlock(&tclass_lock);
818
819 buf = kalloc_data(len, Z_WAITOK | Z_ZERO);
820 if (buf == NULL) {
821 error = ENOBUFS;
822 break;
823 }
824
825 lck_mtx_lock(&tclass_lock);
826 n = 0;
827 ptr = (struct so_tcdbg *)buf;
828 TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
829 if (++n > alloc_count) {
830 break;
831 }
832 if (tfp->tfp_pid != -1) {
833 ptr->so_tcdbg_cmd = SO_TCDBG_PID;
834 ptr->so_tcdbg_pid = tfp->tfp_pid;
835 } else {
836 ptr->so_tcdbg_cmd = SO_TCDBG_PNAME;
837 ptr->so_tcdbg_pid = -1;
838 strncpy(ptr->so_tcdbg_pname,
839 tfp->tfp_pname,
840 sizeof(ptr->so_tcdbg_pname));
841 ptr->so_tcdbg_pname[sizeof(ptr->so_tcdbg_pname) - 1] = '\0';
842 }
843 ptr->so_tcdbg_tclass = tfp->tfp_class;
844 ptr->so_tcbbg_qos_mode = tfp->tfp_qos_mode;
845 ptr++;
846 }
847
848 lck_mtx_unlock(&tclass_lock);
849 }
850 break;
851
852 default:
853 error = EINVAL;
854 break;
855 }
856
857 socket_lock(so, 0);
858
859 if (error == 0) {
860 if (buf == NULL) {
861 error = sooptcopyout(sopt, &so_tcdbg,
862 sizeof(struct so_tcdbg));
863 } else {
864 error = sooptcopyout(sopt, buf, len);
865 kfree_data(buf, len);
866 }
867 }
868 return error;
869}
870
871#endif /* (DEVELOPMENT || DEBUG) */
872
873int
874so_get_netsvc_marking_level(struct socket *so)
875{
876 int marking_level = NETSVC_MRKNG_UNKNOWN;
877 struct ifnet *ifp = NULL;
878
879 switch (SOCK_DOM(so)) {
880 case PF_INET: {
881 struct inpcb *inp = sotoinpcb(so);
882
883 if (inp != NULL) {
884 ifp = inp->inp_last_outifp;
885 }
886 break;
887 }
888 case PF_INET6: {
889 struct in6pcb *in6p = sotoin6pcb(so);
890
891 if (in6p != NULL) {
892 ifp = in6p->in6p_last_outifp;
893 }
894 break;
895 }
896 default:
897 break;
898 }
899 if (ifp != NULL) {
900 if ((ifp->if_eflags & IFEF_QOSMARKING_ENABLED) != 0) {
901 if ((so->so_flags1 & SOF1_QOSMARKING_ALLOWED)) {
902 marking_level = NETSVC_MRKNG_LVL_L3L2_ALL;
903 } else {
904 marking_level = NETSVC_MRKNG_LVL_L3L2_BK;
905 }
906 } else {
907 marking_level = NETSVC_MRKNG_LVL_L2;
908 }
909 }
910 return marking_level;
911}
912
913__private_extern__ int
914so_set_traffic_class(struct socket *so, int optval)
915{
916 int error = 0;
917
918 if (optval < SO_TC_BE || optval > SO_TC_CTL) {
919 error = EINVAL;
920 } else {
921 switch (optval) {
922 case _SO_TC_BK:
923 optval = SO_TC_BK;
924 break;
925 case _SO_TC_VI:
926 optval = SO_TC_VI;
927 break;
928 case _SO_TC_VO:
929 optval = SO_TC_VO;
930 break;
931 default:
932 if (!SO_VALID_TC(optval)) {
933 error = EINVAL;
934 }
935 break;
936 }
937
938 if (error == 0) {
939 int oldval = so->so_traffic_class;
940
941 VERIFY(SO_VALID_TC(optval));
942 so->so_traffic_class = (uint16_t)optval;
943
944 if ((SOCK_DOM(so) == PF_INET ||
945 SOCK_DOM(so) == PF_INET6) &&
946 SOCK_TYPE(so) == SOCK_STREAM) {
947 set_tcp_stream_priority(so);
948 }
949
950 if ((SOCK_DOM(so) == PF_INET ||
951 SOCK_DOM(so) == PF_INET6) &&
952 optval != oldval && (optval == SO_TC_BK_SYS ||
953 oldval == SO_TC_BK_SYS)) {
954 /*
955 * If the app switches from BK_SYS to something
956 * else, resume the socket if it was suspended.
957 */
958 if (oldval == SO_TC_BK_SYS) {
959 inp_reset_fc_state(so->so_pcb);
960 }
961
962 SOTHROTTLELOG("throttle[%d]: so 0x%llx "
963 "[%d,%d] opportunistic %s\n", so->last_pid,
964 (uint64_t)VM_KERNEL_ADDRPERM(so),
965 SOCK_DOM(so), SOCK_TYPE(so),
966 (optval == SO_TC_BK_SYS) ? "ON" : "OFF");
967 }
968 }
969 }
970 return error;
971}
972
973__private_extern__ int
974so_set_net_service_type(struct socket *so, int netsvctype)
975{
976 int sotc;
977 int error;
978
979 if (!IS_VALID_NET_SERVICE_TYPE(netsvctype)) {
980 return EINVAL;
981 }
982
983 sotc = sotc_by_netservicetype[netsvctype];
984 error = so_set_traffic_class(so, optval: sotc);
985 if (error != 0) {
986 return error;
987 }
988 so->so_netsvctype = (int8_t)netsvctype;
989 so->so_flags1 |= SOF1_TC_NET_SERV_TYPE;
990
991 return 0;
992}
993
994__private_extern__ void
995so_set_default_traffic_class(struct socket *so)
996{
997 so->so_traffic_class = SO_TC_BE;
998
999 if ((SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6)) {
1000 if (net_qos_policy_restricted == 0) {
1001 so->so_flags1 |= SOF1_QOSMARKING_ALLOWED;
1002 }
1003#if (DEVELOPMENT || DEBUG)
1004 if (tfp_count > 0) {
1005 set_tclass_for_curr_proc(so);
1006 }
1007#endif /* (DEVELOPMENT || DEBUG) */
1008 }
1009}
1010
1011__private_extern__ int
1012so_set_opportunistic(struct socket *so, int optval)
1013{
1014 return so_set_traffic_class(so, optval: (optval == 0) ?
1015 SO_TC_BE : SO_TC_BK_SYS);
1016}
1017
1018__private_extern__ int
1019so_get_opportunistic(struct socket *so)
1020{
1021 return so->so_traffic_class == SO_TC_BK_SYS;
1022}
1023
1024__private_extern__ int
1025so_tc_from_control(struct mbuf *control, int *out_netsvctype)
1026{
1027 struct cmsghdr *cm;
1028 int sotc = SO_TC_UNSPEC;
1029
1030 *out_netsvctype = _NET_SERVICE_TYPE_UNSPEC;
1031
1032 for (cm = M_FIRST_CMSGHDR(control);
1033 is_cmsg_valid(control, cmsg: cm);
1034 cm = M_NXT_CMSGHDR(control, cm)) {
1035 int val;
1036
1037 if (cm->cmsg_level != SOL_SOCKET ||
1038 cm->cmsg_len != CMSG_LEN(sizeof(int))) {
1039 continue;
1040 }
1041 val = *(int *)(void *)CMSG_DATA(cm);
1042 /*
1043 * The first valid option wins
1044 */
1045 switch (cm->cmsg_type) {
1046 case SO_TRAFFIC_CLASS:
1047 if (SO_VALID_TC(val)) {
1048 sotc = val;
1049 return sotc;
1050 /* NOT REACHED */
1051 } else if (val < SO_TC_NET_SERVICE_OFFSET) {
1052 break;
1053 }
1054 /*
1055 * Handle the case SO_NET_SERVICE_TYPE values are
1056 * passed using SO_TRAFFIC_CLASS
1057 */
1058 val = val - SO_TC_NET_SERVICE_OFFSET;
1059 OS_FALLTHROUGH;
1060 case SO_NET_SERVICE_TYPE:
1061 if (!IS_VALID_NET_SERVICE_TYPE(val)) {
1062 break;
1063 }
1064 *out_netsvctype = val;
1065 sotc = sotc_by_netservicetype[val];
1066 return sotc;
1067 /* NOT REACHED */
1068 default:
1069 break;
1070 }
1071 }
1072
1073 return sotc;
1074}
1075
1076__private_extern__ int
1077so_tos_from_control(struct mbuf *control)
1078{
1079 struct cmsghdr *cm;
1080 int tos = IPTOS_UNSPEC;
1081
1082 for (cm = M_FIRST_CMSGHDR(control);
1083 is_cmsg_valid(control, cmsg: cm);
1084 cm = M_NXT_CMSGHDR(control, cm)) {
1085 if (cm->cmsg_len != CMSG_LEN(sizeof(int))) {
1086 continue;
1087 }
1088
1089 if ((cm->cmsg_level == IPPROTO_IP &&
1090 cm->cmsg_type == IP_TOS) ||
1091 (cm->cmsg_level == IPPROTO_IPV6 &&
1092 cm->cmsg_type == IPV6_TCLASS)) {
1093 tos = *(int *)(void *)CMSG_DATA(cm) & IPTOS_MASK;
1094 /* The first valid option wins */
1095 break;
1096 }
1097 }
1098
1099 return tos;
1100}
1101
1102__private_extern__ void
1103so_recv_data_stat(struct socket *so, struct mbuf *m, size_t off)
1104{
1105 uint32_t mtc = m_get_traffic_class(m);
1106
1107 if (mtc >= SO_TC_STATS_MAX) {
1108 mtc = MBUF_TC_BE;
1109 }
1110
1111 so->so_tc_stats[mtc].rxpackets += 1;
1112 so->so_tc_stats[mtc].rxbytes +=
1113 ((m->m_flags & M_PKTHDR) ? m->m_pkthdr.len : 0) + off;
1114}
1115
1116__private_extern__ void
1117so_inc_recv_data_stat(struct socket *so, size_t pkts, size_t bytes,
1118 uint32_t mtc)
1119{
1120 if (mtc >= SO_TC_STATS_MAX) {
1121 mtc = MBUF_TC_BE;
1122 }
1123
1124 so->so_tc_stats[mtc].rxpackets += pkts;
1125 so->so_tc_stats[mtc].rxbytes += bytes;
1126}
1127
1128static inline int
1129so_throttle_best_effort(struct socket *so, struct ifnet *ifp)
1130{
1131 uint32_t uptime = (uint32_t)net_uptime();
1132 return soissrcbesteffort(so) &&
1133 net_io_policy_throttle_best_effort == 1 &&
1134 ifp->if_rt_sendts > 0 &&
1135 (int)(uptime - ifp->if_rt_sendts) <= TCP_BG_SWITCH_TIME;
1136}
1137
1138__private_extern__ void
1139set_tcp_stream_priority(struct socket *so)
1140{
1141 struct inpcb *inp = sotoinpcb(so);
1142 struct tcpcb *tp = intotcpcb(inp);
1143 struct ifnet *outifp;
1144 u_char old_cc = tp->tcp_cc_index;
1145 int recvbg = IS_TCP_RECV_BG(so);
1146 bool is_local = false, fg_active = false;
1147 uint32_t uptime;
1148
1149 VERIFY((SOCK_CHECK_DOM(so, PF_INET) ||
1150 SOCK_CHECK_DOM(so, PF_INET6)) &&
1151 SOCK_CHECK_TYPE(so, SOCK_STREAM) &&
1152 SOCK_CHECK_PROTO(so, IPPROTO_TCP));
1153
1154 /* Return if the socket is in a terminal state */
1155 if (inp->inp_state == INPCB_STATE_DEAD) {
1156 return;
1157 }
1158
1159 outifp = inp->inp_last_outifp;
1160 uptime = (uint32_t)net_uptime();
1161
1162 /*
1163 * If the socket was marked as a background socket or if the
1164 * traffic class is set to background with traffic class socket
1165 * option then make both send and recv side of the stream to be
1166 * background. The variable sotcdb which can be set with sysctl
1167 * is used to disable these settings for testing.
1168 */
1169 if (outifp == NULL || (outifp->if_flags & IFF_LOOPBACK)) {
1170 is_local = true;
1171 }
1172
1173 /* Check if there has been recent foreground activity */
1174 if (outifp != NULL) {
1175 /*
1176 * If the traffic source is background, check if
1177 * there is recent foreground activity which should
1178 * continue to keep the traffic source as background.
1179 * Otherwise, we can switch the traffic source to
1180 * foreground.
1181 */
1182 if (soissrcbackground(so) && outifp->if_fg_sendts > 0 &&
1183 (int)(uptime - outifp->if_fg_sendts) <= TCP_BG_SWITCH_TIME) {
1184 fg_active = true;
1185 }
1186
1187 /*
1188 * The traffic source is best-effort -- check if
1189 * the policy to throttle best effort is enabled
1190 * and there was realtime activity on this
1191 * interface recently. If this is true, enable
1192 * algorithms that respond to increased latency
1193 * on best-effort traffic.
1194 */
1195 if (so_throttle_best_effort(so, ifp: outifp)) {
1196 fg_active = true;
1197 }
1198 }
1199
1200 /*
1201 * System initiated background traffic like cloud uploads should
1202 * always use background delay sensitive algorithms. This will
1203 * make the stream more responsive to other streams on the user's
1204 * network and it will minimize latency induced.
1205 */
1206 if (fg_active || IS_SO_TC_BACKGROUNDSYSTEM(so->so_traffic_class)) {
1207 /*
1208 * If the interface that the connection is using is
1209 * loopback, do not use background congestion
1210 * control algorithm.
1211 *
1212 * If there has been recent foreground activity or if there
1213 * was an indication that a real time foreground application
1214 * is going to use networking (net_io_policy_throttled),
1215 * switch the background and best effort streams to use background
1216 * congestion control algorithm.
1217 */
1218 if ((sotcdb & SOTCDB_NO_SENDTCPBG) != 0 || is_local) {
1219 if (old_cc == TCP_CC_ALGO_BACKGROUND_INDEX) {
1220 tcp_set_foreground_cc(so);
1221 }
1222 } else {
1223 if (old_cc != TCP_CC_ALGO_BACKGROUND_INDEX) {
1224 tcp_set_background_cc(so);
1225 }
1226 }
1227
1228 /* Set receive side background flags */
1229 if ((sotcdb & SOTCDB_NO_RECVTCPBG) != 0 || is_local) {
1230 tcp_clear_recv_bg(so);
1231 } else {
1232 tcp_set_recv_bg(so);
1233 }
1234 } else {
1235 /*
1236 * If there is no recent foreground activity, even the
1237 * background flows can use foreground congestion controller.
1238 */
1239 tcp_clear_recv_bg(so);
1240 if (old_cc == TCP_CC_ALGO_BACKGROUND_INDEX) {
1241 tcp_set_foreground_cc(so);
1242 }
1243 }
1244
1245 if (old_cc != tp->tcp_cc_index || recvbg != IS_TCP_RECV_BG(so)) {
1246 SOTHROTTLELOG("throttle[%d]: so 0x%llx [%d,%d] TCP %s send; "
1247 "%s recv\n", so->last_pid,
1248 (uint64_t)VM_KERNEL_ADDRPERM(so),
1249 SOCK_DOM(so), SOCK_TYPE(so),
1250 (tp->tcp_cc_index == TCP_CC_ALGO_BACKGROUND_INDEX) ?
1251 "background" : "foreground",
1252 IS_TCP_RECV_BG(so) ? "background" : "foreground");
1253 }
1254}
1255
1256/*
1257 * Set traffic class to an IPv4 or IPv6 packet
1258 * - mark the mbuf
1259 * - set the DSCP code following the WMM mapping
1260 */
1261__private_extern__ void
1262set_packet_service_class(struct mbuf *m, struct socket *so,
1263 int sotc, uint32_t flags)
1264{
1265 mbuf_svc_class_t msc = MBUF_SC_BE; /* Best effort by default */
1266 struct inpcb *inp = sotoinpcb(so); /* in6pcb and inpcb are the same */
1267
1268 if (!(m->m_flags & M_PKTHDR)) {
1269 return;
1270 }
1271
1272 /*
1273 * Here is the precedence:
1274 * 1) TRAFFIC_MGT_SO_BACKGROUND trumps all
1275 * 2) Traffic class passed via ancillary data to sendmsdg(2)
1276 * 3) Traffic class socket option last
1277 */
1278 if (sotc != SO_TC_UNSPEC) {
1279 VERIFY(SO_VALID_TC(sotc));
1280 msc = so_tc2msc(sotc);
1281 /* Assert because tc must have been valid */
1282 VERIFY(MBUF_VALID_SC(msc));
1283 }
1284
1285 /*
1286 * If TRAFFIC_MGT_SO_BACKGROUND is set or policy to throttle
1287 * best effort is set, depress the priority.
1288 */
1289 if (!IS_MBUF_SC_BACKGROUND(msc) && soisthrottled(so)) {
1290 msc = MBUF_SC_BK;
1291 }
1292
1293 if (IS_MBUF_SC_BESTEFFORT(msc) && inp->inp_last_outifp != NULL &&
1294 so_throttle_best_effort(so, ifp: inp->inp_last_outifp)) {
1295 msc = MBUF_SC_BK;
1296 }
1297
1298 if (soissrcbackground(so)) {
1299 m->m_pkthdr.pkt_flags |= PKTF_SO_BACKGROUND;
1300 }
1301
1302 if (soissrcrealtime(so) || IS_MBUF_SC_REALTIME(msc)) {
1303 m->m_pkthdr.pkt_flags |= PKTF_SO_REALTIME;
1304 }
1305 /*
1306 * Set the traffic class in the mbuf packet header svc field
1307 */
1308 if (sotcdb & SOTCDB_NO_MTC) {
1309 goto no_mbtc;
1310 }
1311
1312 /*
1313 * Elevate service class if the packet is a pure TCP ACK.
1314 * We can do this only when the flow is not a background
1315 * flow and the outgoing interface supports
1316 * transmit-start model.
1317 */
1318 if (!IS_MBUF_SC_BACKGROUND(msc) &&
1319 (flags & (PKT_SCF_TCP_ACK | PKT_SCF_TCP_SYN)) != 0) {
1320 msc = MBUF_SC_CTL;
1321 }
1322
1323 (void) m_set_service_class(m, msc);
1324
1325 /*
1326 * Set the privileged traffic auxiliary flag if applicable,
1327 * or clear it.
1328 */
1329 if (!(sotcdb & SOTCDB_NO_PRIVILEGED) && soisprivilegedtraffic(so) &&
1330 msc != MBUF_SC_UNSPEC) {
1331 m->m_pkthdr.pkt_flags |= PKTF_PRIO_PRIVILEGED;
1332 } else {
1333 m->m_pkthdr.pkt_flags &= ~PKTF_PRIO_PRIVILEGED;
1334 }
1335
1336no_mbtc:
1337 /*
1338 * For TCP with background traffic class switch CC algo based on sysctl
1339 */
1340 if (so->so_type == SOCK_STREAM) {
1341 set_tcp_stream_priority(so);
1342 }
1343
1344 so_tc_update_stats(m, so, msc);
1345}
1346
1347__private_extern__ void
1348so_tc_update_stats(struct mbuf *m, struct socket *so, mbuf_svc_class_t msc)
1349{
1350 mbuf_traffic_class_t mtc;
1351
1352 /*
1353 * Assume socket and mbuf traffic class values are the same
1354 * Also assume the socket lock is held. Note that the stats
1355 * at the socket layer are reduced down to the legacy traffic
1356 * classes; we could/should potentially expand so_tc_stats[].
1357 */
1358 mtc = MBUF_SC2TC(msc);
1359 VERIFY(mtc < SO_TC_STATS_MAX);
1360 so->so_tc_stats[mtc].txpackets += 1;
1361 so->so_tc_stats[mtc].txbytes += m->m_pkthdr.len;
1362}
1363
1364__private_extern__ mbuf_svc_class_t
1365so_tc2msc(int tc)
1366{
1367 mbuf_svc_class_t msc;
1368
1369 switch (tc) {
1370 case SO_TC_BK_SYS:
1371 msc = MBUF_SC_BK_SYS;
1372 break;
1373 case SO_TC_BK:
1374 case _SO_TC_BK:
1375 msc = MBUF_SC_BK;
1376 break;
1377 case SO_TC_BE:
1378 msc = MBUF_SC_BE;
1379 break;
1380 case SO_TC_RD:
1381 msc = MBUF_SC_RD;
1382 break;
1383 case SO_TC_OAM:
1384 msc = MBUF_SC_OAM;
1385 break;
1386 case SO_TC_AV:
1387 msc = MBUF_SC_AV;
1388 break;
1389 case SO_TC_RV:
1390 msc = MBUF_SC_RV;
1391 break;
1392 case SO_TC_VI:
1393 case _SO_TC_VI:
1394 msc = MBUF_SC_VI;
1395 break;
1396 case SO_TC_NETSVC_SIG:
1397 msc = MBUF_SC_SIG;
1398 break;
1399 case SO_TC_VO:
1400 case _SO_TC_VO:
1401 msc = MBUF_SC_VO;
1402 break;
1403 case SO_TC_CTL:
1404 msc = MBUF_SC_CTL;
1405 break;
1406 case SO_TC_ALL:
1407 default:
1408 msc = MBUF_SC_UNSPEC;
1409 break;
1410 }
1411
1412 return msc;
1413}
1414
1415__private_extern__ int
1416so_svc2tc(mbuf_svc_class_t svc)
1417{
1418 switch (svc) {
1419 case MBUF_SC_BK_SYS:
1420 return SO_TC_BK_SYS;
1421 case MBUF_SC_BK:
1422 return SO_TC_BK;
1423 case MBUF_SC_BE:
1424 return SO_TC_BE;
1425 case MBUF_SC_RD:
1426 return SO_TC_RD;
1427 case MBUF_SC_OAM:
1428 return SO_TC_OAM;
1429 case MBUF_SC_AV:
1430 return SO_TC_AV;
1431 case MBUF_SC_RV:
1432 return SO_TC_RV;
1433 case MBUF_SC_VI:
1434 return SO_TC_VI;
1435 case MBUF_SC_SIG:
1436 return SO_TC_NETSVC_SIG;
1437 case MBUF_SC_VO:
1438 return SO_TC_VO;
1439 case MBUF_SC_CTL:
1440 return SO_TC_CTL;
1441 case MBUF_SC_UNSPEC:
1442 default:
1443 return SO_TC_BE;
1444 }
1445}
1446
1447static size_t
1448sotc_index(int sotc)
1449{
1450 switch (sotc) {
1451 case SO_TC_BK_SYS:
1452 return SOTCIX_BK_SYS;
1453 case _SO_TC_BK:
1454 case SO_TC_BK:
1455 return SOTCIX_BK;
1456
1457 case SO_TC_BE:
1458 return SOTCIX_BE;
1459 case SO_TC_RD:
1460 return SOTCIX_RD;
1461 case SO_TC_OAM:
1462 return SOTCIX_OAM;
1463
1464 case SO_TC_AV:
1465 return SOTCIX_AV;
1466 case SO_TC_RV:
1467 return SOTCIX_RV;
1468 case _SO_TC_VI:
1469 case SO_TC_VI:
1470 return SOTCIX_VI;
1471
1472 case _SO_TC_VO:
1473 case SO_TC_VO:
1474 return SOTCIX_VO;
1475 case SO_TC_CTL:
1476 return SOTCIX_CTL;
1477
1478 default:
1479 break;
1480 }
1481 /*
1482 * Unknown traffic class value
1483 */
1484 return SIZE_T_MAX;
1485}
1486
1487uint8_t
1488fastlane_sc_to_dscp(uint32_t svc_class)
1489{
1490 uint8_t dscp = _DSCP_DF;
1491
1492 switch (svc_class) {
1493 case MBUF_SC_BK_SYS:
1494 case MBUF_SC_BK:
1495 dscp = _DSCP_AF11;
1496 break;
1497
1498 case MBUF_SC_BE:
1499 dscp = _DSCP_DF;
1500 break;
1501 case MBUF_SC_RD:
1502 dscp = _DSCP_AF21;
1503 break;
1504 case MBUF_SC_OAM:
1505 dscp = _DSCP_CS2;
1506 break;
1507
1508 case MBUF_SC_AV:
1509 dscp = _DSCP_AF31;
1510 break;
1511 case MBUF_SC_RV:
1512 dscp = _DSCP_CS4;
1513 break;
1514 case MBUF_SC_VI:
1515 dscp = _DSCP_AF41;
1516 break;
1517 case MBUF_SC_SIG:
1518 dscp = _DSCP_CS3;
1519 break;
1520
1521 case MBUF_SC_VO:
1522 dscp = _DSCP_EF;
1523 break;
1524 case MBUF_SC_CTL:
1525 dscp = _DSCP_DF;
1526 break;
1527 default:
1528 dscp = _DSCP_DF;
1529 break;
1530 }
1531
1532 return dscp;
1533}
1534
1535uint8_t
1536rfc4594_sc_to_dscp(uint32_t svc_class)
1537{
1538 uint8_t dscp = _DSCP_DF;
1539
1540 switch (svc_class) {
1541 case MBUF_SC_BK_SYS: /* Low-Priority Data */
1542 case MBUF_SC_BK:
1543 dscp = _DSCP_CS1;
1544 break;
1545
1546 case MBUF_SC_BE: /* Standard */
1547 dscp = _DSCP_DF;
1548 break;
1549 case MBUF_SC_RD: /* Low-Latency Data */
1550 dscp = _DSCP_AF21;
1551 break;
1552
1553 /* SVC_CLASS Not Defined: High-Throughput Data */
1554
1555 case MBUF_SC_OAM: /* OAM */
1556 dscp = _DSCP_CS2;
1557 break;
1558
1559 /* SVC_CLASS Not Defined: Broadcast Video */
1560
1561 case MBUF_SC_AV: /* Multimedia Streaming */
1562 dscp = _DSCP_AF31;
1563 break;
1564 case MBUF_SC_RV: /* Real-Time Interactive */
1565 dscp = _DSCP_CS4;
1566 break;
1567 case MBUF_SC_VI: /* Multimedia Conferencing */
1568 dscp = _DSCP_AF41;
1569 break;
1570 case MBUF_SC_SIG: /* Signaling */
1571 dscp = _DSCP_CS5;
1572 break;
1573
1574 case MBUF_SC_VO: /* Telephony */
1575 dscp = _DSCP_EF;
1576 break;
1577 case MBUF_SC_CTL: /* Network Control*/
1578 dscp = _DSCP_CS6;
1579 break;
1580 default:
1581 dscp = _DSCP_DF;
1582 break;
1583 }
1584
1585 return dscp;
1586}
1587
1588mbuf_traffic_class_t
1589rfc4594_dscp_to_tc(uint8_t dscp)
1590{
1591 mbuf_traffic_class_t tc = MBUF_TC_BE;
1592
1593 switch (dscp) {
1594 case _DSCP_CS1:
1595 tc = MBUF_TC_BK;
1596 break;
1597 case _DSCP_DF:
1598 case _DSCP_AF21:
1599 case _DSCP_CS2:
1600 tc = MBUF_TC_BE;
1601 break;
1602 case _DSCP_AF31:
1603 case _DSCP_CS4:
1604 case _DSCP_AF41:
1605 case _DSCP_CS5:
1606 tc = MBUF_TC_VI;
1607 break;
1608 case _DSCP_EF:
1609 case _DSCP_CS6:
1610 tc = MBUF_TC_VO;
1611 break;
1612 default:
1613 tc = MBUF_TC_BE;
1614 break;
1615 }
1616
1617 return tc;
1618}
1619
1620/*
1621 * Pass NULL ifp for default map
1622 */
1623static errno_t
1624set_netsvctype_dscp_map(struct net_qos_dscp_map *net_qos_dscp_map,
1625 const struct netsvctype_dscp_map *netsvctype_dscp_map)
1626{
1627 size_t i;
1628 int netsvctype;
1629
1630 /*
1631 * Do not accept more that max number of distinct DSCPs
1632 */
1633 if (net_qos_dscp_map == NULL || netsvctype_dscp_map == NULL) {
1634 return EINVAL;
1635 }
1636
1637 /*
1638 * Validate input parameters
1639 */
1640 for (i = 0; i < _NET_SERVICE_TYPE_COUNT; i++) {
1641 if (!IS_VALID_NET_SERVICE_TYPE(netsvctype_dscp_map[i].netsvctype)) {
1642 return EINVAL;
1643 }
1644 if (netsvctype_dscp_map[i].dscp > _MAX_DSCP) {
1645 return EINVAL;
1646 }
1647 }
1648
1649 for (i = 0; i < _NET_SERVICE_TYPE_COUNT; i++) {
1650 netsvctype = netsvctype_dscp_map[i].netsvctype;
1651
1652 net_qos_dscp_map->netsvctype_to_dscp[netsvctype] =
1653 netsvctype_dscp_map[i].dscp;
1654 }
1655 for (netsvctype = 0; netsvctype < _NET_SERVICE_TYPE_COUNT; netsvctype++) {
1656 switch (netsvctype) {
1657 case NET_SERVICE_TYPE_BE:
1658 case NET_SERVICE_TYPE_BK:
1659 case NET_SERVICE_TYPE_VI:
1660 case NET_SERVICE_TYPE_VO:
1661 case NET_SERVICE_TYPE_RV:
1662 case NET_SERVICE_TYPE_AV:
1663 case NET_SERVICE_TYPE_OAM:
1664 case NET_SERVICE_TYPE_RD: {
1665 size_t sotcix;
1666
1667 sotcix = sotc_index(sotc: sotc_by_netservicetype[netsvctype]);
1668 if (sotcix != SIZE_T_MAX) {
1669 net_qos_dscp_map->sotc_to_dscp[sotcix] =
1670 netsvctype_dscp_map[netsvctype].dscp;
1671 }
1672 break;
1673 }
1674 case NET_SERVICE_TYPE_SIG:
1675 /* Signaling does not have its own traffic class */
1676 break;
1677 default:
1678 /* We should not be here */
1679 ASSERT(0);
1680 }
1681 }
1682 if (net_qos_dscp_map == &fastlane_net_qos_dscp_map) {
1683 /* Network control socket traffic class is always best effort for fastlane*/
1684 net_qos_dscp_map->sotc_to_dscp[SOTCIX_CTL] = _DSCP_DF;
1685 } else {
1686 net_qos_dscp_map->sotc_to_dscp[SOTCIX_CTL] = _DSCP_CS6;
1687 }
1688
1689 /* Background system socket traffic class DSCP same as background */
1690 net_qos_dscp_map->sotc_to_dscp[SOTCIX_BK_SYS] =
1691 net_qos_dscp_map->sotc_to_dscp[SOTCIX_BK];
1692
1693 return 0;
1694}
1695
1696static size_t
1697get_netsvctype_dscp_map(struct netsvctype_dscp_map *netsvctype_dscp_map)
1698{
1699 struct net_qos_dscp_map *net_qos_dscp_map;
1700 int i;
1701
1702 net_qos_dscp_map = &fastlane_net_qos_dscp_map;
1703
1704 for (i = 0; i < _NET_SERVICE_TYPE_COUNT; i++) {
1705 netsvctype_dscp_map[i].netsvctype = i;
1706 netsvctype_dscp_map[i].dscp = net_qos_dscp_map->netsvctype_to_dscp[i];
1707 }
1708
1709 return i * sizeof(struct netsvctype_dscp_map);
1710}
1711
1712void
1713net_qos_map_init()
1714{
1715 errno_t error;
1716
1717 error = set_netsvctype_dscp_map(net_qos_dscp_map: &fastlane_net_qos_dscp_map,
1718 netsvctype_dscp_map: fastlane_netsvctype_dscp_map);
1719 ASSERT(error == 0);
1720
1721 error = set_netsvctype_dscp_map(net_qos_dscp_map: &rfc4594_net_qos_dscp_map,
1722 netsvctype_dscp_map: rfc4594_netsvctype_dscp_map);
1723 ASSERT(error == 0);
1724
1725#if (DEBUG || DEVELOPMENT)
1726 error = set_netsvctype_dscp_map(&custom_net_qos_dscp_map,
1727 rfc4594_netsvctype_dscp_map);
1728 ASSERT(error == 0);
1729
1730#endif /* (DEBUG || DEVELOPMENT) */
1731
1732 set_dscp_to_wifi_ac_map(default_dscp_to_wifi_ac_map, 1);
1733}
1734
1735int
1736sysctl_default_netsvctype_to_dscp_map SYSCTL_HANDLER_ARGS
1737{
1738#pragma unused(oidp, arg1, arg2)
1739 int error = 0;
1740
1741 if (req->oldptr == USER_ADDR_NULL) {
1742 req->oldidx =
1743 _NET_SERVICE_TYPE_COUNT * sizeof(struct netsvctype_dscp_map);
1744 } else if (req->oldlen > 0) {
1745 struct netsvctype_dscp_map netsvctype_dscp_map[_NET_SERVICE_TYPE_COUNT] = {};
1746 size_t len;
1747
1748 len = get_netsvctype_dscp_map(netsvctype_dscp_map);
1749
1750 error = SYSCTL_OUT(req, netsvctype_dscp_map,
1751 MIN(len, req->oldlen));
1752 if (error != 0) {
1753 goto done;
1754 }
1755 }
1756
1757 if (req->newptr != USER_ADDR_NULL) {
1758 error = EPERM;
1759 }
1760done:
1761 return error;
1762}
1763
1764__private_extern__ errno_t
1765set_packet_qos(struct mbuf *m, struct ifnet *ifp, boolean_t qos_allowed,
1766 int sotc, int netsvctype, uint8_t *dscp_inout)
1767{
1768 if (ifp == NULL || dscp_inout == NULL) {
1769 return EINVAL;
1770 }
1771
1772 if ((ifp->if_eflags & IFEF_QOSMARKING_ENABLED) != 0 &&
1773 ifp->if_qosmarking_mode != IFRTYPE_QOSMARKING_MODE_NONE) {
1774 uint8_t dscp;
1775 const struct net_qos_dscp_map *net_qos_dscp_map = NULL;
1776
1777 switch (ifp->if_qosmarking_mode) {
1778 case IFRTYPE_QOSMARKING_FASTLANE:
1779 net_qos_dscp_map = &fastlane_net_qos_dscp_map;
1780 break;
1781 case IFRTYPE_QOSMARKING_RFC4594:
1782 net_qos_dscp_map = &rfc4594_net_qos_dscp_map;
1783 break;
1784#if (DEBUG || DEVELOPMENT)
1785 case IFRTYPE_QOSMARKING_CUSTOM:
1786 net_qos_dscp_map = &custom_net_qos_dscp_map;
1787 break;
1788#endif /* (DEBUG || DEVELOPMENT) */
1789 default:
1790 panic("invalid QoS marking type");
1791 /* NOTREACHED */
1792 }
1793
1794 /*
1795 * When on a Fastlane network, IP_TOS/IPV6_TCLASS are no-ops
1796 */
1797 dscp = _DSCP_DF;
1798
1799 /*
1800 * For DSCP use the network service type is specified, otherwise
1801 * use the socket traffic class
1802 *
1803 * When not whitelisted by the policy, set DSCP only for best
1804 * effort and background, and set the mbuf service class to
1805 * best effort as well so the packet will be queued and
1806 * scheduled at a lower priority.
1807 * We still want to prioritize control traffic on the interface
1808 * so we do not change the mbuf service class for SO_TC_CTL
1809 */
1810 if (IS_VALID_NET_SERVICE_TYPE(netsvctype) &&
1811 netsvctype != NET_SERVICE_TYPE_BE) {
1812 dscp = net_qos_dscp_map->netsvctype_to_dscp[netsvctype];
1813
1814 if (qos_allowed == FALSE &&
1815 netsvctype != NET_SERVICE_TYPE_BE &&
1816 netsvctype != NET_SERVICE_TYPE_BK) {
1817 dscp = _DSCP_DF;
1818 if (sotc != SO_TC_CTL) {
1819 m_set_service_class(m, MBUF_SC_BE);
1820 }
1821 }
1822 } else if (sotc != SO_TC_UNSPEC) {
1823 size_t sotcix = sotc_index(sotc);
1824 if (sotcix != SIZE_T_MAX) {
1825 dscp = net_qos_dscp_map->sotc_to_dscp[sotcix];
1826
1827 if (qos_allowed == FALSE && sotc != SO_TC_BE &&
1828 sotc != SO_TC_BK && sotc != SO_TC_BK_SYS &&
1829 sotc != SO_TC_CTL) {
1830 dscp = _DSCP_DF;
1831 if (sotc != SO_TC_CTL) {
1832 m_set_service_class(m, MBUF_SC_BE);
1833 }
1834 }
1835 }
1836 }
1837 if (net_qos_verbose != 0) {
1838 printf("%s qos_allowed %d sotc %u netsvctype %u dscp %u\n",
1839 __func__, qos_allowed, sotc, netsvctype, dscp);
1840 }
1841
1842 if (*dscp_inout != dscp) {
1843 *dscp_inout = dscp;
1844 }
1845 } else if (*dscp_inout != _DSCP_DF && IFNET_IS_WIFI_INFRA(ifp)) {
1846 mbuf_svc_class_t msc = m_get_service_class(m);
1847
1848 /*
1849 * For WiFi infra, when the mbuf service class is best effort
1850 * and the DSCP is not default, set the service class based
1851 * on DSCP
1852 */
1853 if (msc == MBUF_SC_BE) {
1854 msc = wifi_dscp_to_msc_array[*dscp_inout];
1855
1856 if (msc != MBUF_SC_BE) {
1857 m_set_service_class(m, msc);
1858
1859 if (net_qos_verbose != 0) {
1860 printf("%s set msc %u for dscp %u\n",
1861 __func__, msc, *dscp_inout);
1862 }
1863 }
1864 }
1865 }
1866
1867 return 0;
1868}
1869
1870static void
1871set_dscp_to_wifi_ac_map(const struct dcsp_msc_map *map, int clear)
1872{
1873 int i;
1874
1875 if (clear) {
1876 bzero(s: wifi_dscp_to_msc_array, n: sizeof(wifi_dscp_to_msc_array));
1877 }
1878
1879 for (i = 0; i < DSCP_ARRAY_SIZE; i++) {
1880 const struct dcsp_msc_map *elem = map + i;
1881
1882 if (elem->dscp > _MAX_DSCP || elem->msc == MBUF_SC_UNSPEC) {
1883 break;
1884 }
1885 switch (elem->msc) {
1886 case MBUF_SC_BK_SYS:
1887 case MBUF_SC_BK:
1888 wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_BK;
1889 break;
1890 default:
1891 case MBUF_SC_BE:
1892 case MBUF_SC_RD:
1893 case MBUF_SC_OAM:
1894 wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_BE;
1895 break;
1896 case MBUF_SC_AV:
1897 case MBUF_SC_RV:
1898 case MBUF_SC_VI:
1899 wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_VI;
1900 break;
1901 case MBUF_SC_VO:
1902 case MBUF_SC_CTL:
1903 wifi_dscp_to_msc_array[elem->dscp] = MBUF_SC_VO;
1904 break;
1905 }
1906 }
1907}
1908
1909static errno_t
1910dscp_msc_map_from_netsvctype_dscp_map(struct netsvctype_dscp_map *netsvctype_dscp_map,
1911 size_t count, struct dcsp_msc_map *dcsp_msc_map)
1912{
1913 errno_t error = 0;
1914 uint32_t i;
1915
1916 /*
1917 * Validate input parameters
1918 */
1919 for (i = 0; i < count; i++) {
1920 if (!SO_VALID_TC(netsvctype_dscp_map[i].netsvctype)) {
1921 error = EINVAL;
1922 goto done;
1923 }
1924 if (netsvctype_dscp_map[i].dscp > _MAX_DSCP) {
1925 error = EINVAL;
1926 goto done;
1927 }
1928 }
1929
1930 bzero(s: dcsp_msc_map, DSCP_ARRAY_SIZE * sizeof(struct dcsp_msc_map));
1931
1932 for (i = 0; i < count; i++) {
1933 dcsp_msc_map[i].dscp = netsvctype_dscp_map[i].dscp;
1934 dcsp_msc_map[i].msc = so_tc2msc(tc: netsvctype_dscp_map[i].netsvctype);
1935 }
1936done:
1937 return error;
1938}
1939
1940int
1941sysctl_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS
1942{
1943#pragma unused(oidp, arg1, arg2)
1944 int error = 0;
1945 size_t len = DSCP_ARRAY_SIZE * sizeof(struct netsvctype_dscp_map);
1946 struct netsvctype_dscp_map netsvctype_dscp_map[DSCP_ARRAY_SIZE] = {};
1947 struct dcsp_msc_map dcsp_msc_map[DSCP_ARRAY_SIZE];
1948 size_t count;
1949
1950 if (req->oldptr == USER_ADDR_NULL) {
1951 req->oldidx = len;
1952 } else if (req->oldlen > 0) {
1953 uint8_t i;
1954
1955 for (i = 0; i < DSCP_ARRAY_SIZE; i++) {
1956 netsvctype_dscp_map[i].dscp = i;
1957 netsvctype_dscp_map[i].netsvctype =
1958 so_svc2tc(svc: wifi_dscp_to_msc_array[i]);
1959 }
1960 error = SYSCTL_OUT(req, netsvctype_dscp_map,
1961 MIN(len, req->oldlen));
1962 if (error != 0) {
1963 goto done;
1964 }
1965 }
1966
1967 if (req->newptr == USER_ADDR_NULL) {
1968 goto done;
1969 }
1970
1971 error = proc_suser(p: current_proc());
1972 if (error != 0) {
1973 goto done;
1974 }
1975
1976 /*
1977 * Check input length
1978 */
1979 if (req->newlen > len) {
1980 error = EINVAL;
1981 goto done;
1982 }
1983 /*
1984 * Cap the number of entries to copy from input buffer
1985 */
1986 if (len > req->newlen) {
1987 len = req->newlen;
1988 }
1989 error = SYSCTL_IN(req, netsvctype_dscp_map, len);
1990 if (error != 0) {
1991 goto done;
1992 }
1993 count = len / sizeof(struct netsvctype_dscp_map);
1994 bzero(s: dcsp_msc_map, n: sizeof(dcsp_msc_map));
1995 error = dscp_msc_map_from_netsvctype_dscp_map(netsvctype_dscp_map, count,
1996 dcsp_msc_map);
1997 if (error != 0) {
1998 goto done;
1999 }
2000 set_dscp_to_wifi_ac_map(map: dcsp_msc_map, clear: 0);
2001done:
2002 return error;
2003}
2004
2005int
2006sysctl_reset_dscp_to_wifi_ac_map SYSCTL_HANDLER_ARGS
2007{
2008#pragma unused(oidp, arg1, arg2)
2009 int error = 0;
2010 int val = 0;
2011
2012 error = sysctl_handle_int(oidp, arg1: &val, arg2: 0, req);
2013 if (error || !req->newptr) {
2014 return error;
2015 }
2016 if (req->newptr == USER_ADDR_NULL) {
2017 return 0;
2018 }
2019 error = proc_suser(p: current_proc());
2020 if (error != 0) {
2021 return error;
2022 }
2023
2024 set_dscp_to_wifi_ac_map(map: default_dscp_to_wifi_ac_map, clear: 1);
2025
2026 return 0;
2027}
2028
2029/*
2030 * Returns whether a large upload or download transfer should be marked as
2031 * BK service type for network activity. This is a system level
2032 * hint/suggestion to classify application traffic based on statistics
2033 * collected from the current network attachment
2034 *
2035 * Returns 1 for BK and 0 for default
2036 */
2037
2038int
2039net_qos_guideline(struct proc *p, struct net_qos_guideline_args *arg,
2040 int *retval)
2041{
2042#pragma unused(p)
2043#define RETURN_USE_BK 1
2044#define RETURN_USE_DEFAULT 0
2045 struct net_qos_param qos_arg;
2046 struct ifnet *ipv4_primary, *ipv6_primary;
2047 int err = 0;
2048
2049 if (arg->param == USER_ADDR_NULL || retval == NULL ||
2050 arg->param_len != sizeof(qos_arg)) {
2051 return EINVAL;
2052 }
2053 err = copyin(arg->param, (caddr_t) &qos_arg, sizeof(qos_arg));
2054 if (err != 0) {
2055 return err;
2056 }
2057
2058 *retval = RETURN_USE_DEFAULT;
2059 ipv4_primary = ifindex2ifnet[get_primary_ifscope(AF_INET)];
2060 ipv6_primary = ifindex2ifnet[get_primary_ifscope(AF_INET6)];
2061
2062 /*
2063 * If either of the interfaces is in Low Internet mode, enable
2064 * background delay based algorithms on this transfer
2065 */
2066 if (qos_arg.nq_uplink) {
2067 if ((ipv4_primary != NULL &&
2068 (ipv4_primary->if_xflags & IFXF_LOW_INTERNET_UL)) ||
2069 (ipv6_primary != NULL &&
2070 (ipv6_primary->if_xflags & IFXF_LOW_INTERNET_UL))) {
2071 *retval = RETURN_USE_BK;
2072 return 0;
2073 }
2074 } else {
2075 if ((ipv4_primary != NULL &&
2076 (ipv4_primary->if_xflags & IFXF_LOW_INTERNET_DL)) ||
2077 (ipv6_primary != NULL &&
2078 (ipv6_primary->if_xflags & IFXF_LOW_INTERNET_DL))) {
2079 *retval = RETURN_USE_BK;
2080 return 0;
2081 }
2082 }
2083
2084 /*
2085 * Some times IPv4 and IPv6 primary interfaces can be different.
2086 * In this case, if either of them is non-cellular, we should mark
2087 * the transfer as BK as it can potentially get used based on
2088 * the host name resolution
2089 */
2090 if (ipv4_primary != NULL && IFNET_IS_EXPENSIVE(ipv4_primary) &&
2091 ipv6_primary != NULL && IFNET_IS_EXPENSIVE(ipv6_primary)) {
2092 if (qos_arg.nq_use_expensive) {
2093 return 0;
2094 } else {
2095 *retval = RETURN_USE_BK;
2096 return 0;
2097 }
2098 }
2099 if (ipv4_primary != NULL && IFNET_IS_CONSTRAINED(ipv4_primary) &&
2100 ipv6_primary != NULL && IFNET_IS_CONSTRAINED(ipv6_primary)) {
2101 if (qos_arg.nq_use_constrained) {
2102 return 0;
2103 } else {
2104 *retval = RETURN_USE_BK;
2105 return 0;
2106 }
2107 }
2108 if (qos_arg.nq_transfer_size >= 5 * 1024 * 1024) {
2109 *retval = RETURN_USE_BK;
2110 return 0;
2111 }
2112
2113
2114#undef RETURN_USE_BK
2115#undef RETURN_USE_DEFAULT
2116 return 0;
2117}
2118
2119#if (DEBUG || DEVELOPMENT)
2120/*
2121 * Customizable QoS mapping table
2122 * By default it uses the mapping table for RFC 4594
2123 *
2124 * Notes:
2125 * BK_SYS is the same as BK
2126 * CTL cannot be changed and is always _DSCP_CS6
2127 */
2128SYSCTL_NODE(_net_qos, OID_AUTO, custom,
2129 CTLFLAG_RW | CTLFLAG_LOCKED, 0, "");
2130
2131SYSCTL_NODE(_net_qos_custom, OID_AUTO, netsvctype_to_dscp,
2132 CTLFLAG_RW | CTLFLAG_LOCKED, 0, "");
2133
2134static int sysctl_net_qos_custom_netsvctype_to_dscp SYSCTL_HANDLER_ARGS;
2135SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, be,
2136 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
2137 0, NET_SERVICE_TYPE_BE, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
2138SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, bk,
2139 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
2140 0, NET_SERVICE_TYPE_BK, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
2141SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, sig,
2142 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
2143 0, NET_SERVICE_TYPE_SIG, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
2144SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, vi,
2145 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
2146 0, NET_SERVICE_TYPE_VI, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
2147SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, vo,
2148 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
2149 0, NET_SERVICE_TYPE_VO, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
2150SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, rv,
2151 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
2152 0, NET_SERVICE_TYPE_RV, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
2153SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, av,
2154 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
2155 0, NET_SERVICE_TYPE_AV, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
2156SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, oam,
2157 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
2158 0, NET_SERVICE_TYPE_OAM, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
2159SYSCTL_PROC(_net_qos_custom_netsvctype_to_dscp, OID_AUTO, rd,
2160 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
2161 0, NET_SERVICE_TYPE_RD, sysctl_net_qos_custom_netsvctype_to_dscp, "I", "");
2162
2163static int sysctl_net_qos_custom_reset SYSCTL_HANDLER_ARGS;
2164SYSCTL_PROC(_net_qos_custom, OID_AUTO, reset,
2165 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
2166 0, 0, sysctl_net_qos_custom_reset, "I", "");
2167
2168int
2169sysctl_net_qos_custom_netsvctype_to_dscp SYSCTL_HANDLER_ARGS
2170{
2171#pragma unused(arg1)
2172 int error = 0;
2173
2174 switch (arg2) {
2175 case NET_SERVICE_TYPE_BE:
2176 case NET_SERVICE_TYPE_BK:
2177 case NET_SERVICE_TYPE_SIG:
2178 case NET_SERVICE_TYPE_VI:
2179 case NET_SERVICE_TYPE_VO:
2180 case NET_SERVICE_TYPE_RV:
2181 case NET_SERVICE_TYPE_AV:
2182 case NET_SERVICE_TYPE_OAM:
2183 case NET_SERVICE_TYPE_RD:
2184 break;
2185 default:
2186 os_log(OS_LOG_DEFAULT, "%s: unexpected netsvctype %d",
2187 __func__, arg2);
2188 return EINVAL;
2189 }
2190
2191 int val = custom_net_qos_dscp_map.netsvctype_to_dscp[arg2];
2192 error = sysctl_handle_int(oidp, &val, 0, req);
2193 if (error != 0 || req->newptr == USER_ADDR_NULL) {
2194 return error;
2195 }
2196 if (req->newptr == USER_ADDR_NULL) {
2197 return 0;
2198 }
2199 error = proc_suser(current_proc());
2200 if (error != 0) {
2201 return error;
2202 }
2203 if (val < 0 || val > _MAX_DSCP) {
2204 os_log(OS_LOG_DEFAULT, "%s: unexpected DSCP %d",
2205 __func__, val);
2206 return EINVAL;
2207 }
2208
2209 struct netsvctype_dscp_map netsvctype_dscp_map[_NET_SERVICE_TYPE_COUNT] = {};
2210
2211 for (int i = 0; i < _NET_SERVICE_TYPE_COUNT; i++) {
2212 netsvctype_dscp_map[i].netsvctype = i;
2213 netsvctype_dscp_map[i].dscp = custom_net_qos_dscp_map.netsvctype_to_dscp[i];
2214 }
2215 netsvctype_dscp_map[arg2].dscp = (uint8_t) val;
2216
2217 error = set_netsvctype_dscp_map(&custom_net_qos_dscp_map,
2218 netsvctype_dscp_map);
2219
2220 return 0;
2221}
2222
2223int
2224sysctl_net_qos_custom_reset SYSCTL_HANDLER_ARGS
2225{
2226#pragma unused(arg1, arg2)
2227 int error = 0;
2228 int val = 0;
2229
2230 error = sysctl_handle_int(oidp, &val, 0, req);
2231 if (error || !req->newptr) {
2232 return error;
2233 }
2234 if (req->newptr == USER_ADDR_NULL) {
2235 return 0;
2236 }
2237 error = proc_suser(current_proc());
2238 if (error != 0) {
2239 return error;
2240 }
2241
2242 error = set_netsvctype_dscp_map(&custom_net_qos_dscp_map,
2243 rfc4594_netsvctype_dscp_map);
2244
2245 return error;
2246}
2247
2248uint8_t
2249custom_sc_to_dscp(uint32_t svc_class)
2250{
2251 uint8_t dscp = _DSCP_DF;
2252
2253 switch (svc_class) {
2254 case MBUF_SC_BK_SYS:
2255 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_BK_SYS];
2256 break;
2257 case MBUF_SC_BK:
2258 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_BK];
2259 break;
2260
2261 case MBUF_SC_BE:
2262 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_BE];
2263 break;
2264 case MBUF_SC_RD:
2265 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_RD];
2266 break;
2267 case MBUF_SC_OAM:
2268 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_OAM];
2269 break;
2270
2271 case MBUF_SC_AV:
2272 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_AV];
2273 break;
2274 case MBUF_SC_RV:
2275 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_RV];
2276 break;
2277 case MBUF_SC_VI:
2278 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_VI];
2279 break;
2280 case MBUF_SC_SIG:
2281 dscp = custom_net_qos_dscp_map.netsvctype_to_dscp[NET_SERVICE_TYPE_SIG];
2282 break;
2283
2284 case MBUF_SC_VO:
2285 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_VO];
2286 break;
2287 case MBUF_SC_CTL:
2288 dscp = custom_net_qos_dscp_map.sotc_to_dscp[SOTCIX_CTL];
2289 break;
2290 default:
2291 break;
2292 }
2293 return dscp;
2294}
2295#endif /* (DEBUG || DEVELOPMENT) */
2296