1/*
2 * Copyright (c) 2012-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <sys/param.h>
30#include <sys/systm.h>
31#include <sys/kernel.h>
32#include <sys/socket.h>
33#include <sys/socketvar.h>
34#include <sys/protosw.h>
35#include <sys/mcache.h>
36#include <sys/syslog.h>
37#include <sys/proc.h>
38#include <sys/proc_internal.h>
39#include <sys/resourcevar.h>
40#include <sys/kauth.h>
41#include <sys/priv.h>
42
43#include <net/if.h>
44#include <netinet/in.h>
45#include <netinet/in_var.h>
46#include <netinet/tcp.h>
47#include <netinet/tcp_fsm.h>
48#include <netinet/tcp_seq.h>
49#include <netinet/tcp_var.h>
50#include <netinet/tcp_timer.h>
51#include <netinet/mptcp.h>
52#include <netinet/mptcp_var.h>
53#include <netinet/mptcp_timer.h>
54
55#include <mach/sdt.h>
56#include <net/sockaddr_utils.h>
57
58static int mptcp_usr_attach(struct socket *, int, struct proc *);
59static int mptcp_usr_detach(struct socket *);
60static int mptcp_attach(struct socket *, struct proc *);
61static int mptcp_usr_connectx(struct socket *, struct sockaddr *,
62 struct sockaddr *, struct proc *, uint32_t, sae_associd_t,
63 sae_connid_t *, uint32_t, void *, uint32_t, struct uio *, user_ssize_t *);
64static int mptcp_getassocids(struct mptses *, uint32_t *, user_addr_t);
65static int mptcp_getconnids(struct mptses *, sae_associd_t, uint32_t *,
66 user_addr_t);
67static int mptcp_getconninfo(struct mptses *, sae_connid_t *, uint32_t *,
68 uint32_t *, int32_t *, user_addr_t, socklen_t *, user_addr_t, socklen_t *,
69 uint32_t *, user_addr_t, uint32_t *);
70static int mptcp_usr_control(struct socket *, u_long, caddr_t, struct ifnet *,
71 struct proc *);
72static int mptcp_disconnect(struct mptses *);
73static int mptcp_usr_disconnect(struct socket *);
74static int mptcp_usr_disconnectx(struct socket *, sae_associd_t, sae_connid_t);
75static struct mptses *mptcp_usrclosed(struct mptses *);
76static int mptcp_usr_rcvd(struct socket *, int);
77static int mptcp_usr_send(struct socket *, int, struct mbuf *,
78 struct sockaddr *, struct mbuf *, struct proc *);
79static int mptcp_usr_shutdown(struct socket *);
80static int mptcp_usr_sosend(struct socket *, struct sockaddr *, struct uio *,
81 struct mbuf *, struct mbuf *, int);
82static int mptcp_usr_socheckopt(struct socket *, struct sockopt *);
83static int mptcp_usr_preconnect(struct socket *so);
84
85struct pr_usrreqs mptcp_usrreqs = {
86 .pru_attach = mptcp_usr_attach,
87 .pru_connectx = mptcp_usr_connectx,
88 .pru_control = mptcp_usr_control,
89 .pru_detach = mptcp_usr_detach,
90 .pru_disconnect = mptcp_usr_disconnect,
91 .pru_disconnectx = mptcp_usr_disconnectx,
92 .pru_peeraddr = mp_getpeeraddr,
93 .pru_rcvd = mptcp_usr_rcvd,
94 .pru_send = mptcp_usr_send,
95 .pru_shutdown = mptcp_usr_shutdown,
96 .pru_sockaddr = mp_getsockaddr,
97 .pru_sosend = mptcp_usr_sosend,
98 .pru_soreceive = soreceive,
99 .pru_socheckopt = mptcp_usr_socheckopt,
100 .pru_preconnect = mptcp_usr_preconnect,
101};
102
103
104int mptcp_developer_mode = 0;
105SYSCTL_INT(_net_inet_mptcp, OID_AUTO, allow_aggregate, CTLFLAG_RW | CTLFLAG_LOCKED,
106 &mptcp_developer_mode, 0, "Allow the Multipath aggregation mode");
107
108int mptcp_no_first_party = 0;
109SYSCTL_INT(_net_inet_mptcp, OID_AUTO, no_first_party, CTLFLAG_RW | CTLFLAG_LOCKED,
110 &mptcp_no_first_party, 0, "Do not do first-party app exemptions");
111
112static unsigned long mptcp_expected_progress_headstart = 5000;
113SYSCTL_ULONG(_net_inet_mptcp, OID_AUTO, expected_progress_headstart, CTLFLAG_RW | CTLFLAG_LOCKED,
114 &mptcp_expected_progress_headstart, "Headstart to give MPTCP before meeting the progress deadline");
115
116
117/*
118 * Attaches an MPTCP control block to a socket.
119 */
120static int
121mptcp_usr_attach(struct socket *mp_so, int proto, struct proc *p)
122{
123#pragma unused(proto)
124 int error;
125
126 VERIFY(mpsotomppcb(mp_so) == NULL);
127
128 error = mptcp_attach(mp_so, p);
129 if (error) {
130 goto out;
131 }
132
133 if ((mp_so->so_options & SO_LINGER) && mp_so->so_linger == 0) {
134 mp_so->so_linger = (short)(TCP_LINGERTIME * hz);
135 }
136out:
137 return error;
138}
139
140/*
141 * Detaches an MPTCP control block from a socket.
142 */
143static int
144mptcp_usr_detach(struct socket *mp_so)
145{
146 struct mptses *mpte = mpsotompte(so: mp_so);
147 struct mppcb *mpp = mpsotomppcb(mp_so);
148
149 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
150 os_log_error(mptcp_log_handle, "%s - %lx: state: %d\n",
151 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
152 mpp ? mpp->mpp_state : -1);
153 return EINVAL;
154 }
155
156 /*
157 * We are done with this MPTCP socket (it has been closed);
158 * trigger all subflows to be disconnected, if not already,
159 * by initiating the PCB detach sequence (SOF_PCBCLEARING
160 * will be set.)
161 */
162 mp_pcbdetach(mp_so);
163
164 mptcp_disconnect(mpte);
165
166 return 0;
167}
168
169/*
170 * Attach MPTCP protocol to socket, allocating MP control block,
171 * MPTCP session, control block, buffer space, etc.
172 */
173static int
174mptcp_attach(struct socket *mp_so, struct proc *p)
175{
176#pragma unused(p)
177 struct mptses *mpte = NULL;
178 struct mptcb *mp_tp = NULL;
179 struct mppcb *mpp = NULL;
180 int error = 0;
181
182 if (mp_so->so_snd.sb_hiwat == 0 || mp_so->so_rcv.sb_hiwat == 0) {
183 error = soreserve(so: mp_so, sndcc: tcp_sendspace, rcvcc: tcp_recvspace);
184 if (error != 0) {
185 goto out;
186 }
187 }
188
189 if (mp_so->so_snd.sb_preconn_hiwat == 0) {
190 soreserve_preconnect(so: mp_so, pre_cc: 2048);
191 }
192
193 if ((mp_so->so_rcv.sb_flags & SB_USRSIZE) == 0) {
194 mp_so->so_rcv.sb_flags |= SB_AUTOSIZE;
195 }
196 if ((mp_so->so_snd.sb_flags & SB_USRSIZE) == 0) {
197 mp_so->so_snd.sb_flags |= SB_AUTOSIZE;
198 }
199
200 /*
201 * MPTCP send-socket buffers cannot be compressed, due to the
202 * fact that each mbuf chained via m_next is a M_PKTHDR
203 * which carries some MPTCP metadata.
204 */
205 mp_so->so_snd.sb_flags |= SB_NOCOMPRESS;
206
207 if ((error = mp_pcballoc(mp_so, &mtcbinfo)) != 0) {
208 goto out;
209 }
210
211 mpp = mpsotomppcb(mp_so);
212 mpte = (struct mptses *)mpp->mpp_pcbe;
213 mp_tp = mpte->mpte_mptcb;
214
215 VERIFY(mp_tp != NULL);
216out:
217 return error;
218}
219
220static int
221mptcp_entitlement_check(struct socket *mp_so, uint8_t svctype)
222{
223 struct mptses *mpte = mpsotompte(so: mp_so);
224
225 if (mptcp_no_first_party) {
226 return 0;
227 }
228
229 /* First, check for mptcp_extended without delegation */
230 if (soopt_cred_check(so: mp_so, PRIV_NET_RESTRICTED_MULTIPATH_EXTENDED, TRUE, FALSE) == 0) {
231 /*
232 * This means the app has the extended entitlement. Thus,
233 * it's a first party app and can run without restrictions.
234 */
235 mpte->mpte_flags |= MPTE_FIRSTPARTY;
236 return 0;
237 }
238
239 /* Now with delegation */
240 if (mp_so->so_flags & SOF_DELEGATED &&
241 soopt_cred_check(so: mp_so, PRIV_NET_RESTRICTED_MULTIPATH_EXTENDED, TRUE, TRUE) == 0) {
242 /*
243 * This means the app has the extended entitlement. Thus,
244 * it's a first party app and can run without restrictions.
245 */
246 mpte->mpte_flags |= MPTE_FIRSTPARTY;
247 return 0;
248 }
249
250 if (svctype == MPTCP_SVCTYPE_AGGREGATE) {
251 if (mptcp_developer_mode) {
252 return 0;
253 }
254
255 os_log_error(mptcp_log_handle, "%s - %lx: MPTCP prohibited on svc %u\n",
256 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_svctype);
257 return -1;
258 }
259
260 return 0;
261}
262
263/*
264 * Common subroutine to open a MPTCP connection to one of the remote hosts
265 * specified by dst_sl. This includes allocating and establishing a
266 * subflow TCP connection, either initially to establish MPTCP connection,
267 * or to join an existing one. Returns a connection handle upon success.
268 */
269static int
270mptcp_connectx(struct mptses *mpte, struct sockaddr *src,
271 struct sockaddr *dst, uint32_t ifscope, sae_connid_t *pcid)
272{
273 int error = 0;
274
275 VERIFY(dst != NULL);
276 VERIFY(pcid != NULL);
277
278 error = mptcp_subflow_add(mpte, src, dst, ifscope, pcid);
279
280 return error;
281}
282
283/*
284 * User-protocol pru_connectx callback.
285 */
286static int
287mptcp_usr_connectx(struct socket *mp_so, struct sockaddr *src,
288 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
289 sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
290 uint32_t arglen, struct uio *auio, user_ssize_t *bytes_written)
291{
292#pragma unused(p, aid, flags, arg, arglen)
293 struct mppcb *mpp = mpsotomppcb(mp_so);
294 struct mptses *mpte = NULL;
295 struct mptcb *mp_tp = NULL;
296 user_ssize_t datalen;
297 int error = 0;
298
299 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
300 os_log_error(mptcp_log_handle, "%s - %lx: state %d\n",
301 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
302 mpp ? mpp->mpp_state : -1);
303 error = EINVAL;
304 goto out;
305 }
306 mpte = mptompte(mp: mpp);
307 mp_tp = mpte->mpte_mptcb;
308
309 if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
310 os_log_error(mptcp_log_handle, "%s - %lx: fell back to TCP\n",
311 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
312 error = EINVAL;
313 goto out;
314 }
315
316 if (dst->sa_family != AF_INET && dst->sa_family != AF_INET6) {
317 error = EAFNOSUPPORT;
318 goto out;
319 }
320
321 if (dst->sa_family == AF_INET &&
322 dst->sa_len != sizeof(mpte->__mpte_dst_v4)) {
323 os_log_error(mptcp_log_handle, "%s - %lx: IPv4 dst len %u\n",
324 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), dst->sa_len);
325 error = EINVAL;
326 goto out;
327 }
328
329 if (dst->sa_family == AF_INET6 &&
330 dst->sa_len != sizeof(mpte->__mpte_dst_v6)) {
331 os_log_error(mptcp_log_handle, "%s - %lx: IPv6 dst len %u\n",
332 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), dst->sa_len);
333 error = EINVAL;
334 goto out;
335 }
336
337 if (!(mpte->mpte_flags & MPTE_SVCTYPE_CHECKED)) {
338 if (mptcp_entitlement_check(mp_so, svctype: mpte->mpte_svctype) < 0) {
339 error = EPERM;
340 goto out;
341 }
342
343 mpte->mpte_flags |= MPTE_SVCTYPE_CHECKED;
344 }
345
346 if ((mp_so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING)) == 0) {
347 SOCKADDR_COPY(dst, &mpte->mpte_dst, dst->sa_len);
348
349 if (dst->sa_family == AF_INET) {
350 SOCKADDR_COPY(dst, &mpte->mpte_sub_dst_v4, dst->sa_len);
351 } else {
352 SOCKADDR_COPY(dst, &mpte->mpte_sub_dst_v6, dst->sa_len);
353 }
354 }
355
356 if (src) {
357 if (src->sa_family != AF_INET && src->sa_family != AF_INET6) {
358 error = EAFNOSUPPORT;
359 goto out;
360 }
361
362 if (src->sa_family == AF_INET &&
363 src->sa_len != sizeof(mpte->__mpte_src_v4)) {
364 os_log_error(mptcp_log_handle, "%s - %lx: IPv4 src len %u\n",
365 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), src->sa_len);
366 error = EINVAL;
367 goto out;
368 }
369
370 if (src->sa_family == AF_INET6 &&
371 src->sa_len != sizeof(mpte->__mpte_src_v6)) {
372 os_log_error(mptcp_log_handle, "%s - %lx: IPv6 src len %u\n",
373 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), src->sa_len);
374 error = EINVAL;
375 goto out;
376 }
377
378 if ((mp_so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING)) == 0) {
379 SOCKADDR_COPY(src, &mpte->mpte_src, src->sa_len);
380 }
381 }
382
383 error = mptcp_connectx(mpte, src, dst, ifscope, pcid);
384
385 /* If there is data, copy it */
386 if (auio != NULL) {
387 datalen = uio_resid(a_uio: auio);
388 socket_unlock(so: mp_so, refcount: 0);
389 error = mp_so->so_proto->pr_usrreqs->pru_sosend(mp_so, NULL,
390 (uio_t) auio, NULL, NULL, 0);
391
392 if (error == 0 || error == EWOULDBLOCK) {
393 *bytes_written = datalen - uio_resid(a_uio: auio);
394 }
395
396 if (error == EWOULDBLOCK) {
397 error = EINPROGRESS;
398 }
399
400 socket_lock(so: mp_so, refcount: 0);
401 }
402
403out:
404 return error;
405}
406
407/*
408 * Handle SIOCGASSOCIDS ioctl for PF_MULTIPATH domain.
409 */
410static int
411mptcp_getassocids(struct mptses *mpte, uint32_t *cnt, user_addr_t aidp)
412{
413 /* MPTCP has at most 1 association */
414 *cnt = (mpte->mpte_associd != SAE_ASSOCID_ANY) ? 1 : 0;
415
416 /* just asking how many there are? */
417 if (aidp == USER_ADDR_NULL) {
418 return 0;
419 }
420
421 return copyout(&mpte->mpte_associd, aidp,
422 sizeof(mpte->mpte_associd));
423}
424
425/*
426 * Handle SIOCGCONNIDS ioctl for PF_MULTIPATH domain.
427 */
428static int
429mptcp_getconnids(struct mptses *mpte, sae_associd_t aid, uint32_t *cnt,
430 user_addr_t cidp)
431{
432 struct mptsub *mpts;
433 int error = 0;
434
435 if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL &&
436 aid != mpte->mpte_associd) {
437 return EINVAL;
438 }
439
440 *cnt = mpte->mpte_numflows;
441
442 /* just asking how many there are? */
443 if (cidp == USER_ADDR_NULL) {
444 return 0;
445 }
446
447 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
448 if ((error = copyout(&mpts->mpts_connid, cidp,
449 sizeof(mpts->mpts_connid))) != 0) {
450 break;
451 }
452
453 cidp += sizeof(mpts->mpts_connid);
454 }
455
456 return error;
457}
458
459/*
460 * Handle SIOCGCONNINFO ioctl for PF_MULTIPATH domain.
461 */
462static int
463mptcp_getconninfo(struct mptses *mpte, sae_connid_t *cid, uint32_t *flags,
464 uint32_t *ifindex, int32_t *soerror, user_addr_t src, socklen_t *src_len,
465 user_addr_t dst, socklen_t *dst_len, uint32_t *aux_type,
466 user_addr_t aux_data, uint32_t *aux_len)
467{
468 *flags = 0;
469 *aux_type = 0;
470 *ifindex = 0;
471 *soerror = 0;
472 struct mptcb *mp_tp = mpte->mpte_mptcb;
473
474 /* MPTCP-level global stats */
475 if (*cid == SAE_CONNID_ALL) {
476 struct socket *mp_so = mptetoso(mpte);
477 struct conninfo_multipathtcp mptcp_ci;
478 int error = 0;
479
480 if (*aux_len != 0 && *aux_len != sizeof(mptcp_ci)) {
481 return EINVAL;
482 }
483
484 if (mp_so->so_state & SS_ISCONNECTING) {
485 *flags |= CIF_CONNECTING;
486 }
487 if (mp_so->so_state & SS_ISCONNECTED) {
488 *flags |= CIF_CONNECTED;
489 }
490 if (mp_so->so_state & SS_ISDISCONNECTING) {
491 *flags |= CIF_DISCONNECTING;
492 }
493 if (mp_so->so_state & SS_ISDISCONNECTED) {
494 *flags |= CIF_DISCONNECTED;
495 }
496 if (!(mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP)) {
497 *flags |= CIF_MP_CAPABLE;
498 }
499 if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
500 *flags |= CIF_MP_DEGRADED;
501 }
502 if (mp_tp->mpt_version == MPTCP_VERSION_1) {
503 *flags |= CIF_MP_V1;
504 }
505
506 *src_len = 0;
507 *dst_len = 0;
508
509 *aux_type = CIAUX_MPTCP;
510 *aux_len = sizeof(mptcp_ci);
511
512 if (aux_data != USER_ADDR_NULL) {
513 const struct mptsub *mpts;
514 int initial_info_set = 0;
515 unsigned long i = 0;
516
517 bzero(s: &mptcp_ci, n: sizeof(mptcp_ci));
518 mptcp_ci.mptcpci_subflow_count = mpte->mpte_numflows;
519 mptcp_ci.mptcpci_switch_count = mpte->mpte_subflow_switches;
520
521 VERIFY(sizeof(mptcp_ci.mptcpci_itfstats) == sizeof(mpte->mpte_itfstats));
522 memcpy(dst: mptcp_ci.mptcpci_itfstats, src: mpte->mpte_itfstats, n: sizeof(mptcp_ci.mptcpci_itfstats));
523
524 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
525 if (i >= sizeof(mptcp_ci.mptcpci_subflow_connids) / sizeof(sae_connid_t)) {
526 break;
527 }
528 mptcp_ci.mptcpci_subflow_connids[i] = mpts->mpts_connid;
529
530 if (mpts->mpts_flags & MPTSF_INITIAL_SUB) {
531 const struct inpcb *inp;
532
533 inp = sotoinpcb(mpts->mpts_socket);
534
535 mptcp_ci.mptcpci_init_rxbytes = inp->inp_stat->rxbytes;
536 mptcp_ci.mptcpci_init_txbytes = inp->inp_stat->txbytes;
537 initial_info_set = 1;
538 }
539
540 mptcpstats_update(stats: mptcp_ci.mptcpci_itfstats, mpts);
541
542 i++;
543 }
544
545 if (initial_info_set == 0) {
546 mptcp_ci.mptcpci_init_rxbytes = mpte->mpte_init_rxbytes;
547 mptcp_ci.mptcpci_init_txbytes = mpte->mpte_init_txbytes;
548 }
549
550 if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
551 mptcp_ci.mptcpci_flags |= MPTCPCI_FIRSTPARTY;
552 }
553
554 error = copyout(&mptcp_ci, aux_data, sizeof(mptcp_ci));
555 if (error != 0) {
556 os_log_error(mptcp_log_handle, "%s - %lx: copyout failed: %d\n",
557 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error);
558 return error;
559 }
560 }
561
562 return 0;
563 }
564
565 /* Any stats of any subflow */
566 if (*cid == SAE_CONNID_ANY) {
567 const struct mptsub *mpts;
568 struct socket *so;
569 const struct inpcb *inp;
570 int error = 0;
571
572 mpts = TAILQ_FIRST(&mpte->mpte_subflows);
573 if (mpts == NULL) {
574 return ENXIO;
575 }
576
577 so = mpts->mpts_socket;
578 inp = sotoinpcb(so);
579
580 if (inp->inp_vflag & INP_IPV4) {
581 error = in_getconninfo(so, SAE_CONNID_ANY, flags, ifindex,
582 soerror, src, src_len, dst, dst_len,
583 aux_type, aux_data, aux_len);
584 } else {
585 error = in6_getconninfo(so, SAE_CONNID_ANY, flags, ifindex,
586 soerror, src, src_len, dst, dst_len,
587 aux_type, aux_data, aux_len);
588 }
589
590 if (error != 0) {
591 os_log_error(mptcp_log_handle, "%s - %lx:error from in_getconninfo %d\n",
592 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error);
593 return error;
594 }
595
596 if (mpts->mpts_flags & MPTSF_MP_CAPABLE) {
597 *flags |= CIF_MP_CAPABLE;
598 }
599 if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
600 *flags |= CIF_MP_DEGRADED;
601 }
602 if (mpts->mpts_flags & MPTSF_MP_READY) {
603 *flags |= CIF_MP_READY;
604 }
605 if (mpts->mpts_flags & MPTSF_ACTIVE) {
606 *flags |= CIF_MP_ACTIVE;
607 }
608 if (mp_tp->mpt_version == MPTCP_VERSION_1) {
609 *flags |= CIF_MP_V1;
610 }
611
612 return 0;
613 } else {
614 /* Per-interface stats */
615 const struct mptsub *mpts, *orig_mpts = NULL;
616 struct conninfo_tcp tcp_ci;
617 const struct inpcb *inp;
618 struct socket *so;
619 int error = 0;
620 int index;
621
622 /* cid is thus an ifindex - range-check first! */
623 if (*cid > USHRT_MAX) {
624 return EINVAL;
625 }
626
627 bzero(s: &tcp_ci, n: sizeof(tcp_ci));
628
629 /* First, get a subflow to fill in the "regular" info. */
630 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
631 const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;
632
633 if (ifp && ifp->if_index == *cid) {
634 break;
635 }
636 }
637
638 if (mpts == NULL) {
639 /* No subflow there - well, let's just get the basic itf-info */
640 goto interface_info;
641 }
642
643 so = mpts->mpts_socket;
644 inp = sotoinpcb(so);
645
646 /* Give it USER_ADDR_NULL, because we are doing this on our own */
647 if (inp->inp_vflag & INP_IPV4) {
648 error = in_getconninfo(so, SAE_CONNID_ANY, flags, ifindex,
649 soerror, src, src_len, dst, dst_len,
650 aux_type, USER_ADDR_NULL, aux_len);
651 } else {
652 error = in6_getconninfo(so, SAE_CONNID_ANY, flags, ifindex,
653 soerror, src, src_len, dst, dst_len,
654 aux_type, USER_ADDR_NULL, aux_len);
655 }
656
657 if (error != 0) {
658 os_log_error(mptcp_log_handle, "%s - %lx:error from in_getconninfo %d\n",
659 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error);
660 return error;
661 }
662
663 /* ToDo: Nobody is reading these flags on subflows. Why bother ? */
664 if (mpts->mpts_flags & MPTSF_MP_CAPABLE) {
665 *flags |= CIF_MP_CAPABLE;
666 }
667 if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
668 *flags |= CIF_MP_DEGRADED;
669 }
670 if (mpts->mpts_flags & MPTSF_MP_READY) {
671 *flags |= CIF_MP_READY;
672 }
673 if (mpts->mpts_flags & MPTSF_ACTIVE) {
674 *flags |= CIF_MP_ACTIVE;
675 }
676 if (mp_tp->mpt_version == MPTCP_VERSION_1) {
677 *flags |= CIF_MP_V1;
678 }
679
680 /*
681 * Now, we gather the metrics (aka., tcp_info) and roll them in
682 * across all subflows of this interface to build an aggregated
683 * view.
684 *
685 * We take the TCP_INFO from the first subflow as the "master",
686 * feeding into those fields that we do not roll.
687 */
688 if (aux_data != USER_ADDR_NULL) {
689 tcp_getconninfo(so, &tcp_ci);
690
691 orig_mpts = mpts;
692 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
693 const struct inpcb *mptsinp = sotoinpcb(mpts->mpts_socket);
694 const struct ifnet *ifp;
695
696 ifp = mptsinp->inp_last_outifp;
697
698 if (ifp == NULL || ifp->if_index != *cid || mpts == orig_mpts) {
699 continue;
700 }
701
702 /* Roll the itf-stats into the tcp_info */
703 tcp_ci.tcpci_tcp_info.tcpi_txbytes +=
704 mptsinp->inp_stat->txbytes;
705 tcp_ci.tcpci_tcp_info.tcpi_rxbytes +=
706 mptsinp->inp_stat->rxbytes;
707
708 tcp_ci.tcpci_tcp_info.tcpi_wifi_txbytes +=
709 mptsinp->inp_wstat->txbytes;
710 tcp_ci.tcpci_tcp_info.tcpi_wifi_rxbytes +=
711 mptsinp->inp_wstat->rxbytes;
712
713 tcp_ci.tcpci_tcp_info.tcpi_wired_txbytes +=
714 mptsinp->inp_Wstat->txbytes;
715 tcp_ci.tcpci_tcp_info.tcpi_wired_rxbytes +=
716 mptsinp->inp_Wstat->rxbytes;
717
718 tcp_ci.tcpci_tcp_info.tcpi_cell_txbytes +=
719 mptsinp->inp_cstat->txbytes;
720 tcp_ci.tcpci_tcp_info.tcpi_cell_rxbytes +=
721 mptsinp->inp_cstat->rxbytes;
722 }
723 }
724
725interface_info:
726 *aux_type = CIAUX_TCP;
727 if (*aux_len == 0) {
728 *aux_len = sizeof(tcp_ci);
729 } else if (aux_data != USER_ADDR_NULL) {
730 boolean_t create;
731
732 /*
733 * Finally, old subflows might have been closed - we
734 * want this data as well, so grab it from the interface
735 * stats.
736 */
737 create = orig_mpts != NULL;
738
739 /*
740 * When we found a subflow, we are willing to create a stats-index
741 * because we have some data to return. If there isn't a subflow,
742 * nor anything in the stats, return EINVAL. Because the
743 * ifindex belongs to something that doesn't exist.
744 */
745 index = mptcpstats_get_index_by_ifindex(stats: mpte->mpte_itfstats, ifindex: (u_short)(*cid), false);
746 if (index == -1) {
747 os_log_error(mptcp_log_handle,
748 "%s - %lx: Asking for too many ifindex: %u subcount %u, mpts? %s\n",
749 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
750 *cid, mpte->mpte_numflows,
751 orig_mpts ? "yes" : "no");
752
753 if (orig_mpts == NULL) {
754 return EINVAL;
755 }
756 } else {
757 struct mptcp_itf_stats *stats;
758
759 stats = &mpte->mpte_itfstats[index];
760
761 /* Roll the itf-stats into the tcp_info */
762 tcp_ci.tcpci_tcp_info.tcpi_last_outif = *cid;
763 tcp_ci.tcpci_tcp_info.tcpi_txbytes +=
764 stats->mpis_txbytes;
765 tcp_ci.tcpci_tcp_info.tcpi_rxbytes +=
766 stats->mpis_rxbytes;
767
768 tcp_ci.tcpci_tcp_info.tcpi_wifi_txbytes +=
769 stats->mpis_wifi_txbytes;
770 tcp_ci.tcpci_tcp_info.tcpi_wifi_rxbytes +=
771 stats->mpis_wifi_rxbytes;
772
773 tcp_ci.tcpci_tcp_info.tcpi_wired_txbytes +=
774 stats->mpis_wired_txbytes;
775 tcp_ci.tcpci_tcp_info.tcpi_wired_rxbytes +=
776 stats->mpis_wired_rxbytes;
777
778 tcp_ci.tcpci_tcp_info.tcpi_cell_txbytes +=
779 stats->mpis_cell_txbytes;
780 tcp_ci.tcpci_tcp_info.tcpi_cell_rxbytes +=
781 stats->mpis_cell_rxbytes;
782 }
783
784 *aux_len = min(a: *aux_len, b: sizeof(tcp_ci));
785 error = copyout(&tcp_ci, aux_data, *aux_len);
786 if (error != 0) {
787 return error;
788 }
789 }
790 }
791
792 return 0;
793}
794
795/*
796 * User-protocol pru_control callback.
797 */
798static int
799mptcp_usr_control(struct socket *mp_so, u_long cmd, caddr_t data,
800 struct ifnet *ifp, struct proc *p)
801{
802#pragma unused(ifp, p)
803 struct mppcb *mpp = mpsotomppcb(mp_so);
804 struct mptses *mpte;
805 int error = 0;
806
807 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
808 error = EINVAL;
809 goto out;
810 }
811 mpte = mptompte(mp: mpp);
812
813 switch (cmd) {
814 case SIOCGASSOCIDS32: { /* struct so_aidreq32 */
815 struct so_aidreq32 aidr;
816 bcopy(src: data, dst: &aidr, n: sizeof(aidr));
817 error = mptcp_getassocids(mpte, cnt: &aidr.sar_cnt,
818 aidp: aidr.sar_aidp);
819 if (error == 0) {
820 bcopy(src: &aidr, dst: data, n: sizeof(aidr));
821 }
822 break;
823 }
824
825 case SIOCGASSOCIDS64: { /* struct so_aidreq64 */
826 struct so_aidreq64 aidr;
827 bcopy(src: data, dst: &aidr, n: sizeof(aidr));
828 error = mptcp_getassocids(mpte, cnt: &aidr.sar_cnt,
829 aidp: (user_addr_t)aidr.sar_aidp);
830 if (error == 0) {
831 bcopy(src: &aidr, dst: data, n: sizeof(aidr));
832 }
833 break;
834 }
835
836 case SIOCGCONNIDS32: { /* struct so_cidreq32 */
837 struct so_cidreq32 cidr;
838 bcopy(src: data, dst: &cidr, n: sizeof(cidr));
839 error = mptcp_getconnids(mpte, aid: cidr.scr_aid, cnt: &cidr.scr_cnt,
840 cidp: cidr.scr_cidp);
841 if (error == 0) {
842 bcopy(src: &cidr, dst: data, n: sizeof(cidr));
843 }
844 break;
845 }
846
847 case SIOCGCONNIDS64: { /* struct so_cidreq64 */
848 struct so_cidreq64 cidr;
849 bcopy(src: data, dst: &cidr, n: sizeof(cidr));
850 error = mptcp_getconnids(mpte, aid: cidr.scr_aid, cnt: &cidr.scr_cnt,
851 cidp: (user_addr_t)cidr.scr_cidp);
852 if (error == 0) {
853 bcopy(src: &cidr, dst: data, n: sizeof(cidr));
854 }
855 break;
856 }
857
858 case SIOCGCONNINFO32: { /* struct so_cinforeq32 */
859 struct so_cinforeq32 cifr;
860 bcopy(src: data, dst: &cifr, n: sizeof(cifr));
861 error = mptcp_getconninfo(mpte, cid: &cifr.scir_cid,
862 flags: &cifr.scir_flags, ifindex: &cifr.scir_ifindex, soerror: &cifr.scir_error,
863 src: cifr.scir_src, src_len: &cifr.scir_src_len, dst: cifr.scir_dst,
864 dst_len: &cifr.scir_dst_len, aux_type: &cifr.scir_aux_type, aux_data: cifr.scir_aux_data,
865 aux_len: &cifr.scir_aux_len);
866 if (error == 0) {
867 bcopy(src: &cifr, dst: data, n: sizeof(cifr));
868 }
869 break;
870 }
871
872 case SIOCGCONNINFO64: { /* struct so_cinforeq64 */
873 struct so_cinforeq64 cifr;
874 bcopy(src: data, dst: &cifr, n: sizeof(cifr));
875 error = mptcp_getconninfo(mpte, cid: &cifr.scir_cid,
876 flags: &cifr.scir_flags, ifindex: &cifr.scir_ifindex, soerror: &cifr.scir_error,
877 src: (user_addr_t)cifr.scir_src, src_len: &cifr.scir_src_len,
878 dst: (user_addr_t)cifr.scir_dst, dst_len: &cifr.scir_dst_len,
879 aux_type: &cifr.scir_aux_type, aux_data: (user_addr_t)cifr.scir_aux_data,
880 aux_len: &cifr.scir_aux_len);
881 if (error == 0) {
882 bcopy(src: &cifr, dst: data, n: sizeof(cifr));
883 }
884 break;
885 }
886
887 default:
888 error = EOPNOTSUPP;
889 break;
890 }
891out:
892 return error;
893}
894
895static int
896mptcp_disconnect(struct mptses *mpte)
897{
898 struct socket *mp_so;
899 struct mptcb *mp_tp;
900 int error = 0;
901
902 mp_so = mptetoso(mpte);
903 mp_tp = mpte->mpte_mptcb;
904
905 /* if we're not detached, go thru socket state checks */
906 if (!(mp_so->so_flags & SOF_PCBCLEARING) && !(mp_so->so_flags & SOF_DEFUNCT)) {
907 if (!(mp_so->so_state & (SS_ISCONNECTED |
908 SS_ISCONNECTING))) {
909 error = ENOTCONN;
910 goto out;
911 }
912 if (mp_so->so_state & SS_ISDISCONNECTING) {
913 error = EALREADY;
914 goto out;
915 }
916 }
917
918 mptcp_cancel_all_timers(mp_tp);
919 if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
920 mptcp_close(mpte, mp_tp);
921 } else if (((mp_so->so_options & SO_LINGER) &&
922 mp_so->so_linger == 0) ||
923 (mp_so->so_flags1 & SOF1_DEFUNCTINPROG)) {
924 mptcp_drop(mpte, mp_tp, errno: 0);
925 } else {
926 soisdisconnecting(so: mp_so);
927 sbflush(sb: &mp_so->so_rcv);
928 if (mptcp_usrclosed(mpte) != NULL) {
929 mptcp_output(mpte);
930 }
931 }
932
933 if (error == 0) {
934 mptcp_subflow_workloop(mpte);
935 }
936
937out:
938 return error;
939}
940
/*
 * User-protocol pru_disconnect callback: resolve the socket's MPTCP
 * session and disconnect it.  Returns the result of mptcp_disconnect().
 */
static int
mptcp_usr_disconnect(struct socket *mp_so)
{
	struct mptses *mpte = mpsotompte(mp_so);

	return mptcp_disconnect(mpte);
}
949
950/*
951 * User-protocol pru_disconnectx callback.
952 */
953static int
954mptcp_usr_disconnectx(struct socket *mp_so, sae_associd_t aid, sae_connid_t cid)
955{
956 if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL) {
957 return EINVAL;
958 }
959
960 if (cid != SAE_CONNID_ANY && cid != SAE_CONNID_ALL) {
961 return EINVAL;
962 }
963
964 return mptcp_usr_disconnect(mp_so);
965}
966
967void
968mptcp_finish_usrclosed(struct mptses *mpte)
969{
970 struct mptcb *mp_tp = mpte->mpte_mptcb;
971 struct socket *mp_so = mptetoso(mpte);
972
973 if (mp_tp->mpt_state == MPTCPS_CLOSED || mp_tp->mpt_state == MPTCPS_TERMINATE) {
974 mpte = mptcp_close(mpte, mp_tp);
975 } else if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_2) {
976 soisdisconnected(so: mp_so);
977 } else {
978 struct mptsub *mpts;
979
980 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
981 if ((mp_so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
982 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
983 mptcp_subflow_disconnect(mpte, mpts);
984 } else {
985 mptcp_subflow_shutdown(mpte, mpts);
986 }
987 }
988 }
989}
990
991/*
992 * User issued close, and wish to trail thru shutdown states.
993 */
994static struct mptses *
995mptcp_usrclosed(struct mptses *mpte)
996{
997 struct mptcb *mp_tp = mpte->mpte_mptcb;
998
999 mptcp_close_fsm(mp_tp, MPCE_CLOSE);
1000
1001 /* Not everything has been acknowledged - don't close the subflows! */
1002 if (mp_tp->mpt_state != MPTCPS_TERMINATE &&
1003 mp_tp->mpt_sndnxt + 1 != mp_tp->mpt_sndmax) {
1004 return mpte;
1005 }
1006
1007 mptcp_finish_usrclosed(mpte);
1008
1009 return mpte;
1010}
1011
1012/*
1013 * After a receive, possible send some update to peer.
1014 */
1015static int
1016mptcp_usr_rcvd(struct socket *mp_so, int flags)
1017{
1018#pragma unused(flags)
1019 struct mppcb *mpp = mpsotomppcb(mp_so);
1020 struct mptses *mpte;
1021 struct mptsub *mpts;
1022 int error = 0;
1023
1024 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
1025 error = EINVAL;
1026 goto out;
1027 }
1028
1029 mpte = mptompte(mp: mpp);
1030
1031 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1032 struct socket *so = mpts->mpts_socket;
1033
1034 if (so->so_proto->pr_flags & PR_WANTRCVD && so->so_pcb != NULL) {
1035 (*so->so_proto->pr_usrreqs->pru_rcvd)(so, 0);
1036 }
1037 }
1038
1039 error = mptcp_output(mpte);
1040out:
1041 return error;
1042}
1043
1044/*
1045 * Do a send by putting data in the output queue.
1046 */
1047static int
1048mptcp_usr_send(struct socket *mp_so, int prus_flags, struct mbuf *m,
1049 struct sockaddr *nam, struct mbuf *control, struct proc *p)
1050{
1051#pragma unused(nam, p)
1052 struct mppcb *mpp = mpsotomppcb(mp_so);
1053 struct mptses *mpte;
1054 int error = 0;
1055
1056 if (prus_flags & (PRUS_OOB | PRUS_EOF)) {
1057 error = EOPNOTSUPP;
1058 goto out;
1059 }
1060
1061 if (nam != NULL) {
1062 error = EOPNOTSUPP;
1063 goto out;
1064 }
1065
1066 if (control != NULL && control->m_len != 0) {
1067 error = EOPNOTSUPP;
1068 goto out;
1069 }
1070
1071 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
1072 error = ECONNRESET;
1073 goto out;
1074 }
1075 mpte = mptompte(mp: mpp);
1076 VERIFY(mpte != NULL);
1077
1078 if (!(mp_so->so_state & SS_ISCONNECTED) &&
1079 !(mp_so->so_flags1 & SOF1_PRECONNECT_DATA)) {
1080 error = ENOTCONN;
1081 goto out;
1082 }
1083
1084 mptcp_insert_dsn(mpp, m);
1085 VERIFY(mp_so->so_snd.sb_flags & SB_NOCOMPRESS);
1086 sbappendstream(sb: &mp_so->so_snd, m);
1087 m = NULL;
1088
1089 error = mptcp_output(mpte);
1090 if (error != 0) {
1091 goto out;
1092 }
1093
1094 if (mp_so->so_state & SS_ISCONNECTING) {
1095 if (mp_so->so_state & SS_NBIO) {
1096 error = EWOULDBLOCK;
1097 } else {
1098 error = sbwait(sb: &mp_so->so_snd);
1099 }
1100 }
1101
1102out:
1103 if (error) {
1104 if (m != NULL) {
1105 m_freem(m);
1106 }
1107 if (control != NULL) {
1108 m_freem(control);
1109 }
1110 }
1111 return error;
1112}
1113
1114/*
1115 * Mark the MPTCP connection as being incapable of further output.
1116 */
1117static int
1118mptcp_usr_shutdown(struct socket *mp_so)
1119{
1120 struct mppcb *mpp = mpsotomppcb(mp_so);
1121 struct mptses *mpte;
1122 int error = 0;
1123
1124 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
1125 error = EINVAL;
1126 goto out;
1127 }
1128 mpte = mptompte(mp: mpp);
1129 VERIFY(mpte != NULL);
1130
1131 socantsendmore(so: mp_so);
1132
1133 mpte = mptcp_usrclosed(mpte);
1134 if (mpte != NULL) {
1135 error = mptcp_output(mpte);
1136 }
1137out:
1138 return error;
1139}
1140
1141/*
1142 * Copy the contents of uio into a properly sized mbuf chain.
1143 */
1144static int
1145mptcp_uiotombuf(struct uio *uio, int how, user_ssize_t space, struct mbuf **top)
1146{
1147 struct mbuf *m, *mb, *nm = NULL, *mtail = NULL;
1148 int progress, len, error;
1149 user_ssize_t resid, tot;
1150
1151 VERIFY(top != NULL && *top == NULL);
1152
1153 /*
1154 * space can be zero or an arbitrary large value bound by
1155 * the total data supplied by the uio.
1156 */
1157 resid = uio_resid(a_uio: uio);
1158 if (space > 0) {
1159 tot = MIN(resid, space);
1160 } else {
1161 tot = resid;
1162 }
1163
1164 if (tot < 0 || tot > INT_MAX) {
1165 return EINVAL;
1166 }
1167
1168 len = (int)tot;
1169 if (len == 0) {
1170 len = 1;
1171 }
1172
1173 /* Loop and append maximum sized mbufs to the chain tail. */
1174 while (len > 0) {
1175 uint32_t m_needed = 1;
1176
1177 if (njcl > 0 && len > MBIGCLBYTES) {
1178 mb = m_getpackets_internal(&m_needed, 1,
1179 how, 1, M16KCLBYTES);
1180 } else if (len > MCLBYTES) {
1181 mb = m_getpackets_internal(&m_needed, 1,
1182 how, 1, MBIGCLBYTES);
1183 } else if (len >= (signed)MINCLSIZE) {
1184 mb = m_getpackets_internal(&m_needed, 1,
1185 how, 1, MCLBYTES);
1186 } else {
1187 mb = m_gethdr(how, MT_DATA);
1188 }
1189
1190 /* Fail the whole operation if one mbuf can't be allocated. */
1191 if (mb == NULL) {
1192 if (nm != NULL) {
1193 m_freem(nm);
1194 }
1195 return ENOBUFS;
1196 }
1197
1198 /* Book keeping. */
1199 VERIFY(mb->m_flags & M_PKTHDR);
1200 len -= ((mb->m_flags & M_EXT) ? mb->m_ext.ext_size : MHLEN);
1201 if (mtail != NULL) {
1202 mtail->m_next = mb;
1203 } else {
1204 nm = mb;
1205 }
1206 mtail = mb;
1207 }
1208
1209 m = nm;
1210
1211 progress = 0;
1212 /* Fill all mbufs with uio data and update header information. */
1213 for (mb = m; mb != NULL; mb = mb->m_next) {
1214 /* tot >= 0 && tot <= INT_MAX (see above) */
1215 len = MIN((int)M_TRAILINGSPACE(mb), (int)(tot - progress));
1216
1217 error = uiomove(mtod(mb, char *), n: len, uio);
1218 if (error != 0) {
1219 m_freem(m);
1220 return error;
1221 }
1222
1223 /* each mbuf is M_PKTHDR chained via m_next */
1224 mb->m_len = len;
1225 mb->m_pkthdr.len = len;
1226
1227 progress += len;
1228 }
1229 VERIFY(progress == tot);
1230 *top = m;
1231 return 0;
1232}
1233
1234/*
1235 * MPTCP socket protocol-user socket send routine, derived from sosend().
1236 */
1237static int
1238mptcp_usr_sosend(struct socket *mp_so, struct sockaddr *addr, struct uio *uio,
1239 struct mbuf *top, struct mbuf *control, int flags)
1240{
1241#pragma unused(addr)
1242 user_ssize_t resid, space;
1243 int error, sendflags;
1244 struct proc *p = current_proc();
1245 int sblocked = 0;
1246
1247 /* UIO is required for now, due to per-mbuf M_PKTHDR constrains */
1248 if (uio == NULL || top != NULL) {
1249 error = EINVAL;
1250 goto out;
1251 }
1252 resid = uio_resid(a_uio: uio);
1253
1254 socket_lock(so: mp_so, refcount: 1);
1255 so_update_last_owner_locked(mp_so, p);
1256 so_update_policy(mp_so);
1257
1258 VERIFY(mp_so->so_type == SOCK_STREAM);
1259 VERIFY(!(mp_so->so_flags & SOF_MP_SUBFLOW));
1260
1261 if (flags & (MSG_OOB | MSG_DONTROUTE)) {
1262 error = EOPNOTSUPP;
1263 socket_unlock(so: mp_so, refcount: 1);
1264 goto out;
1265 }
1266
1267 /*
1268 * In theory resid should be unsigned. However, space must be
1269 * signed, as it might be less than 0 if we over-committed, and we
1270 * must use a signed comparison of space and resid. On the other
1271 * hand, a negative resid causes us to loop sending 0-length
1272 * segments to the protocol.
1273 */
1274 if (resid < 0 || resid > INT_MAX ||
1275 (flags & MSG_EOR) || control != NULL) {
1276 error = EINVAL;
1277 socket_unlock(so: mp_so, refcount: 1);
1278 goto out;
1279 }
1280
1281 OSIncrementAtomicLong(address: &p->p_stats->p_ru.ru_msgsnd);
1282
1283 do {
1284 error = sosendcheck(mp_so, NULL, resid, 0, 0, flags,
1285 &sblocked);
1286 if (error != 0) {
1287 goto release;
1288 }
1289
1290 space = sbspace(sb: &mp_so->so_snd);
1291 do {
1292 socket_unlock(so: mp_so, refcount: 0);
1293 /*
1294 * Copy the data from userland into an mbuf chain.
1295 */
1296 error = mptcp_uiotombuf(uio, M_WAITOK, space, top: &top);
1297 if (error != 0) {
1298 socket_lock(so: mp_so, refcount: 0);
1299 goto release;
1300 }
1301 VERIFY(top != NULL);
1302 space -= resid - uio_resid(a_uio: uio);
1303 resid = uio_resid(a_uio: uio);
1304 socket_lock(so: mp_so, refcount: 0);
1305
1306 /*
1307 * Compute flags here, for pru_send and NKEs.
1308 */
1309 sendflags = (resid > 0 && space > 0) ?
1310 PRUS_MORETOCOME : 0;
1311
1312 /*
1313 * Socket filter processing
1314 */
1315 VERIFY(control == NULL);
1316 error = sflt_data_out(so: mp_so, NULL, data: &top, control: &control, flags: 0);
1317 if (error != 0) {
1318 if (error == EJUSTRETURN) {
1319 error = 0;
1320 top = NULL;
1321 /* always free control if any */
1322 }
1323 goto release;
1324 }
1325 if (control != NULL) {
1326 m_freem(control);
1327 control = NULL;
1328 }
1329
1330 /*
1331 * Pass data to protocol.
1332 */
1333 error = (*mp_so->so_proto->pr_usrreqs->pru_send)
1334 (mp_so, sendflags, top, NULL, NULL, p);
1335
1336 top = NULL;
1337 if (error != 0) {
1338 goto release;
1339 }
1340 } while (resid != 0 && space > 0);
1341 } while (resid != 0);
1342
1343release:
1344 if (sblocked) {
1345 sbunlock(sb: &mp_so->so_snd, FALSE); /* will unlock socket */
1346 } else {
1347 socket_unlock(so: mp_so, refcount: 1);
1348 }
1349out:
1350 if (top != NULL) {
1351 m_freem(top);
1352 }
1353 if (control != NULL) {
1354 m_freem(control);
1355 }
1356
1357 soclearfastopen(so: mp_so);
1358
1359 return error;
1360}
1361
1362/*
1363 * Called to filter SOPT_{SET,GET} for SOL_SOCKET level socket options.
1364 * This routine simply indicates to the caller whether or not to proceed
1365 * further with the given socket option. This is invoked by sosetoptlock()
1366 * and sogetoptlock().
1367 */
1368static int
1369mptcp_usr_socheckopt(struct socket *mp_so, struct sockopt *sopt)
1370{
1371#pragma unused(mp_so)
1372 int error = 0;
1373
1374 VERIFY(sopt->sopt_level == SOL_SOCKET);
1375
1376 /*
1377 * We could check for sopt_dir (set/get) here, but we'll just
1378 * let the caller deal with it as appropriate; therefore the
1379 * following is a superset of the socket options which we
1380 * allow for set/get.
1381 *
1382 * XXX: adi@apple.com
1383 *
1384 * Need to consider the following cases:
1385 *
1386 * a. Certain socket options don't have a clear definition
1387 * on the expected behavior post connect(2). At the time
1388 * those options are issued on the MP socket, there may
1389 * be existing subflow sockets that are already connected.
1390 */
1391 switch (sopt->sopt_name) {
1392 case SO_LINGER: /* MP */
1393 case SO_LINGER_SEC: /* MP */
1394 case SO_TYPE: /* MP */
1395 case SO_NREAD: /* MP */
1396 case SO_NWRITE: /* MP */
1397 case SO_ERROR: /* MP */
1398 case SO_SNDBUF: /* MP */
1399 case SO_RCVBUF: /* MP */
1400 case SO_SNDLOWAT: /* MP */
1401 case SO_RCVLOWAT: /* MP */
1402 case SO_SNDTIMEO: /* MP */
1403 case SO_RCVTIMEO: /* MP */
1404 case SO_NKE: /* MP */
1405 case SO_NOSIGPIPE: /* MP */
1406 case SO_NOADDRERR: /* MP */
1407 case SO_LABEL: /* MP */
1408 case SO_PEERLABEL: /* MP */
1409 case SO_DEFUNCTIT: /* MP */
1410 case SO_DEFUNCTOK: /* MP */
1411 case SO_ISDEFUNCT: /* MP */
1412 case SO_TRAFFIC_CLASS_DBG: /* MP */
1413 case SO_DELEGATED: /* MP */
1414 case SO_DELEGATED_UUID: /* MP */
1415#if NECP
1416 case SO_NECP_ATTRIBUTES:
1417 case SO_NECP_CLIENTUUID:
1418#endif /* NECP */
1419 case SO_MPKL_SEND_INFO:
1420 /*
1421 * Tell the caller that these options are to be processed.
1422 */
1423 break;
1424
1425 case SO_DEBUG: /* MP + subflow */
1426 case SO_KEEPALIVE: /* MP + subflow */
1427 case SO_USELOOPBACK: /* MP + subflow */
1428 case SO_RANDOMPORT: /* MP + subflow */
1429 case SO_TRAFFIC_CLASS: /* MP + subflow */
1430 case SO_RECV_TRAFFIC_CLASS: /* MP + subflow */
1431 case SO_PRIVILEGED_TRAFFIC_CLASS: /* MP + subflow */
1432 case SO_RECV_ANYIF: /* MP + subflow */
1433 case SO_RESTRICTIONS: /* MP + subflow */
1434 case SO_FLUSH: /* MP + subflow */
1435 case SO_NOWAKEFROMSLEEP:
1436 case SO_NOAPNFALLBK:
1437 case SO_MARK_CELLFALLBACK:
1438 case SO_MARK_CELLFALLBACK_UUID:
1439 case SO_MARK_KNOWN_TRACKER:
1440 case SO_MARK_KNOWN_TRACKER_NON_APP_INITIATED:
1441 case SO_MARK_APPROVED_APP_DOMAIN:
1442 case SO_FALLBACK_MODE:
1443 /*
1444 * Tell the caller that these options are to be processed;
1445 * these will also be recorded later by mptcp_setopt().
1446 *
1447 * NOTE: Only support integer option value for now.
1448 */
1449 if (sopt->sopt_valsize != sizeof(int)) {
1450 error = EINVAL;
1451 }
1452 break;
1453
1454 default:
1455 /*
1456 * Tell the caller to stop immediately and return an error.
1457 */
1458 error = ENOPROTOOPT;
1459 break;
1460 }
1461
1462 return error;
1463}
1464
1465/*
1466 * Issue SOPT_SET for all MPTCP subflows (for integer option values.)
1467 */
1468static int
1469mptcp_setopt_apply(struct mptses *mpte, struct mptopt *mpo)
1470{
1471 struct socket *mp_so;
1472 struct mptsub *mpts;
1473 struct mptopt smpo;
1474 int error = 0;
1475
1476 /* just bail now if this isn't applicable to subflow sockets */
1477 if (!(mpo->mpo_flags & MPOF_SUBFLOW_OK)) {
1478 error = ENOPROTOOPT;
1479 goto out;
1480 }
1481
1482 /*
1483 * Skip those that are handled internally; these options
1484 * should not have been recorded and marked with the
1485 * MPOF_SUBFLOW_OK by mptcp_setopt(), but just in case.
1486 */
1487 if (mpo->mpo_level == SOL_SOCKET &&
1488 (mpo->mpo_name == SO_NOSIGPIPE || mpo->mpo_name == SO_NOADDRERR)) {
1489 error = ENOPROTOOPT;
1490 goto out;
1491 }
1492
1493 mp_so = mptetoso(mpte);
1494
1495 /*
1496 * Don't bother going further if there's no subflow; mark the option
1497 * with MPOF_INTERIM so that we know whether or not to remove this
1498 * option upon encountering an error while issuing it during subflow
1499 * socket creation.
1500 */
1501 if (mpte->mpte_numflows == 0) {
1502 VERIFY(TAILQ_EMPTY(&mpte->mpte_subflows));
1503 mpo->mpo_flags |= MPOF_INTERIM;
1504 /* return success */
1505 goto out;
1506 }
1507
1508 bzero(s: &smpo, n: sizeof(smpo));
1509 smpo.mpo_flags |= MPOF_SUBFLOW_OK;
1510 smpo.mpo_level = mpo->mpo_level;
1511 smpo.mpo_name = mpo->mpo_name;
1512
1513 /* grab exisiting values in case we need to rollback */
1514 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1515 struct socket *so;
1516
1517 mpts->mpts_flags &= ~(MPTSF_SOPT_OLDVAL | MPTSF_SOPT_INPROG);
1518 mpts->mpts_oldintval = 0;
1519 smpo.mpo_intval = 0;
1520 VERIFY(mpts->mpts_socket != NULL);
1521 so = mpts->mpts_socket;
1522 if (mptcp_subflow_sogetopt(mpte, so, &smpo) == 0) {
1523 mpts->mpts_flags |= MPTSF_SOPT_OLDVAL;
1524 mpts->mpts_oldintval = smpo.mpo_intval;
1525 }
1526 }
1527
1528 /* apply socket option */
1529 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1530 struct socket *so;
1531
1532 mpts->mpts_flags |= MPTSF_SOPT_INPROG;
1533 VERIFY(mpts->mpts_socket != NULL);
1534 so = mpts->mpts_socket;
1535 error = mptcp_subflow_sosetopt(mpte, mpts, mpo);
1536 if (error != 0) {
1537 break;
1538 }
1539 }
1540
1541 /* cleanup, and rollback if needed */
1542 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1543 struct socket *so;
1544
1545 if (!(mpts->mpts_flags & MPTSF_SOPT_INPROG)) {
1546 /* clear in case it's set */
1547 mpts->mpts_flags &= ~MPTSF_SOPT_OLDVAL;
1548 mpts->mpts_oldintval = 0;
1549 continue;
1550 }
1551 if (!(mpts->mpts_flags & MPTSF_SOPT_OLDVAL)) {
1552 mpts->mpts_flags &= ~MPTSF_SOPT_INPROG;
1553 VERIFY(mpts->mpts_oldintval == 0);
1554 continue;
1555 }
1556 /* error during sosetopt, so roll it back */
1557 if (error != 0) {
1558 VERIFY(mpts->mpts_socket != NULL);
1559 so = mpts->mpts_socket;
1560 smpo.mpo_intval = mpts->mpts_oldintval;
1561 mptcp_subflow_sosetopt(mpte, mpts, &smpo);
1562 }
1563 mpts->mpts_oldintval = 0;
1564 mpts->mpts_flags &= ~(MPTSF_SOPT_OLDVAL | MPTSF_SOPT_INPROG);
1565 }
1566
1567out:
1568 return error;
1569}
1570
1571/*
1572 * Handle SOPT_SET for socket options issued on MP socket.
1573 */
1574static int
1575mptcp_setopt(struct mptses *mpte, struct sockopt *sopt)
1576{
1577 int error = 0, optval = 0, level, optname, rec = 1;
1578 struct mptopt smpo, *mpo = NULL;
1579 struct socket *mp_so;
1580
1581 level = sopt->sopt_level;
1582 optname = sopt->sopt_name;
1583
1584 mp_so = mptetoso(mpte);
1585
1586 VERIFY(!(mpsotomppcb(mp_so)->mpp_flags & MPP_INSIDE_SETGETOPT));
1587 mpsotomppcb(mp_so)->mpp_flags |= MPP_INSIDE_SETGETOPT;
1588
1589 /*
1590 * Record socket options which are applicable to subflow sockets so
1591 * that we can replay them for new ones; see mptcp_usr_socheckopt()
1592 * for the list of eligible socket-level options.
1593 */
1594 if (level == SOL_SOCKET) {
1595 switch (optname) {
1596 case SO_DEBUG:
1597 case SO_KEEPALIVE:
1598 case SO_USELOOPBACK:
1599 case SO_RANDOMPORT:
1600 case SO_TRAFFIC_CLASS:
1601 case SO_RECV_TRAFFIC_CLASS:
1602 case SO_PRIVILEGED_TRAFFIC_CLASS:
1603 case SO_RECV_ANYIF:
1604 case SO_RESTRICTIONS:
1605 case SO_NOWAKEFROMSLEEP:
1606 case SO_NOAPNFALLBK:
1607 case SO_MARK_CELLFALLBACK:
1608 case SO_MARK_KNOWN_TRACKER:
1609 case SO_MARK_KNOWN_TRACKER_NON_APP_INITIATED:
1610 case SO_MARK_APPROVED_APP_DOMAIN:
1611 case SO_FALLBACK_MODE:
1612 /* record it */
1613 break;
1614 case SO_FLUSH:
1615 /* don't record it */
1616 rec = 0;
1617 break;
1618
1619 /* Next ones, record at MPTCP-level */
1620 case SO_DELEGATED:
1621 error = sooptcopyin(sopt, &mpte->mpte_epid,
1622 len: sizeof(int), minlen: sizeof(int));
1623 if (error != 0) {
1624 goto err_out;
1625 }
1626
1627 goto out;
1628 case SO_DELEGATED_UUID:
1629 error = sooptcopyin(sopt, &mpte->mpte_euuid,
1630 len: sizeof(uuid_t), minlen: sizeof(uuid_t));
1631 if (error != 0) {
1632 goto err_out;
1633 }
1634
1635 goto out;
1636#if NECP
1637 case SO_NECP_CLIENTUUID:
1638 if (!uuid_is_null(uu: mpsotomppcb(mp_so)->necp_client_uuid)) {
1639 error = EINVAL;
1640 goto err_out;
1641 }
1642
1643 error = sooptcopyin(sopt, &mpsotomppcb(mp_so)->necp_client_uuid,
1644 len: sizeof(uuid_t), minlen: sizeof(uuid_t));
1645 if (error != 0) {
1646 goto err_out;
1647 }
1648
1649 mpsotomppcb(mp_so)->necp_cb = mptcp_session_necp_cb;
1650 error = necp_client_register_multipath_cb(pid: mp_so->last_pid,
1651 client_id: mpsotomppcb(mp_so)->necp_client_uuid,
1652 mpp: mpsotomppcb(mp_so));
1653 if (error) {
1654 goto err_out;
1655 }
1656
1657 if (uuid_is_null(uu: mpsotomppcb(mp_so)->necp_client_uuid)) {
1658 error = EINVAL;
1659 goto err_out;
1660 }
1661
1662 goto out;
1663 case SO_NECP_ATTRIBUTES:
1664 error = necp_set_socket_attributes(attributes: &mpsotomppcb(mp_so)->inp_necp_attributes, sopt);
1665 if (error) {
1666 goto err_out;
1667 }
1668
1669 goto out;
1670#endif /* NECP */
1671 default:
1672 /* nothing to do; just return */
1673 goto out;
1674 }
1675 } else {
1676 switch (optname) {
1677 case TCP_NODELAY:
1678 case TCP_RXT_FINDROP:
1679 case TCP_KEEPALIVE:
1680 case TCP_KEEPINTVL:
1681 case TCP_KEEPCNT:
1682 case TCP_CONNECTIONTIMEOUT:
1683 case TCP_RXT_CONNDROPTIME:
1684 case PERSIST_TIMEOUT:
1685 case TCP_ADAPTIVE_READ_TIMEOUT:
1686 case TCP_ADAPTIVE_WRITE_TIMEOUT:
1687 case TCP_FASTOPEN_FORCE_ENABLE:
1688 /* eligible; record it */
1689 break;
1690 case TCP_NOTSENT_LOWAT:
1691 /* record at MPTCP level */
1692 error = sooptcopyin(sopt, &optval, len: sizeof(optval),
1693 minlen: sizeof(optval));
1694 if (error) {
1695 goto err_out;
1696 }
1697 if (optval < 0) {
1698 error = EINVAL;
1699 goto err_out;
1700 } else {
1701 if (optval == 0) {
1702 mp_so->so_flags &= ~SOF_NOTSENT_LOWAT;
1703 error = mptcp_set_notsent_lowat(mpte, optval: 0);
1704 } else {
1705 mp_so->so_flags |= SOF_NOTSENT_LOWAT;
1706 error = mptcp_set_notsent_lowat(mpte,
1707 optval);
1708 }
1709
1710 if (error) {
1711 goto err_out;
1712 }
1713 }
1714 goto out;
1715 case MPTCP_SERVICE_TYPE:
1716 /* record at MPTCP level */
1717 error = sooptcopyin(sopt, &optval, len: sizeof(optval),
1718 minlen: sizeof(optval));
1719 if (error) {
1720 goto err_out;
1721 }
1722 if (optval < 0 || optval >= MPTCP_SVCTYPE_MAX) {
1723 error = EINVAL;
1724 goto err_out;
1725 }
1726
1727 if (mptcp_entitlement_check(mp_so, svctype: (uint8_t)optval) < 0) {
1728 error = EACCES;
1729 goto err_out;
1730 }
1731
1732 mpte->mpte_svctype = (uint8_t)optval;
1733 mpte->mpte_flags |= MPTE_SVCTYPE_CHECKED;
1734
1735 goto out;
1736 case MPTCP_ALTERNATE_PORT:
1737 /* record at MPTCP level */
1738 error = sooptcopyin(sopt, &optval, len: sizeof(optval),
1739 minlen: sizeof(optval));
1740 if (error) {
1741 goto err_out;
1742 }
1743
1744 if (optval < 0 || optval > UINT16_MAX) {
1745 error = EINVAL;
1746 goto err_out;
1747 }
1748
1749 mpte->mpte_alternate_port = (uint16_t)optval;
1750
1751 goto out;
1752 case MPTCP_FORCE_ENABLE:
1753 /* record at MPTCP level */
1754 error = sooptcopyin(sopt, &optval, len: sizeof(optval),
1755 minlen: sizeof(optval));
1756 if (error) {
1757 goto err_out;
1758 }
1759
1760 if (optval < 0 || optval > 1) {
1761 error = EINVAL;
1762 goto err_out;
1763 }
1764
1765 if (optval) {
1766 mpte->mpte_flags |= MPTE_FORCE_ENABLE;
1767 } else {
1768 mpte->mpte_flags &= ~MPTE_FORCE_ENABLE;
1769 }
1770
1771 goto out;
1772 case MPTCP_FORCE_VERSION:
1773 error = sooptcopyin(sopt, &optval, len: sizeof(optval),
1774 minlen: sizeof(optval));
1775 if (error) {
1776 goto err_out;
1777 }
1778
1779 if (optval != 0 && optval != 1) {
1780 error = EINVAL;
1781 goto err_out;
1782 }
1783
1784 if (optval == 0) {
1785 mpte->mpte_flags |= MPTE_FORCE_V0;
1786 mpte->mpte_flags &= ~MPTE_FORCE_V1;
1787 } else {
1788 mpte->mpte_flags |= MPTE_FORCE_V1;
1789 mpte->mpte_flags &= ~MPTE_FORCE_V0;
1790 }
1791
1792 goto out;
1793 case MPTCP_EXPECTED_PROGRESS_TARGET:
1794 {
1795 struct mptcb *mp_tp = mpte->mpte_mptcb;
1796 uint64_t mach_time_target;
1797 uint64_t nanoseconds;
1798
1799 if (mpte->mpte_svctype != MPTCP_SVCTYPE_TARGET_BASED) {
1800 os_log(mptcp_log_handle, "%s - %lx: Can't set urgent activity when svctype is %u\n",
1801 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_svctype);
1802 error = EINVAL;
1803 goto err_out;
1804 }
1805
1806 error = sooptcopyin(sopt, &mach_time_target, len: sizeof(mach_time_target), minlen: sizeof(mach_time_target));
1807 if (error) {
1808 goto err_out;
1809 }
1810
1811 if (!mptcp_ok_to_create_subflows(mp_tp)) {
1812 os_log(mptcp_log_handle, "%s - %lx: Not ok to create subflows, state %u flags %#x\n",
1813 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mp_tp->mpt_state, mp_tp->mpt_flags);
1814 error = EINVAL;
1815 goto err_out;
1816 }
1817
1818 if (mach_time_target) {
1819 uint64_t time_now = 0;
1820 uint64_t time_now_nanoseconds;
1821
1822 absolutetime_to_nanoseconds(abstime: mach_time_target, result: &nanoseconds);
1823 nanoseconds = nanoseconds - (mptcp_expected_progress_headstart * NSEC_PER_MSEC);
1824
1825 time_now = mach_continuous_time();
1826 absolutetime_to_nanoseconds(abstime: time_now, result: &time_now_nanoseconds);
1827
1828 nanoseconds_to_absolutetime(nanoseconds, result: &mach_time_target);
1829 /* If the timer is already running and it would
1830 * fire in less than mptcp_expected_progress_headstart
1831 * seconds, then it's not worth canceling it.
1832 */
1833 if (mpte->mpte_time_target &&
1834 mpte->mpte_time_target < time_now &&
1835 time_now_nanoseconds > nanoseconds - (mptcp_expected_progress_headstart * NSEC_PER_MSEC)) {
1836 os_log(mptcp_log_handle, "%s - %lx: Not rescheduling timer %llu now %llu target %llu\n",
1837 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
1838 mpte->mpte_time_target,
1839 time_now,
1840 mach_time_target);
1841 goto out;
1842 }
1843 }
1844
1845 mpte->mpte_time_target = mach_time_target;
1846 mptcp_set_urgency_timer(mpte);
1847
1848 goto out;
1849 }
1850 default:
1851 /* not eligible */
1852 error = ENOPROTOOPT;
1853 goto err_out;
1854 }
1855 }
1856
1857 if ((error = sooptcopyin(sopt, &optval, len: sizeof(optval),
1858 minlen: sizeof(optval))) != 0) {
1859 goto err_out;
1860 }
1861
1862 if (rec) {
1863 /* search for an existing one; if not found, allocate */
1864 if ((mpo = mptcp_sopt_find(mpte, sopt)) == NULL) {
1865 mpo = mptcp_sopt_alloc(Z_WAITOK);
1866 }
1867
1868 if (mpo == NULL) {
1869 error = ENOBUFS;
1870 goto err_out;
1871 } else {
1872 /* initialize or update, as needed */
1873 mpo->mpo_intval = optval;
1874 if (!(mpo->mpo_flags & MPOF_ATTACHED)) {
1875 mpo->mpo_level = level;
1876 mpo->mpo_name = optname;
1877 mptcp_sopt_insert(mpte, mpo);
1878 }
1879 /* this can be issued on the subflow socket */
1880 mpo->mpo_flags |= MPOF_SUBFLOW_OK;
1881 }
1882 } else {
1883 bzero(s: &smpo, n: sizeof(smpo));
1884 mpo = &smpo;
1885 mpo->mpo_flags |= MPOF_SUBFLOW_OK;
1886 mpo->mpo_level = level;
1887 mpo->mpo_name = optname;
1888 mpo->mpo_intval = optval;
1889 }
1890
1891 /* issue this socket option on existing subflows */
1892 error = mptcp_setopt_apply(mpte, mpo);
1893 if (error != 0 && (mpo->mpo_flags & MPOF_ATTACHED)) {
1894 VERIFY(mpo != &smpo);
1895 mptcp_sopt_remove(mpte, mpo);
1896 mptcp_sopt_free(mpo);
1897 }
1898 if (mpo == &smpo) {
1899 mpo->mpo_flags &= ~MPOF_INTERIM;
1900 }
1901
1902 if (error) {
1903 goto err_out;
1904 }
1905
1906out:
1907
1908 mpsotomppcb(mp_so)->mpp_flags &= ~MPP_INSIDE_SETGETOPT;
1909 return 0;
1910
1911err_out:
1912 os_log_error(mptcp_log_handle, "%s - %lx: sopt %s (%d, %d) val %d can't be issued error %d\n",
1913 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
1914 mptcp_sopt2str(level, optname), level, optname, optval, error);
1915 mpsotomppcb(mp_so)->mpp_flags &= ~MPP_INSIDE_SETGETOPT;
1916 return error;
1917}
1918
1919static void
1920mptcp_fill_info_bytestats(struct tcp_info *ti, struct mptses *mpte)
1921{
1922 struct mptsub *mpts;
1923 int i;
1924
1925 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1926 const struct inpcb *inp = sotoinpcb(mpts->mpts_socket);
1927
1928 if (inp == NULL) {
1929 continue;
1930 }
1931
1932 ti->tcpi_txbytes += inp->inp_stat->txbytes;
1933 ti->tcpi_rxbytes += inp->inp_stat->rxbytes;
1934 ti->tcpi_cell_txbytes += inp->inp_cstat->txbytes;
1935 ti->tcpi_cell_rxbytes += inp->inp_cstat->rxbytes;
1936 ti->tcpi_wifi_txbytes += inp->inp_wstat->txbytes;
1937 ti->tcpi_wifi_rxbytes += inp->inp_wstat->rxbytes;
1938 ti->tcpi_wired_txbytes += inp->inp_Wstat->txbytes;
1939 ti->tcpi_wired_rxbytes += inp->inp_Wstat->rxbytes;
1940 }
1941
1942 for (i = 0; i < MPTCP_ITFSTATS_SIZE; i++) {
1943 struct mptcp_itf_stats *stats = &mpte->mpte_itfstats[i];
1944
1945 ti->tcpi_txbytes += stats->mpis_txbytes;
1946 ti->tcpi_rxbytes += stats->mpis_rxbytes;
1947
1948 ti->tcpi_wifi_txbytes += stats->mpis_wifi_txbytes;
1949 ti->tcpi_wifi_rxbytes += stats->mpis_wifi_rxbytes;
1950
1951 ti->tcpi_wired_txbytes += stats->mpis_wired_txbytes;
1952 ti->tcpi_wired_rxbytes += stats->mpis_wired_rxbytes;
1953
1954 ti->tcpi_cell_txbytes += stats->mpis_cell_txbytes;
1955 ti->tcpi_cell_rxbytes += stats->mpis_cell_rxbytes;
1956 }
1957}
1958
1959static void
1960mptcp_fill_info(struct mptses *mpte, struct tcp_info *ti)
1961{
1962 struct mptsub *actsub = mpte->mpte_active_sub;
1963 struct mptcb *mp_tp = mpte->mpte_mptcb;
1964 struct tcpcb *acttp = NULL;
1965
1966 if (actsub) {
1967 acttp = sototcpcb(actsub->mpts_socket);
1968 }
1969
1970 bzero(s: ti, n: sizeof(*ti));
1971
1972 ti->tcpi_state = (uint8_t)mp_tp->mpt_state;
1973 /* tcpi_options */
1974 /* tcpi_snd_wscale */
1975 /* tcpi_rcv_wscale */
1976 /* tcpi_flags */
1977 if (acttp) {
1978 ti->tcpi_rto = acttp->t_timer[TCPT_REXMT] ? acttp->t_rxtcur : 0;
1979 }
1980
1981 /* tcpi_snd_mss */
1982 /* tcpi_rcv_mss */
1983 if (acttp) {
1984 ti->tcpi_rttcur = acttp->t_rttcur;
1985 ti->tcpi_srtt = acttp->t_srtt >> TCP_RTT_SHIFT;
1986 ti->tcpi_rttvar = acttp->t_rttvar >> TCP_RTTVAR_SHIFT;
1987 ti->tcpi_rttbest = acttp->t_rttbest >> TCP_RTT_SHIFT;
1988 ti->tcpi_rcv_srtt = acttp->rcv_srtt >> TCP_RTT_SHIFT;
1989 }
1990 /* tcpi_snd_ssthresh */
1991 /* tcpi_snd_cwnd */
1992 /* tcpi_rcv_space */
1993 ti->tcpi_snd_wnd = mp_tp->mpt_sndwnd;
1994 ti->tcpi_snd_nxt = (uint32_t)mp_tp->mpt_sndnxt;
1995 ti->tcpi_rcv_nxt = (uint32_t)mp_tp->mpt_rcvnxt;
1996 if (acttp) {
1997 ti->tcpi_last_outif = (acttp->t_inpcb->inp_last_outifp == NULL) ? 0 :
1998 acttp->t_inpcb->inp_last_outifp->if_index;
1999 }
2000
2001 mptcp_fill_info_bytestats(ti, mpte);
2002 /* tcpi_txpackets */
2003
2004 /* tcpi_txretransmitbytes */
2005 /* tcpi_txunacked */
2006 /* tcpi_rxpackets */
2007
2008 /* tcpi_rxduplicatebytes */
2009 /* tcpi_rxoutoforderbytes */
2010 /* tcpi_snd_bw */
2011 /* tcpi_synrexmits */
2012 /* tcpi_unused1 */
2013 /* tcpi_unused2 */
2014 /* tcpi_cell_rxpackets */
2015
2016 /* tcpi_cell_txpackets */
2017
2018 /* tcpi_wifi_rxpackets */
2019
2020 /* tcpi_wifi_txpackets */
2021
2022 /* tcpi_wired_rxpackets */
2023 /* tcpi_wired_txpackets */
2024 /* tcpi_connstatus */
2025 /* TFO-stuff */
2026 /* ECN stuff */
2027 /* tcpi_ecn_recv_ce */
2028 /* tcpi_ecn_recv_cwr */
2029 if (acttp) {
2030 ti->tcpi_rcvoopack = acttp->t_rcvoopack;
2031 }
2032 /* tcpi_pawsdrop */
2033 /* tcpi_sack_recovery_episode */
2034 /* tcpi_reordered_pkts */
2035 /* tcpi_dsack_sent */
2036 /* tcpi_dsack_recvd */
2037 /* tcpi_flowhash */
2038 if (acttp) {
2039 ti->tcpi_txretransmitpackets = acttp->t_stat.rxmitpkts;
2040 }
2041}
2042
2043/*
2044 * Handle SOPT_GET for socket options issued on MP socket.
2045 */
2046static int
2047mptcp_getopt(struct mptses *mpte, struct sockopt *sopt)
2048{
2049 int error = 0, optval = 0;
2050 struct socket *mp_so;
2051
2052 mp_so = mptetoso(mpte);
2053
2054 VERIFY(!(mpsotomppcb(mp_so)->mpp_flags & MPP_INSIDE_SETGETOPT));
2055 mpsotomppcb(mp_so)->mpp_flags |= MPP_INSIDE_SETGETOPT;
2056
2057 /*
2058 * We only handle SOPT_GET for TCP level socket options; we should
2059 * not get here for socket level options since they are already
2060 * handled at the socket layer.
2061 */
2062 if (sopt->sopt_level != IPPROTO_TCP) {
2063 error = ENOPROTOOPT;
2064 goto out;
2065 }
2066
2067 switch (sopt->sopt_name) {
2068 case PERSIST_TIMEOUT:
2069 /* Only case for which we have a non-zero default */
2070 optval = tcp_max_persist_timeout;
2071 OS_FALLTHROUGH;
2072 case TCP_NODELAY:
2073 case TCP_RXT_FINDROP:
2074 case TCP_KEEPALIVE:
2075 case TCP_KEEPINTVL:
2076 case TCP_KEEPCNT:
2077 case TCP_CONNECTIONTIMEOUT:
2078 case TCP_RXT_CONNDROPTIME:
2079 case TCP_ADAPTIVE_READ_TIMEOUT:
2080 case TCP_ADAPTIVE_WRITE_TIMEOUT:
2081 case TCP_FASTOPEN_FORCE_ENABLE:
2082 {
2083 struct mptopt *mpo = mptcp_sopt_find(mpte, sopt);
2084
2085 if (mpo != NULL) {
2086 optval = mpo->mpo_intval;
2087 }
2088 break;
2089 }
2090
2091 /* The next ones are stored at the MPTCP-level */
2092 case TCP_NOTSENT_LOWAT:
2093 if (mptetoso(mpte)->so_flags & SOF_NOTSENT_LOWAT) {
2094 optval = mptcp_get_notsent_lowat(mpte);
2095 } else {
2096 optval = 0;
2097 }
2098 break;
2099 case TCP_INFO:
2100 {
2101 struct tcp_info ti;
2102
2103 mptcp_fill_info(mpte, ti: &ti);
2104 error = sooptcopyout(sopt, data: &ti, len: sizeof(struct tcp_info));
2105
2106 goto out;
2107 }
2108 case MPTCP_SERVICE_TYPE:
2109 optval = mpte->mpte_svctype;
2110 break;
2111 case MPTCP_ALTERNATE_PORT:
2112 optval = mpte->mpte_alternate_port;
2113 break;
2114 case MPTCP_FORCE_ENABLE:
2115 optval = !!(mpte->mpte_flags & MPTE_FORCE_ENABLE);
2116 break;
2117 case MPTCP_FORCE_VERSION:
2118 if (mpte->mpte_flags & MPTE_FORCE_V0) {
2119 optval = 0;
2120 } else if (mpte->mpte_flags & MPTE_FORCE_V1) {
2121 optval = 1;
2122 } else {
2123 optval = -1;
2124 }
2125 break;
2126 case MPTCP_EXPECTED_PROGRESS_TARGET:
2127 error = sooptcopyout(sopt, data: &mpte->mpte_time_target, len: sizeof(mpte->mpte_time_target));
2128
2129 goto out;
2130 default:
2131 /* not eligible */
2132 error = ENOPROTOOPT;
2133 break;
2134 }
2135
2136 if (error == 0) {
2137 error = sooptcopyout(sopt, data: &optval, len: sizeof(int));
2138 }
2139
2140out:
2141 mpsotomppcb(mp_so)->mpp_flags &= ~MPP_INSIDE_SETGETOPT;
2142 return error;
2143}
2144
2145/*
2146 * MPTCP SOPT_{SET,GET} socket option handler, for options issued on the MP
2147 * socket, at SOL_SOCKET and IPPROTO_TCP levels. The former is restricted
2148 * to those that are allowed by mptcp_usr_socheckopt().
2149 */
2150int
2151mptcp_ctloutput(struct socket *mp_so, struct sockopt *sopt)
2152{
2153 struct mppcb *mpp = mpsotomppcb(mp_so);
2154 struct mptses *mpte;
2155 int error = 0;
2156
2157 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
2158 error = EINVAL;
2159 goto out;
2160 }
2161 mpte = mptompte(mp: mpp);
2162 socket_lock_assert_owned(so: mp_so);
2163
2164 /* we only handle socket and TCP-level socket options for MPTCP */
2165 if (sopt->sopt_level != SOL_SOCKET && sopt->sopt_level != IPPROTO_TCP) {
2166 error = EINVAL;
2167 goto out;
2168 }
2169
2170 switch (sopt->sopt_dir) {
2171 case SOPT_SET:
2172 error = mptcp_setopt(mpte, sopt);
2173 break;
2174
2175 case SOPT_GET:
2176 error = mptcp_getopt(mpte, sopt);
2177 break;
2178 }
2179out:
2180 return error;
2181}
2182
2183const char *
2184mptcp_sopt2str(int level, int optname)
2185{
2186 switch (level) {
2187 case SOL_SOCKET:
2188 switch (optname) {
2189 case SO_LINGER:
2190 return "SO_LINGER";
2191 case SO_LINGER_SEC:
2192 return "SO_LINGER_SEC";
2193 case SO_DEBUG:
2194 return "SO_DEBUG";
2195 case SO_KEEPALIVE:
2196 return "SO_KEEPALIVE";
2197 case SO_USELOOPBACK:
2198 return "SO_USELOOPBACK";
2199 case SO_TYPE:
2200 return "SO_TYPE";
2201 case SO_NREAD:
2202 return "SO_NREAD";
2203 case SO_NWRITE:
2204 return "SO_NWRITE";
2205 case SO_ERROR:
2206 return "SO_ERROR";
2207 case SO_SNDBUF:
2208 return "SO_SNDBUF";
2209 case SO_RCVBUF:
2210 return "SO_RCVBUF";
2211 case SO_SNDLOWAT:
2212 return "SO_SNDLOWAT";
2213 case SO_RCVLOWAT:
2214 return "SO_RCVLOWAT";
2215 case SO_SNDTIMEO:
2216 return "SO_SNDTIMEO";
2217 case SO_RCVTIMEO:
2218 return "SO_RCVTIMEO";
2219 case SO_NKE:
2220 return "SO_NKE";
2221 case SO_NOSIGPIPE:
2222 return "SO_NOSIGPIPE";
2223 case SO_NOADDRERR:
2224 return "SO_NOADDRERR";
2225 case SO_RESTRICTIONS:
2226 return "SO_RESTRICTIONS";
2227 case SO_LABEL:
2228 return "SO_LABEL";
2229 case SO_PEERLABEL:
2230 return "SO_PEERLABEL";
2231 case SO_RANDOMPORT:
2232 return "SO_RANDOMPORT";
2233 case SO_TRAFFIC_CLASS:
2234 return "SO_TRAFFIC_CLASS";
2235 case SO_RECV_TRAFFIC_CLASS:
2236 return "SO_RECV_TRAFFIC_CLASS";
2237 case SO_TRAFFIC_CLASS_DBG:
2238 return "SO_TRAFFIC_CLASS_DBG";
2239 case SO_PRIVILEGED_TRAFFIC_CLASS:
2240 return "SO_PRIVILEGED_TRAFFIC_CLASS";
2241 case SO_DEFUNCTIT:
2242 return "SO_DEFUNCTIT";
2243 case SO_DEFUNCTOK:
2244 return "SO_DEFUNCTOK";
2245 case SO_ISDEFUNCT:
2246 return "SO_ISDEFUNCT";
2247 case SO_OPPORTUNISTIC:
2248 return "SO_OPPORTUNISTIC";
2249 case SO_FLUSH:
2250 return "SO_FLUSH";
2251 case SO_RECV_ANYIF:
2252 return "SO_RECV_ANYIF";
2253 case SO_NOWAKEFROMSLEEP:
2254 return "SO_NOWAKEFROMSLEEP";
2255 case SO_NOAPNFALLBK:
2256 return "SO_NOAPNFALLBK";
2257 case SO_MARK_CELLFALLBACK:
2258 return "SO_CELLFALLBACK";
2259 case SO_FALLBACK_MODE:
2260 return "SO_FALLBACK_MODE";
2261 case SO_MARK_KNOWN_TRACKER:
2262 return "SO_MARK_KNOWN_TRACKER";
2263 case SO_MARK_KNOWN_TRACKER_NON_APP_INITIATED:
2264 return "SO_MARK_KNOWN_TRACKER_NON_APP_INITIATED";
2265 case SO_MARK_APPROVED_APP_DOMAIN:
2266 return "SO_MARK_APPROVED_APP_DOMAIN";
2267 case SO_DELEGATED:
2268 return "SO_DELEGATED";
2269 case SO_DELEGATED_UUID:
2270 return "SO_DELEGATED_UUID";
2271#if NECP
2272 case SO_NECP_ATTRIBUTES:
2273 return "SO_NECP_ATTRIBUTES";
2274 case SO_NECP_CLIENTUUID:
2275 return "SO_NECP_CLIENTUUID";
2276#endif /* NECP */
2277 }
2278
2279 break;
2280 case IPPROTO_TCP:
2281 switch (optname) {
2282 case TCP_NODELAY:
2283 return "TCP_NODELAY";
2284 case TCP_KEEPALIVE:
2285 return "TCP_KEEPALIVE";
2286 case TCP_KEEPINTVL:
2287 return "TCP_KEEPINTVL";
2288 case TCP_KEEPCNT:
2289 return "TCP_KEEPCNT";
2290 case TCP_CONNECTIONTIMEOUT:
2291 return "TCP_CONNECTIONTIMEOUT";
2292 case TCP_RXT_CONNDROPTIME:
2293 return "TCP_RXT_CONNDROPTIME";
2294 case PERSIST_TIMEOUT:
2295 return "PERSIST_TIMEOUT";
2296 case TCP_NOTSENT_LOWAT:
2297 return "NOTSENT_LOWAT";
2298 case TCP_ADAPTIVE_READ_TIMEOUT:
2299 return "ADAPTIVE_READ_TIMEOUT";
2300 case TCP_ADAPTIVE_WRITE_TIMEOUT:
2301 return "ADAPTIVE_WRITE_TIMEOUT";
2302 case TCP_FASTOPEN_FORCE_ENABLE:
2303 return "TCP_FASTOPEN_FORCE_ENABLE";
2304 case MPTCP_SERVICE_TYPE:
2305 return "MPTCP_SERVICE_TYPE";
2306 case MPTCP_ALTERNATE_PORT:
2307 return "MPTCP_ALTERNATE_PORT";
2308 case MPTCP_FORCE_ENABLE:
2309 return "MPTCP_FORCE_ENABLE";
2310 case MPTCP_FORCE_VERSION:
2311 return "MPTCP_FORCE_VERSION";
2312 case MPTCP_EXPECTED_PROGRESS_TARGET:
2313 return "MPTCP_EXPECTED_PROGRESS_TARGET";
2314 }
2315
2316 break;
2317 }
2318
2319 return "unknown";
2320}
2321
2322static int
2323mptcp_usr_preconnect(struct socket *mp_so)
2324{
2325 struct mptsub *mpts = NULL;
2326 struct mppcb *mpp = mpsotomppcb(mp_so);
2327 struct mptses *mpte;
2328 struct socket *so;
2329 struct tcpcb *tp = NULL;
2330 int error;
2331
2332 mpte = mptompte(mp: mpp);
2333
2334 mpts = mptcp_get_subflow(mpte, NULL);
2335 if (mpts == NULL) {
2336 os_log_error(mptcp_log_handle, "%s - %lx: invalid preconnect ",
2337 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
2338 return EINVAL;
2339 }
2340 mpts->mpts_flags &= ~MPTSF_TFO_REQD;
2341 so = mpts->mpts_socket;
2342 tp = intotcpcb(sotoinpcb(so));
2343 tp->t_mpflags &= ~TMPF_TFO_REQUEST;
2344 error = tcp_output(sototcpcb(so));
2345
2346 soclearfastopen(so: mp_so);
2347
2348 return error;
2349}
2350