1 | /* |
2 | * Copyright (c) 2012-2021 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | |
29 | #include <kern/locks.h> |
30 | #include <kern/policy_internal.h> |
31 | #include <kern/zalloc.h> |
32 | |
33 | #include <mach/sdt.h> |
34 | |
35 | #include <sys/domain.h> |
36 | #include <sys/kdebug.h> |
37 | #include <sys/kern_control.h> |
38 | #include <sys/kernel.h> |
39 | #include <sys/mbuf.h> |
40 | #include <sys/mcache.h> |
41 | #include <sys/param.h> |
42 | #include <sys/proc.h> |
43 | #include <sys/protosw.h> |
44 | #include <sys/resourcevar.h> |
45 | #include <sys/socket.h> |
46 | #include <sys/socketvar.h> |
47 | #include <sys/sysctl.h> |
48 | #include <sys/syslog.h> |
49 | #include <sys/systm.h> |
50 | |
51 | #include <net/content_filter.h> |
52 | #include <net/if.h> |
53 | #include <net/if_var.h> |
54 | #include <netinet/in.h> |
55 | #include <netinet/in_pcb.h> |
56 | #include <netinet/in_var.h> |
57 | #include <netinet/tcp.h> |
58 | #include <netinet/tcp_cache.h> |
59 | #include <netinet/tcp_fsm.h> |
60 | #include <netinet/tcp_seq.h> |
61 | #include <netinet/tcp_var.h> |
62 | #include <netinet/mptcp_var.h> |
63 | #include <netinet/mptcp.h> |
64 | #include <netinet/mptcp_opt.h> |
65 | #include <netinet/mptcp_seq.h> |
66 | #include <netinet/mptcp_timer.h> |
67 | #include <libkern/crypto/sha1.h> |
68 | #include <libkern/crypto/sha2.h> |
69 | #include <netinet6/in6_pcb.h> |
70 | #include <netinet6/ip6protosw.h> |
71 | #include <dev/random/randomdev.h> |
72 | #include <net/sockaddr_utils.h> |
73 | |
74 | /* |
75 | * Notes on MPTCP implementation. |
76 | * |
77 | * MPTCP is implemented as <SOCK_STREAM,IPPROTO_TCP> protocol in PF_MULTIPATH |
78 | * communication domain. The structure mtcbinfo describes the MPTCP instance |
79 | * of a Multipath protocol in that domain. It is used to keep track of all |
80 | * MPTCP PCB instances in the system, and is protected by the global lock |
81 | * mppi_lock. |
82 | * |
83 | * An MPTCP socket is opened by calling socket(PF_MULTIPATH, SOCK_STREAM, |
84 | * IPPROTO_TCP). Upon success, a Multipath PCB gets allocated and along with |
85 | * it comes an MPTCP Session and an MPTCP PCB. All three structures are |
86 | * allocated from the same memory block, and each structure has a pointer |
87 | * to the adjacent ones. The layout is defined by the mpp_mtp structure. |
88 | * The socket lock (mpp_lock) is used to protect accesses to the Multipath |
89 | * PCB (mppcb) as well as the MPTCP Session (mptses). |
90 | * |
* The MPTCP Session is an MPTCP-specific extension to the Multipath PCB.
92 | * |
93 | * A functioning MPTCP Session consists of one or more subflow sockets. Each |
94 | * subflow socket is essentially a regular PF_INET/PF_INET6 TCP socket, and is |
95 | * represented by the mptsub structure. Because each subflow requires access |
96 | * to the MPTCP Session, the MPTCP socket's so_usecount is bumped up for each |
97 | * subflow. This gets decremented prior to the subflow's destruction. |
98 | * |
99 | * To handle events (read, write, control) from the subflows, we do direct |
100 | * upcalls into the specific function. |
101 | * |
102 | * The whole MPTCP connection is protected by a single lock, the MPTCP socket's |
103 | * lock. Incoming data on a subflow also ends up taking this single lock. To |
* achieve the latter, tcp_lock/unlock has been changed to use the lock of
* the MPTCP socket instead.
106 | * |
107 | * An MPTCP socket will be destroyed when its so_usecount drops to zero; this |
108 | * work is done by the MPTCP garbage collector which is invoked on demand by |
109 | * the PF_MULTIPATH garbage collector. This process will take place once all |
110 | * of the subflows have been destroyed. |
111 | */ |
112 | |
113 | static void mptcp_subflow_abort(struct mptsub *, int); |
114 | |
115 | static void mptcp_send_dfin(struct socket *so); |
116 | static void mptcp_set_cellicon(struct mptses *mpte, struct mptsub *mpts); |
117 | static int mptcp_freeq(struct mptcb *mp_tp); |
118 | |
119 | /* |
120 | * Possible return values for subflow event handlers. Note that success |
* values must be greater than or equal to MPTS_EVRET_OK. Values less than
* that indicate errors or actions which require immediate attention; they
* will prevent the rest of the handlers from processing their respective
* events until the next round of event processing.
125 | */ |
126 | typedef enum { |
127 | MPTS_EVRET_DELETE = 1, /* delete this subflow */ |
128 | MPTS_EVRET_OK = 2, /* OK */ |
129 | MPTS_EVRET_CONNECT_PENDING = 3, /* resume pended connects */ |
130 | MPTS_EVRET_DISCONNECT_FALLBACK = 4, /* abort all but preferred */ |
131 | } ev_ret_t; |
132 | |
133 | static void mptcp_do_sha1(mptcp_key_t *, char *); |
134 | static void mptcp_do_sha256(mptcp_key_t *, char *); |
135 | |
136 | static void mptcp_init_local_parms(struct mptses *, struct sockaddr *); |
137 | |
138 | static KALLOC_TYPE_DEFINE(mptsub_zone, struct mptsub, NET_KT_DEFAULT); |
139 | static KALLOC_TYPE_DEFINE(mptopt_zone, struct mptopt, NET_KT_DEFAULT); |
140 | static KALLOC_TYPE_DEFINE(mpt_subauth_zone, struct mptcp_subf_auth_entry, |
141 | NET_KT_DEFAULT); |
142 | |
143 | struct mppcbinfo mtcbinfo; |
144 | |
145 | SYSCTL_DECL(_net_inet); |
146 | |
SYSCTL_NODE(_net_inet, OID_AUTO, mptcp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "MPTCP");
148 | |
149 | SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, pcbcount, CTLFLAG_RD | CTLFLAG_LOCKED, |
&mtcbinfo.mppi_count, 0, "Number of active PCBs");
151 | |
152 | |
153 | static int mptcp_alternate_port = 0; |
154 | SYSCTL_INT(_net_inet_mptcp, OID_AUTO, alternate_port, CTLFLAG_RW | CTLFLAG_LOCKED, |
&mptcp_alternate_port, 0, "Set alternate port for MPTCP connections");
156 | |
157 | static struct protosw mptcp_subflow_protosw; |
158 | static struct pr_usrreqs mptcp_subflow_usrreqs; |
159 | static struct ip6protosw mptcp_subflow_protosw6; |
160 | static struct pr_usrreqs mptcp_subflow_usrreqs6; |
161 | |
162 | static uint8_t mptcp_create_subflows_scheduled; |
163 | |
164 | /* Using Symptoms Advisory to detect poor WiFi or poor Cell */ |
165 | static kern_ctl_ref mptcp_kern_ctrl_ref = NULL; |
166 | static uint32_t mptcp_kern_skt_inuse = 0; |
167 | static uint32_t mptcp_kern_skt_unit; |
168 | static symptoms_advisory_t mptcp_advisory; |
169 | |
170 | uint32_t mptcp_cellicon_refcount = 0; |
171 | |
172 | os_log_t mptcp_log_handle; |
173 | |
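/*
* Return the index of the slot tracking `ifindex' in the per-interface
* stats array. If no slot matches and `create' is TRUE, claim the first
* free slot for this ifindex. Returns -1 if nothing was found (or free).
*/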
174 | int |
175 | mptcpstats_get_index_by_ifindex(struct mptcp_itf_stats *stats, u_short ifindex, boolean_t create) |
176 | { |
177 | int i, index = -1; |
178 | |
179 | for (i = 0; i < MPTCP_ITFSTATS_SIZE; i++) { |
180 | if (create && stats[i].ifindex == IFSCOPE_NONE) { |
181 | if (index < 0) { |
182 | index = i; |
183 | } |
184 | continue; |
185 | } |
186 | |
187 | if (stats[i].ifindex == ifindex) { |
188 | index = i; |
189 | return index; |
190 | } |
191 | } |
192 | |
193 | if (index != -1) { |
194 | stats[index].ifindex = ifindex; |
195 | } |
196 | |
197 | return index; |
198 | } |
199 | |
200 | static int |
201 | mptcpstats_get_index(struct mptcp_itf_stats *stats, const struct mptsub *mpts) |
202 | { |
203 | const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp; |
204 | int index; |
205 | |
206 | if (ifp == NULL) { |
os_log_error(mptcp_log_handle, "%s - %lx: no ifp on subflow, state %u flags %#x\n",
208 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpts->mpts_mpte), |
209 | sototcpcb(mpts->mpts_socket)->t_state, mpts->mpts_flags); |
210 | return -1; |
211 | } |
212 | |
index = mptcpstats_get_index_by_ifindex(stats, ifp->if_index, true);
214 | |
215 | if (index != -1) { |
216 | if (stats[index].is_expensive == 0) { |
217 | stats[index].is_expensive = IFNET_IS_CELLULAR(ifp); |
218 | } |
219 | } |
220 | |
221 | return index; |
222 | } |
223 | |
224 | void |
225 | mptcpstats_inc_switch(struct mptses *mpte, const struct mptsub *mpts) |
226 | { |
227 | int index; |
228 | |
229 | tcpstat.tcps_mp_switches++; |
230 | mpte->mpte_subflow_switches++; |
231 | |
index = mptcpstats_get_index(mpte->mpte_itfstats, mpts);
233 | |
234 | if (index != -1) { |
235 | mpte->mpte_itfstats[index].switches++; |
236 | } |
237 | } |
238 | |
239 | /* |
240 | * Flushes all recorded socket options from an MP socket. |
241 | */ |
242 | static void |
243 | mptcp_flush_sopts(struct mptses *mpte) |
244 | { |
245 | struct mptopt *mpo, *tmpo; |
246 | |
247 | TAILQ_FOREACH_SAFE(mpo, &mpte->mpte_sopts, mpo_entry, tmpo) { |
248 | mptcp_sopt_remove(mpte, mpo); |
249 | mptcp_sopt_free(mpo); |
250 | } |
251 | VERIFY(TAILQ_EMPTY(&mpte->mpte_sopts)); |
252 | } |
253 | |
254 | /* |
255 | * Create an MPTCP session, called as a result of opening a MPTCP socket. |
256 | */ |
257 | int |
258 | mptcp_session_create(struct mppcb *mpp) |
259 | { |
260 | struct mpp_mtp *mtp; |
261 | struct mppcbinfo *mppi; |
262 | struct mptses *mpte; |
263 | struct mptcb *mp_tp; |
264 | |
265 | VERIFY(mpp != NULL); |
266 | mppi = mpp->mpp_pcbinfo; |
267 | VERIFY(mppi != NULL); |
268 | |
269 | mtp = __container_of(mpp, struct mpp_mtp, mpp); |
270 | mpte = &mtp->mpp_ses; |
271 | mp_tp = &mtp->mtcb; |
272 | |
273 | /* MPTCP Multipath PCB Extension */ |
bzero(mpte, sizeof(*mpte));
275 | VERIFY(mpp->mpp_pcbe == NULL); |
276 | mpp->mpp_pcbe = mpte; |
277 | mpte->mpte_mppcb = mpp; |
278 | mpte->mpte_mptcb = mp_tp; |
279 | |
280 | TAILQ_INIT(&mpte->mpte_sopts); |
281 | TAILQ_INIT(&mpte->mpte_subflows); |
282 | mpte->mpte_associd = SAE_ASSOCID_ANY; |
283 | mpte->mpte_connid_last = SAE_CONNID_ANY; |
284 | |
285 | mptcp_init_urgency_timer(mpte); |
286 | |
287 | mpte->mpte_itfinfo = &mpte->_mpte_itfinfo[0]; |
288 | mpte->mpte_itfinfo_size = MPTE_ITFINFO_SIZE; |
289 | |
290 | if (mptcp_alternate_port > 0 && mptcp_alternate_port < UINT16_MAX) { |
291 | mpte->mpte_alternate_port = htons((uint16_t)mptcp_alternate_port); |
292 | } |
293 | |
294 | mpte->mpte_last_cellicon_set = tcp_now; |
295 | |
296 | /* MPTCP Protocol Control Block */ |
bzero(mp_tp, sizeof(*mp_tp));
298 | mp_tp->mpt_mpte = mpte; |
299 | mp_tp->mpt_state = MPTCPS_CLOSED; |
300 | |
301 | DTRACE_MPTCP1(session__create, struct mppcb *, mpp); |
302 | |
303 | return 0; |
304 | } |
305 | |
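/*
* Return the destination address to use for new subflows: the cached IPv6
* destination if requested and known, else the IPv4 one. If the interface
* supports neither family, fall back to preferring IPv6 over IPv4; return
* NULL if no unicast destination is known yet.
*/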
306 | struct sockaddr * |
307 | mptcp_get_session_dst(struct mptses *mpte, boolean_t ipv6, boolean_t ipv4) |
308 | { |
309 | if (ipv6 && mpte->mpte_sub_dst_v6.sin6_family == AF_INET6) { |
310 | return SA(&mpte->mpte_sub_dst_v6); |
311 | } |
312 | |
313 | if (ipv4 && mpte->mpte_sub_dst_v4.sin_family == AF_INET) { |
314 | return SA(&mpte->mpte_sub_dst_v4); |
315 | } |
316 | |
317 | /* The interface has neither IPv4 nor IPv6 routes. Give our best guess, |
318 | * meaning we prefer IPv6 over IPv4. |
319 | */ |
320 | if (mpte->mpte_sub_dst_v6.sin6_family == AF_INET6) { |
321 | return SA(&mpte->mpte_sub_dst_v6); |
322 | } |
323 | |
324 | if (mpte->mpte_sub_dst_v4.sin_family == AF_INET) { |
325 | return SA(&mpte->mpte_sub_dst_v4); |
326 | } |
327 | |
328 | /* We don't yet have a unicast IP */ |
329 | return NULL; |
330 | } |
331 | |
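/*
* Sum the bytes transferred over all interfaces, and separately over the
* expensive (cellular) ones. If the initial subflow was on cell, its bytes
* are not counted against the cellular total.
*/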
332 | static void |
333 | mptcpstats_get_bytes(struct mptses *mpte, boolean_t initial_cell, |
334 | uint64_t *cellbytes, uint64_t *allbytes) |
335 | { |
336 | int64_t mycellbytes = 0; |
337 | uint64_t myallbytes = 0; |
338 | int i; |
339 | |
340 | for (i = 0; i < MPTCP_ITFSTATS_SIZE; i++) { |
341 | if (mpte->mpte_itfstats[i].is_expensive) { |
342 | mycellbytes += mpte->mpte_itfstats[i].mpis_txbytes; |
343 | mycellbytes += mpte->mpte_itfstats[i].mpis_rxbytes; |
344 | } |
345 | |
346 | myallbytes += mpte->mpte_itfstats[i].mpis_txbytes; |
347 | myallbytes += mpte->mpte_itfstats[i].mpis_rxbytes; |
348 | } |
349 | |
350 | if (initial_cell) { |
351 | mycellbytes -= mpte->mpte_init_txbytes; |
352 | mycellbytes -= mpte->mpte_init_rxbytes; |
353 | } |
354 | |
355 | if (mycellbytes < 0) { |
os_log_error(mptcp_log_handle, "%s - %lx: cellbytes is %lld\n",
357 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mycellbytes); |
358 | *cellbytes = 0; |
359 | *allbytes = 0; |
360 | } else { |
361 | *cellbytes = mycellbytes; |
362 | *allbytes = myallbytes; |
363 | } |
364 | } |
365 | |
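/*
* Record the final outcome of this session in tcpstat: attempt/success
* counters and cell vs. overall byte counts, keyed off the service-type
* the session was configured with.
*/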
366 | static void |
367 | mptcpstats_session_wrapup(struct mptses *mpte) |
368 | { |
369 | boolean_t cell = mpte->mpte_initial_cell; |
370 | |
371 | switch (mpte->mpte_svctype) { |
372 | case MPTCP_SVCTYPE_HANDOVER: |
373 | if (mpte->mpte_flags & MPTE_FIRSTPARTY) { |
374 | tcpstat.tcps_mptcp_fp_handover_attempt++; |
375 | |
376 | if (cell && mpte->mpte_handshake_success) { |
377 | tcpstat.tcps_mptcp_fp_handover_success_cell++; |
378 | |
379 | if (mpte->mpte_used_wifi) { |
380 | tcpstat.tcps_mptcp_handover_wifi_from_cell++; |
381 | } |
382 | } else if (mpte->mpte_handshake_success) { |
383 | tcpstat.tcps_mptcp_fp_handover_success_wifi++; |
384 | |
385 | if (mpte->mpte_used_cell) { |
386 | tcpstat.tcps_mptcp_handover_cell_from_wifi++; |
387 | } |
388 | } |
389 | } else { |
390 | tcpstat.tcps_mptcp_handover_attempt++; |
391 | |
392 | if (cell && mpte->mpte_handshake_success) { |
393 | tcpstat.tcps_mptcp_handover_success_cell++; |
394 | |
395 | if (mpte->mpte_used_wifi) { |
396 | tcpstat.tcps_mptcp_handover_wifi_from_cell++; |
397 | } |
398 | } else if (mpte->mpte_handshake_success) { |
399 | tcpstat.tcps_mptcp_handover_success_wifi++; |
400 | |
401 | if (mpte->mpte_used_cell) { |
402 | tcpstat.tcps_mptcp_handover_cell_from_wifi++; |
403 | } |
404 | } |
405 | } |
406 | |
407 | if (mpte->mpte_handshake_success) { |
408 | uint64_t cellbytes; |
409 | uint64_t allbytes; |
410 | |
mptcpstats_get_bytes(mpte, cell, &cellbytes, &allbytes);
412 | |
413 | tcpstat.tcps_mptcp_handover_cell_bytes += cellbytes; |
414 | tcpstat.tcps_mptcp_handover_all_bytes += allbytes; |
415 | } |
416 | break; |
417 | case MPTCP_SVCTYPE_INTERACTIVE: |
418 | if (mpte->mpte_flags & MPTE_FIRSTPARTY) { |
419 | tcpstat.tcps_mptcp_fp_interactive_attempt++; |
420 | |
421 | if (mpte->mpte_handshake_success) { |
422 | tcpstat.tcps_mptcp_fp_interactive_success++; |
423 | |
424 | if (!cell && mpte->mpte_used_cell) { |
425 | tcpstat.tcps_mptcp_interactive_cell_from_wifi++; |
426 | } |
427 | } |
428 | } else { |
429 | tcpstat.tcps_mptcp_interactive_attempt++; |
430 | |
431 | if (mpte->mpte_handshake_success) { |
432 | tcpstat.tcps_mptcp_interactive_success++; |
433 | |
434 | if (!cell && mpte->mpte_used_cell) { |
435 | tcpstat.tcps_mptcp_interactive_cell_from_wifi++; |
436 | } |
437 | } |
438 | } |
439 | |
440 | if (mpte->mpte_handshake_success) { |
441 | uint64_t cellbytes; |
442 | uint64_t allbytes; |
443 | |
mptcpstats_get_bytes(mpte, cell, &cellbytes, &allbytes);
445 | |
446 | tcpstat.tcps_mptcp_interactive_cell_bytes += cellbytes; |
447 | tcpstat.tcps_mptcp_interactive_all_bytes += allbytes; |
448 | } |
449 | break; |
450 | case MPTCP_SVCTYPE_AGGREGATE: |
451 | if (mpte->mpte_flags & MPTE_FIRSTPARTY) { |
452 | tcpstat.tcps_mptcp_fp_aggregate_attempt++; |
453 | |
454 | if (mpte->mpte_handshake_success) { |
455 | tcpstat.tcps_mptcp_fp_aggregate_success++; |
456 | } |
457 | } else { |
458 | tcpstat.tcps_mptcp_aggregate_attempt++; |
459 | |
460 | if (mpte->mpte_handshake_success) { |
461 | tcpstat.tcps_mptcp_aggregate_success++; |
462 | } |
463 | } |
464 | |
465 | if (mpte->mpte_handshake_success) { |
466 | uint64_t cellbytes; |
467 | uint64_t allbytes; |
468 | |
mptcpstats_get_bytes(mpte, cell, &cellbytes, &allbytes);
470 | |
471 | tcpstat.tcps_mptcp_aggregate_cell_bytes += cellbytes; |
472 | tcpstat.tcps_mptcp_aggregate_all_bytes += allbytes; |
473 | } |
474 | break; |
475 | } |
476 | |
477 | if (cell && mpte->mpte_handshake_success && mpte->mpte_used_wifi) { |
478 | tcpstat.tcps_mptcp_back_to_wifi++; |
479 | } |
480 | |
481 | if (mpte->mpte_triggered_cell) { |
482 | tcpstat.tcps_mptcp_triggered_cell++; |
483 | } |
484 | } |
485 | |
486 | /* |
487 | * Destroy an MPTCP session. |
488 | */ |
489 | static void |
490 | mptcp_session_destroy(struct mptses *mpte) |
491 | { |
492 | struct mptcb *mp_tp = mpte->mpte_mptcb; |
493 | |
494 | VERIFY(mp_tp != NULL); |
495 | VERIFY(TAILQ_EMPTY(&mpte->mpte_subflows) && mpte->mpte_numflows == 0); |
496 | |
497 | mptcpstats_session_wrapup(mpte); |
mptcp_unset_cellicon(mpte, NULL, mpte->mpte_cellicon_increments);
499 | mptcp_flush_sopts(mpte); |
500 | |
501 | if (mpte->mpte_itfinfo_size > MPTE_ITFINFO_SIZE) { |
502 | kfree_data(mpte->mpte_itfinfo, |
503 | sizeof(*mpte->mpte_itfinfo) * mpte->mpte_itfinfo_size); |
504 | } |
505 | mpte->mpte_itfinfo = NULL; |
506 | |
507 | mptcp_freeq(mp_tp); |
508 | m_freem_list(mpte->mpte_reinjectq); |
509 | |
os_log(mptcp_log_handle, "%s - %lx: Destroying session\n",
511 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte)); |
512 | } |
513 | |
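/*
* Subflows may only be created while the connection is established (but
* not yet closing) and has not fallen back to plain TCP.
*/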
514 | boolean_t |
515 | mptcp_ok_to_create_subflows(struct mptcb *mp_tp) |
516 | { |
517 | return mp_tp->mpt_state >= MPTCPS_ESTABLISHED && |
518 | mp_tp->mpt_state < MPTCPS_FIN_WAIT_1 && |
519 | !(mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP); |
520 | } |
521 | |
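/*
* Synthesize an IPv6 address from an IPv4 address and a NAT64 prefix,
* following the RFC 6052 embedding rules: depending on the prefix length,
* the four IPv4 bytes land at different offsets, skipping the reserved
* bits 64-71 (hence the split copies around byte 8). Addresses that must
* not be synthesized (loopback, link-local, multicast, ...) yield -1.
*/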
522 | static int |
523 | mptcp_synthesize_nat64(struct in6_addr *addr, uint32_t len, |
524 | const struct in_addr *addrv4) |
525 | { |
526 | static const struct in6_addr well_known_prefix = { |
527 | .__u6_addr.__u6_addr8 = {0x00, 0x64, 0xff, 0x9b, 0x00, 0x00, |
528 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
529 | 0x00, 0x00, 0x00, 0x00}, |
530 | }; |
531 | const char *ptrv4 = (const char *)addrv4; |
532 | char *ptr = (char *)addr; |
533 | |
534 | if (IN_ZERONET(ntohl(addrv4->s_addr)) || // 0.0.0.0/8 Source hosts on local network |
535 | IN_LOOPBACK(ntohl(addrv4->s_addr)) || // 127.0.0.0/8 Loopback |
536 | IN_LINKLOCAL(ntohl(addrv4->s_addr)) || // 169.254.0.0/16 Link Local |
537 | IN_DS_LITE(ntohl(addrv4->s_addr)) || // 192.0.0.0/29 DS-Lite |
538 | IN_6TO4_RELAY_ANYCAST(ntohl(addrv4->s_addr)) || // 192.88.99.0/24 6to4 Relay Anycast |
539 | IN_MULTICAST(ntohl(addrv4->s_addr)) || // 224.0.0.0/4 Multicast |
540 | INADDR_BROADCAST == addrv4->s_addr) { // 255.255.255.255/32 Limited Broadcast |
541 | return -1; |
542 | } |
543 | |
544 | /* Check for the well-known prefix */ |
545 | if (len == NAT64_PREFIX_LEN_96 && |
546 | IN6_ARE_ADDR_EQUAL(addr, &well_known_prefix)) { |
547 | if (IN_PRIVATE(ntohl(addrv4->s_addr)) || // 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16 Private-Use |
548 | IN_SHARED_ADDRESS_SPACE(ntohl(addrv4->s_addr))) { // 100.64.0.0/10 Shared Address Space |
549 | return -1; |
550 | } |
551 | } |
552 | |
553 | switch (len) { |
case NAT64_PREFIX_LEN_96:
memcpy(ptr + 12, ptrv4, 4);
break;
case NAT64_PREFIX_LEN_64:
memcpy(ptr + 9, ptrv4, 4);
break;
case NAT64_PREFIX_LEN_56:
memcpy(ptr + 7, ptrv4, 1);
memcpy(ptr + 9, ptrv4 + 1, 3);
break;
case NAT64_PREFIX_LEN_48:
memcpy(ptr + 6, ptrv4, 2);
memcpy(ptr + 9, ptrv4 + 2, 2);
break;
case NAT64_PREFIX_LEN_40:
memcpy(ptr + 5, ptrv4, 3);
memcpy(ptr + 9, ptrv4 + 3, 1);
break;
case NAT64_PREFIX_LEN_32:
memcpy(ptr + 4, ptrv4, 4);
break;
default:
panic("NAT64-prefix len is wrong: %u", len);
577 | } |
578 | |
579 | return 0; |
580 | } |
581 | |
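/*
* Ask the baseband, via NECP, to bring up the cellular interface on behalf
* of this connection. The socket is unlocked around the NECP call to
* respect the NECP-before-socket lock ordering.
*/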
582 | static void |
583 | mptcp_trigger_cell_bringup(struct mptses *mpte) |
584 | { |
585 | struct socket *mp_so = mptetoso(mpte); |
586 | |
if (!uuid_is_null(mpsotomppcb(mp_so)->necp_client_uuid)) {
uuid_string_t uuidstr;
int err;

socket_unlock(mp_so, 0);
err = necp_client_assert_bb_radio_manager(mpsotomppcb(mp_so)->necp_client_uuid,
TRUE);
socket_lock(mp_so, 0);

if (err == 0) {
mpte->mpte_triggered_cell = 1;
}

uuid_unparse_upper(mpsotomppcb(mp_so)->necp_client_uuid, uuidstr);
os_log_info(mptcp_log_handle, "%s - %lx: asked irat to bringup cell for uuid %s, err %d\n",
__func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), uuidstr, err);
} else {
os_log_info(mptcp_log_handle, "%s - %lx: UUID is already null\n",
__func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
}
606 | } |
607 | } |
608 | |
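/*
* A subflow counts as disconnecting if its socket is already disconnected,
* a disconnect/close has been requested, or its TCP state machine has
* reached CLOSED.
*/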
609 | static boolean_t |
610 | mptcp_subflow_disconnecting(struct mptsub *mpts) |
611 | { |
612 | if (mpts->mpts_socket->so_state & SS_ISDISCONNECTED) { |
613 | return true; |
614 | } |
615 | |
616 | if (mpts->mpts_flags & (MPTSF_DISCONNECTING | MPTSF_DISCONNECTED | MPTSF_CLOSE_REQD)) { |
617 | return true; |
618 | } |
619 | |
620 | if (sototcpcb(mpts->mpts_socket)->t_state == TCPS_CLOSED) { |
621 | return true; |
622 | } |
623 | |
624 | return false; |
625 | } |
626 | |
627 | /* |
628 | * In Handover mode, only create cell subflow if |
629 | * - Symptoms marked WiFi as weak: |
630 | * Here, if we are sending data, then we can check the RTO-state. That is a |
631 | * stronger signal of WiFi quality than the Symptoms indicator. |
632 | * If however we are not sending any data, the only thing we can do is guess |
633 | * and thus bring up Cell. |
634 | * |
635 | * - Symptoms marked WiFi as unknown: |
636 | * In this state we don't know what the situation is and thus remain |
637 | * conservative, only bringing up cell if there are retransmissions going on. |
638 | */ |
639 | static boolean_t |
640 | mptcp_handover_use_cellular(struct mptses *mpte, struct tcpcb *tp) |
641 | { |
642 | mptcp_wifi_quality_t wifi_quality = mptcp_wifi_quality_for_session(mpte); |
643 | |
644 | if (wifi_quality == MPTCP_WIFI_QUALITY_GOOD) { |
645 | /* WiFi is good - don't use cell */ |
646 | return false; |
647 | } |
648 | |
649 | if (wifi_quality == MPTCP_WIFI_QUALITY_UNSURE) { |
650 | /* |
651 | * We are in unknown state, only use Cell if we have confirmed |
652 | * that WiFi is bad. |
653 | */ |
654 | if (mptetoso(mpte)->so_snd.sb_cc != 0 && tp->t_rxtshift >= mptcp_fail_thresh * 2) { |
655 | return true; |
656 | } else { |
657 | return false; |
658 | } |
659 | } |
660 | |
661 | if (wifi_quality == MPTCP_WIFI_QUALITY_BAD) { |
662 | /* |
663 | * WiFi is confirmed to be bad from Symptoms-Framework. |
664 | * If we are sending data, check the RTOs. |
665 | * Otherwise, be pessimistic and use Cell. |
666 | */ |
667 | if (mptetoso(mpte)->so_snd.sb_cc != 0) { |
668 | if (tp->t_rxtshift >= mptcp_fail_thresh * 2) { |
669 | return true; |
670 | } else { |
671 | return false; |
672 | } |
673 | } else { |
674 | return true; |
675 | } |
676 | } |
677 | |
678 | return false; |
679 | } |
680 | |
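/*
* Walk the usable interfaces and create subflows on those not yet covered,
* honoring the service-type policy (e.g., in handover mode, cell is only
* brought up when WiFi looks bad). If cell is wanted but no cellular
* interface is viable, trigger its bringup.
*/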
681 | void |
682 | mptcp_check_subflows_and_add(struct mptses *mpte) |
683 | { |
684 | struct mptcb *mp_tp = mpte->mpte_mptcb; |
685 | boolean_t cellular_viable = FALSE; |
686 | boolean_t want_cellular = TRUE; |
687 | uint32_t i; |
688 | |
689 | if (!mptcp_ok_to_create_subflows(mp_tp)) { |
os_log_debug(mptcp_log_handle, "%s - %lx: not a good time for subflows, state %u flags %#x",
691 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mp_tp->mpt_state, mp_tp->mpt_flags); |
692 | return; |
693 | } |
694 | |
695 | /* Just to see if we have an IP-address available */ |
696 | if (mptcp_get_session_dst(mpte, false, false) == NULL) { |
697 | return; |
698 | } |
699 | |
700 | for (i = 0; i < mpte->mpte_itfinfo_size; i++) { |
701 | boolean_t need_to_ask_symptoms = FALSE, found = FALSE; |
702 | struct mpt_itf_info *info; |
703 | struct sockaddr_in6 nat64pre; |
704 | struct sockaddr *dst; |
705 | struct mptsub *mpts; |
706 | struct ifnet *ifp; |
707 | uint32_t ifindex; |
708 | |
709 | info = &mpte->mpte_itfinfo[i]; |
710 | |
711 | ifindex = info->ifindex; |
712 | if (ifindex == IFSCOPE_NONE) { |
713 | continue; |
714 | } |
715 | |
os_log(mptcp_log_handle, "%s - %lx: itf %u no support %u hasv4 %u hasv6 %u hasnat64 %u\n",
717 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), info->ifindex, info->no_mptcp_support, |
718 | info->has_v4_conn, info->has_v6_conn, info->has_nat64_conn); |
719 | |
720 | if (info->no_mptcp_support) { |
721 | continue; |
722 | } |
723 | |
724 | ifnet_head_lock_shared(); |
725 | ifp = ifindex2ifnet[ifindex]; |
726 | ifnet_head_done(); |
727 | |
728 | if (ifp == NULL) { |
729 | continue; |
730 | } |
731 | |
732 | if (IFNET_IS_CELLULAR(ifp)) { |
733 | cellular_viable = TRUE; |
734 | |
735 | if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER || |
736 | mpte->mpte_svctype == MPTCP_SVCTYPE_PURE_HANDOVER) { |
737 | if (mptcp_wifi_quality_for_session(mpte) == MPTCP_WIFI_QUALITY_GOOD) { |
738 | continue; |
739 | } |
740 | } |
741 | } |
742 | |
743 | TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { |
744 | const struct ifnet *subifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp; |
745 | struct tcpcb *tp = sototcpcb(mpts->mpts_socket); |
746 | |
747 | if (subifp == NULL) { |
748 | continue; |
749 | } |
750 | |
751 | /* |
752 | * If there is at least one functioning subflow on WiFi |
753 | * and we are checking for the cell interface, then |
754 | * we always need to ask symptoms for permission as |
755 | * cell is triggered even if WiFi is available. |
756 | */ |
757 | if (!IFNET_IS_CELLULAR(subifp) && |
758 | !mptcp_subflow_disconnecting(mpts) && |
759 | IFNET_IS_CELLULAR(ifp)) { |
760 | need_to_ask_symptoms = TRUE; |
761 | } |
762 | |
763 | if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER || mpte->mpte_svctype == MPTCP_SVCTYPE_PURE_HANDOVER) { |
os_log(mptcp_log_handle,
"%s - %lx: %s: cell %u wifi-state %d flags %#x rxt %u first-party %u sb_cc %u ifindex %u this %u rtt %u rttvar %u rto %u\n",
766 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), |
mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER ? "handover" : "pure-handover",
768 | IFNET_IS_CELLULAR(subifp), |
769 | mptcp_wifi_quality_for_session(mpte), |
770 | mpts->mpts_flags, |
771 | tp->t_rxtshift, |
772 | !!(mpte->mpte_flags & MPTE_FIRSTPARTY), |
773 | mptetoso(mpte)->so_snd.sb_cc, |
774 | ifindex, subifp->if_index, |
775 | tp->t_srtt >> TCP_RTT_SHIFT, |
776 | tp->t_rttvar >> TCP_RTTVAR_SHIFT, |
777 | tp->t_rxtcur); |
778 | |
779 | if (!IFNET_IS_CELLULAR(subifp) && |
780 | !mptcp_subflow_disconnecting(mpts) && |
781 | (mpts->mpts_flags & MPTSF_CONNECTED) && |
782 | !mptcp_handover_use_cellular(mpte, tp)) { |
783 | found = TRUE; |
784 | |
785 | /* We found a proper subflow on WiFi - no need for cell */ |
786 | want_cellular = FALSE; |
787 | break; |
788 | } |
789 | } else if (mpte->mpte_svctype == MPTCP_SVCTYPE_TARGET_BASED) { |
790 | uint64_t time_now = mach_continuous_time(); |
791 | |
os_log(mptcp_log_handle,
"%s - %lx: target-based: %llu now %llu wifi quality %d cell %u sostat %#x mpts_flags %#x tcp-state %u\n",
794 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_time_target, |
795 | time_now, mptcp_wifi_quality_for_session(mpte), |
796 | IFNET_IS_CELLULAR(subifp), mpts->mpts_socket->so_state, |
797 | mpts->mpts_flags, sototcpcb(mpts->mpts_socket)->t_state); |
798 | |
799 | if (!IFNET_IS_CELLULAR(subifp) && |
800 | !mptcp_subflow_disconnecting(mpts) && |
801 | (mpte->mpte_time_target == 0 || |
802 | (int64_t)(mpte->mpte_time_target - time_now) > 0 || |
803 | mptcp_wifi_quality_for_session(mpte) == MPTCP_WIFI_QUALITY_GOOD)) { |
804 | found = TRUE; |
805 | |
806 | want_cellular = FALSE; |
807 | break; |
808 | } |
809 | } |
810 | |
811 | if (subifp->if_index == ifindex && |
812 | !mptcp_subflow_disconnecting(mpts)) { |
813 | /* |
814 | * We found a subflow on this interface. |
815 | * No need to create a new one. |
816 | */ |
817 | found = TRUE; |
818 | break; |
819 | } |
820 | } |
821 | |
822 | if (found) { |
823 | continue; |
824 | } |
825 | |
826 | if (need_to_ask_symptoms && |
827 | !(mpte->mpte_flags & MPTE_FIRSTPARTY) && |
828 | !(mpte->mpte_flags & MPTE_ACCESS_GRANTED) && |
829 | mptcp_developer_mode == 0) { |
830 | mptcp_ask_symptoms(mpte); |
831 | return; |
832 | } |
833 | |
dst = mptcp_get_session_dst(mpte, info->has_v6_conn, info->has_v4_conn);
835 | |
836 | if (dst->sa_family == AF_INET && |
837 | !info->has_v4_conn && info->has_nat64_conn) { |
838 | struct ipv6_prefix nat64prefixes[NAT64_MAX_NUM_PREFIXES]; |
839 | int error, j; |
840 | |
841 | SOCKADDR_ZERO(&nat64pre, sizeof(struct sockaddr_in6)); |
842 | |
843 | error = ifnet_get_nat64prefix(ifp, nat64prefixes); |
844 | if (error) { |
os_log_error(mptcp_log_handle, "%s - %lx: no NAT64-prefix on itf %s, error %d\n",
846 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), ifp->if_name, error); |
847 | continue; |
848 | } |
849 | |
850 | for (j = 0; j < NAT64_MAX_NUM_PREFIXES; j++) { |
851 | if (nat64prefixes[j].prefix_len != 0) { |
852 | break; |
853 | } |
854 | } |
855 | |
856 | VERIFY(j < NAT64_MAX_NUM_PREFIXES); |
857 | |
error = mptcp_synthesize_nat64(&nat64prefixes[j].ipv6_prefix,
nat64prefixes[j].prefix_len,
&SIN(dst)->sin_addr);
861 | if (error != 0) { |
os_log_error(mptcp_log_handle, "%s - %lx: cannot synthesize this addr\n",
863 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte)); |
864 | continue; |
865 | } |
866 | |
memcpy(&nat64pre.sin6_addr,
&nat64prefixes[j].ipv6_prefix,
sizeof(nat64pre.sin6_addr));
870 | nat64pre.sin6_len = sizeof(struct sockaddr_in6); |
871 | nat64pre.sin6_family = AF_INET6; |
872 | nat64pre.sin6_port = SIN(dst)->sin_port; |
873 | nat64pre.sin6_flowinfo = 0; |
874 | nat64pre.sin6_scope_id = 0; |
875 | |
876 | dst = SA(&nat64pre); |
877 | } |
878 | |
879 | if (dst->sa_family == AF_INET && !info->has_v4_conn) { |
880 | continue; |
881 | } |
882 | if (dst->sa_family == AF_INET6 && !info->has_v6_conn) { |
883 | continue; |
884 | } |
885 | |
886 | mptcp_subflow_add(mpte, NULL, dst, ifindex, NULL); |
887 | } |
888 | |
889 | if (!cellular_viable && want_cellular) { |
890 | /* Trigger Cell Bringup */ |
891 | mptcp_trigger_cell_bringup(mpte); |
892 | } |
893 | } |
894 | |
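/* Issue a forced-RST event on every subflow running over a cellular interface. */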
895 | static void |
896 | mptcp_remove_cell_subflows(struct mptses *mpte) |
897 | { |
898 | struct mptsub *mpts, *tmpts; |
899 | |
900 | TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) { |
901 | const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp; |
902 | |
903 | if (ifp == NULL || !IFNET_IS_CELLULAR(ifp)) { |
904 | continue; |
905 | } |
906 | |
os_log(mptcp_log_handle, "%s - %lx: removing cell subflow\n",
__func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));

soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
911 | } |
912 | |
913 | return; |
914 | } |
915 | |
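/* Issue a forced-RST event on every subflow running over a non-cellular interface. */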
916 | static void |
917 | mptcp_remove_wifi_subflows(struct mptses *mpte) |
918 | { |
919 | struct mptsub *mpts, *tmpts; |
920 | |
921 | TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) { |
922 | const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp; |
923 | |
924 | if (ifp == NULL || IFNET_IS_CELLULAR(ifp)) { |
925 | continue; |
926 | } |
927 | |
os_log(mptcp_log_handle, "%s - %lx: removing wifi subflow\n",
__func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));

soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
932 | } |
933 | |
934 | return; |
935 | } |
936 | |
937 | static void |
938 | mptcp_pure_handover_subflows_remove(struct mptses *mpte) |
939 | { |
940 | mptcp_wifi_quality_t wifi_quality = mptcp_wifi_quality_for_session(mpte); |
941 | boolean_t found_working_wifi_subflow = false; |
942 | boolean_t found_working_cell_subflow = false; |
943 | |
944 | struct mptsub *mpts; |
945 | |
946 | /* |
947 | * Look for a subflow that is on a non-cellular interface in connected |
948 | * state. |
949 | * |
950 | * In that case, remove all cellular subflows. |
951 | * |
952 | * If however there is no connected subflow |
953 | */ |
954 | TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { |
955 | const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp; |
956 | struct socket *so; |
957 | struct tcpcb *tp; |
958 | |
959 | if (ifp == NULL) { |
960 | continue; |
961 | } |
962 | |
963 | so = mpts->mpts_socket; |
964 | tp = sototcpcb(so); |
965 | |
966 | if (!(mpts->mpts_flags & MPTSF_CONNECTED) || |
967 | tp->t_state != TCPS_ESTABLISHED || |
968 | mptcp_subflow_disconnecting(mpts)) { |
969 | continue; |
970 | } |
971 | |
972 | if (IFNET_IS_CELLULAR(ifp)) { |
973 | found_working_cell_subflow = true; |
974 | } else { |
os_log_debug(mptcp_log_handle, "%s - %lx: rxt %u sb_cc %u wifi quality %d\n",
976 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), tp->t_rxtshift, mptetoso(mpte)->so_snd.sb_cc, wifi_quality); |
977 | if (!mptcp_handover_use_cellular(mpte, tp)) { |
978 | found_working_wifi_subflow = true; |
979 | } |
980 | } |
981 | } |
982 | |
983 | /* |
984 | * Couldn't find a working subflow, let's not remove those on a cellular |
985 | * interface. |
986 | */ |
os_log_debug(mptcp_log_handle, "%s - %lx: Found Wi-Fi: %u Found Cellular %u",
988 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), |
989 | found_working_wifi_subflow, found_working_cell_subflow); |
990 | if (!found_working_wifi_subflow && wifi_quality != MPTCP_WIFI_QUALITY_GOOD) { |
991 | if (found_working_cell_subflow) { |
992 | mptcp_remove_wifi_subflows(mpte); |
993 | } |
994 | return; |
995 | } |
996 | |
997 | mptcp_remove_cell_subflows(mpte); |
998 | } |
999 | |
1000 | static void |
1001 | mptcp_handover_subflows_remove(struct mptses *mpte) |
1002 | { |
1003 | mptcp_wifi_quality_t wifi_quality = mptcp_wifi_quality_for_session(mpte); |
1004 | boolean_t found_working_subflow = false; |
1005 | struct mptsub *mpts; |
1006 | |
1007 | /* |
1008 | * Look for a subflow that is on a non-cellular interface |
1009 | * and actually works (aka, no retransmission timeout). |
1010 | */ |
1011 | TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { |
1012 | const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp; |
1013 | struct socket *so; |
1014 | struct tcpcb *tp; |
1015 | |
1016 | if (ifp == NULL || IFNET_IS_CELLULAR(ifp)) { |
1017 | continue; |
1018 | } |
1019 | |
1020 | so = mpts->mpts_socket; |
1021 | tp = sototcpcb(so); |
1022 | |
1023 | if (!(mpts->mpts_flags & MPTSF_CONNECTED) || |
1024 | tp->t_state != TCPS_ESTABLISHED) { |
1025 | continue; |
1026 | } |
1027 | |
os_log_debug(mptcp_log_handle, "%s - %lx: rxt %u sb_cc %u wifi quality %d\n",
1029 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), tp->t_rxtshift, mptetoso(mpte)->so_snd.sb_cc, wifi_quality); |
1030 | |
1031 | if (!mptcp_handover_use_cellular(mpte, tp)) { |
1032 | found_working_subflow = true; |
1033 | break; |
1034 | } |
1035 | } |
1036 | |
1037 | /* |
1038 | * Couldn't find a working subflow, let's not remove those on a cellular |
1039 | * interface. |
1040 | */ |
1041 | if (!found_working_subflow) { |
1042 | return; |
1043 | } |
1044 | |
1045 | mptcp_remove_cell_subflows(mpte); |
1046 | } |
1047 | |
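/*
* In target-based mode, keep the cellular subflows only while we are past
* the target time and WiFi is still bad; otherwise tear them down as soon
* as a connected WiFi subflow exists.
*/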
1048 | static void |
1049 | mptcp_targetbased_subflows_remove(struct mptses *mpte) |
1050 | { |
1051 | uint64_t time_now = mach_continuous_time(); |
1052 | struct mptsub *mpts; |
1053 | |
1054 | if (mpte->mpte_time_target != 0 && |
1055 | (int64_t)(mpte->mpte_time_target - time_now) <= 0 && |
1056 | mptcp_wifi_quality_for_session(mpte) != MPTCP_WIFI_QUALITY_GOOD) { |
1057 | /* WiFi is bad and we are below the target - don't remove any subflows */ |
1058 | return; |
1059 | } |
1060 | |
1061 | TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { |
1062 | const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp; |
1063 | |
1064 | if (ifp == NULL || IFNET_IS_CELLULAR(ifp)) { |
1065 | continue; |
1066 | } |
1067 | |
1068 | /* We have a functioning subflow on WiFi. No need for cell! */ |
1069 | if (mpts->mpts_flags & MPTSF_CONNECTED && |
1070 | !mptcp_subflow_disconnecting(mpts)) { |
1071 | mptcp_remove_cell_subflows(mpte); |
1072 | break; |
1073 | } |
1074 | } |
1075 | } |
1076 | |
1077 | /* |
1078 | * Based on the MPTCP Service-type and the state of the subflows, we |
1079 | * will destroy subflows here. |
1080 | */ |
1081 | void |
1082 | mptcp_check_subflows_and_remove(struct mptses *mpte) |
1083 | { |
if (!mptcp_ok_to_create_subflows(mpte->mpte_mptcb)) {
1085 | return; |
1086 | } |
1087 | |
socket_lock_assert_owned(mptetoso(mpte));
1089 | |
1090 | if (mpte->mpte_svctype == MPTCP_SVCTYPE_PURE_HANDOVER) { |
1091 | mptcp_pure_handover_subflows_remove(mpte); |
1092 | } |
1093 | |
1094 | if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER) { |
1095 | mptcp_handover_subflows_remove(mpte); |
1096 | } |
1097 | |
1098 | if (mpte->mpte_svctype == MPTCP_SVCTYPE_TARGET_BASED) { |
1099 | mptcp_targetbased_subflows_remove(mpte); |
1100 | } |
1101 | } |
1102 | |
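/*
* Tear down subflows whose interface no longer offers a matching address
* family, as well as those that NECP asked us to close via MPTSF_CLOSE_REQD.
*/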
1103 | static void |
1104 | mptcp_remove_subflows(struct mptses *mpte) |
1105 | { |
1106 | struct mptsub *mpts, *tmpts; |
1107 | |
if (!mptcp_ok_to_create_subflows(mpte->mpte_mptcb)) {
1109 | return; |
1110 | } |
1111 | |
1112 | TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) { |
1113 | const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp; |
1114 | boolean_t found = false; |
1115 | uint32_t ifindex; |
1116 | uint32_t i; |
1117 | |
1118 | if (mpts->mpts_flags & MPTSF_CLOSE_REQD) { |
1119 | mpts->mpts_flags &= ~MPTSF_CLOSE_REQD; |
1120 | |
os_log(mptcp_log_handle, "%s - %lx: itf %u close_reqd last itf %d\n",
__func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpts->mpts_ifscope,
ifp ? ifp->if_index : -1);
soevent(mpts->mpts_socket,
SO_FILT_HINT_LOCKED | SO_FILT_HINT_NOSRCADDR);
1126 | |
1127 | continue; |
1128 | } |
1129 | |
1130 | if (ifp == NULL && mpts->mpts_ifscope == IFSCOPE_NONE) { |
1131 | continue; |
1132 | } |
1133 | |
1134 | if (ifp) { |
1135 | ifindex = ifp->if_index; |
1136 | } else { |
1137 | ifindex = mpts->mpts_ifscope; |
1138 | } |
1139 | |
1140 | for (i = 0; i < mpte->mpte_itfinfo_size; i++) { |
1141 | if (mpte->mpte_itfinfo[i].ifindex == IFSCOPE_NONE) { |
1142 | continue; |
1143 | } |
1144 | |
1145 | if (mpte->mpte_itfinfo[i].ifindex == ifindex) { |
1146 | if (mpts->mpts_dst.sa_family == AF_INET6 && |
1147 | (mpte->mpte_itfinfo[i].has_v6_conn || mpte->mpte_itfinfo[i].has_nat64_conn)) { |
1148 | found = true; |
1149 | break; |
1150 | } |
1151 | |
1152 | if (mpts->mpts_dst.sa_family == AF_INET && |
1153 | mpte->mpte_itfinfo[i].has_v4_conn) { |
1154 | found = true; |
1155 | break; |
1156 | } |
1157 | } |
1158 | } |
1159 | |
1160 | if (!found) { |
os_log(mptcp_log_handle, "%s - %lx: itf %u killing %#x\n",
__func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
ifindex, mpts->mpts_flags);

soevent(mpts->mpts_socket,
SO_FILT_HINT_LOCKED | SO_FILT_HINT_NOSRCADDR);
1167 | } |
1168 | } |
1169 | } |
1170 | |
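/*
* Timer callback (scheduled by mptcp_sched_create_subflows) that walks all
* MPTCP connections and performs the deferred subflow creation and removal.
*/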
1171 | static void |
1172 | mptcp_create_subflows(__unused void *arg) |
1173 | { |
1174 | struct mppcb *mpp; |
1175 | |
1176 | /* |
1177 | * Start with clearing, because we might be processing connections |
1178 | * while a new event comes in. |
1179 | */ |
if (OSTestAndClear(0x01, &mptcp_create_subflows_scheduled)) {
os_log_error(mptcp_log_handle, "%s: bit was already cleared!\n", __func__);
1182 | } |
1183 | |
1184 | /* Iterate over all MPTCP connections */ |
1185 | |
lck_mtx_lock(&mtcbinfo.mppi_lock);
1187 | |
1188 | TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) { |
1189 | struct socket *mp_so = mpp->mpp_socket; |
1190 | struct mptses *mpte = mpp->mpp_pcbe; |
1191 | |
socket_lock(mp_so, 1);
1193 | if (!(mpp->mpp_flags & MPP_CREATE_SUBFLOWS) || |
1194 | !(mpte->mpte_flags & MPTE_ITFINFO_INIT)) { |
socket_unlock(mp_so, 1);
1196 | continue; |
1197 | } |
1198 | |
1199 | VERIFY(mp_so->so_usecount > 0); |
1200 | |
1201 | mpp->mpp_flags &= ~MPP_CREATE_SUBFLOWS; |
1202 | |
1203 | mptcp_check_subflows_and_add(mpte); |
1204 | mptcp_remove_subflows(mpte); |
1205 | |
1206 | mp_so->so_usecount--; /* See mptcp_sched_create_subflows */ |
socket_unlock(mp_so, 1);
1208 | } |
1209 | |
lck_mtx_unlock(&mtcbinfo.mppi_lock);
1211 | } |
1212 | |
1213 | /* |
1214 | * We need this because we are coming from an NECP-event. This event gets posted |
1215 | * while holding NECP-locks. The creation of the subflow however leads us back |
1216 | * into NECP (e.g., to add the necp_cb and also from tcp_connect). |
1217 | * So, we would deadlock there as we already hold the NECP-lock. |
1218 | * |
1219 | * So, let's schedule this separately. It also gives NECP the chance to make |
1220 | * progress, without having to wait for MPTCP to finish its subflow creation. |
1221 | */ |
1222 | void |
1223 | mptcp_sched_create_subflows(struct mptses *mpte) |
1224 | { |
1225 | struct mppcb *mpp = mpte->mpte_mppcb; |
1226 | struct mptcb *mp_tp = mpte->mpte_mptcb; |
1227 | struct socket *mp_so = mpp->mpp_socket; |
1228 | |
1229 | if (!mptcp_ok_to_create_subflows(mp_tp)) { |
os_log_debug(mptcp_log_handle, "%s - %lx: not a good time for subflows, state %u flags %#x",
1231 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mp_tp->mpt_state, mp_tp->mpt_flags); |
1232 | return; |
1233 | } |
1234 | |
1235 | if (!(mpp->mpp_flags & MPP_CREATE_SUBFLOWS)) { |
1236 | mp_so->so_usecount++; /* To prevent it from being free'd in-between */ |
1237 | mpp->mpp_flags |= MPP_CREATE_SUBFLOWS; |
1238 | } |
1239 | |
if (OSTestAndSet(0x01, &mptcp_create_subflows_scheduled)) {
1241 | return; |
1242 | } |
1243 | |
1244 | /* Do the call in 100ms to allow NECP to schedule it on all sockets */ |
timeout(mptcp_create_subflows, NULL, hz / 10);
1246 | } |
1247 | |
1248 | /* |
1249 | * Allocate an MPTCP socket option structure. |
1250 | */ |
1251 | struct mptopt * |
1252 | mptcp_sopt_alloc(zalloc_flags_t how) |
1253 | { |
1254 | return zalloc_flags(mptopt_zone, how | Z_ZERO); |
1255 | } |
1256 | |
1257 | /* |
1258 | * Free an MPTCP socket option structure. |
1259 | */ |
1260 | void |
1261 | mptcp_sopt_free(struct mptopt *mpo) |
1262 | { |
1263 | VERIFY(!(mpo->mpo_flags & MPOF_ATTACHED)); |
1264 | |
1265 | zfree(mptopt_zone, mpo); |
1266 | } |
1267 | |
1268 | /* |
1269 | * Add a socket option to the MPTCP socket option list. |
1270 | */ |
1271 | void |
1272 | mptcp_sopt_insert(struct mptses *mpte, struct mptopt *mpo) |
1273 | { |
socket_lock_assert_owned(mptetoso(mpte));
1275 | mpo->mpo_flags |= MPOF_ATTACHED; |
1276 | TAILQ_INSERT_TAIL(&mpte->mpte_sopts, mpo, mpo_entry); |
1277 | } |
1278 | |
1279 | /* |
1280 | * Remove a socket option from the MPTCP socket option list. |
1281 | */ |
1282 | void |
1283 | mptcp_sopt_remove(struct mptses *mpte, struct mptopt *mpo) |
1284 | { |
socket_lock_assert_owned(mptetoso(mpte));
1286 | VERIFY(mpo->mpo_flags & MPOF_ATTACHED); |
1287 | mpo->mpo_flags &= ~MPOF_ATTACHED; |
1288 | TAILQ_REMOVE(&mpte->mpte_sopts, mpo, mpo_entry); |
1289 | } |
1290 | |
1291 | /* |
1292 | * Search for an existing <sopt_level,sopt_name> socket option. |
1293 | */ |
1294 | struct mptopt * |
1295 | mptcp_sopt_find(struct mptses *mpte, struct sockopt *sopt) |
1296 | { |
1297 | struct mptopt *mpo; |
1298 | |
socket_lock_assert_owned(mptetoso(mpte));
1300 | |
1301 | TAILQ_FOREACH(mpo, &mpte->mpte_sopts, mpo_entry) { |
1302 | if (mpo->mpo_level == sopt->sopt_level && |
1303 | mpo->mpo_name == sopt->sopt_name) { |
1304 | break; |
1305 | } |
1306 | } |
1307 | return mpo; |
1308 | } |
1309 | |
1310 | /* |
1311 | * Allocate a MPTCP subflow structure. |
1312 | */ |
1313 | static struct mptsub * |
1314 | mptcp_subflow_alloc(void) |
1315 | { |
1316 | return zalloc_flags(mptsub_zone, Z_WAITOK | Z_ZERO); |
1317 | } |
1318 | |
1319 | /* |
1320 | * Deallocate a subflow structure, called when all of the references held |
1321 | * on it have been released. This implies that the subflow has been deleted. |
1322 | */ |
1323 | static void |
1324 | mptcp_subflow_free(struct mptsub *mpts) |
1325 | { |
1326 | VERIFY(mpts->mpts_refcnt == 0); |
1327 | VERIFY(mpts->mpts_mpte == NULL); |
1328 | VERIFY(mpts->mpts_socket == NULL); |
1329 | |
1330 | free_sockaddr(mpts->mpts_src); |
1331 | |
1332 | zfree(mptsub_zone, mpts); |
1333 | } |
1334 | |
1335 | static void |
1336 | mptcp_subflow_addref(struct mptsub *mpts) |
1337 | { |
if (++mpts->mpts_refcnt == 0) {
panic("%s: mpts %p wraparound refcnt", __func__, mpts);
/* NOTREACHED */
}
1342 | } |
1343 | |
1344 | static void |
1345 | mptcp_subflow_remref(struct mptsub *mpts) |
1346 | { |
1347 | if (mpts->mpts_refcnt == 0) { |
panic("%s: mpts %p negative refcnt", __func__, mpts);
1349 | /* NOTREACHED */ |
1350 | } |
1351 | if (--mpts->mpts_refcnt > 0) { |
1352 | return; |
1353 | } |
1354 | |
1355 | /* callee will unlock and destroy lock */ |
1356 | mptcp_subflow_free(mpts); |
1357 | } |
1358 | |
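/*
* Link a freshly created subflow socket into the MPTCP session: hook up
* the tcpcb, mark the socket as a subflow, and insert it into the session's
* subflow list, taking the references that go with it.
*/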
1359 | static void |
1360 | mptcp_subflow_attach(struct mptses *mpte, struct mptsub *mpts, struct socket *so) |
1361 | { |
1362 | struct socket *mp_so = mpte->mpte_mppcb->mpp_socket; |
1363 | struct tcpcb *tp = sototcpcb(so); |
1364 | |
1365 | /* |
1366 | * From this moment on, the subflow is linked to the MPTCP-connection. |
1367 | * Locking,... happens now at the MPTCP-layer |
1368 | */ |
1369 | tp->t_mptcb = mpte->mpte_mptcb; |
1370 | so->so_flags |= SOF_MP_SUBFLOW; |
1371 | mp_so->so_usecount++; |
1372 | |
1373 | /* |
1374 | * Insert the subflow into the list, and associate the MPTCP PCB |
* as well as the subflow socket. From this point on, removing
1376 | * the subflow needs to be done via mptcp_subflow_del(). |
1377 | */ |
1378 | TAILQ_INSERT_TAIL(&mpte->mpte_subflows, mpts, mpts_entry); |
1379 | mpte->mpte_numflows++; |
1380 | |
1381 | mpts->mpts_mpte = mpte; |
1382 | mpts->mpts_socket = so; |
1383 | tp->t_mpsub = mpts; |
1384 | mptcp_subflow_addref(mpts); /* for being in MPTCP subflow list */ |
1385 | mptcp_subflow_addref(mpts); /* for subflow socket */ |
1386 | } |
1387 | |
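/*
* NECP callback on subflow sockets: when a subflow becomes non-viable (or
* its interface enters low-power mode), request its closure and schedule
* the creation of replacement subflows.
*/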
1388 | static void |
1389 | mptcp_subflow_necp_cb(void *handle, __unused int action, |
1390 | __unused uint32_t interface_index, |
1391 | uint32_t necp_flags, bool *viable) |
1392 | { |
1393 | boolean_t low_power = !!(necp_flags & NECP_CLIENT_RESULT_FLAG_INTERFACE_LOW_POWER); |
1394 | struct inpcb *inp = (struct inpcb *)handle; |
1395 | struct socket *so = inp->inp_socket; |
1396 | struct mptsub *mpts; |
1397 | struct mptses *mpte; |
1398 | |
1399 | if (low_power) { |
1400 | action = NECP_CLIENT_CBACTION_NONVIABLE; |
1401 | } |
1402 | |
1403 | if (action != NECP_CLIENT_CBACTION_NONVIABLE) { |
1404 | return; |
1405 | } |
1406 | |
1407 | /* |
1408 | * The socket is being garbage-collected. There is nothing to be done |
1409 | * here. |
1410 | */ |
1411 | if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) { |
1412 | return; |
1413 | } |
1414 | |
socket_lock(so, 1);
1416 | |
1417 | /* Check again after we acquired the lock. */ |
1418 | if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) { |
1419 | goto out; |
1420 | } |
1421 | |
1422 | mpte = tptomptp(sototcpcb(so))->mpt_mpte; |
1423 | mpts = sototcpcb(so)->t_mpsub; |
1424 | |
os_log_debug(mptcp_log_handle, "%s - %lx: Subflow on itf %u became non-viable, power %u",
1426 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpts->mpts_ifscope, low_power); |
1427 | |
1428 | mpts->mpts_flags |= MPTSF_CLOSE_REQD; |
1429 | |
1430 | mptcp_sched_create_subflows(mpte); |
1431 | |
1432 | if ((mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER || |
1433 | mpte->mpte_svctype == MPTCP_SVCTYPE_PURE_HANDOVER || |
1434 | mpte->mpte_svctype == MPTCP_SVCTYPE_TARGET_BASED) && |
1435 | viable != NULL) { |
1436 | *viable = 1; |
1437 | } |
1438 | |
1439 | out: |
socket_unlock(so, 1);
1441 | } |
1442 | |
1443 | /* |
1444 | * Create an MPTCP subflow socket. |
1445 | */ |
1446 | static int |
1447 | mptcp_subflow_socreate(struct mptses *mpte, struct mptsub *mpts, int dom, |
1448 | struct socket **so) |
1449 | { |
1450 | lck_mtx_t *subflow_mtx; |
1451 | struct mptopt smpo, *mpo, *tmpo; |
1452 | struct proc *p; |
1453 | struct socket *mp_so; |
1454 | struct mppcb *mpp; |
1455 | int error; |
1456 | |
1457 | *so = NULL; |
1458 | |
1459 | mp_so = mptetoso(mpte); |
1460 | mpp = mpsotomppcb(mp_so); |
1461 | |
p = proc_find(mp_so->last_pid);
1463 | if (p == PROC_NULL) { |
os_log_error(mptcp_log_handle, "%s - %lx: Couldn't find proc for pid %u\n",
1465 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mp_so->last_pid); |
1466 | |
1467 | mptcp_subflow_free(mpts); |
1468 | return ESRCH; |
1469 | } |
1470 | |
1471 | /* |
1472 | * Create the subflow socket (multipath subflow, non-blocking.) |
1473 | * |
1474 | * This will cause SOF_MP_SUBFLOW socket flag to be set on the subflow |
1475 | * socket; it will be cleared when the socket is peeled off or closed. |
1476 | * It also indicates to the underlying TCP to handle MPTCP options. |
1477 | * A multipath subflow socket implies SS_NOFDREF state. |
1478 | */ |
1479 | |
1480 | /* |
1481 | * Unlock, because tcp_usr_attach ends up in in_pcballoc, which takes |
1482 | * the ipi-lock. We cannot hold the socket-lock at that point. |
1483 | */ |
socket_unlock(mp_so, 0);
error = socreate_internal(dom, so, SOCK_STREAM, IPPROTO_TCP, p,
SOCF_MPTCP, PROC_NULL);
socket_lock(mp_so, 0);
1488 | if (error) { |
os_log_error(mptcp_log_handle, "%s - %lx: unable to create subflow socket error %d\n",
1490 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error); |
1491 | |
1492 | proc_rele(p); |
1493 | |
1494 | mptcp_subflow_free(mpts); |
1495 | return error; |
1496 | } |
1497 | |
1498 | /* |
1499 | * We need to protect the setting of SOF_MP_SUBFLOW with a lock, because |
1500 | * this marks the moment of lock-switch from the TCP-lock to the MPTCP-lock. |
1501 | * Which is why we also need to get the lock with pr_getlock, as after |
1502 | * setting the flag, socket_unlock will work on the MPTCP-level lock. |
1503 | */ |
1504 | subflow_mtx = ((*so)->so_proto->pr_getlock)(*so, 0); |
lck_mtx_lock(subflow_mtx);
1506 | |
1507 | /* |
1508 | * Must be the first thing we do, to make sure all pointers for this |
1509 | * subflow are set. |
1510 | */ |
mptcp_subflow_attach(mpte, mpts, *so);
1512 | |
1513 | /* |
1514 | * A multipath subflow socket is used internally in the kernel, |
* therefore it does not have a file descriptor associated by
1516 | * default. |
1517 | */ |
1518 | (*so)->so_state |= SS_NOFDREF; |
1519 | |
lck_mtx_unlock(subflow_mtx);
1521 | |
1522 | /* prevent the socket buffers from being compressed */ |
1523 | (*so)->so_rcv.sb_flags |= SB_NOCOMPRESS; |
1524 | (*so)->so_snd.sb_flags |= SB_NOCOMPRESS; |
1525 | |
1526 | /* Inherit preconnect and TFO data flags */ |
1527 | if (mp_so->so_flags1 & SOF1_PRECONNECT_DATA) { |
1528 | (*so)->so_flags1 |= SOF1_PRECONNECT_DATA; |
1529 | } |
1530 | if (mp_so->so_flags1 & SOF1_DATA_IDEMPOTENT) { |
1531 | (*so)->so_flags1 |= SOF1_DATA_IDEMPOTENT; |
1532 | } |
1533 | if (mp_so->so_flags1 & SOF1_DATA_AUTHENTICATED) { |
1534 | (*so)->so_flags1 |= SOF1_DATA_AUTHENTICATED; |
1535 | } |
1536 | |
1537 | /* Inherit uuid and create the related flow. */ |
if (!uuid_is_null(mpp->necp_client_uuid)) {
1539 | struct mptcb *mp_tp = mpte->mpte_mptcb; |
1540 | |
1541 | sotoinpcb(*so)->necp_cb = mptcp_subflow_necp_cb; |
1542 | |
1543 | /* |
1544 | * A note on the unlock: With MPTCP, we do multiple times a |
1545 | * necp_client_register_socket_flow. This is problematic, |
1546 | * because now the lock-ordering guarantee (first necp-locks, |
1547 | * then socket-locks) is no more respected. So, we need to |
1548 | * unlock here. |
1549 | */ |
socket_unlock(mp_so, 0);
error = necp_client_register_socket_flow(mp_so->last_pid,
mpp->necp_client_uuid, sotoinpcb(*so));
socket_lock(mp_so, 0);
1554 | |
1555 | if (error) { |
os_log_error(mptcp_log_handle, "%s - %lx: necp_client_register_socket_flow failed with error %d\n",
1557 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error); |
1558 | |
1559 | goto out_err; |
1560 | } |
1561 | |
1562 | /* Possible state-change during the unlock above */ |
1563 | if (mp_tp->mpt_state >= MPTCPS_TIME_WAIT || |
1564 | (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP)) { |
os_log_error(mptcp_log_handle, "%s - %lx: state changed during unlock: %u flags %#x\n",
1566 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), |
1567 | mp_tp->mpt_state, mp_tp->mpt_flags); |
1568 | |
1569 | error = EINVAL; |
1570 | goto out_err; |
1571 | } |
1572 | |
uuid_copy(sotoinpcb(*so)->necp_client_uuid, mpp->necp_client_uuid);
1574 | } |
1575 | |
if (mpp->inp_necp_attributes.inp_domain != NULL) {
size_t string_size = strlen(mpp->inp_necp_attributes.inp_domain);
sotoinpcb(*so)->inp_necp_attributes.inp_domain = kalloc_data(string_size + 1, Z_WAITOK | Z_ZERO);

if (sotoinpcb(*so)->inp_necp_attributes.inp_domain) {
memcpy(sotoinpcb(*so)->inp_necp_attributes.inp_domain, mpp->inp_necp_attributes.inp_domain, string_size + 1);
}
}
if (mpp->inp_necp_attributes.inp_account != NULL) {
size_t string_size = strlen(mpp->inp_necp_attributes.inp_account);
sotoinpcb(*so)->inp_necp_attributes.inp_account = kalloc_data(string_size + 1, Z_WAITOK | Z_ZERO);

if (sotoinpcb(*so)->inp_necp_attributes.inp_account) {
memcpy(sotoinpcb(*so)->inp_necp_attributes.inp_account, mpp->inp_necp_attributes.inp_account, string_size + 1);
}
}

if (mpp->inp_necp_attributes.inp_domain_owner != NULL) {
size_t string_size = strlen(mpp->inp_necp_attributes.inp_domain_owner);
sotoinpcb(*so)->inp_necp_attributes.inp_domain_owner = kalloc_data(string_size + 1, Z_WAITOK | Z_ZERO);

if (sotoinpcb(*so)->inp_necp_attributes.inp_domain_owner) {
memcpy(sotoinpcb(*so)->inp_necp_attributes.inp_domain_owner, mpp->inp_necp_attributes.inp_domain_owner, string_size + 1);
}
}

if (mpp->inp_necp_attributes.inp_tracker_domain != NULL) {
size_t string_size = strlen(mpp->inp_necp_attributes.inp_tracker_domain);
sotoinpcb(*so)->inp_necp_attributes.inp_tracker_domain = kalloc_data(string_size + 1, Z_WAITOK | Z_ZERO);

if (sotoinpcb(*so)->inp_necp_attributes.inp_tracker_domain) {
memcpy(sotoinpcb(*so)->inp_necp_attributes.inp_tracker_domain, mpp->inp_necp_attributes.inp_tracker_domain, string_size + 1);
}
}
1610 | |
1611 | /* Needs to happen prior to the delegation! */ |
1612 | (*so)->last_pid = mp_so->last_pid; |
1613 | |
1614 | if (mp_so->so_flags & SOF_DELEGATED) { |
1615 | if (mpte->mpte_epid) { |
error = so_set_effective_pid(*so, mpte->mpte_epid, p, false);
if (error) {
os_log_error(mptcp_log_handle, "%s - %lx: so_set_effective_pid failed with error %d\n",
1619 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error); |
1620 | goto out_err; |
1621 | } |
1622 | } |
1623 | if (!uuid_is_null(uu: mpte->mpte_euuid)) { |
1624 | error = so_set_effective_uuid(so: *so, euuid: mpte->mpte_euuid, p, false); |
1625 | if (error) { |
1626 | os_log_error(mptcp_log_handle, "%s - %lx: so_set_effective_uuid failed with error %d\n" , |
1627 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error); |
1628 | goto out_err; |
1629 | } |
1630 | } |
1631 | } |
1632 | |
1633 | /* inherit the other socket options */ |
bzero(&smpo, sizeof(smpo));
1635 | smpo.mpo_flags |= MPOF_SUBFLOW_OK; |
1636 | smpo.mpo_level = SOL_SOCKET; |
1637 | smpo.mpo_intval = 1; |
1638 | |
1639 | /* disable SIGPIPE */ |
1640 | smpo.mpo_name = SO_NOSIGPIPE; |
1641 | if ((error = mptcp_subflow_sosetopt(mpte, mpts, &smpo)) != 0) { |
1642 | goto out_err; |
1643 | } |
1644 | |
1645 | /* find out if the subflow's source address goes away */ |
1646 | smpo.mpo_name = SO_NOADDRERR; |
1647 | if ((error = mptcp_subflow_sosetopt(mpte, mpts, &smpo)) != 0) { |
1648 | goto out_err; |
1649 | } |
1650 | |
1651 | if (mpte->mpte_mptcb->mpt_state >= MPTCPS_ESTABLISHED) { |
1652 | /* |
1653 | * On secondary subflows we might need to set the cell-fallback |
1654 | * flag (see conditions in mptcp_subflow_sosetopt). |
1655 | */ |
1656 | smpo.mpo_level = SOL_SOCKET; |
1657 | smpo.mpo_name = SO_MARK_CELLFALLBACK; |
1658 | smpo.mpo_intval = 1; |
1659 | if ((error = mptcp_subflow_sosetopt(mpte, mpts, &smpo)) != 0) { |
1660 | goto out_err; |
1661 | } |
1662 | } |
1663 | |
1664 | /* replay setsockopt(2) on the subflow sockets for eligible options */ |
1665 | TAILQ_FOREACH_SAFE(mpo, &mpte->mpte_sopts, mpo_entry, tmpo) { |
1666 | int interim; |
1667 | |
1668 | if (!(mpo->mpo_flags & MPOF_SUBFLOW_OK)) { |
1669 | continue; |
1670 | } |
1671 | |
1672 | /* |
1673 | * Skip those that are handled internally; these options |
* should not have been recorded and marked with
* MPOF_SUBFLOW_OK by mptcp_setopt(), but just in case.
1676 | */ |
1677 | if (mpo->mpo_level == SOL_SOCKET && |
1678 | (mpo->mpo_name == SO_NOSIGPIPE || |
1679 | mpo->mpo_name == SO_NOADDRERR || |
1680 | mpo->mpo_name == SO_KEEPALIVE)) { |
1681 | continue; |
1682 | } |
1683 | |
1684 | interim = (mpo->mpo_flags & MPOF_INTERIM); |
1685 | if (mptcp_subflow_sosetopt(mpte, mpts, mpo) != 0 && interim) { |
os_log_error(mptcp_log_handle, "%s - %lx: sopt %s val %d interim record removed\n",
1687 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), |
1688 | mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name), |
1689 | mpo->mpo_intval); |
1690 | mptcp_sopt_remove(mpte, mpo); |
1691 | mptcp_sopt_free(mpo); |
1692 | continue; |
1693 | } |
1694 | } |
1695 | |
1696 | /* |
1697 | * We need to receive everything that the subflow socket has, |
1698 | * so use a customized socket receive function. We will undo |
1699 | * this when the socket is peeled off or closed. |
1700 | */ |
1701 | switch (dom) { |
1702 | case PF_INET: |
1703 | (*so)->so_proto = &mptcp_subflow_protosw; |
1704 | break; |
1705 | case PF_INET6: |
1706 | (*so)->so_proto = (struct protosw *)&mptcp_subflow_protosw6; |
1707 | break; |
1708 | default: |
1709 | VERIFY(0); |
1710 | /* NOTREACHED */ |
1711 | } |
1712 | |
1713 | proc_rele(p); |
1714 | |
1715 | DTRACE_MPTCP3(subflow__create, struct mptses *, mpte, |
1716 | int, dom, int, error); |
1717 | |
1718 | return 0; |
1719 | |
1720 | out_err: |
1721 | mptcp_subflow_abort(mpts, error); |
1722 | |
1723 | proc_rele(p); |
1724 | |
1725 | return error; |
1726 | } |
1727 | |
1728 | /* |
1729 | * Close an MPTCP subflow socket. |
1730 | * |
1731 | * Note that this may be called on an embryonic subflow, and the only |
1732 | * thing that is guaranteed valid is the protocol-user request. |
1733 | */ |
1734 | static void |
1735 | mptcp_subflow_soclose(struct mptsub *mpts) |
1736 | { |
1737 | struct socket *so = mpts->mpts_socket; |
1738 | |
1739 | if (mpts->mpts_flags & MPTSF_CLOSED) { |
1740 | return; |
1741 | } |
1742 | |
1743 | VERIFY(so != NULL); |
1744 | VERIFY(so->so_flags & SOF_MP_SUBFLOW); |
1745 | VERIFY((so->so_state & (SS_NBIO | SS_NOFDREF)) == (SS_NBIO | SS_NOFDREF)); |
1746 | |
1747 | DTRACE_MPTCP5(subflow__close, struct mptsub *, mpts, |
1748 | struct socket *, so, |
1749 | struct sockbuf *, &so->so_rcv, |
1750 | struct sockbuf *, &so->so_snd, |
1751 | struct mptses *, mpts->mpts_mpte); |
1752 | |
1753 | mpts->mpts_flags |= MPTSF_CLOSED; |
1754 | |
1755 | if (so->so_retaincnt == 0) { |
1756 | soclose_locked(so); |
1757 | |
1758 | return; |
1759 | } else { |
1760 | VERIFY(so->so_usecount > 0); |
1761 | so->so_usecount--; |
1762 | } |
1763 | |
1764 | return; |
1765 | } |
1766 | |
1767 | static void |
1768 | mptcp_attach_to_subf(struct socket *so, struct mptcb *mp_tp, uint8_t addr_id) |
1769 | { |
1770 | struct tcpcb *tp = sototcpcb(so); |
1771 | struct mptcp_subf_auth_entry *sauth_entry; |
1772 | |
1773 | /* |
1774 | * The address ID of the first flow is implicitly 0. |
1775 | */ |
1776 | if (mp_tp->mpt_state == MPTCPS_CLOSED) { |
1777 | tp->t_local_aid = 0; |
1778 | } else { |
1779 | tp->t_local_aid = addr_id; |
1780 | tp->t_mpflags |= (TMPF_PREESTABLISHED | TMPF_JOINED_FLOW); |
1781 | so->so_flags |= SOF_MP_SEC_SUBFLOW; |
1782 | } |
sauth_entry = zalloc(mpt_subauth_zone);
1784 | sauth_entry->msae_laddr_id = tp->t_local_aid; |
1785 | sauth_entry->msae_raddr_id = 0; |
1786 | sauth_entry->msae_raddr_rand = 0; |
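/*
* A random value of 0 is used to mean "not yet known" (see the
* msae_raddr_rand initialization above), so keep drawing until the
* local random value is non-zero.
*/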
do {
sauth_entry->msae_laddr_rand = RandomULong();
} while (sauth_entry->msae_laddr_rand == 0);
1792 | LIST_INSERT_HEAD(&mp_tp->mpt_subauth_list, sauth_entry, msae_next); |
1793 | } |
1794 | |
1795 | static void |
1796 | mptcp_detach_mptcb_from_subf(struct mptcb *mp_tp, struct socket *so) |
1797 | { |
1798 | struct mptcp_subf_auth_entry *sauth_entry; |
1799 | struct tcpcb *tp = NULL; |
1800 | int found = 0; |
1801 | |
1802 | tp = sototcpcb(so); |
1803 | if (tp == NULL) { |
1804 | return; |
1805 | } |
1806 | |
1807 | LIST_FOREACH(sauth_entry, &mp_tp->mpt_subauth_list, msae_next) { |
1808 | if (sauth_entry->msae_laddr_id == tp->t_local_aid) { |
1809 | found = 1; |
1810 | break; |
1811 | } |
1812 | } |
if (found) {
LIST_REMOVE(sauth_entry, msae_next);
zfree(mpt_subauth_zone, sauth_entry);
}
1820 | } |
1821 | |
1822 | /* |
1823 | * Connect an MPTCP subflow socket. |
1824 | * |
1825 | * Note that in the pending connect case, the subflow socket may have been |
1826 | * bound to an interface and/or a source IP address which may no longer be |
1827 | * around by the time this routine is called; in that case the connect attempt |
1828 | * will most likely fail. |
1829 | */ |
1830 | static int |
1831 | mptcp_subflow_soconnectx(struct mptses *mpte, struct mptsub *mpts) |
1832 | { |
1833 | char dbuf[MAX_IPv6_STR_LEN]; |
1834 | struct socket *mp_so, *so; |
1835 | struct mptcb *mp_tp; |
1836 | struct sockaddr *dst; |
1837 | struct proc *p; |
1838 | int af, error, dport; |
1839 | |
1840 | mp_so = mptetoso(mpte); |
1841 | mp_tp = mpte->mpte_mptcb; |
1842 | so = mpts->mpts_socket; |
1843 | af = mpts->mpts_dst.sa_family; |
1844 | dst = &mpts->mpts_dst; |
1845 | |
1846 | VERIFY((mpts->mpts_flags & (MPTSF_CONNECTING | MPTSF_CONNECTED)) == MPTSF_CONNECTING); |
1847 | VERIFY(mpts->mpts_socket != NULL); |
1848 | VERIFY(af == AF_INET || af == AF_INET6); |
1849 | |
1850 | if (af == AF_INET) { |
1851 | inet_ntop(af, &SIN(dst)->sin_addr.s_addr, dbuf, sizeof(dbuf)); |
1852 | dport = ntohs(SIN(dst)->sin_port); |
1853 | } else { |
1854 | inet_ntop(af, &SIN6(dst)->sin6_addr, dbuf, sizeof(dbuf)); |
1855 | dport = ntohs(SIN6(dst)->sin6_port); |
1856 | } |
1857 | |
1858 | os_log(mptcp_log_handle, |
1859 | "%s - %lx: ifindex %u dst %s:%d pended %u\n" , __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), |
1860 | mpts->mpts_ifscope, dbuf, dport, !!(mpts->mpts_flags & MPTSF_CONNECT_PENDING)); |
1861 | |
p = proc_find(mp_so->last_pid);
if (p == PROC_NULL) {
os_log_error(mptcp_log_handle, "%s - %lx: Couldn't find proc for pid %u\n",
1865 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mp_so->last_pid); |
1866 | |
1867 | return ESRCH; |
1868 | } |
1869 | |
1870 | mpts->mpts_flags &= ~MPTSF_CONNECT_PENDING; |
1871 | |
mptcp_attach_to_subf(so, mpte->mpte_mptcb, mpte->mpte_addrid_last);
1873 | |
1874 | /* connect the subflow socket */ |
error = soconnectxlocked(so, mpts->mpts_src, &mpts->mpts_dst,
1876 | p, mpts->mpts_ifscope, |
1877 | mpte->mpte_associd, NULL, 0, NULL, 0, NULL, NULL); |
1878 | |
1879 | mpts->mpts_iss = sototcpcb(so)->iss; |
1880 | |
1881 | /* See tcp_connect_complete */ |
1882 | if (mp_tp->mpt_state < MPTCPS_ESTABLISHED && |
1883 | (mp_so->so_flags1 & SOF1_PRECONNECT_DATA)) { |
1884 | mp_tp->mpt_sndwnd = sototcpcb(so)->snd_wnd; |
1885 | } |
1886 | |
1887 | /* Allocate a unique address id per subflow */ |
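/* (0 is skipped below, as it is the initial subflow's implicit address id) */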
1888 | mpte->mpte_addrid_last++; |
1889 | if (mpte->mpte_addrid_last == 0) { |
1890 | mpte->mpte_addrid_last++; |
1891 | } |
1892 | |
1893 | proc_rele(p); |
1894 | |
1895 | DTRACE_MPTCP3(subflow__connect, struct mptses *, mpte, |
1896 | struct mptsub *, mpts, int, error); |
1897 | if (error) { |
os_log_error(mptcp_log_handle, "%s - %lx: connectx failed with error %d ifscope %u\n",
1899 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error, mpts->mpts_ifscope); |
1900 | } |
1901 | |
1902 | return error; |
1903 | } |
1904 | |
1905 | static int |
1906 | mptcp_adj_rmap(struct socket *so, struct mbuf *m, int off, uint64_t dsn, |
1907 | uint32_t rseq, uint16_t dlen, uint8_t dfin) |
1908 | { |
1909 | struct mptsub *mpts = sototcpcb(so)->t_mpsub; |
1910 | |
1911 | if (m_pktlen(m) == 0) { |
1912 | return 0; |
1913 | } |
1914 | |
1915 | if (!(m->m_flags & M_PKTHDR)) { |
1916 | return 0; |
1917 | } |
1918 | |
1919 | if (m->m_pkthdr.pkt_flags & PKTF_MPTCP) { |
1920 | if (off && (dsn != m->m_pkthdr.mp_dsn || |
1921 | rseq != m->m_pkthdr.mp_rseq || |
1922 | dlen != m->m_pkthdr.mp_rlen || |
1923 | dfin != !!(m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN))) { |
os_log_error(mptcp_log_handle, "%s - %lx: Received incorrect second mapping: DSN: %u - %u, SSN: %u - %u, DLEN: %u - %u, DFIN: %u - %u\n",
1925 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpts->mpts_mpte), |
1926 | (uint32_t)dsn, (uint32_t)m->m_pkthdr.mp_dsn, |
1927 | rseq, m->m_pkthdr.mp_rseq, |
1928 | dlen, m->m_pkthdr.mp_rlen, |
1929 | dfin, !!(m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN)); |
1930 | |
soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
1932 | return -1; |
1933 | } |
1934 | } |
1935 | |
1936 | /* If mbuf is beyond right edge of the mapping, we need to split */ |
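/*
* Worked example (hypothetical numbers): a mapping with dlen 1000 and
* dfin 1 covers 999 data bytes. With off 400 of them already mapped and
* m_pktlen(m) == 800, only 599 bytes of this mbuf belong to the mapping,
* so we split after byte 599 and leave the remainder in the new mbuf.
*/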
1937 | if (m_pktlen(m) > dlen - dfin - off) { |
1938 | struct mbuf *new = m_split(m, dlen - dfin - off, M_DONTWAIT); |
1939 | if (new == NULL) { |
os_log_error(mptcp_log_handle, "%s - %lx: m_split failed dlen %u dfin %u off %d pktlen %d, killing subflow %d",
__func__, (unsigned long)VM_KERNEL_ADDRPERM(mpts->mpts_mpte),
dlen, dfin, off, m_pktlen(m),
mpts->mpts_connid);

soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
1946 | return -1; |
1947 | } |
1948 | |
1949 | m->m_next = new; |
sballoc(&so->so_rcv, new);
/* sballoc() adds new->m_len to sb_cc, but those bytes were already accounted for; undo the double count */
so->so_rcv.sb_cc -= new->m_len;
1953 | |
1954 | if (so->so_rcv.sb_mbtail == m) { |
1955 | so->so_rcv.sb_mbtail = new; |
1956 | } |
1957 | } |
1958 | |
1959 | m->m_pkthdr.pkt_flags |= PKTF_MPTCP; |
1960 | m->m_pkthdr.mp_dsn = dsn + off; |
1961 | m->m_pkthdr.mp_rseq = rseq + off; |
1962 | VERIFY(m_pktlen(m) < UINT16_MAX); |
1963 | m->m_pkthdr.mp_rlen = (uint16_t)m_pktlen(m); |
1964 | |
1965 | /* Only put the DATA_FIN-flag on the last mbuf of this mapping */ |
1966 | if (dfin) { |
1967 | if (m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen < dsn + dlen - dfin) { |
1968 | m->m_pkthdr.pkt_flags &= ~PKTF_MPTCP_DFIN; |
1969 | } else { |
1970 | m->m_pkthdr.pkt_flags |= PKTF_MPTCP_DFIN; |
1971 | } |
1972 | } |
1973 | |
1974 | |
1975 | mpts->mpts_flags |= MPTSF_FULLY_ESTABLISHED; |
1976 | |
1977 | return 0; |
1978 | } |
1979 | |
1980 | /* |
1981 | * Update the pid, upid, uuid of the subflow so, based on parent so |
1982 | */ |
1983 | static void |
1984 | mptcp_update_last_owner(struct socket *so, struct socket *mp_so) |
1985 | { |
1986 | if (so->last_pid != mp_so->last_pid || |
1987 | so->last_upid != mp_so->last_upid) { |
1988 | so->last_upid = mp_so->last_upid; |
1989 | so->last_pid = mp_so->last_pid; |
uuid_copy(so->last_uuid, mp_so->last_uuid);
1991 | } |
1992 | so_update_policy(so); |
1993 | } |
1994 | |
1995 | /* |
1996 | * MPTCP subflow socket receive routine, derived from soreceive(). |
1997 | */ |
1998 | static int |
1999 | mptcp_subflow_soreceive(struct socket *so, struct sockaddr **psa, |
2000 | struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) |
2001 | { |
2002 | #pragma unused(uio) |
2003 | struct socket *mp_so; |
2004 | struct mptses *mpte; |
2005 | struct mptcb *mp_tp; |
2006 | int flags, error = 0; |
2007 | struct mbuf *m, **mp = mp0; |
2008 | struct tcpcb *tp = sototcpcb(so); |
2009 | |
2010 | mpte = tptomptp(sototcpcb(so))->mpt_mpte; |
2011 | mp_so = mptetoso(mpte); |
2012 | mp_tp = mpte->mpte_mptcb; |
2013 | |
2014 | VERIFY(so->so_proto->pr_flags & PR_CONNREQUIRED); |
2015 | |
2016 | #ifdef MORE_LOCKING_DEBUG |
2017 | if (so->so_usecount == 1) { |
2018 | panic("%s: so=%x no other reference on socket" , __func__, so); |
2019 | /* NOTREACHED */ |
2020 | } |
2021 | #endif |
2022 | /* |
2023 | * We return all that is there in the subflow's socket receive buffer |
2024 | * to the MPTCP layer, so we require that the caller passes in the |
2025 | * expected parameters. |
2026 | */ |
2027 | if (mp == NULL || controlp != NULL) { |
2028 | return EINVAL; |
2029 | } |
2030 | |
2031 | *mp = NULL; |
2032 | if (psa != NULL) { |
2033 | *psa = NULL; |
2034 | } |
2035 | if (flagsp != NULL) { |
2036 | flags = *flagsp & ~MSG_EOR; |
2037 | } else { |
2038 | flags = 0; |
2039 | } |
2040 | |
2041 | if (flags & (MSG_PEEK | MSG_OOB | MSG_NEEDSA | MSG_WAITALL | MSG_WAITSTREAM)) { |
2042 | return EOPNOTSUPP; |
2043 | } |
2044 | |
2045 | flags |= (MSG_DONTWAIT | MSG_NBIO); |
2046 | |
2047 | /* |
2048 | * If a recv attempt is made on a previously-accepted socket |
2049 | * that has been marked as inactive (disconnected), reject |
2050 | * the request. |
2051 | */ |
2052 | if (so->so_flags & SOF_DEFUNCT) { |
2053 | struct sockbuf *sb = &so->so_rcv; |
2054 | |
2055 | error = ENOTCONN; |
2056 | /* |
2057 | * This socket should have been disconnected and flushed |
2058 | * prior to being returned from sodefunct(); there should |
2059 | * be no data on its receive list, so panic otherwise. |
2060 | */ |
2061 | if (so->so_state & SS_DEFUNCT) { |
2062 | sb_empty_assert(sb, __func__); |
2063 | } |
2064 | return error; |
2065 | } |
2066 | |
2067 | /* |
2068 | * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE) |
2069 | * and if so just return to the caller. This could happen when |
2070 | * soreceive() is called by a socket upcall function during the |
2071 | * time the socket is freed. The socket buffer would have been |
2072 | * locked across the upcall, therefore we cannot put this thread |
2073 | * to sleep (else we will deadlock) or return EWOULDBLOCK (else |
2074 | * we may livelock), because the lock on the socket buffer will |
2075 | * only be released when the upcall routine returns to its caller. |
2076 | * Because the socket has been officially closed, there can be |
2077 | * no further read on it. |
2078 | * |
2079 | * A multipath subflow socket would have its SS_NOFDREF set by |
2080 | * default, so check for SOF_MP_SUBFLOW socket flag; when the |
2081 | * socket is closed for real, SOF_MP_SUBFLOW would be cleared. |
2082 | */ |
2083 | if ((so->so_state & (SS_NOFDREF | SS_CANTRCVMORE)) == |
2084 | (SS_NOFDREF | SS_CANTRCVMORE) && !(so->so_flags & SOF_MP_SUBFLOW)) { |
2085 | return 0; |
2086 | } |
2087 | |
2088 | /* |
2089 | * For consistency with soreceive() semantics, we need to obey |
2090 | * SB_LOCK in case some other code path has locked the buffer. |
2091 | */ |
error = sblock(&so->so_rcv, 0);
2093 | if (error != 0) { |
2094 | return error; |
2095 | } |
2096 | |
2097 | m = so->so_rcv.sb_mb; |
2098 | if (m == NULL) { |
2099 | /* |
2100 | * Panic if we notice inconsistencies in the socket's |
2101 | * receive list; both sb_mb and sb_cc should correctly |
2102 | * reflect the contents of the list, otherwise we may |
2103 | * end up with false positives during select() or poll() |
2104 | * which could put the application in a bad state. |
2105 | */ |
2106 | SB_MB_CHECK(&so->so_rcv); |
2107 | |
2108 | if (so->so_error != 0) { |
2109 | error = so->so_error; |
2110 | so->so_error = 0; |
2111 | goto release; |
2112 | } |
2113 | |
2114 | if (so->so_state & SS_CANTRCVMORE) { |
2115 | goto release; |
2116 | } |
2117 | |
2118 | if (!(so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING))) { |
2119 | error = ENOTCONN; |
2120 | goto release; |
2121 | } |
2122 | |
2123 | /* |
* MSG_DONTWAIT is implicitly set and this routine will
2125 | * never block, so return EWOULDBLOCK when there is nothing. |
2126 | */ |
2127 | error = EWOULDBLOCK; |
2128 | goto release; |
2129 | } |
2130 | |
2131 | mptcp_update_last_owner(so, mp_so); |
2132 | |
SBLASTRECORDCHK(&so->so_rcv, "mptcp_subflow_soreceive 1");
SBLASTMBUFCHK(&so->so_rcv, "mptcp_subflow_soreceive 1");
2135 | |
2136 | while (m != NULL) { |
2137 | int dlen = 0, error_out = 0, off = 0; |
2138 | uint8_t dfin = 0; |
2139 | struct mbuf *start = m; |
2140 | uint64_t dsn; |
2141 | uint32_t sseq; |
2142 | uint16_t orig_dlen; |
2143 | uint16_t csum; |
2144 | |
2145 | VERIFY(m->m_nextpkt == NULL); |
2146 | |
2147 | if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) { |
2148 | fallback: |
2149 | /* Just move mbuf to MPTCP-level */ |
2150 | |
sbfree(&so->so_rcv, m);
2152 | |
2153 | if (mp != NULL) { |
2154 | *mp = m; |
2155 | mp = &m->m_next; |
2156 | so->so_rcv.sb_mb = m = m->m_next; |
2157 | *mp = NULL; |
2158 | } |
2159 | |
2160 | if (m != NULL) { |
2161 | so->so_rcv.sb_lastrecord = m; |
2162 | } else { |
2163 | SB_EMPTY_FIXUP(&so->so_rcv); |
2164 | } |
2165 | |
2166 | continue; |
2167 | } else if (!(m->m_flags & M_PKTHDR) || !(m->m_pkthdr.pkt_flags & PKTF_MPTCP)) { |
2168 | struct mptsub *mpts = sototcpcb(so)->t_mpsub; |
2169 | boolean_t found_mapping = false; |
2170 | int parsed_length = 0; |
2171 | struct mbuf *m_iter; |
2172 | |
2173 | /* |
2174 | * No MPTCP-option in the header. Either fallback or |
2175 | * wait for additional mappings. |
2176 | */ |
2177 | if (!(mpts->mpts_flags & MPTSF_FULLY_ESTABLISHED)) { |
2178 | /* data arrived without a DSS option mapping */ |
2179 | |
2180 | /* initial subflow can fallback right after SYN handshake */ |
2181 | if (mpts->mpts_flags & MPTSF_INITIAL_SUB) { |
2182 | mptcp_notify_mpfail(so); |
2183 | |
2184 | goto fallback; |
2185 | } else { |
os_log_error(mptcp_log_handle, "%s - %lx: No DSS on secondary subflow. Killing %d\n",
__func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
mpts->mpts_connid);
soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
2190 | |
2191 | error = EIO; |
2192 | *mp0 = NULL; |
2193 | goto release; |
2194 | } |
2195 | } |
2196 | |
2197 | /* Thus, let's look for an mbuf with the mapping */ |
2198 | m_iter = m->m_next; |
2199 | parsed_length = m->m_len; |
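/*
* Scan forward for an mbuf carrying a DSS mapping (PKTF_MPTCP).
* The data-level length of a mapping is a 16-bit quantity, so once
* more than UINT16_MAX bytes have been parsed without finding one,
* no mapping can still cover this data; the checks below then reset
* the subflow.
*/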
2200 | while (m_iter != NULL && parsed_length < UINT16_MAX) { |
2201 | if (!(m_iter->m_flags & M_PKTHDR) || !(m_iter->m_pkthdr.pkt_flags & PKTF_MPTCP)) { |
2202 | parsed_length += m_iter->m_len; |
2203 | m_iter = m_iter->m_next; |
2204 | continue; |
2205 | } |
2206 | |
2207 | found_mapping = true; |
2208 | |
2209 | /* Found an mbuf with a DSS-mapping */ |
2210 | orig_dlen = dlen = m_iter->m_pkthdr.mp_rlen; |
2211 | dsn = m_iter->m_pkthdr.mp_dsn; |
2212 | sseq = m_iter->m_pkthdr.mp_rseq; |
2213 | csum = m_iter->m_pkthdr.mp_csum; |
2214 | |
2215 | if (m_iter->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN) { |
2216 | dfin = 1; |
2217 | dlen--; |
2218 | } |
2219 | |
2220 | break; |
2221 | } |
2222 | |
2223 | if (!found_mapping && parsed_length < UINT16_MAX) { |
2224 | /* Mapping not yet present, we can wait! */ |
2225 | if (*mp0 == NULL) { |
2226 | error = EWOULDBLOCK; |
2227 | } |
2228 | goto release; |
2229 | } else if (!found_mapping && parsed_length >= UINT16_MAX) { |
os_log_error(mptcp_log_handle, "%s - %lx: Received more than 64KB without DSS mapping. Killing %d\n",
__func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
mpts->mpts_connid);
/* Received 64KB without DSS-mapping. We should kill the subflow */
soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
2235 | |
2236 | error = EIO; |
2237 | *mp0 = NULL; |
2238 | goto release; |
2239 | } |
2240 | } else { |
2241 | orig_dlen = dlen = m->m_pkthdr.mp_rlen; |
2242 | dsn = m->m_pkthdr.mp_dsn; |
2243 | sseq = m->m_pkthdr.mp_rseq; |
2244 | csum = m->m_pkthdr.mp_csum; |
2245 | |
2246 | if (m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN) { |
2247 | dfin = 1; |
2248 | dlen--; |
2249 | } |
2250 | } |
2251 | |
2252 | /* Now, see if we need to remove previous packets */ |
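/*
* sseq is relative to the subflow's initial sequence number, so
* sseq + tp->irs is the absolute sequence number at which the mapping
* starts, while rcv_nxt - sb_cc is the absolute sequence number of the
* first byte in the receive buffer. Example (hypothetical numbers):
* rcv_nxt 1000 and sb_cc 300 put the buffer head at 700; a mapping
* starting at 750 makes bytes 700..749 stale, so totrim is 50.
*/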
2253 | if (SEQ_GT(sseq + tp->irs, tp->rcv_nxt - so->so_rcv.sb_cc)) { |
2254 | /* Ok, there is data in there that we don't need - let's throw it away! */ |
2255 | int totrim = (int)sseq + tp->irs - (tp->rcv_nxt - so->so_rcv.sb_cc); |
2256 | |
sbdrop(&so->so_rcv, totrim);
2258 | |
2259 | m = so->so_rcv.sb_mb; |
2260 | } |
2261 | |
2262 | /* |
2263 | * Check if the full mapping is now present |
2264 | */ |
2265 | if ((int)so->so_rcv.sb_cc < dlen) { |
2266 | if (*mp0 == NULL) { |
2267 | error = EWOULDBLOCK; |
2268 | } |
2269 | goto release; |
2270 | } |
2271 | |
2272 | /* Now, get the full mapping */ |
2273 | off = 0; |
2274 | while (dlen > 0) { |
if (mptcp_adj_rmap(so, m, off, dsn, sseq, orig_dlen, dfin)) {
error_out = 1;
error = EIO;
dlen = 0;
*mp0 = NULL;
break;
}

dlen -= m->m_len;
off += m->m_len;
sbfree(&so->so_rcv, m);
2286 | |
2287 | if (mp != NULL) { |
2288 | *mp = m; |
2289 | mp = &m->m_next; |
2290 | so->so_rcv.sb_mb = m = m->m_next; |
2291 | *mp = NULL; |
2292 | } |
2293 | |
2294 | ASSERT(dlen == 0 || m); |
2295 | if (dlen != 0 && m == NULL) { |
2296 | /* "try" to gracefully recover on customer builds */ |
2297 | error_out = 1; |
2298 | error = EIO; |
2299 | dlen = 0; |
2300 | |
2301 | *mp0 = NULL; |
2302 | |
2303 | SB_EMPTY_FIXUP(&so->so_rcv); |
2304 | soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST); |
2305 | |
2306 | break; |
2307 | } |
2308 | } |
2309 | |
2310 | VERIFY(dlen == 0); |
2311 | |
2312 | if (m != NULL) { |
2313 | so->so_rcv.sb_lastrecord = m; |
2314 | } else { |
2315 | SB_EMPTY_FIXUP(&so->so_rcv); |
2316 | } |
2317 | |
2318 | if (error_out) { |
2319 | goto release; |
2320 | } |
2321 | |
if (mptcp_validate_csum(sototcpcb(so), start, dsn, sseq, orig_dlen, csum, dfin)) {
2323 | error = EIO; |
2324 | *mp0 = NULL; |
2325 | goto release; |
2326 | } |
2327 | |
SBLASTRECORDCHK(&so->so_rcv, "mptcp_subflow_soreceive 2");
SBLASTMBUFCHK(&so->so_rcv, "mptcp_subflow_soreceive 2");
2330 | } |
2331 | |
2332 | DTRACE_MPTCP3(subflow__receive, struct socket *, so, |
2333 | struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd); |
2334 | |
2335 | if (flagsp != NULL) { |
2336 | *flagsp |= flags; |
2337 | } |
2338 | |
2339 | release: |
sbunlock(&so->so_rcv, TRUE);
2341 | |
2342 | return error; |
2343 | } |
2344 | |
2345 | /* |
2346 | * MPTCP subflow socket send routine, derived from sosend(). |
2347 | */ |
2348 | static int |
2349 | mptcp_subflow_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, |
2350 | struct mbuf *top, struct mbuf *control, int flags) |
2351 | { |
struct socket *mp_so = mptetoso(tptomptp(sototcpcb(so))->mpt_mpte);
2353 | boolean_t en_tracing = FALSE, proc_held = FALSE; |
2354 | struct proc *p = current_proc(); |
2355 | int en_tracing_val; |
2356 | int sblocked = 1; /* Pretend as if it is already locked, so we won't relock it */ |
2357 | int error; |
2358 | |
2359 | VERIFY(control == NULL); |
2360 | VERIFY(addr == NULL); |
2361 | VERIFY(uio == NULL); |
2362 | VERIFY(flags == 0); |
2363 | VERIFY((so->so_flags & SOF_CONTENT_FILTER) == 0); |
2364 | |
2365 | VERIFY(top->m_pkthdr.len > 0 && top->m_pkthdr.len <= UINT16_MAX); |
2366 | VERIFY(top->m_pkthdr.pkt_flags & PKTF_MPTCP); |
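/*
* The data-level length of a DSS mapping (mp_rlen) is a 16-bit field,
* hence the UINT16_MAX bound on the packet length verified above.
*/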
2367 | |
2368 | /* |
* trace if tracing is enabled, this is a network (vs. unix)
* socket, and it is non-loopback
2371 | */ |
2372 | if (ENTR_SHOULDTRACE && |
2373 | (SOCK_CHECK_DOM(so, AF_INET) || SOCK_CHECK_DOM(so, AF_INET6))) { |
2374 | struct inpcb *inp = sotoinpcb(so); |
2375 | if (inp->inp_last_outifp != NULL && |
2376 | !(inp->inp_last_outifp->if_flags & IFF_LOOPBACK)) { |
2377 | en_tracing = TRUE; |
2378 | en_tracing_val = top->m_pkthdr.len; |
2379 | KERNEL_ENERGYTRACE(kEnTrActKernSockWrite, DBG_FUNC_START, |
2380 | (unsigned long)VM_KERNEL_ADDRPERM(so), |
2381 | ((so->so_state & SS_NBIO) ? kEnTrFlagNonBlocking : 0), |
2382 | (int64_t)en_tracing_val); |
2383 | } |
2384 | } |
2385 | |
2386 | mptcp_update_last_owner(so, mp_so); |
2387 | |
2388 | if (mp_so->last_pid != proc_pid(p)) { |
p = proc_find(mp_so->last_pid);
2390 | if (p == PROC_NULL) { |
2391 | p = current_proc(); |
2392 | } else { |
2393 | proc_held = TRUE; |
2394 | } |
2395 | } |
2396 | |
2397 | #if NECP |
2398 | inp_update_necp_policy(sotoinpcb(so), NULL, NULL, 0); |
2399 | #endif /* NECP */ |
2400 | |
2401 | error = sosendcheck(so, NULL, top->m_pkthdr.len, 0, 1, 0, &sblocked); |
2402 | if (error) { |
2403 | goto out; |
2404 | } |
2405 | |
2406 | error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, top, NULL, NULL, p); |
2407 | top = NULL; |
2408 | |
2409 | out: |
2410 | if (top != NULL) { |
2411 | m_freem(top); |
2412 | } |
2413 | |
2414 | if (proc_held) { |
2415 | proc_rele(p); |
2416 | } |
2417 | |
2418 | soclearfastopen(so); |
2419 | |
2420 | if (en_tracing) { |
2421 | KERNEL_ENERGYTRACE(kEnTrActKernSockWrite, DBG_FUNC_END, |
2422 | (unsigned long)VM_KERNEL_ADDRPERM(so), |
2423 | ((error == EWOULDBLOCK) ? kEnTrFlagNoWork : 0), |
2424 | (int64_t)en_tracing_val); |
2425 | } |
2426 | |
2427 | return error; |
2428 | } |
2429 | |
2430 | /* |
2431 | * Subflow socket write upcall. |
2432 | * |
* Called when the associated subflow socket posted a write event.
2434 | */ |
2435 | static void |
2436 | mptcp_subflow_wupcall(struct socket *so, void *arg, int waitf) |
2437 | { |
2438 | #pragma unused(so, waitf) |
2439 | struct mptsub *mpts = arg; |
2440 | struct mptses *mpte = mpts->mpts_mpte; |
2441 | |
2442 | VERIFY(mpte != NULL); |
2443 | |
if (mptcp_should_defer_upcall(mpte->mpte_mppcb)) {
2445 | if (!(mpte->mpte_mppcb->mpp_flags & MPP_WUPCALL)) { |
2446 | mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_WWAKEUP; |
2447 | } |
2448 | return; |
2449 | } |
2450 | |
2451 | mptcp_output(mpte); |
2452 | } |
2453 | |
2454 | /* |
2455 | * Subflow socket control event upcall. |
2456 | */ |
2457 | static void |
2458 | mptcp_subflow_eupcall1(struct socket *so, void *arg, uint32_t events) |
2459 | { |
2460 | #pragma unused(so) |
2461 | struct mptsub *mpts = arg; |
2462 | struct mptses *mpte = mpts->mpts_mpte; |
2463 | |
socket_lock_assert_owned(mptetoso(mpte));
2465 | |
2466 | if ((mpts->mpts_evctl & events) == events) { |
2467 | return; |
2468 | } |
2469 | |
2470 | mpts->mpts_evctl |= events; |
2471 | |
if (mptcp_should_defer_upcall(mpte->mpte_mppcb)) {
2473 | mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_WORKLOOP; |
2474 | return; |
2475 | } |
2476 | |
2477 | mptcp_subflow_workloop(mpte); |
2478 | } |
2479 | |
2480 | /* |
2481 | * Establish an initial MPTCP connection (if first subflow and not yet |
2482 | * connected), or add a subflow to an existing MPTCP connection. |
2483 | */ |
2484 | int |
2485 | mptcp_subflow_add(struct mptses *mpte, struct sockaddr *src, |
2486 | struct sockaddr *dst, uint32_t ifscope, sae_connid_t *pcid) |
2487 | { |
2488 | struct socket *mp_so, *so = NULL; |
2489 | struct mptcb *mp_tp; |
2490 | struct mptsub *mpts = NULL; |
2491 | int af, error = 0; |
2492 | |
2493 | mp_so = mptetoso(mpte); |
2494 | mp_tp = mpte->mpte_mptcb; |
2495 | |
socket_lock_assert_owned(mp_so);
2497 | |
2498 | if (mp_tp->mpt_state >= MPTCPS_CLOSE_WAIT) { |
2499 | /* If the remote end sends Data FIN, refuse subflow adds */ |
os_log_error(mptcp_log_handle, "%s - %lx: state %u\n",
2501 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mp_tp->mpt_state); |
2502 | error = ENOTCONN; |
2503 | goto out_err; |
2504 | } |
2505 | |
2506 | if (mpte->mpte_numflows > MPTCP_MAX_NUM_SUBFLOWS) { |
2507 | error = EOVERFLOW; |
2508 | goto out_err; |
2509 | } |
2510 | |
2511 | mpts = mptcp_subflow_alloc(); |
2512 | if (mpts == NULL) { |
os_log_error(mptcp_log_handle, "%s - %lx: malloc subflow failed\n",
2514 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte)); |
2515 | error = ENOMEM; |
2516 | goto out_err; |
2517 | } |
2518 | |
2519 | if (src) { |
2520 | if (src->sa_family != AF_INET && src->sa_family != AF_INET6) { |
2521 | error = EAFNOSUPPORT; |
2522 | goto out_err; |
2523 | } |
2524 | |
2525 | if (src->sa_family == AF_INET && |
2526 | src->sa_len != sizeof(struct sockaddr_in)) { |
2527 | error = EINVAL; |
2528 | goto out_err; |
2529 | } |
2530 | |
2531 | if (src->sa_family == AF_INET6 && |
2532 | src->sa_len != sizeof(struct sockaddr_in6)) { |
2533 | error = EINVAL; |
2534 | goto out_err; |
2535 | } |
2536 | |
2537 | mpts->mpts_src = SA(alloc_sockaddr(src->sa_len, Z_WAITOK | Z_NOFAIL)); |
2538 | |
2539 | SOCKADDR_COPY(src, mpts->mpts_src, src->sa_len); |
2540 | } |
2541 | |
2542 | if (dst->sa_family != AF_INET && dst->sa_family != AF_INET6) { |
2543 | error = EAFNOSUPPORT; |
2544 | goto out_err; |
2545 | } |
2546 | |
2547 | if (dst->sa_family == AF_INET && |
2548 | dst->sa_len != sizeof(mpts->__mpts_dst_v4)) { |
2549 | error = EINVAL; |
2550 | goto out_err; |
2551 | } |
2552 | |
2553 | if (dst->sa_family == AF_INET6 && |
2554 | dst->sa_len != sizeof(mpts->__mpts_dst_v6)) { |
2555 | error = EINVAL; |
2556 | goto out_err; |
2557 | } |
2558 | |
2559 | SOCKADDR_COPY(dst, &mpts->mpts_dst, dst->sa_len); |
2560 | |
2561 | af = mpts->mpts_dst.sa_family; |
2562 | |
2563 | ifnet_head_lock_shared(); |
if (ifscope > (unsigned)if_index) {
2565 | ifnet_head_done(); |
2566 | error = ENXIO; |
2567 | goto out_err; |
2568 | } |
2569 | ifnet_head_done(); |
2570 | |
2571 | mpts->mpts_ifscope = ifscope; |
2572 | |
2573 | /* create the subflow socket */ |
if ((error = mptcp_subflow_socreate(mpte, mpts, af, &so)) != 0) {
2575 | /* |
2576 | * Returning (error) and not cleaning up, because up to here |
2577 | * all we did is creating mpts. |
2578 | * |
2579 | * And the contract is that the call to mptcp_subflow_socreate, |
2580 | * moves ownership of mpts to mptcp_subflow_socreate. |
2581 | */ |
2582 | return error; |
2583 | } |
2584 | |
2585 | /* |
2586 | * We may be called from within the kernel. Still need to account this |
2587 | * one to the real app. |
2588 | */ |
mptcp_update_last_owner(mpts->mpts_socket, mp_so);
2590 | |
2591 | /* |
2592 | * Increment the counter, while avoiding 0 (SAE_CONNID_ANY) and |
2593 | * -1 (SAE_CONNID_ALL). |
2594 | */ |
2595 | mpte->mpte_connid_last++; |
2596 | if (mpte->mpte_connid_last == SAE_CONNID_ALL || |
2597 | mpte->mpte_connid_last == SAE_CONNID_ANY) { |
2598 | mpte->mpte_connid_last++; |
2599 | } |
2600 | |
2601 | mpts->mpts_connid = mpte->mpte_connid_last; |
2602 | |
2603 | mpts->mpts_rel_seq = 1; |
2604 | |
2605 | /* Allocate a unique address id per subflow */ |
2606 | mpte->mpte_addrid_last++; |
2607 | if (mpte->mpte_addrid_last == 0) { |
2608 | mpte->mpte_addrid_last++; |
2609 | } |
2610 | |
2611 | /* register for subflow socket read/write events */ |
sock_setupcalls_locked(so, NULL, NULL, mptcp_subflow_wupcall, mpts, 1);
2613 | |
2614 | /* Register for subflow socket control events */ |
sock_catchevents_locked(so, mptcp_subflow_eupcall1, mpts,
2616 | SO_FILT_HINT_CONNRESET | SO_FILT_HINT_CANTRCVMORE | |
2617 | SO_FILT_HINT_TIMEOUT | SO_FILT_HINT_NOSRCADDR | |
2618 | SO_FILT_HINT_IFDENIED | SO_FILT_HINT_CONNECTED | |
2619 | SO_FILT_HINT_DISCONNECTED | SO_FILT_HINT_MPFAILOVER | |
2620 | SO_FILT_HINT_MPSTATUS | SO_FILT_HINT_MUSTRST | |
2621 | SO_FILT_HINT_MPCANTRCVMORE | SO_FILT_HINT_ADAPTIVE_RTIMO | |
2622 | SO_FILT_HINT_ADAPTIVE_WTIMO | SO_FILT_HINT_MP_SUB_ERROR); |
2623 | |
2624 | /* sanity check */ |
2625 | VERIFY(!(mpts->mpts_flags & |
2626 | (MPTSF_CONNECTING | MPTSF_CONNECTED | MPTSF_CONNECT_PENDING))); |
2627 | |
2628 | /* |
2629 | * Indicate to the TCP subflow whether or not it should establish |
2630 | * the initial MPTCP connection, or join an existing one. Fill |
2631 | * in the connection request structure with additional info needed |
2632 | * by the underlying TCP (to be used in the TCP options, etc.) |
2633 | */ |
2634 | if (mp_tp->mpt_state < MPTCPS_ESTABLISHED && mpte->mpte_numflows == 1) { |
2635 | mpts->mpts_flags |= MPTSF_INITIAL_SUB; |
2636 | |
2637 | if (mp_tp->mpt_state == MPTCPS_CLOSED) { |
2638 | mptcp_init_local_parms(mpte, dst); |
2639 | } |
soisconnecting(mp_so);
2641 | |
2642 | /* If fastopen is requested, set state in mpts */ |
2643 | if (so->so_flags1 & SOF1_PRECONNECT_DATA) { |
2644 | mpts->mpts_flags |= MPTSF_TFO_REQD; |
2645 | } |
2646 | } else { |
2647 | if (!(mp_tp->mpt_flags & MPTCPF_JOIN_READY)) { |
2648 | mpts->mpts_flags |= MPTSF_CONNECT_PENDING; |
2649 | } |
2650 | } |
2651 | |
2652 | mpts->mpts_flags |= MPTSF_CONNECTING; |
2653 | |
2654 | /* connect right away if first attempt, or if join can be done now */ |
2655 | if (!(mpts->mpts_flags & MPTSF_CONNECT_PENDING)) { |
2656 | error = mptcp_subflow_soconnectx(mpte, mpts); |
2657 | } |
2658 | |
2659 | if (error) { |
2660 | goto out_err_close; |
2661 | } |
2662 | |
2663 | if (pcid) { |
2664 | *pcid = mpts->mpts_connid; |
2665 | } |
2666 | |
2667 | return 0; |
2668 | |
2669 | out_err_close: |
2670 | mptcp_subflow_abort(mpts, error); |
2671 | |
2672 | return error; |
2673 | |
2674 | out_err: |
2675 | if (mpts) { |
2676 | mptcp_subflow_free(mpts); |
2677 | } |
2678 | |
2679 | return error; |
2680 | } |
2681 | |
2682 | void |
2683 | mptcpstats_update(struct mptcp_itf_stats *stats, const struct mptsub *mpts) |
2684 | { |
2685 | int index = mptcpstats_get_index(stats, mpts); |
2686 | |
2687 | if (index != -1) { |
2688 | struct inpcb *inp = sotoinpcb(mpts->mpts_socket); |
2689 | |
2690 | stats[index].mpis_txbytes += inp->inp_stat->txbytes; |
2691 | stats[index].mpis_rxbytes += inp->inp_stat->rxbytes; |
2692 | |
2693 | stats[index].mpis_wifi_txbytes += inp->inp_wstat->txbytes; |
2694 | stats[index].mpis_wifi_rxbytes += inp->inp_wstat->rxbytes; |
2695 | |
2696 | stats[index].mpis_wired_txbytes += inp->inp_Wstat->txbytes; |
2697 | stats[index].mpis_wired_rxbytes += inp->inp_Wstat->rxbytes; |
2698 | |
2699 | stats[index].mpis_cell_txbytes += inp->inp_cstat->txbytes; |
2700 | stats[index].mpis_cell_rxbytes += inp->inp_cstat->rxbytes; |
2701 | } |
2702 | } |
2703 | |
2704 | /* |
* Delete/remove a subflow from an MPTCP connection. The underlying subflow socket
2706 | * will no longer be accessible after a subflow is deleted, thus this |
2707 | * should occur only after the subflow socket has been disconnected. |
2708 | */ |
2709 | void |
2710 | mptcp_subflow_del(struct mptses *mpte, struct mptsub *mpts) |
2711 | { |
2712 | struct socket *mp_so = mptetoso(mpte); |
2713 | struct socket *so = mpts->mpts_socket; |
2714 | struct tcpcb *tp = sototcpcb(so); |
2715 | |
socket_lock_assert_owned(mp_so);
VERIFY(mpts->mpts_mpte == mpte);
VERIFY(mpte->mpte_numflows != 0);
VERIFY(mp_so->so_usecount > 0);

mptcpstats_update(mpte->mpte_itfstats, mpts);

mptcp_unset_cellicon(mpte, mpts, 1);
2724 | |
2725 | mpte->mpte_init_rxbytes = sotoinpcb(so)->inp_stat->rxbytes; |
2726 | mpte->mpte_init_txbytes = sotoinpcb(so)->inp_stat->txbytes; |
2727 | |
2728 | TAILQ_REMOVE(&mpte->mpte_subflows, mpts, mpts_entry); |
2729 | mpte->mpte_numflows--; |
2730 | if (mpte->mpte_active_sub == mpts) { |
2731 | mpte->mpte_active_sub = NULL; |
2732 | } |
2733 | |
2734 | /* |
2735 | * Drop references held by this subflow socket; there |
2736 | * will be no further upcalls made from this point. |
2737 | */ |
sock_setupcalls_locked(so, NULL, NULL, NULL, NULL, 0);
sock_catchevents_locked(so, NULL, NULL, 0);

mptcp_detach_mptcb_from_subf(mpte->mpte_mptcb, so);
2742 | |
2743 | mp_so->so_usecount--; /* for subflow socket */ |
2744 | mpts->mpts_mpte = NULL; |
2745 | mpts->mpts_socket = NULL; |
2746 | |
2747 | mptcp_subflow_remref(mpts); /* for MPTCP subflow list */ |
2748 | mptcp_subflow_remref(mpts); /* for subflow socket */ |
2749 | |
2750 | so->so_flags &= ~SOF_MP_SUBFLOW; |
2751 | tp->t_mptcb = NULL; |
2752 | tp->t_mpsub = NULL; |
2753 | } |
2754 | |
2755 | void |
2756 | mptcp_subflow_shutdown(struct mptses *mpte, struct mptsub *mpts) |
2757 | { |
2758 | struct socket *so = mpts->mpts_socket; |
2759 | struct mptcb *mp_tp = mpte->mpte_mptcb; |
2760 | int send_dfin = 0; |
2761 | |
2762 | if (mp_tp->mpt_state > MPTCPS_CLOSE_WAIT) { |
2763 | send_dfin = 1; |
2764 | } |
2765 | |
2766 | if (!(so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) && |
2767 | (so->so_state & SS_ISCONNECTED)) { |
2768 | if (send_dfin) { |
2769 | mptcp_send_dfin(so); |
2770 | } |
2771 | soshutdownlock(so, SHUT_WR); |
2772 | } |
2773 | } |
2774 | |
2775 | static void |
2776 | mptcp_subflow_abort(struct mptsub *mpts, int error) |
2777 | { |
2778 | struct socket *so = mpts->mpts_socket; |
2779 | struct tcpcb *tp = sototcpcb(so); |
2780 | |
2781 | if (mpts->mpts_flags & MPTSF_DISCONNECTED) { |
2782 | return; |
2783 | } |
2784 | |
2785 | if (tp->t_state != TCPS_CLOSED) { |
2786 | tcp_drop(tp, error); |
2787 | } |
2788 | |
mptcp_subflow_eupcall1(so, mpts, SO_FILT_HINT_DISCONNECTED);
2790 | } |
2791 | |
2792 | /* |
2793 | * Disconnect a subflow socket. |
2794 | */ |
2795 | void |
2796 | mptcp_subflow_disconnect(struct mptses *mpte, struct mptsub *mpts) |
2797 | { |
2798 | struct socket *so, *mp_so; |
2799 | struct mptcb *mp_tp; |
2800 | int send_dfin = 0; |
2801 | |
2802 | so = mpts->mpts_socket; |
2803 | mp_tp = mpte->mpte_mptcb; |
2804 | mp_so = mptetoso(mpte); |
2805 | |
socket_lock_assert_owned(mp_so);
2807 | |
2808 | if (mpts->mpts_flags & (MPTSF_DISCONNECTING | MPTSF_DISCONNECTED)) { |
2809 | return; |
2810 | } |
2811 | |
mptcp_unset_cellicon(mpte, mpts, 1);
2813 | |
2814 | mpts->mpts_flags |= MPTSF_DISCONNECTING; |
2815 | |
2816 | if (mp_tp->mpt_state > MPTCPS_CLOSE_WAIT) { |
2817 | send_dfin = 1; |
2818 | } |
2819 | |
2820 | if (mp_so->so_flags & SOF_DEFUNCT) { |
2821 | errno_t ret; |
2822 | |
2823 | ret = sosetdefunct(NULL, so, SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL, TRUE); |
2824 | if (ret == 0) { |
2825 | ret = sodefunct(NULL, so, SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL); |
2826 | |
2827 | if (ret != 0) { |
2828 | os_log_error(mptcp_log_handle, "%s - %lx: sodefunct failed with %d\n" , |
2829 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), ret); |
2830 | } |
2831 | } else { |
2832 | os_log_error(mptcp_log_handle, "%s - %lx: sosetdefunct failed with %d\n" , |
2833 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), ret); |
2834 | } |
2835 | } |
2836 | |
2837 | if (!(so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) && |
2838 | (so->so_state & SS_ISCONNECTED)) { |
2839 | if (send_dfin) { |
2840 | mptcp_send_dfin(so); |
2841 | } |
2842 | |
2843 | (void) soshutdownlock(so, SHUT_RD); |
2844 | (void) soshutdownlock(so, SHUT_WR); |
2845 | (void) sodisconnectlocked(so); |
2846 | } |
2847 | |
2848 | /* |
2849 | * Generate a disconnect event for this subflow socket, in case |
2850 | * the lower layer doesn't do it; this is needed because the |
2851 | * subflow socket deletion relies on it. |
2852 | */ |
mptcp_subflow_eupcall1(so, mpts, SO_FILT_HINT_DISCONNECTED);
2854 | } |
2855 | |
2856 | /* |
2857 | * Subflow socket input. |
2858 | */ |
2859 | static void |
2860 | mptcp_subflow_input(struct mptses *mpte, struct mptsub *mpts) |
2861 | { |
2862 | struct socket *mp_so = mptetoso(mpte); |
2863 | struct mbuf *m = NULL; |
2864 | struct socket *so; |
2865 | int error, wakeup = 0; |
2866 | |
2867 | VERIFY(!(mpte->mpte_mppcb->mpp_flags & MPP_INSIDE_INPUT)); |
2868 | mpte->mpte_mppcb->mpp_flags |= MPP_INSIDE_INPUT; |
2869 | |
2870 | DTRACE_MPTCP2(subflow__input, struct mptses *, mpte, |
2871 | struct mptsub *, mpts); |
2872 | |
2873 | if (!(mpts->mpts_flags & MPTSF_CONNECTED)) { |
2874 | goto out; |
2875 | } |
2876 | |
2877 | so = mpts->mpts_socket; |
2878 | |
2879 | error = sock_receive_internal(so, NULL, &m, 0, NULL); |
2880 | if (error != 0 && error != EWOULDBLOCK) { |
os_log_error(mptcp_log_handle, "%s - %lx: cid %d error %d\n",
2882 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpts->mpts_connid, error); |
2883 | if (error == ENODATA) { |
2884 | /* |
2885 | * Don't ignore ENODATA so as to discover |
2886 | * nasty middleboxes. |
2887 | */ |
2888 | mp_so->so_error = ENODATA; |
2889 | |
2890 | wakeup = 1; |
2891 | goto out; |
2892 | } |
2893 | } |
2894 | |
/* In fallback, accept data only on the active subflow and drop it on all others */
2896 | if (m && (mpts->mpts_flags & MPTSF_MP_DEGRADED) && |
2897 | !(mpts->mpts_flags & MPTSF_ACTIVE)) { |
2898 | m_freem(m); |
2899 | goto out; |
2900 | } |
2901 | |
2902 | if (m != NULL) { |
2903 | if (IFNET_IS_CELLULAR(sotoinpcb(so)->inp_last_outifp)) { |
2904 | mptcp_set_cellicon(mpte, mpts); |
2905 | |
2906 | mpte->mpte_used_cell = 1; |
2907 | } else { |
2908 | /* |
2909 | * If during the past MPTCP_CELLICON_TOGGLE_RATE seconds we didn't |
2910 | * explicitly set the cellicon, then we unset it again. |
2911 | */ |
2912 | if (TSTMP_LT(mpte->mpte_last_cellicon_set + MPTCP_CELLICON_TOGGLE_RATE, tcp_now)) { |
mptcp_unset_cellicon(mpte, NULL, 1);
2914 | } |
2915 | |
2916 | mpte->mpte_used_wifi = 1; |
2917 | } |
2918 | |
2919 | mptcp_input(mpte, m); |
2920 | } |
2921 | |
2922 | out: |
2923 | if (wakeup) { |
2924 | mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_RWAKEUP; |
2925 | } |
2926 | |
mptcp_handle_deferred_upcalls(mpte->mpte_mppcb, MPP_INSIDE_INPUT);
2928 | } |
2929 | |
2930 | void |
2931 | mptcp_handle_input(struct socket *so) |
2932 | { |
2933 | struct mptsub *mpts, *tmpts; |
2934 | struct mptses *mpte; |
2935 | |
2936 | if (!(so->so_flags & SOF_MP_SUBFLOW)) { |
2937 | return; |
2938 | } |
2939 | |
2940 | mpts = sototcpcb(so)->t_mpsub; |
2941 | mpte = mpts->mpts_mpte; |
2942 | |
socket_lock_assert_owned(mptetoso(mpte));

if (mptcp_should_defer_upcall(mpte->mpte_mppcb)) {
2946 | if (!(mpte->mpte_mppcb->mpp_flags & MPP_INPUT_HANDLE)) { |
2947 | mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_RWAKEUP; |
2948 | } |
2949 | return; |
2950 | } |
2951 | |
2952 | mpte->mpte_mppcb->mpp_flags |= MPP_INPUT_HANDLE; |
2953 | TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) { |
2954 | if (mpts->mpts_socket->so_usecount == 0) { |
2955 | /* Will be removed soon by tcp_garbage_collect */ |
2956 | continue; |
2957 | } |
2958 | |
2959 | mptcp_subflow_addref(mpts); |
2960 | mpts->mpts_socket->so_usecount++; |
2961 | |
2962 | mptcp_subflow_input(mpte, mpts); |
2963 | |
2964 | mptcp_subflow_remref(mpts); /* ours */ |
2965 | |
2966 | VERIFY(mpts->mpts_socket->so_usecount != 0); |
2967 | mpts->mpts_socket->so_usecount--; |
2968 | } |
2969 | |
mptcp_handle_deferred_upcalls(mpte->mpte_mppcb, MPP_INPUT_HANDLE);
2971 | } |
2972 | |
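/*
* Check whether the data at m's data-sequence number is still (at least
* partially) sitting in the subflow socket's send buffer, i.e. in
* flight on that subflow; reinjecting it here would then duplicate
* not-yet-acknowledged data.
*/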
2973 | static boolean_t |
2974 | mptcp_search_seq_in_sub(struct mbuf *m, struct socket *so) |
2975 | { |
2976 | struct mbuf *so_m = so->so_snd.sb_mb; |
2977 | uint64_t dsn = m->m_pkthdr.mp_dsn; |
2978 | |
2979 | while (so_m) { |
2980 | VERIFY(so_m->m_flags & M_PKTHDR); |
2981 | VERIFY(so_m->m_pkthdr.pkt_flags & PKTF_MPTCP); |
2982 | |
2983 | /* Part of the segment is covered, don't reinject here */ |
2984 | if (so_m->m_pkthdr.mp_dsn <= dsn && |
2985 | so_m->m_pkthdr.mp_dsn + so_m->m_pkthdr.mp_rlen > dsn) { |
2986 | return TRUE; |
2987 | } |
2988 | |
2989 | so_m = so_m->m_next; |
2990 | } |
2991 | |
2992 | return FALSE; |
2993 | } |
2994 | |
2995 | /* |
2996 | * Subflow socket output. |
2997 | * |
2998 | * Called for sending data from MPTCP to the underlying subflow socket. |
2999 | */ |
3000 | int |
3001 | mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts, int flags) |
3002 | { |
3003 | struct mptcb *mp_tp = mpte->mpte_mptcb; |
3004 | struct mbuf *sb_mb, *m, *mpt_mbuf = NULL, *head = NULL, *tail = NULL; |
3005 | struct socket *mp_so, *so; |
3006 | struct tcpcb *tp; |
3007 | uint64_t mpt_dsn = 0, off = 0; |
3008 | int sb_cc = 0, error = 0, wakeup = 0; |
3009 | uint16_t dss_csum; |
3010 | uint16_t tot_sent = 0; |
3011 | boolean_t reinjected = FALSE; |
3012 | |
3013 | mp_so = mptetoso(mpte); |
3014 | so = mpts->mpts_socket; |
3015 | tp = sototcpcb(so); |
3016 | |
socket_lock_assert_owned(mp_so);
3018 | |
3019 | VERIFY(!(mpte->mpte_mppcb->mpp_flags & MPP_INSIDE_OUTPUT)); |
3020 | mpte->mpte_mppcb->mpp_flags |= MPP_INSIDE_OUTPUT; |
3021 | |
3022 | VERIFY(!INP_WAIT_FOR_IF_FEEDBACK(sotoinpcb(so))); |
3023 | VERIFY((mpts->mpts_flags & MPTSF_MP_CAPABLE) || |
3024 | (mpts->mpts_flags & MPTSF_MP_DEGRADED) || |
3025 | (mpts->mpts_flags & MPTSF_TFO_REQD)); |
3026 | VERIFY(mptcp_subflow_cwnd_space(mpts->mpts_socket) > 0); |
3027 | |
3028 | DTRACE_MPTCP2(subflow__output, struct mptses *, mpte, |
3029 | struct mptsub *, mpts); |
3030 | |
/* The REMOVE_ADDR option is not sent reliably, as per the I-D */
3032 | if (mpte->mpte_flags & MPTE_SND_REM_ADDR) { |
3033 | tp->t_rem_aid = mpte->mpte_lost_aid; |
3034 | tp->t_mpflags |= TMPF_SND_REM_ADDR; |
3035 | mpte->mpte_flags &= ~MPTE_SND_REM_ADDR; |
3036 | } |
3037 | |
3038 | /* |
3039 | * The mbuf chains containing the metadata (as well as pointing to |
3040 | * the user data sitting at the MPTCP output queue) would then be |
3041 | * sent down to the subflow socket. |
3042 | * |
3043 | * Some notes on data sequencing: |
3044 | * |
3045 | * a. Each mbuf must be a M_PKTHDR. |
3046 | * b. MPTCP metadata is stored in the mptcp_pktinfo structure |
3047 | * in the mbuf pkthdr structure. |
3048 | * c. Each mbuf containing the MPTCP metadata must have its |
3049 | * pkt_flags marked with the PKTF_MPTCP flag. |
3050 | */ |
3051 | |
3052 | if (mpte->mpte_reinjectq) { |
3053 | sb_mb = mpte->mpte_reinjectq; |
3054 | } else { |
3055 | sb_mb = mp_so->so_snd.sb_mb; |
3056 | } |
3057 | |
3058 | if (sb_mb == NULL) { |
os_log_error(mptcp_log_handle, "%s - %lx: No data in MPTCP-sendbuffer! smax %u snxt %u suna %u state %u flags %#x\n",
3060 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), |
3061 | (uint32_t)mp_tp->mpt_sndmax, (uint32_t)mp_tp->mpt_sndnxt, |
3062 | (uint32_t)mp_tp->mpt_snduna, mp_tp->mpt_state, mp_so->so_flags1); |
3063 | |
3064 | /* Fix it to prevent looping */ |
3065 | if (MPTCP_SEQ_LT(mp_tp->mpt_sndnxt, mp_tp->mpt_snduna)) { |
3066 | mp_tp->mpt_sndnxt = mp_tp->mpt_snduna; |
3067 | } |
3068 | goto out; |
3069 | } |
3070 | |
3071 | VERIFY(sb_mb->m_pkthdr.pkt_flags & PKTF_MPTCP); |
3072 | |
3073 | if (sb_mb->m_pkthdr.mp_rlen == 0 && |
3074 | !(so->so_state & SS_ISCONNECTED) && |
3075 | (so->so_flags1 & SOF1_PRECONNECT_DATA)) { |
3076 | tp->t_mpflags |= TMPF_TFO_REQUEST; |
3077 | |
3078 | /* Opting to call pru_send as no mbuf at subflow level */ |
3079 | error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, NULL, NULL, |
3080 | NULL, current_proc()); |
3081 | |
3082 | goto done_sending; |
3083 | } |
3084 | |
3085 | mpt_dsn = sb_mb->m_pkthdr.mp_dsn; |
3086 | |
3087 | /* First, drop acknowledged data */ |
3088 | if (MPTCP_SEQ_LT(mpt_dsn, mp_tp->mpt_snduna)) { |
3089 | os_log_error(mptcp_log_handle, "%s - %lx: dropping data, should have been done earlier " |
3090 | "dsn %u suna %u reinject? %u\n" , |
3091 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), (uint32_t)mpt_dsn, |
3092 | (uint32_t)mp_tp->mpt_snduna, !!mpte->mpte_reinjectq); |
3093 | if (mpte->mpte_reinjectq) { |
3094 | mptcp_clean_reinjectq(mpte); |
3095 | } else { |
3096 | uint64_t len = 0; |
3097 | len = mp_tp->mpt_snduna - mpt_dsn; |
sbdrop(&mp_so->so_snd, (int)len);
3099 | wakeup = 1; |
3100 | } |
3101 | } |
3102 | |
3103 | /* Check again because of above sbdrop */ |
3104 | if (mp_so->so_snd.sb_mb == NULL && mpte->mpte_reinjectq == NULL) { |
os_log_error(mptcp_log_handle, "%s - %lx: send-buffer is empty\n",
3106 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte)); |
3107 | goto out; |
3108 | } |
3109 | |
3110 | /* |
3111 | * In degraded mode, we don't receive data acks, so force free |
3112 | * mbufs less than snd_nxt |
3113 | */ |
3114 | if ((mpts->mpts_flags & MPTSF_MP_DEGRADED) && |
3115 | (mp_tp->mpt_flags & MPTCPF_POST_FALLBACK_SYNC) && |
3116 | mp_so->so_snd.sb_mb) { |
3117 | mpt_dsn = mp_so->so_snd.sb_mb->m_pkthdr.mp_dsn; |
3118 | if (MPTCP_SEQ_LT(mpt_dsn, mp_tp->mpt_snduna)) { |
3119 | uint64_t len = 0; |
3120 | len = mp_tp->mpt_snduna - mpt_dsn; |
sbdrop(&mp_so->so_snd, (int)len);
wakeup = 1;

os_log_error(mptcp_log_handle, "%s - %lx: dropping data in degraded mode, should have been done earlier dsn %u sndnxt %u suna %u\n",
3125 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), |
3126 | (uint32_t)mpt_dsn, (uint32_t)mp_tp->mpt_sndnxt, (uint32_t)mp_tp->mpt_snduna); |
3127 | } |
3128 | } |
3129 | |
3130 | if ((mpts->mpts_flags & MPTSF_MP_DEGRADED) && |
3131 | !(mp_tp->mpt_flags & MPTCPF_POST_FALLBACK_SYNC)) { |
3132 | mp_tp->mpt_flags |= MPTCPF_POST_FALLBACK_SYNC; |
3133 | so->so_flags1 |= SOF1_POST_FALLBACK_SYNC; |
3134 | } |
3135 | |
3136 | /* |
3137 | * Adjust the top level notion of next byte used for retransmissions |
3138 | * and sending FINs. |
3139 | */ |
3140 | if (MPTCP_SEQ_LT(mp_tp->mpt_sndnxt, mp_tp->mpt_snduna)) { |
3141 | mp_tp->mpt_sndnxt = mp_tp->mpt_snduna; |
3142 | } |
3143 | |
3144 | /* Now determine the offset from which to start transmitting data */ |
3145 | if (mpte->mpte_reinjectq) { |
3146 | sb_mb = mpte->mpte_reinjectq; |
3147 | } else { |
3148 | dont_reinject: |
3149 | sb_mb = mp_so->so_snd.sb_mb; |
3150 | } |
3151 | if (sb_mb == NULL) { |
os_log_error(mptcp_log_handle, "%s - %lx: send-buffer is still empty\n", __func__,
3153 | (unsigned long)VM_KERNEL_ADDRPERM(mpte)); |
3154 | goto out; |
3155 | } |
3156 | |
3157 | if (sb_mb == mpte->mpte_reinjectq) { |
3158 | sb_cc = sb_mb->m_pkthdr.mp_rlen; |
3159 | off = 0; |
3160 | |
if (mptcp_search_seq_in_sub(sb_mb, so)) {
3162 | if (mptcp_can_send_more(mp_tp, TRUE)) { |
3163 | goto dont_reinject; |
3164 | } |
3165 | |
3166 | error = ECANCELED; |
3167 | goto out; |
3168 | } |
3169 | |
3170 | reinjected = TRUE; |
3171 | } else if (flags & MPTCP_SUBOUT_PROBING) { |
3172 | sb_cc = sb_mb->m_pkthdr.mp_rlen; |
3173 | off = 0; |
3174 | } else { |
sb_cc = min(mp_so->so_snd.sb_cc, mp_tp->mpt_sndwnd);
3176 | |
3177 | /* |
* With TFO, there might be no data at all, so we may still end up
* in this code path.
3180 | */ |
3181 | if ((mp_so->so_flags1 & SOF1_PRECONNECT_DATA) || |
3182 | MPTCP_SEQ_LT(mp_tp->mpt_sndnxt, mp_tp->mpt_sndmax)) { |
3183 | off = mp_tp->mpt_sndnxt - mp_tp->mpt_snduna; |
3184 | sb_cc -= off; |
3185 | } else { |
os_log_error(mptcp_log_handle, "%s - %lx: this should not happen: sndnxt %u sndmax %u\n",
3187 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), (uint32_t)mp_tp->mpt_sndnxt, |
3188 | (uint32_t)mp_tp->mpt_sndmax); |
3189 | |
3190 | goto out; |
3191 | } |
3192 | } |
3193 | |
sb_cc = min(sb_cc, mptcp_subflow_cwnd_space(so));
if (sb_cc <= 0) {
os_log_error(mptcp_log_handle, "%s - %lx: sb_cc is %d, mp_so->sb_cc %u, sndwnd %u, sndnxt %u sndmax %u cwnd %u\n",
3197 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), sb_cc, mp_so->so_snd.sb_cc, mp_tp->mpt_sndwnd, |
3198 | (uint32_t)mp_tp->mpt_sndnxt, (uint32_t)mp_tp->mpt_sndmax, |
3199 | mptcp_subflow_cwnd_space(so)); |
3200 | } |
3201 | |
sb_cc = min(sb_cc, UINT16_MAX);
3203 | |
3204 | /* |
* Create a DSN mapping for the data we are about to send. All of it
* shares a single mapping.
3207 | */ |
3208 | if (reinjected) { |
3209 | mpt_dsn = sb_mb->m_pkthdr.mp_dsn; |
3210 | } else { |
3211 | mpt_dsn = mp_tp->mpt_snduna + off; |
3212 | } |
3213 | |
3214 | mpt_mbuf = sb_mb; |
3215 | while (mpt_mbuf && reinjected == FALSE && |
3216 | (mpt_mbuf->m_pkthdr.mp_rlen == 0 || |
3217 | mpt_mbuf->m_pkthdr.mp_rlen <= (uint32_t)off)) { |
3218 | off -= mpt_mbuf->m_pkthdr.mp_rlen; |
3219 | mpt_mbuf = mpt_mbuf->m_next; |
3220 | } |
3221 | VERIFY((mpt_mbuf == NULL) || (mpt_mbuf->m_pkthdr.pkt_flags & PKTF_MPTCP)); |
3222 | |
3223 | head = tail = NULL; |
3224 | |
3225 | while (tot_sent < sb_cc) { |
3226 | int32_t mlen; |
3227 | |
3228 | mlen = mpt_mbuf->m_len; |
3229 | mlen -= off; |
3230 | mlen = MIN(mlen, sb_cc - tot_sent); |
3231 | |
3232 | if (mlen < 0) { |
os_log_error(mptcp_log_handle, "%s - %lx: mlen %d mp_rlen %u off %u sb_cc %u tot_sent %u\n",
3234 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mlen, mpt_mbuf->m_pkthdr.mp_rlen, |
3235 | (uint32_t)off, sb_cc, tot_sent); |
3236 | goto out; |
3237 | } |
3238 | |
3239 | if (mlen == 0) { |
3240 | goto next; |
3241 | } |
3242 | |
3243 | m = m_copym_mode(mpt_mbuf, (int)off, mlen, M_DONTWAIT, NULL, NULL, |
3244 | M_COPYM_MUST_COPY_HDR); |
3245 | if (m == NULL) { |
os_log_error(mptcp_log_handle, "%s - %lx: m_copym_mode failed\n", __func__,
3247 | (unsigned long)VM_KERNEL_ADDRPERM(mpte)); |
3248 | error = ENOBUFS; |
3249 | break; |
3250 | } |
3251 | |
3252 | /* Create a DSN mapping for the data (m_copym does it) */ |
3253 | VERIFY(m->m_flags & M_PKTHDR); |
3254 | VERIFY(m->m_next == NULL); |
3255 | |
3256 | m->m_pkthdr.pkt_flags |= PKTF_MPTCP; |
3257 | m->m_pkthdr.pkt_flags &= ~PKTF_MPSO; |
3258 | m->m_pkthdr.mp_dsn = mpt_dsn; |
3259 | m->m_pkthdr.mp_rseq = mpts->mpts_rel_seq; |
3260 | m->m_pkthdr.len = mlen; |
3261 | |
3262 | if (head == NULL) { |
3263 | head = tail = m; |
3264 | } else { |
3265 | tail->m_next = m; |
3266 | tail = m; |
3267 | } |
3268 | |
3269 | tot_sent += mlen; |
3270 | off = 0; |
3271 | next: |
3272 | mpt_mbuf = mpt_mbuf->m_next; |
3273 | } |
3274 | |
3275 | if (reinjected) { |
3276 | if (sb_cc < sb_mb->m_pkthdr.mp_rlen) { |
3277 | struct mbuf *n = sb_mb; |
3278 | |
3279 | while (n) { |
3280 | n->m_pkthdr.mp_dsn += sb_cc; |
3281 | n->m_pkthdr.mp_rlen -= sb_cc; |
3282 | n = n->m_next; |
3283 | } |
3284 | m_adj(sb_mb, sb_cc); |
3285 | } else { |
3286 | mpte->mpte_reinjectq = sb_mb->m_nextpkt; |
3287 | m_freem(sb_mb); |
3288 | } |
3289 | } |
3290 | |
3291 | if (head && (mp_tp->mpt_flags & MPTCPF_CHECKSUM)) { |
dss_csum = mptcp_output_csum(head, mpt_dsn, mpts->mpts_rel_seq,
tot_sent);
3294 | } |
3295 | |
3296 | /* Now, let's update rel-seq and the data-level length */ |
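/*
* Every mbuf of the chain built above carries the complete mapping: the
* same mp_dsn and mp_rseq were stamped in the copy loop, and the same
* mp_rlen (== tot_sent) is set here.
*/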
3297 | mpts->mpts_rel_seq += tot_sent; |
3298 | m = head; |
3299 | while (m) { |
3300 | if (mp_tp->mpt_flags & MPTCPF_CHECKSUM) { |
3301 | m->m_pkthdr.mp_csum = dss_csum; |
3302 | } |
3303 | m->m_pkthdr.mp_rlen = tot_sent; |
3304 | m = m->m_next; |
3305 | } |
3306 | |
3307 | if (head != NULL) { |
3308 | if ((mpts->mpts_flags & MPTSF_TFO_REQD) && |
3309 | (tp->t_tfo_stats == 0)) { |
3310 | tp->t_mpflags |= TMPF_TFO_REQUEST; |
3311 | } |
3312 | |
3313 | error = so->so_proto->pr_usrreqs->pru_sosend(so, NULL, NULL, head, NULL, 0); |
3314 | head = NULL; |
3315 | } |
3316 | |
3317 | done_sending: |
3318 | if (error == 0 || |
3319 | (error == EWOULDBLOCK && (tp->t_mpflags & TMPF_TFO_REQUEST))) { |
3320 | uint64_t new_sndnxt = mp_tp->mpt_sndnxt + tot_sent; |
3321 | |
3322 | if (mpts->mpts_probesoon && mpts->mpts_maxseg && tot_sent) { |
3323 | tcpstat.tcps_mp_num_probes++; |
3324 | if ((uint32_t)tot_sent < mpts->mpts_maxseg) { |
3325 | mpts->mpts_probecnt += 1; |
3326 | } else { |
3327 | mpts->mpts_probecnt += |
3328 | tot_sent / mpts->mpts_maxseg; |
3329 | } |
3330 | } |
3331 | |
3332 | if (!reinjected && !(flags & MPTCP_SUBOUT_PROBING)) { |
3333 | if (MPTCP_DATASEQ_HIGH32(new_sndnxt) > |
3334 | MPTCP_DATASEQ_HIGH32(mp_tp->mpt_sndnxt)) { |
3335 | mp_tp->mpt_flags |= MPTCPF_SND_64BITDSN; |
3336 | } |
3337 | mp_tp->mpt_sndnxt = new_sndnxt; |
3338 | } |
3339 | |
3340 | mptcp_cancel_timer(mp_tp, MPTT_REXMT); |
3341 | |
3342 | /* Must be here as mptcp_can_send_more() checks for this */ |
soclearfastopen(mp_so);
3344 | |
3345 | if (IFNET_IS_CELLULAR(sotoinpcb(so)->inp_last_outifp)) { |
3346 | mptcp_set_cellicon(mpte, mpts); |
3347 | |
3348 | mpte->mpte_used_cell = 1; |
3349 | } else { |
3350 | /* |
3351 | * If during the past MPTCP_CELLICON_TOGGLE_RATE seconds we didn't |
3352 | * explicitly set the cellicon, then we unset it again. |
3353 | */ |
3354 | if (TSTMP_LT(mpte->mpte_last_cellicon_set + MPTCP_CELLICON_TOGGLE_RATE, tcp_now)) { |
mptcp_unset_cellicon(mpte, NULL, 1);
3356 | } |
3357 | |
3358 | mpte->mpte_used_wifi = 1; |
3359 | } |
3360 | |
3361 | /* |
3362 | * Don't propagate EWOULDBLOCK - it's already taken care of |
3363 | * in mptcp_usr_send for TFO. |
3364 | */ |
3365 | error = 0; |
3366 | } else { |
3367 | /* We need to revert our change to mpts_rel_seq */ |
3368 | mpts->mpts_rel_seq -= tot_sent; |
3369 | |
os_log_error(mptcp_log_handle, "%s - %lx: %u error %d len %d subflags %#x sostate %#x soerror %u hiwat %u lowat %u\n",
3371 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpts->mpts_connid, error, tot_sent, so->so_flags, so->so_state, so->so_error, so->so_snd.sb_hiwat, so->so_snd.sb_lowat); |
3372 | } |
3373 | out: |
3374 | |
3375 | if (head != NULL) { |
3376 | m_freem(head); |
3377 | } |
3378 | |
3379 | if (wakeup) { |
3380 | mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_WWAKEUP; |
3381 | } |
3382 | |
mptcp_handle_deferred_upcalls(mpte->mpte_mppcb, MPP_INSIDE_OUTPUT);
3384 | return error; |
3385 | } |
3386 | |
3387 | static void |
3388 | mptcp_add_reinjectq(struct mptses *mpte, struct mbuf *m) |
3389 | { |
3390 | struct mbuf *n, *prev = NULL; |
3391 | |
3392 | n = mpte->mpte_reinjectq; |
3393 | |
/* First, look for an mbuf n whose data-sequence-number is greater
* than or equal to m's sequence number.
*/
3397 | while (n) { |
3398 | if (MPTCP_SEQ_GEQ(n->m_pkthdr.mp_dsn, m->m_pkthdr.mp_dsn)) { |
3399 | break; |
3400 | } |
3401 | |
3402 | prev = n; |
3403 | |
3404 | n = n->m_nextpkt; |
3405 | } |
3406 | |
3407 | if (n) { |
3408 | /* m is already fully covered by the next mbuf in the queue */ |
3409 | if (n->m_pkthdr.mp_dsn == m->m_pkthdr.mp_dsn && |
3410 | n->m_pkthdr.mp_rlen >= m->m_pkthdr.mp_rlen) { |
os_log(mptcp_log_handle, "%s - %lx: dsn %u dlen %u rseq %u fully covered with len %u\n",
3412 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), |
3413 | (uint32_t)m->m_pkthdr.mp_dsn, m->m_pkthdr.mp_rlen, |
3414 | m->m_pkthdr.mp_rseq, n->m_pkthdr.mp_rlen); |
3415 | goto dont_queue; |
3416 | } |
3417 | |
3418 | /* m is covering the next mbuf entirely, thus we remove this guy */ |
3419 | if (m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen >= n->m_pkthdr.mp_dsn + n->m_pkthdr.mp_rlen) { |
3420 | struct mbuf *tmp = n->m_nextpkt; |
3421 | |
os_log(mptcp_log_handle, "%s - %lx: m (dsn %u len %u) is covering existing mbuf (dsn %u len %u)\n",
3423 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), |
3424 | (uint32_t)m->m_pkthdr.mp_dsn, m->m_pkthdr.mp_rlen, |
3425 | (uint32_t)n->m_pkthdr.mp_dsn, n->m_pkthdr.mp_rlen); |
3426 | |
3427 | m->m_nextpkt = NULL; |
3428 | if (prev == NULL) { |
3429 | mpte->mpte_reinjectq = tmp; |
3430 | } else { |
3431 | prev->m_nextpkt = tmp; |
3432 | } |
3433 | |
3434 | m_freem(n); |
3435 | n = tmp; |
3436 | } |
3437 | } |
3438 | |
3439 | if (prev) { |
3440 | /* m is already fully covered by the previous mbuf in the queue */ |
3441 | if (prev->m_pkthdr.mp_dsn + prev->m_pkthdr.mp_rlen >= m->m_pkthdr.mp_dsn + m->m_pkthdr.len) { |
os_log(mptcp_log_handle, "%s - %lx: prev (dsn %u len %u) covers us (dsn %u len %u)\n",
3443 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), |
3444 | (uint32_t)prev->m_pkthdr.mp_dsn, prev->m_pkthdr.mp_rlen, |
3445 | (uint32_t)m->m_pkthdr.mp_dsn, m->m_pkthdr.mp_rlen); |
3446 | goto dont_queue; |
3447 | } |
3448 | } |
3449 | |
3450 | if (prev == NULL) { |
3451 | mpte->mpte_reinjectq = m; |
3452 | } else { |
3453 | prev->m_nextpkt = m; |
3454 | } |
3455 | |
3456 | m->m_nextpkt = n; |
3457 | |
3458 | return; |
3459 | |
3460 | dont_queue: |
3461 | m_freem(m); |
3462 | return; |
3463 | } |
3464 | |
3465 | static struct mbuf * |
3466 | mptcp_lookup_dsn(struct mptses *mpte, uint64_t dsn) |
3467 | { |
3468 | struct socket *mp_so = mptetoso(mpte); |
3469 | struct mbuf *m; |
3470 | |
3471 | m = mp_so->so_snd.sb_mb; |
3472 | |
3473 | while (m) { |
3474 | /* If this segment covers what we are looking for, return it. */ |
3475 | if (MPTCP_SEQ_LEQ(m->m_pkthdr.mp_dsn, dsn) && |
3476 | MPTCP_SEQ_GT(m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen, dsn)) { |
3477 | break; |
3478 | } |
3479 | |
3480 | |
/* Segment is no longer in the send queue */
3482 | if (MPTCP_SEQ_GT(m->m_pkthdr.mp_dsn, dsn)) { |
3483 | return NULL; |
3484 | } |
3485 | |
3486 | m = m->m_next; |
3487 | } |
3488 | |
3489 | return m; |
3490 | } |
3491 | |
3492 | static struct mbuf * |
3493 | mptcp_copy_mbuf_list(struct mptses *mpte, struct mbuf *m, int len) |
3494 | { |
3495 | struct mbuf *top = NULL, *tail = NULL; |
3496 | uint64_t dsn; |
3497 | uint32_t dlen, rseq; |
3498 | |
3499 | dsn = m->m_pkthdr.mp_dsn; |
3500 | dlen = m->m_pkthdr.mp_rlen; |
3501 | rseq = m->m_pkthdr.mp_rseq; |
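/*
* The whole chain shares one mapping; remember it so that the
* VERIFYs below can check that every mbuf still carries the same
* (dsn, rlen, rseq) triple while we copy m_len bytes at a time.
*/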
3502 | |
3503 | while (len > 0) { |
3504 | struct mbuf *n; |
3505 | |
3506 | VERIFY((m->m_flags & M_PKTHDR) && (m->m_pkthdr.pkt_flags & PKTF_MPTCP)); |
3507 | |
3508 | n = m_copym_mode(m, 0, m->m_len, M_DONTWAIT, NULL, NULL, M_COPYM_MUST_COPY_HDR); |
3509 | if (n == NULL) { |
os_log_error(mptcp_log_handle, "%s - %lx: m_copym_mode returned NULL\n", __func__,
3511 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte)); |
3512 | goto err; |
3513 | } |
3514 | |
3515 | VERIFY(n->m_flags & M_PKTHDR); |
3516 | VERIFY(n->m_next == NULL); |
3517 | VERIFY(n->m_pkthdr.mp_dsn == dsn); |
3518 | VERIFY(n->m_pkthdr.mp_rlen == dlen); |
3519 | VERIFY(n->m_pkthdr.mp_rseq == rseq); |
3520 | VERIFY(n->m_len == m->m_len); |
3521 | |
3522 | n->m_pkthdr.pkt_flags |= (PKTF_MPSO | PKTF_MPTCP); |
3523 | |
3524 | if (top == NULL) { |
3525 | top = n; |
3526 | } |
3527 | |
3528 | if (tail != NULL) { |
3529 | tail->m_next = n; |
3530 | } |
3531 | |
3532 | tail = n; |
3533 | |
3534 | len -= m->m_len; |
3535 | m = m->m_next; |
3536 | } |
3537 | |
3538 | return top; |
3539 | |
3540 | err: |
3541 | if (top) { |
3542 | m_freem(top); |
3543 | } |
3544 | |
3545 | return NULL; |
3546 | } |
3547 | |
3548 | static void |
3549 | mptcp_reinject_mbufs(struct socket *so) |
3550 | { |
3551 | struct tcpcb *tp = sototcpcb(so); |
3552 | struct mptsub *mpts = tp->t_mpsub; |
3553 | struct mptcb *mp_tp = tptomptp(tp); |
3554 | struct mptses *mpte = mp_tp->mpt_mpte; |
3555 | struct sockbuf *sb = &so->so_snd; |
3556 | struct mbuf *m; |
3557 | |
3558 | m = sb->sb_mb; |
3559 | while (m) { |
3560 | struct mbuf *n = m->m_next, *orig = m; |
3561 | bool set_reinject_flag = false; |
3562 | |
3563 | VERIFY((m->m_flags & M_PKTHDR) && (m->m_pkthdr.pkt_flags & PKTF_MPTCP)); |
3564 | |
3565 | if (m->m_pkthdr.pkt_flags & PKTF_MPTCP_REINJ) { |
3566 | goto next; |
3567 | } |
3568 | |
3569 | /* Has it all already been acknowledged at the data-level? */ |
3570 | if (MPTCP_SEQ_GEQ(mp_tp->mpt_snduna, m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen)) { |
3571 | goto next; |
3572 | } |
3573 | |
/* Part of this mapping has already been acknowledged at the
* subflow level - look the segment up by DSN in the MPTCP
* socket's send buffer instead.
*/
3577 | if (SEQ_GT(tp->snd_una - mpts->mpts_iss, m->m_pkthdr.mp_rseq)) { |
m = mptcp_lookup_dsn(mpte, m->m_pkthdr.mp_dsn);
3579 | if (m == NULL) { |
3580 | goto next; |
3581 | } |
3582 | } |
3583 | |
3584 | /* Copy the mbuf with headers (aka, DSN-numbers) */ |
m = mptcp_copy_mbuf_list(mpte, m, m->m_pkthdr.mp_rlen);
3586 | if (m == NULL) { |
3587 | break; |
3588 | } |
3589 | |
3590 | VERIFY(m->m_nextpkt == NULL); |
3591 | |
3592 | /* Now, add to the reinject-queue, eliminating overlapping |
3593 | * segments |
3594 | */ |
3595 | mptcp_add_reinjectq(mpte, m); |
3596 | |
3597 | set_reinject_flag = true; |
3598 | orig->m_pkthdr.pkt_flags |= PKTF_MPTCP_REINJ; |
3599 | |
3600 | next: |
3601 | /* mp_rlen can cover multiple mbufs, so advance to the end of it. */ |
3602 | while (n) { |
3603 | VERIFY((n->m_flags & M_PKTHDR) && (n->m_pkthdr.pkt_flags & PKTF_MPTCP)); |
3604 | |
3605 | if (n->m_pkthdr.mp_dsn != orig->m_pkthdr.mp_dsn) { |
3606 | break; |
3607 | } |
3608 | |
3609 | if (set_reinject_flag) { |
3610 | n->m_pkthdr.pkt_flags |= PKTF_MPTCP_REINJ; |
3611 | } |
3612 | n = n->m_next; |
3613 | } |
3614 | |
3615 | m = n; |
3616 | } |
3617 | } |
3618 | |
3619 | void |
3620 | mptcp_clean_reinjectq(struct mptses *mpte) |
3621 | { |
3622 | struct mptcb *mp_tp = mpte->mpte_mptcb; |
3623 | |
socket_lock_assert_owned(mptetoso(mpte));
3625 | |
3626 | while (mpte->mpte_reinjectq) { |
3627 | struct mbuf *m = mpte->mpte_reinjectq; |
3628 | |
3629 | if (MPTCP_SEQ_GEQ(m->m_pkthdr.mp_dsn, mp_tp->mpt_snduna) || |
3630 | MPTCP_SEQ_GT(m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen, mp_tp->mpt_snduna)) { |
3631 | break; |
3632 | } |
3633 | |
3634 | mpte->mpte_reinjectq = m->m_nextpkt; |
3635 | m->m_nextpkt = NULL; |
3636 | m_freem(m); |
3637 | } |
3638 | } |
3639 | |
3640 | static ev_ret_t |
3641 | mptcp_subflow_propagate_ev(struct mptses *mpte, struct mptsub *mpts, |
3642 | uint32_t *p_mpsofilt_hint, uint32_t event) |
3643 | { |
3644 | struct socket *mp_so, *so; |
3645 | struct mptcb *mp_tp; |
3646 | |
3647 | mp_so = mptetoso(mpte); |
3648 | mp_tp = mpte->mpte_mptcb; |
3649 | so = mpts->mpts_socket; |
3650 | |
3651 | /* |
3652 | * We got an event for this subflow that might need to be propagated, |
3653 | * based on the state of the MPTCP connection. |
3654 | */ |
3655 | if (mp_tp->mpt_state < MPTCPS_ESTABLISHED || |
3656 | (!(mp_tp->mpt_flags & MPTCPF_JOIN_READY) && !(mpts->mpts_flags & MPTSF_MP_READY)) || |
3657 | ((mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) && (mpts->mpts_flags & MPTSF_ACTIVE))) { |
3658 | mp_so->so_error = so->so_error; |
3659 | *p_mpsofilt_hint |= event; |
3660 | } |
3661 | |
3662 | return MPTS_EVRET_OK; |
3663 | } |
3664 | |
3665 | /* |
3666 | * Handle SO_FILT_HINT_NOSRCADDR subflow socket event. |
3667 | */ |
3668 | static ev_ret_t |
3669 | mptcp_subflow_nosrcaddr_ev(struct mptses *mpte, struct mptsub *mpts, |
3670 | uint32_t *p_mpsofilt_hint, uint32_t event) |
3671 | { |
3672 | struct socket *mp_so; |
3673 | struct tcpcb *tp; |
3674 | |
3675 | mp_so = mptetoso(mpte); |
3676 | tp = intotcpcb(sotoinpcb(mpts->mpts_socket)); |
3677 | |
3678 | /* |
3679 | * This overwrites any previous mpte_lost_aid to avoid storing |
3680 | * too much state when the typical case has only two subflows. |
3681 | */ |
3682 | mpte->mpte_flags |= MPTE_SND_REM_ADDR; |
3683 | mpte->mpte_lost_aid = tp->t_local_aid; |
3684 | |
3685 | /* |
3686 | * The subflow connection has lost its source address. |
3687 | */ |
3688 | mptcp_subflow_abort(mpts, EADDRNOTAVAIL); |
3689 | |
3690 | if (mp_so->so_flags & SOF_NOADDRAVAIL) { |
3691 | mptcp_subflow_propagate_ev(mpte, mpts, p_mpsofilt_hint, event); |
3692 | } |
3693 | |
3694 | return MPTS_EVRET_DELETE; |
3695 | } |
3696 | |
3697 | static ev_ret_t |
3698 | mptcp_subflow_mpsuberror_ev(struct mptses *mpte, struct mptsub *mpts, |
3699 | uint32_t *p_mpsofilt_hint, uint32_t event) |
3700 | { |
3701 | #pragma unused(event, p_mpsofilt_hint) |
3702 | struct socket *so, *mp_so; |
3703 | |
3704 | so = mpts->mpts_socket; |
3705 | |
3706 | if (so->so_error != ENODATA) { |
3707 | return MPTS_EVRET_OK; |
3708 | } |
3709 | |
3710 | |
3711 | mp_so = mptetoso(mpte); |
3712 | |
3713 | mp_so->so_error = ENODATA; |
3714 | |
sorwakeup(mp_so);
sowwakeup(mp_so);
3717 | |
3718 | return MPTS_EVRET_OK; |
3719 | } |
3720 | |
3721 | |
3722 | /* |
3723 | * Handle SO_FILT_HINT_MPCANTRCVMORE subflow socket event that |
3724 | * indicates that the remote side sent a Data FIN |
3725 | */ |
3726 | static ev_ret_t |
3727 | mptcp_subflow_mpcantrcvmore_ev(struct mptses *mpte, struct mptsub *mpts, |
3728 | uint32_t *p_mpsofilt_hint, uint32_t event) |
3729 | { |
3730 | #pragma unused(event, mpts) |
3731 | struct mptcb *mp_tp = mpte->mpte_mptcb; |
3732 | |
3733 | /* |
3734 | * We got a Data FIN for the MPTCP connection. |
3735 | * The FIN may arrive with data. The data is handed up to the |
3736 | * mptcp socket and the user is notified so that it may close |
3737 | * the socket if needed. |
3738 | */ |
3739 | if (mp_tp->mpt_state == MPTCPS_CLOSE_WAIT) { |
3740 | *p_mpsofilt_hint |= SO_FILT_HINT_CANTRCVMORE; |
3741 | } |
3742 | |
3743 | return MPTS_EVRET_OK; /* keep the subflow socket around */ |
3744 | } |
3745 | |
3746 | /* |
3747 | * Handle SO_FILT_HINT_MPFAILOVER subflow socket event |
3748 | */ |
3749 | static ev_ret_t |
3750 | mptcp_subflow_failover_ev(struct mptses *mpte, struct mptsub *mpts, |
3751 | uint32_t *p_mpsofilt_hint, uint32_t event) |
3752 | { |
3753 | #pragma unused(event, p_mpsofilt_hint) |
3754 | struct mptsub *mpts_alt = NULL; |
3755 | struct socket *alt_so = NULL; |
3756 | struct socket *mp_so; |
3757 | int altpath_exists = 0; |
3758 | |
3759 | mp_so = mptetoso(mpte); |
os_log_info(mptcp_log_handle, "%s - %lx\n", __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
3761 | |
mptcp_reinject_mbufs(mpts->mpts_socket);
3763 | |
3764 | mpts_alt = mptcp_get_subflow(mpte, NULL); |
3765 | |
3766 | /* If there is no alternate eligible subflow, ignore the failover hint. */ |
3767 | if (mpts_alt == NULL || mpts_alt == mpts) { |
os_log(mptcp_log_handle, "%s - %lx no alternate path\n", __func__,
3769 | (unsigned long)VM_KERNEL_ADDRPERM(mpte)); |
3770 | |
3771 | goto done; |
3772 | } |
3773 | |
3774 | altpath_exists = 1; |
3775 | alt_so = mpts_alt->mpts_socket; |
3776 | if (mpts_alt->mpts_flags & MPTSF_FAILINGOVER) { |
3777 | /* All data acknowledged and no RTT spike */ |
3778 | if (alt_so->so_snd.sb_cc == 0 && mptcp_no_rto_spike(alt_so)) { |
3779 | mpts_alt->mpts_flags &= ~MPTSF_FAILINGOVER; |
3780 | } else { |
3781 | /* no alternate path available */ |
3782 | altpath_exists = 0; |
3783 | } |
3784 | } |
3785 | |
3786 | if (altpath_exists) { |
3787 | mpts_alt->mpts_flags |= MPTSF_ACTIVE; |
3788 | |
3789 | mpte->mpte_active_sub = mpts_alt; |
3790 | mpts->mpts_flags |= MPTSF_FAILINGOVER; |
3791 | mpts->mpts_flags &= ~MPTSF_ACTIVE; |
3792 | |
os_log_info(mptcp_log_handle, "%s - %lx: switched from %d to %d\n",
3794 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpts->mpts_connid, mpts_alt->mpts_connid); |
3795 | |
3796 | mptcpstats_inc_switch(mpte, mpts); |
3797 | |
sowwakeup(alt_so);
3799 | } else { |
3800 | done: |
3801 | mpts->mpts_socket->so_flags &= ~SOF_MP_TRYFAILOVER; |
3802 | } |
3803 | |
3804 | return MPTS_EVRET_OK; |
3805 | } |
3806 | |
3807 | /* |
3808 | * Handle SO_FILT_HINT_IFDENIED subflow socket event. |
3809 | */ |
3810 | static ev_ret_t |
3811 | mptcp_subflow_ifdenied_ev(struct mptses *mpte, struct mptsub *mpts, |
3812 | uint32_t *p_mpsofilt_hint, uint32_t event) |
3813 | { |
3814 | /* |
3815 | * The subflow connection cannot use the outgoing interface, let's |
3816 | * close this subflow. |
3817 | */ |
3818 | mptcp_subflow_abort(mpts, EPERM); |
3819 | |
3820 | mptcp_subflow_propagate_ev(mpte, mpts, p_mpsofilt_hint, event); |
3821 | |
3822 | return MPTS_EVRET_DELETE; |
3823 | } |
3824 | |
3825 | /* |
3826 | * https://tools.ietf.org/html/rfc6052#section-2 |
3827 | * https://tools.ietf.org/html/rfc6147#section-5.2 |
3828 | */ |
3829 | static boolean_t |
3830 | mptcp_desynthesize_ipv6_addr(struct mptses *mpte, const struct in6_addr *addr, |
3831 | const struct ipv6_prefix *prefix, |
3832 | struct in_addr *addrv4) |
3833 | { |
3834 | char buf[MAX_IPv4_STR_LEN]; |
3835 | char *ptrv4 = (char *)addrv4; |
3836 | const char *ptr = (const char *)addr; |
3837 | |
if (memcmp(addr, &prefix->ipv6_prefix, prefix->prefix_len) != 0) {
return false;
}

switch (prefix->prefix_len) {
case NAT64_PREFIX_LEN_96:
memcpy(ptrv4, ptr + 12, 4);
break;
case NAT64_PREFIX_LEN_64:
memcpy(ptrv4, ptr + 9, 4);
break;
case NAT64_PREFIX_LEN_56:
memcpy(ptrv4, ptr + 7, 1);
memcpy(ptrv4 + 1, ptr + 9, 3);
break;
case NAT64_PREFIX_LEN_48:
memcpy(ptrv4, ptr + 6, 2);
memcpy(ptrv4 + 2, ptr + 9, 2);
break;
case NAT64_PREFIX_LEN_40:
memcpy(ptrv4, ptr + 5, 3);
memcpy(ptrv4 + 3, ptr + 9, 1);
break;
case NAT64_PREFIX_LEN_32:
memcpy(ptrv4, ptr + 4, 4);
break;
default:
panic("NAT64-prefix len is wrong: %u",
prefix->prefix_len);
}
3868 | |
os_log_info(mptcp_log_handle, "%s - %lx: desynthesized to %s\n", __func__,
3870 | (unsigned long)VM_KERNEL_ADDRPERM(mpte), |
3871 | inet_ntop(AF_INET, (void *)addrv4, buf, sizeof(buf))); |
3872 | |
3873 | return true; |
3874 | } |
3875 | |
3876 | static void |
3877 | mptcp_handle_ipv6_connection(struct mptses *mpte, const struct mptsub *mpts) |
3878 | { |
3879 | struct ipv6_prefix nat64prefixes[NAT64_MAX_NUM_PREFIXES]; |
3880 | struct socket *so = mpts->mpts_socket; |
3881 | struct ifnet *ifp; |
3882 | int j; |
3883 | |
3884 | /* Subflow IPs will be steered directly by the server - no need to |
3885 | * desynthesize. |
3886 | */ |
3887 | if (mpte->mpte_flags & MPTE_UNICAST_IP) { |
3888 | return; |
3889 | } |
3890 | |
3891 | ifp = sotoinpcb(so)->inp_last_outifp; |
3892 | |
3893 | if (ifnet_get_nat64prefix(ifp, nat64prefixes) == ENOENT) { |
3894 | return; |
3895 | } |
3896 | |
3897 | for (j = 0; j < NAT64_MAX_NUM_PREFIXES; j++) { |
3898 | int success; |
3899 | |
3900 | if (nat64prefixes[j].prefix_len == 0) { |
3901 | continue; |
3902 | } |
3903 | |
success = mptcp_desynthesize_ipv6_addr(mpte,
    &mpte->__mpte_dst_v6.sin6_addr,
    &nat64prefixes[j],
    &mpte->mpte_sub_dst_v4.sin_addr);
3908 | if (success) { |
3909 | mpte->mpte_sub_dst_v4.sin_len = sizeof(mpte->mpte_sub_dst_v4); |
3910 | mpte->mpte_sub_dst_v4.sin_family = AF_INET; |
3911 | mpte->mpte_sub_dst_v4.sin_port = mpte->__mpte_dst_v6.sin6_port; |
3912 | |
3913 | /* |
3914 | * We connected to a NAT64'ed address. Let's remove it |
3915 | * from the potential IPs to use. Whenever we are back on |
3916 | * that network and need to connect, we can synthesize again. |
3917 | * |
3918 | * Otherwise, on different IPv6 networks we will attempt |
3919 | * to connect to that NAT64 address... |
3920 | */ |
memset(&mpte->mpte_sub_dst_v6, 0, sizeof(mpte->mpte_sub_dst_v6));
3922 | break; |
3923 | } |
3924 | } |
3925 | } |
3926 | |
3927 | static void |
3928 | mptcp_try_alternate_port(struct mptses *mpte, struct mptsub *mpts) |
3929 | { |
3930 | struct inpcb *inp; |
3931 | |
if (!mptcp_ok_to_create_subflows(mpte->mpte_mptcb)) {
3933 | return; |
3934 | } |
3935 | |
3936 | inp = sotoinpcb(mpts->mpts_socket); |
3937 | if (inp == NULL) { |
3938 | return; |
3939 | } |
3940 | |
3941 | /* Should we try the alternate port? */ |
3942 | if (mpte->mpte_alternate_port && |
3943 | inp->inp_fport != mpte->mpte_alternate_port) { |
3944 | union sockaddr_in_4_6 dst; |
3945 | struct sockaddr_in *dst_in = SIN(&dst); |
3946 | |
3947 | SOCKADDR_COPY(&mpts->mpts_dst, &dst, mpts->mpts_dst.sa_len); |
3948 | |
3949 | dst_in->sin_port = mpte->mpte_alternate_port; |
3950 | |
mptcp_subflow_add(mpte, NULL, SA(&dst), mpts->mpts_ifscope, NULL);
3952 | } else { /* Else, we tried all we could, mark this interface as non-MPTCP */ |
3953 | unsigned int i; |
3954 | |
3955 | if (inp->inp_last_outifp == NULL) { |
3956 | return; |
3957 | } |
3958 | |
3959 | for (i = 0; i < mpte->mpte_itfinfo_size; i++) { |
3960 | struct mpt_itf_info *info = &mpte->mpte_itfinfo[i]; |
3961 | |
3962 | if (inp->inp_last_outifp->if_index == info->ifindex) { |
3963 | info->no_mptcp_support = 1; |
3964 | break; |
3965 | } |
3966 | } |
3967 | } |
3968 | } |
3969 | |
/* If TFO data is successfully acked, it must be dropped from the mptcp so */
3971 | static void |
3972 | mptcp_drop_tfo_data(struct mptses *mpte, struct mptsub *mpts) |
3973 | { |
3974 | struct socket *mp_so = mptetoso(mpte); |
3975 | struct socket *so = mpts->mpts_socket; |
3976 | struct tcpcb *tp = intotcpcb(sotoinpcb(so)); |
3977 | struct mptcb *mp_tp = mpte->mpte_mptcb; |
3978 | |
3979 | /* If data was sent with SYN, rewind state */ |
3980 | if (tp->t_tfo_stats & TFO_S_SYN_DATA_ACKED) { |
3981 | u_int64_t mp_droplen = mp_tp->mpt_sndnxt - mp_tp->mpt_snduna; |
3982 | unsigned int tcp_droplen = tp->snd_una - tp->iss - 1; |
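/*
* A sketch with assumed numbers: 100 bytes sent on the SYN and fully
* acked gives snd_una - iss == 101, so tcp_droplen == 100; the extra
* 1 is the sequence slot consumed by the SYN itself. If the MPTCP
* level had queued more (mp_droplen > tcp_droplen), only the
* TCP-acked prefix is dropped and mpt_sndnxt is rewound below.
*/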
3983 | |
3984 | VERIFY(mp_droplen <= (UINT_MAX)); |
3985 | VERIFY(mp_droplen >= tcp_droplen); |
3986 | |
3987 | mpts->mpts_flags &= ~MPTSF_TFO_REQD; |
3988 | mpts->mpts_iss += tcp_droplen; |
3989 | tp->t_mpflags &= ~TMPF_TFO_REQUEST; |
3990 | |
3991 | if (mp_droplen > tcp_droplen) { |
3992 | /* handle partial TCP ack */ |
3993 | mp_so->so_flags1 |= SOF1_TFO_REWIND; |
3994 | mp_tp->mpt_sndnxt = mp_tp->mpt_snduna + (mp_droplen - tcp_droplen); |
3995 | mp_droplen = tcp_droplen; |
3996 | } else { |
3997 | /* all data on SYN was acked */ |
3998 | mpts->mpts_rel_seq = 1; |
3999 | mp_tp->mpt_sndnxt = mp_tp->mpt_snduna; |
4000 | } |
4001 | mp_tp->mpt_sndmax -= tcp_droplen; |
4002 | |
4003 | if (mp_droplen != 0) { |
4004 | VERIFY(mp_so->so_snd.sb_mb != NULL); |
sbdrop(&mp_so->so_snd, (int)mp_droplen);
4006 | } |
4007 | } |
4008 | } |
4009 | |
4010 | /* |
4011 | * Handle SO_FILT_HINT_CONNECTED subflow socket event. |
4012 | */ |
4013 | static ev_ret_t |
4014 | mptcp_subflow_connected_ev(struct mptses *mpte, struct mptsub *mpts, |
4015 | uint32_t *p_mpsofilt_hint, uint32_t event) |
4016 | { |
4017 | #pragma unused(event, p_mpsofilt_hint) |
4018 | struct socket *mp_so, *so; |
4019 | struct inpcb *inp; |
4020 | struct tcpcb *tp; |
4021 | struct mptcb *mp_tp; |
4022 | int af; |
4023 | boolean_t mpok = FALSE; |
4024 | |
4025 | mp_so = mptetoso(mpte); |
4026 | mp_tp = mpte->mpte_mptcb; |
4027 | so = mpts->mpts_socket; |
4028 | tp = sototcpcb(so); |
4029 | af = mpts->mpts_dst.sa_family; |
4030 | |
4031 | if (mpts->mpts_flags & MPTSF_CONNECTED) { |
4032 | return MPTS_EVRET_OK; |
4033 | } |
4034 | |
4035 | if ((mpts->mpts_flags & MPTSF_DISCONNECTED) || |
4036 | (mpts->mpts_flags & MPTSF_DISCONNECTING)) { |
4037 | return MPTS_EVRET_OK; |
4038 | } |
4039 | |
4040 | /* |
4041 | * The subflow connection has been connected. Find out whether it |
4042 | * is connected as a regular TCP or as a MPTCP subflow. The idea is: |
4043 | * |
4044 | * a. If MPTCP connection is not yet established, then this must be |
4045 | * the first subflow connection. If MPTCP failed to negotiate, |
4046 | * fallback to regular TCP by degrading this subflow. |
4047 | * |
4048 | * b. If MPTCP connection has been established, then this must be |
4049 | * one of the subsequent subflow connections. If MPTCP failed |
4050 | * to negotiate, disconnect the connection. |
4051 | * |
4052 | * Right now, we simply unblock any waiters at the MPTCP socket layer |
4053 | * if the MPTCP connection has not been established. |
4054 | */ |
4055 | |
4056 | if (so->so_state & SS_ISDISCONNECTED) { |
4057 | /* |
4058 | * With MPTCP joins, a connection is connected at the subflow |
4059 | * level, but the 4th ACK from the server elevates the MPTCP |
4060 | * subflow to connected state. So there is a small window |
4061 | * where the subflow could get disconnected before the |
4062 | * connected event is processed. |
4063 | */ |
4064 | return MPTS_EVRET_OK; |
4065 | } |
4066 | |
4067 | if (mpts->mpts_flags & MPTSF_TFO_REQD) { |
4068 | mptcp_drop_tfo_data(mpte, mpts); |
4069 | } |
4070 | |
4071 | mpts->mpts_flags &= ~(MPTSF_CONNECTING | MPTSF_TFO_REQD); |
4072 | mpts->mpts_flags |= MPTSF_CONNECTED; |
4073 | |
4074 | if (tp->t_mpflags & TMPF_MPTCP_TRUE) { |
4075 | mpts->mpts_flags |= MPTSF_MP_CAPABLE; |
4076 | } |
4077 | |
4078 | tp->t_mpflags &= ~TMPF_TFO_REQUEST; |
4079 | |
4080 | /* get/verify the outbound interface */ |
4081 | inp = sotoinpcb(so); |
4082 | |
4083 | mpts->mpts_maxseg = tp->t_maxseg; |
4084 | |
4085 | mpok = (mpts->mpts_flags & MPTSF_MP_CAPABLE); |
4086 | |
4087 | if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) { |
4088 | mp_tp->mpt_state = MPTCPS_ESTABLISHED; |
4089 | mpte->mpte_associd = mpts->mpts_connid; |
4090 | DTRACE_MPTCP2(state__change, |
4091 | struct mptcb *, mp_tp, |
4092 | uint32_t, 0 /* event */); |
4093 | |
4094 | if (SOCK_DOM(so) == AF_INET) { |
4095 | in_getsockaddr_s(so, &mpte->__mpte_src_v4); |
4096 | } else { |
4097 | in6_getsockaddr_s(so, &mpte->__mpte_src_v6); |
4098 | } |
4099 | |
4100 | mpts->mpts_flags |= MPTSF_ACTIVE; |
4101 | |
4102 | /* case (a) above */ |
4103 | if (!mpok) { |
4104 | tcpstat.tcps_mpcap_fallback++; |
4105 | |
4106 | tp->t_mpflags |= TMPF_INFIN_SENT; |
4107 | mptcp_notify_mpfail(so); |
4108 | } else { |
4109 | if (IFNET_IS_CELLULAR(inp->inp_last_outifp) && |
4110 | mptcp_subflows_need_backup_flag(mpte)) { |
4111 | tp->t_mpflags |= (TMPF_BACKUP_PATH | TMPF_SND_MPPRIO); |
4112 | } else { |
4113 | mpts->mpts_flags |= MPTSF_PREFERRED; |
4114 | } |
4115 | mpts->mpts_flags |= MPTSF_MPCAP_CTRSET; |
4116 | mpte->mpte_nummpcapflows++; |
4117 | |
4118 | if (SOCK_DOM(so) == AF_INET6) { |
4119 | mptcp_handle_ipv6_connection(mpte, mpts); |
4120 | } |
4121 | |
4122 | mptcp_check_subflows_and_add(mpte); |
4123 | |
4124 | if (IFNET_IS_CELLULAR(inp->inp_last_outifp)) { |
4125 | mpte->mpte_initial_cell = 1; |
4126 | } |
4127 | |
4128 | mpte->mpte_handshake_success = 1; |
4129 | } |
4130 | |
4131 | mp_tp->mpt_sndwnd = tp->snd_wnd; |
4132 | mp_tp->mpt_sndwl1 = mp_tp->mpt_rcvnxt; |
4133 | mp_tp->mpt_sndwl2 = mp_tp->mpt_snduna; |
soisconnected(mp_so);
4135 | } else if (mpok) { |
4136 | /* |
4137 | * case (b) above |
4138 | * In case of additional flows, the MPTCP socket is not |
4139 | * MPTSF_MP_CAPABLE until an ACK is received from server |
4140 | * for 3-way handshake. TCP would have guaranteed that this |
4141 | * is an MPTCP subflow. |
4142 | */ |
4143 | if (IFNET_IS_CELLULAR(inp->inp_last_outifp) && |
4144 | !(tp->t_mpflags & TMPF_BACKUP_PATH) && |
4145 | mptcp_subflows_need_backup_flag(mpte)) { |
4146 | tp->t_mpflags |= (TMPF_BACKUP_PATH | TMPF_SND_MPPRIO); |
4147 | mpts->mpts_flags &= ~MPTSF_PREFERRED; |
4148 | } else { |
4149 | mpts->mpts_flags |= MPTSF_PREFERRED; |
4150 | } |
4151 | |
4152 | mpts->mpts_flags |= MPTSF_MPCAP_CTRSET; |
4153 | mpte->mpte_nummpcapflows++; |
4154 | |
4155 | mpts->mpts_rel_seq = 1; |
4156 | |
4157 | mptcp_check_subflows_and_remove(mpte); |
4158 | } else { |
4159 | mptcp_try_alternate_port(mpte, mpts); |
4160 | |
4161 | tcpstat.tcps_join_fallback++; |
4162 | if (IFNET_IS_CELLULAR(inp->inp_last_outifp)) { |
4163 | tcpstat.tcps_mptcp_cell_proxy++; |
4164 | } else { |
4165 | tcpstat.tcps_mptcp_wifi_proxy++; |
4166 | } |
4167 | |
soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
4169 | |
4170 | return MPTS_EVRET_OK; |
4171 | } |
4172 | |
/* This call merely reserves an entry in the stats table for this ifindex */
mptcpstats_get_index(mpte->mpte_itfstats, mpts);
4175 | |
4176 | mptcp_output(mpte); |
4177 | |
4178 | return MPTS_EVRET_OK; /* keep the subflow socket around */ |
4179 | } |
4180 | |
4181 | /* |
4182 | * Handle SO_FILT_HINT_DISCONNECTED subflow socket event. |
4183 | */ |
4184 | static ev_ret_t |
4185 | mptcp_subflow_disconnected_ev(struct mptses *mpte, struct mptsub *mpts, |
4186 | uint32_t *p_mpsofilt_hint, uint32_t event) |
4187 | { |
4188 | #pragma unused(event, p_mpsofilt_hint) |
4189 | struct socket *mp_so, *so; |
4190 | struct mptcb *mp_tp; |
4191 | |
4192 | mp_so = mptetoso(mpte); |
4193 | mp_tp = mpte->mpte_mptcb; |
4194 | so = mpts->mpts_socket; |
4195 | |
4196 | if (mpts->mpts_flags & MPTSF_DISCONNECTED) { |
4197 | return MPTS_EVRET_DELETE; |
4198 | } |
4199 | |
4200 | mpts->mpts_flags |= MPTSF_DISCONNECTED; |
4201 | |
4202 | /* The subflow connection has been disconnected. */ |
4203 | |
4204 | if (mpts->mpts_flags & MPTSF_MPCAP_CTRSET) { |
4205 | mpte->mpte_nummpcapflows--; |
4206 | if (mpte->mpte_active_sub == mpts) { |
4207 | mpte->mpte_active_sub = NULL; |
4208 | } |
4209 | mpts->mpts_flags &= ~MPTSF_MPCAP_CTRSET; |
4210 | } else { |
4211 | if (so->so_flags & SOF_MP_SEC_SUBFLOW && |
4212 | !(mpts->mpts_flags & MPTSF_CONNECTED)) { |
4213 | mptcp_try_alternate_port(mpte, mpts); |
4214 | } |
4215 | } |
4216 | |
4217 | if (mp_tp->mpt_state < MPTCPS_ESTABLISHED || |
4218 | ((mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) && (mpts->mpts_flags & MPTSF_ACTIVE))) { |
mptcp_drop(mpte, mp_tp, so->so_error);
4220 | } |
4221 | |
4222 | /* |
4223 | * Clear flags that are used by getconninfo to return state. |
4224 | * Retain like MPTSF_DELETEOK for internal purposes. |
4225 | */ |
4226 | mpts->mpts_flags &= ~(MPTSF_CONNECTING | MPTSF_CONNECT_PENDING | |
4227 | MPTSF_CONNECTED | MPTSF_DISCONNECTING | MPTSF_PREFERRED | |
4228 | MPTSF_MP_CAPABLE | MPTSF_MP_READY | MPTSF_MP_DEGRADED | MPTSF_ACTIVE); |
4229 | |
4230 | return MPTS_EVRET_DELETE; |
4231 | } |
4232 | |
4233 | /* |
4234 | * Handle SO_FILT_HINT_MPSTATUS subflow socket event |
4235 | */ |
4236 | static ev_ret_t |
4237 | mptcp_subflow_mpstatus_ev(struct mptses *mpte, struct mptsub *mpts, |
4238 | uint32_t *p_mpsofilt_hint, uint32_t event) |
4239 | { |
4240 | #pragma unused(event, p_mpsofilt_hint) |
4241 | ev_ret_t ret = MPTS_EVRET_OK; |
4242 | struct socket *mp_so, *so; |
4243 | struct mptcb *mp_tp; |
4244 | |
4245 | mp_so = mptetoso(mpte); |
4246 | mp_tp = mpte->mpte_mptcb; |
4247 | so = mpts->mpts_socket; |
4248 | struct inpcb *inp = sotoinpcb(so); |
4249 | struct tcpcb *tp = intotcpcb(inp); |
4250 | |
4251 | if (sototcpcb(so)->t_mpflags & TMPF_MPTCP_TRUE) { |
4252 | mpts->mpts_flags |= MPTSF_MP_CAPABLE; |
4253 | } else { |
4254 | mpts->mpts_flags &= ~MPTSF_MP_CAPABLE; |
4255 | } |
4256 | |
4257 | if (sototcpcb(so)->t_mpflags & TMPF_TCP_FALLBACK) { |
4258 | if (mpts->mpts_flags & MPTSF_MP_DEGRADED) { |
4259 | goto done; |
4260 | } |
4261 | mpts->mpts_flags |= MPTSF_MP_DEGRADED; |
4262 | } else { |
4263 | mpts->mpts_flags &= ~MPTSF_MP_DEGRADED; |
4264 | } |
4265 | |
4266 | if (sototcpcb(so)->t_mpflags & TMPF_MPTCP_READY) { |
4267 | mpts->mpts_flags |= MPTSF_MP_READY; |
4268 | } else { |
4269 | mpts->mpts_flags &= ~MPTSF_MP_READY; |
4270 | } |
4271 | |
4272 | if (mpts->mpts_flags & MPTSF_MP_DEGRADED) { |
4273 | mp_tp->mpt_flags |= MPTCPF_FALLBACK_TO_TCP; |
4274 | mp_tp->mpt_flags &= ~MPTCPF_JOIN_READY; |
4275 | tcp_cache_update_mptcp_version(tp, FALSE); |
4276 | } |
4277 | |
4278 | if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) { |
4279 | ret = MPTS_EVRET_DISCONNECT_FALLBACK; |
4280 | |
4281 | m_freem_list(mpte->mpte_reinjectq); |
4282 | mpte->mpte_reinjectq = NULL; |
4283 | } else if (mpts->mpts_flags & MPTSF_MP_READY) { |
4284 | mp_tp->mpt_flags |= MPTCPF_JOIN_READY; |
4285 | ret = MPTS_EVRET_CONNECT_PENDING; |
4286 | } |
4287 | |
4288 | done: |
4289 | return ret; |
4290 | } |
4291 | |
4292 | /* |
4293 | * Handle SO_FILT_HINT_MUSTRST subflow socket event |
4294 | */ |
4295 | static ev_ret_t |
4296 | mptcp_subflow_mustrst_ev(struct mptses *mpte, struct mptsub *mpts, |
4297 | uint32_t *p_mpsofilt_hint, uint32_t event) |
4298 | { |
4299 | #pragma unused(event) |
4300 | struct socket *mp_so, *so; |
4301 | struct mptcb *mp_tp; |
4302 | boolean_t is_fastclose; |
4303 | |
4304 | mp_so = mptetoso(mpte); |
4305 | mp_tp = mpte->mpte_mptcb; |
4306 | so = mpts->mpts_socket; |
4307 | |
4308 | /* We got an invalid option or a fast close */ |
4309 | struct inpcb *inp = sotoinpcb(so); |
4310 | struct tcpcb *tp = NULL; |
4311 | |
4312 | tp = intotcpcb(inp); |
4313 | so->so_error = ECONNABORTED; |
4314 | |
4315 | is_fastclose = !!(tp->t_mpflags & TMPF_FASTCLOSERCV); |
4316 | |
4317 | tp->t_mpflags |= TMPF_RESET; |
4318 | |
4319 | if (tp->t_state != TCPS_CLOSED) { |
4320 | struct mbuf *m; |
4321 | struct tcptemp *t_template = tcp_maketemplate(tp, &m); |
4322 | |
4323 | if (t_template) { |
4324 | struct tcp_respond_args tra; |
4325 | |
bzero(&tra, sizeof(tra));
4327 | if (inp->inp_flags & INP_BOUND_IF) { |
4328 | tra.ifscope = inp->inp_boundifp->if_index; |
4329 | } else { |
4330 | tra.ifscope = IFSCOPE_NONE; |
4331 | } |
4332 | tra.awdl_unrestricted = 1; |
4333 | |
4334 | tcp_respond(tp, t_template->tt_ipgen, |
4335 | &t_template->tt_t, (struct mbuf *)NULL, |
4336 | tp->rcv_nxt, tp->snd_una, TH_RST, &tra); |
4337 | (void) m_free(m); |
4338 | } |
4339 | } |
4340 | |
4341 | if (!(mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) && is_fastclose) { |
4342 | struct mptsub *iter, *tmp; |
4343 | |
4344 | *p_mpsofilt_hint |= SO_FILT_HINT_CONNRESET; |
4345 | |
4346 | mp_so->so_error = ECONNRESET; |
4347 | |
4348 | TAILQ_FOREACH_SAFE(iter, &mpte->mpte_subflows, mpts_entry, tmp) { |
4349 | if (iter == mpts) { |
4350 | continue; |
4351 | } |
mptcp_subflow_abort(iter, ECONNABORTED);
4353 | } |
4354 | |
4355 | /* |
4356 | * mptcp_drop is being called after processing the events, to fully |
4357 | * close the MPTCP connection |
4358 | */ |
mptcp_drop(mpte, mp_tp, mp_so->so_error);
4360 | } |
4361 | |
4362 | mptcp_subflow_abort(mpts, ECONNABORTED); |
4363 | |
4364 | if (mp_tp->mpt_gc_ticks == MPT_GC_TICKS) { |
4365 | mp_tp->mpt_gc_ticks = MPT_GC_TICKS_FAST; |
4366 | } |
4367 | |
4368 | return MPTS_EVRET_DELETE; |
4369 | } |
4370 | |
4371 | static ev_ret_t |
4372 | mptcp_subflow_adaptive_rtimo_ev(struct mptses *mpte, struct mptsub *mpts, |
4373 | uint32_t *p_mpsofilt_hint, uint32_t event) |
4374 | { |
4375 | #pragma unused(event) |
4376 | bool found_active = false; |
4377 | |
4378 | mpts->mpts_flags |= MPTSF_READ_STALL; |
4379 | |
4380 | TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { |
4381 | struct tcpcb *tp = sototcpcb(mpts->mpts_socket); |
4382 | |
4383 | if (!TCPS_HAVEESTABLISHED(tp->t_state) || |
4384 | TCPS_HAVERCVDFIN2(tp->t_state)) { |
4385 | continue; |
4386 | } |
4387 | |
4388 | if (!(mpts->mpts_flags & MPTSF_READ_STALL)) { |
4389 | found_active = true; |
4390 | break; |
4391 | } |
4392 | } |
4393 | |
4394 | if (!found_active) { |
4395 | *p_mpsofilt_hint |= SO_FILT_HINT_ADAPTIVE_RTIMO; |
4396 | } |
4397 | |
4398 | return MPTS_EVRET_OK; |
4399 | } |
4400 | |
4401 | static ev_ret_t |
4402 | mptcp_subflow_adaptive_wtimo_ev(struct mptses *mpte, struct mptsub *mpts, |
4403 | uint32_t *p_mpsofilt_hint, uint32_t event) |
4404 | { |
4405 | #pragma unused(event) |
4406 | bool found_active = false; |
4407 | |
4408 | mpts->mpts_flags |= MPTSF_WRITE_STALL; |
4409 | |
4410 | TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { |
4411 | struct tcpcb *tp = sototcpcb(mpts->mpts_socket); |
4412 | |
4413 | if (!TCPS_HAVEESTABLISHED(tp->t_state) || |
4414 | tp->t_state > TCPS_CLOSE_WAIT) { |
4415 | continue; |
4416 | } |
4417 | |
4418 | if (!(mpts->mpts_flags & MPTSF_WRITE_STALL)) { |
4419 | found_active = true; |
4420 | break; |
4421 | } |
4422 | } |
4423 | |
4424 | if (!found_active) { |
4425 | *p_mpsofilt_hint |= SO_FILT_HINT_ADAPTIVE_WTIMO; |
4426 | } |
4427 | |
4428 | return MPTS_EVRET_OK; |
4429 | } |
4430 | |
4431 | /* |
4432 | * Issues SOPT_SET on an MPTCP subflow socket; socket must already be locked, |
4433 | * caller must ensure that the option can be issued on subflow sockets, via |
4434 | * MPOF_SUBFLOW_OK flag. |
4435 | */ |
4436 | int |
4437 | mptcp_subflow_sosetopt(struct mptses *mpte, struct mptsub *mpts, struct mptopt *mpo) |
4438 | { |
4439 | struct socket *mp_so, *so; |
4440 | struct sockopt sopt; |
4441 | int error; |
4442 | |
4443 | VERIFY(mpo->mpo_flags & MPOF_SUBFLOW_OK); |
4444 | |
4445 | mp_so = mptetoso(mpte); |
4446 | so = mpts->mpts_socket; |
4447 | |
socket_lock_assert_owned(mp_so);
4449 | |
4450 | if (mpte->mpte_mptcb->mpt_state >= MPTCPS_ESTABLISHED && |
4451 | mpo->mpo_level == SOL_SOCKET && |
4452 | mpo->mpo_name == SO_MARK_CELLFALLBACK) { |
4453 | struct ifnet *ifp = ifindex2ifnet[mpts->mpts_ifscope]; |
4454 | |
4455 | /* |
4456 | * When we open a new subflow, mark it as cell fallback, if |
4457 | * this subflow goes over cell. |
4458 | * |
4459 | * (except for first-party apps) |
4460 | */ |
4461 | |
4462 | if (mpte->mpte_flags & MPTE_FIRSTPARTY) { |
4463 | return 0; |
4464 | } |
4465 | |
4466 | if (sotoinpcb(so)->inp_last_outifp && |
4467 | !IFNET_IS_CELLULAR(sotoinpcb(so)->inp_last_outifp)) { |
4468 | return 0; |
4469 | } |
4470 | |
4471 | /* |
4472 | * This here is an OR, because if the app is not binding to the |
4473 | * interface, then it definitely is not a cell-fallback |
4474 | * connection. |
4475 | */ |
4476 | if (mpts->mpts_ifscope == IFSCOPE_NONE || ifp == NULL || |
4477 | !IFNET_IS_CELLULAR(ifp)) { |
4478 | return 0; |
4479 | } |
4480 | } |
4481 | |
4482 | mpo->mpo_flags &= ~MPOF_INTERIM; |
4483 | |
bzero(&sopt, sizeof(sopt));
4485 | sopt.sopt_dir = SOPT_SET; |
4486 | sopt.sopt_level = mpo->mpo_level; |
4487 | sopt.sopt_name = mpo->mpo_name; |
4488 | sopt.sopt_val = CAST_USER_ADDR_T(&mpo->mpo_intval); |
4489 | sopt.sopt_valsize = sizeof(int); |
4490 | sopt.sopt_p = kernproc; |
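/*
* The sockopt is replayed against the subflow as a kernel-built
* request: sopt_p == kernproc marks sopt_val as a kernel address,
* so the socket option code copies it directly instead of via
* copyin().
*/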
4491 | |
error = sosetoptlock(so, &sopt, 0);
4493 | if (error) { |
os_log_error(mptcp_log_handle, "%s - %lx: sopt %s "
    "val %d set error %d\n", __func__,
4496 | (unsigned long)VM_KERNEL_ADDRPERM(mpte), |
4497 | mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name), |
4498 | mpo->mpo_intval, error); |
4499 | } |
4500 | return error; |
4501 | } |
4502 | |
4503 | /* |
4504 | * Issues SOPT_GET on an MPTCP subflow socket; socket must already be locked, |
4505 | * caller must ensure that the option can be issued on subflow sockets, via |
4506 | * MPOF_SUBFLOW_OK flag. |
4507 | */ |
4508 | int |
4509 | mptcp_subflow_sogetopt(struct mptses *mpte, struct socket *so, |
4510 | struct mptopt *mpo) |
4511 | { |
4512 | struct socket *mp_so; |
4513 | struct sockopt sopt; |
4514 | int error; |
4515 | |
4516 | VERIFY(mpo->mpo_flags & MPOF_SUBFLOW_OK); |
4517 | mp_so = mptetoso(mpte); |
4518 | |
socket_lock_assert_owned(mp_so);
4520 | |
bzero(&sopt, sizeof(sopt));
4522 | sopt.sopt_dir = SOPT_GET; |
4523 | sopt.sopt_level = mpo->mpo_level; |
4524 | sopt.sopt_name = mpo->mpo_name; |
4525 | sopt.sopt_val = CAST_USER_ADDR_T(&mpo->mpo_intval); |
4526 | sopt.sopt_valsize = sizeof(int); |
4527 | sopt.sopt_p = kernproc; |
4528 | |
error = sogetoptlock(so, &sopt, 0); /* already locked */
4530 | if (error) { |
os_log_error(mptcp_log_handle,
    "%s - %lx: sopt %s get error %d\n",
4533 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), |
4534 | mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name), error); |
4535 | } |
4536 | return error; |
4537 | } |
4538 | |
4539 | |
4540 | /* |
4541 | * MPTCP garbage collector. |
4542 | * |
* This routine is called by the MP domain's on-demand periodic callout,
* which is triggered whenever an MPTCP socket is closed. The callout will
* repeat as long as this routine returns a non-zero value.
4546 | */ |
4547 | static uint32_t |
4548 | mptcp_gc(struct mppcbinfo *mppi) |
4549 | { |
4550 | struct mppcb *mpp, *tmpp; |
4551 | uint32_t active = 0; |
4552 | |
4553 | LCK_MTX_ASSERT(&mppi->mppi_lock, LCK_MTX_ASSERT_OWNED); |
4554 | |
4555 | TAILQ_FOREACH_SAFE(mpp, &mppi->mppi_pcbs, mpp_entry, tmpp) { |
4556 | struct socket *mp_so; |
4557 | struct mptses *mpte; |
4558 | struct mptcb *mp_tp; |
4559 | |
4560 | mp_so = mpp->mpp_socket; |
mpte = mptompte(mpp);
4562 | mp_tp = mpte->mpte_mptcb; |
4563 | |
if (!mpp_try_lock(mpp)) {
4565 | active++; |
4566 | continue; |
4567 | } |
4568 | |
4569 | VERIFY(mpp->mpp_flags & MPP_ATTACHED); |
4570 | |
4571 | /* check again under the lock */ |
4572 | if (mp_so->so_usecount > 0) { |
4573 | boolean_t wakeup = FALSE; |
4574 | struct mptsub *mpts, *tmpts; |
4575 | |
4576 | if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_1) { |
4577 | if (mp_tp->mpt_gc_ticks > 0) { |
4578 | mp_tp->mpt_gc_ticks--; |
4579 | } |
4580 | if (mp_tp->mpt_gc_ticks == 0) { |
4581 | wakeup = TRUE; |
4582 | } |
4583 | } |
4584 | if (wakeup) { |
4585 | TAILQ_FOREACH_SAFE(mpts, |
4586 | &mpte->mpte_subflows, mpts_entry, tmpts) { |
mptcp_subflow_eupcall1(mpts->mpts_socket,
    mpts, SO_FILT_HINT_DISCONNECTED);
4589 | } |
4590 | } |
socket_unlock(mp_so, 0);
4592 | active++; |
4593 | continue; |
4594 | } |
4595 | |
4596 | if (mpp->mpp_state != MPPCB_STATE_DEAD) { |
panic("%s - %lx: skipped state "
    "[u=%d,r=%d,s=%d]\n", __func__,
4599 | (unsigned long)VM_KERNEL_ADDRPERM(mpte), |
4600 | mp_so->so_usecount, mp_so->so_retaincnt, |
4601 | mpp->mpp_state); |
4602 | } |
4603 | |
4604 | if (mp_tp->mpt_state == MPTCPS_TIME_WAIT) { |
4605 | mptcp_close(mpte, mp_tp); |
4606 | } |
4607 | |
4608 | mptcp_session_destroy(mpte); |
4609 | |
4610 | DTRACE_MPTCP4(dispose, struct socket *, mp_so, |
4611 | struct sockbuf *, &mp_so->so_rcv, |
4612 | struct sockbuf *, &mp_so->so_snd, |
4613 | struct mppcb *, mpp); |
4614 | |
4615 | mptcp_pcbdispose(mpp); |
sodealloc(mp_so);
4617 | } |
4618 | |
4619 | return active; |
4620 | } |
4621 | |
4622 | /* |
* Drop an MPTCP connection, reporting the specified error.
4624 | */ |
4625 | struct mptses * |
4626 | mptcp_drop(struct mptses *mpte, struct mptcb *mp_tp, u_short errno) |
4627 | { |
4628 | struct socket *mp_so = mptetoso(mpte); |
4629 | |
4630 | VERIFY(mpte->mpte_mptcb == mp_tp); |
4631 | |
socket_lock_assert_owned(mp_so);
4633 | |
4634 | DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp, |
4635 | uint32_t, 0 /* event */); |
4636 | |
4637 | if (errno == ETIMEDOUT && mp_tp->mpt_softerror != 0) { |
4638 | errno = mp_tp->mpt_softerror; |
4639 | } |
4640 | mp_so->so_error = errno; |
4641 | |
4642 | return mptcp_close(mpte, mp_tp); |
4643 | } |
4644 | |
4645 | /* |
* Close an MPTCP control block.
4647 | */ |
4648 | struct mptses * |
4649 | mptcp_close(struct mptses *mpte, struct mptcb *mp_tp) |
4650 | { |
4651 | struct mptsub *mpts = NULL, *tmpts = NULL; |
4652 | struct socket *mp_so = mptetoso(mpte); |
4653 | |
socket_lock_assert_owned(mp_so);
4655 | VERIFY(mpte->mpte_mptcb == mp_tp); |
4656 | |
4657 | mp_tp->mpt_state = MPTCPS_TERMINATE; |
4658 | |
4659 | mptcp_freeq(mp_tp); |
4660 | |
soisdisconnected(mp_so);
4662 | |
4663 | /* Clean up all subflows */ |
4664 | TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) { |
4665 | mptcp_subflow_disconnect(mpte, mpts); |
4666 | } |
4667 | |
4668 | return NULL; |
4669 | } |
4670 | |
4671 | void |
4672 | mptcp_notify_close(struct socket *so) |
4673 | { |
soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_DISCONNECTED));
4675 | } |
4676 | |
4677 | typedef struct mptcp_subflow_event_entry { |
4678 | uint32_t sofilt_hint_mask; |
4679 | ev_ret_t (*sofilt_hint_ev_hdlr)( |
4680 | struct mptses *mpte, |
4681 | struct mptsub *mpts, |
4682 | uint32_t *p_mpsofilt_hint, |
4683 | uint32_t event); |
4684 | } mptsub_ev_entry_t; |
4685 | |
4686 | /* |
4687 | * XXX The order of the event handlers below is really |
4688 | * really important. Think twice before changing it. |
4689 | */ |
4690 | static mptsub_ev_entry_t mpsub_ev_entry_tbl[] = { |
4691 | { |
4692 | .sofilt_hint_mask = SO_FILT_HINT_MP_SUB_ERROR, |
4693 | .sofilt_hint_ev_hdlr = mptcp_subflow_mpsuberror_ev, |
4694 | }, |
4695 | { |
4696 | .sofilt_hint_mask = SO_FILT_HINT_MPCANTRCVMORE, |
4697 | .sofilt_hint_ev_hdlr = mptcp_subflow_mpcantrcvmore_ev, |
4698 | }, |
4699 | { |
4700 | .sofilt_hint_mask = SO_FILT_HINT_MPFAILOVER, |
4701 | .sofilt_hint_ev_hdlr = mptcp_subflow_failover_ev, |
4702 | }, |
4703 | { |
4704 | .sofilt_hint_mask = SO_FILT_HINT_CONNRESET, |
4705 | .sofilt_hint_ev_hdlr = mptcp_subflow_propagate_ev, |
4706 | }, |
4707 | { |
4708 | .sofilt_hint_mask = SO_FILT_HINT_MUSTRST, |
4709 | .sofilt_hint_ev_hdlr = mptcp_subflow_mustrst_ev, |
4710 | }, |
4711 | { |
4712 | .sofilt_hint_mask = SO_FILT_HINT_CANTRCVMORE, |
4713 | .sofilt_hint_ev_hdlr = mptcp_subflow_propagate_ev, |
4714 | }, |
4715 | { |
4716 | .sofilt_hint_mask = SO_FILT_HINT_TIMEOUT, |
4717 | .sofilt_hint_ev_hdlr = mptcp_subflow_propagate_ev, |
4718 | }, |
4719 | { |
4720 | .sofilt_hint_mask = SO_FILT_HINT_NOSRCADDR, |
4721 | .sofilt_hint_ev_hdlr = mptcp_subflow_nosrcaddr_ev, |
4722 | }, |
4723 | { |
4724 | .sofilt_hint_mask = SO_FILT_HINT_IFDENIED, |
4725 | .sofilt_hint_ev_hdlr = mptcp_subflow_ifdenied_ev, |
4726 | }, |
4727 | { |
4728 | .sofilt_hint_mask = SO_FILT_HINT_CONNECTED, |
4729 | .sofilt_hint_ev_hdlr = mptcp_subflow_connected_ev, |
4730 | }, |
4731 | { |
4732 | .sofilt_hint_mask = SO_FILT_HINT_MPSTATUS, |
4733 | .sofilt_hint_ev_hdlr = mptcp_subflow_mpstatus_ev, |
4734 | }, |
4735 | { |
4736 | .sofilt_hint_mask = SO_FILT_HINT_DISCONNECTED, |
4737 | .sofilt_hint_ev_hdlr = mptcp_subflow_disconnected_ev, |
4738 | }, |
4739 | { |
4740 | .sofilt_hint_mask = SO_FILT_HINT_ADAPTIVE_RTIMO, |
4741 | .sofilt_hint_ev_hdlr = mptcp_subflow_adaptive_rtimo_ev, |
4742 | }, |
4743 | { |
4744 | .sofilt_hint_mask = SO_FILT_HINT_ADAPTIVE_WTIMO, |
4745 | .sofilt_hint_ev_hdlr = mptcp_subflow_adaptive_wtimo_ev, |
4746 | }, |
4747 | }; |
4748 | |
4749 | /* |
4750 | * Subflow socket control events. |
4751 | * |
4752 | * Called for handling events related to the underlying subflow socket. |
4753 | */ |
4754 | static ev_ret_t |
4755 | mptcp_subflow_events(struct mptses *mpte, struct mptsub *mpts, |
4756 | uint32_t *p_mpsofilt_hint) |
4757 | { |
4758 | ev_ret_t ret = MPTS_EVRET_OK; |
4759 | int i, mpsub_ev_entry_count = sizeof(mpsub_ev_entry_tbl) / |
4760 | sizeof(mpsub_ev_entry_tbl[0]); |
4761 | |
4762 | /* bail if there's nothing to process */ |
4763 | if (!mpts->mpts_evctl) { |
4764 | return ret; |
4765 | } |
4766 | |
4767 | if (mpts->mpts_evctl & (SO_FILT_HINT_CONNRESET | SO_FILT_HINT_MUSTRST | |
4768 | SO_FILT_HINT_CANTSENDMORE | SO_FILT_HINT_TIMEOUT | |
4769 | SO_FILT_HINT_NOSRCADDR | SO_FILT_HINT_IFDENIED | |
4770 | SO_FILT_HINT_DISCONNECTED)) { |
4771 | mpts->mpts_evctl |= SO_FILT_HINT_MPFAILOVER; |
4772 | } |
4773 | |
4774 | DTRACE_MPTCP3(subflow__events, struct mptses *, mpte, |
4775 | struct mptsub *, mpts, uint32_t, mpts->mpts_evctl); |
4776 | |
4777 | /* |
4778 | * Process all the socket filter hints and reset the hint |
4779 | * once it is handled |
4780 | */ |
4781 | for (i = 0; i < mpsub_ev_entry_count && mpts->mpts_evctl; i++) { |
4782 | /* |
4783 | * Always execute the DISCONNECTED event, because it will wakeup |
4784 | * the app. |
4785 | */ |
4786 | if ((mpts->mpts_evctl & mpsub_ev_entry_tbl[i].sofilt_hint_mask) && |
4787 | (ret >= MPTS_EVRET_OK || |
4788 | mpsub_ev_entry_tbl[i].sofilt_hint_mask == SO_FILT_HINT_DISCONNECTED)) { |
4789 | mpts->mpts_evctl &= ~mpsub_ev_entry_tbl[i].sofilt_hint_mask; |
4790 | ev_ret_t error = |
4791 | mpsub_ev_entry_tbl[i].sofilt_hint_ev_hdlr(mpte, mpts, p_mpsofilt_hint, mpsub_ev_entry_tbl[i].sofilt_hint_mask); |
4792 | ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error); |
4793 | } |
4794 | } |
4795 | |
4796 | return ret; |
4797 | } |
4798 | |
4799 | /* |
4800 | * MPTCP workloop. |
4801 | */ |
4802 | void |
4803 | mptcp_subflow_workloop(struct mptses *mpte) |
4804 | { |
4805 | boolean_t connect_pending = FALSE, disconnect_fallback = FALSE; |
4806 | uint32_t mpsofilt_hint_mask = SO_FILT_HINT_LOCKED; |
4807 | struct mptsub *mpts, *tmpts; |
4808 | struct socket *mp_so; |
4809 | |
4810 | mp_so = mptetoso(mpte); |
4811 | |
socket_lock_assert_owned(mp_so);
4813 | |
4814 | if (mpte->mpte_flags & MPTE_IN_WORKLOOP) { |
4815 | mpte->mpte_flags |= MPTE_WORKLOOP_RELAUNCH; |
4816 | return; |
4817 | } |
4818 | mpte->mpte_flags |= MPTE_IN_WORKLOOP; |
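/*
* The workloop is not re-entrant: events raised while it is running
* only set MPTE_WORKLOOP_RELAUNCH (see above), and the loop below
* re-runs until a pass completes with no relaunch request.
*/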
4819 | |
4820 | relaunch: |
4821 | mpte->mpte_flags &= ~MPTE_WORKLOOP_RELAUNCH; |
4822 | |
4823 | TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) { |
4824 | ev_ret_t ret; |
4825 | |
4826 | if (mpts->mpts_socket->so_usecount == 0) { |
4827 | /* Will be removed soon by tcp_garbage_collect */ |
4828 | continue; |
4829 | } |
4830 | |
4831 | mptcp_subflow_addref(mpts); |
4832 | mpts->mpts_socket->so_usecount++; |
4833 | |
ret = mptcp_subflow_events(mpte, mpts, &mpsofilt_hint_mask);
4835 | |
4836 | /* |
4837 | * If MPTCP socket is closed, disconnect all subflows. |
4838 | * This will generate a disconnect event which will |
4839 | * be handled during the next iteration, causing a |
4840 | * non-zero error to be returned above. |
4841 | */ |
4842 | if (mp_so->so_flags & SOF_PCBCLEARING) { |
4843 | mptcp_subflow_disconnect(mpte, mpts); |
4844 | } |
4845 | |
4846 | switch (ret) { |
4847 | case MPTS_EVRET_OK: |
4848 | /* nothing to do */ |
4849 | break; |
4850 | case MPTS_EVRET_DELETE: |
4851 | mptcp_subflow_soclose(mpts); |
4852 | break; |
4853 | case MPTS_EVRET_CONNECT_PENDING: |
4854 | connect_pending = TRUE; |
4855 | break; |
4856 | case MPTS_EVRET_DISCONNECT_FALLBACK: |
4857 | disconnect_fallback = TRUE; |
4858 | break; |
4859 | default: |
4860 | break; |
4861 | } |
4862 | mptcp_subflow_remref(mpts); /* ours */ |
4863 | |
4864 | VERIFY(mpts->mpts_socket->so_usecount != 0); |
4865 | mpts->mpts_socket->so_usecount--; |
4866 | } |
4867 | |
4868 | if (mpsofilt_hint_mask != SO_FILT_HINT_LOCKED) { |
4869 | VERIFY(mpsofilt_hint_mask & SO_FILT_HINT_LOCKED); |
4870 | |
4871 | if (mpsofilt_hint_mask & SO_FILT_HINT_CANTRCVMORE) { |
4872 | mp_so->so_state |= SS_CANTRCVMORE; |
sorwakeup(mp_so);
4874 | } |
4875 | |
soevent(mp_so, mpsofilt_hint_mask);
4877 | } |
4878 | |
4879 | if (!connect_pending && !disconnect_fallback) { |
4880 | goto exit; |
4881 | } |
4882 | |
4883 | TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) { |
4884 | if (disconnect_fallback) { |
4885 | struct socket *so = NULL; |
4886 | struct inpcb *inp = NULL; |
4887 | struct tcpcb *tp = NULL; |
4888 | |
4889 | if (mpts->mpts_flags & MPTSF_MP_DEGRADED) { |
4890 | continue; |
4891 | } |
4892 | |
4893 | mpts->mpts_flags |= MPTSF_MP_DEGRADED; |
4894 | |
4895 | if (mpts->mpts_flags & (MPTSF_DISCONNECTING | |
4896 | MPTSF_DISCONNECTED)) { |
4897 | continue; |
4898 | } |
4899 | |
4900 | so = mpts->mpts_socket; |
4901 | |
4902 | /* |
4903 | * The MPTCP connection has degraded to a fallback |
4904 | * mode, so there is no point in keeping this subflow |
4905 | * regardless of its MPTCP-readiness state, unless it |
4906 | * is the primary one which we use for fallback. This |
4907 | * assumes that the subflow used for fallback is the |
4908 | * ACTIVE one. |
4909 | */ |
4910 | |
4911 | inp = sotoinpcb(so); |
4912 | tp = intotcpcb(inp); |
4913 | tp->t_mpflags &= |
4914 | ~(TMPF_MPTCP_READY | TMPF_MPTCP_TRUE); |
4915 | tp->t_mpflags |= TMPF_TCP_FALLBACK; |
4916 | |
4917 | soevent(so, SO_FILT_HINT_MUSTRST); |
4918 | } else if (connect_pending) { |
4919 | /* |
4920 | * The MPTCP connection has progressed to a state |
4921 | * where it supports full multipath semantics; allow |
4922 | * additional joins to be attempted for all subflows |
4923 | * that are in the PENDING state. |
4924 | */ |
4925 | if (mpts->mpts_flags & MPTSF_CONNECT_PENDING) { |
4926 | int error = mptcp_subflow_soconnectx(mpte, mpts); |
4927 | |
4928 | if (error) { |
4929 | mptcp_subflow_abort(mpts, error); |
4930 | } |
4931 | } |
4932 | } |
4933 | } |
4934 | |
4935 | exit: |
4936 | if (mpte->mpte_flags & MPTE_WORKLOOP_RELAUNCH) { |
4937 | goto relaunch; |
4938 | } |
4939 | |
4940 | mpte->mpte_flags &= ~MPTE_IN_WORKLOOP; |
4941 | } |
4942 | |
4943 | /* |
4944 | * Protocol pr_lock callback. |
4945 | */ |
4946 | int |
4947 | mptcp_lock(struct socket *mp_so, int refcount, void *lr) |
4948 | { |
4949 | struct mppcb *mpp = mpsotomppcb(mp_so); |
4950 | void *lr_saved; |
4951 | |
4952 | if (lr == NULL) { |
4953 | lr_saved = __builtin_return_address(0); |
4954 | } else { |
4955 | lr_saved = lr; |
4956 | } |
4957 | |
4958 | if (mpp == NULL) { |
panic("%s: so=%p NO PCB! lr=%p lrh= %s", __func__,
4960 | mp_so, lr_saved, solockhistory_nr(mp_so)); |
4961 | /* NOTREACHED */ |
4962 | } |
mpp_lock(mpp);
4964 | |
4965 | if (mp_so->so_usecount < 0) { |
panic("%s: so=%p so_pcb=%p lr=%p ref=%x lrh= %s", __func__,
4967 | mp_so, mp_so->so_pcb, lr_saved, mp_so->so_usecount, |
4968 | solockhistory_nr(mp_so)); |
4969 | /* NOTREACHED */ |
4970 | } |
4971 | if (refcount != 0) { |
4972 | mp_so->so_usecount++; |
4973 | mpp->mpp_inside++; |
4974 | } |
4975 | mp_so->lock_lr[mp_so->next_lock_lr] = lr_saved; |
4976 | mp_so->next_lock_lr = (mp_so->next_lock_lr + 1) % SO_LCKDBG_MAX; |
4977 | |
4978 | return 0; |
4979 | } |
4980 | |
4981 | /* |
4982 | * Protocol pr_unlock callback. |
4983 | */ |
4984 | int |
4985 | mptcp_unlock(struct socket *mp_so, int refcount, void *lr) |
4986 | { |
4987 | struct mppcb *mpp = mpsotomppcb(mp_so); |
4988 | void *lr_saved; |
4989 | |
4990 | if (lr == NULL) { |
4991 | lr_saved = __builtin_return_address(0); |
4992 | } else { |
4993 | lr_saved = lr; |
4994 | } |
4995 | |
4996 | if (mpp == NULL) { |
panic("%s: so=%p NO PCB usecount=%x lr=%p lrh= %s", __func__,
4998 | mp_so, mp_so->so_usecount, lr_saved, |
4999 | solockhistory_nr(mp_so)); |
5000 | /* NOTREACHED */ |
5001 | } |
socket_lock_assert_owned(mp_so);
5003 | |
5004 | if (refcount != 0) { |
5005 | mp_so->so_usecount--; |
5006 | mpp->mpp_inside--; |
5007 | } |
5008 | |
5009 | if (mp_so->so_usecount < 0) { |
panic("%s: so=%p usecount=%x lrh= %s", __func__,
5011 | mp_so, mp_so->so_usecount, solockhistory_nr(mp_so)); |
5012 | /* NOTREACHED */ |
5013 | } |
5014 | if (mpp->mpp_inside < 0) { |
panic("%s: mpp=%p inside=%x lrh= %s", __func__,
5016 | mpp, mpp->mpp_inside, solockhistory_nr(mp_so)); |
5017 | /* NOTREACHED */ |
5018 | } |
5019 | mp_so->unlock_lr[mp_so->next_unlock_lr] = lr_saved; |
5020 | mp_so->next_unlock_lr = (mp_so->next_unlock_lr + 1) % SO_LCKDBG_MAX; |
mpp_unlock(mpp);
5022 | |
5023 | return 0; |
5024 | } |
5025 | |
5026 | /* |
5027 | * Protocol pr_getlock callback. |
5028 | */ |
5029 | lck_mtx_t * |
5030 | mptcp_getlock(struct socket *mp_so, int flags) |
5031 | { |
5032 | struct mppcb *mpp = mpsotomppcb(mp_so); |
5033 | |
5034 | if (mpp == NULL) { |
panic("%s: so=%p NULL so_pcb %s", __func__, mp_so,
5036 | solockhistory_nr(mp_so)); |
5037 | /* NOTREACHED */ |
5038 | } |
5039 | if (mp_so->so_usecount < 0) { |
panic("%s: so=%p usecount=%x lrh= %s", __func__,
5041 | mp_so, mp_so->so_usecount, solockhistory_nr(mp_so)); |
5042 | /* NOTREACHED */ |
5043 | } |
return mpp_getlock(mpp, flags);
5045 | } |
5046 | |
5047 | void |
5048 | mptcp_get_rands(mptcp_addr_id addr_id, struct mptcb *mp_tp, u_int32_t *lrand, |
5049 | u_int32_t *rrand) |
5050 | { |
5051 | struct mptcp_subf_auth_entry *sauth_entry; |
5052 | |
5053 | LIST_FOREACH(sauth_entry, &mp_tp->mpt_subauth_list, msae_next) { |
5054 | if (sauth_entry->msae_laddr_id == addr_id) { |
5055 | if (lrand) { |
5056 | *lrand = sauth_entry->msae_laddr_rand; |
5057 | } |
5058 | if (rrand) { |
5059 | *rrand = sauth_entry->msae_raddr_rand; |
5060 | } |
5061 | break; |
5062 | } |
5063 | } |
5064 | } |
5065 | |
5066 | void |
5067 | mptcp_set_raddr_rand(mptcp_addr_id laddr_id, struct mptcb *mp_tp, |
5068 | mptcp_addr_id raddr_id, u_int32_t raddr_rand) |
5069 | { |
5070 | struct mptcp_subf_auth_entry *sauth_entry; |
5071 | |
5072 | LIST_FOREACH(sauth_entry, &mp_tp->mpt_subauth_list, msae_next) { |
5073 | if (sauth_entry->msae_laddr_id == laddr_id) { |
5074 | if ((sauth_entry->msae_raddr_id != 0) && |
5075 | (sauth_entry->msae_raddr_id != raddr_id)) { |
os_log_error(mptcp_log_handle, "%s - %lx: mismatched"
    " address ids %d %d\n", __func__, (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte),
5078 | raddr_id, sauth_entry->msae_raddr_id); |
5079 | return; |
5080 | } |
5081 | sauth_entry->msae_raddr_id = raddr_id; |
5082 | if ((sauth_entry->msae_raddr_rand != 0) && |
5083 | (sauth_entry->msae_raddr_rand != raddr_rand)) { |
os_log_error(mptcp_log_handle, "%s - %lx: "
    "dup SYN_ACK %d %d\n",
    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte),
5087 | raddr_rand, sauth_entry->msae_raddr_rand); |
5088 | return; |
5089 | } |
5090 | sauth_entry->msae_raddr_rand = raddr_rand; |
5091 | return; |
5092 | } |
5093 | } |
5094 | } |
5095 | |
5096 | /* |
5097 | * SHA-256 support for MPTCP |
5098 | */ |
5099 | |
5100 | static void |
5101 | mptcp_do_sha256(mptcp_key_t *key, char *sha_digest) |
5102 | { |
5103 | const unsigned char *sha2_base; |
5104 | int sha2_size; |
5105 | |
5106 | sha2_base = (const unsigned char *) key; |
5107 | sha2_size = sizeof(mptcp_key_t); |
5108 | |
5109 | SHA256_CTX sha_ctx; |
5110 | SHA256_Init(ctx: &sha_ctx); |
5111 | SHA256_Update(ctx: &sha_ctx, data: sha2_base, len: sha2_size); |
5112 | SHA256_Final(digest: sha_digest, ctx: &sha_ctx); |
5113 | } |
5114 | |
5115 | void |
5116 | mptcp_hmac_sha256(mptcp_key_t key1, mptcp_key_t key2, |
5117 | u_char *msg, uint16_t msg_len, u_char *digest) |
5118 | { |
5119 | SHA256_CTX sha_ctx; |
5120 | mptcp_key_t key_ipad[8] = {0}; /* key XOR'd with inner pad */ |
5121 | mptcp_key_t key_opad[8] = {0}; /* key XOR'd with outer pad */ |
5122 | int i; |
5123 | |
5124 | bzero(s: digest, SHA256_DIGEST_LENGTH); |
5125 | |
5126 | /* Set up the Key for HMAC */ |
5127 | key_ipad[0] = key1; |
5128 | key_ipad[1] = key2; |
5129 | |
5130 | key_opad[0] = key1; |
5131 | key_opad[1] = key2; |
5132 | |
5133 | /* Key is 512 block length, so no need to compute hash */ |
5134 | |
5135 | /* Compute SHA1(Key XOR opad, SHA1(Key XOR ipad, data)) */ |
5136 | |
5137 | for (i = 0; i < 8; i++) { |
5138 | key_ipad[i] ^= 0x3636363636363636; |
5139 | key_opad[i] ^= 0x5c5c5c5c5c5c5c5c; |
5140 | } |
5141 | |
5142 | /* Perform inner SHA256 */ |
5143 | SHA256_Init(ctx: &sha_ctx); |
5144 | SHA256_Update(ctx: &sha_ctx, data: (unsigned char *)key_ipad, len: sizeof(key_ipad)); |
5145 | SHA256_Update(ctx: &sha_ctx, data: msg, len: msg_len); |
5146 | SHA256_Final(digest, ctx: &sha_ctx); |
5147 | |
5148 | /* Perform outer SHA256 */ |
5149 | SHA256_Init(ctx: &sha_ctx); |
5150 | SHA256_Update(ctx: &sha_ctx, data: (unsigned char *)key_opad, len: sizeof(key_opad)); |
5151 | SHA256_Update(ctx: &sha_ctx, data: (unsigned char *)digest, SHA256_DIGEST_LENGTH); |
5152 | SHA256_Final(digest, ctx: &sha_ctx); |
5153 | } |
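
/*
 * A minimal usage sketch of the HMAC above (illustrative only, not part of
 * the build; the key and nonce values below are made up). MPTCPv1 computes
 * the MP_JOIN MAC over the two concatenated 32-bit nonces and then
 * truncates the 32-byte result to whatever the option being sent can carry.
 */
#if 0
static void
mptcp_hmac_sha256_example(void)
{
	mptcp_key_t local_key = 0x0102030405060708ULL;   /* hypothetical */
	mptcp_key_t remote_key = 0x090a0b0c0d0e0f10ULL;  /* hypothetical */
	uint32_t nonces[2] = { 0x11111111, 0x22222222 }; /* R-A + R-B */
	u_char mac[SHA256_DIGEST_LENGTH];

	mptcp_hmac_sha256(local_key, remote_key,
	    (u_char *)nonces, sizeof(nonces), mac);
	/* e.g. only mac[0..7] would go into an MP_JOIN SYN/ACK */
}
#endif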

/*
 * SHA1 support for MPTCP
 */

static void
mptcp_do_sha1(mptcp_key_t *key, char *sha_digest)
{
	SHA1_CTX sha1ctxt;
	const unsigned char *sha1_base;
	int sha1_size;

	sha1_base = (const unsigned char *)key;
	sha1_size = sizeof(mptcp_key_t);
	SHA1Init(&sha1ctxt);
	SHA1Update(&sha1ctxt, sha1_base, sha1_size);
	SHA1Final(sha_digest, &sha1ctxt);
}

void
mptcp_hmac_sha1(mptcp_key_t key1, mptcp_key_t key2,
    u_int32_t rand1, u_int32_t rand2, u_char *digest)
{
	SHA1_CTX sha1ctxt;
	mptcp_key_t key_ipad[8] = {0};  /* key XOR'd with inner pad */
	mptcp_key_t key_opad[8] = {0};  /* key XOR'd with outer pad */
	u_int32_t data[2];
	int i;

	bzero(digest, SHA1_RESULTLEN);

	/* Set up the key for HMAC */
	key_ipad[0] = key1;
	key_ipad[1] = key2;

	key_opad[0] = key1;
	key_opad[1] = key2;

	/* Set up the message for HMAC */
	data[0] = rand1;
	data[1] = rand2;

	/* The key fits within a single 512-bit block, so it need not be hashed first */

	/* Compute SHA1(Key XOR opad, SHA1(Key XOR ipad, data)) */

	for (i = 0; i < 8; i++) {
		key_ipad[i] ^= 0x3636363636363636;
		key_opad[i] ^= 0x5c5c5c5c5c5c5c5c;
	}

	/* Perform inner SHA1 */
	SHA1Init(&sha1ctxt);
	SHA1Update(&sha1ctxt, (unsigned char *)key_ipad, sizeof(key_ipad));
	SHA1Update(&sha1ctxt, (unsigned char *)data, sizeof(data));
	SHA1Final(digest, &sha1ctxt);

	/* Perform outer SHA1 */
	SHA1Init(&sha1ctxt);
	SHA1Update(&sha1ctxt, (unsigned char *)key_opad, sizeof(key_opad));
	SHA1Update(&sha1ctxt, (unsigned char *)digest, SHA1_RESULTLEN);
	SHA1Final(digest, &sha1ctxt);
}

/*
 * corresponds to MAC-B = MAC (Key=(Key-B+Key-A), Msg=(R-B+R-A))
 * corresponds to MAC-A = MAC (Key=(Key-A+Key-B), Msg=(R-A+R-B))
 */
void
mptcp_get_mpjoin_hmac(mptcp_addr_id aid, struct mptcb *mp_tp, u_char *digest, uint8_t digest_len)
{
	uint32_t lrand, rrand;

	lrand = rrand = 0;
	mptcp_get_rands(aid, mp_tp, &lrand, &rrand);

	u_char full_digest[MAX(SHA1_RESULTLEN, SHA256_DIGEST_LENGTH)] = {0};
	if (mp_tp->mpt_version == MPTCP_VERSION_0) {
		mptcp_hmac_sha1(mp_tp->mpt_localkey, mp_tp->mpt_remotekey, lrand, rrand, full_digest);
	} else {
		uint32_t data[2];
		data[0] = lrand;
		data[1] = rrand;
		mptcp_hmac_sha256(mp_tp->mpt_localkey, mp_tp->mpt_remotekey, (u_char *)data, 8, full_digest);
	}
	bcopy(full_digest, digest, digest_len);
}
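
/*
 * Usage sketch of the truncation a caller performs (illustrative only, not
 * part of the build; 'aid' and 'mp_tp' stand for an existing address id and
 * MPTCP control block). The MP_JOIN SYN/ACK carries only the leftmost
 * 64 bits of the MAC, while the final ACK of the join carries the full
 * digest.
 */
#if 0
	u_char mac64[8];

	/* Keep only the leftmost 64 bits for the SYN/ACK. */
	mptcp_get_mpjoin_hmac(aid, mp_tp, mac64, sizeof(mac64));
#endif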

/*
 * Authentication data generation
 */
static void
mptcp_generate_token(char *sha_digest, int sha_digest_len, caddr_t token,
    int token_len)
{
	VERIFY(token_len == sizeof(u_int32_t));
	VERIFY(sha_digest_len == SHA1_RESULTLEN ||
	    sha_digest_len == SHA256_DIGEST_LENGTH);

	/* Most significant 32 bits of the SHA1/SHA256 hash */
	bcopy(sha_digest, token, sizeof(u_int32_t));
	return;
}

static void
mptcp_generate_idsn(char *sha_digest, int sha_digest_len, caddr_t idsn,
    int idsn_len, uint8_t mp_version)
{
	VERIFY(idsn_len == sizeof(u_int64_t));
	VERIFY(sha_digest_len == SHA1_RESULTLEN ||
	    sha_digest_len == SHA256_DIGEST_LENGTH);
	VERIFY(mp_version == MPTCP_VERSION_0 || mp_version == MPTCP_VERSION_1);

	/*
	 * Least significant 64 bits of the hash
	 */

	if (mp_version == MPTCP_VERSION_0) {
		idsn[7] = sha_digest[12];
		idsn[6] = sha_digest[13];
		idsn[5] = sha_digest[14];
		idsn[4] = sha_digest[15];
		idsn[3] = sha_digest[16];
		idsn[2] = sha_digest[17];
		idsn[1] = sha_digest[18];
		idsn[0] = sha_digest[19];
	} else {
		idsn[7] = sha_digest[24];
		idsn[6] = sha_digest[25];
		idsn[5] = sha_digest[26];
		idsn[4] = sha_digest[27];
		idsn[3] = sha_digest[28];
		idsn[2] = sha_digest[29];
		idsn[1] = sha_digest[30];
		idsn[0] = sha_digest[31];
	}
	return;
}
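
/*
 * Equivalent formulation of the byte reversal above (illustrative only, not
 * part of the build; assumes a little-endian host). The last eight digest
 * bytes are read as a big-endian integer, which is exactly the value that
 * storing them in reverse into a host-order u_int64_t produces.
 */
#if 0
static uint64_t
idsn_from_digest_tail(const uint8_t *sha_digest, size_t tail_off)
{
	uint64_t idsn = 0;
	int i;

	/* tail_off is 12 for SHA-1 (v0) and 24 for SHA-256 (v1). */
	for (i = 0; i < 8; i++) {
		idsn = (idsn << 8) | sha_digest[tail_off + i];
	}
	return idsn;
}
#endif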

static void
mptcp_conn_properties(struct mptcb *mp_tp)
{
	/* Set DSS checksum flag */
	if (mptcp_dss_csum) {
		mp_tp->mpt_flags |= MPTCPF_CHECKSUM;
	}

	/* Set up receive window */
	mp_tp->mpt_rcvwnd = mptcp_sbspace(mp_tp);

	/* Set up gc ticks */
	mp_tp->mpt_gc_ticks = MPT_GC_TICKS;
}

static void
mptcp_init_local_parms(struct mptses *mpte, struct sockaddr *dst)
{
	struct mptcb *mp_tp = mpte->mpte_mptcb;
	char key_digest[MAX(SHA1_RESULTLEN, SHA256_DIGEST_LENGTH)];
	uint16_t digest_len;

	if (mpte->mpte_flags & MPTE_FORCE_V0 || !mptcp_enable_v1) {
		mp_tp->mpt_version = MPTCP_VERSION_0;
	} else if (mpte->mpte_flags & MPTE_FORCE_V1 && mptcp_enable_v1) {
		mp_tp->mpt_version = MPTCP_VERSION_1;
	} else {
		mp_tp->mpt_version = tcp_cache_get_mptcp_version(dst);
	}
	VERIFY(mp_tp->mpt_version == MPTCP_VERSION_0 ||
	    mp_tp->mpt_version == MPTCP_VERSION_1);

	read_frandom(&mp_tp->mpt_localkey, sizeof(mp_tp->mpt_localkey));
	if (mp_tp->mpt_version == MPTCP_VERSION_0) {
		digest_len = SHA1_RESULTLEN;
		mptcp_do_sha1(&mp_tp->mpt_localkey, key_digest);
	} else {
		digest_len = SHA256_DIGEST_LENGTH;
		mptcp_do_sha256(&mp_tp->mpt_localkey, key_digest);
	}

	mptcp_generate_token(key_digest, digest_len,
	    (caddr_t)&mp_tp->mpt_localtoken, sizeof(mp_tp->mpt_localtoken));
	mptcp_generate_idsn(key_digest, digest_len,
	    (caddr_t)&mp_tp->mpt_local_idsn, sizeof(u_int64_t), mp_tp->mpt_version);
	/* The subflow SYN is also the first MPTCP byte */
	mp_tp->mpt_snduna = mp_tp->mpt_sndmax = mp_tp->mpt_local_idsn + 1;
	mp_tp->mpt_sndnxt = mp_tp->mpt_snduna;

	mptcp_conn_properties(mp_tp);
}

int
mptcp_init_remote_parms(struct mptcb *mp_tp)
{
	/* Set up the remote token and initial DSN */
	char remote_digest[MAX(SHA1_RESULTLEN, SHA256_DIGEST_LENGTH)];
	uint16_t digest_len;

	if (mp_tp->mpt_version == MPTCP_VERSION_0) {
		digest_len = SHA1_RESULTLEN;
		mptcp_do_sha1(&mp_tp->mpt_remotekey, remote_digest);
	} else if (mp_tp->mpt_version == MPTCP_VERSION_1) {
		digest_len = SHA256_DIGEST_LENGTH;
		mptcp_do_sha256(&mp_tp->mpt_remotekey, remote_digest);
	} else {
		return -1;
	}

	mptcp_generate_token(remote_digest, digest_len,
	    (caddr_t)&mp_tp->mpt_remotetoken, sizeof(mp_tp->mpt_remotetoken));
	mptcp_generate_idsn(remote_digest, digest_len,
	    (caddr_t)&mp_tp->mpt_remote_idsn, sizeof(u_int64_t), mp_tp->mpt_version);
	mp_tp->mpt_rcvnxt = mp_tp->mpt_remote_idsn + 1;
	mp_tp->mpt_rcvadv = mp_tp->mpt_rcvnxt + mp_tp->mpt_rcvwnd;
	return 0;
}

static void
mptcp_send_dfin(struct socket *so)
{
	struct tcpcb *tp = NULL;
	struct inpcb *inp = NULL;

	inp = sotoinpcb(so);
	if (!inp) {
		return;
	}

	tp = intotcpcb(inp);
	if (!tp) {
		return;
	}

	if (!(tp->t_mpflags & TMPF_RESET)) {
		tp->t_mpflags |= TMPF_SEND_DFIN;
	}
}

/*
 * Data Sequence Mapping routines
 */
void
mptcp_insert_dsn(struct mppcb *mpp, struct mbuf *m)
{
	struct mptcb *mp_tp;

	if (m == NULL) {
		return;
	}

	mp_tp = &__container_of(mpp, struct mpp_mtp, mpp)->mtcb;

	while (m) {
		VERIFY(m->m_flags & M_PKTHDR);
		m->m_pkthdr.pkt_flags |= (PKTF_MPTCP | PKTF_MPSO);
		m->m_pkthdr.mp_dsn = mp_tp->mpt_sndmax;
		VERIFY(m_pktlen(m) >= 0 && m_pktlen(m) < UINT16_MAX);
		m->m_pkthdr.mp_rlen = (uint16_t)m_pktlen(m);
		mp_tp->mpt_sndmax += m_pktlen(m);
		m = m->m_next;
	}
}

void
mptcp_fallback_sbdrop(struct socket *so, struct mbuf *m, int len)
{
	struct mptcb *mp_tp = tptomptp(sototcpcb(so));
	uint64_t data_ack;
	uint64_t dsn;

	VERIFY(len >= 0);

	if (!m || len == 0) {
		return;
	}

	while (m && len > 0) {
		VERIFY(m->m_flags & M_PKTHDR);
		VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);

		data_ack = m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen;
		dsn = m->m_pkthdr.mp_dsn;

		len -= m->m_len;
		m = m->m_next;
	}

	if (m && len == 0) {
		/*
		 * If there is one more mbuf in the chain, it automatically means
		 * that up to m->mp_dsn has been ack'ed.
		 *
		 * This means we actually correct data_ack back down (compared
		 * to what we set inside the loop - dsn + data_len). Because in
		 * the loop we are "optimistic" and assume that the full mapping
		 * will be acked. If that's not the case and we get out of the
		 * loop with m != NULL, it means only up to m->mp_dsn has been
		 * really acked.
		 */
		data_ack = m->m_pkthdr.mp_dsn;
	}

	if (len < 0) {
		/*
		 * If len is negative, meaning we acked in the middle of an mbuf,
		 * only up to this mbuf's data-sequence number has been acked
		 * at the MPTCP-level.
		 */
		data_ack = dsn;
	}

	/*
	 * We can have data in the subflow's send-queue that is being acked,
	 * while the DATA_ACK has already advanced. Thus, we should check whether
	 * or not the DATA_ACK is actually new here.
	 */
	if (MPTCP_SEQ_LEQ(data_ack, mp_tp->mpt_sndmax) &&
	    MPTCP_SEQ_GEQ(data_ack, mp_tp->mpt_snduna)) {
		mptcp_data_ack_rcvd(mp_tp, sototcpcb(so), data_ack);
	}
}
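
/*
 * Worked example of the DATA_ACK correction above (illustrative only, not
 * part of the build; all numbers are made up). Two 500-byte mappings start
 * at DSN 1000 and 1500, and 700 subflow bytes are acked: the loop
 * optimistically sets data_ack to 2000, but since len goes negative the
 * ack is pulled back to the start of the partially-acked mapping.
 */
#if 0
static uint64_t
mptcp_fallback_dack_example(void)
{
	uint64_t map_dsn[2] = { 1000, 1500 };
	uint16_t map_len[2] = { 500, 500 };
	int len = 700;                  /* subflow bytes acked */
	uint64_t data_ack = 0, dsn = 0;
	int i;

	for (i = 0; i < 2 && len > 0; i++) {
		data_ack = map_dsn[i] + map_len[i];     /* optimistic */
		dsn = map_dsn[i];
		len -= map_len[i];
	}
	if (len < 0) {
		data_ack = dsn;         /* acked mid-mapping: back off */
	}
	return data_ack;                /* 1500, not 2000 */
}
#endif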

void
mptcp_preproc_sbdrop(struct socket *so, struct mbuf *m, unsigned int len)
{
	int rewinding = 0;

	/* TFO makes things complicated. */
	if (so->so_flags1 & SOF1_TFO_REWIND) {
		rewinding = 1;
		so->so_flags1 &= ~SOF1_TFO_REWIND;
	}

	while (m && (!(so->so_flags & SOF_MP_SUBFLOW) || rewinding)) {
		u_int32_t sub_len;
		VERIFY(m->m_flags & M_PKTHDR);
		VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);

		sub_len = m->m_pkthdr.mp_rlen;

		if (sub_len < len) {
			m->m_pkthdr.mp_dsn += sub_len;
			if (!(m->m_pkthdr.pkt_flags & PKTF_MPSO)) {
				m->m_pkthdr.mp_rseq += sub_len;
			}
			m->m_pkthdr.mp_rlen = 0;
			len -= sub_len;
		} else {
			/* sub_len >= len */
			if (rewinding == 0) {
				m->m_pkthdr.mp_dsn += len;
			}
			if (!(m->m_pkthdr.pkt_flags & PKTF_MPSO)) {
				if (rewinding == 0) {
					m->m_pkthdr.mp_rseq += len;
				}
			}
			m->m_pkthdr.mp_rlen -= len;
			break;
		}
		m = m->m_next;
	}

	if (so->so_flags & SOF_MP_SUBFLOW &&
	    !(sototcpcb(so)->t_mpflags & TMPF_TFO_REQUEST) &&
	    !(sototcpcb(so)->t_mpflags & TMPF_RCVD_DACK)) {
		/*
		 * Received an ack without receiving a DATA_ACK.
		 * Need to fall back to regular TCP (or destroy this subflow).
		 */
		sototcpcb(so)->t_mpflags |= TMPF_INFIN_SENT;
		mptcp_notify_mpfail(so);
	}
}

/* Obtain the DSN mapping stored in the mbuf */
void
mptcp_output_getm_dsnmap32(struct socket *so, int off,
    uint32_t *dsn, uint32_t *relseq, uint16_t *data_len, uint16_t *dss_csum)
{
	u_int64_t dsn64;

	mptcp_output_getm_dsnmap64(so, off, &dsn64, relseq, data_len, dss_csum);
	*dsn = (u_int32_t)MPTCP_DATASEQ_LOW32(dsn64);
}

void
mptcp_output_getm_dsnmap64(struct socket *so, int off, uint64_t *dsn,
    uint32_t *relseq, uint16_t *data_len,
    uint16_t *dss_csum)
{
	struct mbuf *m = so->so_snd.sb_mb;

	VERIFY(off >= 0);

	if (m == NULL && (so->so_flags & SOF_DEFUNCT)) {
		*dsn = 0;
		*relseq = 0;
		*data_len = 0;
		*dss_csum = 0;
		return;
	}

	/*
	 * In the subflow socket, the DSN sequencing can be discontiguous,
	 * but the subflow sequence mapping is contiguous. Use the subflow
	 * sequence property to find the right mbuf and corresponding dsn
	 * mapping.
	 */

	while (m) {
		VERIFY(m->m_flags & M_PKTHDR);
		VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);

		if (off >= m->m_len) {
			off -= m->m_len;
			m = m->m_next;
		} else {
			break;
		}
	}

	VERIFY(off >= 0);
	VERIFY(m->m_pkthdr.mp_rlen <= UINT16_MAX);

	*dsn = m->m_pkthdr.mp_dsn;
	*relseq = m->m_pkthdr.mp_rseq;
	*data_len = m->m_pkthdr.mp_rlen;
	*dss_csum = m->m_pkthdr.mp_csum;
}

void
mptcp_output_getm_data_level_details(struct socket *so, int off, uint16_t *data_len, uint16_t *dss_csum)
{
	uint64_t dsn;
	uint32_t relseq;

	mptcp_output_getm_dsnmap64(so, off, &dsn, &relseq, data_len, dss_csum);
}

/*
 * Note that this is called only from tcp_input() via mptcp_input_preproc().
 * tcp_input() may trim data after the dsn mapping is inserted into the mbuf.
 * When it trims data, tcp_input() calls m_adj(), which does not remove the
 * m_pkthdr even if the m_len becomes 0 as a result of trimming the mbuf.
 * The dsn map insertion cannot be delayed until after the trim, because data
 * can sit in the reassembly queue for a while and the DSN option info in tp
 * will be overwritten for every new packet received.
 * The dsn map will be adjusted just prior to appending to the subflow sockbuf
 * with mptcp_adj_rmap().
 */
void
mptcp_insert_rmap(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th)
{
	VERIFY(m->m_flags & M_PKTHDR);
	VERIFY(!(m->m_pkthdr.pkt_flags & PKTF_MPTCP));

	if (tp->t_mpflags & TMPF_EMBED_DSN) {
		m->m_pkthdr.mp_dsn = tp->t_rcv_map.mpt_dsn;
		m->m_pkthdr.mp_rseq = tp->t_rcv_map.mpt_sseq;
		m->m_pkthdr.mp_rlen = tp->t_rcv_map.mpt_len;
		m->m_pkthdr.mp_csum = tp->t_rcv_map.mpt_csum;
		if (tp->t_rcv_map.mpt_dfin) {
			m->m_pkthdr.pkt_flags |= PKTF_MPTCP_DFIN;
		}

		m->m_pkthdr.pkt_flags |= PKTF_MPTCP;

		tp->t_mpflags &= ~TMPF_EMBED_DSN;
		tp->t_mpflags |= TMPF_MPTCP_ACKNOW;
	} else if (tp->t_mpflags & TMPF_TCP_FALLBACK) {
		if (th->th_flags & TH_FIN) {
			m->m_pkthdr.pkt_flags |= PKTF_MPTCP_DFIN;
		}
	}
}

/*
 * The following routines help with failure detection and failover of data
 * transfer from one subflow to another.
 */
void
mptcp_act_on_txfail(struct socket *so)
{
	struct tcpcb *tp = NULL;
	struct inpcb *inp = sotoinpcb(so);

	if (inp == NULL) {
		return;
	}

	tp = intotcpcb(inp);
	if (tp == NULL) {
		return;
	}

	if (so->so_flags & SOF_MP_TRYFAILOVER) {
		return;
	}

	so->so_flags |= SOF_MP_TRYFAILOVER;
	soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPFAILOVER));
}

/*
 * Support for MP_FAIL option
 */
int
mptcp_get_map_for_dsn(struct socket *so, uint64_t dsn_fail, uint32_t *tcp_seq)
{
	struct mbuf *m = so->so_snd.sb_mb;
	uint16_t datalen;
	uint64_t dsn;
	int off = 0;

	if (m == NULL) {
		return -1;
	}

	while (m != NULL) {
		VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);
		VERIFY(m->m_flags & M_PKTHDR);
		dsn = m->m_pkthdr.mp_dsn;
		datalen = m->m_pkthdr.mp_rlen;
		if (MPTCP_SEQ_LEQ(dsn, dsn_fail) &&
		    (MPTCP_SEQ_GEQ(dsn + datalen, dsn_fail))) {
			off = (int)(dsn_fail - dsn);
			*tcp_seq = m->m_pkthdr.mp_rseq + off;
			return 0;
		}

		m = m->m_next;
	}

	/*
	 * If there was no mbuf data and a fallback to TCP occurred, there's
	 * not much else to do.
	 */

	os_log_error(mptcp_log_handle, "%s: %llu not found \n", __func__, dsn_fail);
	return -1;
}

/*
 * Support for sending contiguous MPTCP bytes in a subflow.
 * Also prevents sending data with the ACK in the 3-way handshake.
 */
int32_t
mptcp_adj_sendlen(struct socket *so, int32_t off)
{
	struct tcpcb *tp = sototcpcb(so);
	struct mptsub *mpts = tp->t_mpsub;
	uint64_t mdss_dsn;
	uint32_t mdss_subflow_seq;
	int mdss_subflow_off;
	uint16_t mdss_data_len;
	uint16_t dss_csum;

	if (so->so_snd.sb_mb == NULL && (so->so_flags & SOF_DEFUNCT)) {
		return 0;
	}

	mptcp_output_getm_dsnmap64(so, off, &mdss_dsn, &mdss_subflow_seq,
	    &mdss_data_len, &dss_csum);

	/*
	 * We need to compute how much of the mapping still remains.
	 * So, we compute the offset in the send-buffer of the dss-sub-seq.
	 */
	mdss_subflow_off = (mdss_subflow_seq + mpts->mpts_iss) - tp->snd_una;

	/*
	 * When TFO is used, we are sending the mpts->mpts_iss although the relative
	 * seq has been set to 1 (while it should be 0).
	 */
	if (tp->t_mpflags & TMPF_TFO_REQUEST) {
		mdss_subflow_off--;
	}

	VERIFY(off >= mdss_subflow_off);

	return mdss_data_len - (off - mdss_subflow_off);
}
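
/*
 * Worked example for the computation above (illustrative only, not part of
 * the build; numbers are made up). If the current mapping starts 100 bytes
 * into the subflow's send buffer (mdss_subflow_off = 100), covers
 * mdss_data_len = 1460 bytes, and the caller asks to send from off = 600,
 * then 1460 - (600 - 100) = 960 bytes of the mapping remain to be sent.
 */
#if 0
	int32_t off = 600, mdss_subflow_off = 100;
	uint16_t mdss_data_len = 1460;
	int32_t remaining = mdss_data_len - (off - mdss_subflow_off); /* 960 */
#endif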

static uint32_t
mptcp_get_maxseg(struct mptses *mpte)
{
	struct mptsub *mpts;
	uint32_t maxseg = 0;

	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		struct tcpcb *tp = sototcpcb(mpts->mpts_socket);

		if (!TCPS_HAVEESTABLISHED(tp->t_state) ||
		    TCPS_HAVERCVDFIN2(tp->t_state)) {
			continue;
		}

		if (tp->t_maxseg > maxseg) {
			maxseg = tp->t_maxseg;
		}
	}

	return maxseg;
}

static uint8_t
mptcp_get_rcvscale(struct mptses *mpte)
{
	struct mptsub *mpts;
	uint8_t rcvscale = UINT8_MAX;

	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		struct tcpcb *tp = sototcpcb(mpts->mpts_socket);

		if (!TCPS_HAVEESTABLISHED(tp->t_state) ||
		    TCPS_HAVERCVDFIN2(tp->t_state)) {
			continue;
		}

		if (tp->rcv_scale < rcvscale) {
			rcvscale = tp->rcv_scale;
		}
	}

	return rcvscale;
}

/* Similar to tcp_sbrcv_reserve */
static void
mptcp_sbrcv_reserve(struct mptcb *mp_tp, struct sockbuf *sbrcv,
    u_int32_t newsize, u_int32_t idealsize)
{
	uint8_t rcvscale = mptcp_get_rcvscale(mp_tp->mpt_mpte);

	if (rcvscale == UINT8_MAX) {
		return;
	}

	/* newsize should not exceed max */
	newsize = min(newsize, tcp_autorcvbuf_max);

	/*
	 * The receive window scale negotiated at the
	 * beginning of the connection will also set a
	 * limit on the socket buffer size
	 */
	newsize = min(newsize, TCP_MAXWIN << rcvscale);

	/* Set new socket buffer size */
	if (newsize > sbrcv->sb_hiwat &&
	    (sbreserve(sbrcv, newsize) == 1)) {
		sbrcv->sb_idealsize = min(max(sbrcv->sb_idealsize,
		    (idealsize != 0) ? idealsize : newsize), tcp_autorcvbuf_max);

		/*
		 * Again check the limit set by the advertised
		 * window scale
		 */
		sbrcv->sb_idealsize = min(sbrcv->sb_idealsize,
		    TCP_MAXWIN << rcvscale);
	}
}

void
mptcp_sbrcv_grow(struct mptcb *mp_tp)
{
	struct mptses *mpte = mp_tp->mpt_mpte;
	struct socket *mp_so = mpte->mpte_mppcb->mpp_socket;
	struct sockbuf *sbrcv = &mp_so->so_rcv;
	uint32_t hiwat_sum = 0;
	uint32_t ideal_sum = 0;
	struct mptsub *mpts;

	/*
	 * Do not grow the receive socket buffer if
	 * - auto resizing is disabled, globally or on this socket
	 * - the high water mark already reached the maximum
	 * - the stream is in background and the receive side is being
	 *   throttled
	 * - there are segments in the reassembly queue indicating loss;
	 *   there is no need to increase the receive window during
	 *   recovery, as more data is not going to be sent, and a
	 *   duplicate ack sent during recovery should not change the
	 *   receive window
	 */
	if (tcp_do_autorcvbuf == 0 ||
	    (sbrcv->sb_flags & SB_AUTOSIZE) == 0 ||
	    sbrcv->sb_hiwat >= tcp_autorcvbuf_max ||
	    (mp_so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) ||
	    !LIST_EMPTY(&mp_tp->mpt_segq)) {
		/* Cannot resize the socket buffer, just return */
		return;
	}

	/*
	 * Ideally, we want the rbuf to be (sum_i {bw_i} * rtt_max * 2)
	 *
	 * But, for this we first need accurate receiver-RTT estimations, which
	 * we currently don't have.
	 *
	 * Let's use a dummy algorithm for now, just taking the sum of all
	 * subflow's receive-buffers. It's too low, but that's all we can get
	 * for now.
	 */

	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		hiwat_sum += mpts->mpts_socket->so_rcv.sb_hiwat;
		ideal_sum += mpts->mpts_socket->so_rcv.sb_idealsize;
	}

	mptcp_sbrcv_reserve(mp_tp, sbrcv, hiwat_sum, ideal_sum);
}
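
/*
 * Sketch of the ideal sizing mentioned above (illustrative only, not part
 * of the build): twice the aggregate bandwidth-delay product across
 * subflows. 'bw' in bytes per second and 'rtt_max_us' in microseconds are
 * hypothetical inputs the stack does not currently measure.
 */
#if 0
static uint64_t
mptcp_ideal_rcvbuf(const uint64_t *bw, int nsubflows, uint64_t rtt_max_us)
{
	uint64_t bw_sum = 0;
	int i;

	for (i = 0; i < nsubflows; i++) {
		bw_sum += bw[i];
	}
	return 2 * bw_sum * rtt_max_us / 1000000;       /* 2 * aggregate BDP */
}
#endif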

/*
 * Determine if we can grow the receive socket buffer to avoid sending
 * a zero window update to the peer. We allow even socket buffers that
 * have fixed size (set by the application) to grow if the resource
 * constraints are met. They will also be trimmed after the application
 * reads data.
 *
 * Similar to tcp_sbrcv_grow_rwin
 */
static void
mptcp_sbrcv_grow_rwin(struct mptcb *mp_tp, struct sockbuf *sb)
{
	struct socket *mp_so = mp_tp->mpt_mpte->mpte_mppcb->mpp_socket;
	u_int32_t rcvbufinc = mptcp_get_maxseg(mp_tp->mpt_mpte) << 4;
	u_int32_t rcvbuf = sb->sb_hiwat;

	if (tcp_recv_bg == 1 || IS_TCP_RECV_BG(mp_so)) {
		return;
	}

	if (tcp_do_autorcvbuf == 1 &&
	    /* Diff to tcp_sbrcv_grow_rwin */
	    (mp_so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) == 0 &&
	    (rcvbuf - sb->sb_cc) < rcvbufinc &&
	    rcvbuf < tcp_autorcvbuf_max &&
	    (sb->sb_idealsize > 0 &&
	    sb->sb_hiwat <= (sb->sb_idealsize + rcvbufinc))) {
		sbreserve(sb, min((sb->sb_hiwat + rcvbufinc), tcp_autorcvbuf_max));
	}
}

/* Similar to tcp_sbspace */
int32_t
mptcp_sbspace(struct mptcb *mp_tp)
{
	struct sockbuf *sb = &mp_tp->mpt_mpte->mpte_mppcb->mpp_socket->so_rcv;
	uint32_t rcvbuf;
	int32_t space;
	int32_t pending = 0;

	socket_lock_assert_owned(mptetoso(mp_tp->mpt_mpte));

	mptcp_sbrcv_grow_rwin(mp_tp, sb);

	/* hiwat might have changed */
	rcvbuf = sb->sb_hiwat;

	space = ((int32_t) imin((rcvbuf - sb->sb_cc),
	    (sb->sb_mbmax - sb->sb_mbcnt)));
	if (space < 0) {
		space = 0;
	}

#if CONTENT_FILTER
	/* Compensate for data being processed by content filters */
	pending = cfil_sock_data_space(sb);
#endif /* CONTENT_FILTER */
	if (pending > space) {
		space = 0;
	} else {
		space -= pending;
	}

	return space;
}

/*
 * Support Fallback to Regular TCP
 */
void
mptcp_notify_mpready(struct socket *so)
{
	struct tcpcb *tp = NULL;

	if (so == NULL) {
		return;
	}

	tp = intotcpcb(sotoinpcb(so));

	if (tp == NULL) {
		return;
	}

	DTRACE_MPTCP4(multipath__ready, struct socket *, so,
	    struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd,
	    struct tcpcb *, tp);

	if (!(tp->t_mpflags & TMPF_MPTCP_TRUE)) {
		return;
	}

	if (tp->t_mpflags & TMPF_MPTCP_READY) {
		return;
	}

	tp->t_mpflags &= ~TMPF_TCP_FALLBACK;
	tp->t_mpflags |= TMPF_MPTCP_READY;

	soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPSTATUS));
}

void
mptcp_notify_mpfail(struct socket *so)
{
	struct tcpcb *tp = NULL;

	if (so == NULL) {
		return;
	}

	tp = intotcpcb(sotoinpcb(so));

	if (tp == NULL) {
		return;
	}

	DTRACE_MPTCP4(multipath__failed, struct socket *, so,
	    struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd,
	    struct tcpcb *, tp);

	if (tp->t_mpflags & TMPF_TCP_FALLBACK) {
		return;
	}

	tp->t_mpflags &= ~(TMPF_MPTCP_READY | TMPF_MPTCP_TRUE);
	tp->t_mpflags |= TMPF_TCP_FALLBACK;

	soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPSTATUS));
}

/*
 * Keepalive helper function
 */
boolean_t
mptcp_ok_to_keepalive(struct mptcb *mp_tp)
{
	boolean_t ret = 1;

	socket_lock_assert_owned(mptetoso(mp_tp->mpt_mpte));

	if (mp_tp->mpt_state >= MPTCPS_CLOSE_WAIT) {
		ret = 0;
	}
	return ret;
}

/*
 * MPTCP t_maxseg adjustment function
 */
int
mptcp_adj_mss(struct tcpcb *tp, boolean_t mtudisc)
{
	int mss_lower = 0;
	struct mptcb *mp_tp = tptomptp(tp);

#define MPTCP_COMPUTE_LEN {                                             \
	mss_lower = sizeof (struct mptcp_dss_ack_opt);                  \
	if (mp_tp->mpt_flags & MPTCPF_CHECKSUM)                         \
	        mss_lower += 2;                                         \
	else                                                            \
	        /* adjust to 32-bit boundary + EOL */                   \
	        mss_lower += 2;                                         \
}
	if (mp_tp == NULL) {
		return 0;
	}

	socket_lock_assert_owned(mptetoso(mp_tp->mpt_mpte));

	/*
	 * For the first subflow and subsequent subflows, adjust mss for
	 * the most common MPTCP option size, for the case where tcp_mss is
	 * called during option processing and MTU discovery.
	 */
	if (!mtudisc) {
		if (tp->t_mpflags & TMPF_MPTCP_TRUE &&
		    !(tp->t_mpflags & TMPF_JOINED_FLOW)) {
			MPTCP_COMPUTE_LEN;
		}

		if (tp->t_mpflags & TMPF_PREESTABLISHED &&
		    tp->t_mpflags & TMPF_SENT_JOIN) {
			MPTCP_COMPUTE_LEN;
		}
	} else {
		if (tp->t_mpflags & TMPF_MPTCP_TRUE) {
			MPTCP_COMPUTE_LEN;
		}
	}

	return mss_lower;
}
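
/*
 * What MPTCP_COMPUTE_LEN boils down to (illustrative only, not part of the
 * build): both branches add 2, either for the 16-bit DSS checksum or to pad
 * the option to a 32-bit boundary plus EOL, so the MSS reduction is the
 * DSS+ACK option size plus two bytes either way.
 */
#if 0
	int mss_lower = sizeof(struct mptcp_dss_ack_opt) + 2;
#endif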

static void
fill_mptcp_subflow(struct socket *so, mptcp_flow_t *flow, struct mptsub *mpts)
{
	struct inpcb *inp;

	tcp_getconninfo(so, &flow->flow_ci);
	inp = sotoinpcb(so);
	if ((inp->inp_vflag & INP_IPV6) != 0) {
		flow->flow_src.ss_family = AF_INET6;
		flow->flow_dst.ss_family = AF_INET6;
		flow->flow_src.ss_len = sizeof(struct sockaddr_in6);
		flow->flow_dst.ss_len = sizeof(struct sockaddr_in6);
		SIN6(&flow->flow_src)->sin6_port = inp->in6p_lport;
		SIN6(&flow->flow_dst)->sin6_port = inp->in6p_fport;
		SIN6(&flow->flow_src)->sin6_addr = inp->in6p_laddr;
		SIN6(&flow->flow_dst)->sin6_addr = inp->in6p_faddr;
	} else if ((inp->inp_vflag & INP_IPV4) != 0) {
		flow->flow_src.ss_family = AF_INET;
		flow->flow_dst.ss_family = AF_INET;
		flow->flow_src.ss_len = sizeof(struct sockaddr_in);
		flow->flow_dst.ss_len = sizeof(struct sockaddr_in);
		SIN(&flow->flow_src)->sin_port = inp->inp_lport;
		SIN(&flow->flow_dst)->sin_port = inp->inp_fport;
		SIN(&flow->flow_src)->sin_addr = inp->inp_laddr;
		SIN(&flow->flow_dst)->sin_addr = inp->inp_faddr;
	}
	flow->flow_len = sizeof(*flow);
	flow->flow_tcpci_offset = offsetof(mptcp_flow_t, flow_ci);
	flow->flow_flags = mpts->mpts_flags;
	flow->flow_cid = mpts->mpts_connid;
	flow->flow_relseq = mpts->mpts_rel_seq;
	flow->flow_soerror = mpts->mpts_socket->so_error;
	flow->flow_probecnt = mpts->mpts_probecnt;
}

static int
mptcp_pcblist SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error = 0, f;
	size_t len;
	struct mppcb *mpp;
	struct mptses *mpte;
	struct mptcb *mp_tp;
	struct mptsub *mpts;
	struct socket *so;
	conninfo_mptcp_t mptcpci;
	mptcp_flow_t *flows = NULL;

	if (req->newptr != USER_ADDR_NULL) {
		return EPERM;
	}

	lck_mtx_lock(&mtcbinfo.mppi_lock);
	if (req->oldptr == USER_ADDR_NULL) {
		size_t n = mtcbinfo.mppi_count;
		lck_mtx_unlock(&mtcbinfo.mppi_lock);
		req->oldidx = (n + n / 8) * sizeof(conninfo_mptcp_t) +
		    4 * (n + n / 8) * sizeof(mptcp_flow_t);
		return 0;
	}
	TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) {
		flows = NULL;
		socket_lock(mpp->mpp_socket, 1);
		VERIFY(mpp->mpp_flags & MPP_ATTACHED);
		mpte = mptompte(mpp);

		socket_lock_assert_owned(mptetoso(mpte));
		mp_tp = mpte->mpte_mptcb;

		bzero(&mptcpci, sizeof(mptcpci));
		mptcpci.mptcpci_state = mp_tp->mpt_state;
		mptcpci.mptcpci_flags = mp_tp->mpt_flags;
		mptcpci.mptcpci_ltoken = mp_tp->mpt_localtoken;
		mptcpci.mptcpci_rtoken = mp_tp->mpt_remotetoken;
		mptcpci.mptcpci_notsent_lowat = mp_tp->mpt_notsent_lowat;
		mptcpci.mptcpci_snduna = mp_tp->mpt_snduna;
		mptcpci.mptcpci_sndnxt = mp_tp->mpt_sndnxt;
		mptcpci.mptcpci_sndmax = mp_tp->mpt_sndmax;
		mptcpci.mptcpci_lidsn = mp_tp->mpt_local_idsn;
		mptcpci.mptcpci_sndwnd = mp_tp->mpt_sndwnd;
		mptcpci.mptcpci_rcvnxt = mp_tp->mpt_rcvnxt;
		mptcpci.mptcpci_rcvatmark = mp_tp->mpt_rcvnxt;
		mptcpci.mptcpci_ridsn = mp_tp->mpt_remote_idsn;
		mptcpci.mptcpci_rcvwnd = mp_tp->mpt_rcvwnd;

		mptcpci.mptcpci_nflows = mpte->mpte_numflows;
		mptcpci.mptcpci_mpte_flags = mpte->mpte_flags;
		mptcpci.mptcpci_mpte_addrid = mpte->mpte_addrid_last;
		mptcpci.mptcpci_flow_offset =
		    offsetof(conninfo_mptcp_t, mptcpci_flows);

		len = sizeof(*flows) * mpte->mpte_numflows;
		if (mpte->mpte_numflows != 0) {
			flows = kalloc_data(len, Z_WAITOK | Z_ZERO);
			if (flows == NULL) {
				socket_unlock(mpp->mpp_socket, 1);
				break;
			}
			mptcpci.mptcpci_len = sizeof(mptcpci) +
			    sizeof(*flows) * (mptcpci.mptcpci_nflows - 1);
			error = SYSCTL_OUT(req, &mptcpci,
			    sizeof(mptcpci) - sizeof(mptcp_flow_t));
		} else {
			mptcpci.mptcpci_len = sizeof(mptcpci);
			error = SYSCTL_OUT(req, &mptcpci, sizeof(mptcpci));
		}
		if (error) {
			socket_unlock(mpp->mpp_socket, 1);
			kfree_data(flows, len);
			break;
		}
		f = 0;
		TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
			so = mpts->mpts_socket;
			fill_mptcp_subflow(so, &flows[f], mpts);
			f++;
		}
		socket_unlock(mpp->mpp_socket, 1);
		if (flows) {
			error = SYSCTL_OUT(req, flows, len);
			kfree_data(flows, len);
			if (error) {
				break;
			}
		}
	}
	lck_mtx_unlock(&mtcbinfo.mppi_lock);

	return error;
}

SYSCTL_PROC(_net_inet_mptcp, OID_AUTO, pcblist, CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, mptcp_pcblist, "S,conninfo_mptcp_t",
    "List of active MPTCP connections");

/*
 * Set the notsent lowat mark on the MPTCB
 */
int
mptcp_set_notsent_lowat(struct mptses *mpte, int optval)
{
	struct mptcb *mp_tp = NULL;
	int error = 0;

	if (mpte->mpte_mppcb->mpp_flags & MPP_ATTACHED) {
		mp_tp = mpte->mpte_mptcb;
	}

	if (mp_tp) {
		mp_tp->mpt_notsent_lowat = optval;
	} else {
		error = EINVAL;
	}

	return error;
}

u_int32_t
mptcp_get_notsent_lowat(struct mptses *mpte)
{
	struct mptcb *mp_tp = NULL;

	if (mpte->mpte_mppcb->mpp_flags & MPP_ATTACHED) {
		mp_tp = mpte->mpte_mptcb;
	}

	if (mp_tp) {
		return mp_tp->mpt_notsent_lowat;
	} else {
		return 0;
	}
}

int
mptcp_notsent_lowat_check(struct socket *so)
{
	struct mptses *mpte;
	struct mppcb *mpp;
	struct mptcb *mp_tp;
	struct mptsub *mpts;

	int notsent = 0;

	mpp = mpsotomppcb(so);
	if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
		return 0;
	}

	mpte = mptompte(mpp);
	socket_lock_assert_owned(mptetoso(mpte));
	mp_tp = mpte->mpte_mptcb;

	notsent = so->so_snd.sb_cc;

	if ((notsent == 0) ||
	    ((notsent - (mp_tp->mpt_sndnxt - mp_tp->mpt_snduna)) <=
	    mp_tp->mpt_notsent_lowat)) {
		return 1;
	}

	/*
	 * When Nagle's algorithm is not disabled, it is better to wake up
	 * the client even before there is at least one maxseg of data to
	 * write.
	 */
	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		int retval = 0;
		if (mpts->mpts_flags & MPTSF_ACTIVE) {
			struct socket *subf_so = mpts->mpts_socket;
			struct tcpcb *tp = intotcpcb(sotoinpcb(subf_so));

			notsent = so->so_snd.sb_cc -
			    (tp->snd_nxt - tp->snd_una);

			if ((tp->t_flags & TF_NODELAY) == 0 &&
			    notsent > 0 && (notsent <= (int)tp->t_maxseg)) {
				retval = 1;
			}
			return retval;
		}
	}
	return 0;
}
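
/*
 * Worked example for the check above (illustrative only, not part of the
 * build; numbers are made up): with sb_cc = 5000 bytes queued at the MPTCP
 * level, sndnxt - snduna = 4200 already handed to subflows and
 * notsent_lowat = 1024, the unsent backlog is 5000 - 4200 = 800 <= 1024,
 * so the socket reports writable.
 */
#if 0
	int sb_cc = 5000, in_flight = 4200, lowat = 1024;
	int writable = (sb_cc - in_flight) <= lowat;    /* 1 */
#endif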

static errno_t
mptcp_symptoms_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
    void **unitinfo)
{
#pragma unused(kctlref, sac, unitinfo)

	if (OSIncrementAtomic(&mptcp_kern_skt_inuse) > 0) {
		os_log_error(mptcp_log_handle, "%s: MPTCP kernel-control socket for Symptoms already open!", __func__);
	}

	mptcp_kern_skt_unit = sac->sc_unit;

	return 0;
}

static void
mptcp_allow_uuid(uuid_t uuid, int32_t rssi)
{
	struct mppcb *mpp;

	/* Iterate over all MPTCP connections */

	lck_mtx_lock(&mtcbinfo.mppi_lock);

	TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) {
		struct socket *mp_so = mpp->mpp_socket;
		struct mptses *mpte = mpp->mpp_pcbe;

		socket_lock(mp_so, 1);

		if (mp_so->so_flags & SOF_DELEGATED &&
		    uuid_compare(uuid, mp_so->e_uuid)) {
			goto next;
		} else if (!(mp_so->so_flags & SOF_DELEGATED) &&
		    uuid_compare(uuid, mp_so->last_uuid)) {
			goto next;
		}

		os_log(mptcp_log_handle, "%s - %lx: Got allowance for useApp with rssi %d\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), rssi);

		mpte->mpte_flags |= MPTE_ACCESS_GRANTED;

		if (rssi > MPTCP_TARGET_BASED_RSSI_THRESHOLD) {
			mpte->mpte_flags |= MPTE_CELL_PROHIBITED;
		}

		mptcp_check_subflows_and_add(mpte);
		mptcp_remove_subflows(mpte);

		mpte->mpte_flags &= ~(MPTE_ACCESS_GRANTED | MPTE_CELL_PROHIBITED);

next:
		socket_unlock(mp_so, 1);
	}

	lck_mtx_unlock(&mtcbinfo.mppi_lock);
}

static void
mptcp_wifi_status_changed(void)
{
	struct mppcb *mpp;

	/* Iterate over all MPTCP connections */

	lck_mtx_lock(&mtcbinfo.mppi_lock);

	TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) {
		struct socket *mp_so = mpp->mpp_socket;
		struct mptses *mpte = mpp->mpp_pcbe;

		socket_lock(mp_so, 1);

		/* Only handover- and urgency-mode are purely driven by Symptoms' Wi-Fi status */
		if (mpte->mpte_svctype != MPTCP_SVCTYPE_HANDOVER &&
		    mpte->mpte_svctype != MPTCP_SVCTYPE_PURE_HANDOVER &&
		    mpte->mpte_svctype != MPTCP_SVCTYPE_TARGET_BASED) {
			goto next;
		}

		mptcp_check_subflows_and_add(mpte);
		mptcp_check_subflows_and_remove(mpte);

next:
		socket_unlock(mp_so, 1);
	}

	lck_mtx_unlock(&mtcbinfo.mppi_lock);
}

struct mptcp_uuid_search_info {
	uuid_t target_uuid;
	proc_t found_proc;
	boolean_t is_proc_found;
};

static int
mptcp_find_proc_filter(proc_t p, void *arg)
{
	struct mptcp_uuid_search_info *info = (struct mptcp_uuid_search_info *)arg;
	int found;

	if (info->is_proc_found) {
		return 0;
	}

	/*
	 * uuid_compare returns 0 if the uuids are matching, but the proc-filter
	 * expects != 0 for a matching filter.
	 */
	found = uuid_compare(proc_executableuuid_addr(p), info->target_uuid) == 0;
	if (found) {
		info->is_proc_found = true;
	}

	return found;
}

static int
mptcp_find_proc_callout(proc_t p, void *arg)
{
	struct mptcp_uuid_search_info *info = (struct mptcp_uuid_search_info *)arg;

	if (uuid_compare(proc_executableuuid_addr(p), info->target_uuid) == 0) {
		info->found_proc = p;
		return PROC_CLAIMED_DONE;
	}

	return PROC_RETURNED;
}

static proc_t
mptcp_find_proc(const uuid_t uuid)
{
	struct mptcp_uuid_search_info info;

	uuid_copy(info.target_uuid, uuid);
	info.found_proc = PROC_NULL;
	info.is_proc_found = false;

	proc_iterate(PROC_ALLPROCLIST, mptcp_find_proc_callout, &info,
	    mptcp_find_proc_filter, &info);

	return info.found_proc;
}

void
mptcp_ask_symptoms(struct mptses *mpte)
{
	struct mptcp_symptoms_ask_uuid ask;
	struct socket *mp_so;
	struct proc *p = PROC_NULL;
	int pid, prio, err;

	if (mptcp_kern_skt_unit == 0) {
		os_log_error(mptcp_log_handle, "%s - %lx: skt_unit is still 0\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
		return;
	}

	mp_so = mptetoso(mpte);

	if (mp_so->so_flags & SOF_DELEGATED) {
		if (mpte->mpte_epid != 0) {
			p = proc_find(mpte->mpte_epid);
			if (p != PROC_NULL) {
				/* We found a pid, check its UUID */
				if (uuid_compare(mp_so->e_uuid, proc_executableuuid_addr(p))) {
					/* It's not the same - we need to look for the real proc */
					proc_rele(p);
					p = PROC_NULL;
				}
			}
		}

		if (p == PROC_NULL) {
			p = mptcp_find_proc(mp_so->e_uuid);
			if (p == PROC_NULL) {
				uuid_string_t uuid_string;
				uuid_unparse(mp_so->e_uuid, uuid_string);

				os_log_error(mptcp_log_handle, "%s - %lx: Couldn't find proc for uuid %s\n",
				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), uuid_string);

				return;
			}
			mpte->mpte_epid = proc_pid(p);
		}

		pid = mpte->mpte_epid;
		uuid_copy(ask.uuid, mp_so->e_uuid);
	} else {
		pid = mp_so->last_pid;

		p = proc_find(pid);
		if (p == PROC_NULL) {
			os_log_error(mptcp_log_handle, "%s - %lx: Couldn't find proc for pid %u\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), pid);
			return;
		}

		uuid_copy(ask.uuid, mp_so->last_uuid);
	}

	ask.cmd = MPTCP_SYMPTOMS_ASK_UUID;

	prio = proc_get_effective_task_policy(proc_task(p), TASK_POLICY_ROLE);

	if (prio == TASK_BACKGROUND_APPLICATION || prio == TASK_NONUI_APPLICATION ||
	    prio == TASK_DARWINBG_APPLICATION) {
		ask.priority = MPTCP_SYMPTOMS_BACKGROUND;
	} else if (prio == TASK_FOREGROUND_APPLICATION) {
		ask.priority = MPTCP_SYMPTOMS_FOREGROUND;
	} else {
		ask.priority = MPTCP_SYMPTOMS_UNKNOWN;
	}

	err = ctl_enqueuedata(mptcp_kern_ctrl_ref, mptcp_kern_skt_unit,
	    &ask, sizeof(ask), CTL_DATA_EOR);

	os_log(mptcp_log_handle, "%s - %lx: asked symptoms about pid %u, taskprio %u, prio %u, err %d\n",
	    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), pid, prio, ask.priority, err);

	proc_rele(p);
}

static errno_t
mptcp_symptoms_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit,
    void *unitinfo)
{
#pragma unused(kctlref, kcunit, unitinfo)

	OSDecrementAtomic(&mptcp_kern_skt_inuse);

	return 0;
}

static errno_t
mptcp_symptoms_ctl_send(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
    mbuf_t m, int flags)
{
#pragma unused(kctlref, unitinfo, flags)
	symptoms_advisory_t *sa = NULL;

	if (kcunit != mptcp_kern_skt_unit) {
		os_log_error(mptcp_log_handle, "%s: kcunit %u is different from expected one %u\n",
		    __func__, kcunit, mptcp_kern_skt_unit);
	}

	if (mbuf_pkthdr_len(m) < sizeof(*sa)) {
		mbuf_freem(m);
		return EINVAL;
	}

	if (mbuf_len(m) < sizeof(*sa)) {
		os_log_error(mptcp_log_handle, "%s: mbuf is %lu but need %lu\n",
		    __func__, mbuf_len(m), sizeof(*sa));
		mbuf_freem(m);
		return EINVAL;
	}

	sa = mbuf_data(m);

	if (sa->sa_nwk_status != SYMPTOMS_ADVISORY_USEAPP) {
		os_log(mptcp_log_handle, "%s: wifi new,old: %d,%d, cell new,old: %d,%d\n", __func__,
		    sa->sa_wifi_status, mptcp_advisory.sa_wifi_status,
		    sa->sa_cell_status, mptcp_advisory.sa_cell_status);

		if (sa->sa_wifi_status != mptcp_advisory.sa_wifi_status) {
			mptcp_advisory.sa_wifi_status = sa->sa_wifi_status;
			mptcp_wifi_status_changed();
		}
	} else {
		struct mptcp_symptoms_answer answer;
		errno_t err;

		/* We temporarily allow different sizes for ease of submission */
		if (mbuf_len(m) != sizeof(uuid_t) + sizeof(*sa) &&
		    mbuf_len(m) != sizeof(answer)) {
			os_log_error(mptcp_log_handle, "%s: mbuf is %lu but need %lu or %lu\n",
			    __func__, mbuf_len(m), sizeof(uuid_t) + sizeof(*sa),
			    sizeof(answer));
			mbuf_free(m);
			return EINVAL;
		}

		memset(&answer, 0, sizeof(answer));

		err = mbuf_copydata(m, 0, mbuf_len(m), &answer);
		if (err) {
			os_log_error(mptcp_log_handle, "%s: mbuf_copydata returned %d\n", __func__, err);
			mbuf_free(m);
			return err;
		}

		mptcp_allow_uuid(answer.uuid, answer.rssi);
	}

	mbuf_freem(m);
	return 0;
}

void
mptcp_control_register(void)
{
	/* Set up the advisory control socket */
	struct kern_ctl_reg mptcp_kern_ctl;

	bzero(&mptcp_kern_ctl, sizeof(mptcp_kern_ctl));
	strlcpy(mptcp_kern_ctl.ctl_name, MPTCP_KERN_CTL_NAME,
	    sizeof(mptcp_kern_ctl.ctl_name));
	mptcp_kern_ctl.ctl_connect = mptcp_symptoms_ctl_connect;
	mptcp_kern_ctl.ctl_disconnect = mptcp_symptoms_ctl_disconnect;
	mptcp_kern_ctl.ctl_send = mptcp_symptoms_ctl_send;
	mptcp_kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED;

	(void)ctl_register(&mptcp_kern_ctl, &mptcp_kern_ctrl_ref);
}
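
/*
 * User-space sketch of the client side of this kernel control (illustrative
 * only, not part of the kernel build). Because the control is registered
 * with CTL_FLAG_PRIVILEGED, only a privileged client such as symptomsd can
 * connect. The CTLIOCGINFO ioctl resolves the control name to its id.
 */
#if 0
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/kern_control.h>
#include <sys/sys_domain.h>
#include <string.h>
#include <unistd.h>

static int
connect_mptcp_symptoms_ctl(void)
{
	struct ctl_info info;
	struct sockaddr_ctl addr;
	int fd;

	fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
	if (fd < 0) {
		return -1;
	}

	memset(&info, 0, sizeof(info));
	strlcpy(info.ctl_name, MPTCP_KERN_CTL_NAME, sizeof(info.ctl_name));
	if (ioctl(fd, CTLIOCGINFO, &info) < 0) {
		close(fd);
		return -1;
	}

	memset(&addr, 0, sizeof(addr));
	addr.sc_len = sizeof(addr);
	addr.sc_family = AF_SYSTEM;
	addr.ss_sysaddr = AF_SYS_CONTROL;
	addr.sc_id = info.ctl_id;
	addr.sc_unit = 0;       /* let the kernel pick the unit */

	if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
		close(fd);
		return -1;
	}
	return fd;
}
#endif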
6603 | |
6604 | mptcp_wifi_quality_t |
6605 | mptcp_wifi_quality_for_session(struct mptses *mpte) |
6606 | { |
6607 | if (mpte->mpte_flags & MPTE_FIRSTPARTY) { |
6608 | if (mpte->mpte_svctype != MPTCP_SVCTYPE_HANDOVER && |
6609 | mptcp_advisory.sa_wifi_status) { |
6610 | return symptoms_is_wifi_lossy() ? MPTCP_WIFI_QUALITY_BAD : MPTCP_WIFI_QUALITY_GOOD; |
6611 | } |
6612 | |
6613 | /* |
6614 | * If it's a first-party app and we don't have any info |
6615 | * about the Wi-Fi state, let's be pessimistic. |
6616 | */ |
6617 | return MPTCP_WIFI_QUALITY_UNSURE; |
6618 | } else { |
6619 | if (symptoms_is_wifi_lossy()) { |
6620 | return MPTCP_WIFI_QUALITY_BAD; |
6621 | } |
6622 | |
6623 | /* |
6624 | * If we are target-based (meaning, we allow to be more lax on |
6625 | * the when wifi is considered bad), we only *know* about the state once |
6626 | * we got the allowance from Symptoms (MPTE_ACCESS_GRANTED). |
6627 | * |
6628 | * If RSSI is not bad enough, MPTE_CELL_PROHIBITED will then |
6629 | * be set. |
6630 | * |
6631 | * In any other case (while in target-mode), consider WiFi bad |
6632 | * and we are going to ask for allowance from Symptoms anyway. |
6633 | */ |
6634 | if (mpte->mpte_svctype == MPTCP_SVCTYPE_TARGET_BASED) { |
6635 | if (mpte->mpte_flags & MPTE_ACCESS_GRANTED && |
6636 | mpte->mpte_flags & MPTE_CELL_PROHIBITED) { |
6637 | return MPTCP_WIFI_QUALITY_GOOD; |
6638 | } |
6639 | |
6640 | return MPTCP_WIFI_QUALITY_BAD; |
6641 | } |
6642 | |
6643 | return MPTCP_WIFI_QUALITY_GOOD; |
6644 | } |
6645 | } |
6646 | |
6647 | boolean_t |
6648 | symptoms_is_wifi_lossy(void) |
6649 | { |
6650 | return (mptcp_advisory.sa_wifi_status & SYMPTOMS_ADVISORY_WIFI_OK) ? false : true; |
6651 | } |
6652 | |
6653 | int |
6654 | mptcp_freeq(struct mptcb *mp_tp) |
6655 | { |
6656 | struct tseg_qent *q; |
6657 | int rv = 0; |
6658 | int count = 0; |
6659 | |
6660 | while ((q = LIST_FIRST(&mp_tp->mpt_segq)) != NULL) { |
6661 | LIST_REMOVE(q, tqe_q); |
6662 | m_freem(q->tqe_m); |
6663 | zfree(tcp_reass_zone, q); |
6664 | count++; |
6665 | rv = 1; |
6666 | } |
6667 | mp_tp->mpt_reassqlen = 0; |
6668 | |
6669 | if (count > 0) { |
6670 | OSAddAtomic(-count, &mptcp_reass_total_qlen); |
6671 | } |
6672 | |
6673 | return rv; |
6674 | } |
6675 | |
6676 | static int |
6677 | mptcp_post_event(u_int32_t event_code, int value) |
6678 | { |
6679 | struct kev_mptcp_data event_data; |
6680 | struct kev_msg ev_msg; |
6681 | |
6682 | memset(s: &ev_msg, c: 0, n: sizeof(ev_msg)); |
6683 | |
6684 | ev_msg.vendor_code = KEV_VENDOR_APPLE; |
6685 | ev_msg.kev_class = KEV_NETWORK_CLASS; |
6686 | ev_msg.kev_subclass = KEV_MPTCP_SUBCLASS; |
6687 | ev_msg.event_code = event_code; |
6688 | |
6689 | event_data.value = value; |
6690 | |
6691 | ev_msg.dv[0].data_ptr = &event_data; |
6692 | ev_msg.dv[0].data_length = sizeof(event_data); |
6693 | |
6694 | return kev_post_msg(event: &ev_msg); |
6695 | } |
6696 | |
6697 | static void |
6698 | mptcp_set_cellicon(struct mptses *mpte, struct mptsub *mpts) |
6699 | { |
6700 | struct tcpcb *tp = sototcpcb(mpts->mpts_socket); |
6701 | int error; |
6702 | |
6703 | /* First-party apps (Siri) don't flip the cellicon */ |
6704 | if (mpte->mpte_flags & MPTE_FIRSTPARTY) { |
6705 | return; |
6706 | } |
6707 | |
6708 | /* Subflow is disappearing - don't set it on this one */ |
6709 | if (mpts->mpts_flags & (MPTSF_DISCONNECTING | MPTSF_DISCONNECTED)) { |
6710 | return; |
6711 | } |
6712 | |
6713 | /* Fallen back connections are not triggering the cellicon */ |
6714 | if (mpte->mpte_mptcb->mpt_flags & MPTCPF_FALLBACK_TO_TCP) { |
6715 | return; |
6716 | } |
6717 | |
6718 | /* Remember the last time we set the cellicon. Needed for debouncing */ |
6719 | mpte->mpte_last_cellicon_set = tcp_now; |
6720 | |
6721 | tp->t_timer[TCPT_CELLICON] = OFFSET_FROM_START(tp, MPTCP_CELLICON_TOGGLE_RATE); |
6722 | tcp_sched_timers(tp); |
6723 | |
6724 | if (mpts->mpts_flags & MPTSF_CELLICON_SET && |
6725 | mpte->mpte_cellicon_increments != 0) { |
6726 | if (mptcp_cellicon_refcount == 0) { |
6727 | os_log_error(mptcp_log_handle, "%s - %lx: Cell should be set (count is %u), but it's zero!\n" , |
6728 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_cellicon_increments); |
6729 | |
6730 | /* Continue, so that the icon gets set... */ |
6731 | } else { |
6732 | /* |
6733 | * In this case, the cellicon is already set. No need to bump it |
6734 | * even higher |
6735 | */ |
6736 | |
6737 | return; |
6738 | } |
6739 | } |
6740 | |
6741 | /* When tearing down this subflow, we need to decrement the |
6742 | * reference counter |
6743 | */ |
6744 | mpts->mpts_flags |= MPTSF_CELLICON_SET; |
6745 | |
6746 | /* This counter, so that when a session gets destroyed we decrement |
6747 | * the reference counter by whatever is left |
6748 | */ |
6749 | mpte->mpte_cellicon_increments++; |
6750 | |
6751 | if (OSIncrementAtomic(&mptcp_cellicon_refcount)) { |
6752 | /* If cellicon is already set, get out of here! */ |
6753 | return; |
6754 | } |
6755 | |
6756 | error = mptcp_post_event(KEV_MPTCP_CELLUSE, value: 1); |
6757 | |
6758 | if (error) { |
6759 | os_log_error(mptcp_log_handle, "%s - %lx: Setting cellicon failed with %d\n" , |
6760 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error); |
6761 | } else { |
6762 | os_log(mptcp_log_handle, "%s - %lx: successfully set the cellicon\n" , |
6763 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte)); |
6764 | } |
6765 | } |
6766 | |
6767 | void |
6768 | mptcp_clear_cellicon(void) |
6769 | { |
6770 | int error = mptcp_post_event(KEV_MPTCP_CELLUSE, value: 0); |
6771 | |
6772 | if (error) { |
6773 | os_log_error(mptcp_log_handle, "%s: Unsetting cellicon failed with %d\n" , |
6774 | __func__, error); |
6775 | } else { |
6776 | os_log(mptcp_log_handle, "%s: successfully unset the cellicon\n" , |
6777 | __func__); |
6778 | } |
6779 | } |
6780 | |
6781 | /* |
6782 | * Returns true if the icon has been flipped to WiFi. |
6783 | */ |
6784 | static boolean_t |
6785 | __mptcp_unset_cellicon(uint32_t val) |
6786 | { |
6787 | VERIFY(val < INT32_MAX); |
6788 | if (OSAddAtomic((int32_t)-val, &mptcp_cellicon_refcount) != 1) { |
6789 | return false; |
6790 | } |
6791 | |
6792 | mptcp_clear_cellicon(); |
6793 | |
6794 | return true; |
6795 | } |
6796 | |
void
mptcp_unset_cellicon(struct mptses *mpte, struct mptsub *mpts, uint32_t val)
{
	/* First-party apps (Siri) don't flip the cellicon */
	if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
		return;
	}

	if (mpte->mpte_cellicon_increments == 0) {
		/* This flow never used cell - get out of here! */
		return;
	}

	if (mptcp_cellicon_refcount == 0) {
		os_log_error(mptcp_log_handle, "%s - %lx: Cell is off, but should be at least %u\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_cellicon_increments);

		return;
	}

	if (mpts) {
		if (!(mpts->mpts_flags & MPTSF_CELLICON_SET)) {
			return;
		}

		mpts->mpts_flags &= ~MPTSF_CELLICON_SET;
	}

	if (mpte->mpte_cellicon_increments < val) {
		os_log_error(mptcp_log_handle, "%s - %lx: Increments is %u but want to dec by %u.\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_cellicon_increments, val);
		val = mpte->mpte_cellicon_increments;
	}

	mpte->mpte_cellicon_increments -= val;

	if (__mptcp_unset_cellicon(val) == false) {
		return;
	}

	/* All flows are gone - our counter should be at zero too! */
	if (mpte->mpte_cellicon_increments != 0) {
		os_log_error(mptcp_log_handle, "%s - %lx: Inconsistent state! Cell refcount is zero but increments are at %u\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_cellicon_increments);
	}
}

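/*
 * Called when a subflow's TCP retransmission state gets reset - clear the
 * MPTCP-level stall state: the subflow's write-stall marker and the
 * socket's failover-attempt flag.
 */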
void
mptcp_reset_rexmit_state(struct tcpcb *tp)
{
	struct mptsub *mpts;
	struct inpcb *inp;
	struct socket *so;

	inp = tp->t_inpcb;
	if (inp == NULL) {
		return;
	}

	so = inp->inp_socket;
	if (so == NULL) {
		return;
	}

	if (!(so->so_flags & SOF_MP_SUBFLOW)) {
		return;
	}

	mpts = tp->t_mpsub;

	mpts->mpts_flags &= ~MPTSF_WRITE_STALL;
	so->so_flags &= ~SOF_MP_TRYFAILOVER;
}

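/*
 * Keepalive made progress on this subflow - it is no longer considered
 * read-stalled.
 */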
void
mptcp_reset_keepalive(struct tcpcb *tp)
{
	struct mptsub *mpts = tp->t_mpsub;

	mpts->mpts_flags &= ~MPTSF_READ_STALL;
}

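/*
 * MPTCP PCBs are allocated as part of a struct mpp_mtp, which embeds the
 * struct mppcb; mtcp_free() recovers the containing allocation via
 * __container_of().
 */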
static struct mppcb *
mtcp_alloc(void)
{
	return &kalloc_type(struct mpp_mtp, Z_WAITOK | Z_ZERO | Z_NOFAIL)->mpp;
}

static void
mtcp_free(struct mppcb *mpp)
{
	struct mpp_mtp *mtp = __container_of(mpp, struct mpp_mtp, mpp);

	kfree_type(struct mpp_mtp, mtp);
}

/*
 * Protocol pr_init callback.
 */
void
mptcp_init(struct protosw *pp, struct domain *dp)
{
#pragma unused(dp)
	static int mptcp_initialized = 0;
	struct protosw *prp;
	struct ip6protosw *prp6;

	VERIFY((pp->pr_flags & (PR_INITIALIZED | PR_ATTACHED)) == PR_ATTACHED);

	/* do this only once */
	if (mptcp_initialized) {
		return;
	}
	mptcp_initialized = 1;

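	/*
	 * Start out assuming Wi-Fi is usable; symptoms advisories update
	 * this status as conditions change.
	 */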
	mptcp_advisory.sa_wifi_status = SYMPTOMS_ADVISORY_WIFI_OK;

	/*
	 * Since PF_MULTIPATH gets initialized after PF_INET/INET6,
	 * we must be able to find IPPROTO_TCP entries for both.
	 */
	prp = pffindproto_locked(PF_INET, IPPROTO_TCP, SOCK_STREAM);
	VERIFY(prp != NULL);
	bcopy(prp, &mptcp_subflow_protosw, sizeof(*prp));
	bcopy(prp->pr_usrreqs, &mptcp_subflow_usrreqs,
	    sizeof(mptcp_subflow_usrreqs));
	mptcp_subflow_protosw.pr_entry.tqe_next = NULL;
	mptcp_subflow_protosw.pr_entry.tqe_prev = NULL;
	mptcp_subflow_protosw.pr_usrreqs = &mptcp_subflow_usrreqs;
	mptcp_subflow_usrreqs.pru_soreceive = mptcp_subflow_soreceive;
	mptcp_subflow_usrreqs.pru_sosend = mptcp_subflow_sosend;
	mptcp_subflow_usrreqs.pru_rcvoob = pru_rcvoob_notsupp;
	/*
	 * Socket filters shouldn't attach/detach to/from this protosw
	 * since pr_protosw is to be used instead, which points to the
	 * real protocol; if they do, it is a bug and we should panic.
	 */
	mptcp_subflow_protosw.pr_filter_head.tqh_first =
	    (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
	mptcp_subflow_protosw.pr_filter_head.tqh_last =
	    (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;

	prp6 = (struct ip6protosw *)pffindproto_locked(PF_INET6,
	    IPPROTO_TCP, SOCK_STREAM);
	VERIFY(prp6 != NULL);
	bcopy(prp6, &mptcp_subflow_protosw6, sizeof(*prp6));
	bcopy(prp6->pr_usrreqs, &mptcp_subflow_usrreqs6,
	    sizeof(mptcp_subflow_usrreqs6));
	mptcp_subflow_protosw6.pr_entry.tqe_next = NULL;
	mptcp_subflow_protosw6.pr_entry.tqe_prev = NULL;
	mptcp_subflow_protosw6.pr_usrreqs = &mptcp_subflow_usrreqs6;
	mptcp_subflow_usrreqs6.pru_soreceive = mptcp_subflow_soreceive;
	mptcp_subflow_usrreqs6.pru_sosend = mptcp_subflow_sosend;
	mptcp_subflow_usrreqs6.pru_rcvoob = pru_rcvoob_notsupp;
	/*
	 * Socket filters shouldn't attach/detach to/from this protosw
	 * since pr_protosw is to be used instead, which points to the
	 * real protocol; if they do, it is a bug and we should panic.
	 */
	mptcp_subflow_protosw6.pr_filter_head.tqh_first =
	    (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
	mptcp_subflow_protosw6.pr_filter_head.tqh_last =
	    (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;

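	/*
	 * Set up the global MPTCP PCB-info: allocation hooks, lock, and the
	 * garbage-collection and timer callbacks.
	 */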
	bzero(&mtcbinfo, sizeof(mtcbinfo));
	TAILQ_INIT(&mtcbinfo.mppi_pcbs);
	mtcbinfo.mppi_alloc = mtcp_alloc;
	mtcbinfo.mppi_free = mtcp_free;

	mtcbinfo.mppi_lock_grp = lck_grp_alloc_init("mppcb", LCK_GRP_ATTR_NULL);
	lck_attr_setdefault(&mtcbinfo.mppi_lock_attr);
	lck_mtx_init(&mtcbinfo.mppi_lock, mtcbinfo.mppi_lock_grp,
	    &mtcbinfo.mppi_lock_attr);

	mtcbinfo.mppi_gc = mptcp_gc;
	mtcbinfo.mppi_timer = mptcp_timer;

	/* attach to MP domain for garbage collection to take place */
	mp_pcbinfo_attach(&mtcbinfo);

	mptcp_log_handle = os_log_create("com.apple.xnu.net.mptcp", "mptcp");
}