1 | /* |
2 | * Copyright (c) 2012-2017 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | |
29 | #ifndef _NETINET_MPTCP_VAR_H_ |
30 | #define _NETINET_MPTCP_VAR_H_ |
31 | |
32 | #ifdef PRIVATE |
33 | #include <netinet/in.h> |
34 | #include <netinet/tcp.h> |
35 | #endif |
36 | |
37 | #ifdef BSD_KERNEL_PRIVATE |
38 | #include <sys/queue.h> |
39 | #include <sys/protosw.h> |
40 | #include <kern/locks.h> |
41 | #include <mach/boolean.h> |
42 | #include <netinet/mp_pcb.h> |
43 | #include <netinet/tcp_var.h> |
44 | #include <os/log.h> |
45 | |
/*
 * Per-interface record kept by an MPTCP session: the interface index
 * plus four one-bit flags describing that interface.
 */
struct mpt_itf_info {
	uint32_t	ifindex;		/* interface index */
	uint32_t	has_v4_conn:1;		/* has_v4_conn flag bit */
	uint32_t	has_v6_conn:1;		/* has_v6_conn flag bit */
	uint32_t	has_nat64_conn:1;	/* has_nat64_conn flag bit */
	uint32_t	no_mptcp_support:1;	/* no_mptcp_support flag bit */
};
53 | |
54 | /* |
55 | * MPTCP Session |
56 | * |
57 | * This is an extension to the multipath PCB specific for MPTCP, protected by |
58 | * the per-PCB mpp_lock (also the socket's lock); |
59 | */ |
60 | struct mptses { |
61 | struct mppcb *mpte_mppcb; /* back ptr to multipath PCB */ |
62 | struct mptcb *mpte_mptcb; /* ptr to MPTCP PCB */ |
63 | TAILQ_HEAD(, mptopt) mpte_sopts; /* list of socket options */ |
64 | TAILQ_HEAD(, mptsub) mpte_subflows; /* list of subflows */ |
65 | uint16_t mpte_numflows; /* # of subflows in list */ |
66 | uint16_t mpte_nummpcapflows; /* # of MP_CAP subflows */ |
67 | sae_associd_t mpte_associd; /* MPTCP association ID */ |
68 | sae_connid_t mpte_connid_last; /* last used connection ID */ |
69 | |
70 | union { |
71 | /* Source address of initial subflow */ |
72 | struct sockaddr mpte_src; |
73 | struct sockaddr_in __mpte_src_v4; |
74 | struct sockaddr_in6 __mpte_src_v6; |
75 | }; |
76 | |
77 | union { |
78 | /* Destination address of initial subflow */ |
79 | struct sockaddr mpte_dst; |
80 | struct sockaddr_in __mpte_dst_v4; |
81 | struct sockaddr_in6 __mpte_dst_v6; |
82 | }; |
83 | |
84 | struct sockaddr_in mpte_dst_v4_nat64; |
85 | |
86 | uint16_t mpte_alternate_port; /* Alternate port for subflow establishment (network-byte-order) */ |
87 | |
88 | struct mptsub *mpte_active_sub; /* ptr to last active subf */ |
89 | uint8_t mpte_flags; /* per mptcp session flags */ |
90 | #define MPTE_SND_REM_ADDR 0x01 /* Send Remove_addr option */ |
91 | #define MPTE_SVCTYPE_CHECKED 0x02 /* Did entitlement-check for service-type */ |
92 | #define MPTE_FIRSTPARTY 0x04 /* First-party app used multipath_extended entitlement */ |
93 | #define MPTE_ACCESS_GRANTED 0x08 /* Access to cellular has been granted for this connection */ |
94 | uint8_t mpte_svctype; /* MPTCP Service type */ |
95 | uint8_t mpte_lost_aid; /* storing lost address id */ |
96 | uint8_t mpte_addrid_last; /* storing address id parm */ |
97 | |
98 | #define MPTE_ITFINFO_SIZE 4 |
99 | uint32_t mpte_itfinfo_size; |
100 | struct mpt_itf_info _mpte_itfinfo[MPTE_ITFINFO_SIZE]; |
101 | struct mpt_itf_info *mpte_itfinfo; |
102 | |
103 | struct mbuf *mpte_reinjectq; |
104 | |
105 | /* The below is used for stats */ |
106 | uint32_t mpte_subflow_switches; /* Number of subflow-switches in sending */ |
107 | uint32_t mpte_used_cell:1, |
108 | mpte_used_wifi:1, |
109 | mpte_initial_cell:1, |
110 | mpte_triggered_cell, |
111 | mpte_handshake_success:1; |
112 | |
113 | struct mptcp_itf_stats mpte_itfstats[MPTCP_ITFSTATS_SIZE]; |
114 | uint64_t mpte_init_txbytes __attribute__((aligned(8))); |
115 | uint64_t mpte_init_rxbytes __attribute__((aligned(8))); |
116 | }; |
117 | |
118 | static inline struct socket * |
119 | mptetoso(struct mptses *mpte) |
120 | { |
121 | return (mpte->mpte_mppcb->mpp_socket); |
122 | } |
123 | |
124 | static inline struct mptses * |
125 | mptompte(struct mppcb *mp) |
126 | { |
127 | return ((struct mptses *)mp->mpp_pcbe); |
128 | } |
129 | |
/*
 * Map a socket to its MPTCP session: socket -> multipath PCB (via
 * mpsotomppcb) -> session (via mptompte).
 */
static inline struct mptses *
mpsotompte(struct socket *so)
{
	struct mppcb *pcb = mpsotomppcb(so);

	return mptompte(pcb);
}
135 | |
/*
 * Assert that mp->mpp_lock is owned by the caller (LCK_MTX_ASSERT_OWNED).
 *
 * When MACH_ASSERT is not set, mp is marked unused -- presumably because
 * LCK_MTX_ASSERT expands to nothing in that configuration (confirm).
 */
static inline void
mpp_lock_assert_held(struct mppcb *mp)
{
#if !MACH_ASSERT
#pragma unused(mp)
#endif
	LCK_MTX_ASSERT(&mp->mpp_lock, LCK_MTX_ASSERT_OWNED);
}
144 | |
/*
 * Assert that mp->mpp_lock is NOT owned by the caller
 * (LCK_MTX_ASSERT_NOTOWNED).
 *
 * When MACH_ASSERT is not set, mp is marked unused -- presumably because
 * LCK_MTX_ASSERT expands to nothing in that configuration (confirm).
 */
static inline void
mpp_lock_assert_notheld(struct mppcb *mp)
{
#if !MACH_ASSERT
#pragma unused(mp)
#endif
	LCK_MTX_ASSERT(&mp->mpp_lock, LCK_MTX_ASSERT_NOTOWNED);
}
153 | |
154 | static inline boolean_t |
155 | mpp_try_lock(struct mppcb *mp) |
156 | { |
157 | if (!lck_mtx_try_lock(&mp->mpp_lock)) |
158 | return false; |
159 | |
160 | VERIFY(!(mp->mpp_flags & MPP_INSIDE_OUTPUT)); |
161 | VERIFY(!(mp->mpp_flags & MPP_INSIDE_INPUT)); |
162 | |
163 | return true; |
164 | } |
165 | |
/*
 * Acquire mp->mpp_lock, then verify that neither MPP_INSIDE_OUTPUT nor
 * MPP_INSIDE_INPUT is set in mpp_flags.  The flags are checked only
 * after the lock has been taken.
 */
static inline void
mpp_lock(struct mppcb *mp)
{
	lck_mtx_lock(&mp->mpp_lock);
	VERIFY(!(mp->mpp_flags & MPP_INSIDE_OUTPUT));
	VERIFY(!(mp->mpp_flags & MPP_INSIDE_INPUT));
}
173 | |
/*
 * Release mp->mpp_lock.  Before unlocking, verify that neither
 * MPP_INSIDE_OUTPUT nor MPP_INSIDE_INPUT is set in mpp_flags; the checks
 * run while the lock is still held.
 */
static inline void
mpp_unlock(struct mppcb *mp)
{
	VERIFY(!(mp->mpp_flags & MPP_INSIDE_OUTPUT));
	VERIFY(!(mp->mpp_flags & MPP_INSIDE_INPUT));
	lck_mtx_unlock(&mp->mpp_lock);
}
181 | |
182 | static inline lck_mtx_t * |
183 | mpp_getlock(struct mppcb *mp, int flags) |
184 | { |
185 | if (flags & PR_F_WILLUNLOCK) { |
186 | VERIFY(!(mp->mpp_flags & MPP_INSIDE_OUTPUT)); |
187 | VERIFY(!(mp->mpp_flags & MPP_INSIDE_INPUT)); |
188 | } |
189 | |
190 | return (&mp->mpp_lock); |
191 | } |
192 | |
193 | static inline void |
194 | mpte_lock_assert_held(struct mptses *mpte) |
195 | { |
196 | mpp_lock_assert_held(mpte->mpte_mppcb); |
197 | } |
198 | |
199 | static inline void |
200 | mpte_lock_assert_notheld(struct mptses *mpte) |
201 | { |
202 | mpp_lock_assert_notheld(mpte->mpte_mppcb); |
203 | } |
204 | |
205 | static inline boolean_t |
206 | mpte_try_lock(struct mptses *mpte) |
207 | { |
208 | return (mpp_try_lock(mpte->mpte_mppcb)); |
209 | } |
210 | |
211 | static inline void |
212 | mpte_lock(struct mptses *mpte) |
213 | { |
214 | mpp_lock(mpte->mpte_mppcb); |
215 | } |
216 | |
217 | static inline void |
218 | mpte_unlock(struct mptses *mpte) |
219 | { |
220 | mpp_unlock(mpte->mpte_mppcb); |
221 | } |
222 | |
223 | static inline lck_mtx_t * |
224 | mpte_getlock(struct mptses *mpte, int flags) |
225 | { |
226 | return mpp_getlock(mpte->mpte_mppcb, flags); |
227 | } |
228 | |
/*
 * Compute how much more data may be queued on a subflow socket.
 *
 * Takes the smaller of the TCP send window (snd_wnd) and congestion
 * window (snd_cwnd), subtracts the bytes already in the send buffer
 * (so_snd.sb_cc), and returns the smaller of that value and
 * sbspace(&so->so_snd).
 *
 * NOTE(review): min() and sbspace() are defined outside this header.  If
 * min() operates on unsigned values, a negative cwnd would be converted
 * before the final comparison -- confirm the intended behaviour.
 */
static inline int
mptcp_subflow_cwnd_space(struct socket *so)
{
	struct tcpcb *tp = sototcpcb(so);
	int cwnd = min(tp->snd_wnd, tp->snd_cwnd) - (so->so_snd.sb_cc);

	return (min(cwnd, sbspace(&so->so_snd)));
}
237 | |
238 | |
/*
 * MPTCP socket options
 *
 * One record per socket option, linked into a TAILQ of mptopt entries
 * (struct mptses keeps such a list in mpte_sopts).  Each record stores
 * the option's level, name and integer value.
 */
struct mptopt {
	TAILQ_ENTRY(mptopt)	mpo_entry;	/* glue to other options */
	uint32_t		mpo_flags;	/* see flags below */
	int			mpo_level;	/* sopt_level */
	int			mpo_name;	/* sopt_name */
	int			mpo_intval;	/* sopt_val */
};

/* Valid values for mpo_flags */
#define MPOF_ATTACHED		0x1	/* attached to MP socket */
#define MPOF_SUBFLOW_OK		0x2	/* can be issued on subflow socket */
#define MPOF_INTERIM		0x4	/* has not been issued on any subflow */
253 | |
/*
 * MPTCP subflow
 *
 * One record per subflow, linked into a TAILQ of mptsub entries (struct
 * mptses keeps such a list in mpte_subflows).
 *
 * Note that mpts_flags and mpts_evctl are modified via atomic operations.
 */
struct mptsub {
	TAILQ_ENTRY(mptsub)	mpts_entry;	/* glue to peer subflows */
	uint32_t		mpts_refcnt;	/* reference count */
	uint32_t		mpts_flags;	/* see flags below */
	uint32_t		mpts_evctl;	/* subflow control events */
	sae_connid_t		mpts_connid;	/* subflow connection ID */
	int			mpts_oldintval;	/* sopt_val before sosetopt */
	struct mptses		*mpts_mpte;	/* back ptr to MPTCP session */
	struct socket		*mpts_socket;	/* subflow socket */
	struct sockaddr		*mpts_src;	/* source address */

	union {
		/* destination address */
		struct sockaddr		mpts_dst;
		struct sockaddr_in	__mpts_dst_v4;
		struct sockaddr_in6	__mpts_dst_v6;
	};

	u_int32_t		mpts_rel_seq;	/* running count of subflow # */
	u_int32_t		mpts_iss;	/* Initial sequence number, taking TFO into account */
	u_int32_t		mpts_ifscope;	/* scoped to the interface */
	uint32_t		mpts_probesoon;	/* send probe after probeto */
	uint32_t		mpts_probecnt;	/* number of probes sent */
	uint32_t		mpts_maxseg;	/* cached value of t_maxseg */
};
284 | |
/*
 * Valid values for mpts_flags.  In particular:
 *
 *  - MP_CAPABLE means that the connection is successfully established as
 *    MPTCP and data transfer may occur, but is not yet ready for multipath-
 *    related semantics until MP_READY.  I.e. if this is on the first subflow,
 *    it causes the MPTCP socket to transition to a connected state, except
 *    that additional subflows will not be established; they will be marked
 *    with PENDING and will be processed when the first subflow is marked
 *    with MP_READY.
 *
 *  - MP_READY implies that an MP_CAPABLE connection has been confirmed as
 *    an MPTCP connection.  See notes above.
 *
 *  - MP_DEGRADED implies that the connection has lost its MPTCP capabilities
 *    but data transfer on the MPTCP socket is unaffected.  Any existing
 *    PENDING subflows will be disconnected, and further attempts to connect
 *    additional subflows will be rejected.
 *
 * Note that these are per-subflow flags.  The setting and clearing of MP_READY
 * reflects the state of the MPTCP connection with regards to its multipath
 * semantics, via the MPTCPF_JOIN_READY flag.  Until that flag is set (meaning
 * until at least a subflow is marked with MP_READY), further connectx(2)
 * attempts to join will be queued.  When the flag is cleared (after it has
 * been set), further connectx(2) will fail (and existing queued ones will be
 * aborted) and the MPTCP connection loses all of its multipath semantics.
 *
 * Keep in sync with bsd/dev/dtrace/scripts/mptcp.d.
 */
#define MPTSF_ATTACHED		0x00000001	/* attached to MPTCP PCB */
#define MPTSF_CONNECTING	0x00000002	/* connection was attempted */
#define MPTSF_CONNECT_PENDING	0x00000004	/* will connect when MPTCP is ready */
#define MPTSF_CONNECTED		0x00000008	/* connection is established */
#define MPTSF_DISCONNECTING	0x00000010	/* disconnection was attempted */
#define MPTSF_DISCONNECTED	0x00000020	/* has been disconnected */
#define MPTSF_MP_CAPABLE	0x00000040	/* connected as a MPTCP subflow */
#define MPTSF_MP_READY		0x00000080	/* MPTCP has been confirmed */
#define MPTSF_MP_DEGRADED	0x00000100	/* has lost its MPTCP capabilities */
#define MPTSF_PREFERRED		0x00000200	/* primary/preferred subflow */
#define MPTSF_SOPT_OLDVAL	0x00000400	/* old option value is valid */
#define MPTSF_SOPT_INPROG	0x00000800	/* sosetopt in progress */
#define MPTSF_FAILINGOVER	0x00001000	/* subflow not used for output */
#define MPTSF_ACTIVE		0x00002000	/* subflow currently in use */
#define MPTSF_MPCAP_CTRSET	0x00004000	/* mpcap counter */
#define MPTSF_CLOSED		0x00008000	/* soclose_locked has been called on this subflow */
#define MPTSF_TFO_REQD		0x00010000	/* TFO requested */
#define MPTSF_CLOSE_REQD	0x00020000	/* A close has been requested from NECP */
#define MPTSF_INITIAL_SUB	0x00040000	/* This is the initial subflow */
#define MPTSF_READ_STALL	0x00080000	/* A read-stall has been detected */
#define MPTSF_WRITE_STALL	0x00100000	/* A write-stall has been detected */
#define MPTSF_CONFIRMED		0x00200000	/* Subflow confirmed to be MPTCP-capable */

/*
 * Bit-name string for mpts_flags: each octal escape is the 1-based bit
 * position of the flag whose name follows (e.g. \26 = bit 22 =
 * MPTSF_CONFIRMED).  The leading \020 looks like the output base used by
 * the kernel printf "%b" conversion -- confirm against the consumer.
 * Any flag added above needs a matching entry here.
 */
#define MPTSF_BITS \
	"\020\1ATTACHED\2CONNECTING\3PENDING\4CONNECTED\5DISCONNECTING" \
	"\6DISCONNECTED\7MP_CAPABLE\10MP_READY\11MP_DEGRADED" \
	"\12PREFERRED\13SOPT_OLDVAL" \
	"\14SOPT_INPROG\15FAILINGOVER\16ACTIVE\17MPCAP_CTRSET" \
	"\20CLOSED\21TFO_REQD\22CLOSEREQD\23INITIALSUB\24READ_STALL" \
	"\25WRITE_STALL\26CONFIRMED"
344 | |
/*
 * MPTCP states
 *
 * The numeric values are spelled out explicitly because they must stay
 * in sync with bsd/dev/dtrace/scripts/mptcp.d.
 */
typedef enum mptcp_state {
	MPTCPS_CLOSED		= 0,	/* closed */
	MPTCPS_LISTEN		= 1,	/* not yet implemented */
	MPTCPS_ESTABLISHED	= 2,	/* MPTCP connection established */
	MPTCPS_CLOSE_WAIT	= 3,	/* rcvd DFIN, waiting for close */
	MPTCPS_FIN_WAIT_1	= 4,	/* have closed, sent DFIN */
	MPTCPS_CLOSING		= 5,	/* closed xchd DFIN, waiting DFIN ACK */
	MPTCPS_LAST_ACK		= 6,	/* had DFIN and close; await DFIN ACK */
	MPTCPS_FIN_WAIT_2	= 7,	/* have closed, DFIN is acked */
	MPTCPS_TIME_WAIT	= 8,	/* in 2*MSL quiet wait after close */
	MPTCPS_TERMINATE	= 9,	/* terminal state */
} mptcp_state_t;
361 | |
/* Scalar types used by the MPTCP PCB and the subflow authentication list */
typedef u_int64_t	mptcp_key_t;	/* 64-bit key */
typedef u_int32_t	mptcp_token_t;	/* 32-bit token */
typedef u_int8_t	mptcp_addr_id;	/* 8-bit address ID */
365 | |
366 | |
/*
 * Address ID list
 *
 * One entry per local/remote address-ID pair together with the nonces
 * of both sides; entries are chained with LIST_ENTRY (struct mptcb
 * keeps the list head in mpt_subauth_list).
 */
struct mptcp_subf_auth_entry {
	LIST_ENTRY(mptcp_subf_auth_entry) msae_next;
	u_int32_t	msae_laddr_rand;	/* Local nonce */
	u_int32_t	msae_raddr_rand;	/* Remote nonce */
	mptcp_addr_id	msae_laddr_id;		/* Local addr ID */
	mptcp_addr_id	msae_raddr_id;		/* Remote addr ID */
};
375 | |
/*
 * MPTCP Protocol Control Block
 *
 * Protected by per-MPTCP mpt_lock.
 * NOTE(review): no mpt_lock member is declared in this structure, and the
 * MPTCP session comment in this header names the per-PCB mpp_lock --
 * confirm which lock actually protects these fields.
 *
 * Keep in sync with bsd/dev/dtrace/scripts/mptcp.d.
 */
struct mptcb {
	struct mptses	*mpt_mpte;		/* back ptr to MPTCP session */
	mptcp_state_t	mpt_state;		/* MPTCP state */
	u_int32_t	mpt_flags;		/* see flags below */
	u_int32_t	mpt_version;		/* MPTCP proto version */
	int		mpt_softerror;		/* error not yet reported */
	/*
	 * Authentication and metadata invariants
	 */
	mptcp_key_t	mpt_localkey;		/* in network byte order */
	mptcp_key_t	mpt_remotekey;		/* in network byte order */
	mptcp_token_t	mpt_localtoken;		/* HMAC SHA1 of local key */
	mptcp_token_t	mpt_remotetoken;	/* HMAC SHA1 of remote key */

	/*
	 * Timer vars for scenarios where subflow level acks arrive, but
	 * Data ACKs do not.
	 */
	int		mpt_rxtshift;		/* num of consecutive retrans */
	u_int32_t	mpt_rxtstart;		/* time at which rxt started */
	u_int64_t	mpt_rtseq;		/* seq # being tracked */
	u_int32_t	mpt_timer_vals;		/* timer related values */
	u_int32_t	mpt_timewait;		/* timewait */
	/*
	 * Sending side
	 */
	u_int64_t	mpt_snduna;		/* DSN of last unacked byte */
	u_int64_t	mpt_sndnxt;		/* DSN of next byte to send */
	u_int64_t	mpt_sndmax;		/* DSN of max byte sent */
	u_int64_t	mpt_local_idsn;		/* First byte's DSN */
	u_int32_t	mpt_sndwnd;
	u_int64_t	mpt_sndwl1;
	u_int64_t	mpt_sndwl2;
	/*
	 * Receiving side
	 */
	u_int64_t	mpt_rcvnxt;		/* Next expected DSN */
	u_int64_t	mpt_remote_idsn;	/* Peer's IDSN */
	u_int32_t	mpt_rcvwnd;
	LIST_HEAD(, mptcp_subf_auth_entry) mpt_subauth_list; /* address IDs */
	/*
	 * Fastclose
	 */
	u_int64_t	mpt_dsn_at_csum_fail;	/* MPFail Opt DSN */
	u_int32_t	mpt_ssn_at_csum_fail;	/* MPFail Subflow Seq */
	/*
	 * Zombie handling
	 */
#define MPT_GC_TICKS		(30)
#define MPT_GC_TICKS_FAST	(10)
	int32_t		mpt_gc_ticks;		/* Used for zombie deletion */

	u_int32_t	mpt_notsent_lowat;	/* TCP_NOTSENT_LOWAT support */
	u_int32_t	mpt_peer_version;	/* Version from peer */

	struct tsegqe_head	mpt_segq;
	u_int16_t	mpt_reassqlen;		/* length of reassembly queue */
};
440 | |
/* valid values for mpt_flags (see also notes on mpts_flags above) */
#define MPTCPF_CHECKSUM			0x001	/* checksum DSS option */
#define MPTCPF_FALLBACK_TO_TCP		0x002	/* Fallback to TCP */
#define MPTCPF_JOIN_READY		0x004	/* Ready to start 2 or more subflows */
#define MPTCPF_RECVD_MPFAIL		0x008	/* Received MP_FAIL option */
#define MPTCPF_SND_64BITDSN		0x010	/* Send full 64-bit DSN */
#define MPTCPF_SND_64BITACK		0x020	/* Send 64-bit ACK response */
#define MPTCPF_RCVD_64BITACK		0x040	/* Received 64-bit Data ACK */
#define MPTCPF_POST_FALLBACK_SYNC	0x080	/* Post fallback resend data */
#define MPTCPF_FALLBACK_HEURISTIC	0x100	/* Send SYN without MP_CAPABLE due to heuristic */
#define MPTCPF_HEURISTIC_TRAC		0x200	/* Tracked this connection in the heuristics as a failure */
#define MPTCPF_REASS_INPROG		0x400	/* Reassembly is in progress */

/*
 * Bit-name string for mpt_flags: each octal escape is the 1-based bit
 * position of the flag whose name follows (e.g. \13 = bit 11 =
 * MPTCPF_REASS_INPROG).  Any flag added above needs a matching entry.
 */
#define MPTCPF_BITS \
	"\020\1CHECKSUM\2FALLBACK_TO_TCP\3JOIN_READY\4RECVD_MPFAIL" \
	"\5SND_64BITDSN\6SND_64BITACK\7RCVD_64BITACK\10POST_FALLBACK_SYNC" \
	"\11FALLBACK_HEURISTIC\12HEURISTIC_TRAC\13REASS_INPROG"

/* valid values for mpt_timer_vals */
#define MPTT_REXMT		0x01	/* Starting Retransmit Timer */
#define MPTT_TW			0x02	/* Starting Timewait Timer */
#define MPTT_FASTCLOSE		0x04	/* Starting Fastclose wait timer */

/* events for close FSM */
#define MPCE_CLOSE		0x1
#define MPCE_RECV_DATA_ACK	0x2
#define MPCE_RECV_DATA_FIN	0x4
468 | |
469 | /* mptcb manipulation */ |
470 | static inline struct mptcb *tptomptp(struct tcpcb *tp) |
471 | { |
472 | return (tp->t_mptcb); |
473 | } |
474 | |
/*
 * MPTCP control block and state structures are allocated along with
 * the MP protocol control block; the following represents the layout:
 * the multipath PCB first, then the MPTCP session, then the MPTCP PCB.
 */
struct mpp_mtp {
	struct mppcb		mpp;		/* Multipath PCB */
	struct mptses		mpp_ses;	/* MPTCP session */
	struct mptcb		mtcb;		/* MPTCP PCB */
};
484 | |
/* Declare the net.inet.mptcp sysctl node only when SYSCTL_DECL is available. */
#ifdef SYSCTL_DECL
SYSCTL_DECL(_net_inet_mptcp);
#endif /* SYSCTL_DECL */

/* Globals defined outside this header. */
extern struct mppcbinfo mtcbinfo;
extern struct pr_usrreqs mptcp_usrreqs;
extern os_log_t mptcp_log_handle;
492 | |
/* Encryption algorithm related definitions */
#define SHA1_TRUNCATED		8

/*
 * MPTCP Debugging Levels
 * (bit values; mptcplog() tests them against mptcp_dbg_level with '&')
 */
#define MPTCP_LOGLVL_NONE	0x0	/* No debug logging */
#define MPTCP_LOGLVL_ERR	0x1	/* Errors in execution are logged */
#define MPTCP_LOGLVL_LOG	0x2	/* Important logs */
#define MPTCP_LOGLVL_VERBOSE	0x4	/* Verbose logs */

/*
 * MPTCP sub-components for debug logging
 * (bit values; mptcplog() tests them against mptcp_dbg_area with '&')
 */
#define MPTCP_NO_DBG		0x00	/* No areas are logged */
#define MPTCP_STATE_DBG		0x01	/* State machine logging */
#define MPTCP_SOCKET_DBG	0x02	/* Socket call logging */
#define MPTCP_SENDER_DBG	0x04	/* Sender side logging */
#define MPTCP_RECEIVER_DBG	0x08	/* Receiver logging */
#define MPTCP_EVENTS_DBG	0x10	/* Subflow events logging */
509 | |
/*
 * Helpers that split a 64-bit data sequence number / data ack into its
 * lower and upper 32-bit halves.  Every macro argument is parenthesized
 * so that expression arguments (e.g. "a | b") are masked as a whole.
 */

/* Mask to obtain 32-bit portion of data sequence number */
#define MPTCP_DATASEQ_LOW32_MASK	(0xffffffff)
#define MPTCP_DATASEQ_LOW32(seq)	((seq) & MPTCP_DATASEQ_LOW32_MASK)

/* Mask to obtain upper 32-bit portion of data sequence number */
#define MPTCP_DATASEQ_HIGH32_MASK	(0xffffffff00000000)
#define MPTCP_DATASEQ_HIGH32(seq)	((seq) & MPTCP_DATASEQ_HIGH32_MASK)

/* Mask to obtain 32-bit portion of data ack */
#define MPTCP_DATAACK_LOW32_MASK	(0xffffffff)
#define MPTCP_DATAACK_LOW32(ack)	((ack) & MPTCP_DATAACK_LOW32_MASK)

/* Mask to obtain upper 32-bit portion of data ack */
#define MPTCP_DATAACK_HIGH32_MASK	(0xffffffff00000000)
#define MPTCP_DATAACK_HIGH32(ack)	((ack) & MPTCP_DATAACK_HIGH32_MASK)

/*
 * x is the 64-bit data sequence number, y the 32-bit data seq number to be
 * extended. z is y extended to the appropriate 64-bit value.
 * This algorithm is based on the fact that subflow level window sizes are
 * at the maximum 2**30 (in reality, they are much smaller). A high
 * throughput application sending on a large number of subflows can in
 * theory have very large MPTCP level send and receive windows, in which
 * case 64-bit DSNs must be sent in place of 32-bit DSNs on the wire. For
 * us, with 2 subflows at 512K each, sequence wraparound detection can be
 * done by checking whether the 32-bit value obtained on the wire is at
 * least 2**31 bytes apart from the stored lower 32 bits of the Data
 * Sequence Number. Bogus DSNs are dropped by comparing against rwnd. Bogus
 * DSNs within rwnd cannot be protected against and are as weak as bogus
 * TCP sequence numbers.
 *
 * Usage notes:
 *  - x and y are evaluated more than once; pass side-effect-free
 *    expressions.
 *  - z must be a modifiable 64-bit lvalue.
 *  - The macro expands to a brace-enclosed block (not do/while(0)), kept
 *    that way for compatibility with existing call sites.
 *  - The 2**31 threshold is built from an unsigned 1: shifting a signed
 *    int 1 left by 31 overflows, which is undefined behaviour.
 */
#define MPTCP_EXTEND_DSN(x, y, z) {					\
	if ((MPTCP_DATASEQ_LOW32(x) > (y)) &&				\
	    ((((uint32_t)MPTCP_DATASEQ_LOW32(x)) - (uint32_t)(y)) >=	\
	    ((uint32_t)1 << 31))) {					\
		/*							\
		 * y wrapped around and x and y are 2**31 bytes apart	\
		 */							\
		(z) = MPTCP_DATASEQ_HIGH32(x) + 0x100000000;		\
		(z) |= (y);						\
	} else if ((MPTCP_DATASEQ_LOW32(x) < (y)) &&			\
	    (((uint32_t)(y) -						\
	    ((uint32_t)MPTCP_DATASEQ_LOW32(x))) >=			\
	    ((uint32_t)1 << 31))) {					\
		/*							\
		 * x wrapped around and x and y are 2**31 apart		\
		 */							\
		(z) = MPTCP_DATASEQ_HIGH32(x) - 0x100000000;		\
		(z) |= (y);						\
	} else {							\
		(z) = MPTCP_DATASEQ_HIGH32(x) | (y);			\
	}								\
}
562 | |
/*
 * Conditional logging helper.
 *
 *   x - the complete, parenthesized argument list handed to log()
 *   y - debug area bit(s), tested against mptcp_dbg_area
 *   z - debug level bit(s), tested against mptcp_dbg_level
 *
 * log() is invoked only when both tests are non-zero.  y and z are
 * parenthesized so that an unparenthesized mask such as "A | B" is tested
 * as a whole instead of being split by operator precedence
 * ("area & A | B" would parse as "(area & A) | B").
 */
#define mptcplog(x, y, z)	do {					\
	if ((mptcp_dbg_area & (y)) && (mptcp_dbg_level & (z)))		\
		log x;							\
} while (0)
567 | |
/* MPTCP tunables; defined outside this header. */
extern int mptcp_enable;		/* Multipath TCP */
extern int mptcp_mpcap_retries;		/* Multipath TCP retries */
extern int mptcp_join_retries;		/* Multipath TCP Join retries */
extern int mptcp_dss_csum;		/* Multipath DSS Option checksum */
extern int mptcp_fail_thresh;		/* Multipath failover thresh of retransmits */
extern int mptcp_subflow_keeptime;	/* Multipath subflow TCP_KEEPALIVE opt */
extern uint32_t mptcp_dbg_level;	/* Multipath TCP debugging level */
extern uint32_t mptcp_dbg_area;		/* Multipath TCP debugging area */
extern int mptcp_developer_mode;	/* Allow aggregation mode */

extern int tcp_jack_rxmt;		/* Join ACK retransmission value in msecs */
579 | |
/*
 * Prototypes for the MPTCP implementation.  All functions below are
 * defined outside this header; only their signatures are declared here.
 */
__BEGIN_DECLS
extern void mptcp_init(struct protosw *, struct domain *);
extern int mptcp_ctloutput(struct socket *, struct sockopt *);
extern int mptcp_sescreate(struct mppcb *);
extern void mptcp_check_subflows_and_add(struct mptses *);
extern int mptcp_get_statsindex(struct mptcp_itf_stats *stats,
    const struct mptsub *mpts);
extern void mptcpstats_inc_switch(struct mptses *, const struct mptsub *);
extern struct mptses *mptcp_drop(struct mptses *, struct mptcb *, int);
extern struct mptses *mptcp_close(struct mptses *, struct mptcb *);
extern int mptcp_lock(struct socket *, int, void *);
extern int mptcp_unlock(struct socket *, int, void *);
extern lck_mtx_t *mptcp_getlock(struct socket *, int);
extern void mptcp_subflow_workloop(struct mptses *);

extern void mptcp_sched_create_subflows(struct mptses *);

/* Socket-option records (struct mptopt) */
extern void mptcp_finish_usrclosed(struct mptses *mpte);
extern struct mptopt *mptcp_sopt_alloc(int);
extern const char *mptcp_sopt2str(int, int);
extern void mptcp_sopt_free(struct mptopt *);
extern void mptcp_sopt_insert(struct mptses *, struct mptopt *);
extern void mptcp_sopt_remove(struct mptses *, struct mptopt *);
extern struct mptopt *mptcp_sopt_find(struct mptses *, struct sockopt *);

/* Subflows (struct mptsub) */
extern int mptcp_subflow_add(struct mptses *, struct sockaddr *,
    struct sockaddr *, uint32_t, sae_connid_t *);
extern void mptcpstats_update(struct mptcp_itf_stats *stats, struct mptsub *mpts);
extern void mptcp_subflow_del(struct mptses *, struct mptsub *);

/* Flag value for the flags argument of mptcp_subflow_output() */
#define MPTCP_SUBOUT_PROBING	0x01
extern int mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts, int flags);
extern void mptcp_clean_reinjectq(struct mptses *mpte);
extern void mptcp_subflow_shutdown(struct mptses *, struct mptsub *);
extern void mptcp_subflow_disconnect(struct mptses *, struct mptsub *);
extern int mptcp_subflow_sosetopt(struct mptses *, struct mptsub *,
    struct mptopt *);
extern int mptcp_subflow_sogetopt(struct mptses *, struct socket *,
    struct mptopt *);

extern void mptcp_input(struct mptses *, struct mbuf *);
extern boolean_t mptcp_can_send_more(struct mptcb *mp_tp, boolean_t ignore_reinject);
extern int mptcp_output(struct mptses *);
extern void mptcp_close_fsm(struct mptcb *, uint32_t);

extern void mptcp_hmac_sha1(mptcp_key_t, mptcp_key_t, u_int32_t, u_int32_t,
    u_char*);
extern void mptcp_get_hmac(mptcp_addr_id, struct mptcb *, u_char *);
extern void mptcp_get_rands(mptcp_addr_id, struct mptcb *, u_int32_t *,
    u_int32_t *);
extern void mptcp_set_raddr_rand(mptcp_addr_id, struct mptcb *, mptcp_addr_id,
    u_int32_t);
extern int mptcp_init_remote_parms(struct mptcb *);
extern boolean_t mptcp_ok_to_keepalive(struct mptcb *);
extern void mptcp_insert_dsn(struct mppcb *, struct mbuf *);
extern void mptcp_output_getm_dsnmap32(struct socket *so, int off,
    uint32_t *dsn, uint32_t *relseq,
    uint16_t *data_len, uint16_t *dss_csum);
extern void mptcp_output_getm_dsnmap64(struct socket *so, int off,
    uint64_t *dsn, uint32_t *relseq,
    uint16_t *data_len, uint16_t *dss_csum);
extern void mptcp_act_on_txfail(struct socket *);
extern struct mptsub *mptcp_get_subflow(struct mptses *, struct mptsub *,
    struct mptsub **);
extern int mptcp_get_map_for_dsn(struct socket *, u_int64_t, u_int32_t *);
extern int32_t mptcp_adj_sendlen(struct socket *so, int32_t off);
extern void mptcp_sbrcv_grow(struct mptcb *mp_tp);
extern int32_t mptcp_sbspace(struct mptcb *);
extern void mptcp_notify_mpready(struct socket *);
extern void mptcp_notify_mpfail(struct socket *);
extern void mptcp_notify_close(struct socket *);
extern boolean_t mptcp_no_rto_spike(struct socket*);
extern int mptcp_set_notsent_lowat(struct mptses *mpte, int optval);
extern u_int32_t mptcp_get_notsent_lowat(struct mptses *mpte);
extern int mptcp_notsent_lowat_check(struct socket *so);
extern void mptcp_ask_symptoms(struct mptses *mpte);
extern void mptcp_control_register(void);
extern int mptcp_is_wifi_unusable(struct mptses *mpte);
extern boolean_t mptcp_subflow_is_bad(struct mptses *mpte, struct mptsub *mpts);
extern void mptcp_ask_for_nat64(struct ifnet *ifp);
extern void mptcp_session_necp_cb(void *, int, uint32_t, uint32_t, bool *);
extern void mptcp_set_restrictions(struct socket *mp_so);
extern int mptcp_freeq(struct mptcb *);
extern void mptcp_set_cellicon(struct mptses *mpte);
extern void mptcp_unset_cellicon(void);
extern void mptcp_reset_rexmit_state(struct tcpcb *tp);
extern void mptcp_reset_keepalive(struct tcpcb *tp);
extern int mptcp_validate_csum(struct tcpcb *tp, struct mbuf *m, uint64_t dsn,
    uint32_t sseq, uint16_t dlen, uint16_t csum,
    uint16_t dfin);
__END_DECLS
671 | |
672 | #endif /* BSD_KERNEL_PRIVATE */ |
673 | #ifdef PRIVATE |
674 | |
/*
 * Per-subflow record available under PRIVATE.  flow_ci must remain the
 * last member (see its comment); conninfo_mptcp_t embeds these records
 * in its trailing mptcpci_flows array.
 */
typedef struct mptcp_flow {
	size_t			flow_len;
	size_t			flow_tcpci_offset;
	uint32_t		flow_flags;
	sae_connid_t		flow_cid;
	struct sockaddr_storage flow_src;
	struct sockaddr_storage flow_dst;
	uint32_t		flow_relseq;	/* last subflow rel seq# */
	int32_t			flow_soerror;	/* subflow level error */
	uint32_t		flow_probecnt;	/* number of probes sent */
	conninfo_tcp_t		flow_ci;	/* must be the last field */
} mptcp_flow_t;
687 | |
/*
 * MPTCP connection-information record available under PRIVATE: a fixed
 * header of connection-level values followed by the subflow records.
 * mptcpci_flows is declared with one element; mptcpci_nflows and
 * mptcpci_flow_offset describe the subflow data (presumably more than one
 * record may follow in the same buffer -- confirm against the producer).
 */
typedef struct conninfo_mptcp {
	size_t		mptcpci_len;
	size_t		mptcpci_flow_offset;	/* offsetof first flow */
	size_t		mptcpci_nflows;		/* number of subflows */
	uint32_t	mptcpci_state;		/* MPTCP level state */
	uint32_t	mptcpci_mpte_flags;	/* Session flags */
	uint32_t	mptcpci_flags;		/* MPTCB flags */
	uint32_t	mptcpci_ltoken;		/* local token */
	uint32_t	mptcpci_rtoken;		/* remote token */
	uint32_t	mptcpci_notsent_lowat;	/* NOTSENT_LOWAT */

	/* Send side */
	uint64_t	mptcpci_snduna;		/* DSN of last unacked byte */
	uint64_t	mptcpci_sndnxt;		/* DSN of next byte to send */
	uint64_t	mptcpci_sndmax;		/* DSN of max byte sent */
	uint64_t	mptcpci_lidsn;		/* Local IDSN */
	uint32_t	mptcpci_sndwnd;		/* Send window snapshot */

	/* Receive side */
	uint64_t	mptcpci_rcvnxt;		/* Next expected DSN */
	uint64_t	mptcpci_rcvatmark;	/* Session level rcvnxt */
	uint64_t	mptcpci_ridsn;		/* Peer's IDSN */
	uint32_t	mptcpci_rcvwnd;		/* Receive window */

	uint8_t		mptcpci_mpte_addrid;	/* last addr id */

	mptcp_flow_t	mptcpci_flows[1];
} conninfo_mptcp_t;
716 | |
/* Use SymptomsD notifications of wifi and cell status in subflow selection */
#define MPTCP_KERN_CTL_NAME	"com.apple.network.advisory"

/*
 * Network advisory message.  The same 32 bits can be accessed three ways:
 *   - sa_nwk_status_int: the whole 32-bit word;
 *   - sa_nwk_status (16 bits) followed by sa_unused (16 bits);
 *   - sa_wifi_status and sa_cell_status (8 bits each), overlaying
 *     sa_nwk_status.
 */
typedef struct symptoms_advisory {
	union {
		uint32_t	sa_nwk_status_int;
		struct {
			union {
/* Values for the combined 16-bit sa_nwk_status */
#define SYMPTOMS_ADVISORY_NOCOMMENT	0x0000
#define SYMPTOMS_ADVISORY_USEAPP	0xFFFF /* Very ugly workaround to avoid breaking backwards compatibility - ToDo: Fix it in +1 */
				uint16_t	sa_nwk_status;
				struct {
#define SYMPTOMS_ADVISORY_WIFI_BAD	0x01
#define SYMPTOMS_ADVISORY_WIFI_OK	0x02
					uint8_t	sa_wifi_status;
#define SYMPTOMS_ADVISORY_CELL_BAD	0x01
#define SYMPTOMS_ADVISORY_CELL_OK	0x02
					uint8_t	sa_cell_status;
				};
			};
			uint16_t	sa_unused;
		};
	};
} symptoms_advisory_t;
740 | |
/*
 * Message carrying a command code, a UUID and a priority.  The macros
 * declared next to each field list the values defined for it here.
 */
struct mptcp_symptoms_ask_uuid {
	uint32_t	cmd;
#define MPTCP_SYMPTOMS_ASK_UUID		1
	uuid_t		uuid;
	uint32_t	priority;
#define MPTCP_SYMPTOMS_UNKNOWN		0
#define MPTCP_SYMPTOMS_BACKGROUND	1
#define MPTCP_SYMPTOMS_FOREGROUND	2
};
750 | |
/*
 * Payload holding a single integer value (judging by the name, carried
 * in an MPTCP kernel event -- confirm against the event producer).
 */
struct kev_mptcp_data {
	int value;
};
754 | |
755 | #endif /* PRIVATE */ |
756 | #endif /* _NETINET_MPTCP_VAR_H_ */ |
757 | |