1/*
2 * Copyright (c) 2012-2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28#include <sys/param.h>
29#include <sys/systm.h>
30#include <netinet/in_systm.h>
31#include <sys/socket.h>
32#include <sys/socketvar.h>
33#include <sys/syslog.h>
34#include <net/route.h>
35#include <netinet/in.h>
36#include <net/if.h>
37
38#include <netinet/ip.h>
39#include <netinet/ip_var.h>
40#include <netinet/in_var.h>
41#include <netinet/tcp.h>
42#include <netinet/tcp_cache.h>
43#include <netinet/tcp_seq.h>
44#include <netinet/tcpip.h>
45#include <netinet/tcp_fsm.h>
46#include <netinet/mptcp_var.h>
47#include <netinet/mptcp.h>
48#include <netinet/mptcp_opt.h>
49#include <netinet/mptcp_seq.h>
50
51#include <libkern/crypto/sha1.h>
52#include <netinet/mptcp_timer.h>
53
54#include <mach/sdt.h>
55
56static int mptcp_validate_join_hmac(struct tcpcb *, u_char*, int);
57static int mptcp_snd_mpprio(struct tcpcb *tp, u_char *cp, int optlen);
58static void mptcp_send_remaddr_opt(struct tcpcb *, struct mptcp_remaddr_opt *);
59
60/*
61 * MPTCP Options Output Processing
62 */
63
64static unsigned
65mptcp_setup_first_subflow_syn_opts(struct socket *so, u_char *opt, unsigned optlen)
66{
67 struct mptcp_mpcapable_opt_common mptcp_opt;
68 struct tcpcb *tp = sototcpcb(so);
69 struct mptcb *mp_tp = tptomptp(tp);
70
71 mpte_lock_assert_held(mp_tp->mpt_mpte);
72
73 /*
74 * Avoid retransmitting the MP_CAPABLE option.
75 */
76 if (tp->t_rxtshift > mptcp_mpcap_retries) {
77 if (!(mp_tp->mpt_flags & (MPTCPF_FALLBACK_HEURISTIC | MPTCPF_HEURISTIC_TRAC))) {
78 mp_tp->mpt_flags |= MPTCPF_HEURISTIC_TRAC;
79 tcp_heuristic_mptcp_loss(tp);
80 }
81 return (optlen);
82 }
83
84 if (!tcp_heuristic_do_mptcp(tp)) {
85 mp_tp->mpt_flags |= MPTCPF_FALLBACK_HEURISTIC;
86 return (optlen);
87 }
88
89 bzero(&mptcp_opt, sizeof (struct mptcp_mpcapable_opt_common));
90
91 mptcp_opt.mmco_kind = TCPOPT_MULTIPATH;
92 mptcp_opt.mmco_len =
93 sizeof (struct mptcp_mpcapable_opt_common) +
94 sizeof (mptcp_key_t);
95 mptcp_opt.mmco_subtype = MPO_CAPABLE;
96 mptcp_opt.mmco_version = mp_tp->mpt_version;
97 mptcp_opt.mmco_flags |= MPCAP_PROPOSAL_SBIT;
98 if (mp_tp->mpt_flags & MPTCPF_CHECKSUM)
99 mptcp_opt.mmco_flags |= MPCAP_CHECKSUM_CBIT;
100 memcpy(opt + optlen, &mptcp_opt, sizeof (struct mptcp_mpcapable_opt_common));
101 optlen += sizeof (struct mptcp_mpcapable_opt_common);
102 memcpy(opt + optlen, &mp_tp->mpt_localkey, sizeof (mptcp_key_t));
103 optlen += sizeof (mptcp_key_t);
104
105 return (optlen);
106}
107
108static unsigned
109mptcp_setup_join_subflow_syn_opts(struct socket *so, u_char *opt, unsigned optlen)
110{
111 struct mptcp_mpjoin_opt_req mpjoin_req;
112 struct inpcb *inp = sotoinpcb(so);
113 struct tcpcb *tp = NULL;
114 struct mptsub *mpts;
115
116 if (!inp)
117 return (optlen);
118
119 tp = intotcpcb(inp);
120 if (!tp)
121 return (optlen);
122
123 mpts = tp->t_mpsub;
124
125 VERIFY(tptomptp(tp));
126 mpte_lock_assert_held(tptomptp(tp)->mpt_mpte);
127
128 bzero(&mpjoin_req, sizeof (mpjoin_req));
129 mpjoin_req.mmjo_kind = TCPOPT_MULTIPATH;
130 mpjoin_req.mmjo_len = sizeof (mpjoin_req);
131 mpjoin_req.mmjo_subtype_bkp = MPO_JOIN << 4;
132
133 if (tp->t_mpflags & TMPF_BACKUP_PATH) {
134 mpjoin_req.mmjo_subtype_bkp |= MPTCP_BACKUP;
135 } else if (inp->inp_boundifp && IFNET_IS_CELLULAR(inp->inp_boundifp) &&
136 mpts->mpts_mpte->mpte_svctype != MPTCP_SVCTYPE_AGGREGATE) {
137 mpjoin_req.mmjo_subtype_bkp |= MPTCP_BACKUP;
138 tp->t_mpflags |= TMPF_BACKUP_PATH;
139 } else {
140 mpts->mpts_flags |= MPTSF_PREFERRED;
141 }
142
143 mpjoin_req.mmjo_addr_id = tp->t_local_aid;
144 mpjoin_req.mmjo_peer_token = tptomptp(tp)->mpt_remotetoken;
145 if (mpjoin_req.mmjo_peer_token == 0) {
146 mptcplog((LOG_DEBUG, "%s: peer token 0", __func__),
147 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
148 }
149 mptcp_get_rands(tp->t_local_aid, tptomptp(tp),
150 &mpjoin_req.mmjo_rand, NULL);
151 memcpy(opt + optlen, &mpjoin_req, mpjoin_req.mmjo_len);
152 optlen += mpjoin_req.mmjo_len;
153
154 return (optlen);
155}
156
157unsigned
158mptcp_setup_join_ack_opts(struct tcpcb *tp, u_char *opt, unsigned optlen)
159{
160 unsigned new_optlen;
161 struct mptcp_mpjoin_opt_rsp2 join_rsp2;
162
163 if ((MAX_TCPOPTLEN - optlen) < sizeof (struct mptcp_mpjoin_opt_rsp2)) {
164 printf("%s: no space left %d \n", __func__, optlen);
165 return (optlen);
166 }
167
168 bzero(&join_rsp2, sizeof (struct mptcp_mpjoin_opt_rsp2));
169 join_rsp2.mmjo_kind = TCPOPT_MULTIPATH;
170 join_rsp2.mmjo_len = sizeof (struct mptcp_mpjoin_opt_rsp2);
171 join_rsp2.mmjo_subtype = MPO_JOIN;
172 mptcp_get_hmac(tp->t_local_aid, tptomptp(tp),
173 (u_char*)&join_rsp2.mmjo_mac);
174 memcpy(opt + optlen, &join_rsp2, join_rsp2.mmjo_len);
175 new_optlen = optlen + join_rsp2.mmjo_len;
176 return (new_optlen);
177}
178
179unsigned
180mptcp_setup_syn_opts(struct socket *so, u_char *opt, unsigned optlen)
181{
182 unsigned new_optlen;
183
184 if (!(so->so_flags & SOF_MP_SEC_SUBFLOW))
185 new_optlen = mptcp_setup_first_subflow_syn_opts(so, opt, optlen);
186 else
187 new_optlen = mptcp_setup_join_subflow_syn_opts(so, opt, optlen);
188
189 return (new_optlen);
190}
191
192static int
193mptcp_send_mpfail(struct tcpcb *tp, u_char *opt, unsigned int optlen)
194{
195#pragma unused(tp, opt, optlen)
196
197 struct mptcb *mp_tp = NULL;
198 struct mptcp_mpfail_opt fail_opt;
199 uint64_t dsn;
200 int len = sizeof (struct mptcp_mpfail_opt);
201
202 mp_tp = tptomptp(tp);
203 if (mp_tp == NULL) {
204 tp->t_mpflags &= ~TMPF_SND_MPFAIL;
205 return (optlen);
206 }
207
208 mpte_lock_assert_held(mp_tp->mpt_mpte);
209
210 /* if option space low give up */
211 if ((MAX_TCPOPTLEN - optlen) < sizeof (struct mptcp_mpfail_opt)) {
212 tp->t_mpflags &= ~TMPF_SND_MPFAIL;
213 return (optlen);
214 }
215
216 dsn = mp_tp->mpt_rcvnxt;
217
218 bzero(&fail_opt, sizeof (fail_opt));
219 fail_opt.mfail_kind = TCPOPT_MULTIPATH;
220 fail_opt.mfail_len = len;
221 fail_opt.mfail_subtype = MPO_FAIL;
222 fail_opt.mfail_dsn = mptcp_hton64(dsn);
223 memcpy(opt + optlen, &fail_opt, len);
224 optlen += len;
225 tp->t_mpflags &= ~TMPF_SND_MPFAIL;
226 mptcplog((LOG_DEBUG, "%s: %d \n", __func__,
227 tp->t_local_aid), (MPTCP_SOCKET_DBG | MPTCP_SENDER_DBG),
228 MPTCP_LOGLVL_LOG);
229 return (optlen);
230}
231
232static int
233mptcp_send_infinite_mapping(struct tcpcb *tp, u_char *opt, unsigned int optlen)
234{
235 struct mptcp_dsn_opt infin_opt;
236 struct mptcb *mp_tp = NULL;
237 size_t len = sizeof (struct mptcp_dsn_opt);
238 struct socket *so = tp->t_inpcb->inp_socket;
239 int csum_len = 0;
240
241 if (!so)
242 return (optlen);
243
244 mp_tp = tptomptp(tp);
245 if (mp_tp == NULL)
246 return (optlen);
247
248 mpte_lock_assert_held(mp_tp->mpt_mpte);
249
250 if (mp_tp->mpt_flags & MPTCPF_CHECKSUM)
251 csum_len = 2;
252
253 /* try later */
254 if ((MAX_TCPOPTLEN - optlen) < (len + csum_len))
255 return (optlen);
256
257 bzero(&infin_opt, sizeof (infin_opt));
258 infin_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH;
259 infin_opt.mdss_copt.mdss_len = len + csum_len;
260 infin_opt.mdss_copt.mdss_subtype = MPO_DSS;
261 infin_opt.mdss_copt.mdss_flags |= MDSS_M;
262 if (mp_tp->mpt_flags & MPTCPF_RECVD_MPFAIL) {
263 infin_opt.mdss_dsn = (u_int32_t)
264 MPTCP_DATASEQ_LOW32(mp_tp->mpt_dsn_at_csum_fail);
265 infin_opt.mdss_subflow_seqn = mp_tp->mpt_ssn_at_csum_fail;
266 } else {
267 /*
268 * If MPTCP fallback happens, but TFO succeeds, the data on the
269 * SYN does not belong to the MPTCP data sequence space.
270 */
271 if ((tp->t_tfo_stats & TFO_S_SYN_DATA_ACKED) &&
272 ((mp_tp->mpt_local_idsn + 1) == mp_tp->mpt_snduna)) {
273 infin_opt.mdss_subflow_seqn = 1;
274
275 mptcplog((LOG_DEBUG, "%s: idsn %llu snduna %llu \n",
276 __func__, mp_tp->mpt_local_idsn,
277 mp_tp->mpt_snduna),
278 (MPTCP_SOCKET_DBG | MPTCP_SENDER_DBG),
279 MPTCP_LOGLVL_LOG);
280 } else {
281 infin_opt.mdss_subflow_seqn = tp->snd_una - tp->t_mpsub->mpts_iss;
282 }
283 infin_opt.mdss_dsn = (u_int32_t)
284 MPTCP_DATASEQ_LOW32(mp_tp->mpt_snduna);
285 }
286
287 if ((infin_opt.mdss_dsn == 0) || (infin_opt.mdss_subflow_seqn == 0)) {
288 return (optlen);
289 }
290 infin_opt.mdss_dsn = htonl(infin_opt.mdss_dsn);
291 infin_opt.mdss_subflow_seqn = htonl(infin_opt.mdss_subflow_seqn);
292 infin_opt.mdss_data_len = 0;
293
294 memcpy(opt + optlen, &infin_opt, len);
295 optlen += len;
296 if (csum_len != 0) {
297 /* The checksum field is set to 0 for infinite mapping */
298 uint16_t csum = 0;
299 memcpy(opt + optlen, &csum, csum_len);
300 optlen += csum_len;
301 }
302
303 mptcplog((LOG_DEBUG, "%s: dsn = %x, seq = %x len = %x\n", __func__,
304 ntohl(infin_opt.mdss_dsn),
305 ntohl(infin_opt.mdss_subflow_seqn),
306 ntohs(infin_opt.mdss_data_len)),
307 (MPTCP_SOCKET_DBG | MPTCP_SENDER_DBG),
308 MPTCP_LOGLVL_LOG);
309
310 tp->t_mpflags |= TMPF_INFIN_SENT;
311 tcpstat.tcps_estab_fallback++;
312 return (optlen);
313}
314
315
316static int
317mptcp_ok_to_fin(struct tcpcb *tp, u_int64_t dsn, u_int32_t datalen)
318{
319 struct mptcb *mp_tp = tptomptp(tp);
320
321 mpte_lock_assert_held(mp_tp->mpt_mpte);
322
323 dsn = (mp_tp->mpt_sndmax & MPTCP_DATASEQ_LOW32_MASK) | dsn;
324 if ((dsn + datalen) == mp_tp->mpt_sndmax)
325 return (1);
326
327 return (0);
328}
329
330unsigned int
331mptcp_setup_opts(struct tcpcb *tp, int32_t off, u_char *opt,
332 unsigned int optlen, int flags, int len,
333 boolean_t *p_mptcp_acknow)
334{
335 struct inpcb *inp = (struct inpcb *)tp->t_inpcb;
336 struct socket *so = inp->inp_socket;
337 struct mptcb *mp_tp = tptomptp(tp);
338 boolean_t do_csum = FALSE;
339 boolean_t send_64bit_dsn = FALSE;
340 boolean_t send_64bit_ack = FALSE;
341 u_int32_t old_mpt_flags = tp->t_mpflags & TMPF_MPTCP_SIGNALS;
342
343 if (mptcp_enable == 0 || mp_tp == NULL || tp->t_state == TCPS_CLOSED) {
344 /* do nothing */
345 goto ret_optlen;
346 }
347
348 mpte_lock_assert_held(mp_tp->mpt_mpte);
349
350 if (mp_tp->mpt_flags & MPTCPF_CHECKSUM) {
351 do_csum = TRUE;
352 }
353
354 /* tcp_output handles the SYN path separately */
355 if (flags & TH_SYN) {
356 goto ret_optlen;
357 }
358
359 if ((MAX_TCPOPTLEN - optlen) <
360 sizeof (struct mptcp_mpcapable_opt_common)) {
361 mptcplog((LOG_ERR, "%s: no space left %d flags %x tp->t_mpflags %x len %d\n",
362 __func__, optlen, flags, tp->t_mpflags, len),
363 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
364 goto ret_optlen;
365 }
366
367 if (tp->t_mpflags & TMPF_TCP_FALLBACK) {
368 if (tp->t_mpflags & TMPF_SND_MPFAIL)
369 optlen = mptcp_send_mpfail(tp, opt, optlen);
370 else if (!(tp->t_mpflags & TMPF_INFIN_SENT))
371 optlen = mptcp_send_infinite_mapping(tp, opt, optlen);
372 goto ret_optlen;
373 }
374
375 if (tp->t_mpflags & TMPF_SND_KEYS) {
376 struct mptcp_mpcapable_opt_rsp1 mptcp_opt;
377 if ((MAX_TCPOPTLEN - optlen) <
378 sizeof (struct mptcp_mpcapable_opt_rsp1))
379 goto ret_optlen;
380 bzero(&mptcp_opt, sizeof (struct mptcp_mpcapable_opt_rsp1));
381 mptcp_opt.mmc_common.mmco_kind = TCPOPT_MULTIPATH;
382 mptcp_opt.mmc_common.mmco_len =
383 sizeof (struct mptcp_mpcapable_opt_rsp1);
384 mptcp_opt.mmc_common.mmco_subtype = MPO_CAPABLE;
385 mptcp_opt.mmc_common.mmco_version = mp_tp->mpt_version;
386 /* HMAC-SHA1 is the proposal */
387 mptcp_opt.mmc_common.mmco_flags |= MPCAP_PROPOSAL_SBIT;
388 if (mp_tp->mpt_flags & MPTCPF_CHECKSUM)
389 mptcp_opt.mmc_common.mmco_flags |= MPCAP_CHECKSUM_CBIT;
390 mptcp_opt.mmc_localkey = mp_tp->mpt_localkey;
391 mptcp_opt.mmc_remotekey = mp_tp->mpt_remotekey;
392 memcpy(opt + optlen, &mptcp_opt, mptcp_opt.mmc_common.mmco_len);
393 optlen += mptcp_opt.mmc_common.mmco_len;
394 tp->t_mpflags &= ~TMPF_SND_KEYS;
395
396 if (!tp->t_mpuna) {
397 tp->t_mpuna = tp->snd_una;
398 } else {
399 /* its a retransmission of the MP_CAPABLE ACK */
400 }
401 goto ret_optlen;
402 }
403
404 if (tp->t_mpflags & TMPF_SND_JACK) {
405 /* Do the ACK part */
406 optlen = mptcp_setup_join_ack_opts(tp, opt, optlen);
407 if (!tp->t_mpuna) {
408 tp->t_mpuna = tp->snd_una;
409 }
410 /* Start a timer to retransmit the ACK */
411 tp->t_timer[TCPT_JACK_RXMT] =
412 OFFSET_FROM_START(tp, tcp_jack_rxmt);
413
414 tp->t_mpflags &= ~TMPF_SND_JACK;
415 goto ret_optlen;
416 }
417
418 if (!(tp->t_mpflags & TMPF_MPTCP_TRUE))
419 goto ret_optlen;
420 /*
421 * From here on, all options are sent only if MPTCP_TRUE
422 * or when data is sent early on as in Fast Join
423 */
424
425 if ((tp->t_mpflags & TMPF_MPTCP_TRUE) &&
426 (tp->t_mpflags & TMPF_SND_REM_ADDR)) {
427 int rem_opt_len = sizeof (struct mptcp_remaddr_opt);
428 if ((optlen + rem_opt_len) <= MAX_TCPOPTLEN) {
429 mptcp_send_remaddr_opt(tp,
430 (struct mptcp_remaddr_opt *)(opt + optlen));
431 optlen += rem_opt_len;
432 } else {
433 tp->t_mpflags &= ~TMPF_SND_REM_ADDR;
434 }
435 }
436
437 if (tp->t_mpflags & TMPF_SND_MPPRIO) {
438 optlen = mptcp_snd_mpprio(tp, opt, optlen);
439 }
440
441 if (mp_tp->mpt_flags & MPTCPF_SND_64BITDSN) {
442 send_64bit_dsn = TRUE;
443 }
444 if (mp_tp->mpt_flags & MPTCPF_SND_64BITACK)
445 send_64bit_ack = TRUE;
446
447#define CHECK_OPTLEN { \
448 if ((MAX_TCPOPTLEN - optlen) < dssoptlen) { \
449 mptcplog((LOG_ERR, "%s: dssoptlen %d optlen %d \n", __func__, \
450 dssoptlen, optlen), \
451 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); \
452 goto ret_optlen; \
453 } \
454}
455
456#define DO_FIN(dsn_opt) { \
457 int sndfin = 0; \
458 sndfin = mptcp_ok_to_fin(tp, dsn_opt.mdss_dsn, len); \
459 if (sndfin) { \
460 dsn_opt.mdss_copt.mdss_flags |= MDSS_F; \
461 dsn_opt.mdss_data_len += 1; \
462 if (do_csum) \
463 dss_csum = in_addword(dss_csum, 1); \
464 } \
465}
466
467#define CHECK_DATALEN { \
468 /* MPTCP socket does not support IP options */ \
469 if ((len + optlen + dssoptlen) > tp->t_maxopd) { \
470 mptcplog((LOG_ERR, "%s: nosp %d len %d opt %d %d %d\n", \
471 __func__, len, dssoptlen, optlen, \
472 tp->t_maxseg, tp->t_maxopd), \
473 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); \
474 /* remove option length from payload len */ \
475 len = tp->t_maxopd - optlen - dssoptlen; \
476 } \
477}
478
479 if ((tp->t_mpflags & TMPF_SEND_DSN) &&
480 (send_64bit_dsn)) {
481 /*
482 * If there was the need to send 64-bit Data ACK along
483 * with 64-bit DSN, then 26 or 28 bytes would be used.
484 * With timestamps and NOOP padding that will cause
485 * overflow. Hence, in the rare event that both 64-bit
486 * DSN and 64-bit ACK have to be sent, delay the send of
487 * 64-bit ACK until our 64-bit DSN is acked with a 64-bit ack.
488 * XXX If this delay causes issue, remove the 2-byte padding.
489 */
490 struct mptcp_dss64_ack32_opt dsn_ack_opt;
491 unsigned int dssoptlen = sizeof (dsn_ack_opt);
492 uint16_t dss_csum;
493
494 if (do_csum) {
495 dssoptlen += 2;
496 }
497
498 CHECK_OPTLEN;
499
500 bzero(&dsn_ack_opt, sizeof (dsn_ack_opt));
501 dsn_ack_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH;
502 dsn_ack_opt.mdss_copt.mdss_subtype = MPO_DSS;
503 dsn_ack_opt.mdss_copt.mdss_len = dssoptlen;
504 dsn_ack_opt.mdss_copt.mdss_flags |=
505 MDSS_M | MDSS_m | MDSS_A;
506
507 CHECK_DATALEN;
508
509 mptcp_output_getm_dsnmap64(so, off,
510 &dsn_ack_opt.mdss_dsn,
511 &dsn_ack_opt.mdss_subflow_seqn,
512 &dsn_ack_opt.mdss_data_len,
513 &dss_csum);
514
515 if ((dsn_ack_opt.mdss_data_len == 0) ||
516 (dsn_ack_opt.mdss_dsn == 0)) {
517 goto ret_optlen;
518 }
519
520 if (tp->t_mpflags & TMPF_SEND_DFIN) {
521 DO_FIN(dsn_ack_opt);
522 }
523
524 dsn_ack_opt.mdss_ack =
525 htonl(MPTCP_DATAACK_LOW32(mp_tp->mpt_rcvnxt));
526
527 dsn_ack_opt.mdss_dsn = mptcp_hton64(dsn_ack_opt.mdss_dsn);
528 dsn_ack_opt.mdss_subflow_seqn = htonl(
529 dsn_ack_opt.mdss_subflow_seqn);
530 dsn_ack_opt.mdss_data_len = htons(
531 dsn_ack_opt.mdss_data_len);
532
533 memcpy(opt + optlen, &dsn_ack_opt, sizeof (dsn_ack_opt));
534 if (do_csum)
535 *((uint16_t *)(void *)(opt + optlen + sizeof (dsn_ack_opt))) = dss_csum;
536
537 optlen += dssoptlen;
538 mptcplog((LOG_DEBUG,"%s: long DSS = %llx ACK = %llx \n", __func__,
539 mptcp_ntoh64(dsn_ack_opt.mdss_dsn),
540 mptcp_ntoh64(dsn_ack_opt.mdss_ack)),
541 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
542
543 tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW;
544 goto ret_optlen;
545 }
546
547 if ((tp->t_mpflags & TMPF_SEND_DSN) &&
548 (!send_64bit_dsn) &&
549 !(tp->t_mpflags & TMPF_MPTCP_ACKNOW)) {
550 struct mptcp_dsn_opt dsn_opt;
551 unsigned int dssoptlen = sizeof (struct mptcp_dsn_opt);
552 uint16_t dss_csum;
553
554 if (do_csum) {
555 dssoptlen += 2;
556 }
557
558 CHECK_OPTLEN;
559
560 bzero(&dsn_opt, sizeof (dsn_opt));
561 dsn_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH;
562 dsn_opt.mdss_copt.mdss_subtype = MPO_DSS;
563 dsn_opt.mdss_copt.mdss_len = dssoptlen;
564 dsn_opt.mdss_copt.mdss_flags |= MDSS_M;
565
566 CHECK_DATALEN;
567
568 mptcp_output_getm_dsnmap32(so, off, &dsn_opt.mdss_dsn,
569 &dsn_opt.mdss_subflow_seqn,
570 &dsn_opt.mdss_data_len,
571 &dss_csum);
572
573 if ((dsn_opt.mdss_data_len == 0) ||
574 (dsn_opt.mdss_dsn == 0)) {
575 goto ret_optlen;
576 }
577
578 if (tp->t_mpflags & TMPF_SEND_DFIN) {
579 DO_FIN(dsn_opt);
580 }
581
582 dsn_opt.mdss_dsn = htonl(dsn_opt.mdss_dsn);
583 dsn_opt.mdss_subflow_seqn = htonl(dsn_opt.mdss_subflow_seqn);
584 dsn_opt.mdss_data_len = htons(dsn_opt.mdss_data_len);
585 memcpy(opt + optlen, &dsn_opt, sizeof (dsn_opt));
586 if (do_csum)
587 *((uint16_t *)(void *)(opt + optlen + sizeof (dsn_opt))) = dss_csum;
588
589 optlen += dssoptlen;
590 tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW;
591 goto ret_optlen;
592 }
593
594 /* 32-bit Data ACK option */
595 if ((tp->t_mpflags & TMPF_MPTCP_ACKNOW) &&
596 (!send_64bit_ack) &&
597 !(tp->t_mpflags & TMPF_SEND_DSN) &&
598 !(tp->t_mpflags & TMPF_SEND_DFIN)) {
599
600 struct mptcp_data_ack_opt dack_opt;
601 unsigned int dssoptlen = 0;
602do_ack32_only:
603 dssoptlen = sizeof (dack_opt);
604
605 CHECK_OPTLEN;
606
607 bzero(&dack_opt, dssoptlen);
608 dack_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH;
609 dack_opt.mdss_copt.mdss_len = dssoptlen;
610 dack_opt.mdss_copt.mdss_subtype = MPO_DSS;
611 dack_opt.mdss_copt.mdss_flags |= MDSS_A;
612 dack_opt.mdss_ack =
613 htonl(MPTCP_DATAACK_LOW32(mp_tp->mpt_rcvnxt));
614 memcpy(opt + optlen, &dack_opt, dssoptlen);
615 optlen += dssoptlen;
616 VERIFY(optlen <= MAX_TCPOPTLEN);
617 tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW;
618 goto ret_optlen;
619 }
620
621 /* 64-bit Data ACK option */
622 if ((tp->t_mpflags & TMPF_MPTCP_ACKNOW) &&
623 (send_64bit_ack) &&
624 !(tp->t_mpflags & TMPF_SEND_DSN) &&
625 !(tp->t_mpflags & TMPF_SEND_DFIN)) {
626 struct mptcp_data_ack64_opt dack_opt;
627 unsigned int dssoptlen = 0;
628do_ack64_only:
629 dssoptlen = sizeof (dack_opt);
630
631 CHECK_OPTLEN;
632
633 bzero(&dack_opt, dssoptlen);
634 dack_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH;
635 dack_opt.mdss_copt.mdss_len = dssoptlen;
636 dack_opt.mdss_copt.mdss_subtype = MPO_DSS;
637 dack_opt.mdss_copt.mdss_flags |= (MDSS_A | MDSS_a);
638 dack_opt.mdss_ack = mptcp_hton64(mp_tp->mpt_rcvnxt);
639 /*
640 * The other end should retransmit 64-bit DSN until it
641 * receives a 64-bit ACK.
642 */
643 mp_tp->mpt_flags &= ~MPTCPF_SND_64BITACK;
644 memcpy(opt + optlen, &dack_opt, dssoptlen);
645 optlen += dssoptlen;
646 VERIFY(optlen <= MAX_TCPOPTLEN);
647 tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW;
648 goto ret_optlen;
649 }
650
651 /* 32-bit DSS+Data ACK option */
652 if ((tp->t_mpflags & TMPF_SEND_DSN) &&
653 (!send_64bit_dsn) &&
654 (!send_64bit_ack) &&
655 (tp->t_mpflags & TMPF_MPTCP_ACKNOW)) {
656 struct mptcp_dss_ack_opt dss_ack_opt;
657 unsigned int dssoptlen = sizeof (dss_ack_opt);
658 uint16_t dss_csum;
659
660 if (do_csum)
661 dssoptlen += 2;
662
663 CHECK_OPTLEN;
664
665 bzero(&dss_ack_opt, sizeof (dss_ack_opt));
666 dss_ack_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH;
667 dss_ack_opt.mdss_copt.mdss_len = dssoptlen;
668 dss_ack_opt.mdss_copt.mdss_subtype = MPO_DSS;
669 dss_ack_opt.mdss_copt.mdss_flags |= MDSS_A | MDSS_M;
670 dss_ack_opt.mdss_ack =
671 htonl(MPTCP_DATAACK_LOW32(mp_tp->mpt_rcvnxt));
672
673 CHECK_DATALEN;
674
675 mptcp_output_getm_dsnmap32(so, off, &dss_ack_opt.mdss_dsn,
676 &dss_ack_opt.mdss_subflow_seqn,
677 &dss_ack_opt.mdss_data_len,
678 &dss_csum);
679
680 if ((dss_ack_opt.mdss_data_len == 0) ||
681 (dss_ack_opt.mdss_dsn == 0)) {
682 goto do_ack32_only;
683 }
684
685 if (tp->t_mpflags & TMPF_SEND_DFIN) {
686 DO_FIN(dss_ack_opt);
687 }
688
689 dss_ack_opt.mdss_dsn = htonl(dss_ack_opt.mdss_dsn);
690 dss_ack_opt.mdss_subflow_seqn =
691 htonl(dss_ack_opt.mdss_subflow_seqn);
692 dss_ack_opt.mdss_data_len = htons(dss_ack_opt.mdss_data_len);
693 memcpy(opt + optlen, &dss_ack_opt, sizeof (dss_ack_opt));
694 if (do_csum)
695 *((uint16_t *)(void *)(opt + optlen + sizeof (dss_ack_opt))) = dss_csum;
696
697 optlen += dssoptlen;
698
699 if (optlen > MAX_TCPOPTLEN)
700 panic("optlen too large");
701 tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW;
702 goto ret_optlen;
703 }
704
705 /* 32-bit DSS + 64-bit DACK option */
706 if ((tp->t_mpflags & TMPF_SEND_DSN) &&
707 (!send_64bit_dsn) &&
708 (send_64bit_ack) &&
709 (tp->t_mpflags & TMPF_MPTCP_ACKNOW)) {
710 struct mptcp_dss32_ack64_opt dss_ack_opt;
711 unsigned int dssoptlen = sizeof (dss_ack_opt);
712 uint16_t dss_csum;
713
714 if (do_csum)
715 dssoptlen += 2;
716
717 CHECK_OPTLEN;
718
719 bzero(&dss_ack_opt, sizeof (dss_ack_opt));
720 dss_ack_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH;
721 dss_ack_opt.mdss_copt.mdss_len = dssoptlen;
722 dss_ack_opt.mdss_copt.mdss_subtype = MPO_DSS;
723 dss_ack_opt.mdss_copt.mdss_flags |= MDSS_M | MDSS_A | MDSS_a;
724 dss_ack_opt.mdss_ack =
725 mptcp_hton64(mp_tp->mpt_rcvnxt);
726
727 CHECK_DATALEN;
728
729 mptcp_output_getm_dsnmap32(so, off, &dss_ack_opt.mdss_dsn,
730 &dss_ack_opt.mdss_subflow_seqn,
731 &dss_ack_opt.mdss_data_len,
732 &dss_csum);
733
734 if ((dss_ack_opt.mdss_data_len == 0) ||
735 (dss_ack_opt.mdss_dsn == 0)) {
736 goto do_ack64_only;
737 }
738
739 if (tp->t_mpflags & TMPF_SEND_DFIN) {
740 DO_FIN(dss_ack_opt);
741 }
742
743 dss_ack_opt.mdss_dsn = htonl(dss_ack_opt.mdss_dsn);
744 dss_ack_opt.mdss_subflow_seqn =
745 htonl(dss_ack_opt.mdss_subflow_seqn);
746 dss_ack_opt.mdss_data_len = htons(dss_ack_opt.mdss_data_len);
747 memcpy(opt + optlen, &dss_ack_opt, sizeof (dss_ack_opt));
748 if (do_csum)
749 *((uint16_t *)(void *)(opt + optlen + sizeof (dss_ack_opt))) = dss_csum;
750
751 optlen += dssoptlen;
752
753 if (optlen > MAX_TCPOPTLEN)
754 panic("optlen too large");
755 tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW;
756 goto ret_optlen;
757 }
758
759 if (tp->t_mpflags & TMPF_SEND_DFIN) {
760 unsigned int dssoptlen = sizeof(struct mptcp_dss_ack_opt);
761 struct mptcp_dss_ack_opt dss_ack_opt;
762 uint16_t dss_csum;
763
764 if (do_csum) {
765 uint64_t dss_val = mptcp_hton64(mp_tp->mpt_sndmax - 1);
766 uint16_t dlen = htons(1);
767 uint32_t sseq = 0;
768 uint32_t sum;
769
770
771 dssoptlen += 2;
772
773 sum = in_pseudo64(dss_val, sseq, dlen);
774 ADDCARRY(sum);
775 dss_csum = ~sum & 0xffff;
776 }
777
778 CHECK_OPTLEN;
779
780 bzero(&dss_ack_opt, sizeof (dss_ack_opt));
781
782 /*
783 * Data FIN occupies one sequence space.
784 * Don't send it if it has been Acked.
785 */
786 if ((mp_tp->mpt_sndnxt + 1 != mp_tp->mpt_sndmax) ||
787 (mp_tp->mpt_snduna == mp_tp->mpt_sndmax))
788 goto ret_optlen;
789
790 dss_ack_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH;
791 dss_ack_opt.mdss_copt.mdss_len = dssoptlen;
792 dss_ack_opt.mdss_copt.mdss_subtype = MPO_DSS;
793 dss_ack_opt.mdss_copt.mdss_flags |= MDSS_A | MDSS_M | MDSS_F;
794 dss_ack_opt.mdss_ack =
795 htonl(MPTCP_DATAACK_LOW32(mp_tp->mpt_rcvnxt));
796 dss_ack_opt.mdss_dsn =
797 htonl(MPTCP_DATASEQ_LOW32(mp_tp->mpt_sndmax - 1));
798 dss_ack_opt.mdss_subflow_seqn = 0;
799 dss_ack_opt.mdss_data_len = 1;
800 dss_ack_opt.mdss_data_len = htons(dss_ack_opt.mdss_data_len);
801 memcpy(opt + optlen, &dss_ack_opt, sizeof (dss_ack_opt));
802 if (do_csum)
803 *((uint16_t *)(void *)(opt + optlen + sizeof (dss_ack_opt))) = dss_csum;
804
805 optlen += dssoptlen;
806 }
807
808ret_optlen:
809 if (TRUE == *p_mptcp_acknow ) {
810 VERIFY(old_mpt_flags != 0);
811 u_int32_t new_mpt_flags = tp->t_mpflags & TMPF_MPTCP_SIGNALS;
812
813 /*
814 * If none of the above mpflags were acted on by
815 * this routine, reset these flags and set p_mptcp_acknow
816 * to false.
817 *
818 * XXX The reset value of p_mptcp_acknow can be used
819 * to communicate tcp_output to NOT send a pure ack without any
820 * MPTCP options as it will be treated as a dup ack.
821 * Since the instances of mptcp_setup_opts not acting on
822 * these options are mostly corner cases and sending a dup
823 * ack here would only have an impact if the system
824 * has sent consecutive dup acks before this false one,
825 * we haven't modified the logic in tcp_output to avoid
826 * that.
827 */
828 if (old_mpt_flags == new_mpt_flags) {
829 tp->t_mpflags &= ~TMPF_MPTCP_SIGNALS;
830 *p_mptcp_acknow = FALSE;
831 mptcplog((LOG_DEBUG, "%s: no action \n", __func__),
832 MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
833 } else {
834 mptcplog((LOG_DEBUG, "%s: acknow set, old flags %x new flags %x \n",
835 __func__, old_mpt_flags, new_mpt_flags),
836 MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
837 }
838 }
839
840 return optlen;
841}
842
843/*
844 * MPTCP Options Input Processing
845 */
846
847static int
848mptcp_sanitize_option(struct tcpcb *tp, int mptcp_subtype)
849{
850 struct mptcb *mp_tp = tptomptp(tp);
851 int ret = 1;
852
853 if (mp_tp == NULL) {
854 mptcplog((LOG_ERR, "%s: NULL mpsocket \n", __func__),
855 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
856 return (0);
857 }
858
859 switch (mptcp_subtype) {
860 case MPO_CAPABLE:
861 break;
862 case MPO_JOIN: /* fall through */
863 case MPO_DSS: /* fall through */
864 case MPO_FASTCLOSE: /* fall through */
865 case MPO_FAIL: /* fall through */
866 case MPO_REMOVE_ADDR: /* fall through */
867 case MPO_ADD_ADDR: /* fall through */
868 case MPO_PRIO: /* fall through */
869 if (mp_tp->mpt_state < MPTCPS_ESTABLISHED)
870 ret = 0;
871 break;
872 default:
873 ret = 0;
874 mptcplog((LOG_ERR, "%s: type = %d \n", __func__,
875 mptcp_subtype),
876 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
877 break;
878 }
879 return (ret);
880}
881
882static int
883mptcp_valid_mpcapable_common_opt(u_char *cp)
884{
885 struct mptcp_mpcapable_opt_common *rsp =
886 (struct mptcp_mpcapable_opt_common *)cp;
887
888 /* mmco_kind, mmco_len and mmco_subtype are validated before */
889
890 if (!(rsp->mmco_flags & MPCAP_PROPOSAL_SBIT))
891 return (0);
892
893 if (rsp->mmco_flags & (MPCAP_BBIT | MPCAP_CBIT | MPCAP_DBIT |
894 MPCAP_EBIT | MPCAP_FBIT | MPCAP_GBIT))
895 return (0);
896
897 return (1);
898}
899
900
901static void
902mptcp_do_mpcapable_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th,
903 int optlen)
904{
905 struct mptcp_mpcapable_opt_rsp *rsp = NULL;
906 struct mptcb *mp_tp = tptomptp(tp);
907
908 mpte_lock_assert_held(mp_tp->mpt_mpte);
909
910 /* Only valid on SYN/ACK */
911 if ((th->th_flags & (TH_SYN | TH_ACK)) != (TH_SYN | TH_ACK))
912 return;
913
914 /* Validate the kind, len, flags */
915 if (mptcp_valid_mpcapable_common_opt(cp) != 1) {
916 tcpstat.tcps_invalid_mpcap++;
917 return;
918 }
919
920 /* handle SYN/ACK retransmission by acknowledging with ACK */
921 if (mp_tp->mpt_state >= MPTCPS_ESTABLISHED)
922 return;
923
924 /* A SYN/ACK contains peer's key and flags */
925 if (optlen != sizeof (struct mptcp_mpcapable_opt_rsp)) {
926 /* complain */
927 mptcplog((LOG_ERR, "%s: SYN_ACK optlen = %d, sizeof mp opt = %lu \n",
928 __func__, optlen,
929 sizeof (struct mptcp_mpcapable_opt_rsp)),
930 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
931 tcpstat.tcps_invalid_mpcap++;
932 return;
933 }
934
935 /*
936 * If checksum flag is set, enable MPTCP checksum, even if
937 * it was not negotiated on the first SYN.
938 */
939 if (((struct mptcp_mpcapable_opt_common *)cp)->mmco_flags &
940 MPCAP_CHECKSUM_CBIT)
941 mp_tp->mpt_flags |= MPTCPF_CHECKSUM;
942
943 rsp = (struct mptcp_mpcapable_opt_rsp *)cp;
944 mp_tp->mpt_remotekey = rsp->mmc_localkey;
945 /* For now just downgrade to the peer's version */
946 mp_tp->mpt_peer_version = rsp->mmc_common.mmco_version;
947 if (rsp->mmc_common.mmco_version < mp_tp->mpt_version) {
948 mp_tp->mpt_version = rsp->mmc_common.mmco_version;
949 tcpstat.tcps_mp_verdowngrade++;
950 }
951 if (mptcp_init_remote_parms(mp_tp) != 0) {
952 tcpstat.tcps_invalid_mpcap++;
953 return;
954 }
955 tcp_heuristic_mptcp_success(tp);
956 tp->t_mpflags |= (TMPF_SND_KEYS | TMPF_MPTCP_TRUE);
957}
958
959
960static void
961mptcp_do_mpjoin_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th, int optlen)
962{
963#define MPTCP_JOPT_ERROR_PATH(tp) { \
964 tp->t_mpflags |= TMPF_RESET; \
965 tcpstat.tcps_invalid_joins++; \
966 if (tp->t_inpcb->inp_socket != NULL) { \
967 soevent(tp->t_inpcb->inp_socket, \
968 SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST); \
969 } \
970}
971 int error = 0;
972 struct mptcp_mpjoin_opt_rsp *join_rsp =
973 (struct mptcp_mpjoin_opt_rsp *)cp;
974
975 /* Only valid on SYN/ACK */
976 if ((th->th_flags & (TH_SYN | TH_ACK)) != (TH_SYN | TH_ACK))
977 return;
978
979 if (optlen != sizeof (struct mptcp_mpjoin_opt_rsp)) {
980 mptcplog((LOG_ERR, "%s: SYN_ACK: unexpected optlen = %d mp "
981 "option = %lu\n", __func__, optlen,
982 sizeof (struct mptcp_mpjoin_opt_rsp)),
983 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
984 tp->t_mpflags &= ~TMPF_PREESTABLISHED;
985 /* send RST and close */
986 MPTCP_JOPT_ERROR_PATH(tp);
987 return;
988 }
989
990 mptcp_set_raddr_rand(tp->t_local_aid, tptomptp(tp),
991 join_rsp->mmjo_addr_id, join_rsp->mmjo_rand);
992 error = mptcp_validate_join_hmac(tp,
993 (u_char*)&join_rsp->mmjo_mac, SHA1_TRUNCATED);
994 if (error) {
995 mptcplog((LOG_ERR, "%s: SYN_ACK error = %d \n", __func__, error),
996 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
997 tp->t_mpflags &= ~TMPF_PREESTABLISHED;
998 /* send RST and close */
999 MPTCP_JOPT_ERROR_PATH(tp);
1000 return;
1001 }
1002 tp->t_mpflags |= (TMPF_SENT_JOIN | TMPF_SND_JACK);
1003}
1004
1005static int
1006mptcp_validate_join_hmac(struct tcpcb *tp, u_char* hmac, int mac_len)
1007{
1008 u_char digest[SHA1_RESULTLEN] = {0};
1009 struct mptcb *mp_tp = tptomptp(tp);
1010 u_int32_t rem_rand, loc_rand;
1011
1012 mpte_lock_assert_held(mp_tp->mpt_mpte);
1013
1014 rem_rand = loc_rand = 0;
1015
1016 mptcp_get_rands(tp->t_local_aid, mp_tp, &loc_rand, &rem_rand);
1017 if ((rem_rand == 0) || (loc_rand == 0))
1018 return (-1);
1019
1020 mptcp_hmac_sha1(mp_tp->mpt_remotekey, mp_tp->mpt_localkey, rem_rand, loc_rand,
1021 digest);
1022
1023 if (bcmp(digest, hmac, mac_len) == 0)
1024 return (0); /* matches */
1025 else {
1026 printf("%s: remote key %llx local key %llx remote rand %x "
1027 "local rand %x \n", __func__, mp_tp->mpt_remotekey, mp_tp->mpt_localkey,
1028 rem_rand, loc_rand);
1029 return (-1);
1030 }
1031}
1032
1033/*
1034 * Update the mptcb send state variables, but the actual sbdrop occurs
1035 * in MPTCP layer
1036 */
1037void
1038mptcp_data_ack_rcvd(struct mptcb *mp_tp, struct tcpcb *tp, u_int64_t full_dack)
1039{
1040 u_int64_t acked = full_dack - mp_tp->mpt_snduna;
1041
1042 if (acked) {
1043 struct socket *mp_so = mptetoso(mp_tp->mpt_mpte);
1044
1045 if (acked > mp_so->so_snd.sb_cc) {
1046 if (acked > mp_so->so_snd.sb_cc + 1 ||
1047 mp_tp->mpt_state < MPTCPS_FIN_WAIT_1)
1048 mptcplog((LOG_ERR, "%s: acked %u, sb_cc %u full %u suna %u state %u\n",
1049 __func__, (uint32_t)acked, mp_so->so_snd.sb_cc,
1050 (uint32_t)full_dack, (uint32_t)mp_tp->mpt_snduna,
1051 mp_tp->mpt_state),
1052 MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_ERR);
1053
1054 sbdrop(&mp_so->so_snd, (int)mp_so->so_snd.sb_cc);
1055 } else {
1056 sbdrop(&mp_so->so_snd, acked);
1057 }
1058
1059 mp_tp->mpt_snduna += acked;
1060 /* In degraded mode, we may get some Data ACKs */
1061 if ((tp->t_mpflags & TMPF_TCP_FALLBACK) &&
1062 !(mp_tp->mpt_flags & MPTCPF_POST_FALLBACK_SYNC) &&
1063 MPTCP_SEQ_GT(mp_tp->mpt_sndnxt, mp_tp->mpt_snduna)) {
1064 /* bring back sndnxt to retransmit MPTCP data */
1065 mp_tp->mpt_sndnxt = mp_tp->mpt_dsn_at_csum_fail;
1066 mp_tp->mpt_flags |= MPTCPF_POST_FALLBACK_SYNC;
1067 tp->t_inpcb->inp_socket->so_flags1 |=
1068 SOF1_POST_FALLBACK_SYNC;
1069 }
1070
1071 mptcp_clean_reinjectq(mp_tp->mpt_mpte);
1072
1073 sowwakeup(mp_so);
1074 }
1075 if (full_dack == mp_tp->mpt_sndmax &&
1076 mp_tp->mpt_state >= MPTCPS_FIN_WAIT_1) {
1077 mptcp_close_fsm(mp_tp, MPCE_RECV_DATA_ACK);
1078 tp->t_mpflags &= ~TMPF_SEND_DFIN;
1079 }
1080}
1081
1082void
1083mptcp_update_window_wakeup(struct tcpcb *tp)
1084{
1085 struct mptcb *mp_tp = tptomptp(tp);
1086
1087 mpte_lock_assert_held(mp_tp->mpt_mpte);
1088
1089 if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
1090 mp_tp->mpt_sndwnd = tp->snd_wnd;
1091 mp_tp->mpt_sndwl1 = mp_tp->mpt_rcvnxt;
1092 mp_tp->mpt_sndwl2 = mp_tp->mpt_snduna;
1093 }
1094
1095 sowwakeup(tp->t_inpcb->inp_socket);
1096}
1097
1098static void
1099mptcp_update_window(struct mptcb *mp_tp, u_int64_t ack, u_int64_t seq, u_int32_t tiwin)
1100{
1101 if (SEQ_LT(mp_tp->mpt_sndwl1, seq) ||
1102 (mp_tp->mpt_sndwl1 == seq &&
1103 (SEQ_LT(mp_tp->mpt_sndwl2, ack) ||
1104 (mp_tp->mpt_sndwl2 == ack && tiwin > mp_tp->mpt_sndwnd)))) {
1105 mp_tp->mpt_sndwnd = tiwin;
1106 mp_tp->mpt_sndwl1 = seq;
1107 mp_tp->mpt_sndwl2 = ack;
1108 }
1109}
1110
1111static void
1112mptcp_do_dss_opt_ack_meat(u_int64_t full_dack, u_int64_t full_dsn,
1113 struct tcpcb *tp, u_int32_t tiwin)
1114{
1115 struct mptcb *mp_tp = tptomptp(tp);
1116 int close_notify = 0;
1117
1118 tp->t_mpflags |= TMPF_RCVD_DACK;
1119
1120 if (MPTCP_SEQ_LEQ(full_dack, mp_tp->mpt_sndmax) &&
1121 MPTCP_SEQ_GEQ(full_dack, mp_tp->mpt_snduna)) {
1122 mptcp_data_ack_rcvd(mp_tp, tp, full_dack);
1123 if (mp_tp->mpt_state > MPTCPS_FIN_WAIT_2)
1124 close_notify = 1;
1125 if (mp_tp->mpt_flags & MPTCPF_RCVD_64BITACK) {
1126 mp_tp->mpt_flags &= ~MPTCPF_RCVD_64BITACK;
1127 mp_tp->mpt_flags &= ~MPTCPF_SND_64BITDSN;
1128 }
1129 mptcp_notify_mpready(tp->t_inpcb->inp_socket);
1130 if (close_notify)
1131 mptcp_notify_close(tp->t_inpcb->inp_socket);
1132 } else {
1133 os_log_error(mptcp_log_handle,
1134 "%s: unexpected dack %u snduna %u sndmax %u\n",
1135 __func__, (u_int32_t)full_dack,
1136 (u_int32_t)mp_tp->mpt_snduna,
1137 (u_int32_t)mp_tp->mpt_sndmax);
1138 }
1139
1140 mptcp_update_window(mp_tp, full_dack, full_dsn, tiwin);
1141}
1142
1143static void
1144mptcp_do_dss_opt_meat(u_char *cp, struct tcpcb *tp, struct tcphdr *th)
1145{
1146 struct mptcp_dss_copt *dss_rsp = (struct mptcp_dss_copt *)cp;
1147 u_int64_t full_dack = 0;
1148 u_int32_t tiwin = th->th_win << tp->snd_scale;
1149 struct mptcb *mp_tp = tptomptp(tp);
1150 int csum_len = 0;
1151
1152#define MPTCP_DSS_OPT_SZ_CHK(len, expected_len) { \
1153 if (len != expected_len) { \
1154 mptcplog((LOG_ERR, "%s: bad len = %d dss: %x \n", __func__, \
1155 len, dss_rsp->mdss_flags), \
1156 (MPTCP_SOCKET_DBG|MPTCP_RECEIVER_DBG), \
1157 MPTCP_LOGLVL_LOG); \
1158 return; \
1159 } \
1160}
1161
1162 if (mp_tp->mpt_flags & MPTCPF_CHECKSUM)
1163 csum_len = 2;
1164
1165 dss_rsp->mdss_flags &= (MDSS_A|MDSS_a|MDSS_M|MDSS_m);
1166 switch (dss_rsp->mdss_flags) {
1167 case (MDSS_M):
1168 {
1169 /* 32-bit DSS, No Data ACK */
1170 struct mptcp_dsn_opt *dss_rsp1;
1171 dss_rsp1 = (struct mptcp_dsn_opt *)cp;
1172
1173 MPTCP_DSS_OPT_SZ_CHK(dss_rsp1->mdss_copt.mdss_len,
1174 sizeof (struct mptcp_dsn_opt) + csum_len);
1175 if (csum_len == 0)
1176 mptcp_update_dss_rcv_state(dss_rsp1, tp, 0);
1177 else
1178 mptcp_update_dss_rcv_state(dss_rsp1, tp,
1179 *(uint16_t *)(void *)(cp +
1180 (dss_rsp1->mdss_copt.mdss_len - csum_len)));
1181 break;
1182 }
1183 case (MDSS_A):
1184 {
1185 /* 32-bit Data ACK, no DSS */
1186 struct mptcp_data_ack_opt *dack_opt;
1187 dack_opt = (struct mptcp_data_ack_opt *)cp;
1188
1189 MPTCP_DSS_OPT_SZ_CHK(dack_opt->mdss_copt.mdss_len,
1190 sizeof (struct mptcp_data_ack_opt));
1191
1192 u_int32_t dack = dack_opt->mdss_ack;
1193 NTOHL(dack);
1194 MPTCP_EXTEND_DSN(mp_tp->mpt_snduna, dack, full_dack);
1195 mptcp_do_dss_opt_ack_meat(full_dack, mp_tp->mpt_sndwl1, tp, tiwin);
1196 break;
1197 }
1198 case (MDSS_M | MDSS_A):
1199 {
1200 /* 32-bit Data ACK + 32-bit DSS */
1201 struct mptcp_dss_ack_opt *dss_ack_rsp;
1202 dss_ack_rsp = (struct mptcp_dss_ack_opt *)cp;
1203 u_int64_t full_dsn;
1204 uint16_t csum = 0;
1205
1206 MPTCP_DSS_OPT_SZ_CHK(dss_ack_rsp->mdss_copt.mdss_len,
1207 sizeof (struct mptcp_dss_ack_opt) + csum_len);
1208
1209 u_int32_t dack = dss_ack_rsp->mdss_ack;
1210 NTOHL(dack);
1211 MPTCP_EXTEND_DSN(mp_tp->mpt_snduna, dack, full_dack);
1212
1213 NTOHL(dss_ack_rsp->mdss_dsn);
1214 NTOHL(dss_ack_rsp->mdss_subflow_seqn);
1215 NTOHS(dss_ack_rsp->mdss_data_len);
1216 MPTCP_EXTEND_DSN(mp_tp->mpt_rcvnxt, dss_ack_rsp->mdss_dsn, full_dsn);
1217
1218 mptcp_do_dss_opt_ack_meat(full_dack, full_dsn, tp, tiwin);
1219
1220 if (csum_len != 0)
1221 csum = *(uint16_t *)(void *)(cp + (dss_ack_rsp->mdss_copt.mdss_len - csum_len));
1222
1223 mptcp_update_rcv_state_meat(mp_tp, tp,
1224 full_dsn,
1225 dss_ack_rsp->mdss_subflow_seqn,
1226 dss_ack_rsp->mdss_data_len,
1227 csum);
1228 break;
1229 }
1230 case (MDSS_M | MDSS_m):
1231 {
1232 /* 64-bit DSS , No Data ACK */
1233 struct mptcp_dsn64_opt *dsn64;
1234 dsn64 = (struct mptcp_dsn64_opt *)cp;
1235 u_int64_t full_dsn;
1236 uint16_t csum = 0;
1237
1238 MPTCP_DSS_OPT_SZ_CHK(dsn64->mdss_copt.mdss_len,
1239 sizeof (struct mptcp_dsn64_opt) + csum_len);
1240
1241 mp_tp->mpt_flags |= MPTCPF_SND_64BITACK;
1242
1243 full_dsn = mptcp_ntoh64(dsn64->mdss_dsn);
1244 NTOHL(dsn64->mdss_subflow_seqn);
1245 NTOHS(dsn64->mdss_data_len);
1246
1247 if (csum_len != 0)
1248 csum = *(uint16_t *)(void *)(cp + dsn64->mdss_copt.mdss_len - csum_len);
1249
1250 mptcp_update_rcv_state_meat(mp_tp, tp, full_dsn,
1251 dsn64->mdss_subflow_seqn,
1252 dsn64->mdss_data_len,
1253 csum);
1254 break;
1255 }
1256 case (MDSS_A | MDSS_a):
1257 {
1258 /* 64-bit Data ACK, no DSS */
1259 struct mptcp_data_ack64_opt *dack64;
1260 dack64 = (struct mptcp_data_ack64_opt *)cp;
1261
1262 MPTCP_DSS_OPT_SZ_CHK(dack64->mdss_copt.mdss_len,
1263 sizeof (struct mptcp_data_ack64_opt));
1264
1265 mp_tp->mpt_flags |= MPTCPF_RCVD_64BITACK;
1266
1267 full_dack = mptcp_ntoh64(dack64->mdss_ack);
1268 mptcp_do_dss_opt_ack_meat(full_dack, mp_tp->mpt_sndwl1, tp, tiwin);
1269 break;
1270 }
1271 case (MDSS_M | MDSS_m | MDSS_A):
1272 {
1273 /* 64-bit DSS + 32-bit Data ACK */
1274 struct mptcp_dss64_ack32_opt *dss_ack_rsp;
1275 dss_ack_rsp = (struct mptcp_dss64_ack32_opt *)cp;
1276 u_int64_t full_dsn;
1277 uint16_t csum = 0;
1278
1279 MPTCP_DSS_OPT_SZ_CHK(dss_ack_rsp->mdss_copt.mdss_len,
1280 sizeof (struct mptcp_dss64_ack32_opt) + csum_len);
1281
1282 u_int32_t dack = dss_ack_rsp->mdss_ack;
1283 NTOHL(dack);
1284 mp_tp->mpt_flags |= MPTCPF_SND_64BITACK;
1285 MPTCP_EXTEND_DSN(mp_tp->mpt_snduna, dack, full_dack);
1286
1287 full_dsn = mptcp_ntoh64(dss_ack_rsp->mdss_dsn);
1288 NTOHL(dss_ack_rsp->mdss_subflow_seqn);
1289 NTOHS(dss_ack_rsp->mdss_data_len);
1290
1291 mptcp_do_dss_opt_ack_meat(full_dack, full_dsn, tp, tiwin);
1292
1293 if (csum_len != 0)
1294 csum = *(uint16_t *)(void *)(cp + dss_ack_rsp->mdss_copt.mdss_len - csum_len);
1295
1296 mptcp_update_rcv_state_meat(mp_tp, tp, full_dsn,
1297 dss_ack_rsp->mdss_subflow_seqn,
1298 dss_ack_rsp->mdss_data_len,
1299 csum);
1300
1301 break;
1302 }
1303 case (MDSS_M | MDSS_A | MDSS_a):
1304 {
1305 /* 32-bit DSS + 64-bit Data ACK */
1306 struct mptcp_dss32_ack64_opt *dss32_ack64_opt;
1307 dss32_ack64_opt = (struct mptcp_dss32_ack64_opt *)cp;
1308 u_int64_t full_dsn;
1309
1310 MPTCP_DSS_OPT_SZ_CHK(
1311 dss32_ack64_opt->mdss_copt.mdss_len,
1312 sizeof (struct mptcp_dss32_ack64_opt) + csum_len);
1313
1314 full_dack = mptcp_ntoh64(dss32_ack64_opt->mdss_ack);
1315 NTOHL(dss32_ack64_opt->mdss_dsn);
1316 mp_tp->mpt_flags |= MPTCPF_RCVD_64BITACK;
1317 MPTCP_EXTEND_DSN(mp_tp->mpt_rcvnxt,
1318 dss32_ack64_opt->mdss_dsn, full_dsn);
1319 NTOHL(dss32_ack64_opt->mdss_subflow_seqn);
1320 NTOHS(dss32_ack64_opt->mdss_data_len);
1321
1322 mptcp_do_dss_opt_ack_meat(full_dack, full_dsn, tp, tiwin);
1323 if (csum_len == 0)
1324 mptcp_update_rcv_state_meat(mp_tp, tp, full_dsn,
1325 dss32_ack64_opt->mdss_subflow_seqn,
1326 dss32_ack64_opt->mdss_data_len, 0);
1327 else
1328 mptcp_update_rcv_state_meat(mp_tp, tp, full_dsn,
1329 dss32_ack64_opt->mdss_subflow_seqn,
1330 dss32_ack64_opt->mdss_data_len,
1331 *(uint16_t *)(void *)(cp +
1332 dss32_ack64_opt->mdss_copt.mdss_len -
1333 csum_len));
1334 break;
1335 }
1336 case (MDSS_M | MDSS_m | MDSS_A | MDSS_a):
1337 {
1338 /* 64-bit DSS + 64-bit Data ACK */
1339 struct mptcp_dss64_ack64_opt *dss64_ack64;
1340 dss64_ack64 = (struct mptcp_dss64_ack64_opt *)cp;
1341 u_int64_t full_dsn;
1342
1343 MPTCP_DSS_OPT_SZ_CHK(dss64_ack64->mdss_copt.mdss_len,
1344 sizeof (struct mptcp_dss64_ack64_opt) + csum_len);
1345
1346 mp_tp->mpt_flags |= MPTCPF_RCVD_64BITACK;
1347 mp_tp->mpt_flags |= MPTCPF_SND_64BITACK;
1348 full_dsn = mptcp_ntoh64(dss64_ack64->mdss_dsn);
1349 full_dack = mptcp_ntoh64(dss64_ack64->mdss_dsn);
1350 mptcp_do_dss_opt_ack_meat(full_dack, full_dsn, tp, tiwin);
1351 NTOHL(dss64_ack64->mdss_subflow_seqn);
1352 NTOHS(dss64_ack64->mdss_data_len);
1353 if (csum_len == 0)
1354 mptcp_update_rcv_state_meat(mp_tp, tp, full_dsn,
1355 dss64_ack64->mdss_subflow_seqn,
1356 dss64_ack64->mdss_data_len, 0);
1357 else
1358 mptcp_update_rcv_state_meat(mp_tp, tp, full_dsn,
1359 dss64_ack64->mdss_subflow_seqn,
1360 dss64_ack64->mdss_data_len,
1361 *(uint16_t *)(void *)(cp +
1362 dss64_ack64->mdss_copt.mdss_len -
1363 csum_len));
1364 break;
1365 }
1366 default:
1367 mptcplog((LOG_DEBUG,"%s: File bug, DSS flags = %x\n",
1368 __func__, dss_rsp->mdss_flags),
1369 (MPTCP_SOCKET_DBG|MPTCP_RECEIVER_DBG),
1370 MPTCP_LOGLVL_LOG);
1371 break;
1372 }
1373}
1374
1375static void
1376mptcp_do_dss_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th, int optlen)
1377{
1378#pragma unused(optlen)
1379 struct mptcb *mp_tp = tptomptp(tp);
1380
1381 if (!mp_tp)
1382 return;
1383
1384 /* We may get Data ACKs just during fallback, so don't ignore those */
1385 if ((tp->t_mpflags & TMPF_MPTCP_TRUE) ||
1386 (tp->t_mpflags & TMPF_TCP_FALLBACK)) {
1387 struct mptcp_dss_copt *dss_rsp = (struct mptcp_dss_copt *)cp;
1388
1389 if (dss_rsp->mdss_subtype == MPO_DSS) {
1390 if (dss_rsp->mdss_flags & MDSS_F)
1391 tp->t_rcv_map.mpt_dfin = 1;
1392
1393 mptcp_do_dss_opt_meat(cp, tp, th);
1394 }
1395 }
1396}
1397
1398static void
1399mptcp_do_fastclose_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th)
1400{
1401 struct mptcb *mp_tp = NULL;
1402 struct mptcp_fastclose_opt *fc_opt = (struct mptcp_fastclose_opt *)cp;
1403
1404 if (th->th_flags != TH_ACK)
1405 return;
1406
1407 if (fc_opt->mfast_len != sizeof (struct mptcp_fastclose_opt)) {
1408 tcpstat.tcps_invalid_opt++;
1409 return;
1410 }
1411
1412 mp_tp = tptomptp(tp);
1413 if (!mp_tp)
1414 return;
1415
1416 if (fc_opt->mfast_key != mp_tp->mpt_localkey) {
1417 tcpstat.tcps_invalid_opt++;
1418 return;
1419 }
1420
1421 /*
1422 * fastclose could make us more vulnerable to attacks, hence
1423 * accept only those that are at the next expected sequence number.
1424 */
1425 if (th->th_seq != tp->rcv_nxt) {
1426 tcpstat.tcps_invalid_opt++;
1427 return;
1428 }
1429
1430 /* Reset this flow */
1431 tp->t_mpflags |= (TMPF_RESET | TMPF_FASTCLOSERCV);
1432
1433 if (tp->t_inpcb->inp_socket != NULL) {
1434 soevent(tp->t_inpcb->inp_socket,
1435 SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
1436 }
1437}
1438
1439
1440static void
1441mptcp_do_mpfail_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th)
1442{
1443 struct mptcb *mp_tp = NULL;
1444 struct mptcp_mpfail_opt *fail_opt = (struct mptcp_mpfail_opt *)cp;
1445 u_int32_t mdss_subflow_seqn = 0;
1446 int error = 0;
1447
1448 /*
1449 * mpfail could make us more vulnerable to attacks. Hence accept
1450 * only those that are the next expected sequence number.
1451 */
1452 if (th->th_seq != tp->rcv_nxt) {
1453 tcpstat.tcps_invalid_opt++;
1454 return;
1455 }
1456
1457 /* A packet without RST, must atleast have the ACK bit set */
1458 if ((th->th_flags != TH_ACK) && (th->th_flags != TH_RST))
1459 return;
1460
1461 if (fail_opt->mfail_len != sizeof (struct mptcp_mpfail_opt))
1462 return;
1463
1464 mp_tp = tptomptp(tp);
1465
1466 mp_tp->mpt_flags |= MPTCPF_RECVD_MPFAIL;
1467 mp_tp->mpt_dsn_at_csum_fail = mptcp_hton64(fail_opt->mfail_dsn);
1468 error = mptcp_get_map_for_dsn(tp->t_inpcb->inp_socket,
1469 mp_tp->mpt_dsn_at_csum_fail, &mdss_subflow_seqn);
1470 if (error == 0) {
1471 mp_tp->mpt_ssn_at_csum_fail = mdss_subflow_seqn;
1472 }
1473
1474 mptcp_notify_mpfail(tp->t_inpcb->inp_socket);
1475}
1476
1477void
1478tcp_do_mptcp_options(struct tcpcb *tp, u_char *cp, struct tcphdr *th,
1479 struct tcpopt *to, int optlen)
1480{
1481 int mptcp_subtype;
1482 struct mptcb *mp_tp = tptomptp(tp);
1483
1484 if (mp_tp == NULL)
1485 return;
1486
1487 mpte_lock_assert_held(mp_tp->mpt_mpte);
1488
1489 /* All MPTCP options have atleast 4 bytes */
1490 if (optlen < 4)
1491 return;
1492
1493 mptcp_subtype = (cp[2] >> 4);
1494
1495 if (mptcp_sanitize_option(tp, mptcp_subtype) == 0)
1496 return;
1497
1498 switch (mptcp_subtype) {
1499 case MPO_CAPABLE:
1500 mptcp_do_mpcapable_opt(tp, cp, th, optlen);
1501 break;
1502 case MPO_JOIN:
1503 mptcp_do_mpjoin_opt(tp, cp, th, optlen);
1504 break;
1505 case MPO_DSS:
1506 mptcp_do_dss_opt(tp, cp, th, optlen);
1507 break;
1508 case MPO_FASTCLOSE:
1509 mptcp_do_fastclose_opt(tp, cp, th);
1510 break;
1511 case MPO_FAIL:
1512 mptcp_do_mpfail_opt(tp, cp, th);
1513 break;
1514 case MPO_ADD_ADDR: /* fall through */
1515 case MPO_REMOVE_ADDR: /* fall through */
1516 case MPO_PRIO:
1517 to->to_flags |= TOF_MPTCP;
1518 break;
1519 default:
1520 break;
1521 }
1522 return;
1523}
1524
1525/* REMOVE_ADDR option is sent when a source address goes away */
1526static void
1527mptcp_send_remaddr_opt(struct tcpcb *tp, struct mptcp_remaddr_opt *opt)
1528{
1529 mptcplog((LOG_DEBUG,"%s: local id %d remove id %d \n",
1530 __func__, tp->t_local_aid, tp->t_rem_aid),
1531 (MPTCP_SOCKET_DBG|MPTCP_SENDER_DBG), MPTCP_LOGLVL_LOG);
1532
1533 bzero(opt, sizeof (*opt));
1534 opt->mr_kind = TCPOPT_MULTIPATH;
1535 opt->mr_len = sizeof (*opt);
1536 opt->mr_subtype = MPO_REMOVE_ADDR;
1537 opt->mr_addr_id = tp->t_rem_aid;
1538 tp->t_mpflags &= ~TMPF_SND_REM_ADDR;
1539}
1540
1541/* We send MP_PRIO option based on the values set by the SIOCSCONNORDER ioctl */
1542static int
1543mptcp_snd_mpprio(struct tcpcb *tp, u_char *cp, int optlen)
1544{
1545 struct mptcp_mpprio_addr_opt mpprio;
1546
1547 if (tp->t_state != TCPS_ESTABLISHED) {
1548 tp->t_mpflags &= ~TMPF_SND_MPPRIO;
1549 return (optlen);
1550 }
1551
1552 if ((MAX_TCPOPTLEN - optlen) <
1553 (int)sizeof (mpprio))
1554 return (optlen);
1555
1556 bzero(&mpprio, sizeof (mpprio));
1557 mpprio.mpprio_kind = TCPOPT_MULTIPATH;
1558 mpprio.mpprio_len = sizeof (mpprio);
1559 mpprio.mpprio_subtype = MPO_PRIO;
1560 if (tp->t_mpflags & TMPF_BACKUP_PATH)
1561 mpprio.mpprio_flags |= MPTCP_MPPRIO_BKP;
1562 mpprio.mpprio_addrid = tp->t_local_aid;
1563 memcpy(cp + optlen, &mpprio, sizeof (mpprio));
1564 optlen += sizeof (mpprio);
1565 tp->t_mpflags &= ~TMPF_SND_MPPRIO;
1566 mptcplog((LOG_DEBUG, "%s: aid = %d \n", __func__,
1567 tp->t_local_aid),
1568 (MPTCP_SOCKET_DBG|MPTCP_SENDER_DBG), MPTCP_LOGLVL_LOG);
1569 return (optlen);
1570}
1571
1572