1/*
2 * Copyright (c) 2000-2024 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29/*
30 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
31 * All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 * 3. Neither the name of the project nor the names of its contributors
42 * may be used to endorse or promote products derived from this software
43 * without specific prior written permission.
44 *
45 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55 * SUCH DAMAGE.
56 */
57
58/*
59 * Copyright (c) 1982, 1986, 1988, 1990, 1993
60 * The Regents of the University of California. All rights reserved.
61 *
62 * Redistribution and use in source and binary forms, with or without
63 * modification, are permitted provided that the following conditions
64 * are met:
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 * 2. Redistributions in binary form must reproduce the above copyright
68 * notice, this list of conditions and the following disclaimer in the
69 * documentation and/or other materials provided with the distribution.
70 * 3. All advertising materials mentioning features or use of this software
71 * must display the following acknowledgement:
72 * This product includes software developed by the University of
73 * California, Berkeley and its contributors.
74 * 4. Neither the name of the University nor the names of its contributors
75 * may be used to endorse or promote products derived from this software
76 * without specific prior written permission.
77 *
78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
81 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
88 * SUCH DAMAGE.
89 *
90 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
91 */
92/*
93 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
94 * support for mandatory and extensible security protections. This notice
95 * is included in support of clause 2.2 (b) of the Apple Public License,
96 * Version 2.0.
97 */
98
99#include <sys/param.h>
100#include <sys/malloc.h>
101#include <sys/mbuf.h>
102#include <sys/errno.h>
103#include <sys/protosw.h>
104#include <sys/socket.h>
105#include <sys/socketvar.h>
106#include <sys/systm.h>
107#include <sys/kernel.h>
108#include <sys/proc.h>
109#include <sys/kauth.h>
110#include <sys/mcache.h>
111#include <sys/sysctl.h>
112#include <kern/zalloc.h>
113#include <libkern/OSByteOrder.h>
114
115#include <pexpert/pexpert.h>
116#include <mach/sdt.h>
117
118#include <net/if.h>
119#include <net/route.h>
120#include <net/dlil.h>
121#include <net/net_api_stats.h>
122#include <net/net_osdep.h>
123#include <net/net_perf.h>
124
125#include <netinet/ip.h>
126#include <netinet/in.h>
127#include <netinet/in_var.h>
128#include <netinet/ip_var.h>
129#include <netinet6/in6_var.h>
130#include <netinet/ip6.h>
131#include <netinet/kpi_ipfilter_var.h>
132#include <netinet/in_tclass.h>
133
134#include <netinet6/ip6protosw.h>
135#include <netinet/icmp6.h>
136#include <netinet6/ip6_var.h>
137#include <netinet/in_pcb.h>
138#include <netinet6/nd6.h>
139#include <netinet6/scope6_var.h>
140#if IPSEC
141#include <netinet6/ipsec.h>
142#include <netinet6/ipsec6.h>
143#include <netkey/key.h>
144extern int ipsec_bypass;
145#endif /* IPSEC */
146
147#if NECP
148#include <net/necp.h>
149#endif /* NECP */
150
151#if DUMMYNET
152#include <netinet/ip_dummynet.h>
153#endif /* DUMMYNET */
154
155#if PF
156#include <net/pfvar.h>
157#endif /* PF */
158
159#include <net/sockaddr_utils.h>
160
161static int sysctl_reset_ip6_output_stats SYSCTL_HANDLER_ARGS;
162static int sysctl_ip6_output_measure_bins SYSCTL_HANDLER_ARGS;
163static int sysctl_ip6_output_getperf SYSCTL_HANDLER_ARGS;
164static int ip6_copyexthdr(struct mbuf **, caddr_t, int);
165static void ip6_out_cksum_stats(int, u_int32_t);
166static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t);
167static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int,
168 struct ip6_frag **);
169static int ip6_getpmtu(struct route_in6 *, struct route_in6 *,
170 struct ifnet *, struct in6_addr *, uint32_t, u_int32_t *);
171static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *, struct socket *,
172 struct sockopt *sopt);
173static int ip6_pcbopt(int, u_char *, int, struct ip6_pktopts **, int);
174static int ip6_getpcbopt(struct ip6_pktopts *, int, struct sockopt *);
175static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, zalloc_flags_t);
176static void im6o_trace(struct ip6_moptions *, int);
177static int ip6_setpktopt(int, u_char *, int, struct ip6_pktopts *, int,
178 int, int);
179static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *);
180static void ip6_output_checksum(struct ifnet *, uint32_t, struct mbuf *,
181 int, uint32_t, uint32_t);
182extern int udp_ctloutput(struct socket *, struct sockopt *);
183static int ip6_fragment_packet(struct mbuf **m,
184 struct ip6_pktopts *opt, struct ip6_out_args * ip6oa,
185 struct ip6_exthdrs *exthdrsp, struct ifnet *ifp,
186 uint32_t mtu, uint32_t unfragpartlen,
187 int nxt0, uint32_t optlen);
188
189SYSCTL_DECL(_net_inet6_ip6);
190
191static int ip6_output_measure = 0;
192SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, output_perf,
193 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
194 &ip6_output_measure, 0, sysctl_reset_ip6_output_stats, "I", "Do time measurement");
195
196static uint64_t ip6_output_measure_bins = 0;
197SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, output_perf_bins,
198 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_output_measure_bins, 0,
199 sysctl_ip6_output_measure_bins, "I",
200 "bins for chaining performance data histogram");
201
202static net_perf_t net_perf;
203SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, output_perf_data,
204 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
205 0, 0, sysctl_ip6_output_getperf, "S,net_perf",
206 "IP6 output performance data (struct net_perf, net/net_perf.h)");
207
208#define IM6O_TRACE_HIST_SIZE 32 /* size of trace history */
209
210/* For gdb */
211__private_extern__ unsigned int im6o_trace_hist_size = IM6O_TRACE_HIST_SIZE;
212
213struct ip6_moptions_dbg {
214 struct ip6_moptions im6o; /* ip6_moptions */
215 u_int16_t im6o_refhold_cnt; /* # of IM6O_ADDREF */
216 u_int16_t im6o_refrele_cnt; /* # of IM6O_REMREF */
217 /*
218 * Alloc and free callers.
219 */
220 ctrace_t im6o_alloc;
221 ctrace_t im6o_free;
222 /*
223 * Circular lists of IM6O_ADDREF and IM6O_REMREF callers.
224 */
225 ctrace_t im6o_refhold[IM6O_TRACE_HIST_SIZE];
226 ctrace_t im6o_refrele[IM6O_TRACE_HIST_SIZE];
227};
228
/*
 * ip6_moptions debugging switch: starts enabled (1) on DEBUG builds and
 * disabled (0) otherwise.
 */
#if DEBUG
#define IM6O_DEBUG_DEFAULT      1
#else
#define IM6O_DEBUG_DEFAULT      0
#endif /* !DEBUG */
static unsigned int im6o_debug = IM6O_DEBUG_DEFAULT;

/* Zone that ip6_moptions structures are allocated from, and its name. */
#define IM6O_ZONE_NAME          "ip6_moptions"
static struct zone *im6o_zone;
237
/*
 * Single-packet entry point for IPv6 output.
 *
 * All of the work is done by ip6_output_list(); this wrapper forwards its
 * arguments unchanged and passes 0 for the packetchain argument, so m0 is
 * handled as one packet rather than as a chain linked through m_nextpkt.
 * The return value is whatever ip6_output_list() returns.
 */
int
ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
    struct route_in6 *ro, int flags, struct ip6_moptions *im6o,
    struct ifnet **ifpp, struct ip6_out_args *ip6oa)
{
	const int no_chain = 0;

	return ip6_output_list(m0, no_chain, opt, ro, flags, im6o, ifpp,
	           ip6oa);
}
248
249/*
250 * IP6 output. Each packet in mbuf chain m contains a skeletal IP6
251 * header (with pri, len, nxt, hlim, src, dst).
252 * This function may modify ver and hlim only.
253 * The mbuf chain containing the packet will be freed.
254 * The mbuf opt, if present, will not be freed.
255 *
256 * If ro is non-NULL and has valid ro->ro_rt, route lookup would be
257 * skipped and ro->ro_rt would be used. Otherwise the result of route
258 * lookup is stored in ro->ro_rt.
259 *
260 * type of "mtu": rt_rmx.rmx_mtu is u_int32_t, ifnet.ifr_mtu is int, and
261 * nd_ifinfo.linkmtu is u_int32_t. so we use u_int32_t to hold largest one,
262 * which is rt_rmx.rmx_mtu.
263 */
264int
265ip6_output_list(struct mbuf *m0, int packetchain, struct ip6_pktopts *opt,
266 struct route_in6 *ro, int flags, struct ip6_moptions *im6o,
267 struct ifnet **ifpp, struct ip6_out_args *ip6oa)
268{
269 struct ip6_hdr *ip6;
270 u_char *nexthdrp;
271 struct ifnet *ifp = NULL, *origifp = NULL; /* refcnt'd */
272 struct ifnet **ifpp_save = ifpp;
273 struct mbuf *m, *mprev;
274 struct mbuf *sendchain = NULL, *sendchain_last = NULL;
275 struct mbuf *inputchain = NULL;
276 int nxt0 = 0;
277 struct route_in6 *ro_pmtu = NULL;
278 struct rtentry *rt = NULL;
279 struct sockaddr_in6 *dst = NULL, src_sa, dst_sa;
280 int error = 0;
281 struct in6_ifaddr *ia = NULL, *src_ia = NULL;
282 u_int32_t mtu = 0;
283 u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
284 struct ip6_rthdr *rh;
285 struct in6_addr finaldst;
286 ipfilter_t inject_filter_ref;
287 struct ipf_pktopts *ippo = NULL;
288 struct flowadv *adv = NULL;
289 uint32_t pktcnt = 0;
290 uint32_t packets_processed = 0;
291 struct timeval start_tv;
292#if PF
293 boolean_t skip_pf = (ip6oa != NULL) &&
294 (ip6oa->ip6oa_flags & IP6OAF_SKIP_PF);
295#endif
296
297#if DUMMYNET
298 struct m_tag *tag;
299 struct ip6_out_args saved_ip6oa;
300 struct sockaddr_in6 dst_buf;
301#endif /* DUMMYNET */
302#if IPSEC
303 struct socket *so = NULL;
304 struct secpolicy *sp = NULL;
305 struct route_in6 *ipsec_saved_route = NULL;
306 boolean_t needipsectun = FALSE;
307#endif /* IPSEC */
308#if NECP
309 necp_kernel_policy_result necp_result = 0;
310 necp_kernel_policy_result_parameter necp_result_parameter;
311 necp_kernel_policy_id necp_matched_policy_id = 0;
312#endif /* NECP */
313 struct {
314 struct ipf_pktopts ipf_pktopts;
315 struct ip6_exthdrs exthdrs;
316 struct route_in6 ip6route;
317#if IPSEC
318 struct ipsec_output_state ipsec_state;
319#endif /* IPSEC */
320#if NECP
321 struct route_in6 necp_route;
322#endif /* NECP */
323#if DUMMYNET
324 struct route_in6 saved_route;
325 struct route_in6 saved_ro_pmtu;
326 struct ip_fw_args args;
327#endif /* DUMMYNET */
328 } ip6obz;
329#define ipf_pktopts ip6obz.ipf_pktopts
330#define exthdrs ip6obz.exthdrs
331#define ip6route ip6obz.ip6route
332#define ipsec_state ip6obz.ipsec_state
333#define necp_route ip6obz.necp_route
334#define saved_route ip6obz.saved_route
335#define saved_ro_pmtu ip6obz.saved_ro_pmtu
336#define args ip6obz.args
337 union {
338 struct {
339 boolean_t select_srcif : 1;
340 boolean_t hdrsplit : 1;
341 boolean_t route_selected : 1;
342 boolean_t dontfrag : 1;
343#if IPSEC
344 boolean_t needipsec : 1;
345 boolean_t noipsec : 1;
346#endif /* IPSEC */
347 };
348 uint32_t raw;
349 } ip6obf = { .raw = 0 };
350
351 if (ip6_output_measure) {
352 net_perf_start_time(npp: &net_perf, tv: &start_tv);
353 }
354
355 VERIFY(m0->m_flags & M_PKTHDR);
356
357 /* zero out {saved_route, saved_ro_pmtu, ip6route, exthdrs, args} */
358 bzero(s: &ip6obz, n: sizeof(ip6obz));
359
360#if DUMMYNET
361 if (SLIST_EMPTY(&m0->m_pkthdr.tags)) {
362 goto tags_done;
363 }
364
365 /* Grab info from mtags prepended to the chain */
366 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID,
367 KERNEL_TAG_TYPE_DUMMYNET)) != NULL) {
368 struct dn_pkt_tag *dn_tag;
369
370 /*
371 * ip6_output_list() cannot handle chains of packets reinjected
372 * by dummynet. The same restriction applies to
373 * ip_output_list().
374 */
375 VERIFY(0 == packetchain);
376
377 dn_tag = (struct dn_pkt_tag *)(tag->m_tag_data);
378 args.fwa_pf_rule = dn_tag->dn_pf_rule;
379
380 SOCKADDR_COPY(&dn_tag->dn_dst6, &dst_buf, sizeof(dst_buf));
381 dst = &dst_buf;
382 ifp = dn_tag->dn_ifp;
383 if (ifp != NULL) {
384 ifnet_reference(interface: ifp);
385 }
386 flags = dn_tag->dn_flags;
387 if (dn_tag->dn_flags & IPV6_OUTARGS) {
388 saved_ip6oa = dn_tag->dn_ip6oa;
389 ip6oa = &saved_ip6oa;
390 }
391
392 saved_route = dn_tag->dn_ro6;
393 ro = &saved_route;
394 saved_ro_pmtu = dn_tag->dn_ro6_pmtu;
395 ro_pmtu = &saved_ro_pmtu;
396 origifp = dn_tag->dn_origifp;
397 if (origifp != NULL) {
398 ifnet_reference(interface: origifp);
399 }
400 mtu = dn_tag->dn_mtu;
401 unfragpartlen = dn_tag->dn_unfragpartlen;
402
403 bcopy(src: &dn_tag->dn_exthdrs, dst: &exthdrs, n: sizeof(exthdrs));
404
405 m_tag_delete(m0, tag);
406 }
407
408tags_done:
409#endif /* DUMMYNET */
410
411 m = m0;
412
413#if IPSEC
414 if (ipsec_bypass == 0) {
415 so = ipsec_getsocket(m);
416 if (so != NULL) {
417 (void) ipsec_setsocket(m, NULL);
418 }
419 /* If packet is bound to an interface, check bound policies */
420 if ((flags & IPV6_OUTARGS) &&
421 (ip6oa->ip6oa_flags & IP6OAF_BOUND_IF) &&
422 ip6oa->ip6oa_boundif != IFSCOPE_NONE) {
423 /* ip6obf.noipsec is a bitfield, use temp integer */
424 int noipsec = 0;
425
426 if (ipsec6_getpolicybyinterface(m, IPSEC_DIR_OUTBOUND,
427 flags, ip6oa, &noipsec, &sp) != 0) {
428 goto bad;
429 }
430
431 ip6obf.noipsec = (noipsec != 0);
432 }
433 }
434#endif /* IPSEC */
435
436 ippo = &ipf_pktopts;
437
438 if (flags & IPV6_OUTARGS) {
439 /*
440 * In the forwarding case, only the ifscope value is used,
441 * as source interface selection doesn't take place.
442 */
443 if ((ip6obf.select_srcif = (!(flags & (IPV6_FORWARDING |
444 IPV6_UNSPECSRC | IPV6_FLAG_NOSRCIFSEL)) &&
445 (ip6oa->ip6oa_flags & IP6OAF_SELECT_SRCIF)))) {
446 ipf_pktopts.ippo_flags |= IPPOF_SELECT_SRCIF;
447 }
448
449 if ((ip6oa->ip6oa_flags & IP6OAF_BOUND_IF) &&
450 ip6oa->ip6oa_boundif != IFSCOPE_NONE) {
451 ipf_pktopts.ippo_flags |= (IPPOF_BOUND_IF |
452 (ip6oa->ip6oa_boundif << IPPOF_SHIFT_IFSCOPE));
453 }
454
455 if (ip6oa->ip6oa_flags & IP6OAF_BOUND_SRCADDR) {
456 ipf_pktopts.ippo_flags |= IPPOF_BOUND_SRCADDR;
457 }
458 } else {
459 ip6obf.select_srcif = FALSE;
460 if (flags & IPV6_OUTARGS) {
461 ip6oa->ip6oa_boundif = IFSCOPE_NONE;
462 ip6oa->ip6oa_flags &= ~(IP6OAF_SELECT_SRCIF |
463 IP6OAF_BOUND_IF | IP6OAF_BOUND_SRCADDR);
464 }
465 }
466
467 if (flags & IPV6_OUTARGS) {
468 if (ip6oa->ip6oa_flags & IP6OAF_NO_CELLULAR) {
469 ipf_pktopts.ippo_flags |= IPPOF_NO_IFT_CELLULAR;
470 }
471 if (ip6oa->ip6oa_flags & IP6OAF_NO_EXPENSIVE) {
472 ipf_pktopts.ippo_flags |= IPPOF_NO_IFF_EXPENSIVE;
473 }
474 if (ip6oa->ip6oa_flags & IP6OAF_NO_CONSTRAINED) {
475 ipf_pktopts.ippo_flags |= IPPOF_NO_IFF_CONSTRAINED;
476 }
477 adv = &ip6oa->ip6oa_flowadv;
478 adv->code = FADV_SUCCESS;
479 ip6oa->ip6oa_flags &= ~IP6OAF_RET_MASK;
480 }
481
482 /*
483 * Clear out ifpp to be filled in after determining route. ifpp_save is
484 * used to keep old value to release reference properly and dtrace
485 * ipsec tunnel traffic properly.
486 */
487 if (ifpp != NULL && *ifpp != NULL) {
488 *ifpp = NULL;
489 }
490
491#if DUMMYNET
492 if (args.fwa_pf_rule) {
493 ip6 = mtod(m, struct ip6_hdr *);
494 VERIFY(ro != NULL); /* ro == saved_route */
495 goto check_with_pf;
496 }
497#endif /* DUMMYNET */
498
499#if NECP
500 /*
501 * Since all packets are assumed to come from same socket, necp lookup
502 * only needs to happen once per function entry.
503 */
504 necp_matched_policy_id = necp_ip6_output_find_policy_match(packet: m, flags,
505 ip6oa: (flags & IPV6_OUTARGS) ? ip6oa : NULL, rt: ro ? ro->ro_rt : NULL, result: &necp_result,
506 result_parameter: &necp_result_parameter);
507#endif /* NECP */
508
509 /*
510 * If a chain was passed in, prepare for the first iteration. For all
511 * other iterations, this work will be done at evaluateloop: label.
512 */
513 if (packetchain) {
514 /*
515 * Remove m from the chain during processing to avoid
516 * accidental frees on entire list.
517 */
518 inputchain = m->m_nextpkt;
519 m->m_nextpkt = NULL;
520 }
521
522loopit:
523 packets_processed++;
524 m->m_pkthdr.pkt_flags &= ~(PKTF_LOOP | PKTF_IFAINFO);
525 ip6 = mtod(m, struct ip6_hdr *);
526 nxt0 = ip6->ip6_nxt;
527 finaldst = ip6->ip6_dst;
528 ip6obf.hdrsplit = FALSE;
529 ro_pmtu = NULL;
530
531 if (!SLIST_EMPTY(&m->m_pkthdr.tags)) {
532 inject_filter_ref = ipf_get_inject_filter(m);
533 } else {
534 inject_filter_ref = NULL;
535 }
536
537#define MAKE_EXTHDR(hp, mp) do { \
538 if (hp != NULL) { \
539 struct ip6_ext *eh = (struct ip6_ext *)(hp); \
540 error = ip6_copyexthdr((mp), (caddr_t)(hp), \
541 ((eh)->ip6e_len + 1) << 3); \
542 if (error) \
543 goto freehdrs; \
544 } \
545} while (0)
546
547 if (opt != NULL) {
548 /* Hop-by-Hop options header */
549 MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
550 /* Destination options header(1st part) */
551 if (opt->ip6po_rthdr) {
552 /*
553 * Destination options header(1st part)
554 * This only makes sense with a routing header.
555 * See Section 9.2 of RFC 3542.
556 * Disabling this part just for MIP6 convenience is
557 * a bad idea. We need to think carefully about a
558 * way to make the advanced API coexist with MIP6
559 * options, which might automatically be inserted in
560 * the kernel.
561 */
562 MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
563 }
564 /* Routing header */
565 MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
566 /* Destination options header(2nd part) */
567 MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
568 }
569
570#undef MAKE_EXTHDR
571
572#if NECP
573 if (necp_matched_policy_id) {
574 necp_mark_packet_from_ip(packet: m, policy_id: necp_matched_policy_id);
575
576 switch (necp_result) {
577 case NECP_KERNEL_POLICY_RESULT_PASS:
578 if (necp_result_parameter.pass_flags & NECP_KERNEL_POLICY_PASS_NO_SKIP_IPSEC) {
579 break;
580 }
581 goto skip_ipsec;
582 case NECP_KERNEL_POLICY_RESULT_DROP:
583 error = EHOSTUNREACH;
584 ip6stat.ip6s_necp_policy_drop++;
585 goto freehdrs;
586 case NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT:
587 /*
588 * Flow divert packets should be blocked at the IP
589 * layer.
590 */
591 error = EHOSTUNREACH;
592 ip6stat.ip6s_necp_policy_drop++;
593 goto freehdrs;
594 case NECP_KERNEL_POLICY_RESULT_IP_TUNNEL: {
595 /*
596 * Verify that the packet is being routed to the tunnel
597 */
598 struct ifnet *policy_ifp =
599 necp_get_ifnet_from_result_parameter(
600 result_parameter: &necp_result_parameter);
601
602 /*
603 * Update the QOS marking policy if
604 * 1. upper layer asks it to do so
605 * 2. net_qos_policy_restricted is not set
606 * 3. qos_marking_gencount doesn't match necp_kernel_socket_policies_gencount (checked in necp_lookup_current_qos_marking)
607 */
608 if (ip6oa != NULL && (ip6oa->ip6oa_flags & IP6OAF_REDO_QOSMARKING_POLICY) &&
609 net_qos_policy_restricted != 0) {
610 bool qos_marking = (ip6oa->ip6oa_flags & IP6OAF_QOSMARKING_ALLOWED) != 0;
611 qos_marking = necp_lookup_current_qos_marking(qos_marking_gencount: &ip6oa->qos_marking_gencount, NULL, interface: policy_ifp, route_rule_id: necp_result_parameter.route_rule_id, old_qos_marking: qos_marking);
612 if (qos_marking) {
613 ip6oa->ip6oa_flags |= IP6OAF_QOSMARKING_ALLOWED;
614 } else {
615 ip6oa->ip6oa_flags &= ~IP6OAF_QOSMARKING_ALLOWED;
616 }
617 }
618
619 if (policy_ifp == ifp) {
620 goto skip_ipsec;
621 } else {
622 if (necp_packet_can_rebind_to_ifnet(packet: m,
623 interface: policy_ifp, new_route: (struct route *)&necp_route,
624 AF_INET6)) {
625 /*
626 * Set scoped index to the tunnel
627 * interface, since it is compatible
628 * with the packet. This will only work
629 * for callers who pass IPV6_OUTARGS,
630 * but that covers all of the clients
631 * we care about today.
632 */
633 if (flags & IPV6_OUTARGS) {
634 ip6oa->ip6oa_boundif =
635 policy_ifp->if_index;
636 ip6oa->ip6oa_flags |=
637 IP6OAF_BOUND_IF;
638 }
639 if (opt != NULL
640 && opt->ip6po_pktinfo != NULL) {
641 opt->ip6po_pktinfo->
642 ipi6_ifindex =
643 policy_ifp->if_index;
644 }
645 ro = &necp_route;
646 goto skip_ipsec;
647 } else {
648 error = ENETUNREACH;
649 ip6stat.ip6s_necp_policy_drop++;
650 goto freehdrs;
651 }
652 }
653 }
654 default:
655 break;
656 }
657 }
658#endif /* NECP */
659
660#if IPSEC
661 if (ipsec_bypass != 0 || ip6obf.noipsec) {
662 goto skip_ipsec;
663 }
664
665 if (sp == NULL) {
666 /* get a security policy for this packet */
667 if (so != NULL) {
668 sp = ipsec6_getpolicybysock(m, IPSEC_DIR_OUTBOUND,
669 so, &error);
670 } else {
671 sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND,
672 0, &error);
673 }
674 if (sp == NULL) {
675 IPSEC_STAT_INCREMENT(ipsec6stat.out_inval);
676 goto freehdrs;
677 }
678 }
679
680 error = 0;
681
682 /* check policy */
683 switch (sp->policy) {
684 case IPSEC_POLICY_DISCARD:
685 case IPSEC_POLICY_GENERATE:
686 /*
687 * This packet is just discarded.
688 */
689 IPSEC_STAT_INCREMENT(ipsec6stat.out_polvio);
690 goto freehdrs;
691
692 case IPSEC_POLICY_BYPASS:
693 case IPSEC_POLICY_NONE:
694 /* no need to do IPsec. */
695 ip6obf.needipsec = FALSE;
696 break;
697
698 case IPSEC_POLICY_IPSEC:
699 if (sp->req == NULL) {
700 /* acquire a policy */
701 error = key_spdacquire(sp);
702 goto freehdrs;
703 }
704 if (sp->ipsec_if) {
705 goto skip_ipsec;
706 } else {
707 ip6obf.needipsec = true;
708 }
709 break;
710
711 case IPSEC_POLICY_ENTRUST:
712 default:
713 printf("%s: Invalid policy found: %d\n", __func__, sp->policy);
714 break;
715 }
716skip_ipsec:
717#endif /* IPSEC */
718
719 /*
720 * Calculate the total length of the extension header chain.
721 * Keep the length of the unfragmentable part for fragmentation.
722 */
723 optlen = 0;
724 if (exthdrs.ip6e_hbh != NULL) {
725 optlen += exthdrs.ip6e_hbh->m_len;
726 }
727 if (exthdrs.ip6e_dest1 != NULL) {
728 optlen += exthdrs.ip6e_dest1->m_len;
729 }
730 if (exthdrs.ip6e_rthdr != NULL) {
731 optlen += exthdrs.ip6e_rthdr->m_len;
732 }
733 unfragpartlen = optlen + sizeof(struct ip6_hdr);
734
735 /* NOTE: we don't add AH/ESP length here. do that later. */
736 if (exthdrs.ip6e_dest2 != NULL) {
737 optlen += exthdrs.ip6e_dest2->m_len;
738 }
739
740 /*
741 * If we need IPsec, or there is at least one extension header,
742 * separate IP6 header from the payload.
743 */
744 if ((
745#if IPSEC
746 ip6obf.needipsec ||
747#endif /* IPSEC */
748 optlen) && !ip6obf.hdrsplit) {
749 if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
750 m = NULL;
751 goto freehdrs;
752 }
753 m = exthdrs.ip6e_ip6;
754 ip6obf.hdrsplit = true;
755 }
756
757 /* adjust pointer */
758 ip6 = mtod(m, struct ip6_hdr *);
759
760 /* adjust mbuf packet header length */
761 m->m_pkthdr.len += optlen;
762 plen = m->m_pkthdr.len - sizeof(*ip6);
763
764 /* If this is a jumbo payload, insert a jumbo payload option. */
765 if (plen > IPV6_MAXPACKET) {
766 if (!ip6obf.hdrsplit) {
767 if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
768 m = NULL;
769 goto freehdrs;
770 }
771 m = exthdrs.ip6e_ip6;
772 ip6obf.hdrsplit = true;
773 }
774 /* adjust pointer */
775 ip6 = mtod(m, struct ip6_hdr *);
776 if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0) {
777 goto freehdrs;
778 }
779 ip6->ip6_plen = 0;
780 } else {
781 ip6->ip6_plen = htons((uint16_t)plen);
782 }
783 /*
784 * Concatenate headers and fill in next header fields.
785 * Here we have, on "m"
786 * IPv6 payload
787 * and we insert headers accordingly. Finally, we should be getting:
788 * IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
789 *
790 * during the header composing process, "m" points to IPv6 header.
791 * "mprev" points to an extension header prior to esp.
792 */
793 nexthdrp = &ip6->ip6_nxt;
794 mprev = m;
795
796 /*
797 * we treat dest2 specially. this makes IPsec processing
798 * much easier. the goal here is to make mprev point the
799 * mbuf prior to dest2.
800 *
801 * result: IPv6 dest2 payload
802 * m and mprev will point to IPv6 header.
803 */
804 if (exthdrs.ip6e_dest2 != NULL) {
805 if (!ip6obf.hdrsplit) {
806 panic("assumption failed: hdr not split");
807 /* NOTREACHED */
808 }
809 exthdrs.ip6e_dest2->m_next = m->m_next;
810 m->m_next = exthdrs.ip6e_dest2;
811 *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
812 ip6->ip6_nxt = IPPROTO_DSTOPTS;
813 }
814
815#define MAKE_CHAIN(m, mp, p, i) do { \
816 if (m != NULL) { \
817 if (!ip6obf.hdrsplit) { \
818 panic("assumption failed: hdr not split"); \
819 /* NOTREACHED */ \
820 } \
821 *mtod((m), u_char *) = *(p); \
822 *(p) = (i); \
823 p = mtod((m), u_char *); \
824 (m)->m_next = (mp)->m_next; \
825 (mp)->m_next = (m); \
826 (mp) = (m); \
827 } \
828} while (0)
829 /*
830 * result: IPv6 hbh dest1 rthdr dest2 payload
831 * m will point to IPv6 header. mprev will point to the
832 * extension header prior to dest2 (rthdr in the above case).
833 */
834 MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
835 MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp, IPPROTO_DSTOPTS);
836 MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp, IPPROTO_ROUTING);
837
838 /* It is no longer safe to free the pointers in exthdrs. */
839 exthdrs.merged = TRUE;
840
841#undef MAKE_CHAIN
842
843#if IPSEC
844 if (ip6obf.needipsec && (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA)) {
845 in6_delayed_cksum_offset(m, 0, optlen, nxt0);
846 }
847#endif /* IPSEC */
848
849 if (!TAILQ_EMPTY(&ipv6_filters) &&
850 !((flags & IPV6_OUTARGS) &&
851 (ip6oa->ip6oa_flags & IP6OAF_INTCOPROC_ALLOWED) &&
852 (ip6oa->ip6oa_flags & IP6OAF_MANAGEMENT_ALLOWED)
853#if NECP
854 && !necp_packet_should_skip_filters(packet: m)
855#endif // NECP
856 )) {
857 struct ipfilter *filter;
858 int seen = (inject_filter_ref == NULL);
859 int fixscope = 0;
860
861 if (im6o != NULL && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
862 ippo->ippo_flags |= IPPOF_MCAST_OPTS;
863 IM6O_LOCK(im6o);
864 ippo->ippo_mcast_ifnet = im6o->im6o_multicast_ifp;
865 ippo->ippo_mcast_ttl = im6o->im6o_multicast_hlim;
866 ippo->ippo_mcast_loop = im6o->im6o_multicast_loop;
867 IM6O_UNLOCK(im6o);
868 }
869
870 /* Hack: embed the scope_id in the destination */
871 if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst) &&
872 (ip6->ip6_dst.s6_addr16[1] == 0) && (ro != NULL)) {
873 fixscope = 1;
874 ip6->ip6_dst.s6_addr16[1] =
875 htons((uint16_t)ro->ro_dst.sin6_scope_id);
876 }
877
878 ipf_ref();
879 TAILQ_FOREACH(filter, &ipv6_filters, ipf_link) {
880 /*
881 * Don't process packet twice if we've already seen it.
882 */
883 if (seen == 0) {
884 if ((struct ipfilter *)inject_filter_ref ==
885 filter) {
886 seen = 1;
887 }
888 } else if (filter->ipf_filter.ipf_output != NULL) {
889 errno_t result;
890
891 result = filter->ipf_filter.ipf_output(
892 filter->ipf_filter.cookie,
893 (mbuf_t *)&m, ippo);
894 if (result == EJUSTRETURN) {
895 ipf_unref();
896 m = NULL;
897 goto evaluateloop;
898 }
899 if (result != 0) {
900 ipf_unref();
901 goto bad;
902 }
903 }
904 }
905 ipf_unref();
906
907 ip6 = mtod(m, struct ip6_hdr *);
908 /* Hack: cleanup embedded scope_id if we put it there */
909 if (fixscope) {
910 ip6->ip6_dst.s6_addr16[1] = 0;
911 }
912 }
913
914#if IPSEC
915 if (ip6obf.needipsec) {
916 uint8_t segleft_org;
917
918 /*
919 * pointers after IPsec headers are not valid any more.
920 * other pointers need a great care too.
921 * (IPsec routines should not mangle mbufs prior to AH/ESP)
922 */
923 exthdrs.ip6e_dest2 = NULL;
924
925 if (exthdrs.ip6e_rthdr != NULL) {
926 rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
927 segleft_org = rh->ip6r_segleft;
928 rh->ip6r_segleft = 0;
929 } else {
930 rh = NULL;
931 segleft_org = 0;
932 }
933
934 ipsec_state.m = m;
935 error = ipsec6_output_trans(&ipsec_state, nexthdrp, mprev,
936 sp, flags, &needipsectun);
937 m = ipsec_state.m;
938 if (error) {
939 /* mbuf is already reclaimed in ipsec6_output_trans. */
940 m = NULL;
941 switch (error) {
942 case EHOSTUNREACH:
943 case ENETUNREACH:
944 case EMSGSIZE:
945 case ENOBUFS:
946 case ENOMEM:
947 break;
948 default:
949 printf("ip6_output (ipsec): error code %d\n",
950 error);
951 OS_FALLTHROUGH;
952 case ENOENT:
953 /* don't show these error codes to the user */
954 error = 0;
955 break;
956 }
957 goto bad;
958 }
959 if (exthdrs.ip6e_rthdr != NULL) {
960 /* ah6_output doesn't modify mbuf chain */
961 rh->ip6r_segleft = segleft_org;
962 }
963 }
964#endif /* IPSEC */
965
966 /* If there is a routing header, discard the packet. */
967 if (exthdrs.ip6e_rthdr != NULL) {
968 error = EINVAL;
969 goto bad;
970 }
971
972 /* Source address validation */
973 if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
974 !(flags & IPV6_UNSPECSRC)) {
975 error = EOPNOTSUPP;
976 ip6stat.ip6s_badscope++;
977 goto bad;
978 }
979 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
980 error = EOPNOTSUPP;
981 ip6stat.ip6s_badscope++;
982 goto bad;
983 }
984
985 ip6stat.ip6s_localout++;
986
987 /*
988 * Route packet.
989 */
990 if (ro == NULL) {
991 ro = &ip6route;
992 bzero(s: (caddr_t)ro, n: sizeof(*ro));
993 }
994 ro_pmtu = ro;
995 if (opt != NULL && opt->ip6po_rthdr) {
996 ro = &opt->ip6po_route;
997 }
998 dst = SIN6(&ro->ro_dst);
999
1000 if (ro->ro_rt != NULL) {
1001 RT_LOCK_ASSERT_NOTHELD(ro->ro_rt);
1002 }
1003 /*
1004 * if specified, try to fill in the traffic class field.
1005 * do not override if a non-zero value is already set.
1006 * we check the diffserv field and the ecn field separately.
1007 */
1008 if (opt != NULL && opt->ip6po_tclass >= 0) {
1009 int mask = 0;
1010
1011 if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0) {
1012 mask |= 0xfc;
1013 }
1014 if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0) {
1015 mask |= 0x03;
1016 }
1017 if (mask != 0) {
1018 ip6->ip6_flow |=
1019 htonl((opt->ip6po_tclass & mask) << 20);
1020 }
1021 }
1022
1023 if (((ntohl(ip6->ip6_flow & IPV6_FLOW_ECN_MASK) >> 20) & IPTOS_ECN_ECT1) == IPTOS_ECN_ECT1) {
1024 m->m_pkthdr.pkt_ext_flags |= PKTF_EXT_L4S;
1025 }
1026
1027 /* fill in or override the hop limit field, if necessary. */
1028 if (opt && opt->ip6po_hlim != -1) {
1029 ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
1030 } else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
1031 if (im6o != NULL) {
1032 IM6O_LOCK(im6o);
1033 ip6->ip6_hlim = im6o->im6o_multicast_hlim;
1034 IM6O_UNLOCK(im6o);
1035 } else {
1036 ip6->ip6_hlim = (uint8_t)ip6_defmcasthlim;
1037 }
1038 }
1039
1040 /*
1041 * If there is a cached route, check that it is to the same
1042 * destination and is still up. If not, free it and try again.
1043 * Test rt_flags without holding rt_lock for performance reasons;
1044 * if the route is down it will hopefully be caught by the layer
1045 * below (since it uses this route as a hint) or during the
1046 * next transmit.
1047 */
1048 if (ROUTE_UNUSABLE(ro) || dst->sin6_family != AF_INET6 ||
1049 !in6_are_addr_equal_scoped(&dst->sin6_addr, &ip6->ip6_dst, dst->sin6_scope_id, ip6_output_getdstifscope(m))) {
1050 ROUTE_RELEASE(ro);
1051 }
1052
1053 if (ro->ro_rt == NULL) {
1054 SOCKADDR_ZERO(dst, sizeof(*dst));
1055 dst->sin6_family = AF_INET6;
1056 dst->sin6_len = sizeof(struct sockaddr_in6);
1057 dst->sin6_addr = ip6->ip6_dst;
1058 }
1059#if IPSEC
1060 if (ip6obf.needipsec && needipsectun) {
1061#if CONFIG_DTRACE
1062 struct ifnet *trace_ifp = (ifpp_save != NULL) ? (*ifpp_save) : NULL;
1063#endif /* CONFIG_DTRACE */
1064 /*
1065 * All the extension headers will become inaccessible
1066 * (since they can be encrypted).
1067 * Don't panic, we need no more updates to extension headers
1068 * on inner IPv6 packet (since they are now encapsulated).
1069 *
1070 * IPv6 [ESP|AH] IPv6 [extension headers] payload
1071 */
1072 bzero(s: &exthdrs, n: sizeof(exthdrs));
1073 exthdrs.ip6e_ip6 = m;
1074
1075 ipsec_state.m = m;
1076 route_copyout((struct route *)&ipsec_state.ro, (struct route *)ro,
1077 sizeof(struct route_in6));
1078 ipsec_state.dst = SA(dst);
1079
1080 /* So that we can see packets inside the tunnel */
1081 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
1082 struct ip6_hdr *, ip6, struct ifnet *, trace_ifp,
1083 struct ip *, NULL, struct ip6_hdr *, ip6);
1084
1085 error = ipsec6_output_tunnel(&ipsec_state, sp, flags);
1086 /* tunneled in IPv4? packet is gone */
1087 if (ipsec_state.tunneled == 4) {
1088 m = NULL;
1089 goto evaluateloop;
1090 }
1091 m = ipsec_state.m;
1092 ipsec_saved_route = ro;
1093 ro = (struct route_in6 *)&ipsec_state.ro;
1094 dst = SIN6(ipsec_state.dst);
1095 if (error) {
1096 /* mbuf is already reclaimed in ipsec6_output_tunnel. */
1097 m = NULL;
1098 switch (error) {
1099 case EHOSTUNREACH:
1100 case ENETUNREACH:
1101 case EMSGSIZE:
1102 case ENOBUFS:
1103 case ENOMEM:
1104 break;
1105 default:
1106 printf("ip6_output (ipsec): error code %d\n",
1107 error);
1108 OS_FALLTHROUGH;
1109 case ENOENT:
1110 /* don't show these error codes to the user */
1111 error = 0;
1112 break;
1113 }
1114 goto bad;
1115 }
1116 /*
1117 * The packet has been encapsulated so the ifscope
1118 * is no longer valid since it does not apply to the
1119 * outer address: ignore the ifscope.
1120 */
1121 if (flags & IPV6_OUTARGS) {
1122 ip6oa->ip6oa_boundif = IFSCOPE_NONE;
1123 ip6oa->ip6oa_flags &= ~IP6OAF_BOUND_IF;
1124 }
1125 if (opt != NULL && opt->ip6po_pktinfo != NULL) {
1126 if (opt->ip6po_pktinfo->ipi6_ifindex != IFSCOPE_NONE) {
1127 opt->ip6po_pktinfo->ipi6_ifindex = IFSCOPE_NONE;
1128 }
1129 }
1130 exthdrs.ip6e_ip6 = m;
1131 }
1132#endif /* IPSEC */
1133
1134 /*
1135 * ifp should only be filled in for dummy net packets which will jump
1136 * to check_with_pf label.
1137 */
1138 if (ifp != NULL) {
1139 VERIFY(ip6obf.route_selected);
1140 }
1141
1142 /* adjust pointer */
1143 ip6 = mtod(m, struct ip6_hdr *);
1144
1145 if (ip6obf.select_srcif) {
1146 SOCKADDR_ZERO(&src_sa, sizeof(src_sa));
1147 src_sa.sin6_family = AF_INET6;
1148 src_sa.sin6_len = sizeof(src_sa);
1149 src_sa.sin6_addr = ip6->ip6_src;
1150 src_sa.sin6_scope_id = (!in6_embedded_scope && IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) ? ip6_output_getsrcifscope(m) : IFSCOPE_NONE;
1151 }
1152 SOCKADDR_ZERO(&dst_sa, sizeof(dst_sa));
1153 dst_sa.sin6_family = AF_INET6;
1154 dst_sa.sin6_len = sizeof(dst_sa);
1155 dst_sa.sin6_addr = ip6->ip6_dst;
1156 dst_sa.sin6_scope_id = (!in6_embedded_scope && IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) ? ip6_output_getdstifscope(m) : IFSCOPE_NONE;
1157
1158 /*
1159 * Only call in6_selectroute() on first iteration to avoid taking
1160 * multiple references on ifp and rt.
1161 *
1162 * in6_selectroute() might return an ifp with its reference held
1163 * even in the error case, so make sure to release its reference.
1164 * ip6oa may be NULL if IPV6_OUTARGS isn't set.
1165 */
1166 if (!ip6obf.route_selected) {
1167 error = in6_selectroute( ip6obf.select_srcif ? &src_sa : NULL,
1168 &dst_sa, opt, im6o, &src_ia, ro, &ifp, &rt, 0, ip6oa);
1169
1170 if (error != 0) {
1171 switch (error) {
1172 case EHOSTUNREACH:
1173 ip6stat.ip6s_noroute++;
1174 break;
1175 case EADDRNOTAVAIL:
1176 default:
1177 break; /* XXX statistics? */
1178 }
1179 if (ifp != NULL) {
1180 in6_ifstat_inc(ifp, ifs6_out_discard);
1181 }
1182 /* ifp (if non-NULL) will be released at the end */
1183 goto bad;
1184 }
1185 ip6obf.route_selected = true;
1186 }
1187 if (rt == NULL) {
1188 /*
1189 * If in6_selectroute() does not return a route entry,
1190 * dst may not have been updated.
1191 */
1192 *dst = dst_sa; /* XXX */
1193 }
1194
1195#if NECP
1196 /* Catch-all to check if the interface is allowed */
1197 if (!necp_packet_is_allowed_over_interface(packet: m, interface: ifp)) {
1198 error = EHOSTUNREACH;
1199 ip6stat.ip6s_necp_policy_drop++;
1200 goto bad;
1201 }
1202#endif /* NECP */
1203
1204 /*
1205 * then rt (for unicast) and ifp must be non-NULL valid values.
1206 */
1207 if (!(flags & IPV6_FORWARDING)) {
1208 in6_ifstat_inc_na(ifp, ifs6_out_request);
1209 }
1210 if (rt != NULL) {
1211 RT_LOCK(rt);
1212 if (ia == NULL) {
1213 ia = (struct in6_ifaddr *)(rt->rt_ifa);
1214 if (ia != NULL) {
1215 ifa_addref(ifa: &ia->ia_ifa);
1216 }
1217 }
1218 rt->rt_use++;
1219 RT_UNLOCK(rt);
1220 }
1221
1222 /*
1223 * The outgoing interface must be in the zone of source and
1224 * destination addresses (except local/loopback). We should
1225 * use ia_ifp to support the case of sending packets to an
1226 * address of our own.
1227 */
1228 if (ia != NULL && ia->ia_ifp) {
1229 ifnet_reference(interface: ia->ia_ifp); /* for origifp */
1230 if (origifp != NULL) {
1231 ifnet_release(interface: origifp);
1232 }
1233 origifp = ia->ia_ifp;
1234 } else {
1235 if (ifp != NULL) {
1236 ifnet_reference(interface: ifp); /* for origifp */
1237 }
1238 if (origifp != NULL) {
1239 ifnet_release(interface: origifp);
1240 }
1241 origifp = ifp;
1242 }
1243
1244 /* skip scope enforcements for local/loopback route */
1245 if (rt == NULL || !(rt->rt_ifp->if_flags & IFF_LOOPBACK)) {
1246 struct in6_addr src0, dst0;
1247 u_int32_t zone;
1248
1249 src0 = ip6->ip6_src;
1250 if (in6_setscope(&src0, origifp, &zone)) {
1251 goto badscope;
1252 }
1253 SOCKADDR_ZERO(&src_sa, sizeof(src_sa));
1254 src_sa.sin6_family = AF_INET6;
1255 src_sa.sin6_len = sizeof(src_sa);
1256 src_sa.sin6_addr = ip6->ip6_src;
1257 src_sa.sin6_scope_id = (!in6_embedded_scope && IN6_IS_SCOPE_EMBED(&src_sa.sin6_addr)) ? ip6_output_getsrcifscope(m) : IFSCOPE_NONE;
1258 if ((sa6_recoverscope(&src_sa, TRUE) ||
1259 zone != src_sa.sin6_scope_id)) {
1260 goto badscope;
1261 }
1262
1263 dst0 = ip6->ip6_dst;
1264 if ((in6_setscope(&dst0, origifp, &zone))) {
1265 goto badscope;
1266 }
1267 /* re-initialize to be sure */
1268 SOCKADDR_ZERO(&dst_sa, sizeof(dst_sa));
1269 dst_sa.sin6_family = AF_INET6;
1270 dst_sa.sin6_len = sizeof(dst_sa);
1271 dst_sa.sin6_addr = ip6->ip6_dst;
1272 dst_sa.sin6_scope_id = (!in6_embedded_scope && IN6_IS_SCOPE_EMBED(&dst_sa.sin6_addr)) ? ip6_output_getdstifscope(m) : IFSCOPE_NONE;
1273 if ((sa6_recoverscope(&dst_sa, TRUE) ||
1274 zone != dst_sa.sin6_scope_id)) {
1275 goto badscope;
1276 }
1277
1278 /* scope check is done. */
1279 goto routefound;
1280
1281badscope:
1282 ip6stat.ip6s_badscope++;
1283 in6_ifstat_inc(origifp, ifs6_out_discard);
1284 if (error == 0) {
1285 error = EHOSTUNREACH; /* XXX */
1286 }
1287 goto bad;
1288 }
1289
1290routefound:
1291 if (rt != NULL && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
1292 if (opt != NULL && opt->ip6po_nextroute.ro_rt) {
1293 /*
1294 * The nexthop is explicitly specified by the
1295 * application. We assume the next hop is an IPv6
1296 * address.
1297 */
1298 dst = SIN6(opt->ip6po_nexthop);
1299 } else if ((rt->rt_flags & RTF_GATEWAY)) {
1300 dst = SIN6(rt->rt_gateway);
1301 }
1302 /*
1303 * For packets destined to local/loopback, record the
		 * source interface (which owns the source
1305 * address), as well as the output interface. This is
1306 * needed to reconstruct the embedded zone for the
1307 * link-local address case in ip6_input().
1308 */
1309 if (ia != NULL && (ifp->if_flags & IFF_LOOPBACK)) {
1310 uint32_t srcidx;
1311
1312 if (src_ia != NULL) {
1313 srcidx = src_ia->ia_ifp->if_index;
1314 } else if (ro->ro_srcia != NULL) {
1315 srcidx = ro->ro_srcia->ifa_ifp->if_index;
1316 } else {
1317 srcidx = 0;
1318 }
1319
1320 ip6_setsrcifaddr_info(m, srcidx, NULL);
1321 ip6_setdstifaddr_info(m, 0, ia);
1322 }
1323 }
1324
1325 if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
1326 m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */
1327 } else {
1328 struct in6_multi *in6m;
1329
1330 m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
1331 in6_ifstat_inc_na(ifp, ifs6_out_mcast);
1332
1333 /*
1334 * Confirm that the outgoing interface supports multicast.
1335 */
1336 if (!(ifp->if_flags & IFF_MULTICAST)) {
1337 ip6stat.ip6s_noroute++;
1338 in6_ifstat_inc(ifp, ifs6_out_discard);
1339 error = ENETUNREACH;
1340 goto bad;
1341 }
1342 in6_multihead_lock_shared();
1343 IN6_LOOKUP_MULTI(&ip6->ip6_dst, ifp, in6m);
1344 in6_multihead_lock_done();
1345 if (im6o != NULL) {
1346 IM6O_LOCK(im6o);
1347 }
1348 if (in6m != NULL &&
1349 (im6o == NULL || im6o->im6o_multicast_loop)) {
1350 if (im6o != NULL) {
1351 IM6O_UNLOCK(im6o);
1352 }
1353 /*
1354 * If we belong to the destination multicast group
1355 * on the outgoing interface, and the caller did not
1356 * forbid loopback, loop back a copy.
1357 */
1358 ip6_mloopback(NULL, ifp, m, dst, optlen, nxt0);
1359 } else if (im6o != NULL) {
1360 IM6O_UNLOCK(im6o);
1361 }
1362 if (in6m != NULL) {
1363 IN6M_REMREF(in6m);
1364 }
1365 /*
1366 * Multicasts with a hoplimit of zero may be looped back,
1367 * above, but must not be transmitted on a network.
1368 * Also, multicasts addressed to the loopback interface
1369 * are not sent -- the above call to ip6_mloopback() will
1370 * loop back a copy if this host actually belongs to the
1371 * destination group on the loopback interface.
1372 */
1373 if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
1374 IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
1375 /* remove m from the packetchain and continue looping */
1376 if (m != NULL) {
1377 m_freem(m);
1378 }
1379 m = NULL;
1380 goto evaluateloop;
1381 }
1382 }
1383
1384 /*
	 * Fill the outgoing interface to tell the upper layer
1386 * to increment per-interface statistics.
1387 */
1388 if (ifpp != NULL && *ifpp == NULL) {
1389 ifnet_reference(interface: ifp); /* for caller */
1390 *ifpp = ifp;
1391 }
1392
1393 /* Determine path MTU. */
1394 if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, ifp->if_index, &mtu)) != 0) {
1395 goto bad;
1396 }
1397
1398 /*
1399 * The caller of this function may specify to use the minimum MTU
1400 * in some cases.
1401 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
1402 * setting. The logic is a bit complicated; by default, unicast
1403 * packets will follow path MTU while multicast packets will be sent at
1404 * the minimum MTU. If IP6PO_MINMTU_ALL is specified, all packets
1405 * including unicast ones will be sent at the minimum MTU. Multicast
1406 * packets will always be sent at the minimum MTU unless
1407 * IP6PO_MINMTU_DISABLE is explicitly specified.
1408 * See RFC 3542 for more details.
1409 */
1410 if (mtu > IPV6_MMTU) {
1411 if ((flags & IPV6_MINMTU)) {
1412 mtu = IPV6_MMTU;
1413 } else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL) {
1414 mtu = IPV6_MMTU;
1415 } else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
1416 (opt == NULL ||
1417 opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
1418 mtu = IPV6_MMTU;
1419 }
1420 }
1421
1422 /*
1423 * clear embedded scope identifiers if necessary.
1424 * in6_clearscope will touch the addresses only when necessary.
1425 */
1426 in6_clearscope(&ip6->ip6_src);
1427 in6_clearscope(&ip6->ip6_dst);
1428 /*
1429 * If the outgoing packet contains a hop-by-hop options header,
1430 * it must be examined and processed even by the source node.
1431 * (RFC 2460, section 4.)
1432 */
1433 if (exthdrs.ip6e_hbh != NULL) {
1434 struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
1435 u_int32_t dummy; /* XXX unused */
1436 uint32_t oplen = 0; /* for ip6_process_hopopts() */
1437#if DIAGNOSTIC
1438 if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len) {
1439 panic("ip6e_hbh is not continuous");
1440 }
1441#endif
1442 /*
1443 * XXX: If we have to send an ICMPv6 error to the sender,
1444 * we need the M_LOOP flag since icmp6_error() expects
1445 * the IPv6 and the hop-by-hop options header are
1446 * continuous unless the flag is set.
1447 */
1448 m->m_flags |= M_LOOP;
1449 m->m_pkthdr.rcvif = ifp;
1450 if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
1451 ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh),
1452 &dummy, &oplen) < 0) {
1453 /*
1454 * m was already freed at this point. Set to NULL so it
1455 * is not re-freed at end of ip6_output_list.
1456 */
1457 m = NULL;
1458 error = EINVAL; /* better error? */
1459 goto bad;
1460 }
1461 m->m_flags &= ~M_LOOP; /* XXX */
1462 m->m_pkthdr.rcvif = NULL;
1463 }
1464
1465#if DUMMYNET
1466check_with_pf:
1467#endif /* DUMMYNET */
1468#if PF
1469 if (PF_IS_ENABLED && !skip_pf) {
1470#if DUMMYNET
1471
1472 /*
1473 * TODO: Need to save opt->ip6po_flags for reinjection
1474 * rdar://10434993
1475 */
1476 args.fwa_oif = ifp;
1477 args.fwa_oflags = flags;
1478 if (flags & IPV6_OUTARGS) {
1479 args.fwa_ip6oa = ip6oa;
1480 }
1481 args.fwa_ro6 = ro;
1482 args.fwa_dst6 = dst;
1483 args.fwa_ro6_pmtu = ro_pmtu;
1484 args.fwa_origifp = origifp;
1485 args.fwa_mtu = mtu;
1486 args.fwa_unfragpartlen = unfragpartlen;
1487 args.fwa_exthdrs = &exthdrs;
1488 /* Invoke outbound packet filter */
1489 error = pf_af_hook(ifp, NULL, &m, AF_INET6, FALSE, &args);
1490#else /* !DUMMYNET */
1491 error = pf_af_hook(ifp, NULL, &m, AF_INET6, FALSE, NULL);
1492#endif /* !DUMMYNET */
1493
1494 if (error != 0 || m == NULL) {
1495 if (m != NULL) {
1496 panic("%s: unexpected packet %p",
1497 __func__, m);
1498 /* NOTREACHED */
1499 }
1500 /* m was already freed by callee and is now NULL. */
1501 goto evaluateloop;
1502 }
1503 ip6 = mtod(m, struct ip6_hdr *);
1504 }
1505#endif /* PF */
1506
1507#ifdef IPSEC
1508 /* clean ipsec history before fragmentation */
1509 ipsec_delaux(m);
1510#endif /* IPSEC */
1511
1512 if (ip6oa != NULL) {
1513 u_int8_t dscp;
1514
1515 dscp = (ntohl(ip6->ip6_flow) & IP6FLOW_DSCP_MASK) >> IP6FLOW_DSCP_SHIFT;
1516
1517 error = set_packet_qos(m, ifp,
1518 ip6oa->ip6oa_flags & IP6OAF_QOSMARKING_ALLOWED ? TRUE : FALSE,
1519 ip6oa->ip6oa_sotc, ip6oa->ip6oa_netsvctype, &dscp);
1520 if (error == 0) {
1521 ip6->ip6_flow &= ~htonl(IP6FLOW_DSCP_MASK);
1522 ip6->ip6_flow |= htonl((u_int32_t)dscp << IP6FLOW_DSCP_SHIFT);
1523 } else {
1524 printf("%s if_dscp_for_mbuf() error %d\n", __func__, error);
1525 error = 0;
1526 }
1527 }
1528 /*
1529 * Determine whether fragmentation is necessary. If so, m is passed
1530 * back as a chain of packets and original mbuf is freed. Otherwise, m
1531 * is unchanged.
1532 */
1533 error = ip6_fragment_packet(m: &m, opt, ip6oa,
1534 exthdrsp: &exthdrs, ifp, mtu, unfragpartlen, nxt0,
1535 optlen);
1536
1537 if (error) {
1538 goto bad;
1539 }
1540
1541/*
1542 * The evaluateloop label is where we decide whether to continue looping over
1543 * packets or call into nd code to send.
1544 */
1545evaluateloop:
1546
1547 /*
1548 * m may be NULL when we jump to the evaluateloop label from PF or
1549 * other code that can drop packets.
1550 */
1551 if (m != NULL) {
1552 /*
1553 * If we already have a chain to send, tack m onto the end.
1554 * Otherwise make m the start and end of the to-be-sent chain.
1555 */
1556 if (sendchain != NULL) {
1557 sendchain_last->m_nextpkt = m;
1558 } else {
1559 sendchain = m;
1560 }
1561
1562 /* Fragmentation may mean m is a chain. Find the last packet. */
1563 while (m->m_nextpkt) {
1564 m = m->m_nextpkt;
1565 }
1566 sendchain_last = m;
1567 pktcnt++;
1568 }
1569
1570 /* Fill in next m from inputchain as appropriate. */
1571 m = inputchain;
1572 if (m != NULL) {
1573 /* Isolate m from rest of input chain. */
1574 inputchain = m->m_nextpkt;
1575 m->m_nextpkt = NULL;
1576
1577 /*
1578 * Clear exthdrs and ipsec_state so stale contents are not
1579 * reused. Note this also clears the exthdrs.merged flag.
1580 */
1581 bzero(s: &exthdrs, n: sizeof(exthdrs));
1582 bzero(s: &ipsec_state, n: sizeof(ipsec_state));
1583
1584 /* Continue looping. */
1585 goto loopit;
1586 }
1587
1588 /*
1589 * If we get here, there's no more mbufs in inputchain, so send the
1590 * sendchain if there is one.
1591 */
1592 if (pktcnt > 0) {
1593 error = nd6_output_list(ifp, origifp, sendchain, dst,
1594 ro->ro_rt, adv);
1595 /*
1596 * Fall through to done label even in error case because
1597 * nd6_output_list frees packetchain in both success and
1598 * failure cases.
1599 */
1600 }
1601
1602done:
1603 if (ifpp_save != NULL && *ifpp_save != NULL) {
1604 ifnet_release(interface: *ifpp_save);
1605 *ifpp_save = NULL;
1606 }
1607 ROUTE_RELEASE(&ip6route);
1608#if IPSEC
1609 ROUTE_RELEASE(&ipsec_state.ro);
1610 if (sp != NULL) {
1611 key_freesp(sp, KEY_SADB_UNLOCKED);
1612 }
1613#endif /* IPSEC */
1614#if NECP
1615 ROUTE_RELEASE(&necp_route);
1616#endif /* NECP */
1617#if DUMMYNET
1618 ROUTE_RELEASE(&saved_route);
1619 ROUTE_RELEASE(&saved_ro_pmtu);
1620#endif /* DUMMYNET */
1621
1622 if (ia != NULL) {
1623 ifa_remref(ifa: &ia->ia_ifa);
1624 }
1625 if (src_ia != NULL) {
1626 ifa_remref(ifa: &src_ia->ia_ifa);
1627 }
1628 if (ifp != NULL) {
1629 ifnet_release(interface: ifp);
1630 }
1631 if (origifp != NULL) {
1632 ifnet_release(interface: origifp);
1633 }
1634 if (ip6_output_measure) {
1635 net_perf_measure_time(npp: &net_perf, start: &start_tv, num_pkts: packets_processed);
1636 net_perf_histogram(npp: &net_perf, num_pkts: packets_processed);
1637 }
1638 return error;
1639
1640freehdrs:
1641 if (exthdrs.ip6e_hbh != NULL) {
1642 if (exthdrs.merged) {
1643 panic("Double free of ip6e_hbh");
1644 }
1645 m_freem(exthdrs.ip6e_hbh);
1646 }
1647 if (exthdrs.ip6e_dest1 != NULL) {
1648 if (exthdrs.merged) {
1649 panic("Double free of ip6e_dest1");
1650 }
1651 m_freem(exthdrs.ip6e_dest1);
1652 }
1653 if (exthdrs.ip6e_rthdr != NULL) {
1654 if (exthdrs.merged) {
1655 panic("Double free of ip6e_rthdr");
1656 }
1657 m_freem(exthdrs.ip6e_rthdr);
1658 }
1659 if (exthdrs.ip6e_dest2 != NULL) {
1660 if (exthdrs.merged) {
1661 panic("Double free of ip6e_dest2");
1662 }
1663 m_freem(exthdrs.ip6e_dest2);
1664 }
1665 /* FALLTHRU */
1666bad:
1667 if (inputchain != NULL) {
1668 m_freem_list(inputchain);
1669 }
1670 if (sendchain != NULL) {
1671 m_freem_list(sendchain);
1672 }
1673 if (m != NULL) {
1674 m_freem(m);
1675 }
1676
1677 goto done;
1678
1679#undef ipf_pktopts
1680#undef exthdrs
1681#undef ip6route
1682#undef ipsec_state
1683#undef saved_route
1684#undef saved_ro_pmtu
1685#undef args
1686}
1687
1688/* ip6_fragment_packet
1689 *
1690 * The fragmentation logic is rather complex:
1691 * 1: normal case (dontfrag == 0)
1692 * 1-a: send as is if tlen <= path mtu
1693 * 1-b: fragment if tlen > path mtu
1694 *
1695 * 2: if user asks us not to fragment (dontfrag == 1)
1696 * 2-a: send as is if tlen <= interface mtu
1697 * 2-b: error if tlen > interface mtu
1698 */
1699
1700static int
1701ip6_fragment_packet(struct mbuf **mptr, struct ip6_pktopts *opt,
1702 struct ip6_out_args *ip6oa, struct ip6_exthdrs *exthdrsp,
1703 struct ifnet *ifp, uint32_t mtu, uint32_t unfragpartlen,
1704 int nxt0, uint32_t optlen)
1705{
1706 VERIFY(NULL != mptr);
1707 struct mbuf *m = *mptr;
1708 int error = 0;
1709 uint32_t tlen = m->m_pkthdr.len;
1710 boolean_t dontfrag = (opt != NULL && (opt->ip6po_flags & IP6PO_DONTFRAG)) ||
1711 (ip6oa != NULL && (ip6oa->ip6oa_flags & IP6OAF_DONT_FRAG));
1712
1713 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
1714 dontfrag = TRUE;
1715 /*
1716 * Discard partial sum information if this packet originated
1717 * from another interface; the packet would already have the
1718 * final checksum and we shouldn't recompute it.
1719 */
1720 if ((m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
1721 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
1722 m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
1723 m->m_pkthdr.csum_data = 0;
1724 }
1725 }
1726
1727 /* Access without acquiring nd_ifinfo lock for performance */
1728 if (dontfrag && tlen > IN6_LINKMTU(ifp)) { /* case 2-b */
1729 /*
1730 * We do not notify the connection in the same outbound path
1731 * to avoid lock ordering issues.
1732 * The returned error should imply that the packet is too big
1733 * and the application should query the PMTU for a given destination.
1734 */
1735 return EMSGSIZE;
1736 }
1737
1738 /*
1739 * transmit packet without fragmentation
1740 */
1741 if (dontfrag ||
1742 (tlen <= mtu || TSO_IPV6_OK(ifp, m) ||
1743 (ifp->if_hwassist & CSUM_FRAGMENT_IPV6))) {
1744 /*
1745 * mppn not updated in this case because no new chain is formed
1746 * and inserted
1747 */
1748 ip6_output_checksum(ifp, mtu, m, nxt0, tlen, optlen);
1749 } else {
1750 /*
1751 * time to fragment - cases 1-b is handled inside
1752 * ip6_do_fragmentation().
1753 * mppn is passed down to be updated to point at fragment chain.
1754 */
1755 u_int8_t *lexthdrsp;
1756
1757 if (exthdrsp->ip6e_rthdr != NULL) {
1758 lexthdrsp = mtod(exthdrsp->ip6e_rthdr, uint8_t *);
1759 } else if (exthdrsp->ip6e_dest1 != NULL) {
1760 lexthdrsp = mtod(exthdrsp->ip6e_dest1, uint8_t *);
1761 } else if (exthdrsp->ip6e_hbh != NULL) {
1762 lexthdrsp = mtod(exthdrsp->ip6e_hbh, uint8_t *);
1763 } else {
1764 lexthdrsp = NULL;
1765 }
1766 error = ip6_do_fragmentation(mptr, optlen, ifp,
1767 unfragpartlen, mtod(m, struct ip6_hdr *), lexthdrsp, mtu,
1768 nxt0, htonl(ip6_randomid((uint64_t)m)));
1769 }
1770
1771 return error;
1772}
1773
1774/*
1775 * ip6_do_fragmentation() is called by ip6_fragment_packet() after determining
1776 * the packet needs to be fragmented. on success, morig is freed and a chain
1777 * of fragments is linked into the packet chain where morig existed. Otherwise,
1778 * an errno is returned.
1779 * optlen: total length of all extension headers (excludes the IPv6 header).
1780 * unfragpartlen: length of the per-fragment headers which consist of the IPv6
1781 * header plus any extension headers that must be processed by nodes
1782 * en route to the destination.
1783 * lexthdrsp: pointer to the last extension header in the unfragmentable part
1784 * or NULL.
1785 * nxt0: upper-layer protocol number.
1786 * id: Identification value to be used in the fragment header.
1787 */
1788int
1789ip6_do_fragmentation(struct mbuf **mptr, uint32_t optlen, struct ifnet *ifp,
1790 uint32_t unfragpartlen, struct ip6_hdr *ip6, uint8_t *lexthdrsp,
1791 uint32_t mtu, int nxt0, uint32_t id)
1792{
1793 VERIFY(NULL != mptr);
1794 int error = 0;
1795
1796 struct mbuf *morig = *mptr;
1797 struct mbuf *first_mbufp = NULL;
1798 struct mbuf *last_mbufp = NULL;
1799
1800 uint32_t tlen = morig->m_pkthdr.len;
1801
1802 /* try to fragment the packet. case 1-b */
1803 if ((morig->m_pkthdr.csum_flags & CSUM_TSO_IPV6)) {
1804 /* TSO and fragment aren't compatible */
1805 in6_ifstat_inc(ifp, ifs6_out_fragfail);
1806 return EMSGSIZE;
1807 } else if (mtu < IPV6_MMTU) {
1808 /* path MTU cannot be less than IPV6_MMTU */
1809 in6_ifstat_inc(ifp, ifs6_out_fragfail);
1810 return EMSGSIZE;
1811 } else if (ip6->ip6_plen == 0) {
1812 /* jumbo payload cannot be fragmented */
1813 in6_ifstat_inc(ifp, ifs6_out_fragfail);
1814 return EMSGSIZE;
1815 } else {
1816 uint32_t hlen, off, len;
1817 struct mbuf **mnext = NULL;
1818 struct ip6_frag *ip6f;
1819 u_char nextproto;
1820
1821 /*
1822 * Too large for the destination or interface;
1823 * fragment if possible.
1824 * Must be able to put at least 8 bytes per fragment.
1825 */
1826 hlen = unfragpartlen;
1827 if (mtu > IPV6_MAXPACKET) {
1828 mtu = IPV6_MAXPACKET;
1829 }
1830
1831 len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
1832 if (len < 8) {
1833 in6_ifstat_inc(ifp, ifs6_out_fragfail);
1834 return EMSGSIZE;
1835 }
1836
1837 /*
1838 * Change the next header field of the last header in the
1839 * unfragmentable part.
1840 */
1841 if (lexthdrsp != NULL) {
1842 nextproto = *lexthdrsp;
1843 *lexthdrsp = IPPROTO_FRAGMENT;
1844 } else {
1845 nextproto = ip6->ip6_nxt;
1846 ip6->ip6_nxt = IPPROTO_FRAGMENT;
1847 }
1848
1849 if (morig->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA) {
1850 in6_delayed_cksum_offset(morig, 0, optlen, nxt0);
1851 }
1852
1853 /*
1854 * Loop through length of segment after first fragment,
1855 * make new header and copy data of each part and link onto
1856 * chain.
1857 */
1858 for (off = hlen; off < tlen; off += len) {
1859 struct ip6_hdr *new_mhip6;
1860 struct mbuf *new_m;
1861 struct mbuf *m_frgpart;
1862
1863 MGETHDR(new_m, M_DONTWAIT, MT_HEADER); /* MAC-OK */
1864 if (new_m == NULL) {
1865 error = ENOBUFS;
1866 ip6stat.ip6s_odropped++;
1867 break;
1868 }
1869 new_m->m_pkthdr.rcvif = NULL;
1870 new_m->m_flags = morig->m_flags & M_COPYFLAGS;
1871
1872 if (first_mbufp != NULL) {
1873 /* Every pass through loop but first */
1874 *mnext = new_m;
1875 last_mbufp = new_m;
1876 } else {
1877 /* This is the first element of the fragment chain */
1878 first_mbufp = new_m;
1879 last_mbufp = new_m;
1880 }
1881 mnext = &new_m->m_nextpkt;
1882
1883 new_m->m_data += max_linkhdr;
1884 new_mhip6 = mtod(new_m, struct ip6_hdr *);
1885 *new_mhip6 = *ip6;
1886 new_m->m_len = sizeof(*new_mhip6);
1887
1888 error = ip6_insertfraghdr(morig, new_m, hlen, &ip6f);
1889 if (error) {
1890 ip6stat.ip6s_odropped++;
1891 break;
1892 }
1893
1894 ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
1895 if (off + len >= tlen) {
1896 len = tlen - off;
1897 } else {
1898 ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
1899 }
1900 new_mhip6->ip6_plen = htons((u_short)(len + hlen +
1901 sizeof(*ip6f) - sizeof(struct ip6_hdr)));
1902
1903 if ((m_frgpart = m_copy(morig, off, len)) == NULL) {
1904 error = ENOBUFS;
1905 ip6stat.ip6s_odropped++;
1906 break;
1907 }
1908 m_cat(new_m, m_frgpart);
1909 new_m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
1910 new_m->m_pkthdr.rcvif = NULL;
1911
1912 M_COPY_CLASSIFIER(new_m, morig);
1913 M_COPY_PFTAG(new_m, morig);
1914 M_COPY_NECPTAG(new_m, morig);
1915
1916 ip6f->ip6f_reserved = 0;
1917 ip6f->ip6f_ident = id;
1918 ip6f->ip6f_nxt = nextproto;
1919 ip6stat.ip6s_ofragments++;
1920 in6_ifstat_inc(ifp, ifs6_out_fragcreat);
1921 }
1922
1923 if (error) {
1924 /* free all the fragments created */
1925 if (first_mbufp != NULL) {
1926 m_freem_list(first_mbufp);
1927 first_mbufp = NULL;
1928 }
1929 last_mbufp = NULL;
1930 } else {
1931 /* successful fragmenting */
1932 m_freem(morig);
1933 *mptr = first_mbufp;
1934 last_mbufp->m_nextpkt = NULL;
1935 ip6stat.ip6s_fragmented++;
1936 in6_ifstat_inc(ifp, ifs6_out_fragok);
1937 }
1938 }
1939 return error;
1940}
1941
1942static int
1943ip6_copyexthdr(struct mbuf **mp, caddr_t hdr, int hlen)
1944{
1945 struct mbuf *m;
1946
1947 if (hlen > MCLBYTES) {
1948 return ENOBUFS; /* XXX */
1949 }
1950 MGET(m, M_DONTWAIT, MT_DATA);
1951 if (m == NULL) {
1952 return ENOBUFS;
1953 }
1954
1955 if (hlen > MLEN) {
1956 MCLGET(m, M_DONTWAIT);
1957 if (!(m->m_flags & M_EXT)) {
1958 m_free(m);
1959 return ENOBUFS;
1960 }
1961 }
1962 m->m_len = hlen;
1963 if (hdr != NULL) {
1964 bcopy(src: hdr, mtod(m, caddr_t), n: hlen);
1965 }
1966
1967 *mp = m;
1968 return 0;
1969}
1970
/*
 * Forward the length of a software-checksummed outbound payload to the
 * per-protocol statistics routine.  Only TCP and UDP keep such statistics
 * for now; every other protocol number is ignored.
 */
static void
ip6_out_cksum_stats(int proto, u_int32_t len)
{
	if (proto == IPPROTO_TCP) {
		tcp_out6_cksum_stats(len);
	} else if (proto == IPPROTO_UDP) {
		udp_out6_cksum_stats(len);
	}
}
1986
1987/*
1988 * Process a delayed payload checksum calculation (outbound path.)
1989 *
1990 * hoff is the number of bytes beyond the mbuf data pointer which
1991 * points to the IPv6 header. optlen is the number of bytes, if any,
1992 * between the end of IPv6 header and the beginning of the ULP payload
1993 * header, which represents the extension headers. If optlen is less
1994 * than zero, this routine will bail when it detects extension headers.
1995 *
1996 * Returns a bitmask representing all the work done in software.
1997 */
1998uint32_t
1999in6_finalize_cksum(struct mbuf *m, uint32_t hoff, int32_t optlen,
2000 int32_t nxt0, uint32_t csum_flags)
2001{
2002 unsigned char buf[sizeof(struct ip6_hdr)] __attribute__((aligned(8)));
2003 struct ip6_hdr *ip6;
2004 uint32_t offset, mlen, hlen, olen, sw_csum;
2005 uint16_t csum, ulpoff, plen;
2006 uint8_t nxt;
2007
2008 _CASSERT(sizeof(csum) == sizeof(uint16_t));
2009 VERIFY(m->m_flags & M_PKTHDR);
2010
2011 sw_csum = (csum_flags & m->m_pkthdr.csum_flags);
2012
2013 if ((sw_csum &= CSUM_DELAY_IPV6_DATA) == 0) {
2014 goto done;
2015 }
2016
2017 mlen = m->m_pkthdr.len; /* total mbuf len */
2018 hlen = sizeof(*ip6); /* IPv6 header len */
2019
2020 /* sanity check (need at least IPv6 header) */
2021 if (mlen < (hoff + hlen)) {
2022 panic("%s: mbuf %p pkt len (%u) < hoff+ip6_hdr "
2023 "(%u+%u)\n", __func__, m, mlen, hoff, hlen);
2024 /* NOTREACHED */
2025 }
2026
2027 /*
2028 * In case the IPv6 header is not contiguous, or not 32-bit
2029 * aligned, copy it to a local buffer.
2030 */
2031 if ((hoff + hlen) > m->m_len ||
2032 !IP6_HDR_ALIGNED_P(mtod(m, caddr_t) + hoff)) {
2033 m_copydata(m, hoff, hlen, (caddr_t)buf);
2034 ip6 = (struct ip6_hdr *)(void *)buf;
2035 } else {
2036 ip6 = (struct ip6_hdr *)(void *)(m->m_data + hoff);
2037 }
2038
2039 nxt = ip6->ip6_nxt;
2040 plen = ntohs(ip6->ip6_plen);
2041 if (plen != (mlen - (hoff + hlen))) {
2042 plen = OSSwapInt16(plen);
2043 if (plen != (mlen - (hoff + hlen))) {
2044 /* Don't complain for jumbograms */
2045 if (plen != 0 || nxt != IPPROTO_HOPOPTS) {
2046 printf("%s: mbuf 0x%llx proto %d IPv6 "
2047 "plen %d (%x) [swapped %d (%x)] doesn't "
2048 "match actual packet length; %d is used "
2049 "instead\n", __func__,
2050 (uint64_t)VM_KERNEL_ADDRPERM(m), nxt,
2051 ip6->ip6_plen, ip6->ip6_plen, plen, plen,
2052 (mlen - (hoff + hlen)));
2053 }
2054 plen = (uint16_t)(mlen - (hoff + hlen));
2055 }
2056 }
2057
2058 if (optlen < 0) {
2059 /* next header isn't TCP/UDP and we don't know optlen, bail */
2060 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) {
2061 sw_csum = 0;
2062 goto done;
2063 }
2064 olen = 0;
2065 } else {
2066 /* caller supplied the original transport number; use it */
2067 if (nxt0 >= 0) {
2068 nxt = (uint8_t)nxt0;
2069 }
2070 olen = optlen;
2071 }
2072
2073 offset = hoff + hlen + olen; /* ULP header */
2074
2075 /* sanity check */
2076 if (mlen < offset) {
2077 panic("%s: mbuf %p pkt len (%u) < hoff+ip6_hdr+ext_hdr "
2078 "(%u+%u+%u)\n", __func__, m, mlen, hoff, hlen, olen);
2079 /* NOTREACHED */
2080 }
2081
2082 /*
2083 * offset is added to the lower 16-bit value of csum_data,
2084 * which is expected to contain the ULP offset; therefore
2085 * CSUM_PARTIAL offset adjustment must be undone.
2086 */
2087 if ((m->m_pkthdr.csum_flags & (CSUM_PARTIAL | CSUM_DATA_VALID)) ==
2088 (CSUM_PARTIAL | CSUM_DATA_VALID)) {
2089 /*
2090 * Get back the original ULP offset (this will
2091 * undo the CSUM_PARTIAL logic in ip6_output.)
2092 */
2093 m->m_pkthdr.csum_data = (m->m_pkthdr.csum_tx_stuff -
2094 m->m_pkthdr.csum_tx_start);
2095 }
2096
2097 ulpoff = (m->m_pkthdr.csum_data & 0xffff); /* ULP csum offset */
2098
2099 if (mlen < (ulpoff + sizeof(csum))) {
2100 panic("%s: mbuf %p pkt len (%u) proto %d invalid ULP "
2101 "cksum offset (%u) cksum flags 0x%x\n", __func__,
2102 m, mlen, nxt, ulpoff, m->m_pkthdr.csum_flags);
2103 /* NOTREACHED */
2104 }
2105
2106 csum = inet6_cksum(m, 0, offset, plen - olen);
2107
2108 /* Update stats */
2109 ip6_out_cksum_stats(proto: nxt, len: plen - olen);
2110
2111 /* RFC1122 4.1.3.4 */
2112 if (csum == 0 &&
2113 (m->m_pkthdr.csum_flags & (CSUM_UDPIPV6 | CSUM_ZERO_INVERT))) {
2114 csum = 0xffff;
2115 }
2116
2117 /* Insert the checksum in the ULP csum field */
2118 offset += ulpoff;
2119 if ((offset + sizeof(csum)) > m->m_len) {
2120 m_copyback(m, offset, sizeof(csum), &csum);
2121 } else if (IP6_HDR_ALIGNED_P(mtod(m, char *) + hoff)) {
2122 *(uint16_t *)(void *)(mtod(m, char *) + offset) = csum;
2123 } else {
2124 bcopy(src: &csum, dst: (mtod(m, char *) + offset), n: sizeof(csum));
2125 }
2126 m->m_pkthdr.csum_flags &= ~(CSUM_DELAY_IPV6_DATA | CSUM_DATA_VALID |
2127 CSUM_PARTIAL | CSUM_ZERO_INVERT);
2128
2129done:
2130 return sw_csum;
2131}
2132
2133/*
2134 * Insert jumbo payload option.
2135 */
2136static int
2137ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen)
2138{
2139 struct mbuf *mopt;
2140 u_char *optbuf;
2141 u_int32_t v;
2142
2143#define JUMBOOPTLEN 8 /* length of jumbo payload option and padding */
2144
2145 /*
2146 * If there is no hop-by-hop options header, allocate new one.
2147 * If there is one but it doesn't have enough space to store the
2148 * jumbo payload option, allocate a cluster to store the whole options.
2149 * Otherwise, use it to store the options.
2150 */
2151 if (exthdrs->ip6e_hbh == NULL) {
2152 MGET(mopt, M_DONTWAIT, MT_DATA);
2153 if (mopt == NULL) {
2154 return ENOBUFS;
2155 }
2156 mopt->m_len = JUMBOOPTLEN;
2157 optbuf = mtod(mopt, u_char *);
2158 optbuf[1] = 0; /* = ((JUMBOOPTLEN) >> 3) - 1 */
2159 exthdrs->ip6e_hbh = mopt;
2160 } else {
2161 struct ip6_hbh *hbh;
2162
2163 mopt = exthdrs->ip6e_hbh;
2164 if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
2165 /*
2166 * XXX assumption:
2167 * - exthdrs->ip6e_hbh is not referenced from places
2168 * other than exthdrs.
2169 * - exthdrs->ip6e_hbh is not an mbuf chain.
2170 */
2171 u_int32_t oldoptlen = mopt->m_len;
2172 struct mbuf *n;
2173
2174 /*
2175 * XXX: give up if the whole (new) hbh header does
2176 * not fit even in an mbuf cluster.
2177 */
2178 if (oldoptlen + JUMBOOPTLEN > MCLBYTES) {
2179 return ENOBUFS;
2180 }
2181
2182 /*
2183 * As a consequence, we must always prepare a cluster
2184 * at this point.
2185 */
2186 MGET(n, M_DONTWAIT, MT_DATA);
2187 if (n != NULL) {
2188 MCLGET(n, M_DONTWAIT);
2189 if (!(n->m_flags & M_EXT)) {
2190 m_freem(n);
2191 n = NULL;
2192 }
2193 }
2194 if (n == NULL) {
2195 return ENOBUFS;
2196 }
2197 n->m_len = oldoptlen + JUMBOOPTLEN;
2198 bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
2199 n: oldoptlen);
2200 optbuf = mtod(n, u_char *) + oldoptlen;
2201 m_freem(mopt);
2202 mopt = exthdrs->ip6e_hbh = n;
2203 } else {
2204 optbuf = mtod(mopt, u_char *) + mopt->m_len;
2205 mopt->m_len += JUMBOOPTLEN;
2206 }
2207 optbuf[0] = IP6OPT_PADN;
2208 optbuf[1] = 1;
2209
2210 /*
2211 * Adjust the header length according to the pad and
2212 * the jumbo payload option.
2213 */
2214 hbh = mtod(mopt, struct ip6_hbh *);
2215 hbh->ip6h_len += (JUMBOOPTLEN >> 3);
2216 }
2217
2218 /* fill in the option. */
2219 optbuf[2] = IP6OPT_JUMBO;
2220 optbuf[3] = 4;
2221 v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
2222 bcopy(src: &v, dst: &optbuf[4], n: sizeof(u_int32_t));
2223
2224 /* finally, adjust the packet header length */
2225 exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
2226
2227 return 0;
2228#undef JUMBOOPTLEN
2229}
2230
2231/*
2232 * Insert fragment header and copy unfragmentable header portions.
2233 */
2234static int
2235ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen,
2236 struct ip6_frag **frghdrp)
2237{
2238 struct mbuf *n, *mlast;
2239
2240 if (hlen > sizeof(struct ip6_hdr)) {
2241 n = m_copym(m0, sizeof(struct ip6_hdr),
2242 hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
2243 if (n == NULL) {
2244 return ENOBUFS;
2245 }
2246 m->m_next = n;
2247 } else {
2248 n = m;
2249 }
2250
2251 /* Search for the last mbuf of unfragmentable part. */
2252 for (mlast = n; mlast->m_next; mlast = mlast->m_next) {
2253 ;
2254 }
2255
2256 if (!(mlast->m_flags & M_EXT) &&
2257 M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
2258 /* use the trailing space of the last mbuf for the frag hdr */
2259 *frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) +
2260 mlast->m_len);
2261 mlast->m_len += sizeof(struct ip6_frag);
2262 m->m_pkthdr.len += sizeof(struct ip6_frag);
2263 } else {
2264 /* allocate a new mbuf for the fragment header */
2265 struct mbuf *mfrg;
2266
2267 MGET(mfrg, M_DONTWAIT, MT_DATA);
2268 if (mfrg == NULL) {
2269 return ENOBUFS;
2270 }
2271 mfrg->m_len = sizeof(struct ip6_frag);
2272 *frghdrp = mtod(mfrg, struct ip6_frag *);
2273 mlast->m_next = mfrg;
2274 }
2275
2276 return 0;
2277}
2278
2279static int
2280ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro,
2281 struct ifnet *ifp, struct in6_addr *dst, uint32_t dst_ifscope, u_int32_t *mtup)
2282{
2283 u_int32_t mtu = 0;
2284 int error = 0;
2285
2286 if (ro_pmtu != ro) {
2287 /* The first hop and the final destination may differ. */
2288 struct sockaddr_in6 *sa6_dst = SIN6(&ro_pmtu->ro_dst);
2289 if (ROUTE_UNUSABLE(ro_pmtu) ||
2290 !in6_are_addr_equal_scoped(&sa6_dst->sin6_addr, dst, sa6_dst->sin6_scope_id, dst_ifscope)) {
2291 ROUTE_RELEASE(ro_pmtu);
2292 }
2293
2294 if (ro_pmtu->ro_rt == NULL) {
2295 SOCKADDR_ZERO(sa6_dst, sizeof(*sa6_dst));
2296 sa6_dst->sin6_family = AF_INET6;
2297 sa6_dst->sin6_len = sizeof(struct sockaddr_in6);
2298 sa6_dst->sin6_addr = *dst;
2299
2300 rtalloc_scoped((struct route *)ro_pmtu,
2301 ifp != NULL ? ifp->if_index : IFSCOPE_NONE);
2302 }
2303 }
2304
2305 if (ro_pmtu->ro_rt != NULL) {
2306 u_int32_t ifmtu;
2307
2308 if (ifp == NULL) {
2309 ifp = ro_pmtu->ro_rt->rt_ifp;
2310 }
2311 /* Access without acquiring nd_ifinfo lock for performance */
2312 ifmtu = IN6_LINKMTU(ifp);
2313
2314 /*
2315 * Access rmx_mtu without holding the route entry lock,
2316 * for performance; this isn't something that changes
2317 * often, so optimize.
2318 */
2319 mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
2320 if (mtu > ifmtu || mtu == 0) {
2321 /*
2322 * The MTU on the route is larger than the MTU on
2323 * the interface! This shouldn't happen, unless the
2324 * MTU of the interface has been changed after the
2325 * interface was brought up. Change the MTU in the
2326 * route to match the interface MTU (as long as the
2327 * field isn't locked).
2328 *
2329 * if MTU on the route is 0, we need to fix the MTU.
2330 * this case happens with path MTU discovery timeouts.
2331 */
2332 mtu = ifmtu;
2333 if (!(ro_pmtu->ro_rt->rt_rmx.rmx_locks & RTV_MTU)) {
2334 ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu; /* XXX */
2335 }
2336 }
2337 } else {
2338 if (ifp) {
2339 /* Don't hold nd_ifinfo lock for performance */
2340 mtu = IN6_LINKMTU(ifp);
2341 } else {
2342 error = EHOSTUNREACH; /* XXX */
2343 }
2344 }
2345
2346 *mtup = mtu;
2347 return error;
2348}
2349
2350/*
2351 * IP6 socket option processing.
2352 */
2353int
2354ip6_ctloutput(struct socket *so, struct sockopt *sopt)
2355{
2356 int optdatalen, uproto;
2357 void *optdata;
2358 int privileged;
2359 struct inpcb *in6p = sotoinpcb(so);
2360 int error = 0, optval = 0;
2361 int level, op = -1, optname = 0;
2362 size_t optlen = 0;
2363 struct proc *p;
2364 lck_mtx_t *mutex_held = NULL;
2365
2366 VERIFY(sopt != NULL);
2367
2368 level = sopt->sopt_level;
2369 op = sopt->sopt_dir;
2370 optname = sopt->sopt_name;
2371 optlen = sopt->sopt_valsize;
2372 p = sopt->sopt_p;
2373 uproto = (int)SOCK_PROTO(so);
2374
2375 privileged = (proc_suser(p) == 0);
2376
2377 if (level == IPPROTO_IPV6) {
2378 boolean_t capture_exthdrstat_in = FALSE;
2379 switch (op) {
2380 case SOPT_SET:
2381 mutex_held = socket_getlock(so, PR_F_WILLUNLOCK);
2382 /*
2383 * Wait if we are in the middle of ip6_output
2384 * as we unlocked the socket there and don't
2385 * want to overwrite the IP options
2386 */
2387 if (in6p->inp_sndinprog_cnt > 0) {
2388 in6p->inp_sndingprog_waiters++;
2389
2390 while (in6p->inp_sndinprog_cnt > 0) {
2391 msleep(chan: &in6p->inp_sndinprog_cnt, mtx: mutex_held,
2392 PSOCK | PCATCH, wmesg: "inp_sndinprog_cnt",
2393 NULL);
2394 }
2395 in6p->inp_sndingprog_waiters--;
2396 }
2397 switch (optname) {
2398 case IPV6_2292PKTOPTIONS: {
2399 struct mbuf *m;
2400
2401 error = soopt_getm(sopt, mp: &m);
2402 if (error != 0) {
2403 break;
2404 }
2405 error = soopt_mcopyin(sopt, m);
2406 if (error != 0) {
2407 break;
2408 }
2409 error = ip6_pcbopts(&in6p->in6p_outputopts,
2410 m, so, sopt);
2411 m_freem(m);
2412 break;
2413 }
2414
2415 /*
2416 * Use of some Hop-by-Hop options or some
2417 * Destination options, might require special
2418 * privilege. That is, normal applications
2419 * (without special privilege) might be forbidden
2420 * from setting certain options in outgoing packets,
2421 * and might never see certain options in received
2422 * packets. [RFC 2292 Section 6]
2423 * KAME specific note:
2424 * KAME prevents non-privileged users from sending or
2425 * receiving ANY hbh/dst options in order to avoid
2426 * overhead of parsing options in the kernel.
2427 */
2428 case IPV6_RECVHOPOPTS:
2429 case IPV6_RECVDSTOPTS:
2430 case IPV6_RECVRTHDRDSTOPTS:
2431 if (!privileged) {
2432 break;
2433 }
2434 OS_FALLTHROUGH;
2435 case IPV6_UNICAST_HOPS:
2436 case IPV6_HOPLIMIT:
2437 case IPV6_RECVPKTINFO:
2438 case IPV6_RECVHOPLIMIT:
2439 case IPV6_RECVRTHDR:
2440 case IPV6_RECVPATHMTU:
2441 case IPV6_RECVTCLASS:
2442 case IPV6_V6ONLY:
2443 case IPV6_AUTOFLOWLABEL:
2444 if (optlen != sizeof(int)) {
2445 error = EINVAL;
2446 break;
2447 }
2448 error = sooptcopyin(sopt, &optval,
2449 len: sizeof(optval), minlen: sizeof(optval));
2450 if (error) {
2451 break;
2452 }
2453
2454 switch (optname) {
2455 case IPV6_UNICAST_HOPS:
2456 if (optval < -1 || optval >= 256) {
2457 error = EINVAL;
2458 } else {
2459 /* -1 = kernel default */
2460 in6p->in6p_hops = (short)optval;
2461 if (in6p->inp_vflag &
2462 INP_IPV4) {
2463 in6p->inp_ip_ttl =
2464 (uint8_t)optval;
2465 }
2466 }
2467 break;
2468#define OPTSET(bit) do { \
2469 if (optval) \
2470 in6p->inp_flags |= (bit); \
2471 else \
2472 in6p->inp_flags &= ~(bit); \
2473} while (0)
2474
2475#define OPTSET2292(bit) do { \
2476 in6p->inp_flags |= IN6P_RFC2292; \
2477 if (optval) \
2478 in6p->inp_flags |= (bit); \
2479 else \
2480 in6p->inp_flags &= ~(bit); \
2481} while (0)
2482
2483#define OPTBIT(bit) (in6p->inp_flags & (bit) ? 1 : 0)
2484
2485 case IPV6_RECVPKTINFO:
2486 /* cannot mix with RFC2292 */
2487 if (OPTBIT(IN6P_RFC2292)) {
2488 error = EINVAL;
2489 break;
2490 }
2491 OPTSET(IN6P_PKTINFO);
2492 break;
2493
2494 case IPV6_HOPLIMIT: {
2495 struct ip6_pktopts **optp;
2496
2497 /* cannot mix with RFC2292 */
2498 if (OPTBIT(IN6P_RFC2292)) {
2499 error = EINVAL;
2500 break;
2501 }
2502 optp = &in6p->in6p_outputopts;
2503 error = ip6_pcbopt(IPV6_HOPLIMIT,
2504 (u_char *)&optval, sizeof(optval),
2505 optp, uproto);
2506 break;
2507 }
2508
2509 case IPV6_RECVHOPLIMIT:
2510 /* cannot mix with RFC2292 */
2511 if (OPTBIT(IN6P_RFC2292)) {
2512 error = EINVAL;
2513 break;
2514 }
2515 OPTSET(IN6P_HOPLIMIT);
2516 break;
2517
2518 case IPV6_RECVHOPOPTS:
2519 /* cannot mix with RFC2292 */
2520 if (OPTBIT(IN6P_RFC2292)) {
2521 error = EINVAL;
2522 break;
2523 }
2524 OPTSET(IN6P_HOPOPTS);
2525 capture_exthdrstat_in = TRUE;
2526 break;
2527
2528 case IPV6_RECVDSTOPTS:
2529 /* cannot mix with RFC2292 */
2530 if (OPTBIT(IN6P_RFC2292)) {
2531 error = EINVAL;
2532 break;
2533 }
2534 OPTSET(IN6P_DSTOPTS);
2535 capture_exthdrstat_in = TRUE;
2536 break;
2537
2538 case IPV6_RECVRTHDRDSTOPTS:
2539 /* cannot mix with RFC2292 */
2540 if (OPTBIT(IN6P_RFC2292)) {
2541 error = EINVAL;
2542 break;
2543 }
2544 OPTSET(IN6P_RTHDRDSTOPTS);
2545 capture_exthdrstat_in = TRUE;
2546 break;
2547
2548 case IPV6_RECVRTHDR:
2549 /* cannot mix with RFC2292 */
2550 if (OPTBIT(IN6P_RFC2292)) {
2551 error = EINVAL;
2552 break;
2553 }
2554 OPTSET(IN6P_RTHDR);
2555 capture_exthdrstat_in = TRUE;
2556 break;
2557
2558 case IPV6_RECVPATHMTU:
2559 /*
2560 * We ignore this option for TCP
2561 * sockets.
2562 * (RFC3542 leaves this case
2563 * unspecified.)
2564 */
2565 if (uproto != IPPROTO_TCP) {
2566 OPTSET(IN6P_MTU);
2567 }
2568 break;
2569
2570 case IPV6_V6ONLY:
2571 /*
2572 * make setsockopt(IPV6_V6ONLY)
2573 * available only prior to bind(2).
2574 * see ipng mailing list, Jun 22 2001.
2575 */
2576 if (in6p->inp_lport ||
2577 !IN6_IS_ADDR_UNSPECIFIED(
2578 &in6p->in6p_laddr)) {
2579 error = EINVAL;
2580 break;
2581 }
2582 OPTSET(IN6P_IPV6_V6ONLY);
2583 if (optval) {
2584 in6p->inp_vflag &= ~INP_IPV4;
2585 } else {
2586 in6p->inp_vflag |= INP_IPV4;
2587 }
2588 break;
2589
2590 case IPV6_RECVTCLASS:
2591 /* we can mix with RFC2292 */
2592 OPTSET(IN6P_TCLASS);
2593 break;
2594
2595 case IPV6_AUTOFLOWLABEL:
2596 OPTSET(IN6P_AUTOFLOWLABEL);
2597 break;
2598 }
2599 break;
2600
2601 case IPV6_TCLASS:
2602 case IPV6_DONTFRAG:
2603 case IPV6_USE_MIN_MTU:
2604 case IPV6_PREFER_TEMPADDR: {
2605 struct ip6_pktopts **optp;
2606
2607 if (optlen != sizeof(optval)) {
2608 error = EINVAL;
2609 break;
2610 }
2611 error = sooptcopyin(sopt, &optval,
2612 len: sizeof(optval), minlen: sizeof(optval));
2613 if (error) {
2614 break;
2615 }
2616
2617 optp = &in6p->in6p_outputopts;
2618 error = ip6_pcbopt(optname, (u_char *)&optval,
2619 sizeof(optval), optp, uproto);
2620
2621 if (optname == IPV6_TCLASS) {
2622 // Add in the ECN flags
2623 u_int8_t tos = (in6p->inp_ip_tos & ~IPTOS_ECN_MASK);
2624 u_int8_t ecn = optval & IPTOS_ECN_MASK;
2625 in6p->inp_ip_tos = tos | ecn;
2626 }
2627 break;
2628 }
2629
2630 case IPV6_2292PKTINFO:
2631 case IPV6_2292HOPLIMIT:
2632 case IPV6_2292HOPOPTS:
2633 case IPV6_2292DSTOPTS:
2634 case IPV6_2292RTHDR:
2635 /* RFC 2292 */
2636 if (optlen != sizeof(int)) {
2637 error = EINVAL;
2638 break;
2639 }
2640 error = sooptcopyin(sopt, &optval,
2641 len: sizeof(optval), minlen: sizeof(optval));
2642 if (error) {
2643 break;
2644 }
2645 switch (optname) {
2646 case IPV6_2292PKTINFO:
2647 OPTSET2292(IN6P_PKTINFO);
2648 break;
2649 case IPV6_2292HOPLIMIT:
2650 OPTSET2292(IN6P_HOPLIMIT);
2651 break;
2652 case IPV6_2292HOPOPTS:
2653 /*
2654 * Check super-user privilege.
2655 * See comments for IPV6_RECVHOPOPTS.
2656 */
2657 if (!privileged) {
2658 return EPERM;
2659 }
2660 OPTSET2292(IN6P_HOPOPTS);
2661 capture_exthdrstat_in = TRUE;
2662 break;
2663 case IPV6_2292DSTOPTS:
2664 if (!privileged) {
2665 return EPERM;
2666 }
2667 OPTSET2292(IN6P_DSTOPTS |
2668 IN6P_RTHDRDSTOPTS); /* XXX */
2669 capture_exthdrstat_in = TRUE;
2670 break;
2671 case IPV6_2292RTHDR:
2672 OPTSET2292(IN6P_RTHDR);
2673 capture_exthdrstat_in = TRUE;
2674 break;
2675 }
2676 break;
2677
2678 case IPV6_3542PKTINFO:
2679 case IPV6_3542HOPOPTS:
2680 case IPV6_3542RTHDR:
2681 case IPV6_3542DSTOPTS:
2682 case IPV6_RTHDRDSTOPTS:
2683 case IPV6_3542NEXTHOP: {
2684 struct ip6_pktopts **optp;
2685 /* new advanced API (RFC3542) */
2686 struct mbuf *m;
2687
2688 /* cannot mix with RFC2292 */
2689 if (OPTBIT(IN6P_RFC2292)) {
2690 error = EINVAL;
2691 break;
2692 }
2693 error = soopt_getm(sopt, mp: &m);
2694 if (error != 0) {
2695 break;
2696 }
2697 error = soopt_mcopyin(sopt, m);
2698 if (error != 0) {
2699 break;
2700 }
2701
2702 optp = &in6p->in6p_outputopts;
2703 error = ip6_pcbopt(optname, mtod(m, u_char *),
2704 m->m_len, optp, uproto);
2705 m_freem(m);
2706 break;
2707 }
2708#undef OPTSET
2709 case IPV6_MULTICAST_IF:
2710 case IPV6_MULTICAST_HOPS:
2711 case IPV6_MULTICAST_LOOP:
2712 case IPV6_JOIN_GROUP:
2713 case IPV6_LEAVE_GROUP:
2714 case IPV6_MSFILTER:
2715 case MCAST_BLOCK_SOURCE:
2716 case MCAST_UNBLOCK_SOURCE:
2717 case MCAST_JOIN_GROUP:
2718 case MCAST_LEAVE_GROUP:
2719 case MCAST_JOIN_SOURCE_GROUP:
2720 case MCAST_LEAVE_SOURCE_GROUP:
2721 error = ip6_setmoptions(in6p, sopt);
2722 break;
2723
2724 case IPV6_PORTRANGE:
2725 error = sooptcopyin(sopt, &optval,
2726 len: sizeof(optval), minlen: sizeof(optval));
2727 if (error) {
2728 break;
2729 }
2730
2731 switch (optval) {
2732 case IPV6_PORTRANGE_DEFAULT:
2733 in6p->inp_flags &= ~(INP_LOWPORT);
2734 in6p->inp_flags &= ~(INP_HIGHPORT);
2735 break;
2736
2737 case IPV6_PORTRANGE_HIGH:
2738 in6p->inp_flags &= ~(INP_LOWPORT);
2739 in6p->inp_flags |= INP_HIGHPORT;
2740 break;
2741
2742 case IPV6_PORTRANGE_LOW:
2743 in6p->inp_flags &= ~(INP_HIGHPORT);
2744 in6p->inp_flags |= INP_LOWPORT;
2745 break;
2746
2747 default:
2748 error = EINVAL;
2749 break;
2750 }
2751 break;
2752#if IPSEC
2753 case IPV6_IPSEC_POLICY: {
2754 caddr_t req = NULL;
2755 size_t len = 0;
2756 struct mbuf *m;
2757
2758 if ((error = soopt_getm(sopt, mp: &m)) != 0) {
2759 break;
2760 }
2761 if ((error = soopt_mcopyin(sopt, m)) != 0) {
2762 break;
2763 }
2764
2765 req = mtod(m, caddr_t);
2766 len = m->m_len;
2767 error = ipsec6_set_policy(inp: in6p, optname, request: req,
2768 len, priv: privileged);
2769 m_freem(m);
2770 break;
2771 }
2772#endif /* IPSEC */
2773 /*
2774 * IPv6 variant of IP_BOUND_IF; for details see
2775 * comments on IP_BOUND_IF in ip_ctloutput().
2776 */
2777 case IPV6_BOUND_IF:
2778 /* This option is settable only on IPv6 */
2779 if (!(in6p->inp_vflag & INP_IPV6)) {
2780 error = EINVAL;
2781 break;
2782 }
2783
2784 error = sooptcopyin(sopt, &optval,
2785 len: sizeof(optval), minlen: sizeof(optval));
2786
2787 if (error) {
2788 break;
2789 }
2790
2791 error = inp_bindif(in6p, optval, NULL);
2792 break;
2793
2794 case IPV6_NO_IFT_CELLULAR:
2795 /* This option is settable only for IPv6 */
2796 if (!(in6p->inp_vflag & INP_IPV6)) {
2797 error = EINVAL;
2798 break;
2799 }
2800
2801 error = sooptcopyin(sopt, &optval,
2802 len: sizeof(optval), minlen: sizeof(optval));
2803
2804 if (error) {
2805 break;
2806 }
2807
2808 /* once set, it cannot be unset */
2809 if (!optval && INP_NO_CELLULAR(in6p)) {
2810 error = EINVAL;
2811 break;
2812 }
2813
2814 error = so_set_restrictions(so,
2815 SO_RESTRICT_DENY_CELLULAR);
2816 break;
2817
2818 case IPV6_OUT_IF:
2819 /* This option is not settable */
2820 error = EINVAL;
2821 break;
2822
2823 default:
2824 error = ENOPROTOOPT;
2825 break;
2826 }
2827 if (capture_exthdrstat_in) {
2828 if (uproto == IPPROTO_TCP) {
2829 INC_ATOMIC_INT64_LIM(net_api_stats.nas_sock_inet6_stream_exthdr_in);
2830 } else if (uproto == IPPROTO_UDP) {
2831 INC_ATOMIC_INT64_LIM(net_api_stats.nas_sock_inet6_dgram_exthdr_in);
2832 }
2833 }
2834 break;
2835
2836 case SOPT_GET:
2837 switch (optname) {
2838 case IPV6_2292PKTOPTIONS:
2839 /*
2840 * RFC3542 (effectively) deprecated the
2841 * semantics of the 2292-style pktoptions.
2842 * Since it was not reliable in nature (i.e.,
2843 * applications had to expect the lack of some
2844 * information after all), it would make sense
2845 * to simplify this part by always returning
2846 * empty data.
2847 */
2848 sopt->sopt_valsize = 0;
2849 break;
2850
2851 case IPV6_RECVHOPOPTS:
2852 case IPV6_RECVDSTOPTS:
2853 case IPV6_RECVRTHDRDSTOPTS:
2854 case IPV6_UNICAST_HOPS:
2855 case IPV6_RECVPKTINFO:
2856 case IPV6_RECVHOPLIMIT:
2857 case IPV6_RECVRTHDR:
2858 case IPV6_RECVPATHMTU:
2859 case IPV6_V6ONLY:
2860 case IPV6_PORTRANGE:
2861 case IPV6_RECVTCLASS:
2862 case IPV6_AUTOFLOWLABEL:
2863 switch (optname) {
2864 case IPV6_RECVHOPOPTS:
2865 optval = OPTBIT(IN6P_HOPOPTS);
2866 break;
2867
2868 case IPV6_RECVDSTOPTS:
2869 optval = OPTBIT(IN6P_DSTOPTS);
2870 break;
2871
2872 case IPV6_RECVRTHDRDSTOPTS:
2873 optval = OPTBIT(IN6P_RTHDRDSTOPTS);
2874 break;
2875
2876 case IPV6_UNICAST_HOPS:
2877 optval = in6p->in6p_hops;
2878 break;
2879
2880 case IPV6_RECVPKTINFO:
2881 optval = OPTBIT(IN6P_PKTINFO);
2882 break;
2883
2884 case IPV6_RECVHOPLIMIT:
2885 optval = OPTBIT(IN6P_HOPLIMIT);
2886 break;
2887
2888 case IPV6_RECVRTHDR:
2889 optval = OPTBIT(IN6P_RTHDR);
2890 break;
2891
2892 case IPV6_RECVPATHMTU:
2893 optval = OPTBIT(IN6P_MTU);
2894 break;
2895
2896 case IPV6_V6ONLY:
2897 optval = OPTBIT(IN6P_IPV6_V6ONLY);
2898 break;
2899
2900 case IPV6_PORTRANGE: {
2901 int flags;
2902 flags = in6p->inp_flags;
2903 if (flags & INP_HIGHPORT) {
2904 optval = IPV6_PORTRANGE_HIGH;
2905 } else if (flags & INP_LOWPORT) {
2906 optval = IPV6_PORTRANGE_LOW;
2907 } else {
2908 optval = 0;
2909 }
2910 break;
2911 }
2912 case IPV6_RECVTCLASS:
2913 optval = OPTBIT(IN6P_TCLASS);
2914 break;
2915
2916 case IPV6_AUTOFLOWLABEL:
2917 optval = OPTBIT(IN6P_AUTOFLOWLABEL);
2918 break;
2919 }
2920 if (error) {
2921 break;
2922 }
2923 error = sooptcopyout(sopt, data: &optval,
2924 len: sizeof(optval));
2925 break;
2926
2927 case IPV6_PATHMTU: {
2928 u_int32_t pmtu = 0;
2929 struct ip6_mtuinfo mtuinfo;
2930 struct route_in6 sro;
2931
2932 bzero(s: &sro, n: sizeof(sro));
2933
2934 if (!(so->so_state & SS_ISCONNECTED)) {
2935 return ENOTCONN;
2936 }
2937 /*
2938 * XXX: we dot not consider the case of source
2939 * routing, or optional information to specify
2940 * the outgoing interface.
2941 */
2942 error = ip6_getpmtu(ro_pmtu: &sro, NULL, NULL,
2943 dst: &in6p->in6p_faddr, dst_ifscope: in6p->inp_fifscope, mtup: &pmtu);
2944 ROUTE_RELEASE(&sro);
2945 if (error) {
2946 break;
2947 }
2948 if (pmtu > IPV6_MAXPACKET) {
2949 pmtu = IPV6_MAXPACKET;
2950 }
2951
2952 bzero(s: &mtuinfo, n: sizeof(mtuinfo));
2953 mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
2954 optdata = (void *)&mtuinfo;
2955 optdatalen = sizeof(mtuinfo);
2956 error = sooptcopyout(sopt, data: optdata,
2957 len: optdatalen);
2958 break;
2959 }
2960
2961 case IPV6_2292PKTINFO:
2962 case IPV6_2292HOPLIMIT:
2963 case IPV6_2292HOPOPTS:
2964 case IPV6_2292RTHDR:
2965 case IPV6_2292DSTOPTS:
2966 switch (optname) {
2967 case IPV6_2292PKTINFO:
2968 optval = OPTBIT(IN6P_PKTINFO);
2969 break;
2970 case IPV6_2292HOPLIMIT:
2971 optval = OPTBIT(IN6P_HOPLIMIT);
2972 break;
2973 case IPV6_2292HOPOPTS:
2974 optval = OPTBIT(IN6P_HOPOPTS);
2975 break;
2976 case IPV6_2292RTHDR:
2977 optval = OPTBIT(IN6P_RTHDR);
2978 break;
2979 case IPV6_2292DSTOPTS:
2980 optval = OPTBIT(IN6P_DSTOPTS |
2981 IN6P_RTHDRDSTOPTS);
2982 break;
2983 }
2984 error = sooptcopyout(sopt, data: &optval,
2985 len: sizeof(optval));
2986 break;
2987
2988 case IPV6_PKTINFO:
2989 case IPV6_HOPOPTS:
2990 case IPV6_RTHDR:
2991 case IPV6_DSTOPTS:
2992 case IPV6_RTHDRDSTOPTS:
2993 case IPV6_NEXTHOP:
2994 case IPV6_TCLASS:
2995 case IPV6_DONTFRAG:
2996 case IPV6_USE_MIN_MTU:
2997 case IPV6_PREFER_TEMPADDR:
2998 error = ip6_getpcbopt(in6p->in6p_outputopts,
2999 optname, sopt);
3000 break;
3001
3002 case IPV6_MULTICAST_IF:
3003 case IPV6_MULTICAST_HOPS:
3004 case IPV6_MULTICAST_LOOP:
3005 case IPV6_MSFILTER:
3006 error = ip6_getmoptions(in6p, sopt);
3007 break;
3008#if IPSEC
3009 case IPV6_IPSEC_POLICY: {
3010 error = 0; /* This option is no longer supported */
3011 break;
3012 }
3013#endif /* IPSEC */
3014 case IPV6_BOUND_IF:
3015 if (in6p->inp_flags & INP_BOUND_IF) {
3016 optval = in6p->inp_boundifp->if_index;
3017 }
3018 error = sooptcopyout(sopt, data: &optval,
3019 len: sizeof(optval));
3020 break;
3021
3022 case IPV6_NO_IFT_CELLULAR:
3023 optval = INP_NO_CELLULAR(in6p) ? 1 : 0;
3024 error = sooptcopyout(sopt, data: &optval,
3025 len: sizeof(optval));
3026 break;
3027
3028 case IPV6_OUT_IF:
3029 optval = (in6p->in6p_last_outifp != NULL) ?
3030 in6p->in6p_last_outifp->if_index : 0;
3031 error = sooptcopyout(sopt, data: &optval,
3032 len: sizeof(optval));
3033 break;
3034
3035 default:
3036 error = ENOPROTOOPT;
3037 break;
3038 }
3039 break;
3040 }
3041 } else {
3042 error = EINVAL;
3043 }
3044 return error;
3045}
3046
3047int
3048ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt)
3049{
3050 int error = 0, optval;
3051 size_t optlen;
3052 const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
3053 struct inpcb *in6p = sotoinpcb(so);
3054 int level, op, optname;
3055
3056 level = sopt->sopt_level;
3057 op = sopt->sopt_dir;
3058 optname = sopt->sopt_name;
3059 optlen = sopt->sopt_valsize;
3060
3061 if (level != IPPROTO_IPV6) {
3062 return EINVAL;
3063 }
3064
3065 switch (optname) {
3066 case IPV6_CHECKSUM:
3067 /*
3068 * For ICMPv6 sockets, no modification allowed for checksum
3069 * offset, permit "no change" values to help existing apps.
3070 *
3071 * RFC3542 says: "An attempt to set IPV6_CHECKSUM
3072 * for an ICMPv6 socket will fail."
3073 * The current behavior does not meet RFC3542.
3074 */
3075 switch (op) {
3076 case SOPT_SET:
3077 if (optlen != sizeof(int)) {
3078 error = EINVAL;
3079 break;
3080 }
3081 error = sooptcopyin(sopt, &optval, len: sizeof(optval),
3082 minlen: sizeof(optval));
3083 if (error) {
3084 break;
3085 }
3086 if ((optval % 2) != 0) {
3087 /* the API assumes even offset values */
3088 error = EINVAL;
3089 } else if (SOCK_PROTO(so) == IPPROTO_ICMPV6) {
3090 if (optval != icmp6off) {
3091 error = EINVAL;
3092 }
3093 } else {
3094 in6p->in6p_cksum = optval;
3095 }
3096 break;
3097
3098 case SOPT_GET:
3099 if (SOCK_PROTO(so) == IPPROTO_ICMPV6) {
3100 optval = icmp6off;
3101 } else {
3102 optval = in6p->in6p_cksum;
3103 }
3104
3105 error = sooptcopyout(sopt, data: &optval, len: sizeof(optval));
3106 break;
3107
3108 default:
3109 error = EINVAL;
3110 break;
3111 }
3112 break;
3113
3114 default:
3115 error = ENOPROTOOPT;
3116 break;
3117 }
3118
3119 return error;
3120}
3121
3122/*
3123 * Set up IP6 options in pcb for insertion in output packets or
3124 * specifying behavior of outgoing packets.
3125 */
3126static int
3127ip6_pcbopts(struct ip6_pktopts **pktopt, struct mbuf *m, struct socket *so,
3128 struct sockopt *sopt)
3129{
3130#pragma unused(sopt)
3131 struct ip6_pktopts *opt = *pktopt;
3132 int error = 0;
3133
3134 /* turn off any old options. */
3135 if (opt != NULL) {
3136#if DIAGNOSTIC
3137 if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
3138 opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
3139 opt->ip6po_rhinfo.ip6po_rhi_rthdr) {
3140 printf("%s: all specified options are cleared.\n",
3141 __func__);
3142 }
3143#endif
3144 ip6_clearpktopts(opt, -1);
3145 } else {
3146 opt = kalloc_type(struct ip6_pktopts, Z_WAITOK | Z_NOFAIL);
3147 }
3148 *pktopt = NULL;
3149
3150 if (m == NULL || m->m_len == 0) {
3151 /*
3152 * Only turning off any previous options, regardless of
3153 * whether the opt is just created or given.
3154 */
3155 if (opt != NULL) {
3156 kfree_type(struct ip6_pktopts, opt);
3157 }
3158 return 0;
3159 }
3160
3161 /* set options specified by user. */
3162 if ((error = ip6_setpktopts(control: m, opt, NULL, SOCK_PROTO(so))) != 0) {
3163 ip6_clearpktopts(opt, -1); /* XXX: discard all options */
3164 kfree_type(struct ip6_pktopts, opt);
3165 return error;
3166 }
3167 *pktopt = opt;
3168 return 0;
3169}
3170
3171/*
3172 * initialize ip6_pktopts. beware that there are non-zero default values in
3173 * the struct.
3174 */
3175void
3176ip6_initpktopts(struct ip6_pktopts *opt)
3177{
3178 bzero(s: opt, n: sizeof(*opt));
3179 opt->ip6po_hlim = -1; /* -1 means default hop limit */
3180 opt->ip6po_tclass = -1; /* -1 means default traffic class */
3181 opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
3182 opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM;
3183}
3184
3185static int
3186ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt,
3187 int uproto)
3188{
3189 struct ip6_pktopts *opt;
3190
3191 opt = *pktopt;
3192 if (opt == NULL) {
3193 opt = kalloc_type(struct ip6_pktopts, Z_WAITOK | Z_NOFAIL);
3194 ip6_initpktopts(opt);
3195 *pktopt = opt;
3196 }
3197
3198 return ip6_setpktopt(optname, buf, len, opt, 1, 0, uproto);
3199}
3200
3201static int
3202ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt)
3203{
3204 void *optdata = NULL;
3205 int optdatalen = 0;
3206 struct ip6_ext *ip6e;
3207 struct in6_pktinfo null_pktinfo;
3208 int deftclass = 0, on;
3209 int defminmtu = IP6PO_MINMTU_MCASTONLY;
3210 int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
3211
3212
3213 switch (optname) {
3214 case IPV6_PKTINFO:
3215 if (pktopt && pktopt->ip6po_pktinfo) {
3216 optdata = (void *)pktopt->ip6po_pktinfo;
3217 } else {
3218 /* XXX: we don't have to do this every time... */
3219 bzero(s: &null_pktinfo, n: sizeof(null_pktinfo));
3220 optdata = (void *)&null_pktinfo;
3221 }
3222 optdatalen = sizeof(struct in6_pktinfo);
3223 break;
3224
3225 case IPV6_TCLASS:
3226 if (pktopt && pktopt->ip6po_tclass >= 0) {
3227 optdata = (void *)&pktopt->ip6po_tclass;
3228 } else {
3229 optdata = (void *)&deftclass;
3230 }
3231 optdatalen = sizeof(int);
3232 break;
3233
3234 case IPV6_HOPOPTS:
3235 if (pktopt && pktopt->ip6po_hbh) {
3236 optdata = (void *)pktopt->ip6po_hbh;
3237 ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
3238 optdatalen = (ip6e->ip6e_len + 1) << 3;
3239 }
3240 break;
3241
3242 case IPV6_RTHDR:
3243 if (pktopt && pktopt->ip6po_rthdr) {
3244 optdata = (void *)pktopt->ip6po_rthdr;
3245 ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
3246 optdatalen = (ip6e->ip6e_len + 1) << 3;
3247 }
3248 break;
3249
3250 case IPV6_RTHDRDSTOPTS:
3251 if (pktopt && pktopt->ip6po_dest1) {
3252 optdata = (void *)pktopt->ip6po_dest1;
3253 ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
3254 optdatalen = (ip6e->ip6e_len + 1) << 3;
3255 }
3256 break;
3257
3258 case IPV6_DSTOPTS:
3259 if (pktopt && pktopt->ip6po_dest2) {
3260 optdata = (void *)pktopt->ip6po_dest2;
3261 ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
3262 optdatalen = (ip6e->ip6e_len + 1) << 3;
3263 }
3264 break;
3265
3266 case IPV6_NEXTHOP:
3267 if (pktopt && pktopt->ip6po_nexthop) {
3268 optdata = (void *)pktopt->ip6po_nexthop;
3269 optdatalen = pktopt->ip6po_nexthop->sa_len;
3270 }
3271 break;
3272
3273 case IPV6_USE_MIN_MTU:
3274 if (pktopt) {
3275 optdata = (void *)&pktopt->ip6po_minmtu;
3276 } else {
3277 optdata = (void *)&defminmtu;
3278 }
3279 optdatalen = sizeof(int);
3280 break;
3281
3282 case IPV6_DONTFRAG:
3283 if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG)) {
3284 on = 1;
3285 } else {
3286 on = 0;
3287 }
3288 optdata = (void *)&on;
3289 optdatalen = sizeof(on);
3290 break;
3291
3292 case IPV6_PREFER_TEMPADDR:
3293 if (pktopt) {
3294 optdata = (void *)&pktopt->ip6po_prefer_tempaddr;
3295 } else {
3296 optdata = (void *)&defpreftemp;
3297 }
3298 optdatalen = sizeof(int);
3299 break;
3300
3301 default: /* should not happen */
3302#ifdef DIAGNOSTIC
3303 panic("ip6_getpcbopt: unexpected option");
3304#endif
3305 return ENOPROTOOPT;
3306 }
3307
3308 return sooptcopyout(sopt, data: optdata, len: optdatalen);
3309}
3310
3311void
3312ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname)
3313{
3314 if (pktopt == NULL) {
3315 return;
3316 }
3317
3318 if (optname == -1 || optname == IPV6_PKTINFO) {
3319 if (pktopt->ip6po_pktinfo) {
3320 kfree_type(struct in6_pktinfo, pktopt->ip6po_pktinfo);
3321 }
3322 pktopt->ip6po_pktinfo = NULL;
3323 }
3324 if (optname == -1 || optname == IPV6_HOPLIMIT) {
3325 pktopt->ip6po_hlim = -1;
3326 }
3327 if (optname == -1 || optname == IPV6_TCLASS) {
3328 pktopt->ip6po_tclass = -1;
3329 }
3330 if (optname == -1 || optname == IPV6_NEXTHOP) {
3331 ROUTE_RELEASE(&pktopt->ip6po_nextroute);
3332 if (pktopt->ip6po_nexthop) {
3333 kfree_data_addr(pktopt->ip6po_nexthop);
3334 }
3335 pktopt->ip6po_nexthop = NULL;
3336 }
3337 if (optname == -1 || optname == IPV6_HOPOPTS) {
3338 if (pktopt->ip6po_hbh) {
3339 kfree_data_addr(pktopt->ip6po_hbh);
3340 }
3341 pktopt->ip6po_hbh = NULL;
3342 }
3343 if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
3344 if (pktopt->ip6po_dest1) {
3345 kfree_data_addr(pktopt->ip6po_dest1);
3346 }
3347 pktopt->ip6po_dest1 = NULL;
3348 }
3349 if (optname == -1 || optname == IPV6_RTHDR) {
3350 if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr) {
3351 kfree_data_addr(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr);
3352 }
3353 pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
3354 ROUTE_RELEASE(&pktopt->ip6po_route);
3355 }
3356 if (optname == -1 || optname == IPV6_DSTOPTS) {
3357 if (pktopt->ip6po_dest2) {
3358 kfree_data_addr(pktopt->ip6po_dest2);
3359 }
3360 pktopt->ip6po_dest2 = NULL;
3361 }
3362}
3363
3364#define PKTOPT_EXTHDRCPY(type) do { \
3365 if (src->type) { \
3366 int hlen = \
3367 (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3; \
3368 dst->type = kalloc_data(hlen, canwait); \
3369 if (dst->type == NULL && canwait == Z_NOWAIT) \
3370 goto bad; \
3371 bcopy(src->type, dst->type, hlen); \
3372 } \
3373} while (0)
3374
3375static int
3376copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, zalloc_flags_t canwait)
3377{
3378 if (dst == NULL || src == NULL) {
3379 printf("copypktopts: invalid argument\n");
3380 return EINVAL;
3381 }
3382
3383 dst->ip6po_hlim = src->ip6po_hlim;
3384 dst->ip6po_tclass = src->ip6po_tclass;
3385 dst->ip6po_flags = src->ip6po_flags;
3386 if (src->ip6po_pktinfo) {
3387 dst->ip6po_pktinfo = kalloc_type(struct in6_pktinfo, canwait);
3388 if (dst->ip6po_pktinfo == NULL && canwait == Z_NOWAIT) {
3389 goto bad;
3390 }
3391 *dst->ip6po_pktinfo = *src->ip6po_pktinfo;
3392 }
3393 if (src->ip6po_nexthop) {
3394 dst->ip6po_nexthop = kalloc_data(src->ip6po_nexthop->sa_len, canwait);
3395 if (dst->ip6po_nexthop == NULL && canwait == Z_NOWAIT) {
3396 goto bad;
3397 }
3398 SOCKADDR_COPY(src->ip6po_nexthop, dst->ip6po_nexthop,
3399 src->ip6po_nexthop->sa_len);
3400 }
3401 PKTOPT_EXTHDRCPY(ip6po_hbh);
3402 PKTOPT_EXTHDRCPY(ip6po_dest1);
3403 PKTOPT_EXTHDRCPY(ip6po_dest2);
3404 PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
3405 return 0;
3406
3407bad:
3408 ip6_clearpktopts(pktopt: dst, optname: -1);
3409 return ENOBUFS;
3410}
3411#undef PKTOPT_EXTHDRCPY
3412
3413struct ip6_pktopts *
3414ip6_copypktopts(struct ip6_pktopts *src, zalloc_flags_t canwait)
3415{
3416 int error;
3417 struct ip6_pktopts *dst;
3418
3419 dst = kalloc_type(struct ip6_pktopts, canwait);
3420 if (dst == NULL) {
3421 return NULL;
3422 }
3423 ip6_initpktopts(opt: dst);
3424
3425 if ((error = copypktopts(dst, src, canwait)) != 0) {
3426 kfree_type(struct ip6_pktopts, dst);
3427 return NULL;
3428 }
3429
3430 return dst;
3431}
3432
3433void
3434ip6_freepcbopts(struct ip6_pktopts *pktopt)
3435{
3436 if (pktopt == NULL) {
3437 return;
3438 }
3439
3440 ip6_clearpktopts(pktopt, optname: -1);
3441
3442 kfree_type(struct ip6_pktopts, pktopt);
3443}
3444
3445void
3446ip6_moptions_init(void)
3447{
3448 PE_parse_boot_argn(arg_string: "ifa_debug", arg_ptr: &im6o_debug, max_arg: sizeof(im6o_debug));
3449
3450 vm_size_t im6o_size = (im6o_debug == 0) ? sizeof(struct ip6_moptions) :
3451 sizeof(struct ip6_moptions_dbg);
3452
3453 im6o_zone = zone_create(IM6O_ZONE_NAME, size: im6o_size, flags: ZC_ZFREE_CLEARMEM);
3454}
3455
3456void
3457im6o_addref(struct ip6_moptions *im6o, int locked)
3458{
3459 if (!locked) {
3460 IM6O_LOCK(im6o);
3461 } else {
3462 IM6O_LOCK_ASSERT_HELD(im6o);
3463 }
3464
3465 if (++im6o->im6o_refcnt == 0) {
3466 panic("%s: im6o %p wraparound refcnt", __func__, im6o);
3467 /* NOTREACHED */
3468 } else if (im6o->im6o_trace != NULL) {
3469 (*im6o->im6o_trace)(im6o, TRUE);
3470 }
3471
3472 if (!locked) {
3473 IM6O_UNLOCK(im6o);
3474 }
3475}
3476
3477void
3478im6o_remref(struct ip6_moptions *im6o)
3479{
3480 int i;
3481
3482 IM6O_LOCK(im6o);
3483 if (im6o->im6o_refcnt == 0) {
3484 panic("%s: im6o %p negative refcnt", __func__, im6o);
3485 /* NOTREACHED */
3486 } else if (im6o->im6o_trace != NULL) {
3487 (*im6o->im6o_trace)(im6o, FALSE);
3488 }
3489
3490 --im6o->im6o_refcnt;
3491 if (im6o->im6o_refcnt > 0) {
3492 IM6O_UNLOCK(im6o);
3493 return;
3494 }
3495
3496 for (i = 0; i < im6o->im6o_num_memberships; ++i) {
3497 struct in6_mfilter *imf;
3498
3499 imf = im6o->im6o_mfilters ? &im6o->im6o_mfilters[i] : NULL;
3500 if (imf != NULL) {
3501 im6f_leave(imf);
3502 }
3503
3504 (void) in6_mc_leave(im6o->im6o_membership[i], imf);
3505
3506 if (imf != NULL) {
3507 im6f_purge(imf);
3508 }
3509
3510 IN6M_REMREF(im6o->im6o_membership[i]);
3511 im6o->im6o_membership[i] = NULL;
3512 }
3513 im6o->im6o_num_memberships = 0;
3514 IM6O_UNLOCK(im6o);
3515
3516 kfree_type(struct in6_multi *, im6o->im6o_max_memberships, im6o->im6o_membership);
3517 kfree_type(struct in6_mfilter, im6o->im6o_max_memberships, im6o->im6o_mfilters);
3518 lck_mtx_destroy(lck: &im6o->im6o_lock, grp: &ifa_mtx_grp);
3519
3520 if (!(im6o->im6o_debug & IFD_ALLOC)) {
3521 panic("%s: im6o %p cannot be freed", __func__, im6o);
3522 /* NOTREACHED */
3523 }
3524 zfree(im6o_zone, im6o);
3525}
3526
3527static void
3528im6o_trace(struct ip6_moptions *im6o, int refhold)
3529{
3530 struct ip6_moptions_dbg *im6o_dbg = (struct ip6_moptions_dbg *)im6o;
3531 ctrace_t *tr;
3532 u_int32_t idx;
3533 u_int16_t *cnt;
3534
3535 if (!(im6o->im6o_debug & IFD_DEBUG)) {
3536 panic("%s: im6o %p has no debug structure", __func__, im6o);
3537 /* NOTREACHED */
3538 }
3539 if (refhold) {
3540 cnt = &im6o_dbg->im6o_refhold_cnt;
3541 tr = im6o_dbg->im6o_refhold;
3542 } else {
3543 cnt = &im6o_dbg->im6o_refrele_cnt;
3544 tr = im6o_dbg->im6o_refrele;
3545 }
3546
3547 idx = os_atomic_inc_orig(cnt, relaxed) % IM6O_TRACE_HIST_SIZE;
3548 ctrace_record(&tr[idx]);
3549}
3550
3551struct ip6_moptions *
3552ip6_allocmoptions(zalloc_flags_t how)
3553{
3554 struct ip6_moptions *im6o;
3555
3556 im6o = zalloc_flags(im6o_zone, how | Z_ZERO);
3557 if (im6o != NULL) {
3558 lck_mtx_init(lck: &im6o->im6o_lock, grp: &ifa_mtx_grp, attr: &ifa_mtx_attr);
3559 im6o->im6o_debug |= IFD_ALLOC;
3560 if (im6o_debug != 0) {
3561 im6o->im6o_debug |= IFD_DEBUG;
3562 im6o->im6o_trace = im6o_trace;
3563 }
3564 IM6O_ADDREF(im6o);
3565 }
3566
3567 return im6o;
3568}
3569
3570/*
3571 * Set IPv6 outgoing packet options based on advanced API.
3572 */
3573int
3574ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt,
3575 struct ip6_pktopts *stickyopt, int uproto)
3576{
3577 struct cmsghdr *cm = NULL;
3578
3579 if (control == NULL || opt == NULL) {
3580 return EINVAL;
3581 }
3582
3583 ip6_initpktopts(opt);
3584 if (stickyopt) {
3585 int error;
3586
3587 /*
3588 * If stickyopt is provided, make a local copy of the options
3589 * for this particular packet, then override them by ancillary
3590 * objects.
3591 * XXX: copypktopts() does not copy the cached route to a next
3592 * hop (if any). This is not very good in terms of efficiency,
3593 * but we can allow this since this option should be rarely
3594 * used.
3595 */
3596 if ((error = copypktopts(dst: opt, src: stickyopt, canwait: Z_NOWAIT)) != 0) {
3597 return error;
3598 }
3599 }
3600
3601 /*
3602 * XXX: Currently, we assume all the optional information is stored
3603 * in a single mbuf.
3604 */
3605 if (control->m_next) {
3606 return EINVAL;
3607 }
3608
3609 if (control->m_len < CMSG_LEN(0)) {
3610 return EINVAL;
3611 }
3612
3613 for (cm = M_FIRST_CMSGHDR(control);
3614 is_cmsg_valid(control, cmsg: cm);
3615 cm = M_NXT_CMSGHDR(control, cm)) {
3616 int error;
3617
3618 if (cm->cmsg_level != IPPROTO_IPV6) {
3619 continue;
3620 }
3621
3622 error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm),
3623 cm->cmsg_len - CMSG_LEN(0), opt, 0, 1, uproto);
3624 if (error) {
3625 return error;
3626 }
3627 }
3628
3629 return 0;
3630}
3631/*
3632 * Set a particular packet option, as a sticky option or an ancillary data
3633 * item. "len" can be 0 only when it's a sticky option.
3634 * We have 4 cases of combination of "sticky" and "cmsg":
3635 * "sticky=0, cmsg=0": impossible
3636 * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data
3637 * "sticky=1, cmsg=0": RFC3542 socket option
3638 * "sticky=1, cmsg=1": RFC2292 socket option
3639 */
3640static int
3641ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
3642 int sticky, int cmsg, int uproto)
3643{
3644 int minmtupolicy, preftemp;
3645 int error;
3646 boolean_t capture_exthdrstat_out = FALSE;
3647
3648 if (!sticky && !cmsg) {
3649#ifdef DIAGNOSTIC
3650 printf("ip6_setpktopt: impossible case\n");
3651#endif
3652 return EINVAL;
3653 }
3654
3655 /*
3656 * Caller must have ensured that the buffer is at least
3657 * aligned on 32-bit boundary.
3658 */
3659 VERIFY(IS_P2ALIGNED(buf, sizeof(u_int32_t)));
3660
3661 /*
3662 * IPV6_2292xxx is for backward compatibility to RFC2292, and should
3663 * not be specified in the context of RFC3542. Conversely,
3664 * RFC3542 types should not be specified in the context of RFC2292.
3665 */
3666 if (!cmsg) {
3667 switch (optname) {
3668 case IPV6_2292PKTINFO:
3669 case IPV6_2292HOPLIMIT:
3670 case IPV6_2292NEXTHOP:
3671 case IPV6_2292HOPOPTS:
3672 case IPV6_2292DSTOPTS:
3673 case IPV6_2292RTHDR:
3674 case IPV6_2292PKTOPTIONS:
3675 return ENOPROTOOPT;
3676 }
3677 }
3678 if (sticky && cmsg) {
3679 switch (optname) {
3680 case IPV6_PKTINFO:
3681 case IPV6_HOPLIMIT:
3682 case IPV6_NEXTHOP:
3683 case IPV6_HOPOPTS:
3684 case IPV6_DSTOPTS:
3685 case IPV6_RTHDRDSTOPTS:
3686 case IPV6_RTHDR:
3687 case IPV6_USE_MIN_MTU:
3688 case IPV6_DONTFRAG:
3689 case IPV6_TCLASS:
3690 case IPV6_PREFER_TEMPADDR: /* XXX: not an RFC3542 option */
3691 return ENOPROTOOPT;
3692 }
3693 }
3694
3695 switch (optname) {
3696 case IPV6_2292PKTINFO:
3697 case IPV6_PKTINFO: {
3698 struct ifnet *ifp = NULL;
3699 struct in6_pktinfo *pktinfo;
3700
3701 if (len != sizeof(struct in6_pktinfo)) {
3702 return EINVAL;
3703 }
3704
3705 pktinfo = (struct in6_pktinfo *)(void *)buf;
3706
3707 /*
3708 * An application can clear any sticky IPV6_PKTINFO option by
3709 * doing a "regular" setsockopt with ipi6_addr being
3710 * in6addr_any and ipi6_ifindex being zero.
3711 * [RFC 3542, Section 6]
3712 */
3713 if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
3714 pktinfo->ipi6_ifindex == 0 &&
3715 IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3716 ip6_clearpktopts(pktopt: opt, optname);
3717 break;
3718 }
3719
3720 if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
3721 sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3722 return EINVAL;
3723 }
3724
3725 /* validate the interface index if specified. */
3726 ifnet_head_lock_shared();
3727
3728 if (pktinfo->ipi6_ifindex > if_index) {
3729 ifnet_head_done();
3730 return ENXIO;
3731 }
3732
3733 if (pktinfo->ipi6_ifindex) {
3734 ifp = ifindex2ifnet[pktinfo->ipi6_ifindex];
3735 if (ifp == NULL) {
3736 ifnet_head_done();
3737 return ENXIO;
3738 }
3739 }
3740
3741 ifnet_head_done();
3742
3743 /*
3744 * We store the address anyway, and let in6_selectsrc()
3745 * validate the specified address. This is because ipi6_addr
3746 * may not have enough information about its scope zone, and
3747 * we may need additional information (such as outgoing
3748 * interface or the scope zone of a destination address) to
3749 * disambiguate the scope.
3750 * XXX: the delay of the validation may confuse the
3751 * application when it is used as a sticky option.
3752 */
3753 if (opt->ip6po_pktinfo == NULL) {
3754 opt->ip6po_pktinfo = kalloc_type(struct in6_pktinfo, Z_NOWAIT);
3755 if (opt->ip6po_pktinfo == NULL) {
3756 return ENOBUFS;
3757 }
3758 }
3759 bcopy(src: pktinfo, dst: opt->ip6po_pktinfo, n: sizeof(*pktinfo));
3760 break;
3761 }
3762
3763 case IPV6_2292HOPLIMIT:
3764 case IPV6_HOPLIMIT: {
3765 int *hlimp;
3766
3767 /*
3768 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
3769 * to simplify the ordering among hoplimit options.
3770 */
3771 if (optname == IPV6_HOPLIMIT && sticky) {
3772 return ENOPROTOOPT;
3773 }
3774
3775 if (len != sizeof(int)) {
3776 return EINVAL;
3777 }
3778 hlimp = (int *)(void *)buf;
3779 if (*hlimp < -1 || *hlimp > IPV6_MAXHLIM) {
3780 return EINVAL;
3781 }
3782
3783 opt->ip6po_hlim = *hlimp;
3784 break;
3785 }
3786
3787 case IPV6_TCLASS: {
3788 int tclass;
3789
3790 if (len != sizeof(int)) {
3791 return EINVAL;
3792 }
3793 tclass = *(int *)(void *)buf;
3794 if (tclass < -1 || tclass > 255) {
3795 return EINVAL;
3796 }
3797
3798 opt->ip6po_tclass = tclass;
3799 break;
3800 }
3801
3802 case IPV6_2292NEXTHOP:
3803 case IPV6_NEXTHOP:
3804 error = suser(cred: kauth_cred_get(), acflag: 0);
3805 if (error) {
3806 return EACCES;
3807 }
3808
3809 if (len == 0) { /* just remove the option */
3810 ip6_clearpktopts(pktopt: opt, IPV6_NEXTHOP);
3811 break;
3812 }
3813
3814 /* check if cmsg_len is large enough for sa_len */
3815 if (len < sizeof(struct sockaddr) || len < *buf) {
3816 return EINVAL;
3817 }
3818
3819 switch (SA(buf)->sa_family) {
3820 case AF_INET6: {
3821 struct sockaddr_in6 *sa6 = SIN6(buf);
3822
3823 if (sa6->sin6_len != sizeof(struct sockaddr_in6)) {
3824 return EINVAL;
3825 }
3826
3827 if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
3828 IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
3829 return EINVAL;
3830 }
3831 if ((error = sa6_embedscope(sa6, ip6_use_defzone, IN6_NULL_IF_EMBEDDED_SCOPE(&sa6->sin6_scope_id)))
3832 != 0) {
3833 return error;
3834 }
3835 break;
3836 }
3837 case AF_LINK: /* should eventually be supported */
3838 default:
3839 return EAFNOSUPPORT;
3840 }
3841
3842 /* turn off the previous option, then set the new option. */
3843 ip6_clearpktopts(pktopt: opt, IPV6_NEXTHOP);
3844 opt->ip6po_nexthop = kalloc_data(*buf, Z_NOWAIT);
3845 if (opt->ip6po_nexthop == NULL) {
3846 return ENOBUFS;
3847 }
3848 SOCKADDR_COPY(buf, opt->ip6po_nexthop, *buf);
3849 break;
3850
3851 case IPV6_2292HOPOPTS:
3852 case IPV6_HOPOPTS: {
3853 struct ip6_hbh *hbh;
3854 int hbhlen;
3855
3856 /*
3857 * XXX: We don't allow a non-privileged user to set ANY HbH
3858 * options, since per-option restriction has too much
3859 * overhead.
3860 */
3861 error = suser(cred: kauth_cred_get(), acflag: 0);
3862 if (error) {
3863 return EACCES;
3864 }
3865
3866 if (len == 0) {
3867 ip6_clearpktopts(pktopt: opt, IPV6_HOPOPTS);
3868 break; /* just remove the option */
3869 }
3870
3871 /* message length validation */
3872 if (len < sizeof(struct ip6_hbh)) {
3873 return EINVAL;
3874 }
3875 hbh = (struct ip6_hbh *)(void *)buf;
3876 hbhlen = (hbh->ip6h_len + 1) << 3;
3877 if (len != hbhlen) {
3878 return EINVAL;
3879 }
3880
3881 /* turn off the previous option, then set the new option. */
3882 ip6_clearpktopts(pktopt: opt, IPV6_HOPOPTS);
3883 opt->ip6po_hbh = kalloc_data(hbhlen, Z_NOWAIT);
3884 if (opt->ip6po_hbh == NULL) {
3885 return ENOBUFS;
3886 }
3887 bcopy(src: hbh, dst: opt->ip6po_hbh, n: hbhlen);
3888 capture_exthdrstat_out = TRUE;
3889 break;
3890 }
3891
3892 case IPV6_2292DSTOPTS:
3893 case IPV6_DSTOPTS:
3894 case IPV6_RTHDRDSTOPTS: {
3895 struct ip6_dest *dest, **newdest = NULL;
3896 int destlen;
3897
3898 error = suser(cred: kauth_cred_get(), acflag: 0);
3899 if (error) {
3900 return EACCES;
3901 }
3902
3903 if (len == 0) {
3904 ip6_clearpktopts(pktopt: opt, optname);
3905 break; /* just remove the option */
3906 }
3907
3908 /* message length validation */
3909 if (len < sizeof(struct ip6_dest)) {
3910 return EINVAL;
3911 }
3912 dest = (struct ip6_dest *)(void *)buf;
3913 destlen = (dest->ip6d_len + 1) << 3;
3914 if (len != destlen) {
3915 return EINVAL;
3916 }
3917
3918 /*
3919 * Determine the position that the destination options header
3920 * should be inserted; before or after the routing header.
3921 */
3922 switch (optname) {
3923 case IPV6_2292DSTOPTS:
3924 /*
3925 * The old advacned API is ambiguous on this point.
3926 * Our approach is to determine the position based
3927 * according to the existence of a routing header.
3928 * Note, however, that this depends on the order of the
3929 * extension headers in the ancillary data; the 1st
3930 * part of the destination options header must appear
3931 * before the routing header in the ancillary data,
3932 * too.
3933 * RFC3542 solved the ambiguity by introducing
3934 * separate ancillary data or option types.
3935 */
3936 if (opt->ip6po_rthdr == NULL) {
3937 newdest = &opt->ip6po_dest1;
3938 } else {
3939 newdest = &opt->ip6po_dest2;
3940 }
3941 break;
3942 case IPV6_RTHDRDSTOPTS:
3943 newdest = &opt->ip6po_dest1;
3944 break;
3945 case IPV6_DSTOPTS:
3946 newdest = &opt->ip6po_dest2;
3947 break;
3948 }
3949
3950 /* turn off the previous option, then set the new option. */
3951 ip6_clearpktopts(pktopt: opt, optname);
3952 *newdest = kalloc_data(destlen, Z_NOWAIT);
3953 if (*newdest == NULL) {
3954 return ENOBUFS;
3955 }
3956 bcopy(src: dest, dst: *newdest, n: destlen);
3957 capture_exthdrstat_out = TRUE;
3958 break;
3959 }
3960
3961 case IPV6_2292RTHDR:
3962 case IPV6_RTHDR: {
3963 struct ip6_rthdr *rth;
3964 int rthlen;
3965
3966 if (len == 0) {
3967 ip6_clearpktopts(pktopt: opt, IPV6_RTHDR);
3968 break; /* just remove the option */
3969 }
3970
3971 /* message length validation */
3972 if (len < sizeof(struct ip6_rthdr)) {
3973 return EINVAL;
3974 }
3975 rth = (struct ip6_rthdr *)(void *)buf;
3976 rthlen = (rth->ip6r_len + 1) << 3;
3977 if (len != rthlen) {
3978 return EINVAL;
3979 }
3980
3981 switch (rth->ip6r_type) {
3982 case IPV6_RTHDR_TYPE_0:
3983 if (rth->ip6r_len == 0) { /* must contain one addr */
3984 return EINVAL;
3985 }
3986 if (rth->ip6r_len % 2) { /* length must be even */
3987 return EINVAL;
3988 }
3989 if (rth->ip6r_len / 2 != rth->ip6r_segleft) {
3990 return EINVAL;
3991 }
3992 break;
3993 default:
3994 return EINVAL; /* not supported */
3995 }
3996
3997 /* turn off the previous option */
3998 ip6_clearpktopts(pktopt: opt, IPV6_RTHDR);
3999 opt->ip6po_rthdr = kalloc_data(rthlen, Z_NOWAIT);
4000 if (opt->ip6po_rthdr == NULL) {
4001 return ENOBUFS;
4002 }
4003 bcopy(src: rth, dst: opt->ip6po_rthdr, n: rthlen);
4004 capture_exthdrstat_out = TRUE;
4005 break;
4006 }
4007
4008 case IPV6_USE_MIN_MTU:
4009 if (len != sizeof(int)) {
4010 return EINVAL;
4011 }
4012 minmtupolicy = *(int *)(void *)buf;
4013 if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
4014 minmtupolicy != IP6PO_MINMTU_DISABLE &&
4015 minmtupolicy != IP6PO_MINMTU_ALL) {
4016 return EINVAL;
4017 }
4018 opt->ip6po_minmtu = minmtupolicy;
4019 break;
4020
4021 case IPV6_DONTFRAG:
4022 if (len != sizeof(int)) {
4023 return EINVAL;
4024 }
4025
4026 if (uproto == IPPROTO_TCP || *(int *)(void *)buf == 0) {
4027 /*
4028 * we ignore this option for TCP sockets.
4029 * (RFC3542 leaves this case unspecified.)
4030 */
4031 opt->ip6po_flags &= ~IP6PO_DONTFRAG;
4032 } else {
4033 opt->ip6po_flags |= IP6PO_DONTFRAG;
4034 }
4035 break;
4036
4037 case IPV6_PREFER_TEMPADDR:
4038 if (len != sizeof(int)) {
4039 return EINVAL;
4040 }
4041 preftemp = *(int *)(void *)buf;
4042 if (preftemp != IP6PO_TEMPADDR_SYSTEM &&
4043 preftemp != IP6PO_TEMPADDR_NOTPREFER &&
4044 preftemp != IP6PO_TEMPADDR_PREFER) {
4045 return EINVAL;
4046 }
4047 opt->ip6po_prefer_tempaddr = preftemp;
4048 break;
4049
4050 default:
4051 return ENOPROTOOPT;
4052 } /* end of switch */
4053
4054 if (capture_exthdrstat_out) {
4055 if (uproto == IPPROTO_TCP) {
4056 INC_ATOMIC_INT64_LIM(net_api_stats.nas_sock_inet6_stream_exthdr_out);
4057 } else if (uproto == IPPROTO_UDP) {
4058 INC_ATOMIC_INT64_LIM(net_api_stats.nas_sock_inet6_dgram_exthdr_out);
4059 }
4060 }
4061
4062 return 0;
4063}
4064
4065/*
4066 * Routine called from ip6_output() to loop back a copy of an IP6 multicast
4067 * packet to the input queue of a specified interface. Note that this
4068 * calls the output routine of the loopback "driver", but with an interface
4069 * pointer that might NOT be &loif -- easier than replicating that code here.
4070 */
4071void
4072ip6_mloopback(struct ifnet *srcifp, struct ifnet *origifp, struct mbuf *m,
4073 struct sockaddr_in6 *dst, uint32_t optlen, int32_t nxt0)
4074{
4075 struct mbuf *copym;
4076 struct ip6_hdr *ip6;
4077 struct in6_addr src;
4078
4079 if (lo_ifp == NULL) {
4080 return;
4081 }
4082
4083 /*
4084 * Copy the packet header as it's needed for the checksum.
4085 * Make sure to deep-copy IPv6 header portion in case the data
4086 * is in an mbuf cluster, so that we can safely override the IPv6
4087 * header portion later.
4088 */
4089 copym = m_copym_mode(m, 0, M_COPYALL, M_DONTWAIT, NULL, NULL, M_COPYM_COPY_HDR);
4090 if (copym != NULL && ((copym->m_flags & M_EXT) ||
4091 copym->m_len < sizeof(struct ip6_hdr))) {
4092 copym = m_pullup(copym, sizeof(struct ip6_hdr));
4093 }
4094
4095 if (copym == NULL) {
4096 return;
4097 }
4098
4099 ip6 = mtod(copym, struct ip6_hdr *);
4100 src = ip6->ip6_src;
4101 /*
4102 * clear embedded scope identifiers if necessary.
4103 * in6_clearscope will touch the addresses only when necessary.
4104 */
4105 in6_clearscope(&ip6->ip6_src);
4106 in6_clearscope(&ip6->ip6_dst);
4107
4108 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA) {
4109 in6_delayed_cksum_offset(copym, 0, optlen, nxt0);
4110 }
4111
4112 /*
4113 * Stuff the 'real' ifp into the pkthdr, to be used in matching
4114 * in ip6_input(); we need the loopback ifp/dl_tag passed as args
4115 * to make the loopback driver compliant with the data link
4116 * requirements.
4117 */
4118 copym->m_pkthdr.rcvif = origifp;
4119
4120 /*
4121 * Also record the source interface (which owns the source address).
4122 * This is basically a stripped down version of ifa_foraddr6().
4123 */
4124 if (srcifp == NULL) {
4125 struct in6_ifaddr *ia;
4126
4127 lck_rw_lock_shared(lck: &in6_ifaddr_rwlock);
4128 TAILQ_FOREACH(ia, IN6ADDR_HASH(&src), ia6_hash) {
4129 IFA_LOCK_SPIN(&ia->ia_ifa);
4130 /* compare against src addr with embedded scope */
4131 if (in6_are_addr_equal_scoped(&ia->ia_addr.sin6_addr, &src, ia->ia_addr.sin6_scope_id, ip6_output_getsrcifscope(m))) {
4132 srcifp = ia->ia_ifp;
4133 IFA_UNLOCK(&ia->ia_ifa);
4134 break;
4135 }
4136 IFA_UNLOCK(&ia->ia_ifa);
4137 }
4138 lck_rw_done(lck: &in6_ifaddr_rwlock);
4139 }
4140 if (srcifp != NULL) {
4141 ip6_setsrcifaddr_info(copym, srcifp->if_index, NULL);
4142 }
4143 ip6_setdstifaddr_info(copym, origifp->if_index, NULL);
4144
4145 dlil_output(lo_ifp, PF_INET6, copym, NULL, SA(dst), 0, NULL);
4146}
4147
4148/*
4149 * Chop IPv6 header off from the payload.
4150 */
4151static int
4152ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs)
4153{
4154 struct mbuf *mh;
4155 struct ip6_hdr *ip6;
4156
4157 ip6 = mtod(m, struct ip6_hdr *);
4158 if (m->m_len > sizeof(*ip6)) {
4159 MGETHDR(mh, M_DONTWAIT, MT_HEADER); /* MAC-OK */
4160 if (mh == NULL) {
4161 m_freem(m);
4162 return ENOBUFS;
4163 }
4164 M_COPY_PKTHDR(mh, m);
4165 MH_ALIGN(mh, sizeof(*ip6));
4166 m->m_flags &= ~M_PKTHDR;
4167 m->m_len -= sizeof(*ip6);
4168 m->m_data += sizeof(*ip6);
4169 mh->m_next = m;
4170 m = mh;
4171 m->m_len = sizeof(*ip6);
4172 bcopy(src: (caddr_t)ip6, mtod(m, caddr_t), n: sizeof(*ip6));
4173 }
4174 exthdrs->ip6e_ip6 = m;
4175 return 0;
4176}
4177
/*
 * Decide, for an outgoing packet, which checksum requests recorded in
 * m->m_pkthdr.csum_flags are computed here in software (through
 * in6_delayed_cksum_offset()) and which flag bits are left in the mbuf.
 *
 * ifp supplies the hardware-assist capabilities (if_hwassist); mtu and
 * tlen are compared to gate the partial-offload path; optlen and nxt0 are
 * passed on to in6_delayed_cksum_offset().  When hwcksum_tx is off, all
 * checksum flags are cleared from the mbuf on return.
 */
static void
ip6_output_checksum(struct ifnet *ifp, uint32_t mtu, struct mbuf *m,
    int nxt0, uint32_t tlen, uint32_t optlen)
{
	uint32_t sw_csum, hwcap = ifp->if_hwassist;

	if (!hwcksum_tx) {
		/* do all in software; checksum offload is disabled */
		sw_csum = CSUM_DELAY_IPV6_DATA & m->m_pkthdr.csum_flags;
	} else {
		/* do in software what the hardware cannot */
		sw_csum = m->m_pkthdr.csum_flags & ~IF_HWASSIST_CSUM_FLAGS(hwcap);
	}

	if (optlen != 0) {
		/* extension headers present: always checksum in software */
		sw_csum |= (CSUM_DELAY_IPV6_DATA &
		    m->m_pkthdr.csum_flags);
	} else if ((sw_csum & CSUM_DELAY_IPV6_DATA) && (hwcap & CSUM_PARTIAL)) {
		/*
		 * Partial checksum offload, if no extension headers,
		 * and TCP only (no UDP support, as the hardware may not be
		 * able to convert +0 to -0 (0xffff) per RFC1122 4.1.3.4.
		 * unless the interface supports "invert zero" capability.)
		 */
		if (hwcksum_tx &&
		    ((m->m_pkthdr.csum_flags & CSUM_TCPIPV6) ||
		    ((hwcap & CSUM_ZERO_INVERT) &&
		    (m->m_pkthdr.csum_flags & CSUM_ZERO_INVERT))) &&
		    tlen <= mtu) {
			/* checksum starts right after the IPv6 header */
			uint16_t start = sizeof(struct ip6_hdr);
			uint16_t ulpoff =
			    m->m_pkthdr.csum_data & 0xffff;
			m->m_pkthdr.csum_flags |=
			    (CSUM_DATA_VALID | CSUM_PARTIAL);
			m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
			m->m_pkthdr.csum_tx_start = start;
			sw_csum = 0;
		} else {
			sw_csum |= (CSUM_DELAY_IPV6_DATA &
			    m->m_pkthdr.csum_flags);
		}
	}

	if (sw_csum & CSUM_DELAY_IPV6_DATA) {
		in6_delayed_cksum_offset(m, 0, optlen, nxt0);
		sw_csum &= ~CSUM_DELAY_IPV6_DATA;
	}

	if (hwcksum_tx) {
		uint32_t delay_data = m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA;
		uint32_t hw_csum = IF_HWASSIST_CSUM_FLAGS(hwcap);

		/*
		 * Drop off bits that aren't supported by hardware;
		 * also make sure to preserve non-checksum related bits.
		 */
		m->m_pkthdr.csum_flags =
		    ((m->m_pkthdr.csum_flags & (hw_csum | CSUM_DATA_VALID)) |
		    (m->m_pkthdr.csum_flags & ~IF_HWASSIST_CSUM_MASK));

		/*
		 * If hardware supports partial checksum but not delay_data,
		 * add back delay_data.
		 */
		if ((hw_csum & CSUM_PARTIAL) != 0 &&
		    (hw_csum & delay_data) == 0) {
			m->m_pkthdr.csum_flags |= delay_data;
		}
	} else {
		/* drop all bits; checksum offload is disabled */
		m->m_pkthdr.csum_flags = 0;
	}
}
4251
4252/*
4253 * Compute IPv6 extension header length.
4254 */
4255int
4256ip6_optlen(struct in6pcb *in6p)
4257{
4258 int len;
4259
4260 if (!in6p->in6p_outputopts) {
4261 return 0;
4262 }
4263
4264 len = 0;
4265#define elen(x) \
4266 (((struct ip6_ext *)(x)) ? \
4267 (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
4268
4269 len += elen(in6p->in6p_outputopts->ip6po_hbh);
4270 if (in6p->in6p_outputopts->ip6po_rthdr) {
4271 /* dest1 is valid with rthdr only */
4272 len += elen(in6p->in6p_outputopts->ip6po_dest1);
4273 }
4274 len += elen(in6p->in6p_outputopts->ip6po_rthdr);
4275 len += elen(in6p->in6p_outputopts->ip6po_dest2);
4276 return len;
4277#undef elen
4278}
4279
4280static int
4281sysctl_reset_ip6_output_stats SYSCTL_HANDLER_ARGS
4282{
4283#pragma unused(arg1, arg2)
4284 int error, i;
4285
4286 i = ip6_output_measure;
4287 error = sysctl_handle_int(oidp, arg1: &i, arg2: 0, req);
4288 if (error || req->newptr == USER_ADDR_NULL) {
4289 goto done;
4290 }
4291 /* impose bounds */
4292 if (i < 0 || i > 1) {
4293 error = EINVAL;
4294 goto done;
4295 }
4296 if (ip6_output_measure != i && i == 1) {
4297 net_perf_initialize(npp: &net_perf, bins: ip6_output_measure_bins);
4298 }
4299 ip6_output_measure = i;
4300done:
4301 return error;
4302}
4303
4304static int
4305sysctl_ip6_output_measure_bins SYSCTL_HANDLER_ARGS
4306{
4307#pragma unused(arg1, arg2)
4308 int error;
4309 uint64_t i;
4310
4311 i = ip6_output_measure_bins;
4312 error = sysctl_handle_quad(oidp, arg1: &i, arg2: 0, req);
4313 if (error || req->newptr == USER_ADDR_NULL) {
4314 goto done;
4315 }
4316 /* validate data */
4317 if (!net_perf_validate_bins(bins: i)) {
4318 error = EINVAL;
4319 goto done;
4320 }
4321 ip6_output_measure_bins = i;
4322done:
4323 return error;
4324}
4325
4326static int
4327sysctl_ip6_output_getperf SYSCTL_HANDLER_ARGS
4328{
4329#pragma unused(oidp, arg1, arg2)
4330 if (req->oldptr == USER_ADDR_NULL) {
4331 req->oldlen = (size_t)sizeof(struct ipstat);
4332 }
4333
4334 return SYSCTL_OUT(req, &net_perf, MIN(sizeof(net_perf), req->oldlen));
4335}
4336
4337void
4338ip6_output_setsrcifscope(struct mbuf *m, uint32_t src_idx, struct in6_ifaddr *ia6)
4339{
4340 VERIFY(m->m_flags & M_PKTHDR);
4341
4342 m->m_pkthdr.pkt_ext_flags |= PKTF_EXT_OUTPUT_SCOPE;
4343 if (ia6 != NULL) {
4344 m->m_pkthdr.src_ifindex = ia6->ia_ifp->if_index;
4345 } else {
4346 m->m_pkthdr.src_ifindex = (uint16_t)src_idx;
4347 }
4348}
4349
4350void
4351ip6_output_setdstifscope(struct mbuf *m, uint32_t dst_idx, struct in6_ifaddr *ia6)
4352{
4353 VERIFY(m->m_flags & M_PKTHDR);
4354
4355 m->m_pkthdr.pkt_ext_flags |= PKTF_EXT_OUTPUT_SCOPE;
4356 if (ia6 != NULL) {
4357 m->m_pkthdr.dst_ifindex = ia6->ia_ifp->if_index;
4358 } else {
4359 m->m_pkthdr.dst_ifindex = (uint16_t)dst_idx;
4360 }
4361}
4362
4363uint32_t
4364ip6_output_getsrcifscope(struct mbuf *m)
4365{
4366 VERIFY(m->m_flags & M_PKTHDR);
4367 if (in6_embedded_scope_debug) {
4368 VERIFY(m->m_pkthdr.pkt_ext_flags & PKTF_EXT_OUTPUT_SCOPE);
4369 VERIFY((m->m_pkthdr.pkt_flags & PKTF_IFAINFO) == 0);
4370 }
4371
4372 return m->m_pkthdr.src_ifindex;
4373}
4374
4375uint32_t
4376ip6_output_getdstifscope(struct mbuf *m)
4377{
4378 VERIFY(m->m_flags & M_PKTHDR);
4379 if (in6_embedded_scope_debug) {
4380 VERIFY(m->m_pkthdr.pkt_ext_flags & PKTF_EXT_OUTPUT_SCOPE);
4381 VERIFY((m->m_pkthdr.pkt_flags & PKTF_IFAINFO) == 0);
4382 }
4383
4384 return m->m_pkthdr.dst_ifindex;
4385}
4386