1 | /* |
2 | * Copyright (c) 2000-2023 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | |
29 | /* $FreeBSD: src/sys/netinet6/frag6.c,v 1.2.2.5 2001/07/03 11:01:50 ume Exp $ */ |
30 | /* $KAME: frag6.c,v 1.31 2001/05/17 13:45:34 jinmei Exp $ */ |
31 | |
32 | /* |
33 | * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. |
34 | * All rights reserved. |
35 | * |
36 | * Redistribution and use in source and binary forms, with or without |
37 | * modification, are permitted provided that the following conditions |
38 | * are met: |
39 | * 1. Redistributions of source code must retain the above copyright |
40 | * notice, this list of conditions and the following disclaimer. |
41 | * 2. Redistributions in binary form must reproduce the above copyright |
42 | * notice, this list of conditions and the following disclaimer in the |
43 | * documentation and/or other materials provided with the distribution. |
44 | * 3. Neither the name of the project nor the names of its contributors |
45 | * may be used to endorse or promote products derived from this software |
46 | * without specific prior written permission. |
47 | * |
48 | * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND |
49 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
50 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
51 | * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE |
52 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
53 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
54 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
55 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
56 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
57 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
58 | * SUCH DAMAGE. |
59 | */ |
60 | |
61 | #include <sys/param.h> |
62 | #include <sys/systm.h> |
63 | #include <sys/malloc.h> |
64 | #include <sys/mcache.h> |
65 | #include <sys/mbuf.h> |
66 | #include <sys/domain.h> |
67 | #include <sys/protosw.h> |
68 | #include <sys/socket.h> |
69 | #include <sys/errno.h> |
70 | #include <sys/time.h> |
71 | #include <sys/kernel.h> |
72 | #include <sys/syslog.h> |
73 | #include <kern/queue.h> |
74 | #include <kern/locks.h> |
75 | |
76 | #include <net/if.h> |
77 | #include <net/route.h> |
78 | |
79 | #include <netinet/in.h> |
80 | #include <netinet/in_var.h> |
81 | #include <netinet/ip.h> |
82 | #include <netinet/ip_var.h> |
83 | #include <netinet/ip6.h> |
84 | #include <netinet6/ip6_var.h> |
85 | #include <netinet/icmp6.h> |
86 | |
87 | #include <net/net_osdep.h> |
88 | #include <dev/random/randomdev.h> |
89 | |
90 | /* |
91 | * Define it to get a correct behavior on per-interface statistics. |
92 | */ |
93 | #define IN6_IFSTAT_STRICT |
/*
 * Per-fragment bookkeeping entry: one per received fragment, kept on a
 * doubly-linked list (sorted by fragment offset) hanging off the packet's
 * struct ip6q reassembly queue entry.
 */
struct ip6asfrag {
	struct ip6asfrag *ip6af_down;   /* next fragment (higher offset) */
	struct ip6asfrag *ip6af_up;     /* previous fragment (lower offset) */
	struct mbuf *ip6af_m;           /* mbuf chain holding this fragment */
	int ip6af_offset;       /* offset in ip6af_m to next header */
	int ip6af_frglen;       /* fragmentable part length */
	int ip6af_off;          /* fragment offset */
	u_int16_t ip6af_mff;    /* more fragment bit in frag off */
};
103 | |
104 | #define IP6_REASS_MBUF(ip6af) ((ip6af)->ip6af_m) |
105 | |
106 | MBUFQ_HEAD(fq6_head); |
107 | |
108 | static void frag6_save_context(struct mbuf *, int); |
109 | static void frag6_scrub_context(struct mbuf *); |
110 | static int frag6_restore_context(struct mbuf *); |
111 | |
112 | static void frag6_icmp6_paramprob_error(struct fq6_head *); |
113 | static void frag6_icmp6_timeex_error(struct fq6_head *); |
114 | |
115 | static void frag6_enq(struct ip6asfrag *, struct ip6asfrag *); |
116 | static void frag6_deq(struct ip6asfrag *); |
117 | static void frag6_insque(struct ip6q *, struct ip6q *); |
118 | static void frag6_remque(struct ip6q *); |
119 | static void frag6_purgef(struct ip6q *, struct fq6_head *, struct fq6_head *); |
120 | static void frag6_freef(struct ip6q *, struct fq6_head *, struct fq6_head *); |
121 | |
122 | static int frag6_timeout_run; /* frag6 timer is scheduled to run */ |
123 | static void frag6_timeout(void *); |
124 | static void frag6_sched_timeout(void); |
125 | |
126 | static struct ip6q *ip6q_alloc(void); |
127 | static void ip6q_free(struct ip6q *); |
128 | static void ip6q_updateparams(void); |
129 | static struct ip6asfrag *ip6af_alloc(void); |
130 | static void ip6af_free(struct ip6asfrag *); |
131 | |
132 | static LCK_GRP_DECLARE(ip6qlock_grp, "ip6qlock" ); |
133 | static LCK_MTX_DECLARE(ip6qlock, &ip6qlock_grp); |
134 | |
135 | /* IPv6 fragment reassembly queues (protected by ip6qlock) */ |
136 | static struct ip6q ip6q; /* ip6 reassembly queues */ |
137 | static int ip6_maxfragpackets; /* max packets in reass queues */ |
138 | static u_int32_t frag6_nfragpackets; /* # of packets in reass queues */ |
139 | static int ip6_maxfrags; /* max fragments in reass queues */ |
140 | static u_int32_t frag6_nfrags; /* # of fragments in reass queues */ |
141 | static u_int32_t ip6q_limit; /* ip6q allocation limit */ |
142 | static u_int32_t ip6q_count; /* current # of allocated ip6q's */ |
143 | static u_int32_t ip6af_limit; /* ip6asfrag allocation limit */ |
144 | static u_int32_t ip6af_count; /* current # of allocated ip6asfrag's */ |
145 | |
146 | static int sysctl_maxfragpackets SYSCTL_HANDLER_ARGS; |
147 | static int sysctl_maxfrags SYSCTL_HANDLER_ARGS; |
148 | |
149 | SYSCTL_DECL(_net_inet6_ip6); |
150 | |
151 | SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets, |
152 | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_maxfragpackets, 0, |
153 | sysctl_maxfragpackets, "I" , |
154 | "Maximum number of IPv6 fragment reassembly queue entries" ); |
155 | |
156 | SYSCTL_UINT(_net_inet6_ip6, OID_AUTO, fragpackets, |
157 | CTLFLAG_RD | CTLFLAG_LOCKED, &frag6_nfragpackets, 0, |
158 | "Current number of IPv6 fragment reassembly queue entries" ); |
159 | |
160 | SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_MAXFRAGS, maxfrags, |
161 | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_maxfrags, 0, |
162 | sysctl_maxfrags, "I" , "Maximum number of IPv6 fragments allowed" ); |
163 | |
164 | /* |
165 | * Initialise reassembly queue and fragment identifier. |
166 | */ |
167 | void |
168 | frag6_init(void) |
169 | { |
170 | lck_mtx_lock(lck: &ip6qlock); |
171 | /* Initialize IPv6 reassembly queue. */ |
172 | ip6q.ip6q_next = ip6q.ip6q_prev = &ip6q; |
173 | |
174 | /* same limits as IPv4 */ |
175 | ip6_maxfragpackets = nmbclusters / 32; |
176 | ip6_maxfrags = ip6_maxfragpackets * 2; |
177 | ip6q_updateparams(); |
178 | lck_mtx_unlock(lck: &ip6qlock); |
179 | } |
180 | |
/*
 * Stash an integer (the ICMPv6 param-problem pointer value) in the mbuf's
 * pkt_hdr field so it can be recovered later, after ip6qlock is dropped,
 * by frag6_restore_context().  The intermediate uintptr_t cast avoids an
 * int-to-pointer size-mismatch warning on LP64.
 */
static void
frag6_save_context(struct mbuf *m, int val)
{
	m->m_pkthdr.pkt_hdr = (void *)(uintptr_t)val;
}
186 | |
/*
 * Clear the value stashed by frag6_save_context() before the mbuf is
 * handed to icmp6_error(), so the overloaded pkt_hdr field does not leak
 * a stale non-pointer value downstream.
 */
static void
frag6_scrub_context(struct mbuf *m)
{
	m->m_pkthdr.pkt_hdr = NULL;
}
192 | |
193 | static int |
194 | frag6_restore_context(struct mbuf *m) |
195 | { |
196 | return (int)m->m_pkthdr.pkt_hdr; |
197 | } |
198 | |
199 | /* |
200 | * Send any deferred ICMP param problem error messages; caller must not be |
201 | * holding ip6qlock and is expected to have saved the per-packet parameter |
202 | * value via frag6_save_context(). |
203 | */ |
204 | static void |
205 | frag6_icmp6_paramprob_error(struct fq6_head *diq6) |
206 | { |
207 | LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_NOTOWNED); |
208 | |
209 | if (!MBUFQ_EMPTY(diq6)) { |
210 | struct mbuf *merr, *merr_tmp; |
211 | int param; |
212 | MBUFQ_FOREACH_SAFE(merr, diq6, merr_tmp) { |
213 | MBUFQ_REMOVE(diq6, merr); |
214 | MBUFQ_NEXT(merr) = NULL; |
215 | param = frag6_restore_context(m: merr); |
216 | frag6_scrub_context(m: merr); |
217 | icmp6_error(merr, ICMP6_PARAM_PROB, |
218 | ICMP6_PARAMPROB_HEADER, param); |
219 | } |
220 | } |
221 | } |
222 | |
223 | /* |
224 | * Send any deferred ICMP time exceeded error messages; |
225 | * caller must not be holding ip6qlock. |
226 | */ |
227 | static void |
228 | frag6_icmp6_timeex_error(struct fq6_head *diq6) |
229 | { |
230 | LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_NOTOWNED); |
231 | |
232 | if (!MBUFQ_EMPTY(diq6)) { |
233 | struct mbuf *m, *m_tmp; |
234 | MBUFQ_FOREACH_SAFE(m, diq6, m_tmp) { |
235 | MBUFQ_REMOVE(diq6, m); |
236 | MBUFQ_NEXT(m) = NULL; |
237 | icmp6_error_flag(m, ICMP6_TIME_EXCEEDED, |
238 | ICMP6_TIME_EXCEED_REASSEMBLY, 0, 0); |
239 | } |
240 | } |
241 | } |
242 | |
243 | /* |
244 | * In RFC2460, fragment and reassembly rule do not agree with each other, |
245 | * in terms of next header field handling in fragment header. |
246 | * While the sender will use the same value for all of the fragmented packets, |
247 | * receiver is suggested not to check the consistency. |
248 | * |
249 | * fragment rule (p20): |
250 | * (2) A Fragment header containing: |
251 | * The Next Header value that identifies the first header of |
252 | * the Fragmentable Part of the original packet. |
253 | * -> next header field is same for all fragments |
254 | * |
255 | * reassembly rule (p21): |
256 | * The Next Header field of the last header of the Unfragmentable |
257 | * Part is obtained from the Next Header field of the first |
258 | * fragment's Fragment header. |
259 | * -> should grab it from the first fragment only |
260 | * |
 * The following note also contradicts the fragment rule - no one is going to
 * send different fragments with different next header fields.
263 | * |
264 | * additional note (p22): |
265 | * The Next Header values in the Fragment headers of different |
266 | * fragments of the same original packet may differ. Only the value |
267 | * from the Offset zero fragment packet is used for reassembly. |
268 | * -> should grab it from the first fragment only |
269 | * |
270 | * There is no explicit reason given in the RFC. Historical reason maybe? |
271 | */ |
272 | /* |
273 | * Fragment input |
274 | */ |
275 | int |
276 | frag6_input(struct mbuf **mp, int *offp, int proto) |
277 | { |
278 | #pragma unused(proto) |
279 | struct mbuf *m = *mp, *t = NULL; |
280 | struct ip6_hdr *ip6 = NULL; |
281 | struct ip6_frag *ip6f = NULL; |
282 | struct ip6q *q6 = NULL; |
283 | struct ip6asfrag *af6 = NULL, *ip6af = NULL, *af6dwn = NULL; |
284 | int offset = *offp, i = 0, next = 0; |
285 | u_int8_t nxt = 0; |
286 | int first_frag = 0; |
287 | int fragoff = 0, frgpartlen = 0; /* must be larger than u_int16_t */ |
288 | struct ifnet *dstifp = NULL; |
289 | u_int8_t ecn = 0, ecn0 = 0; |
290 | uint32_t csum = 0, csum_flags = 0; |
291 | struct fq6_head diq6 = {}; |
292 | int locked = 0; |
293 | boolean_t drop_fragq = FALSE; |
294 | int local_ip6q_unfrglen; |
295 | u_int8_t local_ip6q_nxt; |
296 | |
297 | VERIFY(m->m_flags & M_PKTHDR); |
298 | |
299 | MBUFQ_INIT(&diq6); /* for deferred ICMP param problem errors */ |
300 | |
301 | /* Expect 32-bit aligned data pointer on strict-align platforms */ |
302 | MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m); |
303 | |
304 | IP6_EXTHDR_CHECK(m, offset, sizeof(struct ip6_frag), goto done); |
305 | ip6 = mtod(m, struct ip6_hdr *); |
306 | ip6f = (struct ip6_frag *)((caddr_t)ip6 + offset); |
307 | |
308 | #ifdef IN6_IFSTAT_STRICT |
309 | /* find the destination interface of the packet. */ |
310 | if (m->m_pkthdr.pkt_flags & PKTF_IFAINFO) { |
311 | uint32_t idx; |
312 | |
313 | if (ip6_getdstifaddr_info(m, &idx, NULL) == 0) { |
314 | if (idx > 0 && idx <= if_index) { |
315 | ifnet_head_lock_shared(); |
316 | dstifp = ifindex2ifnet[idx]; |
317 | ifnet_head_done(); |
318 | } |
319 | } |
320 | } |
321 | #endif /* IN6_IFSTAT_STRICT */ |
322 | |
323 | /* we are violating the spec, this may not be the dst interface */ |
324 | if (dstifp == NULL) { |
325 | dstifp = m->m_pkthdr.rcvif; |
326 | } |
327 | |
328 | /* jumbo payload can't contain a fragment header */ |
329 | if (ip6->ip6_plen == 0) { |
330 | icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset); |
331 | in6_ifstat_inc(dstifp, ifs6_reass_fail); |
332 | m = NULL; |
333 | goto done; |
334 | } |
335 | |
336 | /* |
337 | * check whether fragment packet's fragment length is |
338 | * multiple of 8 octets. |
339 | * sizeof(struct ip6_frag) == 8 |
340 | * sizeof(struct ip6_hdr) = 40 |
341 | */ |
342 | if ((ip6f->ip6f_offlg & IP6F_MORE_FRAG) && |
343 | (((ntohs(ip6->ip6_plen) - offset) & 0x7) != 0)) { |
344 | icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, |
345 | offsetof(struct ip6_hdr, ip6_plen)); |
346 | in6_ifstat_inc(dstifp, ifs6_reass_fail); |
347 | m = NULL; |
348 | goto done; |
349 | } |
350 | |
351 | /* If ip6_maxfragpackets or ip6_maxfrags is 0, never accept fragments */ |
352 | if (ip6_maxfragpackets == 0 || ip6_maxfrags == 0) { |
353 | ip6stat.ip6s_fragments++; |
354 | ip6stat.ip6s_fragdropped++; |
355 | in6_ifstat_inc(dstifp, ifs6_reass_fail); |
356 | m_freem(m); |
357 | m = NULL; |
358 | goto done; |
359 | } |
360 | |
361 | /* offset now points to data portion */ |
362 | offset += sizeof(struct ip6_frag); |
363 | |
364 | /* |
365 | * RFC 6946: Handle "atomic" fragments (offset and m bit set to 0) |
366 | * upfront, unrelated to any reassembly. Just skip the fragment header. |
367 | */ |
368 | if ((ip6f->ip6f_offlg & ~IP6F_RESERVED_MASK) == 0) { |
369 | /* |
370 | * Mark packet as reassembled. |
371 | * In ICMPv6 processing, we drop certain |
372 | * NDP messages that are not expected to |
373 | * have fragment header based on recommendations |
374 | * against security vulnerability as described in |
375 | * RFC 6980. |
376 | * Treat atomic fragments as re-assembled packets as well. |
377 | */ |
378 | m->m_pkthdr.pkt_flags |= PKTF_REASSEMBLED; |
379 | ip6stat.ip6s_atmfrag_rcvd++; |
380 | in6_ifstat_inc(dstifp, ifs6_atmfrag_rcvd); |
381 | *mp = m; |
382 | *offp = offset; |
383 | return ip6f->ip6f_nxt; |
384 | } |
385 | |
386 | /* |
387 | * Leverage partial checksum offload for simple UDP/IP fragments, |
388 | * as that is the most common case. |
389 | * |
390 | * Perform 1's complement adjustment of octets that got included/ |
391 | * excluded in the hardware-calculated checksum value. Also take |
392 | * care of any trailing bytes and subtract out their partial sum. |
393 | */ |
394 | if (ip6f->ip6f_nxt == IPPROTO_UDP && |
395 | offset == (sizeof(*ip6) + sizeof(*ip6f)) && |
396 | (m->m_pkthdr.csum_flags & |
397 | (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) == |
398 | (CSUM_DATA_VALID | CSUM_PARTIAL)) { |
399 | uint32_t start = m->m_pkthdr.csum_rx_start; |
400 | uint32_t ip_len = (sizeof(*ip6) + ntohs(ip6->ip6_plen)); |
401 | int32_t trailer = (m_pktlen(m) - ip_len); |
402 | uint32_t swbytes = (uint32_t)trailer; |
403 | |
404 | csum = m->m_pkthdr.csum_rx_val; |
405 | |
406 | ASSERT(trailer >= 0); |
407 | if (start != offset || trailer != 0) { |
408 | uint16_t s = 0, d = 0; |
409 | |
410 | if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) { |
411 | s = ip6->ip6_src.s6_addr16[1]; |
412 | ip6->ip6_src.s6_addr16[1] = 0; |
413 | } |
414 | if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) { |
415 | d = ip6->ip6_dst.s6_addr16[1]; |
416 | ip6->ip6_dst.s6_addr16[1] = 0; |
417 | } |
418 | |
419 | /* callee folds in sum */ |
420 | csum = m_adj_sum16(m, start, offset, |
421 | (ip_len - offset), csum); |
422 | if (offset > start) { |
423 | swbytes += (offset - start); |
424 | } else { |
425 | swbytes += (start - offset); |
426 | } |
427 | |
428 | if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) { |
429 | ip6->ip6_src.s6_addr16[1] = s; |
430 | } |
431 | if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) { |
432 | ip6->ip6_dst.s6_addr16[1] = d; |
433 | } |
434 | } |
435 | csum_flags = m->m_pkthdr.csum_flags; |
436 | |
437 | if (swbytes != 0) { |
438 | udp_in6_cksum_stats(swbytes); |
439 | } |
440 | if (trailer != 0) { |
441 | m_adj(m, -trailer); |
442 | } |
443 | } else { |
444 | csum = 0; |
445 | csum_flags = 0; |
446 | } |
447 | |
448 | /* Invalidate checksum */ |
449 | m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID; |
450 | |
451 | ip6stat.ip6s_fragments++; |
452 | in6_ifstat_inc(dstifp, ifs6_reass_reqd); |
453 | |
454 | lck_mtx_lock(lck: &ip6qlock); |
455 | locked = 1; |
456 | |
457 | for (q6 = ip6q.ip6q_next; q6 != &ip6q; q6 = q6->ip6q_next) { |
458 | if (ip6f->ip6f_ident == q6->ip6q_ident && |
459 | in6_are_addr_equal_scoped(&ip6->ip6_src, &q6->ip6q_src, ip6_input_getsrcifscope(m), q6->ip6q_src_ifscope) && |
460 | in6_are_addr_equal_scoped(&ip6->ip6_dst, &q6->ip6q_dst, ip6_input_getdstifscope(m), q6->ip6q_dst_ifscope)) { |
461 | break; |
462 | } |
463 | } |
464 | |
465 | if (q6 == &ip6q) { |
466 | /* |
467 | * Create a reassembly queue as this is the first fragment to |
468 | * arrive. |
469 | * By first frag, we don't mean the one with offset 0, but |
470 | * any of the fragments of the fragmented packet that has |
471 | * reached us first. |
472 | */ |
473 | first_frag = 1; |
474 | |
475 | q6 = ip6q_alloc(); |
476 | if (q6 == NULL) { |
477 | goto dropfrag; |
478 | } |
479 | |
480 | frag6_insque(q6, &ip6q); |
481 | frag6_nfragpackets++; |
482 | |
483 | /* ip6q_nxt will be filled afterwards, from 1st fragment */ |
484 | q6->ip6q_down = q6->ip6q_up = (struct ip6asfrag *)q6; |
485 | #ifdef notyet |
486 | q6->ip6q_nxtp = (u_char *)nxtp; |
487 | #endif |
488 | q6->ip6q_ident = ip6f->ip6f_ident; |
489 | q6->ip6q_ttl = IPV6_FRAGTTL; |
490 | q6->ip6q_src = ip6->ip6_src; |
491 | q6->ip6q_dst = ip6->ip6_dst; |
492 | q6->ip6q_dst_ifscope = IN6_IS_SCOPE_EMBED(&q6->ip6q_dst) ? ip6_input_getdstifscope(m) : IFSCOPE_NONE; |
493 | q6->ip6q_src_ifscope = IN6_IS_SCOPE_EMBED(&q6->ip6q_src) ? ip6_input_getsrcifscope(m) : IFSCOPE_NONE; |
494 | q6->ip6q_ecn = |
495 | (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK; |
496 | q6->ip6q_unfrglen = -1; /* The 1st fragment has not arrived. */ |
497 | |
498 | q6->ip6q_nfrag = 0; |
499 | q6->ip6q_flags = 0; |
500 | |
501 | /* |
502 | * If the first fragment has valid checksum offload |
503 | * info, the rest of fragments are eligible as well. |
504 | */ |
505 | if (csum_flags != 0) { |
506 | q6->ip6q_csum = csum; |
507 | q6->ip6q_csum_flags = csum_flags; |
508 | } |
509 | } |
510 | |
511 | if (q6->ip6q_flags & IP6QF_DIRTY) { |
512 | goto dropfrag; |
513 | } |
514 | |
515 | local_ip6q_unfrglen = q6->ip6q_unfrglen; |
516 | local_ip6q_nxt = q6->ip6q_nxt; |
517 | |
518 | /* |
519 | * If it's the 1st fragment, record the length of the |
520 | * unfragmentable part and the next header of the fragment header. |
521 | * Assume the first fragement to arrive will be correct. |
522 | * We do not have any duplicate checks here yet so another packet |
523 | * with fragoff == 0 could come and overwrite the ip6q_unfrglen |
524 | * and worse, the next header, at any time. |
525 | */ |
526 | fragoff = ntohs(ip6f->ip6f_offlg & IP6F_OFF_MASK); |
527 | if (fragoff == 0 && local_ip6q_unfrglen == -1) { |
528 | local_ip6q_unfrglen = offset - sizeof(struct ip6_hdr) - |
529 | sizeof(struct ip6_frag); |
530 | local_ip6q_nxt = ip6f->ip6f_nxt; |
531 | /* XXX ECN? */ |
532 | } |
533 | |
534 | /* |
535 | * Check that the reassembled packet would not exceed 65535 bytes |
536 | * in size. |
537 | * If it would exceed, discard the fragment and return an ICMP error. |
538 | */ |
539 | frgpartlen = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - offset; |
540 | if (local_ip6q_unfrglen >= 0) { |
541 | /* The 1st fragment has already arrived. */ |
542 | if (local_ip6q_unfrglen + fragoff + frgpartlen > IPV6_MAXPACKET) { |
543 | lck_mtx_unlock(lck: &ip6qlock); |
544 | locked = 0; |
545 | icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, |
546 | offset - sizeof(struct ip6_frag) + |
547 | offsetof(struct ip6_frag, ip6f_offlg)); |
548 | m = NULL; |
549 | goto done; |
550 | } |
551 | } else if (fragoff + frgpartlen > IPV6_MAXPACKET) { |
552 | lck_mtx_unlock(lck: &ip6qlock); |
553 | locked = 0; |
554 | icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, |
555 | offset - sizeof(struct ip6_frag) + |
556 | offsetof(struct ip6_frag, ip6f_offlg)); |
557 | m = NULL; |
558 | goto done; |
559 | } |
560 | /* |
561 | * If it's the first fragment, do the above check for each |
562 | * fragment already stored in the reassembly queue. |
563 | */ |
564 | if (fragoff == 0) { |
565 | /* |
566 | * https://tools.ietf.org/html/rfc8200#page-20 |
567 | * If the first fragment does not include all headers through an |
568 | * Upper-Layer header, then that fragment should be discarded and |
569 | * an ICMP Parameter Problem, Code 3, message should be sent to |
570 | * the source of the fragment, with the Pointer field set to zero. |
571 | */ |
572 | if (!ip6_pkt_has_ulp(m)) { |
573 | lck_mtx_unlock(lck: &ip6qlock); |
574 | locked = 0; |
575 | icmp6_error(m, ICMP6_PARAM_PROB, |
576 | ICMP6_PARAMPROB_FIRSTFRAG_INCOMP_HDR, 0); |
577 | m = NULL; |
578 | goto done; |
579 | } |
580 | for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; |
581 | af6 = af6dwn) { |
582 | af6dwn = af6->ip6af_down; |
583 | |
584 | if (local_ip6q_unfrglen + af6->ip6af_off + af6->ip6af_frglen > |
585 | IPV6_MAXPACKET) { |
586 | struct mbuf *merr = IP6_REASS_MBUF(af6); |
587 | struct ip6_hdr *ip6err; |
588 | int erroff = af6->ip6af_offset; |
589 | |
590 | /* dequeue the fragment. */ |
591 | frag6_deq(af6); |
592 | ip6af_free(af6); |
593 | |
594 | /* adjust pointer. */ |
595 | ip6err = mtod(merr, struct ip6_hdr *); |
596 | |
597 | /* |
598 | * Restore source and destination addresses |
599 | * in the erroneous IPv6 header. |
600 | */ |
601 | ip6err->ip6_src = q6->ip6q_src; |
602 | ip6err->ip6_dst = q6->ip6q_dst; |
603 | ip6_output_setdstifscope(m, q6->ip6q_dst_ifscope, NULL); |
604 | ip6_output_setsrcifscope(m, q6->ip6q_src_ifscope, NULL); |
605 | frag6_save_context(m: merr, |
606 | val: erroff - sizeof(struct ip6_frag) + |
607 | offsetof(struct ip6_frag, ip6f_offlg)); |
608 | |
609 | MBUFQ_ENQUEUE(&diq6, merr); |
610 | } |
611 | } |
612 | } |
613 | |
614 | ip6af = ip6af_alloc(); |
615 | if (ip6af == NULL) { |
616 | goto dropfrag; |
617 | } |
618 | |
619 | ip6af->ip6af_mff = ip6f->ip6f_offlg & IP6F_MORE_FRAG; |
620 | ip6af->ip6af_off = fragoff; |
621 | ip6af->ip6af_frglen = frgpartlen; |
622 | ip6af->ip6af_offset = offset; |
623 | IP6_REASS_MBUF(ip6af) = m; |
624 | |
625 | if (first_frag) { |
626 | af6 = (struct ip6asfrag *)q6; |
627 | goto insert; |
628 | } |
629 | |
630 | /* |
631 | * Handle ECN by comparing this segment with the first one; |
632 | * if CE is set, do not lose CE. |
633 | * drop if CE and not-ECT are mixed for the same packet. |
634 | */ |
635 | ecn = (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK; |
636 | ecn0 = q6->ip6q_ecn; |
637 | if (ecn == IPTOS_ECN_CE) { |
638 | if (ecn0 == IPTOS_ECN_NOTECT) { |
639 | ip6af_free(ip6af); |
640 | goto dropfrag; |
641 | } |
642 | if (ecn0 != IPTOS_ECN_CE) { |
643 | q6->ip6q_ecn = IPTOS_ECN_CE; |
644 | } |
645 | } |
646 | if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT) { |
647 | ip6af_free(ip6af); |
648 | goto dropfrag; |
649 | } |
650 | |
651 | /* |
652 | * Find a segment which begins after this one does. |
653 | */ |
654 | for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; |
655 | af6 = af6->ip6af_down) { |
656 | if (af6->ip6af_off > ip6af->ip6af_off) { |
657 | break; |
658 | } |
659 | } |
660 | |
661 | /* |
662 | * As per RFC 8200 reassembly rules, we MUST drop the entire |
663 | * chain of fragments for a packet to be assembled, if we receive |
664 | * any overlapping fragments. |
665 | * https://tools.ietf.org/html/rfc8200#page-20 |
666 | * |
667 | * To avoid more conditional code, just reuse frag6_freef and defer |
668 | * its call to post fragment insertion in the queue. |
669 | */ |
670 | if (af6->ip6af_up != (struct ip6asfrag *)q6) { |
671 | if (af6->ip6af_up->ip6af_off == ip6af->ip6af_off) { |
672 | if (af6->ip6af_up->ip6af_frglen != ip6af->ip6af_frglen) { |
673 | drop_fragq = TRUE; |
674 | } else { |
675 | /* |
676 | * XXX Ideally we should be comparing the entire |
677 | * packet here but for now just use off and fraglen |
678 | * to ignore a duplicate fragment. |
679 | */ |
680 | ip6af_free(ip6af); |
681 | goto dropfrag; |
682 | } |
683 | } else { |
684 | i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen |
685 | - ip6af->ip6af_off; |
686 | if (i > 0) { |
687 | drop_fragq = TRUE; |
688 | } |
689 | } |
690 | } |
691 | |
692 | if (af6 != (struct ip6asfrag *)q6) { |
693 | /* |
694 | * Given that we break when af6->ip6af_off > ip6af->ip6af_off, |
695 | * we shouldn't need a check for duplicate fragment here. |
696 | * For now just assert. |
697 | */ |
698 | VERIFY(af6->ip6af_off != ip6af->ip6af_off); |
699 | i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off; |
700 | if (i > 0) { |
701 | drop_fragq = TRUE; |
702 | } |
703 | } |
704 | |
705 | /* |
706 | * If this fragment contains similar checksum offload info |
707 | * as that of the existing ones, accumulate checksum. Otherwise, |
708 | * invalidate checksum offload info for the entire datagram. |
709 | */ |
710 | if (csum_flags != 0 && csum_flags == q6->ip6q_csum_flags) { |
711 | q6->ip6q_csum += csum; |
712 | } else if (q6->ip6q_csum_flags != 0) { |
713 | q6->ip6q_csum_flags = 0; |
714 | } |
715 | |
716 | insert: |
717 | /* |
718 | * Stick new segment in its place; |
719 | * check for complete reassembly. |
720 | * Move to front of packet queue, as we are |
721 | * the most recently active fragmented packet. |
722 | */ |
723 | frag6_enq(ip6af, af6->ip6af_up); |
724 | frag6_nfrags++; |
725 | q6->ip6q_nfrag++; |
726 | |
727 | /* |
728 | * This holds true, when we receive overlapping fragments. |
729 | * We must silently drop all the fragments we have received |
730 | * so far. |
731 | * Also mark q6 as dirty, so as to not add any new fragments to it. |
732 | * Make sure even q6 marked dirty is kept till timer expires for |
733 | * reassembly and when that happens, silenty get rid of q6 |
734 | */ |
735 | if (drop_fragq) { |
736 | struct fq6_head dfq6 = {0}; |
737 | MBUFQ_INIT(&dfq6); /* for deferred frees */ |
738 | q6->ip6q_flags |= IP6QF_DIRTY; |
739 | /* Purge all the fragments but do not free q6 */ |
740 | frag6_purgef(q6, &dfq6, NULL); |
741 | af6 = NULL; |
742 | |
743 | /* free fragments that need to be freed */ |
744 | if (!MBUFQ_EMPTY(&dfq6)) { |
745 | MBUFQ_DRAIN(&dfq6); |
746 | } |
747 | VERIFY(MBUFQ_EMPTY(&dfq6)); |
748 | /* |
749 | * Just in case the above logic got anything added |
750 | * to diq6, drain it. |
751 | * Please note that these mbufs are not present in the |
752 | * fragment queue and are added to diq6 for sending |
753 | * ICMPv6 error. |
754 | * Given that the current fragment was an overlapping |
755 | * fragment and the RFC requires us to not send any |
756 | * ICMPv6 errors while purging the entire queue. |
757 | * Just empty it out. |
758 | */ |
759 | if (!MBUFQ_EMPTY(&diq6)) { |
760 | MBUFQ_DRAIN(&diq6); |
761 | } |
762 | VERIFY(MBUFQ_EMPTY(&diq6)); |
763 | /* |
764 | * MBUFQ_DRAIN would have drained all the mbufs |
765 | * in the fragment queue. |
766 | * This shouldn't be needed as we are returning IPPROTO_DONE |
767 | * from here but change the passed mbuf pointer to NULL. |
768 | */ |
769 | *mp = NULL; |
770 | lck_mtx_unlock(lck: &ip6qlock); |
771 | return IPPROTO_DONE; |
772 | } |
773 | |
774 | /* |
775 | * We're keeping the fragment. |
776 | */ |
777 | q6->ip6q_unfrglen = local_ip6q_unfrglen; |
778 | q6->ip6q_nxt = local_ip6q_nxt; |
779 | |
780 | next = 0; |
781 | for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; |
782 | af6 = af6->ip6af_down) { |
783 | if (af6->ip6af_off != next) { |
784 | lck_mtx_unlock(lck: &ip6qlock); |
785 | locked = 0; |
786 | m = NULL; |
787 | goto done; |
788 | } |
789 | next += af6->ip6af_frglen; |
790 | } |
791 | if (af6->ip6af_up->ip6af_mff) { |
792 | lck_mtx_unlock(lck: &ip6qlock); |
793 | locked = 0; |
794 | m = NULL; |
795 | goto done; |
796 | } |
797 | |
798 | /* |
799 | * Reassembly is complete; concatenate fragments. |
800 | */ |
801 | ip6af = q6->ip6q_down; |
802 | t = m = IP6_REASS_MBUF(ip6af); |
803 | af6 = ip6af->ip6af_down; |
804 | frag6_deq(ip6af); |
805 | while (af6 != (struct ip6asfrag *)q6) { |
806 | af6dwn = af6->ip6af_down; |
807 | frag6_deq(af6); |
808 | while (t->m_next) { |
809 | t = t->m_next; |
810 | } |
811 | t->m_next = IP6_REASS_MBUF(af6); |
812 | m_adj(t->m_next, af6->ip6af_offset); |
813 | ip6af_free(af6); |
814 | af6 = af6dwn; |
815 | } |
816 | |
817 | /* |
818 | * Store partial hardware checksum info from the fragment queue; |
819 | * the receive start offset is set to 40 bytes (see code at the |
820 | * top of this routine.) |
821 | */ |
822 | if (q6->ip6q_csum_flags != 0) { |
823 | csum = q6->ip6q_csum; |
824 | |
825 | ADDCARRY(csum); |
826 | |
827 | m->m_pkthdr.csum_rx_val = (u_int16_t)csum; |
828 | m->m_pkthdr.csum_rx_start = sizeof(struct ip6_hdr); |
829 | m->m_pkthdr.csum_flags = q6->ip6q_csum_flags; |
830 | } else if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) || |
831 | (m->m_pkthdr.pkt_flags & PKTF_LOOP)) { |
832 | /* loopback checksums are always OK */ |
833 | m->m_pkthdr.csum_data = 0xffff; |
834 | m->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR; |
835 | } |
836 | |
837 | /* adjust offset to point where the original next header starts */ |
838 | offset = ip6af->ip6af_offset - sizeof(struct ip6_frag); |
839 | ip6af_free(ip6af); |
840 | ip6 = mtod(m, struct ip6_hdr *); |
841 | ip6->ip6_plen = htons((uint16_t)(next + offset - sizeof(struct ip6_hdr))); |
842 | ip6->ip6_src = q6->ip6q_src; |
843 | ip6->ip6_dst = q6->ip6q_dst; |
844 | ip6_output_setdstifscope(m, q6->ip6q_dst_ifscope, NULL); |
845 | ip6_output_setsrcifscope(m, q6->ip6q_src_ifscope, NULL); |
846 | if (q6->ip6q_ecn == IPTOS_ECN_CE) { |
847 | ip6->ip6_flow |= htonl(IPTOS_ECN_CE << 20); |
848 | } |
849 | |
850 | nxt = q6->ip6q_nxt; |
851 | #ifdef notyet |
852 | *q6->ip6q_nxtp = (u_char)(nxt & 0xff); |
853 | #endif |
854 | |
855 | /* Delete frag6 header */ |
856 | if (m->m_len >= offset + sizeof(struct ip6_frag)) { |
857 | /* This is the only possible case with !PULLDOWN_TEST */ |
858 | ovbcopy(from: (caddr_t)ip6, to: (caddr_t)ip6 + sizeof(struct ip6_frag), |
859 | len: offset); |
860 | m->m_data += sizeof(struct ip6_frag); |
861 | m->m_len -= sizeof(struct ip6_frag); |
862 | } else { |
863 | /* this comes with no copy if the boundary is on cluster */ |
864 | if ((t = m_split(m, offset, M_DONTWAIT)) == NULL) { |
865 | frag6_remque(q6); |
866 | frag6_nfragpackets--; |
867 | frag6_nfrags -= q6->ip6q_nfrag; |
868 | ip6q_free(q6); |
869 | goto dropfrag; |
870 | } |
871 | m_adj(t, sizeof(struct ip6_frag)); |
872 | m_cat(m, t); |
873 | } |
874 | |
875 | /* |
876 | * Store NXT to the original. |
877 | */ |
878 | { |
879 | char *prvnxtp = ip6_get_prevhdr(m, offset); /* XXX */ |
880 | *prvnxtp = nxt; |
881 | } |
882 | |
883 | frag6_remque(q6); |
884 | frag6_nfragpackets--; |
885 | frag6_nfrags -= q6->ip6q_nfrag; |
886 | ip6q_free(q6); |
887 | |
888 | if (m->m_flags & M_PKTHDR) { /* Isn't it always true? */ |
889 | m_fixhdr(m); |
890 | /* |
891 | * Mark packet as reassembled |
892 | * In ICMPv6 processing, we drop certain |
893 | * NDP messages that are not expected to |
894 | * have fragment header based on recommendations |
895 | * against security vulnerability as described in |
896 | * RFC 6980. |
897 | */ |
898 | m->m_pkthdr.pkt_flags |= PKTF_REASSEMBLED; |
899 | } |
900 | ip6stat.ip6s_reassembled++; |
901 | |
902 | /* |
903 | * Tell launch routine the next header |
904 | */ |
905 | *mp = m; |
906 | *offp = offset; |
907 | |
908 | /* arm the purge timer if not already and if there's work to do */ |
909 | frag6_sched_timeout(); |
910 | lck_mtx_unlock(lck: &ip6qlock); |
911 | in6_ifstat_inc(dstifp, ifs6_reass_ok); |
912 | frag6_icmp6_paramprob_error(diq6: &diq6); |
913 | VERIFY(MBUFQ_EMPTY(&diq6)); |
914 | return nxt; |
915 | |
916 | done: |
917 | VERIFY(m == NULL); |
918 | *mp = m; |
919 | if (!locked) { |
920 | if (frag6_nfragpackets == 0) { |
921 | frag6_icmp6_paramprob_error(diq6: &diq6); |
922 | VERIFY(MBUFQ_EMPTY(&diq6)); |
923 | return IPPROTO_DONE; |
924 | } |
925 | lck_mtx_lock(lck: &ip6qlock); |
926 | } |
927 | /* arm the purge timer if not already and if there's work to do */ |
928 | frag6_sched_timeout(); |
929 | lck_mtx_unlock(lck: &ip6qlock); |
930 | frag6_icmp6_paramprob_error(diq6: &diq6); |
931 | VERIFY(MBUFQ_EMPTY(&diq6)); |
932 | return IPPROTO_DONE; |
933 | |
934 | dropfrag: |
935 | ip6stat.ip6s_fragdropped++; |
936 | /* arm the purge timer if not already and if there's work to do */ |
937 | frag6_sched_timeout(); |
938 | lck_mtx_unlock(lck: &ip6qlock); |
939 | in6_ifstat_inc(dstifp, ifs6_reass_fail); |
940 | m_freem(m); |
941 | *mp = NULL; |
942 | frag6_icmp6_paramprob_error(diq6: &diq6); |
943 | VERIFY(MBUFQ_EMPTY(&diq6)); |
944 | return IPPROTO_DONE; |
945 | } |
946 | |
947 | /* |
948 | * This routine removes the enqueued frames from the passed fragment |
949 | * header and enqueues those to dfq6 which is an out-arg for the dequeued |
950 | * fragments. |
951 | * If the caller also provides diq6, this routine also enqueues the 0 offset |
952 | * fragment to that list as it potentially gets used by the caller |
953 | * to prepare the relevant ICMPv6 error message (time exceeded or |
954 | * param problem). |
955 | * It leaves the fragment header object (q6) intact. |
956 | */ |
957 | static void |
958 | frag6_purgef(struct ip6q *q6, struct fq6_head *dfq6, struct fq6_head *diq6) |
959 | { |
960 | struct ip6asfrag *af6 = NULL; |
961 | struct ip6asfrag *down6 = NULL; |
962 | |
963 | LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_OWNED); |
964 | |
965 | for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; |
966 | af6 = down6) { |
967 | struct mbuf *m = IP6_REASS_MBUF(af6); |
968 | |
969 | down6 = af6->ip6af_down; |
970 | frag6_deq(af6); |
971 | |
972 | /* |
973 | * If caller wants to generate ICMP time-exceeded, |
974 | * as indicated by the argument diq6, return it for |
975 | * the first fragment and add others to the fragment |
976 | * free queue. |
977 | */ |
978 | if (af6->ip6af_off == 0 && diq6 != NULL) { |
979 | struct ip6_hdr *ip6; |
980 | |
981 | /* adjust pointer */ |
982 | ip6 = mtod(m, struct ip6_hdr *); |
983 | |
984 | /* restore source and destination addresses */ |
985 | ip6->ip6_src = q6->ip6q_src; |
986 | ip6->ip6_dst = q6->ip6q_dst; |
987 | ip6_output_setdstifscope(m, q6->ip6q_dst_ifscope, NULL); |
988 | ip6_output_setsrcifscope(m, q6->ip6q_src_ifscope, NULL); |
989 | MBUFQ_ENQUEUE(diq6, m); |
990 | } else { |
991 | MBUFQ_ENQUEUE(dfq6, m); |
992 | } |
993 | ip6af_free(af6); |
994 | } |
995 | } |
996 | |
997 | /* |
998 | * This routine removes the enqueued frames from the passed fragment |
999 | * header and enqueues those to dfq6 which is an out-arg for the dequeued |
1000 | * fragments. |
1001 | * If the caller also provides diq6, this routine also enqueues the 0 offset |
1002 | * fragment to that list as it potentially gets used by the caller |
1003 | * to prepare the relevant ICMPv6 error message (time exceeded or |
1004 | * param problem). |
1005 | * It also remove the fragment header object from the queue and frees it. |
1006 | */ |
1007 | static void |
1008 | frag6_freef(struct ip6q *q6, struct fq6_head *dfq6, struct fq6_head *diq6) |
1009 | { |
1010 | frag6_purgef(q6, dfq6, diq6); |
1011 | frag6_remque(q6); |
1012 | frag6_nfragpackets--; |
1013 | frag6_nfrags -= q6->ip6q_nfrag; |
1014 | ip6q_free(q6); |
1015 | } |
1016 | |
1017 | /* |
1018 | * Put an ip fragment on a reassembly chain. |
1019 | * Like insque, but pointers in middle of structure. |
1020 | */ |
1021 | void |
1022 | frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6) |
1023 | { |
1024 | LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_OWNED); |
1025 | |
1026 | af6->ip6af_up = up6; |
1027 | af6->ip6af_down = up6->ip6af_down; |
1028 | up6->ip6af_down->ip6af_up = af6; |
1029 | up6->ip6af_down = af6; |
1030 | } |
1031 | |
1032 | /* |
1033 | * To frag6_enq as remque is to insque. |
1034 | */ |
1035 | void |
1036 | frag6_deq(struct ip6asfrag *af6) |
1037 | { |
1038 | LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_OWNED); |
1039 | |
1040 | af6->ip6af_up->ip6af_down = af6->ip6af_down; |
1041 | af6->ip6af_down->ip6af_up = af6->ip6af_up; |
1042 | } |
1043 | |
1044 | void |
1045 | frag6_insque(struct ip6q *new, struct ip6q *old) |
1046 | { |
1047 | LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_OWNED); |
1048 | |
1049 | new->ip6q_prev = old; |
1050 | new->ip6q_next = old->ip6q_next; |
1051 | old->ip6q_next->ip6q_prev = new; |
1052 | old->ip6q_next = new; |
1053 | } |
1054 | |
1055 | void |
1056 | frag6_remque(struct ip6q *p6) |
1057 | { |
1058 | LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_OWNED); |
1059 | |
1060 | p6->ip6q_prev->ip6q_next = p6->ip6q_next; |
1061 | p6->ip6q_next->ip6q_prev = p6->ip6q_prev; |
1062 | } |
1063 | |
1064 | /* |
1065 | * IPv6 reassembling timer processing; |
1066 | * if a timer expires on a reassembly |
1067 | * queue, discard it. |
1068 | */ |
1069 | static void |
1070 | frag6_timeout(void *arg) |
1071 | { |
1072 | #pragma unused(arg) |
1073 | struct fq6_head dfq6, diq6; |
1074 | struct fq6_head *diq6_tmp = NULL; |
1075 | struct ip6q *q6; |
1076 | |
1077 | MBUFQ_INIT(&dfq6); /* for deferred frees */ |
1078 | MBUFQ_INIT(&diq6); /* for deferred ICMP time exceeded errors */ |
1079 | |
1080 | /* |
1081 | * Update coarse-grained networking timestamp (in sec.); the idea |
1082 | * is to piggy-back on the timeout callout to update the counter |
1083 | * returnable via net_uptime(). |
1084 | */ |
1085 | net_update_uptime(); |
1086 | |
1087 | lck_mtx_lock(lck: &ip6qlock); |
1088 | q6 = ip6q.ip6q_next; |
1089 | if (q6) { |
1090 | while (q6 != &ip6q) { |
1091 | --q6->ip6q_ttl; |
1092 | q6 = q6->ip6q_next; |
1093 | if (q6->ip6q_prev->ip6q_ttl == 0) { |
1094 | ip6stat.ip6s_fragtimeout++; |
1095 | /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ |
1096 | /* |
1097 | * Avoid sending ICMPv6 Time Exceeded for fragment headers |
1098 | * that are marked dirty. |
1099 | */ |
1100 | diq6_tmp = (q6->ip6q_prev->ip6q_flags & IP6QF_DIRTY) ? |
1101 | NULL : &diq6; |
1102 | frag6_freef(q6: q6->ip6q_prev, dfq6: &dfq6, diq6: diq6_tmp); |
1103 | } |
1104 | } |
1105 | } |
1106 | /* |
1107 | * If we are over the maximum number of fragments |
1108 | * (due to the limit being lowered), drain off |
1109 | * enough to get down to the new limit. |
1110 | */ |
1111 | if (ip6_maxfragpackets >= 0) { |
1112 | while (frag6_nfragpackets > (unsigned)ip6_maxfragpackets && |
1113 | ip6q.ip6q_prev) { |
1114 | ip6stat.ip6s_fragoverflow++; |
1115 | /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ |
1116 | /* |
1117 | * Avoid sending ICMPv6 Time Exceeded for fragment headers |
1118 | * that are marked dirty. |
1119 | */ |
1120 | diq6_tmp = (ip6q.ip6q_prev->ip6q_flags & IP6QF_DIRTY) ? |
1121 | NULL : &diq6; |
1122 | frag6_freef(q6: ip6q.ip6q_prev, dfq6: &dfq6, diq6: diq6_tmp); |
1123 | } |
1124 | } |
1125 | /* re-arm the purge timer if there's work to do */ |
1126 | frag6_timeout_run = 0; |
1127 | frag6_sched_timeout(); |
1128 | lck_mtx_unlock(lck: &ip6qlock); |
1129 | |
1130 | /* free fragments that need to be freed */ |
1131 | if (!MBUFQ_EMPTY(&dfq6)) { |
1132 | MBUFQ_DRAIN(&dfq6); |
1133 | } |
1134 | |
1135 | frag6_icmp6_timeex_error(diq6: &diq6); |
1136 | |
1137 | VERIFY(MBUFQ_EMPTY(&dfq6)); |
1138 | VERIFY(MBUFQ_EMPTY(&diq6)); |
1139 | } |
1140 | |
1141 | static void |
1142 | frag6_sched_timeout(void) |
1143 | { |
1144 | LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_OWNED); |
1145 | |
1146 | if (!frag6_timeout_run && frag6_nfragpackets > 0) { |
1147 | frag6_timeout_run = 1; |
1148 | timeout(frag6_timeout, NULL, ticks: hz); |
1149 | } |
1150 | } |
1151 | |
1152 | /* |
1153 | * Drain off all datagram fragments. |
1154 | */ |
1155 | void |
1156 | frag6_drain(void) |
1157 | { |
1158 | struct fq6_head dfq6, diq6; |
1159 | struct fq6_head *diq6_tmp = NULL; |
1160 | |
1161 | MBUFQ_INIT(&dfq6); /* for deferred frees */ |
1162 | MBUFQ_INIT(&diq6); /* for deferred ICMP time exceeded errors */ |
1163 | |
1164 | lck_mtx_lock(lck: &ip6qlock); |
1165 | while (ip6q.ip6q_next != &ip6q) { |
1166 | ip6stat.ip6s_fragdropped++; |
1167 | /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ |
1168 | /* |
1169 | * Avoid sending ICMPv6 Time Exceeded for fragment headers |
1170 | * that are marked dirty. |
1171 | */ |
1172 | diq6_tmp = (ip6q.ip6q_next->ip6q_flags & IP6QF_DIRTY) ? |
1173 | NULL : &diq6; |
1174 | frag6_freef(q6: ip6q.ip6q_next, dfq6: &dfq6, diq6: diq6_tmp); |
1175 | } |
1176 | lck_mtx_unlock(lck: &ip6qlock); |
1177 | |
1178 | /* free fragments that need to be freed */ |
1179 | if (!MBUFQ_EMPTY(&dfq6)) { |
1180 | MBUFQ_DRAIN(&dfq6); |
1181 | } |
1182 | |
1183 | frag6_icmp6_timeex_error(diq6: &diq6); |
1184 | |
1185 | VERIFY(MBUFQ_EMPTY(&dfq6)); |
1186 | VERIFY(MBUFQ_EMPTY(&diq6)); |
1187 | } |
1188 | |
1189 | static struct ip6q * |
1190 | ip6q_alloc(void) |
1191 | { |
1192 | struct ip6q *q6; |
1193 | |
1194 | /* |
1195 | * See comments in ip6q_updateparams(). Keep the count separate |
1196 | * from frag6_nfragpackets since the latter represents the elements |
1197 | * already in the reassembly queues. |
1198 | */ |
1199 | if (ip6q_limit > 0 && ip6q_count > ip6q_limit) { |
1200 | return NULL; |
1201 | } |
1202 | |
1203 | q6 = kalloc_type(struct ip6q, Z_NOWAIT | Z_ZERO); |
1204 | if (q6 != NULL) { |
1205 | os_atomic_inc(&ip6q_count, relaxed); |
1206 | } |
1207 | return q6; |
1208 | } |
1209 | |
1210 | static void |
1211 | ip6q_free(struct ip6q *q6) |
1212 | { |
1213 | kfree_type(struct ip6q, q6); |
1214 | os_atomic_dec(&ip6q_count, relaxed); |
1215 | } |
1216 | |
1217 | static struct ip6asfrag * |
1218 | ip6af_alloc(void) |
1219 | { |
1220 | struct ip6asfrag *af6; |
1221 | |
1222 | /* |
1223 | * See comments in ip6q_updateparams(). Keep the count separate |
1224 | * from frag6_nfrags since the latter represents the elements |
1225 | * already in the reassembly queues. |
1226 | */ |
1227 | if (ip6af_limit > 0 && ip6af_count > ip6af_limit) { |
1228 | return NULL; |
1229 | } |
1230 | |
1231 | af6 = kalloc_type(struct ip6asfrag, Z_NOWAIT | Z_ZERO); |
1232 | if (af6 != NULL) { |
1233 | os_atomic_inc(&ip6af_count, relaxed); |
1234 | } |
1235 | return af6; |
1236 | } |
1237 | |
1238 | static void |
1239 | ip6af_free(struct ip6asfrag *af6) |
1240 | { |
1241 | kfree_type(struct ip6asfrag, af6); |
1242 | os_atomic_dec(&ip6af_count, relaxed); |
1243 | } |
1244 | |
1245 | static void |
1246 | ip6q_updateparams(void) |
1247 | { |
1248 | LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_OWNED); |
1249 | /* |
1250 | * -1 for unlimited allocation. |
1251 | */ |
1252 | if (ip6_maxfragpackets < 0) { |
1253 | ip6q_limit = 0; |
1254 | } |
1255 | if (ip6_maxfrags < 0) { |
1256 | ip6af_limit = 0; |
1257 | } |
1258 | /* |
1259 | * Positive number for specific bound. |
1260 | */ |
1261 | if (ip6_maxfragpackets > 0) { |
1262 | ip6q_limit = ip6_maxfragpackets; |
1263 | } |
1264 | if (ip6_maxfrags > 0) { |
1265 | ip6af_limit = ip6_maxfrags; |
1266 | } |
1267 | /* |
1268 | * Zero specifies no further fragment queue allocation -- set the |
1269 | * bound very low, but rely on implementation elsewhere to actually |
1270 | * prevent allocation and reclaim current queues. |
1271 | */ |
1272 | if (ip6_maxfragpackets == 0) { |
1273 | ip6q_limit = 1; |
1274 | } |
1275 | if (ip6_maxfrags == 0) { |
1276 | ip6af_limit = 1; |
1277 | } |
1278 | /* |
1279 | * Arm the purge timer if not already and if there's work to do |
1280 | */ |
1281 | frag6_sched_timeout(); |
1282 | } |
1283 | |
1284 | static int |
1285 | sysctl_maxfragpackets SYSCTL_HANDLER_ARGS |
1286 | { |
1287 | #pragma unused(arg1, arg2) |
1288 | int error, i; |
1289 | |
1290 | lck_mtx_lock(lck: &ip6qlock); |
1291 | i = ip6_maxfragpackets; |
1292 | error = sysctl_handle_int(oidp, arg1: &i, arg2: 0, req); |
1293 | if (error || req->newptr == USER_ADDR_NULL) { |
1294 | goto done; |
1295 | } |
1296 | /* impose bounds */ |
1297 | if (i < -1 || i > (nmbclusters / 4)) { |
1298 | error = EINVAL; |
1299 | goto done; |
1300 | } |
1301 | ip6_maxfragpackets = i; |
1302 | ip6q_updateparams(); |
1303 | done: |
1304 | lck_mtx_unlock(lck: &ip6qlock); |
1305 | return error; |
1306 | } |
1307 | |
1308 | static int |
1309 | sysctl_maxfrags SYSCTL_HANDLER_ARGS |
1310 | { |
1311 | #pragma unused(arg1, arg2) |
1312 | int error, i; |
1313 | |
1314 | lck_mtx_lock(lck: &ip6qlock); |
1315 | i = ip6_maxfrags; |
1316 | error = sysctl_handle_int(oidp, arg1: &i, arg2: 0, req); |
1317 | if (error || req->newptr == USER_ADDR_NULL) { |
1318 | goto done; |
1319 | } |
1320 | /* impose bounds */ |
1321 | if (i < -1 || i > (nmbclusters / 4)) { |
1322 | error = EINVAL; |
1323 | goto done; |
1324 | } |
1325 | ip6_maxfrags = i; |
1326 | ip6q_updateparams(); /* see if we need to arm timer */ |
1327 | done: |
1328 | lck_mtx_unlock(lck: &ip6qlock); |
1329 | return error; |
1330 | } |
1331 | |