pf_norm.c source code [xnu/bsd/net/pf_norm.c]

1	/*
2	* Copyright (c) 2007-2021 Apple Inc. All rights reserved.
3	*
4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5	*
6	* This file contains Original Code and/or Modifications of Original Code
7	* as defined in and that are subject to the Apple Public Source License
8	* Version 2.0 (the 'License'). You may not use this file except in
9	* compliance with the License. The rights granted to you under the License
10	* may not be used to create, or enable the creation or redistribution of,
11	* unlawful or unlicensed copies of an Apple operating system, or to
12	* circumvent, violate, or enable the circumvention or violation of, any
13	* terms of an Apple operating system software license agreement.
14	*
15	* Please obtain a copy of the License at
16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
17	*
18	* The Original Code and all software distributed under the License are
19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23	* Please see the License for the specific language governing rights and
24	* limitations under the License.
25	*
26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27	*/
28
/*	$apfw: pf_norm.c,v 1.10 2008/08/28 19:10:53 jhw Exp $ */
/*	$OpenBSD: pf_norm.c,v 1.107 2006/04/16 00:59:52 pascoe Exp $ */
31
32	/*
33	* Copyright 2001 Niels Provos <provos@citi.umich.edu>
34	* All rights reserved.
35	*
36	* Redistribution and use in source and binary forms, with or without
37	* modification, are permitted provided that the following conditions
38	* are met:
39	* 1. Redistributions of source code must retain the above copyright
40	* notice, this list of conditions and the following disclaimer.
41	* 2. Redistributions in binary form must reproduce the above copyright
42	* notice, this list of conditions and the following disclaimer in the
43	* documentation and/or other materials provided with the distribution.
44	*
45	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
46	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
47	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
48	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
49	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
50	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
51	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
52	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
53	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
54	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
55	*/
56
57	#include <sys/param.h>
58	#include <sys/systm.h>
59	#include <sys/mbuf.h>
60	#include <sys/filio.h>
61	#include <sys/fcntl.h>
62	#include <sys/socket.h>
63	#include <sys/kernel.h>
64	#include <sys/time.h>
65	#include <sys/random.h>
66	#include <sys/mcache.h>
67
68	#include <net/if.h>
69	#include <net/if_types.h>
70	#include <net/bpf.h>
71	#include <net/route.h>
72	#include <net/if_pflog.h>
73
74	#include <netinet/in.h>
75	#include <netinet/in_var.h>
76	#include <netinet/in_systm.h>
77	#include <netinet/ip.h>
78	#include <netinet/ip_var.h>
79	#include <netinet/tcp.h>
80	#include <netinet/tcp_seq.h>
81	#include <netinet/tcp_fsm.h>
82	#include <netinet/udp.h>
83	#include <netinet/ip_icmp.h>
84
85	#include <netinet/ip6.h>
86	#include <netinet6/ip6_var.h>
87
88	#include <net/pfvar.h>
89
90	struct pf_frent {
91	LIST_ENTRY(pf_frent) fr_next;
92	struct mbuf *fr_m;
93	#define fr_ip fr_u.fru_ipv4
94	#define fr_ip6 fr_u.fru_ipv6
95	union {
96	struct ip *fru_ipv4;
97	struct ip6_hdr *fru_ipv6;
98	} fr_u;
99	struct ip6_frag fr_ip6f_opt;
100	uint16_t fr_ip6f_hlen; / total header length /
101	uint16_t fr_ip6f_extoff; / last extension header offset or 0 /
102	};
103
104	struct pf_frcache {
105	LIST_ENTRY(pf_frcache) fr_next;
106	uint16_t fr_off;
107	uint16_t fr_end;
108	};
109
/* Values stored in pf_fragment.fr_flags */
#define PFFRAG_SEENLAST 0x0001          /* Seen the last fragment for this */
#define PFFRAG_NOBUFFER 0x0002          /* Non-buffering fragment cache */
#define PFFRAG_DROP     0x0004          /* Drop all fragments */
/* True when the descriptor buffers fragments (uses fr_queue, not fr_cache) */
#define BUFFER_FRAGMENTS(fr)    (!((fr)->fr_flags & PFFRAG_NOBUFFER))
114
115	struct pf_fragment {
116	RB_ENTRY(pf_fragment) fr_entry;
117	TAILQ_ENTRY(pf_fragment) frag_next;
118	struct pf_addr fr_srcx;
119	struct pf_addr fr_dstx;
120	u_int8_t fr_p; / protocol of this fragment /
121	u_int8_t fr_flags; / status flags /
122	u_int16_t fr_max; / fragment data max /
123	#define fr_id fr_uid.fru_id4
124	#define fr_id6 fr_uid.fru_id6
125	union {
126	u_int16_t fru_id4;
127	u_int32_t fru_id6;
128	} fr_uid;
129	int fr_af;
130	u_int32_t fr_timeout;
131	#define fr_queue fr_u.fru_queue
132	#define fr_cache fr_u.fru_cache
133	union {
134	LIST_HEAD(pf_fragq, pf_frent) fru_queue; / buffering /
135	LIST_HEAD(pf_cacheq, pf_frcache) fru_cache; / non-buf /
136	} fr_u;
137	uint32_t fr_csum_flags; / checksum flags /
138	uint32_t fr_csum; / partial checksum value /
139	uint16_t fr_ip6_maxlen; / maximum length of a single fragment in IPv6 /
140	};
141
142	static TAILQ_HEAD(pf_fragqueue, pf_fragment) pf_fragqueue;
143	static TAILQ_HEAD(pf_cachequeue, pf_fragment) pf_cachequeue;
144
145	static __inline int pf_frag_compare(struct pf_fragment *,
146	struct pf_fragment *);
147	static RB_HEAD(pf_frag_tree, pf_fragment) pf_frag_tree, pf_cache_tree;
148	RB_PROTOTYPE_SC(__private_extern__, pf_frag_tree, pf_fragment, fr_entry,
149	pf_frag_compare);
150	RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
151
152	/ Private prototypes /
153	static void pf_ip6hdr2key(struct pf_fragment , struct* ip6_hdr *,
154	struct ip6_frag *);
155	static void pf_ip2key(struct pf_fragment , struct* ip *);
156	static void pf_remove_fragment(struct pf_fragment *);
157	static void pf_flush_fragments(void);
158	static void pf_free_fragment(struct pf_fragment *);
159	static struct pf_fragment pf_find_fragment_by_key(struct* pf_fragment *,
160	struct pf_frag_tree *);
161	static __inline struct pf_fragment *
162	pf_find_fragment_by_ipv4_header(struct ip , struct* pf_frag_tree *);
163	static struct mbuf pf_reassemble(struct* mbuf , struct* pf_fragment **,
164	struct pf_frent , int*);
165	static struct mbuf pf_fragcache(struct* mbuf , struct** ip *,
166	struct pf_fragment *, int, int, int* *);
167	static int pf_normalize_tcpopt(struct pf_rule , int, struct* pfi_kif *,
168	struct pf_pdesc , pbuf_t , struct tcphdr , int, int* *);
169	static __inline struct pf_fragment *
170	pf_find_fragment_by_ipv6_header(struct ip6_hdr , struct* ip6_frag *,
171	struct pf_frag_tree *);
172	static struct mbuf pf_reassemble6(struct* mbuf , struct pf_fragment ,
173	struct pf_frent , int*);
174	static struct mbuf pf_frag6cache(struct* mbuf , struct** ip6_hdr*,
175	struct ip6_frag , struct* pf_fragment *, int, int, int, int* *);
176
/*
 * Debug print helper.  `x' is a parenthesised printf argument list, e.g.
 * DPFPRINTF(("value %d\n", v)).  The message is prefixed with the calling
 * function's name and is only emitted when pf_status.debug is at least
 * PF_DEBUG_MISC.
 */
#define DPFPRINTF(x) do {                               \
	if (pf_status.debug >= PF_DEBUG_MISC) {         \
	        printf("%s: ", __func__);               \
	        printf x ;                              \
	}                                               \
} while (0)
183
184	/ Globals /
185	struct pool pf_frent_pl, pf_frag_pl;
186	static struct pool pf_cache_pl, pf_cent_pl;
187	struct pool pf_state_scrub_pl;
188
189	static int pf_nfrents, pf_ncache;
190
191	void
192	pf_normalize_init(void)
193	{
194	pool_init(&pf_frent_pl, sizeof(struct pf_frent), `0`, `0`, `0`, "pffrent",
195	NULL);
196	pool_init(&pf_frag_pl, sizeof(struct pf_fragment), `0`, `0`, `0`, "pffrag",
197	NULL);
198	pool_init(&pf_cache_pl, sizeof(struct pf_fragment), `0`, `0`, `0`,
199	"pffrcache", NULL);
200	pool_init(&pf_cent_pl, sizeof(struct pf_frcache), `0`, `0`, `0`, "pffrcent",
201	NULL);
202	pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), `0`, `0`, `0`,
203	"pfstscr", NULL);
204
205	pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT);
206	pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, `0`);
207	pool_sethardlimit(&pf_cache_pl, PFFRAG_FRCACHE_HIWAT, NULL, `0`);
208	pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, `0`);
209
210	TAILQ_INIT(&pf_fragqueue);
211	TAILQ_INIT(&pf_cachequeue);
212	}
213
#if 0
/*
 * Counterpart of pf_normalize_init(): destroys the pools in the reverse
 * order of their creation.  Currently compiled out.
 */
void
pf_normalize_destroy(void)
{
	pool_destroy(&pf_state_scrub_pl);
	pool_destroy(&pf_cent_pl);
	pool_destroy(&pf_cache_pl);
	pool_destroy(&pf_frag_pl);
	pool_destroy(&pf_frent_pl);
}
#endif
225
226	int
227	pf_normalize_isempty(void)
228	{
229	return TAILQ_EMPTY(&pf_fragqueue) && TAILQ_EMPTY(&pf_cachequeue);
230	}
231
232	static __inline int
233	pf_frag_compare(struct pf_fragment a, struct* pf_fragment *b)
234	{
235	int diff;
236
237	if ((diff = a->fr_af - b->fr_af)) {
238	return diff;
239	} else if ((diff = a->fr_p - b->fr_p)) {
240	return diff;
241	} else {
242	struct pf_addr *sa = &a->fr_srcx;
243	struct pf_addr *sb = &b->fr_srcx;
244	struct pf_addr *da = &a->fr_dstx;
245	struct pf_addr *db = &b->fr_dstx;
246
247	switch (a->fr_af) {
248	#ifdef INET
249	case AF_INET:
250	if ((diff = a->fr_id - b->fr_id)) {
251	return diff;
252	} else if (sa->v4addr.s_addr < sb->v4addr.s_addr) {
253	return -`1`;
254	} else if (sa->v4addr.s_addr > sb->v4addr.s_addr) {
255	return `1`;
256	} else if (da->v4addr.s_addr < db->v4addr.s_addr) {
257	return -`1`;
258	} else if (da->v4addr.s_addr > db->v4addr.s_addr) {
259	return `1`;
260	}
261	break;
262	#endif
263	case AF_INET6:
264	if ((diff = a->fr_id6 - b->fr_id6)) {
265	return diff;
266	} else if (sa->addr32[`3`] < sb->addr32[`3`]) {
267	return -`1`;
268	} else if (sa->addr32[`3`] > sb->addr32[`3`]) {
269	return `1`;
270	} else if (sa->addr32[`2`] < sb->addr32[`2`]) {
271	return -`1`;
272	} else if (sa->addr32[`2`] > sb->addr32[`2`]) {
273	return `1`;
274	} else if (sa->addr32[`1`] < sb->addr32[`1`]) {
275	return -`1`;
276	} else if (sa->addr32[`1`] > sb->addr32[`1`]) {
277	return `1`;
278	} else if (sa->addr32[`0`] < sb->addr32[`0`]) {
279	return -`1`;
280	} else if (sa->addr32[`0`] > sb->addr32[`0`]) {
281	return `1`;
282	} else if (da->addr32[`3`] < db->addr32[`3`]) {
283	return -`1`;
284	} else if (da->addr32[`3`] > db->addr32[`3`]) {
285	return `1`;
286	} else if (da->addr32[`2`] < db->addr32[`2`]) {
287	return -`1`;
288	} else if (da->addr32[`2`] > db->addr32[`2`]) {
289	return `1`;
290	} else if (da->addr32[`1`] < db->addr32[`1`]) {
291	return -`1`;
292	} else if (da->addr32[`1`] > db->addr32[`1`]) {
293	return `1`;
294	} else if (da->addr32[`0`] < db->addr32[`0`]) {
295	return -`1`;
296	} else if (da->addr32[`0`] > db->addr32[`0`]) {
297	return `1`;
298	}
299	break;
300	default:
301	VERIFY(!`0` && "only IPv4 and IPv6 supported!");
302	break;
303	}
304	}
305	return `0`;
306	}
307
308	void
309	pf_purge_expired_fragments(void)
310	{
311	struct pf_fragment *frag;
312	u_int32_t expire = pf_time_second() -
313	pf_default_rule.timeout[PFTM_FRAG];
314
315	while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
316	VERIFY(BUFFER_FRAGMENTS(frag));
317	if (frag->fr_timeout > expire) {
318	break;
319	}
320
321	switch (frag->fr_af) {
322	case AF_INET:
323	DPFPRINTF(("expiring IPv4 %d(0x%llx) from queue.\n",
324	ntohs(frag->fr_id),
325	(uint64_t)VM_KERNEL_ADDRHASH(frag)));
326	break;
327	case AF_INET6:
328	DPFPRINTF(("expiring IPv6 %d(0x%llx) from queue.\n",
329	ntohl(frag->fr_id6),
330	(uint64_t)VM_KERNEL_ADDRHASH(frag)));
331	break;
332	default:
333	VERIFY(`0` && "only IPv4 and IPv6 supported");
334	break;
335	}
336	pf_free_fragment(frag);
337	}
338
339	while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) {
340	VERIFY(!BUFFER_FRAGMENTS(frag));
341	if (frag->fr_timeout > expire) {
342	break;
343	}
344
345	switch (frag->fr_af) {
346	case AF_INET:
347	DPFPRINTF(("expiring IPv4 %d(0x%llx) from cache.\n",
348	ntohs(frag->fr_id),
349	(uint64_t)VM_KERNEL_ADDRHASH(frag)));
350	break;
351	case AF_INET6:
352	DPFPRINTF(("expiring IPv6 %d(0x%llx) from cache.\n",
353	ntohl(frag->fr_id6),
354	(uint64_t)VM_KERNEL_ADDRHASH(frag)));
355	break;
356	default:
357	VERIFY(`0` && "only IPv4 and IPv6 supported");
358	break;
359	}
360	pf_free_fragment(frag);
361	VERIFY(TAILQ_EMPTY(&pf_cachequeue) \|\|
362	TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag);
363	}
364	}
365
366	/*
367	* Try to flush old fragments to make space for new ones
368	*/
369
370	static void
371	pf_flush_fragments(void)
372	{
373	struct pf_fragment *frag;
374	int goal;
375
376	goal = pf_nfrents * `9` / `10`;
377	DPFPRINTF(("trying to free > %d frents\n",
378	pf_nfrents - goal));
379	while (goal < pf_nfrents) {
380	frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue);
381	if (frag == NULL) {
382	break;
383	}
384	pf_free_fragment(frag);
385	}
386
387
388	goal = pf_ncache * `9` / `10`;
389	DPFPRINTF(("trying to free > %d cache entries\n",
390	pf_ncache - goal));
391	while (goal < pf_ncache) {
392	frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue);
393	if (frag == NULL) {
394	break;
395	}
396	pf_free_fragment(frag);
397	}
398	}
399
400	/ Frees the fragments and all associated entries /
401
402	static void
403	pf_free_fragment(struct pf_fragment *frag)
404	{
405	struct pf_frent *frent;
406	struct pf_frcache *frcache;
407
408	/ Free all fragments /
409	if (BUFFER_FRAGMENTS(frag)) {
410	for (frent = LIST_FIRST(&frag->fr_queue); frent;
411	frent = LIST_FIRST(&frag->fr_queue)) {
412	LIST_REMOVE(frent, fr_next);
413
414	m_freem(frent->fr_m);
415	pool_put(&pf_frent_pl, frent);
416	pf_nfrents--;
417	}
418	} else {
419	for (frcache = LIST_FIRST(&frag->fr_cache); frcache;
420	frcache = LIST_FIRST(&frag->fr_cache)) {
421	LIST_REMOVE(frcache, fr_next);
422
423	VERIFY(LIST_EMPTY(&frag->fr_cache) \|\|
424	LIST_FIRST(&frag->fr_cache)->fr_off >
425	frcache->fr_end);
426
427	pool_put(&pf_cent_pl, frcache);
428	pf_ncache--;
429	}
430	}
431
432	pf_remove_fragment(frag);
433	}
434
435	static void
436	pf_ip6hdr2key(struct pf_fragment key, struct* ip6_hdr *ip6,
437	struct ip6_frag *fh)
438	{
439	key->fr_p = fh->ip6f_nxt;
440	key->fr_id6 = fh->ip6f_ident;
441	key->fr_af = AF_INET6;
442	key->fr_srcx.v6addr = ip6->ip6_src;
443	key->fr_dstx.v6addr = ip6->ip6_dst;
444	}
445
446	static void
447	pf_ip2key(struct pf_fragment key, struct* ip *ip)
448	{
449	key->fr_p = ip->ip_p;
450	key->fr_id = ip->ip_id;
451	key->fr_af = AF_INET;
452	key->fr_srcx.v4addr.s_addr = ip->ip_src.s_addr;
453	key->fr_dstx.v4addr.s_addr = ip->ip_dst.s_addr;
454	}
455
456	static struct pf_fragment *
457	pf_find_fragment_by_key(struct pf_fragment key, struct* pf_frag_tree *tree)
458	{
459	struct pf_fragment *frag;
460
461	frag = RB_FIND(pf_frag_tree, tree, key);
462	if (frag != NULL) {
463	/ XXX Are we sure we want to update the timeout? /
464	frag->fr_timeout = pf_time_second();
465	if (BUFFER_FRAGMENTS(frag)) {
466	TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
467	TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);
468	} else {
469	TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
470	TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next);
471	}
472	}
473
474	return frag;
475	}
476
477	static __attribute__((noinline)) struct pf_fragment *
478	pf_find_fragment_by_ipv4_header(struct ip ip, struct* pf_frag_tree *tree)
479	{
480	struct pf_fragment key;
481	pf_ip2key(key: &key, ip);
482	return pf_find_fragment_by_key(key: &key, tree);
483	}
484
485	/ Removes a fragment from the fragment queue and frees the fragment /
486	static void
487	pf_remove_fragment(struct pf_fragment *frag)
488	{
489	if (BUFFER_FRAGMENTS(frag)) {
490	RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag);
491	TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
492	pool_put(&pf_frag_pl, frag);
493	} else {
494	RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag);
495	TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
496	pool_put(&pf_cache_pl, frag);
497	}
498	}
499
500	#define FR_IP_OFF(fr) ((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3)
501	static struct mbuf *
502	pf_reassemble(struct mbuf m0, struct* pf_fragment **frag,
503	struct pf_frent frent, int* mff)
504	{
505	struct mbuf m = m0, m2;
506	struct pf_frent frea, next;
507	struct pf_frent *frep = NULL;
508	struct ip *ip = frent->fr_ip;
509	uint32_t hlen = ip->ip_hl << `2`;
510	u_int16_t off = (ntohs(ip->ip_off) & IP_OFFMASK) << `3`;
511	u_int16_t ip_len = ntohs(ip->ip_len) - ip->ip_hl * `4`;
512	u_int16_t fr_max = ip_len + off;
513	uint32_t csum, csum_flags;
514
515	VERIFY(frag == NULL \|\| BUFFER_FRAGMENTS(frag));
516
517	/*
518	* Leverage partial checksum offload for IP fragments. Narrow down
519	* the scope to cover only UDP without IP options, as that is the
520	* most common case.
521	*
522	* Perform 1's complement adjustment of octets that got included/
523	* excluded in the hardware-calculated checksum value. Ignore cases
524	* where the value includes the entire IPv4 header span, as the sum
525	* for those octets would already be 0 by the time we get here; IP
526	* has already performed its header checksum validation. Also take
527	* care of any trailing bytes and subtract out their partial sum.
528	*/
529	if (ip->ip_p == IPPROTO_UDP && hlen == sizeof(struct ip) &&
530	(m->m_pkthdr.csum_flags &
531	(CSUM_DATA_VALID \| CSUM_PARTIAL \| CSUM_PSEUDO_HDR)) ==
532	(CSUM_DATA_VALID \| CSUM_PARTIAL)) {
533	uint32_t start = m->m_pkthdr.csum_rx_start;
534	int32_t trailer = (m_pktlen(m) - ntohs(ip->ip_len));
535	uint32_t swbytes = (uint32_t)trailer;
536
537	csum = m->m_pkthdr.csum_rx_val;
538
539	ASSERT(trailer >= `0`);
540	if ((start != `0` && start != hlen) \|\| trailer != `0`) {
541	#if BYTE_ORDER != BIG_ENDIAN
542	if (start < hlen) {
543	HTONS(ip->ip_len);
544	HTONS(ip->ip_off);
545	}
546	#endif /* BYTE_ORDER != BIG_ENDIAN */
547	/ callee folds in sum /
548	csum = m_adj_sum16(m, start, hlen,
549	(ip->ip_len - hlen), csum);
550	if (hlen > start) {
551	swbytes += (hlen - start);
552	} else {
553	swbytes += (start - hlen);
554	}
555	#if BYTE_ORDER != BIG_ENDIAN
556	if (start < hlen) {
557	NTOHS(ip->ip_off);
558	NTOHS(ip->ip_len);
559	}
560	#endif /* BYTE_ORDER != BIG_ENDIAN */
561	}
562	csum_flags = m->m_pkthdr.csum_flags;
563
564	if (swbytes != `0`) {
565	udp_in_cksum_stats(swbytes);
566	}
567	if (trailer != `0`) {
568	m_adj(m, -trailer);
569	}
570	} else {
571	csum = `0`;
572	csum_flags = `0`;
573	}
574
575	/ Invalidate checksum /
576	m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
577
578	/ Strip off ip header /
579	m->m_data += hlen;
580	m->m_len -= hlen;
581
582	/ Create a new reassembly queue for this packet /
583	if (*frag == NULL) {
584	*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
585	if (*frag == NULL) {
586	pf_flush_fragments();
587	*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
588	if (*frag == NULL) {
589	goto drop_fragment;
590	}
591	}
592
593	(*frag)->fr_flags = `0`;
594	(*frag)->fr_max = `0`;
595	(*frag)->fr_af = AF_INET;
596	(*frag)->fr_srcx.v4addr = frent->fr_ip->ip_src;
597	(*frag)->fr_dstx.v4addr = frent->fr_ip->ip_dst;
598	(*frag)->fr_p = frent->fr_ip->ip_p;
599	(*frag)->fr_id = frent->fr_ip->ip_id;
600	(*frag)->fr_timeout = pf_time_second();
601	if (csum_flags != `0`) {
602	(*frag)->fr_csum_flags = csum_flags;
603	(*frag)->fr_csum = csum;
604	}
605	LIST_INIT(&(*frag)->fr_queue);
606
607	RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
608	TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);
609
610	/ We do not have a previous fragment /
611	frep = NULL;
612	goto insert;
613	}
614
615	/*
616	* If this fragment contains similar checksum offload info
617	* as that of the existing ones, accumulate checksum. Otherwise,
618	* invalidate checksum offload info for the entire datagram.
619	*/
620	if (csum_flags != `0` && csum_flags == (*frag)->fr_csum_flags) {
621	(*frag)->fr_csum += csum;
622	} else if ((*frag)->fr_csum_flags != `0`) {
623	(*frag)->fr_csum_flags = `0`;
624	}
625
626	/*
627	* Find a fragment after the current one:
628	* - off contains the real shifted offset.
629	*/
630	LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
631	if (FR_IP_OFF(frea) > off) {
632	break;
633	}
634	frep = frea;
635	}
636
637	VERIFY(frep != NULL \|\| frea != NULL);
638
639	if (frep != NULL &&
640	FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl *
641	`4` > off) {
642	u_int16_t precut;
643
644	precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) -
645	frep->fr_ip->ip_hl * `4` - off;
646	if (precut >= ip_len) {
647	goto drop_fragment;
648	}
649	m_adj(frent->fr_m, precut);
650	DPFPRINTF(("overlap -%d\n", precut));
651	/ Enforce 8 byte boundaries /
652	ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> `3`));
653	off = (ntohs(ip->ip_off) & IP_OFFMASK) << `3`;
654	ip_len -= precut;
655	ip->ip_len = htons(ip_len);
656	}
657
658	for (; frea != NULL && ip_len + off > FR_IP_OFF(frea);
659	frea = next) {
660	u_int16_t aftercut;
661
662	aftercut = ip_len + off - FR_IP_OFF(frea);
663	DPFPRINTF(("adjust overlap %d\n", aftercut));
664	if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl
665	* `4`) {
666	frea->fr_ip->ip_len =
667	htons(ntohs(frea->fr_ip->ip_len) - aftercut);
668	frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) +
669	(aftercut >> `3`));
670	m_adj(frea->fr_m, aftercut);
671	break;
672	}
673
674	/ This fragment is completely overlapped, lose it /
675	next = LIST_NEXT(frea, fr_next);
676	m_freem(frea->fr_m);
677	LIST_REMOVE(frea, fr_next);
678	pool_put(&pf_frent_pl, frea);
679	pf_nfrents--;
680	}
681
682	insert:
683	/ Update maximum data size /
684	if ((*frag)->fr_max < fr_max) {
685	(*frag)->fr_max = fr_max;
686	}
687	/ This is the last segment /
688	if (!mff) {
689	(*frag)->fr_flags \|= PFFRAG_SEENLAST;
690	}
691
692	if (frep == NULL) {
693	LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
694	} else {
695	LIST_INSERT_AFTER(frep, frent, fr_next);
696	}
697
698	/ Check if we are completely reassembled /
699	if (!((*frag)->fr_flags & PFFRAG_SEENLAST)) {
700	return NULL;
701	}
702
703	/ Check if we have all the data /
704	off = `0`;
705	for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
706	next = LIST_NEXT(frep, fr_next);
707
708	off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * `4`;
709	if (off < (*frag)->fr_max &&
710	(next == NULL \|\| FR_IP_OFF(next) != off)) {
711	DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
712	off, next == NULL ? -`1` : FR_IP_OFF(next),
713	(*frag)->fr_max));
714	return NULL;
715	}
716	}
717	DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
718	if (off < (*frag)->fr_max) {
719	return NULL;
720	}
721
722	/ We have all the data /
723	frent = LIST_FIRST(&(*frag)->fr_queue);
724	VERIFY(frent != NULL);
725	if ((frent->fr_ip->ip_hl << `2`) + off > IP_MAXPACKET) {
726	DPFPRINTF(("drop: too big: %d\n", off));
727	pf_free_fragment(frag: *frag);
728	*frag = NULL;
729	return NULL;
730	}
731	next = LIST_NEXT(frent, fr_next);
732
733	/ Magic from ip_input /
734	ip = frent->fr_ip;
735	m = frent->fr_m;
736	m2 = m->m_next;
737	m->m_next = NULL;
738	m_cat(m, m2);
739	pool_put(&pf_frent_pl, frent);
740	pf_nfrents--;
741	for (frent = next; frent != NULL; frent = next) {
742	next = LIST_NEXT(frent, fr_next);
743
744	m2 = frent->fr_m;
745	pool_put(&pf_frent_pl, frent);
746	pf_nfrents--;
747	m_cat(m, m2);
748	}
749
750	ip->ip_src = (*frag)->fr_srcx.v4addr;
751	ip->ip_dst = (*frag)->fr_dstx.v4addr;
752
753	if ((*frag)->fr_csum_flags != `0`) {
754	csum = (*frag)->fr_csum;
755
756	ADDCARRY(csum);
757
758	m->m_pkthdr.csum_rx_val = csum;
759	m->m_pkthdr.csum_rx_start = sizeof(struct ip);
760	m->m_pkthdr.csum_flags = (*frag)->fr_csum_flags;
761	} else if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) \|\|
762	(m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
763	/ loopback checksums are always OK /
764	m->m_pkthdr.csum_data = `0xffff`;
765	m->m_pkthdr.csum_flags =
766	CSUM_DATA_VALID \| CSUM_PSEUDO_HDR \|
767	CSUM_IP_CHECKED \| CSUM_IP_VALID;
768	}
769
770	/ Remove from fragment queue /
771	pf_remove_fragment(frag: *frag);
772	*frag = NULL;
773
774	hlen = ip->ip_hl << `2`;
775	ip->ip_len = htons(off + hlen);
776	m->m_len += hlen;
777	m->m_data -= hlen;
778
779	/ some debugging cruft by sklower, below, will go away soon /
780	/ XXX this should be done elsewhere /
781	if (m->m_flags & M_PKTHDR) {
782	int plen = `0`;
783	for (m2 = m; m2; m2 = m2->m_next) {
784	plen += m2->m_len;
785	}
786	m->m_pkthdr.len = plen;
787	}
788
789	DPFPRINTF(("complete: 0x%llx(%d)\n",
790	(uint64_t)VM_KERNEL_ADDRPERM(m), ntohs(ip->ip_len)));
791	return m;
792
793	drop_fragment:
794	/ Oops - fail safe - drop packet /
795	pool_put(&pf_frent_pl, frent);
796	pf_nfrents--;
797	m_freem(m);
798	return NULL;
799	}
800
801	static __attribute__((noinline)) struct mbuf *
802	pf_fragcache(struct mbuf m0, struct** ip h, struct* pf_fragment *frag, int* mff,
803	int drop, int *nomem)
804	{
805	struct mbuf m = m0;
806	struct pf_frcache frp, fra, *cur = NULL;
807	int ip_len = ntohs(h->ip_len) - (h->ip_hl << `2`);
808	u_int16_t off = ntohs(h->ip_off) << `3`;
809	u_int16_t fr_max = ip_len + off;
810	int hosed = `0`;
811
812	VERIFY(frag == NULL \|\| !BUFFER_FRAGMENTS(frag));
813
814	/ Create a new range queue for this packet /
815	if (*frag == NULL) {
816	*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
817	if (*frag == NULL) {
818	pf_flush_fragments();
819	*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
820	if (*frag == NULL) {
821	goto no_mem;
822	}
823	}
824
825	/ Get an entry for the queue /
826	cur = pool_get(&pf_cent_pl, PR_NOWAIT);
827	if (cur == NULL) {
828	pool_put(&pf_cache_pl, *frag);
829	*frag = NULL;
830	goto no_mem;
831	}
832	pf_ncache++;
833
834	(*frag)->fr_flags = PFFRAG_NOBUFFER;
835	(*frag)->fr_max = `0`;
836	(*frag)->fr_af = AF_INET;
837	(*frag)->fr_srcx.v4addr = h->ip_src;
838	(*frag)->fr_dstx.v4addr = h->ip_dst;
839	(*frag)->fr_p = h->ip_p;
840	(*frag)->fr_id = h->ip_id;
841	(*frag)->fr_timeout = pf_time_second();
842
843	cur->fr_off = off;
844	cur->fr_end = fr_max;
845	LIST_INIT(&(*frag)->fr_cache);
846	LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);
847
848	RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
849	TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);
850
851	DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off,
852	fr_max));
853
854	goto pass;
855	}
856
857	/*
858	* Find a fragment after the current one:
859	* - off contains the real shifted offset.
860	*/
861	frp = NULL;
862	LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
863	if (fra->fr_off > off) {
864	break;
865	}
866	frp = fra;
867	}
868
869	VERIFY(frp != NULL \|\| fra != NULL);
870
871	if (frp != NULL) {
872	int precut;
873
874	precut = frp->fr_end - off;
875	if (precut >= ip_len) {
876	/ Fragment is entirely a duplicate /
877	DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n",
878	h->ip_id, frp->fr_off, frp->fr_end, off, fr_max));
879	goto drop_fragment;
880	}
881	if (precut == `0`) {
882	/ They are adjacent. Fixup cache entry /
883	DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n",
884	h->ip_id, frp->fr_off, frp->fr_end, off, fr_max));
885	frp->fr_end = fr_max;
886	} else if (precut > `0`) {
887	/*
888	* The first part of this payload overlaps with a
889	* fragment that has already been passed.
890	* Need to trim off the first part of the payload.
891	* But to do so easily, we need to create another
892	* mbuf to throw the original header into.
893	*/
894
895	DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n",
896	h->ip_id, precut, frp->fr_off, frp->fr_end, off,
897	fr_max));
898
899	off += precut;
900	fr_max -= precut;
901	/ Update the previous frag to encompass this one /
902	frp->fr_end = fr_max;
903
904	if (!drop) {
905	/*
906	* XXX Optimization opportunity
907	* This is a very heavy way to trim the payload.
908	* we could do it much faster by diddling mbuf
909	* internals but that would be even less legible
910	* than this mbuf magic. For my next trick,
911	* I'll pull a rabbit out of my laptop.
912	*/
913	*m0 = m_copym(m, `0`, h->ip_hl << `2`, M_NOWAIT);
914	if (*m0 == NULL) {
915	goto no_mem;
916	}
917	VERIFY((*m0)->m_next == NULL);
918	m_adj(m, precut + (h->ip_hl << `2`));
919	m_cat(*m0, m);
920	m = *m0;
921	if (m->m_flags & M_PKTHDR) {
922	int plen = `0`;
923	struct mbuf *t;
924	for (t = m; t; t = t->m_next) {
925	plen += t->m_len;
926	}
927	m->m_pkthdr.len = plen;
928	}
929
930
931	h = mtod(m, struct ip *);
932
933
934	VERIFY((int)m->m_len ==
935	ntohs(h->ip_len) - precut);
936	h->ip_off = htons(ntohs(h->ip_off) +
937	(precut >> `3`));
938	h->ip_len = htons(ntohs(h->ip_len) - precut);
939	} else {
940	hosed++;
941	}
942	} else {
943	/ There is a gap between fragments /
944
945	DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n",
946	h->ip_id, -precut, frp->fr_off, frp->fr_end, off,
947	fr_max));
948
949	cur = pool_get(&pf_cent_pl, PR_NOWAIT);
950	if (cur == NULL) {
951	goto no_mem;
952	}
953	pf_ncache++;
954
955	cur->fr_off = off;
956	cur->fr_end = fr_max;
957	LIST_INSERT_AFTER(frp, cur, fr_next);
958	}
959	}
960
961	if (fra != NULL) {
962	int aftercut;
963	int merge = `0`;
964
965	aftercut = fr_max - fra->fr_off;
966	if (aftercut == `0`) {
967	/ Adjacent fragments /
968	DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n",
969	h->ip_id, off, fr_max, fra->fr_off, fra->fr_end));
970	fra->fr_off = off;
971	merge = `1`;
972	} else if (aftercut > `0`) {
973	/ Need to chop off the tail of this fragment /
974	DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n",
975	h->ip_id, aftercut, off, fr_max, fra->fr_off,
976	fra->fr_end));
977	fra->fr_off = off;
978	fr_max -= aftercut;
979
980	merge = `1`;
981
982	if (!drop) {
983	m_adj(m, -aftercut);
984	if (m->m_flags & M_PKTHDR) {
985	int plen = `0`;
986	struct mbuf *t;
987	for (t = m; t; t = t->m_next) {
988	plen += t->m_len;
989	}
990	m->m_pkthdr.len = plen;
991	}
992	h = mtod(m, struct ip *);
993	VERIFY((int)m->m_len ==
994	ntohs(h->ip_len) - aftercut);
995	h->ip_len = htons(ntohs(h->ip_len) - aftercut);
996	} else {
997	hosed++;
998	}
999	} else if (frp == NULL) {
1000	/ There is a gap between fragments /
1001	DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n",
1002	h->ip_id, -aftercut, off, fr_max, fra->fr_off,
1003	fra->fr_end));
1004
1005	cur = pool_get(&pf_cent_pl, PR_NOWAIT);
1006	if (cur == NULL) {
1007	goto no_mem;
1008	}
1009	pf_ncache++;
1010
1011	cur->fr_off = off;
1012	cur->fr_end = fr_max;
1013	LIST_INSERT_BEFORE(fra, cur, fr_next);
1014	}
1015
1016
1017	/ Need to glue together two separate fragment descriptors /
1018	if (merge) {
1019	if (cur && fra->fr_off <= cur->fr_end) {
1020	/ Need to merge in a previous 'cur' /
1021	DPFPRINTF(("fragcache[%d]: adjacent(merge "
1022	"%d-%d) %d-%d (%d-%d)\n",
1023	h->ip_id, cur->fr_off, cur->fr_end, off,
1024	fr_max, fra->fr_off, fra->fr_end));
1025	fra->fr_off = cur->fr_off;
1026	LIST_REMOVE(cur, fr_next);
1027	pool_put(&pf_cent_pl, cur);
1028	pf_ncache--;
1029	cur = NULL;
1030	} else if (frp && fra->fr_off <= frp->fr_end) {
1031	/ Need to merge in a modified 'frp' /
1032	VERIFY(cur == NULL);
1033	DPFPRINTF(("fragcache[%d]: adjacent(merge "
1034	"%d-%d) %d-%d (%d-%d)\n",
1035	h->ip_id, frp->fr_off, frp->fr_end, off,
1036	fr_max, fra->fr_off, fra->fr_end));
1037	fra->fr_off = frp->fr_off;
1038	LIST_REMOVE(frp, fr_next);
1039	pool_put(&pf_cent_pl, frp);
1040	pf_ncache--;
1041	frp = NULL;
1042	}
1043	}
1044	}
1045
1046	if (hosed) {
1047	/*
1048	* We must keep tracking the overall fragment even when
1049	* we're going to drop it anyway so that we know when to
1050	* free the overall descriptor. Thus we drop the frag late.
1051	*/
1052	goto drop_fragment;
1053	}
1054
1055
1056	pass:
1057	/ Update maximum data size /
1058	if ((*frag)->fr_max < fr_max) {
1059	(*frag)->fr_max = fr_max;
1060	}
1061
1062	/ This is the last segment /
1063	if (!mff) {
1064	(*frag)->fr_flags \|= PFFRAG_SEENLAST;
1065	}
1066
1067	/ Check if we are completely reassembled /
1068	if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
1069	LIST_FIRST(&(*frag)->fr_cache)->fr_off == `0` &&
1070	LIST_FIRST(&(frag)->fr_cache)->fr_end == (frag)->fr_max) {
1071	/ Remove from fragment queue /
1072	DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id,
1073	(*frag)->fr_max));
1074	pf_free_fragment(frag: *frag);
1075	*frag = NULL;
1076	}
1077
1078	return m;
1079
1080	no_mem:
1081	*nomem = `1`;
1082
1083	/ Still need to pay attention to !IP_MF /
1084	if (!mff && *frag != NULL) {
1085	(*frag)->fr_flags \|= PFFRAG_SEENLAST;
1086	}
1087
1088	m_freem(m);
1089	return NULL;
1090
1091	drop_fragment:
1092
1093	/ Still need to pay attention to !IP_MF /
1094	if (!mff && *frag != NULL) {
1095	(*frag)->fr_flags \|= PFFRAG_SEENLAST;
1096	}
1097
1098	if (drop) {
1099	/ This fragment has been deemed bad. Don't reass /
1100	if (((*frag)->fr_flags & PFFRAG_DROP) == `0`) {
1101	DPFPRINTF(("fragcache[%d]: dropping overall fragment\n",
1102	h->ip_id));
1103	}
1104	(*frag)->fr_flags \|= PFFRAG_DROP;
1105	}
1106
1107	m_freem(m);
1108	return NULL;
1109	}
1110
/*
 * Accessors for a buffered IPv6 fragment entry (fr points at an object with
 * an fr_ip6 header pointer and an embedded fr_ip6f_opt fragment header).
 *
 * FR_IP6_OFF:  masks the fragment-offset bits of ip6f_offlg (the mask is
 *              applied while the field is still in network byte order) and
 *              converts the result to host order.
 * FR_IP6_PLEN: the ip6_plen field of the fragment's IPv6 header, converted
 *              to host order.
 */
#define FR_IP6_OFF(fr)  \
	(ntohs((fr)->fr_ip6f_opt.ip6f_offlg & IP6F_OFF_MASK))
#define FR_IP6_PLEN(fr) (ntohs((fr)->fr_ip6->ip6_plen))
1114	struct mbuf *
1115	pf_reassemble6(struct mbuf m0, struct pf_fragment frag,
1116	struct pf_frent frent, int* mff)
1117	{
1118	struct mbuf m, m2;
1119	struct pf_frent frea, frep, *next;
1120	struct ip6_hdr *ip6;
1121	struct ip6_frag *ip6f;
1122	int plen, off, fr_max, pktlen;
1123	uint32_t uoff, csum, csum_flags;
1124
1125	VERIFY(frag == NULL \|\| BUFFER_FRAGMENTS(frag));
1126	m = *m0;
1127	frep = NULL;
1128	ip6 = frent->fr_ip6;
1129	ip6f = &frent->fr_ip6f_opt;
1130	off = FR_IP6_OFF(frent);
1131	uoff = frent->fr_ip6f_hlen;
1132	plen = FR_IP6_PLEN(frent);
1133	fr_max = off + plen - (frent->fr_ip6f_hlen - sizeof(*ip6));
1134	pktlen = plen + sizeof(*ip6);
1135
1136	DPFPRINTF(("0x%llx IPv6 frag plen %u off %u fr_ip6f_hlen %u "
1137	"fr_max %u m_len %u\n", (uint64_t)VM_KERNEL_ADDRPERM(m), plen, off,
1138	frent->fr_ip6f_hlen, fr_max, m->m_len));
1139
1140	/*
1141	* Leverage partial checksum offload for simple UDP/IP fragments,
1142	* as that is the most common case.
1143	*
1144	* Perform 1's complement adjustment of octets that got included/
1145	* excluded in the hardware-calculated checksum value. Also take
1146	* care of any trailing bytes and subtract out their partial sum.
1147	*/
1148	if (ip6f->ip6f_nxt == IPPROTO_UDP &&
1149	uoff == (sizeof(ip6) + sizeof(ip6f)) &&
1150	(m->m_pkthdr.csum_flags &
1151	(CSUM_DATA_VALID \| CSUM_PARTIAL \| CSUM_PSEUDO_HDR)) ==
1152	(CSUM_DATA_VALID \| CSUM_PARTIAL)) {
1153	uint32_t start = m->m_pkthdr.csum_rx_start;
1154	uint32_t ip_len = (sizeof(*ip6) + ntohs(ip6->ip6_plen));
1155	int32_t trailer = (m_pktlen(m) - ip_len);
1156	uint32_t swbytes = (uint32_t)trailer;
1157
1158	csum = m->m_pkthdr.csum_rx_val;
1159
1160	ASSERT(trailer >= `0`);
1161	if (start != uoff \|\| trailer != `0`) {
1162	uint16_t s = `0`, d = `0`;
1163
1164	if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) {
1165	s = ip6->ip6_src.s6_addr16[`1`];
1166	ip6->ip6_src.s6_addr16[`1`] = `0`;
1167	}
1168	if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) {
1169	d = ip6->ip6_dst.s6_addr16[`1`];
1170	ip6->ip6_dst.s6_addr16[`1`] = `0`;
1171	}
1172
1173	/ callee folds in sum /
1174	csum = m_adj_sum16(m, start, uoff,
1175	(ip_len - uoff), csum);
1176	if (uoff > start) {
1177	swbytes += (uoff - start);
1178	} else {
1179	swbytes += (start - uoff);
1180	}
1181
1182	if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) {
1183	ip6->ip6_src.s6_addr16[`1`] = s;
1184	}
1185	if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) {
1186	ip6->ip6_dst.s6_addr16[`1`] = d;
1187	}
1188	}
1189	csum_flags = m->m_pkthdr.csum_flags;
1190
1191	if (swbytes != `0`) {
1192	udp_in6_cksum_stats(swbytes);
1193	}
1194	if (trailer != `0`) {
1195	m_adj(m, -trailer);
1196	}
1197	} else {
1198	csum = `0`;
1199	csum_flags = `0`;
1200	}
1201
1202	/ Invalidate checksum /
1203	m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
1204
1205	/ strip off headers up to the fragment payload /
1206	m->m_data += frent->fr_ip6f_hlen;
1207	m->m_len -= frent->fr_ip6f_hlen;
1208
1209	/ Create a new reassembly queue for this packet /
1210	if (*frag == NULL) {
1211	*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
1212	if (*frag == NULL) {
1213	pf_flush_fragments();
1214	*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
1215	if (*frag == NULL) {
1216	goto drop_fragment;
1217	}
1218	}
1219
1220	(*frag)->fr_flags = `0`;
1221	(*frag)->fr_max = `0`;
1222	(*frag)->fr_ip6_maxlen = pktlen;
1223	(*frag)->fr_af = AF_INET6;
1224	(*frag)->fr_srcx.v6addr = frent->fr_ip6->ip6_src;
1225	(*frag)->fr_dstx.v6addr = frent->fr_ip6->ip6_dst;
1226	(*frag)->fr_p = frent->fr_ip6f_opt.ip6f_nxt;
1227	(*frag)->fr_id6 = frent->fr_ip6f_opt.ip6f_ident;
1228	(*frag)->fr_timeout = pf_time_second();
1229	if (csum_flags != `0`) {
1230	(*frag)->fr_csum_flags = csum_flags;
1231	(*frag)->fr_csum = csum;
1232	}
1233	LIST_INIT(&(*frag)->fr_queue);
1234
1235	RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
1236	TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);
1237
1238	/ We do not have a previous fragment /
1239	frep = NULL;
1240	goto insert;
1241	}
1242
1243	/ Remember maximum fragment len for refragmentation /
1244	if (pktlen > (*frag)->fr_ip6_maxlen) {
1245	(*frag)->fr_ip6_maxlen = pktlen;
1246	}
1247	/*
1248	* If this fragment contains similar checksum offload info
1249	* as that of the existing ones, accumulate checksum. Otherwise,
1250	* invalidate checksum offload info for the entire datagram.
1251	*/
1252	if (csum_flags != `0` && csum_flags == (*frag)->fr_csum_flags) {
1253	(*frag)->fr_csum += csum;
1254	} else if ((*frag)->fr_csum_flags != `0`) {
1255	(*frag)->fr_csum_flags = `0`;
1256	}
1257
1258	/*
1259	* Find a fragment after the current one:
1260	* - off contains the real shifted offset.
1261	*/
1262	LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
1263	if (FR_IP6_OFF(frea) > off) {
1264	break;
1265	}
1266	frep = frea;
1267	}
1268
1269	VERIFY(frep != NULL \|\| frea != NULL);
1270
1271	if (frep != NULL &&
1272	FR_IP6_OFF(frep) + FR_IP6_PLEN(frep) - frep->fr_ip6f_hlen > off) {
1273	u_int16_t precut;
1274
1275	precut = FR_IP6_OFF(frep) + FR_IP6_PLEN(frep) -
1276	frep->fr_ip6f_hlen - off;
1277	if (precut >= plen) {
1278	goto drop_fragment;
1279	}
1280	m_adj(frent->fr_m, precut);
1281	DPFPRINTF(("overlap -%d\n", precut));
1282	/ Enforce 8 byte boundaries /
1283	frent->fr_ip6f_opt.ip6f_offlg =
1284	htons(ntohs(frent->fr_ip6f_opt.ip6f_offlg) +
1285	(precut >> `3`));
1286	off = FR_IP6_OFF(frent);
1287	plen -= precut;
1288	ip6->ip6_plen = htons(plen);
1289	}
1290
1291	for (; frea != NULL && plen + off > FR_IP6_OFF(frea); frea = next) {
1292	u_int16_t aftercut;
1293
1294	aftercut = plen + off - FR_IP6_OFF(frea);
1295	DPFPRINTF(("adjust overlap %d\n", aftercut));
1296	if (aftercut < FR_IP6_PLEN(frea) - frea->fr_ip6f_hlen) {
1297	frea->fr_ip6->ip6_plen = htons(FR_IP6_PLEN(frea) -
1298	aftercut);
1299	frea->fr_ip6f_opt.ip6f_offlg =
1300	htons(ntohs(frea->fr_ip6f_opt.ip6f_offlg) +
1301	(aftercut >> `3`));
1302	m_adj(frea->fr_m, aftercut);
1303	break;
1304	}
1305
1306	/ This fragment is completely overlapped, lose it /
1307	next = LIST_NEXT(frea, fr_next);
1308	m_freem(frea->fr_m);
1309	LIST_REMOVE(frea, fr_next);
1310	pool_put(&pf_frent_pl, frea);
1311	pf_nfrents--;
1312	}
1313
1314	insert:
1315	/ Update maximum data size /
1316	if ((*frag)->fr_max < fr_max) {
1317	(*frag)->fr_max = fr_max;
1318	}
1319	/ This is the last segment /
1320	if (!mff) {
1321	(*frag)->fr_flags \|= PFFRAG_SEENLAST;
1322	}
1323
1324	if (frep == NULL) {
1325	LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
1326	} else {
1327	LIST_INSERT_AFTER(frep, frent, fr_next);
1328	}
1329
1330	/ Check if we are completely reassembled /
1331	if (!((*frag)->fr_flags & PFFRAG_SEENLAST)) {
1332	return NULL;
1333	}
1334
1335	/ Check if we have all the data /
1336	off = `0`;
1337	for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
1338	next = LIST_NEXT(frep, fr_next);
1339	off += FR_IP6_PLEN(frep) - (frent->fr_ip6f_hlen - sizeof *ip6);
1340	DPFPRINTF(("frep at %d, next %d, max %d\n",
1341	off, next == NULL ? -`1` : FR_IP6_OFF(next),
1342	(*frag)->fr_max));
1343	if (off < (*frag)->fr_max &&
1344	(next == NULL \|\| FR_IP6_OFF(next) != off)) {
1345	DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
1346	off, next == NULL ? -`1` : FR_IP6_OFF(next),
1347	(*frag)->fr_max));
1348	return NULL;
1349	}
1350	}
1351	DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
1352	if (off < (*frag)->fr_max) {
1353	return NULL;
1354	}
1355
1356	/ We have all the data /
1357	frent = LIST_FIRST(&(*frag)->fr_queue);
1358	VERIFY(frent != NULL);
1359	if (frent->fr_ip6f_hlen + off > IP_MAXPACKET) {
1360	DPFPRINTF(("drop: too big: %d\n", off));
1361	pf_free_fragment(frag: *frag);
1362	*frag = NULL;
1363	return NULL;
1364	}
1365
1366	ASSERT(*frag != NULL);
1367	ASSERT(frent != NULL);
1368	next = LIST_NEXT(frent, fr_next);
1369	if (next == NULL) {
1370	DPFPRINTF(("drop: atomic fragment\n"));
1371	pf_free_fragment(frag: *frag);
1372	*frag = NULL;
1373	return NULL;
1374	}
1375
1376	/ retrieve the values to be filled in to reassembled tag /
1377	uint16_t hdrlen, unfragpartlen, extoff, maxlen;
1378	uint32_t id;
1379
1380	/ Get total extension header length from the first fragment /
1381	hdrlen = frent->fr_ip6f_hlen - sizeof(struct ip6_frag);
1382	/*
1383	* Get total extension header length of per-fragment headers from the
1384	* subsequent fragment.
1385	*/
1386	unfragpartlen = next->fr_ip6f_hlen - sizeof(struct ip6_frag);
1387	extoff = frent->fr_ip6f_extoff;
1388	maxlen = (*frag)->fr_ip6_maxlen;
1389	id = (*frag)->fr_id6;
1390
1391	ip6 = frent->fr_ip6;
1392	ip6->ip6_nxt = (*frag)->fr_p;
1393	ip6->ip6_plen = htons(off);
1394	ip6->ip6_src = (*frag)->fr_srcx.v6addr;
1395	ip6->ip6_dst = (*frag)->fr_dstx.v6addr;
1396
1397	if ((*frag)->fr_csum_flags != `0`) {
1398	csum = (*frag)->fr_csum;
1399
1400	ADDCARRY(csum);
1401
1402	m->m_pkthdr.csum_rx_val = csum;
1403	m->m_pkthdr.csum_rx_start = sizeof(struct ip6_hdr);
1404	m->m_pkthdr.csum_flags = (*frag)->fr_csum_flags;
1405	} else if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) \|\|
1406	(m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
1407	/ loopback checksums are always OK /
1408	m->m_pkthdr.csum_data = `0xffff`;
1409	m->m_pkthdr.csum_flags = CSUM_DATA_VALID \| CSUM_PSEUDO_HDR;
1410	}
1411
1412	/ Remove from fragment queue /
1413	pf_remove_fragment(frag: *frag);
1414	*frag = NULL;
1415
1416	m = frent->fr_m;
1417	m->m_len += sizeof(struct ip6_hdr);
1418	m->m_data -= sizeof(struct ip6_hdr);
1419	memmove(dst: m_mtod_current(m), src: ip6, n: sizeof(struct ip6_hdr));
1420
1421	next = LIST_NEXT(frent, fr_next);
1422	pool_put(&pf_frent_pl, frent);
1423	pf_nfrents--;
1424	for (frent = next; next != NULL; frent = next) {
1425	m2 = frent->fr_m;
1426
1427	m_cat(m, m2);
1428	next = LIST_NEXT(frent, fr_next);
1429	pool_put(&pf_frent_pl, frent);
1430	pf_nfrents--;
1431	}
1432
1433	/ XXX this should be done elsewhere /
1434	if (m->m_flags & M_PKTHDR) {
1435	int len = `0`;
1436	for (m2 = m; m2; m2 = m2->m_next) {
1437	len += m2->m_len;
1438	}
1439	m->m_pkthdr.len = len;
1440	}
1441
1442	DPFPRINTF(("complete: 0x%llx ip6_plen %d m_pkthdr.len %d\n",
1443	(uint64_t)VM_KERNEL_ADDRHASH(m), ntohs(ip6->ip6_plen),
1444	m->m_pkthdr.len));
1445
1446	/ Add the reassembled tag /
1447	struct m_tag *mtag;
1448	struct pf_fragment_tag *ftag;
1449	mtag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_PF_REASS,
1450	sizeof(*ftag), M_NOWAIT, m);
1451	if (mtag == NULL) {
1452	/ XXX: add stats /
1453	m_freem(m);
1454	return NULL;
1455	}
1456	ftag = (struct pf_fragment_tag *)mtag->m_tag_data;
1457	ftag->ft_hdrlen = hdrlen;
1458	ftag->ft_unfragpartlen = unfragpartlen;
1459	ftag->ft_extoff = extoff;
1460	ftag->ft_maxlen = maxlen;
1461	ftag->ft_id = id;
1462	m_tag_prepend(m, mtag);
1463
1464	struct pf_mtag *pftag = pf_get_mtag(m);
1465	ASSERT(pftag != NULL);
1466	pftag->pftag_flags \|= PF_TAG_REASSEMBLED;
1467	return m;
1468
1469	drop_fragment:
1470	/ Oops - fail safe - drop packet /
1471	pool_put(&pf_frent_pl, frent);
1472	--pf_nfrents;
1473	m_freem(m);
1474	return NULL;
1475	}
1476
1477	static __attribute__((noinline)) struct mbuf *
1478	pf_frag6cache(struct mbuf m0, struct** ip6_hdr h, struct* ip6_frag *fh,
1479	struct pf_fragment *frag, int* hlen, int mff, int drop, int *nomem)
1480	{
1481	struct mbuf m = m0;
1482	u_int16_t plen, off, fr_max;
1483	struct pf_frcache frp, fra, *cur = NULL;
1484	int hosed = `0`;
1485
1486	VERIFY(frag == NULL \|\| !BUFFER_FRAGMENTS(frag));
1487	m = *m0;
1488	off = ntohs(fh->ip6f_offlg & IP6F_OFF_MASK);
1489	plen = ntohs(h->ip6_plen) - (hlen - sizeof *h);
1490
1491	/*
1492	* Apple Modification: dimambro@apple.com. The hlen, being passed
1493	* into this function Includes all the headers associated with
1494	* the packet, and may include routing headers, so to get to
1495	* the data payload as stored in the original IPv6 header we need
1496	* to subtract al those headers and the IP header.
1497	*
1498	* The 'max' local variable should also contain the offset from the start
1499	* of the reassembled packet to the octet just past the end of the octets
1500	* in the current fragment where:
1501	* - 'off' is the offset from the start of the reassembled packet to the
1502	* first octet in the fragment,
1503	* - 'plen' is the length of the "payload data length" Excluding all the
1504	* IPv6 headers of the fragment.
1505	* - 'hlen' is computed in pf_normalize_ip6() as the offset from the start
1506	* of the IPv6 packet to the beginning of the data.
1507	*/
1508	fr_max = off + plen;
1509
1510	DPFPRINTF(("0x%llx plen %u off %u fr_max %u\n",
1511	(uint64_t)VM_KERNEL_ADDRHASH(m), plen, off, fr_max));
1512
1513	/ Create a new range queue for this packet /
1514	if (*frag == NULL) {
1515	*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
1516	if (*frag == NULL) {
1517	pf_flush_fragments();
1518	*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
1519	if (*frag == NULL) {
1520	goto no_mem;
1521	}
1522	}
1523
1524	/ Get an entry for the queue /
1525	cur = pool_get(&pf_cent_pl, PR_NOWAIT);
1526	if (cur == NULL) {
1527	pool_put(&pf_cache_pl, *frag);
1528	*frag = NULL;
1529	goto no_mem;
1530	}
1531	pf_ncache++;
1532
1533	(*frag)->fr_flags = PFFRAG_NOBUFFER;
1534	(*frag)->fr_max = `0`;
1535	(*frag)->fr_af = AF_INET6;
1536	(*frag)->fr_srcx.v6addr = h->ip6_src;
1537	(*frag)->fr_dstx.v6addr = h->ip6_dst;
1538	(*frag)->fr_p = fh->ip6f_nxt;
1539	(*frag)->fr_id6 = fh->ip6f_ident;
1540	(*frag)->fr_timeout = pf_time_second();
1541
1542	cur->fr_off = off;
1543	cur->fr_end = fr_max;
1544	LIST_INIT(&(*frag)->fr_cache);
1545	LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);
1546
1547	RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
1548	TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);
1549
1550	DPFPRINTF(("frag6cache[%d]: new %d-%d\n", ntohl(fh->ip6f_ident),
1551	off, fr_max));
1552
1553	goto pass;
1554	}
1555
1556	/*
1557	* Find a fragment after the current one:
1558	* - off contains the real shifted offset.
1559	*/
1560	frp = NULL;
1561	LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
1562	if (fra->fr_off > off) {
1563	break;
1564	}
1565	frp = fra;
1566	}
1567
1568	VERIFY(frp != NULL \|\| fra != NULL);
1569
1570	if (frp != NULL) {
1571	int precut;
1572
1573	precut = frp->fr_end - off;
1574	if (precut >= plen) {
1575	/ Fragment is entirely a duplicate /
1576	DPFPRINTF(("frag6cache[%u]: dead (%d-%d) %d-%d\n",
1577	ntohl(fh->ip6f_ident), frp->fr_off, frp->fr_end,
1578	off, fr_max));
1579	goto drop_fragment;
1580	}
1581	if (precut == `0`) {
1582	/ They are adjacent. Fixup cache entry /
1583	DPFPRINTF(("frag6cache[%u]: adjacent (%d-%d) %d-%d\n",
1584	ntohl(fh->ip6f_ident), frp->fr_off, frp->fr_end,
1585	off, fr_max));
1586	frp->fr_end = fr_max;
1587	} else if (precut > `0`) {
1588	/ The first part of this payload overlaps with a*
1589	* fragment that has already been passed.
1590	* Need to trim off the first part of the payload.
1591	* But to do so easily, we need to create another
1592	* mbuf to throw the original header into.
1593	*/
1594
1595	DPFPRINTF(("frag6cache[%u]: chop %d (%d-%d) %d-%d\n",
1596	ntohl(fh->ip6f_ident), precut, frp->fr_off,
1597	frp->fr_end, off, fr_max));
1598
1599	off += precut;
1600	fr_max -= precut;
1601	/ Update the previous frag to encompass this one /
1602	frp->fr_end = fr_max;
1603
1604	if (!drop) {
1605	/ XXX Optimization opportunity*
1606	* This is a very heavy way to trim the payload.
1607	* we could do it much faster by diddling mbuf
1608	* internals but that would be even less legible
1609	* than this mbuf magic. For my next trick,
1610	* I'll pull a rabbit out of my laptop.
1611	*/
1612	*m0 = m_copym(m, `0`, hlen, M_NOWAIT);
1613	if (*m0 == NULL) {
1614	goto no_mem;
1615	}
1616	VERIFY((*m0)->m_next == NULL);
1617	m_adj(m, precut + hlen);
1618	m_cat(*m0, m);
1619	m = *m0;
1620	if (m->m_flags & M_PKTHDR) {
1621	int pktlen = `0`;
1622	struct mbuf *t;
1623	for (t = m; t; t = t->m_next) {
1624	pktlen += t->m_len;
1625	}
1626	m->m_pkthdr.len = pktlen;
1627	}
1628
1629	h = mtod(m, struct ip6_hdr *);
1630
1631	VERIFY((int)m->m_len ==
1632	ntohs(h->ip6_plen) - precut);
1633	fh->ip6f_offlg &= ~IP6F_OFF_MASK;
1634	fh->ip6f_offlg \|=
1635	htons(ntohs(fh->ip6f_offlg & IP6F_OFF_MASK)
1636	+ (precut >> `3`));
1637	h->ip6_plen = htons(ntohs(h->ip6_plen) -
1638	precut);
1639	} else {
1640	hosed++;
1641	}
1642	} else {
1643	/ There is a gap between fragments /
1644
1645	DPFPRINTF(("frag6cache[%u]: gap %d (%d-%d) %d-%d\n",
1646	ntohl(fh->ip6f_ident), -precut, frp->fr_off,
1647	frp->fr_end, off, fr_max));
1648
1649	cur = pool_get(&pf_cent_pl, PR_NOWAIT);
1650	if (cur == NULL) {
1651	goto no_mem;
1652	}
1653	pf_ncache++;
1654
1655	cur->fr_off = off;
1656	cur->fr_end = fr_max;
1657	LIST_INSERT_AFTER(frp, cur, fr_next);
1658	}
1659	}
1660
1661	if (fra != NULL) {
1662	int aftercut;
1663	int merge = `0`;
1664
1665	aftercut = fr_max - fra->fr_off;
1666	if (aftercut == `0`) {
1667	/ Adjacent fragments /
1668	DPFPRINTF(("frag6cache[%u]: adjacent %d-%d (%d-%d)\n",
1669	ntohl(fh->ip6f_ident), off, fr_max, fra->fr_off,
1670	fra->fr_end));
1671	fra->fr_off = off;
1672	merge = `1`;
1673	} else if (aftercut > `0`) {
1674	/ Need to chop off the tail of this fragment /
1675	DPFPRINTF(("frag6cache[%u]: chop %d %d-%d (%d-%d)\n",
1676	ntohl(fh->ip6f_ident), aftercut, off, fr_max,
1677	fra->fr_off, fra->fr_end));
1678	fra->fr_off = off;
1679	fr_max -= aftercut;
1680
1681	merge = `1`;
1682
1683	if (!drop) {
1684	m_adj(m, -aftercut);
1685	if (m->m_flags & M_PKTHDR) {
1686	int pktlen = `0`;
1687	struct mbuf *t;
1688	for (t = m; t; t = t->m_next) {
1689	pktlen += t->m_len;
1690	}
1691	m->m_pkthdr.len = pktlen;
1692	}
1693	h = mtod(m, struct ip6_hdr *);
1694	VERIFY((int)m->m_len ==
1695	ntohs(h->ip6_plen) - aftercut);
1696	h->ip6_plen =
1697	htons(ntohs(h->ip6_plen) - aftercut);
1698	} else {
1699	hosed++;
1700	}
1701	} else if (frp == NULL) {
1702	/ There is a gap between fragments /
1703	DPFPRINTF(("frag6cache[%u]: gap %d %d-%d (%d-%d)\n",
1704	ntohl(fh->ip6f_ident), -aftercut, off, fr_max,
1705	fra->fr_off, fra->fr_end));
1706
1707	cur = pool_get(&pf_cent_pl, PR_NOWAIT);
1708	if (cur == NULL) {
1709	goto no_mem;
1710	}
1711	pf_ncache++;
1712
1713	cur->fr_off = off;
1714	cur->fr_end = fr_max;
1715	LIST_INSERT_BEFORE(fra, cur, fr_next);
1716	}
1717
1718	/ Need to glue together two separate fragment descriptors /
1719	if (merge) {
1720	if (cur && fra->fr_off <= cur->fr_end) {
1721	/ Need to merge in a previous 'cur' /
1722	DPFPRINTF(("frag6cache[%u]: adjacent(merge "
1723	"%d-%d) %d-%d (%d-%d)\n",
1724	ntohl(fh->ip6f_ident), cur->fr_off,
1725	cur->fr_end, off, fr_max, fra->fr_off,
1726	fra->fr_end));
1727	fra->fr_off = cur->fr_off;
1728	LIST_REMOVE(cur, fr_next);
1729	pool_put(&pf_cent_pl, cur);
1730	pf_ncache--;
1731	cur = NULL;
1732	} else if (frp && fra->fr_off <= frp->fr_end) {
1733	/ Need to merge in a modified 'frp' /
1734	VERIFY(cur == NULL);
1735	DPFPRINTF(("frag6cache[%u]: adjacent(merge "
1736	"%d-%d) %d-%d (%d-%d)\n",
1737	ntohl(fh->ip6f_ident), frp->fr_off,
1738	frp->fr_end, off, fr_max, fra->fr_off,
1739	fra->fr_end));
1740	fra->fr_off = frp->fr_off;
1741	LIST_REMOVE(frp, fr_next);
1742	pool_put(&pf_cent_pl, frp);
1743	pf_ncache--;
1744	frp = NULL;
1745	}
1746	}
1747	}
1748
1749	if (hosed) {
1750	/*
1751	* We must keep tracking the overall fragment even when
1752	* we're going to drop it anyway so that we know when to
1753	* free the overall descriptor. Thus we drop the frag late.
1754	*/
1755	goto drop_fragment;
1756	}
1757
1758	pass:
1759	/ Update maximum data size /
1760	if ((*frag)->fr_max < fr_max) {
1761	(*frag)->fr_max = fr_max;
1762	}
1763
1764	/ This is the last segment /
1765	if (!mff) {
1766	(*frag)->fr_flags \|= PFFRAG_SEENLAST;
1767	}
1768
1769	/ Check if we are completely reassembled /
1770	if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
1771	LIST_FIRST(&(*frag)->fr_cache)->fr_off == `0` &&
1772	LIST_FIRST(&(frag)->fr_cache)->fr_end == (frag)->fr_max) {
1773	/ Remove from fragment queue /
1774	DPFPRINTF(("frag6cache[%u]: done 0-%d\n",
1775	ntohl(fh->ip6f_ident), (*frag)->fr_max));
1776	pf_free_fragment(frag: *frag);
1777	*frag = NULL;
1778	}
1779
1780	return m;
1781
1782	no_mem:
1783	*nomem = `1`;
1784
1785	/ Still need to pay attention to !IP_MF /
1786	if (!mff && *frag != NULL) {
1787	(*frag)->fr_flags \|= PFFRAG_SEENLAST;
1788	}
1789
1790	m_freem(m);
1791	return NULL;
1792
1793	drop_fragment:
1794
1795	/ Still need to pay attention to !IP_MF /
1796	if (!mff && *frag != NULL) {
1797	(*frag)->fr_flags \|= PFFRAG_SEENLAST;
1798	}
1799
1800	if (drop) {
1801	/ This fragment has been deemed bad. Don't reass /
1802	if (((*frag)->fr_flags & PFFRAG_DROP) == `0`) {
1803	DPFPRINTF(("frag6cache[%u]: dropping overall fragment\n",
1804	ntohl(fh->ip6f_ident)));
1805	}
1806	(*frag)->fr_flags \|= PFFRAG_DROP;
1807	}
1808
1809	m_freem(m);
1810	return NULL;
1811	}
1812
1813	int
1814	pf_refragment6(struct ifnet ifp, pbuf_t pbufp, struct* pf_fragment_tag *ftag)
1815	{
1816	struct mbuf *m;
1817	uint32_t frag_id;
1818	uint16_t hdrlen, extoff, maxlen, unfragpartlen;
1819	uint8_t proto;
1820	int error, action;
1821	uint8_t *lexthdrsp;
1822	struct route_in6 ip6route;
1823	struct route_in6 *ro;
1824	struct sockaddr_in6 *dst;
1825	struct ip6_hdr *hdr;
1826	struct pf_mtag *mtag;
1827	struct m_tag *tag;
1828
1829	if (pbufp == NULL \|\| !pbuf_is_valid(*pbufp) \|\| ftag == NULL) {
1830	panic("pf_route6: invalid parameters");
1831	/ NOT REACHED /
1832	}
1833	m = pbuf_to_mbuf(*pbufp, FALSE);
1834	hdr = mtod(m, struct ip6_hdr *);
1835	mtag = pf_find_mtag(m);
1836	hdrlen = ftag->ft_hdrlen - sizeof(struct ip6_hdr);
1837	extoff = ftag->ft_extoff;
1838	maxlen = ftag->ft_maxlen;
1839	frag_id = ftag->ft_id;
1840	unfragpartlen = ftag->ft_unfragpartlen;
1841	tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_PF_REASS);
1842	m_tag_delete(m, tag);
1843	ftag = NULL;
1844	tag = NULL;
1845	mtag->pftag_flags &= ~PF_TAG_REASSEMBLED;
1846	ro = &ip6route;
1847	bzero(s: (caddr_t)ro, n: sizeof(*ro));
1848	dst = (struct sockaddr_in6 *)&ro->ro_dst;
1849	dst->sin6_family = AF_INET6;
1850	dst->sin6_len = sizeof(*dst);
1851	dst->sin6_addr = hdr->ip6_dst;
1852
1853	if (extoff) {
1854	int off;
1855	struct mbuf *mexthdr;
1856
1857	/ Use protocol from next field of last extension header /
1858	mexthdr = m_getptr(m, extoff +
1859	offsetof(struct ip6_ext, ip6e_nxt), &off);
1860	ASSERT(mexthdr != NULL);
1861	lexthdrsp = (mtod(mexthdr, uint8_t *) + off);
1862	proto = *lexthdrsp;
1863	if (proto == IPPROTO_DSTOPTS) {
1864	struct ip6_ext ext;
1865	if (!pf_pull_hdr(pbufp, off, &ext, sizeof*(ext), NULL,
1866	NULL, AF_INET6)) {
1867	DPFPRINTF(("pkt too short"));
1868	action = PF_DROP;
1869	goto done;
1870	}
1871	proto = ext.ip6e_nxt;
1872	}
1873	} else {
1874	lexthdrsp = NULL;
1875	proto = hdr->ip6_nxt;
1876	}
1877
1878	/*
1879	* The MTU must be a multiple of 8 bytes, or we risk doing the
1880	* fragmentation wrong.
1881	*/
1882	maxlen = maxlen & ~`7`;
1883
1884	error = ip6_do_fragmentation(&m, hdrlen, NULL, unfragpartlen,
1885	hdr, lexthdrsp, maxlen, proto, frag_id);
1886
1887	if (error == `0`) {
1888	/*
1889	* PF_TAG_REFRAGMENTED flag set to indicate ip6_forward()
1890	* and pf_route6() that the mbuf contains a chain of fragments.
1891	*/
1892	mtag->pftag_flags \|= PF_TAG_REFRAGMENTED;
1893	action = PF_PASS;
1894	pbuf_init_mbuf(*pbufp, m, ifp);
1895	} else {
1896	DPFPRINTF(("refragment error %d", error));
1897	action = PF_DROP;
1898	goto done;
1899	}
1900	done:
1901	return action;
1902	}
1903
1904	int
1905	pf_normalize_ip(pbuf_t pbuf, int* dir, struct pfi_kif kif, u_short reason,
1906	struct pf_pdesc *pd)
1907	{
1908	struct mbuf *m;
1909	struct pf_rule *r;
1910	struct pf_frent *frent;
1911	struct pf_fragment *frag = NULL;
1912	struct ip *h = pbuf->pb_data;
1913	int mff = (ntohs(h->ip_off) & IP_MF);
1914	int hlen = h->ip_hl << `2`;
1915	u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << `3`;
1916	u_int16_t fr_max;
1917	int ip_len;
1918	int ip_off;
1919	int asd = `0`;
1920	struct pf_ruleset *ruleset = NULL;
1921	struct ifnet *ifp = pbuf->pb_ifp;
1922	uint64_t ipid_salt = (uint64_t)pbuf_get_packet_buffer_address(pbuf);
1923
1924	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
1925	while (r != NULL) {
1926	r->evaluations++;
1927	if (pfi_kif_match(r->kif, kif) == r->ifnot) {
1928	r = r->skip[PF_SKIP_IFP].ptr;
1929	} else if (r->direction && r->direction != dir) {
1930	r = r->skip[PF_SKIP_DIR].ptr;
1931	} else if (r->af && r->af != AF_INET) {
1932	r = r->skip[PF_SKIP_AF].ptr;
1933	} else if (r->proto && r->proto != h->ip_p) {
1934	r = r->skip[PF_SKIP_PROTO].ptr;
1935	} else if (PF_MISMATCHAW(&r->src.addr,
1936	(struct pf_addr *)&h->ip_src.s_addr, AF_INET,
1937	r->src.neg, kif)) {
1938	r = r->skip[PF_SKIP_SRC_ADDR].ptr;
1939	} else if (PF_MISMATCHAW(&r->dst.addr,
1940	(struct pf_addr *)&h->ip_dst.s_addr, AF_INET,
1941	r->dst.neg, NULL)) {
1942	r = r->skip[PF_SKIP_DST_ADDR].ptr;
1943	} else {
1944	if (r->anchor == NULL) {
1945	break;
1946	} else {
1947	pf_step_into_anchor(&asd, &ruleset,
1948	PF_RULESET_SCRUB, &r, NULL, NULL);
1949	}
1950	}
1951	if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
1952	PF_RULESET_SCRUB, &r, NULL, NULL)) {
1953	break;
1954	}
1955	}
1956
1957	if (r == NULL \|\| r->action == PF_NOSCRUB) {
1958	return PF_PASS;
1959	} else {
1960	r->packets[dir == PF_OUT]++;
1961	r->bytes[dir == PF_OUT] += pd->tot_len;
1962	}
1963
1964	/ Check for illegal packets /
1965	if (hlen < (int)sizeof(struct ip)) {
1966	goto drop;
1967	}
1968
1969	if (hlen > ntohs(h->ip_len)) {
1970	goto drop;
1971	}
1972
1973	/ Clear IP_DF if the rule uses the no-df option /
1974	if (r->rule_flag & PFRULE_NODF && h->ip_off & htons(IP_DF)) {
1975	u_int16_t ipoff = h->ip_off;
1976
1977	h->ip_off &= htons(~IP_DF);
1978	h->ip_sum = pf_cksum_fixup(h->ip_sum, ipoff, h->ip_off, `0`);
1979	}
1980
1981	/ We will need other tests here /
1982	if (!fragoff && !mff) {
1983	goto no_fragment;
1984	}
1985
1986	/*
1987	* We're dealing with a fragment now. Don't allow fragments
1988	* with IP_DF to enter the cache. If the flag was cleared by
1989	* no-df above, fine. Otherwise drop it.
1990	*/
1991	if (h->ip_off & htons(IP_DF)) {
1992	DPFPRINTF(("IP_DF\n"));
1993	goto bad;
1994	}
1995
1996	ip_len = ntohs(h->ip_len) - hlen;
1997	ip_off = (ntohs(h->ip_off) & IP_OFFMASK) << `3`;
1998
1999	/ All fragments are 8 byte aligned /
2000	if (mff && (ip_len & `0x7`)) {
2001	DPFPRINTF(("mff and %d\n", ip_len));
2002	goto bad;
2003	}
2004
2005	/ Respect maximum length /
2006	if (fragoff + ip_len > IP_MAXPACKET) {
2007	DPFPRINTF(("max packet %d\n", fragoff + ip_len));
2008	goto bad;
2009	}
2010	fr_max = fragoff + ip_len;
2011
2012	if ((r->rule_flag & (PFRULE_FRAGCROP \| PFRULE_FRAGDROP)) == `0`) {
2013	/ Fully buffer all of the fragments /
2014
2015	frag = pf_find_fragment_by_ipv4_header(ip: h, tree: &pf_frag_tree);
2016	/ Check if we saw the last fragment already /
2017	if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
2018	fr_max > frag->fr_max) {
2019	goto bad;
2020	}
2021
2022	if ((m = pbuf_to_mbuf(pbuf, TRUE)) == NULL) {
2023	REASON_SET(reason, PFRES_MEMORY);
2024	return PF_DROP;
2025	}
2026
2027	VERIFY(!pbuf_is_valid(pbuf));
2028
2029	/ Restore iph pointer after pbuf_to_mbuf() /
2030	h = mtod(m, struct ip *);
2031
2032	/ Get an entry for the fragment queue /
2033	frent = pool_get(&pf_frent_pl, PR_NOWAIT);
2034	if (frent == NULL) {
2035	REASON_SET(reason, PFRES_MEMORY);
2036	m_freem(m);
2037	return PF_DROP;
2038	}
2039	pf_nfrents++;
2040	frent->fr_ip = h;
2041	frent->fr_m = m;
2042
2043	/ Might return a completely reassembled mbuf, or NULL /
2044	DPFPRINTF(("reass IPv4 frag %d @ %d-%d\n", ntohs(h->ip_id),
2045	fragoff, fr_max));
2046	m = pf_reassemble(m0: m, frag: &frag, frent, mff);
2047
2048	if (m == NULL) {
2049	return PF_DROP;
2050	}
2051
2052	VERIFY(m->m_flags & M_PKTHDR);
2053	pbuf_init_mbuf(pbuf, m, ifp);
2054
2055	/ use mtag from concatenated mbuf chain /
2056	pd->pf_mtag = pf_find_mtag_pbuf(pbuf);
2057	#if 0
2058	// SCW: This check is superfluous
2059	#if DIAGNOSTIC
2060	if (pd->pf_mtag == NULL) {
2061	printf("%s: pf_find_mtag returned NULL(1)\n", __func__);
2062	if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
2063	m_freem(m);
2064	m = NULL;
2065	goto no_mem;
2066	}
2067	}
2068	#endif
2069	#endif
2070
2071	h = mtod(m, struct ip *);
2072
2073	if (frag != NULL && (frag->fr_flags & PFFRAG_DROP)) {
2074	goto drop;
2075	}
2076	} else {
2077	/ non-buffering fragment cache (drops or masks overlaps) /
2078	int nomem = `0`;
2079
2080	if (dir == PF_OUT && (pd->pf_mtag->pftag_flags & PF_TAG_FRAGCACHE)) {
2081	/*
2082	* Already passed the fragment cache in the
2083	* input direction. If we continued, it would
2084	* appear to be a dup and would be dropped.
2085	*/
2086	goto fragment_pass;
2087	}
2088
2089	frag = pf_find_fragment_by_ipv4_header(ip: h, tree: &pf_cache_tree);
2090
2091	/ Check if we saw the last fragment already /
2092	if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
2093	fr_max > frag->fr_max) {
2094	if (r->rule_flag & PFRULE_FRAGDROP) {
2095	frag->fr_flags \|= PFFRAG_DROP;
2096	}
2097	goto bad;
2098	}
2099
2100	if ((m = pbuf_to_mbuf(pbuf, TRUE)) == NULL) {
2101	REASON_SET(reason, PFRES_MEMORY);
2102	goto bad;
2103	}
2104
2105	VERIFY(!pbuf_is_valid(pbuf));
2106
2107	/ Restore iph pointer after pbuf_to_mbuf() /
2108	h = mtod(m, struct ip *);
2109
2110	m = pf_fragcache(m0: &m, h, frag: &frag, mff,
2111	drop: (r->rule_flag & PFRULE_FRAGDROP) ? `1` : `0`, nomem: &nomem);
2112	if (m == NULL) {
2113	// Note: pf_fragcache() has already m_freem'd the mbuf
2114	if (nomem) {
2115	goto no_mem;
2116	}
2117	goto drop;
2118	}
2119
2120	VERIFY(m->m_flags & M_PKTHDR);
2121	pbuf_init_mbuf(pbuf, m, ifp);
2122
2123	/ use mtag from copied and trimmed mbuf chain /
2124	pd->pf_mtag = pf_find_mtag_pbuf(pbuf);
2125	#if 0
2126	// SCW: This check is superfluous
2127	#if DIAGNOSTIC
2128	if (pd->pf_mtag == NULL) {
2129	printf("%s: pf_find_mtag returned NULL(2)\n", __func__);
2130	if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
2131	m_freem(m);
2132	m = NULL;
2133	goto no_mem;
2134	}
2135	}
2136	#endif
2137	#endif
2138	if (dir == PF_IN) {
2139	pd->pf_mtag->pftag_flags \|= PF_TAG_FRAGCACHE;
2140	}
2141
2142	if (frag != NULL && (frag->fr_flags & PFFRAG_DROP)) {
2143	goto drop;
2144	}
2145
2146	goto fragment_pass;
2147	}
2148
2149	no_fragment:
2150	/ At this point, only IP_DF is allowed in ip_off /
2151	if (h->ip_off & ~htons(IP_DF)) {
2152	u_int16_t ipoff = h->ip_off;
2153
2154	h->ip_off &= htons(IP_DF);
2155	h->ip_sum = pf_cksum_fixup(h->ip_sum, ipoff, h->ip_off, `0`);
2156	}
2157
2158	/ Enforce a minimum ttl, may cause endless packet loops /
2159	if (r->min_ttl && h->ip_ttl < r->min_ttl) {
2160	u_int16_t ip_ttl = h->ip_ttl;
2161
2162	h->ip_ttl = r->min_ttl;
2163	h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, `0`);
2164	}
2165	if (r->rule_flag & PFRULE_RANDOMID) {
2166	u_int16_t oip_id = h->ip_id;
2167
2168	if (rfc6864 && IP_OFF_IS_ATOMIC(ntohs(h->ip_off))) {
2169	h->ip_id = `0`;
2170	} else {
2171	h->ip_id = ip_randomid(ipid_salt);
2172	}
2173	h->ip_sum = pf_cksum_fixup(h->ip_sum, oip_id, h->ip_id, `0`);
2174	}
2175	if ((r->rule_flag & (PFRULE_FRAGCROP \| PFRULE_FRAGDROP)) == `0`) {
2176	pd->flags \|= PFDESC_IP_REAS;
2177	}
2178
2179	return PF_PASS;
2180
2181	fragment_pass:
2182	/ Enforce a minimum ttl, may cause endless packet loops /
2183	if (r->min_ttl && h->ip_ttl < r->min_ttl) {
2184	u_int16_t ip_ttl = h->ip_ttl;
2185
2186	h->ip_ttl = r->min_ttl;
2187	h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, `0`);
2188	}
2189	if ((r->rule_flag & (PFRULE_FRAGCROP \| PFRULE_FRAGDROP)) == `0`) {
2190	pd->flags \|= PFDESC_IP_REAS;
2191	}
2192	return PF_PASS;
2193
2194	no_mem:
2195	REASON_SET(reason, PFRES_MEMORY);
2196	if (r != NULL && r->log && pbuf_is_valid(pbuf)) {
2197	PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, *reason, r,
2198	NULL, NULL, pd);
2199	}
2200	return PF_DROP;
2201
2202	drop:
2203	REASON_SET(reason, PFRES_NORM);
2204	if (r != NULL && r->log && pbuf_is_valid(pbuf)) {
2205	PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, *reason, r,
2206	NULL, NULL, pd);
2207	}
2208	return PF_DROP;
2209
2210	bad:
2211	DPFPRINTF(("dropping bad IPv4 fragment\n"));
2212
2213	/ Free associated fragments /
2214	if (frag != NULL) {
2215	pf_free_fragment(frag);
2216	}
2217
2218	REASON_SET(reason, PFRES_FRAG);
2219	if (r != NULL && r->log && pbuf_is_valid(pbuf)) {
2220	PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, *reason, r, NULL, NULL, pd);
2221	}
2222
2223	return PF_DROP;
2224	}
2225
2226	static __attribute__((noinline)) struct pf_fragment *
2227	pf_find_fragment_by_ipv6_header(struct ip6_hdr ip6, struct* ip6_frag *fh,
2228	struct pf_frag_tree *tree)
2229	{
2230	struct pf_fragment key;
2231	pf_ip6hdr2key(key: &key, ip6, fh);
2232	return pf_find_fragment_by_key(key: &key, tree);
2233	}
2234
2235	int
2236	pf_normalize_ip6(pbuf_t pbuf, int* dir, struct pfi_kif *kif,
2237	u_short reason, struct* pf_pdesc *pd)
2238	{
2239	struct mbuf *m = NULL;
2240	struct pf_rule *r;
2241	struct ip6_hdr *h = pbuf->pb_data;
2242	int extoff;
2243	int off;
2244	struct ip6_ext ext;
2245	struct ip6_opt opt;
2246	struct ip6_opt_jumbo jumbo;
2247	int optend;
2248	int ooff;
2249	struct ip6_frag frag;
2250	u_int32_t jumbolen = `0`, plen;
2251	u_int16_t fragoff = `0`;
2252	u_int8_t proto;
2253	int terminal;
2254	struct pf_frent *frent;
2255	struct pf_fragment *pff = NULL;
2256	int mff = `0`, rh_cnt = `0`;
2257	u_int16_t fr_max;
2258	int asd = `0`;
2259	struct pf_ruleset *ruleset = NULL;
2260	struct ifnet *ifp = pbuf->pb_ifp;
2261
2262	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
2263	while (r != NULL) {
2264	r->evaluations++;
2265	if (pfi_kif_match(r->kif, kif) == r->ifnot) {
2266	r = r->skip[PF_SKIP_IFP].ptr;
2267	} else if (r->direction && r->direction != dir) {
2268	r = r->skip[PF_SKIP_DIR].ptr;
2269	} else if (r->af && r->af != AF_INET6) {
2270	r = r->skip[PF_SKIP_AF].ptr;
2271	}
2272	#if 0 /* header chain! */
2273	else if (r->proto && r->proto != h->ip6_nxt) {
2274	r = r->skip[PF_SKIP_PROTO].ptr;
2275	}
2276	#endif
2277	else if (PF_MISMATCHAW(&r->src.addr,
2278	(struct pf_addr *)(uintptr_t)&h->ip6_src, AF_INET6,
2279	r->src.neg, kif)) {
2280	r = r->skip[PF_SKIP_SRC_ADDR].ptr;
2281	} else if (PF_MISMATCHAW(&r->dst.addr,
2282	(struct pf_addr *)(uintptr_t)&h->ip6_dst, AF_INET6,
2283	r->dst.neg, NULL)) {
2284	r = r->skip[PF_SKIP_DST_ADDR].ptr;
2285	} else {
2286	if (r->anchor == NULL) {
2287	break;
2288	} else {
2289	pf_step_into_anchor(&asd, &ruleset,
2290	PF_RULESET_SCRUB, &r, NULL, NULL);
2291	}
2292	}
2293	if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
2294	PF_RULESET_SCRUB, &r, NULL, NULL)) {
2295	break;
2296	}
2297	}
2298
2299	if (r == NULL \|\| r->action == PF_NOSCRUB) {
2300	return PF_PASS;
2301	} else {
2302	r->packets[dir == PF_OUT]++;
2303	r->bytes[dir == PF_OUT] += pd->tot_len;
2304	}
2305
2306	/ Check for illegal packets /
2307	if ((uint32_t)(sizeof(struct ip6_hdr) + IPV6_MAXPACKET) <
2308	pbuf->pb_packet_len) {
2309	goto drop;
2310	}
2311
2312	extoff = `0`;
2313	off = sizeof(struct ip6_hdr);
2314	proto = h->ip6_nxt;
2315	terminal = `0`;
2316	do {
2317	pd->proto = proto;
2318	switch (proto) {
2319	case IPPROTO_FRAGMENT:
2320	goto fragment;
2321	case IPPROTO_AH:
2322	case IPPROTO_ROUTING:
2323	case IPPROTO_DSTOPTS:
2324	if (!pf_pull_hdr(pbuf, off, &ext, sizeof(ext), NULL,
2325	NULL, AF_INET6)) {
2326	goto shortpkt;
2327	}
2328	extoff = off;
2329	/*
2330	* <jhw@apple.com>
2331	* Multiple routing headers not allowed.
2332	* Routing header type zero considered harmful.
2333	*/
2334	if (proto == IPPROTO_ROUTING) {
2335	const struct ip6_rthdr *rh =
2336	(const struct ip6_rthdr *)&ext;
2337	if (rh_cnt++) {
2338	goto drop;
2339	}
2340	if (rh->ip6r_type == IPV6_RTHDR_TYPE_0) {
2341	goto drop;
2342	}
2343	} else if (proto == IPPROTO_AH) {
2344	off += (ext.ip6e_len + `2`) * `4`;
2345	} else {
2346	off += (ext.ip6e_len + `1`) * `8`;
2347	}
2348	proto = ext.ip6e_nxt;
2349	break;
2350	case IPPROTO_HOPOPTS:
2351	if (!pf_pull_hdr(pbuf, off, &ext, sizeof(ext), NULL,
2352	NULL, AF_INET6)) {
2353	goto shortpkt;
2354	}
2355	extoff = off;
2356	optend = off + (ext.ip6e_len + `1`) * `8`;
2357	ooff = off + sizeof(ext);
2358	do {
2359	if (!pf_pull_hdr(pbuf, ooff, &opt.ip6o_type,
2360	sizeof(opt.ip6o_type), NULL, NULL,
2361	AF_INET6)) {
2362	goto shortpkt;
2363	}
2364	if (opt.ip6o_type == IP6OPT_PAD1) {
2365	ooff++;
2366	continue;
2367	}
2368	if (!pf_pull_hdr(pbuf, ooff, &opt, sizeof(opt),
2369	NULL, NULL, AF_INET6)) {
2370	goto shortpkt;
2371	}
2372	if ((ooff + (int) sizeof(opt) + opt.ip6o_len) >
2373	optend) {
2374	goto drop;
2375	}
2376	switch (opt.ip6o_type) {
2377	case IP6OPT_JUMBO:
2378	if (h->ip6_plen != `0`) {
2379	goto drop;
2380	}
2381	if (!pf_pull_hdr(pbuf, ooff, &jumbo,
2382	sizeof(jumbo), NULL, NULL,
2383	AF_INET6)) {
2384	goto shortpkt;
2385	}
2386	memcpy(dst: &jumbolen, src: jumbo.ip6oj_jumbo_len,
2387	n: sizeof(jumbolen));
2388	jumbolen = ntohl(jumbolen);
2389	if (jumbolen <= IPV6_MAXPACKET) {
2390	goto drop;
2391	}
2392	if ((sizeof(struct ip6_hdr) +
2393	jumbolen) != pbuf->pb_packet_len) {
2394	goto drop;
2395	}
2396	break;
2397	default:
2398	break;
2399	}
2400	ooff += sizeof(opt) + opt.ip6o_len;
2401	} while (ooff < optend);
2402
2403	off = optend;
2404	proto = ext.ip6e_nxt;
2405	break;
2406	default:
2407	terminal = `1`;
2408	break;
2409	}
2410	} while (!terminal);
2411
2412	/ jumbo payload option must be present, or plen > 0 /
2413	if (ntohs(h->ip6_plen) == `0`) {
2414	plen = jumbolen;
2415	} else {
2416	plen = ntohs(h->ip6_plen);
2417	}
2418	if (plen == `0`) {
2419	goto drop;
2420	}
2421	if ((uint32_t)(sizeof(struct ip6_hdr) + plen) > pbuf->pb_packet_len) {
2422	goto shortpkt;
2423	}
2424
2425	/ Enforce a minimum ttl, may cause endless packet loops /
2426	if (r->min_ttl && h->ip6_hlim < r->min_ttl) {
2427	h->ip6_hlim = r->min_ttl;
2428	}
2429
2430	return PF_PASS;
2431
2432	fragment:
2433	plen = ntohs(h->ip6_plen);
2434	/ Jumbo payload packets cannot be fragmented /
2435	if (plen == `0` \|\| jumbolen) {
2436	goto drop;
2437	}
2438
2439	if (!pf_pull_hdr(pbuf, off, &frag, sizeof(frag), NULL, NULL, AF_INET6)) {
2440	goto shortpkt;
2441	}
2442	fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK);
2443	pd->proto = frag.ip6f_nxt;
2444	mff = ntohs(frag.ip6f_offlg & IP6F_MORE_FRAG);
2445	off += sizeof(frag);
2446	if (fragoff + (plen - off) > IPV6_MAXPACKET) {
2447	goto badfrag;
2448	}
2449
2450	fr_max = fragoff + plen - (off - sizeof(struct ip6_hdr));
2451	// XXX SCW: mbuf-specific
2452	// DPFPRINTF(("0x%llx IPv6 frag plen %u mff %d off %u fragoff %u "
2453	// "fr_max %u\n", (uint64_t)VM_KERNEL_ADDRPERM(m), plen, mff, off,
2454	// fragoff, fr_max));
2455
2456	if ((r->rule_flag & (PFRULE_FRAGCROP \| PFRULE_FRAGDROP)) == `0`) {
2457	/ Fully buffer all of the fragments /
2458	pd->flags \|= PFDESC_IP_REAS;
2459
2460	pff = pf_find_fragment_by_ipv6_header(ip6: h, fh: &frag,
2461	tree: &pf_frag_tree);
2462
2463	/ Check if we saw the last fragment already /
2464	if (pff != NULL && (pff->fr_flags & PFFRAG_SEENLAST) &&
2465	fr_max > pff->fr_max) {
2466	goto badfrag;
2467	}
2468
2469	if ((m = pbuf_to_mbuf(pbuf, TRUE)) == NULL) {
2470	REASON_SET(reason, PFRES_MEMORY);
2471	return PF_DROP;
2472	}
2473
2474	/ Restore iph pointer after pbuf_to_mbuf() /
2475	h = mtod(m, struct ip6_hdr *);
2476
2477	/ Get an entry for the fragment queue /
2478	frent = pool_get(&pf_frent_pl, PR_NOWAIT);
2479	if (frent == NULL) {
2480	REASON_SET(reason, PFRES_MEMORY);
2481	return PF_DROP;
2482	}
2483
2484	pf_nfrents++;
2485	frent->fr_ip6 = h;
2486	frent->fr_m = m;
2487	frent->fr_ip6f_opt = frag;
2488	frent->fr_ip6f_extoff = extoff;
2489	frent->fr_ip6f_hlen = off;
2490	/ account for 2nd Destination Options header if present /
2491	if (pd->proto == IPPROTO_DSTOPTS) {
2492	if (!pf_pull_hdr(pbuf, off, &ext, sizeof(ext), NULL,
2493	NULL, AF_INET6)) {
2494	goto shortpkt;
2495	}
2496	frent->fr_ip6f_hlen += (ext.ip6e_len + `1`) * `8`;
2497	}
2498
2499	/ Might return a completely reassembled mbuf, or NULL /
2500	DPFPRINTF(("reass IPv6 frag %d @ %d-%d\n",
2501	ntohl(frag.ip6f_ident), fragoff, fr_max));
2502	m = pf_reassemble6(m0: &m, frag: &pff, frent, mff);
2503
2504	if (m == NULL) {
2505	return PF_DROP;
2506	}
2507
2508	pbuf_init_mbuf(pbuf, m, ifp);
2509	h = pbuf->pb_data;
2510
2511	if (pff != NULL && (pff->fr_flags & PFFRAG_DROP)) {
2512	goto drop;
2513	}
2514	} else if (dir == PF_IN \|\|
2515	!(pd->pf_mtag->pftag_flags & PF_TAG_FRAGCACHE)) {
2516	/ non-buffering fragment cache (overlaps: see RFC 5722) /
2517	int nomem = `0`;
2518
2519	pff = pf_find_fragment_by_ipv6_header(ip6: h, fh: &frag,
2520	tree: &pf_cache_tree);
2521
2522	/ Check if we saw the last fragment already /
2523	if (pff != NULL && (pff->fr_flags & PFFRAG_SEENLAST) &&
2524	fr_max > pff->fr_max) {
2525	if (r->rule_flag & PFRULE_FRAGDROP) {
2526	pff->fr_flags \|= PFFRAG_DROP;
2527	}
2528	goto badfrag;
2529	}
2530
2531	if ((m = pbuf_to_mbuf(pbuf, TRUE)) == NULL) {
2532	goto no_mem;
2533	}
2534
2535	/ Restore iph pointer after pbuf_to_mbuf() /
2536	h = mtod(m, struct ip6_hdr *);
2537
2538	m = pf_frag6cache(m0: &m, h, fh: &frag, frag: &pff, hlen: off, mff,
2539	drop: (r->rule_flag & PFRULE_FRAGDROP) ? `1` : `0`, nomem: &nomem);
2540	if (m == NULL) {
2541	// Note: pf_frag6cache() has already m_freem'd the mbuf
2542	if (nomem) {
2543	goto no_mem;
2544	}
2545	goto drop;
2546	}
2547
2548	pbuf_init_mbuf(pbuf, m, ifp);
2549	pd->pf_mtag = pf_find_mtag_pbuf(pbuf);
2550	h = pbuf->pb_data;
2551
2552	if (dir == PF_IN) {
2553	pd->pf_mtag->pftag_flags \|= PF_TAG_FRAGCACHE;
2554	}
2555
2556	if (pff != NULL && (pff->fr_flags & PFFRAG_DROP)) {
2557	goto drop;
2558	}
2559	}
2560
2561	/ Enforce a minimum ttl, may cause endless packet loops /
2562	if (r->min_ttl && h->ip6_hlim < r->min_ttl) {
2563	h->ip6_hlim = r->min_ttl;
2564	}
2565	return PF_PASS;
2566
2567	no_mem:
2568	REASON_SET(reason, PFRES_MEMORY);
2569	goto dropout;
2570
2571	shortpkt:
2572	REASON_SET(reason, PFRES_SHORT);
2573	goto dropout;
2574
2575	drop:
2576	REASON_SET(reason, PFRES_NORM);
2577	goto dropout;
2578
2579	badfrag:
2580	DPFPRINTF(("dropping bad IPv6 fragment\n"));
2581	REASON_SET(reason, PFRES_FRAG);
2582	goto dropout;
2583
2584	dropout:
2585	if (pff != NULL) {
2586	pf_free_fragment(frag: pff);
2587	}
2588	if (r != NULL && r->log && pbuf_is_valid(pbuf)) {
2589	PFLOG_PACKET(kif, h, pbuf, AF_INET6, dir, *reason, r, NULL, NULL, pd);
2590	}
2591	return PF_DROP;
2592	}
2593
2594	int
2595	pf_normalize_tcp(int dir, struct pfi_kif kif, pbuf_t pbuf, int ipoff,
2596	int off, void h, struct* pf_pdesc *pd)
2597	{
2598	#pragma unused(ipoff, h)
2599	struct pf_rule r, rm = NULL;
2600	struct tcphdr *th = pd->hdr.tcp;
2601	int rewrite = `0`;
2602	int asd = `0`;
2603	u_short reason;
2604	u_int8_t flags;
2605	sa_family_t af = pd->af;
2606	struct pf_ruleset *ruleset = NULL;
2607	union pf_state_xport sxport, dxport;
2608
2609	sxport.port = th->th_sport;
2610	dxport.port = th->th_dport;
2611
2612	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
2613	while (r != NULL) {
2614	r->evaluations++;
2615	if (pfi_kif_match(r->kif, kif) == r->ifnot) {
2616	r = r->skip[PF_SKIP_IFP].ptr;
2617	} else if (r->direction && r->direction != dir) {
2618	r = r->skip[PF_SKIP_DIR].ptr;
2619	} else if (r->af && r->af != af) {
2620	r = r->skip[PF_SKIP_AF].ptr;
2621	} else if (r->proto && r->proto != pd->proto) {
2622	r = r->skip[PF_SKIP_PROTO].ptr;
2623	} else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
2624	r->src.neg, kif)) {
2625	r = r->skip[PF_SKIP_SRC_ADDR].ptr;
2626	} else if (r->src.xport.range.op &&
2627	!pf_match_xport(r->src.xport.range.op, r->proto_variant,
2628	&r->src.xport, &sxport)) {
2629	r = r->skip[PF_SKIP_SRC_PORT].ptr;
2630	} else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
2631	r->dst.neg, NULL)) {
2632	r = r->skip[PF_SKIP_DST_ADDR].ptr;
2633	} else if (r->dst.xport.range.op &&
2634	!pf_match_xport(r->dst.xport.range.op, r->proto_variant,
2635	&r->dst.xport, &dxport)) {
2636	r = r->skip[PF_SKIP_DST_PORT].ptr;
2637	} else if (r->os_fingerprint != PF_OSFP_ANY &&
2638	!pf_osfp_match(pf_osfp_fingerprint(pd, pbuf, off, th),
2639	r->os_fingerprint)) {
2640	r = TAILQ_NEXT(r, entries);
2641	} else {
2642	if (r->anchor == NULL) {
2643	rm = r;
2644	break;
2645	} else {
2646	pf_step_into_anchor(&asd, &ruleset,
2647	PF_RULESET_SCRUB, &r, NULL, NULL);
2648	}
2649	}
2650	if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
2651	PF_RULESET_SCRUB, &r, NULL, NULL)) {
2652	break;
2653	}
2654	}
2655
2656	if (rm == NULL \|\| rm->action == PF_NOSCRUB) {
2657	return PF_PASS;
2658	} else {
2659	r->packets[dir == PF_OUT]++;
2660	r->bytes[dir == PF_OUT] += pd->tot_len;
2661	}
2662
2663	if (rm->rule_flag & PFRULE_REASSEMBLE_TCP) {
2664	pd->flags \|= PFDESC_TCP_NORM;
2665	}
2666
2667	flags = th->th_flags;
2668	if (flags & TH_SYN) {
2669	/ Illegal packet /
2670	if (flags & TH_RST) {
2671	goto tcp_drop;
2672	}
2673
2674	if (flags & TH_FIN) {
2675	flags &= ~TH_FIN;
2676	}
2677	} else {
2678	/ Illegal packet /
2679	if (!(flags & (TH_ACK \| TH_RST))) {
2680	goto tcp_drop;
2681	}
2682	}
2683
2684	if (!(flags & TH_ACK)) {
2685	/ These flags are only valid if ACK is set /
2686	if ((flags & TH_FIN) \|\| (flags & TH_PUSH) \|\| (flags & TH_URG)) {
2687	goto tcp_drop;
2688	}
2689	}
2690
2691	/ Check for illegal header length /
2692	if (th->th_off < (sizeof(struct tcphdr) >> `2`)) {
2693	goto tcp_drop;
2694	}
2695
2696	/ If flags changed, or reserved data set, then adjust /
2697	if (flags != th->th_flags \|\| th->th_x2 != `0`) {
2698	u_int16_t ov, nv;
2699
2700	ov = (u_int16_t )(&th->th_ack + `1`);
2701	th->th_flags = flags;
2702	th->th_x2 = `0`;
2703	nv = (u_int16_t )(&th->th_ack + `1`);
2704
2705	th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, `0`);
2706	rewrite = `1`;
2707	}
2708
2709	/ Remove urgent pointer, if TH_URG is not set /
2710	if (!(flags & TH_URG) && th->th_urp) {
2711	th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, `0`, `0`);
2712	th->th_urp = `0`;
2713	rewrite = `1`;
2714	}
2715
2716	/ copy back packet headers if we sanitized /
2717	/ Process options /
2718	if (r->max_mss) {
2719	int rv = pf_normalize_tcpopt(r, dir, kif, pd, pbuf, th, off,
2720	&rewrite);
2721	if (rv == PF_DROP) {
2722	return rv;
2723	}
2724	pbuf = pd->mp;
2725	}
2726
2727	if (rewrite) {
2728	if (pf_lazy_makewritable(pd, pbuf,
2729	off + sizeof(*th)) == NULL) {
2730	REASON_SET(&reason, PFRES_MEMORY);
2731	if (r->log) {
2732	PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, reason,
2733	r, `0`, `0`, pd);
2734	}
2735	return PF_DROP;
2736	}
2737
2738	pbuf_copy_back(pbuf, off, sizeof(*th), th);
2739	}
2740
2741	return PF_PASS;
2742
2743	tcp_drop:
2744	REASON_SET(&reason, PFRES_NORM);
2745	if (rm != NULL && r->log) {
2746	PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, reason, r, NULL, NULL, pd);
2747	}
2748	return PF_DROP;
2749	}
2750
2751	int
2752	pf_normalize_tcp_init(pbuf_t pbuf, int* off, struct pf_pdesc *pd,
2753	struct tcphdr th, struct* pf_state_peer src, struct* pf_state_peer *dst)
2754	{
2755	#pragma unused(dst)
2756	u_int32_t tsval, tsecr;
2757	u_int8_t hdr[`60`];
2758	u_int8_t *opt;
2759
2760	VERIFY(src->scrub == NULL);
2761
2762	src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT);
2763	if (src->scrub == NULL) {
2764	return `1`;
2765	}
2766	bzero(s: src->scrub, n: sizeof(*src->scrub));
2767
2768	switch (pd->af) {
2769	#if INET
2770	case AF_INET: {
2771	struct ip *h = pbuf->pb_data;
2772	src->scrub->pfss_ttl = h->ip_ttl;
2773	break;
2774	}
2775	#endif /* INET */
2776	case AF_INET6: {
2777	struct ip6_hdr *h = pbuf->pb_data;
2778	src->scrub->pfss_ttl = h->ip6_hlim;
2779	break;
2780	}
2781	}
2782
2783
2784	/*
2785	* All normalizations below are only begun if we see the start of
2786	* the connections. They must all set an enabled bit in pfss_flags
2787	*/
2788	if ((th->th_flags & TH_SYN) == `0`) {
2789	return `0`;
2790	}
2791
2792
2793	if (th->th_off > (sizeof(struct tcphdr) >> `2`) && src->scrub &&
2794	pf_pull_hdr(pbuf, off, hdr, th->th_off << `2`, NULL, NULL, pd->af)) {
2795	/ Diddle with TCP options /
2796	int hlen;
2797	opt = hdr + sizeof(struct tcphdr);
2798	hlen = (th->th_off << `2`) - sizeof(struct tcphdr);
2799	while (hlen >= TCPOLEN_TIMESTAMP) {
2800	switch (*opt) {
2801	case TCPOPT_EOL: / FALLTHROUGH /
2802	case TCPOPT_NOP:
2803	opt++;
2804	hlen--;
2805	break;
2806	case TCPOPT_TIMESTAMP:
2807	if (opt[`1`] >= TCPOLEN_TIMESTAMP) {
2808	src->scrub->pfss_flags \|=
2809	PFSS_TIMESTAMP;
2810	src->scrub->pfss_ts_mod =
2811	htonl(random());
2812
2813	/ note PFSS_PAWS not set yet /
2814	memcpy(dst: &tsval, src: &opt[`2`],
2815	n: sizeof(u_int32_t));
2816	memcpy(dst: &tsecr, src: &opt[`6`],
2817	n: sizeof(u_int32_t));
2818	src->scrub->pfss_tsval0 = ntohl(tsval);
2819	src->scrub->pfss_tsval = ntohl(tsval);
2820	src->scrub->pfss_tsecr = ntohl(tsecr);
2821	getmicrouptime(&src->scrub->pfss_last);
2822	}
2823	OS_FALLTHROUGH;
2824	default:
2825	hlen -= MAX(opt[`1`], `2`);
2826	opt += MAX(opt[`1`], `2`);
2827	break;
2828	}
2829	}
2830	}
2831
2832	return `0`;
2833	}
2834
2835	void
2836	pf_normalize_tcp_cleanup(struct pf_state *state)
2837	{
2838	if (state->src.scrub) {
2839	pool_put(&pf_state_scrub_pl, state->src.scrub);
2840	}
2841	if (state->dst.scrub) {
2842	pool_put(&pf_state_scrub_pl, state->dst.scrub);
2843	}
2844
2845	/ Someday... flush the TCP segment reassembly descriptors. /
2846	}
2847
2848	int
2849	pf_normalize_tcp_stateful(pbuf_t pbuf, int* off, struct pf_pdesc *pd,
2850	u_short reason, struct* tcphdr th, struct* pf_state *state,
2851	struct pf_state_peer src, struct* pf_state_peer dst, int* *writeback)
2852	{
2853	struct timeval uptime;
2854	u_int32_t tsval = `0`, tsecr = `0`;
2855	u_int tsval_from_last;
2856	u_int8_t hdr[`60`];
2857	u_int8_t *opt;
2858	int copyback = `0`;
2859	int got_ts = `0`;
2860
2861	VERIFY(src->scrub \|\| dst->scrub);
2862
2863	/*
2864	* Enforce the minimum TTL seen for this connection. Negate a common
2865	* technique to evade an intrusion detection system and confuse
2866	* firewall state code.
2867	*/
2868	switch (pd->af) {
2869	#if INET
2870	case AF_INET: {
2871	if (src->scrub) {
2872	struct ip *h = pbuf->pb_data;
2873	if (h->ip_ttl > src->scrub->pfss_ttl) {
2874	src->scrub->pfss_ttl = h->ip_ttl;
2875	}
2876	h->ip_ttl = src->scrub->pfss_ttl;
2877	}
2878	break;
2879	}
2880	#endif /* INET */
2881	case AF_INET6: {
2882	if (src->scrub) {
2883	struct ip6_hdr *h = pbuf->pb_data;
2884	if (h->ip6_hlim > src->scrub->pfss_ttl) {
2885	src->scrub->pfss_ttl = h->ip6_hlim;
2886	}
2887	h->ip6_hlim = src->scrub->pfss_ttl;
2888	}
2889	break;
2890	}
2891	}
2892
2893	if (th->th_off > (sizeof(struct tcphdr) >> `2`) &&
2894	((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) \|\|
2895	(dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
2896	pf_pull_hdr(pbuf, off, hdr, th->th_off << `2`, NULL, NULL, pd->af)) {
2897	/ Diddle with TCP options /
2898	int hlen;
2899	opt = hdr + sizeof(struct tcphdr);
2900	hlen = (th->th_off << `2`) - sizeof(struct tcphdr);
2901	while (hlen >= TCPOLEN_TIMESTAMP) {
2902	switch (*opt) {
2903	case TCPOPT_EOL: / FALLTHROUGH /
2904	case TCPOPT_NOP:
2905	opt++;
2906	hlen--;
2907	break;
2908	case TCPOPT_TIMESTAMP:
2909	/*
2910	* Modulate the timestamps. Can be used for
2911	* NAT detection, OS uptime determination or
2912	* reboot detection.
2913	*/
2914
2915	if (got_ts) {
2916	/ Huh? Multiple timestamps!? /
2917	if (pf_status.debug >= PF_DEBUG_MISC) {
2918	DPFPRINTF(("multiple TS??"));
2919	pf_print_state(state);
2920	printf("\n");
2921	}
2922	REASON_SET(reason, PFRES_TS);
2923	return PF_DROP;
2924	}
2925	if (opt[`1`] >= TCPOLEN_TIMESTAMP) {
2926	memcpy(dst: &tsval, src: &opt[`2`],
2927	n: sizeof(u_int32_t));
2928	if (tsval && src->scrub &&
2929	(src->scrub->pfss_flags &
2930	PFSS_TIMESTAMP)) {
2931	tsval = ntohl(tsval);
2932	pf_change_a(&opt[`2`],
2933	&th->th_sum,
2934	htonl(tsval +
2935	src->scrub->pfss_ts_mod),
2936	`0`);
2937	copyback = `1`;
2938	}
2939
2940	/ Modulate TS reply iff valid (!0) /
2941	memcpy(dst: &tsecr, src: &opt[`6`],
2942	n: sizeof(u_int32_t));
2943	if (tsecr && dst->scrub &&
2944	(dst->scrub->pfss_flags &
2945	PFSS_TIMESTAMP)) {
2946	tsecr = ntohl(tsecr)
2947	- dst->scrub->pfss_ts_mod;
2948	pf_change_a(&opt[`6`],
2949	&th->th_sum, htonl(tsecr),
2950	`0`);
2951	copyback = `1`;
2952	}
2953	got_ts = `1`;
2954	}
2955	OS_FALLTHROUGH;
2956	default:
2957	hlen -= MAX(opt[`1`], `2`);
2958	opt += MAX(opt[`1`], `2`);
2959	break;
2960	}
2961	}
2962	if (copyback) {
2963	/ Copyback the options, caller copys back header /
2964	int optoff = off + sizeof(*th);
2965	int optlen = (th->th_off << `2`) - sizeof(*th);
2966	if (pf_lazy_makewritable(pd, pbuf, optoff + optlen) ==
2967	NULL) {
2968	REASON_SET(reason, PFRES_MEMORY);
2969	return PF_DROP;
2970	}
2971	*writeback = optoff + optlen;
2972	pbuf_copy_back(pbuf, optoff, optlen, hdr + sizeof(*th));
2973	}
2974	}
2975
2976
2977	/*
2978	* Must invalidate PAWS checks on connections idle for too long.
2979	* The fastest allowed timestamp clock is 1ms. That turns out to
2980	* be about 24 days before it wraps. XXX Right now our lowerbound
2981	* TS echo check only works for the first 12 days of a connection
2982	* when the TS has exhausted half its 32bit space
2983	*/
2984	#define TS_MAX_IDLE (242460*60)
2985	#define TS_MAX_CONN (12246060) / XXX remove when better tsecr check */
2986
2987	getmicrouptime(&uptime);
2988	if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
2989	(uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE \|\|
2990	pf_time_second() - state->creation > TS_MAX_CONN)) {
2991	if (pf_status.debug >= PF_DEBUG_MISC) {
2992	DPFPRINTF(("src idled out of PAWS\n"));
2993	pf_print_state(state);
2994	printf("\n");
2995	}
2996	src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS)
2997	\| PFSS_PAWS_IDLED;
2998	}
2999	if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
3000	uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
3001	if (pf_status.debug >= PF_DEBUG_MISC) {
3002	DPFPRINTF(("dst idled out of PAWS\n"));
3003	pf_print_state(state);
3004	printf("\n");
3005	}
3006	dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS)
3007	\| PFSS_PAWS_IDLED;
3008	}
3009
3010	if (got_ts && src->scrub && dst->scrub &&
3011	(src->scrub->pfss_flags & PFSS_PAWS) &&
3012	(dst->scrub->pfss_flags & PFSS_PAWS)) {
3013	/*
3014	* Validate that the timestamps are "in-window".
3015	* RFC1323 describes TCP Timestamp options that allow
3016	* measurement of RTT (round trip time) and PAWS
3017	* (protection against wrapped sequence numbers). PAWS
3018	* gives us a set of rules for rejecting packets on
3019	* long fat pipes (packets that were somehow delayed
3020	* in transit longer than the time it took to send the
3021	* full TCP sequence space of 4Gb). We can use these
3022	* rules and infer a few others that will let us treat
3023	* the 32bit timestamp and the 32bit echoed timestamp
3024	* as sequence numbers to prevent a blind attacker from
3025	* inserting packets into a connection.
3026	*
3027	* RFC1323 tells us:
3028	* - The timestamp on this packet must be greater than
3029	* or equal to the last value echoed by the other
3030	* endpoint. The RFC says those will be discarded
3031	* since it is a dup that has already been acked.
3032	* This gives us a lowerbound on the timestamp.
3033	* timestamp >= other last echoed timestamp
3034	* - The timestamp will be less than or equal to
3035	* the last timestamp plus the time between the
3036	* last packet and now. The RFC defines the max
3037	* clock rate as 1ms. We will allow clocks to be
3038	* up to 10% fast and will allow a total difference
3039	* or 30 seconds due to a route change. And this
3040	* gives us an upperbound on the timestamp.
3041	* timestamp <= last timestamp + max ticks
3042	* We have to be careful here. Windows will send an
3043	* initial timestamp of zero and then initialize it
3044	* to a random value after the 3whs; presumably to
3045	* avoid a DoS by having to call an expensive RNG
3046	* during a SYN flood. Proof MS has at least one
3047	* good security geek.
3048	*
3049	* - The TCP timestamp option must also echo the other
3050	* endpoints timestamp. The timestamp echoed is the
3051	* one carried on the earliest unacknowledged segment
3052	* on the left edge of the sequence window. The RFC
3053	* states that the host will reject any echoed
3054	* timestamps that were larger than any ever sent.
3055	* This gives us an upperbound on the TS echo.
3056	* tescr <= largest_tsval
3057	* - The lowerbound on the TS echo is a little more
3058	* tricky to determine. The other endpoint's echoed
3059	* values will not decrease. But there may be
3060	* network conditions that re-order packets and
3061	* cause our view of them to decrease. For now the
3062	* only lowerbound we can safely determine is that
3063	* the TS echo will never be less than the original
3064	* TS. XXX There is probably a better lowerbound.
3065	* Remove TS_MAX_CONN with better lowerbound check.
3066	* tescr >= other original TS
3067	*
3068	* It is also important to note that the fastest
3069	* timestamp clock of 1ms will wrap its 32bit space in
3070	* 24 days. So we just disable TS checking after 24
3071	* days of idle time. We actually must use a 12d
3072	* connection limit until we can come up with a better
3073	* lowerbound to the TS echo check.
3074	*/
3075	struct timeval delta_ts;
3076	int ts_fudge;
3077
3078
3079	/*
3080	* PFTM_TS_DIFF is how many seconds of leeway to allow
3081	* a host's timestamp. This can happen if the previous
3082	* packet got delayed in transit for much longer than
3083	* this packet.
3084	*/
3085	if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == `0`) {
3086	ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF];
3087	}
3088
3089
3090	/ Calculate max ticks since the last timestamp /
3091	#define TS_MAXFREQ 1100 /* RFC max TS freq of 1Khz + 10% skew */
3092	#define TS_MICROSECS 1000000 /* microseconds per second */
3093	timersub(&uptime, &src->scrub->pfss_last, &delta_ts);
3094	tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ;
3095	tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS / TS_MAXFREQ);
3096
3097
3098	if ((src->state >= TCPS_ESTABLISHED &&
3099	dst->state >= TCPS_ESTABLISHED) &&
3100	(SEQ_LT(tsval, dst->scrub->pfss_tsecr) \|\|
3101	SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) \|\|
3102	(tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) \|\|
3103	SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) {
3104	/*
3105	* Bad RFC1323 implementation or an insertion attack.
3106	*
3107	* - Solaris 2.6 and 2.7 are known to send another ACK
3108	* after the FIN,FIN\|ACK,ACK closing that carries
3109	* an old timestamp.
3110	*/
3111
3112	DPFPRINTF(("Timestamp failed %c%c%c%c\n",
3113	SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? `'0'` : `' '`,
3114	SEQ_GT(tsval, src->scrub->pfss_tsval +
3115	tsval_from_last) ? `'1'` : `' '`,
3116	SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? `'2'` : `' '`,
3117	SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? `'3'` : `' '`));
3118	DPFPRINTF((" tsval: %u tsecr: %u +ticks: %u "
3119	"idle: %lus %ums\n",
3120	tsval, tsecr, tsval_from_last, delta_ts.tv_sec,
3121	delta_ts.tv_usec / `1000`));
3122	DPFPRINTF((" src->tsval: %u tsecr: %u\n",
3123	src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
3124	DPFPRINTF((" dst->tsval: %u tsecr: %u tsval0: %u\n",
3125	dst->scrub->pfss_tsval, dst->scrub->pfss_tsecr,
3126	dst->scrub->pfss_tsval0));
3127	if (pf_status.debug >= PF_DEBUG_MISC) {
3128	pf_print_state(state);
3129	pf_print_flags(th->th_flags);
3130	printf("\n");
3131	}
3132	REASON_SET(reason, PFRES_TS);
3133	return PF_DROP;
3134	}
3135
3136	/ XXX I'd really like to require tsecr but it's optional /
3137	} else if (!got_ts && (th->th_flags & TH_RST) == `0` &&
3138	((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED)
3139	\|\| pd->p_len > `0` \|\| (th->th_flags & TH_SYN)) &&
3140	src->scrub && dst->scrub &&
3141	(src->scrub->pfss_flags & PFSS_PAWS) &&
3142	(dst->scrub->pfss_flags & PFSS_PAWS)) {
3143	/*
3144	* Didn't send a timestamp. Timestamps aren't really useful
3145	* when:
3146	* - connection opening or closing (often not even sent).
3147	* but we must not let an attacker to put a FIN on a
3148	* data packet to sneak it through our ESTABLISHED check.
3149	* - on a TCP reset. RFC suggests not even looking at TS.
3150	* - on an empty ACK. The TS will not be echoed so it will
3151	* probably not help keep the RTT calculation in sync and
3152	* there isn't as much danger when the sequence numbers
3153	* got wrapped. So some stacks don't include TS on empty
3154	* ACKs :-(
3155	*
3156	* To minimize the disruption to mostly RFC1323 conformant
3157	* stacks, we will only require timestamps on data packets.
3158	*
3159	* And what do ya know, we cannot require timestamps on data
3160	* packets. There appear to be devices that do legitimate
3161	* TCP connection hijacking. There are HTTP devices that allow
3162	* a 3whs (with timestamps) and then buffer the HTTP request.
3163	* If the intermediate device has the HTTP response cache, it
3164	* will spoof the response but not bother timestamping its
3165	* packets. So we can look for the presence of a timestamp in
3166	* the first data packet and if there, require it in all future
3167	* packets.
3168	*/
3169
3170	if (pd->p_len > `0` && (src->scrub->pfss_flags & PFSS_DATA_TS)) {
3171	/*
3172	* Hey! Someone tried to sneak a packet in. Or the
3173	* stack changed its RFC1323 behavior?!?!
3174	*/
3175	if (pf_status.debug >= PF_DEBUG_MISC) {
3176	DPFPRINTF(("Did not receive expected RFC1323 "
3177	"timestamp\n"));
3178	pf_print_state(state);
3179	pf_print_flags(th->th_flags);
3180	printf("\n");
3181	}
3182	REASON_SET(reason, PFRES_TS);
3183	return PF_DROP;
3184	}
3185	}
3186
3187
3188	/*
3189	* We will note if a host sends his data packets with or without
3190	* timestamps. And require all data packets to contain a timestamp
3191	* if the first does. PAWS implicitly requires that all data packets be
3192	* timestamped. But I think there are middle-man devices that hijack
3193	* TCP streams immediately after the 3whs and don't timestamp their
3194	* packets (seen in a WWW accelerator or cache).
3195	*/
3196	if (pd->p_len > `0` && src->scrub && (src->scrub->pfss_flags &
3197	(PFSS_TIMESTAMP \| PFSS_DATA_TS \| PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) {
3198	if (got_ts) {
3199	src->scrub->pfss_flags \|= PFSS_DATA_TS;
3200	} else {
3201	src->scrub->pfss_flags \|= PFSS_DATA_NOTS;
3202	if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
3203	(dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
3204	/ Don't warn if other host rejected RFC1323 /
3205	DPFPRINTF(("Broken RFC1323 stack did not "
3206	"timestamp data packet. Disabled PAWS "
3207	"security.\n"));
3208	pf_print_state(state);
3209	pf_print_flags(th->th_flags);
3210	printf("\n");
3211	}
3212	}
3213	}
3214
3215
3216	/*
3217	* Update PAWS values
3218	*/
3219	if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags &
3220	(PFSS_PAWS_IDLED \| PFSS_TIMESTAMP))) {
3221	getmicrouptime(&src->scrub->pfss_last);
3222	if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) \|\|
3223	(src->scrub->pfss_flags & PFSS_PAWS) == `0`) {
3224	src->scrub->pfss_tsval = tsval;
3225	}
3226
3227	if (tsecr) {
3228	if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) \|\|
3229	(src->scrub->pfss_flags & PFSS_PAWS) == `0`) {
3230	src->scrub->pfss_tsecr = tsecr;
3231	}
3232
3233	if ((src->scrub->pfss_flags & PFSS_PAWS) == `0` &&
3234	(SEQ_LT(tsval, src->scrub->pfss_tsval0) \|\|
3235	src->scrub->pfss_tsval0 == `0`)) {
3236	/* tsval0 MUST be the lowest timestamp */
3237	src->scrub->pfss_tsval0 = tsval;
3238	}
3239
3240	/* Only fully initialized after a TS gets echoed */
3241	if ((src->scrub->pfss_flags & PFSS_PAWS) == `0`) {
3242	src->scrub->pfss_flags \|= PFSS_PAWS;
3243	}
3244	}
3245	}
3246
3247	/* I have a dream.... TCP segment reassembly.... */
3248	return `0`;
3249	}
3250
3251	static __attribute__((noinline)) int
3252	pf_normalize_tcpopt(struct pf_rule r, int* dir, struct pfi_kif *kif,
3253	struct pf_pdesc pd, pbuf_t pbuf, struct tcphdr th, int* off,
3254	int *rewrptr)
3255	{
3256	#pragma unused(dir, kif)
3257	sa_family_t af = pd->af;
3258	u_int16_t *mss;
3259	int thoff;
3260	int opt, cnt, optlen = `0`;
3261	int rewrite = `0`;
3262	u_char opts[MAX_TCPOPTLEN];
3263	u_char *optp = opts;
3264
3265	thoff = th->th_off << `2`;
3266	cnt = thoff - sizeof(struct tcphdr);
3267
3268	if (cnt > `0` && !pf_pull_hdr(pbuf, off + sizeof(*th), opts, cnt,
3269	NULL, NULL, af)) {
3270	return PF_DROP;
3271	}
3272
3273	for (; cnt > `0`; cnt -= optlen, optp += optlen) {
3274	opt = optp[`0`];
3275	if (opt == TCPOPT_EOL) {
3276	break;
3277	}
3278	if (opt == TCPOPT_NOP) {
3279	optlen = `1`;
3280	} else {
3281	if (cnt < `2`) {
3282	break;
3283	}
3284	optlen = optp[`1`];
3285	if (optlen < `2` \|\| optlen > cnt) {
3286	break;
3287	}
3288	}
3289	switch (opt) {
3290	case TCPOPT_MAXSEG:
3291	mss = (u_int16_t )(void* *)(optp + `2`);
3292	if ((ntohs(*mss)) > r->max_mss) {
3293	/*
3294	* <jhw@apple.com>
3295	* Only do the TCP checksum fixup if delayed
3296	* checksum calculation will not be performed.
3297	*/
3298	if (pbuf->pb_ifp \|\|
3299	!(*pbuf->pb_csum_flags & CSUM_TCP)) {
3300	th->th_sum = pf_cksum_fixup(th->th_sum,
3301	*mss, htons(r->max_mss), `0`);
3302	}
3303	*mss = htons(r->max_mss);
3304	rewrite = `1`;
3305	}
3306	break;
3307	default:
3308	break;
3309	}
3310	}
3311
3312	if (rewrite) {
3313	u_short reason;
3314
3315	VERIFY(pbuf == pd->mp);
3316
3317	if (pf_lazy_makewritable(pd, pd->mp,
3318	off + sizeof(*th) + thoff) == NULL) {
3319	REASON_SET(&reason, PFRES_MEMORY);
3320	if (r->log) {
3321	PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, reason,
3322	r, `0`, `0`, pd);
3323	}
3324	return PF_DROP;
3325	}
3326
3327	*rewrptr = `1`;
3328	pbuf_copy_back(pd->mp, off + sizeof(th), thoff - sizeof(th), opts);
3329	}
3330
3331	return PF_PASS;
3332	}
3333

Browse the source code of xnu/bsd/net/pf_norm.c