pf_norm.c source code [xnu/bsd/net/pf_norm.c]

1	/*
2	* Copyright (c) 2007-2016 Apple Inc. All rights reserved.
3	*
4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5	*
6	* This file contains Original Code and/or Modifications of Original Code
7	* as defined in and that are subject to the Apple Public Source License
8	* Version 2.0 (the 'License'). You may not use this file except in
9	* compliance with the License. The rights granted to you under the License
10	* may not be used to create, or enable the creation or redistribution of,
11	* unlawful or unlicensed copies of an Apple operating system, or to
12	* circumvent, violate, or enable the circumvention or violation of, any
13	* terms of an Apple operating system software license agreement.
14	*
15	* Please obtain a copy of the License at
16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
17	*
18	* The Original Code and all software distributed under the License are
19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23	* Please see the License for the specific language governing rights and
24	* limitations under the License.
25	*
26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27	*/
28
/*	$apfw: pf_norm.c,v 1.10 2008/08/28 19:10:53 jhw Exp $ */
/*	$OpenBSD: pf_norm.c,v 1.107 2006/04/16 00:59:52 pascoe Exp $ */
31
32	/*
33	* Copyright 2001 Niels Provos <provos@citi.umich.edu>
34	* All rights reserved.
35	*
36	* Redistribution and use in source and binary forms, with or without
37	* modification, are permitted provided that the following conditions
38	* are met:
39	* 1. Redistributions of source code must retain the above copyright
40	* notice, this list of conditions and the following disclaimer.
41	* 2. Redistributions in binary form must reproduce the above copyright
42	* notice, this list of conditions and the following disclaimer in the
43	* documentation and/or other materials provided with the distribution.
44	*
45	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
46	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
47	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
48	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
49	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
50	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
51	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
52	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
53	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
54	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
55	*/
56
57	#include <sys/param.h>
58	#include <sys/systm.h>
59	#include <sys/mbuf.h>
60	#include <sys/filio.h>
61	#include <sys/fcntl.h>
62	#include <sys/socket.h>
63	#include <sys/kernel.h>
64	#include <sys/time.h>
65	#include <sys/random.h>
66	#include <sys/mcache.h>
67
68	#include <net/if.h>
69	#include <net/if_types.h>
70	#include <net/bpf.h>
71	#include <net/route.h>
72	#include <net/if_pflog.h>
73
74	#include <netinet/in.h>
75	#include <netinet/in_var.h>
76	#include <netinet/in_systm.h>
77	#include <netinet/ip.h>
78	#include <netinet/ip_var.h>
79	#include <netinet/tcp.h>
80	#include <netinet/tcp_seq.h>
81	#include <netinet/tcp_fsm.h>
82	#include <netinet/udp.h>
83	#include <netinet/ip_icmp.h>
84
85	#if INET6
86	#include <netinet/ip6.h>
87	#endif /* INET6 */
88
89	#include <net/pfvar.h>
90
91	struct pf_frent {
92	LIST_ENTRY(pf_frent) fr_next;
93	struct mbuf *fr_m;
94	#define fr_ip fr_u.fru_ipv4
95	#define fr_ip6 fr_u.fru_ipv6
96	union {
97	struct ip *fru_ipv4;
98	struct ip6_hdr *fru_ipv6;
99	} fr_u;
100	struct ip6_frag fr_ip6f_opt;
101	int fr_ip6f_hlen;
102	};
103
104	struct pf_frcache {
105	LIST_ENTRY(pf_frcache) fr_next;
106	uint16_t fr_off;
107	uint16_t fr_end;
108	};
109
/* Values for pf_fragment.fr_flags */
#define	PFFRAG_SEENLAST	0x0001		/* Seen the last fragment for this */
#define	PFFRAG_NOBUFFER	0x0002		/* Non-buffering fragment cache */
#define	PFFRAG_DROP	0x0004		/* Drop all fragments */
/* True when the descriptor buffers mbufs (fr_queue) rather than ranges (fr_cache) */
#define	BUFFER_FRAGMENTS(fr)	(!((fr)->fr_flags & PFFRAG_NOBUFFER))
114
115	struct pf_fragment {
116	RB_ENTRY(pf_fragment) fr_entry;
117	TAILQ_ENTRY(pf_fragment) frag_next;
118	struct pf_addr fr_srcx;
119	struct pf_addr fr_dstx;
120	u_int8_t fr_p; / protocol of this fragment /
121	u_int8_t fr_flags; / status flags /
122	u_int16_t fr_max; / fragment data max /
123	#define fr_id fr_uid.fru_id4
124	#define fr_id6 fr_uid.fru_id6
125	union {
126	u_int16_t fru_id4;
127	u_int32_t fru_id6;
128	} fr_uid;
129	int fr_af;
130	u_int32_t fr_timeout;
131	#define fr_queue fr_u.fru_queue
132	#define fr_cache fr_u.fru_cache
133	union {
134	LIST_HEAD(pf_fragq, pf_frent) fru_queue; / buffering /
135	LIST_HEAD(pf_cacheq, pf_frcache) fru_cache; / non-buf /
136	} fr_u;
137	uint32_t fr_csum_flags; / checksum flags /
138	uint32_t fr_csum; / partial checksum value /
139	};
140
141	static TAILQ_HEAD(pf_fragqueue, pf_fragment) pf_fragqueue;
142	static TAILQ_HEAD(pf_cachequeue, pf_fragment) pf_cachequeue;
143
144	static __inline int pf_frag_compare(struct pf_fragment *,
145	struct pf_fragment *);
146	static RB_HEAD(pf_frag_tree, pf_fragment) pf_frag_tree, pf_cache_tree;
147	RB_PROTOTYPE_SC(__private_extern__, pf_frag_tree, pf_fragment, fr_entry,
148	pf_frag_compare);
149	RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
150
151	/ Private prototypes /
152	static void pf_ip6hdr2key(struct pf_fragment , struct* ip6_hdr *,
153	struct ip6_frag *);
154	static void pf_ip2key(struct pf_fragment , struct* ip *);
155	static void pf_remove_fragment(struct pf_fragment *);
156	static void pf_flush_fragments(void);
157	static void pf_free_fragment(struct pf_fragment *);
158	static struct pf_fragment pf_find_fragment_by_key(struct* pf_fragment *,
159	struct pf_frag_tree *);
160	static __inline struct pf_fragment *
161	pf_find_fragment_by_ipv4_header(struct ip , struct* pf_frag_tree *);
162	static __inline struct pf_fragment *
163	pf_find_fragment_by_ipv6_header(struct ip6_hdr , struct* ip6_frag *,
164	struct pf_frag_tree *);
165	static struct mbuf pf_reassemble(struct* mbuf , struct* pf_fragment **,
166	struct pf_frent , int*);
167	static struct mbuf pf_fragcache(struct* mbuf , struct** ip *,
168	struct pf_fragment *, int, int, int* *);
169	static struct mbuf pf_reassemble6(struct* mbuf , struct pf_fragment ,
170	struct pf_frent , int*);
171	static struct mbuf pf_frag6cache(struct* mbuf , struct** ip6_hdr*,
172	struct ip6_frag , struct* pf_fragment *, int, int, int, int* *);
173	static int pf_normalize_tcpopt(struct pf_rule , int, struct* pfi_kif *,
174	struct pf_pdesc , pbuf_t , struct tcphdr , int, int* *);
175
176	#define DPFPRINTF(x) do { \
177	if (pf_status.debug >= PF_DEBUG_MISC) { \
178	printf("%s: ", __func__); \
179	printf x ; \
180	} \
181	} while (0)
182
183	/ Globals /
184	struct pool pf_frent_pl, pf_frag_pl;
185	static struct pool pf_cache_pl, pf_cent_pl;
186	struct pool pf_state_scrub_pl;
187
188	static int pf_nfrents, pf_ncache;
189
190	void
191	pf_normalize_init(void)
192	{
193	pool_init(&pf_frent_pl, sizeof (struct pf_frent), `0`, `0`, `0`, "pffrent",
194	NULL);
195	pool_init(&pf_frag_pl, sizeof (struct pf_fragment), `0`, `0`, `0`, "pffrag",
196	NULL);
197	pool_init(&pf_cache_pl, sizeof (struct pf_fragment), `0`, `0`, `0`,
198	"pffrcache", NULL);
199	pool_init(&pf_cent_pl, sizeof (struct pf_frcache), `0`, `0`, `0`, "pffrcent",
200	NULL);
201	pool_init(&pf_state_scrub_pl, sizeof (struct pf_state_scrub), `0`, `0`, `0`,
202	"pfstscr", NULL);
203
204	pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT);
205	pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, `0`);
206	pool_sethardlimit(&pf_cache_pl, PFFRAG_FRCACHE_HIWAT, NULL, `0`);
207	pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, `0`);
208
209	TAILQ_INIT(&pf_fragqueue);
210	TAILQ_INIT(&pf_cachequeue);
211	}
212
#if 0
/*
 * Counterpart of pf_normalize_init(): destroys the pools in the reverse
 * order of their creation.  Currently compiled out.
 */
void
pf_normalize_destroy(void)
{
	pool_destroy(&pf_state_scrub_pl);
	pool_destroy(&pf_cent_pl);
	pool_destroy(&pf_cache_pl);
	pool_destroy(&pf_frag_pl);
	pool_destroy(&pf_frent_pl);
}
#endif
224
225	int
226	pf_normalize_isempty(void)
227	{
228	return (TAILQ_EMPTY(&pf_fragqueue) && TAILQ_EMPTY(&pf_cachequeue));
229	}
230
231	static __inline int
232	pf_frag_compare(struct pf_fragment a, struct* pf_fragment *b)
233	{
234	int diff;
235
236	if ((diff = a->fr_af - b->fr_af))
237	return (diff);
238	else if ((diff = a->fr_p - b->fr_p))
239	return (diff);
240	else {
241	struct pf_addr *sa = &a->fr_srcx;
242	struct pf_addr *sb = &b->fr_srcx;
243	struct pf_addr *da = &a->fr_dstx;
244	struct pf_addr *db = &b->fr_dstx;
245
246	switch (a->fr_af) {
247	#ifdef INET
248	case AF_INET:
249	if ((diff = a->fr_id - b->fr_id))
250	return (diff);
251	else if (sa->v4addr.s_addr < sb->v4addr.s_addr)
252	return (-`1`);
253	else if (sa->v4addr.s_addr > sb->v4addr.s_addr)
254	return (`1`);
255	else if (da->v4addr.s_addr < db->v4addr.s_addr)
256	return (-`1`);
257	else if (da->v4addr.s_addr > db->v4addr.s_addr)
258	return (`1`);
259	break;
260	#endif
261	#ifdef INET6
262	case AF_INET6:
263	if ((diff = a->fr_id6 - b->fr_id6))
264	return (diff);
265	else if (sa->addr32[`3`] < sb->addr32[`3`])
266	return (-`1`);
267	else if (sa->addr32[`3`] > sb->addr32[`3`])
268	return (`1`);
269	else if (sa->addr32[`2`] < sb->addr32[`2`])
270	return (-`1`);
271	else if (sa->addr32[`2`] > sb->addr32[`2`])
272	return (`1`);
273	else if (sa->addr32[`1`] < sb->addr32[`1`])
274	return (-`1`);
275	else if (sa->addr32[`1`] > sb->addr32[`1`])
276	return (`1`);
277	else if (sa->addr32[`0`] < sb->addr32[`0`])
278	return (-`1`);
279	else if (sa->addr32[`0`] > sb->addr32[`0`])
280	return (`1`);
281	else if (da->addr32[`3`] < db->addr32[`3`])
282	return (-`1`);
283	else if (da->addr32[`3`] > db->addr32[`3`])
284	return (`1`);
285	else if (da->addr32[`2`] < db->addr32[`2`])
286	return (-`1`);
287	else if (da->addr32[`2`] > db->addr32[`2`])
288	return (`1`);
289	else if (da->addr32[`1`] < db->addr32[`1`])
290	return (-`1`);
291	else if (da->addr32[`1`] > db->addr32[`1`])
292	return (`1`);
293	else if (da->addr32[`0`] < db->addr32[`0`])
294	return (-`1`);
295	else if (da->addr32[`0`] > db->addr32[`0`])
296	return (`1`);
297	break;
298	#endif
299	default:
300	VERIFY(!`0` && "only IPv4 and IPv6 supported!");
301	break;
302	}
303	}
304	return (`0`);
305	}
306
307	void
308	pf_purge_expired_fragments(void)
309	{
310	struct pf_fragment *frag;
311	u_int32_t expire = pf_time_second() -
312	pf_default_rule.timeout[PFTM_FRAG];
313
314	while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
315	VERIFY(BUFFER_FRAGMENTS(frag));
316	if (frag->fr_timeout > expire)
317	break;
318
319	switch (frag->fr_af) {
320	case AF_INET:
321	DPFPRINTF(("expiring IPv4 %d(0x%llx) from queue.\n",
322	ntohs(frag->fr_id),
323	(uint64_t)VM_KERNEL_ADDRPERM(frag)));
324	break;
325	case AF_INET6:
326	DPFPRINTF(("expiring IPv6 %d(0x%llx) from queue.\n",
327	ntohl(frag->fr_id6),
328	(uint64_t)VM_KERNEL_ADDRPERM(frag)));
329	break;
330	default:
331	VERIFY(`0` && "only IPv4 and IPv6 supported");
332	break;
333	}
334	pf_free_fragment(frag);
335	}
336
337	while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) {
338	VERIFY(!BUFFER_FRAGMENTS(frag));
339	if (frag->fr_timeout > expire)
340	break;
341
342	switch (frag->fr_af) {
343	case AF_INET:
344	DPFPRINTF(("expiring IPv4 %d(0x%llx) from cache.\n",
345	ntohs(frag->fr_id),
346	(uint64_t)VM_KERNEL_ADDRPERM(frag)));
347	break;
348	case AF_INET6:
349	DPFPRINTF(("expiring IPv6 %d(0x%llx) from cache.\n",
350	ntohl(frag->fr_id6),
351	(uint64_t)VM_KERNEL_ADDRPERM(frag)));
352	break;
353	default:
354	VERIFY(`0` && "only IPv4 and IPv6 supported");
355	break;
356	}
357	pf_free_fragment(frag);
358	VERIFY(TAILQ_EMPTY(&pf_cachequeue) \|\|
359	TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag);
360	}
361	}
362
363	/*
364	* Try to flush old fragments to make space for new ones
365	*/
366
367	static void
368	pf_flush_fragments(void)
369	{
370	struct pf_fragment *frag;
371	int goal;
372
373	goal = pf_nfrents * `9` / `10`;
374	DPFPRINTF(("trying to free > %d frents\n",
375	pf_nfrents - goal));
376	while (goal < pf_nfrents) {
377	frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue);
378	if (frag == NULL)
379	break;
380	pf_free_fragment(frag);
381	}
382
383
384	goal = pf_ncache * `9` / `10`;
385	DPFPRINTF(("trying to free > %d cache entries\n",
386	pf_ncache - goal));
387	while (goal < pf_ncache) {
388	frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue);
389	if (frag == NULL)
390	break;
391	pf_free_fragment(frag);
392	}
393	}
394
395	/ Frees the fragments and all associated entries /
396
397	static void
398	pf_free_fragment(struct pf_fragment *frag)
399	{
400	struct pf_frent *frent;
401	struct pf_frcache *frcache;
402
403	/ Free all fragments /
404	if (BUFFER_FRAGMENTS(frag)) {
405	for (frent = LIST_FIRST(&frag->fr_queue); frent;
406	frent = LIST_FIRST(&frag->fr_queue)) {
407	LIST_REMOVE(frent, fr_next);
408
409	m_freem(frent->fr_m);
410	pool_put(&pf_frent_pl, frent);
411	pf_nfrents--;
412	}
413	} else {
414	for (frcache = LIST_FIRST(&frag->fr_cache); frcache;
415	frcache = LIST_FIRST(&frag->fr_cache)) {
416	LIST_REMOVE(frcache, fr_next);
417
418	VERIFY(LIST_EMPTY(&frag->fr_cache) \|\|
419	LIST_FIRST(&frag->fr_cache)->fr_off >
420	frcache->fr_end);
421
422	pool_put(&pf_cent_pl, frcache);
423	pf_ncache--;
424	}
425	}
426
427	pf_remove_fragment(frag);
428	}
429
430	static void
431	pf_ip6hdr2key(struct pf_fragment key, struct* ip6_hdr *ip6,
432	struct ip6_frag *fh)
433	{
434	key->fr_p = fh->ip6f_nxt;
435	key->fr_id6 = fh->ip6f_ident;
436	key->fr_af = AF_INET6;
437	key->fr_srcx.v6addr = ip6->ip6_src;
438	key->fr_dstx.v6addr = ip6->ip6_dst;
439	}
440
441	static void
442	pf_ip2key(struct pf_fragment key, struct* ip *ip)
443	{
444	key->fr_p = ip->ip_p;
445	key->fr_id = ip->ip_id;
446	key->fr_af = AF_INET;
447	key->fr_srcx.v4addr.s_addr = ip->ip_src.s_addr;
448	key->fr_dstx.v4addr.s_addr = ip->ip_dst.s_addr;
449	}
450
451	static struct pf_fragment *
452	pf_find_fragment_by_key(struct pf_fragment key, struct* pf_frag_tree *tree)
453	{
454	struct pf_fragment *frag;
455
456	frag = RB_FIND(pf_frag_tree, tree, key);
457	if (frag != NULL) {
458	/ XXX Are we sure we want to update the timeout? /
459	frag->fr_timeout = pf_time_second();
460	if (BUFFER_FRAGMENTS(frag)) {
461	TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
462	TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);
463	} else {
464	TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
465	TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next);
466	}
467	}
468
469	return (frag);
470	}
471
472	static __inline struct pf_fragment *
473	pf_find_fragment_by_ipv4_header(struct ip ip, struct* pf_frag_tree *tree)
474	{
475	struct pf_fragment key;
476	pf_ip2key(&key, ip);
477	return pf_find_fragment_by_key(&key, tree);
478	}
479
480	static __inline struct pf_fragment *
481	pf_find_fragment_by_ipv6_header(struct ip6_hdr ip6, struct* ip6_frag *fh,
482	struct pf_frag_tree *tree)
483	{
484	struct pf_fragment key;
485	pf_ip6hdr2key(&key, ip6, fh);
486	return pf_find_fragment_by_key(&key, tree);
487	}
488
489	/ Removes a fragment from the fragment queue and frees the fragment /
490
491	static void
492	pf_remove_fragment(struct pf_fragment *frag)
493	{
494	if (BUFFER_FRAGMENTS(frag)) {
495	RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag);
496	TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
497	pool_put(&pf_frag_pl, frag);
498	} else {
499	RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag);
500	TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
501	pool_put(&pf_cache_pl, frag);
502	}
503	}
504
505	#define FR_IP_OFF(fr) ((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3)
506	static struct mbuf *
507	pf_reassemble(struct mbuf m0, struct* pf_fragment **frag,
508	struct pf_frent frent, int* mff)
509	{
510	struct mbuf m = m0, m2;
511	struct pf_frent frea, next;
512	struct pf_frent *frep = NULL;
513	struct ip *ip = frent->fr_ip;
514	uint32_t hlen = ip->ip_hl << `2`;
515	u_int16_t off = (ntohs(ip->ip_off) & IP_OFFMASK) << `3`;
516	u_int16_t ip_len = ntohs(ip->ip_len) - ip->ip_hl * `4`;
517	u_int16_t fr_max = ip_len + off;
518	uint32_t csum, csum_flags;
519
520	VERIFY(frag == NULL \|\| BUFFER_FRAGMENTS(frag));
521
522	/*
523	* Leverage partial checksum offload for IP fragments. Narrow down
524	* the scope to cover only UDP without IP options, as that is the
525	* most common case.
526	*
527	* Perform 1's complement adjustment of octets that got included/
528	* excluded in the hardware-calculated checksum value. Ignore cases
529	* where the value includes the entire IPv4 header span, as the sum
530	* for those octets would already be 0 by the time we get here; IP
531	* has already performed its header checksum validation. Also take
532	* care of any trailing bytes and subtract out their partial sum.
533	*/
534	if (ip->ip_p == IPPROTO_UDP && hlen == sizeof (struct ip) &&
535	(m->m_pkthdr.csum_flags &
536	(CSUM_DATA_VALID \| CSUM_PARTIAL \| CSUM_PSEUDO_HDR)) ==
537	(CSUM_DATA_VALID \| CSUM_PARTIAL)) {
538	uint32_t start = m->m_pkthdr.csum_rx_start;
539	int32_t trailer = (m_pktlen(m) - ntohs(ip->ip_len));
540	uint32_t swbytes = (uint32_t)trailer;
541
542	csum = m->m_pkthdr.csum_rx_val;
543
544	ASSERT(trailer >= `0`);
545	if ((start != `0` && start != hlen) \|\| trailer != `0`) {
546	#if BYTE_ORDER != BIG_ENDIAN
547	if (start < hlen) {
548	HTONS(ip->ip_len);
549	HTONS(ip->ip_off);
550	}
551	#endif /* BYTE_ORDER != BIG_ENDIAN */
552	/ callee folds in sum /
553	csum = m_adj_sum16(m, start, hlen,
554	(ip->ip_len - hlen), csum);
555	if (hlen > start)
556	swbytes += (hlen - start);
557	else
558	swbytes += (start - hlen);
559	#if BYTE_ORDER != BIG_ENDIAN
560	if (start < hlen) {
561	NTOHS(ip->ip_off);
562	NTOHS(ip->ip_len);
563	}
564	#endif /* BYTE_ORDER != BIG_ENDIAN */
565	}
566	csum_flags = m->m_pkthdr.csum_flags;
567
568	if (swbytes != `0`)
569	udp_in_cksum_stats(swbytes);
570	if (trailer != `0`)
571	m_adj(m, -trailer);
572	} else {
573	csum = `0`;
574	csum_flags = `0`;
575	}
576
577	/ Invalidate checksum /
578	m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
579
580	/ Strip off ip header /
581	m->m_data += hlen;
582	m->m_len -= hlen;
583
584	/ Create a new reassembly queue for this packet /
585	if (*frag == NULL) {
586	*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
587	if (*frag == NULL) {
588	pf_flush_fragments();
589	*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
590	if (*frag == NULL)
591	goto drop_fragment;
592	}
593
594	(*frag)->fr_flags = `0`;
595	(*frag)->fr_max = `0`;
596	(*frag)->fr_af = AF_INET;
597	(*frag)->fr_srcx.v4addr = frent->fr_ip->ip_src;
598	(*frag)->fr_dstx.v4addr = frent->fr_ip->ip_dst;
599	(*frag)->fr_p = frent->fr_ip->ip_p;
600	(*frag)->fr_id = frent->fr_ip->ip_id;
601	(*frag)->fr_timeout = pf_time_second();
602	if (csum_flags != `0`) {
603	(*frag)->fr_csum_flags = csum_flags;
604	(*frag)->fr_csum = csum;
605	}
606	LIST_INIT(&(*frag)->fr_queue);
607
608	RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
609	TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);
610
611	/ We do not have a previous fragment /
612	frep = NULL;
613	goto insert;
614	}
615
616	/*
617	* If this fragment contains similar checksum offload info
618	* as that of the existing ones, accumulate checksum. Otherwise,
619	* invalidate checksum offload info for the entire datagram.
620	*/
621	if (csum_flags != `0` && csum_flags == (*frag)->fr_csum_flags)
622	(*frag)->fr_csum += csum;
623	else if ((*frag)->fr_csum_flags != `0`)
624	(*frag)->fr_csum_flags = `0`;
625
626	/*
627	* Find a fragment after the current one:
628	* - off contains the real shifted offset.
629	*/
630	LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
631	if (FR_IP_OFF(frea) > off)
632	break;
633	frep = frea;
634	}
635
636	VERIFY(frep != NULL \|\| frea != NULL);
637
638	if (frep != NULL &&
639	FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl *
640	`4` > off) {
641	u_int16_t precut;
642
643	precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) -
644	frep->fr_ip->ip_hl * `4` - off;
645	if (precut >= ip_len)
646	goto drop_fragment;
647	m_adj(frent->fr_m, precut);
648	DPFPRINTF(("overlap -%d\n", precut));
649	/ Enforce 8 byte boundaries /
650	ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> `3`));
651	off = (ntohs(ip->ip_off) & IP_OFFMASK) << `3`;
652	ip_len -= precut;
653	ip->ip_len = htons(ip_len);
654	}
655
656	for (; frea != NULL && ip_len + off > FR_IP_OFF(frea);
657	frea = next) {
658	u_int16_t aftercut;
659
660	aftercut = ip_len + off - FR_IP_OFF(frea);
661	DPFPRINTF(("adjust overlap %d\n", aftercut));
662	if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl
663	* `4`) {
664	frea->fr_ip->ip_len =
665	htons(ntohs(frea->fr_ip->ip_len) - aftercut);
666	frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) +
667	(aftercut >> `3`));
668	m_adj(frea->fr_m, aftercut);
669	break;
670	}
671
672	/ This fragment is completely overlapped, lose it /
673	next = LIST_NEXT(frea, fr_next);
674	m_freem(frea->fr_m);
675	LIST_REMOVE(frea, fr_next);
676	pool_put(&pf_frent_pl, frea);
677	pf_nfrents--;
678	}
679
680	insert:
681	/ Update maximum data size /
682	if ((*frag)->fr_max < fr_max)
683	(*frag)->fr_max = fr_max;
684	/ This is the last segment /
685	if (!mff)
686	(*frag)->fr_flags \|= PFFRAG_SEENLAST;
687
688	if (frep == NULL)
689	LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
690	else
691	LIST_INSERT_AFTER(frep, frent, fr_next);
692
693	/ Check if we are completely reassembled /
694	if (!((*frag)->fr_flags & PFFRAG_SEENLAST))
695	return (NULL);
696
697	/ Check if we have all the data /
698	off = `0`;
699	for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
700	next = LIST_NEXT(frep, fr_next);
701
702	off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * `4`;
703	if (off < (*frag)->fr_max &&
704	(next == NULL \|\| FR_IP_OFF(next) != off)) {
705	DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
706	off, next == NULL ? -`1` : FR_IP_OFF(next),
707	(*frag)->fr_max));
708	return (NULL);
709	}
710	}
711	DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
712	if (off < (*frag)->fr_max)
713	return (NULL);
714
715	/ We have all the data /
716	frent = LIST_FIRST(&(*frag)->fr_queue);
717	VERIFY(frent != NULL);
718	if ((frent->fr_ip->ip_hl << `2`) + off > IP_MAXPACKET) {
719	DPFPRINTF(("drop: too big: %d\n", off));
720	pf_free_fragment(*frag);
721	*frag = NULL;
722	return (NULL);
723	}
724	next = LIST_NEXT(frent, fr_next);
725
726	/ Magic from ip_input /
727	ip = frent->fr_ip;
728	m = frent->fr_m;
729	m2 = m->m_next;
730	m->m_next = NULL;
731	m_cat(m, m2);
732	pool_put(&pf_frent_pl, frent);
733	pf_nfrents--;
734	for (frent = next; frent != NULL; frent = next) {
735	next = LIST_NEXT(frent, fr_next);
736
737	m2 = frent->fr_m;
738	pool_put(&pf_frent_pl, frent);
739	pf_nfrents--;
740	m_cat(m, m2);
741	}
742
743	ip->ip_src = (*frag)->fr_srcx.v4addr;
744	ip->ip_dst = (*frag)->fr_dstx.v4addr;
745
746	if ((*frag)->fr_csum_flags != `0`) {
747	csum = (*frag)->fr_csum;
748
749	ADDCARRY(csum);
750
751	m->m_pkthdr.csum_rx_val = csum;
752	m->m_pkthdr.csum_rx_start = sizeof (struct ip);
753	m->m_pkthdr.csum_flags = (*frag)->fr_csum_flags;
754	} else if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) \|\|
755	(m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
756	/ loopback checksums are always OK /
757	m->m_pkthdr.csum_data = `0xffff`;
758	m->m_pkthdr.csum_flags &= ~CSUM_PARTIAL;
759	m->m_pkthdr.csum_flags =
760	CSUM_DATA_VALID \| CSUM_PSEUDO_HDR \|
761	CSUM_IP_CHECKED \| CSUM_IP_VALID;
762	}
763
764	/ Remove from fragment queue /
765	pf_remove_fragment(*frag);
766	*frag = NULL;
767
768	hlen = ip->ip_hl << `2`;
769	ip->ip_len = htons(off + hlen);
770	m->m_len += hlen;
771	m->m_data -= hlen;
772
773	/ some debugging cruft by sklower, below, will go away soon /
774	/ XXX this should be done elsewhere /
775	if (m->m_flags & M_PKTHDR) {
776	int plen = `0`;
777	for (m2 = m; m2; m2 = m2->m_next)
778	plen += m2->m_len;
779	m->m_pkthdr.len = plen;
780	}
781
782	DPFPRINTF(("complete: 0x%llx(%d)\n",
783	(uint64_t)VM_KERNEL_ADDRPERM(m), ntohs(ip->ip_len)));
784	return (m);
785
786	drop_fragment:
787	/ Oops - fail safe - drop packet /
788	pool_put(&pf_frent_pl, frent);
789	pf_nfrents--;
790	m_freem(m);
791	return (NULL);
792	}
793
794	static struct mbuf *
795	pf_fragcache(struct mbuf m0, struct** ip h, struct* pf_fragment *frag, int* mff,
796	int drop, int *nomem)
797	{
798	struct mbuf m = m0;
799	struct pf_frcache frp, fra, *cur = NULL;
800	int ip_len = ntohs(h->ip_len) - (h->ip_hl << `2`);
801	u_int16_t off = ntohs(h->ip_off) << `3`;
802	u_int16_t fr_max = ip_len + off;
803	int hosed = `0`;
804
805	VERIFY(frag == NULL \|\| !BUFFER_FRAGMENTS(frag));
806
807	/ Create a new range queue for this packet /
808	if (*frag == NULL) {
809	*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
810	if (*frag == NULL) {
811	pf_flush_fragments();
812	*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
813	if (*frag == NULL)
814	goto no_mem;
815	}
816
817	/ Get an entry for the queue /
818	cur = pool_get(&pf_cent_pl, PR_NOWAIT);
819	if (cur == NULL) {
820	pool_put(&pf_cache_pl, *frag);
821	*frag = NULL;
822	goto no_mem;
823	}
824	pf_ncache++;
825
826	(*frag)->fr_flags = PFFRAG_NOBUFFER;
827	(*frag)->fr_max = `0`;
828	(*frag)->fr_af = AF_INET;
829	(*frag)->fr_srcx.v4addr = h->ip_src;
830	(*frag)->fr_dstx.v4addr = h->ip_dst;
831	(*frag)->fr_p = h->ip_p;
832	(*frag)->fr_id = h->ip_id;
833	(*frag)->fr_timeout = pf_time_second();
834
835	cur->fr_off = off;
836	cur->fr_end = fr_max;
837	LIST_INIT(&(*frag)->fr_cache);
838	LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);
839
840	RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
841	TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);
842
843	DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off,
844	fr_max));
845
846	goto pass;
847	}
848
849	/*
850	* Find a fragment after the current one:
851	* - off contains the real shifted offset.
852	*/
853	frp = NULL;
854	LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
855	if (fra->fr_off > off)
856	break;
857	frp = fra;
858	}
859
860	VERIFY(frp != NULL \|\| fra != NULL);
861
862	if (frp != NULL) {
863	int precut;
864
865	precut = frp->fr_end - off;
866	if (precut >= ip_len) {
867	/ Fragment is entirely a duplicate /
868	DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n",
869	h->ip_id, frp->fr_off, frp->fr_end, off, fr_max));
870	goto drop_fragment;
871	}
872	if (precut == `0`) {
873	/ They are adjacent. Fixup cache entry /
874	DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n",
875	h->ip_id, frp->fr_off, frp->fr_end, off, fr_max));
876	frp->fr_end = fr_max;
877	} else if (precut > `0`) {
878	/*
879	* The first part of this payload overlaps with a
880	* fragment that has already been passed.
881	* Need to trim off the first part of the payload.
882	* But to do so easily, we need to create another
883	* mbuf to throw the original header into.
884	*/
885
886	DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n",
887	h->ip_id, precut, frp->fr_off, frp->fr_end, off,
888	fr_max));
889
890	off += precut;
891	fr_max -= precut;
892	/ Update the previous frag to encompass this one /
893	frp->fr_end = fr_max;
894
895	if (!drop) {
896	/*
897	* XXX Optimization opportunity
898	* This is a very heavy way to trim the payload.
899	* we could do it much faster by diddling mbuf
900	* internals but that would be even less legible
901	* than this mbuf magic. For my next trick,
902	* I'll pull a rabbit out of my laptop.
903	*/
904	*m0 = m_copym(m, `0`, h->ip_hl << `2`, M_NOWAIT);
905	if (*m0 == NULL)
906	goto no_mem;
907	VERIFY((*m0)->m_next == NULL);
908	m_adj(m, precut + (h->ip_hl << `2`));
909	m_cat(*m0, m);
910	m = *m0;
911	if (m->m_flags & M_PKTHDR) {
912	int plen = `0`;
913	struct mbuf *t;
914	for (t = m; t; t = t->m_next)
915	plen += t->m_len;
916	m->m_pkthdr.len = plen;
917	}
918
919
920	h = mtod(m, struct ip *);
921
922
923	VERIFY((int)m->m_len ==
924	ntohs(h->ip_len) - precut);
925	h->ip_off = htons(ntohs(h->ip_off) +
926	(precut >> `3`));
927	h->ip_len = htons(ntohs(h->ip_len) - precut);
928	} else {
929	hosed++;
930	}
931	} else {
932	/ There is a gap between fragments /
933
934	DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n",
935	h->ip_id, -precut, frp->fr_off, frp->fr_end, off,
936	fr_max));
937
938	cur = pool_get(&pf_cent_pl, PR_NOWAIT);
939	if (cur == NULL)
940	goto no_mem;
941	pf_ncache++;
942
943	cur->fr_off = off;
944	cur->fr_end = fr_max;
945	LIST_INSERT_AFTER(frp, cur, fr_next);
946	}
947	}
948
949	if (fra != NULL) {
950	int aftercut;
951	int merge = `0`;
952
953	aftercut = fr_max - fra->fr_off;
954	if (aftercut == `0`) {
955	/ Adjacent fragments /
956	DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n",
957	h->ip_id, off, fr_max, fra->fr_off, fra->fr_end));
958	fra->fr_off = off;
959	merge = `1`;
960	} else if (aftercut > `0`) {
961	/ Need to chop off the tail of this fragment /
962	DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n",
963	h->ip_id, aftercut, off, fr_max, fra->fr_off,
964	fra->fr_end));
965	fra->fr_off = off;
966	fr_max -= aftercut;
967
968	merge = `1`;
969
970	if (!drop) {
971	m_adj(m, -aftercut);
972	if (m->m_flags & M_PKTHDR) {
973	int plen = `0`;
974	struct mbuf *t;
975	for (t = m; t; t = t->m_next)
976	plen += t->m_len;
977	m->m_pkthdr.len = plen;
978	}
979	h = mtod(m, struct ip *);
980	VERIFY((int)m->m_len ==
981	ntohs(h->ip_len) - aftercut);
982	h->ip_len = htons(ntohs(h->ip_len) - aftercut);
983	} else {
984	hosed++;
985	}
986	} else if (frp == NULL) {
987	/ There is a gap between fragments /
988	DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n",
989	h->ip_id, -aftercut, off, fr_max, fra->fr_off,
990	fra->fr_end));
991
992	cur = pool_get(&pf_cent_pl, PR_NOWAIT);
993	if (cur == NULL)
994	goto no_mem;
995	pf_ncache++;
996
997	cur->fr_off = off;
998	cur->fr_end = fr_max;
999	LIST_INSERT_BEFORE(fra, cur, fr_next);
1000	}
1001
1002
1003	/ Need to glue together two separate fragment descriptors /
1004	if (merge) {
1005	if (cur && fra->fr_off <= cur->fr_end) {
1006	/ Need to merge in a previous 'cur' /
1007	DPFPRINTF(("fragcache[%d]: adjacent(merge "
1008	"%d-%d) %d-%d (%d-%d)\n",
1009	h->ip_id, cur->fr_off, cur->fr_end, off,
1010	fr_max, fra->fr_off, fra->fr_end));
1011	fra->fr_off = cur->fr_off;
1012	LIST_REMOVE(cur, fr_next);
1013	pool_put(&pf_cent_pl, cur);
1014	pf_ncache--;
1015	cur = NULL;
1016
1017	} else if (frp && fra->fr_off <= frp->fr_end) {
1018	/ Need to merge in a modified 'frp' /
1019	VERIFY(cur == NULL);
1020	DPFPRINTF(("fragcache[%d]: adjacent(merge "
1021	"%d-%d) %d-%d (%d-%d)\n",
1022	h->ip_id, frp->fr_off, frp->fr_end, off,
1023	fr_max, fra->fr_off, fra->fr_end));
1024	fra->fr_off = frp->fr_off;
1025	LIST_REMOVE(frp, fr_next);
1026	pool_put(&pf_cent_pl, frp);
1027	pf_ncache--;
1028	frp = NULL;
1029
1030	}
1031	}
1032	}
1033
1034	if (hosed) {
1035	/*
1036	* We must keep tracking the overall fragment even when
1037	* we're going to drop it anyway so that we know when to
1038	* free the overall descriptor. Thus we drop the frag late.
1039	*/
1040	goto drop_fragment;
1041	}
1042
1043
1044	pass:
1045	/ Update maximum data size /
1046	if ((*frag)->fr_max < fr_max)
1047	(*frag)->fr_max = fr_max;
1048
1049	/ This is the last segment /
1050	if (!mff)
1051	(*frag)->fr_flags \|= PFFRAG_SEENLAST;
1052
1053	/ Check if we are completely reassembled /
1054	if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
1055	LIST_FIRST(&(*frag)->fr_cache)->fr_off == `0` &&
1056	LIST_FIRST(&(frag)->fr_cache)->fr_end == (frag)->fr_max) {
1057	/ Remove from fragment queue /
1058	DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id,
1059	(*frag)->fr_max));
1060	pf_free_fragment(*frag);
1061	*frag = NULL;
1062	}
1063
1064	return (m);
1065
1066	no_mem:
1067	*nomem = `1`;
1068
1069	/ Still need to pay attention to !IP_MF /
1070	if (!mff && *frag != NULL)
1071	(*frag)->fr_flags \|= PFFRAG_SEENLAST;
1072
1073	m_freem(m);
1074	return (NULL);
1075
1076	drop_fragment:
1077
1078	/ Still need to pay attention to !IP_MF /
1079	if (!mff && *frag != NULL)
1080	(*frag)->fr_flags \|= PFFRAG_SEENLAST;
1081
1082	if (drop) {
1083	/ This fragment has been deemed bad. Don't reass /
1084	if (((*frag)->fr_flags & PFFRAG_DROP) == `0`)
1085	DPFPRINTF(("fragcache[%d]: dropping overall fragment\n",
1086	h->ip_id));
1087	(*frag)->fr_flags \|= PFFRAG_DROP;
1088	}
1089
1090	m_freem(m);
1091	return (NULL);
1092	}
1093
1094	#define FR_IP6_OFF(fr) \
1095	(ntohs((fr)->fr_ip6f_opt.ip6f_offlg & IP6F_OFF_MASK))
1096	#define FR_IP6_PLEN(fr) (ntohs((fr)->fr_ip6->ip6_plen))
1097	struct mbuf *
1098	pf_reassemble6(struct mbuf m0, struct pf_fragment frag,
1099	struct pf_frent frent, int* mff)
1100	{
1101	struct mbuf m, m2;
1102	struct pf_frent frea, frep, *next;
1103	struct ip6_hdr *ip6;
1104	struct ip6_frag *ip6f;
1105	int plen, off, fr_max;
1106	uint32_t uoff, csum, csum_flags;
1107
1108	VERIFY(frag == NULL \|\| BUFFER_FRAGMENTS(frag));
1109	m = *m0;
1110	frep = NULL;
1111	ip6 = frent->fr_ip6;
1112	ip6f = &frent->fr_ip6f_opt;
1113	off = FR_IP6_OFF(frent);
1114	uoff = frent->fr_ip6f_hlen;
1115	plen = FR_IP6_PLEN(frent);
1116	fr_max = off + plen - (frent->fr_ip6f_hlen - sizeof *ip6);
1117
1118	DPFPRINTF(("0x%llx IPv6 frag plen %u off %u fr_ip6f_hlen %u "
1119	"fr_max %u m_len %u\n", (uint64_t)VM_KERNEL_ADDRPERM(m), plen, off,
1120	frent->fr_ip6f_hlen, fr_max, m->m_len));
1121
1122	/*
1123	* Leverage partial checksum offload for simple UDP/IP fragments,
1124	* as that is the most common case.
1125	*
1126	* Perform 1's complement adjustment of octets that got included/
1127	* excluded in the hardware-calculated checksum value. Also take
1128	* care of any trailing bytes and subtract out their partial sum.
1129	*/
1130	if (ip6f->ip6f_nxt == IPPROTO_UDP &&
1131	uoff == (sizeof (ip6) + sizeof* (*ip6f)) &&
1132	(m->m_pkthdr.csum_flags &
1133	(CSUM_DATA_VALID \| CSUM_PARTIAL \| CSUM_PSEUDO_HDR)) ==
1134	(CSUM_DATA_VALID \| CSUM_PARTIAL)) {
1135	uint32_t start = m->m_pkthdr.csum_rx_start;
1136	uint32_t ip_len = (sizeof (*ip6) + ntohs(ip6->ip6_plen));
1137	int32_t trailer = (m_pktlen(m) - ip_len);
1138	uint32_t swbytes = (uint32_t)trailer;
1139
1140	csum = m->m_pkthdr.csum_rx_val;
1141
1142	ASSERT(trailer >= `0`);
1143	if (start != uoff \|\| trailer != `0`) {
1144	uint16_t s = `0`, d = `0`;
1145
1146	if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) {
1147	s = ip6->ip6_src.s6_addr16[`1`];
1148	ip6->ip6_src.s6_addr16[`1`] = `0` ;
1149	}
1150	if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) {
1151	d = ip6->ip6_dst.s6_addr16[`1`];
1152	ip6->ip6_dst.s6_addr16[`1`] = `0`;
1153	}
1154
1155	/ callee folds in sum /
1156	csum = m_adj_sum16(m, start, uoff,
1157	(ip_len - uoff), csum);
1158	if (uoff > start)
1159	swbytes += (uoff - start);
1160	else
1161	swbytes += (start - uoff);
1162
1163	if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src))
1164	ip6->ip6_src.s6_addr16[`1`] = s;
1165	if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst))
1166	ip6->ip6_dst.s6_addr16[`1`] = d;
1167
1168	}
1169	csum_flags = m->m_pkthdr.csum_flags;
1170
1171	if (swbytes != `0`)
1172	udp_in6_cksum_stats(swbytes);
1173	if (trailer != `0`)
1174	m_adj(m, -trailer);
1175	} else {
1176	csum = `0`;
1177	csum_flags = `0`;
1178	}
1179
1180	/ Invalidate checksum /
1181	m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
1182
1183	/ strip off headers up to the fragment payload /
1184	m->m_data += frent->fr_ip6f_hlen;
1185	m->m_len -= frent->fr_ip6f_hlen;
1186
1187	/ Create a new reassembly queue for this packet /
1188	if (*frag == NULL) {
1189	*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
1190	if (*frag == NULL) {
1191	pf_flush_fragments();
1192	*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
1193	if (*frag == NULL)
1194	goto drop_fragment;
1195	}
1196
1197	(*frag)->fr_flags = `0`;
1198	(*frag)->fr_max = `0`;
1199	(*frag)->fr_af = AF_INET6;
1200	(*frag)->fr_srcx.v6addr = frent->fr_ip6->ip6_src;
1201	(*frag)->fr_dstx.v6addr = frent->fr_ip6->ip6_dst;
1202	(*frag)->fr_p = frent->fr_ip6f_opt.ip6f_nxt;
1203	(*frag)->fr_id6 = frent->fr_ip6f_opt.ip6f_ident;
1204	(*frag)->fr_timeout = pf_time_second();
1205	if (csum_flags != `0`) {
1206	(*frag)->fr_csum_flags = csum_flags;
1207	(*frag)->fr_csum = csum;
1208	}
1209	LIST_INIT(&(*frag)->fr_queue);
1210
1211	RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
1212	TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);
1213
1214	/ We do not have a previous fragment /
1215	frep = NULL;
1216	goto insert;
1217	}
1218
1219	/*
1220	* If this fragment contains similar checksum offload info
1221	* as that of the existing ones, accumulate checksum. Otherwise,
1222	* invalidate checksum offload info for the entire datagram.
1223	*/
1224	if (csum_flags != `0` && csum_flags == (*frag)->fr_csum_flags)
1225	(*frag)->fr_csum += csum;
1226	else if ((*frag)->fr_csum_flags != `0`)
1227	(*frag)->fr_csum_flags = `0`;
1228
1229	/*
1230	* Find a fragment after the current one:
1231	* - off contains the real shifted offset.
1232	*/
1233	LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
1234	if (FR_IP6_OFF(frea) > off)
1235	break;
1236	frep = frea;
1237	}
1238
1239	VERIFY(frep != NULL \|\| frea != NULL);
1240
1241	if (frep != NULL &&
1242	FR_IP6_OFF(frep) + FR_IP6_PLEN(frep) - frep->fr_ip6f_hlen > off)
1243	{
1244	u_int16_t precut;
1245
1246	precut = FR_IP6_OFF(frep) + FR_IP6_PLEN(frep) -
1247	frep->fr_ip6f_hlen - off;
1248	if (precut >= plen)
1249	goto drop_fragment;
1250	m_adj(frent->fr_m, precut);
1251	DPFPRINTF(("overlap -%d\n", precut));
1252	/ Enforce 8 byte boundaries /
1253	frent->fr_ip6f_opt.ip6f_offlg =
1254	htons(ntohs(frent->fr_ip6f_opt.ip6f_offlg) +
1255	(precut >> `3`));
1256	off = FR_IP6_OFF(frent);
1257	plen -= precut;
1258	ip6->ip6_plen = htons(plen);
1259	}
1260
1261	for (; frea != NULL && plen + off > FR_IP6_OFF(frea); frea = next) {
1262	u_int16_t aftercut;
1263
1264	aftercut = plen + off - FR_IP6_OFF(frea);
1265	DPFPRINTF(("adjust overlap %d\n", aftercut));
1266	if (aftercut < FR_IP6_PLEN(frea) - frea->fr_ip6f_hlen) {
1267	frea->fr_ip6->ip6_plen = htons(FR_IP6_PLEN(frea) -
1268	aftercut);
1269	frea->fr_ip6f_opt.ip6f_offlg =
1270	htons(ntohs(frea->fr_ip6f_opt.ip6f_offlg) +
1271	(aftercut >> `3`));
1272	m_adj(frea->fr_m, aftercut);
1273	break;
1274	}
1275
1276	/ This fragment is completely overlapped, lose it /
1277	next = LIST_NEXT(frea, fr_next);
1278	m_freem(frea->fr_m);
1279	LIST_REMOVE(frea, fr_next);
1280	pool_put(&pf_frent_pl, frea);
1281	pf_nfrents--;
1282	}
1283
1284	insert:
1285	/ Update maximum data size /
1286	if ((*frag)->fr_max < fr_max)
1287	(*frag)->fr_max = fr_max;
1288	/ This is the last segment /
1289	if (!mff)
1290	(*frag)->fr_flags \|= PFFRAG_SEENLAST;
1291
1292	if (frep == NULL)
1293	LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
1294	else
1295	LIST_INSERT_AFTER(frep, frent, fr_next);
1296
1297	/ Check if we are completely reassembled /
1298	if (!((*frag)->fr_flags & PFFRAG_SEENLAST))
1299	return (NULL);
1300
1301	/ Check if we have all the data /
1302	off = `0`;
1303	for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
1304	next = LIST_NEXT(frep, fr_next);
1305	off += FR_IP6_PLEN(frep) - (frent->fr_ip6f_hlen - sizeof *ip6);
1306	DPFPRINTF(("frep at %d, next %d, max %d\n",
1307	off, next == NULL ? -`1` : FR_IP6_OFF(next),
1308	(*frag)->fr_max));
1309	if (off < (*frag)->fr_max &&
1310	(next == NULL \|\| FR_IP6_OFF(next) != off)) {
1311	DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
1312	off, next == NULL ? -`1` : FR_IP6_OFF(next),
1313	(*frag)->fr_max));
1314	return (NULL);
1315	}
1316	}
1317	DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
1318	if (off < (*frag)->fr_max)
1319	return (NULL);
1320
1321	/ We have all the data /
1322	frent = LIST_FIRST(&(*frag)->fr_queue);
1323	VERIFY(frent != NULL);
1324	if (frent->fr_ip6f_hlen + off > IP_MAXPACKET) {
1325	DPFPRINTF(("drop: too big: %d\n", off));
1326	pf_free_fragment(*frag);
1327	*frag = NULL;
1328	return (NULL);
1329	}
1330
1331	ip6 = frent->fr_ip6;
1332	ip6->ip6_nxt = (*frag)->fr_p;
1333	ip6->ip6_plen = htons(off);
1334	ip6->ip6_src = (*frag)->fr_srcx.v6addr;
1335	ip6->ip6_dst = (*frag)->fr_dstx.v6addr;
1336
1337	if ((*frag)->fr_csum_flags != `0`) {
1338	csum = (*frag)->fr_csum;
1339
1340	ADDCARRY(csum);
1341
1342	m->m_pkthdr.csum_rx_val = csum;
1343	m->m_pkthdr.csum_rx_start = sizeof (struct ip6_hdr);
1344	m->m_pkthdr.csum_flags = (*frag)->fr_csum_flags;
1345	} else if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) \|\|
1346	(m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
1347	/ loopback checksums are always OK /
1348	m->m_pkthdr.csum_data = `0xffff`;
1349	m->m_pkthdr.csum_flags &= ~CSUM_PARTIAL;
1350	m->m_pkthdr.csum_flags = CSUM_DATA_VALID \| CSUM_PSEUDO_HDR;
1351	}
1352
1353	/ Remove from fragment queue /
1354	pf_remove_fragment(*frag);
1355	*frag = NULL;
1356
1357	m = frent->fr_m;
1358	m->m_len += sizeof(struct ip6_hdr);
1359	m->m_data -= sizeof(struct ip6_hdr);
1360	memmove(m->m_data, ip6, sizeof(struct ip6_hdr));
1361
1362	next = LIST_NEXT(frent, fr_next);
1363	pool_put(&pf_frent_pl, frent);
1364	pf_nfrents--;
1365	for (frent = next; next != NULL; frent = next) {
1366	m2 = frent->fr_m;
1367
1368	m_cat(m, m2);
1369	next = LIST_NEXT(frent, fr_next);
1370	pool_put(&pf_frent_pl, frent);
1371	pf_nfrents--;
1372	}
1373
1374	/ XXX this should be done elsewhere /
1375	if (m->m_flags & M_PKTHDR) {
1376	int pktlen = `0`;
1377	for (m2 = m; m2; m2 = m2->m_next)
1378	pktlen += m2->m_len;
1379	m->m_pkthdr.len = pktlen;
1380	}
1381
1382	DPFPRINTF(("complete: 0x%llx ip6_plen %d m_pkthdr.len %d\n",
1383	(uint64_t)VM_KERNEL_ADDRPERM(m), ntohs(ip6->ip6_plen),
1384	m->m_pkthdr.len));
1385
1386	return m;
1387
1388	drop_fragment:
1389	/ Oops - fail safe - drop packet /
1390	pool_put(&pf_frent_pl, frent);
1391	--pf_nfrents;
1392	m_freem(m);
1393	return NULL;
1394	}
1395
1396	static struct mbuf *
1397	pf_frag6cache(struct mbuf m0, struct** ip6_hdr h, struct* ip6_frag *fh,
1398	struct pf_fragment *frag, int* hlen, int mff, int drop, int *nomem)
1399	{
1400	struct mbuf m = m0;
1401	u_int16_t plen, off, fr_max;
1402	struct pf_frcache frp, fra, *cur = NULL;
1403	int hosed = `0`;
1404
1405	VERIFY(frag == NULL \|\| !BUFFER_FRAGMENTS(frag));
1406	m = *m0;
1407	off = ntohs(fh->ip6f_offlg & IP6F_OFF_MASK);
1408	plen = ntohs(h->ip6_plen) - (hlen - sizeof *h);
1409
1410	/*
1411	* Apple Modification: dimambro@apple.com. The hlen, being passed
1412	* into this function Includes all the headers associated with
1413	* the packet, and may include routing headers, so to get to
1414	* the data payload as stored in the original IPv6 header we need
1415	* to subtract al those headers and the IP header.
1416	*
1417	* The 'max' local variable should also contain the offset from the start
1418	* of the reassembled packet to the octet just past the end of the octets
1419	* in the current fragment where:
1420	* - 'off' is the offset from the start of the reassembled packet to the
1421	* first octet in the fragment,
1422	* - 'plen' is the length of the "payload data length" Excluding all the
1423	* IPv6 headers of the fragment.
1424	* - 'hlen' is computed in pf_normalize_ip6() as the offset from the start
1425	* of the IPv6 packet to the beginning of the data.
1426	*/
1427	fr_max = off + plen;
1428
1429	DPFPRINTF(("0x%llx plen %u off %u fr_max %u\n",
1430	(uint64_t)VM_KERNEL_ADDRPERM(m), plen, off, fr_max));
1431
1432	/ Create a new range queue for this packet /
1433	if (*frag == NULL) {
1434	*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
1435	if (*frag == NULL) {
1436	pf_flush_fragments();
1437	*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
1438	if (*frag == NULL)
1439	goto no_mem;
1440	}
1441
1442	/ Get an entry for the queue /
1443	cur = pool_get(&pf_cent_pl, PR_NOWAIT);
1444	if (cur == NULL) {
1445	pool_put(&pf_cache_pl, *frag);
1446	*frag = NULL;
1447	goto no_mem;
1448	}
1449	pf_ncache++;
1450
1451	(*frag)->fr_flags = PFFRAG_NOBUFFER;
1452	(*frag)->fr_max = `0`;
1453	(*frag)->fr_af = AF_INET6;
1454	(*frag)->fr_srcx.v6addr = h->ip6_src;
1455	(*frag)->fr_dstx.v6addr = h->ip6_dst;
1456	(*frag)->fr_p = fh->ip6f_nxt;
1457	(*frag)->fr_id6 = fh->ip6f_ident;
1458	(*frag)->fr_timeout = pf_time_second();
1459
1460	cur->fr_off = off;
1461	cur->fr_end = fr_max;
1462	LIST_INIT(&(*frag)->fr_cache);
1463	LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);
1464
1465	RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
1466	TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);
1467
1468	DPFPRINTF(("frag6cache[%d]: new %d-%d\n", ntohl(fh->ip6f_ident),
1469	off, fr_max));
1470
1471	goto pass;
1472	}
1473
1474	/*
1475	* Find a fragment after the current one:
1476	* - off contains the real shifted offset.
1477	*/
1478	frp = NULL;
1479	LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
1480	if (fra->fr_off > off)
1481	break;
1482	frp = fra;
1483	}
1484
1485	VERIFY(frp != NULL \|\| fra != NULL);
1486
1487	if (frp != NULL) {
1488	int precut;
1489
1490	precut = frp->fr_end - off;
1491	if (precut >= plen) {
1492	/ Fragment is entirely a duplicate /
1493	DPFPRINTF(("frag6cache[%u]: dead (%d-%d) %d-%d\n",
1494	ntohl(fh->ip6f_ident), frp->fr_off, frp->fr_end,
1495	off, fr_max));
1496	goto drop_fragment;
1497	}
1498	if (precut == `0`) {
1499	/ They are adjacent. Fixup cache entry /
1500	DPFPRINTF(("frag6cache[%u]: adjacent (%d-%d) %d-%d\n",
1501	ntohl(fh->ip6f_ident), frp->fr_off, frp->fr_end,
1502	off, fr_max));
1503	frp->fr_end = fr_max;
1504	} else if (precut > `0`) {
1505	/ The first part of this payload overlaps with a*
1506	* fragment that has already been passed.
1507	* Need to trim off the first part of the payload.
1508	* But to do so easily, we need to create another
1509	* mbuf to throw the original header into.
1510	*/
1511
1512	DPFPRINTF(("frag6cache[%u]: chop %d (%d-%d) %d-%d\n",
1513	ntohl(fh->ip6f_ident), precut, frp->fr_off,
1514	frp->fr_end, off, fr_max));
1515
1516	off += precut;
1517	fr_max -= precut;
1518	/ Update the previous frag to encompass this one /
1519	frp->fr_end = fr_max;
1520
1521	if (!drop) {
1522	/ XXX Optimization opportunity*
1523	* This is a very heavy way to trim the payload.
1524	* we could do it much faster by diddling mbuf
1525	* internals but that would be even less legible
1526	* than this mbuf magic. For my next trick,
1527	* I'll pull a rabbit out of my laptop.
1528	*/
1529	*m0 = m_copym(m, `0`, hlen, M_NOWAIT);
1530	if (*m0 == NULL)
1531	goto no_mem;
1532	VERIFY((*m0)->m_next == NULL);
1533	m_adj(m, precut + hlen);
1534	m_cat(*m0, m);
1535	m = *m0;
1536	if (m->m_flags & M_PKTHDR) {
1537	int pktlen = `0`;
1538	struct mbuf *t;
1539	for (t = m; t; t = t->m_next)
1540	pktlen += t->m_len;
1541	m->m_pkthdr.len = pktlen;
1542	}
1543
1544	h = mtod(m, struct ip6_hdr *);
1545
1546	VERIFY((int)m->m_len ==
1547	ntohs(h->ip6_plen) - precut);
1548	fh->ip6f_offlg &= ~IP6F_OFF_MASK;
1549	fh->ip6f_offlg \|=
1550	htons(ntohs(fh->ip6f_offlg & IP6F_OFF_MASK)
1551	+ (precut >> `3`));
1552	h->ip6_plen = htons(ntohs(h->ip6_plen) -
1553	precut);
1554	} else {
1555	hosed++;
1556	}
1557	} else {
1558	/ There is a gap between fragments /
1559
1560	DPFPRINTF(("frag6cache[%u]: gap %d (%d-%d) %d-%d\n",
1561	ntohl(fh->ip6f_ident), -precut, frp->fr_off,
1562	frp->fr_end, off, fr_max));
1563
1564	cur = pool_get(&pf_cent_pl, PR_NOWAIT);
1565	if (cur == NULL)
1566	goto no_mem;
1567	pf_ncache++;
1568
1569	cur->fr_off = off;
1570	cur->fr_end = fr_max;
1571	LIST_INSERT_AFTER(frp, cur, fr_next);
1572	}
1573	}
1574
1575	if (fra != NULL) {
1576	int aftercut;
1577	int merge = `0`;
1578
1579	aftercut = fr_max - fra->fr_off;
1580	if (aftercut == `0`) {
1581	/ Adjacent fragments /
1582	DPFPRINTF(("frag6cache[%u]: adjacent %d-%d (%d-%d)\n",
1583	ntohl(fh->ip6f_ident), off, fr_max, fra->fr_off,
1584	fra->fr_end));
1585	fra->fr_off = off;
1586	merge = `1`;
1587	} else if (aftercut > `0`) {
1588	/ Need to chop off the tail of this fragment /
1589	DPFPRINTF(("frag6cache[%u]: chop %d %d-%d (%d-%d)\n",
1590	ntohl(fh->ip6f_ident), aftercut, off, fr_max,
1591	fra->fr_off, fra->fr_end));
1592	fra->fr_off = off;
1593	fr_max -= aftercut;
1594
1595	merge = `1`;
1596
1597	if (!drop) {
1598	m_adj(m, -aftercut);
1599	if (m->m_flags & M_PKTHDR) {
1600	int pktlen = `0`;
1601	struct mbuf *t;
1602	for (t = m; t; t = t->m_next)
1603	pktlen += t->m_len;
1604	m->m_pkthdr.len = pktlen;
1605	}
1606	h = mtod(m, struct ip6_hdr *);
1607	VERIFY((int)m->m_len ==
1608	ntohs(h->ip6_plen) - aftercut);
1609	h->ip6_plen =
1610	htons(ntohs(h->ip6_plen) - aftercut);
1611	} else {
1612	hosed++;
1613	}
1614	} else if (frp == NULL) {
1615	/ There is a gap between fragments /
1616	DPFPRINTF(("frag6cache[%u]: gap %d %d-%d (%d-%d)\n",
1617	ntohl(fh->ip6f_ident), -aftercut, off, fr_max,
1618	fra->fr_off, fra->fr_end));
1619
1620	cur = pool_get(&pf_cent_pl, PR_NOWAIT);
1621	if (cur == NULL)
1622	goto no_mem;
1623	pf_ncache++;
1624
1625	cur->fr_off = off;
1626	cur->fr_end = fr_max;
1627	LIST_INSERT_BEFORE(fra, cur, fr_next);
1628	}
1629
1630	/ Need to glue together two separate fragment descriptors /
1631	if (merge) {
1632	if (cur && fra->fr_off <= cur->fr_end) {
1633	/ Need to merge in a previous 'cur' /
1634	DPFPRINTF(("frag6cache[%u]: adjacent(merge "
1635	"%d-%d) %d-%d (%d-%d)\n",
1636	ntohl(fh->ip6f_ident), cur->fr_off,
1637	cur->fr_end, off, fr_max, fra->fr_off,
1638	fra->fr_end));
1639	fra->fr_off = cur->fr_off;
1640	LIST_REMOVE(cur, fr_next);
1641	pool_put(&pf_cent_pl, cur);
1642	pf_ncache--;
1643	cur = NULL;
1644	} else if (frp && fra->fr_off <= frp->fr_end) {
1645	/ Need to merge in a modified 'frp' /
1646	VERIFY(cur == NULL);
1647	DPFPRINTF(("frag6cache[%u]: adjacent(merge "
1648	"%d-%d) %d-%d (%d-%d)\n",
1649	ntohl(fh->ip6f_ident), frp->fr_off,
1650	frp->fr_end, off, fr_max, fra->fr_off,
1651	fra->fr_end));
1652	fra->fr_off = frp->fr_off;
1653	LIST_REMOVE(frp, fr_next);
1654	pool_put(&pf_cent_pl, frp);
1655	pf_ncache--;
1656	frp = NULL;
1657	}
1658	}
1659	}
1660
1661	if (hosed) {
1662	/*
1663	* We must keep tracking the overall fragment even when
1664	* we're going to drop it anyway so that we know when to
1665	* free the overall descriptor. Thus we drop the frag late.
1666	*/
1667	goto drop_fragment;
1668	}
1669
1670	pass:
1671	/ Update maximum data size /
1672	if ((*frag)->fr_max < fr_max)
1673	(*frag)->fr_max = fr_max;
1674
1675	/ This is the last segment /
1676	if (!mff)
1677	(*frag)->fr_flags \|= PFFRAG_SEENLAST;
1678
1679	/ Check if we are completely reassembled /
1680	if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
1681	LIST_FIRST(&(*frag)->fr_cache)->fr_off == `0` &&
1682	LIST_FIRST(&(frag)->fr_cache)->fr_end == (frag)->fr_max) {
1683	/ Remove from fragment queue /
1684	DPFPRINTF(("frag6cache[%u]: done 0-%d\n",
1685	ntohl(fh->ip6f_ident), (*frag)->fr_max));
1686	pf_free_fragment(*frag);
1687	*frag = NULL;
1688	}
1689
1690	return (m);
1691
1692	no_mem:
1693	*nomem = `1`;
1694
1695	/ Still need to pay attention to !IP_MF /
1696	if (!mff && *frag != NULL)
1697	(*frag)->fr_flags \|= PFFRAG_SEENLAST;
1698
1699	m_freem(m);
1700	return (NULL);
1701
1702	drop_fragment:
1703
1704	/ Still need to pay attention to !IP_MF /
1705	if (!mff && *frag != NULL)
1706	(*frag)->fr_flags \|= PFFRAG_SEENLAST;
1707
1708	if (drop) {
1709	/ This fragment has been deemed bad. Don't reass /
1710	if (((*frag)->fr_flags & PFFRAG_DROP) == `0`)
1711	DPFPRINTF(("frag6cache[%u]: dropping overall fragment\n",
1712	ntohl(fh->ip6f_ident)));
1713	(*frag)->fr_flags \|= PFFRAG_DROP;
1714	}
1715
1716	m_freem(m);
1717	return (NULL);
1718	}
1719
1720	int
1721	pf_normalize_ip(pbuf_t pbuf, int* dir, struct pfi_kif kif, u_short reason,
1722	struct pf_pdesc *pd)
1723	{
1724	struct mbuf *m;
1725	struct pf_rule *r;
1726	struct pf_frent *frent;
1727	struct pf_fragment *frag = NULL;
1728	struct ip *h = pbuf->pb_data;
1729	int mff = (ntohs(h->ip_off) & IP_MF);
1730	int hlen = h->ip_hl << `2`;
1731	u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << `3`;
1732	u_int16_t fr_max;
1733	int ip_len;
1734	int ip_off;
1735	int asd = `0`;
1736	struct pf_ruleset *ruleset = NULL;
1737	struct ifnet *ifp = pbuf->pb_ifp;
1738
1739	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
1740	while (r != NULL) {
1741	r->evaluations++;
1742	if (pfi_kif_match(r->kif, kif) == r->ifnot)
1743	r = r->skip[PF_SKIP_IFP].ptr;
1744	else if (r->direction && r->direction != dir)
1745	r = r->skip[PF_SKIP_DIR].ptr;
1746	else if (r->af && r->af != AF_INET)
1747	r = r->skip[PF_SKIP_AF].ptr;
1748	else if (r->proto && r->proto != h->ip_p)
1749	r = r->skip[PF_SKIP_PROTO].ptr;
1750	else if (PF_MISMATCHAW(&r->src.addr,
1751	(struct pf_addr *)&h->ip_src.s_addr, AF_INET,
1752	r->src.neg, kif))
1753	r = r->skip[PF_SKIP_SRC_ADDR].ptr;
1754	else if (PF_MISMATCHAW(&r->dst.addr,
1755	(struct pf_addr *)&h->ip_dst.s_addr, AF_INET,
1756	r->dst.neg, NULL))
1757	r = r->skip[PF_SKIP_DST_ADDR].ptr;
1758	else {
1759	if (r->anchor == NULL)
1760	break;
1761	else
1762	pf_step_into_anchor(&asd, &ruleset,
1763	PF_RULESET_SCRUB, &r, NULL, NULL);
1764	}
1765	if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
1766	PF_RULESET_SCRUB, &r, NULL, NULL))
1767	break;
1768	}
1769
1770	if (r == NULL \|\| r->action == PF_NOSCRUB)
1771	return (PF_PASS);
1772	else {
1773	r->packets[dir == PF_OUT]++;
1774	r->bytes[dir == PF_OUT] += pd->tot_len;
1775	}
1776
1777	/ Check for illegal packets /
1778	if (hlen < (int)sizeof (struct ip))
1779	goto drop;
1780
1781	if (hlen > ntohs(h->ip_len))
1782	goto drop;
1783
1784	/ Clear IP_DF if the rule uses the no-df option /
1785	if (r->rule_flag & PFRULE_NODF && h->ip_off & htons(IP_DF)) {
1786	u_int16_t ipoff = h->ip_off;
1787
1788	h->ip_off &= htons(~IP_DF);
1789	h->ip_sum = pf_cksum_fixup(h->ip_sum, ipoff, h->ip_off, `0`);
1790	}
1791
1792	/ We will need other tests here /
1793	if (!fragoff && !mff)
1794	goto no_fragment;
1795
1796	/*
1797	* We're dealing with a fragment now. Don't allow fragments
1798	* with IP_DF to enter the cache. If the flag was cleared by
1799	* no-df above, fine. Otherwise drop it.
1800	*/
1801	if (h->ip_off & htons(IP_DF)) {
1802	DPFPRINTF(("IP_DF\n"));
1803	goto bad;
1804	}
1805
1806	ip_len = ntohs(h->ip_len) - hlen;
1807	ip_off = (ntohs(h->ip_off) & IP_OFFMASK) << `3`;
1808
1809	/ All fragments are 8 byte aligned /
1810	if (mff && (ip_len & `0x7`)) {
1811	DPFPRINTF(("mff and %d\n", ip_len));
1812	goto bad;
1813	}
1814
1815	/ Respect maximum length /
1816	if (fragoff + ip_len > IP_MAXPACKET) {
1817	DPFPRINTF(("max packet %d\n", fragoff + ip_len));
1818	goto bad;
1819	}
1820	fr_max = fragoff + ip_len;
1821
1822	if ((r->rule_flag & (PFRULE_FRAGCROP\|PFRULE_FRAGDROP)) == `0`) {
1823	/ Fully buffer all of the fragments /
1824
1825	frag = pf_find_fragment_by_ipv4_header(h, &pf_frag_tree);
1826	/ Check if we saw the last fragment already /
1827	if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
1828	fr_max > frag->fr_max)
1829	goto bad;
1830
1831	if ((m = pbuf_to_mbuf(pbuf, TRUE)) == NULL) {
1832	REASON_SET(reason, PFRES_MEMORY);
1833	return (PF_DROP);
1834	}
1835
1836	VERIFY(!pbuf_is_valid(pbuf));
1837
1838	/ Restore iph pointer after pbuf_to_mbuf() /
1839	h = mtod(m, struct ip *);
1840
1841	/ Get an entry for the fragment queue /
1842	frent = pool_get(&pf_frent_pl, PR_NOWAIT);
1843	if (frent == NULL) {
1844	REASON_SET(reason, PFRES_MEMORY);
1845	m_freem(m);
1846	return (PF_DROP);
1847	}
1848	pf_nfrents++;
1849	frent->fr_ip = h;
1850	frent->fr_m = m;
1851
1852	/ Might return a completely reassembled mbuf, or NULL /
1853	DPFPRINTF(("reass IPv4 frag %d @ %d-%d\n", ntohs(h->ip_id),
1854	fragoff, fr_max));
1855	m = pf_reassemble(m, &frag, frent, mff);
1856
1857	if (m == NULL)
1858	return (PF_DROP);
1859
1860	VERIFY(m->m_flags & M_PKTHDR);
1861	pbuf_init_mbuf(pbuf, m, ifp);
1862
1863	/ use mtag from concatenated mbuf chain /
1864	pd->pf_mtag = pf_find_mtag_pbuf(pbuf);
1865	#if 0
1866	// SCW: This check is superfluous
1867	#if DIAGNOSTIC
1868	if (pd->pf_mtag == NULL) {
1869	printf("%s: pf_find_mtag returned NULL(1)\n", __func__);
1870	if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
1871	m_freem(m);
1872	m = NULL;
1873	goto no_mem;
1874	}
1875	}
1876	#endif
1877	#endif
1878
1879	h = mtod(m, struct ip *);
1880
1881	if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
1882	goto drop;
1883	} else {
1884	/ non-buffering fragment cache (drops or masks overlaps) /
1885	int nomem = `0`;
1886
1887	if (dir == PF_OUT && (pd->pf_mtag->pftag_flags & PF_TAG_FRAGCACHE)) {
1888	/*
1889	* Already passed the fragment cache in the
1890	* input direction. If we continued, it would
1891	* appear to be a dup and would be dropped.
1892	*/
1893	goto fragment_pass;
1894	}
1895
1896	frag = pf_find_fragment_by_ipv4_header(h, &pf_cache_tree);
1897
1898	/ Check if we saw the last fragment already /
1899	if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
1900	fr_max > frag->fr_max) {
1901	if (r->rule_flag & PFRULE_FRAGDROP)
1902	frag->fr_flags \|= PFFRAG_DROP;
1903	goto bad;
1904	}
1905
1906	if ((m = pbuf_to_mbuf(pbuf, TRUE)) == NULL) {
1907	REASON_SET(reason, PFRES_MEMORY);
1908	goto bad;
1909	}
1910
1911	VERIFY(!pbuf_is_valid(pbuf));
1912
1913	/ Restore iph pointer after pbuf_to_mbuf() /
1914	h = mtod(m, struct ip *);
1915
1916	m = pf_fragcache(&m, h, &frag, mff,
1917	(r->rule_flag & PFRULE_FRAGDROP) ? `1` : `0`, &nomem);
1918	if (m == NULL) {
1919	// Note: pf_fragcache() has already m_freem'd the mbuf
1920	if (nomem)
1921	goto no_mem;
1922	goto drop;
1923	}
1924
1925	VERIFY(m->m_flags & M_PKTHDR);
1926	pbuf_init_mbuf(pbuf, m, ifp);
1927
1928	/ use mtag from copied and trimmed mbuf chain /
1929	pd->pf_mtag = pf_find_mtag_pbuf(pbuf);
1930	#if 0
1931	// SCW: This check is superfluous
1932	#if DIAGNOSTIC
1933	if (pd->pf_mtag == NULL) {
1934	printf("%s: pf_find_mtag returned NULL(2)\n", __func__);
1935	if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
1936	m_freem(m);
1937	m = NULL;
1938	goto no_mem;
1939	}
1940	}
1941	#endif
1942	#endif
1943	if (dir == PF_IN)
1944	pd->pf_mtag->pftag_flags \|= PF_TAG_FRAGCACHE;
1945
1946	if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
1947	goto drop;
1948
1949	goto fragment_pass;
1950	}
1951
1952	no_fragment:
1953	/ At this point, only IP_DF is allowed in ip_off /
1954	if (h->ip_off & ~htons(IP_DF)) {
1955	u_int16_t ipoff = h->ip_off;
1956
1957	h->ip_off &= htons(IP_DF);
1958	h->ip_sum = pf_cksum_fixup(h->ip_sum, ipoff, h->ip_off, `0`);
1959	}
1960
1961	/ Enforce a minimum ttl, may cause endless packet loops /
1962	if (r->min_ttl && h->ip_ttl < r->min_ttl) {
1963	u_int16_t ip_ttl = h->ip_ttl;
1964
1965	h->ip_ttl = r->min_ttl;
1966	h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, `0`);
1967	}
1968	if (r->rule_flag & PFRULE_RANDOMID) {
1969	u_int16_t oip_id = h->ip_id;
1970
1971	if (rfc6864 && IP_OFF_IS_ATOMIC(ntohs(h->ip_off))) {
1972	h->ip_id = `0`;
1973	} else {
1974	h->ip_id = ip_randomid();
1975	}
1976	h->ip_sum = pf_cksum_fixup(h->ip_sum, oip_id, h->ip_id, `0`);
1977	}
1978	if ((r->rule_flag & (PFRULE_FRAGCROP\|PFRULE_FRAGDROP)) == `0`)
1979	pd->flags \|= PFDESC_IP_REAS;
1980
1981	return (PF_PASS);
1982
1983	fragment_pass:
1984	/ Enforce a minimum ttl, may cause endless packet loops /
1985	if (r->min_ttl && h->ip_ttl < r->min_ttl) {
1986	u_int16_t ip_ttl = h->ip_ttl;
1987
1988	h->ip_ttl = r->min_ttl;
1989	h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, `0`);
1990	}
1991	if ((r->rule_flag & (PFRULE_FRAGCROP\|PFRULE_FRAGDROP)) == `0`)
1992	pd->flags \|= PFDESC_IP_REAS;
1993	return (PF_PASS);
1994
1995	no_mem:
1996	REASON_SET(reason, PFRES_MEMORY);
1997	if (r != NULL && r->log && pbuf_is_valid(pbuf))
1998	PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, *reason, r,
1999	NULL, NULL, pd);
2000	return (PF_DROP);
2001
2002	drop:
2003	REASON_SET(reason, PFRES_NORM);
2004	if (r != NULL && r->log && pbuf_is_valid(pbuf))
2005	PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, *reason, r,
2006	NULL, NULL, pd);
2007	return (PF_DROP);
2008
2009	bad:
2010	DPFPRINTF(("dropping bad IPv4 fragment\n"));
2011
2012	/ Free associated fragments /
2013	if (frag != NULL)
2014	pf_free_fragment(frag);
2015
2016	REASON_SET(reason, PFRES_FRAG);
2017	if (r != NULL && r->log && pbuf_is_valid(pbuf))
2018	PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, *reason, r, NULL, NULL, pd);
2019
2020	return (PF_DROP);
2021	}
2022
2023	#if INET6
2024	int
2025	pf_normalize_ip6(pbuf_t pbuf, int* dir, struct pfi_kif *kif,
2026	u_short reason, struct* pf_pdesc *pd)
2027	{
2028	struct mbuf *m;
2029	struct pf_rule *r;
2030	struct ip6_hdr *h = pbuf->pb_data;
2031	int off;
2032	struct ip6_ext ext;
2033	/ adi XXX /
2034	#if 0
2035	struct ip6_opt opt;
2036	struct ip6_opt_jumbo jumbo;
2037	int optend;
2038	int ooff;
2039	#endif
2040	struct ip6_frag frag;
2041	u_int32_t jumbolen = `0`, plen;
2042	u_int16_t fragoff = `0`;
2043	u_int8_t proto;
2044	int terminal;
2045	struct pf_frent *frent;
2046	struct pf_fragment *pff = NULL;
2047	int mff = `0`, rh_cnt = `0`;
2048	u_int16_t fr_max;
2049	int asd = `0`;
2050	struct pf_ruleset *ruleset = NULL;
2051	struct ifnet *ifp = pbuf->pb_ifp;
2052
2053	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
2054	while (r != NULL) {
2055	r->evaluations++;
2056	if (pfi_kif_match(r->kif, kif) == r->ifnot)
2057	r = r->skip[PF_SKIP_IFP].ptr;
2058	else if (r->direction && r->direction != dir)
2059	r = r->skip[PF_SKIP_DIR].ptr;
2060	else if (r->af && r->af != AF_INET6)
2061	r = r->skip[PF_SKIP_AF].ptr;
2062	#if 0 /* header chain! */
2063	else if (r->proto && r->proto != h->ip6_nxt)
2064	r = r->skip[PF_SKIP_PROTO].ptr;
2065	#endif
2066	else if (PF_MISMATCHAW(&r->src.addr,
2067	(struct pf_addr *)(uintptr_t)&h->ip6_src, AF_INET6,
2068	r->src.neg, kif))
2069	r = r->skip[PF_SKIP_SRC_ADDR].ptr;
2070	else if (PF_MISMATCHAW(&r->dst.addr,
2071	(struct pf_addr *)(uintptr_t)&h->ip6_dst, AF_INET6,
2072	r->dst.neg, NULL))
2073	r = r->skip[PF_SKIP_DST_ADDR].ptr;
2074	else {
2075	if (r->anchor == NULL)
2076	break;
2077	else
2078	pf_step_into_anchor(&asd, &ruleset,
2079	PF_RULESET_SCRUB, &r, NULL, NULL);
2080	}
2081	if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
2082	PF_RULESET_SCRUB, &r, NULL, NULL))
2083	break;
2084	}
2085
2086	if (r == NULL \|\| r->action == PF_NOSCRUB)
2087	return (PF_PASS);
2088	else {
2089	r->packets[dir == PF_OUT]++;
2090	r->bytes[dir == PF_OUT] += pd->tot_len;
2091	}
2092
2093	/ Check for illegal packets /
2094	if ((uint32_t)(sizeof (struct ip6_hdr) + IPV6_MAXPACKET) <
2095	pbuf->pb_packet_len)
2096	goto drop;
2097
2098	off = sizeof (struct ip6_hdr);
2099	proto = h->ip6_nxt;
2100	terminal = `0`;
2101	do {
2102	pd->proto = proto;
2103	switch (proto) {
2104	case IPPROTO_FRAGMENT:
2105	goto fragment;
2106	case IPPROTO_AH:
2107	case IPPROTO_ROUTING:
2108	case IPPROTO_DSTOPTS:
2109	if (!pf_pull_hdr(pbuf, off, &ext, sizeof (ext), NULL,
2110	NULL, AF_INET6))
2111	goto shortpkt;
2112	/*
2113	* <jhw@apple.com>
2114	* Multiple routing headers not allowed.
2115	* Routing header type zero considered harmful.
2116	*/
2117	if (proto == IPPROTO_ROUTING) {
2118	const struct ip6_rthdr *rh =
2119	(const struct ip6_rthdr *)&ext;
2120	if (rh_cnt++)
2121	goto drop;
2122	if (rh->ip6r_type == IPV6_RTHDR_TYPE_0)
2123	goto drop;
2124	}
2125	else
2126	if (proto == IPPROTO_AH)
2127	off += (ext.ip6e_len + `2`) * `4`;
2128	else
2129	off += (ext.ip6e_len + `1`) * `8`;
2130	proto = ext.ip6e_nxt;
2131	break;
2132	case IPPROTO_HOPOPTS:
2133	/ adi XXX /
2134	#if 0
2135	if (!pf_pull_hdr(m, off, &ext, sizeof (ext), NULL,
2136	NULL, AF_INET6))
2137	goto shortpkt;
2138	optend = off + (ext.ip6e_len + `1`) * `8`;
2139	ooff = off + sizeof (ext);
2140	do {
2141	if (!pf_pull_hdr(m, ooff, &opt.ip6o_type,
2142	sizeof (opt.ip6o_type), NULL, NULL,
2143	AF_INET6))
2144	goto shortpkt;
2145	if (opt.ip6o_type == IP6OPT_PAD1) {
2146	ooff++;
2147	continue;
2148	}
2149	if (!pf_pull_hdr(m, ooff, &opt, sizeof (opt),
2150	NULL, NULL, AF_INET6))
2151	goto shortpkt;
2152	if (ooff + sizeof (opt) + opt.ip6o_len > optend)
2153	goto drop;
2154	switch (opt.ip6o_type) {
2155	case IP6OPT_JUMBO:
2156	if (h->ip6_plen != `0`)
2157	goto drop;
2158	if (!pf_pull_hdr(m, ooff, &jumbo,
2159	sizeof (jumbo), NULL, NULL,
2160	AF_INET6))
2161	goto shortpkt;
2162	memcpy(&jumbolen, jumbo.ip6oj_jumbo_len,
2163	sizeof (jumbolen));
2164	jumbolen = ntohl(jumbolen);
2165	if (jumbolen <= IPV6_MAXPACKET)
2166	goto drop;
2167	if (sizeof (struct ip6_hdr) +
2168	jumbolen != m->m_pkthdr.len)
2169	goto drop;
2170	break;
2171	default:
2172	break;
2173	}
2174	ooff += sizeof (opt) + opt.ip6o_len;
2175	} while (ooff < optend);
2176
2177	off = optend;
2178	proto = ext.ip6e_nxt;
2179	break;
2180	#endif
2181	default:
2182	terminal = `1`;
2183	break;
2184	}
2185	} while (!terminal);
2186
2187	/ jumbo payload option must be present, or plen > 0 /
2188	if (ntohs(h->ip6_plen) == `0`)
2189	plen = jumbolen;
2190	else
2191	plen = ntohs(h->ip6_plen);
2192	if (plen == `0`)
2193	goto drop;
2194	if ((uint32_t)(sizeof (struct ip6_hdr) + plen) > pbuf->pb_packet_len)
2195	goto shortpkt;
2196
2197	/ Enforce a minimum ttl, may cause endless packet loops /
2198	if (r->min_ttl && h->ip6_hlim < r->min_ttl)
2199	h->ip6_hlim = r->min_ttl;
2200
2201	return (PF_PASS);
2202
2203	fragment:
2204	if (ntohs(h->ip6_plen) == `0` \|\| jumbolen)
2205	goto drop;
2206	plen = ntohs(h->ip6_plen);
2207
2208	if (!pf_pull_hdr(pbuf, off, &frag, sizeof (frag), NULL, NULL, AF_INET6))
2209	goto shortpkt;
2210	fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK);
2211	pd->proto = frag.ip6f_nxt;
2212	mff = ntohs(frag.ip6f_offlg & IP6F_MORE_FRAG);
2213	off += sizeof frag;
2214	if (fragoff + (plen - off) > IPV6_MAXPACKET)
2215	goto badfrag;
2216
2217	fr_max = fragoff + plen - (off - sizeof(struct ip6_hdr));
2218	// XXX SCW: mbuf-specific
2219	// DPFPRINTF(("0x%llx IPv6 frag plen %u mff %d off %u fragoff %u "
2220	// "fr_max %u\n", (uint64_t)VM_KERNEL_ADDRPERM(m), plen, mff, off,
2221	// fragoff, fr_max));
2222
2223	if ((r->rule_flag & (PFRULE_FRAGCROP\|PFRULE_FRAGDROP)) == `0`) {
2224	/ Fully buffer all of the fragments /
2225	pd->flags \|= PFDESC_IP_REAS;
2226
2227	pff = pf_find_fragment_by_ipv6_header(h, &frag,
2228	&pf_frag_tree);
2229
2230	/ Check if we saw the last fragment already /
2231	if (pff != NULL && (pff->fr_flags & PFFRAG_SEENLAST) &&
2232	fr_max > pff->fr_max)
2233	goto badfrag;
2234
2235	if ((m = pbuf_to_mbuf(pbuf, TRUE)) == NULL) {
2236	REASON_SET(reason, PFRES_MEMORY);
2237	return (PF_DROP);
2238	}
2239
2240	/ Restore iph pointer after pbuf_to_mbuf() /
2241	h = mtod(m, struct ip6_hdr *);
2242
2243	/ Get an entry for the fragment queue /
2244	frent = pool_get(&pf_frent_pl, PR_NOWAIT);
2245	if (frent == NULL) {
2246	REASON_SET(reason, PFRES_MEMORY);
2247	return (PF_DROP);
2248	}
2249
2250	pf_nfrents++;
2251	frent->fr_ip6 = h;
2252	frent->fr_m = m;
2253	frent->fr_ip6f_opt = frag;
2254	frent->fr_ip6f_hlen = off;
2255
2256	/ Might return a completely reassembled mbuf, or NULL /
2257	DPFPRINTF(("reass IPv6 frag %d @ %d-%d\n",
2258	ntohl(frag.ip6f_ident), fragoff, fr_max));
2259	m = pf_reassemble6(&m, &pff, frent, mff);
2260
2261	if (m == NULL)
2262	return (PF_DROP);
2263
2264	pbuf_init_mbuf(pbuf, m, ifp);
2265	h = pbuf->pb_data;
2266
2267	if (pff != NULL && (pff->fr_flags & PFFRAG_DROP))
2268	goto drop;
2269	}
2270	else if (dir == PF_IN \|\| !(pd->pf_mtag->pftag_flags & PF_TAG_FRAGCACHE)) {
2271	/ non-buffering fragment cache (overlaps: see RFC 5722) /
2272	int nomem = `0`;
2273
2274	pff = pf_find_fragment_by_ipv6_header(h, &frag,
2275	&pf_cache_tree);
2276
2277	/ Check if we saw the last fragment already /
2278	if (pff != NULL && (pff->fr_flags & PFFRAG_SEENLAST) &&
2279	fr_max > pff->fr_max) {
2280	if (r->rule_flag & PFRULE_FRAGDROP)
2281	pff->fr_flags \|= PFFRAG_DROP;
2282	goto badfrag;
2283	}
2284
2285	if ((m = pbuf_to_mbuf(pbuf, TRUE)) == NULL) {
2286	goto no_mem;
2287	}
2288
2289	/ Restore iph pointer after pbuf_to_mbuf() /
2290	h = mtod(m, struct ip6_hdr *);
2291
2292	m = pf_frag6cache(&m, h, &frag, &pff, off, mff,
2293	(r->rule_flag & PFRULE_FRAGDROP) ? `1` : `0`, &nomem);
2294	if (m == NULL) {
2295	// Note: pf_frag6cache() has already m_freem'd the mbuf
2296	if (nomem)
2297	goto no_mem;
2298	goto drop;
2299	}
2300
2301	pbuf_init_mbuf(pbuf, m, ifp);
2302	pd->pf_mtag = pf_find_mtag_pbuf(pbuf);
2303	h = pbuf->pb_data;
2304
2305	if (dir == PF_IN)
2306	pd->pf_mtag->pftag_flags \|= PF_TAG_FRAGCACHE;
2307
2308	if (pff != NULL && (pff->fr_flags & PFFRAG_DROP))
2309	goto drop;
2310	}
2311
2312	/ Enforce a minimum ttl, may cause endless packet loops /
2313	if (r->min_ttl && h->ip6_hlim < r->min_ttl)
2314	h->ip6_hlim = r->min_ttl;
2315	return (PF_PASS);
2316
2317	no_mem:
2318	REASON_SET(reason, PFRES_MEMORY);
2319	goto dropout;
2320
2321	shortpkt:
2322	REASON_SET(reason, PFRES_SHORT);
2323	goto dropout;
2324
2325	drop:
2326	REASON_SET(reason, PFRES_NORM);
2327	goto dropout;
2328
2329	badfrag:
2330	DPFPRINTF(("dropping bad IPv6 fragment\n"));
2331	REASON_SET(reason, PFRES_FRAG);
2332	goto dropout;
2333
2334	dropout:
2335	if (pff != NULL)
2336	pf_free_fragment(pff);
2337	if (r != NULL && r->log && pbuf_is_valid(pbuf))
2338	PFLOG_PACKET(kif, h, pbuf, AF_INET6, dir, *reason, r, NULL, NULL, pd);
2339	return (PF_DROP);
2340	}
2341	#endif /* INET6 */
2342
2343	int
2344	pf_normalize_tcp(int dir, struct pfi_kif kif, pbuf_t pbuf, int ipoff,
2345	int off, void h, struct* pf_pdesc *pd)
2346	{
2347	#pragma unused(ipoff, h)
2348	struct pf_rule r, rm = NULL;
2349	struct tcphdr *th = pd->hdr.tcp;
2350	int rewrite = `0`;
2351	int asd = `0`;
2352	u_short reason;
2353	u_int8_t flags;
2354	sa_family_t af = pd->af;
2355	struct pf_ruleset *ruleset = NULL;
2356	union pf_state_xport sxport, dxport;
2357
2358	sxport.port = th->th_sport;
2359	dxport.port = th->th_dport;
2360
2361	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
2362	while (r != NULL) {
2363	r->evaluations++;
2364	if (pfi_kif_match(r->kif, kif) == r->ifnot)
2365	r = r->skip[PF_SKIP_IFP].ptr;
2366	else if (r->direction && r->direction != dir)
2367	r = r->skip[PF_SKIP_DIR].ptr;
2368	else if (r->af && r->af != af)
2369	r = r->skip[PF_SKIP_AF].ptr;
2370	else if (r->proto && r->proto != pd->proto)
2371	r = r->skip[PF_SKIP_PROTO].ptr;
2372	else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
2373	r->src.neg, kif))
2374	r = r->skip[PF_SKIP_SRC_ADDR].ptr;
2375	else if (r->src.xport.range.op &&
2376	!pf_match_xport(r->src.xport.range.op, r->proto_variant,
2377	&r->src.xport, &sxport))
2378	r = r->skip[PF_SKIP_SRC_PORT].ptr;
2379	else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
2380	r->dst.neg, NULL))
2381	r = r->skip[PF_SKIP_DST_ADDR].ptr;
2382	else if (r->dst.xport.range.op &&
2383	!pf_match_xport(r->dst.xport.range.op, r->proto_variant,
2384	&r->dst.xport, &dxport))
2385	r = r->skip[PF_SKIP_DST_PORT].ptr;
2386	else if (r->os_fingerprint != PF_OSFP_ANY &&
2387	!pf_osfp_match(pf_osfp_fingerprint(pd, pbuf, off, th),
2388	r->os_fingerprint))
2389	r = TAILQ_NEXT(r, entries);
2390	else {
2391	if (r->anchor == NULL) {
2392	rm = r;
2393	break;
2394	} else {
2395	pf_step_into_anchor(&asd, &ruleset,
2396	PF_RULESET_SCRUB, &r, NULL, NULL);
2397	}
2398	}
2399	if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
2400	PF_RULESET_SCRUB, &r, NULL, NULL))
2401	break;
2402	}
2403
2404	if (rm == NULL \|\| rm->action == PF_NOSCRUB)
2405	return (PF_PASS);
2406	else {
2407	r->packets[dir == PF_OUT]++;
2408	r->bytes[dir == PF_OUT] += pd->tot_len;
2409	}
2410
2411	if (rm->rule_flag & PFRULE_REASSEMBLE_TCP)
2412	pd->flags \|= PFDESC_TCP_NORM;
2413
2414	flags = th->th_flags;
2415	if (flags & TH_SYN) {
2416	/ Illegal packet /
2417	if (flags & TH_RST)
2418	goto tcp_drop;
2419
2420	if (flags & TH_FIN)
2421	flags &= ~TH_FIN;
2422	} else {
2423	/ Illegal packet /
2424	if (!(flags & (TH_ACK\|TH_RST)))
2425	goto tcp_drop;
2426	}
2427
2428	if (!(flags & TH_ACK)) {
2429	/ These flags are only valid if ACK is set /
2430	if ((flags & TH_FIN) \|\| (flags & TH_PUSH) \|\| (flags & TH_URG))
2431	goto tcp_drop;
2432	}
2433
2434	/ Check for illegal header length /
2435	if (th->th_off < (sizeof (struct tcphdr) >> `2`))
2436	goto tcp_drop;
2437
2438	/ If flags changed, or reserved data set, then adjust /
2439	if (flags != th->th_flags \|\| th->th_x2 != `0`) {
2440	u_int16_t ov, nv;
2441
2442	ov = (u_int16_t )(&th->th_ack + `1`);
2443	th->th_flags = flags;
2444	th->th_x2 = `0`;
2445	nv = (u_int16_t )(&th->th_ack + `1`);
2446
2447	th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, `0`);
2448	rewrite = `1`;
2449	}
2450
2451	/ Remove urgent pointer, if TH_URG is not set /
2452	if (!(flags & TH_URG) && th->th_urp) {
2453	th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, `0`, `0`);
2454	th->th_urp = `0`;
2455	rewrite = `1`;
2456	}
2457
2458	/ copy back packet headers if we sanitized /
2459	/ Process options /
2460	if (r->max_mss) {
2461	int rv = pf_normalize_tcpopt(r, dir, kif, pd, pbuf, th, off,
2462	&rewrite);
2463	if (rv == PF_DROP)
2464	return rv;
2465	pbuf = pd->mp;
2466	}
2467
2468	if (rewrite) {
2469	if (pf_lazy_makewritable(pd, pbuf,
2470	off + sizeof (*th)) == NULL) {
2471	REASON_SET(&reason, PFRES_MEMORY);
2472	if (r->log)
2473	PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, reason,
2474	r, `0`, `0`, pd);
2475	return PF_DROP;
2476	}
2477
2478	pbuf_copy_back(pbuf, off, sizeof (*th), th);
2479	}
2480
2481	return (PF_PASS);
2482
2483	tcp_drop:
2484	REASON_SET(&reason, PFRES_NORM);
2485	if (rm != NULL && r->log)
2486	PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, reason, r, NULL, NULL, pd);
2487	return (PF_DROP);
2488	}
2489
2490	int
2491	pf_normalize_tcp_init(pbuf_t pbuf, int* off, struct pf_pdesc *pd,
2492	struct tcphdr th, struct* pf_state_peer src, struct* pf_state_peer *dst)
2493	{
2494	#pragma unused(dst)
2495	u_int32_t tsval, tsecr;
2496	u_int8_t hdr[`60`];
2497	u_int8_t *opt;
2498
2499	VERIFY(src->scrub == NULL);
2500
2501	src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT);
2502	if (src->scrub == NULL)
2503	return (`1`);
2504	bzero(src->scrub, sizeof (*src->scrub));
2505
2506	switch (pd->af) {
2507	#if INET
2508	case AF_INET: {
2509	struct ip *h = pbuf->pb_data;
2510	src->scrub->pfss_ttl = h->ip_ttl;
2511	break;
2512	}
2513	#endif /* INET */
2514	#if INET6
2515	case AF_INET6: {
2516	struct ip6_hdr *h = pbuf->pb_data;
2517	src->scrub->pfss_ttl = h->ip6_hlim;
2518	break;
2519	}
2520	#endif /* INET6 */
2521	}
2522
2523
2524	/*
2525	* All normalizations below are only begun if we see the start of
2526	* the connections. They must all set an enabled bit in pfss_flags
2527	*/
2528	if ((th->th_flags & TH_SYN) == `0`)
2529	return (`0`);
2530
2531
2532	if (th->th_off > (sizeof (struct tcphdr) >> `2`) && src->scrub &&
2533	pf_pull_hdr(pbuf, off, hdr, th->th_off << `2`, NULL, NULL, pd->af)) {
2534	/ Diddle with TCP options /
2535	int hlen;
2536	opt = hdr + sizeof (struct tcphdr);
2537	hlen = (th->th_off << `2`) - sizeof (struct tcphdr);
2538	while (hlen >= TCPOLEN_TIMESTAMP) {
2539	switch (*opt) {
2540	case TCPOPT_EOL: / FALLTHROUGH /
2541	case TCPOPT_NOP:
2542	opt++;
2543	hlen--;
2544	break;
2545	case TCPOPT_TIMESTAMP:
2546	if (opt[`1`] >= TCPOLEN_TIMESTAMP) {
2547	src->scrub->pfss_flags \|=
2548	PFSS_TIMESTAMP;
2549	src->scrub->pfss_ts_mod =
2550	htonl(random());
2551
2552	/ note PFSS_PAWS not set yet /
2553	memcpy(&tsval, &opt[`2`],
2554	sizeof (u_int32_t));
2555	memcpy(&tsecr, &opt[`6`],
2556	sizeof (u_int32_t));
2557	src->scrub->pfss_tsval0 = ntohl(tsval);
2558	src->scrub->pfss_tsval = ntohl(tsval);
2559	src->scrub->pfss_tsecr = ntohl(tsecr);
2560	getmicrouptime(&src->scrub->pfss_last);
2561	}
2562	/ FALLTHROUGH /
2563	default:
2564	hlen -= MAX(opt[`1`], `2`);
2565	opt += MAX(opt[`1`], `2`);
2566	break;
2567	}
2568	}
2569	}
2570
2571	return (`0`);
2572	}
2573
2574	void
2575	pf_normalize_tcp_cleanup(struct pf_state *state)
2576	{
2577	if (state->src.scrub)
2578	pool_put(&pf_state_scrub_pl, state->src.scrub);
2579	if (state->dst.scrub)
2580	pool_put(&pf_state_scrub_pl, state->dst.scrub);
2581
2582	/ Someday... flush the TCP segment reassembly descriptors. /
2583	}
2584
2585	int
2586	pf_normalize_tcp_stateful(pbuf_t pbuf, int* off, struct pf_pdesc *pd,
2587	u_short reason, struct* tcphdr th, struct* pf_state *state,
2588	struct pf_state_peer src, struct* pf_state_peer dst, int* *writeback)
2589	{
2590	struct timeval uptime;
2591	u_int32_t tsval = `0`, tsecr = `0`;
2592	u_int tsval_from_last;
2593	u_int8_t hdr[`60`];
2594	u_int8_t *opt;
2595	int copyback = `0`;
2596	int got_ts = `0`;
2597
2598	VERIFY(src->scrub \|\| dst->scrub);
2599
2600	/*
2601	* Enforce the minimum TTL seen for this connection. Negate a common
2602	* technique to evade an intrusion detection system and confuse
2603	* firewall state code.
2604	*/
2605	switch (pd->af) {
2606	#if INET
2607	case AF_INET: {
2608	if (src->scrub) {
2609	struct ip *h = pbuf->pb_data;
2610	if (h->ip_ttl > src->scrub->pfss_ttl)
2611	src->scrub->pfss_ttl = h->ip_ttl;
2612	h->ip_ttl = src->scrub->pfss_ttl;
2613	}
2614	break;
2615	}
2616	#endif /* INET */
2617	#if INET6
2618	case AF_INET6: {
2619	if (src->scrub) {
2620	struct ip6_hdr *h = pbuf->pb_data;
2621	if (h->ip6_hlim > src->scrub->pfss_ttl)
2622	src->scrub->pfss_ttl = h->ip6_hlim;
2623	h->ip6_hlim = src->scrub->pfss_ttl;
2624	}
2625	break;
2626	}
2627	#endif /* INET6 */
2628	}
2629
2630	if (th->th_off > (sizeof (struct tcphdr) >> `2`) &&
2631	((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) \|\|
2632	(dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
2633	pf_pull_hdr(pbuf, off, hdr, th->th_off << `2`, NULL, NULL, pd->af)) {
2634	/ Diddle with TCP options /
2635	int hlen;
2636	opt = hdr + sizeof (struct tcphdr);
2637	hlen = (th->th_off << `2`) - sizeof (struct tcphdr);
2638	while (hlen >= TCPOLEN_TIMESTAMP) {
2639	switch (*opt) {
2640	case TCPOPT_EOL: / FALLTHROUGH /
2641	case TCPOPT_NOP:
2642	opt++;
2643	hlen--;
2644	break;
2645	case TCPOPT_TIMESTAMP:
2646	/*
2647	* Modulate the timestamps. Can be used for
2648	* NAT detection, OS uptime determination or
2649	* reboot detection.
2650	*/
2651
2652	if (got_ts) {
2653	/ Huh? Multiple timestamps!? /
2654	if (pf_status.debug >= PF_DEBUG_MISC) {
2655	DPFPRINTF(("multiple TS??"));
2656	pf_print_state(state);
2657	printf("\n");
2658	}
2659	REASON_SET(reason, PFRES_TS);
2660	return (PF_DROP);
2661	}
2662	if (opt[`1`] >= TCPOLEN_TIMESTAMP) {
2663	memcpy(&tsval, &opt[`2`],
2664	sizeof (u_int32_t));
2665	if (tsval && src->scrub &&
2666	(src->scrub->pfss_flags &
2667	PFSS_TIMESTAMP)) {
2668	tsval = ntohl(tsval);
2669	pf_change_a(&opt[`2`],
2670	&th->th_sum,
2671	htonl(tsval +
2672	src->scrub->pfss_ts_mod),
2673	`0`);
2674	copyback = `1`;
2675	}
2676
2677	/ Modulate TS reply iff valid (!0) /
2678	memcpy(&tsecr, &opt[`6`],
2679	sizeof (u_int32_t));
2680	if (tsecr && dst->scrub &&
2681	(dst->scrub->pfss_flags &
2682	PFSS_TIMESTAMP)) {
2683	tsecr = ntohl(tsecr)
2684	- dst->scrub->pfss_ts_mod;
2685	pf_change_a(&opt[`6`],
2686	&th->th_sum, htonl(tsecr),
2687	`0`);
2688	copyback = `1`;
2689	}
2690	got_ts = `1`;
2691	}
2692	/ FALLTHROUGH /
2693	default:
2694	hlen -= MAX(opt[`1`], `2`);
2695	opt += MAX(opt[`1`], `2`);
2696	break;
2697	}
2698	}
2699	if (copyback) {
2700	/ Copyback the options, caller copys back header /
2701	int optoff = off + sizeof (*th);
2702	int optlen = (th->th_off << `2`) - sizeof (*th);
2703	if (pf_lazy_makewritable(pd, pbuf, optoff + optlen) ==
2704	NULL) {
2705	REASON_SET(reason, PFRES_MEMORY);
2706	return PF_DROP;
2707	}
2708	*writeback = optoff + optlen;
2709	pbuf_copy_back(pbuf, optoff, optlen, hdr + sizeof(*th));
2710	}
2711	}
2712
2713
2714	/*
2715	* Must invalidate PAWS checks on connections idle for too long.
2716	* The fastest allowed timestamp clock is 1ms. That turns out to
2717	* be about 24 days before it wraps. XXX Right now our lowerbound
2718	* TS echo check only works for the first 12 days of a connection
2719	* when the TS has exhausted half its 32bit space
2720	*/
2721	#define TS_MAX_IDLE (242460*60)
2722	#define TS_MAX_CONN (12246060) / XXX remove when better tsecr check */
2723
2724	getmicrouptime(&uptime);
2725	if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
2726	(uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE \|\|
2727	pf_time_second() - state->creation > TS_MAX_CONN)) {
2728	if (pf_status.debug >= PF_DEBUG_MISC) {
2729	DPFPRINTF(("src idled out of PAWS\n"));
2730	pf_print_state(state);
2731	printf("\n");
2732	}
2733	src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS)
2734	\| PFSS_PAWS_IDLED;
2735	}
2736	if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
2737	uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
2738	if (pf_status.debug >= PF_DEBUG_MISC) {
2739	DPFPRINTF(("dst idled out of PAWS\n"));
2740	pf_print_state(state);
2741	printf("\n");
2742	}
2743	dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS)
2744	\| PFSS_PAWS_IDLED;
2745	}
2746
2747	if (got_ts && src->scrub && dst->scrub &&
2748	(src->scrub->pfss_flags & PFSS_PAWS) &&
2749	(dst->scrub->pfss_flags & PFSS_PAWS)) {
2750	/*
2751	* Validate that the timestamps are "in-window".
2752	* RFC1323 describes TCP Timestamp options that allow
2753	* measurement of RTT (round trip time) and PAWS
2754	* (protection against wrapped sequence numbers). PAWS
2755	* gives us a set of rules for rejecting packets on
2756	* long fat pipes (packets that were somehow delayed
2757	* in transit longer than the time it took to send the
2758	* full TCP sequence space of 4Gb). We can use these
2759	* rules and infer a few others that will let us treat
2760	* the 32bit timestamp and the 32bit echoed timestamp
2761	* as sequence numbers to prevent a blind attacker from
2762	* inserting packets into a connection.
2763	*
2764	* RFC1323 tells us:
2765	* - The timestamp on this packet must be greater than
2766	* or equal to the last value echoed by the other
2767	* endpoint. The RFC says those will be discarded
2768	* since it is a dup that has already been acked.
2769	* This gives us a lowerbound on the timestamp.
2770	* timestamp >= other last echoed timestamp
2771	* - The timestamp will be less than or equal to
2772	* the last timestamp plus the time between the
2773	* last packet and now. The RFC defines the max
2774	* clock rate as 1ms. We will allow clocks to be
2775	* up to 10% fast and will allow a total difference
2776	* or 30 seconds due to a route change. And this
2777	* gives us an upperbound on the timestamp.
2778	* timestamp <= last timestamp + max ticks
2779	* We have to be careful here. Windows will send an
2780	* initial timestamp of zero and then initialize it
2781	* to a random value after the 3whs; presumably to
2782	* avoid a DoS by having to call an expensive RNG
2783	* during a SYN flood. Proof MS has at least one
2784	* good security geek.
2785	*
2786	* - The TCP timestamp option must also echo the other
2787	* endpoints timestamp. The timestamp echoed is the
2788	* one carried on the earliest unacknowledged segment
2789	* on the left edge of the sequence window. The RFC
2790	* states that the host will reject any echoed
2791	* timestamps that were larger than any ever sent.
2792	* This gives us an upperbound on the TS echo.
2793	* tescr <= largest_tsval
2794	* - The lowerbound on the TS echo is a little more
2795	* tricky to determine. The other endpoint's echoed
2796	* values will not decrease. But there may be
2797	* network conditions that re-order packets and
2798	* cause our view of them to decrease. For now the
2799	* only lowerbound we can safely determine is that
2800	* the TS echo will never be less than the original
2801	* TS. XXX There is probably a better lowerbound.
2802	* Remove TS_MAX_CONN with better lowerbound check.
2803	* tescr >= other original TS
2804	*
2805	* It is also important to note that the fastest
2806	* timestamp clock of 1ms will wrap its 32bit space in
2807	* 24 days. So we just disable TS checking after 24
2808	* days of idle time. We actually must use a 12d
2809	* connection limit until we can come up with a better
2810	* lowerbound to the TS echo check.
2811	*/
2812	struct timeval delta_ts;
2813	int ts_fudge;
2814
2815
2816	/*
2817	* PFTM_TS_DIFF is how many seconds of leeway to allow
2818	* a host's timestamp. This can happen if the previous
2819	* packet got delayed in transit for much longer than
2820	* this packet.
2821	*/
2822	if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == `0`)
2823	ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF];
2824
2825
2826	/ Calculate max ticks since the last timestamp /
2827	#define TS_MAXFREQ 1100 /* RFC max TS freq of 1Khz + 10% skew */
2828	#define TS_MICROSECS 1000000 /* microseconds per second */
2829	timersub(&uptime, &src->scrub->pfss_last, &delta_ts);
2830	tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ;
2831	tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ);
2832
2833
2834	if ((src->state >= TCPS_ESTABLISHED &&
2835	dst->state >= TCPS_ESTABLISHED) &&
2836	(SEQ_LT(tsval, dst->scrub->pfss_tsecr) \|\|
2837	SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) \|\|
2838	(tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) \|\|
2839	SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) {
2840	/*
2841	* Bad RFC1323 implementation or an insertion attack.
2842	*
2843	* - Solaris 2.6 and 2.7 are known to send another ACK
2844	* after the FIN,FIN\|ACK,ACK closing that carries
2845	* an old timestamp.
2846	*/
2847
2848	DPFPRINTF(("Timestamp failed %c%c%c%c\n",
2849	SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? `'0'` : `' '`,
2850	SEQ_GT(tsval, src->scrub->pfss_tsval +
2851	tsval_from_last) ? `'1'` : `' '`,
2852	SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? `'2'` : `' '`,
2853	SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? `'3'` : `' '`));
2854	DPFPRINTF((" tsval: %u tsecr: %u +ticks: %u "
2855	"idle: %lus %ums\n",
2856	tsval, tsecr, tsval_from_last, delta_ts.tv_sec,
2857	delta_ts.tv_usec / `1000`));
2858	DPFPRINTF((" src->tsval: %u tsecr: %u\n",
2859	src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
2860	DPFPRINTF((" dst->tsval: %u tsecr: %u tsval0: %u\n",
2861	dst->scrub->pfss_tsval, dst->scrub->pfss_tsecr,
2862	dst->scrub->pfss_tsval0));
2863	if (pf_status.debug >= PF_DEBUG_MISC) {
2864	pf_print_state(state);
2865	pf_print_flags(th->th_flags);
2866	printf("\n");
2867	}
2868	REASON_SET(reason, PFRES_TS);
2869	return (PF_DROP);
2870	}
2871
2872	/ XXX I'd really like to require tsecr but it's optional /
2873
2874	} else if (!got_ts && (th->th_flags & TH_RST) == `0` &&
2875	((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED)
2876	\|\| pd->p_len > `0` \|\| (th->th_flags & TH_SYN)) &&
2877	src->scrub && dst->scrub &&
2878	(src->scrub->pfss_flags & PFSS_PAWS) &&
2879	(dst->scrub->pfss_flags & PFSS_PAWS)) {
2880	/*
2881	* Didn't send a timestamp. Timestamps aren't really useful
2882	* when:
2883	* - connection opening or closing (often not even sent).
2884	* but we must not let an attacker to put a FIN on a
2885	* data packet to sneak it through our ESTABLISHED check.
2886	* - on a TCP reset. RFC suggests not even looking at TS.
2887	* - on an empty ACK. The TS will not be echoed so it will
2888	* probably not help keep the RTT calculation in sync and
2889	* there isn't as much danger when the sequence numbers
2890	* got wrapped. So some stacks don't include TS on empty
2891	* ACKs :-(
2892	*
2893	* To minimize the disruption to mostly RFC1323 conformant
2894	* stacks, we will only require timestamps on data packets.
2895	*
2896	* And what do ya know, we cannot require timestamps on data
2897	* packets. There appear to be devices that do legitimate
2898	* TCP connection hijacking. There are HTTP devices that allow
2899	* a 3whs (with timestamps) and then buffer the HTTP request.
2900	* If the intermediate device has the HTTP response cache, it
2901	* will spoof the response but not bother timestamping its
2902	* packets. So we can look for the presence of a timestamp in
2903	* the first data packet and if there, require it in all future
2904	* packets.
2905	*/
2906
2907	if (pd->p_len > `0` && (src->scrub->pfss_flags & PFSS_DATA_TS)) {
2908	/*
2909	* Hey! Someone tried to sneak a packet in. Or the
2910	* stack changed its RFC1323 behavior?!?!
2911	*/
2912	if (pf_status.debug >= PF_DEBUG_MISC) {
2913	DPFPRINTF(("Did not receive expected RFC1323 "
2914	"timestamp\n"));
2915	pf_print_state(state);
2916	pf_print_flags(th->th_flags);
2917	printf("\n");
2918	}
2919	REASON_SET(reason, PFRES_TS);
2920	return (PF_DROP);
2921	}
2922	}
2923
2924
2925	/*
2926	* We will note if a host sends his data packets with or without
2927	* timestamps. And require all data packets to contain a timestamp
2928	* if the first does. PAWS implicitly requires that all data packets be
2929	* timestamped. But I think there are middle-man devices that hijack
2930	* TCP streams immediately after the 3whs and don't timestamp their
2931	* packets (seen in a WWW accelerator or cache).
2932	*/
2933	if (pd->p_len > `0` && src->scrub && (src->scrub->pfss_flags &
2934	(PFSS_TIMESTAMP\|PFSS_DATA_TS\|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) {
2935	if (got_ts)
2936	src->scrub->pfss_flags \|= PFSS_DATA_TS;
2937	else {
2938	src->scrub->pfss_flags \|= PFSS_DATA_NOTS;
2939	if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
2940	(dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
2941	/ Don't warn if other host rejected RFC1323 /
2942	DPFPRINTF(("Broken RFC1323 stack did not "
2943	"timestamp data packet. Disabled PAWS "
2944	"security.\n"));
2945	pf_print_state(state);
2946	pf_print_flags(th->th_flags);
2947	printf("\n");
2948	}
2949	}
2950	}
2951
2952
2953	/*
2954	* Update PAWS values
2955	*/
2956	if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags &
2957	(PFSS_PAWS_IDLED\|PFSS_TIMESTAMP))) {
2958	getmicrouptime(&src->scrub->pfss_last);
2959	if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) \|\|
2960	(src->scrub->pfss_flags & PFSS_PAWS) == `0`)
2961	src->scrub->pfss_tsval = tsval;
2962
2963	if (tsecr) {
2964	if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) \|\|
2965	(src->scrub->pfss_flags & PFSS_PAWS) == `0`)
2966	src->scrub->pfss_tsecr = tsecr;
2967
2968	if ((src->scrub->pfss_flags & PFSS_PAWS) == `0` &&
2969	(SEQ_LT(tsval, src->scrub->pfss_tsval0) \|\|
2970	src->scrub->pfss_tsval0 == `0`)) {
2971	/ tsval0 MUST be the lowest timestamp /
2972	src->scrub->pfss_tsval0 = tsval;
2973	}
2974
2975	/ Only fully initialized after a TS gets echoed /
2976	if ((src->scrub->pfss_flags & PFSS_PAWS) == `0`)
2977	src->scrub->pfss_flags \|= PFSS_PAWS;
2978	}
2979	}
2980
2981	/ I have a dream.... TCP segment reassembly.... /
2982	return (`0`);
2983	}
2984
2985	static int
2986	pf_normalize_tcpopt(struct pf_rule r, int* dir, struct pfi_kif *kif,
2987	struct pf_pdesc pd, pbuf_t pbuf, struct tcphdr th, int* off,
2988	int *rewrptr)
2989	{
2990	#pragma unused(dir, kif)
2991	sa_family_t af = pd->af;
2992	u_int16_t *mss;
2993	int thoff;
2994	int opt, cnt, optlen = `0`;
2995	int rewrite = `0`;
2996	u_char opts[MAX_TCPOPTLEN];
2997	u_char *optp = opts;
2998
2999	thoff = th->th_off << `2`;
3000	cnt = thoff - sizeof (struct tcphdr);
3001
3002	if (cnt > `0` && !pf_pull_hdr(pbuf, off + sizeof (*th), opts, cnt,
3003	NULL, NULL, af))
3004	return PF_DROP;
3005
3006	for (; cnt > `0`; cnt -= optlen, optp += optlen) {
3007	opt = optp[`0`];
3008	if (opt == TCPOPT_EOL)
3009	break;
3010	if (opt == TCPOPT_NOP)
3011	optlen = `1`;
3012	else {
3013	if (cnt < `2`)
3014	break;
3015	optlen = optp[`1`];
3016	if (optlen < `2` \|\| optlen > cnt)
3017	break;
3018	}
3019	switch (opt) {
3020	case TCPOPT_MAXSEG:
3021	mss = (u_int16_t )(void* *)(optp + `2`);
3022	if ((ntohs(*mss)) > r->max_mss) {
3023	/*
3024	* <jhw@apple.com>
3025	* Only do the TCP checksum fixup if delayed
3026	* checksum calculation will not be performed.
3027	*/
3028	if (pbuf->pb_ifp \|\|
3029	!(*pbuf->pb_csum_flags & CSUM_TCP))
3030	th->th_sum = pf_cksum_fixup(th->th_sum,
3031	*mss, htons(r->max_mss), `0`);
3032	*mss = htons(r->max_mss);
3033	rewrite = `1`;
3034	}
3035	break;
3036	default:
3037	break;
3038	}
3039	}
3040
3041	if (rewrite) {
3042	u_short reason;
3043
3044	VERIFY(pbuf == pd->mp);
3045
3046	if (pf_lazy_makewritable(pd, pd->mp,
3047	off + sizeof (*th) + thoff) == NULL) {
3048	REASON_SET(&reason, PFRES_MEMORY);
3049	if (r->log)
3050	PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, reason,
3051	r, `0`, `0`, pd);
3052	return PF_DROP;
3053	}
3054
3055	*rewrptr = `1`;
3056	pbuf_copy_back(pd->mp, off + sizeof (th), thoff - sizeof* (*th), opts);
3057	}
3058
3059	return PF_PASS;
3060	}
3061

/* Browse the source code of xnu/bsd/net/pf_norm.c */