radix.c source code [xnu/bsd/net/radix.c]

1	/*
2	* Copyright (c) 2000-2013 Apple Inc. All rights reserved.
3	*
4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5	*
6	* This file contains Original Code and/or Modifications of Original Code
7	* as defined in and that are subject to the Apple Public Source License
8	* Version 2.0 (the 'License'). You may not use this file except in
9	* compliance with the License. The rights granted to you under the License
10	* may not be used to create, or enable the creation or redistribution of,
11	* unlawful or unlicensed copies of an Apple operating system, or to
12	* circumvent, violate, or enable the circumvention or violation of, any
13	* terms of an Apple operating system software license agreement.
14	*
15	* Please obtain a copy of the License at
16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
17	*
18	* The Original Code and all software distributed under the License are
19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23	* Please see the License for the specific language governing rights and
24	* limitations under the License.
25	*
26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27	*/
28	/*
29	* Copyright (c) 1988, 1989, 1993
30	* The Regents of the University of California. All rights reserved.
31	*
32	* Redistribution and use in source and binary forms, with or without
33	* modification, are permitted provided that the following conditions
34	* are met:
35	* 1. Redistributions of source code must retain the above copyright
36	* notice, this list of conditions and the following disclaimer.
37	* 2. Redistributions in binary form must reproduce the above copyright
38	* notice, this list of conditions and the following disclaimer in the
39	* documentation and/or other materials provided with the distribution.
40	* 3. All advertising materials mentioning features or use of this software
41	* must display the following acknowledgement:
42	* This product includes software developed by the University of
43	* California, Berkeley and its contributors.
44	* 4. Neither the name of the University nor the names of its contributors
45	* may be used to endorse or promote products derived from this software
46	* without specific prior written permission.
47	*
48	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58	* SUCH DAMAGE.
59	*
60	* @(#)radix.c 8.4 (Berkeley) 11/2/94
61	* $FreeBSD: src/sys/net/radix.c,v 1.20.2.2 2001/03/06 00:56:50 obrien Exp $
62	*/
63
64	/*
65	* Routines to build and maintain radix trees for routing lookups.
66	*/
67	#ifndef _RADIX_H_
68	#include <sys/param.h>
69	#include <sys/systm.h>
70	#include <sys/domain.h>
71	#include <sys/syslog.h>
72	#include <net/radix.h>
73	#include <sys/socket.h>
74	#include <sys/socketvar.h>
75	#include <kern/locks.h>
76	#endif
77
78	static int rn_walktree_from(struct radix_node_head h, void* *a,
79	void m, walktree_f_t f, void *w);
80	static int rn_walktree(struct radix_node_head , walktree_f_t , void *);
81	static struct radix_node rn_insert(void* , struct* radix_node_head , int* , struct* radix_node[`2`]);
82	static struct radix_node rn_newpair(void* , int, struct* radix_node[`2`]);
83	static struct radix_node rn_search(void* , struct* radix_node *);
84	static struct radix_node rn_search_m(void* , struct* radix_node , void* *);
85
86	static int max_keylen;
87	static struct radix_mask *rn_mkfreelist;
88	static struct radix_node_head *mask_rnhead;
89	static char *addmask_key;
90	static char normal_chars[] = {`0`, `0x80`, `0xc0`, `0xe0`, `0xf0`, `0xf8`, `0xfc`, `0xfe`, -`1`};
91	static char rn_zeros, rn_ones;
92
93	static zone_t radix_node_zone;
94	KALLOC_TYPE_DEFINE(radix_node_head_zone, struct radix_node_head, KT_DEFAULT);
95
96	#define rn_masktop (mask_rnhead->rnh_treetop)
97	#undef Bcmp
98	#define Bcmp(a, b, l) \
99	(l == 0 ? 0 : bcmp((caddr_t)(a), (caddr_t)(b), (uint32_t)l))
100
101	static int rn_lexobetter(void m_arg, void* *n_arg);
102	static struct radix_mask *
103	rn_new_radix_mask(struct radix_node *tt,
104	struct radix_mask *next);
105	static int rn_satisfies_leaf(char trial, struct* radix_node leaf, int* skip,
106	rn_matchf_t f, void* *w);
107
108	#define RN_MATCHF(rn, f, arg) (f == NULL \|\| (*f)((rn), arg))
109
110	/*
111	* The data structure for the keys is a radix tree with one way
112	* branching removed. The index rn_bit at an internal node n represents a bit
113	* position to be tested. The tree is arranged so that all descendants
114	* of a node n have keys whose bits all agree up to position rn_bit - 1.
115	* (We say the index of n is rn_bit.)
116	*
117	* There is at least one descendant which has a one bit at position rn_bit,
118	* and at least one with a zero there.
119	*
120	* A route is determined by a pair of key and mask. We require that the
121	* bit-wise logical and of the key and mask to be the key.
122	* We define the index of a route associated with the mask to be
123	* the first bit number in the mask where 0 occurs (with bit number 0
124	* representing the highest order bit).
125	*
126	* We say a mask is normal if every bit is 0, past the index of the mask.
127	* If a node n has a descendant (k, m) with index(m) == index(n) == rn_bit,
128	* and m is a normal mask, then the route applies to every descendant of n.
129	* If the index(m) < rn_bit, this implies the trailing last few bits of k
130	* before bit b are all 0, (and hence consequently true of every descendant
131	* of n), so the route applies to all descendants of the node as well.
132	*
133	* Similar logic shows that a non-normal mask m such that
134	* index(m) <= index(n) could potentially apply to many children of n.
135	* Thus, for each non-host route, we attach its mask to a list at an internal
136	* node as high in the tree as we can go.
137	*
138	* The present version of the code makes use of normal routes in short-
139	* circuiting an explicit mask and compare operation when testing whether
140	* a key satisfies a normal route, and also in remembering the unique leaf
141	* that governs a subtree.
142	*/
143
144	static struct radix_node *
145	rn_search(void v_arg, struct* radix_node *head)
146	{
147	struct radix_node *x;
148	caddr_t v;
149
150	for (x = head, v = v_arg; x->rn_bit >= `0`;) {
151	if (x->rn_bmask & v[x->rn_offset]) {
152	x = x->rn_right;
153	} else {
154	x = x->rn_left;
155	}
156	}
157	return x;
158	}
159
160	static struct radix_node *
161	rn_search_m(void v_arg, struct* radix_node head, void* *m_arg)
162	{
163	struct radix_node *x;
164	caddr_t v = v_arg, m = m_arg;
165
166	for (x = head; x->rn_bit >= `0`;) {
167	if ((x->rn_bmask & m[x->rn_offset]) &&
168	(x->rn_bmask & v[x->rn_offset])) {
169	x = x->rn_right;
170	} else {
171	x = x->rn_left;
172	}
173	}
174	return x;
175	}
176
/*
 * Decide whether mask m_arg is strictly more specific than mask n_arg.
 *
 * Both arguments are length-prefixed byte strings: byte 0 holds the total
 * length (including itself), the remaining bytes are the mask.
 *
 * Returns 0 if n has any bit set that m lacks, or if the two masks are
 * identical; returns 1 if every bit of n is also in m and m has at least
 * one additional bit.
 */
int
rn_refines(void *m_arg, void *n_arg)
{
	const char *m = m_arg, *n = n_arg;
	/* lim2: end of n; lim: end of the part of n that overlaps m */
	const char *lim, *lim2 = lim = n + *(const unsigned char *)n;
	/* > 0 when n is longer than m; both cursors step past the length byte */
	int longer = (*(const unsigned char *)n++) -
	    (int)(*(const unsigned char *)m++);
	int masks_are_equal = 1;

	if (longer > 0) {
		lim -= longer;
	}
	/* Overlapping bytes: n must not have a bit that m is missing. */
	while (n < lim) {
		if (*n & ~(*m)) {
			return 0;
		}
		if (*n++ != *m++) {
			masks_are_equal = 0;
		}
	}
	/* Tail of n beyond m: must be all zero, or n has bits m lacks. */
	while (n < lim2) {
		if (*n++) {
			return 0;
		}
	}
	/* Tail of m beyond n: any set bit makes m strictly more specific. */
	if (masks_are_equal && (longer < 0)) {
		for (lim2 = m - longer; m < lim2;) {
			if (*m++) {
				return 1;
			}
		}
	}
	return !masks_are_equal;
}
210
211	struct radix_node *
212	rn_lookup(void v_arg, void* m_arg, struct* radix_node_head *head)
213	{
214	return rn_lookup_args(v_arg, m_arg, head, NULL, NULL);
215	}
216
217	struct radix_node *
218	rn_lookup_args(void v_arg, void* m_arg, struct* radix_node_head *head,
219	rn_matchf_t f, void* *w)
220	{
221	struct radix_node *x;
222	caddr_t netmask = NULL;
223
224	if (m_arg) {
225	x = rn_addmask(m_arg, `1`, head->rnh_treetop->rn_offset);
226	if (x == `0`) {
227	return NULL;
228	}
229	/*
230	* Note: the auxillary mask is stored as a "key".
231	*/
232	netmask = rn_get_key(rn: x);
233	}
234	x = rn_match_args(v_arg, head, f, w);
235	if (x && netmask) {
236	while (x && rn_get_mask(rn: x) != netmask) {
237	x = x->rn_dupedkey;
238	}
239	}
240	return x;
241	}
242
243	/*
244	* Returns true if address 'trial' has no bits differing from the
245	* leaf's key when compared under the leaf's mask. In other words,
246	* returns true when 'trial' matches leaf. If a leaf-matching
247	* routine is passed in, it is also used to find a match on the
248	* conditions defined by the caller of rn_match.
249	*/
250	static int
251	rn_satisfies_leaf(char trial, struct* radix_node leaf, int* skip,
252	rn_matchf_t f, void* *w)
253	{
254	char *cp = trial;
255	char *cp2 = rn_get_key(rn: leaf);
256	char *cp3 = rn_get_mask(rn: leaf);
257	char *cplim;
258	int length = min(a: (u_char )cp, b: (u_char )cp2);
259
260	if (cp3 == `0`) {
261	cp3 = rn_ones;
262	} else {
263	length = min(a: length, b: (u_char )cp3);
264	}
265	cplim = cp + length; cp3 += skip; cp2 += skip;
266	for (cp += skip; cp < cplim; cp++, cp2++, cp3++) {
267	if ((cp ^ cp2) & *cp3) {
268	return `0`;
269	}
270	}
271
272	return RN_MATCHF(leaf, f, w);
273	}
274
275	struct radix_node *
276	rn_match(void v_arg, struct* radix_node_head *head)
277	{
278	return rn_match_args(v_arg, head, NULL, NULL);
279	}
280
281	struct radix_node *
282	rn_match_args(void v_arg, struct* radix_node_head *head,
283	rn_matchf_t f, void* *w)
284	{
285	caddr_t v = v_arg;
286	struct radix_node t = head->rnh_treetop, x;
287	caddr_t cp = v, cp2;
288	caddr_t cplim;
289	struct radix_node saved_t, top = t;
290	int off = t->rn_offset, vlen = (u_char )cp, matched_off;
291	int test, b, rn_bit;
292
293	/*
294	* Open code rn_search(v, top) to avoid overhead of extra
295	* subroutine call.
296	*/
297	for (; t->rn_bit >= `0`;) {
298	if (t->rn_bmask & cp[t->rn_offset]) {
299	t = t->rn_right;
300	} else {
301	t = t->rn_left;
302	}
303	}
304	/*
305	* See if we match exactly as a host destination
306	* or at least learn how many bits match, for normal mask finesse.
307	*
308	* It doesn't hurt us to limit how many bytes to check
309	* to the length of the mask, since if it matches we had a genuine
310	* match and the leaf we have is the most specific one anyway;
311	* if it didn't match with a shorter length it would fail
312	* with a long one. This wins big for class B&C netmasks which
313	* are probably the most common case...
314	*/
315	if (rn_get_mask(rn: t)) {
316	vlen = (u_char )rn_get_mask(rn: t);
317	}
318	cp += off;
319	cp2 = rn_get_key(rn: t) + off;
320	cplim = v + vlen;
321
322	for (; cp < cplim; cp++, cp2++) {
323	if (cp != cp2) {
324	goto on1;
325	}
326	}
327	/*
328	* This extra grot is in case we are explicitly asked
329	* to look up the default. Ugh!
330	*
331	* Never return the root node itself, it seems to cause a
332	* lot of confusion.
333	*/
334	if (t->rn_flags & RNF_ROOT) {
335	t = t->rn_dupedkey;
336	}
337	if (t == NULL \|\| RN_MATCHF(t, f, w)) {
338	return t;
339	} else {
340	/*
341	* Although we found an exact match on the key,
342	* f() is looking for some other criteria as well.
343	* Continue looking as if the exact match failed.
344	*/
345	if (t->rn_parent->rn_flags & RNF_ROOT) {
346	/ Hit the top; have to give up /
347	return NULL;
348	}
349	b = `0`;
350	goto keeplooking;
351	}
352	on1:
353	test = (cp ^ cp2) & `0xff`; / find first bit that differs /
354	for (b = `7`; (test >>= `1`) > `0`;) {
355	b--;
356	}
357	keeplooking:
358	matched_off = (int)(cp - v);
359	b += matched_off << `3`;
360	rn_bit = -`1` - b;
361	/*
362	* If there is a host route in a duped-key chain, it will be first.
363	*/
364	saved_t = t;
365	if (rn_get_mask(rn: t) == `0`) {
366	t = t->rn_dupedkey;
367	}
368	for (; t; t = t->rn_dupedkey) {
369	/*
370	* Even if we don't match exactly as a host,
371	* we may match if the leaf we wound up at is
372	* a route to a net.
373	*/
374	if (t->rn_flags & RNF_NORMAL) {
375	if ((rn_bit <= t->rn_bit) && RN_MATCHF(t, f, w)) {
376	return t;
377	}
378	} else if (rn_satisfies_leaf(trial: v, leaf: t, skip: matched_off, f, w)) {
379	return t;
380	}
381	}
382	t = saved_t;
383	/ start searching up the tree /
384	do {
385	struct radix_mask *m;
386	t = t->rn_parent;
387	m = t->rn_mklist;
388	/*
389	* If non-contiguous masks ever become important
390	* we can restore the masking and open coding of
391	* the search and satisfaction test and put the
392	* calculation of "off" back before the "do".
393	*/
394	while (m) {
395	if (m->rm_flags & RNF_NORMAL) {
396	if ((rn_bit <= m->rm_bit) &&
397	RN_MATCHF(m->rm_leaf, f, w)) {
398	return m->rm_leaf;
399	}
400	} else {
401	off = min(a: t->rn_offset, b: matched_off);
402	x = rn_search_m(v_arg: v, head: t, m_arg: rm_get_mask(rm: m));
403	while (x && rn_get_mask(rn: x) != rm_get_mask(rm: m)) {
404	x = x->rn_dupedkey;
405	}
406	if (x && rn_satisfies_leaf(trial: v, leaf: x, skip: off, f, w)) {
407	return x;
408	}
409	}
410	m = m->rm_mklist;
411	}
412	} while (t != top);
413	return NULL;
414	}
415
#ifdef RN_DEBUG
/* Bookkeeping used only by the RN_DEBUG code paths in this file. */
int     rn_nodenum;                     /* next value handed out as rn_info */
struct  radix_node *rn_clist;           /* head of the rn_ybro creation list */
int     rn_saveinfo;
int     rn_debug = 1;                   /* gates the rn_insert() debug logging */
#endif
422
423	static struct radix_node *
424	rn_newpair(void v, int* b, struct radix_node nodes[`2`])
425	{
426	struct radix_node tt = nodes, t = tt + `1`;
427	t->rn_bit = (short)b;
428	t->rn_bmask = `0x80` >> (b & `7`);
429	t->rn_left = tt;
430	t->rn_offset = b >> `3`;
431	tt->rn_bit = -`1`;
432	tt->rn_key = (caddr_t)v;
433	tt->rn_parent = t;
434	tt->rn_flags = t->rn_flags = RNF_ACTIVE;
435	tt->rn_mklist = t->rn_mklist = NULL;
436	#ifdef RN_DEBUG
437	tt->rn_info = rn_nodenum++; t->rn_info = rn_nodenum++;
438	tt->rn_twin = t;
439	tt->rn_ybro = rn_clist;
440	rn_clist = tt;
441	#endif
442	return t;
443	}
444
445	static struct radix_node *
446	rn_insert(void v_arg, struct* radix_node_head head, int* *dupentry,
447	struct radix_node nodes[`2`])
448	{
449	caddr_t v = v_arg;
450	struct radix_node *top = head->rnh_treetop;
451	int head_off = top->rn_offset, vlen = (int)((u_char )v);
452	struct radix_node *t = rn_search(v_arg, head: top);
453	caddr_t cp = v + head_off;
454	int b;
455	struct radix_node *tt;
456	/*
457	* Find first bit at which v and t->rn_key differ
458	*/
459	{
460	caddr_t cp2 = rn_get_key(rn: t) + head_off;
461	int cmp_res;
462	caddr_t cplim = v + vlen;
463
464	while (cp < cplim) {
465	if (cp2++ != cp++) {
466	goto on1;
467	}
468	}
469	*dupentry = `1`;
470	return t;
471	on1:
472	*dupentry = `0`;
473	cmp_res = (cp[-`1`] ^ cp2[-`1`]) & `0xff`;
474	for (b = (int)(cp - v) << `3`; cmp_res; b--) {
475	cmp_res >>= `1`;
476	}
477	}
478	{
479	struct radix_node p, x = top;
480	cp = v;
481	do {
482	p = x;
483	if (cp[x->rn_offset] & x->rn_bmask) {
484	x = x->rn_right;
485	} else {
486	x = x->rn_left;
487	}
488	} while (b > (unsigned) x->rn_bit);
489	/ x->rn_bit < b && x->rn_bit >= 0 /
490	#ifdef RN_DEBUG
491	if (rn_debug) {
492	log(LOG_DEBUG, "rn_insert: Going In:\n"), traverse(p);
493	}
494	#endif
495	t = rn_newpair(v: v_arg, b, nodes);
496	tt = t->rn_left;
497	if ((cp[p->rn_offset] & p->rn_bmask) == `0`) {
498	p->rn_left = t;
499	} else {
500	p->rn_right = t;
501	}
502	x->rn_parent = t;
503	t->rn_parent = p; / frees x, p as temp vars below /
504	if ((cp[t->rn_offset] & t->rn_bmask) == `0`) {
505	t->rn_right = x;
506	} else {
507	t->rn_right = tt;
508	t->rn_left = x;
509	}
510	#ifdef RN_DEBUG
511	if (rn_debug) {
512	log(LOG_DEBUG, "rn_insert: Coming Out:\n"), traverse(p);
513	}
514	#endif
515	}
516	return tt;
517	}
518
519	struct radix_node *
520	rn_addmask(void n_arg, int* search, int skip)
521	{
522	caddr_t netmask = (caddr_t)n_arg;
523	struct radix_node *x;
524	caddr_t cp, cplim;
525	int b = `0`, mlen, j;
526	int maskduplicated, m0, isnormal;
527	struct radix_node *saved_x;
528	static int last_zeroed = `0`;
529
530	if ((mlen = (u_char )netmask) > max_keylen) {
531	mlen = max_keylen;
532	}
533	if (skip == `0`) {
534	skip = `1`;
535	}
536	if (mlen <= skip) {
537	return mask_rnhead->rnh_nodes;
538	}
539	if (skip > `1`) {
540	Bcopy(rn_ones + `1`, addmask_key + `1`, skip - `1`);
541	}
542	if ((m0 = mlen) > skip) {
543	Bcopy(netmask + skip, addmask_key + skip, mlen - skip);
544	}
545	/*
546	* Trim trailing zeroes.
547	*/
548	for (cp = addmask_key + mlen; (cp > addmask_key) && cp[-`1`] == `0`;) {
549	cp--;
550	}
551	mlen = (int)(cp - addmask_key);
552	if (mlen <= skip) {
553	if (m0 >= last_zeroed) {
554	last_zeroed = mlen;
555	}
556	return mask_rnhead->rnh_nodes;
557	}
558	if (m0 < last_zeroed) {
559	Bzero(addmask_key + m0, last_zeroed - m0);
560	}
561	addmask_key = last_zeroed = (char*)mlen;
562	x = rn_search(v_arg: addmask_key, rn_masktop);
563	if (Bcmp(addmask_key, rn_get_key(x), mlen) != `0`) {
564	x = NULL;
565	}
566	if (x \|\| search) {
567	return x;
568	}
569	x = saved_x = zalloc_flags(radix_node_zone, Z_WAITOK_ZERO_NOFAIL);
570	netmask = cp = (caddr_t)(x + `2`);
571	Bcopy(addmask_key, cp, mlen);
572	x = rn_insert(v_arg: cp, head: mask_rnhead, dupentry: &maskduplicated, nodes: x);
573	if (maskduplicated) {
574	log(LOG_ERR, "rn_addmask: mask impossibly already in tree");
575	zfree(radix_node_zone, saved_x);
576	return x;
577	}
578	mask_rnhead->rnh_cnt++;
579	/*
580	* Calculate index of mask, and check for normalcy.
581	*/
582	cplim = netmask + mlen; isnormal = `1`;
583	for (cp = netmask + skip; (cp < cplim) && (u_char )cp == `0xff`;) {
584	cp++;
585	}
586	if (cp != cplim) {
587	for (j = `0x80`; (j & *cp) != `0`; j >>= `1`) {
588	b++;
589	}
590	if (*cp != normal_chars[b] \|\| cp != (cplim - `1`)) {
591	isnormal = `0`;
592	}
593	}
594	b += (cp - netmask) << `3`;
595	x->rn_bit = (short)(-`1` - b);
596	if (isnormal) {
597	x->rn_flags \|= RNF_NORMAL;
598	}
599	return x;
600	}
601
/*
 * XXX: arbitrary ordering for non-contiguous masks
 *
 * Returns 1 if mask m_arg should be checked before mask n_arg: either its
 * length byte is larger, or the lengths are equal and, scanning from the
 * length byte onward, some byte of m is greater than the corresponding
 * byte of n.  Returns 0 otherwise.  Both arguments are length-prefixed
 * byte strings.
 */
static int
rn_lexobetter(void *m_arg, void *n_arg)
{
	unsigned char *mp = m_arg, *np = n_arg, *lim;

	if (*mp > *np) {
		return 1;  /* not really, but need to check longer one first */
	}
	if (*mp == *np) {
		for (lim = mp + *mp; mp < lim;) {
			if (*mp++ > *np++) {
				return 1;
			}
		}
	}
	return 0;
}
620
621	static struct radix_mask *
622	rn_new_radix_mask(struct radix_node tt, struct* radix_mask *next)
623	{
624	struct radix_mask *m;
625
626	MKGet(m);
627	m->rm_bit = tt->rn_bit;
628	m->rm_flags = tt->rn_flags;
629	if (tt->rn_flags & RNF_NORMAL) {
630	m->rm_leaf = tt;
631	} else {
632	m->rm_mask = rn_get_mask(rn: tt);
633	}
634	m->rm_mklist = next;
635	tt->rn_mklist = m;
636	return m;
637	}
638
639	struct radix_node *
640	rn_addroute(void v_arg, void* n_arg, struct* radix_node_head *head,
641	struct radix_node treenodes[`2`])
642	{
643	caddr_t v = (caddr_t)v_arg, netmask = (caddr_t)n_arg;
644	struct radix_node t, x = NULL, *tt;
645	struct radix_node saved_tt, top = head->rnh_treetop;
646	short b = `0`, b_leaf = `0`;
647	int keyduplicated;
648	caddr_t mmask;
649	struct radix_mask m, *mp;
650
651	/*
652	* In dealing with non-contiguous masks, there may be
653	* many different routes which have the same mask.
654	* We will find it useful to have a unique pointer to
655	* the mask to speed avoiding duplicate references at
656	* nodes and possibly save time in calculating indices.
657	*/
658	if (netmask) {
659	if ((x = rn_addmask(n_arg: netmask, search: `0`, skip: top->rn_offset)) == `0`) {
660	return NULL;
661	}
662	b_leaf = x->rn_bit;
663	b = -`1` - x->rn_bit;
664	/*
665	* Note: the auxillary mask is stored as a "key".
666	*/
667	netmask = rn_get_key(rn: x);
668	}
669	/*
670	* Deal with duplicated keys: attach node to previous instance
671	*/
672	saved_tt = tt = rn_insert(v_arg: v, head, dupentry: &keyduplicated, nodes: treenodes);
673	if (keyduplicated) {
674	for (t = tt; tt; t = tt, tt = tt->rn_dupedkey) {
675	if (rn_get_mask(rn: tt) == netmask) {
676	return NULL;
677	}
678	if (netmask == `0` \|\|
679	(rn_get_mask(rn: tt) != NULL &&
680	((b_leaf < tt->rn_bit) / index(netmask) > node /
681	\|\| rn_refines(m_arg: netmask, n_arg: rn_get_mask(rn: tt))
682	\|\| rn_lexobetter(m_arg: netmask, n_arg: rn_get_mask(rn: tt))))) {
683	break;
684	}
685	}
686	/*
687	* If the mask is not duplicated, we wouldn't
688	* find it among possible duplicate key entries
689	* anyway, so the above test doesn't hurt.
690	*
691	* We sort the masks for a duplicated key the same way as
692	* in a masklist -- most specific to least specific.
693	* This may require the unfortunate nuisance of relocating
694	* the head of the list.
695	*/
696	if (tt == saved_tt) {
697	struct radix_node *xx = x;
698	/ link in at head of list /
699	(tt = treenodes)->rn_dupedkey = t;
700	tt->rn_flags = t->rn_flags;
701	tt->rn_parent = x = t->rn_parent;
702	t->rn_parent = tt; / parent /
703	if (x->rn_left == t) {
704	x->rn_left = tt;
705	} else {
706	x->rn_right = tt;
707	}
708	saved_tt = tt; x = xx;
709	} else {
710	(tt = treenodes)->rn_dupedkey = t->rn_dupedkey;
711	t->rn_dupedkey = tt;
712	tt->rn_parent = t; / parent /
713	if (tt->rn_dupedkey) { / parent /
714	tt->rn_dupedkey->rn_parent = tt; / parent /
715	}
716	}
717	#ifdef RN_DEBUG
718	t = tt + `1`; tt->rn_info = rn_nodenum++; t->rn_info = rn_nodenum++;
719	tt->rn_twin = t; tt->rn_ybro = rn_clist; rn_clist = tt;
720	#endif
721	tt->rn_key = (caddr_t) v;
722	tt->rn_bit = -`1`;
723	tt->rn_flags = RNF_ACTIVE;
724	}
725	head->rnh_cnt++;
726	/*
727	* Put mask in tree.
728	*/
729	if (netmask) {
730	tt->rn_mask = netmask;
731	tt->rn_bit = x->rn_bit;
732	tt->rn_flags \|= x->rn_flags & RNF_NORMAL;
733	}
734	t = saved_tt->rn_parent;
735	if (keyduplicated) {
736	goto on2;
737	}
738	b_leaf = -`1` - t->rn_bit;
739	if (t->rn_right == saved_tt) {
740	x = t->rn_left;
741	} else {
742	x = t->rn_right;
743	}
744	/ Promote general routes from below /
745	if (x->rn_bit < `0`) {
746	for (mp = &t->rn_mklist; x; x = x->rn_dupedkey) {
747	if (rn_get_mask(rn: x) != NULL && (x->rn_bit >= b_leaf) && x->rn_mklist == `0`) {
748	*mp = m = rn_new_radix_mask(tt: x, NULL);
749	if (m) {
750	mp = &m->rm_mklist;
751	}
752	}
753	}
754	} else if (x->rn_mklist) {
755	/*
756	* Skip over masks whose index is > that of new node
757	*/
758	for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist) {
759	if (m->rm_bit >= b_leaf) {
760	break;
761	}
762	}
763	t->rn_mklist = m; *mp = NULL;
764	}
765	on2:
766	/ Add new route to highest possible ancestor's list /
767	if ((netmask == `0`) \|\| (b > t->rn_bit)) {
768	return tt; / can't lift at all /
769	}
770	b_leaf = tt->rn_bit;
771	do {
772	x = t;
773	t = t->rn_parent;
774	} while (b <= t->rn_bit && x != top);
775	/*
776	* Search through routes associated with node to
777	* insert new route according to index.
778	* Need same criteria as when sorting dupedkeys to avoid
779	* double loop on deletion.
780	*/
781	for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist) {
782	if (m->rm_bit < b_leaf) {
783	continue;
784	}
785	if (m->rm_bit > b_leaf) {
786	break;
787	}
788	if (m->rm_flags & RNF_NORMAL) {
789	mmask = rn_get_mask(rn: m->rm_leaf);
790	if (tt->rn_flags & RNF_NORMAL) {
791	log(LOG_ERR,
792	"Non-unique normal route, mask not entered");
793	return tt;
794	}
795	} else {
796	mmask = rm_get_mask(rm: m);
797	}
798	if (mmask == netmask) {
799	m->rm_refs++;
800	tt->rn_mklist = m;
801	return tt;
802	}
803	if (rn_refines(m_arg: netmask, n_arg: mmask)
804	\|\| rn_lexobetter(m_arg: netmask, n_arg: mmask)) {
805	break;
806	}
807	}
808	mp = rn_new_radix_mask(tt, next: mp);
809	return tt;
810	}
811
812	struct radix_node *
813	rn_delete(void v_arg, void* netmask_arg, struct* radix_node_head *head)
814	{
815	struct radix_node t, p, x, tt;
816	struct radix_mask m, saved_m, **mp;
817	struct radix_node dupedkey, saved_tt, *top;
818	caddr_t v, netmask;
819	int b, head_off, vlen;
820
821	v = v_arg;
822	netmask = netmask_arg;
823	x = head->rnh_treetop;
824	tt = rn_search(v_arg: v, head: x);
825	head_off = x->rn_offset;
826	vlen = (u_char )v;
827	saved_tt = tt;
828	top = x;
829	if (tt == `0` \|\|
830	Bcmp(v + head_off, rn_get_key(tt) + head_off, vlen - head_off)) {
831	return NULL;
832	}
833	/*
834	* Delete our route from mask lists.
835	*/
836	if (netmask) {
837	if ((x = rn_addmask(n_arg: netmask, search: `1`, skip: head_off)) == `0`) {
838	return NULL;
839	}
840	netmask = rn_get_key(rn: x);
841	while (rn_get_mask(rn: tt) != netmask) {
842	if ((tt = tt->rn_dupedkey) == `0`) {
843	return NULL;
844	}
845	}
846	}
847	if (rn_get_mask(rn: tt) == `0` \|\| (saved_m = m = tt->rn_mklist) == `0`) {
848	goto on1;
849	}
850	if (tt->rn_flags & RNF_NORMAL) {
851	if (m->rm_leaf != tt \|\| m->rm_refs > `0`) {
852	log(LOG_ERR, "rn_delete: inconsistent annotation\n");
853	return NULL; / dangling ref could cause disaster /
854	}
855	} else {
856	if (rm_get_mask(rm: m) != rn_get_mask(rn: tt)) {
857	log(LOG_ERR, "rn_delete: inconsistent annotation\n");
858	goto on1;
859	}
860	if (--m->rm_refs >= `0`) {
861	goto on1;
862	}
863	}
864	b = -`1` - tt->rn_bit;
865	t = saved_tt->rn_parent;
866	if (b > t->rn_bit) {
867	goto on1; / Wasn't lifted at all /
868	}
869	do {
870	x = t;
871	t = t->rn_parent;
872	} while (b <= t->rn_bit && x != top);
873	for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist) {
874	if (m == saved_m) {
875	*mp = m->rm_mklist;
876	if (tt->rn_mklist == m) {
877	tt->rn_mklist = *mp;
878	}
879	MKFree(m);
880	break;
881	}
882	}
883	if (m == `0`) {
884	log(LOG_ERR, "rn_delete: couldn't find our annotation\n");
885	if (tt->rn_flags & RNF_NORMAL) {
886	return NULL; / Dangling ref to us /
887	}
888	}
889	on1:
890	/*
891	* Eliminate us from tree
892	*/
893	if (tt->rn_flags & RNF_ROOT) {
894	return NULL;
895	}
896	head->rnh_cnt--;
897	#ifdef RN_DEBUG
898	/ Get us out of the creation list /
899	for (t = rn_clist; t && t->rn_ybro != tt; t = t->rn_ybro) {
900	}
901	if (t) {
902	t->rn_ybro = tt->rn_ybro;
903	}
904	#endif
905	t = tt->rn_parent;
906	dupedkey = saved_tt->rn_dupedkey;
907	if (dupedkey) {
908	/*
909	* at this point, tt is the deletion target and saved_tt
910	* is the head of the dupekey chain
911	*/
912	if (tt == saved_tt) {
913	/ remove from head of chain /
914	x = dupedkey; x->rn_parent = t;
915	if (t->rn_left == tt) {
916	t->rn_left = x;
917	} else {
918	t->rn_right = x;
919	}
920	} else {
921	/ find node in front of tt on the chain /
922	for (x = p = saved_tt; p && p->rn_dupedkey != tt;) {
923	p = p->rn_dupedkey;
924	}
925	if (p) {
926	p->rn_dupedkey = tt->rn_dupedkey;
927	if (tt->rn_dupedkey) { / parent /
928	tt->rn_dupedkey->rn_parent = p;
929	}
930	/ parent /
931	} else {
932	log(LOG_ERR, "rn_delete: couldn't find us\n");
933	}
934	}
935	t = tt + `1`;
936	if (t->rn_flags & RNF_ACTIVE) {
937	#ifndef RN_DEBUG
938	++x = t;
939	p = t->rn_parent;
940	#else
941	b = t->rn_info;
942	++x = t;
943	t->rn_info = b;
944	p = t->rn_parent;
945	#endif
946	if (p->rn_left == t) {
947	p->rn_left = x;
948	} else {
949	p->rn_right = x;
950	}
951	x->rn_left->rn_parent = x;
952	x->rn_right->rn_parent = x;
953	}
954	goto out;
955	}
956	if (t->rn_left == tt) {
957	x = t->rn_right;
958	} else {
959	x = t->rn_left;
960	}
961	p = t->rn_parent;
962	if (p->rn_right == t) {
963	p->rn_right = x;
964	} else {
965	p->rn_left = x;
966	}
967	x->rn_parent = p;
968	/*
969	* Demote routes attached to us.
970	*/
971	if (t->rn_mklist) {
972	if (x->rn_bit >= `0`) {
973	for (mp = &x->rn_mklist; (m = *mp);) {
974	mp = &m->rm_mklist;
975	}
976	*mp = t->rn_mklist;
977	} else {
978	/ If there are any key,mask pairs in a sibling*
979	* duped-key chain, some subset will appear sorted
980	* in the same order attached to our mklist */
981	for (m = t->rn_mklist; m && x; x = x->rn_dupedkey) {
982	if (m == x->rn_mklist) {
983	struct radix_mask *mm = m->rm_mklist;
984	x->rn_mklist = NULL;
985	if (--(m->rm_refs) < `0`) {
986	MKFree(m);
987	}
988	m = mm;
989	}
990	}
991	if (m) {
992	log(LOG_ERR, "rn_delete: Orphaned Mask "
993	"0x%llx at 0x%llx\n",
994	(uint64_t)VM_KERNEL_ADDRPERM(m),
995	(uint64_t)VM_KERNEL_ADDRPERM(x));
996	}
997	}
998	}
999	/*
1000	* We may be holding an active internal node in the tree.
1001	*/
1002	x = tt + `1`;
1003	if (t != x) {
1004	#ifndef RN_DEBUG
1005	t = x;
1006	#else
1007	b = t->rn_info;
1008	t = x;
1009	t->rn_info = b;
1010	#endif
1011	t->rn_left->rn_parent = t;
1012	t->rn_right->rn_parent = t;
1013	p = x->rn_parent;
1014	if (p->rn_left == x) {
1015	p->rn_left = t;
1016	} else {
1017	p->rn_right = t;
1018	}
1019	}
1020	out:
1021	tt->rn_flags &= ~RNF_ACTIVE;
1022	tt[`1`].rn_flags &= ~RNF_ACTIVE;
1023	return tt;
1024	}
1025
1026	/*
1027	* This is the same as rn_walktree() except for the parameters and the
1028	* exit.
1029	*/
1030	static int
1031	rn_walktree_from(struct radix_node_head h, void* a, void* m, walktree_f_t f,
1032	void *w)
1033	{
1034	int error;
1035	struct radix_node base, next;
1036	u_char xa = (u_char )a;
1037	u_char xm = (u_char )m;
1038	struct radix_node rn, last;
1039	int stopping;
1040	int lastb;
1041	int rnh_cnt;
1042
1043	/*
1044	* This gets complicated because we may delete the node while
1045	* applying the function f to it; we cannot simply use the next
1046	* leaf as the successor node in advance, because that leaf may
1047	* be removed as well during deletion when it is a clone of the
1048	* current node. When that happens, we would end up referring
1049	* to an already-freed radix node as the successor node. To get
1050	* around this issue, if we detect that the radix tree has changed
1051	* in dimension (smaller than before), we simply restart the walk
1052	* from the top of tree.
1053	*/
1054	restart:
1055	last = NULL;
1056	stopping = `0`;
1057	rnh_cnt = h->rnh_cnt;
1058
1059	/*
1060	* rn_search_m is sort-of-open-coded here.
1061	*/
1062	for (rn = h->rnh_treetop; rn->rn_bit >= `0`;) {
1063	last = rn;
1064	if (!(rn->rn_bmask & xm[rn->rn_offset])) {
1065	break;
1066	}
1067
1068	if (rn->rn_bmask & xa[rn->rn_offset]) {
1069	rn = rn->rn_right;
1070	} else {
1071	rn = rn->rn_left;
1072	}
1073	}
1074
1075	/*
1076	* Two cases: either we stepped off the end of our mask,
1077	* in which case last == rn, or we reached a leaf, in which
1078	* case we want to start from the last node we looked at.
1079	* Either way, last is the node we want to start from.
1080	*/
1081	rn = last;
1082	lastb = rn->rn_bit;
1083
1084	/ First time through node, go left /
1085	while (rn->rn_bit >= `0`) {
1086	rn = rn->rn_left;
1087	}
1088
1089	while (!stopping) {
1090	base = rn;
1091	/ If at right child go back up, otherwise, go right /
1092	while (rn->rn_parent->rn_right == rn
1093	&& !(rn->rn_flags & RNF_ROOT)) {
1094	rn = rn->rn_parent;
1095
1096	/ if went up beyond last, stop /
1097	if (rn->rn_bit <= lastb) {
1098	stopping = `1`;
1099	/*
1100	* XXX we should jump to the 'Process leaves'
1101	* part, because the values of 'rn' and 'next'
1102	* we compute will not be used. Not a big deal
1103	* because this loop will terminate, but it is
1104	* inefficient and hard to understand!
1105	*/
1106	}
1107	}
1108
1109	/*
1110	* The following code (bug fix) inherited from FreeBSD is
1111	* currently disabled, because our implementation uses the
1112	* RTF_PRCLONING scheme that has been abandoned in current
1113	* FreeBSD release. The scheme involves setting such a flag
1114	* for the default route entry, and therefore all off-link
1115	* destinations would become clones of that entry. Enabling
1116	* the following code would be problematic at this point,
1117	* because the removal of default route would cause only
1118	* the left-half of the tree to be traversed, leaving the
1119	* right-half untouched. If there are clones of the entry
1120	* that reside in that right-half, they would not be deleted
1121	* and would linger around until they expire or explicitly
1122	* deleted, which is a very bad thing.
1123	*
1124	* This code should be uncommented only after we get rid
1125	* of the RTF_PRCLONING scheme.
1126	*/
1127	#if 0
1128	/*
1129	* At the top of the tree, no need to traverse the right
1130	* half, prevent the traversal of the entire tree in the
1131	* case of default route.
1132	*/
1133	if (rn->rn_parent->rn_flags & RNF_ROOT) {
1134	stopping = `1`;
1135	}
1136	#endif
1137
1138	/ Find the next leaf to start from /
1139	for (rn = rn->rn_parent->rn_right; rn->rn_bit >= `0`;) {
1140	rn = rn->rn_left;
1141	}
1142	next = rn;
1143	/ Process leaves /
1144	while ((rn = base) != `0`) {
1145	base = rn->rn_dupedkey;
1146	if (!(rn->rn_flags & RNF_ROOT)
1147	&& (error = (*f)(rn, w))) {
1148	return error;
1149	}
1150	}
1151	/ If one or more nodes got deleted, restart from top /
1152	if (h->rnh_cnt < rnh_cnt) {
1153	goto restart;
1154	}
1155	rn = next;
1156	if (rn->rn_flags & RNF_ROOT) {
1157	stopping = `1`;
1158	}
1159	}
1160	return `0`;
1161	}
1162
1163	static int
1164	rn_walktree(struct radix_node_head h, walktree_f_t f, void *w)
1165	{
1166	int error;
1167	struct radix_node base, next;
1168	struct radix_node *rn;
1169	int rnh_cnt;
1170
1171	/*
1172	* This gets complicated because we may delete the node while
1173	* applying the function f to it; we cannot simply use the next
1174	* leaf as the successor node in advance, because that leaf may
1175	* be removed as well during deletion when it is a clone of the
1176	* current node. When that happens, we would end up referring
1177	* to an already-freed radix node as the successor node. To get
1178	* around this issue, if we detect that the radix tree has changed
1179	* in dimension (smaller than before), we simply restart the walk
1180	* from the top of tree.
1181	*/
1182	restart:
1183	rn = h->rnh_treetop;
1184	rnh_cnt = h->rnh_cnt;
1185
1186	/ First time through node, go left /
1187	while (rn->rn_bit >= `0`) {
1188	rn = rn->rn_left;
1189	}
1190	for (;;) {
1191	base = rn;
1192	/ If at right child go back up, otherwise, go right /
1193	while (rn->rn_parent->rn_right == rn &&
1194	(rn->rn_flags & RNF_ROOT) == `0`) {
1195	rn = rn->rn_parent;
1196	}
1197	/ Find the next leaf to start from /
1198	for (rn = rn->rn_parent->rn_right; rn->rn_bit >= `0`;) {
1199	rn = rn->rn_left;
1200	}
1201	next = rn;
1202	/ Process leaves /
1203	while ((rn = base) != NULL) {
1204	base = rn->rn_dupedkey;
1205	if (!(rn->rn_flags & RNF_ROOT)
1206	&& (error = (*f)(rn, w))) {
1207	return error;
1208	}
1209	}
1210	/ If one or more nodes got deleted, restart from top /
1211	if (h->rnh_cnt < rnh_cnt) {
1212	goto restart;
1213	}
1214	rn = next;
1215	if (rn->rn_flags & RNF_ROOT) {
1216	return `0`;
1217	}
1218	}
1219	/ NOTREACHED /
1220	}
1221
1222	int
1223	rn_inithead(void *head, int* off)
1224	{
1225	struct radix_node_head *rnh;
1226	struct radix_node t, tt, *ttt;
1227	if (off > INT8_MAX) {
1228	return `0`;
1229	}
1230	if (*head) {
1231	return `1`;
1232	}
1233
1234	rnh = zalloc_flags(radix_node_head_zone, Z_WAITOK_ZERO_NOFAIL);
1235	*head = rnh;
1236	t = rn_newpair(v: rn_zeros, b: off, nodes: rnh->rnh_nodes);
1237	ttt = rnh->rnh_nodes + `2`;
1238	t->rn_right = ttt;
1239	t->rn_parent = t;
1240	tt = t->rn_left;
1241	tt->rn_flags = t->rn_flags = RNF_ROOT \| RNF_ACTIVE;
1242	tt->rn_bit = (short)(-`1` - off);
1243	ttt = tt;
1244	ttt->rn_key = rn_ones;
1245	rnh->rnh_addaddr = rn_addroute;
1246	rnh->rnh_deladdr = rn_delete;
1247	rnh->rnh_matchaddr = rn_match;
1248	rnh->rnh_matchaddr_args = rn_match_args;
1249	rnh->rnh_lookup = rn_lookup;
1250	rnh->rnh_lookup_args = rn_lookup_args;
1251	rnh->rnh_walktree = rn_walktree;
1252	rnh->rnh_walktree_from = rn_walktree_from;
1253	rnh->rnh_treetop = t;
1254	rnh->rnh_cnt = `3`;
1255	return `1`;
1256	}
1257
1258	void
1259	rn_init(void)
1260	{
1261	char cp, cplim;
1262	struct domain *dom;
1263
1264	/ lock already held when rn_init is called /
1265	TAILQ_FOREACH(dom, &domains, dom_entry) {
1266	if (dom->dom_maxrtkey > max_keylen) {
1267	max_keylen = dom->dom_maxrtkey;
1268	}
1269	}
1270	if (max_keylen == `0`) {
1271	log(LOG_ERR,
1272	"rn_init: radix functions require max_keylen be set\n");
1273	return;
1274	}
1275	rn_zeros = zalloc_permanent(`3` * max_keylen, ZALIGN_NONE);
1276	rn_ones = cp = rn_zeros + max_keylen;
1277	addmask_key = cplim = rn_ones + max_keylen;
1278	while (cp < cplim) {
1279	*cp++ = -`1`;
1280	}
1281	if (rn_inithead(head: (void **)&mask_rnhead, off: `0`) == `0`) {
1282	panic("rn_init 2");
1283	}
1284
1285	radix_node_zone = zone_create(name: "radix_node",
1286	size: sizeof(struct radix_node) * `2` + max_keylen,
1287	flags: ZC_PGZ_USE_GUARDS \| ZC_ZFREE_CLEARMEM);
1288	}
1289

Browse the source code of xnu/bsd/net/radix.c