1 | /* |
2 | * Copyright (c) 2003-2021 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | |
29 | /* |
30 | * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. |
31 | * All rights reserved. |
32 | * |
33 | * Redistribution and use in source and binary forms, with or without |
34 | * modification, are permitted provided that the following conditions |
35 | * are met: |
36 | * 1. Redistributions of source code must retain the above copyright |
37 | * notice, this list of conditions and the following disclaimer. |
38 | * 2. Redistributions in binary form must reproduce the above copyright |
39 | * notice, this list of conditions and the following disclaimer in the |
40 | * documentation and/or other materials provided with the distribution. |
41 | * 3. Neither the name of the project nor the names of its contributors |
42 | * may be used to endorse or promote products derived from this software |
43 | * without specific prior written permission. |
44 | * |
45 | * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND |
46 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
47 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
48 | * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE |
49 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
50 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
51 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
52 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
53 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
54 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
55 | * SUCH DAMAGE. |
56 | */ |
57 | |
58 | /* |
59 | * Copyright 1994, 1995 Massachusetts Institute of Technology |
60 | * |
61 | * Permission to use, copy, modify, and distribute this software and |
62 | * its documentation for any purpose and without fee is hereby |
63 | * granted, provided that both the above copyright notice and this |
64 | * permission notice appear in all copies, that both the above |
65 | * copyright notice and this permission notice appear in all |
66 | * supporting documentation, and that the name of M.I.T. not be used |
67 | * in advertising or publicity pertaining to distribution of the |
68 | * software without specific, written prior permission. M.I.T. makes |
69 | * no representations about the suitability of this software for any |
70 | * purpose. It is provided "as is" without express or implied |
71 | * warranty. |
72 | * |
73 | * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS |
74 | * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, |
75 | * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF |
76 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT |
77 | * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
78 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
79 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF |
80 | * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
81 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
82 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT |
83 | * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
84 | * SUCH DAMAGE. |
85 | * |
86 | */ |
87 | |
88 | /* |
89 | * This code does two things necessary for the enhanced TCP metrics to |
90 | * function in a useful manner: |
 *	1) It marks all non-host routes as `cloning', thus ensuring that
 *	   every reference to such a route gets turned into a reference
 *	   to a host route for the specific destination requested.
 *	2) When such routes lose all their references, it arranges for them
 *	   to be deleted after a period of disuse, so that a large quantity
 *	   of stale routing data is not kept in kernel memory indefinitely.
 *	   See in6_rtqtimo() below for the exact mechanism.
99 | */ |
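
/*
 * In outline, the life cycle of a cloned IPv6 host route in this file is:
 *
 *   in6_addroute()  - unicast non-host routes get RTF_PRCLONING, so that
 *                     lookups clone a per-destination host route.
 *   in6_clsroute()  - on last reference drop, a cloned (or dynamic) host
 *                     route is either deleted right away (RTF_DELCLONE set,
 *                     or rtq_reallyold == 0) or tagged RTPRF_OURS and given
 *                     an expiration of rtq_reallyold seconds.
 *   in6_validate()  - clears RTPRF_OURS and the expiration again when the
 *                     route is looked up and referenced once more.
 *   in6_rtqtimo()/in6_rtqkill() - periodically sweep expired RTPRF_OURS
 *                     entries, and crank rtq_reallyold down when more than
 *                     rtq_toomany of them are lying around.
 */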
100 | |
101 | #include <sys/param.h> |
102 | #include <sys/systm.h> |
103 | #include <sys/kernel.h> |
104 | #include <sys/sysctl.h> |
105 | #include <kern/queue.h> |
106 | #include <sys/socket.h> |
107 | #include <sys/socketvar.h> |
108 | #include <sys/protosw.h> |
109 | #include <sys/mbuf.h> |
110 | #include <sys/syslog.h> |
111 | #include <sys/mcache.h> |
112 | #include <kern/locks.h> |
113 | |
114 | #include <net/if.h> |
115 | #include <net/route.h> |
116 | #include <netinet/in.h> |
117 | #include <netinet/ip_var.h> |
118 | #include <netinet/in_var.h> |
119 | |
120 | #include <netinet/ip6.h> |
121 | #include <netinet6/ip6_var.h> |
122 | |
123 | #include <netinet/icmp6.h> |
124 | |
125 | #include <netinet/tcp.h> |
126 | #include <netinet/tcp_seq.h> |
127 | #include <netinet/tcp_timer.h> |
128 | #include <netinet/tcp_var.h> |
129 | |
130 | #include <net/sockaddr_utils.h> |
131 | |
132 | extern int tvtohz(struct timeval *); |
133 | |
134 | static int in6_rtqtimo_run; /* in6_rtqtimo is scheduled to run */ |
135 | static void in6_rtqtimo(void *); |
136 | static void in6_sched_rtqtimo(struct timeval *); |
137 | |
138 | static struct radix_node *in6_addroute(void *, void *, struct radix_node_head *, |
139 | struct radix_node *); |
140 | static struct radix_node *in6_deleteroute(void *, void *, |
141 | struct radix_node_head *); |
142 | static struct radix_node *in6_matroute(void *, struct radix_node_head *); |
143 | static struct radix_node *in6_matroute_args(void *, struct radix_node_head *, |
144 | rn_matchf_t *, void *); |
145 | static void in6_clsroute(struct radix_node *, struct radix_node_head *); |
146 | static int in6_rtqkill(struct radix_node *, void *); |
147 | |
148 | /* |
 * Accessed by in6_addroute(), in6_deleteroute() and in6_rtqkill(), all of
 * which run with the routing lock (rnh_lock) held, so the lock protects it.
151 | */ |
152 | static int in6dynroutes; |
153 | |
154 | /* |
155 | * Do what we need to do when inserting a route. |
156 | */ |
157 | static struct radix_node * |
158 | in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, |
159 | struct radix_node *treenodes) |
160 | { |
161 | struct rtentry *rt = (struct rtentry *)treenodes; |
162 | struct sockaddr_in6 *sin6 = SIN6(rt_key(rt)); |
163 | struct radix_node *ret; |
164 | char dbuf[MAX_IPv6_STR_LEN], gbuf[MAX_IPv6_STR_LEN]; |
165 | uint32_t flags = rt->rt_flags; |
166 | boolean_t verbose = (rt_verbose > 0); |
167 | |
168 | LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED); |
169 | RT_LOCK_ASSERT_HELD(rt); |
170 | |
171 | if (verbose) { |
172 | rt_str(rt, dbuf, sizeof(dbuf), gbuf, sizeof(gbuf)); |
173 | } |
174 | |
175 | /* |
176 | * If this is a dynamic route (which is created via Redirect) and |
177 | * we already have the maximum acceptable number of such route entries, |
178 | * reject creating a new one. We could initiate garbage collection to |
	 * make space available right now, but the benefit would probably not
180 | * be worth the cleaning overhead; we only have to endure a slightly |
181 | * suboptimal path even without the redirected route. |
182 | */ |
183 | if ((rt->rt_flags & RTF_DYNAMIC) && |
184 | ip6_maxdynroutes >= 0 && in6dynroutes >= ip6_maxdynroutes) { |
185 | return NULL; |
186 | } |
187 | |
188 | /* |
189 | * For IPv6, all unicast non-host routes are automatically cloning. |
190 | */ |
191 | if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { |
192 | rt->rt_flags |= RTF_MULTICAST; |
193 | } |
194 | |
195 | if (!(rt->rt_flags & (RTF_HOST | RTF_CLONING | RTF_MULTICAST))) { |
196 | rt->rt_flags |= RTF_PRCLONING; |
197 | } |
198 | |
199 | /* |
	 * A little bit of help for both IPv6 output and input:
	 *   For local addresses, we make sure that RTF_LOCAL is set,
	 *   with the thought that this might one day be used to speed up
	 *   ip6_input().
	 *
	 * We also mark routes to multicast addresses as such (this is done
	 * above), because it's easy to do and might be useful, although it
	 * is more dubious since the address is just as easy to inspect
	 * directly.
209 | * |
210 | * XXX |
211 | * should elaborate the code. |
212 | */ |
213 | if (rt->rt_flags & RTF_HOST) { |
214 | IFA_LOCK_SPIN(rt->rt_ifa); |
		if (in6_are_addr_equal_scoped(
		    &satosin6(rt->rt_ifa->ifa_addr)->sin6_addr,
		    &sin6->sin6_addr,
		    satosin6(rt->rt_ifa->ifa_addr)->sin6_scope_id,
		    sin6->sin6_scope_id)) {
217 | rt->rt_flags |= RTF_LOCAL; |
218 | } |
219 | IFA_UNLOCK(rt->rt_ifa); |
220 | } |
221 | |
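	/*
	 * Seed the route MTU from the interface MTU unless the route
	 * already has one or the MTU metric has been locked by the caller.
	 */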
222 | if (!rt->rt_rmx.rmx_mtu && !(rt->rt_rmx.rmx_locks & RTV_MTU) && |
223 | rt->rt_ifp) { |
224 | rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; |
225 | } |
226 | |
227 | /* |
	 * rn_addroute() returns NULL if an identical entry already exists
	 * in the tree.
230 | */ |
231 | ret = rn_addroute(v_arg, n_arg, head, treenodes); |
232 | if (ret == NULL && (rt->rt_flags & RTF_HOST)) { |
233 | struct rtentry *rt2; |
234 | /* |
235 | * We are trying to add a host route, but can't. |
236 | * Find out if it is because of an |
237 | * ND6 entry and delete it if so. |
238 | */ |
239 | rt2 = rtalloc1_scoped_locked(SA(sin6), 0, |
240 | RTF_CLONING | RTF_PRCLONING, sin6_get_ifscope(rt_key(rt))); |
241 | if (rt2 != NULL) { |
242 | char dbufc[MAX_IPv6_STR_LEN]; |
243 | |
244 | RT_LOCK(rt2); |
245 | if (verbose) { |
246 | rt_str(rt2, dbufc, sizeof(dbufc), NULL, 0); |
247 | } |
248 | |
249 | if ((rt2->rt_flags & RTF_LLINFO) && |
250 | (rt2->rt_flags & RTF_HOST) && |
251 | rt2->rt_gateway != NULL && |
252 | rt2->rt_gateway->sa_family == AF_LINK) { |
253 | if (verbose) { |
254 | os_log_debug(OS_LOG_DEFAULT, "%s: unable to insert " |
255 | "route to %s:%s, flags=0x%x, due to " |
256 | "existing ND6 route %s->%s " |
257 | "flags=0x%x, attempting to delete\n" , |
258 | __func__, dbuf, |
259 | (rt->rt_ifp != NULL) ? |
260 | rt->rt_ifp->if_xname : "" , |
261 | rt->rt_flags, |
262 | dbufc, (rt2->rt_ifp != NULL) ? |
263 | rt2->rt_ifp->if_xname : "" , |
264 | rt2->rt_flags); |
265 | } |
266 | /* |
267 | * Safe to drop rt_lock and use rt_key, |
268 | * rt_gateway, since holding rnh_lock here |
269 | * prevents another thread from calling |
270 | * rt_setgate() on this route. |
271 | */ |
272 | RT_UNLOCK(rt2); |
273 | (void) rtrequest_locked(RTM_DELETE, rt_key(rt2), |
274 | rt2->rt_gateway, rt_mask(rt2), |
275 | rt2->rt_flags, NULL); |
276 | ret = rn_addroute(v_arg, n_arg, head, |
277 | treenodes); |
278 | } else { |
279 | RT_UNLOCK(rt2); |
280 | } |
281 | rtfree_locked(rt2); |
282 | } |
283 | } else if (ret == NULL && (rt->rt_flags & RTF_CLONING)) { |
284 | struct rtentry *rt2; |
285 | /* |
286 | * We are trying to add a net route, but can't. |
287 | * The following case should be allowed, so we'll make a |
288 | * special check for this: |
		 * Two IPv6 addresses with the same prefix are assigned
		 * to a single interface.
291 | * # ifconfig if0 inet6 3ffe:0501::1 prefix 64 alias (*1) |
292 | * # ifconfig if0 inet6 3ffe:0501::2 prefix 64 alias (*2) |
293 | * In this case, (*1) and (*2) want to add the same |
294 | * net route entry, 3ffe:0501:: -> if0. |
295 | * This case should not raise an error. |
296 | */ |
297 | rt2 = rtalloc1_scoped_locked(SA(sin6), 0, |
298 | RTF_CLONING | RTF_PRCLONING, sin6_get_ifscope(rt_key(rt))); |
299 | if (rt2 != NULL) { |
300 | RT_LOCK(rt2); |
301 | if ((rt2->rt_flags & (RTF_CLONING | RTF_HOST | |
302 | RTF_GATEWAY)) == RTF_CLONING && |
303 | rt2->rt_gateway && |
304 | rt2->rt_gateway->sa_family == AF_LINK && |
305 | rt2->rt_ifp == rt->rt_ifp) { |
306 | ret = rt2->rt_nodes; |
307 | } |
308 | RT_UNLOCK(rt2); |
309 | rtfree_locked(rt2); |
310 | } |
311 | } |
312 | |
313 | if (ret != NULL && (rt->rt_flags & RTF_DYNAMIC)) { |
314 | in6dynroutes++; |
315 | } |
316 | |
317 | if (!verbose) { |
318 | goto done; |
319 | } |
320 | |
321 | if (ret != NULL) { |
322 | if (flags != rt->rt_flags) { |
323 | os_log_debug(OS_LOG_DEFAULT, "%s: route to %s->%s->%s inserted, " |
324 | "oflags=0x%x, flags=0x%x\n" , __func__, |
325 | dbuf, gbuf, (rt->rt_ifp != NULL) ? |
326 | rt->rt_ifp->if_xname : "" , flags, |
327 | rt->rt_flags); |
328 | } else { |
329 | os_log_debug(OS_LOG_DEFAULT, "%s: route to %s->%s->%s inserted, " |
330 | "flags=0x%x\n" , __func__, dbuf, gbuf, |
331 | (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "" , |
332 | rt->rt_flags); |
333 | } |
334 | } else { |
335 | os_log_debug(OS_LOG_DEFAULT, "%s: unable to insert route to %s->%s->%s, " |
336 | "flags=0x%x, already exists\n" , __func__, dbuf, gbuf, |
337 | (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "" , |
338 | rt->rt_flags); |
339 | } |
340 | done: |
341 | return ret; |
342 | } |
343 | |
344 | static struct radix_node * |
345 | in6_deleteroute(void *v_arg, void *netmask_arg, struct radix_node_head *head) |
346 | { |
347 | struct radix_node *rn; |
348 | |
349 | LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED); |
350 | |
351 | rn = rn_delete(v_arg, netmask_arg, head); |
352 | if (rn != NULL) { |
353 | struct rtentry *rt = (struct rtentry *)rn; |
354 | |
355 | RT_LOCK(rt); |
356 | if (rt->rt_flags & RTF_DYNAMIC) { |
357 | in6dynroutes--; |
358 | } |
359 | if (rt_verbose) { |
360 | char dbuf[MAX_IPv6_STR_LEN], gbuf[MAX_IPv6_STR_LEN]; |
361 | |
362 | rt_str(rt, dbuf, sizeof(dbuf), gbuf, sizeof(gbuf)); |
363 | os_log_debug(OS_LOG_DEFAULT, "%s: route to %s->%s->%s deleted, " |
364 | "flags=0x%x\n" , __func__, dbuf, gbuf, |
365 | (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "" , |
366 | rt->rt_flags); |
367 | } |
368 | RT_UNLOCK(rt); |
369 | } |
370 | return rn; |
371 | } |
372 | |
373 | /* |
374 | * Validate (unexpire) an expiring AF_INET6 route. |
375 | */ |
376 | struct radix_node * |
377 | in6_validate(struct radix_node *rn) |
378 | { |
379 | struct rtentry *rt = (struct rtentry *)rn; |
380 | |
381 | RT_LOCK_ASSERT_HELD(rt); |
382 | |
	/* Is this the first reference? */
384 | if (rt->rt_refcnt == 0) { |
385 | if (rt_verbose > 2) { |
386 | char dbuf[MAX_IPv6_STR_LEN], gbuf[MAX_IPv6_STR_LEN]; |
387 | |
388 | rt_str(rt, dbuf, sizeof(dbuf), gbuf, sizeof(gbuf)); |
389 | os_log_debug(OS_LOG_DEFAULT, "%s: route to %s->%s->%s validated, " |
390 | "flags=0x%x\n" , __func__, dbuf, gbuf, |
391 | (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "" , |
392 | rt->rt_flags); |
393 | } |
394 | |
395 | /* |
		 * If it's one of ours, unexpire it.  If the timer is already
		 * scheduled, let it run later, as it won't re-arm itself
		 * when there's nothing left to do.
399 | */ |
400 | if (rt->rt_flags & RTPRF_OURS) { |
401 | rt->rt_flags &= ~RTPRF_OURS; |
402 | rt_setexpire(rt, 0); |
403 | } |
404 | } |
405 | return rn; |
406 | } |
407 | |
408 | /* |
409 | * Similar to in6_matroute_args except without the leaf-matching parameters. |
410 | */ |
411 | static struct radix_node * |
412 | in6_matroute(void *v_arg, struct radix_node_head *head) |
413 | { |
414 | return in6_matroute_args(v_arg, head, NULL, NULL); |
415 | } |
416 | |
417 | /* |
 * This code is the inverse of in6_clsroute(): on first reference, if we
 * were managing the route, stop doing so and clear its expiration timer.
421 | */ |
422 | static struct radix_node * |
423 | in6_matroute_args(void *v_arg, struct radix_node_head *head, |
424 | rn_matchf_t *f, void *w) |
425 | { |
426 | struct radix_node *rn = rn_match_args(v_arg, head, f, w); |
427 | |
428 | if (rn != NULL) { |
429 | RT_LOCK_SPIN((struct rtentry *)rn); |
430 | in6_validate(rn); |
431 | RT_UNLOCK((struct rtentry *)rn); |
432 | } |
433 | return rn; |
434 | } |
435 | |
436 | SYSCTL_DECL(_net_inet6_ip6); |
437 | |
438 | /* one hour is ``really old'' */ |
439 | static uint32_t rtq_reallyold = 60 * 60; |
440 | SYSCTL_UINT(_net_inet6_ip6, IPV6CTL_RTEXPIRE, rtexpire, |
441 | CTLFLAG_RW | CTLFLAG_LOCKED, &rtq_reallyold, 0, "" ); |
442 | |
/* never automatically crank rtq_reallyold down below this */
444 | static uint32_t rtq_minreallyold = 10; |
445 | SYSCTL_UINT(_net_inet6_ip6, IPV6CTL_RTMINEXPIRE, rtminexpire, |
446 | CTLFLAG_RW | CTLFLAG_LOCKED, &rtq_minreallyold, 0, "" ); |
447 | |
448 | /* 128 cached routes is ``too many'' */ |
449 | static uint32_t rtq_toomany = 128; |
450 | SYSCTL_UINT(_net_inet6_ip6, IPV6CTL_RTMAXCACHE, rtmaxcache, |
451 | CTLFLAG_RW | CTLFLAG_LOCKED, &rtq_toomany, 0, "" ); |
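
/*
 * These knobs correspond to the net.inet6.ip6 sysctl entries declared above
 * (rtexpire, rtminexpire and rtmaxcache), so they can be inspected and tuned
 * from user space (assuming the usual sysctl naming), e.g.:
 *
 *	# sysctl net.inet6.ip6.rtexpire net.inet6.ip6.rtmaxcache
 *	# sysctl -w net.inet6.ip6.rtexpire=600
 *
 * Setting rtexpire (rtq_reallyold) to 0 makes in6_clsroute() delete cloned
 * routes immediately instead of letting them expire.
 */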
452 | |
453 | /* |
 * On last reference drop, mark the route as belonging to us so that it can
 * be timed out.
456 | */ |
457 | static void |
458 | in6_clsroute(struct radix_node *rn, struct radix_node_head *head) |
459 | { |
460 | #pragma unused(head) |
461 | char dbuf[MAX_IPv6_STR_LEN], gbuf[MAX_IPv6_STR_LEN]; |
462 | struct rtentry *rt = (struct rtentry *)rn; |
463 | boolean_t verbose = (rt_verbose > 1); |
464 | |
465 | LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED); |
466 | RT_LOCK_ASSERT_HELD(rt); |
467 | |
468 | if (!(rt->rt_flags & RTF_UP)) { |
469 | return; /* prophylactic measures */ |
470 | } |
471 | if ((rt->rt_flags & (RTF_LLINFO | RTF_HOST)) != RTF_HOST) { |
472 | return; |
473 | } |
474 | |
475 | if (rt->rt_flags & RTPRF_OURS) { |
476 | return; |
477 | } |
478 | |
479 | if (!(rt->rt_flags & (RTF_WASCLONED | RTF_DYNAMIC))) { |
480 | return; |
481 | } |
482 | |
483 | if (verbose) { |
484 | rt_str(rt, dbuf, sizeof(dbuf), gbuf, sizeof(gbuf)); |
485 | } |
486 | |
487 | /* |
488 | * Delete the route immediately if RTF_DELCLONE is set or |
489 | * if route caching is disabled (rtq_reallyold set to 0). |
490 | * Otherwise, let it expire and be deleted by in6_rtqkill(). |
491 | */ |
492 | if ((rt->rt_flags & RTF_DELCLONE) || rtq_reallyold == 0) { |
493 | int err; |
494 | |
495 | if (verbose) { |
496 | os_log_debug(OS_LOG_DEFAULT, "%s: deleting route to %s->%s->%s, " |
497 | "flags=0x%x\n" , __func__, dbuf, gbuf, |
498 | (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "" , |
499 | rt->rt_flags); |
500 | } |
501 | /* |
502 | * Delete the route from the radix tree but since we are |
503 | * called when the route's reference count is 0, don't |
504 | * deallocate it until we return from this routine by |
505 | * telling rtrequest that we're interested in it. |
506 | * Safe to drop rt_lock and use rt_key, rt_gateway, |
507 | * since holding rnh_lock here prevents another thread |
508 | * from calling rt_setgate() on this route. |
509 | */ |
510 | RT_UNLOCK(rt); |
511 | err = rtrequest_locked(RTM_DELETE, rt_key(rt), |
512 | rt->rt_gateway, rt_mask(rt), rt->rt_flags, &rt); |
513 | if (err == 0) { |
514 | /* Now let the caller free it */ |
515 | RT_LOCK(rt); |
516 | RT_REMREF_LOCKED(rt); |
517 | } else { |
518 | RT_LOCK(rt); |
519 | if (!verbose) { |
520 | rt_str(rt, dbuf, sizeof(dbuf), |
521 | gbuf, sizeof(gbuf)); |
522 | } |
523 | os_log_error(OS_LOG_DEFAULT, "%s: error deleting route to " |
524 | "%s->%s->%s, flags=0x%x, err=%d\n" , __func__, |
525 | dbuf, gbuf, (rt->rt_ifp != NULL) ? |
526 | rt->rt_ifp->if_xname : "" , rt->rt_flags, |
527 | err); |
528 | } |
529 | } else { |
530 | uint64_t timenow; |
531 | |
532 | timenow = net_uptime(); |
533 | rt->rt_flags |= RTPRF_OURS; |
534 | rt_setexpire(rt, timenow + rtq_reallyold); |
535 | |
536 | if (verbose) { |
			os_log_debug(OS_LOG_DEFAULT, "%s: route to %s->%s->%s invalidated, "
			    "flags=0x%x, expire=T+%llu\n", __func__, dbuf, gbuf,
			    (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "",
			    rt->rt_flags, (uint64_t)(rt->rt_expire - timenow));
541 | } |
542 | |
543 | /* We have at least one entry; arm the timer if not already */ |
544 | in6_sched_rtqtimo(NULL); |
545 | } |
546 | } |
547 | |
struct rtqk_arg {
	struct radix_node_head *rnh;	/* radix tree head being walked */
	int updating;			/* clamp expirations to rtq_reallyold */
	int draining;			/* delete regardless of expiration */
	uint32_t killed;		/* routes deleted during this walk */
	uint32_t found;			/* RTPRF_OURS routes encountered */
	uint64_t nextstop;		/* earliest remaining expiration */
};
556 | |
557 | /* |
 * Get rid of old routes.  When draining, this deletes everything, even when
 * the timeout has not yet expired.  The same applies to dynamic routes once
 * there is a sufficiently large number of them (more than half of the
 * maximum).  When updating, this makes sure that nothing has a timeout
 * longer than the current value of rtq_reallyold.
563 | */ |
564 | static int |
565 | in6_rtqkill(struct radix_node *rn, void *rock) |
566 | { |
567 | struct rtqk_arg *ap = rock; |
568 | struct rtentry *rt = (struct rtentry *)rn; |
569 | boolean_t verbose = (rt_verbose > 1); |
570 | uint64_t timenow; |
571 | int err; |
572 | |
573 | timenow = net_uptime(); |
574 | LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED); |
575 | |
576 | RT_LOCK(rt); |
577 | if (rt->rt_flags & RTPRF_OURS) { |
578 | char dbuf[MAX_IPv6_STR_LEN], gbuf[MAX_IPv6_STR_LEN]; |
579 | |
580 | if (verbose) { |
581 | rt_str(rt, dbuf, sizeof(dbuf), gbuf, sizeof(gbuf)); |
582 | } |
583 | |
584 | ap->found++; |
585 | VERIFY(rt->rt_expire == 0 || rt->rt_rmx.rmx_expire != 0); |
586 | VERIFY(rt->rt_expire != 0 || rt->rt_rmx.rmx_expire == 0); |
587 | if (ap->draining || rt->rt_expire <= timenow || |
588 | ((rt->rt_flags & RTF_DYNAMIC) && ip6_maxdynroutes >= 0 && |
589 | in6dynroutes > ip6_maxdynroutes / 2)) { |
590 | if (rt->rt_refcnt > 0) { |
591 | panic("%s: route %p marked with RTPRF_OURS " |
592 | "with non-zero refcnt (%u)" , __func__, |
593 | rt, rt->rt_refcnt); |
594 | /* NOTREACHED */ |
595 | } |
596 | |
597 | if (verbose) { |
598 | os_log_debug(OS_LOG_DEFAULT, "%s: deleting route to " |
599 | "%s->%s->%s, flags=0x%x, draining=%d\n" , |
600 | __func__, dbuf, gbuf, (rt->rt_ifp != NULL) ? |
601 | rt->rt_ifp->if_xname : "" , rt->rt_flags, |
602 | ap->draining); |
603 | } |
604 | RT_ADDREF_LOCKED(rt); /* for us to free below */ |
605 | /* |
606 | * Delete this route since we're done with it; |
607 | * the route may be freed afterwards, so we |
608 | * can no longer refer to 'rt' upon returning |
609 | * from rtrequest(). Safe to drop rt_lock and |
610 | * use rt_key, rt_gateway, since holding rnh_lock |
611 | * here prevents another thread from calling |
612 | * rt_setgate() on this route. |
613 | */ |
614 | RT_UNLOCK(rt); |
615 | err = rtrequest_locked(RTM_DELETE, rt_key(rt), |
616 | rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL); |
617 | if (err != 0) { |
618 | RT_LOCK(rt); |
619 | if (!verbose) { |
620 | rt_str(rt, dbuf, sizeof(dbuf), |
621 | gbuf, sizeof(gbuf)); |
622 | } |
623 | os_log_error(OS_LOG_DEFAULT, "%s: error deleting route to " |
624 | "%s->%s->%s, flags=0x%x, err=%d\n" , __func__, |
625 | dbuf, gbuf, (rt->rt_ifp != NULL) ? |
626 | rt->rt_ifp->if_xname : "" , rt->rt_flags, |
627 | err); |
628 | RT_UNLOCK(rt); |
629 | } else { |
630 | ap->killed++; |
631 | } |
632 | rtfree_locked(rt); |
633 | } else { |
634 | uint64_t expire = (rt->rt_expire - timenow); |
635 | |
636 | if (ap->updating && expire > rtq_reallyold) { |
637 | rt_setexpire(rt, timenow + rtq_reallyold); |
638 | if (verbose) { |
					os_log_debug(OS_LOG_DEFAULT, "%s: route to "
					    "%s->%s->%s, flags=0x%x, adjusted "
					    "expire=T+%llu (was T+%llu)\n",
					    __func__, dbuf, gbuf,
					    (rt->rt_ifp != NULL) ?
					    rt->rt_ifp->if_xname : "",
					    rt->rt_flags,
					    (uint64_t)(rt->rt_expire - timenow),
					    (uint64_t)expire);
647 | } |
648 | } |
			ap->nextstop = lmin(ap->nextstop, rt->rt_expire);
650 | RT_UNLOCK(rt); |
651 | } |
652 | } else { |
653 | RT_UNLOCK(rt); |
654 | } |
655 | |
656 | return 0; |
657 | } |
658 | |
#define RTQ_TIMEOUT	(60 * 10)	/* run no less than once every ten minutes */
660 | static int rtq_timeout = RTQ_TIMEOUT; |
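
/*
 * When in6_rtqtimo() re-arms itself, it runs again no later than rtq_timeout
 * seconds from now, and sooner if a cached route expires before that.  When
 * no interval is supplied, in6_sched_rtqtimo() arms the first run at
 * rtq_timeout / 10 seconds, but no sooner than one second from now.
 */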
661 | |
662 | static void |
663 | in6_rtqtimo(void *targ) |
664 | { |
665 | #pragma unused(targ) |
666 | struct radix_node_head *rnh; |
667 | struct rtqk_arg arg; |
668 | struct timeval atv; |
669 | static uint64_t last_adjusted_timeout = 0; |
670 | boolean_t verbose = (rt_verbose > 1); |
671 | uint64_t timenow; |
672 | uint32_t ours; |
673 | |
674 | lck_mtx_lock(rnh_lock); |
675 | rnh = rt_tables[AF_INET6]; |
676 | VERIFY(rnh != NULL); |
677 | |
678 | /* Get the timestamp after we acquire the lock for better accuracy */ |
679 | timenow = net_uptime(); |
680 | if (verbose) { |
681 | os_log_debug(OS_LOG_DEFAULT, "%s: initial nextstop is T+%u seconds\n" , |
682 | __func__, rtq_timeout); |
683 | } |
	bzero(&arg, sizeof(arg));
685 | arg.rnh = rnh; |
686 | arg.nextstop = timenow + rtq_timeout; |
687 | rnh->rnh_walktree(rnh, in6_rtqkill, &arg); |
688 | if (verbose) { |
689 | os_log_debug(OS_LOG_DEFAULT, "%s: found %u, killed %u\n" , __func__, |
690 | arg.found, arg.killed); |
691 | } |
692 | /* |
693 | * Attempt to be somewhat dynamic about this: |
694 | * If there are ``too many'' routes sitting around taking up space, |
695 | * then crank down the timeout, and see if we can't make some more |
696 | * go away. However, we make sure that we will never adjust more |
697 | * than once in rtq_timeout seconds, to keep from cranking down too |
698 | * hard. |
699 | */ |
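	/*
	 * With the default settings, repeated adjustments walk rtq_reallyold
	 * down geometrically: 3600 -> 2400 -> 1600 -> ... seconds, never
	 * dropping below rtq_minreallyold (10 seconds by default).
	 */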
700 | ours = (arg.found - arg.killed); |
701 | if (ours > rtq_toomany && |
702 | ((timenow - last_adjusted_timeout) >= (uint64_t)rtq_timeout) && |
703 | rtq_reallyold > rtq_minreallyold) { |
704 | rtq_reallyold = 2 * rtq_reallyold / 3; |
705 | if (rtq_reallyold < rtq_minreallyold) { |
706 | rtq_reallyold = rtq_minreallyold; |
707 | } |
708 | |
709 | last_adjusted_timeout = timenow; |
710 | if (verbose) { |
711 | os_log_debug(OS_LOG_DEFAULT, "%s: adjusted rtq_reallyold to %d " |
712 | "seconds\n" , __func__, rtq_reallyold); |
713 | } |
714 | arg.found = arg.killed = 0; |
715 | arg.updating = 1; |
716 | rnh->rnh_walktree(rnh, in6_rtqkill, &arg); |
717 | } |
718 | |
719 | atv.tv_usec = 0; |
720 | atv.tv_sec = arg.nextstop - timenow; |
721 | /* re-arm the timer only if there's work to do */ |
722 | in6_rtqtimo_run = 0; |
723 | if (ours > 0) { |
724 | in6_sched_rtqtimo(&atv); |
725 | } else if (verbose) { |
726 | os_log_debug(OS_LOG_DEFAULT, "%s: not rescheduling timer\n" , __func__); |
727 | } |
728 | lck_mtx_unlock(rnh_lock); |
729 | } |
730 | |
731 | static void |
732 | in6_sched_rtqtimo(struct timeval *atv) |
733 | { |
734 | LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED); |
735 | |
736 | if (!in6_rtqtimo_run) { |
737 | struct timeval tv; |
738 | |
739 | if (atv == NULL) { |
740 | tv.tv_usec = 0; |
741 | tv.tv_sec = MAX(rtq_timeout / 10, 1); |
742 | atv = &tv; |
743 | } |
744 | if (rt_verbose > 2) { |
745 | os_log_debug(OS_LOG_DEFAULT, "%s: timer scheduled in " |
746 | "T+%llus.%lluu\n" , __func__, |
747 | (uint64_t)atv->tv_sec, (uint64_t)atv->tv_usec); |
748 | } |
749 | in6_rtqtimo_run = 1; |
		timeout(in6_rtqtimo, NULL, tvtohz(atv));
751 | } |
752 | } |
753 | |
754 | void |
755 | in6_rtqdrain(void) |
756 | { |
757 | struct radix_node_head *rnh; |
758 | struct rtqk_arg arg; |
759 | |
760 | if (rt_verbose > 1) { |
761 | os_log_debug(OS_LOG_DEFAULT, "%s: draining routes\n" , __func__); |
762 | } |
763 | |
764 | lck_mtx_lock(rnh_lock); |
765 | rnh = rt_tables[AF_INET6]; |
766 | VERIFY(rnh != NULL); |
	bzero(&arg, sizeof(arg));
768 | arg.rnh = rnh; |
769 | arg.draining = 1; |
770 | rnh->rnh_walktree(rnh, in6_rtqkill, &arg); |
771 | lck_mtx_unlock(rnh_lock); |
772 | } |
773 | |
774 | /* |
775 | * Initialize our routing tree. |
776 | */ |
777 | int |
778 | in6_inithead(void **head, int off) |
779 | { |
780 | struct radix_node_head *rnh; |
781 | |
782 | /* If called from route_init(), make sure it is exactly once */ |
783 | VERIFY(head != (void **)&rt_tables[AF_INET6] || *head == NULL); |
784 | |
785 | if (!rn_inithead(head, off)) { |
786 | return 0; |
787 | } |
788 | |
789 | /* |
790 | * We can get here from nfs_subs.c as well, in which case this |
791 | * won't be for the real routing table and thus we're done; |
792 | * this also takes care of the case when we're called more than |
793 | * once from anywhere but route_init(). |
794 | */ |
795 | if (head != (void **)&rt_tables[AF_INET6]) { |
796 | return 1; /* only do this for the real routing table */ |
797 | } |
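	/*
	 * Hook the AF_INET6-specific handlers defined above into the radix
	 * tree, so that route insertion, deletion, lookup and last-reference
	 * close all go through them.
	 */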
798 | rnh = *head; |
799 | rnh->rnh_addaddr = in6_addroute; |
800 | rnh->rnh_deladdr = in6_deleteroute; |
801 | rnh->rnh_matchaddr = in6_matroute; |
802 | rnh->rnh_matchaddr_args = in6_matroute_args; |
803 | rnh->rnh_close = in6_clsroute; |
804 | return 1; |
805 | } |
806 | |