1/*
2 * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * Copyright 1994, 1995 Massachusetts Institute of Technology
30 *
31 * Permission to use, copy, modify, and distribute this software and
32 * its documentation for any purpose and without fee is hereby
33 * granted, provided that both the above copyright notice and this
34 * permission notice appear in all copies, that both the above
35 * copyright notice and this permission notice appear in all
36 * supporting documentation, and that the name of M.I.T. not be used
37 * in advertising or publicity pertaining to distribution of the
38 * software without specific, written prior permission. M.I.T. makes
39 * no representations about the suitability of this software for any
40 * purpose. It is provided "as is" without express or implied
41 * warranty.
42 *
43 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
44 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
45 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
46 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
47 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
48 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
49 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
50 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
51 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
52 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
53 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
54 * SUCH DAMAGE.
55 *
56 */
57
58/*
59 * This code does two things necessary for the enhanced TCP metrics to
60 * function in a useful manner:
61 * 1) It marks all non-host routes as `cloning', thus ensuring that
62 * every actual reference to such a route actually gets turned
63 * into a reference to a host route to the specific destination
64 * requested.
65 * 2) When such routes lose all their references, it arranges for them
66 * to be deleted in some random collection of circumstances, so that
67 * a large quantity of stale routing data is not kept in kernel memory
68 * indefinitely. See in_rtqtimo() below for the exact mechanism.
69 */
70
71#include <sys/param.h>
72#include <sys/systm.h>
73#include <sys/kernel.h>
74#include <sys/sysctl.h>
75#include <sys/socket.h>
76#include <sys/mbuf.h>
77#include <sys/protosw.h>
78#include <sys/syslog.h>
79#include <sys/mcache.h>
80#include <kern/locks.h>
81
82#include <net/if.h>
83#include <net/route.h>
84#include <netinet/in.h>
85#include <netinet/in_var.h>
86#include <netinet/in_arp.h>
87#include <netinet/ip.h>
88#include <netinet/ip6.h>
89#include <netinet6/nd6.h>
90
91extern int tvtohz(struct timeval *);
92
93static int in_rtqtimo_run; /* in_rtqtimo is scheduled to run */
94static void in_rtqtimo(void *);
95static void in_sched_rtqtimo(struct timeval *);
96
97static struct radix_node *in_addroute(void *, void *, struct radix_node_head *,
98 struct radix_node *);
99static struct radix_node *in_deleteroute(void *, void *,
100 struct radix_node_head *);
101static struct radix_node *in_matroute(void *, struct radix_node_head *);
102static struct radix_node *in_matroute_args(void *, struct radix_node_head *,
103 rn_matchf_t *f, void *);
104static void in_clsroute(struct radix_node *, struct radix_node_head *);
105static int in_rtqkill(struct radix_node *, void *);
106
107static int in_ifadownkill(struct radix_node *, void *);
108
/*
 * Do what we need to do when inserting a route.
 *
 * rnh_addaddr hook for the AF_INET radix tree: tags multicast
 * destinations, makes unicast non-host routes protocol-cloning,
 * marks broadcast/local host routes, seeds the route MTU from the
 * interface, and, if insertion of a host route fails because of an
 * existing ARP entry, deletes that entry and retries.
 *
 * Called with rnh_lock held and the rtentry locked; may convert the
 * rtentry lock from spin to full mutex (RT_CONVERT_LOCK).
 */
static struct radix_node *
in_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
    struct radix_node *treenodes)
{
	struct rtentry *rt = (struct rtentry *)treenodes;
	struct sockaddr_in *sin = (struct sockaddr_in *)(void *)rt_key(rt);
	struct radix_node *ret;
	char dbuf[MAX_IPv4_STR_LEN], gbuf[MAX_IPv4_STR_LEN];
	uint32_t flags = rt->rt_flags;	/* snapshot for "oflags" logging below */
	boolean_t verbose = (rt_verbose > 1);

	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
	RT_LOCK_ASSERT_HELD(rt);

	if (verbose)
		rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf));

	/*
	 * For IP, all unicast non-host routes are automatically cloning.
	 */
	if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
		rt->rt_flags |= RTF_MULTICAST;

	if (!(rt->rt_flags & (RTF_HOST | RTF_CLONING | RTF_MULTICAST)))
		rt->rt_flags |= RTF_PRCLONING;

	/*
	 * A little bit of help for both IP output and input:
	 * For host routes, we make sure that RTF_BROADCAST
	 * is set for anything that looks like a broadcast address.
	 * This way, we can avoid an expensive call to in_broadcast()
	 * in ip_output() most of the time (because the route passed
	 * to ip_output() is almost always a host route).
	 *
	 * We also do the same for local addresses, with the thought
	 * that this might one day be used to speed up ip_input().
	 *
	 * We also mark routes to multicast addresses as such, because
	 * it's easy to do and might be useful (but this is much more
	 * dubious since it's so easy to inspect the address). (This
	 * is done above.)
	 */
	if (rt->rt_flags & RTF_HOST) {
		if (in_broadcast(sin->sin_addr, rt->rt_ifp)) {
			rt->rt_flags |= RTF_BROADCAST;
		} else {
			/* Become a regular mutex */
			RT_CONVERT_LOCK(rt);
			IFA_LOCK_SPIN(rt->rt_ifa);
			/* Destination equals the interface's own address */
			if (satosin(rt->rt_ifa->ifa_addr)->sin_addr.s_addr ==
			    sin->sin_addr.s_addr)
				rt->rt_flags |= RTF_LOCAL;
			IFA_UNLOCK(rt->rt_ifa);
		}
	}

	/*
	 * Seed the route MTU from the interface MTU unless the caller
	 * already supplied one or locked the metric against changes.
	 */
	if (!rt->rt_rmx.rmx_mtu && !(rt->rt_rmx.rmx_locks & RTV_MTU) &&
	    rt->rt_ifp) {
		rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
		if (INTF_ADJUST_MTU_FOR_CLAT46(rt->rt_ifp)) {
			rt->rt_rmx.rmx_mtu = IN6_LINKMTU(rt->rt_ifp);
			/* Further adjust the size for CLAT46 expansion */
			rt->rt_rmx.rmx_mtu -= CLAT46_HDR_EXPANSION_OVERHD;
		}
	}

	ret = rn_addroute(v_arg, n_arg, head, treenodes);
	if (ret == NULL && (rt->rt_flags & RTF_HOST)) {
		struct rtentry *rt2;
		/*
		 * We are trying to add a host route, but can't.
		 * Find out if it is because of an
		 * ARP entry and delete it if so.
		 */
		rt2 = rtalloc1_scoped_locked(rt_key(rt), 0,
		    RTF_CLONING | RTF_PRCLONING, sin_get_ifscope(rt_key(rt)));
		if (rt2 != NULL) {
			char dbufc[MAX_IPv4_STR_LEN];

			RT_LOCK(rt2);
			if (verbose)
				rt_str(rt2, dbufc, sizeof (dbufc), NULL, 0);

			/* Only an ARP (link-layer) host route is removable */
			if ((rt2->rt_flags & RTF_LLINFO) &&
			    (rt2->rt_flags & RTF_HOST) &&
			    rt2->rt_gateway != NULL &&
			    rt2->rt_gateway->sa_family == AF_LINK) {
				if (verbose) {
					log(LOG_DEBUG, "%s: unable to insert "
					    "route to %s;%s, flags=%b, due to "
					    "existing ARP route %s->%s "
					    "flags=%b, attempting to delete\n",
					    __func__, dbuf,
					    (rt->rt_ifp != NULL) ?
					    rt->rt_ifp->if_xname : "",
					    rt->rt_flags, RTF_BITS, dbufc,
					    (rt2->rt_ifp != NULL) ?
					    rt2->rt_ifp->if_xname : "",
					    rt2->rt_flags, RTF_BITS);
				}
				/*
				 * Safe to drop rt_lock and use rt_key,
				 * rt_gateway, since holding rnh_lock here
				 * prevents another thread from calling
				 * rt_setgate() on this route.
				 */
				RT_UNLOCK(rt2);
				(void) rtrequest_locked(RTM_DELETE, rt_key(rt2),
				    rt2->rt_gateway, rt_mask(rt2),
				    rt2->rt_flags, NULL);
				/* Retry insertion now that the entry is gone */
				ret = rn_addroute(v_arg, n_arg, head,
				    treenodes);
			} else {
				RT_UNLOCK(rt2);
			}
			rtfree_locked(rt2);
		}
	}

	if (!verbose)
		goto done;

	/* Verbose-only reporting of the insertion outcome */
	if (ret != NULL) {
		if (flags != rt->rt_flags) {
			log(LOG_DEBUG, "%s: route to %s->%s->%s inserted, "
			    "oflags=%b, flags=%b\n", __func__,
			    dbuf, gbuf, (rt->rt_ifp != NULL) ?
			    rt->rt_ifp->if_xname : "", flags, RTF_BITS,
			    rt->rt_flags, RTF_BITS);
		} else {
			log(LOG_DEBUG, "%s: route to %s->%s->%s inserted, "
			    "flags=%b\n", __func__, dbuf, gbuf,
			    (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "",
			    rt->rt_flags, RTF_BITS);
		}
	} else {
		log(LOG_DEBUG, "%s: unable to insert route to %s->%s->%s, "
		    "flags=%b, already exists\n", __func__, dbuf, gbuf,
		    (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "",
		    rt->rt_flags, RTF_BITS);
	}
done:
	return (ret);
}
256
257static struct radix_node *
258in_deleteroute(void *v_arg, void *netmask_arg, struct radix_node_head *head)
259{
260 struct radix_node *rn;
261
262 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
263
264 rn = rn_delete(v_arg, netmask_arg, head);
265 if (rt_verbose > 1 && rn != NULL) {
266 char dbuf[MAX_IPv4_STR_LEN], gbuf[MAX_IPv4_STR_LEN];
267 struct rtentry *rt = (struct rtentry *)rn;
268
269 RT_LOCK(rt);
270 rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf));
271 log(LOG_DEBUG, "%s: route to %s->%s->%s deleted, "
272 "flags=%b\n", __func__, dbuf, gbuf, (rt->rt_ifp != NULL) ?
273 rt->rt_ifp->if_xname : "", rt->rt_flags, RTF_BITS);
274 RT_UNLOCK(rt);
275 }
276 return (rn);
277}
278
279/*
280 * Validate (unexpire) an expiring AF_INET route.
281 */
282struct radix_node *
283in_validate(struct radix_node *rn)
284{
285 struct rtentry *rt = (struct rtentry *)rn;
286
287 RT_LOCK_ASSERT_HELD(rt);
288
289 /* This is first reference? */
290 if (rt->rt_refcnt == 0) {
291 if (rt_verbose > 2) {
292 char dbuf[MAX_IPv4_STR_LEN], gbuf[MAX_IPv4_STR_LEN];
293
294 rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf));
295 log(LOG_DEBUG, "%s: route to %s->%s->%s validated, "
296 "flags=%b\n", __func__, dbuf, gbuf,
297 (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "",
298 rt->rt_flags, RTF_BITS);
299 }
300
301 /*
302 * It's one of ours; unexpire it. If the timer is already
303 * scheduled, let it run later as it won't re-arm itself
304 * if there's nothing to do.
305 */
306 if (rt->rt_flags & RTPRF_OURS) {
307 rt->rt_flags &= ~RTPRF_OURS;
308 rt_setexpire(rt, 0);
309 }
310 }
311 return (rn);
312}
313
314/*
315 * Similar to in_matroute_args except without the leaf-matching parameters.
316 */
317static struct radix_node *
318in_matroute(void *v_arg, struct radix_node_head *head)
319{
320 return (in_matroute_args(v_arg, head, NULL, NULL));
321}
322
323/*
324 * This code is the inverse of in_clsroute: on first reference, if we
325 * were managing the route, stop doing so and set the expiration timer
326 * back off again.
327 */
328static struct radix_node *
329in_matroute_args(void *v_arg, struct radix_node_head *head,
330 rn_matchf_t *f, void *w)
331{
332 struct radix_node *rn = rn_match_args(v_arg, head, f, w);
333
334 if (rn != NULL) {
335 RT_LOCK_SPIN((struct rtentry *)rn);
336 in_validate(rn);
337 RT_UNLOCK((struct rtentry *)rn);
338 }
339 return (rn);
340}
341
/* one hour is ``really old''; tunable via net.inet.ip.rtexpire */
static uint32_t rtq_reallyold = 60*60;
SYSCTL_UINT(_net_inet_ip, IPCTL_RTEXPIRE, rtexpire,
    CTLFLAG_RW | CTLFLAG_LOCKED, &rtq_reallyold, 0,
    "Default expiration time on dynamically learned routes");

/* never automatically crank down to less; net.inet.ip.rtminexpire */
static uint32_t rtq_minreallyold = 10;
SYSCTL_UINT(_net_inet_ip, IPCTL_RTMINEXPIRE, rtminexpire,
    CTLFLAG_RW | CTLFLAG_LOCKED, &rtq_minreallyold, 0,
    "Minimum time to attempt to hold onto dynamically learned routes");

/* 128 cached routes is ``too many''; net.inet.ip.rtmaxcache */
static uint32_t rtq_toomany = 128;
SYSCTL_UINT(_net_inet_ip, IPCTL_RTMAXCACHE, rtmaxcache,
    CTLFLAG_RW | CTLFLAG_LOCKED, &rtq_toomany, 0,
    "Upper limit on dynamically learned routes");
359
/*
 * On last reference drop, mark the route as belong to us so that it can be
 * timed out.
 *
 * rnh_close hook for AF_INET.  Applies only to cloned/dynamic host
 * routes that are still UP and not already managed.  The route is
 * either deleted immediately (RTF_DELCLONE set, or route caching
 * disabled via rtq_reallyold == 0) or stamped RTPRF_OURS with an
 * expiration time, with the reaper timer armed to collect it later.
 *
 * Called with rnh_lock held and the rtentry locked.
 */
static void
in_clsroute(struct radix_node *rn, struct radix_node_head *head)
{
#pragma unused(head)
	char dbuf[MAX_IPv4_STR_LEN], gbuf[MAX_IPv4_STR_LEN];
	struct rtentry *rt = (struct rtentry *)rn;
	boolean_t verbose = (rt_verbose > 1);

	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
	RT_LOCK_ASSERT_HELD(rt);

	if (!(rt->rt_flags & RTF_UP))
		return; /* prophylactic measures */

	/* Host routes only; RTF_LLINFO (ARP) entries are managed elsewhere */
	if ((rt->rt_flags & (RTF_LLINFO | RTF_HOST)) != RTF_HOST)
		return;

	/* Already marked for expiration */
	if (rt->rt_flags & RTPRF_OURS)
		return;

	/* Only routes we cloned or learned dynamically are reaped */
	if (!(rt->rt_flags & (RTF_WASCLONED | RTF_DYNAMIC)))
		return;

	if (verbose)
		rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf));

	/*
	 * Delete the route immediately if RTF_DELCLONE is set or
	 * if route caching is disabled (rtq_reallyold set to 0).
	 * Otherwise, let it expire and be deleted by in_rtqkill().
	 */
	if ((rt->rt_flags & RTF_DELCLONE) || rtq_reallyold == 0) {
		int err;

		if (verbose) {
			log(LOG_DEBUG, "%s: deleting route to %s->%s->%s, "
			    "flags=%b\n", __func__, dbuf, gbuf,
			    (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "",
			    rt->rt_flags, RTF_BITS);
		}
		/*
		 * Delete the route from the radix tree but since we are
		 * called when the route's reference count is 0, don't
		 * deallocate it until we return from this routine by
		 * telling rtrequest that we're interested in it.
		 * Safe to drop rt_lock and use rt_key, rt_gateway since
		 * holding rnh_lock here prevents another thread from
		 * calling rt_setgate() on this route.
		 */
		RT_UNLOCK(rt);
		err = rtrequest_locked(RTM_DELETE, rt_key(rt),
		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, &rt);
		if (err == 0) {
			/* Now let the caller free it */
			RT_LOCK(rt);
			RT_REMREF_LOCKED(rt);
		} else {
			RT_LOCK(rt);
			/* dbuf/gbuf weren't filled earlier; do it for the log */
			if (!verbose)
				rt_str(rt, dbuf, sizeof (dbuf),
				    gbuf, sizeof (gbuf));
			log(LOG_ERR, "%s: error deleting route to "
			    "%s->%s->%s, flags=%b, err=%d\n", __func__,
			    dbuf, gbuf, (rt->rt_ifp != NULL) ?
			    rt->rt_ifp->if_xname : "", rt->rt_flags,
			    RTF_BITS, err);
		}
	} else {
		uint64_t timenow;

		/* Mark as ours and schedule the expiration */
		timenow = net_uptime();
		rt->rt_flags |= RTPRF_OURS;
		rt_setexpire(rt, timenow + rtq_reallyold);

		if (verbose) {
			log(LOG_DEBUG, "%s: route to %s->%s->%s invalidated, "
			    "flags=%b, expire=T+%u\n", __func__, dbuf, gbuf,
			    (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "",
			    rt->rt_flags, RTF_BITS, rt->rt_expire - timenow);
		}

		/* We have at least one entry; arm the timer if not already */
		in_sched_rtqtimo(NULL);
	}
}
449
/*
 * Argument/result block passed to the in_rtqkill() tree walker.
 */
struct rtqk_arg {
	struct radix_node_head *rnh;	/* radix tree being walked */
	int updating;		/* clamp expirations down to rtq_reallyold */
	int draining;		/* delete even if not yet expired */
	uint32_t killed;	/* out: number of managed routes deleted */
	uint32_t found;		/* out: number of managed routes seen */
	uint64_t nextstop;	/* out: earliest remaining expiration time */
};
458
/*
 * Get rid of old routes. When draining, this deletes everything, even when
 * the timeout is not expired yet. When updating, this makes sure that
 * nothing has a timeout longer than the current value of rtq_reallyold.
 *
 * rnh_walktree callback; only routes marked RTPRF_OURS are touched.
 * Called with rnh_lock held; takes and releases each route's lock.
 */
static int
in_rtqkill(struct radix_node *rn, void *rock)
{
	struct rtqk_arg *ap = rock;
	struct rtentry *rt = (struct rtentry *)rn;
	boolean_t verbose = (rt_verbose > 1);
	uint64_t timenow;
	int err;

	timenow = net_uptime();
	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);

	RT_LOCK(rt);
	if (rt->rt_flags & RTPRF_OURS) {
		char dbuf[MAX_IPv4_STR_LEN], gbuf[MAX_IPv4_STR_LEN];

		if (verbose)
			rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf));

		ap->found++;
		/* rt_expire and rmx_expire must agree: both zero or both set */
		VERIFY(rt->rt_expire == 0 || rt->rt_rmx.rmx_expire != 0);
		VERIFY(rt->rt_expire != 0 || rt->rt_rmx.rmx_expire == 0);
		if (ap->draining || rt->rt_expire <= timenow) {
			/* A managed route must be unreferenced by definition */
			if (rt->rt_refcnt > 0) {
				panic("%s: route %p marked with RTPRF_OURS "
				    "with non-zero refcnt (%u)", __func__,
				    rt, rt->rt_refcnt);
				/* NOTREACHED */
			}

			if (verbose) {
				log(LOG_DEBUG, "%s: deleting route to "
				    "%s->%s->%s, flags=%b, draining=%d\n",
				    __func__, dbuf, gbuf, (rt->rt_ifp != NULL) ?
				    rt->rt_ifp->if_xname : "", rt->rt_flags,
				    RTF_BITS, ap->draining);
			}
			RT_ADDREF_LOCKED(rt); /* for us to free below */
			/*
			 * Delete this route since we're done with it;
			 * the route may be freed afterwards, so we
			 * can no longer refer to 'rt' upon returning
			 * from rtrequest(). Safe to drop rt_lock and
			 * use rt_key, rt_gateway since holding rnh_lock
			 * here prevents another thread from calling
			 * rt_setgate() on this route.
			 */
			RT_UNLOCK(rt);
			err = rtrequest_locked(RTM_DELETE, rt_key(rt),
			    rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
			if (err != 0) {
				RT_LOCK(rt);
				/* dbuf/gbuf weren't filled above; do it now */
				if (!verbose)
					rt_str(rt, dbuf, sizeof (dbuf),
					    gbuf, sizeof (gbuf));
				log(LOG_ERR, "%s: error deleting route to "
				    "%s->%s->%s, flags=%b, err=%d\n", __func__,
				    dbuf, gbuf, (rt->rt_ifp != NULL) ?
				    rt->rt_ifp->if_xname : "", rt->rt_flags,
				    RTF_BITS, err);
				RT_UNLOCK(rt);
			} else {
				ap->killed++;
			}
			/* Drop the reference we took above */
			rtfree_locked(rt);
		} else {
			/* Not yet expired; optionally clamp the expiration */
			uint64_t expire = (rt->rt_expire - timenow);

			if (ap->updating && expire > rtq_reallyold) {
				rt_setexpire(rt, timenow + rtq_reallyold);
				if (verbose) {
					log(LOG_DEBUG, "%s: route to "
					    "%s->%s->%s, flags=%b, adjusted "
					    "expire=T+%u (was T+%u)\n",
					    __func__, dbuf, gbuf,
					    (rt->rt_ifp != NULL) ?
					    rt->rt_ifp->if_xname : "",
					    rt->rt_flags, RTF_BITS,
					    (rt->rt_expire - timenow), expire);
				}
			}
			/* Track the earliest remaining expiration */
			ap->nextstop = lmin(ap->nextstop, rt->rt_expire);
			RT_UNLOCK(rt);
		}
	} else {
		RT_UNLOCK(rt);
	}

	return (0);
}
554
#define RTQ_TIMEOUT 60*10 /* run no less than once every ten minutes */
static int rtq_timeout = RTQ_TIMEOUT;

/*
 * Periodic reaper: walk the AF_INET tree with in_rtqkill() to delete
 * expired RTPRF_OURS routes, adaptively shorten rtq_reallyold when
 * more than rtq_toomany managed routes remain, and re-arm the timer
 * only while there is still work outstanding.
 */
static void
in_rtqtimo(void *targ)
{
#pragma unused(targ)
	struct radix_node_head *rnh;
	struct rtqk_arg arg;
	struct timeval atv;
	static uint64_t last_adjusted_timeout = 0;
	boolean_t verbose = (rt_verbose > 1);
	uint64_t timenow;
	uint32_t ours;

	lck_mtx_lock(rnh_lock);
	rnh = rt_tables[AF_INET];
	VERIFY(rnh != NULL);

	/* Get the timestamp after we acquire the lock for better accuracy */
	timenow = net_uptime();
	if (verbose) {
		log(LOG_DEBUG, "%s: initial nextstop is T+%u seconds\n",
		    __func__, rtq_timeout);
	}
	/* First pass: reap expired routes, find the earliest expiration */
	bzero(&arg, sizeof (arg));
	arg.rnh = rnh;
	arg.nextstop = timenow + rtq_timeout;
	rnh->rnh_walktree(rnh, in_rtqkill, &arg);
	if (verbose) {
		log(LOG_DEBUG, "%s: found %u, killed %u\n", __func__,
		    arg.found, arg.killed);
	}
	/*
	 * Attempt to be somewhat dynamic about this:
	 * If there are ``too many'' routes sitting around taking up space,
	 * then crank down the timeout, and see if we can't make some more
	 * go away. However, we make sure that we will never adjust more
	 * than once in rtq_timeout seconds, to keep from cranking down too
	 * hard.
	 */
	ours = (arg.found - arg.killed);
	if (ours > rtq_toomany &&
	    ((timenow - last_adjusted_timeout) >= (uint64_t)rtq_timeout) &&
	    rtq_reallyold > rtq_minreallyold) {
		/* Reduce lifetime to 2/3, bounded below by the minimum */
		rtq_reallyold = 2 * rtq_reallyold / 3;
		if (rtq_reallyold < rtq_minreallyold)
			rtq_reallyold = rtq_minreallyold;

		last_adjusted_timeout = timenow;
		if (verbose) {
			log(LOG_DEBUG, "%s: adjusted rtq_reallyold to %d "
			    "seconds\n", __func__, rtq_reallyold);
		}
		/* Second pass: clamp remaining expirations to the new value */
		arg.found = arg.killed = 0;
		arg.updating = 1;
		rnh->rnh_walktree(rnh, in_rtqkill, &arg);
	}

	atv.tv_usec = 0;
	atv.tv_sec = arg.nextstop - timenow;
	/* re-arm the timer only if there's work to do */
	in_rtqtimo_run = 0;
	if (ours > 0)
		in_sched_rtqtimo(&atv);
	else if (verbose)
		log(LOG_DEBUG, "%s: not rescheduling timer\n", __func__);
	lck_mtx_unlock(rnh_lock);
}
624
625static void
626in_sched_rtqtimo(struct timeval *atv)
627{
628 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
629
630 if (!in_rtqtimo_run) {
631 struct timeval tv;
632
633 if (atv == NULL) {
634 tv.tv_usec = 0;
635 tv.tv_sec = MAX(rtq_timeout / 10, 1);
636 atv = &tv;
637 }
638 if (rt_verbose > 1) {
639 log(LOG_DEBUG, "%s: timer scheduled in "
640 "T+%llus.%lluu\n", __func__,
641 (uint64_t)atv->tv_sec, (uint64_t)atv->tv_usec);
642 }
643 in_rtqtimo_run = 1;
644 timeout(in_rtqtimo, NULL, tvtohz(atv));
645 }
646}
647
648void
649in_rtqdrain(void)
650{
651 struct radix_node_head *rnh;
652 struct rtqk_arg arg;
653
654 if (rt_verbose > 1)
655 log(LOG_DEBUG, "%s: draining routes\n", __func__);
656
657 lck_mtx_lock(rnh_lock);
658 rnh = rt_tables[AF_INET];
659 VERIFY(rnh != NULL);
660 bzero(&arg, sizeof (arg));
661 arg.rnh = rnh;
662 arg.draining = 1;
663 rnh->rnh_walktree(rnh, in_rtqkill, &arg);
664 lck_mtx_unlock(rnh_lock);
665}
666
667/*
668 * Initialize our routing tree.
669 */
670int
671in_inithead(void **head, int off)
672{
673 struct radix_node_head *rnh;
674
675 /* If called from route_init(), make sure it is exactly once */
676 VERIFY(head != (void **)&rt_tables[AF_INET] || *head == NULL);
677
678 if (!rn_inithead(head, off))
679 return (0);
680
681 /*
682 * We can get here from nfs_subs.c as well, in which case this
683 * won't be for the real routing table and thus we're done;
684 * this also takes care of the case when we're called more than
685 * once from anywhere but route_init().
686 */
687 if (head != (void **)&rt_tables[AF_INET])
688 return (1); /* only do this for the real routing table */
689
690 rnh = *head;
691 rnh->rnh_addaddr = in_addroute;
692 rnh->rnh_deladdr = in_deleteroute;
693 rnh->rnh_matchaddr = in_matroute;
694 rnh->rnh_matchaddr_args = in_matroute_args;
695 rnh->rnh_close = in_clsroute;
696 return (1);
697}
698
/*
 * This zaps old routes when the interface goes down or interface
 * address is deleted. In the latter case, it deletes static routes
 * that point to this address. If we don't do this, we may end up
 * using the old address in the future. The ones we always want to
 * get rid of are things like ARP entries, since the user might down
 * the interface, walk over to a completely different network, and
 * plug back in.
 */
struct in_ifadown_arg {
	struct radix_node_head *rnh;	/* radix tree being walked */
	struct ifaddr *ifa;		/* address whose routes are purged */
	int del;		/* nonzero: also remove RTF_STATIC routes */
};
713
/*
 * rnh_walktree callback for in_ifadown(): delete any route bound to
 * the target ifaddr.  Static routes are spared unless ap->del is set.
 * Called with rnh_lock held; takes and releases each route's lock.
 */
static int
in_ifadownkill(struct radix_node *rn, void *xap)
{
	char dbuf[MAX_IPv4_STR_LEN], gbuf[MAX_IPv4_STR_LEN];
	struct in_ifadown_arg *ap = xap;
	struct rtentry *rt = (struct rtentry *)rn;
	boolean_t verbose = (rt_verbose != 0);
	int err;

	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);

	RT_LOCK(rt);
	if (rt->rt_ifa == ap->ifa &&
	    (ap->del || !(rt->rt_flags & RTF_STATIC))) {
		rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf));
		if (verbose) {
			log(LOG_DEBUG, "%s: deleting route to %s->%s->%s, "
			    "flags=%b\n", __func__, dbuf, gbuf,
			    (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "",
			    rt->rt_flags, RTF_BITS);
		}
		RT_ADDREF_LOCKED(rt); /* for us to free below */
		/*
		 * We need to disable the automatic prune that happens
		 * in this case in rtrequest() because it will blow
		 * away the pointers that rn_walktree() needs in order
		 * continue our descent. We will end up deleting all
		 * the routes that rtrequest() would have in any case,
		 * so that behavior is not needed there. Safe to drop
		 * rt_lock and use rt_key, rt_gateway, since holding
		 * rnh_lock here prevents another thread from calling
		 * rt_setgate() on this route.
		 */
		rt->rt_flags &= ~(RTF_CLONING | RTF_PRCLONING);
		RT_UNLOCK(rt);
		err = rtrequest_locked(RTM_DELETE, rt_key(rt),
		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
		if (err != 0) {
			RT_LOCK(rt);
			/* dbuf/gbuf weren't filled above; do it for the log */
			if (!verbose)
				rt_str(rt, dbuf, sizeof (dbuf),
				    gbuf, sizeof (gbuf));
			log(LOG_ERR, "%s: error deleting route to "
			    "%s->%s->%s, flags=%b, err=%d\n", __func__,
			    dbuf, gbuf, (rt->rt_ifp != NULL) ?
			    rt->rt_ifp->if_xname : "", rt->rt_flags,
			    RTF_BITS, err);
			RT_UNLOCK(rt);
		}
		/* Drop the reference we took above */
		rtfree_locked(rt);
	} else {
		RT_UNLOCK(rt);
	}
	return (0);
}
769
770int
771in_ifadown(struct ifaddr *ifa, int delete)
772{
773 struct in_ifadown_arg arg;
774 struct radix_node_head *rnh;
775
776 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
777
778 /*
779 * Holding rnh_lock here prevents the possibility of
780 * ifa from changing (e.g. in_ifinit), so it is safe
781 * to access its ifa_addr without locking.
782 */
783 if (ifa->ifa_addr->sa_family != AF_INET)
784 return (1);
785
786 /* trigger route cache reevaluation */
787 routegenid_inet_update();
788
789 arg.rnh = rnh = rt_tables[AF_INET];
790 arg.ifa = ifa;
791 arg.del = delete;
792 rnh->rnh_walktree(rnh, in_ifadownkill, &arg);
793 IFA_LOCK_SPIN(ifa);
794 ifa->ifa_flags &= ~IFA_ROUTE;
795 IFA_UNLOCK(ifa);
796 return (0);
797}
798