nd6.c source code [xnu/bsd/netinet6/nd6.c]

1	/*
2	* Copyright (c) 2000-2022 Apple Inc. All rights reserved.
3	*
4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5	*
6	* This file contains Original Code and/or Modifications of Original Code
7	* as defined in and that are subject to the Apple Public Source License
8	* Version 2.0 (the 'License'). You may not use this file except in
9	* compliance with the License. The rights granted to you under the License
10	* may not be used to create, or enable the creation or redistribution of,
11	* unlawful or unlicensed copies of an Apple operating system, or to
12	* circumvent, violate, or enable the circumvention or violation of, any
13	* terms of an Apple operating system software license agreement.
14	*
15	* Please obtain a copy of the License at
16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
17	*
18	* The Original Code and all software distributed under the License are
19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23	* Please see the License for the specific language governing rights and
24	* limitations under the License.
25	*
26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27	*/
28
29	/*
30	* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
31	* All rights reserved.
32	*
33	* Redistribution and use in source and binary forms, with or without
34	* modification, are permitted provided that the following conditions
35	* are met:
36	* 1. Redistributions of source code must retain the above copyright
37	* notice, this list of conditions and the following disclaimer.
38	* 2. Redistributions in binary form must reproduce the above copyright
39	* notice, this list of conditions and the following disclaimer in the
40	* documentation and/or other materials provided with the distribution.
41	* 3. Neither the name of the project nor the names of its contributors
42	* may be used to endorse or promote products derived from this software
43	* without specific prior written permission.
44	*
45	* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
46	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48	* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
49	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55	* SUCH DAMAGE.
56	*/
57
58	/*
59	* XXX
60	* KAME 970409 note:
61	* BSD/OS version heavily modifies this code, related to llinfo.
62	* Since we don't have BSD/OS version of net/route.c in our hand,
63	* I left the code mostly as it was in 970310. -- itojun
64	*/
65
66	#include <sys/param.h>
67	#include <sys/systm.h>
68	#include <sys/malloc.h>
69	#include <sys/mbuf.h>
70	#include <sys/socket.h>
71	#include <sys/sockio.h>
72	#include <sys/time.h>
73	#include <sys/kernel.h>
74	#include <sys/sysctl.h>
75	#include <sys/errno.h>
76	#include <sys/syslog.h>
77	#include <sys/protosw.h>
78	#include <sys/proc.h>
79	#include <sys/mcache.h>
80
81	#include <dev/random/randomdev.h>
82
83	#include <kern/queue.h>
84	#include <kern/zalloc.h>
85
86	#include <net/if.h>
87	#include <net/if_dl.h>
88	#include <net/if_types.h>
89	#include <net/if_llreach.h>
90	#include <net/route.h>
91	#include <net/dlil.h>
92	#include <net/ntstat.h>
93	#include <net/net_osdep.h>
94	#include <net/nwk_wq.h>
95
96	#include <netinet/in.h>
97	#include <netinet/in_arp.h>
98	#include <netinet/if_ether.h>
99	#include <netinet6/in6_var.h>
100	#include <netinet/ip6.h>
101	#include <netinet6/ip6_var.h>
102	#include <netinet6/nd6.h>
103	#include <netinet6/scope6_var.h>
104	#include <netinet/icmp6.h>
105
106	#include <net/sockaddr_utils.h>
107
108	#include <os/log.h>
109
110	#include "loop.h"
111
112	#define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */
113	#define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */
114
115	#define equal(a1, a2) (bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0)
116
117	/ timer values /
118	int nd6_prune = `1`; / walk list every 1 seconds /
119	int nd6_prune_lazy = `5`; / lazily walk list every 5 seconds /
120	int nd6_delay = `5`; / delay first probe time 5 second /
121	int nd6_umaxtries = `3`; / maximum unicast query /
122	int nd6_mmaxtries = `3`; / maximum multicast query /
123	int nd6_useloopback = `1`; / use loopback interface for local traffic /
124	int nd6_gctimer = (`60` * `60` * `24`); / 1 day: garbage collection timer /
125
126	/ preventing too many loops in ND option parsing /
127	int nd6_maxndopt = `10`; / max # of ND options allowed /
128
129	int nd6_maxqueuelen = `1`; / max # of packets cached in unresolved ND entries /
130
131	#if ND6_DEBUG
132	int nd6_debug = `1`;
133	#else
134	int nd6_debug = `0`;
135	#endif
136
137	int nd6_optimistic_dad = ND6_OPTIMISTIC_DAD_DEFAULT;
138
139	/ for debugging? /
140	static int nd6_inuse, nd6_allocated;
141
142	/*
143	* Synchronization notes:
144	*
145	* The global list of ND entries is stored in llinfo_nd6; an entry
146	* gets inserted into the list when the route is created and gets
147	* removed from the list when it is deleted; this is done as part
148	* of RTM_ADD/RTM_RESOLVE/RTM_DELETE in nd6_rtrequest().
149	*
150	* Because rnh_lock and rt_lock for the entry are held during those
151	* operations, the same locks (and thus lock ordering) must be used
152	* elsewhere to access the relevant data structure fields:
153	*
154	* ln_next, ln_prev, ln_rt
155	*
156	* - Routing lock (rnh_lock)
157	*
158	* ln_hold, ln_asked, ln_expire, ln_state, ln_router, ln_flags,
159	* ln_llreach, ln_lastused
160	*
161	* - Routing entry lock (rt_lock)
162	*
163	* Due to the dependency on rt_lock, llinfo_nd6 has the same lifetime
164	* as the route entry itself. When a route is deleted (RTM_DELETE),
165	* it is simply removed from the global list but the memory is not
166	* freed until the route itself is freed.
167	*/
168	struct llinfo_nd6 llinfo_nd6 = {
169	.ln_next = &llinfo_nd6,
170	.ln_prev = &llinfo_nd6,
171	};
172
173	static LCK_GRP_DECLARE(nd_if_lock_grp, "nd_if_lock");
174	static LCK_ATTR_DECLARE(nd_if_lock_attr, `0`, `0`);
175
176	/ Protected by nd6_mutex /
177	struct nd_drhead nd_defrouter_list;
178	struct nd_prhead nd_prefix = { .lh_first = `0` };
179	struct nd_rtihead nd_rti_list;
180	/*
181	* nd6_timeout() is scheduled on a demand basis. nd6_timeout_run is used
182	* to indicate whether or not a timeout has been scheduled. The rnh_lock
183	* mutex is used to protect this scheduling; it is a natural choice given
184	* the work done in the timer callback. Unfortunately, there are cases
185	* when nd6_timeout() needs to be scheduled while rnh_lock cannot be easily
186	* held, due to lock ordering. In those cases, we utilize a "demand" counter
187	* nd6_sched_timeout_want which can be atomically incremented without
188	* having to hold rnh_lock. On places where we acquire rnh_lock, such as
189	* nd6_rtrequest(), we check this counter and schedule the timer if it is
190	* non-zero. The increment happens on various places when we allocate
191	* new ND entries, default routers, prefixes and addresses.
192	*/
193	static int nd6_timeout_run; / nd6_timeout is scheduled to run /
194	static void nd6_timeout(void *);
195	int nd6_sched_timeout_want; / demand count for timer to be sched /
196	static boolean_t nd6_fast_timer_on = FALSE;
197
198	/ Serialization variables for nd6_service(), protected by rnh_lock /
199	static boolean_t nd6_service_busy;
200	static void *nd6_service_wc = &nd6_service_busy;
201	static int nd6_service_waiters = `0`;
202
203	int nd6_recalc_reachtm_interval = ND6_RECALC_REACHTM_INTERVAL;
204	static struct sockaddr_in6 all1_sa;
205
206	static int regen_tmpaddr(struct in6_ifaddr *);
207
208	static struct llinfo_nd6 *nd6_llinfo_alloc(zalloc_flags_t);
209	static void nd6_llinfo_free(void *);
210	static void nd6_llinfo_purge(struct rtentry *);
211	static void nd6_llinfo_get_ri(struct rtentry , struct* rt_reach_info *);
212	static void nd6_llinfo_get_iflri(struct rtentry , struct* ifnet_llreach_info *);
213	static void nd6_llinfo_refresh(struct rtentry *);
214	static uint64_t ln_getexpire(struct llinfo_nd6 *);
215
216	static void nd6_service(void *);
217	static void nd6_slowtimo(void *);
218	static int nd6_is_new_addr_neighbor(struct sockaddr_in6 , struct* ifnet *);
219	static int nd6_siocgdrlst(void , int*);
220	static int nd6_siocgprlst(void , int*);
221
222	static void nd6_router_select_rti_entries(struct ifnet *);
223	static void nd6_purge_interface_default_routers(struct ifnet *);
224	static void nd6_purge_interface_rti_entries(struct ifnet *);
225	static void nd6_purge_interface_prefixes(struct ifnet *);
226	static void nd6_purge_interface_llinfo(struct ifnet *);
227
228	static int nd6_sysctl_drlist SYSCTL_HANDLER_ARGS;
229	static int nd6_sysctl_prlist SYSCTL_HANDLER_ARGS;
230
231	/*
232	* Insertion and removal from llinfo_nd6 must be done with rnh_lock held.
233	*/
#define LN_DEQUEUE(_ln) do {                                            \
	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);                 \
	RT_LOCK_ASSERT_HELD((_ln)->ln_rt);                              \
	/* splice the entry out from between its two neighbors */      \
	(_ln)->ln_next->ln_prev = (_ln)->ln_prev;                       \
	(_ln)->ln_prev->ln_next = (_ln)->ln_next;                       \
	/* NULL links mark the entry as unlinked */                     \
	(_ln)->ln_next = NULL;                                          \
	(_ln)->ln_prev = NULL;                                          \
	(_ln)->ln_flags &= ~ND6_LNF_IN_USE;                             \
} while (0)
242
#define LN_INSERTHEAD(_ln) do {                                         \
	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);                 \
	RT_LOCK_ASSERT_HELD((_ln)->ln_rt);                              \
	/* link the entry in right after the llinfo_nd6 list head */    \
	(_ln)->ln_next = llinfo_nd6.ln_next;                            \
	llinfo_nd6.ln_next = (_ln);                                     \
	(_ln)->ln_prev = &llinfo_nd6;                                   \
	(_ln)->ln_next->ln_prev = (_ln);                                \
	(_ln)->ln_flags |= ND6_LNF_IN_USE;                              \
} while (0)
252
253	static KALLOC_TYPE_DEFINE(llinfo_nd6_zone, struct llinfo_nd6, NET_KT_DEFAULT);
254
255	extern int tvtohz(struct timeval *);
256
257	static int nd6_init_done;
258
259	SYSCTL_DECL(_net_inet6_icmp6);
260
261	SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist,
262	CTLTYPE_STRUCT \| CTLFLAG_RD \| CTLFLAG_LOCKED, `0`, `0`,
263	nd6_sysctl_drlist, "S,in6_defrouter", "");
264
265	SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist,
266	CTLTYPE_STRUCT \| CTLFLAG_RD \| CTLFLAG_LOCKED, `0`, `0`,
267	nd6_sysctl_prlist, "S,in6_defrouter", "");
268
269	SYSCTL_DECL(_net_inet6_ip6);
270
271	static int ip6_maxchainsent = `0`;
272	SYSCTL_INT(_net_inet6_ip6, OID_AUTO, maxchainsent,
273	CTLFLAG_RW \| CTLFLAG_LOCKED, &ip6_maxchainsent, `0`,
274	"use dlil_output_list");
275
276	SYSCTL_DECL(_net_inet6_icmp6);
277	int nd6_process_rti = ND6_PROCESS_RTI_DEFAULT;
278
279	SYSCTL_INT(_net_inet6_icmp6, OID_AUTO, nd6_process_rti, CTLFLAG_RW \| CTLFLAG_LOCKED,
280	&nd6_process_rti, `0`,
281	"Enable/disable processing of Route Information Option in the "
282	"IPv6 Router Advertisement.");
283
284	void
285	nd6_init(void)
286	{
287	int i;
288
289	VERIFY(!nd6_init_done);
290
291	all1_sa.sin6_family = AF_INET6;
292	all1_sa.sin6_len = sizeof(struct sockaddr_in6);
293	for (i = `0`; i < sizeof(all1_sa.sin6_addr); i++) {
294	all1_sa.sin6_addr.s6_addr[i] = `0xff`;
295	}
296
297	/ initialization of the default router list /
298	TAILQ_INIT(&nd_defrouter_list);
299	TAILQ_INIT(&nd_rti_list);
300
301	nd6_nbr_init();
302	nd6_rtr_init();
303
304	nd6_init_done = `1`;
305
306	/ start timer /
307	timeout(nd6_slowtimo, NULL, ND6_SLOWTIMER_INTERVAL * hz);
308	}
309
310	static struct llinfo_nd6 *
311	nd6_llinfo_alloc(zalloc_flags_t how)
312	{
313	return zalloc_flags(llinfo_nd6_zone, how \| Z_ZERO);
314	}
315
316	static void
317	nd6_llinfo_free(void *arg)
318	{
319	struct llinfo_nd6 *ln = arg;
320
321	if (ln->ln_next != NULL \|\| ln->ln_prev != NULL) {
322	panic("%s: trying to free %p when it is in use", __func__, ln);
323	/ NOTREACHED /
324	}
325
326	/ Just in case there's anything there, free it /
327	if (ln->ln_hold != NULL) {
328	m_freem_list(ln->ln_hold);
329	ln->ln_hold = NULL;
330	}
331
332	/ Purge any link-layer info caching /
333	VERIFY(ln->ln_rt->rt_llinfo == ln);
334	if (ln->ln_rt->rt_llinfo_purge != NULL) {
335	ln->ln_rt->rt_llinfo_purge(ln->ln_rt);
336	}
337
338	zfree(llinfo_nd6_zone, ln);
339	}
340
341	static void
342	nd6_llinfo_purge(struct rtentry *rt)
343	{
344	struct llinfo_nd6 *ln = rt->rt_llinfo;
345
346	RT_LOCK_ASSERT_HELD(rt);
347	VERIFY(rt->rt_llinfo_purge == nd6_llinfo_purge && ln != NULL);
348
349	if (ln->ln_llreach != NULL) {
350	RT_CONVERT_LOCK(rt);
351	ifnet_llreach_free(ln->ln_llreach);
352	ln->ln_llreach = NULL;
353	}
354	ln->ln_lastused = `0`;
355	}
356
357	static void
358	nd6_llinfo_get_ri(struct rtentry rt, struct* rt_reach_info *ri)
359	{
360	struct llinfo_nd6 *ln = rt->rt_llinfo;
361	struct if_llreach *lr = ln->ln_llreach;
362
363	if (lr == NULL) {
364	bzero(s: ri, n: sizeof(*ri));
365	ri->ri_rssi = IFNET_RSSI_UNKNOWN;
366	ri->ri_lqm = IFNET_LQM_THRESH_OFF;
367	ri->ri_npm = IFNET_NPM_THRESH_UNKNOWN;
368	} else {
369	IFLR_LOCK(lr);
370	/ Export to rt_reach_info structure /
371	ifnet_lr2ri(lr, ri);
372	/ Export ND6 send expiration (calendar) time /
373	ri->ri_snd_expire =
374	ifnet_llreach_up2calexp(lr, ln->ln_lastused);
375	IFLR_UNLOCK(lr);
376	}
377	}
378
379	static void
380	nd6_llinfo_get_iflri(struct rtentry rt, struct* ifnet_llreach_info *iflri)
381	{
382	struct llinfo_nd6 *ln = rt->rt_llinfo;
383	struct if_llreach *lr = ln->ln_llreach;
384
385	if (lr == NULL) {
386	bzero(s: iflri, n: sizeof(*iflri));
387	iflri->iflri_rssi = IFNET_RSSI_UNKNOWN;
388	iflri->iflri_lqm = IFNET_LQM_THRESH_OFF;
389	iflri->iflri_npm = IFNET_NPM_THRESH_UNKNOWN;
390	} else {
391	IFLR_LOCK(lr);
392	/ Export to ifnet_llreach_info structure /
393	ifnet_lr2iflri(lr, iflri);
394	/ Export ND6 send expiration (uptime) time /
395	iflri->iflri_snd_expire =
396	ifnet_llreach_up2upexp(lr, ln->ln_lastused);
397	IFLR_UNLOCK(lr);
398	}
399	}
400
401	static void
402	nd6_llinfo_refresh(struct rtentry *rt)
403	{
404	struct llinfo_nd6 *ln = rt->rt_llinfo;
405	uint64_t timenow = net_uptime();
406	struct ifnet *ifp = rt->rt_ifp;
407	/*
408	* Can't refresh permanent, static or entries that are
409	* not direct host entries. Also skip if the entry is for
410	* host over an interface that has alternate neighbor cache
411	* management mechanisms (AWDL/NAN)
412	*/
413	if (!ln \|\| ln->ln_expire == `0` \|\| (rt->rt_flags & RTF_STATIC) \|\|
414	!(rt->rt_flags & RTF_LLINFO) \|\| !ifp \|\|
415	(ifp->if_eflags & IFEF_IPV6_ND6ALT)) {
416	return;
417	}
418
419	if ((ln->ln_state > ND6_LLINFO_INCOMPLETE) &&
420	(ln->ln_state < ND6_LLINFO_PROBE)) {
421	if (ln->ln_expire > timenow) {
422	ln_setexpire(ln, timenow);
423	ND6_CACHE_STATE_TRANSITION(ln, ND6_LLINFO_PROBE);
424	}
425	}
426	return;
427	}
428
429	const char *
430	ndcache_state2str(short ndp_state)
431	{
432	const char *ndp_state_str = "UNKNOWN";
433	switch (ndp_state) {
434	case ND6_LLINFO_PURGE:
435	ndp_state_str = "ND6_LLINFO_PURGE";
436	break;
437	case ND6_LLINFO_NOSTATE:
438	ndp_state_str = "ND6_LLINFO_NOSTATE";
439	break;
440	case ND6_LLINFO_INCOMPLETE:
441	ndp_state_str = "ND6_LLINFO_INCOMPLETE";
442	break;
443	case ND6_LLINFO_REACHABLE:
444	ndp_state_str = "ND6_LLINFO_REACHABLE";
445	break;
446	case ND6_LLINFO_STALE:
447	ndp_state_str = "ND6_LLINFO_STALE";
448	break;
449	case ND6_LLINFO_DELAY:
450	ndp_state_str = "ND6_LLINFO_DELAY";
451	break;
452	case ND6_LLINFO_PROBE:
453	ndp_state_str = "ND6_LLINFO_PROBE";
454	break;
455	default:
456	/ Init'd to UNKNOWN /
457	break;
458	}
459	return ndp_state_str;
460	}
461
462	void
463	ln_setexpire(struct llinfo_nd6 *ln, uint64_t expiry)
464	{
465	ln->ln_expire = expiry;
466	}
467
468	static uint64_t
469	ln_getexpire(struct llinfo_nd6 *ln)
470	{
471	struct timeval caltime;
472	uint64_t expiry;
473
474	if (ln->ln_expire != `0`) {
475	struct rtentry *rt = ln->ln_rt;
476
477	VERIFY(rt != NULL);
478	/ account for system time change /
479	getmicrotime(&caltime);
480
481	rt->base_calendartime +=
482	NET_CALCULATE_CLOCKSKEW(caltime,
483	rt->base_calendartime, net_uptime(), rt->base_uptime);
484
485	expiry = rt->base_calendartime +
486	ln->ln_expire - rt->base_uptime;
487	} else {
488	expiry = `0`;
489	}
490	return expiry;
491	}
492
493	void
494	nd6_ifreset(struct ifnet *ifp)
495	{
496	struct nd_ifinfo *ndi = ND_IFINFO(ifp);
497	VERIFY(NULL != ndi);
498	VERIFY(ndi->initialized);
499
500	LCK_MTX_ASSERT(&ndi->lock, LCK_MTX_ASSERT_OWNED);
501	ndi->linkmtu = ifp->if_mtu;
502	ndi->chlim = IPV6_DEFHLIM;
503	ndi->basereachable = REACHABLE_TIME;
504	ndi->reachable = ND_COMPUTE_RTIME(ndi->basereachable);
505	ndi->retrans = RETRANS_TIMER;
506	}
507
508	void
509	nd6_ifattach(struct ifnet *ifp)
510	{
511	struct nd_ifinfo *ndi = ND_IFINFO(ifp);
512
513	VERIFY(NULL != ndi);
514	if (!ndi->initialized) {
515	lck_mtx_init(lck: &ndi->lock, grp: &nd_if_lock_grp, attr: &nd_if_lock_attr);
516	ndi->flags = ND6_IFF_PERFORMNUD;
517	ndi->flags \|= ND6_IFF_DAD;
518	ndi->initialized = TRUE;
519	}
520
521	lck_mtx_lock(lck: &ndi->lock);
522
523	if (!(ifp->if_flags & IFF_MULTICAST)) {
524	ndi->flags \|= ND6_IFF_IFDISABLED;
525	}
526
527	nd6_ifreset(ifp);
528	lck_mtx_unlock(lck: &ndi->lock);
529	nd6_setmtu(ifp);
530
531	nd6log0(info,
532	"Reinit'd ND information for interface %s\n",
533	if_name(ifp));
534	return;
535	}
536
#if 0
/*
 * XXX Look more into this. Especially since we recycle ifnets and do delayed
 * cleanup
 *
 * Compiled out: would release the per-interface ND info.
 */
void
nd6_ifdetach(struct nd_ifinfo *nd)
{
	/* XXX destroy nd's lock? */
	FREE(nd, M_IP6NDP);
}
#endif
549
550	void
551	nd6_setmtu(struct ifnet *ifp)
552	{
553	struct nd_ifinfo *ndi = ND_IFINFO(ifp);
554	u_int32_t oldmaxmtu, maxmtu;
555
556	if ((NULL == ndi) \|\| (FALSE == ndi->initialized)) {
557	return;
558	}
559
560	lck_mtx_lock(lck: &ndi->lock);
561	oldmaxmtu = ndi->maxmtu;
562
563	/*
564	* The ND level maxmtu is somewhat redundant to the interface MTU
565	* and is an implementation artifact of KAME. Instead of hard-
566	* limiting the maxmtu based on the interface type here, we simply
567	* take the if_mtu value since SIOCSIFMTU would have taken care of
568	* the sanity checks related to the maximum MTU allowed for the
569	* interface (a value that is known only by the interface layer),
570	* by sending the request down via ifnet_ioctl(). The use of the
571	* ND level maxmtu and linkmtu are done via IN6_LINKMTU() which
572	* does further checking against if_mtu.
573	*/
574	maxmtu = ndi->maxmtu = ifp->if_mtu;
575
576	/*
577	* Decreasing the interface MTU under IPV6 minimum MTU may cause
578	* undesirable situation. We thus notify the operator of the change
579	* explicitly. The check for oldmaxmtu is necessary to restrict the
580	* log to the case of changing the MTU, not initializing it.
581	*/
582	if (oldmaxmtu >= IPV6_MMTU && ndi->maxmtu < IPV6_MMTU) {
583	log(LOG_NOTICE, "nd6_setmtu: "
584	"new link MTU on %s (%u) is too small for IPv6\n",
585	if_name(ifp), (uint32_t)ndi->maxmtu);
586	}
587	ndi->linkmtu = ifp->if_mtu;
588	lck_mtx_unlock(lck: &ndi->lock);
589
590	/ also adjust in6_maxmtu if necessary. /
591	if (maxmtu > in6_maxmtu) {
592	in6_setmaxmtu();
593	}
594	}
595
596	void
597	nd6_option_init(void opt, int* icmp6len, union nd_opts *ndopts)
598	{
599	bzero(s: ndopts, n: sizeof(*ndopts));
600	ndopts->nd_opts_search = (struct nd_opt_hdr *)opt;
601	ndopts->nd_opts_last =
602	(struct nd_opt_hdr )(((u_char )opt) + icmp6len);
603
604	if (icmp6len == `0`) {
605	ndopts->nd_opts_done = `1`;
606	ndopts->nd_opts_search = NULL;
607	}
608	}
609
610	/*
611	* Take one ND option.
612	*/
613	struct nd_opt_hdr *
614	nd6_option(union nd_opts *ndopts)
615	{
616	struct nd_opt_hdr *nd_opt;
617	int olen;
618
619	if (!ndopts) {
620	panic("ndopts == NULL in nd6_option");
621	}
622	if (!ndopts->nd_opts_last) {
623	panic("uninitialized ndopts in nd6_option");
624	}
625	if (!ndopts->nd_opts_search) {
626	return NULL;
627	}
628	if (ndopts->nd_opts_done) {
629	return NULL;
630	}
631
632	nd_opt = ndopts->nd_opts_search;
633
634	/ make sure nd_opt_len is inside the buffer /
635	if ((caddr_t)&nd_opt->nd_opt_len >= (caddr_t)ndopts->nd_opts_last) {
636	bzero(s: ndopts, n: sizeof(*ndopts));
637	return NULL;
638	}
639
640	olen = nd_opt->nd_opt_len << `3`;
641	if (olen == `0`) {
642	/*
643	* Message validation requires that all included
644	* options have a length that is greater than zero.
645	*/
646	bzero(s: ndopts, n: sizeof(*ndopts));
647	return NULL;
648	}
649
650	ndopts->nd_opts_search = (struct nd_opt_hdr *)((caddr_t)nd_opt + olen);
651	if (ndopts->nd_opts_search > ndopts->nd_opts_last) {
652	/ option overruns the end of buffer, invalid /
653	bzero(s: ndopts, n: sizeof(*ndopts));
654	return NULL;
655	} else if (ndopts->nd_opts_search == ndopts->nd_opts_last) {
656	/ reached the end of options chain /
657	ndopts->nd_opts_done = `1`;
658	ndopts->nd_opts_search = NULL;
659	}
660	return nd_opt;
661	}
662
663	/*
664	* Parse multiple ND options.
665	* This function is much easier to use, for ND routines that do not need
666	* multiple options of the same type.
667	*/
668	int
669	nd6_options(union nd_opts *ndopts)
670	{
671	struct nd_opt_hdr *nd_opt;
672	int i = `0`;
673
674	if (ndopts == NULL) {
675	panic("ndopts == NULL in nd6_options");
676	}
677	if (ndopts->nd_opts_last == NULL) {
678	panic("uninitialized ndopts in nd6_options");
679	}
680	if (ndopts->nd_opts_search == NULL) {
681	return `0`;
682	}
683
684	while (`1`) {
685	nd_opt = nd6_option(ndopts);
686	if (nd_opt == NULL && ndopts->nd_opts_last == NULL) {
687	/*
688	* Message validation requires that all included
689	* options have a length that is greater than zero.
690	*/
691	icmp6stat.icp6s_nd_badopt++;
692	bzero(s: ndopts, n: sizeof(*ndopts));
693	return -`1`;
694	}
695
696	if (nd_opt == NULL) {
697	goto skip1;
698	}
699
700	switch (nd_opt->nd_opt_type) {
701	case ND_OPT_SOURCE_LINKADDR:
702	case ND_OPT_TARGET_LINKADDR:
703	case ND_OPT_MTU:
704	case ND_OPT_REDIRECTED_HEADER:
705	case ND_OPT_NONCE:
706	if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
707	nd6log(error,
708	"duplicated ND6 option found (type=%d)\n",
709	nd_opt->nd_opt_type);
710	/ XXX bark? /
711	} else {
712	ndopts->nd_opt_array[nd_opt->nd_opt_type] =
713	nd_opt;
714	}
715	break;
716	case ND_OPT_PREFIX_INFORMATION:
717	if (ndopts->nd_opt_array[nd_opt->nd_opt_type] == `0`) {
718	ndopts->nd_opt_array[nd_opt->nd_opt_type] =
719	nd_opt;
720	}
721	ndopts->nd_opts_pi_end =
722	(struct nd_opt_prefix_info *)nd_opt;
723	break;
724	case ND_OPT_RDNSS:
725	case ND_OPT_DNSSL:
726	case ND_OPT_CAPTIVE_PORTAL:
727	/ ignore /
728	break;
729	case ND_OPT_ROUTE_INFO:
730	if (nd6_process_rti) {
731	if (ndopts->nd_opt_array[nd_opt->nd_opt_type] == `0`) {
732	ndopts->nd_opt_array[nd_opt->nd_opt_type]
733	= nd_opt;
734	}
735	ndopts->nd_opts_rti_end =
736	(struct nd_opt_route_info *)nd_opt;
737	break;
738	}
739	OS_FALLTHROUGH;
740	default:
741	/*
742	* Unknown options must be silently ignored,
743	* to accomodate future extension to the protocol.
744	*/
745	nd6log(debug,
746	"nd6_options: unsupported option %d - "
747	"option ignored\n", nd_opt->nd_opt_type);
748	}
749
750	skip1:
751	i++;
752	if (i > nd6_maxndopt) {
753	icmp6stat.icp6s_nd_toomanyopt++;
754	nd6log(info, "too many loop in nd opt\n");
755	break;
756	}
757
758	if (ndopts->nd_opts_done) {
759	break;
760	}
761	}
762
763	return `0`;
764	}
765
/*
 * Argument and result block for an ND6 service pass.  `draining' is an
 * input; the counters are bumped by nd6_service_neighbor_cache() as it
 * walks the neighbor cache.
 */
struct nd6svc_arg {
	int draining;           /* non-zero: purge unreferenced entries now */
	uint32_t killed;        /* entries handed to nd6_free() */
	uint32_t aging_lazy;    /* unexpired STALE entries */
	uint32_t aging;         /* other unexpired or re-probed entries */
	uint32_t sticky;        /* permanent, static or ND6ALT entries */
	uint32_t found;         /* entries examined in this pass */
};
774
775
776	static void
777	nd6_service_neighbor_cache(struct nd6svc_arg *ap, uint64_t timenow)
778	{
779	struct llinfo_nd6 *ln;
780	struct ifnet *ifp = NULL;
781	boolean_t send_nc_failure_kev = FALSE;
782	struct radix_node_head *rnh = rt_tables[AF_INET6];
783
784	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
785	again:
786	/*
787	* send_nc_failure_kev gets set when default router's IPv6 address
788	* can't be resolved.
789	* That can happen either:
790	* 1. When the entry has resolved once but can't be
791	* resolved later and the neighbor cache entry for gateway is deleted
792	* after max probe attempts.
793	*
794	* 2. When the entry is in ND6_LLINFO_INCOMPLETE but can not be resolved
795	* after max neighbor address resolution attempts.
796	*
797	* Both set send_nc_failure_kev to true. ifp is also set to the previous
798	* neighbor cache entry's route's ifp.
799	* Once we are done sending the notification, set send_nc_failure_kev
800	* to false to stop sending false notifications for non default router
801	* neighbors.
802	*
803	* We may to send more information like Gateway's IP that could not be
804	* resolved, however right now we do not install more than one default
805	* route per interface in the routing table.
806	*/
807	if (send_nc_failure_kev && ifp != NULL &&
808	ifp->if_addrlen == IF_LLREACH_MAXLEN) {
809	struct kev_msg ev_msg;
810	struct kev_nd6_ndfailure nd6_ndfailure;
811	bzero(s: &ev_msg, n: sizeof(ev_msg));
812	bzero(s: &nd6_ndfailure, n: sizeof(nd6_ndfailure));
813	ev_msg.vendor_code = KEV_VENDOR_APPLE;
814	ev_msg.kev_class = KEV_NETWORK_CLASS;
815	ev_msg.kev_subclass = KEV_ND6_SUBCLASS;
816	ev_msg.event_code = KEV_ND6_NDFAILURE;
817
818	nd6_ndfailure.link_data.if_family = ifp->if_family;
819	nd6_ndfailure.link_data.if_unit = ifp->if_unit;
820	strlcpy(dst: nd6_ndfailure.link_data.if_name,
821	src: ifp->if_name,
822	n: sizeof(nd6_ndfailure.link_data.if_name));
823	ev_msg.dv[`0`].data_ptr = &nd6_ndfailure;
824	ev_msg.dv[`0`].data_length =
825	sizeof(nd6_ndfailure);
826	dlil_post_complete_msg(NULL, &ev_msg);
827	}
828
829	send_nc_failure_kev = FALSE;
830	ifp = NULL;
831	/*
832	* The global list llinfo_nd6 is modified by nd6_request() and is
833	* therefore protected by rnh_lock. For obvious reasons, we cannot
834	* hold rnh_lock across calls that might lead to code paths which
835	* attempt to acquire rnh_lock, else we deadlock. Hence for such
836	* cases we drop rt_lock and rnh_lock, make the calls, and repeat the
837	* loop. To ensure that we don't process the same entry more than
838	* once in a single timeout, we mark the "already-seen" entries with
839	* ND6_LNF_TIMER_SKIP flag. At the end of the loop, we do a second
840	* pass thru the entries and clear the flag so they can be processed
841	* during the next timeout.
842	*/
843	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
844
845	ln = llinfo_nd6.ln_next;
846	while (ln != NULL && ln != &llinfo_nd6) {
847	struct rtentry *rt;
848	struct sockaddr_in6 *dst;
849	struct llinfo_nd6 *next;
850	u_int32_t retrans, flags;
851	struct nd_ifinfo *ndi = NULL;
852	boolean_t is_router = FALSE;
853
854	/ ln_next/prev/rt is protected by rnh_lock /
855	next = ln->ln_next;
856	rt = ln->ln_rt;
857	RT_LOCK(rt);
858
859	/ We've seen this already; skip it /
860	if (ln->ln_flags & ND6_LNF_TIMER_SKIP) {
861	RT_UNLOCK(rt);
862	ln = next;
863	continue;
864	}
865	ap->found++;
866
867	/ rt->rt_ifp should never be NULL /
868	if ((ifp = rt->rt_ifp) == NULL) {
869	panic("%s: ln(%p) rt(%p) rt_ifp == NULL", __func__,
870	ln, rt);
871	/ NOTREACHED /
872	}
873
874	/ rt_llinfo must always be equal to ln /
875	if ((struct llinfo_nd6 *)rt->rt_llinfo != ln) {
876	panic("%s: rt_llinfo(%p) is not equal to ln(%p)",
877	__func__, rt->rt_llinfo, ln);
878	/ NOTREACHED /
879	}
880
881	/ rt_key should never be NULL /
882	dst = SIN6(rt_key(rt));
883	if (dst == NULL) {
884	panic("%s: rt(%p) key is NULL ln(%p)", __func__,
885	rt, ln);
886	/ NOTREACHED /
887	}
888
889	/ Set the flag in case we jump to "again" /
890	ln->ln_flags \|= ND6_LNF_TIMER_SKIP;
891
892	/*
893	* Do not touch neighbor cache entries that are permanent,
894	* static or are for interfaces that manage neighbor cache
895	* entries via alternate NDP means.
896	*/
897	if (ln->ln_expire == `0` \|\| (rt->rt_flags & RTF_STATIC) \|\|
898	(rt->rt_ifp->if_eflags & IFEF_IPV6_ND6ALT)) {
899	ap->sticky++;
900	} else if (ap->draining && (rt->rt_refcnt == `0`)) {
901	/*
902	* If we are draining, immediately purge non-static
903	* entries without oustanding route refcnt.
904	*/
905	if (ln->ln_state > ND6_LLINFO_INCOMPLETE) {
906	ND6_CACHE_STATE_TRANSITION(ln, (short)ND6_LLINFO_STALE);
907	} else {
908	ND6_CACHE_STATE_TRANSITION(ln, ND6_LLINFO_PURGE);
909	}
910	ln_setexpire(ln, expiry: timenow);
911	}
912
913	/*
914	* If the entry has not expired, skip it. Take note on the
915	* state, as entries that are in the STALE state are simply
916	* waiting to be garbage collected, in which case we can
917	* relax the callout scheduling (use nd6_prune_lazy).
918	*/
919	if (ln->ln_expire > timenow) {
920	switch (ln->ln_state) {
921	case ND6_LLINFO_STALE:
922	ap->aging_lazy++;
923	break;
924	default:
925	ap->aging++;
926	break;
927	}
928	RT_UNLOCK(rt);
929	ln = next;
930	continue;
931	}
932
933	ndi = ND_IFINFO(ifp);
934	/*
935	* The IPv6 initialization of the loopback interface
936	* may happen after another interface gets assigned
937	* an IPv6 address
938	*/
939	if (ndi == NULL && ifp == lo_ifp) {
940	RT_UNLOCK(rt);
941	ln = next;
942	continue;
943	}
944	VERIFY(ndi->initialized);
945	retrans = ndi->retrans;
946	flags = ndi->flags;
947
948	RT_LOCK_ASSERT_HELD(rt);
949	is_router = (rt->rt_flags & RTF_ROUTER) ? TRUE : FALSE;
950
951	switch (ln->ln_state) {
952	case ND6_LLINFO_INCOMPLETE:
953	if (ln->ln_asked < nd6_mmaxtries) {
954	struct ifnet *exclifp = ln->ln_exclifp;
955	ln->ln_asked++;
956	ln_setexpire(ln, expiry: timenow + retrans / `1000`);
957	RT_ADDREF_LOCKED(rt);
958	RT_UNLOCK(rt);
959	lck_mtx_unlock(rnh_lock);
960	if (ip6_forwarding) {
961	nd6_prproxy_ns_output(ifp, exclifp,
962	NULL, &dst->sin6_addr, ln);
963	} else {
964	nd6_ns_output(ifp, NULL,
965	&dst->sin6_addr, ln, NULL);
966	}
967	RT_REMREF(rt);
968	ap->aging++;
969	lck_mtx_lock(rnh_lock);
970	} else {
971	struct mbuf *m = ln->ln_hold;
972	ln->ln_hold = NULL;
973	send_nc_failure_kev = is_router;
974	if (m != NULL) {
975	RT_ADDREF_LOCKED(rt);
976	RT_UNLOCK(rt);
977	lck_mtx_unlock(rnh_lock);
978
979	struct mbuf *mnext;
980	while (m) {
981	mnext = m->m_nextpkt;
982	m->m_nextpkt = NULL;
983	m->m_pkthdr.rcvif = ifp;
984	icmp6_error_flag(m, ICMP6_DST_UNREACH,
985	ICMP6_DST_UNREACH_ADDR, `0`, `0`);
986	m = mnext;
987	}
988	} else {
989	RT_ADDREF_LOCKED(rt);
990	RT_UNLOCK(rt);
991	lck_mtx_unlock(rnh_lock);
992	}
993
994	/*
995	* Enqueue work item to invoke callback for
996	* this route entry
997	*/
998	route_event_enqueue_nwk_wq_entry(rt, NULL,
999	ROUTE_LLENTRY_UNREACH, NULL, FALSE);
1000	defrouter_set_reachability(&SIN6(rt_key(rt))->sin6_addr, rt->rt_ifp,
1001	FALSE);
1002	nd6_free(rt);
1003	ap->killed++;
1004	lck_mtx_lock(rnh_lock);
1005	/*
1006	* nd6_free above would flush out the routing table of
1007	* any cloned routes with same next-hop.
1008	* Walk the tree anyways as there could be static routes
1009	* left.
1010	*
1011	* We also already have a reference to rt that gets freed right
1012	* after the block below executes. Don't need an extra reference
1013	* on rt here.
1014	*/
1015	if (is_router) {
1016	struct route_event rt_ev;
1017	route_event_init(p_route_ev: &rt_ev, rt, NULL, route_ev_code: ROUTE_LLENTRY_UNREACH);
1018	(void) rnh->rnh_walktree(rnh, route_event_walktree, (void *)&rt_ev);
1019	}
1020	rtfree_locked(rt);
1021	}
1022	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
1023	goto again;
1024
1025	case ND6_LLINFO_REACHABLE:
1026	if (ln->ln_expire != `0`) {
1027	ND6_CACHE_STATE_TRANSITION(ln, ND6_LLINFO_STALE);
1028	ln_setexpire(ln, expiry: timenow + nd6_gctimer);
1029	ap->aging_lazy++;
1030	/*
1031	* Enqueue work item to invoke callback for
1032	* this route entry
1033	*/
1034	route_event_enqueue_nwk_wq_entry(rt, NULL,
1035	ROUTE_LLENTRY_STALE, NULL, TRUE);
1036
1037	RT_ADDREF_LOCKED(rt);
1038	RT_UNLOCK(rt);
1039	if (is_router) {
1040	struct route_event rt_ev;
1041	route_event_init(p_route_ev: &rt_ev, rt, NULL, route_ev_code: ROUTE_LLENTRY_STALE);
1042	(void) rnh->rnh_walktree(rnh, route_event_walktree, (void *)&rt_ev);
1043	}
1044	rtfree_locked(rt);
1045	} else {
1046	RT_UNLOCK(rt);
1047	}
1048	break;
1049
1050	case ND6_LLINFO_STALE:
1051	case ND6_LLINFO_PURGE:
1052	/ Garbage Collection(RFC 4861 5.3) /
1053	if (ln->ln_expire != `0`) {
1054	RT_ADDREF_LOCKED(rt);
1055	RT_UNLOCK(rt);
1056	lck_mtx_unlock(rnh_lock);
1057	nd6_free(rt);
1058	ap->killed++;
1059	lck_mtx_lock(rnh_lock);
1060	rtfree_locked(rt);
1061	goto again;
1062	} else {
1063	RT_UNLOCK(rt);
1064	}
1065	break;
1066
1067	case ND6_LLINFO_DELAY:
1068	if ((flags & ND6_IFF_PERFORMNUD) != `0`) {
1069	/ We need NUD /
1070	ln->ln_asked = `1`;
1071	ND6_CACHE_STATE_TRANSITION(ln, ND6_LLINFO_PROBE);
1072	ln_setexpire(ln, expiry: timenow + retrans / `1000`);
1073	RT_ADDREF_LOCKED(rt);
1074	RT_UNLOCK(rt);
1075	lck_mtx_unlock(rnh_lock);
1076	nd6_ns_output(ifp, &dst->sin6_addr,
1077	&dst->sin6_addr, ln, NULL);
1078	RT_REMREF(rt);
1079	ap->aging++;
1080	lck_mtx_lock(rnh_lock);
1081	goto again;
1082	}
1083	ND6_CACHE_STATE_TRANSITION(ln, ND6_LLINFO_STALE); / XXX /
1084	ln_setexpire(ln, expiry: timenow + nd6_gctimer);
1085	RT_UNLOCK(rt);
1086	ap->aging_lazy++;
1087	break;
1088
1089	case ND6_LLINFO_PROBE:
1090	if (ln->ln_asked < nd6_umaxtries) {
1091	ln->ln_asked++;
1092	ln_setexpire(ln, expiry: timenow + retrans / `1000`);
1093	RT_ADDREF_LOCKED(rt);
1094	RT_UNLOCK(rt);
1095	lck_mtx_unlock(rnh_lock);
1096	nd6_ns_output(ifp, &dst->sin6_addr,
1097	&dst->sin6_addr, ln, NULL);
1098	RT_REMREF(rt);
1099	ap->aging++;
1100	lck_mtx_lock(rnh_lock);
1101	} else {
1102	is_router = (rt->rt_flags & RTF_ROUTER) ? TRUE : FALSE;
1103	send_nc_failure_kev = is_router;
1104	RT_ADDREF_LOCKED(rt);
1105	RT_UNLOCK(rt);
1106	lck_mtx_unlock(rnh_lock);
1107	nd6_free(rt);
1108	ap->killed++;
1109
1110	/*
1111	* Enqueue work item to invoke callback for
1112	* this route entry
1113	*/
1114	route_event_enqueue_nwk_wq_entry(rt, NULL,
1115	ROUTE_LLENTRY_UNREACH, NULL, FALSE);
1116	defrouter_set_reachability(&SIN6(rt_key(rt))->sin6_addr, rt->rt_ifp,
1117	FALSE);
1118	lck_mtx_lock(rnh_lock);
1119	/*
1120	* nd6_free above would flush out the routing table of
1121	* any cloned routes with same next-hop.
1122	* Walk the tree anyways as there could be static routes
1123	* left.
1124	*
1125	* We also already have a reference to rt that gets freed right
1126	* after the block below executes. Don't need an extra reference
1127	* on rt here.
1128	*/
1129	if (is_router) {
1130	struct route_event rt_ev;
1131	route_event_init(p_route_ev: &rt_ev, rt, NULL, route_ev_code: ROUTE_LLENTRY_UNREACH);
1132	(void) rnh->rnh_walktree(rnh,
1133	route_event_walktree, (void *)&rt_ev);
1134	}
1135	rtfree_locked(rt);
1136	}
1137	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
1138	goto again;
1139
1140	default:
1141	RT_UNLOCK(rt);
1142	break;
1143	}
1144	ln = next;
1145	}
1146	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
1147
1148	/ Now clear the flag from all entries /
1149	ln = llinfo_nd6.ln_next;
1150	while (ln != NULL && ln != &llinfo_nd6) {
1151	struct rtentry *rt = ln->ln_rt;
1152	struct llinfo_nd6 *next = ln->ln_next;
1153
1154	RT_LOCK_SPIN(rt);
1155	if (ln->ln_flags & ND6_LNF_TIMER_SKIP) {
1156	ln->ln_flags &= ~ND6_LNF_TIMER_SKIP;
1157	}
1158	RT_UNLOCK(rt);
1159	ln = next;
1160	}
1161	}
1162
1163	static void
1164	nd6_service_expired_default_router(struct nd6svc_arg *ap, uint64_t timenow)
1165	{
1166	struct nd_defrouter *dr = NULL;
1167	struct nd_defrouter *ndr = NULL;
1168	struct nd_drhead nd_defrouter_tmp;
1169	/ expire default router list /
1170	TAILQ_INIT(&nd_defrouter_tmp);
1171
1172	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
1173	lck_mtx_lock(nd6_mutex);
1174
1175	TAILQ_FOREACH_SAFE(dr, &nd_defrouter_list, dr_entry, ndr) {
1176	ap->found++;
1177	if (dr->expire != `0` && dr->expire < timenow) {
1178	VERIFY(dr->ifp != NULL);
1179	in6_ifstat_inc(dr->ifp, ifs6_defrtr_expiry_cnt);
1180	if ((dr->stateflags & NDDRF_INELIGIBLE) == `0`) {
1181	in6_event_enqueue_nwk_wq_entry(IN6_NDP_RTR_EXPIRY, dr->ifp,
1182	&dr->rtaddr, dr->rtlifetime);
1183	}
1184	if (dr->ifp != NULL &&
1185	dr->ifp->if_type == IFT_CELLULAR) {
1186	/*
1187	* Some buggy cellular gateways may not send
1188	* periodic router advertisements.
1189	* Or they may send it with router lifetime
1190	* value that is less than the configured Max and Min
1191	* Router Advertisement interval.
1192	* To top that an idle device may not wake up
1193	* when periodic RA is received on cellular
1194	* interface.
1195	* We could send RS on every wake but RFC
1196	* 4861 precludes that.
1197	* The addresses are of infinite lifetimes
1198	* and are tied to the lifetime of the bearer,
1199	* so keeping the addresses and just getting rid of
1200	* the router does not help us anyways.
1201	* If there's network renumbering, a lifetime with
1202	* value 0 would remove the default router.
1203	* Also it will get deleted as part of purge when
1204	* the PDP context is torn down and configured again.
1205	* For that reason, do not expire the default router
1206	* learned on cellular interface. Ever.
1207	*/
1208	dr->expire += dr->rtlifetime;
1209	nd6log2(debug,
1210	"%s: Refreshing expired default router entry "
1211	"%s for interface %s\n", __func__,
1212	ip6_sprintf(&dr->rtaddr), if_name(dr->ifp));
1213	} else {
1214	ap->killed++;
1215	/*
1216	* Remove the entry from default router list
1217	* and add it to the temp list.
1218	* nd_defrouter_tmp will be a local temporary
1219	* list as no one else can get the same
1220	* removed entry once it is removed from default
1221	* router list.
1222	* Remove the reference after calling defrtrlist_del
1223	*/
1224	TAILQ_REMOVE(&nd_defrouter_list, dr, dr_entry);
1225	TAILQ_INSERT_TAIL(&nd_defrouter_tmp, dr, dr_entry);
1226	}
1227	} else {
1228	if (dr->expire == `0` \|\| (dr->stateflags & NDDRF_STATIC)) {
1229	ap->sticky++;
1230	} else {
1231	ap->aging_lazy++;
1232	}
1233	}
1234	}
1235
1236	/*
1237	* Keep the following separate from the above
1238	* iteration of nd_defrouter because it's not safe
1239	* to call defrtrlist_del while iterating global default
1240	* router list. Global list has to be traversed
1241	* while holding nd6_mutex throughout.
1242	*
1243	* The following call to defrtrlist_del should be
1244	* safe as we are iterating a local list of
1245	* default routers.
1246	*/
1247	TAILQ_FOREACH_SAFE(dr, &nd_defrouter_tmp, dr_entry, ndr) {
1248	TAILQ_REMOVE(&nd_defrouter_tmp, dr, dr_entry);
1249	defrtrlist_del(dr, NULL);
1250	NDDR_REMREF(dr); / remove list reference /
1251	}
1252
1253	/ XXX TBD: Also iterate through RTI router lists /
1254	/*
1255	* Also check if default router selection needs to be triggered
1256	* for default interface, to avoid an issue with co-existence of
1257	* static un-scoped default route configuration and default router
1258	* discovery/selection.
1259	*/
1260	if (trigger_v6_defrtr_select) {
1261	defrouter_select(NULL, NULL);
1262	trigger_v6_defrtr_select = FALSE;
1263	}
1264	lck_mtx_unlock(nd6_mutex);
1265	}
1266
1267	static void
1268	nd6_service_expired_route_info(struct nd6svc_arg *ap, uint64_t timenow)
1269	{
1270	struct nd_route_info *rti = NULL;
1271	struct nd_route_info *rti_next = NULL;
1272
1273	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
1274	lck_mtx_lock(nd6_mutex);
1275	nd6_rti_list_wait(__func__);
1276
1277	TAILQ_FOREACH_SAFE(rti, &nd_rti_list, nd_rti_entry, rti_next) {
1278	struct nd_defrouter *dr = NULL;
1279	struct nd_defrouter *ndr = NULL;
1280	struct nd_route_info rti_tmp = {};
1281
1282	rti_tmp.nd_rti_prefix = rti->nd_rti_prefix;
1283	rti_tmp.nd_rti_prefixlen = rti->nd_rti_prefixlen;
1284	TAILQ_INIT(&rti_tmp.nd_rti_router_list);
1285
1286	TAILQ_FOREACH_SAFE(dr, &rti->nd_rti_router_list, dr_entry, ndr) {
1287	ap->found++;
1288	if (dr->expire != `0` && dr->expire < timenow) {
1289	VERIFY(dr->ifp != NULL);
1290	if (dr->ifp != NULL &&
1291	dr->ifp->if_type == IFT_CELLULAR) {
1292	/*
1293	* Don't expire these routes over cellular.
1294	* XXX Should we change this for non default routes?
1295	*/
1296	dr->expire += dr->rtlifetime;
1297	nd6log2(debug,
1298	"%s: Refreshing expired default router entry "
1299	"%s for interface %s\n", __func__,
1300	ip6_sprintf(&dr->rtaddr), if_name(dr->ifp));
1301	} else {
1302	ap->killed++;
1303	/*
1304	* Remove the entry from rti entry's router list
1305	* and add it to the temp list.
1306	* Remove the reference after calling defrtrlist_del
1307	*/
1308	TAILQ_REMOVE(&rti->nd_rti_router_list, dr, dr_entry);
1309	TAILQ_INSERT_TAIL(&rti_tmp.nd_rti_router_list, dr, dr_entry);
1310	}
1311	} else {
1312	if (dr->expire == `0` \|\| (dr->stateflags & NDDRF_STATIC)) {
1313	ap->sticky++;
1314	} else {
1315	ap->aging_lazy++;
1316	}
1317	}
1318	}
1319
1320	/*
1321	* Keep the following separate from the above
1322	* iteration of nd_defrouter because it's not safe
1323	* to call defrtrlist_del while iterating global default
1324	* router list. Global list has to be traversed
1325	* while holding nd6_mutex throughout.
1326	*
1327	* The following call to defrtrlist_del should be
1328	* safe as we are iterating a local list of
1329	* default routers.
1330	*/
1331	TAILQ_FOREACH_SAFE(dr, &rti_tmp.nd_rti_router_list, dr_entry, ndr) {
1332	TAILQ_REMOVE(&rti_tmp.nd_rti_router_list, dr, dr_entry);
1333	defrtrlist_del(dr, &rti->nd_rti_router_list);
1334	NDDR_REMREF(dr); / remove list reference /
1335	}
1336
1337	/*
1338	* The above may have removed an entry from default router list.
1339	* If it did and the list is now empty, remove the rti as well.
1340	*/
1341	if (TAILQ_EMPTY(&rti->nd_rti_router_list)) {
1342	TAILQ_REMOVE(&nd_rti_list, rti, nd_rti_entry);
1343	ndrti_free(rti);
1344	}
1345	}
1346
1347	LCK_MTX_ASSERT(nd6_mutex, LCK_MTX_ASSERT_OWNED);
1348	nd6_rti_list_signal_done();
1349	lck_mtx_unlock(nd6_mutex);
1350	}
1351
1352
1353	/*
1354	* @function nd6_handle_duplicated_ip6_addr
1355	*
1356	* @brief
1357	* Handle a duplicated IPv6 secured non-termporary address
1358	*
1359	* @discussion
1360	* If the collision count hasn't been exceeded, removes the old
1361	* conflicting IPv6 address, increments the collision count,
1362	* and allocates a new address.
1363	*
1364	* Returns TRUE if the old address was removed, and the locks
1365	* (in6_ifaddr_rwlock, ia6->ia_ifa) were unlocked.
1366	*/
1367	static boolean_t
1368	nd6_handle_duplicated_ip6_addr(struct in6_ifaddr *ia6)
1369	{
1370	uint8_t collision_count;
1371	int error = `0`;
1372	struct in6_ifaddr *new_ia6;
1373	struct nd_prefix *pr;
1374	struct ifnet *ifp;
1375
1376	LCK_RW_ASSERT(&in6_ifaddr_rwlock, LCK_RW_ASSERT_EXCLUSIVE);
1377	IFA_LOCK_ASSERT_HELD(&ia6->ia_ifa);
1378
1379	/ don't retry too many times /
1380	collision_count = ia6->ia6_cga_collision_count;
1381	if (collision_count >= ip6_cga_conflict_retries) {
1382	return FALSE;
1383	}
1384
1385	/ need the prefix to allocate a new address /
1386	pr = ia6->ia6_ndpr;
1387	if (pr == NULL) {
1388	return FALSE;
1389	}
1390	NDPR_ADDREF(pr);
1391	ifp = pr->ndpr_ifp;
1392	log(LOG_DEBUG,
1393	"%s: %s duplicated (collision count %d)\n",
1394	ifp->if_xname, ip6_sprintf(&ia6->ia_addr.sin6_addr),
1395	collision_count);
1396
1397	/ remove the old address /
1398	IFA_UNLOCK(&ia6->ia_ifa);
1399	lck_rw_done(lck: &in6_ifaddr_rwlock);
1400	in6_purgeaddr(&ia6->ia_ifa);
1401
1402	/ allocate a new address with new collision count /
1403	collision_count++;
1404	new_ia6 = in6_pfx_newpersistaddr(pr, `1`, &error, FALSE, collision_count);
1405	if (new_ia6 != NULL) {
1406	log(LOG_DEBUG,
1407	"%s: %s new (collision count %d)\n",
1408	ifp->if_xname, ip6_sprintf(&new_ia6->ia_addr.sin6_addr),
1409	collision_count);
1410	IFA_LOCK(&new_ia6->ia_ifa);
1411	NDPR_LOCK(pr);
1412	new_ia6->ia6_ndpr = pr;
1413	NDPR_ADDREF(pr); / for addr reference /
1414	pr->ndpr_addrcnt++;
1415	VERIFY(pr->ndpr_addrcnt != `0`);
1416	NDPR_UNLOCK(pr);
1417	IFA_UNLOCK(&new_ia6->ia_ifa);
1418	ifa_remref(ifa: &new_ia6->ia_ifa);
1419	} else {
1420	log(LOG_ERR, "%s: in6_pfx_newpersistaddr failed %d\n",
1421	__func__, error);
1422	}
1423
1424	/ release extra prefix reference /
1425	NDPR_REMREF(pr);
1426	return TRUE;
1427	}
1428
1429	static boolean_t
1430	secured_address_is_duplicated(int flags)
1431	{
1432	#define _IN6_IFF_DUPLICATED_AUTOCONF_SECURED \
1433	(IN6_IFF_DUPLICATED \| IN6_IFF_AUTOCONF \| IN6_IFF_SECURED)
1434	return (flags & _IN6_IFF_DUPLICATED_AUTOCONF_SECURED) ==
1435	_IN6_IFF_DUPLICATED_AUTOCONF_SECURED;
1436	}
1437
1438	static void
1439	nd6_service_ip6_addr(struct nd6svc_arg *ap, uint64_t timenow)
1440	{
1441	struct in6_ifaddr *ia6 = NULL;
1442	struct in6_ifaddr *nia6 = NULL;
1443	/*
1444	* expire interface addresses.
1445	* in the past the loop was inside prefix expiry processing.
1446	* However, from a stricter spec-conformance standpoint, we should
1447	* rather separate address lifetimes and prefix lifetimes.
1448	*/
1449
1450	addrloop:
1451	lck_rw_lock_exclusive(lck: &in6_ifaddr_rwlock);
1452
1453	TAILQ_FOREACH_SAFE(ia6, &in6_ifaddrhead, ia6_link, nia6) {
1454	int oldflags = ia6->ia6_flags;
1455	ap->found++;
1456	IFA_LOCK(&ia6->ia_ifa);
1457	/*
1458	* Extra reference for ourselves; it's no-op if
1459	* we don't have to regenerate temporary address,
1460	* otherwise it protects the address from going
1461	* away since we drop in6_ifaddr_rwlock below.
1462	*/
1463	ifa_addref(ifa: &ia6->ia_ifa);
1464
1465	/*
1466	* Check for duplicated secured address
1467	*
1468	* nd6_handle_duplicated_ip6_addr attempts to regenerate
1469	* secure address in the event of a collision.
1470	* On successful generation this returns success
1471	* and we restart the loop.
1472	*
1473	* When we hit the maximum attempts, this returns
1474	* false.
1475	*/
1476	if (secured_address_is_duplicated(flags: ia6->ia6_flags) &&
1477	nd6_handle_duplicated_ip6_addr(ia6)) {
1478	/*
1479	* nd6_handle_duplicated_ip6_addr() unlocked
1480	* (in6_ifaddr_rwlock, ia6->ia_ifa) already.
1481	* Still need to release extra reference on
1482	* ia6->ia_ifa taken above.
1483	*/
1484	ifa_remref(ifa: &ia6->ia_ifa);
1485	goto addrloop;
1486	}
1487
1488	/ check address lifetime /
1489	if (IFA6_IS_INVALID(ia6, timenow)) {
1490	/*
1491	* If the expiring address is temporary, try
1492	* regenerating a new one. This would be useful when
1493	* we suspended a laptop PC, then turned it on after a
1494	* period that could invalidate all temporary
1495	* addresses. Although we may have to restart the
1496	* loop (see below), it must be after purging the
1497	* address. Otherwise, we'd see an infinite loop of
1498	* regeneration.
1499	*/
1500	if (ip6_use_tempaddr &&
1501	(ia6->ia6_flags & IN6_IFF_TEMPORARY) != `0`) {
1502	/*
1503	* NOTE: We have to drop the lock here
1504	* because regen_tmpaddr() eventually calls
1505	* in6_update_ifa(), which must take the lock
1506	* and would otherwise cause a hang. This is
1507	* safe because the goto addrloop leads to a
1508	* re-evaluation of the in6_ifaddrs list
1509	*/
1510	IFA_UNLOCK(&ia6->ia_ifa);
1511	lck_rw_done(lck: &in6_ifaddr_rwlock);
1512	(void) regen_tmpaddr(ia6);
1513	} else {
1514	IFA_UNLOCK(&ia6->ia_ifa);
1515	lck_rw_done(lck: &in6_ifaddr_rwlock);
1516	}
1517
1518	/*
1519	* Purging the address would have caused
1520	* in6_ifaddr_rwlock to be dropped and reacquired;
1521	* therefore search again from the beginning
1522	* of in6_ifaddrs list.
1523	*/
1524	in6_purgeaddr(&ia6->ia_ifa);
1525	ap->killed++;
1526
1527	if ((ia6->ia6_flags & IN6_IFF_TEMPORARY) == `0`) {
1528	in6_ifstat_inc(ia6->ia_ifa.ifa_ifp, ifs6_addr_expiry_cnt);
1529	in6_event_enqueue_nwk_wq_entry(IN6_NDP_ADDR_EXPIRY,
1530	ia6->ia_ifa.ifa_ifp, &ia6->ia_addr.sin6_addr,
1531	`0`);
1532	}
1533	/ Release extra reference taken above /
1534	ifa_remref(ifa: &ia6->ia_ifa);
1535	goto addrloop;
1536	}
1537	/*
1538	* The lazy timer runs every nd6_prune_lazy seconds with at
1539	* most "2 * nd6_prune_lazy - 1" leeway. We consider the worst
1540	* case here and make sure we schedule the regular timer if an
1541	* interface address is about to expire.
1542	*/
1543	if (IFA6_IS_INVALID(ia6, timenow + `3` * nd6_prune_lazy)) {
1544	ap->aging++;
1545	} else {
1546	ap->aging_lazy++;
1547	}
1548	IFA_LOCK_ASSERT_HELD(&ia6->ia_ifa);
1549	if (IFA6_IS_DEPRECATED(ia6, timenow)) {
1550	ia6->ia6_flags \|= IN6_IFF_DEPRECATED;
1551
1552	if ((oldflags & IN6_IFF_DEPRECATED) == `0`) {
1553	#if SKYWALK
1554	SK_NXS_MS_IF_ADDR_GENCNT_INC(ia6->ia_ifp);
1555	#endif /* SKYWALK */
1556	/*
1557	* Only enqueue the Deprecated event when the address just
1558	* becomes deprecated.
1559	* Keep it limited to the stable address as it is common for
1560	* older temporary addresses to get deprecated while we generate
1561	* new ones.
1562	*/
1563	if ((ia6->ia6_flags & IN6_IFF_TEMPORARY) == `0`) {
1564	in6_event_enqueue_nwk_wq_entry(IN6_ADDR_MARKED_DEPRECATED,
1565	ia6->ia_ifa.ifa_ifp, &ia6->ia_addr.sin6_addr,
1566	`0`);
1567	}
1568	}
1569	/*
1570	* If a temporary address has just become deprecated,
1571	* regenerate a new one if possible.
1572	*/
1573	if (ip6_use_tempaddr &&
1574	(ia6->ia6_flags & IN6_IFF_TEMPORARY) != `0` &&
1575	(oldflags & IN6_IFF_DEPRECATED) == `0`) {
1576	/ see NOTE above /
1577	IFA_UNLOCK(&ia6->ia_ifa);
1578	lck_rw_done(lck: &in6_ifaddr_rwlock);
1579	if (regen_tmpaddr(ia6) == `0`) {
1580	/*
1581	* A new temporary address is
1582	* generated.
1583	* XXX: this means the address chain
1584	* has changed while we are still in
1585	* the loop. Although the change
1586	* would not cause disaster (because
1587	* it's not a deletion, but an
1588	* addition,) we'd rather restart the
1589	* loop just for safety. Or does this
1590	* significantly reduce performance??
1591	*/
1592	/ Release extra reference /
1593	ifa_remref(ifa: &ia6->ia_ifa);
1594	goto addrloop;
1595	}
1596	lck_rw_lock_exclusive(lck: &in6_ifaddr_rwlock);
1597	} else {
1598	IFA_UNLOCK(&ia6->ia_ifa);
1599	}
1600	} else {
1601	/*
1602	* A new RA might have made a deprecated address
1603	* preferred.
1604	*/
1605	ia6->ia6_flags &= ~IN6_IFF_DEPRECATED;
1606	#if SKYWALK
1607	if ((oldflags & IN6_IFF_DEPRECATED) != `0`) {
1608	SK_NXS_MS_IF_ADDR_GENCNT_INC(ia6->ia_ifp);
1609	}
1610	#endif /* SKYWALK */
1611	IFA_UNLOCK(&ia6->ia_ifa);
1612	}
1613	LCK_RW_ASSERT(&in6_ifaddr_rwlock, LCK_RW_ASSERT_EXCLUSIVE);
1614	/ Release extra reference taken above /
1615	ifa_remref(ifa: &ia6->ia_ifa);
1616	}
1617	lck_rw_done(lck: &in6_ifaddr_rwlock);
1618	}
1619
1620	static void
1621	nd6_service_expired_prefix(struct nd6svc_arg *ap, uint64_t timenow)
1622	{
1623	struct nd_prefix *pr = NULL;
1624
1625	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
1626	lck_mtx_lock(nd6_mutex);
1627	/ expire prefix list /
1628	pr = nd_prefix.lh_first;
1629	while (pr != NULL) {
1630	ap->found++;
1631	/*
1632	* Skip already processed or defunct prefixes
1633	* We may iterate the prefix list from head again
1634	* so, we are trying to not revisit the same prefix
1635	* for the same instance of nd6_service
1636	*/
1637	NDPR_LOCK(pr);
1638	if (pr->ndpr_stateflags & NDPRF_PROCESSED_SERVICE \|\|
1639	pr->ndpr_stateflags & NDPRF_DEFUNCT) {
1640	pr->ndpr_stateflags \|= NDPRF_PROCESSED_SERVICE;
1641	NDPR_UNLOCK(pr);
1642	pr = pr->ndpr_next;
1643	continue;
1644	}
1645
1646	/*
1647	* If there are still manual addresses configured in the system
1648	* that are associated with the prefix, ignore prefix expiry
1649	*/
1650	if (pr->ndpr_manual_addrcnt != `0`) {
1651	pr->ndpr_stateflags \|= NDPRF_PROCESSED_SERVICE;
1652	NDPR_UNLOCK(pr);
1653	pr = pr->ndpr_next;
1654	continue;
1655	}
1656
1657	/*
1658	* check prefix lifetime.
1659	* since pltime is just for autoconf, pltime processing for
1660	* prefix is not necessary.
1661	*/
1662	if (pr->ndpr_expire != `0` && pr->ndpr_expire < timenow) {
1663	/*
1664	* address expiration and prefix expiration are
1665	* separate. NEVER perform in6_purgeaddr here.
1666	*/
1667	pr->ndpr_stateflags \|= NDPRF_PROCESSED_SERVICE;
1668	NDPR_ADDREF(pr);
1669	prelist_remove(pr);
1670	NDPR_UNLOCK(pr);
1671
1672	in6_ifstat_inc(pr->ndpr_ifp, ifs6_pfx_expiry_cnt);
1673	in6_event_enqueue_nwk_wq_entry(IN6_NDP_PFX_EXPIRY,
1674	pr->ndpr_ifp, &pr->ndpr_prefix.sin6_addr,
1675	`0`);
1676	NDPR_REMREF(pr);
1677	pfxlist_onlink_check();
1678	pr = nd_prefix.lh_first;
1679	ap->killed++;
1680	} else {
1681	if (pr->ndpr_expire == `0` \|\|
1682	(pr->ndpr_stateflags & NDPRF_STATIC)) {
1683	ap->sticky++;
1684	} else {
1685	ap->aging_lazy++;
1686	}
1687	pr->ndpr_stateflags \|= NDPRF_PROCESSED_SERVICE;
1688	NDPR_UNLOCK(pr);
1689	pr = pr->ndpr_next;
1690	}
1691	}
1692	LIST_FOREACH(pr, &nd_prefix, ndpr_entry) {
1693	NDPR_LOCK(pr);
1694	pr->ndpr_stateflags &= ~NDPRF_PROCESSED_SERVICE;
1695	NDPR_UNLOCK(pr);
1696	}
1697	lck_mtx_unlock(nd6_mutex);
1698	}
1699
1700
1701	/*
1702	* ND6 service routine to expire default route list and prefix list
1703	*/
1704	static void
1705	nd6_service(void *arg)
1706	{
1707	struct nd6svc_arg *ap = arg;
1708	uint64_t timenow;
1709
1710	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
1711	/*
1712	* Since we may drop rnh_lock and nd6_mutex below, we want
1713	* to run this entire operation single threaded.
1714	*/
1715	while (nd6_service_busy) {
1716	nd6log2(debug, "%s: %s is blocked by %d waiters\n",
1717	__func__, ap->draining ? "drainer" : "timer",
1718	nd6_service_waiters);
1719	nd6_service_waiters++;
1720	(void) msleep(chan: nd6_service_wc, rnh_lock, pri: (PZERO - `1`),
1721	wmesg: __func__, NULL);
1722	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
1723	}
1724
1725	/ We are busy now; tell everyone else to go away /
1726	nd6_service_busy = TRUE;
1727	net_update_uptime();
1728	timenow = net_uptime();
1729
1730	/ Iterate and service neighbor cache entries /
1731	nd6_service_neighbor_cache(ap, timenow);
1732
1733	/*
1734	* There is lock ordering requirement and rnh_lock
1735	* has to be released before acquiring nd6_mutex.
1736	*/
1737	lck_mtx_unlock(rnh_lock);
1738
1739	/ Iterate and service expired default router /
1740	nd6_service_expired_default_router(ap, timenow);
1741	/ Iterate and service expired route information entries /
1742	nd6_service_expired_route_info(ap, timenow);
1743
1744	/ Iterate and service expired/duplicated IPv6 address /
1745	nd6_service_ip6_addr(ap, timenow);
1746
1747	/ Iterate and service expired IPv6 prefixes /
1748	nd6_service_expired_prefix(ap, timenow);
1749
1750	lck_mtx_lock(rnh_lock);
1751	/ We're done; let others enter /
1752	nd6_service_busy = FALSE;
1753	if (nd6_service_waiters > `0`) {
1754	nd6_service_waiters = `0`;
1755	wakeup(chan: nd6_service_wc);
1756	}
1757	}
1758
/*
 * Set to 1 by nd6_drain() and consumed (reset to 0) by nd6_timeout(),
 * which then marks its service pass as a draining pass.
 */
static int nd6_need_draining = 0;
1760
1761	void
1762	nd6_drain(void *arg)
1763	{
1764	#pragma unused(arg)
1765	nd6log2(debug, "%s: draining ND6 entries\n", __func__);
1766
1767	lck_mtx_lock(rnh_lock);
1768	nd6_need_draining = `1`;
1769	nd6_sched_timeout(NULL, NULL);
1770	lck_mtx_unlock(rnh_lock);
1771	}
1772
1773	/*
1774	* We use the ``arg'' variable to decide whether or not the timer we're
1775	* running is the fast timer. We do this to reset the nd6_fast_timer_on
1776	* variable so that later we don't end up ignoring a ``fast timer''
1777	* request if the 5 second timer is running (see nd6_sched_timeout).
1778	*/
1779	static void
1780	nd6_timeout(void *arg)
1781	{
1782	struct nd6svc_arg sarg;
1783	uint32_t buf;
1784
1785	lck_mtx_lock(rnh_lock);
1786	bzero(s: &sarg, n: sizeof(sarg));
1787	if (nd6_need_draining != `0`) {
1788	nd6_need_draining = `0`;
1789	sarg.draining = `1`;
1790	}
1791	nd6_service(arg: &sarg);
1792	nd6log2(debug, "%s: found %u, aging_lazy %u, aging %u, "
1793	"sticky %u, killed %u\n", __func__, sarg.found, sarg.aging_lazy,
1794	sarg.aging, sarg.sticky, sarg.killed);
1795	/ re-arm the timer if there's work to do /
1796	nd6_timeout_run--;
1797	VERIFY(nd6_timeout_run >= `0` && nd6_timeout_run < `2`);
1798	if (arg == &nd6_fast_timer_on) {
1799	nd6_fast_timer_on = FALSE;
1800	}
1801	if (sarg.aging_lazy > `0` \|\| sarg.aging > `0` \|\| nd6_sched_timeout_want) {
1802	struct timeval atv, ltv, *leeway;
1803	int lazy = nd6_prune_lazy;
1804
1805	if (sarg.aging > `0` \|\| lazy < `1`) {
1806	atv.tv_usec = `0`;
1807	atv.tv_sec = nd6_prune;
1808	leeway = NULL;
1809	} else {
1810	VERIFY(lazy >= `1`);
1811	atv.tv_usec = `0`;
1812	atv.tv_sec = MAX(nd6_prune, lazy);
1813	ltv.tv_usec = `0`;
1814	read_frandom(buffer: &buf, numBytes: sizeof(buf));
1815	ltv.tv_sec = MAX(buf % lazy, `1`) * `2`;
1816	leeway = &ltv;
1817	}
1818	nd6_sched_timeout(&atv, leeway);
1819	} else if (nd6_debug) {
1820	nd6log2(debug, "%s: not rescheduling timer\n", __func__);
1821	}
1822	lck_mtx_unlock(rnh_lock);
1823	}
1824
1825	void
1826	nd6_sched_timeout(struct timeval atv, struct* timeval *ltv)
1827	{
1828	struct timeval tv;
1829
1830	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
1831	if (atv == NULL) {
1832	tv.tv_usec = `0`;
1833	tv.tv_sec = MAX(nd6_prune, `1`);
1834	atv = &tv;
1835	ltv = NULL; / ignore leeway /
1836	}
1837	/ see comments on top of this file /
1838	if (nd6_timeout_run == `0`) {
1839	if (ltv == NULL) {
1840	nd6log2(debug, "%s: timer scheduled in "
1841	"T+%llus.%lluu (demand %d)\n", __func__,
1842	(uint64_t)atv->tv_sec, (uint64_t)atv->tv_usec,
1843	nd6_sched_timeout_want);
1844	nd6_fast_timer_on = TRUE;
1845	timeout(nd6_timeout, arg: &nd6_fast_timer_on, ticks: tvtohz(atv));
1846	} else {
1847	nd6log2(debug, "%s: timer scheduled in "
1848	"T+%llus.%lluu with %llus.%lluu leeway "
1849	"(demand %d)\n", __func__, (uint64_t)atv->tv_sec,
1850	(uint64_t)atv->tv_usec, (uint64_t)ltv->tv_sec,
1851	(uint64_t)ltv->tv_usec, nd6_sched_timeout_want);
1852	nd6_fast_timer_on = FALSE;
1853	timeout_with_leeway(nd6_timeout, NULL,
1854	ticks: tvtohz(atv), leeway_ticks: tvtohz(ltv));
1855	}
1856	nd6_timeout_run++;
1857	nd6_sched_timeout_want = `0`;
1858	} else if (nd6_timeout_run == `1` && ltv == NULL &&
1859	nd6_fast_timer_on == FALSE) {
1860	nd6log2(debug, "%s: fast timer scheduled in "
1861	"T+%llus.%lluu (demand %d)\n", __func__,
1862	(uint64_t)atv->tv_sec, (uint64_t)atv->tv_usec,
1863	nd6_sched_timeout_want);
1864	nd6_fast_timer_on = TRUE;
1865	nd6_sched_timeout_want = `0`;
1866	nd6_timeout_run++;
1867	timeout(nd6_timeout, arg: &nd6_fast_timer_on, ticks: tvtohz(atv));
1868	} else {
1869	if (ltv == NULL) {
1870	nd6log2(debug, "%s: not scheduling timer: "
1871	"timers %d, fast_timer %d, T+%llus.%lluu\n",
1872	__func__, nd6_timeout_run, nd6_fast_timer_on,
1873	(uint64_t)atv->tv_sec, (uint64_t)atv->tv_usec);
1874	} else {
1875	nd6log2(debug, "%s: not scheduling timer: "
1876	"timers %d, fast_timer %d, T+%llus.%lluu "
1877	"with %llus.%lluu leeway\n", __func__,
1878	nd6_timeout_run, nd6_fast_timer_on,
1879	(uint64_t)atv->tv_sec, (uint64_t)atv->tv_usec,
1880	(uint64_t)ltv->tv_sec, (uint64_t)ltv->tv_usec);
1881	}
1882	}
1883	}
1884
1885	/*
1886	* ND6 router advertisement kernel notification
1887	*/
1888	void
1889	nd6_post_msg(u_int32_t code, struct nd_prefix_list *prefix_list,
1890	u_int32_t list_length, u_int32_t mtu)
1891	{
1892	struct kev_msg ev_msg;
1893	struct kev_nd6_ra_data nd6_ra_msg_data;
1894	struct nd_prefix_list *itr = prefix_list;
1895
1896	bzero(s: &ev_msg, n: sizeof(struct kev_msg));
1897	ev_msg.vendor_code = KEV_VENDOR_APPLE;
1898	ev_msg.kev_class = KEV_NETWORK_CLASS;
1899	ev_msg.kev_subclass = KEV_ND6_SUBCLASS;
1900	ev_msg.event_code = code;
1901
1902	bzero(s: &nd6_ra_msg_data, n: sizeof(nd6_ra_msg_data));
1903
1904	if (mtu > `0` && mtu >= IPV6_MMTU) {
1905	nd6_ra_msg_data.mtu = mtu;
1906	nd6_ra_msg_data.flags \|= KEV_ND6_DATA_VALID_MTU;
1907	}
1908
1909	if (list_length > `0` && prefix_list != NULL) {
1910	nd6_ra_msg_data.list_length = list_length;
1911	nd6_ra_msg_data.flags \|= KEV_ND6_DATA_VALID_PREFIX;
1912	}
1913
1914	while (itr != NULL && nd6_ra_msg_data.list_index < list_length) {
1915	SOCKADDR_COPY(&itr->pr.ndpr_prefix, &nd6_ra_msg_data.prefix.prefix,
1916	sizeof(nd6_ra_msg_data.prefix.prefix));
1917	nd6_ra_msg_data.prefix.raflags = itr->pr.ndpr_raf;
1918	nd6_ra_msg_data.prefix.prefixlen = itr->pr.ndpr_plen;
1919	nd6_ra_msg_data.prefix.origin = PR_ORIG_RA;
1920	nd6_ra_msg_data.prefix.vltime = itr->pr.ndpr_vltime;
1921	nd6_ra_msg_data.prefix.pltime = itr->pr.ndpr_pltime;
1922	nd6_ra_msg_data.prefix.expire = ndpr_getexpire(&itr->pr);
1923	nd6_ra_msg_data.prefix.flags = itr->pr.ndpr_stateflags;
1924	nd6_ra_msg_data.prefix.refcnt = itr->pr.ndpr_addrcnt;
1925	nd6_ra_msg_data.prefix.if_index = itr->pr.ndpr_ifp->if_index;
1926
1927	/ send the message up /
1928	ev_msg.dv[`0`].data_ptr = &nd6_ra_msg_data;
1929	ev_msg.dv[`0`].data_length = sizeof(nd6_ra_msg_data);
1930	ev_msg.dv[`1`].data_length = `0`;
1931	dlil_post_complete_msg(NULL, &ev_msg);
1932
1933	/ clean up for the next prefix /
1934	bzero(s: &nd6_ra_msg_data.prefix, n: sizeof(nd6_ra_msg_data.prefix));
1935	itr = itr->next;
1936	nd6_ra_msg_data.list_index++;
1937	}
1938	}
1939
1940	/*
1941	* Regenerate deprecated/invalidated temporary address
1942	*/
1943	static int
1944	regen_tmpaddr(struct in6_ifaddr *ia6)
1945	{
1946	struct ifaddr *ifa;
1947	struct ifnet *ifp;
1948	struct in6_ifaddr *public_ifa6 = NULL;
1949	uint64_t timenow = net_uptime();
1950
1951	ifp = ia6->ia_ifa.ifa_ifp;
1952	ifnet_lock_shared(ifp);
1953	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1954	struct in6_ifaddr *it6;
1955
1956	IFA_LOCK(ifa);
1957	if (ifa->ifa_addr->sa_family != AF_INET6) {
1958	IFA_UNLOCK(ifa);
1959	continue;
1960	}
1961	it6 = (struct in6_ifaddr *)ifa;
1962
1963	/ ignore no autoconf addresses. /
1964	if ((it6->ia6_flags & IN6_IFF_AUTOCONF) == `0`) {
1965	IFA_UNLOCK(ifa);
1966	continue;
1967	}
1968	/ ignore autoconf addresses with different prefixes. /
1969	if (it6->ia6_ndpr == NULL \|\| it6->ia6_ndpr != ia6->ia6_ndpr) {
1970	IFA_UNLOCK(ifa);
1971	continue;
1972	}
1973	/*
1974	* Now we are looking at an autoconf address with the same
1975	* prefix as ours. If the address is temporary and is still
1976	* preferred, do not create another one. It would be rare, but
1977	* could happen, for example, when we resume a laptop PC after
1978	* a long period.
1979	*/
1980	if ((it6->ia6_flags & IN6_IFF_TEMPORARY) != `0` &&
1981	!IFA6_IS_DEPRECATED(it6, timenow)) {
1982	IFA_UNLOCK(ifa);
1983	if (public_ifa6 != NULL) {
1984	ifa_remref(ifa: &public_ifa6->ia_ifa);
1985	}
1986	public_ifa6 = NULL;
1987	break;
1988	}
1989
1990	/*
1991	* This is a public autoconf address that has the same prefix
1992	* as ours. If it is preferred, keep it. We can't break the
1993	* loop here, because there may be a still-preferred temporary
1994	* address with the prefix.
1995	*/
1996	if (!IFA6_IS_DEPRECATED(it6, timenow)) {
1997	ifa_addref(ifa); / for public_ifa6 /
1998	IFA_UNLOCK(ifa);
1999	if (public_ifa6 != NULL) {
2000	ifa_remref(ifa: &public_ifa6->ia_ifa);
2001	}
2002	public_ifa6 = it6;
2003	} else {
2004	IFA_UNLOCK(ifa);
2005	}
2006	}
2007	ifnet_lock_done(ifp);
2008
2009	if (public_ifa6 != NULL) {
2010	int e;
2011
2012	if ((e = in6_tmpifadd(public_ifa6, `0`)) != `0`) {
2013	log(LOG_NOTICE, "regen_tmpaddr: failed to create a new"
2014	" tmp addr,errno=%d\n", e);
2015	ifa_remref(ifa: &public_ifa6->ia_ifa);
2016	return -`1`;
2017	}
2018	ifa_remref(ifa: &public_ifa6->ia_ifa);
2019	return `0`;
2020	}
2021
2022	return -`1`;
2023	}
2024
2025	static void
2026	nd6_purge_interface_default_routers(struct ifnet *ifp)
2027	{
2028	struct nd_defrouter *dr = NULL;
2029	struct nd_defrouter *ndr = NULL;
2030	struct nd_drhead nd_defrouter_tmp = {};
2031
2032	TAILQ_INIT(&nd_defrouter_tmp);
2033
2034	LCK_MTX_ASSERT(nd6_mutex, LCK_MTX_ASSERT_OWNED);
2035
2036	TAILQ_FOREACH_SAFE(dr, &nd_defrouter_list, dr_entry, ndr) {
2037	if (dr->ifp != ifp) {
2038	continue;
2039	}
2040	/*
2041	* Remove the entry from default router list
2042	* and add it to the temp list.
2043	* nd_defrouter_tmp will be a local temporary
2044	* list as no one else can get the same
2045	* removed entry once it is removed from default
2046	* router list.
2047	* Remove the reference after calling defrtrlist_del.
2048	*
2049	* The uninstalled entries have to be iterated first
2050	* when we call defrtrlist_del.
2051	* This is to ensure that we don't end up calling
2052	* default router selection when there are other
2053	* uninstalled candidate default routers on
2054	* the interface.
2055	* If we don't respect that order, we may end
2056	* up missing out on some entries.
2057	*
2058	* For that reason, installed ones must be inserted
2059	* at the tail and uninstalled ones at the head
2060	*/
2061	TAILQ_REMOVE(&nd_defrouter_list, dr, dr_entry);
2062
2063	if (dr->stateflags & NDDRF_INSTALLED) {
2064	TAILQ_INSERT_TAIL(&nd_defrouter_tmp, dr, dr_entry);
2065	} else {
2066	TAILQ_INSERT_HEAD(&nd_defrouter_tmp, dr, dr_entry);
2067	}
2068	}
2069
2070	/*
2071	* The following call to defrtrlist_del should be
2072	* safe as we are iterating a local list of
2073	* default routers.
2074	*
2075	* We don't really need nd6_mutex here but keeping
2076	* it as it is to avoid changing assertios held in
2077	* the functions in the call-path.
2078	*/
2079	TAILQ_FOREACH_SAFE(dr, &nd_defrouter_tmp, dr_entry, ndr) {
2080	TAILQ_REMOVE(&nd_defrouter_tmp, dr, dr_entry);
2081	defrtrlist_del(dr, NULL);
2082	NDDR_REMREF(dr); / remove list reference /
2083	}
2084	}
2085
2086	static void
2087	nd6_purge_interface_prefixes(struct ifnet *ifp)
2088	{
2089	boolean_t removed = FALSE;
2090	struct nd_prefix *pr = NULL;
2091	struct nd_prefix *npr = NULL;
2092
2093	LCK_MTX_ASSERT(nd6_mutex, LCK_MTX_ASSERT_OWNED);
2094
2095	/ Nuke prefix list entries toward ifp /
2096	for (pr = nd_prefix.lh_first; pr; pr = npr) {
2097	NDPR_LOCK(pr);
2098	npr = pr->ndpr_next;
2099	if (pr->ndpr_ifp == ifp &&
2100	!(pr->ndpr_stateflags & NDPRF_DEFUNCT)) {
2101	/*
2102	* Because if_detach() does not release prefixes
2103	* while purging addresses the reference count will
2104	* still be above zero. We therefore reset it to
2105	* make sure that the prefix really gets purged.
2106	*/
2107	pr->ndpr_addrcnt = `0`;
2108
2109	/*
2110	* Previously, pr->ndpr_addr is removed as well,
2111	* but I strongly believe we don't have to do it.
2112	* nd6_purge() is only called from in6_ifdetach(),
2113	* which removes all the associated interface addresses
2114	* by itself.
2115	* (jinmei@kame.net 20010129)
2116	*/
2117	NDPR_ADDREF(pr);
2118	prelist_remove(pr);
2119	NDPR_UNLOCK(pr);
2120	NDPR_REMREF(pr);
2121	removed = TRUE;
2122	npr = nd_prefix.lh_first;
2123	} else {
2124	NDPR_UNLOCK(pr);
2125	}
2126	}
2127	if (removed) {
2128	pfxlist_onlink_check();
2129	}
2130	}
2131
2132	static void
2133	nd6_router_select_rti_entries(struct ifnet *ifp)
2134	{
2135	struct nd_route_info *rti = NULL;
2136	struct nd_route_info *rti_next = NULL;
2137
2138	nd6_rti_list_wait(__func__);
2139
2140	TAILQ_FOREACH_SAFE(rti, &nd_rti_list, nd_rti_entry, rti_next) {
2141	defrouter_select(ifp, &rti->nd_rti_router_list);
2142	}
2143
2144	nd6_rti_list_signal_done();
2145	}
2146
2147	static void
2148	nd6_purge_interface_rti_entries(struct ifnet *ifp)
2149	{
2150	struct nd_route_info *rti = NULL;
2151	struct nd_route_info *rti_next = NULL;
2152
2153	nd6_rti_list_wait(__func__);
2154
2155	TAILQ_FOREACH_SAFE(rti, &nd_rti_list, nd_rti_entry, rti_next) {
2156	struct nd_route_info rti_tmp = {};
2157	struct nd_defrouter *dr = NULL;
2158	struct nd_defrouter *ndr = NULL;
2159
2160	rti_tmp.nd_rti_prefix = rti->nd_rti_prefix;
2161	rti_tmp.nd_rti_prefixlen = rti->nd_rti_prefixlen;
2162	TAILQ_INIT(&rti_tmp.nd_rti_router_list);
2163
2164	TAILQ_FOREACH_SAFE(dr, &rti->nd_rti_router_list, dr_entry, ndr) {
2165	/*
2166	* If ifp is provided, skip the entries that don't match.
2167	* Else it is treated as a purge.
2168	*/
2169	if (ifp != NULL && dr->ifp != ifp) {
2170	continue;
2171	}
2172
2173	/*
2174	* Remove the entry from rti's router list
2175	* and add it to the temp list.
2176	* Remove the reference after calling defrtrlist_del.
2177	*
2178	* The uninstalled entries have to be iterated first
2179	* when we call defrtrlist_del.
2180	* This is to ensure that we don't end up calling
2181	* router selection when there are other
2182	* uninstalled candidate default routers on
2183	* the interface.
2184	* If we don't respect that order, we may end
2185	* up missing out on some entries.
2186	*
2187	* For that reason, installed ones must be inserted
2188	* at the tail and uninstalled ones at the head
2189	*/
2190
2191	TAILQ_REMOVE(&rti->nd_rti_router_list, dr, dr_entry);
2192	if (dr->stateflags & NDDRF_INSTALLED) {
2193	TAILQ_INSERT_TAIL(&rti_tmp.nd_rti_router_list, dr, dr_entry);
2194	} else {
2195	TAILQ_INSERT_HEAD(&rti_tmp.nd_rti_router_list, dr, dr_entry);
2196	}
2197	}
2198
2199	/*
2200	* The following call to defrtrlist_del should be
2201	* safe as we are iterating a local list of
2202	* routers.
2203	*
2204	* We don't really need nd6_mutex here but keeping
2205	* it as it is to avoid changing assertios held in
2206	* the functions in the call-path.
2207	*/
2208	TAILQ_FOREACH_SAFE(dr, &rti_tmp.nd_rti_router_list, dr_entry, ndr) {
2209	TAILQ_REMOVE(&rti_tmp.nd_rti_router_list, dr, dr_entry);
2210	defrtrlist_del(dr, &rti->nd_rti_router_list);
2211	NDDR_REMREF(dr); / remove list reference /
2212	}
2213	/*
2214	* The above may have removed an entry from default router list.
2215	* If it did and the list is now empty, remove the rti as well.
2216	*/
2217	if (TAILQ_EMPTY(&rti->nd_rti_router_list)) {
2218	TAILQ_REMOVE(&nd_rti_list, rti, nd_rti_entry);
2219	ndrti_free(rti);
2220	}
2221	}
2222
2223	nd6_rti_list_signal_done();
2224	}
2225
2226	static void
2227	nd6_purge_interface_llinfo(struct ifnet *ifp)
2228	{
2229	struct llinfo_nd6 *ln = NULL;
2230	/ Note that rt->rt_ifp may not be the same as ifp,*
2231	* due to KAME goto ours hack. See RTM_RESOLVE case in
2232	* nd6_rtrequest(), and ip6_input().
2233	*/
2234	again:
2235	lck_mtx_lock(rnh_lock);
2236	ln = llinfo_nd6.ln_next;
2237	while (ln != NULL && ln != &llinfo_nd6) {
2238	struct rtentry *rt;
2239	struct llinfo_nd6 *nln;
2240
2241	nln = ln->ln_next;
2242	rt = ln->ln_rt;
2243	RT_LOCK(rt);
2244	if (rt->rt_gateway != NULL &&
2245	rt->rt_gateway->sa_family == AF_LINK &&
2246	SDL(rt->rt_gateway)->sdl_index == ifp->if_index) {
2247	RT_ADDREF_LOCKED(rt);
2248	RT_UNLOCK(rt);
2249	lck_mtx_unlock(rnh_lock);
2250	/*
2251	* See comments on nd6_service() for reasons why
2252	* this loop is repeated; we bite the costs of
2253	* going thru the same llinfo_nd6 more than once
2254	* here, since this purge happens during detach,
2255	* and that unlike the timer case, it's possible
2256	* there's more than one purges happening at the
2257	* same time (thus a flag wouldn't buy anything).
2258	*/
2259	nd6_free(rt);
2260	RT_REMREF(rt);
2261	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
2262	goto again;
2263	} else {
2264	RT_UNLOCK(rt);
2265	}
2266	ln = nln;
2267	}
2268	lck_mtx_unlock(rnh_lock);
2269	}
2270
2271	/*
2272	* Nuke neighbor cache/prefix/default router management table, right before
2273	* ifp goes away.
2274	*/
2275	void
2276	nd6_purge(struct ifnet *ifp)
2277	{
2278	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
2279	lck_mtx_lock(nd6_mutex);
2280
2281	/ Nuke default router list entries toward ifp /
2282	nd6_purge_interface_default_routers(ifp);
2283
2284	/ Nuke prefix list entries toward ifp /
2285	nd6_purge_interface_prefixes(ifp);
2286
2287	/ Nuke route info option entries toward ifp /
2288	nd6_purge_interface_rti_entries(ifp);
2289
2290	lck_mtx_unlock(nd6_mutex);
2291
2292	/ cancel default outgoing interface setting /
2293	if (nd6_defifindex == ifp->if_index) {
2294	nd6_setdefaultiface(`0`);
2295	}
2296
2297	/*
2298	* Perform default router selection even when we are a router,
2299	* if Scoped Routing is enabled.
2300	* XXX ?Should really not be needed since when defrouter_select
2301	* was changed to work on interface.
2302	*/
2303	lck_mtx_lock(nd6_mutex);
2304	/ refresh default router list /
2305	defrouter_select(ifp, NULL);
2306	lck_mtx_unlock(nd6_mutex);
2307
2308	/ Nuke neighbor cache entries for the ifp. /
2309	nd6_purge_interface_llinfo(ifp);
2310	}
2311
2312	/*
2313	* Upon success, the returned route will be locked and the caller is
2314	* responsible for releasing the reference and doing RT_UNLOCK(rt).
2315	* This routine does not require rnh_lock to be held by the caller,
2316	* although it needs to be indicated of such a case in order to call
2317	* the correct variant of the relevant routing routines.
2318	*/
2319	struct rtentry *
2320	nd6_lookup(struct in6_addr addr6, int* create, struct ifnet ifp, int* rt_locked)
2321	{
2322	struct rtentry *rt __single;
2323	struct sockaddr_in6 sin6;
2324	unsigned int ifscope;
2325
2326	SOCKADDR_ZERO(&sin6, sizeof(sin6));
2327	sin6.sin6_len = sizeof(struct sockaddr_in6);
2328	sin6.sin6_family = AF_INET6;
2329	sin6.sin6_addr = *addr6;
2330
2331	ifscope = (ifp != NULL) ? ifp->if_index : IFSCOPE_NONE;
2332	if (rt_locked) {
2333	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
2334	rt = rtalloc1_scoped_locked(SA(&sin6), create, `0`, ifscope);
2335	} else {
2336	rt = rtalloc1_scoped(SA(&sin6), create, `0`, ifscope);
2337	}
2338
2339	if (rt != NULL) {
2340	RT_LOCK(rt);
2341	if ((rt->rt_flags & RTF_LLINFO) == `0`) {
2342	/*
2343	* This is the case for the default route.
2344	* If we want to create a neighbor cache for the
2345	* address, we should free the route for the
2346	* destination and allocate an interface route.
2347	*/
2348	if (create) {
2349	RT_UNLOCK(rt);
2350	if (rt_locked) {
2351	rtfree_locked(rt);
2352	} else {
2353	rtfree(rt);
2354	}
2355	rt = NULL;
2356	}
2357	}
2358	}
2359	if (rt == NULL) {
2360	if (create && ifp) {
2361	struct ifaddr *ifa;
2362	u_int32_t ifa_flags;
2363	int e;
2364
2365	/*
2366	* If no route is available and create is set,
2367	* we allocate a host route for the destination
2368	* and treat it like an interface route.
2369	* This hack is necessary for a neighbor which can't
2370	* be covered by our own prefix.
2371	*/
2372	ifa = ifaof_ifpforaddr(SA(&sin6), ifp);
2373	if (ifa == NULL) {
2374	return NULL;
2375	}
2376
2377	/*
2378	* Create a new route. RTF_LLINFO is necessary
2379	* to create a Neighbor Cache entry for the
2380	* destination in nd6_rtrequest which will be
2381	* called in rtrequest via ifa->ifa_rtrequest.
2382	*/
2383	if (!rt_locked) {
2384	lck_mtx_lock(rnh_lock);
2385	}
2386	IFA_LOCK_SPIN(ifa);
2387	ifa_flags = ifa->ifa_flags;
2388	IFA_UNLOCK(ifa);
2389	e = rtrequest_scoped_locked(RTM_ADD, SA(&sin6), ifa->ifa_addr, SA(&all1_sa),
2390	(ifa_flags \| RTF_HOST \| RTF_LLINFO) & ~RTF_CLONING, &rt, ifscope);
2391	if (e != `0`) {
2392	if (e != EEXIST) {
2393	log(LOG_ERR, "%s: failed to add route "
2394	"for a neighbor(%s), errno=%d\n",
2395	__func__, ip6_sprintf(addr6), e);
2396	}
2397	}
2398	if (!rt_locked) {
2399	lck_mtx_unlock(rnh_lock);
2400	}
2401	ifa_remref(ifa);
2402	if (rt == NULL) {
2403	return NULL;
2404	}
2405
2406	RT_LOCK(rt);
2407	if (rt->rt_llinfo) {
2408	struct llinfo_nd6 *ln = rt->rt_llinfo;
2409	boolean_t nud_enabled = FALSE;
2410
2411	/*
2412	* The IPv6 initialization of the loopback interface
2413	* may happen after another interface gets assigned
2414	* an IPv6 address.
2415	* To avoid asserting treat local routes as special
2416	* case.
2417	*/
2418	if (rt->rt_ifp != lo_ifp) {
2419	struct nd_ifinfo *ndi = ND_IFINFO(rt->rt_ifp);
2420	VERIFY((NULL != ndi) && (TRUE == ndi->initialized));
2421	nud_enabled = !!(ndi->flags & ND6_IFF_PERFORMNUD);
2422	}
2423
2424	/*
2425	* For interface's that do not perform NUD
2426	* neighbor cache entres must always be marked
2427	* reachable with no expiry
2428	*/
2429	if (nud_enabled) {
2430	ND6_CACHE_STATE_TRANSITION(ln, ND6_LLINFO_NOSTATE);
2431	} else {
2432	ND6_CACHE_STATE_TRANSITION(ln, ND6_LLINFO_REACHABLE);
2433	ln_setexpire(ln, expiry: `0`);
2434	}
2435	}
2436	} else {
2437	return NULL;
2438	}
2439	}
2440	RT_LOCK_ASSERT_HELD(rt);
2441	/*
2442	* Validation for the entry.
2443	* Note that the check for rt_llinfo is necessary because a cloned
2444	* route from a parent route that has the L flag (e.g. the default
2445	* route to a p2p interface) may have the flag, too, while the
2446	* destination is not actually a neighbor.
2447	* XXX: we can't use rt->rt_ifp to check for the interface, since
2448	* it might be the loopback interface if the entry is for our
2449	* own address on a non-loopback interface. Instead, we should
2450	* use rt->rt_ifa->ifa_ifp, which would specify the REAL
2451	* interface.
2452	* Note also that ifa_ifp and ifp may differ when we connect two
2453	* interfaces to a same link, install a link prefix to an interface,
2454	* and try to install a neighbor cache on an interface that does not
2455	* have a route to the prefix.
2456	*
2457	* If the address is from a proxied prefix, the ifa_ifp and ifp might
2458	* not match, because nd6_na_input() could have modified the ifp
2459	* of the route to point to the interface where the NA arrived on,
2460	* hence the test for RTF_PROXY.
2461	*/
2462	if ((rt->rt_flags & RTF_GATEWAY) \|\| (rt->rt_flags & RTF_LLINFO) == `0` \|\|
2463	rt->rt_gateway->sa_family != AF_LINK \|\| rt->rt_llinfo == NULL \|\|
2464	(ifp && rt->rt_ifa->ifa_ifp != ifp &&
2465	!(rt->rt_flags & RTF_PROXY))) {
2466	RT_REMREF_LOCKED(rt);
2467	RT_UNLOCK(rt);
2468	if (create) {
2469	log(LOG_DEBUG, "%s: failed to lookup %s "
2470	"(if = %s)\n", __func__, ip6_sprintf(addr6),
2471	ifp ? if_name(ifp) : "unspec");
2472	/ xxx more logs... kazu /
2473	}
2474	return NULL;
2475	}
2476	/*
2477	* Caller needs to release reference and call RT_UNLOCK(rt).
2478	*/
2479	return rt;
2480	}
2481
2482	/*
2483	* Test whether a given IPv6 address is a neighbor or not, ignoring
2484	* the actual neighbor cache. The neighbor cache is ignored in order
2485	* to not reenter the routing code from within itself.
2486	*/
2487	static int
2488	nd6_is_new_addr_neighbor(struct sockaddr_in6 addr, struct* ifnet *ifp)
2489	{
2490	struct nd_prefix *pr;
2491	struct ifaddr *dstaddr;
2492
2493	LCK_MTX_ASSERT(nd6_mutex, LCK_MTX_ASSERT_OWNED);
2494
2495	/*
2496	* A link-local address is always a neighbor.
2497	* XXX: a link does not necessarily specify a single interface.
2498	*/
2499	if (IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr)) {
2500	struct sockaddr_in6 sin6_copy;
2501	u_int32_t zone;
2502
2503	/*
2504	* We need sin6_copy since sa6_recoverscope() may modify the
2505	* content (XXX).
2506	*/
2507	sin6_copy = *addr;
2508	if (sa6_recoverscope(&sin6_copy, FALSE)) {
2509	return `0`; / XXX: should be impossible /
2510	}
2511	if (in6_setscope(&sin6_copy.sin6_addr, ifp, &zone)) {
2512	return `0`;
2513	}
2514	if (sin6_copy.sin6_scope_id == zone) {
2515	return `1`;
2516	} else {
2517	return `0`;
2518	}
2519	}
2520
2521	/*
2522	* If the address matches one of our addresses,
2523	* it should be a neighbor.
2524	* If the address matches one of our on-link prefixes, it should be a
2525	* neighbor.
2526	*/
2527	for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
2528	NDPR_LOCK(pr);
2529	if (pr->ndpr_ifp != ifp) {
2530	NDPR_UNLOCK(pr);
2531	continue;
2532	}
2533	if (!(pr->ndpr_stateflags & NDPRF_ONLINK)) {
2534	NDPR_UNLOCK(pr);
2535	continue;
2536	}
2537	if (in6_are_masked_addr_scope_equal(&pr->ndpr_prefix.sin6_addr, pr->ndpr_prefix.sin6_scope_id,
2538	&addr->sin6_addr, addr->sin6_scope_id, &pr->ndpr_mask)) {
2539	NDPR_UNLOCK(pr);
2540	return `1`;
2541	}
2542	NDPR_UNLOCK(pr);
2543	}
2544
2545	/*
2546	* If the address is assigned on the node of the other side of
2547	* a p2p interface, the address should be a neighbor.
2548	*/
2549	dstaddr = ifa_ifwithdstaddr(SA(addr));
2550	if (dstaddr != NULL) {
2551	if (dstaddr->ifa_ifp == ifp) {
2552	ifa_remref(ifa: dstaddr);
2553	return `1`;
2554	}
2555	ifa_remref(ifa: dstaddr);
2556	dstaddr = NULL;
2557	}
2558
2559	return `0`;
2560	}
2561
2562
2563	/*
2564	* Detect if a given IPv6 address identifies a neighbor on a given link.
2565	* XXX: should take care of the destination of a p2p link?
2566	*/
2567	int
2568	nd6_is_addr_neighbor(struct sockaddr_in6 addr, struct* ifnet *ifp,
2569	int rt_locked)
2570	{
2571	struct rtentry *rt;
2572
2573	LCK_MTX_ASSERT(nd6_mutex, LCK_MTX_ASSERT_NOTOWNED);
2574	lck_mtx_lock(nd6_mutex);
2575	if (nd6_is_new_addr_neighbor(addr, ifp)) {
2576	lck_mtx_unlock(nd6_mutex);
2577	return `1`;
2578	}
2579	lck_mtx_unlock(nd6_mutex);
2580
2581	/*
2582	* Even if the address matches none of our addresses, it might be
2583	* in the neighbor cache.
2584	*/
2585	if ((rt = nd6_lookup(addr6: &addr->sin6_addr, create: `0`, ifp, rt_locked)) != NULL) {
2586	RT_LOCK_ASSERT_HELD(rt);
2587	RT_REMREF_LOCKED(rt);
2588	RT_UNLOCK(rt);
2589	return `1`;
2590	}
2591
2592	return `0`;
2593	}
2594
2595	/*
2596	* Free an nd6 llinfo entry.
2597	* Since the function would cause significant changes in the kernel, DO NOT
2598	* make it global, unless you have a strong reason for the change, and are sure
2599	* that the change is safe.
2600	*/
2601	void
2602	nd6_free(struct rtentry *rt)
2603	{
2604	struct llinfo_nd6 *ln = NULL;
2605	struct in6_addr in6 = {};
2606	struct nd_defrouter *dr = NULL;
2607
2608	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
2609	RT_LOCK_ASSERT_NOTHELD(rt);
2610	lck_mtx_lock(nd6_mutex);
2611
2612	RT_LOCK(rt);
2613	RT_ADDREF_LOCKED(rt); / Extra ref /
2614	ln = rt->rt_llinfo;
2615	in6 = SIN6(rt_key(rt))->sin6_addr;
2616
2617	/*
2618	* Prevent another thread from modifying rt_key, rt_gateway
2619	* via rt_setgate() after the rt_lock is dropped by marking
2620	* the route as defunct.
2621	*/
2622	rt->rt_flags \|= RTF_CONDEMNED;
2623
2624	/*
2625	* We used to have pfctlinput(PRC_HOSTDEAD) here. Even though it is
2626	* not harmful, it was not really necessary. Perform default router
2627	* selection even when we are a router, if Scoped Routing is enabled.
2628	*/
2629	/ XXX TDB Handle lists in route information option as well /
2630	dr = defrouter_lookup(NULL, &SIN6(rt_key(rt))->sin6_addr, rt->rt_ifp);
2631
2632	if ((ln && ln->ln_router) \|\| dr) {
2633	/*
2634	* rt6_flush must be called whether or not the neighbor
2635	* is in the Default Router List.
2636	* See a corresponding comment in nd6_na_input().
2637	*/
2638	RT_UNLOCK(rt);
2639	lck_mtx_unlock(nd6_mutex);
2640	rt6_flush(&in6, rt->rt_ifp);
2641	lck_mtx_lock(nd6_mutex);
2642	} else {
2643	RT_UNLOCK(rt);
2644	}
2645
2646	if (dr) {
2647	NDDR_REMREF(dr);
2648	/*
2649	* Unreachablity of a router might affect the default
2650	* router selection and on-link detection of advertised
2651	* prefixes.
2652	*/
2653
2654	/*
2655	* Temporarily fake the state to choose a new default
2656	* router and to perform on-link determination of
2657	* prefixes correctly.
2658	* Below the state will be set correctly,
2659	* or the entry itself will be deleted.
2660	*/
2661	RT_LOCK_SPIN(rt);
2662	ND6_CACHE_STATE_TRANSITION(ln, ND6_LLINFO_INCOMPLETE);
2663
2664	/*
2665	* Since defrouter_select() does not affect the
2666	* on-link determination and MIP6 needs the check
2667	* before the default router selection, we perform
2668	* the check now.
2669	*/
2670	RT_UNLOCK(rt);
2671	pfxlist_onlink_check();
2672
2673	/*
2674	* refresh default router list
2675	*/
2676	defrouter_select(rt->rt_ifp, NULL);
2677
2678	/ Loop through all RTI's as well and trigger router selection. /
2679	nd6_router_select_rti_entries(ifp: rt->rt_ifp);
2680	}
2681	RT_LOCK_ASSERT_NOTHELD(rt);
2682	lck_mtx_unlock(nd6_mutex);
2683	/*
2684	* Detach the route from the routing tree and the list of neighbor
2685	* caches, and disable the route entry not to be used in already
2686	* cached routes.
2687	*/
2688	(void) rtrequest(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt), `0`, NULL);
2689
2690	/ Extra ref held above; now free it /
2691	rtfree(rt);
2692	}
2693
2694	void
2695	nd6_rtrequest(int req, struct rtentry rt, struct* sockaddr *sa)
2696	{
2697	#pragma unused(sa)
2698	struct sockaddr *gate = rt->rt_gateway;
2699	struct llinfo_nd6 *ln = rt->rt_llinfo;
2700	static struct sockaddr_dl null_sdl =
2701	{ .sdl_len = sizeof(null_sdl), .sdl_family = AF_LINK };
2702	struct ifnet *ifp = rt->rt_ifp;
2703	struct ifaddr *ifa;
2704	uint64_t timenow;
2705	char buf[MAX_IPv6_STR_LEN];
2706	boolean_t nud_enabled = FALSE;
2707
2708	/*
2709	* The IPv6 initialization of the loopback interface
2710	* may happen after another interface gets assigned
2711	* an IPv6 address.
2712	* To avoid asserting treat local routes as special
2713	* case.
2714	*/
2715	if (rt->rt_ifp != lo_ifp) {
2716	struct nd_ifinfo *ndi = ND_IFINFO(rt->rt_ifp);
2717	VERIFY((NULL != ndi) && (TRUE == ndi->initialized));
2718	nud_enabled = !!(ndi->flags & ND6_IFF_PERFORMNUD);
2719	}
2720
2721	VERIFY(nd6_init_done);
2722	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
2723	RT_LOCK_ASSERT_HELD(rt);
2724
2725	/*
2726	* We have rnh_lock held, see if we need to schedule the timer;
2727	* we might do this again below during RTM_RESOLVE, but doing it
2728	* now handles all other cases.
2729	*/
2730	if (nd6_sched_timeout_want) {
2731	nd6_sched_timeout(NULL, NULL);
2732	}
2733
2734	if (rt->rt_flags & RTF_GATEWAY) {
2735	return;
2736	}
2737
2738	if (!nd6_need_cache(ifp) && !(rt->rt_flags & RTF_HOST)) {
2739	/*
2740	* This is probably an interface direct route for a link
2741	* which does not need neighbor caches (e.g. fe80::%lo0/64).
2742	* We do not need special treatment below for such a route.
2743	* Moreover, the RTF_LLINFO flag which would be set below
2744	* would annoy the ndp(8) command.
2745	*/
2746	return;
2747	}
2748
2749	if (req == RTM_RESOLVE) {
2750	int no_nd_cache;
2751
2752	if (!nd6_need_cache(ifp)) { / stf case /
2753	no_nd_cache = `1`;
2754	} else {
2755	struct sockaddr_in6 sin6;
2756
2757	rtkey_to_sa6(rt, &sin6);
2758	/*
2759	* nd6_is_addr_neighbor() may call nd6_lookup(),
2760	* therefore we drop rt_lock to avoid deadlock
2761	* during the lookup.
2762	*/
2763	RT_ADDREF_LOCKED(rt);
2764	RT_UNLOCK(rt);
2765	no_nd_cache = !nd6_is_addr_neighbor(addr: &sin6, ifp, rt_locked: `1`);
2766	RT_LOCK(rt);
2767	RT_REMREF_LOCKED(rt);
2768	}
2769
2770	/*
2771	* FreeBSD and BSD/OS often make a cloned host route based
2772	* on a less-specific route (e.g. the default route).
2773	* If the less specific route does not have a "gateway"
2774	* (this is the case when the route just goes to a p2p or an
2775	* stf interface), we'll mistakenly make a neighbor cache for
2776	* the host route, and will see strange neighbor solicitation
2777	* for the corresponding destination. In order to avoid the
2778	* confusion, we check if the destination of the route is
2779	* a neighbor in terms of neighbor discovery, and stop the
2780	* process if not. Additionally, we remove the LLINFO flag
2781	* so that ndp(8) will not try to get the neighbor information
2782	* of the destination.
2783	*/
2784	if (no_nd_cache) {
2785	rt->rt_flags &= ~RTF_LLINFO;
2786	return;
2787	}
2788	}
2789
2790	timenow = net_uptime();
2791
2792	switch (req) {
2793	case RTM_ADD:
2794	/*
2795	* There is no backward compatibility :)
2796	*
2797	* if ((rt->rt_flags & RTF_HOST) == 0 &&
2798	* SIN(rt_mask(rt))->sin_addr.s_addr != 0xffffffff)
2799	* rt->rt_flags \|= RTF_CLONING;
2800	*/
2801	if ((rt->rt_flags & RTF_CLONING) \|\|
2802	((rt->rt_flags & RTF_LLINFO) && ln == NULL)) {
2803	/*
2804	* Case 1: This route should come from a route to
2805	* interface (RTF_CLONING case) or the route should be
2806	* treated as on-link but is currently not
2807	* (RTF_LLINFO && ln == NULL case).
2808	*/
2809	if (rt_setgate(rt, rt_key(rt), SA(&null_sdl)) == `0`) {
2810	gate = rt->rt_gateway;
2811	SDL(gate)->sdl_type = ifp->if_type;
2812	SDL(gate)->sdl_index = ifp->if_index;
2813	/*
2814	* In case we're called before 1.0 sec.
2815	* has elapsed.
2816	*/
2817	if (ln != NULL) {
2818	ln_setexpire(ln,
2819	expiry: (ifp->if_eflags & IFEF_IPV6_ND6ALT)
2820	? `0` : MAX(timenow, `1`));
2821	}
2822	}
2823	if (rt->rt_flags & RTF_CLONING) {
2824	break;
2825	}
2826	}
2827	/*
2828	* In IPv4 code, we try to annonuce new RTF_ANNOUNCE entry here.
2829	* We don't do that here since llinfo is not ready yet.
2830	*
2831	* There are also couple of other things to be discussed:
2832	* - unsolicited NA code needs improvement beforehand
2833	* - RFC4861 says we MAY send multicast unsolicited NA
2834	* (7.2.6 paragraph 4), however, it also says that we
2835	* SHOULD provide a mechanism to prevent multicast NA storm.
2836	* we don't have anything like it right now.
2837	* note that the mechanism needs a mutual agreement
2838	* between proxies, which means that we need to implement
2839	* a new protocol, or a new kludge.
2840	* - from RFC4861 6.2.4, host MUST NOT send an unsolicited RA.
2841	* we need to check ip6forwarding before sending it.
2842	* (or should we allow proxy ND configuration only for
2843	* routers? there's no mention about proxy ND from hosts)
2844	*/
2845	OS_FALLTHROUGH;
2846	case RTM_RESOLVE:
2847	if (!(ifp->if_flags & (IFF_POINTOPOINT \| IFF_LOOPBACK))) {
2848	/*
2849	* Address resolution isn't necessary for a point to
2850	* point link, so we can skip this test for a p2p link.
2851	*/
2852	if (gate->sa_family != AF_LINK \|\|
2853	gate->sa_len < sizeof(null_sdl)) {
2854	/ Don't complain in case of RTM_ADD /
2855	if (req == RTM_RESOLVE) {
2856	log(LOG_ERR, "%s: route to %s has bad "
2857	"gateway address (sa_family %u "
2858	"sa_len %u) on %s\n", __func__,
2859	inet_ntop(AF_INET6,
2860	&SIN6(rt_key(rt))->sin6_addr, buf,
2861	sizeof(buf)), gate->sa_family,
2862	gate->sa_len, if_name(ifp));
2863	}
2864	break;
2865	}
2866	SDL(gate)->sdl_type = ifp->if_type;
2867	SDL(gate)->sdl_index = ifp->if_index;
2868	}
2869	if (ln != NULL) {
2870	break; / This happens on a route change /
2871	}
2872	/*
2873	* Case 2: This route may come from cloning, or a manual route
2874	* add with a LL address.
2875	*/
2876	rt->rt_llinfo = ln = nd6_llinfo_alloc(how: Z_WAITOK);
2877
2878	nd6_allocated++;
2879	rt->rt_llinfo_get_ri = nd6_llinfo_get_ri;
2880	rt->rt_llinfo_get_iflri = nd6_llinfo_get_iflri;
2881	rt->rt_llinfo_purge = nd6_llinfo_purge;
2882	rt->rt_llinfo_free = nd6_llinfo_free;
2883	rt->rt_llinfo_refresh = nd6_llinfo_refresh;
2884	rt->rt_flags \|= RTF_LLINFO;
2885	ln->ln_rt = rt;
2886	/ this is required for "ndp" command. - shin /
2887	/*
2888	* For interface's that do not perform NUD
2889	* neighbor cache entries must always be marked
2890	* reachable with no expiry
2891	*/
2892	if ((req == RTM_ADD) \|\| !nud_enabled) {
2893	/*
2894	* gate should have some valid AF_LINK entry,
2895	* and ln->ln_expire should have some lifetime
2896	* which is specified by ndp command.
2897	*/
2898	ND6_CACHE_STATE_TRANSITION(ln, ND6_LLINFO_REACHABLE);
2899	ln_setexpire(ln, expiry: `0`);
2900	} else {
2901	/*
2902	* When req == RTM_RESOLVE, rt is created and
2903	* initialized in rtrequest(), so rt_expire is 0.
2904	*/
2905	ND6_CACHE_STATE_TRANSITION(ln, ND6_LLINFO_NOSTATE);
2906	/ In case we're called before 1.0 sec. has elapsed /
2907	ln_setexpire(ln, expiry: (ifp->if_eflags & IFEF_IPV6_ND6ALT) ?
2908	`0` : MAX(timenow, `1`));
2909	}
2910	LN_INSERTHEAD(ln);
2911	nd6_inuse++;
2912
2913	/ We have at least one entry; arm the timer if not already /
2914	nd6_sched_timeout(NULL, NULL);
2915
2916	/*
2917	* If we have too many cache entries, initiate immediate
2918	* purging for some "less recently used" entries. Note that
2919	* we cannot directly call nd6_free() here because it would
2920	* cause re-entering rtable related routines triggering an LOR
2921	* problem.
2922	*/
2923	if (ip6_neighborgcthresh > `0` &&
2924	nd6_inuse >= ip6_neighborgcthresh) {
2925	int i;
2926
2927	for (i = `0`; i < `10` && llinfo_nd6.ln_prev != ln; i++) {
2928	struct llinfo_nd6 *ln_end = llinfo_nd6.ln_prev;
2929	struct rtentry *rt_end = ln_end->ln_rt;
2930
2931	/ Move this entry to the head /
2932	RT_LOCK(rt_end);
2933	LN_DEQUEUE(ln_end);
2934	LN_INSERTHEAD(ln_end);
2935
2936	if (ln_end->ln_expire == `0`) {
2937	RT_UNLOCK(rt_end);
2938	continue;
2939	}
2940	if (ln_end->ln_state > ND6_LLINFO_INCOMPLETE) {
2941	ND6_CACHE_STATE_TRANSITION(ln_end, ND6_LLINFO_STALE);
2942	} else {
2943	ND6_CACHE_STATE_TRANSITION(ln_end, ND6_LLINFO_PURGE);
2944	}
2945	ln_setexpire(ln: ln_end, expiry: timenow);
2946	RT_UNLOCK(rt_end);
2947	}
2948	}
2949
2950	/*
2951	* check if rt_key(rt) is one of my address assigned
2952	* to the interface.
2953	*/
2954	ifa = (struct ifaddr *)in6ifa_ifpwithaddr(rt->rt_ifp,
2955	&SIN6(rt_key(rt))->sin6_addr);
2956	if (ifa != NULL) {
2957	caddr_t macp = nd6_ifptomac(ifp);
2958	ln_setexpire(ln, expiry: `0`);
2959	ND6_CACHE_STATE_TRANSITION(ln, ND6_LLINFO_REACHABLE);
2960	if (macp != NULL) {
2961	Bcopy(macp, LLADDR(SDL(gate)), ifp->if_addrlen);
2962	SDL(gate)->sdl_alen = ifp->if_addrlen;
2963	}
2964	if (nd6_useloopback) {
2965	if (rt->rt_ifp != lo_ifp) {
2966	/*
2967	* Purge any link-layer info caching.
2968	*/
2969	if (rt->rt_llinfo_purge != NULL) {
2970	rt->rt_llinfo_purge(rt);
2971	}
2972
2973	/*
2974	* Adjust route ref count for the
2975	* interfaces.
2976	*/
2977	if (rt->rt_if_ref_fn != NULL) {
2978	rt->rt_if_ref_fn(lo_ifp, `1`);
2979	rt->rt_if_ref_fn(rt->rt_ifp,
2980	-`1`);
2981	}
2982	}
2983	rt->rt_ifp = lo_ifp;
2984	/*
2985	* If rmx_mtu is not locked, update it
2986	* to the MTU used by the new interface.
2987	*/
2988	if (!(rt->rt_rmx.rmx_locks & RTV_MTU)) {
2989	rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
2990	}
2991	/*
2992	* Make sure rt_ifa be equal to the ifaddr
2993	* corresponding to the address.
2994	* We need this because when we refer
2995	* rt_ifa->ia6_flags in ip6_input, we assume
2996	* that the rt_ifa points to the address instead
2997	* of the loopback address.
2998	*/
2999	if (ifa != rt->rt_ifa) {
3000	rtsetifa(rt, ifa);
3001	}
3002	}
3003	ifa_remref(ifa);
3004	} else if (rt->rt_flags & RTF_ANNOUNCE) {
3005	ln_setexpire(ln, expiry: `0`);
3006	ND6_CACHE_STATE_TRANSITION(ln, ND6_LLINFO_REACHABLE);
3007
3008	/ join solicited node multicast for proxy ND /
3009	if (ifp->if_flags & IFF_MULTICAST) {
3010	struct in6_addr llsol;
3011	struct in6_multi *in6m;
3012	int error;
3013
3014	llsol = SIN6(rt_key(rt))->sin6_addr;
3015	llsol.s6_addr32[`0`] = IPV6_ADDR_INT32_MLL;
3016	llsol.s6_addr32[`1`] = `0`;
3017	llsol.s6_addr32[`2`] = htonl(`1`);
3018	llsol.s6_addr8[`12`] = `0xff`;
3019	if (in6_setscope(&llsol, ifp, NULL)) {
3020	break;
3021	}
3022	error = in6_mc_join(ifp, &llsol,
3023	NULL, &in6m, `0`);
3024	if (error) {
3025	nd6log(error, "%s: failed to join "
3026	"%s (errno=%d)\n", if_name(ifp),
3027	ip6_sprintf(&llsol), error);
3028	} else {
3029	IN6M_REMREF(in6m);
3030	}
3031	}
3032	}
3033	break;
3034
3035	case RTM_DELETE:
3036	if (ln == NULL) {
3037	break;
3038	}
3039	/ leave from solicited node multicast for proxy ND /
3040	if ((rt->rt_flags & RTF_ANNOUNCE) &&
3041	(ifp->if_flags & IFF_MULTICAST)) {
3042	struct in6_addr llsol;
3043	struct in6_multi *in6m;
3044
3045	llsol = SIN6(rt_key(rt))->sin6_addr;
3046	llsol.s6_addr32[`0`] = IPV6_ADDR_INT32_MLL;
3047	llsol.s6_addr32[`1`] = `0`;
3048	llsol.s6_addr32[`2`] = htonl(`1`);
3049	llsol.s6_addr8[`12`] = `0xff`;
3050	if (in6_setscope(&llsol, ifp, NULL) == `0`) {
3051	in6_multihead_lock_shared();
3052	IN6_LOOKUP_MULTI(&llsol, ifp, in6m);
3053	in6_multihead_lock_done();
3054	if (in6m != NULL) {
3055	in6_mc_leave(in6m, NULL);
3056	IN6M_REMREF(in6m);
3057	}
3058	}
3059	}
3060	nd6_inuse--;
3061	/*
3062	* Unchain it but defer the actual freeing until the route
3063	* itself is to be freed. rt->rt_llinfo still points to
3064	* llinfo_nd6, and likewise, ln->ln_rt stil points to this
3065	* route entry, except that RTF_LLINFO is now cleared.
3066	*/
3067	if (ln->ln_flags & ND6_LNF_IN_USE) {
3068	LN_DEQUEUE(ln);
3069	}
3070
3071	/*
3072	* Purge any link-layer info caching.
3073	*/
3074	if (rt->rt_llinfo_purge != NULL) {
3075	rt->rt_llinfo_purge(rt);
3076	}
3077
3078	rt->rt_flags &= ~RTF_LLINFO;
3079	if (ln->ln_hold != NULL) {
3080	m_freem_list(ln->ln_hold);
3081	ln->ln_hold = NULL;
3082	}
3083	}
3084	}
3085
3086	static int
3087	nd6_siocgdrlst(void data, int* data_is_64)
3088	{
3089	struct in6_drlist_32 *drl_32;
3090	struct nd_defrouter *dr;
3091	int i = `0`;
3092
3093	LCK_MTX_ASSERT(nd6_mutex, LCK_MTX_ASSERT_OWNED);
3094
3095	dr = TAILQ_FIRST(&nd_defrouter_list);
3096
3097	/ XXX Handle mapped defrouter entries /
3098	/ For 64-bit process /
3099	if (data_is_64) {
3100	struct in6_drlist_64 *drl_64;
3101
3102	drl_64 = kalloc_type(struct in6_drlist_64,
3103	Z_WAITOK \| Z_ZERO \| Z_NOFAIL);
3104
3105	/ preserve the interface name /
3106	bcopy(src: data, dst: drl_64, n: sizeof(drl_64->ifname));
3107
3108	while (dr && i < DRLSTSIZ) {
3109	drl_64->defrouter[i].rtaddr = dr->rtaddr;
3110	if (IN6_IS_ADDR_LINKLOCAL(
3111	&drl_64->defrouter[i].rtaddr)) {
3112	/ XXX: need to this hack for KAME stack /
3113	drl_64->defrouter[i].rtaddr.s6_addr16[`1`] = `0`;
3114	} else {
3115	log(LOG_ERR,
3116	"default router list contains a "
3117	"non-linklocal address(%s)\n",
3118	ip6_sprintf(&drl_64->defrouter[i].rtaddr));
3119	}
3120	drl_64->defrouter[i].flags = dr->flags;
3121	drl_64->defrouter[i].rtlifetime = (u_short)dr->rtlifetime;
3122	drl_64->defrouter[i].expire = (u_long)nddr_getexpire(dr);
3123	drl_64->defrouter[i].if_index = dr->ifp->if_index;
3124	i++;
3125	dr = TAILQ_NEXT(dr, dr_entry);
3126	}
3127	bcopy(src: drl_64, dst: data, n: sizeof(*drl_64));
3128	kfree_type(struct in6_drlist_64, drl_64);
3129	return `0`;
3130	}
3131
3132	/ For 32-bit process /
3133	drl_32 = kalloc_type(struct in6_drlist_32, Z_WAITOK \| Z_ZERO \| Z_NOFAIL);
3134
3135	/ preserve the interface name /
3136	bcopy(src: data, dst: drl_32, n: sizeof(drl_32->ifname));
3137
3138	while (dr != NULL && i < DRLSTSIZ) {
3139	drl_32->defrouter[i].rtaddr = dr->rtaddr;
3140	if (IN6_IS_ADDR_LINKLOCAL(&drl_32->defrouter[i].rtaddr)) {
3141	/ XXX: need to this hack for KAME stack /
3142	drl_32->defrouter[i].rtaddr.s6_addr16[`1`] = `0`;
3143	} else {
3144	log(LOG_ERR,
3145	"default router list contains a "
3146	"non-linklocal address(%s)\n",
3147	ip6_sprintf(&drl_32->defrouter[i].rtaddr));
3148	}
3149	drl_32->defrouter[i].flags = dr->flags;
3150	drl_32->defrouter[i].rtlifetime = (u_short)dr->rtlifetime;
3151	drl_32->defrouter[i].expire = (u_int32_t)nddr_getexpire(dr);
3152	drl_32->defrouter[i].if_index = dr->ifp->if_index;
3153	i++;
3154	dr = TAILQ_NEXT(dr, dr_entry);
3155	}
3156	bcopy(src: drl_32, dst: data, n: sizeof(*drl_32));
3157	kfree_type(struct in6_drlist_32, drl_32);
3158	return `0`;
3159	}
3160
3161	/*
3162	* XXX meaning of fields, especialy "raflags", is very
3163	* differnet between RA prefix list and RR/static prefix list.
3164	* how about separating ioctls into two?
3165	*/
3166	static int
3167	nd6_siocgprlst(void data, int* data_is_64)
3168	{
3169	struct in6_prlist_32 *prl_32;
3170	struct nd_prefix *pr;
3171	int i = `0`;
3172
3173	LCK_MTX_ASSERT(nd6_mutex, LCK_MTX_ASSERT_OWNED);
3174
3175	pr = nd_prefix.lh_first;
3176
3177	/ XXX Handle mapped defrouter entries /
3178	/ For 64-bit process /
3179	if (data_is_64) {
3180	struct in6_prlist_64 *prl_64;
3181
3182	prl_64 = kalloc_type(struct in6_prlist_64,
3183	Z_WAITOK \| Z_ZERO \| Z_NOFAIL);
3184
3185	/ preserve the interface name /
3186	bcopy(src: data, dst: prl_64, n: sizeof(prl_64->ifname));
3187
3188	while (pr && i < PRLSTSIZ) {
3189	struct nd_pfxrouter *pfr;
3190	int j;
3191	uint32_t ifscope;
3192
3193	NDPR_LOCK(pr);
3194	(void) in6_embedscope(&prl_64->prefix[i].prefix,
3195	&pr->ndpr_prefix, NULL, NULL, NULL, &ifscope);
3196	prl_64->prefix[i].prefix.s6_addr16[`1`] = htons((uint16_t)ifscope);
3197	prl_64->prefix[i].raflags = pr->ndpr_raf;
3198	prl_64->prefix[i].prefixlen = pr->ndpr_plen;
3199	prl_64->prefix[i].vltime = pr->ndpr_vltime;
3200	prl_64->prefix[i].pltime = pr->ndpr_pltime;
3201	prl_64->prefix[i].if_index = pr->ndpr_ifp->if_index;
3202	prl_64->prefix[i].expire = (u_long)ndpr_getexpire(pr);
3203
3204	pfr = pr->ndpr_advrtrs.lh_first;
3205	j = `0`;
3206	while (pfr) {
3207	if (j < DRLSTSIZ) {
3208	#define RTRADDR prl_64->prefix[i].advrtr[j]
3209	RTRADDR = pfr->router->rtaddr;
3210	if (IN6_IS_ADDR_LINKLOCAL(&RTRADDR)) {
3211	/ XXX: hack for KAME /
3212	RTRADDR.s6_addr16[`1`] = `0`;
3213	} else {
3214	log(LOG_ERR,
3215	"a router(%s) advertises "
3216	"a prefix with "
3217	"non-link local address\n",
3218	ip6_sprintf(&RTRADDR));
3219	}
3220	#undef RTRADDR
3221	}
3222	j++;
3223	pfr = pfr->pfr_next;
3224	}
3225	ASSERT(j <= USHRT_MAX);
3226	prl_64->prefix[i].advrtrs = (u_short)j;
3227	prl_64->prefix[i].origin = PR_ORIG_RA;
3228	NDPR_UNLOCK(pr);
3229
3230	i++;
3231	pr = pr->ndpr_next;
3232	}
3233	bcopy(src: prl_64, dst: data, n: sizeof(*prl_64));
3234	kfree_type(struct in6_prlist_64, prl_64);
3235	return `0`;
3236	}
3237
3238	/ For 32-bit process /
3239	prl_32 = kalloc_type(struct in6_prlist_32, Z_WAITOK \| Z_ZERO \| Z_NOFAIL);
3240
3241	/ preserve the interface name /
3242	bcopy(src: data, dst: prl_32, n: sizeof(prl_32->ifname));
3243
3244	while (pr && i < PRLSTSIZ) {
3245	struct nd_pfxrouter *pfr;
3246	int j;
3247	uint32_t ifscope;
3248
3249	NDPR_LOCK(pr);
3250	(void) in6_embedscope(&prl_32->prefix[i].prefix,
3251	&pr->ndpr_prefix, NULL, NULL, NULL, &ifscope);
3252	prl_32->prefix[i].prefix.s6_addr16[`1`] = htons((uint16_t)ifscope);
3253	prl_32->prefix[i].raflags = pr->ndpr_raf;
3254	prl_32->prefix[i].prefixlen = pr->ndpr_plen;
3255	prl_32->prefix[i].vltime = pr->ndpr_vltime;
3256	prl_32->prefix[i].pltime = pr->ndpr_pltime;
3257	prl_32->prefix[i].if_index = pr->ndpr_ifp->if_index;
3258	prl_32->prefix[i].expire = (u_int32_t)ndpr_getexpire(pr);
3259
3260	pfr = pr->ndpr_advrtrs.lh_first;
3261	j = `0`;
3262	while (pfr) {
3263	if (j < DRLSTSIZ) {
3264	#define RTRADDR prl_32->prefix[i].advrtr[j]
3265	RTRADDR = pfr->router->rtaddr;
3266	if (IN6_IS_ADDR_LINKLOCAL(&RTRADDR)) {
3267	/ XXX: hack for KAME /
3268	RTRADDR.s6_addr16[`1`] = `0`;
3269	} else {
3270	log(LOG_ERR,
3271	"a router(%s) advertises "
3272	"a prefix with "
3273	"non-link local address\n",
3274	ip6_sprintf(&RTRADDR));
3275	}
3276	#undef RTRADDR
3277	}
3278	j++;
3279	pfr = pfr->pfr_next;
3280	}
3281	ASSERT(j <= USHRT_MAX);
3282	prl_32->prefix[i].advrtrs = (u_short)j;
3283	prl_32->prefix[i].origin = PR_ORIG_RA;
3284	NDPR_UNLOCK(pr);
3285
3286	i++;
3287	pr = pr->ndpr_next;
3288	}
3289	bcopy(src: prl_32, dst: data, n: sizeof(*prl_32));
3290	kfree_type(struct in6_prlist_32, prl_32);
3291	return `0`;
3292	}
3293
3294	int
3295	nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
3296	{
3297	struct nd_defrouter *dr;
3298	struct nd_prefix *pr;
3299	struct rtentry *rt;
3300	int error = `0`;
3301
3302	VERIFY(ifp != NULL);
3303
3304	switch (cmd) {
3305	case SIOCGDRLST_IN6_32: / struct in6_drlist_32 /
3306	case SIOCGDRLST_IN6_64: / struct in6_drlist_64 /
3307	/*
3308	* obsolete API, use sysctl under net.inet6.icmp6
3309	*/
3310	lck_mtx_lock(nd6_mutex);
3311	error = nd6_siocgdrlst(data, data_is_64: cmd == SIOCGDRLST_IN6_64);
3312	lck_mtx_unlock(nd6_mutex);
3313	break;
3314
3315	case SIOCGPRLST_IN6_32: / struct in6_prlist_32 /
3316	case SIOCGPRLST_IN6_64: / struct in6_prlist_64 /
3317	/*
3318	* obsolete API, use sysctl under net.inet6.icmp6
3319	*/
3320	lck_mtx_lock(nd6_mutex);
3321	error = nd6_siocgprlst(data, data_is_64: cmd == SIOCGPRLST_IN6_64);
3322	lck_mtx_unlock(nd6_mutex);
3323	break;
3324
3325	case OSIOCGIFINFO_IN6: / struct in6_ondireq /
3326	case SIOCGIFINFO_IN6: { / struct in6_ondireq /
3327	u_int32_t linkmtu;
3328	struct in6_ondireq ondi = (struct* in6_ondireq )(void* *)data;
3329	struct nd_ifinfo *ndi;
3330	/*
3331	* SIOCGIFINFO_IN6 ioctl is encoded with in6_ondireq
3332	* instead of in6_ndireq, so we treat it as such.
3333	*/
3334	ndi = ND_IFINFO(ifp);
3335	if ((NULL == ndi) \|\| (FALSE == ndi->initialized)) {
3336	error = EINVAL;
3337	break;
3338	}
3339	lck_mtx_lock(lck: &ndi->lock);
3340	linkmtu = IN6_LINKMTU(ifp);
3341	bcopy(src: &linkmtu, dst: &ondi->ndi.linkmtu, n: sizeof(linkmtu));
3342	bcopy(src: &ndi->maxmtu, dst: &ondi->ndi.maxmtu,
3343	n: sizeof(u_int32_t));
3344	bcopy(src: &ndi->basereachable, dst: &ondi->ndi.basereachable,
3345	n: sizeof(u_int32_t));
3346	bcopy(src: &ndi->reachable, dst: &ondi->ndi.reachable,
3347	n: sizeof(u_int32_t));
3348	bcopy(src: &ndi->retrans, dst: &ondi->ndi.retrans,
3349	n: sizeof(u_int32_t));
3350	bcopy(src: &ndi->flags, dst: &ondi->ndi.flags,
3351	n: sizeof(u_int32_t));
3352	bcopy(src: &ndi->recalctm, dst: &ondi->ndi.recalctm,
3353	n: sizeof(int));
3354	ondi->ndi.chlim = ndi->chlim;
3355	/*
3356	* The below truncation is fine as we mostly use it for
3357	* debugging purpose.
3358	*/
3359	ondi->ndi.receivedra = (uint8_t)ndi->ndefrouters;
3360	ondi->ndi.collision_count = (uint8_t)ndi->cga_collision_count;
3361	lck_mtx_unlock(lck: &ndi->lock);
3362	break;
3363	}
3364
3365	case SIOCSIFINFO_FLAGS: { / struct in6_ndireq /
3366	/*
3367	* XXX BSD has a bunch of checks here to ensure
3368	* that interface disabled flag is not reset if
3369	* link local address has failed DAD.
3370	* Investigate that part.
3371	*/
3372	struct in6_ndireq cndi = (struct* in6_ndireq )(void* *)data;
3373	u_int32_t oflags, flags;
3374	struct nd_ifinfo *ndi = ND_IFINFO(ifp);
3375
3376	/ XXX: almost all other fields of cndi->ndi is unused /
3377	if ((NULL == ndi) \|\| !ndi->initialized) {
3378	error = EINVAL;
3379	break;
3380	}
3381
3382	lck_mtx_lock(lck: &ndi->lock);
3383	oflags = ndi->flags;
3384	bcopy(src: &cndi->ndi.flags, dst: &(ndi->flags), n: sizeof(flags));
3385	flags = ndi->flags;
3386	lck_mtx_unlock(lck: &ndi->lock);
3387
3388	if (oflags == flags) {
3389	break;
3390	}
3391
3392	error = nd6_setifinfo(ifp, oflags, flags);
3393	break;
3394	}
3395
3396	case SIOCSNDFLUSH_IN6: / struct in6_ifreq /
3397	/ flush default router list /
3398	/*
3399	* xxx sumikawa: should not delete route if default
3400	* route equals to the top of default router list
3401	*
3402	* XXX TODO: Needs to be done for RTI as well
3403	* Is very specific flush command with ndp for default routers.
3404	*/
3405	lck_mtx_lock(nd6_mutex);
3406	defrouter_reset();
3407	defrouter_select(ifp, NULL);
3408	lck_mtx_unlock(nd6_mutex);
3409	/ xxx sumikawa: flush prefix list /
3410	break;
3411
3412	case SIOCSPFXFLUSH_IN6: { / struct in6_ifreq /
3413	/ flush all the prefix advertised by routers /
3414	struct nd_prefix *next = NULL;
3415
3416	lck_mtx_lock(nd6_mutex);
3417	for (pr = nd_prefix.lh_first; pr; pr = next) {
3418	struct in6_ifaddr *ia = NULL;
3419	bool iterate_pfxlist_again = false;
3420
3421	next = pr->ndpr_next;
3422
3423	NDPR_LOCK(pr);
3424	if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr)) {
3425	NDPR_UNLOCK(pr);
3426	continue; / XXX /
3427	}
3428	if (ifp != lo_ifp && pr->ndpr_ifp != ifp) {
3429	NDPR_UNLOCK(pr);
3430	continue;
3431	}
3432	/ do we really have to remove addresses as well? /
3433	NDPR_ADDREF(pr);
3434	NDPR_UNLOCK(pr);
3435	lck_rw_lock_exclusive(lck: &in6_ifaddr_rwlock);
3436	bool from_begining = true;
3437	while (from_begining) {
3438	from_begining = false;
3439	TAILQ_FOREACH(ia, &in6_ifaddrhead, ia6_link) {
3440	IFA_LOCK(&ia->ia_ifa);
3441	if ((ia->ia6_flags & IN6_IFF_AUTOCONF) == `0`) {
3442	IFA_UNLOCK(&ia->ia_ifa);
3443	continue;
3444	}
3445
3446	if (ia->ia6_ndpr == pr) {
3447	ifa_addref(ifa: &ia->ia_ifa);
3448	IFA_UNLOCK(&ia->ia_ifa);
3449	lck_rw_done(lck: &in6_ifaddr_rwlock);
3450	lck_mtx_unlock(nd6_mutex);
3451	in6_purgeaddr(&ia->ia_ifa);
3452	ifa_remref(ifa: &ia->ia_ifa);
3453	lck_mtx_lock(nd6_mutex);
3454	lck_rw_lock_exclusive(
3455	lck: &in6_ifaddr_rwlock);
3456	/*
3457	* Purging the address caused
3458	* in6_ifaddr_rwlock to be
3459	* dropped and
3460	* reacquired; therefore search again
3461	* from the beginning of in6_ifaddrs.
3462	* The same applies for the prefix list.
3463	*/
3464	iterate_pfxlist_again = true;
3465	from_begining = true;
3466	break;
3467	}
3468	IFA_UNLOCK(&ia->ia_ifa);
3469	}
3470	}
3471	lck_rw_done(lck: &in6_ifaddr_rwlock);
3472	NDPR_LOCK(pr);
3473	prelist_remove(pr);
3474	NDPR_UNLOCK(pr);
3475	pfxlist_onlink_check();
3476	NDPR_REMREF(pr);
3477	if (iterate_pfxlist_again) {
3478	next = nd_prefix.lh_first;
3479	}
3480	}
3481	lck_mtx_unlock(nd6_mutex);
3482	break;
3483	}
3484
3485	case SIOCSRTRFLUSH_IN6: { / struct in6_ifreq /
3486	/ flush all the default routers /
3487	struct nd_defrouter *next;
3488	struct nd_drhead nd_defrouter_tmp;
3489
3490	TAILQ_INIT(&nd_defrouter_tmp);
3491	lck_mtx_lock(nd6_mutex);
3492	if ((dr = TAILQ_FIRST(&nd_defrouter_list)) != NULL) {
3493	/*
3494	* The first entry of the list may be stored in
3495	* the routing table, so we'll delete it later.
3496	*/
3497	for (dr = TAILQ_NEXT(dr, dr_entry); dr; dr = next) {
3498	next = TAILQ_NEXT(dr, dr_entry);
3499	if (ifp == lo_ifp \|\| dr->ifp == ifp) {
3500	/*
3501	* Remove the entry from default router list
3502	* and add it to the temp list.
3503	* nd_defrouter_tmp will be a local temporary
3504	* list as no one else can get the same
3505	* removed entry once it is removed from default
3506	* router list.
3507	* Remove the reference after calling defrtrlist_de
3508	*/
3509	TAILQ_REMOVE(&nd_defrouter_list, dr, dr_entry);
3510	TAILQ_INSERT_TAIL(&nd_defrouter_tmp, dr, dr_entry);
3511	}
3512	}
3513
3514	dr = TAILQ_FIRST(&nd_defrouter_list);
3515	if (ifp == lo_ifp \|\|
3516	dr->ifp == ifp) {
3517	TAILQ_REMOVE(&nd_defrouter_list, dr, dr_entry);
3518	TAILQ_INSERT_TAIL(&nd_defrouter_tmp, dr, dr_entry);
3519	}
3520	}
3521
3522	/*
3523	* Keep the following separate from the above iteration of
3524	* nd_defrouter because it's not safe to call
3525	* defrtrlist_del while iterating global default
3526	* router list. Global list has to be traversed
3527	* while holding nd6_mutex throughout.
3528	*
3529	* The following call to defrtrlist_del should be
3530	* safe as we are iterating a local list of
3531	* default routers.
3532	*/
3533	TAILQ_FOREACH_SAFE(dr, &nd_defrouter_tmp, dr_entry, next) {
3534	TAILQ_REMOVE(&nd_defrouter_tmp, dr, dr_entry);
3535	defrtrlist_del(dr, NULL);
3536	NDDR_REMREF(dr); / remove list reference /
3537	}
3538
3539	/ For now flush RTI routes here as well to avoid any regressions /
3540	nd6_purge_interface_rti_entries(ifp: (ifp == lo_ifp) ? NULL : ifp);
3541
3542	lck_mtx_unlock(nd6_mutex);
3543	break;
3544	}
3545
3546	case SIOCGNBRINFO_IN6_32: { / struct in6_nbrinfo_32 /
3547	struct llinfo_nd6 *ln;
3548	struct in6_nbrinfo_32 nbi_32;
3549	struct in6_addr nb_addr; / make local for safety /
3550
3551	bcopy(src: data, dst: &nbi_32, n: sizeof(nbi_32));
3552	nb_addr = nbi_32.addr;
3553	/*
3554	* XXX: KAME specific hack for scoped addresses
3555	* XXXX: for other scopes than link-local?
3556	*/
3557	if (in6_embedded_scope && (IN6_IS_ADDR_LINKLOCAL(&nbi_32.addr) \|\|
3558	IN6_IS_ADDR_MC_LINKLOCAL(&nbi_32.addr))) {
3559	u_int16_t *idp =
3560	(u_int16_t )(void* *)&nb_addr.s6_addr[`2`];
3561
3562	if (*idp == `0`) {
3563	*idp = htons(ifp->if_index);
3564	}
3565	}
3566
3567	/ Callee returns a locked route upon success /
3568	if ((rt = nd6_lookup(addr6: &nb_addr, create: `0`, ifp, rt_locked: `0`)) == NULL) {
3569	error = EINVAL;
3570	break;
3571	}
3572	RT_LOCK_ASSERT_HELD(rt);
3573	ln = rt->rt_llinfo;
3574	nbi_32.state = ln->ln_state;
3575	nbi_32.asked = ln->ln_asked;
3576	nbi_32.isrouter = ln->ln_router;
3577	nbi_32.expire = (int)ln_getexpire(ln);
3578	RT_REMREF_LOCKED(rt);
3579	RT_UNLOCK(rt);
3580	bcopy(src: &nbi_32, dst: data, n: sizeof(nbi_32));
3581	break;
3582	}
3583
3584	case SIOCGNBRINFO_IN6_64: { / struct in6_nbrinfo_64 /
3585	struct llinfo_nd6 *ln;
3586	struct in6_nbrinfo_64 nbi_64;
3587	struct in6_addr nb_addr; / make local for safety /
3588
3589	bcopy(src: data, dst: &nbi_64, n: sizeof(nbi_64));
3590	nb_addr = nbi_64.addr;
3591	/*
3592	* XXX: KAME specific hack for scoped addresses
3593	* XXXX: for other scopes than link-local?
3594	*/
3595	if (in6_embedded_scope && (IN6_IS_ADDR_LINKLOCAL(&nbi_64.addr) \|\|
3596	IN6_IS_ADDR_MC_LINKLOCAL(&nbi_64.addr))) {
3597	u_int16_t *idp =
3598	(u_int16_t )(void* *)&nb_addr.s6_addr[`2`];
3599
3600	if (*idp == `0`) {
3601	*idp = htons(ifp->if_index);
3602	}
3603	}
3604
3605	/ Callee returns a locked route upon success /
3606	if ((rt = nd6_lookup(addr6: &nb_addr, create: `0`, ifp, rt_locked: `0`)) == NULL) {
3607	error = EINVAL;
3608	break;
3609	}
3610	RT_LOCK_ASSERT_HELD(rt);
3611	ln = rt->rt_llinfo;
3612	nbi_64.state = ln->ln_state;
3613	nbi_64.asked = ln->ln_asked;
3614	nbi_64.isrouter = ln->ln_router;
3615	nbi_64.expire = (int)ln_getexpire(ln);
3616	RT_REMREF_LOCKED(rt);
3617	RT_UNLOCK(rt);
3618	bcopy(src: &nbi_64, dst: data, n: sizeof(nbi_64));
3619	break;
3620	}
3621
3622	case SIOCGDEFIFACE_IN6_32: / struct in6_ndifreq_32 /
3623	case SIOCGDEFIFACE_IN6_64: { / struct in6_ndifreq_64 /
3624	struct in6_ndifreq_64 *ndif_64 =
3625	(struct in6_ndifreq_64 )(void* *)data;
3626	struct in6_ndifreq_32 *ndif_32 =
3627	(struct in6_ndifreq_32 )(void* *)data;
3628
3629	if (cmd == SIOCGDEFIFACE_IN6_64) {
3630	u_int64_t j = nd6_defifindex;
3631	__nochk_bcopy(src: &j, dst: &ndif_64->ifindex, n: sizeof(j));
3632	} else {
3633	bcopy(src: &nd6_defifindex, dst: &ndif_32->ifindex,
3634	n: sizeof(u_int32_t));
3635	}
3636	break;
3637	}
3638
3639	case SIOCSDEFIFACE_IN6_32: / struct in6_ndifreq_32 /
3640	case SIOCSDEFIFACE_IN6_64: { / struct in6_ndifreq_64 /
3641	struct in6_ndifreq_64 *ndif_64 =
3642	(struct in6_ndifreq_64 )(void* *)data;
3643	struct in6_ndifreq_32 *ndif_32 =
3644	(struct in6_ndifreq_32 )(void* *)data;
3645	u_int32_t idx;
3646
3647	if (cmd == SIOCSDEFIFACE_IN6_64) {
3648	u_int64_t j;
3649	__nochk_bcopy(src: &ndif_64->ifindex, dst: &j, n: sizeof(j));
3650	idx = (u_int32_t)j;
3651	} else {
3652	bcopy(src: &ndif_32->ifindex, dst: &idx, n: sizeof(idx));
3653	}
3654
3655	error = nd6_setdefaultiface(idx);
3656	return error;
3657	/ NOTREACHED /
3658	}
3659	case SIOCGIFCGAPREP_IN6_32:
3660	case SIOCGIFCGAPREP_IN6_64: {
3661	/ get CGA parameters /
3662	union {
3663	struct in6_cgareq_32 *cga32;
3664	struct in6_cgareq_64 *cga64;
3665	void *data;
3666	} cgareq_u;
3667	struct nd_ifinfo *ndi;
3668	struct in6_cga_modifier *ndi_cga_mod;
3669	struct in6_cga_modifier *req_cga_mod;
3670
3671	ndi = ND_IFINFO(ifp);
3672	if ((NULL == ndi) \|\| !ndi->initialized) {
3673	error = EINVAL;
3674	break;
3675	}
3676	cgareq_u.data = data;
3677	req_cga_mod = (cmd == SIOCGIFCGAPREP_IN6_64)
3678	? &(cgareq_u.cga64->cgar_cgaprep.cga_modifier)
3679	: &(cgareq_u.cga32->cgar_cgaprep.cga_modifier);
3680	lck_mtx_lock(lck: &ndi->lock);
3681	ndi_cga_mod = &(ndi->local_cga_modifier);
3682	bcopy(src: ndi_cga_mod, dst: req_cga_mod, n: sizeof(*req_cga_mod));
3683	lck_mtx_unlock(lck: &ndi->lock);
3684	break;
3685	}
3686	case SIOCSIFCGAPREP_IN6_32:
3687	case SIOCSIFCGAPREP_IN6_64:
3688	{
3689	/ set CGA parameters /
3690	struct in6_cgareq cgareq;
3691	int is64;
3692	struct nd_ifinfo *ndi;
3693	struct in6_cga_modifier *ndi_cga_mod;
3694	struct in6_cga_modifier *req_cga_mod;
3695
3696	ndi = ND_IFINFO(ifp);
3697	if ((NULL == ndi) \|\| !ndi->initialized) {
3698	error = EINVAL;
3699	break;
3700	}
3701	is64 = (cmd == SIOCSIFCGAPREP_IN6_64);
3702	in6_cgareq_copy_from_user(data, is64, cgareq: &cgareq);
3703	req_cga_mod = &cgareq.cgar_cgaprep.cga_modifier;
3704	lck_mtx_lock(lck: &ndi->lock);
3705	ndi_cga_mod = &(ndi->local_cga_modifier);
3706	bcopy(src: req_cga_mod, dst: ndi_cga_mod, n: sizeof(*ndi_cga_mod));
3707	ndi->cga_initialized = TRUE;
3708	ndi->cga_collision_count = `0`;
3709	lck_mtx_unlock(lck: &ndi->lock);
3710	break;
3711	}
3712	default:
3713	break;
3714	}
3715	return error;
3716	}
3717
3718	/*
3719	* Create neighbor cache entry and cache link-layer address,
3720	* on reception of inbound ND6 packets. (RS/RA/NS/redirect)
3721	*/
3722	void
3723	nd6_cache_lladdr(struct ifnet ifp, struct* in6_addr from, char* *lladdr,
3724	int lladdrlen, int type, int code, int *did_update)
3725	{
3726	#pragma unused(lladdrlen)
3727	struct rtentry *rt = NULL;
3728	struct llinfo_nd6 *ln = NULL;
3729	int is_newentry;
3730	struct sockaddr_dl *sdl = NULL;
3731	int do_update;
3732	int olladdr;
3733	int llchange;
3734	short newstate = `0`;
3735	uint64_t timenow;
3736	boolean_t sched_timeout = FALSE;
3737	struct nd_ifinfo *ndi = NULL;
3738
3739	if (ifp == NULL) {
3740	panic("ifp == NULL in nd6_cache_lladdr");
3741	}
3742	if (from == NULL) {
3743	panic("from == NULL in nd6_cache_lladdr");
3744	}
3745
3746	if (did_update != NULL) {
3747	did_update = `0`;
3748	}
3749
3750	/ nothing must be updated for unspecified address /
3751	if (IN6_IS_ADDR_UNSPECIFIED(from)) {
3752	return;
3753	}
3754
3755	/*
3756	* Validation about ifp->if_addrlen and lladdrlen must be done in
3757	* the caller.
3758	*/
3759	timenow = net_uptime();
3760
3761	rt = nd6_lookup(addr6: from, create: `0`, ifp, rt_locked: `0`);
3762	if (rt == NULL) {
3763	if ((rt = nd6_lookup(addr6: from, create: `1`, ifp, rt_locked: `0`)) == NULL) {
3764	return;
3765	}
3766	RT_LOCK_ASSERT_HELD(rt);
3767	is_newentry = `1`;
3768	} else {
3769	RT_LOCK_ASSERT_HELD(rt);
3770	/ do nothing if static ndp is set /
3771	if (rt->rt_flags & RTF_STATIC) {
3772	RT_REMREF_LOCKED(rt);
3773	RT_UNLOCK(rt);
3774	return;
3775	}
3776	is_newentry = `0`;
3777	}
3778
3779	if ((rt->rt_flags & (RTF_GATEWAY \| RTF_LLINFO)) != RTF_LLINFO) {
3780	fail:
3781	RT_UNLOCK(rt);
3782	nd6_free(rt);
3783	rtfree(rt);
3784	return;
3785	}
3786	ln = (struct llinfo_nd6 *)rt->rt_llinfo;
3787	if (ln == NULL) {
3788	goto fail;
3789	}
3790	if (rt->rt_gateway == NULL) {
3791	goto fail;
3792	}
3793	if (rt->rt_gateway->sa_family != AF_LINK) {
3794	goto fail;
3795	}
3796	sdl = SDL(rt->rt_gateway);
3797
3798	olladdr = (sdl->sdl_alen) ? `1` : `0`;
3799	if (olladdr && lladdr) {
3800	if (bcmp(s1: lladdr, LLADDR(sdl), n: ifp->if_addrlen)) {
3801	llchange = `1`;
3802	} else {
3803	llchange = `0`;
3804	}
3805	} else {
3806	llchange = `0`;
3807	}
3808
3809	/*
3810	* newentry olladdr lladdr llchange (*=record)
3811	* 0 n n -- (1)
3812	* 0 y n -- (2)
3813	* 0 n y -- (3) * STALE
3814	* 0 y y n (4) *
3815	* 0 y y y (5) * STALE
3816	* 1 -- n -- (6) NOSTATE(= PASSIVE)
3817	* 1 -- y -- (7) * STALE
3818	*/
3819
3820	if (lladdr != NULL) { / (3-5) and (7) /
3821	/*
3822	* Record source link-layer address
3823	* XXX is it dependent to ifp->if_type?
3824	*/
3825	sdl->sdl_alen = ifp->if_addrlen;
3826	bcopy(src: lladdr, LLADDR(sdl), n: ifp->if_addrlen);
3827
3828	/ cache the gateway (sender HW) address /
3829	nd6_llreach_alloc(rt, ifp, LLADDR(sdl), sdl->sdl_alen, FALSE);
3830	}
3831
3832	if (is_newentry == `0`) {
3833	if ((!olladdr && lladdr != NULL) \|\| / (3) /
3834	(olladdr && lladdr != NULL && llchange)) { / (5) /
3835	do_update = `1`;
3836	newstate = ND6_LLINFO_STALE;
3837	} else { / (1-2,4) /
3838	do_update = `0`;
3839	}
3840	} else {
3841	do_update = `1`;
3842	if (lladdr == NULL) { / (6) /
3843	newstate = ND6_LLINFO_NOSTATE;
3844	} else { / (7) /
3845	newstate = ND6_LLINFO_STALE;
3846	}
3847	}
3848
3849	/*
3850	* For interface's that do not perform NUD or NDP
3851	* neighbor cache entres must always be marked
3852	* reachable with no expiry
3853	*/
3854	ndi = ND_IFINFO(ifp);
3855	VERIFY((NULL != ndi) && (TRUE == ndi->initialized));
3856
3857	if ((ndi && !(ndi->flags & ND6_IFF_PERFORMNUD)) \|\|
3858	(ifp->if_eflags & IFEF_IPV6_ND6ALT)) {
3859	newstate = ND6_LLINFO_REACHABLE;
3860	ln_setexpire(ln, expiry: `0`);
3861	}
3862
3863	if (do_update) {
3864	/*
3865	* Update the state of the neighbor cache.
3866	*/
3867	ND6_CACHE_STATE_TRANSITION(ln, newstate);
3868
3869	if ((ln->ln_state == ND6_LLINFO_STALE) \|\|
3870	(ln->ln_state == ND6_LLINFO_REACHABLE)) {
3871	struct mbuf *m = ln->ln_hold;
3872	/*
3873	* XXX: since nd6_output() below will cause
3874	* state tansition to DELAY and reset the timer,
3875	* we must set the timer now, although it is actually
3876	* meaningless.
3877	*/
3878	if (ln->ln_state == ND6_LLINFO_STALE) {
3879	ln_setexpire(ln, expiry: timenow + nd6_gctimer);
3880	}
3881
3882	ln->ln_hold = NULL;
3883	if (m != NULL) {
3884	struct sockaddr_in6 sin6;
3885
3886	rtkey_to_sa6(rt, &sin6);
3887	/*
3888	* we assume ifp is not a p2p here, so just
3889	* set the 2nd argument as the 1st one.
3890	*/
3891	RT_UNLOCK(rt);
3892	nd6_output_list(ifp, ifp, m, &sin6, rt, NULL);
3893	RT_LOCK(rt);
3894	}
3895	} else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) {
3896	/ probe right away /
3897	ln_setexpire(ln, expiry: timenow);
3898	sched_timeout = TRUE;
3899	}
3900	}
3901
3902	/*
3903	* ICMP6 type dependent behavior.
3904	*
3905	* NS: clear IsRouter if new entry
3906	* RS: clear IsRouter
3907	* RA: set IsRouter if there's lladdr
3908	* redir: clear IsRouter if new entry
3909	*
3910	* RA case, (1):
3911	* The spec says that we must set IsRouter in the following cases:
3912	* - If lladdr exist, set IsRouter. This means (1-5).
3913	* - If it is old entry (!newentry), set IsRouter. This means (7).
3914	* So, based on the spec, in (1-5) and (7) cases we must set IsRouter.
3915	* A quetion arises for (1) case. (1) case has no lladdr in the
3916	* neighbor cache, this is similar to (6).
3917	* This case is rare but we figured that we MUST NOT set IsRouter.
3918	*
3919	* newentry olladdr lladdr llchange NS RS RA redir
3920	* D R
3921	* 0 n n -- (1) c ? s
3922	* 0 y n -- (2) c s s
3923	* 0 n y -- (3) c s s
3924	* 0 y y n (4) c s s
3925	* 0 y y y (5) c s s
3926	* 1 -- n -- (6) c c c s
3927	* 1 -- y -- (7) c c s c s
3928	*
3929	* (c=clear s=set)
3930	*/
3931	switch (type & `0xff`) {
3932	case ND_NEIGHBOR_SOLICIT:
3933	/*
3934	* New entry must have is_router flag cleared.
3935	*/
3936	if (is_newentry) { / (6-7) /
3937	ln->ln_router = `0`;
3938	}
3939	break;
3940	case ND_REDIRECT:
3941	/*
3942	* If the ICMP message is a Redirect to a better router, always
3943	* set the is_router flag. Otherwise, if the entry is newly
3944	* created, then clear the flag. [RFC 4861, sec 8.3]
3945	*/
3946	if (code == ND_REDIRECT_ROUTER) {
3947	ln->ln_router = `1`;
3948	} else if (is_newentry) { / (6-7) /
3949	ln->ln_router = `0`;
3950	}
3951	break;
3952	case ND_ROUTER_SOLICIT:
3953	/*
3954	* is_router flag must always be cleared.
3955	*/
3956	ln->ln_router = `0`;
3957	break;
3958	case ND_ROUTER_ADVERT:
3959	/*
3960	* Mark an entry with lladdr as a router.
3961	*/
3962	if ((!is_newentry && (olladdr \|\| lladdr)) \|\| / (2-5) /
3963	(is_newentry && lladdr)) { / (7) /
3964	ln->ln_router = `1`;
3965	}
3966	break;
3967	}
3968
3969	if (do_update) {
3970	int route_ev_code = `0`;
3971
3972	if (llchange) {
3973	route_ev_code = ROUTE_LLENTRY_CHANGED;
3974	} else {
3975	route_ev_code = ROUTE_LLENTRY_RESOLVED;
3976	}
3977
3978	/ Enqueue work item to invoke callback for this route entry /
3979	route_event_enqueue_nwk_wq_entry(rt, NULL, route_ev_code, NULL, TRUE);
3980
3981	if (ln->ln_router \|\| (rt->rt_flags & RTF_ROUTER)) {
3982	struct radix_node_head *rnh = NULL;
3983	struct in6_addr rt_addr = SIN6(rt_key(rt))->sin6_addr;
3984	struct ifnet *rt_ifp = rt->rt_ifp;
3985	struct route_event rt_ev;
3986	route_event_init(p_route_ev: &rt_ev, rt, NULL, route_ev_code: llchange ? ROUTE_LLENTRY_CHANGED :
3987	ROUTE_LLENTRY_RESOLVED);
3988	/*
3989	* We already have a valid reference on rt.
3990	* The function frees that before returning.
3991	* We therefore don't need an extra reference here
3992	*/
3993	RT_UNLOCK(rt);
3994	defrouter_set_reachability(&rt_addr, rt_ifp, TRUE);
3995	lck_mtx_lock(rnh_lock);
3996
3997	rnh = rt_tables[AF_INET6];
3998	if (rnh != NULL) {
3999	(void) rnh->rnh_walktree(rnh, route_event_walktree,
4000	(void *)&rt_ev);
4001	}
4002	lck_mtx_unlock(rnh_lock);
4003	RT_LOCK(rt);
4004	}
4005	}
4006
4007	if (did_update != NULL) {
4008	*did_update = do_update;
4009	}
4010
4011	/*
4012	* When the link-layer address of a router changes, select the
4013	* best router again. In particular, when the neighbor entry is newly
4014	* created, it might affect the selection policy.
4015	* Question: can we restrict the first condition to the "is_newentry"
4016	* case?
4017	*
4018	* Note: Perform default router selection even when we are a router,
4019	* if Scoped Routing is enabled.
4020	*/
4021	if (do_update && ln->ln_router) {
4022	/*
4023	* XXX TODO: This should also be iterated over router list
4024	* for route information option's router lists as well.
4025	*/
4026	RT_REMREF_LOCKED(rt);
4027	RT_UNLOCK(rt);
4028	lck_mtx_lock(nd6_mutex);
4029	defrouter_select(ifp, NULL);
4030	nd6_router_select_rti_entries(ifp);
4031	lck_mtx_unlock(nd6_mutex);
4032	} else {
4033	RT_REMREF_LOCKED(rt);
4034	RT_UNLOCK(rt);
4035	}
4036	if (sched_timeout) {
4037	lck_mtx_lock(rnh_lock);
4038	nd6_sched_timeout(NULL, NULL);
4039	lck_mtx_unlock(rnh_lock);
4040	}
4041	}
4042
4043	static void
4044	nd6_slowtimo(void *arg)
4045	{
4046	#pragma unused(arg)
4047	struct nd_ifinfo *nd6if = NULL;
4048	struct ifnet *ifp = NULL;
4049
4050	ifnet_head_lock_shared();
4051	for (ifp = ifnet_head.tqh_first; ifp;
4052	ifp = ifp->if_link.tqe_next) {
4053	nd6if = ND_IFINFO(ifp);
4054	if ((NULL == nd6if) \|\| (FALSE == nd6if->initialized)) {
4055	continue;
4056	}
4057
4058	lck_mtx_lock(lck: &nd6if->lock);
4059	if (nd6if->basereachable && / already initialized /
4060	(nd6if->recalctm -= ND6_SLOWTIMER_INTERVAL) <= `0`) {
4061	/*
4062	* Since reachable time rarely changes by router
4063	* advertisements, we SHOULD insure that a new random
4064	* value gets recomputed at least once every few hours.
4065	* (RFC 4861, 6.3.4)
4066	*/
4067	nd6if->recalctm = nd6_recalc_reachtm_interval;
4068	nd6if->reachable =
4069	ND_COMPUTE_RTIME(nd6if->basereachable);
4070	}
4071	lck_mtx_unlock(lck: &nd6if->lock);
4072	}
4073	ifnet_head_done();
4074	timeout(nd6_slowtimo, NULL, ND6_SLOWTIMER_INTERVAL * hz);
4075	}
4076
/*
 * Single-packet entry point for ND6 output: forwards every argument
 * unchanged to nd6_output_list() and returns its result.
 */
int
nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
    struct sockaddr_in6 *dst, struct rtentry *hint0, struct flowadv *adv)
{
	return nd6_output_list(ifp, origifp, m0, dst, hint0, adv);
}
4083
4084	/*
4085	* nd6_output_list()
4086	*
4087	* Assumption: route determination for first packet can be correctly applied to
4088	* all packets in the chain.
4089	*/
4090	#define senderr(e) { error = (e); goto bad; }
4091	int
4092	nd6_output_list(struct ifnet ifp, struct* ifnet origifp, struct* mbuf *m0,
4093	struct sockaddr_in6 dst, struct* rtentry hint0, struct* flowadv *adv)
4094	{
4095	struct rtentry rt = hint0, hint = hint0;
4096	struct llinfo_nd6 *ln = NULL;
4097	int error = `0`;
4098	uint64_t timenow;
4099	struct rtentry *rtrele = NULL;
4100	struct nd_ifinfo *ndi = NULL;
4101
4102	if (rt != NULL) {
4103	RT_LOCK_SPIN(rt);
4104	RT_ADDREF_LOCKED(rt);
4105	}
4106
4107	if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr) \|\| !nd6_need_cache(ifp)) {
4108	if (rt != NULL) {
4109	RT_UNLOCK(rt);
4110	}
4111	goto sendpkt;
4112	}
4113
4114	/*
4115	* Next hop determination. Because we may involve the gateway route
4116	* in addition to the original route, locking is rather complicated.
4117	* The general concept is that regardless of whether the route points
4118	* to the original route or to the gateway route, this routine takes
4119	* an extra reference on such a route. This extra reference will be
4120	* released at the end.
4121	*
4122	* Care must be taken to ensure that the "hint0" route never gets freed
4123	* via rtfree(), since the caller may have stored it inside a struct
4124	* route with a reference held for that placeholder.
4125	*
4126	* This logic is similar to, though not exactly the same as the one
4127	* used by route_to_gwroute().
4128	*/
4129	if (rt != NULL) {
4130	/*
4131	* We have a reference to "rt" by now (or below via rtalloc1),
4132	* which will either be released or freed at the end of this
4133	* routine.
4134	*/
4135	RT_LOCK_ASSERT_HELD(rt);
4136	if (!(rt->rt_flags & RTF_UP)) {
4137	RT_REMREF_LOCKED(rt);
4138	RT_UNLOCK(rt);
4139	if ((hint = rt = rtalloc1_scoped(SA(dst), `1`, `0`,
4140	ifp->if_index)) != NULL) {
4141	RT_LOCK_SPIN(rt);
4142	if (rt->rt_ifp != ifp) {
4143	/ XXX: loop care? /
4144	RT_UNLOCK(rt);
4145	error = nd6_output_list(ifp, origifp, m0,
4146	dst, hint0: rt, adv);
4147	rtfree(rt);
4148	return error;
4149	}
4150	} else {
4151	senderr(EHOSTUNREACH);
4152	}
4153	}
4154
4155	if (rt->rt_flags & RTF_GATEWAY) {
4156	struct rtentry *gwrt;
4157	struct in6_ifaddr *ia6 = NULL;
4158	struct sockaddr_in6 gw6;
4159
4160	rtgw_to_sa6(rt, &gw6);
4161	/*
4162	* Must drop rt_lock since nd6_is_addr_neighbor()
4163	* calls nd6_lookup() and acquires rnh_lock.
4164	*/
4165	RT_UNLOCK(rt);
4166
4167	/*
4168	* We skip link-layer address resolution and NUD
4169	* if the gateway is not a neighbor from ND point
4170	* of view, regardless of the value of nd_ifinfo.flags.
4171	* The second condition is a bit tricky; we skip
4172	* if the gateway is our own address, which is
4173	* sometimes used to install a route to a p2p link.
4174	*/
4175	if (!nd6_is_addr_neighbor(addr: &gw6, ifp, rt_locked: `0`) \|\|
4176	(ia6 = in6ifa_ifpwithaddr(ifp, &gw6.sin6_addr))) {
4177	/*
4178	* We allow this kind of tricky route only
4179	* when the outgoing interface is p2p.
4180	* XXX: we may need a more generic rule here.
4181	*/
4182	if (ia6 != NULL) {
4183	ifa_remref(ifa: &ia6->ia_ifa);
4184	}
4185	if ((ifp->if_flags & IFF_POINTOPOINT) == `0`) {
4186	senderr(EHOSTUNREACH);
4187	}
4188	goto sendpkt;
4189	}
4190
4191	RT_LOCK_SPIN(rt);
4192	gw6 = *(SIN6(rt->rt_gateway));
4193
4194	/ If hint is now down, give up /
4195	if (!(rt->rt_flags & RTF_UP)) {
4196	RT_UNLOCK(rt);
4197	senderr(EHOSTUNREACH);
4198	}
4199
4200	/ If there's no gateway route, look it up /
4201	if ((gwrt = rt->rt_gwroute) == NULL) {
4202	RT_UNLOCK(rt);
4203	goto lookup;
4204	}
4205	/ Become a regular mutex /
4206	RT_CONVERT_LOCK(rt);
4207
4208	/*
4209	* Take gwrt's lock while holding route's lock;
4210	* this is okay since gwrt never points back
4211	* to rt, so no lock ordering issues.
4212	*/
4213	RT_LOCK_SPIN(gwrt);
4214	if (!(gwrt->rt_flags & RTF_UP)) {
4215	rt->rt_gwroute = NULL;
4216	RT_UNLOCK(gwrt);
4217	RT_UNLOCK(rt);
4218	rtfree(gwrt);
4219	lookup:
4220	lck_mtx_lock(rnh_lock);
4221	gwrt = rtalloc1_scoped_locked(SA(&gw6), `1`, `0`,
4222	ifp->if_index);
4223
4224	RT_LOCK(rt);
4225	/*
4226	* Bail out if the route is down, no route
4227	* to gateway, circular route, or if the
4228	* gateway portion of "rt" has changed.
4229	*/
4230	if (!(rt->rt_flags & RTF_UP) \|\|
4231	gwrt == NULL \|\| gwrt == rt \|\|
4232	!equal(SA(&gw6), rt->rt_gateway)) {
4233	if (gwrt == rt) {
4234	RT_REMREF_LOCKED(gwrt);
4235	gwrt = NULL;
4236	}
4237	RT_UNLOCK(rt);
4238	if (gwrt != NULL) {
4239	rtfree_locked(gwrt);
4240	}
4241	lck_mtx_unlock(rnh_lock);
4242	senderr(EHOSTUNREACH);
4243	}
4244	VERIFY(gwrt != NULL);
4245	/*
4246	* Set gateway route; callee adds ref to gwrt;
4247	* gwrt has an extra ref from rtalloc1() for
4248	* this routine.
4249	*/
4250	rt_set_gwroute(rt, rt_key(rt), gwrt);
4251	RT_UNLOCK(rt);
4252	lck_mtx_unlock(rnh_lock);
4253	/ Remember to release/free "rt" at the end /
4254	rtrele = rt;
4255	rt = gwrt;
4256	} else {
4257	RT_ADDREF_LOCKED(gwrt);
4258	RT_UNLOCK(gwrt);
4259	RT_UNLOCK(rt);
4260	/ Remember to release/free "rt" at the end /
4261	rtrele = rt;
4262	rt = gwrt;
4263	}
4264	VERIFY(rt == gwrt);
4265
4266	/*
4267	* This is an opportunity to revalidate the parent
4268	* route's gwroute, in case it now points to a dead
4269	* route entry. Parent route won't go away since the
4270	* clone (hint) holds a reference to it. rt == gwrt.
4271	*/
4272	RT_LOCK_SPIN(hint);
4273	if ((hint->rt_flags & (RTF_WASCLONED \| RTF_UP)) ==
4274	(RTF_WASCLONED \| RTF_UP)) {
4275	struct rtentry *prt = hint->rt_parent;
4276	VERIFY(prt != NULL);
4277
4278	RT_CONVERT_LOCK(hint);
4279	RT_ADDREF(prt);
4280	RT_UNLOCK(hint);
4281	rt_revalidate_gwroute(prt, rt);
4282	RT_REMREF(prt);
4283	} else {
4284	RT_UNLOCK(hint);
4285	}
4286
4287	RT_LOCK_SPIN(rt);
4288	/ rt == gwrt; if it is now down, give up /
4289	if (!(rt->rt_flags & RTF_UP)) {
4290	RT_UNLOCK(rt);
4291	rtfree(rt);
4292	rt = NULL;
4293	/ "rtrele" == original "rt" /
4294	senderr(EHOSTUNREACH);
4295	}
4296	}
4297
4298	/ Become a regular mutex /
4299	RT_CONVERT_LOCK(rt);
4300	}
4301
4302	/*
4303	* Address resolution or Neighbor Unreachability Detection
4304	* for the next hop.
4305	* At this point, the destination of the packet must be a unicast
4306	* or an anycast address(i.e. not a multicast).
4307	*/
4308
4309	/ Look up the neighbor cache for the nexthop /
4310	if (rt && (rt->rt_flags & RTF_LLINFO) != `0`) {
4311	ln = rt->rt_llinfo;
4312	} else {
4313	struct sockaddr_in6 sin6;
4314	/*
4315	* Clear out Scope ID field in case it is set.
4316	*/
4317	sin6 = *dst;
4318	if (in6_embedded_scope) {
4319	sin6.sin6_scope_id = `0`;
4320	}
4321	/*
4322	* Since nd6_is_addr_neighbor() internally calls nd6_lookup(),
4323	* the condition below is not very efficient. But we believe
4324	* it is tolerable, because this should be a rare case.
4325	* Must drop rt_lock since nd6_is_addr_neighbor() calls
4326	* nd6_lookup() and acquires rnh_lock.
4327	*/
4328	if (rt != NULL) {
4329	RT_UNLOCK(rt);
4330	}
4331	if (nd6_is_addr_neighbor(addr: &sin6, ifp, rt_locked: `0`)) {
4332	/ "rtrele" may have been used, so clean up "rt" now /
4333	if (rt != NULL) {
4334	/ Don't free "hint0" /
4335	if (rt == hint0) {
4336	RT_REMREF(rt);
4337	} else {
4338	rtfree(rt);
4339	}
4340	}
4341	/ Callee returns a locked route upon success /
4342	rt = nd6_lookup(addr6: &dst->sin6_addr, create: `1`, ifp, rt_locked: `0`);
4343	if (rt != NULL) {
4344	RT_LOCK_ASSERT_HELD(rt);
4345	ln = rt->rt_llinfo;
4346	}
4347	} else if (rt != NULL) {
4348	RT_LOCK(rt);
4349	}
4350	}
4351
4352	if (!ln \|\| !rt) {
4353	if (rt != NULL) {
4354	RT_UNLOCK(rt);
4355	}
4356	ndi = ND_IFINFO(ifp);
4357	VERIFY(ndi != NULL && ndi->initialized);
4358	lck_mtx_lock(lck: &ndi->lock);
4359	if ((ifp->if_flags & IFF_POINTOPOINT) == `0` &&
4360	!(ndi->flags & ND6_IFF_PERFORMNUD)) {
4361	lck_mtx_unlock(lck: &ndi->lock);
4362	log(LOG_DEBUG,
4363	"nd6_output: can't allocate llinfo for %s "
4364	"(ln=0x%llx, rt=0x%llx)\n",
4365	ip6_sprintf(&dst->sin6_addr),
4366	(uint64_t)VM_KERNEL_ADDRPERM(ln),
4367	(uint64_t)VM_KERNEL_ADDRPERM(rt));
4368	senderr(EIO); / XXX: good error? /
4369	}
4370	lck_mtx_unlock(lck: &ndi->lock);
4371
4372	goto sendpkt; / send anyway /
4373	}
4374
4375	net_update_uptime();
4376	timenow = net_uptime();
4377
4378	/ We don't have to do link-layer address resolution on a p2p link. /
4379	if ((ifp->if_flags & IFF_POINTOPOINT) != `0` &&
4380	ln->ln_state < ND6_LLINFO_REACHABLE) {
4381	ND6_CACHE_STATE_TRANSITION(ln, ND6_LLINFO_STALE);
4382	ln_setexpire(ln, expiry: timenow + nd6_gctimer);
4383	}
4384
4385	/*
4386	* The first time we send a packet to a neighbor whose entry is
4387	* STALE, we have to change the state to DELAY and a sets a timer to
4388	* expire in DELAY_FIRST_PROBE_TIME seconds to ensure do
4389	* neighbor unreachability detection on expiration.
4390	* (RFC 4861 7.3.3)
4391	*/
4392	if (ln->ln_state == ND6_LLINFO_STALE) {
4393	ln->ln_asked = `0`;
4394	ND6_CACHE_STATE_TRANSITION(ln, ND6_LLINFO_DELAY);
4395	ln_setexpire(ln, expiry: timenow + nd6_delay);
4396	/ N.B.: we will re-arm the timer below. /
4397	_CASSERT(ND6_LLINFO_DELAY > ND6_LLINFO_INCOMPLETE);
4398	}
4399
4400	/*
4401	* If the neighbor cache entry has a state other than INCOMPLETE
4402	* (i.e. its link-layer address is already resolved), just
4403	* send the packet.
4404	*/
4405	if (ln->ln_state > ND6_LLINFO_INCOMPLETE) {
4406	RT_UNLOCK(rt);
4407	/*
4408	* Move this entry to the head of the queue so that it is
4409	* less likely for this entry to be a target of forced
4410	* garbage collection (see nd6_rtrequest()). Do this only
4411	* if the entry is non-permanent (as permanent ones will
4412	* never be purged), and if the number of active entries
4413	* is at least half of the threshold.
4414	*/
4415	if (ln->ln_state == ND6_LLINFO_DELAY \|\|
4416	(ln->ln_expire != `0` && ip6_neighborgcthresh > `0` &&
4417	nd6_inuse >= (ip6_neighborgcthresh >> `1`))) {
4418	lck_mtx_lock(rnh_lock);
4419	if (ln->ln_state == ND6_LLINFO_DELAY) {
4420	nd6_sched_timeout(NULL, NULL);
4421	}
4422	if (ln->ln_expire != `0` && ip6_neighborgcthresh > `0` &&
4423	nd6_inuse >= (ip6_neighborgcthresh >> `1`)) {
4424	RT_LOCK_SPIN(rt);
4425	if (ln->ln_flags & ND6_LNF_IN_USE) {
4426	LN_DEQUEUE(ln);
4427	LN_INSERTHEAD(ln);
4428	}
4429	RT_UNLOCK(rt);
4430	}
4431	lck_mtx_unlock(rnh_lock);
4432	}
4433	goto sendpkt;
4434	}
4435
4436	/*
4437	* If this is a prefix proxy route, record the inbound interface
4438	* so that it can be excluded from the list of interfaces eligible
4439	* for forwarding the proxied NS in nd6_prproxy_ns_output().
4440	*/
4441	if (rt->rt_flags & RTF_PROXY) {
4442	ln->ln_exclifp = ((origifp == ifp) ? NULL : origifp);
4443	}
4444
4445	/*
4446	* There is a neighbor cache entry, but no ethernet address
4447	* response yet. Replace the held mbuf (if any) with this
4448	* latest one.
4449	*
4450	* This code conforms to the rate-limiting rule described in Section
4451	* 7.2.2 of RFC 4861, because the timer is set correctly after sending
4452	* an NS below.
4453	*/
4454	if (ln->ln_state == ND6_LLINFO_NOSTATE) {
4455	ND6_CACHE_STATE_TRANSITION(ln, ND6_LLINFO_INCOMPLETE);
4456	}
4457	if (ln->ln_hold) {
4458	m_freem_list(ln->ln_hold);
4459	}
4460	ln->ln_hold = m0;
4461	if (!ND6_LLINFO_PERMANENT(ln) && ln->ln_asked == `0`) {
4462	ln->ln_asked++;
4463	ndi = ND_IFINFO(ifp);
4464	VERIFY(ndi != NULL && ndi->initialized);
4465	lck_mtx_lock(lck: &ndi->lock);
4466	ln_setexpire(ln, expiry: timenow + ndi->retrans / `1000`);
4467	lck_mtx_unlock(lck: &ndi->lock);
4468	RT_UNLOCK(rt);
4469	/ We still have a reference on rt (for ln) /
4470	if (ip6_forwarding) {
4471	nd6_prproxy_ns_output(ifp, origifp, NULL,
4472	&dst->sin6_addr, ln);
4473	} else {
4474	nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, NULL);
4475	}
4476	lck_mtx_lock(rnh_lock);
4477	nd6_sched_timeout(NULL, NULL);
4478	lck_mtx_unlock(rnh_lock);
4479	} else {
4480	RT_UNLOCK(rt);
4481	}
4482	/*
4483	* Move this entry to the head of the queue so that it is
4484	* less likely for this entry to be a target of forced
4485	* garbage collection (see nd6_rtrequest()). Do this only
4486	* if the entry is non-permanent (as permanent ones will
4487	* never be purged), and if the number of active entries
4488	* is at least half of the threshold.
4489	*/
4490	if (ln->ln_expire != `0` && ip6_neighborgcthresh > `0` &&
4491	nd6_inuse >= (ip6_neighborgcthresh >> `1`)) {
4492	lck_mtx_lock(rnh_lock);
4493	RT_LOCK_SPIN(rt);
4494	if (ln->ln_flags & ND6_LNF_IN_USE) {
4495	LN_DEQUEUE(ln);
4496	LN_INSERTHEAD(ln);
4497	}
4498	/ Clean up "rt" now while we can /
4499	if (rt == hint0) {
4500	RT_REMREF_LOCKED(rt);
4501	RT_UNLOCK(rt);
4502	} else {
4503	RT_UNLOCK(rt);
4504	rtfree_locked(rt);
4505	}
4506	rt = NULL; / "rt" has been taken care of /
4507	lck_mtx_unlock(rnh_lock);
4508	}
4509	error = `0`;
4510	goto release;
4511
4512	sendpkt:
4513	if (rt != NULL) {
4514	RT_LOCK_ASSERT_NOTHELD(rt);
4515	}
4516
4517	/ discard the packet if IPv6 operation is disabled on the interface /
4518	if (ifp->if_eflags & IFEF_IPV6_DISABLED) {
4519	error = ENETDOWN; / better error? /
4520	goto bad;
4521	}
4522
4523	if (ifp->if_flags & IFF_LOOPBACK) {
4524	/ forwarding rules require the original scope_id /
4525	m0->m_pkthdr.rcvif = origifp;
4526	error = dlil_output(origifp, PF_INET6, m0, (caddr_t)rt,
4527	SA(dst), `0`, adv);
4528	goto release;
4529	} else {
4530	/ Do not allow loopback address to wind up on a wire /
4531	struct ip6_hdr ip6 = mtod(m0, struct* ip6_hdr *);
4532
4533	if ((IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) \|\|
4534	IN6_IS_ADDR_LOOPBACK(&ip6->ip6_dst))) {
4535	ip6stat.ip6s_badscope++;
4536	error = EADDRNOTAVAIL;
4537	goto bad;
4538	}
4539	}
4540
4541	if (rt != NULL) {
4542	RT_LOCK_SPIN(rt);
4543	/ Mark use timestamp /
4544	if (rt->rt_llinfo != NULL) {
4545	nd6_llreach_use(rt->rt_llinfo);
4546	}
4547	RT_UNLOCK(rt);
4548	}
4549
4550	struct mbuf *mcur = m0;
4551	uint32_t pktcnt = `0`;
4552
4553	while (mcur) {
4554	if (hint != NULL && nstat_collect) {
4555	int scnt;
4556
4557	if ((mcur->m_pkthdr.csum_flags & CSUM_TSO_IPV6) &&
4558	(mcur->m_pkthdr.tso_segsz > `0`)) {
4559	scnt = mcur->m_pkthdr.len / mcur->m_pkthdr.tso_segsz;
4560	} else {
4561	scnt = `1`;
4562	}
4563
4564	nstat_route_tx(rte: hint, packets: scnt, bytes: mcur->m_pkthdr.len, flags: `0`);
4565	}
4566	pktcnt++;
4567
4568	mcur->m_pkthdr.rcvif = NULL;
4569	mcur = mcur->m_nextpkt;
4570	}
4571	if (pktcnt > ip6_maxchainsent) {
4572	ip6_maxchainsent = pktcnt;
4573	}
4574	error = dlil_output(ifp, PF_INET6, m0, (caddr_t)rt, SA(dst), `0`, adv);
4575	goto release;
4576
4577	bad:
4578	if (m0 != NULL) {
4579	m_freem_list(m0);
4580	}
4581
4582	release:
4583	/ Clean up "rt" unless it's already been done /
4584	if (rt != NULL) {
4585	RT_LOCK_SPIN(rt);
4586	if (rt == hint0) {
4587	RT_REMREF_LOCKED(rt);
4588	RT_UNLOCK(rt);
4589	} else {
4590	RT_UNLOCK(rt);
4591	rtfree(rt);
4592	}
4593	}
4594	/ And now clean up "rtrele" if there is any /
4595	if (rtrele != NULL) {
4596	RT_LOCK_SPIN(rtrele);
4597	if (rtrele == hint0) {
4598	RT_REMREF_LOCKED(rtrele);
4599	RT_UNLOCK(rtrele);
4600	} else {
4601	RT_UNLOCK(rtrele);
4602	rtfree(rtrele);
4603	}
4604	}
4605	return error;
4606	}
4607	#undef senderr
4608
4609	int
4610	nd6_need_cache(struct ifnet *ifp)
4611	{
4612	/*
4613	* XXX: we currently do not make neighbor cache on any interface
4614	* other than ARCnet, Ethernet, FDDI and GIF.
4615	*
4616	* RFC2893 says:
4617	* - unidirectional tunnels needs no ND
4618	*/
4619	switch (ifp->if_type) {
4620	case IFT_ARCNET:
4621	case IFT_ETHER:
4622	case IFT_FDDI:
4623	case IFT_IEEE1394:
4624	case IFT_L2VLAN:
4625	case IFT_IEEE8023ADLAG:
4626	#if IFT_IEEE80211
4627	case IFT_IEEE80211:
4628	#endif
4629	case IFT_GIF: / XXX need more cases? /
4630	case IFT_PPP:
4631	#if IFT_TUNNEL
4632	case IFT_TUNNEL:
4633	#endif
4634	case IFT_BRIDGE:
4635	case IFT_CELLULAR:
4636	return `1`;
4637	default:
4638	return `0`;
4639	}
4640	}
4641
4642	int
4643	nd6_storelladdr(struct ifnet ifp, struct* rtentry rt, struct* mbuf *m,
4644	struct sockaddr dst, u_char desten)
4645	{
4646	int i;
4647	struct sockaddr_dl *sdl;
4648
4649	if (m->m_flags & M_MCAST) {
4650	switch (ifp->if_type) {
4651	case IFT_ETHER:
4652	case IFT_FDDI:
4653	case IFT_L2VLAN:
4654	case IFT_IEEE8023ADLAG:
4655	#if IFT_IEEE80211
4656	case IFT_IEEE80211:
4657	#endif
4658	case IFT_BRIDGE:
4659	ETHER_MAP_IPV6_MULTICAST(&SIN6(dst)->sin6_addr, desten);
4660	return `1`;
4661	case IFT_IEEE1394:
4662	for (i = `0`; i < ifp->if_addrlen; i++) {
4663	desten[i] = ~`0`;
4664	}
4665	return `1`;
4666	case IFT_ARCNET:
4667	*desten = `0`;
4668	return `1`;
4669	default:
4670	return `0`; / caller will free mbuf /
4671	}
4672	}
4673
4674	if (rt == NULL) {
4675	/ this could happen, if we could not allocate memory /
4676	return `0`; / caller will free mbuf /
4677	}
4678	RT_LOCK(rt);
4679	if (rt->rt_gateway->sa_family != AF_LINK) {
4680	printf("nd6_storelladdr: something odd happens\n");
4681	RT_UNLOCK(rt);
4682	return `0`; / caller will free mbuf /
4683	}
4684	sdl = SDL(rt->rt_gateway);
4685	if (sdl->sdl_alen == `0`) {
4686	/ this should be impossible, but we bark here for debugging /
4687	printf("nd6_storelladdr: sdl_alen == 0\n");
4688	RT_UNLOCK(rt);
4689	return `0`; / caller will free mbuf /
4690	}
4691
4692	bcopy(LLADDR(sdl), dst: desten, n: sdl->sdl_alen);
4693	RT_UNLOCK(rt);
4694	return `1`;
4695	}
4696
4697	/*
4698	* This is the ND pre-output routine; care must be taken to ensure that
4699	* the "hint" route never gets freed via rtfree(), since the caller may
4700	* have stored it inside a struct route with a reference held for that
4701	* placeholder.
4702	*/
4703	errno_t
4704	nd6_lookup_ipv6(ifnet_t ifp, const struct sockaddr_in6 *ip6_dest,
4705	struct sockaddr_dl *ll_dest, size_t ll_dest_len, route_t hint,
4706	mbuf_t packet)
4707	{
4708	route_t route __single = hint;
4709	errno_t result = `0`;
4710	struct sockaddr_dl *sdl = NULL;
4711	size_t copy_len;
4712
4713	if (ifp == NULL \|\| ip6_dest == NULL) {
4714	return EINVAL;
4715	}
4716
4717	if (ip6_dest->sin6_family != AF_INET6) {
4718	return EAFNOSUPPORT;
4719	}
4720
4721	if ((ifp->if_flags & (IFF_UP \| IFF_RUNNING)) != (IFF_UP \| IFF_RUNNING)) {
4722	return ENETDOWN;
4723	}
4724
4725	if (hint != NULL) {
4726	/*
4727	* Callee holds a reference on the route and returns
4728	* with the route entry locked, upon success.
4729	*/
4730	result = route_to_gwroute(SA(ip6_dest), hint, &route);
4731	if (result != `0`) {
4732	return result;
4733	}
4734	if (route != NULL) {
4735	RT_LOCK_ASSERT_HELD(route);
4736	}
4737	}
4738
4739	if ((packet != NULL && (packet->m_flags & M_MCAST) != `0`) \|\|
4740	((ifp->if_flags & IFF_MULTICAST) &&
4741	IN6_IS_ADDR_MULTICAST(&ip6_dest->sin6_addr))) {
4742	if (route != NULL) {
4743	RT_UNLOCK(route);
4744	}
4745	result = dlil_resolve_multi(ifp, SA(ip6_dest), SA(ll_dest), ll_dest_len);
4746	if (route != NULL) {
4747	RT_LOCK(route);
4748	}
4749	goto release;
4750	} else if (route == NULL) {
4751	/*
4752	* rdar://24596652
4753	* For unicast, lookup existing ND6 entries but
4754	* do not trigger a resolution
4755	*/
4756	lck_mtx_lock(rnh_lock);
4757	route = rt_lookup(TRUE,
4758	__DECONST(struct sockaddr *, ip6_dest), NULL,
4759	rt_tables[AF_INET6], ifp->if_index);
4760	lck_mtx_unlock(rnh_lock);
4761
4762	if (route != NULL) {
4763	RT_LOCK(route);
4764	}
4765	}
4766
4767	if (route == NULL) {
4768	/*
4769	* This could happen, if we could not allocate memory or
4770	* if route_to_gwroute() didn't return a route.
4771	*/
4772	result = ENOBUFS;
4773	goto release;
4774	}
4775
4776	if (route->rt_gateway->sa_family != AF_LINK) {
4777	nd6log0(error, "%s: route %s on %s%d gateway address not AF_LINK\n",
4778	__func__, ip6_sprintf(&ip6_dest->sin6_addr),
4779	route->rt_ifp->if_name, route->rt_ifp->if_unit);
4780	result = EADDRNOTAVAIL;
4781	goto release;
4782	}
4783
4784	sdl = SDL(route->rt_gateway);
4785	if (sdl->sdl_alen == `0`) {
4786	/ this should be impossible, but we bark here for debugging /
4787	nd6log(error, "%s: route %s on %s%d sdl_alen == 0\n", __func__,
4788	ip6_sprintf(&ip6_dest->sin6_addr), route->rt_ifp->if_name,
4789	route->rt_ifp->if_unit);
4790	result = EHOSTUNREACH;
4791	goto release;
4792	}
4793
4794	copy_len = sdl->sdl_len <= ll_dest_len ? sdl->sdl_len : ll_dest_len;
4795	SOCKADDR_COPY(sdl, ll_dest, copy_len);
4796
4797	release:
4798	if (route != NULL) {
4799	if (route == hint) {
4800	RT_REMREF_LOCKED(route);
4801	RT_UNLOCK(route);
4802	} else {
4803	RT_UNLOCK(route);
4804	rtfree(route);
4805	}
4806	}
4807	return result;
4808	}
4809
#if (DEVELOPMENT || DEBUG)

static int sysctl_nd6_lookup_ipv6 SYSCTL_HANDLER_ARGS;
SYSCTL_PROC(_net_inet6_icmp6, OID_AUTO, nd6_lookup_ipv6,
    CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
    sysctl_nd6_lookup_ipv6, "S", "");

/*
 * Sysctl handler that runs nd6_lookup_ipv6() on behalf of a privileged
 * caller.
 *
 * The request carries a struct nd6_lookup_ipv6_args as both the new and
 * the old value: the interface name, IPv6 destination and ll_dest_len
 * are read from it, and the same structure, with the link-layer
 * destination filled in, is copied back out on success.
 *
 * Returns 0 on success; otherwise the error from proc_suser(),
 * SYSCTL_IN(), ifnet_find_by_name(), nd6_lookup_ipv6() or SYSCTL_OUT(),
 * or EINVAL when the buffer sizes or ll_dest_len are unacceptable.
 */
int
sysctl_nd6_lookup_ipv6 SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error = 0;
	struct nd6_lookup_ipv6_args nd6_lookup_ipv6_args;
	ifnet_t ifp = NULL;

	/*
	 * Only root can lookup MAC addresses
	 */
	error = proc_suser(current_proc());
	if (error != 0) {
		nd6log0(error, "%s: proc_suser() error %d\n",
		    __func__, error);
		goto done;
	}
	/* Size probe: report how large the old-value buffer must be */
	if (req->oldptr == USER_ADDR_NULL) {
		req->oldidx = sizeof(struct nd6_lookup_ipv6_args);
	}
	/* Nothing to look up without an input structure */
	if (req->newptr == USER_ADDR_NULL) {
		goto done;
	}
	if (req->oldlen != sizeof(struct nd6_lookup_ipv6_args) ||
	    req->newlen != sizeof(struct nd6_lookup_ipv6_args)) {
		error = EINVAL;
		nd6log0(error, "%s: bad req, error %d\n",
		    __func__, error);
		goto done;
	}
	error = SYSCTL_IN(req, &nd6_lookup_ipv6_args,
	    sizeof(struct nd6_lookup_ipv6_args));
	if (error != 0) {
		nd6log0(error, "%s: SYSCTL_IN() error %d\n",
		    __func__, error);
		goto done;
	}

	/* The caller-supplied length must fit the embedded buffer */
	if (nd6_lookup_ipv6_args.ll_dest_len > sizeof(nd6_lookup_ipv6_args.ll_dest_)) {
		error = EINVAL;
		nd6log0(error, "%s: bad ll_dest_len, error %d\n",
		    __func__, error);
		goto done;
	}

	/* Make sure to terminate the string */
	nd6_lookup_ipv6_args.ifname[IFNAMSIZ - 1] = 0;

	/*
	 * On success ifnet_find_by_name() hands back the interface with a
	 * reference held; it is dropped at "done" on every exit path.
	 */
	error = ifnet_find_by_name(nd6_lookup_ipv6_args.ifname, &ifp);
	if (error != 0) {
		nd6log0(error, "%s: ifnet_find_by_name() error %d\n",
		    __func__, error);
		goto done;
	}

	error = nd6_lookup_ipv6(ifp, &nd6_lookup_ipv6_args.ip6_dest,
	    &nd6_lookup_ipv6_args.ll_dest_._sdl,
	    nd6_lookup_ipv6_args.ll_dest_len, NULL, NULL);
	if (error != 0) {
		nd6log0(error, "%s: nd6_lookup_ipv6() error %d\n",
		    __func__, error);
		goto done;
	}

	error = SYSCTL_OUT(req, &nd6_lookup_ipv6_args,
	    sizeof(struct nd6_lookup_ipv6_args));
	if (error != 0) {
		nd6log0(error, "%s: SYSCTL_OUT() error %d\n",
		    __func__, error);
		goto done;
	}
done:
	/* Release the interface reference taken by ifnet_find_by_name() */
	if (ifp != NULL) {
		ifnet_release(ifp);
	}
	return error;
}

#endif /* (DEVELOPMENT || DEBUG) */
4893
4894	int
4895	nd6_setifinfo(struct ifnet *ifp, u_int32_t before, u_int32_t after)
4896	{
4897	uint32_t b, a;
4898	int err = `0`;
4899
4900	/*
4901	* Handle ND6_IFF_IFDISABLED
4902	*/
4903	if ((before & ND6_IFF_IFDISABLED) \|\|
4904	(after & ND6_IFF_IFDISABLED)) {
4905	b = (before & ND6_IFF_IFDISABLED);
4906	a = (after & ND6_IFF_IFDISABLED);
4907
4908	if (b != a && (err = nd6_if_disable(ifp,
4909	((int32_t)(a - b) > `0`))) != `0`) {
4910	goto done;
4911	}
4912	}
4913
4914	/*
4915	* Handle ND6_IFF_PROXY_PREFIXES
4916	*/
4917	if ((before & ND6_IFF_PROXY_PREFIXES) \|\|
4918	(after & ND6_IFF_PROXY_PREFIXES)) {
4919	b = (before & ND6_IFF_PROXY_PREFIXES);
4920	a = (after & ND6_IFF_PROXY_PREFIXES);
4921
4922	if (b != a && (err = nd6_if_prproxy(ifp,
4923	((int32_t)(a - b) > `0`))) != `0`) {
4924	goto done;
4925	}
4926	}
4927	done:
4928	return err;
4929	}
4930
4931	/*
4932	* Enable/disable IPv6 on an interface, called as part of
4933	* setting/clearing ND6_IFF_IFDISABLED, or during DAD failure.
4934	*/
4935	int
4936	nd6_if_disable(struct ifnet *ifp, boolean_t enable)
4937	{
4938	if (enable) {
4939	if_set_eflags(ifp, IFEF_IPV6_DISABLED);
4940	} else {
4941	if_clear_eflags(ifp, IFEF_IPV6_DISABLED);
4942	}
4943
4944	return `0`;
4945	}
4946
4947	static int
4948	nd6_sysctl_drlist SYSCTL_HANDLER_ARGS
4949	{
4950	#pragma unused(oidp, arg1, arg2)
4951	char pbuf[MAX_IPv6_STR_LEN];
4952	struct nd_defrouter *dr;
4953	int error = `0`;
4954
4955	if (req->newptr != USER_ADDR_NULL) {
4956	return EPERM;
4957	}
4958
4959	/ XXX Handle mapped defrouter entries /
4960	lck_mtx_lock(nd6_mutex);
4961	if (proc_is64bit(req->p)) {
4962	struct in6_defrouter_64 d;
4963
4964	bzero(s: &d, n: sizeof(d));
4965	d.rtaddr.sin6_family = AF_INET6;
4966	d.rtaddr.sin6_len = sizeof(d.rtaddr);
4967
4968	TAILQ_FOREACH(dr, &nd_defrouter_list, dr_entry) {
4969	d.rtaddr.sin6_addr = dr->rtaddr;
4970	if (in6_recoverscope(&d.rtaddr,
4971	&dr->rtaddr, dr->ifp) != `0`) {
4972	log(LOG_ERR, "scope error in default router "
4973	"list (%s)\n", inet_ntop(AF_INET6,
4974	&dr->rtaddr, pbuf, sizeof(pbuf)));
4975	}
4976	d.flags = dr->flags;
4977	d.stateflags = dr->stateflags;
4978	d.rtlifetime = (u_short)dr->rtlifetime;
4979	d.expire = (int)nddr_getexpire(dr);
4980	d.if_index = dr->ifp->if_index;
4981	error = SYSCTL_OUT(req, &d, sizeof(d));
4982	if (error != `0`) {
4983	break;
4984	}
4985	}
4986	} else {
4987	struct in6_defrouter_32 d;
4988
4989	bzero(s: &d, n: sizeof(d));
4990	d.rtaddr.sin6_family = AF_INET6;
4991	d.rtaddr.sin6_len = sizeof(d.rtaddr);
4992
4993	TAILQ_FOREACH(dr, &nd_defrouter_list, dr_entry) {
4994	d.rtaddr.sin6_addr = dr->rtaddr;
4995	if (in6_recoverscope(&d.rtaddr,
4996	&dr->rtaddr, dr->ifp) != `0`) {
4997	log(LOG_ERR, "scope error in default router "
4998	"list (%s)\n", inet_ntop(AF_INET6,
4999	&dr->rtaddr, pbuf, sizeof(pbuf)));
5000	}
5001	d.flags = dr->flags;
5002	d.stateflags = dr->stateflags;
5003	d.rtlifetime = (u_short)dr->rtlifetime;
5004	d.expire = (int)nddr_getexpire(dr);
5005	d.if_index = dr->ifp->if_index;
5006	error = SYSCTL_OUT(req, &d, sizeof(d));
5007	if (error != `0`) {
5008	break;
5009	}
5010	}
5011	}
5012	lck_mtx_unlock(nd6_mutex);
5013	return error;
5014	}
5015
5016	static int
5017	nd6_sysctl_prlist SYSCTL_HANDLER_ARGS
5018	{
5019	#pragma unused(oidp, arg1, arg2)
5020	char pbuf[MAX_IPv6_STR_LEN];
5021	struct nd_pfxrouter *pfr;
5022	struct sockaddr_in6 s6;
5023	struct nd_prefix *pr;
5024	int error = `0`;
5025
5026	if (req->newptr != USER_ADDR_NULL) {
5027	return EPERM;
5028	}
5029
5030	SOCKADDR_ZERO(&s6, sizeof(s6));
5031	s6.sin6_family = AF_INET6;
5032	s6.sin6_len = sizeof(s6);
5033
5034	/ XXX Handle mapped defrouter entries /
5035	lck_mtx_lock(nd6_mutex);
5036	if (proc_is64bit(req->p)) {
5037	struct in6_prefix_64 p;
5038
5039	bzero(s: &p, n: sizeof(p));
5040	p.origin = PR_ORIG_RA;
5041
5042	LIST_FOREACH(pr, &nd_prefix, ndpr_entry) {
5043	NDPR_LOCK(pr);
5044	p.prefix = pr->ndpr_prefix;
5045	if (in6_recoverscope(&p.prefix,
5046	&pr->ndpr_prefix.sin6_addr, pr->ndpr_ifp) != `0`) {
5047	log(LOG_ERR, "scope error in "
5048	"prefix list (%s)\n", inet_ntop(AF_INET6,
5049	&p.prefix.sin6_addr, pbuf, sizeof(pbuf)));
5050	}
5051	p.raflags = pr->ndpr_raf;
5052	p.prefixlen = pr->ndpr_plen;
5053	p.vltime = pr->ndpr_vltime;
5054	p.pltime = pr->ndpr_pltime;
5055	p.if_index = pr->ndpr_ifp->if_index;
5056	p.expire = (u_long)ndpr_getexpire(pr);
5057	p.refcnt = pr->ndpr_addrcnt;
5058	p.flags = pr->ndpr_stateflags;
5059	p.advrtrs = `0`;
5060	LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry)
5061	p.advrtrs++;
5062	error = SYSCTL_OUT(req, &p, sizeof(p));
5063	if (error != `0`) {
5064	NDPR_UNLOCK(pr);
5065	break;
5066	}
5067	LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) {
5068	s6.sin6_addr = pfr->router->rtaddr;
5069	if (in6_recoverscope(&s6, &pfr->router->rtaddr,
5070	pfr->router->ifp) != `0`) {
5071	log(LOG_ERR,
5072	"scope error in prefix list (%s)\n",
5073	inet_ntop(AF_INET6, &s6.sin6_addr,
5074	pbuf, sizeof(pbuf)));
5075	}
5076	error = SYSCTL_OUT(req, &s6, sizeof(s6));
5077	if (error != `0`) {
5078	break;
5079	}
5080	}
5081	NDPR_UNLOCK(pr);
5082	if (error != `0`) {
5083	break;
5084	}
5085	}
5086	} else {
5087	struct in6_prefix_32 p;
5088
5089	bzero(s: &p, n: sizeof(p));
5090	p.origin = PR_ORIG_RA;
5091
5092	LIST_FOREACH(pr, &nd_prefix, ndpr_entry) {
5093	NDPR_LOCK(pr);
5094	p.prefix = pr->ndpr_prefix;
5095	if (in6_recoverscope(&p.prefix,
5096	&pr->ndpr_prefix.sin6_addr, pr->ndpr_ifp) != `0`) {
5097	log(LOG_ERR,
5098	"scope error in prefix list (%s)\n",
5099	inet_ntop(AF_INET6, &p.prefix.sin6_addr,
5100	pbuf, sizeof(pbuf)));
5101	}
5102	p.raflags = pr->ndpr_raf;
5103	p.prefixlen = pr->ndpr_plen;
5104	p.vltime = pr->ndpr_vltime;
5105	p.pltime = pr->ndpr_pltime;
5106	p.if_index = pr->ndpr_ifp->if_index;
5107	p.expire = (u_int32_t)ndpr_getexpire(pr);
5108	p.refcnt = pr->ndpr_addrcnt;
5109	p.flags = pr->ndpr_stateflags;
5110	p.advrtrs = `0`;
5111	LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry)
5112	p.advrtrs++;
5113	error = SYSCTL_OUT(req, &p, sizeof(p));
5114	if (error != `0`) {
5115	NDPR_UNLOCK(pr);
5116	break;
5117	}
5118	LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) {
5119	s6.sin6_addr = pfr->router->rtaddr;
5120	if (in6_recoverscope(&s6, &pfr->router->rtaddr,
5121	pfr->router->ifp) != `0`) {
5122	log(LOG_ERR,
5123	"scope error in prefix list (%s)\n",
5124	inet_ntop(AF_INET6, &s6.sin6_addr,
5125	pbuf, sizeof(pbuf)));
5126	}
5127	error = SYSCTL_OUT(req, &s6, sizeof(s6));
5128	if (error != `0`) {
5129	break;
5130	}
5131	}
5132	NDPR_UNLOCK(pr);
5133	if (error != `0`) {
5134	break;
5135	}
5136	}
5137	}
5138	lck_mtx_unlock(nd6_mutex);
5139
5140	return error;
5141	}
5142
5143	void
5144	in6_ifaddr_set_dadprogress(struct in6_ifaddr *ia)
5145	{
5146	struct ifnet* ifp = ia->ia_ifp;
5147	uint32_t flags = IN6_IFF_TENTATIVE;
5148	uint32_t optdad = nd6_optimistic_dad;
5149	struct nd_ifinfo *ndi = NULL;
5150
5151	ndi = ND_IFINFO(ifp);
5152	VERIFY((NULL != ndi) && (TRUE == ndi->initialized));
5153	if (!(ndi->flags & ND6_IFF_DAD)) {
5154	return;
5155	}
5156
5157	if (optdad) {
5158	if (ifp->if_ipv6_router_mode == IPV6_ROUTER_MODE_EXCLUSIVE) {
5159	optdad = `0`;
5160	} else {
5161	lck_mtx_lock(lck: &ndi->lock);
5162	if ((ndi->flags & ND6_IFF_REPLICATED) != `0`) {
5163	optdad = `0`;
5164	}
5165	lck_mtx_unlock(lck: &ndi->lock);
5166	}
5167	}
5168
5169	if (optdad) {
5170	if ((optdad & ND6_OPTIMISTIC_DAD_LINKLOCAL) &&
5171	IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr)) {
5172	flags = IN6_IFF_OPTIMISTIC;
5173	} else if ((optdad & ND6_OPTIMISTIC_DAD_AUTOCONF) &&
5174	(ia->ia6_flags & IN6_IFF_AUTOCONF)) {
5175	if (ia->ia6_flags & IN6_IFF_TEMPORARY) {
5176	if (optdad & ND6_OPTIMISTIC_DAD_TEMPORARY) {
5177	flags = IN6_IFF_OPTIMISTIC;
5178	}
5179	} else if (ia->ia6_flags & IN6_IFF_SECURED) {
5180	if (optdad & ND6_OPTIMISTIC_DAD_SECURED) {
5181	flags = IN6_IFF_OPTIMISTIC;
5182	}
5183	} else {
5184	/*
5185	* Keeping the behavior for temp and CGA
5186	* SLAAC addresses to have a knob for optimistic
5187	* DAD.
5188	* Other than that if ND6_OPTIMISTIC_DAD_AUTOCONF
5189	* is set, we should default to optimistic
5190	* DAD.
5191	* For now this means SLAAC addresses with interface
5192	* identifier derived from modified EUI-64 bit
5193	* identifiers.
5194	*/
5195	flags = IN6_IFF_OPTIMISTIC;
5196	}
5197	} else if ((optdad & ND6_OPTIMISTIC_DAD_DYNAMIC) &&
5198	(ia->ia6_flags & IN6_IFF_DYNAMIC)) {
5199	if (ia->ia6_flags & IN6_IFF_TEMPORARY) {
5200	if (optdad & ND6_OPTIMISTIC_DAD_TEMPORARY) {
5201	flags = IN6_IFF_OPTIMISTIC;
5202	}
5203	} else {
5204	flags = IN6_IFF_OPTIMISTIC;
5205	}
5206	} else if ((optdad & ND6_OPTIMISTIC_DAD_MANUAL) &&
5207	(ia->ia6_flags & IN6_IFF_OPTIMISTIC)) {
5208	/*
5209	* rdar://17483438
5210	* Bypass tentative for address assignments
5211	* not covered above (e.g. manual) upon request
5212	*/
5213	if (!IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr) &&
5214	!(ia->ia6_flags & IN6_IFF_AUTOCONF) &&
5215	!(ia->ia6_flags & IN6_IFF_DYNAMIC)) {
5216	flags = IN6_IFF_OPTIMISTIC;
5217	}
5218	}
5219	}
5220
5221	ia->ia6_flags &= ~(IN6_IFF_DUPLICATED \| IN6_IFF_DADPROGRESS);
5222	ia->ia6_flags \|= flags;
5223
5224	nd6log2(debug, "%s - %s ifp %s ia6_flags 0x%x\n",
5225	__func__,
5226	ip6_sprintf(&ia->ia_addr.sin6_addr),
5227	if_name(ia->ia_ifp),
5228	ia->ia6_flags);
5229	}
5230

Browse the source code of xnu/bsd/netinet6/nd6.c