/*
 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1989, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Poul-Henning Kamp of the FreeBSD Project.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *
 *	@(#)vfs_cache.c	8.5 (Berkeley) 3/22/95
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/mount_internal.h>
#include <sys/vnode_internal.h>
#include <miscfs/specfs/specdev.h>
#include <sys/namei.h>
#include <sys/errno.h>
#include <kern/kalloc.h>
#include <sys/kauth.h>
#include <sys/user.h>
#include <sys/paths.h>
#include <os/overflow.h>

#if CONFIG_MACF
#include <security/mac_framework.h>
#endif

/*
 * Name caching works as follows:
 *
 * Names found by directory scans are retained in a cache
 * for future reference.  It is managed LRU, so frequently
 * used names will hang around.  The cache is indexed by a hash value
 * obtained from (vp, name), where vp refers to the directory
 * containing name.
 *
 * If it is a "negative" entry (i.e. for a name that is known NOT to
 * exist), the vnode pointer will be NULL.
 *
 * Upon reaching the last segment of a path, if the reference
 * is for DELETE, or NOCACHE is set (rewrite), and the
 * name is located in the cache, it will be dropped.
 */
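
/*
 * Illustrative sketch (assuming the customary nc_dvp/nc_vp member names
 * of struct namecache, declared elsewhere): a positive entry maps
 * (directory, name) to a vnode, while a negative entry records the same
 * key with a NULL vnode:
 *
 *	ncp->nc_dvp = dvp;	parent directory holding "name"
 *	ncp->nc_vp  = vp;	NULL => name is known NOT to exist
 */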

/*
 * Structures associated with name caching.
 */

ZONE_DEFINE_TYPE(namecache_zone, "namecache", struct namecache, ZC_NONE);

struct smrq_list_head *nchashtbl;       /* Hash Table */
u_long nchashmask;
u_long nchash;                          /* size of hash table - 1 */
long numcache;                          /* number of cache entries allocated */
int desiredNodes;
int desiredNegNodes;
int ncs_negtotal;
TUNABLE_WRITEABLE(int, nc_disabled, "-novfscache", 0);
__options_decl(nc_smr_level_t, uint32_t, {
	NC_SMR_DISABLED = 0,
	NC_SMR_LOOKUP = 1
});
TUNABLE(nc_smr_level_t, nc_smr_enabled, "ncsmr", NC_SMR_LOOKUP);
TAILQ_HEAD(, namecache) nchead;         /* chain of all name cache entries */
TAILQ_HEAD(, namecache) neghead;        /* chain of only negative cache entries */


#if COLLECT_STATS

struct nchstats nchstats;               /* cache effectiveness statistics */

#define NCHSTAT(v) { \
	nchstats.v++; \
}
#define NAME_CACHE_LOCK_SHARED() name_cache_lock()
#define NAME_CACHE_LOCK_SHARED_TO_EXCLUSIVE() TRUE

#else

#define NCHSTAT(v)
#define NAME_CACHE_LOCK_SHARED() name_cache_lock_shared()
#define NAME_CACHE_LOCK_SHARED_TO_EXCLUSIVE() name_cache_lock_shared_to_exclusive()

#endif

#define NAME_CACHE_LOCK() name_cache_lock()
#define NAME_CACHE_UNLOCK() name_cache_unlock()

/* vars for name cache list lock */
static LCK_GRP_DECLARE(namecache_lck_grp, "Name Cache");
static LCK_RW_DECLARE(namecache_rw_lock, &namecache_lck_grp);

typedef struct string_t {
	LIST_ENTRY(string_t) hash_chain;
	char *str;
	uint32_t strbuflen;
	uint32_t refcount;
} string_t;

ZONE_DEFINE_TYPE(stringcache_zone, "vfsstringcache", string_t, ZC_NONE);

static LCK_GRP_DECLARE(strcache_lck_grp, "String Cache");
static LCK_ATTR_DECLARE(strcache_lck_attr, 0, 0);
LCK_RW_DECLARE_ATTR(strtable_rw_lock, &strcache_lck_grp, &strcache_lck_attr);

static LCK_GRP_DECLARE(rootvnode_lck_grp, "rootvnode");
LCK_RW_DECLARE(rootvnode_rw_lock, &rootvnode_lck_grp);

#define NUM_STRCACHE_LOCKS 1024

lck_mtx_t strcache_mtx_locks[NUM_STRCACHE_LOCKS];

SYSCTL_NODE(_vfs, OID_AUTO, ncstats, CTLFLAG_RD | CTLFLAG_LOCKED, NULL, "vfs name cache stats");

SYSCTL_COMPAT_INT(_vfs_ncstats, OID_AUTO, nc_smr_enabled,
    CTLFLAG_RD | CTLFLAG_LOCKED,
    &nc_smr_enabled, 0, "");

#if COLLECT_NC_SMR_STATS
struct ncstats {
	uint64_t cl_smr_hits;
	uint64_t cl_smr_miss;
	uint64_t cl_smr_negative_hits;
	uint64_t cl_smr_fallback;
	uint64_t cl_lock_hits;
	uint64_t clp_next;
	uint64_t clp_next_fail;
	uint64_t clp_smr_next;
	uint64_t clp_smr_next_fail;
	uint64_t clp_smr_fallback;
	uint64_t nc_lock_shared;
	uint64_t nc_lock;
} ncstats = {0};

SYSCTL_LONG(_vfs_ncstats, OID_AUTO, cl_smr_hits,
    CTLFLAG_RD | CTLFLAG_LOCKED,
    &ncstats.cl_smr_hits, "");
SYSCTL_LONG(_vfs_ncstats, OID_AUTO, cl_smr_misses,
    CTLFLAG_RD | CTLFLAG_LOCKED,
    &ncstats.cl_smr_miss, "");
SYSCTL_LONG(_vfs_ncstats, OID_AUTO, cl_smr_negative_hits,
    CTLFLAG_RD | CTLFLAG_LOCKED,
    &ncstats.cl_smr_negative_hits, "");
SYSCTL_LONG(_vfs_ncstats, OID_AUTO, cl_smr_fallback,
    CTLFLAG_RD | CTLFLAG_LOCKED,
    &ncstats.cl_smr_fallback, "");
SYSCTL_LONG(_vfs_ncstats, OID_AUTO, cl_lock_hits,
    CTLFLAG_RD | CTLFLAG_LOCKED,
    &ncstats.cl_lock_hits, "");
SYSCTL_LONG(_vfs_ncstats, OID_AUTO, clp_next,
    CTLFLAG_RD | CTLFLAG_LOCKED,
    &ncstats.clp_next, "");
SYSCTL_LONG(_vfs_ncstats, OID_AUTO, clp_next_fail,
    CTLFLAG_RD | CTLFLAG_LOCKED,
    &ncstats.clp_next_fail, "");
SYSCTL_LONG(_vfs_ncstats, OID_AUTO, clp_smr_next,
    CTLFLAG_RD | CTLFLAG_LOCKED,
    &ncstats.clp_smr_next, "");
SYSCTL_LONG(_vfs_ncstats, OID_AUTO, clp_smr_next_fail,
    CTLFLAG_RD | CTLFLAG_LOCKED,
    &ncstats.clp_smr_next_fail, "");
SYSCTL_LONG(_vfs_ncstats, OID_AUTO, nc_lock_shared,
    CTLFLAG_RD | CTLFLAG_LOCKED,
    &ncstats.nc_lock_shared, "");
SYSCTL_LONG(_vfs_ncstats, OID_AUTO, nc_lock,
    CTLFLAG_RD | CTLFLAG_LOCKED,
    &ncstats.nc_lock, "");

#define NC_SMR_STATS(v) os_atomic_inc(&ncstats.v, relaxed)
#else
#define NC_SMR_STATS(v)
#endif /* COLLECT_NC_SMR_STATS */

static vnode_t cache_lookup_locked(vnode_t dvp, struct componentname *cnp, uint32_t *vidp);
static vnode_t cache_lookup_smr(vnode_t dvp, struct componentname *cnp, uint32_t *vidp);
static const char *add_name_internal(const char *, uint32_t, u_int, boolean_t, u_int);
static void init_string_table(void);
static void cache_delete(struct namecache *, int);
static void cache_enter_locked(vnode_t dvp, vnode_t vp, struct componentname *cnp, const char *strname);
static void cache_purge_locked(vnode_t vp, kauth_cred_t *credp);
static void namecache_smr_free(void *, size_t);
static void string_smr_free(void *, size_t);


#ifdef DUMP_STRING_TABLE
/*
 * Internal dump function used for debugging
 */
void dump_string_table(void);
#endif /* DUMP_STRING_TABLE */

static void init_crc32(void);
static unsigned int crc32tab[256];


#define NCHHASH(dvp, hash_val) \
	(&nchashtbl[(dvp->v_id ^ (hash_val)) & nchashmask])
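
/*
 * Illustrative sketch: a lookup of "foo" under directory dvp first hashes
 * the component name (see the crc32 loop in cache_lookup_path below, which
 * forces a non-zero value) and then selects its bucket via NCHHASH:
 *
 *	struct smrq_list_head *bucket = NCHHASH(dvp, hash_val);
 *
 * Mixing dvp->v_id into the index keeps identical names that live in
 * different directories from piling onto the same hash chain.
 */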

/*
 * This function tries to check if a directory vp is a subdirectory of dvp
 * only from valid v_parent pointers.  It is called with the name cache lock
 * held and does not drop the lock at any time inside the function.
 *
 * It returns a boolean that indicates whether or not it was able to
 * successfully infer the parent/descendant relationship via the v_parent
 * pointers, or if it could not infer such relationship and the decision
 * must be delegated to the owning filesystem.
 *
 * If it does not defer the decision, i.e. it was successfully able to determine
 * the parent/descendant relationship, *is_subdir tells the caller if vp is a
 * subdirectory of dvp.
 *
 * If the decision is deferred, *next_vp is where it stopped i.e. *next_vp
 * is the vnode whose parent is to be determined from the filesystem.
 * *is_subdir, in this case, is not indicative of anything and should be
 * ignored.
 *
 * The return value and output args should be used as follows :
 *
 * defer = cache_check_vnode_issubdir(vp, dvp, is_subdir, next_vp);
 * if (!defer) {
 *	if (*is_subdir)
 *		vp is subdirectory;
 *	else
 *		vp is not a subdirectory;
 * } else {
 *	if (*next_vp)
 *		check this vnode's parent from the filesystem
 *	else
 *		error (likely because of forced unmount).
 * }
 *
 */
static boolean_t
cache_check_vnode_issubdir(vnode_t vp, vnode_t dvp, boolean_t *is_subdir,
    vnode_t *next_vp)
{
	vnode_t tvp = vp;
	int defer = FALSE;

	*is_subdir = FALSE;
	*next_vp = NULLVP;
	while (1) {
		mount_t tmp;

		if (tvp == dvp) {
			*is_subdir = TRUE;
			break;
		} else if (tvp == rootvnode) {
			/* *is_subdir = FALSE */
			break;
		}

		tmp = tvp->v_mount;
		while ((tvp->v_flag & VROOT) && tmp && tmp->mnt_vnodecovered &&
		    tvp != dvp && tvp != rootvnode) {
			tvp = tmp->mnt_vnodecovered;
			tmp = tvp->v_mount;
		}

		/*
		 * If dvp is not at the top of a mount "stack" then
		 * vp is not a subdirectory of dvp either.
		 */
		if (tvp == dvp || tvp == rootvnode) {
			/* *is_subdir = FALSE */
			break;
		}

		if (!tmp) {
			defer = TRUE;
			*next_vp = NULLVP;
			break;
		}

		if ((tvp->v_flag & VISHARDLINK) || !(tvp->v_parent)) {
			defer = TRUE;
			*next_vp = tvp;
			break;
		}

		tvp = tvp->v_parent;
	}

	return defer;
}

/* maximum times to retry from potentially transient errors in vnode_issubdir */
#define MAX_ERROR_RETRY 3

/*
 * This function checks if a given directory (vp) is a subdirectory of dvp.
 * It walks backwards from vp and if it hits dvp in its parent chain,
 * it is a subdirectory.  If it encounters the root directory, it is not
 * a subdirectory.
 *
 * This function returns an error if it is unsuccessful and 0 on success.
 *
 * On entry (and exit) vp has an iocount and if this function has to take
 * any iocounts on other vnodes in the parent chain traversal, it releases them.
 */
int
vnode_issubdir(vnode_t vp, vnode_t dvp, int *is_subdir, vfs_context_t ctx)
{
	vnode_t start_vp, tvp;
	vnode_t vp_with_iocount;
	int error = 0;
	char dotdotbuf[] = "..";
	int error_retry_count = 0; /* retry count for potentially transient
	                            * errors */

	*is_subdir = FALSE;
	tvp = start_vp = vp;
	/*
	 * Anytime we acquire an iocount in this function, we save the vnode
	 * in this variable and release it before exiting.
	 */
	vp_with_iocount = NULLVP;

	while (1) {
		boolean_t defer;
		vnode_t pvp;
		uint32_t vid = 0;
		struct componentname cn;
		boolean_t is_subdir_locked = FALSE;

		if (tvp == dvp) {
			*is_subdir = TRUE;
			break;
		} else if (tvp == rootvnode) {
			/* *is_subdir = FALSE */
			break;
		}

		NAME_CACHE_LOCK_SHARED();

		defer = cache_check_vnode_issubdir(tvp, dvp, &is_subdir_locked,
		    &tvp);

		if (defer && tvp) {
			vid = vnode_vid(tvp);
			vnode_hold(tvp);
		}

		NAME_CACHE_UNLOCK();

		if (!defer) {
			*is_subdir = is_subdir_locked;
			break;
		}

		if (!tvp) {
			if (error_retry_count++ < MAX_ERROR_RETRY) {
				tvp = vp;
				continue;
			}
			error = ENOENT;
			break;
		}

		if (tvp != start_vp) {
			if (vp_with_iocount) {
				vnode_put(vp_with_iocount);
				vp_with_iocount = NULLVP;
			}

			error = vnode_getwithvid(tvp, vid);
			vnode_drop(tvp);
			if (error) {
				if (error_retry_count++ < MAX_ERROR_RETRY) {
					tvp = vp;
					error = 0;
					continue;
				}
				break;
			}
			vp_with_iocount = tvp;
		} else {
			tvp = vnode_drop(tvp);
		}

		bzero(&cn, sizeof(cn));
		cn.cn_nameiop = LOOKUP;
		cn.cn_flags = ISLASTCN | ISDOTDOT;
		cn.cn_context = ctx;
		cn.cn_pnbuf = &dotdotbuf[0];
		cn.cn_pnlen = sizeof(dotdotbuf);
		cn.cn_nameptr = cn.cn_pnbuf;
		cn.cn_namelen = 2;

		pvp = NULLVP;
		if ((error = VNOP_LOOKUP(tvp, &pvp, &cn, ctx))) {
			break;
		}

		if (!(tvp->v_flag & VISHARDLINK) && tvp->v_parent != pvp) {
			(void)vnode_update_identity(tvp, pvp, NULL, 0, 0,
			    VNODE_UPDATE_PARENT);
		}

		if (vp_with_iocount) {
			vnode_put(vp_with_iocount);
		}

		vp_with_iocount = tvp = pvp;
	}

	if (vp_with_iocount) {
		vnode_put(vp_with_iocount);
	}

	return error;
}
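
/*
 * Hypothetical usage sketch (vnode names invented for illustration):
 * a rename must not move a directory into its own subtree, so a caller
 * holding iocounts on both vnodes might do:
 *
 *	int is_subdir = 0;
 *
 *	error = vnode_issubdir(tdvp, fvp, &is_subdir, ctx);
 *	if (error == 0 && is_subdir)
 *		error = EINVAL;		(the move would create a cycle)
 */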

/*
 * This function builds the path in "buff" from the supplied vnode.
 * The length of the buffer *INCLUDING* the trailing zero byte is
 * returned in outlen.  NOTE: the length includes the trailing zero
 * byte and thus the length is one greater than what strlen would
 * return.  This is important and lots of code elsewhere in the kernel
 * assumes this behavior.
 *
 * This function can call vnop in file system if the parent vnode
 * does not exist or when called for hardlinks via volfs path.
 * If BUILDPATH_NO_FS_ENTER is set in flags, it only uses values present
 * in the name cache and does not enter the file system.
 *
 * If BUILDPATH_CHECK_MOVED is set in flags, we return EAGAIN when
 * we encounter ENOENT during path reconstruction.  ENOENT means that
 * one of the parents moved while we were building the path.  The
 * caller can special handle this case by calling build_path again.
 *
 * If BUILDPATH_VOLUME_RELATIVE is set in flags, we return a path
 * that is relative to the nearest mount point, i.e. do not
 * cross over mount points during building the path.
 *
 * The passed-in vp must have a valid io_count reference.
 *
 * If the parent vnode is non-NULL it also must have an io count.  This
 * allows build_path_with_parent to be safely called for operations
 * unlink, rmdir and rename that already have io counts on the target
 * and the directory.  In this way build_path_with_parent does not have
 * to try and obtain an additional io count on the parent.  Taking an
 * io count on the parent can lead to deadlock if a forced unmount
 * occurs at the right moment.  For a fuller explanation of how this
 * can occur see the comment for vn_getpath_with_parent.
 *
 */
int
build_path_with_parent(vnode_t first_vp, vnode_t parent_vp, char *buff, int buflen,
    int *outlen, size_t *mntpt_outlen, int flags, vfs_context_t ctx)
{
	vnode_t vp, tvp;
	vnode_t vp_with_iocount;
	vnode_t proc_root_dir_vp;
	char *end;
	char *mntpt_end;
	const char *str;
	unsigned int len;
	int ret = 0;
	int fixhardlink;

	if (first_vp == NULLVP) {
		return EINVAL;
	}

	if (buflen <= 1) {
		return ENOSPC;
	}

	/*
	 * Grab the process fd so we can evaluate fd_rdir.
	 */
	if (!(flags & BUILDPATH_NO_PROCROOT)) {
		proc_root_dir_vp = vfs_context_proc(ctx)->p_fd.fd_rdir;
	} else {
		proc_root_dir_vp = NULL;
	}

	vp_with_iocount = NULLVP;
again:
	vp = first_vp;

	end = &buff[buflen - 1];
	*end = '\0';
	mntpt_end = NULL;

	/*
	 * Catch a special corner case here: chroot to /full/path/to/dir, chdir to
	 * it, then open it.  Without this check, the path to it will be
	 * /full/path/to/dir instead of "/".
	 */
	if (proc_root_dir_vp == first_vp) {
		*--end = '/';
		goto out;
	}

	/*
	 * holding the NAME_CACHE_LOCK in shared mode is
	 * sufficient to stabilize both the vp->v_parent chain
	 * and the 'vp->v_mount->mnt_vnodecovered' chain
	 *
	 * if we need to drop this lock, we must first grab the v_id
	 * from the vnode we're currently working with... if that
	 * vnode doesn't already have an io_count reference (the vp
	 * passed in comes with one), we must grab a reference
	 * after we drop the NAME_CACHE_LOCK via vnode_getwithvid...
	 * deadlocks may result if you call vnode_get while holding
	 * the NAME_CACHE_LOCK... we lazily release the reference
	 * we pick up the next time we encounter a need to drop
	 * the NAME_CACHE_LOCK or before we return from this routine
	 */
	NAME_CACHE_LOCK_SHARED();

#if CONFIG_FIRMLINKS
	if (!(flags & BUILDPATH_NO_FIRMLINK) &&
	    (vp->v_flag & VFMLINKTARGET) && vp->v_fmlink && (vp->v_fmlink->v_type == VDIR)) {
		vp = vp->v_fmlink;
	}
#endif

	/*
	 * Check if this is the root of a file system.
	 */
	while (vp && vp->v_flag & VROOT) {
		if (vp->v_mount == NULL) {
			ret = EINVAL;
			goto out_unlock;
		}
		if ((vp->v_mount->mnt_flag & MNT_ROOTFS) || (vp == proc_root_dir_vp)) {
			/*
			 * It's the root of the root file system, so it's
			 * just "/".
			 */
			*--end = '/';

			goto out_unlock;
		} else {
			/*
			 * This is the root of the volume and the caller does not
			 * want to cross mount points.  Therefore just return
			 * '/' as the relative path.
			 */
#if CONFIG_FIRMLINKS
			if (!(flags & BUILDPATH_NO_FIRMLINK) &&
			    (vp->v_flag & VFMLINKTARGET) && vp->v_fmlink && (vp->v_fmlink->v_type == VDIR)) {
				vp = vp->v_fmlink;
			} else
#endif
			if (flags & BUILDPATH_VOLUME_RELATIVE) {
				*--end = '/';
				goto out_unlock;
			} else {
				vp = vp->v_mount->mnt_vnodecovered;
				if (!mntpt_end && vp) {
					mntpt_end = end;
				}
			}
		}
	}

	while ((vp != NULLVP) && (vp->v_parent != vp)) {
		int vid;

		/*
		 * For hardlinks the v_name may be stale, so if it's OK
		 * to enter a file system, ask the file system for the
		 * name and parent (below).
		 */
		fixhardlink = (vp->v_flag & VISHARDLINK) &&
		    (vp->v_mount->mnt_kern_flag & MNTK_PATH_FROM_ID) &&
		    !(flags & BUILDPATH_NO_FS_ENTER);

		if (!fixhardlink) {
			str = vp->v_name;

			if (str == NULL || *str == '\0') {
				if (vp->v_parent != NULL) {
					ret = EINVAL;
				} else {
					ret = ENOENT;
				}
				goto out_unlock;
			}
			len = (unsigned int)strlen(str);
			/*
			 * Check that there's enough space (including space for the '/')
			 */
			if ((unsigned int)(end - buff) < (len + 1)) {
				ret = ENOSPC;
				goto out_unlock;
			}
			/*
			 * Copy the name backwards.
			 */
			str += len;

			for (; len > 0; len--) {
				*--end = *--str;
			}
			/*
			 * Add a path separator.
			 */
			*--end = '/';
		}

		/*
		 * Walk up the parent chain.
		 */
		if (((vp->v_parent != NULLVP) && !fixhardlink) ||
		    (flags & BUILDPATH_NO_FS_ENTER)) {
			/*
			 * In this if () block we are not allowed to enter the filesystem
			 * to conclusively get the most accurate parent identifier.
			 * As a result, if 'vp' does not identify '/' and it
			 * does not have a valid v_parent, then error out
			 * and disallow further path construction
			 */
			if ((vp->v_parent == NULLVP) && (rootvnode != vp)) {
				/*
				 * Only '/' is allowed to have a NULL parent
				 * pointer.  Upper level callers should ideally
				 * re-drive name lookup on receiving ENOENT.
				 */
				ret = ENOENT;

				/* The code below will exit early if 'tvp = vp' == NULL */
			}
			vp = vp->v_parent;

			/*
			 * if the vnode we have in hand isn't a directory and it
			 * has a v_parent, then we started with the resource fork
			 * so skip up to avoid getting a duplicate copy of the
			 * file name in the path.
			 */
			if (vp && !vnode_isdir(vp) && vp->v_parent) {
				vp = vp->v_parent;
			}
		} else {
			/*
			 * No parent, go get it if supported.
			 */
			struct vnode_attr va;
			vnode_t dvp;

			/*
			 * Make sure the file system supports obtaining a path from id.
			 */
			if (!(vp->v_mount->mnt_kern_flag & MNTK_PATH_FROM_ID)) {
				ret = ENOENT;
				goto out_unlock;
			}
			vid = vp->v_id;

			vnode_hold(vp);
			NAME_CACHE_UNLOCK();

			if (vp != first_vp && vp != parent_vp && vp != vp_with_iocount) {
				if (vp_with_iocount) {
					vnode_put(vp_with_iocount);
					vp_with_iocount = NULLVP;
				}
				if (vnode_getwithvid(vp, vid)) {
					vnode_drop(vp);
					goto again;
				}
				vp_with_iocount = vp;
			}

			vnode_drop(vp);

			VATTR_INIT(&va);
			VATTR_WANTED(&va, va_parentid);

			if (fixhardlink) {
				VATTR_WANTED(&va, va_name);
				va.va_name = zalloc(ZV_NAMEI);
			} else {
				va.va_name = NULL;
			}
			/*
			 * Ask the file system for its parent id and for its name (optional).
			 */
			ret = vnode_getattr(vp, &va, ctx);

			if (fixhardlink) {
				if ((ret == 0) && (VATTR_IS_SUPPORTED(&va, va_name))) {
					str = va.va_name;
					vnode_update_identity(vp, NULL, str,
					    (unsigned int)strlen(str), 0, VNODE_UPDATE_NAME);
				} else if (vp->v_name) {
					str = vp->v_name;
					ret = 0;
				} else {
					ret = ENOENT;
					goto bad_news;
				}
				len = (unsigned int)strlen(str);

				/*
				 * Check that there's enough space.
				 */
				if ((unsigned int)(end - buff) < (len + 1)) {
					ret = ENOSPC;
				} else {
					/* Copy the name backwards. */
					str += len;

					for (; len > 0; len--) {
						*--end = *--str;
					}
					/*
					 * Add a path separator.
					 */
					*--end = '/';
				}
bad_news:
				zfree(ZV_NAMEI, va.va_name);
			}
			if (ret || !VATTR_IS_SUPPORTED(&va, va_parentid)) {
				ret = ENOENT;
				goto out;
			}
			/*
			 * Ask the file system for the parent vnode.
			 */
			if ((ret = VFS_VGET(vp->v_mount, (ino64_t)va.va_parentid, &dvp, ctx))) {
				goto out;
			}

			if (!fixhardlink && (vp->v_parent != dvp)) {
				vnode_update_identity(vp, dvp, NULL, 0, 0, VNODE_UPDATE_PARENT);
			}

			if (vp_with_iocount) {
				vnode_put(vp_with_iocount);
			}
			vp = dvp;
			vp_with_iocount = vp;

			NAME_CACHE_LOCK_SHARED();

			/*
			 * if the vnode we have in hand isn't a directory and it
			 * has a v_parent, then we started with the resource fork
			 * so skip up to avoid getting a duplicate copy of the
			 * file name in the path.
			 */
			if (vp && !vnode_isdir(vp) && vp->v_parent) {
				vp = vp->v_parent;
			}
		}

		if (vp && (flags & BUILDPATH_CHECKACCESS)) {
			vid = vp->v_id;

			vnode_hold(vp);
			NAME_CACHE_UNLOCK();

			if (vp != first_vp && vp != parent_vp && vp != vp_with_iocount) {
				if (vp_with_iocount) {
					vnode_put(vp_with_iocount);
					vp_with_iocount = NULLVP;
				}
				if (vnode_getwithvid(vp, vid)) {
					vnode_drop(vp);
					goto again;
				}
				vp_with_iocount = vp;
			}
			vnode_drop(vp);

			if ((ret = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx))) {
				goto out;       /* no peeking */
			}
			NAME_CACHE_LOCK_SHARED();
		}

		/*
		 * When a mount point is crossed switch the vp.
		 * Continue until we find the root or we find
		 * a vnode that's not the root of a mounted
		 * file system.
		 */
		tvp = vp;

		while (tvp) {
			if (tvp == proc_root_dir_vp) {
				goto out_unlock;        /* encountered the root */
			}

#if CONFIG_FIRMLINKS
			if (!(flags & BUILDPATH_NO_FIRMLINK) &&
			    (tvp->v_flag & VFMLINKTARGET) && tvp->v_fmlink && (tvp->v_fmlink->v_type == VDIR)) {
				tvp = tvp->v_fmlink;
				break;
			}
#endif

			if (!(tvp->v_flag & VROOT) || !tvp->v_mount) {
				break;                  /* not the root of a mounted FS */
			}
			if (flags & BUILDPATH_VOLUME_RELATIVE) {
				/* Do not cross over mount points */
				tvp = NULL;
			} else {
				tvp = tvp->v_mount->mnt_vnodecovered;
				if (!mntpt_end && tvp) {
					mntpt_end = end;
				}
			}
		}
		if (tvp == NULLVP) {
			goto out_unlock;
		}
		vp = tvp;
	}
out_unlock:
	NAME_CACHE_UNLOCK();
out:
	if (vp_with_iocount) {
		vnode_put(vp_with_iocount);
	}
	/*
	 * Slide the name down to the beginning of the buffer.
	 */
	memmove(buff, end, &buff[buflen] - end);

	/*
	 * length includes the trailing zero byte
	 */
	*outlen = (int)(&buff[buflen] - end);
	if (mntpt_outlen && mntpt_end) {
		*mntpt_outlen = (size_t)*outlen - (size_t)(&buff[buflen] - mntpt_end);
	}

	/*
	 * One of the parents was moved during path reconstruction.
	 * The caller is interested in knowing whether any of the
	 * parents moved via BUILDPATH_CHECK_MOVED, so return EAGAIN.
	 */
	if ((ret == ENOENT) && (flags & BUILDPATH_CHECK_MOVED)) {
		ret = EAGAIN;
	}

	return ret;
}

int
build_path(vnode_t first_vp, char *buff, int buflen, int *outlen, int flags, vfs_context_t ctx)
{
	return build_path_with_parent(first_vp, NULL, buff, buflen, outlen, NULL, flags, ctx);
}
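
/*
 * Hypothetical usage sketch: per the convention documented above
 * build_path_with_parent, *outlen counts the trailing NUL byte:
 *
 *	char path[MAXPATHLEN];
 *	int len = 0;
 *
 *	if (build_path(vp, path, MAXPATHLEN, &len, 0, ctx) == 0)
 *		len == (int)strlen(path) + 1 holds here
 */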

/*
 * Combined version of vnode_getparent() and vnode_getname() to acquire both the
 * vnode name and parent without releasing the name cache lock in the interim.
 */
void
vnode_getparent_and_name(vnode_t vp, vnode_t *out_pvp, const char **out_name)
{
	vnode_t pvp = NULLVP;
	int locked = 0;
	int pvid;

	NAME_CACHE_LOCK_SHARED();
	locked = 1;

	if (out_name) {
		const char *name = NULL;
		if (vp->v_name) {
			name = vfs_addname(vp->v_name, (unsigned int)strlen(vp->v_name), 0, 0);
		}
		*out_name = name;
	}

	if (!out_pvp) {
		goto out;
	}

	pvp = vp->v_parent;

	/*
	 * v_parent is stable behind the name_cache lock
	 * however, the only thing we can really guarantee
	 * is that we've grabbed a valid iocount on the
	 * parent of 'vp' at the time we took the name_cache lock...
	 * once we drop the lock, vp could get re-parented
	 */
	if (pvp != NULLVP) {
		pvid = pvp->v_id;

		vnode_hold(pvp);
		NAME_CACHE_UNLOCK();
		locked = 0;

		if (vnode_getwithvid(pvp, pvid) != 0) {
			vnode_drop(pvp);
			pvp = NULL;
		} else {
			vnode_drop(pvp);
		}
	}
	*out_pvp = pvp;

out:
	if (locked) {
		NAME_CACHE_UNLOCK();
	}
}

/*
 * return NULLVP if vp's parent doesn't
 * exist, or we can't get a valid iocount
 * else return the parent of vp
 */
vnode_t
vnode_getparent(vnode_t vp)
{
	vnode_t pvp = NULLVP;
	vnode_getparent_and_name(vp, &pvp, NULL);

	return pvp;
}

/*
 * Similar to vnode_getparent() but only returns the parent vnode (with
 * iocount held) if the actual parent vnode is different than the given 'pvp'.
 */
__private_extern__ vnode_t
vnode_getparent_if_different(vnode_t vp, vnode_t pvp)
{
	vnode_t real_pvp = NULLVP;
	int pvid;

	if (vp->v_parent == pvp) {
		goto out;
	}

	NAME_CACHE_LOCK_SHARED();

	real_pvp = vp->v_parent;
	if (real_pvp == NULLVP) {
		NAME_CACHE_UNLOCK();
		goto out;
	}

	/*
	 * Do the check again after the namecache lock is acquired as the
	 * parent vnode could have changed.
	 */
	if (real_pvp != pvp) {
		pvid = real_pvp->v_id;

		vnode_hold(real_pvp);
		NAME_CACHE_UNLOCK();

		if (vnode_getwithvid(real_pvp, pvid) != 0) {
			vnode_drop(real_pvp);
			real_pvp = NULLVP;
		} else {
			vnode_drop(real_pvp);
		}
	} else {
		real_pvp = NULLVP;
		NAME_CACHE_UNLOCK();
	}

out:
	return real_pvp;
}

const char *
vnode_getname(vnode_t vp)
{
	const char *name = NULL;
	vnode_getparent_and_name(vp, NULL, &name);

	return name;
}

void
vnode_putname(const char *name)
{
	vfs_removename(name);
}
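
/*
 * Illustrative pairing: a name returned by vnode_getname() holds a
 * string-cache reference (via vfs_addname) and must be released with
 * vnode_putname():
 *
 *	const char *name = vnode_getname(vp);
 *
 *	if (name != NULL) {
 *		... use name ...
 *		vnode_putname(name);
 *	}
 */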

static const char unknown_vnodename[] = "(unknown vnode name)";

const char *
vnode_getname_printable(vnode_t vp)
{
	const char *name = vnode_getname(vp);
	if (name != NULL) {
		return name;
	}

	switch (vp->v_type) {
	case VCHR:
	case VBLK:
	{
		/*
		 * Create an artificial dev name from
		 * major and minor device number
		 */
		char dev_name[64];
		(void) snprintf(dev_name, sizeof(dev_name),
		    "%c(%u, %u)", VCHR == vp->v_type ? 'c' : 'b',
		    major(vp->v_rdev), minor(vp->v_rdev));
		/*
		 * Add the newly created dev name to the name
		 * cache to allow easier cleanup.  Also,
		 * vfs_addname allocates memory for the new name
		 * and returns it.
		 */
		NAME_CACHE_LOCK_SHARED();
		name = vfs_addname(dev_name, (unsigned int)strlen(dev_name), 0, 0);
		NAME_CACHE_UNLOCK();
		return name;
	}
	default:
		return unknown_vnodename;
	}
}

void
vnode_putname_printable(const char *name)
{
	if (name == unknown_vnodename) {
		return;
	}
	vnode_putname(name);
}


/*
 * if VNODE_UPDATE_PARENT, and we can take
 * a reference on dvp, then update vp with
 * its new parent... if vp already has a parent,
 * then drop the reference vp held on it
 *
 * if VNODE_UPDATE_NAME,
 * then drop string ref on v_name if it exists, and if name is non-NULL
 * then pick up a string reference on name and record it in v_name...
 * optionally pass in the length and hashval of name if known
 *
 * if VNODE_UPDATE_CACHE, flush the name cache entries associated with vp
 */
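/*
 * Hypothetical example: after a rename, a filesystem can refresh both the
 * name and the parent in one call; passing 0 for the length and hash lets
 * this routine compute them:
 *
 *	vnode_update_identity(vp, new_dvp, new_name, 0, 0,
 *	    VNODE_UPDATE_NAME | VNODE_UPDATE_PARENT);
 */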
void
vnode_update_identity(vnode_t vp, vnode_t dvp, const char *name, int name_len, uint32_t name_hashval, int flags)
{
	struct namecache *ncp;
	vnode_t old_parentvp = NULLVP;
	int isstream = (vp->v_flag & VISNAMEDSTREAM);
	int kusecountbumped = 0;
	kauth_cred_t tcred = NULL;
	const char *vname = NULL;
	const char *tname = NULL;

	if (name_len < 0) {
		return;
	}

	if (flags & VNODE_UPDATE_PARENT) {
		if (dvp && vnode_ref(dvp) != 0) {
			dvp = NULLVP;
		}
		/* Don't count a stream's parent ref during unmounts */
		if (isstream && dvp && (dvp != vp) && (dvp != vp->v_parent) && (dvp->v_type == VREG)) {
			vnode_lock_spin(dvp);
			++dvp->v_kusecount;
			kusecountbumped = 1;
			vnode_unlock(dvp);
		}
	} else {
		dvp = NULLVP;
	}
	if ((flags & VNODE_UPDATE_NAME)) {
		if (name != vp->v_name) {
			if (name && *name) {
				if (name_len == 0) {
					name_len = (int)strlen(name);
				}
				tname = vfs_addname(name, name_len, name_hashval, 0);
			}
		} else {
			flags &= ~VNODE_UPDATE_NAME;
		}
	}
	if ((flags & (VNODE_UPDATE_PURGE | VNODE_UPDATE_PARENT | VNODE_UPDATE_CACHE | VNODE_UPDATE_NAME | VNODE_UPDATE_PURGEFIRMLINK))) {
		NAME_CACHE_LOCK();

#if CONFIG_FIRMLINKS
		if (flags & VNODE_UPDATE_PURGEFIRMLINK) {
			vnode_t old_fvp = vp->v_fmlink;
			if (old_fvp) {
				vnode_lock_spin(vp);
				vp->v_flag &= ~VFMLINKTARGET;
				vp->v_fmlink = NULLVP;
				vnode_unlock(vp);
				NAME_CACHE_UNLOCK();

				/*
				 * vnode_rele can result in a cascading series of
				 * usecount releases.  The combination of calling
				 * vnode_recycle and dont_reenter (3rd arg to
				 * vnode_rele_internal) ensures we don't have
				 * that issue.
				 */
				vnode_recycle(old_fvp);
				vnode_rele_internal(old_fvp, O_EVTONLY, 1, 0);

				NAME_CACHE_LOCK();
			}
		}
#endif

		if ((flags & VNODE_UPDATE_PURGE)) {
			if (vp->v_parent) {
				vp->v_parent->v_nc_generation++;
			}

			while ((ncp = LIST_FIRST(&vp->v_nclinks))) {
				cache_delete(ncp, 1);
			}

			while ((ncp = TAILQ_FIRST(&vp->v_ncchildren))) {
				cache_delete(ncp, 1);
			}

			/*
			 * Use a temp variable to avoid kauth_cred_drop() while NAME_CACHE_LOCK is held
			 */
			tcred = vnode_cred(vp);
			vp->v_cred = NOCRED;
			vp->v_authorized_actions = 0;
			vp->v_cred_timestamp = 0;
		}
		if ((flags & VNODE_UPDATE_NAME)) {
			vname = vp->v_name;
			vp->v_name = tname;
		}
		if (flags & VNODE_UPDATE_PARENT) {
			if (dvp != vp && dvp != vp->v_parent) {
				old_parentvp = vp->v_parent;
				vp->v_parent = dvp;
				dvp = NULLVP;

				if (old_parentvp) {
					flags |= VNODE_UPDATE_CACHE;
				}
			}
		}
		if (flags & VNODE_UPDATE_CACHE) {
			while ((ncp = LIST_FIRST(&vp->v_nclinks))) {
				cache_delete(ncp, 1);
			}
		}
		NAME_CACHE_UNLOCK();

		if (vname != NULL) {
			vfs_removename(vname);
		}

		if (IS_VALID_CRED(tcred)) {
			kauth_cred_unref(&tcred);
		}
	}
	if (dvp != NULLVP) {
		/* Back out the ref we took if we lost a race for vp->v_parent. */
		if (kusecountbumped) {
			vnode_lock_spin(dvp);
			if (dvp->v_kusecount > 0) {
				--dvp->v_kusecount;
			}
			vnode_unlock(dvp);
		}
		vnode_rele(dvp);
	}
	if (old_parentvp) {
		struct uthread *ut;
		vnode_t vreclaims = NULLVP;

		if (isstream) {
			vnode_lock_spin(old_parentvp);
			if ((old_parentvp->v_type != VDIR) && (old_parentvp->v_kusecount > 0)) {
				--old_parentvp->v_kusecount;
			}
			vnode_unlock(old_parentvp);
		}
		ut = current_uthread();

		/*
		 * indicate to vnode_rele that it shouldn't do a
		 * vnode_reclaim at this time... instead it will
		 * chain the vnode to the uu_vreclaims list...
		 * we'll be responsible for calling vnode_reclaim
		 * on each of the vnodes in this list...
		 */
		ut->uu_defer_reclaims = 1;
		ut->uu_vreclaims = NULLVP;

		while ((vp = old_parentvp) != NULLVP) {
			vnode_hold(vp);
			vnode_lock_spin(vp);
			vnode_rele_internal(vp, 0, 0, 1);

			/*
			 * check to see if the vnode is now in the state
			 * that would have triggered a vnode_reclaim in vnode_rele
			 * if it is, we save its parent pointer and then NULL
			 * out the v_parent field... we'll drop the reference
			 * that was held on the next iteration of this loop...
			 * this short circuits a potential deep recursion if we
			 * have a long chain of parents in this state...
			 * we'll sit in this loop until we run into
			 * a parent in this chain that is not in this state
			 *
			 * make our check and the vnode_rele atomic
			 * with respect to the current vnode we're working on
			 * by holding the vnode lock
			 * if vnode_rele deferred the vnode_reclaim and has put
			 * this vnode on the list to be reaped by us, then
			 * it has left this vnode with an iocount == 1
			 */
			if (ut->uu_vreclaims == vp) {
				/*
				 * This vnode is on the head of the uu_vreclaims chain
				 * which means vnode_rele wanted to do a vnode_reclaim
				 * on this vnode.  Pull the parent pointer now so that when we do the
				 * vnode_reclaim for each of the vnodes in the uu_vreclaims
				 * list, we won't recurse back through here
				 *
				 * need to do a convert here in case vnode_rele_internal
				 * returns with the lock held in the spin mode... it
				 * can drop and retake the lock under certain circumstances
				 */
				vnode_lock_convert(vp);

				NAME_CACHE_LOCK();
				old_parentvp = vp->v_parent;
				vp->v_parent = NULLVP;
				NAME_CACHE_UNLOCK();
			} else {
				/*
				 * we're done... we ran into a vnode that isn't
				 * being terminated
				 */
				old_parentvp = NULLVP;
			}
			vnode_drop_and_unlock(vp);
		}
		vreclaims = ut->uu_vreclaims;
		ut->uu_vreclaims = NULLVP;
		ut->uu_defer_reclaims = 0;

		while ((vp = vreclaims) != NULLVP) {
			vreclaims = vp->v_defer_reclaimlist;

			/*
			 * vnode_put will drive the vnode_reclaim if
			 * we are still the only reference on this vnode
			 */
			vnode_put(vp);
		}
	}
}

#if CONFIG_FIRMLINKS
errno_t
vnode_setasfirmlink(vnode_t vp, vnode_t target_vp)
{
	int error = 0;
	vnode_t old_target_vp = NULLVP;
	vnode_t old_target_vp_v_fmlink = NULLVP;
	kauth_cred_t target_vp_cred = NULL;
	kauth_cred_t old_target_vp_cred = NULL;

	if (!vp) {
		return EINVAL;
	}

	if (target_vp) {
		if (vp->v_fmlink == target_vp) { /* Will be checked again under the name cache lock */
			return 0;
		}

		/*
		 * Firmlink source and target will take both a usecount
		 * and kusecount on each other.
		 */
		if ((error = vnode_ref_ext(target_vp, O_EVTONLY, VNODE_REF_FORCE))) {
			return error;
		}

		if ((error = vnode_ref_ext(vp, O_EVTONLY, VNODE_REF_FORCE))) {
			vnode_rele_ext(target_vp, O_EVTONLY, 1);
			return error;
		}
	}

	NAME_CACHE_LOCK();

	old_target_vp = vp->v_fmlink;
	if (target_vp && (target_vp == old_target_vp)) {
		NAME_CACHE_UNLOCK();
		return 0;
	}
	vp->v_fmlink = target_vp;

	vnode_lock_spin(vp);
	vp->v_flag &= ~VFMLINKTARGET;
	vnode_unlock(vp);

	if (target_vp) {
		target_vp->v_fmlink = vp;
		vnode_lock_spin(target_vp);
		target_vp->v_flag |= VFMLINKTARGET;
		vnode_unlock(target_vp);
		cache_purge_locked(vp, &target_vp_cred);
	}

	if (old_target_vp) {
		old_target_vp_v_fmlink = old_target_vp->v_fmlink;
		old_target_vp->v_fmlink = NULLVP;
		vnode_lock_spin(old_target_vp);
		old_target_vp->v_flag &= ~VFMLINKTARGET;
		vnode_unlock(old_target_vp);
		cache_purge_locked(vp, &old_target_vp_cred);
	}

	NAME_CACHE_UNLOCK();

	if (IS_VALID_CRED(target_vp_cred)) {
		kauth_cred_unref(&target_vp_cred);
	}

	if (old_target_vp) {
		if (IS_VALID_CRED(old_target_vp_cred)) {
			kauth_cred_unref(&old_target_vp_cred);
		}

		vnode_rele_ext(old_target_vp, O_EVTONLY, 1);
		if (old_target_vp_v_fmlink) {
			vnode_rele_ext(old_target_vp_v_fmlink, O_EVTONLY, 1);
		}
	}

	return 0;
}

errno_t
vnode_getfirmlink(vnode_t vp, vnode_t *target_vp)
{
	int error;

	if (!vp->v_fmlink) {
		return ENODEV;
	}

	NAME_CACHE_LOCK_SHARED();
	if (vp->v_fmlink && !(vp->v_flag & VFMLINKTARGET) &&
	    (vnode_get(vp->v_fmlink) == 0)) {
		vnode_t tvp = vp->v_fmlink;

		vnode_lock_spin(tvp);
		if (tvp->v_lflag & (VL_TERMINATE | VL_DEAD)) {
			vnode_unlock(tvp);
			NAME_CACHE_UNLOCK();
			vnode_put(tvp);
			return ENOENT;
		}
		if (!(tvp->v_flag & VFMLINKTARGET)) {
			panic("firmlink target for vnode %p does not have flag set", vp);
		}
		vnode_unlock(tvp);
		*target_vp = tvp;
		error = 0;
	} else {
		*target_vp = NULLVP;
		error = ENODEV;
	}
	NAME_CACHE_UNLOCK();
	return error;
}

#else /* CONFIG_FIRMLINKS */

errno_t
vnode_setasfirmlink(__unused vnode_t vp, __unused vnode_t src_vp)
{
	return ENOTSUP;
}

errno_t
vnode_getfirmlink(__unused vnode_t vp, __unused vnode_t *target_vp)
{
	return ENOTSUP;
}

#endif

/*
 * Mark a vnode as having multiple hard links.  HFS makes use of this
 * because it keeps track of each link separately, and wants to know
 * which link was actually used.
 *
 * This will cause the name cache to force a VNOP_LOOKUP on the vnode
 * so that HFS can post-process the lookup.  Also, volfs will call
 * VNOP_GETATTR2 to determine the parent, instead of using v_parent.
 */
void
vnode_setmultipath(vnode_t vp)
{
	vnode_lock_spin(vp);

	/*
	 * In theory, we're changing the vnode's identity as far as the
	 * name cache is concerned, so we ought to grab the name cache lock
	 * here.  However, there is already a race, and grabbing the name
	 * cache lock only makes the race window slightly smaller.
	 *
	 * The race happens because the vnode already exists in the name
	 * cache, and could be found by one thread before another thread
	 * can set the hard link flag.
	 */

	vp->v_flag |= VISHARDLINK;

	vnode_unlock(vp);
}



/*
 * backwards compatibility
 */
void
vnode_uncache_credentials(vnode_t vp)
{
	vnode_uncache_authorized_action(vp, KAUTH_INVALIDATE_CACHED_RIGHTS);
}


/*
 * use the exclusive form of NAME_CACHE_LOCK to protect the update of the
 * following fields in the vnode: v_cred_timestamp, v_cred, v_authorized_actions
 * we use this lock so that we can look at the v_cred and v_authorized_actions
 * atomically while behind the NAME_CACHE_LOCK in shared mode in 'cache_lookup_path',
 * which is the super-hot path... if we are updating the authorized actions for this
 * vnode, we are already in the super-slow and far less frequented path so it's not
 * that bad that we take the lock exclusive for this case... of course we strive
 * to hold it for the minimum amount of time possible
 */

void
vnode_uncache_authorized_action(vnode_t vp, kauth_action_t action)
{
	kauth_cred_t tcred = NOCRED;

	NAME_CACHE_LOCK();

	vp->v_authorized_actions &= ~action;

	if (action == KAUTH_INVALIDATE_CACHED_RIGHTS &&
	    IS_VALID_CRED(vp->v_cred)) {
		/*
		 * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held
		 */
		tcred = vnode_cred(vp);
		vp->v_cred = NOCRED;
	}
	NAME_CACHE_UNLOCK();

	if (IS_VALID_CRED(tcred)) {
		kauth_cred_unref(&tcred);
	}
}


/* disable vnode_cache_is_authorized() by setting vnode_cache_defeat */
static TUNABLE(int, bootarg_vnode_cache_defeat, "-vnode_cache_defeat", 0);

boolean_t
vnode_cache_is_authorized(vnode_t vp, vfs_context_t ctx, kauth_action_t action)
{
	kauth_cred_t ucred;
	boolean_t retval = FALSE;

	/* Boot argument to defeat rights caching */
	if (bootarg_vnode_cache_defeat) {
		return FALSE;
	}

	if ((vp->v_mount->mnt_kern_flag & (MNTK_AUTH_OPAQUE | MNTK_AUTH_CACHE_TTL))) {
		/*
		 * a TTL is enabled on the rights cache... handle it here
		 * a TTL of 0 indicates that no rights should be cached
		 */
		if (vp->v_mount->mnt_authcache_ttl) {
			if (!(vp->v_mount->mnt_kern_flag & MNTK_AUTH_CACHE_TTL)) {
				/*
				 * For filesystems marked only MNTK_AUTH_OPAQUE (generally network ones),
				 * we will only allow a SEARCH right on a directory to be cached...
				 * that cached right always has a default TTL associated with it
				 */
				if (action != KAUTH_VNODE_SEARCH || vp->v_type != VDIR) {
					vp = NULLVP;
				}
			}
			if (vp != NULLVP && vnode_cache_is_stale(vp) == TRUE) {
				vnode_uncache_authorized_action(vp, vp->v_authorized_actions);
				vp = NULLVP;
			}
		} else {
			vp = NULLVP;
		}
	}
	if (vp != NULLVP) {
		ucred = vfs_context_ucred(ctx);

		NAME_CACHE_LOCK_SHARED();

		if (vnode_cred(vp) == ucred && (vp->v_authorized_actions & action) == action) {
			retval = TRUE;
		}

		NAME_CACHE_UNLOCK();
	}
	return retval;
}


void
vnode_cache_authorized_action(vnode_t vp, vfs_context_t ctx, kauth_action_t action)
{
	kauth_cred_t tcred = NOCRED;
	kauth_cred_t ucred;
	struct timeval tv;
	boolean_t ttl_active = FALSE;

	ucred = vfs_context_ucred(ctx);

	if (!IS_VALID_CRED(ucred) || action == 0) {
		return;
	}

	if ((vp->v_mount->mnt_kern_flag & (MNTK_AUTH_OPAQUE | MNTK_AUTH_CACHE_TTL))) {
		/*
		 * a TTL is enabled on the rights cache... handle it here
		 * a TTL of 0 indicates that no rights should be cached
		 */
		if (vp->v_mount->mnt_authcache_ttl == 0) {
			return;
		}

		if (!(vp->v_mount->mnt_kern_flag & MNTK_AUTH_CACHE_TTL)) {
			/*
			 * only cache SEARCH action for filesystems marked
			 * MNTK_AUTH_OPAQUE on VDIRs...
			 * the lookup_path code will time these out
			 */
			if ((action & ~KAUTH_VNODE_SEARCH) || vp->v_type != VDIR) {
				return;
			}
		}
		ttl_active = TRUE;

		microuptime(&tv);
	}
	NAME_CACHE_LOCK();

	tcred = vnode_cred(vp);
	if (tcred == ucred) {
		tcred = NOCRED;
	} else {
		/*
		 * Use a temp variable to avoid kauth_cred_drop() while NAME_CACHE_LOCK is held
		 */
		kauth_cred_ref(ucred);
		vp->v_cred = ucred;
		vp->v_authorized_actions = 0;
	}
	if (ttl_active == TRUE && vp->v_authorized_actions == 0) {
		/*
		 * only reset the timestamp on the
		 * first authorization cached after the previous
		 * timer has expired or we're switching creds...
		 * 'vnode_cache_is_authorized' will clear the
		 * authorized actions if the TTL is active and
		 * it has expired
		 */
		vp->v_cred_timestamp = (int)tv.tv_sec;
	}
	vp->v_authorized_actions |= action;

	NAME_CACHE_UNLOCK();

	if (IS_VALID_CRED(tcred)) {
		kauth_cred_unref(&tcred);
	}
}


boolean_t
vnode_cache_is_stale(vnode_t vp)
{
	struct timeval tv;
	boolean_t retval;

	microuptime(&tv);

	if ((tv.tv_sec - vp->v_cred_timestamp) > vp->v_mount->mnt_authcache_ttl) {
		retval = TRUE;
	} else {
		retval = FALSE;
	}

	return retval;
}
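
/*
 * Worked example (illustrative): with mnt_authcache_ttl = 5 and cached
 * rights stamped at uptime t (v_cred_timestamp == t), the cache becomes
 * stale once microuptime() reports tv_sec - t > 5; the next
 * vnode_cache_is_authorized() call then flushes the stale rights via
 * vnode_uncache_authorized_action().
 */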

VFS_SMR_DECLARE;

/*
 * Components of nameidata (or objects it can point to) which may
 * need restoring in case fast path lookup fails.
 */
struct nameidata_state {
	u_long ni_loopcnt;
	char *ni_next;
	u_int ni_pathlen;
	int32_t ni_flag;
	char *cn_nameptr;
	int cn_namelen;
	int cn_flags;
	uint32_t cn_hash;
};

static void
save_ndp_state(struct nameidata *ndp, struct componentname *cnp, struct nameidata_state *saved_statep)
{
	saved_statep->ni_loopcnt = ndp->ni_loopcnt;
	saved_statep->ni_next = ndp->ni_next;
	saved_statep->ni_pathlen = ndp->ni_pathlen;
	saved_statep->ni_flag = ndp->ni_flag;
	saved_statep->cn_nameptr = cnp->cn_nameptr;
	saved_statep->cn_namelen = cnp->cn_namelen;
	saved_statep->cn_flags = cnp->cn_flags;
	saved_statep->cn_hash = cnp->cn_hash;
}

static void
restore_ndp_state(struct nameidata *ndp, struct componentname *cnp, struct nameidata_state *saved_statep)
{
	ndp->ni_loopcnt = saved_statep->ni_loopcnt;
	ndp->ni_next = saved_statep->ni_next;
	ndp->ni_pathlen = saved_statep->ni_pathlen;
	ndp->ni_flag = saved_statep->ni_flag;
	cnp->cn_nameptr = saved_statep->cn_nameptr;
	cnp->cn_namelen = saved_statep->cn_namelen;
	cnp->cn_flags = saved_statep->cn_flags;
	cnp->cn_hash = saved_statep->cn_hash;
}

static inline bool
vid_is_same(vnode_t vp, uint32_t vid)
{
	return !(os_atomic_load(&vp->v_lflag, relaxed) & (VL_DRAIN | VL_TERMINATE | VL_DEAD)) && (vnode_vid(vp) == vid);
}
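
/*
 * Illustrative sketch: lock-free (SMR) walkers capture a vnode's v_id up
 * front and re-validate with vid_is_same() before trusting anything they
 * read, e.g.:
 *
 *	uint32_t vid = vnode_vid(vp);
 *	... inspect vp fields without the name cache lock ...
 *	if (!vid_is_same(vp, vid))
 *		fall back to the locked lookup path
 */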
1732 | |
1733 | static inline bool |
1734 | can_check_v_mountedhere(vnode_t vp) |
1735 | { |
1736 | return (os_atomic_load(&vp->v_usecount, relaxed) > 0) && |
1737 | (os_atomic_load(&vp->v_flag, relaxed) & VMOUNTEDHERE) && |
!(os_atomic_load(&vp->v_lflag, relaxed) & (VL_TERMINATE | VL_DEAD)) &&
(vp->v_type == VDIR);
1740 | } |
1741 | |
1742 | /* |
1743 | * Returns: 0 Success |
1744 | * ERECYCLE vnode was recycled from underneath us. Force lookup to be re-driven from namei. |
1745 | * This errno value should not be seen by anyone outside of the kernel. |
1746 | */ |
1747 | int |
1748 | cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp, |
1749 | vfs_context_t ctx, int *dp_authorized, vnode_t last_dp) |
1750 | { |
1751 | struct nameidata_state saved_state; |
1752 | char *cp; /* pointer into pathname argument */ |
1753 | uint32_t vid; |
1754 | uint32_t vvid = 0; /* protected by vp != NULLVP */ |
1755 | vnode_t vp = NULLVP; |
1756 | vnode_t tdp = NULLVP; |
1757 | vnode_t start_dp = dp; |
1758 | kauth_cred_t ucred; |
1759 | boolean_t ttl_enabled = FALSE; |
1760 | struct timeval tv; |
1761 | mount_t mp; |
1762 | mount_t dmp; |
1763 | unsigned int hash; |
1764 | int error = 0; |
1765 | boolean_t dotdotchecked = FALSE; |
1766 | bool locked = false; |
1767 | bool needs_lock = false; |
1768 | bool dp_iocount_taken = false; |
1769 | |
1770 | #if CONFIG_TRIGGERS |
1771 | vnode_t trigger_vp; |
1772 | #endif /* CONFIG_TRIGGERS */ |
1773 | |
1774 | ucred = vfs_context_ucred(ctx); |
1775 | retry: |
1776 | if (nc_smr_enabled && !needs_lock) { |
save_ndp_state(ndp, cnp, &saved_state);
1778 | vfs_smr_enter(); |
1779 | } else { |
1780 | NAME_CACHE_LOCK_SHARED(); |
1781 | locked = true; |
1782 | } |
1783 | ndp->ni_flag &= ~(NAMEI_TRAILINGSLASH); |
1784 | |
1785 | dmp = dp->v_mount; |
1786 | vid = dp->v_id; |
1787 | if (dmp && (dmp->mnt_kern_flag & (MNTK_AUTH_OPAQUE | MNTK_AUTH_CACHE_TTL))) { |
1788 | ttl_enabled = TRUE; |
microuptime(&tv);
1790 | } |
1791 | for (;;) { |
1792 | /* |
1793 | * Search a directory. |
1794 | * |
1795 | * The cn_hash value is for use by cache_lookup |
1796 | * The last component of the filename is left accessible via |
1797 | * cnp->cn_nameptr for callers that need the name. |
1798 | */ |
1799 | hash = 0; |
1800 | cp = cnp->cn_nameptr; |
1801 | |
1802 | while (*cp && (*cp != '/')) { |
1803 | hash = crc32tab[((hash >> 24) ^ (unsigned char)*cp++)] ^ hash << 8; |
1804 | } |
1805 | /* |
1806 | * the crc generator can legitimately generate |
1807 | * a 0... however, 0 for us means that we |
1808 | * haven't computed a hash, so use 1 instead |
1809 | */ |
1810 | if (hash == 0) { |
1811 | hash = 1; |
1812 | } |
1813 | cnp->cn_hash = hash; |
1814 | cnp->cn_namelen = (int)(cp - cnp->cn_nameptr); |
1815 | |
1816 | ndp->ni_pathlen -= cnp->cn_namelen; |
1817 | ndp->ni_next = cp; |
1818 | |
1819 | /* |
1820 | * Replace multiple slashes by a single slash and trailing slashes |
1821 | * by a null. This must be done before VNOP_LOOKUP() because some |
1822 | * fs's don't know about trailing slashes. Remember if there were |
1823 | * trailing slashes to handle symlinks, existing non-directories |
1824 | * and non-existing files that won't be directories specially later. |
1825 | */ |
1826 | while (*cp == '/' && (cp[1] == '/' || cp[1] == '\0')) { |
1827 | cp++; |
1828 | ndp->ni_pathlen--; |
1829 | |
1830 | if (*cp == '\0') { |
1831 | ndp->ni_flag |= NAMEI_TRAILINGSLASH; |
1832 | *ndp->ni_next = '\0'; |
1833 | } |
1834 | } |
1835 | ndp->ni_next = cp; |
1836 | |
1837 | cnp->cn_flags &= ~(MAKEENTRY | ISLASTCN | ISDOTDOT); |
1838 | |
1839 | if (*cp == '\0') { |
1840 | cnp->cn_flags |= ISLASTCN; |
1841 | } |
1842 | |
1843 | if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.') { |
1844 | cnp->cn_flags |= ISDOTDOT; |
1845 | } |
1846 | |
1847 | #if NAMEDRSRCFORK |
1848 | /* |
1849 | * Process a request for a file's resource fork. |
1850 | * |
1851 | * Consume the _PATH_RSRCFORKSPEC suffix and tag the path. |
1852 | */ |
1853 | if ((ndp->ni_pathlen == sizeof(_PATH_RSRCFORKSPEC)) && |
1854 | (cp[1] == '.' && cp[2] == '.') && |
bcmp(cp, _PATH_RSRCFORKSPEC, sizeof(_PATH_RSRCFORKSPEC)) == 0) {
1856 | /* Skip volfs file systems that don't support native streams. */ |
1857 | if ((dmp != NULL) && |
1858 | (dmp->mnt_flag & MNT_DOVOLFS) && |
1859 | (dmp->mnt_kern_flag & MNTK_NAMED_STREAMS) == 0) { |
1860 | goto skiprsrcfork; |
1861 | } |
1862 | cnp->cn_flags |= CN_WANTSRSRCFORK; |
1863 | cnp->cn_flags |= ISLASTCN; |
1864 | ndp->ni_next[0] = '\0'; |
1865 | ndp->ni_pathlen = 1; |
1866 | } |
1867 | skiprsrcfork: |
1868 | #endif |
1869 | |
1870 | *dp_authorized = 0; |
1871 | |
1872 | #if CONFIG_FIRMLINKS |
1873 | if ((cnp->cn_flags & ISDOTDOT) && (dp->v_flag & VFMLINKTARGET) && dp->v_fmlink) { |
1874 | /* |
1875 | * If this is a firmlink target then dp has to be switched to the |
1876 | * firmlink "source" before exiting this loop. |
1877 | * |
1878 | * For a firmlink "target", the policy is to pick the parent of the |
1879 | * firmlink "source" as the parent. This means that you can never |
1880 | * get to the "real" parent of firmlink target via a dotdot lookup. |
1881 | */ |
1882 | vnode_t v_fmlink = dp->v_fmlink; |
1883 | uint32_t old_vid = vid; |
1884 | mp = dmp; |
1885 | if (v_fmlink) { |
1886 | vid = v_fmlink->v_id; |
1887 | dmp = v_fmlink->v_mount; |
1888 | if ((dp->v_fmlink == v_fmlink) && dmp) { |
1889 | dp = v_fmlink; |
1890 | } else { |
1891 | vid = old_vid; |
1892 | dmp = mp; |
1893 | } |
1894 | } |
1895 | } |
1896 | #endif |
1897 | |
1898 | |
1899 | if (ttl_enabled && |
1900 | (dmp->mnt_authcache_ttl == 0 || |
1901 | ((tv.tv_sec - dp->v_cred_timestamp) > dmp->mnt_authcache_ttl))) { |
1902 | break; |
1903 | } |
1904 | |
1905 | /* |
1906 | * NAME_CACHE_LOCK holds these fields stable |
1907 | * |
1908 | * We can't cache KAUTH_VNODE_SEARCHBYANYONE for root correctly |
1909 | * so we make an ugly check for root here. root is always |
1910 | * allowed and breaking out of here only to find out that is |
1911 | * authorized by virtue of being root is very very expensive. |
1912 | * However, the check for not root is valid only for filesystems |
1913 | * which use local authorization. |
1914 | * |
1915 | * XXX: Remove the check for root when we can reliably set |
1916 | * KAUTH_VNODE_SEARCHBYANYONE as root. |
1917 | */ |
1918 | int v_authorized_actions = os_atomic_load(&dp->v_authorized_actions, relaxed); |
1919 | if ((vnode_cred(dp) != ucred || !(v_authorized_actions & KAUTH_VNODE_SEARCH)) && |
1920 | !(v_authorized_actions & KAUTH_VNODE_SEARCHBYANYONE) && |
1921 | (ttl_enabled || !vfs_context_issuser(ctx))) { |
1922 | break; |
1923 | } |
1924 | |
1925 | /* |
1926 | * indicate that we're allowed to traverse this directory... |
1927 | * even if we fail the cache lookup or decide to bail for |
1928 | * some other reason, this information is valid and is used |
1929 | * to avoid doing a vnode_authorize before the call to VNOP_LOOKUP |
1930 | */ |
1931 | *dp_authorized = 1; |
1932 | |
1933 | if ((cnp->cn_flags & (ISLASTCN | ISDOTDOT))) { |
1934 | if (cnp->cn_nameiop != LOOKUP) { |
1935 | break; |
1936 | } |
1937 | if (cnp->cn_flags & LOCKPARENT) { |
1938 | break; |
1939 | } |
1940 | if (cnp->cn_flags & NOCACHE) { |
1941 | break; |
1942 | } |
1943 | |
1944 | if (cnp->cn_flags & ISDOTDOT) { |
1945 | /* |
1946 | * Force directory hardlinks to go to |
1947 | * file system for ".." requests. |
1948 | */ |
1949 | if ((dp->v_flag & VISHARDLINK)) { |
1950 | break; |
1951 | } |
1952 | /* |
1953 | * Quit here only if we can't use |
1954 | * the parent directory pointer or |
1955 | * don't have one. Otherwise, we'll |
1956 | * use it below. |
1957 | */ |
1958 | if ((dp->v_flag & VROOT) || |
1959 | dp == ndp->ni_rootdir || |
1960 | dp->v_parent == NULLVP) { |
1961 | break; |
1962 | } |
1963 | } |
1964 | } |
1965 | |
1966 | if ((cnp->cn_flags & CN_SKIPNAMECACHE)) { |
1967 | /* |
1968 | * Force lookup to go to the filesystem with |
1969 | * all cnp fields set up. |
1970 | */ |
1971 | break; |
1972 | } |
1973 | |
1974 | /* |
1975 | * "." and ".." aren't supposed to be cached, so check |
1976 | * for them before checking the cache. |
1977 | */ |
1978 | if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') { |
1979 | vp = dp; |
1980 | vvid = vid; |
1981 | } else if ((cnp->cn_flags & ISDOTDOT)) { |
1982 | /* |
1983 | * If this is a chrooted process, we need to check if |
1984 | * the process is trying to break out of its chrooted |
1985 | * jail. We do that by trying to determine if dp is |
1986 | * a subdirectory of ndp->ni_rootdir. If we aren't |
1987 | * able to determine that by the v_parent pointers, we |
1988 | * will leave the fast path. |
1989 | * |
1990 | * Since this function may see dotdot components |
1991 | * many times and it has the name cache lock held for |
1992 | * the entire duration, we optimise this by doing this |
1993 | * check only once per cache_lookup_path call. |
1994 | * If dotdotchecked is set, it means we've done this |
1995 | * check once already and don't need to do it again. |
1996 | */ |
1997 | if (!locked && (ndp->ni_rootdir != rootvnode)) { |
1998 | vfs_smr_leave(); |
1999 | needs_lock = true; |
2000 | goto prep_lock_retry; |
2001 | } else if (locked && !dotdotchecked && (ndp->ni_rootdir != rootvnode)) { |
2002 | vnode_t tvp = dp; |
2003 | boolean_t defer = FALSE; |
2004 | boolean_t is_subdir = FALSE; |
2005 | |
defer = cache_check_vnode_issubdir(tvp,
    ndp->ni_rootdir, &is_subdir, &tvp);
2008 | |
2009 | if (defer) { |
2010 | /* defer to Filesystem */ |
2011 | break; |
2012 | } else if (!is_subdir) { |
2013 | /* |
2014 | * This process is trying to break out |
2015 | * of its chrooted jail, so all its |
2016 | * dotdot accesses will be translated to |
2017 | * its root directory. |
2018 | */ |
2019 | vp = ndp->ni_rootdir; |
2020 | } else { |
2021 | /* |
2022 | * All good, let this dotdot access |
2023 | * proceed normally |
2024 | */ |
2025 | vp = dp->v_parent; |
2026 | } |
2027 | dotdotchecked = TRUE; |
2028 | } else { |
2029 | vp = dp->v_parent; |
2030 | } |
2031 | if (!vp) { |
2032 | break; |
2033 | } |
2034 | vvid = vp->v_id; |
2035 | } else { |
2036 | if (!locked) { |
vp = cache_lookup_smr(dp, cnp, &vvid);
if (!vid_is_same(dp, vid)) {
2039 | vp = NULLVP; |
2040 | needs_lock = true; |
2041 | vfs_smr_leave(); |
2042 | goto prep_lock_retry; |
2043 | } |
2044 | } else { |
vp = cache_lookup_locked(dp, cnp, &vvid);
2046 | } |
2047 | |
2048 | |
2049 | if (!vp) { |
2050 | break; |
2051 | } |
2052 | |
2053 | if ((vp->v_flag & VISHARDLINK)) { |
2054 | /* |
2055 | * The file system wants a VNOP_LOOKUP on this vnode |
2056 | */ |
2057 | vp = NULL; |
2058 | break; |
2059 | } |
2060 | |
2061 | #if CONFIG_FIRMLINKS |
2062 | vnode_t v_fmlink = vp->v_fmlink; |
2063 | if (v_fmlink && !(vp->v_flag & VFMLINKTARGET)) { |
2064 | if (cnp->cn_flags & CN_FIRMLINK_NOFOLLOW || |
2065 | ((vp->v_type != VDIR) && (vp->v_type != VLNK))) { |
2066 | /* Leave it to the filesystem */ |
2067 | vp = NULLVP; |
2068 | break; |
2069 | } |
2070 | |
2071 | /* |
2072 | * Always switch to the target unless it is a VLNK |
2073 | * and it is the last component and we have NOFOLLOW |
2074 | * semantics |
2075 | */ |
2076 | if (vp->v_type == VDIR) { |
2077 | vp = v_fmlink; |
2078 | vvid = vnode_vid(vp); |
2079 | } else if ((cnp->cn_flags & FOLLOW) || |
2080 | (ndp->ni_flag & NAMEI_TRAILINGSLASH) || *ndp->ni_next == '/') { |
2081 | if (ndp->ni_loopcnt >= MAXSYMLINKS - 1) { |
2082 | vp = NULLVP; |
2083 | break; |
2084 | } |
2085 | ndp->ni_loopcnt++; |
2086 | vp = v_fmlink; |
2087 | vvid = vnode_vid(vp); |
2088 | } |
2089 | } |
2090 | #endif |
2091 | } |
2092 | if ((cnp->cn_flags & ISLASTCN)) { |
2093 | break; |
2094 | } |
2095 | |
2096 | if (vp->v_type != VDIR) { |
2097 | if (vp->v_type != VLNK) { |
2098 | vp = NULL; |
2099 | } |
2100 | break; |
2101 | } |
2102 | |
2103 | /* |
2104 | * v_mountedhere is PAC protected which means vp has to be a VDIR |
2105 | * to access that pointer as v_mountedhere. However, if we don't |
2106 | * have the name cache lock or an iocount (which we won't in the |
2107 | * !locked case) we can't guarantee that. So we try to detect it |
2108 | * via other fields to avoid having to dereference v_mountedhere |
* when we don't need to. Note that in theory, if an entire reclaim
* happens between the time we check can_check_v_mountedhere()
* and the subsequent access, this can still fail, but the fields
* we check make that exceedingly unlikely: the chances of it
* happening are practically (though not strictly) zero.
2115 | */ |
2116 | if ((locked || can_check_v_mountedhere(vp)) && |
2117 | (mp = vp->v_mountedhere) && ((cnp->cn_flags & NOCROSSMOUNT) == 0)) { |
2118 | vnode_t tmp_vp; |
2119 | int tmp_vid; |
2120 | |
if (!(locked || vid_is_same(vp, vvid))) {
2122 | vp = NULL; |
2123 | break; |
2124 | } |
2125 | tmp_vp = mp->mnt_realrootvp; |
2126 | tmp_vid = mp->mnt_realrootvp_vid; |
2127 | if (tmp_vp == NULLVP || mp->mnt_generation != mount_generation || |
2128 | tmp_vid != tmp_vp->v_id) { |
2129 | break; |
2130 | } |
2131 | |
2132 | if ((mp = tmp_vp->v_mount) == NULL) { |
2133 | break; |
2134 | } |
2135 | |
2136 | vp = tmp_vp; |
2137 | vvid = tmp_vid; |
2138 | dmp = mp; |
2139 | if (dmp->mnt_kern_flag & (MNTK_AUTH_OPAQUE | MNTK_AUTH_CACHE_TTL)) { |
2140 | ttl_enabled = TRUE; |
microuptime(&tv);
2142 | } else { |
2143 | ttl_enabled = FALSE; |
2144 | } |
2145 | } |
2146 | |
2147 | #if CONFIG_TRIGGERS |
2148 | /* |
2149 | * After traversing all mountpoints stacked here, if we have a |
2150 | * trigger in hand, resolve it. Note that we don't need to |
2151 | * leave the fast path if the mount has already happened. |
2152 | */ |
2153 | if (vp->v_resolve) { |
2154 | break; |
2155 | } |
2156 | #endif /* CONFIG_TRIGGERS */ |
2157 | |
if (!(locked || vid_is_same(vp, vvid))) {
2159 | vp = NULL; |
2160 | break; |
2161 | } |
2162 | |
2163 | dp = vp; |
2164 | vid = vvid; |
2165 | vp = NULLVP; |
2166 | vvid = 0; |
2167 | |
2168 | cnp->cn_nameptr = ndp->ni_next + 1; |
2169 | ndp->ni_pathlen--; |
2170 | while (*cnp->cn_nameptr == '/') { |
2171 | cnp->cn_nameptr++; |
2172 | ndp->ni_pathlen--; |
2173 | } |
2174 | } |
2175 | if (!locked) { |
2176 | if (vp && !vnode_hold_smr(vp)) { |
2177 | vp = NULLVP; |
2178 | vvid = 0; |
2179 | } |
2180 | if (!vnode_hold_smr(dp)) { |
2181 | vfs_smr_leave(); |
2182 | if (vp) { |
2183 | vnode_drop(vp); |
2184 | vp = NULLVP; |
2185 | vvid = 0; |
2186 | } |
2187 | goto prep_lock_retry; |
2188 | } |
2189 | vfs_smr_leave(); |
2190 | } else { |
2191 | if (vp != NULLVP) { |
2192 | vvid = vp->v_id; |
2193 | vnode_hold(vp); |
2194 | } |
2195 | vid = dp->v_id; |
2196 | |
vnode_hold(dp);
2198 | NAME_CACHE_UNLOCK(); |
2199 | } |
2200 | |
2201 | tdp = NULLVP; |
2202 | if (!(cnp->cn_flags & DONOTAUTH) && |
2203 | (vp != NULLVP) && (vp->v_type != VLNK) && |
2204 | ((cnp->cn_flags & (ISLASTCN | LOCKPARENT | WANTPARENT | SAVESTART)) == ISLASTCN)) { |
2205 | /* |
2206 | * if we've got a child and it's the last component, and |
2207 | * the lookup doesn't need to return the parent then we |
2208 | * can skip grabbing an iocount on the parent, since all |
2209 | * we're going to do with it is a vnode_put just before |
2210 | * we return from 'lookup'. If it's a symbolic link, |
2211 | * we need the parent in case the link happens to be |
2212 | * a relative pathname. |
2213 | * |
2214 | * However, we can't make this optimisation if we have to call |
2215 | * a MAC hook. |
2216 | */ |
2217 | tdp = dp; |
2218 | dp = NULLVP; |
2219 | } else { |
2220 | need_dp: |
2221 | /* |
2222 | * return the last directory we looked at |
2223 | * with an io reference held. If it was the one passed |
2224 | * in as a result of the last iteration of VNOP_LOOKUP, |
2225 | * it should already hold an io ref. No need to increase ref. |
2226 | */ |
2227 | if (last_dp != dp) { |
2228 | if (dp == ndp->ni_usedvp) { |
2229 | /* |
2230 | * if this vnode matches the one passed in via USEDVP |
* then this context already holds an io_count... just
2232 | * use vnode_get to get an extra ref for lookup to play |
2233 | * with... can't use the getwithvid variant here because |
2234 | * it will block behind a vnode_drain which would result |
2235 | * in a deadlock (since we already own an io_count that the |
2236 | * vnode_drain is waiting on)... vnode_get grabs the io_count |
2237 | * immediately w/o waiting... it always succeeds |
2238 | */ |
2239 | vnode_get(dp); |
2240 | } else if ((error = vnode_getwithvid_drainok(dp, vid))) { |
2241 | /* |
2242 | * failure indicates the vnode |
2243 | * changed identity or is being |
2244 | * TERMINATED... in either case |
2245 | * punt this lookup. |
2246 | * |
2247 | * don't necessarily return ENOENT, though, because |
2248 | * we really want to go back to disk and make sure it's |
2249 | * there or not if someone else is changing this |
2250 | * vnode. That being said, the one case where we do want |
2251 | * to return ENOENT is when the vnode's mount point is |
2252 | * in the process of unmounting and we might cause a deadlock |
2253 | * in our attempt to take an iocount. An ENODEV error return |
* from vnode_get* is an indication of this, but we change it to
* ENOENT for upper layers.
2256 | */ |
2257 | if (error == ENODEV) { |
2258 | error = ENOENT; |
2259 | } else { |
2260 | error = ERECYCLE; |
2261 | } |
vnode_drop(dp);
2263 | if (vp) { |
2264 | vnode_drop(vp); |
2265 | } |
2266 | goto errorout; |
2267 | } |
2268 | dp_iocount_taken = true; |
2269 | } |
vnode_drop(dp);
2271 | } |
2272 | |
2273 | #if CONFIG_MACF |
2274 | /* |
2275 | * Name cache provides authorization caching (see below) |
2276 | * that will short circuit MAC checks in lookup(). |
* We must perform the MAC check here. On denial,
* dp_authorized will remain 0 and a second check will
* be performed in lookup().
2280 | */ |
2281 | if (!(cnp->cn_flags & DONOTAUTH)) { |
error = mac_vnode_check_lookup(ctx, dp, cnp);
2283 | if (error) { |
2284 | *dp_authorized = 0; |
2285 | if (dp_iocount_taken) { |
vnode_put(dp);
2287 | } |
2288 | if (vp) { |
2289 | vnode_drop(vp); |
2290 | vp = NULLVP; |
2291 | } |
2292 | goto errorout; |
2293 | } |
2294 | } |
2295 | #endif /* MAC */ |
2296 | |
2297 | if (vp != NULLVP) { |
2298 | if ((vnode_getwithvid_drainok(vp, vvid))) { |
2299 | vnode_drop(vp); |
2300 | vp = NULLVP; |
2301 | |
2302 | /* |
2303 | * can't get reference on the vp we'd like |
2304 | * to return... if we didn't grab a reference |
2305 | * on the directory (due to fast path bypass), |
2306 | * then we need to do it now... we can't return |
2307 | * with both ni_dvp and ni_vp NULL, and no |
2308 | * error condition |
2309 | */ |
2310 | if (dp == NULLVP) { |
2311 | dp = tdp; |
2312 | tdp = NULLVP; |
2313 | goto need_dp; |
2314 | } |
2315 | } else { |
2316 | vnode_drop(vp); |
2317 | } |
2318 | if (dp_iocount_taken && vp && (vp->v_type != VLNK) && |
2319 | ((cnp->cn_flags & (ISLASTCN | LOCKPARENT | WANTPARENT | SAVESTART)) == ISLASTCN)) { |
vnode_put(dp);
2321 | dp = NULLVP; |
2322 | } |
2323 | } |
2324 | |
2325 | if (tdp) { |
vnode_drop(tdp);
2327 | tdp = NULLVP; |
2328 | } |
2329 | |
2330 | ndp->ni_dvp = dp; |
2331 | ndp->ni_vp = vp; |
2332 | |
2333 | #if CONFIG_TRIGGERS |
2334 | trigger_vp = vp ? vp : dp; |
if ((error == 0) && (trigger_vp != NULLVP) && vnode_isdir(trigger_vp)) {
2336 | error = vnode_trigger_resolve(trigger_vp, ndp, ctx); |
2337 | if (error) { |
2338 | if (vp) { |
2339 | vnode_put(vp); |
2340 | } |
2341 | if (dp) { |
vnode_put(dp);
2343 | } |
2344 | goto errorout; |
2345 | } |
2346 | } |
2347 | #endif /* CONFIG_TRIGGERS */ |
2348 | |
2349 | errorout: |
2350 | /* |
2351 | * If we came into cache_lookup_path after an iteration of the lookup loop that |
* resulted in a call to VNOP_LOOKUP, then VNOP_LOOKUP returned a vnode with an io ref
2353 | * on it. It is now the job of cache_lookup_path to drop the ref on this vnode |
2354 | * when it is no longer needed. If we get to this point, and last_dp is not NULL |
2355 | * and it is ALSO not the dvp we want to return to caller of this function, it MUST be |
2356 | * the case that we got to a subsequent path component and this previous vnode is |
2357 | * no longer needed. We can then drop the io ref on it. |
2358 | */ |
2359 | if ((last_dp != NULLVP) && (last_dp != ndp->ni_dvp)) { |
vnode_put(last_dp);
2361 | } |
2362 | |
// error was initialized to 0 and is unchanged if no error cases occurred.
2364 | return error; |
2365 | |
2366 | prep_lock_retry: |
restore_ndp_state(ndp, cnp, &saved_state);
2368 | dp = start_dp; |
2369 | goto retry; |
2370 | } |
2371 | |
2372 | |
2373 | static vnode_t |
2374 | cache_lookup_locked(vnode_t dvp, struct componentname *cnp, uint32_t *vidp) |
2375 | { |
2376 | struct namecache *ncp; |
2377 | long namelen = cnp->cn_namelen; |
2378 | unsigned int hashval = cnp->cn_hash; |
2379 | |
2380 | if (nc_disabled) { |
2381 | return NULL; |
2382 | } |
2383 | |
2384 | smrq_serialized_foreach(ncp, NCHHASH(dvp, cnp->cn_hash), nc_hash) { |
2385 | if ((ncp->nc_dvp == dvp) && (ncp->nc_hashval == hashval)) { |
if (strncmp(ncp->nc_name, cnp->cn_nameptr, namelen) == 0 && ncp->nc_name[namelen] == 0) {
2387 | break; |
2388 | } |
2389 | } |
2390 | } |
2391 | if (ncp == 0) { |
2392 | /* |
2393 | * We failed to find an entry |
2394 | */ |
2395 | NCHSTAT(ncs_miss); |
2396 | NC_SMR_STATS(clp_next_fail); |
2397 | return NULL; |
2398 | } |
2399 | NCHSTAT(ncs_goodhits); |
2400 | |
2401 | if (!ncp->nc_vp) { |
2402 | return NULL; |
2403 | } |
2404 | |
2405 | *vidp = ncp->nc_vid; |
2406 | NC_SMR_STATS(clp_next); |
2407 | |
2408 | return ncp->nc_vp; |
2409 | } |
2410 | |
2411 | static vnode_t |
2412 | cache_lookup_smr(vnode_t dvp, struct componentname *cnp, uint32_t *vidp) |
2413 | { |
2414 | struct namecache *ncp; |
2415 | long namelen = cnp->cn_namelen; |
2416 | unsigned int hashval = cnp->cn_hash; |
2417 | vnode_t vp = NULLVP; |
2418 | uint32_t vid = 0; |
2419 | uint32_t counter = 1; |
2420 | |
2421 | if (nc_disabled) { |
2422 | return NULL; |
2423 | } |
2424 | |
2425 | smrq_entered_foreach(ncp, NCHHASH(dvp, cnp->cn_hash), nc_hash) { |
2426 | counter = os_atomic_load(&ncp->nc_counter, acquire); |
2427 | if (!(counter & NC_VALID)) { |
2428 | ncp = NULL; |
2429 | goto out; |
2430 | } |
2431 | if ((ncp->nc_dvp == dvp) && (ncp->nc_hashval == hashval)) { |
2432 | const char *nc_name = |
2433 | os_atomic_load(&ncp->nc_name, relaxed); |
2434 | if (nc_name && |
strncmp(nc_name, cnp->cn_nameptr, namelen) == 0 &&
2436 | nc_name[namelen] == 0) { |
2437 | break; |
2438 | } else if (!nc_name) { |
2439 | ncp = NULL; |
2440 | goto out; |
2441 | } |
2442 | } |
2443 | } |
2444 | |
2445 | /* We failed to find an entry */ |
2446 | if (ncp == 0) { |
2447 | goto out; |
2448 | } |
2449 | |
2450 | vp = ncp->nc_vp; |
2451 | vid = ncp->nc_vid; |
2452 | |
2453 | /* |
2454 | * The validity of vp and vid depends on the value of the counter being |
2455 | * the same when we read it first in the loop and now. Anything else |
2456 | * and we can't use this vp & vid. |
2457 | * Hopefully this ncp wasn't reused 2 billion times between the time |
* we read it first and when we read the counter value again.
2459 | */ |
2460 | if (os_atomic_load(&ncp->nc_counter, acquire) != counter) { |
2461 | vp = NULLVP; |
2462 | goto out; |
2463 | } |
2464 | |
2465 | *vidp = vid; |
2466 | NC_SMR_STATS(clp_smr_next); |
2467 | |
2468 | return vp; |
2469 | |
2470 | out: |
2471 | NC_SMR_STATS(clp_smr_next_fail); |
2472 | return NULL; |
2473 | } |
2474 | |
2475 | |
2476 | unsigned int hash_string(const char *cp, int len); |
2477 | // |
2478 | // Have to take a len argument because we may only need to |
2479 | // hash part of a componentname. |
2480 | // |
2481 | unsigned int |
2482 | hash_string(const char *cp, int len) |
2483 | { |
2484 | unsigned hash = 0; |
2485 | |
2486 | if (len) { |
2487 | while (len--) { |
2488 | hash = crc32tab[((hash >> 24) ^ (unsigned char)*cp++)] ^ hash << 8; |
2489 | } |
2490 | } else { |
2491 | while (*cp != '\0') { |
2492 | hash = crc32tab[((hash >> 24) ^ (unsigned char)*cp++)] ^ hash << 8; |
2493 | } |
2494 | } |
2495 | /* |
2496 | * the crc generator can legitimately generate |
2497 | * a 0... however, 0 for us means that we |
2498 | * haven't computed a hash, so use 1 instead |
2499 | */ |
2500 | if (hash == 0) { |
2501 | hash = 1; |
2502 | } |
2503 | return hash; |
2504 | } |
2505 | |
2506 | |
2507 | /* |
2508 | * Lookup an entry in the cache |
2509 | * |
2510 | * We don't do this if the segment name is long, simply so the cache |
2511 | * can avoid holding long names (which would either waste space, or |
2512 | * add greatly to the complexity). |
2513 | * |
2514 | * Lookup is called with dvp pointing to the directory to search, |
2515 | * cnp pointing to the name of the entry being sought. If the lookup |
2516 | * succeeds, the vnode is returned in *vpp, and a status of -1 is |
2517 | * returned. If the lookup determines that the name does not exist |
* (negative caching), a status of ENOENT is returned. If the lookup
2519 | * fails, a status of zero is returned. |
2520 | */ |
2521 | |
2522 | static int |
2523 | cache_lookup_fallback(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp) |
2524 | { |
2525 | struct namecache *ncp; |
2526 | long namelen = cnp->cn_namelen; |
2527 | unsigned int hashval = cnp->cn_hash; |
2528 | boolean_t have_exclusive = FALSE; |
2529 | uint32_t vid; |
2530 | vnode_t vp; |
2531 | |
2532 | NAME_CACHE_LOCK_SHARED(); |
2533 | |
2534 | relook: |
2535 | smrq_serialized_foreach(ncp, NCHHASH(dvp, cnp->cn_hash), nc_hash) { |
2536 | if ((ncp->nc_dvp == dvp) && (ncp->nc_hashval == hashval)) { |
if (strncmp(ncp->nc_name, cnp->cn_nameptr, namelen) == 0 && ncp->nc_name[namelen] == 0) {
2538 | break; |
2539 | } |
2540 | } |
2541 | } |
2542 | /* We failed to find an entry */ |
2543 | if (ncp == 0) { |
2544 | NCHSTAT(ncs_miss); |
2545 | NAME_CACHE_UNLOCK(); |
2546 | return 0; |
2547 | } |
2548 | |
2549 | /* We don't want to have an entry, so dump it */ |
2550 | if ((cnp->cn_flags & MAKEENTRY) == 0) { |
2551 | if (have_exclusive == TRUE) { |
2552 | NCHSTAT(ncs_badhits); |
2553 | cache_delete(ncp, 1); |
2554 | NAME_CACHE_UNLOCK(); |
2555 | return 0; |
2556 | } |
2557 | if (!NAME_CACHE_LOCK_SHARED_TO_EXCLUSIVE()) { |
2558 | NAME_CACHE_LOCK(); |
2559 | } |
2560 | have_exclusive = TRUE; |
2561 | goto relook; |
2562 | } |
2563 | vp = ncp->nc_vp; |
2564 | |
2565 | /* We found a "positive" match, return the vnode */ |
2566 | if (vp) { |
2567 | NCHSTAT(ncs_goodhits); |
2568 | |
2569 | vid = ncp->nc_vid; |
2570 | vnode_hold(vp); |
2571 | NAME_CACHE_UNLOCK(); |
2572 | |
2573 | if (vnode_getwithvid(vp, vid)) { |
2574 | vnode_drop(vp); |
2575 | #if COLLECT_STATS |
2576 | NAME_CACHE_LOCK(); |
2577 | NCHSTAT(ncs_badvid); |
2578 | NAME_CACHE_UNLOCK(); |
2579 | #endif |
2580 | return 0; |
2581 | } |
2582 | vnode_drop(vp); |
2583 | *vpp = vp; |
2584 | NC_SMR_STATS(cl_lock_hits); |
2585 | return -1; |
2586 | } |
2587 | |
2588 | /* We found a negative match, and want to create it, so purge */ |
2589 | if (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) { |
2590 | if (have_exclusive == TRUE) { |
2591 | NCHSTAT(ncs_badhits); |
2592 | cache_delete(ncp, 1); |
2593 | NAME_CACHE_UNLOCK(); |
2594 | return 0; |
2595 | } |
2596 | if (!NAME_CACHE_LOCK_SHARED_TO_EXCLUSIVE()) { |
2597 | NAME_CACHE_LOCK(); |
2598 | } |
2599 | have_exclusive = TRUE; |
2600 | goto relook; |
2601 | } |
2602 | |
2603 | /* |
2604 | * We found a "negative" match, ENOENT notifies client of this match. |
2605 | */ |
2606 | NCHSTAT(ncs_neghits); |
2607 | |
2608 | NAME_CACHE_UNLOCK(); |
2609 | return ENOENT; |
2610 | } |
2611 | |
2612 | |
2613 | |
2614 | /* |
2615 | * Lookup an entry in the cache |
2616 | * |
2617 | * Lookup is called with dvp pointing to the directory to search, |
2618 | * cnp pointing to the name of the entry being sought. If the lookup |
2619 | * succeeds, the vnode is returned in *vpp, and a status of -1 is |
2620 | * returned. If the lookup determines that the name does not exist |
* (negative caching), a status of ENOENT is returned. If the lookup
2622 | * fails, a status of zero is returned. |
2623 | */ |
2624 | int |
2625 | cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp) |
2626 | { |
2627 | struct namecache *ncp; |
2628 | long namelen = cnp->cn_namelen; |
2629 | vnode_t vp; |
2630 | uint32_t vid = 0; |
2631 | uint32_t counter = 1; |
2632 | unsigned int hashval; |
2633 | |
2634 | *vpp = NULLVP; |
2635 | |
2636 | if (cnp->cn_hash == 0) { |
cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);
2638 | } |
2639 | hashval = cnp->cn_hash; |
2640 | |
2641 | if (nc_disabled) { |
2642 | return 0; |
2643 | } |
2644 | |
2645 | if (!nc_smr_enabled) { |
2646 | goto out_fallback; |
2647 | } |
2648 | |
2649 | /* We don't want to have an entry, so dump it */ |
2650 | if ((cnp->cn_flags & MAKEENTRY) == 0) { |
2651 | goto out_fallback; |
2652 | } |
2653 | |
2654 | vfs_smr_enter(); |
2655 | |
2656 | smrq_entered_foreach(ncp, NCHHASH(dvp, cnp->cn_hash), nc_hash) { |
2657 | counter = os_atomic_load(&ncp->nc_counter, acquire); |
2658 | if (!(counter & NC_VALID)) { |
2659 | vfs_smr_leave(); |
2660 | goto out_fallback; |
2661 | } |
2662 | if ((ncp->nc_dvp == dvp) && (ncp->nc_hashval == hashval)) { |
2663 | const char *nc_name = |
2664 | os_atomic_load(&ncp->nc_name, relaxed); |
2665 | if (nc_name && |
strncmp(nc_name, cnp->cn_nameptr, namelen) == 0 &&
2667 | nc_name[namelen] == 0) { |
2668 | break; |
2669 | } else if (!nc_name) { |
2670 | vfs_smr_leave(); |
2671 | goto out_fallback; |
2672 | } |
2673 | } |
2674 | } |
2675 | |
2676 | /* We failed to find an entry */ |
2677 | if (ncp == 0) { |
2678 | NCHSTAT(ncs_miss); |
2679 | vfs_smr_leave(); |
2680 | NC_SMR_STATS(cl_smr_miss); |
2681 | return 0; |
2682 | } |
2683 | |
2684 | vp = ncp->nc_vp; |
2685 | vid = ncp->nc_vid; |
2686 | |
2687 | /* |
2688 | * The validity of vp and vid depends on the value of the counter being |
2689 | * the same when we read it first in the loop and now. Anything else |
2690 | * and we can't use this vp & vid. |
2691 | * Hopefully this ncp wasn't reused 2 billion times between the time |
* we read it first and when we read the counter value again.
2693 | */ |
2694 | if (os_atomic_load(&ncp->nc_counter, acquire) != counter) { |
2695 | vfs_smr_leave(); |
2696 | goto out_fallback; |
2697 | } |
2698 | |
2699 | if (vp) { |
2700 | bool holdcount_acquired = vnode_hold_smr(vp); |
2701 | |
2702 | vfs_smr_leave(); |
2703 | |
2704 | if (!holdcount_acquired) { |
2705 | goto out_fallback; |
2706 | } |
2707 | |
2708 | if (vnode_getwithvid(vp, vid) != 0) { |
2709 | vnode_drop(vp); |
2710 | goto out_fallback; |
2711 | } |
2712 | vnode_drop(vp); |
2713 | NCHSTAT(ncs_goodhits); |
2714 | |
2715 | *vpp = vp; |
2716 | NC_SMR_STATS(cl_smr_hits); |
2717 | return -1; |
2718 | } |
2719 | |
2720 | vfs_smr_leave(); |
2721 | |
2722 | /* We found a negative match, and want to create it, so purge */ |
2723 | if (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) { |
2724 | goto out_fallback; |
2725 | } |
2726 | |
2727 | /* |
2728 | * We found a "negative" match, ENOENT notifies client of this match. |
2729 | */ |
2730 | NCHSTAT(ncs_neghits); |
2731 | NC_SMR_STATS(cl_smr_negative_hits); |
2732 | return ENOENT; |
2733 | |
2734 | out_fallback: |
2735 | NC_SMR_STATS(cl_smr_fallback); |
2736 | return cache_lookup_fallback(dvp, vpp, cnp); |
2737 | } |
2738 | |
2739 | const char * |
2740 | cache_enter_create(vnode_t dvp, vnode_t vp, struct componentname *cnp) |
2741 | { |
2742 | const char *strname; |
2743 | |
2744 | if (cnp->cn_hash == 0) { |
cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);
2746 | } |
2747 | |
2748 | /* |
2749 | * grab 2 references on the string entered |
2750 | * one for the cache_enter_locked to consume |
2751 | * and the second to be consumed by v_name (vnode_create call point) |
2752 | */ |
2753 | strname = add_name_internal(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, TRUE, 0); |
2754 | |
2755 | NAME_CACHE_LOCK(); |
2756 | |
2757 | cache_enter_locked(dvp, vp, cnp, strname); |
2758 | |
2759 | NAME_CACHE_UNLOCK(); |
2760 | |
2761 | return strname; |
2762 | } |
2763 | |
2764 | |
2765 | /* |
2766 | * Add an entry to the cache... |
2767 | * but first check to see if the directory |
2768 | * that this entry is to be associated with has |
2769 | * had any cache_purges applied since we took |
2770 | * our identity snapshot... this check needs to |
2771 | * be done behind the name cache lock |
2772 | */ |
2773 | void |
2774 | cache_enter_with_gen(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, int gen) |
2775 | { |
2776 | if (cnp->cn_hash == 0) { |
cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);
2778 | } |
2779 | |
2780 | NAME_CACHE_LOCK(); |
2781 | |
2782 | if (dvp->v_nc_generation == gen) { |
2783 | (void)cache_enter_locked(dvp, vp, cnp, NULL); |
2784 | } |
2785 | |
2786 | NAME_CACHE_UNLOCK(); |
2787 | } |
2788 | |
2789 | |
2790 | /* |
2791 | * Add an entry to the cache. |
2792 | */ |
2793 | void |
2794 | cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp) |
2795 | { |
2796 | const char *strname; |
2797 | |
2798 | if (cnp->cn_hash == 0) { |
cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);
2800 | } |
2801 | |
2802 | /* |
2803 | * grab 1 reference on the string entered |
2804 | * for the cache_enter_locked to consume |
2805 | */ |
2806 | strname = add_name_internal(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, FALSE, 0); |
2807 | |
2808 | NAME_CACHE_LOCK(); |
2809 | |
2810 | cache_enter_locked(dvp, vp, cnp, strname); |
2811 | |
2812 | NAME_CACHE_UNLOCK(); |
2813 | } |
2814 | |
2815 | |
2816 | static void |
2817 | cache_enter_locked(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, const char *strname) |
2818 | { |
2819 | struct namecache *ncp, *negp; |
2820 | struct smrq_list_head *ncpp; |
2821 | |
2822 | if (nc_disabled) { |
2823 | return; |
2824 | } |
2825 | |
2826 | /* |
2827 | * if the entry is for -ve caching vp is null |
2828 | */ |
2829 | if ((vp != NULLVP) && (LIST_FIRST(&vp->v_nclinks))) { |
2830 | /* |
2831 | * someone beat us to the punch.. |
2832 | * this vnode is already in the cache |
2833 | */ |
2834 | if (strname != NULL) { |
vfs_removename(strname);
2836 | } |
2837 | return; |
2838 | } |
2839 | /* |
2840 | * We allocate a new entry if we are less than the maximum |
2841 | * allowed and the one at the front of the list is in use. |
2842 | * Otherwise we use the one at the front of the list. |
2843 | */ |
2844 | if (numcache < desiredNodes && |
2845 | ((ncp = nchead.tqh_first) == NULL || |
2846 | (ncp->nc_counter & NC_VALID))) { |
2847 | /* |
2848 | * Allocate one more entry |
2849 | */ |
2850 | if (nc_smr_enabled) { |
2851 | ncp = zalloc_smr(namecache_zone, Z_WAITOK_ZERO_NOFAIL); |
2852 | } else { |
ncp = zalloc(namecache_zone);
2854 | } |
2855 | ncp->nc_counter = 0; |
2856 | numcache++; |
2857 | } else { |
2858 | /* |
2859 | * reuse an old entry |
2860 | */ |
2861 | ncp = TAILQ_FIRST(&nchead); |
2862 | TAILQ_REMOVE(&nchead, ncp, nc_entry); |
2863 | |
2864 | if (ncp->nc_counter & NC_VALID) { |
2865 | /* |
2866 | * still in use... we need to |
2867 | * delete it before re-using it |
2868 | */ |
2869 | NCHSTAT(ncs_stolen); |
2870 | cache_delete(ncp, 0); |
2871 | } |
2872 | } |
2873 | NCHSTAT(ncs_enters); |
2874 | |
2875 | /* |
* Fill in cache info. If vp is NULL, this is a "negative" cache entry.
2877 | */ |
2878 | if (vp) { |
2879 | ncp->nc_vid = vnode_vid(vp); |
2880 | vnode_hold(vp); |
2881 | } |
2882 | ncp->nc_vp = vp; |
2883 | ncp->nc_dvp = dvp; |
2884 | ncp->nc_hashval = cnp->cn_hash; |
2885 | |
2886 | if (strname == NULL) { |
2887 | ncp->nc_name = add_name_internal(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, FALSE, 0); |
2888 | } else { |
2889 | ncp->nc_name = strname; |
2890 | } |
2891 | |
2892 | // |
2893 | // If the bytes of the name associated with the vnode differ, |
2894 | // use the name associated with the vnode since the file system |
2895 | // may have set that explicitly in the case of a lookup on a |
2896 | // case-insensitive file system where the case of the looked up |
2897 | // name differs from what is on disk. For more details, see: |
2898 | // <rdar://problem/8044697> FSEvents doesn't always decompose diacritical unicode chars in the paths of the changed directories |
2899 | // |
2900 | const char *vn_name = vp ? vp->v_name : NULL; |
unsigned int len = vn_name ? (unsigned int)strlen(vn_name) : 0;
if (vn_name && ncp && ncp->nc_name && strncmp(ncp->nc_name, vn_name, len) != 0) {
unsigned int hash = hash_string(vn_name, len);

vfs_removename(ncp->nc_name);
2906 | ncp->nc_name = add_name_internal(vn_name, len, hash, FALSE, 0); |
2907 | ncp->nc_hashval = hash; |
2908 | } |
2909 | |
2910 | /* |
2911 | * make us the newest entry in the cache |
2912 | * i.e. we'll be the last to be stolen |
2913 | */ |
2914 | TAILQ_INSERT_TAIL(&nchead, ncp, nc_entry); |
2915 | |
2916 | ncpp = NCHHASH(dvp, cnp->cn_hash); |
2917 | #if DIAGNOSTIC |
2918 | { |
2919 | struct namecache *p; |
2920 | |
2921 | smrq_serialized_foreach(p, ncpp, nc_hash) { |
2922 | if (p == ncp) { |
2923 | panic("cache_enter: duplicate" ); |
2924 | } |
2925 | } |
2926 | } |
2927 | #endif |
2928 | /* |
2929 | * make us available to be found via lookup |
2930 | */ |
2931 | smrq_serialized_insert_head(ncpp, &ncp->nc_hash); |
2932 | |
2933 | if (vp) { |
2934 | /* |
2935 | * add to the list of name cache entries |
2936 | * that point at vp |
2937 | */ |
2938 | LIST_INSERT_HEAD(&vp->v_nclinks, ncp, nc_un.nc_link); |
2939 | } else { |
2940 | /* |
2941 | * this is a negative cache entry (vp == NULL) |
2942 | * stick it on the negative cache list. |
2943 | */ |
2944 | TAILQ_INSERT_TAIL(&neghead, ncp, nc_un.nc_negentry); |
2945 | |
2946 | ncs_negtotal++; |
2947 | |
2948 | if (ncs_negtotal > desiredNegNodes) { |
2949 | /* |
2950 | * if we've reached our desired limit |
2951 | * of negative cache entries, delete |
2952 | * the oldest |
2953 | */ |
2954 | negp = TAILQ_FIRST(&neghead); |
2955 | cache_delete(negp, 1); |
2956 | } |
2957 | } |
2958 | |
2959 | /* |
2960 | * add us to the list of name cache entries that |
2961 | * are children of dvp |
2962 | */ |
2963 | if (vp) { |
2964 | TAILQ_INSERT_TAIL(&dvp->v_ncchildren, ncp, nc_child); |
2965 | } else { |
2966 | TAILQ_INSERT_HEAD(&dvp->v_ncchildren, ncp, nc_child); |
2967 | } |
2968 | |
2969 | /* |
2970 | * nc_counter represents a sequence counter and 1 bit valid flag. |
2971 | * When the counter value is odd, it represents a valid and in use |
2972 | * namecache structure. We increment the value on every state transition |
* (invalid to valid here, and valid to invalid in cache_delete()).
2974 | * Lockless readers have to read the value before reading other fields |
2975 | * and ensure that the field is valid and remains the same after the fields |
2976 | * have been read. |
2977 | */ |
2978 | uint32_t old_count = os_atomic_inc_orig(&ncp->nc_counter, release); |
2979 | if (old_count & NC_VALID) { |
/* This should be an invalid to valid transition */
panic("Incorrect state for old nc_counter(%d), should be even", old_count);
2982 | } |
2983 | } |
2984 | |
2985 | |
2986 | /* |
2987 | * Initialize CRC-32 remainder table. |
2988 | */ |
2989 | static void |
2990 | init_crc32(void) |
2991 | { |
2992 | /* |
2993 | * the CRC-32 generator polynomial is: |
* x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10
2995 | * + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1 |
2996 | */ |
2997 | unsigned int crc32_polynomial = 0x04c11db7; |
2998 | unsigned int i, j; |
2999 | |
3000 | /* |
3001 | * pre-calculate the CRC-32 remainder for each possible octet encoding |
3002 | */ |
3003 | for (i = 0; i < 256; i++) { |
3004 | unsigned int crc_rem = i << 24; |
3005 | |
3006 | for (j = 0; j < 8; j++) { |
3007 | if (crc_rem & 0x80000000) { |
3008 | crc_rem = (crc_rem << 1) ^ crc32_polynomial; |
3009 | } else { |
3010 | crc_rem = (crc_rem << 1); |
3011 | } |
3012 | } |
3013 | crc32tab[i] = crc_rem; |
3014 | } |
3015 | } |
3016 | |
3017 | |
3018 | /* |
3019 | * Name cache initialization, from vfs_init() when we are booting |
3020 | */ |
3021 | void |
3022 | nchinit(void) |
3023 | { |
3024 | desiredNegNodes = (desiredvnodes / 10); |
3025 | desiredNodes = desiredvnodes + desiredNegNodes; |
3026 | |
3027 | if (nc_smr_enabled) { |
zone_enable_smr(namecache_zone, VFS_SMR(), &namecache_smr_free);
zone_enable_smr(stringcache_zone, VFS_SMR(), &string_smr_free);
3030 | } |
3031 | TAILQ_INIT(&nchead); |
3032 | TAILQ_INIT(&neghead); |
3033 | |
3034 | init_crc32(); |
3035 | |
nchashtbl = hashinit(MAX(CONFIG_NC_HASH, (2 * desiredNodes)), M_CACHE, &nchash);
3037 | nchashmask = nchash; |
3038 | nchash++; |
3039 | |
3040 | init_string_table(); |
3041 | |
3042 | for (int i = 0; i < NUM_STRCACHE_LOCKS; i++) { |
lck_mtx_init(&strcache_mtx_locks[i], &strcache_lck_grp, &strcache_lck_attr);
3044 | } |
3045 | } |
3046 | |
3047 | void |
3048 | name_cache_lock_shared(void) |
3049 | { |
lck_rw_lock_shared(&namecache_rw_lock);
3051 | NC_SMR_STATS(nc_lock_shared); |
3052 | } |
3053 | |
3054 | void |
3055 | name_cache_lock(void) |
3056 | { |
lck_rw_lock_exclusive(&namecache_rw_lock);
3058 | NC_SMR_STATS(nc_lock); |
3059 | } |
3060 | |
3061 | boolean_t |
3062 | name_cache_lock_shared_to_exclusive(void) |
3063 | { |
return lck_rw_lock_shared_to_exclusive(&namecache_rw_lock);
3065 | } |
3066 | |
3067 | void |
3068 | name_cache_unlock(void) |
3069 | { |
lck_rw_done(&namecache_rw_lock);
3071 | } |
3072 | |
3073 | |
3074 | int |
3075 | resize_namecache(int newsize) |
3076 | { |
3077 | struct smrq_list_head *new_table; |
3078 | struct smrq_list_head *old_table; |
3079 | struct smrq_list_head *old_head; |
3080 | struct namecache *entry; |
3081 | uint32_t i, hashval; |
3082 | int dNodes, dNegNodes, nelements; |
3083 | u_long new_size, old_size; |
3084 | |
3085 | if (newsize < 0) { |
3086 | return EINVAL; |
3087 | } |
3088 | |
3089 | dNegNodes = (newsize / 10); |
3090 | dNodes = newsize + dNegNodes; |
3091 | // we don't support shrinking yet |
3092 | if (dNodes <= desiredNodes) { |
3093 | return 0; |
3094 | } |
3095 | |
3096 | if (os_mul_overflow(dNodes, 2, &nelements)) { |
3097 | return EINVAL; |
3098 | } |
3099 | |
new_table = hashinit(nelements, M_CACHE, &nchashmask);
3101 | new_size = nchashmask + 1; |
3102 | |
3103 | if (new_table == NULL) { |
3104 | return ENOMEM; |
3105 | } |
3106 | |
3107 | NAME_CACHE_LOCK(); |
3108 | |
3109 | /* No need to switch if the hash table size hasn't changed. */ |
3110 | if (new_size == nchash) { |
3111 | NAME_CACHE_UNLOCK(); |
hashdestroy(new_table, M_CACHE, new_size - 1);
3113 | return 0; |
3114 | } |
3115 | |
3116 | // do the switch! |
3117 | old_table = nchashtbl; |
3118 | nchashtbl = new_table; |
3119 | old_size = nchash; |
3120 | nchash = new_size; |
3121 | |
3122 | // walk the old table and insert all the entries into |
3123 | // the new table |
3124 | // |
3125 | for (i = 0; i < old_size; i++) { |
3126 | old_head = &old_table[i]; |
3127 | smrq_serialized_foreach_safe(entry, old_head, nc_hash) { |
3128 | // |
3129 | // XXXdbg - Beware: this assumes that hash_string() does |
3130 | // the same thing as what happens in |
3131 | // lookup() over in vfs_lookup.c |
hashval = hash_string(entry->nc_name, 0);
3133 | entry->nc_hashval = hashval; |
3134 | |
3135 | smrq_serialized_insert_head(NCHHASH(entry->nc_dvp, hashval), &entry->nc_hash); |
3136 | } |
3137 | } |
3138 | desiredNodes = dNodes; |
3139 | desiredNegNodes = dNegNodes; |
3140 | |
3141 | NAME_CACHE_UNLOCK(); |
hashdestroy(old_table, M_CACHE, old_size - 1);
3143 | |
3144 | return 0; |
3145 | } |
3146 | |
3147 | static void |
3148 | namecache_smr_free(void *_ncp, __unused size_t _size) |
3149 | { |
3150 | struct namecache *ncp = _ncp; |
3151 | |
bzero(ncp, sizeof(*ncp));
3153 | } |
3154 | |
3155 | static void |
3156 | cache_delete(struct namecache *ncp, int free_entry) |
3157 | { |
3158 | NCHSTAT(ncs_deletes); |
3159 | |
3160 | /* |
* See the comment at the end of cache_enter_locked explaining the usage of
3162 | * nc_counter. |
3163 | */ |
3164 | uint32_t old_count = os_atomic_inc_orig(&ncp->nc_counter, release); |
3165 | if (!(old_count & NC_VALID)) { |
3166 | /* This should be a valid to invalid transition */ |
3167 | panic("Incorrect state for old nc_counter(%d), should be odd" , old_count); |
3168 | } |
3169 | |
3170 | if (ncp->nc_vp) { |
3171 | LIST_REMOVE(ncp, nc_un.nc_link); |
3172 | } else { |
3173 | TAILQ_REMOVE(&neghead, ncp, nc_un.nc_negentry); |
3174 | ncs_negtotal--; |
3175 | } |
3176 | TAILQ_REMOVE(&(ncp->nc_dvp->v_ncchildren), ncp, nc_child); |
3177 | |
3178 | smrq_serialized_remove((NCHHASH(ncp->nc_dvp, ncp->nc_hashval)), &ncp->nc_hash); |
3179 | |
3180 | const char *nc_name = ncp->nc_name; |
3181 | ncp->nc_name = NULL; |
vfs_removename(nc_name);
3183 | if (ncp->nc_vp) { |
3184 | vnode_t vp = ncp->nc_vp; |
3185 | |
3186 | ncp->nc_vp = NULLVP; |
3187 | vnode_drop(vp); |
3188 | } |
3189 | |
3190 | if (free_entry) { |
3191 | TAILQ_REMOVE(&nchead, ncp, nc_entry); |
3192 | if (nc_smr_enabled) { |
3193 | zfree_smr(namecache_zone, ncp); |
3194 | } else { |
3195 | zfree(namecache_zone, ncp); |
3196 | } |
3197 | numcache--; |
3198 | } |
3199 | } |
3200 | |
3201 | |
3202 | /* |
3203 | * purge the entry associated with the |
3204 | * specified vnode from the name cache |
3205 | */ |
3206 | static void |
3207 | cache_purge_locked(vnode_t vp, kauth_cred_t *credp) |
3208 | { |
3209 | struct namecache *ncp; |
3210 | |
3211 | *credp = NULL; |
3212 | if ((LIST_FIRST(&vp->v_nclinks) == NULL) && |
3213 | (TAILQ_FIRST(&vp->v_ncchildren) == NULL) && |
3214 | (vnode_cred(vp) == NOCRED) && |
3215 | (vp->v_parent == NULLVP)) { |
3216 | return; |
3217 | } |
3218 | |
3219 | if (vp->v_parent) { |
3220 | vp->v_parent->v_nc_generation++; |
3221 | } |
3222 | |
3223 | while ((ncp = LIST_FIRST(&vp->v_nclinks))) { |
cache_delete(ncp, 1);
3225 | } |
3226 | |
3227 | while ((ncp = TAILQ_FIRST(&vp->v_ncchildren))) { |
cache_delete(ncp, 1);
3229 | } |
3230 | |
3231 | /* |
3232 | * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held |
3233 | */ |
3234 | *credp = vnode_cred(vp); |
3235 | vp->v_cred = NOCRED; |
3236 | vp->v_authorized_actions = 0; |
3237 | } |
3238 | |
3239 | void |
3240 | cache_purge(vnode_t vp) |
3241 | { |
3242 | kauth_cred_t tcred = NULL; |
3243 | |
3244 | if ((LIST_FIRST(&vp->v_nclinks) == NULL) && |
3245 | (TAILQ_FIRST(&vp->v_ncchildren) == NULL) && |
3246 | (vnode_cred(vp) == NOCRED) && |
3247 | (vp->v_parent == NULLVP)) { |
3248 | return; |
3249 | } |
3250 | |
3251 | NAME_CACHE_LOCK(); |
3252 | |
cache_purge_locked(vp, &tcred);
3254 | |
3255 | NAME_CACHE_UNLOCK(); |
3256 | |
3257 | if (IS_VALID_CRED(tcred)) { |
3258 | kauth_cred_unref(&tcred); |
3259 | } |
3260 | } |
3261 | |
3262 | /* |
3263 | * Purge all negative cache entries that are children of the |
3264 | * given vnode. A case-insensitive file system (or any file |
3265 | * system that has multiple equivalent names for the same |
3266 | * directory entry) can use this when creating or renaming |
3267 | * to remove negative entries that may no longer apply. |
3268 | */ |
3269 | void |
3270 | cache_purge_negatives(vnode_t vp) |
3271 | { |
3272 | struct namecache *ncp, *next_ncp; |
3273 | |
3274 | NAME_CACHE_LOCK(); |
3275 | |
3276 | TAILQ_FOREACH_SAFE(ncp, &vp->v_ncchildren, nc_child, next_ncp) { |
3277 | if (ncp->nc_vp) { |
3278 | break; |
3279 | } |
3280 | |
cache_delete(ncp, 1);
3282 | } |
3283 | |
3284 | NAME_CACHE_UNLOCK(); |
3285 | } |
3286 | |
3287 | /* |
3288 | * Flush all entries referencing a particular filesystem. |
3289 | * |
3290 | * Since we need to check it anyway, we will flush all the invalid |
3291 | * entries at the same time. |
3292 | */ |
3293 | void |
3294 | cache_purgevfs(struct mount *mp) |
3295 | { |
3296 | struct smrq_list_head *ncpp; |
3297 | struct namecache *ncp; |
3298 | |
3299 | NAME_CACHE_LOCK(); |
3300 | /* Scan hash tables for applicable entries */ |
3301 | for (ncpp = &nchashtbl[nchash - 1]; ncpp >= nchashtbl; ncpp--) { |
3302 | restart: |
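/*
 * cache_delete() unlinks the entry from this hash chain,
 * invalidating the iterator, so restart the scan of this
 * bucket after every deletion.
 */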
3303 | smrq_serialized_foreach(ncp, ncpp, nc_hash) { |
3304 | if (ncp->nc_dvp->v_mount == mp) { |
cache_delete(ncp, 0);
3306 | goto restart; |
3307 | } |
3308 | } |
3309 | } |
3310 | NAME_CACHE_UNLOCK(); |
3311 | } |
3312 | |
3313 | |
3314 | |
3315 | // |
3316 | // String ref routines |
3317 | // |
3318 | static LIST_HEAD(stringhead, string_t) * string_ref_table; |
3319 | static u_long string_table_mask; |
3320 | static uint32_t filled_buckets = 0; |
3321 | |
3322 | |
3323 | |
3324 | |
3325 | static void |
3326 | resize_string_ref_table(void) |
3327 | { |
3328 | struct stringhead *new_table; |
3329 | struct stringhead *old_table; |
3330 | struct stringhead *old_head, *head; |
3331 | string_t *entry, *next; |
3332 | uint32_t i, hashval; |
3333 | u_long new_mask, old_mask; |
3334 | |
3335 | /* |
3336 | * need to hold the table lock exclusively |
3337 | * in order to grow the table... need to recheck |
3338 | * the need to resize again after we've taken |
3339 | * the lock exclusively in case some other thread |
3340 | * beat us to the punch |
3341 | */ |
lck_rw_lock_exclusive(&strtable_rw_lock);
3343 | |
3344 | if (4 * filled_buckets < ((string_table_mask + 1) * 3)) { |
lck_rw_done(&strtable_rw_lock);
3346 | return; |
3347 | } |
3348 | assert(string_table_mask < INT32_MAX); |
new_table = hashinit((int)(string_table_mask + 1) * 2, M_CACHE, &new_mask);
3350 | |
3351 | if (new_table == NULL) { |
3352 | printf("failed to resize the hash table.\n" ); |
3353 | lck_rw_done(lck: &strtable_rw_lock); |
3354 | return; |
3355 | } |
3356 | |
3357 | // do the switch! |
3358 | old_table = string_ref_table; |
3359 | string_ref_table = new_table; |
3360 | old_mask = string_table_mask; |
3361 | string_table_mask = new_mask; |
3362 | filled_buckets = 0; |
3363 | |
3364 | // walk the old table and insert all the entries into |
3365 | // the new table |
3366 | // |
3367 | for (i = 0; i <= old_mask; i++) { |
3368 | old_head = &old_table[i]; |
3369 | for (entry = old_head->lh_first; entry != NULL; entry = next) { |
hashval = hash_string((const char *)entry->str, 0);
3371 | head = &string_ref_table[hashval & string_table_mask]; |
3372 | if (head->lh_first == NULL) { |
3373 | filled_buckets++; |
3374 | } |
3375 | next = entry->hash_chain.le_next; |
3376 | LIST_INSERT_HEAD(head, entry, hash_chain); |
3377 | } |
3378 | } |
3379 | lck_rw_done(lck: &strtable_rw_lock); |
3380 | |
hashdestroy(old_table, M_CACHE, old_mask);
3382 | } |
3383 | |
3384 | |
3385 | static void |
3386 | init_string_table(void) |
3387 | { |
string_ref_table = hashinit(CONFIG_VFS_NAMES, M_CACHE, &string_table_mask);
3389 | } |
3390 | |
3391 | |
3392 | const char * |
3393 | vfs_addname(const char *name, uint32_t len, u_int hashval, u_int flags) |
3394 | { |
3395 | return add_name_internal(name, len, hashval, FALSE, flags); |
3396 | } |
3397 | |
3398 | |
3399 | static const char * |
add_name_internal(const char *name, uint32_t len, u_int hashval, boolean_t need_extra_ref, __unused u_int flags)
3401 | { |
3402 | struct stringhead *head; |
3403 | string_t *entry; |
3404 | uint32_t chain_len = 0; |
3405 | uint32_t hash_index; |
3406 | uint32_t lock_index; |
3407 | char *ptr; |
3408 | |
3409 | if (len > MAXPATHLEN) { |
3410 | len = MAXPATHLEN; |
3411 | } |
3412 | |
3413 | /* |
3414 | * if the length already accounts for the null-byte, then |
3415 | * subtract one so later on we don't index past the end |
3416 | * of the string. |
3417 | */ |
3418 | if (len > 0 && name[len - 1] == '\0') { |
3419 | len--; |
3420 | } |
3421 | if (hashval == 0) { |
hashval = hash_string(name, len);
3423 | } |
3424 | |
3425 | /* |
3426 | * take this lock 'shared' to keep the hash stable |
3427 | * if someone else decides to grow the pool they |
3428 | * will take this lock exclusively |
3429 | */ |
lck_rw_lock_shared(&strtable_rw_lock);
3431 | |
3432 | /* |
3433 | * If the table gets more than 3/4 full, resize it |
3434 | */ |
3435 | if (4 * filled_buckets >= ((string_table_mask + 1) * 3)) { |
lck_rw_done(&strtable_rw_lock);
3437 | |
3438 | resize_string_ref_table(); |
3439 | |
lck_rw_lock_shared(&strtable_rw_lock);
3441 | } |
3442 | hash_index = hashval & string_table_mask; |
3443 | lock_index = hash_index % NUM_STRCACHE_LOCKS; |
3444 | |
3445 | head = &string_ref_table[hash_index]; |
3446 | |
lck_mtx_lock_spin(&strcache_mtx_locks[lock_index]);
3448 | |
3449 | for (entry = head->lh_first; entry != NULL; chain_len++, entry = entry->hash_chain.le_next) { |
if (strncmp(entry->str, name, len) == 0 && entry->str[len] == 0) {
3451 | entry->refcount++; |
3452 | break; |
3453 | } |
3454 | } |
3455 | if (entry == NULL) { |
3456 | const uint32_t buflen = len + 1; |
3457 | |
lck_mtx_convert_spin(&strcache_mtx_locks[lock_index]);
3459 | /* |
3460 | * it wasn't already there so add it. |
3461 | */ |
3462 | if (nc_smr_enabled) { |
3463 | entry = zalloc_smr(stringcache_zone, Z_WAITOK_ZERO_NOFAIL); |
3464 | } else { |
entry = zalloc(stringcache_zone);
3466 | } |
3467 | |
3468 | if (head->lh_first == NULL) { |
3469 | OSAddAtomic(1, &filled_buckets); |
3470 | } |
3471 | ptr = kalloc_data(buflen, Z_WAITOK); |
3472 | strncpy(ptr, name, len); |
3473 | ptr[len] = '\0'; |
3474 | entry->str = ptr; |
3475 | entry->strbuflen = buflen; |
3476 | entry->refcount = 1; |
3477 | LIST_INSERT_HEAD(head, entry, hash_chain); |
3478 | } |
3479 | if (need_extra_ref == TRUE) { |
3480 | entry->refcount++; |
3481 | } |
3482 | |
lck_mtx_unlock(&strcache_mtx_locks[lock_index]);
lck_rw_done(&strtable_rw_lock);
3485 | |
3486 | return (const char *)entry->str; |
3487 | } |
3488 | |
3489 | static void |
3490 | string_smr_free(void *_entry, __unused size_t size) |
3491 | { |
3492 | string_t *entry = _entry; |
3493 | |
3494 | kfree_data(entry->str, entry->strbuflen); |
bzero(entry, sizeof(*entry));
3496 | } |
3497 | |
3498 | int |
3499 | vfs_removename(const char *nameref) |
3500 | { |
3501 | struct stringhead *head; |
3502 | string_t *entry; |
3503 | uint32_t hashval; |
3504 | uint32_t hash_index; |
3505 | uint32_t lock_index; |
3506 | int retval = ENOENT; |
3507 | |
hashval = hash_string(nameref, 0);
3509 | |
3510 | /* |
3511 | * take this lock 'shared' to keep the hash stable |
3512 | * if someone else decides to grow the pool they |
3513 | * will take this lock exclusively |
3514 | */ |
lck_rw_lock_shared(&strtable_rw_lock);
3516 | /* |
3517 | * must compute the head behind the table lock |
3518 | * since the size and location of the table |
3519 | * can change on the fly |
3520 | */ |
3521 | hash_index = hashval & string_table_mask; |
3522 | lock_index = hash_index % NUM_STRCACHE_LOCKS; |
3523 | |
3524 | head = &string_ref_table[hash_index]; |
3525 | |
lck_mtx_lock_spin(&strcache_mtx_locks[lock_index]);
3527 | |
3528 | for (entry = head->lh_first; entry != NULL; entry = entry->hash_chain.le_next) { |
3529 | if (entry->str == nameref) { |
3530 | entry->refcount--; |
3531 | |
3532 | if (entry->refcount == 0) { |
3533 | LIST_REMOVE(entry, hash_chain); |
3534 | |
3535 | if (head->lh_first == NULL) { |
3536 | OSAddAtomic(-1, &filled_buckets); |
3537 | } |
3538 | } else { |
3539 | entry = NULL; |
3540 | } |
3541 | retval = 0; |
3542 | break; |
3543 | } |
3544 | } |
lck_mtx_unlock(&strcache_mtx_locks[lock_index]);
lck_rw_done(&strtable_rw_lock);
3547 | |
3548 | if (entry) { |
3549 | assert(entry->refcount == 0); |
3550 | if (nc_smr_enabled) { |
3551 | zfree_smr(stringcache_zone, entry); |
3552 | } else { |
3553 | kfree_data(entry->str, entry->strbuflen); |
3554 | entry->str = NULL; |
3555 | entry->strbuflen = 0; |
3556 | zfree(stringcache_zone, entry); |
3557 | } |
3558 | } |
3559 | |
3560 | return retval; |
3561 | } |
3562 | |
3563 | |
3564 | #ifdef DUMP_STRING_TABLE |
3565 | void |
3566 | dump_string_table(void) |
3567 | { |
3568 | struct stringhead *head; |
3569 | string_t *entry; |
3570 | u_long i; |
3571 | |
3572 | lck_rw_lock_shared(&strtable_rw_lock); |
3573 | |
3574 | for (i = 0; i <= string_table_mask; i++) { |
3575 | head = &string_ref_table[i]; |
3576 | for (entry = head->lh_first; entry != NULL; entry = entry->hash_chain.le_next) { |
3577 | printf("%6d - %s\n" , entry->refcount, entry->str); |
3578 | } |
3579 | } |
3580 | lck_rw_done(&strtable_rw_lock); |
3581 | } |
3582 | #endif /* DUMP_STRING_TABLE */ |
3583 | |