/*
 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1989, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Poul-Henning Kamp of the FreeBSD Project.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *
 *	@(#)vfs_cache.c	8.5 (Berkeley) 3/22/95
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/mount_internal.h>
#include <sys/vnode_internal.h>
#include <miscfs/specfs/specdev.h>
#include <sys/namei.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/kauth.h>
#include <sys/user.h>
#include <sys/paths.h>
#include <os/overflow.h>

#if CONFIG_MACF
#include <security/mac_framework.h>
#endif
/*
 * Name caching works as follows:
 *
 * Names found by directory scans are retained in a cache
 * for future reference.  It is managed LRU, so frequently
 * used names will hang around.  The cache is indexed by a hash value
 * obtained from (vp, name) where vp refers to the directory
 * containing name.
 *
 * If it is a "negative" entry (i.e. for a name that is known NOT to
 * exist), the vnode pointer will be NULL.
 *
 * Upon reaching the last segment of a path, if the reference
 * is for DELETE, or NOCACHE is set (rewrite), and the
 * name is located in the cache, it will be dropped.
 */

/*
 * Structures associated with name caching.
 */

LIST_HEAD(nchashhead, namecache) *nchashtbl;	/* Hash Table */
u_long	nchashmask;
u_long	nchash;				/* size of hash table - 1 */
long	numcache;			/* number of cache entries allocated */
int	desiredNodes;
int	desiredNegNodes;
int	ncs_negtotal;
int	nc_disabled = 0;
TAILQ_HEAD(, namecache) nchead;		/* chain of all name cache entries */
TAILQ_HEAD(, namecache) neghead;	/* chain of only negative cache entries */


#if COLLECT_STATS

struct nchstats nchstats;		/* cache effectiveness statistics */

#define NCHSTAT(v) {		\
	nchstats.v++;		\
}
#define NAME_CACHE_LOCK()		name_cache_lock()
#define NAME_CACHE_UNLOCK()		name_cache_unlock()
#define NAME_CACHE_LOCK_SHARED()	name_cache_lock()

#else

#define NCHSTAT(v)
#define NAME_CACHE_LOCK()		name_cache_lock()
#define NAME_CACHE_UNLOCK()		name_cache_unlock()
#define NAME_CACHE_LOCK_SHARED()	name_cache_lock_shared()

#endif


/* vars for name cache list lock */
lck_grp_t * namecache_lck_grp;
lck_grp_attr_t * namecache_lck_grp_attr;
lck_attr_t * namecache_lck_attr;

lck_grp_t * strcache_lck_grp;
lck_grp_attr_t * strcache_lck_grp_attr;
lck_attr_t * strcache_lck_attr;

lck_rw_t * namecache_rw_lock;
lck_rw_t * strtable_rw_lock;

#define NUM_STRCACHE_LOCKS 1024

lck_mtx_t strcache_mtx_locks[NUM_STRCACHE_LOCKS];


static vnode_t cache_lookup_locked(vnode_t dvp, struct componentname *cnp);
static const char *add_name_internal(const char *, uint32_t, u_int, boolean_t, u_int);
static void init_string_table(void);
static void cache_delete(struct namecache *, int);
static void cache_enter_locked(vnode_t dvp, vnode_t vp, struct componentname *cnp, const char *strname);

#ifdef DUMP_STRING_TABLE
/*
 * Internal dump function used for debugging
 */
void dump_string_table(void);
#endif	/* DUMP_STRING_TABLE */

static void init_crc32(void);
static unsigned int crc32tab[256];

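/*
 * Hash chain head for a (directory vnode, name hash) pair: the
 * directory's v_id is mixed into the name hash so that identically
 * named entries in different directories land on different chains.
 */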
#define NCHHASH(dvp, hash_val) \
	(&nchashtbl[(dvp->v_id ^ (hash_val)) & nchashmask])

/*
 * This function tries to check if a directory vp is a subdirectory of dvp
 * using only valid v_parent pointers.  It is called with the name cache lock
 * held and does not drop the lock at any point inside the function.
 *
 * It returns a boolean that indicates whether or not it was able to
 * successfully infer the parent/descendant relationship via the v_parent
 * pointers, or if it could not infer such a relationship and the decision
 * must be delegated to the owning filesystem.
 *
 * If it does not defer the decision, i.e. it was successfully able to
 * determine the parent/descendant relationship, *is_subdir tells the caller
 * if vp is a subdirectory of dvp.
 *
 * If the decision is deferred, *next_vp is where it stopped, i.e. *next_vp
 * is the vnode whose parent is to be determined from the filesystem.
 * *is_subdir, in this case, is not indicative of anything and should be
 * ignored.
 *
 * The return value and output args should be used as follows:
 *
 * defer = cache_check_vnode_issubdir(vp, dvp, is_subdir, next_vp);
 * if (!defer) {
 *	if (*is_subdir)
 *		vp is subdirectory;
 *	else
 *		vp is not a subdirectory;
 * } else {
 *	if (*next_vp)
 *		check this vnode's parent from the filesystem
 *	else
 *		error (likely because of forced unmount).
 * }
 *
 */
static boolean_t
cache_check_vnode_issubdir(vnode_t vp, vnode_t dvp, boolean_t *is_subdir,
    vnode_t *next_vp)
{
	vnode_t tvp = vp;
	int defer = FALSE;

	*is_subdir = FALSE;
	*next_vp = NULLVP;
	while (1) {
		mount_t tmp;

		if (tvp == dvp) {
			*is_subdir = TRUE;
			break;
		} else if (tvp == rootvnode) {
			/* *is_subdir = FALSE */
			break;
		}

		tmp = tvp->v_mount;
		while ((tvp->v_flag & VROOT) && tmp && tmp->mnt_vnodecovered &&
		    tvp != dvp && tvp != rootvnode) {
			tvp = tmp->mnt_vnodecovered;
			tmp = tvp->v_mount;
		}

		/*
		 * If dvp is not at the top of a mount "stack" then
		 * vp is not a subdirectory of dvp either.
		 */
		if (tvp == dvp || tvp == rootvnode) {
			/* *is_subdir = FALSE */
			break;
		}

		if (!tmp) {
			defer = TRUE;
			*next_vp = NULLVP;
			break;
		}

		if ((tvp->v_flag & VISHARDLINK) || !(tvp->v_parent)) {
			defer = TRUE;
			*next_vp = tvp;
			break;
		}

		tvp = tvp->v_parent;
	}

	return (defer);
}

/* maximum number of retries for potentially transient errors in vnode_issubdir */
#define MAX_ERROR_RETRY 3

/*
 * This function checks if a given directory (vp) is a subdirectory of dvp.
 * It walks backwards from vp and if it hits dvp in its parent chain,
 * it is a subdirectory.  If it encounters the root directory, it is not
 * a subdirectory.
 *
 * This function returns an error if it is unsuccessful and 0 on success.
 *
 * On entry (and exit) vp has an iocount and if this function has to take
 * any iocounts on other vnodes in the parent chain traversal, it releases them.
 */
int
vnode_issubdir(vnode_t vp, vnode_t dvp, int *is_subdir, vfs_context_t ctx)
{
	vnode_t start_vp, tvp;
	vnode_t vp_with_iocount;
	int error = 0;
	char dotdotbuf[] = "..";
	int error_retry_count = 0;	/* retry count for potentially transient
					   errors */

	*is_subdir = FALSE;
	tvp = start_vp = vp;
	/*
	 * Anytime we acquire an iocount in this function, we save the vnode
	 * in this variable and release it before exiting.
	 */
	vp_with_iocount = NULLVP;

	while (1) {
		boolean_t defer;
		vnode_t pvp;
		uint32_t vid;
		struct componentname cn;
		boolean_t is_subdir_locked = FALSE;

		if (tvp == dvp) {
			*is_subdir = TRUE;
			break;
		} else if (tvp == rootvnode) {
			/* *is_subdir = FALSE */
			break;
		}

		NAME_CACHE_LOCK_SHARED();

		defer = cache_check_vnode_issubdir(tvp, dvp, &is_subdir_locked,
		    &tvp);

		if (defer && tvp)
			vid = vnode_vid(tvp);

		NAME_CACHE_UNLOCK();

		if (!defer) {
			*is_subdir = is_subdir_locked;
			break;
		}

		if (!tvp) {
			if (error_retry_count++ < MAX_ERROR_RETRY) {
				tvp = vp;
				continue;
			}
			error = ENOENT;
			break;
		}

		if (tvp != start_vp) {
			if (vp_with_iocount) {
				vnode_put(vp_with_iocount);
				vp_with_iocount = NULLVP;
			}

			error = vnode_getwithvid(tvp, vid);
			if (error) {
				if (error_retry_count++ < MAX_ERROR_RETRY) {
					tvp = vp;
					error = 0;
					continue;
				}
				break;
			}

			vp_with_iocount = tvp;
		}

		bzero(&cn, sizeof(cn));
		cn.cn_nameiop = LOOKUP;
		cn.cn_flags = ISLASTCN | ISDOTDOT;
		cn.cn_context = ctx;
		cn.cn_pnbuf = &dotdotbuf[0];
		cn.cn_pnlen = sizeof(dotdotbuf);
		cn.cn_nameptr = cn.cn_pnbuf;
		cn.cn_namelen = 2;

		pvp = NULLVP;
		if ((error = VNOP_LOOKUP(tvp, &pvp, &cn, ctx)))
			break;

		if (!(tvp->v_flag & VISHARDLINK) && tvp->v_parent != pvp) {
			(void)vnode_update_identity(tvp, pvp, NULL, 0, 0,
			    VNODE_UPDATE_PARENT);
		}

		if (vp_with_iocount)
			vnode_put(vp_with_iocount);

		vp_with_iocount = tvp = pvp;
	}

	if (vp_with_iocount)
		vnode_put(vp_with_iocount);

	return (error);
}
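
/*
 * Example usage (a sketch, not taken from a real call site; it assumes
 * the caller holds an iocount on vp as described above):
 *
 *	int is_subdir;
 *
 *	if (vnode_issubdir(vp, dvp, &is_subdir, ctx) == 0 && is_subdir) {
 *		// vp sits somewhere below dvp in the namespace
 *	}
 */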

/*
 * This function builds the path in "buff" from the supplied vnode.
 * The length of the buffer *INCLUDING* the trailing zero byte is
 * returned in outlen.  NOTE: the length includes the trailing zero
 * byte and thus the length is one greater than what strlen would
 * return.  This is important and lots of code elsewhere in the kernel
 * assumes this behavior.
 *
 * This function may call a VNOP in the file system if the parent vnode
 * does not exist, or when called for hardlinks via a volfs path.
 * If BUILDPATH_NO_FS_ENTER is set in flags, it only uses values present
 * in the name cache and does not enter the file system.
 *
 * If BUILDPATH_CHECK_MOVED is set in flags, we return EAGAIN when
 * we encounter ENOENT during path reconstruction.  ENOENT means that
 * one of the parents moved while we were building the path.  The
 * caller can handle this case specially by calling build_path again.
 *
 * If BUILDPATH_VOLUME_RELATIVE is set in flags, we return a path
 * that is relative to the nearest mount point, i.e. we do not
 * cross over mount points while building the path.
 *
 * The passed-in vp must have a valid io_count reference.
 *
 * If the parent vnode is non-NULL it must also have an io count.  This
 * allows build_path_with_parent to be safely called for operations
 * like unlink, rmdir and rename that already have io counts on the target
 * and the directory.  In this way build_path_with_parent does not have
 * to try to obtain an additional io count on the parent.  Taking an
 * io count on the parent can lead to deadlock if a forced unmount
 * occurs at the right moment.  For a fuller explanation of how this
 * can occur see the comment for vn_getpath_with_parent.
 *
 */
int
build_path_with_parent(vnode_t first_vp, vnode_t parent_vp, char *buff, int buflen, int *outlen, int flags, vfs_context_t ctx)
{
	vnode_t vp, tvp;
	vnode_t vp_with_iocount;
	vnode_t proc_root_dir_vp;
	char *end;
	const char *str;
	int len;
	int ret = 0;
	int fixhardlink;

	if (first_vp == NULLVP)
		return (EINVAL);

	if (buflen <= 1)
		return (ENOSPC);

	/*
	 * Grab the process fd so we can evaluate fd_rdir.
	 */
	if (vfs_context_proc(ctx)->p_fd)
		proc_root_dir_vp = vfs_context_proc(ctx)->p_fd->fd_rdir;
	else
		proc_root_dir_vp = NULL;

	vp_with_iocount = NULLVP;
again:
	vp = first_vp;

	end = &buff[buflen-1];
	*end = '\0';

	/*
	 * holding the NAME_CACHE_LOCK in shared mode is
	 * sufficient to stabilize both the vp->v_parent chain
	 * and the 'vp->v_mount->mnt_vnodecovered' chain
	 *
	 * if we need to drop this lock, we must first grab the v_id
	 * from the vnode we're currently working with... if that
	 * vnode doesn't already have an io_count reference (the vp
	 * passed in comes with one), we must grab a reference
	 * after we drop the NAME_CACHE_LOCK via vnode_getwithvid...
	 * deadlocks may result if you call vnode_get while holding
	 * the NAME_CACHE_LOCK... we lazily release the reference
	 * we pick up the next time we encounter a need to drop
	 * the NAME_CACHE_LOCK, or before we return from this routine
	 */
	NAME_CACHE_LOCK_SHARED();

	/*
	 * Check if this is the root of a file system.
	 */
	while (vp && vp->v_flag & VROOT) {
		if (vp->v_mount == NULL) {
			ret = EINVAL;
			goto out_unlock;
		}
		if ((vp->v_mount->mnt_flag & MNT_ROOTFS) || (vp == proc_root_dir_vp)) {
			/*
			 * It's the root of the root file system, so it's
			 * just "/".
			 */
			*--end = '/';

			goto out_unlock;
		} else {
			/*
			 * This is the root of the volume and the caller does not
			 * want to cross mount points.  Therefore just return
			 * '/' as the relative path.
			 */
			if (flags & BUILDPATH_VOLUME_RELATIVE) {
				*--end = '/';
				goto out_unlock;
			} else {
				vp = vp->v_mount->mnt_vnodecovered;
			}
		}
	}

	while ((vp != NULLVP) && (vp->v_parent != vp)) {
		int vid;

		/*
		 * For hardlinks the v_name may be stale, so if it's OK
		 * to enter a file system, ask the file system for the
		 * name and parent (below).
		 */
		fixhardlink = (vp->v_flag & VISHARDLINK) &&
		    (vp->v_mount->mnt_kern_flag & MNTK_PATH_FROM_ID) &&
		    !(flags & BUILDPATH_NO_FS_ENTER);

		if (!fixhardlink) {
			str = vp->v_name;

			if (str == NULL || *str == '\0') {
				if (vp->v_parent != NULL)
					ret = EINVAL;
				else
					ret = ENOENT;
				goto out_unlock;
			}
			len = strlen(str);
			/*
			 * Check that there's enough space (including space for the '/')
			 */
			if ((end - buff) < (len + 1)) {
				ret = ENOSPC;
				goto out_unlock;
			}
			/*
			 * Copy the name backwards.
			 */
			str += len;

			for (; len > 0; len--)
				*--end = *--str;
			/*
			 * Add a path separator.
			 */
			*--end = '/';
		}

		/*
		 * Walk up the parent chain.
		 */
		if (((vp->v_parent != NULLVP) && !fixhardlink) ||
		    (flags & BUILDPATH_NO_FS_ENTER)) {

			/*
			 * In this if () block we are not allowed to enter the filesystem
			 * to conclusively get the most accurate parent identifier.
			 * As a result, if 'vp' does not identify '/' and it
			 * does not have a valid v_parent, then error out
			 * and disallow further path construction
			 */
			if ((vp->v_parent == NULLVP) && (rootvnode != vp)) {
				/*
				 * Only '/' is allowed to have a NULL parent
				 * pointer.  Upper level callers should ideally
				 * re-drive name lookup on receiving an ENOENT.
				 */
				ret = ENOENT;

				/* The code below will exit early if 'tvp = vp' == NULL */
			}
			vp = vp->v_parent;

			/*
			 * if the vnode we have in hand isn't a directory and it
			 * has a v_parent, then we started with the resource fork
			 * so skip up to avoid getting a duplicate copy of the
			 * file name in the path.
			 */
			if (vp && !vnode_isdir(vp) && vp->v_parent) {
				vp = vp->v_parent;
			}
		} else {
			/*
			 * No parent, go get it if supported.
			 */
			struct vnode_attr va;
			vnode_t dvp;

			/*
			 * Make sure the file system supports obtaining a path from id.
			 */
			if (!(vp->v_mount->mnt_kern_flag & MNTK_PATH_FROM_ID)) {
				ret = ENOENT;
				goto out_unlock;
			}
			vid = vp->v_id;

			NAME_CACHE_UNLOCK();

			if (vp != first_vp && vp != parent_vp && vp != vp_with_iocount) {
				if (vp_with_iocount) {
					vnode_put(vp_with_iocount);
					vp_with_iocount = NULLVP;
				}
				if (vnode_getwithvid(vp, vid))
					goto again;
				vp_with_iocount = vp;
			}
			VATTR_INIT(&va);
			VATTR_WANTED(&va, va_parentid);

			if (fixhardlink) {
				VATTR_WANTED(&va, va_name);
				MALLOC_ZONE(va.va_name, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
			} else {
				va.va_name = NULL;
			}
			/*
			 * Ask the file system for its parent id and for its name (optional).
			 */
			ret = vnode_getattr(vp, &va, ctx);

			if (fixhardlink) {
				if ((ret == 0) && (VATTR_IS_SUPPORTED(&va, va_name))) {
					str = va.va_name;
					vnode_update_identity(vp, NULL, str, strlen(str), 0, VNODE_UPDATE_NAME);
				} else if (vp->v_name) {
					str = vp->v_name;
					ret = 0;
				} else {
					ret = ENOENT;
					goto bad_news;
				}
				len = strlen(str);

				/*
				 * Check that there's enough space.
				 */
				if ((end - buff) < (len + 1)) {
					ret = ENOSPC;
				} else {
					/* Copy the name backwards. */
					str += len;

					for (; len > 0; len--) {
						*--end = *--str;
					}
					/*
					 * Add a path separator.
					 */
					*--end = '/';
				}
bad_news:
				FREE_ZONE(va.va_name, MAXPATHLEN, M_NAMEI);
			}
			if (ret || !VATTR_IS_SUPPORTED(&va, va_parentid)) {
				ret = ENOENT;
				goto out;
			}
			/*
			 * Ask the file system for the parent vnode.
			 */
			if ((ret = VFS_VGET(vp->v_mount, (ino64_t)va.va_parentid, &dvp, ctx)))
				goto out;

			if (!fixhardlink && (vp->v_parent != dvp))
				vnode_update_identity(vp, dvp, NULL, 0, 0, VNODE_UPDATE_PARENT);

			if (vp_with_iocount)
				vnode_put(vp_with_iocount);
			vp = dvp;
			vp_with_iocount = vp;

			NAME_CACHE_LOCK_SHARED();

			/*
			 * if the vnode we have in hand isn't a directory and it
			 * has a v_parent, then we started with the resource fork
			 * so skip up to avoid getting a duplicate copy of the
			 * file name in the path.
			 */
			if (vp && !vnode_isdir(vp) && vp->v_parent)
				vp = vp->v_parent;
		}

		if (vp && (flags & BUILDPATH_CHECKACCESS)) {
			vid = vp->v_id;

			NAME_CACHE_UNLOCK();

			if (vp != first_vp && vp != parent_vp && vp != vp_with_iocount) {
				if (vp_with_iocount) {
					vnode_put(vp_with_iocount);
					vp_with_iocount = NULLVP;
				}
				if (vnode_getwithvid(vp, vid))
					goto again;
				vp_with_iocount = vp;
			}
			if ((ret = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx)))
				goto out;	/* no peeking */

			NAME_CACHE_LOCK_SHARED();
		}

		/*
		 * When a mount point is crossed switch the vp.
		 * Continue until we find the root or we find
		 * a vnode that's not the root of a mounted
		 * file system.
		 */
		tvp = vp;

		while (tvp) {
			if (tvp == proc_root_dir_vp)
				goto out_unlock;	/* encountered the root */

			if (!(tvp->v_flag & VROOT) || !tvp->v_mount)
				break;			/* not the root of a mounted FS */

			if (flags & BUILDPATH_VOLUME_RELATIVE) {
				/* Do not cross over mount points */
				tvp = NULL;
			} else {
				tvp = tvp->v_mount->mnt_vnodecovered;
			}
		}
		if (tvp == NULLVP)
			goto out_unlock;
		vp = tvp;
	}
out_unlock:
	NAME_CACHE_UNLOCK();
out:
	if (vp_with_iocount)
		vnode_put(vp_with_iocount);
	/*
	 * Slide the name down to the beginning of the buffer.
	 */
	memmove(buff, end, &buff[buflen] - end);

	/*
	 * length includes the trailing zero byte
	 */
	*outlen = &buff[buflen] - end;

	/*
	 * One of the parents was moved during path reconstruction.
	 * The caller is interested in knowing whether any of the
	 * parents moved via BUILDPATH_CHECK_MOVED, so return EAGAIN.
	 */
	if ((ret == ENOENT) && (flags & BUILDPATH_CHECK_MOVED)) {
		ret = EAGAIN;
	}

	return (ret);
}

int
build_path(vnode_t first_vp, char *buff, int buflen, int *outlen, int flags, vfs_context_t ctx)
{
	return (build_path_with_parent(first_vp, NULL, buff, buflen, outlen, flags, ctx));
}
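
/*
 * Example usage (a sketch, not from a real call site; 'vp' is assumed
 * to carry a valid iocount and 'ctx' to be the caller's vfs_context_t):
 *
 *	char path[MAXPATHLEN];
 *	int  len;
 *
 *	if (build_path(vp, path, MAXPATHLEN, &len, 0, ctx) == 0) {
 *		// 'len' counts the trailing NUL, i.e. strlen(path) + 1
 *	}
 */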

/*
 * Return NULLVP if vp's parent doesn't
 * exist, or we can't get a valid iocount on it;
 * else return the parent of vp with an iocount held.
 */
vnode_t
vnode_getparent(vnode_t vp)
{
	vnode_t pvp = NULLVP;
	int pvid;

	NAME_CACHE_LOCK_SHARED();
	/*
	 * v_parent is stable behind the name_cache lock...
	 * however, the only thing we can really guarantee
	 * is that we've grabbed a valid iocount on the
	 * parent of 'vp' at the time we took the name_cache lock...
	 * once we drop the lock, vp could get re-parented
	 */
	if ( (pvp = vp->v_parent) != NULLVP ) {
		pvid = pvp->v_id;

		NAME_CACHE_UNLOCK();

		if (vnode_getwithvid(pvp, pvid) != 0)
			pvp = NULL;
	} else
		NAME_CACHE_UNLOCK();
	return (pvp);
}

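/*
 * Return a referenced copy of vp's name, or NULL if it has none;
 * the reference must be dropped with vnode_putname().
 */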
const char *
vnode_getname(vnode_t vp)
{
	const char *name = NULL;

	NAME_CACHE_LOCK_SHARED();

	if (vp->v_name)
		name = vfs_addname(vp->v_name, strlen(vp->v_name), 0, 0);
	NAME_CACHE_UNLOCK();

	return (name);
}

void
vnode_putname(const char *name)
{
	vfs_removename(name);
}

static const char unknown_vnodename[] = "(unknown vnode name)";

const char *
vnode_getname_printable(vnode_t vp)
{
	const char *name = vnode_getname(vp);
	if (name != NULL)
		return name;

	switch (vp->v_type) {
	case VCHR:
	case VBLK:
		{
		/*
		 * Create an artificial dev name from
		 * major and minor device number
		 */
		char dev_name[64];
		(void) snprintf(dev_name, sizeof(dev_name),
		    "%c(%u, %u)", VCHR == vp->v_type ? 'c':'b',
		    major(vp->v_rdev), minor(vp->v_rdev));
		/*
		 * Add the newly created dev name to the name
		 * cache to allow easier cleanup.  Also,
		 * vfs_addname allocates memory for the new name
		 * and returns it.
		 */
		NAME_CACHE_LOCK_SHARED();
		name = vfs_addname(dev_name, strlen(dev_name), 0, 0);
		NAME_CACHE_UNLOCK();
		return name;
		}
	default:
		return unknown_vnodename;
	}
}

void
vnode_putname_printable(const char *name)
{
	if (name == unknown_vnodename)
		return;
	vnode_putname(name);
}


/*
 * if VNODE_UPDATE_PARENT, and we can take
 * a reference on dvp, then update vp with
 * its new parent... if vp already has a parent,
 * then drop the reference vp held on it
 *
 * if VNODE_UPDATE_NAME,
 * then drop the string ref on v_name if it exists, and if name is non-NULL
 * then pick up a string reference on name and record it in v_name...
 * optionally pass in the length and hashval of name if known
 *
 * if VNODE_UPDATE_CACHE, flush the name cache entries associated with vp
 */
void
vnode_update_identity(vnode_t vp, vnode_t dvp, const char *name, int name_len, uint32_t name_hashval, int flags)
{
	struct namecache *ncp;
	vnode_t old_parentvp = NULLVP;
	int isstream = (vp->v_flag & VISNAMEDSTREAM);
	int kusecountbumped = 0;
	kauth_cred_t tcred = NULL;
	const char *vname = NULL;
	const char *tname = NULL;

	if (flags & VNODE_UPDATE_PARENT) {
		if (dvp && vnode_ref(dvp) != 0) {
			dvp = NULLVP;
		}
		/* Don't count a stream's parent ref during unmounts */
		if (isstream && dvp && (dvp != vp) && (dvp != vp->v_parent) && (dvp->v_type == VREG)) {
			vnode_lock_spin(dvp);
			++dvp->v_kusecount;
			kusecountbumped = 1;
			vnode_unlock(dvp);
		}
	} else {
		dvp = NULLVP;
	}
	if ( (flags & VNODE_UPDATE_NAME) ) {
		if (name != vp->v_name) {
			if (name && *name) {
				if (name_len == 0)
					name_len = strlen(name);
				tname = vfs_addname(name, name_len, name_hashval, 0);
			}
		} else
			flags &= ~VNODE_UPDATE_NAME;
	}
	if ( (flags & (VNODE_UPDATE_PURGE | VNODE_UPDATE_PARENT | VNODE_UPDATE_CACHE | VNODE_UPDATE_NAME)) ) {

		NAME_CACHE_LOCK();

		if ( (flags & VNODE_UPDATE_PURGE) ) {

			if (vp->v_parent)
				vp->v_parent->v_nc_generation++;

			while ( (ncp = LIST_FIRST(&vp->v_nclinks)) )
				cache_delete(ncp, 1);

			while ( (ncp = TAILQ_FIRST(&vp->v_ncchildren)) )
				cache_delete(ncp, 1);

			/*
			 * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held
			 */
			tcred = vp->v_cred;
			vp->v_cred = NOCRED;
			vp->v_authorized_actions = 0;
			vp->v_cred_timestamp = 0;
		}
		if ( (flags & VNODE_UPDATE_NAME) ) {
			vname = vp->v_name;
			vp->v_name = tname;
		}
		if (flags & VNODE_UPDATE_PARENT) {
			if (dvp != vp && dvp != vp->v_parent) {
				old_parentvp = vp->v_parent;
				vp->v_parent = dvp;
				dvp = NULLVP;

				if (old_parentvp)
					flags |= VNODE_UPDATE_CACHE;
			}
		}
		if (flags & VNODE_UPDATE_CACHE) {
			while ( (ncp = LIST_FIRST(&vp->v_nclinks)) )
				cache_delete(ncp, 1);
		}
		NAME_CACHE_UNLOCK();

		if (vname != NULL)
			vfs_removename(vname);

		if (IS_VALID_CRED(tcred))
			kauth_cred_unref(&tcred);
	}
	if (dvp != NULLVP) {
		/* Back out the ref we took if we lost a race for vp->v_parent. */
		if (kusecountbumped) {
			vnode_lock_spin(dvp);
			if (dvp->v_kusecount > 0)
				--dvp->v_kusecount;
			vnode_unlock(dvp);
		}
		vnode_rele(dvp);
	}
	if (old_parentvp) {
		struct uthread *ut;

		if (isstream) {
			vnode_lock_spin(old_parentvp);
			if ((old_parentvp->v_type != VDIR) && (old_parentvp->v_kusecount > 0))
				--old_parentvp->v_kusecount;
			vnode_unlock(old_parentvp);
		}
		ut = get_bsdthread_info(current_thread());

		/*
		 * indicate to vnode_rele that it shouldn't do a
		 * vnode_reclaim at this time... instead it will
		 * chain the vnode to the uu_vreclaims list...
		 * we'll be responsible for calling vnode_reclaim
		 * on each of the vnodes in this list...
		 */
		ut->uu_defer_reclaims = 1;
		ut->uu_vreclaims = NULLVP;

		while ( (vp = old_parentvp) != NULLVP ) {

			vnode_lock_spin(vp);
			vnode_rele_internal(vp, 0, 0, 1);

			/*
			 * check to see if the vnode is now in the state
			 * that would have triggered a vnode_reclaim in vnode_rele...
			 * if it is, we save its parent pointer and then NULL
			 * out the v_parent field... we'll drop the reference
			 * that was held on the next iteration of this loop...
			 * this short circuits a potential deep recursion if we
			 * have a long chain of parents in this state...
			 * we'll sit in this loop until we run into
			 * a parent in this chain that is not in this state
			 *
			 * make our check and the vnode_rele atomic
			 * with respect to the current vnode we're working on
			 * by holding the vnode lock...
			 * if vnode_rele deferred the vnode_reclaim and has put
			 * this vnode on the list to be reaped by us, then
			 * it has left this vnode with an iocount == 1
			 */
			if ( (vp->v_iocount == 1) && (vp->v_usecount == 0) &&
			    ((vp->v_lflag & (VL_MARKTERM | VL_TERMINATE | VL_DEAD)) == VL_MARKTERM)) {
				/*
				 * vnode_rele wanted to do a vnode_reclaim on this vnode...
				 * it should be sitting on the head of the uu_vreclaims chain...
				 * pull the parent pointer now so that when we do the
				 * vnode_reclaim for each of the vnodes in the uu_vreclaims
				 * list, we won't recurse back through here
				 *
				 * need to do a convert here in case vnode_rele_internal
				 * returns with the lock held in the spin mode... it
				 * can drop and retake the lock under certain circumstances
				 */
				vnode_lock_convert(vp);

				NAME_CACHE_LOCK();
				old_parentvp = vp->v_parent;
				vp->v_parent = NULLVP;
				NAME_CACHE_UNLOCK();
			} else {
				/*
				 * we're done... we ran into a vnode that isn't
				 * being terminated
				 */
				old_parentvp = NULLVP;
			}
			vnode_unlock(vp);
		}
		ut->uu_defer_reclaims = 0;

		while ( (vp = ut->uu_vreclaims) != NULLVP) {
			ut->uu_vreclaims = vp->v_defer_reclaimlist;

			/*
			 * vnode_put will drive the vnode_reclaim if
			 * we are still the only reference on this vnode
			 */
			vnode_put(vp);
		}
	}
}
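
/*
 * Example usage (a sketch, not from a real call site): a rename-style
 * operation that has just moved 'vp' under 'new_dvp' with the name
 * 'new_name' (both hypothetical) might refresh the cached identity
 * and flush stale name cache entries with:
 *
 *	vnode_update_identity(vp, new_dvp, new_name, 0, 0,
 *	    VNODE_UPDATE_PARENT | VNODE_UPDATE_NAME | VNODE_UPDATE_CACHE);
 *
 * passing 0 for the name length and hash lets them be computed internally.
 */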


/*
 * Mark a vnode as having multiple hard links.  HFS makes use of this
 * because it keeps track of each link separately, and wants to know
 * which link was actually used.
 *
 * This will cause the name cache to force a VNOP_LOOKUP on the vnode
 * so that HFS can post-process the lookup.  Also, volfs will call
 * VNOP_GETATTR2 to determine the parent, instead of using v_parent.
 */
void vnode_setmultipath(vnode_t vp)
{
	vnode_lock_spin(vp);

	/*
	 * In theory, we're changing the vnode's identity as far as the
	 * name cache is concerned, so we ought to grab the name cache lock
	 * here.  However, there is already a race, and grabbing the name
	 * cache lock only makes the race window slightly smaller.
	 *
	 * The race happens because the vnode already exists in the name
	 * cache, and could be found by one thread before another thread
	 * can set the hard link flag.
	 */

	vp->v_flag |= VISHARDLINK;

	vnode_unlock(vp);
}



/*
 * backwards compatibility
 */
void vnode_uncache_credentials(vnode_t vp)
{
	vnode_uncache_authorized_action(vp, KAUTH_INVALIDATE_CACHED_RIGHTS);
}


/*
 * use the exclusive form of NAME_CACHE_LOCK to protect the update of the
 * following fields in the vnode: v_cred_timestamp, v_cred, v_authorized_actions.
 * we use this lock so that we can look at the v_cred and v_authorized_actions
 * atomically while behind the NAME_CACHE_LOCK in shared mode in 'cache_lookup_path',
 * which is the super-hot path... if we are updating the authorized actions for this
 * vnode, we are already in the super-slow and far less frequented path so it's not
 * that bad that we take the lock exclusive for this case... of course we strive
 * to hold it for the minimum amount of time possible
 */

void vnode_uncache_authorized_action(vnode_t vp, kauth_action_t action)
{
	kauth_cred_t tcred = NOCRED;

	NAME_CACHE_LOCK();

	vp->v_authorized_actions &= ~action;

	if (action == KAUTH_INVALIDATE_CACHED_RIGHTS &&
	    IS_VALID_CRED(vp->v_cred)) {
		/*
		 * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held
		 */
		tcred = vp->v_cred;
		vp->v_cred = NOCRED;
	}
	NAME_CACHE_UNLOCK();

	if (tcred != NOCRED)
		kauth_cred_unref(&tcred);
}


extern int bootarg_vnode_cache_defeat;	/* default = 0, from bsd_init.c */

boolean_t
vnode_cache_is_authorized(vnode_t vp, vfs_context_t ctx, kauth_action_t action)
{
	kauth_cred_t	ucred;
	boolean_t	retval = FALSE;

	/* Boot argument to defeat rights caching */
	if (bootarg_vnode_cache_defeat)
		return FALSE;

	if ( (vp->v_mount->mnt_kern_flag & (MNTK_AUTH_OPAQUE | MNTK_AUTH_CACHE_TTL)) ) {
		/*
		 * a TTL is enabled on the rights cache... handle it here.
		 * a TTL of 0 indicates that no rights should be cached
		 */
		if (vp->v_mount->mnt_authcache_ttl) {
			if ( !(vp->v_mount->mnt_kern_flag & MNTK_AUTH_CACHE_TTL) ) {
				/*
				 * For filesystems marked only MNTK_AUTH_OPAQUE (generally network ones),
				 * we will only allow a SEARCH right on a directory to be cached...
				 * that cached right always has a default TTL associated with it
				 */
				if (action != KAUTH_VNODE_SEARCH || vp->v_type != VDIR)
					vp = NULLVP;
			}
			if (vp != NULLVP && vnode_cache_is_stale(vp) == TRUE) {
				vnode_uncache_authorized_action(vp, vp->v_authorized_actions);
				vp = NULLVP;
			}
		} else
			vp = NULLVP;
	}
	if (vp != NULLVP) {
		ucred = vfs_context_ucred(ctx);

		NAME_CACHE_LOCK_SHARED();

		if (vp->v_cred == ucred && (vp->v_authorized_actions & action) == action)
			retval = TRUE;

		NAME_CACHE_UNLOCK();
	}
	return retval;
}


void vnode_cache_authorized_action(vnode_t vp, vfs_context_t ctx, kauth_action_t action)
{
	kauth_cred_t tcred = NOCRED;
	kauth_cred_t ucred;
	struct timeval tv;
	boolean_t ttl_active = FALSE;

	ucred = vfs_context_ucred(ctx);

	if (!IS_VALID_CRED(ucred) || action == 0)
		return;

	if ( (vp->v_mount->mnt_kern_flag & (MNTK_AUTH_OPAQUE | MNTK_AUTH_CACHE_TTL)) ) {
		/*
		 * a TTL is enabled on the rights cache... handle it here.
		 * a TTL of 0 indicates that no rights should be cached
		 */
		if (vp->v_mount->mnt_authcache_ttl == 0)
			return;

		if ( !(vp->v_mount->mnt_kern_flag & MNTK_AUTH_CACHE_TTL) ) {
			/*
			 * only cache the SEARCH action for filesystems marked
			 * MNTK_AUTH_OPAQUE on VDIRs...
			 * the lookup_path code will time these out
			 */
			if ( (action & ~KAUTH_VNODE_SEARCH) || vp->v_type != VDIR )
				return;
		}
		ttl_active = TRUE;

		microuptime(&tv);
	}
	NAME_CACHE_LOCK();

	if (vp->v_cred != ucred) {
		kauth_cred_ref(ucred);
		/*
		 * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held
		 */
		tcred = vp->v_cred;
		vp->v_cred = ucred;
		vp->v_authorized_actions = 0;
	}
	if (ttl_active == TRUE && vp->v_authorized_actions == 0) {
		/*
		 * only reset the timestamp on the
		 * first authorization cached after the previous
		 * timer has expired or we're switching creds...
		 * 'vnode_cache_is_authorized' will clear the
		 * authorized actions if the TTL is active and
		 * it has expired
		 */
		vp->v_cred_timestamp = tv.tv_sec;
	}
	vp->v_authorized_actions |= action;

	NAME_CACHE_UNLOCK();

	if (IS_VALID_CRED(tcred))
		kauth_cred_unref(&tcred);
}

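/*
 * Return TRUE if vp's cached rights have outlived the mount's
 * authorization cache TTL (mnt_authcache_ttl, in seconds, compared
 * against a microuptime-based timestamp).
 */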
boolean_t vnode_cache_is_stale(vnode_t vp)
{
	struct timeval	tv;
	boolean_t	retval;

	microuptime(&tv);

	if ((tv.tv_sec - vp->v_cred_timestamp) > vp->v_mount->mnt_authcache_ttl)
		retval = TRUE;
	else
		retval = FALSE;

	return retval;
}


/*
 * Returns:	0		Success
 *		ERECYCLE	vnode was recycled from underneath us.  Force lookup to be re-driven from namei.
 *				This errno value should not be seen by anyone outside of the kernel.
 */
int
cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp,
    vfs_context_t ctx, int *dp_authorized, vnode_t last_dp)
{
	char		*cp;		/* pointer into pathname argument */
	int		vid;
	int		vvid = 0;	/* protected by vp != NULLVP */
	vnode_t		vp = NULLVP;
	vnode_t		tdp = NULLVP;
	kauth_cred_t	ucred;
	boolean_t	ttl_enabled = FALSE;
	struct timeval	tv;
	mount_t		mp;
	unsigned int	hash;
	int		error = 0;
	boolean_t	dotdotchecked = FALSE;

#if CONFIG_TRIGGERS
	vnode_t		trigger_vp;
#endif /* CONFIG_TRIGGERS */

	ucred = vfs_context_ucred(ctx);
	ndp->ni_flag &= ~(NAMEI_TRAILINGSLASH);

	NAME_CACHE_LOCK_SHARED();

	if ( dp->v_mount && (dp->v_mount->mnt_kern_flag & (MNTK_AUTH_OPAQUE | MNTK_AUTH_CACHE_TTL)) ) {
		ttl_enabled = TRUE;
		microuptime(&tv);
	}
	for (;;) {
		/*
		 * Search a directory.
		 *
		 * The cn_hash value is for use by cache_lookup.
		 * The last component of the filename is left accessible via
		 * cnp->cn_nameptr for callers that need the name.
		 */
		hash = 0;
		cp = cnp->cn_nameptr;

		while (*cp && (*cp != '/')) {
			hash = crc32tab[((hash >> 24) ^ (unsigned char)*cp++)] ^ hash << 8;
		}
		/*
		 * the crc generator can legitimately generate
		 * a 0... however, 0 for us means that we
		 * haven't computed a hash, so use 1 instead
		 */
		if (hash == 0)
			hash = 1;
		cnp->cn_hash = hash;
		cnp->cn_namelen = cp - cnp->cn_nameptr;

		ndp->ni_pathlen -= cnp->cn_namelen;
		ndp->ni_next = cp;

		/*
		 * Replace multiple slashes by a single slash and trailing slashes
		 * by a null.  This must be done before VNOP_LOOKUP() because some
		 * fs's don't know about trailing slashes.  Remember if there were
		 * trailing slashes to handle symlinks, existing non-directories
		 * and non-existing files that won't be directories specially later.
		 */
		while (*cp == '/' && (cp[1] == '/' || cp[1] == '\0')) {
			cp++;
			ndp->ni_pathlen--;

			if (*cp == '\0') {
				ndp->ni_flag |= NAMEI_TRAILINGSLASH;
				*ndp->ni_next = '\0';
			}
		}
		ndp->ni_next = cp;

		cnp->cn_flags &= ~(MAKEENTRY | ISLASTCN | ISDOTDOT);

		if (*cp == '\0')
			cnp->cn_flags |= ISLASTCN;

		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.')
			cnp->cn_flags |= ISDOTDOT;

		*dp_authorized = 0;
#if NAMEDRSRCFORK
		/*
		 * Process a request for a file's resource fork.
		 *
		 * Consume the _PATH_RSRCFORKSPEC suffix and tag the path.
		 */
		if ((ndp->ni_pathlen == sizeof(_PATH_RSRCFORKSPEC)) &&
		    (cp[1] == '.' && cp[2] == '.') &&
		    bcmp(cp, _PATH_RSRCFORKSPEC, sizeof(_PATH_RSRCFORKSPEC)) == 0) {
			/* Skip volfs file systems that don't support native streams. */
			if ((dp->v_mount != NULL) &&
			    (dp->v_mount->mnt_flag & MNT_DOVOLFS) &&
			    (dp->v_mount->mnt_kern_flag & MNTK_NAMED_STREAMS) == 0) {
				goto skiprsrcfork;
			}
			cnp->cn_flags |= CN_WANTSRSRCFORK;
			cnp->cn_flags |= ISLASTCN;
			ndp->ni_next[0] = '\0';
			ndp->ni_pathlen = 1;
		}
skiprsrcfork:
#endif

#if CONFIG_MACF

		/*
		 * Name cache provides authorization caching (see below)
		 * that will short circuit MAC checks in lookup().
		 * We must perform the MAC check here.  On denial
		 * dp_authorized will remain 0 and a second check will
		 * be performed in lookup().
		 */
		if (!(cnp->cn_flags & DONOTAUTH)) {
			error = mac_vnode_check_lookup(ctx, dp, cnp);
			if (error) {
				NAME_CACHE_UNLOCK();
				goto errorout;
			}
		}
#endif /* MAC */
		if (ttl_enabled &&
		    (dp->v_mount->mnt_authcache_ttl == 0 ||
		    ((tv.tv_sec - dp->v_cred_timestamp) > dp->v_mount->mnt_authcache_ttl))) {
			break;
		}

		/*
		 * NAME_CACHE_LOCK holds these fields stable
		 *
		 * We can't cache KAUTH_VNODE_SEARCHBYANYONE for root correctly,
		 * so we make an ugly check for root here.  root is always
		 * allowed, and breaking out of here only to find out that it is
		 * authorized by virtue of being root is very expensive.
		 * However, the check for not-root is valid only for filesystems
		 * which use local authorization.
		 *
		 * XXX: Remove the check for root when we can reliably set
		 * KAUTH_VNODE_SEARCHBYANYONE as root.
		 */
		if ((dp->v_cred != ucred || !(dp->v_authorized_actions & KAUTH_VNODE_SEARCH)) &&
		    !(dp->v_authorized_actions & KAUTH_VNODE_SEARCHBYANYONE) &&
		    (ttl_enabled || !vfs_context_issuser(ctx))) {
			break;
		}

		/*
		 * indicate that we're allowed to traverse this directory...
		 * even if we fail the cache lookup or decide to bail for
		 * some other reason, this information is valid and is used
		 * to avoid doing a vnode_authorize before the call to VNOP_LOOKUP
		 */
		*dp_authorized = 1;

		if ( (cnp->cn_flags & (ISLASTCN | ISDOTDOT)) ) {
			if (cnp->cn_nameiop != LOOKUP)
				break;
			if (cnp->cn_flags & LOCKPARENT)
				break;
			if (cnp->cn_flags & NOCACHE)
				break;
			if (cnp->cn_flags & ISDOTDOT) {
				/*
				 * Force directory hardlinks to go to
				 * the file system for ".." requests.
				 */
				if ((dp->v_flag & VISHARDLINK)) {
					break;
				}
				/*
				 * Quit here only if we can't use
				 * the parent directory pointer or
				 * don't have one.  Otherwise, we'll
				 * use it below.
				 */
				if ((dp->v_flag & VROOT) ||
				    dp == ndp->ni_rootdir ||
				    dp->v_parent == NULLVP)
					break;
			}
		}

		if ((cnp->cn_flags & CN_SKIPNAMECACHE)) {
			/*
			 * Force lookup to go to the filesystem with
			 * all cnp fields set up.
			 */
			break;
		}

		/*
		 * "." and ".." aren't supposed to be cached, so check
		 * for them before checking the cache.
		 */
		if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.')
			vp = dp;
		else if ( (cnp->cn_flags & ISDOTDOT) ) {
			/*
			 * If this is a chrooted process, we need to check if
			 * the process is trying to break out of its chrooted
			 * jail.  We do that by trying to determine if dp is
			 * a subdirectory of ndp->ni_rootdir.  If we aren't
			 * able to determine that by the v_parent pointers, we
			 * will leave the fast path.
			 *
			 * Since this function may see dotdot components
			 * many times and it has the name cache lock held for
			 * the entire duration, we optimize this by doing this
			 * check only once per cache_lookup_path call.
			 * If dotdotchecked is set, it means we've done this
			 * check once already and don't need to do it again.
			 */
			if (!dotdotchecked && (ndp->ni_rootdir != rootvnode)) {
				vnode_t tvp = dp;
				boolean_t defer = FALSE;
				boolean_t is_subdir = FALSE;

				defer = cache_check_vnode_issubdir(tvp,
				    ndp->ni_rootdir, &is_subdir, &tvp);

				if (defer) {
					/* defer to the filesystem */
					break;
				} else if (!is_subdir) {
					/*
					 * This process is trying to break out
					 * of its chrooted jail, so all its
					 * dotdot accesses will be translated to
					 * its root directory.
					 */
					vp = ndp->ni_rootdir;
				} else {
					/*
					 * All good, let this dotdot access
					 * proceed normally
					 */
					vp = dp->v_parent;
				}
				dotdotchecked = TRUE;
			} else {
				vp = dp->v_parent;
			}
		} else {
			if ( (vp = cache_lookup_locked(dp, cnp)) == NULLVP)
				break;

			if ( (vp->v_flag & VISHARDLINK) ) {
				/*
				 * The file system wants a VNOP_LOOKUP on this vnode
				 */
				vp = NULL;
				break;
			}
		}
		if ( (cnp->cn_flags & ISLASTCN) )
			break;

		if (vp->v_type != VDIR) {
			if (vp->v_type != VLNK)
				vp = NULL;
			break;
		}

		if ( (mp = vp->v_mountedhere) && ((cnp->cn_flags & NOCROSSMOUNT) == 0)) {
			vnode_t tmp_vp = mp->mnt_realrootvp;
			if (tmp_vp == NULLVP || mp->mnt_generation != mount_generation ||
			    mp->mnt_realrootvp_vid != tmp_vp->v_id)
				break;
			vp = tmp_vp;
		}

#if CONFIG_TRIGGERS
		/*
		 * After traversing all mountpoints stacked here, if we have a
		 * trigger in hand, resolve it.  Note that we don't need to
		 * leave the fast path if the mount has already happened.
		 */
		if (vp->v_resolve)
			break;
#endif /* CONFIG_TRIGGERS */


		dp = vp;
		vp = NULLVP;

		cnp->cn_nameptr = ndp->ni_next + 1;
		ndp->ni_pathlen--;
		while (*cnp->cn_nameptr == '/') {
			cnp->cn_nameptr++;
			ndp->ni_pathlen--;
		}
	}
	if (vp != NULLVP)
		vvid = vp->v_id;
	vid = dp->v_id;

	NAME_CACHE_UNLOCK();

	if ((vp != NULLVP) && (vp->v_type != VLNK) &&
	    ((cnp->cn_flags & (ISLASTCN | LOCKPARENT | WANTPARENT | SAVESTART)) == ISLASTCN)) {
		/*
		 * if we've got a child and it's the last component, and
		 * the lookup doesn't need to return the parent, then we
		 * can skip grabbing an iocount on the parent, since all
		 * we're going to do with it is a vnode_put just before
		 * we return from 'lookup'.  If it's a symbolic link,
		 * we need the parent in case the link happens to be
		 * a relative pathname.
		 */
		tdp = dp;
		dp = NULLVP;
	} else {
need_dp:
		/*
		 * return the last directory we looked at
		 * with an io reference held.  If it was the one passed
		 * in as a result of the last iteration of VNOP_LOOKUP,
		 * it should already hold an io ref.  No need to increase ref.
		 */
		if (last_dp != dp) {

			if (dp == ndp->ni_usedvp) {
				/*
				 * if this vnode matches the one passed in via USEDVP,
				 * then this context already holds an io_count... just
				 * use vnode_get to get an extra ref for lookup to play
				 * with... can't use the getwithvid variant here because
				 * it will block behind a vnode_drain which would result
				 * in a deadlock (since we already own an io_count that the
				 * vnode_drain is waiting on)... vnode_get grabs the io_count
				 * immediately w/o waiting... it always succeeds
				 */
				vnode_get(dp);
			} else if ((error = vnode_getwithvid_drainok(dp, vid))) {
				/*
				 * failure indicates the vnode
				 * changed identity or is being
				 * TERMINATED... in either case
				 * punt this lookup.
				 *
				 * don't necessarily return ENOENT, though, because
				 * we really want to go back to disk and make sure it's
				 * there or not if someone else is changing this
				 * vnode.  That being said, the one case where we do want
				 * to return ENOENT is when the vnode's mount point is
				 * in the process of unmounting and we might cause a deadlock
				 * in our attempt to take an iocount.  An ENODEV error return
				 * from vnode_get* is an indication of this, but we change it
				 * to ENOENT for the upper layers.
				 */
				if (error == ENODEV) {
					error = ENOENT;
				} else {
					error = ERECYCLE;
				}
				goto errorout;
			}
		}
	}
	if (vp != NULLVP) {
		if ( (vnode_getwithvid_drainok(vp, vvid)) ) {
			vp = NULLVP;

			/*
			 * can't get a reference on the vp we'd like
			 * to return... if we didn't grab a reference
			 * on the directory (due to fast path bypass),
			 * then we need to do it now... we can't return
			 * with both ni_dvp and ni_vp NULL, and no
			 * error condition
			 */
			if (dp == NULLVP) {
				dp = tdp;
				goto need_dp;
			}
		}
	}

	ndp->ni_dvp = dp;
	ndp->ni_vp  = vp;

#if CONFIG_TRIGGERS
	trigger_vp = vp ? vp : dp;
	if ((error == 0) && (trigger_vp != NULLVP) && vnode_isdir(trigger_vp)) {
		error = vnode_trigger_resolve(trigger_vp, ndp, ctx);
		if (error) {
			if (vp)
				vnode_put(vp);
			if (dp)
				vnode_put(dp);
			goto errorout;
		}
	}
#endif /* CONFIG_TRIGGERS */

errorout:
	/*
	 * If we came into cache_lookup_path after an iteration of the lookup loop that
	 * resulted in a call to VNOP_LOOKUP, then VNOP_LOOKUP returned a vnode with an io ref
	 * on it.  It is now the job of cache_lookup_path to drop the ref on this vnode
	 * when it is no longer needed.  If we get to this point, and last_dp is not NULL
	 * and it is ALSO not the dvp we want to return to the caller of this function, it MUST be
	 * the case that we got to a subsequent path component and this previous vnode is
	 * no longer needed.  We can then drop the io ref on it.
	 */
	if ((last_dp != NULLVP) && (last_dp != ndp->ni_dvp)) {
		vnode_put(last_dp);
	}

	/* error is initialized to 0; it should still be 0 if no error cases occurred */
	return error;
}


static vnode_t
cache_lookup_locked(vnode_t dvp, struct componentname *cnp)
{
	struct namecache *ncp;
	struct nchashhead *ncpp;
	long namelen = cnp->cn_namelen;
	unsigned int hashval = cnp->cn_hash;

	if (nc_disabled) {
		return NULL;
	}

	ncpp = NCHHASH(dvp, cnp->cn_hash);
	LIST_FOREACH(ncp, ncpp, nc_hash) {
		if ((ncp->nc_dvp == dvp) && (ncp->nc_hashval == hashval)) {
			if (strncmp(ncp->nc_name, cnp->cn_nameptr, namelen) == 0 && ncp->nc_name[namelen] == 0)
				break;
		}
	}
	if (ncp == 0) {
		/*
		 * We failed to find an entry
		 */
		NCHSTAT(ncs_miss);
		return (NULL);
	}
	NCHSTAT(ncs_goodhits);

	return (ncp->nc_vp);
}


unsigned int hash_string(const char *cp, int len);
//
// Have to take a len argument because we may only need to
// hash part of a componentname.
//
unsigned int
hash_string(const char *cp, int len)
{
	unsigned hash = 0;

	if (len) {
		while (len--) {
			hash = crc32tab[((hash >> 24) ^ (unsigned char)*cp++)] ^ hash << 8;
		}
	} else {
		while (*cp != '\0') {
			hash = crc32tab[((hash >> 24) ^ (unsigned char)*cp++)] ^ hash << 8;
		}
	}
	/*
	 * the crc generator can legitimately generate
	 * a 0... however, 0 for us means that we
	 * haven't computed a hash, so use 1 instead
	 */
	if (hash == 0)
		hash = 1;
	return hash;
}


/*
 * Lookup an entry in the cache
 *
 * We don't do this if the segment name is long, simply so the cache
 * can avoid holding long names (which would either waste space, or
 * add greatly to the complexity).
 *
 * Lookup is called with dvp pointing to the directory to search,
 * cnp pointing to the name of the entry being sought.  If the lookup
 * succeeds, the vnode is returned in *vpp, and a status of -1 is
 * returned.  If the lookup determines that the name does not exist
 * (negative caching), a status of ENOENT is returned.  If the lookup
 * fails, a status of zero is returned.
 */

int
cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
{
	struct namecache *ncp;
	struct nchashhead *ncpp;
	long namelen = cnp->cn_namelen;
	unsigned int hashval;
	boolean_t have_exclusive = FALSE;
	uint32_t vid;
	vnode_t	 vp;

	if (cnp->cn_hash == 0)
		cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);
	hashval = cnp->cn_hash;

	if (nc_disabled) {
		return 0;
	}

	NAME_CACHE_LOCK_SHARED();

relook:
	ncpp = NCHHASH(dvp, cnp->cn_hash);
	LIST_FOREACH(ncp, ncpp, nc_hash) {
		if ((ncp->nc_dvp == dvp) && (ncp->nc_hashval == hashval)) {
			if (strncmp(ncp->nc_name, cnp->cn_nameptr, namelen) == 0 && ncp->nc_name[namelen] == 0)
				break;
		}
	}
	/* We failed to find an entry */
	if (ncp == 0) {
		NCHSTAT(ncs_miss);
		NAME_CACHE_UNLOCK();
		return (0);
	}

	/* We don't want to have an entry, so dump it */
	if ((cnp->cn_flags & MAKEENTRY) == 0) {
		if (have_exclusive == TRUE) {
			NCHSTAT(ncs_badhits);
			cache_delete(ncp, 1);
			NAME_CACHE_UNLOCK();
			return (0);
		}
		NAME_CACHE_UNLOCK();
		NAME_CACHE_LOCK();
		have_exclusive = TRUE;
		goto relook;
	}
	vp = ncp->nc_vp;

	/* We found a "positive" match, return the vnode */
	if (vp) {
		NCHSTAT(ncs_goodhits);

		vid = vp->v_id;
		NAME_CACHE_UNLOCK();

		if (vnode_getwithvid(vp, vid)) {
#if COLLECT_STATS
			NAME_CACHE_LOCK();
			NCHSTAT(ncs_badvid);
			NAME_CACHE_UNLOCK();
#endif
			return (0);
		}
		*vpp = vp;
		return (-1);
	}

	/* We found a negative match, and want to create it, so purge */
	if (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) {
		if (have_exclusive == TRUE) {
			NCHSTAT(ncs_badhits);
			cache_delete(ncp, 1);
			NAME_CACHE_UNLOCK();
			return (0);
		}
		NAME_CACHE_UNLOCK();
		NAME_CACHE_LOCK();
		have_exclusive = TRUE;
		goto relook;
	}

	/*
	 * We found a "negative" match; ENOENT notifies the client of this match.
	 */
	NCHSTAT(ncs_neghits);

	NAME_CACHE_UNLOCK();
	return (ENOENT);
}
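
/*
 * Typical caller pattern (a sketch, not from a real call site):
 *
 *	switch (cache_lookup(dvp, &vp, cnp)) {
 *	case -1:
 *		// positive hit: vp is returned with an iocount held
 *		break;
 *	case ENOENT:
 *		// negative hit: the name is known not to exist
 *		break;
 *	case 0:
 *		// miss: consult the file system via VNOP_LOOKUP
 *		break;
 *	}
 */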
1850
1851const char *
1852cache_enter_create(vnode_t dvp, vnode_t vp, struct componentname *cnp)
1853{
1854 const char *strname;
1855
1856 if (cnp->cn_hash == 0)
1857 cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);
1858
1859 /*
1860 * grab 2 references on the string entered
1861 * one for the cache_enter_locked to consume
1862 * and the second to be consumed by v_name (vnode_create call point)
1863 */
1864 strname = add_name_internal(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, TRUE, 0);
1865
1866 NAME_CACHE_LOCK();
1867
1868 cache_enter_locked(dvp, vp, cnp, strname);
1869
1870 NAME_CACHE_UNLOCK();
1871
1872 return (strname);
1873}
1874
1875
1876/*
1877 * Add an entry to the cache...
1878 * but first check to see if the directory
1879 * that this entry is to be associated with has
1880 * had any cache_purges applied since we took
1881 * our identity snapshot... this check needs to
1882 * be done behind the name cache lock
1883 */
1884void
1885cache_enter_with_gen(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, int gen)
1886{
1887
1888 if (cnp->cn_hash == 0)
1889 cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);
1890
1891 NAME_CACHE_LOCK();
1892
1893 if (dvp->v_nc_generation == gen)
1894 (void)cache_enter_locked(dvp, vp, cnp, NULL);
1895
1896 NAME_CACHE_UNLOCK();
1897}
1898
1899
1900/*
1901 * Add an entry to the cache.
1902 */
1903void
1904cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
1905{
1906 const char *strname;
1907
1908 if (cnp->cn_hash == 0)
1909 cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);
1910
1911 /*
1912 * grab 1 reference on the string entered
1913 * for the cache_enter_locked to consume
1914 */
1915 strname = add_name_internal(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, FALSE, 0);
1916
1917 NAME_CACHE_LOCK();
1918
1919 cache_enter_locked(dvp, vp, cnp, strname);
1920
1921 NAME_CACHE_UNLOCK();
1922}


static void
cache_enter_locked(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, const char *strname)
{
	struct namecache *ncp, *negp;
	struct nchashhead *ncpp;

	if (nc_disabled)
		return;

	/*
	 * if the entry is for negative caching, vp is NULL
	 */
	if ((vp != NULLVP) && (LIST_FIRST(&vp->v_nclinks))) {
		/*
		 * someone beat us to the punch..
		 * this vnode is already in the cache
		 */
		if (strname != NULL)
			vfs_removename(strname);
		return;
	}
	/*
	 * We allocate a new entry if we are less than the maximum
	 * allowed and the one at the front of the list is in use.
	 * Otherwise we use the one at the front of the list.
	 */
	if (numcache < desiredNodes &&
	    ((ncp = nchead.tqh_first) == NULL ||
	    ncp->nc_hash.le_prev != 0)) {
		/*
		 * Allocate one more entry
		 */
		ncp = (struct namecache *)_MALLOC_ZONE(sizeof(*ncp), M_CACHE, M_WAITOK);
		numcache++;
	} else {
		/*
		 * reuse an old entry
		 */
		ncp = TAILQ_FIRST(&nchead);
		TAILQ_REMOVE(&nchead, ncp, nc_entry);

		if (ncp->nc_hash.le_prev != 0) {
			/*
			 * still in use... we need to
			 * delete it before re-using it
			 */
			NCHSTAT(ncs_stolen);
			cache_delete(ncp, 0);
		}
	}
	NCHSTAT(ncs_enters);

	/*
	 * Fill in cache info; if vp is NULL this is a "negative" cache entry.
	 */
	ncp->nc_vp = vp;
	ncp->nc_dvp = dvp;
	ncp->nc_hashval = cnp->cn_hash;

	if (strname == NULL)
		ncp->nc_name = add_name_internal(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, FALSE, 0);
	else
		ncp->nc_name = strname;

	//
	// If the bytes of the name associated with the vnode differ,
	// use the name associated with the vnode since the file system
	// may have set that explicitly in the case of a lookup on a
	// case-insensitive file system where the case of the looked-up
	// name differs from what is on disk.  For more details, see:
	//   <rdar://problem/8044697> FSEvents doesn't always decompose diacritical unicode chars in the paths of the changed directories
	//
	const char *vn_name = vp ? vp->v_name : NULL;
	unsigned int len = vn_name ? strlen(vn_name) : 0;
	if (vn_name && ncp && ncp->nc_name && strncmp(ncp->nc_name, vn_name, len) != 0) {
		unsigned int hash = hash_string(vn_name, len);

		vfs_removename(ncp->nc_name);
		ncp->nc_name = add_name_internal(vn_name, len, hash, FALSE, 0);
		ncp->nc_hashval = hash;
	}

	/*
	 * make us the newest entry in the cache,
	 * i.e. we'll be the last to be stolen
	 */
	TAILQ_INSERT_TAIL(&nchead, ncp, nc_entry);

	ncpp = NCHHASH(dvp, cnp->cn_hash);
#if DIAGNOSTIC
	{
		struct namecache *p;

		for (p = ncpp->lh_first; p != 0; p = p->nc_hash.le_next)
			if (p == ncp)
				panic("cache_enter: duplicate");
	}
#endif
	/*
	 * make us available to be found via lookup
	 */
	LIST_INSERT_HEAD(ncpp, ncp, nc_hash);

	if (vp) {
		/*
		 * add to the list of name cache entries
		 * that point at vp
		 */
		LIST_INSERT_HEAD(&vp->v_nclinks, ncp, nc_un.nc_link);
	} else {
		/*
		 * this is a negative cache entry (vp == NULL),
		 * stick it on the negative cache list.
		 */
		TAILQ_INSERT_TAIL(&neghead, ncp, nc_un.nc_negentry);

		ncs_negtotal++;

		if (ncs_negtotal > desiredNegNodes) {
			/*
			 * if we've reached our desired limit
			 * of negative cache entries, delete
			 * the oldest
			 */
			negp = TAILQ_FIRST(&neghead);
			cache_delete(negp, 1);
		}
	}
	/*
	 * add us to the list of name cache entries that
	 * are children of dvp
	 */
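	/*
	 * negative entries go at the head and positive entries at the
	 * tail, so the negative children of dvp cluster at the front of
	 * v_ncchildren; cache_purge_negatives() depends on this ordering
	 * to stop scanning at the first positive entry it encounters
	 */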
	if (vp)
		TAILQ_INSERT_TAIL(&dvp->v_ncchildren, ncp, nc_child);
	else
		TAILQ_INSERT_HEAD(&dvp->v_ncchildren, ncp, nc_child);
}


/*
 * Initialize CRC-32 remainder table.
 */
static void init_crc32(void)
{
	/*
	 * the CRC-32 generator polynomial is:
	 * x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^10
	 *     + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1
	 */
	unsigned int crc32_polynomial = 0x04c11db7;
	unsigned int i, j;

	/*
	 * pre-calculate the CRC-32 remainder for each possible octet encoding
	 */
	for (i = 0; i < 256; i++) {
		unsigned int crc_rem = i << 24;

		for (j = 0; j < 8; j++) {
			if (crc_rem & 0x80000000)
				crc_rem = (crc_rem << 1) ^ crc32_polynomial;
			else
				crc_rem = (crc_rem << 1);
		}
		crc32tab[i] = crc_rem;
	}
}
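
/*
 * Illustrative sketch (an addition, not original code): how the crc32tab
 * built above is typically consumed when folding one byte at a time into
 * a running MSB-first CRC-32 remainder; hash_string() in this file is
 * assumed to do something equivalent over the bytes of a name.
 */
static inline unsigned int
crc32_fold_byte_sketch(unsigned int crc, unsigned char byte)
{
	/* xor the next byte into the top octet, then look up the 8-bit step */
	return (crc << 8) ^ crc32tab[(crc >> 24) ^ byte];
}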


/*
 * Name cache initialization, from vfs_init() when we are booting
 */
void
nchinit(void)
{
	int i;

	desiredNegNodes = (desiredvnodes / 10);
	desiredNodes = desiredvnodes + desiredNegNodes;

	TAILQ_INIT(&nchead);
	TAILQ_INIT(&neghead);

	init_crc32();

	nchashtbl = hashinit(MAX(CONFIG_NC_HASH, (2 * desiredNodes)), M_CACHE, &nchash);
	nchashmask = nchash;
	nchash++;
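	/*
	 * hashinit() is assumed to store a power-of-two-minus-one mask
	 * through its third argument, so after the increment above,
	 * nchashmask holds the mask consumed by NCHHASH() and nchash
	 * holds the table size
	 */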

	init_string_table();

	/* Allocate name cache lock group attribute and group */
	namecache_lck_grp_attr = lck_grp_attr_alloc_init();

	namecache_lck_grp = lck_grp_alloc_init("Name Cache", namecache_lck_grp_attr);

	/* Allocate name cache lock attribute */
	namecache_lck_attr = lck_attr_alloc_init();

	/* Allocate name cache lock */
	namecache_rw_lock = lck_rw_alloc_init(namecache_lck_grp, namecache_lck_attr);

	/* Allocate string cache lock group attribute and group */
	strcache_lck_grp_attr = lck_grp_attr_alloc_init();

	strcache_lck_grp = lck_grp_alloc_init("String Cache", strcache_lck_grp_attr);

	/* Allocate string cache lock attribute */
	strcache_lck_attr = lck_attr_alloc_init();

	/* Allocate string cache lock */
	strtable_rw_lock = lck_rw_alloc_init(strcache_lck_grp, strcache_lck_attr);

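	/*
	 * the per-bucket string cache mutexes are striped across
	 * NUM_STRCACHE_LOCKS locks; a bucket's lock is selected by
	 * hash_index % NUM_STRCACHE_LOCKS (see add_name_internal
	 * and vfs_removename below)
	 */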
	for (i = 0; i < NUM_STRCACHE_LOCKS; i++)
		lck_mtx_init(&strcache_mtx_locks[i], strcache_lck_grp, strcache_lck_attr);
}

void
name_cache_lock_shared(void)
{
	lck_rw_lock_shared(namecache_rw_lock);
}

void
name_cache_lock(void)
{
	lck_rw_lock_exclusive(namecache_rw_lock);
}

void
name_cache_unlock(void)
{
	lck_rw_done(namecache_rw_lock);
}


int
resize_namecache(int newsize)
{
	struct nchashhead *new_table;
	struct nchashhead *old_table;
	struct nchashhead *old_head, *head;
	struct namecache *entry, *next;
	uint32_t i, hashval;
	int dNodes, dNegNodes, nelements;
	u_long new_size, old_size;

	if (newsize < 0)
		return EINVAL;

	dNegNodes = (newsize / 10);
	dNodes = newsize + dNegNodes;
	// we don't support shrinking yet
	if (dNodes <= desiredNodes) {
		return 0;
	}

	if (os_mul_overflow(dNodes, 2, &nelements)) {
		return EINVAL;
	}

	new_table = hashinit(nelements, M_CACHE, &nchashmask);
	new_size = nchashmask + 1;

	if (new_table == NULL) {
		return ENOMEM;
	}

	NAME_CACHE_LOCK();
	// do the switch!
	old_table = nchashtbl;
	nchashtbl = new_table;
	old_size = nchash;
	nchash = new_size;

	// walk the old table and insert all the entries into
	// the new table
	//
	for (i = 0; i < old_size; i++) {
		old_head = &old_table[i];
		for (entry = old_head->lh_first; entry != NULL; entry = next) {
			//
			// XXXdbg - Beware: this assumes that hash_string() does
			// the same thing as what happens in lookup() over in
			// vfs_lookup.c
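			//
			// hash_string() is assumed to treat a zero-length
			// argument as "hash up to the terminating NUL",
			// which is why no explicit length is passed below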
			hashval = hash_string(entry->nc_name, 0);
			entry->nc_hashval = hashval;
			head = NCHHASH(entry->nc_dvp, hashval);

			next = entry->nc_hash.le_next;
			LIST_INSERT_HEAD(head, entry, nc_hash);
		}
	}
	desiredNodes = dNodes;
	desiredNegNodes = dNegNodes;

	NAME_CACHE_UNLOCK();
	FREE(old_table, M_CACHE);

	return 0;
}

static void
cache_delete(struct namecache *ncp, int free_entry)
{
	NCHSTAT(ncs_deletes);

	if (ncp->nc_vp) {
		LIST_REMOVE(ncp, nc_un.nc_link);
	} else {
		TAILQ_REMOVE(&neghead, ncp, nc_un.nc_negentry);
		ncs_negtotal--;
	}
	TAILQ_REMOVE(&(ncp->nc_dvp->v_ncchildren), ncp, nc_child);

	LIST_REMOVE(ncp, nc_hash);
	/*
	 * this field is used to indicate
	 * that the entry is in use and
	 * must be deleted before it can
	 * be reused...
	 */
	ncp->nc_hash.le_prev = NULL;

	vfs_removename(ncp->nc_name);
	ncp->nc_name = NULL;
	if (free_entry) {
		TAILQ_REMOVE(&nchead, ncp, nc_entry);
		FREE_ZONE(ncp, sizeof(*ncp), M_CACHE);
		numcache--;
	}
}


/*
 * purge the entries associated with the
 * specified vnode from the name cache
 */
void
cache_purge(vnode_t vp)
{
	struct namecache *ncp;
	kauth_cred_t tcred = NULL;

	if ((LIST_FIRST(&vp->v_nclinks) == NULL) &&
	    (TAILQ_FIRST(&vp->v_ncchildren) == NULL) &&
	    (vp->v_cred == NOCRED) &&
	    (vp->v_parent == NULLVP))
		return;

	NAME_CACHE_LOCK();

	if (vp->v_parent)
		vp->v_parent->v_nc_generation++;

	while ((ncp = LIST_FIRST(&vp->v_nclinks)))
		cache_delete(ncp, 1);

	while ((ncp = TAILQ_FIRST(&vp->v_ncchildren)))
		cache_delete(ncp, 1);

	/*
	 * Use a temp variable to avoid calling kauth_cred_unref()
	 * while NAME_CACHE_LOCK is held
	 */
	tcred = vp->v_cred;
	vp->v_cred = NOCRED;
	vp->v_authorized_actions = 0;

	NAME_CACHE_UNLOCK();

	if (IS_VALID_CRED(tcred))
		kauth_cred_unref(&tcred);
}

/*
 * Purge all negative cache entries that are children of the
 * given vnode.  A case-insensitive file system (or any file
 * system that has multiple equivalent names for the same
 * directory entry) can use this when creating or renaming
 * to remove negative entries that may no longer apply.
 */
void
cache_purge_negatives(vnode_t vp)
{
	struct namecache *ncp, *next_ncp;

	NAME_CACHE_LOCK();

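	/*
	 * negative entries cluster at the head of v_ncchildren
	 * (see cache_enter_locked), so we can stop at the first
	 * positive entry we encounter
	 */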
	TAILQ_FOREACH_SAFE(ncp, &vp->v_ncchildren, nc_child, next_ncp) {
		if (ncp->nc_vp)
			break;

		cache_delete(ncp, 1);
	}

	NAME_CACHE_UNLOCK();
}

/*
 * Flush all entries referencing a particular filesystem.
 *
 * Since we need to check it anyway, we will flush all the invalid
 * entries at the same time.
 */
void
cache_purgevfs(struct mount *mp)
{
	struct nchashhead *ncpp;
	struct namecache *ncp;

	NAME_CACHE_LOCK();
	/* Scan hash tables for applicable entries */
	for (ncpp = &nchashtbl[nchash - 1]; ncpp >= nchashtbl; ncpp--) {
restart:
		for (ncp = ncpp->lh_first; ncp != 0; ncp = ncp->nc_hash.le_next) {
			if (ncp->nc_dvp->v_mount == mp) {
				cache_delete(ncp, 0);
				goto restart;
			}
		}
	}
	NAME_CACHE_UNLOCK();
}


//
// String ref routines
//
static LIST_HEAD(stringhead, string_t) *string_ref_table;
static u_long string_table_mask;
static uint32_t filled_buckets = 0;


typedef struct string_t {
	LIST_ENTRY(string_t) hash_chain;
	const char *str;
	uint32_t refcount;
} string_t;


static void
resize_string_ref_table(void)
{
	struct stringhead *new_table;
	struct stringhead *old_table;
	struct stringhead *old_head, *head;
	string_t *entry, *next;
	uint32_t i, hashval;
	u_long new_mask, old_mask;

	/*
	 * need to hold the table lock exclusively
	 * in order to grow the table... need to recheck
	 * the need to resize again after we've taken
	 * the lock exclusively in case some other thread
	 * beat us to the punch
	 */
	lck_rw_lock_exclusive(strtable_rw_lock);

	if (4 * filled_buckets < ((string_table_mask + 1) * 3)) {
		lck_rw_done(strtable_rw_lock);
		return;
	}
	new_table = hashinit((string_table_mask + 1) * 2, M_CACHE, &new_mask);

	if (new_table == NULL) {
		printf("failed to resize the hash table.\n");
		lck_rw_done(strtable_rw_lock);
		return;
	}

	// do the switch!
	old_table = string_ref_table;
	string_ref_table = new_table;
	old_mask = string_table_mask;
	string_table_mask = new_mask;
	filled_buckets = 0;

	// walk the old table and insert all the entries into
	// the new table
	//
	for (i = 0; i <= old_mask; i++) {
		old_head = &old_table[i];
		for (entry = old_head->lh_first; entry != NULL; entry = next) {
			hashval = hash_string((const char *)entry->str, 0);
			head = &string_ref_table[hashval & string_table_mask];
			if (head->lh_first == NULL) {
				filled_buckets++;
			}
			next = entry->hash_chain.le_next;
			LIST_INSERT_HEAD(head, entry, hash_chain);
		}
	}
	lck_rw_done(strtable_rw_lock);

	FREE(old_table, M_CACHE);
}


static void
init_string_table(void)
{
	string_ref_table = hashinit(CONFIG_VFS_NAMES, M_CACHE, &string_table_mask);
}


const char *
vfs_addname(const char *name, uint32_t len, u_int hashval, u_int flags)
{
	return (add_name_internal(name, len, hashval, FALSE, flags));
}


static const char *
add_name_internal(const char *name, uint32_t len, u_int hashval, boolean_t need_extra_ref, __unused u_int flags)
{
	struct stringhead *head;
	string_t *entry;
	uint32_t chain_len = 0;
	uint32_t hash_index;
	uint32_t lock_index;
	char *ptr;

	if (len > MAXPATHLEN)
		len = MAXPATHLEN;

	/*
	 * if the length already accounts for the null byte, then
	 * subtract one so later on we don't index past the end
	 * of the string.
	 */
	if (len > 0 && name[len - 1] == '\0') {
		len--;
	}
	if (hashval == 0) {
		hashval = hash_string(name, len);
	}

	/*
	 * take this lock 'shared' to keep the hash stable;
	 * if someone else decides to grow the pool they
	 * will take this lock exclusively
	 */
	lck_rw_lock_shared(strtable_rw_lock);

	/*
	 * If the table gets more than 3/4 full, resize it
	 */
	if (4 * filled_buckets >= ((string_table_mask + 1) * 3)) {
		lck_rw_done(strtable_rw_lock);

		resize_string_ref_table();

		lck_rw_lock_shared(strtable_rw_lock);
	}
	hash_index = hashval & string_table_mask;
	lock_index = hash_index % NUM_STRCACHE_LOCKS;

	head = &string_ref_table[hash_index];

	lck_mtx_lock_spin(&strcache_mtx_locks[lock_index]);

	for (entry = head->lh_first; entry != NULL; chain_len++, entry = entry->hash_chain.le_next) {
		if (strncmp(entry->str, name, len) == 0 && entry->str[len] == 0) {
			entry->refcount++;
			break;
		}
	}
	if (entry == NULL) {
		lck_mtx_convert_spin(&strcache_mtx_locks[lock_index]);
		/*
		 * it wasn't already there so add it.
		 */
		MALLOC(entry, string_t *, sizeof(string_t) + len + 1, M_TEMP, M_WAITOK);

		if (head->lh_first == NULL) {
			OSAddAtomic(1, &filled_buckets);
		}
		ptr = (char *)((char *)entry + sizeof(string_t));
		strncpy(ptr, name, len);
		ptr[len] = '\0';
		entry->str = ptr;
		entry->refcount = 1;
		LIST_INSERT_HEAD(head, entry, hash_chain);
	}
	if (need_extra_ref == TRUE)
		entry->refcount++;

	lck_mtx_unlock(&strcache_mtx_locks[lock_index]);
	lck_rw_done(strtable_rw_lock);

	return (const char *)entry->str;
}
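
/*
 * Illustrative pairing (an addition, not original code): each successful
 * vfs_addname() takes one reference on the interned string, and
 * vfs_removename() below drops one; the backing storage is reclaimed
 * when the last reference goes away:
 *
 *	const char *nm = vfs_addname("foo", 3, 0, 0);
 *	...
 *	vfs_removename(nm);	// frees the storage if this was the last ref
 */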


int
vfs_removename(const char *nameref)
{
	struct stringhead *head;
	string_t *entry;
	uint32_t hashval;
	uint32_t hash_index;
	uint32_t lock_index;
	int retval = ENOENT;

	hashval = hash_string(nameref, 0);

	/*
	 * take this lock 'shared' to keep the hash stable;
	 * if someone else decides to grow the pool they
	 * will take this lock exclusively
	 */
	lck_rw_lock_shared(strtable_rw_lock);
	/*
	 * must compute the head behind the table lock
	 * since the size and location of the table
	 * can change on the fly
	 */
	hash_index = hashval & string_table_mask;
	lock_index = hash_index % NUM_STRCACHE_LOCKS;

	head = &string_ref_table[hash_index];

	lck_mtx_lock_spin(&strcache_mtx_locks[lock_index]);

	for (entry = head->lh_first; entry != NULL; entry = entry->hash_chain.le_next) {
		if (entry->str == nameref) {
			entry->refcount--;

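			/*
			 * on the last reference, unhash the entry so the
			 * FREE below can reclaim it; otherwise clear the
			 * local pointer so nothing gets freed
			 */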
			if (entry->refcount == 0) {
				LIST_REMOVE(entry, hash_chain);

				if (head->lh_first == NULL) {
					OSAddAtomic(-1, &filled_buckets);
				}
			} else {
				entry = NULL;
			}
			retval = 0;
			break;
		}
	}
	lck_mtx_unlock(&strcache_mtx_locks[lock_index]);
	lck_rw_done(strtable_rw_lock);

	if (entry != NULL)
		FREE(entry, M_TEMP);

	return retval;
}


#ifdef DUMP_STRING_TABLE
void
dump_string_table(void)
{
	struct stringhead *head;
	string_t *entry;
	u_long i;

	lck_rw_lock_shared(strtable_rw_lock);

	for (i = 0; i <= string_table_mask; i++) {
		head = &string_ref_table[i];
		for (entry = head->lh_first; entry != NULL; entry = entry->hash_chain.le_next) {
			printf("%6u - %s\n", entry->refcount, entry->str);
		}
	}
	lck_rw_done(strtable_rw_lock);
}
#endif /* DUMP_STRING_TABLE */