1/*
2 * Copyright (c) 1995-2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
66 */
67/*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
73
74#include <sys/param.h>
75#include <sys/systm.h>
76#include <sys/namei.h>
77#include <sys/filedesc.h>
78#include <sys/kernel.h>
79#include <sys/file_internal.h>
80#include <sys/stat.h>
81#include <sys/vnode_internal.h>
82#include <sys/mount_internal.h>
83#include <sys/proc_internal.h>
84#include <sys/kauth.h>
85#include <sys/uio_internal.h>
86#include <sys/malloc.h>
87#include <sys/mman.h>
88#include <sys/dirent.h>
89#include <sys/attr.h>
90#include <sys/sysctl.h>
91#include <sys/ubc.h>
92#include <sys/quota.h>
93#include <sys/kdebug.h>
94#include <sys/fsevents.h>
95#include <sys/imgsrc.h>
96#include <sys/sysproto.h>
97#include <sys/xattr.h>
98#include <sys/fcntl.h>
99#include <sys/fsctl.h>
100#include <sys/ubc_internal.h>
101#include <sys/disk.h>
102#include <sys/content_protection.h>
103#include <sys/clonefile.h>
104#include <sys/snapshot.h>
105#include <sys/priv.h>
106#include <machine/cons.h>
107#include <machine/limits.h>
108#include <miscfs/specfs/specdev.h>
109
110#include <vfs/vfs_disk_conditioner.h>
111
112#include <security/audit/audit.h>
113#include <bsm/audit_kevents.h>
114
115#include <mach/mach_types.h>
116#include <kern/kern_types.h>
117#include <kern/kalloc.h>
118#include <kern/task.h>
119
120#include <vm/vm_pageout.h>
121#include <vm/vm_protos.h>
122
123#include <libkern/OSAtomic.h>
124#include <pexpert/pexpert.h>
125#include <IOKit/IOBSD.h>
126
127#if ROUTEFS
128#include <miscfs/routefs/routefs.h>
129#endif /* ROUTEFS */
130
131#if CONFIG_MACF
132#include <security/mac.h>
133#include <security/mac_framework.h>
134#endif
135
136#if CONFIG_FSE
137#define GET_PATH(x) \
138 (x) = get_pathbuff();
139#define RELEASE_PATH(x) \
140 release_pathbuff(x);
141#else
142#define GET_PATH(x) \
143 MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
144#define RELEASE_PATH(x) \
145 FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
146#endif /* CONFIG_FSE */
147
148#ifndef HFS_GET_BOOT_INFO
149#define HFS_GET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00004)
150#endif
151
152#ifndef HFS_SET_BOOT_INFO
153#define HFS_SET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00005)
154#endif
155
156#ifndef APFSIOC_REVERT_TO_SNAPSHOT
157#define APFSIOC_REVERT_TO_SNAPSHOT _IOW('J', 1, u_int64_t)
158#endif
159
160extern void disk_conditioner_unmount(mount_t mp);
161
162/* struct for checkdirs iteration */
163struct cdirargs {
164 vnode_t olddp;
165 vnode_t newdp;
166};
167/* callback for checkdirs iteration */
168static int checkdirs_callback(proc_t p, void * arg);
169
170static int change_dir(struct nameidata *ndp, vfs_context_t ctx);
171static int checkdirs(vnode_t olddp, vfs_context_t ctx);
172void enablequotas(struct mount *mp, vfs_context_t ctx);
173static int getfsstat_callback(mount_t mp, void * arg);
174static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
175static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
176static int sync_callback(mount_t, void *);
177static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
178 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
179 boolean_t partial_copy);
180static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp,
181 user_addr_t bufp);
182static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
183static int mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
184 struct componentname *cnp, user_addr_t fsmountargs,
185 int flags, uint32_t internal_flags, char *labelstr, boolean_t kernelmount,
186 vfs_context_t ctx);
187void vfs_notify_mount(vnode_t pdvp);
188
189int prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth);
190
191struct fd_vn_data * fg_vn_data_alloc(void);
192
/*
 * Max retries for ENOENT returns from vn_authorize_{rmdir, unlink, rename}.
 * Concurrent lookups (or lookups by id) on hard links can cause vn_getpath
 * (which does not re-enter the filesystem, as vn_getpath_fsenter does) to
 * return ENOENT, because the path cannot be produced from the name cache
 * alone.  We have no option but to retry and hope to complete one
 * namei->reverse-path generation without an intervening lookup or
 * lookup-by-id on the hard-linked item.  This is only an issue for MAC
 * hooks that cannot re-enter the filesystem, which currently are the hooks
 * for rename, unlink and rmdir.
 */
203#define MAX_AUTHORIZE_ENOENT_RETRIES 1024
204
205static int rmdirat_internal(vfs_context_t, int, user_addr_t, enum uio_seg);
206
207static int fsgetpath_internal(vfs_context_t, int, uint64_t, vm_size_t, caddr_t, int *);
208
209#ifdef CONFIG_IMGSRC_ACCESS
210static int authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx);
211static int place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx);
212static void undo_place_on_covered_vp(mount_t mp, vnode_t vp);
213static int mount_begin_update(mount_t mp, vfs_context_t ctx, int flags);
214static void mount_end_update(mount_t mp);
215static int relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, const char *fsname, vfs_context_t ctx, boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index);
216#endif /* CONFIG_IMGSRC_ACCESS */
217
218//snapshot functions
219#if CONFIG_MNT_ROOTSNAP
220static int snapshot_root(int dirfd, user_addr_t name, uint32_t flags, vfs_context_t ctx);
221#else
222static int snapshot_root(int dirfd, user_addr_t name, uint32_t flags, vfs_context_t ctx) __attribute__((unused));
223#endif
224
225int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);
226
227__private_extern__
228int sync_internal(void);
229
230__private_extern__
231int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
232
233extern lck_grp_t *fd_vn_lck_grp;
234extern lck_grp_attr_t *fd_vn_lck_grp_attr;
235extern lck_attr_t *fd_vn_lck_attr;
236
/*
 * Incremented each time a mount or unmount operation occurs; used to
 * invalidate the cached value of the rootvp in the mount structure
 * utilized by cache_lookup_path.
 */
242uint32_t mount_generation = 0;
243
244/* counts number of mount and unmount operations */
245unsigned int vfs_nummntops=0;
246
247extern const struct fileops vnops;
248#if CONFIG_APPLEDOUBLE
249extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
250#endif /* CONFIG_APPLEDOUBLE */
251
252/*
253 * Virtual File System System Calls
254 */
255
256#if NFSCLIENT || DEVFS || ROUTEFS
/*
 * Private in-kernel mounting SPI (not exported); used by NFS, devfs
 * and routefs.
 */
260 __private_extern__
261boolean_t
262vfs_iskernelmount(mount_t mp)
263{
264 return ((mp->mnt_kern_flag & MNTK_KERNEL_MOUNT) ? TRUE : FALSE);
265}
266
267 __private_extern__
268int
269kernel_mount(char *fstype, vnode_t pvp, vnode_t vp, const char *path,
270 void *data, __unused size_t datalen, int syscall_flags, __unused uint32_t kern_flags, vfs_context_t ctx)
271{
272 struct nameidata nd;
273 boolean_t did_namei;
274 int error;
275
276 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
277 UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
278
279 /*
280 * Get the vnode to be covered if it's not supplied
281 */
282 if (vp == NULLVP) {
283 error = namei(&nd);
284 if (error)
285 return (error);
286 vp = nd.ni_vp;
287 pvp = nd.ni_dvp;
288 did_namei = TRUE;
289 } else {
290 char *pnbuf = CAST_DOWN(char *, path);
291
292 nd.ni_cnd.cn_pnbuf = pnbuf;
293 nd.ni_cnd.cn_pnlen = strlen(pnbuf) + 1;
294 did_namei = FALSE;
295 }
296
297 error = mount_common(fstype, pvp, vp, &nd.ni_cnd, CAST_USER_ADDR_T(data),
298 syscall_flags, kern_flags, NULL, TRUE, ctx);
299
300 if (did_namei) {
301 vnode_put(vp);
302 vnode_put(pvp);
303 nameidone(&nd);
304 }
305
306 return (error);
307}
#endif /* NFSCLIENT || DEVFS || ROUTEFS */
309
310/*
311 * Mount a file system.
312 */
313/* ARGSUSED */
314int
315mount(proc_t p, struct mount_args *uap, __unused int32_t *retval)
316{
317 struct __mac_mount_args muap;
318
319 muap.type = uap->type;
320 muap.path = uap->path;
321 muap.flags = uap->flags;
322 muap.data = uap->data;
323 muap.mac_p = USER_ADDR_NULL;
324 return (__mac_mount(p, &muap, retval));
325}
326
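/*
 * fmount: mount a file system on the directory referenced by a file
 * descriptor rather than by path.  The covered vnode and its parent are
 * derived from the descriptor; the heavy lifting is done by mount_common().
 */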
327int
328fmount(__unused proc_t p, struct fmount_args *uap, __unused int32_t *retval)
329{
330 struct componentname cn;
331 vfs_context_t ctx = vfs_context_current();
332 size_t dummy = 0;
333 int error;
334 int flags = uap->flags;
335 char fstypename[MFSNAMELEN];
336 char *labelstr = NULL; /* regular mount call always sets it to NULL for __mac_mount() */
337 vnode_t pvp;
338 vnode_t vp;
339
340 AUDIT_ARG(fd, uap->fd);
341 AUDIT_ARG(fflags, flags);
342 /* fstypename will get audited by mount_common */
343
344 /* Sanity check the flags */
345 if (flags & (MNT_IMGSRC_BY_INDEX|MNT_ROOTFS)) {
346 return (ENOTSUP);
347 }
348
349 if (flags & MNT_UNION) {
350 return (EPERM);
351 }
352
353 error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
354 if (error) {
355 return (error);
356 }
357
358 if ((error = file_vnode(uap->fd, &vp)) != 0) {
359 return (error);
360 }
361
362 if ((error = vnode_getwithref(vp)) != 0) {
363 file_drop(uap->fd);
364 return (error);
365 }
366
367 pvp = vnode_getparent(vp);
368 if (pvp == NULL) {
369 vnode_put(vp);
370 file_drop(uap->fd);
371 return (EINVAL);
372 }
373
374 memset(&cn, 0, sizeof(struct componentname));
375 MALLOC(cn.cn_pnbuf, char *, MAXPATHLEN, M_TEMP, M_WAITOK);
376 cn.cn_pnlen = MAXPATHLEN;
377
378 if((error = vn_getpath(vp, cn.cn_pnbuf, &cn.cn_pnlen)) != 0) {
379 FREE(cn.cn_pnbuf, M_TEMP);
380 vnode_put(pvp);
381 vnode_put(vp);
382 file_drop(uap->fd);
383 return (error);
384 }
385
386 error = mount_common(fstypename, pvp, vp, &cn, uap->data, flags, 0, labelstr, FALSE, ctx);
387
388 FREE(cn.cn_pnbuf, M_TEMP);
389 vnode_put(pvp);
390 vnode_put(vp);
391 file_drop(uap->fd);
392
393 return (error);
394}
395
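/*
 * Notify interested parties that a new file system has been mounted:
 * post a VQ_MOUNT vfs event and a NOTE_WRITE knote on the parent of
 * the covered vnode.
 */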
396void
397vfs_notify_mount(vnode_t pdvp)
398{
399 vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
400 lock_vnode_and_post(pdvp, NOTE_WRITE);
401}
402
403/*
404 * __mac_mount:
405 * Mount a file system taking into account MAC label behavior.
406 * See mount(2) man page for more information
407 *
408 * Parameters: p Process requesting the mount
409 * uap User argument descriptor (see below)
410 * retval (ignored)
411 *
412 * Indirect: uap->type Filesystem type
413 * uap->path Path to mount
414 * uap->data Mount arguments
415 * uap->mac_p MAC info
416 * uap->flags Mount flags
417 *
418 *
419 * Returns: 0 Success
420 * !0 Not success
421 */
422boolean_t root_fs_upgrade_try = FALSE;
423
424int
425__mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int32_t *retval)
426{
427 vnode_t pvp = NULL;
428 vnode_t vp = NULL;
429 int need_nameidone = 0;
430 vfs_context_t ctx = vfs_context_current();
431 char fstypename[MFSNAMELEN];
432 struct nameidata nd;
433 size_t dummy=0;
434 char *labelstr = NULL;
435 int flags = uap->flags;
436 int error;
437#if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
438 boolean_t is_64bit = IS_64BIT_PROCESS(p);
439#else
440#pragma unused(p)
441#endif
442 /*
443 * Get the fs type name from user space
444 */
445 error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
446 if (error)
447 return (error);
448
449 /*
450 * Get the vnode to be covered
451 */
452 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
453 UIO_USERSPACE, uap->path, ctx);
454 error = namei(&nd);
455 if (error) {
456 goto out;
457 }
458 need_nameidone = 1;
459 vp = nd.ni_vp;
460 pvp = nd.ni_dvp;
461
462#ifdef CONFIG_IMGSRC_ACCESS
463 /* Mounting image source cannot be batched with other operations */
464 if (flags == MNT_IMGSRC_BY_INDEX) {
465 error = relocate_imageboot_source(pvp, vp, &nd.ni_cnd, fstypename,
466 ctx, is_64bit, uap->data, (flags == MNT_IMGSRC_BY_INDEX));
467 goto out;
468 }
469#endif /* CONFIG_IMGSRC_ACCESS */
470
471#if CONFIG_MACF
472 /*
473 * Get the label string (if any) from user space
474 */
475 if (uap->mac_p != USER_ADDR_NULL) {
476 struct user_mac mac;
477 size_t ulen = 0;
478
479 if (is_64bit) {
480 struct user64_mac mac64;
481 error = copyin(uap->mac_p, &mac64, sizeof(mac64));
482 mac.m_buflen = mac64.m_buflen;
483 mac.m_string = mac64.m_string;
484 } else {
485 struct user32_mac mac32;
486 error = copyin(uap->mac_p, &mac32, sizeof(mac32));
487 mac.m_buflen = mac32.m_buflen;
488 mac.m_string = mac32.m_string;
489 }
490 if (error)
491 goto out;
492 if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
493 (mac.m_buflen < 2)) {
494 error = EINVAL;
495 goto out;
496 }
497 MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
498 error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
499 if (error) {
500 goto out;
501 }
502 AUDIT_ARG(mac_string, labelstr);
503 }
504#endif /* CONFIG_MACF */
505
506 AUDIT_ARG(fflags, flags);
507
508#if SECURE_KERNEL
509 if (flags & MNT_UNION) {
510 /* No union mounts on release kernels */
511 error = EPERM;
512 goto out;
513 }
514#endif
515
516 if ((vp->v_flag & VROOT) &&
517 (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
518 if (!(flags & MNT_UNION)) {
519 flags |= MNT_UPDATE;
520 }
521 else {
			/*
			 * For a union mount on '/', treat it as a fresh
			 * mount instead of an update.
			 * Otherwise, union mounting on '/' used to panic the
			 * system, since mnt_vnodecovered was found to be
			 * NULL for '/', which unionlookup requires after it
			 * gets ENOENT on a union mount.
			 */
530 flags = (flags & ~(MNT_UPDATE));
531 }
532
533#if SECURE_KERNEL
534 if ((flags & MNT_RDONLY) == 0) {
535 /* Release kernels are not allowed to mount "/" as rw */
536 error = EPERM;
537 goto out;
538 }
539#endif
540 /*
541 * See 7392553 for more details on why this check exists.
542 * Suffice to say: If this check is ON and something tries
543 * to mount the rootFS RW, we'll turn off the codesign
544 * bitmap optimization.
545 */
546#if CHECK_CS_VALIDATION_BITMAP
547 if ((flags & MNT_RDONLY) == 0 ) {
548 root_fs_upgrade_try = TRUE;
549 }
550#endif
551 }
552
553 error = mount_common(fstypename, pvp, vp, &nd.ni_cnd, uap->data, flags, 0,
554 labelstr, FALSE, ctx);
555
556out:
557
558#if CONFIG_MACF
559 if (labelstr)
560 FREE(labelstr, M_MACTEMP);
561#endif /* CONFIG_MACF */
562
563 if (vp) {
564 vnode_put(vp);
565 }
566 if (pvp) {
567 vnode_put(pvp);
568 }
569 if (need_nameidone) {
570 nameidone(&nd);
571 }
572
573 return (error);
574}
575
/*
 * common mount implementation (final stage of mounting)
 *
 * Arguments:
 *  fstypename		file system type (i.e., its VFS name)
 *  pvp			parent of covered vnode
 *  vp			covered vnode
 *  cnp			component name (i.e., path) of covered vnode
 *  flags		generic mount flags
 *  internal_flags	kernel-internal mount flags (KERNEL_MOUNT_*)
 *  fsmountargs		file system specific data
 *  labelstr		optional MAC label
 *  kernelmount		TRUE for mounts initiated from inside the kernel
 *  ctx			caller's context
 */
590static int
591mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
592 struct componentname *cnp, user_addr_t fsmountargs, int flags, uint32_t internal_flags,
593 char *labelstr, boolean_t kernelmount, vfs_context_t ctx)
594{
595#if !CONFIG_MACF
596#pragma unused(labelstr)
597#endif
598 struct vnode *devvp = NULLVP;
599 struct vnode *device_vnode = NULLVP;
600#if CONFIG_MACF
601 struct vnode *rvp;
602#endif
603 struct mount *mp;
604 struct vfstable *vfsp = (struct vfstable *)0;
605 struct proc *p = vfs_context_proc(ctx);
606 int error, flag = 0;
607 user_addr_t devpath = USER_ADDR_NULL;
608 int ronly = 0;
609 int mntalloc = 0;
610 boolean_t vfsp_ref = FALSE;
611 boolean_t is_rwlock_locked = FALSE;
612 boolean_t did_rele = FALSE;
613 boolean_t have_usecount = FALSE;
614
615 /*
616 * Process an update for an existing mount
617 */
618 if (flags & MNT_UPDATE) {
619 if ((vp->v_flag & VROOT) == 0) {
620 error = EINVAL;
621 goto out1;
622 }
623 mp = vp->v_mount;
624
625 /* unmount in progress return error */
626 mount_lock_spin(mp);
627 if (mp->mnt_lflag & MNT_LUNMOUNT) {
628 mount_unlock(mp);
629 error = EBUSY;
630 goto out1;
631 }
632 mount_unlock(mp);
633 lck_rw_lock_exclusive(&mp->mnt_rwlock);
634 is_rwlock_locked = TRUE;
635 /*
636 * We only allow the filesystem to be reloaded if it
637 * is currently mounted read-only.
638 */
639 if ((flags & MNT_RELOAD) &&
640 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
641 error = ENOTSUP;
642 goto out1;
643 }
644
645 /*
646 * If content protection is enabled, update mounts are not
647 * allowed to turn it off.
648 */
649 if ((mp->mnt_flag & MNT_CPROTECT) &&
650 ((flags & MNT_CPROTECT) == 0)) {
651 error = EINVAL;
652 goto out1;
653 }
654
655#ifdef CONFIG_IMGSRC_ACCESS
656 /* Can't downgrade the backer of the root FS */
657 if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) &&
658 (!vfs_isrdonly(mp)) && (flags & MNT_RDONLY)) {
659 error = ENOTSUP;
660 goto out1;
661 }
662#endif /* CONFIG_IMGSRC_ACCESS */
663
664 /*
665 * Only root, or the user that did the original mount is
666 * permitted to update it.
667 */
668 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
669 (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
670 goto out1;
671 }
672#if CONFIG_MACF
673 error = mac_mount_check_remount(ctx, mp);
674 if (error != 0) {
675 goto out1;
676 }
677#endif
678 /*
679 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
680 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
681 */
682 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
683 flags |= MNT_NOSUID | MNT_NODEV;
684 if (mp->mnt_flag & MNT_NOEXEC)
685 flags |= MNT_NOEXEC;
686 }
687 flag = mp->mnt_flag;
688
689
690
691 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
692
693 vfsp = mp->mnt_vtable;
694 goto update;
695 }
696
697 /*
698 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
699 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
700 */
701 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
702 flags |= MNT_NOSUID | MNT_NODEV;
703 if (vp->v_mount->mnt_flag & MNT_NOEXEC)
704 flags |= MNT_NOEXEC;
705 }
706
707 /* XXXAUDIT: Should we capture the type on the error path as well? */
708 AUDIT_ARG(text, fstypename);
709 mount_list_lock();
710 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
711 if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN)) {
712 vfsp->vfc_refcount++;
713 vfsp_ref = TRUE;
714 break;
715 }
716 mount_list_unlock();
717 if (vfsp == NULL) {
718 error = ENODEV;
719 goto out1;
720 }
721
722 /*
723 * VFC_VFSLOCALARGS is not currently supported for kernel mounts
724 */
725 if (kernelmount && (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS)) {
726 error = EINVAL; /* unsupported request */
727 goto out1;
728 }
729
730 error = prepare_coveredvp(vp, ctx, cnp, fstypename, ((internal_flags & KERNEL_MOUNT_NOAUTH) != 0));
731 if (error != 0) {
732 goto out1;
733 }
734
735 /*
736 * Allocate and initialize the filesystem (mount_t)
737 */
738 MALLOC_ZONE(mp, struct mount *, (u_int32_t)sizeof(struct mount),
739 M_MOUNT, M_WAITOK);
740 bzero((char *)mp, (u_int32_t)sizeof(struct mount));
741 mntalloc = 1;
742
743 /* Initialize the default IO constraints */
744 mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
745 mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
746 mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
747 mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
748 mp->mnt_devblocksize = DEV_BSIZE;
749 mp->mnt_alignmentmask = PAGE_MASK;
750 mp->mnt_ioqueue_depth = MNT_DEFAULT_IOQUEUE_DEPTH;
751 mp->mnt_ioscale = 1;
752 mp->mnt_ioflags = 0;
753 mp->mnt_realrootvp = NULLVP;
754 mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
755
756 TAILQ_INIT(&mp->mnt_vnodelist);
757 TAILQ_INIT(&mp->mnt_workerqueue);
758 TAILQ_INIT(&mp->mnt_newvnodes);
759 mount_lock_init(mp);
760 lck_rw_lock_exclusive(&mp->mnt_rwlock);
761 is_rwlock_locked = TRUE;
762 mp->mnt_op = vfsp->vfc_vfsops;
763 mp->mnt_vtable = vfsp;
764 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
765 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
766 strlcpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
767 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
768 mp->mnt_vnodecovered = vp;
769 mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
770 mp->mnt_throttle_mask = LOWPRI_MAX_NUM_DEV - 1;
771 mp->mnt_devbsdunit = 0;
772
773 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
774 vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
775
776#if NFSCLIENT || DEVFS || ROUTEFS
777 if (kernelmount)
778 mp->mnt_kern_flag |= MNTK_KERNEL_MOUNT;
779 if ((internal_flags & KERNEL_MOUNT_PERMIT_UNMOUNT) != 0)
780 mp->mnt_kern_flag |= MNTK_PERMIT_UNMOUNT;
#endif /* NFSCLIENT || DEVFS || ROUTEFS */
782
783update:
784
785 /*
786 * Set the mount level flags.
787 */
788 if (flags & MNT_RDONLY)
789 mp->mnt_flag |= MNT_RDONLY;
790 else if (mp->mnt_flag & MNT_RDONLY) {
791 // disallow read/write upgrades of file systems that
792 // had the TYPENAME_OVERRIDE feature set.
793 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
794 error = EPERM;
795 goto out1;
796 }
797 mp->mnt_kern_flag |= MNTK_WANTRDWR;
798 }
799 mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
800 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
801 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
802 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
803 MNT_QUARANTINE | MNT_CPROTECT);
804
805#if SECURE_KERNEL
806#if !CONFIG_MNT_SUID
807 /*
808 * On release builds of iOS based platforms, always enforce NOSUID on
809 * all mounts. We do this here because we can catch update mounts as well as
810 * non-update mounts in this case.
811 */
812 mp->mnt_flag |= (MNT_NOSUID);
813#endif
814#endif
815
816 mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
817 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
818 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
819 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
820 MNT_QUARANTINE | MNT_CPROTECT);
821
822#if CONFIG_MACF
823 if (flags & MNT_MULTILABEL) {
824 if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
825 error = EINVAL;
826 goto out1;
827 }
828 mp->mnt_flag |= MNT_MULTILABEL;
829 }
830#endif
831 /*
832 * Process device path for local file systems if requested
833 */
834 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS &&
835 !(internal_flags & KERNEL_MOUNT_SNAPSHOT)) {
836 if (vfs_context_is64bit(ctx)) {
837 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
838 goto out1;
839 fsmountargs += sizeof(devpath);
840 } else {
841 user32_addr_t tmp;
842 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
843 goto out1;
844 /* munge into LP64 addr */
845 devpath = CAST_USER_ADDR_T(tmp);
846 fsmountargs += sizeof(tmp);
847 }
848
849 /* Lookup device and authorize access to it */
850 if ((devpath)) {
851 struct nameidata nd;
852
853 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW, UIO_USERSPACE, devpath, ctx);
854 if ( (error = namei(&nd)) )
855 goto out1;
856
857 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
858 devvp = nd.ni_vp;
859
860 nameidone(&nd);
861
862 if (devvp->v_type != VBLK) {
863 error = ENOTBLK;
864 goto out2;
865 }
866 if (major(devvp->v_rdev) >= nblkdev) {
867 error = ENXIO;
868 goto out2;
869 }
870 /*
871 * If mount by non-root, then verify that user has necessary
872 * permissions on the device.
873 */
874 if (suser(vfs_context_ucred(ctx), NULL) != 0) {
875 mode_t accessmode = KAUTH_VNODE_READ_DATA;
876
877 if ((mp->mnt_flag & MNT_RDONLY) == 0)
878 accessmode |= KAUTH_VNODE_WRITE_DATA;
879 if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0)
880 goto out2;
881 }
882 }
883 /* On first mount, preflight and open device */
884 if (devpath && ((flags & MNT_UPDATE) == 0)) {
885 if ( (error = vnode_ref(devvp)) )
886 goto out2;
887 /*
888 * Disallow multiple mounts of the same device.
889 * Disallow mounting of a device that is currently in use
890 * (except for root, which might share swap device for miniroot).
891 * Flush out any old buffers remaining from a previous use.
892 */
893 if ( (error = vfs_mountedon(devvp)) )
894 goto out3;
895
896 if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) {
897 error = EBUSY;
898 goto out3;
899 }
900 if ( (error = VNOP_FSYNC(devvp, MNT_WAIT, ctx)) ) {
901 error = ENOTBLK;
902 goto out3;
903 }
904 if ( (error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0)) )
905 goto out3;
906
907 ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
908#if CONFIG_MACF
909 error = mac_vnode_check_open(ctx,
910 devvp,
911 ronly ? FREAD : FREAD|FWRITE);
912 if (error)
913 goto out3;
914#endif /* MAC */
915 if ( (error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, ctx)) )
916 goto out3;
917
918 mp->mnt_devvp = devvp;
919 device_vnode = devvp;
920
921 } else if ((mp->mnt_flag & MNT_RDONLY) &&
922 (mp->mnt_kern_flag & MNTK_WANTRDWR) &&
923 (device_vnode = mp->mnt_devvp)) {
924 dev_t dev;
925 int maj;
926 /*
927 * If upgrade to read-write by non-root, then verify
928 * that user has necessary permissions on the device.
929 */
930 vnode_getalways(device_vnode);
931
932 if (suser(vfs_context_ucred(ctx), NULL) &&
933 (error = vnode_authorize(device_vnode, NULL,
934 KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA,
935 ctx)) != 0) {
936 vnode_put(device_vnode);
937 goto out2;
938 }
939
940 /* Tell the device that we're upgrading */
941 dev = (dev_t)device_vnode->v_rdev;
942 maj = major(dev);
943
944 if ((u_int)maj >= (u_int)nblkdev)
945 panic("Volume mounted on a device with invalid major number.");
946
947 error = bdevsw[maj].d_open(dev, FREAD | FWRITE, S_IFBLK, p);
948 vnode_put(device_vnode);
949 device_vnode = NULLVP;
950 if (error != 0) {
951 goto out2;
952 }
953 }
954 }
955#if CONFIG_MACF
956 if ((flags & MNT_UPDATE) == 0) {
957 mac_mount_label_init(mp);
958 mac_mount_label_associate(ctx, mp);
959 }
960 if (labelstr) {
961 if ((flags & MNT_UPDATE) != 0) {
962 error = mac_mount_check_label_update(ctx, mp);
963 if (error != 0)
964 goto out3;
965 }
966 }
967#endif
968 /*
969 * Mount the filesystem.
970 */
971 if (internal_flags & KERNEL_MOUNT_SNAPSHOT) {
972 error = VFS_IOCTL(mp, VFSIOC_MOUNT_SNAPSHOT,
973 (caddr_t)fsmountargs, 0, ctx);
974 } else {
975 error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
976 }
977
978 if (flags & MNT_UPDATE) {
979 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
980 mp->mnt_flag &= ~MNT_RDONLY;
981 mp->mnt_flag &=~
982 (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
983 mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
984 if (error)
985 mp->mnt_flag = flag; /* restore flag value */
986 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
987 lck_rw_done(&mp->mnt_rwlock);
988 is_rwlock_locked = FALSE;
989 if (!error)
990 enablequotas(mp, ctx);
991 goto exit;
992 }
993
994 /*
995 * Put the new filesystem on the mount list after root.
996 */
997 if (error == 0) {
998 struct vfs_attr vfsattr;
999#if CONFIG_MACF
1000 if (vfs_flags(mp) & MNT_MULTILABEL) {
1001 error = VFS_ROOT(mp, &rvp, ctx);
1002 if (error) {
1003 printf("%s() VFS_ROOT returned %d\n", __func__, error);
1004 goto out3;
1005 }
1006 error = vnode_label(mp, NULL, rvp, NULL, 0, ctx);
1007 /*
1008 * drop reference provided by VFS_ROOT
1009 */
1010 vnode_put(rvp);
1011
1012 if (error)
1013 goto out3;
1014 }
1015#endif /* MAC */
1016
1017 vnode_lock_spin(vp);
1018 CLR(vp->v_flag, VMOUNT);
1019 vp->v_mountedhere = mp;
1020 vnode_unlock(vp);
1021
		/*
		 * Taking the name_cache_lock exclusively will ensure that
		 * everyone who might be trying to use a now-stale copy of
		 * vp->v_mountedhere->mnt_realrootvp is out of the fast path.
		 * Bumping mount_generation causes the cached values to be
		 * invalidated.
		 */
1030 name_cache_lock();
1031 mount_generation++;
1032 name_cache_unlock();
1033
1034 error = vnode_ref(vp);
1035 if (error != 0) {
1036 goto out4;
1037 }
1038
1039 have_usecount = TRUE;
1040
1041 error = checkdirs(vp, ctx);
1042 if (error != 0) {
1043 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1044 goto out4;
1045 }
		/*
		 * There is no cleanup code here, so the return value is
		 * cast to void; we need to revisit this.
		 */
1050 (void)VFS_START(mp, 0, ctx);
1051
1052 if (mount_list_add(mp) != 0) {
1053 /*
1054 * The system is shutting down trying to umount
1055 * everything, so fail with a plausible errno.
1056 */
1057 error = EBUSY;
1058 goto out4;
1059 }
1060 lck_rw_done(&mp->mnt_rwlock);
1061 is_rwlock_locked = FALSE;
1062
1063 /* Check if this mounted file system supports EAs or named streams. */
1064 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
1065 VFSATTR_INIT(&vfsattr);
1066 VFSATTR_WANTED(&vfsattr, f_capabilities);
1067 if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 &&
1068 vfs_getattr(mp, &vfsattr, ctx) == 0 &&
1069 VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
1070 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
1071 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
1072 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
1073 }
1074#if NAMEDSTREAMS
1075 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS) &&
1076 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS)) {
1077 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
1078 }
1079#endif
1080 /* Check if this file system supports path from id lookups. */
1081 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID) &&
1082 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) {
1083 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
1084 } else if (mp->mnt_flag & MNT_DOVOLFS) {
1085 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
1086 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
1087 }
1088
1089 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_DIR_HARDLINKS) &&
1090 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_DIR_HARDLINKS)) {
1091 mp->mnt_kern_flag |= MNTK_DIR_HARDLINKS;
1092 }
1093 }
1094 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) {
1095 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
1096 }
1097 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSPREFLIGHT) {
1098 mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT;
1099 }
1100 /* increment the operations count */
1101 OSAddAtomic(1, &vfs_nummntops);
1102 enablequotas(mp, ctx);
1103
1104 if (device_vnode) {
1105 device_vnode->v_specflags |= SI_MOUNTEDON;
1106
1107 /*
1108 * cache the IO attributes for the underlying physical media...
1109 * an error return indicates the underlying driver doesn't
1110 * support all the queries necessary... however, reasonable
1111 * defaults will have been set, so no reason to bail or care
1112 */
1113 vfs_init_io_attributes(device_vnode, mp);
1114 }
1115
1116 /* Now that mount is setup, notify the listeners */
1117 vfs_notify_mount(pvp);
1118 IOBSDMountChange(mp, kIOMountChangeMount);
1119
1120 } else {
1121 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
1122 if (mp->mnt_vnodelist.tqh_first != NULL) {
1123 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
1124 mp->mnt_vtable->vfc_name, error);
1125 }
1126
1127 vnode_lock_spin(vp);
1128 CLR(vp->v_flag, VMOUNT);
1129 vnode_unlock(vp);
1130 mount_list_lock();
1131 mp->mnt_vtable->vfc_refcount--;
1132 mount_list_unlock();
1133
1134 if (device_vnode ) {
1135 vnode_rele(device_vnode);
1136 VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD|FWRITE, ctx);
1137 }
1138 lck_rw_done(&mp->mnt_rwlock);
1139 is_rwlock_locked = FALSE;
1140
1141 /*
1142 * if we get here, we have a mount structure that needs to be freed,
1143 * but since the coveredvp hasn't yet been updated to point at it,
1144 * no need to worry about other threads holding a crossref on this mp
1145 * so it's ok to just free it
1146 */
1147 mount_lock_destroy(mp);
1148#if CONFIG_MACF
1149 mac_mount_label_destroy(mp);
1150#endif
1151 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1152 }
1153exit:
1154 /*
1155 * drop I/O count on the device vp if there was one
1156 */
1157 if (devpath && devvp)
1158 vnode_put(devvp);
1159
1160 return(error);
1161
1162/* Error condition exits */
1163out4:
1164 (void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);
1165
1166 /*
1167 * If the mount has been placed on the covered vp,
1168 * it may have been discovered by now, so we have
1169 * to treat this just like an unmount
1170 */
1171 mount_lock_spin(mp);
1172 mp->mnt_lflag |= MNT_LDEAD;
1173 mount_unlock(mp);
1174
1175 if (device_vnode != NULLVP) {
1176 vnode_rele(device_vnode);
1177 VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
1178 ctx);
1179 did_rele = TRUE;
1180 }
1181
1182 vnode_lock_spin(vp);
1183
1184 mp->mnt_crossref++;
1185 vp->v_mountedhere = (mount_t) 0;
1186
1187 vnode_unlock(vp);
1188
1189 if (have_usecount) {
1190 vnode_rele(vp);
1191 }
1192out3:
1193 if (devpath && ((flags & MNT_UPDATE) == 0) && (!did_rele))
1194 vnode_rele(devvp);
1195out2:
1196 if (devpath && devvp)
1197 vnode_put(devvp);
1198out1:
1199 /* Release mnt_rwlock only when it was taken */
1200 if (is_rwlock_locked == TRUE) {
1201 lck_rw_done(&mp->mnt_rwlock);
1202 }
1203
1204 if (mntalloc) {
1205 if (mp->mnt_crossref)
1206 mount_dropcrossref(mp, vp, 0);
1207 else {
1208 mount_lock_destroy(mp);
1209#if CONFIG_MACF
1210 mac_mount_label_destroy(mp);
1211#endif
1212 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1213 }
1214 }
1215 if (vfsp_ref) {
1216 mount_list_lock();
1217 vfsp->vfc_refcount--;
1218 mount_list_unlock();
1219 }
1220
1221 return(error);
1222}
1223
1224/*
1225 * Flush in-core data, check for competing mount attempts,
1226 * and set VMOUNT
1227 */
1228int
1229prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth)
1230{
1231#if !CONFIG_MACF
1232#pragma unused(cnp,fsname)
1233#endif
1234 struct vnode_attr va;
1235 int error;
1236
1237 if (!skip_auth) {
1238 /*
1239 * If the user is not root, ensure that they own the directory
1240 * onto which we are attempting to mount.
1241 */
1242 VATTR_INIT(&va);
1243 VATTR_WANTED(&va, va_uid);
1244 if ((error = vnode_getattr(vp, &va, ctx)) ||
1245 (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1246 (!vfs_context_issuser(ctx)))) {
1247 error = EPERM;
1248 goto out;
1249 }
1250 }
1251
1252 if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) )
1253 goto out;
1254
1255 if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) )
1256 goto out;
1257
1258 if (vp->v_type != VDIR) {
1259 error = ENOTDIR;
1260 goto out;
1261 }
1262
1263 if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
1264 error = EBUSY;
1265 goto out;
1266 }
1267
1268#if CONFIG_MACF
1269 error = mac_mount_check_mount(ctx, vp,
1270 cnp, fsname);
1271 if (error != 0)
1272 goto out;
1273#endif
1274
1275 vnode_lock_spin(vp);
1276 SET(vp->v_flag, VMOUNT);
1277 vnode_unlock(vp);
1278
1279out:
1280 return error;
1281}
1282
1283#if CONFIG_IMGSRC_ACCESS
1284
1285#if DEBUG
1286#define IMGSRC_DEBUG(args...) printf(args)
1287#else
1288#define IMGSRC_DEBUG(args...) do { } while(0)
1289#endif
1290
1291static int
1292authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx)
1293{
1294 struct nameidata nd;
1295 vnode_t vp, realdevvp;
1296 mode_t accessmode;
1297 int error;
1298
1299 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
1300 if ( (error = namei(&nd)) ) {
1301 IMGSRC_DEBUG("namei() failed with %d\n", error);
1302 return error;
1303 }
1304
1305 vp = nd.ni_vp;
1306
1307 if (!vnode_isblk(vp)) {
1308 IMGSRC_DEBUG("Not block device.\n");
1309 error = ENOTBLK;
1310 goto out;
1311 }
1312
1313 realdevvp = mp->mnt_devvp;
1314 if (realdevvp == NULLVP) {
1315 IMGSRC_DEBUG("No device backs the mount.\n");
1316 error = ENXIO;
1317 goto out;
1318 }
1319
1320 error = vnode_getwithref(realdevvp);
1321 if (error != 0) {
		IMGSRC_DEBUG("Couldn't get iocount on device.\n");
1323 goto out;
1324 }
1325
1326 if (vnode_specrdev(vp) != vnode_specrdev(realdevvp)) {
1327 IMGSRC_DEBUG("Wrong dev_t.\n");
1328 error = ENXIO;
1329 goto out1;
1330 }
1331
1332 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
1333
1334 /*
1335 * If mount by non-root, then verify that user has necessary
1336 * permissions on the device.
1337 */
1338 if (!vfs_context_issuser(ctx)) {
1339 accessmode = KAUTH_VNODE_READ_DATA;
1340 if ((mp->mnt_flag & MNT_RDONLY) == 0)
1341 accessmode |= KAUTH_VNODE_WRITE_DATA;
1342 if ((error = vnode_authorize(vp, NULL, accessmode, ctx)) != 0) {
1343 IMGSRC_DEBUG("Access denied.\n");
1344 goto out1;
1345 }
1346 }
1347
1348 *devvpp = vp;
1349
1350out1:
1351 vnode_put(realdevvp);
1352out:
1353 nameidone(&nd);
1354 if (error) {
1355 vnode_put(vp);
1356 }
1357
1358 return error;
1359}
1360
1361/*
1362 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1363 * and call checkdirs()
1364 */
1365static int
1366place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx)
1367{
1368 int error;
1369
1370 mp->mnt_vnodecovered = vp; /* XXX This is normally only set at init-time ... */
1371
1372 vnode_lock_spin(vp);
1373 CLR(vp->v_flag, VMOUNT);
1374 vp->v_mountedhere = mp;
1375 vnode_unlock(vp);
1376
	/*
	 * Taking the name_cache_lock exclusively will ensure that
	 * everyone who might be trying to use a now-stale copy of
	 * vp->v_mountedhere->mnt_realrootvp is out of the fast path.
	 * Bumping mount_generation causes the cached values to be
	 * invalidated.
	 */
1385 name_cache_lock();
1386 mount_generation++;
1387 name_cache_unlock();
1388
1389 error = vnode_ref(vp);
1390 if (error != 0) {
1391 goto out;
1392 }
1393
1394 error = checkdirs(vp, ctx);
1395 if (error != 0) {
1396 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1397 vnode_rele(vp);
1398 goto out;
1399 }
1400
1401out:
1402 if (error != 0) {
1403 mp->mnt_vnodecovered = NULLVP;
1404 }
1405 return error;
1406}
1407
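/*
 * Undo place_mount_and_checkdirs(): drop the usecount taken on the covered
 * vnode and detach the mount from it.
 */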
1408static void
1409undo_place_on_covered_vp(mount_t mp, vnode_t vp)
1410{
1411 vnode_rele(vp);
1412 vnode_lock_spin(vp);
1413 vp->v_mountedhere = (mount_t)NULL;
1414 vnode_unlock(vp);
1415
1416 mp->mnt_vnodecovered = NULLVP;
1417}
1418
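/*
 * Take the mount rwlock exclusively and authorize an update (remount) of mp.
 * Fails if an unmount is in progress, if MNT_RELOAD is requested on a
 * read-write mount, or if the caller is neither the original mounter nor
 * superuser.  The rwlock is released on error.
 */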
1419static int
1420mount_begin_update(mount_t mp, vfs_context_t ctx, int flags)
1421{
1422 int error;
1423
1424 /* unmount in progress return error */
1425 mount_lock_spin(mp);
1426 if (mp->mnt_lflag & MNT_LUNMOUNT) {
1427 mount_unlock(mp);
1428 return EBUSY;
1429 }
1430 mount_unlock(mp);
1431 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1432
1433 /*
1434 * We only allow the filesystem to be reloaded if it
1435 * is currently mounted read-only.
1436 */
1437 if ((flags & MNT_RELOAD) &&
1438 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
1439 error = ENOTSUP;
1440 goto out;
1441 }
1442
1443 /*
1444 * Only root, or the user that did the original mount is
1445 * permitted to update it.
1446 */
1447 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1448 (!vfs_context_issuser(ctx))) {
1449 error = EPERM;
1450 goto out;
1451 }
1452#if CONFIG_MACF
1453 error = mac_mount_check_remount(ctx, mp);
1454 if (error != 0) {
1455 goto out;
1456 }
1457#endif
1458
1459out:
1460 if (error) {
1461 lck_rw_done(&mp->mnt_rwlock);
1462 }
1463
1464 return error;
1465}
1466
1467static void
1468mount_end_update(mount_t mp)
1469{
1470 lck_rw_done(&mp->mnt_rwlock);
1471}
1472
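/*
 * Return, with an iocount, the imageboot source root vnode at the given
 * nesting height; ENOENT if there is none.
 */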
1473static int
1474get_imgsrc_rootvnode(uint32_t height, vnode_t *rvpp)
1475{
1476 vnode_t vp;
1477
1478 if (height >= MAX_IMAGEBOOT_NESTING) {
1479 return EINVAL;
1480 }
1481
1482 vp = imgsrc_rootvnodes[height];
1483 if ((vp != NULLVP) && (vnode_get(vp) == 0)) {
1484 *rvpp = vp;
1485 return 0;
1486 } else {
1487 return ENOENT;
1488 }
1489}
1490
1491static int
1492relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp,
1493 const char *fsname, vfs_context_t ctx,
1494 boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index)
1495{
1496 int error;
1497 mount_t mp;
1498 boolean_t placed = FALSE;
1499 vnode_t devvp = NULLVP;
1500 struct vfstable *vfsp;
1501 user_addr_t devpath;
1502 char *old_mntonname;
1503 vnode_t rvp;
1504 uint32_t height;
1505 uint32_t flags;
1506
1507 /* If we didn't imageboot, nothing to move */
1508 if (imgsrc_rootvnodes[0] == NULLVP) {
1509 return EINVAL;
1510 }
1511
1512 /* Only root can do this */
1513 if (!vfs_context_issuser(ctx)) {
1514 return EPERM;
1515 }
1516
1517 IMGSRC_DEBUG("looking for root vnode.\n");
1518
1519 /*
1520 * Get root vnode of filesystem we're moving.
1521 */
1522 if (by_index) {
1523 if (is64bit) {
1524 struct user64_mnt_imgsrc_args mia64;
1525 error = copyin(fsmountargs, &mia64, sizeof(mia64));
1526 if (error != 0) {
1527 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1528 return error;
1529 }
1530
1531 height = mia64.mi_height;
1532 flags = mia64.mi_flags;
1533 devpath = mia64.mi_devpath;
1534 } else {
1535 struct user32_mnt_imgsrc_args mia32;
1536 error = copyin(fsmountargs, &mia32, sizeof(mia32));
1537 if (error != 0) {
1538 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1539 return error;
1540 }
1541
1542 height = mia32.mi_height;
1543 flags = mia32.mi_flags;
1544 devpath = mia32.mi_devpath;
1545 }
1546 } else {
1547 /*
1548 * For binary compatibility--assumes one level of nesting.
1549 */
1550 if (is64bit) {
1551 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
1552 return error;
1553 } else {
1554 user32_addr_t tmp;
1555 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
1556 return error;
1557
1558 /* munge into LP64 addr */
1559 devpath = CAST_USER_ADDR_T(tmp);
1560 }
1561
1562 height = 0;
1563 flags = 0;
1564 }
1565
1566 if (flags != 0) {
1567 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__);
1568 return EINVAL;
1569 }
1570
1571 error = get_imgsrc_rootvnode(height, &rvp);
1572 if (error != 0) {
1573 IMGSRC_DEBUG("getting root vnode failed with %d\n", error);
1574 return error;
1575 }
1576
1577 IMGSRC_DEBUG("got root vnode.\n");
1578
1579 MALLOC(old_mntonname, char*, MAXPATHLEN, M_TEMP, M_WAITOK);
1580
1581 /* Can only move once */
1582 mp = vnode_mount(rvp);
1583 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
1584 IMGSRC_DEBUG("Already moved.\n");
1585 error = EBUSY;
1586 goto out0;
1587 }
1588
	IMGSRC_DEBUG("Starting update.\n");
1590
1591 /* Get exclusive rwlock on mount, authorize update on mp */
1592 error = mount_begin_update(mp , ctx, 0);
1593 if (error != 0) {
		IMGSRC_DEBUG("Starting update failed with %d\n", error);
1595 goto out0;
1596 }
1597
1598 /*
1599 * It can only be moved once. Flag is set under the rwlock,
1600 * so we're now safe to proceed.
1601 */
1602 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
1603 IMGSRC_DEBUG("Already moved [2]\n");
1604 goto out1;
1605 }
1606
1607
1608 IMGSRC_DEBUG("Preparing coveredvp.\n");
1609
1610 /* Mark covered vnode as mount in progress, authorize placing mount on top */
1611 error = prepare_coveredvp(vp, ctx, cnp, fsname, FALSE);
1612 if (error != 0) {
1613 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error);
1614 goto out1;
1615 }
1616
1617 IMGSRC_DEBUG("Covered vp OK.\n");
1618
1619 /* Sanity check the name caller has provided */
1620 vfsp = mp->mnt_vtable;
1621 if (strncmp(vfsp->vfc_name, fsname, MFSNAMELEN) != 0) {
1622 IMGSRC_DEBUG("Wrong fs name.\n");
1623 error = EINVAL;
1624 goto out2;
1625 }
1626
1627 /* Check the device vnode and update mount-from name, for local filesystems */
1628 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
1629 IMGSRC_DEBUG("Local, doing device validation.\n");
1630
1631 if (devpath != USER_ADDR_NULL) {
1632 error = authorize_devpath_and_update_mntfromname(mp, devpath, &devvp, ctx);
1633 if (error) {
1634 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
1635 goto out2;
1636 }
1637
1638 vnode_put(devvp);
1639 }
1640 }
1641
1642 /*
1643 * Place mp on top of vnode, ref the vnode, call checkdirs(),
1644 * and increment the name cache's mount generation
1645 */
1646
1647 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
1648 error = place_mount_and_checkdirs(mp, vp, ctx);
1649 if (error != 0) {
1650 goto out2;
1651 }
1652
1653 placed = TRUE;
1654
1655 strlcpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
1656 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
1657
1658 /* Forbid future moves */
1659 mount_lock(mp);
1660 mp->mnt_kern_flag |= MNTK_HAS_MOVED;
1661 mount_unlock(mp);
1662
1663 /* Finally, add to mount list, completely ready to go */
1664 if (mount_list_add(mp) != 0) {
1665 /*
1666 * The system is shutting down trying to umount
1667 * everything, so fail with a plausible errno.
1668 */
1669 error = EBUSY;
1670 goto out3;
1671 }
1672
1673 mount_end_update(mp);
1674 vnode_put(rvp);
1675 FREE(old_mntonname, M_TEMP);
1676
1677 vfs_notify_mount(pvp);
1678
1679 return 0;
1680out3:
1681 strlcpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);
1682
1683 mount_lock(mp);
1684 mp->mnt_kern_flag &= ~(MNTK_HAS_MOVED);
1685 mount_unlock(mp);
1686
1687out2:
1688 /*
1689 * Placing the mp on the vnode clears VMOUNT,
1690 * so cleanup is different after that point
1691 */
1692 if (placed) {
1693 /* Rele the vp, clear VMOUNT and v_mountedhere */
1694 undo_place_on_covered_vp(mp, vp);
1695 } else {
1696 vnode_lock_spin(vp);
1697 CLR(vp->v_flag, VMOUNT);
1698 vnode_unlock(vp);
1699 }
1700out1:
1701 mount_end_update(mp);
1702
1703out0:
1704 vnode_put(rvp);
1705 FREE(old_mntonname, M_TEMP);
1706 return error;
1707}
1708
1709#endif /* CONFIG_IMGSRC_ACCESS */
1710
1711void
1712enablequotas(struct mount *mp, vfs_context_t ctx)
1713{
1714 struct nameidata qnd;
1715 int type;
1716 char qfpath[MAXPATHLEN];
1717 const char *qfname = QUOTAFILENAME;
1718 const char *qfopsname = QUOTAOPSNAME;
1719 const char *qfextension[] = INITQFNAMES;
1720
	/* XXX Should be an MNTK_ flag, instead of strncmp()'s */
1722 if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 ) {
1723 return;
1724 }
	/*
	 * Enable filesystem disk quotas if necessary.
	 * We ignore errors, as this should not interfere with the final mount.
	 */
1729 for (type=0; type < MAXQUOTAS; type++) {
1730 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
1731 NDINIT(&qnd, LOOKUP, OP_MOUNT, FOLLOW, UIO_SYSSPACE,
1732 CAST_USER_ADDR_T(qfpath), ctx);
1733 if (namei(&qnd) != 0)
1734 continue; /* option file to trigger quotas is not present */
1735 vnode_put(qnd.ni_vp);
1736 nameidone(&qnd);
1737 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]);
1738
1739 (void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx);
1740 }
1741 return;
1742}
1743
1744
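/*
 * Per-process callback for checkdirs(): if the process's current or root
 * directory is the newly covered vnode, swap it for the root of the new
 * mount, transferring the directory references accordingly.
 */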
1745static int
1746checkdirs_callback(proc_t p, void * arg)
1747{
1748 struct cdirargs * cdrp = (struct cdirargs * )arg;
1749 vnode_t olddp = cdrp->olddp;
1750 vnode_t newdp = cdrp->newdp;
1751 struct filedesc *fdp;
1752 vnode_t tvp;
1753 vnode_t fdp_cvp;
1754 vnode_t fdp_rvp;
1755 int cdir_changed = 0;
1756 int rdir_changed = 0;
1757
1758 /*
1759 * XXX Also needs to iterate each thread in the process to see if it
1760 * XXX is using a per-thread current working directory, and, if so,
1761 * XXX update that as well.
1762 */
1763
1764 proc_fdlock(p);
1765 fdp = p->p_fd;
1766 if (fdp == (struct filedesc *)0) {
1767 proc_fdunlock(p);
1768 return(PROC_RETURNED);
1769 }
1770 fdp_cvp = fdp->fd_cdir;
1771 fdp_rvp = fdp->fd_rdir;
1772 proc_fdunlock(p);
1773
1774 if (fdp_cvp == olddp) {
1775 vnode_ref(newdp);
1776 tvp = fdp->fd_cdir;
1777 fdp_cvp = newdp;
1778 cdir_changed = 1;
1779 vnode_rele(tvp);
1780 }
1781 if (fdp_rvp == olddp) {
1782 vnode_ref(newdp);
1783 tvp = fdp->fd_rdir;
1784 fdp_rvp = newdp;
1785 rdir_changed = 1;
1786 vnode_rele(tvp);
1787 }
1788 if (cdir_changed || rdir_changed) {
1789 proc_fdlock(p);
1790 fdp->fd_cdir = fdp_cvp;
1791 fdp->fd_rdir = fdp_rvp;
1792 proc_fdunlock(p);
1793 }
1794 return(PROC_RETURNED);
1795}
1796
1797
1798
1799/*
1800 * Scan all active processes to see if any of them have a current
1801 * or root directory onto which the new filesystem has just been
1802 * mounted. If so, replace them with the new mount point.
1803 */
1804static int
1805checkdirs(vnode_t olddp, vfs_context_t ctx)
1806{
1807 vnode_t newdp;
1808 vnode_t tvp;
1809 int err;
1810 struct cdirargs cdr;
1811
1812 if (olddp->v_usecount == 1)
1813 return(0);
1814 err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);
1815
1816 if (err != 0) {
1817#if DIAGNOSTIC
1818 panic("mount: lost mount: error %d", err);
1819#endif
1820 return(err);
1821 }
1822
1823 cdr.olddp = olddp;
1824 cdr.newdp = newdp;
1825 /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
1826 proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);
1827
1828 if (rootvnode == olddp) {
1829 vnode_ref(newdp);
1830 tvp = rootvnode;
1831 rootvnode = newdp;
1832 vnode_rele(tvp);
1833 }
1834
1835 vnode_put(newdp);
1836 return(0);
1837}
1838
1839/*
1840 * Unmount a file system.
1841 *
1842 * Note: unmount takes a path to the vnode mounted on as argument,
1843 * not special file (as before).
1844 */
1845/* ARGSUSED */
1846int
1847unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval)
1848{
1849 vnode_t vp;
1850 struct mount *mp;
1851 int error;
1852 struct nameidata nd;
1853 vfs_context_t ctx = vfs_context_current();
1854
1855 NDINIT(&nd, LOOKUP, OP_UNMOUNT, FOLLOW | AUDITVNPATH1,
1856 UIO_USERSPACE, uap->path, ctx);
1857 error = namei(&nd);
1858 if (error)
1859 return (error);
1860 vp = nd.ni_vp;
1861 mp = vp->v_mount;
1862 nameidone(&nd);
1863
1864#if CONFIG_MACF
1865 error = mac_mount_check_umount(ctx, mp);
1866 if (error != 0) {
1867 vnode_put(vp);
1868 return (error);
1869 }
1870#endif
1871 /*
1872 * Must be the root of the filesystem
1873 */
1874 if ((vp->v_flag & VROOT) == 0) {
1875 vnode_put(vp);
1876 return (EINVAL);
1877 }
1878 mount_ref(mp, 0);
1879 vnode_put(vp);
1880 /* safedounmount consumes the mount ref */
1881 return (safedounmount(mp, uap->flags, ctx));
1882}
1883
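/*
 * Unmount the file system identified by fsid.  Takes a mount ref, which
 * safedounmount() consumes.
 */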
1884int
1885vfs_unmountbyfsid(fsid_t *fsid, int flags, vfs_context_t ctx)
1886{
1887 mount_t mp;
1888
1889 mp = mount_list_lookupby_fsid(fsid, 0, 1);
1890 if (mp == (mount_t)0) {
1891 return(ENOENT);
1892 }
1893 mount_ref(mp, 0);
1894 mount_iterdrop(mp);
1895 /* safedounmount consumes the mount ref */
1896 return(safedounmount(mp, flags, ctx));
1897}
1898
1899
/*
 * The mount struct comes with a mount ref, which will be consumed.
 * Do the actual file system unmount and prevent some common foot-shooting.
 */
1904int
1905safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
1906{
1907 int error;
1908 proc_t p = vfs_context_proc(ctx);
1909
1910 /*
1911 * If the file system is not responding and MNT_NOBLOCK
1912 * is set and not a forced unmount then return EBUSY.
1913 */
1914 if ((mp->mnt_kern_flag & MNT_LNOTRESP) &&
1915 (flags & MNT_NOBLOCK) && ((flags & MNT_FORCE) == 0)) {
1916 error = EBUSY;
1917 goto out;
1918 }
1919
1920 /*
1921 * Skip authorization if the mount is tagged as permissive and
1922 * this is not a forced-unmount attempt.
1923 */
1924 if (!(((mp->mnt_kern_flag & MNTK_PERMIT_UNMOUNT) != 0) && ((flags & MNT_FORCE) == 0))) {
1925 /*
1926 * Only root, or the user that did the original mount is
1927 * permitted to unmount this filesystem.
1928 */
1929 if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
1930 (error = suser(kauth_cred_get(), &p->p_acflag)))
1931 goto out;
1932 }
1933 /*
1934 * Don't allow unmounting the root file system.
1935 */
1936 if (mp->mnt_flag & MNT_ROOTFS) {
1937 error = EBUSY; /* the root is always busy */
1938 goto out;
1939 }
1940
1941#ifdef CONFIG_IMGSRC_ACCESS
1942 if (mp->mnt_kern_flag & MNTK_BACKS_ROOT) {
1943 error = EBUSY;
1944 goto out;
1945 }
1946#endif /* CONFIG_IMGSRC_ACCESS */
1947
1948 return (dounmount(mp, flags, 1, ctx));
1949
1950out:
1951 mount_drop(mp, 0);
1952 return(error);
1953}
1954
1955/*
1956 * Do the actual file system unmount.
1957 */
1958int
1959dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
1960{
1961 vnode_t coveredvp = (vnode_t)0;
1962 int error;
1963 int needwakeup = 0;
1964 int forcedunmount = 0;
1965 int lflags = 0;
1966 struct vnode *devvp = NULLVP;
1967#if CONFIG_TRIGGERS
1968 proc_t p = vfs_context_proc(ctx);
1969 int did_vflush = 0;
1970 int pflags_save = 0;
1971#endif /* CONFIG_TRIGGERS */
1972
1973#if CONFIG_FSE
1974 if (!(flags & MNT_FORCE)) {
1975 fsevent_unmount(mp, ctx); /* has to come first! */
1976 }
1977#endif
1978
1979 mount_lock(mp);
1980
1981 /*
1982 * If already an unmount in progress just return EBUSY.
1983 * Even a forced unmount cannot override.
1984 */
1985 if (mp->mnt_lflag & MNT_LUNMOUNT) {
1986 if (withref != 0)
1987 mount_drop(mp, 1);
1988 mount_unlock(mp);
1989 return (EBUSY);
1990 }
1991
1992 if (flags & MNT_FORCE) {
1993 forcedunmount = 1;
1994 mp->mnt_lflag |= MNT_LFORCE;
1995 }
1996
1997#if CONFIG_TRIGGERS
1998 if (flags & MNT_NOBLOCK && p != kernproc)
1999 pflags_save = OSBitOrAtomic(P_NOREMOTEHANG, &p->p_flag);
2000#endif
2001
2002 mp->mnt_kern_flag |= MNTK_UNMOUNT;
2003 mp->mnt_lflag |= MNT_LUNMOUNT;
2004 mp->mnt_flag &=~ MNT_ASYNC;
2005 /*
2006 * anyone currently in the fast path that
2007 * trips over the cached rootvp will be
2008 * dumped out and forced into the slow path
2009 * to regenerate a new cached value
2010 */
2011 mp->mnt_realrootvp = NULLVP;
2012 mount_unlock(mp);
2013
2014 if (forcedunmount && (flags & MNT_LNOSUB) == 0) {
2015 /*
2016 * Force unmount any mounts in this filesystem.
2017 * If any unmounts fail - just leave them dangling.
2018 * Avoids recursion.
2019 */
2020 (void) dounmount_submounts(mp, flags | MNT_LNOSUB, ctx);
2021 }
2022
2023 /*
2024	 * Taking the name_cache_lock exclusively will
2025	 * ensure that everyone is out of the fast path who
2026	 * might be trying to use a now stale copy of
2027	 * vp->v_mountedhere->mnt_realrootvp.
2028	 * Bumping mount_generation causes the cached values
2029	 * to be invalidated.
2030 */
2031 name_cache_lock();
2032 mount_generation++;
2033 name_cache_unlock();
2034
2035
2036 lck_rw_lock_exclusive(&mp->mnt_rwlock);
2037 if (withref != 0)
2038 mount_drop(mp, 0);
2039 error = 0;
2040 if (forcedunmount == 0) {
2041 ubc_umount(mp); /* release cached vnodes */
2042 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
2043 error = VFS_SYNC(mp, MNT_WAIT, ctx);
2044 if (error) {
2045 mount_lock(mp);
2046 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
2047 mp->mnt_lflag &= ~MNT_LUNMOUNT;
2048 mp->mnt_lflag &= ~MNT_LFORCE;
2049 goto out;
2050 }
2051 }
2052 }
2053
2054 /* free disk_conditioner_info structure for this mount */
2055 disk_conditioner_unmount(mp);
2056
2057 IOBSDMountChange(mp, kIOMountChangeUnmount);
2058
2059#if CONFIG_TRIGGERS
2060 vfs_nested_trigger_unmounts(mp, flags, ctx);
2061 did_vflush = 1;
2062#endif
2063 if (forcedunmount)
2064 lflags |= FORCECLOSE;
2065 error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM | SKIPROOT | lflags);
2066 if ((forcedunmount == 0) && error) {
2067 mount_lock(mp);
2068 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
2069 mp->mnt_lflag &= ~MNT_LUNMOUNT;
2070 mp->mnt_lflag &= ~MNT_LFORCE;
2071 goto out;
2072 }
2073
2074	/* make sure no one is in the mount iterations or lookup */
2075 mount_iterdrain(mp);
2076
2077 error = VFS_UNMOUNT(mp, flags, ctx);
2078 if (error) {
2079 mount_iterreset(mp);
2080 mount_lock(mp);
2081 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
2082 mp->mnt_lflag &= ~MNT_LUNMOUNT;
2083 mp->mnt_lflag &= ~MNT_LFORCE;
2084 goto out;
2085 }
2086
2087 /* increment the operations count */
2088 if (!error)
2089 OSAddAtomic(1, &vfs_nummntops);
2090
2091 if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
2092 /* hold an io reference and drop the usecount before close */
2093 devvp = mp->mnt_devvp;
2094 vnode_getalways(devvp);
2095 vnode_rele(devvp);
2096 VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
2097 ctx);
2098 vnode_clearmountedon(devvp);
2099 vnode_put(devvp);
2100 }
2101 lck_rw_done(&mp->mnt_rwlock);
2102 mount_list_remove(mp);
2103 lck_rw_lock_exclusive(&mp->mnt_rwlock);
2104
2105	/* mark the mount point hook in the vp but do not drop the ref yet */
2106 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
2107 /*
2108 * The covered vnode needs special handling. Trying to get an
2109 * iocount must not block here as this may lead to deadlocks
2110 * if the Filesystem to which the covered vnode belongs is
2111 * undergoing forced unmounts. Since we hold a usecount, the
2112 * vnode cannot be reused (it can, however, still be terminated)
2113 */
2114 vnode_getalways(coveredvp);
2115 vnode_lock_spin(coveredvp);
2116
2117 mp->mnt_crossref++;
2118 coveredvp->v_mountedhere = (struct mount *)0;
2119 CLR(coveredvp->v_flag, VMOUNT);
2120
2121 vnode_unlock(coveredvp);
2122 vnode_put(coveredvp);
2123 }
2124
2125 mount_list_lock();
2126 mp->mnt_vtable->vfc_refcount--;
2127 mount_list_unlock();
2128
2129 cache_purgevfs(mp); /* remove cache entries for this file sys */
2130 vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
2131 mount_lock(mp);
2132 mp->mnt_lflag |= MNT_LDEAD;
2133
2134 if (mp->mnt_lflag & MNT_LWAIT) {
2135 /*
2136 * do the wakeup here
2137 * in case we block in mount_refdrain
2138 * which will drop the mount lock
2139 * and allow anyone blocked in vfs_busy
2140 * to wakeup and see the LDEAD state
2141 */
2142 mp->mnt_lflag &= ~MNT_LWAIT;
2143 wakeup((caddr_t)mp);
2144 }
2145 mount_refdrain(mp);
2146out:
2147 if (mp->mnt_lflag & MNT_LWAIT) {
2148 mp->mnt_lflag &= ~MNT_LWAIT;
2149 needwakeup = 1;
2150 }
2151
2152#if CONFIG_TRIGGERS
2153 if (flags & MNT_NOBLOCK && p != kernproc) {
2154 // Restore P_NOREMOTEHANG bit to its previous value
2155 if ((pflags_save & P_NOREMOTEHANG) == 0)
2156 OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG), &p->p_flag);
2157 }
2158
2159 /*
2160 * Callback and context are set together under the mount lock, and
2161 * never cleared, so we're safe to examine them here, drop the lock,
2162 * and call out.
2163 */
2164 if (mp->mnt_triggercallback != NULL) {
2165 mount_unlock(mp);
2166 if (error == 0) {
2167 mp->mnt_triggercallback(mp, VTC_RELEASE, mp->mnt_triggerdata, ctx);
2168 } else if (did_vflush) {
2169 mp->mnt_triggercallback(mp, VTC_REPLACE, mp->mnt_triggerdata, ctx);
2170 }
2171 } else {
2172 mount_unlock(mp);
2173 }
2174#else
2175 mount_unlock(mp);
2176#endif /* CONFIG_TRIGGERS */
2177
2178 lck_rw_done(&mp->mnt_rwlock);
2179
2180 if (needwakeup)
2181 wakeup((caddr_t)mp);
2182
2183 if (!error) {
2184 if ((coveredvp != NULLVP)) {
2185 vnode_t pvp = NULLVP;
2186
2187 /*
2188 * The covered vnode needs special handling. Trying to
2189 * get an iocount must not block here as this may lead
2190 * to deadlocks if the Filesystem to which the covered
2191 * vnode belongs is undergoing forced unmounts. Since we
2192 * hold a usecount, the vnode cannot be reused
2193 * (it can, however, still be terminated).
2194 */
2195 vnode_getalways(coveredvp);
2196
2197 mount_dropcrossref(mp, coveredvp, 0);
2198 /*
2199 * We'll _try_ to detect if this really needs to be
2200 * done. The coveredvp can only be in termination (or
2201 * terminated) if the coveredvp's mount point is in a
2202 * forced unmount (or has been) since we still hold the
2203 * ref.
2204 */
2205 if (!vnode_isrecycled(coveredvp)) {
2206 pvp = vnode_getparent(coveredvp);
2207#if CONFIG_TRIGGERS
2208 if (coveredvp->v_resolve) {
2209 vnode_trigger_rearm(coveredvp, ctx);
2210 }
2211#endif
2212 }
2213
2214 vnode_rele(coveredvp);
2215 vnode_put(coveredvp);
2216 coveredvp = NULLVP;
2217
2218 if (pvp) {
2219 lock_vnode_and_post(pvp, NOTE_WRITE);
2220 vnode_put(pvp);
2221 }
2222 } else if (mp->mnt_flag & MNT_ROOTFS) {
2223 mount_lock_destroy(mp);
2224#if CONFIG_MACF
2225 mac_mount_label_destroy(mp);
2226#endif
2227 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
2228 } else
2229 panic("dounmount: no coveredvp");
2230 }
2231 return (error);
2232}
2233
2234/*
2235 * Unmount any submounts of this filesystem. Errors are ignored.
2236 */
2237void
2238dounmount_submounts(struct mount *mp, int flags, vfs_context_t ctx)
2239{
2240 mount_t smp;
2241 fsid_t *fsids, fsid;
2242 int fsids_sz;
2243 int count = 0, i, m = 0;
2244 vnode_t vp;
2245
2246 mount_list_lock();
2247
2248 // Get an array to hold the submounts fsids.
2249 TAILQ_FOREACH(smp, &mountlist, mnt_list)
2250 count++;
2251 fsids_sz = count * sizeof(fsid_t);
2252 MALLOC(fsids, fsid_t *, fsids_sz, M_TEMP, M_NOWAIT);
2253 if (fsids == NULL) {
2254 mount_list_unlock();
2255 goto out;
2256 }
2257 fsids[0] = mp->mnt_vfsstat.f_fsid; // Prime the pump
2258
2259 /*
2260 * Fill the array with submount fsids.
2261 * Since mounts are always added to the tail of the mount list, the
2262 * list is always in mount order.
2263 * For each mount check if the mounted-on vnode belongs to a
2264 * mount that's already added to our array of mounts to be unmounted.
2265 */
2266 for (smp = TAILQ_NEXT(mp, mnt_list); smp; smp = TAILQ_NEXT(smp, mnt_list)) {
2267 vp = smp->mnt_vnodecovered;
2268 if (vp == NULL)
2269 continue;
2270 fsid = vnode_mount(vp)->mnt_vfsstat.f_fsid; // Underlying fsid
2271 for (i = 0; i <= m; i++) {
2272 if (fsids[i].val[0] == fsid.val[0] &&
2273 fsids[i].val[1] == fsid.val[1]) {
2274 fsids[++m] = smp->mnt_vfsstat.f_fsid;
2275 break;
2276 }
2277 }
2278 }
2279 mount_list_unlock();
2280
2281 // Unmount the submounts in reverse order. Ignore errors.
2282 for (i = m; i > 0; i--) {
2283 smp = mount_list_lookupby_fsid(&fsids[i], 0, 1);
2284 if (smp) {
2285 mount_ref(smp, 0);
2286 mount_iterdrop(smp);
2287 (void) dounmount(smp, flags, 1, ctx);
2288 }
2289 }
2290out:
2291 if (fsids)
2292 FREE(fsids, M_TEMP);
2293}
2294
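/*
 * Drop one cross reference on the mount, taken in dounmount() while the
 * covered vnode was being detached.  When the last cross ref goes away and
 * the vnode no longer points at this mount, the mount structure itself is
 * destroyed and freed.
 */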
2295void
2296mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
2297{
2298 vnode_lock(dp);
2299 mp->mnt_crossref--;
2300
2301 if (mp->mnt_crossref < 0)
2302 panic("mount cross refs -ve");
2303
2304 if ((mp != dp->v_mountedhere) && (mp->mnt_crossref == 0)) {
2305
2306 if (need_put)
2307 vnode_put_locked(dp);
2308 vnode_unlock(dp);
2309
2310 mount_lock_destroy(mp);
2311#if CONFIG_MACF
2312 mac_mount_label_destroy(mp);
2313#endif
2314 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
2315 return;
2316 }
2317 if (need_put)
2318 vnode_put_locked(dp);
2319 vnode_unlock(dp);
2320}
2321
2322
2323/*
2324 * Sync each mounted filesystem.
2325 */
2326#if DIAGNOSTIC
2327int syncprt = 0;
2328#endif
2329
2330int print_vmpage_stat=0;
2331
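/*
 * vfs_iterate() callback: push dirty data for one writable mount.
 * MNT_ASYNC is cleared for the duration of the VFS_SYNC call so the sync
 * is not done lazily, then restored.  A non-NULL arg requests a waiting
 * (MNT_WAIT) sync.
 */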
2332static int
2333sync_callback(mount_t mp, __unused void *arg)
2334{
2335 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
2336 int asyncflag = mp->mnt_flag & MNT_ASYNC;
2337
2338 mp->mnt_flag &= ~MNT_ASYNC;
2339 VFS_SYNC(mp, arg ? MNT_WAIT : MNT_NOWAIT, vfs_context_kernel());
2340 if (asyncflag)
2341 mp->mnt_flag |= MNT_ASYNC;
2342 }
2343
2344 return (VFS_RETURNED);
2345}
2346
2347/* ARGSUSED */
2348int
2349sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval)
2350{
2351 vfs_iterate(LK_NOWAIT, sync_callback, NULL);
2352
2353 if (print_vmpage_stat) {
2354 vm_countdirtypages();
2355 }
2356
2357#if DIAGNOSTIC
2358 if (syncprt)
2359 vfs_bufstats();
2360#endif /* DIAGNOSTIC */
2361 return 0;
2362}
2363
2364typedef enum {
2365 SYNC_ALL = 0,
2366 SYNC_ONLY_RELIABLE_MEDIA = 1,
2367 SYNC_ONLY_UNRELIABLE_MEDIA = 2
2368} sync_type_t;
2369
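/*
 * vfs_iterate() callback used by sync_thread().  When a sync_type_t is
 * passed in arg, mounts are filtered so that reliable media (local,
 * non-virtual devices) and unreliable media can be synced in separate
 * passes; otherwise every mount is synced via sync_callback().
 */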
2370static int
2371sync_internal_callback(mount_t mp, void *arg)
2372{
2373 if (arg) {
2374 int is_reliable = !(mp->mnt_kern_flag & MNTK_VIRTUALDEV) &&
2375 (mp->mnt_flag & MNT_LOCAL);
2376 sync_type_t sync_type = *((sync_type_t *)arg);
2377
2378 if ((sync_type == SYNC_ONLY_RELIABLE_MEDIA) && !is_reliable)
2379 return (VFS_RETURNED);
2380		else if ((sync_type == SYNC_ONLY_UNRELIABLE_MEDIA) && is_reliable)
2381 return (VFS_RETURNED);
2382 }
2383
2384 (void)sync_callback(mp, NULL);
2385
2386 return (VFS_RETURNED);
2387}
2388
2389int sync_thread_state = 0;
2390int sync_timeout_seconds = 5;
2391
2392#define SYNC_THREAD_RUN 0x0001
2393#define SYNC_THREAD_RUNNING 0x0002
2394
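/*
 * Body of the kernel thread started by sync_internal().  It keeps syncing
 * as long as SYNC_THREAD_RUN is set (re-requests can arrive while a pass
 * is in progress), syncing reliable media first and unreliable media
 * second, and wakes any waiter in sync_internal() before it exits.
 */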
2395static void
2396sync_thread(__unused void *arg, __unused wait_result_t wr)
2397{
2398 sync_type_t sync_type;
2399
2400 lck_mtx_lock(sync_mtx_lck);
2401 while (sync_thread_state & SYNC_THREAD_RUN) {
2402 sync_thread_state &= ~SYNC_THREAD_RUN;
2403 lck_mtx_unlock(sync_mtx_lck);
2404
2405 sync_type = SYNC_ONLY_RELIABLE_MEDIA;
2406 vfs_iterate(LK_NOWAIT, sync_internal_callback, &sync_type);
2407 sync_type = SYNC_ONLY_UNRELIABLE_MEDIA;
2408 vfs_iterate(LK_NOWAIT, sync_internal_callback, &sync_type);
2409
2410 lck_mtx_lock(sync_mtx_lck);
2411 }
2412 /*
2413 * This wakeup _has_ to be issued before the lock is released otherwise
2414 * we may end up waking up a thread in sync_internal which is
2415 * expecting a wakeup from a thread it just created and not from this
2416 * thread which is about to exit.
2417 */
2418 wakeup(&sync_thread_state);
2419 sync_thread_state &= ~SYNC_THREAD_RUNNING;
2420 lck_mtx_unlock(sync_mtx_lck);
2421
2422 if (print_vmpage_stat) {
2423 vm_countdirtypages();
2424 }
2425
2426#if DIAGNOSTIC
2427 if (syncprt)
2428 vfs_bufstats();
2429#endif /* DIAGNOSTIC */
2430}
2431
2432struct timeval sync_timeout_last_print = {0, 0};
2433
2434/*
2435 * An in-kernel sync for power management to call.
2436 * This function always returns within sync_timeout_seconds seconds.
2437 */
2438__private_extern__ int
2439sync_internal(void)
2440{
2441 thread_t thd;
2442 int error;
2443 int thread_created = FALSE;
2444 struct timespec ts = {sync_timeout_seconds, 0};
2445
2446 lck_mtx_lock(sync_mtx_lck);
2447 sync_thread_state |= SYNC_THREAD_RUN;
2448 if (!(sync_thread_state & SYNC_THREAD_RUNNING)) {
2449 int kr;
2450
2451 sync_thread_state |= SYNC_THREAD_RUNNING;
2452 kr = kernel_thread_start(sync_thread, NULL, &thd);
2453 if (kr != KERN_SUCCESS) {
2454 sync_thread_state &= ~SYNC_THREAD_RUNNING;
2455 lck_mtx_unlock(sync_mtx_lck);
2456 printf("sync_thread failed\n");
2457 return (0);
2458 }
2459 thread_created = TRUE;
2460 }
2461
2462 error = msleep((caddr_t)&sync_thread_state, sync_mtx_lck,
2463 (PVFS | PDROP | PCATCH), "sync_thread", &ts);
2464 if (error) {
2465 struct timeval now;
2466
2467 microtime(&now);
2468 if (now.tv_sec - sync_timeout_last_print.tv_sec > 120) {
2469 printf("sync timed out: %d sec\n", sync_timeout_seconds);
2470 sync_timeout_last_print.tv_sec = now.tv_sec;
2471 }
2472 }
2473
2474 if (thread_created)
2475 thread_deallocate(thd);
2476
2477 return (0);
2478} /* end of sync_internal call */
2479
2480/*
2481 * Change filesystem quotas.
2482 */
2483#if QUOTA
2484int
2485quotactl(proc_t p, struct quotactl_args *uap, __unused int32_t *retval)
2486{
2487 struct mount *mp;
2488 int error, quota_cmd, quota_status = 0;
2489 caddr_t datap;
2490 size_t fnamelen;
2491 struct nameidata nd;
2492 vfs_context_t ctx = vfs_context_current();
2493 struct dqblk my_dqblk = {};
2494
2495 AUDIT_ARG(uid, uap->uid);
2496 AUDIT_ARG(cmd, uap->cmd);
2497 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
2498 uap->path, ctx);
2499 error = namei(&nd);
2500 if (error)
2501 return (error);
2502 mp = nd.ni_vp->v_mount;
2503 vnode_put(nd.ni_vp);
2504 nameidone(&nd);
2505
2506 /* copyin any data we will need for downstream code */
2507 quota_cmd = uap->cmd >> SUBCMDSHIFT;
2508
2509 switch (quota_cmd) {
2510 case Q_QUOTAON:
2511 /* uap->arg specifies a file from which to take the quotas */
2512 fnamelen = MAXPATHLEN;
2513 datap = kalloc(MAXPATHLEN);
2514 error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
2515 break;
2516 case Q_GETQUOTA:
2517 /* uap->arg is a pointer to a dqblk structure. */
2518 datap = (caddr_t) &my_dqblk;
2519 break;
2520 case Q_SETQUOTA:
2521 case Q_SETUSE:
2522 /* uap->arg is a pointer to a dqblk structure. */
2523 datap = (caddr_t) &my_dqblk;
2524 if (proc_is64bit(p)) {
2525 struct user_dqblk my_dqblk64;
2526 error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof (my_dqblk64));
2527 if (error == 0) {
2528 munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
2529 }
2530 }
2531 else {
2532 error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof (my_dqblk));
2533 }
2534 break;
2535 case Q_QUOTASTAT:
2536 /* uap->arg is a pointer to an integer */
2537 datap = (caddr_t) &quota_status;
2538 break;
2539 default:
2540 datap = NULL;
2541 break;
2542 } /* switch */
2543
2544 if (error == 0) {
2545 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
2546 }
2547
2548 switch (quota_cmd) {
2549 case Q_QUOTAON:
2550 if (datap != NULL)
2551 kfree(datap, MAXPATHLEN);
2552 break;
2553 case Q_GETQUOTA:
2554 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
2555 if (error == 0) {
2556 if (proc_is64bit(p)) {
2557 struct user_dqblk my_dqblk64;
2558
2559 memset(&my_dqblk64, 0, sizeof(my_dqblk64));
2560 munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
2561 error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof (my_dqblk64));
2562 }
2563 else {
2564 error = copyout(datap, uap->arg, sizeof (struct dqblk));
2565 }
2566 }
2567 break;
2568 case Q_QUOTASTAT:
2569 /* uap->arg is a pointer to an integer */
2570 if (error == 0) {
2571 error = copyout(datap, uap->arg, sizeof(quota_status));
2572 }
2573 break;
2574 default:
2575 break;
2576 } /* switch */
2577
2578 return (error);
2579}
2580#else
2581int
2582quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused int32_t *retval)
2583{
2584 return (EOPNOTSUPP);
2585}
2586#endif /* QUOTA */
2587
2588/*
2589 * Get filesystem statistics.
2590 *
2591 * Returns: 0 Success
2592 * namei:???
2593 * vfs_update_vfsstat:???
2594 * munge_statfs:EFAULT
2595 */
2596/* ARGSUSED */
2597int
2598statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval)
2599{
2600 struct mount *mp;
2601 struct vfsstatfs *sp;
2602 int error;
2603 struct nameidata nd;
2604 vfs_context_t ctx = vfs_context_current();
2605 vnode_t vp;
2606
2607 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
2608 UIO_USERSPACE, uap->path, ctx);
2609 error = namei(&nd);
2610 if (error != 0)
2611 return (error);
2612 vp = nd.ni_vp;
2613 mp = vp->v_mount;
2614 sp = &mp->mnt_vfsstat;
2615 nameidone(&nd);
2616
2617#if CONFIG_MACF
2618 error = mac_mount_check_stat(ctx, mp);
2619	if (error != 0) {
		vnode_put(vp);
2620		return (error);
	}
2621#endif
2622
2623 error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
2624 if (error != 0) {
2625 vnode_put(vp);
2626 return (error);
2627 }
2628
2629 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2630 vnode_put(vp);
2631 return (error);
2632}
2633
2634/*
2635 * Get filesystem statistics.
2636 */
2637/* ARGSUSED */
2638int
2639fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused int32_t *retval)
2640{
2641 vnode_t vp;
2642 struct mount *mp;
2643 struct vfsstatfs *sp;
2644 int error;
2645
2646 AUDIT_ARG(fd, uap->fd);
2647
2648 if ( (error = file_vnode(uap->fd, &vp)) )
2649 return (error);
2650
2651 error = vnode_getwithref(vp);
2652 if (error) {
2653 file_drop(uap->fd);
2654 return (error);
2655 }
2656
2657 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2658
2659 mp = vp->v_mount;
2660 if (!mp) {
2661 error = EBADF;
2662 goto out;
2663 }
2664
2665#if CONFIG_MACF
2666 error = mac_mount_check_stat(vfs_context_current(), mp);
2667 if (error != 0)
2668 goto out;
2669#endif
2670
2671 sp = &mp->mnt_vfsstat;
2672 if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
2673 goto out;
2674 }
2675
2676 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2677
2678out:
2679 file_drop(uap->fd);
2680 vnode_put(vp);
2681
2682 return (error);
2683}
2684
2685/*
2686 * Common routine to handle copying of statfs64 data to user space
2687 */
2688static int
2689statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
2690{
2691 int error;
2692 struct statfs64 sfs;
2693
2694 bzero(&sfs, sizeof(sfs));
2695
2696 sfs.f_bsize = sfsp->f_bsize;
2697 sfs.f_iosize = (int32_t)sfsp->f_iosize;
2698 sfs.f_blocks = sfsp->f_blocks;
2699 sfs.f_bfree = sfsp->f_bfree;
2700 sfs.f_bavail = sfsp->f_bavail;
2701 sfs.f_files = sfsp->f_files;
2702 sfs.f_ffree = sfsp->f_ffree;
2703 sfs.f_fsid = sfsp->f_fsid;
2704 sfs.f_owner = sfsp->f_owner;
2705 sfs.f_type = mp->mnt_vtable->vfc_typenum;
2706 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
2707 sfs.f_fssubtype = sfsp->f_fssubtype;
2708 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
2709 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
2710 } else {
2711 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN);
2712 }
2713 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN);
2714 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN);
2715
2716 error = copyout((caddr_t)&sfs, bufp, sizeof(sfs));
2717
2718 return(error);
2719}
2720
2721/*
2722 * Get file system statistics in 64-bit mode
2723 */
2724int
2725statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval)
2726{
2727 struct mount *mp;
2728 struct vfsstatfs *sp;
2729 int error;
2730 struct nameidata nd;
2731 vfs_context_t ctxp = vfs_context_current();
2732 vnode_t vp;
2733
2734 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
2735 UIO_USERSPACE, uap->path, ctxp);
2736 error = namei(&nd);
2737 if (error != 0)
2738 return (error);
2739 vp = nd.ni_vp;
2740 mp = vp->v_mount;
2741 sp = &mp->mnt_vfsstat;
2742 nameidone(&nd);
2743
2744#if CONFIG_MACF
2745 error = mac_mount_check_stat(ctxp, mp);
2746	if (error != 0) {
		vnode_put(vp);
2747		return (error);
	}
2748#endif
2749
2750 error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
2751 if (error != 0) {
2752 vnode_put(vp);
2753 return (error);
2754 }
2755
2756 error = statfs64_common(mp, sp, uap->buf);
2757 vnode_put(vp);
2758
2759 return (error);
2760}
2761
2762/*
2763 * Get file system statistics in 64-bit mode
2764 */
2765int
2766fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval)
2767{
2768 struct vnode *vp;
2769 struct mount *mp;
2770 struct vfsstatfs *sp;
2771 int error;
2772
2773 AUDIT_ARG(fd, uap->fd);
2774
2775 if ( (error = file_vnode(uap->fd, &vp)) )
2776 return (error);
2777
2778 error = vnode_getwithref(vp);
2779 if (error) {
2780 file_drop(uap->fd);
2781 return (error);
2782 }
2783
2784 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2785
2786 mp = vp->v_mount;
2787 if (!mp) {
2788 error = EBADF;
2789 goto out;
2790 }
2791
2792#if CONFIG_MACF
2793 error = mac_mount_check_stat(vfs_context_current(), mp);
2794 if (error != 0)
2795 goto out;
2796#endif
2797
2798 sp = &mp->mnt_vfsstat;
2799 if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
2800 goto out;
2801 }
2802
2803 error = statfs64_common(mp, sp, uap->buf);
2804
2805out:
2806 file_drop(uap->fd);
2807 vnode_put(vp);
2808
2809 return (error);
2810}
2811
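/*
 * Per-call state threaded through the getfsstat iteration callbacks:
 * sfsp walks the user statfs buffer, mp walks the optional array of MAC
 * label pointers, count/maxcount track how many entries have been produced
 * and how many fit, and error records a failure that aborts the iteration.
 */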
2812struct getfsstat_struct {
2813 user_addr_t sfsp;
2814 user_addr_t *mp;
2815 int count;
2816 int maxcount;
2817 int flags;
2818 int error;
2819};
2820
2821
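/*
 * vfs_iterate() callback for getfsstat()/__mac_getfsstat(): for each mount
 * that still fits in the user buffer, optionally refresh the cached
 * vfsstat, copy out a (32- or 64-bit) struct statfs via munge_statfs(),
 * and copy out the MAC label if one was requested.  Mounts are counted
 * even when nothing is copied out, so a caller that passed no buffer
 * learns how many mounts there are.
 */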
2822static int
2823getfsstat_callback(mount_t mp, void * arg)
2824{
2825
2826 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2827 struct vfsstatfs *sp;
2828 int error, my_size;
2829 vfs_context_t ctx = vfs_context_current();
2830
2831 if (fstp->sfsp && fstp->count < fstp->maxcount) {
2832#if CONFIG_MACF
2833 error = mac_mount_check_stat(ctx, mp);
2834 if (error != 0) {
2835 fstp->error = error;
2836 return(VFS_RETURNED_DONE);
2837 }
2838#endif
2839 sp = &mp->mnt_vfsstat;
2840 /*
2841 * If MNT_NOWAIT is specified, do not refresh the
2842 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
2843 */
2844 if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2845 (error = vfs_update_vfsstat(mp, ctx,
2846 VFS_USER_EVENT))) {
2847 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
2848 return(VFS_RETURNED);
2849 }
2850
2851 /*
2852 * Need to handle LP64 version of struct statfs
2853 */
2854 error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
2855 if (error) {
2856 fstp->error = error;
2857 return(VFS_RETURNED_DONE);
2858 }
2859 fstp->sfsp += my_size;
2860
2861 if (fstp->mp) {
2862#if CONFIG_MACF
2863 error = mac_mount_label_get(mp, *fstp->mp);
2864 if (error) {
2865 fstp->error = error;
2866 return(VFS_RETURNED_DONE);
2867 }
2868#endif
2869 fstp->mp++;
2870 }
2871 }
2872 fstp->count++;
2873 return(VFS_RETURNED);
2874}
2875
2876/*
2877 * Get statistics on all filesystems.
2878 */
2879int
2880getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
2881{
2882 struct __mac_getfsstat_args muap;
2883
2884 muap.buf = uap->buf;
2885 muap.bufsize = uap->bufsize;
2886 muap.mac = USER_ADDR_NULL;
2887 muap.macsize = 0;
2888 muap.flags = uap->flags;
2889
2890 return (__mac_getfsstat(p, &muap, retval));
2891}
2892
2893/*
2894 * __mac_getfsstat: Get MAC-related file system statistics
2895 *
2896 * Parameters: p (ignored)
2897 * uap User argument descriptor (see below)
2898 * retval Count of file system statistics (N stats)
2899 *
2900 * Indirect: uap->bufsize Buffer size
2901 * uap->macsize MAC info size
2902 * uap->buf Buffer where information will be returned
2903 * uap->mac MAC info
2904 * uap->flags File system flags
2905 *
2906 *
2907 * Returns: 0 Success
2908 * !0 Not success
2909 *
2910 */
2911int
2912__mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
2913{
2914 user_addr_t sfsp;
2915 user_addr_t *mp;
2916 size_t count, maxcount, bufsize, macsize;
2917 struct getfsstat_struct fst;
2918
2919 bufsize = (size_t) uap->bufsize;
2920 macsize = (size_t) uap->macsize;
2921
2922 if (IS_64BIT_PROCESS(p)) {
2923 maxcount = bufsize / sizeof(struct user64_statfs);
2924 }
2925 else {
2926 maxcount = bufsize / sizeof(struct user32_statfs);
2927 }
2928 sfsp = uap->buf;
2929 count = 0;
2930
2931 mp = NULL;
2932
2933#if CONFIG_MACF
2934 if (uap->mac != USER_ADDR_NULL) {
2935 u_int32_t *mp0;
2936 int error;
2937 unsigned int i;
2938
2939 count = (macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
2940 if (count != maxcount)
2941 return (EINVAL);
2942
2943 /* Copy in the array */
2944 MALLOC(mp0, u_int32_t *, macsize, M_MACTEMP, M_WAITOK);
2945 if (mp0 == NULL) {
2946 return (ENOMEM);
2947 }
2948
2949 error = copyin(uap->mac, mp0, macsize);
2950 if (error) {
2951 FREE(mp0, M_MACTEMP);
2952 return (error);
2953 }
2954
2955 /* Normalize to an array of user_addr_t */
2956 MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
2957 if (mp == NULL) {
2958 FREE(mp0, M_MACTEMP);
2959 return (ENOMEM);
2960 }
2961
2962 for (i = 0; i < count; i++) {
2963 if (IS_64BIT_PROCESS(p))
2964 mp[i] = ((user_addr_t *)mp0)[i];
2965 else
2966 mp[i] = (user_addr_t)mp0[i];
2967 }
2968 FREE(mp0, M_MACTEMP);
2969 }
2970#endif
2971
2972
2973 fst.sfsp = sfsp;
2974 fst.mp = mp;
2975 fst.flags = uap->flags;
2976 fst.count = 0;
2977 fst.error = 0;
2978 fst.maxcount = maxcount;
2979
2980
2981 vfs_iterate(0, getfsstat_callback, &fst);
2982
2983 if (mp)
2984 FREE(mp, M_MACTEMP);
2985
2986 if (fst.error ) {
2987 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2988 return(fst.error);
2989 }
2990
2991 if (fst.sfsp && fst.count > fst.maxcount)
2992 *retval = fst.maxcount;
2993 else
2994 *retval = fst.count;
2995 return (0);
2996}
2997
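/*
 * 64-bit variant of getfsstat_callback(): same iteration logic, but the
 * entries copied out are fixed-size struct statfs64 produced by
 * statfs64_common(), and no MAC labels are returned.
 */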
2998static int
2999getfsstat64_callback(mount_t mp, void * arg)
3000{
3001 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
3002 struct vfsstatfs *sp;
3003 int error;
3004
3005 if (fstp->sfsp && fstp->count < fstp->maxcount) {
3006#if CONFIG_MACF
3007 error = mac_mount_check_stat(vfs_context_current(), mp);
3008 if (error != 0) {
3009 fstp->error = error;
3010 return(VFS_RETURNED_DONE);
3011 }
3012#endif
3013 sp = &mp->mnt_vfsstat;
3014 /*
3015 * If MNT_NOWAIT is specified, do not refresh the fsstat
3016 * cache. MNT_WAIT overrides MNT_NOWAIT.
3017 *
3018 * We treat MNT_DWAIT as MNT_WAIT for all instances of
3019 * getfsstat, since the constants are out of the same
3020 * namespace.
3021 */
3022 if (((fstp->flags & MNT_NOWAIT) == 0 ||
3023 (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
3024 (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) {
3025 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
3026 return(VFS_RETURNED);
3027 }
3028
3029 error = statfs64_common(mp, sp, fstp->sfsp);
3030 if (error) {
3031 fstp->error = error;
3032 return(VFS_RETURNED_DONE);
3033 }
3034 fstp->sfsp += sizeof(struct statfs64);
3035 }
3036 fstp->count++;
3037 return(VFS_RETURNED);
3038}
3039
3040/*
3041 * Get statistics on all file systems in 64 bit mode.
3042 */
3043int
3044getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
3045{
3046 user_addr_t sfsp;
3047 int count, maxcount;
3048 struct getfsstat_struct fst;
3049
3050 maxcount = uap->bufsize / sizeof(struct statfs64);
3051
3052 sfsp = uap->buf;
3053 count = 0;
3054
3055 fst.sfsp = sfsp;
3056 fst.flags = uap->flags;
3057 fst.count = 0;
3058 fst.error = 0;
3059 fst.maxcount = maxcount;
3060
3061 vfs_iterate(0, getfsstat64_callback, &fst);
3062
3063 if (fst.error ) {
3064 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
3065 return(fst.error);
3066 }
3067
3068 if (fst.sfsp && fst.count > fst.maxcount)
3069 *retval = fst.maxcount;
3070 else
3071 *retval = fst.count;
3072
3073 return (0);
3074}
3075
3076/*
3077 * Gets the vnode associated with the file descriptor passed
3078 * as input.
3079 *
3080 * INPUT
3081 * ctx - vfs context of caller
3082 * fd - file descriptor for which vnode is required.
3083 * vpp - Pointer to pointer to vnode to be returned.
3084 *
3085 * The vnode is returned with an iocount so any vnode obtained
3086 * by this call needs a vnode_put
3087 *
3088 */
3089int
3090vnode_getfromfd(vfs_context_t ctx, int fd, vnode_t *vpp)
3091{
3092 int error;
3093 vnode_t vp;
3094 struct fileproc *fp;
3095 proc_t p = vfs_context_proc(ctx);
3096
3097 *vpp = NULLVP;
3098
3099 error = fp_getfvp(p, fd, &fp, &vp);
3100 if (error)
3101 return (error);
3102
3103 error = vnode_getwithref(vp);
3104 if (error) {
3105 (void)fp_drop(p, fd, fp, 0);
3106 return (error);
3107 }
3108
3109 (void)fp_drop(p, fd, fp, 0);
3110 *vpp = vp;
3111 return (error);
3112}
3113
3114/*
3115 * Wrapper function around namei to start lookup from a directory
3116 * specified by a file descriptor ni_dirfd.
3117 *
3118 * In addition to all the errors returned by namei, this call can
3119 * return ENOTDIR if the file descriptor does not refer to a directory,
3120 * and EBADF if the file descriptor is not valid.
3121 */
3122int
3123nameiat(struct nameidata *ndp, int dirfd)
3124{
3125 if ((dirfd != AT_FDCWD) &&
3126 !(ndp->ni_flag & NAMEI_CONTLOOKUP) &&
3127 !(ndp->ni_cnd.cn_flags & USEDVP)) {
3128 int error = 0;
3129 char c;
3130
3131 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
3132 error = copyin(ndp->ni_dirp, &c, sizeof(char));
3133 if (error)
3134 return (error);
3135 } else {
3136 c = *((char *)(ndp->ni_dirp));
3137 }
3138
3139 if (c != '/') {
3140 vnode_t dvp_at;
3141
3142 error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
3143 &dvp_at);
3144 if (error)
3145 return (error);
3146
3147 if (vnode_vtype(dvp_at) != VDIR) {
3148 vnode_put(dvp_at);
3149 return (ENOTDIR);
3150 }
3151
3152 ndp->ni_dvp = dvp_at;
3153 ndp->ni_cnd.cn_flags |= USEDVP;
3154 error = namei(ndp);
3155 ndp->ni_cnd.cn_flags &= ~USEDVP;
3156 vnode_put(dvp_at);
3157 return (error);
3158 }
3159 }
3160
3161 return (namei(ndp));
3162}
3163
3164/*
3165 * Change current working directory to a given file descriptor.
3166 */
3167/* ARGSUSED */
3168static int
3169common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
3170{
3171 struct filedesc *fdp = p->p_fd;
3172 vnode_t vp;
3173 vnode_t tdp;
3174 vnode_t tvp;
3175 struct mount *mp;
3176 int error;
3177 vfs_context_t ctx = vfs_context_current();
3178
3179 AUDIT_ARG(fd, uap->fd);
3180 if (per_thread && uap->fd == -1) {
3181 /*
3182 * Switching back from per-thread to per process CWD; verify we
3183 * in fact have one before proceeding. The only success case
3184 * for this code path is to return 0 preemptively after zapping
3185 * the thread structure contents.
3186 */
3187 thread_t th = vfs_context_thread(ctx);
3188 if (th) {
3189 uthread_t uth = get_bsdthread_info(th);
3190 tvp = uth->uu_cdir;
3191 uth->uu_cdir = NULLVP;
3192 if (tvp != NULLVP) {
3193 vnode_rele(tvp);
3194 return (0);
3195 }
3196 }
3197 return (EBADF);
3198 }
3199
3200 if ( (error = file_vnode(uap->fd, &vp)) )
3201 return(error);
3202 if ( (error = vnode_getwithref(vp)) ) {
3203 file_drop(uap->fd);
3204 return(error);
3205 }
3206
3207 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
3208
3209 if (vp->v_type != VDIR) {
3210 error = ENOTDIR;
3211 goto out;
3212 }
3213
3214#if CONFIG_MACF
3215 error = mac_vnode_check_chdir(ctx, vp);
3216 if (error)
3217 goto out;
3218#endif
3219 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
3220 if (error)
3221 goto out;
3222
3223 while (!error && (mp = vp->v_mountedhere) != NULL) {
3224 if (vfs_busy(mp, LK_NOWAIT)) {
3225 error = EACCES;
3226 goto out;
3227 }
3228 error = VFS_ROOT(mp, &tdp, ctx);
3229 vfs_unbusy(mp);
3230 if (error)
3231 break;
3232 vnode_put(vp);
3233 vp = tdp;
3234 }
3235 if (error)
3236 goto out;
3237 if ( (error = vnode_ref(vp)) )
3238 goto out;
3239 vnode_put(vp);
3240
3241 if (per_thread) {
3242 thread_t th = vfs_context_thread(ctx);
3243 if (th) {
3244 uthread_t uth = get_bsdthread_info(th);
3245 tvp = uth->uu_cdir;
3246 uth->uu_cdir = vp;
3247 OSBitOrAtomic(P_THCWD, &p->p_flag);
3248 } else {
3249 vnode_rele(vp);
3250 return (ENOENT);
3251 }
3252 } else {
3253 proc_fdlock(p);
3254 tvp = fdp->fd_cdir;
3255 fdp->fd_cdir = vp;
3256 proc_fdunlock(p);
3257 }
3258
3259 if (tvp)
3260 vnode_rele(tvp);
3261 file_drop(uap->fd);
3262
3263 return (0);
3264out:
3265 vnode_put(vp);
3266 file_drop(uap->fd);
3267
3268 return(error);
3269}
3270
3271int
3272fchdir(proc_t p, struct fchdir_args *uap, __unused int32_t *retval)
3273{
3274 return common_fchdir(p, uap, 0);
3275}
3276
3277int
3278__pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused int32_t *retval)
3279{
3280 return common_fchdir(p, (void *)uap, 1);
3281}
3282
3283/*
3284 * Change current working directory (".").
3285 *
3286 * Returns: 0 Success
3287 * change_dir:ENOTDIR
3288 * change_dir:???
3289 * vnode_ref:ENOENT No such file or directory
3290 */
3291/* ARGSUSED */
3292static int
3293common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
3294{
3295 struct filedesc *fdp = p->p_fd;
3296 int error;
3297 struct nameidata nd;
3298 vnode_t tvp;
3299 vfs_context_t ctx = vfs_context_current();
3300
3301 NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1,
3302 UIO_USERSPACE, uap->path, ctx);
3303 error = change_dir(&nd, ctx);
3304 if (error)
3305 return (error);
3306 if ( (error = vnode_ref(nd.ni_vp)) ) {
3307 vnode_put(nd.ni_vp);
3308 return (error);
3309 }
3310 /*
3311 * drop the iocount we picked up in change_dir
3312 */
3313 vnode_put(nd.ni_vp);
3314
3315 if (per_thread) {
3316 thread_t th = vfs_context_thread(ctx);
3317 if (th) {
3318 uthread_t uth = get_bsdthread_info(th);
3319 tvp = uth->uu_cdir;
3320 uth->uu_cdir = nd.ni_vp;
3321 OSBitOrAtomic(P_THCWD, &p->p_flag);
3322 } else {
3323 vnode_rele(nd.ni_vp);
3324 return (ENOENT);
3325 }
3326 } else {
3327 proc_fdlock(p);
3328 tvp = fdp->fd_cdir;
3329 fdp->fd_cdir = nd.ni_vp;
3330 proc_fdunlock(p);
3331 }
3332
3333 if (tvp)
3334 vnode_rele(tvp);
3335
3336 return (0);
3337}
3338
3339
3340/*
3341 * chdir
3342 *
3343 * Change current working directory (".") for the entire process
3344 *
3345 * Parameters: p Process requesting the call
3346 * uap User argument descriptor (see below)
3347 * retval (ignored)
3348 *
3349 * Indirect parameters: uap->path Directory path
3350 *
3351 * Returns: 0 Success
3352 * common_chdir: ENOTDIR
3353 * common_chdir: ENOENT No such file or directory
3354 * common_chdir: ???
3355 *
3356 */
3357int
3358chdir(proc_t p, struct chdir_args *uap, __unused int32_t *retval)
3359{
3360 return common_chdir(p, (void *)uap, 0);
3361}
3362
3363/*
3364 * __pthread_chdir
3365 *
3366 * Change current working directory (".") for a single thread
3367 *
3368 * Parameters: p Process requesting the call
3369 * uap User argument descriptor (see below)
3370 * retval (ignored)
3371 *
3372 * Indirect parameters: uap->path Directory path
3373 *
3374 * Returns: 0 Success
3375 * common_chdir: ENOTDIR
3376 * common_chdir: ENOENT No such file or directory
3377 * common_chdir: ???
3378 *
3379 */
3380int
3381__pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused int32_t *retval)
3382{
3383 return common_chdir(p, (void *)uap, 1);
3384}
3385
3386
3387/*
3388 * Change notion of root (``/'') directory.
3389 */
3390/* ARGSUSED */
3391int
3392chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval)
3393{
3394 struct filedesc *fdp = p->p_fd;
3395 int error;
3396 struct nameidata nd;
3397 vnode_t tvp;
3398 vfs_context_t ctx = vfs_context_current();
3399
3400 if ((error = suser(kauth_cred_get(), &p->p_acflag)))
3401 return (error);
3402
3403 NDINIT(&nd, LOOKUP, OP_CHROOT, FOLLOW | AUDITVNPATH1,
3404 UIO_USERSPACE, uap->path, ctx);
3405 error = change_dir(&nd, ctx);
3406 if (error)
3407 return (error);
3408
3409#if CONFIG_MACF
3410 error = mac_vnode_check_chroot(ctx, nd.ni_vp,
3411 &nd.ni_cnd);
3412 if (error) {
3413 vnode_put(nd.ni_vp);
3414 return (error);
3415 }
3416#endif
3417
3418 if ( (error = vnode_ref(nd.ni_vp)) ) {
3419 vnode_put(nd.ni_vp);
3420 return (error);
3421 }
3422 vnode_put(nd.ni_vp);
3423
3424 proc_fdlock(p);
3425 tvp = fdp->fd_rdir;
3426 fdp->fd_rdir = nd.ni_vp;
3427 fdp->fd_flags |= FD_CHROOT;
3428 proc_fdunlock(p);
3429
3430 if (tvp != NULL)
3431 vnode_rele(tvp);
3432
3433 return (0);
3434}
3435
3436/*
3437 * Common routine for chroot and chdir.
3438 *
3439 * Returns: 0 Success
3440 * ENOTDIR Not a directory
3441 * namei:??? [anything namei can return]
3442 * vnode_authorize:??? [anything vnode_authorize can return]
3443 */
3444static int
3445change_dir(struct nameidata *ndp, vfs_context_t ctx)
3446{
3447 vnode_t vp;
3448 int error;
3449
3450 if ((error = namei(ndp)))
3451 return (error);
3452 nameidone(ndp);
3453 vp = ndp->ni_vp;
3454
3455 if (vp->v_type != VDIR) {
3456 vnode_put(vp);
3457 return (ENOTDIR);
3458 }
3459
3460#if CONFIG_MACF
3461 error = mac_vnode_check_chdir(ctx, vp);
3462 if (error) {
3463 vnode_put(vp);
3464 return (error);
3465 }
3466#endif
3467
3468 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
3469 if (error) {
3470 vnode_put(vp);
3471 return (error);
3472 }
3473
3474 return (error);
3475}
3476
3477/*
3478 * Allocate the vnode data (for directories) associated with the file glob.
3479 */
3480struct fd_vn_data *
3481fg_vn_data_alloc(void)
3482{
3483 struct fd_vn_data *fvdata;
3484
3485 /* Allocate per fd vnode data */
3486 MALLOC(fvdata, struct fd_vn_data *, (sizeof(struct fd_vn_data)),
3487 M_FD_VN_DATA, M_WAITOK | M_ZERO);
3488 lck_mtx_init(&fvdata->fv_lock, fd_vn_lck_grp, fd_vn_lck_attr);
3489 return fvdata;
3490}
3491
3492/*
3493 * Free the vnode data (for directories) associated with the file glob.
3494 */
3495void
3496fg_vn_data_free(void *fgvndata)
3497{
3498 struct fd_vn_data *fvdata = (struct fd_vn_data *)fgvndata;
3499
3500 if (fvdata->fv_buf)
3501 FREE(fvdata->fv_buf, M_FD_DIRBUF);
3502 lck_mtx_destroy(&fvdata->fv_lock, fd_vn_lck_grp);
3503 FREE(fvdata, M_FD_VN_DATA);
3504}
3505
3506/*
3507 * Check permissions, allocate an open file structure,
3508 * and call the device open routine if any.
3509 *
3510 * Returns: 0 Success
3511 * EINVAL
3512 * EINTR
3513 * falloc:ENFILE
3514 * falloc:EMFILE
3515 * falloc:ENOMEM
3516 * vn_open_auth:???
3517 * dupfdopen:???
3518 * VNOP_ADVLOCK:???
3519 * vnode_setsize:???
3520 *
3521 * XXX Need to implement uid, gid
3522 */
3523int
3524open1(vfs_context_t ctx, struct nameidata *ndp, int uflags,
3525 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra,
3526 int32_t *retval)
3527{
3528 proc_t p = vfs_context_proc(ctx);
3529 uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
3530 struct fileproc *fp;
3531 vnode_t vp;
3532 int flags, oflags;
3533 int type, indx, error;
3534 struct flock lf;
3535 struct vfs_context context;
3536
3537 oflags = uflags;
3538
3539 if ((oflags & O_ACCMODE) == O_ACCMODE)
3540 return(EINVAL);
3541
3542 flags = FFLAGS(uflags);
3543 CLR(flags, FENCRYPTED);
3544 CLR(flags, FUNENCRYPTED);
3545
3546 AUDIT_ARG(fflags, oflags);
3547 AUDIT_ARG(mode, vap->va_mode);
3548
3549 if ((error = falloc_withalloc(p,
3550 &fp, &indx, ctx, fp_zalloc, cra)) != 0) {
3551 return (error);
3552 }
3553 uu->uu_dupfd = -indx - 1;
3554
3555 if ((error = vn_open_auth(ndp, &flags, vap))) {
3556 if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){ /* XXX from fdopen */
3557 if ((error = dupfdopen(p->p_fd, indx, uu->uu_dupfd, flags, error)) == 0) {
3558 fp_drop(p, indx, NULL, 0);
3559 *retval = indx;
3560 return (0);
3561 }
3562 }
3563 if (error == ERESTART)
3564 error = EINTR;
3565 fp_free(p, indx, fp);
3566 return (error);
3567 }
3568 uu->uu_dupfd = 0;
3569 vp = ndp->ni_vp;
3570
3571 fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY | FENCRYPTED | FUNENCRYPTED);
3572 fp->f_fglob->fg_ops = &vnops;
3573 fp->f_fglob->fg_data = (caddr_t)vp;
3574
3575 if (flags & (O_EXLOCK | O_SHLOCK)) {
3576 lf.l_whence = SEEK_SET;
3577 lf.l_start = 0;
3578 lf.l_len = 0;
3579 if (flags & O_EXLOCK)
3580 lf.l_type = F_WRLCK;
3581 else
3582 lf.l_type = F_RDLCK;
3583 type = F_FLOCK;
3584 if ((flags & FNONBLOCK) == 0)
3585 type |= F_WAIT;
3586#if CONFIG_MACF
3587 error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
3588 F_SETLK, &lf);
3589 if (error)
3590 goto bad;
3591#endif
3592 if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL)))
3593 goto bad;
3594 fp->f_fglob->fg_flag |= FHASLOCK;
3595 }
3596
3597#if DEVELOPMENT || DEBUG
3598 /*
3599 * XXX VSWAP: Check for entitlements or special flag here
3600 * so we can restrict access appropriately.
3601 */
3602#else /* DEVELOPMENT || DEBUG */
3603
3604 if (vnode_isswap(vp) && (flags & (FWRITE | O_TRUNC)) && (ctx != vfs_context_kernel())) {
3605 /* block attempt to write/truncate swapfile */
3606 error = EPERM;
3607 goto bad;
3608 }
3609#endif /* DEVELOPMENT || DEBUG */
3610
3611 /* try to truncate by setting the size attribute */
3612 if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
3613 goto bad;
3614
3615 /*
3616 * For directories we hold some additional information in the fd.
3617 */
3618 if (vnode_vtype(vp) == VDIR) {
3619 fp->f_fglob->fg_vn_data = fg_vn_data_alloc();
3620 } else {
3621 fp->f_fglob->fg_vn_data = NULL;
3622 }
3623
3624 vnode_put(vp);
3625
3626 /*
3627	 * The first terminal open (without an O_NOCTTY) by a session leader
3628 * results in it being set as the controlling terminal.
3629 */
3630 if (vnode_istty(vp) && !(p->p_flag & P_CONTROLT) &&
3631 !(flags & O_NOCTTY)) {
3632 int tmp = 0;
3633
3634 (void)(*fp->f_fglob->fg_ops->fo_ioctl)(fp, (int)TIOCSCTTY,
3635 (caddr_t)&tmp, ctx);
3636 }
3637
3638 proc_fdlock(p);
3639 if (flags & O_CLOEXEC)
3640 *fdflags(p, indx) |= UF_EXCLOSE;
3641 if (flags & O_CLOFORK)
3642 *fdflags(p, indx) |= UF_FORKCLOSE;
3643 procfdtbl_releasefd(p, indx, NULL);
3644
3645#if CONFIG_SECLUDED_MEMORY
3646 if (secluded_for_filecache &&
3647 FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE &&
3648 vnode_vtype(vp) == VREG) {
3649 memory_object_control_t moc;
3650
3651 moc = ubc_getobject(vp, UBC_FLAGS_NONE);
3652
3653 if (moc == MEMORY_OBJECT_CONTROL_NULL) {
3654 /* nothing to do... */
3655 } else if (fp->f_fglob->fg_flag & FWRITE) {
3656 /* writable -> no longer eligible for secluded pages */
3657 memory_object_mark_eligible_for_secluded(moc,
3658 FALSE);
3659 } else if (secluded_for_filecache == 1) {
3660 char pathname[32] = { 0, };
3661 size_t copied;
3662 /* XXX FBDP: better way to detect /Applications/ ? */
3663 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
3664 copyinstr(ndp->ni_dirp,
3665 pathname,
3666 sizeof (pathname),
3667 &copied);
3668 } else {
3669 copystr(CAST_DOWN(void *, ndp->ni_dirp),
3670 pathname,
3671 sizeof (pathname),
3672 &copied);
3673 }
3674 pathname[sizeof (pathname) - 1] = '\0';
3675 if (strncmp(pathname,
3676 "/Applications/",
3677 strlen("/Applications/")) == 0 &&
3678 strncmp(pathname,
3679 "/Applications/Camera.app/",
3680 strlen("/Applications/Camera.app/")) != 0) {
3681 /*
3682 * not writable
3683 * AND from "/Applications/"
3684 * AND not from "/Applications/Camera.app/"
3685 * ==> eligible for secluded
3686 */
3687 memory_object_mark_eligible_for_secluded(moc,
3688 TRUE);
3689 }
3690 } else if (secluded_for_filecache == 2) {
3691#if __arm64__
3692#define DYLD_SHARED_CACHE_NAME "dyld_shared_cache_arm64"
3693#elif __arm__
3694#define DYLD_SHARED_CACHE_NAME "dyld_shared_cache_armv7"
3695#else
3696/* not implemented... */
3697#endif
3698 if (!strncmp(vp->v_name,
3699 DYLD_SHARED_CACHE_NAME,
3700 strlen(DYLD_SHARED_CACHE_NAME)) ||
3701 !strncmp(vp->v_name,
3702 "dyld",
3703 strlen(vp->v_name)) ||
3704 !strncmp(vp->v_name,
3705 "launchd",
3706 strlen(vp->v_name)) ||
3707 !strncmp(vp->v_name,
3708 "Camera",
3709 strlen(vp->v_name)) ||
3710 !strncmp(vp->v_name,
3711 "mediaserverd",
3712 strlen(vp->v_name)) ||
3713 !strncmp(vp->v_name,
3714 "SpringBoard",
3715 strlen(vp->v_name)) ||
3716 !strncmp(vp->v_name,
3717 "backboardd",
3718 strlen(vp->v_name))) {
3719 /*
3720 * This file matters when launching Camera:
3721 * do not store its contents in the secluded
3722 * pool that will be drained on Camera launch.
3723 */
3724 memory_object_mark_eligible_for_secluded(moc,
3725 FALSE);
3726 }
3727 }
3728 }
3729#endif /* CONFIG_SECLUDED_MEMORY */
3730
3731 fp_drop(p, indx, fp, 1);
3732 proc_fdunlock(p);
3733
3734 *retval = indx;
3735
3736 return (0);
3737bad:
3738 context = *vfs_context_current();
3739 context.vc_ucred = fp->f_fglob->fg_cred;
3740
3741 if ((fp->f_fglob->fg_flag & FHASLOCK) &&
3742 (FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE)) {
3743 lf.l_whence = SEEK_SET;
3744 lf.l_start = 0;
3745 lf.l_len = 0;
3746 lf.l_type = F_UNLCK;
3747
3748 (void)VNOP_ADVLOCK(
3749 vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
3750 }
3751
3752 vn_close(vp, fp->f_fglob->fg_flag, &context);
3753 vnode_put(vp);
3754 fp_free(p, indx, fp);
3755
3756 return (error);
3757}
3758
3759/*
3760 * While most of the *at syscall handlers can call nameiat() which
3761 * is a wrapper around namei, the use of namei and initialisation
3762 * of nameidata are far removed and in different functions - namei
3763 * gets called in vn_open_auth for open1. So we'll just do here what
3764 * nameiat() does.
3765 */
3766static int
3767open1at(vfs_context_t ctx, struct nameidata *ndp, int uflags,
3768 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra, int32_t *retval,
3769 int dirfd)
3770{
3771 if ((dirfd != AT_FDCWD) && !(ndp->ni_cnd.cn_flags & USEDVP)) {
3772 int error;
3773 char c;
3774
3775 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
3776 error = copyin(ndp->ni_dirp, &c, sizeof(char));
3777 if (error)
3778 return (error);
3779 } else {
3780 c = *((char *)(ndp->ni_dirp));
3781 }
3782
3783 if (c != '/') {
3784 vnode_t dvp_at;
3785
3786 error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
3787 &dvp_at);
3788 if (error)
3789 return (error);
3790
3791 if (vnode_vtype(dvp_at) != VDIR) {
3792 vnode_put(dvp_at);
3793 return (ENOTDIR);
3794 }
3795
3796 ndp->ni_dvp = dvp_at;
3797 ndp->ni_cnd.cn_flags |= USEDVP;
3798 error = open1(ctx, ndp, uflags, vap, fp_zalloc, cra,
3799 retval);
3800 vnode_put(dvp_at);
3801 return (error);
3802 }
3803 }
3804
3805 return (open1(ctx, ndp, uflags, vap, fp_zalloc, cra, retval));
3806}
3807
3808/*
3809 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
3810 *
3811 * Parameters: p Process requesting the open
3812 * uap User argument descriptor (see below)
3813 * retval Pointer to an area to receive the
3814 *					return value from the system call
3815 *
3816 * Indirect: uap->path Path to open (same as 'open')
3817 *		uap->flags			Flags to open (same as 'open')
3818 * uap->uid UID to set, if creating
3819 * uap->gid GID to set, if creating
3820 * uap->mode File mode, if creating (same as 'open')
3821 * uap->xsecurity ACL to set, if creating
3822 *
3823 * Returns: 0 Success
3824 * !0 errno value
3825 *
3826 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3827 *
3828 * XXX:	We should enumerate the possible errno values here, and where
3829 * in the code they originated.
3830 */
3831int
3832open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval)
3833{
3834 struct filedesc *fdp = p->p_fd;
3835 int ciferror;
3836 kauth_filesec_t xsecdst;
3837 struct vnode_attr va;
3838 struct nameidata nd;
3839 int cmode;
3840
3841 AUDIT_ARG(owner, uap->uid, uap->gid);
3842
3843 xsecdst = NULL;
3844 if ((uap->xsecurity != USER_ADDR_NULL) &&
3845 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
3846 return ciferror;
3847
3848 VATTR_INIT(&va);
3849 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3850 VATTR_SET(&va, va_mode, cmode);
3851 if (uap->uid != KAUTH_UID_NONE)
3852 VATTR_SET(&va, va_uid, uap->uid);
3853 if (uap->gid != KAUTH_GID_NONE)
3854 VATTR_SET(&va, va_gid, uap->gid);
3855 if (xsecdst != NULL)
3856 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
3857
3858 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3859 uap->path, vfs_context_current());
3860
3861 ciferror = open1(vfs_context_current(), &nd, uap->flags, &va,
3862 fileproc_alloc_init, NULL, retval);
3863 if (xsecdst != NULL)
3864 kauth_filesec_free(xsecdst);
3865
3866 return ciferror;
3867}
3868
3869/*
3870 * Go through the data-protected atomically controlled open (2)
3871 *
3872 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
3873 */
3874int open_dprotected_np (__unused proc_t p, struct open_dprotected_np_args *uap, int32_t *retval) {
3875 int flags = uap->flags;
3876 int class = uap->class;
3877 int dpflags = uap->dpflags;
3878
3879 /*
3880	 * Follow the same path as normal open(2):
3881	 * look up the item if it exists, and acquire the vnode.
3882 */
3883 struct filedesc *fdp = p->p_fd;
3884 struct vnode_attr va;
3885 struct nameidata nd;
3886 int cmode;
3887 int error;
3888
3889 VATTR_INIT(&va);
3890 /* Mask off all but regular access permissions */
3891 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3892 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3893
3894 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3895 uap->path, vfs_context_current());
3896
3897 /*
3898 * Initialize the extra fields in vnode_attr to pass down our
3899 * extra fields.
3900 * 1. target cprotect class.
3901 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
3902 */
3903 if (flags & O_CREAT) {
3904 /* lower level kernel code validates that the class is valid before applying it. */
3905 if (class != PROTECTION_CLASS_DEFAULT) {
3906 /*
3907 * PROTECTION_CLASS_DEFAULT implies that we make the class for this
3908 * file behave the same as open (2)
3909 */
3910 VATTR_SET(&va, va_dataprotect_class, class);
3911 }
3912 }
3913
3914 if (dpflags & (O_DP_GETRAWENCRYPTED|O_DP_GETRAWUNENCRYPTED)) {
3915 if ( flags & (O_RDWR | O_WRONLY)) {
3916 /* Not allowed to write raw encrypted bytes */
3917 return EINVAL;
3918 }
3919 if (uap->dpflags & O_DP_GETRAWENCRYPTED) {
3920 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
3921 }
3922 if (uap->dpflags & O_DP_GETRAWUNENCRYPTED) {
3923 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWUNENCRYPTED);
3924 }
3925 }
3926
3927 error = open1(vfs_context_current(), &nd, uap->flags, &va,
3928 fileproc_alloc_init, NULL, retval);
3929
3930 return error;
3931}
3932
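/*
 * Common backend for open(2), openat(2) and openbyid_np(): build the
 * vnode_attr (creation mode masked by the process umask) and the
 * nameidata, then hand off to open1at(), which resolves relative paths
 * against the directory referenced by fd when fd is not AT_FDCWD.
 */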
3933static int
3934openat_internal(vfs_context_t ctx, user_addr_t path, int flags, int mode,
3935 int fd, enum uio_seg segflg, int *retval)
3936{
3937 struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
3938 struct vnode_attr va;
3939 struct nameidata nd;
3940 int cmode;
3941
3942 VATTR_INIT(&va);
3943 /* Mask off all but regular access permissions */
3944 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3945 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3946
3947 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1,
3948 segflg, path, ctx);
3949
3950 return (open1at(ctx, &nd, flags, &va, fileproc_alloc_init, NULL,
3951 retval, fd));
3952}
3953
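/*
 * open(2) and openat(2) are cancellation points: the exported entry points
 * call __pthread_testcancel() and then forward to the _nocancel variants,
 * which do the real work through openat_internal().
 */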
3954int
3955open(proc_t p, struct open_args *uap, int32_t *retval)
3956{
3957 __pthread_testcancel(1);
3958 return(open_nocancel(p, (struct open_nocancel_args *)uap, retval));
3959}
3960
3961int
3962open_nocancel(__unused proc_t p, struct open_nocancel_args *uap,
3963 int32_t *retval)
3964{
3965 return (openat_internal(vfs_context_current(), uap->path, uap->flags,
3966 uap->mode, AT_FDCWD, UIO_USERSPACE, retval));
3967}
3968
3969int
3970openat_nocancel(__unused proc_t p, struct openat_nocancel_args *uap,
3971 int32_t *retval)
3972{
3973 return (openat_internal(vfs_context_current(), uap->path, uap->flags,
3974 uap->mode, uap->fd, UIO_USERSPACE, retval));
3975}
3976
3977int
3978openat(proc_t p, struct openat_args *uap, int32_t *retval)
3979{
3980 __pthread_testcancel(1);
3981 return(openat_nocancel(p, (struct openat_nocancel_args *)uap, retval));
3982}
3983
3984/*
3985 * openbyid_np: open a file given a file system id and a file system object id.
3986 *	The HFS file system object id is an fsobj_id_t {uint32, uint32};
3987 *	for file systems that don't support object ids it is a node id (uint64_t).
3988 *
3989 * Parameters: p Process requesting the open
3990 * uap User argument descriptor (see below)
3991 * retval Pointer to an area to receive the
3992 *					return value from the system call
3993 *
3994 * Indirect: uap->path Path to open (same as 'open')
3995 *
3996 * uap->fsid id of target file system
3997 * uap->objid id of target file system object
3998 * uap->flags Flags to open (same as 'open')
3999 *
4000 * Returns: 0 Success
4001 * !0 errno value
4002 *
4003 *
4004 * XXX:	We should enumerate the possible errno values here, and where
4005 * in the code they originated.
4006 */
4007int
4008openbyid_np(__unused proc_t p, struct openbyid_np_args *uap, int *retval)
4009{
4010 fsid_t fsid;
4011 uint64_t objid;
4012 int error;
4013 char *buf = NULL;
4014 int buflen = MAXPATHLEN;
4015 int pathlen = 0;
4016 vfs_context_t ctx = vfs_context_current();
4017
4018 if ((error = priv_check_cred(vfs_context_ucred(ctx), PRIV_VFS_OPEN_BY_ID, 0))) {
4019 return (error);
4020 }
4021
4022 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
4023 return (error);
4024 }
4025
4026	/* uap->objid is an fsobj_id_t defined as struct {uint32_t, uint32_t} */
4027 if ((error = copyin(uap->objid, (caddr_t)&objid, sizeof(uint64_t)))) {
4028 return (error);
4029 }
4030
4031 AUDIT_ARG(value32, fsid.val[0]);
4032 AUDIT_ARG(value64, objid);
4033
4034	/* resolve path from fsid, objid */
4035 do {
4036 MALLOC(buf, char *, buflen + 1, M_TEMP, M_WAITOK);
4037 if (buf == NULL) {
4038 return (ENOMEM);
4039 }
4040
4041 error = fsgetpath_internal(
4042 ctx, fsid.val[0], objid,
4043 buflen, buf, &pathlen);
4044
4045 if (error) {
4046 FREE(buf, M_TEMP);
4047 buf = NULL;
4048 }
4049 } while (error == ENOSPC && (buflen += MAXPATHLEN));
4050
4051 if (error) {
4052 return error;
4053 }
4054
4055 buf[pathlen] = 0;
4056
4057 error = openat_internal(
4058 ctx, (user_addr_t)buf, uap->oflags, 0, AT_FDCWD, UIO_SYSSPACE, retval);
4059
4060 FREE(buf, M_TEMP);
4061
4062 return error;
4063}
4064
4065
4066/*
4067 * Create a special file.
4068 */
4069static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
4070
4071int
4072mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
4073{
4074 struct vnode_attr va;
4075 vfs_context_t ctx = vfs_context_current();
4076 int error;
4077 struct nameidata nd;
4078 vnode_t vp, dvp;
4079
4080 VATTR_INIT(&va);
4081 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
4082 VATTR_SET(&va, va_rdev, uap->dev);
4083
4084 /* If it's a mknod() of a FIFO, call mkfifo1() instead */
4085 if ((uap->mode & S_IFMT) == S_IFIFO)
4086 return(mkfifo1(ctx, uap->path, &va));
4087
4088 AUDIT_ARG(mode, uap->mode);
4089 AUDIT_ARG(value32, uap->dev);
4090
4091 if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
4092 return (error);
4093 NDINIT(&nd, CREATE, OP_MKNOD, LOCKPARENT | AUDITVNPATH1,
4094 UIO_USERSPACE, uap->path, ctx);
4095 error = namei(&nd);
4096 if (error)
4097 return (error);
4098 dvp = nd.ni_dvp;
4099 vp = nd.ni_vp;
4100
4101 if (vp != NULL) {
4102 error = EEXIST;
4103 goto out;
4104 }
4105
4106 switch (uap->mode & S_IFMT) {
4107 case S_IFCHR:
4108 VATTR_SET(&va, va_type, VCHR);
4109 break;
4110 case S_IFBLK:
4111 VATTR_SET(&va, va_type, VBLK);
4112 break;
4113 default:
4114 error = EINVAL;
4115 goto out;
4116 }
4117
4118#if CONFIG_MACF
4119 error = mac_vnode_check_create(ctx,
4120 nd.ni_dvp, &nd.ni_cnd, &va);
4121 if (error)
4122 goto out;
4123#endif
4124
4125 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
4126 goto out;
4127
4128 if ((error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx)) != 0)
4129 goto out;
4130
4131 if (vp) {
4132 int update_flags = 0;
4133
4134 // Make sure the name & parent pointers are hooked up
4135 if (vp->v_name == NULL)
4136 update_flags |= VNODE_UPDATE_NAME;
4137 if (vp->v_parent == NULLVP)
4138 update_flags |= VNODE_UPDATE_PARENT;
4139
4140 if (update_flags)
4141 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
4142
4143#if CONFIG_FSE
4144 add_fsevent(FSE_CREATE_FILE, ctx,
4145 FSE_ARG_VNODE, vp,
4146 FSE_ARG_DONE);
4147#endif
4148 }
4149
4150out:
4151 /*
4152 * nameidone has to happen before we vnode_put(dvp)
4153 * since it may need to release the fs_nodelock on the dvp
4154 */
4155 nameidone(&nd);
4156
4157 if (vp)
4158 vnode_put(vp);
4159 vnode_put(dvp);
4160
4161 return (error);
4162}
4163
4164/*
4165 * Create a named pipe.
4166 *
4167 * Returns: 0 Success
4168 * EEXIST
4169 * namei:???
4170 * vnode_authorize:???
4171 * vn_create:???
4172 */
4173static int
4174mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
4175{
4176 vnode_t vp, dvp;
4177 int error;
4178 struct nameidata nd;
4179
4180 NDINIT(&nd, CREATE, OP_MKFIFO, LOCKPARENT | AUDITVNPATH1,
4181 UIO_USERSPACE, upath, ctx);
4182 error = namei(&nd);
4183 if (error)
4184 return (error);
4185 dvp = nd.ni_dvp;
4186 vp = nd.ni_vp;
4187
4188 /* check that this is a new file and authorize addition */
4189 if (vp != NULL) {
4190 error = EEXIST;
4191 goto out;
4192 }
4193 VATTR_SET(vap, va_type, VFIFO);
4194
4195 if ((error = vn_authorize_create(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0)
4196 goto out;
4197
4198 error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx);
4199out:
4200 /*
4201 * nameidone has to happen before we vnode_put(dvp)
4202 * since it may need to release the fs_nodelock on the dvp
4203 */
4204 nameidone(&nd);
4205
4206 if (vp)
4207 vnode_put(vp);
4208 vnode_put(dvp);
4209
4210 return error;
4211}
4212
4213
4214/*
4215 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
4216 *
4217 * Parameters: p Process requesting the open
4218 * uap User argument descriptor (see below)
4219 * retval (Ignored)
4220 *
4221 * Indirect: uap->path Path to fifo (same as 'mkfifo')
4222 * uap->uid UID to set
4223 * uap->gid GID to set
4224 * uap->mode File mode to set (same as 'mkfifo')
4225 * uap->xsecurity ACL to set, if creating
4226 *
4227 * Returns: 0 Success
4228 * !0 errno value
4229 *
4230 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
4231 *
4232 * XXX: We should enumerate the possible errno values here, and where
4233 * in the code they originated.
4234 */
4235int
4236mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused int32_t *retval)
4237{
4238 int ciferror;
4239 kauth_filesec_t xsecdst;
4240 struct vnode_attr va;
4241
4242 AUDIT_ARG(owner, uap->uid, uap->gid);
4243
4244 xsecdst = KAUTH_FILESEC_NONE;
4245 if (uap->xsecurity != USER_ADDR_NULL) {
4246 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
4247 return ciferror;
4248 }
4249
4250 VATTR_INIT(&va);
4251 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
4252 if (uap->uid != KAUTH_UID_NONE)
4253 VATTR_SET(&va, va_uid, uap->uid);
4254 if (uap->gid != KAUTH_GID_NONE)
4255 VATTR_SET(&va, va_gid, uap->gid);
4256 if (xsecdst != KAUTH_FILESEC_NONE)
4257 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
4258
4259 ciferror = mkfifo1(vfs_context_current(), uap->path, &va);
4260
4261 if (xsecdst != KAUTH_FILESEC_NONE)
4262 kauth_filesec_free(xsecdst);
4263 return ciferror;
4264}
4265
4266/* ARGSUSED */
4267int
4268mkfifo(proc_t p, struct mkfifo_args *uap, __unused int32_t *retval)
4269{
4270 struct vnode_attr va;
4271
4272 VATTR_INIT(&va);
4273 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
4274
4275 return(mkfifo1(vfs_context_current(), uap->path, &va));
4276}
4277
4278
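/*
 * Minimal local strrchr(): return a pointer to the last occurrence of 'ch'
 * in the NUL-terminated string 'p', or NULL if it does not occur.
 */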
4279static char *
4280my_strrchr(char *p, int ch)
4281{
4282 char *save;
4283
4284 for (save = NULL;; ++p) {
4285 if (*p == ch)
4286 save = p;
4287 if (!*p)
4288 return(save);
4289 }
4290 /* NOTREACHED */
4291}
4292
4293extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
4294
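/*
 * Best-effort path construction for fsevents and fileop listeners: build the
 * path to 'dvp' (optionally appending 'leafname') into 'path'. If the exact
 * path cannot be obtained or does not fit, *truncated_path is set and the
 * closest usable prefix (an ancestor's path, or the mount point as a last
 * resort) is returned instead. The returned length counts the terminating NUL.
 */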
4295int
4296safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
4297{
4298 int ret, len = _len;
4299
4300 *truncated_path = 0;
4301 ret = vn_getpath(dvp, path, &len);
4302 if (ret == 0 && len < (MAXPATHLEN - 1)) {
4303 if (leafname) {
4304 path[len-1] = '/';
4305 len += strlcpy(&path[len], leafname, MAXPATHLEN-len) + 1;
4306 if (len > MAXPATHLEN) {
4307 char *ptr;
4308
4309 // the string got truncated!
4310 *truncated_path = 1;
4311 ptr = my_strrchr(path, '/');
4312 if (ptr) {
4313 *ptr = '\0'; // chop off the string at the last directory component
4314 }
4315 len = strlen(path) + 1;
4316 }
4317 }
4318 } else if (ret == 0) {
4319 *truncated_path = 1;
4320 } else if (ret != 0) {
4321 struct vnode *mydvp=dvp;
4322
4323 if (ret != ENOSPC) {
4324 printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
4325 dvp, dvp->v_name ? dvp->v_name : "no-name", ret);
4326 }
4327 *truncated_path = 1;
4328
4329 do {
4330 if (mydvp->v_parent != NULL) {
4331 mydvp = mydvp->v_parent;
4332 } else if (mydvp->v_mount) {
4333 strlcpy(path, mydvp->v_mount->mnt_vfsstat.f_mntonname, _len);
4334 break;
4335 } else {
4336 // no parent and no mount point? only thing is to punt and say "/" changed
4337 strlcpy(path, "/", _len);
4338 len = 2;
4339 mydvp = NULL;
4340 }
4341
4342 if (mydvp == NULL) {
4343 break;
4344 }
4345
4346 len = _len;
4347 ret = vn_getpath(mydvp, path, &len);
4348 } while (ret == ENOSPC);
4349 }
4350
4351 return len;
4352}
4353
4354
4355/*
4356 * Make a hard file link.
4357 *
4358 * Returns: 0 Success
4359 * EPERM
4360 * EEXIST
4361 * EXDEV
4362 * namei:???
4363 * vnode_authorize:???
4364 * VNOP_LINK:???
4365 */
4366/* ARGSUSED */
4367static int
4368linkat_internal(vfs_context_t ctx, int fd1, user_addr_t path, int fd2,
4369 user_addr_t link, int flag, enum uio_seg segflg)
4370{
4371 vnode_t vp, dvp, lvp;
4372 struct nameidata nd;
4373 int follow;
4374 int error;
4375#if CONFIG_FSE
4376 fse_info finfo;
4377#endif
4378 int need_event, has_listeners;
4379 char *target_path = NULL;
4380 int truncated=0;
4381
4382 vp = dvp = lvp = NULLVP;
4383
4384 /* look up the object we are linking to */
4385 follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW;
4386 NDINIT(&nd, LOOKUP, OP_LOOKUP, AUDITVNPATH1 | follow,
4387 segflg, path, ctx);
4388
4389 error = nameiat(&nd, fd1);
4390 if (error)
4391 return (error);
4392 vp = nd.ni_vp;
4393
4394 nameidone(&nd);
4395
4396 /*
4397 * Normally, linking to directories is not supported.
4398 * However, some file systems may have limited support.
4399 */
4400 if (vp->v_type == VDIR) {
4401 if (!ISSET(vp->v_mount->mnt_kern_flag, MNTK_DIR_HARDLINKS)) {
4402 error = EPERM; /* POSIX */
4403 goto out;
4404 }
4405
4406 /* Linking to a directory requires ownership. */
4407 if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
4408 struct vnode_attr dva;
4409
4410 VATTR_INIT(&dva);
4411 VATTR_WANTED(&dva, va_uid);
4412 if (vnode_getattr(vp, &dva, ctx) != 0 ||
4413 !VATTR_IS_SUPPORTED(&dva, va_uid) ||
4414 (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
4415 error = EACCES;
4416 goto out;
4417 }
4418 }
4419 }
4420
4421 /* lookup the target node */
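	/*
	 * The nameidata from the first lookup is re-used here: it is switched
	 * to a CREATE operation on the new link name and re-driven through
	 * nameiat() so that the parent directory is returned in ni_dvp
	 * (LOCKPARENT).
	 */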
4422#if CONFIG_TRIGGERS
4423 nd.ni_op = OP_LINK;
4424#endif
4425 nd.ni_cnd.cn_nameiop = CREATE;
4426 nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
4427 nd.ni_dirp = link;
4428 error = nameiat(&nd, fd2);
4429 if (error != 0)
4430 goto out;
4431 dvp = nd.ni_dvp;
4432 lvp = nd.ni_vp;
4433
4434#if CONFIG_MACF
4435 if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0)
4436 goto out2;
4437#endif
4438
4439	/* nor may we link to anything that kauth doesn't want us to (e.g. immutable items) */
4440 if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0)
4441 goto out2;
4442
4443 /* target node must not exist */
4444 if (lvp != NULLVP) {
4445 error = EEXIST;
4446 goto out2;
4447 }
4448 /* cannot link across mountpoints */
4449 if (vnode_mount(vp) != vnode_mount(dvp)) {
4450 error = EXDEV;
4451 goto out2;
4452 }
4453
4454	/* authorize creation of the target node */
4455 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
4456 goto out2;
4457
4458 /* and finally make the link */
4459 error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
4460 if (error)
4461 goto out2;
4462
4463#if CONFIG_MACF
4464 (void)mac_vnode_notify_link(ctx, vp, dvp, &nd.ni_cnd);
4465#endif
4466
4467#if CONFIG_FSE
4468 need_event = need_fsevent(FSE_CREATE_FILE, dvp);
4469#else
4470 need_event = 0;
4471#endif
4472 has_listeners = kauth_authorize_fileop_has_listeners();
4473
4474 if (need_event || has_listeners) {
4475 char *link_to_path = NULL;
4476 int len, link_name_len;
4477
4478 /* build the path to the new link file */
4479 GET_PATH(target_path);
4480 if (target_path == NULL) {
4481 error = ENOMEM;
4482 goto out2;
4483 }
4484
4485 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);
4486
4487 if (has_listeners) {
4488			/* build the path to the file we are linking to */
4489 GET_PATH(link_to_path);
4490 if (link_to_path == NULL) {
4491 error = ENOMEM;
4492 goto out2;
4493 }
4494
4495 link_name_len = MAXPATHLEN;
4496 if (vn_getpath(vp, link_to_path, &link_name_len) == 0) {
4497 /*
4498				 * Call out to allow 3rd party notification of the new link.
4499 * Ignore result of kauth_authorize_fileop call.
4500 */
4501 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
4502 (uintptr_t)link_to_path,
4503 (uintptr_t)target_path);
4504 }
4505 if (link_to_path != NULL) {
4506 RELEASE_PATH(link_to_path);
4507 }
4508 }
4509#if CONFIG_FSE
4510 if (need_event) {
4511 /* construct fsevent */
4512 if (get_fse_info(vp, &finfo, ctx) == 0) {
4513 if (truncated) {
4514 finfo.mode |= FSE_TRUNCATED_PATH;
4515 }
4516
4517			// post the creation event for the new link
4518 add_fsevent(FSE_CREATE_FILE, ctx,
4519 FSE_ARG_STRING, len, target_path,
4520 FSE_ARG_FINFO, &finfo,
4521 FSE_ARG_DONE);
4522 }
4523 if (vp->v_parent) {
4524 add_fsevent(FSE_STAT_CHANGED, ctx,
4525 FSE_ARG_VNODE, vp->v_parent,
4526 FSE_ARG_DONE);
4527 }
4528 }
4529#endif
4530 }
4531out2:
4532 /*
4533 * nameidone has to happen before we vnode_put(dvp)
4534 * since it may need to release the fs_nodelock on the dvp
4535 */
4536 nameidone(&nd);
4537 if (target_path != NULL) {
4538 RELEASE_PATH(target_path);
4539 }
4540out:
4541 if (lvp)
4542 vnode_put(lvp);
4543 if (dvp)
4544 vnode_put(dvp);
4545 vnode_put(vp);
4546 return (error);
4547}
4548
4549int
4550link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
4551{
4552 return (linkat_internal(vfs_context_current(), AT_FDCWD, uap->path,
4553 AT_FDCWD, uap->link, AT_SYMLINK_FOLLOW, UIO_USERSPACE));
4554}
4555
4556int
4557linkat(__unused proc_t p, struct linkat_args *uap, __unused int32_t *retval)
4558{
4559 if (uap->flag & ~AT_SYMLINK_FOLLOW)
4560 return (EINVAL);
4561
4562 return (linkat_internal(vfs_context_current(), uap->fd1, uap->path,
4563 uap->fd2, uap->link, uap->flag, UIO_USERSPACE));
4564}
4565
4566/*
4567 * Make a symbolic link.
4568 *
4569 * We could add support for ACLs here too...
4570 */
4571/* ARGSUSED */
4572static int
4573symlinkat_internal(vfs_context_t ctx, user_addr_t path_data, int fd,
4574 user_addr_t link, enum uio_seg segflg)
4575{
4576 struct vnode_attr va;
4577 char *path;
4578 int error;
4579 struct nameidata nd;
4580 vnode_t vp, dvp;
4581 size_t dummy=0;
4582 proc_t p;
4583
4584 error = 0;
4585 if (UIO_SEG_IS_USER_SPACE(segflg)) {
4586 MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
4587 error = copyinstr(path_data, path, MAXPATHLEN, &dummy);
4588 } else {
4589 path = (char *)path_data;
4590 }
4591 if (error)
4592 goto out;
4593 AUDIT_ARG(text, path); /* This is the link string */
4594
4595 NDINIT(&nd, CREATE, OP_SYMLINK, LOCKPARENT | AUDITVNPATH1,
4596 segflg, link, ctx);
4597
4598 error = nameiat(&nd, fd);
4599 if (error)
4600 goto out;
4601 dvp = nd.ni_dvp;
4602 vp = nd.ni_vp;
4603
4604 p = vfs_context_proc(ctx);
4605 VATTR_INIT(&va);
4606 VATTR_SET(&va, va_type, VLNK);
4607 VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);
4608
4609#if CONFIG_MACF
4610 error = mac_vnode_check_create(ctx,
4611 dvp, &nd.ni_cnd, &va);
4612#endif
4613 if (error != 0) {
4614 goto skipit;
4615 }
4616
4617 if (vp != NULL) {
4618 error = EEXIST;
4619 goto skipit;
4620 }
4621
4622 /* authorize */
4623 if (error == 0)
4624 error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
4625 /* get default ownership, etc. */
4626 if (error == 0)
4627 error = vnode_authattr_new(dvp, &va, 0, ctx);
4628 if (error == 0)
4629 error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
4630
4631#if CONFIG_MACF
4632 if (error == 0 && vp)
4633 error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx);
4634#endif
4635
4636 /* do fallback attribute handling */
4637 if (error == 0 && vp)
4638 error = vnode_setattr_fallback(vp, &va, ctx);
4639
4640 if (error == 0) {
4641 int update_flags = 0;
4642
4643		/* check whether the filesystem returned the new vnode; if not, look it up */
4644 if (vp == NULL) {
4645 nd.ni_cnd.cn_nameiop = LOOKUP;
4646#if CONFIG_TRIGGERS
4647 nd.ni_op = OP_LOOKUP;
4648#endif
4649 nd.ni_cnd.cn_flags = 0;
4650 error = nameiat(&nd, fd);
4651 vp = nd.ni_vp;
4652
4653 if (vp == NULL)
4654 goto skipit;
4655 }
4656
4657#if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
4658		/* call out to allow 3rd party notification of the symlink creation.
4659 * Ignore result of kauth_authorize_fileop call.
4660 */
4661 if (kauth_authorize_fileop_has_listeners() &&
4662 namei(&nd) == 0) {
4663 char *new_link_path = NULL;
4664 int len;
4665
4666 /* build the path to the new link file */
4667 new_link_path = get_pathbuff();
4668 len = MAXPATHLEN;
4669 vn_getpath(dvp, new_link_path, &len);
4670 if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
4671 new_link_path[len - 1] = '/';
4672 strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
4673 }
4674
4675 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
4676 (uintptr_t)path, (uintptr_t)new_link_path);
4677 if (new_link_path != NULL)
4678 release_pathbuff(new_link_path);
4679 }
4680#endif
4681 // Make sure the name & parent pointers are hooked up
4682 if (vp->v_name == NULL)
4683 update_flags |= VNODE_UPDATE_NAME;
4684 if (vp->v_parent == NULLVP)
4685 update_flags |= VNODE_UPDATE_PARENT;
4686
4687 if (update_flags)
4688 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
4689
4690#if CONFIG_FSE
4691 add_fsevent(FSE_CREATE_FILE, ctx,
4692 FSE_ARG_VNODE, vp,
4693 FSE_ARG_DONE);
4694#endif
4695 }
4696
4697skipit:
4698 /*
4699 * nameidone has to happen before we vnode_put(dvp)
4700 * since it may need to release the fs_nodelock on the dvp
4701 */
4702 nameidone(&nd);
4703
4704 if (vp)
4705 vnode_put(vp);
4706 vnode_put(dvp);
4707out:
4708 if (path && (path != (char *)path_data))
4709 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
4710
4711 return (error);
4712}
4713
4714int
4715symlink(__unused proc_t p, struct symlink_args *uap, __unused int32_t *retval)
4716{
4717 return (symlinkat_internal(vfs_context_current(), uap->path, AT_FDCWD,
4718 uap->link, UIO_USERSPACE));
4719}
4720
4721int
4722symlinkat(__unused proc_t p, struct symlinkat_args *uap,
4723 __unused int32_t *retval)
4724{
4725 return (symlinkat_internal(vfs_context_current(), uap->path1, uap->fd,
4726 uap->path2, UIO_USERSPACE));
4727}
4728
4729/*
4730 * Delete a whiteout from the filesystem.
4731 * No longer supported.
4732 */
4733int
4734undelete(__unused proc_t p, __unused struct undelete_args *uap, __unused int32_t *retval)
4735{
4736 return (ENOTSUP);
4737}
4738
4739/*
4740 * Delete a name from the filesystem.
4741 */
4742/* ARGSUSED */
4743static int
4744unlinkat_internal(vfs_context_t ctx, int fd, vnode_t start_dvp,
4745 user_addr_t path_arg, enum uio_seg segflg, int unlink_flags)
4746{
4747 struct nameidata nd;
4748 vnode_t vp, dvp;
4749 int error;
4750 struct componentname *cnp;
4751 char *path = NULL;
4752 int len=0;
4753#if CONFIG_FSE
4754 fse_info finfo;
4755 struct vnode_attr va;
4756#endif
4757 int flags;
4758 int need_event;
4759 int has_listeners;
4760 int truncated_path;
4761 int batched;
4762 struct vnode_attr *vap;
4763 int do_retry;
4764 int retry_count = 0;
4765 int cn_flags;
4766
4767 cn_flags = LOCKPARENT;
4768 if (!(unlink_flags & VNODE_REMOVE_NO_AUDIT_PATH))
4769 cn_flags |= AUDITVNPATH1;
4770 /* If a starting dvp is passed, it trumps any fd passed. */
4771 if (start_dvp)
4772 cn_flags |= USEDVP;
4773
4774#if NAMEDRSRCFORK
4775 /* unlink or delete is allowed on rsrc forks and named streams */
4776 cn_flags |= CN_ALLOWRSRCFORK;
4777#endif
4778
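/*
 * 'retry' re-drives the entire lookup when the unlink authorization (or a
 * compound remove) loses a race and reports ENOENT; the number of retries
 * is bounded by MAX_AUTHORIZE_ENOENT_RETRIES.
 */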
4779retry:
4780 do_retry = 0;
4781 flags = 0;
4782 need_event = 0;
4783 has_listeners = 0;
4784 truncated_path = 0;
4785 vap = NULL;
4786
4787 NDINIT(&nd, DELETE, OP_UNLINK, cn_flags, segflg, path_arg, ctx);
4788
4789 nd.ni_dvp = start_dvp;
4790 nd.ni_flag |= NAMEI_COMPOUNDREMOVE;
4791 cnp = &nd.ni_cnd;
4792
4793continue_lookup:
4794 error = nameiat(&nd, fd);
4795 if (error)
4796 return (error);
4797
4798 dvp = nd.ni_dvp;
4799 vp = nd.ni_vp;
4800
4801
4802 /* With Carbon delete semantics, busy files cannot be deleted */
4803 if (unlink_flags & VNODE_REMOVE_NODELETEBUSY) {
4804 flags |= VNODE_REMOVE_NODELETEBUSY;
4805 }
4806
4807 /* Skip any potential upcalls if told to. */
4808 if (unlink_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) {
4809 flags |= VNODE_REMOVE_SKIP_NAMESPACE_EVENT;
4810 }
4811
4812 if (vp) {
4813 batched = vnode_compound_remove_available(vp);
4814 /*
4815 * The root of a mounted filesystem cannot be deleted.
4816 */
4817 if (vp->v_flag & VROOT) {
4818 error = EBUSY;
4819 }
4820
4821#if DEVELOPMENT || DEBUG
4822 /*
4823 * XXX VSWAP: Check for entitlements or special flag here
4824 * so we can restrict access appropriately.
4825 */
4826#else /* DEVELOPMENT || DEBUG */
4827
4828 if (vnode_isswap(vp) && (ctx != vfs_context_kernel())) {
4829 error = EPERM;
4830 goto out;
4831 }
4832#endif /* DEVELOPMENT || DEBUG */
4833
4834 if (!batched) {
4835 error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL);
4836 if (error) {
4837 if (error == ENOENT) {
4838 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
4839 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
4840 do_retry = 1;
4841 retry_count++;
4842 }
4843 }
4844 goto out;
4845 }
4846 }
4847 } else {
4848 batched = 1;
4849
4850 if (!vnode_compound_remove_available(dvp)) {
4851 panic("No vp, but no compound remove?");
4852 }
4853 }
4854
4855#if CONFIG_FSE
4856 need_event = need_fsevent(FSE_DELETE, dvp);
4857 if (need_event) {
4858 if (!batched) {
4859 if ((vp->v_flag & VISHARDLINK) == 0) {
4860 /* XXX need to get these data in batched VNOP */
4861 get_fse_info(vp, &finfo, ctx);
4862 }
4863 } else {
4864 error = vfs_get_notify_attributes(&va);
4865 if (error) {
4866 goto out;
4867 }
4868
4869 vap = &va;
4870 }
4871 }
4872#endif
4873 has_listeners = kauth_authorize_fileop_has_listeners();
4874 if (need_event || has_listeners) {
4875 if (path == NULL) {
4876 GET_PATH(path);
4877 if (path == NULL) {
4878 error = ENOMEM;
4879 goto out;
4880 }
4881 }
4882 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
4883 }
4884
4885#if NAMEDRSRCFORK
4886 if (nd.ni_cnd.cn_flags & CN_WANTSRSRCFORK)
4887 error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
4888 else
4889#endif
4890 {
4891 error = vn_remove(dvp, &nd.ni_vp, &nd, flags, vap, ctx);
4892 vp = nd.ni_vp;
4893 if (error == EKEEPLOOKING) {
4894 if (!batched) {
4895 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
4896 }
4897
4898 if ((nd.ni_flag & NAMEI_CONTLOOKUP) == 0) {
4899 panic("EKEEPLOOKING, but continue flag not set?");
4900 }
4901
4902 if (vnode_isdir(vp)) {
4903 error = EISDIR;
4904 goto out;
4905 }
4906 goto continue_lookup;
4907 } else if (error == ENOENT && batched) {
4908 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
4909 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
4910 /*
4911 * For compound VNOPs, the authorization callback may
4912 * return ENOENT in case of racing hardlink lookups
4913				 * hitting the name cache; redrive the lookup in that case.
4914 */
4915 do_retry = 1;
4916 retry_count += 1;
4917 goto out;
4918 }
4919 }
4920 }
4921
4922 /*
4923 * Call out to allow 3rd party notification of delete.
4924 * Ignore result of kauth_authorize_fileop call.
4925 */
4926 if (!error) {
4927 if (has_listeners) {
4928 kauth_authorize_fileop(vfs_context_ucred(ctx),
4929 KAUTH_FILEOP_DELETE,
4930 (uintptr_t)vp,
4931 (uintptr_t)path);
4932 }
4933
4934 if (vp->v_flag & VISHARDLINK) {
4935 //
4936 // if a hardlink gets deleted we want to blow away the
4937 // v_parent link because the path that got us to this
4938 // instance of the link is no longer valid. this will
4939 // force the next call to get the path to ask the file
4940 // system instead of just following the v_parent link.
4941 //
4942 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
4943 }
4944
4945#if CONFIG_FSE
4946 if (need_event) {
4947 if (vp->v_flag & VISHARDLINK) {
4948 get_fse_info(vp, &finfo, ctx);
4949 } else if (vap) {
4950 vnode_get_fse_info_from_vap(vp, &finfo, vap);
4951 }
4952 if (truncated_path) {
4953 finfo.mode |= FSE_TRUNCATED_PATH;
4954 }
4955 add_fsevent(FSE_DELETE, ctx,
4956 FSE_ARG_STRING, len, path,
4957 FSE_ARG_FINFO, &finfo,
4958 FSE_ARG_DONE);
4959 }
4960#endif
4961 }
4962
4963out:
4964 if (path != NULL)
4965 RELEASE_PATH(path);
4966
4967#if NAMEDRSRCFORK
4968 /* recycle the deleted rsrc fork vnode to force a reclaim, which
4969 * will cause its shadow file to go away if necessary.
4970 */
4971 if (vp && (vnode_isnamedstream(vp)) &&
4972 (vp->v_parent != NULLVP) &&
4973 vnode_isshadow(vp)) {
4974 vnode_recycle(vp);
4975 }
4976#endif
4977 /*
4978 * nameidone has to happen before we vnode_put(dvp)
4979 * since it may need to release the fs_nodelock on the dvp
4980 */
4981 nameidone(&nd);
4982 vnode_put(dvp);
4983 if (vp) {
4984 vnode_put(vp);
4985 }
4986
4987 if (do_retry) {
4988 goto retry;
4989 }
4990
4991 return (error);
4992}
4993
4994int
4995unlink1(vfs_context_t ctx, vnode_t start_dvp, user_addr_t path_arg,
4996 enum uio_seg segflg, int unlink_flags)
4997{
4998 return (unlinkat_internal(ctx, AT_FDCWD, start_dvp, path_arg, segflg,
4999 unlink_flags));
5000}
5001
5002/*
5003 * Delete a name from the filesystem using Carbon semantics.
5004 */
5005int
5006delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
5007{
5008 return (unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
5009 uap->path, UIO_USERSPACE, VNODE_REMOVE_NODELETEBUSY));
5010}
5011
5012/*
5013 * Delete a name from the filesystem using POSIX semantics.
5014 */
5015int
5016unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval)
5017{
5018 return (unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
5019 uap->path, UIO_USERSPACE, 0));
5020}
5021
5022int
5023unlinkat(__unused proc_t p, struct unlinkat_args *uap, __unused int32_t *retval)
5024{
5025 if (uap->flag & ~AT_REMOVEDIR)
5026 return (EINVAL);
5027
5028 if (uap->flag & AT_REMOVEDIR)
5029 return (rmdirat_internal(vfs_context_current(), uap->fd,
5030 uap->path, UIO_USERSPACE));
5031 else
5032 return (unlinkat_internal(vfs_context_current(), uap->fd,
5033 NULLVP, uap->path, UIO_USERSPACE, 0));
5034}
5035
5036/*
5037 * Reposition read/write file offset.
5038 */
5039int
5040lseek(proc_t p, struct lseek_args *uap, off_t *retval)
5041{
5042 struct fileproc *fp;
5043 vnode_t vp;
5044 struct vfs_context *ctx;
5045 off_t offset = uap->offset, file_size;
5046 int error;
5047
5048	if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) ) {
5049 if (error == ENOTSUP)
5050 return (ESPIPE);
5051 return (error);
5052 }
5053 if (vnode_isfifo(vp)) {
5054 file_drop(uap->fd);
5055 return(ESPIPE);
5056 }
5057
5058
5059 ctx = vfs_context_current();
5060#if CONFIG_MACF
5061 if (uap->whence == L_INCR && uap->offset == 0)
5062 error = mac_file_check_get_offset(vfs_context_ucred(ctx),
5063 fp->f_fglob);
5064 else
5065 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
5066 fp->f_fglob);
5067 if (error) {
5068 file_drop(uap->fd);
5069 return (error);
5070 }
5071#endif
5072 if ( (error = vnode_getwithref(vp)) ) {
5073 file_drop(uap->fd);
5074 return(error);
5075 }
5076
5077 switch (uap->whence) {
5078 case L_INCR:
5079 offset += fp->f_fglob->fg_offset;
5080 break;
5081 case L_XTND:
5082 if ((error = vnode_size(vp, &file_size, ctx)) != 0)
5083 break;
5084 offset += file_size;
5085 break;
5086 case L_SET:
5087 break;
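	/*
	 * SEEK_HOLE / SEEK_DATA are resolved by the filesystem: the starting
	 * offset is passed down via VNOP_IOCTL and the resulting offset comes
	 * back in the same variable. A userspace caller would typically use
	 * this as, e.g., lseek(fd, cur, SEEK_DATA) to find the next region
	 * that contains data.
	 */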
5088 case SEEK_HOLE:
5089 error = VNOP_IOCTL(vp, FSIOC_FIOSEEKHOLE, (caddr_t)&offset, 0, ctx);
5090 break;
5091 case SEEK_DATA:
5092 error = VNOP_IOCTL(vp, FSIOC_FIOSEEKDATA, (caddr_t)&offset, 0, ctx);
5093 break;
5094 default:
5095 error = EINVAL;
5096 }
5097 if (error == 0) {
5098 if (uap->offset > 0 && offset < 0) {
5099 /* Incremented/relative move past max size */
5100 error = EOVERFLOW;
5101 } else {
5102 /*
5103 * Allow negative offsets on character devices, per
5104 * POSIX 1003.1-2001. Most likely for writing disk
5105 * labels.
5106 */
5107 if (offset < 0 && vp->v_type != VCHR) {
5108 /* Decremented/relative move before start */
5109 error = EINVAL;
5110 } else {
5111 /* Success */
5112 fp->f_fglob->fg_offset = offset;
5113 *retval = fp->f_fglob->fg_offset;
5114 }
5115 }
5116 }
5117
5118 /*
5119 * An lseek can affect whether data is "available to read." Use
5120 * hint of NOTE_NONE so no EVFILT_VNODE events fire
5121 */
5122 post_event_if_success(vp, error, NOTE_NONE);
5123 (void)vnode_put(vp);
5124 file_drop(uap->fd);
5125 return (error);
5126}
5127
5128
5129/*
5130 * Check access permissions.
5131 *
5132 * Returns: 0 Success
5133 * vnode_authorize:???
5134 */
5135static int
5136access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
5137{
5138 kauth_action_t action;
5139 int error;
5140
5141 /*
5142 * If just the regular access bits, convert them to something
5143 * that vnode_authorize will understand.
5144 */
5145 if (!(uflags & _ACCESS_EXTENDED_MASK)) {
5146 action = 0;
5147 if (uflags & R_OK)
5148 action |= KAUTH_VNODE_READ_DATA; /* aka KAUTH_VNODE_LIST_DIRECTORY */
5149 if (uflags & W_OK) {
5150 if (vnode_isdir(vp)) {
5151 action |= KAUTH_VNODE_ADD_FILE |
5152 KAUTH_VNODE_ADD_SUBDIRECTORY;
5153 /* might want delete rights here too */
5154 } else {
5155 action |= KAUTH_VNODE_WRITE_DATA;
5156 }
5157 }
5158 if (uflags & X_OK) {
5159 if (vnode_isdir(vp)) {
5160 action |= KAUTH_VNODE_SEARCH;
5161 } else {
5162 action |= KAUTH_VNODE_EXECUTE;
5163 }
5164 }
5165 } else {
5166 /* take advantage of definition of uflags */
5167 action = uflags >> 8;
5168 }
5169
5170#if CONFIG_MACF
5171 error = mac_vnode_check_access(ctx, vp, uflags);
5172 if (error)
5173 return (error);
5174#endif /* MAC */
5175
5176 /* action == 0 means only check for existence */
5177 if (action != 0) {
5178 error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
5179 } else {
5180 error = 0;
5181 }
5182
5183 return(error);
5184}
5185
5186
5187
5188/*
5189 * access_extended: Check access permissions in bulk.
5190 *
5191 * Description: uap->entries Pointer to an array of accessx
5192 * descriptor structs, plus one or
5193 * more NULL terminated strings (see
5194 * "Notes" section below).
5195 * uap->size Size of the area pointed to by
5196 * uap->entries.
5197 * uap->results Pointer to the results array.
5198 *
5199 * Returns: 0 Success
5200 * ENOMEM Insufficient memory
5201 * EINVAL Invalid arguments
5202 * namei:EFAULT Bad address
5203 * namei:ENAMETOOLONG Filename too long
5204 * namei:ENOENT No such file or directory
5205 * namei:ELOOP Too many levels of symbolic links
5206 * namei:EBADF Bad file descriptor
5207 * namei:ENOTDIR Not a directory
5208 * namei:???
5209 * access1:
5210 *
5211 * Implicit returns:
5212 * uap->results Array contents modified
5213 *
5214 * Notes: The uap->entries are structured as an arbitrary length array
5215 * of accessx descriptors, followed by one or more NULL terminated
5216 * strings
5217 *
5218 * struct accessx_descriptor[0]
5219 * ...
5220 * struct accessx_descriptor[n]
5221 * char name_data[0];
5222 *
5223 * We determine the entry count by walking the buffer containing
5224 * the uap->entries argument descriptor. For each descriptor we
5225 * see, the valid values for the offset ad_name_offset will be
5226 * in the byte range:
5227 *
5228 * [ uap->entries + sizeof(struct accessx_descriptor) ]
5229 * to
5230 * [ uap->entries + uap->size - 2 ]
5231 *
5232 * since we must have at least one string, and the string must
5233 * be at least one character plus the NULL terminator in length.
5234 *
5235 * XXX: Need to support the check-as uid argument
5236 */
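/*
 * Illustrative layout (hypothetical values): a request asking for both a
 * read check and a delete check against a single path could be passed as
 *
 *	input[0].ad_name_offset = 2 * sizeof(struct accessx_descriptor);
 *	input[0].ad_flags       = R_OK;
 *	input[1].ad_name_offset = 0;		// re-use the previous name
 *	input[1].ad_flags       = _DELETE_OK;
 *	name_data               = "some/file\0";
 *
 * with uap->size covering both descriptors plus the string, and uap->results
 * receiving one errno_t per descriptor.
 */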
5237int
5238access_extended(__unused proc_t p, struct access_extended_args *uap, __unused int32_t *retval)
5239{
5240 struct accessx_descriptor *input = NULL;
5241 errno_t *result = NULL;
5242 errno_t error = 0;
5243 int wantdelete = 0;
5244 unsigned int desc_max, desc_actual, i, j;
5245 struct vfs_context context;
5246 struct nameidata nd;
5247 int niopts;
5248 vnode_t vp = NULL;
5249 vnode_t dvp = NULL;
5250#define ACCESSX_MAX_DESCR_ON_STACK 10
5251 struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];
5252
5253 context.vc_ucred = NULL;
5254
5255 /*
5256 * Validate parameters; if valid, copy the descriptor array and string
5257 * arguments into local memory. Before proceeding, the following
5258 * conditions must have been met:
5259 *
5260 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
5261 * o There must be sufficient room in the request for at least one
5262	 * descriptor and a one byte NUL terminated string.
5263 * o The allocation of local storage must not fail.
5264 */
5265 if (uap->size > ACCESSX_MAX_TABLESIZE)
5266 return(ENOMEM);
5267 if (uap->size < (sizeof(struct accessx_descriptor) + 2))
5268 return(EINVAL);
5269 if (uap->size <= sizeof (stack_input)) {
5270 input = stack_input;
5271 } else {
5272 MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
5273 if (input == NULL) {
5274 error = ENOMEM;
5275 goto out;
5276 }
5277 }
5278 error = copyin(uap->entries, input, uap->size);
5279 if (error)
5280 goto out;
5281
5282 AUDIT_ARG(opaque, input, uap->size);
5283
5284 /*
5285	 * Force NUL termination of the copyin buffer to avoid namei() running
5286 * off the end. If the caller passes us bogus data, they may get a
5287 * bogus result.
5288 */
5289 ((char *)input)[uap->size - 1] = 0;
5290
5291 /*
5292 * Access is defined as checking against the process' real identity,
5293 * even if operations are checking the effective identity. This
5294 * requires that we use a local vfs context.
5295 */
5296 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
5297 context.vc_thread = current_thread();
5298
5299 /*
5300 * Find out how many entries we have, so we can allocate the result
5301 * array by walking the list and adjusting the count downward by the
5302 * earliest string offset we see.
5303 */
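	/*
	 * The "- 2" reserves space for the smallest possible name string:
	 * one character plus its NUL terminator.
	 */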
5304 desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
5305 desc_actual = desc_max;
5306 for (i = 0; i < desc_actual; i++) {
5307 /*
5308 * Take the offset to the name string for this entry and
5309 * convert to an input array index, which would be one off
5310 * the end of the array if this entry was the lowest-addressed
5311 * name string.
5312 */
5313 j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);
5314
5315 /*
5316 * An offset greater than the max allowable offset is an error.
5317 * It is also an error for any valid entry to point
5318 * to a location prior to the end of the current entry, if
5319 * it's not a reference to the string of the previous entry.
5320 */
5321 if (j > desc_max || (j != 0 && j <= i)) {
5322 error = EINVAL;
5323 goto out;
5324 }
5325
5326 /* Also do not let ad_name_offset point to something beyond the size of the input */
5327 if (input[i].ad_name_offset >= uap->size) {
5328 error = EINVAL;
5329 goto out;
5330 }
5331
5332 /*
5333 * An offset of 0 means use the previous descriptor's offset;
5334 * this is used to chain multiple requests for the same file
5335 * to avoid multiple lookups.
5336 */
5337 if (j == 0) {
5338 /* This is not valid for the first entry */
5339 if (i == 0) {
5340 error = EINVAL;
5341 goto out;
5342 }
5343 continue;
5344 }
5345
5346 /*
5347 * If the offset of the string for this descriptor is before
5348 * what we believe is the current actual last descriptor,
5349 * then we need to adjust our estimate downward; this permits
5350 * the string table following the last descriptor to be out
5351 * of order relative to the descriptor list.
5352 */
5353 if (j < desc_actual)
5354 desc_actual = j;
5355 }
5356
5357 /*
5358 * We limit the actual number of descriptors we are willing to process
5359 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS. If the number being
5360	 * requested exceeds this limit, the request fails with ENOMEM.
5361 */
5362 if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
5363 error = ENOMEM;
5364 goto out;
5365 }
5366 MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK | M_ZERO);
5367 if (result == NULL) {
5368 error = ENOMEM;
5369 goto out;
5370 }
5371
5372 /*
5373 * Do the work by iterating over the descriptor entries we know to
5374 * at least appear to contain valid data.
5375 */
5376 error = 0;
5377 for (i = 0; i < desc_actual; i++) {
5378 /*
5379 * If the ad_name_offset is 0, then we use the previous
5380 * results to make the check; otherwise, we are looking up
5381 * a new file name.
5382 */
5383 if (input[i].ad_name_offset != 0) {
5384 /* discard old vnodes */
5385 if (vp) {
5386 vnode_put(vp);
5387 vp = NULL;
5388 }
5389 if (dvp) {
5390 vnode_put(dvp);
5391 dvp = NULL;
5392 }
5393
5394 /*
5395 * Scan forward in the descriptor list to see if we
5396 * need the parent vnode. We will need it if we are
5397 * deleting, since we must have rights to remove
5398 * entries in the parent directory, as well as the
5399 * rights to delete the object itself.
5400 */
5401 wantdelete = input[i].ad_flags & _DELETE_OK;
5402 for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++)
5403 if (input[j].ad_flags & _DELETE_OK)
5404 wantdelete = 1;
5405
5406 niopts = FOLLOW | AUDITVNPATH1;
5407
5408 /* need parent for vnode_authorize for deletion test */
5409 if (wantdelete)
5410 niopts |= WANTPARENT;
5411
5412 /* do the lookup */
5413 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_SYSSPACE,
5414 CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset),
5415 &context);
5416 error = namei(&nd);
5417 if (!error) {
5418 vp = nd.ni_vp;
5419 if (wantdelete)
5420 dvp = nd.ni_dvp;
5421 }
5422 nameidone(&nd);
5423 }
5424
5425 /*
5426 * Handle lookup errors.
5427 */
5428 switch(error) {
5429 case ENOENT:
5430 case EACCES:
5431 case EPERM:
5432 case ENOTDIR:
5433 result[i] = error;
5434 break;
5435 case 0:
5436 /* run this access check */
5437 result[i] = access1(vp, dvp, input[i].ad_flags, &context);
5438 break;
5439 default:
5440 /* fatal lookup error */
5441
5442 goto out;
5443 }
5444 }
5445
5446 AUDIT_ARG(data, result, sizeof(errno_t), desc_actual);
5447
5448 /* copy out results */
5449 error = copyout(result, uap->results, desc_actual * sizeof(errno_t));
5450
5451out:
5452 if (input && input != stack_input)
5453 FREE(input, M_TEMP);
5454 if (result)
5455 FREE(result, M_TEMP);
5456 if (vp)
5457 vnode_put(vp);
5458 if (dvp)
5459 vnode_put(dvp);
5460 if (IS_VALID_CRED(context.vc_ucred))
5461 kauth_cred_unref(&context.vc_ucred);
5462 return(error);
5463}
5464
5465
5466/*
5467 * Returns: 0 Success
5468 * namei:EFAULT Bad address
5469 * namei:ENAMETOOLONG Filename too long
5470 * namei:ENOENT No such file or directory
5471 * namei:ELOOP Too many levels of symbolic links
5472 * namei:EBADF Bad file descriptor
5473 * namei:ENOTDIR Not a directory
5474 * namei:???
5475 * access1:
5476 */
5477static int
5478faccessat_internal(vfs_context_t ctx, int fd, user_addr_t path, int amode,
5479 int flag, enum uio_seg segflg)
5480{
5481 int error;
5482 struct nameidata nd;
5483 int niopts;
5484 struct vfs_context context;
5485#if NAMEDRSRCFORK
5486 int is_namedstream = 0;
5487#endif
5488
5489 /*
5490	 * Unless the AT_EACCESS option is used, access is defined as checking
5491 * against the process' real identity, even if operations are checking
5492 * the effective identity. So we need to tweak the credential
5493 * in the context for that case.
5494 */
5495 if (!(flag & AT_EACCESS))
5496 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
5497 else
5498 context.vc_ucred = ctx->vc_ucred;
5499 context.vc_thread = ctx->vc_thread;
5500
5501
5502 niopts = FOLLOW | AUDITVNPATH1;
5503 /* need parent for vnode_authorize for deletion test */
5504 if (amode & _DELETE_OK)
5505 niopts |= WANTPARENT;
5506 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, segflg,
5507 path, &context);
5508
5509#if NAMEDRSRCFORK
5510 /* access(F_OK) calls are allowed for resource forks. */
5511 if (amode == F_OK)
5512 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
5513#endif
5514 error = nameiat(&nd, fd);
5515 if (error)
5516 goto out;
5517
5518#if NAMEDRSRCFORK
5519 /* Grab reference on the shadow stream file vnode to
5520 * force an inactive on release which will mark it
5521 * for recycle.
5522 */
5523 if (vnode_isnamedstream(nd.ni_vp) &&
5524 (nd.ni_vp->v_parent != NULLVP) &&
5525 vnode_isshadow(nd.ni_vp)) {
5526 is_namedstream = 1;
5527 vnode_ref(nd.ni_vp);
5528 }
5529#endif
5530
5531 error = access1(nd.ni_vp, nd.ni_dvp, amode, &context);
5532
5533#if NAMEDRSRCFORK
5534 if (is_namedstream) {
5535 vnode_rele(nd.ni_vp);
5536 }
5537#endif
5538
5539 vnode_put(nd.ni_vp);
5540 if (amode & _DELETE_OK)
5541 vnode_put(nd.ni_dvp);
5542 nameidone(&nd);
5543
5544out:
5545 if (!(flag & AT_EACCESS))
5546 kauth_cred_unref(&context.vc_ucred);
5547 return (error);
5548}
5549
5550int
5551access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
5552{
5553 return (faccessat_internal(vfs_context_current(), AT_FDCWD,
5554 uap->path, uap->flags, 0, UIO_USERSPACE));
5555}
5556
5557int
5558faccessat(__unused proc_t p, struct faccessat_args *uap,
5559 __unused int32_t *retval)
5560{
5561 if (uap->flag & ~AT_EACCESS)
5562 return (EINVAL);
5563
5564 return (faccessat_internal(vfs_context_current(), uap->fd,
5565 uap->path, uap->amode, uap->flag, UIO_USERSPACE));
5566}
5567
5568/*
5569 * Returns: 0 Success
5570 * EFAULT
5571 * copyout:EFAULT
5572 * namei:???
5573 * vn_stat:???
5574 */
5575static int
5576fstatat_internal(vfs_context_t ctx, user_addr_t path, user_addr_t ub,
5577 user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64,
5578 enum uio_seg segflg, int fd, int flag)
5579{
5580 struct nameidata nd;
5581 int follow;
5582 union {
5583 struct stat sb;
5584 struct stat64 sb64;
5585 } source = {};
5586 union {
5587 struct user64_stat user64_sb;
5588 struct user32_stat user32_sb;
5589 struct user64_stat64 user64_sb64;
5590 struct user32_stat64 user32_sb64;
5591 } dest = {};
5592 caddr_t sbp;
5593 int error, my_size;
5594 kauth_filesec_t fsec;
5595 size_t xsecurity_bufsize;
5596 void * statptr;
5597
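	/*
	 * Flow: nameiat() resolves the path, vn_stat() fills 'source' (stat or
	 * stat64 depending on 'isstat64'), the result is munged into the user
	 * layout that matches the process bitness, and that buffer is copied
	 * out to 'ub'; extended security information, if requested, is
	 * returned separately through 'xsecurity'.
	 */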
5598 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
5599 NDINIT(&nd, LOOKUP, OP_GETATTR, follow | AUDITVNPATH1,
5600 segflg, path, ctx);
5601
5602#if NAMEDRSRCFORK
5603 int is_namedstream = 0;
5604 /* stat calls are allowed for resource forks. */
5605 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
5606#endif
5607 error = nameiat(&nd, fd);
5608 if (error)
5609 return (error);
5610 fsec = KAUTH_FILESEC_NONE;
5611
5612 statptr = (void *)&source;
5613
5614#if NAMEDRSRCFORK
5615 /* Grab reference on the shadow stream file vnode to
5616 * force an inactive on release which will mark it
5617 * for recycle.
5618 */
5619 if (vnode_isnamedstream(nd.ni_vp) &&
5620 (nd.ni_vp->v_parent != NULLVP) &&
5621 vnode_isshadow(nd.ni_vp)) {
5622 is_namedstream = 1;
5623 vnode_ref(nd.ni_vp);
5624 }
5625#endif
5626
5627 error = vn_stat(nd.ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx);
5628
5629#if NAMEDRSRCFORK
5630 if (is_namedstream) {
5631 vnode_rele(nd.ni_vp);
5632 }
5633#endif
5634 vnode_put(nd.ni_vp);
5635 nameidone(&nd);
5636
5637 if (error)
5638 return (error);
5639 /* Zap spare fields */
5640 if (isstat64 != 0) {
5641 source.sb64.st_lspare = 0;
5642 source.sb64.st_qspare[0] = 0LL;
5643 source.sb64.st_qspare[1] = 0LL;
5644 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
5645 munge_user64_stat64(&source.sb64, &dest.user64_sb64);
5646 my_size = sizeof(dest.user64_sb64);
5647 sbp = (caddr_t)&dest.user64_sb64;
5648 } else {
5649 munge_user32_stat64(&source.sb64, &dest.user32_sb64);
5650 my_size = sizeof(dest.user32_sb64);
5651 sbp = (caddr_t)&dest.user32_sb64;
5652 }
5653 /*
5654 * Check if we raced (post lookup) against the last unlink of a file.
5655 */
5656 if ((source.sb64.st_nlink == 0) && S_ISREG(source.sb64.st_mode)) {
5657 source.sb64.st_nlink = 1;
5658 }
5659 } else {
5660 source.sb.st_lspare = 0;
5661 source.sb.st_qspare[0] = 0LL;
5662 source.sb.st_qspare[1] = 0LL;
5663 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
5664 munge_user64_stat(&source.sb, &dest.user64_sb);
5665 my_size = sizeof(dest.user64_sb);
5666 sbp = (caddr_t)&dest.user64_sb;
5667 } else {
5668 munge_user32_stat(&source.sb, &dest.user32_sb);
5669 my_size = sizeof(dest.user32_sb);
5670 sbp = (caddr_t)&dest.user32_sb;
5671 }
5672
5673 /*
5674 * Check if we raced (post lookup) against the last unlink of a file.
5675 */
5676 if ((source.sb.st_nlink == 0) && S_ISREG(source.sb.st_mode)) {
5677 source.sb.st_nlink = 1;
5678 }
5679 }
5680 if ((error = copyout(sbp, ub, my_size)) != 0)
5681 goto out;
5682
5683 /* caller wants extended security information? */
5684 if (xsecurity != USER_ADDR_NULL) {
5685
5686 /* did we get any? */
5687 if (fsec == KAUTH_FILESEC_NONE) {
5688 if (susize(xsecurity_size, 0) != 0) {
5689 error = EFAULT;
5690 goto out;
5691 }
5692 } else {
5693 /* find the user buffer size */
5694 xsecurity_bufsize = fusize(xsecurity_size);
5695
5696 /* copy out the actual data size */
5697 if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
5698 error = EFAULT;
5699 goto out;
5700 }
5701
5702 /* if the caller supplied enough room, copy out to it */
5703 if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
5704 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
5705 }
5706 }
5707out:
5708 if (fsec != KAUTH_FILESEC_NONE)
5709 kauth_filesec_free(fsec);
5710 return (error);
5711}
5712
5713/*
5714 * stat_extended: Get file status; with extended security (ACL).
5715 *
5716 * Parameters: p (ignored)
5717 * uap User argument descriptor (see below)
5718 * retval (ignored)
5719 *
5720 * Indirect: uap->path Path of file to get status from
5721 * uap->ub User buffer (holds file status info)
5722 * uap->xsecurity ACL to get (extended security)
5723 * uap->xsecurity_size Size of ACL
5724 *
5725 * Returns: 0 Success
5726 * !0 errno value
5727 *
5728 */
5729int
5730stat_extended(__unused proc_t p, struct stat_extended_args *uap,
5731 __unused int32_t *retval)
5732{
5733 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5734 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5735 0));
5736}
5737
5738/*
5739 * Returns: 0 Success
5740 * fstatat_internal:??? [see fstatat_internal() in this file]
5741 */
5742int
5743stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval)
5744{
5745 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5746 0, 0, 0, UIO_USERSPACE, AT_FDCWD, 0));
5747}
5748
5749int
5750stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval)
5751{
5752 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5753 0, 0, 1, UIO_USERSPACE, AT_FDCWD, 0));
5754}
5755
5756/*
5757 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
5758 *
5759 * Parameters: p (ignored)
5760 * uap User argument descriptor (see below)
5761 * retval (ignored)
5762 *
5763 * Indirect: uap->path Path of file to get status from
5764 * uap->ub User buffer (holds file status info)
5765 * uap->xsecurity ACL to get (extended security)
5766 * uap->xsecurity_size Size of ACL
5767 *
5768 * Returns: 0 Success
5769 * !0 errno value
5770 *
5771 */
5772int
5773stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval)
5774{
5775 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5776 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5777 0));
5778}
5779
5780/*
5781 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
5782 *
5783 * Parameters: p (ignored)
5784 * uap User argument descriptor (see below)
5785 * retval (ignored)
5786 *
5787 * Indirect: uap->path Path of file to get status from
5788 * uap->ub User buffer (holds file status info)
5789 * uap->xsecurity ACL to get (extended security)
5790 * uap->xsecurity_size Size of ACL
5791 *
5792 * Returns: 0 Success
5793 * !0 errno value
5794 *
5795 */
5796int
5797lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval)
5798{
5799 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5800 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5801 AT_SYMLINK_NOFOLLOW));
5802}
5803
5804/*
5805 * Get file status; this version does not follow links.
5806 */
5807int
5808lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval)
5809{
5810 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5811 0, 0, 0, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
5812}
5813
5814int
5815lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval)
5816{
5817 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5818 0, 0, 1, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
5819}
5820
5821/*
5822 * lstat64_extended: Get file status; can handle large inode numbers; does not
5823 * follow links; with extended security (ACL).
5824 *
5825 * Parameters: p (ignored)
5826 * uap User argument descriptor (see below)
5827 * retval (ignored)
5828 *
5829 * Indirect: uap->path Path of file to get status from
5830 * uap->ub User buffer (holds file status info)
5831 * uap->xsecurity ACL to get (extended security)
5832 * uap->xsecurity_size Size of ACL
5833 *
5834 * Returns: 0 Success
5835 * !0 errno value
5836 *
5837 */
5838int
5839lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval)
5840{
5841 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5842 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5843 AT_SYMLINK_NOFOLLOW));
5844}
5845
5846int
5847fstatat(__unused proc_t p, struct fstatat_args *uap, __unused int32_t *retval)
5848{
5849 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5850 return (EINVAL);
5851
5852 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5853 0, 0, 0, UIO_USERSPACE, uap->fd, uap->flag));
5854}
5855
5856int
5857fstatat64(__unused proc_t p, struct fstatat64_args *uap,
5858 __unused int32_t *retval)
5859{
5860 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5861 return (EINVAL);
5862
5863 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5864 0, 0, 1, UIO_USERSPACE, uap->fd, uap->flag));
5865}
5866
5867/*
5868 * Get configurable pathname variables.
5869 *
5870 * Returns: 0 Success
5871 * namei:???
5872 * vn_pathconf:???
5873 *
5874 * Notes: Global implementation constants are intended to be
5875 * implemented in this function directly; all other constants
5876 * are per-FS implementation, and therefore must be handled in
5877 * each respective FS, instead.
5878 *
5879 * XXX We implement some things globally right now that should actually be
5880 * XXX per-FS; we will need to deal with this at some point.
5881 */
5882/* ARGSUSED */
5883int
5884pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval)
5885{
5886 int error;
5887 struct nameidata nd;
5888 vfs_context_t ctx = vfs_context_current();
5889
5890 NDINIT(&nd, LOOKUP, OP_PATHCONF, FOLLOW | AUDITVNPATH1,
5891 UIO_USERSPACE, uap->path, ctx);
5892 error = namei(&nd);
5893 if (error)
5894 return (error);
5895
5896 error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);
5897
5898 vnode_put(nd.ni_vp);
5899 nameidone(&nd);
5900 return (error);
5901}
5902
5903/*
5904 * Return target name of a symbolic link.
5905 */
5906/* ARGSUSED */
5907static int
5908readlinkat_internal(vfs_context_t ctx, int fd, user_addr_t path,
5909 enum uio_seg seg, user_addr_t buf, size_t bufsize, enum uio_seg bufseg,
5910 int *retval)
5911{
5912 vnode_t vp;
5913 uio_t auio;
5914 int error;
5915 struct nameidata nd;
5916 char uio_buf[ UIO_SIZEOF(1) ];
5917
5918 NDINIT(&nd, LOOKUP, OP_READLINK, NOFOLLOW | AUDITVNPATH1,
5919 seg, path, ctx);
5920
5921 error = nameiat(&nd, fd);
5922 if (error)
5923 return (error);
5924 vp = nd.ni_vp;
5925
5926 nameidone(&nd);
5927
5928 auio = uio_createwithbuffer(1, 0, bufseg, UIO_READ,
5929 &uio_buf[0], sizeof(uio_buf));
5930 uio_addiov(auio, buf, bufsize);
5931 if (vp->v_type != VLNK) {
5932 error = EINVAL;
5933 } else {
5934#if CONFIG_MACF
5935 error = mac_vnode_check_readlink(ctx, vp);
5936#endif
5937 if (error == 0)
5938 error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA,
5939 ctx);
5940 if (error == 0)
5941 error = VNOP_READLINK(vp, auio, ctx);
5942 }
5943 vnode_put(vp);
5944
5945 *retval = bufsize - (int)uio_resid(auio);
5946 return (error);
5947}
5948
5949int
5950readlink(proc_t p, struct readlink_args *uap, int32_t *retval)
5951{
5952 enum uio_seg procseg;
5953
5954 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5955 return (readlinkat_internal(vfs_context_current(), AT_FDCWD,
5956 CAST_USER_ADDR_T(uap->path), procseg, CAST_USER_ADDR_T(uap->buf),
5957 uap->count, procseg, retval));
5958}
5959
5960int
5961readlinkat(proc_t p, struct readlinkat_args *uap, int32_t *retval)
5962{
5963 enum uio_seg procseg;
5964
5965 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5966 return (readlinkat_internal(vfs_context_current(), uap->fd, uap->path,
5967 procseg, uap->buf, uap->bufsize, procseg, retval));
5968}
5969
5970/*
5971 * Change file flags.
5972 *
5973 * NOTE: this will vnode_put() `vp'
5974 */
5975static int
5976chflags1(vnode_t vp, int flags, vfs_context_t ctx)
5977{
5978 struct vnode_attr va;
5979 kauth_action_t action;
5980 int error;
5981
5982 VATTR_INIT(&va);
5983 VATTR_SET(&va, va_flags, flags);
5984
5985#if CONFIG_MACF
5986 error = mac_vnode_check_setflags(ctx, vp, flags);
5987 if (error)
5988 goto out;
5989#endif
5990
5991 /* request authorisation, disregard immutability */
5992 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
5993 goto out;
5994 /*
5995 * Request that the auth layer disregard those file flags it's allowed to when
5996 * authorizing this operation; we need to do this in order to be able to
5997 * clear immutable flags.
5998 */
5999 if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0))
6000 goto out;
6001 error = vnode_setattr(vp, &va, ctx);
6002
6003#if CONFIG_MACF
6004 if (error == 0)
6005 mac_vnode_notify_setflags(ctx, vp, flags);
6006#endif
6007
6008 if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
6009 error = ENOTSUP;
6010 }
6011out:
6012 vnode_put(vp);
6013 return(error);
6014}
6015
6016/*
6017 * Change flags of a file given a path name.
6018 */
6019/* ARGSUSED */
6020int
6021chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval)
6022{
6023 vnode_t vp;
6024 vfs_context_t ctx = vfs_context_current();
6025 int error;
6026 struct nameidata nd;
6027
6028 AUDIT_ARG(fflags, uap->flags);
6029 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
6030 UIO_USERSPACE, uap->path, ctx);
6031 error = namei(&nd);
6032 if (error)
6033 return (error);
6034 vp = nd.ni_vp;
6035 nameidone(&nd);
6036
6037	/* we don't vnode_put() here because chflags1 does it internally */
6038 error = chflags1(vp, uap->flags, ctx);
6039
6040 return(error);
6041}
6042
6043/*
6044 * Change flags of a file given a file descriptor.
6045 */
6046/* ARGSUSED */
6047int
6048fchflags(__unused proc_t p, struct fchflags_args *uap, __unused int32_t *retval)
6049{
6050 vnode_t vp;
6051 int error;
6052
6053 AUDIT_ARG(fd, uap->fd);
6054 AUDIT_ARG(fflags, uap->flags);
6055 if ( (error = file_vnode(uap->fd, &vp)) )
6056 return (error);
6057
6058 if ((error = vnode_getwithref(vp))) {
6059 file_drop(uap->fd);
6060 return(error);
6061 }
6062
6063 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6064
6065	/* we don't vnode_put() here because chflags1 does it internally */
6066 error = chflags1(vp, uap->flags, vfs_context_current());
6067
6068 file_drop(uap->fd);
6069 return (error);
6070}
6071
6072/*
6073 * Change security information on a filesystem object.
6074 *
6075 * Returns: 0 Success
6076 * EPERM Operation not permitted
6077 * vnode_authattr:??? [anything vnode_authattr can return]
6078 * vnode_authorize:??? [anything vnode_authorize can return]
6079 * vnode_setattr:??? [anything vnode_setattr can return]
6080 *
6081 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
6082 * translated to EPERM before being returned.
6083 */
6084static int
6085chmod_vnode(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
6086{
6087 kauth_action_t action;
6088 int error;
6089
6090 AUDIT_ARG(mode, vap->va_mode);
6091 /* XXX audit new args */
6092
6093#if NAMEDSTREAMS
6094 /* chmod calls are not allowed for resource forks. */
6095 if (vp->v_flag & VISNAMEDSTREAM) {
6096 return (EPERM);
6097 }
6098#endif
6099
6100#if CONFIG_MACF
6101 if (VATTR_IS_ACTIVE(vap, va_mode) &&
6102 (error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode)) != 0)
6103 return (error);
6104
6105 if (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid)) {
6106 if ((error = mac_vnode_check_setowner(ctx, vp,
6107 VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : -1,
6108 VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : -1)))
6109 return (error);
6110 }
6111
6112 if (VATTR_IS_ACTIVE(vap, va_acl) &&
6113 (error = mac_vnode_check_setacl(ctx, vp, vap->va_acl)))
6114 return (error);
6115#endif
6116
6117 /* make sure that the caller is allowed to set this security information */
6118 if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
6119 ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
6120 if (error == EACCES)
6121 error = EPERM;
6122 return(error);
6123 }
6124
6125 if ((error = vnode_setattr(vp, vap, ctx)) != 0)
6126 return (error);
6127
6128#if CONFIG_MACF
6129 if (VATTR_IS_ACTIVE(vap, va_mode))
6130 mac_vnode_notify_setmode(ctx, vp, (mode_t)vap->va_mode);
6131
6132 if (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid))
6133 mac_vnode_notify_setowner(ctx, vp,
6134 VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : -1,
6135 VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : -1);
6136
6137 if (VATTR_IS_ACTIVE(vap, va_acl))
6138 mac_vnode_notify_setacl(ctx, vp, vap->va_acl);
6139#endif
6140
6141 return (error);
6142}
6143
6144
6145/*
6146 * Change mode of a file given a path name.
6147 *
6148 * Returns: 0 Success
6149 * namei:??? [anything namei can return]
6150 * chmod_vnode:??? [anything chmod_vnode can return]
6151 */
6152static int
6153chmodat(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap,
6154 int fd, int flag, enum uio_seg segflg)
6155{
6156 struct nameidata nd;
6157 int follow, error;
6158
6159 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
6160 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1,
6161 segflg, path, ctx);
6162 if ((error = nameiat(&nd, fd)))
6163 return (error);
6164 error = chmod_vnode(ctx, nd.ni_vp, vap);
6165 vnode_put(nd.ni_vp);
6166 nameidone(&nd);
6167 return(error);
6168}
6169
6170/*
6171 * chmod_extended: Change the mode of a file given a path name; with extended
6172 * argument list (including extended security (ACL)).
6173 *
6174 * Parameters: p Process requesting the open
6175 * uap User argument descriptor (see below)
6176 * retval (ignored)
6177 *
6178 * Indirect: uap->path Path to object (same as 'chmod')
6179 * uap->uid UID to set
6180 * uap->gid GID to set
6181 * uap->mode File mode to set (same as 'chmod')
6182 * uap->xsecurity ACL to set (or delete)
6183 *
6184 * Returns: 0 Success
6185 * !0 errno value
6186 *
6187 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
6188 *
6189 * XXX: We should enumerate the possible errno values here, and where
6190 * in the code they originated.
6191 */
6192int
6193chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int32_t *retval)
6194{
6195 int error;
6196 struct vnode_attr va;
6197 kauth_filesec_t xsecdst;
6198
6199 AUDIT_ARG(owner, uap->uid, uap->gid);
6200
6201 VATTR_INIT(&va);
6202 if (uap->mode != -1)
6203 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
6204 if (uap->uid != KAUTH_UID_NONE)
6205 VATTR_SET(&va, va_uid, uap->uid);
6206 if (uap->gid != KAUTH_GID_NONE)
6207 VATTR_SET(&va, va_gid, uap->gid);
6208
6209 xsecdst = NULL;
6210 switch(uap->xsecurity) {
6211 /* explicit remove request */
6212 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6213 VATTR_SET(&va, va_acl, NULL);
6214 break;
6215 /* not being set */
6216 case USER_ADDR_NULL:
6217 break;
6218 default:
6219 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
6220 return(error);
6221 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
6222 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
6223 }
6224
6225 error = chmodat(vfs_context_current(), uap->path, &va, AT_FDCWD, 0,
6226 UIO_USERSPACE);
6227
6228 if (xsecdst != NULL)
6229 kauth_filesec_free(xsecdst);
6230 return(error);
6231}
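/*
 * Illustrative userspace sketch (an assumption about the libc wrappers,
 * not something defined in this file): chmod_extended() is normally
 * reached via the chmodx_np(3)/filesec(3) interfaces, which marshal the
 * mode, owner and ACL into the indirect arguments documented above,
 * roughly:
 *
 *	filesec_t fsec = filesec_init();
 *	mode_t m = 0640;				/* hypothetical value */
 *	filesec_set_property(fsec, FILESEC_MODE, &m);
 *	chmodx_np("/tmp/example", fsec);		/* hypothetical path */
 *	filesec_free(fsec);
 */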
6232
6233/*
6234 * Returns: 0 Success
6235 * chmodat:??? [anything chmodat can return]
6236 */
6237static int
6238fchmodat_internal(vfs_context_t ctx, user_addr_t path, int mode, int fd,
6239 int flag, enum uio_seg segflg)
6240{
6241 struct vnode_attr va;
6242
6243 VATTR_INIT(&va);
6244 VATTR_SET(&va, va_mode, mode & ALLPERMS);
6245
6246 return (chmodat(ctx, path, &va, fd, flag, segflg));
6247}
6248
6249int
6250chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval)
6251{
6252 return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
6253 AT_FDCWD, 0, UIO_USERSPACE));
6254}
6255
6256int
6257fchmodat(__unused proc_t p, struct fchmodat_args *uap, __unused int32_t *retval)
6258{
6259 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
6260 return (EINVAL);
6261
6262 return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
6263 uap->fd, uap->flag, UIO_USERSPACE));
6264}
6265
6266/*
6267 * Change mode of a file given a file descriptor.
6268 */
6269static int
6270fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
6271{
6272 vnode_t vp;
6273 int error;
6274
6275 AUDIT_ARG(fd, fd);
6276
6277 if ((error = file_vnode(fd, &vp)) != 0)
6278 return (error);
6279 if ((error = vnode_getwithref(vp)) != 0) {
6280 file_drop(fd);
6281 return(error);
6282 }
6283 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6284
6285 error = chmod_vnode(vfs_context_current(), vp, vap);
6286 (void)vnode_put(vp);
6287 file_drop(fd);
6288
6289 return (error);
6290}
6291
6292/*
6293 * fchmod_extended: Change mode of a file given a file descriptor; with
6294 * extended argument list (including extended security (ACL)).
6295 *
6296 * Parameters: p Process requesting to change file mode
6297 * uap User argument descriptor (see below)
6298 * retval (ignored)
6299 *
6300 * Indirect: uap->mode File mode to set (same as 'chmod')
6301 * uap->uid UID to set
6302 * uap->gid GID to set
6303 * uap->xsecurity ACL to set (or delete)
6304 * uap->fd File descriptor of file to change mode
6305 *
6306 * Returns: 0 Success
6307 * !0 errno value
6308 *
6309 */
6310int
6311fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *retval)
6312{
6313 int error;
6314 struct vnode_attr va;
6315 kauth_filesec_t xsecdst;
6316
6317 AUDIT_ARG(owner, uap->uid, uap->gid);
6318
6319 VATTR_INIT(&va);
6320 if (uap->mode != -1)
6321 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
6322 if (uap->uid != KAUTH_UID_NONE)
6323 VATTR_SET(&va, va_uid, uap->uid);
6324 if (uap->gid != KAUTH_GID_NONE)
6325 VATTR_SET(&va, va_gid, uap->gid);
6326
6327 xsecdst = NULL;
6328 switch(uap->xsecurity) {
6329 case USER_ADDR_NULL:
6330 VATTR_SET(&va, va_acl, NULL);
6331 break;
6332 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6333 VATTR_SET(&va, va_acl, NULL);
6334 break;
6335 /* not being set */
6336 case CAST_USER_ADDR_T(-1):
6337 break;
6338 default:
6339 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
6340 return(error);
6341 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
6342 }
6343
6344 error = fchmod1(p, uap->fd, &va);
6345
6346
6347 switch(uap->xsecurity) {
6348 case USER_ADDR_NULL:
6349 case CAST_USER_ADDR_T(-1):
6350 break;
6351 default:
6352 if (xsecdst != NULL)
6353 kauth_filesec_free(xsecdst);
6354 }
6355 return(error);
6356}
6357
6358int
6359fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval)
6360{
6361 struct vnode_attr va;
6362
6363 VATTR_INIT(&va);
6364 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
6365
6366 return(fchmod1(p, uap->fd, &va));
6367}
6368
6369
6370/*
6371 * Set ownership given a path name.
6372 */
6373/* ARGSUSED */
6374static int
6375fchownat_internal(vfs_context_t ctx, int fd, user_addr_t path, uid_t uid,
6376 gid_t gid, int flag, enum uio_seg segflg)
6377{
6378 vnode_t vp;
6379 struct vnode_attr va;
6380 int error;
6381 struct nameidata nd;
6382 int follow;
6383 kauth_action_t action;
6384
6385 AUDIT_ARG(owner, uid, gid);
6386
6387 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
6388 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1, segflg,
6389 path, ctx);
6390 error = nameiat(&nd, fd);
6391 if (error)
6392 return (error);
6393 vp = nd.ni_vp;
6394
6395 nameidone(&nd);
6396
6397 VATTR_INIT(&va);
6398 if (uid != (uid_t)VNOVAL)
6399 VATTR_SET(&va, va_uid, uid);
6400 if (gid != (gid_t)VNOVAL)
6401 VATTR_SET(&va, va_gid, gid);
6402
6403#if CONFIG_MACF
6404 error = mac_vnode_check_setowner(ctx, vp, uid, gid);
6405 if (error)
6406 goto out;
6407#endif
6408
6409 /* preflight and authorize attribute changes */
6410 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
6411 goto out;
6412 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
6413 goto out;
6414 error = vnode_setattr(vp, &va, ctx);
6415
6416#if CONFIG_MACF
6417 if (error == 0)
6418 mac_vnode_notify_setowner(ctx, vp, uid, gid);
6419#endif
6420
6421out:
6422 /*
6423 * EACCES is only allowed from namei(); permissions failure should
6424 * return EPERM, so we need to translate the error code.
6425 */
6426 if (error == EACCES)
6427 error = EPERM;
6428
6429 vnode_put(vp);
6430 return (error);
6431}
6432
6433int
6434chown(__unused proc_t p, struct chown_args *uap, __unused int32_t *retval)
6435{
6436 return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6437 uap->uid, uap->gid, 0, UIO_USERSPACE));
6438}
6439
6440int
6441lchown(__unused proc_t p, struct lchown_args *uap, __unused int32_t *retval)
6442{
6443 return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6444 uap->owner, uap->group, AT_SYMLINK_NOFOLLOW, UIO_USERSPACE));
6445}
6446
6447int
6448fchownat(__unused proc_t p, struct fchownat_args *uap, __unused int32_t *retval)
6449{
6450 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
6451 return (EINVAL);
6452
6453 return (fchownat_internal(vfs_context_current(), uap->fd, uap->path,
6454 uap->uid, uap->gid, uap->flag, UIO_USERSPACE));
6455}
6456
6457/*
6458 * Set ownership given a file descriptor.
6459 */
6460/* ARGSUSED */
6461int
6462fchown(__unused proc_t p, struct fchown_args *uap, __unused int32_t *retval)
6463{
6464 struct vnode_attr va;
6465 vfs_context_t ctx = vfs_context_current();
6466 vnode_t vp;
6467 int error;
6468 kauth_action_t action;
6469
6470 AUDIT_ARG(owner, uap->uid, uap->gid);
6471 AUDIT_ARG(fd, uap->fd);
6472
6473 if ( (error = file_vnode(uap->fd, &vp)) )
6474 return (error);
6475
6476 if ( (error = vnode_getwithref(vp)) ) {
6477 file_drop(uap->fd);
6478 return(error);
6479 }
6480 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6481
6482 VATTR_INIT(&va);
6483 if (uap->uid != VNOVAL)
6484 VATTR_SET(&va, va_uid, uap->uid);
6485 if (uap->gid != VNOVAL)
6486 VATTR_SET(&va, va_gid, uap->gid);
6487
6488#if NAMEDSTREAMS
6489 /* chown calls are not allowed for resource forks. */
6490 if (vp->v_flag & VISNAMEDSTREAM) {
6491 error = EPERM;
6492 goto out;
6493 }
6494#endif
6495
6496#if CONFIG_MACF
6497 error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
6498 if (error)
6499 goto out;
6500#endif
6501
6502 /* preflight and authorize attribute changes */
6503 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
6504 goto out;
6505 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
6506 if (error == EACCES)
6507 error = EPERM;
6508 goto out;
6509 }
6510 error = vnode_setattr(vp, &va, ctx);
6511
6512#if CONFIG_MACF
6513 if (error == 0)
6514 mac_vnode_notify_setowner(ctx, vp, uap->uid, uap->gid);
6515#endif
6516
6517out:
6518 (void)vnode_put(vp);
6519 file_drop(uap->fd);
6520 return (error);
6521}
6522
6523static int
6524getutimes(user_addr_t usrtvp, struct timespec *tsp)
6525{
6526 int error;
6527
6528 if (usrtvp == USER_ADDR_NULL) {
6529 struct timeval old_tv;
6530 /* XXX Y2038 bug because of microtime argument */
6531 microtime(&old_tv);
6532 TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]);
6533 tsp[1] = tsp[0];
6534 } else {
6535 if (IS_64BIT_PROCESS(current_proc())) {
6536 struct user64_timeval tv[2];
6537 error = copyin(usrtvp, (void *)tv, sizeof(tv));
6538 if (error)
6539 return (error);
6540 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
6541 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
6542 } else {
6543 struct user32_timeval tv[2];
6544 error = copyin(usrtvp, (void *)tv, sizeof(tv));
6545 if (error)
6546 return (error);
6547 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
6548 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
6549 }
6550 }
6551 return 0;
6552}
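/*
 * For clarity, the standard TIMEVAL_TO_TIMESPEC() conversion used above
 * amounts to widening microseconds to nanoseconds:
 *
 *	tsp[i].tv_sec  = tv[i].tv_sec;
 *	tsp[i].tv_nsec = tv[i].tv_usec * 1000;
 *
 * When usrtvp is USER_ADDR_NULL both entries are set to the current time
 * and the callers pass nullflag to setutimes() below, which marks the
 * request with VA_UTIMES_NULL so the utimes(path, NULL) permission rules
 * can be applied.
 */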
6553
6554static int
6555setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
6556 int nullflag)
6557{
6558 int error;
6559 struct vnode_attr va;
6560 kauth_action_t action;
6561
6562 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6563
6564 VATTR_INIT(&va);
6565 VATTR_SET(&va, va_access_time, ts[0]);
6566 VATTR_SET(&va, va_modify_time, ts[1]);
6567 if (nullflag)
6568 va.va_vaflags |= VA_UTIMES_NULL;
6569
6570#if NAMEDSTREAMS
6571 /* utimes calls are not allowed for resource forks. */
6572 if (vp->v_flag & VISNAMEDSTREAM) {
6573 error = EPERM;
6574 goto out;
6575 }
6576#endif
6577
6578#if CONFIG_MACF
6579 error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
6580 if (error)
6581 goto out;
6582#endif
6583 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
6584 if (!nullflag && error == EACCES)
6585 error = EPERM;
6586 goto out;
6587 }
6588
6589 /* since we may not need to auth anything, check here */
6590 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
6591 if (!nullflag && error == EACCES)
6592 error = EPERM;
6593 goto out;
6594 }
6595 error = vnode_setattr(vp, &va, ctx);
6596
6597#if CONFIG_MACF
6598 if (error == 0)
6599 mac_vnode_notify_setutimes(ctx, vp, ts[0], ts[1]);
6600#endif
6601
6602out:
6603 return error;
6604}
6605
6606/*
6607 * Set the access and modification times of a file.
6608 */
6609/* ARGSUSED */
6610int
6611utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval)
6612{
6613 struct timespec ts[2];
6614 user_addr_t usrtvp;
6615 int error;
6616 struct nameidata nd;
6617 vfs_context_t ctx = vfs_context_current();
6618
6619 /*
6620 * AUDIT: Needed to change the order of operations to do the
6621 * name lookup first because auditing wants the path.
6622 */
6623 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
6624 UIO_USERSPACE, uap->path, ctx);
6625 error = namei(&nd);
6626 if (error)
6627 return (error);
6628 nameidone(&nd);
6629
6630 /*
6631 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
6632 * the current time instead.
6633 */
6634 usrtvp = uap->tptr;
6635 if ((error = getutimes(usrtvp, ts)) != 0)
6636 goto out;
6637
6638 error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);
6639
6640out:
6641 vnode_put(nd.ni_vp);
6642 return (error);
6643}
6644
6645/*
6646 * Set the access and modification times of a file.
6647 */
6648/* ARGSUSED */
6649int
6650futimes(__unused proc_t p, struct futimes_args *uap, __unused int32_t *retval)
6651{
6652 struct timespec ts[2];
6653 vnode_t vp;
6654 user_addr_t usrtvp;
6655 int error;
6656
6657 AUDIT_ARG(fd, uap->fd);
6658 usrtvp = uap->tptr;
6659 if ((error = getutimes(usrtvp, ts)) != 0)
6660 return (error);
6661 if ((error = file_vnode(uap->fd, &vp)) != 0)
6662 return (error);
6663 if((error = vnode_getwithref(vp))) {
6664 file_drop(uap->fd);
6665 return(error);
6666 }
6667
6668 error = setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
6669 vnode_put(vp);
6670 file_drop(uap->fd);
6671 return(error);
6672}
6673
6674/*
6675 * Truncate a file given its path name.
6676 */
6677/* ARGSUSED */
6678int
6679truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval)
6680{
6681 vnode_t vp;
6682 struct vnode_attr va;
6683 vfs_context_t ctx = vfs_context_current();
6684 int error;
6685 struct nameidata nd;
6686 kauth_action_t action;
6687
6688 if (uap->length < 0)
6689 return(EINVAL);
6690 NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1,
6691 UIO_USERSPACE, uap->path, ctx);
6692 if ((error = namei(&nd)))
6693 return (error);
6694 vp = nd.ni_vp;
6695
6696 nameidone(&nd);
6697
6698 VATTR_INIT(&va);
6699 VATTR_SET(&va, va_data_size, uap->length);
6700
6701#if CONFIG_MACF
6702 error = mac_vnode_check_truncate(ctx, NOCRED, vp);
6703 if (error)
6704 goto out;
6705#endif
6706
6707 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
6708 goto out;
6709 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
6710 goto out;
6711 error = vnode_setattr(vp, &va, ctx);
6712
6713#if CONFIG_MACF
6714 if (error == 0)
6715 mac_vnode_notify_truncate(ctx, NOCRED, vp);
6716#endif
6717
6718out:
6719 vnode_put(vp);
6720 return (error);
6721}
6722
6723/*
6724 * Truncate a file given a file descriptor.
6725 */
6726/* ARGSUSED */
6727int
6728ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval)
6729{
6730 vfs_context_t ctx = vfs_context_current();
6731 struct vnode_attr va;
6732 vnode_t vp;
6733 struct fileproc *fp;
6734 int error ;
6735 int fd = uap->fd;
6736
6737 AUDIT_ARG(fd, uap->fd);
6738 if (uap->length < 0)
6739 return(EINVAL);
6740
6741 if ( (error = fp_lookup(p,fd,&fp,0)) ) {
6742 return(error);
6743 }
6744
6745 switch (FILEGLOB_DTYPE(fp->f_fglob)) {
6746 case DTYPE_PSXSHM:
6747 error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
6748 goto out;
6749 case DTYPE_VNODE:
6750 break;
6751 default:
6752 error = EINVAL;
6753 goto out;
6754 }
6755
6756 vp = (vnode_t)fp->f_fglob->fg_data;
6757
6758 if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
6759 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
6760 error = EINVAL;
6761 goto out;
6762 }
6763
6764 if ((error = vnode_getwithref(vp)) != 0) {
6765 goto out;
6766 }
6767
6768 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6769
6770#if CONFIG_MACF
6771 error = mac_vnode_check_truncate(ctx,
6772 fp->f_fglob->fg_cred, vp);
6773 if (error) {
6774 (void)vnode_put(vp);
6775 goto out;
6776 }
6777#endif
6778 VATTR_INIT(&va);
6779 VATTR_SET(&va, va_data_size, uap->length);
6780 error = vnode_setattr(vp, &va, ctx);
6781
6782#if CONFIG_MACF
6783 if (error == 0)
6784 mac_vnode_notify_truncate(ctx, fp->f_fglob->fg_cred, vp);
6785#endif
6786
6787 (void)vnode_put(vp);
6788out:
6789 file_drop(fd);
6790 return (error);
6791}
6792
6793
6794/*
6795 * Sync an open file with synchronized I/O _file_ integrity completion
6796 */
6797/* ARGSUSED */
6798int
6799fsync(proc_t p, struct fsync_args *uap, __unused int32_t *retval)
6800{
6801 __pthread_testcancel(1);
6802 return(fsync_common(p, uap, MNT_WAIT));
6803}
6804
6805
6806/*
6807 * Sync an open file with synchronized I/O _file_ integrity completion
6808 *
6809 * Notes: This is a legacy support function that does not test for
6810 * thread cancellation points.
6811 */
6812/* ARGSUSED */
6813int
6814fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused int32_t *retval)
6815{
6816 return(fsync_common(p, (struct fsync_args *)uap, MNT_WAIT));
6817}
6818
6819
6820/*
6821 * Sync an open file with synchronized I/O _data_ integrity completion
6822 */
6823/* ARGSUSED */
6824int
6825fdatasync(proc_t p, struct fdatasync_args *uap, __unused int32_t *retval)
6826{
6827 __pthread_testcancel(1);
6828 return(fsync_common(p, (struct fsync_args *)uap, MNT_DWAIT));
6829}
6830
6831
6832/*
6833 * fsync_common
6834 *
6835 * Common fsync code to support both synchronized I/O file integrity completion
6836 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
6837 *
6838 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
6839 * will only guarantee that the file data contents are retrievable. If
6840 * 'flags' is MNT_WAIT, the caller is requesting file integrity, which
6841 * additionally requires that metadata not needed for retrieving the file
6842 * data contents, such as atime, mtime, ctime, etc., also be committed to
6843 * stable storage.
6844 *
6845 * Parameters: p The process
6846 * uap->fd The descriptor to synchronize
6847 * flags The data integrity flags
6848 *
6849 * Returns: int Success
6850 * fp_getfvp:EBADF Bad file descriptor
6851 * fp_getfvp:ENOTSUP fd does not refer to a vnode
6852 * VNOP_FSYNC:??? unspecified
6853 *
6854 * Notes: We use struct fsync_args because it is a short name, and all
6855 * caller argument structures are otherwise identical.
6856 */
6857static int
6858fsync_common(proc_t p, struct fsync_args *uap, int flags)
6859{
6860 vnode_t vp;
6861 struct fileproc *fp;
6862 vfs_context_t ctx = vfs_context_current();
6863 int error;
6864
6865 AUDIT_ARG(fd, uap->fd);
6866
6867 if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) )
6868 return (error);
6869 if ( (error = vnode_getwithref(vp)) ) {
6870 file_drop(uap->fd);
6871 return(error);
6872 }
6873
6874 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6875
6876 error = VNOP_FSYNC(vp, flags, ctx);
6877
6878#if NAMEDRSRCFORK
6879 /* Sync resource fork shadow file if necessary. */
6880 if ((error == 0) &&
6881 (vp->v_flag & VISNAMEDSTREAM) &&
6882 (vp->v_parent != NULLVP) &&
6883 vnode_isshadow(vp) &&
6884 (fp->f_flags & FP_WRITTEN)) {
6885 (void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
6886 }
6887#endif
6888
6889 (void)vnode_put(vp);
6890 file_drop(uap->fd);
6891 return (error);
6892}
6893
6894/*
6895 * Duplicate files. Source must be a file, target must be a file or
6896 * must not exist.
6897 *
6898 * XXX Copyfile authorisation checking is woefully inadequate, and will not
6899 * perform inheritance correctly.
6900 */
6901/* ARGSUSED */
6902int
6903copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval)
6904{
6905 vnode_t tvp, fvp, tdvp, sdvp;
6906 struct nameidata fromnd, tond;
6907 int error;
6908 vfs_context_t ctx = vfs_context_current();
6909#if CONFIG_MACF
6910 struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
6911 struct vnode_attr va;
6912#endif
6913
6914 /* Check that the flags are valid. */
6915
6916 if (uap->flags & ~CPF_MASK) {
6917 return(EINVAL);
6918 }
6919
6920 NDINIT(&fromnd, LOOKUP, OP_COPYFILE, AUDITVNPATH1,
6921 UIO_USERSPACE, uap->from, ctx);
6922 if ((error = namei(&fromnd)))
6923 return (error);
6924 fvp = fromnd.ni_vp;
6925
6926 NDINIT(&tond, CREATE, OP_LINK,
6927 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
6928 UIO_USERSPACE, uap->to, ctx);
6929 if ((error = namei(&tond))) {
6930 goto out1;
6931 }
6932 tdvp = tond.ni_dvp;
6933 tvp = tond.ni_vp;
6934
6935 if (tvp != NULL) {
6936 if (!(uap->flags & CPF_OVERWRITE)) {
6937 error = EEXIST;
6938 goto out;
6939 }
6940 }
6941
6942 if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
6943 error = EISDIR;
6944 goto out;
6945 }
6946
6947 /* This calls existing MAC hooks for open */
6948 if ((error = vn_authorize_open_existing(fvp, &fromnd.ni_cnd, FREAD, ctx,
6949 NULL))) {
6950 goto out;
6951 }
6952
6953 if (tvp) {
6954 /*
6955 * See unlinkat_internal for an explanation of the potential
6956 * ENOENT from the MAC hook, but the gist is that the MAC hook
6957 * can fail because vn_getpath isn't able to return the full
6958 * path. We choose to ignore this failure.
6959 */
6960 error = vn_authorize_unlink(tdvp, tvp, &tond.ni_cnd, ctx, NULL);
6961 if (error && error != ENOENT)
6962 goto out;
6963 error = 0;
6964 }
6965
6966#if CONFIG_MACF
6967 VATTR_INIT(&va);
6968 VATTR_SET(&va, va_type, fvp->v_type);
6969 /* Mask off all but regular access permissions */
6970 VATTR_SET(&va, va_mode,
6971 ((((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT) & ACCESSPERMS));
6972 error = mac_vnode_check_create(ctx, tdvp, &tond.ni_cnd, &va);
6973 if (error)
6974 goto out;
6975#endif /* CONFIG_MACF */
6976
6977 if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
6978 goto out;
6979
6980 if (fvp == tdvp)
6981 error = EINVAL;
6982 /*
6983 * If source is the same as the destination (that is the
6984 * same inode number) then there is nothing to do.
6985 * (fixed to have POSIX semantics - CSM 3/2/98)
6986 */
6987 if (fvp == tvp)
6988 error = -1;
6989 if (!error)
6990 error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
6991out:
6992 sdvp = tond.ni_startdir;
6993 /*
6994 * nameidone has to happen before we vnode_put(tdvp)
6995 * since it may need to release the fs_nodelock on the tdvp
6996 */
6997 nameidone(&tond);
6998
6999 if (tvp)
7000 vnode_put(tvp);
7001 vnode_put(tdvp);
7002 vnode_put(sdvp);
7003out1:
7004 vnode_put(fvp);
7005
7006 nameidone(&fromnd);
7007
7008 if (error == -1)
7009 return (0);
7010 return (error);
7011}
7012
7013#define CLONE_SNAPSHOT_FALLBACKS_ENABLED 1
7014
7015/*
7016 * Helper function for doing clones. The caller is expected to provide an
7017 * iocounted source vnode and release it.
7018 */
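/*
 * As used below: clonefileat() passes data_read_authorised == FALSE so the
 * source's KAUTH_VNODE_READ_DATA right is checked here, while
 * fclonefileat() passes TRUE because it has already verified that the
 * source file descriptor was opened with FREAD.
 */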
7019static int
7020clonefile_internal(vnode_t fvp, boolean_t data_read_authorised, int dst_dirfd,
7021 user_addr_t dst, uint32_t flags, vfs_context_t ctx)
7022{
7023 vnode_t tvp, tdvp;
7024 struct nameidata tond;
7025 int error;
7026 int follow;
7027 boolean_t free_src_acl;
7028 boolean_t attr_cleanup;
7029 enum vtype v_type;
7030 kauth_action_t action;
7031 struct componentname *cnp;
7032 uint32_t defaulted;
7033 struct vnode_attr va;
7034 struct vnode_attr nva;
7035 uint32_t vnop_flags;
7036
7037 v_type = vnode_vtype(fvp);
7038 switch (v_type) {
7039 case VLNK:
7040 /* FALLTHRU */
7041 case VREG:
7042 action = KAUTH_VNODE_ADD_FILE;
7043 break;
7044 case VDIR:
7045 if (vnode_isvroot(fvp) || vnode_ismount(fvp) ||
7046 fvp->v_mountedhere) {
7047 return (EINVAL);
7048 }
7049 action = KAUTH_VNODE_ADD_SUBDIRECTORY;
7050 break;
7051 default:
7052 return (EINVAL);
7053 }
7054
7055 AUDIT_ARG(fd2, dst_dirfd);
7056 AUDIT_ARG(value32, flags);
7057
7058 follow = (flags & CLONE_NOFOLLOW) ? NOFOLLOW : FOLLOW;
7059 NDINIT(&tond, CREATE, OP_LINK, follow | WANTPARENT | AUDITVNPATH2,
7060 UIO_USERSPACE, dst, ctx);
7061 if ((error = nameiat(&tond, dst_dirfd)))
7062 return (error);
7063 cnp = &tond.ni_cnd;
7064 tdvp = tond.ni_dvp;
7065 tvp = tond.ni_vp;
7066
7067 free_src_acl = FALSE;
7068 attr_cleanup = FALSE;
7069
7070 if (tvp != NULL) {
7071 error = EEXIST;
7072 goto out;
7073 }
7074
7075 if (vnode_mount(tdvp) != vnode_mount(fvp)) {
7076 error = EXDEV;
7077 goto out;
7078 }
7079
7080#if CONFIG_MACF
7081 if ((error = mac_vnode_check_clone(ctx, tdvp, fvp, cnp)))
7082 goto out;
7083#endif
7084 if ((error = vnode_authorize(tdvp, NULL, action, ctx)))
7085 goto out;
7086
7087 action = KAUTH_VNODE_GENERIC_READ_BITS;
7088 if (data_read_authorised)
7089 action &= ~KAUTH_VNODE_READ_DATA;
7090 if ((error = vnode_authorize(fvp, NULL, action, ctx)))
7091 goto out;
7092
7093 /*
7094 * Certain attributes may need to be changed from the source; we ask for
7095 * those here.
7096 */
7097 VATTR_INIT(&va);
7098 VATTR_WANTED(&va, va_uid);
7099 VATTR_WANTED(&va, va_gid);
7100 VATTR_WANTED(&va, va_mode);
7101 VATTR_WANTED(&va, va_flags);
7102 VATTR_WANTED(&va, va_acl);
7103
7104 if ((error = vnode_getattr(fvp, &va, ctx)) != 0)
7105 goto out;
7106
7107 VATTR_INIT(&nva);
7108 VATTR_SET(&nva, va_type, v_type);
7109 if (VATTR_IS_SUPPORTED(&va, va_acl) && va.va_acl != NULL) {
7110 VATTR_SET(&nva, va_acl, va.va_acl);
7111 free_src_acl = TRUE;
7112 }
7113
7114 /* Handle ACL inheritance, initialize vap. */
7115 if (v_type == VLNK) {
7116 error = vnode_authattr_new(tdvp, &nva, 0, ctx);
7117 } else {
7118 error = vn_attribute_prepare(tdvp, &nva, &defaulted, ctx);
7119 if (error)
7120 goto out;
7121 attr_cleanup = TRUE;
7122 }
7123
7124 vnop_flags = VNODE_CLONEFILE_DEFAULT;
7125 /*
7126 * We've got initial values for all security parameters.
7127 * If we are superuser, then we can change owners to be the
7128 * same as the source. Both superuser and the owner have default
7129 * WRITE_SECURITY privileges so all other fields can be taken
7130 * from source as well.
7131 */
7132 if (!(flags & CLONE_NOOWNERCOPY) && vfs_context_issuser(ctx)) {
7133 if (VATTR_IS_SUPPORTED(&va, va_uid))
7134 VATTR_SET(&nva, va_uid, va.va_uid);
7135 if (VATTR_IS_SUPPORTED(&va, va_gid))
7136 VATTR_SET(&nva, va_gid, va.va_gid);
7137 } else {
7138 vnop_flags |= VNODE_CLONEFILE_NOOWNERCOPY;
7139 }
7140
7141 if (VATTR_IS_SUPPORTED(&va, va_mode))
7142 VATTR_SET(&nva, va_mode, va.va_mode);
7143 if (VATTR_IS_SUPPORTED(&va, va_flags)) {
7144 VATTR_SET(&nva, va_flags,
7145 ((va.va_flags & ~(UF_DATAVAULT | SF_RESTRICTED)) | /* Turn off from source */
7146 (nva.va_flags & (UF_DATAVAULT | SF_RESTRICTED))));
7147 }
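	/*
	 * Illustrative example with hypothetical values: if the source
	 * carries (UF_HIDDEN | UF_DATAVAULT) and the destination-side
	 * preparation above left SF_RESTRICTED in nva, the clone ends up
	 * with (UF_HIDDEN | SF_RESTRICTED); every source flag is copied
	 * except UF_DATAVAULT and SF_RESTRICTED, which are always taken
	 * from the destination side.
	 */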
7148
7149 error = VNOP_CLONEFILE(fvp, tdvp, &tvp, cnp, &nva, vnop_flags, ctx);
7150
7151 if (!error && tvp) {
7152 int update_flags = 0;
7153#if CONFIG_FSE
7154 int fsevent;
7155#endif /* CONFIG_FSE */
7156
7157#if CONFIG_MACF
7158 (void)vnode_label(vnode_mount(tvp), tdvp, tvp, cnp,
7159 VNODE_LABEL_CREATE, ctx);
7160#endif
7161 /*
7162 * If some of the requested attributes weren't handled by the
7163 * VNOP, use our fallback code.
7164 */
7165 if (!VATTR_ALL_SUPPORTED(&va))
7166 (void)vnode_setattr_fallback(tvp, &nva, ctx);
7167
7168 // Make sure the name & parent pointers are hooked up
7169 if (tvp->v_name == NULL)
7170 update_flags |= VNODE_UPDATE_NAME;
7171 if (tvp->v_parent == NULLVP)
7172 update_flags |= VNODE_UPDATE_PARENT;
7173
7174 if (update_flags) {
7175 (void)vnode_update_identity(tvp, tdvp, cnp->cn_nameptr,
7176 cnp->cn_namelen, cnp->cn_hash, update_flags);
7177 }
7178
7179#if CONFIG_FSE
7180 switch (vnode_vtype(tvp)) {
7181 case VLNK:
7182 /* FALLTHRU */
7183 case VREG:
7184 fsevent = FSE_CREATE_FILE;
7185 break;
7186 case VDIR:
7187 fsevent = FSE_CREATE_DIR;
7188 break;
7189 default:
7190 goto out;
7191 }
7192
7193 if (need_fsevent(fsevent, tvp)) {
7194 /*
7195 * The following is a sequence of three explicit events.
7196 * A pair of FSE_CLONE events representing the source and destination
7197 * followed by an FSE_CREATE_[FILE | DIR] for the destination.
7198 * fseventsd may coalesce the destination clone and create events
7199 * into a single event resulting in the following sequence for a client
7200 * FSE_CLONE (src)
7201 * FSE_CLONE | FSE_CREATE (dst)
7202 */
7203 add_fsevent(FSE_CLONE, ctx, FSE_ARG_VNODE, fvp, FSE_ARG_VNODE, tvp,
7204 FSE_ARG_DONE);
7205 add_fsevent(fsevent, ctx, FSE_ARG_VNODE, tvp,
7206 FSE_ARG_DONE);
7207 }
7208#endif /* CONFIG_FSE */
7209 }
7210
7211out:
7212 if (attr_cleanup)
7213 vn_attribute_cleanup(&nva, defaulted);
7214 if (free_src_acl && va.va_acl)
7215 kauth_acl_free(va.va_acl);
7216 nameidone(&tond);
7217 if (tvp)
7218 vnode_put(tvp);
7219 vnode_put(tdvp);
7220 return (error);
7221}
7222
7223/*
7224 * clone files or directories, target must not exist.
7225 */
7226/* ARGSUSED */
7227int
7228clonefileat(__unused proc_t p, struct clonefileat_args *uap,
7229 __unused int32_t *retval)
7230{
7231 vnode_t fvp;
7232 struct nameidata fromnd;
7233 int follow;
7234 int error;
7235 vfs_context_t ctx = vfs_context_current();
7236
7237 /* Check that the flags are valid. */
7238 if (uap->flags & ~(CLONE_NOFOLLOW | CLONE_NOOWNERCOPY))
7239 return (EINVAL);
7240
7241 AUDIT_ARG(fd, uap->src_dirfd);
7242
7243 follow = (uap->flags & CLONE_NOFOLLOW) ? NOFOLLOW : FOLLOW;
7244 NDINIT(&fromnd, LOOKUP, OP_COPYFILE, follow | AUDITVNPATH1,
7245 UIO_USERSPACE, uap->src, ctx);
7246 if ((error = nameiat(&fromnd, uap->src_dirfd)))
7247 return (error);
7248
7249 fvp = fromnd.ni_vp;
7250 nameidone(&fromnd);
7251
7252 error = clonefile_internal(fvp, FALSE, uap->dst_dirfd, uap->dst,
7253 uap->flags, ctx);
7254
7255 vnode_put(fvp);
7256 return (error);
7257}
7258
7259int
7260fclonefileat(__unused proc_t p, struct fclonefileat_args *uap,
7261 __unused int32_t *retval)
7262{
7263 vnode_t fvp;
7264 struct fileproc *fp;
7265 int error;
7266 vfs_context_t ctx = vfs_context_current();
7267
7268 /* Check that the flags are valid. */
7269 if (uap->flags & ~(CLONE_NOFOLLOW | CLONE_NOOWNERCOPY))
7270 return (EINVAL);
7271
7272 AUDIT_ARG(fd, uap->src_fd);
7273 error = fp_getfvp(p, uap->src_fd, &fp, &fvp);
7274 if (error)
7275 return (error);
7276
7277 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
7278 AUDIT_ARG(vnpath_withref, fvp, ARG_VNODE1);
7279 error = EBADF;
7280 goto out;
7281 }
7282
7283 if ((error = vnode_getwithref(fvp)))
7284 goto out;
7285
7286 AUDIT_ARG(vnpath, fvp, ARG_VNODE1);
7287
7288 error = clonefile_internal(fvp, TRUE, uap->dst_dirfd, uap->dst,
7289 uap->flags, ctx);
7290
7291 vnode_put(fvp);
7292out:
7293 file_drop(uap->src_fd);
7294 return (error);
7295}
7296
7297/*
7298 * Rename files. Source and destination must either both be directories,
7299 * or both not be directories. If target is a directory, it must be empty.
7300 */
7301/* ARGSUSED */
7302static int
7303renameat_internal(vfs_context_t ctx, int fromfd, user_addr_t from,
7304 int tofd, user_addr_t to, int segflg, vfs_rename_flags_t flags)
7305{
7306 if (flags & ~VFS_RENAME_FLAGS_MASK)
7307 return EINVAL;
7308
7309 if (ISSET(flags, VFS_RENAME_SWAP) && ISSET(flags, VFS_RENAME_EXCL))
7310 return EINVAL;
7311
7312 vnode_t tvp, tdvp;
7313 vnode_t fvp, fdvp;
7314 struct nameidata *fromnd, *tond;
7315 int error;
7316 int do_retry;
7317 int retry_count;
7318 int mntrename;
7319 int need_event;
7320 const char *oname = NULL;
7321 char *from_name = NULL, *to_name = NULL;
7322 int from_len=0, to_len=0;
7323 int holding_mntlock;
7324 mount_t locked_mp = NULL;
7325 vnode_t oparent = NULLVP;
7326#if CONFIG_FSE
7327 fse_info from_finfo, to_finfo;
7328#endif
7329 int from_truncated=0, to_truncated;
7330 int batched = 0;
7331 struct vnode_attr *fvap, *tvap;
7332 int continuing = 0;
7333 /* carving out a chunk for structs that are too big to be on stack. */
7334 struct {
7335 struct nameidata from_node, to_node;
7336 struct vnode_attr fv_attr, tv_attr;
7337 } * __rename_data;
7338 MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
7339 fromnd = &__rename_data->from_node;
7340 tond = &__rename_data->to_node;
7341
7342 holding_mntlock = 0;
7343 do_retry = 0;
7344 retry_count = 0;
7345retry:
7346 fvp = tvp = NULL;
7347 fdvp = tdvp = NULL;
7348 fvap = tvap = NULL;
7349 mntrename = FALSE;
7350
7351 NDINIT(fromnd, DELETE, OP_UNLINK, WANTPARENT | AUDITVNPATH1,
7352 segflg, from, ctx);
7353 fromnd->ni_flag = NAMEI_COMPOUNDRENAME;
7354
7355 NDINIT(tond, RENAME, OP_RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK,
7356 segflg, to, ctx);
7357 tond->ni_flag = NAMEI_COMPOUNDRENAME;
7358
7359continue_lookup:
7360 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
7361 if ( (error = nameiat(fromnd, fromfd)) )
7362 goto out1;
7363 fdvp = fromnd->ni_dvp;
7364 fvp = fromnd->ni_vp;
7365
7366 if (fvp && fvp->v_type == VDIR)
7367 tond->ni_cnd.cn_flags |= WILLBEDIR;
7368 }
7369
7370 if ((tond->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
7371 if ( (error = nameiat(tond, tofd)) ) {
7372 /*
7373 * Translate error code for rename("dir1", "dir2/.").
7374 */
7375 if (error == EISDIR && fvp->v_type == VDIR)
7376 error = EINVAL;
7377 goto out1;
7378 }
7379 tdvp = tond->ni_dvp;
7380 tvp = tond->ni_vp;
7381 }
7382
7383#if DEVELOPMENT || DEBUG
7384 /*
7385 * XXX VSWAP: Check for entitlements or special flag here
7386 * so we can restrict access appropriately.
7387 */
7388#else /* DEVELOPMENT || DEBUG */
7389
7390 if (fromnd->ni_vp && vnode_isswap(fromnd->ni_vp) && (ctx != vfs_context_kernel())) {
7391 error = EPERM;
7392 goto out1;
7393 }
7394
7395 if (tond->ni_vp && vnode_isswap(tond->ni_vp) && (ctx != vfs_context_kernel())) {
7396 error = EPERM;
7397 goto out1;
7398 }
7399#endif /* DEVELOPMENT || DEBUG */
7400
7401 if (!tvp && ISSET(flags, VFS_RENAME_SWAP)) {
7402 error = ENOENT;
7403 goto out1;
7404 }
7405
7406 if (tvp && ISSET(flags, VFS_RENAME_EXCL)) {
7407 error = EEXIST;
7408 goto out1;
7409 }
7410
7411 batched = vnode_compound_rename_available(fdvp);
7412
7413#if CONFIG_FSE
7414 need_event = need_fsevent(FSE_RENAME, fdvp);
7415 if (need_event) {
7416 if (fvp) {
7417 get_fse_info(fvp, &from_finfo, ctx);
7418 } else {
7419 error = vfs_get_notify_attributes(&__rename_data->fv_attr);
7420 if (error) {
7421 goto out1;
7422 }
7423
7424 fvap = &__rename_data->fv_attr;
7425 }
7426
7427 if (tvp) {
7428 get_fse_info(tvp, &to_finfo, ctx);
7429 } else if (batched) {
7430 error = vfs_get_notify_attributes(&__rename_data->tv_attr);
7431 if (error) {
7432 goto out1;
7433 }
7434
7435 tvap = &__rename_data->tv_attr;
7436 }
7437 }
7438#else
7439 need_event = 0;
7440#endif /* CONFIG_FSE */
7441
7442 if (need_event || kauth_authorize_fileop_has_listeners()) {
7443 if (from_name == NULL) {
7444 GET_PATH(from_name);
7445 if (from_name == NULL) {
7446 error = ENOMEM;
7447 goto out1;
7448 }
7449 }
7450
7451 from_len = safe_getpath(fdvp, fromnd->ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
7452
7453 if (to_name == NULL) {
7454 GET_PATH(to_name);
7455 if (to_name == NULL) {
7456 error = ENOMEM;
7457 goto out1;
7458 }
7459 }
7460
7461 to_len = safe_getpath(tdvp, tond->ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
7462 }
7463 if (!fvp) {
7464 /*
7465 * Claim: this check will never reject a valid rename.
7466 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
7467 * Suppose fdvp and tdvp are not on the same mount.
7468 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
7469 * then you can't move it to within another dir on the same mountpoint.
7470 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
7471 *
7472 * If this check passes, then we are safe to pass these vnodes to the same FS.
7473 */
7474 if (fdvp->v_mount != tdvp->v_mount) {
7475 error = EXDEV;
7476 goto out1;
7477 }
7478 goto skipped_lookup;
7479 }
7480
7481 if (!batched) {
7482 error = vn_authorize_renamex_with_paths(fdvp, fvp, &fromnd->ni_cnd, from_name, tdvp, tvp, &tond->ni_cnd, to_name, ctx, flags, NULL);
7483 if (error) {
7484 if (error == ENOENT) {
7485 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
7486 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
7487 /*
7488 * We encountered a race where after doing the namei, tvp stops
7489 * being valid. If so, simply re-drive the rename call from the
7490 * top.
7491 */
7492 do_retry = 1;
7493 retry_count += 1;
7494 }
7495 }
7496 goto out1;
7497 }
7498 }
7499
7500 /*
7501 * If the source and destination are the same (i.e. they're
7502 * links to the same vnode) and the target file system is
7503 * case sensitive, then there is nothing to do.
7504 *
7505 * XXX Come back to this.
7506 */
7507 if (fvp == tvp) {
7508 int pathconf_val;
7509
7510 /*
7511 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
7512 * then assume that this file system is case sensitive.
7513 */
7514 if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
7515 pathconf_val != 0) {
7516 goto out1;
7517 }
7518 }
7519
7520 /*
7521 * Allow the renaming of mount points.
7522 * - target must not exist
7523 * - target must reside in the same directory as source
7524 * - union mounts cannot be renamed
7525 * - "/" cannot be renamed
7526 *
7527 * XXX Handle this in VFS after a continued lookup (if we missed
7528 * in the cache to start off)
7529 *
7530 * N.B. If RENAME_SWAP is being used, then @tvp != NULL and so
7531 * we'll skip past here. The file system is responsible for
7532 * checking that @tvp is not a descendent of @fvp and vice versa
7533 * so it should always return EINVAL if either @tvp or @fvp is the
7534 * root of a volume.
7535 */
7536 if ((fvp->v_flag & VROOT) &&
7537 (fvp->v_type == VDIR) &&
7538 (tvp == NULL) &&
7539 (fvp->v_mountedhere == NULL) &&
7540 (fdvp == tdvp) &&
7541 ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0) &&
7542 (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
7543 vnode_t coveredvp;
7544
7545 /* switch fvp to the covered vnode */
7546 coveredvp = fvp->v_mount->mnt_vnodecovered;
7547 if ( (vnode_getwithref(coveredvp)) ) {
7548 error = ENOENT;
7549 goto out1;
7550 }
7551 vnode_put(fvp);
7552
7553 fvp = coveredvp;
7554 mntrename = TRUE;
7555 }
7556 /*
7557 * Check for cross-device rename.
7558 */
7559 if ((fvp->v_mount != tdvp->v_mount) ||
7560 (tvp && (fvp->v_mount != tvp->v_mount))) {
7561 error = EXDEV;
7562 goto out1;
7563 }
7564
7565 /*
7566 * If source is the same as the destination (that is the
7567 * same inode number) then there is nothing to do...
7568 * EXCEPT if the underlying file system supports case
7569 * insensitivity and is case preserving. In this case
7570 * the file system needs to handle the special case of
7571 * getting the same vnode as target (fvp) and source (tvp).
7572 *
7573 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
7574 * and _PC_CASE_PRESERVING can have this exception, and they need to
7575 * handle the special case of getting the same vnode as target and
7576 * source. NOTE: Then the target is unlocked going into vnop_rename,
7577 * so as not to cause locking problems. There is a single reference on tvp.
7578 *
7579 * NOTE - that fvp == tvp also occurs if they are hard linked and
7580 * that correct behaviour then is just to return success without doing
7581 * anything.
7582 *
7583 * XXX filesystem should take care of this itself, perhaps...
7584 */
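/*
 * Concrete illustration: on a case-insensitive, case-preserving volume,
 * rename("foo", "FOO") resolves both names to the same vnode
 * (fvp == tvp, fdvp == tdvp) but with different component names, so the
 * check below falls through and the filesystem gets to perform the
 * case-only rename; on a case-sensitive volume the _PC_CASE_SENSITIVE
 * check earlier has already returned success without doing anything.
 */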
7585 if (fvp == tvp && fdvp == tdvp) {
7586 if (fromnd->ni_cnd.cn_namelen == tond->ni_cnd.cn_namelen &&
7587 !bcmp(fromnd->ni_cnd.cn_nameptr, tond->ni_cnd.cn_nameptr,
7588 fromnd->ni_cnd.cn_namelen)) {
7589 goto out1;
7590 }
7591 }
7592
7593 if (holding_mntlock && fvp->v_mount != locked_mp) {
7594 /*
7595 * we're holding a reference and lock
7596 * on locked_mp, but it no longer matches
7597 * what we want to do... so drop our hold
7598 */
7599 mount_unlock_renames(locked_mp);
7600 mount_drop(locked_mp, 0);
7601 holding_mntlock = 0;
7602 }
7603 if (tdvp != fdvp && fvp->v_type == VDIR) {
7604 /*
7605 * serialize renames that re-shape
7606 * the tree... if holding_mntlock is
7607 * set, then we're ready to go...
7608 * otherwise we
7609 * first need to drop the iocounts
7610 * we picked up, second take the
7611 * lock to serialize the access,
7612 * then finally start the lookup
7613 * process over with the lock held
7614 */
7615 if (!holding_mntlock) {
7616 /*
7617 * need to grab a reference on
7618 * the mount point before we
7619 * drop all the iocounts... once
7620 * the iocounts are gone, the mount
7621 * could follow
7622 */
7623 locked_mp = fvp->v_mount;
7624 mount_ref(locked_mp, 0);
7625
7626 /*
7627 * nameidone has to happen before we vnode_put(tvp)
7628 * since it may need to release the fs_nodelock on the tvp
7629 */
7630 nameidone(tond);
7631
7632 if (tvp)
7633 vnode_put(tvp);
7634 vnode_put(tdvp);
7635
7636 /*
7637 * nameidone has to happen before we vnode_put(fdvp)
7638 * since it may need to release the fs_nodelock on the fvp
7639 */
7640 nameidone(fromnd);
7641
7642 vnode_put(fvp);
7643 vnode_put(fdvp);
7644
7645 mount_lock_renames(locked_mp);
7646 holding_mntlock = 1;
7647
7648 goto retry;
7649 }
7650 } else {
7651 /*
7652 * when we dropped the iocounts to take
7653 * the lock, we allowed the identity of
7654 * the various vnodes to change... if they did,
7655 * we may no longer be dealing with a rename
7656 * that reshapes the tree... once we're holding
7657 * the iocounts, the vnodes can't change type
7658 * so we're free to drop the lock at this point
7659 * and continue on
7660 */
7661 if (holding_mntlock) {
7662 mount_unlock_renames(locked_mp);
7663 mount_drop(locked_mp, 0);
7664 holding_mntlock = 0;
7665 }
7666 }
7667
7668 // save these off so we can later verify that fvp is the same
7669 oname = fvp->v_name;
7670 oparent = fvp->v_parent;
7671
7672skipped_lookup:
7673 error = vn_rename(fdvp, &fvp, &fromnd->ni_cnd, fvap,
7674 tdvp, &tvp, &tond->ni_cnd, tvap,
7675 flags, ctx);
7676
7677 if (holding_mntlock) {
7678 /*
7679 * we can drop our serialization
7680 * lock now
7681 */
7682 mount_unlock_renames(locked_mp);
7683 mount_drop(locked_mp, 0);
7684 holding_mntlock = 0;
7685 }
7686 if (error) {
7687 if (error == EKEEPLOOKING) {
7688 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) == 0) {
7689 if ((tond->ni_flag & NAMEI_CONTLOOKUP) == 0) {
7690 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
7691 }
7692 }
7693
7694 fromnd->ni_vp = fvp;
7695 tond->ni_vp = tvp;
7696
7697 goto continue_lookup;
7698 }
7699
7700 /*
7701 * We may encounter a race in the VNOP where the destination didn't
7702 * exist when we did the namei, but it does by the time we go and
7703 * try to create the entry. In this case, we should re-drive this rename
7704 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
7705 * but other filesystems susceptible to this race could return it, too.
7706 */
7707 if (error == ERECYCLE) {
7708 do_retry = 1;
7709 }
7710
7711 /*
7712 * For compound VNOPs, the authorization callback may return
7713 * ENOENT in case of racing hardlink lookups hitting the name
7714 * cache; redrive the lookup.
7715 */
7716 if (batched && error == ENOENT) {
7717 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
7718 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
7719 do_retry = 1;
7720 retry_count += 1;
7721 }
7722 }
7723
7724 goto out1;
7725 }
7726
7727 /* Call out to allow 3rd party notification of rename.
7728 * Ignore result of kauth_authorize_fileop call.
7729 */
7730 kauth_authorize_fileop(vfs_context_ucred(ctx),
7731 KAUTH_FILEOP_RENAME,
7732 (uintptr_t)from_name, (uintptr_t)to_name);
7733 if (flags & VFS_RENAME_SWAP) {
7734 kauth_authorize_fileop(vfs_context_ucred(ctx),
7735 KAUTH_FILEOP_RENAME,
7736 (uintptr_t)to_name, (uintptr_t)from_name);
7737 }
7738
7739#if CONFIG_FSE
7740 if (from_name != NULL && to_name != NULL) {
7741 if (from_truncated || to_truncated) {
7742 // set it here since only the from_finfo gets reported up to user space
7743 from_finfo.mode |= FSE_TRUNCATED_PATH;
7744 }
7745
7746 if (tvap && tvp) {
7747 vnode_get_fse_info_from_vap(tvp, &to_finfo, tvap);
7748 }
7749 if (fvap) {
7750 vnode_get_fse_info_from_vap(fvp, &from_finfo, fvap);
7751 }
7752
7753 if (tvp) {
7754 add_fsevent(FSE_RENAME, ctx,
7755 FSE_ARG_STRING, from_len, from_name,
7756 FSE_ARG_FINFO, &from_finfo,
7757 FSE_ARG_STRING, to_len, to_name,
7758 FSE_ARG_FINFO, &to_finfo,
7759 FSE_ARG_DONE);
7760 if (flags & VFS_RENAME_SWAP) {
7761 /*
7762 * Strictly speaking, swap is the equivalent of
7763 * *three* renames. FSEvents clients should only take
7764 * the events as a hint, so we only bother reporting
7765 * two.
7766 */
7767 add_fsevent(FSE_RENAME, ctx,
7768 FSE_ARG_STRING, to_len, to_name,
7769 FSE_ARG_FINFO, &to_finfo,
7770 FSE_ARG_STRING, from_len, from_name,
7771 FSE_ARG_FINFO, &from_finfo,
7772 FSE_ARG_DONE);
7773 }
7774 } else {
7775 add_fsevent(FSE_RENAME, ctx,
7776 FSE_ARG_STRING, from_len, from_name,
7777 FSE_ARG_FINFO, &from_finfo,
7778 FSE_ARG_STRING, to_len, to_name,
7779 FSE_ARG_DONE);
7780 }
7781 }
7782#endif /* CONFIG_FSE */
7783
7784 /*
7785 * update filesystem's mount point data
7786 */
7787 if (mntrename) {
7788 char *cp, *pathend, *mpname;
7789 char * tobuf;
7790 struct mount *mp;
7791 int maxlen;
7792 size_t len = 0;
7793
7794 mp = fvp->v_mountedhere;
7795
7796 if (vfs_busy(mp, LK_NOWAIT)) {
7797 error = EBUSY;
7798 goto out1;
7799 }
7800 MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
7801
7802 if (UIO_SEG_IS_USER_SPACE(segflg))
7803 error = copyinstr(to, tobuf, MAXPATHLEN, &len);
7804 else
7805 error = copystr((void *)to, tobuf, MAXPATHLEN, &len);
7806 if (!error) {
7807 /* find current mount point prefix */
7808 pathend = &mp->mnt_vfsstat.f_mntonname[0];
7809 for (cp = pathend; *cp != '\0'; ++cp) {
7810 if (*cp == '/')
7811 pathend = cp + 1;
7812 }
7813 /* find last component of target name */
7814 for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
7815 if (*cp == '/')
7816 mpname = cp + 1;
7817 }
7818 /* append name to prefix */
7819 maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
7820 bzero(pathend, maxlen);
7821 strlcpy(pathend, mpname, maxlen);
7822 }
7823 FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);
7824
7825 vfs_unbusy(mp);
7826 }
7827 /*
7828 * fix up name & parent pointers. note that we first
7829 * check that fvp has the same name/parent pointers it
7830 * had before the rename call... this is a 'weak' check
7831 * at best...
7832 *
7833 * XXX oparent and oname may not be set in the compound vnop case
7834 */
7835 if (batched || (oname == fvp->v_name && oparent == fvp->v_parent)) {
7836 int update_flags;
7837
7838 update_flags = VNODE_UPDATE_NAME;
7839
7840 if (fdvp != tdvp)
7841 update_flags |= VNODE_UPDATE_PARENT;
7842
7843 vnode_update_identity(fvp, tdvp, tond->ni_cnd.cn_nameptr, tond->ni_cnd.cn_namelen, tond->ni_cnd.cn_hash, update_flags);
7844 }
7845out1:
7846 if (to_name != NULL) {
7847 RELEASE_PATH(to_name);
7848 to_name = NULL;
7849 }
7850 if (from_name != NULL) {
7851 RELEASE_PATH(from_name);
7852 from_name = NULL;
7853 }
7854 if (holding_mntlock) {
7855 mount_unlock_renames(locked_mp);
7856 mount_drop(locked_mp, 0);
7857 holding_mntlock = 0;
7858 }
7859 if (tdvp) {
7860 /*
7861 * nameidone has to happen before we vnode_put(tdvp)
7862 * since it may need to release the fs_nodelock on the tdvp
7863 */
7864 nameidone(tond);
7865
7866 if (tvp)
7867 vnode_put(tvp);
7868 vnode_put(tdvp);
7869 }
7870 if (fdvp) {
7871 /*
7872 * nameidone has to happen before we vnode_put(fdvp)
7873 * since it may need to release the fs_nodelock on the fdvp
7874 */
7875 nameidone(fromnd);
7876
7877 if (fvp)
7878 vnode_put(fvp);
7879 vnode_put(fdvp);
7880 }
7881
7882 /*
7883 * If things changed after we did the namei, then we will re-drive
7884 * this rename call from the top.
7885 */
7886 if (do_retry) {
7887 do_retry = 0;
7888 goto retry;
7889 }
7890
7891 FREE(__rename_data, M_TEMP);
7892 return (error);
7893}
7894
7895int
7896rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
7897{
7898 return (renameat_internal(vfs_context_current(), AT_FDCWD, uap->from,
7899 AT_FDCWD, uap->to, UIO_USERSPACE, 0));
7900}
7901
7902int renameatx_np(__unused proc_t p, struct renameatx_np_args *uap, __unused int32_t *retval)
7903{
7904 return renameat_internal(
7905 vfs_context_current(),
7906 uap->fromfd, uap->from,
7907 uap->tofd, uap->to,
7908 UIO_USERSPACE, uap->flags);
7909}
7910
7911int
7912renameat(__unused proc_t p, struct renameat_args *uap, __unused int32_t *retval)
7913{
7914 return (renameat_internal(vfs_context_current(), uap->fromfd, uap->from,
7915 uap->tofd, uap->to, UIO_USERSPACE, 0));
7916}
7917
7918/*
7919 * Make a directory file.
7920 *
7921 * Returns: 0 Success
7922 * EEXIST
7923 * namei:???
7924 * vnode_authorize:???
7925 * vn_create:???
7926 */
7927/* ARGSUSED */
7928static int
7929mkdir1at(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap, int fd,
7930 enum uio_seg segflg)
7931{
7932 vnode_t vp, dvp;
7933 int error;
7934 int update_flags = 0;
7935 int batched;
7936 struct nameidata nd;
7937
7938 AUDIT_ARG(mode, vap->va_mode);
7939 NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, segflg,
7940 path, ctx);
7941 nd.ni_cnd.cn_flags |= WILLBEDIR;
7942 nd.ni_flag = NAMEI_COMPOUNDMKDIR;
7943
7944continue_lookup:
7945 error = nameiat(&nd, fd);
7946 if (error)
7947 return (error);
7948 dvp = nd.ni_dvp;
7949 vp = nd.ni_vp;
7950
7951 if (vp != NULL) {
7952 error = EEXIST;
7953 goto out;
7954 }
7955
7956 batched = vnode_compound_mkdir_available(dvp);
7957
7958 VATTR_SET(vap, va_type, VDIR);
7959
7960 /*
7961 * XXX
7962 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
7963 * only get EEXIST or EISDIR for existing path components, and not that it could see
7964 * EACCES/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
7965 * it will fail in a spurious manner. Need to figure out if this is valid behavior.
7966 */
7967 if ((error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
7968 if (error == EACCES || error == EPERM) {
7969 int error2;
7970
7971 nameidone(&nd);
7972 vnode_put(dvp);
7973 dvp = NULLVP;
7974
7975 /*
7976 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
7977 * rather than EACCES if the target exists.
7978 */
7979 NDINIT(&nd, LOOKUP, OP_MKDIR, AUDITVNPATH1, segflg,
7980 path, ctx);
7981 error2 = nameiat(&nd, fd);
7982 if (error2) {
7983 goto out;
7984 } else {
7985 vp = nd.ni_vp;
7986 error = EEXIST;
7987 goto out;
7988 }
7989 }
7990
7991 goto out;
7992 }
7993
7994 /*
7995 * make the directory
7996 */
7997 if ((error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx)) != 0) {
7998 if (error == EKEEPLOOKING) {
7999 nd.ni_vp = vp;
8000 goto continue_lookup;
8001 }
8002
8003 goto out;
8004 }
8005
8006 // Make sure the name & parent pointers are hooked up
8007 if (vp->v_name == NULL)
8008 update_flags |= VNODE_UPDATE_NAME;
8009 if (vp->v_parent == NULLVP)
8010 update_flags |= VNODE_UPDATE_PARENT;
8011
8012 if (update_flags)
8013 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
8014
8015#if CONFIG_FSE
8016 add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
8017#endif
8018
8019out:
8020 /*
8021 * nameidone has to happen before we vnode_put(dvp)
8022 * since it may need to release the fs_nodelock on the dvp
8023 */
8024 nameidone(&nd);
8025
8026 if (vp)
8027 vnode_put(vp);
8028 if (dvp)
8029 vnode_put(dvp);
8030
8031 return (error);
8032}
8033
8034/*
8035 * mkdir_extended: Create a directory; with extended security (ACL).
8036 *
8037 * Parameters: p Process requesting to create the directory
8038 * uap User argument descriptor (see below)
8039 * retval (ignored)
8040 *
8041 * Indirect: uap->path Path of directory to create
8042 * uap->mode Access permissions to set
8043 * uap->xsecurity ACL to set
8044 *
8045 * Returns: 0 Success
8046 * !0 Not success
8047 *
8048 */
8049int
8050mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retval)
8051{
8052 int ciferror;
8053 kauth_filesec_t xsecdst;
8054 struct vnode_attr va;
8055
8056 AUDIT_ARG(owner, uap->uid, uap->gid);
8057
8058 xsecdst = NULL;
8059 if ((uap->xsecurity != USER_ADDR_NULL) &&
8060 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
8061 return ciferror;
8062
8063 VATTR_INIT(&va);
8064 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
8065 if (xsecdst != NULL)
8066 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
8067
8068 ciferror = mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
8069 UIO_USERSPACE);
8070 if (xsecdst != NULL)
8071 kauth_filesec_free(xsecdst);
8072 return ciferror;
8073}
8074
8075int
8076mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval)
8077{
8078 struct vnode_attr va;
8079
8080 VATTR_INIT(&va);
8081 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
8082
8083 return (mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
8084 UIO_USERSPACE));
8085}
8086
8087int
8088mkdirat(proc_t p, struct mkdirat_args *uap, __unused int32_t *retval)
8089{
8090 struct vnode_attr va;
8091
8092 VATTR_INIT(&va);
8093 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
8094
8095 return(mkdir1at(vfs_context_current(), uap->path, &va, uap->fd,
8096 UIO_USERSPACE));
8097}
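/*
 * Worked example of the va_mode computation shared by mkdir_extended(),
 * mkdir() and mkdirat() above: with the common umask of 022
 * (p->p_fd->fd_cmask == 022), mkdir("dir", 0777) yields
 * (0777 & ACCESSPERMS) & ~022 == 0755, hence the usual rwxr-xr-x result.
 */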
8098
8099static int
8100rmdirat_internal(vfs_context_t ctx, int fd, user_addr_t dirpath,
8101 enum uio_seg segflg)
8102{
8103 vnode_t vp, dvp;
8104 int error;
8105 struct nameidata nd;
8106 char *path = NULL;
8107 int len=0;
8108 int has_listeners = 0;
8109 int need_event = 0;
8110 int truncated = 0;
8111#if CONFIG_FSE
8112 struct vnode_attr va;
8113#endif /* CONFIG_FSE */
8114 struct vnode_attr *vap = NULL;
8115 int restart_count = 0;
8116 int batched;
8117
8118 int restart_flag;
8119
8120 /*
8121 * This loop exists to restart rmdir in the unlikely case that two
8122 * processes are simultaneously trying to remove the same directory
8123 * containing orphaned appleDouble files.
8124 */
8125 do {
8126 NDINIT(&nd, DELETE, OP_RMDIR, LOCKPARENT | AUDITVNPATH1,
8127 segflg, dirpath, ctx);
8128 nd.ni_flag = NAMEI_COMPOUNDRMDIR;
8129continue_lookup:
8130 restart_flag = 0;
8131 vap = NULL;
8132
8133 error = nameiat(&nd, fd);
8134 if (error)
8135 return (error);
8136
8137 dvp = nd.ni_dvp;
8138 vp = nd.ni_vp;
8139
8140 if (vp) {
8141 batched = vnode_compound_rmdir_available(vp);
8142
8143 if (vp->v_flag & VROOT) {
8144 /*
8145 * The root of a mounted filesystem cannot be deleted.
8146 */
8147 error = EBUSY;
8148 goto out;
8149 }
8150
8151#if DEVELOPMENT || DEBUG
8152 /*
8153 * XXX VSWAP: Check for entitlements or special flag here
8154 * so we can restrict access appropriately.
8155 */
8156#else /* DEVELOPMENT || DEBUG */
8157
8158 if (vnode_isswap(vp) && (ctx != vfs_context_kernel())) {
8159 error = EPERM;
8160 goto out;
8161 }
8162#endif /* DEVELOPMENT || DEBUG */
8163
8164 /*
8165 * Removed a check here; we used to abort if vp's vid
8166 * was not the same as what we'd seen the last time around.
8167 * I do not think that check was valid, because if we retry
8168 * and all dirents are gone, the directory could legitimately
8169 * be recycled but still be present in a situation where we would
8170 * have had permission to delete. Therefore, we won't make
8171 * an effort to preserve that check now that we may not have a
8172 * vp here.
8173 */
8174
8175 if (!batched) {
8176 error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL);
8177 if (error) {
8178 if (error == ENOENT) {
8179 assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
8180 if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
8181 restart_flag = 1;
8182 restart_count += 1;
8183 }
8184 }
8185 goto out;
8186 }
8187 }
8188 } else {
8189 batched = 1;
8190
8191 if (!vnode_compound_rmdir_available(dvp)) {
8192 panic("No error, but no compound rmdir?");
8193 }
8194 }
8195
8196#if CONFIG_FSE
8197 fse_info finfo;
8198
8199 need_event = need_fsevent(FSE_DELETE, dvp);
8200 if (need_event) {
8201 if (!batched) {
8202 get_fse_info(vp, &finfo, ctx);
8203 } else {
8204 error = vfs_get_notify_attributes(&va);
8205 if (error) {
8206 goto out;
8207 }
8208
8209 vap = &va;
8210 }
8211 }
8212#endif
8213 has_listeners = kauth_authorize_fileop_has_listeners();
8214 if (need_event || has_listeners) {
8215 if (path == NULL) {
8216 GET_PATH(path);
8217 if (path == NULL) {
8218 error = ENOMEM;
8219 goto out;
8220 }
8221 }
8222
8223 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated);
8224#if CONFIG_FSE
8225 if (truncated) {
8226 finfo.mode |= FSE_TRUNCATED_PATH;
8227 }
8228#endif
8229 }
8230
8231 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
8232 nd.ni_vp = vp;
8233 if (vp == NULLVP) {
8234 /* Couldn't find a vnode */
8235 goto out;
8236 }
8237
8238 if (error == EKEEPLOOKING) {
8239 goto continue_lookup;
8240 } else if (batched && error == ENOENT) {
8241 assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
8242 if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
8243 /*
8244 * For compound VNOPs, the authorization callback
8245 * may return ENOENT in case of racing hard link lookups;
8246 * redrive the lookup.
8247 */
8248 restart_flag = 1;
8249 restart_count += 1;
8250 goto out;
8251 }
8252 }
8253#if CONFIG_APPLEDOUBLE
8254 /*
8255 * Special case to remove orphaned AppleDouble
8256 * files. I don't like putting this in the kernel,
8257 * but carbon does not like putting this in carbon either,
8258 * so here we are.
8259 */
8260 if (error == ENOTEMPTY) {
8261 error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
8262 if (error == EBUSY) {
8263 goto out;
8264 }
8265
8266
8267 /*
8268 * Assuming everything went well, we will try the RMDIR again
8269 */
8270 if (!error)
8271 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
8272 }
8273#endif /* CONFIG_APPLEDOUBLE */
8274 /*
8275 * Call out to allow 3rd party notification of delete.
8276 * Ignore result of kauth_authorize_fileop call.
8277 */
8278 if (!error) {
8279 if (has_listeners) {
8280 kauth_authorize_fileop(vfs_context_ucred(ctx),
8281 KAUTH_FILEOP_DELETE,
8282 (uintptr_t)vp,
8283 (uintptr_t)path);
8284 }
8285
8286 if (vp->v_flag & VISHARDLINK) {
8287 // see the comment in unlink1() about why we update
8288 // the parent of a hard link when it is removed
8289 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
8290 }
8291
8292#if CONFIG_FSE
8293 if (need_event) {
8294 if (vap) {
8295 vnode_get_fse_info_from_vap(vp, &finfo, vap);
8296 }
8297 add_fsevent(FSE_DELETE, ctx,
8298 FSE_ARG_STRING, len, path,
8299 FSE_ARG_FINFO, &finfo,
8300 FSE_ARG_DONE);
8301 }
8302#endif
8303 }
8304
8305out:
8306 if (path != NULL) {
8307 RELEASE_PATH(path);
8308 path = NULL;
8309 }
8310 /*
8311 * nameidone has to happen before we vnode_put(dvp)
8312 * since it may need to release the fs_nodelock on the dvp
8313 */
8314 nameidone(&nd);
8315 vnode_put(dvp);
8316
8317 if (vp)
8318 vnode_put(vp);
8319
8320 if (restart_flag == 0) {
8321 wakeup_one((caddr_t)vp);
8322 return (error);
8323 }
8324 tsleep(vp, PVFS, "rm AD", 1);
8325
8326 } while (restart_flag != 0);
8327
8328 return (error);
8329
8330}
8331
8332/*
8333 * Remove a directory file.
8334 */
8335/* ARGSUSED */
8336int
8337rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
8338{
8339 return (rmdirat_internal(vfs_context_current(), AT_FDCWD,
8340 CAST_USER_ADDR_T(uap->path), UIO_USERSPACE));
8341}
8342
8343/* Get direntry length padded to 8 byte alignment */
8344#define DIRENT64_LEN(namlen) \
8345 ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)
8346
8347/* Get dirent length padded to 4 byte alignment */
8348#define DIRENT_LEN(namelen) \
8349 ((sizeof(struct dirent) + (namelen + 1) - (__DARWIN_MAXNAMLEN + 1) + 3) & ~3)
8350
8351/* Get the end of this dirent */
8352#define DIRENT_END(dep) \
8353 (((char *)(dep)) + (dep)->d_reclen - 1)
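/*
 * Worked example of the padding arithmetic above (and the basis for the
 * 3/8 buffer-sizing ratio used in vnode_readdir64() below): for a
 * three-character name, DIRENT_LEN(3) packs into 12 bytes while
 * DIRENT64_LEN(3) expands to 32 bytes, so repacked output can grow to as
 * much as 8/3 of the dirents read from the file system.
 */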
8354
8355errno_t
8356vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
8357 int *numdirent, vfs_context_t ctxp)
8358{
8359 /* Check if fs natively supports VNODE_READDIR_EXTENDED */
8360 if ((vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) &&
8361 ((vp->v_mount->mnt_kern_flag & MNTK_DENY_READDIREXT) == 0)) {
8362 return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
8363 } else {
8364 size_t bufsize;
8365 void * bufptr;
8366 uio_t auio;
8367 struct direntry *entry64;
8368 struct dirent *dep;
8369 int bytesread;
8370 int error;
8371
8372 /*
		 * We're here because the underlying file system does not
		 * support direntries, or the mount denies that support, so we
		 * must fall back to dirents and convert them to direntries.
8376 *
8377 * Our kernel buffer needs to be smaller since re-packing will
		 * expand each dirent. The worst case (when the name length
8379 * is 3 or less) corresponds to a struct direntry size of 32
8380 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
8381 * (4-byte aligned). So having a buffer that is 3/8 the size
8382 * will prevent us from reading more than we can pack.
8383 *
8384 * Since this buffer is wired memory, we will limit the
8385 * buffer size to a maximum of 32K. We would really like to
8386 * use 32K in the MIN(), but we use magic number 87371 to
8387 * prevent uio_resid() * 3 / 8 from overflowing.
8388 */
8389 bufsize = 3 * MIN((user_size_t)uio_resid(uio), 87371u) / 8;
8390 MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
8391 if (bufptr == NULL) {
8392 return ENOMEM;
8393 }
8394
8395 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
8396 uio_addiov(auio, (uintptr_t)bufptr, bufsize);
8397 auio->uio_offset = uio->uio_offset;
8398
8399 error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);
8400
8401 dep = (struct dirent *)bufptr;
8402 bytesread = bufsize - uio_resid(auio);
8403
8404 MALLOC(entry64, struct direntry *, sizeof(struct direntry),
8405 M_TEMP, M_WAITOK);
8406 /*
8407 * Convert all the entries and copy them out to user's buffer.
8408 */
8409 while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
8410 size_t enbufsize = DIRENT64_LEN(dep->d_namlen);
8411
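			/*
			 * Sanity-check the record before trusting it: it must end
			 * inside the data we actually read, and its d_reclen must be
			 * large enough to hold the name it claims to contain.
			 */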
8412 if (DIRENT_END(dep) > ((char *)bufptr + bytesread) ||
8413 DIRENT_LEN(dep->d_namlen) > dep->d_reclen) {
				printf("%s: %s: Bad dirent received from directory %s\n", __func__,
8415 vp->v_mount->mnt_vfsstat.f_mntonname,
8416 vp->v_name ? vp->v_name : "<unknown>");
8417 error = EIO;
8418 break;
8419 }
8420
8421 bzero(entry64, enbufsize);
8422 /* Convert a dirent to a dirent64. */
8423 entry64->d_ino = dep->d_ino;
8424 entry64->d_seekoff = 0;
8425 entry64->d_reclen = enbufsize;
8426 entry64->d_namlen = dep->d_namlen;
8427 entry64->d_type = dep->d_type;
8428 bcopy(dep->d_name, entry64->d_name, dep->d_namlen + 1);
8429
8430 /* Move to next entry. */
8431 dep = (struct dirent *)((char *)dep + dep->d_reclen);
8432
8433 /* Copy entry64 to user's buffer. */
8434 error = uiomove((caddr_t)entry64, entry64->d_reclen, uio);
8435 }
8436
8437 /* Update the real offset using the offset we got from VNOP_READDIR. */
8438 if (error == 0) {
8439 uio->uio_offset = auio->uio_offset;
8440 }
8441 uio_free(auio);
8442 FREE(bufptr, M_TEMP);
8443 FREE(entry64, M_TEMP);
8444 return (error);
8445 }
8446}
8447
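/*
 * Cap on the user buffer size accepted by getdirentries_common(); larger
 * requests are silently clamped to this rather than rejected.
 */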
8448#define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
8449
8450/*
8451 * Read a block of directory entries in a file system independent format.
8452 */
8453static int
8454getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
8455 off_t *offset, int flags)
8456{
8457 vnode_t vp;
8458 struct vfs_context context = *vfs_context_current(); /* local copy */
8459 struct fileproc *fp;
8460 uio_t auio;
8461 int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
8462 off_t loff;
8463 int error, eofflag, numdirent;
8464 char uio_buf[ UIO_SIZEOF(1) ];
8465
8466 error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
8467 if (error) {
8468 return (error);
8469 }
8470 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
8471 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
8472 error = EBADF;
8473 goto out;
8474 }
8475
8476 if (bufsize > GETDIRENTRIES_MAXBUFSIZE)
8477 bufsize = GETDIRENTRIES_MAXBUFSIZE;
8478
8479#if CONFIG_MACF
8480 error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
8481 if (error)
8482 goto out;
8483#endif
8484 if ( (error = vnode_getwithref(vp)) ) {
8485 goto out;
8486 }
8487 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
8488
8489unionread:
8490 if (vp->v_type != VDIR) {
8491 (void)vnode_put(vp);
8492 error = EINVAL;
8493 goto out;
8494 }
8495
8496#if CONFIG_MACF
8497 error = mac_vnode_check_readdir(&context, vp);
8498 if (error != 0) {
8499 (void)vnode_put(vp);
8500 goto out;
8501 }
8502#endif /* MAC */
8503
8504 loff = fp->f_fglob->fg_offset;
8505 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
8506 uio_addiov(auio, bufp, bufsize);
8507
8508 if (flags & VNODE_READDIR_EXTENDED) {
8509 error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context);
8510 fp->f_fglob->fg_offset = uio_offset(auio);
8511 } else {
8512 error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
8513 fp->f_fglob->fg_offset = uio_offset(auio);
8514 }
8515 if (error) {
8516 (void)vnode_put(vp);
8517 goto out;
8518 }
8519
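	/*
	 * If the VNOP returned no entries at all (the residual is unchanged),
	 * we may have hit the end of the upper layer of a union mount; if the
	 * union dircheck hook or a covered vnode exists, switch the file over
	 * to the lower directory, reset the offset, and retry the read there.
	 */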
8520 if ((user_ssize_t)bufsize == uio_resid(auio)){
8521 if (union_dircheckp) {
8522 error = union_dircheckp(&vp, fp, &context);
8523 if (error == -1)
8524 goto unionread;
8525 if (error) {
8526 (void)vnode_put(vp);
8527 goto out;
8528 }
8529 }
8530
8531 if ((vp->v_mount->mnt_flag & MNT_UNION)) {
8532 struct vnode *tvp = vp;
8533 if (lookup_traverse_union(tvp, &vp, &context) == 0) {
8534 vnode_ref(vp);
8535 fp->f_fglob->fg_data = (caddr_t) vp;
8536 fp->f_fglob->fg_offset = 0;
8537 vnode_rele(tvp);
8538 vnode_put(tvp);
8539 goto unionread;
8540 }
8541 vp = tvp;
8542 }
8543 }
8544
8545 vnode_put(vp);
8546 if (offset) {
8547 *offset = loff;
8548 }
8549
8550 *bytesread = bufsize - uio_resid(auio);
8551out:
8552 file_drop(fd);
8553 return (error);
8554}
8555
8556
8557int
8558getdirentries(__unused struct proc *p, struct getdirentries_args *uap, int32_t *retval)
8559{
8560 off_t offset;
8561 ssize_t bytesread;
8562 int error;
8563
8564 AUDIT_ARG(fd, uap->fd);
8565 error = getdirentries_common(uap->fd, uap->buf, uap->count, &bytesread, &offset, 0);
8566
8567 if (error == 0) {
8568 if (proc_is64bit(p)) {
8569 user64_long_t base = (user64_long_t)offset;
8570 error = copyout((caddr_t)&base, uap->basep, sizeof(user64_long_t));
8571 } else {
8572 user32_long_t base = (user32_long_t)offset;
8573 error = copyout((caddr_t)&base, uap->basep, sizeof(user32_long_t));
8574 }
8575 *retval = bytesread;
8576 }
8577 return (error);
8578}
8579
8580int
8581getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
8582{
8583 off_t offset;
8584 ssize_t bytesread;
8585 int error;
8586
8587 AUDIT_ARG(fd, uap->fd);
8588 error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &bytesread, &offset, VNODE_READDIR_EXTENDED);
8589
8590 if (error == 0) {
8591 *retval = bytesread;
8592 error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
8593 }
8594 return (error);
8595}
8596
8597
8598/*
8599 * Set the mode mask for creation of filesystem nodes.
8600 * XXX implement xsecurity
8601 */
8602#define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
8603static int
8604umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval)
8605{
8606 struct filedesc *fdp;
8607
8608 AUDIT_ARG(mask, newmask);
8609 proc_fdlock(p);
8610 fdp = p->p_fd;
8611 *retval = fdp->fd_cmask;
8612 fdp->fd_cmask = newmask & ALLPERMS;
8613 proc_fdunlock(p);
8614 return (0);
8615}
8616
8617/*
8618 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
8619 *
8620 * Parameters: p Process requesting to set the umask
8621 * uap User argument descriptor (see below)
8622 * retval umask of the process (parameter p)
8623 *
8624 * Indirect: uap->newmask umask to set
8625 * uap->xsecurity ACL to set
8626 *
8627 * Returns: 0 Success
8628 * !0 Not success
8629 *
8630 */
8631int
8632umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval)
8633{
8634 int ciferror;
8635 kauth_filesec_t xsecdst;
8636
8637 xsecdst = KAUTH_FILESEC_NONE;
8638 if (uap->xsecurity != USER_ADDR_NULL) {
8639 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
8640 return ciferror;
	}
8644
8645 ciferror = umask1(p, uap->newmask, xsecdst, retval);
8646
8647 if (xsecdst != KAUTH_FILESEC_NONE)
8648 kauth_filesec_free(xsecdst);
8649 return ciferror;
8650}
8651
8652int
8653umask(proc_t p, struct umask_args *uap, int32_t *retval)
8654{
8655 return(umask1(p, uap->newmask, UMASK_NOXSECURITY, retval));
8656}
8657
8658/*
8659 * Void all references to file by ripping underlying filesystem
8660 * away from vnode.
8661 */
8662/* ARGSUSED */
8663int
8664revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
8665{
8666 vnode_t vp;
8667 struct vnode_attr va;
8668 vfs_context_t ctx = vfs_context_current();
8669 int error;
8670 struct nameidata nd;
8671
8672 NDINIT(&nd, LOOKUP, OP_REVOKE, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
8673 uap->path, ctx);
8674 error = namei(&nd);
8675 if (error)
8676 return (error);
8677 vp = nd.ni_vp;
8678
8679 nameidone(&nd);
8680
8681 if (!(vnode_ischr(vp) || vnode_isblk(vp))) {
8682 error = ENOTSUP;
8683 goto out;
8684 }
8685
8686 if (vnode_isblk(vp) && vnode_ismountedon(vp)) {
8687 error = EBUSY;
8688 goto out;
8689 }
8690
8691#if CONFIG_MACF
8692 error = mac_vnode_check_revoke(ctx, vp);
8693 if (error)
8694 goto out;
8695#endif
8696
8697 VATTR_INIT(&va);
8698 VATTR_WANTED(&va, va_uid);
8699 if ((error = vnode_getattr(vp, &va, ctx)))
8700 goto out;
8701 if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
8702 (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
8703 goto out;
8704 if (vp->v_usecount > 0 || (vnode_isaliased(vp)))
8705 VNOP_REVOKE(vp, REVOKEALL, ctx);
8706out:
8707 vnode_put(vp);
8708 return (error);
8709}
8710
8711
8712/*
 * HFS/HFS Plus SPECIFIC SYSTEM CALLS
8714 * The following system calls are designed to support features
8715 * which are specific to the HFS & HFS Plus volume formats
8716 */
8717
8718
8719/*
8720 * Obtain attribute information on objects in a directory while enumerating
8721 * the directory.
8722 */
8723/* ARGSUSED */
8724int
8725getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
8726{
8727 vnode_t vp;
8728 struct fileproc *fp;
8729 uio_t auio = NULL;
8730 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
8731 uint32_t count = 0, savecount = 0;
8732 uint32_t newstate = 0;
8733 int error, eofflag;
8734 uint32_t loff = 0;
8735 struct attrlist attributelist;
8736 vfs_context_t ctx = vfs_context_current();
8737 int fd = uap->fd;
8738 char uio_buf[ UIO_SIZEOF(1) ];
8739 kauth_action_t action;
8740
8741 AUDIT_ARG(fd, fd);
8742
8743 /* Get the attributes into kernel space */
8744 if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
8745 return(error);
8746 }
8747 if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
8748 return(error);
8749 }
8750 savecount = count;
8751 if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
8752 return (error);
8753 }
8754 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
8755 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
8756 error = EBADF;
8757 goto out;
8758 }
8759
8760
8761#if CONFIG_MACF
8762 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
8763 fp->f_fglob);
8764 if (error)
8765 goto out;
8766#endif
8767
8768
8769 if ( (error = vnode_getwithref(vp)) )
8770 goto out;
8771
8772 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
8773
8774unionread:
8775 if (vp->v_type != VDIR) {
8776 (void)vnode_put(vp);
8777 error = EINVAL;
8778 goto out;
8779 }
8780
8781#if CONFIG_MACF
8782 error = mac_vnode_check_readdir(ctx, vp);
8783 if (error != 0) {
8784 (void)vnode_put(vp);
8785 goto out;
8786 }
8787#endif /* MAC */
8788
	/* set up the uio structure which will contain the user's return buffer */
8790 loff = fp->f_fglob->fg_offset;
8791 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
8792 uio_addiov(auio, uap->buffer, uap->buffersize);
8793
8794 /*
	 * If the only item requested is file names, we can let that pass with
8796 * just LIST_DIRECTORY. If they want any other attributes, that means
8797 * they need SEARCH as well.
8798 */
8799 action = KAUTH_VNODE_LIST_DIRECTORY;
8800 if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
8801 attributelist.fileattr || attributelist.dirattr)
8802 action |= KAUTH_VNODE_SEARCH;
8803
8804 if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {
8805
8806 /* Believe it or not, uap->options only has 32-bits of valid
8807 * info, so truncate before extending again */
8808
8809 error = VNOP_READDIRATTR(vp, &attributelist, auio, count,
8810 (u_long)(uint32_t)uap->options, &newstate, &eofflag, &count, ctx);
8811 }
8812
8813 if (error) {
8814 (void) vnode_put(vp);
8815 goto out;
8816 }
8817
8818 /*
8819 * If we've got the last entry of a directory in a union mount
8820 * then reset the eofflag and pretend there's still more to come.
8821 * The next call will again set eofflag and the buffer will be empty,
8822 * so traverse to the underlying directory and do the directory
8823 * read there.
8824 */
8825 if (eofflag && vp->v_mount->mnt_flag & MNT_UNION) {
8826 if (uio_resid(auio) < (user_ssize_t) uap->buffersize) { // Got some entries
8827 eofflag = 0;
8828 } else { // Empty buffer
8829 struct vnode *tvp = vp;
8830 if (lookup_traverse_union(tvp, &vp, ctx) == 0) {
8831 vnode_ref_ext(vp, fp->f_fglob->fg_flag & O_EVTONLY, 0);
8832 fp->f_fglob->fg_data = (caddr_t) vp;
8833 fp->f_fglob->fg_offset = 0; // reset index for new dir
8834 count = savecount;
8835 vnode_rele_internal(tvp, fp->f_fglob->fg_flag & O_EVTONLY, 0, 0);
8836 vnode_put(tvp);
8837 goto unionread;
8838 }
8839 vp = tvp;
8840 }
8841 }
8842
8843 (void)vnode_put(vp);
8844
8845 if (error)
8846 goto out;
8847 fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
8848
8849 if ((error = copyout((caddr_t) &count, uap->count, sizeof(count))))
8850 goto out;
8851 if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate))))
8852 goto out;
8853 if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff))))
8854 goto out;
8855
8856 *retval = eofflag; /* similar to getdirentries */
8857 error = 0;
8858out:
8859 file_drop(fd);
	return (error); /* return the error from earlier, and a retval of 0 or 1 now */
8861
8862} /* end of getdirentriesattr system call */
8863
/*
 * Exchange data between two files
 */
8867
8868/* ARGSUSED */
8869int
8870exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t *retval)
8871{
8872
8873 struct nameidata fnd, snd;
8874 vfs_context_t ctx = vfs_context_current();
8875 vnode_t fvp;
8876 vnode_t svp;
8877 int error;
8878 u_int32_t nameiflags;
8879 char *fpath = NULL;
8880 char *spath = NULL;
8881 int flen=0, slen=0;
8882 int from_truncated=0, to_truncated=0;
8883#if CONFIG_FSE
8884 fse_info f_finfo, s_finfo;
8885#endif
8886
8887 nameiflags = 0;
8888 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
8889
8890 NDINIT(&fnd, LOOKUP, OP_EXCHANGEDATA, nameiflags | AUDITVNPATH1,
8891 UIO_USERSPACE, uap->path1, ctx);
8892
8893 error = namei(&fnd);
8894 if (error)
8895 goto out2;
8896
8897 nameidone(&fnd);
8898 fvp = fnd.ni_vp;
8899
8900 NDINIT(&snd, LOOKUP, OP_EXCHANGEDATA, CN_NBMOUNTLOOK | nameiflags | AUDITVNPATH2,
8901 UIO_USERSPACE, uap->path2, ctx);
8902
8903 error = namei(&snd);
8904 if (error) {
8905 vnode_put(fvp);
8906 goto out2;
8907 }
8908 nameidone(&snd);
8909 svp = snd.ni_vp;
8910
8911 /*
8912 * if the files are the same, return an inval error
8913 */
8914 if (svp == fvp) {
8915 error = EINVAL;
8916 goto out;
8917 }
8918
8919 /*
8920 * if the files are on different volumes, return an error
8921 */
8922 if (svp->v_mount != fvp->v_mount) {
8923 error = EXDEV;
8924 goto out;
8925 }
8926
8927 /* If they're not files, return an error */
8928 if ( (vnode_isreg(fvp) == 0) || (vnode_isreg(svp) == 0)) {
8929 error = EINVAL;
8930 goto out;
8931 }
8932
8933#if CONFIG_MACF
8934 error = mac_vnode_check_exchangedata(ctx,
8935 fvp, svp);
8936 if (error)
8937 goto out;
8938#endif
8939 if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
8940 ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0))
8941 goto out;
8942
8943 if (
8944#if CONFIG_FSE
8945 need_fsevent(FSE_EXCHANGE, fvp) ||
8946#endif
8947 kauth_authorize_fileop_has_listeners()) {
8948 GET_PATH(fpath);
8949 GET_PATH(spath);
8950 if (fpath == NULL || spath == NULL) {
8951 error = ENOMEM;
8952 goto out;
8953 }
8954
8955 flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
8956 slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);
8957
8958#if CONFIG_FSE
8959 get_fse_info(fvp, &f_finfo, ctx);
8960 get_fse_info(svp, &s_finfo, ctx);
8961 if (from_truncated || to_truncated) {
8962 // set it here since only the f_finfo gets reported up to user space
8963 f_finfo.mode |= FSE_TRUNCATED_PATH;
8964 }
8965#endif
8966 }
8967 /* Ok, make the call */
8968 error = VNOP_EXCHANGE(fvp, svp, 0, ctx);
8969
8970 if (error == 0) {
8971 const char *tmpname;
8972
8973 if (fpath != NULL && spath != NULL) {
8974 /* call out to allow 3rd party notification of exchangedata.
8975 * Ignore result of kauth_authorize_fileop call.
8976 */
8977 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
8978 (uintptr_t)fpath, (uintptr_t)spath);
8979 }
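		/*
		 * After a successful exchange, swap the two vnodes' cached names
		 * (and, if they differ, their cached parents) under the name
		 * cache lock so the name cache stays consistent with the swap
		 * the file system just performed.
		 */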
8980 name_cache_lock();
8981
8982 tmpname = fvp->v_name;
8983 fvp->v_name = svp->v_name;
8984 svp->v_name = tmpname;
8985
8986 if (fvp->v_parent != svp->v_parent) {
8987 vnode_t tmp;
8988
8989 tmp = fvp->v_parent;
8990 fvp->v_parent = svp->v_parent;
8991 svp->v_parent = tmp;
8992 }
8993 name_cache_unlock();
8994
8995#if CONFIG_FSE
8996 if (fpath != NULL && spath != NULL) {
8997 add_fsevent(FSE_EXCHANGE, ctx,
8998 FSE_ARG_STRING, flen, fpath,
8999 FSE_ARG_FINFO, &f_finfo,
9000 FSE_ARG_STRING, slen, spath,
9001 FSE_ARG_FINFO, &s_finfo,
9002 FSE_ARG_DONE);
9003 }
9004#endif
9005 }
9006
9007out:
9008 if (fpath != NULL)
9009 RELEASE_PATH(fpath);
9010 if (spath != NULL)
9011 RELEASE_PATH(spath);
9012 vnode_put(svp);
9013 vnode_put(fvp);
9014out2:
9015 return (error);
9016}
9017
9018/*
9019 * Return (in MB) the amount of freespace on the given vnode's volume.
9020 */
9021uint32_t freespace_mb(vnode_t vp);
9022
9023uint32_t
9024freespace_mb(vnode_t vp)
9025{
9026 vfs_update_vfsstat(vp->v_mount, vfs_context_current(), VFS_USER_EVENT);
9027 return (((uint64_t)vp->v_mount->mnt_vfsstat.f_bavail *
9028 vp->v_mount->mnt_vfsstat.f_bsize) >> 20);
9029}
9030
9031#if CONFIG_SEARCHFS
9032
9033/* ARGSUSED */
9034
9035int
9036searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
9037{
9038 vnode_t vp, tvp;
9039 int i, error=0;
9040 int fserror = 0;
9041 struct nameidata nd;
9042 struct user64_fssearchblock searchblock;
9043 struct searchstate *state;
9044 struct attrlist *returnattrs;
9045 struct timeval timelimit;
9046 void *searchparams1,*searchparams2;
9047 uio_t auio = NULL;
9048 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9049 uint32_t nummatches;
9050 int mallocsize;
9051 uint32_t nameiflags;
9052 vfs_context_t ctx = vfs_context_current();
9053 char uio_buf[ UIO_SIZEOF(1) ];
9054
9055 /* Start by copying in fsearchblock parameter list */
9056 if (IS_64BIT_PROCESS(p)) {
9057 error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
9058 timelimit.tv_sec = searchblock.timelimit.tv_sec;
9059 timelimit.tv_usec = searchblock.timelimit.tv_usec;
9060 }
9061 else {
9062 struct user32_fssearchblock tmp_searchblock;
9063
9064 error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
9065 // munge into 64-bit version
9066 searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
9067 searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
9068 searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
9069 searchblock.maxmatches = tmp_searchblock.maxmatches;
9070 /*
9071 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
9072 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
9073 */
9074 timelimit.tv_sec = (__darwin_time_t) tmp_searchblock.timelimit.tv_sec;
9075 timelimit.tv_usec = (__darwin_useconds_t) tmp_searchblock.timelimit.tv_usec;
9076 searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
9077 searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
9078 searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
9079 searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
9080 searchblock.searchattrs = tmp_searchblock.searchattrs;
9081 }
9082 if (error)
9083 return(error);
9084
9085 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
9086 */
9087 if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
9088 searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS)
9089 return(EINVAL);
9090
9091 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
	/* It all has to go into local memory and it's not that big, so we might as well put it all together. */
9093 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
9094 /* block. */
9095 /* */
9096 /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */
9097 /* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */
9098 /* assumes the size is still 556 bytes it will continue to work */
9099
9100 mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
9101 sizeof(struct attrlist) + sizeof(struct searchstate) + (2*sizeof(uint32_t));
9102
9103 MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);
9104
9105 /* Now set up the various pointers to the correct place in our newly allocated memory */
9106
9107 searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
9108 returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
9109 state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof (struct attrlist));
9110
9111 /* Now copy in the stuff given our local variables. */
9112
9113 if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1)))
9114 goto freeandexit;
9115
9116 if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2)))
9117 goto freeandexit;
9118
9119 if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist))))
9120 goto freeandexit;
9121
9122 if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate))))
9123 goto freeandexit;
9124
9125 /*
9126 * When searching a union mount, need to set the
9127 * start flag at the first call on each layer to
9128 * reset state for the new volume.
9129 */
9130 if (uap->options & SRCHFS_START)
9131 state->ss_union_layer = 0;
9132 else
9133 uap->options |= state->ss_union_flags;
9134 state->ss_union_flags = 0;
9135
9136 /*
9137 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
9138 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
9139 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
9140 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
9141 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
9142 */
9143
9144 if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) {
9145 attrreference_t* string_ref;
9146 u_int32_t* start_length;
9147 user64_size_t param_length;
9148
9149 /* validate searchparams1 */
9150 param_length = searchblock.sizeofsearchparams1;
9151 /* skip the word that specifies length of the buffer */
		start_length = (u_int32_t *) searchparams1;
		start_length = start_length + 1;
		string_ref = (attrreference_t *) start_length;
9155
		/* ensure no negative offsets or overly large offsets */
9157 if (string_ref->attr_dataoffset < 0 ) {
9158 error = EINVAL;
9159 goto freeandexit;
9160 }
9161 if (string_ref->attr_length > MAXPATHLEN) {
9162 error = EINVAL;
9163 goto freeandexit;
9164 }
9165
9166 /* Check for pointer overflow in the string ref */
9167 if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) {
9168 error = EINVAL;
9169 goto freeandexit;
9170 }
9171
9172 if (((char*) string_ref + string_ref->attr_dataoffset) > ((char*)searchparams1 + param_length)) {
9173 error = EINVAL;
9174 goto freeandexit;
9175 }
9176 if (((char*)string_ref + string_ref->attr_dataoffset + string_ref->attr_length) > ((char*)searchparams1 + param_length)) {
9177 error = EINVAL;
9178 goto freeandexit;
9179 }
9180 }
9181
	/* set up the uio structure which will contain the user's return buffer */
9183 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
9184 uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);
9185
9186 nameiflags = 0;
9187 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
9188 NDINIT(&nd, LOOKUP, OP_SEARCHFS, nameiflags | AUDITVNPATH1,
9189 UIO_USERSPACE, uap->path, ctx);
9190
9191 error = namei(&nd);
9192 if (error)
9193 goto freeandexit;
9194 vp = nd.ni_vp;
9195 nameidone(&nd);
9196
9197 /*
9198 * Switch to the root vnode for the volume
9199 */
9200 error = VFS_ROOT(vnode_mount(vp), &tvp, ctx);
9201 vnode_put(vp);
9202 if (error)
9203 goto freeandexit;
9204 vp = tvp;
9205
9206 /*
9207 * If it's a union mount, the path lookup takes
9208 * us to the top layer. But we may need to descend
9209 * to a lower layer. For non-union mounts the layer
9210 * is always zero.
9211 */
9212 for (i = 0; i < (int) state->ss_union_layer; i++) {
9213 if ((vp->v_mount->mnt_flag & MNT_UNION) == 0)
9214 break;
9215 tvp = vp;
9216 vp = vp->v_mount->mnt_vnodecovered;
9217 if (vp == NULL) {
9218 vnode_put(tvp);
9219 error = ENOENT;
9220 goto freeandexit;
9221 }
9222 error = vnode_getwithref(vp);
9223 vnode_put(tvp);
9224 if (error)
9225 goto freeandexit;
9226 }
9227
9228#if CONFIG_MACF
9229 error = mac_vnode_check_searchfs(ctx, vp, &searchblock.searchattrs);
9230 if (error) {
9231 vnode_put(vp);
9232 goto freeandexit;
9233 }
9234#endif
9235
9236
9237 /*
9238 * If searchblock.maxmatches == 0, then skip the search. This has happened
	 * before and sometimes the underlying code doesn't deal with it well.
9240 */
9241 if (searchblock.maxmatches == 0) {
9242 nummatches = 0;
9243 goto saveandexit;
9244 }
9245
9246 /*
	 * All right, we have everything we need, so let's make that call.
9248 *
9249 * We keep special track of the return value from the file system:
9250 * EAGAIN is an acceptable error condition that shouldn't keep us
9251 * from copying out any results...
9252 */
9253
9254 fserror = VNOP_SEARCHFS(vp,
9255 searchparams1,
9256 searchparams2,
9257 &searchblock.searchattrs,
9258 (u_long)searchblock.maxmatches,
9259 &timelimit,
9260 returnattrs,
9261 &nummatches,
9262 (u_long)uap->scriptcode,
9263 (u_long)uap->options,
9264 auio,
9265 (struct searchstate *) &state->ss_fsstate,
9266 ctx);
9267
9268 /*
9269 * If it's a union mount we need to be called again
9270 * to search the mounted-on filesystem.
9271 */
9272 if ((vp->v_mount->mnt_flag & MNT_UNION) && fserror == 0) {
9273 state->ss_union_flags = SRCHFS_START;
9274 state->ss_union_layer++; // search next layer down
9275 fserror = EAGAIN;
9276 }
9277
9278saveandexit:
9279
9280 vnode_put(vp);
9281
	/* Now copy out the stuff that needs copying out. That means the number of matches and the
	   search state. Everything was already put into the return buffer by the vop call. */
9284
9285 if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0)
9286 goto freeandexit;
9287
9288 if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0)
9289 goto freeandexit;
9290
9291 error = fserror;
9292
9293freeandexit:
9294
9295 FREE(searchparams1,M_TEMP);
9296
9297 return(error);
9298
9299
9300} /* end of searchfs system call */
9301
9302#else /* CONFIG_SEARCHFS */
9303
9304int
9305searchfs(__unused proc_t p, __unused struct searchfs_args *uap, __unused int32_t *retval)
9306{
9307 return (ENOTSUP);
9308}
9309
9310#endif /* CONFIG_SEARCHFS */
9311
9312
9313lck_grp_attr_t * nspace_group_attr;
9314lck_attr_t * nspace_lock_attr;
9315lck_grp_t * nspace_mutex_group;
9316
9317lck_mtx_t nspace_handler_lock;
9318lck_mtx_t nspace_handler_exclusion_lock;
9319
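/*
 * snapshot_timestamp marks the current snapshot epoch; change events whose
 * ctime falls at or before it are routed to the snapshot handler (see
 * nspace_snapshot_event()).  nspace_allow_virtual_devs lets snapshot events
 * be delivered even for vnodes on virtual (disk image) devices, which are
 * normally skipped to avoid deadlocking against diskimages-helper.
 */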
9320time_t snapshot_timestamp=0;
9321int nspace_allow_virtual_devs=0;
9322
9323void nspace_handler_init(void);
9324
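/*
 * One in-flight namespace event.  vp/vid identify the vnode the event is
 * for, op encodes the operation and event type, token is the identifier
 * handed to the user-space handler, flags tracks the NSPACE_ITEM_* state,
 * and refcount counts the threads currently waiting on this slot.
 */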
9325typedef struct nspace_item_info {
9326 struct vnode *vp;
9327 void *arg;
9328 uint64_t op;
9329 uint32_t vid;
9330 uint32_t flags;
9331 uint32_t token;
9332 uint32_t refcount;
9333} nspace_item_info;
9334
9335#define MAX_NSPACE_ITEMS 128
9336nspace_item_info nspace_items[MAX_NSPACE_ITEMS];
9337uint32_t nspace_item_idx=0; // also used as the sleep/wakeup rendezvous address
9338uint32_t nspace_token_id=0;
9339uint32_t nspace_handler_timeout = 15; // seconds
9340
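/*
 * nspace_items[].flags state bits: a slot starts out NEW, is marked
 * PROCESSING once a handler picks it up, and ends up DONE or CANCELLED
 * before being cleared for re-use; RESET_TIMER extends a waiter's timeout.
 */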
9341#define NSPACE_ITEM_NEW 0x0001
9342#define NSPACE_ITEM_PROCESSING 0x0002
9343#define NSPACE_ITEM_DEAD 0x0004
9344#define NSPACE_ITEM_CANCELLED 0x0008
9345#define NSPACE_ITEM_DONE 0x0010
9346#define NSPACE_ITEM_RESET_TIMER 0x0020
9347
9348#define NSPACE_ITEM_NSPACE_EVENT 0x0040
9349#define NSPACE_ITEM_SNAPSHOT_EVENT 0x0080
9350
9351#define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT)
9352
9353//#pragma optimization_level 0
9354
9355typedef enum {
9356 NSPACE_HANDLER_NSPACE = 0,
9357 NSPACE_HANDLER_SNAPSHOT = 1,
9358
9359 NSPACE_HANDLER_COUNT,
9360} nspace_type_t;
9361
9362typedef struct {
9363 uint64_t handler_tid;
9364 struct proc *handler_proc;
9365 int handler_busy;
9366} nspace_handler_t;
9367
9368nspace_handler_t nspace_handlers[NSPACE_HANDLER_COUNT];
9369
9370/* namespace fsctl functions */
9371static int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type);
9372static int nspace_item_flags_for_type(nspace_type_t nspace_type);
9373static int nspace_open_flags_for_type(nspace_type_t nspace_type);
9374static nspace_type_t nspace_type_for_op(uint64_t op);
9375static int nspace_is_special_process(struct proc *proc);
9376static int vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx);
9377static int wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type);
9378static int validate_namespace_args (int is64bit, int size);
9379static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data);
9380
9381
9382static inline int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type)
9383{
9384 switch(nspace_type) {
9385 case NSPACE_HANDLER_NSPACE:
9386 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_NSPACE_EVENT;
9387 case NSPACE_HANDLER_SNAPSHOT:
9388 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_SNAPSHOT_EVENT;
9389 default:
9390 printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type);
9391 return 0;
9392 }
9393}
9394
9395static inline int nspace_item_flags_for_type(nspace_type_t nspace_type)
9396{
9397 switch(nspace_type) {
9398 case NSPACE_HANDLER_NSPACE:
9399 return NSPACE_ITEM_NSPACE_EVENT;
9400 case NSPACE_HANDLER_SNAPSHOT:
9401 return NSPACE_ITEM_SNAPSHOT_EVENT;
9402 default:
9403 printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type);
9404 return 0;
9405 }
9406}
9407
9408static inline int nspace_open_flags_for_type(nspace_type_t nspace_type)
9409{
9410 switch(nspace_type) {
9411 case NSPACE_HANDLER_NSPACE:
9412 return FREAD | FWRITE | O_EVTONLY;
9413 case NSPACE_HANDLER_SNAPSHOT:
9414 return FREAD | O_EVTONLY;
9415 default:
9416 printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type);
9417 return 0;
9418 }
9419}
9420
9421static inline nspace_type_t nspace_type_for_op(uint64_t op)
9422{
9423 switch(op & NAMESPACE_HANDLER_EVENT_TYPE_MASK) {
9424 case NAMESPACE_HANDLER_NSPACE_EVENT:
9425 return NSPACE_HANDLER_NSPACE;
9426 case NAMESPACE_HANDLER_SNAPSHOT_EVENT:
9427 return NSPACE_HANDLER_SNAPSHOT;
9428 default:
9429 printf("nspace_type_for_op: invalid op mask %llx\n", op & NAMESPACE_HANDLER_EVENT_TYPE_MASK);
9430 return NSPACE_HANDLER_NSPACE;
9431 }
9432}
9433
9434static inline int nspace_is_special_process(struct proc *proc)
9435{
9436 int i;
9437 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
9438 if (proc == nspace_handlers[i].handler_proc)
9439 return 1;
9440 }
9441 return 0;
9442}
9443
9444void
9445nspace_handler_init(void)
9446{
9447 nspace_lock_attr = lck_attr_alloc_init();
9448 nspace_group_attr = lck_grp_attr_alloc_init();
9449 nspace_mutex_group = lck_grp_alloc_init("nspace-mutex", nspace_group_attr);
9450 lck_mtx_init(&nspace_handler_lock, nspace_mutex_group, nspace_lock_attr);
9451 lck_mtx_init(&nspace_handler_exclusion_lock, nspace_mutex_group, nspace_lock_attr);
9452 memset(&nspace_items[0], 0, sizeof(nspace_items));
9453}
9454
9455void
9456nspace_proc_exit(struct proc *p)
9457{
9458 int i, event_mask = 0;
9459
9460 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
9461 if (p == nspace_handlers[i].handler_proc) {
9462 event_mask |= nspace_item_flags_for_type(i);
9463 nspace_handlers[i].handler_tid = 0;
9464 nspace_handlers[i].handler_proc = NULL;
9465 }
9466 }
9467
9468 if (event_mask == 0) {
9469 return;
9470 }
9471
9472 lck_mtx_lock(&nspace_handler_lock);
9473 if (event_mask & NSPACE_ITEM_SNAPSHOT_EVENT) {
		// if this process was the snapshot handler, zero snapshot_timestamp
9475 snapshot_timestamp = 0;
9476 }
9477
9478 //
9479 // unblock anyone that's waiting for the handler that died
9480 //
9481 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9482 if (nspace_items[i].flags & (NSPACE_ITEM_NEW | NSPACE_ITEM_PROCESSING)) {
9483
9484 if ( nspace_items[i].flags & event_mask ) {
9485
9486 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
9487 vnode_lock_spin(nspace_items[i].vp);
9488 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
9489 vnode_unlock(nspace_items[i].vp);
9490 }
9491 nspace_items[i].vp = NULL;
9492 nspace_items[i].vid = 0;
9493 nspace_items[i].flags = NSPACE_ITEM_DONE;
9494 nspace_items[i].token = 0;
9495
9496 wakeup((caddr_t)&(nspace_items[i].vp));
9497 }
9498 }
9499 }
9500
9501 wakeup((caddr_t)&nspace_item_idx);
9502 lck_mtx_unlock(&nspace_handler_lock);
9503}
9504
9505
9506int
9507resolve_nspace_item(struct vnode *vp, uint64_t op)
9508{
9509 return resolve_nspace_item_ext(vp, op, NULL);
9510}
9511
9512int
9513resolve_nspace_item_ext(struct vnode *vp, uint64_t op, void *arg)
9514{
9515 int i, error, keep_waiting;
9516 struct timespec ts;
9517 nspace_type_t nspace_type = nspace_type_for_op(op);
9518
9519 // only allow namespace events on regular files, directories and symlinks.
9520 if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
9521 return 0;
9522 }
9523
9524 //
9525 // if this is a snapshot event and the vnode is on a
9526 // disk image just pretend nothing happened since any
9527 // change to the disk image will cause the disk image
9528 // itself to get backed up and this avoids multi-way
9529 // deadlocks between the snapshot handler and the ever
9530 // popular diskimages-helper process. the variable
9531 // nspace_allow_virtual_devs allows this behavior to
9532 // be overridden (for use by the Mobile TimeMachine
9533 // testing infrastructure which uses disk images)
9534 //
9535 if ( (op & NAMESPACE_HANDLER_SNAPSHOT_EVENT)
9536 && (vp->v_mount != NULL)
9537 && (vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV)
9538 && !nspace_allow_virtual_devs) {
9539
9540 return 0;
9541 }
9542
9543 // if (thread_tid(current_thread()) == namespace_handler_tid) {
9544 if (nspace_handlers[nspace_type].handler_proc == NULL) {
9545 return 0;
9546 }
9547
9548 if (nspace_is_special_process(current_proc())) {
9549 return EDEADLK;
9550 }
9551
9552 lck_mtx_lock(&nspace_handler_lock);
9553
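	/*
	 * Find the slot to wait on: reuse an existing entry for this vp/op
	 * pair (bumping its refcount), otherwise claim a free slot.  If the
	 * table is completely full, sleep until a waiter releases a slot and
	 * then retry the scan.
	 */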
9554retry:
9555 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9556 if (vp == nspace_items[i].vp && op == nspace_items[i].op) {
9557 break;
9558 }
9559 }
9560
9561 if (i >= MAX_NSPACE_ITEMS) {
9562 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9563 if (nspace_items[i].flags == 0) {
9564 break;
9565 }
9566 }
9567 } else {
9568 nspace_items[i].refcount++;
9569 }
9570
9571 if (i >= MAX_NSPACE_ITEMS) {
9572 ts.tv_sec = nspace_handler_timeout;
9573 ts.tv_nsec = 0;
9574
9575 error = msleep((caddr_t)&nspace_token_id, &nspace_handler_lock, PVFS|PCATCH, "nspace-no-space", &ts);
9576 if (error == 0) {
9577 // an entry got free'd up, go see if we can get a slot
9578 goto retry;
9579 } else {
9580 lck_mtx_unlock(&nspace_handler_lock);
9581 return error;
9582 }
9583 }
9584
9585 //
9586 // if it didn't already exist, add it. if it did exist
9587 // we'll get woken up when someone does a wakeup() on
9588 // the slot in the nspace_items table.
9589 //
9590 if (vp != nspace_items[i].vp) {
9591 nspace_items[i].vp = vp;
9592 nspace_items[i].arg = (arg == NSPACE_REARM_NO_ARG) ? NULL : arg; // arg is {NULL, true, uio *} - only pass uio thru to the user
9593 nspace_items[i].op = op;
9594 nspace_items[i].vid = vnode_vid(vp);
9595 nspace_items[i].flags = NSPACE_ITEM_NEW;
9596 nspace_items[i].flags |= nspace_item_flags_for_type(nspace_type);
9597 if (nspace_items[i].flags & NSPACE_ITEM_SNAPSHOT_EVENT) {
9598 if (arg) {
9599 vnode_lock_spin(vp);
9600 vp->v_flag |= VNEEDSSNAPSHOT;
9601 vnode_unlock(vp);
9602 }
9603 }
9604
9605 nspace_items[i].token = 0;
9606 nspace_items[i].refcount = 1;
9607
9608 wakeup((caddr_t)&nspace_item_idx);
9609 }
9610
9611 //
9612 // Now go to sleep until the handler does a wakeup on this
9613 // slot in the nspace_items table (or we timeout).
9614 //
9615 keep_waiting = 1;
9616 while(keep_waiting) {
9617 ts.tv_sec = nspace_handler_timeout;
9618 ts.tv_nsec = 0;
9619 error = msleep((caddr_t)&(nspace_items[i].vp), &nspace_handler_lock, PVFS|PCATCH, "namespace-done", &ts);
9620
9621 if (nspace_items[i].flags & NSPACE_ITEM_DONE) {
9622 error = 0;
9623 } else if (nspace_items[i].flags & NSPACE_ITEM_CANCELLED) {
9624 error = nspace_items[i].token;
9625 } else if (error == EWOULDBLOCK || error == ETIMEDOUT) {
9626 if (nspace_items[i].flags & NSPACE_ITEM_RESET_TIMER) {
9627 nspace_items[i].flags &= ~NSPACE_ITEM_RESET_TIMER;
9628 continue;
9629 } else {
9630 error = ETIMEDOUT;
9631 }
9632 } else if (error == 0) {
9633 // hmmm, why did we get woken up?
			printf("woken up for token %d but it's not done, cancelled, or timed out and error == 0.\n",
			       nspace_items[i].token);
9636 }
9637
9638 if (--nspace_items[i].refcount == 0) {
9639 nspace_items[i].vp = NULL; // clear this so that no one will match on it again
9640 nspace_items[i].arg = NULL;
9641 nspace_items[i].token = 0; // clear this so that the handler will not find it anymore
9642 nspace_items[i].flags = 0; // this clears it for re-use
9643 }
9644 wakeup(&nspace_token_id);
9645 keep_waiting = 0;
9646 }
9647
9648 lck_mtx_unlock(&nspace_handler_lock);
9649
9650 return error;
9651}
9652
9653int nspace_snapshot_event(vnode_t vp, time_t ctime, uint64_t op_type, void *arg)
9654{
9655 int snapshot_error = 0;
9656
9657 if (vp == NULL) {
9658 return 0;
9659 }
9660
9661 /* Swap files are special; skip them */
9662 if (vnode_isswap(vp)) {
9663 return 0;
9664 }
9665
9666 if (ctime != 0 && snapshot_timestamp != 0 && (ctime <= snapshot_timestamp || vnode_needssnapshots(vp))) {
9667 // the change time is within this epoch
9668 int error;
9669
9670 error = resolve_nspace_item_ext(vp, op_type | NAMESPACE_HANDLER_SNAPSHOT_EVENT, arg);
9671 if (error == EDEADLK) {
9672 snapshot_error = 0;
9673 } else if (error) {
9674 if (error == EAGAIN) {
9675 printf("nspace_snapshot_event: timed out waiting for namespace handler...\n");
9676 } else if (error == EINTR) {
9677 // printf("nspace_snapshot_event: got a signal while waiting for namespace handler...\n");
9678 snapshot_error = EINTR;
9679 }
9680 }
9681 }
9682
9683 return snapshot_error;
9684}
9685
9686int
9687get_nspace_item_status(struct vnode *vp, int32_t *status)
9688{
9689 int i;
9690
9691 lck_mtx_lock(&nspace_handler_lock);
9692 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9693 if (nspace_items[i].vp == vp) {
9694 break;
9695 }
9696 }
9697
9698 if (i >= MAX_NSPACE_ITEMS) {
9699 lck_mtx_unlock(&nspace_handler_lock);
9700 return ENOENT;
9701 }
9702
9703 *status = nspace_items[i].flags;
9704 lck_mtx_unlock(&nspace_handler_lock);
9705 return 0;
9706}
9707
9708
9709#if 0
9710static int
9711build_volfs_path(struct vnode *vp, char *path, int *len)
9712{
9713 struct vnode_attr va;
9714 int ret;
9715
9716 VATTR_INIT(&va);
9717 VATTR_WANTED(&va, va_fsid);
9718 VATTR_WANTED(&va, va_fileid);
9719
9720 if (vnode_getattr(vp, &va, vfs_context_kernel()) != 0) {
9721 *len = snprintf(path, *len, "/non/existent/path/because/vnode_getattr/failed") + 1;
9722 ret = -1;
9723 } else {
9724 *len = snprintf(path, *len, "/.vol/%d/%lld", (dev_t)va.va_fsid, va.va_fileid) + 1;
9725 ret = 0;
9726 }
9727
9728 return ret;
9729}
9730#endif
9731
9732//
9733// Note: this function does NOT check permissions on all of the
9734// parent directories leading to this vnode. It should only be
9735// called on behalf of a root process. Otherwise a process may
9736// get access to a file because the file itself is readable even
9737// though its parent directories would prevent access.
9738//
9739static int
9740vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx)
9741{
9742 int error, action;
9743
9744 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
9745 return error;
9746 }
9747
9748#if CONFIG_MACF
9749 error = mac_vnode_check_open(ctx, vp, fmode);
9750 if (error)
9751 return error;
9752#endif
9753
9754 /* compute action to be authorized */
9755 action = 0;
9756 if (fmode & FREAD) {
9757 action |= KAUTH_VNODE_READ_DATA;
9758 }
9759 if (fmode & (FWRITE | O_TRUNC)) {
9760 /*
9761 * If we are writing, appending, and not truncating,
9762 * indicate that we are appending so that if the
9763 * UF_APPEND or SF_APPEND bits are set, we do not deny
9764 * the open.
9765 */
9766 if ((fmode & O_APPEND) && !(fmode & O_TRUNC)) {
9767 action |= KAUTH_VNODE_APPEND_DATA;
9768 } else {
9769 action |= KAUTH_VNODE_WRITE_DATA;
9770 }
9771 }
9772
9773 if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
9774 return error;
9775
9776
9777 //
9778 // if the vnode is tagged VOPENEVT and the current process
9779 // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
9780 // flag to the open mode so that this open won't count against
9781 // the vnode when carbon delete() does a vnode_isinuse() to see
9782 // if a file is currently in use. this allows spotlight
9783 // importers to not interfere with carbon apps that depend on
9784 // the no-delete-if-busy semantics of carbon delete().
9785 //
9786 if ((vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
9787 fmode |= O_EVTONLY;
9788 }
9789
9790 if ( (error = VNOP_OPEN(vp, fmode, ctx)) ) {
9791 return error;
9792 }
9793 if ( (error = vnode_ref_ext(vp, fmode, 0)) ) {
9794 VNOP_CLOSE(vp, fmode, ctx);
9795 return error;
9796 }
9797
9798 /* Call out to allow 3rd party notification of open.
9799 * Ignore result of kauth_authorize_fileop call.
9800 */
9801#if CONFIG_MACF
9802 mac_vnode_notify_open(ctx, vp, fmode);
9803#endif
9804 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
9805 (uintptr_t)vp, 0);
9806
9807
9808 return 0;
9809}
9810
9811static int
9812wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type)
9813{
9814 int i;
9815 int error = 0;
9816 int unblock = 0;
9817 task_t curtask;
9818
9819 lck_mtx_lock(&nspace_handler_exclusion_lock);
9820 if (nspace_handlers[nspace_type].handler_busy) {
9821 lck_mtx_unlock(&nspace_handler_exclusion_lock);
9822 return EBUSY;
9823 }
9824
9825 nspace_handlers[nspace_type].handler_busy = 1;
9826 lck_mtx_unlock(&nspace_handler_exclusion_lock);
9827
9828 /*
9829 * Any process that gets here will be one of the namespace handlers.
9830 * As such, they should be prevented from acquiring DMG vnodes during vnode reclamation
9831 * as we can cause deadlocks to occur, because the namespace handler may prevent
9832 * VNOP_INACTIVE from proceeding. Mark the current task as a P_DEPENDENCY_CAPABLE
9833 * process.
9834 */
9835 curtask = current_task();
9836 bsd_set_dependency_capable (curtask);
9837
9838 lck_mtx_lock(&nspace_handler_lock);
9839 if (nspace_handlers[nspace_type].handler_proc == NULL) {
9840 nspace_handlers[nspace_type].handler_tid = thread_tid(current_thread());
9841 nspace_handlers[nspace_type].handler_proc = current_proc();
9842 }
9843
9844 if (nspace_type == NSPACE_HANDLER_SNAPSHOT &&
9845 (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
9846 error = EINVAL;
9847 }
9848
9849 while (error == 0) {
9850
9851 /* Try to find matching namespace item */
9852 for (i = 0; i < MAX_NSPACE_ITEMS; i++) {
9853 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
9854 if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
9855 break;
9856 }
9857 }
9858 }
9859
9860 if (i >= MAX_NSPACE_ITEMS) {
9861 /* Nothing is there yet. Wait for wake up and retry */
9862 error = msleep((caddr_t)&nspace_item_idx, &nspace_handler_lock, PVFS|PCATCH, "namespace-items", 0);
9863 if ((nspace_type == NSPACE_HANDLER_SNAPSHOT) && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
9864 /* Prevent infinite loop if snapshot handler exited */
9865 error = EINVAL;
9866 break;
9867 }
9868 continue;
9869 }
9870
9871 nspace_items[i].flags &= ~NSPACE_ITEM_NEW;
9872 nspace_items[i].flags |= NSPACE_ITEM_PROCESSING;
9873 nspace_items[i].token = ++nspace_token_id;
9874
9875 assert(nspace_items[i].vp);
9876 struct fileproc *fp;
9877 int32_t indx;
9878 int32_t fmode;
9879 struct proc *p = current_proc();
9880 vfs_context_t ctx = vfs_context_current();
9881 struct vnode_attr va;
		bool vn_get_successful = false;
9883 bool vn_open_successful = false;
9884 bool fp_alloc_successful = false;
9885
9886 /*
9887 * Use vnode pointer to acquire a file descriptor for
9888 * hand-off to userland
9889 */
9890 fmode = nspace_open_flags_for_type(nspace_type);
9891 error = vnode_getwithvid(nspace_items[i].vp, nspace_items[i].vid);
9892 if (error) goto cleanup;
		vn_get_successful = true;
9894
9895 error = vn_open_with_vp(nspace_items[i].vp, fmode, ctx);
9896 if (error) goto cleanup;
9897 vn_open_successful = true;
9898
9899 error = falloc(p, &fp, &indx, ctx);
9900 if (error) goto cleanup;
9901 fp_alloc_successful = true;
9902
9903 fp->f_fglob->fg_flag = fmode;
9904 fp->f_fglob->fg_ops = &vnops;
9905 fp->f_fglob->fg_data = (caddr_t)nspace_items[i].vp;
9906
9907 proc_fdlock(p);
9908 procfdtbl_releasefd(p, indx, NULL);
9909 fp_drop(p, indx, fp, 1);
9910 proc_fdunlock(p);
9911
9912 /*
9913 * All variants of the namespace handler struct support these three fields:
9914 * token, flags, and the FD pointer
9915 */
9916 error = copyout(&nspace_items[i].token, nhd->token, sizeof(uint32_t));
9917 if (error) goto cleanup;
9918 error = copyout(&nspace_items[i].op, nhd->flags, sizeof(uint64_t));
9919 if (error) goto cleanup;
9920 error = copyout(&indx, nhd->fdptr, sizeof(uint32_t));
9921 if (error) goto cleanup;
9922
		/*
		 * Handle optional fields:
		 * the extended version supports an info ptr (offset, length), and
		 * the namedata version supports a unique per-link object ID
		 */
9930 if (nhd->infoptr) {
9931 uio_t uio = (uio_t)nspace_items[i].arg;
9932 uint64_t u_offset, u_length;
9933
9934 if (uio) {
9935 u_offset = uio_offset(uio);
9936 u_length = uio_resid(uio);
9937 } else {
9938 u_offset = 0;
9939 u_length = 0;
9940 }
9941 error = copyout(&u_offset, nhd->infoptr, sizeof(uint64_t));
9942 if (error) goto cleanup;
9943 error = copyout(&u_length, nhd->infoptr + sizeof(uint64_t), sizeof(uint64_t));
9944 if (error) goto cleanup;
9945 }
9946
9947 if (nhd->objid) {
9948 VATTR_INIT(&va);
9949 VATTR_WANTED(&va, va_linkid);
9950 error = vnode_getattr(nspace_items[i].vp, &va, ctx);
9951 if (error) goto cleanup;
9952
9953 uint64_t linkid = 0;
9954 if (VATTR_IS_SUPPORTED (&va, va_linkid)) {
9955 linkid = (uint64_t)va.va_linkid;
9956 }
9957 error = copyout(&linkid, nhd->objid, sizeof(uint64_t));
9958 }
9959cleanup:
9960 if (error) {
9961 if (fp_alloc_successful) fp_free(p, indx, fp);
9962 if (vn_open_successful) vn_close(nspace_items[i].vp, fmode, ctx);
9963 unblock = 1;
9964 }
9965
		if (vn_get_successful) vnode_put(nspace_items[i].vp);
9967
9968 break;
9969 }
9970
9971 if (unblock) {
9972 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
9973 vnode_lock_spin(nspace_items[i].vp);
9974 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
9975 vnode_unlock(nspace_items[i].vp);
9976 }
9977 nspace_items[i].vp = NULL;
9978 nspace_items[i].vid = 0;
9979 nspace_items[i].flags = NSPACE_ITEM_DONE;
9980 nspace_items[i].token = 0;
9981
9982 wakeup((caddr_t)&(nspace_items[i].vp));
9983 }
9984
9985 if (nspace_type == NSPACE_HANDLER_SNAPSHOT) {
9986 // just go through every snapshot event and unblock it immediately.
9987 if (error && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
9988 for(i = 0; i < MAX_NSPACE_ITEMS; i++) {
9989 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
9990 if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
9991 nspace_items[i].vp = NULL;
9992 nspace_items[i].vid = 0;
9993 nspace_items[i].flags = NSPACE_ITEM_DONE;
9994 nspace_items[i].token = 0;
9995
9996 wakeup((caddr_t)&(nspace_items[i].vp));
9997 }
9998 }
9999 }
10000 }
10001 }
10002
10003 lck_mtx_unlock(&nspace_handler_lock);
10004
10005 lck_mtx_lock(&nspace_handler_exclusion_lock);
10006 nspace_handlers[nspace_type].handler_busy = 0;
10007 lck_mtx_unlock(&nspace_handler_exclusion_lock);
10008
10009 return error;
10010}
10011
10012static inline int validate_namespace_args (int is64bit, int size) {
10013
10014 if (is64bit) {
10015 /* Must be one of these */
10016 if (size == sizeof(user64_namespace_handler_info)) {
10017 goto sizeok;
10018 }
10019 if (size == sizeof(user64_namespace_handler_info_ext)) {
10020 goto sizeok;
10021 }
10022 if (size == sizeof(user64_namespace_handler_data)) {
10023 goto sizeok;
10024 }
10025 return EINVAL;
10026 }
10027 else {
10028 /* 32 bit -- must be one of these */
10029 if (size == sizeof(user32_namespace_handler_info)) {
10030 goto sizeok;
10031 }
10032 if (size == sizeof(user32_namespace_handler_info_ext)) {
10033 goto sizeok;
10034 }
10035 if (size == sizeof(user32_namespace_handler_data)) {
10036 goto sizeok;
10037 }
10038 return EINVAL;
10039 }
10040
10041sizeok:
10042
10043 return 0;
10044
10045}
10046
10047static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data)
10048{
10049 int error = 0;
10050 namespace_handler_data nhd;
10051
10052 bzero (&nhd, sizeof(namespace_handler_data));
10053
10054 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10055 return error;
10056 }
10057
10058 error = validate_namespace_args (is64bit, size);
10059 if (error) {
10060 return error;
10061 }
10062
10063 /* Copy in the userland pointers into our kernel-only struct */
10064
10065 if (is64bit) {
10066 /* 64 bit userland structures */
10067 nhd.token = (user_addr_t)((user64_namespace_handler_info *)data)->token;
10068 nhd.flags = (user_addr_t)((user64_namespace_handler_info *)data)->flags;
10069 nhd.fdptr = (user_addr_t)((user64_namespace_handler_info *)data)->fdptr;
10070
10071 /* If the size is greater than the standard info struct, add in extra fields */
10072 if (size > (sizeof(user64_namespace_handler_info))) {
10073 if (size >= (sizeof(user64_namespace_handler_info_ext))) {
10074 nhd.infoptr = (user_addr_t)((user64_namespace_handler_info_ext *)data)->infoptr;
10075 }
10076 if (size == (sizeof(user64_namespace_handler_data))) {
10077 nhd.objid = (user_addr_t)((user64_namespace_handler_data*)data)->objid;
10078 }
10079 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
10080 }
10081 }
10082 else {
10083 /* 32 bit userland structures */
10084 nhd.token = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->token);
10085 nhd.flags = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->flags);
10086 nhd.fdptr = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->fdptr);
10087
10088 if (size > (sizeof(user32_namespace_handler_info))) {
10089 if (size >= (sizeof(user32_namespace_handler_info_ext))) {
10090 nhd.infoptr = CAST_USER_ADDR_T(((user32_namespace_handler_info_ext *)data)->infoptr);
10091 }
10092 if (size == (sizeof(user32_namespace_handler_data))) {
10093 nhd.objid = (user_addr_t)((user32_namespace_handler_data*)data)->objid;
10094 }
10095 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
10096 }
10097 }
10098
10099 return wait_for_namespace_event(&nhd, nspace_type);
10100}
10101
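/*
 * Compatibility shim: some callers pass fsctl selectors with the IOCPARM
 * size bits stripped (the IOCBASECMD() form).  Map those back to the full
 * FSIOC_* / ioctl values so the switch in fsctl_internal() only has to
 * handle canonical commands.
 */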
10102static unsigned long
10103fsctl_bogus_command_compat(unsigned long cmd)
10104{
10105
10106 switch (cmd) {
10107 case IOCBASECMD(FSIOC_SYNC_VOLUME):
10108 return (FSIOC_SYNC_VOLUME);
10109 case IOCBASECMD(FSIOC_ROUTEFS_SETROUTEID):
10110 return (FSIOC_ROUTEFS_SETROUTEID);
10111 case IOCBASECMD(FSIOC_SET_PACKAGE_EXTS):
10112 return (FSIOC_SET_PACKAGE_EXTS);
10113 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_GET):
10114 return (FSIOC_NAMESPACE_HANDLER_GET);
10115 case IOCBASECMD(FSIOC_OLD_SNAPSHOT_HANDLER_GET):
10116 return (FSIOC_OLD_SNAPSHOT_HANDLER_GET);
10117 case IOCBASECMD(FSIOC_SNAPSHOT_HANDLER_GET_EXT):
10118 return (FSIOC_SNAPSHOT_HANDLER_GET_EXT);
10119 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_UPDATE):
10120 return (FSIOC_NAMESPACE_HANDLER_UPDATE);
10121 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_UNBLOCK):
10122 return (FSIOC_NAMESPACE_HANDLER_UNBLOCK);
10123 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_CANCEL):
10124 return (FSIOC_NAMESPACE_HANDLER_CANCEL);
10125 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME):
10126 return (FSIOC_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME);
10127 case IOCBASECMD(FSIOC_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS):
10128 return (FSIOC_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS);
10129 case IOCBASECMD(FSIOC_SET_FSTYPENAME_OVERRIDE):
10130 return (FSIOC_SET_FSTYPENAME_OVERRIDE);
10131 case IOCBASECMD(DISK_CONDITIONER_IOC_GET):
10132 return (DISK_CONDITIONER_IOC_GET);
10133 case IOCBASECMD(DISK_CONDITIONER_IOC_SET):
10134 return (DISK_CONDITIONER_IOC_SET);
10135 case IOCBASECMD(FSIOC_FIOSEEKHOLE):
10136 return (FSIOC_FIOSEEKHOLE);
10137 case IOCBASECMD(FSIOC_FIOSEEKDATA):
10138 return (FSIOC_FIOSEEKDATA);
10139 case IOCBASECMD(SPOTLIGHT_IOC_GET_MOUNT_TIME):
10140 return (SPOTLIGHT_IOC_GET_MOUNT_TIME);
10141 case IOCBASECMD(SPOTLIGHT_IOC_GET_LAST_MTIME):
10142 return (SPOTLIGHT_IOC_GET_LAST_MTIME);
10143 }
10144
10145 return (cmd);
10146}
10147
10148/*
10149 * Make a filesystem-specific control call:
10150 */
10151/* ARGSUSED */
10152static int
10153fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long options, vfs_context_t ctx)
10154{
10155 int error=0;
10156 boolean_t is64bit;
10157 u_int size;
10158#define STK_PARAMS 128
10159 char stkbuf[STK_PARAMS] = {0};
10160 caddr_t data, memp;
10161 vnode_t vp = *arg_vp;
10162
10163 cmd = fsctl_bogus_command_compat(cmd);
10164
10165 size = IOCPARM_LEN(cmd);
10166 if (size > IOCPARM_MAX) return (EINVAL);
10167
10168 is64bit = proc_is64bit(p);
10169
10170 memp = NULL;
10171
10172 if (size > sizeof (stkbuf)) {
10173 if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM;
10174 data = memp;
10175	} else {
10176		data = &stkbuf[0];
10177	}
10178
10179 if (cmd & IOC_IN) {
10180 if (size) {
10181 error = copyin(udata, data, size);
10182 if (error) {
10183 if (memp) {
10184 kfree (memp, size);
10185 }
10186 return error;
10187 }
10188 } else {
10189 if (is64bit) {
10190 *(user_addr_t *)data = udata;
10191 }
10192 else {
10193 *(uint32_t *)data = (uint32_t)udata;
10194 }
10195		}
10196 } else if ((cmd & IOC_OUT) && size) {
10197 /*
10198 * Zero the buffer so the user always
10199 * gets back something deterministic.
10200 */
10201 bzero(data, size);
10202 } else if (cmd & IOC_VOID) {
10203 if (is64bit) {
10204 *(user_addr_t *)data = udata;
10205 }
10206 else {
10207 *(uint32_t *)data = (uint32_t)udata;
10208 }
10209 }
10210
10211 /* Check to see if it's a generic command */
10212 switch (cmd) {
10213
10214 case FSIOC_SYNC_VOLUME: {
10215 mount_t mp = vp->v_mount;
10216 int arg = *(uint32_t*)data;
10217
10218 /* record vid of vp so we can drop it below. */
10219 uint32_t vvid = vp->v_id;
10220
10221 /*
10222 * Then grab mount_iterref so that we can release the vnode.
10223 * Without this, a thread may call vnode_iterate_prepare then
10224 * get into a deadlock because we've never released the root vp
10225 */
10226 error = mount_iterref (mp, 0);
10227 if (error) {
10228 break;
10229 }
10230 vnode_put(vp);
10231
10232 /* issue the sync for this volume */
10233 (void)sync_callback(mp, (arg & FSCTL_SYNC_WAIT) ? &arg : NULL);
10234
10235 /*
10236 * Then release the mount_iterref once we're done syncing; it's not
10237 * needed for the VNOP_IOCTL below
10238 */
10239 mount_iterdrop(mp);
10240
10241 if (arg & FSCTL_SYNC_FULLSYNC) {
10242 /* re-obtain vnode iocount on the root vp, if possible */
10243 error = vnode_getwithvid (vp, vvid);
10244 if (error == 0) {
10245 error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
10246 vnode_put (vp);
10247 }
10248 }
10249 /* mark the argument VP as having been released */
10250 *arg_vp = NULL;
10251 }
10252 break;
10253
10254 case FSIOC_ROUTEFS_SETROUTEID: {
10255#if ROUTEFS
10256 char routepath[MAXPATHLEN];
10257 size_t len = 0;
10258
10259 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10260 break;
10261 }
10262 bzero(routepath, MAXPATHLEN);
10263 error = copyinstr(udata, &routepath[0], MAXPATHLEN, &len);
10264 if (error) {
10265 break;
10266 }
10267 error = routefs_kernel_mount(routepath);
10268 if (error) {
10269 break;
10270 }
10271#endif
10272 }
10273 break;
10274
10275 case FSIOC_SET_PACKAGE_EXTS: {
10276 user_addr_t ext_strings;
10277 uint32_t num_entries;
10278 uint32_t max_width;
10279
10280 if ((error = priv_check_cred(kauth_cred_get(), PRIV_PACKAGE_EXTENSIONS, 0)))
10281 break;
10282
10283 if ( (is64bit && size != sizeof(user64_package_ext_info))
10284 || (is64bit == 0 && size != sizeof(user32_package_ext_info))) {
10285
10286 // either you're 64-bit and passed a 64-bit struct or
10287 // you're 32-bit and passed a 32-bit struct. otherwise
10288 // it's not ok.
10289 error = EINVAL;
10290 break;
10291 }
10292
10293 if (is64bit) {
10294 ext_strings = ((user64_package_ext_info *)data)->strings;
10295 num_entries = ((user64_package_ext_info *)data)->num_entries;
10296 max_width = ((user64_package_ext_info *)data)->max_width;
10297 } else {
10298 ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings);
10299 num_entries = ((user32_package_ext_info *)data)->num_entries;
10300 max_width = ((user32_package_ext_info *)data)->max_width;
10301 }
10302 error = set_package_extensions_table(ext_strings, num_entries, max_width);
10303 }
10304 break;
10305
10306 /* namespace handlers */
10307 case FSIOC_NAMESPACE_HANDLER_GET: {
10308 error = process_namespace_fsctl(NSPACE_HANDLER_NSPACE, is64bit, size, data);
10309 }
10310 break;
10311
10312 /* Snapshot handlers */
10313 case FSIOC_OLD_SNAPSHOT_HANDLER_GET: {
10314 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
10315 }
10316 break;
10317
10318 case FSIOC_SNAPSHOT_HANDLER_GET_EXT: {
10319 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
10320 }
10321 break;
10322
10323 case FSIOC_NAMESPACE_HANDLER_UPDATE: {
10324 uint32_t token, val;
10325 int i;
10326
10327 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
10328 break;
10329 }
10330
10331 if (!nspace_is_special_process(p)) {
10332 error = EINVAL;
10333 break;
10334 }
10335
10336 token = ((uint32_t *)data)[0];
10337 val = ((uint32_t *)data)[1];
10338
10339 lck_mtx_lock(&nspace_handler_lock);
10340
10341 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
10342 if (nspace_items[i].token == token) {
10343 break; /* exit for loop, not case stmt */
10344 }
10345 }
10346
10347 if (i >= MAX_NSPACE_ITEMS) {
10348 error = ENOENT;
10349 } else {
10350 //
10351 // if this bit is set, when resolve_nspace_item() times out
10352 // it will loop and go back to sleep.
10353 //
10354 nspace_items[i].flags |= NSPACE_ITEM_RESET_TIMER;
10355 }
10356
10357 lck_mtx_unlock(&nspace_handler_lock);
10358
10359 if (error) {
10360 printf("nspace-handler-update: did not find token %u\n", token);
10361 }
10362 }
10363 break;
10364
10365 case FSIOC_NAMESPACE_HANDLER_UNBLOCK: {
10366 uint32_t token, val;
10367 int i;
10368
10369 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
10370 break;
10371 }
10372
10373 if (!nspace_is_special_process(p)) {
10374 error = EINVAL;
10375 break;
10376 }
10377
10378 token = ((uint32_t *)data)[0];
10379 val = ((uint32_t *)data)[1];
10380
10381 lck_mtx_lock(&nspace_handler_lock);
10382
10383 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
10384 if (nspace_items[i].token == token) {
10385 break; /* exit for loop, not case statement */
10386 }
10387 }
10388
10389 if (i >= MAX_NSPACE_ITEMS) {
10390 printf("nspace-handler-unblock: did not find token %u\n", token);
10391 error = ENOENT;
10392 } else {
10393 if (val == 0 && nspace_items[i].vp) {
10394 vnode_lock_spin(nspace_items[i].vp);
10395 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
10396 vnode_unlock(nspace_items[i].vp);
10397 }
10398
10399 nspace_items[i].vp = NULL;
10400 nspace_items[i].arg = NULL;
10401 nspace_items[i].op = 0;
10402 nspace_items[i].vid = 0;
10403 nspace_items[i].flags = NSPACE_ITEM_DONE;
10404 nspace_items[i].token = 0;
10405
10406 wakeup((caddr_t)&(nspace_items[i].vp));
10407 }
10408
10409 lck_mtx_unlock(&nspace_handler_lock);
10410 }
10411 break;
10412
10413 case FSIOC_NAMESPACE_HANDLER_CANCEL: {
10414 uint32_t token, val;
10415 int i;
10416
10417 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
10418 break;
10419 }
10420
10421 if (!nspace_is_special_process(p)) {
10422 error = EINVAL;
10423 break;
10424 }
10425
10426 token = ((uint32_t *)data)[0];
10427 val = ((uint32_t *)data)[1];
10428
10429 lck_mtx_lock(&nspace_handler_lock);
10430
10431 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
10432 if (nspace_items[i].token == token) {
10433 break; /* exit for loop, not case stmt */
10434 }
10435 }
10436
10437 if (i >= MAX_NSPACE_ITEMS) {
10438 printf("nspace-handler-cancel: did not find token %u\n", token);
10439 error = ENOENT;
10440 } else {
10441 if (nspace_items[i].vp) {
10442 vnode_lock_spin(nspace_items[i].vp);
10443 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
10444 vnode_unlock(nspace_items[i].vp);
10445 }
10446
10447 nspace_items[i].vp = NULL;
10448 nspace_items[i].arg = NULL;
10449 nspace_items[i].vid = 0;
10450 nspace_items[i].token = val;
10451 nspace_items[i].flags &= ~NSPACE_ITEM_PROCESSING;
10452 nspace_items[i].flags |= NSPACE_ITEM_CANCELLED;
10453
10454 wakeup((caddr_t)&(nspace_items[i].vp));
10455 }
10456
10457 lck_mtx_unlock(&nspace_handler_lock);
10458 }
10459 break;
10460
10461 case FSIOC_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME: {
10462 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10463 break;
10464 }
10465
10466 // we explicitly do not do the namespace_handler_proc check here
10467
10468 lck_mtx_lock(&nspace_handler_lock);
10469 snapshot_timestamp = ((uint32_t *)data)[0];
10470 wakeup(&nspace_item_idx);
10471 lck_mtx_unlock(&nspace_handler_lock);
10472 printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp);
10473
10474 }
10475 break;
10476
10477 case FSIOC_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS:
10478 {
10479 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10480 break;
10481 }
10482
10483 lck_mtx_lock(&nspace_handler_lock);
10484 nspace_allow_virtual_devs = ((uint32_t *)data)[0];
10485 lck_mtx_unlock(&nspace_handler_lock);
10486 printf("nspace-snapshot-handler will%s allow events on disk-images\n",
10487 nspace_allow_virtual_devs ? "" : " NOT");
10488 error = 0;
10489
10490 }
10491 break;
10492
10493 case FSIOC_SET_FSTYPENAME_OVERRIDE:
10494 {
10495 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10496 break;
10497 }
10498 if (vp->v_mount) {
10499 mount_lock(vp->v_mount);
10500 if (data[0] != 0) {
10501 strlcpy(&vp->v_mount->fstypename_override[0], data, MFSTYPENAMELEN);
10502 vp->v_mount->mnt_kern_flag |= MNTK_TYPENAME_OVERRIDE;
10503 if (vfs_isrdonly(vp->v_mount) && strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
10504 vp->v_mount->mnt_kern_flag |= MNTK_EXTENDED_SECURITY;
10505 vp->v_mount->mnt_kern_flag &= ~MNTK_AUTH_OPAQUE;
10506 }
10507 } else {
10508 if (strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
10509 vp->v_mount->mnt_kern_flag &= ~MNTK_EXTENDED_SECURITY;
10510 }
10511 vp->v_mount->mnt_kern_flag &= ~MNTK_TYPENAME_OVERRIDE;
10512 vp->v_mount->fstypename_override[0] = '\0';
10513 }
10514 mount_unlock(vp->v_mount);
10515 }
10516 }
10517 break;
10518
10519 case DISK_CONDITIONER_IOC_GET: {
10520 error = disk_conditioner_get_info(vp->v_mount, (disk_conditioner_info *)data);
10521 }
10522 break;
10523
10524 case DISK_CONDITIONER_IOC_SET: {
10525 error = disk_conditioner_set_info(vp->v_mount, (disk_conditioner_info *)data);
10526 }
10527 break;
10528
10529 default: {
10530 /* other, known commands shouldn't be passed down here */
10531 switch (cmd) {
10532 case F_PUNCHHOLE:
10533 case F_TRIM_ACTIVE_FILE:
10534 case F_RDADVISE:
10535 case F_TRANSCODEKEY:
10536 case F_GETPROTECTIONLEVEL:
10537 case F_GETDEFAULTPROTLEVEL:
10538 case F_MAKECOMPRESSED:
10539 case F_SET_GREEDY_MODE:
10540 case F_SETSTATICCONTENT:
10541 case F_SETIOTYPE:
10542 case F_SETBACKINGSTORE:
10543 case F_GETPATH_MTMINFO:
10544 case APFSIOC_REVERT_TO_SNAPSHOT:
10545 case FSIOC_FIOSEEKHOLE:
10546 case FSIOC_FIOSEEKDATA:
10547 case HFS_GET_BOOT_INFO:
10548 case HFS_SET_BOOT_INFO:
10549 case FIOPINSWAP:
10550 case F_CHKCLEAN:
10551 case F_FULLFSYNC:
10552 case F_BARRIERFSYNC:
10553 case F_FREEZE_FS:
10554 case F_THAW_FS:
10555 error = EINVAL;
10556 goto outdrop;
10557 }
10558 /* Invoke the filesystem-specific code */
10559 error = VNOP_IOCTL(vp, cmd, data, options, ctx);
10560 }
10561
10562 } /* end switch stmt */
10563
10564 /*
10565 * if no errors, copy any data to user. Size was
10566 * already set and checked above.
10567 */
10568 if (error == 0 && (cmd & IOC_OUT) && size)
10569 error = copyout(data, udata, size);
10570
10571outdrop:
10572 if (memp) {
10573 kfree(memp, size);
10574 }
10575
10576 return error;
10577}
10578
10579/* ARGSUSED */
10580int
10581fsctl (proc_t p, struct fsctl_args *uap, __unused int32_t *retval)
10582{
10583 int error;
10584 struct nameidata nd;
10585 u_long nameiflags;
10586 vnode_t vp = NULL;
10587 vfs_context_t ctx = vfs_context_current();
10588
10589 AUDIT_ARG(cmd, uap->cmd);
10590 AUDIT_ARG(value32, uap->options);
10591 /* Get the vnode for the file we are getting info on: */
10592 nameiflags = 0;
10593 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
10594 NDINIT(&nd, LOOKUP, OP_FSCTL, nameiflags | AUDITVNPATH1,
10595 UIO_USERSPACE, uap->path, ctx);
10596 if ((error = namei(&nd))) goto done;
10597 vp = nd.ni_vp;
10598 nameidone(&nd);
10599
10600#if CONFIG_MACF
10601 error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
10602 if (error) {
10603 goto done;
10604 }
10605#endif
10606
10607 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
10608
10609done:
10610 if (vp)
10611 vnode_put(vp);
10612 return error;
10613}
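/*
 * Hypothetical userspace sketch (not part of this file): issuing
 * FSIOC_SYNC_VOLUME against a mounted volume via the fsctl() wrapper.
 * Assumes the userspace fsctl() declaration and the FSIOC_/FSCTL_ constants
 * in <sys/fsctl.h>; error handling is minimal.
 *
 *   #include <sys/fsctl.h>
 *   #include <stdint.h>
 *   #include <stdio.h>
 *
 *   static int sync_volume(const char *path, int full)
 *   {
 *       // FSCTL_SYNC_WAIT makes the sync synchronous; FSCTL_SYNC_FULLSYNC
 *       // additionally asks the kernel to issue F_FULLFSYNC on the root vp.
 *       uint32_t flags = FSCTL_SYNC_WAIT | (full ? FSCTL_SYNC_FULLSYNC : 0);
 *       if (fsctl(path, FSIOC_SYNC_VOLUME, &flags, 0) == -1) {
 *           perror("fsctl(FSIOC_SYNC_VOLUME)");
 *           return -1;
 *       }
 *       return 0;
 *   }
 */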
10614/* ARGSUSED */
10615int
10616ffsctl (proc_t p, struct ffsctl_args *uap, __unused int32_t *retval)
10617{
10618 int error;
10619 vnode_t vp = NULL;
10620 vfs_context_t ctx = vfs_context_current();
10621 int fd = -1;
10622
10623 AUDIT_ARG(fd, uap->fd);
10624 AUDIT_ARG(cmd, uap->cmd);
10625 AUDIT_ARG(value32, uap->options);
10626
10627 /* Get the vnode for the file we are getting info on: */
10628 if ((error = file_vnode(uap->fd, &vp)))
10629 return error;
10630 fd = uap->fd;
10631 if ((error = vnode_getwithref(vp))) {
10632 file_drop(fd);
10633 return error;
10634 }
10635
10636#if CONFIG_MACF
10637 if ((error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd))) {
10638 file_drop(fd);
10639 vnode_put(vp);
10640 return error;
10641 }
10642#endif
10643
10644 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
10645
10646 file_drop(fd);
10647
10648	/* Validate vp; fsctl_internal() can drop the iocount and reset vp to NULL. */
10649 if (vp) {
10650 vnode_put(vp);
10651 }
10652
10653 return error;
10654}
10655/* end of fsctl system call */
10656
10657/*
10658 * Retrieve the data of an extended attribute.
10659 */
10660int
10661getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
10662{
10663 vnode_t vp;
10664 struct nameidata nd;
10665 char attrname[XATTR_MAXNAMELEN+1];
10666 vfs_context_t ctx = vfs_context_current();
10667 uio_t auio = NULL;
10668 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10669 size_t attrsize = 0;
10670 size_t namelen;
10671 u_int32_t nameiflags;
10672 int error;
10673 char uio_buf[ UIO_SIZEOF(1) ];
10674
10675 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
10676 return (EINVAL);
10677
10678 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
10679 NDINIT(&nd, LOOKUP, OP_GETXATTR, nameiflags, spacetype, uap->path, ctx);
10680 if ((error = namei(&nd))) {
10681 return (error);
10682 }
10683 vp = nd.ni_vp;
10684 nameidone(&nd);
10685
10686 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
10687 if (error != 0) {
10688 goto out;
10689 }
10690 if (xattr_protected(attrname)) {
10691 if (!vfs_context_issuser(ctx) || strcmp(attrname, "com.apple.system.Security") != 0) {
10692 error = EPERM;
10693 goto out;
10694 }
10695 }
10696	/*
10697	 * The specific check for 0xffffffff is a hack to preserve
10698	 * binary compatibility in K64 with applications that discovered
10699	 * that passing in a buf pointer and a size of -1 resulted in
10700	 * just the size of the indicated extended attribute being returned.
10701	 * This isn't part of the documented behavior, but because of the
10702	 * original implementation's check for "uap->size > 0", this behavior
10703	 * was allowed. In K32 that check turned into a signed comparison
10704	 * even though uap->size is unsigned... in K64, we blow by that
10705	 * check because uap->size is unsigned and doesn't get sign smeared
10706	 * in the munger for a 32 bit user app. We also need to add a
10707	 * check to limit the maximum size of the buffer being passed in...
10708	 * Unfortunately, the underlying filesystems seem to just malloc
10709	 * the requested size even if the actual extended attribute is tiny.
10710	 * Because that malloc is for kernel wired memory, we have to put a
10711	 * sane limit on it.
10712	 *
10713	 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
10714	 * U64 running on K64 will yield -1 (64 bits wide)
10715	 * U32/U64 running on K32 will yield -1 (32 bits wide)
10716	 */
10717 if (uap->size == 0xffffffff || uap->size == (size_t)-1)
10718 goto no_uio;
10719
10720 if (uap->value) {
10721 if (uap->size > (size_t)XATTR_MAXSIZE)
10722 uap->size = XATTR_MAXSIZE;
10723
10724 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
10725 &uio_buf[0], sizeof(uio_buf));
10726 uio_addiov(auio, uap->value, uap->size);
10727 }
10728no_uio:
10729 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx);
10730out:
10731 vnode_put(vp);
10732
10733 if (auio) {
10734 *retval = uap->size - uio_resid(auio);
10735 } else {
10736 *retval = (user_ssize_t)attrsize;
10737 }
10738
10739 return (error);
10740}
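/*
 * Hypothetical userspace sketch (not part of this file): the common
 * "probe for size, then fetch" pattern that the code above supports by
 * returning only the attribute size when no buffer is supplied. Assumes
 * the wrappers declared in <sys/xattr.h>.
 *
 *   #include <sys/xattr.h>
 *   #include <stdlib.h>
 *
 *   static void *copy_xattr(const char *path, const char *name, ssize_t *lenp)
 *   {
 *       // First call: NULL buffer and zero size returns the attribute size.
 *       ssize_t len = getxattr(path, name, NULL, 0, 0, XATTR_NOFOLLOW);
 *       if (len < 0)
 *           return NULL;
 *       void *buf = malloc((size_t)len);
 *       if (buf == NULL)
 *           return NULL;
 *       // Second call: fetch the data (it may have changed size in between).
 *       len = getxattr(path, name, buf, (size_t)len, 0, XATTR_NOFOLLOW);
 *       if (len < 0) {
 *           free(buf);
 *           return NULL;
 *       }
 *       *lenp = len;
 *       return buf;
 *   }
 */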
10741
10742/*
10743 * Retrieve the data of an extended attribute.
10744 */
10745int
10746fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval)
10747{
10748 vnode_t vp;
10749 char attrname[XATTR_MAXNAMELEN+1];
10750 uio_t auio = NULL;
10751 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10752 size_t attrsize = 0;
10753 size_t namelen;
10754 int error;
10755 char uio_buf[ UIO_SIZEOF(1) ];
10756
10757 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
10758 return (EINVAL);
10759
10760 if ( (error = file_vnode(uap->fd, &vp)) ) {
10761 return (error);
10762 }
10763 if ( (error = vnode_getwithref(vp)) ) {
10764 file_drop(uap->fd);
10765 return(error);
10766 }
10767 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
10768 if (error != 0) {
10769 goto out;
10770 }
10771 if (xattr_protected(attrname)) {
10772 error = EPERM;
10773 goto out;
10774 }
10775 if (uap->value && uap->size > 0) {
10776 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
10777 &uio_buf[0], sizeof(uio_buf));
10778 uio_addiov(auio, uap->value, uap->size);
10779 }
10780
10781 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current());
10782out:
10783 (void)vnode_put(vp);
10784 file_drop(uap->fd);
10785
10786 if (auio) {
10787 *retval = uap->size - uio_resid(auio);
10788 } else {
10789 *retval = (user_ssize_t)attrsize;
10790 }
10791 return (error);
10792}
10793
10794/*
10795 * Set the data of an extended attribute.
10796 */
10797int
10798setxattr(proc_t p, struct setxattr_args *uap, int *retval)
10799{
10800 vnode_t vp;
10801 struct nameidata nd;
10802 char attrname[XATTR_MAXNAMELEN+1];
10803 vfs_context_t ctx = vfs_context_current();
10804 uio_t auio = NULL;
10805 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10806 size_t namelen;
10807 u_int32_t nameiflags;
10808 int error;
10809 char uio_buf[ UIO_SIZEOF(1) ];
10810
10811 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
10812 return (EINVAL);
10813
10814 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
10815 if (error != 0) {
10816 if (error == EPERM) {
10817 /* if the string won't fit in attrname, copyinstr emits EPERM */
10818 return (ENAMETOOLONG);
10819 }
10820 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
10821 return error;
10822 }
10823 if (xattr_protected(attrname))
10824 return(EPERM);
10825 if (uap->size != 0 && uap->value == 0) {
10826 return (EINVAL);
10827 }
10828
10829 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
10830 NDINIT(&nd, LOOKUP, OP_SETXATTR, nameiflags, spacetype, uap->path, ctx);
10831 if ((error = namei(&nd))) {
10832 return (error);
10833 }
10834 vp = nd.ni_vp;
10835 nameidone(&nd);
10836
10837 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
10838 &uio_buf[0], sizeof(uio_buf));
10839 uio_addiov(auio, uap->value, uap->size);
10840
10841 error = vn_setxattr(vp, attrname, auio, uap->options, ctx);
10842#if CONFIG_FSE
10843 if (error == 0) {
10844 add_fsevent(FSE_XATTR_MODIFIED, ctx,
10845 FSE_ARG_VNODE, vp,
10846 FSE_ARG_DONE);
10847 }
10848#endif
10849 vnode_put(vp);
10850 *retval = 0;
10851 return (error);
10852}
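/*
 * Hypothetical userspace sketch (not part of this file): setting an
 * extended attribute, using XATTR_CREATE so an existing attribute of the
 * same name is not silently replaced. The attribute name "com.example.tag"
 * is an arbitrary example. Assumes <sys/xattr.h>.
 *
 *   #include <sys/xattr.h>
 *   #include <string.h>
 *   #include <stdio.h>
 *
 *   static int tag_file(const char *path, const char *value)
 *   {
 *       // position must be 0 except for the resource fork attribute.
 *       if (setxattr(path, "com.example.tag", value, strlen(value),
 *                    0, XATTR_CREATE) == -1) {
 *           perror("setxattr");
 *           return -1;
 *       }
 *       return 0;
 *   }
 */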
10853
10854/*
10855 * Set the data of an extended attribute.
10856 */
10857int
10858fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
10859{
10860 vnode_t vp;
10861 char attrname[XATTR_MAXNAMELEN+1];
10862 uio_t auio = NULL;
10863 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10864 size_t namelen;
10865 int error;
10866 char uio_buf[ UIO_SIZEOF(1) ];
10867#if CONFIG_FSE
10868 vfs_context_t ctx = vfs_context_current();
10869#endif
10870
10871 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
10872 return (EINVAL);
10873
10874 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
10875 if (error != 0) {
10876 if (error == EPERM) {
10877 /* if the string won't fit in attrname, copyinstr emits EPERM */
10878 return (ENAMETOOLONG);
10879 }
10880 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
10881 return error;
10882 }
10883 if (xattr_protected(attrname))
10884 return(EPERM);
10885 if (uap->size != 0 && uap->value == 0) {
10886 return (EINVAL);
10887 }
10888 if ( (error = file_vnode(uap->fd, &vp)) ) {
10889 return (error);
10890 }
10891 if ( (error = vnode_getwithref(vp)) ) {
10892 file_drop(uap->fd);
10893 return(error);
10894 }
10895 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
10896 &uio_buf[0], sizeof(uio_buf));
10897 uio_addiov(auio, uap->value, uap->size);
10898
10899 error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current());
10900#if CONFIG_FSE
10901 if (error == 0) {
10902 add_fsevent(FSE_XATTR_MODIFIED, ctx,
10903 FSE_ARG_VNODE, vp,
10904 FSE_ARG_DONE);
10905 }
10906#endif
10907 vnode_put(vp);
10908 file_drop(uap->fd);
10909 *retval = 0;
10910 return (error);
10911}
10912
10913/*
10914 * Remove an extended attribute.
10915 * XXX Code duplication here.
10916 */
10917int
10918removexattr(proc_t p, struct removexattr_args *uap, int *retval)
10919{
10920 vnode_t vp;
10921 struct nameidata nd;
10922 char attrname[XATTR_MAXNAMELEN+1];
10923 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10924 vfs_context_t ctx = vfs_context_current();
10925 size_t namelen;
10926 u_int32_t nameiflags;
10927 int error;
10928
10929 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
10930 return (EINVAL);
10931
10932 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
10933 if (error != 0) {
10934 return (error);
10935 }
10936 if (xattr_protected(attrname))
10937 return(EPERM);
10938 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
10939 NDINIT(&nd, LOOKUP, OP_REMOVEXATTR, nameiflags, spacetype, uap->path, ctx);
10940 if ((error = namei(&nd))) {
10941 return (error);
10942 }
10943 vp = nd.ni_vp;
10944 nameidone(&nd);
10945
10946 error = vn_removexattr(vp, attrname, uap->options, ctx);
10947#if CONFIG_FSE
10948 if (error == 0) {
10949 add_fsevent(FSE_XATTR_REMOVED, ctx,
10950 FSE_ARG_VNODE, vp,
10951 FSE_ARG_DONE);
10952 }
10953#endif
10954 vnode_put(vp);
10955 *retval = 0;
10956 return (error);
10957}
10958
10959/*
10960 * Remove an extended attribute.
10961 * XXX Code duplication here.
10962 */
10963int
10964fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
10965{
10966 vnode_t vp;
10967 char attrname[XATTR_MAXNAMELEN+1];
10968 size_t namelen;
10969 int error;
10970#if CONFIG_FSE
10971 vfs_context_t ctx = vfs_context_current();
10972#endif
10973
10974 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
10975 return (EINVAL);
10976
10977 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
10978 if (error != 0) {
10979 return (error);
10980 }
10981 if (xattr_protected(attrname))
10982 return(EPERM);
10983 if ( (error = file_vnode(uap->fd, &vp)) ) {
10984 return (error);
10985 }
10986 if ( (error = vnode_getwithref(vp)) ) {
10987 file_drop(uap->fd);
10988 return(error);
10989 }
10990
10991 error = vn_removexattr(vp, attrname, uap->options, vfs_context_current());
10992#if CONFIG_FSE
10993 if (error == 0) {
10994 add_fsevent(FSE_XATTR_REMOVED, ctx,
10995 FSE_ARG_VNODE, vp,
10996 FSE_ARG_DONE);
10997 }
10998#endif
10999 vnode_put(vp);
11000 file_drop(uap->fd);
11001 *retval = 0;
11002 return (error);
11003}
11004
11005/*
11006 * Retrieve the list of extended attribute names.
11007 * XXX Code duplication here.
11008 */
11009int
11010listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
11011{
11012 vnode_t vp;
11013 struct nameidata nd;
11014 vfs_context_t ctx = vfs_context_current();
11015 uio_t auio = NULL;
11016 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
11017 size_t attrsize = 0;
11018 u_int32_t nameiflags;
11019 int error;
11020 char uio_buf[ UIO_SIZEOF(1) ];
11021
11022 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
11023 return (EINVAL);
11024
11025 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
11026 NDINIT(&nd, LOOKUP, OP_LISTXATTR, nameiflags, spacetype, uap->path, ctx);
11027 if ((error = namei(&nd))) {
11028 return (error);
11029 }
11030 vp = nd.ni_vp;
11031 nameidone(&nd);
11032 if (uap->namebuf != 0 && uap->bufsize > 0) {
11033 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
11034 &uio_buf[0], sizeof(uio_buf));
11035 uio_addiov(auio, uap->namebuf, uap->bufsize);
11036 }
11037
11038 error = vn_listxattr(vp, auio, &attrsize, uap->options, ctx);
11039
11040 vnode_put(vp);
11041 if (auio) {
11042 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
11043 } else {
11044 *retval = (user_ssize_t)attrsize;
11045 }
11046 return (error);
11047}
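/*
 * Hypothetical userspace sketch (not part of this file): walking the name
 * list returned by listxattr(), which is packed as consecutive
 * NUL-terminated strings. Assumes <sys/xattr.h>.
 *
 *   #include <sys/xattr.h>
 *   #include <stdlib.h>
 *   #include <stdio.h>
 *   #include <string.h>
 *
 *   static void print_xattr_names(const char *path)
 *   {
 *       ssize_t len = listxattr(path, NULL, 0, XATTR_NOFOLLOW);
 *       if (len <= 0)
 *           return;
 *       char *names = malloc((size_t)len);
 *       if (names == NULL)
 *           return;
 *       len = listxattr(path, names, (size_t)len, XATTR_NOFOLLOW);
 *       if (len > 0) {
 *           // Names are packed back to back, each NUL-terminated.
 *           for (char *p = names; p < names + len; p += strlen(p) + 1)
 *               printf("%s\n", p);
 *       }
 *       free(names);
 *   }
 */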
11048
11049/*
11050 * Retrieve the list of extended attribute names.
11051 * XXX Code duplication here.
11052 */
11053int
11054flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval)
11055{
11056 vnode_t vp;
11057 uio_t auio = NULL;
11058 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
11059 size_t attrsize = 0;
11060 int error;
11061 char uio_buf[ UIO_SIZEOF(1) ];
11062
11063 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
11064 return (EINVAL);
11065
11066 if ( (error = file_vnode(uap->fd, &vp)) ) {
11067 return (error);
11068 }
11069 if ( (error = vnode_getwithref(vp)) ) {
11070 file_drop(uap->fd);
11071 return(error);
11072 }
11073 if (uap->namebuf != 0 && uap->bufsize > 0) {
11074 auio = uio_createwithbuffer(1, 0, spacetype,
11075 UIO_READ, &uio_buf[0], sizeof(uio_buf));
11076 uio_addiov(auio, uap->namebuf, uap->bufsize);
11077 }
11078
11079 error = vn_listxattr(vp, auio, &attrsize, uap->options, vfs_context_current());
11080
11081 vnode_put(vp);
11082 file_drop(uap->fd);
11083 if (auio) {
11084 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
11085 } else {
11086 *retval = (user_ssize_t)attrsize;
11087 }
11088 return (error);
11089}
11090
11091static int fsgetpath_internal(
11092 vfs_context_t ctx, int volfs_id, uint64_t objid,
11093 vm_size_t bufsize, caddr_t buf, int *pathlen)
11094{
11095 int error;
11096 struct mount *mp = NULL;
11097 vnode_t vp;
11098 int length;
11099 int bpflags;
11100 /* maximum number of times to retry build_path */
11101 unsigned int retries = 0x10;
11102
11103 if (bufsize > PAGE_SIZE) {
11104 return (EINVAL);
11105 }
11106
11107 if (buf == NULL) {
11108 return (ENOMEM);
11109 }
11110
11111retry:
11112 if ((mp = mount_lookupby_volfsid(volfs_id, 1)) == NULL) {
11113		/* unexpected failure */
11114		return (ENOTSUP);
11115 }
11116
11117unionget:
11118 if (objid == 2) {
11119 error = VFS_ROOT(mp, &vp, ctx);
11120 } else {
11121 error = VFS_VGET(mp, (ino64_t)objid, &vp, ctx);
11122 }
11123
11124 if (error == ENOENT && (mp->mnt_flag & MNT_UNION)) {
11125 /*
11126 * If the fileid isn't found and we're in a union
11127 * mount volume, then see if the fileid is in the
11128 * mounted-on volume.
11129 */
11130 struct mount *tmp = mp;
11131 mp = vnode_mount(tmp->mnt_vnodecovered);
11132 vfs_unbusy(tmp);
11133 if (vfs_busy(mp, LK_NOWAIT) == 0)
11134 goto unionget;
11135 } else {
11136 vfs_unbusy(mp);
11137 }
11138
11139 if (error) {
11140 return error;
11141 }
11142
11143#if CONFIG_MACF
11144 error = mac_vnode_check_fsgetpath(ctx, vp);
11145 if (error) {
11146 vnode_put(vp);
11147 return error;
11148 }
11149#endif
11150
11151 /* Obtain the absolute path to this vnode. */
11152 bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0;
11153 bpflags |= BUILDPATH_CHECK_MOVED;
11154 error = build_path(vp, buf, bufsize, &length, bpflags, ctx);
11155 vnode_put(vp);
11156
11157 if (error) {
11158 /* there was a race building the path, try a few more times */
11159 if (error == EAGAIN) {
11160 --retries;
11161 if (retries > 0)
11162 goto retry;
11163
11164 error = ENOENT;
11165 }
11166 goto out;
11167 }
11168
11169 AUDIT_ARG(text, buf);
11170
11171 if (kdebug_enable) {
11172 long dbg_parms[NUMPARMS];
11173 int dbg_namelen;
11174
11175 dbg_namelen = (int)sizeof(dbg_parms);
11176
11177 if (length < dbg_namelen) {
11178 memcpy((char *)dbg_parms, buf, length);
11179 memset((char *)dbg_parms + length, 0, dbg_namelen - length);
11180
11181 dbg_namelen = length;
11182 } else {
11183 memcpy((char *)dbg_parms, buf + (length - dbg_namelen), dbg_namelen);
11184 }
11185
11186 kdebug_vfs_lookup(dbg_parms, dbg_namelen, (void *)vp,
11187 KDBG_VFS_LOOKUP_FLAG_LOOKUP);
11188 }
11189
11190	*pathlen = length; /* may be superseded by error */
11191
11192out:
11193 return (error);
11194}
11195
11196/*
11197 * Obtain the full pathname of a file system object by id.
11198 */
11199int
11200fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
11201{
11202 vfs_context_t ctx = vfs_context_current();
11203 fsid_t fsid;
11204 char *realpath;
11205 int length;
11206 int error;
11207
11208 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
11209 return (error);
11210 }
11211 AUDIT_ARG(value32, fsid.val[0]);
11212 AUDIT_ARG(value64, uap->objid);
11213 /* Restrict output buffer size for now. */
11214
11215 if (uap->bufsize > PAGE_SIZE) {
11216 return (EINVAL);
11217 }
11218 MALLOC(realpath, char *, uap->bufsize, M_TEMP, M_WAITOK | M_ZERO);
11219 if (realpath == NULL) {
11220 return (ENOMEM);
11221 }
11222
11223 error = fsgetpath_internal(
11224 ctx, fsid.val[0], uap->objid,
11225 uap->bufsize, realpath, &length);
11226
11227 if (error) {
11228 goto out;
11229 }
11230
11231 error = copyout((caddr_t)realpath, uap->buf, length);
11232
11233 *retval = (user_ssize_t)length; /* may be superseded by error */
11234out:
11235 if (realpath) {
11236 FREE(realpath, M_TEMP);
11237 }
11238 return (error);
11239}
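/*
 * Hypothetical userspace sketch (not part of this file): recovering a path
 * from an <fsid, file id> pair, e.g. one recorded earlier via stat(2) and
 * statfs(2). Assumes the fsgetpath() wrapper declared in <sys/fsgetpath.h>
 * (available on macOS 10.13 and later); note the kernel caps the buffer at
 * PAGE_SIZE, so MAXPATHLEN is within range.
 *
 *   #include <sys/param.h>
 *   #include <sys/mount.h>
 *   #include <sys/stat.h>
 *   #include <sys/fsgetpath.h>
 *   #include <stdio.h>
 *
 *   static int remember_and_resolve(const char *path)
 *   {
 *       struct stat st;
 *       struct statfs sfs;
 *       char buf[MAXPATHLEN];
 *
 *       if (stat(path, &st) == -1 || statfs(path, &sfs) == -1)
 *           return -1;
 *       // Later, with only the fsid and file id in hand:
 *       if (fsgetpath(buf, sizeof(buf), &sfs.f_fsid, st.st_ino) == -1) {
 *           perror("fsgetpath");
 *           return -1;
 *       }
 *       printf("%s\n", buf);
 *       return 0;
 *   }
 */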
11240
11241/*
11242 * Common routine to handle various flavors of statfs data heading out
11243 * to user space.
11244 *
11245 * Returns: 0 Success
11246 * EFAULT
11247 */
11248static int
11249munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
11250 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
11251 boolean_t partial_copy)
11252{
11253 int error;
11254 int my_size, copy_size;
11255
11256 if (is_64_bit) {
11257 struct user64_statfs sfs;
11258 my_size = copy_size = sizeof(sfs);
11259 bzero(&sfs, my_size);
11260 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
11261 sfs.f_type = mp->mnt_vtable->vfc_typenum;
11262 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
11263 sfs.f_bsize = (user64_long_t)sfsp->f_bsize;
11264 sfs.f_iosize = (user64_long_t)sfsp->f_iosize;
11265 sfs.f_blocks = (user64_long_t)sfsp->f_blocks;
11266 sfs.f_bfree = (user64_long_t)sfsp->f_bfree;
11267 sfs.f_bavail = (user64_long_t)sfsp->f_bavail;
11268 sfs.f_files = (user64_long_t)sfsp->f_files;
11269 sfs.f_ffree = (user64_long_t)sfsp->f_ffree;
11270 sfs.f_fsid = sfsp->f_fsid;
11271 sfs.f_owner = sfsp->f_owner;
11272 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
11273 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
11274 } else {
11275 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
11276 }
11277 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
11278 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
11279
11280 if (partial_copy) {
11281 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
11282 }
11283 error = copyout((caddr_t)&sfs, bufp, copy_size);
11284 }
11285 else {
11286 struct user32_statfs sfs;
11287
11288 my_size = copy_size = sizeof(sfs);
11289 bzero(&sfs, my_size);
11290
11291 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
11292 sfs.f_type = mp->mnt_vtable->vfc_typenum;
11293 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
11294
11295 /*
11296	 * It's possible for there to be more than 2^31 blocks in the filesystem, so we
11297 * have to fudge the numbers here in that case. We inflate the blocksize in order
11298 * to reflect the filesystem size as best we can.
11299 */
11300 if ((sfsp->f_blocks > INT_MAX)
11301		    /* Hack for 4061702. I think the real fix is for Carbon to
11302		     * look for some volume capability and not depend on hidden
11303		     * semantics agreed between a FS and Carbon.
11304		     * f_blocks, f_bfree, and f_bavail set to -1 are the trigger
11305		     * for Carbon to set the bNoVolumeSizes volume attribute.
11306		     * Without this, webdavfs files cannot be copied onto
11307		     * disk as they look huge. This change should not affect
11308		     * XSAN, as it should not be setting these to -1.
11309 */
11310 && (sfsp->f_blocks != 0xffffffffffffffffULL)
11311 && (sfsp->f_bfree != 0xffffffffffffffffULL)
11312 && (sfsp->f_bavail != 0xffffffffffffffffULL)) {
11313 int shift;
11314
11315 /*
11316 * Work out how far we have to shift the block count down to make it fit.
11317 * Note that it's possible to have to shift so far that the resulting
11318 * blocksize would be unreportably large. At that point, we will clip
11319 * any values that don't fit.
11320 *
11321 * For safety's sake, we also ensure that f_iosize is never reported as
11322 * being smaller than f_bsize.
11323 */
11324 for (shift = 0; shift < 32; shift++) {
11325 if ((sfsp->f_blocks >> shift) <= INT_MAX)
11326 break;
11327 if ((sfsp->f_bsize << (shift + 1)) > INT_MAX)
11328 break;
11329 }
11330#define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
11331 sfs.f_blocks = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_blocks, shift);
11332 sfs.f_bfree = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bfree, shift);
11333 sfs.f_bavail = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bavail, shift);
11334#undef __SHIFT_OR_CLIP
11335 sfs.f_bsize = (user32_long_t)(sfsp->f_bsize << shift);
11336 sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize);
11337 } else {
11338 /* filesystem is small enough to be reported honestly */
11339 sfs.f_bsize = (user32_long_t)sfsp->f_bsize;
11340 sfs.f_iosize = (user32_long_t)sfsp->f_iosize;
11341 sfs.f_blocks = (user32_long_t)sfsp->f_blocks;
11342 sfs.f_bfree = (user32_long_t)sfsp->f_bfree;
11343 sfs.f_bavail = (user32_long_t)sfsp->f_bavail;
11344 }
11345 sfs.f_files = (user32_long_t)sfsp->f_files;
11346 sfs.f_ffree = (user32_long_t)sfsp->f_ffree;
11347 sfs.f_fsid = sfsp->f_fsid;
11348 sfs.f_owner = sfsp->f_owner;
11349 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
11350 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
11351 } else {
11352 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
11353 }
11354 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
11355 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
11356
11357 if (partial_copy) {
11358 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
11359 }
11360 error = copyout((caddr_t)&sfs, bufp, copy_size);
11361 }
11362
11363 if (sizep != NULL) {
11364 *sizep = my_size;
11365 }
11366 return(error);
11367}
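/*
 * Worked example of the 32-bit block-count scaling above (illustrative
 * numbers only): a volume with f_blocks = 0x200000000 (2^33) blocks of
 * f_bsize = 4096 bytes cannot report its block count in a 32-bit field.
 * The loop stops at shift = 3, the first value where
 * (f_blocks >> shift) <= INT_MAX, so the 32-bit statfs reports
 * f_blocks = 0x40000000 and f_bsize = 4096 << 3 = 32768. The product,
 * 2^30 * 2^15 = 2^45 bytes, still equals the true volume size of
 * 2^33 * 2^12 bytes; only the granularity of the block counts is coarsened.
 */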
11368
11369/*
11370 * copy stat structure into user_stat structure.
11371 */
11372void munge_user64_stat(struct stat *sbp, struct user64_stat *usbp)
11373{
11374 bzero(usbp, sizeof(*usbp));
11375
11376 usbp->st_dev = sbp->st_dev;
11377 usbp->st_ino = sbp->st_ino;
11378 usbp->st_mode = sbp->st_mode;
11379 usbp->st_nlink = sbp->st_nlink;
11380 usbp->st_uid = sbp->st_uid;
11381 usbp->st_gid = sbp->st_gid;
11382 usbp->st_rdev = sbp->st_rdev;
11383#ifndef _POSIX_C_SOURCE
11384 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11385 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11386 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11387 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11388 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11389 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
11390#else
11391 usbp->st_atime = sbp->st_atime;
11392 usbp->st_atimensec = sbp->st_atimensec;
11393 usbp->st_mtime = sbp->st_mtime;
11394 usbp->st_mtimensec = sbp->st_mtimensec;
11395 usbp->st_ctime = sbp->st_ctime;
11396 usbp->st_ctimensec = sbp->st_ctimensec;
11397#endif
11398 usbp->st_size = sbp->st_size;
11399 usbp->st_blocks = sbp->st_blocks;
11400 usbp->st_blksize = sbp->st_blksize;
11401 usbp->st_flags = sbp->st_flags;
11402 usbp->st_gen = sbp->st_gen;
11403 usbp->st_lspare = sbp->st_lspare;
11404 usbp->st_qspare[0] = sbp->st_qspare[0];
11405 usbp->st_qspare[1] = sbp->st_qspare[1];
11406}
11407
11408void munge_user32_stat(struct stat *sbp, struct user32_stat *usbp)
11409{
11410 bzero(usbp, sizeof(*usbp));
11411
11412 usbp->st_dev = sbp->st_dev;
11413 usbp->st_ino = sbp->st_ino;
11414 usbp->st_mode = sbp->st_mode;
11415 usbp->st_nlink = sbp->st_nlink;
11416 usbp->st_uid = sbp->st_uid;
11417 usbp->st_gid = sbp->st_gid;
11418 usbp->st_rdev = sbp->st_rdev;
11419#ifndef _POSIX_C_SOURCE
11420 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11421 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11422 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11423 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11424 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11425 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
11426#else
11427 usbp->st_atime = sbp->st_atime;
11428 usbp->st_atimensec = sbp->st_atimensec;
11429 usbp->st_mtime = sbp->st_mtime;
11430 usbp->st_mtimensec = sbp->st_mtimensec;
11431 usbp->st_ctime = sbp->st_ctime;
11432 usbp->st_ctimensec = sbp->st_ctimensec;
11433#endif
11434 usbp->st_size = sbp->st_size;
11435 usbp->st_blocks = sbp->st_blocks;
11436 usbp->st_blksize = sbp->st_blksize;
11437 usbp->st_flags = sbp->st_flags;
11438 usbp->st_gen = sbp->st_gen;
11439 usbp->st_lspare = sbp->st_lspare;
11440 usbp->st_qspare[0] = sbp->st_qspare[0];
11441 usbp->st_qspare[1] = sbp->st_qspare[1];
11442}
11443
11444/*
11445 * copy stat64 structure into user_stat64 structure.
11446 */
11447void munge_user64_stat64(struct stat64 *sbp, struct user64_stat64 *usbp)
11448{
11449 bzero(usbp, sizeof(*usbp));
11450
11451 usbp->st_dev = sbp->st_dev;
11452 usbp->st_ino = sbp->st_ino;
11453 usbp->st_mode = sbp->st_mode;
11454 usbp->st_nlink = sbp->st_nlink;
11455 usbp->st_uid = sbp->st_uid;
11456 usbp->st_gid = sbp->st_gid;
11457 usbp->st_rdev = sbp->st_rdev;
11458#ifndef _POSIX_C_SOURCE
11459 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11460 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11461 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11462 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11463 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11464 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
11465 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
11466 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
11467#else
11468 usbp->st_atime = sbp->st_atime;
11469 usbp->st_atimensec = sbp->st_atimensec;
11470 usbp->st_mtime = sbp->st_mtime;
11471 usbp->st_mtimensec = sbp->st_mtimensec;
11472 usbp->st_ctime = sbp->st_ctime;
11473 usbp->st_ctimensec = sbp->st_ctimensec;
11474 usbp->st_birthtime = sbp->st_birthtime;
11475 usbp->st_birthtimensec = sbp->st_birthtimensec;
11476#endif
11477 usbp->st_size = sbp->st_size;
11478 usbp->st_blocks = sbp->st_blocks;
11479 usbp->st_blksize = sbp->st_blksize;
11480 usbp->st_flags = sbp->st_flags;
11481 usbp->st_gen = sbp->st_gen;
11482 usbp->st_lspare = sbp->st_lspare;
11483 usbp->st_qspare[0] = sbp->st_qspare[0];
11484 usbp->st_qspare[1] = sbp->st_qspare[1];
11485}
11486
11487void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp)
11488{
11489 bzero(usbp, sizeof(*usbp));
11490
11491 usbp->st_dev = sbp->st_dev;
11492 usbp->st_ino = sbp->st_ino;
11493 usbp->st_mode = sbp->st_mode;
11494 usbp->st_nlink = sbp->st_nlink;
11495 usbp->st_uid = sbp->st_uid;
11496 usbp->st_gid = sbp->st_gid;
11497 usbp->st_rdev = sbp->st_rdev;
11498#ifndef _POSIX_C_SOURCE
11499 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11500 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11501 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11502 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11503 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11504 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
11505 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
11506 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
11507#else
11508 usbp->st_atime = sbp->st_atime;
11509 usbp->st_atimensec = sbp->st_atimensec;
11510 usbp->st_mtime = sbp->st_mtime;
11511 usbp->st_mtimensec = sbp->st_mtimensec;
11512 usbp->st_ctime = sbp->st_ctime;
11513 usbp->st_ctimensec = sbp->st_ctimensec;
11514 usbp->st_birthtime = sbp->st_birthtime;
11515 usbp->st_birthtimensec = sbp->st_birthtimensec;
11516#endif
11517 usbp->st_size = sbp->st_size;
11518 usbp->st_blocks = sbp->st_blocks;
11519 usbp->st_blksize = sbp->st_blksize;
11520 usbp->st_flags = sbp->st_flags;
11521 usbp->st_gen = sbp->st_gen;
11522 usbp->st_lspare = sbp->st_lspare;
11523 usbp->st_qspare[0] = sbp->st_qspare[0];
11524 usbp->st_qspare[1] = sbp->st_qspare[1];
11525}
11526
11527/*
11528 * Purge buffer cache for simulating cold starts
11529 */
11530static int vnode_purge_callback(struct vnode *vp, __unused void *cargs)
11531{
11532 ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL /* off_t *resid_off */, UBC_PUSHALL | UBC_INVALIDATE);
11533
11534 return VNODE_RETURNED;
11535}
11536
11537static int vfs_purge_callback(mount_t mp, __unused void * arg)
11538{
11539 vnode_iterate(mp, VNODE_WAIT | VNODE_ITERATE_ALL, vnode_purge_callback, NULL);
11540
11541 return VFS_RETURNED;
11542}
11543
11544int
11545vfs_purge(__unused struct proc *p, __unused struct vfs_purge_args *uap, __unused int32_t *retval)
11546{
11547 if (!kauth_cred_issuser(kauth_cred_get()))
11548 return EPERM;
11549
11550 vfs_iterate(0/* flags */, vfs_purge_callback, NULL);
11551
11552 return 0;
11553}
11554
11555/*
11556	 * Gets the vnode associated with the (unnamed) snapshot directory
11557	 * for a filesystem. The snapshot directory vnode is returned with
11558 * an iocount on it.
11559 */
11560int
11561vnode_get_snapdir(vnode_t rvp, vnode_t *sdvpp, vfs_context_t ctx)
11562{
11563 return (VFS_VGET_SNAPDIR(vnode_mount(rvp), sdvpp, ctx));
11564}
11565
11566/*
11567 * Get the snapshot vnode.
11568 *
11569	 * If successful, the call returns with an iocount on *rvpp and *sdvpp,
11570	 * and the caller must call nameidone() on ndp.
11571 *
11572 * If the snapshot vnode exists it is returned in ndp->ni_vp.
11573 *
11574 * If it returns with an error, *rvpp, *sdvpp are NULL and nameidone() is
11575 * not needed.
11576 */
11577static int
11578vnode_get_snapshot(int dirfd, vnode_t *rvpp, vnode_t *sdvpp,
11579 user_addr_t name, struct nameidata *ndp, int32_t op,
11580#if !CONFIG_TRIGGERS
11581 __unused
11582#endif
11583 enum path_operation pathop,
11584 vfs_context_t ctx)
11585{
11586 int error, i;
11587 caddr_t name_buf;
11588 size_t name_len;
11589 struct vfs_attr vfa;
11590
11591 *sdvpp = NULLVP;
11592 *rvpp = NULLVP;
11593
11594 error = vnode_getfromfd(ctx, dirfd, rvpp);
11595 if (error)
11596 return (error);
11597
11598 if (!vnode_isvroot(*rvpp)) {
11599 error = EINVAL;
11600 goto out;
11601 }
11602
11603 /* Make sure the filesystem supports snapshots */
11604 VFSATTR_INIT(&vfa);
11605 VFSATTR_WANTED(&vfa, f_capabilities);
11606 if ((vfs_getattr(vnode_mount(*rvpp), &vfa, ctx) != 0) ||
11607 !VFSATTR_IS_SUPPORTED(&vfa, f_capabilities) ||
11608 !((vfa.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] &
11609 VOL_CAP_INT_SNAPSHOT)) ||
11610 !((vfa.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] &
11611 VOL_CAP_INT_SNAPSHOT))) {
11612 error = ENOTSUP;
11613 goto out;
11614 }
11615
11616 error = vnode_get_snapdir(*rvpp, sdvpp, ctx);
11617 if (error)
11618 goto out;
11619
11620 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
11621 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
11622 if (error)
11623 goto out1;
11624
11625 /*
11626	 * Some sanity checks: the name can't be empty, ".", or "..", and can't contain slashes.
11627	 * (The length returned by copyinstr includes the terminating NUL.)
11628 */
11629 if ((name_len == 1) || (name_len == 2 && name_buf[0] == '.') ||
11630 (name_len == 3 && name_buf[0] == '.' && name_buf[1] == '.')) {
11631 error = EINVAL;
11632 goto out1;
11633 }
11634 for (i = 0; i < (int)name_len && name_buf[i] != '/'; i++);
11635 if (i < (int)name_len) {
11636 error = EINVAL;
11637 goto out1;
11638 }
11639
11640#if CONFIG_MACF
11641 if (op == CREATE) {
11642 error = mac_mount_check_snapshot_create(ctx, vnode_mount(*rvpp),
11643 name_buf);
11644 } else if (op == DELETE) {
11645 error = mac_mount_check_snapshot_delete(ctx, vnode_mount(*rvpp),
11646 name_buf);
11647 }
11648 if (error)
11649 goto out1;
11650#endif
11651
11652 /* Check if the snapshot already exists ... */
11653 NDINIT(ndp, op, pathop, USEDVP | NOCACHE | AUDITVNPATH1,
11654 UIO_SYSSPACE, CAST_USER_ADDR_T(name_buf), ctx);
11655 ndp->ni_dvp = *sdvpp;
11656
11657 error = namei(ndp);
11658out1:
11659 FREE(name_buf, M_TEMP);
11660out:
11661 if (error) {
11662 if (*sdvpp) {
11663 vnode_put(*sdvpp);
11664 *sdvpp = NULLVP;
11665 }
11666 if (*rvpp) {
11667 vnode_put(*rvpp);
11668 *rvpp = NULLVP;
11669 }
11670 }
11671 return (error);
11672}
11673
11674/*
11675	 * Create a filesystem snapshot (for supporting filesystems).
11676	 *
11677	 * A much simplified version of openat(dirfd, name, O_CREAT | O_EXCL).
11678 * We get to the (unnamed) snapshot directory vnode and create the vnode
11679 * for the snapshot in it.
11680 *
11681 * Restrictions:
11682 *
11683 * a) Passed in name for snapshot cannot have slashes.
11684 * b) name can't be "." or ".."
11685 *
11686 * Since this requires superuser privileges, vnode_authorize calls are not
11687 * made.
11688 */
11689static int
11690snapshot_create(int dirfd, user_addr_t name, __unused uint32_t flags,
11691 vfs_context_t ctx)
11692{
11693 vnode_t rvp, snapdvp;
11694 int error;
11695 struct nameidata namend;
11696
11697 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, CREATE,
11698 OP_LINK, ctx);
11699 if (error)
11700 return (error);
11701
11702 if (namend.ni_vp) {
11703 vnode_put(namend.ni_vp);
11704 error = EEXIST;
11705 } else {
11706 struct vnode_attr va;
11707 vnode_t vp = NULLVP;
11708
11709 VATTR_INIT(&va);
11710 VATTR_SET(&va, va_type, VREG);
11711 VATTR_SET(&va, va_mode, 0);
11712
11713 error = vn_create(snapdvp, &vp, &namend, &va,
11714 VN_CREATE_NOAUTH | VN_CREATE_NOINHERIT, 0, NULL, ctx);
11715 if (!error && vp)
11716 vnode_put(vp);
11717 }
11718
11719 nameidone(&namend);
11720 vnode_put(snapdvp);
11721 vnode_put(rvp);
11722 return (error);
11723}
11724
11725/*
11726	 * Delete a filesystem snapshot.
11727	 *
11728	 * Get the vnode for the unnamed snapshot directory and the snapshot, and
11729	 * delete the snapshot.
11730 */
11731static int
11732snapshot_delete(int dirfd, user_addr_t name, __unused uint32_t flags,
11733 vfs_context_t ctx)
11734{
11735 vnode_t rvp, snapdvp;
11736 int error;
11737 struct nameidata namend;
11738
11739 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, DELETE,
11740 OP_UNLINK, ctx);
11741 if (error)
11742 goto out;
11743
11744 error = VNOP_REMOVE(snapdvp, namend.ni_vp, &namend.ni_cnd,
11745 VNODE_REMOVE_SKIP_NAMESPACE_EVENT, ctx);
11746
11747 vnode_put(namend.ni_vp);
11748 nameidone(&namend);
11749 vnode_put(snapdvp);
11750 vnode_put(rvp);
11751out:
11752 return (error);
11753}
11754
11755/*
11756 * Revert a filesystem to a snapshot
11757 *
11758 * Marks the filesystem to revert to the given snapshot on next mount.
11759 */
11760static int
11761snapshot_revert(int dirfd, user_addr_t name, __unused uint32_t flags,
11762 vfs_context_t ctx)
11763{
11764 int error;
11765 vnode_t rvp;
11766 mount_t mp;
11767 struct fs_snapshot_revert_args revert_data;
11768 struct componentname cnp;
11769 caddr_t name_buf;
11770 size_t name_len;
11771
11772 error = vnode_getfromfd(ctx, dirfd, &rvp);
11773 if (error) {
11774 return (error);
11775 }
11776 mp = vnode_mount(rvp);
11777
11778 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
11779 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
11780 if (error) {
11781 FREE(name_buf, M_TEMP);
11782 vnode_put(rvp);
11783 return (error);
11784 }
11785
11786#if CONFIG_MACF
11787 error = mac_mount_check_snapshot_revert(ctx, mp, name_buf);
11788 if (error) {
11789 FREE(name_buf, M_TEMP);
11790 vnode_put(rvp);
11791 return (error);
11792 }
11793#endif
11794
11795 /*
11796 * Grab mount_iterref so that we can release the vnode,
11797 * since VFSIOC_REVERT_SNAPSHOT could conceivably cause a sync.
11798 */
11799 error = mount_iterref (mp, 0);
11800 vnode_put(rvp);
11801 if (error) {
11802 FREE(name_buf, M_TEMP);
11803 return (error);
11804 }
11805
11806 memset(&cnp, 0, sizeof(cnp));
11807 cnp.cn_pnbuf = (char *)name_buf;
11808 cnp.cn_nameiop = LOOKUP;
11809 cnp.cn_flags = ISLASTCN | HASBUF;
11810 cnp.cn_pnlen = MAXPATHLEN;
11811 cnp.cn_nameptr = cnp.cn_pnbuf;
11812 cnp.cn_namelen = (int)name_len;
11813 revert_data.sr_cnp = &cnp;
11814
11815 error = VFS_IOCTL(mp, VFSIOC_REVERT_SNAPSHOT, (caddr_t)&revert_data, 0, ctx);
11816 mount_iterdrop(mp);
11817 FREE(name_buf, M_TEMP);
11818
11819 if (error) {
11820 /* If there was any error, try again using VNOP_IOCTL */
11821
11822 vnode_t snapdvp;
11823 struct nameidata namend;
11824
11825 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, LOOKUP,
11826 OP_LOOKUP, ctx);
11827 if (error) {
11828 return (error);
11829 }
11830
11831
11832 error = VNOP_IOCTL(namend.ni_vp, APFSIOC_REVERT_TO_SNAPSHOT, (caddr_t) NULL,
11833 0, ctx);
11834
11835 vnode_put(namend.ni_vp);
11836 nameidone(&namend);
11837 vnode_put(snapdvp);
11838 vnode_put(rvp);
11839 }
11840
11841 return (error);
11842}
11843
11844/*
11845	 * Rename a filesystem snapshot.
11846	 *
11847	 * Get the vnode for the unnamed snapshot directory and the snapshot, and
11848	 * rename the snapshot. This is a very specialised (and simple) case of
11849 * rename(2) (which has to deal with a lot more complications). It differs
11850 * slightly from rename(2) in that EEXIST is returned if the new name exists.
11851 */
11852static int
11853snapshot_rename(int dirfd, user_addr_t old, user_addr_t new,
11854 __unused uint32_t flags, vfs_context_t ctx)
11855{
11856 vnode_t rvp, snapdvp;
11857 int error, i;
11858 caddr_t newname_buf;
11859 size_t name_len;
11860 vnode_t fvp;
11861 struct nameidata *fromnd, *tond;
11862 /* carving out a chunk for structs that are too big to be on stack. */
11863 struct {
11864 struct nameidata from_node;
11865 struct nameidata to_node;
11866 } * __rename_data;
11867
11868 MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
11869 fromnd = &__rename_data->from_node;
11870 tond = &__rename_data->to_node;
11871
11872 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, old, fromnd, DELETE,
11873 OP_UNLINK, ctx);
11874 if (error)
11875 goto out;
11876 fvp = fromnd->ni_vp;
11877
11878 MALLOC(newname_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
11879 error = copyinstr(new, newname_buf, MAXPATHLEN, &name_len);
11880 if (error)
11881 goto out1;
11882
11883 /*
11884	 * Some sanity checks: the new name can't be empty, ".", or "..", and
11885	 * can't contain slashes.
11886	 * (The length returned by copyinstr includes the terminating NUL.)
11887	 *
11888	 * The FS rename VNOP is supposed to handle this, but we pick it
11889	 * off here as well.
11890 */
11891 if ((name_len == 1) || (name_len == 2 && newname_buf[0] == '.') ||
11892 (name_len == 3 && newname_buf[0] == '.' && newname_buf[1] == '.')) {
11893 error = EINVAL;
11894 goto out1;
11895 }
11896 for (i = 0; i < (int)name_len && newname_buf[i] != '/'; i++);
11897 if (i < (int)name_len) {
11898 error = EINVAL;
11899 goto out1;
11900 }
11901
11902#if CONFIG_MACF
11903 error = mac_mount_check_snapshot_create(ctx, vnode_mount(rvp),
11904 newname_buf);
11905 if (error)
11906 goto out1;
11907#endif
11908
11909 NDINIT(tond, RENAME, OP_RENAME, USEDVP | NOCACHE | AUDITVNPATH2,
11910 UIO_SYSSPACE, CAST_USER_ADDR_T(newname_buf), ctx);
11911 tond->ni_dvp = snapdvp;
11912
11913 error = namei(tond);
11914 if (error) {
11915 goto out2;
11916 } else if (tond->ni_vp) {
11917 /*
11918 * snapshot rename behaves differently than rename(2) - if the
11919 * new name exists, EEXIST is returned.
11920 */
11921 vnode_put(tond->ni_vp);
11922 error = EEXIST;
11923 goto out2;
11924 }
11925
11926 error = VNOP_RENAME(snapdvp, fvp, &fromnd->ni_cnd, snapdvp, NULLVP,
11927 &tond->ni_cnd, ctx);
11928
11929out2:
11930 nameidone(tond);
11931out1:
11932 FREE(newname_buf, M_TEMP);
11933 vnode_put(fvp);
11934 vnode_put(snapdvp);
11935 vnode_put(rvp);
11936 nameidone(fromnd);
11937out:
11938 FREE(__rename_data, M_TEMP);
11939 return (error);
11940}
11941
11942/*
11943	 * Mount a filesystem snapshot.
11944	 *
11945	 * Get the vnode for the unnamed snapshot directory and the snapshot, and
11946	 * mount the snapshot.
11947 */
11948static int
11949snapshot_mount(int dirfd, user_addr_t name, user_addr_t directory,
11950 __unused user_addr_t mnt_data, __unused uint32_t flags, vfs_context_t ctx)
11951{
11952 vnode_t rvp, snapdvp, snapvp, vp, pvp;
11953 int error;
11954 struct nameidata *snapndp, *dirndp;
11955 /* carving out a chunk for structs that are too big to be on stack. */
11956 struct {
11957 struct nameidata snapnd;
11958 struct nameidata dirnd;
11959 } * __snapshot_mount_data;
11960
11961 MALLOC(__snapshot_mount_data, void *, sizeof(*__snapshot_mount_data),
11962 M_TEMP, M_WAITOK);
11963 snapndp = &__snapshot_mount_data->snapnd;
11964 dirndp = &__snapshot_mount_data->dirnd;
11965
11966 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, snapndp, LOOKUP,
11967 OP_LOOKUP, ctx);
11968 if (error)
11969 goto out;
11970
11971 snapvp = snapndp->ni_vp;
11972 if (!vnode_mount(rvp) || (vnode_mount(rvp) == dead_mountp)) {
11973 error = EIO;
11974 goto out1;
11975 }
11976
11977 /* Get the vnode to be covered */
11978 NDINIT(dirndp, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
11979 UIO_USERSPACE, directory, ctx);
11980 error = namei(dirndp);
11981 if (error)
11982 goto out1;
11983
11984 vp = dirndp->ni_vp;
11985 pvp = dirndp->ni_dvp;
11986
11987 if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
11988 error = EINVAL;
11989 } else {
11990 mount_t mp = vnode_mount(rvp);
11991 struct fs_snapshot_mount_args smnt_data;
11992
11993 smnt_data.sm_mp = mp;
11994 smnt_data.sm_cnp = &snapndp->ni_cnd;
11995 error = mount_common(mp->mnt_vfsstat.f_fstypename, pvp, vp,
11996 &dirndp->ni_cnd, CAST_USER_ADDR_T(&smnt_data), flags & MNT_DONTBROWSE,
11997 KERNEL_MOUNT_SNAPSHOT, NULL, FALSE, ctx);
11998 }
11999
12000 vnode_put(vp);
12001 vnode_put(pvp);
12002 nameidone(dirndp);
12003out1:
12004 vnode_put(snapvp);
12005 vnode_put(snapdvp);
12006 vnode_put(rvp);
12007 nameidone(snapndp);
12008out:
12009 FREE(__snapshot_mount_data, M_TEMP);
12010 return (error);
12011}
12012
12013/*
12014 * Root from a snapshot of the filesystem
12015 *
12016 * Marks the filesystem to root from the given snapshot on next boot.
12017 */
12018static int
12019snapshot_root(int dirfd, user_addr_t name, __unused uint32_t flags,
12020 vfs_context_t ctx)
12021{
12022 int error;
12023 vnode_t rvp;
12024 mount_t mp;
12025 struct fs_snapshot_root_args root_data;
12026 struct componentname cnp;
12027 caddr_t name_buf;
12028 size_t name_len;
12029
12030 error = vnode_getfromfd(ctx, dirfd, &rvp);
12031 if (error) {
12032 return (error);
12033 }
12034 mp = vnode_mount(rvp);
12035
12036 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
12037 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
12038 if (error) {
12039 FREE(name_buf, M_TEMP);
12040 vnode_put(rvp);
12041 return (error);
12042 }
12043
12044 // XXX MAC checks ?
12045
12046 /*
12047 * Grab mount_iterref so that we can release the vnode,
12048 * since VFSIOC_ROOT_SNAPSHOT could conceivably cause a sync.
12049 */
12050 error = mount_iterref (mp, 0);
12051 vnode_put(rvp);
12052 if (error) {
12053 FREE(name_buf, M_TEMP);
12054 return (error);
12055 }
12056
12057 memset(&cnp, 0, sizeof(cnp));
12058 cnp.cn_pnbuf = (char *)name_buf;
12059 cnp.cn_nameiop = LOOKUP;
12060 cnp.cn_flags = ISLASTCN | HASBUF;
12061 cnp.cn_pnlen = MAXPATHLEN;
12062 cnp.cn_nameptr = cnp.cn_pnbuf;
12063 cnp.cn_namelen = (int)name_len;
12064 root_data.sr_cnp = &cnp;
12065
12066 error = VFS_IOCTL(mp, VFSIOC_ROOT_SNAPSHOT, (caddr_t)&root_data, 0, ctx);
12067
12068 mount_iterdrop(mp);
12069 FREE(name_buf, M_TEMP);
12070
12071 return (error);
12072}
12073
12074/*
12075 * FS snapshot operations dispatcher
12076 */
12077int
12078fs_snapshot(__unused proc_t p, struct fs_snapshot_args *uap,
12079 __unused int32_t *retval)
12080{
12081 int error;
12082 vfs_context_t ctx = vfs_context_current();
12083
12084 AUDIT_ARG(fd, uap->dirfd);
12085 AUDIT_ARG(value32, uap->op);
12086
12087 error = priv_check_cred(vfs_context_ucred(ctx), PRIV_VFS_SNAPSHOT, 0);
12088 if (error)
12089 return (error);
12090
12091 switch (uap->op) {
12092 case SNAPSHOT_OP_CREATE:
12093 error = snapshot_create(uap->dirfd, uap->name1, uap->flags, ctx);
12094 break;
12095 case SNAPSHOT_OP_DELETE:
12096 error = snapshot_delete(uap->dirfd, uap->name1, uap->flags, ctx);
12097 break;
12098 case SNAPSHOT_OP_RENAME:
12099 error = snapshot_rename(uap->dirfd, uap->name1, uap->name2,
12100 uap->flags, ctx);
12101 break;
12102 case SNAPSHOT_OP_MOUNT:
12103 error = snapshot_mount(uap->dirfd, uap->name1, uap->name2,
12104 uap->data, uap->flags, ctx);
12105 break;
12106 case SNAPSHOT_OP_REVERT:
12107 error = snapshot_revert(uap->dirfd, uap->name1, uap->flags, ctx);
12108 break;
12109#if CONFIG_MNT_ROOTSNAP
12110 case SNAPSHOT_OP_ROOT:
12111 error = snapshot_root(uap->dirfd, uap->name1, uap->flags, ctx);
12112 break;
12113#endif /* CONFIG_MNT_ROOTSNAP */
12114 default:
12115 error = ENOSYS;
12116 }
12117
12118 return (error);
12119}
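/*
 * Hypothetical userspace sketch (not part of this file): creating and then
 * deleting a snapshot through the libsystem wrappers that funnel into the
 * fs_snapshot() syscall above. Assumes the fs_snapshot_create() and
 * fs_snapshot_delete() declarations in <sys/snapshot.h>; the caller must
 * hold snapshot privileges, matching the PRIV_VFS_SNAPSHOT check above.
 *
 *   #include <sys/snapshot.h>
 *   #include <fcntl.h>
 *   #include <unistd.h>
 *   #include <stdio.h>
 *
 *   static int snapshot_roundtrip(const char *volume, const char *name)
 *   {
 *       int dirfd = open(volume, O_RDONLY);   // must be the volume root
 *       if (dirfd == -1)
 *           return -1;
 *       if (fs_snapshot_create(dirfd, name, 0) == -1) {
 *           perror("fs_snapshot_create");
 *           close(dirfd);
 *           return -1;
 *       }
 *       int rc = fs_snapshot_delete(dirfd, name, 0);
 *       close(dirfd);
 *       return rc;
 *   }
 */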
12120