1/*
2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */
29/*
30 * Copyright (c) 1982, 1986, 1989, 1991, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95
67 */
68/*
69 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
70 * support for mandatory and extensible security protections. This notice
71 * is included in support of clause 2.2 (b) of the Apple Public License,
72 * Version 2.0.
73 */
74
75#include <sys/param.h>
76#include <sys/systm.h>
77#include <sys/filedesc.h>
78#include <sys/kernel.h>
79#include <sys/vnode_internal.h>
80#include <sys/proc_internal.h>
81#include <sys/kauth.h>
82#include <sys/file_internal.h>
83#include <sys/guarded.h>
84#include <sys/priv.h>
85#include <sys/socket.h>
86#include <sys/socketvar.h>
87#include <sys/stat.h>
88#include <sys/ioctl.h>
89#include <sys/fcntl.h>
90#include <sys/fsctl.h>
91#include <sys/malloc.h>
92#include <sys/mman.h>
93#include <sys/mount.h>
94#include <sys/syslog.h>
95#include <sys/unistd.h>
96#include <sys/resourcevar.h>
97#include <sys/aio_kern.h>
98#include <sys/ev.h>
99#include <kern/locks.h>
100#include <sys/uio_internal.h>
101#include <sys/codesign.h>
102#include <sys/codedir_internal.h>
103#include <sys/mount_internal.h>
104#include <sys/kdebug.h>
105#include <sys/sysproto.h>
106#include <sys/pipe.h>
107#include <sys/spawn.h>
108#include <sys/cprotect.h>
109#include <sys/ubc_internal.h>
110
111#include <kern/kern_types.h>
112#include <kern/kalloc.h>
113#include <kern/waitq.h>
114#include <kern/ipc_misc.h>
115#include <kern/ast.h>
116
117#include <vm/vm_protos.h>
118#include <mach/mach_port.h>
119
120#include <security/audit/audit.h>
121#if CONFIG_MACF
122#include <security/mac_framework.h>
123#endif
124
125#include <stdbool.h>
126#include <os/atomic_private.h>
127#include <os/overflow.h>
128#include <IOKit/IOBSD.h>
129
130#define IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND 0x1
131kern_return_t ipc_object_copyin(ipc_space_t, mach_port_name_t,
132 mach_msg_type_name_t, ipc_port_t *, mach_port_context_t, mach_msg_guard_flags_t *, uint32_t);
133void ipc_port_release_send(ipc_port_t);
134
135void fileport_releasefg(struct fileglob *fg);
136
137/* flags for fp_close_and_unlock */
138#define FD_DUP2RESV 1
139
140/* We don't want these exported */
141
142__private_extern__
143int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
144
145/* Conflict wait queue for when selects collide (opaque type) */
146extern struct waitq select_conflict_queue;
147
148#define f_flag fp_glob->fg_flag
149#define f_type fp_glob->fg_ops->fo_type
150#define f_cred fp_glob->fg_cred
151#define f_ops fp_glob->fg_ops
152#define f_offset fp_glob->fg_offset
153
154ZONE_DEFINE_TYPE(fg_zone, "fileglob", struct fileglob, ZC_ZFREE_CLEARMEM);
155ZONE_DEFINE_ID(ZONE_ID_FILEPROC, "fileproc", struct fileproc, ZC_ZFREE_CLEARMEM);
156
157/*
158 * Descriptor management.
159 */
160int nfiles; /* actual number of open files */
161/*
162 * "uninitialized" ops -- ensure FILEGLOB_DTYPE(fg) always exists
163 */
164static const struct fileops uninitops;
165
166os_refgrp_decl(, f_refgrp, "files refcounts", NULL);
167static LCK_GRP_DECLARE(file_lck_grp, "file");
168
169
170#pragma mark fileglobs
171
172/*!
173 * @function fg_free
174 *
175 * @brief
176 * Free a file structure.
177 */
178static void
179fg_free(struct fileglob *fg)
180{
181 os_atomic_dec(&nfiles, relaxed);
182
183 if (fg->fg_vn_data) {
184 fg_vn_data_free(fgvndata: fg->fg_vn_data);
185 fg->fg_vn_data = NULL;
186 }
187
188 kauth_cred_t cred = fg->fg_cred;
189 if (IS_VALID_CRED(cred)) {
190 kauth_cred_unref(&cred);
191 fg->fg_cred = NOCRED;
192 }
193 lck_mtx_destroy(lck: &fg->fg_lock, grp: &file_lck_grp);
194
195#if CONFIG_MACF && CONFIG_VNGUARD
196 vng_file_label_destroy(fg);
197#endif
198 zfree(fg_zone, fg);
199}
200
/*
 * Take an additional reference on a fileglob.  The caller must hold the
 * proc_fdlock of `p`; this is only asserted on DEBUG/DEVELOPMENT
 * kernels, so `p` is otherwise unused.
 */
OS_ALWAYS_INLINE
void
fg_ref(proc_t p, struct fileglob *fg)
{
#if DEBUG || DEVELOPMENT
	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#else
	(void)p;
#endif
	os_ref_retain_raw(&fg->fg_count, &f_refgrp);
}
212
/*
 * Drop a fileglob reference that is known not to be the last one
 * (the "live" release variant asserts the count never reaches zero here).
 */
void
fg_drop_live(struct fileglob *fg)
{
	os_ref_release_live_raw(&fg->fg_count, &f_refgrp);
}
218
219int
220fg_drop(proc_t p, struct fileglob *fg)
221{
222 struct vnode *vp;
223 struct vfs_context context;
224 int error = 0;
225
226 if (fg == NULL) {
227 return 0;
228 }
229
230 /* Set up context with cred stashed in fg */
231 if (p == current_proc()) {
232 context.vc_thread = current_thread();
233 } else {
234 context.vc_thread = NULL;
235 }
236 context.vc_ucred = fg->fg_cred;
237
238 /*
239 * POSIX record locking dictates that any close releases ALL
240 * locks owned by this process. This is handled by setting
241 * a flag in the unlock to free ONLY locks obeying POSIX
242 * semantics, and not to free BSD-style file locks.
243 * If the descriptor was in a message, POSIX-style locks
244 * aren't passed with the descriptor.
245 */
246 if (p && DTYPE_VNODE == FILEGLOB_DTYPE(fg) &&
247 (p->p_ladvflag & P_LADVLOCK)) {
248 struct flock lf = {
249 .l_whence = SEEK_SET,
250 .l_type = F_UNLCK,
251 };
252
253 vp = (struct vnode *)fg_get_data(fg);
254 if ((error = vnode_getwithref(vp)) == 0) {
255 (void)VNOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX, &context, NULL);
256 (void)vnode_put(vp);
257 }
258 }
259
260 if (os_ref_release_raw(&fg->fg_count, &f_refgrp) == 0) {
261 /*
262 * Since we ensure that fg->fg_ops is always initialized,
263 * it is safe to invoke fo_close on the fg
264 */
265 error = fo_close(fg, ctx: &context);
266
267 fg_free(fg);
268 }
269
270 return error;
271}
272
/*
 * Store the per-type payload pointer into the fileglob (the file in this
 * file casts it back to e.g. a struct vnode * for DTYPE_VNODE entries).
 * On kernels built with pointer authentication, the pointer is signed
 * with a discriminator blended from its storage address and the
 * descriptor type, so a signed value cannot be replayed into a
 * different fileglob or a fileglob of a different type.
 */
inline
void
fg_set_data(
	struct fileglob *fg,
	void *fg_data)
{
	uintptr_t *store = &fg->fg_data;

#if __has_feature(ptrauth_calls)
	int type = FILEGLOB_DTYPE(fg);

	/* NULL is stored unsigned so it remains a plain NULL on load. */
	if (fg_data) {
		type ^= OS_PTRAUTH_DISCRIMINATOR("fileglob.fg_data");
		fg_data = ptrauth_sign_unauthenticated(fg_data,
		    ptrauth_key_process_independent_data,
		    ptrauth_blend_discriminator(store, type));
	}
#endif // __has_feature(ptrauth_calls)

	*store = (uintptr_t)fg_data;
}
294
/*
 * Load the per-type payload pointer from the fileglob, authenticating
 * the signature applied by fg_set_data() on kernels with pointer
 * authentication enabled.  The discriminator must be reconstructed
 * exactly as at signing time (storage address blended with type).
 */
inline
void *
fg_get_data_volatile(struct fileglob *fg)
{
	uintptr_t *store = &fg->fg_data;
	void *fg_data = (void *)*store;

#if __has_feature(ptrauth_calls)
	int type = FILEGLOB_DTYPE(fg);

	/* NULL was stored unsigned, so only authenticate non-NULL values. */
	if (fg_data) {
		type ^= OS_PTRAUTH_DISCRIMINATOR("fileglob.fg_data");
		fg_data = ptrauth_auth_data(fg_data,
		    ptrauth_key_process_independent_data,
		    ptrauth_blend_discriminator(store, type));
	}
#endif // __has_feature(ptrauth_calls)

	return fg_data;
}
315
316static void
317fg_transfer_filelocks(proc_t p, struct fileglob *fg, thread_t thread)
318{
319 struct vnode *vp;
320 struct vfs_context context;
321 struct proc *old_proc = current_proc();
322
323 assert(fg != NULL);
324
325 assert(p != old_proc);
326 context.vc_thread = thread;
327 context.vc_ucred = fg->fg_cred;
328
329 /* Transfer all POSIX Style locks to new proc */
330 if (p && DTYPE_VNODE == FILEGLOB_DTYPE(fg) &&
331 (p->p_ladvflag & P_LADVLOCK)) {
332 struct flock lf = {
333 .l_whence = SEEK_SET,
334 .l_start = 0,
335 .l_len = 0,
336 .l_type = F_TRANSFER,
337 };
338
339 vp = (struct vnode *)fg_get_data(fg);
340 if (vnode_getwithref(vp) == 0) {
341 (void)VNOP_ADVLOCK(vp, (caddr_t)old_proc, F_TRANSFER, &lf, F_POSIX, &context, NULL);
342 (void)vnode_put(vp);
343 }
344 }
345
346 /* Transfer all OFD Style locks to new proc */
347 if (p && DTYPE_VNODE == FILEGLOB_DTYPE(fg) &&
348 (fg->fg_lflags & FG_HAS_OFDLOCK)) {
349 struct flock lf = {
350 .l_whence = SEEK_SET,
351 .l_start = 0,
352 .l_len = 0,
353 .l_type = F_TRANSFER,
354 };
355
356 vp = (struct vnode *)fg_get_data(fg);
357 if (vnode_getwithref(vp) == 0) {
358 (void)VNOP_ADVLOCK(vp, ofd_to_id(fg), F_TRANSFER, &lf, F_OFD_LOCK, &context, NULL);
359 (void)vnode_put(vp);
360 }
361 }
362 return;
363}
364
365bool
366fg_sendable(struct fileglob *fg)
367{
368 switch (FILEGLOB_DTYPE(fg)) {
369 case DTYPE_VNODE:
370 case DTYPE_SOCKET:
371 case DTYPE_PIPE:
372 case DTYPE_PSXSHM:
373 case DTYPE_NETPOLICY:
374 return (fg->fg_lflags & FG_CONFINED) == 0;
375
376 default:
377 return false;
378 }
379}
380
381#pragma mark file descriptor table (static helpers)
382
/*
 * Mark slot `fd` in the process's open file table as reserved: the slot
 * holds no fileproc yet, but UF_RESERVED keeps other allocators away
 * until procfdtbl_releasefd() or procfdtbl_clearfd() settles it.
 * Caller holds the proc_fdlock.
 */
static void
procfdtbl_reservefd(struct proc * p, int fd)
{
	p->p_fd.fd_ofiles[fd] = NULL;
	p->p_fd.fd_ofileflags[fd] |= UF_RESERVED;
}
389
390void
391procfdtbl_releasefd(struct proc * p, int fd, struct fileproc * fp)
392{
393 if (fp != NULL) {
394 p->p_fd.fd_ofiles[fd] = fp;
395 }
396 p->p_fd.fd_ofileflags[fd] &= ~UF_RESERVED;
397 if ((p->p_fd.fd_ofileflags[fd] & UF_RESVWAIT) == UF_RESVWAIT) {
398 p->p_fd.fd_ofileflags[fd] &= ~UF_RESVWAIT;
399 wakeup(chan: &p->p_fd);
400 }
401}
402
403static void
404procfdtbl_waitfd(struct proc * p, int fd)
405{
406 p->p_fd.fd_ofileflags[fd] |= UF_RESVWAIT;
407 msleep(chan: &p->p_fd, mtx: &p->p_fd.fd_lock, PRIBIO, wmesg: "ftbl_waitfd", NULL);
408}
409
410static void
411procfdtbl_clearfd(struct proc * p, int fd)
412{
413 int waiting;
414
415 waiting = (p->p_fd.fd_ofileflags[fd] & UF_RESVWAIT);
416 p->p_fd.fd_ofiles[fd] = NULL;
417 p->p_fd.fd_ofileflags[fd] = 0;
418 if (waiting == UF_RESVWAIT) {
419 wakeup(chan: &p->p_fd);
420 }
421}
422
423/*
424 * fdrelse
425 *
426 * Description: Inline utility function to free an fd in a filedesc
427 *
428 * Parameters: fdp Pointer to filedesc fd lies in
429 * fd fd to free
430 * reserv fd should be reserved
431 *
432 * Returns: void
433 *
434 * Locks: Assumes proc_fdlock for process pointing to fdp is held by
435 * the caller
436 */
437void
438fdrelse(struct proc * p, int fd)
439{
440 struct filedesc *fdp = &p->p_fd;
441 int nfd = 0;
442
443 if (fd < fdp->fd_freefile) {
444 fdp->fd_freefile = fd;
445 }
446#if DIAGNOSTIC
447 if (fd >= fdp->fd_afterlast) {
448 panic("fdrelse: fd_afterlast inconsistent");
449 }
450#endif
451 procfdtbl_clearfd(p, fd);
452
453 nfd = fdp->fd_afterlast;
454 while (nfd > 0 && fdp->fd_ofiles[nfd - 1] == NULL &&
455 !(fdp->fd_ofileflags[nfd - 1] & UF_RESERVED)) {
456 nfd--;
457 }
458 fdp->fd_afterlast = nfd;
459
460#if CONFIG_PROC_RESOURCE_LIMITS
461 fdp->fd_nfiles_open--;
462#endif /* CONFIG_PROC_RESOURCE_LIMITS */
463}
464
465
466/*
467 * finishdup
468 *
469 * Description: Common code for dup, dup2, and fcntl(F_DUPFD).
470 *
471 * Parameters: p Process performing the dup
472 * old The fd to dup
473 * new The fd to dup it to
474 * fp_flags Flags to augment the new fp
475 * retval Pointer to the call return area
476 *
477 * Returns: 0 Success
478 * EBADF
479 * ENOMEM
480 *
481 * Implicit returns:
482 * *retval (modified) The new descriptor
483 *
484 * Locks: Assumes proc_fdlock for process pointing to fdp is held by
485 * the caller
486 *
487 * Notes: This function may drop and reacquire this lock; it is unsafe
488 * for a caller to assume that other state protected by the lock
489 * has not been subsequently changed out from under it.
490 */
491static int
492finishdup(
493 proc_t p,
494 kauth_cred_t p_cred,
495 int old,
496 int new,
497 fileproc_flags_t fp_flags,
498 int32_t *retval)
499{
500 struct filedesc *fdp = &p->p_fd;
501 struct fileproc *nfp;
502 struct fileproc *ofp;
503#if CONFIG_MACF
504 int error;
505#endif
506
507#if DIAGNOSTIC
508 proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
509#endif
510 if ((ofp = fdp->fd_ofiles[old]) == NULL ||
511 (fdp->fd_ofileflags[old] & UF_RESERVED)) {
512 fdrelse(p, fd: new);
513 return EBADF;
514 }
515
516#if CONFIG_MACF
517 error = mac_file_check_dup(cred: p_cred, fg: ofp->fp_glob, newfd: new);
518
519 if (error) {
520 fdrelse(p, fd: new);
521 return error;
522 }
523#else
524 (void)p_cred;
525#endif
526
527 fg_ref(p, fg: ofp->fp_glob);
528
529 proc_fdunlock(p);
530
531 nfp = fileproc_alloc_init();
532
533 if (fp_flags) {
534 nfp->fp_flags |= fp_flags;
535 }
536 nfp->fp_glob = ofp->fp_glob;
537
538 proc_fdlock(p);
539
540#if DIAGNOSTIC
541 if (fdp->fd_ofiles[new] != 0) {
542 panic("finishdup: overwriting fd_ofiles with new %d", new);
543 }
544 if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0) {
545 panic("finishdup: unreserved fileflags with new %d", new);
546 }
547#endif
548
549 if (new >= fdp->fd_afterlast) {
550 fdp->fd_afterlast = new + 1;
551 }
552 procfdtbl_releasefd(p, fd: new, fp: nfp);
553 *retval = new;
554 return 0;
555}
556
557
558#pragma mark file descriptor table (exported functions)
559
560void
561proc_dirs_lock_shared(proc_t p)
562{
563 lck_rw_lock_shared(lck: &p->p_fd.fd_dirs_lock);
564}
565
566void
567proc_dirs_unlock_shared(proc_t p)
568{
569 lck_rw_unlock_shared(lck: &p->p_fd.fd_dirs_lock);
570}
571
572void
573proc_dirs_lock_exclusive(proc_t p)
574{
575 lck_rw_lock_exclusive(lck: &p->p_fd.fd_dirs_lock);
576}
577
578void
579proc_dirs_unlock_exclusive(proc_t p)
580{
581 lck_rw_unlock_exclusive(lck: &p->p_fd.fd_dirs_lock);
582}
583
584/*
585 * proc_fdlock, proc_fdlock_spin
586 *
587 * Description: Lock to control access to the per process struct fileproc
588 * and struct filedesc
589 *
590 * Parameters: p Process to take the lock on
591 *
592 * Returns: void
593 *
594 * Notes: The lock is initialized in forkproc() and destroyed in
595 * reap_child_process().
596 */
597void
598proc_fdlock(proc_t p)
599{
600 lck_mtx_lock(lck: &p->p_fd.fd_lock);
601}
602
603void
604proc_fdlock_spin(proc_t p)
605{
606 lck_mtx_lock_spin(lck: &p->p_fd.fd_lock);
607}
608
609void
610proc_fdlock_assert(proc_t p, int assertflags)
611{
612 lck_mtx_assert(lck: &p->p_fd.fd_lock, type: assertflags);
613}
614
615
616/*
617 * proc_fdunlock
618 *
619 * Description: Unlock the lock previously locked by a call to proc_fdlock()
620 *
621 * Parameters: p Process to drop the lock on
622 *
623 * Returns: void
624 */
625void
626proc_fdunlock(proc_t p)
627{
628 lck_mtx_unlock(lck: &p->p_fd.fd_lock);
629}
630
631bool
632fdt_available_locked(proc_t p, int n)
633{
634 struct filedesc *fdp = &p->p_fd;
635 struct fileproc **fpp;
636 char *flags;
637 int i;
638 int lim = proc_limitgetcur_nofile(p);
639
640 if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0) {
641 return true;
642 }
643 fpp = &fdp->fd_ofiles[fdp->fd_freefile];
644 flags = &fdp->fd_ofileflags[fdp->fd_freefile];
645 for (i = fdp->fd_nfiles - fdp->fd_freefile; --i >= 0; fpp++, flags++) {
646 if (*fpp == NULL && !(*flags & UF_RESERVED) && --n <= 0) {
647 return true;
648 }
649 }
650 return false;
651}
652
653
654struct fdt_iterator
655fdt_next(proc_t p, int fd, bool only_settled)
656{
657 struct fdt_iterator it;
658 struct filedesc *fdp = &p->p_fd;
659 struct fileproc *fp;
660 int nfds = fdp->fd_afterlast;
661
662 while (++fd < nfds) {
663 fp = fdp->fd_ofiles[fd];
664 if (fp == NULL || fp->fp_glob == NULL) {
665 continue;
666 }
667 if (only_settled && (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
668 continue;
669 }
670 it.fdti_fd = fd;
671 it.fdti_fp = fp;
672 return it;
673 }
674
675 it.fdti_fd = nfds;
676 it.fdti_fp = NULL;
677 return it;
678}
679
680struct fdt_iterator
681fdt_prev(proc_t p, int fd, bool only_settled)
682{
683 struct fdt_iterator it;
684 struct filedesc *fdp = &p->p_fd;
685 struct fileproc *fp;
686
687 while (--fd >= 0) {
688 fp = fdp->fd_ofiles[fd];
689 if (fp == NULL || fp->fp_glob == NULL) {
690 continue;
691 }
692 if (only_settled && (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
693 continue;
694 }
695 it.fdti_fd = fd;
696 it.fdti_fp = fp;
697 return it;
698 }
699
700 it.fdti_fd = -1;
701 it.fdti_fp = NULL;
702 return it;
703}
704
705void
706fdt_init(proc_t p)
707{
708 struct filedesc *fdp = &p->p_fd;
709
710 lck_mtx_init(lck: &fdp->fd_kqhashlock, grp: &proc_kqhashlock_grp, attr: &proc_lck_attr);
711 lck_mtx_init(lck: &fdp->fd_knhashlock, grp: &proc_knhashlock_grp, attr: &proc_lck_attr);
712 lck_mtx_init(lck: &fdp->fd_lock, grp: &proc_fdmlock_grp, attr: &proc_lck_attr);
713 lck_rw_init(lck: &fdp->fd_dirs_lock, grp: &proc_dirslock_grp, attr: &proc_lck_attr);
714}
715
716void
717fdt_destroy(proc_t p)
718{
719 struct filedesc *fdp = &p->p_fd;
720
721 lck_mtx_destroy(lck: &fdp->fd_kqhashlock, grp: &proc_kqhashlock_grp);
722 lck_mtx_destroy(lck: &fdp->fd_knhashlock, grp: &proc_knhashlock_grp);
723 lck_mtx_destroy(lck: &fdp->fd_lock, grp: &proc_fdmlock_grp);
724 lck_rw_destroy(lck: &fdp->fd_dirs_lock, grp: &proc_dirslock_grp);
725}
726
727void
728fdt_exec(proc_t p, kauth_cred_t p_cred, short posix_spawn_flags, thread_t thread, bool in_exec)
729{
730 struct filedesc *fdp = &p->p_fd;
731 thread_t self = current_thread();
732 struct uthread *ut = get_bsdthread_info(self);
733 struct kqworkq *dealloc_kqwq = NULL;
734
735 /*
736 * If the current thread is bound as a workq/workloop
737 * servicing thread, we need to unbind it first.
738 */
739 if (ut->uu_kqr_bound && get_bsdthreadtask_info(self) == p) {
740 kqueue_threadreq_unbind(p, ut->uu_kqr_bound);
741 }
742
743 /*
744 * Deallocate the knotes for this process
745 * and mark the tables non-existent so
746 * subsequent kqueue closes go faster.
747 */
748 knotes_dealloc(p);
749 assert(fdp->fd_knlistsize == 0);
750 assert(fdp->fd_knhashmask == 0);
751
752 proc_fdlock(p);
753
754 /* Set the P_LADVLOCK flag if the flag set on old proc */
755 if (in_exec && (current_proc()->p_ladvflag & P_LADVLOCK)) {
756 os_atomic_or(&p->p_ladvflag, P_LADVLOCK, relaxed);
757 }
758
759 for (int i = fdp->fd_afterlast; i-- > 0;) {
760 struct fileproc *fp = fdp->fd_ofiles[i];
761 char *flagp = &fdp->fd_ofileflags[i];
762 bool inherit_file = true;
763
764 if (fp == FILEPROC_NULL) {
765 continue;
766 }
767
768 /*
769 * no file descriptor should be in flux when in exec,
770 * because we stopped all other threads
771 */
772 if (*flagp & ~UF_INHERIT) {
773 panic("file %d/%p in flux during exec of %p", i, fp, p);
774 }
775
776 if (fp->fp_flags & FP_CLOEXEC) {
777 inherit_file = false;
778 } else if ((posix_spawn_flags & POSIX_SPAWN_CLOEXEC_DEFAULT) &&
779 !(*flagp & UF_INHERIT)) {
780 /*
781 * Reverse the usual semantics of file descriptor
782 * inheritance - all of them should be closed
783 * except files marked explicitly as "inherit" and
784 * not marked close-on-exec.
785 */
786 inherit_file = false;
787#if CONFIG_MACF
788 } else if (mac_file_check_inherit(cred: p_cred, fg: fp->fp_glob)) {
789 inherit_file = false;
790#endif
791 }
792
793 *flagp = 0; /* clear UF_INHERIT */
794
795 if (!inherit_file) {
796 fp_close_and_unlock(p, p_cred, fd: i, fp, flags: 0);
797 proc_fdlock(p);
798 } else if (in_exec) {
799 /* Transfer F_POSIX style lock to new proc */
800 proc_fdunlock(p);
801 fg_transfer_filelocks(p, fg: fp->fp_glob, thread);
802 proc_fdlock(p);
803 }
804 }
805
806 /* release the per-process workq kq */
807 if (fdp->fd_wqkqueue) {
808 dealloc_kqwq = fdp->fd_wqkqueue;
809 fdp->fd_wqkqueue = NULL;
810 }
811
812 proc_fdunlock(p);
813
814 /* Anything to free? */
815 if (dealloc_kqwq) {
816 kqworkq_dealloc(kqwq: dealloc_kqwq);
817 }
818}
819
820
821int
822fdt_fork(struct filedesc *newfdp, proc_t p, vnode_t uth_cdir, bool in_exec)
823{
824 struct filedesc *fdp = &p->p_fd;
825 struct fileproc **ofiles;
826 char *ofileflags;
827 int n_files, afterlast, freefile;
828 vnode_t v_dir;
829#if CONFIG_PROC_RESOURCE_LIMITS
830 int fd_nfiles_open = 0;
831#endif /* CONFIG_PROC_RESOURCE_LIMITS */
832 proc_fdlock(p);
833
834 newfdp->fd_flags = (fdp->fd_flags & FILEDESC_FORK_INHERITED_MASK);
835 newfdp->fd_cmask = fdp->fd_cmask;
836#if CONFIG_PROC_RESOURCE_LIMITS
837 newfdp->fd_nfiles_soft_limit = fdp->fd_nfiles_soft_limit;
838 newfdp->fd_nfiles_hard_limit = fdp->fd_nfiles_hard_limit;
839
840 newfdp->kqwl_dyn_soft_limit = fdp->kqwl_dyn_soft_limit;
841 newfdp->kqwl_dyn_hard_limit = fdp->kqwl_dyn_hard_limit;
842#endif /* CONFIG_PROC_RESOURCE_LIMITS */
843
844 /*
845 * For both fd_cdir and fd_rdir make sure we get
846 * a valid reference... if we can't, than set
847 * set the pointer(s) to NULL in the child... this
848 * will keep us from using a non-referenced vp
849 * and allows us to do the vnode_rele only on
850 * a properly referenced vp
851 */
852 if ((v_dir = fdp->fd_rdir)) {
853 if (vnode_getwithref(vp: v_dir) == 0) {
854 if (vnode_ref(vp: v_dir) == 0) {
855 newfdp->fd_rdir = v_dir;
856 }
857 vnode_put(vp: v_dir);
858 }
859 if (newfdp->fd_rdir == NULL) {
860 /*
861 * We couldn't get a new reference on
862 * the chroot directory being
863 * inherited... this is fatal, since
864 * otherwise it would constitute an
865 * escape from a chroot environment by
866 * the new process.
867 */
868 proc_fdunlock(p);
869 return EPERM;
870 }
871 }
872
873 /*
874 * If we are running with per-thread current working directories,
875 * inherit the new current working directory from the current thread.
876 */
877 if ((v_dir = uth_cdir ? uth_cdir : fdp->fd_cdir)) {
878 if (vnode_getwithref(vp: v_dir) == 0) {
879 if (vnode_ref(vp: v_dir) == 0) {
880 newfdp->fd_cdir = v_dir;
881 }
882 vnode_put(vp: v_dir);
883 }
884 if (newfdp->fd_cdir == NULL && v_dir == fdp->fd_cdir) {
885 /*
886 * we couldn't get a new reference on
887 * the current working directory being
888 * inherited... we might as well drop
889 * our reference from the parent also
890 * since the vnode has gone DEAD making
891 * it useless... by dropping it we'll
892 * be that much closer to recycling it
893 */
894 vnode_rele(vp: fdp->fd_cdir);
895 fdp->fd_cdir = NULL;
896 }
897 }
898
899 /*
900 * If the number of open files fits in the internal arrays
901 * of the open file structure, use them, otherwise allocate
902 * additional memory for the number of descriptors currently
903 * in use.
904 */
905 afterlast = fdp->fd_afterlast;
906 freefile = fdp->fd_freefile;
907 if (afterlast <= NDFILE) {
908 n_files = NDFILE;
909 } else {
910 n_files = roundup(afterlast, NDEXTENT);
911 }
912
913 proc_fdunlock(p);
914
915 ofiles = kalloc_type(struct fileproc *, n_files, Z_WAITOK | Z_ZERO);
916 ofileflags = kalloc_data(n_files, Z_WAITOK | Z_ZERO);
917 if (ofiles == NULL || ofileflags == NULL) {
918 kfree_type(struct fileproc *, n_files, ofiles);
919 kfree_data(ofileflags, n_files);
920 if (newfdp->fd_cdir) {
921 vnode_rele(vp: newfdp->fd_cdir);
922 newfdp->fd_cdir = NULL;
923 }
924 if (newfdp->fd_rdir) {
925 vnode_rele(vp: newfdp->fd_rdir);
926 newfdp->fd_rdir = NULL;
927 }
928 return ENOMEM;
929 }
930
931 proc_fdlock(p);
932
933 for (int i = afterlast; i-- > 0;) {
934 struct fileproc *ofp, *nfp;
935 char flags;
936
937 ofp = fdp->fd_ofiles[i];
938 flags = fdp->fd_ofileflags[i];
939
940 if (ofp == NULL ||
941 (ofp->fp_glob->fg_lflags & FG_CONFINED) ||
942 ((ofp->fp_flags & FP_CLOFORK) && !in_exec) ||
943 ((ofp->fp_flags & FP_CLOEXEC) && in_exec) ||
944 (flags & UF_RESERVED)) {
945 if (i + 1 == afterlast) {
946 afterlast = i;
947 }
948 if (i < freefile) {
949 freefile = i;
950 }
951
952 continue;
953 }
954
955 nfp = fileproc_alloc_init();
956 nfp->fp_glob = ofp->fp_glob;
957 if (in_exec) {
958 nfp->fp_flags = (ofp->fp_flags & (FP_CLOEXEC | FP_CLOFORK));
959 if (ofp->fp_guard_attrs) {
960 guarded_fileproc_copy_guard(ofp, nfp);
961 }
962 } else {
963 assert(ofp->fp_guard_attrs == 0);
964 nfp->fp_flags = (ofp->fp_flags & FP_CLOEXEC);
965 }
966 fg_ref(p, fg: nfp->fp_glob);
967
968 ofiles[i] = nfp;
969#if CONFIG_PROC_RESOURCE_LIMITS
970 fd_nfiles_open++;
971#endif /* CONFIG_PROC_RESOURCE_LIMITS */
972 }
973
974 proc_fdunlock(p);
975
976 newfdp->fd_ofiles = ofiles;
977 newfdp->fd_ofileflags = ofileflags;
978 newfdp->fd_nfiles = n_files;
979 newfdp->fd_afterlast = afterlast;
980 newfdp->fd_freefile = freefile;
981
982#if CONFIG_PROC_RESOURCE_LIMITS
983 newfdp->fd_nfiles_open = fd_nfiles_open;
984#endif /* CONFIG_PROC_RESOURCE_LIMITS */
985
986 return 0;
987}
988
989void
990fdt_invalidate(proc_t p)
991{
992 struct filedesc *fdp = &p->p_fd;
993 struct fileproc *fp, **ofiles;
994 kauth_cred_t p_cred;
995 char *ofileflags;
996 struct kqworkq *kqwq = NULL;
997 vnode_t vn1 = NULL, vn2 = NULL;
998 struct kqwllist *kqhash = NULL;
999 u_long kqhashmask = 0;
1000 int n_files = 0;
1001
1002 /*
1003 * deallocate all the knotes up front and claim empty
1004 * tables to make any subsequent kqueue closes faster.
1005 */
1006 knotes_dealloc(p);
1007 assert(fdp->fd_knlistsize == 0);
1008 assert(fdp->fd_knhashmask == 0);
1009
1010 /*
1011 * dealloc all workloops that have outstanding retains
1012 * when created with scheduling parameters.
1013 */
1014 kqworkloops_dealloc(p);
1015
1016 proc_fdlock(p);
1017
1018 /* proc_ucred_unsafe() is ok: process is terminating */
1019 p_cred = proc_ucred_unsafe(p);
1020
1021 /* close file descriptors */
1022 if (fdp->fd_nfiles > 0 && fdp->fd_ofiles) {
1023 for (int i = fdp->fd_afterlast; i-- > 0;) {
1024 if ((fp = fdp->fd_ofiles[i]) != NULL) {
1025 if (fdp->fd_ofileflags[i] & UF_RESERVED) {
1026 panic("fdfree: found fp with UF_RESERVED");
1027 }
1028 /* proc_ucred_unsafe() is ok: process is terminating */
1029 fp_close_and_unlock(p, p_cred, fd: i, fp, flags: 0);
1030 proc_fdlock(p);
1031 }
1032 }
1033 }
1034
1035 n_files = fdp->fd_nfiles;
1036 ofileflags = fdp->fd_ofileflags;
1037 ofiles = fdp->fd_ofiles;
1038 kqwq = fdp->fd_wqkqueue;
1039 vn1 = fdp->fd_cdir;
1040 vn2 = fdp->fd_rdir;
1041
1042 fdp->fd_ofileflags = NULL;
1043 fdp->fd_ofiles = NULL;
1044 fdp->fd_nfiles = 0;
1045 fdp->fd_wqkqueue = NULL;
1046 fdp->fd_cdir = NULL;
1047 fdp->fd_rdir = NULL;
1048
1049 proc_fdunlock(p);
1050
1051 lck_mtx_lock(lck: &fdp->fd_kqhashlock);
1052
1053 kqhash = fdp->fd_kqhash;
1054 kqhashmask = fdp->fd_kqhashmask;
1055
1056 fdp->fd_kqhash = 0;
1057 fdp->fd_kqhashmask = 0;
1058
1059 lck_mtx_unlock(lck: &fdp->fd_kqhashlock);
1060
1061 kfree_type(struct fileproc *, n_files, ofiles);
1062 kfree_data(ofileflags, n_files);
1063
1064 if (kqwq) {
1065 kqworkq_dealloc(kqwq);
1066 }
1067 if (vn1) {
1068 vnode_rele(vp: vn1);
1069 }
1070 if (vn2) {
1071 vnode_rele(vp: vn2);
1072 }
1073 if (kqhash) {
1074 for (uint32_t i = 0; i <= kqhashmask; i++) {
1075 assert(LIST_EMPTY(&kqhash[i]));
1076 }
1077 hashdestroy(kqhash, M_KQUEUE, hashmask: kqhashmask);
1078 }
1079}
1080
1081
/*
 * Allocate a zeroed fileproc from its dedicated zone and initialize its
 * I/O reference count to 1.  Z_NOFAIL means the allocation may block
 * but never returns NULL.
 */
struct fileproc *
fileproc_alloc_init(void)
{
	struct fileproc *fp;

	fp = zalloc_id(ZONE_ID_FILEPROC, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	os_ref_init(&fp->fp_iocount, &f_refgrp);
	return fp;
}
1091
1092
1093void
1094fileproc_free(struct fileproc *fp)
1095{
1096 os_ref_count_t __unused refc = os_ref_release(rc: &fp->fp_iocount);
1097#if DEVELOPMENT || DEBUG
1098 if (0 != refc) {
1099 panic("%s: pid %d refc: %u != 0",
1100 __func__, proc_pid(current_proc()), refc);
1101 }
1102#endif
1103 if (fp->fp_guard_attrs) {
1104 guarded_fileproc_unguard(fp);
1105 }
1106 assert(fp->fp_wset == NULL);
1107 zfree_id(ZONE_ID_FILEPROC, fp);
1108}
1109
1110
1111/*
1112 * Statistics counter for the number of times a process calling fdalloc()
1113 * has resulted in an expansion of the per process open file table.
1114 *
1115 * XXX This would likely be of more use if it were per process
1116 */
1117int fdexpand;
1118
1119#if CONFIG_PROC_RESOURCE_LIMITS
1120/*
1121 * Should be called only with the proc_fdlock held.
1122 */
1123void
1124fd_check_limit_exceeded(struct filedesc *fdp)
1125{
1126#if DIAGNOSTIC
1127 proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
1128#endif
1129
1130 if (!fd_above_soft_limit_notified(fdp) && fdp->fd_nfiles_soft_limit &&
1131 (fdp->fd_nfiles_open > fdp->fd_nfiles_soft_limit)) {
1132 fd_above_soft_limit_send_notification(fdp);
1133 act_set_astproc_resource(current_thread());
1134 } else if (!fd_above_hard_limit_notified(fdp) && fdp->fd_nfiles_hard_limit &&
1135 (fdp->fd_nfiles_open > fdp->fd_nfiles_hard_limit)) {
1136 fd_above_hard_limit_send_notification(fdp);
1137 act_set_astproc_resource(current_thread());
1138 }
1139}
1140#endif /* CONFIG_PROC_RESOURCE_LIMITS */
1141
1142/*
1143 * fdalloc
1144 *
1145 * Description: Allocate a file descriptor for the process.
1146 *
1147 * Parameters: p Process to allocate the fd in
1148 * want The fd we would prefer to get
1149 * result Pointer to fd we got
1150 *
1151 * Returns: 0 Success
1152 * EMFILE
1153 * ENOMEM
1154 *
1155 * Implicit returns:
1156 * *result (modified) The fd which was allocated
1157 */
int
fdalloc(proc_t p, int want, int *result)
{
	struct filedesc *fdp = &p->p_fd;
	int i;
	int last, numfiles, oldnfiles;
	struct fileproc **newofiles;
	char *newofileflags;
	int lim = proc_limitgetcur_nofile(p);

	/*
	 * Search for a free descriptor starting at the higher
	 * of want or fd_freefile. If that fails, consider
	 * expanding the ofile array.
	 */
#if DIAGNOSTIC
	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif

	for (;;) {
		/* Never scan past the smaller of the table size and the rlimit. */
		last = (int)MIN((unsigned int)fdp->fd_nfiles, (unsigned int)lim);
		if ((i = want) < fdp->fd_freefile) {
			/* fd_freefile is the lowest possibly-free slot; skip below it. */
			i = fdp->fd_freefile;
		}
		for (; i < last; i++) {
			if (fdp->fd_ofiles[i] == NULL && !(fdp->fd_ofileflags[i] & UF_RESERVED)) {
				/* Mark the slot UF_RESERVED so nobody else can claim it. */
				procfdtbl_reservefd(p, fd: i);
				if (i >= fdp->fd_afterlast) {
					/* Track one past the highest fd in use. */
					fdp->fd_afterlast = i + 1;
				}
				if (want <= fdp->fd_freefile) {
					/* New low-water mark for the free-slot search. */
					fdp->fd_freefile = i;
				}
				*result = i;
#if CONFIG_PROC_RESOURCE_LIMITS
				fdp->fd_nfiles_open++;
				fd_check_limit_exceeded(fdp);
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
				return 0;
			}
		}

		/*
		 * No space in current array. Expand?
		 */
		if ((rlim_t)fdp->fd_nfiles >= lim) {
			/* Table is already at the rlimit; it cannot grow further. */
			return EMFILE;
		}
		if (fdp->fd_nfiles < NDEXTENT) {
			numfiles = NDEXTENT;
		} else {
			/* Double each time to amortize the copy cost. */
			numfiles = 2 * fdp->fd_nfiles;
		}
		/* Enforce lim */
		if ((rlim_t)numfiles > lim) {
			numfiles = (int)lim;
		}
		/*
		 * Drop the fdlock across the (possibly blocking) allocations.
		 * Another thread may grow the table while it is released,
		 * hence the re-check below.
		 */
		proc_fdunlock(p);
		newofiles = kalloc_type(struct fileproc *, numfiles, Z_WAITOK | Z_ZERO);
		newofileflags = kalloc_data(numfiles, Z_WAITOK | Z_ZERO);
		proc_fdlock(p);
		if (newofileflags == NULL || newofiles == NULL) {
			kfree_type(struct fileproc *, numfiles, newofiles);
			kfree_data(newofileflags, numfiles);
			return ENOMEM;
		}
		if (fdp->fd_nfiles >= numfiles) {
			/* Someone else grew the table while we slept; retry the scan. */
			kfree_type(struct fileproc *, numfiles, newofiles);
			kfree_data(newofileflags, numfiles);
			continue;
		}

		/*
		 * Copy the existing ofile and ofileflags arrays
		 * and zero the new portion of each array.
		 */
		oldnfiles = fdp->fd_nfiles;
		memcpy(dst: newofiles, src: fdp->fd_ofiles,
		    n: oldnfiles * sizeof(*fdp->fd_ofiles));
		memcpy(dst: newofileflags, src: fdp->fd_ofileflags, n: oldnfiles);

		kfree_type(struct fileproc *, oldnfiles, fdp->fd_ofiles);
		kfree_data(fdp->fd_ofileflags, oldnfiles);
		fdp->fd_ofiles = newofiles;
		fdp->fd_ofileflags = newofileflags;
		fdp->fd_nfiles = numfiles;
		/* Global statistics counter; see comment above its definition. */
		fdexpand++;
	}
}
1247
1248
1249#pragma mark fileprocs
1250
1251void
1252fileproc_modify_vflags(struct fileproc *fp, fileproc_vflags_t vflags, boolean_t clearflags)
1253{
1254 if (clearflags) {
1255 os_atomic_andnot(&fp->fp_vflags, vflags, relaxed);
1256 } else {
1257 os_atomic_or(&fp->fp_vflags, vflags, relaxed);
1258 }
1259}
1260
1261fileproc_vflags_t
1262fileproc_get_vflags(struct fileproc *fp)
1263{
1264 return os_atomic_load(&fp->fp_vflags, relaxed);
1265}
1266
1267/*
1268 * falloc_withinit
1269 *
1270 * Create a new open file structure and allocate
1271 * a file descriptor for the process that refers to it.
1272 *
1273 * Returns: 0 Success
1274 *
1275 * Description: Allocate an entry in the per process open file table and
1276 * return the corresponding fileproc and fd.
1277 *
1278 * Parameters: p The process in whose open file
1279 * table the fd is to be allocated
1280 * resultfp Pointer to fileproc pointer
1281 * return area
1282 * resultfd Pointer to fd return area
1283 * ctx VFS context
1284 * fp_zalloc fileproc allocator to use
1285 * crarg allocator args
1286 *
1287 * Returns: 0 Success
1288 * ENFILE Too many open files in system
1289 * fdalloc:EMFILE Too many open files in process
1290 * fdalloc:ENOMEM M_OFILETABL zone exhausted
1291 * ENOMEM fp_zone or fg_zone zone
1292 * exhausted
1293 *
1294 * Implicit returns:
 *		*resultfp (modified)		Returned fileproc pointer
 *		*resultfd (modified)		Returned fd
1297 *
1298 * Notes: This function takes separate process and context arguments
1299 * solely to support kern_exec.c; otherwise, it would take
1300 * neither, and use the vfs_context_current() routine internally.
1301 */
1302int
1303falloc_withinit(
1304 proc_t p,
1305 struct ucred *p_cred,
1306 struct vfs_context *ctx,
1307 struct fileproc **resultfp,
1308 int *resultfd,
1309 fp_initfn_t fp_init,
1310 void *initarg)
1311{
1312 struct fileproc *fp;
1313 struct fileglob *fg;
1314 int error, nfd;
1315
1316 /* Make sure we don't go beyond the system-wide limit */
1317 if (nfiles >= maxfiles) {
1318 tablefull("file");
1319 return ENFILE;
1320 }
1321
1322 proc_fdlock(p);
1323
1324 /* fdalloc will make sure the process stays below per-process limit */
1325 if ((error = fdalloc(p, want: 0, result: &nfd))) {
1326 proc_fdunlock(p);
1327 return error;
1328 }
1329
1330#if CONFIG_MACF
1331 error = mac_file_check_create(cred: p_cred);
1332 if (error) {
1333 proc_fdunlock(p);
1334 return error;
1335 }
1336#else
1337 (void)p_cred;
1338#endif
1339
1340 /*
1341 * Allocate a new file descriptor.
1342 * If the process has file descriptor zero open, add to the list
1343 * of open files at that point, otherwise put it at the front of
1344 * the list of open files.
1345 */
1346 proc_fdunlock(p);
1347
1348 fp = fileproc_alloc_init();
1349 if (fp_init) {
1350 fp_init(fp, initarg);
1351 }
1352
1353 fg = zalloc_flags(fg_zone, Z_WAITOK | Z_ZERO);
1354 lck_mtx_init(lck: &fg->fg_lock, grp: &file_lck_grp, LCK_ATTR_NULL);
1355
1356 os_ref_retain_locked(rc: &fp->fp_iocount);
1357 os_ref_init_raw(&fg->fg_count, &f_refgrp);
1358 fg->fg_ops = &uninitops;
1359 fp->fp_glob = fg;
1360
1361 kauth_cred_ref(cred: ctx->vc_ucred);
1362
1363 fp->f_cred = ctx->vc_ucred;
1364
1365 os_atomic_inc(&nfiles, relaxed);
1366
1367 proc_fdlock(p);
1368
1369 p->p_fd.fd_ofiles[nfd] = fp;
1370
1371 proc_fdunlock(p);
1372
1373 if (resultfp) {
1374 *resultfp = fp;
1375 }
1376 if (resultfd) {
1377 *resultfd = nfd;
1378 }
1379
1380 return 0;
1381}
1382
1383/*
1384 * fp_free
1385 *
1386 * Description: Release the fd and free the fileproc associated with the fd
1387 * in the per process open file table of the specified process;
1388 * these values must correspond.
1389 *
1390 * Parameters: p Process containing fd
1391 * fd fd to be released
1392 * fp fileproc to be freed
1393 */
void
fp_free(proc_t p, int fd, struct fileproc * fp)
{
	/* Give the descriptor slot back to the table under the fd lock. */
	proc_fdlock_spin(p);
	fdrelse(p, fd);
	proc_fdunlock(p);

	/*
	 * Undo what falloc_withinit() built: drop the fileglob, the I/O
	 * reference the caller was handed, and the fileproc itself.  The
	 * caller must guarantee that fd and fp correspond.
	 */
	fg_free(fg: fp->fp_glob);
	os_ref_release_live(rc: &fp->fp_iocount);
	fileproc_free(fp);
}
1405
1406
1407struct fileproc *
1408fp_get_noref_locked(proc_t p, int fd)
1409{
1410 struct filedesc *fdp = &p->p_fd;
1411 struct fileproc *fp;
1412
1413 if (fd < 0 || fd >= fdp->fd_nfiles ||
1414 (fp = fdp->fd_ofiles[fd]) == NULL ||
1415 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1416 return NULL;
1417 }
1418
1419 zone_id_require(zone_id: ZONE_ID_FILEPROC, elem_size: sizeof(*fp), addr: fp);
1420 return fp;
1421}
1422
/*
 * Variant of fp_get_noref_locked() for callers that already hold an I/O
 * reference on the fd's fileproc (e.g. select() and file_drop()).
 * Instead of returning NULL on a bad fd it panics: the entry must exist,
 * its fp_iocount must be above 1, and UF_RESERVED is tolerated only
 * while the fd is also UF_CLOSING (a close in progress, see
 * fp_close_and_unlock()).
 *
 * Caller must hold the proc_fdlock.
 */
struct fileproc *
fp_get_noref_locked_with_iocount(proc_t p, int fd)
{
	struct filedesc *fdp = &p->p_fd;
	struct fileproc *fp = NULL;

	if (fd < 0 || fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL ||
	    os_ref_get_count(rc: &fp->fp_iocount) <= 1 ||
	    ((fdp->fd_ofileflags[fd] & UF_RESERVED) &&
	    !(fdp->fd_ofileflags[fd] & UF_CLOSING))) {
		panic("%s: caller without an ioccount on fileproc (%d/:%p)",
		    __func__, fd, fp);
	}

	/* Sanity check: the pointer must come from the fileproc zone. */
	zone_id_require(zone_id: ZONE_ID_FILEPROC, elem_size: sizeof(*fp), addr: fp);
	return fp;
}
1441
1442
1443/*
1444 * fp_lookup
1445 *
1446 * Description: Get fileproc pointer for a given fd from the per process
1447 * open file table of the specified process and if successful,
1448 * increment the fp_iocount
1449 *
1450 * Parameters: p Process in which fd lives
1451 * fd fd to get information for
1452 * resultfp Pointer to result fileproc
1453 * pointer area, or 0 if none
1454 * locked !0 if the caller holds the
1455 * proc_fdlock, 0 otherwise
1456 *
1457 * Returns: 0 Success
1458 * EBADF Bad file descriptor
1459 *
1460 * Implicit returns:
1461 * *resultfp (modified) Fileproc pointer
1462 *
1463 * Locks: If the argument 'locked' is non-zero, then the caller is
1464 * expected to have taken and held the proc_fdlock; if it is
1465 * zero, than this routine internally takes and drops this lock.
1466 */
int
fp_lookup(proc_t p, int fd, struct fileproc **resultfp, int locked)
{
	struct filedesc *fdp = &p->p_fd;
	struct fileproc *fp;

	if (!locked) {
		proc_fdlock_spin(p);
	}
	/*
	 * Reject out-of-range fds, empty slots, and reserved slots
	 * (mid-allocation or mid-close).  NOTE(review): `fdp` points into
	 * `p` (&p->p_fd) and can never be NULL here; that test looks
	 * vestigial.
	 */
	if (fd < 0 || fdp == NULL || fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL ||
	    (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
		if (!locked) {
			proc_fdunlock(p);
		}
		return EBADF;
	}

	/* Sanity-check the pointer, then take the I/O reference for the caller. */
	zone_id_require(zone_id: ZONE_ID_FILEPROC, elem_size: sizeof(*fp), addr: fp);
	os_ref_retain_locked(rc: &fp->fp_iocount);

	if (resultfp) {
		*resultfp = fp;
	}
	if (!locked) {
		proc_fdunlock(p);
	}

	return 0;
}
1497
1498
1499int
1500fp_get_ftype(proc_t p, int fd, file_type_t ftype, int err, struct fileproc **fpp)
1501{
1502 struct filedesc *fdp = &p->p_fd;
1503 struct fileproc *fp;
1504
1505 proc_fdlock_spin(p);
1506 if (fd < 0 || fd >= fdp->fd_nfiles ||
1507 (fp = fdp->fd_ofiles[fd]) == NULL ||
1508 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1509 proc_fdunlock(p);
1510 return EBADF;
1511 }
1512
1513 if (fp->f_type != ftype) {
1514 proc_fdunlock(p);
1515 return err;
1516 }
1517
1518 zone_id_require(zone_id: ZONE_ID_FILEPROC, elem_size: sizeof(*fp), addr: fp);
1519 os_ref_retain_locked(rc: &fp->fp_iocount);
1520 proc_fdunlock(p);
1521
1522 *fpp = fp;
1523 return 0;
1524}
1525
1526
1527/*
1528 * fp_drop
1529 *
1530 * Description: Drop the I/O reference previously taken by calling fp_lookup
1531 * et. al.
1532 *
1533 * Parameters: p Process in which the fd lives
1534 * fd fd associated with the fileproc
1535 * fp fileproc on which to set the
1536 * flag and drop the reference
1537 * locked flag to internally take and
1538 * drop proc_fdlock if it is not
1539 * already held by the caller
1540 *
1541 * Returns: 0 Success
1542 * EBADF Bad file descriptor
1543 *
1544 * Locks: This function internally takes and drops the proc_fdlock for
1545 * the supplied process if 'locked' is non-zero, and assumes that
1546 * the caller already holds this lock if 'locked' is non-zero.
1547 *
1548 * Notes: The fileproc must correspond to the fd in the supplied proc
1549 */
int
fp_drop(proc_t p, int fd, struct fileproc *fp, int locked)
{
	struct filedesc *fdp = &p->p_fd;
	int needwakeup = 0;

	if (!locked) {
		proc_fdlock_spin(p);
	}
	/*
	 * When fp is FILEPROC_NULL, resolve it from the fd.  Unlike
	 * fp_lookup(), a UF_RESERVED entry is still accepted as long as it
	 * is also UF_CLOSING, so references can be dropped while a close
	 * is draining the fileproc.
	 */
	if ((fp == FILEPROC_NULL) && (fd < 0 || fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL ||
	    ((fdp->fd_ofileflags[fd] & UF_RESERVED) &&
	    !(fdp->fd_ofileflags[fd] & UF_CLOSING)))) {
		if (!locked) {
			proc_fdunlock(p);
		}
		return EBADF;
	}

	/*
	 * If the count drops to 1 only the fd table's reference remains;
	 * wake a drainer blocked in fileproc_drain(), if any.
	 */
	if (1 == os_ref_release_locked(rc: &fp->fp_iocount)) {
		if (fp->fp_flags & FP_SELCONFLICT) {
			fp->fp_flags &= ~FP_SELCONFLICT;
		}

		if (fdp->fd_fpdrainwait) {
			fdp->fd_fpdrainwait = 0;
			needwakeup = 1;
		}
	}
	if (!locked) {
		proc_fdunlock(p);
	}
	/* Issue the wakeup after dropping the lock (when we held it). */
	if (needwakeup) {
		wakeup(chan: &fdp->fd_fpdrainwait);
	}

	return 0;
}
1588
1589
1590/*
1591 * fileproc_drain
1592 *
1593 * Description: Drain out pending I/O operations
1594 *
1595 * Parameters: p Process closing this file
1596 * fp fileproc struct for the open
1597 * instance on the file
1598 *
1599 * Returns: void
1600 *
1601 * Locks: Assumes the caller holds the proc_fdlock
1602 *
1603 * Notes: For character devices, this occurs on the last close of the
1604 * device; for all other file descriptors, this occurs on each
1605 * close to prevent fd's from being closed out from under
1606 * operations currently in progress and blocked
1607 *
1608 * See Also: file_vnode(), file_socket(), file_drop(), and the cautions
1609 * regarding their use and interaction with this function.
1610 */
static void
fileproc_drain(proc_t p, struct fileproc * fp)
{
	struct filedesc *fdp = &p->p_fd;
	struct vfs_context context;
	thread_t thread;
	bool is_current_proc;

	is_current_proc = (p == current_proc());

	if (!is_current_proc) {
		/* Draining another proc's fd: hold a ref on one of its threads. */
		proc_lock(p);
		thread = proc_thread(p); /* XXX */
		thread_reference(thread);
		proc_unlock(p);
	} else {
		thread = current_thread();
	}

	context.vc_thread = thread;
	context.vc_ucred = fp->fp_glob->fg_cred;

	/* Set the vflag for drain */
	fileproc_modify_vflags(fp, vflags: FPV_DRAIN, FALSE);

	/* Loop until ours is the only outstanding I/O reference (count == 1). */
	while (os_ref_get_count(rc: &fp->fp_iocount) > 1) {
		lck_mtx_convert_spin(lck: &fdp->fd_lock);

		fo_drain(fp, ctx: &context);
		if ((fp->fp_flags & FP_INSELECT) == FP_INSELECT) {
			struct select_set *selset;

			/* Guarded fileprocs keep their select wait set in the guard. */
			if (fp->fp_guard_attrs) {
				selset = fp->fp_guard->fpg_wset;
			} else {
				selset = fp->fp_wset;
			}
			/* Kick any selectors off this fp's wait set. */
			if (waitq_wakeup64_all(waitq: selset, NO_EVENT64,
			    THREAD_INTERRUPTED, flags: WAITQ_WAKEUP_DEFAULT) == KERN_INVALID_ARGUMENT) {
				panic("bad wait queue for waitq_wakeup64_all %p (%sfp:%p)",
				    selset, fp->fp_guard_attrs ? "guarded " : "", fp);
			}
		}
		if ((fp->fp_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
			/* Also wake selectors parked on the shared conflict queue. */
			if (waitq_wakeup64_all(waitq: &select_conflict_queue, NO_EVENT64,
			    THREAD_INTERRUPTED, flags: WAITQ_WAKEUP_DEFAULT) == KERN_INVALID_ARGUMENT) {
				panic("bad select_conflict_queue");
			}
		}
		/* Sleep until fp_drop()/file_drop() releases a reference. */
		fdp->fd_fpdrainwait = 1;
		msleep(chan: &fdp->fd_fpdrainwait, mtx: &fdp->fd_lock, PRIBIO, wmesg: "fpdrain", NULL);
	}
#if DIAGNOSTIC
	if ((fp->fp_flags & FP_INSELECT) != 0) {
		panic("FP_INSELECT set on drained fp");
	}
#endif
	if ((fp->fp_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
		fp->fp_flags &= ~FP_SELCONFLICT;
	}

	if (!is_current_proc) {
		thread_deallocate(thread);
	}
}
1676
1677
/*
 * Close `fd`/`fp` in proc `p` and drop the proc_fdlock (which the caller
 * must hold on entry).  Runs close notifications, drains outstanding I/O
 * references, releases (or, for dup2, preserves) the descriptor slot,
 * frees the fileproc, and finally drops the fileglob reference.
 * Returns the result of fg_drop().
 */
int
fp_close_and_unlock(proc_t p, kauth_cred_t cred, int fd, struct fileproc *fp, int flags)
{
	struct filedesc *fdp = &p->p_fd;
	struct fileglob *fg = fp->fp_glob;

#if DIAGNOSTIC
	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif

	/*
	 * Keep most people from finding the filedesc while we are closing it.
	 *
	 * Callers are:
	 *
	 * - dup2() which always waits for UF_RESERVED to clear
	 *
	 * - close/guarded_close/... who will fail the fileproc lookup if
	 *   UF_RESERVED is set,
	 *
	 * - fdexec()/fdfree() who only run once all threads in the proc
	 *   are properly canceled, hence no fileproc in this proc should
	 *   be in flux.
	 *
	 * Which means that neither UF_RESERVED nor UF_CLOSING should be set.
	 *
	 * Callers of fp_get_noref_locked_with_iocount() can still find
	 * this entry so that they can drop their I/O reference despite
	 * not having remembered the fileproc pointer (namely select() and
	 * file_drop()).
	 */
	if (p->p_fd.fd_ofileflags[fd] & (UF_RESERVED | UF_CLOSING)) {
		panic("%s: called with fileproc in flux (%d/:%p)",
		    __func__, fd, fp);
	}
	p->p_fd.fd_ofileflags[fd] |= (UF_RESERVED | UF_CLOSING);

	/*
	 * Run close callouts (MAC notification, kauth fileop listeners,
	 * async I/O cancellation).  These may block, so the fdlock is
	 * dropped around them; UF_RESERVED|UF_CLOSING keeps the slot from
	 * being reused in the meantime.
	 */
	if ((fp->fp_flags & FP_AIOISSUED) ||
#if CONFIG_MACF
	    (FILEGLOB_DTYPE(fg) == DTYPE_VNODE)
#else
	    kauth_authorize_fileop_has_listeners()
#endif
	    ) {
		proc_fdunlock(p);

		if (FILEGLOB_DTYPE(fg) == DTYPE_VNODE) {
			/*
			 * call out to allow 3rd party notification of close.
			 * Ignore result of kauth_authorize_fileop call.
			 */
#if CONFIG_MACF
			mac_file_notify_close(cred, fg: fp->fp_glob);
#else
			(void)cred;
#endif

			if (kauth_authorize_fileop_has_listeners() &&
			    vnode_getwithref(vp: (vnode_t)fg_get_data(fg)) == 0) {
				u_int fileop_flags = 0;
				if (fg->fg_flag & FWASWRITTEN) {
					fileop_flags |= KAUTH_FILEOP_CLOSE_MODIFIED;
				}
				kauth_authorize_fileop(credential: fg->fg_cred, KAUTH_FILEOP_CLOSE,
				    arg0: (uintptr_t)fg_get_data(fg), arg1: (uintptr_t)fileop_flags);

				vnode_put(vp: (vnode_t)fg_get_data(fg));
			}
		}

		if (fp->fp_flags & FP_AIOISSUED) {
			/*
			 * cancel all async IO requests that can be cancelled.
			 */
			_aio_close( p, fd );
		}

		proc_fdlock(p);
	}

	/* Detach any knotes registered on this fd. */
	if (fd < fdp->fd_knlistsize) {
		knote_fdclose(p, fd);
	}

	/* Block until we hold the only outstanding I/O reference. */
	fileproc_drain(p, fp);

	if (flags & FD_DUP2RESV) {
		/* dup2() keeps the slot UF_RESERVED for its own reuse. */
		fdp->fd_ofiles[fd] = NULL;
		fdp->fd_ofileflags[fd] &= ~UF_CLOSING;
	} else {
		fdrelse(p, fd);
	}

	proc_fdunlock(p);

	if (ENTR_SHOULDTRACE && FILEGLOB_DTYPE(fg) == DTYPE_SOCKET) {
		KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_END,
		    fd, 0, (int64_t)VM_KERNEL_ADDRPERM(fg_get_data(fg)));
	}

	fileproc_free(fp);

	/* Drop the table's reference on the fileglob; may be the last one. */
	return fg_drop(p, fg);
}
1782
1783/*
1784 * dupfdopen
1785 *
1786 * Description: Duplicate the specified descriptor to a free descriptor;
1787 * this is the second half of fdopen(), above.
1788 *
1789 * Parameters: p current process pointer
1790 * indx fd to dup to
1791 * dfd fd to dup from
1792 * mode mode to set on new fd
1793 * error command code
1794 *
1795 * Returns: 0 Success
1796 * EBADF Source fd is bad
1797 * EACCES Requested mode not allowed
1798 * !0 'error', if not ENODEV or
1799 * ENXIO
1800 *
1801 * Notes: XXX This is not thread safe; see fdopen() above
1802 */
int
dupfdopen(proc_t p, int indx, int dfd, int flags, int error)
{
	struct filedesc *fdp = &p->p_fd;
	struct fileproc *wfp;
	struct fileproc *fp;
#if CONFIG_MACF
	int myerror;
#endif

	/*
	 * If the to-be-dup'd fd number is greater than the allowed number
	 * of file descriptors, or the fd to be dup'd has already been
	 * closed, reject. Note, check for new == old is necessary as
	 * falloc could allocate an already closed to-be-dup'd descriptor
	 * as the new descriptor.
	 */
	proc_fdlock(p);

	fp = fdp->fd_ofiles[indx];
	if (dfd < 0 || dfd >= fdp->fd_nfiles ||
	    (wfp = fdp->fd_ofiles[dfd]) == NULL || wfp == fp ||
	    (fdp->fd_ofileflags[dfd] & UF_RESERVED)) {
		proc_fdunlock(p);
		return EBADF;
	}
#if CONFIG_MACF
	myerror = mac_file_check_dup(cred: kauth_cred_get(), fg: wfp->fp_glob, newfd: dfd);
	if (myerror) {
		proc_fdunlock(p);
		return myerror;
	}
#endif
	/*
	 * There are two cases of interest here.
	 *
	 * For ENODEV simply dup (dfd) to file descriptor
	 * (indx) and return.
	 *
	 * For ENXIO steal away the file structure from (dfd) and
	 * store it in (indx). (dfd) is effectively closed by
	 * this operation.
	 *
	 * Any other error code is just returned.
	 *
	 * NOTE(review): only the ENODEV case is implemented below; an
	 * ENXIO error falls into the default branch and is returned
	 * unchanged.  Confirm whether the ENXIO "steal" path was removed
	 * intentionally.
	 */
	switch (error) {
	case ENODEV:
		/* Guarded descriptors may not be dup'd this way. */
		if (fp_isguarded(fp: wfp, GUARD_DUP)) {
			proc_fdunlock(p);
			return EPERM;
		}

		/*
		 * Check that the mode the file is being opened for is a
		 * subset of the mode of the existing descriptor.
		 */
		if (((flags & (FREAD | FWRITE)) | wfp->f_flag) != wfp->f_flag) {
			proc_fdunlock(p);
			return EACCES;
		}
		if (indx >= fdp->fd_afterlast) {
			fdp->fd_afterlast = indx + 1;
		}

		/* Swap indx's current fileglob (if any) for dfd's. */
		if (fp->fp_glob) {
			fg_free(fg: fp->fp_glob);
		}
		fg_ref(p, fg: wfp->fp_glob);
		fp->fp_glob = wfp->fp_glob;
		/*
		 * Historically, open(/dev/fd/<n>) preserves close on fork/exec,
		 * unlike dup(), dup2() or fcntl(F_DUPFD).
		 *
		 * open1() already handled O_CLO{EXEC,FORK}
		 */
		fp->fp_flags |= (wfp->fp_flags & (FP_CLOFORK | FP_CLOEXEC));

		/* Clear UF_RESERVED on indx and drop the caller's I/O ref. */
		procfdtbl_releasefd(p, fd: indx, NULL);
		fp_drop(p, fd: indx, fp, locked: 1);
		proc_fdunlock(p);
		return 0;

	default:
		proc_fdunlock(p);
		return error;
	}
	/* NOTREACHED */
}
1891
1892
1893#pragma mark KPIS (sys/file.h)
1894
1895/*
1896 * fg_get_vnode
1897 *
1898 * Description: Return vnode associated with the file structure, if
1899 * any. The lifetime of the returned vnode is bound to
1900 * the lifetime of the file structure.
1901 *
1902 * Parameters: fg Pointer to fileglob to
1903 * inspect
1904 *
1905 * Returns: vnode_t
1906 */
1907vnode_t
1908fg_get_vnode(struct fileglob *fg)
1909{
1910 if (FILEGLOB_DTYPE(fg) == DTYPE_VNODE) {
1911 return (vnode_t)fg_get_data(fg);
1912 } else {
1913 return NULL;
1914 }
1915}
1916
1917
1918/*
1919 * fp_getfvp
1920 *
1921 * Description: Get fileproc and vnode pointer for a given fd from the per
1922 * process open file table of the specified process, and if
1923 * successful, increment the fp_iocount
1924 *
1925 * Parameters: p Process in which fd lives
1926 * fd fd to get information for
1927 * resultfp Pointer to result fileproc
1928 * pointer area, or 0 if none
1929 * resultvp Pointer to result vnode pointer
1930 * area, or 0 if none
1931 *
1932 * Returns: 0 Success
1933 * EBADF Bad file descriptor
1934 * ENOTSUP fd does not refer to a vnode
1935 *
1936 * Implicit returns:
1937 * *resultfp (modified) Fileproc pointer
1938 * *resultvp (modified) vnode pointer
1939 *
1940 * Notes: The resultfp and resultvp fields are optional, and may be
1941 * independently specified as NULL to skip returning information
1942 *
1943 * Locks: Internally takes and releases proc_fdlock
1944 */
1945int
1946fp_getfvp(proc_t p, int fd, struct fileproc **resultfp, struct vnode **resultvp)
1947{
1948 struct fileproc *fp;
1949 int error;
1950
1951 error = fp_get_ftype(p, fd, ftype: DTYPE_VNODE, ENOTSUP, fpp: &fp);
1952 if (error == 0) {
1953 if (resultfp) {
1954 *resultfp = fp;
1955 }
1956 if (resultvp) {
1957 *resultvp = (struct vnode *)fp_get_data(fp);
1958 }
1959 }
1960
1961 return error;
1962}
1963
1964
1965/*
1966 * fp_get_pipe_id
1967 *
1968 * Description: Get pipe id for a given fd from the per process open file table
1969 * of the specified process.
1970 *
1971 * Parameters: p Process in which fd lives
1972 * fd fd to get information for
1973 * result_pipe_id Pointer to result pipe id
1974 *
1975 * Returns: 0 Success
 *		EINVAL				NULL pointer arguments passed
1977 * fp_lookup:EBADF Bad file descriptor
1978 * ENOTSUP fd does not refer to a pipe
1979 *
1980 * Implicit returns:
1981 * *result_pipe_id (modified) pipe id
1982 *
1983 * Locks: Internally takes and releases proc_fdlock
1984 */
1985int
1986fp_get_pipe_id(proc_t p, int fd, uint64_t *result_pipe_id)
1987{
1988 struct fileproc *fp = FILEPROC_NULL;
1989 struct fileglob *fg = NULL;
1990 int error = 0;
1991
1992 if (p == NULL || result_pipe_id == NULL) {
1993 return EINVAL;
1994 }
1995
1996 proc_fdlock(p);
1997 if ((error = fp_lookup(p, fd, resultfp: &fp, locked: 1))) {
1998 proc_fdunlock(p);
1999 return error;
2000 }
2001 fg = fp->fp_glob;
2002
2003 if (FILEGLOB_DTYPE(fg) == DTYPE_PIPE) {
2004 *result_pipe_id = pipe_id((struct pipe*)fg_get_data(fg));
2005 } else {
2006 error = ENOTSUP;
2007 }
2008
2009 fp_drop(p, fd, fp, locked: 1);
2010 proc_fdunlock(p);
2011 return error;
2012}
2013
2014
2015/*
2016 * file_vnode
2017 *
2018 * Description: Given an fd, look it up in the current process's per process
2019 * open file table, and return its internal vnode pointer.
2020 *
2021 * Parameters: fd fd to obtain vnode from
2022 * vpp pointer to vnode return area
2023 *
2024 * Returns: 0 Success
2025 * EINVAL The fd does not refer to a
2026 * vnode fileproc entry
2027 * fp_lookup:EBADF Bad file descriptor
2028 *
2029 * Implicit returns:
2030 * *vpp (modified) Returned vnode pointer
2031 *
2032 * Locks: This function internally takes and drops the proc_fdlock for
2033 * the current process
2034 *
2035 * Notes: If successful, this function increments the fp_iocount on the
2036 * fd's corresponding fileproc.
2037 *
2038 * The fileproc referenced is not returned; because of this, care
2039 * must be taken to not drop the last reference (e.g. by closing
2040 * the file). This is inherently unsafe, since the reference may
2041 * not be recoverable from the vnode, if there is a subsequent
2042 * close that destroys the associate fileproc. The caller should
2043 * therefore retain their own reference on the fileproc so that
2044 * the fp_iocount can be dropped subsequently. Failure to do this
2045 * can result in the returned pointer immediately becoming invalid
2046 * following the call.
2047 *
2048 * Use of this function is discouraged.
2049 */
int
file_vnode(int fd, struct vnode **vpp)
{
	/* Equivalent to file_vnode_withvid() with the vid output discarded. */
	return file_vnode_withvid(fd, vpp, NULL);
}
2055
2056
2057/*
2058 * file_vnode_withvid
2059 *
2060 * Description: Given an fd, look it up in the current process's per process
2061 * open file table, and return its internal vnode pointer.
2062 *
2063 * Parameters: fd fd to obtain vnode from
2064 * vpp pointer to vnode return area
2065 * vidp pointer to vid of the returned vnode
2066 *
2067 * Returns: 0 Success
2068 * EINVAL The fd does not refer to a
2069 * vnode fileproc entry
2070 * fp_lookup:EBADF Bad file descriptor
2071 *
2072 * Implicit returns:
2073 * *vpp (modified) Returned vnode pointer
2074 *
2075 * Locks: This function internally takes and drops the proc_fdlock for
2076 * the current process
2077 *
2078 * Notes: If successful, this function increments the fp_iocount on the
2079 * fd's corresponding fileproc.
2080 *
2081 * The fileproc referenced is not returned; because of this, care
2082 * must be taken to not drop the last reference (e.g. by closing
2083 * the file). This is inherently unsafe, since the reference may
2084 * not be recoverable from the vnode, if there is a subsequent
2085 * close that destroys the associate fileproc. The caller should
2086 * therefore retain their own reference on the fileproc so that
2087 * the fp_iocount can be dropped subsequently. Failure to do this
2088 * can result in the returned pointer immediately becoming invalid
2089 * following the call.
2090 *
2091 * Use of this function is discouraged.
2092 */
2093int
2094file_vnode_withvid(int fd, struct vnode **vpp, uint32_t *vidp)
2095{
2096 struct fileproc *fp;
2097 int error;
2098
2099 error = fp_get_ftype(p: current_proc(), fd, ftype: DTYPE_VNODE, EINVAL, fpp: &fp);
2100 if (error == 0) {
2101 if (vpp) {
2102 *vpp = (struct vnode *)fp_get_data(fp);
2103 }
2104 if (vidp) {
2105 *vidp = vnode_vid(vp: (struct vnode *)fp_get_data(fp));
2106 }
2107 }
2108 return error;
2109}
2110
2111/*
2112 * file_socket
2113 *
2114 * Description: Given an fd, look it up in the current process's per process
2115 * open file table, and return its internal socket pointer.
2116 *
 * Parameters:	fd				fd to obtain socket from
2118 * sp pointer to socket return area
2119 *
2120 * Returns: 0 Success
2121 * ENOTSOCK Not a socket
2122 * fp_lookup:EBADF Bad file descriptor
2123 *
2124 * Implicit returns:
2125 * *sp (modified) Returned socket pointer
2126 *
2127 * Locks: This function internally takes and drops the proc_fdlock for
2128 * the current process
2129 *
2130 * Notes: If successful, this function increments the fp_iocount on the
2131 * fd's corresponding fileproc.
2132 *
2133 * The fileproc referenced is not returned; because of this, care
2134 * must be taken to not drop the last reference (e.g. by closing
2135 * the file). This is inherently unsafe, since the reference may
2136 * not be recoverable from the socket, if there is a subsequent
2137 * close that destroys the associate fileproc. The caller should
2138 * therefore retain their own reference on the fileproc so that
2139 * the fp_iocount can be dropped subsequently. Failure to do this
2140 * can result in the returned pointer immediately becoming invalid
2141 * following the call.
2142 *
2143 * Use of this function is discouraged.
2144 */
2145int
2146file_socket(int fd, struct socket **sp)
2147{
2148 struct fileproc *fp;
2149 int error;
2150
2151 error = fp_get_ftype(p: current_proc(), fd, ftype: DTYPE_SOCKET, ENOTSOCK, fpp: &fp);
2152 if (error == 0) {
2153 if (sp) {
2154 *sp = (struct socket *)fp_get_data(fp);
2155 }
2156 }
2157 return error;
2158}
2159
2160
2161/*
2162 * file_flags
2163 *
2164 * Description: Given an fd, look it up in the current process's per process
2165 * open file table, and return its fileproc's flags field.
2166 *
2167 * Parameters: fd fd whose flags are to be
2168 * retrieved
2169 * flags pointer to flags data area
2170 *
2171 * Returns: 0 Success
 *		EBADF				Bad file descriptor
2174 *
2175 * Implicit returns:
2176 * *flags (modified) Returned flags field
2177 *
2178 * Locks: This function internally takes and drops the proc_fdlock for
2179 * the current process
2180 */
2181int
2182file_flags(int fd, int *flags)
2183{
2184 proc_t p = current_proc();
2185 struct fileproc *fp;
2186 int error = EBADF;
2187
2188 proc_fdlock_spin(p);
2189 fp = fp_get_noref_locked(p, fd);
2190 if (fp) {
2191 *flags = (int)fp->f_flag;
2192 error = 0;
2193 }
2194 proc_fdunlock(p);
2195
2196 return error;
2197}
2198
2199
2200/*
2201 * file_drop
2202 *
2203 * Description: Drop an iocount reference on an fd, and wake up any waiters
2204 * for draining (i.e. blocked in fileproc_drain() called during
2205 * the last attempt to close a file).
2206 *
2207 * Parameters: fd fd on which an ioreference is
2208 * to be dropped
2209 *
2210 * Returns: 0 Success
2211 *
2212 * Description: Given an fd, look it up in the current process's per process
2213 * open file table, and drop it's fileproc's fp_iocount by one
2214 *
2215 * Notes: This is intended as a corresponding operation to the functions
2216 * file_vnode() and file_socket() operations.
2217 *
2218 * If the caller can't possibly hold an I/O reference,
2219 * this function will panic the kernel rather than allowing
2220 * for memory corruption. Callers should always call this
2221 * because they acquired an I/O reference on this file before.
2222 *
2223 * Use of this function is discouraged.
2224 */
int
file_drop(int fd)
{
	struct fileproc *fp;
	proc_t p = current_proc();
	struct filedesc *fdp = &p->p_fd;
	int needwakeup = 0;

	proc_fdlock_spin(p);
	/* Panics if the caller does not actually hold an I/O reference. */
	fp = fp_get_noref_locked_with_iocount(p, fd);

	/*
	 * If the count drops to 1 only the fd table's reference remains;
	 * wake a drainer blocked in fileproc_drain(), if any.
	 */
	if (1 == os_ref_release_locked(rc: &fp->fp_iocount)) {
		if (fp->fp_flags & FP_SELCONFLICT) {
			fp->fp_flags &= ~FP_SELCONFLICT;
		}

		if (fdp->fd_fpdrainwait) {
			fdp->fd_fpdrainwait = 0;
			needwakeup = 1;
		}
	}
	proc_fdunlock(p);

	/* Wake after dropping the lock so the drainer can run immediately. */
	if (needwakeup) {
		wakeup(chan: &fdp->fd_fpdrainwait);
	}
	return 0;
}
2253
2254
2255#pragma mark syscalls
2256
2257#ifndef HFS_GET_BOOT_INFO
2258#define HFS_GET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00004)
2259#endif
2260
2261#ifndef HFS_SET_BOOT_INFO
2262#define HFS_SET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00005)
2263#endif
2264
2265#ifndef APFSIOC_REVERT_TO_SNAPSHOT
2266#define APFSIOC_REVERT_TO_SNAPSHOT _IOW('J', 1, u_int64_t)
2267#endif
2268
2269#define CHECK_ADD_OVERFLOW_INT64L(x, y) \
2270 (((((x) > 0) && ((y) > 0) && ((x) > LLONG_MAX - (y))) || \
2271 (((x) < 0) && ((y) < 0) && ((x) < LLONG_MIN - (y)))) \
2272 ? 1 : 0)
2273
2274/*
2275 * sys_getdtablesize
2276 *
2277 * Description: Returns the per process maximum size of the descriptor table
2278 *
2279 * Parameters: p Process being queried
2280 * retval Pointer to the call return area
2281 *
2282 * Returns: 0 Success
2283 *
2284 * Implicit returns:
2285 * *retval (modified) Size of dtable
2286 */
2287int
2288sys_getdtablesize(proc_t p, __unused struct getdtablesize_args *uap, int32_t *retval)
2289{
2290 *retval = proc_limitgetcur_nofile(p);
2291 return 0;
2292}
2293
2294
2295/*
2296 * check_file_seek_range
2297 *
2298 * Description: Checks if seek offsets are in the range of 0 to LLONG_MAX.
2299 *
2300 * Parameters: fl Flock structure.
2301 * cur_file_offset Current offset in the file.
2302 *
2303 * Returns: 0 on Success.
2304 * EOVERFLOW on overflow.
2305 * EINVAL on offset less than zero.
2306 */
2307
2308static int
2309check_file_seek_range(struct flock *fl, off_t cur_file_offset)
2310{
2311 if (fl->l_whence == SEEK_CUR) {
2312 /* Check if the start marker is beyond LLONG_MAX. */
2313 if (CHECK_ADD_OVERFLOW_INT64L(fl->l_start, cur_file_offset)) {
2314 /* Check if start marker is negative */
2315 if (fl->l_start < 0) {
2316 return EINVAL;
2317 }
2318 return EOVERFLOW;
2319 }
2320 /* Check if the start marker is negative. */
2321 if (fl->l_start + cur_file_offset < 0) {
2322 return EINVAL;
2323 }
2324 /* Check if end marker is beyond LLONG_MAX. */
2325 if ((fl->l_len > 0) && (CHECK_ADD_OVERFLOW_INT64L(fl->l_start +
2326 cur_file_offset, fl->l_len - 1))) {
2327 return EOVERFLOW;
2328 }
2329 /* Check if the end marker is negative. */
2330 if ((fl->l_len <= 0) && (fl->l_start + cur_file_offset +
2331 fl->l_len < 0)) {
2332 return EINVAL;
2333 }
2334 } else if (fl->l_whence == SEEK_SET) {
2335 /* Check if the start marker is negative. */
2336 if (fl->l_start < 0) {
2337 return EINVAL;
2338 }
2339 /* Check if the end marker is beyond LLONG_MAX. */
2340 if ((fl->l_len > 0) &&
2341 CHECK_ADD_OVERFLOW_INT64L(fl->l_start, fl->l_len - 1)) {
2342 return EOVERFLOW;
2343 }
2344 /* Check if the end marker is negative. */
2345 if ((fl->l_len < 0) && fl->l_start + fl->l_len < 0) {
2346 return EINVAL;
2347 }
2348 }
2349 return 0;
2350}
2351
2352
2353/*
2354 * sys_dup
2355 *
2356 * Description: Duplicate a file descriptor.
2357 *
2358 * Parameters: p Process performing the dup
2359 * uap->fd The fd to dup
2360 * retval Pointer to the call return area
2361 *
2362 * Returns: 0 Success
2363 * !0 Errno
2364 *
2365 * Implicit returns:
2366 * *retval (modified) The new descriptor
2367 */
int
sys_dup(proc_t p, struct dup_args *uap, int32_t *retval)
{
	int old = uap->fd;
	int new, error;
	struct fileproc *fp;
	kauth_cred_t p_cred;

	proc_fdlock(p);
	/* Take a reference on the source descriptor while the fd table is locked. */
	if ((error = fp_lookup(p, fd: old, resultfp: &fp, locked: 1))) {
		proc_fdunlock(p);
		return error;
	}
	/* Guarded descriptors may not be dup'ed; raise the guard exception instead. */
	if (fp_isguarded(fp, GUARD_DUP)) {
		error = fp_guard_exception(p, fd: old, fp, attribs: kGUARD_EXC_DUP);
		(void) fp_drop(p, fd: old, fp, locked: 1);
		proc_fdunlock(p);
		return error;
	}
	/* Reserve the lowest available descriptor slot for the duplicate. */
	if ((error = fdalloc(p, want: 0, result: &new))) {
		fp_drop(p, fd: old, fp, locked: 1);
		proc_fdunlock(p);
		return error;
	}
	p_cred = current_cached_proc_cred(p);
	/* finishdup() installs the fileproc into the reserved slot and sets *retval. */
	error = finishdup(p, p_cred, old, new, fp_flags: 0, retval);

	/* Energy-accounting trace for socket descriptors only. */
	if (ENTR_SHOULDTRACE && FILEGLOB_DTYPE(fp->fp_glob) == DTYPE_SOCKET) {
		KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
		    new, 0, (int64_t)VM_KERNEL_ADDRPERM(fp_get_data(fp)));
	}

	/* Release the reference taken by fp_lookup() above. */
	fp_drop(p, fd: old, fp, locked: 1);
	proc_fdunlock(p);

	return error;
}
2405
2406/*
2407 * sys_dup2
2408 *
2409 * Description: Duplicate a file descriptor to a particular value.
2410 *
2411 * Parameters: p Process performing the dup
2412 * uap->from The fd to dup
2413 * uap->to The fd to dup it to
2414 * retval Pointer to the call return area
2415 *
2416 * Returns: 0 Success
2417 * !0 Errno
2418 *
2419 * Implicit returns:
2420 * *retval (modified) The new descriptor
2421 */
2422int
2423sys_dup2(proc_t p, struct dup2_args *uap, int32_t *retval)
2424{
2425 kauth_cred_t p_cred = current_cached_proc_cred(p);
2426
2427 return dup2(p, p_cred, from: uap->from, to: uap->to, fd: retval);
2428}
2429
int
dup2(proc_t p, kauth_cred_t p_cred, int old, int new, int *retval)
{
	struct filedesc *fdp = &p->p_fd;
	struct fileproc *fp, *nfp;
	int i, error;

	proc_fdlock(p);

startover:
	/* Take a reference on the source descriptor under the fd lock. */
	if ((error = fp_lookup(p, fd: old, resultfp: &fp, locked: 1))) {
		proc_fdunlock(p);
		return error;
	}
	/* Guarded descriptors may not be dup'ed; raise the guard exception. */
	if (fp_isguarded(fp, GUARD_DUP)) {
		error = fp_guard_exception(p, fd: old, fp, attribs: kGUARD_EXC_DUP);
		(void) fp_drop(p, fd: old, fp, locked: 1);
		proc_fdunlock(p);
		return error;
	}
	/* The target must be inside the process's descriptor limit. */
	if (new < 0 || new >= proc_limitgetcur_nofile(p)) {
		fp_drop(p, fd: old, fp, locked: 1);
		proc_fdunlock(p);
		return EBADF;
	}
	/* dup2(fd, fd) is a no-op that just reports the descriptor. */
	if (old == new) {
		fp_drop(p, fd: old, fp, locked: 1);
		*retval = new;
		proc_fdunlock(p);
		return 0;
	}
	if (new < 0 || new >= fdp->fd_nfiles) {
		/* Target is past the current table: grow it via fdalloc(). */
		if ((error = fdalloc(p, want: new, result: &i))) {
			fp_drop(p, fd: old, fp, locked: 1);
			proc_fdunlock(p);
			return error;
		}
		if (new != i) {
			/*
			 * fdalloc() handed back a different slot; release it
			 * and take over the requested slot instead.
			 */
			fdrelse(p, fd: i);
			goto closeit;
		}
	} else {
closeit:
		/*
		 * If the target slot is reserved by someone else, wait for it
		 * (the wait drops/retakes the fd lock) and restart from the
		 * top since the table may have changed while we slept.
		 */
		if ((fdp->fd_ofileflags[new] & UF_RESERVED) == UF_RESERVED) {
			fp_drop(p, fd: old, fp, locked: 1);
			procfdtbl_waitfd(p, fd: new);
#if DIAGNOSTIC
			proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif
			goto startover;
		}

		if ((nfp = fdp->fd_ofiles[new]) != NULL) {
			/* Target is open: a close-guarded fd may not be replaced. */
			if (fp_isguarded(fp: nfp, GUARD_CLOSE)) {
				fp_drop(p, fd: old, fp, locked: 1);
				error = fp_guard_exception(p,
				    fd: new, fp: nfp, attribs: kGUARD_EXC_CLOSE);
				proc_fdunlock(p);
				return error;
			}
			/*
			 * Close the target; FD_DUP2RESV keeps the slot reserved
			 * for us.  fp_close_and_unlock() drops the fd lock, so
			 * retake it.
			 */
			(void)fp_close_and_unlock(p, cred: p_cred, fd: new, fp: nfp, FD_DUP2RESV);
			proc_fdlock(p);
			assert(fdp->fd_ofileflags[new] & UF_RESERVED);
		} else {
#if DIAGNOSTIC
			if (fdp->fd_ofiles[new] != NULL) {
				panic("dup2: no ref on fileproc %d", new);
			}
#endif
			/* Target was free: mark the slot reserved for finishdup(). */
			procfdtbl_reservefd(p, fd: new);
		}
	}
#if DIAGNOSTIC
	if (fdp->fd_ofiles[new] != 0) {
		panic("dup2: overwriting fd_ofiles with new %d", new);
	}
	if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0) {
		panic("dup2: unreserved fileflags with new %d", new);
	}
#endif
	/* Install the duplicate into the reserved slot and set *retval. */
	error = finishdup(p, p_cred, old, new, fp_flags: 0, retval);
	fp_drop(p, fd: old, fp, locked: 1);
	proc_fdunlock(p);

	return error;
}
2516
2517
2518/*
2519 * fcntl
2520 *
2521 * Description: The file control system call.
2522 *
2523 * Parameters: p Process performing the fcntl
2524 * uap->fd The fd to operate against
2525 * uap->cmd The command to perform
2526 * uap->arg Pointer to the command argument
2527 * retval Pointer to the call return area
2528 *
2529 * Returns: 0 Success
2530 * !0 Errno (see fcntl_nocancel)
2531 *
2532 * Implicit returns:
2533 * *retval (modified) fcntl return value (if any)
2534 *
2535 * Notes: This system call differs from fcntl_nocancel() in that it
2536 * tests for cancellation prior to performing a potentially
2537 * blocking operation.
2538 */
2539int
2540sys_fcntl(proc_t p, struct fcntl_args *uap, int32_t *retval)
2541{
2542 __pthread_testcancel(presyscall: 1);
2543 return sys_fcntl_nocancel(p, (struct fcntl_nocancel_args *)uap, retval);
2544}
2545
2546#define ACCOUNT_OPENFROM_ENTITLEMENT \
2547 "com.apple.private.vfs.role-account-openfrom"
2548
2549/*
2550 * sys_fcntl_nocancel
2551 *
2552 * Description: A non-cancel-testing file control system call.
2553 *
2554 * Parameters: p Process performing the fcntl
2555 * uap->fd The fd to operate against
2556 * uap->cmd The command to perform
2557 * uap->arg Pointer to the command argument
2558 * retval Pointer to the call return area
2559 *
2560 * Returns: 0 Success
2561 * EINVAL
2562 * fp_lookup:EBADF Bad file descriptor
2563 * [F_DUPFD]
2564 * fdalloc:EMFILE
2565 * fdalloc:ENOMEM
2566 * finishdup:EBADF
2567 * finishdup:ENOMEM
2568 * [F_SETOWN]
2569 * ESRCH
2570 * [F_SETLK]
2571 * EBADF
2572 * EOVERFLOW
2573 * copyin:EFAULT
2574 * vnode_getwithref:???
2575 * VNOP_ADVLOCK:???
2576 * msleep:ETIMEDOUT
2577 * [F_GETLK]
2578 * EBADF
2579 * EOVERFLOW
2580 * copyin:EFAULT
2581 * copyout:EFAULT
2582 * vnode_getwithref:???
2583 * VNOP_ADVLOCK:???
2584 * [F_PREALLOCATE]
2585 * EBADF
2586 * EFBIG
2587 * EINVAL
2588 * ENOSPC
2589 * copyin:EFAULT
2590 * copyout:EFAULT
2591 * vnode_getwithref:???
2592 * VNOP_ALLOCATE:???
2593 * [F_SETSIZE,F_RDADVISE]
2594 * EBADF
2595 * EINVAL
2596 * copyin:EFAULT
2597 * vnode_getwithref:???
2598 * [F_RDAHEAD,F_NOCACHE]
2599 * EBADF
2600 * vnode_getwithref:???
2601 * [???]
2602 *
2603 * Implicit returns:
2604 * *retval (modified) fcntl return value (if any)
2605 */
2606#define SYS_FCNTL_DECLARE_VFS_CONTEXT(context) \
2607 struct vfs_context context = { \
2608 .vc_thread = current_thread(), \
2609 .vc_ucred = fp->f_cred, \
2610 }
2611
2612static user_addr_t
2613sys_fnctl_parse_arg(proc_t p, user_long_t arg)
2614{
2615 /*
2616 * Since the arg parameter is defined as a long but may be
2617 * either a long or a pointer we must take care to handle
2618 * sign extension issues. Our sys call munger will sign
2619 * extend a long when we are called from a 32-bit process.
2620 * Since we can never have an address greater than 32-bits
2621 * from a 32-bit process we lop off the top 32-bits to avoid
2622 * getting the wrong address
2623 */
2624 return proc_is64bit(p) ? arg : CAST_USER_ADDR_T((uint32_t)arg);
2625}
2626
/* Cleanup code common to fcntl handlers, for when the fd lock is still held. */
/*
 * Common fcntl exit path used while the fd lock is still held: release
 * the fp_lookup() reference, drop the lock, and propagate the error.
 * Ordering matters: the reference must be dropped before the lock.
 */
static int
sys_fcntl_out(proc_t p, int fd, struct fileproc *fp, int error)
{
	fp_drop(p, fd, fp, locked: 1);
	proc_fdunlock(p);
	return error;
}
2635
/* Cleanup code common to fcntl handlers acting on vnodes, once they have released the fd lock. */
/*
 * Common fcntl exit path for vnode-backed commands, used after the fd
 * lock has already been released: audit the vnode path, then drop the
 * fp_lookup() reference (unlocked) and propagate the error.
 */
static int
sys_fcntl_outdrop(proc_t p, int fd, struct fileproc *fp, struct vnode *vp, int error)
{
#pragma unused(vp)

	AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
	fp_drop(p, fd, fp, locked: 0);
	return error;
}
2646
2647typedef int (*sys_fnctl_handler_t)(proc_t p, int fd, int cmd, user_long_t arg,
2648 struct fileproc *fp, int32_t *retval);
2649
2650typedef int (*sys_fnctl_vnode_handler_t)(proc_t p, int fd, int cmd,
2651 user_long_t arg, struct fileproc *fp, struct vnode *vp, int32_t *retval);
2652
2653/*
2654 * SPI (private) for opening a file starting from a dir fd
2655 *
2656 * Note: do not inline to keep stack usage under control.
2657 */
__attribute__((noinline))
static int
sys_fcntl__OPENFROM(proc_t p, int fd, int cmd, user_long_t arg,
    struct fileproc *fp, struct vnode *vp, int32_t *retval)
{
#pragma unused(cmd)

	user_addr_t argp = sys_fnctl_parse_arg(p, arg);
	struct user_fopenfrom fopen;
	struct vnode_attr *va;
	struct nameidata *nd;
	int error, cmode;
	bool has_entitlement;

	/* The anchor descriptor must have been opened for reading. */
	if ((fp->f_flag & FREAD) == 0) {
		return sys_fcntl_out(p, fd, fp, EBADF);
	}
	/* Everything below runs without the fd lock. */
	proc_fdunlock(p);

	if (vnode_getwithref(vp)) {
		error = ENOENT;
		goto outdrop;
	}

	/* The relative lookup only makes sense from a directory. */
	if (vp->v_type != VDIR) {
		vnode_put(vp);
		error = ENOTDIR;
		goto outdrop;
	}

	/*
	 * Only entitled apps may use the credentials of the thread
	 * that opened the file descriptor.
	 * Non-entitled threads will use their own context.
	 */
	has_entitlement = IOCurrentTaskHasEntitlement(ACCOUNT_OPENFROM_ENTITLEMENT);

	/* Copy in flags, mode and pathname; 32-bit callers need field widening. */
	if (IS_64BIT_PROCESS(p)) {
		error = copyin(argp, &fopen, sizeof(fopen));
	} else {
		struct user32_fopenfrom fopen32;

		error = copyin(argp, &fopen32, sizeof(fopen32));
		fopen.o_flags = fopen32.o_flags;
		fopen.o_mode = fopen32.o_mode;
		fopen.o_pathname = CAST_USER_ADDR_T(fopen32.o_pathname);
	}
	if (error) {
		vnode_put(vp);
		goto outdrop;
	}

	/* open1() can have really deep stacks, so allocate those */
	va = kalloc_type(struct vnode_attr, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	nd = kalloc_type(struct nameidata, Z_WAITOK | Z_ZERO | Z_NOFAIL);

	AUDIT_ARG(fflags, fopen.o_flags);
	AUDIT_ARG(mode, fopen.o_mode);
	VATTR_INIT(va);
	/* Mask off all but regular access permissions (umask applied, no sticky). */
	cmode = ((fopen.o_mode & ~p->p_fd.fd_cmask) & ALLPERMS) & ~S_ISTXT;
	VATTR_SET(va, va_mode, cmode & ACCESSPERMS);

	SYS_FCNTL_DECLARE_VFS_CONTEXT(context);

	/* Start the lookup relative to the file descriptor's vnode. */
	NDINIT(nd, LOOKUP, OP_OPEN, USEDVP | FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
	    fopen.o_pathname, has_entitlement ? &context : vfs_context_current());
	nd->ni_dvp = vp;

	/* Entitled callers open with the descriptor's credentials; others use their own. */
	error = open1(ctx: has_entitlement ? &context : vfs_context_current(),
	    ndp: nd, uflags: fopen.o_flags, vap: va, NULL, NULL, retval, AUTH_OPEN_NOAUTHFD);

	kfree_type(struct vnode_attr, va);
	kfree_type(struct nameidata, nd);

	vnode_put(vp);

outdrop:
	/* Audit + drop the (now unlocked) fp reference on all paths. */
	return sys_fcntl_outdrop(p, fd, fp, vp, error);
}
2742
2743int
2744sys_fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
2745{
2746 int fd = uap->fd;
2747 int cmd = uap->cmd;
2748 struct fileproc *fp;
2749 struct vnode *vp = NULLVP; /* for AUDIT_ARG() at end */
2750 unsigned int oflags, nflags;
2751 int i, tmp, error, error2, flg = 0;
2752 struct flock fl = {};
2753 struct flocktimeout fltimeout;
2754 struct timespec *timeout = NULL;
2755 off_t offset;
2756 int newmin;
2757 daddr64_t lbn, bn;
2758 unsigned int fflag;
2759 user_addr_t argp;
2760 boolean_t is64bit;
2761 int has_entitlement = 0;
2762 kauth_cred_t p_cred;
2763
2764 AUDIT_ARG(fd, uap->fd);
2765 AUDIT_ARG(cmd, uap->cmd);
2766
2767 proc_fdlock(p);
2768 if ((error = fp_lookup(p, fd, resultfp: &fp, locked: 1))) {
2769 proc_fdunlock(p);
2770 return error;
2771 }
2772
2773 SYS_FCNTL_DECLARE_VFS_CONTEXT(context);
2774
2775 is64bit = proc_is64bit(p);
2776 if (is64bit) {
2777 argp = uap->arg;
2778 } else {
2779 /*
2780 * Since the arg parameter is defined as a long but may be
2781 * either a long or a pointer we must take care to handle
2782 * sign extension issues. Our sys call munger will sign
2783 * extend a long when we are called from a 32-bit process.
2784 * Since we can never have an address greater than 32-bits
2785 * from a 32-bit process we lop off the top 32-bits to avoid
2786 * getting the wrong address
2787 */
2788 argp = CAST_USER_ADDR_T((uint32_t)uap->arg);
2789 }
2790
2791#if CONFIG_MACF
2792 error = mac_file_check_fcntl(cred: kauth_cred_get(), fg: fp->fp_glob, cmd, arg: uap->arg);
2793 if (error) {
2794 goto out;
2795 }
2796#endif
2797
2798 switch (cmd) {
2799 case F_DUPFD:
2800 case F_DUPFD_CLOEXEC:
2801 if (fp_isguarded(fp, GUARD_DUP)) {
2802 error = fp_guard_exception(p, fd, fp, attribs: kGUARD_EXC_DUP);
2803 goto out;
2804 }
2805 newmin = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
2806 AUDIT_ARG(value32, newmin);
2807 if (newmin < 0 || newmin >= proc_limitgetcur_nofile(p)) {
2808 error = EINVAL;
2809 goto out;
2810 }
2811 if ((error = fdalloc(p, want: newmin, result: &i))) {
2812 goto out;
2813 }
2814 p_cred = current_cached_proc_cred(p);
2815 error = finishdup(p, p_cred, old: fd, new: i,
2816 fp_flags: cmd == F_DUPFD_CLOEXEC ? FP_CLOEXEC : 0, retval);
2817 goto out;
2818
2819 case F_GETFD:
2820 *retval = (fp->fp_flags & FP_CLOEXEC) ? FD_CLOEXEC : 0;
2821 error = 0;
2822 goto out;
2823
2824 case F_SETFD:
2825 AUDIT_ARG(value32, (uint32_t)uap->arg);
2826 if (uap->arg & FD_CLOEXEC) {
2827 fp->fp_flags |= FP_CLOEXEC;
2828 error = 0;
2829 } else if (!fp->fp_guard_attrs) {
2830 fp->fp_flags &= ~FP_CLOEXEC;
2831 error = 0;
2832 } else {
2833 error = fp_guard_exception(p,
2834 fd, fp, attribs: kGUARD_EXC_NOCLOEXEC);
2835 }
2836 goto out;
2837
2838 case F_GETFL:
2839 fflag = fp->f_flag;
2840 if ((fflag & O_EVTONLY) && proc_disallow_rw_for_o_evtonly(p)) {
2841 /*
2842 * We insert back F_READ so that conversion back to open flags with
2843 * OFLAGS() will come out right. We only need to set 'FREAD' as the
2844 * 'O_RDONLY' is always implied.
2845 */
2846 fflag |= FREAD;
2847 }
2848 *retval = OFLAGS(fflag);
2849 error = 0;
2850 goto out;
2851
2852 case F_SETFL:
2853 // FIXME (rdar://54898652)
2854 //
2855 // this code is broken if fnctl(F_SETFL), ioctl() are
2856 // called concurrently for the same fileglob.
2857
2858 tmp = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
2859 AUDIT_ARG(value32, tmp);
2860
2861 os_atomic_rmw_loop(&fp->f_flag, oflags, nflags, relaxed, {
2862 nflags = oflags & ~FCNTLFLAGS;
2863 nflags |= FFLAGS(tmp) & FCNTLFLAGS;
2864 });
2865 tmp = nflags & FNONBLOCK;
2866 error = fo_ioctl(fp, FIONBIO, data: (caddr_t)&tmp, ctx: &context);
2867 if (error) {
2868 goto out;
2869 }
2870 tmp = nflags & FASYNC;
2871 error = fo_ioctl(fp, FIOASYNC, data: (caddr_t)&tmp, ctx: &context);
2872 if (!error) {
2873 goto out;
2874 }
2875 os_atomic_andnot(&fp->f_flag, FNONBLOCK, relaxed);
2876 tmp = 0;
2877 (void)fo_ioctl(fp, FIONBIO, data: (caddr_t)&tmp, ctx: &context);
2878 goto out;
2879
2880 case F_GETOWN:
2881 if (fp->f_type == DTYPE_SOCKET) {
2882 *retval = ((struct socket *)fp_get_data(fp))->so_pgid;
2883 error = 0;
2884 goto out;
2885 }
2886 error = fo_ioctl(fp, TIOCGPGRP, data: (caddr_t)retval, ctx: &context);
2887 *retval = -*retval;
2888 goto out;
2889
2890 case F_SETOWN:
2891 tmp = CAST_DOWN_EXPLICIT(pid_t, uap->arg); /* arg is an int, so we won't lose bits */
2892 AUDIT_ARG(value32, tmp);
2893 if (fp->f_type == DTYPE_SOCKET) {
2894 ((struct socket *)fp_get_data(fp))->so_pgid = tmp;
2895 error = 0;
2896 goto out;
2897 }
2898 if (fp->f_type == DTYPE_PIPE) {
2899 error = fo_ioctl(fp, TIOCSPGRP, data: (caddr_t)&tmp, ctx: &context);
2900 goto out;
2901 }
2902
2903 if (tmp <= 0) {
2904 tmp = -tmp;
2905 } else {
2906 proc_t p1 = proc_find(pid: tmp);
2907 if (p1 == 0) {
2908 error = ESRCH;
2909 goto out;
2910 }
2911 tmp = (int)p1->p_pgrpid;
2912 proc_rele(p: p1);
2913 }
2914 error = fo_ioctl(fp, TIOCSPGRP, data: (caddr_t)&tmp, ctx: &context);
2915 goto out;
2916
2917 case F_SETNOSIGPIPE:
2918 tmp = CAST_DOWN_EXPLICIT(int, uap->arg);
2919 if (fp->f_type == DTYPE_SOCKET) {
2920#if SOCKETS
2921 error = sock_setsockopt(so: (struct socket *)fp_get_data(fp),
2922 SOL_SOCKET, SO_NOSIGPIPE, optval: &tmp, optlen: sizeof(tmp));
2923#else
2924 error = EINVAL;
2925#endif
2926 } else {
2927 struct fileglob *fg = fp->fp_glob;
2928
2929 lck_mtx_lock_spin(lck: &fg->fg_lock);
2930 if (tmp) {
2931 fg->fg_lflags |= FG_NOSIGPIPE;
2932 } else {
2933 fg->fg_lflags &= ~FG_NOSIGPIPE;
2934 }
2935 lck_mtx_unlock(lck: &fg->fg_lock);
2936 error = 0;
2937 }
2938 goto out;
2939
2940 case F_GETNOSIGPIPE:
2941 if (fp->f_type == DTYPE_SOCKET) {
2942#if SOCKETS
2943 int retsize = sizeof(*retval);
2944 error = sock_getsockopt(so: (struct socket *)fp_get_data(fp),
2945 SOL_SOCKET, SO_NOSIGPIPE, optval: retval, optlen: &retsize);
2946#else
2947 error = EINVAL;
2948#endif
2949 } else {
2950 *retval = (fp->fp_glob->fg_lflags & FG_NOSIGPIPE) ?
2951 1 : 0;
2952 error = 0;
2953 }
2954 goto out;
2955
2956 case F_SETCONFINED:
2957 /*
2958 * If this is the only reference to this fglob in the process
2959 * and it's already marked as close-on-fork then mark it as
2960 * (immutably) "confined" i.e. any fd that points to it will
2961 * forever be close-on-fork, and attempts to use an IPC
2962 * mechanism to move the descriptor elsewhere will fail.
2963 */
2964 if (CAST_DOWN_EXPLICIT(int, uap->arg)) {
2965 struct fileglob *fg = fp->fp_glob;
2966
2967 lck_mtx_lock_spin(lck: &fg->fg_lock);
2968 if (fg->fg_lflags & FG_CONFINED) {
2969 error = 0;
2970 } else if (1 != os_ref_get_count_raw(rc: &fg->fg_count)) {
2971 error = EAGAIN; /* go close the dup .. */
2972 } else if (fp->fp_flags & FP_CLOFORK) {
2973 fg->fg_lflags |= FG_CONFINED;
2974 error = 0;
2975 } else {
2976 error = EBADF; /* open without O_CLOFORK? */
2977 }
2978 lck_mtx_unlock(lck: &fg->fg_lock);
2979 } else {
2980 /*
2981 * Other subsystems may have built on the immutability
2982 * of FG_CONFINED; clearing it may be tricky.
2983 */
2984 error = EPERM; /* immutable */
2985 }
2986 goto out;
2987
2988 case F_GETCONFINED:
2989 *retval = (fp->fp_glob->fg_lflags & FG_CONFINED) ? 1 : 0;
2990 error = 0;
2991 goto out;
2992
2993 case F_SETLKWTIMEOUT:
2994 case F_SETLKW:
2995 case F_OFD_SETLKWTIMEOUT:
2996 case F_OFD_SETLKW:
2997 flg |= F_WAIT;
2998 OS_FALLTHROUGH;
2999
3000 case F_SETLK:
3001 case F_OFD_SETLK:
3002 if (fp->f_type != DTYPE_VNODE) {
3003 error = EBADF;
3004 goto out;
3005 }
3006 vp = (struct vnode *)fp_get_data(fp);
3007
3008 fflag = fp->f_flag;
3009 offset = fp->f_offset;
3010 proc_fdunlock(p);
3011
3012 /* Copy in the lock structure */
3013 if (F_SETLKWTIMEOUT == cmd || F_OFD_SETLKWTIMEOUT == cmd) {
3014 error = copyin(argp, (caddr_t) &fltimeout, sizeof(fltimeout));
3015 if (error) {
3016 goto outdrop;
3017 }
3018 fl = fltimeout.fl;
3019 timeout = &fltimeout.timeout;
3020 } else {
3021 error = copyin(argp, (caddr_t)&fl, sizeof(fl));
3022 if (error) {
3023 goto outdrop;
3024 }
3025 }
3026
3027 /* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
3028 /* and ending byte for EOVERFLOW in SEEK_SET */
3029 error = check_file_seek_range(fl: &fl, cur_file_offset: offset);
3030 if (error) {
3031 goto outdrop;
3032 }
3033
3034 if ((error = vnode_getwithref(vp))) {
3035 goto outdrop;
3036 }
3037 if (fl.l_whence == SEEK_CUR) {
3038 fl.l_start += offset;
3039 }
3040
3041#if CONFIG_MACF
3042 error = mac_file_check_lock(cred: kauth_cred_get(), fg: fp->fp_glob,
3043 F_SETLK, fl: &fl);
3044 if (error) {
3045 (void)vnode_put(vp);
3046 goto outdrop;
3047 }
3048#endif
3049
3050#if CONFIG_FILE_LEASES
3051 (void)vnode_breaklease(vp, O_WRONLY, ctx: vfs_context_current());
3052#endif
3053
3054 switch (cmd) {
3055 case F_OFD_SETLK:
3056 case F_OFD_SETLKW:
3057 case F_OFD_SETLKWTIMEOUT:
3058 flg |= F_OFD_LOCK;
3059 if (fp->fp_glob->fg_lflags & FG_CONFINED) {
3060 flg |= F_CONFINED;
3061 }
3062 switch (fl.l_type) {
3063 case F_RDLCK:
3064 if ((fflag & FREAD) == 0) {
3065 error = EBADF;
3066 break;
3067 }
3068 error = VNOP_ADVLOCK(vp, ofd_to_id(fg: fp->fp_glob),
3069 F_SETLK, &fl, flg, &context, timeout);
3070 break;
3071 case F_WRLCK:
3072 if ((fflag & FWRITE) == 0) {
3073 error = EBADF;
3074 break;
3075 }
3076 error = VNOP_ADVLOCK(vp, ofd_to_id(fg: fp->fp_glob),
3077 F_SETLK, &fl, flg, &context, timeout);
3078 break;
3079 case F_UNLCK:
3080 error = VNOP_ADVLOCK(vp, ofd_to_id(fg: fp->fp_glob),
3081 F_UNLCK, &fl, F_OFD_LOCK, &context,
3082 timeout);
3083 break;
3084 default:
3085 error = EINVAL;
3086 break;
3087 }
3088 if (0 == error &&
3089 (F_RDLCK == fl.l_type || F_WRLCK == fl.l_type)) {
3090 struct fileglob *fg = fp->fp_glob;
3091
3092 /*
3093 * arrange F_UNLCK on last close (once
3094 * set, FG_HAS_OFDLOCK is immutable)
3095 */
3096 if ((fg->fg_lflags & FG_HAS_OFDLOCK) == 0) {
3097 lck_mtx_lock_spin(lck: &fg->fg_lock);
3098 fg->fg_lflags |= FG_HAS_OFDLOCK;
3099 lck_mtx_unlock(lck: &fg->fg_lock);
3100 }
3101 }
3102 break;
3103 default:
3104 flg |= F_POSIX;
3105 switch (fl.l_type) {
3106 case F_RDLCK:
3107 if ((fflag & FREAD) == 0) {
3108 error = EBADF;
3109 break;
3110 }
3111 // XXX UInt32 unsafe for LP64 kernel
3112 os_atomic_or(&p->p_ladvflag, P_LADVLOCK, relaxed);
3113 error = VNOP_ADVLOCK(vp, (caddr_t)p,
3114 F_SETLK, &fl, flg, &context, timeout);
3115 break;
3116 case F_WRLCK:
3117 if ((fflag & FWRITE) == 0) {
3118 error = EBADF;
3119 break;
3120 }
3121 // XXX UInt32 unsafe for LP64 kernel
3122 os_atomic_or(&p->p_ladvflag, P_LADVLOCK, relaxed);
3123 error = VNOP_ADVLOCK(vp, (caddr_t)p,
3124 F_SETLK, &fl, flg, &context, timeout);
3125 break;
3126 case F_UNLCK:
3127 error = VNOP_ADVLOCK(vp, (caddr_t)p,
3128 F_UNLCK, &fl, F_POSIX, &context, timeout);
3129 break;
3130 default:
3131 error = EINVAL;
3132 break;
3133 }
3134 break;
3135 }
3136 (void) vnode_put(vp);
3137 goto outdrop;
3138
3139 case F_GETLK:
3140 case F_OFD_GETLK:
3141 case F_GETLKPID:
3142 case F_OFD_GETLKPID:
3143 if (fp->f_type != DTYPE_VNODE) {
3144 error = EBADF;
3145 goto out;
3146 }
3147 vp = (struct vnode *)fp_get_data(fp);
3148
3149 offset = fp->f_offset;
3150 proc_fdunlock(p);
3151
3152 /* Copy in the lock structure */
3153 error = copyin(argp, (caddr_t)&fl, sizeof(fl));
3154 if (error) {
3155 goto outdrop;
3156 }
3157
3158 /* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
3159 /* and ending byte for EOVERFLOW in SEEK_SET */
3160 error = check_file_seek_range(fl: &fl, cur_file_offset: offset);
3161 if (error) {
3162 goto outdrop;
3163 }
3164
3165 if ((fl.l_whence == SEEK_SET) && (fl.l_start < 0)) {
3166 error = EINVAL;
3167 goto outdrop;
3168 }
3169
3170 switch (fl.l_type) {
3171 case F_RDLCK:
3172 case F_UNLCK:
3173 case F_WRLCK:
3174 break;
3175 default:
3176 error = EINVAL;
3177 goto outdrop;
3178 }
3179
3180 switch (fl.l_whence) {
3181 case SEEK_CUR:
3182 case SEEK_SET:
3183 case SEEK_END:
3184 break;
3185 default:
3186 error = EINVAL;
3187 goto outdrop;
3188 }
3189
3190 if ((error = vnode_getwithref(vp)) == 0) {
3191 if (fl.l_whence == SEEK_CUR) {
3192 fl.l_start += offset;
3193 }
3194
3195#if CONFIG_MACF
3196 error = mac_file_check_lock(cred: kauth_cred_get(), fg: fp->fp_glob,
3197 op: cmd, fl: &fl);
3198 if (error == 0)
3199#endif
3200 switch (cmd) {
3201 case F_OFD_GETLK:
3202 error = VNOP_ADVLOCK(vp, ofd_to_id(fg: fp->fp_glob),
3203 F_GETLK, &fl, F_OFD_LOCK, &context, NULL);
3204 break;
3205 case F_OFD_GETLKPID:
3206 error = VNOP_ADVLOCK(vp, ofd_to_id(fg: fp->fp_glob),
3207 F_GETLKPID, &fl, F_OFD_LOCK, &context, NULL);
3208 break;
3209 default:
3210 error = VNOP_ADVLOCK(vp, (caddr_t)p,
3211 cmd, &fl, F_POSIX, &context, NULL);
3212 break;
3213 }
3214
3215 (void)vnode_put(vp);
3216
3217 if (error == 0) {
3218 error = copyout((caddr_t)&fl, argp, sizeof(fl));
3219 }
3220 }
3221 goto outdrop;
3222
3223 case F_PREALLOCATE: {
3224 fstore_t alloc_struct; /* structure for allocate command */
3225 u_int32_t alloc_flags = 0;
3226
3227 if (fp->f_type != DTYPE_VNODE) {
3228 error = EBADF;
3229 goto out;
3230 }
3231
3232 vp = (struct vnode *)fp_get_data(fp);
3233 proc_fdunlock(p);
3234
3235 /* make sure that we have write permission */
3236 if ((fp->f_flag & FWRITE) == 0) {
3237 error = EBADF;
3238 goto outdrop;
3239 }
3240
3241 error = copyin(argp, (caddr_t)&alloc_struct, sizeof(alloc_struct));
3242 if (error) {
3243 goto outdrop;
3244 }
3245
3246 /* now set the space allocated to 0 */
3247 alloc_struct.fst_bytesalloc = 0;
3248
3249 /*
3250 * Do some simple parameter checking
3251 */
3252
3253 /* set up the flags */
3254
3255 alloc_flags |= PREALLOCATE;
3256
3257 if (alloc_struct.fst_flags & F_ALLOCATECONTIG) {
3258 alloc_flags |= ALLOCATECONTIG;
3259 }
3260
3261 if (alloc_struct.fst_flags & F_ALLOCATEALL) {
3262 alloc_flags |= ALLOCATEALL;
3263 }
3264
3265 if (alloc_struct.fst_flags & F_ALLOCATEPERSIST) {
3266 alloc_flags |= ALLOCATEPERSIST;
3267 }
3268
3269 /*
3270 * Do any position mode specific stuff. The only
3271 * position mode supported now is PEOFPOSMODE
3272 */
3273
3274 switch (alloc_struct.fst_posmode) {
3275 case F_PEOFPOSMODE:
3276 if (alloc_struct.fst_offset != 0) {
3277 error = EINVAL;
3278 goto outdrop;
3279 }
3280
3281 alloc_flags |= ALLOCATEFROMPEOF;
3282 break;
3283
3284 case F_VOLPOSMODE:
3285 if (alloc_struct.fst_offset <= 0) {
3286 error = EINVAL;
3287 goto outdrop;
3288 }
3289
3290 alloc_flags |= ALLOCATEFROMVOL;
3291 break;
3292
3293 default: {
3294 error = EINVAL;
3295 goto outdrop;
3296 }
3297 }
3298 if ((error = vnode_getwithref(vp)) == 0) {
3299 /*
3300 * call allocate to get the space
3301 */
3302 error = VNOP_ALLOCATE(vp, alloc_struct.fst_length, alloc_flags,
3303 &alloc_struct.fst_bytesalloc, alloc_struct.fst_offset,
3304 &context);
3305 (void)vnode_put(vp);
3306
3307 error2 = copyout((caddr_t)&alloc_struct, argp, sizeof(alloc_struct));
3308
3309 if (error == 0) {
3310 error = error2;
3311 }
3312 }
3313 goto outdrop;
3314 }
3315 case F_PUNCHHOLE: {
3316 fpunchhole_t args;
3317
3318 if (fp->f_type != DTYPE_VNODE) {
3319 error = EBADF;
3320 goto out;
3321 }
3322
3323 vp = (struct vnode *)fp_get_data(fp);
3324 proc_fdunlock(p);
3325
3326 /* need write permissions */
3327 if ((fp->f_flag & FWRITE) == 0) {
3328 error = EPERM;
3329 goto outdrop;
3330 }
3331
3332 if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
3333 goto outdrop;
3334 }
3335
3336 if ((error = vnode_getwithref(vp))) {
3337 goto outdrop;
3338 }
3339
3340#if CONFIG_MACF
3341 if ((error = mac_vnode_check_write(ctx: &context, file_cred: fp->fp_glob->fg_cred, vp))) {
3342 (void)vnode_put(vp);
3343 goto outdrop;
3344 }
3345#endif
3346
3347 error = VNOP_IOCTL(vp, F_PUNCHHOLE, data: (caddr_t)&args, fflag: 0, ctx: &context);
3348 (void)vnode_put(vp);
3349
3350 goto outdrop;
3351 }
3352 case F_TRIM_ACTIVE_FILE: {
3353 ftrimactivefile_t args;
3354
3355 if (priv_check_cred(cred: kauth_cred_get(), PRIV_TRIM_ACTIVE_FILE, flags: 0)) {
3356 error = EACCES;
3357 goto out;
3358 }
3359
3360 if (fp->f_type != DTYPE_VNODE) {
3361 error = EBADF;
3362 goto out;
3363 }
3364
3365 vp = (struct vnode *)fp_get_data(fp);
3366 proc_fdunlock(p);
3367
3368 /* need write permissions */
3369 if ((fp->f_flag & FWRITE) == 0) {
3370 error = EPERM;
3371 goto outdrop;
3372 }
3373
3374 if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
3375 goto outdrop;
3376 }
3377
3378 if ((error = vnode_getwithref(vp))) {
3379 goto outdrop;
3380 }
3381
3382 error = VNOP_IOCTL(vp, F_TRIM_ACTIVE_FILE, data: (caddr_t)&args, fflag: 0, ctx: &context);
3383 (void)vnode_put(vp);
3384
3385 goto outdrop;
3386 }
3387 case F_SPECULATIVE_READ: {
3388 fspecread_t args;
3389 off_t temp_length = 0;
3390
3391 if (fp->f_type != DTYPE_VNODE) {
3392 error = EBADF;
3393 goto out;
3394 }
3395
3396 vp = (struct vnode *)fp_get_data(fp);
3397 proc_fdunlock(p);
3398
3399 if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
3400 goto outdrop;
3401 }
3402
3403 /* Discard invalid offsets or lengths */
3404 if ((args.fsr_offset < 0) || (args.fsr_length < 0)) {
3405 error = EINVAL;
3406 goto outdrop;
3407 }
3408
3409 /*
3410 * Round the file offset down to a page-size boundary (or to 0).
3411 * The filesystem will need to round the length up to the end of the page boundary
3412 * or to the EOF of the file.
3413 */
3414 uint64_t foff = (((uint64_t)args.fsr_offset) & ~((uint64_t)PAGE_MASK));
3415 uint64_t foff_delta = args.fsr_offset - foff;
3416 args.fsr_offset = (off_t) foff;
3417
3418 /*
3419 * Now add in the delta to the supplied length. Since we may have adjusted the
3420 * offset, increase it by the amount that we adjusted.
3421 */
3422 if (os_add_overflow(args.fsr_length, foff_delta, &args.fsr_length)) {
3423 error = EOVERFLOW;
3424 goto outdrop;
3425 }
3426
3427 /*
3428 * Make sure (fsr_offset + fsr_length) does not overflow.
3429 */
3430 if (os_add_overflow(args.fsr_offset, args.fsr_length, &temp_length)) {
3431 error = EOVERFLOW;
3432 goto outdrop;
3433 }
3434
3435 if ((error = vnode_getwithref(vp))) {
3436 goto outdrop;
3437 }
3438 error = VNOP_IOCTL(vp, F_SPECULATIVE_READ, data: (caddr_t)&args, fflag: 0, ctx: &context);
3439 (void)vnode_put(vp);
3440
3441 goto outdrop;
3442 }
3443 case F_ATTRIBUTION_TAG: {
3444 fattributiontag_t args;
3445
3446 if (fp->f_type != DTYPE_VNODE) {
3447 error = EBADF;
3448 goto out;
3449 }
3450
3451 vp = (struct vnode *)fp_get_data(fp);
3452 proc_fdunlock(p);
3453
3454 if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
3455 goto outdrop;
3456 }
3457
3458 if ((error = vnode_getwithref(vp))) {
3459 goto outdrop;
3460 }
3461
3462 error = VNOP_IOCTL(vp, F_ATTRIBUTION_TAG, data: (caddr_t)&args, fflag: 0, ctx: &context);
3463 (void)vnode_put(vp);
3464
3465 if (error == 0) {
3466 error = copyout((caddr_t)&args, argp, sizeof(args));
3467 }
3468
3469 goto outdrop;
3470 }
	case F_SETSIZE:
		/*
		 * Set the file's size to the off_t pointed to by the argument.
		 * Resizing without zero-fill (IO_NOZEROFILL) would expose
		 * stale on-disk data, so that fast path requires both
		 * superuser and the PRIV_VFS_SETSIZE privilege; otherwise the
		 * resize zero-fills.
		 */
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		error = copyin(argp, (caddr_t)&offset, sizeof(off_t));
		if (error) {
			goto outdrop;
		}
		AUDIT_ARG(value64, offset);

		error = vnode_getwithref(vp);
		if (error) {
			goto outdrop;
		}

#if CONFIG_MACF
		/* MAC policy gets a veto before any size change. */
		error = mac_vnode_check_truncate(&context,
		    fp->fp_glob->fg_cred, vp);
		if (error) {
			(void)vnode_put(vp);
			goto outdrop;
		}
#endif
		/*
		 * Make sure that we are root.  Growing a file
		 * without zero filling the data is a security hole.
		 */
		if (!kauth_cred_issuser(kauth_cred_get())) {
			error = EACCES;
		} else {
			/*
			 * Require privilege to change file size without zerofill,
			 * else will change the file size and zerofill it.
			 */
			error = priv_check_cred(kauth_cred_get(), PRIV_VFS_SETSIZE, 0);
			if (error == 0) {
				error = vnode_setsize(vp, offset, IO_NOZEROFILL, &context);
			} else {
				error = vnode_setsize(vp, offset, 0, &context);
			}

#if CONFIG_MACF
			if (error == 0) {
				mac_vnode_notify_truncate(&context, fp->fp_glob->fg_cred, vp);
			}
#endif
		}

		(void)vnode_put(vp);
		goto outdrop;
3525
	case F_RDAHEAD:
		/*
		 * The next four cases only toggle per-open-file flags in
		 * fp->fp_glob->fg_flag with relaxed atomics; no vnode iocount
		 * is needed and the proc fd lock is released at "out".
		 *
		 * F_RDAHEAD: arg != 0 enables read-ahead (clears FNORDAHEAD).
		 */
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		if (uap->arg) {
			os_atomic_andnot(&fp->fp_glob->fg_flag, FNORDAHEAD, relaxed);
		} else {
			os_atomic_or(&fp->fp_glob->fg_flag, FNORDAHEAD, relaxed);
		}
		goto out;

	case F_NOCACHE:
		/* arg != 0 disables data caching on this open file (FNOCACHE). */
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		if (uap->arg) {
			os_atomic_or(&fp->fp_glob->fg_flag, FNOCACHE, relaxed);
		} else {
			os_atomic_andnot(&fp->fp_glob->fg_flag, FNOCACHE, relaxed);
		}
		goto out;

	case F_NODIRECT:
		/* arg != 0 sets FNODIRECT on this open file. */
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		if (uap->arg) {
			os_atomic_or(&fp->fp_glob->fg_flag, FNODIRECT, relaxed);
		} else {
			os_atomic_andnot(&fp->fp_glob->fg_flag, FNODIRECT, relaxed);
		}
		goto out;

	case F_SINGLE_WRITER:
		/* arg != 0 marks this open file as a single-writer stream. */
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		if (uap->arg) {
			os_atomic_or(&fp->fp_glob->fg_flag, FSINGLE_WRITER, relaxed);
		} else {
			os_atomic_andnot(&fp->fp_glob->fg_flag, FSINGLE_WRITER, relaxed);
		}
		goto out;
3573
	case F_GLOBAL_NOCACHE:
		/*
		 * Get/set the *vnode-wide* (not per-fd) no-cache state.
		 * The previous state is returned in *retval; arg != 0
		 * enables no-cache for all users of the vnode.
		 */
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		if ((error = vnode_getwithref(vp)) == 0) {
			*retval = vnode_isnocache(vp);

			if (uap->arg) {
				vnode_setnocache(vp);
			} else {
				vnode_clearnocache(vp);
			}

			(void)vnode_put(vp);
		}
		goto outdrop;

	case F_CHECK_OPENEVT:
		/*
		 * Get/set the "open event" state on the vnode; previous
		 * state is returned in *retval, new state follows arg.
		 */
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		if ((error = vnode_getwithref(vp)) == 0) {
			*retval = vnode_is_openevt(vp);

			if (uap->arg) {
				vnode_set_openevt(vp);
			} else {
				vnode_clear_openevt(vp);
			}

			(void)vnode_put(vp);
		}
		goto outdrop;
3615
	case F_RDADVISE: {
		/*
		 * Issue a read-ahead advisory (offset/count) to the
		 * filesystem.  Negative offset or count is rejected here
		 * before reaching the FS.
		 */
		struct radvisory ra_struct;

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		if ((error = copyin(argp, (caddr_t)&ra_struct, sizeof(ra_struct)))) {
			goto outdrop;
		}
		if (ra_struct.ra_offset < 0 || ra_struct.ra_count < 0) {
			error = EINVAL;
			goto outdrop;
		}
		if ((error = vnode_getwithref(vp)) == 0) {
			error = VNOP_IOCTL(vp, F_RDADVISE, (caddr_t)&ra_struct, 0, &context);

			(void)vnode_put(vp);
		}
		goto outdrop;
	}

	case F_FLUSH_DATA:
		/* Asynchronous fsync: push dirty data without waiting. */
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		if ((error = vnode_getwithref(vp)) == 0) {
			error = VNOP_FSYNC(vp, MNT_NOWAIT, &context);

			(void)vnode_put(vp);
		}
		goto outdrop;
3656
	case F_LOG2PHYS:
	case F_LOG2PHYS_EXT: {
		/*
		 * Translate a logical file offset into a physical device
		 * offset.  F_LOG2PHYS uses the fd's current offset and the
		 * device block size; F_LOG2PHYS_EXT takes the offset and a
		 * contiguous-byte request from the user's struct log2phys and
		 * also reports how many contiguous bytes follow.
		 */
		struct log2phys l2p_struct = {}; /* structure for allocate command */
		int devBlockSize;

		off_t file_offset = 0;
		size_t a_size = 0;
		size_t run = 0;

		if (cmd == F_LOG2PHYS_EXT) {
			error = copyin(argp, (caddr_t)&l2p_struct, sizeof(l2p_struct));
			if (error) {
				goto out;
			}
			file_offset = l2p_struct.l2p_devoffset;
		} else {
			file_offset = fp->f_offset;
		}
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);
		if ((error = vnode_getwithref(vp))) {
			goto outdrop;
		}
		/* Round the file offset down to a logical block boundary... */
		error = VNOP_OFFTOBLK(vp, file_offset, &lbn);
		if (error) {
			(void)vnode_put(vp);
			goto outdrop;
		}
		/* ...and back to the byte offset of that block start. */
		error = VNOP_BLKTOOFF(vp, lbn, &offset);
		if (error) {
			(void)vnode_put(vp);
			goto outdrop;
		}
		devBlockSize = vfs_devblocksize(vnode_mount(vp));
		if (cmd == F_LOG2PHYS_EXT) {
			if (l2p_struct.l2p_contigbytes < 0) {
				vnode_put(vp);
				error = EINVAL;
				goto outdrop;
			}

			/* clamp the (non-negative) request to size_t range */
			a_size = (size_t)MIN((uint64_t)l2p_struct.l2p_contigbytes, SIZE_MAX);
		} else {
			a_size = devBlockSize;
		}

		error = VNOP_BLOCKMAP(vp, offset, a_size, &bn, &run, NULL, 0, &context);

		(void)vnode_put(vp);

		if (!error) {
			l2p_struct.l2p_flags = 0; /* for now */
			if (cmd == F_LOG2PHYS_EXT) {
				/* subtract the bytes between block start and requested offset */
				l2p_struct.l2p_contigbytes = run - (file_offset - offset);
			} else {
				l2p_struct.l2p_contigbytes = 0; /* for now */
			}

			/*
			 * The block number being -1 suggests that the file offset is not backed
			 * by any real blocks on-disk. As a result, just let it be passed back up wholesale.
			 */
			if (bn == -1) {
				/* Don't multiply it by the block size */
				l2p_struct.l2p_devoffset = bn;
			} else {
				l2p_struct.l2p_devoffset = bn * devBlockSize;
				l2p_struct.l2p_devoffset += file_offset - offset;
			}
			error = copyout((caddr_t)&l2p_struct, argp, sizeof(l2p_struct));
		}
		goto outdrop;
	}
	case F_GETPATH:
	case F_GETPATH_NOFIRMLINK: {
		/*
		 * Copy out the path of the vnode backing this fd.
		 * F_GETPATH_NOFIRMLINK resolves through firmlinks.
		 */
		char *pathbufp;
		size_t pathlen;

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		pathlen = MAXPATHLEN;
		pathbufp = zalloc(ZV_NAMEI);    /* MAXPATHLEN-sized buffer */

		if ((error = vnode_getwithref(vp)) == 0) {
			error = vn_getpath_ext(vp, NULL, pathbufp,
			    &pathlen, cmd == F_GETPATH_NOFIRMLINK ?
			    VN_GETPATH_NO_FIRMLINK : 0);
			(void)vnode_put(vp);

			if (error == 0) {
				/* pathlen includes the NUL terminator */
				error = copyout((caddr_t)pathbufp, argp, pathlen);
			}
		}
		zfree(ZV_NAMEI, pathbufp);
		goto outdrop;
	}

	case F_PATHPKG_CHECK: {
		/*
		 * Check whether the user-supplied path names a "package"
		 * containing the fd's vnode; boolean result lands in *retval.
		 */
		char *pathbufp;
		size_t pathlen;

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		pathlen = MAXPATHLEN;
		pathbufp = zalloc(ZV_NAMEI);

		if ((error = copyinstr(argp, pathbufp, MAXPATHLEN, &pathlen)) == 0) {
			if ((error = vnode_getwithref(vp)) == 0) {
				AUDIT_ARG(text, pathbufp);
				error = vn_path_package_check(vp, pathbufp, (int)pathlen, retval);

				(void)vnode_put(vp);
			}
		}
		zfree(ZV_NAMEI, pathbufp);
		goto outdrop;
	}
3788
	case F_CHKCLEAN:        // used by regression tests to see if all dirty pages got cleaned by fsync()
	case F_FULLFSYNC:       // fsync + flush the journal + DKIOCSYNCHRONIZE
	case F_BARRIERFSYNC:    // fsync + barrier
	case F_FREEZE_FS:       // freeze all other fs operations for the fs of this fd
	case F_THAW_FS: {       // thaw all frozen fs operations for the fs of this fd
		/*
		 * All five commands are forwarded to the filesystem via
		 * VNOP_IOCTL with no argument payload.  F_BARRIERFSYNC is
		 * promoted to F_FULLFSYNC when the mount has been marked as
		 * not supporting barriers (sticky, per-mount).
		 */
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		if ((error = vnode_getwithref(vp)) == 0) {
			if ((cmd == F_BARRIERFSYNC) &&
			    (vp->v_mount->mnt_supl_kern_flag & MNTK_SUPL_USE_FULLSYNC)) {
				cmd = F_FULLFSYNC;
			}
			error = VNOP_IOCTL(vp, cmd, (caddr_t)NULL, 0, &context);

			/*
			 * Promote F_BARRIERFSYNC to F_FULLFSYNC if the underlying
			 * filesystem doesn't support it.
			 */
			if ((error == ENOTTY || error == ENOTSUP || error == EINVAL) &&
			    (cmd == F_BARRIERFSYNC)) {
				/* remember the promotion for future calls on this mount */
				os_atomic_or(&vp->v_mount->mnt_supl_kern_flag,
				    MNTK_SUPL_USE_FULLSYNC, relaxed);

				error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, &context);
			}

			(void)vnode_put(vp);
		}
		break;
	}
3824
3825 /*
3826 * SPI (private) for opening a file starting from a dir fd
3827 */
	case F_OPENFROM: {
		/*
		 * Open a file relative to this (directory) fd.  All argument
		 * parsing and the proc fd unlock happen inside the helper,
		 * which also returns directly (no fall-through to out/outdrop).
		 */
		/* Check if this isn't a valid file descriptor */
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);

		return sys_fcntl__OPENFROM(p, fd, cmd, uap->arg, fp, vp, retval);
	}

	/*
	 * SPI (private) for unlinking a file starting from a dir fd
	 */
	case F_UNLINKFROM: {
		user_addr_t pathname;

		/* Check if this isn't a valid file descriptor */
		if ((fp->f_type != DTYPE_VNODE) ||
		    (fp->f_flag & FREAD) == 0) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		if (vnode_getwithref(vp)) {
			error = ENOENT;
			goto outdrop;
		}

		/* Only valid for directories */
		if (vp->v_type != VDIR) {
			vnode_put(vp);
			error = ENOTDIR;
			goto outdrop;
		}

		/*
		 * Only entitled apps may use the credentials of the thread
		 * that opened the file descriptor.
		 * Non-entitled threads will use their own context.
		 */
		if (IOCurrentTaskHasEntitlement(ACCOUNT_OPENFROM_ENTITLEMENT)) {
			has_entitlement = 1;
		}

		/* Get flags, mode and pathname arguments. */
		if (IS_64BIT_PROCESS(p)) {
			pathname = (user_addr_t)argp;
		} else {
			pathname = CAST_USER_ADDR_T(argp);
		}

		/* Start the lookup relative to the file descriptor's vnode. */
		error = unlink1(has_entitlement ? &context : vfs_context_current(),
		    vp, pathname, UIO_USERSPACE, 0);

		vnode_put(vp);
		break;
	}
3889
	case F_ADDSIGS:
	case F_ADDFILESIGS:
	case F_ADDFILESIGS_FOR_DYLD_SIM:
	case F_ADDFILESIGS_RETURN:
	case F_ADDFILESIGS_INFO:
	{
		/*
		 * Attach a code-signature blob to the file backing this fd.
		 * F_ADDSIGS copies the blob from user memory; the
		 * F_ADDFILESIGS* variants read it from the file itself.
		 * Some variants also return data: the blob's end offset
		 * (RETURN/FOR_DYLD_SIM/INFO) and the cdhash + hash type
		 * (INFO).
		 */
		struct cs_blob *blob = NULL;
		struct user_fsignatures fs;
		kern_return_t kr;
		vm_offset_t kernel_blob_addr;
		vm_size_t kernel_blob_size;
		int blob_add_flags = 0;
		/* INFO consumes the larger struct that includes output fields */
		const size_t sizeof_fs = (cmd == F_ADDFILESIGS_INFO ?
		    offsetof(struct user_fsignatures, fs_cdhash /* first output element */) :
		    offsetof(struct user_fsignatures, fs_fsignatures_size /* compat */));

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		if (cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
			blob_add_flags |= MAC_VNODE_CHECK_DYLD_SIM;
			/* dyld_sim processes must die on signature invalidation */
			if ((proc_getcsflags(p) & CS_KILL) == 0) {
				proc_lock(p);
				proc_csflags_set(p, CS_KILL);
				proc_unlock(p);
			}
		}

		error = vnode_getwithref(vp);
		if (error) {
			goto outdrop;
		}

		if (IS_64BIT_PROCESS(p)) {
			error = copyin(argp, &fs, sizeof_fs);
		} else {
			/* INFO needs the 64-bit layout for its output fields */
			if (cmd == F_ADDFILESIGS_INFO) {
				error = EINVAL;
				vnode_put(vp);
				goto outdrop;
			}

			struct user32_fsignatures fs32;

			error = copyin(argp, &fs32, sizeof(fs32));
			fs.fs_file_start = fs32.fs_file_start;
			fs.fs_blob_start = CAST_USER_ADDR_T(fs32.fs_blob_start);
			fs.fs_blob_size = fs32.fs_blob_size;
		}

		if (error) {
			vnode_put(vp);
			goto outdrop;
		}

		/*
		 * First check if we have something loaded at this offset
		 */
		blob = ubc_cs_blob_get(vp, CPU_TYPE_ANY, CPU_SUBTYPE_ANY, fs.fs_file_start);
		if (blob != NULL) {
			/* If this is for dyld_sim revalidate the blob */
			if (cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
				error = ubc_cs_blob_revalidate(vp, blob, NULL, blob_add_flags, proc_platform(p));
				if (error) {
					blob = NULL;
					/* EAGAIN means "reload below"; anything else is fatal */
					if (error != EAGAIN) {
						vnode_put(vp);
						goto outdrop;
					}
				}
			}
		}

		if (blob == NULL) {
			/*
			 * An arbitrary limit, to prevent someone from mapping in a 20GB blob. This should cover
			 * our use cases for the immediate future, but note that at the time of this commit, some
			 * platforms are nearing 2MB blob sizes (with a prior soft limit of 2.5MB).
			 *
			 * We should consider how we can manage this more effectively; the above means that some
			 * platforms are using megabytes of memory for signing data; it merely hasn't crossed the
			 * threshold considered ridiculous at the time of this change.
			 */
#define CS_MAX_BLOB_SIZE (40ULL * 1024ULL * 1024ULL)
			if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
				error = E2BIG;
				vnode_put(vp);
				goto outdrop;
			}

			kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
			kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
			if (kr != KERN_SUCCESS || kernel_blob_size < fs.fs_blob_size) {
				error = ENOMEM;
				vnode_put(vp);
				goto outdrop;
			}

			if (cmd == F_ADDSIGS) {
				/* blob is in the caller's address space */
				error = copyin(fs.fs_blob_start,
				    (void *) kernel_blob_addr,
				    fs.fs_blob_size);
			} else { /* F_ADDFILESIGS || F_ADDFILESIGS_RETURN || F_ADDFILESIGS_FOR_DYLD_SIM || F_ADDFILESIGS_INFO */
				int resid;

				/* blob lives inside the file itself; read it in */
				error = vn_rdwr(UIO_READ,
				    vp,
				    (caddr_t) kernel_blob_addr,
				    (int)kernel_blob_size,
				    fs.fs_file_start + fs.fs_blob_start,
				    UIO_SYSSPACE,
				    0,
				    kauth_cred_get(),
				    &resid,
				    p);
				if ((error == 0) && resid) {
					/* kernel_blob_size rounded to a page size, but signature may be at end of file */
					memset((void *)(kernel_blob_addr + (kernel_blob_size - resid)), 0x0, resid);
				}
			}

			if (error) {
				ubc_cs_blob_deallocate(kernel_blob_addr,
				    kernel_blob_size);
				vnode_put(vp);
				goto outdrop;
			}

			blob = NULL;
			error = ubc_cs_blob_add(vp,
			    proc_platform(p),
			    CPU_TYPE_ANY,                       /* not for a specific architecture */
			    CPU_SUBTYPE_ANY,
			    fs.fs_file_start,
			    &kernel_blob_addr,
			    kernel_blob_size,
			    NULL,
			    blob_add_flags,
			    &blob);

			/* ubc_blob_add() has consumed "kernel_blob_addr" if it is zeroed */
			if (error) {
				if (kernel_blob_addr) {
					ubc_cs_blob_deallocate(kernel_blob_addr,
					    kernel_blob_size);
				}
				vnode_put(vp);
				goto outdrop;
			} else {
#if CHECK_CS_VALIDATION_BITMAP
				ubc_cs_validation_bitmap_allocate( vp );
#endif
			}
		}

		if (cmd == F_ADDFILESIGS_RETURN || cmd == F_ADDFILESIGS_FOR_DYLD_SIM ||
		    cmd == F_ADDFILESIGS_INFO) {
			/*
			 * The first element of the structure is a
			 * off_t that happen to have the same size for
			 * all archs. Lets overwrite that.
			 */
			off_t end_offset = 0;
			if (blob) {
				end_offset = blob->csb_end_offset;
			}
			error = copyout(&end_offset, argp, sizeof(end_offset));

			if (error) {
				vnode_put(vp);
				goto outdrop;
			}
		}

		if (cmd == F_ADDFILESIGS_INFO) {
			/* Return information. What we copy out depends on the size of the
			 * passed in structure, to keep binary compatibility. */

			if (fs.fs_fsignatures_size >= sizeof(struct user_fsignatures)) {
				// enough room for fs_cdhash[20]+fs_hash_type

				if (blob != NULL) {
					error = copyout(blob->csb_cdhash,
					    (vm_address_t)argp + offsetof(struct user_fsignatures, fs_cdhash),
					    USER_FSIGNATURES_CDHASH_LEN);
					if (error) {
						vnode_put(vp);
						goto outdrop;
					}
					int hashtype = cs_hash_type(blob->csb_hashtype);
					error = copyout(&hashtype,
					    (vm_address_t)argp + offsetof(struct user_fsignatures, fs_hash_type),
					    sizeof(int));
					if (error) {
						vnode_put(vp);
						goto outdrop;
					}
				}
			}
		}

		(void) vnode_put(vp);
		break;
	}
#if CONFIG_SUPPLEMENTAL_SIGNATURES
	case F_ADDFILESUPPL:
	{
		/*
		 * Attach a *supplemental* code signature (read from this fd's
		 * file) to the "original" file named by fs.fs_orig_fd.
		 * Two fds, two vnodes: vp is the supplement source, ivp the
		 * original.  All exit paths must drop both iocounts, orig_fp,
		 * and then fp (via outdrop).
		 */
		struct vnode *ivp;
		struct cs_blob *blob = NULL;
		struct user_fsupplement fs;
		int orig_fd;
		struct fileproc* orig_fp = NULL;
		kern_return_t kr;
		vm_offset_t kernel_blob_addr;
		vm_size_t kernel_blob_size;

		/* 32-bit callers are not supported for this SPI */
		if (!IS_64BIT_PROCESS(p)) {
			error = EINVAL;
			goto out; // drop fp and unlock fds
		}

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}

		error = copyin(argp, &fs, sizeof(fs));
		if (error) {
			goto out;
		}

		orig_fd = fs.fs_orig_fd;
		/* fd lock is still held here, hence locked: 1 */
		if ((error = fp_lookup(p, orig_fd, &orig_fp, 1))) {
			printf("CODE SIGNING: Failed to find original file for supplemental signature attachment\n");
			goto out;
		}

		if (orig_fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			fp_drop(p, orig_fd, orig_fp, 1);
			goto out;
		}

		ivp = (struct vnode *)fp_get_data(orig_fp);

		vp = (struct vnode *)fp_get_data(fp);

		proc_fdunlock(p);

		error = vnode_getwithref(ivp);
		if (error) {
			fp_drop(p, orig_fd, orig_fp, 0);
			goto outdrop; //drop fp
		}

		error = vnode_getwithref(vp);
		if (error) {
			vnode_put(ivp);
			fp_drop(p, orig_fd, orig_fp, 0);
			goto outdrop;
		}

		/* same blob-size cap as the F_ADDSIGS family */
		if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
			error = E2BIG;
			goto dropboth; // drop iocounts on vp and ivp, drop orig_fp then drop fp via outdrop
		}

		kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
		kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
		if (kr != KERN_SUCCESS) {
			error = ENOMEM;
			goto dropboth;
		}

		int resid;
		/* read the supplemental blob out of this fd's file */
		error = vn_rdwr(UIO_READ, vp,
		    (caddr_t)kernel_blob_addr, (int)kernel_blob_size,
		    fs.fs_file_start + fs.fs_blob_start,
		    UIO_SYSSPACE, 0,
		    kauth_cred_get(), &resid, p);
		if ((error == 0) && resid) {
			/* kernel_blob_size rounded to a page size, but signature may be at end of file */
			memset((void *)(kernel_blob_addr + (kernel_blob_size - resid)), 0x0, resid);
		}

		if (error) {
			ubc_cs_blob_deallocate(kernel_blob_addr,
			    kernel_blob_size);
			goto dropboth;
		}

		error = ubc_cs_blob_add_supplement(vp, ivp, fs.fs_file_start,
		    &kernel_blob_addr, kernel_blob_size, &blob);

		/* ubc_blob_add_supplement() has consumed kernel_blob_addr if it is zeroed */
		if (error) {
			if (kernel_blob_addr) {
				ubc_cs_blob_deallocate(kernel_blob_addr,
				    kernel_blob_size);
			}
			goto dropboth;
		}
		vnode_put(ivp);
		vnode_put(vp);
		fp_drop(p, orig_fd, orig_fp, 0);
		break;

dropboth:
		/* shared error exit: release both vnodes and the second fd */
		vnode_put(ivp);
		vnode_put(vp);
		fp_drop(p, orig_fd, orig_fp, 0);
		goto outdrop;
	}
#endif
	case F_GETCODEDIR:
	case F_FINDSIGS: {
		/* Both commands are deprecated/unimplemented. */
		error = ENOTSUP;
		goto out;
	}
	case F_CHECK_LV: {
		/*
		 * Library-validation check for the open file; the actual
		 * policy decision is delegated to MAC.  Works on the
		 * fileglob, so no vnode iocount is taken.
		 */
		struct fileglob *fg;
		fchecklv_t lv = {};

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		fg = fp->fp_glob;
		proc_fdunlock(p);

		if (IS_64BIT_PROCESS(p)) {
			error = copyin(argp, &lv, sizeof(lv));
		} else {
			/* translate the 32-bit layout into the native one */
			struct user32_fchecklv lv32 = {};

			error = copyin(argp, &lv32, sizeof(lv32));
			lv.lv_file_start = lv32.lv_file_start;
			lv.lv_error_message = (void *)(uintptr_t)lv32.lv_error_message;
			lv.lv_error_message_size = lv32.lv_error_message_size;
		}
		if (error) {
			goto outdrop;
		}

#if CONFIG_MACF
		error = mac_file_check_library_validation(p, fg, lv.lv_file_start,
		    (user_long_t)lv.lv_error_message, lv.lv_error_message_size);
#endif

		break;
	}
	case F_GETSIGSINFO: {
		/*
		 * Query properties of the code-signature blob registered at
		 * the given file offset.  Currently the only supported
		 * request is GETSIGSINFO_PLATFORM_BINARY.
		 */
		struct cs_blob *blob = NULL;
		fgetsigsinfo_t sigsinfo = {};

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		error = vnode_getwithref(vp);
		if (error) {
			goto outdrop;
		}

		error = copyin(argp, &sigsinfo, sizeof(sigsinfo));
		if (error) {
			vnode_put(vp);
			goto outdrop;
		}

		blob = ubc_cs_blob_get(vp, CPU_TYPE_ANY, CPU_SUBTYPE_ANY, sigsinfo.fg_file_start);
		if (blob == NULL) {
			/* no signature registered at that offset */
			error = ENOENT;
			vnode_put(vp);
			goto outdrop;
		}
		switch (sigsinfo.fg_info_request) {
		case GETSIGSINFO_PLATFORM_BINARY:
			sigsinfo.fg_sig_is_platform = blob->csb_platform_binary;
			/* copy out just the answer field, in place in the user struct */
			error = copyout(&sigsinfo.fg_sig_is_platform,
			    (vm_address_t)argp + offsetof(struct fgetsigsinfo, fg_sig_is_platform),
			    sizeof(sigsinfo.fg_sig_is_platform));
			if (error) {
				vnode_put(vp);
				goto outdrop;
			}
			break;
		default:
			error = EINVAL;
			vnode_put(vp);
			goto outdrop;
		}
		vnode_put(vp);
		break;
	}
#if CONFIG_PROTECT
	case F_GETPROTECTIONCLASS: {
		/*
		 * Read the file's data-protection class via VNOP_GETATTR;
		 * the class is returned in *retval.
		 */
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);

		proc_fdunlock(p);

		if (vnode_getwithref(vp)) {
			error = ENOENT;
			goto outdrop;
		}

		struct vnode_attr va;

		VATTR_INIT(&va);
		VATTR_WANTED(&va, va_dataprotect_class);
		error = VNOP_GETATTR(vp, &va, &context);
		if (!error) {
			if (VATTR_IS_SUPPORTED(&va, va_dataprotect_class)) {
				*retval = va.va_dataprotect_class;
			} else {
				/* FS doesn't implement data protection */
				error = ENOTSUP;
			}
		}

		vnode_put(vp);
		break;
	}

	case F_SETPROTECTIONCLASS: {
		/* tmp must be a valid PROTECTION_CLASS_* */
		tmp = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);

		proc_fdunlock(p);

		if (vnode_getwithref(vp)) {
			error = ENOENT;
			goto outdrop;
		}

		/* Only go forward if you have write access */
		vfs_context_t ctx = vfs_context_current();
		if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
			vnode_put(vp);
			error = EBADF;
			goto outdrop;
		}

		struct vnode_attr va;

		VATTR_INIT(&va);
		VATTR_SET(&va, va_dataprotect_class, tmp);

		/* the FS validates the class value */
		error = VNOP_SETATTR(vp, &va, ctx);

		vnode_put(vp);
		break;
	}
4359
	case F_TRANSCODEKEY: {
		/*
		 * Have the filesystem rewrap the file's content-protection
		 * key; the wrapped key bytes are copied out to the caller
		 * and its length returned in *retval.
		 */
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}

		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		if (vnode_getwithref(vp)) {
			error = ENOENT;
			goto outdrop;
		}

		cp_key_t k = {
			.len = CP_MAX_WRAPPEDKEYSIZE,
		};

		/* zeroed buffer: holds key material, freed below */
		k.key = kalloc_data(CP_MAX_WRAPPEDKEYSIZE, Z_WAITOK | Z_ZERO);
		if (k.key == NULL) {
			error = ENOMEM;
		} else {
			error = VNOP_IOCTL(vp, F_TRANSCODEKEY, (caddr_t)&k, 1, &context);
		}

		vnode_put(vp);

		if (error == 0) {
			error = copyout(k.key, argp, k.len);
			*retval = k.len;
		}
		kfree_data(k.key, CP_MAX_WRAPPEDKEYSIZE);

		break;
	}

	case F_GETPROTECTIONLEVEL: {
		/* Forward to the FS; the level is written into *retval. */
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}

		vp = (struct vnode*)fp_get_data(fp);
		proc_fdunlock(p);

		if (vnode_getwithref(vp)) {
			error = ENOENT;
			goto outdrop;
		}

		error = VNOP_IOCTL(vp, F_GETPROTECTIONLEVEL, (caddr_t)retval, 0, &context);

		vnode_put(vp);
		break;
	}

	case F_GETDEFAULTPROTLEVEL: {
		/* Forward to the FS; the default level lands in *retval. */
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}

		vp = (struct vnode*)fp_get_data(fp);
		proc_fdunlock(p);

		if (vnode_getwithref(vp)) {
			error = ENOENT;
			goto outdrop;
		}

		/*
		 * if cp_get_major_vers fails, error will be set to proper errno
		 * and cp_version will still be 0.
		 */

		error = VNOP_IOCTL(vp, F_GETDEFAULTPROTLEVEL, (caddr_t)retval, 0, &context);

		vnode_put(vp);
		break;
	}
4440
4441#endif /* CONFIG_PROTECT */
4442
	case F_MOVEDATAEXTENTS: {
		/*
		 * Exchange the data extents of this fd's file with those of a
		 * second fd (passed in uap->arg), via VNOP_EXCHANGE with
		 * FSOPT_EXCHANGE_DATA_ONLY.  Privileged SPI; HFS+ and APFS
		 * only.  Every error path must unwind, in order, whatever it
		 * has taken so far: both iocounts, fp2, then fp via outdrop.
		 */
		struct fileproc *fp2 = NULL;
		struct vnode *src_vp = NULLVP;
		struct vnode *dst_vp = NULLVP;
		/* We need to grab the 2nd FD out of the arguments before moving on. */
		int fd2 = CAST_DOWN_EXPLICIT(int32_t, uap->arg);

		error = priv_check_cred(kauth_cred_get(), PRIV_VFS_MOVE_DATA_EXTENTS, 0);
		if (error) {
			goto out;
		}

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}

		/*
		 * For now, special case HFS+ and APFS only, since this
		 * is SPI.
		 */
		src_vp = (struct vnode *)fp_get_data(fp);
		if (src_vp->v_tag != VT_HFS && src_vp->v_tag != VT_APFS) {
			error = ENOTSUP;
			goto out;
		}

		/*
		 * Get the references before we start acquiring iocounts on the vnodes,
		 * while we still hold the proc fd lock
		 */
		if ((error = fp_lookup(p, fd2, &fp2, 1))) {
			error = EBADF;
			goto out;
		}
		if (fp2->f_type != DTYPE_VNODE) {
			fp_drop(p, fd2, fp2, 1);
			error = EBADF;
			goto out;
		}
		dst_vp = (struct vnode *)fp_get_data(fp2);
		if (dst_vp->v_tag != VT_HFS && dst_vp->v_tag != VT_APFS) {
			fp_drop(p, fd2, fp2, 1);
			error = ENOTSUP;
			goto out;
		}

#if CONFIG_MACF
		/* Re-do MAC checks against the new FD, pass in a fake argument */
		error = mac_file_check_fcntl(kauth_cred_get(), fp2->fp_glob, cmd, 0);
		if (error) {
			fp_drop(p, fd2, fp2, 1);
			goto out;
		}
#endif
		/* Audit the 2nd FD */
		AUDIT_ARG(fd, fd2);

		proc_fdunlock(p);

		if (vnode_getwithref(src_vp)) {
			fp_drop(p, fd2, fp2, 0);
			error = ENOENT;
			goto outdrop;
		}
		if (vnode_getwithref(dst_vp)) {
			vnode_put(src_vp);
			fp_drop(p, fd2, fp2, 0);
			error = ENOENT;
			goto outdrop;
		}

		/*
		 * Basic asserts; validate they are not the same and that
		 * both live on the same filesystem.
		 */
		if (dst_vp == src_vp) {
			vnode_put(src_vp);
			vnode_put(dst_vp);
			fp_drop(p, fd2, fp2, 0);
			error = EINVAL;
			goto outdrop;
		}

		if (dst_vp->v_mount != src_vp->v_mount) {
			vnode_put(src_vp);
			vnode_put(dst_vp);
			fp_drop(p, fd2, fp2, 0);
			error = EXDEV;
			goto outdrop;
		}

		/* Now we have a legit pair of FDs. Go to work */

		/* Now check for write access to the target files */
		if (vnode_authorize(src_vp, NULLVP,
		    (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
			vnode_put(src_vp);
			vnode_put(dst_vp);
			fp_drop(p, fd2, fp2, 0);
			error = EBADF;
			goto outdrop;
		}

		if (vnode_authorize(dst_vp, NULLVP,
		    (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
			vnode_put(src_vp);
			vnode_put(dst_vp);
			fp_drop(p, fd2, fp2, 0);
			error = EBADF;
			goto outdrop;
		}

		/* Verify that both vps point to files and not directories */
		if (!vnode_isreg(src_vp) || !vnode_isreg(dst_vp)) {
			error = EINVAL;
			vnode_put(src_vp);
			vnode_put(dst_vp);
			fp_drop(p, fd2, fp2, 0);
			goto outdrop;
		}

		/*
		 * The exchangedata syscall handler passes in 0 for the flags to VNOP_EXCHANGE.
		 * We'll pass in our special bit indicating that the new behavior is expected
		 */

		error = VNOP_EXCHANGE(src_vp, dst_vp, FSOPT_EXCHANGE_DATA_ONLY, &context);

		vnode_put(src_vp);
		vnode_put(dst_vp);
		fp_drop(p, fd2, fp2, 0);
		break;
	}
4577
	case F_TRANSFEREXTENTS: {
		/*
		 * Transfer extents from this fd's file to a second fd's file
		 * (fd in uap->arg), forwarded to the FS as a VNOP_IOCTL with
		 * the destination vnode as argument.  APFS only.  Unwind
		 * discipline mirrors F_MOVEDATAEXTENTS.
		 */
		struct fileproc *fp2 = NULL;
		struct vnode *src_vp = NULLVP;
		struct vnode *dst_vp = NULLVP;

		/* Get 2nd FD out of the arguments. */
		int fd2 = CAST_DOWN_EXPLICIT(int, uap->arg);
		if (fd2 < 0) {
			error = EINVAL;
			goto out;
		}

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}

		/*
		 * Only allow this for APFS
		 */
		src_vp = (struct vnode *)fp_get_data(fp);
		if (src_vp->v_tag != VT_APFS) {
			error = ENOTSUP;
			goto out;
		}

		/*
		 * Get the references before we start acquiring iocounts on the vnodes,
		 * while we still hold the proc fd lock
		 */
		if ((error = fp_lookup(p, fd2, &fp2, 1))) {
			error = EBADF;
			goto out;
		}
		if (fp2->f_type != DTYPE_VNODE) {
			fp_drop(p, fd2, fp2, 1);
			error = EBADF;
			goto out;
		}
		dst_vp = (struct vnode *)fp_get_data(fp2);
		if (dst_vp->v_tag != VT_APFS) {
			fp_drop(p, fd2, fp2, 1);
			error = ENOTSUP;
			goto out;
		}

#if CONFIG_MACF
		/* Re-do MAC checks against the new FD, pass in a fake argument */
		error = mac_file_check_fcntl(kauth_cred_get(), fp2->fp_glob, cmd, 0);
		if (error) {
			fp_drop(p, fd2, fp2, 1);
			goto out;
		}
#endif
		/* Audit the 2nd FD */
		AUDIT_ARG(fd, fd2);

		proc_fdunlock(p);

		if (vnode_getwithref(src_vp)) {
			fp_drop(p, fd2, fp2, 0);
			error = ENOENT;
			goto outdrop;
		}
		if (vnode_getwithref(dst_vp)) {
			vnode_put(src_vp);
			fp_drop(p, fd2, fp2, 0);
			error = ENOENT;
			goto outdrop;
		}

		/*
		 * Validate they are not the same and that
		 * both live on the same filesystem.
		 */
		if (dst_vp == src_vp) {
			vnode_put(src_vp);
			vnode_put(dst_vp);
			fp_drop(p, fd2, fp2, 0);
			error = EINVAL;
			goto outdrop;
		}
		if (dst_vp->v_mount != src_vp->v_mount) {
			vnode_put(src_vp);
			vnode_put(dst_vp);
			fp_drop(p, fd2, fp2, 0);
			error = EXDEV;
			goto outdrop;
		}

		/* Verify that both vps point to files and not directories */
		if (!vnode_isreg(src_vp) || !vnode_isreg(dst_vp)) {
			error = EINVAL;
			vnode_put(src_vp);
			vnode_put(dst_vp);
			fp_drop(p, fd2, fp2, 0);
			goto outdrop;
		}


		/*
		 * Okay, vps are legit. Check access. We'll require write access
		 * to both files.
		 */
		if (vnode_authorize(src_vp, NULLVP,
		    (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
			vnode_put(src_vp);
			vnode_put(dst_vp);
			fp_drop(p, fd2, fp2, 0);
			error = EBADF;
			goto outdrop;
		}
		if (vnode_authorize(dst_vp, NULLVP,
		    (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
			vnode_put(src_vp);
			vnode_put(dst_vp);
			fp_drop(p, fd2, fp2, 0);
			error = EBADF;
			goto outdrop;
		}

		/* Pass it on through to the fs */
		error = VNOP_IOCTL(src_vp, cmd, (caddr_t)dst_vp, 0, &context);

		vnode_put(src_vp);
		vnode_put(dst_vp);
		fp_drop(p, fd2, fp2, 0);
		break;
	}
4707
4708 /*
4709 * SPI for making a file compressed.
4710 */
	case F_MAKECOMPRESSED: {
		/*
		 * Mark the file as compressed (decmpfs SPI).  The generation
		 * counter from uap->arg is passed through to the FS ioctl.
		 * Requires a regular file or symlink plus write access.
		 */
		uint32_t gcounter = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}

		vp = (struct vnode*)fp_get_data(fp);
		proc_fdunlock(p);

		/* get the vnode */
		if (vnode_getwithref(vp)) {
			error = ENOENT;
			goto outdrop;
		}

		/* Is it a file? */
		if ((vnode_isreg(vp) == 0) && (vnode_islnk(vp) == 0)) {
			vnode_put(vp);
			error = EBADF;
			goto outdrop;
		}

		/* invoke ioctl to pass off to FS */
		/* Only go forward if you have write access */
		vfs_context_t ctx = vfs_context_current();
		if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
			vnode_put(vp);
			error = EBADF;
			goto outdrop;
		}

		error = VNOP_IOCTL(vp, cmd, (caddr_t)&gcounter, 0, &context);

		vnode_put(vp);
		break;
	}
4749
4750 /*
4751 * SPI (private) for indicating to a filesystem that subsequent writes to
 * the open FD will be written to the Fastflow.
4753 */
4754 case F_SET_GREEDY_MODE:
4755 /* intentionally drop through to the same handler as F_SETSTATIC.
4756 * both fcntls should pass the argument and their selector into VNOP_IOCTL.
4757 */
4758
4759 /*
4760 * SPI (private) for indicating to a filesystem that subsequent writes to
4761 * the open FD will represent static content.
4762 */
4763 case F_SETSTATICCONTENT: {
4764 caddr_t ioctl_arg = NULL;
4765
4766 if (uap->arg) {
4767 ioctl_arg = (caddr_t) 1;
4768 }
4769
4770 if (fp->f_type != DTYPE_VNODE) {
4771 error = EBADF;
4772 goto out;
4773 }
4774 vp = (struct vnode *)fp_get_data(fp);
4775 proc_fdunlock(p);
4776
4777 error = vnode_getwithref(vp);
4778 if (error) {
4779 error = ENOENT;
4780 goto outdrop;
4781 }
4782
4783 /* Only go forward if you have write access */
4784 vfs_context_t ctx = vfs_context_current();
4785 if (vnode_authorize(vp, NULLVP, action: (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
4786 vnode_put(vp);
4787 error = EBADF;
4788 goto outdrop;
4789 }
4790
4791 error = VNOP_IOCTL(vp, command: cmd, data: ioctl_arg, fflag: 0, ctx: &context);
4792 (void)vnode_put(vp);
4793
4794 break;
4795 }
4796
4797 /*
4798 * SPI (private) for indicating to the lower level storage driver that the
4799 * subsequent writes should be of a particular IO type (burst, greedy, static),
4800 * or other flavors that may be necessary.
4801 */
4802 case F_SETIOTYPE: {
4803 caddr_t param_ptr;
4804 uint32_t param;
4805
4806 if (uap->arg) {
4807 /* extract 32 bits of flags from userland */
4808 param_ptr = (caddr_t) uap->arg;
4809 param = (uint32_t) param_ptr;
4810 } else {
4811 /* If no argument is specified, error out */
4812 error = EINVAL;
4813 goto out;
4814 }
4815
4816 /*
4817 * Validate the different types of flags that can be specified:
4818 * all of them are mutually exclusive for now.
4819 */
4820 switch (param) {
4821 case F_IOTYPE_ISOCHRONOUS:
4822 break;
4823
4824 default:
4825 error = EINVAL;
4826 goto out;
4827 }
4828
4829
4830 if (fp->f_type != DTYPE_VNODE) {
4831 error = EBADF;
4832 goto out;
4833 }
4834 vp = (struct vnode *)fp_get_data(fp);
4835 proc_fdunlock(p);
4836
4837 error = vnode_getwithref(vp);
4838 if (error) {
4839 error = ENOENT;
4840 goto outdrop;
4841 }
4842
4843 /* Only go forward if you have write access */
4844 vfs_context_t ctx = vfs_context_current();
4845 if (vnode_authorize(vp, NULLVP, action: (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
4846 vnode_put(vp);
4847 error = EBADF;
4848 goto outdrop;
4849 }
4850
4851 error = VNOP_IOCTL(vp, command: cmd, data: param_ptr, fflag: 0, ctx: &context);
4852 (void)vnode_put(vp);
4853
4854 break;
4855 }
4856
4857 /*
4858 * Set the vnode pointed to by 'fd'
4859 * and tag it as the (potentially future) backing store
4860 * for another filesystem
4861 */
4862 case F_SETBACKINGSTORE: {
4863 if (fp->f_type != DTYPE_VNODE) {
4864 error = EBADF;
4865 goto out;
4866 }
4867
4868 vp = (struct vnode *)fp_get_data(fp);
4869
4870 if (vp->v_tag != VT_HFS) {
4871 error = EINVAL;
4872 goto out;
4873 }
4874 proc_fdunlock(p);
4875
4876 if (vnode_getwithref(vp)) {
4877 error = ENOENT;
4878 goto outdrop;
4879 }
4880
4881 /* only proceed if you have write access */
4882 vfs_context_t ctx = vfs_context_current();
4883 if (vnode_authorize(vp, NULLVP, action: (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
4884 vnode_put(vp);
4885 error = EBADF;
4886 goto outdrop;
4887 }
4888
4889
4890 /* If arg != 0, set, otherwise unset */
4891 if (uap->arg) {
4892 error = VNOP_IOCTL(vp, command: cmd, data: (caddr_t)1, fflag: 0, ctx: &context);
4893 } else {
4894 error = VNOP_IOCTL(vp, command: cmd, data: (caddr_t)NULL, fflag: 0, ctx: &context);
4895 }
4896
4897 vnode_put(vp);
4898 break;
4899 }
4900
4901 /*
4902 * like F_GETPATH, but special semantics for
4903 * the mobile time machine handler.
4904 */
4905 case F_GETPATH_MTMINFO: {
4906 char *pathbufp;
4907 int pathlen;
4908
4909 if (fp->f_type != DTYPE_VNODE) {
4910 error = EBADF;
4911 goto out;
4912 }
4913 vp = (struct vnode *)fp_get_data(fp);
4914 proc_fdunlock(p);
4915
4916 pathlen = MAXPATHLEN;
4917 pathbufp = zalloc(view: ZV_NAMEI);
4918
4919 if ((error = vnode_getwithref(vp)) == 0) {
4920 int backingstore = 0;
4921
4922 /* Check for error from vn_getpath before moving on */
4923 if ((error = vn_getpath(vp, pathbuf: pathbufp, len: &pathlen)) == 0) {
4924 if (vp->v_tag == VT_HFS) {
4925 error = VNOP_IOCTL(vp, command: cmd, data: (caddr_t) &backingstore, fflag: 0, ctx: &context);
4926 }
4927 (void)vnode_put(vp);
4928
4929 if (error == 0) {
4930 error = copyout((caddr_t)pathbufp, argp, pathlen);
4931 }
4932 if (error == 0) {
4933 /*
4934 * If the copyout was successful, now check to ensure
4935 * that this vnode is not a BACKINGSTORE vnode. mtmd
4936 * wants the path regardless.
4937 */
4938 if (backingstore) {
4939 error = EBUSY;
4940 }
4941 }
4942 } else {
4943 (void)vnode_put(vp);
4944 }
4945 }
4946
4947 zfree(ZV_NAMEI, pathbufp);
4948 goto outdrop;
4949 }
4950
4951 case F_RECYCLE: {
4952#if !DEBUG && !DEVELOPMENT
4953 bool allowed = false;
4954
4955 //
4956 // non-debug and non-development kernels have restrictions
	// on who can call this fcntl. the process has to be marked
4958 // with the dataless-manipulator entitlement and either the
4959 // process or thread have to be marked rapid-aging.
4960 //
4961 if (!vfs_context_is_dataless_manipulator(&context)) {
4962 error = EPERM;
4963 goto out;
4964 }
4965
4966 proc_t proc = vfs_context_proc(ctx: &context);
4967 if (proc && (proc->p_lflag & P_LRAGE_VNODES)) {
4968 allowed = true;
4969 } else {
4970 thread_t thr = vfs_context_thread(ctx: &context);
4971 if (thr) {
4972 struct uthread *ut = get_bsdthread_info(thr);
4973
4974 if (ut && (ut->uu_flag & UT_RAGE_VNODES)) {
4975 allowed = true;
4976 }
4977 }
4978 }
4979 if (!allowed) {
4980 error = EPERM;
4981 goto out;
4982 }
4983#endif
4984
4985 if (fp->f_type != DTYPE_VNODE) {
4986 error = EBADF;
4987 goto out;
4988 }
4989 vp = (struct vnode *)fp_get_data(fp);
4990 proc_fdunlock(p);
4991
4992 vnode_recycle(vp);
4993 break;
4994 }
4995
4996#if CONFIG_FILE_LEASES
4997 case F_SETLEASE: {
4998 struct fileglob *fg;
4999 int fl_type;
5000 int expcounts;
5001
5002 if (fp->f_type != DTYPE_VNODE) {
5003 error = EBADF;
5004 goto out;
5005 }
5006 vp = (struct vnode *)fp_get_data(fp);
5007 fg = fp->fp_glob;;
5008 proc_fdunlock(p);
5009
5010 /*
5011 * In order to allow a process to avoid breaking
5012 * its own leases, the expected open count needs
		 * to be provided to F_SETLEASE when placing a write lease.
5014 * Similarly, in order to allow a process to place a read lease
5015 * after opening the file multiple times in RW mode, the expected
5016 * write count needs to be provided to F_SETLEASE when placing a
5017 * read lease.
5018 *
5019 * We use the upper 30 bits of the integer argument (way more than
5020 * enough) as the expected open/write count.
5021 *
5022 * If the caller passed 0 for the expected open count,
5023 * assume 1.
5024 */
5025 fl_type = CAST_DOWN_EXPLICIT(int, uap->arg);
5026 expcounts = (unsigned int)fl_type >> 2;
5027 fl_type &= 3;
5028
5029 if (fl_type == F_WRLCK && expcounts == 0) {
5030 expcounts = 1;
5031 }
5032
5033 AUDIT_ARG(value32, fl_type);
5034
5035 if ((error = vnode_getwithref(vp))) {
5036 goto outdrop;
5037 }
5038
5039 /*
5040 * Only support for regular file/dir mounted on local-based filesystem.
5041 */
5042 if ((vnode_vtype(vp) != VREG && vnode_vtype(vp) != VDIR) ||
5043 !(vfs_flags(mp: vnode_mount(vp)) & MNT_LOCAL)) {
5044 error = EBADF;
5045 vnode_put(vp);
5046 goto outdrop;
5047 }
5048
5049 /* For directory, we only support read lease. */
5050 if (vnode_vtype(vp) == VDIR && fl_type == F_WRLCK) {
5051 error = ENOTSUP;
5052 vnode_put(vp);
5053 goto outdrop;
5054 }
5055
5056 switch (fl_type) {
5057 case F_RDLCK:
5058 case F_WRLCK:
5059 case F_UNLCK:
5060 error = vnode_setlease(vp, fg, fl_type, expcounts,
5061 ctx: vfs_context_current());
5062 break;
5063 default:
5064 error = EINVAL;
5065 break;
5066 }
5067
5068 vnode_put(vp);
5069 goto outdrop;
5070 }
5071
5072 case F_GETLEASE: {
5073 if (fp->f_type != DTYPE_VNODE) {
5074 error = EBADF;
5075 goto out;
5076 }
5077 vp = (struct vnode *)fp_get_data(fp);
5078 proc_fdunlock(p);
5079
5080 if ((error = vnode_getwithref(vp))) {
5081 goto outdrop;
5082 }
5083
5084 if ((vnode_vtype(vp) != VREG && vnode_vtype(vp) != VDIR) ||
5085 !(vfs_flags(mp: vnode_mount(vp)) & MNT_LOCAL)) {
5086 error = EBADF;
5087 vnode_put(vp);
5088 goto outdrop;
5089 }
5090
5091 error = 0;
5092 *retval = vnode_getlease(vp);
5093 vnode_put(vp);
5094 goto outdrop;
5095 }
5096#endif /* CONFIG_FILE_LEASES */
5097
5098 /* SPI (private) for asserting background access to a file */
5099 case F_ASSERT_BG_ACCESS:
5100 /* SPI (private) for releasing background access to a file */
5101 case F_RELEASE_BG_ACCESS: {
5102 /*
5103 * Check if the process is platform code, which means
5104 * that it is considered part of the Operating System.
5105 */
5106 if (!csproc_get_platform_binary(p)) {
5107 error = EPERM;
5108 goto out;
5109 }
5110
5111 if (fp->f_type != DTYPE_VNODE) {
5112 error = EBADF;
5113 goto out;
5114 }
5115
5116 vp = (struct vnode *)fp_get_data(fp);
5117 proc_fdunlock(p);
5118
5119 if (vnode_getwithref(vp)) {
5120 error = ENOENT;
5121 goto outdrop;
5122 }
5123
5124 /* Verify that vp points to a file and not a directory */
5125 if (!vnode_isreg(vp)) {
5126 vnode_put(vp);
5127 error = EINVAL;
5128 goto outdrop;
5129 }
5130
5131 /* Only proceed if you have read access */
5132 if (vnode_authorize(vp, NULLVP, action: (KAUTH_VNODE_ACCESS | KAUTH_VNODE_READ_DATA), ctx: &context) != 0) {
5133 vnode_put(vp);
5134 error = EBADF;
5135 goto outdrop;
5136 }
5137
5138 if (cmd == F_ASSERT_BG_ACCESS) {
5139 fassertbgaccess_t args;
5140
5141 if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
5142 vnode_put(vp);
5143 goto outdrop;
5144 }
5145
5146 error = VNOP_IOCTL(vp, F_ASSERT_BG_ACCESS, data: (caddr_t)&args, fflag: 0, ctx: &context);
5147 } else {
5148 // cmd == F_RELEASE_BG_ACCESS
5149 error = VNOP_IOCTL(vp, F_RELEASE_BG_ACCESS, data: (caddr_t)NULL, fflag: 0, ctx: &context);
5150 }
5151
5152 vnode_put(vp);
5153
5154 goto outdrop;
5155 }
5156
5157 default:
5158 /*
		 * This is an fcntl() that we do not recognize at this level;
5160 * if this is a vnode, we send it down into the VNOP_IOCTL
5161 * for this vnode; this can include special devices, and will
5162 * effectively overload fcntl() to send ioctl()'s.
5163 */
5164 if ((cmd & IOC_VOID) && (cmd & IOC_INOUT)) {
5165 error = EINVAL;
5166 goto out;
5167 }
5168
5169 /*
5170 * Catch any now-invalid fcntl() selectors.
5171 * (When adding a selector to this list, it may be prudent
5172 * to consider adding it to the list in fsctl_internal() as well.)
5173 */
5174 switch (cmd) {
5175 case (int)APFSIOC_REVERT_TO_SNAPSHOT:
5176 case (int)FSIOC_FIOSEEKHOLE:
5177 case (int)FSIOC_FIOSEEKDATA:
5178 case (int)FSIOC_CAS_BSDFLAGS:
5179 case (int)FSIOC_KERNEL_ROOTAUTH:
5180 case (int)FSIOC_GRAFT_FS:
5181 case (int)FSIOC_UNGRAFT_FS:
5182 case (int)FSIOC_AUTH_FS:
5183 case HFS_GET_BOOT_INFO:
5184 case HFS_SET_BOOT_INFO:
5185 case FIOPINSWAP:
5186 case F_MARKDEPENDENCY:
5187 case TIOCREVOKE:
5188 case TIOCREVOKECLEAR:
5189 error = EINVAL;
5190 goto out;
5191 default:
5192 break;
5193 }
5194
5195 if (fp->f_type != DTYPE_VNODE) {
5196 error = EBADF;
5197 goto out;
5198 }
5199 vp = (struct vnode *)fp_get_data(fp);
5200 proc_fdunlock(p);
5201
5202 if ((error = vnode_getwithref(vp)) == 0) {
5203#define STK_PARAMS 128
5204 char stkbuf[STK_PARAMS] = {0};
5205 unsigned int size;
5206 caddr_t data, memp;
5207 /*
5208 * For this to work properly, we have to copy in the
5209 * ioctl() cmd argument if there is one; we must also
5210 * check that a command parameter, if present, does
5211 * not exceed the maximum command length dictated by
5212 * the number of bits we have available in the command
5213 * to represent a structure length. Finally, we have
5214 * to copy the results back out, if it is that type of
5215 * ioctl().
5216 */
5217 size = IOCPARM_LEN(cmd);
5218 if (size > IOCPARM_MAX) {
5219 (void)vnode_put(vp);
5220 error = EINVAL;
5221 break;
5222 }
5223
5224 memp = NULL;
5225 if (size > sizeof(stkbuf)) {
5226 memp = (caddr_t)kalloc_data(size, Z_WAITOK);
5227 if (memp == 0) {
5228 (void)vnode_put(vp);
5229 error = ENOMEM;
5230 goto outdrop;
5231 }
5232 data = memp;
5233 } else {
5234 data = &stkbuf[0];
5235 }
5236
5237 if (cmd & IOC_IN) {
5238 if (size) {
5239 /* structure */
5240 error = copyin(argp, data, size);
5241 if (error) {
5242 (void)vnode_put(vp);
5243 if (memp) {
5244 kfree_data(memp, size);
5245 }
5246 goto outdrop;
5247 }
5248
5249 /* Bzero the section beyond that which was needed */
5250 if (size <= sizeof(stkbuf)) {
5251 bzero(s: (((uint8_t*)data) + size), n: (sizeof(stkbuf) - size));
5252 }
5253 } else {
5254 /* int */
5255 if (is64bit) {
5256 *(user_addr_t *)data = argp;
5257 } else {
5258 *(uint32_t *)data = (uint32_t)argp;
5259 }
5260 };
5261 } else if ((cmd & IOC_OUT) && size) {
5262 /*
5263 * Zero the buffer so the user always
5264 * gets back something deterministic.
5265 */
5266 bzero(s: data, n: size);
5267 } else if (cmd & IOC_VOID) {
5268 if (is64bit) {
5269 *(user_addr_t *)data = argp;
5270 } else {
5271 *(uint32_t *)data = (uint32_t)argp;
5272 }
5273 }
5274
5275 error = VNOP_IOCTL(vp, command: cmd, CAST_DOWN(caddr_t, data), fflag: 0, ctx: &context);
5276
5277 (void)vnode_put(vp);
5278
5279 /* Copy any output data to user */
5280 if (error == 0 && (cmd & IOC_OUT) && size) {
5281 error = copyout(data, argp, size);
5282 }
5283 if (memp) {
5284 kfree_data(memp, size);
5285 }
5286 }
5287 break;
5288 }
5289
5290outdrop:
5291 return sys_fcntl_outdrop(p, fd, fp, vp, error);
5292
5293out:
5294 return sys_fcntl_out(p, fd, fp, error);
5295}
5296
5297
5298/*
5299 * sys_close
5300 *
5301 * Description: The implementation of the close(2) system call
5302 *
5303 * Parameters: p Process in whose per process file table
5304 * the close is to occur
5305 * uap->fd fd to be closed
5306 * retval <unused>
5307 *
5308 * Returns: 0 Success
5309 * fp_lookup:EBADF Bad file descriptor
5310 * fp_guard_exception:??? Guarded file descriptor
5311 * close_internal:EBADF
5312 * close_internal:??? Anything returnable by a per-fileops
5313 * close function
5314 */
5315int
5316sys_close(proc_t p, struct close_args *uap, __unused int32_t *retval)
5317{
5318 kauth_cred_t p_cred = current_cached_proc_cred(p);
5319
5320 __pthread_testcancel(presyscall: 1);
5321 return close_nocancel(p, p_cred, fd: uap->fd);
5322}
5323
5324int
5325sys_close_nocancel(proc_t p, struct close_nocancel_args *uap, __unused int32_t *retval)
5326{
5327 kauth_cred_t p_cred = current_cached_proc_cred(p);
5328
5329 return close_nocancel(p, p_cred, fd: uap->fd);
5330}
5331
/*
 * close_nocancel
 *
 * Common close path shared by sys_close() and sys_close_nocancel():
 * look up the fd under the proc fd lock, refuse to close guarded
 * descriptors (raising a guard exception instead), and otherwise hand
 * off to fp_close_and_unlock().
 */
int
close_nocancel(proc_t p, kauth_cred_t p_cred, int fd)
{
	struct fileproc *fp;

	AUDIT_SYSCLOSE(p, fd);

	proc_fdlock(p);
	/*
	 * fp_get_noref_locked() returns the fileproc without taking an
	 * extra reference; we stay under the fd lock until the close is
	 * handed off or we bail out.
	 */
	if ((fp = fp_get_noref_locked(p, fd)) == NULL) {
		proc_fdunlock(p);
		return EBADF;
	}

	/* Guarded fds may not be closed directly; deliver a guard exception. */
	if (fp_isguarded(fp, GUARD_CLOSE)) {
		int error = fp_guard_exception(p, fd, fp, attribs: kGUARD_EXC_CLOSE);
		proc_fdunlock(p);
		return error;
	}

	/* fp_close_and_unlock() drops the fd lock (per its name) on all paths. */
	return fp_close_and_unlock(p, cred: p_cred, fd, fp, flags: 0);
}
5353
5354
5355/*
5356 * fstat
5357 *
5358 * Description: Return status information about a file descriptor.
5359 *
5360 * Parameters: p The process doing the fstat
5361 * fd The fd to stat
5362 * ub The user stat buffer
5363 * xsecurity The user extended security
5364 * buffer, or 0 if none
5365 * xsecurity_size The size of xsecurity, or 0
5366 * if no xsecurity
5367 * isstat64 Flag to indicate 64 bit version
5368 * for inode size, etc.
5369 *
5370 * Returns: 0 Success
5371 * EBADF
5372 * EFAULT
5373 * fp_lookup:EBADF Bad file descriptor
5374 * vnode_getwithref:???
5375 * copyout:EFAULT
5376 * vnode_getwithref:???
5377 * vn_stat:???
5378 * soo_stat:???
5379 * pipe_stat:???
5380 * pshm_stat:???
5381 * kqueue_stat:???
5382 *
5383 * Notes: Internal implementation for all other fstat() related
5384 * functions
5385 *
5386 * XXX switch on node type is bogus; need a stat in struct
5387 * XXX fileops instead.
5388 */
static int
fstat(proc_t p, int fd, user_addr_t ub, user_addr_t xsecurity,
    user_addr_t xsecurity_size, int isstat64)
{
	struct fileproc *fp;
	/* Kernel-internal stat buffer; which member is used depends on isstat64. */
	union {
		struct stat sb;
		struct stat64 sb64;
	} source;
	/* Userland-layout buffer; which member is used depends on isstat64
	 * and on whether the calling process is 64-bit. */
	union {
		struct user64_stat user64_sb;
		struct user32_stat user32_sb;
		struct user64_stat64 user64_sb64;
		struct user32_stat64 user32_sb64;
	} dest;
	int error, my_size;
	file_type_t type;
	caddr_t data;
	kauth_filesec_t fsec;
	user_size_t xsecurity_bufsize;
	vfs_context_t ctx = vfs_context_current();
	void * sbptr;


	AUDIT_ARG(fd, fd);

	/* Take a reference on the fileproc; dropped at "out". */
	if ((error = fp_lookup(p, fd, resultfp: &fp, locked: 0)) != 0) {
		return error;
	}
	type = fp->f_type;
	data = (caddr_t)fp_get_data(fp);
	fsec = KAUTH_FILESEC_NONE;

	sbptr = (void *)&source;

	/* Dispatch on descriptor type (see the XXX note above: this switch
	 * arguably belongs in per-type fileops). */
	switch (type) {
	case DTYPE_VNODE:
		if ((error = vnode_getwithref(vp: (vnode_t)data)) == 0) {
			/*
			 * If the caller has the file open, and is not
			 * requesting extended security information, we are
			 * going to let them get the basic stat information.
			 */
			if (xsecurity == USER_ADDR_NULL) {
				error = vn_stat_noauth(vp: (vnode_t)data, sb: sbptr, NULL, isstat64, needsrealdev: 0, ctx,
				    file_cred: fp->fp_glob->fg_cred);
			} else {
				/* vn_stat() also fetches the extended security blob into fsec. */
				error = vn_stat(vp: (vnode_t)data, sb: sbptr, xsec: &fsec, isstat64, needsrealdev: 0, ctx);
			}

			AUDIT_ARG(vnpath, (struct vnode *)data, ARG_VNODE1);
			(void)vnode_put(vp: (vnode_t)data);
		}
		break;

#if SOCKETS
	case DTYPE_SOCKET:
		error = soo_stat((struct socket *)data, sbptr, isstat64);
		break;
#endif /* SOCKETS */

	case DTYPE_PIPE:
		error = pipe_stat((void *)data, sbptr, isstat64);
		break;

	case DTYPE_PSXSHM:
		error = pshm_stat(pnode: (void *)data, ub: sbptr, isstat64);
		break;

	case DTYPE_KQUEUE:
		error = kqueue_stat((void *)data, sbptr, isstat64, p);
		break;

	default:
		error = EBADF;
		goto out;
	}
	if (error == 0) {
		caddr_t sbp;

		/*
		 * Munge the kernel stat structure into the layout expected by
		 * the caller (32- vs 64-bit process, stat vs stat64), zeroing
		 * the spare fields so no kernel stack data leaks to userland.
		 */
		if (isstat64 != 0) {
			source.sb64.st_lspare = 0;
			source.sb64.st_qspare[0] = 0LL;
			source.sb64.st_qspare[1] = 0LL;

			if (IS_64BIT_PROCESS(p)) {
				munge_user64_stat64(sbp: &source.sb64, usbp: &dest.user64_sb64);
				my_size = sizeof(dest.user64_sb64);
				sbp = (caddr_t)&dest.user64_sb64;
			} else {
				munge_user32_stat64(sbp: &source.sb64, usbp: &dest.user32_sb64);
				my_size = sizeof(dest.user32_sb64);
				sbp = (caddr_t)&dest.user32_sb64;
			}
		} else {
			source.sb.st_lspare = 0;
			source.sb.st_qspare[0] = 0LL;
			source.sb.st_qspare[1] = 0LL;
			if (IS_64BIT_PROCESS(p)) {
				munge_user64_stat(sbp: &source.sb, usbp: &dest.user64_sb);
				my_size = sizeof(dest.user64_sb);
				sbp = (caddr_t)&dest.user64_sb;
			} else {
				munge_user32_stat(sbp: &source.sb, usbp: &dest.user32_sb);
				my_size = sizeof(dest.user32_sb);
				sbp = (caddr_t)&dest.user32_sb;
			}
		}

		error = copyout(sbp, ub, my_size);
	}

	/* caller wants extended security information? */
	if (xsecurity != USER_ADDR_NULL) {
		/* did we get any? */
		if (fsec == KAUTH_FILESEC_NONE) {
			/* none available: report a zero size back to the caller */
			if (susize(xsecurity_size, 0) != 0) {
				error = EFAULT;
				goto out;
			}
		} else {
			/* find the user buffer size */
			xsecurity_bufsize = fusize(xsecurity_size);

			/* copy out the actual data size */
			if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
				error = EFAULT;
				goto out;
			}

			/* if the caller supplied enough room, copy out to it */
			if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec)) {
				error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
			}
		}
	}
out:
	/* Release the fileproc reference and any security blob vn_stat allocated. */
	fp_drop(p, fd, fp, locked: 0);
	if (fsec != NULL) {
		kauth_filesec_free(fsp: fsec);
	}
	return error;
}
5532
5533
5534/*
5535 * sys_fstat_extended
5536 *
5537 * Description: Extended version of fstat supporting returning extended
5538 * security information
5539 *
5540 * Parameters: p The process doing the fstat
5541 * uap->fd The fd to stat
5542 * uap->ub The user stat buffer
5543 * uap->xsecurity The user extended security
5544 * buffer, or 0 if none
5545 * uap->xsecurity_size The size of xsecurity, or 0
5546 *
5547 * Returns: 0 Success
5548 * !0 Errno (see fstat)
5549 */
5550int
5551sys_fstat_extended(proc_t p, struct fstat_extended_args *uap, __unused int32_t *retval)
5552{
5553 return fstat(p, fd: uap->fd, ub: uap->ub, xsecurity: uap->xsecurity, xsecurity_size: uap->xsecurity_size, isstat64: 0);
5554}
5555
5556
5557/*
5558 * sys_fstat
5559 *
5560 * Description: Get file status for the file associated with fd
5561 *
5562 * Parameters: p The process doing the fstat
5563 * uap->fd The fd to stat
5564 * uap->ub The user stat buffer
5565 *
5566 * Returns: 0 Success
5567 * !0 Errno (see fstat)
5568 */
5569int
5570sys_fstat(proc_t p, struct fstat_args *uap, __unused int32_t *retval)
5571{
5572 return fstat(p, fd: uap->fd, ub: uap->ub, xsecurity: 0, xsecurity_size: 0, isstat64: 0);
5573}
5574
5575
5576/*
5577 * sys_fstat64_extended
5578 *
5579 * Description: Extended version of fstat64 supporting returning extended
5580 * security information
5581 *
5582 * Parameters: p The process doing the fstat
5583 * uap->fd The fd to stat
5584 * uap->ub The user stat buffer
5585 * uap->xsecurity The user extended security
5586 * buffer, or 0 if none
5587 * uap->xsecurity_size The size of xsecurity, or 0
5588 *
5589 * Returns: 0 Success
5590 * !0 Errno (see fstat)
5591 */
5592int
5593sys_fstat64_extended(proc_t p, struct fstat64_extended_args *uap, __unused int32_t *retval)
5594{
5595 return fstat(p, fd: uap->fd, ub: uap->ub, xsecurity: uap->xsecurity, xsecurity_size: uap->xsecurity_size, isstat64: 1);
5596}
5597
5598
5599/*
5600 * sys_fstat64
5601 *
5602 * Description: Get 64 bit version of the file status for the file associated
5603 * with fd
5604 *
5605 * Parameters: p The process doing the fstat
5606 * uap->fd The fd to stat
5607 * uap->ub The user stat buffer
5608 *
5609 * Returns: 0 Success
5610 * !0 Errno (see fstat)
5611 */
5612int
5613sys_fstat64(proc_t p, struct fstat64_args *uap, __unused int32_t *retval)
5614{
5615 return fstat(p, fd: uap->fd, ub: uap->ub, xsecurity: 0, xsecurity_size: 0, isstat64: 1);
5616}
5617
5618
5619/*
5620 * sys_fpathconf
5621 *
5622 * Description: Return pathconf information about a file descriptor.
5623 *
5624 * Parameters: p Process making the request
5625 * uap->fd fd to get information about
5626 * uap->name Name of information desired
5627 * retval Pointer to the call return area
5628 *
5629 * Returns: 0 Success
5630 * EINVAL
5631 * fp_lookup:EBADF Bad file descriptor
5632 * vnode_getwithref:???
5633 * vn_pathconf:???
5634 *
5635 * Implicit returns:
5636 * *retval (modified) Returned information (numeric)
5637 */
int
sys_fpathconf(proc_t p, struct fpathconf_args *uap, int32_t *retval)
{
	int fd = uap->fd;
	struct fileproc *fp;
	struct vnode *vp;
	int error = 0;
	file_type_t type;


	AUDIT_ARG(fd, uap->fd);
	/* Take a reference on the fileproc; dropped at "out". */
	if ((error = fp_lookup(p, fd, resultfp: &fp, locked: 0))) {
		return error;
	}
	type = fp->f_type;

	switch (type) {
	case DTYPE_SOCKET:
		/* Sockets and pipes only answer _PC_PIPE_BUF. */
		if (uap->name != _PC_PIPE_BUF) {
			error = EINVAL;
			goto out;
		}
		*retval = PIPE_BUF;
		error = 0;
		goto out;

	case DTYPE_PIPE:
		if (uap->name != _PC_PIPE_BUF) {
			error = EINVAL;
			goto out;
		}
		*retval = PIPE_BUF;
		error = 0;
		goto out;

	case DTYPE_VNODE:
		vp = (struct vnode *)fp_get_data(fp);

		/* For real files, defer to the filesystem via vn_pathconf(). */
		if ((error = vnode_getwithref(vp)) == 0) {
			AUDIT_ARG(vnpath, vp, ARG_VNODE1);

			error = vn_pathconf(vp, uap->name, retval, vfs_context_current());

			(void)vnode_put(vp);
		}
		goto out;

	default:
		/* Other descriptor types have no pathconf semantics. */
		error = EINVAL;
		goto out;
	}
	/*NOTREACHED*/
out:
	fp_drop(p, fd, fp, locked: 0);
	return error;
}
5694
5695/*
5696 * sys_flock
5697 *
5698 * Description: Apply an advisory lock on a file descriptor.
5699 *
5700 * Parameters: p Process making request
5701 * uap->fd fd on which the lock is to be
5702 * attempted
5703 * uap->how (Un)Lock bits, including type
5704 * retval Pointer to the call return area
5705 *
5706 * Returns: 0 Success
5707 * fp_getfvp:EBADF Bad file descriptor
5708 * fp_getfvp:ENOTSUP fd does not refer to a vnode
5709 * vnode_getwithref:???
5710 * VNOP_ADVLOCK:???
5711 *
5712 * Implicit returns:
5713 * *retval (modified) Size of dtable
5714 *
5715 * Notes: Just attempt to get a record lock of the requested type on
5716 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
5717 */
int
sys_flock(proc_t p, struct flock_args *uap, __unused int32_t *retval)
{
	int fd = uap->fd;
	int how = uap->how;
	struct fileproc *fp;
	struct vnode *vp;
	struct flock lf;
	vfs_context_t ctx = vfs_context_current();
	int error = 0;

	AUDIT_ARG(fd, uap->fd);
	/* Resolve the fd to its vnode; fails for non-vnode descriptors. */
	if ((error = fp_getfvp(p, fd, resultfp: &fp, resultvp: &vp))) {
		return error;
	}
	if ((error = vnode_getwithref(vp))) {
		goto out1;
	}
	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	/* flock() always covers the entire file (whence=SEEK_SET, start=0, len=0). */
	lf.l_whence = SEEK_SET;
	lf.l_start = 0;
	lf.l_len = 0;
	if (how & LOCK_UN) {
		lf.l_type = F_UNLCK;
		error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
		goto out;
	}
	if (how & LOCK_EX) {
		lf.l_type = F_WRLCK;
	} else if (how & LOCK_SH) {
		lf.l_type = F_RDLCK;
	} else {
		/* Caller specified neither LOCK_EX nor LOCK_SH. */
		error = EBADF;
		goto out;
	}
#if CONFIG_MACF
	error = mac_file_check_lock(cred: kauth_cred_get(), fg: fp->fp_glob, F_SETLK, fl: &lf);
	if (error) {
		goto out;
	}
#endif
	/* Block waiting for the lock unless the caller asked for LOCK_NB. */
	error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob, F_SETLK, &lf,
	    (how & LOCK_NB ? F_FLOCK : F_FLOCK | F_WAIT),
	    ctx, NULL);
	if (!error) {
		/* Record on the fileglob that an advisory lock was taken. */
		os_atomic_or(&fp->fp_glob->fg_flag, FWASLOCKED, relaxed);
	}
out:
	(void)vnode_put(vp);
out1:
	fp_drop(p, fd, fp, locked: 0);
	return error;
}
5772
5773/*
5774 * sys_fileport_makeport
5775 *
5776 * Description: Obtain a Mach send right for a given file descriptor.
5777 *
5778 * Parameters: p Process calling fileport
5779 * uap->fd The fd to reference
5780 * uap->portnamep User address at which to place port name.
5781 *
5782 * Returns: 0 Success.
5783 * EBADF Bad file descriptor.
5784 * EINVAL File descriptor had type that cannot be sent, misc. other errors.
5785 * EFAULT Address at which to store port name is not valid.
5786 * EAGAIN Resource shortage.
5787 *
5788 * Implicit returns:
5789 * On success, name of send right is stored at user-specified address.
5790 */
int
sys_fileport_makeport(proc_t p, struct fileport_makeport_args *uap,
    __unused int *retval)
{
	int err;
	int fd = uap->fd;
	user_addr_t user_portaddr = uap->portnamep;
	struct fileproc *fp = FILEPROC_NULL;
	struct fileglob *fg = NULL;
	ipc_port_t fileport;
	mach_port_name_t name = MACH_PORT_NULL;

	proc_fdlock(p);
	/* Look up the fd with the fd lock held (locked: 1). */
	err = fp_lookup(p, fd, resultfp: &fp, locked: 1);
	if (err != 0) {
		goto out_unlock;
	}

	/* Only descriptor types that may travel over IPC can be made into ports. */
	fg = fp->fp_glob;
	if (!fg_sendable(fg)) {
		err = EINVAL;
		goto out_unlock;
	}

	/* Guarded fds may not be turned into fileports; raise a guard exception. */
	if (fp_isguarded(fp, GUARD_FILEPORT)) {
		err = fp_guard_exception(p, fd, fp, attribs: kGUARD_EXC_FILEPORT);
		goto out_unlock;
	}

	/* Dropped when port is deallocated */
	fg_ref(p, fg);

	proc_fdunlock(p);

	/* Allocate and initialize a port */
	fileport = fileport_alloc(fg);
	if (fileport == IPC_PORT_NULL) {
		/* Port allocation failed: give back the fileglob ref we took. */
		fg_drop_live(fg);
		err = EAGAIN;
		goto out;
	}

	/* Add an entry. Deallocates port on failure. */
	name = ipc_port_copyout_send(sright: fileport, space: get_task_ipcspace(t: proc_task(p)));
	if (!MACH_PORT_VALID(name)) {
		err = EINVAL;
		goto out;
	}

	/* Hand the port name back to userland. */
	err = copyout(&name, user_portaddr, sizeof(mach_port_name_t));
	if (err != 0) {
		goto out;
	}

	/* Tag the fileglob for debugging purposes */
	lck_mtx_lock_spin(lck: &fg->fg_lock);
	fg->fg_lflags |= FG_PORTMADE;
	lck_mtx_unlock(lck: &fg->fg_lock);

	fp_drop(p, fd, fp, locked: 0);

	return 0;

out_unlock:
	proc_fdunlock(p);
out:
	/* Error unwinding: undo the userland-visible port entry if one was made. */
	if (MACH_PORT_VALID(name)) {
		/* Don't care if another thread races us to deallocate the entry */
		(void) mach_port_deallocate(task: get_task_ipcspace(t: proc_task(p)), name);
	}

	if (fp != FILEPROC_NULL) {
		fp_drop(p, fd, fp, locked: 0);
	}

	return err;
}
5868
/*
 * fileport_releasefg
 *
 * Description: Drop the fileglob reference held on behalf of a fileport
 *              (the reference taken in sys_fileport_makeport via fg_ref).
 *              NOTE(review): presumably called when the fileport is
 *              destroyed — confirm against fileport_alloc()'s contract.
 */
void
fileport_releasefg(struct fileglob *fg)
{
	(void)fg_drop(PROC_NULL, fg);
}
5874
5875/*
5876 * fileport_makefd
5877 *
5878 * Description: Obtain the file descriptor for a given Mach send right.
5879 *
5880 * Returns: 0 Success
5881 * EINVAL Invalid Mach port name, or port is not for a file.
5882 * fdalloc:EMFILE
5883 * fdalloc:ENOMEM Unable to allocate fileproc or extend file table.
5884 *
5885 * Implicit returns:
5886 * *retval (modified) The new descriptor
5887 */
int
fileport_makefd(proc_t p, ipc_port_t port, fileproc_flags_t fp_flags, int *retval)
{
	struct fileglob *fg;
	struct fileproc *fp = FILEPROC_NULL;
	int fd;
	int err;

	/* Recover the fileglob backing the port; fails if it is not a fileport. */
	fg = fileport_port_to_fileglob(port);
	if (fg == NULL) {
		err = EINVAL;
		goto out;
	}

	fp = fileproc_alloc_init();

	proc_fdlock(p);
	/* Reserve a descriptor slot (want: 0 = lowest available fd). */
	err = fdalloc(p, want: 0, result: &fd);
	if (err != 0) {
		proc_fdunlock(p);
		goto out;
	}
	if (fp_flags) {
		fp->fp_flags |= fp_flags;
	}

	/* Bind the new fileproc to the port's fileglob, taking a reference. */
	fp->fp_glob = fg;
	fg_ref(p, fg);

	/* Publish the fileproc in the fd table and release the reserved slot. */
	procfdtbl_releasefd(p, fd, fp);
	proc_fdunlock(p);

	*retval = fd;
	err = 0;
out:
	/* On failure, free the fileproc we allocated (if any). */
	if ((fp != NULL) && (0 != err)) {
		fileproc_free(fp);
	}

	return err;
}
5929
5930/*
5931 * sys_fileport_makefd
5932 *
5933 * Description: Obtain the file descriptor for a given Mach send right.
5934 *
5935 * Parameters: p Process calling fileport
5936 * uap->port Name of send right to file port.
5937 *
5938 * Returns: 0 Success
5939 * EINVAL Invalid Mach port name, or port is not for a file.
5940 * fdalloc:EMFILE
5941 * fdalloc:ENOMEM Unable to allocate fileproc or extend file table.
5942 *
5943 * Implicit returns:
5944 * *retval (modified) The new descriptor
5945 */
5946int
5947sys_fileport_makefd(proc_t p, struct fileport_makefd_args *uap, int32_t *retval)
5948{
5949 ipc_port_t port = IPC_PORT_NULL;
5950 mach_port_name_t send = uap->port;
5951 kern_return_t res;
5952 int err;
5953
5954 res = ipc_object_copyin(get_task_ipcspace(t: proc_task(p)),
5955 send, MACH_MSG_TYPE_COPY_SEND, &port, 0, NULL, IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND);
5956
5957 if (res == KERN_SUCCESS) {
5958 err = fileport_makefd(p, port, fp_flags: FP_CLOEXEC, retval);
5959 } else {
5960 err = EINVAL;
5961 }
5962
5963 if (IPC_PORT_NULL != port) {
5964 ipc_port_release_send(port);
5965 }
5966
5967 return err;
5968}
5969
5970
5971#pragma mark fileops wrappers
5972
5973/*
5974 * fo_read
5975 *
5976 * Description: Generic fileops read indirected through the fileops pointer
5977 * in the fileproc structure
5978 *
5979 * Parameters: fp fileproc structure pointer
5980 * uio user I/O structure pointer
5981 * flags FOF_ flags
5982 * ctx VFS context for operation
5983 *
5984 * Returns: 0 Success
5985 * !0 Errno from read
5986 */
5987int
5988fo_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5989{
5990 return (*fp->f_ops->fo_read)(fp, uio, flags, ctx);
5991}
5992
5993int
5994fo_no_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5995{
5996#pragma unused(fp, uio, flags, ctx)
5997 return ENXIO;
5998}
5999
6000
6001/*
6002 * fo_write
6003 *
6004 * Description: Generic fileops write indirected through the fileops pointer
6005 * in the fileproc structure
6006 *
6007 * Parameters: fp fileproc structure pointer
6008 * uio user I/O structure pointer
6009 * flags FOF_ flags
6010 * ctx VFS context for operation
6011 *
6012 * Returns: 0 Success
6013 * !0 Errno from write
6014 */
6015int
6016fo_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
6017{
6018 return (*fp->f_ops->fo_write)(fp, uio, flags, ctx);
6019}
6020
6021int
6022fo_no_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
6023{
6024#pragma unused(fp, uio, flags, ctx)
6025 return ENXIO;
6026}
6027
6028
6029/*
6030 * fo_ioctl
6031 *
6032 * Description: Generic fileops ioctl indirected through the fileops pointer
6033 * in the fileproc structure
6034 *
6035 * Parameters: fp fileproc structure pointer
6036 * com ioctl command
6037 * data pointer to internalized copy
6038 * of user space ioctl command
6039 * parameter data in kernel space
6040 * ctx VFS context for operation
6041 *
6042 * Returns: 0 Success
6043 * !0 Errno from ioctl
6044 *
6045 * Locks: The caller is assumed to have held the proc_fdlock; this
6046 * function releases and reacquires this lock. If the caller
6047 * accesses data protected by this lock prior to calling this
6048 * function, it will need to revalidate/reacquire any cached
6049 * protected data obtained prior to the call.
6050 */
6051int
6052fo_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
6053{
6054 int error;
6055
6056 proc_fdunlock(p: vfs_context_proc(ctx));
6057 error = (*fp->f_ops->fo_ioctl)(fp, com, data, ctx);
6058 proc_fdlock(p: vfs_context_proc(ctx));
6059 return error;
6060}
6061
6062int
6063fo_no_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
6064{
6065#pragma unused(fp, com, data, ctx)
6066 return ENOTTY;
6067}
6068
6069
6070/*
6071 * fo_select
6072 *
6073 * Description: Generic fileops select indirected through the fileops pointer
6074 * in the fileproc structure
6075 *
6076 * Parameters: fp fileproc structure pointer
6077 * which select which
6078 * wql pointer to wait queue list
6079 * ctx VFS context for operation
6080 *
6081 * Returns: 0 Success
6082 * !0 Errno from select
6083 */
6084int
6085fo_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
6086{
6087 return (*fp->f_ops->fo_select)(fp, which, wql, ctx);
6088}
6089
6090int
6091fo_no_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
6092{
6093#pragma unused(fp, which, wql, ctx)
6094 return ENOTSUP;
6095}
6096
6097
6098/*
6099 * fo_close
6100 *
6101 * Description: Generic fileops close indirected through the fileops pointer
6102 * in the fileproc structure
6103 *
6104 * Parameters: fp fileproc structure pointer for
6105 * file to close
6106 * ctx VFS context for operation
6107 *
6108 * Returns: 0 Success
6109 * !0 Errno from close
6110 */
6111int
6112fo_close(struct fileglob *fg, vfs_context_t ctx)
6113{
6114 return (*fg->fg_ops->fo_close)(fg, ctx);
6115}
6116
6117
6118/*
6119 * fo_drain
6120 *
6121 * Description: Generic fileops kqueue filter indirected through the fileops
6122 * pointer in the fileproc structure
6123 *
6124 * Parameters: fp fileproc structure pointer
6125 * ctx VFS context for operation
6126 *
6127 * Returns: 0 Success
6128 * !0 errno from drain
6129 */
6130int
6131fo_drain(struct fileproc *fp, vfs_context_t ctx)
6132{
6133 return (*fp->f_ops->fo_drain)(fp, ctx);
6134}
6135
6136int
6137fo_no_drain(struct fileproc *fp, vfs_context_t ctx)
6138{
6139#pragma unused(fp, ctx)
6140 return ENOTSUP;
6141}
6142
6143
6144/*
6145 * fo_kqfilter
6146 *
6147 * Description: Generic fileops kqueue filter indirected through the fileops
6148 * pointer in the fileproc structure
6149 *
6150 * Parameters: fp fileproc structure pointer
6151 * kn pointer to knote to filter on
6152 *
6153 * Returns: (kn->kn_flags & EV_ERROR) error in kn->kn_data
6154 * 0 Filter is not active
6155 * !0 Filter is active
6156 */
6157int
6158fo_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
6159{
6160 return (*fp->f_ops->fo_kqfilter)(fp, kn, kev);
6161}
6162
int
fo_no_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
{
	/* Stub for file types that do not support kqueue filtering. */
	(void)fp;
	(void)kev;
	knote_set_error(kn, ENOTSUP);
	return 0;
}
6170