kern_descrip.c source code [xnu/bsd/kern/kern_descrip.c]

1	/*
2	* Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3	*
4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5	*
6	* This file contains Original Code and/or Modifications of Original Code
7	* as defined in and that are subject to the Apple Public Source License
8	* Version 2.0 (the 'License'). You may not use this file except in
9	* compliance with the License. The rights granted to you under the License
10	* may not be used to create, or enable the creation or redistribution of,
11	* unlawful or unlicensed copies of an Apple operating system, or to
12	* circumvent, violate, or enable the circumvention or violation of, any
13	* terms of an Apple operating system software license agreement.
14	*
15	* Please obtain a copy of the License at
16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
17	*
18	* The Original Code and all software distributed under the License are
19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23	* Please see the License for the specific language governing rights and
24	* limitations under the License.
25	*
26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27	*/
/* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */
29	/*
30	* Copyright (c) 1982, 1986, 1989, 1991, 1993
31	* The Regents of the University of California. All rights reserved.
32	* (c) UNIX System Laboratories, Inc.
33	* All or some portions of this file are derived from material licensed
34	* to the University of California by American Telephone and Telegraph
35	* Co. or Unix System Laboratories, Inc. and are reproduced herein with
36	* the permission of UNIX System Laboratories, Inc.
37	*
38	* Redistribution and use in source and binary forms, with or without
39	* modification, are permitted provided that the following conditions
40	* are met:
41	* 1. Redistributions of source code must retain the above copyright
42	* notice, this list of conditions and the following disclaimer.
43	* 2. Redistributions in binary form must reproduce the above copyright
44	* notice, this list of conditions and the following disclaimer in the
45	* documentation and/or other materials provided with the distribution.
46	* 3. All advertising materials mentioning features or use of this software
47	* must display the following acknowledgement:
48	* This product includes software developed by the University of
49	* California, Berkeley and its contributors.
50	* 4. Neither the name of the University nor the names of its contributors
51	* may be used to endorse or promote products derived from this software
52	* without specific prior written permission.
53	*
54	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64	* SUCH DAMAGE.
65	*
66	* @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95
67	*/
68	/*
69	* NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
70	* support for mandatory and extensible security protections. This notice
71	* is included in support of clause 2.2 (b) of the Apple Public License,
72	* Version 2.0.
73	*/
74
75	#include <sys/param.h>
76	#include <sys/systm.h>
77	#include <sys/filedesc.h>
78	#include <sys/kernel.h>
79	#include <sys/vnode_internal.h>
80	#include <sys/proc_internal.h>
81	#include <sys/kauth.h>
82	#include <sys/file_internal.h>
83	#include <sys/guarded.h>
84	#include <sys/priv.h>
85	#include <sys/socket.h>
86	#include <sys/socketvar.h>
87	#include <sys/stat.h>
88	#include <sys/ioctl.h>
89	#include <sys/fcntl.h>
90	#include <sys/fsctl.h>
91	#include <sys/malloc.h>
92	#include <sys/mman.h>
93	#include <sys/syslog.h>
94	#include <sys/unistd.h>
95	#include <sys/resourcevar.h>
96	#include <sys/aio_kern.h>
97	#include <sys/ev.h>
98	#include <kern/locks.h>
99	#include <sys/uio_internal.h>
100	#include <sys/codesign.h>
101	#include <sys/codedir_internal.h>
102
103	#include <security/audit/audit.h>
104
105	#include <sys/mount_internal.h>
106	#include <sys/kdebug.h>
107	#include <sys/sysproto.h>
108	#include <sys/pipe.h>
109	#include <sys/spawn.h>
110	#include <sys/cprotect.h>
111	#include <kern/kern_types.h>
112	#include <kern/kalloc.h>
113	#include <kern/waitq.h>
114	#include <libkern/OSAtomic.h>
115
116	#include <sys/ubc_internal.h>
117
118	#include <kern/ipc_misc.h>
119	#include <vm/vm_protos.h>
120
121	#include <mach/mach_port.h>
122	#include <stdbool.h>
123
124	#if CONFIG_MACF
125	#include <security/mac_framework.h>
126	#endif
127
128	kern_return_t ipc_object_copyin(ipc_space_t, mach_port_name_t,
129	mach_msg_type_name_t, ipc_port_t *);
130	void ipc_port_release_send(ipc_port_t);
131
132	struct psemnode;
133	struct pshmnode;
134
135	static int finishdup(proc_t p,
136	struct filedesc fdp, int* old, int new, int flags, int32_t *retval);
137
138	int falloc_locked(proc_t p, struct fileproc *resultfp, int* resultfd, vfs_context_t ctx, int* locked);
139	void fg_drop(struct fileproc * fp);
140	void fg_free(struct fileglob *fg);
141	void fg_ref(struct fileproc * fp);
142	void fileport_releasefg(struct fileglob *fg);
143
144	/ flags for close_internal_locked /
145	#define FD_DUP2RESV 1
146
147	/ We don't want these exported /
148
149	__private_extern__
150	int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
151
152	static void _fdrelse(struct proc * p, int fd);
153
154
155	extern void file_lock_init(void);
156
157	extern kauth_scope_t kauth_scope_fileop;
158
159	/ Conflict wait queue for when selects collide (opaque type) /
160	extern struct waitq select_conflict_queue;
161
/*
 * Filesystem-specific fcntl selectors; only defined here when the
 * headers included above have not already provided them.
 */
#ifndef HFS_GET_BOOT_INFO
#define HFS_GET_BOOT_INFO   (FCNTL_FS_SPECIFIC_BASE + 0x00004)
#endif

#ifndef HFS_SET_BOOT_INFO
#define HFS_SET_BOOT_INFO   (FCNTL_FS_SPECIFIC_BASE + 0x00005)
#endif

#ifndef APFSIOC_REVERT_TO_SNAPSHOT
#define APFSIOC_REVERT_TO_SNAPSHOT  _IOW('J', 1, u_int64_t)
#endif

/*
 * Shorthand accessors: let code holding a struct fileproc reach fields of
 * the fileglob it points at (fp->f_flag is fp->f_fglob->fg_flag, etc.).
 */
#define f_flag f_fglob->fg_flag
#define f_type f_fglob->fg_ops->fo_type
#define f_msgcount f_fglob->fg_msgcount
#define f_cred f_fglob->fg_cred
#define f_ops f_fglob->fg_ops
#define f_offset f_fglob->fg_offset
#define f_data f_fglob->fg_data

/*
 * Evaluates to 1 when the signed 64-bit sum x + y would overflow (both
 * operands positive and the sum above LLONG_MAX, or both negative and the
 * sum below LLONG_MIN), otherwise 0.  The sum itself is never computed.
 * Both arguments are evaluated more than once, so they must be free of
 * side effects.
 */
#define CHECK_ADD_OVERFLOW_INT64L(x, y) \
		(((((x) > 0) && ((y) > 0) && ((x) > LLONG_MAX - (y))) || \
		(((x) < 0) && ((y) < 0) && ((x) < LLONG_MIN - (y)))) \
		? 1 : 0)
185	/*
186	* Descriptor management.
187	*/
188	struct fmsglist fmsghead; / head of list of open files /
189	struct fmsglist fmsg_ithead; / head of list of open files /
190	int nfiles; / actual number of open files /
191
192
193	lck_grp_attr_t * file_lck_grp_attr;
194	lck_grp_t * file_lck_grp;
195	lck_attr_t * file_lck_attr;
196
197	lck_mtx_t * uipc_lock;
198
199
/*
 * check_file_seek_range
 *
 * Description: Checks if seek offsets are in the range of 0 to LLONG_MAX.
 *
 * Parameters:	fl		Flock structure.
 *		cur_file_offset	Current offset in the file.
 *
 * Returns:	0		on Success.
 *		EOVERFLOW	on overflow.
 *		EINVAL		on offset less than zero.
 *
 * Notes:	Only SEEK_CUR and SEEK_SET are validated; any other l_whence
 *		value (including SEEK_END) is accepted and yields 0.
 *		A positive l_len names the range [start, start + l_len - 1];
 *		a non-positive l_len is checked as start + l_len.
 */

/* Return 1 when a + b is not representable as a signed 64-bit value, else 0. */
static inline int
flock_add_overflows(long long a, long long b)
{
	if (a > 0 && b > 0) {
		return (a > LLONG_MAX - b);
	}
	if (a < 0 && b < 0) {
		return (a < LLONG_MIN - b);
	}
	/* Operands of mixed sign (or a zero operand) can never overflow. */
	return (0);
}

static int
check_file_seek_range(struct flock *fl, off_t cur_file_offset)
{
	if (fl->l_whence == SEEK_CUR) {
		off_t start;

		/* Check if the start marker is beyond LLONG_MAX. */
		if (flock_add_overflows(fl->l_start, cur_file_offset)) {
			/* A negative l_start means the sum wrapped below LLONG_MIN. */
			if (fl->l_start < 0) {
				return EINVAL;
			}
			return EOVERFLOW;
		}
		/* The addition is known to be safe from here on. */
		start = fl->l_start + cur_file_offset;

		/* Check if the start marker is negative. */
		if (start < 0) {
			return EINVAL;
		}
		/* Check if end marker is beyond LLONG_MAX. */
		if ((fl->l_len > 0) &&
		    flock_add_overflows(start, fl->l_len - 1)) {
			return EOVERFLOW;
		}
		/*
		 * Check if the end marker is negative.  start is >= 0 and
		 * l_len is <= 0 here, so the sum cannot overflow.
		 */
		if ((fl->l_len <= 0) && (start + fl->l_len < 0)) {
			return EINVAL;
		}
	} else if (fl->l_whence == SEEK_SET) {
		/* Check if the start marker is negative. */
		if (fl->l_start < 0) {
			return EINVAL;
		}
		/* Check if the end marker is beyond LLONG_MAX. */
		if ((fl->l_len > 0) &&
		    flock_add_overflows(fl->l_start, fl->l_len - 1)) {
			return EOVERFLOW;
		}
		/* Check if the end marker is negative. */
		if ((fl->l_len < 0) && (fl->l_start + fl->l_len < 0)) {
			return EINVAL;
		}
	}
	return 0;
}
256
257
258	/*
259	* file_lock_init
260	*
261	* Description: Initialize the file lock group and the uipc and flist locks
262	*
263	* Parameters: (void)
264	*
265	* Returns: void
266	*
267	* Notes: Called at system startup from bsd_init().
268	*/
269	void
270	file_lock_init(void)
271	{
272	/ allocate file lock group attribute and group /
273	file_lck_grp_attr= lck_grp_attr_alloc_init();
274
275	file_lck_grp = lck_grp_alloc_init("file", file_lck_grp_attr);
276
277	/ Allocate file lock attribute /
278	file_lck_attr = lck_attr_alloc_init();
279
280	uipc_lock = lck_mtx_alloc_init(file_lck_grp, file_lck_attr);
281	}
282
283
284	/*
285	* proc_fdlock, proc_fdlock_spin
286	*
287	* Description: Lock to control access to the per process struct fileproc
288	* and struct filedesc
289	*
290	* Parameters: p Process to take the lock on
291	*
292	* Returns: void
293	*
294	* Notes: The lock is initialized in forkproc() and destroyed in
295	* reap_child_process().
296	*/
297	void
298	proc_fdlock(proc_t p)
299	{
300	lck_mtx_lock(&p->p_fdmlock);
301	}
302
303	void
304	proc_fdlock_spin(proc_t p)
305	{
306	lck_mtx_lock_spin(&p->p_fdmlock);
307	}
308
309	void
310	proc_fdlock_assert(proc_t p, int assertflags)
311	{
312	lck_mtx_assert(&p->p_fdmlock, assertflags);
313	}
314
315
316	/*
317	* proc_fdunlock
318	*
319	* Description: Unlock the lock previously locked by a call to proc_fdlock()
320	*
321	* Parameters: p Process to drop the lock on
322	*
323	* Returns: void
324	*/
325	void
326	proc_fdunlock(proc_t p)
327	{
328	lck_mtx_unlock(&p->p_fdmlock);
329	}
330
331
332	/*
333	* System calls on descriptors.
334	*/
335
336
337	/*
338	* getdtablesize
339	*
340	* Description: Returns the per process maximum size of the descriptor table
341	*
342	* Parameters: p Process being queried
343	* retval Pointer to the call return area
344	*
345	* Returns: 0 Success
346	*
347	* Implicit returns:
348	* *retval (modified) Size of dtable
349	*/
350	int
351	getdtablesize(proc_t p, __unused struct getdtablesize_args uap, int32_t retval)
352	{
353	proc_fdlock_spin(p);
354	retval = min((int*)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
355	proc_fdunlock(p);
356
357	return (`0`);
358	}
359
360
361	void
362	procfdtbl_reservefd(struct proc * p, int fd)
363	{
364	p->p_fd->fd_ofiles[fd] = NULL;
365	p->p_fd->fd_ofileflags[fd] \|= UF_RESERVED;
366	}
367
368	void
369	procfdtbl_markclosefd(struct proc * p, int fd)
370	{
371	p->p_fd->fd_ofileflags[fd] \|= (UF_RESERVED \| UF_CLOSING);
372	}
373
374	void
375	procfdtbl_releasefd(struct proc * p, int fd, struct fileproc * fp)
376	{
377	if (fp != NULL)
378	p->p_fd->fd_ofiles[fd] = fp;
379	p->p_fd->fd_ofileflags[fd] &= ~UF_RESERVED;
380	if ((p->p_fd->fd_ofileflags[fd] & UF_RESVWAIT) == UF_RESVWAIT) {
381	p->p_fd->fd_ofileflags[fd] &= ~UF_RESVWAIT;
382	wakeup(&p->p_fd);
383	}
384	}
385
386	void
387	procfdtbl_waitfd(struct proc * p, int fd)
388	{
389	p->p_fd->fd_ofileflags[fd] \|= UF_RESVWAIT;
390	msleep(&p->p_fd, &p->p_fdmlock, PRIBIO, "ftbl_waitfd", NULL);
391	}
392
393
394	void
395	procfdtbl_clearfd(struct proc * p, int fd)
396	{
397	int waiting;
398
399	waiting = (p->p_fd->fd_ofileflags[fd] & UF_RESVWAIT);
400	p->p_fd->fd_ofiles[fd] = NULL;
401	p->p_fd->fd_ofileflags[fd] = `0`;
402	if ( waiting == UF_RESVWAIT) {
403	wakeup(&p->p_fd);
404	}
405	}
406
407	/*
408	* _fdrelse
409	*
410	* Description: Inline utility function to free an fd in a filedesc
411	*
412	* Parameters: fdp Pointer to filedesc fd lies in
413	* fd fd to free
414	* reserv fd should be reserved
415	*
416	* Returns: void
417	*
418	* Locks: Assumes proc_fdlock for process pointing to fdp is held by
419	* the caller
420	*/
421	static void
422	_fdrelse(struct proc * p, int fd)
423	{
424	struct filedesc *fdp = p->p_fd;
425	int nfd = `0`;
426
427	if (fd < fdp->fd_freefile)
428	fdp->fd_freefile = fd;
429	#if DIAGNOSTIC
430	if (fd > fdp->fd_lastfile)
431	panic("fdrelse: fd_lastfile inconsistent");
432	#endif
433	procfdtbl_clearfd(p, fd);
434
435	while ((nfd = fdp->fd_lastfile) > `0` &&
436	fdp->fd_ofiles[nfd] == NULL &&
437	!(fdp->fd_ofileflags[nfd] & UF_RESERVED))
438	/ JMM - What about files with lingering EV_VANISHED knotes? /
439	fdp->fd_lastfile--;
440	}
441
442
443	int
444	fd_rdwr(
445	int fd,
446	enum uio_rw rw,
447	uint64_t base,
448	int64_t len,
449	enum uio_seg segflg,
450	off_t offset,
451	int io_flg,
452	int64_t *aresid)
453	{
454	struct fileproc *fp;
455	proc_t p;
456	int error = `0`;
457	int flags = `0`;
458	int spacetype;
459	uio_t auio = NULL;
460	char uio_buf[ UIO_SIZEOF(`1`) ];
461	struct vfs_context context = *(vfs_context_current());
462	bool wrote_some = false;
463
464	p = current_proc();
465
466	error = fp_lookup(p, fd, &fp, `0`);
467	if (error)
468	return(error);
469
470	if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_PIPE && fp->f_type != DTYPE_SOCKET) {
471	error = EINVAL;
472	goto out;
473	}
474	if (rw == UIO_WRITE && !(fp->f_flag & FWRITE)) {
475	error = EBADF;
476	goto out;
477	}
478
479	if (rw == UIO_READ && !(fp->f_flag & FREAD)) {
480	error = EBADF;
481	goto out;
482	}
483
484	context.vc_ucred = fp->f_fglob->fg_cred;
485
486	if (UIO_SEG_IS_USER_SPACE(segflg))
487	spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
488	else
489	spacetype = UIO_SYSSPACE;
490
491	auio = uio_createwithbuffer(`1`, offset, spacetype, rw, &uio_buf[`0`], sizeof(uio_buf));
492
493	uio_addiov(auio, base, len);
494
495	if ( !(io_flg & IO_APPEND))
496	flags = FOF_OFFSET;
497
498	if (rw == UIO_WRITE) {
499	user_ssize_t orig_resid = uio_resid(auio);
500	error = fo_write(fp, auio, flags, &context);
501	wrote_some = uio_resid(auio) < orig_resid;
502	} else
503	error = fo_read(fp, auio, flags, &context);
504
505	if (aresid)
506	*aresid = uio_resid(auio);
507	else {
508	if (uio_resid(auio) && error == `0`)
509	error = EIO;
510	}
511	out:
512	if (wrote_some)
513	fp_drop_written(p, fd, fp);
514	else
515	fp_drop(p, fd, fp, `0`);
516
517	return error;
518	}
519
520
521
522	/*
523	* dup
524	*
525	* Description: Duplicate a file descriptor.
526	*
527	* Parameters: p Process performing the dup
528	* uap->fd The fd to dup
529	* retval Pointer to the call return area
530	*
531	* Returns: 0 Success
532	* !0 Errno
533	*
534	* Implicit returns:
535	* *retval (modified) The new descriptor
536	*/
537	int
538	dup(proc_t p, struct dup_args uap, int32_t retval)
539	{
540	struct filedesc *fdp = p->p_fd;
541	int old = uap->fd;
542	int new, error;
543	struct fileproc *fp;
544
545	proc_fdlock(p);
546	if ( (error = fp_lookup(p, old, &fp, `1`)) ) {
547	proc_fdunlock(p);
548	return(error);
549	}
550	if (FP_ISGUARDED(fp, GUARD_DUP)) {
551	error = fp_guard_exception(p, old, fp, kGUARD_EXC_DUP);
552	(void) fp_drop(p, old, fp, `1`);
553	proc_fdunlock(p);
554	return (error);
555	}
556	if ( (error = fdalloc(p, `0`, &new)) ) {
557	fp_drop(p, old, fp, `1`);
558	proc_fdunlock(p);
559	return (error);
560	}
561	error = finishdup(p, fdp, old, new, `0`, retval);
562	fp_drop(p, old, fp, `1`);
563	proc_fdunlock(p);
564
565	if (ENTR_SHOULDTRACE && fp->f_type == DTYPE_SOCKET) {
566	KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
567	new, `0`, (int64_t)VM_KERNEL_ADDRPERM(fp->f_data));
568	}
569
570	return (error);
571	}
572
573	/*
574	* dup2
575	*
576	* Description: Duplicate a file descriptor to a particular value.
577	*
578	* Parameters: p Process performing the dup
579	* uap->from The fd to dup
580	* uap->to The fd to dup it to
581	* retval Pointer to the call return area
582	*
583	* Returns: 0 Success
584	* !0 Errno
585	*
586	* Implicit returns:
587	* *retval (modified) The new descriptor
588	*/
589	int
590	dup2(proc_t p, struct dup2_args uap, int32_t retval)
591	{
592	struct filedesc *fdp = p->p_fd;
593	int old = uap->from, new = uap->to;
594	int i, error;
595	struct fileproc fp, nfp;
596
597	proc_fdlock(p);
598
599	startover:
600	if ( (error = fp_lookup(p, old, &fp, `1`)) ) {
601	proc_fdunlock(p);
602	return(error);
603	}
604	if (FP_ISGUARDED(fp, GUARD_DUP)) {
605	error = fp_guard_exception(p, old, fp, kGUARD_EXC_DUP);
606	(void) fp_drop(p, old, fp, `1`);
607	proc_fdunlock(p);
608	return (error);
609	}
610	if (new < `0` \|\|
611	(rlim_t)new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur \|\|
612	new >= maxfiles) {
613	fp_drop(p, old, fp, `1`);
614	proc_fdunlock(p);
615	return (EBADF);
616	}
617	if (old == new) {
618	fp_drop(p, old, fp, `1`);
619	*retval = new;
620	proc_fdunlock(p);
621	return (`0`);
622	}
623	if (new < `0` \|\| new >= fdp->fd_nfiles) {
624	if ( (error = fdalloc(p, new, &i)) ) {
625	fp_drop(p, old, fp, `1`);
626	proc_fdunlock(p);
627	return (error);
628	}
629	if (new != i) {
630	fdrelse(p, i);
631	goto closeit;
632	}
633	} else {
634	closeit:
635	while ((fdp->fd_ofileflags[new] & UF_RESERVED) == UF_RESERVED) {
636	fp_drop(p, old, fp, `1`);
637	procfdtbl_waitfd(p, new);
638	#if DIAGNOSTIC
639	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
640	#endif
641	goto startover;
642	}
643
644	if ((fdp->fd_ofiles[new] != NULL) &&
645	((error = fp_lookup(p, new, &nfp, `1`)) == `0`)) {
646	fp_drop(p, old, fp, `1`);
647	if (FP_ISGUARDED(nfp, GUARD_CLOSE)) {
648	error = fp_guard_exception(p,
649	new, nfp, kGUARD_EXC_CLOSE);
650	(void) fp_drop(p, new, nfp, `1`);
651	proc_fdunlock(p);
652	return (error);
653	}
654	(void)close_internal_locked(p, new, nfp, FD_DUP2RESV);
655	#if DIAGNOSTIC
656	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
657	#endif
658	procfdtbl_clearfd(p, new);
659	goto startover;
660	} else {
661	#if DIAGNOSTIC
662	if (fdp->fd_ofiles[new] != NULL)
663	panic("dup2: no ref on fileproc %d", new);
664	#endif
665	procfdtbl_reservefd(p, new);
666	}
667
668	#if DIAGNOSTIC
669	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
670	#endif
671
672	}
673	#if DIAGNOSTIC
674	if (fdp->fd_ofiles[new] != `0`)
675	panic("dup2: overwriting fd_ofiles with new %d", new);
676	if ((fdp->fd_ofileflags[new] & UF_RESERVED) == `0`)
677	panic("dup2: unreserved fileflags with new %d", new);
678	#endif
679	error = finishdup(p, fdp, old, new, `0`, retval);
680	fp_drop(p, old, fp, `1`);
681	proc_fdunlock(p);
682
683	return(error);
684	}
685
686
687	/*
688	* fcntl
689	*
690	* Description: The file control system call.
691	*
692	* Parameters: p Process performing the fcntl
693	* uap->fd The fd to operate against
694	* uap->cmd The command to perform
695	* uap->arg Pointer to the command argument
696	* retval Pointer to the call return area
697	*
698	* Returns: 0 Success
699	* !0 Errno (see fcntl_nocancel)
700	*
701	* Implicit returns:
702	* *retval (modified) fcntl return value (if any)
703	*
704	* Notes: This system call differs from fcntl_nocancel() in that it
705	* tests for cancellation prior to performing a potentially
706	* blocking operation.
707	*/
708	int
709	fcntl(proc_t p, struct fcntl_args uap, int32_t retval)
710	{
711	__pthread_testcancel(`1`);
712	return(fcntl_nocancel(p, (struct fcntl_nocancel_args *)uap, retval));
713	}
714
715
716	/*
717	* fcntl_nocancel
718	*
719	* Description: A non-cancel-testing file control system call.
720	*
721	* Parameters: p Process performing the fcntl
722	* uap->fd The fd to operate against
723	* uap->cmd The command to perform
724	* uap->arg Pointer to the command argument
725	* retval Pointer to the call return area
726	*
727	* Returns: 0 Success
728	* EINVAL
729	* fp_lookup:EBADF Bad file descriptor
730	* [F_DUPFD]
731	* fdalloc:EMFILE
732	* fdalloc:ENOMEM
733	* finishdup:EBADF
734	* finishdup:ENOMEM
735	* [F_SETOWN]
736	* ESRCH
737	* [F_SETLK]
738	* EBADF
739	* EOVERFLOW
740	* copyin:EFAULT
741	* vnode_getwithref:???
742	* VNOP_ADVLOCK:???
743	* msleep:ETIMEDOUT
744	* [F_GETLK]
745	* EBADF
746	* EOVERFLOW
747	* copyin:EFAULT
748	* copyout:EFAULT
749	* vnode_getwithref:???
750	* VNOP_ADVLOCK:???
751	* [F_PREALLOCATE]
752	* EBADF
753	* EINVAL
754	* copyin:EFAULT
755	* copyout:EFAULT
756	* vnode_getwithref:???
757	* VNOP_ALLOCATE:???
758	* [F_SETSIZE,F_RDADVISE]
759	* EBADF
760	* copyin:EFAULT
761	* vnode_getwithref:???
762	* [F_RDAHEAD,F_NOCACHE]
763	* EBADF
764	* vnode_getwithref:???
765	* [???]
766	*
767	* Implicit returns:
768	* *retval (modified) fcntl return value (if any)
769	*/
770	int
771	fcntl_nocancel(proc_t p, struct fcntl_nocancel_args uap, int32_t retval)
772	{
773	int fd = uap->fd;
774	struct filedesc *fdp = p->p_fd;
775	struct fileproc *fp;
776	char *pop;
777	struct vnode vp = NULLVP; /* for AUDIT_ARG() at end /
778	int i, tmp, error, error2, flg = `0`;
779	struct flock fl = {};
780	struct flocktimeout fltimeout;
781	struct timespec *timeout = NULL;
782	struct vfs_context context;
783	off_t offset;
784	int newmin;
785	daddr64_t lbn, bn;
786	unsigned int fflag;
787	user_addr_t argp;
788	boolean_t is64bit;
789
790	AUDIT_ARG(fd, uap->fd);
791	AUDIT_ARG(cmd, uap->cmd);
792
793	proc_fdlock(p);
794	if ( (error = fp_lookup(p, fd, &fp, `1`)) ) {
795	proc_fdunlock(p);
796	return(error);
797	}
798	context.vc_thread = current_thread();
799	context.vc_ucred = fp->f_cred;
800
801	is64bit = proc_is64bit(p);
802	if (is64bit) {
803	argp = uap->arg;
804	}
805	else {
806	/*
807	* Since the arg parameter is defined as a long but may be
808	* either a long or a pointer we must take care to handle
809	* sign extension issues. Our sys call munger will sign
810	* extend a long when we are called from a 32-bit process.
811	* Since we can never have an address greater than 32-bits
812	* from a 32-bit process we lop off the top 32-bits to avoid
813	* getting the wrong address
814	*/
815	argp = CAST_USER_ADDR_T((uint32_t)uap->arg);
816	}
817
818	pop = &fdp->fd_ofileflags[fd];
819
820	#if CONFIG_MACF
821	error = mac_file_check_fcntl(proc_ucred(p), fp->f_fglob, uap->cmd,
822	uap->arg);
823	if (error)
824	goto out;
825	#endif
826
827	switch (uap->cmd) {
828
829	case F_DUPFD:
830	case F_DUPFD_CLOEXEC:
831	if (FP_ISGUARDED(fp, GUARD_DUP)) {
832	error = fp_guard_exception(p, fd, fp, kGUARD_EXC_DUP);
833	goto out;
834	}
835	newmin = CAST_DOWN_EXPLICIT(int, uap->arg); / arg is an int, so we won't lose bits /
836	AUDIT_ARG(value32, newmin);
837	if ((u_int)newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur \|\|
838	newmin >= maxfiles) {
839	error = EINVAL;
840	goto out;
841	}
842	if ( (error = fdalloc(p, newmin, &i)) )
843	goto out;
844	error = finishdup(p, fdp, fd, i,
845	uap->cmd == F_DUPFD_CLOEXEC ? UF_EXCLOSE : `0`, retval);
846	goto out;
847
848	case F_GETFD:
849	retval = (pop & UF_EXCLOSE)? FD_CLOEXEC : `0`;
850	error = `0`;
851	goto out;
852
853	case F_SETFD:
854	AUDIT_ARG(value32, uap->arg);
855	if (uap->arg & FD_CLOEXEC)
856	*pop \|= UF_EXCLOSE;
857	else {
858	if (FILEPROC_TYPE(fp) == FTYPE_GUARDED) {
859	error = fp_guard_exception(p,
860	fd, fp, kGUARD_EXC_NOCLOEXEC);
861	goto out;
862	}
863	*pop &= ~UF_EXCLOSE;
864	}
865	error = `0`;
866	goto out;
867
868	case F_GETFL:
869	*retval = OFLAGS(fp->f_flag);
870	error = `0`;
871	goto out;
872
873	case F_SETFL:
874	fp->f_flag &= ~FCNTLFLAGS;
875	tmp = CAST_DOWN_EXPLICIT(int, uap->arg); / arg is an int, so we won't lose bits /
876	AUDIT_ARG(value32, tmp);
877	fp->f_flag \|= FFLAGS(tmp) & FCNTLFLAGS;
878	tmp = fp->f_flag & FNONBLOCK;
879	error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
880	if (error)
881	goto out;
882	tmp = fp->f_flag & FASYNC;
883	error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, &context);
884	if (!error)
885	goto out;
886	fp->f_flag &= ~FNONBLOCK;
887	tmp = `0`;
888	(void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
889	goto out;
890
891	case F_GETOWN:
892	if (fp->f_type == DTYPE_SOCKET) {
893	retval = ((struct* socket *)fp->f_data)->so_pgid;
894	error = `0`;
895	goto out;
896	}
897	error = fo_ioctl(fp, (int)TIOCGPGRP, (caddr_t)retval, &context);
898	retval = -retval;
899	goto out;
900
901	case F_SETOWN:
902	tmp = CAST_DOWN_EXPLICIT(pid_t, uap->arg); / arg is an int, so we won't lose bits /
903	AUDIT_ARG(value32, tmp);
904	if (fp->f_type == DTYPE_SOCKET) {
905	((struct socket *)fp->f_data)->so_pgid = tmp;
906	error =`0`;
907	goto out;
908	}
909	if (fp->f_type == DTYPE_PIPE) {
910	error = fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
911	goto out;
912	}
913
914	if (tmp <= `0`) {
915	tmp = -tmp;
916	} else {
917	proc_t p1 = proc_find(tmp);
918	if (p1 == `0`) {
919	error = ESRCH;
920	goto out;
921	}
922	tmp = (int)p1->p_pgrpid;
923	proc_rele(p1);
924	}
925	error = fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, &context);
926	goto out;
927
928	case F_SETNOSIGPIPE:
929	tmp = CAST_DOWN_EXPLICIT(int, uap->arg);
930	if (fp->f_type == DTYPE_SOCKET) {
931	#if SOCKETS
932	error = sock_setsockopt((struct socket *)fp->f_data,
933	SOL_SOCKET, SO_NOSIGPIPE, &tmp, sizeof (tmp));
934	#else
935	error = EINVAL;
936	#endif
937	} else {
938	struct fileglob *fg = fp->f_fglob;
939
940	lck_mtx_lock_spin(&fg->fg_lock);
941	if (tmp)
942	fg->fg_lflags \|= FG_NOSIGPIPE;
943	else
944	fg->fg_lflags &= FG_NOSIGPIPE;
945	lck_mtx_unlock(&fg->fg_lock);
946	error = `0`;
947	}
948	goto out;
949
950	case F_GETNOSIGPIPE:
951	if (fp->f_type == DTYPE_SOCKET) {
952	#if SOCKETS
953	int retsize = sizeof (*retval);
954	error = sock_getsockopt((struct socket *)fp->f_data,
955	SOL_SOCKET, SO_NOSIGPIPE, retval, &retsize);
956	#else
957	error = EINVAL;
958	#endif
959	} else {
960	*retval = (fp->f_fglob->fg_lflags & FG_NOSIGPIPE) ?
961	`1` : `0`;
962	error = `0`;
963	}
964	goto out;
965
966	case F_SETCONFINED:
967	/*
968	* If this is the only reference to this fglob in the process
969	* and it's already marked as close-on-fork then mark it as
970	* (immutably) "confined" i.e. any fd that points to it will
971	* forever be close-on-fork, and attempts to use an IPC
972	* mechanism to move the descriptor elsewhere will fail.
973	*/
974	if (CAST_DOWN_EXPLICIT(int, uap->arg)) {
975	struct fileglob *fg = fp->f_fglob;
976
977	lck_mtx_lock_spin(&fg->fg_lock);
978	if (fg->fg_lflags & FG_CONFINED)
979	error = `0`;
980	else if (`1` != fg->fg_count)
981	error = EAGAIN; / go close the dup .. /
982	else if (UF_FORKCLOSE == (*pop & UF_FORKCLOSE)) {
983	fg->fg_lflags \|= FG_CONFINED;
984	error = `0`;
985	} else
986	error = EBADF; / open without O_CLOFORK? /
987	lck_mtx_unlock(&fg->fg_lock);
988	} else {
989	/*
990	* Other subsystems may have built on the immutability
991	* of FG_CONFINED; clearing it may be tricky.
992	*/
993	error = EPERM; / immutable /
994	}
995	goto out;
996
997	case F_GETCONFINED:
998	*retval = (fp->f_fglob->fg_lflags & FG_CONFINED) ? `1` : `0`;
999	error = `0`;
1000	goto out;
1001
1002	case F_SETLKWTIMEOUT:
1003	case F_SETLKW:
1004	case F_OFD_SETLKWTIMEOUT:
1005	case F_OFD_SETLKW:
1006	flg \|= F_WAIT;
1007	/ Fall into F_SETLK /
1008
1009	case F_SETLK:
1010	case F_OFD_SETLK:
1011	if (fp->f_type != DTYPE_VNODE) {
1012	error = EBADF;
1013	goto out;
1014	}
1015	vp = (struct vnode *)fp->f_data;
1016
1017	fflag = fp->f_flag;
1018	offset = fp->f_offset;
1019	proc_fdunlock(p);
1020
1021	/ Copy in the lock structure /
1022	if (F_SETLKWTIMEOUT == uap->cmd \|\|
1023	F_OFD_SETLKWTIMEOUT == uap->cmd) {
1024	error = copyin(argp, (caddr_t) &fltimeout, sizeof(fltimeout));
1025	if (error) {
1026	goto outdrop;
1027	}
1028	fl = fltimeout.fl;
1029	timeout = &fltimeout.timeout;
1030	} else {
1031	error = copyin(argp, (caddr_t)&fl, sizeof(fl));
1032	if (error) {
1033	goto outdrop;
1034	}
1035	}
1036
1037	/ Check starting byte and ending byte for EOVERFLOW in SEEK_CUR /
1038	/ and ending byte for EOVERFLOW in SEEK_SET /
1039	error = check_file_seek_range(&fl, offset);
1040	if (error) {
1041	goto outdrop;
1042	}
1043
1044	if ( (error = vnode_getwithref(vp)) ) {
1045	goto outdrop;
1046	}
1047	if (fl.l_whence == SEEK_CUR)
1048	fl.l_start += offset;
1049
1050	#if CONFIG_MACF
1051	error = mac_file_check_lock(proc_ucred(p), fp->f_fglob,
1052	F_SETLK, &fl);
1053	if (error) {
1054	(void)vnode_put(vp);
1055	goto outdrop;
1056	}
1057	#endif
1058	switch (uap->cmd) {
1059	case F_OFD_SETLK:
1060	case F_OFD_SETLKW:
1061	case F_OFD_SETLKWTIMEOUT:
1062	flg \|= F_OFD_LOCK;
1063	switch (fl.l_type) {
1064	case F_RDLCK:
1065	if ((fflag & FREAD) == `0`) {
1066	error = EBADF;
1067	break;
1068	}
1069	error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob,
1070	F_SETLK, &fl, flg, &context, timeout);
1071	break;
1072	case F_WRLCK:
1073	if ((fflag & FWRITE) == `0`) {
1074	error = EBADF;
1075	break;
1076	}
1077	error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob,
1078	F_SETLK, &fl, flg, &context, timeout);
1079	break;
1080	case F_UNLCK:
1081	error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob,
1082	F_UNLCK, &fl, F_OFD_LOCK, &context,
1083	timeout);
1084	break;
1085	default:
1086	error = EINVAL;
1087	break;
1088	}
1089	if (`0` == error &&
1090	(F_RDLCK == fl.l_type \|\| F_WRLCK == fl.l_type)) {
1091	struct fileglob *fg = fp->f_fglob;
1092
1093	/*
1094	* arrange F_UNLCK on last close (once
1095	* set, FG_HAS_OFDLOCK is immutable)
1096	*/
1097	if ((fg->fg_lflags & FG_HAS_OFDLOCK) == `0`) {
1098	lck_mtx_lock_spin(&fg->fg_lock);
1099	fg->fg_lflags \|= FG_HAS_OFDLOCK;
1100	lck_mtx_unlock(&fg->fg_lock);
1101	}
1102	}
1103	break;
1104	default:
1105	flg \|= F_POSIX;
1106	switch (fl.l_type) {
1107	case F_RDLCK:
1108	if ((fflag & FREAD) == `0`) {
1109	error = EBADF;
1110	break;
1111	}
1112	// XXX UInt32 unsafe for LP64 kernel
1113	OSBitOrAtomic(P_LADVLOCK, &p->p_ladvflag);
1114	error = VNOP_ADVLOCK(vp, (caddr_t)p,
1115	F_SETLK, &fl, flg, &context, timeout);
1116	break;
1117	case F_WRLCK:
1118	if ((fflag & FWRITE) == `0`) {
1119	error = EBADF;
1120	break;
1121	}
1122	// XXX UInt32 unsafe for LP64 kernel
1123	OSBitOrAtomic(P_LADVLOCK, &p->p_ladvflag);
1124	error = VNOP_ADVLOCK(vp, (caddr_t)p,
1125	F_SETLK, &fl, flg, &context, timeout);
1126	break;
1127	case F_UNLCK:
1128	error = VNOP_ADVLOCK(vp, (caddr_t)p,
1129	F_UNLCK, &fl, F_POSIX, &context, timeout);
1130	break;
1131	default:
1132	error = EINVAL;
1133	break;
1134	}
1135	break;
1136	}
1137	(void) vnode_put(vp);
1138	goto outdrop;
1139
1140	case F_GETLK:
1141	case F_OFD_GETLK:
1142	case F_GETLKPID:
1143	case F_OFD_GETLKPID:
1144	if (fp->f_type != DTYPE_VNODE) {
1145	error = EBADF;
1146	goto out;
1147	}
1148	vp = (struct vnode *)fp->f_data;
1149
1150	offset = fp->f_offset;
1151	proc_fdunlock(p);
1152
1153	/ Copy in the lock structure /
1154	error = copyin(argp, (caddr_t)&fl, sizeof(fl));
1155	if (error)
1156	goto outdrop;
1157
1158	/ Check starting byte and ending byte for EOVERFLOW in SEEK_CUR /
1159	/ and ending byte for EOVERFLOW in SEEK_SET /
1160	error = check_file_seek_range(&fl, offset);
1161	if (error) {
1162	goto outdrop;
1163	}
1164
1165	if ((fl.l_whence == SEEK_SET) && (fl.l_start < `0`)) {
1166	error = EINVAL;
1167	goto outdrop;
1168	}
1169
1170	switch (fl.l_type) {
1171	case F_RDLCK:
1172	case F_UNLCK:
1173	case F_WRLCK:
1174	break;
1175	default:
1176	error = EINVAL;
1177	goto outdrop;
1178	}
1179
1180	switch (fl.l_whence) {
1181	case SEEK_CUR:
1182	case SEEK_SET:
1183	case SEEK_END:
1184	break;
1185	default:
1186	error = EINVAL;
1187	goto outdrop;
1188	}
1189
1190	if ( (error = vnode_getwithref(vp)) == `0` ) {
1191	if (fl.l_whence == SEEK_CUR)
1192	fl.l_start += offset;
1193
1194	#if CONFIG_MACF
1195	error = mac_file_check_lock(proc_ucred(p), fp->f_fglob,
1196	uap->cmd, &fl);
1197	if (error == `0`)
1198	#endif
1199	switch (uap->cmd) {
1200	case F_OFD_GETLK:
1201	error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob,
1202	F_GETLK, &fl, F_OFD_LOCK, &context, NULL);
1203	break;
1204	case F_OFD_GETLKPID:
1205	error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob,
1206	F_GETLKPID, &fl, F_OFD_LOCK, &context, NULL);
1207	break;
1208	default:
1209	error = VNOP_ADVLOCK(vp, (caddr_t)p,
1210	uap->cmd, &fl, F_POSIX, &context, NULL);
1211	break;
1212	}
1213
1214	(void)vnode_put(vp);
1215
1216	if (error == `0`)
1217	error = copyout((caddr_t)&fl, argp, sizeof(fl));
1218	}
1219	goto outdrop;
1220
1221	case F_PREALLOCATE: {
1222	fstore_t alloc_struct; / structure for allocate command /
1223	u_int32_t alloc_flags = `0`;
1224
1225	if (fp->f_type != DTYPE_VNODE) {
1226	error = EBADF;
1227	goto out;
1228	}
1229
1230	vp = (struct vnode *)fp->f_data;
1231	proc_fdunlock(p);
1232
1233	/* make sure that we have write permission */
1234	if ((fp->f_flag & FWRITE) == `0`) {
1235	error = EBADF;
1236	goto outdrop;
1237	}
1238
1239	error = copyin(argp, (caddr_t)&alloc_struct, sizeof(alloc_struct));
1240	if (error)
1241	goto outdrop;
1242
1243	/* now set the space allocated to 0 */
1244	alloc_struct.fst_bytesalloc = `0`;
1245
1246	/*
1247	* Do some simple parameter checking
1248	*/
1249
1250	/* set up the flags */
1251
1252	alloc_flags \|= PREALLOCATE;
1253
1254	if (alloc_struct.fst_flags & F_ALLOCATECONTIG)
1255	alloc_flags \|= ALLOCATECONTIG;
1256
1257	if (alloc_struct.fst_flags & F_ALLOCATEALL)
1258	alloc_flags \|= ALLOCATEALL;
1259
1260	/*
1261	* Do any position mode specific stuff. The position
1262	* modes handled below are F_PEOFPOSMODE and F_VOLPOSMODE.
1263	*/
1264
1265	switch (alloc_struct.fst_posmode) {
1266
1267	case F_PEOFPOSMODE:
1268	if (alloc_struct.fst_offset != `0`) {
1269	error = EINVAL;
1270	goto outdrop;
1271	}
1272
1273	alloc_flags \|= ALLOCATEFROMPEOF;
1274	break;
1275
1276	case F_VOLPOSMODE:
1277	if (alloc_struct.fst_offset <= `0`) {
1278	error = EINVAL;
1279	goto outdrop;
1280	}
1281
1282	alloc_flags \|= ALLOCATEFROMVOL;
1283	break;
1284
1285	default: {
1286	error = EINVAL;
1287	goto outdrop;
1288	}
1289	}
1290	if ( (error = vnode_getwithref(vp)) == `0` ) {
1291	/*
1292	* call allocate to get the space
1293	*/
1294	error = VNOP_ALLOCATE(vp,alloc_struct.fst_length,alloc_flags,
1295	&alloc_struct.fst_bytesalloc, alloc_struct.fst_offset,
1296	&context);
1297	(void)vnode_put(vp);
1298
1299	error2 = copyout((caddr_t)&alloc_struct, argp, sizeof(alloc_struct));
1300
1301	if (error == `0`)
1302	error = error2;
1303	}
1304	goto outdrop;
1305	}
1306	case F_PUNCHHOLE: {
1307	fpunchhole_t args;
1308
1309	if (fp->f_type != DTYPE_VNODE) {
1310	error = EBADF;
1311	goto out;
1312	}
1313
1314	vp = (struct vnode *)fp->f_data;
1315	proc_fdunlock(p);
1316
1317	/* need write permissions */
1318	if ((fp->f_flag & FWRITE) == `0`) {
1319	error = EPERM;
1320	goto outdrop;
1321	}
1322
1323	if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
1324	goto outdrop;
1325	}
1326
1327	if ((error = vnode_getwithref(vp))) {
1328	goto outdrop;
1329	}
1330
1331	#if CONFIG_MACF
1332	if ((error = mac_vnode_check_write(&context, fp->f_fglob->fg_cred, vp))) {
1333	(void)vnode_put(vp);
1334	goto outdrop;
1335	}
1336	#endif
1337
1338	error = VNOP_IOCTL(vp, F_PUNCHHOLE, (caddr_t)&args, `0`, &context);
1339	(void)vnode_put(vp);
1340
1341	goto outdrop;
1342	}
1343	case F_TRIM_ACTIVE_FILE: {
1344	ftrimactivefile_t args;
1345
1346	if (priv_check_cred(kauth_cred_get(), PRIV_TRIM_ACTIVE_FILE, `0`)) {
1347	error = EACCES;
1348	goto out;
1349	}
1350
1351	if (fp->f_type != DTYPE_VNODE) {
1352	error = EBADF;
1353	goto out;
1354	}
1355
1356	vp = (struct vnode *)fp->f_data;
1357	proc_fdunlock(p);
1358
1359	/* need write permissions */
1360	if ((fp->f_flag & FWRITE) == `0`) {
1361	error = EPERM;
1362	goto outdrop;
1363	}
1364
1365	if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
1366	goto outdrop;
1367	}
1368
1369	if ((error = vnode_getwithref(vp))) {
1370	goto outdrop;
1371	}
1372
1373	error = VNOP_IOCTL(vp, F_TRIM_ACTIVE_FILE, (caddr_t)&args, `0`, &context);
1374	(void)vnode_put(vp);
1375
1376	goto outdrop;
1377	}
1378	case F_SETSIZE:
1379	if (fp->f_type != DTYPE_VNODE) {
1380	error = EBADF;
1381	goto out;
1382	}
1383	vp = (struct vnode *)fp->f_data;
1384	proc_fdunlock(p);
1385
1386	error = copyin(argp, (caddr_t)&offset, sizeof (off_t));
1387	if (error)
1388	goto outdrop;
1389	AUDIT_ARG(value64, offset);
1390
1391	error = vnode_getwithref(vp);
1392	if (error)
1393	goto outdrop;
1394
1395	#if CONFIG_MACF
1396	error = mac_vnode_check_truncate(&context,
1397	fp->f_fglob->fg_cred, vp);
1398	if (error) {
1399	(void)vnode_put(vp);
1400	goto outdrop;
1401	}
1402	#endif
1403	/*
1404	* Make sure that we are root. Growing a file
1405	* without zero filling the data is a security hole;
1406	* root would have access anyway, so we'll allow it.
1407	*/
1408	if (!kauth_cred_issuser(kauth_cred_get())) {
1409	error = EACCES;
1410	} else {
1411	/*
1412	* set the file size
1413	*/
1414	error = vnode_setsize(vp, offset, IO_NOZEROFILL,
1415	&context);
1416
1417	#if CONFIG_MACF
1418	if (error == `0`)
1419	mac_vnode_notify_truncate(&context, fp->f_fglob->fg_cred, vp);
1420	#endif
1421	}
1422
1423	(void)vnode_put(vp);
1424	goto outdrop;
1425
1426	case F_RDAHEAD:
1427	if (fp->f_type != DTYPE_VNODE) {
1428	error = EBADF;
1429	goto out;
1430	}
1431	if (uap->arg)
1432	fp->f_fglob->fg_flag &= ~FNORDAHEAD;
1433	else
1434	fp->f_fglob->fg_flag \|= FNORDAHEAD;
1435
1436	goto out;
1437
1438	case F_NOCACHE:
1439	if (fp->f_type != DTYPE_VNODE) {
1440	error = EBADF;
1441	goto out;
1442	}
1443	if (uap->arg)
1444	fp->f_fglob->fg_flag \|= FNOCACHE;
1445	else
1446	fp->f_fglob->fg_flag &= ~FNOCACHE;
1447
1448	goto out;
1449
1450	case F_NODIRECT:
1451	if (fp->f_type != DTYPE_VNODE) {
1452	error = EBADF;
1453	goto out;
1454	}
1455	if (uap->arg)
1456	fp->f_fglob->fg_flag \|= FNODIRECT;
1457	else
1458	fp->f_fglob->fg_flag &= ~FNODIRECT;
1459
1460	goto out;
1461
1462	case F_SINGLE_WRITER:
1463	if (fp->f_type != DTYPE_VNODE) {
1464	error = EBADF;
1465	goto out;
1466	}
1467	if (uap->arg)
1468	fp->f_fglob->fg_flag \|= FSINGLE_WRITER;
1469	else
1470	fp->f_fglob->fg_flag &= ~FSINGLE_WRITER;
1471
1472	goto out;
1473
1474	case F_GLOBAL_NOCACHE:
1475	if (fp->f_type != DTYPE_VNODE) {
1476	error = EBADF;
1477	goto out;
1478	}
1479	vp = (struct vnode *)fp->f_data;
1480	proc_fdunlock(p);
1481
1482	if ( (error = vnode_getwithref(vp)) == `0` ) {
1483
1484	*retval = vnode_isnocache(vp);
1485
1486	if (uap->arg)
1487	vnode_setnocache(vp);
1488	else
1489	vnode_clearnocache(vp);
1490
1491	(void)vnode_put(vp);
1492	}
1493	goto outdrop;
1494
1495	case F_CHECK_OPENEVT:
1496	if (fp->f_type != DTYPE_VNODE) {
1497	error = EBADF;
1498	goto out;
1499	}
1500	vp = (struct vnode *)fp->f_data;
1501	proc_fdunlock(p);
1502
1503	if ( (error = vnode_getwithref(vp)) == `0` ) {
1504
1505	*retval = vnode_is_openevt(vp);
1506
1507	if (uap->arg)
1508	vnode_set_openevt(vp);
1509	else
1510	vnode_clear_openevt(vp);
1511
1512	(void)vnode_put(vp);
1513	}
1514	goto outdrop;
1515
1516	case F_RDADVISE: {
1517	struct radvisory ra_struct;
1518
1519	if (fp->f_type != DTYPE_VNODE) {
1520	error = EBADF;
1521	goto out;
1522	}
1523	vp = (struct vnode *)fp->f_data;
1524	proc_fdunlock(p);
1525
1526	if ( (error = copyin(argp, (caddr_t)&ra_struct, sizeof(ra_struct))) )
1527	goto outdrop;
1528	if ( (error = vnode_getwithref(vp)) == `0` ) {
1529	error = VNOP_IOCTL(vp, F_RDADVISE, (caddr_t)&ra_struct, `0`, &context);
1530
1531	(void)vnode_put(vp);
1532	}
1533	goto outdrop;
1534	}
1535
1536	case F_FLUSH_DATA:
1537
1538	if (fp->f_type != DTYPE_VNODE) {
1539	error = EBADF;
1540	goto out;
1541	}
1542	vp = (struct vnode *)fp->f_data;
1543	proc_fdunlock(p);
1544
1545	if ( (error = vnode_getwithref(vp)) == `0` ) {
1546	error = VNOP_FSYNC(vp, MNT_NOWAIT, &context);
1547
1548	(void)vnode_put(vp);
1549	}
1550	goto outdrop;
1551
1552	case F_LOG2PHYS:
1553	case F_LOG2PHYS_EXT: {
1554	struct log2phys l2p_struct = {}; / structure for allocate command /
1555	int devBlockSize;
1556
1557	off_t file_offset = `0`;
1558	size_t a_size = `0`;
1559	size_t run = `0`;
1560
1561	if (uap->cmd == F_LOG2PHYS_EXT) {
1562	error = copyin(argp, (caddr_t)&l2p_struct, sizeof(l2p_struct));
1563	if (error)
1564	goto out;
1565	file_offset = l2p_struct.l2p_devoffset;
1566	} else {
1567	file_offset = fp->f_offset;
1568	}
1569	if (fp->f_type != DTYPE_VNODE) {
1570	error = EBADF;
1571	goto out;
1572	}
1573	vp = (struct vnode *)fp->f_data;
1574	proc_fdunlock(p);
1575	if ( (error = vnode_getwithref(vp)) ) {
1576	goto outdrop;
1577	}
1578	error = VNOP_OFFTOBLK(vp, file_offset, &lbn);
1579	if (error) {
1580	(void)vnode_put(vp);
1581	goto outdrop;
1582	}
1583	error = VNOP_BLKTOOFF(vp, lbn, &offset);
1584	if (error) {
1585	(void)vnode_put(vp);
1586	goto outdrop;
1587	}
1588	devBlockSize = vfs_devblocksize(vnode_mount(vp));
1589	if (uap->cmd == F_LOG2PHYS_EXT) {
1590	if (l2p_struct.l2p_contigbytes < `0`) {
1591	vnode_put(vp);
1592	error = EINVAL;
1593	goto outdrop;
1594	}
1595
1596	a_size = MIN((uint64_t)l2p_struct.l2p_contigbytes, SIZE_MAX);
1597	} else {
1598	a_size = devBlockSize;
1599	}
1600
1601	error = VNOP_BLOCKMAP(vp, offset, a_size, &bn, &run, NULL, `0`, &context);
1602
1603	(void)vnode_put(vp);
1604
1605	if (!error) {
1606	l2p_struct.l2p_flags = `0`; / for now /
1607	if (uap->cmd == F_LOG2PHYS_EXT) {
1608	l2p_struct.l2p_contigbytes = run - (file_offset - offset);
1609	} else {
1610	l2p_struct.l2p_contigbytes = `0`; / for now /
1611	}
1612
1613	/*
1614	* The block number being -1 suggests that the file offset is not backed
1615	* by any real blocks on-disk. As a result, just let it be passed back up wholesale.
1616	*/
1617	if (bn == -`1`) {
1618	/* Don't multiply it by the block size */
1619	l2p_struct.l2p_devoffset = bn;
1620	}
1621	else {
1622	l2p_struct.l2p_devoffset = bn * devBlockSize;
1623	l2p_struct.l2p_devoffset += file_offset - offset;
1624	}
1625	error = copyout((caddr_t)&l2p_struct, argp, sizeof(l2p_struct));
1626	}
1627	goto outdrop;
1628	}
1629	case F_GETPATH: {
1630	char *pathbufp;
1631	int pathlen;
1632
1633	if (fp->f_type != DTYPE_VNODE) {
1634	error = EBADF;
1635	goto out;
1636	}
1637	vp = (struct vnode *)fp->f_data;
1638	proc_fdunlock(p);
1639
1640	pathlen = MAXPATHLEN;
1641	MALLOC(pathbufp, char *, pathlen, M_TEMP, M_WAITOK);
1642	if (pathbufp == NULL) {
1643	error = ENOMEM;
1644	goto outdrop;
1645	}
1646	if ( (error = vnode_getwithref(vp)) == `0` ) {
1647	error = vn_getpath(vp, pathbufp, &pathlen);
1648	(void)vnode_put(vp);
1649
1650	if (error == `0`)
1651	error = copyout((caddr_t)pathbufp, argp, pathlen);
1652	}
1653	FREE(pathbufp, M_TEMP);
1654	goto outdrop;
1655	}
1656
1657	case F_PATHPKG_CHECK: {
1658	char *pathbufp;
1659	size_t pathlen;
1660
1661	if (fp->f_type != DTYPE_VNODE) {
1662	error = EBADF;
1663	goto out;
1664	}
1665	vp = (struct vnode *)fp->f_data;
1666	proc_fdunlock(p);
1667
1668	pathlen = MAXPATHLEN;
1669	pathbufp = kalloc(MAXPATHLEN);
1670
1671	if ( (error = copyinstr(argp, pathbufp, MAXPATHLEN, &pathlen)) == `0` ) {
1672	if ( (error = vnode_getwithref(vp)) == `0` ) {
1673	AUDIT_ARG(text, pathbufp);
1674	error = vn_path_package_check(vp, pathbufp, pathlen, retval);
1675
1676	(void)vnode_put(vp);
1677	}
1678	}
1679	kfree(pathbufp, MAXPATHLEN);
1680	goto outdrop;
1681	}
1682
1683	case F_CHKCLEAN: // used by regression tests to see if all dirty pages got cleaned by fsync()
1684	case F_FULLFSYNC: // fsync + flush the journal + DKIOCSYNCHRONIZE
1685	case F_BARRIERFSYNC: // fsync + barrier
1686	case F_FREEZE_FS: // freeze all other fs operations for the fs of this fd
1687	case F_THAW_FS: { // thaw all frozen fs operations for the fs of this fd
1688	if (fp->f_type != DTYPE_VNODE) {
1689	error = EBADF;
1690	goto out;
1691	}
1692	vp = (struct vnode *)fp->f_data;
1693	proc_fdunlock(p);
1694
1695	if ( (error = vnode_getwithref(vp)) == `0` ) {
1696	error = VNOP_IOCTL(vp, uap->cmd, (caddr_t)NULL, `0`, &context);
1697
1698	(void)vnode_put(vp);
1699	}
1700	break;
1701	}
1702
1703	/*
1704	* SPI (private) for opening a file starting from a dir fd
1705	*/
1706	case F_OPENFROM: {
1707	struct user_fopenfrom fopen;
1708	struct vnode_attr va;
1709	struct nameidata nd;
1710	int cmode;
1711
1712	/* Check if this isn't a valid file descriptor */
1713	if ((fp->f_type != DTYPE_VNODE) \|\|
1714	(fp->f_flag & FREAD) == `0`) {
1715	error = EBADF;
1716	goto out;
1717	}
1718	vp = (struct vnode *)fp->f_data;
1719	proc_fdunlock(p);
1720
1721	if (vnode_getwithref(vp)) {
1722	error = ENOENT;
1723	goto outdrop;
1724	}
1725
1726	/* Only valid for directories */
1727	if (vp->v_type != VDIR) {
1728	vnode_put(vp);
1729	error = ENOTDIR;
1730	goto outdrop;
1731	}
1732
1733	/* Get flags, mode and pathname arguments. */
1734	if (IS_64BIT_PROCESS(p)) {
1735	error = copyin(argp, &fopen, sizeof(fopen));
1736	} else {
1737	struct user32_fopenfrom fopen32;
1738
1739	error = copyin(argp, &fopen32, sizeof(fopen32));
1740	fopen.o_flags = fopen32.o_flags;
1741	fopen.o_mode = fopen32.o_mode;
1742	fopen.o_pathname = CAST_USER_ADDR_T(fopen32.o_pathname);
1743	}
1744	if (error) {
1745	vnode_put(vp);
1746	goto outdrop;
1747	}
1748	AUDIT_ARG(fflags, fopen.o_flags);
1749	AUDIT_ARG(mode, fopen.o_mode);
1750	VATTR_INIT(&va);
1751	/* Mask off all but regular access permissions */
1752	cmode = ((fopen.o_mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
1753	VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
1754
1755	/* Start the lookup relative to the file descriptor's vnode. */
1756	NDINIT(&nd, LOOKUP, OP_OPEN, USEDVP \| FOLLOW \| AUDITVNPATH1, UIO_USERSPACE,
1757	fopen.o_pathname, &context);
1758	nd.ni_dvp = vp;
1759
1760	error = open1(&context, &nd, fopen.o_flags, &va,
1761	fileproc_alloc_init, NULL, retval);
1762
1763	vnode_put(vp);
1764	break;
1765	}
1766	/*
1767	* SPI (private) for unlinking a file starting from a dir fd
1768	*/
1769	case F_UNLINKFROM: {
1770	user_addr_t pathname;
1771
1772	/* Check if this isn't a valid file descriptor */
1773	if ((fp->f_type != DTYPE_VNODE) \|\|
1774	(fp->f_flag & FREAD) == `0`) {
1775	error = EBADF;
1776	goto out;
1777	}
1778	vp = (struct vnode *)fp->f_data;
1779	proc_fdunlock(p);
1780
1781	if (vnode_getwithref(vp)) {
1782	error = ENOENT;
1783	goto outdrop;
1784	}
1785
1786	/* Only valid for directories */
1787	if (vp->v_type != VDIR) {
1788	vnode_put(vp);
1789	error = ENOTDIR;
1790	goto outdrop;
1791	}
1792
1793	/* Get the pathname argument. */
1794	if (IS_64BIT_PROCESS(p)) {
1795	pathname = (user_addr_t)argp;
1796	} else {
1797	pathname = CAST_USER_ADDR_T(argp);
1798	}
1799
1800	/* Start the lookup relative to the file descriptor's vnode. */
1801	error = unlink1(&context, vp, pathname, UIO_USERSPACE, `0`);
1802
1803	vnode_put(vp);
1804	break;
1805
1806	}
1807
1808	case F_ADDSIGS:
1809	case F_ADDFILESIGS:
1810	case F_ADDFILESIGS_FOR_DYLD_SIM:
1811	case F_ADDFILESIGS_RETURN:
1812	{
1813	struct cs_blob *blob = NULL;
1814	struct user_fsignatures fs;
1815	kern_return_t kr;
1816	vm_offset_t kernel_blob_addr;
1817	vm_size_t kernel_blob_size;
1818	int blob_add_flags = `0`;
1819
1820	if (fp->f_type != DTYPE_VNODE) {
1821	error = EBADF;
1822	goto out;
1823	}
1824	vp = (struct vnode *)fp->f_data;
1825	proc_fdunlock(p);
1826
1827	if (uap->cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
1828	blob_add_flags \|= MAC_VNODE_CHECK_DYLD_SIM;
1829	if ((p->p_csflags & CS_KILL) == `0`) {
1830	proc_lock(p);
1831	p->p_csflags \|= CS_KILL;
1832	proc_unlock(p);
1833	}
1834	}
1835
1836	error = vnode_getwithref(vp);
1837	if (error)
1838	goto outdrop;
1839
1840	if (IS_64BIT_PROCESS(p)) {
1841	error = copyin(argp, &fs, sizeof (fs));
1842	} else {
1843	struct user32_fsignatures fs32;
1844
1845	error = copyin(argp, &fs32, sizeof (fs32));
1846	fs.fs_file_start = fs32.fs_file_start;
1847	fs.fs_blob_start = CAST_USER_ADDR_T(fs32.fs_blob_start);
1848	fs.fs_blob_size = fs32.fs_blob_size;
1849	}
1850
1851	if (error) {
1852	vnode_put(vp);
1853	goto outdrop;
1854	}
1855
1856	/*
1857	* First check if we have something loaded at this offset
1858	*/
1859	blob = ubc_cs_blob_get(vp, CPU_TYPE_ANY, fs.fs_file_start);
1860	if (blob != NULL)
1861	{
1862	/* If this is for dyld_sim revalidate the blob */
1863	if (uap->cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
1864	error = ubc_cs_blob_revalidate(vp, blob, NULL, blob_add_flags);
1865	if (error) {
1866	blob = NULL;
1867	if (error != EAGAIN) {
1868	vnode_put(vp);
1869	goto outdrop;
1870	}
1871	}
1872	}
1873	}
1874
1875	if (blob == NULL) {
1876	/*
1877	* An arbitrary limit, to prevent someone from mapping in a 20GB blob. This should cover
1878	* our use cases for the immediate future, but note that at the time of this commit, some
1879	* platforms are nearing 2MB blob sizes (with a prior soft limit of 2.5MB).
1880	*
1881	* We should consider how we can manage this more effectively; the above means that some
1882	* platforms are using megabytes of memory for signing data; it merely hasn't crossed the
1883	* threshold considered ridiculous at the time of this change.
1884	*/
1885	#define CS_MAX_BLOB_SIZE (40ULL * 1024ULL * 1024ULL)
1886	if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
1887	error = E2BIG;
1888	vnode_put(vp);
1889	goto outdrop;
1890	}
1891
1892	kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
1893	kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
1894	if (kr != KERN_SUCCESS) {
1895	error = ENOMEM;
1896	vnode_put(vp);
1897	goto outdrop;
1898	}
1899
1900	if(uap->cmd == F_ADDSIGS) {
1901	error = copyin(fs.fs_blob_start,
1902	(void *) kernel_blob_addr,
1903	kernel_blob_size);
1904	} else / F_ADDFILESIGS \|\| F_ADDFILESIGS_RETURN \|\| F_ADDFILESIGS_FOR_DYLD_SIM / {
1905	int resid;
1906
1907	error = vn_rdwr(UIO_READ,
1908	vp,
1909	(caddr_t) kernel_blob_addr,
1910	kernel_blob_size,
1911	fs.fs_file_start + fs.fs_blob_start,
1912	UIO_SYSSPACE,
1913	`0`,
1914	kauth_cred_get(),
1915	&resid,
1916	p);
1917	if ((error == `0`) && resid) {
1918	/* kernel_blob_size rounded to a page size, but signature may be at end of file */
1919	memset((void *)(kernel_blob_addr + (kernel_blob_size - resid)), `0x0`, resid);
1920	}
1921	}
1922
1923	if (error) {
1924	ubc_cs_blob_deallocate(kernel_blob_addr,
1925	kernel_blob_size);
1926	vnode_put(vp);
1927	goto outdrop;
1928	}
1929
1930	blob = NULL;
1931	error = ubc_cs_blob_add(vp,
1932	CPU_TYPE_ANY, / not for a specific architecture /
1933	fs.fs_file_start,
1934	&kernel_blob_addr,
1935	kernel_blob_size,
1936	NULL,
1937	blob_add_flags,
1938	&blob);
1939
1940	/* ubc_cs_blob_add() has consumed "kernel_blob_addr" if it is zeroed */
1941	if (error) {
1942	if (kernel_blob_addr) {
1943	ubc_cs_blob_deallocate(kernel_blob_addr,
1944	kernel_blob_size);
1945	}
1946	vnode_put(vp);
1947	goto outdrop;
1948	} else {
1949	#if CHECK_CS_VALIDATION_BITMAP
1950	ubc_cs_validation_bitmap_allocate( vp );
1951	#endif
1952	}
1953	}
1954
1955	if (uap->cmd == F_ADDFILESIGS_RETURN \|\| uap->cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
1956	/*
1957	* The first element of the structure is an
1958	* off_t that happens to have the same size for
1959	* all archs. Let's overwrite that.
1960	*/
1961	off_t end_offset = `0`;
1962	if (blob)
1963	end_offset = blob->csb_end_offset;
1964	error = copyout(&end_offset, argp, sizeof (end_offset));
1965	}
1966
1967	(void) vnode_put(vp);
1968	break;
1969	}
1970	case F_GETCODEDIR:
1971	case F_FINDSIGS: {
1972	error = ENOTSUP;
1973	goto out;
1974	}
1975	case F_CHECK_LV: {
1976	struct fileglob *fg;
1977	fchecklv_t lv;
1978
1979	if (fp->f_type != DTYPE_VNODE) {
1980	error = EBADF;
1981	goto out;
1982	}
1983	fg = fp->f_fglob;
1984	proc_fdunlock(p);
1985
1986	if (IS_64BIT_PROCESS(p)) {
1987	error = copyin(argp, &lv, sizeof (lv));
1988	} else {
1989	struct user32_fchecklv lv32;
1990
1991	error = copyin(argp, &lv32, sizeof (lv32));
1992	lv.lv_file_start = lv32.lv_file_start;
1993	lv.lv_error_message = (void *)(uintptr_t)lv32.lv_error_message;
1994	lv.lv_error_message_size = lv32.lv_error_message;
1995	}
1996	if (error)
1997	goto outdrop;
1998
1999	#if CONFIG_MACF
2000	error = mac_file_check_library_validation(p, fg, lv.lv_file_start,
2001	(user_long_t)lv.lv_error_message, lv.lv_error_message_size);
2002	#endif
2003
2004	break;
2005	}
2006	#if CONFIG_PROTECT
2007	case F_GETPROTECTIONCLASS: {
2008	if (fp->f_type != DTYPE_VNODE) {
2009	error = EBADF;
2010	goto out;
2011	}
2012	vp = (struct vnode *)fp->f_data;
2013
2014	proc_fdunlock(p);
2015
2016	if (vnode_getwithref(vp)) {
2017	error = ENOENT;
2018	goto outdrop;
2019	}
2020
2021	struct vnode_attr va;
2022
2023	VATTR_INIT(&va);
2024	VATTR_WANTED(&va, va_dataprotect_class);
2025	error = VNOP_GETATTR(vp, &va, &context);
2026	if (!error) {
2027	if (VATTR_IS_SUPPORTED(&va, va_dataprotect_class))
2028	*retval = va.va_dataprotect_class;
2029	else
2030	error = ENOTSUP;
2031	}
2032
2033	vnode_put(vp);
2034	break;
2035	}
2036
2037	case F_SETPROTECTIONCLASS: {
2038	/* tmp must be a valid PROTECTION_CLASS_* */
2039	tmp = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);
2040
2041	if (fp->f_type != DTYPE_VNODE) {
2042	error = EBADF;
2043	goto out;
2044	}
2045	vp = (struct vnode *)fp->f_data;
2046
2047	proc_fdunlock(p);
2048
2049	if (vnode_getwithref(vp)) {
2050	error = ENOENT;
2051	goto outdrop;
2052	}
2053
2054	/* Only go forward if you have write access */
2055	vfs_context_t ctx = vfs_context_current();
2056	if(vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS \| KAUTH_VNODE_WRITE_DATA), ctx) != `0`) {
2057	vnode_put(vp);
2058	error = EBADF;
2059	goto outdrop;
2060	}
2061
2062	struct vnode_attr va;
2063
2064	VATTR_INIT(&va);
2065	VATTR_SET(&va, va_dataprotect_class, tmp);
2066
2067	error = VNOP_SETATTR(vp, &va, ctx);
2068
2069	vnode_put(vp);
2070	break;
2071	}
2072
2073	case F_TRANSCODEKEY: {
2074	if (fp->f_type != DTYPE_VNODE) {
2075	error = EBADF;
2076	goto out;
2077	}
2078
2079	vp = (struct vnode *)fp->f_data;
2080	proc_fdunlock(p);
2081
2082	if (vnode_getwithref(vp)) {
2083	error = ENOENT;
2084	goto outdrop;
2085	}
2086
2087	cp_key_t k = {
2088	.len = CP_MAX_WRAPPEDKEYSIZE,
2089	};
2090
2091	MALLOC(k.key, char *, k.len, M_TEMP, M_WAITOK \| M_ZERO);
2092
2093	error = VNOP_IOCTL(vp, F_TRANSCODEKEY, (caddr_t)&k, `1`, &context);
2094
2095	vnode_put(vp);
2096
2097	if (error == `0`) {
2098	error = copyout(k.key, argp, k.len);
2099	*retval = k.len;
2100	}
2101
2102	FREE(k.key, M_TEMP);
2103
2104	break;
2105	}
2106
2107	case F_GETPROTECTIONLEVEL: {
2108	if (fp->f_type != DTYPE_VNODE) {
2109	error = EBADF;
2110	goto out;
2111	}
2112
2113	vp = (struct vnode*) fp->f_data;
2114	proc_fdunlock (p);
2115
2116	if (vnode_getwithref(vp)) {
2117	error = ENOENT;
2118	goto outdrop;
2119	}
2120
2121	error = VNOP_IOCTL(vp, F_GETPROTECTIONLEVEL, (caddr_t)retval, `0`, &context);
2122
2123	vnode_put (vp);
2124	break;
2125	}
2126
2127	case F_GETDEFAULTPROTLEVEL: {
2128	if (fp->f_type != DTYPE_VNODE) {
2129	error = EBADF;
2130	goto out;
2131	}
2132
2133	vp = (struct vnode*) fp->f_data;
2134	proc_fdunlock (p);
2135
2136	if (vnode_getwithref(vp)) {
2137	error = ENOENT;
2138	goto outdrop;
2139	}
2140
2141	/*
2142	* if cp_get_major_vers fails, error will be set to proper errno
2143	* and cp_version will still be 0.
2144	*/
2145
2146	error = VNOP_IOCTL(vp, F_GETDEFAULTPROTLEVEL, (caddr_t)retval, `0`, &context);
2147
2148	vnode_put (vp);
2149	break;
2150	}
2151
2152	#endif /* CONFIG_PROTECT */
2153
2154	case F_MOVEDATAEXTENTS: {
2155	struct fileproc *fp2 = NULL;
2156	struct vnode *src_vp = NULLVP;
2157	struct vnode *dst_vp = NULLVP;
2158	/* We need to grab the 2nd FD out of the arguments before moving on. */
2159	int fd2 = CAST_DOWN_EXPLICIT(int32_t, uap->arg);
2160
2161	error = priv_check_cred(kauth_cred_get(), PRIV_VFS_MOVE_DATA_EXTENTS, `0`);
2162	if (error)
2163	goto out;
2164
2165	if (fp->f_type != DTYPE_VNODE) {
2166	error = EBADF;
2167	goto out;
2168	}
2169
2170	/* For now, special case HFS+ only, since this is SPI. */
2171	src_vp = (struct vnode *)fp->f_data;
2172	if (src_vp->v_tag != VT_HFS) {
2173	error = ENOTSUP;
2174	goto out;
2175	}
2176
2177	/*
2178	* Get the references before we start acquiring iocounts on the vnodes,
2179	* while we still hold the proc fd lock
2180	*/
2181	if ( (error = fp_lookup(p, fd2, &fp2, `1`)) ) {
2182	error = EBADF;
2183	goto out;
2184	}
2185	if (fp2->f_type != DTYPE_VNODE) {
2186	fp_drop(p, fd2, fp2, `1`);
2187	error = EBADF;
2188	goto out;
2189	}
2190	dst_vp = (struct vnode *)fp2->f_data;
2191	if (dst_vp->v_tag != VT_HFS) {
2192	fp_drop(p, fd2, fp2, `1`);
2193	error = ENOTSUP;
2194	goto out;
2195	}
2196
2197	#if CONFIG_MACF
2198	/* Re-do MAC checks against the new FD, pass in a fake argument */
2199	error = mac_file_check_fcntl(proc_ucred(p), fp2->f_fglob, uap->cmd, `0`);
2200	if (error) {
2201	fp_drop(p, fd2, fp2, `1`);
2202	goto out;
2203	}
2204	#endif
2205	/* Audit the 2nd FD */
2206	AUDIT_ARG(fd, fd2);
2207
2208	proc_fdunlock(p);
2209
2210	if (vnode_getwithref(src_vp)) {
2211	fp_drop(p, fd2, fp2, `0`);
2212	error = ENOENT;
2213	goto outdrop;
2214	}
2215	if (vnode_getwithref(dst_vp)) {
2216	vnode_put (src_vp);
2217	fp_drop(p, fd2, fp2, `0`);
2218	error = ENOENT;
2219	goto outdrop;
2220	}
2221
2222	/*
2223	* Basic asserts; validate they are not the same and that
2224	* both live on the same filesystem.
2225	*/
2226	if (dst_vp == src_vp) {
2227	vnode_put (src_vp);
2228	vnode_put (dst_vp);
2229	fp_drop (p, fd2, fp2, `0`);
2230	error = EINVAL;
2231	goto outdrop;
2232	}
2233
2234	if (dst_vp->v_mount != src_vp->v_mount) {
2235	vnode_put (src_vp);
2236	vnode_put (dst_vp);
2237	fp_drop (p, fd2, fp2, `0`);
2238	error = EXDEV;
2239	goto outdrop;
2240	}
2241
2242	/* Now we have a legit pair of FDs. Go to work */
2243
2244	/* Now check for write access to the target files */
2245	if(vnode_authorize(src_vp, NULLVP,
2246	(KAUTH_VNODE_ACCESS \| KAUTH_VNODE_WRITE_DATA), &context) != `0`) {
2247	vnode_put(src_vp);
2248	vnode_put(dst_vp);
2249	fp_drop(p, fd2, fp2, `0`);
2250	error = EBADF;
2251	goto outdrop;
2252	}
2253
2254	if(vnode_authorize(dst_vp, NULLVP,
2255	(KAUTH_VNODE_ACCESS \| KAUTH_VNODE_WRITE_DATA), &context) != `0`) {
2256	vnode_put(src_vp);
2257	vnode_put(dst_vp);
2258	fp_drop(p, fd2, fp2, `0`);
2259	error = EBADF;
2260	goto outdrop;
2261	}
2262
2263	/* Verify that both vps point to files and not directories */
2264	if ( !vnode_isreg(src_vp) \|\| !vnode_isreg(dst_vp)) {
2265	error = EINVAL;
2266	vnode_put (src_vp);
2267	vnode_put (dst_vp);
2268	fp_drop (p, fd2, fp2, `0`);
2269	goto outdrop;
2270	}
2271
2272	/*
2273	* The exchangedata syscall handler passes in 0 for the flags to VNOP_EXCHANGE.
2274	* We'll pass in our special bit indicating that the new behavior is expected
2275	*/
2276
2277	error = VNOP_EXCHANGE(src_vp, dst_vp, FSOPT_EXCHANGE_DATA_ONLY, &context);
2278
2279	vnode_put (src_vp);
2280	vnode_put (dst_vp);
2281	fp_drop(p, fd2, fp2, `0`);
2282	break;
2283	}
2284
2285	/*
2286	* SPI for making a file compressed.
2287	*/
2288	case F_MAKECOMPRESSED: {
2289	uint32_t gcounter = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);
2290
2291	if (fp->f_type != DTYPE_VNODE) {
2292	error = EBADF;
2293	goto out;
2294	}
2295
2296	vp = (struct vnode*) fp->f_data;
2297	proc_fdunlock (p);
2298
2299	/* get the vnode */
2300	if (vnode_getwithref(vp)) {
2301	error = ENOENT;
2302	goto outdrop;
2303	}
2304
2305	/* Is it a file? */
2306	if ((vnode_isreg(vp) == `0`) && (vnode_islnk(vp) == `0`)) {
2307	vnode_put(vp);
2308	error = EBADF;
2309	goto outdrop;
2310	}
2311
2312	/* invoke ioctl to pass off to FS */
2313	/* Only go forward if you have write access */
2314	vfs_context_t ctx = vfs_context_current();
2315	if(vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS \| KAUTH_VNODE_WRITE_DATA), ctx) != `0`) {
2316	vnode_put(vp);
2317	error = EBADF;
2318	goto outdrop;
2319	}
2320
2321	error = VNOP_IOCTL(vp, uap->cmd, (caddr_t)&gcounter, `0`, &context);
2322
2323	vnode_put (vp);
2324	break;
2325	}
2326
2327	/*
2328	* SPI (private) for indicating to a filesystem that subsequent writes to
2329	* the open FD will be written to the Fastflow.
2330	*/
2331	case F_SET_GREEDY_MODE:
2332	/* Intentionally drop through to the same handler as F_SETSTATICCONTENT.
2333	* Both fcntls should pass the argument and their selector into VNOP_IOCTL.
2334	*/
2335
2336	/*
2337	* SPI (private) for indicating to a filesystem that subsequent writes to
2338	* the open FD will represent static content.
2339	*/
2340	case F_SETSTATICCONTENT: {
2341	caddr_t ioctl_arg = NULL;
2342
2343	if (uap->arg) {
2344	ioctl_arg = (caddr_t) `1`;
2345	}
2346
2347	if (fp->f_type != DTYPE_VNODE) {
2348	error = EBADF;
2349	goto out;
2350	}
2351	vp = (struct vnode *)fp->f_data;
2352	proc_fdunlock(p);
2353
2354	error = vnode_getwithref(vp);
2355	if (error) {
2356	error = ENOENT;
2357	goto outdrop;
2358	}
2359
2360	/* Only go forward if you have write access */
2361	vfs_context_t ctx = vfs_context_current();
2362	if(vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS \| KAUTH_VNODE_WRITE_DATA), ctx) != `0`) {
2363	vnode_put(vp);
2364	error = EBADF;
2365	goto outdrop;
2366	}
2367
2368	error = VNOP_IOCTL(vp, uap->cmd, ioctl_arg, `0`, &context);
2369	(void)vnode_put(vp);
2370
2371	break;
2372	}
2373
2374	/*
2375	* SPI (private) for indicating to the lower level storage driver that the
2376	* subsequent writes should be of a particular IO type (burst, greedy, static),
2377	* or other flavors that may be necessary.
2378	*/
2379	case F_SETIOTYPE: {
2380	caddr_t param_ptr;
2381	uint32_t param;
2382
2383	if (uap->arg) {
2384	/* extract 32 bits of flags from userland */
2385	param_ptr = (caddr_t) uap->arg;
2386	param = (uint32_t) param_ptr;
2387	}
2388	else {
2389	/* If no argument is specified, error out */
2390	error = EINVAL;
2391	goto out;
2392	}
2393
2394	/*
2395	* Validate the different types of flags that can be specified:
2396	* all of them are mutually exclusive for now.
2397	*/
2398	switch (param) {
2399	case F_IOTYPE_ISOCHRONOUS:
2400	break;
2401
2402	default:
2403	error = EINVAL;
2404	goto out;
2405	}
2406
2407
2408	if (fp->f_type != DTYPE_VNODE) {
2409	error = EBADF;
2410	goto out;
2411	}
2412	vp = (struct vnode *)fp->f_data;
2413	proc_fdunlock(p);
2414
2415	error = vnode_getwithref(vp);
2416	if (error) {
2417	error = ENOENT;
2418	goto outdrop;
2419	}
2420
2421	/* Only go forward if you have write access */
2422	vfs_context_t ctx = vfs_context_current();
2423	if(vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS \| KAUTH_VNODE_WRITE_DATA), ctx) != `0`) {
2424	vnode_put(vp);
2425	error = EBADF;
2426	goto outdrop;
2427	}
2428
2429	error = VNOP_IOCTL(vp, uap->cmd, param_ptr, `0`, &context);
2430	(void)vnode_put(vp);
2431
2432	break;
2433	}
2434
2435	/*
2436	* Set the vnode pointed to by 'fd'
2437	* and tag it as the (potentially future) backing store
2438	* for another filesystem
2439	*/
2440	case F_SETBACKINGSTORE: {
2441	if (fp->f_type != DTYPE_VNODE) {
2442	error = EBADF;
2443	goto out;
2444	}
2445
2446	vp = (struct vnode *)fp->f_data;
2447
2448	if (vp->v_tag != VT_HFS) {
2449	error = EINVAL;
2450	goto out;
2451	}
2452	proc_fdunlock(p);
2453
2454	if (vnode_getwithref(vp)) {
2455	error = ENOENT;
2456	goto outdrop;
2457	}
2458
2459	/* only proceed if you have write access */
2460	vfs_context_t ctx = vfs_context_current();
2461	if(vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS \| KAUTH_VNODE_WRITE_DATA), ctx) != `0`) {
2462	vnode_put(vp);
2463	error = EBADF;
2464	goto outdrop;
2465	}
2466
2467
2468	/* If arg != 0, set, otherwise unset */
2469	if (uap->arg) {
2470	error = VNOP_IOCTL (vp, uap->cmd, (caddr_t)`1`, `0`, &context);
2471	}
2472	else {
2473	error = VNOP_IOCTL (vp, uap->cmd, (caddr_t)NULL, `0`, &context);
2474	}
2475
2476	vnode_put(vp);
2477	break;
2478	}
2479
2480	/*
2481	* like F_GETPATH, but special semantics for
2482	* the mobile time machine handler.
2483	*/
2484	case F_GETPATH_MTMINFO: {
2485	char *pathbufp;
2486	int pathlen;
2487
2488	if (fp->f_type != DTYPE_VNODE) {
2489	error = EBADF;
2490	goto out;
2491	}
2492	vp = (struct vnode *)fp->f_data;
2493	proc_fdunlock(p);
2494
2495	pathlen = MAXPATHLEN;
2496	MALLOC(pathbufp, char *, pathlen, M_TEMP, M_WAITOK);
2497	if (pathbufp == NULL) {
2498	error = ENOMEM;
2499	goto outdrop;
2500	}
2501	if ( (error = vnode_getwithref(vp)) == `0` ) {
2502	int backingstore = `0`;
2503
2504	/ Check for error from vn_getpath before moving on /
2505	if ((error = vn_getpath(vp, pathbufp, &pathlen)) == `0`) {
2506	if (vp->v_tag == VT_HFS) {
2507	error = VNOP_IOCTL (vp, uap->cmd, (caddr_t) &backingstore, `0`, &context);
2508	}
2509	(void)vnode_put(vp);
2510
2511	if (error == `0`) {
2512	error = copyout((caddr_t)pathbufp, argp, pathlen);
2513	}
2514	if (error == `0`) {
2515	/*
2516	* If the copyout was successful, now check to ensure
2517	* that this vnode is not a BACKINGSTORE vnode. mtmd
2518	* wants the path regardless.
2519	*/
2520	if (backingstore) {
2521	error = EBUSY;
2522	}
2523	}
2524	} else
2525	(void)vnode_put(vp);
2526	}
2527	FREE(pathbufp, M_TEMP);
2528	goto outdrop;
2529	}
2530
2531	#if DEBUG \|\| DEVELOPMENT
2532	case F_RECYCLE:
2533	if (fp->f_type != DTYPE_VNODE) {
2534	error = EBADF;
2535	goto out;
2536	}
2537	vp = (struct vnode *)fp->f_data;
2538	proc_fdunlock(p);
2539
2540	vnode_recycle(vp);
2541	break;
2542	#endif
2543
2544	default:
2545	/*
2546	* This is an fcntl() that we do not recognize at this level;
2547	* if this is a vnode, we send it down into the VNOP_IOCTL
2548	* for this vnode; this can include special devices, and will
2549	* effectively overload fcntl() to send ioctl()'s.
2550	*/
2551	if((uap->cmd & IOC_VOID) && (uap->cmd & IOC_INOUT)){
2552	error = EINVAL;
2553	goto out;
2554	}
2555
2556	/ Catch any now-invalid fcntl() selectors /
2557	switch (uap->cmd) {
2558	case (int)APFSIOC_REVERT_TO_SNAPSHOT:
2559	case (int)FSIOC_FIOSEEKHOLE:
2560	case (int)FSIOC_FIOSEEKDATA:
2561	case HFS_GET_BOOT_INFO:
2562	case HFS_SET_BOOT_INFO:
2563	case FIOPINSWAP:
2564	case F_MARKDEPENDENCY:
2565	error = EINVAL;
2566	goto out;
2567	default:
2568	break;
2569	}
2570
2571	if (fp->f_type != DTYPE_VNODE) {
2572	error = EBADF;
2573	goto out;
2574	}
2575	vp = (struct vnode *)fp->f_data;
2576	proc_fdunlock(p);
2577
2578	if ( (error = vnode_getwithref(vp)) == `0` ) {
2579	#define STK_PARAMS 128
2580	char stkbuf[STK_PARAMS] = {`0`};
2581	unsigned int size;
2582	caddr_t data, memp;
2583	/*
2584	* For this to work properly, we have to copy in the
2585	* ioctl() cmd argument if there is one; we must also
2586	* check that a command parameter, if present, does
2587	* not exceed the maximum command length dictated by
2588	* the number of bits we have available in the command
2589	* to represent a structure length. Finally, we have
2590	* to copy the results back out, if it is that type of
2591	* ioctl().
2592	*/
2593	size = IOCPARM_LEN(uap->cmd);
2594	if (size > IOCPARM_MAX) {
2595	(void)vnode_put(vp);
2596	error = EINVAL;
2597	break;
2598	}
2599
2600	memp = NULL;
2601	if (size > sizeof (stkbuf)) {
2602	if ((memp = (caddr_t)kalloc(size)) == `0`) {
2603	(void)vnode_put(vp);
2604	error = ENOMEM;
2605	goto outdrop;
2606	}
2607	data = memp;
2608	} else {
2609	data = &stkbuf[`0`];
2610	}
2611
2612	if (uap->cmd & IOC_IN) {
2613	if (size) {
2614	/ structure /
2615	error = copyin(argp, data, size);
2616	if (error) {
2617	(void)vnode_put(vp);
2618	if (memp)
2619	kfree(memp, size);
2620	goto outdrop;
2621	}
2622
2623	/ Bzero the section beyond that which was needed /
2624	if (size <= sizeof(stkbuf)) {
2625	bzero ( (((uint8_t)data) + size), (sizeof*(stkbuf) - size));
2626	}
2627	} else {
2628	/ int /
2629	if (is64bit) {
2630	(user_addr_t )data = argp;
2631	} else {
2632	(uint32_t )data = (uint32_t)argp;
2633	}
2634	};
2635	} else if ((uap->cmd & IOC_OUT) && size) {
2636	/*
2637	* Zero the buffer so the user always
2638	* gets back something deterministic.
2639	*/
2640	bzero(data, size);
2641	} else if (uap->cmd & IOC_VOID) {
2642	if (is64bit) {
2643	(user_addr_t )data = argp;
2644	} else {
2645	(uint32_t )data = (uint32_t)argp;
2646	}
2647	}
2648
2649	error = VNOP_IOCTL(vp, uap->cmd, CAST_DOWN(caddr_t, data), `0`, &context);
2650
2651	(void)vnode_put(vp);
2652
2653	/ Copy any output data to user /
2654	if (error == `0` && (uap->cmd & IOC_OUT) && size)
2655	error = copyout(data, argp, size);
2656	if (memp)
2657	kfree(memp, size);
2658	}
2659	break;
2660	}
2661
2662	outdrop:
2663	AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2664	fp_drop(p, fd, fp, `0`);
2665	return(error);
2666	out:
2667	fp_drop(p, fd, fp, `1`);
2668	proc_fdunlock(p);
2669	return(error);
2670	}
2671
2672
2673	/*
2674	* finishdup
2675	*
2676	* Description: Common code for dup, dup2, and fcntl(F_DUPFD).
2677	*
2678	* Parameters: p Process performing the dup
2679	* old The fd to dup
2680	* new The fd to dup it to
2681	* fd_flags Flags to augment the new fd
2682	* retval Pointer to the call return area
2683	*
2684	* Returns: 0 Success
2685	* EBADF
2686	* ENOMEM
2687	*
2688	* Implicit returns:
2689	* *retval (modified) The new descriptor
2690	*
2691	* Locks: Assumes proc_fdlock for process pointing to fdp is held by
2692	* the caller
2693	*
2694	* Notes: This function may drop and reacquire this lock; it is unsafe
2695	* for a caller to assume that other state protected by the lock
2696	* has not been subsequently changed out from under it.
2697	*/
2698	int
2699	finishdup(proc_t p,
2700	struct filedesc fdp, int* old, int new, int fd_flags, int32_t *retval)
2701	{
2702	struct fileproc *nfp;
2703	struct fileproc *ofp;
2704	#if CONFIG_MACF
2705	int error;
2706	#endif
2707
2708	#if DIAGNOSTIC
2709	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
2710	#endif
2711	if ((ofp = fdp->fd_ofiles[old]) == NULL \|\|
2712	(fdp->fd_ofileflags[old] & UF_RESERVED)) {
2713	fdrelse(p, new);
2714	return (EBADF);
2715	}
2716	fg_ref(ofp);
2717
2718	#if CONFIG_MACF
2719	error = mac_file_check_dup(proc_ucred(p), ofp->f_fglob, new);
2720	if (error) {
2721	fg_drop(ofp);
2722	fdrelse(p, new);
2723	return (error);
2724	}
2725	#endif
2726
2727	proc_fdunlock(p);
2728
2729	nfp = fileproc_alloc_init(NULL);
2730
2731	proc_fdlock(p);
2732
2733	if (nfp == NULL) {
2734	fg_drop(ofp);
2735	fdrelse(p, new);
2736	return (ENOMEM);
2737	}
2738
2739	nfp->f_fglob = ofp->f_fglob;
2740
2741	#if DIAGNOSTIC
2742	if (fdp->fd_ofiles[new] != `0`)
2743	panic("finishdup: overwriting fd_ofiles with new %d", new);
2744	if ((fdp->fd_ofileflags[new] & UF_RESERVED) == `0`)
2745	panic("finishdup: unreserved fileflags with new %d", new);
2746	#endif
2747
2748	if (new > fdp->fd_lastfile)
2749	fdp->fd_lastfile = new;
2750	*fdflags(p, new) \|= fd_flags;
2751	procfdtbl_releasefd(p, new, nfp);
2752	*retval = new;
2753	return (`0`);
2754	}
2755
2756
2757	/*
2758	* close
2759	*
2760	* Description: The implementation of the close(2) system call
2761	*
2762	* Parameters: p Process in whose per process file table
2763	* the close is to occur
2764	* uap->fd fd to be closed
2765	* retval <unused>
2766	*
2767	* Returns: 0 Success
2768	* fp_lookup:EBADF Bad file descriptor
2769	* fp_guard_exception:??? Guarded file descriptor
2770	* close_internal:EBADF
2771	* close_internal:??? Anything returnable by a per-fileops
2772	* close function
2773	*/
2774	int
2775	close(proc_t p, struct close_args uap, int32_t retval)
2776	{
2777	__pthread_testcancel(`1`);
2778	return(close_nocancel(p, (struct close_nocancel_args *)uap, retval));
2779	}
2780
2781
2782	int
2783	close_nocancel(proc_t p, struct close_nocancel_args uap, __unused int32_t retval)
2784	{
2785	struct fileproc *fp;
2786	int fd = uap->fd;
2787	int error;
2788
2789	AUDIT_SYSCLOSE(p, fd);
2790
2791	proc_fdlock(p);
2792
2793	if ( (error = fp_lookup(p,fd,&fp, `1`)) ) {
2794	proc_fdunlock(p);
2795	return(error);
2796	}
2797
2798	if (FP_ISGUARDED(fp, GUARD_CLOSE)) {
2799	error = fp_guard_exception(p, fd, fp, kGUARD_EXC_CLOSE);
2800	(void) fp_drop(p, fd, fp, `1`);
2801	proc_fdunlock(p);
2802	return (error);
2803	}
2804
2805	error = close_internal_locked(p, fd, fp, `0`);
2806
2807	proc_fdunlock(p);
2808
2809	return (error);
2810	}
2811
2812
2813	/*
2814	* close_internal_locked
2815	*
2816	* Close a file descriptor.
2817	*
2818	* Parameters: p Process in whose per process file table
2819	* the close is to occur
2820	* fd fd to be closed
2821	* fp fileproc associated with the fd
2822	*
2823	* Returns: 0 Success
2824	* EBADF fd already in close wait state
2825	* closef_locked:??? Anything returnable by a per-fileops
2826	* close function
2827	*
2828	* Locks: Assumes proc_fdlock for process is held by the caller and returns
2829	* with lock held
2830	*
2831	* Notes: This function may drop and reacquire this lock; it is unsafe
2832	* for a caller to assume that other state protected by the lock
2833	* has not been subsequently changed out from under it.
2834	*/
2835	int
2836	close_internal_locked(proc_t p, int fd, struct fileproc fp, int* flags)
2837	{
2838	struct filedesc *fdp = p->p_fd;
2839	int error =`0`;
2840	int resvfd = flags & FD_DUP2RESV;
2841
2842
2843	#if DIAGNOSTIC
2844	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
2845	#endif
2846
2847	/ Keep people from using the filedesc while we are closing it /
2848	procfdtbl_markclosefd(p, fd);
2849
2850
2851	if ((fp->f_flags & FP_CLOSING) == FP_CLOSING) {
2852	panic("close_internal_locked: being called on already closing fd");
2853	}
2854
2855
2856	#if DIAGNOSTIC
2857	if ((fdp->fd_ofileflags[fd] & UF_RESERVED) == `0`)
2858	panic("close_internal: unreserved fileflags with fd %d", fd);
2859	#endif
2860
2861	fp->f_flags \|= FP_CLOSING;
2862
2863	if ( (fp->f_flags & FP_AIOISSUED) \|\| kauth_authorize_fileop_has_listeners() ) {
2864
2865	proc_fdunlock(p);
2866
2867	if ( (fp->f_type == DTYPE_VNODE) && kauth_authorize_fileop_has_listeners() ) {
2868	/*
2869	* call out to allow 3rd party notification of close.
2870	* Ignore result of kauth_authorize_fileop call.
2871	*/
2872	if (vnode_getwithref((vnode_t)fp->f_data) == `0`) {
2873	u_int fileop_flags = `0`;
2874	if ((fp->f_flags & FP_WRITTEN) != `0`)
2875	fileop_flags \|= KAUTH_FILEOP_CLOSE_MODIFIED;
2876	kauth_authorize_fileop(fp->f_fglob->fg_cred, KAUTH_FILEOP_CLOSE,
2877	(uintptr_t)fp->f_data, (uintptr_t)fileop_flags);
2878	vnode_put((vnode_t)fp->f_data);
2879	}
2880	}
2881	if (fp->f_flags & FP_AIOISSUED)
2882	/*
2883	* cancel all async IO requests that can be cancelled.
2884	*/
2885	_aio_close( p, fd );
2886
2887	proc_fdlock(p);
2888	}
2889
2890	if (fd < fdp->fd_knlistsize)
2891	knote_fdclose(p, fd);
2892
2893	if (fp->f_flags & FP_WAITEVENT)
2894	(void)waitevent_close(p, fp);
2895
2896	fileproc_drain(p, fp);
2897
2898	if (resvfd == `0`) {
2899	_fdrelse(p, fd);
2900	} else {
2901	procfdtbl_reservefd(p, fd);
2902	}
2903
2904	if (ENTR_SHOULDTRACE && fp->f_type == DTYPE_SOCKET)
2905	KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_END,
2906	fd, `0`, (int64_t)VM_KERNEL_ADDRPERM(fp->f_data));
2907
2908	error = closef_locked(fp, fp->f_fglob, p);
2909	if ((fp->f_flags & FP_WAITCLOSE) == FP_WAITCLOSE)
2910	wakeup(&fp->f_flags);
2911	fp->f_flags &= ~(FP_WAITCLOSE \| FP_CLOSING);
2912
2913	proc_fdunlock(p);
2914
2915	fileproc_free(fp);
2916
2917	proc_fdlock(p);
2918
2919	#if DIAGNOSTIC
2920	if (resvfd != `0`) {
2921	if ((fdp->fd_ofileflags[fd] & UF_RESERVED) == `0`)
2922	panic("close with reserved fd returns with freed fd:%d: proc: %p", fd, p);
2923	}
2924	#endif
2925
2926	return(error);
2927	}
2928
2929
2930	/*
2931	* fstat1
2932	*
2933	* Description: Return status information about a file descriptor.
2934	*
2935	* Parameters: p The process doing the fstat
2936	* fd The fd to stat
2937	* ub The user stat buffer
2938	* xsecurity The user extended security
2939	* buffer, or 0 if none
2940	* xsecurity_size The size of xsecurity, or 0
2941	* if no xsecurity
2942	* isstat64 Flag to indicate 64 bit version
2943	* for inode size, etc.
2944	*
2945	* Returns: 0 Success
2946	* EBADF
2947	* EFAULT
2948	* fp_lookup:EBADF Bad file descriptor
2949	* vnode_getwithref:???
2950	* copyout:EFAULT
2951	* vnode_getwithref:???
2952	* vn_stat:???
2953	* soo_stat:???
2954	* pipe_stat:???
2955	* pshm_stat:???
2956	* kqueue_stat:???
2957	*
2958	* Notes: Internal implementation for all other fstat() related
2959	* functions
2960	*
2961	* XXX switch on node type is bogus; need a stat in struct
2962	* XXX fileops instead.
2963	*/
2964	static int
2965	fstat1(proc_t p, int fd, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
2966	{
2967	struct fileproc *fp;
2968	union {
2969	struct stat sb;
2970	struct stat64 sb64;
2971	} source;
2972	union {
2973	struct user64_stat user64_sb;
2974	struct user32_stat user32_sb;
2975	struct user64_stat64 user64_sb64;
2976	struct user32_stat64 user32_sb64;
2977	} dest;
2978	int error, my_size;
2979	file_type_t type;
2980	caddr_t data;
2981	kauth_filesec_t fsec;
2982	user_size_t xsecurity_bufsize;
2983	vfs_context_t ctx = vfs_context_current();
2984	void * sbptr;
2985
2986
2987	AUDIT_ARG(fd, fd);
2988
2989	if ((error = fp_lookup(p, fd, &fp, `0`)) != `0`) {
2990	return(error);
2991	}
2992	type = fp->f_type;
2993	data = fp->f_data;
2994	fsec = KAUTH_FILESEC_NONE;
2995
2996	sbptr = (void *)&source;
2997
2998	switch (type) {
2999
3000	case DTYPE_VNODE:
3001	if ((error = vnode_getwithref((vnode_t)data)) == `0`) {
3002	/*
3003	* If the caller has the file open, and is not
3004	* requesting extended security information, we are
3005	* going to let them get the basic stat information.
3006	*/
3007	if (xsecurity == USER_ADDR_NULL) {
3008	error = vn_stat_noauth((vnode_t)data, sbptr, NULL, isstat64, ctx,
3009	fp->f_fglob->fg_cred);
3010	} else {
3011	error = vn_stat((vnode_t)data, sbptr, &fsec, isstat64, ctx);
3012	}
3013
3014	AUDIT_ARG(vnpath, (struct vnode *)data, ARG_VNODE1);
3015	(void)vnode_put((vnode_t)data);
3016	}
3017	break;
3018
3019	#if SOCKETS
3020	case DTYPE_SOCKET:
3021	error = soo_stat((struct socket *)data, sbptr, isstat64);
3022	break;
3023	#endif /* SOCKETS */
3024
3025	case DTYPE_PIPE:
3026	error = pipe_stat((void *)data, sbptr, isstat64);
3027	break;
3028
3029	case DTYPE_PSXSHM:
3030	error = pshm_stat((void *)data, sbptr, isstat64);
3031	break;
3032
3033	case DTYPE_KQUEUE:
3034	error = kqueue_stat((void *)data, sbptr, isstat64, p);
3035	break;
3036
3037	default:
3038	error = EBADF;
3039	goto out;
3040	}
3041	if (error == `0`) {
3042	caddr_t sbp;
3043
3044	if (isstat64 != `0`) {
3045	source.sb64.st_lspare = `0`;
3046	source.sb64.st_qspare[`0`] = `0LL`;
3047	source.sb64.st_qspare[`1`] = `0LL`;
3048
3049	if (IS_64BIT_PROCESS(current_proc())) {
3050	munge_user64_stat64(&source.sb64, &dest.user64_sb64);
3051	my_size = sizeof(dest.user64_sb64);
3052	sbp = (caddr_t)&dest.user64_sb64;
3053	} else {
3054	munge_user32_stat64(&source.sb64, &dest.user32_sb64);
3055	my_size = sizeof(dest.user32_sb64);
3056	sbp = (caddr_t)&dest.user32_sb64;
3057	}
3058	} else {
3059	source.sb.st_lspare = `0`;
3060	source.sb.st_qspare[`0`] = `0LL`;
3061	source.sb.st_qspare[`1`] = `0LL`;
3062	if (IS_64BIT_PROCESS(current_proc())) {
3063	munge_user64_stat(&source.sb, &dest.user64_sb);
3064	my_size = sizeof(dest.user64_sb);
3065	sbp = (caddr_t)&dest.user64_sb;
3066	} else {
3067	munge_user32_stat(&source.sb, &dest.user32_sb);
3068	my_size = sizeof(dest.user32_sb);
3069	sbp = (caddr_t)&dest.user32_sb;
3070	}
3071	}
3072
3073	error = copyout(sbp, ub, my_size);
3074	}
3075
3076	/ caller wants extended security information? /
3077	if (xsecurity != USER_ADDR_NULL) {
3078
3079	/ did we get any? /
3080	if (fsec == KAUTH_FILESEC_NONE) {
3081	if (susize(xsecurity_size, `0`) != `0`) {
3082	error = EFAULT;
3083	goto out;
3084	}
3085	} else {
3086	/ find the user buffer size /
3087	xsecurity_bufsize = fusize(xsecurity_size);
3088
3089	/ copy out the actual data size /
3090	if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != `0`) {
3091	error = EFAULT;
3092	goto out;
3093	}
3094
3095	/ if the caller supplied enough room, copy out to it /
3096	if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
3097	error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
3098	}
3099	}
3100	out:
3101	fp_drop(p, fd, fp, `0`);
3102	if (fsec != NULL)
3103	kauth_filesec_free(fsec);
3104	return (error);
3105	}
3106
3107
3108	/*
3109	* fstat_extended
3110	*
3111	* Description: Extended version of fstat supporting returning extended
3112	* security information
3113	*
3114	* Parameters: p The process doing the fstat
3115	* uap->fd The fd to stat
3116	* uap->ub The user stat buffer
3117	* uap->xsecurity The user extended security
3118	* buffer, or 0 if none
3119	* uap->xsecurity_size The size of xsecurity, or 0
3120	*
3121	* Returns: 0 Success
3122	* !0 Errno (see fstat1)
3123	*/
3124	int
3125	fstat_extended(proc_t p, struct fstat_extended_args uap, __unused int32_t retval)
3126	{
3127	return(fstat1(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, `0`));
3128	}
3129
3130
3131	/*
3132	* fstat
3133	*
3134	* Description: Get file status for the file associated with fd
3135	*
3136	* Parameters: p The process doing the fstat
3137	* uap->fd The fd to stat
3138	* uap->ub The user stat buffer
3139	*
3140	* Returns: 0 Success
3141	* !0 Errno (see fstat1)
3142	*/
3143	int
3144	fstat(proc_t p, struct fstat_args uap, __unused int32_t retval)
3145	{
3146	return(fstat1(p, uap->fd, uap->ub, `0`, `0`, `0`));
3147	}
3148
3149
3150	/*
3151	* fstat64_extended
3152	*
3153	* Description: Extended version of fstat64 supporting returning extended
3154	* security information
3155	*
3156	* Parameters: p The process doing the fstat
3157	* uap->fd The fd to stat
3158	* uap->ub The user stat buffer
3159	* uap->xsecurity The user extended security
3160	* buffer, or 0 if none
3161	* uap->xsecurity_size The size of xsecurity, or 0
3162	*
3163	* Returns: 0 Success
3164	* !0 Errno (see fstat1)
3165	*/
3166	int
3167	fstat64_extended(proc_t p, struct fstat64_extended_args uap, __unused int32_t retval)
3168	{
3169	return(fstat1(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, `1`));
3170	}
3171
3172
3173	/*
3174	* fstat64
3175	*
3176	* Description: Get 64 bit version of the file status for the file associated
3177	* with fd
3178	*
3179	* Parameters: p The process doing the fstat
3180	* uap->fd The fd to stat
3181	* uap->ub The user stat buffer
3182	*
3183	* Returns: 0 Success
3184	* !0 Errno (see fstat1)
3185	*/
3186	int
3187	fstat64(proc_t p, struct fstat64_args uap, __unused int32_t retval)
3188	{
3189	return(fstat1(p, uap->fd, uap->ub, `0`, `0`, `1`));
3190	}
3191
3192
3193	/*
3194	* fpathconf
3195	*
3196	* Description: Return pathconf information about a file descriptor.
3197	*
3198	* Parameters: p Process making the request
3199	* uap->fd fd to get information about
3200	* uap->name Name of information desired
3201	* retval Pointer to the call return area
3202	*
3203	* Returns: 0 Success
3204	* EINVAL
3205	* fp_lookup:EBADF Bad file descriptor
3206	* vnode_getwithref:???
3207	* vn_pathconf:???
3208	*
3209	* Implicit returns:
3210	* *retval (modified) Returned information (numeric)
3211	*/
3212	int
3213	fpathconf(proc_t p, struct fpathconf_args uap, int32_t retval)
3214	{
3215	int fd = uap->fd;
3216	struct fileproc *fp;
3217	struct vnode *vp;
3218	int error = `0`;
3219	file_type_t type;
3220	caddr_t data;
3221
3222
3223	AUDIT_ARG(fd, uap->fd);
3224	if ( (error = fp_lookup(p, fd, &fp, `0`)) )
3225	return(error);
3226	type = fp->f_type;
3227	data = fp->f_data;
3228
3229	switch (type) {
3230
3231	case DTYPE_SOCKET:
3232	if (uap->name != _PC_PIPE_BUF) {
3233	error = EINVAL;
3234	goto out;
3235	}
3236	*retval = PIPE_BUF;
3237	error = `0`;
3238	goto out;
3239
3240	case DTYPE_PIPE:
3241	if (uap->name != _PC_PIPE_BUF) {
3242	error = EINVAL;
3243	goto out;
3244	}
3245	*retval = PIPE_BUF;
3246	error = `0`;
3247	goto out;
3248
3249	case DTYPE_VNODE:
3250	vp = (struct vnode *)data;
3251
3252	if ( (error = vnode_getwithref(vp)) == `0`) {
3253	AUDIT_ARG(vnpath, vp, ARG_VNODE1);
3254
3255	error = vn_pathconf(vp, uap->name, retval, vfs_context_current());
3256
3257	(void)vnode_put(vp);
3258	}
3259	goto out;
3260
3261	default:
3262	error = EINVAL;
3263	goto out;
3264
3265	}
3266	/NOTREACHED/
3267	out:
3268	fp_drop(p, fd, fp, `0`);
3269	return(error);
3270	}
3271
/*
 * Statistics counter for the number of times a process calling fdalloc()
 * has resulted in an expansion of the per process open file table.
 * fdalloc() increments it once each time it installs a larger table.
 *
 * XXX This would likely be of more use if it were per process
 */
int fdexpand;
3279
3280
3281	/*
3282	* fdalloc
3283	*
3284	* Description: Allocate a file descriptor for the process.
3285	*
3286	* Parameters: p Process to allocate the fd in
3287	* want The fd we would prefer to get
3288	* result Pointer to fd we got
3289	*
3290	* Returns: 0 Success
3291	* EMFILE
3292	* ENOMEM
3293	*
3294	* Implicit returns:
3295	* *result (modified) The fd which was allocated
3296	*/
3297	int
3298	fdalloc(proc_t p, int want, int *result)
3299	{
3300	struct filedesc *fdp = p->p_fd;
3301	int i;
3302	int lim, last, numfiles, oldnfiles;
3303	struct fileproc newofiles, ofiles;
3304	char *newofileflags;
3305
3306	/*
3307	* Search for a free descriptor starting at the higher
3308	* of want or fd_freefile. If that fails, consider
3309	* expanding the ofile array.
3310	*/
3311	#if DIAGNOSTIC
3312	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
3313	#endif
3314
3315	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
3316	for (;;) {
3317	last = min(fdp->fd_nfiles, lim);
3318	if ((i = want) < fdp->fd_freefile)
3319	i = fdp->fd_freefile;
3320	for (; i < last; i++) {
3321	if (fdp->fd_ofiles[i] == NULL && !(fdp->fd_ofileflags[i] & UF_RESERVED)) {
3322	procfdtbl_reservefd(p, i);
3323	if (i > fdp->fd_lastfile)
3324	fdp->fd_lastfile = i;
3325	if (want <= fdp->fd_freefile)
3326	fdp->fd_freefile = i;
3327	*result = i;
3328	return (`0`);
3329	}
3330	}
3331
3332	/*
3333	* No space in current array. Expand?
3334	*/
3335	if (fdp->fd_nfiles >= lim)
3336	return (EMFILE);
3337	if (fdp->fd_nfiles < NDEXTENT)
3338	numfiles = NDEXTENT;
3339	else
3340	numfiles = `2` * fdp->fd_nfiles;
3341	/ Enforce lim /
3342	if (numfiles > lim)
3343	numfiles = lim;
3344	proc_fdunlock(p);
3345	MALLOC_ZONE(newofiles, struct fileproc **,
3346	numfiles * OFILESIZE, M_OFILETABL, M_WAITOK);
3347	proc_fdlock(p);
3348	if (newofiles == NULL) {
3349	return (ENOMEM);
3350	}
3351	if (fdp->fd_nfiles >= numfiles) {
3352	FREE_ZONE(newofiles, numfiles * OFILESIZE, M_OFILETABL);
3353	continue;
3354	}
3355	newofileflags = (char *) &newofiles[numfiles];
3356	/*
3357	* Copy the existing ofile and ofileflags arrays
3358	* and zero the new portion of each array.
3359	*/
3360	oldnfiles = fdp->fd_nfiles;
3361	(void) memcpy(newofiles, fdp->fd_ofiles,
3362	oldnfiles * sizeof(*fdp->fd_ofiles));
3363	(void) memset(&newofiles[oldnfiles], `0`,
3364	(numfiles - oldnfiles) * sizeof(*fdp->fd_ofiles));
3365
3366	(void) memcpy(newofileflags, fdp->fd_ofileflags,
3367	oldnfiles * sizeof(*fdp->fd_ofileflags));
3368	(void) memset(&newofileflags[oldnfiles], `0`,
3369	(numfiles - oldnfiles) *
3370	sizeof(*fdp->fd_ofileflags));
3371	ofiles = fdp->fd_ofiles;
3372	fdp->fd_ofiles = newofiles;
3373	fdp->fd_ofileflags = newofileflags;
3374	fdp->fd_nfiles = numfiles;
3375	FREE_ZONE(ofiles, oldnfiles * OFILESIZE, M_OFILETABL);
3376	fdexpand++;
3377	}
3378	}
3379
3380
3381	/*
3382	* fdavail
3383	*
3384	* Description: Check to see whether n user file descriptors are available
3385	* to the process p.
3386	*
3387	* Parameters: p Process to check in
3388	* n The number of fd's desired
3389	*
3390	* Returns: 0 No
3391	* 1 Yes
3392	*
3393	* Locks: Assumes proc_fdlock for process is held by the caller
3394	*
3395	* Notes: The answer only remains valid so long as the proc_fdlock is
3396	* held by the caller.
3397	*/
3398	int
3399	fdavail(proc_t p, int n)
3400	{
3401	struct filedesc *fdp = p->p_fd;
3402	struct fileproc **fpp;
3403	char *flags;
3404	int i, lim;
3405
3406	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
3407	if ((i = lim - fdp->fd_nfiles) > `0` && (n -= i) <= `0`)
3408	return (`1`);
3409	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
3410	flags = &fdp->fd_ofileflags[fdp->fd_freefile];
3411	for (i = fdp->fd_nfiles - fdp->fd_freefile; --i >= `0`; fpp++, flags++)
3412	if (fpp == NULL && !(flags & UF_RESERVED) && --n <= `0`)
3413	return (`1`);
3414	return (`0`);
3415	}
3416
3417
3418	/*
3419	* fdrelse
3420	*
3421	* Description: Legacy KPI wrapper function for _fdrelse
3422	*
3423	* Parameters: p Process in which fd lives
3424	* fd fd to free
3425	*
3426	* Returns: void
3427	*
3428	* Locks: Assumes proc_fdlock for process is held by the caller
3429	*/
3430	void
3431	fdrelse(proc_t p, int fd)
3432	{
3433	_fdrelse(p, fd);
3434	}
3435
3436
3437	/*
3438	* fdgetf_noref
3439	*
3440	* Description: Get the fileproc pointer for the given fd from the per process
3441	* open file table without taking an explicit reference on it.
3442	*
3443	* Parameters: p Process containing fd
3444	* fd fd to obtain fileproc for
3445	* resultfp Pointer to pointer return area
3446	*
3447	* Returns: 0 Success
3448	* EBADF
3449	*
3450	* Implicit returns:
3451	* *resultfp (modified) Pointer to fileproc pointer
3452	*
3453	* Locks: Assumes proc_fdlock for process is held by the caller
3454	*
3455	* Notes: Because there is no reference explicitly taken, the returned
3456	* fileproc pointer is only valid so long as the proc_fdlock
3457	* remains held by the caller.
3458	*/
3459	int
3460	fdgetf_noref(proc_t p, int fd, struct fileproc **resultfp)
3461	{
3462	struct filedesc *fdp = p->p_fd;
3463	struct fileproc *fp;
3464
3465	if (fd < `0` \|\| fd >= fdp->fd_nfiles \|\|
3466	(fp = fdp->fd_ofiles[fd]) == NULL \|\|
3467	(fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3468	return (EBADF);
3469	}
3470	if (resultfp)
3471	*resultfp = fp;
3472	return (`0`);
3473	}
3474
3475
3476	/*
3477	* fp_getfvp
3478	*
3479	* Description: Get fileproc and vnode pointer for a given fd from the per
3480	* process open file table of the specified process, and if
3481	* successful, increment the f_iocount
3482	*
3483	* Parameters: p Process in which fd lives
3484	* fd fd to get information for
3485	* resultfp Pointer to result fileproc
3486	* pointer area, or 0 if none
3487	* resultvp Pointer to result vnode pointer
3488	* area, or 0 if none
3489	*
3490	* Returns: 0 Success
3491	* EBADF Bad file descriptor
3492	* ENOTSUP fd does not refer to a vnode
3493	*
3494	* Implicit returns:
3495	* *resultfp (modified) Fileproc pointer
3496	* *resultvp (modified) vnode pointer
3497	*
3498	* Notes: The resultfp and resultvp fields are optional, and may be
3499	* independently specified as NULL to skip returning information
3500	*
3501	* Locks: Internally takes and releases proc_fdlock
3502	*/
3503	int
3504	fp_getfvp(proc_t p, int fd, struct fileproc resultfp, struct vnode resultvp)
3505	{
3506	struct filedesc *fdp = p->p_fd;
3507	struct fileproc *fp;
3508
3509	proc_fdlock_spin(p);
3510	if (fd < `0` \|\| fd >= fdp->fd_nfiles \|\|
3511	(fp = fdp->fd_ofiles[fd]) == NULL \|\|
3512	(fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3513	proc_fdunlock(p);
3514	return (EBADF);
3515	}
3516	if (fp->f_type != DTYPE_VNODE) {
3517	proc_fdunlock(p);
3518	return(ENOTSUP);
3519	}
3520	fp->f_iocount++;
3521
3522	if (resultfp)
3523	*resultfp = fp;
3524	if (resultvp)
3525	resultvp = (struct* vnode *)fp->f_data;
3526	proc_fdunlock(p);
3527
3528	return (`0`);
3529	}
3530
3531
3532	/*
3533	* fp_getfvpandvid
3534	*
3535	* Description: Get fileproc, vnode pointer, and vid for a given fd from the
3536	* per process open file table of the specified process, and if
3537	* successful, increment the f_iocount
3538	*
3539	* Parameters: p Process in which fd lives
3540	* fd fd to get information for
3541	* resultfp Pointer to result fileproc
3542	* pointer area, or 0 if none
3543	* resultvp Pointer to result vnode pointer
3544	* area, or 0 if none
3545	* vidp Pointer to resuld vid area
3546	*
3547	* Returns: 0 Success
3548	* EBADF Bad file descriptor
3549	* ENOTSUP fd does not refer to a vnode
3550	*
3551	* Implicit returns:
3552	* *resultfp (modified) Fileproc pointer
3553	* *resultvp (modified) vnode pointer
3554	* *vidp vid value
3555	*
3556	* Notes: The resultfp and resultvp fields are optional, and may be
3557	* independently specified as NULL to skip returning information
3558	*
3559	* Locks: Internally takes and releases proc_fdlock
3560	*/
3561	int
3562	fp_getfvpandvid(proc_t p, int fd, struct fileproc **resultfp,
3563	struct vnode *resultvp, uint32_t vidp)
3564	{
3565	struct filedesc *fdp = p->p_fd;
3566	struct fileproc *fp;
3567
3568	proc_fdlock_spin(p);
3569	if (fd < `0` \|\| fd >= fdp->fd_nfiles \|\|
3570	(fp = fdp->fd_ofiles[fd]) == NULL \|\|
3571	(fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3572	proc_fdunlock(p);
3573	return (EBADF);
3574	}
3575	if (fp->f_type != DTYPE_VNODE) {
3576	proc_fdunlock(p);
3577	return(ENOTSUP);
3578	}
3579	fp->f_iocount++;
3580
3581	if (resultfp)
3582	*resultfp = fp;
3583	if (resultvp)
3584	resultvp = (struct* vnode *)fp->f_data;
3585	if (vidp)
3586	vidp = (uint32_t)vnode_vid((struct* vnode *)fp->f_data);
3587	proc_fdunlock(p);
3588
3589	return (`0`);
3590	}
3591
3592
3593	/*
3594	* fp_getfsock
3595	*
3596	* Description: Get fileproc and socket pointer for a given fd from the
3597	* per process open file table of the specified process, and if
3598	* successful, increment the f_iocount
3599	*
3600	* Parameters: p Process in which fd lives
3601	* fd fd to get information for
3602	* resultfp Pointer to result fileproc
3603	* pointer area, or 0 if none
3604	* results Pointer to result socket
3605	* pointer area, or 0 if none
3606	*
3607	* Returns: EBADF The file descriptor is invalid
3608	* EOPNOTSUPP The file descriptor is not a socket
3609	* 0 Success
3610	*
3611	* Implicit returns:
3612	* *resultfp (modified) Fileproc pointer
3613	* *results (modified) socket pointer
3614	*
3615	* Notes: EOPNOTSUPP should probably be ENOTSOCK; this function is only
3616	* ever called from accept1().
3617	*/
3618	int
3619	fp_getfsock(proc_t p, int fd, struct fileproc **resultfp,
3620	struct socket **results)
3621	{
3622	struct filedesc *fdp = p->p_fd;
3623	struct fileproc *fp;
3624
3625	proc_fdlock_spin(p);
3626	if (fd < `0` \|\| fd >= fdp->fd_nfiles \|\|
3627	(fp = fdp->fd_ofiles[fd]) == NULL \|\|
3628	(fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3629	proc_fdunlock(p);
3630	return (EBADF);
3631	}
3632	if (fp->f_type != DTYPE_SOCKET) {
3633	proc_fdunlock(p);
3634	return(EOPNOTSUPP);
3635	}
3636	fp->f_iocount++;
3637
3638	if (resultfp)
3639	*resultfp = fp;
3640	if (results)
3641	results = (struct* socket *)fp->f_data;
3642	proc_fdunlock(p);
3643
3644	return (`0`);
3645	}
3646
3647
3648	/*
3649	* fp_getfkq
3650	*
3651	* Description: Get fileproc and kqueue pointer for a given fd from the
3652	* per process open file table of the specified process, and if
3653	* successful, increment the f_iocount
3654	*
3655	* Parameters: p Process in which fd lives
3656	* fd fd to get information for
3657	* resultfp Pointer to result fileproc
3658	* pointer area, or 0 if none
3659	* resultkq Pointer to result kqueue
3660	* pointer area, or 0 if none
3661	*
3662	* Returns: EBADF The file descriptor is invalid
3663	* EBADF The file descriptor is not a socket
3664	* 0 Success
3665	*
3666	* Implicit returns:
3667	* *resultfp (modified) Fileproc pointer
3668	* *resultkq (modified) kqueue pointer
3669	*
3670	* Notes: The second EBADF should probably be something else to make
3671	* the error condition distinct.
3672	*/
3673	int
3674	fp_getfkq(proc_t p, int fd, struct fileproc **resultfp,
3675	struct kqueue **resultkq)
3676	{
3677	struct filedesc *fdp = p->p_fd;
3678	struct fileproc *fp;
3679
3680	proc_fdlock_spin(p);
3681	if ( fd < `0` \|\| fd >= fdp->fd_nfiles \|\|
3682	(fp = fdp->fd_ofiles[fd]) == NULL \|\|
3683	(fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3684	proc_fdunlock(p);
3685	return (EBADF);
3686	}
3687	if (fp->f_type != DTYPE_KQUEUE) {
3688	proc_fdunlock(p);
3689	return(EBADF);
3690	}
3691	fp->f_iocount++;
3692
3693	if (resultfp)
3694	*resultfp = fp;
3695	if (resultkq)
3696	resultkq = (struct* kqueue *)fp->f_data;
3697	proc_fdunlock(p);
3698
3699	return (`0`);
3700	}
3701
3702
3703	/*
3704	* fp_getfpshm
3705	*
3706	* Description: Get fileproc and POSIX shared memory pointer for a given fd
3707	* from the per process open file table of the specified process
3708	* and if successful, increment the f_iocount
3709	*
3710	* Parameters: p Process in which fd lives
3711	* fd fd to get information for
3712	* resultfp Pointer to result fileproc
3713	* pointer area, or 0 if none
3714	* resultpshm Pointer to result POSIX
3715	* shared memory pointer
3716	* pointer area, or 0 if none
3717	*
3718	* Returns: EBADF The file descriptor is invalid
3719	* EBADF The file descriptor is not a POSIX
3720	* shared memory area
3721	* 0 Success
3722	*
3723	* Implicit returns:
3724	* *resultfp (modified) Fileproc pointer
3725	* *resultpshm (modified) POSIX shared memory pointer
3726	*
3727	* Notes: The second EBADF should probably be something else to make
3728	* the error condition distinct.
3729	*/
3730	int
3731	fp_getfpshm(proc_t p, int fd, struct fileproc **resultfp,
3732	struct pshmnode **resultpshm)
3733	{
3734	struct filedesc *fdp = p->p_fd;
3735	struct fileproc *fp;
3736
3737	proc_fdlock_spin(p);
3738	if (fd < `0` \|\| fd >= fdp->fd_nfiles \|\|
3739	(fp = fdp->fd_ofiles[fd]) == NULL \|\|
3740	(fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3741	proc_fdunlock(p);
3742	return (EBADF);
3743	}
3744	if (fp->f_type != DTYPE_PSXSHM) {
3745
3746	proc_fdunlock(p);
3747	return(EBADF);
3748	}
3749	fp->f_iocount++;
3750
3751	if (resultfp)
3752	*resultfp = fp;
3753	if (resultpshm)
3754	resultpshm = (struct* pshmnode *)fp->f_data;
3755	proc_fdunlock(p);
3756
3757	return (`0`);
3758	}
3759
3760
3761	/*
3762	* fp_getfsem
3763	*
3764	* Description: Get fileproc and POSIX semaphore pointer for a given fd from
3765	* the per process open file table of the specified process
3766	* and if successful, increment the f_iocount
3767	*
3768	* Parameters: p Process in which fd lives
3769	* fd fd to get information for
3770	* resultfp Pointer to result fileproc
3771	* pointer area, or 0 if none
3772	* resultpsem Pointer to result POSIX
3773	* semaphore pointer area, or
3774	* 0 if none
3775	*
3776	* Returns: EBADF The file descriptor is invalid
3777	* EBADF The file descriptor is not a POSIX
3778	* semaphore
3779	* 0 Success
3780	*
3781	* Implicit returns:
3782	* *resultfp (modified) Fileproc pointer
3783	* *resultpsem (modified) POSIX semaphore pointer
3784	*
3785	* Notes: The second EBADF should probably be something else to make
3786	* the error condition distinct.
3787	*
3788	* In order to support unnamed POSIX semaphores, the named
3789	* POSIX semaphores will have to move out of the per-process
3790	* open filetable, and into a global table that is shared with
3791	* unnamed POSIX semaphores, since unnamed POSIX semaphores
3792	* are typically used by declaring instances in shared memory,
3793	* and there's no other way to do this without changing the
3794	* underlying type, which would introduce binary compatibility
3795	* issues.
3796	*/
3797	int
3798	fp_getfpsem(proc_t p, int fd, struct fileproc **resultfp,
3799	struct psemnode **resultpsem)
3800	{
3801	struct filedesc *fdp = p->p_fd;
3802	struct fileproc *fp;
3803
3804	proc_fdlock_spin(p);
3805	if (fd < `0` \|\| fd >= fdp->fd_nfiles \|\|
3806	(fp = fdp->fd_ofiles[fd]) == NULL \|\|
3807	(fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3808	proc_fdunlock(p);
3809	return (EBADF);
3810	}
3811	if (fp->f_type != DTYPE_PSXSEM) {
3812	proc_fdunlock(p);
3813	return(EBADF);
3814	}
3815	fp->f_iocount++;
3816
3817	if (resultfp)
3818	*resultfp = fp;
3819	if (resultpsem)
3820	resultpsem = (struct* psemnode *)fp->f_data;
3821	proc_fdunlock(p);
3822
3823	return (`0`);
3824	}
3825
3826
3827	/*
3828	* fp_getfpipe
3829	*
3830	* Description: Get fileproc and pipe pointer for a given fd from the
3831	* per process open file table of the specified process
3832	* and if successful, increment the f_iocount
3833	*
3834	* Parameters: p Process in which fd lives
3835	* fd fd to get information for
3836	* resultfp Pointer to result fileproc
3837	* pointer area, or 0 if none
3838	* resultpipe Pointer to result pipe
3839	* pointer area, or 0 if none
3840	*
3841	* Returns: EBADF The file descriptor is invalid
3842	* EBADF The file descriptor is not a socket
3843	* 0 Success
3844	*
3845	* Implicit returns:
3846	* *resultfp (modified) Fileproc pointer
3847	* *resultpipe (modified) pipe pointer
3848	*
3849	* Notes: The second EBADF should probably be something else to make
3850	* the error condition distinct.
3851	*/
3852	int
3853	fp_getfpipe(proc_t p, int fd, struct fileproc **resultfp,
3854	struct pipe **resultpipe)
3855	{
3856	struct filedesc *fdp = p->p_fd;
3857	struct fileproc *fp;
3858
3859	proc_fdlock_spin(p);
3860	if (fd < `0` \|\| fd >= fdp->fd_nfiles \|\|
3861	(fp = fdp->fd_ofiles[fd]) == NULL \|\|
3862	(fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3863	proc_fdunlock(p);
3864	return (EBADF);
3865	}
3866	if (fp->f_type != DTYPE_PIPE) {
3867	proc_fdunlock(p);
3868	return(EBADF);
3869	}
3870	fp->f_iocount++;
3871
3872	if (resultfp)
3873	*resultfp = fp;
3874	if (resultpipe)
3875	resultpipe = (struct* pipe *)fp->f_data;
3876	proc_fdunlock(p);
3877
3878	return (`0`);
3879	}
3880
3881
3882	/*
3883	* fp_lookup
3884	*
3885	* Description: Get fileproc pointer for a given fd from the per process
3886	* open file table of the specified process and if successful,
3887	* increment the f_iocount
3888	*
3889	* Parameters: p Process in which fd lives
3890	* fd fd to get information for
3891	* resultfp Pointer to result fileproc
3892	* pointer area, or 0 if none
3893	* locked !0 if the caller holds the
3894	* proc_fdlock, 0 otherwise
3895	*
3896	* Returns: 0 Success
3897	* EBADF Bad file descriptor
3898	*
3899	* Implicit returns:
3900	* *resultfp (modified) Fileproc pointer
3901	*
3902	* Locks: If the argument 'locked' is non-zero, then the caller is
3903	* expected to have taken and held the proc_fdlock; if it is
3904	* zero, than this routine internally takes and drops this lock.
3905	*/
3906	int
3907	fp_lookup(proc_t p, int fd, struct fileproc *resultfp, int* locked)
3908	{
3909	struct filedesc *fdp = p->p_fd;
3910	struct fileproc *fp;
3911
3912	if (!locked)
3913	proc_fdlock_spin(p);
3914	if (fd < `0` \|\| fdp == NULL \|\| fd >= fdp->fd_nfiles \|\|
3915	(fp = fdp->fd_ofiles[fd]) == NULL \|\|
3916	(fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3917	if (!locked)
3918	proc_fdunlock(p);
3919	return (EBADF);
3920	}
3921	fp->f_iocount++;
3922
3923	if (resultfp)
3924	*resultfp = fp;
3925	if (!locked)
3926	proc_fdunlock(p);
3927
3928	return (`0`);
3929	}
3930
3931
3932	/*
3933	* fp_tryswap
3934	*
3935	* Description: Swap the fileproc pointer for a given fd with a new
3936	* fileproc pointer in the per-process open file table of
3937	* the specified process. The fdlock must be held at entry.
3938	*
3939	* Parameters: p Process containing the fd
3940	* fd The fd of interest
3941	* nfp Pointer to the newfp
3942	*
3943	* Returns: 0 Success
3944	* EBADF Bad file descriptor
3945	* EINTR Interrupted
3946	* EKEEPLOOKING f_iocount changed while lock was dropped.
3947	*/
3948	int
3949	fp_tryswap(proc_t p, int fd, struct fileproc *nfp)
3950	{
3951	struct fileproc *fp;
3952	int error;
3953
3954	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
3955
3956	if (`0` != (error = fp_lookup(p, fd, &fp, `1`)))
3957	return (error);
3958	/*
3959	* At this point, our caller (change_guardedfd_np) has
3960	* one f_iocount reference, and we just took another
3961	* one to begin the replacement.
3962	*/
3963	if (fp->f_iocount < `2`) {
3964	panic("f_iocount too small %d", fp->f_iocount);
3965	} else if (`2` == fp->f_iocount) {
3966
3967	/ Copy the contents of fp, preserving the "type" of nfp /
3968
3969	nfp->f_flags = (nfp->f_flags & FP_TYPEMASK) \|
3970	(fp->f_flags & ~FP_TYPEMASK);
3971	nfp->f_iocount = fp->f_iocount;
3972	nfp->f_fglob = fp->f_fglob;
3973	nfp->f_wset = fp->f_wset;
3974
3975	p->p_fd->fd_ofiles[fd] = nfp;
3976	(void) fp_drop(p, fd, nfp, `1`);
3977	} else {
3978	/*
3979	* Wait for all other active references to evaporate.
3980	*/
3981	p->p_fpdrainwait = `1`;
3982	error = msleep(&p->p_fpdrainwait, &p->p_fdmlock,
3983	PRIBIO \| PCATCH, "tryswap fpdrain", NULL);
3984	if (`0` == error) {
3985	/*
3986	* Return an "internal" errno to trigger a full
3987	* reevaluation of the change-guard attempt.
3988	*/
3989	error = EKEEPLOOKING;
3990	printf("%s: lookup collision fd %d\n", __func__, fd);
3991	}
3992	(void) fp_drop(p, fd, fp, `1`);
3993	}
3994	return (error);
3995	}
3996
3997
3998	/*
3999	* fp_drop_written
4000	*
4001	* Description: Set the FP_WRITTEN flag on the fileproc and drop the I/O
4002	* reference previously taken by calling fp_lookup et. al.
4003	*
4004	* Parameters: p Process in which the fd lives
4005	* fd fd associated with the fileproc
4006	* fp fileproc on which to set the
4007	* flag and drop the reference
4008	*
4009	* Returns: 0 Success
4010	* fp_drop:EBADF Bad file descriptor
4011	*
4012	* Locks: This function internally takes and drops the proc_fdlock for
4013	* the supplied process
4014	*
4015	* Notes: The fileproc must correspond to the fd in the supplied proc
4016	*/
4017	int
4018	fp_drop_written(proc_t p, int fd, struct fileproc *fp)
4019	{
4020	int error;
4021
4022	proc_fdlock_spin(p);
4023
4024	fp->f_flags \|= FP_WRITTEN;
4025
4026	error = fp_drop(p, fd, fp, `1`);
4027
4028	proc_fdunlock(p);
4029
4030	return (error);
4031	}
4032
4033
4034	/*
4035	* fp_drop_event
4036	*
4037	* Description: Set the FP_WAITEVENT flag on the fileproc and drop the I/O
4038	* reference previously taken by calling fp_lookup et. al.
4039	*
4040	* Parameters: p Process in which the fd lives
4041	* fd fd associated with the fileproc
4042	* fp fileproc on which to set the
4043	* flag and drop the reference
4044	*
4045	* Returns: 0 Success
4046	* fp_drop:EBADF Bad file descriptor
4047	*
4048	* Locks: This function internally takes and drops the proc_fdlock for
4049	* the supplied process
4050	*
4051	* Notes: The fileproc must correspond to the fd in the supplied proc
4052	*/
4053	int
4054	fp_drop_event(proc_t p, int fd, struct fileproc *fp)
4055	{
4056	int error;
4057
4058	proc_fdlock_spin(p);
4059
4060	fp->f_flags \|= FP_WAITEVENT;
4061
4062	error = fp_drop(p, fd, fp, `1`);
4063
4064	proc_fdunlock(p);
4065
4066	return (error);
4067	}
4068
4069
4070	/*
4071	* fp_drop
4072	*
4073	* Description: Drop the I/O reference previously taken by calling fp_lookup
4074	* et. al.
4075	*
4076	* Parameters: p Process in which the fd lives
4077	* fd fd associated with the fileproc
4078	* fp fileproc on which to set the
4079	* flag and drop the reference
4080	* locked flag to internally take and
4081	* drop proc_fdlock if it is not
4082	* already held by the caller
4083	*
4084	* Returns: 0 Success
4085	* EBADF Bad file descriptor
4086	*
4087	* Locks: This function internally takes and drops the proc_fdlock for
4088	* the supplied process if 'locked' is non-zero, and assumes that
4089	* the caller already holds this lock if 'locked' is non-zero.
4090	*
4091	* Notes: The fileproc must correspond to the fd in the supplied proc
4092	*/
4093	int
4094	fp_drop(proc_t p, int fd, struct fileproc fp, int* locked)
4095	{
4096	struct filedesc *fdp = p->p_fd;
4097	int needwakeup = `0`;
4098
4099	if (!locked)
4100	proc_fdlock_spin(p);
4101	if ((fp == FILEPROC_NULL) && (fd < `0` \|\| fd >= fdp->fd_nfiles \|\|
4102	(fp = fdp->fd_ofiles[fd]) == NULL \|\|
4103	((fdp->fd_ofileflags[fd] & UF_RESERVED) &&
4104	!(fdp->fd_ofileflags[fd] & UF_CLOSING)))) {
4105	if (!locked)
4106	proc_fdunlock(p);
4107	return (EBADF);
4108	}
4109	fp->f_iocount--;
4110
4111	if (fp->f_iocount == `0`) {
4112	if (fp->f_flags & FP_SELCONFLICT)
4113	fp->f_flags &= ~FP_SELCONFLICT;
4114
4115	if (p->p_fpdrainwait) {
4116	p->p_fpdrainwait = `0`;
4117	needwakeup = `1`;
4118	}
4119	}
4120	if (!locked)
4121	proc_fdunlock(p);
4122	if (needwakeup)
4123	wakeup(&p->p_fpdrainwait);
4124
4125	return (`0`);
4126	}
4127
4128
4129	/*
4130	* file_vnode
4131	*
4132	* Description: Given an fd, look it up in the current process's per process
4133	* open file table, and return its internal vnode pointer.
4134	*
4135	* Parameters: fd fd to obtain vnode from
4136	* vpp pointer to vnode return area
4137	*
4138	* Returns: 0 Success
4139	* EINVAL The fd does not refer to a
4140	* vnode fileproc entry
4141	* fp_lookup:EBADF Bad file descriptor
4142	*
4143	* Implicit returns:
4144	* *vpp (modified) Returned vnode pointer
4145	*
4146	* Locks: This function internally takes and drops the proc_fdlock for
4147	* the current process
4148	*
4149	* Notes: If successful, this function increments the f_iocount on the
4150	* fd's corresponding fileproc.
4151	*
4152	* The fileproc referenced is not returned; because of this, care
4153	* must be taken to not drop the last reference (e.g. by closing
4154	* the file). This is inherently unsafe, since the reference may
4155	* not be recoverable from the vnode, if there is a subsequent
4156	* close that destroys the associate fileproc. The caller should
4157	* therefore retain their own reference on the fileproc so that
4158	* the f_iocount can be dropped subsequently. Failure to do this
4159	* can result in the returned pointer immediately becoming invalid
4160	* following the call.
4161	*
4162	* Use of this function is discouraged.
4163	*/
4164	int
4165	file_vnode(int fd, struct vnode **vpp)
4166	{
4167	proc_t p = current_proc();
4168	struct fileproc *fp;
4169	int error;
4170
4171	proc_fdlock_spin(p);
4172	if ( (error = fp_lookup(p, fd, &fp, `1`)) ) {
4173	proc_fdunlock(p);
4174	return(error);
4175	}
4176	if (fp->f_type != DTYPE_VNODE) {
4177	fp_drop(p, fd, fp,`1`);
4178	proc_fdunlock(p);
4179	return(EINVAL);
4180	}
4181	if (vpp != NULL)
4182	vpp = (struct* vnode *)fp->f_data;
4183	proc_fdunlock(p);
4184
4185	return(`0`);
4186	}
4187
4188
4189	/*
4190	* file_vnode_withvid
4191	*
4192	* Description: Given an fd, look it up in the current process's per process
4193	* open file table, and return its internal vnode pointer.
4194	*
4195	* Parameters: fd fd to obtain vnode from
4196	* vpp pointer to vnode return area
4197	* vidp pointer to vid of the returned vnode
4198	*
4199	* Returns: 0 Success
4200	* EINVAL The fd does not refer to a
4201	* vnode fileproc entry
4202	* fp_lookup:EBADF Bad file descriptor
4203	*
4204	* Implicit returns:
4205	* *vpp (modified) Returned vnode pointer
4206	*
4207	* Locks: This function internally takes and drops the proc_fdlock for
4208	* the current process
4209	*
4210	* Notes: If successful, this function increments the f_iocount on the
4211	* fd's corresponding fileproc.
4212	*
4213	* The fileproc referenced is not returned; because of this, care
4214	* must be taken to not drop the last reference (e.g. by closing
4215	* the file). This is inherently unsafe, since the reference may
4216	* not be recoverable from the vnode, if there is a subsequent
4217	* close that destroys the associate fileproc. The caller should
4218	* therefore retain their own reference on the fileproc so that
4219	* the f_iocount can be dropped subsequently. Failure to do this
4220	* can result in the returned pointer immediately becoming invalid
4221	* following the call.
4222	*
4223	* Use of this function is discouraged.
4224	*/
4225	int
4226	file_vnode_withvid(int fd, struct vnode *vpp, uint32_t vidp)
4227	{
4228	proc_t p = current_proc();
4229	struct fileproc *fp;
4230	vnode_t vp;
4231	int error;
4232
4233	proc_fdlock_spin(p);
4234	if ( (error = fp_lookup(p, fd, &fp, `1`)) ) {
4235	proc_fdunlock(p);
4236	return(error);
4237	}
4238	if (fp->f_type != DTYPE_VNODE) {
4239	fp_drop(p, fd, fp,`1`);
4240	proc_fdunlock(p);
4241	return(EINVAL);
4242	}
4243	vp = (struct vnode *)fp->f_data;
4244	if (vpp != NULL)
4245	*vpp = vp;
4246
4247	if ((vidp != NULL) && (vp != NULLVP))
4248	*vidp = (uint32_t)vp->v_id;
4249
4250	proc_fdunlock(p);
4251
4252	return(`0`);
4253	}
4254
4255
4256	/*
4257	* file_socket
4258	*
4259	* Description: Given an fd, look it up in the current process's per process
4260	* open file table, and return its internal socket pointer.
4261	*
4262	* Parameters: fd fd to obtain vnode from
4263	* sp pointer to socket return area
4264	*
4265	* Returns: 0 Success
4266	* ENOTSOCK Not a socket
4267	* fp_lookup:EBADF Bad file descriptor
4268	*
4269	* Implicit returns:
4270	* *sp (modified) Returned socket pointer
4271	*
4272	* Locks: This function internally takes and drops the proc_fdlock for
4273	* the current process
4274	*
4275	* Notes: If successful, this function increments the f_iocount on the
4276	* fd's corresponding fileproc.
4277	*
4278	* The fileproc referenced is not returned; because of this, care
4279	* must be taken to not drop the last reference (e.g. by closing
4280	* the file). This is inherently unsafe, since the reference may
4281	* not be recoverable from the socket, if there is a subsequent
4282	* close that destroys the associate fileproc. The caller should
4283	* therefore retain their own reference on the fileproc so that
4284	* the f_iocount can be dropped subsequently. Failure to do this
4285	* can result in the returned pointer immediately becoming invalid
4286	* following the call.
4287	*
4288	* Use of this function is discouraged.
4289	*/
4290	int
4291	file_socket(int fd, struct socket **sp)
4292	{
4293	proc_t p = current_proc();
4294	struct fileproc *fp;
4295	int error;
4296
4297	proc_fdlock_spin(p);
4298	if ( (error = fp_lookup(p, fd, &fp, `1`)) ) {
4299	proc_fdunlock(p);
4300	return(error);
4301	}
4302	if (fp->f_type != DTYPE_SOCKET) {
4303	fp_drop(p, fd, fp,`1`);
4304	proc_fdunlock(p);
4305	return(ENOTSOCK);
4306	}
4307	sp = (struct* socket *)fp->f_data;
4308	proc_fdunlock(p);
4309
4310	return(`0`);
4311	}
4312
4313
4314	/*
4315	* file_flags
4316	*
4317	* Description: Given an fd, look it up in the current process's per process
4318	* open file table, and return its fileproc's flags field.
4319	*
4320	* Parameters: fd fd whose flags are to be
4321	* retrieved
4322	* flags pointer to flags data area
4323	*
4324	* Returns: 0 Success
4325	* ENOTSOCK Not a socket
4326	* fp_lookup:EBADF Bad file descriptor
4327	*
4328	* Implicit returns:
4329	* *flags (modified) Returned flags field
4330	*
4331	* Locks: This function internally takes and drops the proc_fdlock for
4332	* the current process
4333	*
4334	* Notes: This function will internally increment and decrement the
4335	* f_iocount of the fileproc as part of its operation.
4336	*/
4337	int
4338	file_flags(int fd, int *flags)
4339	{
4340
4341	proc_t p = current_proc();
4342	struct fileproc *fp;
4343	int error;
4344
4345	proc_fdlock_spin(p);
4346	if ( (error = fp_lookup(p, fd, &fp, `1`)) ) {
4347	proc_fdunlock(p);
4348	return(error);
4349	}
4350	flags = (int*)fp->f_flag;
4351	fp_drop(p, fd, fp,`1`);
4352	proc_fdunlock(p);
4353
4354	return(`0`);
4355	}
4356
4357
4358	/*
4359	* file_drop
4360	*
4361	* Description: Drop an iocount reference on an fd, and wake up any waiters
4362	* for draining (i.e. blocked in fileproc_drain() called during
4363	* the last attempt to close a file).
4364	*
4365	* Parameters: fd fd on which an ioreference is
4366	* to be dropped
4367	*
4368	* Returns: 0 Success
4369	* EBADF Bad file descriptor
4370	*
4371	* Description: Given an fd, look it up in the current process's per process
4372	* open file table, and drop it's fileproc's f_iocount by one
4373	*
4374	* Notes: This is intended as a corresponding operation to the functions
4375	* file_vnode() and file_socket() operations.
4376	*
4377	* Technically, the close reference is supposed to be protected
4378	* by a fileproc_drain(), however, a drain will only block if
4379	* the fd refers to a character device, and that device has had
4380	* preparefileread() called on it. If it refers to something
4381	* other than a character device, then the drain will occur and
4382	* block each close attempt, rather than merely the last close.
4383	*
4384	* Since it's possible for an fd that refers to a character
4385	* device to have an intermediate close followed by an open to
4386	* cause a different file to correspond to that descriptor,
4387	* unless there was a cautionary reference taken on the fileproc,
4388	* this is an inherently unsafe function. This happens in the
4389	* case where multiple fd's in a process refer to the same
4390	* character device (e.g. stdin/out/err pointing to a tty, etc.).
4391	*
4392	* Use of this function is discouraged.
4393	*/
4394	int
4395	file_drop(int fd)
4396	{
4397	struct fileproc *fp;
4398	proc_t p = current_proc();
4399	int needwakeup = `0`;
4400
4401	proc_fdlock_spin(p);
4402	if (fd < `0` \|\| fd >= p->p_fd->fd_nfiles \|\|
4403	(fp = p->p_fd->fd_ofiles[fd]) == NULL \|\|
4404	((p->p_fd->fd_ofileflags[fd] & UF_RESERVED) &&
4405	!(p->p_fd->fd_ofileflags[fd] & UF_CLOSING))) {
4406	proc_fdunlock(p);
4407	return (EBADF);
4408	}
4409	fp->f_iocount --;
4410
4411	if (fp->f_iocount == `0`) {
4412	if (fp->f_flags & FP_SELCONFLICT)
4413	fp->f_flags &= ~FP_SELCONFLICT;
4414
4415	if (p->p_fpdrainwait) {
4416	p->p_fpdrainwait = `0`;
4417	needwakeup = `1`;
4418	}
4419	}
4420	proc_fdunlock(p);
4421
4422	if (needwakeup)
4423	wakeup(&p->p_fpdrainwait);
4424	return(`0`);
4425	}
4426
4427
4428	static int falloc_withalloc_locked(proc_t, struct fileproc *, int* *,
4429	vfs_context_t, struct fileproc * ()(void* ), void* , int*);
4430
4431	/*
4432	* falloc
4433	*
4434	* Description: Allocate an entry in the per process open file table and
4435	* return the corresponding fileproc and fd.
4436	*
4437	* Parameters: p The process in whose open file
4438	* table the fd is to be allocated
4439	* resultfp Pointer to fileproc pointer
4440	* return area
4441	* resultfd Pointer to fd return area
4442	* ctx VFS context
4443	*
4444	* Returns: 0 Success
4445	* falloc:ENFILE Too many open files in system
4446	* falloc:EMFILE Too many open files in process
4447	* falloc:ENOMEM M_FILEPROC or M_FILEGLOB zone
4448	* exhausted
4449	*
4450	* Implicit returns:
4451	* *resultfd (modified) Returned fileproc pointer
4452	* *resultfd (modified) Returned fd
4453	*
4454	* Locks: This function takes and drops the proc_fdlock; if this lock
4455	* is already held, use falloc_locked() instead.
4456	*
4457	* Notes: This function takes separate process and context arguments
4458	* solely to support kern_exec.c; otherwise, it would take
4459	* neither, and expect falloc_locked() to use the
4460	* vfs_context_current() routine internally.
4461	*/
4462	int
4463	falloc(proc_t p, struct fileproc *resultfp, int* *resultfd, vfs_context_t ctx)
4464	{
4465	return (falloc_withalloc(p, resultfp, resultfd, ctx,
4466	fileproc_alloc_init, NULL));
4467	}
4468
4469	/*
4470	* Like falloc, but including the fileproc allocator and create-args
4471	*/
4472	int
4473	falloc_withalloc(proc_t p, struct fileproc *resultfp, int* *resultfd,
4474	vfs_context_t ctx, fp_allocfn_t fp_zalloc, void *arg)
4475	{
4476	int error;
4477
4478	proc_fdlock(p);
4479	error = falloc_withalloc_locked(p,
4480	resultfp, resultfd, ctx, fp_zalloc, arg, `1`);
4481	proc_fdunlock(p);
4482
4483	return (error);
4484	}
4485
4486	/*
4487	* "uninitialized" ops -- ensure fg->fg_ops->fo_type always exists
4488	*/
4489	static const struct fileops uninitops;
4490
4491	/*
4492	* falloc_locked
4493	*
4494	* Create a new open file structure and allocate
4495	* a file descriptor for the process that refers to it.
4496	*
4497	* Returns: 0 Success
4498	*
4499	* Description: Allocate an entry in the per process open file table and
4500	* return the corresponding fileproc and fd.
4501	*
4502	* Parameters: p The process in whose open file
4503	* table the fd is to be allocated
4504	* resultfp Pointer to fileproc pointer
4505	* return area
4506	* resultfd Pointer to fd return area
4507	* ctx VFS context
4508	* locked Flag to indicate whether the
4509	* caller holds proc_fdlock
4510	*
4511	* Returns: 0 Success
4512	* ENFILE Too many open files in system
4513	* fdalloc:EMFILE Too many open files in process
4514	* ENOMEM M_FILEPROC or M_FILEGLOB zone
4515	* exhausted
4516	* fdalloc:ENOMEM
4517	*
4518	* Implicit returns:
4519	* *resultfd (modified) Returned fileproc pointer
4520	* *resultfd (modified) Returned fd
4521	*
4522	* Locks: If the parameter 'locked' is zero, this function takes and
4523	* drops the proc_fdlock; if non-zero, the caller must hold the
4524	* lock.
4525	*
4526	* Notes: If you intend to use a non-zero 'locked' parameter, use the
4527	* utility function falloc() instead.
4528	*
4529	* This function takes separate process and context arguments
4530	* solely to support kern_exec.c; otherwise, it would take
4531	* neither, and use the vfs_context_current() routine internally.
4532	*/
4533	int
4534	falloc_locked(proc_t p, struct fileproc *resultfp, int* *resultfd,
4535	vfs_context_t ctx, int locked)
4536	{
4537	return (falloc_withalloc_locked(p, resultfp, resultfd, ctx,
4538	fileproc_alloc_init, NULL, locked));
4539	}
4540
4541	static int
4542	falloc_withalloc_locked(proc_t p, struct fileproc *resultfp, int* *resultfd,
4543	vfs_context_t ctx, fp_allocfn_t fp_zalloc, void *crarg,
4544	int locked)
4545	{
4546	struct fileproc *fp;
4547	struct fileglob *fg;
4548	int error, nfd;
4549
4550	if (!locked)
4551	proc_fdlock(p);
4552	if ( (error = fdalloc(p, `0`, &nfd)) ) {
4553	if (!locked)
4554	proc_fdunlock(p);
4555	return (error);
4556	}
4557	if (nfiles >= maxfiles) {
4558	if (!locked)
4559	proc_fdunlock(p);
4560	tablefull("file");
4561	return (ENFILE);
4562	}
4563	#if CONFIG_MACF
4564	error = mac_file_check_create(proc_ucred(p));
4565	if (error) {
4566	if (!locked)
4567	proc_fdunlock(p);
4568	return (error);
4569	}
4570	#endif
4571
4572	/*
4573	* Allocate a new file descriptor.
4574	* If the process has file descriptor zero open, add to the list
4575	* of open files at that point, otherwise put it at the front of
4576	* the list of open files.
4577	*/
4578	proc_fdunlock(p);
4579
4580	fp = (*fp_zalloc)(crarg);
4581	if (fp == NULL) {
4582	if (locked)
4583	proc_fdlock(p);
4584	return (ENOMEM);
4585	}
4586	MALLOC_ZONE(fg, struct fileglob , sizeof(struct* fileglob), M_FILEGLOB, M_WAITOK);
4587	if (fg == NULL) {
4588	fileproc_free(fp);
4589	if (locked)
4590	proc_fdlock(p);
4591	return (ENOMEM);
4592	}
4593	bzero(fg, sizeof(struct fileglob));
4594	lck_mtx_init(&fg->fg_lock, file_lck_grp, file_lck_attr);
4595
4596	fp->f_iocount = `1`;
4597	fg->fg_count = `1`;
4598	fg->fg_ops = &uninitops;
4599	fp->f_fglob = fg;
4600	#if CONFIG_MACF
4601	mac_file_label_init(fg);
4602	#endif
4603
4604	kauth_cred_ref(ctx->vc_ucred);
4605
4606	proc_fdlock(p);
4607
4608	fp->f_cred = ctx->vc_ucred;
4609
4610	#if CONFIG_MACF
4611	mac_file_label_associate(fp->f_cred, fg);
4612	#endif
4613
4614	OSAddAtomic(`1`, &nfiles);
4615
4616	p->p_fd->fd_ofiles[nfd] = fp;
4617
4618	if (!locked)
4619	proc_fdunlock(p);
4620
4621	if (resultfp)
4622	*resultfp = fp;
4623	if (resultfd)
4624	*resultfd = nfd;
4625
4626	return (`0`);
4627	}
4628
4629
4630	/*
4631	* fg_free
4632	*
4633	* Description: Free a file structure; drop the global open file count, and
4634	* drop the credential reference, if the fileglob has one, and
4635	* destroy the instance mutex before freeing
4636	*
4637	* Parameters: fg Pointer to fileglob to be
4638	* freed
4639	*
4640	* Returns: void
4641	*/
4642	void
4643	fg_free(struct fileglob *fg)
4644	{
4645	OSAddAtomic(-`1`, &nfiles);
4646
4647	if (fg->fg_vn_data) {
4648	fg_vn_data_free(fg->fg_vn_data);
4649	fg->fg_vn_data = NULL;
4650	}
4651
4652	if (IS_VALID_CRED(fg->fg_cred)) {
4653	kauth_cred_unref(&fg->fg_cred);
4654	}
4655	lck_mtx_destroy(&fg->fg_lock, file_lck_grp);
4656
4657	#if CONFIG_MACF
4658	mac_file_label_destroy(fg);
4659	#endif
4660	FREE_ZONE(fg, sizeof *fg, M_FILEGLOB);
4661	}
4662
4663
4664
4665	/*
4666	* fdexec
4667	*
4668	* Description: Perform close-on-exec processing for all files in a process
4669	* that are either marked as close-on-exec, or which were in the
4670	* process of being opened at the time of the execve
4671	*
4672	* Also handles the case (via posix_spawn()) where -all-
4673	* files except those marked with "inherit" as treated as
4674	* close-on-exec.
4675	*
4676	* Parameters: p Pointer to process calling
4677	* execve
4678	*
4679	* Returns: void
4680	*
4681	* Locks: This function internally takes and drops proc_fdlock()
4682	* But assumes tables don't grow/change while unlocked.
4683	*
4684	*/
4685	void
4686	fdexec(proc_t p, short flags, int self_exec)
4687	{
4688	struct filedesc *fdp = p->p_fd;
4689	int i;
4690	boolean_t cloexec_default = (flags & POSIX_SPAWN_CLOEXEC_DEFAULT) != `0`;
4691	thread_t self = current_thread();
4692	struct uthread *ut = get_bsdthread_info(self);
4693	struct kqueue *dealloc_kq = NULL;
4694
4695	/*
4696	* If the current thread is bound as a workq/workloop
4697	* servicing thread, we need to unbind it first.
4698	*/
4699	if (ut->uu_kqr_bound && self_exec) {
4700	kqueue_threadreq_unbind(p, ut->uu_kqr_bound);
4701	}
4702
4703	proc_fdlock(p);
4704
4705	/*
4706	* Deallocate the knotes for this process
4707	* and mark the tables non-existent so
4708	* subsequent kqueue closes go faster.
4709	*/
4710	knotes_dealloc(p);
4711	assert(fdp->fd_knlistsize == -`1`);
4712	assert(fdp->fd_knhashmask == `0`);
4713
4714	for (i = fdp->fd_lastfile; i >= `0`; i--) {
4715
4716	struct fileproc *fp = fdp->fd_ofiles[i];
4717	char *flagp = &fdp->fd_ofileflags[i];
4718
4719	if (fp && cloexec_default) {
4720	/*
4721	* Reverse the usual semantics of file descriptor
4722	* inheritance - all of them should be closed
4723	* except files marked explicitly as "inherit" and
4724	* not marked close-on-exec.
4725	*/
4726	if ((*flagp & (UF_EXCLOSE\|UF_INHERIT)) != UF_INHERIT)
4727	*flagp \|= UF_EXCLOSE;
4728	*flagp &= ~UF_INHERIT;
4729	}
4730
4731	if (
4732	((*flagp & (UF_RESERVED\|UF_EXCLOSE)) == UF_EXCLOSE)
4733	#if CONFIG_MACF
4734	\|\| (fp && mac_file_check_inherit(proc_ucred(p), fp->f_fglob))
4735	#endif
4736	) {
4737	procfdtbl_clearfd(p, i);
4738	if (i == fdp->fd_lastfile && i > `0`)
4739	fdp->fd_lastfile--;
4740	if (i < fdp->fd_freefile)
4741	fdp->fd_freefile = i;
4742
4743	/*
4744	* Wait for any third party viewers (e.g., lsof)
4745	* to release their references to this fileproc.
4746	*/
4747	while (fp->f_iocount > `0`) {
4748	p->p_fpdrainwait = `1`;
4749	msleep(&p->p_fpdrainwait, &p->p_fdmlock, PRIBIO,
4750	"fpdrain", NULL);
4751	}
4752
4753	closef_locked(fp, fp->f_fglob, p);
4754
4755	fileproc_free(fp);
4756	}
4757	}
4758
4759	/ release the per-process workq kq /
4760	if (fdp->fd_wqkqueue) {
4761	dealloc_kq = fdp->fd_wqkqueue;
4762	fdp->fd_wqkqueue = NULL;
4763	}
4764
4765	proc_fdunlock(p);
4766
4767	/ Anything to free? /
4768	if (dealloc_kq)
4769	kqueue_dealloc(dealloc_kq);
4770	}
4771
4772
4773	/*
4774	* fdcopy
4775	*
4776	* Description: Copy a filedesc structure. This is normally used as part of
4777	* forkproc() when forking a new process, to copy the per process
4778	* open file table over to the new process.
4779	*
4780	* Parameters: p Process whose open file table
4781	* is to be copied (parent)
4782	* uth_cdir Per thread current working
4783	* directory, or NULL
4784	*
4785	* Returns: NULL Copy failed
4786	* !NULL Pointer to new struct filedesc
4787	*
4788	* Locks: This function internally takes and drops proc_fdlock()
4789	*
4790	* Notes: Files are copied directly, ignoring the new resource limits
4791	* for the process that's being copied into. Since the descriptor
4792	* references are just additional references, this does not count
4793	* against the number of open files on the system.
4794	*
4795	* The struct filedesc includes the current working directory,
4796	* and the current root directory, if the process is chroot'ed.
4797	*
4798	* If the exec was called by a thread using a per thread current
4799	* working directory, we inherit the working directory from the
4800	* thread making the call, rather than from the process.
4801	*
4802	* In the case of a failure to obtain a reference, for most cases,
4803	* the file entry will be silently dropped. There's an exception
4804	* for the case of a chroot dir, since a failure to obtain a
4805	* reference there would constitute an "escape" from the chroot
4806	* environment, which must not be allowed. In that case, we will
4807	* deny the execve() operation, rather than allowing the escape.
4808	*/
4809	struct filedesc *
4810	fdcopy(proc_t p, vnode_t uth_cdir)
4811	{
4812	struct filedesc newfdp, fdp = p->p_fd;
4813	int i;
4814	struct fileproc ofp, fp;
4815	vnode_t v_dir;
4816
4817	MALLOC_ZONE(newfdp, struct filedesc *,
4818	sizeof(*newfdp), M_FILEDESC, M_WAITOK);
4819	if (newfdp == NULL)
4820	return(NULL);
4821
4822	proc_fdlock(p);
4823
4824	/*
4825	* the FD_CHROOT flag will be inherited via this copy
4826	*/
4827	(void) memcpy(newfdp, fdp, sizeof(*newfdp));
4828
4829	/*
4830	* If we are running with per-thread current working directories,
4831	* inherit the new current working directory from the current thread
4832	* instead, before we take our references.
4833	*/
4834	if (uth_cdir != NULLVP)
4835	newfdp->fd_cdir = uth_cdir;
4836
4837	/*
4838	* For both fd_cdir and fd_rdir make sure we get
4839	* a valid reference... if we can't, than set
4840	* set the pointer(s) to NULL in the child... this
4841	* will keep us from using a non-referenced vp
4842	* and allows us to do the vnode_rele only on
4843	* a properly referenced vp
4844	*/
4845	if ( (v_dir = newfdp->fd_cdir) ) {
4846	if (vnode_getwithref(v_dir) == `0`) {
4847	if ( (vnode_ref(v_dir)) )
4848	newfdp->fd_cdir = NULL;
4849	vnode_put(v_dir);
4850	} else
4851	newfdp->fd_cdir = NULL;
4852	}
4853	if (newfdp->fd_cdir == NULL && fdp->fd_cdir) {
4854	/*
4855	* we couldn't get a new reference on
4856	* the current working directory being
4857	* inherited... we might as well drop
4858	* our reference from the parent also
4859	* since the vnode has gone DEAD making
4860	* it useless... by dropping it we'll
4861	* be that much closer to recycling it
4862	*/
4863	vnode_rele(fdp->fd_cdir);
4864	fdp->fd_cdir = NULL;
4865	}
4866
4867	if ( (v_dir = newfdp->fd_rdir) ) {
4868	if (vnode_getwithref(v_dir) == `0`) {
4869	if ( (vnode_ref(v_dir)) )
4870	newfdp->fd_rdir = NULL;
4871	vnode_put(v_dir);
4872	} else {
4873	newfdp->fd_rdir = NULL;
4874	}
4875	}
4876	/ Coming from a chroot environment and unable to get a reference... /
4877	if (newfdp->fd_rdir == NULL && fdp->fd_rdir) {
4878	/*
4879	* We couldn't get a new reference on
4880	* the chroot directory being
4881	* inherited... this is fatal, since
4882	* otherwise it would constitute an
4883	* escape from a chroot environment by
4884	* the new process.
4885	*/
4886	if (newfdp->fd_cdir)
4887	vnode_rele(newfdp->fd_cdir);
4888	FREE_ZONE(newfdp, sizeof *newfdp, M_FILEDESC);
4889	return(NULL);
4890	}
4891
4892	/*
4893	* If the number of open files fits in the internal arrays
4894	* of the open file structure, use them, otherwise allocate
4895	* additional memory for the number of descriptors currently
4896	* in use.
4897	*/
4898	if (newfdp->fd_lastfile < NDFILE)
4899	i = NDFILE;
4900	else {
4901	/*
4902	* Compute the smallest multiple of NDEXTENT needed
4903	* for the file descriptors currently in use,
4904	* allowing the table to shrink.
4905	*/
4906	i = newfdp->fd_nfiles;
4907	while (i > `1` + `2` * NDEXTENT && i > `1` + newfdp->fd_lastfile * `2`)
4908	i /= `2`;
4909	}
4910	proc_fdunlock(p);
4911
4912	MALLOC_ZONE(newfdp->fd_ofiles, struct fileproc **,
4913	i * OFILESIZE, M_OFILETABL, M_WAITOK);
4914	if (newfdp->fd_ofiles == NULL) {
4915	if (newfdp->fd_cdir)
4916	vnode_rele(newfdp->fd_cdir);
4917	if (newfdp->fd_rdir)
4918	vnode_rele(newfdp->fd_rdir);
4919
4920	FREE_ZONE(newfdp, sizeof(*newfdp), M_FILEDESC);
4921	return(NULL);
4922	}
4923	(void) memset(newfdp->fd_ofiles, `0`, i * OFILESIZE);
4924	proc_fdlock(p);
4925
4926	newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
4927	newfdp->fd_nfiles = i;
4928
4929	if (fdp->fd_nfiles > `0`) {
4930	struct fileproc **fpp;
4931	char *flags;
4932
4933	(void) memcpy(newfdp->fd_ofiles, fdp->fd_ofiles,
4934	(newfdp->fd_lastfile + `1`) * sizeof(*fdp->fd_ofiles));
4935	(void) memcpy(newfdp->fd_ofileflags, fdp->fd_ofileflags,
4936	(newfdp->fd_lastfile + `1`) * sizeof(*fdp->fd_ofileflags));
4937
4938	/*
4939	* kq descriptors cannot be copied.
4940	*/
4941	if (newfdp->fd_knlistsize != -`1`) {
4942	fpp = &newfdp->fd_ofiles[newfdp->fd_lastfile];
4943	flags = &newfdp->fd_ofileflags[newfdp->fd_lastfile];
4944	for (i = newfdp->fd_lastfile;
4945	i >= `0`; i--, fpp--, flags--) {
4946	if (*flags & UF_RESERVED)
4947	continue; / (removed below) /
4948	if (fpp != NULL && (fpp)->f_type == DTYPE_KQUEUE) {
4949	*fpp = NULL;
4950	*flags = `0`;
4951	if (i < newfdp->fd_freefile)
4952	newfdp->fd_freefile = i;
4953	}
4954	if (*fpp == NULL && i == newfdp->fd_lastfile && i > `0`)
4955	newfdp->fd_lastfile--;
4956	}
4957	}
4958	fpp = newfdp->fd_ofiles;
4959	flags = newfdp->fd_ofileflags;
4960
4961	for (i = newfdp->fd_lastfile + `1`; --i >= `0`; fpp++, flags++)
4962	if ((ofp = *fpp) != NULL &&
4963	`0` == (ofp->f_fglob->fg_lflags & FG_CONFINED) &&
4964	`0` == (*flags & (UF_FORKCLOSE\|UF_RESERVED))) {
4965	#if DEBUG
4966	if (FILEPROC_TYPE(ofp) != FTYPE_SIMPLE)
4967	panic("complex fileproc");
4968	#endif
4969	fp = fileproc_alloc_init(NULL);
4970	if (fp == NULL) {
4971	/*
4972	* XXX no room to copy, unable to
4973	* XXX safely unwind state at present
4974	*/
4975	*fpp = NULL;
4976	} else {
4977	fp->f_flags \|=
4978	(ofp->f_flags & ~FP_TYPEMASK);
4979	fp->f_fglob = ofp->f_fglob;
4980	(void)fg_ref(fp);
4981	*fpp = fp;
4982	}
4983	} else {
4984	if (i < newfdp->fd_freefile)
4985	newfdp->fd_freefile = i;
4986	*fpp = NULL;
4987	*flags = `0`;
4988	}
4989	}
4990
4991	proc_fdunlock(p);
4992
4993	/*
4994	* Initialize knote and kqueue tracking structs
4995	*/
4996	newfdp->fd_knlist = NULL;
4997	newfdp->fd_knlistsize = -`1`;
4998	newfdp->fd_knhash = NULL;
4999	newfdp->fd_knhashmask = `0`;
5000	newfdp->fd_kqhash = NULL;
5001	newfdp->fd_kqhashmask = `0`;
5002	newfdp->fd_wqkqueue = NULL;
5003	lck_mtx_init(&newfdp->fd_kqhashlock, proc_kqhashlock_grp, proc_lck_attr);
5004	lck_mtx_init(&newfdp->fd_knhashlock, proc_knhashlock_grp, proc_lck_attr);
5005
5006	return (newfdp);
5007	}
5008
5009
5010	/*
5011	* fdfree
5012	*
5013	* Description: Release a filedesc (per process open file table) structure;
5014	* this is done on process exit(), or from forkproc_free() if
5015	* the fork fails for some reason subsequent to a successful
5016	* call to fdcopy()
5017	*
5018	* Parameters: p Pointer to process going away
5019	*
5020	* Returns: void
5021	*
5022	* Locks: This function internally takes and drops proc_fdlock()
5023	*/
5024	void
5025	fdfree(proc_t p)
5026	{
5027	struct filedesc *fdp;
5028	struct fileproc *fp;
5029	struct kqueue *dealloc_kq = NULL;
5030	int i;
5031
5032	proc_fdlock(p);
5033
5034	if (p == kernproc \|\| NULL == (fdp = p->p_fd)) {
5035	proc_fdunlock(p);
5036	return;
5037	}
5038
5039	extern struct filedesc filedesc0;
5040
5041	if (&filedesc0 == fdp)
5042	panic("filedesc0");
5043
5044	/*
5045	* deallocate all the knotes up front and claim empty
5046	* tables to make any subsequent kqueue closes faster.
5047	*/
5048	knotes_dealloc(p);
5049	assert(fdp->fd_knlistsize == -`1`);
5050	assert(fdp->fd_knhashmask == `0`);
5051
5052	/*
5053	* dealloc all workloops that have outstanding retains
5054	* when created with scheduling parameters.
5055	*/
5056	kqworkloops_dealloc(p);
5057
5058	/ close file descriptors /
5059	if (fdp->fd_nfiles > `0` && fdp->fd_ofiles) {
5060	for (i = fdp->fd_lastfile; i >= `0`; i--) {
5061	if ((fp = fdp->fd_ofiles[i]) != NULL) {
5062
5063	if (fdp->fd_ofileflags[i] & UF_RESERVED)
5064	panic("fdfree: found fp with UF_RESERVED");
5065
5066	procfdtbl_reservefd(p, i);
5067
5068	if (fp->f_flags & FP_WAITEVENT)
5069	(void)waitevent_close(p, fp);
5070	(void) closef_locked(fp, fp->f_fglob, p);
5071	fileproc_free(fp);
5072	}
5073	}
5074	FREE_ZONE(fdp->fd_ofiles, fdp->fd_nfiles * OFILESIZE, M_OFILETABL);
5075	fdp->fd_ofiles = NULL;
5076	fdp->fd_nfiles = `0`;
5077	}
5078
5079	if (fdp->fd_wqkqueue) {
5080	dealloc_kq = fdp->fd_wqkqueue;
5081	fdp->fd_wqkqueue = NULL;
5082	}
5083
5084	proc_fdunlock(p);
5085
5086	if (dealloc_kq)
5087	kqueue_dealloc(dealloc_kq);
5088
5089	if (fdp->fd_cdir)
5090	vnode_rele(fdp->fd_cdir);
5091	if (fdp->fd_rdir)
5092	vnode_rele(fdp->fd_rdir);
5093
5094	proc_fdlock_spin(p);
5095	p->p_fd = NULL;
5096	proc_fdunlock(p);
5097
5098	if (fdp->fd_kqhash) {
5099	for (uint32_t j = `0`; j <= fdp->fd_kqhashmask; j++)
5100	assert(SLIST_EMPTY(&fdp->fd_kqhash[j]));
5101	FREE(fdp->fd_kqhash, M_KQUEUE);
5102	}
5103
5104	lck_mtx_destroy(&fdp->fd_kqhashlock, proc_kqhashlock_grp);
5105	lck_mtx_destroy(&fdp->fd_knhashlock, proc_knhashlock_grp);
5106
5107	FREE_ZONE(fdp, sizeof(*fdp), M_FILEDESC);
5108	}
5109
5110	/*
5111	* closef_locked
5112	*
5113	* Description: Internal form of closef; called with proc_fdlock held
5114	*
5115	* Parameters: fp Pointer to fileproc for fd
5116	* fg Pointer to fileglob for fd
5117	* p Pointer to proc structure
5118	*
5119	* Returns: 0 Success
5120	* closef_finish:??? Anything returnable by a per-fileops
5121	* close function
5122	*
5123	* Note: Decrements reference count on file structure; if this was the
5124	* last reference, then closef_finish() is called
5125	*
5126	* p and fp are allowed to be NULL when closing a file that was
5127	* being passed in a message (but only if we are called when this
5128	* is NOT the last reference).
5129	*/
5130	int
5131	closef_locked(struct fileproc fp, struct* fileglob *fg, proc_t p)
5132	{
5133	struct vnode *vp;
5134	struct flock lf;
5135	struct vfs_context context;
5136	int error;
5137
5138	if (fg == NULL) {
5139	return (`0`);
5140	}
5141
5142	/ Set up context with cred stashed in fg /
5143	if (p == current_proc())
5144	context.vc_thread = current_thread();
5145	else
5146	context.vc_thread = NULL;
5147	context.vc_ucred = fg->fg_cred;
5148
5149	/*
5150	* POSIX record locking dictates that any close releases ALL
5151	* locks owned by this process. This is handled by setting
5152	* a flag in the unlock to free ONLY locks obeying POSIX
5153	* semantics, and not to free BSD-style file locks.
5154	* If the descriptor was in a message, POSIX-style locks
5155	* aren't passed with the descriptor.
5156	*/
5157	if (p && (p->p_ladvflag & P_LADVLOCK) &&
5158	DTYPE_VNODE == FILEGLOB_DTYPE(fg)) {
5159	proc_fdunlock(p);
5160
5161	lf.l_whence = SEEK_SET;
5162	lf.l_start = `0`;
5163	lf.l_len = `0`;
5164	lf.l_type = F_UNLCK;
5165	vp = (struct vnode *)fg->fg_data;
5166
5167	if ( (error = vnode_getwithref(vp)) == `0` ) {
5168	(void) VNOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX, &context, NULL);
5169	(void)vnode_put(vp);
5170	}
5171	proc_fdlock(p);
5172	}
5173	lck_mtx_lock_spin(&fg->fg_lock);
5174	fg->fg_count--;
5175
5176	if (fg->fg_count > `0`) {
5177	lck_mtx_unlock(&fg->fg_lock);
5178	return (`0`);
5179	}
5180	#if DIAGNOSTIC
5181	if (fg->fg_count != `0`)
5182	panic("fg %p: being freed with bad fg_count (%d)", fg, fg->fg_count);
5183	#endif
5184
5185	if (fp && (fp->f_flags & FP_WRITTEN))
5186	fg->fg_flag \|= FWASWRITTEN;
5187
5188	fg->fg_lflags \|= FG_TERM;
5189	lck_mtx_unlock(&fg->fg_lock);
5190
5191	if (p)
5192	proc_fdunlock(p);
5193
5194	/ Since we ensure that fg->fg_ops is always initialized,*
5195	* it is safe to invoke fo_close on the fg */
5196	error = fo_close(fg, &context);
5197
5198	fg_free(fg);
5199
5200	if (p)
5201	proc_fdlock(p);
5202
5203	return(error);
5204	}
5205
5206
5207	/*
5208	* fileproc_drain
5209	*
5210	* Description: Drain out pending I/O operations
5211	*
5212	* Parameters: p Process closing this file
5213	* fp fileproc struct for the open
5214	* instance on the file
5215	*
5216	* Returns: void
5217	*
5218	* Locks: Assumes the caller holds the proc_fdlock
5219	*
5220	* Notes: For character devices, this occurs on the last close of the
5221	* device; for all other file descriptors, this occurs on each
5222	* close to prevent fd's from being closed out from under
5223	* operations currently in progress and blocked
5224	*
5225	* See Also: file_vnode(), file_socket(), file_drop(), and the cautions
5226	* regarding their use and interaction with this function.
5227	*/
5228	void
5229	fileproc_drain(proc_t p, struct fileproc * fp)
5230	{
5231	struct vfs_context context;
5232
5233	context.vc_thread = proc_thread(p); / XXX /
5234	context.vc_ucred = fp->f_fglob->fg_cred;
5235
5236	fp->f_iocount-- ; / (the one the close holds) /
5237
5238	while (fp->f_iocount) {
5239
5240	lck_mtx_convert_spin(&p->p_fdmlock);
5241
5242	if (fp->f_fglob->fg_ops->fo_drain) {
5243	(*fp->f_fglob->fg_ops->fo_drain)(fp, &context);
5244	}
5245	if ((fp->f_flags & FP_INSELECT) == FP_INSELECT) {
5246	if (waitq_wakeup64_all((struct waitq *)fp->f_wset, NO_EVENT64,
5247	THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES) == KERN_INVALID_ARGUMENT)
5248	panic("bad wait queue for waitq_wakeup64_all %p (fp:%p)", fp->f_wset, fp);
5249	}
5250	if ((fp->f_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
5251	if (waitq_wakeup64_all(&select_conflict_queue, NO_EVENT64,
5252	THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES) == KERN_INVALID_ARGUMENT)
5253	panic("bad select_conflict_queue");
5254	}
5255	p->p_fpdrainwait = `1`;
5256
5257	msleep(&p->p_fpdrainwait, &p->p_fdmlock, PRIBIO, "fpdrain", NULL);
5258
5259	}
5260	#if DIAGNOSTIC
5261	if ((fp->f_flags & FP_INSELECT) != `0`)
5262	panic("FP_INSELECT set on drained fp");
5263	#endif
5264	if ((fp->f_flags & FP_SELCONFLICT) == FP_SELCONFLICT)
5265	fp->f_flags &= ~FP_SELCONFLICT;
5266	}
5267
5268
5269	/*
5270	* fp_free
5271	*
5272	* Description: Release the fd and free the fileproc associated with the fd
5273	* in the per process open file table of the specified process;
5274	* these values must correspond.
5275	*
5276	* Parameters: p Process containing fd
5277	* fd fd to be released
5278	* fp fileproc to be freed
5279	*
5280	* Returns: 0 Success
5281	*
5282	* Notes: XXX function should be void - no one interprets the returns
5283	* XXX code
5284	*/
5285	int
5286	fp_free(proc_t p, int fd, struct fileproc * fp)
5287	{
5288	proc_fdlock_spin(p);
5289	fdrelse(p, fd);
5290	proc_fdunlock(p);
5291
5292	fg_free(fp->f_fglob);
5293	fileproc_free(fp);
5294	return(`0`);
5295	}
5296
5297
5298	/*
5299	* flock
5300	*
5301	* Description: Apply an advisory lock on a file descriptor.
5302	*
5303	* Parameters: p Process making request
5304	* uap->fd fd on which the lock is to be
5305	* attempted
5306	* uap->how (Un)Lock bits, including type
5307	* retval Pointer to the call return area
5308	*
5309	* Returns: 0 Success
5310	* fp_getfvp:EBADF Bad file descriptor
5311	* fp_getfvp:ENOTSUP fd does not refer to a vnode
5312	* vnode_getwithref:???
5313	* VNOP_ADVLOCK:???
5314	*
5315	* Implicit returns:
5316	* *retval (modified) Size of dtable
5317	*
5318	* Notes: Just attempt to get a record lock of the requested type on
5319	* the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
5320	*/
5321	int
5322	flock(proc_t p, struct flock_args uap, __unused int32_t retval)
5323	{
5324	int fd = uap->fd;
5325	int how = uap->how;
5326	struct fileproc *fp;
5327	struct vnode *vp;
5328	struct flock lf;
5329	vfs_context_t ctx = vfs_context_current();
5330	int error=`0`;
5331
5332	AUDIT_ARG(fd, uap->fd);
5333	if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
5334	return(error);
5335	}
5336	if ( (error = vnode_getwithref(vp)) ) {
5337	goto out1;
5338	}
5339	AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5340
5341	lf.l_whence = SEEK_SET;
5342	lf.l_start = `0`;
5343	lf.l_len = `0`;
5344	if (how & LOCK_UN) {
5345	lf.l_type = F_UNLCK;
5346	fp->f_flag &= ~FHASLOCK;
5347	error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
5348	goto out;
5349	}
5350	if (how & LOCK_EX)
5351	lf.l_type = F_WRLCK;
5352	else if (how & LOCK_SH)
5353	lf.l_type = F_RDLCK;
5354	else {
5355	error = EBADF;
5356	goto out;
5357	}
5358	#if CONFIG_MACF
5359	error = mac_file_check_lock(proc_ucred(p), fp->f_fglob, F_SETLK, &lf);
5360	if (error)
5361	goto out;
5362	#endif
5363	error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf,
5364	(how & LOCK_NB ? F_FLOCK : F_FLOCK \| F_WAIT),
5365	ctx, NULL);
5366	if (!error)
5367	fp->f_flag \|= FHASLOCK;
5368	out:
5369	(void)vnode_put(vp);
5370	out1:
5371	fp_drop(p, fd, fp, `0`);
5372	return(error);
5373
5374	}
5375
5376	/*
5377	* fileport_makeport
5378	*
5379	* Description: Obtain a Mach send right for a given file descriptor.
5380	*
5381	* Parameters: p Process calling fileport
5382	* uap->fd The fd to reference
5383	* uap->portnamep User address at which to place port name.
5384	*
5385	* Returns: 0 Success.
5386	* EBADF Bad file descriptor.
5387	* EINVAL File descriptor had type that cannot be sent, misc. other errors.
5388	* EFAULT Address at which to store port name is not valid.
5389	* EAGAIN Resource shortage.
5390	*
5391	* Implicit returns:
5392	* On success, name of send right is stored at user-specified address.
5393	*/
5394	int
5395	fileport_makeport(proc_t p, struct fileport_makeport_args *uap,
5396	__unused int *retval)
5397	{
5398	int err;
5399	int fd = uap->fd;
5400	user_addr_t user_portaddr = uap->portnamep;
5401	struct fileproc *fp = FILEPROC_NULL;
5402	struct fileglob *fg = NULL;
5403	ipc_port_t fileport;
5404	mach_port_name_t name = MACH_PORT_NULL;
5405
5406	proc_fdlock(p);
5407	err = fp_lookup(p, fd, &fp, `1`);
5408	if (err != `0`) {
5409	goto out_unlock;
5410	}
5411
5412	if (!file_issendable(p, fp)) {
5413	err = EINVAL;
5414	goto out_unlock;
5415	}
5416
5417	if (FP_ISGUARDED(fp, GUARD_FILEPORT)) {
5418	err = fp_guard_exception(p, fd, fp, kGUARD_EXC_FILEPORT);
5419	goto out_unlock;
5420	}
5421
5422	/ Dropped when port is deallocated /
5423	fg = fp->f_fglob;
5424	fg_ref(fp);
5425
5426	proc_fdunlock(p);
5427
5428	/ Allocate and initialize a port /
5429	fileport = fileport_alloc(fg);
5430	if (fileport == IPC_PORT_NULL) {
5431	err = EAGAIN;
5432	fg_drop(fp);
5433	goto out;
5434	}
5435
5436	/ Add an entry. Deallocates port on failure. /
5437	name = ipc_port_copyout_send(fileport, get_task_ipcspace(p->task));
5438	if (!MACH_PORT_VALID(name)) {
5439	err = EINVAL;
5440	goto out;
5441	}
5442
5443	err = copyout(&name, user_portaddr, sizeof(mach_port_name_t));
5444	if (err != `0`) {
5445	goto out;
5446	}
5447
5448	/ Tag the fileglob for debugging purposes /
5449	lck_mtx_lock_spin(&fg->fg_lock);
5450	fg->fg_lflags \|= FG_PORTMADE;
5451	lck_mtx_unlock(&fg->fg_lock);
5452
5453	fp_drop(p, fd, fp, `0`);
5454
5455	return `0`;
5456
5457	out_unlock:
5458	proc_fdunlock(p);
5459	out:
5460	if (MACH_PORT_VALID(name)) {
5461	/ Don't care if another thread races us to deallocate the entry /
5462	(void) mach_port_deallocate(get_task_ipcspace(p->task), name);
5463	}
5464
5465	if (fp != FILEPROC_NULL) {
5466	fp_drop(p, fd, fp, `0`);
5467	}
5468
5469	return err;
5470	}
5471
5472	void
5473	fileport_releasefg(struct fileglob *fg)
5474	{
5475	(void)closef_locked(NULL, fg, PROC_NULL);
5476
5477	return;
5478	}
5479
5480
5481	/*
5482	* fileport_makefd
5483	*
5484	* Description: Obtain the file descriptor for a given Mach send right.
5485	*
5486	* Parameters: p Process calling fileport
5487	* uap->port Name of send right to file port.
5488	*
5489	* Returns: 0 Success
5490	* EINVAL Invalid Mach port name, or port is not for a file.
5491	* fdalloc:EMFILE
5492	* fdalloc:ENOMEM Unable to allocate fileproc or extend file table.
5493	*
5494	* Implicit returns:
5495	* *retval (modified) The new descriptor
5496	*/
5497	int
5498	fileport_makefd(proc_t p, struct fileport_makefd_args uap, int32_t retval)
5499	{
5500	struct fileglob *fg;
5501	struct fileproc *fp = FILEPROC_NULL;
5502	ipc_port_t port = IPC_PORT_NULL;
5503	mach_port_name_t send = uap->port;
5504	kern_return_t res;
5505	int fd;
5506	int err;
5507
5508	res = ipc_object_copyin(get_task_ipcspace(p->task),
5509	send, MACH_MSG_TYPE_COPY_SEND, &port);
5510
5511	if (res != KERN_SUCCESS) {
5512	err = EINVAL;
5513	goto out;
5514	}
5515
5516	fg = fileport_port_to_fileglob(port);
5517	if (fg == NULL) {
5518	err = EINVAL;
5519	goto out;
5520	}
5521
5522	fp = fileproc_alloc_init(NULL);
5523	if (fp == FILEPROC_NULL) {
5524	err = ENOMEM;
5525	goto out;
5526	}
5527
5528	fp->f_fglob = fg;
5529	fg_ref(fp);
5530
5531	proc_fdlock(p);
5532	err = fdalloc(p, `0`, &fd);
5533	if (err != `0`) {
5534	proc_fdunlock(p);
5535	fg_drop(fp);
5536	goto out;
5537	}
5538	*fdflags(p, fd) \|= UF_EXCLOSE;
5539
5540	procfdtbl_releasefd(p, fd, fp);
5541	proc_fdunlock(p);
5542
5543	*retval = fd;
5544	err = `0`;
5545	out:
5546	if ((fp != NULL) && (`0` != err)) {
5547	fileproc_free(fp);
5548	}
5549
5550	if (IPC_PORT_NULL != port) {
5551	ipc_port_release_send(port);
5552	}
5553
5554	return err;
5555	}
5556
5557
5558	/*
5559	* dupfdopen
5560	*
5561	* Description: Duplicate the specified descriptor to a free descriptor;
5562	* this is the second half of fdopen(), above.
5563	*
5564	* Parameters: fdp filedesc pointer to fill in
5565	* indx fd to dup to
5566	* dfd fd to dup from
5567	* mode mode to set on new fd
5568	* error command code
5569	*
5570	* Returns: 0 Success
5571	* EBADF Source fd is bad
5572	* EACCES Requested mode not allowed
5573	* !0 'error', if not ENODEV or
5574	* ENXIO
5575	*
5576	* Notes: XXX This is not thread safe; see fdopen() above
5577	*/
5578	int
5579	dupfdopen(struct filedesc fdp, int* indx, int dfd, int flags, int error)
5580	{
5581	struct fileproc *wfp;
5582	struct fileproc *fp;
5583	#if CONFIG_MACF
5584	int myerror;
5585	#endif
5586	proc_t p = current_proc();
5587
5588	/*
5589	* If the to-be-dup'd fd number is greater than the allowed number
5590	* of file descriptors, or the fd to be dup'd has already been
5591	* closed, reject. Note, check for new == old is necessary as
5592	* falloc could allocate an already closed to-be-dup'd descriptor
5593	* as the new descriptor.
5594	*/
5595	proc_fdlock(p);
5596
5597	fp = fdp->fd_ofiles[indx];
5598	if (dfd < `0` \|\| dfd >= fdp->fd_nfiles \|\|
5599	(wfp = fdp->fd_ofiles[dfd]) == NULL \|\| wfp == fp \|\|
5600	(fdp->fd_ofileflags[dfd] & UF_RESERVED)) {
5601
5602	proc_fdunlock(p);
5603	return (EBADF);
5604	}
5605	#if CONFIG_MACF
5606	myerror = mac_file_check_dup(proc_ucred(p), wfp->f_fglob, dfd);
5607	if (myerror) {
5608	proc_fdunlock(p);
5609	return (myerror);
5610	}
5611	#endif
5612	/*
5613	* There are two cases of interest here.
5614	*
5615	* For ENODEV simply dup (dfd) to file descriptor
5616	* (indx) and return.
5617	*
5618	* For ENXIO steal away the file structure from (dfd) and
5619	* store it in (indx). (dfd) is effectively closed by
5620	* this operation.
5621	*
5622	* Any other error code is just returned.
5623	*/
5624	switch (error) {
5625	case ENODEV:
5626	if (FP_ISGUARDED(wfp, GUARD_DUP)) {
5627	proc_fdunlock(p);
5628	return (EPERM);
5629	}
5630
5631	/*
5632	* Check that the mode the file is being opened for is a
5633	* subset of the mode of the existing descriptor.
5634	*/
5635	if (((flags & (FREAD\|FWRITE)) \| wfp->f_flag) != wfp->f_flag) {
5636	proc_fdunlock(p);
5637	return (EACCES);
5638	}
5639	if (indx > fdp->fd_lastfile)
5640	fdp->fd_lastfile = indx;
5641	(void)fg_ref(wfp);
5642
5643	if (fp->f_fglob)
5644	fg_free(fp->f_fglob);
5645	fp->f_fglob = wfp->f_fglob;
5646
5647	fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd] \|
5648	(flags & O_CLOEXEC) ? UF_EXCLOSE : `0`;
5649
5650	proc_fdunlock(p);
5651	return (`0`);
5652
5653	default:
5654	proc_fdunlock(p);
5655	return (error);
5656	}
5657	/ NOTREACHED /
5658	}
5659
5660
5661	/*
5662	* fg_ref
5663	*
5664	* Description: Add a reference to a fileglob by fileproc
5665	*
5666	* Parameters: fp fileproc containing fileglob
5667	* pointer
5668	*
5669	* Returns: void
5670	*
5671	* Notes: XXX Should use OSAddAtomic?
5672	*/
5673	void
5674	fg_ref(struct fileproc * fp)
5675	{
5676	struct fileglob *fg;
5677
5678	fg = fp->f_fglob;
5679
5680	lck_mtx_lock_spin(&fg->fg_lock);
5681
5682	#if DIAGNOSTIC
5683	if ((fp->f_flags & ~((unsigned int)FP_VALID_FLAGS)) != `0`)
5684	panic("fg_ref: invalid bits on fp %p", fp);
5685
5686	if (fg->fg_count == `0`)
5687	panic("fg_ref: adding fgcount to zeroed fg: fp %p fg %p",
5688	fp, fg);
5689	#endif
5690	fg->fg_count++;
5691	lck_mtx_unlock(&fg->fg_lock);
5692	}
5693
5694
5695	/*
5696	* fg_drop
5697	*
5698	* Description: Remove a reference to a fileglob by fileproc
5699	*
5700	* Parameters: fp fileproc containing fileglob
5701	* pointer
5702	*
5703	* Returns: void
5704	*
5705	* Notes: XXX Should use OSAddAtomic?
5706	*/
5707	void
5708	fg_drop(struct fileproc * fp)
5709	{
5710	struct fileglob *fg;
5711
5712	fg = fp->f_fglob;
5713	lck_mtx_lock_spin(&fg->fg_lock);
5714	fg->fg_count--;
5715	lck_mtx_unlock(&fg->fg_lock);
5716	}
5717
5718	#if SOCKETS
5719	/*
5720	* fg_insertuipc_mark
5721	*
5722	* Description: Mark fileglob for insertion onto message queue if needed
5723	* Also takes fileglob reference
5724	*
5725	* Parameters: fg Fileglob pointer to insert
5726	*
5727	* Returns: true, if the fileglob needs to be inserted onto msg queue
5728	*
5729	* Locks: Takes and drops fg_lock, potentially many times
5730	*/
5731	boolean_t
5732	fg_insertuipc_mark(struct fileglob * fg)
5733	{
5734	boolean_t insert = FALSE;
5735
5736	lck_mtx_lock_spin(&fg->fg_lock);
5737	while (fg->fg_lflags & FG_RMMSGQ) {
5738	lck_mtx_convert_spin(&fg->fg_lock);
5739
5740	fg->fg_lflags \|= FG_WRMMSGQ;
5741	msleep(&fg->fg_lflags, &fg->fg_lock, `0`, "fg_insertuipc", NULL);
5742	}
5743
5744	fg->fg_count++;
5745	fg->fg_msgcount++;
5746	if (fg->fg_msgcount == `1`) {
5747	fg->fg_lflags \|= FG_INSMSGQ;
5748	insert = TRUE;
5749	}
5750	lck_mtx_unlock(&fg->fg_lock);
5751	return (insert);
5752	}
5753
5754	/*
5755	* fg_insertuipc
5756	*
5757	* Description: Insert marked fileglob onto message queue
5758	*
5759	* Parameters: fg Fileglob pointer to insert
5760	*
5761	* Returns: void
5762	*
5763	* Locks: Takes and drops fg_lock & uipc_lock
5764	* DO NOT call this function with proc_fdlock held as unp_gc()
5765	* can potentially try to acquire proc_fdlock, which can result
5766	* in a deadlock if this function is in unp_gc_wait().
5767	*/
5768	void
5769	fg_insertuipc(struct fileglob * fg)
5770	{
5771	if (fg->fg_lflags & FG_INSMSGQ) {
5772	lck_mtx_lock_spin(uipc_lock);
5773	unp_gc_wait();
5774	LIST_INSERT_HEAD(&fmsghead, fg, f_msglist);
5775	lck_mtx_unlock(uipc_lock);
5776	lck_mtx_lock(&fg->fg_lock);
5777	fg->fg_lflags &= ~FG_INSMSGQ;
5778	if (fg->fg_lflags & FG_WINSMSGQ) {
5779	fg->fg_lflags &= ~FG_WINSMSGQ;
5780	wakeup(&fg->fg_lflags);
5781	}
5782	lck_mtx_unlock(&fg->fg_lock);
5783	}
5784	}
5785
5786	/*
5787	* fg_removeuipc_mark
5788	*
5789	* Description: Mark the fileglob for removal from message queue if needed
5790	* Also releases fileglob message queue reference
5791	*
5792	* Parameters: fg Fileglob pointer to remove
5793	*
5794	* Returns: true, if the fileglob needs to be removed from msg queue
5795	*
5796	* Locks: Takes and drops fg_lock, potentially many times
5797	*/
5798	boolean_t
5799	fg_removeuipc_mark(struct fileglob * fg)
5800	{
5801	boolean_t remove = FALSE;
5802
5803	lck_mtx_lock_spin(&fg->fg_lock);
5804	while (fg->fg_lflags & FG_INSMSGQ) {
5805	lck_mtx_convert_spin(&fg->fg_lock);
5806
5807	fg->fg_lflags \|= FG_WINSMSGQ;
5808	msleep(&fg->fg_lflags, &fg->fg_lock, `0`, "fg_removeuipc", NULL);
5809	}
5810	fg->fg_msgcount--;
5811	if (fg->fg_msgcount == `0`) {
5812	fg->fg_lflags \|= FG_RMMSGQ;
5813	remove = TRUE;
5814	}
5815	lck_mtx_unlock(&fg->fg_lock);
5816	return (remove);
5817	}
5818
5819	/*
5820	* fg_removeuipc
5821	*
5822	* Description: Remove marked fileglob from message queue
5823	*
5824	* Parameters: fg Fileglob pointer to remove
5825	*
5826	* Returns: void
5827	*
5828	* Locks: Takes and drops fg_lock & uipc_lock
5829	* DO NOT call this function with proc_fdlock held as unp_gc()
5830	* can potentially try to acquire proc_fdlock, which can result
5831	* in a deadlock if this function is in unp_gc_wait().
5832	*/
5833	void
5834	fg_removeuipc(struct fileglob * fg)
5835	{
5836	if (fg->fg_lflags & FG_RMMSGQ) {
5837	lck_mtx_lock_spin(uipc_lock);
5838	unp_gc_wait();
5839	LIST_REMOVE(fg, f_msglist);
5840	lck_mtx_unlock(uipc_lock);
5841	lck_mtx_lock(&fg->fg_lock);
5842	fg->fg_lflags &= ~FG_RMMSGQ;
5843	if (fg->fg_lflags & FG_WRMMSGQ) {
5844	fg->fg_lflags &= ~FG_WRMMSGQ;
5845	wakeup(&fg->fg_lflags);
5846	}
5847	lck_mtx_unlock(&fg->fg_lock);
5848	}
5849	}
5850	#endif /* SOCKETS */
5851
5852	/*
5853	* fo_read
5854	*
5855	* Description: Generic fileops read indirected through the fileops pointer
5856	* in the fileproc structure
5857	*
5858	* Parameters: fp fileproc structure pointer
5859	* uio user I/O structure pointer
5860	* flags FOF_ flags
5861	* ctx VFS context for operation
5862	*
5863	* Returns: 0 Success
5864	* !0 Errno from read
5865	*/
5866	int
5867	fo_read(struct fileproc fp, struct* uio uio, int* flags, vfs_context_t ctx)
5868	{
5869	return ((*fp->f_ops->fo_read)(fp, uio, flags, ctx));
5870	}
5871
5872
5873	/*
5874	* fo_write
5875	*
5876	* Description: Generic fileops write indirected through the fileops pointer
5877	* in the fileproc structure
5878	*
5879	* Parameters: fp fileproc structure pointer
5880	* uio user I/O structure pointer
5881	* flags FOF_ flags
5882	* ctx VFS context for operation
5883	*
5884	* Returns: 0 Success
5885	* !0 Errno from write
5886	*/
5887	int
5888	fo_write(struct fileproc fp, struct* uio uio, int* flags, vfs_context_t ctx)
5889	{
5890	return((*fp->f_ops->fo_write)(fp, uio, flags, ctx));
5891	}
5892
5893
5894	/*
5895	* fo_ioctl
5896	*
5897	* Description: Generic fileops ioctl indirected through the fileops pointer
5898	* in the fileproc structure
5899	*
5900	* Parameters: fp fileproc structure pointer
5901	* com ioctl command
5902	* data pointer to internalized copy
5903	* of user space ioctl command
5904	* parameter data in kernel space
5905	* ctx VFS context for operation
5906	*
5907	* Returns: 0 Success
5908	* !0 Errno from ioctl
5909	*
5910	* Locks: The caller is assumed to have held the proc_fdlock; this
5911	* function releases and reacquires this lock. If the caller
5912	* accesses data protected by this lock prior to calling this
5913	* function, it will need to revalidate/reacquire any cached
5914	* protected data obtained prior to the call.
5915	*/
5916	int
5917	fo_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
5918	{
5919	int error;
5920
5921	proc_fdunlock(vfs_context_proc(ctx));
5922	error = (*fp->f_ops->fo_ioctl)(fp, com, data, ctx);
5923	proc_fdlock(vfs_context_proc(ctx));
5924	return(error);
5925	}
5926
5927
5928	/*
5929	* fo_select
5930	*
5931	* Description: Generic fileops select indirected through the fileops pointer
5932	* in the fileproc structure
5933	*
5934	* Parameters: fp fileproc structure pointer
5935	* which select which
5936	* wql pointer to wait queue list
5937	* ctx VFS context for operation
5938	*
5939	* Returns: 0 Success
5940	* !0 Errno from select
5941	*/
5942	int
5943	fo_select(struct fileproc fp, int* which, void *wql, vfs_context_t ctx)
5944	{
5945	return((*fp->f_ops->fo_select)(fp, which, wql, ctx));
5946	}
5947
5948
5949	/*
5950	* fo_close
5951	*
5952	* Description: Generic fileops close indirected through the fileops pointer
5953	* in the fileproc structure
5954	*
5955	* Parameters: fp fileproc structure pointer for
5956	* file to close
5957	* ctx VFS context for operation
5958	*
5959	* Returns: 0 Success
5960	* !0 Errno from close
5961	*/
5962	int
5963	fo_close(struct fileglob *fg, vfs_context_t ctx)
5964	{
5965	return((*fg->fg_ops->fo_close)(fg, ctx));
5966	}
5967
5968
5969	/*
5970	* fo_kqfilter
5971	*
5972	* Description: Generic fileops kqueue filter indirected through the fileops
5973	* pointer in the fileproc structure
5974	*
5975	* Parameters: fp fileproc structure pointer
5976	* kn pointer to knote to filter on
5977	* ctx VFS context for operation
5978	*
5979	* Returns: (kn->kn_flags & EV_ERROR) error in kn->kn_data
5980	* 0 Filter is not active
5981	* !0 Filter is active
5982	*/
5983	int
5984	fo_kqfilter(struct fileproc fp, struct* knote *kn,
5985	struct kevent_internal_s *kev, vfs_context_t ctx)
5986	{
5987	return ((*fp->f_ops->fo_kqfilter)(fp, kn, kev, ctx));
5988	}
5989
5990	/*
5991	* The ability to send a file descriptor to another
5992	* process is opt-in by file type.
5993	*/
5994	boolean_t
5995	file_issendable(proc_t p, struct fileproc *fp)
5996	{
5997	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
5998
5999	switch (fp->f_type) {
6000	case DTYPE_VNODE:
6001	case DTYPE_SOCKET:
6002	case DTYPE_PIPE:
6003	case DTYPE_PSXSHM:
6004	case DTYPE_NETPOLICY:
6005	return (`0` == (fp->f_fglob->fg_lflags & FG_CONFINED));
6006	default:
6007	/ DTYPE_KQUEUE, DTYPE_FSEVENTS, DTYPE_PSXSEM /
6008	return FALSE;
6009	}
6010	}
6011
6012
6013	struct fileproc *
6014	fileproc_alloc_init(__unused void *arg)
6015	{
6016	struct fileproc *fp;
6017
6018	MALLOC_ZONE(fp, struct fileproc , sizeof* (*fp), M_FILEPROC, M_WAITOK);
6019	if (fp)
6020	bzero(fp, sizeof (*fp));
6021
6022	return (fp);
6023	}
6024
6025	void
6026	fileproc_free(struct fileproc *fp)
6027	{
6028	switch (FILEPROC_TYPE(fp)) {
6029	case FTYPE_SIMPLE:
6030	FREE_ZONE(fp, sizeof (*fp), M_FILEPROC);
6031	break;
6032	case FTYPE_GUARDED:
6033	guarded_fileproc_free(fp);
6034	break;
6035	default:
6036	panic("%s: corrupt fp %p flags %x", __func__, fp, fp->f_flags);
6037	}
6038	}
6039

Browse the source code of xnu/bsd/kern/kern_descrip.c