1/*
2 * Copyright (c) 2000-2012 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */
29/*
30 * Copyright (c) 1990, 1993
31 * The Regents of the University of California. All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 * 3. All advertising materials mentioning features or use of this software
42 * must display the following acknowledgement:
43 * This product includes software developed by the University of
44 * California, Berkeley and its contributors.
45 * 4. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * @(#)filedesc.h 8.1 (Berkeley) 6/2/93
62 */
63
64#ifndef _SYS_FILEDESC_H_
65#define _SYS_FILEDESC_H_
66
67#include <sys/appleapiopts.h>
68
69/*
70 * This structure is used for the management of descriptors. It may be
71 * shared by multiple processes.
72 *
73 * A process is initially started out with NDFILE descriptors [XXXstored within
74 * this structureXXX], selected to be enough for typical applications based on
75 * the historical limit of 20 open files (and the usage of descriptors by
76 * shells). If these descriptors are exhausted, a larger descriptor table
77 * may be allocated, up to a process' resource limit; [XXXthe internal arrays
78 * are then unusedXXX]. The initial expansion is set to NDEXTENT; each time
79 * it runs out, it is doubled until the resource limit is reached. NDEXTENT
80 * should be selected to be the biggest multiple of OFILESIZE (see below)
81 * that will fit in a power-of-two sized piece of memory.
82 */
/*
 * NDFILE is the initial number of descriptors; NDEXTENT is the first
 * expansion step.  The byte counts quoted below only hold when OFILESIZE
 * is 5, i.e. a 4-byte pointer plus one flag byte.
 */
#define NDFILE          25      /* 25 * 5 = 125 bytes */
#define NDEXTENT        50      /* 50 * 5 = 250 bytes, fits a 256-byte alloc. */
85
86#ifdef XNU_KERNEL_PRIVATE
87
88#include <sys/kernel_types.h>
89#include <kern/locks.h>
90
91struct klist;
92struct kqwllist;
93struct ucred;
94
95__options_decl(filedesc_flags_t, uint8_t, {
96 /*
97 * process was chrooted... keep track even
98 * if we're force unmounted and unable to
99 * take a vnode_ref on fd_rdir during a fork
100 */
101 FD_CHROOT = 0x01,
102
103 /*
104 * process has created a kqworkloop that
105 * requires manual cleanup on exit
106 */
107 FD_WORKLOOP = 0x02,
108
109#if CONFIG_PROC_RESOURCE_LIMITS
110 /* process has exceeded fd_nfiles soft limit */
111 FD_ABOVE_SOFT_LIMIT = 0x04,
112 /* process has exceeded fd_nfiles hard limit */
113 FD_ABOVE_HARD_LIMIT = 0x08,
114 KQWL_ABOVE_SOFT_LIMIT = 0x10,
115 KQWL_ABOVE_HARD_LIMIT = 0x20,
116#endif /* CONFIG_PROC_RESOURCE_LIMITS */
117});
118
119#define FILEDESC_FORK_INHERITED_MASK (FD_CHROOT)
120
121struct filedesc {
122 lck_mtx_t fd_lock; /* (L) lock to protect fdesc */
123 uint8_t fd_fpdrainwait; /* (L) has drain waiters */
124 filedesc_flags_t fd_flags; /* (L) filedesc flags */
125 u_short fd_cmask; /* (L) mask for file creation */
126 int fd_nfiles; /* (L) number of open fdesc slots allocated */
127 int fd_afterlast; /* (L) high-water mark of fd_ofiles */
128 int fd_freefile; /* (L) approx. next free file */
129#if CONFIG_PROC_RESOURCE_LIMITS
130#define FD_LIMIT_SENTINEL ((int) (-1))
131 int fd_nfiles_open;
132 int fd_nfiles_soft_limit; /* (L) fd_nfiles soft limit to trigger guard. */
133 int fd_nfiles_hard_limit; /* (L) fd_nfiles hard limit to terminate. */
134
135#define KQWL_LIMIT_SENTINEL ((int) (-1))
136 int num_kqwls; /* Number of kqwls in the fd_kqhash */
137 int kqwl_dyn_soft_limit; /* (L) soft limit for dynamic kqueue */
138 int kqwl_dyn_hard_limit; /* (L) hard limit for dynamic kqueue */
139#endif /* CONFIG_PROC_RESOURCE_LIMITS */
140
141 int fd_knlistsize; /* (L) size of knlist */
142 int unused_padding;/* Due to alignment */
143 struct fileproc **XNU_PTRAUTH_SIGNED_PTR("filedesc.fd_ofiles") fd_ofiles; /* (L) file structures for open files */
144 char *fd_ofileflags; /* (L) per-process open file flags */
145
146 struct klist *fd_knlist; /* (L) list of attached knotes */
147
148 struct kqworkq *fd_wqkqueue; /* (L) the workq kqueue */
149 struct vnode *fd_cdir; /* (L) current directory */
150 struct vnode *fd_rdir; /* (L) root directory */
151 lck_rw_t fd_dirs_lock; /* keeps fd_cdir and fd_rdir stable across a lookup */
152
153 lck_mtx_t fd_kqhashlock; /* (Q) lock for dynamic kqueue hash */
154 u_long fd_kqhashmask; /* (Q) size of dynamic kqueue hash */
155 struct kqwllist *fd_kqhash; /* (Q) hash table for dynamic kqueues */
156
157 lck_mtx_t fd_knhashlock; /* (N) lock for hash table for attached knotes */
158 u_long fd_knhashmask; /* (N) size of knhash */
159 struct klist *fd_knhash; /* (N) hash table for attached knotes */
160};
161
/*
 * Helpers for the fd_flags bit field.  `fdt` is a pointer to anything with
 * an fd_flags member, `flag` is one or more bits.
 *
 *   fdt_flag_test   evaluates to non-zero (1) when any bit in `flag` is set
 *   fdt_flag_set    turns the bits on; yields no value
 *   fdt_flag_clear  turns the bits off; yields no value
 */
#define fdt_flag_test(fdt, flag)        (0 != ((fdt)->fd_flags & (flag)))
#define fdt_flag_set(fdt, flag)         ((void)((fdt)->fd_flags |= (flag)))
#define fdt_flag_clear(fdt, flag)       ((void)((fdt)->fd_flags &= ~(flag)))
165
#if CONFIG_PROC_RESOURCE_LIMITS
/*
 * Limit-notification bookkeeping on top of fd_flags.
 *
 * The *_notified() forms test the corresponding flag; the
 * *_send_notification() forms only set that flag (they expand to
 * fdt_flag_set() and do nothing else).
 */

/* file descriptor limits */
#define fd_above_soft_limit_notified(fdp) \
	fdt_flag_test(fdp, FD_ABOVE_SOFT_LIMIT)
#define fd_above_hard_limit_notified(fdp) \
	fdt_flag_test(fdp, FD_ABOVE_HARD_LIMIT)
#define fd_above_soft_limit_send_notification(fdp) \
	fdt_flag_set(fdp, FD_ABOVE_SOFT_LIMIT)
#define fd_above_hard_limit_send_notification(fdp) \
	fdt_flag_set(fdp, FD_ABOVE_HARD_LIMIT)

/* kqworkloop limits */
#define kqwl_above_soft_limit_notified(fdp) \
	fdt_flag_test(fdp, KQWL_ABOVE_SOFT_LIMIT)
#define kqwl_above_hard_limit_notified(fdp) \
	fdt_flag_test(fdp, KQWL_ABOVE_HARD_LIMIT)
#define kqwl_above_soft_limit_send_notification(fdp) \
	fdt_flag_set(fdp, KQWL_ABOVE_SOFT_LIMIT)
#define kqwl_above_hard_limit_send_notification(fdp) \
	fdt_flag_set(fdp, KQWL_ABOVE_HARD_LIMIT)
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
177
/*
 * Per-process open file flags (the bits kept in fd_ofileflags).
 */
#define UF_RESERVED     0x04    /* an open is pending or in progress */
#define UF_CLOSING      0x08    /* a close is in progress */
/*
 * NOTE(review): this used to carry the same "close in progress" comment as
 * UF_CLOSING; going by the name it more likely means that someone is
 * waiting on a UF_RESERVED slot -- confirm against the users.
 */
#define UF_RESVWAIT     0x10
#define UF_INHERIT      0x20    /* "inherit-on-exec" */
185
/*
 * Bytes of storage needed for each open file descriptor:
 * one flag byte plus one file pointer.
 */
#define OFILESIZE       (sizeof(char) + sizeof(struct file *))
190
191/*!
192 * @function fdt_available
193 *
194 * @brief
195 * Returns whether the file descritor table can accomodate
196 * for @c n new entries.
197 *
198 * @discussion
199 * The answer is only valid so long as the @c proc_fdlock() is held by the
200 * caller.
201 */
202extern bool
203fdt_available_locked(proc_t p, int n);
204
/*!
 * @struct fdt_iterator
 *
 * @brief
 * Position within a file descriptor table while it is being iterated.
 *
 * @field fdti_fd
 * The file descriptor at this position.
 *
 * @field fdti_fp
 * The fileproc at this position; NULL once the iteration is done.
 */
struct fdt_iterator {
	int              fdti_fd;
	struct fileproc *fdti_fp;
};
215
216/*!
217 * @function fdt_next
218 *
219 * @brief
220 * Seek the iterator forward.
221 *
222 * @discussion
223 * The @c proc_fdlock() should be held by the caller.
224 *
225 * @param p
226 * The process for which the file descriptor table is being iterated.
227 *
228 * @param fd
229 * The current file file descriptor to scan from (exclusive).
230 *
231 * @param only_settled
232 * When true, only fileprocs with @c UF_RESERVED set are returned.
233 * If false, fileprocs that are in flux (@c UF_RESERVED is set) are returned.
234 *
235 * @returns
236 * The next iterator position.
237 * If @c fdti_fp is NULL, the iteration is done.
238 */
239extern struct fdt_iterator
240fdt_next(proc_t p, int fd, bool only_settled);
241
242/*!
243 * @function fdt_next
244 *
245 * @brief
246 * Seek the iterator backwards.
247 *
248 * @discussion
249 * The @c proc_fdlock() should be held by the caller.
250 *
251 * @param p
252 * The process for which the file descriptor table is being iterated.
253 *
254 * @param fd
255 * The current file file descriptor to scan from (exclusive).
256 *
257 * @param only_settled
258 * When true, only fileprocs with @c UF_RESERVED set are returned.
259 * If false, fileprocs that are in flux (@c UF_RESERVED is set) are returned.
260 *
261 * @returns
262 * The next iterator position.
263 * If @c fdti_fp is NULL, the iteration is done.
264 */
265extern struct fdt_iterator
266fdt_prev(proc_t p, int fd, bool only_settled);
267
/*!
 * @def fdt_foreach
 *
 * @brief
 * Convenience loop built on @c fdt_next() that enumerates the fileprocs of
 * a process file descriptor table.
 *
 * @discussion
 * The caller should hold @c proc_fdlock().
 *
 * The walk starts from descriptor -1 and always passes @c only_settled as
 * true.  The loop ends once @c fdt_next() yields a NULL @c fdti_fp, at
 * which point @c fp has been assigned NULL.  @c p is expanded once per
 * step.
 *
 * @param fp
 * The iteration variable; receives each fileproc in turn.
 *
 * @param p
 * The process whose file descriptor table is being iterated.
 */
#define fdt_foreach(fp, p) \
	for (struct fdt_iterator __fdt_it = fdt_next(p, -1, true); \
	    ((fp) = __fdt_it.fdti_fp) != NULL; \
	    __fdt_it = fdt_next(p, __fdt_it.fdti_fd, true))
288
/*!
 * @def fdt_foreach_fd
 *
 * @brief
 * Inside an @c fdt_foreach() loop body, yields the file descriptor that is
 * currently being inspected.
 *
 * @discussion
 * Reads the loop's hidden @c __fdt_it iterator, so it is only usable
 * within the body of @c fdt_foreach().
 */
#define fdt_foreach_fd()        (__fdt_it.fdti_fd)
297
298/*!
299 * @function fdt_init
300 *
301 * @brief
302 * Initializers a proc file descriptor table.
303 *
304 * @warning
305 * The proc that is passed is supposed to have been zeroed out,
306 * as this function is used to setup @c kernelproc's file descriptor table
307 * and some fields are already initialized when fdt_init() is called.
308 */
309extern void
310fdt_init(proc_t p);
311
312/*!
313 * @function fdt_destroy
314 *
315 * @brief
316 * Destroys locks from the file descriptor table.
317 *
318 * @description
319 * This function destroys the file descriptor table locks.
320 *
321 * This cannot be done while the process this table belongs
322 * to can be looked up.
323 */
324extern void
325fdt_destroy(proc_t p);
326
327/*!
328 * @function fdt_fork
329 *
330 * @brief
331 * Clones a file descriptor table for the @c fork() system call.
332 *
333 * @discussion
334 * This function internally takes and drops @c proc_fdlock().
335 *
336 * Files are copied directly, ignoring the new resource limits for the process
337 * that's being copied into. Since the descriptor references are just
338 * additional references, this does not count against the number of open files
339 * on the system.
340 *
341 * The struct filedesc includes the current working directory, and the current
342 * root directory, if the process is chroot'ed.
343 *
344 * If the exec was called by a thread using a per thread current working
345 * directory, we inherit the working directory from the thread making the call,
346 * rather than from the process.
347 *
348 * In the case of a failure to obtain a reference, for most cases, the file
349 * entry will be silently dropped. There's an exception for the case of
350 * a chroot dir, since a failure to to obtain a reference there would constitute
351 * an "escape" from the chroot environment, which must not be allowed.
352 *
353 * @param child_fdt
354 * The child process file descriptor table.
355 *
356 * @param parent_p
357 * The parent process to clone the file descriptor table from.
358 *
359 * @param uth_cdir
360 * The vnode for the current thread's current working directory if it is
361 * different from the parent process one.
362 *
363 * @param in_exec
364 * The duplication of fdt is happening for exec
365 *
366 * @returns
367 * 0 Success
368 * EPERM Unable to acquire a reference to the current chroot directory
369 * ENOMEM Not enough memory to perform the clone operation
370 */
371extern int
372fdt_fork(struct filedesc *child_fdt, proc_t parent_p, struct vnode *uth_cdir, bool in_exec);
373
374/*!
375 * @function fdt_exec
376 *
377 * @brief
378 * Perform close-on-exec processing for all files in a process
379 * that are either marked as close-on-exec.
380 *
381 * @description
382 * Also handles the case (via posix_spawn()) where -all- files except those
383 * marked with "inherit" as treated as close-on-exec.
384 *
385 * This function internally takes and drops proc_fdlock()
386 * But assumes tables don't grow/change while unlocked.
387 *
388 * @param p
389 * The process whose file descriptor table is being filrered.
390 *
391 * @param posix_spawn_flags
392 * A set of @c POSIX_SPAWN_* flags.
393 *
394 * @param thread
395 * new thread
396 *
397 * @param in_exec
398 * If the process is in exec
399 */
400extern void
401fdt_exec(proc_t p, struct ucred *p_cred, short posix_spawn_flags, thread_t thread, bool in_exec);
402
403/*!
404 * @function fdt_invalidate
405 *
406 * @brief
407 * Invalidates a proc file descriptor table.
408 *
409 * @discussion
410 * Closes all open files in the file descriptor table,
411 * empties hash tables, etc...
412 *
413 * However, the fileproc arrays stay allocated to still allow external lookups.
414 * These get cleaned up by @c fdt_destroy().
415 *
416 * This function internally takes and drops proc_fdlock().
417 */
418extern void
419fdt_invalidate(proc_t p);
420
421/*
422 * Kernel global variables and routines.
423 */
424extern int dupfdopen(proc_t p, int indx, int dfd, int mode, int error);
425extern int fdalloc(proc_t p, int want, int *result);
426extern void fdrelse(struct proc * p, int fd);
/*
 * Slot accessors for descriptor `fd` of process `p`.
 *
 *   fdfile   address of the fd_ofiles entry for `fd`
 *   fdflags  address of the fd_ofileflags entry for `fd`
 *
 * Neither macro performs any bounds check on `fd`.
 */
#define fdfile(p, fd)   ((p)->p_fd.fd_ofiles + (fd))
#define fdflags(p, fd)  ((p)->p_fd.fd_ofileflags + (fd))
431
432typedef void (*fp_initfn_t)(struct fileproc *, void *ctx);
433extern int falloc_withinit(
434 proc_t p,
435 struct ucred *p_cred,
436 struct vfs_context *ctx,
437 struct fileproc **resultfp,
438 int *resultfd,
439 fp_initfn_t fp_init,
440 void *initarg);
441
/*
 * falloc_withinit() for the common case: uses the cached credential of
 * `p` and the current VFS context, with no init hook.  `p` is evaluated
 * exactly once; the expression yields falloc_withinit()'s return value.
 */
#define falloc(p, rfp, rfd) ({ \
	struct proc *__p = (p); \
	falloc_withinit(__p, \
	    current_cached_proc_cred(__p), \
	    vfs_context_current(), \
	    rfp, rfd, NULL, NULL); \
})
447
/*
 * falloc_withinit() variant taking an explicit VFS context: both the
 * credential and the context come from `ctx`, which is evaluated exactly
 * once.  No init hook is passed.  The expression yields
 * falloc_withinit()'s return value.
 */
#define falloc_exec(p, ctx, rfp, rfd) ({ \
	struct vfs_context *__c = (ctx); \
	falloc_withinit(p, \
	    vfs_context_ucred(__c), __c, \
	    rfp, rfd, NULL, NULL); \
})
452
#if CONFIG_PROC_RESOURCE_LIMITS
/*
 * Locking contract: the caller must hold the proc_fdlock for the whole
 * duration of the call.
 */
void fd_check_limit_exceeded(struct filedesc *fdp);

/*
 * Locking contract: the caller must hold the kqhash_lock for the whole
 * duration of the call.
 */
void kqworkloop_check_limit_exceeded(struct filedesc *fdp);
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
460
461#endif /* XNU_KERNEL_PRIVATE */
462
463#endif /* !_SYS_FILEDESC_H_ */
464