1 | /* |
2 | * Copyright (c) 2000-2020 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ |
29 | /* |
30 | * Copyright (c) 1982, 1986, 1989, 1993 |
31 | * The Regents of the University of California. All rights reserved. |
32 | * (c) UNIX System Laboratories, Inc. |
33 | * All or some portions of this file are derived from material licensed |
34 | * to the University of California by American Telephone and Telegraph |
35 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with |
36 | * the permission of UNIX System Laboratories, Inc. |
37 | * |
38 | * Redistribution and use in source and binary forms, with or without |
39 | * modification, are permitted provided that the following conditions |
40 | * are met: |
41 | * 1. Redistributions of source code must retain the above copyright |
42 | * notice, this list of conditions and the following disclaimer. |
43 | * 2. Redistributions in binary form must reproduce the above copyright |
44 | * notice, this list of conditions and the following disclaimer in the |
45 | * documentation and/or other materials provided with the distribution. |
46 | * 3. All advertising materials mentioning features or use of this software |
47 | * must display the following acknowledgement: |
48 | * This product includes software developed by the University of |
49 | * California, Berkeley and its contributors. |
50 | * 4. Neither the name of the University nor the names of its contributors |
51 | * may be used to endorse or promote products derived from this software |
52 | * without specific prior written permission. |
53 | * |
54 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
55 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
56 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
57 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
58 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
59 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
60 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
61 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
62 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
63 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
64 | * SUCH DAMAGE. |
65 | * |
66 | * @(#)vfs_vnops.c 8.14 (Berkeley) 6/15/95 |
67 | * |
68 | */ |
69 | /* |
70 | * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce |
71 | * support for mandatory and extensible security protections. This notice |
72 | * is included in support of clause 2.2 (b) of the Apple Public License, |
73 | * Version 2.0. |
74 | */ |
75 | |
76 | #include <sys/param.h> |
77 | #include <sys/types.h> |
78 | #include <sys/systm.h> |
79 | #include <sys/kernel.h> |
80 | #include <sys/file_internal.h> |
81 | #include <sys/stat.h> |
82 | #include <sys/proc_internal.h> |
83 | #include <sys/kauth.h> |
84 | #include <sys/mount_internal.h> |
85 | #include <sys/namei.h> |
86 | #include <sys/vnode_internal.h> |
87 | #include <sys/ioctl.h> |
88 | #include <sys/fsctl.h> |
89 | #include <sys/tty.h> |
90 | #include <sys/ubc.h> |
91 | #include <sys/conf.h> |
92 | #include <sys/disk.h> |
93 | #include <sys/fsevents.h> |
94 | #include <sys/kdebug.h> |
95 | #include <sys/xattr.h> |
96 | #include <sys/ubc_internal.h> |
97 | #include <sys/uio_internal.h> |
98 | #include <sys/resourcevar.h> |
99 | #include <sys/signalvar.h> |
100 | |
101 | #include <vm/vm_kern.h> |
102 | #include <vm/vm_map.h> |
103 | |
104 | #include <miscfs/specfs/specdev.h> |
105 | #include <miscfs/fifofs/fifo.h> |
106 | |
107 | #if CONFIG_MACF |
108 | #include <security/mac_framework.h> |
109 | #endif |
110 | |
111 | #include <IOKit/IOBSD.h> |
112 | #include <libkern/section_keywords.h> |
113 | |
114 | static int vn_closefile(struct fileglob *fp, vfs_context_t ctx); |
115 | static int vn_ioctl(struct fileproc *fp, u_long com, caddr_t data, |
116 | vfs_context_t ctx); |
117 | static int vn_read(struct fileproc *fp, struct uio *uio, int flags, |
118 | vfs_context_t ctx); |
119 | static int vn_write(struct fileproc *fp, struct uio *uio, int flags, |
120 | vfs_context_t ctx); |
121 | static int vn_select( struct fileproc *fp, int which, void * wql, |
122 | vfs_context_t ctx); |
123 | static int vn_kqfilter(struct fileproc *fp, struct knote *kn, |
124 | struct kevent_qos_s *kev); |
125 | static void filt_vndetach(struct knote *kn); |
126 | static int filt_vnode(struct knote *kn, long hint); |
127 | static int filt_vnode_common(struct knote *kn, struct kevent_qos_s *kev, |
128 | vnode_t vp, long hint); |
129 | static int vn_open_auth_finish(vnode_t vp, int fmode, vfs_context_t ctx); |
130 | |
131 | const struct fileops vnops = { |
132 | .fo_type = DTYPE_VNODE, |
133 | .fo_read = vn_read, |
134 | .fo_write = vn_write, |
135 | .fo_ioctl = vn_ioctl, |
136 | .fo_select = vn_select, |
137 | .fo_close = vn_closefile, |
138 | .fo_drain = fo_no_drain, |
139 | .fo_kqfilter = vn_kqfilter, |
140 | }; |
141 | |
142 | static int filt_vntouch(struct knote *kn, struct kevent_qos_s *kev); |
static int filt_vnprocess(struct knote *kn, struct kevent_qos_s *kev);
144 | |
145 | SECURITY_READ_ONLY_EARLY(struct filterops) vnode_filtops = { |
146 | .f_isfd = 1, |
147 | .f_attach = NULL, |
148 | .f_detach = filt_vndetach, |
149 | .f_event = filt_vnode, |
150 | .f_touch = filt_vntouch, |
151 | .f_process = filt_vnprocess, |
152 | }; |
153 | |
154 | /* |
155 | * Common code for vnode open operations. |
156 | * Check permissions, and call the VNOP_OPEN or VNOP_CREATE routine. |
157 | * |
158 | * XXX the profusion of interfaces here is probably a bad thing. |
159 | */ |
160 | int |
161 | vn_open(struct nameidata *ndp, int fmode, int cmode) |
162 | { |
	return vn_open_modflags(ndp, &fmode, cmode);
164 | } |
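
/*
 * Illustrative sketch (not part of the original code): a typical in-kernel
 * caller sets up a nameidata with NDINIT() and pairs vn_open() with
 * vn_close(), e.g. (with 'path' and 'ctx' supplied by the caller):
 *
 *	struct nameidata nd;
 *	int fmode = FREAD;
 *	int error;
 *
 *	NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE,
 *	    CAST_USER_ADDR_T(path), ctx);
 *	error = vn_open(&nd, fmode, 0);
 *	if (error == 0) {
 *		... use nd.ni_vp (an iocount and a usecount are held) ...
 *		error = vn_close(nd.ni_vp, fmode, ctx);
 *		vnode_put(nd.ni_vp);
 *	}
 *
 * The exact NDINIT() flags and error handling vary by caller; this is only
 * a sketch.
 */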
165 | |
166 | int |
167 | vn_open_modflags(struct nameidata *ndp, int *fmodep, int cmode) |
168 | { |
169 | int error; |
170 | struct vnode_attr *vap; |
171 | |
172 | vap = kalloc_type(struct vnode_attr, Z_WAITOK); |
173 | |
174 | VATTR_INIT(vap); |
175 | VATTR_SET(vap, va_mode, (mode_t)cmode); |
176 | |
	error = vn_open_auth(ndp, fmodep, vap, NULLVP);
178 | |
179 | kfree_type(struct vnode_attr, vap); |
180 | |
181 | return error; |
182 | } |
183 | |
184 | static int |
185 | vn_open_auth_finish(vnode_t vp, int fmode, vfs_context_t ctx) |
186 | { |
187 | int error; |
188 | |
189 | if ((error = vnode_ref_ext(vp, fmode, 0)) != 0) { |
190 | goto bad; |
191 | } |
192 | |
193 | /* Call out to allow 3rd party notification of open. |
194 | * Ignore result of kauth_authorize_fileop call. |
195 | */ |
196 | #if CONFIG_MACF |
	mac_vnode_notify_open(ctx, vp, fmode);
198 | #endif |
	kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
	    (uintptr_t)vp, 0);
201 | |
202 | return 0; |
203 | |
204 | bad: |
205 | return error; |
206 | } |
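
/*
 * Note: on success, the usecount reference taken in vn_open_auth_finish()
 * via vnode_ref_ext(vp, fmode, 0) is the one that a subsequent vn_close()
 * releases with vnode_rele_ext(vp, flags, 0).
 */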
207 | |
208 | /* |
209 | * May do nameidone() to allow safely adding an FSEvent. Cue off of ni_dvp to |
210 | * determine whether that has happened. |
211 | */ |
212 | static int |
213 | vn_open_auth_do_create(struct nameidata *ndp, struct vnode_attr *vap, int fmode, boolean_t *did_create, boolean_t *did_open, vfs_context_t ctx) |
214 | { |
215 | uint32_t status = 0; |
216 | vnode_t dvp = ndp->ni_dvp; |
217 | int batched; |
218 | int error; |
219 | vnode_t vp; |
220 | |
	batched = vnode_compound_open_available(ndp->ni_dvp);
222 | *did_open = FALSE; |
223 | |
224 | VATTR_SET(vap, va_type, VREG); |
225 | if (fmode & O_EXCL) { |
226 | vap->va_vaflags |= VA_EXCLUSIVE; |
227 | } |
228 | |
229 | #if NAMEDRSRCFORK |
230 | if (ndp->ni_cnd.cn_flags & CN_WANTSRSRCFORK) { |
231 | if ((error = vn_authorize_create(dvp, &ndp->ni_cnd, vap, ctx, NULL)) != 0) { |
232 | goto out; |
233 | } |
234 | if ((error = vnode_makenamedstream(dvp, &ndp->ni_vp, XATTR_RESOURCEFORK_NAME, 0, ctx)) != 0) { |
235 | goto out; |
236 | } |
237 | *did_create = TRUE; |
238 | } else { |
239 | #endif |
240 | if (!batched) { |
241 | if ((error = vn_authorize_create(dvp, &ndp->ni_cnd, vap, ctx, NULL)) != 0) { |
242 | goto out; |
243 | } |
244 | } |
245 | |
246 | error = vn_create(dvp, &ndp->ni_vp, ndp, vap, VN_CREATE_DOOPEN, fmode, &status, ctx); |
247 | if (error != 0) { |
248 | if (batched) { |
249 | *did_create = (status & COMPOUND_OPEN_STATUS_DID_CREATE) ? TRUE : FALSE; |
250 | } else { |
251 | *did_create = FALSE; |
252 | } |
253 | |
254 | if (error == EKEEPLOOKING) { |
255 | if (*did_create) { |
256 | panic("EKEEPLOOKING, but we did a create?" ); |
257 | } |
258 | if (!batched) { |
259 | panic("EKEEPLOOKING from filesystem that doesn't support compound vnops?" ); |
260 | } |
261 | if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) { |
262 | panic("EKEEPLOOKING, but continue flag not set?" ); |
263 | } |
264 | |
265 | /* |
266 | * Do NOT drop the dvp: we need everything to continue the lookup. |
267 | */ |
268 | return error; |
269 | } |
270 | } else { |
271 | if (batched) { |
272 | *did_create = (status & COMPOUND_OPEN_STATUS_DID_CREATE) ? 1 : 0; |
273 | *did_open = TRUE; |
274 | } else { |
275 | *did_create = TRUE; |
276 | } |
277 | } |
278 | #if NAMEDRSRCFORK |
279 | } |
280 | #endif |
281 | |
282 | vp = ndp->ni_vp; |
283 | |
284 | if (*did_create) { |
285 | int update_flags = 0; |
286 | |
287 | // Make sure the name & parent pointers are hooked up |
288 | if (vp->v_name == NULL) { |
289 | update_flags |= VNODE_UPDATE_NAME; |
290 | } |
291 | if (vp->v_parent == NULLVP) { |
292 | update_flags |= VNODE_UPDATE_PARENT; |
293 | } |
294 | |
295 | if (update_flags) { |
			vnode_update_identity(vp, dvp, ndp->ni_cnd.cn_nameptr, ndp->ni_cnd.cn_namelen, ndp->ni_cnd.cn_hash, update_flags);
297 | } |
298 | |
		vnode_put(dvp);
300 | ndp->ni_dvp = NULLVP; |
301 | |
302 | #if CONFIG_FSE |
303 | if (need_fsevent(FSE_CREATE_FILE, vp)) { |
304 | add_fsevent(FSE_CREATE_FILE, ctx, |
305 | FSE_ARG_VNODE, vp, |
306 | FSE_ARG_DONE); |
307 | } |
308 | #endif |
309 | } |
310 | out: |
311 | if (ndp->ni_dvp != NULLVP) { |
		vnode_put(dvp);
313 | ndp->ni_dvp = NULLVP; |
314 | } |
315 | |
316 | return error; |
317 | } |
318 | |
319 | /* |
320 | * This is the number of times we'll loop in vn_open_auth without explicitly |
321 | * yielding the CPU when we determine we have to retry. |
322 | */ |
323 | #define RETRY_NO_YIELD_COUNT 5 |
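
/*
 * For a rough sense of the backoff (assuming the common hz value of 100):
 * retries 1-5 do not sleep at all, retry 6 sleeps MIN(6 * (hz / 100), hz)
 * ticks, about 60 msecs, retry 7 about 70 msecs, and so on, capped at hz
 * ticks (one second) per retry.
 */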
324 | |
325 | /* |
326 | * Open a file with authorization, updating the contents of the structures |
327 | * pointed to by ndp, fmodep, and vap as necessary to perform the requested |
328 | * operation. This function is used for both opens of existing files, and |
329 | * creation of new files. |
330 | * |
 * Parameters: ndp The nameidata pointer describing the
332 | * file |
 * fmodep A pointer to an int containing the mode
334 | * information to be used for the open |
335 | * vap A pointer to the vnode attribute |
336 | * descriptor to be used for the open |
337 | * authvp If non-null and VA_DP_AUTHENTICATE is set in vap, |
338 | * have a supporting filesystem verify that the file |
339 | * to be opened is on the same volume as authvp and |
340 | * that authvp is on an authenticated volume |
341 | * |
342 | * Indirect: * Contents of the data structures pointed |
343 | * to by the parameters are modified as |
344 | * necessary to the requested operation. |
345 | * |
346 | * Returns: 0 Success |
347 | * !0 errno value |
348 | * |
349 | * Notes: The kauth_filesec_t in 'vap', if any, is in host byte order. |
350 | * |
351 | * The contents of '*ndp' will be modified, based on the other |
352 | * arguments to this function, and to return file and directory |
353 | * data necessary to satisfy the requested operation. |
354 | * |
355 | * If the file does not exist and we are creating it, then the |
356 | * O_TRUNC flag will be cleared in '*fmodep' to indicate to the |
357 | * caller that the file was not truncated. |
358 | * |
359 | * If the file exists and the O_EXCL flag was not specified, then |
360 | * the O_CREAT flag will be cleared in '*fmodep' to indicate to |
361 | * the caller that the existing file was merely opened rather |
362 | * than created. |
363 | * |
364 | * The contents of '*vap' will be modified as necessary to |
365 | * complete the operation, including setting of supported |
366 | * attribute, clearing of fields containing unsupported attributes |
367 | * in the request, if the request proceeds without them, etc.. |
368 | * |
 * XXX: This function is too complicated in acting on its arguments
 *
 * XXX: We should enumerate the possible errno values here, and where
372 | * in the code they originated. |
373 | */ |
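/*
 * For example (restating the notes above): opening an existing file with
 * O_CREAT | O_EXCL fails with EEXIST, while opening it with O_CREAT alone
 * succeeds and returns with O_CREAT cleared in '*fmodep' to indicate that
 * the file was opened rather than created.
 */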
374 | int |
375 | vn_open_auth(struct nameidata *ndp, int *fmodep, struct vnode_attr *vap, vnode_t authvp) |
376 | { |
377 | struct vnode *vp; |
378 | struct vnode *dvp; |
379 | vfs_context_t ctx = ndp->ni_cnd.cn_context; |
380 | int error; |
381 | int fmode; |
382 | uint32_t origcnflags; |
383 | boolean_t did_create; |
384 | boolean_t did_open; |
385 | boolean_t need_vnop_open; |
386 | boolean_t batched; |
387 | boolean_t ref_failed; |
388 | int nretries = 0; |
389 | |
390 | again: |
391 | vp = NULL; |
392 | dvp = NULL; |
393 | batched = FALSE; |
394 | did_create = FALSE; |
395 | need_vnop_open = TRUE; |
396 | ref_failed = FALSE; |
397 | fmode = *fmodep; |
398 | origcnflags = ndp->ni_cnd.cn_flags; |
399 | |
400 | if (VATTR_IS_ACTIVE(vap, va_dataprotect_flags)) { |
401 | if ((authvp != NULLVP) |
402 | && !ISSET(vap->va_dataprotect_flags, VA_DP_AUTHENTICATE)) { |
403 | return EINVAL; |
404 | } |
405 | // If raw encrypted mode is requested, handle that here |
406 | if (ISSET(vap->va_dataprotect_flags, VA_DP_RAWENCRYPTED)) { |
407 | fmode |= FENCRYPTED; |
408 | } |
409 | } |
410 | |
411 | if ((fmode & O_NOFOLLOW_ANY) && (fmode & (O_SYMLINK | O_NOFOLLOW))) { |
412 | error = EINVAL; |
413 | goto out; |
414 | } |
415 | |
416 | /* |
417 | * O_CREAT |
418 | */ |
419 | if (fmode & O_CREAT) { |
420 | if ((fmode & O_DIRECTORY)) { |
421 | error = EINVAL; |
422 | goto out; |
423 | } |
424 | ndp->ni_cnd.cn_nameiop = CREATE; |
425 | #if CONFIG_TRIGGERS |
426 | ndp->ni_op = OP_LINK; |
427 | #endif |
428 | /* Inherit USEDVP, vnode_open() supported flags only */ |
429 | ndp->ni_cnd.cn_flags &= (USEDVP | NOCROSSMOUNT); |
430 | ndp->ni_cnd.cn_flags |= LOCKPARENT | LOCKLEAF | AUDITVNPATH1; |
431 | /* Inherit NAMEI_ROOTDIR flag only */ |
432 | ndp->ni_flag &= NAMEI_ROOTDIR; |
433 | ndp->ni_flag |= NAMEI_COMPOUNDOPEN; |
434 | #if NAMEDRSRCFORK |
435 | /* open calls are allowed for resource forks. */ |
436 | ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK; |
437 | #endif |
438 | if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0 && (origcnflags & FOLLOW) != 0) { |
439 | ndp->ni_cnd.cn_flags |= FOLLOW; |
440 | } |
441 | if (fmode & O_NOFOLLOW_ANY) { |
442 | /* will return ELOOP on the first symlink to be hit */ |
443 | ndp->ni_flag |= NAMEI_NOFOLLOW_ANY; |
444 | } |
445 | |
446 | continue_create_lookup: |
447 | if ((error = namei(ndp))) { |
448 | goto out; |
449 | } |
450 | |
451 | dvp = ndp->ni_dvp; |
452 | vp = ndp->ni_vp; |
453 | |
		batched = vnode_compound_open_available(dvp);
455 | |
456 | /* not found, create */ |
457 | if (vp == NULL) { |
458 | /* must have attributes for a new file */ |
459 | if (vap == NULL) { |
				vnode_put(dvp);
461 | error = EINVAL; |
462 | goto out; |
463 | } |
464 | /* |
465 | * Attempt a create. For a system supporting compound VNOPs, we may |
466 | * find an existing file or create one; in either case, we will already |
467 | * have the file open and no VNOP_OPEN() will be needed. |
468 | */ |
			error = vn_open_auth_do_create(ndp, vap, fmode, &did_create, &did_open, ctx);
470 | |
471 | dvp = ndp->ni_dvp; |
472 | vp = ndp->ni_vp; |
473 | |
474 | /* |
475 | * Detected a node that the filesystem couldn't handle. Don't call |
476 | * nameidone() yet, because we need that path buffer. |
477 | */ |
478 | if (error == EKEEPLOOKING) { |
479 | if (!batched) { |
480 | panic("EKEEPLOOKING from a filesystem that doesn't support compound VNOPs?" ); |
481 | } |
482 | goto continue_create_lookup; |
483 | } |
484 | |
485 | nameidone(ndp); |
486 | if (dvp) { |
487 | panic("Shouldn't have a dvp here." ); |
488 | } |
489 | |
490 | if (error) { |
491 | /* |
492 | * Check for a create race. |
493 | */ |
494 | if ((error == EEXIST) && !(fmode & O_EXCL)) { |
495 | if (vp) { |
496 | vnode_put(vp); |
497 | } |
498 | goto again; |
499 | } |
500 | goto bad; |
501 | } |
502 | |
503 | need_vnop_open = !did_open; |
504 | } else { |
505 | if (fmode & O_EXCL) { |
506 | error = EEXIST; |
507 | } |
508 | |
509 | /* |
510 | * We have a vnode. Use compound open if available |
511 | * or else fall through to "traditional" path. Note: can't |
512 | * do a compound open for root, because the parent belongs |
513 | * to a different FS. |
514 | */ |
515 | if (error == 0 && batched && (vnode_mount(vp: dvp) == vnode_mount(vp))) { |
516 | error = VNOP_COMPOUND_OPEN(dvp, vpp: &ndp->ni_vp, ndp, flags: 0, fmode, NULL, NULL, ctx); |
517 | |
518 | if (error == 0) { |
519 | vp = ndp->ni_vp; |
520 | need_vnop_open = FALSE; |
521 | } else if (error == EKEEPLOOKING) { |
522 | if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) { |
523 | panic("EKEEPLOOKING, but continue flag not set?" ); |
524 | } |
525 | goto continue_create_lookup; |
526 | } |
527 | } |
528 | nameidone(ndp); |
			vnode_put(dvp);
530 | ndp->ni_dvp = NULLVP; |
531 | |
532 | if (error) { |
533 | goto bad; |
534 | } |
535 | |
536 | fmode &= ~O_CREAT; |
537 | |
538 | /* Fall through */ |
539 | } |
540 | } else { |
541 | /* |
542 | * Not O_CREAT |
543 | */ |
544 | ndp->ni_cnd.cn_nameiop = LOOKUP; |
545 | /* Inherit USEDVP, vnode_open() supported flags only */ |
546 | ndp->ni_cnd.cn_flags &= (USEDVP | NOCROSSMOUNT); |
547 | ndp->ni_cnd.cn_flags |= FOLLOW | LOCKLEAF | AUDITVNPATH1 | WANTPARENT; |
548 | #if NAMEDRSRCFORK |
549 | /* open calls are allowed for resource forks. */ |
550 | ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK; |
551 | #endif |
552 | if (fmode & FENCRYPTED) { |
553 | ndp->ni_cnd.cn_flags |= CN_RAW_ENCRYPTED | CN_SKIPNAMECACHE; |
554 | } |
555 | /* Inherit NAMEI_ROOTDIR flag only */ |
556 | ndp->ni_flag &= NAMEI_ROOTDIR; |
557 | ndp->ni_flag |= NAMEI_COMPOUNDOPEN; |
558 | |
559 | /* preserve NOFOLLOW from vnode_open() */ |
560 | if (fmode & O_NOFOLLOW || fmode & O_SYMLINK || (origcnflags & FOLLOW) == 0) { |
561 | ndp->ni_cnd.cn_flags &= ~FOLLOW; |
562 | } |
563 | if (fmode & O_NOFOLLOW_ANY) { |
564 | /* will return ELOOP on the first symlink to be hit */ |
565 | ndp->ni_flag |= NAMEI_NOFOLLOW_ANY; |
566 | } |
567 | |
568 | /* Do a lookup, possibly going directly to filesystem for compound operation */ |
569 | do { |
570 | if ((error = namei(ndp))) { |
571 | goto out; |
572 | } |
573 | vp = ndp->ni_vp; |
574 | dvp = ndp->ni_dvp; |
575 | |
576 | /* Check for batched lookup-open */ |
			batched = vnode_compound_open_available(dvp);
			if (batched && ((vp == NULLVP) || (vnode_mount(dvp) == vnode_mount(vp)))) {
				error = VNOP_COMPOUND_OPEN(dvp, &ndp->ni_vp, ndp, 0, fmode, NULL, NULL, ctx);
580 | vp = ndp->ni_vp; |
581 | if (error == 0) { |
582 | need_vnop_open = FALSE; |
583 | } else if (error == EKEEPLOOKING) { |
584 | if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) { |
585 | panic("EKEEPLOOKING, but continue flag not set?" ); |
586 | } |
587 | } else if ((fmode & (FREAD | FWRITE | FEXEC)) == FEXEC) { |
588 | /* |
					 * Some file systems fail in vnop_open call with absence of
590 | * both FREAD and FWRITE access modes. Retry the vnop_open |
591 | * call again with FREAD access mode added. |
592 | */ |
					error = VNOP_COMPOUND_OPEN(dvp, &ndp->ni_vp, ndp, 0,
					    fmode | FREAD, NULL, NULL, ctx);
595 | if (error == 0) { |
596 | vp = ndp->ni_vp; |
597 | need_vnop_open = FALSE; |
598 | } |
599 | } |
600 | } |
601 | } while (error == EKEEPLOOKING); |
602 | |
603 | nameidone(ndp); |
		vnode_put(dvp);
605 | ndp->ni_dvp = NULLVP; |
606 | |
607 | if (error) { |
608 | goto bad; |
609 | } |
610 | } |
611 | |
612 | /* |
613 | * By this point, nameidone() is called, dvp iocount is dropped, |
614 | * and dvp pointer is cleared. |
615 | */ |
616 | if (ndp->ni_dvp != NULLVP) { |
617 | panic("Haven't cleaned up adequately in vn_open_auth()" ); |
618 | } |
619 | |
620 | /* |
621 | * Expect to use this code for filesystems without compound VNOPs, for the root |
622 | * of a filesystem, which can't be "looked up" in the sense of VNOP_LOOKUP(), |
623 | * and for shadow files, which do not live on the same filesystems as their "parents." |
624 | */ |
625 | if (need_vnop_open) { |
626 | if (batched && !vnode_isvroot(vp) && !vnode_isnamedstream(vp)) { |
627 | panic("Why am I trying to use VNOP_OPEN() on anything other than the root or a named stream?" ); |
628 | } |
629 | |
630 | if (!did_create) { |
			error = vn_authorize_open_existing(vp, &ndp->ni_cnd, fmode, ctx, NULL);
632 | if (error) { |
633 | goto bad; |
634 | } |
635 | } |
636 | |
637 | if (VATTR_IS_ACTIVE(vap, va_dataprotect_flags)) { |
638 | if (ISSET(vap->va_dataprotect_flags, VA_DP_RAWUNENCRYPTED)) { |
				/* Don't allow unencrypted I/O requests from user space unless entitled */
640 | boolean_t entitled = FALSE; |
641 | #if !SECURE_KERNEL |
				entitled = IOCurrentTaskHasEntitlement("com.apple.private.security.file-unencrypt-access");
643 | #endif /* SECURE_KERNEL */ |
644 | if (!entitled) { |
645 | error = EPERM; |
646 | goto bad; |
647 | } |
648 | fmode |= FUNENCRYPTED; |
649 | } |
650 | |
651 | if (ISSET(vap->va_dataprotect_flags, VA_DP_AUTHENTICATE)) { |
652 | fsioc_auth_fs_t afs = { .authvp = authvp }; |
653 | |
				error = VNOP_IOCTL(vp, FSIOC_AUTH_FS, (caddr_t)&afs, 0, ctx);
655 | if (error) { |
656 | goto bad; |
657 | } |
658 | } |
659 | } |
660 | |
661 | error = VNOP_OPEN(vp, fmode, ctx); |
662 | if (error) { |
663 | /* |
			 * Some file systems fail in vnop_open call with absence of both
665 | * FREAD and FWRITE access modes. Retry the vnop_open call again |
666 | * with FREAD access mode added. |
667 | */ |
668 | if ((fmode & (FREAD | FWRITE | FEXEC)) == FEXEC) { |
669 | error = VNOP_OPEN(vp, fmode | FREAD, ctx); |
670 | } |
671 | if (error) { |
672 | goto bad; |
673 | } |
674 | } |
675 | need_vnop_open = FALSE; |
676 | } |
677 | |
678 | // if the vnode is tagged VOPENEVT and the current process |
679 | // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY |
680 | // flag to the open mode so that this open won't count against |
681 | // the vnode when carbon delete() does a vnode_isinuse() to see |
682 | // if a file is currently in use. this allows spotlight |
683 | // importers to not interfere with carbon apps that depend on |
684 | // the no-delete-if-busy semantics of carbon delete(). |
685 | // |
686 | if (!did_create && (vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) { |
687 | fmode |= O_EVTONLY; |
688 | } |
689 | |
690 | /* |
691 | * Grab reference, etc. |
692 | */ |
693 | error = vn_open_auth_finish(vp, fmode, ctx); |
694 | if (error) { |
695 | ref_failed = TRUE; |
696 | goto bad; |
697 | } |
698 | |
699 | /* Compound VNOP open is responsible for doing the truncate */ |
700 | if (batched || did_create) { |
701 | fmode &= ~O_TRUNC; |
702 | } |
703 | |
704 | *fmodep = fmode; |
705 | return 0; |
706 | |
707 | bad: |
708 | /* Opened either explicitly or by a batched create */ |
709 | if (!need_vnop_open) { |
710 | VNOP_CLOSE(vp, fmode, ctx); |
711 | } |
712 | |
713 | ndp->ni_vp = NULL; |
714 | if (vp) { |
715 | #if NAMEDRSRCFORK |
716 | /* Aggressively recycle shadow files if we error'd out during open() */ |
717 | if ((vnode_isnamedstream(vp)) && |
718 | (vp->v_parent != NULLVP) && |
719 | (vnode_isshadow(vp))) { |
720 | vnode_recycle(vp); |
721 | } |
722 | #endif |
723 | vnode_put(vp); |
724 | /* |
725 | * Check for a race against unlink. We had a vnode |
726 | * but according to vnode_authorize or VNOP_OPEN it |
727 | * no longer exists. |
728 | * |
729 | * EREDRIVEOPEN: means that we were hit by the tty allocation race or NFSv4 open/create race. |
730 | */ |
731 | if (((error == ENOENT) && (*fmodep & O_CREAT)) || (error == EREDRIVEOPEN) || ref_failed) { |
732 | /* |
733 | * We'll retry here but it may be possible that we get |
734 | * into a retry "spin" inside the kernel and not allow |
735 | * threads, which need to run in order for the retry |
736 | * loop to end, to run. An example is an open of a |
737 | * terminal which is getting revoked and we spin here |
			 * without yielding because namei and VNOP_OPEN are
			 * successful but vnode_ref fails. The revoke needs
			 * threads with an iocount to run but if we spin here we
			 * may possibly be blocking other threads from running.
742 | * |
743 | * We start yielding the CPU after some number of |
744 | * retries for increasing durations. Note that this is |
745 | * still a loop without an exit condition. |
746 | */ |
747 | nretries += 1; |
748 | if (nretries > RETRY_NO_YIELD_COUNT) { |
749 | /* Every hz/100 secs is 10 msecs ... */ |
750 | tsleep(chan: &nretries, PVFS, wmesg: "vn_open_auth_retry" , |
751 | MIN((nretries * (hz / 100)), hz)); |
752 | } |
753 | goto again; |
754 | } |
755 | } |
756 | |
757 | out: |
758 | return error; |
759 | } |
760 | |
761 | #if vn_access_DEPRECATED |
762 | /* |
763 | * Authorize an action against a vnode. This has been the canonical way to |
764 | * ensure that the credential/process/etc. referenced by a vfs_context |
765 | * is granted the rights called out in 'mode' against the vnode 'vp'. |
766 | * |
767 | * Unfortunately, the use of VREAD/VWRITE/VEXEC makes it very difficult |
768 | * to add support for more rights. As such, this interface will be deprecated |
769 | * and callers will use vnode_authorize instead. |
770 | */ |
771 | int |
772 | vn_access(vnode_t vp, int mode, vfs_context_t context) |
773 | { |
774 | kauth_action_t action; |
775 | |
776 | action = 0; |
777 | if (mode & VREAD) { |
778 | action |= KAUTH_VNODE_READ_DATA; |
779 | } |
780 | if (mode & VWRITE) { |
781 | action |= KAUTH_VNODE_WRITE_DATA; |
782 | } |
783 | if (mode & VEXEC) { |
784 | action |= KAUTH_VNODE_EXECUTE; |
785 | } |
786 | |
787 | return vnode_authorize(vp, NULL, action, context); |
788 | } |
789 | #endif /* vn_access_DEPRECATED */ |
790 | |
791 | /* |
792 | * Vnode close call |
793 | */ |
794 | int |
795 | vn_close(struct vnode *vp, int flags, vfs_context_t ctx) |
796 | { |
797 | int error; |
798 | int flusherror = 0; |
799 | |
800 | #if NAMEDRSRCFORK |
801 | /* Sync data from resource fork shadow file if needed. */ |
802 | if ((vp->v_flag & VISNAMEDSTREAM) && |
803 | (vp->v_parent != NULLVP) && |
804 | vnode_isshadow(vp)) { |
805 | if (flags & FWASWRITTEN) { |
			flusherror = vnode_flushnamedstream(vp->v_parent, vp, ctx);
807 | } |
808 | } |
809 | #endif |
810 | /* |
811 | * If vnode @vp belongs to a chardev or a blkdev then it is handled |
812 | * specially. We first drop its user reference count @vp->v_usecount |
813 | * before calling VNOP_CLOSE(). This was done historically to ensure |
814 | * that the last close of a special device vnode performed some |
815 | * conditional cleanups. Now we still need to drop this reference here |
816 | * to ensure that devfsspec_close() can check if the vnode is still in |
817 | * use. |
818 | */ |
819 | if (vnode_isspec(vp)) { |
820 | (void)vnode_rele_ext(vp, flags, 0); |
821 | } |
822 | |
823 | /* |
824 | * On HFS, we flush when the last writer closes. We do this |
825 | * because resource fork vnodes hold a reference on data fork |
826 | * vnodes and that will prevent them from getting VNOP_INACTIVE |
827 | * which will delay when we flush cached data. In future, we |
828 | * might find it beneficial to do this for all file systems. |
829 | * Note that it's OK to access v_writecount without the lock |
830 | * in this context. |
831 | */ |
832 | if (vp->v_tag == VT_HFS && (flags & FWRITE) && vp->v_writecount == 1) { |
833 | VNOP_FSYNC(vp, MNT_NOWAIT, ctx); |
834 | } |
835 | |
836 | error = VNOP_CLOSE(vp, flags, ctx); |
837 | |
838 | #if CONFIG_FSE |
839 | if (flags & FWASWRITTEN) { |
840 | if (need_fsevent(FSE_CONTENT_MODIFIED, vp)) { |
841 | add_fsevent(FSE_CONTENT_MODIFIED, ctx, |
842 | FSE_ARG_VNODE, vp, |
843 | FSE_ARG_DONE); |
844 | } |
845 | } |
846 | #endif |
847 | |
848 | if (!vnode_isspec(vp)) { |
849 | (void)vnode_rele_ext(vp, flags, 0); |
850 | } |
851 | |
852 | if (flusherror) { |
853 | error = flusherror; |
854 | } |
855 | return error; |
856 | } |
857 | |
858 | static int |
859 | vn_read_swapfile( |
860 | struct vnode *vp, |
861 | uio_t uio) |
862 | { |
863 | int error; |
864 | off_t swap_count, this_count; |
865 | off_t file_end, read_end; |
866 | off_t prev_resid; |
867 | char *my_swap_page; |
868 | |
869 | /* |
870 | * Reading from a swap file will get you zeroes. |
871 | */ |
872 | |
873 | my_swap_page = NULL; |
874 | error = 0; |
	swap_count = uio_resid(uio);
876 | |
877 | file_end = ubc_getsize(vp); |
	read_end = uio->uio_offset + uio_resid(uio);
879 | if (uio->uio_offset >= file_end) { |
880 | /* uio starts after end of file: nothing to read */ |
881 | swap_count = 0; |
882 | } else if (read_end > file_end) { |
883 | /* uio extends beyond end of file: stop before that */ |
884 | swap_count -= (read_end - file_end); |
885 | } |
886 | |
887 | while (swap_count > 0) { |
888 | if (my_swap_page == NULL) { |
889 | my_swap_page = kalloc_data(PAGE_SIZE, Z_WAITOK | Z_ZERO); |
890 | /* add an end-of-line to keep line counters happy */ |
891 | my_swap_page[PAGE_SIZE - 1] = '\n'; |
892 | } |
893 | this_count = swap_count; |
894 | if (this_count > PAGE_SIZE) { |
895 | this_count = PAGE_SIZE; |
896 | } |
897 | |
		prev_resid = uio_resid(uio);
		error = uiomove((caddr_t) my_swap_page,
		    (int)this_count,
		    uio);
902 | if (error) { |
903 | break; |
904 | } |
		swap_count -= (prev_resid - uio_resid(uio));
906 | } |
907 | kfree_data(my_swap_page, PAGE_SIZE); |
908 | |
909 | return error; |
910 | } |
911 | /* |
912 | * Package up an I/O request on a vnode into a uio and do it. |
913 | */ |
914 | int |
915 | vn_rdwr( |
916 | enum uio_rw rw, |
917 | struct vnode *vp, |
918 | caddr_t base, |
919 | int len, |
920 | off_t offset, |
921 | enum uio_seg segflg, |
922 | int ioflg, |
923 | kauth_cred_t cred, |
924 | int *aresid, |
925 | proc_t p) |
926 | { |
927 | int64_t resid; |
928 | int result; |
929 | |
930 | if (len < 0) { |
931 | return EINVAL; |
932 | } |
933 | |
934 | result = vn_rdwr_64(rw, |
935 | vp, |
	    (uint64_t)(uintptr_t)base,
	    (int64_t)len,
938 | offset, |
939 | segflg, |
940 | ioflg, |
941 | cred, |
942 | aresid: &resid, |
943 | p); |
944 | |
945 | /* "resid" should be bounded above by "len," which is an int */ |
946 | if (aresid != NULL) { |
947 | *aresid = (int)resid; |
948 | } |
949 | |
950 | return result; |
951 | } |
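
/*
 * Illustrative sketch (not part of the original code): reading the first
 * 512 bytes of a vnode into a stack buffer with vn_rdwr(), assuming the
 * caller already holds an iocount on 'vp' and has a vfs_context 'ctx':
 *
 *	char buf[512];
 *	int resid = 0;
 *
 *	error = vn_rdwr(UIO_READ, vp, (caddr_t)buf, (int)sizeof(buf), 0,
 *	    UIO_SYSSPACE, IO_NODELOCKED, vfs_context_ucred(ctx), &resid,
 *	    vfs_context_proc(ctx));
 *
 * On return, 'resid' holds the number of requested bytes not transferred.
 */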
952 | |
953 | |
954 | int |
955 | vn_rdwr_64( |
956 | enum uio_rw rw, |
957 | struct vnode *vp, |
958 | uint64_t base, |
959 | int64_t len, |
960 | off_t offset, |
961 | enum uio_seg segflg, |
962 | int ioflg, |
963 | kauth_cred_t cred, |
964 | int64_t *aresid, |
965 | proc_t p) |
966 | { |
967 | uio_t auio; |
968 | int spacetype; |
969 | struct vfs_context context; |
970 | int error = 0; |
971 | UIO_STACKBUF(uio_buf, 1); |
972 | |
973 | context.vc_thread = current_thread(); |
974 | context.vc_ucred = cred; |
975 | |
976 | if (UIO_SEG_IS_USER_SPACE(segflg)) { |
977 | spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32; |
978 | } else { |
979 | spacetype = UIO_SYSSPACE; |
980 | } |
981 | |
982 | if (len < 0) { |
983 | return EINVAL; |
984 | } |
985 | |
	auio = uio_createwithbuffer(1, offset, spacetype, rw,
	    &uio_buf[0], sizeof(uio_buf));
	uio_addiov(auio, CAST_USER_ADDR_T(base), (user_size_t)len);
989 | |
990 | #if CONFIG_MACF |
991 | /* XXXMAC |
992 | * IO_NOAUTH should be re-examined. |
993 | * Likely that mediation should be performed in caller. |
994 | */ |
995 | if ((ioflg & IO_NOAUTH) == 0) { |
996 | /* passed cred is fp->f_cred */ |
997 | if (rw == UIO_READ) { |
998 | error = mac_vnode_check_read(ctx: &context, file_cred: cred, vp); |
999 | } else { |
1000 | error = mac_vnode_check_write(ctx: &context, file_cred: cred, vp); |
1001 | } |
1002 | } |
1003 | #endif |
1004 | |
1005 | if (error == 0) { |
1006 | if (rw == UIO_READ) { |
1007 | if (vnode_isswap(vp) && ((ioflg & IO_SWAP_DISPATCH) == 0)) { |
				error = vn_read_swapfile(vp, auio);
			} else {
				error = VNOP_READ(vp, auio, ioflg, &context);
			}
		} else {
			error = VNOP_WRITE(vp, auio, ioflg, &context);
1014 | } |
1015 | } |
1016 | |
1017 | if (aresid) { |
		*aresid = uio_resid(auio);
		assert(*aresid <= len);
	} else if (uio_resid(auio) && error == 0) {
1021 | error = EIO; |
1022 | } |
1023 | return error; |
1024 | } |
1025 | |
1026 | void |
1027 | vn_offset_lock(struct fileglob *fg) |
1028 | { |
1029 | lck_mtx_lock_spin(lck: &fg->fg_lock); |
1030 | while (fg->fg_lflags & FG_OFF_LOCKED) { |
1031 | fg->fg_lflags |= FG_OFF_LOCKWANT; |
1032 | msleep(chan: &fg->fg_lflags, mtx: &fg->fg_lock, PVFS | PSPIN, |
1033 | wmesg: "fg_offset_lock_wait" , ts: 0); |
1034 | } |
1035 | fg->fg_lflags |= FG_OFF_LOCKED; |
1036 | lck_mtx_unlock(lck: &fg->fg_lock); |
1037 | } |
1038 | |
1039 | void |
1040 | vn_offset_unlock(struct fileglob *fg) |
1041 | { |
1042 | int lock_wanted = 0; |
1043 | |
1044 | lck_mtx_lock_spin(lck: &fg->fg_lock); |
1045 | if (fg->fg_lflags & FG_OFF_LOCKWANT) { |
1046 | lock_wanted = 1; |
1047 | } |
1048 | fg->fg_lflags &= ~(FG_OFF_LOCKED | FG_OFF_LOCKWANT); |
1049 | lck_mtx_unlock(lck: &fg->fg_lock); |
1050 | if (lock_wanted) { |
1051 | wakeup(chan: &fg->fg_lflags); |
1052 | } |
1053 | } |
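
/*
 * vn_offset_lock()/vn_offset_unlock() serialize updates of fg_offset for
 * I/O that uses the implicit file offset (callers that do not pass
 * FOF_OFFSET, i.e. plain read(2)/write(2)); pread(2)/pwrite(2) style
 * callers supply their own offset and skip the lock, as can be seen in
 * vn_read() and vn_write() below.
 */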
1054 | |
1055 | static int |
1056 | vn_read_common(vnode_t vp, struct uio *uio, int fflag, vfs_context_t ctx) |
1057 | { |
1058 | int error; |
1059 | int ioflag; |
1060 | off_t read_offset; |
1061 | user_ssize_t read_len; |
1062 | user_ssize_t adjusted_read_len; |
1063 | user_ssize_t clippedsize; |
1064 | |
1065 | /* Caller has already validated read_len. */ |
	read_len = uio_resid(uio);
1067 | assert(read_len >= 0 && read_len <= INT_MAX); |
1068 | |
1069 | adjusted_read_len = read_len; |
1070 | clippedsize = 0; |
1071 | |
1072 | #if CONFIG_MACF |
	error = mac_vnode_check_read(ctx, vfs_context_ucred(ctx), vp);
1074 | if (error) { |
1075 | return error; |
1076 | } |
1077 | #endif |
1078 | |
1079 | /* This signals to VNOP handlers that this read came from a file table read */ |
1080 | ioflag = IO_SYSCALL_DISPATCH; |
1081 | |
1082 | if (fflag & FNONBLOCK) { |
1083 | ioflag |= IO_NDELAY; |
1084 | } |
1085 | if ((fflag & FNOCACHE) || vnode_isnocache(vp)) { |
1086 | ioflag |= IO_NOCACHE; |
1087 | } |
1088 | if (fflag & FENCRYPTED) { |
1089 | ioflag |= IO_ENCRYPTED; |
1090 | } |
1091 | if (fflag & FUNENCRYPTED) { |
1092 | ioflag |= IO_SKIP_ENCRYPTION; |
1093 | } |
1094 | if (fflag & O_EVTONLY) { |
1095 | ioflag |= IO_EVTONLY; |
1096 | } |
1097 | if (fflag & FNORDAHEAD) { |
1098 | ioflag |= IO_RAOFF; |
1099 | } |
1100 | |
	read_offset = uio_offset(uio);
1102 | /* POSIX allows negative offsets for character devices. */ |
1103 | if ((read_offset < 0) && (vnode_vtype(vp) != VCHR)) { |
1104 | error = EINVAL; |
1105 | goto error_out; |
1106 | } |
1107 | |
1108 | if (read_offset == INT64_MAX) { |
1109 | /* can't read any more */ |
1110 | error = 0; |
1111 | goto error_out; |
1112 | } |
1113 | |
1114 | /* |
1115 | * If offset + len will cause overflow, reduce the len to a value |
1116 | * (adjusted_read_len) where it won't |
1117 | */ |
1118 | if ((read_offset >= 0) && (INT64_MAX - read_offset) < read_len) { |
1119 | /* |
1120 | * 0 read_offset INT64_MAX |
1121 | * |-----------------------------------------------|----------|~~~ |
1122 | * <--read_len--> |
1123 | * <-adjusted-> |
1124 | */ |
1125 | adjusted_read_len = (user_ssize_t)(INT64_MAX - read_offset); |
1126 | } |
1127 | |
1128 | if (adjusted_read_len < read_len) { |
		uio_setresid(uio, adjusted_read_len);
1130 | clippedsize = read_len - adjusted_read_len; |
1131 | } |
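	/*
	 * Worked example (illustrative): with read_offset == INT64_MAX - 10
	 * and read_len == 100, adjusted_read_len becomes 10 and clippedsize
	 * becomes 90; the clipped bytes are added back to the residual after
	 * the read so the caller simply observes a short read.
	 */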
1132 | |
1133 | if (vnode_isswap(vp) && !(IO_SKIP_ENCRYPTION & ioflag)) { |
1134 | /* special case for swap files */ |
1135 | error = vn_read_swapfile(vp, uio); |
1136 | } else { |
1137 | error = VNOP_READ(vp, uio, ioflag, ctx); |
1138 | } |
1139 | |
1140 | if (clippedsize) { |
		uio_setresid(uio, (uio_resid(uio) + clippedsize));
1142 | } |
1143 | |
1144 | error_out: |
1145 | return error; |
1146 | } |
1147 | |
1148 | /* |
1149 | * File table vnode read routine. |
1150 | */ |
1151 | static int |
1152 | vn_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx) |
1153 | { |
1154 | vnode_t vp; |
1155 | user_ssize_t read_len; |
1156 | int error; |
1157 | bool offset_locked; |
1158 | |
	read_len = uio_resid(uio);
1160 | if (read_len < 0 || read_len > INT_MAX) { |
1161 | return EINVAL; |
1162 | } |
1163 | |
1164 | if ((flags & FOF_OFFSET) == 0) { |
		vn_offset_lock(fp->fp_glob);
1166 | offset_locked = true; |
1167 | } else { |
1168 | offset_locked = false; |
1169 | } |
1170 | vp = (struct vnode *)fp_get_data(fp); |
1171 | if ((error = vnode_getwithref(vp))) { |
1172 | if (offset_locked) { |
			vn_offset_unlock(fp->fp_glob);
1174 | } |
1175 | return error; |
1176 | } |
1177 | |
1178 | if (offset_locked && (vnode_vtype(vp) != VREG || vnode_isswap(vp))) { |
		vn_offset_unlock(fp->fp_glob);
1180 | offset_locked = false; |
1181 | } |
1182 | |
1183 | if ((flags & FOF_OFFSET) == 0) { |
		uio_setoffset(uio, fp->fp_glob->fg_offset);
1185 | } |
1186 | |
	error = vn_read_common(vp, uio, fp->fp_glob->fg_flag, ctx);
1188 | |
1189 | if ((flags & FOF_OFFSET) == 0) { |
		fp->fp_glob->fg_offset += read_len - uio_resid(uio);
1191 | } |
1192 | |
1193 | if (offset_locked) { |
		vn_offset_unlock(fp->fp_glob);
1195 | offset_locked = false; |
1196 | } |
1197 | |
1198 | vnode_put(vp); |
1199 | return error; |
1200 | } |
1201 | |
1202 | /* |
1203 | * File table vnode write routine. |
1204 | */ |
1205 | static int |
1206 | vn_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx) |
1207 | { |
1208 | struct vnode *vp; |
1209 | int error, ioflag; |
1210 | off_t write_offset; |
1211 | off_t write_end_offset; |
1212 | user_ssize_t write_len; |
1213 | user_ssize_t adjusted_write_len; |
1214 | user_ssize_t clippedsize; |
1215 | bool offset_locked; |
1216 | proc_t p = vfs_context_proc(ctx); |
1217 | rlim_t rlim_cur_fsize = p ? proc_limitgetcur(p, RLIMIT_FSIZE) : 0; |
1218 | |
	write_len = uio_resid(uio);
1220 | if (write_len < 0 || write_len > INT_MAX) { |
1221 | return EINVAL; |
1222 | } |
1223 | adjusted_write_len = write_len; |
1224 | clippedsize = 0; |
1225 | |
1226 | if ((flags & FOF_OFFSET) == 0) { |
		vn_offset_lock(fp->fp_glob);
1228 | offset_locked = true; |
1229 | } else { |
1230 | offset_locked = false; |
1231 | } |
1232 | |
1233 | vp = (struct vnode *)fp_get_data(fp); |
1234 | if ((error = vnode_getwithref(vp))) { |
1235 | if (offset_locked) { |
			vn_offset_unlock(fp->fp_glob);
1237 | } |
1238 | return error; |
1239 | } |
1240 | |
1241 | if (offset_locked && (vnode_vtype(vp) != VREG || vnode_isswap(vp))) { |
		vn_offset_unlock(fp->fp_glob);
1243 | offset_locked = false; |
1244 | } |
1245 | |
1246 | #if CONFIG_MACF |
	error = mac_vnode_check_write(ctx, vfs_context_ucred(ctx), vp);
1248 | if (error) { |
1249 | (void)vnode_put(vp); |
1250 | if (offset_locked) { |
			vn_offset_unlock(fp->fp_glob);
1252 | } |
1253 | return error; |
1254 | } |
1255 | #endif |
1256 | |
1257 | /* |
1258 | * IO_SYSCALL_DISPATCH signals to VNOP handlers that this write came from |
1259 | * a file table write |
1260 | */ |
1261 | ioflag = (IO_UNIT | IO_SYSCALL_DISPATCH); |
1262 | |
1263 | if (vp->v_type == VREG && (fp->fp_glob->fg_flag & O_APPEND)) { |
1264 | ioflag |= IO_APPEND; |
1265 | } |
1266 | if (fp->fp_glob->fg_flag & FNONBLOCK) { |
1267 | ioflag |= IO_NDELAY; |
1268 | } |
1269 | if ((fp->fp_glob->fg_flag & FNOCACHE) || vnode_isnocache(vp)) { |
1270 | ioflag |= IO_NOCACHE; |
1271 | } |
1272 | if (fp->fp_glob->fg_flag & FNODIRECT) { |
1273 | ioflag |= IO_NODIRECT; |
1274 | } |
1275 | if (fp->fp_glob->fg_flag & FSINGLE_WRITER) { |
1276 | ioflag |= IO_SINGLE_WRITER; |
1277 | } |
1278 | if (fp->fp_glob->fg_flag & O_EVTONLY) { |
1279 | ioflag |= IO_EVTONLY; |
1280 | } |
1281 | |
1282 | /* |
1283 | * Treat synchronous mounts and O_FSYNC on the fd as equivalent. |
1284 | * |
1285 | * XXX We treat O_DSYNC as O_FSYNC for now, since we can not delay |
1286 | * XXX the non-essential metadata without some additional VFS work; |
1287 | * XXX the intent at this point is to plumb the interface for it. |
1288 | */ |
1289 | if ((fp->fp_glob->fg_flag & (O_FSYNC | O_DSYNC)) || |
1290 | (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))) { |
1291 | ioflag |= IO_SYNC; |
1292 | } |
1293 | |
1294 | if ((flags & FOF_OFFSET) == 0) { |
1295 | write_offset = fp->fp_glob->fg_offset; |
		uio_setoffset(uio, write_offset);
1297 | } else { |
1298 | /* for pwrite, append should be ignored */ |
1299 | ioflag &= ~IO_APPEND; |
		write_offset = uio_offset(uio);
1301 | /* POSIX allows negative offsets for character devices. */ |
1302 | if ((write_offset < 0) && (vnode_vtype(vp) != VCHR)) { |
1303 | error = EINVAL; |
1304 | goto error_out; |
1305 | } |
1306 | } |
1307 | |
1308 | if (write_offset == INT64_MAX) { |
1309 | /* writes are not possible */ |
1310 | error = EFBIG; |
1311 | goto error_out; |
1312 | } |
1313 | |
1314 | /* |
1315 | * write_len is the original write length that was requested. |
 * We may however need to reduce that because of two reasons
1317 | * |
1318 | * 1) If write_offset + write_len will exceed OFF_T_MAX (i.e. INT64_MAX) |
1319 | * and/or |
1320 | * 2) If write_offset + write_len will exceed the administrative |
1321 | * limit for the maximum file size. |
1322 | * |
1323 | * In both cases the write will be denied if we can't write even a single |
1324 | * byte otherwise it will be "clipped" (i.e. a short write). |
1325 | */ |
1326 | |
1327 | /* |
1328 | * If offset + len will cause overflow, reduce the len |
1329 | * to a value (adjusted_write_len) where it won't |
1330 | */ |
1331 | if ((write_offset >= 0) && (INT64_MAX - write_offset) < write_len) { |
1332 | /* |
1333 | * 0 write_offset INT64_MAX |
1334 | * |-----------------------------------------------|----------|~~~ |
1335 | * <--write_len--> |
1336 | * <-adjusted-> |
1337 | */ |
1338 | adjusted_write_len = (user_ssize_t)(INT64_MAX - write_offset); |
1339 | } |
1340 | |
1341 | /* write_end_offset will always be [0, INT64_MAX] */ |
1342 | write_end_offset = write_offset + adjusted_write_len; |
1343 | |
1344 | if (p && (vp->v_type == VREG) && |
1345 | (rlim_cur_fsize != RLIM_INFINITY) && |
1346 | (rlim_cur_fsize <= INT64_MAX) && |
1347 | (write_end_offset > (off_t)rlim_cur_fsize)) { |
1348 | /* |
1349 | * If the requested residual would cause us to go past the |
1350 | * administrative limit, then we need to adjust the residual |
1351 | * down to cause fewer bytes than requested to be written. If |
1352 | * we can't do that (e.g. the residual is already 1 byte), |
1353 | * then we fail the write with EFBIG. |
1354 | */ |
1355 | if (write_offset >= (off_t)rlim_cur_fsize) { |
1356 | /* |
1357 | * 0 rlim_fsize write_offset write_end INT64_MAX |
1358 | * |------------------------|----------|-------------|--------| |
1359 | * <--write_len--> |
1360 | * |
1361 | * write not permitted |
1362 | */ |
1363 | psignal(p, SIGXFSZ); |
1364 | error = EFBIG; |
1365 | goto error_out; |
1366 | } |
1367 | |
1368 | /* |
1369 | * 0 write_offset rlim_fsize write_end INT64_MAX |
1370 | * |------------------------|-----------|---------|------------| |
1371 | * <------write_len------> |
1372 | * <-adjusted--> |
1373 | */ |
1374 | adjusted_write_len = (user_ssize_t)((off_t)rlim_cur_fsize - write_offset); |
1375 | assert((adjusted_write_len > 0) && (adjusted_write_len < write_len)); |
1376 | } |
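	/*
	 * Worked example (illustrative): with rlim_cur_fsize == 1 MiB,
	 * write_offset == 1 MiB - 100 and write_len == 4096, the write is
	 * clipped to adjusted_write_len == 100; had write_offset already been
	 * at or past 1 MiB, the process would instead get SIGXFSZ and EFBIG.
	 */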
1377 | |
1378 | if (adjusted_write_len < write_len) { |
		uio_setresid(uio, adjusted_write_len);
1380 | clippedsize = write_len - adjusted_write_len; |
1381 | } |
1382 | |
1383 | error = VNOP_WRITE(vp, uio, ioflag, ctx); |
1384 | |
1385 | /* |
1386 | * If we had to reduce the size of write requested either because |
1387 | * of rlimit or because it would have exceeded |
1388 | * maximum file size, we have to add that back to the residual so |
1389 | * it correctly reflects what we did in this function. |
1390 | */ |
1391 | if (clippedsize) { |
		uio_setresid(uio, (uio_resid(uio) + clippedsize));
1393 | } |
1394 | |
1395 | if ((flags & FOF_OFFSET) == 0) { |
1396 | if (ioflag & IO_APPEND) { |
			fp->fp_glob->fg_offset = uio_offset(uio);
		} else {
			fp->fp_glob->fg_offset += (write_len - uio_resid(uio));
		}
		if (offset_locked) {
			vn_offset_unlock(fp->fp_glob);
1403 | offset_locked = false; |
1404 | } |
1405 | } |
1406 | |
1407 | /* |
1408 | * Set the credentials on successful writes |
1409 | */ |
1410 | if ((error == 0) && (vp->v_tag == VT_NFS) && (UBCINFOEXISTS(vp))) { |
1411 | ubc_setcred(vp, vfs_context_ucred(ctx)); |
1412 | } |
1413 | |
1414 | #if CONFIG_FILE_LEASES |
1415 | /* |
1416 | * On success, break the parent dir lease as the file's attributes (size |
	 * and/or mtime) have changed. Best attempt to break the lease; just drop
	 * the error upon failure as there is no point in returning an error when
	 * the write has completed successfully.
1420 | */ |
1421 | if (__probable(error == 0)) { |
1422 | vnode_breakdirlease(vp, true, O_WRONLY); |
1423 | } |
1424 | #endif /* CONFIG_FILE_LEASES */ |
1425 | |
1426 | (void)vnode_put(vp); |
1427 | return error; |
1428 | |
1429 | error_out: |
1430 | if (offset_locked) { |
		vn_offset_unlock(fp->fp_glob);
1432 | } |
1433 | (void)vnode_put(vp); |
1434 | return error; |
1435 | } |
1436 | |
1437 | /* |
1438 | * File table vnode stat routine. |
1439 | * |
1440 | * Returns: 0 Success |
1441 | * EBADF |
1442 | * ENOMEM |
1443 | * vnode_getattr:??? |
1444 | */ |
1445 | int |
1446 | vn_stat_noauth(struct vnode *vp, void *sbptr, kauth_filesec_t *xsec, int isstat64, |
1447 | int needsrealdev, vfs_context_t ctx, struct ucred *file_cred) |
1448 | { |
1449 | struct vnode_attr va; |
1450 | int error; |
1451 | u_short mode; |
1452 | kauth_filesec_t fsec; |
1453 | struct stat *sb = (struct stat *)0; /* warning avoidance ; protected by isstat64 */ |
1454 | struct stat64 * sb64 = (struct stat64 *)0; /* warning avoidance ; protected by isstat64 */ |
1455 | |
1456 | if (isstat64 != 0) { |
1457 | sb64 = (struct stat64 *)sbptr; |
1458 | } else { |
1459 | sb = (struct stat *)sbptr; |
1460 | } |
1461 | memset(s: &va, c: 0, n: sizeof(va)); |
1462 | VATTR_INIT(&va); |
1463 | VATTR_WANTED(&va, va_fsid); |
1464 | VATTR_WANTED(&va, va_fileid); |
1465 | VATTR_WANTED(&va, va_mode); |
1466 | VATTR_WANTED(&va, va_type); |
1467 | VATTR_WANTED(&va, va_nlink); |
1468 | VATTR_WANTED(&va, va_uid); |
1469 | VATTR_WANTED(&va, va_gid); |
1470 | VATTR_WANTED(&va, va_rdev); |
1471 | VATTR_WANTED(&va, va_data_size); |
1472 | VATTR_WANTED(&va, va_access_time); |
1473 | VATTR_WANTED(&va, va_modify_time); |
1474 | VATTR_WANTED(&va, va_change_time); |
1475 | VATTR_WANTED(&va, va_create_time); |
1476 | VATTR_WANTED(&va, va_flags); |
1477 | VATTR_WANTED(&va, va_gen); |
1478 | VATTR_WANTED(&va, va_iosize); |
1479 | /* lower layers will synthesise va_total_alloc from va_data_size if required */ |
1480 | VATTR_WANTED(&va, va_total_alloc); |
1481 | if (xsec != NULL) { |
1482 | VATTR_WANTED(&va, va_uuuid); |
1483 | VATTR_WANTED(&va, va_guuid); |
1484 | VATTR_WANTED(&va, va_acl); |
1485 | } |
1486 | if (needsrealdev) { |
1487 | va.va_vaflags = VA_REALFSID; |
1488 | } |
1489 | error = vnode_getattr(vp, vap: &va, ctx); |
1490 | if (error) { |
1491 | goto out; |
1492 | } |
1493 | #if CONFIG_MACF |
1494 | /* |
	 * Give MAC policies a chance to reject or filter the attributes
	 * returned by the filesystem. Note that MAC policies are consulted
	 * *after* calling the filesystem because filesystems can return more
	 * attributes than were requested so policies wouldn't be authoritative
	 * if consulted beforehand. This also gives policies an opportunity
1500 | * to change the values of attributes retrieved. |
1501 | */ |
	error = mac_vnode_check_getattr(ctx, file_cred, vp, &va);
1503 | if (error) { |
1504 | goto out; |
1505 | } |
1506 | #endif |
1507 | /* |
1508 | * Copy from vattr table |
1509 | */ |
1510 | if (isstat64 != 0) { |
1511 | sb64->st_dev = va.va_fsid; |
1512 | sb64->st_ino = (ino64_t)va.va_fileid; |
1513 | } else { |
1514 | sb->st_dev = va.va_fsid; |
1515 | sb->st_ino = (ino_t)va.va_fileid; |
1516 | } |
1517 | mode = va.va_mode; |
1518 | switch (vp->v_type) { |
1519 | case VREG: |
1520 | mode |= S_IFREG; |
1521 | break; |
1522 | case VDIR: |
1523 | mode |= S_IFDIR; |
1524 | break; |
1525 | case VBLK: |
1526 | mode |= S_IFBLK; |
1527 | break; |
1528 | case VCHR: |
1529 | mode |= S_IFCHR; |
1530 | break; |
1531 | case VLNK: |
1532 | mode |= S_IFLNK; |
1533 | break; |
1534 | case VSOCK: |
1535 | mode |= S_IFSOCK; |
1536 | break; |
1537 | case VFIFO: |
1538 | mode |= S_IFIFO; |
1539 | break; |
1540 | default: |
1541 | error = EBADF; |
1542 | goto out; |
1543 | } |
1545 | if (isstat64 != 0) { |
1546 | sb64->st_mode = mode; |
1547 | sb64->st_nlink = VATTR_IS_SUPPORTED(&va, va_nlink) ? va.va_nlink > UINT16_MAX ? UINT16_MAX : (u_int16_t)va.va_nlink : 1; |
1548 | sb64->st_uid = va.va_uid; |
1549 | sb64->st_gid = va.va_gid; |
1550 | sb64->st_rdev = va.va_rdev; |
1551 | sb64->st_size = va.va_data_size; |
1552 | sb64->st_atimespec = va.va_access_time; |
1553 | sb64->st_mtimespec = va.va_modify_time; |
1554 | sb64->st_ctimespec = va.va_change_time; |
1555 | if (VATTR_IS_SUPPORTED(&va, va_create_time)) { |
1556 | sb64->st_birthtimespec = va.va_create_time; |
1557 | } else { |
1558 | sb64->st_birthtimespec.tv_sec = sb64->st_birthtimespec.tv_nsec = 0; |
1559 | } |
1560 | sb64->st_blksize = va.va_iosize; |
1561 | sb64->st_flags = va.va_flags; |
1562 | sb64->st_blocks = roundup(va.va_total_alloc, 512) / 512; |
1563 | } else { |
1564 | sb->st_mode = mode; |
1565 | sb->st_nlink = VATTR_IS_SUPPORTED(&va, va_nlink) ? va.va_nlink > UINT16_MAX ? UINT16_MAX : (u_int16_t)va.va_nlink : 1; |
1566 | sb->st_uid = va.va_uid; |
1567 | sb->st_gid = va.va_gid; |
1568 | sb->st_rdev = va.va_rdev; |
1569 | sb->st_size = va.va_data_size; |
1570 | sb->st_atimespec = va.va_access_time; |
1571 | sb->st_mtimespec = va.va_modify_time; |
1572 | sb->st_ctimespec = va.va_change_time; |
1573 | sb->st_blksize = va.va_iosize; |
1574 | sb->st_flags = va.va_flags; |
1575 | sb->st_blocks = roundup(va.va_total_alloc, 512) / 512; |
1576 | } |
1577 | |
1578 | /* if we're interested in extended security data and we got an ACL */ |
1579 | if (xsec != NULL) { |
1580 | if (!VATTR_IS_SUPPORTED(&va, va_acl) && |
1581 | !VATTR_IS_SUPPORTED(&va, va_uuuid) && |
1582 | !VATTR_IS_SUPPORTED(&va, va_guuid)) { |
1583 | *xsec = KAUTH_FILESEC_NONE; |
1584 | } else { |
1585 | if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) { |
				fsec = kauth_filesec_alloc(va.va_acl->acl_entrycount);
1587 | } else { |
				fsec = kauth_filesec_alloc(0);
1589 | } |
1590 | if (fsec == NULL) { |
1591 | error = ENOMEM; |
1592 | goto out; |
1593 | } |
1594 | fsec->fsec_magic = KAUTH_FILESEC_MAGIC; |
1595 | if (VATTR_IS_SUPPORTED(&va, va_uuuid)) { |
1596 | fsec->fsec_owner = va.va_uuuid; |
1597 | } else { |
1598 | fsec->fsec_owner = kauth_null_guid; |
1599 | } |
1600 | if (VATTR_IS_SUPPORTED(&va, va_guuid)) { |
1601 | fsec->fsec_group = va.va_guuid; |
1602 | } else { |
1603 | fsec->fsec_group = kauth_null_guid; |
1604 | } |
1605 | if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) { |
				__nochk_bcopy(va.va_acl, &(fsec->fsec_acl), KAUTH_ACL_COPYSIZE(va.va_acl));
1607 | } else { |
1608 | fsec->fsec_acl.acl_entrycount = KAUTH_FILESEC_NOACL; |
1609 | } |
1610 | *xsec = fsec; |
1611 | } |
1612 | } |
1613 | |
	/* Do not give the generation number out to unprivileged users */
1615 | if (va.va_gen && !vfs_context_issuser(ctx)) { |
1616 | if (isstat64 != 0) { |
1617 | sb64->st_gen = 0; |
1618 | } else { |
1619 | sb->st_gen = 0; |
1620 | } |
1621 | } else { |
1622 | if (isstat64 != 0) { |
1623 | sb64->st_gen = va.va_gen; |
1624 | } else { |
1625 | sb->st_gen = va.va_gen; |
1626 | } |
1627 | } |
1628 | |
1629 | error = 0; |
1630 | out: |
1631 | if (VATTR_IS_SUPPORTED(&va, va_acl) && va.va_acl != NULL) { |
		kauth_acl_free(va.va_acl);
1633 | } |
1634 | return error; |
1635 | } |
1636 | |
1637 | int |
1638 | vn_stat(struct vnode *vp, void *sb, kauth_filesec_t *xsec, int isstat64, int needsrealdev, vfs_context_t ctx) |
1639 | { |
1640 | int error; |
1641 | |
1642 | #if CONFIG_MACF |
1643 | error = mac_vnode_check_stat(ctx, NOCRED, vp); |
1644 | if (error) { |
1645 | return error; |
1646 | } |
1647 | #endif |
1648 | |
1649 | /* authorize */ |
1650 | if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_ATTRIBUTES | KAUTH_VNODE_READ_SECURITY, ctx)) != 0) { |
1651 | return error; |
1652 | } |
1653 | |
1654 | /* actual stat */ |
	return vn_stat_noauth(vp, sb, xsec, isstat64, needsrealdev, ctx, NOCRED);
1656 | } |
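
/*
 * Illustrative sketch (not part of the original code): fetching stat64
 * data for a vnode the caller holds an iocount on, without requesting
 * extended security information:
 *
 *	struct stat64 sb;
 *	int error;
 *
 *	error = vn_stat(vp, &sb, NULL, 1, 0, ctx);
 *
 * Passing NULL for 'xsec' skips the ACL/UUID gathering; 'isstat64' selects
 * the struct stat64 layout and 'needsrealdev' is left clear.
 */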
1657 | |
1658 | |
1659 | /* |
1660 | * File table vnode ioctl routine. |
1661 | */ |
1662 | static int |
1663 | vn_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx) |
1664 | { |
1665 | struct vnode *vp = (struct vnode *)fp_get_data(fp); |
1666 | off_t file_size; |
1667 | int error; |
1668 | struct vnode *ttyvp; |
1669 | |
1670 | if ((error = vnode_getwithref(vp))) { |
1671 | return error; |
1672 | } |
1673 | |
1674 | #if CONFIG_MACF |
	error = mac_vnode_check_ioctl(ctx, vp, com);
1676 | if (error) { |
1677 | goto out; |
1678 | } |
1679 | #endif |
1680 | |
1681 | switch (vp->v_type) { |
1682 | case VREG: |
1683 | case VDIR: |
1684 | if (com == FIONREAD) { |
1685 | off_t temp_nbytes; |
1686 | if ((error = vnode_size(vp, &file_size, ctx)) != 0) { |
1687 | goto out; |
1688 | } |
1689 | temp_nbytes = file_size - fp->fp_glob->fg_offset; |
1690 | if (temp_nbytes > INT_MAX) { |
1691 | *(int *)data = INT_MAX; |
1692 | } else if (temp_nbytes < 0) { |
1693 | *(int *)data = 0; |
1694 | } else { |
1695 | *(int *)data = (int)temp_nbytes; |
1696 | } |
1697 | goto out; |
1698 | } |
1699 | if (com == FIONBIO || com == FIOASYNC) { /* XXX */ |
1700 | goto out; |
1701 | } |
1702 | OS_FALLTHROUGH; |
1703 | |
1704 | default: |
1705 | error = ENOTTY; |
1706 | goto out; |
1707 | |
1708 | case VFIFO: |
1709 | case VCHR: |
1710 | case VBLK: |
1711 | |
1712 | if (com == TIOCREVOKE || com == TIOCREVOKECLEAR) { |
1713 | error = ENOTTY; |
1714 | goto out; |
1715 | } |
1716 | |
1717 | /* Should not be able to set block size from user space */ |
1718 | if (com == DKIOCSETBLOCKSIZE) { |
1719 | error = EPERM; |
1720 | goto out; |
1721 | } |
1722 | |
1723 | /* Should not be able to check if filesystem is authenticated from user space */ |
1724 | if (com == FSIOC_AUTH_FS) { |
1725 | error = ENOTTY; |
1726 | goto out; |
1727 | } |
1728 | |
1729 | if (com == FIODTYPE) { |
1730 | if (vp->v_type == VBLK) { |
1731 | if (major(vp->v_rdev) >= nblkdev) { |
1732 | error = ENXIO; |
1733 | goto out; |
1734 | } |
1735 | *(int *)data = bdevsw[major(vp->v_rdev)].d_type; |
1736 | } else if (vp->v_type == VCHR) { |
1737 | if (major(vp->v_rdev) >= nchrdev) { |
1738 | error = ENXIO; |
1739 | goto out; |
1740 | } |
1741 | *(int *)data = cdevsw[major(vp->v_rdev)].d_type; |
1742 | } else { |
1743 | error = ENOTTY; |
1744 | goto out; |
1745 | } |
1746 | goto out; |
1747 | } |
1748 | error = VNOP_IOCTL(vp, command: com, data, fflag: fp->fp_glob->fg_flag, ctx); |
1749 | |
1750 | if (error == 0 && com == TIOCSCTTY) { |
1751 | struct session *sessp; |
1752 | struct pgrp *pg; |
1753 | |
1754 | pg = proc_pgrp(vfs_context_proc(ctx), &sessp); |
1755 | |
1756 | session_lock(sess: sessp); |
1757 | ttyvp = sessp->s_ttyvp; |
1758 | sessp->s_ttyvp = vp; |
1759 | sessp->s_ttyvid = vnode_vid(vp); |
1760 | session_unlock(sess: sessp); |
1761 | |
1762 | pgrp_rele(pgrp: pg); |
1763 | } |
1764 | } |
1765 | out: |
1766 | (void)vnode_put(vp); |
1767 | return error; |
1768 | } |
1769 | |
1770 | /* |
1771 | * File table vnode select routine. |
1772 | */ |
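/*
 * The select itself is issued against a context built from the current
 * thread and the credential cached in the fileglob (fg_cred), i.e. the
 * credential the file was opened with; the MAC check uses the
 * caller-supplied context.
 */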
static int
vn_select(struct fileproc *fp, int which, void *wql, __unused vfs_context_t ctx)
{
    int error;
    struct vnode * vp = (struct vnode *)fp_get_data(fp);
    struct vfs_context context;

    if ((error = vnode_getwithref(vp)) == 0) {
        context.vc_thread = current_thread();
        context.vc_ucred = fp->fp_glob->fg_cred;

#if CONFIG_MACF
        /*
         * XXX We should use a per thread credential here; minimally,
         * XXX the process credential should have a persistent
         * XXX reference on it before being passed in here.
         */
        error = mac_vnode_check_select(ctx, vp, which);
        if (error == 0)
#endif
        error = VNOP_SELECT(vp, which, fp->fp_glob->fg_flag, wql, &context);

        (void)vnode_put(vp);
    }
    return error;
}

/*
 * File table vnode close routine.
 */
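/*
 * Called when the last reference to the fileglob is dropped.  Any
 * advisory locks taken through this fileglob (flock(2)-style locks,
 * indicated by FWASLOCKED, and OFD locks, indicated by FG_HAS_OFDLOCK)
 * are released, any file leases held through it are relinquished, and
 * the vnode is then closed via vn_close().
 */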
static int
vn_closefile(struct fileglob *fg, vfs_context_t ctx)
{
    struct vnode *vp = fg_get_data(fg);
    int error;

    if ((error = vnode_getwithref(vp)) == 0) {
        if (FILEGLOB_DTYPE(fg) == DTYPE_VNODE &&
            ((fg->fg_flag & FWASLOCKED) != 0 ||
            (fg->fg_lflags & FG_HAS_OFDLOCK) != 0)) {
            struct flock lf = {
                .l_whence = SEEK_SET,
                .l_start = 0,
                .l_len = 0,
                .l_type = F_UNLCK
            };

            if ((fg->fg_flag & FWASLOCKED) != 0) {
                (void) VNOP_ADVLOCK(vp, (caddr_t)fg,
                    F_UNLCK, &lf, F_FLOCK, ctx, NULL);
            }

            if ((fg->fg_lflags & FG_HAS_OFDLOCK) != 0) {
                (void) VNOP_ADVLOCK(vp, ofd_to_id(fg),
                    F_UNLCK, &lf, F_OFD_LOCK, ctx, NULL);
            }
        }
#if CONFIG_FILE_LEASES
        if (FILEGLOB_DTYPE(fg) == DTYPE_VNODE && !LIST_EMPTY(&vp->v_leases)) {
            /* Expected open count doesn't matter for release. */
            (void)vnode_setlease(vp, fg, F_UNLCK, 0, ctx);
        }
#endif
        error = vn_close(vp, fg->fg_flag, ctx);
        (void) vnode_put(vp);
    }
    return error;
}

/*
 * Returns:     0                       Success
 *              VNOP_PATHCONF:???
 */
int
vn_pathconf(vnode_t vp, int name, int32_t *retval, vfs_context_t ctx)
{
    int error = 0;
    struct vfs_attr vfa;

    switch (name) {
    case _PC_EXTENDED_SECURITY_NP:
        *retval = vfs_extendedsecurity(vnode_mount(vp)) ? 1 : 0;
        break;
    case _PC_AUTH_OPAQUE_NP:
        *retval = vfs_authopaque(vnode_mount(vp));
        break;
    case _PC_2_SYMLINKS:
        *retval = 1;            /* XXX NOTSUP on MSDOS, etc. */
        break;
    case _PC_ALLOC_SIZE_MIN:
        *retval = 1;            /* XXX lie: 1 byte */
        break;
    case _PC_ASYNC_IO:          /* unistd.h: _POSIX_ASYNCHRONOUS_IO */
        *retval = 1;            /* [AIO] option is supported */
        break;
    case _PC_PRIO_IO:           /* unistd.h: _POSIX_PRIORITIZED_IO */
        *retval = 0;            /* [PIO] option is not supported */
        break;
    case _PC_REC_INCR_XFER_SIZE:
        *retval = 4096;         /* XXX go from MIN to MAX 4K at a time */
        break;
    case _PC_REC_MIN_XFER_SIZE:
        *retval = 4096;         /* XXX recommend 4K minimum reads/writes */
        break;
    case _PC_REC_MAX_XFER_SIZE:
        *retval = 65536;        /* XXX recommend 64K maximum reads/writes */
        break;
    case _PC_REC_XFER_ALIGN:
        *retval = 4096;         /* XXX recommend page aligned buffers */
        break;
    case _PC_SYMLINK_MAX:
        *retval = 255;          /* Minimum acceptable POSIX value */
        break;
    case _PC_SYNC_IO:           /* unistd.h: _POSIX_SYNCHRONIZED_IO */
        *retval = 0;            /* [SIO] option is not supported */
        break;
    case _PC_XATTR_SIZE_BITS:
        /* The number of bits used to store the maximum extended
         * attribute size in bytes.  For example, if the maximum
         * attribute size supported by a file system is 128K, the
         * value returned will be 18.  However, a value of 18 only
         * bounds the limit: the actual maximum attribute size can be
         * anywhere from 128KB up to (256KB - 1).  As a special case,
         * the resource fork can have a much larger size, and some
         * file system specific extended attributes can have smaller
         * and preset sizes; for example, Finder Info is always
         * 32 bytes.
         */
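        /*
         * Worked example: a 128KB limit is 131072 == 2^17 bytes, and
         * storing the value 131072 takes 18 bits, so 18 is reported;
         * any limit in the range [2^17, 2^18 - 1] reports the same 18.
         *
         * Illustrative user-space query (hypothetical path):
         *
         *     long bits = pathconf("/Volumes/Example", _PC_XATTR_SIZE_BITS);
         */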
        memset(&vfa, 0, sizeof(vfa));
        VFSATTR_INIT(&vfa);
        VFSATTR_WANTED(&vfa, f_capabilities);
        if (vfs_getattr(vnode_mount(vp), &vfa, ctx) == 0 &&
            (VFSATTR_IS_SUPPORTED(&vfa, f_capabilities)) &&
            (vfa.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
            (vfa.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
            /* Supports native extended attributes */
            error = VNOP_PATHCONF(vp, name, retval, ctx);
        } else {
            /* Number of bits used to represent the maximum size of
             * extended attribute stored in an Apple Double file.
             */
            *retval = AD_XATTR_SIZE_BITS;
        }
        break;
    default:
        error = VNOP_PATHCONF(vp, name, retval, ctx);
        break;
    }

    return error;
}
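
/*
 * File table vnode kqueue filter attach routine.
 *
 * Takes an iocount that refuses dead vnodes, routes EVFILT_READ/WRITE on
 * character devices to spec_kqfilter(), rejects unsupported vnode types,
 * and otherwise attaches the knote to the vnode's knote list.  The
 * filesystem is asked (via VNOP_MONITOR) to provide change notifications;
 * a failure there is deliberately ignored.
 */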
static int
vn_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
{
    vfs_context_t ctx = vfs_context_current();
    struct vnode *vp;
    int error = 0;
    int result = 0;

    vp = (struct vnode *)fp_get_data(fp);

    /*
     * Don't attach a knote to a dead vnode.
     */
    if ((error = vget_internal(vp, 0, VNODE_NODEAD)) == 0) {
        switch (kn->kn_filter) {
        case EVFILT_READ:
        case EVFILT_WRITE:
            if (vnode_isfifo(vp)) {
                /* We'll only watch FIFOs that use our fifofs */
                if (!(vp->v_fifoinfo && vp->v_fifoinfo->fi_readsock)) {
                    error = ENOTSUP;
                }
            } else if (!vnode_isreg(vp)) {
                if (vnode_ischr(vp)) {
                    result = spec_kqfilter(vp, kn, kev);
                    if ((kn->kn_flags & EV_ERROR) == 0) {
                        /* claimed by a special device */
                        vnode_put(vp);
                        return result;
                    }
                }
                error = EINVAL;
            }
            break;
        case EVFILT_VNODE:
            break;
        default:
            error = EINVAL;
        }

        if (error == 0) {
#if CONFIG_MACF
            error = mac_vnode_check_kqfilter(ctx, fp->fp_glob->fg_cred, kn, vp);
            if (error) {
                vnode_put(vp);
                goto out;
            }
#endif

            kn->kn_filtid = EVFILTID_VN;
            knote_kn_hook_set_raw(kn, (void *)vp);
            vnode_hold(vp);

            vnode_lock(vp);
            KNOTE_ATTACH(&vp->v_knotes, kn);
            result = filt_vnode_common(kn, NULL, vp, 0);
            vnode_unlock(vp);

            /*
             * Ask the filesystem to provide remove notifications,
             * but ignore failure
             */
            VNOP_MONITOR(vp, 0, VNODE_MONITOR_BEGIN, (void *)kn, ctx);
        }

        vnode_put(vp);
    }

out:
    if (error) {
        knote_set_error(kn, error);
    }

    return result;
}
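
/*
 * Detach a knote from its vnode.
 *
 * The knote carries a raw vnode pointer plus a holdcount (vnode_hold()
 * taken at attach time), so the vnode memory is still valid here even if
 * the vnode has since been recycled; vnode_getwithvid() tells us whether
 * it is still the vnode we attached to before we touch its knote list.
 */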
static void
filt_vndetach(struct knote *kn)
{
    vfs_context_t ctx = vfs_context_current();
    struct vnode *vp = (struct vnode *)knote_kn_hook_get_raw(kn);
    uint32_t vid = vnode_vid(vp);
    if (vnode_getwithvid(vp, vid)) {
        vnode_drop(vp);
        return;
    }
    vnode_drop(vp);

    vnode_lock(vp);
    KNOTE_DETACH(&vp->v_knotes, kn);
    vnode_unlock(vp);

    /*
     * Tell a (generally networked) filesystem that we're no longer watching.
     * If the FS wants to track contexts, it should still be using the one from
     * the VNODE_MONITOR_BEGIN.
     */
    VNOP_MONITOR(vp, 0, VNODE_MONITOR_END, (void *)kn, ctx);
    vnode_put(vp);
}


/*
 * Used for EVFILT_READ
 *
 * Takes only VFIFO or VREG.  vnode is locked.  We handle the "poll" case
 * differently than the regular case for VREG files.  If not in poll(),
 * then we need to know current fileproc offset for VREG.
 */
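/*
 * For a regular file outside of poll(), the returned count is
 * (file size - current offset), clamped to the int64_t range; inside
 * poll() a constant 1 is returned, since poll() only needs a
 * readable/not-readable answer.  For a FIFO, the count of buffered
 * characters is returned (0 if it cannot be obtained).
 */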
static int64_t
vnode_readable_data_count(vnode_t vp, off_t current_offset, int ispoll)
{
    if (vnode_isfifo(vp)) {
#if FIFO
        int cnt;
        int err = fifo_charcount(vp, &cnt);
        if (err == 0) {
            return (int64_t)cnt;
        } else
#endif
        {
            return 0;
        }
    } else if (vnode_isreg(vp)) {
        if (ispoll) {
            return 1;
        }

        off_t amount;
        amount = vp->v_un.vu_ubcinfo->ui_size - current_offset;
        if (amount > INT64_MAX) {
            return INT64_MAX;
        } else if (amount < INT64_MIN) {
            return INT64_MIN;
        } else {
            return (int64_t)amount;
        }
    } else {
        panic("Should never have an EVFILT_READ except for reg or fifo.");
        return 0;
    }
}

/*
 * Used for EVFILT_WRITE.
 *
 * For regular vnodes, we can always write (1).  For named pipes,
 * see how much space there is in the buffer.  Nothing else is covered.
 */
static intptr_t
vnode_writable_space_count(vnode_t vp)
{
    if (vnode_isfifo(vp)) {
#if FIFO
        long spc;
        int err = fifo_freespace(vp, &spc);
        if (err == 0) {
            return (intptr_t)spc;
        } else
#endif
        {
            return (intptr_t)0;
        }
    } else if (vnode_isreg(vp)) {
        return (intptr_t)1;
    } else {
        panic("Should never have an EVFILT_WRITE except for reg or fifo.");
        return 0;
    }
}

/*
 * Determine whether this knote should be active
 *
 * This is kind of subtle.
 *      --First, notice if the vnode has been revoked: if so, override hint
 *      --EVFILT_READ knotes are checked no matter what the hint is
 *      --Other knotes activate based on hint.
 *      --If hint is revoke, set special flags and activate
 */
static int
filt_vnode_common(struct knote *kn, struct kevent_qos_s *kev, vnode_t vp, long hint)
{
    int activate = 0;
    int64_t data = 0;

    lck_mtx_assert(&vp->v_lock, LCK_MTX_ASSERT_OWNED);

    /* Special handling for vnodes that are in recycle or already gone */
    if (NOTE_REVOKE == hint) {
        kn->kn_flags |= (EV_EOF | EV_ONESHOT);
        activate = 1;

        if ((kn->kn_filter == EVFILT_VNODE) && (kn->kn_sfflags & NOTE_REVOKE)) {
            kn->kn_fflags |= NOTE_REVOKE;
        }
    } else {
        switch (kn->kn_filter) {
        case EVFILT_READ:
            data = vnode_readable_data_count(vp, kn->kn_fp->fp_glob->fg_offset, (kn->kn_flags & EV_POLL));
            activate = (data != 0);
            break;
        case EVFILT_WRITE:
            data = vnode_writable_space_count(vp);
            activate = (data != 0);
            break;
        case EVFILT_VNODE:
            /* Check events this note matches against the hint */
            if (kn->kn_sfflags & hint) {
                kn->kn_fflags |= (uint32_t)hint; /* Set which event occurred */
            }
            activate = (kn->kn_fflags != 0);
            break;
        default:
            panic("Invalid knote filter on a vnode!");
        }
    }

    if (kev && activate) {
        knote_fill_kevent(kn, kev, data);
    }

    return activate;
}

static int
filt_vnode(struct knote *kn, long hint)
{
    vnode_t vp = (struct vnode *)knote_kn_hook_get_raw(kn);

    return filt_vnode_common(kn, NULL, vp, hint);
}

static int
filt_vntouch(struct knote *kn, struct kevent_qos_s *kev)
{
    vnode_t vp = (struct vnode *)knote_kn_hook_get_raw(kn);
    uint32_t vid = vnode_vid(vp);
    int activate;
    int hint = 0;

    vnode_lock(vp);
    if (vnode_getiocount(vp, vid, VNODE_NODEAD | VNODE_WITHID) != 0) {
        /* is recycled */
        hint = NOTE_REVOKE;
    }

    /* accept new input fflags mask */
    kn->kn_sfflags = kev->fflags;

    activate = filt_vnode_common(kn, NULL, vp, hint);

    if (hint == 0) {
        vnode_put_locked(vp);
    }
    vnode_unlock(vp);

    return activate;
}

static int
filt_vnprocess(struct knote *kn, struct kevent_qos_s *kev)
{
    vnode_t vp = (struct vnode *)knote_kn_hook_get_raw(kn);
    uint32_t vid = vnode_vid(vp);
    int activate;
    int hint = 0;

    vnode_lock(vp);
    if (vnode_getiocount(vp, vid, VNODE_NODEAD | VNODE_WITHID) != 0) {
        /* Is recycled */
        hint = NOTE_REVOKE;
    }
    activate = filt_vnode_common(kn, kev, vp, hint);

    /* Definitely need to unlock, may need to put */
    if (hint == 0) {
        vnode_put_locked(vp);
    }
    vnode_unlock(vp);

    return activate;
}

struct vniodesc {
    vnode_t     vnio_vnode;     /* associated vnode */
    int         vnio_fflags;    /* cached fileglob flags */
};
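
/*
 * A vniodesc wraps a vnode, together with the open flags cached from the
 * originating fileglob, so that a kernel client can continue to read from
 * the underlying vnode independently of the original file descriptor.
 *
 * Sketch of the expected lifecycle (illustrative only; fd and uio are
 * caller-supplied):
 *
 *     vniodesc_t vnio;
 *     errno_t err = vnio_openfd(fd, &vnio);
 *     if (err == 0) {
 *         err = vnio_read(vnio, uio);
 *         (void)vnio_close(vnio);
 *     }
 */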
errno_t
vnio_openfd(int fd, vniodesc_t *vniop)
{
    proc_t p = current_proc();
    struct fileproc *fp = NULL;
    vniodesc_t vnio;
    vnode_t vp;
    int error;
    bool need_drop = false;

    vnio = kalloc_type(struct vniodesc, Z_WAITOK);

    error = fp_getfvp(p, fd, &fp, &vp);
    if (error) {
        goto out;
    }
    need_drop = true;

    if ((fp->fp_glob->fg_flag & (FREAD | FWRITE)) == 0) {
        error = EBADF;
        goto out;
    }

    if (vnode_isswap(vp)) {
        error = EPERM;
        goto out;
    }

    if (vp->v_type != VREG) {
        error = EFTYPE;
        goto out;
    }

    error = vnode_getwithref(vp);
    if (error) {
        goto out;
    }

    vnio->vnio_vnode = vp;
    vnio->vnio_fflags = fp->fp_glob->fg_flag;

    (void)fp_drop(p, fd, fp, 0);
    need_drop = false;

    error = vnode_ref_ext(vp, vnio->vnio_fflags, 0);
    if (error == 0) {
        *vniop = vnio;
        vnio = NULL;
    }

    vnode_put(vp);

out:
    if (need_drop) {
        fp_drop(p, fd, fp, 0);
    }
    if (vnio != NULL) {
        kfree_type(struct vniodesc, vnio);
    }
    return error;
}

errno_t
vnio_close(vniodesc_t vnio)
{
    int error;

    /*
     * The vniodesc is always destroyed, because the close
     * always "succeeds".  We just return whatever error
     * might have been encountered while doing so.
     */

    error = vnode_getwithref(vnio->vnio_vnode);
    if (error == 0) {
        error = vnode_close(vnio->vnio_vnode, vnio->vnio_fflags, NULL);
    }

    kfree_type(struct vniodesc, vnio);

    return error;
}

errno_t
vnio_read(vniodesc_t vnio, uio_t uio)
{
    vnode_t vp = vnio->vnio_vnode;
    user_ssize_t read_len;
    int error;

    read_len = uio_resid(uio);
    if (read_len < 0 || read_len > INT_MAX) {
        return EINVAL;
    }

    error = vnode_getwithref(vp);
    if (error == 0) {
        error = vn_read_common(vp, uio, vnio->vnio_fflags,
            vfs_context_current());
        vnode_put(vp);
    }

    return error;
}

vnode_t
vnio_vnode(vniodesc_t vnio)
{
    return vnio->vnio_vnode;
}
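
/*
 * Returns 1 if the vnode belongs to a filesystem that answers the
 * FSIOC_AUTH_FS ioctl without error (an "authenticated" filesystem),
 * and 0 otherwise; a filesystem that does not recognize the ioctl
 * simply causes 0 to be returned.
 */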
int
vnode_isauthfs(vnode_t vp)
{
    fsioc_auth_fs_t afs = { .authvp = NULL };
    int error;

    error = VNOP_IOCTL(vp, FSIOC_AUTH_FS, (caddr_t)&afs, 0,
        vfs_context_current());

    return error == 0 ? 1 : 0;
}