1/*
2 * Copyright (c) 2022-2024 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <string.h>
30#include <sys/fcntl.h>
31#include <sys/fsctl.h>
32#include <sys/namei.h>
33#include <sys/stat.h>
34#include <sys/vnode.h>
35#include <sys/vnode_internal.h>
36#include <sys/uio_internal.h>
37#include <sys/fsevents.h>
38#include <kern/kalloc.h>
39#include <vfs/vfs_exclave_fs.h>
40#include <miscfs/devfs/devfs.h>
41#include <pexpert/pexpert.h>
42
/*
 * Prototype for unlink1(), which is defined outside this file.
 * vfs_exclave_fs_remove() calls it to unlink a name relative to a directory
 * vnode.
 */
__private_extern__ int unlink1(vfs_context_t, vnode_t, user_addr_t,
    enum uio_seg, int);
45
/*
 * One entry of the open vnodes hash table. An entry exists for a
 * (dev, file_id) pair for as long as its open count is non-zero.
 */
struct open_vnode {
	LIST_ENTRY(open_vnode) chain;   /* linkage within a hash bucket */
	vnode_t vp;                     /* the opened vnode */
	dev_t dev;                      /* dev of the base dir the vnode was opened under */
	uint64_t file_id;               /* file id of vp as reported by the filesystem (va_fileid) */
	uint32_t open_count;            /* number of outstanding opens recorded for vp */
};
53
/* graft inode number that is mapped to/from the graft directory of a graft */
#define ROOT_DIR_INO_NUM 2

/* bits of base_dir_t.flags */
#define VFS_EXCLAVE_FS_BASE_DIR_GRAFT 1  /* base dir was registered with graft info */
#define VFS_EXCLAVE_FS_BASE_DIR_SEALED 2 /* FSIOC_EVAL_ROOTAUTH succeeded at registration time */

/* A registered base directory; base_dirs[] holds one slot per fs tag. */
typedef struct {
	uint32_t flags;                 /* VFS_EXCLAVE_FS_BASE_DIR_* bits */
	vnode_t vp;                     /* base directory vnode, NULL when the slot is free */
	dev_t dev;                      /* va_fsid of vp, captured at registration */
	fsioc_graft_info_t graft_info;  /* only meaningful when the GRAFT flag is set */
} base_dir_t;
65
/* hash table that maps from (dev, file_id) to a vnode and its open count */
typedef LIST_HEAD(open_vnode_head, open_vnode) open_vnodes_list_head_t;
/* NULL until vfs_exclave_fs_start() succeeds; also serves as the "started" indicator */
static open_vnodes_list_head_t *open_vnodes_hashtbl = NULL;
/* mask filled in by hashinit(); used to pick a bucket */
static u_long open_vnodes_hashmask = 0;
/* element count requested from hashinit() */
static int open_vnodes_hashsize = 0;
/* number of entries currently linked into the hash table */
static uint32_t num_open_vnodes = 0;

/* registered base directories, indexed by fs tag */
static base_dir_t base_dirs[EFT_FS_NUM_TAGS] = {0};
/* number of slots in base_dirs that are currently registered */
static uint32_t num_base_dirs = 0;

static LCK_GRP_DECLARE(vfs_exclave_lck_grp, "vfs_exclave");

/* protects base_dirs */
static lck_mtx_t base_dirs_mtx;

/* protects open vnodes hash table */
static lck_mtx_t open_vnodes_mtx;

/* bucket index: sum of dev and file_id, reduced with the hash mask */
#define HASHFUNC(dev, file_id) (((dev) + (file_id)) & open_vnodes_hashmask)
/* pointer to the bucket list head for (dev, file_id) */
#define OPEN_VNODES_HASH(dev, file_id) (&open_vnodes_hashtbl[HASHFUNC(dev, file_id)])

#if (DEVELOPMENT || DEBUG)
/* set from the boot-arg below in vfs_exclave_fs_start(); consulted by readdir */
static bool integrity_checks_disabled = false;
#define EXCLAVE_INTEGRITY_CHECKS_DISABLED_BOOTARG "disable_integrity_checks"
#endif

/* forward declaration: used by vfs_exclave_fs_root() before the definition */
static int exclave_fs_open_internal(uint32_t fs_tag, uint64_t root_id, const char *path,
    int flags, uint64_t *file_id);
95
96/*
97 * Get the fsid and fileid attributes of the given vnode.
98 */
99static int
100get_vnode_info(vnode_t vp, dev_t *dev, fsid_t *fsid, uint64_t *file_id)
101{
102 struct vnode_attr va;
103 int error;
104
105 memset(s: &va, c: 0, n: sizeof(va));
106 VATTR_INIT(&va);
107 if (dev) {
108 VATTR_WANTED(&va, va_fsid);
109 }
110 if (fsid) {
111 VATTR_WANTED(&va, va_fsid64);
112 }
113 if (file_id) {
114 VATTR_WANTED(&va, va_fileid);
115 }
116
117 error = vnode_getattr(vp, vap: &va, ctx: vfs_context_kernel());
118 if (error) {
119 return error;
120 }
121
122 if (dev) {
123 if (!VATTR_IS_SUPPORTED(&va, va_fsid)) {
124 return ENOTSUP;
125 }
126 *dev = va.va_fsid;
127 }
128
129 if (fsid) {
130 if (!VATTR_IS_SUPPORTED(&va, va_fsid64)) {
131 return ENOTSUP;
132 }
133 *fsid = va.va_fsid64;
134 }
135
136 if (file_id) {
137 if (!VATTR_IS_SUPPORTED(&va, va_fileid)) {
138 return ENOTSUP;
139 }
140 *file_id = va.va_fileid;
141 }
142
143 return 0;
144}
145
146static inline bool
147is_graft(base_dir_t *base_dir)
148{
149 return base_dir->flags & VFS_EXCLAVE_FS_BASE_DIR_GRAFT;
150}
151
152static inline bool
153is_sealed(base_dir_t *base_dir)
154{
155 return base_dir->flags & VFS_EXCLAVE_FS_BASE_DIR_SEALED;
156}
157
158static int
159graft_to_host_inum(fsioc_graft_info_t *gi, uint64_t graft_inum, uint64_t *host_inum)
160{
161 if (graft_inum == ROOT_DIR_INO_NUM) {
162 *host_inum = gi->gi_graft_dir;
163 } else if (graft_inum < gi->gi_inum_len) {
164 *host_inum = gi->gi_inum_base + graft_inum;
165 } else {
166 return ERANGE;
167 }
168
169 return 0;
170}
171
172static int
173host_to_graft_inum(fsioc_graft_info_t *gi, uint64_t host_inum, uint64_t *graft_inum)
174{
175 if (host_inum == gi->gi_graft_dir) {
176 *graft_inum = ROOT_DIR_INO_NUM;
177 } else if ((host_inum >= gi->gi_inum_base) && (host_inum < gi->gi_inum_base + gi->gi_inum_len)) {
178 *graft_inum = host_inum - gi->gi_inum_base;
179 } else {
180 return ERANGE;
181 }
182
183 return 0;
184}
185
186/*
187 * Check if a vnode is in an APFS graft and if so obtain information about the graft.
188 */
189static int
190get_graft_info(vnode_t vp, bool *is_graft, fsioc_graft_info_t *graft_info)
191{
192 fsioc_get_graft_info_t ggi = {0};
193 uint16_t alloc_count;
194 fsioc_graft_info_t *graft_infos = NULL;
195 int error = 0;
196
197 *is_graft = false;
198
199 error = VNOP_IOCTL(vp, FSIOC_GET_GRAFT_INFO, data: (caddr_t)&ggi, fflag: 0, ctx: vfs_context_kernel());
200 if (error) {
201 return error;
202 }
203
204 if (!ggi.ggi_is_in_graft) {
205 return 0;
206 }
207
208 if (ggi.ggi_count == 0) {
209 return EINVAL;
210 }
211
212 alloc_count = ggi.ggi_count;
213
214 graft_infos = kalloc_type(fsioc_graft_info_t, alloc_count, Z_WAITOK | Z_ZERO);
215 if (!graft_infos) {
216 return ENOMEM;
217 }
218
219 memset(s: &ggi, c: 0, n: sizeof(ggi));
220 ggi.ggi_count = alloc_count;
221 ggi.ggi_buffer = (user64_addr_t)graft_infos;
222
223 error = VNOP_IOCTL(vp, FSIOC_GET_GRAFT_INFO, data: (caddr_t)&ggi, fflag: 0, ctx: vfs_context_kernel());
224 if (error) {
225 goto out;
226 }
227
228 if (!ggi.ggi_is_in_graft) {
229 error = EAGAIN;
230 goto out;
231 }
232
233 if (ggi.ggi_graft_index >= alloc_count) {
234 error = ERANGE;
235 goto out;
236 }
237
238 *graft_info = graft_infos[ggi.ggi_graft_index];
239 *is_graft = true;
240
241out:
242 if (graft_infos) {
243 kfree_type(fsioc_graft_info_t, alloc_count, graft_infos);
244 }
245
246 return error;
247}
248
249/*
250 * Set a base directory for the given fs tag.
251 */
252static int
253set_base_dir(uint32_t fs_tag, vnode_t vp, fsioc_graft_info_t *graft_info, bool is_sealed)
254{
255 dev_t dev;
256 base_dir_t *base_dir;
257 int error = 0;
258
259 if (fs_tag >= EFT_FS_NUM_TAGS) {
260 return EINVAL;
261 }
262
263 lck_mtx_lock(lck: &base_dirs_mtx);
264
265 if (base_dirs[fs_tag].vp) {
266 if (base_dirs[fs_tag].vp == vp) {
267 error = EALREADY;
268 } else {
269 error = EBUSY;
270 }
271 goto out;
272 }
273
274 error = get_vnode_info(vp, dev: &dev, NULL, NULL);
275 if (error) {
276 goto out;
277 }
278
279 /*
280 * make sure that EFT_EXCLAVE does not share a dev_t with another fs,
281 * since EFT_EXCLAVE vnodes are opened RW whereas other fs vnodes
282 * are opened RO
283 */
284 if (fs_tag == EFT_EXCLAVE) {
285 int i;
286 for (i = 0; i < EFT_FS_NUM_TAGS; i++) {
287 if (!base_dirs[i].vp) {
288 continue;
289 }
290 if (base_dirs[i].dev == dev) {
291 error = EBUSY;
292 goto out;
293 }
294 }
295 } else if (base_dirs[EFT_EXCLAVE].vp && (base_dirs[EFT_EXCLAVE].dev == dev)) {
296 error = EBUSY;
297 goto out;
298 }
299
300 base_dir = &base_dirs[fs_tag];
301
302 if (graft_info) {
303 base_dir->flags |= VFS_EXCLAVE_FS_BASE_DIR_GRAFT;
304 if (is_sealed) {
305 base_dir->flags |= VFS_EXCLAVE_FS_BASE_DIR_SEALED;
306 }
307 base_dir->graft_info = *graft_info;
308 }
309
310 base_dir->vp = vp;
311 base_dir->dev = dev;
312
313 num_base_dirs++;
314
315out:
316 lck_mtx_unlock(lck: &base_dirs_mtx);
317 return error;
318}
319
320/*
321 * Get the base directory entry for the given fs tag. If vpp is passed, return
322 * with an iocount taken on the vnode.
323 */
324static int
325get_base_dir(uint32_t fs_tag, base_dir_t *base_dir, vnode_t *vpp)
326{
327 vnode_t base_vp;
328 int error = 0;
329
330 if (!base_dir && !vpp) {
331 return EINVAL;
332 }
333
334 if (fs_tag >= EFT_FS_NUM_TAGS) {
335 return EINVAL;
336 }
337
338 lck_mtx_lock(lck: &base_dirs_mtx);
339
340 base_vp = base_dirs[fs_tag].vp;
341
342 if (base_vp == NULLVP) {
343 error = ENOENT;
344 goto out;
345 }
346
347 if (vpp) {
348 error = vnode_getwithref(vp: base_vp);
349 if (error) {
350 goto out;
351 }
352 *vpp = base_vp;
353 }
354
355 if (base_dir) {
356 *base_dir = base_dirs[fs_tag];
357 }
358
359out:
360 lck_mtx_unlock(lck: &base_dirs_mtx);
361 return error;
362}
363
364int
365vfs_exclave_fs_start(void)
366{
367 lck_mtx_init(lck: &base_dirs_mtx, grp: &vfs_exclave_lck_grp, LCK_ATTR_NULL);
368 lck_mtx_init(lck: &open_vnodes_mtx, grp: &vfs_exclave_lck_grp, LCK_ATTR_NULL);
369
370 assert(open_vnodes_hashtbl == NULL);
371
372 open_vnodes_hashsize = desiredvnodes / 16;
373 open_vnodes_hashtbl = hashinit(count: open_vnodes_hashsize, M_VNODE, hashmask: &open_vnodes_hashmask);
374 if (open_vnodes_hashtbl == NULL) {
375 open_vnodes_hashsize = open_vnodes_hashmask = 0;
376 return ENOMEM;
377 }
378
379#if (DEVELOPMENT || DEBUG)
380 uint32_t bootarg_val;
381 if (PE_parse_boot_argn(EXCLAVE_INTEGRITY_CHECKS_DISABLED_BOOTARG, &bootarg_val, sizeof(bootarg_val))) {
382 if (bootarg_val) {
383 integrity_checks_disabled = true;
384 }
385 }
386#endif
387
388 return 0;
389}
390
391static bool
392exclave_fs_started(void)
393{
394 return open_vnodes_hashtbl != NULL;
395}
396
397void
398vfs_exclave_fs_stop(void)
399{
400 int i;
401
402 if (!exclave_fs_started()) {
403 return;
404 }
405
406 for (i = 0; i < EFT_FS_NUM_TAGS; i++) {
407 vfs_exclave_fs_unregister_tag(fs_tag: i);
408 }
409
410 assert(num_open_vnodes == 0);
411 assert(open_vnodes_hashtbl);
412
413 hashdestroy(open_vnodes_hashtbl, M_VNODE, hashmask: open_vnodes_hashmask);
414 open_vnodes_hashtbl = NULL;
415 open_vnodes_hashmask = open_vnodes_hashsize = 0;
416
417 lck_mtx_destroy(lck: &base_dirs_mtx, grp: &vfs_exclave_lck_grp);
418 lck_mtx_destroy(lck: &open_vnodes_mtx, grp: &vfs_exclave_lck_grp);
419
420#if (DEVELOPMENT || DEBUG)
421 integrity_checks_disabled = false;
422#endif
423}
424
425static bool
426is_fs_writeable(uint32_t fs_tag)
427{
428 return fs_tag == EFT_EXCLAVE;
429}
430
431int
432vfs_exclave_fs_register(uint32_t fs_tag, vnode_t vp)
433{
434 char vfs_name[MFSNAMELEN];
435 bool is_graft;
436 fsioc_graft_info_t graft_info;
437 int error;
438
439 if (!exclave_fs_started()) {
440 return ENXIO;
441 }
442
443 if (fs_tag >= EFT_FS_NUM_TAGS) {
444 return EINVAL;
445 }
446
447 vnode_vfsname(vp, buf: vfs_name);
448 if (strcmp(s1: vfs_name, s2: "apfs")) {
449 return ENOTSUP;
450 }
451
452 if (!vnode_isdir(vp)) {
453 return ENOTDIR;
454 }
455
456 error = get_graft_info(vp, is_graft: &is_graft, graft_info: &graft_info);
457 if (error) {
458 return error;
459 }
460
461 if (is_graft && is_fs_writeable(fs_tag)) {
462 return EROFS;
463 }
464
465 error = vnode_ref(vp);
466 if (error) {
467 return error;
468 }
469
470 // Check if tag is sealed, RW tags are always not sealed
471 bool is_sealed = false;
472 if (!is_fs_writeable(fs_tag)) {
473 error = VNOP_IOCTL(vp, FSIOC_EVAL_ROOTAUTH, NULL, fflag: 0, ctx: vfs_context_kernel());
474 if (!error) {
475 is_sealed = true;
476 }
477 }
478
479 error = set_base_dir(fs_tag, vp, graft_info: is_graft ? &graft_info : NULL, is_sealed);
480 if (error) {
481 vnode_rele(vp);
482 // if this directory is already registered in this tag do not consider it as an error
483 if (error == EALREADY) {
484 error = 0;
485 }
486 return error;
487 }
488
489 return 0;
490}
491
492int
493vfs_exclave_fs_register_path(uint32_t fs_tag, const char *base_path)
494{
495 struct nameidata nd;
496 int error;
497
498 if (!exclave_fs_started()) {
499 return ENXIO;
500 }
501
502 if (fs_tag >= EFT_FS_NUM_TAGS) {
503 return EINVAL;
504 }
505
506 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW, UIO_SYSSPACE,
507 CAST_USER_ADDR_T(base_path), vfs_context_kernel());
508
509 error = namei(ndp: &nd);
510 if (error) {
511 return error;
512 }
513
514 error = vfs_exclave_fs_register(fs_tag, vp: nd.ni_vp);
515
516 vnode_put(vp: nd.ni_vp);
517 nameidone(&nd);
518
519 return error;
520}
521
522/*
523 * Release open vnodes for the given fs_tag.
524 * base_dirs_mtx and open_vnodes_mtx must be locked by caller.
525 */
526static void
527release_open_vnodes(uint32_t fs_tag)
528{
529 dev_t dev;
530 int i;
531
532 if (num_open_vnodes == 0) {
533 return;
534 }
535
536 dev = base_dirs[fs_tag].dev;
537
538 if (num_base_dirs > 1) {
539 /* skip release if another base dir has the same device */
540 for (i = 0; i < EFT_FS_NUM_TAGS; i++) {
541 if ((i != fs_tag) && base_dirs[i].vp
542 && (base_dirs[i].dev == dev)) {
543 return;
544 }
545 }
546 }
547
548 for (i = 0; i < open_vnodes_hashmask + 1; i++) {
549 struct open_vnode *entry, *temp_entry;
550
551 LIST_FOREACH_SAFE(entry, &open_vnodes_hashtbl[i], chain, temp_entry) {
552 if (entry->dev != dev) {
553 continue;
554 }
555 while (entry->open_count) {
556 vnode_rele(vp: entry->vp);
557 entry->open_count--;
558 }
559 LIST_REMOVE(entry, chain);
560 kfree_type(struct open_vnode, entry);
561 num_open_vnodes--;
562 }
563 }
564}
565
566static int
567vfs_exclave_fs_unregister_internal(uint32_t fs_tag, vnode_t vp)
568{
569 int error = 0;
570
571 if (!exclave_fs_started()) {
572 return ENXIO;
573 }
574
575 if (fs_tag >= EFT_FS_NUM_TAGS) {
576 return EINVAL;
577 }
578
579 lck_mtx_lock(lck: &base_dirs_mtx);
580
581 if (vp) {
582 for (fs_tag = 0; fs_tag < EFT_FS_NUM_TAGS; fs_tag++) {
583 if (base_dirs[fs_tag].vp == vp) {
584 break;
585 }
586 }
587 } else {
588 vp = base_dirs[fs_tag].vp;
589 }
590
591 if (!vp || (fs_tag == EFT_FS_NUM_TAGS)) {
592 lck_mtx_unlock(lck: &base_dirs_mtx);
593 return ENOENT;
594 }
595
596 lck_mtx_lock(lck: &open_vnodes_mtx);
597
598 release_open_vnodes(fs_tag);
599
600 vnode_rele(vp);
601 base_dirs[fs_tag].vp = NULL;
602 base_dirs[fs_tag].dev = 0;
603 memset(s: &base_dirs[fs_tag], c: 0, n: sizeof(base_dirs[fs_tag]));
604 num_base_dirs--;
605
606 lck_mtx_unlock(lck: &base_dirs_mtx);
607 lck_mtx_unlock(lck: &open_vnodes_mtx);
608 return error;
609}
610
611int
612vfs_exclave_fs_unregister(vnode_t vp)
613{
614 return vfs_exclave_fs_unregister_internal(fs_tag: 0, vp);
615}
616
617int
618vfs_exclave_fs_unregister_tag(uint32_t fs_tag)
619{
620 return vfs_exclave_fs_unregister_internal(fs_tag, NULLVP);
621}
622
623int
624vfs_exclave_fs_get_base_dirs(void *buf, uint32_t *count)
625{
626 int error = 0;
627 uint32_t i, num_copied = 0;
628 exclave_fs_base_dir_t *dirs = (exclave_fs_base_dir_t *)buf;
629
630 if (!count || (dirs && !*count)) {
631 return EINVAL;
632 }
633
634 lck_mtx_lock(lck: &base_dirs_mtx);
635
636 if (!dirs) {
637 *count = num_base_dirs;
638 goto out;
639 } else if (*count < num_base_dirs) {
640 error = ENOSPC;
641 goto out;
642 }
643
644 for (i = 0; (i < EFT_FS_NUM_TAGS) && (num_copied < num_base_dirs); i++) {
645 base_dir_t *base_dir = &base_dirs[i];
646 exclave_fs_base_dir_t *out_dir = &dirs[num_copied];
647
648 if (base_dir->vp == NULLVP) {
649 continue;
650 }
651
652 memset(s: out_dir, c: 0, n: sizeof(exclave_fs_base_dir_t));
653
654 error = get_vnode_info(vp: base_dir->vp, NULL, fsid: &out_dir->fsid, file_id: &out_dir->base_dir);
655 if (error) {
656 goto out;
657 }
658
659 out_dir->fs_tag = i;
660 out_dir->graft_file = is_graft(base_dir) ? base_dir->graft_info.gi_graft_file : 0;
661 num_copied++;
662 }
663
664 *count = num_copied;
665
666out:
667 lck_mtx_unlock(lck: &base_dirs_mtx);
668 return error;
669}
670
671static int
672create_exclave_dir(vnode_t base_vp, const char *exclave_id)
673{
674 vnode_t vp = NULLVP, dvp = NULLVP;
675 vfs_context_t ctx;
676 struct vnode_attr va, *vap = &va;
677 struct nameidata nd;
678 int update_flags = 0;
679 int error;
680
681 ctx = vfs_context_kernel();
682
683 NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, UIO_SYSSPACE,
684 CAST_USER_ADDR_T(exclave_id), ctx);
685 nd.ni_cnd.cn_flags |= WILLBEDIR;
686
687continue_lookup:
688 nd.ni_dvp = base_vp;
689 nd.ni_cnd.cn_flags |= USEDVP;
690
691 error = namei(ndp: &nd);
692 if (error) {
693 return error;
694 }
695
696 dvp = nd.ni_dvp;
697 vp = nd.ni_vp;
698
699 if (vp != NULLVP) {
700 error = EEXIST;
701 goto out;
702 }
703
704 nd.ni_cnd.cn_flags &= ~USEDVP;
705
706 VATTR_INIT(vap);
707 VATTR_SET(vap, va_mode, S_IRWXU | S_IRWXG);
708 VATTR_SET(vap, va_type, VDIR);
709
710 error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL);
711 if (error) {
712 goto out;
713 }
714
715 error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx);
716 if (error == EKEEPLOOKING) {
717 nd.ni_vp = vp;
718 goto continue_lookup;
719 }
720
721 if (error) {
722 goto out;
723 }
724
725 if (vp->v_name == NULL) {
726 update_flags |= VNODE_UPDATE_NAME;
727 }
728 if (vp->v_parent == NULLVP) {
729 update_flags |= VNODE_UPDATE_PARENT;
730 }
731
732 if (update_flags) {
733 vnode_update_identity(vp, dvp, name: nd.ni_cnd.cn_nameptr,
734 name_len: nd.ni_cnd.cn_namelen, name_hashval: nd.ni_cnd.cn_hash, flags: update_flags);
735 }
736
737out:
738 nameidone(&nd);
739 if (vp) {
740 vnode_put(vp);
741 }
742 if (dvp) {
743 vnode_put(vp: dvp);
744 }
745
746 return error;
747}
748
749int
750vfs_exclave_fs_root(const char *exclave_id, uint64_t *root_id)
751{
752 int error;
753
754 if (!exclave_fs_started()) {
755 return ENXIO;
756 }
757
758 if (strchr(s: exclave_id, c: '/') || !strcmp(s1: exclave_id, s2: ".") || !strcmp(s1: exclave_id, s2: "..")) {
759 /* don't allow an exclave_id that looks like a path */
760 return EINVAL;
761 }
762
763 error = exclave_fs_open_internal(fs_tag: EFT_EXCLAVE, EXCLAVE_FS_BASEDIR_ROOT_ID,
764 path: exclave_id, O_DIRECTORY, file_id: root_id);
765
766 if (error == ENOENT) {
767 vnode_t base_vp;
768
769 error = get_base_dir(fs_tag: EFT_EXCLAVE, NULL, vpp: &base_vp);
770 if (error) {
771 return error;
772 }
773
774 error = create_exclave_dir(base_vp, exclave_id);
775 if (!error) {
776 error = exclave_fs_open_internal(fs_tag: EFT_EXCLAVE, EXCLAVE_FS_BASEDIR_ROOT_ID,
777 path: exclave_id, O_DIRECTORY, file_id: root_id);
778 }
779
780 vnode_put(vp: base_vp);
781 }
782
783 return error;
784}
785
786/*
787 * Find a vnode in the open vnodes hash table with the given file_id
788 * under a base dir, take an iocount on it and return it.
789 * If base dir is a graft, file_id should be the graft inode number.
790 */
791static int
792get_open_vnode(base_dir_t *base_dir, uint64_t file_id, vnode_t *vpp)
793{
794 uint64_t vp_file_id;
795 struct open_vnode *entry;
796 int error;
797
798 if (is_graft(base_dir)) {
799 error = graft_to_host_inum(gi: &base_dir->graft_info, graft_inum: file_id, host_inum: &vp_file_id);
800 if (error) {
801 return error;
802 }
803 } else {
804 vp_file_id = file_id;
805 }
806
807 error = ENOENT;
808
809 lck_mtx_lock(lck: &open_vnodes_mtx);
810
811 LIST_FOREACH(entry, OPEN_VNODES_HASH(base_dir->dev, vp_file_id), chain) {
812 if ((entry->dev == base_dir->dev) && (entry->file_id == vp_file_id)) {
813 error = vnode_getwithref(vp: entry->vp);
814 if (!error) {
815 *vpp = entry->vp;
816 }
817 break;
818 }
819 }
820
821 lck_mtx_unlock(lck: &open_vnodes_mtx);
822 return error;
823}
824
825/*
826 * Increment a vnode open count in the open vnodes hash table.
827 * If base dir is a graft, file_id should be the host inode number.
828 */
829static int
830increment_vnode_open_count(vnode_t vp, base_dir_t *base_dir, uint64_t file_id)
831{
832 struct open_vnode *entry;
833 open_vnodes_list_head_t *list;
834 int error = 0;
835
836 lck_mtx_lock(lck: &open_vnodes_mtx);
837
838 list = OPEN_VNODES_HASH(base_dir->dev, file_id);
839
840 LIST_FOREACH(entry, list, chain) {
841 if ((entry->dev == base_dir->dev) && (entry->file_id == file_id)) {
842 break;
843 }
844 }
845
846 if (!entry) {
847 entry = kalloc_type(struct open_vnode, Z_WAITOK | Z_ZERO);
848 if (!entry) {
849 error = ENOMEM;
850 goto out;
851 }
852 entry->vp = vp;
853 entry->dev = base_dir->dev;
854 entry->file_id = file_id;
855 LIST_INSERT_HEAD(list, entry, chain);
856 num_open_vnodes++;
857 }
858
859 entry->open_count++;
860
861out:
862 lck_mtx_unlock(lck: &open_vnodes_mtx);
863 return error;
864}
865
866/*
867 * Decrement a vnode open count in the open vnodes hash table and
868 * return it with an iocount taken on it.
869 * If base dir is a graft, file_id should be the graft inode number.
870 */
871static int
872decrement_vnode_open_count(base_dir_t *base_dir, uint64_t file_id, vnode_t *vpp)
873{
874 struct open_vnode *entry;
875 vnode_t vp;
876 uint64_t vp_file_id;
877 int error = 0;
878
879 if (is_graft(base_dir)) {
880 error = graft_to_host_inum(gi: &base_dir->graft_info, graft_inum: file_id, host_inum: &vp_file_id);
881 if (error) {
882 return error;
883 }
884 } else {
885 vp_file_id = file_id;
886 }
887
888 lck_mtx_lock(lck: &open_vnodes_mtx);
889
890 LIST_FOREACH(entry, OPEN_VNODES_HASH(base_dir->dev, vp_file_id), chain) {
891 if ((entry->dev == base_dir->dev) && (entry->file_id == vp_file_id)) {
892 break;
893 }
894 }
895
896 if (!entry) {
897 error = ENOENT;
898 goto out;
899 }
900
901 vp = entry->vp;
902 entry->open_count--;
903
904 if (entry->open_count == 0) {
905 LIST_REMOVE(entry, chain);
906 kfree_type(struct open_vnode, entry);
907 num_open_vnodes--;
908 }
909
910 error = vnode_getwithref(vp);
911 if (!error) {
912 *vpp = vp;
913 }
914
915out:
916 lck_mtx_unlock(lck: &open_vnodes_mtx);
917 return error;
918}
919
920static int
921exclave_fs_open_internal(uint32_t fs_tag, uint64_t root_id, const char *path,
922 int flags, uint64_t *file_id)
923{
924 vnode_t dvp = NULLVP, vp = NULLVP;
925 base_dir_t base_dir;
926 vfs_context_t ctx;
927 struct nameidata *ndp = NULL;
928 struct vnode_attr *vap = NULL;
929 uint64_t vp_file_id;
930 int error;
931 uint32_t ndflags = NOCROSSMOUNT;
932
933 if (flags & ~(O_CREAT | O_DIRECTORY)) {
934 return EINVAL;
935 }
936
937 if (is_fs_writeable(fs_tag)) {
938 ndflags |= NOFOLLOW;
939 } else {
940 ndflags |= FOLLOW;
941 }
942
943 if ((flags & O_CREAT) && !is_fs_writeable(fs_tag)) {
944 return EROFS;
945 }
946
947 if (root_id == EXCLAVE_FS_BASEDIR_ROOT_ID) {
948 error = get_base_dir(fs_tag, base_dir: &base_dir, vpp: &dvp);
949 } else {
950 error = get_base_dir(fs_tag, base_dir: &base_dir, NULL);
951 if (!error) {
952 error = get_open_vnode(base_dir: &base_dir, file_id: root_id, vpp: &dvp);
953 }
954 }
955
956 if (error) {
957 return error;
958 }
959
960 ndp = kalloc_type(struct nameidata, Z_WAITOK);
961 if (!ndp) {
962 error = ENOMEM;
963 goto out;
964 }
965
966 ctx = vfs_context_kernel();
967
968 NDINIT(ndp, LOOKUP, OP_OPEN, ndflags, UIO_SYSSPACE,
969 CAST_USER_ADDR_T(path), ctx);
970
971 ndp->ni_rootdir = dvp;
972 ndp->ni_flag = NAMEI_ROOTDIR;
973 ndp->ni_dvp = dvp;
974 ndp->ni_cnd.cn_flags |= USEDVP;
975
976 vap = kalloc_type(struct vnode_attr, Z_WAITOK);
977 if (!vap) {
978 error = ENOMEM;
979 goto out;
980 }
981
982 VATTR_INIT(vap);
983 VATTR_SET(vap, va_mode, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
984
985 flags |= FREAD;
986
987 if (is_fs_writeable(fs_tag) && (root_id != EXCLAVE_FS_BASEDIR_ROOT_ID)) {
988 flags |= FWRITE;
989 }
990
991 error = vn_open_auth(ndp, fmode: &flags, vap, NULLVP);
992 if (error) {
993 goto out;
994 }
995
996 vp = ndp->ni_vp;
997
998 error = get_vnode_info(vp, NULL, NULL, file_id: &vp_file_id);
999 if (error) {
1000 goto out;
1001 }
1002
1003 if (is_graft(base_dir: &base_dir)) {
1004 error = host_to_graft_inum(gi: &base_dir.graft_info, host_inum: vp_file_id, graft_inum: file_id);
1005 if (error) {
1006 goto out;
1007 }
1008 } else {
1009 *file_id = vp_file_id;
1010 }
1011
1012 error = increment_vnode_open_count(vp, base_dir: &base_dir, file_id: vp_file_id);
1013
1014out:
1015 if (dvp) {
1016 vnode_put(vp: dvp);
1017 }
1018 if (vp) {
1019 vnode_put(vp);
1020 }
1021 if (ndp) {
1022 kfree_type(struct nameidata, ndp);
1023 }
1024 if (vap) {
1025 kfree_type(struct vnode_attr, vap);
1026 }
1027
1028 return error;
1029}
1030
1031int
1032vfs_exclave_fs_open(uint32_t fs_tag, uint64_t root_id, const char *name, uint64_t *file_id)
1033{
1034 if (!exclave_fs_started()) {
1035 return ENXIO;
1036 }
1037
1038 if ((fs_tag == EFT_EXCLAVE) && (root_id == EXCLAVE_FS_BASEDIR_ROOT_ID)) {
1039 return EINVAL;
1040 }
1041
1042 return exclave_fs_open_internal(fs_tag, root_id, path: name, flags: 0, file_id);
1043}
1044
1045int
1046vfs_exclave_fs_create(uint32_t fs_tag, uint64_t root_id, const char *name, uint64_t *file_id)
1047{
1048 if (!exclave_fs_started()) {
1049 return ENXIO;
1050 }
1051
1052 if ((fs_tag == EFT_EXCLAVE) && (root_id == EXCLAVE_FS_BASEDIR_ROOT_ID)) {
1053 return EINVAL;
1054 }
1055
1056 return exclave_fs_open_internal(fs_tag, root_id, path: name, O_CREAT, file_id);
1057}
1058
1059int
1060vfs_exclave_fs_close(uint32_t fs_tag, uint64_t file_id)
1061{
1062 vnode_t vp = NULLVP;
1063 base_dir_t base_dir;
1064 int flags = FREAD;
1065 int error;
1066
1067 if (!exclave_fs_started()) {
1068 return ENXIO;
1069 }
1070
1071 error = get_base_dir(fs_tag, base_dir: &base_dir, NULL);
1072 if (error) {
1073 return error;
1074 }
1075
1076 error = decrement_vnode_open_count(base_dir: &base_dir, file_id, vpp: &vp);
1077 if (error) {
1078 goto out;
1079 }
1080
1081 if (is_fs_writeable(fs_tag) && !vnode_isdir(vp)) {
1082 flags |= FWRITE;
1083 }
1084
1085 error = vn_close(vp, flags, ctx: vfs_context_kernel());
1086
1087out:
1088 if (vp) {
1089 vnode_put(vp);
1090 }
1091
1092 return error;
1093}
1094
1095static int
1096exclave_fs_io(uint32_t fs_tag, uint64_t file_id, uint64_t offset, uint64_t length, uint8_t *data, bool read)
1097{
1098 vnode_t vp = NULLVP;
1099 base_dir_t base_dir;
1100 UIO_STACKBUF(uio_buf, 1);
1101 uio_t auio = NULL;
1102 int error = 0;
1103
1104 if (!read && !is_fs_writeable(fs_tag)) {
1105 return EROFS;
1106 }
1107
1108 error = get_base_dir(fs_tag, base_dir: &base_dir, NULL);
1109 if (error) {
1110 return error;
1111 }
1112
1113 error = get_open_vnode(base_dir: &base_dir, file_id, vpp: &vp);
1114 if (error) {
1115 goto out;
1116 }
1117
1118 auio = uio_createwithbuffer(a_iovcount: 1, a_offset: offset, a_spacetype: UIO_SYSSPACE, a_iodirection: read ? UIO_READ : UIO_WRITE,
1119 a_buf_p: &uio_buf[0], a_buffer_size: sizeof(uio_buf));
1120 if (!auio) {
1121 error = ENOMEM;
1122 goto out;
1123 }
1124
1125 error = uio_addiov(a_uio: auio, a_baseaddr: (uintptr_t)data, a_length: length);
1126 if (error) {
1127 goto out;
1128 }
1129
1130 if (read) {
1131 error = VNOP_READ(vp, uio: auio, ioflag: 0, ctx: vfs_context_kernel());
1132 } else {
1133 error = VNOP_WRITE(vp, uio: auio, ioflag: 0, ctx: vfs_context_kernel());
1134 }
1135
1136 if (!error && uio_resid(a_uio: auio)) {
1137 error = EIO;
1138 }
1139
1140out:
1141 if (vp) {
1142 vnode_put(vp);
1143 }
1144
1145 return error;
1146}
1147
/* Read length bytes at file_offset from an open file into data. */
int
vfs_exclave_fs_read(uint32_t fs_tag, uint64_t file_id, uint64_t file_offset, uint64_t length, void *data)
{
	const bool is_read = true;

	if (!exclave_fs_started()) {
		return ENXIO;
	}

	return exclave_fs_io(fs_tag, file_id, file_offset, length, data, is_read);
}
1157
/* Write length bytes from data to an open file at file_offset. */
int
vfs_exclave_fs_write(uint32_t fs_tag, uint64_t file_id, uint64_t file_offset, uint64_t length, void *data)
{
	const bool is_read = false;

	if (!exclave_fs_started()) {
		return ENXIO;
	}

	return exclave_fs_io(fs_tag, file_id, file_offset, length, (void *)data, is_read);
}
1167
1168int
1169vfs_exclave_fs_remove(uint32_t fs_tag, uint64_t root_id, const char *name)
1170{
1171 vnode_t rvp = NULLVP;
1172 base_dir_t base_dir;
1173 int error;
1174
1175 if (!exclave_fs_started()) {
1176 return ENXIO;
1177 }
1178
1179 if (!is_fs_writeable(fs_tag)) {
1180 return EROFS;
1181 }
1182
1183 error = get_base_dir(fs_tag, base_dir: &base_dir, NULL);
1184 if (error) {
1185 return error;
1186 }
1187
1188 error = get_open_vnode(base_dir: &base_dir, file_id: root_id, vpp: &rvp);
1189 if (error) {
1190 return error;
1191 }
1192
1193 error = unlink1(vfs_context_kernel(), rvp, CAST_USER_ADDR_T(name), UIO_SYSSPACE, 0);
1194
1195 if (rvp) {
1196 vnode_put(vp: rvp);
1197 }
1198
1199 return error;
1200}
1201
1202int
1203vfs_exclave_fs_sync(uint32_t fs_tag, uint64_t file_id, uint64_t sync_op)
1204{
1205 vnode_t vp = NULLVP;
1206 base_dir_t base_dir;
1207 u_long command;
1208 int error;
1209
1210 if (!exclave_fs_started()) {
1211 return ENXIO;
1212 }
1213
1214 if (!is_fs_writeable(fs_tag)) {
1215 return EROFS;
1216 }
1217
1218 if (sync_op == EXCLAVE_FS_SYNC_OP_BARRIER) {
1219 command = F_BARRIERFSYNC;
1220 } else if (sync_op == EXCLAVE_FS_SYNC_OP_FULL) {
1221 command = F_FULLFSYNC;
1222 } else {
1223 return EINVAL;
1224 }
1225
1226 error = get_base_dir(fs_tag, base_dir: &base_dir, NULL);
1227 if (error) {
1228 return error;
1229 }
1230
1231 error = get_open_vnode(base_dir: &base_dir, file_id, vpp: &vp);
1232 if (error) {
1233 goto out;
1234 }
1235
1236 error = VNOP_IOCTL(vp, command, data: (caddr_t)NULL, fflag: 0, ctx: vfs_context_kernel());
1237
1238out:
1239 if (vp) {
1240 vnode_put(vp);
1241 }
1242
1243 return error;
1244}
1245
1246static int
1247map_graft_dirents(fsioc_graft_info_t *graft_info, void *dirent_buf, int32_t count)
1248{
1249 int i, error = 0;
1250
1251 for (i = 0; i < count; i++) {
1252 exclave_fs_dirent_t *dirent = (exclave_fs_dirent_t *)dirent_buf;
1253 uint64_t mapped_file_id;
1254
1255 error = host_to_graft_inum(gi: graft_info, host_inum: dirent->file_id, graft_inum: &mapped_file_id);
1256 if (error) {
1257 return error;
1258 }
1259 dirent->file_id = mapped_file_id;
1260 dirent_buf = (char *)dirent_buf + dirent->length;
1261 }
1262
1263 return 0;
1264}
1265
1266int
1267vfs_exclave_fs_readdir(uint32_t fs_tag, uint64_t file_id, void *dirent_buf,
1268 uint32_t buf_size, int32_t *count)
1269{
1270 vnode_t dvp = NULLVP;
1271 base_dir_t base_dir;
1272 UIO_STACKBUF(uio_buf, 1);
1273 uio_t auio = NULL;
1274 vfs_context_t ctx;
1275 uthread_t ut;
1276 struct attrlist al;
1277 struct vnode_attr *vap = NULL;
1278 char *va_name = NULL;
1279 int32_t eofflag;
1280 int error;
1281
1282 if (!exclave_fs_started()) {
1283 return ENXIO;
1284 }
1285
1286 error = get_base_dir(fs_tag, base_dir: &base_dir, NULL);
1287 if (error) {
1288 return error;
1289 }
1290
1291 /*
1292 * For RO tags, readdir thru VFS is not permitted in RELEASE -
1293 * it should be based on the catalogue.
1294 * For non-RELEASE we allow readdir if a boot-arg is set
1295 * or if the volume is not sealed (roots installation flow).
1296 */
1297 if (fs_tag != EFT_EXCLAVE) {
1298#if (DEVELOPMENT || DEBUG)
1299 if (!integrity_checks_disabled && is_sealed(&base_dir)) {
1300 return ENOTSUP;
1301 }
1302#else
1303 return ENOTSUP;
1304#endif
1305 }
1306
1307 error = get_open_vnode(base_dir: &base_dir, file_id, vpp: &dvp);
1308 if (error) {
1309 goto out;
1310 }
1311
1312 if (!vnode_isdir(vp: dvp)) {
1313 error = ENOTDIR;
1314 goto out;
1315 }
1316
1317 auio = uio_createwithbuffer(a_iovcount: 1, a_offset: 0, a_spacetype: UIO_SYSSPACE, a_iodirection: UIO_READ,
1318 a_buf_p: &uio_buf[0], a_buffer_size: sizeof(uio_buf));
1319 if (!auio) {
1320 error = ENOMEM;
1321 goto out;
1322 }
1323
1324 error = uio_addiov(a_uio: auio, a_baseaddr: (uintptr_t)dirent_buf, a_length: buf_size);
1325 if (error) {
1326 goto out;
1327 }
1328
1329 al.bitmapcount = ATTR_BIT_MAP_COUNT;
1330 al.commonattr = ATTR_CMN_RETURNED_ATTRS | ATTR_CMN_NAME | ATTR_CMN_OBJTYPE | ATTR_CMN_FILEID;
1331 al.fileattr = ATTR_FILE_DATALENGTH;
1332
1333 vap = kalloc_type(struct vnode_attr, Z_WAITOK);
1334 if (!vap) {
1335 error = ENOMEM;
1336 goto out;
1337 }
1338
1339 VATTR_INIT(vap);
1340 va_name = zalloc_flags(ZV_NAMEI, Z_WAITOK | Z_ZERO);
1341 if (!va_name) {
1342 error = ENOMEM;
1343 goto out;
1344 }
1345 vap->va_name = va_name;
1346
1347 VATTR_SET_ACTIVE(vap, va_name);
1348 VATTR_SET_ACTIVE(vap, va_objtype);
1349 VATTR_SET_ACTIVE(vap, va_fileid);
1350 VATTR_SET_ACTIVE(vap, va_total_size);
1351 VATTR_SET_ACTIVE(vap, va_data_size);
1352
1353 ctx = vfs_context_kernel();
1354 ut = current_uthread();
1355
1356 ut->uu_flag |= UT_KERN_RAGE_VNODES;
1357 error = VNOP_GETATTRLISTBULK(dvp, &al, vap, auio, NULL,
1358 0, &eofflag, count, ctx);
1359 ut->uu_flag &= ~UT_KERN_RAGE_VNODES;
1360
1361 if (!error && !eofflag) {
1362 return ENOBUFS;
1363 }
1364
1365 if (is_graft(base_dir: &base_dir)) {
1366 error = map_graft_dirents(graft_info: &base_dir.graft_info, dirent_buf, count: *count);
1367 if (error) {
1368 goto out;
1369 }
1370 }
1371
1372out:
1373 if (va_name) {
1374 zfree(ZV_NAMEI, va_name);
1375 }
1376 if (vap) {
1377 kfree_type(struct vnode_attr, vap);
1378 }
1379 if (dvp) {
1380 vnode_put(vp: dvp);
1381 }
1382
1383 return error;
1384}
1385
1386int
1387vfs_exclave_fs_getsize(uint32_t fs_tag, uint64_t file_id, uint64_t *size)
1388{
1389 vnode_t vp = NULLVP;
1390 base_dir_t base_dir;
1391 vfs_context_t ctx;
1392 struct vnode_attr *vap = NULL;
1393 int error;
1394
1395 if (!exclave_fs_started()) {
1396 return ENXIO;
1397 }
1398
1399 error = get_base_dir(fs_tag, base_dir: &base_dir, NULL);
1400 if (error) {
1401 return error;
1402 }
1403
1404 error = get_open_vnode(base_dir: &base_dir, file_id, vpp: &vp);
1405 if (error) {
1406 goto out;
1407 }
1408
1409 if (vnode_isdir(vp)) {
1410 error = EISDIR;
1411 goto out;
1412 }
1413
1414 vap = kalloc_type(struct vnode_attr, Z_WAITOK);
1415 if (!vap) {
1416 error = ENOMEM;
1417 goto out;
1418 }
1419
1420 VATTR_INIT(vap);
1421 VATTR_WANTED(vap, va_data_size);
1422
1423 ctx = vfs_context_kernel();
1424
1425 error = VNOP_GETATTR(vp, vap, ctx);
1426 if (error) {
1427 goto out;
1428 }
1429
1430 if (!VATTR_IS_SUPPORTED(vap, va_data_size)) {
1431 error = ENOTSUP;
1432 goto out;
1433 }
1434
1435 *size = vap->va_data_size;
1436
1437out:
1438 if (vap) {
1439 kfree_type(struct vnode_attr, vap);
1440 }
1441 if (vp) {
1442 vnode_put(vp);
1443 }
1444
1445 return error;
1446}
1447
1448int
1449vfs_exclave_fs_sealstate(uint32_t fs_tag, bool *sealed)
1450{
1451 base_dir_t base_dir;
1452 int error;
1453
1454 if (!exclave_fs_started()) {
1455 return ENXIO;
1456 }
1457
1458 error = get_base_dir(fs_tag, base_dir: &base_dir, NULL);
1459 if (error) {
1460 return error;
1461 }
1462
1463 *sealed = is_sealed(base_dir: &base_dir);
1464
1465 return 0;
1466}
1467
1468