1/*
2 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 *
28 *
29 * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)init_main.c 8.16 (Berkeley) 5/14/95
66 */
67
68/*
69 *
70 * Mach Operating System
71 * Copyright (c) 1987 Carnegie-Mellon University
72 * All rights reserved. The CMU software License Agreement specifies
73 * the terms and conditions for use and redistribution.
74 */
75/*
76 * NOTICE: This file was modified by McAfee Research in 2004 to introduce
77 * support for mandatory and extensible security protections. This notice
78 * is included in support of clause 2.2 (b) of the Apple Public License,
79 * Version 2.0.
80 */
81
82#include <sys/param.h>
83#include <sys/filedesc.h>
84#include <sys/kernel.h>
85#include <sys/mount_internal.h>
86#include <sys/proc_internal.h>
87#include <sys/kauth.h>
88#include <sys/systm.h>
89#include <sys/vnode_internal.h>
90#include <sys/conf.h>
91#include <sys/buf_internal.h>
92#include <sys/user.h>
93#include <sys/time.h>
94#include <sys/systm.h>
95#include <sys/mman.h>
96
97#include <security/audit/audit.h>
98
99#include <sys/malloc.h>
100#include <sys/dkstat.h>
101#include <sys/codesign.h>
102
103#include <kern/startup.h>
104#include <kern/thread.h>
105#include <kern/task.h>
106#include <kern/ast.h>
107#include <kern/zalloc.h>
108#include <kern/ux_handler.h> /* for ux_handler_setup() */
109#include <kern/sched_hygiene.h>
110
111#if (DEVELOPMENT || DEBUG)
112#include <kern/debug.h>
113#endif
114
115#include <mach/vm_param.h>
116
117#include <vm/vm_map.h>
118#include <vm/vm_kern.h>
119
120#include <sys/reboot.h>
121#include <dev/busvar.h> /* for pseudo_inits */
122#include <sys/kdebug.h>
123#include <sys/monotonic.h>
124
125#include <mach/mach_types.h>
126#include <mach/vm_prot.h>
127#include <mach/semaphore.h>
128#include <mach/sync_policy.h>
129#include <kern/clock.h>
130#include <sys/csr.h>
131#include <mach/kern_return.h>
132#include <mach/thread_act.h> /* for thread_resume() */
133#include <sys/mcache.h> /* for mcache_init() */
134#include <sys/mbuf.h> /* for mbinit() */
135#include <sys/event.h> /* for knote_init() */
136#include <sys/eventhandler.h> /* for eventhandler_init() */
137#include <sys/kern_memorystatus.h> /* for memorystatus_init() */
138#include <sys/kern_memorystatus_freeze.h> /* for memorystatus_freeze_init() */
139#include <sys/aio_kern.h> /* for aio_init() */
140#include <sys/semaphore.h> /* for psem_cache_init() */
141#include <net/dlil.h> /* for dlil_init() */
142#include <net/iptap.h> /* for iptap_init() */
143#include <sys/socketvar.h> /* for socketinit() */
144#include <sys/protosw.h> /* for domaininit() */
145#include <kern/sched_prim.h> /* for thread_wakeup() */
146#include <net/if_ether.h> /* for ether_family_init() */
147#include <net/if_gif.h> /* for gif_init() */
148#include <miscfs/devfs/devfsdefs.h> /* for devfs_kernel_mount() */
149#include <vm/vm_kern.h> /* for kmem_suballoc() */
150#include <sys/proc_uuid_policy.h> /* proc_uuid_policy_init() */
151#include <netinet/flow_divert.h> /* flow_divert_init() */
152#include <net/content_filter.h> /* for cfil_init() */
153#include <net/necp.h> /* for necp_init() */
154#include <net/network_agent.h> /* for netagent_init() */
155#include <net/packet_mangler.h> /* for pkt_mnglr_init() */
156#include <net/if_utun.h> /* for utun_register_control() */
157#include <netinet6/ipsec.h> /* for ipsec_init() */
158#include <net/if_redirect.h> /* for if_redirect_init() */
159#include <net/netsrc.h> /* for netsrc_init() */
160#include <net/ntstat.h> /* for nstat_init() */
161#include <netinet/mptcp_var.h> /* for mptcp_control_register() */
162#include <net/nwk_wq.h> /* for nwk_wq_init */
163#include <net/restricted_in_port.h> /* for restricted_in_port_init() */
164#include <net/remote_vif.h> /* for rvi_init() */
165#include <net/kctl_test.h> /* for kctl_test_init() */
166#include <netinet/kpi_ipfilter_var.h> /* for ipfilter_init() */
167#include <kern/assert.h> /* for assert() */
168#include <sys/kern_overrides.h> /* for init_system_override() */
169#include <sys/lockf.h> /* for lf_init() */
170#include <sys/fsctl.h>
171
172#include <net/init.h>
173
174#if CONFIG_MACF
175#include <security/mac_framework.h>
176#include <security/mac_internal.h> /* mac_init_bsd() */
177#include <security/mac_mach_internal.h> /* mac_update_task_label() */
178#endif
179
180#include <machine/exec.h>
181
182#if CONFIG_NETBOOT
183#include <sys/netboot.h>
184#endif
185
186#if CONFIG_IMAGEBOOT
187#include <sys/imageboot.h>
188#endif
189
190#if PFLOG
191#include <net/if_pflog.h>
192#endif
193
194#if SKYWALK
195#include <skywalk/os_skywalk_private.h>
196#endif /* SKYWALK */
197
198#include <pexpert/pexpert.h>
199#include <machine/pal_routines.h>
200#include <console/video_console.h>
201
202#if CONFIG_XNUPOST
203#include <tests/xnupost.h>
204#endif
205
/*
 * Prototypes written out locally in this file.  bsd_init() below calls
 * IOKitInitializeTime(), IOSleep(), IOSetImageBoot() and loopattach();
 * get_user_regs() and ipc_task_enable() are not referenced in the part of
 * the file visible here.
 */
206void * get_user_regs(thread_t); /* XXX kludge for <machine/thread.h> */
207void IOKitInitializeTime(void); /* XXX */
208void IOSleep(unsigned int); /* XXX */
209void IOSetImageBoot(void); /* XXX */
210void loopattach(void); /* XXX */
211
212void ipc_task_enable(task_t task);
213
214const char *const copyright =
215 "Copyright (c) 1982, 1986, 1989, 1991, 1993\n\t"
216 "The Regents of the University of California. "
217 "All rights reserved.\n\n";
218
219/*
 * Components of the first process -- never freed.
 *
 * vfs_context0: its vc_ucred is the credential bsd_init() attaches to
 *               kernproc.
 * limit0:       resource limits that bsd_init() fills in and stores as
 *               kernproc->p_limit.
 * pstats0:      stored by bsd_init() as kernproc->p_stats.
 * kernproc:     the proc that bsd_init() sets up as process 0 and binds to
 *               kernel_task.
 * initproc:     not assigned in the part of the file visible here.
 */
220SECURITY_READ_ONLY_LATE(struct vfs_context) vfs_context0;
221
222static struct plimit limit0;
223static struct pstats pstats0;
224SECURITY_READ_ONLY_LATE(proc_t) kernproc;
225proc_t XNU_PTRAUTH_SIGNED_PTR("initproc") initproc;
226
227long tk_cancc;
228long tk_nin;
229long tk_nout;
230long tk_rawcc;
231
232int lock_trace = 0;
233/* Global variables to make pstat happy. We do swapping differently */
234int nswdev, nswap;
235int nswapmap;
236void *swapmap;
237struct swdevt swdevt[1];
238
239static LCK_GRP_DECLARE(hostname_lck_grp, "hostname");
240LCK_MTX_DECLARE(hostname_lock, &hostname_lck_grp);
241LCK_MTX_DECLARE(domainname_lock, &hostname_lck_grp);
242
243dev_t rootdev; /* device of the root */
244dev_t dumpdev; /* device to take dumps on */
245long dumplo; /* offset into dumpdev */
246long hostid;
247char hostname[MAXHOSTNAMELEN];
248char domainname[MAXDOMNAMELEN];
249char rootdevice[DEVMAXNAMESIZE];
250
251struct vnode *rootvp;
252bool rootvp_is_ssd = false;
253SECURITY_READ_ONLY_LATE(int) boothowto;
254/*
255 * -minimalboot indicates that we want userspace to be bootstrapped to a
256 * minimal environment. What constitutes minimal is up to the bootstrap
257 * process.
258 */
259TUNABLE(int, minimalboot, "-minimalboot", 0);
260#if CONFIG_DARKBOOT
261int darkboot = 0;
262#endif
263
264extern kern_return_t IOFindBSDRoot(char *, unsigned int, dev_t *, u_int32_t *);
265extern void IOSecureBSDRoot(const char * rootName);
266extern kern_return_t IOKitBSDInit(void );
267extern boolean_t IOSetRecoveryBoot(bsd_bootfail_mode_t, uuid_t, boolean_t);
268extern void kminit(void);
269extern void bsd_bufferinit(void);
270extern void throttle_init(void);
271
/*
 * bsd_pageable_map is the submap of kernel_map that bsd_init() creates
 * with kmem_suballoc(); the comment at that call describes it as pageable
 * memory for temporary copying (execve()).
 */
272vm_map_t bsd_pageable_map;
273vm_map_t mb_map;
274
/*
 * bsd_simul_execs:       bsd_init() asserts it is non-zero, then uses it as
 *                        the element count of execargs_cache and as the
 *                        initial value of execargs_cache_size and
 *                        execargs_free_count.
 * bsd_pageable_map_size: size handed to kmem_suballoc() for
 *                        bsd_pageable_map and returned by the bsd_pageable
 *                        kmem range callback.
 *
 * Neither static is assigned in the part of the file visible here;
 * presumably bsd_exec_setup() computes them -- TODO confirm.
 */
275static int bsd_simul_execs;
276static int bsd_pageable_map_size;
277__private_extern__ int execargs_cache_size = 0;
278__private_extern__ int execargs_free_count = 0;
279__private_extern__ vm_offset_t * execargs_cache = NULL;
280
281void bsd_exec_setup(int);
282
283__private_extern__ int bootarg_execfailurereports = 0;
284
285#if __x86_64__
286__private_extern__ TUNABLE(int, bootarg_no32exec, "no32exec", 1);
287#endif
288
289#if DEVELOPMENT || DEBUG
290/* Prevent kernel-based ASLR from being used. */
291__private_extern__ TUNABLE(bool, bootarg_disable_aslr, "-disable_aslr", 0);
292#endif
293
294/*
295 * Allow an alternate dyld to be used for testing.
296 */
297
298#if DEVELOPMENT || DEBUG
299char dyld_alt_path[MAXPATHLEN];
300int use_alt_dyld = 0;
301
302char panic_on_proc_crash[NAME_MAX];
303int use_panic_on_proc_crash = 0;
304
305char panic_on_proc_exit[NAME_MAX];
306int use_panic_on_proc_exit = 0;
307
308char panic_on_proc_spawn_fail[NAME_MAX];
309int use_panic_on_proc_spawn_fail = 0;
310
311char dyld_suffix[NAME_MAX];
312int use_dyld_suffix = 0;
313#endif
314
315#if DEVELOPMENT || DEBUG
316__private_extern__ bool bootarg_hide_process_traced = 0;
317#endif
318
319int cmask = CMASK;
320extern int customnbuf;
321
322kern_return_t bsd_autoconf(void);
323void bsd_utaskbootstrap(void);
324
325#if CONFIG_DEV_KMEM
326extern void dev_kmem_init(void);
327#endif
328static void process_name(const char *, proc_t);
329
330static void setconf(void);
331
332#if CONFIG_BASESYSTEMROOT
333static int bsd_find_basesystem_dmg(char *bsdmgpath_out, bool *rooted_dmg, bool *skip_signature_check);
334static boolean_t bsdmgroot_bootable(void);
335#endif // CONFIG_BASESYSTEMROOT
336
337bool bsd_rooted_ramdisk(void);
338
339#if SYSV_SHM
340extern void sysv_shm_lock_init(void);
341#endif
342#if SYSV_SEM
343extern void sysv_sem_lock_init(void);
344#endif
345#if SYSV_MSG
346extern void sysv_msg_lock_init(void);
347#endif
348
349#if CONFIG_MACF
350#if defined (__i386__) || defined (__x86_64__)
351/* MACF policy_check configuration flags; see policy_check.c for details */
352extern int check_policy_init(int);
353#endif
354#endif /* CONFIG_MACF */
355
356/* If we are using CONFIG_DTRACE */
357#if CONFIG_DTRACE
358extern void dtrace_postinit(void);
359#endif
360
361/*
362 * Initialization code.
363 * Called from cold start routine as
364 * soon as a stack and segmentation
365 * have been established.
366 * Functions:
367 * turn on clock
368 * hand craft 0th process
369 * call all initialization routines
370 * hand craft 1st user process
371 */
372
373/*
374 * Sets the name for the given task.
375 */
376static void
377process_name(const char *s, proc_t p)
378{
379 strlcpy(dst: p->p_comm, src: s, n: sizeof(p->p_comm));
380 strlcpy(dst: p->p_name, src: s, n: sizeof(p->p_name));
381}
382
383/*
 * To allow these values to be patched, they're globals here.
 * bsd_init() copies them into limit0 as the RLIMIT_STACK, RLIMIT_DATA and
 * RLIMIT_CORE entries of process 0.
 */
384#include <machine/vmparam.h>
385struct rlimit vm_initial_limit_stack = { .rlim_cur = DFLSSIZ, .rlim_max = MAXSSIZ - PAGE_MAX_SIZE };
386struct rlimit vm_initial_limit_data = { .rlim_cur = DFLDSIZ, .rlim_max = MAXDSIZ };
387struct rlimit vm_initial_limit_core = { .rlim_cur = DFLCSIZ, .rlim_max = MAXCSIZ };
388
/* Reference group bsd_init() passes when initialising limit0.pl_refcnt. */
389extern struct os_refgrp rlimit_refgrp;
390
/* bsd_init() compares this against netboot_mountroot to detect a netboot. */
391extern int (*mountroot)(void);
392
/*
 * Lock attribute and lock groups for the per-process locks.  In bsd_init()
 * kernproc's locks are initialised with proc_lck_attr and these groups:
 *   p_mlock                        -> proc_mlock_grp
 *   p_ucred_mlock, p_audit_mlock   -> proc_ucred_mlock_grp
 *   p_slock                        -> proc_slock_grp
 * The remaining groups are not used in the part of the file visible here.
 */
393LCK_ATTR_DECLARE(proc_lck_attr, 0, 0);
394LCK_GRP_DECLARE(proc_lck_grp, "proc");
395LCK_GRP_DECLARE(proc_slock_grp, "proc-slock");
396LCK_GRP_DECLARE(proc_fdmlock_grp, "proc-fdmlock");
397LCK_GRP_DECLARE(proc_mlock_grp, "proc-mlock");
398LCK_GRP_DECLARE(proc_ucred_mlock_grp, "proc-ucred-mlock");
399LCK_GRP_DECLARE(proc_dirslock_grp, "proc-dirslock");
400LCK_GRP_DECLARE(proc_kqhashlock_grp, "proc-kqhashlock");
401LCK_GRP_DECLARE(proc_knhashlock_grp, "proc-knhashlock");
402
403
/*
 * Mutex in proc_mlock_grp, declared with proc_lck_attr.
 * NOTE(review): presumably the lock taken by proc_list_lock() -- confirm.
 */
404LCK_MTX_DECLARE_ATTR(proc_list_mlock, &proc_mlock_grp, &proc_lck_attr);
405
406#if XNU_TARGET_OS_OSX
407/* hook called after root is mounted XXX temporary hack */
408void (*mountroot_post_hook)(void);
409void (*unmountroot_pre_hook)(void);
410#endif
411void set_rootvnode(vnode_t);
412
413extern lck_rw_t rootvnode_rw_lock;
414
/*
 * Address range backing bsd_pageable_map: bsd_init() passes
 * &bsd_pageable_range.min_address to kmem_suballoc() as the submap address.
 *
 * The block given to KMEM_RANGE_REGISTER_DYNAMIC returns
 * bsd_pageable_map_size, after asserting that it is non-zero.
 * NOTE(review): going by the macro name, the block supplies the size of the
 * range when it is registered -- confirm against the kmem range headers.
 */
415SECURITY_READ_ONLY_LATE(struct mach_vm_range) bsd_pageable_range = {};
416KMEM_RANGE_REGISTER_DYNAMIC(bsd_pageable, &bsd_pageable_range, ^() {
417 assert(bsd_pageable_map_size != 0);
418 return (vm_map_size_t) bsd_pageable_map_size;
419});
420
421/* called with an iocount and usecount on new_rootvnode */
422void
423set_rootvnode(vnode_t new_rootvnode)
424{
425 mount_t new_mount = (new_rootvnode != NULL) ? new_rootvnode->v_mount : NULL;
426 vnode_t new_devvp = (new_mount != NULL) ? new_mount->mnt_devvp : NULL;
427 vnode_t old_rootvnode = rootvnode;
428
429 new_rootvnode->v_flag |= VROOT;
430 rootvp = new_devvp;
431 rootvnode = new_rootvnode;
432 kernproc->p_fd.fd_cdir = new_rootvnode;
433 if (new_devvp != NULL) {
434 rootdev = vnode_specrdev(vp: new_devvp);
435 } else if (new_mount != NULL) {
436 rootdev = vfs_statfs(mp: new_mount)->f_fsid.val[0]; /* like ATTR_CMN_DEVID */
437 } else {
438 rootdev = NODEV;
439 }
440
441 if (old_rootvnode) {
442 vnode_rele(vp: old_rootvnode);
443 }
444}
445
446#define RAMDEV "md0"
447
448bool
449bsd_rooted_ramdisk(void)
450{
451 bool is_ramdisk = false;
452 char *dev_path = zalloc(view: ZV_NAMEI);
453 if (dev_path == NULL) {
454 panic("failed to allocate devpath string!");
455 }
456
457 if (PE_parse_boot_argn(arg_string: "rd", arg_ptr: dev_path, MAXPATHLEN)) {
458 if (strncmp(s1: dev_path, RAMDEV, n: strlen(RAMDEV)) == 0) {
459 is_ramdisk = true;
460 }
461 }
462
463 zfree(ZV_NAMEI, dev_path);
464 return is_ramdisk;
465}
466
467/*
468 * This function is called very early on in the Mach startup, from the
469 * function start_kernel_threads() in osfmk/kern/startup.c. It's called
470 * in the context of the current (startup) task using a call to the
471 * function kernel_thread_create() to jump into start_kernel_threads().
472 * Internally, kernel_thread_create() calls thread_create_internal(),
473 * which calls uthread_init(). The function of uthread_init() is
474 * normally to init a uthread structure, and fill out the uu_sigmask,
475 * tro_ucred/tro_proc fields. It skips filling these out in the case of the "task"
476 * being "kernel_task", because the order of operation is inverted. To
477 * account for that, we need to manually fill in at least the contents
478 * of the tro_ucred field so that the uthread structure can be
479 * used like any other.
480 */
481void
482bsd_init(void)
483{
484 struct uthread *ut;
485 vnode_t init_rootvnode = NULLVP;
486 struct proc_ro_data kernproc_ro_data = {
487 .p_csflags = CS_VALID,
488 };
489 struct task_ro_data kerntask_ro_data = { };
490#if CONFIG_NETBOOT || CONFIG_IMAGEBOOT
491 boolean_t netboot = FALSE;
492#endif
493
494#if (DEVELOPMENT || DEBUG)
495 platform_stall_panic_or_spin(PLATFORM_STALL_XNU_LOCATION_BSD_INIT);
496#endif
497
498#define DEBUG_BSDINIT 0
499
500#if DEBUG_BSDINIT
501#define bsd_init_kprintf(x, ...) kprintf("bsd_init: " x, ## __VA_ARGS__)
502#else
503#define bsd_init_kprintf(x, ...)
504#endif
505
506 throttle_init();
507
508 printf(copyright);
509
510#if CONFIG_DEV_KMEM
511 bsd_init_kprintf("calling dev_kmem_init\n");
512 dev_kmem_init();
513#endif
514
515 /* Initialize kauth subsystem before instancing the first credential */
516 bsd_init_kprintf("calling kauth_init\n");
517 kauth_init();
518
519 /* kernel_task->proc = kernproc; */
520 set_bsdtask_info(kernel_task, (void *)kernproc);
521
522 /* Set the parent of kernproc to itself */
523 kernproc->p_pptr = kernproc;
524
525 /* Set the state to SRUN */
526 kernproc->p_stat = SRUN;
527
528 /* Set the proc flags */
529#if defined(__LP64__)
530 kernproc->p_flag = P_SYSTEM | P_LP64;
531#else
532 kernproc->p_flag = P_SYSTEM;
533#endif
534
535 kernproc->p_nice = NZERO;
536 TAILQ_INIT(&kernproc->p_uthlist);
537
538 /* set the cred */
539 kauth_cred_set(&kernproc_ro_data.p_ucred.__smr_ptr, vfs_context0.vc_ucred);
540 kernproc->p_proc_ro = proc_ro_alloc(p: kernproc, p_data: &kernproc_ro_data,
541 t: kernel_task, t_data: &kerntask_ro_data);
542
543 /* give kernproc a name */
544 bsd_init_kprintf("calling process_name\n");
545 process_name(s: "kernel_task", p: kernproc);
546
547 /* Allocate proc lock attribute */
548
549 lck_mtx_init(lck: &kernproc->p_mlock, grp: &proc_mlock_grp, attr: &proc_lck_attr);
550 lck_mtx_init(lck: &kernproc->p_ucred_mlock, grp: &proc_ucred_mlock_grp, attr: &proc_lck_attr);
551#if CONFIG_AUDIT
552 lck_mtx_init(lck: &kernproc->p_audit_mlock, grp: &proc_ucred_mlock_grp, attr: &proc_lck_attr);
553#endif /* CONFIG_AUDIT */
554 lck_spin_init(lck: &kernproc->p_slock, grp: &proc_slock_grp, attr: &proc_lck_attr);
555
556 /* Init the file descriptor table. */
557 fdt_init(p: kernproc);
558 kernproc->p_fd.fd_cmask = (mode_t)cmask;
559
560 assert(bsd_simul_execs != 0);
561 execargs_cache_size = bsd_simul_execs;
562 execargs_free_count = bsd_simul_execs;
563 execargs_cache = zalloc_permanent(bsd_simul_execs * sizeof(vm_offset_t),
564 ZALIGN(vm_offset_t));
565
566 if (current_task() != kernel_task) {
567 printf("bsd_init: We have a problem, "
568 "current task is not kernel task\n");
569 }
570
571 bsd_init_kprintf("calling get_bsdthread_info\n");
572 ut = current_uthread();
573
574#if CONFIG_MACF
575 /*
576 * Initialize the MAC Framework
577 */
578 mac_policy_initbsd();
579
580#if defined (__i386__) || defined (__x86_64__)
581 /*
582 * We currently only support this on i386/x86_64, as that is the
583 * only lock code we have instrumented so far.
584 */
585 int policy_check_flags;
586 PE_parse_boot_argn("policy_check", &policy_check_flags, sizeof(policy_check_flags));
587 check_policy_init(policy_check_flags);
588#endif
589#endif /* CONFIG_MACF */
590
591 /*
592 * Make a session and group
593 *
594 * No need to hold the pgrp lock,
595 * there are no other BSD threads yet.
596 */
597 struct session *session0 = session_alloc(leader: kernproc);
598 struct pgrp *pgrp0 = pgrp_alloc(pgid: 0, bits: PGRP_REF_NONE);
599 session0->s_ttypgrpid = 0;
600 pgrp0->pg_session = session0;
601
602 /*
603 * Create process 0.
604 */
605 proc_list_lock();
606 os_ref_init_mask(&kernproc->p_refcount, P_REF_BITS, &p_refgrp, P_REF_NONE);
607 os_ref_init_raw(&kernproc->p_waitref, &p_refgrp);
608 proc_ref_hold_proc_task_struct(proc: kernproc);
609
610 /*
611 * Make a group and session, then simulate pinsertchild(),
612 * adjusted for the kernel.
613 */
614 pghash_insert_locked(pgrp0);
615
616 LIST_INSERT_HEAD(&pgrp0->pg_members, kernproc, p_pglist);
617 smr_init_store(&kernproc->p_pgrp, pgrp0);
618 LIST_INSERT_HEAD(&allproc, kernproc, p_list);
619
620 LIST_INSERT_HEAD(SESSHASH(0), session0, s_hash);
621 proc_list_unlock();
622
623 proc_set_task(kernproc, kernel_task);
624
625#if DEVELOPMENT || DEBUG
626 if (bootarg_disable_aslr) {
627 kernproc->p_flag |= P_DISABLE_ASLR;
628 }
629#endif
630
631 TAILQ_INSERT_TAIL(&kernproc->p_uthlist, ut, uu_list);
632
633 /*
634 * Officially associate the kernel with vfs_context0.vc_ucred.
635 */
636#if CONFIG_MACF
637 mac_cred_label_associate_kernel(cred: vfs_context0.vc_ucred);
638#endif
639 proc_update_creds_onproc(kernproc, cred: vfs_context0.vc_ucred);
640
641 TAILQ_INIT(&kernproc->p_aio_activeq);
642 TAILQ_INIT(&kernproc->p_aio_doneq);
643 kernproc->p_aio_total_count = 0;
644
645 /* Create the limits structures. */
646 for (uint32_t i = 0; i < ARRAY_COUNT(limit0.pl_rlimit); i++) {
647 limit0.pl_rlimit[i].rlim_cur =
648 limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;
649 }
650 limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur = NOFILE;
651 limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur = maxprocperuid;
652 limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
653 limit0.pl_rlimit[RLIMIT_STACK] = vm_initial_limit_stack;
654 limit0.pl_rlimit[RLIMIT_DATA] = vm_initial_limit_data;
655 limit0.pl_rlimit[RLIMIT_CORE] = vm_initial_limit_core;
656 os_ref_init_count(&limit0.pl_refcnt, &rlimit_refgrp, 1);
657
658 smr_init_store(&kernproc->p_limit, &limit0);
659 kernproc->p_stats = &pstats0;
660 kernproc->p_subsystem_root_path = NULL;
661
662 /*
663 * Charge root for one process: launchd.
664 */
665 bsd_init_kprintf("calling chgproccnt\n");
666 (void)chgproccnt(uid: 0, diff: 1);
667
668 /*
669 * Allocate a kernel submap for pageable memory
670 * for temporary copying (execve()).
671 */
672 bsd_init_kprintf("calling kmem_suballoc\n");
673 bsd_pageable_map = kmem_suballoc(parent: kernel_map,
674 addr: &bsd_pageable_range.min_address,
675 size: (vm_size_t)bsd_pageable_map_size,
676 vmc_options: VM_MAP_CREATE_PAGEABLE,
677 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
678 flags: KMS_PERMANENT | KMS_NOFAIL,
679 VM_KERN_MEMORY_BSD).kmr_submap;
680
681 /*
682 * Initialize buffers and hash links for buffers
683 *
684 * SIDE EFFECT: Starts a thread for bcleanbuf_thread(), so must
685 * happen after a credential has been associated with
686 * the kernel task.
687 */
688 bsd_init_kprintf("calling bsd_bufferinit\n");
689 bsd_bufferinit();
690
691 /*
692 * Initialize the calendar.
693 */
694 bsd_init_kprintf("calling IOKitInitializeTime\n");
695 IOKitInitializeTime();
696
697 /* Initialize the file systems. */
698 bsd_init_kprintf("calling vfsinit\n");
699 vfsinit();
700
701#if CONFIG_PROC_UUID_POLICY
702 /* Initial proc_uuid_policy subsystem */
703 bsd_init_kprintf("calling proc_uuid_policy_init()\n");
704 proc_uuid_policy_init();
705#endif
706
707#if SOCKETS
708#if CONFIG_MBUF_MCACHE
709 /* Initialize per-CPU cache allocator */
710 mcache_init();
711#endif /* CONFIG_MBUF_MCACHE */
712
713 /* Initialize mbuf's. */
714 bsd_init_kprintf("calling mbinit\n");
715 mbinit();
716 restricted_in_port_init();
717#endif /* SOCKETS */
718
719 /*
720 * Initializes security event auditing.
721 * XXX: Should/could this occur later?
722 */
723#if CONFIG_AUDIT
724 bsd_init_kprintf("calling audit_init\n");
725 audit_init();
726#endif
727
728 /* Initialize kqueues */
729 bsd_init_kprintf("calling knote_init\n");
730 knote_init();
731
732 /* Initialize event handler */
733 bsd_init_kprintf("calling eventhandler_init\n");
734 eventhandler_init();
735
736 /* Initialize for async IO */
737 bsd_init_kprintf("calling aio_init\n");
738 aio_init();
739
740 pthread_init();
741 /* POSIX Shm and Sem */
742 bsd_init_kprintf("calling pshm_cache_init\n");
743 pshm_cache_init();
744 bsd_init_kprintf("calling psem_cache_init\n");
745 psem_cache_init();
746
747 /*
748 * Initialize protocols. Block reception of incoming packets
749 * until everything is ready.
750 */
751#if NETWORKING
752 bsd_init_kprintf("calling nwk_wq_init\n");
753 nwk_wq_init();
754 bsd_init_kprintf("calling dlil_init\n");
755 dlil_init();
756#endif /* NETWORKING */
757#if SOCKETS
758 bsd_init_kprintf("calling socketinit\n");
759 socketinit();
760 bsd_init_kprintf("calling domaininit\n");
761 domaininit();
762 iptap_init();
763#if FLOW_DIVERT
764 flow_divert_init();
765#endif /* FLOW_DIVERT */
766#endif /* SOCKETS */
767#if SKYWALK
768 bsd_init_kprintf("calling skywalk_init\n");
769 (void) skywalk_init();
770#endif /* SKYWALK */
771#if NETWORKING
772#if NECP
773 /* Initialize Network Extension Control Policies */
774 necp_init();
775#endif
776 netagent_init();
777#endif /* NETWORKING */
778
779#if CONFIG_FREEZE
780#ifndef CONFIG_MEMORYSTATUS
781 #error "CONFIG_FREEZE defined without matching CONFIG_MEMORYSTATUS"
782#endif
783 /* Initialise background freezing */
784 bsd_init_kprintf("calling memorystatus_freeze_init\n");
785 memorystatus_freeze_init();
786#endif
787
788#if CONFIG_MEMORYSTATUS
789 /* Initialize kernel memory status notifications */
790 bsd_init_kprintf("calling memorystatus_init\n");
791 memorystatus_init();
792
793 /* Fixup memorystatus fields of the kernel process (only for logging purposes) */
794 kernproc->p_memstat_state |= P_MEMSTAT_INTERNAL;
795 kernproc->p_memstat_effectivepriority = JETSAM_PRIORITY_INTERNAL;
796 kernproc->p_memstat_requestedpriority = JETSAM_PRIORITY_INTERNAL;
797#endif /* CONFIG_MEMORYSTATUS */
798
799 bsd_init_kprintf("calling sysctl_mib_init\n");
800 sysctl_mib_init();
801
802 bsd_init_kprintf("calling bsd_autoconf\n");
803 bsd_autoconf();
804
805#if CONFIG_DTRACE
806 dtrace_postinit();
807#endif
808
809 /*
810 * We attach the loopback interface *way* down here to ensure
811 * it happens after autoconf(), otherwise it becomes the
812 * "primary" interface.
813 */
814#include <loop.h>
815#if NLOOP > 0
816 bsd_init_kprintf("calling loopattach\n");
817 loopattach(); /* XXX */
818#endif
819#if NGIF
820 /* Initialize gif interface (after lo0) */
821 gif_init();
822#endif
823
824#if PFLOG
825 /* Initialize packet filter log interface */
826 pfloginit();
827#endif /* PFLOG */
828
829#if NETHER > 0
830 /* Register the built-in dlil ethernet interface family */
831 bsd_init_kprintf("calling ether_family_init\n");
832 ether_family_init();
833#endif /* NETHER > 0 */
834
835#if NETWORKING
836#if CONTENT_FILTER
837 cfil_init();
838#endif
839
840#if PACKET_MANGLER
841 pkt_mnglr_init();
842#endif
843
844 /*
845 * Register subsystems with kernel control handlers
846 */
847 utun_register_control();
848#if IPSEC
849 ipsec_init();
850#endif /* IPSEC */
851 netsrc_init();
852 nstat_init();
853#if MPTCP
854 mptcp_control_register();
855#endif /* MPTCP */
856
857#if REMOTE_VIF
858 rvi_init();
859#endif /* REMOTE_VIF */
860
861#if IF_REDIRECT
862 if_redirect_init();
863#endif /* IF_REDIRECT */
864
865#if KCTL_TEST
866 kctl_test_init();
867#endif /* KCTL_TEST */
868
869 /*
870 * The networking stack is now initialized, so it is a good time to call
871 * the clients that are waiting for the networking stack to be usable.
872 */
873 bsd_init_kprintf("calling net_init_run\n");
874 net_init_run();
875#endif /* NETWORKING */
876
877 bsd_init_kprintf("calling inittodr\n");
878 inittodr(base: 0);
879
880 /* Mount the root file system. */
881 while (TRUE) {
882 int err;
883
884 bsd_init_kprintf("calling setconf\n");
885 setconf();
886#if CONFIG_NETBOOT
887 netboot = (mountroot == netboot_mountroot);
888#endif
889
890 bsd_init_kprintf("vfs_mountroot\n");
891 if (0 == (err = vfs_mountroot())) {
892 break;
893 }
894 rootdevice[0] = '\0';
895#if CONFIG_NETBOOT
896 if (netboot) {
897 PE_display_icon( flags: 0, name: "noroot"); /* XXX a netboot-specific icon would be nicer */
898 vc_progress_set(FALSE, vc_delay: 0);
899 for (uint32_t i = 1; 1; i *= 2) {
900 printf("bsd_init: failed to mount network root, error %d, %s\n",
901 err, PE_boot_args());
902 printf("We are hanging here...\n");
903 IOSleep(i * 60 * 1000);
904 }
905 /*NOTREACHED*/
906 }
907#endif
908 printf("cannot mount root, errno = %d\n", err);
909 }
910
911 IOSecureBSDRoot(rootName: rootdevice);
912
913 mountlist.tqh_first->mnt_flag |= MNT_ROOTFS;
914
915 bsd_init_kprintf("calling VFS_ROOT\n");
916 /* Get the vnode for '/'. set_rootvnode() below installs it as kernproc->p_fd.fd_cdir. */
917 if (VFS_ROOT(mountlist.tqh_first, &init_rootvnode, vfs_context_kernel())) {
918 panic("bsd_init: cannot find root vnode: %s", PE_boot_args());
919 }
920 (void)vnode_ref(vp: init_rootvnode);
921 (void)vnode_put(vp: init_rootvnode);
922
923 lck_rw_lock_exclusive(lck: &rootvnode_rw_lock);
924 set_rootvnode(init_rootvnode);
925 lck_rw_unlock_exclusive(lck: &rootvnode_rw_lock);
926 init_rootvnode = NULLVP; /* use rootvnode after this point */
927
928
929 if (!bsd_rooted_ramdisk()) {
930 boolean_t require_rootauth = FALSE;
931
932#if XNU_TARGET_OS_OSX && defined(__arm64__)
933#if CONFIG_IMAGEBOOT
934 /* Apple Silicon MacOS */
935 require_rootauth = !imageboot_desired();
936#endif // CONFIG_IMAGEBOOT
937#elif !XNU_TARGET_OS_OSX
938 /* Non MacOS */
939 require_rootauth = TRUE;
940#endif // XNU_TARGET_OS_OSX && defined(__arm64__)
941
942 if (require_rootauth) {
943 /* enforce sealedness */
944 int autherr = VNOP_IOCTL(vp: rootvnode, FSIOC_KERNEL_ROOTAUTH, NULL, fflag: 0, ctx: vfs_context_kernel());
945 if (autherr) {
946 panic("rootvp not authenticated after mounting");
947 }
948 }
949 }
950
951
952#if CONFIG_NETBOOT
953 if (netboot) {
954 int err;
955
956 netboot = TRUE;
957 /* post mount setup */
958 if ((err = netboot_setup()) != 0) {
959 PE_display_icon( flags: 0, name: "noroot"); /* XXX a netboot-specific icon would be nicer */
960 vc_progress_set(FALSE, vc_delay: 0);
961 for (uint32_t i = 1; 1; i *= 2) {
962 printf("bsd_init: NetBoot could not find root, error %d: %s\n",
963 err, PE_boot_args());
964 printf("We are hanging here...\n");
965 IOSleep(i * 60 * 1000);
966 }
967 /*NOTREACHED*/
968 }
969 }
970#endif
971
972
973#if CONFIG_IMAGEBOOT
974 /*
975 * See if a system disk image is present. If so, mount it and
976 * switch the root vnode to point to it
977 */
978 imageboot_type_t imageboot_type = imageboot_needed();
979 if (netboot == FALSE && imageboot_type) {
980 /*
981 * An image was found. No turning back: we're booted
982 * with a kernel from the disk image.
983 */
984 bsd_init_kprintf("doing image boot: type = %d\n", imageboot_type);
985 imageboot_setup(type: imageboot_type);
986 IOSetImageBoot();
987 }
988
989#endif /* CONFIG_IMAGEBOOT */
990
991 /* set initial time; all other resource data is already zero'ed */
992 microtime_with_abstime(tv: &kernproc->p_start, abstime: &kernproc->p_stats->ps_start);
993
994#if DEVFS
995 {
996 char mounthere[] = "/dev"; /* !const because of internal casting */
997
998 bsd_init_kprintf("calling devfs_kernel_mount\n");
999 devfs_kernel_mount(mntname: mounthere);
1000 }
1001#endif /* DEVFS */
1002
1003#if CONFIG_BASESYSTEMROOT
1004#if CONFIG_IMAGEBOOT
1005 if (bsdmgroot_bootable()) {
1006 int error;
1007 bool rooted_dmg = false;
1008 bool skip_signature_check = false;
1009
1010 printf("trying to find and mount BaseSystem dmg as root volume\n");
1011#if DEVELOPMENT || DEBUG
1012 printf("(set boot-arg -nobsdmgroot to avoid this)\n");
1013#endif // DEVELOPMENT || DEBUG
1014
1015 char *dmgpath = NULL;
1016 dmgpath = zalloc_flags(ZV_NAMEI, Z_ZERO | Z_WAITOK | Z_NOFAIL);
1017
1018 error = bsd_find_basesystem_dmg(bsdmgpath_out: dmgpath, rooted_dmg: &rooted_dmg, skip_signature_check: &skip_signature_check);
1019 if (error) {
1020 bsd_init_kprintf("failed to to find BaseSystem dmg: error = %d\n", error);
1021 } else {
1022 PE_parse_boot_argn(arg_string: "bsdmgpath", arg_ptr: dmgpath, max_arg: sizeof(dmgpath));
1023
1024 bsd_init_kprintf("found BaseSystem dmg at: %s\n", dmgpath);
1025
1026 error = imageboot_pivot_image(image_path: dmgpath, type: IMAGEBOOT_DMG, mount_path: "/System/Volumes/BaseSystem", outgoing_root_path: "System/Volumes/macOS", rooted_dmg, skip_signature_check);
1027 if (error) {
1028 bsd_init_kprintf("couldn't mount BaseSystem dmg: error = %d", error);
1029 } else {
1030 IOSetImageBoot();
1031 }
1032 }
1033 zfree(ZV_NAMEI, dmgpath);
1034 }
1035#else /* CONFIG_IMAGEBOOT */
1036#error CONFIG_BASESYSTEMROOT requires CONFIG_IMAGEBOOT
1037#endif /* CONFIG_IMAGEBOOT */
1038#endif /* CONFIG_BASESYSTEMROOT */
1039
1040 /* Initialize signal state for process 0. */
1041 bsd_init_kprintf("calling siginit\n");
1042 siginit(p: kernproc);
1043
1044 bsd_init_kprintf("calling bsd_utaskbootstrap\n");
1045 bsd_utaskbootstrap();
1046
1047 pal_kernel_announce();
1048
1049 bsd_init_kprintf("calling mountroot_post_hook\n");
1050
1051#if XNU_TARGET_OS_OSX
1052 /* invoke post-root-mount hook */
1053 if (mountroot_post_hook != NULL) {
1054 mountroot_post_hook();
1055 }
1056#endif
1057
1058#if 0 /* not yet */
1059 consider_zone_gc(FALSE);
1060#endif
1061
1062#if DEVELOPMENT || DEBUG
1063 /*
1064 * At this point, we consider the kernel "booted" enough to apply
1065 * stricter timeouts. Only used for debug timeouts.
1066 */
1067 machine_timeout_bsd_init();
1068#endif /* DEVELOPMENT || DEBUG */
1069
1070 bsd_init_kprintf("done\n");
1071}
1072
1073void
1074bsdinit_task(void)
1075{
1076 proc_t p = current_proc();
1077
1078 process_name(s: "init", p);
1079
1080 /* Set up exception-to-signal reflection */
1081 ux_handler_setup();
1082
1083#if CONFIG_MACF
1084 mac_cred_label_associate_user(cred: proc_ucred_unsafe(p)); /* in init */
1085#endif
1086
1087 vm_init_before_launchd();
1088
1089#if CONFIG_XNUPOST
1090 int result = bsd_list_tests();
1091 result = bsd_do_post();
1092 if (result != 0) {
1093 panic("bsd_do_post: Tests failed with result = 0x%08x", result);
1094 }
1095#endif
1096
1097 bsd_init_kprintf("bsd_do_post - done");
1098
1099 load_init_program(p);
1100 lock_trace = 1;
1101}
1102
1103kern_return_t
1104bsd_autoconf(void)
1105{
1106 kprintf(fmt: "bsd_autoconf: calling kminit\n");
1107 kminit();
1108
1109 /*
1110 * Early startup for bsd pseudodevices.
1111 */
1112 {
1113 struct pseudo_init *pi;
1114
1115 for (pi = pseudo_inits; pi->ps_func; pi++) {
1116 (*pi->ps_func)(pi->ps_count);
1117 }
1118 }
1119
1120 return IOKitBSDInit();
1121}
1122
1123
1124#include <sys/disklabel.h> /* for MAXPARTITIONS */
1125
1126static void
1127setconf(void)
1128{
1129 u_int32_t flags;
1130 kern_return_t err;
1131
1132 err = IOFindBSDRoot(rootdevice, sizeof(rootdevice), &rootdev, &flags);
1133 if (err) {
1134 printf("setconf: IOFindBSDRoot returned an error (%d);"
1135 "setting rootdevice to 'sd0a'.\n", err); /* XXX DEBUG TEMP */
1136 rootdev = makedev( 6, 0 );
1137 strlcpy(dst: rootdevice, src: "sd0a", n: sizeof(rootdevice));
1138 flags = 0;
1139 }
1140
1141#if CONFIG_NETBOOT
1142 if (flags & 1) {
1143 /* network device */
1144 mountroot = netboot_mountroot;
1145 } else {
1146#endif
1147 /* otherwise have vfs determine root filesystem */
1148 mountroot = NULL;
1149#if CONFIG_NETBOOT
1150}
1151#endif
1152}
1153
1154/*
1155 * Boot into the flavor of Recovery dictated by `mode`.
1156 */
1157boolean_t
1158bsd_boot_to_recovery(bsd_bootfail_mode_t mode, uuid_t volume_uuid, boolean_t reboot)
1159{
1160 return IOSetRecoveryBoot(mode, volume_uuid, reboot);
1161}
1162
1163void
1164bsd_utaskbootstrap(void)
1165{
1166 thread_t thread;
1167 struct uthread *ut;
1168
1169 /*
1170 * Clone the bootstrap process from the kernel process, without
1171 * inheriting either task characteristics or memory from the kernel;
1172 */
1173 thread = cloneproc(TASK_NULL, NULL, kernproc, CLONEPROC_INITPROC);
1174
1175 /* Hold the reference as it will be dropped during shutdown */
1176 initproc = proc_find(pid: 1);
1177#if __PROC_INTERNAL_DEBUG
1178 if (initproc == PROC_NULL) {
1179 panic("bsd_utaskbootstrap: initproc not set");
1180 }
1181#endif
1182
1183 zalloc_first_proc_made();
1184
1185 /*
1186 * Since we aren't going back out the normal way to our parent,
1187 * we have to drop the transition locks explicitly.
1188 */
1189 proc_signalend(initproc, locked: 0);
1190 proc_transend(initproc, locked: 0);
1191
1192 ut = (struct uthread *)get_bsdthread_info(thread);
1193 ut->uu_sigmask = 0;
1194 act_set_astbsd(thread);
1195
1196 ipc_task_enable(task: get_threadtask(thread));
1197
1198 task_clear_return_wait(task: get_threadtask(thread), TCRW_CLEAR_ALL_WAIT);
1199}
1200
1201static void
1202parse_bsd_args(void)
1203{
1204 char namep[48];
1205
1206 if (PE_parse_boot_argn(arg_string: "-s", arg_ptr: namep, max_arg: sizeof(namep))) {
1207 boothowto |= RB_SINGLE;
1208 }
1209
1210 if (PE_parse_boot_argn(arg_string: "-x", arg_ptr: namep, max_arg: sizeof(namep))) { /* safe boot */
1211 boothowto |= RB_SAFEBOOT;
1212 }
1213
1214 if (PE_parse_boot_argn(arg_string: "nbuf", arg_ptr: &max_nbuf_headers,
1215 max_arg: sizeof(max_nbuf_headers))) {
1216 customnbuf = 1;
1217 }
1218
1219#if CONFIG_DARKBOOT
1220 /*
1221 * The darkboot flag is specified by the bootloader and is stored in
1222 * boot_args->bootFlags. This flag is available starting revision 2.
1223 */
1224 boot_args *args = (boot_args *) PE_state.bootArgs;
1225 if ((args != NULL) && (args->Revision >= kBootArgsRevision2)) {
1226 darkboot = (args->bootFlags & kBootFlagsDarkBoot) ? 1 : 0;
1227 } else {
1228 darkboot = 0;
1229 }
1230#endif
1231
1232#if DEVELOPMENT || DEBUG
1233 if (PE_parse_boot_argn("dyldsuffix", dyld_suffix, sizeof(dyld_suffix))) {
1234 if (strlen(dyld_suffix) > 0) {
1235 use_dyld_suffix = 1;
1236 }
1237 }
1238
1239 if (PE_parse_boot_argn("alt-dyld", dyld_alt_path, sizeof(dyld_alt_path))) {
1240 if (strlen(dyld_alt_path) > 0) {
1241 use_alt_dyld = 1;
1242 }
1243 }
1244
1245 if (PE_parse_boot_arg_str("panic-on-proc-crash", panic_on_proc_crash, sizeof(panic_on_proc_crash))) {
1246 if (strlen(panic_on_proc_crash) > 0) {
1247 use_panic_on_proc_crash = 1;
1248 }
1249 }
1250
1251 if (PE_parse_boot_arg_str("panic-on-proc-exit", panic_on_proc_exit, sizeof(panic_on_proc_exit))) {
1252 if (strlen(panic_on_proc_exit) > 0) {
1253 use_panic_on_proc_exit = 1;
1254 }
1255 }
1256
1257 if (PE_parse_boot_arg_str("panic-on-proc-spawn-fail", panic_on_proc_spawn_fail, sizeof(panic_on_proc_spawn_fail))) {
1258 if (strlen(panic_on_proc_spawn_fail) > 0) {
1259 use_panic_on_proc_spawn_fail = 1;
1260 }
1261 }
1262
1263 if (PE_i_can_has_debugger(NULL) && PE_parse_boot_argn("-hide_process_traced", namep, sizeof(namep))) {
1264 bootarg_hide_process_traced = 1;
1265 }
1266#endif /* DEVELOPMENT || DEBUG */
1267}
1268STARTUP(TUNABLES, STARTUP_RANK_MIDDLE, parse_bsd_args);
1269
1270#if CONFIG_BASESYSTEMROOT
1271
1272extern bool IOGetBootUUID(char *);
1273extern bool IOGetApfsPrebootUUID(char *);
1274
1275
1276// This function returns the UUID of the Preboot (and Recovery) folder associated with the
1277// current boot volume, if applicable. The meaning of the UUID can be
1278// filesystem-dependent and not all kinds of boots will have a UUID.
1279// On success, the UUID is copied into the past-in parameter and TRUE is returned.
1280// In case the current boot has no applicable Preboot UUID, FALSE is returned.
1281static bool
1282get_preboot_uuid(uuid_string_t maybe_uuid_string)
1283{
1284 // try IOGetApfsPrebootUUID
1285 if (IOGetApfsPrebootUUID(maybe_uuid_string)) {
1286 uuid_t maybe_uuid;
1287 int error = uuid_parse(in: maybe_uuid_string, uu: maybe_uuid);
1288 if (error == 0) {
1289 return true;
1290 }
1291 }
1292
1293 // try IOGetBootUUID
1294 if (IOGetBootUUID(maybe_uuid_string)) {
1295 uuid_t maybe_uuid;
1296 int error = uuid_parse(in: maybe_uuid_string, uu: maybe_uuid);
1297 if (error == 0) {
1298 return true;
1299 }
1300 }
1301
1302 // didn't find it
1303 return false;
1304}
1305
1306#if defined(__arm64__)
1307extern bool IOGetBootObjectsPath(char *);
1308#endif
1309
1310// Find the BaseSystem.dmg to be used as the initial root volume during certain
1311// kinds of boots.
1312// This may mount volumes and lookup vnodes.
1313// The DEVELOPMENT kernel will look for BaseSystem.rooted.dmg first.
1314// If it returns 0 (no error), then it also writes the absolute path to the
1315// BaseSystem.dmg into its argument (which must be a char[MAXPATHLEN]).
1316static
1317int
1318bsd_find_basesystem_dmg(char *bsdmgpath_out, bool *rooted_dmg, bool *skip_signature_check)
1319{
1320 int error;
1321 size_t len;
1322 char *dmgbasepath;
1323 char *dmgpath;
1324 bool allow_rooted_dmg = false;
1325
1326 dmgbasepath = zalloc_flags(ZV_NAMEI, Z_ZERO | Z_WAITOK);
1327 dmgpath = zalloc_flags(ZV_NAMEI, Z_ZERO | Z_WAITOK);
1328 vnode_t imagevp = NULLVP;
1329
1330#if DEVELOPMENT || DEBUG
1331 allow_rooted_dmg = true;
1332#endif
1333
1334 //must provide output bool
1335 if (rooted_dmg && skip_signature_check) {
1336 *rooted_dmg = false;
1337 *skip_signature_check = false;
1338 } else {
1339 error = EINVAL;
1340 goto done;
1341 }
1342
1343 error = vfs_mount_recovery();
1344 if (error) {
1345 goto done;
1346 }
1347
1348 len = strlcpy(dst: dmgbasepath, src: "/System/Volumes/Recovery/", MAXPATHLEN);
1349 if (len > MAXPATHLEN) {
1350 error = ENAMETOOLONG;
1351 goto done;
1352 }
1353
1354 if (csr_check(CSR_ALLOW_ANY_RECOVERY_OS) == 0) {
1355 *skip_signature_check = true;
1356 allow_rooted_dmg = true;
1357 }
1358
1359#if defined(__arm64__)
1360 char boot_obj_path[MAXPATHLEN] = "";
1361
1362 if (IOGetBootObjectsPath(boot_obj_path)) {
1363 if (boot_obj_path[0] == '/') {
1364 dmgbasepath[len - 1] = '\0';
1365 }
1366
1367 len = strlcat(dst: dmgbasepath, src: boot_obj_path, MAXPATHLEN);
1368 if (len > MAXPATHLEN) {
1369 error = ENAMETOOLONG;
1370 goto done;
1371 }
1372
1373 len = strlcat(dst: dmgbasepath, src: "/usr/standalone/firmware/", MAXPATHLEN);
1374 if (len > MAXPATHLEN) {
1375 error = ENAMETOOLONG;
1376 goto done;
1377 }
1378
1379 if (allow_rooted_dmg) {
1380 len = strlcpy(dst: dmgpath, src: dmgbasepath, MAXPATHLEN);
1381 if (len > MAXPATHLEN) {
1382 error = ENAMETOOLONG;
1383 goto done;
1384 }
1385
1386 len = strlcat(dst: dmgpath, src: "arm64eBaseSystem.rooted.dmg", MAXPATHLEN);
1387 if (len > MAXPATHLEN) {
1388 error = ENAMETOOLONG;
1389 goto done;
1390 }
1391
1392 error = vnode_lookup(path: dmgpath, flags: 0, vpp: &imagevp, ctx: vfs_context_kernel());
1393 if (error == 0) {
1394 *rooted_dmg = true;
1395 *skip_signature_check = true;
1396 goto done;
1397 }
1398 memset(s: dmgpath, c: 0, MAXPATHLEN);
1399 }
1400
1401 len = strlcpy(dst: dmgpath, src: dmgbasepath, MAXPATHLEN);
1402 if (len > MAXPATHLEN) {
1403 error = ENAMETOOLONG;
1404 goto done;
1405 }
1406
1407 len = strlcat(dst: dmgpath, src: "arm64eBaseSystem.dmg", MAXPATHLEN);
1408 if (len > MAXPATHLEN) {
1409 error = ENAMETOOLONG;
1410 goto done;
1411 }
1412
1413 error = vnode_lookup(path: dmgpath, flags: 0, vpp: &imagevp, ctx: vfs_context_kernel());
1414 if (error == 0) {
1415 goto done;
1416 }
1417 memset(s: dmgpath, c: 0, MAXPATHLEN);
1418 dmgbasepath[strlen(s: "/System/Volumes/Recovery/")] = '\0';
1419 }
1420#endif // __arm64__
1421
1422 uuid_string_t preboot_uuid;
1423 if (!get_preboot_uuid(maybe_uuid_string: preboot_uuid)) {
1424 // no preboot? bail out
1425 return EINVAL;
1426 }
1427
1428 len = strlcat(dst: dmgbasepath, src: preboot_uuid, MAXPATHLEN);
1429 if (len > MAXPATHLEN) {
1430 error = ENAMETOOLONG;
1431 goto done;
1432 }
1433
1434 if (allow_rooted_dmg) {
1435 // Try BaseSystem.rooted.dmg
1436 len = strlcpy(dst: dmgpath, src: dmgbasepath, MAXPATHLEN);
1437 if (len > MAXPATHLEN) {
1438 error = ENAMETOOLONG;
1439 goto done;
1440 }
1441
1442 len = strlcat(dst: dmgpath, src: "/BaseSystem.rooted.dmg", MAXPATHLEN);
1443 if (len > MAXPATHLEN) {
1444 error = ENAMETOOLONG;
1445 goto done;
1446 }
1447
1448 error = vnode_lookup(path: dmgpath, flags: 0, vpp: &imagevp, ctx: vfs_context_kernel());
1449 if (error == 0) {
1450 // we found it! success!
1451 *rooted_dmg = true;
1452 *skip_signature_check = true;
1453 goto done;
1454 }
1455 }
1456
1457 // Try BaseSystem.dmg
1458 len = strlcpy(dst: dmgpath, src: dmgbasepath, MAXPATHLEN);
1459 if (len > MAXPATHLEN) {
1460 error = ENAMETOOLONG;
1461 goto done;
1462 }
1463
1464 len = strlcat(dst: dmgpath, src: "/BaseSystem.dmg", MAXPATHLEN);
1465 if (len > MAXPATHLEN) {
1466 error = ENAMETOOLONG;
1467 goto done;
1468 }
1469
1470 error = vnode_lookup(path: dmgpath, flags: 0, vpp: &imagevp, ctx: vfs_context_kernel());
1471 if (error == 0) {
1472 // success!
1473 goto done;
1474 }
1475
1476done:
1477 if (error == 0) {
1478 strlcpy(dst: bsdmgpath_out, src: dmgpath, MAXPATHLEN);
1479 } else {
1480 bsd_init_kprintf("%s: error %d\n", __func__, error);
1481 }
1482 if (imagevp != NULLVP) {
1483 vnode_put(vp: imagevp);
1484 }
1485 zfree(ZV_NAMEI, dmgpath);
1486 zfree(ZV_NAMEI, dmgbasepath);
1487 return error;
1488}
1489
1490static boolean_t
1491bsdmgroot_bootable(void)
1492{
1493#if defined(__arm64__)
1494#define BSDMGROOT_DEFAULT true
1495#else
1496#define BSDMGROOT_DEFAULT false
1497#endif
1498
1499 boolean_t resolved = BSDMGROOT_DEFAULT;
1500
1501 boolean_t boot_arg_bsdmgroot = false;
1502 boolean_t boot_arg_nobsdmgroot = false;
1503 int error;
1504 mount_t mp;
1505 boolean_t root_part_of_volume_group = false;
1506 struct vfs_attr vfsattr;
1507
1508 mp = rootvnode->v_mount;
1509 VFSATTR_INIT(&vfsattr);
1510 VFSATTR_WANTED(&vfsattr, f_capabilities);
1511
1512 boot_arg_bsdmgroot = PE_parse_boot_argn(arg_string: "-bsdmgroot", NULL, max_arg: 0);
1513 boot_arg_nobsdmgroot = PE_parse_boot_argn(arg_string: "-nobsdmgroot", NULL, max_arg: 0);
1514
1515 error = vfs_getattr(mp, vfa: &vfsattr, ctx: vfs_context_kernel());
1516 if (!error && VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
1517 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_VOL_GROUPS) &&
1518 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_VOL_GROUPS)) {
1519 root_part_of_volume_group = true;
1520 }
1521 }
1522
1523 boolean_t singleuser = (boothowto & RB_SINGLE) != 0;
1524
1525 // Start with the #defined default above.
1526 // If booting to single-user mode, default to false, because single-
1527 // user mode inside the BaseSystem is probably not what's wanted.
1528 // If the 'yes' boot-arg is set, we'll allow that even in single-user
1529 // mode, we'll assume you know what you're doing.
1530 // The 'no' boot-arg overpowers the 'yes' boot-arg.
1531 // In any case, we will not attempt to root from BaseSystem if the
1532 // original (booter-chosen) root volume isn't in a volume group.
1533 // This is just out of an abundance of caution: if the boot environment
1534 // seems to be "something other than a standard install",
1535 // we'll be conservative in messing with the root volume.
1536
1537 if (singleuser) {
1538 resolved = false;
1539 }
1540
1541 if (boot_arg_bsdmgroot) {
1542 resolved = true;
1543 }
1544
1545 if (boot_arg_nobsdmgroot) {
1546 resolved = false;
1547 }
1548
1549 if (!root_part_of_volume_group) {
1550 resolved = false;
1551 }
1552
1553 return resolved;
1554}
1555#endif // CONFIG_BASESYSTEMROOT
1556
1557void
1558bsd_exec_setup(int scale)
1559{
1560 switch (scale) {
1561 case 0:
1562 case 1:
1563 bsd_simul_execs = BSD_SIMUL_EXECS;
1564 break;
1565 case 2:
1566 case 3:
1567 bsd_simul_execs = 65;
1568 break;
1569 case 4:
1570 case 5:
1571 bsd_simul_execs = 129;
1572 break;
1573 case 6:
1574 case 7:
1575 bsd_simul_execs = 257;
1576 break;
1577 default:
1578 bsd_simul_execs = 513;
1579 break;
1580 }
1581 bsd_pageable_map_size = (bsd_simul_execs * BSD_PAGEABLE_SIZE_PER_EXEC);
1582}
1583
1584#if !CONFIG_NETBOOT
/*
 * Stand-in used when the kernel is built without CONFIG_NETBOOT:
 * always returns 0.
 */
int netboot_root(void);

int
netboot_root(void)
{
	const int not_netbooted = 0;

	return not_netbooted;
}
1593#endif
1594