| 1 | /* |
| 2 | * Copyright (c) 2000-2020 Apple Inc. All rights reserved. |
| 3 | * |
| 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
| 5 | * |
| 6 | * This file contains Original Code and/or Modifications of Original Code |
| 7 | * as defined in and that are subject to the Apple Public Source License |
| 8 | * Version 2.0 (the 'License'). You may not use this file except in |
| 9 | * compliance with the License. The rights granted to you under the License |
| 10 | * may not be used to create, or enable the creation or redistribution of, |
| 11 | * unlawful or unlicensed copies of an Apple operating system, or to |
| 12 | * circumvent, violate, or enable the circumvention or violation of, any |
| 13 | * terms of an Apple operating system software license agreement. |
| 14 | * |
| 15 | * Please obtain a copy of the License at |
| 16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
| 17 | * |
| 18 | * The Original Code and all software distributed under the License are |
| 19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
| 20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
| 21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
| 22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
| 23 | * Please see the License for the specific language governing rights and |
| 24 | * limitations under the License. |
| 25 | * |
| 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
| 27 | */ |
| 28 | /* |
| 29 | * @OSF_COPYRIGHT@ |
| 30 | */ |
| 31 | /* |
| 32 | * Mach Operating System |
| 33 | * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University |
| 34 | * All Rights Reserved. |
| 35 | * |
| 36 | * Permission to use, copy, modify and distribute this software and its |
| 37 | * documentation is hereby granted, provided that both the copyright |
| 38 | * notice and this permission notice appear in all copies of the |
| 39 | * software, derivative works or modified versions, and any portions |
| 40 | * thereof, and that both notices appear in supporting documentation. |
| 41 | * |
| 42 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" |
| 43 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR |
| 44 | * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. |
| 45 | * |
| 46 | * Carnegie Mellon requests users of this software to return to |
| 47 | * |
| 48 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU |
| 49 | * School of Computer Science |
| 50 | * Carnegie Mellon University |
| 51 | * Pittsburgh PA 15213-3890 |
| 52 | * |
| 53 | * any improvements or extensions that they make and grant Carnegie Mellon |
| 54 | * the rights to redistribute these changes. |
| 55 | */ |
| 56 | /* |
| 57 | * NOTICE: This file was modified by McAfee Research in 2004 to introduce |
| 58 | * support for mandatory and extensible security protections. This notice |
| 59 | * is included in support of clause 2.2 (b) of the Apple Public License, |
| 60 | * Version 2.0. |
| 61 | */ |
| 62 | /* |
| 63 | */ |
| 64 | |
| 65 | /* |
| 66 | * Mach kernel startup. |
| 67 | */ |
| 68 | |
| 69 | #include <debug.h> |
| 70 | #include <mach_kdp.h> |
| 71 | |
| 72 | #include <mach/boolean.h> |
| 73 | #include <mach/machine.h> |
| 74 | #include <mach/thread_act.h> |
| 75 | #include <mach/task_special_ports.h> |
| 76 | #include <mach/vm_param.h> |
| 77 | #include <kern/assert.h> |
| 78 | #include <kern/mach_param.h> |
| 79 | #include <kern/misc_protos.h> |
| 80 | #include <kern/clock.h> |
| 81 | #include <kern/coalition.h> |
| 82 | #include <kern/cpu_number.h> |
| 83 | #include <kern/ledger.h> |
| 84 | #include <kern/machine.h> |
| 85 | #include <kern/processor.h> |
| 86 | #include <kern/restartable.h> |
| 87 | #include <kern/sched_prim.h> |
| 88 | #include <kern/turnstile.h> |
| 89 | #if CONFIG_SCHED_SFI |
| 90 | #include <kern/sfi.h> |
| 91 | #endif |
| 92 | #include <kern/startup.h> |
| 93 | #include <kern/task.h> |
| 94 | #include <kern/thread.h> |
| 95 | #include <kern/timer.h> |
| 96 | #if CONFIG_TELEMETRY |
| 97 | #include <kern/telemetry.h> |
| 98 | #endif |
| 99 | #include <kern/kpc.h> |
| 100 | #include <kern/zalloc.h> |
| 101 | #include <kern/locks.h> |
| 102 | #include <kern/debug.h> |
| 103 | #if KPERF |
| 104 | #include <kperf/kperf.h> |
| 105 | #endif /* KPERF */ |
| 106 | #include <corpses/task_corpse.h> |
| 107 | #include <prng/random.h> |
| 108 | #include <console/serial_protos.h> |
| 109 | #include <vm/vm_kern.h> |
| 110 | #include <vm/vm_init.h> |
| 111 | #include <vm/vm_map.h> |
| 112 | #include <vm/vm_object.h> |
| 113 | #include <vm/vm_page.h> |
| 114 | #include <vm/vm_pageout.h> |
| 115 | #include <vm/vm_shared_region.h> |
| 116 | #include <machine/pmap.h> |
| 117 | #include <machine/commpage.h> |
| 118 | #include <machine/machine_routines.h> |
| 119 | #include <libkern/version.h> |
| 120 | #include <pexpert/device_tree.h> |
| 121 | #include <sys/codesign.h> |
| 122 | #include <sys/kdebug.h> |
| 123 | #include <sys/random.h> |
| 124 | #include <sys/ktrace.h> |
| 125 | #include <sys/trust_caches.h> |
| 126 | #include <sys/code_signing.h> |
| 127 | #include <libkern/section_keywords.h> |
| 128 | |
| 129 | #include <kern/waitq.h> |
| 130 | #include <ipc/ipc_voucher.h> |
| 131 | #include <mach/host_info.h> |
| 132 | #include <pthread/workqueue_internal.h> |
| 133 | |
| 134 | #if SOCKETS |
| 135 | extern void mbuf_tag_init(void); |
| 136 | #endif |
| 137 | |
| 138 | #if CONFIG_XNUPOST |
| 139 | #include <tests/ktest.h> |
| 140 | #include <tests/xnupost.h> |
| 141 | #endif |
| 142 | |
| 143 | #if CONFIG_ATM |
| 144 | #include <atm/atm_internal.h> |
| 145 | #endif |
| 146 | |
| 147 | #if CONFIG_CSR |
| 148 | #include <sys/csr.h> |
| 149 | #endif |
| 150 | |
| 151 | #if ALTERNATE_DEBUGGER |
| 152 | #include <arm64/alternate_debugger.h> |
| 153 | #endif |
| 154 | |
| 155 | #if MACH_KDP |
| 156 | #include <kdp/kdp.h> |
| 157 | #endif |
| 158 | |
| 159 | #if CONFIG_MACF |
| 160 | #include <security/mac_mach_internal.h> |
| 161 | #if CONFIG_VNGUARD |
| 162 | extern void vnguard_policy_init(void); |
| 163 | #endif |
| 164 | #endif |
| 165 | |
| 166 | #if HYPERVISOR |
| 167 | #include <kern/hv_support.h> |
| 168 | #endif |
| 169 | |
| 170 | #if CONFIG_UBSAN_MINIMAL |
| 171 | #include <san/ubsan_minimal.h> |
| 172 | #endif |
| 173 | |
| 174 | #include <san/kasan.h> |
| 175 | |
| 176 | #include <i386/pmCPU.h> |
| 177 | static void kernel_bootstrap_thread(void); |
| 178 | |
| 179 | static void load_context( |
| 180 | thread_t thread); |
| 181 | |
| 182 | #if CONFIG_ECC_LOGGING |
| 183 | #include <kern/ecc.h> |
| 184 | #endif |
| 185 | |
| 186 | #if (defined(__i386__) || defined(__x86_64__)) && CONFIG_VMX |
| 187 | #include <i386/vmx/vmx_cpu.h> |
| 188 | #endif |
| 189 | |
| 190 | #if CONFIG_DTRACE |
| 191 | extern void dtrace_early_init(void); |
| 192 | extern void sdt_early_init(void); |
| 193 | #endif |
| 194 | |
| 195 | // libkern/OSKextLib.cpp |
| 196 | extern void OSKextRemoveKextBootstrap(void); |
| 197 | |
| 198 | void scale_setup(void); |
| 199 | extern void bsd_scale_setup(int); |
| 200 | extern unsigned int semaphore_max; |
| 201 | extern void stackshot_init(void); |
| 202 | |
| 203 | /* |
| 204 | * Running in virtual memory, on the interrupt stack. |
| 205 | */ |
| 206 | |
| 207 | extern struct startup_entry startup_entries[] |
| 208 | __SECTION_START_SYM(STARTUP_HOOK_SEGMENT, STARTUP_HOOK_SECTION); |
| 209 | |
| 210 | extern struct startup_entry startup_entries_end[] |
| 211 | __SECTION_END_SYM(STARTUP_HOOK_SEGMENT, STARTUP_HOOK_SECTION); |
| 212 | |
| 213 | static struct startup_entry *__startup_data startup_entry_cur = startup_entries; |
| 214 | |
| 215 | SECURITY_READ_ONLY_LATE(startup_subsystem_id_t) startup_phase = STARTUP_SUB_NONE; |
| 216 | |
| 217 | extern int serverperfmode; |
| 218 | |
| 219 | TUNABLE(startup_debug_t, startup_debug, "startup_debug" , 0); |
| 220 | |
| 221 | static inline void |
| 222 | kernel_bootstrap_log(const char *message) |
| 223 | { |
| 224 | if ((startup_debug & STARTUP_DEBUG_VERBOSE) && |
| 225 | startup_phase >= STARTUP_SUB_KPRINTF) { |
| 226 | kprintf(fmt: "kernel_bootstrap: %s\n" , message); |
| 227 | } |
| 228 | kernel_debug_string_early(message); |
| 229 | } |
| 230 | |
| 231 | static inline void |
| 232 | kernel_bootstrap_thread_log(const char *message) |
| 233 | { |
| 234 | if ((startup_debug & STARTUP_DEBUG_VERBOSE) && |
| 235 | startup_phase >= STARTUP_SUB_KPRINTF) { |
| 236 | kprintf(fmt: "kernel_bootstrap_thread: %s\n" , message); |
| 237 | } |
| 238 | kernel_debug_string_early(message); |
| 239 | } |
| 240 | |
/* Sort routine defined outside this file; used to order the startup entries. */
extern void qsort(void *a, size_t n, size_t es, int (*cmp)(const void *, const void *));
| 243 | |
| 244 | __startup_func |
| 245 | static int |
| 246 | startup_entry_cmp(const void *e1, const void *e2) |
| 247 | { |
| 248 | const struct startup_entry *a = e1; |
| 249 | const struct startup_entry *b = e2; |
| 250 | if (a->subsystem == b->subsystem) { |
| 251 | if (a->rank == b->rank) { |
| 252 | return 0; |
| 253 | } |
| 254 | return a->rank > b->rank ? 1 : -1; |
| 255 | } |
| 256 | return a->subsystem > b->subsystem ? 1 : -1; |
| 257 | } |
| 258 | |
| 259 | __startup_func |
| 260 | void |
| 261 | kernel_startup_bootstrap(void) |
| 262 | { |
| 263 | /* |
| 264 | * Sort the various STARTUP() entries by subsystem/rank. |
| 265 | */ |
| 266 | size_t n = startup_entries_end - startup_entries; |
| 267 | |
| 268 | if (n == 0) { |
| 269 | panic("Section %s,%s missing" , |
| 270 | STARTUP_HOOK_SEGMENT, STARTUP_HOOK_SECTION); |
| 271 | } |
| 272 | if (((uintptr_t)startup_entries_end - (uintptr_t)startup_entries) % |
| 273 | sizeof(struct startup_entry)) { |
| 274 | panic("Section %s,%s has invalid size" , |
| 275 | STARTUP_HOOK_SEGMENT, STARTUP_HOOK_SECTION); |
| 276 | } |
| 277 | |
| 278 | qsort(a: startup_entries, n, es: sizeof(struct startup_entry), cmp: startup_entry_cmp); |
| 279 | |
| 280 | /* |
| 281 | * Then initialize all tunables, timeouts, and locks |
| 282 | */ |
| 283 | kernel_startup_initialize_upto(upto: STARTUP_SUB_LOCKS); |
| 284 | } |
| 285 | |
| 286 | __startup_func |
| 287 | void |
| 288 | kernel_startup_tunable_init(const struct startup_tunable_spec *spec) |
| 289 | { |
| 290 | if (spec->var_is_str) { |
| 291 | PE_parse_boot_arg_str(arg_string: spec->name, arg_ptr: spec->var_addr, size: spec->var_len); |
| 292 | } else if (PE_parse_boot_argn(arg_string: spec->name, arg_ptr: spec->var_addr, max_arg: spec->var_len)) { |
| 293 | if (spec->var_is_bool) { |
| 294 | /* make sure bool's are valued in {0, 1} */ |
| 295 | *(bool *)spec->var_addr = *(uint8_t *)spec->var_addr; |
| 296 | } |
| 297 | } |
| 298 | } |
| 299 | |
| 300 | __startup_func |
| 301 | void |
| 302 | kernel_startup_tunable_dt_init(const struct startup_tunable_dt_spec *spec) |
| 303 | { |
| 304 | DTEntry base; |
| 305 | |
| 306 | if (SecureDTLookupEntry(NULL, pathName: spec->dt_base, foundEntry: &base) != kSuccess) { |
| 307 | base = NULL; |
| 308 | } |
| 309 | |
| 310 | bool found_in_chosen = false; |
| 311 | |
| 312 | if (spec->dt_chosen_override) { |
| 313 | DTEntry chosen, chosen_base; |
| 314 | |
| 315 | if (SecureDTLookupEntry(NULL, pathName: "chosen" , foundEntry: &chosen) != kSuccess) { |
| 316 | chosen = NULL; |
| 317 | } |
| 318 | |
| 319 | if (chosen != NULL && SecureDTLookupEntry(searchPoint: chosen, pathName: spec->dt_base, foundEntry: &chosen_base) == kSuccess) { |
| 320 | base = chosen_base; |
| 321 | found_in_chosen = true; |
| 322 | } |
| 323 | } |
| 324 | |
| 325 | uint64_t const *data; |
| 326 | unsigned int data_size = spec->var_len; |
| 327 | |
| 328 | if (base != NULL && SecureDTGetProperty(entry: base, propertyName: spec->dt_name, propertyValue: (const void **)&data, propertySize: &data_size) == kSuccess) { |
| 329 | if (data_size != spec->var_len) { |
| 330 | panic("unexpected tunable size %u in DT entry %s/%s/%s" , |
| 331 | data_size, found_in_chosen ? "/chosen" : "" , spec->dt_base, spec->dt_name); |
| 332 | } |
| 333 | |
| 334 | /* No need to handle bools specially, they are 1 byte integers in the DT. */ |
| 335 | memcpy(dst: spec->var_addr, src: data, n: spec->var_len); |
| 336 | } |
| 337 | |
| 338 | /* boot-arg overrides. */ |
| 339 | |
| 340 | if (PE_parse_boot_argn(arg_string: spec->boot_arg_name, arg_ptr: spec->var_addr, max_arg: spec->var_len)) { |
| 341 | if (spec->var_is_bool) { |
| 342 | *(bool *)spec->var_addr = *(uint8_t *)spec->var_addr; |
| 343 | } |
| 344 | } |
| 345 | } |
| 346 | |
| 347 | static void |
| 348 | kernel_startup_log(startup_subsystem_id_t subsystem) |
| 349 | { |
| 350 | static const char *names[] = { |
| 351 | [STARTUP_SUB_TUNABLES] = "tunables" , |
| 352 | [STARTUP_SUB_TIMEOUTS] = "timeouts" , |
| 353 | [STARTUP_SUB_LOCKS] = "locks" , |
| 354 | [STARTUP_SUB_KPRINTF] = "kprintf" , |
| 355 | |
| 356 | [STARTUP_SUB_PMAP_STEAL] = "pmap_steal" , |
| 357 | [STARTUP_SUB_KMEM] = "kmem" , |
| 358 | [STARTUP_SUB_ZALLOC] = "zalloc" , |
| 359 | [STARTUP_SUB_PERCPU] = "percpu" , |
| 360 | [STARTUP_SUB_EVENT] = "event" , |
| 361 | |
| 362 | [STARTUP_SUB_CODESIGNING] = "codesigning" , |
| 363 | [STARTUP_SUB_KTRACE] = "ktrace" , |
| 364 | [STARTUP_SUB_OSLOG] = "oslog" , |
| 365 | [STARTUP_SUB_MACH_IPC] = "mach_ipc" , |
| 366 | [STARTUP_SUB_THREAD_CALL] = "thread_call" , |
| 367 | [STARTUP_SUB_SYSCTL] = "sysctl" , |
| 368 | [STARTUP_SUB_EARLY_BOOT] = "early_boot" , |
| 369 | |
| 370 | /* LOCKDOWN is special and its value won't fit here. */ |
| 371 | }; |
| 372 | static startup_subsystem_id_t logged = STARTUP_SUB_NONE; |
| 373 | |
| 374 | if (subsystem <= logged) { |
| 375 | return; |
| 376 | } |
| 377 | |
| 378 | if (subsystem < sizeof(names) / sizeof(names[0]) && names[subsystem]) { |
| 379 | kernel_bootstrap_log(message: names[subsystem]); |
| 380 | } |
| 381 | logged = subsystem; |
| 382 | } |
| 383 | |
| 384 | __startup_func |
| 385 | void |
| 386 | event_register_handler(struct event_hdr *hdr) |
| 387 | { |
| 388 | struct event_hdr *head = hdr->next; |
| 389 | |
| 390 | hdr->next = head->next; |
| 391 | head->next = hdr; |
| 392 | } |
| 393 | |
| 394 | __startup_func |
| 395 | void |
| 396 | kernel_startup_initialize_upto(startup_subsystem_id_t upto) |
| 397 | { |
| 398 | struct startup_entry *cur = startup_entry_cur; |
| 399 | |
| 400 | assert(startup_phase < upto); |
| 401 | |
| 402 | while (cur < startup_entries_end && cur->subsystem <= upto) { |
| 403 | if ((startup_debug & STARTUP_DEBUG_VERBOSE) && |
| 404 | startup_phase >= STARTUP_SUB_KPRINTF) { |
| 405 | kprintf(fmt: "%s[%d, rank %d]: %p(%p)\n" , __func__, |
| 406 | cur->subsystem, cur->rank, cur->func, cur->arg); |
| 407 | } |
| 408 | startup_phase = cur->subsystem - 1; |
| 409 | kernel_startup_log(subsystem: cur->subsystem); |
| 410 | cur->func(cur->arg); |
| 411 | startup_entry_cur = ++cur; |
| 412 | } |
| 413 | kernel_startup_log(subsystem: upto); |
| 414 | |
| 415 | if ((startup_debug & STARTUP_DEBUG_VERBOSE) && |
| 416 | upto >= STARTUP_SUB_KPRINTF) { |
| 417 | kprintf(fmt: "%s: reached phase %d\n" , __func__, upto); |
| 418 | } |
| 419 | startup_phase = upto; |
| 420 | } |
| 421 | |
| 422 | void |
| 423 | kernel_bootstrap(void) |
| 424 | { |
| 425 | kern_return_t result; |
| 426 | thread_t thread; |
| 427 | char namep[16]; |
| 428 | |
| 429 | code_signing_config_t cs_config; |
| 430 | |
| 431 | printf(format: "%s\n" , version); /* log kernel version */ |
| 432 | |
| 433 | scale_setup(); |
| 434 | |
| 435 | kernel_bootstrap_log(message: "vm_mem_bootstrap" ); |
| 436 | vm_mem_bootstrap(); |
| 437 | |
| 438 | machine_info.memory_size = (uint32_t)mem_size; |
| 439 | #if XNU_TARGET_OS_OSX |
| 440 | machine_info.max_mem = max_mem_actual; |
| 441 | #else |
| 442 | machine_info.max_mem = max_mem; |
| 443 | #endif /* XNU_TARGET_OS_OSX */ |
| 444 | machine_info.major_version = version_major; |
| 445 | machine_info.minor_version = version_minor; |
| 446 | |
| 447 | #if CONFIG_ATM |
| 448 | /* Initialize the Activity Trace Resource Manager. */ |
| 449 | kernel_bootstrap_log(message: "atm_init" ); |
| 450 | atm_init(); |
| 451 | #endif |
| 452 | kernel_startup_initialize_upto(upto: STARTUP_SUB_OSLOG); |
| 453 | |
| 454 | #if CONFIG_UBSAN_MINIMAL |
| 455 | kernel_bootstrap_log("UBSan minimal runtime init" ); |
| 456 | ubsan_minimal_init(); |
| 457 | #endif |
| 458 | |
| 459 | #if KASAN |
| 460 | kernel_bootstrap_log("kasan_late_init" ); |
| 461 | kasan_late_init(); |
| 462 | #endif |
| 463 | |
| 464 | #if CONFIG_TELEMETRY |
| 465 | kernel_bootstrap_log(message: "telemetry_init" ); |
| 466 | telemetry_init(); |
| 467 | #endif |
| 468 | |
| 469 | if (PE_i_can_has_debugger(NULL)) { |
| 470 | if (PE_parse_boot_argn(arg_string: "-show_pointers" , arg_ptr: &namep, max_arg: sizeof(namep))) { |
| 471 | doprnt_hide_pointers = FALSE; |
| 472 | } |
| 473 | if (PE_parse_boot_argn(arg_string: "-no_slto_panic" , arg_ptr: &namep, max_arg: sizeof(namep))) { |
| 474 | extern boolean_t spinlock_timeout_panic; |
| 475 | spinlock_timeout_panic = FALSE; |
| 476 | } |
| 477 | } |
| 478 | |
| 479 | kernel_bootstrap_log(message: "console_init" ); |
| 480 | console_init(); |
| 481 | |
| 482 | kernel_bootstrap_log(message: "stackshot_init" ); |
| 483 | stackshot_init(); |
| 484 | |
| 485 | kernel_bootstrap_log(message: "sched_init" ); |
| 486 | sched_init(); |
| 487 | |
| 488 | #if CONFIG_MACF |
| 489 | kernel_bootstrap_log(message: "mac_policy_init" ); |
| 490 | mac_policy_init(); |
| 491 | #endif |
| 492 | |
| 493 | kernel_startup_initialize_upto(upto: STARTUP_SUB_MACH_IPC); |
| 494 | |
| 495 | /* |
| 496 | * As soon as the virtual memory system is up, we record |
| 497 | * that this CPU is using the kernel pmap. |
| 498 | */ |
| 499 | kernel_bootstrap_log(message: "PMAP_ACTIVATE_KERNEL" ); |
| 500 | PMAP_ACTIVATE_KERNEL(master_cpu); |
| 501 | |
| 502 | kernel_bootstrap_log(message: "mapping_free_prime" ); |
| 503 | mapping_free_prime(); /* Load up with temporary mapping blocks */ |
| 504 | |
| 505 | kernel_bootstrap_log(message: "machine_init" ); |
| 506 | machine_init(); |
| 507 | |
| 508 | kernel_bootstrap_log(message: "thread_machine_init_template" ); |
| 509 | thread_machine_init_template(); |
| 510 | |
| 511 | kernel_bootstrap_log(message: "clock_init" ); |
| 512 | clock_init(); |
| 513 | |
| 514 | /* |
| 515 | * Initialize the IPC, task, and thread subsystems. |
| 516 | */ |
| 517 | #if CONFIG_THREAD_GROUPS |
| 518 | kernel_bootstrap_log(message: "thread_group_init" ); |
| 519 | thread_group_init(); |
| 520 | #endif |
| 521 | |
| 522 | #if CONFIG_COALITIONS |
| 523 | kernel_bootstrap_log(message: "coalitions_init" ); |
| 524 | coalitions_init(); |
| 525 | #endif |
| 526 | |
| 527 | kernel_bootstrap_log(message: "code_signing_init" ); |
| 528 | code_signing_init(); |
| 529 | code_signing_configuration(NULL, config: &cs_config); |
| 530 | #if XNU_TARGET_OS_OSX && (DEVELOPMENT || DEBUG) |
| 531 | if (cs_config & CS_CONFIG_GET_OUT_OF_MY_WAY) { |
| 532 | AMFI_bootarg_disable_mach_hardening = true; |
| 533 | } |
| 534 | #endif /* XNU_TARGET_OS_OSX && (DEVELOPMENT || DEBUG) */ |
| 535 | |
| 536 | kernel_bootstrap_log(message: "task_init" ); |
| 537 | task_init(); |
| 538 | |
| 539 | kernel_bootstrap_log(message: "thread_init" ); |
| 540 | thread_init(); |
| 541 | |
| 542 | kernel_bootstrap_log(message: "restartable_init" ); |
| 543 | restartable_init(); |
| 544 | |
| 545 | kernel_bootstrap_log(message: "workq_init" ); |
| 546 | workq_init(); |
| 547 | |
| 548 | kernel_bootstrap_log(message: "turnstiles_init" ); |
| 549 | turnstiles_init(); |
| 550 | |
| 551 | kernel_bootstrap_log(message: "mach_init_activity_id" ); |
| 552 | mach_init_activity_id(); |
| 553 | |
| 554 | /* initialize host_statistics */ |
| 555 | host_statistics_init(); |
| 556 | |
| 557 | /* initialize exceptions */ |
| 558 | kernel_bootstrap_log(message: "exception_init" ); |
| 559 | exception_init(); |
| 560 | |
| 561 | #if CONFIG_SCHED_SFI |
| 562 | kernel_bootstrap_log(message: "sfi_init" ); |
| 563 | sfi_init(); |
| 564 | #endif |
| 565 | |
| 566 | /* |
| 567 | * Create a kernel thread to execute the kernel bootstrap. |
| 568 | */ |
| 569 | |
| 570 | kernel_bootstrap_log(message: "kernel_thread_create" ); |
| 571 | result = kernel_thread_create(continuation: (thread_continue_t)kernel_bootstrap_thread, NULL, MAXPRI_KERNEL, new_thread: &thread); |
| 572 | |
| 573 | if (result != KERN_SUCCESS) { |
| 574 | panic("kernel_bootstrap: result = %08X" , result); |
| 575 | } |
| 576 | |
| 577 | /* TODO: do a proper thread_start() (without the thread_setrun()) */ |
| 578 | thread->state = TH_RUN; |
| 579 | thread->last_made_runnable_time = mach_absolute_time(); |
| 580 | thread_set_thread_name(th: thread, name: "kernel_bootstrap_thread" ); |
| 581 | |
| 582 | thread_deallocate(thread); |
| 583 | |
| 584 | kernel_bootstrap_log(message: "load_context - done" ); |
| 585 | load_context(thread); |
| 586 | /*NOTREACHED*/ |
| 587 | } |
| 588 | |
| 589 | SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_addrperm; |
| 590 | SECURITY_READ_ONLY_LATE(vm_offset_t) buf_kernel_addrperm; |
| 591 | SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_addrperm_ext; |
| 592 | SECURITY_READ_ONLY_LATE(uint64_t) vm_kernel_addrhash_salt; |
| 593 | SECURITY_READ_ONLY_LATE(uint64_t) vm_kernel_addrhash_salt_ext; |
| 594 | |
| 595 | /* |
| 596 | * Now running in a thread. Kick off other services, |
| 597 | * invoke user bootstrap, enter pageout loop. |
| 598 | */ |
| 599 | static void |
| 600 | kernel_bootstrap_thread(void) |
| 601 | { |
| 602 | processor_t processor = current_processor(); |
| 603 | |
| 604 | #if (DEVELOPMENT || DEBUG) |
| 605 | platform_stall_panic_or_spin(PLATFORM_STALL_XNU_LOCATION_KERNEL_BOOTSTRAP); |
| 606 | #endif |
| 607 | |
| 608 | kernel_bootstrap_thread_log(message: "idle_thread_create" ); |
| 609 | /* |
| 610 | * Create the idle processor thread. |
| 611 | */ |
| 612 | idle_thread_create(processor); |
| 613 | |
| 614 | /* |
| 615 | * N.B. Do not stick anything else |
| 616 | * before this point. |
| 617 | * |
| 618 | * Start up the scheduler services. |
| 619 | */ |
| 620 | kernel_bootstrap_thread_log(message: "sched_startup" ); |
| 621 | sched_startup(); |
| 622 | |
| 623 | /* |
| 624 | * Thread lifecycle maintenance (teardown, stack allocation) |
| 625 | */ |
| 626 | kernel_bootstrap_thread_log(message: "thread_daemon_init" ); |
| 627 | thread_daemon_init(); |
| 628 | |
| 629 | /* |
| 630 | * Thread callout service. |
| 631 | */ |
| 632 | kernel_startup_initialize_upto(upto: STARTUP_SUB_THREAD_CALL); |
| 633 | |
| 634 | /* |
| 635 | * Remain on current processor as |
| 636 | * additional processors come online. |
| 637 | */ |
| 638 | kernel_bootstrap_thread_log(message: "thread_bind" ); |
| 639 | thread_bind(processor); |
| 640 | |
| 641 | /* |
| 642 | * Kick off memory mapping adjustments. |
| 643 | */ |
| 644 | kernel_bootstrap_thread_log(message: "mapping_adjust" ); |
| 645 | mapping_adjust(); |
| 646 | |
| 647 | /* |
| 648 | * Create the clock service. |
| 649 | */ |
| 650 | kernel_bootstrap_thread_log(message: "clock_service_create" ); |
| 651 | clock_service_create(); |
| 652 | |
| 653 | /* |
| 654 | * Create the device service. |
| 655 | */ |
| 656 | device_service_create(); |
| 657 | |
| 658 | phys_carveout_init(); |
| 659 | |
| 660 | #if MACH_KDP |
| 661 | kernel_bootstrap_log(message: "kdp_init" ); |
| 662 | kdp_init(); |
| 663 | #endif |
| 664 | |
| 665 | #if ALTERNATE_DEBUGGER |
| 666 | alternate_debugger_init(); |
| 667 | #endif |
| 668 | |
| 669 | #if HYPERVISOR |
| 670 | kernel_bootstrap_thread_log("hv_support_init" ); |
| 671 | hv_support_init(); |
| 672 | #endif |
| 673 | |
| 674 | #if CONFIG_TELEMETRY |
| 675 | kernel_bootstrap_log(message: "bootprofile_init" ); |
| 676 | bootprofile_init(); |
| 677 | #endif |
| 678 | |
| 679 | kernel_startup_initialize_upto(upto: STARTUP_SUB_SYSCTL); |
| 680 | |
| 681 | /* |
| 682 | * Initialize the globals used for permuting kernel |
| 683 | * addresses that may be exported to userland as tokens |
| 684 | * using VM_KERNEL_ADDRPERM()/VM_KERNEL_ADDRPERM_EXTERNAL(). |
| 685 | * Force the random number to be odd to avoid mapping a non-zero |
| 686 | * word-aligned address to zero via addition. |
| 687 | */ |
| 688 | vm_kernel_addrperm = (vm_offset_t)(early_random() | 1); |
| 689 | buf_kernel_addrperm = (vm_offset_t)(early_random() | 1); |
| 690 | vm_kernel_addrperm_ext = (vm_offset_t)(early_random() | 1); |
| 691 | vm_kernel_addrhash_salt = early_random(); |
| 692 | vm_kernel_addrhash_salt_ext = early_random(); |
| 693 | |
| 694 | #ifdef IOKIT |
| 695 | kernel_bootstrap_log(message: "PE_init_iokit" ); |
| 696 | PE_init_iokit(); |
| 697 | #endif |
| 698 | |
| 699 | assert(ml_get_interrupts_enabled() == FALSE); |
| 700 | |
| 701 | /* |
| 702 | * Past this point, kernel subsystems that expect to operate with |
| 703 | * interrupts or preemption enabled may begin enforcement. |
| 704 | */ |
| 705 | kernel_startup_initialize_upto(upto: STARTUP_SUB_EARLY_BOOT); |
| 706 | |
| 707 | #if SCHED_HYGIENE_DEBUG |
| 708 | // Reset interrupts masked timeout before we enable interrupts |
| 709 | ml_spin_debug_clear_self(); |
| 710 | #endif |
| 711 | (void) spllo(); /* Allow interruptions */ |
| 712 | |
| 713 | /* |
| 714 | * This will start displaying progress to the user, start as early as possible |
| 715 | */ |
| 716 | initialize_screen(NULL, kPEAcquireScreen); |
| 717 | |
| 718 | /* |
| 719 | * Initialize the shared region module. |
| 720 | */ |
| 721 | vm_commpage_init(); |
| 722 | vm_commpage_text_init(); |
| 723 | |
| 724 | #if CONFIG_MACF |
| 725 | kernel_bootstrap_log(message: "mac_policy_initmach" ); |
| 726 | mac_policy_initmach(); |
| 727 | #if CONFIG_VNGUARD |
| 728 | kernel_bootstrap_log("vnguard_policy_init" ); |
| 729 | vnguard_policy_init(); |
| 730 | #endif |
| 731 | #endif |
| 732 | |
| 733 | #if CONFIG_DTRACE |
| 734 | kernel_bootstrap_log(message: "dtrace_early_init" ); |
| 735 | dtrace_early_init(); |
| 736 | sdt_early_init(); |
| 737 | #endif |
| 738 | |
| 739 | #if CODE_SIGNING_MONITOR |
| 740 | /* |
| 741 | * Lockdown mode is initialized as a startup function within the early boot |
| 742 | * category, which means it has been initialized by now. Query the state and |
| 743 | * pass it to the code-signing-monitor if required. |
| 744 | */ |
| 745 | kernel_bootstrap_log("code-signing-monitor lockdown mode" ); |
| 746 | csm_check_lockdown_mode(); |
| 747 | #endif |
| 748 | |
| 749 | #if CODE_SIGNING_MONITOR |
| 750 | kernel_bootstrap_log("provisioning_profile_init" ); |
| 751 | csm_initialize_provisioning_profiles(); |
| 752 | #endif |
| 753 | |
| 754 | #ifndef BCM2837 |
| 755 | kernel_bootstrap_log(message: "trust_cache_init" ); |
| 756 | |
| 757 | /* Initialize the runtime for the trust cache interface */ |
| 758 | trust_cache_runtime_init(); |
| 759 | |
| 760 | /* Load the static and engineering trust caches */ |
| 761 | load_static_trust_cache(); |
| 762 | #endif |
| 763 | |
| 764 | kernel_startup_initialize_upto(upto: STARTUP_SUB_LOCKDOWN); |
| 765 | |
| 766 | /* |
| 767 | * Get rid of segments used to bootstrap kext loading. This removes |
| 768 | * the KLD, PRELINK symtab, LINKEDIT, and symtab segments/load commands. |
| 769 | * Must be done prior to lockdown so that we can free (and possibly relocate) |
| 770 | * the static KVA mappings used for the jettisoned bootstrap segments. |
| 771 | */ |
| 772 | kernel_bootstrap_log(message: "OSKextRemoveKextBootstrap" ); |
| 773 | OSKextRemoveKextBootstrap(); |
| 774 | |
| 775 | #if SOCKETS |
| 776 | /* |
| 777 | * Initialize callback table before machine lockdown |
| 778 | */ |
| 779 | mbuf_tag_init(); |
| 780 | #endif |
| 781 | |
| 782 | /* No changes to kernel text and rodata beyond this point. */ |
| 783 | kernel_bootstrap_log(message: "machine_lockdown" ); |
| 784 | machine_lockdown(); |
| 785 | |
| 786 | #ifdef CONFIG_XNUPOST |
| 787 | kern_return_t result = kernel_list_tests(); |
| 788 | result = kernel_do_post(); |
| 789 | if (result != KERN_SUCCESS) { |
| 790 | panic("kernel_do_post: Tests failed with result = 0x%08x" , result); |
| 791 | } |
| 792 | kernel_bootstrap_log("kernel_do_post - done" ); |
| 793 | #endif /* CONFIG_XNUPOST */ |
| 794 | |
| 795 | #ifdef IOKIT |
| 796 | kernel_bootstrap_log(message: "PE_lockdown_iokit" ); |
| 797 | PE_lockdown_iokit(); |
| 798 | #endif |
| 799 | /* |
| 800 | * max_cpus must be nailed down by the time PE_lockdown_iokit() finishes, |
| 801 | * at the latest |
| 802 | */ |
| 803 | vm_set_restrictions(num_cpus: machine_info.max_cpus); |
| 804 | |
| 805 | |
| 806 | #if KPERF |
| 807 | kperf_init_early(); |
| 808 | #endif |
| 809 | |
| 810 | /* |
| 811 | * Start the user bootstrap. |
| 812 | */ |
| 813 | #ifdef MACH_BSD |
| 814 | bsd_init(); |
| 815 | #endif |
| 816 | |
| 817 | |
| 818 | /* |
| 819 | * Get rid of pages used for early boot tracing. |
| 820 | */ |
| 821 | kdebug_free_early_buf(); |
| 822 | |
| 823 | serial_keyboard_init(); /* Start serial keyboard if wanted */ |
| 824 | |
| 825 | vm_page_init_local_q(num_cpus: machine_info.max_cpus); |
| 826 | |
| 827 | thread_bind(PROCESSOR_NULL); |
| 828 | |
| 829 | /* |
| 830 | * Now that all CPUs are available to run threads, this is essentially |
| 831 | * a background thread. Take this opportunity to initialize and free |
| 832 | * any remaining vm_pages that were delayed earlier by pmap_startup(). |
| 833 | */ |
| 834 | vm_free_delayed_pages(); |
| 835 | |
| 836 | /* |
| 837 | * Become the pageout daemon. |
| 838 | */ |
| 839 | vm_pageout(); |
| 840 | /*NOTREACHED*/ |
| 841 | } |
| 842 | |
| 843 | /* |
| 844 | * slave_main: |
| 845 | * |
| 846 | * Load the first thread to start a processor. |
| 847 | * This path will also be used by the master processor |
| 848 | * after being offlined. |
| 849 | */ |
| 850 | void |
| 851 | slave_main(void *machine_param) |
| 852 | { |
| 853 | processor_t processor = current_processor(); |
| 854 | thread_t thread; |
| 855 | |
| 856 | /* |
| 857 | * Use the idle processor thread if there |
| 858 | * is no dedicated start up thread. |
| 859 | */ |
| 860 | if (processor->processor_offlined == true) { |
| 861 | /* Return to the saved processor_offline context */ |
| 862 | assert(processor->startup_thread == THREAD_NULL); |
| 863 | |
| 864 | thread = processor->idle_thread; |
| 865 | thread->parameter = machine_param; |
| 866 | } else if (processor->startup_thread) { |
| 867 | thread = processor->startup_thread; |
| 868 | processor->startup_thread = THREAD_NULL; |
| 869 | } else { |
| 870 | thread = processor->idle_thread; |
| 871 | thread->continuation = processor_start_thread; |
| 872 | thread->parameter = machine_param; |
| 873 | } |
| 874 | |
| 875 | load_context(thread); |
| 876 | /*NOTREACHED*/ |
| 877 | } |
| 878 | |
| 879 | /* |
| 880 | * processor_start_thread: |
| 881 | * |
| 882 | * First thread to execute on a started processor. |
| 883 | * |
| 884 | * Called at splsched. |
| 885 | */ |
| 886 | void |
| 887 | processor_start_thread(void *machine_param, |
| 888 | __unused wait_result_t result) |
| 889 | { |
| 890 | processor_t processor = current_processor(); |
| 891 | thread_t self = current_thread(); |
| 892 | |
| 893 | slave_machine_init(machine_param); |
| 894 | |
| 895 | /* |
| 896 | * If running the idle processor thread, |
| 897 | * reenter the idle loop, else terminate. |
| 898 | */ |
| 899 | if (self == processor->idle_thread) { |
| 900 | thread_block(continuation: idle_thread); |
| 901 | } |
| 902 | |
| 903 | thread_terminate(target_act: self); |
| 904 | /*NOTREACHED*/ |
| 905 | } |
| 906 | |
| 907 | /* |
| 908 | * load_context: |
| 909 | * |
| 910 | * Start the first thread on a processor. |
| 911 | * This may be the first thread ever run on a processor, or |
| 912 | * it could be a processor that was previously offlined. |
| 913 | */ |
| 914 | static void __attribute__((noreturn)) |
| 915 | load_context( |
| 916 | thread_t thread) |
| 917 | { |
| 918 | processor_t processor = current_processor(); |
| 919 | |
| 920 | |
| 921 | #define load_context_kprintf(x...) /* kprintf("load_context: " x) */ |
| 922 | |
| 923 | load_context_kprintf("machine_set_current_thread\n" ); |
| 924 | machine_set_current_thread(thread); |
| 925 | |
| 926 | load_context_kprintf("processor_up\n" ); |
| 927 | |
| 928 | PMAP_ACTIVATE_KERNEL(processor->cpu_id); |
| 929 | |
| 930 | /* |
| 931 | * Acquire a stack if none attached. The panic |
| 932 | * should never occur since the thread is expected |
| 933 | * to have reserved stack. |
| 934 | */ |
| 935 | load_context_kprintf("thread %p, stack %lx, stackptr %lx\n" , thread, |
| 936 | thread->kernel_stack, thread->machine.kstackptr); |
| 937 | if (!thread->kernel_stack) { |
| 938 | load_context_kprintf("stack_alloc_try\n" ); |
| 939 | if (!stack_alloc_try(thread)) { |
| 940 | panic("load_context" ); |
| 941 | } |
| 942 | } |
| 943 | |
| 944 | /* |
| 945 | * The idle processor threads are not counted as |
| 946 | * running for load calculations. |
| 947 | */ |
| 948 | if (!(thread->state & TH_IDLE)) { |
| 949 | SCHED(run_count_incr)(thread); |
| 950 | } |
| 951 | |
| 952 | processor->active_thread = thread; |
| 953 | processor_state_update_explicit(processor, pri: thread->sched_pri, |
| 954 | SFI_CLASS_KERNEL, pset_type: PSET_SMP, perfctl_class: thread_get_perfcontrol_class(thread), urgency: THREAD_URGENCY_NONE, |
| 955 | bucket: ((thread->state & TH_IDLE) || (thread->bound_processor != PROCESSOR_NULL)) ? TH_BUCKET_SCHED_MAX : thread->th_sched_bucket); |
| 956 | processor->current_is_bound = thread->bound_processor != PROCESSOR_NULL; |
| 957 | processor->current_is_NO_SMT = false; |
| 958 | processor->current_is_eagerpreempt = false; |
| 959 | #if CONFIG_THREAD_GROUPS |
| 960 | processor->current_thread_group = thread_group_get(t: thread); |
| 961 | #endif |
| 962 | processor->starting_pri = thread->sched_pri; |
| 963 | processor->deadline = UINT64_MAX; |
| 964 | thread->last_processor = processor; |
| 965 | processor_up(processor); |
| 966 | struct recount_snap snap = { 0 }; |
| 967 | recount_snapshot(snap: &snap); |
| 968 | processor->last_dispatch = snap.rsn_time_mach; |
| 969 | recount_processor_online(processor, snap: &snap); |
| 970 | |
| 971 | smr_cpu_join(processor, ctime: processor->last_dispatch); |
| 972 | |
| 973 | PMAP_ACTIVATE_USER(thread, processor->cpu_id); |
| 974 | |
| 975 | load_context_kprintf("machine_load_context\n" ); |
| 976 | |
| 977 | #if KASAN_TBI |
| 978 | __asan_handle_no_return(); |
| 979 | #endif /* KASAN_TBI */ |
| 980 | |
| 981 | machine_load_context(thread); |
| 982 | /*NOTREACHED*/ |
| 983 | } |
| 984 | |
| 985 | void |
| 986 | scale_setup(void) |
| 987 | { |
| 988 | int scale = 0; |
| 989 | #if defined(__LP64__) |
| 990 | typeof(task_max) task_max_base = task_max; |
| 991 | |
| 992 | /* Raise limits for servers with >= 16G */ |
| 993 | if ((serverperfmode != 0) && ((uint64_t)max_mem_actual >= (uint64_t)(16 * 1024 * 1024 * 1024ULL))) { |
| 994 | scale = (int)((uint64_t)sane_size / (uint64_t)(8 * 1024 * 1024 * 1024ULL)); |
| 995 | /* limit to 128 G */ |
| 996 | if (scale > 16) { |
| 997 | scale = 16; |
| 998 | } |
| 999 | task_max_base = 2500; |
| 1000 | /* Raise limits for machines with >= 3GB */ |
| 1001 | } else if ((uint64_t)max_mem_actual >= (uint64_t)(3 * 1024 * 1024 * 1024ULL)) { |
| 1002 | if ((uint64_t)max_mem_actual < (uint64_t)(8 * 1024 * 1024 * 1024ULL)) { |
| 1003 | scale = 2; |
| 1004 | } else { |
| 1005 | /* limit to 64GB */ |
| 1006 | scale = MIN(16, (int)((uint64_t)max_mem_actual / (uint64_t)(4 * 1024 * 1024 * 1024ULL))); |
| 1007 | } |
| 1008 | } |
| 1009 | |
| 1010 | task_max = MAX(task_max, task_max_base * scale); |
| 1011 | |
| 1012 | if (scale != 0) { |
| 1013 | task_threadmax = task_max; |
| 1014 | thread_max = task_max * 5; |
| 1015 | } |
| 1016 | |
| 1017 | #endif |
| 1018 | |
| 1019 | bsd_scale_setup(scale); |
| 1020 | } |
| 1021 | |