1 | /* |
2 | * Copyright (c) 2000-2020 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | /* |
29 | * @OSF_COPYRIGHT@ |
30 | */ |
31 | /* |
32 | * Mach Operating System |
33 | * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University |
34 | * All Rights Reserved. |
35 | * |
36 | * Permission to use, copy, modify and distribute this software and its |
37 | * documentation is hereby granted, provided that both the copyright |
38 | * notice and this permission notice appear in all copies of the |
39 | * software, derivative works or modified versions, and any portions |
40 | * thereof, and that both notices appear in supporting documentation. |
41 | * |
42 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" |
43 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR |
44 | * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. |
45 | * |
46 | * Carnegie Mellon requests users of this software to return to |
47 | * |
48 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU |
49 | * School of Computer Science |
50 | * Carnegie Mellon University |
51 | * Pittsburgh PA 15213-3890 |
52 | * |
53 | * any improvements or extensions that they make and grant Carnegie Mellon |
54 | * the rights to redistribute these changes. |
55 | */ |
56 | /* |
57 | * NOTICE: This file was modified by McAfee Research in 2004 to introduce |
58 | * support for mandatory and extensible security protections. This notice |
59 | * is included in support of clause 2.2 (b) of the Apple Public License, |
60 | * Version 2.0. |
61 | */ |
62 | /* |
63 | */ |
64 | |
65 | /* |
66 | * Mach kernel startup. |
67 | */ |
68 | |
69 | #include <debug.h> |
70 | #include <mach_kdp.h> |
71 | |
72 | #include <mach/boolean.h> |
73 | #include <mach/machine.h> |
74 | #include <mach/thread_act.h> |
75 | #include <mach/task_special_ports.h> |
76 | #include <mach/vm_param.h> |
77 | #include <kern/assert.h> |
78 | #include <kern/mach_param.h> |
79 | #include <kern/misc_protos.h> |
80 | #include <kern/clock.h> |
81 | #include <kern/coalition.h> |
82 | #include <kern/cpu_number.h> |
83 | #include <kern/ledger.h> |
84 | #include <kern/machine.h> |
85 | #include <kern/processor.h> |
86 | #include <kern/restartable.h> |
87 | #include <kern/sched_prim.h> |
88 | #include <kern/turnstile.h> |
89 | #if CONFIG_SCHED_SFI |
90 | #include <kern/sfi.h> |
91 | #endif |
92 | #include <kern/startup.h> |
93 | #include <kern/task.h> |
94 | #include <kern/thread.h> |
95 | #include <kern/timer.h> |
96 | #if CONFIG_TELEMETRY |
97 | #include <kern/telemetry.h> |
98 | #endif |
99 | #include <kern/kpc.h> |
100 | #include <kern/zalloc.h> |
101 | #include <kern/locks.h> |
102 | #include <kern/debug.h> |
103 | #if KPERF |
104 | #include <kperf/kperf.h> |
105 | #endif /* KPERF */ |
106 | #include <corpses/task_corpse.h> |
107 | #include <prng/random.h> |
108 | #include <console/serial_protos.h> |
109 | #include <vm/vm_kern.h> |
110 | #include <vm/vm_init.h> |
111 | #include <vm/vm_map.h> |
112 | #include <vm/vm_object.h> |
113 | #include <vm/vm_page.h> |
114 | #include <vm/vm_pageout.h> |
115 | #include <vm/vm_shared_region.h> |
116 | #include <machine/pmap.h> |
117 | #include <machine/commpage.h> |
118 | #include <machine/machine_routines.h> |
119 | #include <libkern/version.h> |
120 | #include <pexpert/device_tree.h> |
121 | #include <sys/codesign.h> |
122 | #include <sys/kdebug.h> |
123 | #include <sys/random.h> |
124 | #include <sys/ktrace.h> |
125 | #include <sys/trust_caches.h> |
126 | #include <sys/code_signing.h> |
127 | #include <libkern/section_keywords.h> |
128 | |
129 | #include <kern/waitq.h> |
130 | #include <ipc/ipc_voucher.h> |
131 | #include <mach/host_info.h> |
132 | #include <pthread/workqueue_internal.h> |
133 | |
134 | #if SOCKETS |
135 | extern void mbuf_tag_init(void); |
136 | #endif |
137 | |
138 | #if CONFIG_XNUPOST |
139 | #include <tests/ktest.h> |
140 | #include <tests/xnupost.h> |
141 | #endif |
142 | |
143 | #if CONFIG_ATM |
144 | #include <atm/atm_internal.h> |
145 | #endif |
146 | |
147 | #if CONFIG_CSR |
148 | #include <sys/csr.h> |
149 | #endif |
150 | |
151 | #if ALTERNATE_DEBUGGER |
152 | #include <arm64/alternate_debugger.h> |
153 | #endif |
154 | |
155 | #if MACH_KDP |
156 | #include <kdp/kdp.h> |
157 | #endif |
158 | |
159 | #if CONFIG_MACF |
160 | #include <security/mac_mach_internal.h> |
161 | #if CONFIG_VNGUARD |
162 | extern void vnguard_policy_init(void); |
163 | #endif |
164 | #endif |
165 | |
166 | #if HYPERVISOR |
167 | #include <kern/hv_support.h> |
168 | #endif |
169 | |
170 | #if CONFIG_UBSAN_MINIMAL |
171 | #include <san/ubsan_minimal.h> |
172 | #endif |
173 | |
174 | #include <san/kasan.h> |
175 | |
176 | #include <i386/pmCPU.h> |
177 | static void kernel_bootstrap_thread(void); |
178 | |
179 | static void load_context( |
180 | thread_t thread); |
181 | |
182 | #if CONFIG_ECC_LOGGING |
183 | #include <kern/ecc.h> |
184 | #endif |
185 | |
186 | #if (defined(__i386__) || defined(__x86_64__)) && CONFIG_VMX |
187 | #include <i386/vmx/vmx_cpu.h> |
188 | #endif |
189 | |
190 | #if CONFIG_DTRACE |
191 | extern void dtrace_early_init(void); |
192 | extern void sdt_early_init(void); |
193 | #endif |
194 | |
195 | // libkern/OSKextLib.cpp |
196 | extern void OSKextRemoveKextBootstrap(void); |
197 | |
198 | void scale_setup(void); |
199 | extern void bsd_scale_setup(int); |
200 | extern unsigned int semaphore_max; |
201 | extern void stackshot_init(void); |
202 | |
203 | /* |
204 | * Running in virtual memory, on the interrupt stack. |
205 | */ |
206 | |
207 | extern struct startup_entry startup_entries[] |
208 | __SECTION_START_SYM(STARTUP_HOOK_SEGMENT, STARTUP_HOOK_SECTION); |
209 | |
210 | extern struct startup_entry startup_entries_end[] |
211 | __SECTION_END_SYM(STARTUP_HOOK_SEGMENT, STARTUP_HOOK_SECTION); |
212 | |
213 | static struct startup_entry *__startup_data startup_entry_cur = startup_entries; |
214 | |
215 | SECURITY_READ_ONLY_LATE(startup_subsystem_id_t) startup_phase = STARTUP_SUB_NONE; |
216 | |
217 | extern int serverperfmode; |
218 | |
219 | TUNABLE(startup_debug_t, startup_debug, "startup_debug" , 0); |
220 | |
221 | static inline void |
222 | kernel_bootstrap_log(const char *message) |
223 | { |
224 | if ((startup_debug & STARTUP_DEBUG_VERBOSE) && |
225 | startup_phase >= STARTUP_SUB_KPRINTF) { |
226 | kprintf(fmt: "kernel_bootstrap: %s\n" , message); |
227 | } |
228 | kernel_debug_string_early(message); |
229 | } |
230 | |
231 | static inline void |
232 | kernel_bootstrap_thread_log(const char *message) |
233 | { |
234 | if ((startup_debug & STARTUP_DEBUG_VERBOSE) && |
235 | startup_phase >= STARTUP_SUB_KPRINTF) { |
236 | kprintf(fmt: "kernel_bootstrap_thread: %s\n" , message); |
237 | } |
238 | kernel_debug_string_early(message); |
239 | } |
240 | |
241 | extern void |
242 | qsort(void *a, size_t n, size_t es, int (*cmp)(const void *, const void *)); |
243 | |
244 | __startup_func |
245 | static int |
246 | startup_entry_cmp(const void *e1, const void *e2) |
247 | { |
248 | const struct startup_entry *a = e1; |
249 | const struct startup_entry *b = e2; |
250 | if (a->subsystem == b->subsystem) { |
251 | if (a->rank == b->rank) { |
252 | return 0; |
253 | } |
254 | return a->rank > b->rank ? 1 : -1; |
255 | } |
256 | return a->subsystem > b->subsystem ? 1 : -1; |
257 | } |
258 | |
259 | __startup_func |
260 | void |
261 | kernel_startup_bootstrap(void) |
262 | { |
263 | /* |
264 | * Sort the various STARTUP() entries by subsystem/rank. |
265 | */ |
266 | size_t n = startup_entries_end - startup_entries; |
267 | |
268 | if (n == 0) { |
269 | panic("Section %s,%s missing" , |
270 | STARTUP_HOOK_SEGMENT, STARTUP_HOOK_SECTION); |
271 | } |
272 | if (((uintptr_t)startup_entries_end - (uintptr_t)startup_entries) % |
273 | sizeof(struct startup_entry)) { |
274 | panic("Section %s,%s has invalid size" , |
275 | STARTUP_HOOK_SEGMENT, STARTUP_HOOK_SECTION); |
276 | } |
277 | |
278 | qsort(a: startup_entries, n, es: sizeof(struct startup_entry), cmp: startup_entry_cmp); |
279 | |
280 | /* |
281 | * Then initialize all tunables, timeouts, and locks |
282 | */ |
283 | kernel_startup_initialize_upto(upto: STARTUP_SUB_LOCKS); |
284 | } |
285 | |
286 | __startup_func |
287 | void |
288 | kernel_startup_tunable_init(const struct startup_tunable_spec *spec) |
289 | { |
290 | if (spec->var_is_str) { |
291 | PE_parse_boot_arg_str(arg_string: spec->name, arg_ptr: spec->var_addr, size: spec->var_len); |
292 | } else if (PE_parse_boot_argn(arg_string: spec->name, arg_ptr: spec->var_addr, max_arg: spec->var_len)) { |
293 | if (spec->var_is_bool) { |
294 | /* make sure bool's are valued in {0, 1} */ |
295 | *(bool *)spec->var_addr = *(uint8_t *)spec->var_addr; |
296 | } |
297 | } |
298 | } |
299 | |
300 | __startup_func |
301 | void |
302 | kernel_startup_tunable_dt_init(const struct startup_tunable_dt_spec *spec) |
303 | { |
304 | DTEntry base; |
305 | |
306 | if (SecureDTLookupEntry(NULL, pathName: spec->dt_base, foundEntry: &base) != kSuccess) { |
307 | base = NULL; |
308 | } |
309 | |
310 | bool found_in_chosen = false; |
311 | |
312 | if (spec->dt_chosen_override) { |
313 | DTEntry chosen, chosen_base; |
314 | |
315 | if (SecureDTLookupEntry(NULL, pathName: "chosen" , foundEntry: &chosen) != kSuccess) { |
316 | chosen = NULL; |
317 | } |
318 | |
319 | if (chosen != NULL && SecureDTLookupEntry(searchPoint: chosen, pathName: spec->dt_base, foundEntry: &chosen_base) == kSuccess) { |
320 | base = chosen_base; |
321 | found_in_chosen = true; |
322 | } |
323 | } |
324 | |
325 | uint64_t const *data; |
326 | unsigned int data_size = spec->var_len; |
327 | |
328 | if (base != NULL && SecureDTGetProperty(entry: base, propertyName: spec->dt_name, propertyValue: (const void **)&data, propertySize: &data_size) == kSuccess) { |
329 | if (data_size != spec->var_len) { |
330 | panic("unexpected tunable size %u in DT entry %s/%s/%s" , |
331 | data_size, found_in_chosen ? "/chosen" : "" , spec->dt_base, spec->dt_name); |
332 | } |
333 | |
334 | /* No need to handle bools specially, they are 1 byte integers in the DT. */ |
335 | memcpy(dst: spec->var_addr, src: data, n: spec->var_len); |
336 | } |
337 | |
338 | /* boot-arg overrides. */ |
339 | |
340 | if (PE_parse_boot_argn(arg_string: spec->boot_arg_name, arg_ptr: spec->var_addr, max_arg: spec->var_len)) { |
341 | if (spec->var_is_bool) { |
342 | *(bool *)spec->var_addr = *(uint8_t *)spec->var_addr; |
343 | } |
344 | } |
345 | } |
346 | |
347 | static void |
348 | kernel_startup_log(startup_subsystem_id_t subsystem) |
349 | { |
350 | static const char *names[] = { |
351 | [STARTUP_SUB_TUNABLES] = "tunables" , |
352 | [STARTUP_SUB_TIMEOUTS] = "timeouts" , |
353 | [STARTUP_SUB_LOCKS] = "locks" , |
354 | [STARTUP_SUB_KPRINTF] = "kprintf" , |
355 | |
356 | [STARTUP_SUB_PMAP_STEAL] = "pmap_steal" , |
357 | [STARTUP_SUB_KMEM] = "kmem" , |
358 | [STARTUP_SUB_ZALLOC] = "zalloc" , |
359 | [STARTUP_SUB_PERCPU] = "percpu" , |
360 | [STARTUP_SUB_EVENT] = "event" , |
361 | |
362 | [STARTUP_SUB_CODESIGNING] = "codesigning" , |
363 | [STARTUP_SUB_KTRACE] = "ktrace" , |
364 | [STARTUP_SUB_OSLOG] = "oslog" , |
365 | [STARTUP_SUB_MACH_IPC] = "mach_ipc" , |
366 | [STARTUP_SUB_THREAD_CALL] = "thread_call" , |
367 | [STARTUP_SUB_SYSCTL] = "sysctl" , |
368 | [STARTUP_SUB_EARLY_BOOT] = "early_boot" , |
369 | |
370 | /* LOCKDOWN is special and its value won't fit here. */ |
371 | }; |
372 | static startup_subsystem_id_t logged = STARTUP_SUB_NONE; |
373 | |
374 | if (subsystem <= logged) { |
375 | return; |
376 | } |
377 | |
378 | if (subsystem < sizeof(names) / sizeof(names[0]) && names[subsystem]) { |
379 | kernel_bootstrap_log(message: names[subsystem]); |
380 | } |
381 | logged = subsystem; |
382 | } |
383 | |
384 | __startup_func |
385 | void |
386 | event_register_handler(struct event_hdr *hdr) |
387 | { |
388 | struct event_hdr *head = hdr->next; |
389 | |
390 | hdr->next = head->next; |
391 | head->next = hdr; |
392 | } |
393 | |
394 | __startup_func |
395 | void |
396 | kernel_startup_initialize_upto(startup_subsystem_id_t upto) |
397 | { |
398 | struct startup_entry *cur = startup_entry_cur; |
399 | |
400 | assert(startup_phase < upto); |
401 | |
402 | while (cur < startup_entries_end && cur->subsystem <= upto) { |
403 | if ((startup_debug & STARTUP_DEBUG_VERBOSE) && |
404 | startup_phase >= STARTUP_SUB_KPRINTF) { |
405 | kprintf(fmt: "%s[%d, rank %d]: %p(%p)\n" , __func__, |
406 | cur->subsystem, cur->rank, cur->func, cur->arg); |
407 | } |
408 | startup_phase = cur->subsystem - 1; |
409 | kernel_startup_log(subsystem: cur->subsystem); |
410 | cur->func(cur->arg); |
411 | startup_entry_cur = ++cur; |
412 | } |
413 | kernel_startup_log(subsystem: upto); |
414 | |
415 | if ((startup_debug & STARTUP_DEBUG_VERBOSE) && |
416 | upto >= STARTUP_SUB_KPRINTF) { |
417 | kprintf(fmt: "%s: reached phase %d\n" , __func__, upto); |
418 | } |
419 | startup_phase = upto; |
420 | } |
421 | |
422 | void |
423 | kernel_bootstrap(void) |
424 | { |
425 | kern_return_t result; |
426 | thread_t thread; |
427 | char namep[16]; |
428 | |
429 | code_signing_config_t cs_config; |
430 | |
431 | printf(format: "%s\n" , version); /* log kernel version */ |
432 | |
433 | scale_setup(); |
434 | |
435 | kernel_bootstrap_log(message: "vm_mem_bootstrap" ); |
436 | vm_mem_bootstrap(); |
437 | |
438 | machine_info.memory_size = (uint32_t)mem_size; |
439 | #if XNU_TARGET_OS_OSX |
440 | machine_info.max_mem = max_mem_actual; |
441 | #else |
442 | machine_info.max_mem = max_mem; |
443 | #endif /* XNU_TARGET_OS_OSX */ |
444 | machine_info.major_version = version_major; |
445 | machine_info.minor_version = version_minor; |
446 | |
447 | #if CONFIG_ATM |
448 | /* Initialize the Activity Trace Resource Manager. */ |
449 | kernel_bootstrap_log(message: "atm_init" ); |
450 | atm_init(); |
451 | #endif |
452 | kernel_startup_initialize_upto(upto: STARTUP_SUB_OSLOG); |
453 | |
454 | #if CONFIG_UBSAN_MINIMAL |
455 | kernel_bootstrap_log("UBSan minimal runtime init" ); |
456 | ubsan_minimal_init(); |
457 | #endif |
458 | |
459 | #if KASAN |
460 | kernel_bootstrap_log("kasan_late_init" ); |
461 | kasan_late_init(); |
462 | #endif |
463 | |
464 | #if CONFIG_TELEMETRY |
465 | kernel_bootstrap_log(message: "telemetry_init" ); |
466 | telemetry_init(); |
467 | #endif |
468 | |
469 | if (PE_i_can_has_debugger(NULL)) { |
470 | if (PE_parse_boot_argn(arg_string: "-show_pointers" , arg_ptr: &namep, max_arg: sizeof(namep))) { |
471 | doprnt_hide_pointers = FALSE; |
472 | } |
473 | if (PE_parse_boot_argn(arg_string: "-no_slto_panic" , arg_ptr: &namep, max_arg: sizeof(namep))) { |
474 | extern boolean_t spinlock_timeout_panic; |
475 | spinlock_timeout_panic = FALSE; |
476 | } |
477 | } |
478 | |
479 | kernel_bootstrap_log(message: "console_init" ); |
480 | console_init(); |
481 | |
482 | kernel_bootstrap_log(message: "stackshot_init" ); |
483 | stackshot_init(); |
484 | |
485 | kernel_bootstrap_log(message: "sched_init" ); |
486 | sched_init(); |
487 | |
488 | #if CONFIG_MACF |
489 | kernel_bootstrap_log(message: "mac_policy_init" ); |
490 | mac_policy_init(); |
491 | #endif |
492 | |
493 | kernel_startup_initialize_upto(upto: STARTUP_SUB_MACH_IPC); |
494 | |
495 | /* |
496 | * As soon as the virtual memory system is up, we record |
497 | * that this CPU is using the kernel pmap. |
498 | */ |
499 | kernel_bootstrap_log(message: "PMAP_ACTIVATE_KERNEL" ); |
500 | PMAP_ACTIVATE_KERNEL(master_cpu); |
501 | |
502 | kernel_bootstrap_log(message: "mapping_free_prime" ); |
503 | mapping_free_prime(); /* Load up with temporary mapping blocks */ |
504 | |
505 | kernel_bootstrap_log(message: "machine_init" ); |
506 | machine_init(); |
507 | |
508 | kernel_bootstrap_log(message: "thread_machine_init_template" ); |
509 | thread_machine_init_template(); |
510 | |
511 | kernel_bootstrap_log(message: "clock_init" ); |
512 | clock_init(); |
513 | |
514 | /* |
515 | * Initialize the IPC, task, and thread subsystems. |
516 | */ |
517 | #if CONFIG_THREAD_GROUPS |
518 | kernel_bootstrap_log(message: "thread_group_init" ); |
519 | thread_group_init(); |
520 | #endif |
521 | |
522 | #if CONFIG_COALITIONS |
523 | kernel_bootstrap_log(message: "coalitions_init" ); |
524 | coalitions_init(); |
525 | #endif |
526 | |
527 | kernel_bootstrap_log(message: "code_signing_init" ); |
528 | code_signing_init(); |
529 | code_signing_configuration(NULL, config: &cs_config); |
530 | #if XNU_TARGET_OS_OSX && (DEVELOPMENT || DEBUG) |
531 | if (cs_config & CS_CONFIG_GET_OUT_OF_MY_WAY) { |
532 | AMFI_bootarg_disable_mach_hardening = true; |
533 | } |
534 | #endif /* XNU_TARGET_OS_OSX && (DEVELOPMENT || DEBUG) */ |
535 | |
536 | kernel_bootstrap_log(message: "task_init" ); |
537 | task_init(); |
538 | |
539 | kernel_bootstrap_log(message: "thread_init" ); |
540 | thread_init(); |
541 | |
542 | kernel_bootstrap_log(message: "restartable_init" ); |
543 | restartable_init(); |
544 | |
545 | kernel_bootstrap_log(message: "workq_init" ); |
546 | workq_init(); |
547 | |
548 | kernel_bootstrap_log(message: "turnstiles_init" ); |
549 | turnstiles_init(); |
550 | |
551 | kernel_bootstrap_log(message: "mach_init_activity_id" ); |
552 | mach_init_activity_id(); |
553 | |
554 | /* initialize host_statistics */ |
555 | host_statistics_init(); |
556 | |
557 | /* initialize exceptions */ |
558 | kernel_bootstrap_log(message: "exception_init" ); |
559 | exception_init(); |
560 | |
561 | #if CONFIG_SCHED_SFI |
562 | kernel_bootstrap_log(message: "sfi_init" ); |
563 | sfi_init(); |
564 | #endif |
565 | |
566 | /* |
567 | * Create a kernel thread to execute the kernel bootstrap. |
568 | */ |
569 | |
570 | kernel_bootstrap_log(message: "kernel_thread_create" ); |
571 | result = kernel_thread_create(continuation: (thread_continue_t)kernel_bootstrap_thread, NULL, MAXPRI_KERNEL, new_thread: &thread); |
572 | |
573 | if (result != KERN_SUCCESS) { |
574 | panic("kernel_bootstrap: result = %08X" , result); |
575 | } |
576 | |
577 | /* TODO: do a proper thread_start() (without the thread_setrun()) */ |
578 | thread->state = TH_RUN; |
579 | thread->last_made_runnable_time = mach_absolute_time(); |
580 | thread_set_thread_name(th: thread, name: "kernel_bootstrap_thread" ); |
581 | |
582 | thread_deallocate(thread); |
583 | |
584 | kernel_bootstrap_log(message: "load_context - done" ); |
585 | load_context(thread); |
586 | /*NOTREACHED*/ |
587 | } |
588 | |
589 | SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_addrperm; |
590 | SECURITY_READ_ONLY_LATE(vm_offset_t) buf_kernel_addrperm; |
591 | SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_addrperm_ext; |
592 | SECURITY_READ_ONLY_LATE(uint64_t) vm_kernel_addrhash_salt; |
593 | SECURITY_READ_ONLY_LATE(uint64_t) vm_kernel_addrhash_salt_ext; |
594 | |
595 | /* |
596 | * Now running in a thread. Kick off other services, |
597 | * invoke user bootstrap, enter pageout loop. |
598 | */ |
599 | static void |
600 | kernel_bootstrap_thread(void) |
601 | { |
602 | processor_t processor = current_processor(); |
603 | |
604 | #if (DEVELOPMENT || DEBUG) |
605 | platform_stall_panic_or_spin(PLATFORM_STALL_XNU_LOCATION_KERNEL_BOOTSTRAP); |
606 | #endif |
607 | |
608 | kernel_bootstrap_thread_log(message: "idle_thread_create" ); |
609 | /* |
610 | * Create the idle processor thread. |
611 | */ |
612 | idle_thread_create(processor); |
613 | |
614 | /* |
615 | * N.B. Do not stick anything else |
616 | * before this point. |
617 | * |
618 | * Start up the scheduler services. |
619 | */ |
620 | kernel_bootstrap_thread_log(message: "sched_startup" ); |
621 | sched_startup(); |
622 | |
623 | /* |
624 | * Thread lifecycle maintenance (teardown, stack allocation) |
625 | */ |
626 | kernel_bootstrap_thread_log(message: "thread_daemon_init" ); |
627 | thread_daemon_init(); |
628 | |
629 | /* |
630 | * Thread callout service. |
631 | */ |
632 | kernel_startup_initialize_upto(upto: STARTUP_SUB_THREAD_CALL); |
633 | |
634 | /* |
635 | * Remain on current processor as |
636 | * additional processors come online. |
637 | */ |
638 | kernel_bootstrap_thread_log(message: "thread_bind" ); |
639 | thread_bind(processor); |
640 | |
641 | /* |
642 | * Kick off memory mapping adjustments. |
643 | */ |
644 | kernel_bootstrap_thread_log(message: "mapping_adjust" ); |
645 | mapping_adjust(); |
646 | |
647 | /* |
648 | * Create the clock service. |
649 | */ |
650 | kernel_bootstrap_thread_log(message: "clock_service_create" ); |
651 | clock_service_create(); |
652 | |
653 | /* |
654 | * Create the device service. |
655 | */ |
656 | device_service_create(); |
657 | |
658 | phys_carveout_init(); |
659 | |
660 | #if MACH_KDP |
661 | kernel_bootstrap_log(message: "kdp_init" ); |
662 | kdp_init(); |
663 | #endif |
664 | |
665 | #if ALTERNATE_DEBUGGER |
666 | alternate_debugger_init(); |
667 | #endif |
668 | |
669 | #if HYPERVISOR |
670 | kernel_bootstrap_thread_log("hv_support_init" ); |
671 | hv_support_init(); |
672 | #endif |
673 | |
674 | #if CONFIG_TELEMETRY |
675 | kernel_bootstrap_log(message: "bootprofile_init" ); |
676 | bootprofile_init(); |
677 | #endif |
678 | |
679 | kernel_startup_initialize_upto(upto: STARTUP_SUB_SYSCTL); |
680 | |
681 | /* |
682 | * Initialize the globals used for permuting kernel |
683 | * addresses that may be exported to userland as tokens |
684 | * using VM_KERNEL_ADDRPERM()/VM_KERNEL_ADDRPERM_EXTERNAL(). |
685 | * Force the random number to be odd to avoid mapping a non-zero |
686 | * word-aligned address to zero via addition. |
687 | */ |
688 | vm_kernel_addrperm = (vm_offset_t)(early_random() | 1); |
689 | buf_kernel_addrperm = (vm_offset_t)(early_random() | 1); |
690 | vm_kernel_addrperm_ext = (vm_offset_t)(early_random() | 1); |
691 | vm_kernel_addrhash_salt = early_random(); |
692 | vm_kernel_addrhash_salt_ext = early_random(); |
693 | |
694 | #ifdef IOKIT |
695 | kernel_bootstrap_log(message: "PE_init_iokit" ); |
696 | PE_init_iokit(); |
697 | #endif |
698 | |
699 | assert(ml_get_interrupts_enabled() == FALSE); |
700 | |
701 | /* |
702 | * Past this point, kernel subsystems that expect to operate with |
703 | * interrupts or preemption enabled may begin enforcement. |
704 | */ |
705 | kernel_startup_initialize_upto(upto: STARTUP_SUB_EARLY_BOOT); |
706 | |
707 | #if SCHED_HYGIENE_DEBUG |
708 | // Reset interrupts masked timeout before we enable interrupts |
709 | ml_spin_debug_clear_self(); |
710 | #endif |
711 | (void) spllo(); /* Allow interruptions */ |
712 | |
713 | /* |
714 | * This will start displaying progress to the user, start as early as possible |
715 | */ |
716 | initialize_screen(NULL, kPEAcquireScreen); |
717 | |
718 | /* |
719 | * Initialize the shared region module. |
720 | */ |
721 | vm_commpage_init(); |
722 | vm_commpage_text_init(); |
723 | |
724 | #if CONFIG_MACF |
725 | kernel_bootstrap_log(message: "mac_policy_initmach" ); |
726 | mac_policy_initmach(); |
727 | #if CONFIG_VNGUARD |
728 | kernel_bootstrap_log("vnguard_policy_init" ); |
729 | vnguard_policy_init(); |
730 | #endif |
731 | #endif |
732 | |
733 | #if CONFIG_DTRACE |
734 | kernel_bootstrap_log(message: "dtrace_early_init" ); |
735 | dtrace_early_init(); |
736 | sdt_early_init(); |
737 | #endif |
738 | |
739 | #if CODE_SIGNING_MONITOR |
740 | /* |
741 | * Lockdown mode is initialized as a startup function within the early boot |
742 | * category, which means it has been initialized by now. Query the state and |
743 | * pass it to the code-signing-monitor if required. |
744 | */ |
745 | kernel_bootstrap_log("code-signing-monitor lockdown mode" ); |
746 | csm_check_lockdown_mode(); |
747 | #endif |
748 | |
749 | #if CODE_SIGNING_MONITOR |
750 | kernel_bootstrap_log("provisioning_profile_init" ); |
751 | csm_initialize_provisioning_profiles(); |
752 | #endif |
753 | |
754 | #ifndef BCM2837 |
755 | kernel_bootstrap_log(message: "trust_cache_init" ); |
756 | |
757 | /* Initialize the runtime for the trust cache interface */ |
758 | trust_cache_runtime_init(); |
759 | |
760 | /* Load the static and engineering trust caches */ |
761 | load_static_trust_cache(); |
762 | #endif |
763 | |
764 | kernel_startup_initialize_upto(upto: STARTUP_SUB_LOCKDOWN); |
765 | |
766 | /* |
767 | * Get rid of segments used to bootstrap kext loading. This removes |
768 | * the KLD, PRELINK symtab, LINKEDIT, and symtab segments/load commands. |
769 | * Must be done prior to lockdown so that we can free (and possibly relocate) |
770 | * the static KVA mappings used for the jettisoned bootstrap segments. |
771 | */ |
772 | kernel_bootstrap_log(message: "OSKextRemoveKextBootstrap" ); |
773 | OSKextRemoveKextBootstrap(); |
774 | |
775 | #if SOCKETS |
776 | /* |
777 | * Initialize callback table before machine lockdown |
778 | */ |
779 | mbuf_tag_init(); |
780 | #endif |
781 | |
782 | /* No changes to kernel text and rodata beyond this point. */ |
783 | kernel_bootstrap_log(message: "machine_lockdown" ); |
784 | machine_lockdown(); |
785 | |
786 | #ifdef CONFIG_XNUPOST |
787 | kern_return_t result = kernel_list_tests(); |
788 | result = kernel_do_post(); |
789 | if (result != KERN_SUCCESS) { |
790 | panic("kernel_do_post: Tests failed with result = 0x%08x" , result); |
791 | } |
792 | kernel_bootstrap_log("kernel_do_post - done" ); |
793 | #endif /* CONFIG_XNUPOST */ |
794 | |
795 | #ifdef IOKIT |
796 | kernel_bootstrap_log(message: "PE_lockdown_iokit" ); |
797 | PE_lockdown_iokit(); |
798 | #endif |
799 | /* |
800 | * max_cpus must be nailed down by the time PE_lockdown_iokit() finishes, |
801 | * at the latest |
802 | */ |
803 | vm_set_restrictions(num_cpus: machine_info.max_cpus); |
804 | |
805 | |
806 | #if KPERF |
807 | kperf_init_early(); |
808 | #endif |
809 | |
810 | /* |
811 | * Start the user bootstrap. |
812 | */ |
813 | #ifdef MACH_BSD |
814 | bsd_init(); |
815 | #endif |
816 | |
817 | |
818 | /* |
819 | * Get rid of pages used for early boot tracing. |
820 | */ |
821 | kdebug_free_early_buf(); |
822 | |
823 | serial_keyboard_init(); /* Start serial keyboard if wanted */ |
824 | |
825 | vm_page_init_local_q(num_cpus: machine_info.max_cpus); |
826 | |
827 | thread_bind(PROCESSOR_NULL); |
828 | |
829 | /* |
830 | * Now that all CPUs are available to run threads, this is essentially |
831 | * a background thread. Take this opportunity to initialize and free |
832 | * any remaining vm_pages that were delayed earlier by pmap_startup(). |
833 | */ |
834 | vm_free_delayed_pages(); |
835 | |
836 | /* |
837 | * Become the pageout daemon. |
838 | */ |
839 | vm_pageout(); |
840 | /*NOTREACHED*/ |
841 | } |
842 | |
843 | /* |
844 | * slave_main: |
845 | * |
846 | * Load the first thread to start a processor. |
847 | * This path will also be used by the master processor |
848 | * after being offlined. |
849 | */ |
850 | void |
851 | slave_main(void *machine_param) |
852 | { |
853 | processor_t processor = current_processor(); |
854 | thread_t thread; |
855 | |
856 | /* |
857 | * Use the idle processor thread if there |
858 | * is no dedicated start up thread. |
859 | */ |
860 | if (processor->processor_offlined == true) { |
861 | /* Return to the saved processor_offline context */ |
862 | assert(processor->startup_thread == THREAD_NULL); |
863 | |
864 | thread = processor->idle_thread; |
865 | thread->parameter = machine_param; |
866 | } else if (processor->startup_thread) { |
867 | thread = processor->startup_thread; |
868 | processor->startup_thread = THREAD_NULL; |
869 | } else { |
870 | thread = processor->idle_thread; |
871 | thread->continuation = processor_start_thread; |
872 | thread->parameter = machine_param; |
873 | } |
874 | |
875 | load_context(thread); |
876 | /*NOTREACHED*/ |
877 | } |
878 | |
879 | /* |
880 | * processor_start_thread: |
881 | * |
882 | * First thread to execute on a started processor. |
883 | * |
884 | * Called at splsched. |
885 | */ |
886 | void |
887 | processor_start_thread(void *machine_param, |
888 | __unused wait_result_t result) |
889 | { |
890 | processor_t processor = current_processor(); |
891 | thread_t self = current_thread(); |
892 | |
893 | slave_machine_init(machine_param); |
894 | |
895 | /* |
896 | * If running the idle processor thread, |
897 | * reenter the idle loop, else terminate. |
898 | */ |
899 | if (self == processor->idle_thread) { |
900 | thread_block(continuation: idle_thread); |
901 | } |
902 | |
903 | thread_terminate(target_act: self); |
904 | /*NOTREACHED*/ |
905 | } |
906 | |
907 | /* |
908 | * load_context: |
909 | * |
910 | * Start the first thread on a processor. |
911 | * This may be the first thread ever run on a processor, or |
912 | * it could be a processor that was previously offlined. |
913 | */ |
914 | static void __attribute__((noreturn)) |
915 | load_context( |
916 | thread_t thread) |
917 | { |
918 | processor_t processor = current_processor(); |
919 | |
920 | |
921 | #define load_context_kprintf(x...) /* kprintf("load_context: " x) */ |
922 | |
923 | load_context_kprintf("machine_set_current_thread\n" ); |
924 | machine_set_current_thread(thread); |
925 | |
926 | load_context_kprintf("processor_up\n" ); |
927 | |
928 | PMAP_ACTIVATE_KERNEL(processor->cpu_id); |
929 | |
930 | /* |
931 | * Acquire a stack if none attached. The panic |
932 | * should never occur since the thread is expected |
933 | * to have reserved stack. |
934 | */ |
935 | load_context_kprintf("thread %p, stack %lx, stackptr %lx\n" , thread, |
936 | thread->kernel_stack, thread->machine.kstackptr); |
937 | if (!thread->kernel_stack) { |
938 | load_context_kprintf("stack_alloc_try\n" ); |
939 | if (!stack_alloc_try(thread)) { |
940 | panic("load_context" ); |
941 | } |
942 | } |
943 | |
944 | /* |
945 | * The idle processor threads are not counted as |
946 | * running for load calculations. |
947 | */ |
948 | if (!(thread->state & TH_IDLE)) { |
949 | SCHED(run_count_incr)(thread); |
950 | } |
951 | |
952 | processor->active_thread = thread; |
953 | processor_state_update_explicit(processor, pri: thread->sched_pri, |
954 | SFI_CLASS_KERNEL, pset_type: PSET_SMP, perfctl_class: thread_get_perfcontrol_class(thread), urgency: THREAD_URGENCY_NONE, |
955 | bucket: ((thread->state & TH_IDLE) || (thread->bound_processor != PROCESSOR_NULL)) ? TH_BUCKET_SCHED_MAX : thread->th_sched_bucket); |
956 | processor->current_is_bound = thread->bound_processor != PROCESSOR_NULL; |
957 | processor->current_is_NO_SMT = false; |
958 | processor->current_is_eagerpreempt = false; |
959 | #if CONFIG_THREAD_GROUPS |
960 | processor->current_thread_group = thread_group_get(t: thread); |
961 | #endif |
962 | processor->starting_pri = thread->sched_pri; |
963 | processor->deadline = UINT64_MAX; |
964 | thread->last_processor = processor; |
965 | processor_up(processor); |
966 | struct recount_snap snap = { 0 }; |
967 | recount_snapshot(snap: &snap); |
968 | processor->last_dispatch = snap.rsn_time_mach; |
969 | recount_processor_online(processor, snap: &snap); |
970 | |
971 | smr_cpu_join(processor, ctime: processor->last_dispatch); |
972 | |
973 | PMAP_ACTIVATE_USER(thread, processor->cpu_id); |
974 | |
975 | load_context_kprintf("machine_load_context\n" ); |
976 | |
977 | #if KASAN_TBI |
978 | __asan_handle_no_return(); |
979 | #endif /* KASAN_TBI */ |
980 | |
981 | machine_load_context(thread); |
982 | /*NOTREACHED*/ |
983 | } |
984 | |
985 | void |
986 | scale_setup(void) |
987 | { |
988 | int scale = 0; |
989 | #if defined(__LP64__) |
990 | typeof(task_max) task_max_base = task_max; |
991 | |
992 | /* Raise limits for servers with >= 16G */ |
993 | if ((serverperfmode != 0) && ((uint64_t)max_mem_actual >= (uint64_t)(16 * 1024 * 1024 * 1024ULL))) { |
994 | scale = (int)((uint64_t)sane_size / (uint64_t)(8 * 1024 * 1024 * 1024ULL)); |
995 | /* limit to 128 G */ |
996 | if (scale > 16) { |
997 | scale = 16; |
998 | } |
999 | task_max_base = 2500; |
1000 | /* Raise limits for machines with >= 3GB */ |
1001 | } else if ((uint64_t)max_mem_actual >= (uint64_t)(3 * 1024 * 1024 * 1024ULL)) { |
1002 | if ((uint64_t)max_mem_actual < (uint64_t)(8 * 1024 * 1024 * 1024ULL)) { |
1003 | scale = 2; |
1004 | } else { |
1005 | /* limit to 64GB */ |
1006 | scale = MIN(16, (int)((uint64_t)max_mem_actual / (uint64_t)(4 * 1024 * 1024 * 1024ULL))); |
1007 | } |
1008 | } |
1009 | |
1010 | task_max = MAX(task_max, task_max_base * scale); |
1011 | |
1012 | if (scale != 0) { |
1013 | task_threadmax = task_max; |
1014 | thread_max = task_max * 5; |
1015 | } |
1016 | |
1017 | #endif |
1018 | |
1019 | bsd_scale_setup(scale); |
1020 | } |
1021 | |