1 | /* |
2 | * Copyright (c) 2000-2020 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ |
29 | /* |
30 | * Mach Operating System |
31 | * Copyright (c) 1987 Carnegie-Mellon University |
32 | * All rights reserved. The CMU software License Agreement specifies |
33 | * the terms and conditions for use and redistribution. |
34 | */ |
35 | |
36 | /*- |
37 | * Copyright (c) 1982, 1986, 1991, 1993 |
38 | * The Regents of the University of California. All rights reserved. |
39 | * (c) UNIX System Laboratories, Inc. |
40 | * All or some portions of this file are derived from material licensed |
41 | * to the University of California by American Telephone and Telegraph |
42 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with |
43 | * the permission of UNIX System Laboratories, Inc. |
44 | * |
45 | * Redistribution and use in source and binary forms, with or without |
46 | * modification, are permitted provided that the following conditions |
47 | * are met: |
48 | * 1. Redistributions of source code must retain the above copyright |
49 | * notice, this list of conditions and the following disclaimer. |
50 | * 2. Redistributions in binary form must reproduce the above copyright |
51 | * notice, this list of conditions and the following disclaimer in the |
52 | * documentation and/or other materials provided with the distribution. |
53 | * 3. All advertising materials mentioning features or use of this software |
54 | * must display the following acknowledgement: |
55 | * This product includes software developed by the University of |
56 | * California, Berkeley and its contributors. |
57 | * 4. Neither the name of the University nor the names of its contributors |
58 | * may be used to endorse or promote products derived from this software |
59 | * without specific prior written permission. |
60 | * |
61 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
62 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
63 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
64 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
65 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
66 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
67 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
68 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
69 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
70 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
71 | * SUCH DAMAGE. |
72 | * |
73 | * from: @(#)kern_exec.c 8.1 (Berkeley) 6/10/93 |
74 | */ |
75 | /* |
76 | * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce |
77 | * support for mandatory and extensible security protections. This notice |
78 | * is included in support of clause 2.2 (b) of the Apple Public License, |
79 | * Version 2.0. |
80 | */ |
81 | #include <machine/reg.h> |
82 | #include <machine/cpu_capabilities.h> |
83 | |
84 | #include <sys/cdefs.h> |
85 | #include <sys/param.h> |
86 | #include <sys/systm.h> |
87 | #include <sys/filedesc.h> |
88 | #include <sys/kernel.h> |
89 | #include <sys/proc_internal.h> |
90 | #include <sys/kauth.h> |
91 | #include <sys/user.h> |
92 | #include <sys/socketvar.h> |
93 | #include <sys/malloc.h> |
94 | #include <sys/namei.h> |
95 | #include <sys/mount_internal.h> |
96 | #include <sys/vnode_internal.h> |
97 | #include <sys/file_internal.h> |
98 | #include <sys/stat.h> |
99 | #include <sys/uio_internal.h> |
100 | #include <sys/acct.h> |
101 | #include <sys/exec.h> |
102 | #include <sys/kdebug.h> |
103 | #include <sys/signal.h> |
104 | #include <sys/aio_kern.h> |
105 | #include <sys/sysproto.h> |
106 | #include <sys/sysctl.h> |
107 | #include <sys/persona.h> |
108 | #include <sys/reason.h> |
109 | #if SYSV_SHM |
110 | #include <sys/shm_internal.h> /* shmexec() */ |
111 | #endif |
112 | #include <sys/ubc_internal.h> /* ubc_map() */ |
113 | #include <sys/spawn.h> |
114 | #include <sys/spawn_internal.h> |
115 | #include <sys/process_policy.h> |
116 | #include <sys/codesign.h> |
117 | #include <sys/random.h> |
118 | #include <crypto/sha1.h> |
119 | |
120 | #include <libkern/libkern.h> |
121 | #include <libkern/crypto/sha2.h> |
122 | #include <security/audit/audit.h> |
123 | |
124 | #include <ipc/ipc_types.h> |
125 | |
126 | #include <mach/mach_param.h> |
127 | #include <mach/mach_types.h> |
128 | #include <mach/port.h> |
129 | #include <mach/task.h> |
130 | #include <mach/task_access.h> |
131 | #include <mach/thread_act.h> |
132 | #include <mach/vm_map.h> |
133 | #include <mach/mach_vm.h> |
134 | #include <mach/vm_param.h> |
135 | #include <mach_debug/mach_debug_types.h> |
136 | |
137 | #include <kern/sched_prim.h> /* thread_wakeup() */ |
138 | #include <kern/affinity.h> |
139 | #include <kern/assert.h> |
140 | #include <kern/task.h> |
141 | #include <kern/thread.h> |
142 | #include <kern/coalition.h> |
143 | #include <kern/policy_internal.h> |
144 | #include <kern/kalloc.h> |
145 | #include <kern/zalloc.h> /* zone_userspace_reboot_checks() */ |
146 | |
147 | #include <os/log.h> |
148 | |
149 | #if CONFIG_MACF |
150 | #include <security/mac_framework.h> |
151 | #include <security/mac_mach_internal.h> |
152 | #endif |
153 | |
154 | #if CONFIG_AUDIT |
155 | #include <bsm/audit_kevents.h> |
156 | #endif |
157 | |
158 | #if CONFIG_ARCADE |
159 | #include <kern/arcade.h> |
160 | #endif |
161 | |
162 | #include <vm/vm_map.h> |
163 | #include <vm/vm_kern.h> |
164 | #include <vm/vm_protos.h> |
165 | #include <vm/vm_kern.h> |
166 | #include <vm/vm_fault.h> |
167 | #include <vm/vm_pageout.h> |
168 | #include <vm/pmap.h> |
169 | #include <vm/vm_reclaim_internal.h> |
170 | |
171 | #include <kdp/kdp_dyld.h> |
172 | |
173 | #include <machine/machine_routines.h> |
174 | #include <machine/pal_routines.h> |
175 | |
176 | #include <pexpert/pexpert.h> |
177 | |
178 | #if CONFIG_MEMORYSTATUS |
179 | #include <sys/kern_memorystatus.h> |
180 | #endif |
181 | |
182 | #include <IOKit/IOBSD.h> |
183 | |
184 | #include "kern_exec_internal.h" |
185 | |
186 | #include <CoreEntitlements/CoreEntitlements.h> |
187 | |
188 | #include <mach/exclaves.h> |
189 | |
190 | extern boolean_t vm_darkwake_mode; |
191 | /* NOTE(review): the TUNABLE() arguments below read as (type, variable, boot-arg name, initial value) - confirm against the macro definition */ |
192 | /* "execfailurecrashes": enable crash reports on various exec failures (initial value false) */ |
193 | static TUNABLE(bool, bootarg_execfailurereports, "execfailurecrashes" , false); |
194 | /* The remaining tunables in this group exist only when XNU_TARGET_OS_OSX is set */ |
195 | #if XNU_TARGET_OS_OSX |
196 | #if __has_feature(ptrauth_calls) |
197 | static TUNABLE(bool, bootarg_arm64e_preview_abi, "-arm64e_preview_abi" , false); |
198 | #endif /* __has_feature(ptrauth_calls) */ |
199 | /* Only compiled into DEBUG or DEVELOPMENT kernels */ |
200 | #if DEBUG || DEVELOPMENT |
201 | static TUNABLE(bool, unentitled_ios_sim_launch, "unentitled_ios_sim_launch" , false); |
202 | #endif /* DEBUG || DEVELOPMENT */ |
203 | #endif /* XNU_TARGET_OS_OSX */ |
204 | |
205 | #if CONFIG_DTRACE |
206 | /* Declared by hand: do not include dtrace.h, it redefines kmem_[alloc/free] */ |
207 | extern void dtrace_proc_exec(proc_t); |
208 | extern void (*dtrace_proc_waitfor_exec_ptr)(proc_t); |
209 | |
210 | /* |
211 | * dtrace_proc_waitfor_exec_ptr can be added/removed in dtrace_subr.c, |
212 | * so its value is stored in this file-local pointer (initially NULL) |
213 | * before it is actually called. |
213 | */ |
214 | static void (*dtrace_proc_waitfor_hook)(proc_t) = NULL; |
215 | |
216 | #include <sys/dtrace_ptss.h> |
217 | #endif /* CONFIG_DTRACE */ |
218 | |
219 | #if __has_feature(ptrauth_calls) |
220 | static TUNABLE_DEV_WRITEABLE(int, vm_shared_region_per_team_id, |
221 | "vm_shared_region_per_team_id" , 1); |
222 | static TUNABLE_DEV_WRITEABLE(int, vm_shared_region_by_entitlement, |
223 | "vm_shared_region_by_entitlement" , 1); |
224 | /* NOTE(review): the two tunables above both start at 1; their names suggest shared regions keyed per team ID / by entitlement - confirm at the use sites */ |
225 | /* Upon userland request, reslide the shared cache (starts at 1 only when CONFIG_RESLIDE_SHARED_CACHE is set). */ |
226 | static TUNABLE_DEV_WRITEABLE(int, vm_shared_region_reslide_aslr, |
227 | "vm_shared_region_reslide_aslr" , |
228 | #if CONFIG_RESLIDE_SHARED_CACHE |
229 | 1 |
230 | #else |
231 | 0 |
232 | #endif /* CONFIG_RESLIDE_SHARED_CACHE */ |
233 | ); |
234 | |
235 | /* |
236 | * Flag to control what processes should get shared cache randomize resliding |
237 | * after a fault in the shared cache region: |
238 | * |
239 | * 0 - all processes get a new randomized slide |
240 | * 1 - only platform processes get a new randomized slide |
240 | * |
240 | * Unlike the tunables above, this one is not declared static. |
241 | */ |
242 | TUNABLE_DEV_WRITEABLE(int, vm_shared_region_reslide_restrict, |
243 | "vm_shared_region_reslide_restrict" , 1); |
244 | /* Read-write sysctls under the _vm node for the four tunables above; DEVELOPMENT/DEBUG builds only */ |
245 | #if DEVELOPMENT || DEBUG |
246 | SYSCTL_INT(_vm, OID_AUTO, vm_shared_region_per_team_id, |
247 | CTLFLAG_RW, &vm_shared_region_per_team_id, 0, "" ); |
248 | SYSCTL_INT(_vm, OID_AUTO, vm_shared_region_by_entitlement, |
249 | CTLFLAG_RW, &vm_shared_region_by_entitlement, 0, "" ); |
250 | SYSCTL_INT(_vm, OID_AUTO, vm_shared_region_reslide_restrict, |
251 | CTLFLAG_RW, &vm_shared_region_reslide_restrict, 0, "" ); |
252 | SYSCTL_INT(_vm, OID_AUTO, vm_shared_region_reslide_aslr, |
253 | CTLFLAG_RW, &vm_shared_region_reslide_aslr, 0, "" ); |
254 | #endif /* DEVELOPMENT || DEBUG */ |
255 | #endif /* __has_feature(ptrauth_calls) */ |
256 | |
257 | #if DEVELOPMENT || DEBUG |
258 | static TUNABLE(bool, enable_dext_coredumps_on_panic, "dext_panic_coredump" , true); /* last argument is true on DEVELOPMENT/DEBUG builds */ |
259 | #else |
260 | static TUNABLE(bool, enable_dext_coredumps_on_panic, "dext_panic_coredump" , false); /* ... and false on all other builds */ |
261 | #endif /* DEVELOPMENT || DEBUG */ |
262 | extern kern_return_t kern_register_userspace_coredump(task_t task, const char * name); |
263 | #define USERSPACE_COREDUMP_PANIC_ENTITLEMENT "com.apple.private.enable-coredump-on-panic" |
264 | #define USERSPACE_COREDUMP_PANIC_SEED_ENTITLEMENT \ |
265 | "com.apple.private.enable-coredump-on-panic-seed-privacy-approved" |
266 | /* Prototypes written out by hand in this file; each must stay in sync with its definition */ |
267 | extern void proc_apply_task_networkbg_internal(proc_t, thread_t); |
268 | extern void task_set_did_exec_flag(task_t task); |
269 | extern void task_clear_exec_copy_flag(task_t task); |
270 | proc_t proc_exec_switch_task(proc_t old_proc, proc_t new_proc, task_t old_task, |
271 | task_t new_task, struct image_params *imgp, void **inherit); |
272 | boolean_t task_is_active(task_t); |
273 | boolean_t thread_is_active(thread_t thread); |
274 | void thread_copy_resource_info(thread_t dst_thread, thread_t src_thread); |
275 | void *ipc_importance_exec_switch_task(task_t old_task, task_t new_task); |
276 | extern void ipc_importance_release(void *elem); |
277 | extern boolean_t task_has_watchports(task_t task); |
278 | extern void task_set_no_smt(task_t task); |
279 | #if defined(HAS_APPLE_PAC) |
280 | char *task_get_vm_shared_region_id_and_jop_pid(task_t task, uint64_t *jop_pid); |
281 | #endif /* defined(HAS_APPLE_PAC) */ |
282 | task_t convert_port_to_task(ipc_port_t port); |
283 | /* Conclave helpers, declared only when CONFIG_EXCLAVES is set */ |
284 | #if CONFIG_EXCLAVES |
285 | int task_add_conclave(task_t task, void *vnode, int64_t off, const char *task_conclave_id); |
286 | kern_return_t task_inherit_conclave(task_t old_task, task_t new_task, void *vnode, int64_t off); |
287 | #endif /* CONFIG_EXCLAVES */ |
288 | |
289 | |
290 | /* |
291 | * Mach things for which prototypes are unavailable from Mach headers. |
291 | * They are re-declared here by hand, so each declaration has to be kept |
291 | * in sync with the corresponding definition. |
292 | */ |
293 | #define IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND 0x1 /* NOTE(review): presumably a flag value for ipc_object_copyin() below - confirm */ |
294 | void ipc_task_enable( |
295 | task_t task); |
296 | void ipc_task_reset( |
297 | task_t task); |
298 | void ipc_thread_reset( |
299 | thread_t thread); |
300 | kern_return_t ipc_object_copyin( |
301 | ipc_space_t space, |
302 | mach_port_name_t name, |
303 | mach_msg_type_name_t msgt_name, |
304 | ipc_object_t *objectp, |
305 | mach_port_context_t context, |
306 | mach_msg_guard_flags_t *guard_flags, |
307 | uint32_t kmsg_flags); |
308 | void ipc_port_release_send(ipc_port_t); |
309 | |
310 | #if DEVELOPMENT || DEBUG |
311 | void task_importance_update_owner_info(task_t); |
312 | #endif /* DEVELOPMENT || DEBUG */ |
313 | |
314 | extern struct savearea *get_user_regs(thread_t); |
315 | /* Declared noinline; NOTE(review): presumably so the taskgated upcall stays a distinct frame in backtraces - confirm */ |
316 | __attribute__((noinline)) int __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__(mach_port_t task_access_port, int32_t new_pid); |
317 | |
318 | #include <kern/thread.h> |
319 | #include <kern/task.h> |
320 | #include <kern/ast.h> |
321 | #include <kern/mach_loader.h> |
322 | #include <kern/mach_fat.h> |
323 | #include <mach-o/fat.h> |
324 | #include <mach-o/loader.h> |
325 | #include <machine/vmparam.h> |
326 | #include <sys/imgact.h> |
327 | |
328 | #include <sys/sdt.h> |
329 | |
330 | |
/*
 * EAI_ITERLIMIT	The maximum number of times to iterate an image
 *			activator in exec_activate_image() before treating
 *			it as malformed/corrupt.
 */
#define EAI_ITERLIMIT           3

/*
 * For #! interpreter parsing
 *
 * IS_WHITESPACE(ch)	true for a space or a horizontal tab
 * IS_EOL(ch)		true for a newline or for '#', which starts a
 *			comment and therefore also ends the interpreter line
 *
 * The argument is fully parenthesized so that any expression (for example
 * "c & 0x7f" or a conditional expression) can be passed safely.  It is
 * still evaluated twice, so do not pass an expression with side effects.
 */
#define IS_WHITESPACE(ch) (((ch) == ' ') || ((ch) == '\t'))
#define IS_EOL(ch) (((ch) == '#') || ((ch) == '\n'))
343 | |
344 | extern vm_map_t bsd_pageable_map; |
345 | extern const struct fileops vnops; /* installed as fg_ops on the descriptor exec_shell_imgact() creates for SUGID scripts */ |
346 | extern int nextpidversion; |
347 | |
348 | |
/*
 * USER_ADDR_ALIGN
 *
 * Round addr up to the next multiple of val and yield the result as a
 * user_addr_t.  The mask arithmetic only rounds correctly when val is a
 * power of two.
 *
 * val is widened to user_addr_t before the mask is built, so that a
 * narrower unsigned alignment value cannot clear the upper address bits.
 * Both arguments are parenthesized; val is evaluated twice.
 */
#define USER_ADDR_ALIGN(addr, val) \
	(((user_addr_t)(addr) + ((user_addr_t)(val) - 1)) \
	& ~((user_addr_t)(val) - 1))
352 | |
353 | /* |
354 | * Entitlement string for subsystem root support |
355 | */ |
356 | #define SPAWN_SUBSYSTEM_ROOT_ENTITLEMENT "com.apple.private.spawn-subsystem-root" |
357 | |
358 | /* |
359 | * Entitlement string that allows setting p_crash_behavior to trigger panic on crash |
360 | */ |
361 | #define SPAWN_SET_PANIC_CRASH_BEHAVIOR "com.apple.private.spawn-panic-crash-behavior" |
362 | |
363 | /* Platform Code Exec Logging: starts at 0 and is exported read-write by the sysctl below */ |
364 | static int platform_exec_logging = 0; |
365 | |
366 | SYSCTL_DECL(_security_mac); |
367 | |
368 | SYSCTL_INT(_security_mac, OID_AUTO, platform_exec_logging, CTLFLAG_RW, &platform_exec_logging, 0, |
369 | "log cdhashes for all platform binary executions" ); |
370 | /* Log handle, initialised to OS_LOG_DEFAULT; NOTE(review): presumably the destination for the platform exec log lines - confirm at the use sites */ |
371 | static os_log_t peLog = OS_LOG_DEFAULT; |
372 | |
373 | struct exception_port_action_t { /* pairs a port with a port-action record */ |
374 | ipc_port_t port; |
375 | _ps_port_action_t *port_action; /* NOTE(review): presumably the spawn port action this port was resolved from - confirm */ |
376 | }; |
377 | /* Three counted arrays of ports; this is the argument type of exec_handle_port_actions() and exec_port_actions_destroy() */ |
378 | struct exec_port_actions { |
379 | uint32_t exception_port_count; /* NOTE(review): the *_count fields presumably hold the element counts of the matching arrays below - confirm */ |
380 | uint32_t portwatch_count; |
381 | uint32_t registered_count; |
382 | struct exception_port_action_t *excport_array; |
383 | ipc_port_t *portwatch_array; |
384 | ipc_port_t *registered_array; |
385 | }; |
386 | |
387 | struct image_params; /* Forward declaration, so the prototypes below can take a pointer to it */ |
388 | static int exec_activate_image(struct image_params *imgp); |
389 | static int exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp); |
390 | static int load_return_to_errno(load_return_t lrtn); /* maps a load_return_t to an errno; used by exec_fat_imgact() */ |
391 | static int execargs_alloc(struct image_params *imgp); |
392 | static int execargs_free(struct image_params *imgp); |
393 | static int exec_check_permissions(struct image_params *imgp); |
394 | static int exec_extract_strings(struct image_params *imgp); |
395 | static int exec_add_apple_strings(struct image_params *imgp, const load_result_t *load_result); |
396 | static int exec_handle_sugid(struct image_params *imgp); |
397 | static int sugid_scripts = 0; /* while 0, exec_shell_imgact() strips VSUID/VSGID from the script's mode */ |
398 | SYSCTL_INT(_kern, OID_AUTO, sugid_scripts, CTLFLAG_RW | CTLFLAG_LOCKED, &sugid_scripts, 0, "" ); /* read-write knob under the _kern node */ |
399 | static kern_return_t create_unix_stack(vm_map_t map, load_result_t* load_result, proc_t p); |
400 | static int copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size); |
401 | static void exec_resettextvp(proc_t, struct image_params *); |
402 | static int process_signature(proc_t, struct image_params *); |
403 | static void exec_prefault_data(proc_t, struct image_params *, load_result_t *); |
404 | static errno_t exec_handle_port_actions(struct image_params *imgp, |
405 | struct exec_port_actions *port_actions); |
406 | static errno_t exec_handle_exception_port_actions(const struct image_params *imgp, |
407 | const struct exec_port_actions *port_actions); |
408 | static errno_t exec_handle_spawnattr_policy(proc_t p, thread_t thread, int psa_apptype, uint64_t psa_qos_clamp, |
409 | task_role_t psa_darwin_role, struct exec_port_actions *port_actions); |
410 | static void exec_port_actions_destroy(struct exec_port_actions *port_actions); |
411 | |
412 | /* |
413 | * exec_add_user_string |
414 | * |
415 | * Add the requested string to the string space area. |
416 | * |
417 | * Parameters; struct image_params * image parameter block |
418 | * user_addr_t string to add to strings area |
419 | * int segment from which string comes |
420 | * boolean_t TRUE if string contributes to NCARGS |
421 | * |
422 | * Returns: 0 Success |
423 | * !0 Failure errno from copyinstr() |
424 | * |
425 | * Implicit returns: |
426 | * (imgp->ip_strendp) updated location of next add, if any |
427 | * (imgp->ip_strspace) updated byte count of space remaining |
428 | * (imgp->ip_argspace) updated byte count of space in NCARGS |
429 | */ |
430 | __attribute__((noinline)) |
431 | static int |
432 | exec_add_user_string(struct image_params *imgp, user_addr_t str, int seg, boolean_t is_ncargs) |
433 | { |
434 | int error = 0; |
435 | |
436 | do { |
437 | size_t len = 0; |
438 | int space; |
439 | |
440 | if (is_ncargs) { |
441 | space = imgp->ip_argspace; /* by definition smaller than ip_strspace */ |
442 | } else { |
443 | space = imgp->ip_strspace; |
444 | } |
445 | |
446 | if (space <= 0) { |
447 | error = E2BIG; |
448 | break; |
449 | } |
450 | |
451 | if (!UIO_SEG_IS_USER_SPACE(seg)) { |
452 | char *kstr = CAST_DOWN(char *, str); /* SAFE */ |
453 | error = copystr(kfaddr: kstr, kdaddr: imgp->ip_strendp, len: space, done: &len); |
454 | } else { |
455 | error = copyinstr(uaddr: str, kaddr: imgp->ip_strendp, len: space, done: &len); |
456 | } |
457 | |
458 | imgp->ip_strendp += len; |
459 | imgp->ip_strspace -= len; |
460 | if (is_ncargs) { |
461 | imgp->ip_argspace -= len; |
462 | } |
463 | } while (error == ENAMETOOLONG); |
464 | |
465 | return error; |
466 | } |
467 | |
468 | /* |
469 | * dyld is now passed the executable path as a getenv-like variable |
470 | * in the same fashion as the stack_guard and malloc_entropy keys. |
471 | */ |
472 | #define EXECUTABLE_KEY "executable_path=" |
473 | |
474 | /* |
475 | * exec_save_path |
476 | * |
477 | * To support new app package launching for Mac OS X, the dyld needs the |
478 | * first argument to execve() stored on the user stack. |
479 | * |
480 | * Save the executable path name at the bottom of the strings area and set |
481 | * the argument vector pointer to the location following that to indicate |
482 | * the start of the argument and environment tuples, setting the remaining |
483 | * string space count to the size of the string area minus the path length. |
484 | * |
485 | * Parameters; struct image_params * image parameter block |
486 | * char * path used to invoke program |
487 | * int segment from which path comes |
488 | * |
489 | * Returns: int 0 Success |
490 | * EFAULT Bad address |
491 | * copy[in]str:EFAULT Bad address |
492 | * copy[in]str:ENAMETOOLONG Filename too long |
493 | * |
494 | * Implicit returns: |
495 | * (imgp->ip_strings) saved path |
496 | * (imgp->ip_strspace) space remaining in ip_strings |
497 | * (imgp->ip_strendp) start of remaining copy area |
498 | * (imgp->ip_argspace) space remaining of NCARGS |
499 | * (imgp->ip_applec) Initial applev[0] |
500 | * |
501 | * Note: We have to do this before the initial namei() since in the |
502 | * path contains symbolic links, namei() will overwrite the |
503 | * original path buffer contents. If the last symbolic link |
504 | * resolved was a relative pathname, we would lose the original |
505 | * "path", which could be an absolute pathname. This might be |
506 | * unacceptable for dyld. |
507 | */ |
508 | static int |
509 | exec_save_path(struct image_params *imgp, user_addr_t path, int seg, const char **excpath) |
510 | { |
511 | int error; |
512 | size_t len; |
513 | char *kpath; |
514 | |
515 | // imgp->ip_strings can come out of a cache, so we need to obliterate the |
516 | // old path. |
517 | memset(s: imgp->ip_strings, c: '\0', n: strlen(EXECUTABLE_KEY) + MAXPATHLEN); |
518 | |
519 | len = MIN(MAXPATHLEN, imgp->ip_strspace); |
520 | |
521 | switch (seg) { |
522 | case UIO_USERSPACE32: |
523 | case UIO_USERSPACE64: /* Same for copyin()... */ |
524 | error = copyinstr(uaddr: path, kaddr: imgp->ip_strings + strlen(EXECUTABLE_KEY), len, done: &len); |
525 | break; |
526 | case UIO_SYSSPACE: |
527 | kpath = CAST_DOWN(char *, path); /* SAFE */ |
528 | error = copystr(kfaddr: kpath, kdaddr: imgp->ip_strings + strlen(EXECUTABLE_KEY), len, done: &len); |
529 | break; |
530 | default: |
531 | error = EFAULT; |
532 | break; |
533 | } |
534 | |
535 | if (!error) { |
536 | bcopy(EXECUTABLE_KEY, dst: imgp->ip_strings, n: strlen(EXECUTABLE_KEY)); |
537 | len += strlen(EXECUTABLE_KEY); |
538 | |
539 | imgp->ip_strendp += len; |
540 | imgp->ip_strspace -= len; |
541 | |
542 | if (excpath) { |
543 | *excpath = imgp->ip_strings + strlen(EXECUTABLE_KEY); |
544 | } |
545 | } |
546 | |
547 | return error; |
548 | } |
549 | |
550 | /* |
551 | * exec_reset_save_path |
552 | * |
553 | * If we detect a shell script, we need to reset the string area |
554 | * state so that the interpreter can be saved onto the stack. |
555 | * |
556 | * Parameters; struct image_params * image parameter block |
557 | * |
558 | * Returns: int 0 Success |
559 | * |
560 | * Implicit returns: |
561 | * (imgp->ip_strings) saved path |
562 | * (imgp->ip_strspace) space remaining in ip_strings |
563 | * (imgp->ip_strendp) start of remaining copy area |
564 | * (imgp->ip_argspace) space remaining of NCARGS |
565 | * |
566 | */ |
567 | static int |
568 | exec_reset_save_path(struct image_params *imgp) |
569 | { |
570 | imgp->ip_strendp = imgp->ip_strings; |
571 | imgp->ip_argspace = NCARGS; |
572 | imgp->ip_strspace = (NCARGS + PAGE_SIZE); |
573 | |
574 | return 0; |
575 | } |
576 | |
577 | /* |
578 | * exec_shell_imgact |
579 | * |
580 | * Image activator for interpreter scripts. If the image begins with |
581 | * the characters "#!", then it is an interpreter script. Verify the |
582 | * length of the script line indicating the interpreter is not in |
583 | * excess of the maximum allowed size. If this is the case, then |
584 | * break out the arguments, if any, which are separated by white |
585 | * space, and copy them into the argument save area as if they were |
586 | * provided on the command line before all other arguments. The line |
587 | * ends when we encounter a comment character ('#') or newline. |
588 | * |
589 | * Parameters; struct image_params * image parameter block |
590 | * |
591 | * Returns: -1 not an interpreter (keep looking) |
592 | * -3 Success: interpreter: relookup |
593 | * >0 Failure: interpreter: error number |
594 | * |
595 | * A return value other than -1 indicates subsequent image activators should |
596 | * not be given the opportunity to attempt to activate the image. |
597 | */ |
598 | static int |
599 | exec_shell_imgact(struct image_params *imgp) |
600 | { |
601 | char *vdata = imgp->ip_vdata; |
602 | char *ihp; |
603 | char *line_startp, *line_endp; |
604 | char *interp; |
605 | |
606 | /* |
607 | * Make sure it's a shell script. If we've already redirected |
608 | * from an interpreted file once, don't do it again. |
609 | */ |
610 | if (vdata[0] != '#' || |
611 | vdata[1] != '!' || |
612 | (imgp->ip_flags & IMGPF_INTERPRET) != 0) { |
613 | return -1; |
614 | } |
615 | |
616 | if (imgp->ip_origcputype != 0) { |
617 | /* Fat header previously matched, don't allow shell script inside */ |
618 | return -1; |
619 | } |
620 | |
621 | imgp->ip_flags |= IMGPF_INTERPRET; |
622 | imgp->ip_interp_sugid_fd = -1; |
623 | imgp->ip_interp_buffer[0] = '\0'; |
624 | |
625 | /* Check to see if SUGID scripts are permitted. If they aren't then |
626 | * clear the SUGID bits. |
627 | * imgp->ip_vattr is known to be valid. |
628 | */ |
629 | if (sugid_scripts == 0) { |
630 | imgp->ip_origvattr->va_mode &= ~(VSUID | VSGID); |
631 | } |
632 | |
633 | /* Try to find the first non-whitespace character */ |
634 | for (ihp = &vdata[2]; ihp < &vdata[IMG_SHSIZE]; ihp++) { |
635 | if (IS_EOL(*ihp)) { |
636 | /* Did not find interpreter, "#!\n" */ |
637 | return ENOEXEC; |
638 | } else if (IS_WHITESPACE(*ihp)) { |
639 | /* Whitespace, like "#! /bin/sh\n", keep going. */ |
640 | } else { |
641 | /* Found start of interpreter */ |
642 | break; |
643 | } |
644 | } |
645 | |
646 | if (ihp == &vdata[IMG_SHSIZE]) { |
647 | /* All whitespace, like "#! " */ |
648 | return ENOEXEC; |
649 | } |
650 | |
651 | line_startp = ihp; |
652 | |
653 | /* Try to find the end of the interpreter+args string */ |
654 | for (; ihp < &vdata[IMG_SHSIZE]; ihp++) { |
655 | if (IS_EOL(*ihp)) { |
656 | /* Got it */ |
657 | break; |
658 | } else { |
659 | /* Still part of interpreter or args */ |
660 | } |
661 | } |
662 | |
663 | if (ihp == &vdata[IMG_SHSIZE]) { |
664 | /* A long line, like "#! blah blah blah" without end */ |
665 | return ENOEXEC; |
666 | } |
667 | |
668 | /* Backtrack until we find the last non-whitespace */ |
669 | while (IS_EOL(*ihp) || IS_WHITESPACE(*ihp)) { |
670 | ihp--; |
671 | } |
672 | |
673 | /* The character after the last non-whitespace is our logical end of line */ |
674 | line_endp = ihp + 1; |
675 | |
676 | /* |
677 | * Now we have pointers to the usable part of: |
678 | * |
679 | * "#! /usr/bin/int first second third \n" |
680 | * ^ line_startp ^ line_endp |
681 | */ |
682 | |
683 | /* copy the interpreter name */ |
684 | interp = imgp->ip_interp_buffer; |
685 | for (ihp = line_startp; (ihp < line_endp) && !IS_WHITESPACE(*ihp); ihp++) { |
686 | *interp++ = *ihp; |
687 | } |
688 | *interp = '\0'; |
689 | |
690 | exec_reset_save_path(imgp); |
691 | exec_save_path(imgp, CAST_USER_ADDR_T(imgp->ip_interp_buffer), |
692 | seg: UIO_SYSSPACE, NULL); |
693 | |
694 | /* Copy the entire interpreter + args for later processing into argv[] */ |
695 | interp = imgp->ip_interp_buffer; |
696 | for (ihp = line_startp; (ihp < line_endp); ihp++) { |
697 | *interp++ = *ihp; |
698 | } |
699 | *interp = '\0'; |
700 | |
701 | #if CONFIG_SETUID |
702 | /* |
703 | * If we have an SUID or SGID script, create a file descriptor |
704 | * from the vnode and pass /dev/fd/%d instead of the actual |
705 | * path name so that the script does not get opened twice |
706 | */ |
707 | if (imgp->ip_origvattr->va_mode & (VSUID | VSGID)) { |
708 | proc_t p; |
709 | struct fileproc *fp; |
710 | int fd; |
711 | int error; |
712 | |
713 | p = vfs_context_proc(ctx: imgp->ip_vfs_context); |
714 | error = falloc_exec(p, imgp->ip_vfs_context, &fp, &fd); |
715 | if (error) { |
716 | return error; |
717 | } |
718 | |
719 | fp->fp_glob->fg_flag = FREAD; |
720 | fp->fp_glob->fg_ops = &vnops; |
721 | fp_set_data(fp, fg_data: imgp->ip_vp); |
722 | |
723 | proc_fdlock(p); |
724 | procfdtbl_releasefd(p, fd, NULL); |
725 | fp_drop(p, fd, fp, locked: 1); |
726 | proc_fdunlock(p); |
727 | vnode_ref(vp: imgp->ip_vp); |
728 | |
729 | imgp->ip_interp_sugid_fd = fd; |
730 | } |
731 | #endif /* CONFIG_SETUID */ |
732 | |
733 | return -3; |
734 | } |
735 | |
736 | |
737 | |
738 | /* |
739 | * exec_fat_imgact |
740 | * |
741 | * Image activator for fat 1.0 binaries. If the binary is fat, then we |
742 | * need to select an image from it internally, and make that the image |
743 | * we are going to attempt to execute. At present, this consists of |
744 | * reloading the first page for the image with a first page from the |
745 | * offset location indicated by the fat header. |
746 | * |
747 | * Parameters; struct image_params * image parameter block |
748 | * |
749 | * Returns: -1 not a fat binary (keep looking) |
750 | * -2 Success: encapsulated binary: reread |
751 | * >0 Failure: error number |
752 | * |
753 | * Important: This image activator is byte order neutral. |
754 | * |
755 | * Note: A return value other than -1 indicates subsequent image |
756 | * activators should not be given the opportunity to attempt |
757 | * to activate the image. |
758 | * |
759 | * If we find an encapsulated binary, we make no assertions |
760 | * about its validity; instead, we leave that up to a rescan |
761 | * for an activator to claim it, and, if it is claimed by one, |
762 | * that activator is responsible for determining validity. |
763 | */ |
764 | static int |
765 | exec_fat_imgact(struct image_params *imgp) |
766 | { |
767 | proc_t p = vfs_context_proc(ctx: imgp->ip_vfs_context); |
768 | kauth_cred_t cred = kauth_cred_proc_ref(procp: p); |
769 | struct fat_header * = (struct fat_header *)imgp->ip_vdata; |
770 | struct _posix_spawnattr *psa = NULL; |
771 | struct fat_arch fat_arch; |
772 | int resid, error; |
773 | load_return_t lret; |
774 | |
775 | if (imgp->ip_origcputype != 0) { |
776 | /* Fat header previously matched, don't allow another fat file inside */ |
777 | error = -1; /* not claimed */ |
778 | goto bad; |
779 | } |
780 | |
781 | /* Make sure it's a fat binary */ |
782 | if (OSSwapBigToHostInt32(fat_header->magic) != FAT_MAGIC) { |
783 | error = -1; /* not claimed */ |
784 | goto bad; |
785 | } |
786 | |
787 | /* imgp->ip_vdata has PAGE_SIZE, zerofilled if the file is smaller */ |
788 | lret = fatfile_validate_fatarches(data_ptr: (vm_offset_t)fat_header, PAGE_SIZE, |
789 | file_size: (off_t)imgp->ip_vattr->va_data_size); |
790 | if (lret != LOAD_SUCCESS) { |
791 | error = load_return_to_errno(lrtn: lret); |
792 | goto bad; |
793 | } |
794 | |
795 | /* If posix_spawn binprefs exist, respect those prefs. */ |
796 | psa = (struct _posix_spawnattr *) imgp->ip_px_sa; |
797 | if (psa != NULL && psa->psa_binprefs[0] != 0) { |
798 | uint32_t pr = 0; |
799 | |
800 | /* Check each preference listed against all arches in header */ |
801 | for (pr = 0; pr < NBINPREFS; pr++) { |
802 | cpu_type_t pref = psa->psa_binprefs[pr]; |
803 | cpu_type_t subpref = psa->psa_subcpuprefs[pr]; |
804 | |
805 | if (pref == 0) { |
806 | /* No suitable arch in the pref list */ |
807 | error = EBADARCH; |
808 | goto bad; |
809 | } |
810 | |
811 | if (pref == CPU_TYPE_ANY) { |
812 | /* Fall through to regular grading */ |
813 | goto regular_grading; |
814 | } |
815 | |
816 | lret = fatfile_getbestarch_for_cputype(cputype: pref, |
817 | cpusubtype: subpref, |
818 | data_ptr: (vm_offset_t)fat_header, |
819 | PAGE_SIZE, |
820 | imgp, |
821 | archret: &fat_arch); |
822 | if (lret == LOAD_SUCCESS) { |
823 | goto use_arch; |
824 | } |
825 | } |
826 | |
827 | /* Requested binary preference was not honored */ |
828 | error = EBADEXEC; |
829 | goto bad; |
830 | } |
831 | |
832 | regular_grading: |
833 | /* Look up our preferred architecture in the fat file. */ |
834 | lret = fatfile_getbestarch(data_ptr: (vm_offset_t)fat_header, |
835 | PAGE_SIZE, |
836 | imgp, |
837 | archret: &fat_arch, |
838 | affinity: (p->p_flag & P_AFFINITY) != 0); |
839 | if (lret != LOAD_SUCCESS) { |
840 | error = load_return_to_errno(lrtn: lret); |
841 | goto bad; |
842 | } |
843 | |
844 | use_arch: |
845 | /* Read the Mach-O header out of fat_arch */ |
846 | error = vn_rdwr(rw: UIO_READ, vp: imgp->ip_vp, base: imgp->ip_vdata, |
847 | PAGE_SIZE, offset: fat_arch.offset, |
848 | segflg: UIO_SYSSPACE, ioflg: (IO_UNIT | IO_NODELOCKED), |
849 | cred, aresid: &resid, p); |
850 | if (error) { |
851 | goto bad; |
852 | } |
853 | |
854 | if (resid) { |
855 | memset(s: imgp->ip_vdata + (PAGE_SIZE - resid), c: 0x0, n: resid); |
856 | } |
857 | |
858 | /* Success. Indicate we have identified an encapsulated binary */ |
859 | error = -2; |
860 | imgp->ip_arch_offset = (user_size_t)fat_arch.offset; |
861 | imgp->ip_arch_size = (user_size_t)fat_arch.size; |
862 | imgp->ip_origcputype = fat_arch.cputype; |
863 | imgp->ip_origcpusubtype = fat_arch.cpusubtype; |
864 | |
865 | bad: |
866 | kauth_cred_unref(&cred); |
867 | return error; |
868 | } |
869 | |
870 | static int |
871 | activate_exec_state(task_t task, proc_t p, thread_t thread, load_result_t *result) |
872 | { |
873 | int ret; |
874 | |
875 | (void)task_set_dyld_info(task, MACH_VM_MIN_ADDRESS, size: 0); |
876 | task_set_64bit(task, is_64bit: result->is_64bit_addr, is_64bit_data: result->is_64bit_data); |
877 | if (result->is_64bit_addr) { |
878 | OSBitOrAtomic(P_LP64, &p->p_flag); |
879 | get_bsdthread_info(thread)->uu_flag |= UT_LP64; |
880 | } else { |
881 | OSBitAndAtomic(~((uint32_t)P_LP64), &p->p_flag); |
882 | get_bsdthread_info(thread)->uu_flag &= ~UT_LP64; |
883 | } |
884 | task_set_mach_header_address(task, addr: result->mach_header); |
885 | |
886 | ret = thread_state_initialize(thread); |
887 | if (ret != KERN_SUCCESS) { |
888 | return ret; |
889 | } |
890 | |
891 | if (result->threadstate) { |
892 | uint32_t *ts = result->threadstate; |
893 | uint32_t total_size = (uint32_t)result->threadstate_sz; |
894 | |
895 | while (total_size > 0) { |
896 | uint32_t flavor = *ts++; |
897 | uint32_t size = *ts++; |
898 | |
899 | ret = thread_setstatus(thread, flavor, tstate: (thread_state_t)ts, count: size); |
900 | if (ret) { |
901 | return ret; |
902 | } |
903 | ts += size; |
904 | total_size -= (size + 2) * sizeof(uint32_t); |
905 | } |
906 | } |
907 | |
908 | thread_setentrypoint(thread, entry: result->entry_point); |
909 | |
910 | return KERN_SUCCESS; |
911 | } |
912 | |
913 | #if (DEVELOPMENT || DEBUG) |
914 | extern char panic_on_proc_crash[]; |
915 | extern int use_panic_on_proc_crash; |
916 | |
917 | extern char panic_on_proc_exit[]; |
918 | extern int use_panic_on_proc_exit; |
919 | |
920 | extern char panic_on_proc_spawn_fail[]; |
921 | extern int use_panic_on_proc_spawn_fail; |
922 | #endif |
923 | |
924 | void |
925 | set_proc_name(struct image_params *imgp, proc_t p) |
926 | { |
927 | int p_name_len = sizeof(p->p_name) - 1; |
928 | |
929 | if (imgp->ip_ndp->ni_cnd.cn_namelen > p_name_len) { |
930 | imgp->ip_ndp->ni_cnd.cn_namelen = p_name_len; |
931 | } |
932 | |
933 | bcopy(src: (caddr_t)imgp->ip_ndp->ni_cnd.cn_nameptr, dst: (caddr_t)p->p_name, |
934 | n: (unsigned)imgp->ip_ndp->ni_cnd.cn_namelen); |
935 | p->p_name[imgp->ip_ndp->ni_cnd.cn_namelen] = '\0'; |
936 | |
937 | if (imgp->ip_ndp->ni_cnd.cn_namelen > MAXCOMLEN) { |
938 | imgp->ip_ndp->ni_cnd.cn_namelen = MAXCOMLEN; |
939 | } |
940 | |
941 | bcopy(src: (caddr_t)imgp->ip_ndp->ni_cnd.cn_nameptr, dst: (caddr_t)p->p_comm, |
942 | n: (unsigned)imgp->ip_ndp->ni_cnd.cn_namelen); |
943 | p->p_comm[imgp->ip_ndp->ni_cnd.cn_namelen] = '\0'; |
944 | |
945 | #if DEVELOPMENT || DEBUG |
946 | /* |
947 | * This happens during image activation, so the crash behavior flags from |
948 | * posix_spawn will have already been set. So we don't have to worry about |
949 | * this being overridden. |
950 | */ |
951 | if (use_panic_on_proc_crash && strcmp(p->p_comm, panic_on_proc_crash) == 0) { |
952 | printf("will panic on proc crash: %s\n" , p->p_comm); |
953 | p->p_crash_behavior |= POSIX_SPAWN_PANIC_ON_CRASH; |
954 | } |
955 | |
956 | if (use_panic_on_proc_exit && strcmp(p->p_comm, panic_on_proc_exit) == 0) { |
957 | printf("will panic on proc exit: %s\n" , p->p_comm); |
958 | p->p_crash_behavior |= POSIX_SPAWN_PANIC_ON_EXIT; |
959 | } |
960 | |
961 | if (use_panic_on_proc_spawn_fail && strcmp(p->p_comm, panic_on_proc_spawn_fail) == 0) { |
962 | printf("will panic on proc spawn fail: %s\n" , p->p_comm); |
963 | p->p_crash_behavior |= POSIX_SPAWN_PANIC_ON_SPAWN_FAIL; |
964 | } |
965 | #endif |
966 | } |
967 | |
968 | #if __has_feature(ptrauth_calls) |
969 | /** |
970 | * Returns a team ID string that may be used to assign a shared region. |
971 | * |
972 | * Platform binaries do not have team IDs and will return NULL. Non-platform |
973 | * binaries without a team ID will be assigned an artificial team ID of "" |
974 | * (empty string) so that they will not be assigned to the default shared |
975 | * region. |
976 | * |
977 | * @param imgp image parameter block |
978 | * @return NULL if this is a platform binary, or an appropriate team ID string |
979 | * otherwise |
980 | */ |
981 | static inline const char * |
982 | get_teamid_for_shared_region(struct image_params *imgp) |
983 | { |
984 | assert(imgp->ip_vp != NULL); |
985 | |
986 | const char *ret = csvnode_get_teamid(imgp->ip_vp, imgp->ip_arch_offset); |
987 | if (ret) { |
988 | return ret; |
989 | } |
990 | |
991 | struct cs_blob *blob = csvnode_get_blob(imgp->ip_vp, imgp->ip_arch_offset); |
992 | if (csblob_get_platform_binary(blob)) { |
993 | return NULL; |
994 | } else { |
995 | static const char *NO_TEAM_ID = "" ; |
996 | return NO_TEAM_ID; |
997 | } |
998 | } |
999 | |
1000 | /** |
1001 | * Determines whether ptrauth should be enabled for the provided arm64 CPU subtype. |
1002 | * |
1003 | * @param cpusubtype Mach-O style CPU subtype |
1004 | * @return whether the CPU subtype matches arm64e with the current ptrauth ABI |
1005 | */ |
1006 | static inline bool |
1007 | arm64_cpusubtype_uses_ptrauth(cpu_subtype_t cpusubtype) |
1008 | { |
1009 | return (cpusubtype & ~CPU_SUBTYPE_MASK) == CPU_SUBTYPE_ARM64E && |
1010 | CPU_SUBTYPE_ARM64_PTR_AUTH_VERSION(cpusubtype) == CPU_SUBTYPE_ARM64_PTR_AUTH_CURRENT_VERSION; |
1011 | } |
1012 | |
1013 | #endif /* __has_feature(ptrauth_calls) */ |
1014 | |
1015 | /** |
1016 | * Returns whether a type/subtype slice matches the requested |
1017 | * type/subtype. |
1018 | * |
1019 | * @param mask Bits to mask from the requested/tested cpu type |
1020 | * @param req_cpu Requested cpu type |
1021 | * @param req_subcpu Requested cpu subtype |
1022 | * @param test_cpu Tested slice cpu type |
1023 | * @param test_subcpu Tested slice cpu subtype |
1024 | */ |
1025 | boolean_t |
1026 | binary_match(cpu_type_t mask, cpu_type_t req_cpu, |
1027 | cpu_subtype_t req_subcpu, cpu_type_t test_cpu, |
1028 | cpu_subtype_t test_subcpu) |
1029 | { |
1030 | if ((test_cpu & ~mask) != (req_cpu & ~mask)) { |
1031 | return FALSE; |
1032 | } |
1033 | |
1034 | test_subcpu &= ~CPU_SUBTYPE_MASK; |
1035 | req_subcpu &= ~CPU_SUBTYPE_MASK; |
1036 | |
1037 | if (test_subcpu != req_subcpu && req_subcpu != (CPU_SUBTYPE_ANY & ~CPU_SUBTYPE_MASK)) { |
1038 | return FALSE; |
1039 | } |
1040 | |
1041 | return TRUE; |
1042 | } |
1043 | |
1044 | |
1045 | #define MIN_IOS_TPRO_SDK_VERSION 0x00100000 |
1046 | #define MIN_OSX_TPRO_SDK_VERSION 0x000D0000 |
1047 | #define MIN_TVOS_TPRO_SDK_VERSION 0x000D0000 |
1048 | #define MIN_WATCHOS_TPRO_SDK_VERSION 0x00090000 |
1049 | #define MIN_DRIVERKIT_TPRO_SDK_VERSION 0x00600000 |
1050 | |
1051 | static void |
1052 | exec_setup_tpro(struct image_params *imgp, load_result_t *load_result) |
1053 | { |
1054 | extern boolean_t xprr_tpro_enabled; |
1055 | extern boolean_t enable_user_modifiable_perms; |
1056 | uint32_t min_sdk_version = 0; |
1057 | |
1058 | /* x86-64 translated code cannot take advantage of TPRO */ |
1059 | if (imgp->ip_flags & IMGPF_ROSETTA) { |
1060 | return; |
1061 | } |
1062 | |
1063 | /* Do not enable on 32-bit VA targets */ |
1064 | if (!(imgp->ip_flags & IMGPF_IS_64BIT_ADDR)) { |
1065 | return; |
1066 | } |
1067 | |
1068 | switch (load_result->ip_platform) { |
1069 | case PLATFORM_IOS: |
1070 | case PLATFORM_IOSSIMULATOR: |
1071 | case PLATFORM_MACCATALYST: |
1072 | min_sdk_version = MIN_IOS_TPRO_SDK_VERSION; |
1073 | break; |
1074 | case PLATFORM_MACOS: |
1075 | min_sdk_version = MIN_OSX_TPRO_SDK_VERSION; |
1076 | break; |
1077 | case PLATFORM_TVOS: |
1078 | case PLATFORM_TVOSSIMULATOR: |
1079 | min_sdk_version = MIN_TVOS_TPRO_SDK_VERSION; |
1080 | break; |
1081 | case PLATFORM_WATCHOS: |
1082 | case PLATFORM_WATCHOSSIMULATOR: |
1083 | min_sdk_version = MIN_WATCHOS_TPRO_SDK_VERSION; |
1084 | break; |
1085 | case PLATFORM_DRIVERKIT: |
1086 | min_sdk_version = MIN_DRIVERKIT_TPRO_SDK_VERSION; |
1087 | break; |
1088 | default: |
1089 | /* TPRO is on by default for newer platforms */ |
1090 | break; |
1091 | } |
1092 | |
1093 | } |
1094 | |
1095 | /* |
1096 | * If the passed in executable's vnode should use the RSR |
1097 | * shared region, then this should return TRUE, otherwise, return FALSE. |
1098 | */ |
1099 | static uint32_t rsr_current_version = 0; |
1100 | boolean_t (*rsr_check_vnode)(void *vnode) = NULL; |
1101 | |
1102 | boolean_t |
1103 | vnode_is_rsr(vnode_t vp) |
1104 | { |
1105 | if (!(vnode_isreg(vp) && vnode_tag(vp) == VT_APFS)) { |
1106 | return FALSE; |
1107 | } |
1108 | |
1109 | if (rsr_check_vnode != NULL && rsr_check_vnode((void *)vp)) { |
1110 | return TRUE; |
1111 | } |
1112 | return FALSE; |
1113 | } |
1114 | |
1115 | |
1116 | static inline void |
1117 | encode_HR_entitlement(const char *entitlement, HR_flags_t mask, |
1118 | const struct image_params *imgp, load_result_t *load_result) |
1119 | { |
1120 | if (IOVnodeHasEntitlement(vnode: imgp->ip_vp, off: (int64_t)imgp->ip_arch_offset, entitlement)) { |
1121 | load_result->hardened_runtime_binary |= mask; |
1122 | } |
1123 | } |
1124 | |
1125 | uint32_t |
1126 | rsr_get_version(void) |
1127 | { |
1128 | return os_atomic_load(&rsr_current_version, relaxed); |
1129 | } |
1130 | |
1131 | void |
1132 | rsr_bump_version(void) |
1133 | { |
1134 | os_atomic_inc(&rsr_current_version, relaxed); |
1135 | } |
1136 | |
#if XNU_TARGET_OS_OSX
/*
 * rsr_version_sysctl
 *
 * sysctl handler for the RSR version counter.
 *
 * The current version is always copied out first. If the request also
 * carries a new value, that value is copied in and, when it is non-zero,
 * the counter is bumped by one; the value written is not stored.
 *
 * Returns:	0		success
 *		non-zero	error returned by SYSCTL_OUT or SYSCTL_IN
 */
static int
rsr_version_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	int value = rsr_get_version();
	int error;

	error = SYSCTL_OUT(req, &value, sizeof(int));
	if (error == 0 && req->newptr) {
		/* A write was supplied: any non-zero value requests a bump. */
		error = SYSCTL_IN(req, &value, sizeof(int));
		if (error == 0 && value != 0) {
			rsr_bump_version();
		}
	}

	return error;
}


/* Registered under the _vm parent as "shared_region_control". */
SYSCTL_PROC(_vm, OID_AUTO, shared_region_control,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, rsr_version_sysctl, "I", "");
#endif /* XNU_TARGET_OS_OSX */
1167 | |
1168 | /* |
1169 | * exec_mach_imgact |
1170 | * |
 * Image activator for Mach-O 1.0 and Mach-O 2.0 binaries.
 *
 * Parameters:	struct image_params *	image parameter block
 *
 * Returns:	-1		not a Mach-O executable (keep looking)
1176 | * -2 Success: encapsulated binary: reread |
1177 | * >0 Failure: error number |
1178 | * EBADARCH Mach-o binary, but with an unrecognized |
1179 | * architecture |
1180 | * ENOMEM No memory for child process after - |
1181 | * can only happen after vfork() |
1182 | * |
1183 | * Important: This image activator is NOT byte order neutral. |
1184 | * |
1185 | * Note: A return value other than -1 indicates subsequent image |
1186 | * activators should not be given the opportunity to attempt |
1187 | * to activate the image. |
1188 | */ |
1189 | static int |
1190 | exec_mach_imgact(struct image_params *imgp) |
1191 | { |
1192 | struct mach_header * = (struct mach_header *)imgp->ip_vdata; |
1193 | proc_t p = vfs_context_proc(ctx: imgp->ip_vfs_context); |
1194 | int error = 0; |
1195 | task_t task; |
1196 | task_t new_task = NULL; /* protected by vfexec */ |
1197 | thread_t thread; |
1198 | struct uthread *uthread; |
1199 | vm_map_t old_map = VM_MAP_NULL; |
1200 | vm_map_t map = VM_MAP_NULL; |
1201 | load_return_t lret; |
1202 | load_result_t load_result = {}; |
1203 | struct _posix_spawnattr *psa = NULL; |
1204 | int spawn = (imgp->ip_flags & IMGPF_SPAWN); |
1205 | const int vfexec = 0; |
1206 | int exec = (imgp->ip_flags & IMGPF_EXEC); |
1207 | os_reason_t exec_failure_reason = OS_REASON_NULL; |
1208 | boolean_t reslide = FALSE; |
1209 | char * userspace_coredump_name = NULL; |
1210 | |
1211 | /* |
1212 | * make sure it's a Mach-O 1.0 or Mach-O 2.0 binary; the difference |
1213 | * is a reserved field on the end, so for the most part, we can |
1214 | * treat them as if they were identical. Reverse-endian Mach-O |
1215 | * binaries are recognized but not compatible. |
1216 | */ |
1217 | if ((mach_header->magic == MH_CIGAM) || |
1218 | (mach_header->magic == MH_CIGAM_64)) { |
1219 | error = EBADARCH; |
1220 | goto bad; |
1221 | } |
1222 | |
1223 | if ((mach_header->magic != MH_MAGIC) && |
1224 | (mach_header->magic != MH_MAGIC_64)) { |
1225 | error = -1; |
1226 | goto bad; |
1227 | } |
1228 | |
1229 | if (mach_header->filetype != MH_EXECUTE) { |
1230 | error = -1; |
1231 | goto bad; |
1232 | } |
1233 | |
1234 | if (imgp->ip_origcputype != 0) { |
1235 | /* Fat header previously had an idea about this thin file */ |
1236 | if (imgp->ip_origcputype != mach_header->cputype || |
1237 | imgp->ip_origcpusubtype != mach_header->cpusubtype) { |
1238 | error = EBADARCH; |
1239 | goto bad; |
1240 | } |
1241 | } else { |
1242 | imgp->ip_origcputype = mach_header->cputype; |
1243 | imgp->ip_origcpusubtype = mach_header->cpusubtype; |
1244 | } |
1245 | |
1246 | task = current_task(); |
1247 | thread = current_thread(); |
1248 | uthread = get_bsdthread_info(thread); |
1249 | |
1250 | if ((mach_header->cputype & CPU_ARCH_ABI64) == CPU_ARCH_ABI64) { |
1251 | imgp->ip_flags |= IMGPF_IS_64BIT_ADDR | IMGPF_IS_64BIT_DATA; |
1252 | } |
1253 | |
1254 | |
1255 | /* If posix_spawn binprefs exist, respect those prefs. */ |
1256 | psa = (struct _posix_spawnattr *) imgp->ip_px_sa; |
1257 | if (psa != NULL && psa->psa_binprefs[0] != 0) { |
1258 | int pr = 0; |
1259 | for (pr = 0; pr < NBINPREFS; pr++) { |
1260 | cpu_type_t pref = psa->psa_binprefs[pr]; |
1261 | cpu_subtype_t subpref = psa->psa_subcpuprefs[pr]; |
1262 | |
1263 | if (pref == 0) { |
1264 | /* No suitable arch in the pref list */ |
1265 | error = EBADARCH; |
1266 | goto bad; |
1267 | } |
1268 | |
1269 | if (pref == CPU_TYPE_ANY) { |
1270 | /* Jump to regular grading */ |
1271 | goto grade; |
1272 | } |
1273 | |
1274 | if (binary_match(CPU_ARCH_MASK, req_cpu: pref, req_subcpu: subpref, |
1275 | test_cpu: imgp->ip_origcputype, test_subcpu: imgp->ip_origcpusubtype)) { |
1276 | goto grade; |
1277 | } |
1278 | } |
1279 | error = EBADARCH; |
1280 | goto bad; |
1281 | } |
1282 | grade: |
1283 | if (!grade_binary(imgp->ip_origcputype, imgp->ip_origcpusubtype & ~CPU_SUBTYPE_MASK, |
1284 | imgp->ip_origcpusubtype & CPU_SUBTYPE_MASK, TRUE)) { |
1285 | error = EBADARCH; |
1286 | goto bad; |
1287 | } |
1288 | |
1289 | if (validate_potential_simulator_binary(exectype: imgp->ip_origcputype, imgp, |
1290 | file_offset: imgp->ip_arch_offset, macho_size: imgp->ip_arch_size) != LOAD_SUCCESS) { |
1291 | #if __x86_64__ |
1292 | const char *excpath; |
1293 | error = exec_save_path(imgp, imgp->ip_user_fname, imgp->ip_seg, &excpath); |
1294 | os_log_error(OS_LOG_DEFAULT, "Unsupported 32-bit executable: \"%s\"" , (error) ? imgp->ip_vp->v_name : excpath); |
1295 | #endif |
1296 | error = EBADARCH; |
1297 | goto bad; |
1298 | } |
1299 | |
1300 | #if defined(HAS_APPLE_PAC) |
1301 | assert(mach_header->cputype == CPU_TYPE_ARM64 |
1302 | ); |
1303 | |
1304 | if ((mach_header->cputype == CPU_TYPE_ARM64 && |
1305 | arm64_cpusubtype_uses_ptrauth(mach_header->cpusubtype)) |
1306 | ) { |
1307 | imgp->ip_flags &= ~IMGPF_NOJOP; |
1308 | } else { |
1309 | imgp->ip_flags |= IMGPF_NOJOP; |
1310 | } |
1311 | #endif |
1312 | |
1313 | /* Copy in arguments/environment from the old process */ |
1314 | error = exec_extract_strings(imgp); |
1315 | if (error) { |
1316 | goto bad; |
1317 | } |
1318 | |
1319 | AUDIT_ARG(argv, imgp->ip_startargv, imgp->ip_argc, |
1320 | imgp->ip_endargv - imgp->ip_startargv); |
1321 | AUDIT_ARG(envv, imgp->ip_endargv, imgp->ip_envc, |
1322 | imgp->ip_endenvv - imgp->ip_endargv); |
1323 | |
1324 | |
1325 | |
1326 | /* reset local idea of thread, uthread, task */ |
1327 | thread = imgp->ip_new_thread; |
1328 | uthread = get_bsdthread_info(thread); |
1329 | task = new_task = get_threadtask(thread); |
1330 | |
1331 | /* |
1332 | * Load the Mach-O file. |
1333 | * |
1334 | * NOTE: An error after this point indicates we have potentially |
1335 | * destroyed or overwritten some process state while attempting an |
1336 | * execve() following a vfork(), which is an unrecoverable condition. |
1337 | * We send the new process an immediate SIGKILL to avoid it executing |
1338 | * any instructions in the mutated address space. For true spawns, |
1339 | * this is not the case, and "too late" is still not too late to |
1340 | * return an error code to the parent process. |
1341 | */ |
1342 | |
1343 | /* |
1344 | * Actually load the image file we previously decided to load. |
1345 | */ |
1346 | lret = load_machfile(imgp, header: mach_header, thread, mapp: &map, result: &load_result); |
1347 | if (lret != LOAD_SUCCESS) { |
1348 | error = load_return_to_errno(lrtn: lret); |
1349 | |
1350 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE, |
1351 | proc_getpid(p), OS_REASON_EXEC, EXEC_EXIT_REASON_BAD_MACHO, 0, 0); |
1352 | if (lret == LOAD_BADMACHO_UPX) { |
1353 | set_proc_name(imgp, p); |
1354 | exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_UPX); |
1355 | exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT; |
1356 | } else { |
1357 | exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_BAD_MACHO); |
1358 | |
1359 | if (bootarg_execfailurereports) { |
1360 | set_proc_name(imgp, p); |
1361 | exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT; |
1362 | } |
1363 | } |
1364 | |
1365 | exec_failure_reason->osr_flags |= OS_REASON_FLAG_CONSISTENT_FAILURE; |
1366 | |
1367 | goto badtoolate; |
1368 | } |
1369 | |
1370 | /* |
1371 | * ERROR RECOVERY |
1372 | * |
1373 | * load_machfile() returned the new VM map ("map") but we haven't |
1374 | * committed to it yet. |
1375 | * Any error path between here and the point where we commit to using |
1376 | * the new "map" (with swap_task_map()) should deallocate "map". |
1377 | */ |
1378 | |
1379 | #ifndef KASAN |
1380 | /* |
1381 | * Security: zone sanity checks on fresh boot or initproc re-exec. |
1382 | * launchd by design does not tear down its own service port on USR (rdar://72797967), |
1383 | * which means here is the earliest point we can assert on empty service port label zone, |
1384 | * after load_machfile() above terminates old launchd's IPC space. |
1385 | * |
1386 | * Disable on KASAN builds since zone_size_allocated() accounts for elements |
1387 | * under quarantine. |
1388 | */ |
1389 | if (task_pid(task) == 1) { |
1390 | zone_userspace_reboot_checks(); |
1391 | } |
1392 | #endif |
1393 | |
1394 | proc_lock(p); |
1395 | p->p_cputype = imgp->ip_origcputype; |
1396 | p->p_cpusubtype = imgp->ip_origcpusubtype; |
1397 | proc_setplatformdata(p, load_result.ip_platform, load_result.lr_min_sdk, load_result.lr_sdk); |
1398 | exec_setup_tpro(imgp, load_result: &load_result); |
1399 | |
1400 | vm_map_set_size_limit(map, limit: proc_limitgetcur(p, RLIMIT_AS)); |
1401 | vm_map_set_data_limit(map, limit: proc_limitgetcur(p, RLIMIT_DATA)); |
1402 | vm_map_set_user_wire_limit(map, limit: (vm_size_t)proc_limitgetcur(p, RLIMIT_MEMLOCK)); |
1403 | #if XNU_TARGET_OS_OSX |
1404 | if (proc_platform(p) == PLATFORM_IOS) { |
1405 | assert(vm_map_is_alien(map)); |
1406 | } else { |
1407 | assert(!vm_map_is_alien(map)); |
1408 | } |
1409 | #endif /* XNU_TARGET_OS_OSX */ |
1410 | proc_unlock(p); |
1411 | |
1412 | /* |
1413 | * Set TPRO flags if enabled |
1414 | */ |
1415 | |
1416 | /* |
1417 | * Set code-signing flags if this binary is signed, or if parent has |
1418 | * requested them on exec. |
1419 | */ |
1420 | if (load_result.csflags & CS_VALID) { |
1421 | imgp->ip_csflags |= load_result.csflags & |
1422 | (CS_VALID | CS_SIGNED | CS_DEV_CODE | CS_LINKER_SIGNED | |
1423 | CS_HARD | CS_KILL | CS_RESTRICT | CS_ENFORCEMENT | CS_REQUIRE_LV | |
1424 | CS_FORCED_LV | CS_ENTITLEMENTS_VALIDATED | CS_NO_UNTRUSTED_HELPERS | CS_RUNTIME | |
1425 | CS_ENTITLEMENT_FLAGS | |
1426 | CS_EXEC_SET_HARD | CS_EXEC_SET_KILL | CS_EXEC_SET_ENFORCEMENT); |
1427 | } else { |
1428 | imgp->ip_csflags &= ~CS_VALID; |
1429 | } |
1430 | |
1431 | if (proc_getcsflags(p) & CS_EXEC_SET_HARD) { |
1432 | imgp->ip_csflags |= CS_HARD; |
1433 | } |
1434 | if (proc_getcsflags(p) & CS_EXEC_SET_KILL) { |
1435 | imgp->ip_csflags |= CS_KILL; |
1436 | } |
1437 | if (proc_getcsflags(p) & CS_EXEC_SET_ENFORCEMENT) { |
1438 | imgp->ip_csflags |= CS_ENFORCEMENT; |
1439 | } |
1440 | if (proc_getcsflags(p) & CS_EXEC_INHERIT_SIP) { |
1441 | if (proc_getcsflags(p) & CS_INSTALLER) { |
1442 | imgp->ip_csflags |= CS_INSTALLER; |
1443 | } |
1444 | if (proc_getcsflags(p) & CS_DATAVAULT_CONTROLLER) { |
1445 | imgp->ip_csflags |= CS_DATAVAULT_CONTROLLER; |
1446 | } |
1447 | if (proc_getcsflags(p) & CS_NVRAM_UNRESTRICTED) { |
1448 | imgp->ip_csflags |= CS_NVRAM_UNRESTRICTED; |
1449 | } |
1450 | } |
1451 | |
1452 | #if __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX) |
1453 | /* |
1454 | * ptrauth version 0 is a preview ABI. Developers can opt into running |
1455 | * their own arm64e binaries for local testing, with the understanding |
1456 | * that future OSes may break ABI. |
1457 | */ |
1458 | if ((imgp->ip_origcpusubtype & ~CPU_SUBTYPE_MASK) == CPU_SUBTYPE_ARM64E && |
1459 | CPU_SUBTYPE_ARM64_PTR_AUTH_VERSION(imgp->ip_origcpusubtype) == 0 && |
1460 | !load_result.platform_binary && |
1461 | !bootarg_arm64e_preview_abi) { |
1462 | static bool logged_once = false; |
1463 | set_proc_name(imgp, p); |
1464 | |
1465 | printf("%s: not running binary \"%s\" built against preview arm64e ABI\n" , __func__, p->p_name); |
1466 | if (!os_atomic_xchg(&logged_once, true, relaxed)) { |
1467 | printf("%s: (to allow this, add \"-arm64e_preview_abi\" to boot-args)\n" , __func__); |
1468 | } |
1469 | |
1470 | exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_BAD_MACHO); |
1471 | if (bootarg_execfailurereports) { |
1472 | exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT; |
1473 | exec_failure_reason->osr_flags |= OS_REASON_FLAG_CONSISTENT_FAILURE; |
1474 | } |
1475 | |
1476 | /* release new address space since we won't use it */ |
1477 | imgp->ip_free_map = map; |
1478 | map = VM_MAP_NULL; |
1479 | goto badtoolate; |
1480 | } |
1481 | |
1482 | if ((imgp->ip_origcpusubtype & ~CPU_SUBTYPE_MASK) != CPU_SUBTYPE_ARM64E && |
1483 | imgp->ip_origcputype == CPU_TYPE_ARM64 && |
1484 | load_result.platform_binary && |
1485 | (imgp->ip_flags & IMGPF_DRIVER) != 0) { |
1486 | set_proc_name(imgp, p); |
1487 | printf("%s: disallowing arm64 platform driverkit binary \"%s\", should be arm64e\n" , __func__, p->p_name); |
1488 | exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_BAD_MACHO); |
1489 | if (bootarg_execfailurereports) { |
1490 | exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT; |
1491 | exec_failure_reason->osr_flags |= OS_REASON_FLAG_CONSISTENT_FAILURE; |
1492 | } |
1493 | |
1494 | /* release new address space since we won't use it */ |
1495 | imgp->ip_free_map = map; |
1496 | map = VM_MAP_NULL; |
1497 | goto badtoolate; |
1498 | } |
1499 | #endif /* __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX) */ |
1500 | |
1501 | |
1502 | load_result.hardened_runtime_binary = 0; |
	// Propagate which hardened runtime entitlements are active to the apple array
1504 | encode_HR_entitlement(kCSWebBrowserHostEntitlement, mask: BrowserHostEntitlementMask, imgp, load_result: &load_result); |
1505 | encode_HR_entitlement(kCSWebBrowserGPUEntitlement, mask: BrowserGPUEntitlementMask, imgp, load_result: &load_result); |
1506 | encode_HR_entitlement(kCSWebBrowserNetworkEntitlement, mask: BrowserNetworkEntitlementMask, imgp, load_result: &load_result); |
1507 | encode_HR_entitlement(kCSWebBrowserWebContentEntitlement, mask: BrowserWebContentEntitlementMask, imgp, load_result: &load_result); |
1508 | |
1509 | /* |
1510 | * Set up the shared cache region in the new process. |
1511 | * |
1512 | * Normally there is a single shared region per architecture. |
1513 | * However on systems with Pointer Authentication, we can create |
1514 | * multiple shared caches with the amount of sharing determined |
1515 | * by team-id or entitlement. Inherited shared region IDs are used |
1516 | * for system processes that need to match and be able to inspect |
1517 | * a pre-existing task. |
1518 | */ |
1519 | int cpu_subtype = 0; /* all cpu_subtypes use the same shared region */ |
1520 | #if __has_feature(ptrauth_calls) |
1521 | char *shared_region_id = NULL; |
1522 | size_t len; |
1523 | char *base; |
1524 | const char *cbase; |
1525 | #define HARDENED_RUNTIME_CONTENT_ID "C-" |
1526 | #define TEAM_ID_PREFIX "T-" |
1527 | #define ENTITLE_PREFIX "E-" |
1528 | #define SR_PREFIX_LEN 2 |
1529 | #define SR_ENTITLEMENT "com.apple.pac.shared_region_id" |
1530 | |
1531 | if (cpu_type() == CPU_TYPE_ARM64 && |
1532 | arm64_cpusubtype_uses_ptrauth(p->p_cpusubtype) && |
1533 | (imgp->ip_flags & IMGPF_NOJOP) == 0) { |
1534 | assertf(p->p_cputype == CPU_TYPE_ARM64, |
1535 | "p %p cpu_type() 0x%x p->p_cputype 0x%x p->p_cpusubtype 0x%x" , |
1536 | p, cpu_type(), p->p_cputype, p->p_cpusubtype); |
1537 | |
1538 | /* |
1539 | * arm64e uses pointer authentication, so request a separate |
1540 | * shared region for this CPU subtype. |
1541 | */ |
1542 | cpu_subtype = p->p_cpusubtype & ~CPU_SUBTYPE_MASK; |
1543 | |
1544 | /* |
1545 | * Determine which shared cache to select based on being told, |
1546 | * matching a team-id or matching an entitlement. |
1547 | */ |
1548 | if (load_result.hardened_runtime_binary & BrowserWebContentEntitlementMask) { |
1549 | len = sizeof(HARDENED_RUNTIME_CONTENT_ID); |
1550 | shared_region_id = kalloc_data(len, Z_WAITOK | Z_NOFAIL); |
1551 | strlcpy(shared_region_id, HARDENED_RUNTIME_CONTENT_ID, len); |
1552 | } else if (imgp->ip_inherited_shared_region_id) { |
1553 | len = strlen(imgp->ip_inherited_shared_region_id); |
1554 | shared_region_id = kalloc_data(len + 1, Z_WAITOK | Z_NOFAIL); |
1555 | memcpy(shared_region_id, imgp->ip_inherited_shared_region_id, len + 1); |
1556 | } else if ((cbase = get_teamid_for_shared_region(imgp)) != NULL) { |
1557 | len = strlen(cbase); |
1558 | if (vm_shared_region_per_team_id) { |
1559 | shared_region_id = kalloc_data(len + SR_PREFIX_LEN + 1, |
1560 | Z_WAITOK | Z_NOFAIL); |
1561 | memcpy(shared_region_id, TEAM_ID_PREFIX, SR_PREFIX_LEN); |
1562 | memcpy(shared_region_id + SR_PREFIX_LEN, cbase, len + 1); |
1563 | } |
1564 | } else if ((base = IOVnodeGetEntitlement(imgp->ip_vp, |
1565 | (int64_t)imgp->ip_arch_offset, SR_ENTITLEMENT)) != NULL) { |
1566 | len = strlen(base); |
1567 | if (vm_shared_region_by_entitlement) { |
1568 | shared_region_id = kalloc_data(len + SR_PREFIX_LEN + 1, |
1569 | Z_WAITOK | Z_NOFAIL); |
1570 | memcpy(shared_region_id, ENTITLE_PREFIX, SR_PREFIX_LEN); |
1571 | memcpy(shared_region_id + SR_PREFIX_LEN, base, len + 1); |
1572 | } |
1573 | /* Discard the copy of the entitlement */ |
1574 | kfree_data(base, len + 1); |
1575 | } |
1576 | } |
1577 | |
1578 | if (imgp->ip_flags & IMGPF_RESLIDE) { |
1579 | reslide = TRUE; |
1580 | } |
1581 | |
1582 | /* use "" as the default shared_region_id */ |
1583 | if (shared_region_id == NULL) { |
1584 | shared_region_id = kalloc_data(1, Z_WAITOK | Z_ZERO | Z_NOFAIL); |
1585 | } |
1586 | |
1587 | /* ensure there's a unique pointer signing key for this shared_region_id */ |
1588 | shared_region_key_alloc(shared_region_id, |
1589 | imgp->ip_inherited_shared_region_id != NULL, imgp->ip_inherited_jop_pid); |
1590 | task_set_shared_region_id(task, shared_region_id); |
1591 | shared_region_id = NULL; |
1592 | #endif /* __has_feature(ptrauth_calls) */ |
1593 | |
1594 | #if CONFIG_ROSETTA |
1595 | if (imgp->ip_flags & IMGPF_ROSETTA) { |
1596 | OSBitOrAtomic(P_TRANSLATED, &p->p_flag); |
1597 | } else if (p->p_flag & P_TRANSLATED) { |
1598 | OSBitAndAtomic(~P_TRANSLATED, &p->p_flag); |
1599 | } |
1600 | #endif |
1601 | |
1602 | int cputype = cpu_type(); |
1603 | |
1604 | uint32_t rsr_version = 0; |
1605 | #if XNU_TARGET_OS_OSX |
1606 | if (vnode_is_rsr(vp: imgp->ip_vp)) { |
1607 | rsr_version = rsr_get_version(); |
1608 | os_atomic_or(&p->p_ladvflag, P_RSR, relaxed); |
1609 | os_atomic_or(&p->p_vfs_iopolicy, P_VFS_IOPOLICY_ALTLINK, relaxed); |
1610 | } |
1611 | #endif /* XNU_TARGET_OS_OSX */ |
1612 | |
1613 | vm_map_exec(new_map: map, task, is64bit: load_result.is_64bit_addr, |
1614 | fsroot: (void *)p->p_fd.fd_rdir, cpu: cputype, cpu_subtype, reslide, |
1615 | is_driverkit: (imgp->ip_flags & IMGPF_DRIVER) != 0, |
1616 | rsr_version); |
1617 | |
1618 | /* |
1619 | * Close file descriptors which specify close-on-exec. |
1620 | */ |
1621 | fdt_exec(p, p_cred: vfs_context_ucred(ctx: imgp->ip_vfs_context), |
1622 | posix_spawn_flags: psa != NULL ? psa->psa_flags : 0, thread: imgp->ip_new_thread, in_exec: exec); |
1623 | |
1624 | /* |
1625 | * deal with set[ug]id. |
1626 | */ |
1627 | error = exec_handle_sugid(imgp); |
1628 | if (error) { |
1629 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE, |
1630 | proc_getpid(p), OS_REASON_EXEC, EXEC_EXIT_REASON_SUGID_FAILURE, 0, 0); |
1631 | |
1632 | exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_SUGID_FAILURE); |
1633 | if (bootarg_execfailurereports) { |
1634 | set_proc_name(imgp, p); |
1635 | exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT; |
1636 | } |
1637 | |
1638 | /* release new address space since we won't use it */ |
1639 | imgp->ip_free_map = map; |
1640 | map = VM_MAP_NULL; |
1641 | goto badtoolate; |
1642 | } |
1643 | |
1644 | /* |
1645 | * Commit to new map. |
1646 | * |
1647 | * Swap the new map for the old for target task, which consumes |
1648 | * our new map reference but each leaves us responsible for the |
1649 | * old_map reference. That lets us get off the pmap associated |
1650 | * with it, and then we can release it. |
1651 | * |
1652 | * The map needs to be set on the target task which is different |
1653 | * than current task, thus swap_task_map is used instead of |
1654 | * vm_map_switch. |
1655 | */ |
1656 | old_map = swap_task_map(task, thread, map); |
1657 | #if MACH_ASSERT |
1658 | /* |
1659 | * Reset the pmap's process info to prevent ledger checks |
1660 | * which might fail due to the ledgers being shared between |
1661 | * the old and new pmaps. |
1662 | */ |
1663 | vm_map_pmap_set_process(old_map, -1, "<old_map>" ); |
1664 | #endif /* MACH_ASSERT */ |
1665 | imgp->ip_free_map = old_map; |
1666 | old_map = NULL; |
1667 | |
1668 | lret = activate_exec_state(task, p, thread, result: &load_result); |
1669 | if (lret != KERN_SUCCESS) { |
1670 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE, |
1671 | proc_getpid(p), OS_REASON_EXEC, EXEC_EXIT_REASON_ACTV_THREADSTATE, 0, 0); |
1672 | |
1673 | exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_ACTV_THREADSTATE); |
1674 | if (bootarg_execfailurereports) { |
1675 | set_proc_name(imgp, p); |
1676 | exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT; |
1677 | } |
1678 | |
1679 | goto badtoolate; |
1680 | } |
1681 | |
1682 | /* |
1683 | * deal with voucher on exec-calling thread. |
1684 | */ |
1685 | if (imgp->ip_new_thread == NULL) { |
1686 | thread_set_mach_voucher(thr_act: current_thread(), IPC_VOUCHER_NULL); |
1687 | } |
1688 | |
1689 | /* Make sure we won't interrupt ourself signalling a partial process */ |
1690 | if (!vfexec && !spawn && (p->p_lflag & P_LTRACED)) { |
1691 | psignal(p, SIGTRAP); |
1692 | } |
1693 | |
1694 | if (load_result.unixproc && |
1695 | create_unix_stack(map: get_task_map(task), |
1696 | load_result: &load_result, |
1697 | p) != KERN_SUCCESS) { |
1698 | error = load_return_to_errno(LOAD_NOSPACE); |
1699 | |
1700 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE, |
1701 | proc_getpid(p), OS_REASON_EXEC, EXEC_EXIT_REASON_STACK_ALLOC, 0, 0); |
1702 | |
1703 | exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_STACK_ALLOC); |
1704 | if (bootarg_execfailurereports) { |
1705 | set_proc_name(imgp, p); |
1706 | exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT; |
1707 | } |
1708 | |
1709 | goto badtoolate; |
1710 | } |
1711 | |
1712 | if (load_result.hardened_runtime_binary) { |
1713 | if (cs_debug) { |
1714 | printf("setting hardened runtime with entitlement mask= " |
1715 | "0x%x on task: pid = %d\n" , |
1716 | load_result.hardened_runtime_binary, |
1717 | proc_getpid(p)); |
1718 | } |
1719 | task_set_hardened_runtime(task, true); |
1720 | } |
1721 | |
1722 | /* |
1723 | * The load result will have already been munged by AMFI to include the |
1724 | * platform binary flag if boot-args dictated it (AMFI will mark anything |
1725 | * that doesn't go through the upcall path as a platform binary if its |
1726 | * enforcement is disabled). |
1727 | */ |
1728 | if (load_result.platform_binary) { |
1729 | if (cs_debug) { |
1730 | printf("setting platform binary on task: pid = %d\n" , proc_getpid(p)); |
1731 | } |
1732 | |
1733 | /* |
1734 | * We must use 'task' here because the proc's task has not yet been |
1735 | * switched to the new one. |
1736 | */ |
1737 | task_set_platform_binary(task, TRUE); |
1738 | } else { |
1739 | if (cs_debug) { |
1740 | printf("clearing platform binary on task: pid = %d\n" , proc_getpid(p)); |
1741 | } |
1742 | |
1743 | task_set_platform_binary(task, FALSE); |
1744 | } |
1745 | |
1746 | #if XNU_TARGET_OS_OSX |
1747 | /* Disable mach hardening for all 1P tasks which load 3P plugins */ |
1748 | if (imgp->ip_flags & IMGPF_3P_PLUGINS) { |
1749 | if (cs_debug) { |
1750 | printf("Disabling some mach hardening on task due to 3P plugins: pid = %d\n" , proc_getpid(p)); |
1751 | } |
1752 | task_disable_mach_hardening(task); |
1753 | } |
1754 | #if DEVELOPMENT || DEBUG |
1755 | /* Disable mach hardening for all tasks if amfi_get_out_of_my_way is set. |
1756 | * Customers will have to turn SIP off to use this boot-arg, and so this is |
1757 | * only needed internally since we disable this feature when SIP is off. */ |
1758 | if (AMFI_bootarg_disable_mach_hardening) { |
1759 | if (cs_debug) { |
1760 | printf("Disabling some mach hardening on task due to AMFI boot-args: pid = %d\n" , proc_getpid(p)); |
1761 | } |
1762 | task_disable_mach_hardening(task); |
1763 | } |
1764 | #endif /* DEVELOPMENT || DEBUG */ |
1765 | #endif /* XNU_TARGET_OS_OSX */ |
1766 | |
1767 | /* |
1768 | * Set starting EXC_GUARD and control port behavior for task now that |
1769 | * platform and hardened runtime is set. Use the name directly from imgp since we haven't |
1770 | * set_proc_name() yet. Also make control port for the task and main thread |
1771 | * immovable/pinned based on task's option. |
1772 | * |
1773 | * Must happen before main thread port copyout in exc_add_apple_strings. |
1774 | */ |
1775 | task_set_exc_guard_ctrl_port_default(task, main_thread: thread, |
1776 | name: imgp->ip_ndp->ni_cnd.cn_nameptr, |
1777 | namelen: (unsigned)imgp->ip_ndp->ni_cnd.cn_namelen, |
1778 | is_simulated: proc_is_simulated(p), |
1779 | platform: load_result.ip_platform, |
1780 | sdk: load_result.lr_sdk); |
1781 | |
1782 | error = exec_add_apple_strings(imgp, load_result: &load_result); /* copies out main thread port */ |
1783 | |
1784 | if (error) { |
1785 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE, |
1786 | proc_getpid(p), OS_REASON_EXEC, EXEC_EXIT_REASON_APPLE_STRING_INIT, 0, 0); |
1787 | |
1788 | exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_APPLE_STRING_INIT); |
1789 | if (bootarg_execfailurereports) { |
1790 | set_proc_name(imgp, p); |
1791 | exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT; |
1792 | } |
1793 | goto badtoolate; |
1794 | } |
1795 | |
1796 | /* Switch to target task's map to copy out strings */ |
1797 | old_map = vm_map_switch(map: get_task_map(task)); |
1798 | |
1799 | if (load_result.unixproc) { |
1800 | user_addr_t ap; |
1801 | |
1802 | /* |
1803 | * Copy the strings area out into the new process address |
1804 | * space. |
1805 | */ |
1806 | ap = p->user_stack; |
1807 | error = exec_copyout_strings(imgp, stackp: &ap); |
1808 | if (error) { |
1809 | vm_map_switch(map: old_map); |
1810 | |
1811 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE, |
1812 | proc_getpid(p), OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_STRINGS, 0, 0); |
1813 | |
1814 | exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_STRINGS); |
1815 | if (bootarg_execfailurereports) { |
1816 | set_proc_name(imgp, p); |
1817 | exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT; |
1818 | } |
1819 | goto badtoolate; |
1820 | } |
1821 | /* Set the stack */ |
1822 | thread_setuserstack(thread, user_stack: ap); |
1823 | } |
1824 | |
1825 | if (load_result.dynlinker || load_result.is_rosetta) { |
1826 | user_addr_t ap; |
1827 | int new_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT_ADDR) ? 8 : 4; |
1828 | |
1829 | /* Adjust the stack */ |
1830 | ap = thread_adjuserstack(thread, adjust: -new_ptr_size); |
1831 | error = copyoutptr(ua: load_result.mach_header, ptr: ap, ptr_size: new_ptr_size); |
1832 | |
1833 | if (error) { |
1834 | vm_map_switch(map: old_map); |
1835 | |
1836 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE, |
1837 | proc_getpid(p), OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_DYNLINKER, 0, 0); |
1838 | |
1839 | exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_DYNLINKER); |
1840 | if (bootarg_execfailurereports) { |
1841 | set_proc_name(imgp, p); |
1842 | exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT; |
1843 | } |
1844 | goto badtoolate; |
1845 | } |
1846 | error = task_set_dyld_info(task, addr: load_result.all_image_info_addr, |
1847 | size: load_result.all_image_info_size); |
1848 | if (error) { |
1849 | vm_map_switch(map: old_map); |
1850 | |
1851 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE, |
1852 | proc_getpid(p), OS_REASON_EXEC, EXEC_EXIT_REASON_SET_DYLD_INFO, 0, 0); |
1853 | |
1854 | exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_SET_DYLD_INFO); |
1855 | if (bootarg_execfailurereports) { |
1856 | set_proc_name(imgp, p); |
1857 | exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT; |
1858 | } |
1859 | error = EINVAL; |
1860 | goto badtoolate; |
1861 | } |
1862 | } |
1863 | |
1864 | #if CONFIG_ROSETTA |
1865 | if (load_result.is_rosetta) { |
1866 | // Add an fd for the executable file for Rosetta's use |
1867 | int main_binary_fd; |
1868 | struct fileproc *fp; |
1869 | |
1870 | error = falloc_exec(p, imgp->ip_vfs_context, &fp, &main_binary_fd); |
1871 | if (error) { |
1872 | vm_map_switch(old_map); |
1873 | |
1874 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE, |
1875 | proc_getpid(p), OS_REASON_EXEC, EXEC_EXIT_REASON_MAIN_FD_ALLOC, 0, 0); |
1876 | |
1877 | exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_MAIN_FD_ALLOC); |
1878 | if (bootarg_execfailurereports) { |
1879 | set_proc_name(imgp, p); |
1880 | exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT; |
1881 | } |
1882 | goto badtoolate; |
1883 | } |
1884 | |
1885 | error = VNOP_OPEN(imgp->ip_vp, FREAD, imgp->ip_vfs_context); |
1886 | if (error) { |
1887 | vm_map_switch(old_map); |
1888 | |
1889 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE, |
1890 | proc_getpid(p), OS_REASON_EXEC, EXEC_EXIT_REASON_MAIN_FD_ALLOC, 0, 0); |
1891 | |
1892 | exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_MAIN_FD_ALLOC); |
1893 | if (bootarg_execfailurereports) { |
1894 | set_proc_name(imgp, p); |
1895 | exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT; |
1896 | } |
1897 | goto cleanup_rosetta_fp; |
1898 | } |
1899 | |
1900 | fp->fp_glob->fg_flag = FREAD; |
1901 | fp->fp_glob->fg_ops = &vnops; |
1902 | fp_set_data(fp, imgp->ip_vp); |
1903 | |
1904 | proc_fdlock(p); |
1905 | procfdtbl_releasefd(p, main_binary_fd, NULL); |
1906 | fp_drop(p, main_binary_fd, fp, 1); |
1907 | proc_fdunlock(p); |
1908 | |
1909 | vnode_ref(imgp->ip_vp); |
1910 | |
1911 | // Pass the dyld load address, main binary fd, and dyld fd on the stack |
1912 | uint64_t ap = thread_adjuserstack(thread, -24); |
1913 | |
1914 | error = copyoutptr((user_addr_t)load_result.dynlinker_fd, ap, 8); |
1915 | if (error) { |
1916 | vm_map_switch(old_map); |
1917 | |
1918 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE, |
1919 | proc_getpid(p), OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_ROSETTA, 0, 0); |
1920 | |
1921 | exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_ROSETTA); |
1922 | if (bootarg_execfailurereports) { |
1923 | set_proc_name(imgp, p); |
1924 | exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT; |
1925 | } |
1926 | goto cleanup_rosetta_fp; |
1927 | } |
1928 | |
1929 | error = copyoutptr(load_result.dynlinker_mach_header, ap + 8, 8); |
1930 | if (error) { |
1931 | vm_map_switch(old_map); |
1932 | |
1933 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE, |
1934 | proc_getpid(p), OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_ROSETTA, 0, 0); |
1935 | |
1936 | exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_ROSETTA); |
1937 | if (bootarg_execfailurereports) { |
1938 | set_proc_name(imgp, p); |
1939 | exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT; |
1940 | } |
1941 | goto cleanup_rosetta_fp; |
1942 | } |
1943 | |
1944 | error = copyoutptr((user_addr_t)main_binary_fd, ap + 16, 8); |
1945 | if (error) { |
1946 | vm_map_switch(old_map); |
1947 | |
1948 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE, |
1949 | proc_getpid(p), OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_ROSETTA, 0, 0); |
1950 | |
1951 | exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_ROSETTA); |
1952 | if (bootarg_execfailurereports) { |
1953 | set_proc_name(imgp, p); |
1954 | exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT; |
1955 | } |
1956 | goto cleanup_rosetta_fp; |
1957 | } |
1958 | |
1959 | cleanup_rosetta_fp: |
1960 | if (error) { |
1961 | fp_free(p, load_result.dynlinker_fd, load_result.dynlinker_fp); |
1962 | fp_free(p, main_binary_fd, fp); |
1963 | goto badtoolate; |
1964 | } |
1965 | } |
1966 | |
1967 | #endif |
1968 | |
1969 | /* Avoid immediate VM faults back into kernel */ |
1970 | exec_prefault_data(p, imgp, &load_result); |
1971 | |
1972 | vm_map_switch(map: old_map); |
1973 | |
1974 | /* |
1975 | * Reset signal state. |
1976 | */ |
1977 | execsigs(p, thread); |
1978 | |
1979 | /* |
1980 | * need to cancel async IO requests that can be cancelled and wait for those |
1981 | * already active. MAY BLOCK! |
1982 | */ |
1983 | _aio_exec( p ); |
1984 | |
1985 | #if SYSV_SHM |
1986 | /* FIXME: Till vmspace inherit is fixed: */ |
1987 | if (!vfexec && p->vm_shm) { |
1988 | shmexec(p); |
1989 | } |
1990 | #endif |
1991 | #if SYSV_SEM |
1992 | /* Clean up the semaphores */ |
1993 | semexit(p); |
1994 | #endif |
1995 | |
1996 | /* |
1997 | * Remember file name for accounting. |
1998 | */ |
1999 | p->p_acflag &= ~AFORK; |
2000 | |
2001 | set_proc_name(imgp, p); |
2002 | |
2003 | #if CONFIG_SECLUDED_MEMORY |
2004 | if (secluded_for_apps && |
2005 | load_result.platform_binary) { |
2006 | if (strncmp(p->p_name, |
2007 | "Camera" , |
2008 | sizeof(p->p_name)) == 0) { |
2009 | task_set_could_use_secluded_mem(task, TRUE); |
2010 | } else { |
2011 | task_set_could_use_secluded_mem(task, FALSE); |
2012 | } |
2013 | if (strncmp(p->p_name, |
2014 | "mediaserverd" , |
2015 | sizeof(p->p_name)) == 0) { |
2016 | task_set_could_also_use_secluded_mem(task, TRUE); |
2017 | } |
2018 | } |
2019 | #endif /* CONFIG_SECLUDED_MEMORY */ |
2020 | |
2021 | #if __arm64__ |
2022 | if (load_result.legacy_footprint) { |
2023 | task_set_legacy_footprint(task); |
2024 | } |
2025 | #endif /* __arm64__ */ |
2026 | |
2027 | pal_dbg_set_task_name(task); |
2028 | |
2029 | #if DEVELOPMENT || DEBUG |
2030 | /* |
2031 | * Update the pid and proc name for importance base if any |
2032 | */ |
2033 | task_importance_update_owner_info(task); |
2034 | #endif |
2035 | |
2036 | proc_setexecutableuuid(p, &load_result.uuid[0]); |
2037 | |
2038 | #if CONFIG_DTRACE |
2039 | dtrace_proc_exec(p); |
2040 | #endif |
2041 | |
2042 | if (kdebug_enable) { |
2043 | long args[4] = {}; |
2044 | |
2045 | uintptr_t fsid = 0, fileid = 0; |
2046 | if (imgp->ip_vattr) { |
2047 | uint64_t fsid64 = vnode_get_va_fsid(vap: imgp->ip_vattr); |
2048 | fsid = (uintptr_t)fsid64; |
2049 | fileid = (uintptr_t)imgp->ip_vattr->va_fileid; |
2050 | // check for (unexpected) overflow and trace zero in that case |
2051 | if (fsid != fsid64 || fileid != imgp->ip_vattr->va_fileid) { |
2052 | fsid = fileid = 0; |
2053 | } |
2054 | } |
2055 | KERNEL_DEBUG_CONSTANT_IST1(TRACE_DATA_EXEC, proc_getpid(p), fsid, fileid, 0, |
2056 | (uintptr_t)thread_tid(thread)); |
2057 | |
2058 | extern void kdebug_proc_name_args(struct proc *proc, long args[static 4]); |
2059 | kdebug_proc_name_args(proc: p, args); |
2060 | KERNEL_DEBUG_CONSTANT_IST1(TRACE_STRING_EXEC, args[0], args[1], |
2061 | args[2], args[3], (uintptr_t)thread_tid(thread)); |
2062 | } |
2063 | |
2064 | |
2065 | /* |
2066 | * If posix_spawned with the START_SUSPENDED flag, stop the |
2067 | * process before it runs. |
2068 | */ |
2069 | if (imgp->ip_px_sa != NULL) { |
2070 | psa = (struct _posix_spawnattr *) imgp->ip_px_sa; |
2071 | if (psa->psa_flags & POSIX_SPAWN_START_SUSPENDED) { |
2072 | proc_lock(p); |
2073 | p->p_stat = SSTOP; |
2074 | proc_unlock(p); |
2075 | (void) task_suspend_internal(task); |
2076 | } |
2077 | } |
2078 | |
2079 | /* |
2080 | * mark as execed |
2081 | */ |
2082 | OSBitOrAtomic(P_EXEC, &p->p_flag); |
2083 | proc_resetregister(p); |
2084 | if (p->p_pptr && (p->p_lflag & P_LPPWAIT)) { |
2085 | proc_lock(p); |
2086 | p->p_lflag &= ~P_LPPWAIT; |
2087 | proc_unlock(p); |
2088 | wakeup(chan: (caddr_t)p->p_pptr); |
2089 | } |
2090 | |
2091 | /* |
2092 | * Set up dext coredumps on kernel panic. |
2093 | * This requires the following: |
2094 | * - dext_panic_coredump=1 boot-arg (enabled by default on DEVELOPMENT, DEBUG and certain Seed builds) |
2095 | * - process must be a driver |
2096 | * - process must have the com.apple.private.enable-coredump-on-panic entitlement, and the |
2097 | * entitlement has a string value. |
2098 | * - process must have the com.apple.private.enable-coredump-on-panic-seed-privacy-approved |
2099 | * entitlement (Seed builds only). |
2100 | * |
2101 | * The core dump file name is formatted with the entitlement string value, followed by a hyphen |
2102 | * and the process PID. |
2103 | */ |
2104 | if (enable_dext_coredumps_on_panic && |
2105 | (imgp->ip_flags & IMGPF_DRIVER) != 0 && |
2106 | (userspace_coredump_name = IOVnodeGetEntitlement(vnode: imgp->ip_vp, |
2107 | offset: (int64_t)imgp->ip_arch_offset, USERSPACE_COREDUMP_PANIC_ENTITLEMENT)) != NULL) { |
2108 | size_t userspace_coredump_name_len = strlen(s: userspace_coredump_name); |
2109 | |
2110 | char core_name[MACH_CORE_FILEHEADER_NAMELEN]; |
2111 | /* 16 - NULL char - strlen("-") - maximum of 5 digits for pid */ |
2112 | snprintf(core_name, MACH_CORE_FILEHEADER_NAMELEN, "%.9s-%d" , userspace_coredump_name, proc_getpid(p)); |
2113 | |
2114 | kern_register_userspace_coredump(task, name: core_name); |
2115 | |
2116 | /* Discard the copy of the entitlement */ |
2117 | kfree_data(userspace_coredump_name, userspace_coredump_name_len + 1); |
2118 | userspace_coredump_name = NULL; |
2119 | } |
2120 | |
2121 | goto done; |
2122 | |
2123 | badtoolate: |
2124 | /* Don't allow child process to execute any instructions */ |
2125 | if (!spawn) { |
2126 | { |
2127 | assert(exec_failure_reason != OS_REASON_NULL); |
2128 | if (bootarg_execfailurereports) { |
2129 | set_proc_name(imgp, p: current_proc()); |
2130 | } |
2131 | psignal_with_reason(p: current_proc(), SIGKILL, signal_reason: exec_failure_reason); |
2132 | exec_failure_reason = OS_REASON_NULL; |
2133 | |
2134 | if (exec) { |
2135 | /* Terminate the exec copy task */ |
2136 | task_terminate_internal(task); |
2137 | } |
2138 | } |
2139 | |
2140 | /* We can't stop this system call at this point, so just pretend we succeeded */ |
2141 | error = 0; |
2142 | } else { |
2143 | os_reason_free(cur_reason: exec_failure_reason); |
2144 | exec_failure_reason = OS_REASON_NULL; |
2145 | } |
2146 | |
2147 | done: |
2148 | if (load_result.threadstate) { |
2149 | kfree_data(load_result.threadstate, load_result.threadstate_sz); |
2150 | load_result.threadstate = NULL; |
2151 | } |
2152 | |
2153 | bad: |
2154 | /* If we hit this, we likely would have leaked an exit reason */ |
2155 | assert(exec_failure_reason == OS_REASON_NULL); |
2156 | return error; |
2157 | } |
2158 | |
2159 | |
2160 | |
2161 | |
2162 | /* |
2163 | * Our image activator table; this is the table of the image types we are |
2164 | * capable of loading. We list them in order of preference to ensure the |
2165 | * fastest image load speed. |
2166 | * |
2167 | * XXX hardcoded, for now; should use linker sets |
2168 | */ |
2169 | struct execsw { |
2170 | int(*const ex_imgact)(struct image_params *); |
2171 | const char *ex_name; |
2172 | }const execsw[] = { |
2173 | { exec_mach_imgact, "Mach-o Binary" }, |
2174 | { .ex_imgact: exec_fat_imgact, .ex_name: "Fat Binary" }, |
2175 | { .ex_imgact: exec_shell_imgact, .ex_name: "Interpreter Script" }, |
2176 | { NULL, NULL} |
2177 | }; |
2178 | |
2179 | |
2180 | /* |
2181 | * exec_activate_image |
2182 | * |
2183 | * Description: Iterate through the available image activators, and activate |
2184 | * the image associated with the imgp structure. We start with |
2185 | * the activator for Mach-o binaries followed by that for Fat binaries |
2186 | * for Interpreter scripts. |
2187 | * |
2188 | * Parameters: struct image_params * Image parameter block |
2189 | * |
2190 | * Returns: 0 Success |
2191 | * ENOEXEC No activator for image. |
2192 | * EBADEXEC The executable is corrupt/unknown |
2193 | * execargs_alloc:EINVAL Invalid argument |
2194 | * execargs_alloc:EACCES Permission denied |
2195 | * execargs_alloc:EINTR Interrupted function |
2196 | * execargs_alloc:ENOMEM Not enough space |
2197 | * exec_save_path:EFAULT Bad address |
2198 | * exec_save_path:ENAMETOOLONG Filename too long |
2199 | * exec_check_permissions:EACCES Permission denied |
2200 | * exec_check_permissions:ENOEXEC Executable file format error |
2201 | * exec_check_permissions:ETXTBSY Text file busy [misuse of error code] |
2202 | * exec_check_permissions:??? |
2203 | * namei:??? |
2204 | * vn_rdwr:??? [anything vn_rdwr can return] |
2205 | * <ex_imgact>:??? [anything an imgact can return] |
2206 | * EDEADLK Process is being terminated |
2207 | */ |
2208 | static int |
2209 | exec_activate_image(struct image_params *imgp) |
2210 | { |
2211 | struct nameidata *ndp = NULL; |
2212 | const char *excpath; |
2213 | int error; |
2214 | int resid; |
2215 | int once = 1; /* save SGUID-ness for interpreted files */ |
2216 | int i; |
2217 | int itercount = 0; |
2218 | proc_t p = vfs_context_proc(ctx: imgp->ip_vfs_context); |
2219 | |
2220 | /* |
2221 | * For exec, the translock needs to be taken on old proc and not |
2222 | * on new shadow proc. |
2223 | */ |
2224 | if (imgp->ip_flags & IMGPF_EXEC) { |
2225 | p = current_proc(); |
2226 | } |
2227 | |
2228 | error = execargs_alloc(imgp); |
2229 | if (error) { |
2230 | goto bad_notrans; |
2231 | } |
2232 | |
2233 | error = exec_save_path(imgp, path: imgp->ip_user_fname, seg: imgp->ip_seg, excpath: &excpath); |
2234 | if (error) { |
2235 | goto bad_notrans; |
2236 | } |
2237 | |
2238 | /* Use excpath, which contains the copyin-ed exec path */ |
2239 | DTRACE_PROC1(exec, uintptr_t, excpath); |
2240 | |
2241 | ndp = kalloc_type(struct nameidata, Z_WAITOK | Z_ZERO | Z_NOFAIL); |
2242 | |
2243 | NDINIT(ndp, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, |
2244 | UIO_SYSSPACE, CAST_USER_ADDR_T(excpath), imgp->ip_vfs_context); |
2245 | |
2246 | again: |
2247 | error = namei(ndp); |
2248 | if (error) { |
2249 | if (error == ERESTART) { |
2250 | error = EINTR; |
2251 | } |
2252 | goto bad_notrans; |
2253 | } |
2254 | imgp->ip_ndp = ndp; /* successful namei(); call nameidone() later */ |
2255 | imgp->ip_vp = ndp->ni_vp; /* if set, need to vnode_put() at some point */ |
2256 | |
2257 | /* |
2258 | * Before we start the transition from binary A to binary B, make |
2259 | * sure another thread hasn't started exiting the process. We grab |
2260 | * the proc lock to check p_lflag initially, and the transition |
2261 | * mechanism ensures that the value doesn't change after we release |
2262 | * the lock. |
2263 | */ |
2264 | proc_lock(p); |
2265 | if (p->p_lflag & P_LEXIT) { |
2266 | error = EDEADLK; |
2267 | proc_unlock(p); |
2268 | goto bad_notrans; |
2269 | } |
2270 | error = proc_transstart(p, locked: 1, non_blocking: 0); |
2271 | proc_unlock(p); |
2272 | if (error) { |
2273 | goto bad_notrans; |
2274 | } |
2275 | |
2276 | error = exec_check_permissions(imgp); |
2277 | if (error) { |
2278 | goto bad; |
2279 | } |
2280 | |
2281 | /* Copy; avoid invocation of an interpreter overwriting the original */ |
2282 | if (once) { |
2283 | once = 0; |
2284 | *imgp->ip_origvattr = *imgp->ip_vattr; |
2285 | } |
2286 | |
2287 | error = vn_rdwr(rw: UIO_READ, vp: imgp->ip_vp, base: imgp->ip_vdata, PAGE_SIZE, offset: 0, |
2288 | segflg: UIO_SYSSPACE, IO_NODELOCKED, |
2289 | cred: vfs_context_ucred(ctx: imgp->ip_vfs_context), |
2290 | aresid: &resid, p: vfs_context_proc(ctx: imgp->ip_vfs_context)); |
2291 | if (error) { |
2292 | goto bad; |
2293 | } |
2294 | |
2295 | if (resid) { |
2296 | memset(s: imgp->ip_vdata + (PAGE_SIZE - resid), c: 0x0, n: resid); |
2297 | } |
2298 | |
2299 | encapsulated_binary: |
2300 | /* Limit the number of iterations we will attempt on each binary */ |
2301 | if (++itercount > EAI_ITERLIMIT) { |
2302 | error = EBADEXEC; |
2303 | goto bad; |
2304 | } |
2305 | error = -1; |
2306 | for (i = 0; error == -1 && execsw[i].ex_imgact != NULL; i++) { |
2307 | error = (*execsw[i].ex_imgact)(imgp); |
2308 | |
2309 | switch (error) { |
2310 | /* case -1: not claimed: continue */ |
2311 | case -2: /* Encapsulated binary, imgp->ip_XXX set for next iteration */ |
2312 | goto encapsulated_binary; |
2313 | |
2314 | case -3: /* Interpreter */ |
2315 | #if CONFIG_MACF |
2316 | /* |
2317 | * Copy the script label for later use. Note that |
2318 | * the label can be different when the script is |
2319 | * actually read by the interpreter. |
2320 | */ |
2321 | if (imgp->ip_scriptlabelp) { |
2322 | mac_vnode_label_free(label: imgp->ip_scriptlabelp); |
2323 | imgp->ip_scriptlabelp = NULL; |
2324 | } |
2325 | imgp->ip_scriptlabelp = mac_vnode_label_alloc(NULL); |
2326 | if (imgp->ip_scriptlabelp == NULL) { |
2327 | error = ENOMEM; |
2328 | break; |
2329 | } |
2330 | mac_vnode_label_copy(l1: mac_vnode_label(vp: imgp->ip_vp), |
2331 | l2: imgp->ip_scriptlabelp); |
2332 | |
2333 | /* |
2334 | * Take a ref of the script vnode for later use. |
2335 | */ |
2336 | if (imgp->ip_scriptvp) { |
2337 | vnode_put(vp: imgp->ip_scriptvp); |
2338 | imgp->ip_scriptvp = NULLVP; |
2339 | } |
2340 | if (vnode_getwithref(vp: imgp->ip_vp) == 0) { |
2341 | imgp->ip_scriptvp = imgp->ip_vp; |
2342 | } |
2343 | #endif |
2344 | |
2345 | nameidone(ndp); |
2346 | |
2347 | vnode_put(vp: imgp->ip_vp); |
2348 | imgp->ip_vp = NULL; /* already put */ |
2349 | imgp->ip_ndp = NULL; /* already nameidone */ |
2350 | |
2351 | /* Use excpath, which exec_shell_imgact reset to the interpreter */ |
2352 | NDINIT(ndp, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF, |
2353 | UIO_SYSSPACE, CAST_USER_ADDR_T(excpath), imgp->ip_vfs_context); |
2354 | |
2355 | proc_transend(p, locked: 0); |
2356 | goto again; |
2357 | |
2358 | default: |
2359 | break; |
2360 | } |
2361 | } |
2362 | |
2363 | if (error == -1) { |
2364 | error = ENOEXEC; |
2365 | } else if (error == 0) { |
2366 | if (imgp->ip_flags & IMGPF_INTERPRET && ndp->ni_vp) { |
2367 | AUDIT_ARG(vnpath, ndp->ni_vp, ARG_VNODE2); |
2368 | } |
2369 | |
2370 | /* |
2371 | * Call out to allow 3rd party notification of exec. |
2372 | * Ignore result of kauth_authorize_fileop call. |
2373 | */ |
2374 | if (kauth_authorize_fileop_has_listeners()) { |
2375 | kauth_authorize_fileop(credential: vfs_context_ucred(ctx: imgp->ip_vfs_context), |
2376 | KAUTH_FILEOP_EXEC, |
2377 | arg0: (uintptr_t)ndp->ni_vp, arg1: 0); |
2378 | } |
2379 | } |
2380 | bad: |
2381 | proc_transend(p, locked: 0); |
2382 | |
2383 | bad_notrans: |
2384 | if (imgp->ip_strings) { |
2385 | execargs_free(imgp); |
2386 | } |
2387 | if (imgp->ip_ndp) { |
2388 | nameidone(imgp->ip_ndp); |
2389 | } |
2390 | kfree_type(struct nameidata, ndp); |
2391 | |
2392 | return error; |
2393 | } |
2394 | |
2395 | /* |
2396 | * exec_validate_spawnattr_policy |
2397 | * |
2398 | * Description: Validates the entitlements required to set the apptype. |
2399 | * |
2400 | * Parameters: int psa_apptype posix spawn attribute apptype |
2401 | * |
2402 | * Returns: 0 Success |
2403 | * EPERM Failure |
2404 | */ |
2405 | static errno_t |
2406 | exec_validate_spawnattr_policy(int psa_apptype) |
2407 | { |
2408 | if ((psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK) != 0) { |
2409 | int proctype = psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK; |
2410 | if (proctype == POSIX_SPAWN_PROC_TYPE_DRIVER) { |
2411 | if (!IOCurrentTaskHasEntitlement(POSIX_SPAWN_ENTITLEMENT_DRIVER)) { |
2412 | return EPERM; |
2413 | } |
2414 | } |
2415 | } |
2416 | |
2417 | return 0; |
2418 | } |
2419 | |
2420 | /* |
2421 | * exec_handle_spawnattr_policy |
2422 | * |
2423 | * Description: Decode and apply the posix_spawn apptype, qos clamp, and watchport ports to the task. |
2424 | * |
2425 | * Parameters: proc_t p process to apply attributes to |
2426 | * int psa_apptype posix spawn attribute apptype |
2427 | * |
2428 | * Returns: 0 Success |
2429 | */ |
2430 | static errno_t |
2431 | exec_handle_spawnattr_policy(proc_t p, thread_t thread, int psa_apptype, uint64_t psa_qos_clamp, |
2432 | task_role_t psa_darwin_role, struct exec_port_actions *port_actions) |
2433 | { |
2434 | int apptype = TASK_APPTYPE_NONE; |
2435 | int qos_clamp = THREAD_QOS_UNSPECIFIED; |
2436 | task_role_t role = TASK_UNSPECIFIED; |
2437 | |
2438 | if ((psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK) != 0) { |
2439 | int proctype = psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK; |
2440 | |
2441 | switch (proctype) { |
2442 | case POSIX_SPAWN_PROC_TYPE_DAEMON_INTERACTIVE: |
2443 | apptype = TASK_APPTYPE_DAEMON_INTERACTIVE; |
2444 | break; |
2445 | case POSIX_SPAWN_PROC_TYPE_DAEMON_STANDARD: |
2446 | apptype = TASK_APPTYPE_DAEMON_STANDARD; |
2447 | break; |
2448 | case POSIX_SPAWN_PROC_TYPE_DAEMON_ADAPTIVE: |
2449 | apptype = TASK_APPTYPE_DAEMON_ADAPTIVE; |
2450 | break; |
2451 | case POSIX_SPAWN_PROC_TYPE_DAEMON_BACKGROUND: |
2452 | apptype = TASK_APPTYPE_DAEMON_BACKGROUND; |
2453 | break; |
2454 | case POSIX_SPAWN_PROC_TYPE_APP_DEFAULT: |
2455 | apptype = TASK_APPTYPE_APP_DEFAULT; |
2456 | break; |
2457 | case POSIX_SPAWN_PROC_TYPE_DRIVER: |
2458 | apptype = TASK_APPTYPE_DRIVER; |
2459 | break; |
2460 | default: |
2461 | apptype = TASK_APPTYPE_NONE; |
2462 | /* TODO: Should an invalid value here fail the spawn? */ |
2463 | break; |
2464 | } |
2465 | } |
2466 | |
2467 | if (psa_qos_clamp != POSIX_SPAWN_PROC_CLAMP_NONE) { |
2468 | switch (psa_qos_clamp) { |
2469 | case POSIX_SPAWN_PROC_CLAMP_UTILITY: |
2470 | qos_clamp = THREAD_QOS_UTILITY; |
2471 | break; |
2472 | case POSIX_SPAWN_PROC_CLAMP_BACKGROUND: |
2473 | qos_clamp = THREAD_QOS_BACKGROUND; |
2474 | break; |
2475 | case POSIX_SPAWN_PROC_CLAMP_MAINTENANCE: |
2476 | qos_clamp = THREAD_QOS_MAINTENANCE; |
2477 | break; |
2478 | default: |
2479 | qos_clamp = THREAD_QOS_UNSPECIFIED; |
2480 | /* TODO: Should an invalid value here fail the spawn? */ |
2481 | break; |
2482 | } |
2483 | } |
2484 | |
2485 | if (psa_darwin_role != PRIO_DARWIN_ROLE_DEFAULT) { |
2486 | proc_darwin_role_to_task_role(darwin_role: psa_darwin_role, task_role: &role); |
2487 | } |
2488 | |
2489 | if (apptype != TASK_APPTYPE_NONE || |
2490 | qos_clamp != THREAD_QOS_UNSPECIFIED || |
2491 | role != TASK_UNSPECIFIED || |
2492 | port_actions->portwatch_count) { |
2493 | proc_set_task_spawnpolicy(task: proc_task(p), thread, apptype, qos_clamp, role, |
2494 | portwatch_ports: port_actions->portwatch_array, portwatch_count: port_actions->portwatch_count); |
2495 | } |
2496 | |
2497 | if (port_actions->registered_count) { |
2498 | if (mach_ports_register(target_task: proc_task(p), init_port_set: port_actions->registered_array, |
2499 | init_port_setCnt: port_actions->registered_count)) { |
2500 | return EINVAL; |
2501 | } |
2502 | /* mach_ports_register() consumed the array */ |
2503 | port_actions->registered_array = NULL; |
2504 | port_actions->registered_count = 0; |
2505 | } |
2506 | |
2507 | return 0; |
2508 | } |
2509 | |
2510 | static void |
2511 | exec_port_actions_destroy(struct exec_port_actions *port_actions) |
2512 | { |
2513 | if (port_actions->excport_array) { |
2514 | for (uint32_t i = 0; i < port_actions->exception_port_count; i++) { |
2515 | ipc_port_t port = NULL; |
2516 | if ((port = port_actions->excport_array[i].port) != NULL) { |
2517 | ipc_port_release_send(port); |
2518 | } |
2519 | } |
2520 | kfree_type(struct exception_port_action_t, port_actions->exception_port_count, |
2521 | port_actions->excport_array); |
2522 | } |
2523 | |
2524 | if (port_actions->portwatch_array) { |
2525 | for (uint32_t i = 0; i < port_actions->portwatch_count; i++) { |
2526 | ipc_port_t port = NULL; |
2527 | if ((port = port_actions->portwatch_array[i]) != NULL) { |
2528 | ipc_port_release_send(port); |
2529 | } |
2530 | } |
2531 | kfree_type(ipc_port_t, port_actions->portwatch_count, |
2532 | port_actions->portwatch_array); |
2533 | } |
2534 | |
2535 | if (port_actions->registered_array) { |
2536 | for (uint32_t i = 0; i < port_actions->registered_count; i++) { |
2537 | ipc_port_t port = NULL; |
2538 | if ((port = port_actions->registered_array[i]) != NULL) { |
2539 | ipc_port_release_send(port); |
2540 | } |
2541 | } |
2542 | kfree_type(ipc_port_t, port_actions->registered_count, |
2543 | port_actions->registered_array); |
2544 | } |
2545 | } |
2546 | |
2547 | /* |
2548 | * exec_handle_port_actions |
2549 | * |
2550 | * Description: Go through the _posix_port_actions_t contents, |
2551 | * calling task_set_special_port, task_set_exception_ports |
2552 | * and/or audit_session_spawnjoin for the current task. |
2553 | * |
2554 | * Parameters: struct image_params * Image parameter block |
2555 | * |
2556 | * Returns: 0 Success |
2557 | * EINVAL Failure |
2558 | * ENOTSUP Illegal posix_spawn attr flag was set |
2559 | */ |
2560 | static errno_t |
2561 | exec_handle_port_actions(struct image_params *imgp, |
2562 | struct exec_port_actions *actions) |
2563 | { |
2564 | _posix_spawn_port_actions_t pacts = imgp->ip_px_spa; |
2565 | #if CONFIG_AUDIT |
2566 | proc_t p = vfs_context_proc(ctx: imgp->ip_vfs_context); |
2567 | #endif |
2568 | _ps_port_action_t *act = NULL; |
2569 | task_t task = get_threadtask(imgp->ip_new_thread); |
2570 | ipc_port_t port = NULL; |
2571 | errno_t ret = 0; |
2572 | int i = 0, portwatch_i = 0, registered_i = 0, excport_i = 0; |
2573 | kern_return_t kr; |
2574 | boolean_t task_has_watchport_boost = task_has_watchports(task: current_task()); |
2575 | boolean_t in_exec = (imgp->ip_flags & IMGPF_EXEC); |
2576 | int ptrauth_task_port_count = 0; |
2577 | |
2578 | for (i = 0; i < pacts->pspa_count; i++) { |
2579 | act = &pacts->pspa_actions[i]; |
2580 | |
2581 | switch (act->port_type) { |
2582 | case PSPA_SPECIAL: |
2583 | #if CONFIG_AUDIT |
2584 | case PSPA_AU_SESSION: |
2585 | #endif |
2586 | break; |
2587 | case PSPA_EXCEPTION: |
2588 | if (++actions->exception_port_count > TASK_MAX_EXCEPTION_PORT_COUNT) { |
2589 | ret = EINVAL; |
2590 | goto done; |
2591 | } |
2592 | break; |
2593 | case PSPA_IMP_WATCHPORTS: |
2594 | if (++actions->portwatch_count > TASK_MAX_WATCHPORT_COUNT) { |
2595 | ret = EINVAL; |
2596 | goto done; |
2597 | } |
2598 | break; |
2599 | case PSPA_REGISTERED_PORTS: |
2600 | if (++actions->registered_count > TASK_PORT_REGISTER_MAX) { |
2601 | ret = EINVAL; |
2602 | goto done; |
2603 | } |
2604 | break; |
2605 | case PSPA_PTRAUTH_TASK_PORT: |
2606 | if (++ptrauth_task_port_count > 1) { |
2607 | ret = EINVAL; |
2608 | goto done; |
2609 | } |
2610 | break; |
2611 | default: |
2612 | ret = EINVAL; |
2613 | goto done; |
2614 | } |
2615 | } |
2616 | |
2617 | if (actions->exception_port_count) { |
2618 | actions->excport_array = kalloc_type(struct exception_port_action_t, |
2619 | actions->exception_port_count, Z_WAITOK | Z_ZERO); |
2620 | |
2621 | if (actions->excport_array == NULL) { |
2622 | ret = ENOMEM; |
2623 | goto done; |
2624 | } |
2625 | } |
2626 | if (actions->portwatch_count) { |
2627 | if (in_exec && task_has_watchport_boost) { |
2628 | ret = EINVAL; |
2629 | goto done; |
2630 | } |
2631 | actions->portwatch_array = kalloc_type(ipc_port_t, |
2632 | actions->portwatch_count, Z_WAITOK | Z_ZERO); |
2633 | if (actions->portwatch_array == NULL) { |
2634 | ret = ENOMEM; |
2635 | goto done; |
2636 | } |
2637 | } |
2638 | |
2639 | if (actions->registered_count) { |
2640 | actions->registered_array = kalloc_type(ipc_port_t, |
2641 | actions->registered_count, Z_WAITOK | Z_ZERO); |
2642 | if (actions->registered_array == NULL) { |
2643 | ret = ENOMEM; |
2644 | goto done; |
2645 | } |
2646 | } |
2647 | |
2648 | for (i = 0; i < pacts->pspa_count; i++) { |
2649 | act = &pacts->pspa_actions[i]; |
2650 | |
2651 | if (MACH_PORT_VALID(act->new_port)) { |
2652 | kr = ipc_object_copyin(space: get_task_ipcspace(t: current_task()), |
2653 | name: act->new_port, MACH_MSG_TYPE_COPY_SEND, |
2654 | objectp: (ipc_object_t *) &port, context: 0, NULL, IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND); |
2655 | |
2656 | if (kr != KERN_SUCCESS) { |
2657 | ret = EINVAL; |
2658 | goto done; |
2659 | } |
2660 | } else { |
2661 | /* it's NULL or DEAD */ |
2662 | port = CAST_MACH_NAME_TO_PORT(act->new_port); |
2663 | } |
2664 | |
2665 | switch (act->port_type) { |
2666 | case PSPA_SPECIAL: |
2667 | kr = task_set_special_port(task, which_port: act->which, special_port: port); |
2668 | |
2669 | if (kr != KERN_SUCCESS) { |
2670 | ret = EINVAL; |
2671 | } |
2672 | break; |
2673 | |
2674 | #if CONFIG_AUDIT |
2675 | case PSPA_AU_SESSION: |
2676 | ret = audit_session_spawnjoin(p, port); |
2677 | if (ret) { |
2678 | /* audit_session_spawnjoin() has already dropped the reference in case of error. */ |
2679 | goto done; |
2680 | } |
2681 | |
2682 | break; |
2683 | #endif |
2684 | case PSPA_EXCEPTION: |
2685 | assert(excport_i < actions->exception_port_count); |
2686 | /* hold on to this till end of spawn */ |
2687 | actions->excport_array[excport_i].port_action = act; |
2688 | actions->excport_array[excport_i].port = port; |
2689 | excport_i++; |
2690 | break; |
2691 | case PSPA_IMP_WATCHPORTS: |
2692 | assert(portwatch_i < actions->portwatch_count); |
2693 | /* hold on to this till end of spawn */ |
2694 | actions->portwatch_array[portwatch_i++] = port; |
2695 | break; |
2696 | case PSPA_REGISTERED_PORTS: |
2697 | assert(registered_i < actions->registered_count); |
2698 | /* hold on to this till end of spawn */ |
2699 | actions->registered_array[registered_i++] = port; |
2700 | break; |
2701 | |
2702 | case PSPA_PTRAUTH_TASK_PORT: |
2703 | #if (DEVELOPMENT || DEBUG) |
2704 | #if defined(HAS_APPLE_PAC) |
2705 | { |
2706 | task_t ptr_auth_task = convert_port_to_task(port); |
2707 | |
2708 | if (ptr_auth_task == TASK_NULL) { |
2709 | ret = EINVAL; |
2710 | break; |
2711 | } |
2712 | |
2713 | imgp->ip_inherited_shared_region_id = |
2714 | task_get_vm_shared_region_id_and_jop_pid(ptr_auth_task, |
2715 | &imgp->ip_inherited_jop_pid); |
2716 | |
2717 | /* Deallocate task ref returned by convert_port_to_task */ |
2718 | task_deallocate(ptr_auth_task); |
2719 | } |
2720 | #endif /* HAS_APPLE_PAC */ |
2721 | #endif /* (DEVELOPMENT || DEBUG) */ |
2722 | |
2723 | /* consume the port right in case of success */ |
2724 | ipc_port_release_send(port); |
2725 | break; |
2726 | default: |
2727 | ret = EINVAL; |
2728 | break; |
2729 | } |
2730 | |
2731 | if (ret) { |
2732 | /* action failed, so release port resources */ |
2733 | ipc_port_release_send(port); |
2734 | break; |
2735 | } |
2736 | } |
2737 | |
2738 | done: |
2739 | if (0 != ret) { |
2740 | DTRACE_PROC1(spawn__port__failure, mach_port_name_t, act->new_port); |
2741 | } |
2742 | return ret; |
2743 | } |
2744 | |
2745 | |
2746 | /* |
2747 | * exec_handle_exception_port_actions |
2748 | * |
2749 | * Description: Go through the saved exception ports in exec_port_actions, |
2750 | * calling task_set_exception_ports for the current Task. |
2751 | * This must happen after image activation, and after exec_resettextvp() |
2752 | * because task_set_exception_ports checks the `TF_PLATFORM` bit and entitlements. |
2753 | * |
2754 | * Parameters: struct image_params * Image parameter block |
2755 | * struct exec_port_actions * Saved Port Actions |
2756 | * |
2757 | * Returns: 0 Success |
2758 | * EINVAL task_set_exception_ports failed |
2759 | */ |
2760 | static errno_t |
2761 | exec_handle_exception_port_actions(const struct image_params *imgp, |
2762 | const struct exec_port_actions *actions) |
2763 | { |
2764 | task_t task = get_threadtask(imgp->ip_new_thread); |
2765 | |
2766 | for (int i = 0; i < actions->exception_port_count; i++) { |
2767 | ipc_port_t port = actions->excport_array[i].port; |
2768 | _ps_port_action_t *act = actions->excport_array[i].port_action; |
2769 | assert(act != NULL); |
2770 | kern_return_t kr = task_set_exception_ports(task, exception_mask: act->mask, new_port: port, |
2771 | behavior: act->behavior, new_flavor: act->flavor); |
2772 | if (kr != KERN_SUCCESS) { |
2773 | DTRACE_PROC1(spawn__exception__port__failure, mach_port_name_t, act->new_port); |
2774 | return EINVAL; |
2775 | } |
2776 | actions->excport_array[i].port = NULL; |
2777 | } |
2778 | |
2779 | return 0; |
2780 | } |
2781 | |
2782 | |
2783 | /* |
2784 | * exec_handle_file_actions |
2785 | * |
2786 | * Description: Go through the _posix_file_actions_t contents applying the |
2787 | * open, close, and dup2 operations to the open file table for |
2788 | * the current process. |
2789 | * |
2790 | * Parameters: struct image_params * Image parameter block |
2791 | * |
2792 | * Returns: 0 Success |
2793 | * ??? |
2794 | * |
2795 | * Note: Actions are applied in the order specified, with the credential |
2796 | * of the parent process. This is done to permit the parent |
2797 | * process to utilize POSIX_SPAWN_RESETIDS to drop privilege in |
2798 | * the child following operations the child may in fact not be |
2799 | * normally permitted to perform. |
2800 | */ |
2801 | static int |
2802 | exec_handle_file_actions(struct image_params *imgp, short psa_flags) |
2803 | { |
2804 | int error = 0; |
2805 | int action; |
2806 | proc_t p = vfs_context_proc(ctx: imgp->ip_vfs_context); |
2807 | kauth_cred_t p_cred = vfs_context_ucred(ctx: imgp->ip_vfs_context); |
2808 | _posix_spawn_file_actions_t px_sfap = imgp->ip_px_sfa; |
2809 | int ival[2]; /* dummy retval for system calls) */ |
2810 | #if CONFIG_AUDIT |
2811 | struct uthread *uthread = current_uthread(); |
2812 | #endif |
2813 | |
2814 | for (action = 0; action < px_sfap->psfa_act_count; action++) { |
2815 | _psfa_action_t *psfa = &px_sfap->psfa_act_acts[action]; |
2816 | |
2817 | switch (psfa->psfaa_type) { |
2818 | case PSFA_OPEN: { |
2819 | /* |
2820 | * Open is different, in that it requires the use of |
2821 | * a path argument, which is normally copied in from |
2822 | * user space; because of this, we have to support an |
2823 | * open from kernel space that passes an address space |
2824 | * context of UIO_SYSSPACE, and casts the address |
2825 | * argument to a user_addr_t. |
2826 | */ |
2827 | struct vnode_attr *vap; |
2828 | struct nameidata *ndp; |
2829 | int mode = psfa->psfaa_openargs.psfao_mode; |
2830 | int origfd; |
2831 | struct { |
2832 | struct vnode_attr va; |
2833 | struct nameidata nd; |
2834 | } *__open_data; |
2835 | |
2836 | __open_data = kalloc_type(typeof(*__open_data), Z_WAITOK | Z_ZERO); |
2837 | if (__open_data == NULL) { |
2838 | error = ENOMEM; |
2839 | break; |
2840 | } |
2841 | |
2842 | vap = &__open_data->va; |
2843 | ndp = &__open_data->nd; |
2844 | |
2845 | VATTR_INIT(vap); |
2846 | /* Mask off all but regular access permissions */ |
2847 | mode = ((mode & ~p->p_fd.fd_cmask) & ALLPERMS) & ~S_ISTXT; |
2848 | VATTR_SET(vap, va_mode, mode & ACCESSPERMS); |
2849 | |
2850 | AUDIT_SUBCALL_ENTER(OPEN, p, uthread); |
2851 | |
2852 | NDINIT(ndp, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_SYSSPACE, |
2853 | CAST_USER_ADDR_T(psfa->psfaa_openargs.psfao_path), |
2854 | imgp->ip_vfs_context); |
2855 | |
2856 | error = open1(ctx: imgp->ip_vfs_context, ndp, |
2857 | uflags: psfa->psfaa_openargs.psfao_oflag, |
2858 | vap, NULL, NULL, retval: &origfd, AUTH_OPEN_NOAUTHFD); |
2859 | |
2860 | kfree_type(typeof(*__open_data), __open_data); |
2861 | |
2862 | AUDIT_SUBCALL_EXIT(uthread, error); |
2863 | |
2864 | /* |
2865 | * If there's an error, or we get the right fd by |
2866 | * accident, then drop out here. This is easier than |
2867 | * reworking all the open code to preallocate fd |
2868 | * slots, and internally taking one as an argument. |
2869 | */ |
2870 | if (error || origfd == psfa->psfaa_filedes) { |
2871 | break; |
2872 | } |
2873 | |
2874 | /* |
2875 | * If we didn't fall out from an error, we ended up |
2876 | * with the wrong fd; so now we've got to try to dup2 |
2877 | * it to the right one. |
2878 | */ |
2879 | AUDIT_SUBCALL_ENTER(DUP2, p, uthread); |
2880 | error = dup2(p, p_cred, from: origfd, to: psfa->psfaa_filedes, fd: ival); |
2881 | AUDIT_SUBCALL_EXIT(uthread, error); |
2882 | if (error) { |
2883 | break; |
2884 | } |
2885 | |
2886 | /* |
2887 | * Finally, close the original fd. |
2888 | */ |
2889 | AUDIT_SUBCALL_ENTER(CLOSE, p, uthread); |
2890 | error = close_nocancel(p, p_cred, fd: origfd); |
2891 | AUDIT_SUBCALL_EXIT(uthread, error); |
2892 | } |
2893 | break; |
2894 | |
2895 | case PSFA_DUP2: { |
2896 | AUDIT_SUBCALL_ENTER(DUP2, p, uthread); |
2897 | error = dup2(p, p_cred, from: psfa->psfaa_filedes, |
2898 | to: psfa->psfaa_dup2args.psfad_newfiledes, fd: ival); |
2899 | AUDIT_SUBCALL_EXIT(uthread, error); |
2900 | } |
2901 | break; |
2902 | |
2903 | case PSFA_FILEPORT_DUP2: { |
2904 | ipc_port_t port; |
2905 | kern_return_t kr; |
2906 | int origfd; |
2907 | |
2908 | if (!MACH_PORT_VALID(psfa->psfaa_fileport)) { |
2909 | error = EINVAL; |
2910 | break; |
2911 | } |
2912 | |
2913 | kr = ipc_object_copyin(space: get_task_ipcspace(t: current_task()), |
2914 | name: psfa->psfaa_fileport, MACH_MSG_TYPE_COPY_SEND, |
2915 | objectp: (ipc_object_t *) &port, context: 0, NULL, IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND); |
2916 | |
2917 | if (kr != KERN_SUCCESS) { |
2918 | error = EINVAL; |
2919 | break; |
2920 | } |
2921 | |
2922 | error = fileport_makefd(p, port, fp_flags: 0, fd: &origfd); |
2923 | |
2924 | if (IPC_PORT_NULL != port) { |
2925 | ipc_port_release_send(port); |
2926 | } |
2927 | |
2928 | if (error || origfd == psfa->psfaa_dup2args.psfad_newfiledes) { |
2929 | break; |
2930 | } |
2931 | |
2932 | AUDIT_SUBCALL_ENTER(DUP2, p, uthread); |
2933 | error = dup2(p, p_cred, from: origfd, |
2934 | to: psfa->psfaa_dup2args.psfad_newfiledes, fd: ival); |
2935 | AUDIT_SUBCALL_EXIT(uthread, error); |
2936 | if (error) { |
2937 | break; |
2938 | } |
2939 | |
2940 | AUDIT_SUBCALL_ENTER(CLOSE, p, uthread); |
2941 | error = close_nocancel(p, p_cred, fd: origfd); |
2942 | AUDIT_SUBCALL_EXIT(uthread, error); |
2943 | } |
2944 | break; |
2945 | |
2946 | case PSFA_CLOSE: { |
2947 | AUDIT_SUBCALL_ENTER(CLOSE, p, uthread); |
2948 | error = close_nocancel(p, p_cred, fd: psfa->psfaa_filedes); |
2949 | AUDIT_SUBCALL_EXIT(uthread, error); |
2950 | } |
2951 | break; |
2952 | |
2953 | case PSFA_INHERIT: { |
2954 | struct fileproc *fp; |
2955 | |
2956 | /* |
2957 | * Check to see if the descriptor exists, and |
2958 | * ensure it's -not- marked as close-on-exec. |
2959 | * |
2960 | * Attempting to "inherit" a guarded fd will |
2961 | * result in a error. |
2962 | */ |
2963 | |
2964 | proc_fdlock(p); |
2965 | if ((fp = fp_get_noref_locked(p, fd: psfa->psfaa_filedes)) == NULL) { |
2966 | error = EBADF; |
2967 | } else if (fp->fp_guard_attrs) { |
2968 | error = fp_guard_exception(p, fd: psfa->psfaa_filedes, |
2969 | fp, attribs: kGUARD_EXC_NOCLOEXEC); |
2970 | } else { |
2971 | fp->fp_flags &= ~FP_CLOEXEC; |
2972 | error = 0; |
2973 | } |
2974 | proc_fdunlock(p); |
2975 | } |
2976 | break; |
2977 | |
2978 | case PSFA_CHDIR: { |
2979 | /* |
2980 | * Chdir is different, in that it requires the use of |
2981 | * a path argument, which is normally copied in from |
2982 | * user space; because of this, we have to support a |
2983 | * chdir from kernel space that passes an address space |
2984 | * context of UIO_SYSSPACE, and casts the address |
2985 | * argument to a user_addr_t. |
2986 | */ |
2987 | struct nameidata *nd; |
2988 | nd = kalloc_type(struct nameidata, |
2989 | Z_WAITOK | Z_ZERO | Z_NOFAIL); |
2990 | |
2991 | AUDIT_SUBCALL_ENTER(CHDIR, p, uthread); |
2992 | NDINIT(nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1, UIO_SYSSPACE, |
2993 | CAST_USER_ADDR_T(psfa->psfaa_chdirargs.psfac_path), |
2994 | imgp->ip_vfs_context); |
2995 | |
2996 | error = chdir_internal(p, ctx: imgp->ip_vfs_context, ndp: nd, per_thread: 0); |
2997 | kfree_type(struct nameidata, nd); |
2998 | AUDIT_SUBCALL_EXIT(uthread, error); |
2999 | } |
3000 | break; |
3001 | |
3002 | case PSFA_FCHDIR: { |
3003 | AUDIT_SUBCALL_ENTER(FCHDIR, p, uthread); |
3004 | error = fchdir(p, ctx: imgp->ip_vfs_context, |
3005 | fd: psfa->psfaa_filedes, false); |
3006 | AUDIT_SUBCALL_EXIT(uthread, error); |
3007 | } |
3008 | break; |
3009 | |
3010 | default: |
3011 | error = EINVAL; |
3012 | break; |
3013 | } |
3014 | |
3015 | /* All file actions failures are considered fatal, per POSIX */ |
3016 | |
3017 | if (error) { |
3018 | if (PSFA_OPEN == psfa->psfaa_type) { |
3019 | DTRACE_PROC1(spawn__open__failure, uintptr_t, |
3020 | psfa->psfaa_openargs.psfao_path); |
3021 | } else { |
3022 | DTRACE_PROC1(spawn__fd__failure, int, psfa->psfaa_filedes); |
3023 | } |
3024 | break; |
3025 | } |
3026 | } |
3027 | |
3028 | if (error != 0 || (psa_flags & POSIX_SPAWN_CLOEXEC_DEFAULT) == 0) { |
3029 | return error; |
3030 | } |
3031 | |
3032 | /* |
3033 | * If POSIX_SPAWN_CLOEXEC_DEFAULT is set, behave (during |
3034 | * this spawn only) as if "close on exec" is the default |
3035 | * disposition of all pre-existing file descriptors. In this case, |
3036 | * the list of file descriptors mentioned in the file actions |
3037 | * are the only ones that can be inherited, so mark them now. |
3038 | * |
3039 | * The actual closing part comes later, in fdt_exec(). |
3040 | */ |
3041 | proc_fdlock(p); |
3042 | for (action = 0; action < px_sfap->psfa_act_count; action++) { |
3043 | _psfa_action_t *psfa = &px_sfap->psfa_act_acts[action]; |
3044 | int fd = psfa->psfaa_filedes; |
3045 | |
3046 | switch (psfa->psfaa_type) { |
3047 | case PSFA_DUP2: |
3048 | case PSFA_FILEPORT_DUP2: |
3049 | fd = psfa->psfaa_dup2args.psfad_newfiledes; |
3050 | OS_FALLTHROUGH; |
3051 | case PSFA_OPEN: |
3052 | case PSFA_INHERIT: |
3053 | *fdflags(p, fd) |= UF_INHERIT; |
3054 | break; |
3055 | |
3056 | case PSFA_CLOSE: |
3057 | case PSFA_CHDIR: |
3058 | case PSFA_FCHDIR: |
3059 | /* |
3060 | * Although PSFA_FCHDIR does have a file descriptor, it is not |
3061 | * *creating* one, thus we do not automatically mark it for |
3062 | * inheritance under POSIX_SPAWN_CLOEXEC_DEFAULT. A client that |
3063 | * wishes it to be inherited should use the PSFA_INHERIT action |
3064 | * explicitly. |
3065 | */ |
3066 | break; |
3067 | } |
3068 | } |
3069 | proc_fdunlock(p); |
3070 | |
3071 | return 0; |
3072 | } |
3073 | |
3074 | #if CONFIG_MACF |
3075 | /* |
3076 | * Check that the extension's data is within the bounds of the |
3077 | * allocation storing all extensions' data |
3078 | */ |
3079 | static inline errno_t |
3080 | exec_spawnattr_validate_policyext_data(const struct ip_px_smpx_s *px_s, |
3081 | const _ps_mac_policy_extension_t *ext) |
3082 | { |
3083 | uint64_t dataend; |
3084 | |
3085 | if (__improbable(os_add_overflow(ext->dataoff, ext->datalen, &dataend))) { |
3086 | return EOVERFLOW; |
3087 | } |
3088 | if (__improbable(dataend > px_s->datalen)) { |
3089 | return EINVAL; |
3090 | } |
3091 | |
3092 | return 0; |
3093 | } |
3094 | |
3095 | /* |
3096 | * exec_spawnattr_getmacpolicyinfo |
3097 | */ |
3098 | void * |
3099 | exec_spawnattr_getmacpolicyinfo(const void *macextensions, const char *policyname, size_t *lenp) |
3100 | { |
3101 | const struct ip_px_smpx_s *px_s = macextensions; |
3102 | const struct _posix_spawn_mac_policy_extensions *psmx = NULL; |
3103 | int i; |
3104 | |
3105 | if (px_s == NULL) { |
3106 | return NULL; |
3107 | } |
3108 | |
3109 | psmx = px_s->array; |
3110 | if (psmx == NULL) { |
3111 | return NULL; |
3112 | } |
3113 | |
3114 | for (i = 0; i < psmx->psmx_count; i++) { |
3115 | const _ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[i]; |
3116 | if (strncmp(s1: extension->policyname, s2: policyname, n: sizeof(extension->policyname)) == 0) { |
3117 | if (__improbable(exec_spawnattr_validate_policyext_data(px_s, extension))) { |
3118 | panic("invalid mac policy extension data" ); |
3119 | } |
3120 | if (lenp != NULL) { |
3121 | *lenp = (size_t)extension->datalen; |
3122 | } |
3123 | return (void *)((uintptr_t)px_s->data + extension->dataoff); |
3124 | } |
3125 | } |
3126 | |
3127 | if (lenp != NULL) { |
3128 | *lenp = 0; |
3129 | } |
3130 | return NULL; |
3131 | } |
3132 | |
3133 | static int |
3134 | spawn_copyin_macpolicyinfo(const struct user__posix_spawn_args_desc *px_args, |
3135 | struct ip_px_smpx_s *pxsp) |
3136 | { |
3137 | _posix_spawn_mac_policy_extensions_t psmx = NULL; |
3138 | uint8_t *data = NULL; |
3139 | uint64_t datalen = 0; |
3140 | uint64_t dataoff = 0; |
3141 | int error = 0; |
3142 | |
3143 | bzero(s: pxsp, n: sizeof(*pxsp)); |
3144 | |
3145 | if (px_args->mac_extensions_size < PS_MAC_EXTENSIONS_SIZE(1) || |
3146 | px_args->mac_extensions_size > PAGE_SIZE) { |
3147 | error = EINVAL; |
3148 | goto bad; |
3149 | } |
3150 | |
3151 | psmx = kalloc_data(px_args->mac_extensions_size, Z_WAITOK); |
3152 | if (psmx == NULL) { |
3153 | error = ENOMEM; |
3154 | goto bad; |
3155 | } |
3156 | |
3157 | error = copyin(px_args->mac_extensions, psmx, px_args->mac_extensions_size); |
3158 | if (error) { |
3159 | goto bad; |
3160 | } |
3161 | |
3162 | size_t extsize = PS_MAC_EXTENSIONS_SIZE(psmx->psmx_count); |
3163 | if (extsize == 0 || extsize > px_args->mac_extensions_size) { |
3164 | error = EINVAL; |
3165 | goto bad; |
3166 | } |
3167 | |
3168 | for (int i = 0; i < psmx->psmx_count; i++) { |
3169 | _ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[i]; |
3170 | if (extension->datalen == 0 || extension->datalen > PAGE_SIZE) { |
3171 | error = EINVAL; |
3172 | goto bad; |
3173 | } |
3174 | if (__improbable(os_add_overflow(datalen, extension->datalen, &datalen))) { |
3175 | error = ENOMEM; |
3176 | goto bad; |
3177 | } |
3178 | } |
3179 | |
3180 | data = kalloc_data((vm_size_t)datalen, Z_WAITOK); |
3181 | if (data == NULL) { |
3182 | error = ENOMEM; |
3183 | goto bad; |
3184 | } |
3185 | |
3186 | for (int i = 0; i < psmx->psmx_count; i++) { |
3187 | _ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[i]; |
3188 | |
3189 | #if !__LP64__ |
3190 | if (extension->data > UINT32_MAX) { |
3191 | goto bad; |
3192 | } |
3193 | #endif |
3194 | error = copyin((user_addr_t)extension->data, &data[dataoff], (size_t)extension->datalen); |
3195 | if (error) { |
3196 | error = ENOMEM; |
3197 | goto bad; |
3198 | } |
3199 | extension->dataoff = dataoff; |
3200 | dataoff += extension->datalen; |
3201 | } |
3202 | |
3203 | pxsp->array = psmx; |
3204 | pxsp->data = data; |
3205 | pxsp->datalen = datalen; |
3206 | return 0; |
3207 | |
3208 | bad: |
3209 | kfree_data(psmx, px_args->mac_extensions_size); |
3210 | kfree_data(data, (vm_size_t)datalen); |
3211 | return error; |
3212 | } |
3213 | #endif /* CONFIG_MACF */ |
3214 | |
3215 | #if CONFIG_COALITIONS |
3216 | static inline void |
3217 | spawn_coalitions_release_all(coalition_t coal[COALITION_NUM_TYPES]) |
3218 | { |
3219 | for (int c = 0; c < COALITION_NUM_TYPES; c++) { |
3220 | if (coal[c]) { |
3221 | coalition_remove_active(coal: coal[c]); |
3222 | coalition_release(coal: coal[c]); |
3223 | } |
3224 | } |
3225 | } |
3226 | #endif |
3227 | |
3228 | #if CONFIG_PERSONAS |
3229 | static int |
3230 | spawn_validate_persona(struct _posix_spawn_persona_info *px_persona) |
3231 | { |
3232 | int error = 0; |
3233 | struct persona *persona = NULL; |
3234 | |
3235 | if (!IOCurrentTaskHasEntitlement( PERSONA_MGMT_ENTITLEMENT)) { |
3236 | return EPERM; |
3237 | } |
3238 | |
3239 | if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_GROUPS) { |
3240 | if (px_persona->pspi_ngroups > NGROUPS_MAX) { |
3241 | return EINVAL; |
3242 | } |
3243 | } |
3244 | |
3245 | persona = persona_lookup(id: px_persona->pspi_id); |
3246 | if (!persona) { |
3247 | error = ESRCH; |
3248 | goto out; |
3249 | } |
3250 | |
3251 | out: |
3252 | if (persona) { |
3253 | persona_put(persona); |
3254 | } |
3255 | |
3256 | return error; |
3257 | } |
3258 | |
3259 | static bool |
3260 | kauth_cred_model_setpersona( |
3261 | kauth_cred_t model, |
3262 | struct _posix_spawn_persona_info *px_persona) |
3263 | { |
3264 | bool updated = false; |
3265 | |
3266 | if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_UID) { |
3267 | updated |= kauth_cred_model_setresuid(model, |
3268 | ruid: px_persona->pspi_uid, |
3269 | euid: px_persona->pspi_uid, |
3270 | svuid: px_persona->pspi_uid, |
3271 | KAUTH_UID_NONE); |
3272 | } |
3273 | |
3274 | if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_GID) { |
3275 | updated |= kauth_cred_model_setresgid(model, |
3276 | rgid: px_persona->pspi_gid, |
3277 | egid: px_persona->pspi_gid, |
3278 | svgid: px_persona->pspi_gid); |
3279 | } |
3280 | |
3281 | if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_GROUPS) { |
3282 | updated |= kauth_cred_model_setgroups(model, |
3283 | groups: px_persona->pspi_groups, |
3284 | groupcount: px_persona->pspi_ngroups, |
3285 | gmuid: px_persona->pspi_gmuid); |
3286 | } |
3287 | |
3288 | return updated; |
3289 | } |
3290 | |
3291 | static int |
3292 | spawn_persona_adopt(proc_t p, struct _posix_spawn_persona_info *px_persona) |
3293 | { |
3294 | struct persona *persona = NULL; |
3295 | |
3296 | /* |
3297 | * we want to spawn into the given persona, but we want to override |
3298 | * the kauth with a different UID/GID combo |
3299 | */ |
3300 | persona = persona_lookup(id: px_persona->pspi_id); |
3301 | if (!persona) { |
3302 | return ESRCH; |
3303 | } |
3304 | |
3305 | return persona_proc_adopt(p, persona, |
3306 | fn: ^bool (kauth_cred_t parent __unused, kauth_cred_t model) { |
3307 | return kauth_cred_model_setpersona(model, px_persona); |
3308 | }); |
3309 | } |
3310 | #endif |
3311 | |
3312 | #if __arm64__ |
3313 | #if DEVELOPMENT || DEBUG |
3314 | TUNABLE(int, legacy_footprint_entitlement_mode, "legacy_footprint_entitlement_mode" , |
3315 | LEGACY_FOOTPRINT_ENTITLEMENT_IGNORE); |
3316 | |
3317 | __startup_func |
3318 | static void |
3319 | legacy_footprint_entitlement_mode_init(void) |
3320 | { |
3321 | /* |
3322 | * legacy_footprint_entitlement_mode specifies the behavior we want associated |
3323 | * with the entitlement. The supported modes are: |
3324 | * |
3325 | * LEGACY_FOOTPRINT_ENTITLEMENT_IGNORE: |
3326 | * Indicates that we want every process to have the memory accounting |
3327 | * that is available in iOS 12.0 and beyond. |
3328 | * |
3329 | * LEGACY_FOOTPRINT_ENTITLEMENT_IOS11_ACCT: |
3330 | * Indicates that for every process that has the 'legacy footprint entitlement', |
3331 | * we want to give it the old iOS 11.0 accounting behavior which accounted some |
3332 | * of the process's memory to the kernel. |
3333 | * |
3334 | * LEGACY_FOOTPRINT_ENTITLEMENT_LIMIT_INCREASE: |
3335 | * Indicates that for every process that has the 'legacy footprint entitlement', |
3336 | * we want it to have a higher memory limit which will help them acclimate to the |
3337 | * iOS 12.0 (& beyond) accounting behavior that does the right accounting. |
3338 | * The bonus added to the system-wide task limit to calculate this higher memory limit |
3339 | * is available in legacy_footprint_bonus_mb. |
3340 | */ |
3341 | |
3342 | if (legacy_footprint_entitlement_mode < LEGACY_FOOTPRINT_ENTITLEMENT_IGNORE || |
3343 | legacy_footprint_entitlement_mode > LEGACY_FOOTPRINT_ENTITLEMENT_LIMIT_INCREASE) { |
3344 | legacy_footprint_entitlement_mode = LEGACY_FOOTPRINT_ENTITLEMENT_LIMIT_INCREASE; |
3345 | } |
3346 | } |
3347 | STARTUP(TUNABLES, STARTUP_RANK_MIDDLE, legacy_footprint_entitlement_mode_init); |
3348 | #else |
3349 | const int = LEGACY_FOOTPRINT_ENTITLEMENT_IGNORE; |
3350 | #endif |
3351 | |
3352 | static inline void |
3353 | (proc_t p, task_t task) |
3354 | { |
3355 | #pragma unused(p) |
3356 | boolean_t ; |
3357 | |
3358 | switch (legacy_footprint_entitlement_mode) { |
3359 | case LEGACY_FOOTPRINT_ENTITLEMENT_IGNORE: |
3360 | /* the entitlement is ignored */ |
3361 | break; |
3362 | case LEGACY_FOOTPRINT_ENTITLEMENT_IOS11_ACCT: |
3363 | /* the entitlement grants iOS11 legacy accounting */ |
3364 | legacy_footprint_entitled = memorystatus_task_has_legacy_footprint_entitlement(task: proc_task(p)); |
3365 | if (legacy_footprint_entitled) { |
3366 | task_set_legacy_footprint(task); |
3367 | } |
3368 | break; |
3369 | case LEGACY_FOOTPRINT_ENTITLEMENT_LIMIT_INCREASE: |
3370 | /* the entitlement grants a footprint limit increase */ |
3371 | legacy_footprint_entitled = memorystatus_task_has_legacy_footprint_entitlement(task: proc_task(p)); |
3372 | if (legacy_footprint_entitled) { |
3373 | task_set_extra_footprint_limit(task); |
3374 | } |
3375 | break; |
3376 | default: |
3377 | break; |
3378 | } |
3379 | } |
3380 | |
3381 | static inline void |
3382 | (proc_t p, task_t task) |
3383 | { |
3384 | #pragma unused(p) |
3385 | boolean_t ; |
3386 | |
3387 | /* the entitlement grants a footprint limit increase */ |
3388 | ios13extended_footprint_entitled = memorystatus_task_has_ios13extended_footprint_limit(task: proc_task(p)); |
3389 | if (ios13extended_footprint_entitled) { |
3390 | task_set_ios13extended_footprint_limit(task); |
3391 | } |
3392 | } |
3393 | |
3394 | static inline void |
3395 | proc_increased_memory_limit_entitled(proc_t p, task_t task) |
3396 | { |
3397 | bool entitled = memorystatus_task_has_increased_memory_limit_entitlement(task); |
3398 | |
3399 | if (entitled) { |
3400 | memorystatus_act_on_entitled_task_limit(p); |
3401 | } |
3402 | } |
3403 | |
3404 | /* |
3405 | * Check for any of the various entitlements that permit a higher |
3406 | * task footprint limit or alternate accounting and apply them. |
3407 | */ |
3408 | static inline void |
3409 | (proc_t p, task_t task) |
3410 | { |
3411 | proc_legacy_footprint_entitled(p, task); |
3412 | proc_ios13extended_footprint_entitled(p, task); |
3413 | proc_increased_memory_limit_entitled(p, task); |
3414 | } |
3415 | #endif /* __arm64__ */ |
3416 | |
3417 | /* |
3418 | * Processes with certain entitlements are granted a jumbo-size VM map. |
3419 | */ |
3420 | static inline void |
3421 | proc_apply_jit_and_vm_policies(struct image_params *imgp, proc_t p, task_t task) |
3422 | { |
3423 | #if CONFIG_MACF |
3424 | bool jit_entitled = false; |
3425 | #endif /* CONFIG_MACF */ |
3426 | bool needs_jumbo_va = false; |
3427 | struct _posix_spawnattr *psa = imgp->ip_px_sa; |
3428 | |
3429 | #if CONFIG_MACF |
3430 | jit_entitled = (mac_proc_check_map_anon(proc: p, cred: proc_ucred_unsafe(p), |
3431 | u_addr: 0, u_size: 0, prot: 0, MAP_JIT, NULL) == 0); |
3432 | needs_jumbo_va = jit_entitled || IOTaskHasEntitlement(task, |
3433 | entitlement: "com.apple.developer.kernel.extended-virtual-addressing" ) || |
3434 | memorystatus_task_has_increased_memory_limit_entitlement(task); |
3435 | #else |
3436 | #pragma unused(p) |
3437 | #endif /* CONFIG_MACF */ |
3438 | |
3439 | |
3440 | if (needs_jumbo_va) { |
3441 | vm_map_set_jumbo(map: get_task_map(task)); |
3442 | } |
3443 | |
3444 | if (psa && psa->psa_max_addr) { |
3445 | vm_map_set_max_addr(map: get_task_map(task), new_max_offset: psa->psa_max_addr); |
3446 | } |
3447 | |
3448 | #if CONFIG_MAP_RANGES |
3449 | if (task_is_hardened_binary(task) && !proc_is_simulated(p)) { |
3450 | /* |
3451 | * This must be done last as it needs to observe |
3452 | * any kind of VA space growth that was requested. |
3453 | * This is used by the secure allocator, so |
3454 | * must be applied to all hardened binaries |
3455 | */ |
3456 | vm_map_range_configure(get_task_map(task)); |
3457 | } |
3458 | #endif /* CONFIG_MAP_RANGES */ |
3459 | |
3460 | #if CONFIG_MACF |
3461 | if (jit_entitled) { |
3462 | vm_map_set_jit_entitled(map: get_task_map(task)); |
3463 | |
3464 | } |
3465 | #endif /* CONFIG_MACF */ |
3466 | |
3467 | #if XNU_TARGET_OS_OSX |
3468 | /* TPRO cannot be enforced on binaries that load 3P plugins on macos - rdar://107420220 */ |
3469 | const bool task_loads_3P_plugins = imgp->ip_flags & IMGPF_3P_PLUGINS; |
3470 | #endif /* XNU_TARGET_OS_OSX */ |
3471 | |
3472 | if (task_is_hardened_binary(task) |
3473 | #if XNU_TARGET_OS_OSX |
3474 | && !task_loads_3P_plugins |
3475 | #endif /* XNU_TARGET_OS_OSX */ |
3476 | ) { |
3477 | /* |
3478 | * Pre-emptively disable TPRO remapping for |
3479 | * hardened binaries (which do not load 3P plugins) |
3480 | */ |
3481 | vm_map_set_tpro_enforcement(map: get_task_map(task)); |
3482 | } |
3483 | } |
3484 | |
3485 | static int |
3486 | spawn_posix_cred_adopt(proc_t p, |
3487 | struct _posix_spawn_posix_cred_info *px_pcred_info) |
3488 | { |
3489 | int error = 0; |
3490 | |
3491 | if (px_pcred_info->pspci_flags & POSIX_SPAWN_POSIX_CRED_GID) { |
3492 | struct setgid_args args = { |
3493 | .gid = px_pcred_info->pspci_gid, |
3494 | }; |
3495 | error = setgid(p, &args, NULL); |
3496 | if (error) { |
3497 | return error; |
3498 | } |
3499 | } |
3500 | |
3501 | if (px_pcred_info->pspci_flags & POSIX_SPAWN_POSIX_CRED_GROUPS) { |
3502 | error = setgroups_internal(p, |
3503 | gidsetsize: px_pcred_info->pspci_ngroups, |
3504 | gidset: px_pcred_info->pspci_groups, |
3505 | gmuid: px_pcred_info->pspci_gmuid); |
3506 | if (error) { |
3507 | return error; |
3508 | } |
3509 | } |
3510 | |
3511 | if (px_pcred_info->pspci_flags & POSIX_SPAWN_POSIX_CRED_UID) { |
3512 | struct setuid_args args = { |
3513 | .uid = px_pcred_info->pspci_uid, |
3514 | }; |
3515 | error = setuid(p, &args, NULL); |
3516 | if (error) { |
3517 | return error; |
3518 | } |
3519 | } |
3520 | return 0; |
3521 | } |
3522 | |
3523 | /* |
3524 | * posix_spawn |
3525 | * |
 * Parameters:	uap->pid		Pointer to pid return area
 *		uap->path		Path name of the file to exec
 *		uap->argv		Argument list
 *		uap->envp		Environment list
 *		uap->adesc		Optional spawn arguments descriptor
 *					(attributes, file/port actions, ...);
 *					may be USER_ADDR_NULL
3530 | * |
3531 | * Returns: 0 Success |
3532 | * EINVAL Invalid argument |
3533 | * ENOTSUP Not supported |
3534 | * ENOEXEC Executable file format error |
3535 | * exec_activate_image:EINVAL Invalid argument |
3536 | * exec_activate_image:EACCES Permission denied |
3537 | * exec_activate_image:EINTR Interrupted function |
3538 | * exec_activate_image:ENOMEM Not enough space |
3539 | * exec_activate_image:EFAULT Bad address |
3540 | * exec_activate_image:ENAMETOOLONG Filename too long |
3541 | * exec_activate_image:ENOEXEC Executable file format error |
3542 | * exec_activate_image:ETXTBSY Text file busy [misuse of error code] |
3543 | * exec_activate_image:EAUTH Image decryption failed |
3544 | * exec_activate_image:EBADEXEC The executable is corrupt/unknown |
3545 | * exec_activate_image:??? |
3546 | * mac_execve_enter:??? |
3547 | * |
3548 | * TODO: Expect to need __mac_posix_spawn() at some point... |
3549 | * Handle posix_spawnattr_t |
3550 | * Handle posix_spawn_file_actions_t |
3551 | */ |
3552 | int |
3553 | posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval) |
3554 | { |
3555 | proc_t p = ap; |
3556 | user_addr_t pid = uap->pid; |
3557 | int ival[2]; /* dummy retval for setpgid() */ |
3558 | char *subsystem_root_path = NULL; |
3559 | struct image_params *imgp = NULL; |
3560 | struct vnode_attr *vap = NULL; |
3561 | struct vnode_attr *origvap = NULL; |
3562 | struct uthread *uthread = 0; /* compiler complains if not set to 0*/ |
3563 | int error, sig; |
3564 | int is_64 = IS_64BIT_PROCESS(p); |
3565 | struct vfs_context context; |
3566 | struct user__posix_spawn_args_desc px_args = {}; |
3567 | struct _posix_spawnattr px_sa = {}; |
3568 | _posix_spawn_file_actions_t px_sfap = NULL; |
3569 | _posix_spawn_port_actions_t px_spap = NULL; |
3570 | struct __kern_sigaction vec; |
3571 | boolean_t spawn_no_exec = FALSE; |
3572 | boolean_t proc_transit_set = TRUE; |
3573 | boolean_t proc_signal_set = TRUE; |
3574 | boolean_t exec_done = FALSE; |
3575 | os_reason_t exec_failure_reason = NULL; |
3576 | |
3577 | struct exec_port_actions port_actions = { }; |
3578 | vm_size_t px_sa_offset = offsetof(struct _posix_spawnattr, psa_ports); |
3579 | task_t old_task = current_task(); |
3580 | task_t new_task = NULL; |
3581 | boolean_t should_release_proc_ref = FALSE; |
3582 | void *inherit = NULL; |
3583 | uint8_t crash_behavior = 0; |
3584 | uint64_t crash_behavior_deadline = 0; |
3585 | #if CONFIG_EXCLAVES |
3586 | char *task_conclave_id = NULL; |
3587 | #endif |
3588 | #if CONFIG_PERSONAS |
3589 | struct _posix_spawn_persona_info *px_persona = NULL; |
3590 | #endif |
3591 | struct _posix_spawn_posix_cred_info *px_pcred_info = NULL; |
3592 | struct { |
3593 | struct image_params imgp; |
3594 | struct vnode_attr va; |
3595 | struct vnode_attr origva; |
3596 | } *__spawn_data; |
3597 | |
3598 | /* |
3599 | * Allocate a big chunk for locals instead of using stack since these |
3600 | * structures are pretty big. |
3601 | */ |
3602 | __spawn_data = kalloc_type(typeof(*__spawn_data), Z_WAITOK | Z_ZERO); |
3603 | if (__spawn_data == NULL) { |
3604 | error = ENOMEM; |
3605 | goto bad; |
3606 | } |
3607 | imgp = &__spawn_data->imgp; |
3608 | vap = &__spawn_data->va; |
3609 | origvap = &__spawn_data->origva; |
3610 | |
3611 | /* Initialize the common data in the image_params structure */ |
3612 | imgp->ip_user_fname = uap->path; |
3613 | imgp->ip_user_argv = uap->argv; |
3614 | imgp->ip_user_envv = uap->envp; |
3615 | imgp->ip_vattr = vap; |
3616 | imgp->ip_origvattr = origvap; |
3617 | imgp->ip_vfs_context = &context; |
3618 | imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT_ADDR : IMGPF_NONE); |
3619 | imgp->ip_seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32); |
3620 | imgp->ip_mac_return = 0; |
3621 | imgp->ip_px_persona = NULL; |
3622 | imgp->ip_px_pcred_info = NULL; |
3623 | imgp->ip_cs_error = OS_REASON_NULL; |
3624 | imgp->ip_simulator_binary = IMGPF_SB_DEFAULT; |
3625 | imgp->ip_subsystem_root_path = NULL; |
3626 | imgp->ip_inherited_shared_region_id = NULL; |
3627 | imgp->ip_inherited_jop_pid = 0; |
3628 | uthread_set_exec_data(uth: current_uthread(), imgp); |
3629 | |
3630 | if (uap->adesc != USER_ADDR_NULL) { |
3631 | if (is_64) { |
3632 | error = copyin(uap->adesc, &px_args, sizeof(px_args)); |
3633 | } else { |
3634 | struct user32__posix_spawn_args_desc px_args32; |
3635 | |
3636 | error = copyin(uap->adesc, &px_args32, sizeof(px_args32)); |
3637 | |
3638 | /* |
3639 | * Convert arguments descriptor from external 32 bit |
3640 | * representation to internal 64 bit representation |
3641 | */ |
3642 | px_args.attr_size = px_args32.attr_size; |
3643 | px_args.attrp = CAST_USER_ADDR_T(px_args32.attrp); |
3644 | px_args.file_actions_size = px_args32.file_actions_size; |
3645 | px_args.file_actions = CAST_USER_ADDR_T(px_args32.file_actions); |
3646 | px_args.port_actions_size = px_args32.port_actions_size; |
3647 | px_args.port_actions = CAST_USER_ADDR_T(px_args32.port_actions); |
3648 | px_args.mac_extensions_size = px_args32.mac_extensions_size; |
3649 | px_args.mac_extensions = CAST_USER_ADDR_T(px_args32.mac_extensions); |
3650 | px_args.coal_info_size = px_args32.coal_info_size; |
3651 | px_args.coal_info = CAST_USER_ADDR_T(px_args32.coal_info); |
3652 | px_args.persona_info_size = px_args32.persona_info_size; |
3653 | px_args.persona_info = CAST_USER_ADDR_T(px_args32.persona_info); |
3654 | px_args.posix_cred_info_size = px_args32.posix_cred_info_size; |
3655 | px_args.posix_cred_info = CAST_USER_ADDR_T(px_args32.posix_cred_info); |
3656 | px_args.subsystem_root_path_size = px_args32.subsystem_root_path_size; |
3657 | px_args.subsystem_root_path = CAST_USER_ADDR_T(px_args32.subsystem_root_path); |
3658 | px_args.conclave_id_size = px_args32.conclave_id_size; |
3659 | px_args.conclave_id = CAST_USER_ADDR_T(px_args32.conclave_id); |
3660 | } |
3661 | if (error) { |
3662 | goto bad; |
3663 | } |
3664 | |
3665 | if (px_args.attr_size != 0) { |
3666 | /* |
3667 | * We are not copying the port_actions pointer, |
3668 | * because we already have it from px_args. |
3669 | * This is a bit fragile: <rdar://problem/16427422> |
3670 | */ |
3671 | |
3672 | if ((error = copyin(px_args.attrp, &px_sa, px_sa_offset)) != 0) { |
3673 | goto bad; |
3674 | } |
3675 | |
3676 | imgp->ip_px_sa = &px_sa; |
3677 | } |
3678 | if (px_args.file_actions_size != 0) { |
3679 | /* Limit file_actions to allowed number of open files */ |
3680 | size_t maxfa_size = PSF_ACTIONS_SIZE(proc_limitgetcur_nofile(p)); |
3681 | |
3682 | if (px_args.file_actions_size < PSF_ACTIONS_SIZE(1) || |
3683 | maxfa_size == 0 || px_args.file_actions_size > maxfa_size) { |
3684 | error = EINVAL; |
3685 | goto bad; |
3686 | } |
3687 | |
3688 | px_sfap = kalloc_data(px_args.file_actions_size, Z_WAITOK); |
3689 | if (px_sfap == NULL) { |
3690 | error = ENOMEM; |
3691 | goto bad; |
3692 | } |
3693 | imgp->ip_px_sfa = px_sfap; |
3694 | |
3695 | if ((error = copyin(px_args.file_actions, px_sfap, |
3696 | px_args.file_actions_size)) != 0) { |
3697 | goto bad; |
3698 | } |
3699 | |
3700 | /* Verify that the action count matches the struct size */ |
3701 | size_t psfsize = PSF_ACTIONS_SIZE(px_sfap->psfa_act_count); |
3702 | if (psfsize == 0 || psfsize != px_args.file_actions_size) { |
3703 | error = EINVAL; |
3704 | goto bad; |
3705 | } |
3706 | } |
3707 | if (px_args.port_actions_size != 0) { |
3708 | /* Limit port_actions to one page of data */ |
3709 | if (px_args.port_actions_size < PS_PORT_ACTIONS_SIZE(1) || |
3710 | px_args.port_actions_size > PAGE_SIZE) { |
3711 | error = EINVAL; |
3712 | goto bad; |
3713 | } |
3714 | |
3715 | px_spap = kalloc_data(px_args.port_actions_size, Z_WAITOK); |
3716 | if (px_spap == NULL) { |
3717 | error = ENOMEM; |
3718 | goto bad; |
3719 | } |
3720 | imgp->ip_px_spa = px_spap; |
3721 | |
3722 | if ((error = copyin(px_args.port_actions, px_spap, |
3723 | px_args.port_actions_size)) != 0) { |
3724 | goto bad; |
3725 | } |
3726 | |
3727 | /* Verify that the action count matches the struct size */ |
3728 | size_t pasize = PS_PORT_ACTIONS_SIZE(px_spap->pspa_count); |
3729 | if (pasize == 0 || pasize != px_args.port_actions_size) { |
3730 | error = EINVAL; |
3731 | goto bad; |
3732 | } |
3733 | } |
3734 | #if CONFIG_PERSONAS |
3735 | /* copy in the persona info */ |
3736 | if (px_args.persona_info_size != 0 && px_args.persona_info != 0) { |
3737 | /* for now, we need the exact same struct in user space */ |
3738 | if (px_args.persona_info_size != sizeof(*px_persona)) { |
3739 | error = ERANGE; |
3740 | goto bad; |
3741 | } |
3742 | |
3743 | px_persona = kalloc_data(px_args.persona_info_size, Z_WAITOK); |
3744 | if (px_persona == NULL) { |
3745 | error = ENOMEM; |
3746 | goto bad; |
3747 | } |
3748 | imgp->ip_px_persona = px_persona; |
3749 | |
3750 | if ((error = copyin(px_args.persona_info, px_persona, |
3751 | px_args.persona_info_size)) != 0) { |
3752 | goto bad; |
3753 | } |
3754 | if ((error = spawn_validate_persona(px_persona)) != 0) { |
3755 | goto bad; |
3756 | } |
3757 | } |
3758 | #endif |
3759 | /* copy in the posix cred info */ |
3760 | if (px_args.posix_cred_info_size != 0 && px_args.posix_cred_info != 0) { |
3761 | /* for now, we need the exact same struct in user space */ |
3762 | if (px_args.posix_cred_info_size != sizeof(*px_pcred_info)) { |
3763 | error = ERANGE; |
3764 | goto bad; |
3765 | } |
3766 | |
3767 | if (!kauth_cred_issuser(cred: kauth_cred_get())) { |
3768 | error = EPERM; |
3769 | goto bad; |
3770 | } |
3771 | |
3772 | px_pcred_info = kalloc_data(px_args.posix_cred_info_size, Z_WAITOK); |
3773 | if (px_pcred_info == NULL) { |
3774 | error = ENOMEM; |
3775 | goto bad; |
3776 | } |
3777 | imgp->ip_px_pcred_info = px_pcred_info; |
3778 | |
3779 | if ((error = copyin(px_args.posix_cred_info, px_pcred_info, |
3780 | px_args.posix_cred_info_size)) != 0) { |
3781 | goto bad; |
3782 | } |
3783 | |
3784 | if (px_pcred_info->pspci_flags & POSIX_SPAWN_POSIX_CRED_GROUPS) { |
3785 | if (px_pcred_info->pspci_ngroups > NGROUPS_MAX) { |
3786 | error = EINVAL; |
3787 | goto bad; |
3788 | } |
3789 | } |
3790 | } |
3791 | #if CONFIG_MACF |
3792 | if (px_args.mac_extensions_size != 0) { |
3793 | if ((error = spawn_copyin_macpolicyinfo(px_args: &px_args, pxsp: (struct ip_px_smpx_s *)&imgp->ip_px_smpx)) != 0) { |
3794 | goto bad; |
3795 | } |
3796 | } |
3797 | #endif /* CONFIG_MACF */ |
3798 | if ((px_args.subsystem_root_path_size > 0) && (px_args.subsystem_root_path_size <= MAXPATHLEN)) { |
3799 | /* |
3800 | * If a valid-looking subsystem root has been |
3801 | * specified... |
3802 | */ |
3803 | if (IOTaskHasEntitlement(task: old_task, SPAWN_SUBSYSTEM_ROOT_ENTITLEMENT)) { |
3804 | /* |
3805 | * ...AND the parent has the entitlement, copy |
3806 | * the subsystem root path in. |
3807 | */ |
3808 | subsystem_root_path = zalloc_flags(ZV_NAMEI, |
3809 | Z_WAITOK | Z_ZERO | Z_NOFAIL); |
3810 | |
3811 | if ((error = copyin(px_args.subsystem_root_path, subsystem_root_path, px_args.subsystem_root_path_size))) { |
3812 | goto bad; |
3813 | } |
3814 | |
3815 | /* Paranoia */ |
3816 | subsystem_root_path[px_args.subsystem_root_path_size - 1] = 0; |
3817 | } |
3818 | } |
3819 | #if CONFIG_EXCLAVES |
3820 | if ((px_args.conclave_id_size > 0) && (px_args.conclave_id_size <= MAXCONCLAVENAME) && |
3821 | (exclaves_get_status() == EXCLAVES_STATUS_AVAILABLE)) { |
3822 | if (px_args.conclave_id) { |
3823 | if (imgp->ip_px_sa != NULL && (px_sa.psa_flags & POSIX_SPAWN_SETEXEC)) { |
3824 | /* Conclave id could be set only for true spawn */ |
3825 | error = EINVAL; |
3826 | goto bad; |
3827 | } |
3828 | task_conclave_id = kalloc_data(MAXCONCLAVENAME, |
3829 | Z_WAITOK | Z_ZERO | Z_NOFAIL); |
3830 | if ((error = copyin(px_args.conclave_id, task_conclave_id, MAXCONCLAVENAME))) { |
3831 | goto bad; |
3832 | } |
3833 | task_conclave_id[MAXCONCLAVENAME - 1] = 0; |
3834 | } |
3835 | } |
3836 | #endif |
3837 | } |
3838 | |
3839 | if (IOTaskHasEntitlement(task: old_task, SPAWN_SET_PANIC_CRASH_BEHAVIOR)) { |
3840 | /* Truncate to uint8_t since we only support 2 flags for now */ |
3841 | crash_behavior = (uint8_t)px_sa.psa_crash_behavior; |
3842 | crash_behavior_deadline = px_sa.psa_crash_behavior_deadline; |
3843 | } |
3844 | |
3845 | /* set uthread to parent */ |
3846 | uthread = current_uthread(); |
3847 | |
3848 | /* |
3849 | * <rdar://6640530>; this does not result in a behaviour change |
3850 | * relative to Leopard, so there should not be any existing code |
3851 | * which depends on it. |
3852 | */ |
3853 | |
3854 | if (imgp->ip_px_sa != NULL) { |
3855 | struct _posix_spawnattr *psa = (struct _posix_spawnattr *) imgp->ip_px_sa; |
3856 | if ((psa->psa_options & PSA_OPTION_PLUGIN_HOST_DISABLE_A_KEYS) == PSA_OPTION_PLUGIN_HOST_DISABLE_A_KEYS) { |
3857 | imgp->ip_flags |= IMGPF_PLUGIN_HOST_DISABLE_A_KEYS; |
3858 | } |
3859 | #if (DEVELOPMENT || DEBUG) |
3860 | if ((psa->psa_options & PSA_OPTION_ALT_ROSETTA) == PSA_OPTION_ALT_ROSETTA) { |
3861 | imgp->ip_flags |= (IMGPF_ROSETTA | IMGPF_ALT_ROSETTA); |
3862 | } |
3863 | #endif |
3864 | |
3865 | if ((error = exec_validate_spawnattr_policy(psa_apptype: psa->psa_apptype)) != 0) { |
3866 | goto bad; |
3867 | } |
3868 | } |
3869 | |
3870 | /* |
3871 | * If we don't have the extension flag that turns "posix_spawn()" |
3872 | * into "execve() with options", then we will be creating a new |
3873 | * process which does not inherit memory from the parent process, |
3874 | * which is one of the most expensive things about using fork() |
3875 | * and execve(). |
3876 | */ |
3877 | if (imgp->ip_px_sa == NULL || !(px_sa.psa_flags & POSIX_SPAWN_SETEXEC)) { |
3878 | /* Set the new task's coalition, if it is requested. */ |
3879 | coalition_t coal[COALITION_NUM_TYPES] = { COALITION_NULL }; |
3880 | #if CONFIG_COALITIONS |
3881 | int i, ncoals; |
3882 | kern_return_t kr = KERN_SUCCESS; |
3883 | struct _posix_spawn_coalition_info coal_info; |
3884 | int coal_role[COALITION_NUM_TYPES]; |
3885 | |
3886 | if (imgp->ip_px_sa == NULL || !px_args.coal_info) { |
3887 | goto do_fork1; |
3888 | } |
3889 | |
3890 | memset(s: &coal_info, c: 0, n: sizeof(coal_info)); |
3891 | |
3892 | if (px_args.coal_info_size > sizeof(coal_info)) { |
3893 | px_args.coal_info_size = sizeof(coal_info); |
3894 | } |
3895 | error = copyin(px_args.coal_info, |
3896 | &coal_info, px_args.coal_info_size); |
3897 | if (error != 0) { |
3898 | goto bad; |
3899 | } |
3900 | |
3901 | ncoals = 0; |
3902 | for (i = 0; i < COALITION_NUM_TYPES; i++) { |
3903 | uint64_t cid = coal_info.psci_info[i].psci_id; |
3904 | if (cid != 0) { |
3905 | /* |
3906 | * don't allow tasks which are not in a |
3907 | * privileged coalition to spawn processes |
3908 | * into coalitions other than their own |
3909 | */ |
3910 | if (!task_is_in_privileged_coalition(task: proc_task(p), type: i) && |
3911 | !IOTaskHasEntitlement(task: proc_task(p), COALITION_SPAWN_ENTITLEMENT)) { |
3912 | coal_dbg("ERROR: %d not in privilegd " |
3913 | "coalition of type %d" , |
3914 | proc_getpid(p), i); |
3915 | spawn_coalitions_release_all(coal); |
3916 | error = EPERM; |
3917 | goto bad; |
3918 | } |
3919 | |
3920 | coal_dbg("searching for coalition id:%llu" , cid); |
3921 | /* |
3922 | * take a reference and activation on the |
3923 | * coalition to guard against free-while-spawn |
3924 | * races |
3925 | */ |
3926 | coal[i] = coalition_find_and_activate_by_id(coal_id: cid); |
3927 | if (coal[i] == COALITION_NULL) { |
3928 | coal_dbg("could not find coalition id:%llu " |
3929 | "(perhaps it has been terminated or reaped)" , cid); |
3930 | /* |
3931 | * release any other coalition's we |
3932 | * may have a reference to |
3933 | */ |
3934 | spawn_coalitions_release_all(coal); |
3935 | error = ESRCH; |
3936 | goto bad; |
3937 | } |
3938 | if (coalition_type(coal: coal[i]) != i) { |
3939 | coal_dbg("coalition with id:%lld is not of type:%d" |
3940 | " (it's type:%d)" , cid, i, coalition_type(coal[i])); |
3941 | spawn_coalitions_release_all(coal); |
3942 | error = ESRCH; |
3943 | goto bad; |
3944 | } |
3945 | coal_role[i] = coal_info.psci_info[i].psci_role; |
3946 | ncoals++; |
3947 | } |
3948 | } |
3949 | if (ncoals < COALITION_NUM_TYPES) { |
3950 | /* |
3951 | * If the user is attempting to spawn into a subset of |
3952 | * the known coalition types, then make sure they have |
3953 | * _at_least_ specified a resource coalition. If not, |
3954 | * the following fork1() call will implicitly force an |
3955 | * inheritance from 'p' and won't actually spawn the |
3956 | * new task into the coalitions the user specified. |
3957 | * (also the call to coalitions_set_roles will panic) |
3958 | */ |
3959 | if (coal[COALITION_TYPE_RESOURCE] == COALITION_NULL) { |
3960 | spawn_coalitions_release_all(coal); |
3961 | error = EINVAL; |
3962 | goto bad; |
3963 | } |
3964 | } |
3965 | do_fork1: |
3966 | #endif /* CONFIG_COALITIONS */ |
3967 | |
3968 | /* |
3969 | * note that this will implicitly inherit the |
3970 | * caller's persona (if it exists) |
3971 | */ |
3972 | error = fork1(p, &imgp->ip_new_thread, PROC_CREATE_SPAWN, coal); |
3973 | /* returns a thread and task reference */ |
3974 | |
3975 | if (error == 0) { |
3976 | new_task = get_threadtask(imgp->ip_new_thread); |
3977 | } |
3978 | #if CONFIG_COALITIONS |
3979 | /* set the roles of this task within each given coalition */ |
3980 | if (error == 0) { |
3981 | kr = coalitions_set_roles(coalitions: coal, task: new_task, roles: coal_role); |
3982 | if (kr != KERN_SUCCESS) { |
3983 | error = EINVAL; |
3984 | } |
3985 | if (kdebug_debugid_enabled(MACHDBG_CODE(DBG_MACH_COALITION, |
3986 | MACH_COALITION_ADOPT))) { |
3987 | for (i = 0; i < COALITION_NUM_TYPES; i++) { |
3988 | if (coal[i] != COALITION_NULL) { |
3989 | /* |
3990 | * On 32-bit targets, uniqueid |
3991 | * will get truncated to 32 bits |
3992 | */ |
3993 | KDBG_RELEASE(MACHDBG_CODE( |
3994 | DBG_MACH_COALITION, |
3995 | MACH_COALITION_ADOPT), |
3996 | coalition_id(coal[i]), |
3997 | get_task_uniqueid(new_task)); |
3998 | } |
3999 | } |
4000 | } |
4001 | } |
4002 | |
4003 | /* drop our references and activations - fork1() now holds them */ |
4004 | spawn_coalitions_release_all(coal); |
4005 | #endif /* CONFIG_COALITIONS */ |
4006 | if (error != 0) { |
4007 | goto bad; |
4008 | } |
4009 | imgp->ip_flags |= IMGPF_SPAWN; /* spawn w/o exec */ |
4010 | spawn_no_exec = TRUE; /* used in later tests */ |
4011 | } else { |
4012 | /* Adjust the user proc count */ |
4013 | (void)chgproccnt(uid: kauth_getruid(), diff: 1); |
4014 | /* |
4015 | * For execve case, create a new proc, task and thread |
4016 | * but don't make the proc visible to userland. After |
4017 | * image activation, the new proc would take place of |
4018 | * the old proc in pid hash and other lists that make |
4019 | * the proc visible to the system. |
4020 | */ |
4021 | imgp->ip_new_thread = cloneproc(old_task, NULL, p, CLONEPROC_EXEC); |
4022 | |
4023 | /* task and thread ref returned by cloneproc */ |
4024 | if (imgp->ip_new_thread == NULL) { |
4025 | (void)chgproccnt(uid: kauth_getruid(), diff: -1); |
4026 | error = ENOMEM; |
4027 | goto bad; |
4028 | } |
4029 | |
4030 | new_task = get_threadtask(imgp->ip_new_thread); |
4031 | imgp->ip_flags |= IMGPF_EXEC; |
4032 | } |
4033 | |
4034 | p = (proc_t)get_bsdthreadtask_info(imgp->ip_new_thread); |
4035 | |
4036 | if (spawn_no_exec) { |
4037 | /* |
4038 | * We had to wait until this point before firing the |
4039 | * proc:::create probe, otherwise p would not point to the |
4040 | * child process. |
4041 | */ |
4042 | DTRACE_PROC1(create, proc_t, p); |
4043 | } |
4044 | assert(p != NULL); |
4045 | |
4046 | if (subsystem_root_path) { |
4047 | /* If a subsystem root was specified, swap it in */ |
4048 | char * old_subsystem_root_path = p->p_subsystem_root_path; |
4049 | p->p_subsystem_root_path = subsystem_root_path; |
4050 | subsystem_root_path = old_subsystem_root_path; |
4051 | } |
4052 | |
4053 | p->p_crash_behavior = crash_behavior; |
4054 | p->p_crash_behavior_deadline = crash_behavior_deadline; |
4055 | |
4056 | p->p_crash_count = px_sa.psa_crash_count; |
4057 | p->p_throttle_timeout = px_sa.psa_throttle_timeout; |
4058 | |
4059 | /* We'll need the subsystem root for setting up Apple strings */ |
4060 | imgp->ip_subsystem_root_path = p->p_subsystem_root_path; |
4061 | |
4062 | context.vc_thread = imgp->ip_new_thread; |
4063 | context.vc_ucred = proc_ucred_unsafe(p); /* in init */ |
4064 | |
4065 | /* |
4066 | * Post fdt_fork(), pre exec_handle_sugid() - this is where we want |
4067 | * to handle the file_actions. |
4068 | */ |
4069 | |
4070 | /* Has spawn file actions? */ |
4071 | if (imgp->ip_px_sfa != NULL) { |
4072 | /* |
4073 | * The POSIX_SPAWN_CLOEXEC_DEFAULT flag |
4074 | * is handled in exec_handle_file_actions(). |
4075 | */ |
4076 | #if CONFIG_AUDIT |
4077 | /* |
4078 | * The file actions auditing can overwrite the upath of |
4079 | * AUE_POSIX_SPAWN audit record. Save the audit record. |
4080 | */ |
4081 | struct kaudit_record *save_uu_ar = uthread->uu_ar; |
4082 | uthread->uu_ar = NULL; |
4083 | #endif |
4084 | error = exec_handle_file_actions(imgp, |
4085 | psa_flags: imgp->ip_px_sa != NULL ? px_sa.psa_flags : 0); |
4086 | #if CONFIG_AUDIT |
4087 | /* Restore the AUE_POSIX_SPAWN audit record. */ |
4088 | uthread->uu_ar = save_uu_ar; |
4089 | #endif |
4090 | if (error != 0) { |
4091 | goto bad; |
4092 | } |
4093 | } |
4094 | |
4095 | /* Has spawn port actions? */ |
4096 | if (imgp->ip_px_spa != NULL) { |
4097 | #if CONFIG_AUDIT |
4098 | /* |
4099 | * Do the same for the port actions as we did for the file |
4100 | * actions. Save the AUE_POSIX_SPAWN audit record. |
4101 | */ |
4102 | struct kaudit_record *save_uu_ar = uthread->uu_ar; |
4103 | uthread->uu_ar = NULL; |
4104 | #endif |
4105 | error = exec_handle_port_actions(imgp, actions: &port_actions); |
4106 | #if CONFIG_AUDIT |
4107 | /* Restore the AUE_POSIX_SPAWN audit record. */ |
4108 | uthread->uu_ar = save_uu_ar; |
4109 | #endif |
4110 | if (error != 0) { |
4111 | goto bad; |
4112 | } |
4113 | } |
4114 | |
4115 | /* Has spawn attr? */ |
4116 | if (imgp->ip_px_sa != NULL) { |
4117 | /* |
4118 | * Reset UID/GID to parent's RUID/RGID; This works only |
4119 | * because the operation occurs before the call |
4120 | * to exec_handle_sugid() by the image activator called |
4121 | * from exec_activate_image(). |
4122 | * |
4123 | * POSIX requires that any setuid/setgid bits on the process |
4124 | * image will take precedence over the spawn attributes |
4125 | * (re)setting them. |
4126 | * |
4127 | * Modifications to p_ucred must be guarded using the |
4128 | * proc's ucred lock. This prevents others from accessing |
4129 | * a garbage credential. |
4130 | */ |
4131 | if (px_sa.psa_flags & POSIX_SPAWN_RESETIDS) { |
4132 | kauth_cred_proc_update(p, action: PROC_SETTOKEN_NONE, |
4133 | fn: ^bool (kauth_cred_t parent __unused, kauth_cred_t model){ |
4134 | return kauth_cred_model_setuidgid(model, |
4135 | uid: kauth_cred_getruid(cred: parent), |
4136 | gid: kauth_cred_getrgid(cred: parent)); |
4137 | }); |
4138 | } |
4139 | |
4140 | if (imgp->ip_px_pcred_info) { |
4141 | if (!spawn_no_exec) { |
4142 | error = ENOTSUP; |
4143 | goto bad; |
4144 | } |
4145 | |
4146 | error = spawn_posix_cred_adopt(p, px_pcred_info: imgp->ip_px_pcred_info); |
4147 | if (error != 0) { |
4148 | goto bad; |
4149 | } |
4150 | } |
4151 | |
4152 | #if CONFIG_PERSONAS |
4153 | if (imgp->ip_px_persona != NULL) { |
4154 | if (!spawn_no_exec) { |
4155 | error = ENOTSUP; |
4156 | goto bad; |
4157 | } |
4158 | |
4159 | /* |
4160 | * If we were asked to spawn a process into a new persona, |
4161 | * do the credential switch now (which may override the UID/GID |
4162 | * inherit done just above). It's important to do this switch |
4163 | * before image activation both for reasons stated above, and |
4164 | * to ensure that the new persona has access to the image/file |
4165 | * being executed. |
4166 | */ |
4167 | error = spawn_persona_adopt(p, px_persona: imgp->ip_px_persona); |
4168 | if (error != 0) { |
4169 | goto bad; |
4170 | } |
4171 | } |
4172 | #endif /* CONFIG_PERSONAS */ |
4173 | #if !SECURE_KERNEL |
4174 | /* |
4175 | * Disable ASLR for the spawned process. |
4176 | * |
4177 | * But only do so if we are not embedded + RELEASE. |
4178 | * While embedded allows for a boot-arg (-disable_aslr) |
4179 | * to deal with this (which itself is only honored on |
4180 | * DEVELOPMENT or DEBUG builds of xnu), it is often |
4181 | * useful or necessary to disable ASLR on a per-process |
4182 | * basis for unit testing and debugging. |
4183 | */ |
4184 | if (px_sa.psa_flags & _POSIX_SPAWN_DISABLE_ASLR) { |
4185 | OSBitOrAtomic(P_DISABLE_ASLR, &p->p_flag); |
4186 | } |
4187 | #endif /* !SECURE_KERNEL */ |
4188 | |
4189 | /* Randomize high bits of ASLR slide */ |
4190 | if (px_sa.psa_flags & _POSIX_SPAWN_HIGH_BITS_ASLR) { |
4191 | imgp->ip_flags |= IMGPF_HIGH_BITS_ASLR; |
4192 | } |
4193 | |
4194 | #if !SECURE_KERNEL |
4195 | /* |
4196 | * Forcibly disallow execution from data pages for the spawned process |
4197 | * even if it would otherwise be permitted by the architecture default. |
4198 | */ |
4199 | if (px_sa.psa_flags & _POSIX_SPAWN_ALLOW_DATA_EXEC) { |
4200 | imgp->ip_flags |= IMGPF_ALLOW_DATA_EXEC; |
4201 | } |
4202 | #endif /* !SECURE_KERNEL */ |
4203 | |
4204 | #if __has_feature(ptrauth_calls) |
4205 | if (vm_shared_region_reslide_aslr && is_64 && (px_sa.psa_flags & _POSIX_SPAWN_RESLIDE)) { |
4206 | imgp->ip_flags |= IMGPF_RESLIDE; |
4207 | } |
4208 | #endif /* __has_feature(ptrauth_calls) */ |
4209 | |
4210 | if ((px_sa.psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK) == |
4211 | POSIX_SPAWN_PROC_TYPE_DRIVER) { |
4212 | imgp->ip_flags |= IMGPF_DRIVER; |
4213 | } |
4214 | } |
4215 | |
4216 | /* |
4217 | * Disable ASLR during image activation. This occurs either if the |
4218 | * _POSIX_SPAWN_DISABLE_ASLR attribute was found above or if |
4219 | * P_DISABLE_ASLR was inherited from the parent process. |
4220 | */ |
4221 | if (p->p_flag & P_DISABLE_ASLR) { |
4222 | imgp->ip_flags |= IMGPF_DISABLE_ASLR; |
4223 | } |
4224 | |
4225 | /* |
4226 | * Clear transition flag so we won't hang if exec_activate_image() causes |
4227 | * an automount (and launchd does a proc sysctl to service it). |
4228 | * |
4229 | * <rdar://problem/6848672>, <rdar://problem/5959568>. |
4230 | */ |
4231 | proc_transend(p, locked: 0); |
4232 | proc_transit_set = 0; |
4233 | |
4234 | if (!spawn_no_exec) { |
4235 | /* |
4236 | * Clear the signal lock in case of exec, since |
4237 | * image activation uses psignal on child process. |
4238 | */ |
4239 | proc_signalend(p, locked: 0); |
4240 | proc_signal_set = 0; |
4241 | } |
4242 | |
4243 | #if MAC_SPAWN /* XXX */ |
4244 | if (uap->mac_p != USER_ADDR_NULL) { |
4245 | error = mac_execve_enter(uap->mac_p, imgp); |
4246 | if (error) { |
4247 | goto bad; |
4248 | } |
4249 | } |
4250 | #endif |
4251 | |
4252 | |
4253 | /* |
4254 | * Activate the image. |
4255 | * Warning: If activation failed after point of no return, it returns error |
4256 | * as 0 and pretends the call succeeded. |
4257 | */ |
4258 | error = exec_activate_image(imgp); |
4259 | #if defined(HAS_APPLE_PAC) |
4260 | const uint8_t disable_user_jop = imgp->ip_flags & IMGPF_NOJOP ? TRUE : FALSE; |
4261 | ml_task_set_jop_pid_from_shared_region(task: new_task, disable_user_jop); |
4262 | ml_task_set_disable_user_jop(task: new_task, disable_user_jop); |
4263 | ml_thread_set_disable_user_jop(thread: imgp->ip_new_thread, disable_user_jop); |
4264 | ml_thread_set_jop_pid(thread: imgp->ip_new_thread, task: new_task); |
4265 | #endif |
4266 | |
4267 | |
4268 | /* |
4269 | * If you've come here to add support for some new HW feature or some per-process or per-vmmap |
4270 | * or per-pmap flag that needs to be set before the process runs, or are in general lost, here |
4271 | * is some help. This summary was accurate as of Jul 2022. Use git log as needed. This comment |
4272 | * is here to prevent a recurrence of rdar://96307913 |
4273 | * |
4274 | * In posix_spawn, following is what happens: |
4275 | * 1. Lots of prep and checking work |
4276 | * 2. Image activation via exec_activate_image(). The new task will get a new pmap here |
4277 | * 3. More prep work. (YOU ARE HERE) |
4278 | * 4. exec_resettextvp() is called |
4279 | * 5. At this point it is safe to check entitlements and code signatures |
4280 | * 6. task_clear_return_wait(get_threadtask(imgp->ip_new_thread), TCRW_CLEAR_INITIAL_WAIT); |
4281 | * The new thread is allowed to run in kernel. It cannot yet get to userland |
4282 | * 7. More things done here. This is your chance to affect the task before it runs in |
4283 | * userspace |
4284 | * 8. task_clear_return_wait(get_threadtask(imgp->ip_new_thread), TCRW_CLEAR_FINAL_WAIT); |
4285 | * The new thread is allowed to run in userland |
4286 | */ |
4287 | |
4288 | if (error == 0 && !spawn_no_exec) { |
4289 | p = proc_exec_switch_task(old_proc: current_proc(), new_proc: p, old_task, new_task, imgp, inherit: &inherit); |
4290 | /* proc ref returned */ |
4291 | should_release_proc_ref = TRUE; |
4292 | } |
4293 | |
4294 | if (error == 0) { |
4295 | /* process completed the exec, but may have failed after point of no return */ |
4296 | exec_done = TRUE; |
4297 | } |
4298 | |
4299 | #if CONFIG_EXCLAVES |
4300 | if (!error && task_conclave_id != NULL) { |
4301 | kern_return_t kr; |
4302 | kr = task_add_conclave(new_task, imgp->ip_vp, (int64_t)imgp->ip_arch_offset, |
4303 | task_conclave_id); |
4304 | if (kr != KERN_SUCCESS) { |
4305 | error = EINVAL; |
4306 | goto bad; |
4307 | } |
4308 | } |
4309 | #endif |
4310 | |
4311 | if (!error && imgp->ip_px_sa != NULL) { |
4312 | thread_t child_thread = imgp->ip_new_thread; |
4313 | uthread_t child_uthread = get_bsdthread_info(child_thread); |
4314 | |
4315 | /* |
4316 | * Because of POSIX_SPAWN_SETEXEC, we need to handle this after image |
4317 | * activation, else when image activation fails (before the point of no |
4318 | * return) would leave the parent process in a modified state. |
4319 | */ |
4320 | if (px_sa.psa_flags & POSIX_SPAWN_SETPGROUP) { |
4321 | struct setpgid_args spga; |
4322 | spga.pid = proc_getpid(p); |
4323 | spga.pgid = px_sa.psa_pgroup; |
4324 | /* |
4325 | * Effectively, call setpgid() system call; works |
4326 | * because there are no pointer arguments. |
4327 | */ |
4328 | if ((error = setpgid(p, &spga, ival)) != 0) { |
4329 | goto bad_px_sa; |
4330 | } |
4331 | } |
4332 | |
4333 | if (px_sa.psa_flags & POSIX_SPAWN_SETSID) { |
4334 | error = setsid_internal(p); |
4335 | if (error != 0) { |
4336 | goto bad_px_sa; |
4337 | } |
4338 | } |
4339 | |
4340 | /* |
4341 | * If we have a spawn attr, and it contains signal related flags, |
4342 | * the we need to process them in the "context" of the new child |
4343 | * process, so we have to process it following image activation, |
4344 | * prior to making the thread runnable in user space. This is |
4345 | * necessitated by some signal information being per-thread rather |
4346 | * than per-process, and we don't have the new allocation in hand |
4347 | * until after the image is activated. |
4348 | */ |
4349 | |
4350 | /* |
4351 | * Mask a list of signals, instead of them being unmasked, if |
4352 | * they were unmasked in the parent; note that some signals |
4353 | * are not maskable. |
4354 | */ |
4355 | if (px_sa.psa_flags & POSIX_SPAWN_SETSIGMASK) { |
4356 | child_uthread->uu_sigmask = (px_sa.psa_sigmask & ~sigcantmask); |
4357 | } |
4358 | /* |
4359 | * Default a list of signals instead of ignoring them, if |
4360 | * they were ignored in the parent. Note that we pass |
4361 | * spawn_no_exec to setsigvec() to indicate that we called |
4362 | * fork1() and therefore do not need to call proc_signalstart() |
4363 | * internally. |
4364 | */ |
4365 | if (px_sa.psa_flags & POSIX_SPAWN_SETSIGDEF) { |
4366 | vec.sa_handler = SIG_DFL; |
4367 | vec.sa_tramp = 0; |
4368 | vec.sa_mask = 0; |
4369 | vec.sa_flags = 0; |
4370 | for (sig = 1; sig < NSIG; sig++) { |
4371 | if (px_sa.psa_sigdefault & (1 << (sig - 1))) { |
4372 | error = setsigvec(p, child_thread, signum: sig, &vec, in_sigstart: spawn_no_exec); |
4373 | } |
4374 | } |
4375 | } |
4376 | |
4377 | /* |
4378 | * Activate the CPU usage monitor, if requested. This is done via a task-wide, per-thread CPU |
4379 | * usage limit, which will generate a resource exceeded exception if any one thread exceeds the |
4380 | * limit. |
4381 | * |
4382 | * Userland gives us interval in seconds, and the kernel SPI expects nanoseconds. |
4383 | */ |
4384 | if ((px_sa.psa_cpumonitor_percent != 0) && (px_sa.psa_cpumonitor_percent < UINT8_MAX)) { |
4385 | /* |
4386 | * Always treat a CPU monitor activation coming from spawn as entitled. Requiring |
4387 | * an entitlement to configure the monitor a certain way seems silly, since |
4388 | * whomever is turning it on could just as easily choose not to do so. |
4389 | */ |
4390 | error = proc_set_task_ruse_cpu(task: proc_task(p), |
4391 | TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC, |
4392 | percentage: (uint8_t)px_sa.psa_cpumonitor_percent, |
4393 | interval: px_sa.psa_cpumonitor_interval * NSEC_PER_SEC, |
4394 | deadline: 0, TRUE); |
4395 | } |
4396 | |
4397 | |
4398 | if (px_pcred_info && |
4399 | (px_pcred_info->pspci_flags & POSIX_SPAWN_POSIX_CRED_LOGIN)) { |
4400 | /* |
4401 | * setlogin() must happen after setsid() |
4402 | */ |
4403 | setlogin_internal(p, login: px_pcred_info->pspci_login); |
4404 | } |
4405 | |
4406 | bad_px_sa: |
4407 | if (error != 0) { |
4408 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE, |
4409 | proc_getpid(p), OS_REASON_EXEC, EXEC_EXIT_REASON_BAD_PSATTR, 0, 0); |
4410 | exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_BAD_PSATTR); |
4411 | } |
4412 | } |
4413 | |
4414 | bad: |
4415 | |
4416 | if (error == 0) { |
4417 | /* reset delay idle sleep status if set */ |
4418 | #if CONFIG_DELAY_IDLE_SLEEP |
4419 | if ((p->p_flag & P_DELAYIDLESLEEP) == P_DELAYIDLESLEEP) { |
4420 | OSBitAndAtomic(~((uint32_t)P_DELAYIDLESLEEP), &p->p_flag); |
4421 | } |
4422 | #endif /* CONFIG_DELAY_IDLE_SLEEP */ |
4423 | /* upon successful spawn, re/set the proc control state */ |
4424 | if (imgp->ip_px_sa != NULL) { |
4425 | switch (px_sa.psa_pcontrol) { |
4426 | case POSIX_SPAWN_PCONTROL_THROTTLE: |
4427 | p->p_pcaction = P_PCTHROTTLE; |
4428 | break; |
4429 | case POSIX_SPAWN_PCONTROL_SUSPEND: |
4430 | p->p_pcaction = P_PCSUSP; |
4431 | break; |
4432 | case POSIX_SPAWN_PCONTROL_KILL: |
4433 | p->p_pcaction = P_PCKILL; |
4434 | break; |
4435 | case POSIX_SPAWN_PCONTROL_NONE: |
4436 | default: |
4437 | p->p_pcaction = 0; |
4438 | break; |
4439 | } |
4440 | ; |
4441 | } |
4442 | exec_resettextvp(p, imgp); |
4443 | |
4444 | /* |
4445 | * Enable new task IPC access if exec_activate_image() returned an |
4446 | * active task. (Checks active bit in ipc_task_enable() under lock). |
4447 | * Must enable after resettextvp so that task port policies are not evaluated |
4448 | * until the csblob in the textvp is accurately reflected. |
4449 | */ |
4450 | ipc_task_enable(task: new_task); |
4451 | |
4452 | /* Set task exception ports now that we can check entitlements */ |
4453 | if (imgp->ip_px_spa != NULL) { |
4454 | error = exec_handle_exception_port_actions(imgp, actions: &port_actions); |
4455 | } |
4456 | |
4457 | #if CONFIG_MEMORYSTATUS |
4458 | /* Set jetsam priority for DriverKit processes */ |
4459 | if (px_sa.psa_apptype == POSIX_SPAWN_PROC_TYPE_DRIVER) { |
4460 | px_sa.psa_priority = JETSAM_PRIORITY_DRIVER_APPLE; |
4461 | } |
4462 | |
4463 | /* Has jetsam attributes? */ |
4464 | if (imgp->ip_px_sa != NULL && (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_SET)) { |
4465 | /* |
4466 | * With 2-level high-water-mark support, POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND is no |
4467 | * longer relevant, as background limits are described via the inactive limit slots. |
4468 | * |
4469 | * That said, however, if the POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND is passed in, |
4470 | * we attempt to mimic previous behavior by forcing the BG limit data into the |
4471 | * inactive/non-fatal mode and force the active slots to hold system_wide/fatal mode. |
4472 | */ |
4473 | |
4474 | if (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND) { |
4475 | memorystatus_update(p, priority: px_sa.psa_priority, user_data: 0, FALSE, /* assertion priority */ |
4476 | effective: (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY), |
4477 | TRUE, |
4478 | memlimit_active: -1, TRUE, |
4479 | memlimit_inactive: px_sa.psa_memlimit_inactive, FALSE); |
4480 | } else { |
4481 | memorystatus_update(p, priority: px_sa.psa_priority, user_data: 0, FALSE, /* assertion priority */ |
4482 | effective: (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY), |
4483 | TRUE, |
4484 | memlimit_active: px_sa.psa_memlimit_active, |
4485 | memlimit_active_is_fatal: (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_MEMLIMIT_ACTIVE_FATAL), |
4486 | memlimit_inactive: px_sa.psa_memlimit_inactive, |
4487 | memlimit_inactive_is_fatal: (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_MEMLIMIT_INACTIVE_FATAL)); |
4488 | } |
4489 | } |
4490 | |
4491 | /* Has jetsam relaunch behavior? */ |
4492 | if (imgp->ip_px_sa != NULL && (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_RELAUNCH_BEHAVIOR_MASK)) { |
4493 | /* |
4494 | * Launchd has passed in data indicating the behavior of this process in response to jetsam. |
4495 | * This data would be used by the jetsam subsystem to determine the position and protection |
4496 | * offered to this process on dirty -> clean transitions. |
4497 | */ |
4498 | int relaunch_flags = P_MEMSTAT_RELAUNCH_UNKNOWN; |
4499 | switch (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_RELAUNCH_BEHAVIOR_MASK) { |
4500 | case POSIX_SPAWN_JETSAM_RELAUNCH_BEHAVIOR_LOW: |
4501 | relaunch_flags = P_MEMSTAT_RELAUNCH_LOW; |
4502 | break; |
4503 | case POSIX_SPAWN_JETSAM_RELAUNCH_BEHAVIOR_MED: |
4504 | relaunch_flags = P_MEMSTAT_RELAUNCH_MED; |
4505 | break; |
4506 | case POSIX_SPAWN_JETSAM_RELAUNCH_BEHAVIOR_HIGH: |
4507 | relaunch_flags = P_MEMSTAT_RELAUNCH_HIGH; |
4508 | break; |
4509 | default: |
4510 | break; |
4511 | } |
4512 | memorystatus_relaunch_flags_update(p, relaunch_flags); |
4513 | } |
4514 | |
4515 | #endif /* CONFIG_MEMORYSTATUS */ |
4516 | if (imgp->ip_px_sa != NULL && px_sa.psa_thread_limit > 0) { |
4517 | task_set_thread_limit(task: new_task, thread_limit: (uint16_t)px_sa.psa_thread_limit); |
4518 | } |
4519 | |
4520 | #if CONFIG_PROC_RESOURCE_LIMITS |
4521 | if (imgp->ip_px_sa != NULL && (px_sa.psa_port_soft_limit > 0 || px_sa.psa_port_hard_limit > 0)) { |
4522 | task_set_port_space_limits(new_task, (uint32_t)px_sa.psa_port_soft_limit, |
4523 | (uint32_t)px_sa.psa_port_hard_limit); |
4524 | } |
4525 | |
4526 | if (imgp->ip_px_sa != NULL && (px_sa.psa_filedesc_soft_limit > 0 || px_sa.psa_filedesc_hard_limit > 0)) { |
4527 | proc_set_filedesc_limits(p, (int)px_sa.psa_filedesc_soft_limit, |
4528 | (int)px_sa.psa_filedesc_hard_limit); |
4529 | } |
4530 | if (imgp->ip_px_sa != NULL && (px_sa.psa_kqworkloop_soft_limit > 0 || px_sa.psa_kqworkloop_hard_limit > 0)) { |
4531 | proc_set_kqworkloop_limits(p, (int)px_sa.psa_kqworkloop_soft_limit, |
4532 | (int)px_sa.psa_kqworkloop_hard_limit); |
4533 | } |
4534 | #endif /* CONFIG_PROC_RESOURCE_LIMITS */ |
4535 | |
4536 | /* Disable wakeup monitoring for DriverKit processes */ |
4537 | if (px_sa.psa_apptype == POSIX_SPAWN_PROC_TYPE_DRIVER) { |
4538 | uint32_t flags = WAKEMON_DISABLE; |
4539 | task_wakeups_monitor_ctl(task: new_task, rate_hz: &flags, NULL); |
4540 | } |
4541 | } |
4542 | |
4543 | |
4544 | /* |
4545 | * If we successfully called fork1() or cloneproc, we always need |
4546 | * to do this. This is because we come back from that call with |
4547 | * signals blocked in the child, and we have to unblock them, for exec |
4548 | * case they are unblocked before activation, but for true spawn case |
4549 | * we want to wait until after we've performed any spawn actions. |
4550 | * This has to happen before process_signature(), which uses psignal. |
4551 | */ |
4552 | if (proc_transit_set) { |
4553 | proc_transend(p, locked: 0); |
4554 | } |
4555 | |
4556 | /* |
4557 | * Drop the signal lock on the child which was taken on our |
4558 | * behalf by forkproc()/cloneproc() to prevent signals being |
4559 | * received by the child in a partially constructed state. |
4560 | */ |
4561 | if (proc_signal_set) { |
4562 | proc_signalend(p, locked: 0); |
4563 | } |
4564 | |
4565 | if (error == 0) { |
4566 | /* |
4567 | * We need to initialize the bank context behind the protection of |
4568 | * the proc_trans lock to prevent a race with exit. We can't do this during |
4569 | * exec_activate_image because task_bank_init checks entitlements that |
4570 | * aren't loaded until subsequent calls (including exec_resettextvp). |
4571 | */ |
4572 | error = proc_transstart(p, locked: 0, non_blocking: 0); |
4573 | |
4574 | if (error == 0) { |
4575 | task_bank_init(task: new_task); |
4576 | proc_transend(p, locked: 0); |
4577 | } |
4578 | |
4579 | #if __arm64__ |
4580 | proc_footprint_entitlement_hacks(p, task: new_task); |
4581 | #endif /* __arm64__ */ |
4582 | |
4583 | #if XNU_TARGET_OS_OSX |
4584 | #define SINGLE_JIT_ENTITLEMENT "com.apple.security.cs.single-jit" |
4585 | if (IOTaskHasEntitlement(task: new_task, SINGLE_JIT_ENTITLEMENT)) { |
4586 | vm_map_single_jit(map: get_task_map(new_task)); |
4587 | } |
4588 | #endif /* XNU_TARGET_OS_OSX */ |
4589 | |
4590 | #if __has_feature(ptrauth_calls) |
4591 | task_set_pac_exception_fatal_flag(new_task); |
4592 | #endif /* __has_feature(ptrauth_calls) */ |
4593 | task_set_jit_exception_fatal_flag(task: new_task); |
4594 | } |
4595 | |
4596 | /* Inherit task role from old task to new task for exec */ |
4597 | if (error == 0 && !spawn_no_exec) { |
4598 | proc_inherit_task_role(new_task, old_task); |
4599 | } |
4600 | |
4601 | #if CONFIG_ARCADE |
4602 | if (error == 0) { |
4603 | /* |
4604 | * Check to see if we need to trigger an arcade upcall AST now |
4605 | * that the vnode has been reset on the task. |
4606 | */ |
4607 | arcade_prepare(task: new_task, thread: imgp->ip_new_thread); |
4608 | } |
4609 | #endif /* CONFIG_ARCADE */ |
4610 | |
4611 | if (error == 0) { |
4612 | proc_apply_jit_and_vm_policies(imgp, p, task: new_task); |
4613 | } |
4614 | |
4615 | /* Clear the initial wait on the thread before handling spawn policy */ |
4616 | if (imgp && imgp->ip_new_thread) { |
4617 | task_clear_return_wait(task: get_threadtask(imgp->ip_new_thread), TCRW_CLEAR_INITIAL_WAIT); |
4618 | } |
4619 | |
4620 | /* |
4621 | * Apply the spawnattr policy, apptype (which primes the task for importance donation), |
4622 | * and bind any portwatch ports to the new task. |
4623 | * This must be done after the exec so that the child's thread is ready, |
4624 | * and after the in transit state has been released, because priority is |
4625 | * dropped here so we need to be prepared for a potentially long preemption interval |
4626 | * |
4627 | * TODO: Consider splitting this up into separate phases |
4628 | */ |
4629 | if (error == 0 && imgp->ip_px_sa != NULL) { |
4630 | struct _posix_spawnattr *psa = (struct _posix_spawnattr *) imgp->ip_px_sa; |
4631 | |
4632 | error = exec_handle_spawnattr_policy(p, thread: imgp->ip_new_thread, psa_apptype: psa->psa_apptype, psa_qos_clamp: psa->psa_qos_clamp, |
4633 | psa_darwin_role: psa->psa_darwin_role, port_actions: &port_actions); |
4634 | } |
4635 | |
4636 | /* Transfer the turnstile watchport boost to new task if in exec */ |
4637 | if (error == 0 && !spawn_no_exec) { |
4638 | task_transfer_turnstile_watchports(old_task, new_task, new_thread: imgp->ip_new_thread); |
4639 | } |
4640 | |
4641 | if (error == 0 && imgp->ip_px_sa != NULL) { |
4642 | struct _posix_spawnattr *psa = (struct _posix_spawnattr *) imgp->ip_px_sa; |
4643 | |
4644 | if (psa->psa_no_smt) { |
4645 | task_set_no_smt(task: new_task); |
4646 | } |
4647 | if (psa->psa_tecs) { |
4648 | task_set_tecs(task: new_task); |
4649 | } |
4650 | } |
4651 | |
4652 | if (error == 0 && imgp->ip_px_sa != NULL) { |
4653 | struct _posix_spawnattr *psa = (struct _posix_spawnattr *) imgp->ip_px_sa; |
4654 | |
4655 | if (psa->psa_options & PSA_OPTION_DATALESS_IOPOLICY) { |
4656 | struct _iopol_param_t iop_param = { |
4657 | .iop_scope = IOPOL_SCOPE_PROCESS, |
4658 | .iop_iotype = IOPOL_TYPE_VFS_MATERIALIZE_DATALESS_FILES, |
4659 | .iop_policy = psa->psa_dataless_iopolicy, |
4660 | }; |
4661 | error = iopolicysys_vfs_materialize_dataless_files(p, IOPOL_CMD_SET, scope: iop_param.iop_scope, |
4662 | policy: iop_param.iop_policy, iop_param: &iop_param); |
4663 | } |
4664 | } |
4665 | |
4666 | if (error == 0) { |
4667 | /* Apply the main thread qos */ |
4668 | thread_t main_thread = imgp->ip_new_thread; |
4669 | task_set_main_thread_qos(task: new_task, main_thread); |
4670 | } |
4671 | |
4672 | /* |
4673 | * Release any ports we kept around for binding to the new task |
4674 | * We need to release the rights even if the posix_spawn has failed. |
4675 | */ |
4676 | if (imgp->ip_px_spa != NULL) { |
4677 | exec_port_actions_destroy(port_actions: &port_actions); |
4678 | } |
4679 | |
4680 | /* |
4681 | * We have to delay operations which might throw a signal until after |
4682 | * the signals have been unblocked; however, we want that to happen |
4683 | * after exec_resettextvp() so that the textvp is correct when they |
4684 | * fire. |
4685 | */ |
4686 | if (error == 0) { |
4687 | error = process_signature(p, imgp); |
4688 | |
4689 | /* |
4690 | * Pay for our earlier safety; deliver the delayed signals from |
4691 | * the incomplete spawn process now that it's complete. |
4692 | */ |
4693 | if (imgp != NULL && spawn_no_exec && (p->p_lflag & P_LTRACED)) { |
4694 | psignal_vfork(p, new_task: proc_task(p), thread: imgp->ip_new_thread, SIGTRAP); |
4695 | } |
4696 | |
4697 | if (error == 0 && !spawn_no_exec) { |
4698 | KDBG(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXEC), |
4699 | proc_getpid(p)); |
4700 | } |
4701 | } |
4702 | |
4703 | if (spawn_no_exec) { |
4704 | /* flag the 'fork' has occurred */ |
4705 | proc_knote(p: p->p_pptr, NOTE_FORK | proc_getpid(p)); |
4706 | } |
4707 | |
4708 | /* flag exec has occurred, notify only if it has not failed due to FP Key error */ |
4709 | if (!error && ((p->p_lflag & P_LTERM_DECRYPTFAIL) == 0)) { |
4710 | proc_knote(p, NOTE_EXEC); |
4711 | } |
4712 | |
4713 | if (imgp != NULL) { |
4714 | uthread_set_exec_data(uth: current_uthread(), NULL); |
4715 | if (imgp->ip_vp) { |
4716 | vnode_put(vp: imgp->ip_vp); |
4717 | } |
4718 | if (imgp->ip_scriptvp) { |
4719 | vnode_put(vp: imgp->ip_scriptvp); |
4720 | } |
4721 | if (imgp->ip_strings) { |
4722 | execargs_free(imgp); |
4723 | } |
4724 | if (imgp->ip_free_map) { |
4725 | /* Free the map after dropping iocount on vnode to avoid deadlock */ |
4726 | vm_map_deallocate(map: imgp->ip_free_map); |
4727 | } |
4728 | kfree_data(imgp->ip_px_sfa, |
4729 | px_args.file_actions_size); |
4730 | kfree_data(imgp->ip_px_spa, |
4731 | px_args.port_actions_size); |
4732 | #if CONFIG_PERSONAS |
4733 | kfree_data(imgp->ip_px_persona, |
4734 | px_args.persona_info_size); |
4735 | #endif |
4736 | kfree_data(imgp->ip_px_pcred_info, |
4737 | px_args.posix_cred_info_size); |
4738 | |
4739 | if (subsystem_root_path != NULL) { |
4740 | zfree(ZV_NAMEI, subsystem_root_path); |
4741 | } |
4742 | #if CONFIG_MACF |
4743 | struct ip_px_smpx_s *px_s = &imgp->ip_px_smpx; |
4744 | kfree_data(px_s->array, px_args.mac_extensions_size); |
4745 | kfree_data(px_s->data, (vm_size_t)px_s->datalen); |
4746 | |
4747 | if (imgp->ip_execlabelp) { |
4748 | mac_cred_label_free(label: imgp->ip_execlabelp); |
4749 | imgp->ip_execlabelp = NULL; |
4750 | } |
4751 | if (imgp->ip_scriptlabelp) { |
4752 | mac_vnode_label_free(label: imgp->ip_scriptlabelp); |
4753 | imgp->ip_scriptlabelp = NULL; |
4754 | } |
4755 | if (imgp->ip_cs_error != OS_REASON_NULL) { |
4756 | os_reason_free(cur_reason: imgp->ip_cs_error); |
4757 | imgp->ip_cs_error = OS_REASON_NULL; |
4758 | } |
4759 | if (imgp->ip_inherited_shared_region_id != NULL) { |
4760 | kfree_data(imgp->ip_inherited_shared_region_id, |
4761 | strlen(imgp->ip_inherited_shared_region_id) + 1); |
4762 | imgp->ip_inherited_shared_region_id = NULL; |
4763 | } |
4764 | #endif |
4765 | } |
4766 | |
4767 | #if CONFIG_DTRACE |
4768 | if (spawn_no_exec) { |
4769 | /* |
4770 | * In the original DTrace reference implementation, |
4771 | * posix_spawn() was a libc routine that just |
4772 | * did vfork(2) then exec(2). Thus the proc::: probes |
4773 | * are very fork/exec oriented. The details of this |
4774 | * in-kernel implementation of posix_spawn() is different |
4775 | * (while producing the same process-observable effects) |
4776 | * particularly w.r.t. errors, and which thread/process |
4777 | * is constructing what on behalf of whom. |
4778 | */ |
4779 | if (error) { |
4780 | DTRACE_PROC1(spawn__failure, int, error); |
4781 | } else { |
4782 | DTRACE_PROC(spawn__success); |
4783 | /* |
4784 | * Some DTrace scripts, e.g. newproc.d in |
4785 | * /usr/bin, rely on the 'exec-success' |
4786 | * probe being fired in the child after the |
4787 | * new process image has been constructed |
4788 | * in order to determine the associated pid. |
4789 | * |
4790 | * So, even though the parent built the image |
4791 | * here, for compatibility, mark the new thread |
4792 | * so 'exec-success' fires on it as it leaves |
4793 | * the kernel. |
4794 | */ |
4795 | dtrace_thread_didexec(imgp->ip_new_thread); |
4796 | } |
4797 | } else { |
4798 | if (error) { |
4799 | DTRACE_PROC1(exec__failure, int, error); |
4800 | } else { |
4801 | dtrace_thread_didexec(imgp->ip_new_thread); |
4802 | } |
4803 | } |
4804 | |
4805 | if ((dtrace_proc_waitfor_hook = dtrace_proc_waitfor_exec_ptr) != NULL) { |
4806 | (*dtrace_proc_waitfor_hook)(p); |
4807 | } |
4808 | #endif |
4809 | |
4810 | #if CONFIG_AUDIT |
4811 | if (!error && AUDIT_ENABLED() && p) { |
4812 | /* Add the CDHash of the new process to the audit record */ |
4813 | uint8_t *cdhash = cs_get_cdhash(p); |
4814 | if (cdhash) { |
4815 | AUDIT_ARG(data, cdhash, sizeof(uint8_t), CS_CDHASH_LEN); |
4816 | } |
4817 | } |
4818 | #endif |
4819 | |
4820 | /* terminate the new task if exec failed */ |
4821 | if (new_task != NULL && task_is_exec_copy(new_task)) { |
4822 | task_terminate_internal(task: new_task); |
4823 | } |
4824 | |
4825 | if (exec_failure_reason && !spawn_no_exec) { |
4826 | psignal_with_reason(p, SIGKILL, signal_reason: exec_failure_reason); |
4827 | exec_failure_reason = NULL; |
4828 | } |
4829 | |
4830 | /* Return to both the parent and the child? */ |
4831 | if (imgp != NULL && spawn_no_exec) { |
4832 | /* |
4833 | * If the parent wants the pid, copy it out |
4834 | */ |
4835 | if (error == 0 && pid != USER_ADDR_NULL) { |
4836 | _Static_assert(sizeof(pid_t) == 4, "posix_spawn() assumes a 32-bit pid_t" ); |
4837 | bool aligned = (pid & 3) == 0; |
4838 | if (aligned) { |
4839 | (void)copyout_atomic32(u32: proc_getpid(p), user_addr: pid); |
4840 | } else { |
4841 | (void)suword(addr: pid, word: proc_getpid(p)); |
4842 | } |
4843 | } |
4844 | retval[0] = error; |
4845 | |
4846 | /* |
4847 | * If we had an error, perform an internal reap ; this is |
4848 | * entirely safe, as we have a real process backing us. |
4849 | */ |
4850 | if (error) { |
4851 | proc_list_lock(); |
4852 | p->p_listflag |= P_LIST_DEADPARENT; |
4853 | proc_list_unlock(); |
4854 | proc_lock(p); |
4855 | /* make sure no one else has killed it off... */ |
4856 | if (p->p_stat != SZOMB && p->exit_thread == NULL) { |
4857 | p->exit_thread = current_thread(); |
4858 | p->p_posix_spawn_failed = true; |
4859 | proc_unlock(p); |
4860 | exit1(p, 1, (int *)NULL); |
4861 | } else { |
4862 | /* someone is doing it for us; just skip it */ |
4863 | proc_unlock(p); |
4864 | } |
4865 | } |
4866 | } |
4867 | |
4868 | /* |
4869 | * Do not terminate the current task, if proc_exec_switch_task did not |
4870 | * switch the tasks, terminating the current task without the switch would |
4871 | * result in losing the SIGKILL status. |
4872 | */ |
4873 | if (task_did_exec(task: old_task)) { |
4874 | /* Terminate the current task, since exec will start in new task */ |
4875 | task_terminate_internal(task: old_task); |
4876 | } |
4877 | |
4878 | /* Release the thread ref returned by cloneproc/fork1 */ |
4879 | if (imgp != NULL && imgp->ip_new_thread) { |
4880 | /* clear the exec complete flag if there is an error before point of no-return */ |
4881 | uint32_t clearwait_flags = TCRW_CLEAR_FINAL_WAIT; |
4882 | if (!spawn_no_exec && !exec_done && error != 0) { |
4883 | clearwait_flags |= TCRW_CLEAR_EXEC_COMPLETE; |
4884 | } |
4885 | /* wake up the new thread */ |
4886 | task_clear_return_wait(task: get_threadtask(imgp->ip_new_thread), flags: clearwait_flags); |
4887 | thread_deallocate(thread: imgp->ip_new_thread); |
4888 | imgp->ip_new_thread = NULL; |
4889 | } |
4890 | |
4891 | /* Release the ref returned by cloneproc/fork1 */ |
4892 | if (new_task) { |
4893 | task_deallocate(new_task); |
4894 | new_task = NULL; |
4895 | } |
4896 | |
4897 | if (should_release_proc_ref) { |
4898 | proc_rele(p); |
4899 | } |
4900 | |
4901 | kfree_type(typeof(*__spawn_data), __spawn_data); |
4902 | |
4903 | if (inherit != NULL) { |
4904 | ipc_importance_release(elem: inherit); |
4905 | } |
4906 | |
4907 | #if CONFIG_EXCLAVES |
4908 | if (task_conclave_id != NULL) { |
4909 | kfree_data(task_conclave_id, MAXCONCLAVENAME); |
4910 | } |
4911 | #endif |
4912 | |
4913 | assert(spawn_no_exec || exec_failure_reason == NULL); |
4914 | return error; |
4915 | } |
4916 | |
4917 | /* |
4918 | * proc_exec_switch_task |
4919 | * |
4920 | * Parameters: old_proc proc before exec |
4921 | * new_proc proc after exec |
4922 | * old_task task before exec |
4923 | * new_task task after exec |
4924 | * imgp image params |
4925 | * inherit resulting importance linkage |
4926 | * |
4927 | * Returns: proc. |
4928 | * |
4929 | * Note: The function will switch proc in pid hash from old proc to new proc. |
4930 | * The switch needs to happen after draining all proc refs and inside |
4931 | * a proc list lock. In the case of failure to switch the proc, which |
4932 | * might happen if the process received a SIGKILL or jetsam killed it, |
4933 | * it will make sure that the new tasks terminates. User proc ref returned |
4934 | * to caller. |
4935 | * |
4936 | * This function is called after point of no return, in the case |
4937 | * failure to switch, it will terminate the new task and swallow the |
4938 | * error and let the terminated process complete exec and die. |
4939 | */ |
4940 | proc_t |
4941 | proc_exec_switch_task(proc_t old_proc, proc_t new_proc, task_t old_task, task_t new_task, struct image_params *imgp, void **inherit) |
4942 | { |
4943 | boolean_t task_active; |
4944 | boolean_t proc_active; |
4945 | boolean_t thread_active; |
4946 | boolean_t reparent_traced_child = FALSE; |
4947 | thread_t old_thread = current_thread(); |
4948 | thread_t new_thread = imgp->ip_new_thread; |
4949 | |
4950 | thread_set_exec_promotion(thread: old_thread); |
4951 | old_proc = proc_refdrain_will_exec(p: old_proc); |
4952 | |
4953 | new_proc = proc_refdrain_will_exec(p: new_proc); |
4954 | /* extra proc ref returned to the caller */ |
4955 | |
4956 | assert(get_threadtask(new_thread) == new_task); |
4957 | task_active = task_is_active(new_task); |
4958 | proc_active = !(old_proc->p_lflag & P_LEXIT); |
4959 | |
4960 | /* Check if the current thread is not aborted due to SIGKILL */ |
4961 | thread_active = thread_is_active(thread: old_thread); |
4962 | |
4963 | /* |
4964 | * Do not switch the proc if the new task or proc is already terminated |
4965 | * as a result of error in exec past point of no return |
4966 | */ |
4967 | if (proc_active && task_active && thread_active) { |
4968 | uthread_t new_uthread = get_bsdthread_info(new_thread); |
4969 | uthread_t old_uthread = current_uthread(); |
4970 | |
4971 | /* Clear dispatchqueue and workloop ast offset */ |
4972 | new_proc->p_dispatchqueue_offset = 0; |
4973 | new_proc->p_dispatchqueue_serialno_offset = 0; |
4974 | new_proc->p_dispatchqueue_label_offset = 0; |
4975 | new_proc->p_return_to_kernel_offset = 0; |
4976 | new_proc->p_pthread_wq_quantum_offset = 0; |
4977 | |
4978 | /* If old_proc is session leader, change the leader to new proc */ |
4979 | session_replace_leader(old_proc, new_proc); |
4980 | |
4981 | proc_lock(old_proc); |
4982 | |
4983 | /* Copy the signal state, dtrace state and set bsd ast on new thread */ |
4984 | act_set_astbsd(new_thread); |
4985 | new_uthread->uu_siglist |= old_uthread->uu_siglist; |
4986 | new_uthread->uu_siglist |= old_proc->p_siglist; |
4987 | new_uthread->uu_sigwait = old_uthread->uu_sigwait; |
4988 | new_uthread->uu_sigmask = old_uthread->uu_sigmask; |
4989 | new_uthread->uu_oldmask = old_uthread->uu_oldmask; |
4990 | new_uthread->uu_exit_reason = old_uthread->uu_exit_reason; |
4991 | #if CONFIG_DTRACE |
4992 | new_uthread->t_dtrace_sig = old_uthread->t_dtrace_sig; |
4993 | new_uthread->t_dtrace_stop = old_uthread->t_dtrace_stop; |
4994 | new_uthread->t_dtrace_resumepid = old_uthread->t_dtrace_resumepid; |
4995 | assert(new_uthread->t_dtrace_scratch == NULL); |
4996 | new_uthread->t_dtrace_scratch = old_uthread->t_dtrace_scratch; |
4997 | |
4998 | old_uthread->t_dtrace_sig = 0; |
4999 | old_uthread->t_dtrace_stop = 0; |
5000 | old_uthread->t_dtrace_resumepid = 0; |
5001 | old_uthread->t_dtrace_scratch = NULL; |
5002 | #endif |
5003 | |
5004 | #if CONFIG_PROC_UDATA_STORAGE |
5005 | new_proc->p_user_data = old_proc->p_user_data; |
5006 | #endif /* CONFIG_PROC_UDATA_STORAGE */ |
5007 | |
5008 | /* Copy the resource accounting info */ |
5009 | thread_copy_resource_info(dst_thread: new_thread, src_thread: current_thread()); |
5010 | |
5011 | /* Clear the exit reason and signal state on old thread */ |
5012 | old_uthread->uu_exit_reason = NULL; |
5013 | old_uthread->uu_siglist = 0; |
5014 | |
5015 | task_set_did_exec_flag(task: old_task); |
5016 | task_clear_exec_copy_flag(task: new_task); |
5017 | |
5018 | task_copy_fields_for_exec(dst_task: new_task, src_task: old_task); |
5019 | |
5020 | /* |
5021 | * Need to transfer pending watch port boosts to the new task |
5022 | * while still making sure that the old task remains in the |
5023 | * importance linkage. Create an importance linkage from old task |
5024 | * to new task, then switch the task importance base of old task |
5025 | * and new task. After the switch the port watch boost will be |
5026 | * boosting the new task and new task will be donating importance |
5027 | * to old task. |
5028 | */ |
5029 | *inherit = ipc_importance_exec_switch_task(old_task, new_task); |
5030 | |
5031 | /* Transfer parent's ptrace state to child */ |
5032 | new_proc->p_lflag &= ~(P_LTRACED | P_LSIGEXC | P_LNOATTACH); |
5033 | new_proc->p_lflag |= (old_proc->p_lflag & (P_LTRACED | P_LSIGEXC | P_LNOATTACH)); |
5034 | new_proc->p_oppid = old_proc->p_oppid; |
5035 | |
5036 | if (old_proc->p_pptr != new_proc->p_pptr) { |
5037 | reparent_traced_child = TRUE; |
5038 | new_proc->p_lflag |= P_LTRACE_WAIT; |
5039 | } |
5040 | |
5041 | proc_unlock(old_proc); |
5042 | |
5043 | /* Update the list of proc knotes */ |
5044 | proc_transfer_knotes(old_proc, new_proc); |
5045 | |
5046 | /* Update the proc interval timers */ |
5047 | proc_inherit_itimers(old_proc, new_proc); |
5048 | |
5049 | proc_list_lock(); |
5050 | |
5051 | /* Insert the new proc in child list of parent proc */ |
5052 | p_reparentallchildren(old_proc, new_proc); |
5053 | |
5054 | /* Switch proc in pid hash */ |
5055 | phash_replace_locked(old_proc, new_proc); |
5056 | |
5057 | /* Transfer the shadow flag to old proc */ |
5058 | os_atomic_andnot(&new_proc->p_refcount, P_REF_SHADOW, relaxed); |
5059 | os_atomic_or(&old_proc->p_refcount, P_REF_SHADOW, relaxed); |
5060 | |
5061 | /* Change init proc if launchd exec */ |
5062 | if (old_proc == initproc) { |
5063 | /* Take the ref on new proc after proc_refwake_did_exec */ |
5064 | initproc = new_proc; |
5065 | /* Drop the proc ref on old proc */ |
5066 | proc_rele(p: old_proc); |
5067 | } |
5068 | |
5069 | proc_list_unlock(); |
5070 | #if CONFIG_EXCLAVES |
5071 | if (task_inherit_conclave(old_task, new_task, imgp->ip_vp, |
5072 | (int64_t)imgp->ip_arch_offset) != KERN_SUCCESS) { |
5073 | task_terminate_internal(new_task); |
5074 | } |
5075 | #endif |
5076 | } else { |
5077 | task_terminate_internal(task: new_task); |
5078 | } |
5079 | |
5080 | proc_refwake_did_exec(p: new_proc); |
5081 | proc_refwake_did_exec(p: old_proc); |
5082 | |
5083 | /* Take a ref on initproc if it changed */ |
5084 | if (new_proc == initproc) { |
5085 | initproc = proc_ref(p: new_proc, false); |
5086 | assert(initproc != PROC_NULL); |
5087 | } |
5088 | |
5089 | thread_clear_exec_promotion(thread: old_thread); |
5090 | proc_rele(p: old_proc); |
5091 | |
5092 | if (reparent_traced_child) { |
5093 | proc_t pp = proc_parent(old_proc); |
5094 | assert(pp != PROC_NULL); |
5095 | |
5096 | proc_reparentlocked(child: new_proc, newparent: pp, cansignal: 1, locked: 0); |
5097 | proc_rele(p: pp); |
5098 | |
5099 | proc_lock(new_proc); |
5100 | new_proc->p_lflag &= ~P_LTRACE_WAIT; |
5101 | proc_unlock(new_proc); |
5102 | } |
5103 | |
5104 | return new_proc; |
5105 | } |
5106 | |
5107 | /* |
5108 | * execve |
5109 | * |
5110 | * Parameters: uap->fname File name to exec |
5111 | * uap->argp Argument list |
5112 | * uap->envp Environment list |
5113 | * |
5114 | * Returns: 0 Success |
5115 | * __mac_execve:EINVAL Invalid argument |
5116 | * __mac_execve:ENOTSUP Invalid argument |
5117 | * __mac_execve:EACCES Permission denied |
5118 | * __mac_execve:EINTR Interrupted function |
5119 | * __mac_execve:ENOMEM Not enough space |
5120 | * __mac_execve:EFAULT Bad address |
5121 | * __mac_execve:ENAMETOOLONG Filename too long |
5122 | * __mac_execve:ENOEXEC Executable file format error |
5123 | * __mac_execve:ETXTBSY Text file busy [misuse of error code] |
5124 | * __mac_execve:??? |
5125 | * |
5126 | * TODO: Dynamic linker header address on stack is copied via suword() |
5127 | */ |
5128 | /* ARGSUSED */ |
5129 | int |
5130 | execve(proc_t p, struct execve_args *uap, int32_t *retval) |
5131 | { |
5132 | struct __mac_execve_args muap; |
5133 | int err; |
5134 | |
5135 | memoryshot(VM_EXECVE, DBG_FUNC_NONE); |
5136 | |
5137 | muap.fname = uap->fname; |
5138 | muap.argp = uap->argp; |
5139 | muap.envp = uap->envp; |
5140 | muap.mac_p = USER_ADDR_NULL; |
5141 | err = __mac_execve(p, &muap, retval); |
5142 | |
5143 | return err; |
5144 | } |
5145 | |
5146 | /* |
5147 | * __mac_execve |
5148 | * |
5149 | * Parameters: uap->fname File name to exec |
5150 | * uap->argp Argument list |
5151 | * uap->envp Environment list |
5152 | * uap->mac_p MAC label supplied by caller |
5153 | * |
5154 | * Returns: 0 Success |
5155 | * EINVAL Invalid argument |
5156 | * ENOTSUP Not supported |
5157 | * ENOEXEC Executable file format error |
5158 | * exec_activate_image:EINVAL Invalid argument |
5159 | * exec_activate_image:EACCES Permission denied |
5160 | * exec_activate_image:EINTR Interrupted function |
5161 | * exec_activate_image:ENOMEM Not enough space |
5162 | * exec_activate_image:EFAULT Bad address |
5163 | * exec_activate_image:ENAMETOOLONG Filename too long |
5164 | * exec_activate_image:ENOEXEC Executable file format error |
5165 | * exec_activate_image:ETXTBSY Text file busy [misuse of error code] |
5166 | * exec_activate_image:EBADEXEC The executable is corrupt/unknown |
5167 | * exec_activate_image:??? |
5168 | * mac_execve_enter:??? |
5169 | * |
5170 | * TODO: Dynamic linker header address on stack is copied via suword() |
5171 | */ |
5172 | int |
5173 | __mac_execve(proc_t p, struct __mac_execve_args *uap, int32_t *retval __unused) |
5174 | { |
5175 | struct image_params *imgp = NULL; |
5176 | struct vnode_attr *vap = NULL; |
5177 | struct vnode_attr *origvap = NULL; |
5178 | int error; |
5179 | int is_64 = IS_64BIT_PROCESS(p); |
5180 | struct vfs_context context; |
5181 | struct uthread *uthread = NULL; |
5182 | task_t old_task = current_task(); |
5183 | task_t new_task = NULL; |
5184 | boolean_t should_release_proc_ref = FALSE; |
5185 | boolean_t exec_done = FALSE; |
5186 | void *inherit = NULL; |
5187 | struct { |
5188 | struct image_params imgp; |
5189 | struct vnode_attr va; |
5190 | struct vnode_attr origva; |
5191 | } *__execve_data; |
5192 | |
5193 | /* Allocate a big chunk for locals instead of using stack since these |
5194 | * structures a pretty big. |
5195 | */ |
5196 | __execve_data = kalloc_type(typeof(*__execve_data), Z_WAITOK | Z_ZERO); |
5197 | if (__execve_data == NULL) { |
5198 | error = ENOMEM; |
5199 | goto exit_with_error; |
5200 | } |
5201 | imgp = &__execve_data->imgp; |
5202 | vap = &__execve_data->va; |
5203 | origvap = &__execve_data->origva; |
5204 | |
5205 | /* Initialize the common data in the image_params structure */ |
5206 | imgp->ip_user_fname = uap->fname; |
5207 | imgp->ip_user_argv = uap->argp; |
5208 | imgp->ip_user_envv = uap->envp; |
5209 | imgp->ip_vattr = vap; |
5210 | imgp->ip_origvattr = origvap; |
5211 | imgp->ip_vfs_context = &context; |
5212 | imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT_ADDR : IMGPF_NONE) | ((p->p_flag & P_DISABLE_ASLR) ? IMGPF_DISABLE_ASLR : IMGPF_NONE); |
5213 | imgp->ip_seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32); |
5214 | imgp->ip_mac_return = 0; |
5215 | imgp->ip_cs_error = OS_REASON_NULL; |
5216 | imgp->ip_simulator_binary = IMGPF_SB_DEFAULT; |
5217 | imgp->ip_subsystem_root_path = NULL; |
5218 | uthread_set_exec_data(uth: current_uthread(), imgp); |
5219 | |
5220 | #if CONFIG_MACF |
5221 | if (uap->mac_p != USER_ADDR_NULL) { |
5222 | error = mac_execve_enter(mac_p: uap->mac_p, imgp); |
5223 | if (error) { |
5224 | goto exit_with_error; |
5225 | } |
5226 | } |
5227 | #endif |
5228 | uthread = current_uthread(); |
5229 | { |
5230 | imgp->ip_flags |= IMGPF_EXEC; |
5231 | |
5232 | /* Adjust the user proc count */ |
5233 | (void)chgproccnt(uid: kauth_getruid(), diff: 1); |
5234 | /* |
5235 | * For execve case, create a new proc, task and thread |
5236 | * but don't make the proc visible to userland. After |
5237 | * image activation, the new proc would take place of |
5238 | * the old proc in pid hash and other lists that make |
5239 | * the proc visible to the system. |
5240 | */ |
5241 | imgp->ip_new_thread = cloneproc(old_task, NULL, p, CLONEPROC_EXEC); |
5242 | /* task and thread ref returned by cloneproc */ |
5243 | if (imgp->ip_new_thread == NULL) { |
5244 | (void)chgproccnt(uid: kauth_getruid(), diff: -1); |
5245 | error = ENOMEM; |
5246 | goto exit_with_error; |
5247 | } |
5248 | |
5249 | new_task = get_threadtask(imgp->ip_new_thread); |
5250 | } |
5251 | |
5252 | p = (proc_t)get_bsdthreadtask_info(imgp->ip_new_thread); |
5253 | |
5254 | context.vc_thread = imgp->ip_new_thread; |
5255 | context.vc_ucred = kauth_cred_proc_ref(procp: p); /* XXX must NOT be kauth_cred_get() */ |
5256 | |
5257 | imgp->ip_subsystem_root_path = p->p_subsystem_root_path; |
5258 | |
5259 | proc_transend(p, locked: 0); |
5260 | proc_signalend(p, locked: 0); |
5261 | |
5262 | |
5263 | /* |
5264 | * Activate the image. |
5265 | * Warning: If activation failed after point of no return, it returns error |
5266 | * as 0 and pretends the call succeeded. |
5267 | */ |
5268 | error = exec_activate_image(imgp); |
5269 | /* thread and task ref returned for vfexec case */ |
5270 | |
5271 | if (imgp->ip_new_thread != NULL) { |
5272 | /* |
5273 | * task reference might be returned by exec_activate_image |
5274 | * for vfexec. |
5275 | */ |
5276 | new_task = get_threadtask(imgp->ip_new_thread); |
5277 | #if defined(HAS_APPLE_PAC) |
5278 | ml_task_set_disable_user_jop(task: new_task, disable_user_jop: imgp->ip_flags & IMGPF_NOJOP ? TRUE : FALSE); |
5279 | ml_thread_set_disable_user_jop(thread: imgp->ip_new_thread, disable_user_jop: imgp->ip_flags & IMGPF_NOJOP ? TRUE : FALSE); |
5280 | #endif |
5281 | } |
5282 | |
5283 | if (!error) { |
5284 | p = proc_exec_switch_task(old_proc: current_proc(), new_proc: p, old_task, new_task, imgp, inherit: &inherit); |
5285 | /* proc ref returned */ |
5286 | should_release_proc_ref = TRUE; |
5287 | } |
5288 | |
5289 | kauth_cred_unref(&context.vc_ucred); |
5290 | |
5291 | if (!error) { |
5292 | exec_done = TRUE; |
5293 | assert(imgp->ip_new_thread != NULL); |
5294 | |
5295 | exec_resettextvp(p, imgp); |
5296 | /* |
5297 | * Enable new task IPC access if exec_activate_image() returned an |
5298 | * active task. (Checks active bit in ipc_task_enable() under lock). |
5299 | * Must enable after resettextvp so that task port policies are not evaluated |
5300 | * until the csblob in the textvp is accurately reflected. |
5301 | */ |
5302 | ipc_task_enable(task: new_task); |
5303 | error = process_signature(p, imgp); |
5304 | } |
5305 | |
5306 | #if defined(HAS_APPLE_PAC) |
5307 | if (imgp->ip_new_thread && !error) { |
5308 | ml_task_set_jop_pid_from_shared_region(task: new_task, disable_user_jop: imgp->ip_flags & IMGPF_NOJOP); |
5309 | ml_thread_set_jop_pid(thread: imgp->ip_new_thread, task: new_task); |
5310 | } |
5311 | #endif /* defined(HAS_APPLE_PAC) */ |
5312 | |
5313 | /* flag exec has occurred, notify only if it has not failed due to FP Key error */ |
5314 | if (exec_done && ((p->p_lflag & P_LTERM_DECRYPTFAIL) == 0)) { |
5315 | proc_knote(p, NOTE_EXEC); |
5316 | } |
5317 | |
5318 | if (imgp->ip_vp != NULLVP) { |
5319 | vnode_put(vp: imgp->ip_vp); |
5320 | } |
5321 | if (imgp->ip_scriptvp != NULLVP) { |
5322 | vnode_put(vp: imgp->ip_scriptvp); |
5323 | } |
5324 | if (imgp->ip_free_map) { |
5325 | /* Free the map after dropping iocount on vnode to avoid deadlock */ |
5326 | vm_map_deallocate(map: imgp->ip_free_map); |
5327 | } |
5328 | if (imgp->ip_strings) { |
5329 | execargs_free(imgp); |
5330 | } |
5331 | #if CONFIG_MACF |
5332 | if (imgp->ip_execlabelp) { |
5333 | mac_cred_label_free(label: imgp->ip_execlabelp); |
5334 | imgp->ip_execlabelp = NULL; |
5335 | } |
5336 | if (imgp->ip_scriptlabelp) { |
5337 | mac_vnode_label_free(label: imgp->ip_scriptlabelp); |
5338 | imgp->ip_scriptlabelp = NULL; |
5339 | } |
5340 | #endif |
5341 | if (imgp->ip_cs_error != OS_REASON_NULL) { |
5342 | os_reason_free(cur_reason: imgp->ip_cs_error); |
5343 | imgp->ip_cs_error = OS_REASON_NULL; |
5344 | } |
5345 | |
5346 | if (!error) { |
5347 | /* |
5348 | * We need to initialize the bank context behind the protection of |
5349 | * the proc_trans lock to prevent a race with exit. We can't do this during |
5350 | * exec_activate_image because task_bank_init checks entitlements that |
5351 | * aren't loaded until subsequent calls (including exec_resettextvp). |
5352 | */ |
5353 | error = proc_transstart(p, locked: 0, non_blocking: 0); |
5354 | } |
5355 | |
5356 | if (!error) { |
5357 | task_bank_init(task: new_task); |
5358 | proc_transend(p, locked: 0); |
5359 | |
5360 | // Don't inherit crash behavior across exec |
5361 | p->p_crash_behavior = 0; |
5362 | p->p_crash_behavior_deadline = 0; |
5363 | |
5364 | #if __arm64__ |
5365 | proc_footprint_entitlement_hacks(p, task: new_task); |
5366 | #endif /* __arm64__ */ |
5367 | |
5368 | #if XNU_TARGET_OS_OSX |
5369 | if (IOTaskHasEntitlement(task: new_task, SINGLE_JIT_ENTITLEMENT)) { |
5370 | vm_map_single_jit(map: get_task_map(new_task)); |
5371 | } |
5372 | #endif /* XNU_TARGET_OS_OSX */ |
5373 | |
5374 | /* Sever any extant thread affinity */ |
5375 | thread_affinity_exec(thread: current_thread()); |
5376 | |
5377 | /* Inherit task role from old task to new task for exec */ |
5378 | proc_inherit_task_role(new_task, old_task); |
5379 | |
5380 | thread_t main_thread = imgp->ip_new_thread; |
5381 | |
5382 | task_set_main_thread_qos(task: new_task, main_thread); |
5383 | |
5384 | #if __has_feature(ptrauth_calls) |
5385 | task_set_pac_exception_fatal_flag(new_task); |
5386 | #endif /* __has_feature(ptrauth_calls) */ |
5387 | task_set_jit_exception_fatal_flag(task: new_task); |
5388 | |
5389 | #if CONFIG_ARCADE |
5390 | /* |
5391 | * Check to see if we need to trigger an arcade upcall AST now |
5392 | * that the vnode has been reset on the task. |
5393 | */ |
5394 | arcade_prepare(task: new_task, thread: imgp->ip_new_thread); |
5395 | #endif /* CONFIG_ARCADE */ |
5396 | |
5397 | proc_apply_jit_and_vm_policies(imgp, p, task: new_task); |
5398 | |
5399 | if (vm_darkwake_mode == TRUE) { |
5400 | /* |
5401 | * This process is being launched when the system |
5402 | * is in darkwake. So mark it specially. This will |
5403 | * cause all its pages to be entered in the background Q. |
5404 | */ |
5405 | task_set_darkwake_mode(new_task, vm_darkwake_mode); |
5406 | } |
5407 | |
5408 | #if CONFIG_DTRACE |
5409 | dtrace_thread_didexec(imgp->ip_new_thread); |
5410 | |
5411 | if ((dtrace_proc_waitfor_hook = dtrace_proc_waitfor_exec_ptr) != NULL) { |
5412 | (*dtrace_proc_waitfor_hook)(p); |
5413 | } |
5414 | #endif |
5415 | |
5416 | #if CONFIG_AUDIT |
5417 | if (!error && AUDIT_ENABLED() && p) { |
5418 | /* Add the CDHash of the new process to the audit record */ |
5419 | uint8_t *cdhash = cs_get_cdhash(p); |
5420 | if (cdhash) { |
5421 | AUDIT_ARG(data, cdhash, sizeof(uint8_t), CS_CDHASH_LEN); |
5422 | } |
5423 | } |
5424 | #endif |
5425 | } else { |
5426 | DTRACE_PROC1(exec__failure, int, error); |
5427 | } |
5428 | |
5429 | exit_with_error: |
5430 | |
5431 | /* terminate the new task it if exec failed */ |
5432 | if (new_task != NULL && task_is_exec_copy(new_task)) { |
5433 | task_terminate_internal(task: new_task); |
5434 | } |
5435 | |
5436 | if (imgp != NULL) { |
5437 | /* Clear the initial wait on the thread transferring watchports */ |
5438 | if (imgp->ip_new_thread) { |
5439 | task_clear_return_wait(task: get_threadtask(imgp->ip_new_thread), TCRW_CLEAR_INITIAL_WAIT); |
5440 | } |
5441 | |
5442 | /* Transfer the watchport boost to new task */ |
5443 | if (!error) { |
5444 | task_transfer_turnstile_watchports(old_task, |
5445 | new_task, new_thread: imgp->ip_new_thread); |
5446 | } |
5447 | /* |
5448 | * Do not terminate the current task, if proc_exec_switch_task did not |
5449 | * switch the tasks, terminating the current task without the switch would |
5450 | * result in loosing the SIGKILL status. |
5451 | */ |
5452 | if (task_did_exec(task: old_task)) { |
5453 | /* Terminate the current task, since exec will start in new task */ |
5454 | task_terminate_internal(task: old_task); |
5455 | } |
5456 | |
5457 | /* Release the thread ref returned by cloneproc */ |
5458 | if (imgp->ip_new_thread) { |
5459 | /* clear the exec complete flag if there is an error before point of no-return */ |
5460 | uint32_t clearwait_flags = TCRW_CLEAR_FINAL_WAIT; |
5461 | if (!exec_done && error != 0) { |
5462 | clearwait_flags |= TCRW_CLEAR_EXEC_COMPLETE; |
5463 | } |
5464 | /* wake up the new exec thread */ |
5465 | task_clear_return_wait(task: get_threadtask(imgp->ip_new_thread), flags: clearwait_flags); |
5466 | thread_deallocate(thread: imgp->ip_new_thread); |
5467 | imgp->ip_new_thread = NULL; |
5468 | } |
5469 | } |
5470 | |
5471 | /* Release the ref returned by fork_create_child */ |
5472 | if (new_task) { |
5473 | task_deallocate(new_task); |
5474 | new_task = NULL; |
5475 | } |
5476 | |
5477 | if (should_release_proc_ref) { |
5478 | proc_rele(p); |
5479 | } |
5480 | |
5481 | uthread_set_exec_data(uth: current_uthread(), NULL); |
5482 | kfree_type(typeof(*__execve_data), __execve_data); |
5483 | |
5484 | if (inherit != NULL) { |
5485 | ipc_importance_release(elem: inherit); |
5486 | } |
5487 | |
5488 | return error; |
5489 | } |
5490 | |
5491 | |
5492 | /* |
5493 | * copyinptr |
5494 | * |
5495 | * Description: Copy a pointer in from user space to a user_addr_t in kernel |
5496 | * space, based on 32/64 bitness of the user space |
5497 | * |
5498 | * Parameters: froma User space address |
5499 | * toptr Address of kernel space user_addr_t |
5500 | * ptr_size 4/8, based on 'froma' address space |
5501 | * |
5502 | * Returns: 0 Success |
5503 | * EFAULT Bad 'froma' |
5504 | * |
5505 | * Implicit returns: |
5506 | * *ptr_size Modified |
5507 | */ |
5508 | static int |
5509 | copyinptr(user_addr_t froma, user_addr_t *toptr, int ptr_size) |
5510 | { |
5511 | int error; |
5512 | |
5513 | if (ptr_size == 4) { |
5514 | /* 64 bit value containing 32 bit address */ |
5515 | unsigned int i = 0; |
5516 | |
5517 | error = copyin(froma, &i, 4); |
5518 | *toptr = CAST_USER_ADDR_T(i); /* SAFE */ |
5519 | } else { |
5520 | error = copyin(froma, toptr, 8); |
5521 | } |
5522 | return error; |
5523 | } |
5524 | |
5525 | |
5526 | /* |
5527 | * copyoutptr |
5528 | * |
5529 | * Description: Copy a pointer out from a user_addr_t in kernel space to |
5530 | * user space, based on 32/64 bitness of the user space |
5531 | * |
5532 | * Parameters: ua User space address to copy to |
5533 | * ptr Address of kernel space user_addr_t |
5534 | * ptr_size 4/8, based on 'ua' address space |
5535 | * |
5536 | * Returns: 0 Success |
5537 | * EFAULT Bad 'ua' |
5538 | * |
5539 | */ |
5540 | static int |
5541 | copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size) |
5542 | { |
5543 | int error; |
5544 | |
5545 | if (ptr_size == 4) { |
5546 | /* 64 bit value containing 32 bit address */ |
5547 | unsigned int i = CAST_DOWN_EXPLICIT(unsigned int, ua); /* SAFE */ |
5548 | |
5549 | error = copyout(&i, ptr, 4); |
5550 | } else { |
5551 | error = copyout(&ua, ptr, 8); |
5552 | } |
5553 | return error; |
5554 | } |
5555 | |
5556 | |
5557 | /* |
5558 | * exec_copyout_strings |
5559 | * |
5560 | * Copy out the strings segment to user space. The strings segment is put |
5561 | * on a preinitialized stack frame. |
5562 | * |
5563 | * Parameters: struct image_params * the image parameter block |
5564 | * int * a pointer to the stack offset variable |
5565 | * |
5566 | * Returns: 0 Success |
5567 | * !0 Faiure: errno |
5568 | * |
5569 | * Implicit returns: |
5570 | * (*stackp) The stack offset, modified |
5571 | * |
5572 | * Note: The strings segment layout is backward, from the beginning |
5573 | * of the top of the stack to consume the minimal amount of |
5574 | * space possible; the returned stack pointer points to the |
5575 | * end of the area consumed (stacks grow downward). |
5576 | * |
5577 | * argc is an int; arg[i] are pointers; env[i] are pointers; |
5578 | * the 0's are (void *)NULL's |
5579 | * |
5580 | * The stack frame layout is: |
5581 | * |
5582 | * +-------------+ <- p->user_stack |
5583 | * | 16b | |
5584 | * +-------------+ |
5585 | * | STRING AREA | |
5586 | * | : | |
5587 | * | : | |
5588 | * | : | |
5589 | * +- -- -- -- --+ |
5590 | * | PATH AREA | |
5591 | * +-------------+ |
5592 | * | 0 | |
5593 | * +-------------+ |
5594 | * | applev[n] | |
5595 | * +-------------+ |
5596 | * : |
5597 | * : |
5598 | * +-------------+ |
5599 | * | applev[1] | |
5600 | * +-------------+ |
5601 | * | exec_path / | |
5602 | * | applev[0] | |
5603 | * +-------------+ |
5604 | * | 0 | |
5605 | * +-------------+ |
5606 | * | env[n] | |
5607 | * +-------------+ |
5608 | * : |
5609 | * : |
5610 | * +-------------+ |
5611 | * | env[0] | |
5612 | * +-------------+ |
5613 | * | 0 | |
5614 | * +-------------+ |
5615 | * | arg[argc-1] | |
5616 | * +-------------+ |
5617 | * : |
5618 | * : |
5619 | * +-------------+ |
5620 | * | arg[0] | |
5621 | * +-------------+ |
5622 | * | argc | |
5623 | * sp-> +-------------+ |
5624 | * |
5625 | * Although technically a part of the STRING AREA, we treat the PATH AREA as |
5626 | * a separate entity. This allows us to align the beginning of the PATH AREA |
5627 | * to a pointer boundary so that the exec_path, env[i], and argv[i] pointers |
5628 | * which preceed it on the stack are properly aligned. |
5629 | */ |
5630 | __attribute__((noinline)) |
5631 | static int |
5632 | exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp) |
5633 | { |
5634 | proc_t p = vfs_context_proc(ctx: imgp->ip_vfs_context); |
5635 | int ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT_ADDR) ? 8 : 4; |
5636 | int ptr_area_size; |
5637 | void *ptr_buffer_start, *ptr_buffer; |
5638 | size_t string_size; |
5639 | |
5640 | user_addr_t string_area; /* *argv[], *env[] */ |
5641 | user_addr_t ptr_area; /* argv[], env[], applev[] */ |
5642 | user_addr_t argc_area; /* argc */ |
5643 | user_addr_t stack; |
5644 | int error; |
5645 | |
5646 | unsigned i; |
5647 | struct copyout_desc { |
5648 | char *start_string; |
5649 | int count; |
5650 | #if CONFIG_DTRACE |
5651 | user_addr_t *dtrace_cookie; |
5652 | #endif |
5653 | boolean_t null_term; |
5654 | } descriptors[] = { |
5655 | { |
5656 | .start_string = imgp->ip_startargv, |
5657 | .count = imgp->ip_argc, |
5658 | #if CONFIG_DTRACE |
5659 | .dtrace_cookie = &p->p_dtrace_argv, |
5660 | #endif |
5661 | .null_term = TRUE |
5662 | }, |
5663 | { |
5664 | .start_string = imgp->ip_endargv, |
5665 | .count = imgp->ip_envc, |
5666 | #if CONFIG_DTRACE |
5667 | .dtrace_cookie = &p->p_dtrace_envp, |
5668 | #endif |
5669 | .null_term = TRUE |
5670 | }, |
5671 | { |
5672 | .start_string = imgp->ip_strings, |
5673 | .count = 1, |
5674 | #if CONFIG_DTRACE |
5675 | .dtrace_cookie = NULL, |
5676 | #endif |
5677 | .null_term = FALSE |
5678 | }, |
5679 | { |
5680 | .start_string = imgp->ip_endenvv, |
5681 | .count = imgp->ip_applec - 1, /* exec_path handled above */ |
5682 | #if CONFIG_DTRACE |
5683 | .dtrace_cookie = NULL, |
5684 | #endif |
5685 | .null_term = TRUE |
5686 | } |
5687 | }; |
5688 | |
5689 | stack = *stackp; |
5690 | |
5691 | /* |
5692 | * All previous contributors to the string area |
5693 | * should have aligned their sub-area |
5694 | */ |
5695 | if (imgp->ip_strspace % ptr_size != 0) { |
5696 | error = EINVAL; |
5697 | goto bad; |
5698 | } |
5699 | |
5700 | /* Grow the stack down for the strings we've been building up */ |
5701 | string_size = imgp->ip_strendp - imgp->ip_strings; |
5702 | stack -= string_size; |
5703 | string_area = stack; |
5704 | |
5705 | /* |
5706 | * Need room for one pointer for each string, plus |
5707 | * one for the NULLs terminating the argv, envv, and apple areas. |
5708 | */ |
5709 | ptr_area_size = (imgp->ip_argc + imgp->ip_envc + imgp->ip_applec + 3) * ptr_size; |
5710 | stack -= ptr_area_size; |
5711 | ptr_area = stack; |
5712 | |
5713 | /* We'll construct all the pointer arrays in our string buffer, |
5714 | * which we already know is aligned properly, and ip_argspace |
5715 | * was used to verify we have enough space. |
5716 | */ |
5717 | ptr_buffer_start = ptr_buffer = (void *)imgp->ip_strendp; |
5718 | |
5719 | /* |
5720 | * Need room for pointer-aligned argc slot. |
5721 | */ |
5722 | stack -= ptr_size; |
5723 | argc_area = stack; |
5724 | |
5725 | /* |
5726 | * Record the size of the arguments area so that sysctl_procargs() |
5727 | * can return the argument area without having to parse the arguments. |
5728 | */ |
5729 | proc_lock(p); |
5730 | p->p_argc = imgp->ip_argc; |
5731 | p->p_argslen = (int)(*stackp - string_area); |
5732 | proc_unlock(p); |
5733 | |
5734 | /* Return the initial stack address: the location of argc */ |
5735 | *stackp = stack; |
5736 | |
5737 | /* |
5738 | * Copy out the entire strings area. |
5739 | */ |
5740 | error = copyout(imgp->ip_strings, string_area, |
5741 | string_size); |
5742 | if (error) { |
5743 | goto bad; |
5744 | } |
5745 | |
5746 | for (i = 0; i < sizeof(descriptors) / sizeof(descriptors[0]); i++) { |
5747 | char *cur_string = descriptors[i].start_string; |
5748 | int j; |
5749 | |
5750 | #if CONFIG_DTRACE |
5751 | if (descriptors[i].dtrace_cookie) { |
5752 | proc_lock(p); |
5753 | *descriptors[i].dtrace_cookie = ptr_area + ((uintptr_t)ptr_buffer - (uintptr_t)ptr_buffer_start); /* dtrace convenience */ |
5754 | proc_unlock(p); |
5755 | } |
5756 | #endif /* CONFIG_DTRACE */ |
5757 | |
5758 | /* |
5759 | * For each segment (argv, envv, applev), copy as many pointers as requested |
5760 | * to our pointer buffer. |
5761 | */ |
5762 | for (j = 0; j < descriptors[i].count; j++) { |
5763 | user_addr_t cur_address = string_area + (cur_string - imgp->ip_strings); |
5764 | |
5765 | /* Copy out the pointer to the current string. Alignment has been verified */ |
5766 | if (ptr_size == 8) { |
5767 | *(uint64_t *)ptr_buffer = (uint64_t)cur_address; |
5768 | } else { |
5769 | *(uint32_t *)ptr_buffer = (uint32_t)cur_address; |
5770 | } |
5771 | |
5772 | ptr_buffer = (void *)((uintptr_t)ptr_buffer + ptr_size); |
5773 | cur_string += strlen(s: cur_string) + 1; /* Only a NUL between strings in the same area */ |
5774 | } |
5775 | |
5776 | if (descriptors[i].null_term) { |
5777 | if (ptr_size == 8) { |
5778 | *(uint64_t *)ptr_buffer = 0ULL; |
5779 | } else { |
5780 | *(uint32_t *)ptr_buffer = 0; |
5781 | } |
5782 | |
5783 | ptr_buffer = (void *)((uintptr_t)ptr_buffer + ptr_size); |
5784 | } |
5785 | } |
5786 | |
5787 | /* |
5788 | * Copy out all our pointer arrays in bulk. |
5789 | */ |
5790 | error = copyout(ptr_buffer_start, ptr_area, |
5791 | ptr_area_size); |
5792 | if (error) { |
5793 | goto bad; |
5794 | } |
5795 | |
5796 | /* argc (int32, stored in a ptr_size area) */ |
5797 | error = copyoutptr(ua: (user_addr_t)imgp->ip_argc, ptr: argc_area, ptr_size); |
5798 | if (error) { |
5799 | goto bad; |
5800 | } |
5801 | |
5802 | bad: |
5803 | return error; |
5804 | } |
5805 | |
5806 | |
5807 | /* |
5808 | * exec_extract_strings |
5809 | * |
5810 | * Copy arguments and environment from user space into work area; we may |
5811 | * have already copied some early arguments into the work area, and if |
5812 | * so, any arguments opied in are appended to those already there. |
5813 | * This function is the primary manipulator of ip_argspace, since |
5814 | * these are the arguments the client of execve(2) knows about. After |
5815 | * each argv[]/envv[] string is copied, we charge the string length |
5816 | * and argv[]/envv[] pointer slot to ip_argspace, so that we can |
5817 | * full preflight the arg list size. |
5818 | * |
5819 | * Parameters: struct image_params * the image parameter block |
5820 | * |
5821 | * Returns: 0 Success |
5822 | * !0 Failure: errno |
5823 | * |
5824 | * Implicit returns; |
5825 | * (imgp->ip_argc) Count of arguments, updated |
5826 | * (imgp->ip_envc) Count of environment strings, updated |
5827 | * (imgp->ip_argspace) Count of remaining of NCARGS |
5828 | * (imgp->ip_interp_buffer) Interpreter and args (mutated in place) |
5829 | * |
5830 | * |
5831 | * Note: The argument and environment vectors are user space pointers |
5832 | * to arrays of user space pointers. |
5833 | */ |
5834 | __attribute__((noinline)) |
5835 | static int |
5836 | (struct image_params *imgp) |
5837 | { |
5838 | int error = 0; |
5839 | int ptr_size = (imgp->ip_flags & IMGPF_WAS_64BIT_ADDR) ? 8 : 4; |
5840 | int new_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT_ADDR) ? 8 : 4; |
5841 | user_addr_t argv = imgp->ip_user_argv; |
5842 | user_addr_t envv = imgp->ip_user_envv; |
5843 | |
5844 | /* |
5845 | * Adjust space reserved for the path name by however much padding it |
5846 | * needs. Doing this here since we didn't know if this would be a 32- |
5847 | * or 64-bit process back in exec_save_path. |
5848 | */ |
5849 | while (imgp->ip_strspace % new_ptr_size != 0) { |
5850 | *imgp->ip_strendp++ = '\0'; |
5851 | imgp->ip_strspace--; |
5852 | /* imgp->ip_argspace--; not counted towards exec args total */ |
5853 | } |
5854 | |
5855 | /* |
5856 | * From now on, we start attributing string space to ip_argspace |
5857 | */ |
5858 | imgp->ip_startargv = imgp->ip_strendp; |
5859 | imgp->ip_argc = 0; |
5860 | |
5861 | if ((imgp->ip_flags & IMGPF_INTERPRET) != 0) { |
5862 | user_addr_t arg; |
5863 | char *argstart, *ch; |
5864 | |
5865 | /* First, the arguments in the "#!" string are tokenized and extracted. */ |
5866 | argstart = imgp->ip_interp_buffer; |
5867 | while (argstart) { |
5868 | ch = argstart; |
5869 | while (*ch && !IS_WHITESPACE(*ch)) { |
5870 | ch++; |
5871 | } |
5872 | |
5873 | if (*ch == '\0') { |
5874 | /* last argument, no need to NUL-terminate */ |
5875 | error = exec_add_user_string(imgp, CAST_USER_ADDR_T(argstart), seg: UIO_SYSSPACE, TRUE); |
5876 | argstart = NULL; |
5877 | } else { |
5878 | /* NUL-terminate */ |
5879 | *ch = '\0'; |
5880 | error = exec_add_user_string(imgp, CAST_USER_ADDR_T(argstart), seg: UIO_SYSSPACE, TRUE); |
5881 | |
5882 | /* |
5883 | * Find the next string. We know spaces at the end of the string have already |
5884 | * been stripped. |
5885 | */ |
5886 | argstart = ch + 1; |
5887 | while (IS_WHITESPACE(*argstart)) { |
5888 | argstart++; |
5889 | } |
5890 | } |
5891 | |
5892 | /* Error-check, regardless of whether this is the last interpreter arg or not */ |
5893 | if (error) { |
5894 | goto bad; |
5895 | } |
5896 | if (imgp->ip_argspace < new_ptr_size) { |
5897 | error = E2BIG; |
5898 | goto bad; |
5899 | } |
5900 | imgp->ip_argspace -= new_ptr_size; /* to hold argv[] entry */ |
5901 | imgp->ip_argc++; |
5902 | } |
5903 | |
5904 | if (argv != 0LL) { |
5905 | /* |
5906 | * If we are running an interpreter, replace the av[0] that was |
5907 | * passed to execve() with the path name that was |
5908 | * passed to execve() for interpreters which do not use the PATH |
5909 | * to locate their script arguments. |
5910 | */ |
5911 | error = copyinptr(froma: argv, toptr: &arg, ptr_size); |
5912 | if (error) { |
5913 | goto bad; |
5914 | } |
5915 | if (arg != 0LL) { |
5916 | argv += ptr_size; /* consume without using */ |
5917 | } |
5918 | } |
5919 | |
5920 | if (imgp->ip_interp_sugid_fd != -1) { |
5921 | char temp[19]; /* "/dev/fd/" + 10 digits + NUL */ |
5922 | snprintf(temp, count: sizeof(temp), "/dev/fd/%d" , imgp->ip_interp_sugid_fd); |
5923 | error = exec_add_user_string(imgp, CAST_USER_ADDR_T(temp), seg: UIO_SYSSPACE, TRUE); |
5924 | } else { |
5925 | error = exec_add_user_string(imgp, str: imgp->ip_user_fname, seg: imgp->ip_seg, TRUE); |
5926 | } |
5927 | |
5928 | if (error) { |
5929 | goto bad; |
5930 | } |
5931 | if (imgp->ip_argspace < new_ptr_size) { |
5932 | error = E2BIG; |
5933 | goto bad; |
5934 | } |
5935 | imgp->ip_argspace -= new_ptr_size; /* to hold argv[] entry */ |
5936 | imgp->ip_argc++; |
5937 | } |
5938 | |
5939 | while (argv != 0LL) { |
5940 | user_addr_t arg; |
5941 | |
5942 | error = copyinptr(froma: argv, toptr: &arg, ptr_size); |
5943 | if (error) { |
5944 | goto bad; |
5945 | } |
5946 | |
5947 | if (arg == 0LL) { |
5948 | break; |
5949 | } |
5950 | |
5951 | argv += ptr_size; |
5952 | |
5953 | /* |
5954 | * av[n...] = arg[n] |
5955 | */ |
5956 | error = exec_add_user_string(imgp, str: arg, seg: imgp->ip_seg, TRUE); |
5957 | if (error) { |
5958 | goto bad; |
5959 | } |
5960 | if (imgp->ip_argspace < new_ptr_size) { |
5961 | error = E2BIG; |
5962 | goto bad; |
5963 | } |
5964 | imgp->ip_argspace -= new_ptr_size; /* to hold argv[] entry */ |
5965 | imgp->ip_argc++; |
5966 | } |
5967 | |
5968 | /* Save space for argv[] NULL terminator */ |
5969 | if (imgp->ip_argspace < new_ptr_size) { |
5970 | error = E2BIG; |
5971 | goto bad; |
5972 | } |
5973 | imgp->ip_argspace -= new_ptr_size; |
5974 | |
5975 | /* Note where the args ends and env begins. */ |
5976 | imgp->ip_endargv = imgp->ip_strendp; |
5977 | imgp->ip_envc = 0; |
5978 | |
5979 | /* Now, get the environment */ |
5980 | while (envv != 0LL) { |
5981 | user_addr_t env; |
5982 | |
5983 | error = copyinptr(froma: envv, toptr: &env, ptr_size); |
5984 | if (error) { |
5985 | goto bad; |
5986 | } |
5987 | |
5988 | envv += ptr_size; |
5989 | if (env == 0LL) { |
5990 | break; |
5991 | } |
5992 | /* |
5993 | * av[n...] = env[n] |
5994 | */ |
5995 | error = exec_add_user_string(imgp, str: env, seg: imgp->ip_seg, TRUE); |
5996 | if (error) { |
5997 | goto bad; |
5998 | } |
5999 | if (imgp->ip_argspace < new_ptr_size) { |
6000 | error = E2BIG; |
6001 | goto bad; |
6002 | } |
6003 | imgp->ip_argspace -= new_ptr_size; /* to hold envv[] entry */ |
6004 | imgp->ip_envc++; |
6005 | } |
6006 | |
6007 | /* Save space for envv[] NULL terminator */ |
6008 | if (imgp->ip_argspace < new_ptr_size) { |
6009 | error = E2BIG; |
6010 | goto bad; |
6011 | } |
6012 | imgp->ip_argspace -= new_ptr_size; |
6013 | |
6014 | /* Align the tail of the combined argv+envv area */ |
6015 | while (imgp->ip_strspace % new_ptr_size != 0) { |
6016 | if (imgp->ip_argspace < 1) { |
6017 | error = E2BIG; |
6018 | goto bad; |
6019 | } |
6020 | *imgp->ip_strendp++ = '\0'; |
6021 | imgp->ip_strspace--; |
6022 | imgp->ip_argspace--; |
6023 | } |
6024 | |
6025 | /* Note where the envv ends and applev begins. */ |
6026 | imgp->ip_endenvv = imgp->ip_strendp; |
6027 | |
6028 | /* |
6029 | * From now on, we are no longer charging argument |
6030 | * space to ip_argspace. |
6031 | */ |
6032 | |
6033 | bad: |
6034 | return error; |
6035 | } |
6036 | |
/*
 * Stack guard: Libc sets aside an 8-element array for stack guard values
 * but populates only one entry, and gcc and llvm both appear to consume a
 * single 8-byte guard.  Constructing more values is deferred until
 * somebody actually needs more than an 8-byte guard.
 */
#define GUARD_VALUES            1
#define GUARD_KEY               "stack_guard="

/*
 * Entropy required by the system malloc when it initializes.
 */
#define ENTROPY_VALUES          2
#define ENTROPY_KEY             "malloc_entropy="

/*
 * Random pointer-obfuscation value required by libplatform when it
 * initializes.
 */
#define PTR_MUNGE_VALUES        1
#define PTR_MUNGE_KEY           "ptr_munge="

/*
 * Makes the system malloc engage the nanozone for UIAPP.
 */
#define NANO_ENGAGE_KEY         "MallocNanoZone=1"

/*
 * Carries experiment flags up to libmalloc.
 */
#define LIBMALLOC_EXPERIMENT_FACTORS_KEY        "MallocExperiment="

/*
 * Conveys hardened runtime entitlement information to libsystem/libmalloc.
 */
#define HARDENED_RUNTIME_KEY    "HardenedRuntime="

#define PFZ_KEY                 "pfz="
6074 | extern user32_addr_t commpage_text32_location; |
6075 | extern user64_addr_t commpage_text64_location; |
6076 | |
6077 | extern uuid_string_t bootsessionuuid_string; |
6078 | static TUNABLE(uint32_t, exe_boothash_salt, "exe_boothash_salt" , 0); |
6079 | |
6080 | __startup_func |
6081 | static void |
6082 | exe_boothash_salt_generate(void) |
6083 | { |
6084 | if (!PE_parse_boot_argn(arg_string: "exe_boothash_salt" , NULL, max_arg: 0)) { |
6085 | read_random(buffer: &exe_boothash_salt, numBytes: sizeof(exe_boothash_salt)); |
6086 | } |
6087 | } |
6088 | STARTUP(EARLY_BOOT, STARTUP_RANK_MIDDLE, exe_boothash_salt_generate); |
6089 | |
6090 | |
/* "main_stack=" key and the number of values that accompany it. */
#define MAIN_STACK_VALUES       4
#define MAIN_STACK_KEY          "main_stack="

/* Further "key=" string prefixes (presumably for apple[] entries -- confirm at the use sites). */
#define FSID_KEY                "executable_file="
#define DYLD_FSID_KEY           "dyld_file="
#define CDHASH_KEY              "executable_cdhash="
#define DYLD_FLAGS_KEY          "dyld_flags="
#define SUBSYSTEM_ROOT_PATH_KEY "subsystem_root_path="
#define APP_BOOT_SESSION_KEY    "executable_boothash="
6100 | #if __has_feature(ptrauth_calls) |
6101 | #define PTRAUTH_DISABLED_FLAG "ptrauth_disabled=1" |
6102 | #define DYLD_ARM64E_ABI_KEY "arm64e_abi=" |
6103 | #endif /* __has_feature(ptrauth_calls) */ |
#define MAIN_TH_PORT_KEY        "th_port="

/* Sample "0x<64-bit hex>,0x<64-bit hex>" string (presumably used for sizing -- confirm at the use sites). */
#define FSID_MAX_STRING         "0x1234567890abcdef,0x1234567890abcdef"

/* Character counts of "0x"-prefixed hex renderings, without a terminator. */
#define HEX_STR_LEN             18      // 64-bit hex value "0x0123456701234567"
#define HEX_STR_LEN32           10      // 32-bit hex value "0x01234567"

#if XNU_TARGET_OS_OSX && _POSIX_SPAWN_FORCE_4K_PAGES && PMAP_CREATE_FORCE_4K_PAGES
#define VM_FORCE_4K_PAGES_KEY   "vm_force_4k_pages=1"
#endif /* XNU_TARGET_OS_OSX && _POSIX_SPAWN_FORCE_4K_PAGES && PMAP_CREATE_FORCE_4K_PAGES */
6114 | |
6115 | static int |
6116 | exec_add_entropy_key(struct image_params *imgp, |
6117 | const char *key, |
6118 | int values, |
6119 | boolean_t embedNUL) |
6120 | { |
6121 | const int limit = 8; |
6122 | uint64_t entropy[limit]; |
6123 | char str[strlen(s: key) + (HEX_STR_LEN + 1) * limit + 1]; |
6124 | if (values > limit) { |
6125 | values = limit; |
6126 | } |
6127 | |
6128 | read_random(buffer: entropy, numBytes: sizeof(entropy[0]) * values); |
6129 | |
6130 | if (embedNUL) { |
6131 | entropy[0] &= ~(0xffull << 8); |
6132 | } |
6133 | |
6134 | int len = scnprintf(str, count: sizeof(str), "%s0x%llx" , key, entropy[0]); |
6135 | size_t remaining = sizeof(str) - len; |
6136 | for (int i = 1; i < values && remaining > 0; ++i) { |
6137 | size_t start = sizeof(str) - remaining; |
6138 | len = scnprintf(&str[start], count: remaining, ",0x%llx" , entropy[i]); |
6139 | remaining -= len; |
6140 | } |
6141 | |
6142 | return exec_add_user_string(imgp, CAST_USER_ADDR_T(str), seg: UIO_SYSSPACE, FALSE); |
6143 | } |
6144 | |
/*
 * Build up the contents of the apple[] string vector
 */
#if (DEVELOPMENT || DEBUG)
/*
 * Defined elsewhere; when non-zero, exec_add_apple_strings() passes it to
 * userspace as "dyld_flags=0x...". Only referenced on DEVELOPMENT and DEBUG
 * kernels.
 */
extern uint64_t dyld_flags;
#endif
6151 | |
6152 | #if __has_feature(ptrauth_calls) |
6153 | static inline bool |
6154 | is_arm64e_running_as_arm64(const struct image_params *imgp) |
6155 | { |
6156 | return (imgp->ip_origcpusubtype & ~CPU_SUBTYPE_MASK) == CPU_SUBTYPE_ARM64E && |
6157 | (imgp->ip_flags & IMGPF_NOJOP); |
6158 | } |
6159 | #endif /* __has_feature(ptrauth_calls) */ |
6160 | |
/*
 * Experiment flags destined for libmalloc. exec_add_apple_strings() reads
 * this with a relaxed atomic load and, when it is non-zero, emits it as
 * "MallocExperiment=0x...".
 */
_Atomic uint64_t libmalloc_experiment_factors = 0;
6162 | |
6163 | static int |
6164 | exec_add_apple_strings(struct image_params *imgp, |
6165 | const load_result_t *load_result) |
6166 | { |
6167 | int error; |
6168 | int img_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT_ADDR) ? 8 : 4; |
6169 | thread_t new_thread; |
6170 | ipc_port_t sright; |
6171 | uint64_t local_experiment_factors = 0; |
6172 | |
6173 | /* exec_save_path stored the first string */ |
6174 | imgp->ip_applec = 1; |
6175 | |
6176 | /* adding the pfz string */ |
6177 | { |
6178 | char pfz_string[strlen(PFZ_KEY) + HEX_STR_LEN + 1]; |
6179 | |
6180 | if (img_ptr_size == 8) { |
6181 | __assert_only size_t ret = snprintf(pfz_string, count: sizeof(pfz_string), PFZ_KEY "0x%llx" , commpage_text64_location); |
6182 | assert(ret < sizeof(pfz_string)); |
6183 | } else { |
6184 | snprintf(pfz_string, count: sizeof(pfz_string), PFZ_KEY "0x%x" , commpage_text32_location); |
6185 | } |
6186 | error = exec_add_user_string(imgp, CAST_USER_ADDR_T(pfz_string), seg: UIO_SYSSPACE, FALSE); |
6187 | if (error) { |
6188 | printf("Failed to add the pfz string with error %d\n" , error); |
6189 | goto bad; |
6190 | } |
6191 | imgp->ip_applec++; |
6192 | } |
6193 | |
6194 | /* adding the NANO_ENGAGE_KEY key */ |
6195 | if (imgp->ip_px_sa) { |
6196 | struct _posix_spawnattr* psa = (struct _posix_spawnattr *) imgp->ip_px_sa; |
6197 | int proc_flags = psa->psa_flags; |
6198 | |
6199 | if ((proc_flags & _POSIX_SPAWN_NANO_ALLOCATOR) == _POSIX_SPAWN_NANO_ALLOCATOR) { |
6200 | const char *nano_string = NANO_ENGAGE_KEY; |
6201 | error = exec_add_user_string(imgp, CAST_USER_ADDR_T(nano_string), seg: UIO_SYSSPACE, FALSE); |
6202 | if (error) { |
6203 | goto bad; |
6204 | } |
6205 | imgp->ip_applec++; |
6206 | } |
6207 | } |
6208 | |
6209 | /* |
6210 | * Supply libc with a collection of random values to use when |
6211 | * implementing -fstack-protector. |
6212 | * |
6213 | * (The first random string always contains an embedded NUL so that |
6214 | * __stack_chk_guard also protects against C string vulnerabilities) |
6215 | */ |
6216 | error = exec_add_entropy_key(imgp, GUARD_KEY, GUARD_VALUES, TRUE); |
6217 | if (error) { |
6218 | goto bad; |
6219 | } |
6220 | imgp->ip_applec++; |
6221 | |
6222 | /* |
6223 | * Supply libc with entropy for system malloc. |
6224 | */ |
6225 | error = exec_add_entropy_key(imgp, ENTROPY_KEY, ENTROPY_VALUES, FALSE); |
6226 | if (error) { |
6227 | goto bad; |
6228 | } |
6229 | imgp->ip_applec++; |
6230 | |
6231 | /* |
6232 | * Supply libpthread & libplatform with a random value to use for pointer |
6233 | * obfuscation. |
6234 | */ |
6235 | error = exec_add_entropy_key(imgp, PTR_MUNGE_KEY, PTR_MUNGE_VALUES, FALSE); |
6236 | if (error) { |
6237 | goto bad; |
6238 | } |
6239 | imgp->ip_applec++; |
6240 | |
6241 | /* |
6242 | * Add MAIN_STACK_KEY: Supplies the address and size of the main thread's |
6243 | * stack if it was allocated by the kernel. |
6244 | * |
6245 | * The guard page is not included in this stack size as libpthread |
6246 | * expects to add it back in after receiving this value. |
6247 | */ |
6248 | if (load_result->unixproc) { |
6249 | char stack_string[strlen(MAIN_STACK_KEY) + (HEX_STR_LEN + 1) * MAIN_STACK_VALUES + 1]; |
6250 | snprintf(stack_string, count: sizeof(stack_string), |
6251 | MAIN_STACK_KEY "0x%llx,0x%llx,0x%llx,0x%llx" , |
6252 | (uint64_t)load_result->user_stack, |
6253 | (uint64_t)load_result->user_stack_size, |
6254 | (uint64_t)load_result->user_stack_alloc, |
6255 | (uint64_t)load_result->user_stack_alloc_size); |
6256 | error = exec_add_user_string(imgp, CAST_USER_ADDR_T(stack_string), seg: UIO_SYSSPACE, FALSE); |
6257 | if (error) { |
6258 | goto bad; |
6259 | } |
6260 | imgp->ip_applec++; |
6261 | } |
6262 | |
6263 | if (imgp->ip_vattr) { |
6264 | uint64_t fsid = vnode_get_va_fsid(vap: imgp->ip_vattr); |
6265 | uint64_t fsobjid = imgp->ip_vattr->va_fileid; |
6266 | |
6267 | char fsid_string[strlen(FSID_KEY) + strlen(FSID_MAX_STRING) + 1]; |
6268 | snprintf(fsid_string, count: sizeof(fsid_string), |
6269 | FSID_KEY "0x%llx,0x%llx" , fsid, fsobjid); |
6270 | error = exec_add_user_string(imgp, CAST_USER_ADDR_T(fsid_string), seg: UIO_SYSSPACE, FALSE); |
6271 | if (error) { |
6272 | goto bad; |
6273 | } |
6274 | imgp->ip_applec++; |
6275 | } |
6276 | |
6277 | if (imgp->ip_dyld_fsid || imgp->ip_dyld_fsobjid) { |
6278 | char fsid_string[strlen(DYLD_FSID_KEY) + strlen(FSID_MAX_STRING) + 1]; |
6279 | snprintf(fsid_string, count: sizeof(fsid_string), |
6280 | DYLD_FSID_KEY "0x%llx,0x%llx" , imgp->ip_dyld_fsid, imgp->ip_dyld_fsobjid); |
6281 | error = exec_add_user_string(imgp, CAST_USER_ADDR_T(fsid_string), seg: UIO_SYSSPACE, FALSE); |
6282 | if (error) { |
6283 | goto bad; |
6284 | } |
6285 | imgp->ip_applec++; |
6286 | } |
6287 | |
6288 | uint8_t cdhash[SHA1_RESULTLEN]; |
6289 | int cdhash_errror = ubc_cs_getcdhash(imgp->ip_vp, imgp->ip_arch_offset, cdhash); |
6290 | if (cdhash_errror == 0) { |
6291 | char hash_string[strlen(CDHASH_KEY) + 2 * SHA1_RESULTLEN + 1]; |
6292 | strncpy(hash_string, CDHASH_KEY, sizeof(hash_string)); |
6293 | char *p = hash_string + sizeof(CDHASH_KEY) - 1; |
6294 | for (int i = 0; i < SHA1_RESULTLEN; i++) { |
6295 | snprintf(p, count: 3, "%02x" , (int) cdhash[i]); |
6296 | p += 2; |
6297 | } |
6298 | error = exec_add_user_string(imgp, CAST_USER_ADDR_T(hash_string), seg: UIO_SYSSPACE, FALSE); |
6299 | if (error) { |
6300 | goto bad; |
6301 | } |
6302 | imgp->ip_applec++; |
6303 | |
6304 | /* hash together cd-hash and boot-session-uuid */ |
6305 | uint8_t sha_digest[SHA256_DIGEST_LENGTH]; |
6306 | SHA256_CTX sha_ctx; |
6307 | SHA256_Init(ctx: &sha_ctx); |
6308 | SHA256_Update(ctx: &sha_ctx, data: &exe_boothash_salt, len: sizeof(exe_boothash_salt)); |
6309 | SHA256_Update(ctx: &sha_ctx, data: bootsessionuuid_string, len: sizeof(bootsessionuuid_string)); |
6310 | SHA256_Update(ctx: &sha_ctx, data: cdhash, len: sizeof(cdhash)); |
6311 | SHA256_Final(digest: sha_digest, ctx: &sha_ctx); |
6312 | char app_boot_string[strlen(APP_BOOT_SESSION_KEY) + 2 * SHA1_RESULTLEN + 1]; |
6313 | strncpy(app_boot_string, APP_BOOT_SESSION_KEY, sizeof(app_boot_string)); |
6314 | char *s = app_boot_string + sizeof(APP_BOOT_SESSION_KEY) - 1; |
6315 | for (int i = 0; i < SHA1_RESULTLEN; i++) { |
6316 | snprintf(s, count: 3, "%02x" , (int) sha_digest[i]); |
6317 | s += 2; |
6318 | } |
6319 | error = exec_add_user_string(imgp, CAST_USER_ADDR_T(app_boot_string), seg: UIO_SYSSPACE, FALSE); |
6320 | if (error) { |
6321 | goto bad; |
6322 | } |
6323 | imgp->ip_applec++; |
6324 | } |
6325 | #if (DEVELOPMENT || DEBUG) |
6326 | if (dyld_flags) { |
6327 | char dyld_flags_string[strlen(DYLD_FLAGS_KEY) + HEX_STR_LEN + 1]; |
6328 | snprintf(dyld_flags_string, sizeof(dyld_flags_string), DYLD_FLAGS_KEY "0x%llx" , dyld_flags); |
6329 | error = exec_add_user_string(imgp, CAST_USER_ADDR_T(dyld_flags_string), UIO_SYSSPACE, FALSE); |
6330 | if (error) { |
6331 | goto bad; |
6332 | } |
6333 | imgp->ip_applec++; |
6334 | } |
6335 | #endif |
6336 | if (imgp->ip_subsystem_root_path) { |
6337 | size_t buffer_len = MAXPATHLEN + strlen(SUBSYSTEM_ROOT_PATH_KEY); |
6338 | char subsystem_root_path_string[buffer_len]; |
6339 | int required_len = snprintf(subsystem_root_path_string, count: buffer_len, SUBSYSTEM_ROOT_PATH_KEY "%s" , imgp->ip_subsystem_root_path); |
6340 | |
6341 | if (((size_t)required_len >= buffer_len) || (required_len < 0)) { |
6342 | error = ENAMETOOLONG; |
6343 | goto bad; |
6344 | } |
6345 | |
6346 | error = exec_add_user_string(imgp, CAST_USER_ADDR_T(subsystem_root_path_string), seg: UIO_SYSSPACE, FALSE); |
6347 | if (error) { |
6348 | goto bad; |
6349 | } |
6350 | |
6351 | imgp->ip_applec++; |
6352 | } |
6353 | #if __has_feature(ptrauth_calls) |
6354 | if (is_arm64e_running_as_arm64(imgp)) { |
6355 | error = exec_add_user_string(imgp, CAST_USER_ADDR_T(PTRAUTH_DISABLED_FLAG), UIO_SYSSPACE, FALSE); |
6356 | if (error) { |
6357 | goto bad; |
6358 | } |
6359 | |
6360 | imgp->ip_applec++; |
6361 | } |
6362 | #endif /* __has_feature(ptrauth_calls) */ |
6363 | |
6364 | |
6365 | #if __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX) |
6366 | { |
6367 | char dyld_abi_string[strlen(DYLD_ARM64E_ABI_KEY) + 8]; |
6368 | strlcpy(dyld_abi_string, DYLD_ARM64E_ABI_KEY, sizeof(dyld_abi_string)); |
6369 | bool allowAll = bootarg_arm64e_preview_abi; |
6370 | strlcat(dyld_abi_string, (allowAll ? "all" : "os" ), sizeof(dyld_abi_string)); |
6371 | error = exec_add_user_string(imgp, CAST_USER_ADDR_T(dyld_abi_string), UIO_SYSSPACE, FALSE); |
6372 | if (error) { |
6373 | goto bad; |
6374 | } |
6375 | |
6376 | imgp->ip_applec++; |
6377 | } |
6378 | #endif |
6379 | /* |
6380 | * Add main thread mach port name |
6381 | * +1 uref on main thread port, this ref will be extracted by libpthread in __pthread_init |
6382 | * and consumed in _bsdthread_terminate. Leaking the main thread port name if not linked |
6383 | * against libpthread. |
6384 | */ |
6385 | if ((new_thread = imgp->ip_new_thread) != THREAD_NULL) { |
6386 | thread_reference(thread: new_thread); |
6387 | sright = convert_thread_to_port_pinned(new_thread); |
6388 | task_t new_task = get_threadtask(new_thread); |
6389 | mach_port_name_t name = ipc_port_copyout_send(sright, space: get_task_ipcspace(t: new_task)); |
6390 | char port_name_hex_str[strlen(MAIN_TH_PORT_KEY) + HEX_STR_LEN32 + 1]; |
6391 | snprintf(port_name_hex_str, count: sizeof(port_name_hex_str), MAIN_TH_PORT_KEY "0x%x" , name); |
6392 | |
6393 | error = exec_add_user_string(imgp, CAST_USER_ADDR_T(port_name_hex_str), seg: UIO_SYSSPACE, FALSE); |
6394 | if (error) { |
6395 | goto bad; |
6396 | } |
6397 | imgp->ip_applec++; |
6398 | } |
6399 | |
6400 | #if XNU_TARGET_OS_OSX && _POSIX_SPAWN_FORCE_4K_PAGES && PMAP_CREATE_FORCE_4K_PAGES |
6401 | if (imgp->ip_px_sa != NULL) { |
6402 | struct _posix_spawnattr* psa = (struct _posix_spawnattr *) imgp->ip_px_sa; |
6403 | if (psa->psa_flags & _POSIX_SPAWN_FORCE_4K_PAGES) { |
6404 | const char *vm_force_4k_string = VM_FORCE_4K_PAGES_KEY; |
6405 | error = exec_add_user_string(imgp, CAST_USER_ADDR_T(vm_force_4k_string), UIO_SYSSPACE, FALSE); |
6406 | if (error) { |
6407 | goto bad; |
6408 | } |
6409 | imgp->ip_applec++; |
6410 | } |
6411 | } |
6412 | #endif /* XNU_TARGET_OS_OSX && _POSIX_SPAWN_FORCE_4K_PAGES && PMAP_CREATE_FORCE_4K_PAGES */ |
6413 | |
6414 | /* adding the libmalloc experiment string */ |
6415 | local_experiment_factors = os_atomic_load_wide(&libmalloc_experiment_factors, relaxed); |
6416 | if (__improbable(local_experiment_factors != 0)) { |
6417 | char libmalloc_experiment_factors_string[strlen(LIBMALLOC_EXPERIMENT_FACTORS_KEY) + HEX_STR_LEN + 1]; |
6418 | |
6419 | snprintf( |
6420 | libmalloc_experiment_factors_string, |
6421 | count: sizeof(libmalloc_experiment_factors_string), |
6422 | LIBMALLOC_EXPERIMENT_FACTORS_KEY "0x%llx" , |
6423 | local_experiment_factors); |
6424 | error = exec_add_user_string( |
6425 | imgp, |
6426 | CAST_USER_ADDR_T(libmalloc_experiment_factors_string), |
6427 | seg: UIO_SYSSPACE, |
6428 | FALSE); |
6429 | if (error) { |
6430 | printf("Failed to add the libmalloc experiment factors string with error %d\n" , error); |
6431 | goto bad; |
6432 | } |
6433 | imgp->ip_applec++; |
6434 | } |
6435 | |
6436 | |
6437 | /* tell dyld that it can leverage hardware for its read-only/read-write trusted path */ |
6438 | if (imgp->ip_flags & IMGPF_HW_TPRO) { |
6439 | const char *dyld_hw_tpro = "dyld_hw_tpro=1" ; |
6440 | error = exec_add_user_string(imgp, CAST_USER_ADDR_T(dyld_hw_tpro), seg: UIO_SYSSPACE, FALSE); |
6441 | if (error) { |
6442 | printf("Failed to add dyld hw tpro setting with error %d\n" , error); |
6443 | goto bad; |
6444 | } |
6445 | |
6446 | imgp->ip_applec++; |
6447 | |
6448 | } |
6449 | |
6450 | if (load_result->hardened_runtime_binary) { |
6451 | const size_t HR_STRING_SIZE = sizeof(HARDENED_RUNTIME_KEY) + HR_FLAGS_NUM_NIBBLES + 2 + 1; |
6452 | char hardened_runtime[HR_STRING_SIZE]; |
6453 | snprintf(hardened_runtime, count: HR_STRING_SIZE, HARDENED_RUNTIME_KEY"0x%x" , load_result->hardened_runtime_binary); |
6454 | error = exec_add_user_string(imgp, CAST_USER_ADDR_T(hardened_runtime), seg: UIO_SYSSPACE, FALSE); |
6455 | if (error) { |
6456 | printf("Failed to add hardened runtime flag with error %d\n" , error); |
6457 | goto bad; |
6458 | } |
6459 | imgp->ip_applec++; |
6460 | } |
6461 | /* Align the tail of the combined applev area */ |
6462 | while (imgp->ip_strspace % img_ptr_size != 0) { |
6463 | *imgp->ip_strendp++ = '\0'; |
6464 | imgp->ip_strspace--; |
6465 | } |
6466 | |
6467 | bad: |
6468 | return error; |
6469 | } |
6470 | |
6471 | /* |
6472 | * exec_check_permissions |
6473 | * |
6474 | * Description: Verify that the file that is being attempted to be executed |
6475 | * is in fact allowed to be executed based on it POSIX file |
6476 | * permissions and other access control criteria |
6477 | * |
6478 | * Parameters: struct image_params * the image parameter block |
6479 | * |
6480 | * Returns: 0 Success |
6481 | * EACCES Permission denied |
6482 | * ENOEXEC Executable file format error |
6483 | * ETXTBSY Text file busy [misuse of error code] |
6484 | * vnode_getattr:??? |
6485 | * vnode_authorize:??? |
6486 | */ |
6487 | static int |
6488 | exec_check_permissions(struct image_params *imgp) |
6489 | { |
6490 | struct vnode *vp = imgp->ip_vp; |
6491 | struct vnode_attr *vap = imgp->ip_vattr; |
6492 | proc_t p = vfs_context_proc(ctx: imgp->ip_vfs_context); |
6493 | int error; |
6494 | kauth_action_t action; |
6495 | |
6496 | /* Only allow execution of regular files */ |
6497 | if (!vnode_isreg(vp)) { |
6498 | return EACCES; |
6499 | } |
6500 | |
6501 | /* Get the file attributes that we will be using here and elsewhere */ |
6502 | VATTR_INIT(vap); |
6503 | VATTR_WANTED(vap, va_uid); |
6504 | VATTR_WANTED(vap, va_gid); |
6505 | VATTR_WANTED(vap, va_mode); |
6506 | VATTR_WANTED(vap, va_fsid); |
6507 | VATTR_WANTED(vap, va_fsid64); |
6508 | VATTR_WANTED(vap, va_fileid); |
6509 | VATTR_WANTED(vap, va_data_size); |
6510 | if ((error = vnode_getattr(vp, vap, ctx: imgp->ip_vfs_context)) != 0) { |
6511 | return error; |
6512 | } |
6513 | |
6514 | /* |
6515 | * Ensure that at least one execute bit is on - otherwise root |
6516 | * will always succeed, and we don't want to happen unless the |
6517 | * file really is executable. |
6518 | */ |
6519 | if (!vfs_authopaque(mp: vnode_mount(vp)) && ((vap->va_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0)) { |
6520 | return EACCES; |
6521 | } |
6522 | |
6523 | /* Disallow zero length files */ |
6524 | if (vap->va_data_size == 0) { |
6525 | return ENOEXEC; |
6526 | } |
6527 | |
6528 | imgp->ip_arch_offset = (user_size_t)0; |
6529 | #if __LP64__ |
6530 | imgp->ip_arch_size = vap->va_data_size; |
6531 | #else |
6532 | if (vap->va_data_size > UINT32_MAX) { |
6533 | return ENOEXEC; |
6534 | } |
6535 | imgp->ip_arch_size = (user_size_t)vap->va_data_size; |
6536 | #endif |
6537 | |
6538 | /* Disable setuid-ness for traced programs or if MNT_NOSUID */ |
6539 | if ((vp->v_mount->mnt_flag & MNT_NOSUID) || (p->p_lflag & P_LTRACED)) { |
6540 | vap->va_mode &= ~(VSUID | VSGID); |
6541 | } |
6542 | |
6543 | /* |
6544 | * Disable _POSIX_SPAWN_ALLOW_DATA_EXEC and _POSIX_SPAWN_DISABLE_ASLR |
6545 | * flags for setuid/setgid binaries. |
6546 | */ |
6547 | if (vap->va_mode & (VSUID | VSGID)) { |
6548 | imgp->ip_flags &= ~(IMGPF_ALLOW_DATA_EXEC | IMGPF_DISABLE_ASLR); |
6549 | } |
6550 | |
6551 | #if CONFIG_MACF |
6552 | error = mac_vnode_check_exec(ctx: imgp->ip_vfs_context, vp, imgp); |
6553 | if (error) { |
6554 | return error; |
6555 | } |
6556 | #endif |
6557 | |
6558 | /* Check for execute permission */ |
6559 | action = KAUTH_VNODE_EXECUTE; |
6560 | /* Traced images must also be readable */ |
6561 | if (p->p_lflag & P_LTRACED) { |
6562 | action |= KAUTH_VNODE_READ_DATA; |
6563 | } |
6564 | if ((error = vnode_authorize(vp, NULL, action, ctx: imgp->ip_vfs_context)) != 0) { |
6565 | return error; |
6566 | } |
6567 | |
6568 | #if 0 |
6569 | /* Don't let it run if anyone had it open for writing */ |
6570 | vnode_lock(vp); |
6571 | if (vp->v_writecount) { |
6572 | panic("going to return ETXTBSY %x" , vp); |
6573 | vnode_unlock(vp); |
6574 | return ETXTBSY; |
6575 | } |
6576 | vnode_unlock(vp); |
6577 | #endif |
6578 | |
6579 | /* XXX May want to indicate to underlying FS that vnode is open */ |
6580 | |
6581 | return error; |
6582 | } |
6583 | |
6584 | |
/*
 * exec_handle_sugid
 *
 * Initially clear the P_SUGID in the process flags; if an SUGID process is
 * exec'ing a non-SUGID image, then this is the point of no return.
 *
 * If the image being activated is SUGID, then replace the credential with a
 * copy, disable tracing (unless the tracing process is root), reset the
 * mach task port to revoke it, set the P_SUGID bit, and make sure file
 * descriptors 0, 1 and 2 are allocated (opening /dev/null for any that are
 * not).
 *
 * If the saved user and group ID will be changing, then make sure it happens
 * to a new credential, rather than a shared one.
 *
 * Set the security token (this is probably obsolete, given that the token
 * should not technically be separate from the credential itself).
 *
 * Parameters:	struct image_params *	the image parameter block
 *
 * Returns:	0			Success, or the set-id path was not
 *					taken
 *		falloc_exec:???		Last error recorded while filling in
 *		vn_open:???		descriptors 0-2 with /dev/null; these
 *					are the only places 'error' is set
 *
 * Implicit returns:
 *		<process credential>	Potentially modified/replaced
 *		<task port>		Potentially revoked
 *		<process flags>		P_SUGID bit potentially modified
 *		<security token>	Potentially modified
 *		imgp->ip_mac_return	Set from the MAC label update when a
 *					MAC transition is performed
 */
__attribute__((noinline))
static int
exec_handle_sugid(struct image_params *imgp)
{
	proc_t p = vfs_context_proc(ctx: imgp->ip_vfs_context);
	kauth_cred_t cred = vfs_context_ucred(ctx: imgp->ip_vfs_context);
	int i;
	/*
	 * Non-zero means "do not set P_SUGID". It is written by
	 * kauth_cred_ismember_gid() below, forced to 1 on the MAC-only path
	 * and forced back to 0 when the MAC label update yields a disjoint
	 * credential.
	 */
	int leave_sugid_clear = 0;
	/* Only ever assigned under CONFIG_MACF; stays 0 otherwise. */
	int mac_reset_ipc = 0;
	int error = 0;
#if CONFIG_MACF
	int mac_transition, disjoint_cred = 0;
	int label_update_return = 0;

	/*
	 * Determine whether a call to update the MAC label will result in the
	 * credential changing.
	 *
	 * Note:	MAC policies which do not actually end up modifying
	 *		the label subsequently are strongly encouraged to
	 *		return 0 for this check, since a non-zero answer will
	 *		slow down the exec fast path for normal binaries.
	 */
	mac_transition = mac_cred_check_label_update_execve(
		ctx: imgp->ip_vfs_context,
		vp: imgp->ip_vp,
		offset: imgp->ip_arch_offset,
		scriptvp: imgp->ip_scriptvp,
		scriptvnodelabel: imgp->ip_scriptlabelp,
		execlabel: imgp->ip_execlabelp,
		proc: p,
		macextensions: &imgp->ip_px_smpx);
#endif

	/* Start from a clean slate: P_SUGID is re-set below only if warranted. */
	OSBitAndAtomic(~((uint32_t)P_SUGID), &p->p_flag);

	/*
	 * Order of the following is important; group checks must go last,
	 * as we use the success of the 'ismember' check combined with the
	 * failure of the explicit match to indicate that we will be setting
	 * the egid of the process even though the new process did not
	 * require VSUID/VSGID bits in order for it to set the new group as
	 * its egid.
	 *
	 * Note:	Technically, by this we are implying a call to
	 *		setegid() in the new process, rather than implying
	 *		it used its VSGID bit to set the effective group,
	 *		even though there is no code in that process to make
	 *		such a call.
	 *
	 * The test uses ip_origvattr, i.e. the mode/uid/gid recorded there,
	 * not the (possibly already masked) values in ip_vattr.
	 */
	if (((imgp->ip_origvattr->va_mode & VSUID) != 0 &&
	    kauth_cred_getuid(cred: cred) != imgp->ip_origvattr->va_uid) ||
	    ((imgp->ip_origvattr->va_mode & VSGID) != 0 &&
	    ((kauth_cred_ismember_gid(cred: cred, gid: imgp->ip_origvattr->va_gid, resultp: &leave_sugid_clear) || !leave_sugid_clear) ||
	    (kauth_cred_getgid(cred: cred) != imgp->ip_origvattr->va_gid)))) {
#if CONFIG_MACF
/* label for MAC transition and neither VSUID nor VSGID */
handle_mac_transition:
#endif

#if CONFIG_SETUID
		/*
		 * Replace the credential with a copy of itself if euid or
		 * egid change.
		 *
		 * Note:	setuid binaries will automatically opt out of
		 *		group resolver participation as a side effect
		 *		of this operation.  This is an intentional
		 *		part of the security model, which requires a
		 *		participating credential be established by
		 *		escalating privilege, setting up all other
		 *		aspects of the credential including whether
		 *		or not to participate in external group
		 *		membership resolution, then dropping their
		 *		effective privilege to that of the desired
		 *		final credential state.
		 *
		 * Modifications to p_ucred must be guarded using the
		 * proc's ucred lock. This prevents others from accessing
		 * a garbage credential.
		 */

		/* VSUID: effective and saved uid become the file's owner. */
		if (imgp->ip_origvattr->va_mode & VSUID) {
			kauth_cred_proc_update(p, action: PROC_SETTOKEN_NONE,
			    fn: ^bool (kauth_cred_t parent __unused, kauth_cred_t model) {
				return kauth_cred_model_setresuid(model,
				KAUTH_UID_NONE,
				euid: imgp->ip_origvattr->va_uid,
				svuid: imgp->ip_origvattr->va_uid,
				KAUTH_UID_NONE);
			});
		}

		/* VSGID: effective and saved gid become the file's group. */
		if (imgp->ip_origvattr->va_mode & VSGID) {
			kauth_cred_proc_update(p, action: PROC_SETTOKEN_NONE,
			    fn: ^bool (kauth_cred_t parent __unused, kauth_cred_t model) {
				return kauth_cred_model_setresgid(model,
				KAUTH_GID_NONE,
				egid: imgp->ip_origvattr->va_gid,
				svgid: imgp->ip_origvattr->va_gid);
			});
		}
#endif /* CONFIG_SETUID */

#if CONFIG_MACF
		/*
		 * If a policy has indicated that it will transition the label,
		 * before making the call into the MAC policies, get a new
		 * duplicate credential, so they can modify it without
		 * modifying any others sharing it.
		 */
		if (mac_transition) {
			/*
			 * This hook may generate upcalls that require
			 * importance donation from the kernel.
			 * (23925818)
			 */
			thread_t thread = current_thread();
			thread_enable_send_importance(thread, TRUE);
			kauth_proc_label_update_execve(p,
			    ctx: imgp->ip_vfs_context,
			    vp: imgp->ip_vp,
			    offset: imgp->ip_arch_offset,
			    scriptvp: imgp->ip_scriptvp,
			    scriptlabel: imgp->ip_scriptlabelp,
			    execlabel: imgp->ip_execlabelp,
			    csflags: &imgp->ip_csflags,
			    psattr: &imgp->ip_px_smpx,
			    disjoint: &disjoint_cred, /* will be non zero if disjoint */
			    update_return: &label_update_return);
			thread_enable_send_importance(thread, FALSE);

			if (disjoint_cred) {
				/*
				 * If updating the MAC label resulted in a
				 * disjoint credential, flag that we need to
				 * set the P_SUGID bit.  This protects
				 * against debuggers being attached by an
				 * insufficiently privileged process onto the
				 * result of a transition to a more privileged
				 * credential.
				 */
				leave_sugid_clear = 0;
			}

			/* Hand the policy's result back through the image params. */
			imgp->ip_mac_return = label_update_return;
		}

		/* Ask MAC whether the existing task/thread ports may survive this exec. */
		mac_reset_ipc = mac_proc_check_inherit_ipc_ports(p, cur_vp: p->p_textvp, cur_offset: p->p_textoff, img_vp: imgp->ip_vp, img_offset: imgp->ip_arch_offset, scriptvp: imgp->ip_scriptvp);

#endif  /* CONFIG_MACF */

		/*
		 * If 'leave_sugid_clear' is non-zero, then we passed the
		 * VSUID and MACF checks, and successfully determined that
		 * the previous cred was a member of the VSGID group, but
		 * that it was not the default at the time of the execve,
		 * and that the post-labelling credential was not disjoint.
		 * So we don't set the P_SUGID or reset mach ports and fds
		 * on the basis of simply running this code.
		 */
		if (mac_reset_ipc || !leave_sugid_clear) {
			/*
			 * Have mach reset the task and thread ports.
			 * We don't want anyone who had the ports before
			 * a setuid exec to be able to access/control the
			 * task/thread after.
			 *
			 * The new thread and its task are used when
			 * ip_new_thread is set; otherwise the proc's task
			 * and the current thread.
			 */
			ipc_task_reset(task: (imgp->ip_new_thread != NULL) ?
			    get_threadtask(imgp->ip_new_thread) : proc_task(p));
			ipc_thread_reset(thread: (imgp->ip_new_thread != NULL) ?
			    imgp->ip_new_thread : current_thread());
		}

		if (!leave_sugid_clear) {
			/*
			 * Flag the process as setuid.
			 */
			OSBitOrAtomic(P_SUGID, &p->p_flag);

			/*
			 * Radar 2261856; setuid security hole fix
			 * XXX For setuid processes, attempt to ensure that
			 * stdin, stdout, and stderr are already allocated.
			 * We do not want userland to accidentally allocate
			 * descriptors in this range which has implied meaning
			 * to libc.
			 */
			for (i = 0; i < 3; i++) {
				/* Descriptor already in use: leave it alone. */
				if (fp_get_noref_locked(p, fd: i) != NULL) {
					continue;
				}

				/*
				 * Do the kernel equivalent of
				 *
				 *      if i == 0
				 *              (void) open("/dev/null", O_RDONLY);
				 *      else
				 *              (void) open("/dev/null", O_WRONLY);
				 */

				struct fileproc *fp;
				int indx;
				int flag;
				struct nameidata *ndp = NULL;

				if (i == 0) {
					flag = FREAD;
				} else {
					flag = FWRITE;
				}

				/*
				 * Allocation failure skips this descriptor but
				 * leaves the code in 'error'; a later
				 * iteration may overwrite it.
				 */
				if ((error = falloc_exec(p, imgp->ip_vfs_context,
				    &fp, &indx)) != 0) {
					continue;
				}

				/* Allocated with Z_NOFAIL, so no NULL check follows. */
				ndp = kalloc_type(struct nameidata,
				    Z_WAITOK | Z_ZERO | Z_NOFAIL);

				NDINIT(ndp, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE,
				    CAST_USER_ADDR_T("/dev/null" ),
				    imgp->ip_vfs_context);

				/*
				 * If /dev/null cannot be opened, undo the
				 * descriptor allocation and give up on the
				 * remaining descriptors as well.
				 */
				if ((error = vn_open(ndp, fmode: flag, cmode: 0)) != 0) {
					fp_free(p, fd: indx, fp);
					kfree_type(struct nameidata, ndp);
					break;
				}

				struct fileglob *fg = fp->fp_glob;

				/* Wire the opened vnode into the new file structure. */
				fg->fg_flag = flag;
				fg->fg_ops = &vnops;
				fp_set_data(fp, fg_data: ndp->ni_vp);

				vnode_put(vp: ndp->ni_vp);

				/* Release the descriptor slot and drop fp under the fd lock. */
				proc_fdlock(p);
				procfdtbl_releasefd(p, fd: indx, NULL);
				fp_drop(p, fd: indx, fp, locked: 1);
				proc_fdunlock(p);

				kfree_type(struct nameidata, ndp);
			}
		}
	}
#if CONFIG_MACF
	else {
		/*
		 * We are here because we were told that the MAC label will
		 * be transitioned, and the binary is not VSUID or VSGID; to
		 * deal with this case, we could either duplicate a lot of
		 * code, or we can indicate we want to default the P_SUGID
		 * bit clear and jump back up.
		 */
		if (mac_transition) {
			leave_sugid_clear = 1;
			goto handle_mac_transition;
		}
	}

#endif  /* CONFIG_MACF */

	/* Update the process' identity version and set the security token */
	proc_setpidversion(p, OSIncrementAtomic(&nextpidversion));
	task_set_uniqueid(task: proc_task(p));

	/*
	 * Implement the semantic where the effective user and group become
	 * the saved user and group in exec'ed programs.
	 *
	 * The block returns false when the saved ids already match, and true
	 * after changing them.
	 */
	kauth_cred_proc_update(p, action: PROC_SETTOKEN_ALWAYS,
	    fn: ^bool (kauth_cred_t parent __unused, kauth_cred_t model) {
		posix_cred_t pcred = posix_cred_get(cred: model);

		if (pcred->cr_svuid == pcred->cr_uid &&
		pcred->cr_svgid == pcred->cr_gid) {
		        return false;
		}

		pcred->cr_svuid = pcred->cr_uid;
		pcred->cr_svgid = pcred->cr_gid;
		return true;
	});

	return error;
}
6900 | |
6901 | |
6902 | /* |
6903 | * create_unix_stack |
6904 | * |
6905 | * Description: Set the user stack address for the process to the provided |
6906 | * address. If a custom stack was not set as a result of the |
6907 | * load process (i.e. as specified by the image file for the |
6908 | * executable), then allocate the stack in the provided map and |
6909 | * set up appropriate guard pages for enforcing administrative |
6910 | * limits on stack growth, if they end up being needed. |
6911 | * |
6912 | * Parameters: p Process to set stack on |
6913 | * load_result Information from mach-o load commands |
6914 | * map Address map in which to allocate the new stack |
6915 | * |
6916 | * Returns: KERN_SUCCESS Stack successfully created |
6917 | * !KERN_SUCCESS Mach failure code |
6918 | */ |
6919 | __attribute__((noinline)) |
6920 | static kern_return_t |
6921 | create_unix_stack(vm_map_t map, load_result_t* load_result, |
6922 | proc_t p) |
6923 | { |
6924 | mach_vm_size_t size, prot_size; |
6925 | mach_vm_offset_t addr, prot_addr; |
6926 | kern_return_t kr; |
6927 | |
6928 | mach_vm_address_t user_stack = load_result->user_stack; |
6929 | |
6930 | proc_lock(p); |
6931 | p->user_stack = (uintptr_t)user_stack; |
6932 | if (load_result->custom_stack) { |
6933 | p->p_lflag |= P_LCUSTOM_STACK; |
6934 | } |
6935 | proc_unlock(p); |
6936 | if (vm_map_page_shift(map) < (int)PAGE_SHIFT) { |
6937 | DEBUG4K_LOAD("map %p user_stack 0x%llx custom %d user_stack_alloc_size 0x%llx\n" , map, user_stack, load_result->custom_stack, load_result->user_stack_alloc_size); |
6938 | } |
6939 | |
6940 | if (load_result->user_stack_alloc_size > 0) { |
6941 | /* |
6942 | * Allocate enough space for the maximum stack size we |
6943 | * will ever authorize and an extra page to act as |
6944 | * a guard page for stack overflows. For default stacks, |
6945 | * vm_initial_limit_stack takes care of the extra guard page. |
6946 | * Otherwise we must allocate it ourselves. |
6947 | */ |
6948 | if (mach_vm_round_page_overflow(in: load_result->user_stack_alloc_size, out: &size)) { |
6949 | return KERN_INVALID_ARGUMENT; |
6950 | } |
6951 | addr = vm_map_trunc_page(load_result->user_stack - size, |
6952 | vm_map_page_mask(map)); |
6953 | kr = mach_vm_allocate_kernel(map, addr: &addr, size, |
6954 | VM_FLAGS_FIXED, VM_MEMORY_STACK); |
6955 | if (kr != KERN_SUCCESS) { |
6956 | // Can't allocate at default location, try anywhere |
6957 | addr = 0; |
6958 | kr = mach_vm_allocate_kernel(map, addr: &addr, size, |
6959 | VM_FLAGS_ANYWHERE, VM_MEMORY_STACK); |
6960 | if (kr != KERN_SUCCESS) { |
6961 | return kr; |
6962 | } |
6963 | |
6964 | user_stack = addr + size; |
6965 | load_result->user_stack = (user_addr_t)user_stack; |
6966 | |
6967 | proc_lock(p); |
6968 | p->user_stack = (uintptr_t)user_stack; |
6969 | proc_unlock(p); |
6970 | } |
6971 | |
6972 | load_result->user_stack_alloc = (user_addr_t)addr; |
6973 | |
6974 | /* |
6975 | * And prevent access to what's above the current stack |
6976 | * size limit for this process. |
6977 | */ |
6978 | if (load_result->user_stack_size == 0) { |
6979 | load_result->user_stack_size = proc_limitgetcur(p, RLIMIT_STACK); |
6980 | prot_size = vm_map_trunc_page(size - load_result->user_stack_size, vm_map_page_mask(map)); |
6981 | } else { |
6982 | prot_size = PAGE_SIZE; |
6983 | } |
6984 | |
6985 | prot_addr = addr; |
6986 | kr = mach_vm_protect(target_task: map, |
6987 | address: prot_addr, |
6988 | size: prot_size, |
6989 | FALSE, |
6990 | VM_PROT_NONE); |
6991 | if (kr != KERN_SUCCESS) { |
6992 | (void)mach_vm_deallocate(target: map, address: addr, size); |
6993 | return kr; |
6994 | } |
6995 | } |
6996 | |
6997 | return KERN_SUCCESS; |
6998 | } |
6999 | |
7000 | #include <sys/reboot.h> |
7001 | |
7002 | /* |
7003 | * load_init_program_at_path |
7004 | * |
7005 | * Description: Load the "init" program; in most cases, this will be "launchd" |
7006 | * |
7007 | * Parameters: p Process to call execve() to create |
7008 | * the "init" program |
7009 | * scratch_addr Page in p, scratch space |
7010 | * path NULL terminated path |
7011 | * |
7012 | * Returns: KERN_SUCCESS Success |
7013 | * !KERN_SUCCESS See execve/mac_execve for error codes |
7014 | * |
7015 | * Notes: The process that is passed in is the first manufactured |
7016 | * process on the system, and gets here via bsd_ast() firing |
7017 | * for the first time. This is done to ensure that bsd_init() |
7018 | * has run to completion. |
7019 | * |
7020 | * The address map of the first manufactured process matches the |
7021 | * word width of the kernel. Once the self-exec completes, the |
7022 | * initproc might be different. |
7023 | */ |
7024 | static int |
7025 | load_init_program_at_path(proc_t p, user_addr_t scratch_addr, const char* path) |
7026 | { |
7027 | int retval[2]; |
7028 | int error; |
7029 | struct execve_args init_exec_args; |
7030 | user_addr_t argv0 = USER_ADDR_NULL, argv1 = USER_ADDR_NULL; |
7031 | |
7032 | /* |
7033 | * Validate inputs and pre-conditions |
7034 | */ |
7035 | assert(p); |
7036 | assert(scratch_addr); |
7037 | assert(path); |
7038 | |
7039 | /* |
7040 | * Copy out program name. |
7041 | */ |
7042 | size_t path_length = strlen(s: path) + 1; |
7043 | argv0 = scratch_addr; |
7044 | error = copyout(path, argv0, path_length); |
7045 | if (error) { |
7046 | return error; |
7047 | } |
7048 | |
7049 | scratch_addr = USER_ADDR_ALIGN(scratch_addr + path_length, sizeof(user_addr_t)); |
7050 | |
7051 | /* |
7052 | * Put out first (and only) argument, similarly. |
7053 | * Assumes everything fits in a page as allocated above. |
7054 | */ |
7055 | if (boothowto & RB_SINGLE) { |
7056 | const char *init_args = "-s" ; |
7057 | size_t init_args_length = strlen(s: init_args) + 1; |
7058 | |
7059 | argv1 = scratch_addr; |
7060 | error = copyout(init_args, argv1, init_args_length); |
7061 | if (error) { |
7062 | return error; |
7063 | } |
7064 | |
7065 | scratch_addr = USER_ADDR_ALIGN(scratch_addr + init_args_length, sizeof(user_addr_t)); |
7066 | } |
7067 | |
7068 | if (proc_is64bit(p)) { |
7069 | user64_addr_t argv64bit[3] = {}; |
7070 | |
7071 | argv64bit[0] = argv0; |
7072 | argv64bit[1] = argv1; |
7073 | argv64bit[2] = USER_ADDR_NULL; |
7074 | |
7075 | error = copyout(argv64bit, scratch_addr, sizeof(argv64bit)); |
7076 | if (error) { |
7077 | return error; |
7078 | } |
7079 | } else { |
7080 | user32_addr_t argv32bit[3] = {}; |
7081 | |
7082 | argv32bit[0] = (user32_addr_t)argv0; |
7083 | argv32bit[1] = (user32_addr_t)argv1; |
7084 | argv32bit[2] = USER_ADDR_NULL; |
7085 | |
7086 | error = copyout(argv32bit, scratch_addr, sizeof(argv32bit)); |
7087 | if (error) { |
7088 | return error; |
7089 | } |
7090 | } |
7091 | |
7092 | /* |
7093 | * Set up argument block for fake call to execve. |
7094 | */ |
7095 | init_exec_args.fname = argv0; |
7096 | init_exec_args.argp = scratch_addr; |
7097 | init_exec_args.envp = USER_ADDR_NULL; |
7098 | |
7099 | /* |
7100 | * So that init task is set with uid,gid 0 token |
7101 | * |
7102 | * The access to the cred is safe: |
7103 | * the proc isn't running yet, it's stable. |
7104 | */ |
7105 | set_security_token(p, cred: proc_ucred_unsafe(p)); |
7106 | |
7107 | return execve(p, uap: &init_exec_args, retval); |
7108 | } |
7109 | |
/*
 * Candidate init program paths. load_init_program() tries them in array
 * order; the appleinternal entries are only compiled into DEBUG and/or
 * DEVELOPMENT builds, and /sbin/launchd is always the last entry.
 */
static const char * init_programs[] = {
#if DEBUG
	"/usr/appleinternal/sbin/launchd.debug" ,
#endif
#if DEVELOPMENT || DEBUG
	"/usr/appleinternal/sbin/launchd.development" ,
#endif
	"/sbin/launchd" ,
};
7119 | |
7120 | /* |
7121 | * load_init_program |
7122 | * |
7123 | * Description: Load the "init" program; in most cases, this will be "launchd" |
7124 | * |
7125 | * Parameters: p Process to call execve() to create |
7126 | * the "init" program |
7127 | * |
7128 | * Returns: (void) |
7129 | * |
7130 | * Notes: The process that is passed in is the first manufactured |
7131 | * process on the system, and gets here via bsd_ast() firing |
7132 | * for the first time. This is done to ensure that bsd_init() |
7133 | * has run to completion. |
7134 | * |
7135 | * In DEBUG & DEVELOPMENT builds, the launchdsuffix boot-arg |
7136 | * may be used to select a specific launchd executable. As with |
7137 | * the kcsuffix boot-arg, setting launchdsuffix to "" or "release" |
7138 | * will force /sbin/launchd to be selected. |
7139 | * |
7140 | * Search order by build: |
7141 | * |
7142 | * DEBUG DEVELOPMENT RELEASE PATH |
7143 | * ---------------------------------------------------------------------------------- |
7144 | * 1 1 NA /usr/appleinternal/sbin/launchd.$LAUNCHDSUFFIX |
7145 | * 2 NA NA /usr/appleinternal/sbin/launchd.debug |
7146 | * 3 2 NA /usr/appleinternal/sbin/launchd.development |
7147 | * 4 3 1 /sbin/launchd |
7148 | */ |
7149 | void |
7150 | load_init_program(proc_t p) |
7151 | { |
7152 | uint32_t i; |
7153 | int error; |
7154 | vm_map_t map = current_map(); |
7155 | mach_vm_offset_t scratch_addr = 0; |
7156 | mach_vm_size_t map_page_size = vm_map_page_size(map); |
7157 | |
7158 | (void) mach_vm_allocate_kernel(map, addr: &scratch_addr, size: map_page_size, VM_FLAGS_ANYWHERE, VM_KERN_MEMORY_NONE); |
7159 | #if CONFIG_MEMORYSTATUS |
7160 | (void) memorystatus_init_at_boot_snapshot(); |
7161 | #endif /* CONFIG_MEMORYSTATUS */ |
7162 | |
7163 | #if DEBUG || DEVELOPMENT |
7164 | /* Check for boot-arg suffix first */ |
7165 | char launchd_suffix[64]; |
7166 | if (PE_parse_boot_argn("launchdsuffix" , launchd_suffix, sizeof(launchd_suffix))) { |
7167 | char launchd_path[128]; |
7168 | boolean_t is_release_suffix = ((launchd_suffix[0] == 0) || |
7169 | (strcmp(launchd_suffix, "release" ) == 0)); |
7170 | |
7171 | if (is_release_suffix) { |
7172 | printf("load_init_program: attempting to load /sbin/launchd\n" ); |
7173 | error = load_init_program_at_path(p, (user_addr_t)scratch_addr, "/sbin/launchd" ); |
7174 | if (!error) { |
7175 | return; |
7176 | } |
7177 | |
7178 | panic("Process 1 exec of launchd.release failed, errno %d" , error); |
7179 | } else { |
7180 | strlcpy(launchd_path, "/usr/appleinternal/sbin/launchd." , sizeof(launchd_path)); |
7181 | strlcat(launchd_path, launchd_suffix, sizeof(launchd_path)); |
7182 | |
7183 | printf("load_init_program: attempting to load %s\n" , launchd_path); |
7184 | error = load_init_program_at_path(p, (user_addr_t)scratch_addr, launchd_path); |
7185 | if (!error) { |
7186 | return; |
7187 | } else if (error != ENOENT) { |
7188 | printf("load_init_program: failed loading %s: errno %d\n" , launchd_path, error); |
7189 | } |
7190 | } |
7191 | } |
7192 | #endif |
7193 | |
7194 | error = ENOENT; |
7195 | for (i = 0; i < sizeof(init_programs) / sizeof(init_programs[0]); i++) { |
7196 | printf("load_init_program: attempting to load %s\n" , init_programs[i]); |
7197 | error = load_init_program_at_path(p, scratch_addr: (user_addr_t)scratch_addr, path: init_programs[i]); |
7198 | if (!error) { |
7199 | return; |
7200 | } else if (error != ENOENT) { |
7201 | printf("load_init_program: failed loading %s: errno %d\n" , init_programs[i], error); |
7202 | } |
7203 | } |
7204 | |
7205 | panic("Process 1 exec of %s failed, errno %d" , ((i == 0) ? "<null>" : init_programs[i - 1]), error); |
7206 | } |
7207 | |
7208 | /* |
7209 | * load_return_to_errno |
7210 | * |
7211 | * Description: Convert a load_return_t (Mach error) to an errno (BSD error) |
7212 | * |
7213 | * Parameters: lrtn Mach error number |
7214 | * |
7215 | * Returns: (int) BSD error number |
7216 | * 0 Success |
7217 | * EBADARCH Bad architecture |
7218 | * EBADMACHO Bad Mach object file |
7219 | * ESHLIBVERS Bad shared library version |
7220 | * ENOMEM Out of memory/resource shortage |
7221 | * EACCES Access denied |
7222 | * ENOENT Entry not found (usually "file does |
7223 | * does not exist") |
7224 | * EIO An I/O error occurred |
7225 | * EBADEXEC The executable is corrupt/unknown |
7226 | */ |
7227 | static int |
7228 | load_return_to_errno(load_return_t lrtn) |
7229 | { |
7230 | switch (lrtn) { |
7231 | case LOAD_SUCCESS: |
7232 | return 0; |
7233 | case LOAD_BADARCH: |
7234 | return EBADARCH; |
7235 | case LOAD_BADMACHO: |
7236 | case LOAD_BADMACHO_UPX: |
7237 | return EBADMACHO; |
7238 | case LOAD_SHLIB: |
7239 | return ESHLIBVERS; |
7240 | case LOAD_NOSPACE: |
7241 | case LOAD_RESOURCE: |
7242 | return ENOMEM; |
7243 | case LOAD_PROTECT: |
7244 | return EACCES; |
7245 | case LOAD_ENOENT: |
7246 | return ENOENT; |
7247 | case LOAD_IOERROR: |
7248 | return EIO; |
7249 | case LOAD_DECRYPTFAIL: |
7250 | return EAUTH; |
7251 | case LOAD_FAILURE: |
7252 | default: |
7253 | return EBADEXEC; |
7254 | } |
7255 | } |
7256 | |
7257 | #include <mach/mach_types.h> |
7258 | #include <mach/vm_prot.h> |
7259 | #include <mach/semaphore.h> |
7260 | #include <mach/sync_policy.h> |
7261 | #include <kern/clock.h> |
7262 | #include <mach/kern_return.h> |
7263 | |
7264 | /* |
7265 | * execargs_alloc |
7266 | * |
7267 | * Description: Allocate the block of memory used by the execve arguments. |
7268 | * At the same time, we allocate a page so that we can read in |
7269 | * the first page of the image. |
7270 | * |
7271 | * Parameters: struct image_params * the image parameter block |
7272 | * |
7273 | * Returns: 0 Success |
7274 | * EINVAL Invalid argument |
7275 | * EACCES Permission denied |
7276 | * EINTR Interrupted function |
7277 | * ENOMEM Not enough space |
7278 | * |
7279 | * Notes: This is a temporary allocation into the kernel address space |
7280 | * to enable us to copy arguments in from user space. This is |
7281 | * necessitated by not mapping the process calling execve() into |
7282 | * the kernel address space during the execve() system call. |
7283 | * |
7284 | * We assemble the argument and environment, etc., into this |
7285 | * region before copying it as a single block into the child |
7286 | * process address space (at the top or bottom of the stack, |
7287 | * depending on which way the stack grows; see the function |
7288 | * exec_copyout_strings() for details). |
7289 | * |
7290 | * This ends up with a second (possibly unnecessary) copy compared |
 *		with assembling the data directly into the child address space,
7292 | * instead, but since we cannot be guaranteed that the parent has |
7293 | * not modified its environment, we can't really know that it's |
7294 | * really a block there as well. |
7295 | */ |
7296 | |
7297 | |
/*
 * Number of threads currently sleeping in execargs_alloc() for a free
 * slot; execargs_free() checks it to decide whether a wakeup is needed.
 */
static int execargs_waiters = 0;
/* Mutex taken around updates to the execargs free count, cache and waiter count. */
static LCK_MTX_DECLARE_ATTR(execargs_cache_lock, &proc_lck_grp, &proc_lck_attr);
7300 | |
7301 | static void |
7302 | execargs_lock_lock(void) |
7303 | { |
7304 | lck_mtx_lock_spin(lck: &execargs_cache_lock); |
7305 | } |
7306 | |
7307 | static void |
7308 | execargs_lock_unlock(void) |
7309 | { |
7310 | lck_mtx_unlock(lck: &execargs_cache_lock); |
7311 | } |
7312 | |
7313 | static wait_result_t |
7314 | execargs_lock_sleep(void) |
7315 | { |
7316 | return lck_mtx_sleep(lck: &execargs_cache_lock, lck_sleep_action: LCK_SLEEP_DEFAULT, event: &execargs_free_count, THREAD_INTERRUPTIBLE); |
7317 | } |
7318 | |
7319 | static kern_return_t |
7320 | execargs_purgeable_allocate(char **execarg_address) |
7321 | { |
7322 | mach_vm_offset_t addr = 0; |
7323 | kern_return_t kr = mach_vm_allocate_kernel(map: bsd_pageable_map, addr: &addr, |
7324 | BSD_PAGEABLE_SIZE_PER_EXEC, VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE, |
7325 | VM_KERN_MEMORY_NONE); |
7326 | *execarg_address = (char *)addr; |
7327 | assert(kr == KERN_SUCCESS); |
7328 | return kr; |
7329 | } |
7330 | |
7331 | static kern_return_t |
7332 | execargs_purgeable_reference(void *execarg_address) |
7333 | { |
7334 | int state = VM_PURGABLE_NONVOLATILE; |
7335 | kern_return_t kr = vm_purgable_control(target_task: bsd_pageable_map, address: (vm_offset_t) execarg_address, VM_PURGABLE_SET_STATE, state: &state); |
7336 | |
7337 | assert(kr == KERN_SUCCESS); |
7338 | return kr; |
7339 | } |
7340 | |
7341 | static kern_return_t |
7342 | execargs_purgeable_volatilize(void *execarg_address) |
7343 | { |
7344 | int state = VM_PURGABLE_VOLATILE | VM_PURGABLE_ORDERING_OBSOLETE; |
7345 | kern_return_t kr; |
7346 | kr = vm_purgable_control(target_task: bsd_pageable_map, address: (vm_offset_t) execarg_address, VM_PURGABLE_SET_STATE, state: &state); |
7347 | |
7348 | assert(kr == KERN_SUCCESS); |
7349 | |
7350 | return kr; |
7351 | } |
7352 | |
/*
 * Wake threads waiting on the execargs_free_count event, i.e. the event
 * that execargs_lock_sleep() sleeps on.
 */
static void
execargs_wakeup_waiters(void)
{
	thread_wakeup(&execargs_free_count);
}
7358 | |
7359 | static int |
7360 | execargs_alloc(struct image_params *imgp) |
7361 | { |
7362 | kern_return_t kret; |
7363 | wait_result_t res; |
7364 | int i, cache_index = -1; |
7365 | |
7366 | execargs_lock_lock(); |
7367 | |
7368 | while (execargs_free_count == 0) { |
7369 | execargs_waiters++; |
7370 | res = execargs_lock_sleep(); |
7371 | execargs_waiters--; |
7372 | if (res != THREAD_AWAKENED) { |
7373 | execargs_lock_unlock(); |
7374 | return EINTR; |
7375 | } |
7376 | } |
7377 | |
7378 | execargs_free_count--; |
7379 | |
7380 | for (i = 0; i < execargs_cache_size; i++) { |
7381 | vm_offset_t element = execargs_cache[i]; |
7382 | if (element) { |
7383 | cache_index = i; |
7384 | imgp->ip_strings = (char *)(execargs_cache[i]); |
7385 | execargs_cache[i] = 0; |
7386 | break; |
7387 | } |
7388 | } |
7389 | |
7390 | assert(execargs_free_count >= 0); |
7391 | |
7392 | execargs_lock_unlock(); |
7393 | |
7394 | if (cache_index == -1) { |
7395 | kret = execargs_purgeable_allocate(execarg_address: &imgp->ip_strings); |
7396 | } else { |
7397 | kret = execargs_purgeable_reference(execarg_address: imgp->ip_strings); |
7398 | } |
7399 | |
7400 | assert(kret == KERN_SUCCESS); |
7401 | if (kret != KERN_SUCCESS) { |
7402 | return ENOMEM; |
7403 | } |
7404 | |
7405 | /* last page used to read in file headers */ |
7406 | imgp->ip_vdata = imgp->ip_strings + (NCARGS + PAGE_SIZE); |
7407 | imgp->ip_strendp = imgp->ip_strings; |
7408 | imgp->ip_argspace = NCARGS; |
7409 | imgp->ip_strspace = (NCARGS + PAGE_SIZE); |
7410 | |
7411 | return 0; |
7412 | } |
7413 | |
7414 | /* |
7415 | * execargs_free |
7416 | * |
7417 | * Description: Free the block of memory used by the execve arguments and the |
7418 | * first page of the executable by a previous call to the function |
7419 | * execargs_alloc(). |
7420 | * |
7421 | * Parameters: struct image_params * the image parameter block |
7422 | * |
7423 | * Returns: 0 Success |
7424 | * EINVAL Invalid argument |
7425 | * EINTR Oeration interrupted |
7426 | */ |
7427 | static int |
7428 | execargs_free(struct image_params *imgp) |
7429 | { |
7430 | kern_return_t kret; |
7431 | int i; |
7432 | boolean_t needs_wakeup = FALSE; |
7433 | |
7434 | kret = execargs_purgeable_volatilize(execarg_address: imgp->ip_strings); |
7435 | |
7436 | execargs_lock_lock(); |
7437 | execargs_free_count++; |
7438 | |
7439 | for (i = 0; i < execargs_cache_size; i++) { |
7440 | vm_offset_t element = execargs_cache[i]; |
7441 | if (element == 0) { |
7442 | execargs_cache[i] = (vm_offset_t) imgp->ip_strings; |
7443 | imgp->ip_strings = NULL; |
7444 | break; |
7445 | } |
7446 | } |
7447 | |
7448 | assert(imgp->ip_strings == NULL); |
7449 | |
7450 | if (execargs_waiters > 0) { |
7451 | needs_wakeup = TRUE; |
7452 | } |
7453 | |
7454 | execargs_lock_unlock(); |
7455 | |
7456 | if (needs_wakeup == TRUE) { |
7457 | execargs_wakeup_waiters(); |
7458 | } |
7459 | |
7460 | return kret == KERN_SUCCESS ? 0 : EINVAL; |
7461 | } |
7462 | |
/*
 * Record imgp as the exec image parameters of the given uthread (stored in
 * uu_save.uus_exec_data.imgp, which thread_get_current_exec_path() reads).
 */
void
uthread_set_exec_data(struct uthread *uth, struct image_params *imgp)
{
	uth->uu_save.uus_exec_data.imgp = imgp;
}
7468 | |
7469 | size_t |
7470 | thread_get_current_exec_path(char *path, size_t size) |
7471 | { |
7472 | struct uthread *uth = current_uthread(); |
7473 | struct image_params *imgp = uth->uu_save.uus_exec_data.imgp; |
7474 | size_t string_size = 0; |
7475 | char *exec_path; |
7476 | |
7477 | if (path == NULL || imgp == NULL || imgp->ip_strings == NULL) { |
7478 | return 0; |
7479 | } |
7480 | |
7481 | exec_path = imgp->ip_strings + strlen(EXECUTABLE_KEY); |
7482 | string_size = imgp->ip_strendp - exec_path; |
7483 | string_size = MIN(MAXPATHLEN, string_size); |
7484 | string_size = MIN(size, string_size); |
7485 | |
7486 | string_size = strlcpy(dst: path, src: exec_path, n: string_size); |
7487 | return string_size; |
7488 | } |
7489 | static void |
7490 | exec_resettextvp(proc_t p, struct image_params *imgp) |
7491 | { |
7492 | vnode_t vp; |
7493 | off_t offset; |
7494 | vnode_t tvp = p->p_textvp; |
7495 | int ret; |
7496 | |
7497 | vp = imgp->ip_vp; |
7498 | offset = imgp->ip_arch_offset; |
7499 | |
7500 | if (vp == NULLVP) { |
7501 | panic("exec_resettextvp: expected valid vp" ); |
7502 | } |
7503 | |
7504 | ret = vnode_ref(vp); |
7505 | proc_lock(p); |
7506 | if (ret == 0) { |
7507 | p->p_textvp = vp; |
7508 | p->p_textoff = offset; |
7509 | } else { |
7510 | p->p_textvp = NULLVP; /* this is paranoia */ |
7511 | p->p_textoff = 0; |
7512 | } |
7513 | proc_unlock(p); |
7514 | |
7515 | if (tvp != NULLVP) { |
7516 | if (vnode_getwithref(vp: tvp) == 0) { |
7517 | vnode_rele(vp: tvp); |
7518 | vnode_put(vp: tvp); |
7519 | } |
7520 | } |
7521 | } |
7522 | |
7523 | // Includes the 0-byte (therefore "SIZE" instead of "LEN"). |
7524 | static const size_t CS_CDHASH_STRING_SIZE = CS_CDHASH_LEN * 2 + 1; |
7525 | |
7526 | static void |
7527 | cdhash_to_string(char str[CS_CDHASH_STRING_SIZE], uint8_t const * const cdhash) |
7528 | { |
7529 | static char const nibble[] = "0123456789abcdef" ; |
7530 | |
7531 | /* Apparently still the safest way to get a hex representation |
7532 | * of binary data. |
7533 | * xnu's printf routines have %*D/%20D in theory, but "not really", see: |
7534 | * <rdar://problem/33328859> confusion around %*D/%nD in printf |
7535 | */ |
7536 | for (int i = 0; i < CS_CDHASH_LEN; ++i) { |
7537 | str[i * 2] = nibble[(cdhash[i] & 0xf0) >> 4]; |
7538 | str[i * 2 + 1] = nibble[cdhash[i] & 0x0f]; |
7539 | } |
7540 | str[CS_CDHASH_STRING_SIZE - 1] = 0; |
7541 | } |
7542 | |
7543 | /* |
7544 | * __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__ |
7545 | * |
7546 | * Description: Waits for the userspace daemon to respond to the request |
7547 | * we made. Function declared non inline to be visible in |
7548 | * stackshots and spindumps as well as debugging. |
7549 | */ |
7550 | __attribute__((noinline)) int |
7551 | __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__(mach_port_t task_access_port, int32_t new_pid) |
7552 | { |
7553 | return find_code_signature(task_access_port, new_pid); |
7554 | } |
7555 | |
7556 | /* |
7557 | * Update signature dependent process state, called by |
7558 | * process_signature. |
7559 | */ |
7560 | static int |
7561 | proc_process_signature(proc_t p, os_reason_t *signature_failure_reason) |
7562 | { |
7563 | int error = 0; |
7564 | char const *error_msg = NULL; |
7565 | |
7566 | kern_return_t kr = machine_task_process_signature(task: proc_get_task_raw(proc: p), platform: proc_platform(p), sdk: proc_sdk(p), error_msg: &error_msg); |
7567 | |
7568 | if (kr != KERN_SUCCESS) { |
7569 | error = EINVAL; |
7570 | |
7571 | if (error_msg != NULL) { |
7572 | uint32_t error_msg_len = (uint32_t)strlen(s: error_msg) + 1; |
7573 | mach_vm_address_t data_addr = 0; |
7574 | int reason_error = 0; |
7575 | int kcdata_error = 0; |
7576 | |
7577 | os_reason_t reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_SECURITY_POLICY); |
7578 | reason->osr_flags = OS_REASON_FLAG_GENERATE_CRASH_REPORT | OS_REASON_FLAG_CONSISTENT_FAILURE; |
7579 | |
7580 | if ((reason_error = os_reason_alloc_buffer_noblock(cur_reason: reason, |
7581 | osr_bufsize: kcdata_estimate_required_buffer_size(num_items: 1, payload_size: error_msg_len))) == 0 && |
7582 | (kcdata_error = kcdata_get_memory_addr(data: &reason->osr_kcd_descriptor, |
7583 | EXIT_REASON_USER_DESC, size: error_msg_len, |
7584 | user_addr: &data_addr)) == KERN_SUCCESS) { |
7585 | kern_return_t mc_error = kcdata_memcpy(data: &reason->osr_kcd_descriptor, dst_addr: (mach_vm_address_t)data_addr, |
7586 | src_addr: error_msg, size: error_msg_len); |
7587 | |
7588 | if (mc_error != KERN_SUCCESS) { |
7589 | printf("process_signature: failed to copy reason string (kcdata_memcpy error: %d)\n" , |
7590 | mc_error); |
7591 | } |
7592 | } else { |
7593 | printf("failed to allocate space for reason string (os_reason_alloc_buffer error: %d, kcdata error: %d, length: %u)\n" , |
7594 | reason_error, kcdata_error, error_msg_len); |
7595 | } |
7596 | |
7597 | assert(*signature_failure_reason == NULL); // shouldn't have gotten so far |
7598 | *signature_failure_reason = reason; |
7599 | } |
7600 | } |
7601 | return error; |
7602 | } |
7603 | |
7604 | static int |
7605 | process_signature(proc_t p, struct image_params *imgp) |
7606 | { |
7607 | mach_port_t port = IPC_PORT_NULL; |
7608 | kern_return_t kr = KERN_FAILURE; |
7609 | int error = EACCES; |
7610 | boolean_t unexpected_failure = FALSE; |
7611 | struct cs_blob *csb; |
7612 | boolean_t require_success = FALSE; |
7613 | int spawn = (imgp->ip_flags & IMGPF_SPAWN); |
7614 | const int vfexec = 0; |
7615 | os_reason_t signature_failure_reason = OS_REASON_NULL; |
7616 | |
7617 | /* |
7618 | * Override inherited code signing flags with the |
7619 | * ones for the process that is being successfully |
7620 | * loaded |
7621 | */ |
7622 | proc_lock(p); |
7623 | proc_csflags_update(p, imgp->ip_csflags); |
7624 | proc_unlock(p); |
7625 | |
7626 | /* Set the switch_protect flag on the map */ |
7627 | if (proc_getcsflags(p) & (CS_HARD | CS_KILL)) { |
7628 | vm_map_switch_protect(map: get_task_map(proc_task(p)), TRUE); |
7629 | } |
7630 | /* set the cs_enforced flags in the map */ |
7631 | if (proc_getcsflags(p) & CS_ENFORCEMENT) { |
7632 | vm_map_cs_enforcement_set(map: get_task_map(proc_task(p)), TRUE); |
7633 | } else { |
7634 | vm_map_cs_enforcement_set(map: get_task_map(proc_task(p)), FALSE); |
7635 | } |
7636 | |
7637 | /* |
7638 | * image activation may be failed due to policy |
7639 | * which is unexpected but security framework does not |
7640 | * approve of exec, kill and return immediately. |
7641 | */ |
7642 | if (imgp->ip_mac_return != 0) { |
7643 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE, |
7644 | proc_getpid(p), OS_REASON_EXEC, EXEC_EXIT_REASON_SECURITY_POLICY, 0, 0); |
7645 | signature_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_SECURITY_POLICY); |
7646 | error = imgp->ip_mac_return; |
7647 | unexpected_failure = TRUE; |
7648 | goto done; |
7649 | } |
7650 | |
7651 | if (imgp->ip_cs_error != OS_REASON_NULL) { |
7652 | signature_failure_reason = imgp->ip_cs_error; |
7653 | imgp->ip_cs_error = OS_REASON_NULL; |
7654 | error = EACCES; |
7655 | goto done; |
7656 | } |
7657 | |
7658 | /* call the launch constraints hook */ |
7659 | os_reason_t launch_constraint_reason; |
7660 | if ((error = mac_proc_check_launch_constraints(curp: p, imgp, reasonp: &launch_constraint_reason)) != 0) { |
7661 | signature_failure_reason = launch_constraint_reason; |
7662 | goto done; |
7663 | } |
7664 | |
7665 | #if XNU_TARGET_OS_OSX |
7666 | /* Check for platform passed in spawn attr if iOS binary is being spawned */ |
7667 | if (proc_platform(p) == PLATFORM_IOS) { |
7668 | struct _posix_spawnattr *psa = (struct _posix_spawnattr *) imgp->ip_px_sa; |
7669 | if (psa == NULL || psa->psa_platform == 0) { |
7670 | boolean_t no_sandbox_entitled = FALSE; |
7671 | #if DEBUG || DEVELOPMENT |
7672 | /* |
7673 | * Allow iOS binaries to spawn on internal systems |
7674 | * if no-sandbox entitlement is present of unentitled_ios_sim_launch |
7675 | * boot-arg set to true |
7676 | */ |
7677 | if (unentitled_ios_sim_launch) { |
7678 | no_sandbox_entitled = TRUE; |
7679 | } else { |
7680 | no_sandbox_entitled = IOVnodeHasEntitlement(imgp->ip_vp, |
7681 | (int64_t)imgp->ip_arch_offset, "com.apple.private.security.no-sandbox" ); |
7682 | } |
7683 | #endif /* DEBUG || DEVELOPMENT */ |
7684 | if (!no_sandbox_entitled) { |
7685 | signature_failure_reason = os_reason_create(OS_REASON_EXEC, |
7686 | EXEC_EXIT_REASON_WRONG_PLATFORM); |
7687 | error = EACCES; |
7688 | goto done; |
7689 | } |
7690 | printf("Allowing spawn of iOS binary %s since it has " |
7691 | "com.apple.private.security.no-sandbox entitlement or unentitled_ios_sim_launch " |
7692 | "boot-arg set to true\n" , p->p_name); |
7693 | } else if (psa->psa_platform != PLATFORM_IOS) { |
7694 | /* Simulator binary spawned with wrong platform */ |
7695 | signature_failure_reason = os_reason_create(OS_REASON_EXEC, |
7696 | EXEC_EXIT_REASON_WRONG_PLATFORM); |
7697 | error = EACCES; |
7698 | goto done; |
7699 | } else { |
7700 | printf("Allowing spawn of iOS binary %s since correct platform was passed in spawn\n" , |
7701 | p->p_name); |
7702 | } |
7703 | } |
7704 | #endif /* XNU_TARGET_OS_OSX */ |
7705 | |
7706 | /* If the code signature came through the image activation path, we skip the |
7707 | * taskgated / externally attached path. */ |
7708 | if (imgp->ip_csflags & CS_SIGNED) { |
7709 | error = 0; |
7710 | goto done; |
7711 | } |
7712 | |
7713 | /* The rest of the code is for signatures that either already have been externally |
7714 | * attached (likely, but not necessarily by a previous run through the taskgated |
7715 | * path), or that will now be attached by taskgated. */ |
7716 | |
7717 | kr = task_get_task_access_port(proc_task(p), &port); |
7718 | if (KERN_SUCCESS != kr || !IPC_PORT_VALID(port)) { |
7719 | error = 0; |
7720 | if (require_success) { |
7721 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE, |
7722 | proc_getpid(p), OS_REASON_CODESIGNING, CODESIGNING_EXIT_REASON_TASK_ACCESS_PORT, 0, 0); |
7723 | signature_failure_reason = os_reason_create(OS_REASON_CODESIGNING, CODESIGNING_EXIT_REASON_TASK_ACCESS_PORT); |
7724 | error = EACCES; |
7725 | } |
7726 | goto done; |
7727 | } |
7728 | |
7729 | /* |
7730 | * taskgated returns KERN_SUCCESS if it has completed its work |
7731 | * and the exec should continue, KERN_FAILURE if the exec should |
7732 | * fail, or it may error out with different error code in an |
7733 | * event of mig failure (e.g. process was signalled during the |
7734 | * rpc call, taskgated died, mig server died etc.). |
7735 | */ |
7736 | |
7737 | kr = __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__(task_access_port: port, new_pid: proc_getpid(p)); |
7738 | switch (kr) { |
7739 | case KERN_SUCCESS: |
7740 | error = 0; |
7741 | break; |
7742 | case KERN_FAILURE: |
7743 | error = EACCES; |
7744 | |
7745 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE, |
7746 | proc_getpid(p), OS_REASON_CODESIGNING, CODESIGNING_EXIT_REASON_TASKGATED_INVALID_SIG, 0, 0); |
7747 | signature_failure_reason = os_reason_create(OS_REASON_CODESIGNING, CODESIGNING_EXIT_REASON_TASKGATED_INVALID_SIG); |
7748 | goto done; |
7749 | default: |
7750 | error = EACCES; |
7751 | |
7752 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE, |
7753 | proc_getpid(p), OS_REASON_EXEC, EXEC_EXIT_REASON_TASKGATED_OTHER, 0, 0); |
7754 | signature_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_TASKGATED_OTHER); |
7755 | unexpected_failure = TRUE; |
7756 | goto done; |
7757 | } |
7758 | |
7759 | /* Only do this if exec_resettextvp() did not fail */ |
7760 | if (p->p_textvp != NULLVP) { |
7761 | csb = ubc_cs_blob_get(p->p_textvp, -1, -1, p->p_textoff); |
7762 | |
7763 | if (csb != NULL) { |
7764 | /* As the enforcement we can do here is very limited, we only allow things that |
7765 | * are the only reason why this code path still exists: |
7766 | * Adhoc signed non-platform binaries without special cs_flags and without any |
7767 | * entitlements (unrestricted ones still pass AMFI). */ |
7768 | if ( |
7769 | /* Revalidate the blob if necessary through bumped generation count. */ |
7770 | (ubc_cs_generation_check(p->p_textvp) == 0 || |
7771 | ubc_cs_blob_revalidate(p->p_textvp, csb, imgp, 0, proc_platform(p)) == 0) && |
7772 | /* Only CS_ADHOC, no CS_KILL, CS_HARD etc. */ |
7773 | (csb->csb_flags & CS_ALLOWED_MACHO) == CS_ADHOC && |
7774 | /* If it has a CMS blob, it's not adhoc. The CS_ADHOC flag can lie. */ |
7775 | csblob_find_blob_bytes((const uint8_t *)csb->csb_mem_kaddr, csb->csb_mem_size, |
7776 | CSSLOT_SIGNATURESLOT, |
7777 | CSMAGIC_BLOBWRAPPER) == NULL && |
7778 | /* It could still be in a trust cache (unlikely with CS_ADHOC), or a magic path. */ |
7779 | csb->csb_platform_binary == 0 && |
7780 | /* No entitlements, not even unrestricted ones. */ |
7781 | csb->csb_entitlements_blob == NULL && |
7782 | csb->csb_der_entitlements_blob == NULL) { |
7783 | proc_lock(p); |
7784 | proc_csflags_set(p, CS_SIGNED | CS_VALID); |
7785 | proc_unlock(p); |
7786 | } else { |
7787 | uint8_t cdhash[CS_CDHASH_LEN]; |
7788 | char cdhash_string[CS_CDHASH_STRING_SIZE]; |
7789 | proc_getcdhash(p, cdhash); |
7790 | cdhash_to_string(str: cdhash_string, cdhash); |
7791 | printf("ignoring detached code signature on '%s' with cdhash '%s' " |
7792 | "because it is invalid, or not a simple adhoc signature.\n" , |
7793 | p->p_name, cdhash_string); |
7794 | } |
7795 | } |
7796 | } |
7797 | |
7798 | done: |
7799 | if (0 == error) { |
7800 | /* |
7801 | * Update the new process's signature-dependent process state. |
7802 | * state. |
7803 | */ |
7804 | |
7805 | error = proc_process_signature(p, signature_failure_reason: &signature_failure_reason); |
7806 | } |
7807 | |
7808 | if (0 == error) { |
7809 | /* |
7810 | * Update the new main thread's signature-dependent thread |
7811 | * state. This was also called when the thread was created, |
7812 | * but for the main thread the signature was not yet attached |
7813 | * at that time. |
7814 | */ |
7815 | kr = thread_process_signature(thread: imgp->ip_new_thread, task: proc_get_task_raw(proc: p)); |
7816 | |
7817 | if (kr != KERN_SUCCESS) { |
7818 | error = EINVAL; |
7819 | signature_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_MACHINE_THREAD); |
7820 | } |
7821 | } |
7822 | |
7823 | if (0 == error) { |
7824 | /* The process's code signature related properties are |
7825 | * fully set up, so this is an opportune moment to log |
7826 | * platform binary execution, if desired. */ |
7827 | if (platform_exec_logging != 0 && csproc_get_platform_binary(p)) { |
7828 | uint8_t cdhash[CS_CDHASH_LEN]; |
7829 | char cdhash_string[CS_CDHASH_STRING_SIZE]; |
7830 | proc_getcdhash(p, cdhash); |
7831 | cdhash_to_string(str: cdhash_string, cdhash); |
7832 | |
7833 | os_log(peLog, "CS Platform Exec Logging: Executing platform signed binary " |
7834 | "'%s' with cdhash %s\n" , p->p_name, cdhash_string); |
7835 | } |
7836 | } else { |
7837 | if (!unexpected_failure) { |
7838 | proc_csflags_set(p, CS_KILLED); |
7839 | } |
7840 | /* make very sure execution fails */ |
7841 | if (vfexec || spawn) { |
7842 | assert(signature_failure_reason != OS_REASON_NULL); |
7843 | psignal_vfork_with_reason(p, new_task: proc_task(p), thread: imgp->ip_new_thread, |
7844 | SIGKILL, signal_reason: signature_failure_reason); |
7845 | signature_failure_reason = OS_REASON_NULL; |
7846 | error = 0; |
7847 | } else { |
7848 | assert(signature_failure_reason != OS_REASON_NULL); |
7849 | psignal_with_reason(p, SIGKILL, signal_reason: signature_failure_reason); |
7850 | signature_failure_reason = OS_REASON_NULL; |
7851 | } |
7852 | } |
7853 | |
7854 | if (port != IPC_PORT_NULL) { |
7855 | ipc_port_release_send(port); |
7856 | } |
7857 | |
7858 | /* If we hit this, we likely would have leaked an exit reason */ |
7859 | assert(signature_failure_reason == OS_REASON_NULL); |
7860 | return error; |
7861 | } |
7862 | |
7863 | /* |
7864 | * Typically as soon as we start executing this process, the |
7865 | * first instruction will trigger a VM fault to bring the text |
7866 | * pages (as executable) into the address space, followed soon |
7867 | * thereafter by dyld data structures (for dynamic executable). |
7868 | * To optimize this, as well as improve support for hardware |
7869 | * debuggers that can only access resident pages present |
7870 | * in the process' page tables, we prefault some pages if |
7871 | * possible. Errors are non-fatal. |
7872 | */ |
7873 | #ifndef PREVENT_CALLER_STACK_USE |
7874 | #define PREVENT_CALLER_STACK_USE __attribute__((noinline)) |
7875 | #endif |
7876 | static void PREVENT_CALLER_STACK_USE |
7877 | exec_prefault_data(proc_t p __unused, struct image_params *imgp, load_result_t *load_result) |
7878 | { |
7879 | int ret; |
7880 | size_t expected_all_image_infos_size; |
7881 | kern_return_t kr; |
7882 | |
7883 | /* |
7884 | * Prefault executable or dyld entry point. |
7885 | */ |
7886 | if (vm_map_page_shift(map: current_map()) < (int)PAGE_SHIFT) { |
7887 | DEBUG4K_LOAD("entry_point 0x%llx\n" , (uint64_t)load_result->entry_point); |
7888 | } |
7889 | kr = vm_fault(map: current_map(), |
7890 | vm_map_trunc_page(load_result->entry_point, |
7891 | vm_map_page_mask(current_map())), |
7892 | VM_PROT_READ | VM_PROT_EXECUTE, |
7893 | FALSE, VM_KERN_MEMORY_NONE, |
7894 | THREAD_UNINT, NULL, pmap_addr: 0); |
7895 | if (kr != KERN_SUCCESS) { |
7896 | DEBUG4K_ERROR("map %p va 0x%llx -> 0x%x\n" , current_map(), (uint64_t)vm_map_trunc_page(load_result->entry_point, vm_map_page_mask(current_map())), kr); |
7897 | } |
7898 | |
7899 | if (imgp->ip_flags & IMGPF_IS_64BIT_ADDR) { |
7900 | expected_all_image_infos_size = sizeof(struct user64_dyld_all_image_infos); |
7901 | } else { |
7902 | expected_all_image_infos_size = sizeof(struct user32_dyld_all_image_infos); |
7903 | } |
7904 | |
7905 | /* Decode dyld anchor structure from <mach-o/dyld_images.h> */ |
7906 | if (load_result->dynlinker && |
7907 | load_result->all_image_info_addr && |
7908 | load_result->all_image_info_size >= expected_all_image_infos_size) { |
7909 | union { |
7910 | struct user64_dyld_all_image_infos infos64; |
7911 | struct user32_dyld_all_image_infos infos32; |
7912 | } all_image_infos; |
7913 | |
7914 | /* |
7915 | * Pre-fault to avoid copyin() going through the trap handler |
7916 | * and recovery path. |
7917 | */ |
7918 | if (vm_map_page_shift(map: current_map()) < (int)PAGE_SHIFT) { |
7919 | DEBUG4K_LOAD("all_image_info_addr 0x%llx\n" , load_result->all_image_info_addr); |
7920 | } |
7921 | kr = vm_fault(map: current_map(), |
7922 | vm_map_trunc_page(load_result->all_image_info_addr, |
7923 | vm_map_page_mask(current_map())), |
7924 | VM_PROT_READ | VM_PROT_WRITE, |
7925 | FALSE, VM_KERN_MEMORY_NONE, |
7926 | THREAD_UNINT, NULL, pmap_addr: 0); |
7927 | if (kr != KERN_SUCCESS) { |
7928 | // printf("%s:%d map %p va 0x%llx -> 0x%x\n", __FUNCTION__, __LINE__, current_map(), vm_map_trunc_page(load_result->all_image_info_addr, vm_map_page_mask(current_map())), kr); |
7929 | } |
7930 | if ((load_result->all_image_info_addr & PAGE_MASK) + expected_all_image_infos_size > PAGE_SIZE) { |
7931 | /* all_image_infos straddles a page */ |
7932 | kr = vm_fault(map: current_map(), |
7933 | vm_map_trunc_page(load_result->all_image_info_addr + expected_all_image_infos_size - 1, |
7934 | vm_map_page_mask(current_map())), |
7935 | VM_PROT_READ | VM_PROT_WRITE, |
7936 | FALSE, VM_KERN_MEMORY_NONE, |
7937 | THREAD_UNINT, NULL, pmap_addr: 0); |
7938 | if (kr != KERN_SUCCESS) { |
7939 | // printf("%s:%d map %p va 0x%llx -> 0x%x\n", __FUNCTION__, __LINE__, current_map(), vm_map_trunc_page(load_result->all_image_info_addr + expected_all_image_infos_size -1, vm_map_page_mask(current_map())), kr); |
7940 | } |
7941 | } |
7942 | |
7943 | if (vm_map_page_shift(map: current_map()) < (int)PAGE_SHIFT) { |
7944 | DEBUG4K_LOAD("copyin(0x%llx, 0x%lx)\n" , load_result->all_image_info_addr, expected_all_image_infos_size); |
7945 | } |
7946 | ret = copyin((user_addr_t)load_result->all_image_info_addr, |
7947 | &all_image_infos, |
7948 | expected_all_image_infos_size); |
7949 | if (ret == 0 && all_image_infos.infos32.version >= DYLD_ALL_IMAGE_INFOS_ADDRESS_MINIMUM_VERSION) { |
7950 | user_addr_t notification_address; |
7951 | user_addr_t dyld_image_address; |
7952 | user_addr_t dyld_version_address; |
7953 | user_addr_t dyld_all_image_infos_address; |
7954 | user_addr_t dyld_slide_amount; |
7955 | |
7956 | if (imgp->ip_flags & IMGPF_IS_64BIT_ADDR) { |
7957 | notification_address = (user_addr_t)all_image_infos.infos64.notification; |
7958 | dyld_image_address = (user_addr_t)all_image_infos.infos64.dyldImageLoadAddress; |
7959 | dyld_version_address = (user_addr_t)all_image_infos.infos64.dyldVersion; |
7960 | dyld_all_image_infos_address = (user_addr_t)all_image_infos.infos64.dyldAllImageInfosAddress; |
7961 | } else { |
7962 | notification_address = all_image_infos.infos32.notification; |
7963 | dyld_image_address = all_image_infos.infos32.dyldImageLoadAddress; |
7964 | dyld_version_address = all_image_infos.infos32.dyldVersion; |
7965 | dyld_all_image_infos_address = all_image_infos.infos32.dyldAllImageInfosAddress; |
7966 | } |
7967 | |
7968 | /* |
7969 | * dyld statically sets up the all_image_infos in its Mach-O |
7970 | * binary at static link time, with pointers relative to its default |
7971 | * load address. Since ASLR might slide dyld before its first |
7972 | * instruction is executed, "dyld_slide_amount" tells us how far |
7973 | * dyld was loaded compared to its default expected load address. |
7974 | * All other pointers into dyld's image should be adjusted by this |
7975 | * amount. At some point later, dyld will fix up pointers to take |
7976 | * into account the slide, at which point the all_image_infos_address |
7977 | * field in the structure will match the runtime load address, and |
7978 | * "dyld_slide_amount" will be 0, if we were to consult it again. |
7979 | */ |
7980 | |
7981 | dyld_slide_amount = (user_addr_t)load_result->all_image_info_addr - dyld_all_image_infos_address; |
7982 | |
7983 | #if 0 |
7984 | kprintf("exec_prefault: 0x%016llx 0x%08x 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n" , |
7985 | (uint64_t)load_result->all_image_info_addr, |
7986 | all_image_infos.infos32.version, |
7987 | (uint64_t)notification_address, |
7988 | (uint64_t)dyld_image_address, |
7989 | (uint64_t)dyld_version_address, |
7990 | (uint64_t)dyld_all_image_infos_address); |
7991 | #endif |
7992 | |
7993 | if (vm_map_page_shift(map: current_map()) < (int)PAGE_SHIFT) { |
7994 | DEBUG4K_LOAD("notification_address 0x%llx dyld_slide_amount 0x%llx\n" , (uint64_t)notification_address, (uint64_t)dyld_slide_amount); |
7995 | } |
7996 | kr = vm_fault(map: current_map(), |
7997 | vm_map_trunc_page(notification_address + dyld_slide_amount, |
7998 | vm_map_page_mask(current_map())), |
7999 | VM_PROT_READ | VM_PROT_EXECUTE, |
8000 | FALSE, VM_KERN_MEMORY_NONE, |
8001 | THREAD_UNINT, NULL, pmap_addr: 0); |
8002 | if (kr != KERN_SUCCESS) { |
8003 | // printf("%s:%d map %p va 0x%llx -> 0x%x\n", __FUNCTION__, __LINE__, current_map(), vm_map_trunc_page(notification_address + dyld_slide_amount, vm_map_page_mask(current_map())), kr); |
8004 | } |
8005 | if (vm_map_page_shift(map: current_map()) < (int)PAGE_SHIFT) { |
8006 | DEBUG4K_LOAD("dyld_image_address 0x%llx dyld_slide_amount 0x%llx\n" , (uint64_t)dyld_image_address, (uint64_t)dyld_slide_amount); |
8007 | } |
8008 | kr = vm_fault(map: current_map(), |
8009 | vm_map_trunc_page(dyld_image_address + dyld_slide_amount, |
8010 | vm_map_page_mask(current_map())), |
8011 | VM_PROT_READ | VM_PROT_EXECUTE, |
8012 | FALSE, VM_KERN_MEMORY_NONE, |
8013 | THREAD_UNINT, NULL, pmap_addr: 0); |
8014 | if (kr != KERN_SUCCESS) { |
8015 | // printf("%s:%d map %p va 0x%llx -> 0x%x\n", __FUNCTION__, __LINE__, current_map(), vm_map_trunc_page(dyld_image_address + dyld_slide_amount, vm_map_page_mask(current_map())), kr); |
8016 | } |
8017 | if (vm_map_page_shift(map: current_map()) < (int)PAGE_SHIFT) { |
8018 | DEBUG4K_LOAD("dyld_version_address 0x%llx dyld_slide_amount 0x%llx\n" , (uint64_t)dyld_version_address, (uint64_t)dyld_slide_amount); |
8019 | } |
8020 | kr = vm_fault(map: current_map(), |
8021 | vm_map_trunc_page(dyld_version_address + dyld_slide_amount, |
8022 | vm_map_page_mask(current_map())), |
8023 | VM_PROT_READ, |
8024 | FALSE, VM_KERN_MEMORY_NONE, |
8025 | THREAD_UNINT, NULL, pmap_addr: 0); |
8026 | if (kr != KERN_SUCCESS) { |
8027 | // printf("%s:%d map %p va 0x%llx -> 0x%x\n", __FUNCTION__, __LINE__, current_map(), vm_map_trunc_page(dyld_version_address + dyld_slide_amount, vm_map_page_mask(current_map())), kr); |
8028 | } |
8029 | if (vm_map_page_shift(map: current_map()) < (int)PAGE_SHIFT) { |
8030 | DEBUG4K_LOAD("dyld_all_image_infos_address 0x%llx dyld_slide_amount 0x%llx\n" , (uint64_t)dyld_version_address, (uint64_t)dyld_slide_amount); |
8031 | } |
8032 | kr = vm_fault(map: current_map(), |
8033 | vm_map_trunc_page(dyld_all_image_infos_address + dyld_slide_amount, |
8034 | vm_map_page_mask(current_map())), |
8035 | VM_PROT_READ | VM_PROT_WRITE, |
8036 | FALSE, VM_KERN_MEMORY_NONE, |
8037 | THREAD_UNINT, NULL, pmap_addr: 0); |
8038 | if (kr != KERN_SUCCESS) { |
8039 | // printf("%s:%d map %p va 0x%llx -> 0x%x\n", __FUNCTION__, __LINE__, current_map(), vm_map_trunc_page(dyld_all_image_infos_address + dyld_slide_amount, vm_map_page_mask(current_map())), kr); |
8040 | } |
8041 | } |
8042 | } |
8043 | } |
8044 | |
8045 | static int |
8046 | sysctl_libmalloc_experiments SYSCTL_HANDLER_ARGS |
8047 | { |
8048 | #pragma unused(oidp, arg2, req) |
8049 | int changed; |
8050 | errno_t error; |
8051 | uint64_t value = os_atomic_load_wide(&libmalloc_experiment_factors, relaxed); |
8052 | |
8053 | error = sysctl_io_number(req, bigValue: value, valueSize: sizeof(value), pValue: &value, changed: &changed); |
8054 | if (error) { |
8055 | return error; |
8056 | } |
8057 | |
8058 | if (changed) { |
8059 | os_atomic_store_wide(&libmalloc_experiment_factors, value, relaxed); |
8060 | } |
8061 | |
8062 | return 0; |
8063 | } |
8064 | |
8065 | EXPERIMENT_FACTOR_PROC(_kern, libmalloc_experiments, CTLTYPE_QUAD | CTLFLAG_RW, 0, 0, &sysctl_libmalloc_experiments, "A" , "" ); |
8066 | |