1/*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29/*
30 * Copyright (c) 1982, 1986, 1989, 1991, 1993
31 * The Regents of the University of California. All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 * 3. All advertising materials mentioning features or use of this software
42 * must display the following acknowledgement:
43 * This product includes software developed by the University of
44 * California, Berkeley and its contributors.
45 * 4. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * @(#)kern_proc.c 8.4 (Berkeley) 1/4/94
62 */
63/*
64 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
65 * support for mandatory and extensible security protections. This notice
66 * is included in support of clause 2.2 (b) of the Apple Public License,
67 * Version 2.0.
68 */
/* HISTORY
 *  04-Aug-97  Umesh Vaishampayan (umeshv@apple.com)
 *	Added current_proc_EXTERNAL() function for the use of kernel
 *	loadable modules.
 *
 *  05-Jun-95	Mac Gillon (mgillon) at NeXT
 *	New version based on 3.3NS and 4.4
 */
77
78
79#include <sys/param.h>
80#include <sys/systm.h>
81#include <sys/kernel.h>
82#include <sys/proc_internal.h>
83#include <sys/acct.h>
84#include <sys/wait.h>
85#include <sys/file_internal.h>
86#include <sys/uio.h>
87#include <sys/malloc.h>
88#include <sys/lock.h>
89#include <sys/mbuf.h>
90#include <sys/ioctl.h>
91#include <sys/tty.h>
92#include <sys/signalvar.h>
93#include <sys/syslog.h>
94#include <sys/sysctl.h>
95#include <sys/sysproto.h>
96#include <sys/kauth.h>
97#include <sys/codesign.h>
98#include <sys/kernel_types.h>
99#include <sys/ubc.h>
100#include <kern/kalloc.h>
101#include <kern/smr_hash.h>
102#include <kern/task.h>
103#include <kern/coalition.h>
104#include <sys/coalition.h>
105#include <kern/assert.h>
106#include <kern/sched_prim.h>
107#include <vm/vm_protos.h>
108#include <vm/vm_map.h> /* vm_map_switch_protect() */
109#include <vm/vm_pageout.h>
110#include <mach/task.h>
111#include <mach/message.h>
112#include <sys/priv.h>
113#include <sys/proc_info.h>
114#include <sys/bsdtask_info.h>
115#include <sys/persona.h>
116#include <sys/sysent.h>
117#include <sys/reason.h>
118#include <sys/proc_require.h>
119#include <sys/kern_debug.h>
120#include <IOKit/IOBSD.h> /* IOTaskHasEntitlement() */
121#include <kern/ipc_kobject.h> /* ipc_kobject_set_kobjidx() */
122#include <kern/ast.h> /* proc_filedesc_ast */
123#include <libkern/amfi/amfi.h>
124#include <mach-o/loader.h>
125#include <os/base.h> /* OS_STRINGIFY */
126
127#if CONFIG_CSR
128#include <sys/csr.h>
129#endif
130
131#include <sys/kern_memorystatus.h>
132
133#if CONFIG_MACF
134#include <security/mac_framework.h>
135#include <security/mac_mach_internal.h>
136#endif
137
138#include <libkern/crypto/sha1.h>
139#include <IOKit/IOKitKeys.h>
140
/*
 * Structure associated with user caching.
 */
struct uidinfo {
	LIST_ENTRY(uidinfo) ui_hash;
	uid_t   ui_uid;
	size_t  ui_proccnt;
};
#define UIHASH(uid)     (&uihashtbl[(uid) & uihash])
static LIST_HEAD(uihashhead, uidinfo) * uihashtbl;
static u_long uihash;           /* size of hash table - 1 */
152
153/*
154 * Other process lists
155 */
156static struct smr_hash pid_hash;
157static struct smr_hash pgrp_hash;
158
159SECURITY_READ_ONLY_LATE(struct sesshashhead *) sesshashtbl;
160SECURITY_READ_ONLY_LATE(u_long) sesshash;
161
162#if PROC_REF_DEBUG
163/* disable panics on leaked proc refs across syscall boundary */
164static TUNABLE(bool, proc_ref_tracking_disabled, "-disable_procref_tracking", false);
165#endif
166
167struct proclist allproc = LIST_HEAD_INITIALIZER(allproc);
168struct proclist zombproc = LIST_HEAD_INITIALIZER(zombproc);
169extern struct tty cons;
170extern size_t proc_struct_size;
171extern size_t proc_and_task_size;
172
173extern int cs_debug;
174
175#if DEVELOPMENT || DEBUG
176static TUNABLE(bool, syscallfilter_disable, "-disable_syscallfilter", false);
177#endif // DEVELOPMENT || DEBUG
178
179#if DEBUG
180#define __PROC_INTERNAL_DEBUG 1
181#endif
182#if CONFIG_COREDUMP
183/* Name to give to core files */
184#if defined(XNU_TARGET_OS_BRIDGE)
185__XNU_PRIVATE_EXTERN const char * defaultcorefiledir = "/private/var/internal";
186__XNU_PRIVATE_EXTERN char corefilename[MAXPATHLEN + 1] = {"/private/var/internal/%N.core"};
187__XNU_PRIVATE_EXTERN const char * defaultdrivercorefiledir = "/private/var/internal";
188__XNU_PRIVATE_EXTERN char drivercorefilename[MAXPATHLEN + 1] = {"/private/var/internal/%N.core"};
189#elif defined(XNU_TARGET_OS_OSX)
190__XNU_PRIVATE_EXTERN const char * defaultcorefiledir = "/cores";
191__XNU_PRIVATE_EXTERN char corefilename[MAXPATHLEN + 1] = {"/cores/core.%P"};
192__XNU_PRIVATE_EXTERN const char * defaultdrivercorefiledir = "/private/var/dextcores";
193__XNU_PRIVATE_EXTERN char drivercorefilename[MAXPATHLEN + 1] = {"/private/var/dextcores/%N.core"};
194#else
195__XNU_PRIVATE_EXTERN const char * defaultcorefiledir = "/private/var/cores";
196__XNU_PRIVATE_EXTERN char corefilename[MAXPATHLEN + 1] = {"/private/var/cores/%N.core"};
197__XNU_PRIVATE_EXTERN const char * defaultdrivercorefiledir = "/private/var/dextcores";
198__XNU_PRIVATE_EXTERN char drivercorefilename[MAXPATHLEN + 1] = {"/private/var/dextcores/%N.core"};
199#endif
200#endif
201
202#if PROC_REF_DEBUG
203#include <kern/backtrace.h>
204#endif
205
206static LCK_MTX_DECLARE_ATTR(proc_klist_mlock, &proc_mlock_grp, &proc_lck_attr);
207
208ZONE_DEFINE(pgrp_zone, "pgrp",
209 sizeof(struct pgrp), ZC_ZFREE_CLEARMEM);
210ZONE_DEFINE(session_zone, "session",
211 sizeof(struct session), ZC_ZFREE_CLEARMEM);
212ZONE_DEFINE_ID(ZONE_ID_PROC_RO, "proc_ro", struct proc_ro,
213 ZC_READONLY | ZC_ZFREE_CLEARMEM);
214
215typedef uint64_t unaligned_u64 __attribute__((aligned(1)));
216
217static void orphanpg(struct pgrp * pg);
218void proc_name_kdp(proc_t t, char * buf, int size);
219boolean_t proc_binary_uuid_kdp(task_t task, uuid_t uuid);
220boolean_t current_thread_aborted(void);
221int proc_threadname_kdp(void * uth, char * buf, size_t size);
222void proc_starttime_kdp(void * p, unaligned_u64 *tv_sec, unaligned_u64 *tv_usec, unaligned_u64 *abstime);
223void proc_archinfo_kdp(void* p, cpu_type_t* cputype, cpu_subtype_t* cpusubtype);
224uint64_t proc_getcsflags_kdp(void * p);
225char * proc_name_address(void * p);
226char * proc_longname_address(void *);
227
228static void pgrp_destroy(struct pgrp *pgrp);
229static void pgrp_replace(proc_t p, struct pgrp *pgrp);
230static int csops_internal(pid_t pid, int ops, user_addr_t uaddr, user_size_t usersize, user_addr_t uaddittoken);
231static boolean_t proc_parent_is_currentproc(proc_t p);
232
233#if CONFIG_PROC_RESOURCE_LIMITS
234extern void task_filedesc_ast(task_t task, int current_size, int soft_limit, int hard_limit);
235extern void task_kqworkloop_ast(task_t task, int current_size, int soft_limit, int hard_limit);
236#endif
237
238struct fixjob_iterargs {
239 struct pgrp * pg;
240 struct session * mysession;
241 int entering;
242};
243
244int fixjob_callback(proc_t, void *);
245
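/*
 * Return the unique ID of the current process, or 0 if there is no
 * current proc.
 */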
246uint64_t
247get_current_unique_pid(void)
248{
249 proc_t p = current_proc();
250
251 if (p) {
252 return proc_uniqueid(p);
253 } else {
254 return 0;
255 }
256}
257
/*
 * Initialize global process hashing structures.
 */
static void
procinit(void)
{
	smr_hash_init(&pid_hash, maxproc / 4);
	smr_hash_init(&pgrp_hash, maxproc / 4);
	sesshashtbl = hashinit(maxproc / 4, M_PROC, &sesshash);
	uihashtbl = hashinit(maxproc / 16, M_PROC, &uihash);
}
269STARTUP(EARLY_BOOT, STARTUP_RANK_FIRST, procinit);
270
/*
 * Change the count associated with the number of processes
 * a given user is using. This routine protects the uihash
 * with the list lock.
 */
276size_t
277chgproccnt(uid_t uid, int diff)
278{
279 struct uidinfo *uip;
280 struct uidinfo *newuip = NULL;
281 struct uihashhead *uipp;
282 size_t retval;
283
284again:
285 proc_list_lock();
286 uipp = UIHASH(uid);
287 for (uip = uipp->lh_first; uip != 0; uip = uip->ui_hash.le_next) {
288 if (uip->ui_uid == uid) {
289 break;
290 }
291 }
292 if (uip) {
293 uip->ui_proccnt += diff;
294 if (uip->ui_proccnt > 0) {
295 retval = uip->ui_proccnt;
296 proc_list_unlock();
297 goto out;
298 }
299 LIST_REMOVE(uip, ui_hash);
300 retval = 0;
301 proc_list_unlock();
302 kfree_type(struct uidinfo, uip);
303 goto out;
304 }
305 if (diff <= 0) {
306 if (diff == 0) {
307 retval = 0;
308 proc_list_unlock();
309 goto out;
310 }
311 panic("chgproccnt: lost user");
312 }
313 if (newuip != NULL) {
314 uip = newuip;
315 newuip = NULL;
316 LIST_INSERT_HEAD(uipp, uip, ui_hash);
317 uip->ui_uid = uid;
318 uip->ui_proccnt = diff;
319 retval = diff;
320 proc_list_unlock();
321 goto out;
322 }
323 proc_list_unlock();
324 newuip = kalloc_type(struct uidinfo, Z_WAITOK | Z_NOFAIL);
325 goto again;
326out:
327 kfree_type(struct uidinfo, newuip);
328 return retval;
329}
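
/*
 * Typical usage (sketch, not an exhaustive list of call sites): process
 * creation and teardown paths bump the per-uid count with
 * chgproccnt(uid, 1) and drop it with chgproccnt(uid, -1), so that
 * per-user process limits can be checked against the returned count.
 */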
330
331/*
332 * Is p an inferior of the current process?
333 */
334int
335inferior(proc_t p)
336{
337 int retval = 0;
338
339 proc_list_lock();
340 for (; p != current_proc(); p = p->p_pptr) {
341 if (proc_getpid(p) == 0) {
342 goto out;
343 }
344 }
345 retval = 1;
346out:
347 proc_list_unlock();
348 return retval;
349}
350
351/*
352 * Is p an inferior of t ?
353 */
354int
355isinferior(proc_t p, proc_t t)
356{
357 int retval = 0;
358 int nchecked = 0;
359 proc_t start = p;
360
361 /* if p==t they are not inferior */
362 if (p == t) {
363 return 0;
364 }
365
366 proc_list_lock();
367 for (; p != t; p = p->p_pptr) {
368 nchecked++;
369
370 /* Detect here if we're in a cycle */
371 if ((proc_getpid(p) == 0) || (p->p_pptr == start) || (nchecked >= nprocs)) {
372 goto out;
373 }
374 }
375 retval = 1;
376out:
377 proc_list_unlock();
378 return retval;
379}
380
int
proc_isinferior(int pid1, int pid2)
{
	proc_t p = PROC_NULL;
	proc_t t = PROC_NULL;
	int retval = 0;

	if (((p = proc_find(pid1)) != (proc_t)0) && ((t = proc_find(pid2)) != (proc_t)0)) {
		retval = isinferior(p, t);
	}

	if (p != PROC_NULL) {
		proc_rele(p);
	}
	if (t != PROC_NULL) {
		proc_rele(t);
	}

	return retval;
}
401
/*
 * Returns the process identity of a given process. Calling this function is
 * not racy for the current process, or if a reference to the process is held.
 */
406struct proc_ident
407proc_ident(proc_t p)
408{
409 struct proc_ident ident = {
410 .p_pid = proc_pid(p),
411 .p_uniqueid = proc_uniqueid(p),
412 .p_idversion = proc_pidversion(p),
413 };
414
415 return ident;
416}
417
418proc_t
419proc_find_ident(struct proc_ident const *ident)
420{
421 proc_t proc = PROC_NULL;
422
423 proc = proc_find(pid: ident->p_pid);
424 if (proc == PROC_NULL) {
425 return PROC_NULL;
426 }
427
428 if (proc_uniqueid(proc) != ident->p_uniqueid ||
429 proc_pidversion(proc) != ident->p_idversion) {
430 proc_rele(p: proc);
431 return PROC_NULL;
432 }
433
434 return proc;
435}
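
/*
 * Note: an identity captured with proc_ident() can be revalidated later
 * through proc_find_ident(), which fails if the uniqueid or idversion no
 * longer match (e.g. the pid has been reused), so a stale identity is
 * never resolved to the wrong process.
 */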
436
437void
438uthread_reset_proc_refcount(uthread_t uth)
439{
440 uth->uu_proc_refcount = 0;
441
442#if PROC_REF_DEBUG
443 if (proc_ref_tracking_disabled) {
444 return;
445 }
446
447 struct uthread_proc_ref_info *upri = uth->uu_proc_ref_info;
448 uint32_t n = uth->uu_proc_ref_info->upri_pindex;
449
450 uth->uu_proc_ref_info->upri_pindex = 0;
451
452 if (n) {
453 for (unsigned i = 0; i < n; i++) {
454 btref_put(upri->upri_proc_stacks[i]);
455 }
456 bzero(upri->upri_proc_stacks, sizeof(btref_t) * n);
457 bzero(upri->upri_proc_ps, sizeof(proc_t) * n);
458 }
459#endif
460}
461
462#if PROC_REF_DEBUG
463void
464uthread_init_proc_refcount(uthread_t uth)
465{
466 if (proc_ref_tracking_disabled) {
467 return;
468 }
469
470 uth->uu_proc_ref_info = kalloc_type(struct uthread_proc_ref_info,
471 Z_ZERO | Z_WAITOK | Z_NOFAIL);
472}
473
474void
475uthread_destroy_proc_refcount(uthread_t uth)
476{
477 if (proc_ref_tracking_disabled) {
478 return;
479 }
480
481 struct uthread_proc_ref_info *upri = uth->uu_proc_ref_info;
482 uint32_t n = uth->uu_proc_ref_info->upri_pindex;
483
484 for (unsigned i = 0; i < n; i++) {
485 btref_put(upri->upri_proc_stacks[i]);
486 }
487
488 kfree_type(struct uthread_proc_ref_info, uth->uu_proc_ref_info);
489}
490
491void
492uthread_assert_zero_proc_refcount(uthread_t uth)
493{
494 if (proc_ref_tracking_disabled) {
495 return;
496 }
497
498 if (__improbable(uth->uu_proc_refcount != 0)) {
499 panic("Unexpected non zero uu_proc_refcount = %d (%p)",
500 uth->uu_proc_refcount, uth);
501 }
502}
503#endif
504
bool
proc_list_exited(proc_t p)
{
	return os_ref_get_raw_mask(&p->p_refcount) & P_REF_DEAD;
}
510
511#if CONFIG_DEBUG_SYSCALL_REJECTION
512uint64_t
513uthread_get_syscall_rejection_flags(void *uthread)
514{
515 uthread_t uth = (uthread_t) uthread;
516 return uth->syscall_rejection_flags;
517}
518
519uint64_t*
520uthread_get_syscall_rejection_mask(void *uthread)
521{
522 uthread_t uth = (uthread_t) uthread;
523 return uth->syscall_rejection_mask;
524}
525
526uint64_t*
527uthread_get_syscall_rejection_once_mask(void *uthread)
528{
529 uthread_t uth = (uthread_t) uthread;
530 return uth->syscall_rejection_once_mask;
531}
532
533bool
534uthread_syscall_rejection_is_enabled(void *uthread)
535{
536 uthread_t uth = (uthread_t) uthread;
537 return (debug_syscall_rejection_mode != 0) || (uth->syscall_rejection_flags & SYSCALL_REJECTION_FLAGS_FORCE_FATAL);
538}
539#endif /* CONFIG_DEBUG_SYSCALL_REJECTION */
540
541#if PROC_REF_DEBUG
542__attribute__((noinline))
543#endif /* PROC_REF_DEBUG */
544static void
545record_procref(proc_t p __unused, int count)
546{
547 uthread_t uth;
548
549 uth = current_uthread();
550 uth->uu_proc_refcount += count;
551
552#if PROC_REF_DEBUG
553 if (proc_ref_tracking_disabled) {
554 return;
555 }
556 struct uthread_proc_ref_info *upri = uth->uu_proc_ref_info;
557
558 if (upri->upri_pindex < NUM_PROC_REFS_TO_TRACK) {
559 upri->upri_proc_stacks[upri->upri_pindex] =
560 btref_get(__builtin_frame_address(0), BTREF_GET_NOWAIT);
561 upri->upri_proc_ps[upri->upri_pindex] = p;
562 upri->upri_pindex++;
563 }
564#endif /* PROC_REF_DEBUG */
565}
566
567/*!
568 * @function proc_ref_try_fast()
569 *
570 * @brief
571 * Tries to take a proc ref, unless it is in flux (being made, or dead).
572 *
573 * @returns
574 * - the new refcount value (including bits) on success,
575 * - 0 on failure.
576 */
static inline uint32_t
proc_ref_try_fast(proc_t p)
{
	uint32_t bits;

	proc_require(p, PROC_REQUIRE_ALLOW_ALL);

	bits = os_ref_retain_try_mask(&p->p_refcount, P_REF_BITS,
	    P_REF_NEW | P_REF_DEAD, NULL);
	if (bits) {
		record_procref(p, 1);
	}
	return bits;
}
591
592/*!
593 * @function proc_ref_wait()
594 *
595 * @brief
596 * Waits for the specified bits to clear, on the specified event.
597 */
598__attribute__((noinline))
599static void
600proc_ref_wait(proc_t p, event_t event, proc_ref_bits_t mask, bool locked)
601{
602 assert_wait(event, THREAD_UNINT | THREAD_WAIT_NOREPORT);
603
	if (os_ref_get_raw_mask(&p->p_refcount) & mask) {
605 uthread_t uth = current_uthread();
606
607 if (locked) {
608 proc_list_unlock();
609 }
610 uth->uu_wchan = event;
611 uth->uu_wmesg = "proc_refwait";
612 thread_block(THREAD_CONTINUE_NULL);
613 uth->uu_wchan = NULL;
614 uth->uu_wmesg = NULL;
615 if (locked) {
616 proc_list_lock();
617 }
618 } else {
		clear_wait(current_thread(), THREAD_AWAKENED);
620 }
621}
622
623/*!
624 * @function proc_ref_wait_for_exec()
625 *
626 * @brief
627 * Routine called by processes trying to acquire a ref while
628 * an exec is in flight.
629 *
630 * @discussion
631 * This function is called with a proc ref held on the proc,
632 * which will be given up until the @c P_REF_*_EXEC flags clear.
633 *
634 * @param p the proc, the caller owns a proc ref
635 * @param bits the result of @c proc_ref_try_fast() prior to calling this.
636 * @param locked whether the caller holds the @c proc_list_lock().
637 */
638__attribute__((noinline))
639static proc_t
640proc_ref_wait_for_exec(proc_t p, uint32_t bits, int locked)
641{
642 const proc_ref_bits_t mask = P_REF_WILL_EXEC | P_REF_IN_EXEC;
643
	/*
	 * The proc is in the middle of exec:
	 * trade our ref for a "wait ref",
	 * and wait for the proc_refwake_did_exec() call.
	 *
	 * Note: it's very unlikely that we'd loop back into the wait;
	 * it would only happen if the target proc were in exec
	 * again by the time we woke up.
	 */
	os_ref_retain_raw(&p->p_waitref, &p_refgrp);

	do {
		proc_rele(p);
		proc_ref_wait(p, &p->p_waitref, mask, locked);
		bits = proc_ref_try_fast(p);
	} while (__improbable(bits & mask));
660
661 proc_wait_release(p);
662
663 return bits ? p : PROC_NULL;
664}
665
666static inline bool
667proc_ref_needs_wait_for_exec(uint32_t bits)
668{
669 if (__probable((bits & (P_REF_WILL_EXEC | P_REF_IN_EXEC)) == 0)) {
670 return false;
671 }
672
673 if (bits & P_REF_IN_EXEC) {
674 return true;
675 }
676
	/*
	 * procs can't have outstanding refs while execing.
	 *
	 * In order to achieve that, proc_refdrain_will_exec()
	 * will drain outstanding references. It signals its intent
	 * with the P_REF_WILL_EXEC flag, and moves to P_REF_IN_EXEC
	 * when this is achieved.
	 *
	 * Most threads will block in proc_ref() when any of those
	 * flags is set. However, threads that already have
	 * an outstanding ref on this proc might want another
	 * before dropping them. To avoid deadlocks, we need
	 * to let threads with any outstanding reference take one
	 * when only P_REF_WILL_EXEC is set (which causes exec
	 * to be delayed).
	 *
	 * Note: the current thread will _always_ appear like it holds
	 *       one ref due to having taken one speculatively.
	 */
696 assert(current_uthread()->uu_proc_refcount >= 1);
697 return current_uthread()->uu_proc_refcount == 1;
698}
699
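/*
 * Drop a proc ref taken with proc_ref()/proc_find()/proc_self().
 * Dropping the last outstanding ref while a drain is in progress wakes
 * up the waiter in proc_refdrain()/proc_refdrain_will_exec().
 */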
int
proc_rele(proc_t p)
{
	uint32_t o_bits, n_bits;

	proc_require(p, PROC_REQUIRE_ALLOW_ALL);

	os_atomic_rmw_loop(&p->p_refcount, o_bits, n_bits, release, {
		n_bits = o_bits - (1u << P_REF_BITS);
		if ((n_bits >> P_REF_BITS) == 1) {
		        n_bits &= ~P_REF_DRAINING;
		}
	});
	record_procref(p, -1);

	/*
	 * p might be freed after this point.
	 */

	if (__improbable((o_bits & P_REF_DRAINING) && !(n_bits & P_REF_DRAINING))) {
		/*
		 * This wakeup can cause spurious wakeups,
		 * but proc_refdrain() can deal with those.
		 *
		 * Because the proc_zone memory is sequestered,
		 * it is safe to wake up a possibly freed address.
		 */
		wakeup(&p->p_refcount);
	}
	return 0;
}
731
bool
proc_is_shadow(proc_t p)
{
	return os_ref_get_raw_mask(&p->p_refcount) & P_REF_SHADOW;
}
737
738proc_t
739proc_self(void)
740{
741 proc_t p = current_proc();
742
743 /*
744 * Do not go through the logic of "wait for exec", it is meaningless.
745 * Only fail taking a ref for oneself if the proc is about to die.
746 */
747 return proc_ref_try_fast(p) ? p : PROC_NULL;
748}
749
750proc_t
751proc_ref(proc_t p, int locked)
752{
753 uint32_t bits;
754
755 bits = proc_ref_try_fast(p);
756 if (__improbable(!bits)) {
757 return PROC_NULL;
758 }
759
760 if (__improbable(proc_ref_needs_wait_for_exec(bits))) {
761 return proc_ref_wait_for_exec(p, bits, locked);
762 }
763
764 return p;
765}
766
767static void
768proc_wait_free(smr_node_t node)
769{
770 struct proc *p = __container_of(node, struct proc, p_smr_node);
771
772 proc_release_proc_task_struct(proc: p);
773}
774
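/*
 * Drop a "wait ref" (see proc_ref_wait_for_exec()); when the last wait
 * ref goes away, the proc/task allocation is released through an SMR
 * callback.
 */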
775void
776proc_wait_release(proc_t p)
777{
778 if (__probable(os_ref_release_raw(&p->p_waitref, &p_refgrp) == 0)) {
779 smr_proc_task_call(&p->p_smr_node, proc_and_task_size,
780 proc_wait_free);
781 }
782}
783
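/*
 * Locate an exited (zombie) process by PID and mark it as being waited
 * on (P_LIST_WAITING); returns PROC_NULL if the pid does not name a
 * zombie. Release the result with proc_drop_zombref().
 */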
784proc_t
785proc_find_zombref(int pid)
786{
787 proc_t p;
788
789 proc_list_lock();
790
791again:
792 p = phash_find_locked(pid);
793
794 /* should we bail? */
795 if ((p == PROC_NULL) || !proc_list_exited(p)) {
796 proc_list_unlock();
797 return PROC_NULL;
798 }
799
800 /* If someone else is controlling the (unreaped) zombie - wait */
801 if ((p->p_listflag & P_LIST_WAITING) != 0) {
		(void)msleep(&p->p_stat, &proc_list_mlock, PWAIT, "waitcoll", 0);
803 goto again;
804 }
805 p->p_listflag |= P_LIST_WAITING;
806
807 proc_list_unlock();
808
809 return p;
810}
811
812void
813proc_drop_zombref(proc_t p)
814{
815 proc_list_lock();
816 if ((p->p_listflag & P_LIST_WAITING) == P_LIST_WAITING) {
817 p->p_listflag &= ~P_LIST_WAITING;
		wakeup(&p->p_stat);
819 }
820 proc_list_unlock();
821}
822
823
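/*
 * Wait until the caller holds the only outstanding ref on an exited
 * proc, so that teardown can proceed safely.
 */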
void
proc_refdrain(proc_t p)
{
	uint32_t bits = os_ref_get_raw_mask(&p->p_refcount);

	assert(proc_list_exited(p));

	while ((bits >> P_REF_BITS) > 1) {
		if (os_atomic_cmpxchgv(&p->p_refcount, bits,
		    bits | P_REF_DRAINING, &bits, relaxed)) {
			proc_ref_wait(p, &p->p_refcount, P_REF_DRAINING, false);
		}
	}
}
838
839proc_t
840proc_refdrain_will_exec(proc_t p)
841{
842 const proc_ref_bits_t will_exec_mask = P_REF_WILL_EXEC | P_REF_DRAINING;
843
844 /*
845 * All the calls to proc_ref will wait
846 * for the flag to get cleared before returning a ref.
847 *
848 * (except for the case documented in proc_ref_needs_wait_for_exec()).
849 */
850
851 if (p == initproc) {
852 /* Do not wait in ref drain for launchd exec */
853 os_atomic_or(&p->p_refcount, P_REF_IN_EXEC, relaxed);
854 } else {
855 for (;;) {
856 uint32_t o_ref, n_ref;
857
858 os_atomic_rmw_loop(&p->p_refcount, o_ref, n_ref, relaxed, {
859 if ((o_ref >> P_REF_BITS) == 1) {
860 /*
861 * We drained successfully,
862 * move on to P_REF_IN_EXEC
863 */
864 n_ref = o_ref & ~will_exec_mask;
865 n_ref |= P_REF_IN_EXEC;
866 } else {
867 /*
 * Outstanding refs exist,
869 * mark our desire to stall
870 * proc_ref() callers with
871 * P_REF_WILL_EXEC.
872 */
873 n_ref = o_ref | will_exec_mask;
874 }
875 });
876
877 if (n_ref & P_REF_IN_EXEC) {
878 break;
879 }
880
			proc_ref_wait(p, &p->p_refcount, P_REF_DRAINING, false);
882 }
883 }
884
885 /* Return a ref to the caller */
886 os_ref_retain_mask(&p->p_refcount, P_REF_BITS, NULL);
	record_procref(p, 1);
888
889 return p;
890}
891
892void
893proc_refwake_did_exec(proc_t p)
894{
895 os_atomic_andnot(&p->p_refcount, P_REF_IN_EXEC, release);
896 wakeup(chan: &p->p_waitref);
897}
898
899void
900proc_ref_hold_proc_task_struct(proc_t proc)
901{
902 os_atomic_or(&proc->p_refcount, P_REF_PROC_HOLD, relaxed);
903}
904
905static void
906proc_free(proc_t proc)
907{
908 proc_ro_t proc_ro = proc->p_proc_ro;
909 kauth_cred_t cred;
910
911 if (proc_ro) {
912 cred = smr_serialized_load(&proc_ro->p_ucred);
913
914 kauth_cred_set(&cred, NOCRED);
915 zfree_ro(ZONE_ID_PROC_RO, proc_ro);
916 }
917 zfree(proc_task_zone, proc);
918}
919
920void
921proc_release_proc_task_struct(proc_t proc)
922{
923 uint32_t old_ref = os_atomic_andnot_orig(&proc->p_refcount, P_REF_PROC_HOLD, relaxed);
924 if ((old_ref & P_REF_TASK_HOLD) == 0) {
925 proc_free(proc);
926 }
927}
928
929void
930task_ref_hold_proc_task_struct(task_t task)
931{
932 proc_t proc_from_task = task_get_proc_raw(task);
933 os_atomic_or(&proc_from_task->p_refcount, P_REF_TASK_HOLD, relaxed);
934}
935
936void
937task_release_proc_task_struct(task_t task)
938{
939 proc_t proc_from_task = task_get_proc_raw(task);
940 uint32_t old_ref = os_atomic_andnot_orig(&proc_from_task->p_refcount, P_REF_TASK_HOLD, relaxed);
941
942 if ((old_ref & P_REF_PROC_HOLD) == 0) {
943 proc_free(proc: proc_from_task);
944 }
945}
946
947proc_t
948proc_parentholdref(proc_t p)
949{
950 proc_t parent = PROC_NULL;
951 proc_t pp;
952
953 proc_list_lock();
954loop:
955 pp = p->p_pptr;
956 if ((pp == PROC_NULL) || (pp->p_stat == SZOMB) || ((pp->p_listflag & (P_LIST_CHILDDRSTART | P_LIST_CHILDDRAINED)) == (P_LIST_CHILDDRSTART | P_LIST_CHILDDRAINED))) {
957 parent = PROC_NULL;
958 goto out;
959 }
960
961 if ((pp->p_listflag & (P_LIST_CHILDDRSTART | P_LIST_CHILDDRAINED)) == P_LIST_CHILDDRSTART) {
962 pp->p_listflag |= P_LIST_CHILDDRWAIT;
963 msleep(chan: &pp->p_childrencnt, mtx: &proc_list_mlock, pri: 0, wmesg: "proc_parent", ts: 0);
964 goto loop;
965 }
966
967 if ((pp->p_listflag & (P_LIST_CHILDDRSTART | P_LIST_CHILDDRAINED)) == 0) {
968 pp->p_parentref++;
969 parent = pp;
970 goto out;
971 }
972
973out:
974 proc_list_unlock();
975 return parent;
}

int
978proc_parentdropref(proc_t p, int listlocked)
979{
980 if (listlocked == 0) {
981 proc_list_lock();
982 }
983
984 if (p->p_parentref > 0) {
985 p->p_parentref--;
986 if ((p->p_parentref == 0) && ((p->p_listflag & P_LIST_PARENTREFWAIT) == P_LIST_PARENTREFWAIT)) {
987 p->p_listflag &= ~P_LIST_PARENTREFWAIT;
988 wakeup(chan: &p->p_parentref);
989 }
990 } else {
991 panic("proc_parentdropref -ve ref");
992 }
993 if (listlocked == 0) {
994 proc_list_unlock();
995 }
996
997 return 0;
998}
999
1000void
1001proc_childdrainstart(proc_t p)
1002{
1003#if __PROC_INTERNAL_DEBUG
1004 if ((p->p_listflag & P_LIST_CHILDDRSTART) == P_LIST_CHILDDRSTART) {
1005 panic("proc_childdrainstart: childdrain already started");
1006 }
1007#endif
1008 p->p_listflag |= P_LIST_CHILDDRSTART;
1009 /* wait for all that hold parentrefs to drop */
1010 while (p->p_parentref > 0) {
1011 p->p_listflag |= P_LIST_PARENTREFWAIT;
1012 msleep(chan: &p->p_parentref, mtx: &proc_list_mlock, pri: 0, wmesg: "proc_childdrainstart", ts: 0);
1013 }
1014}
1015
1016
1017void
1018proc_childdrainend(proc_t p)
1019{
1020#if __PROC_INTERNAL_DEBUG
1021 if (p->p_childrencnt > 0) {
		panic("exiting: children still hanging around");
1023 }
1024#endif
1025 p->p_listflag |= P_LIST_CHILDDRAINED;
1026 if ((p->p_listflag & (P_LIST_CHILDLKWAIT | P_LIST_CHILDDRWAIT)) != 0) {
1027 p->p_listflag &= ~(P_LIST_CHILDLKWAIT | P_LIST_CHILDDRWAIT);
1028 wakeup(chan: &p->p_childrencnt);
1029 }
1030}
1031
1032void
1033proc_checkdeadrefs(__unused proc_t p)
1034{
1035 uint32_t bits;
1036
1037 bits = os_ref_release_raw_mask(&p->p_refcount, P_REF_BITS, NULL);
1038 bits &= ~(P_REF_SHADOW | P_REF_PROC_HOLD | P_REF_TASK_HOLD);
1039 if (bits != P_REF_DEAD) {
1040 panic("proc being freed and unexpected refcount %p:%d:0x%x", p,
1041 bits >> P_REF_BITS, bits & P_REF_MASK);
1042 }
1043#if __PROC_INTERNAL_DEBUG
1044 if (p->p_childrencnt != 0) {
1045 panic("proc being freed and pending children cnt %p:%d", p, p->p_childrencnt);
1046 }
1047 if (p->p_parentref != 0) {
1048 panic("proc being freed and pending parentrefs %p:%d", p, p->p_parentref);
1049 }
1050#endif
1051}
1052
1053
__attribute__((always_inline, visibility("hidden")))
void
proc_require(proc_t proc, proc_require_flags_t flags)
{
	if ((flags & PROC_REQUIRE_ALLOW_NULL) && proc == PROC_NULL) {
		return;
	}
	zone_id_require(ZONE_ID_PROC_TASK, proc_and_task_size, proc);
}
1063
1064pid_t
1065proc_getpid(proc_t p)
1066{
1067 if (p == kernproc) {
1068 return 0;
1069 }
1070
1071 return p->p_pid;
1072}
1073
1074int
1075proc_pid(proc_t p)
1076{
1077 if (p != NULL) {
1078 proc_require(proc: p, flags: PROC_REQUIRE_ALLOW_ALL);
1079 return proc_getpid(p);
1080 }
1081 return -1;
1082}
1083
1084int
1085proc_ppid(proc_t p)
1086{
1087 if (p != NULL) {
1088 proc_require(proc: p, flags: PROC_REQUIRE_ALLOW_ALL);
1089 return p->p_ppid;
1090 }
1091 return -1;
1092}
1093
1094int
1095proc_original_ppid(proc_t p)
1096{
1097 if (p != NULL) {
1098 proc_require(proc: p, flags: PROC_REQUIRE_ALLOW_ALL);
1099 return p->p_original_ppid;
1100 }
1101 return -1;
1102}
1103
1104int
1105proc_starttime(proc_t p, struct timeval *tv)
1106{
1107 if (p != NULL && tv != NULL) {
1108 tv->tv_sec = p->p_start.tv_sec;
1109 tv->tv_usec = p->p_start.tv_usec;
1110 return 0;
1111 }
1112 return EINVAL;
1113}
1114
1115int
1116proc_selfpid(void)
1117{
1118 return proc_getpid(p: current_proc());
1119}
1120
1121int
1122proc_selfppid(void)
1123{
1124 return current_proc()->p_ppid;
1125}
1126
1127uint64_t
1128proc_selfcsflags(void)
1129{
1130 return proc_getcsflags(current_proc());
1131}
1132
1133int
1134proc_csflags(proc_t p, uint64_t *flags)
1135{
1136 if (p && flags) {
1137 proc_require(proc: p, flags: PROC_REQUIRE_ALLOW_ALL);
1138 *flags = proc_getcsflags(p);
1139 return 0;
1140 }
1141 return EINVAL;
1142}
1143
1144boolean_t
1145proc_is_simulated(const proc_t p)
1146{
1147#ifdef XNU_TARGET_OS_OSX
1148 if (p != NULL) {
1149 switch (proc_platform(p)) {
1150 case PLATFORM_IOSSIMULATOR:
1151 case PLATFORM_TVOSSIMULATOR:
1152 case PLATFORM_WATCHOSSIMULATOR:
1153 return TRUE;
1154 default:
1155 return FALSE;
1156 }
1157 }
1158#else /* !XNU_TARGET_OS_OSX */
1159 (void)p;
1160#endif
1161 return FALSE;
1162}
1163
1164uint32_t
1165proc_platform(const proc_t p)
1166{
1167 if (p != NULL) {
1168 return proc_get_ro(p)->p_platform_data.p_platform;
1169 }
1170 return (uint32_t)-1;
1171}
1172
1173uint32_t
1174proc_min_sdk(proc_t p)
1175{
1176 if (p != NULL) {
1177 return proc_get_ro(p)->p_platform_data.p_min_sdk;
1178 }
1179 return (uint32_t)-1;
1180}
1181
1182uint32_t
1183proc_sdk(proc_t p)
1184{
1185 if (p != NULL) {
1186 return proc_get_ro(p)->p_platform_data.p_sdk;
1187 }
1188 return (uint32_t)-1;
1189}
1190
1191void
1192proc_setplatformdata(proc_t p, uint32_t platform, uint32_t min_sdk, uint32_t sdk)
1193{
1194 proc_ro_t ro;
1195 struct proc_platform_ro_data platform_data;
1196
1197 ro = proc_get_ro(p);
1198 platform_data = ro->p_platform_data;
1199 platform_data.p_platform = platform;
1200 platform_data.p_min_sdk = min_sdk;
1201 platform_data.p_sdk = sdk;
1202
1203 zalloc_ro_update_field(ZONE_ID_PROC_RO, ro, p_platform_data, &platform_data);
1204}
1205
1206#if CONFIG_DTRACE
1207int
1208dtrace_proc_selfpid(void)
1209{
1210 return proc_selfpid();
1211}
1212
1213int
1214dtrace_proc_selfppid(void)
1215{
1216 return proc_selfppid();
1217}
1218
1219uid_t
1220dtrace_proc_selfruid(void)
1221{
1222 return current_proc()->p_ruid;
1223}
1224#endif /* CONFIG_DTRACE */
1225
1226/*!
1227 * @function proc_parent()
1228 *
1229 * @brief
1230 * Returns a ref on the parent of @c p.
1231 *
1232 * @discussion
1233 * Returns a reference on the parent, or @c PROC_NULL
1234 * if both @c p and its parent are zombies.
1235 *
1236 * If the parent is currently dying, then this function waits
1237 * for the situation to be resolved.
1238 *
1239 * This function never returns @c PROC_NULL if @c p isn't
1240 * a zombie (@c p_stat is @c SZOMB) yet.
1241 */
1242proc_t
1243proc_parent(proc_t p)
1244{
1245 proc_t parent;
1246 proc_t pp;
1247
1248 proc_list_lock();
1249
1250 while (1) {
1251 pp = p->p_pptr;
1252 parent = proc_ref(p: pp, true);
1253 /* Check if we got a proc ref and it is still the parent */
1254 if (parent != PROC_NULL) {
1255 if (parent == p->p_pptr) {
1256 /*
1257 * We have a ref on the parent and it is still
1258 * our parent, return the ref
1259 */
1260 proc_list_unlock();
1261 return parent;
1262 }
1263
1264 /*
1265 * Our parent changed while we slept on proc_ref,
1266 * drop the ref on old parent and retry.
1267 */
1268 proc_rele(p: parent);
1269 continue;
1270 }
1271
1272 if (pp != p->p_pptr) {
1273 /*
1274 * We didn't get a ref, but parent changed from what
1275 * we last saw before we slept in proc_ref, try again
1276 * with new parent.
1277 */
1278 continue;
1279 }
1280
1281 if ((pp->p_listflag & P_LIST_CHILDDRAINED) == 0) {
1282 /* Parent did not change, but we also did not get a
1283 * ref on parent, sleep if the parent has not drained
1284 * its children and then retry.
1285 */
1286 pp->p_listflag |= P_LIST_CHILDLKWAIT;
1287 msleep(chan: &pp->p_childrencnt, mtx: &proc_list_mlock, pri: 0, wmesg: "proc_parent", ts: 0);
1288 continue;
1289 }
1290
1291 /* Parent has died and drained its children and we still
1292 * point to it, return NULL.
1293 */
1294 proc_list_unlock();
1295 return PROC_NULL;
1296 }
1297}
1298
1299static boolean_t
1300proc_parent_is_currentproc(proc_t p)
1301{
1302 boolean_t ret = FALSE;
1303
1304 proc_list_lock();
1305 if (p->p_pptr == current_proc()) {
1306 ret = TRUE;
1307 }
1308
1309 proc_list_unlock();
1310 return ret;
1311}
1312
1313void
1314proc_name(int pid, char * buf, int size)
1315{
1316 proc_t p;
1317
1318 if (size <= 0) {
1319 return;
1320 }
1321
1322 bzero(s: buf, n: size);
1323
1324 if ((p = proc_find(pid)) != PROC_NULL) {
1325 strlcpy(dst: buf, src: &p->p_comm[0], n: size);
1326 proc_rele(p);
1327 }
1328}
1329
1330void
1331proc_name_kdp(proc_t p, char * buf, int size)
1332{
1333 if (p == PROC_NULL) {
1334 return;
1335 }
1336
1337 if ((size_t)size > sizeof(p->p_comm)) {
1338 strlcpy(dst: buf, src: &p->p_name[0], MIN((int)sizeof(p->p_name), size));
1339 } else {
1340 strlcpy(dst: buf, src: &p->p_comm[0], MIN((int)sizeof(p->p_comm), size));
1341 }
1342}
1343
1344boolean_t
1345proc_binary_uuid_kdp(task_t task, uuid_t uuid)
1346{
1347 proc_t p = get_bsdtask_info(task);
1348 if (p == PROC_NULL) {
1349 return FALSE;
1350 }
1351
1352 proc_getexecutableuuid(p, uuid, sizeof(uuid_t));
1353
1354 return TRUE;
1355}
1356
1357int
1358proc_threadname_kdp(void * uth, char * buf, size_t size)
1359{
1360 if (size < MAXTHREADNAMESIZE) {
1361 /* this is really just a protective measure for the future in
1362 * case the thread name size in stackshot gets out of sync with
1363 * the BSD max thread name size. Note that bsd_getthreadname
1364 * doesn't take input buffer size into account. */
1365 return -1;
1366 }
1367
1368 if (uth != NULL) {
1369 bsd_getthreadname(uth, buffer: buf);
1370 }
1371 return 0;
1372}
1373
1374
/* Note that this function is generally going to be called from stackshot,
 * and the arguments will be coming from a struct which is declared packed;
 * thus the input arguments will in general be unaligned. We have to handle
 * that here. */
1379void
1380proc_starttime_kdp(void *p, unaligned_u64 *tv_sec, unaligned_u64 *tv_usec, unaligned_u64 *abstime)
1381{
1382 proc_t pp = (proc_t)p;
1383 if (pp != PROC_NULL) {
1384 if (tv_sec != NULL) {
1385 *tv_sec = pp->p_start.tv_sec;
1386 }
1387 if (tv_usec != NULL) {
1388 *tv_usec = pp->p_start.tv_usec;
1389 }
1390 if (abstime != NULL) {
1391 if (pp->p_stats != NULL) {
1392 *abstime = pp->p_stats->ps_start;
1393 } else {
1394 *abstime = 0;
1395 }
1396 }
1397 }
1398}
1399
1400void
1401proc_archinfo_kdp(void* p, cpu_type_t* cputype, cpu_subtype_t* cpusubtype)
1402{
1403 proc_t pp = (proc_t)p;
1404 if (pp != PROC_NULL) {
1405 *cputype = pp->p_cputype;
1406 *cpusubtype = pp->p_cpusubtype;
1407 }
1408}
1409
1410char *
1411proc_name_address(void *p)
1412{
1413 return &((proc_t)p)->p_comm[0];
1414}
1415
1416char *
1417proc_longname_address(void *p)
1418{
1419 return &((proc_t)p)->p_name[0];
1420}
1421
1422char *
1423proc_best_name(proc_t p)
1424{
1425 if (p->p_name[0] != '\0') {
1426 return &p->p_name[0];
1427 }
1428 return &p->p_comm[0];
1429}
1430
1431void
1432proc_selfname(char * buf, int size)
1433{
1434 proc_t p;
1435
1436 if ((p = current_proc()) != (proc_t)0) {
1437 strlcpy(dst: buf, src: &p->p_name[0], n: size);
1438 }
1439}
1440
1441void
1442proc_signal(int pid, int signum)
1443{
1444 proc_t p;
1445
1446 if ((p = proc_find(pid)) != PROC_NULL) {
1447 psignal(p, sig: signum);
1448 proc_rele(p);
1449 }
1450}
1451
1452int
1453proc_issignal(int pid, sigset_t mask)
1454{
1455 proc_t p;
1456 int error = 0;
1457
1458 if ((p = proc_find(pid)) != PROC_NULL) {
1459 error = proc_pendingsignals(p, mask);
1460 proc_rele(p);
1461 }
1462
1463 return error;
1464}
1465
1466int
1467proc_noremotehang(proc_t p)
1468{
1469 int retval = 0;
1470
1471 if (p) {
1472 retval = p->p_flag & P_NOREMOTEHANG;
1473 }
1474 return retval? 1: 0;
1475}
1476
1477int
1478proc_exiting(proc_t p)
1479{
1480 int retval = 0;
1481
1482 if (p) {
1483 retval = p->p_lflag & P_LEXIT;
1484 }
1485 return retval? 1: 0;
1486}
1487
1488int
1489proc_in_teardown(proc_t p)
1490{
1491 int retval = 0;
1492
1493 if (p) {
1494 retval = p->p_lflag & P_LPEXIT;
1495 }
1496 return retval? 1: 0;
1497}
1498
1499int
1500proc_lvfork(proc_t p __unused)
1501{
1502 return 0;
1503}
1504
1505int
1506proc_increment_ru_oublock(proc_t p, long *origvalp)
1507{
1508 long origval;
1509
1510 if (p && p->p_stats) {
1511 origval = OSIncrementAtomicLong(address: &p->p_stats->p_ru.ru_oublock);
1512 if (origvalp) {
1513 *origvalp = origval;
1514 }
1515 return 0;
1516 }
1517
1518 return EINVAL;
1519}
1520
1521int
1522proc_isabortedsignal(proc_t p)
1523{
1524 if ((p != kernproc) && current_thread_aborted() &&
1525 (!(p->p_acflag & AXSIG) || (p->exit_thread != current_thread()) ||
1526 (p->p_sigacts.ps_sig < 1) || (p->p_sigacts.ps_sig >= NSIG) ||
1527 !hassigprop(sig: p->p_sigacts.ps_sig, SA_CORE))) {
1528 return 1;
1529 }
1530
1531 return 0;
1532}
1533
1534int
1535proc_forcequota(proc_t p)
1536{
1537 int retval = 0;
1538
1539 if (p) {
1540 retval = p->p_flag & P_FORCEQUOTA;
1541 }
1542 return retval? 1: 0;
1543}
1544
1545int
1546proc_suser(proc_t p)
1547{
1548 int error;
1549
1550 smr_proc_task_enter();
1551 error = suser(cred: proc_ucred_smr(p), acflag: &p->p_acflag);
1552 smr_proc_task_leave();
1553 return error;
1554}
1555
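/*
 * Return the Mach task associated with a proc, or NULL if the proc
 * currently has no task (P_LHASTASK is not set).
 */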
1556task_t
1557proc_task(proc_t proc)
1558{
1559 task_t task_from_proc = proc_get_task_raw(proc);
1560 return (proc->p_lflag & P_LHASTASK) ? task_from_proc : NULL;
1561}
1562
1563void
1564proc_set_task(proc_t proc, task_t task)
1565{
1566 task_t task_from_proc = proc_get_task_raw(proc);
1567 if (task == NULL) {
1568 proc->p_lflag &= ~P_LHASTASK;
1569 } else {
1570 if (task != task_from_proc) {
1571 panic("proc_set_task trying to set random task %p", task);
1572 }
1573 proc->p_lflag |= P_LHASTASK;
1574 }
1575}
1576
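/*
 * The proc and its task are allocated together in a single
 * ZONE_ID_PROC_TASK element, with the task placed proc_struct_size
 * bytes after the proc; these helpers convert between the two without
 * taking any reference.
 */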
1577task_t
1578proc_get_task_raw(proc_t proc)
1579{
1580 return (task_t)((uintptr_t)proc + proc_struct_size);
1581}
1582
1583proc_t
1584task_get_proc_raw(task_t task)
1585{
1586 return (proc_t)((uintptr_t)task - proc_struct_size);
1587}
1588
1589/*
1590 * Obtain the first thread in a process
1591 *
1592 * XXX This is a bad thing to do; it exists predominantly to support the
1593 * XXX use of proc_t's in places that should really be using
1594 * XXX thread_t's instead. This maintains historical behaviour, but really
1595 * XXX needs an audit of the context (proxy vs. not) to clean up.
1596 */
1597thread_t
1598proc_thread(proc_t proc)
1599{
1600 LCK_MTX_ASSERT(&proc->p_mlock, LCK_MTX_ASSERT_OWNED);
1601
1602 uthread_t uth = TAILQ_FIRST(&proc->p_uthlist);
1603
1604 if (uth != NULL) {
1605 return get_machthread(uth);
1606 }
1607
1608 return NULL;
1609}
1610
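/*
 * Return the proc's credential with no lifetime guarantee; callers are
 * expected to use one of the wrappers below, which require either an
 * SMR critical section (proc_ucred_smr()) or the per-proc ucred mutex
 * (proc_ucred_locked()).
 */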
1611kauth_cred_t
1612proc_ucred_unsafe(proc_t p)
1613{
1614 kauth_cred_t cred = smr_serialized_load(&proc_get_ro(p)->p_ucred);
1615
1616 return kauth_cred_require(cred);
1617}
1618
1619kauth_cred_t
1620proc_ucred_smr(proc_t p)
1621{
1622 assert(smr_entered(&smr_proc_task));
1623 return proc_ucred_unsafe(p);
1624}
1625
1626kauth_cred_t
1627proc_ucred_locked(proc_t p)
1628{
1629 LCK_MTX_ASSERT(&p->p_ucred_mlock, LCK_ASSERT_OWNED);
1630 return proc_ucred_unsafe(p);
1631}
1632
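/*
 * Return the BSD uthread backing the calling thread.
 */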
1633struct uthread *
1634current_uthread(void)
1635{
1636 return get_bsdthread_info(current_thread());
1637}
1638
1639
1640int
1641proc_is64bit(proc_t p)
1642{
1643 return IS_64BIT_PROCESS(p);
1644}
1645
1646int
1647proc_is64bit_data(proc_t p)
1648{
1649 assert(proc_task(p));
1650 return (int)task_get_64bit_data(task: proc_task(proc: p));
1651}
1652
1653int
1654proc_isinitproc(proc_t p)
1655{
1656 if (initproc == NULL) {
1657 return 0;
1658 }
1659 return p == initproc;
1660}
1661
1662int
1663proc_pidversion(proc_t p)
1664{
1665 return proc_get_ro(p)->p_idversion;
1666}
1667
1668void
1669proc_setpidversion(proc_t p, int idversion)
1670{
1671 zalloc_ro_update_field(ZONE_ID_PROC_RO, proc_get_ro(p), p_idversion,
1672 &idversion);
1673}
1674
1675uint32_t
1676proc_persona_id(proc_t p)
1677{
1678 return (uint32_t)persona_id_from_proc(p);
1679}
1680
1681uint32_t
1682proc_getuid(proc_t p)
1683{
1684 return p->p_uid;
1685}
1686
1687uint32_t
1688proc_getgid(proc_t p)
1689{
1690 return p->p_gid;
1691}
1692
1693uint64_t
1694proc_uniqueid(proc_t p)
1695{
1696 if (p == kernproc) {
1697 return 0;
1698 }
1699
1700 return proc_get_ro(p)->p_uniqueid;
1701}
1702
1703uint64_t proc_uniqueid_task(void *p_arg, void *t);
/*
 * During exec, two tasks point at the proc. This function is used
 * to give tasks a unique ID; we make the matching task have the
 * proc's uniqueid, and any other task gets the high bit flipped.
 * (We need to try to avoid returning UINT64_MAX, which is the
 * uniqueid of a task without a proc, e.g. while exiting.)
 *
 * Only used by get_task_uniqueid(); do not add additional callers.
 */
1713uint64_t
1714proc_uniqueid_task(void *p_arg, void *t __unused)
1715{
1716 proc_t p = p_arg;
1717 uint64_t uniqueid = proc_uniqueid(p);
1718 return uniqueid ^ (__probable(!proc_is_shadow(p)) ? 0 : (1ull << 63));
1719}
1720
1721uint64_t
1722proc_puniqueid(proc_t p)
1723{
1724 return p->p_puniqueid;
1725}
1726
1727void
1728proc_coalitionids(__unused proc_t p, __unused uint64_t ids[COALITION_NUM_TYPES])
1729{
1730#if CONFIG_COALITIONS
1731 task_coalition_ids(task: proc_task(proc: p), ids);
1732#else
1733 memset(ids, 0, sizeof(uint64_t[COALITION_NUM_TYPES]));
1734#endif
1735 return;
1736}
1737
1738uint64_t
1739proc_was_throttled(proc_t p)
1740{
1741 return p->was_throttled;
1742}
1743
1744uint64_t
1745proc_did_throttle(proc_t p)
1746{
1747 return p->did_throttle;
1748}
1749
1750int
1751proc_getcdhash(proc_t p, unsigned char *cdhash)
1752{
1753 if (p == kernproc) {
1754 return EINVAL;
1755 }
1756 return vn_getcdhash(vp: p->p_textvp, offset: p->p_textoff, cdhash);
1757}
1758
1759uint64_t
1760proc_getcsflags(proc_t p)
1761{
1762 return proc_get_ro(p)->p_csflags;
1763}
1764
1765/* This variant runs in stackshot context and must not take locks. */
1766uint64_t
1767proc_getcsflags_kdp(void * p)
1768{
1769 proc_t proc = (proc_t)p;
1770 if (p == PROC_NULL) {
1771 return 0;
1772 }
1773 return proc_getcsflags(p: proc);
1774}
1775
1776void
1777proc_csflags_update(proc_t p, uint64_t flags)
1778{
1779 uint32_t csflags = (uint32_t)flags;
1780
1781 if (p != kernproc) {
1782 zalloc_ro_update_field(ZONE_ID_PROC_RO, proc_get_ro(p),
1783 p_csflags, &csflags);
1784 }
1785}
1786
1787void
1788proc_csflags_set(proc_t p, uint64_t flags)
1789{
1790 proc_csflags_update(p, flags: proc_getcsflags(p) | (uint32_t)flags);
1791}
1792
1793void
1794proc_csflags_clear(proc_t p, uint64_t flags)
1795{
1796 proc_csflags_update(p, flags: proc_getcsflags(p) & ~(uint32_t)flags);
1797}
1798
1799uint8_t *
1800proc_syscall_filter_mask(proc_t p)
1801{
1802 return proc_get_ro(p)->syscall_filter_mask;
1803}
1804
1805void
1806proc_syscall_filter_mask_set(proc_t p, uint8_t *mask)
1807{
1808 zalloc_ro_update_field(ZONE_ID_PROC_RO, proc_get_ro(p),
1809 syscall_filter_mask, &mask);
1810}
1811
1812int
1813proc_exitstatus(proc_t p)
1814{
1815 return p->p_xstat & 0xffff;
1816}
1817
1818bool
1819proc_is_zombie(proc_t p)
1820{
1821 return proc_list_exited(p);
1822}
1823
1824void
1825proc_setexecutableuuid(proc_t p, const unsigned char *uuid)
1826{
1827 memcpy(dst: p->p_uuid, src: uuid, n: sizeof(p->p_uuid));
1828}
1829
1830const unsigned char *
1831proc_executableuuid_addr(proc_t p)
1832{
1833 return &p->p_uuid[0];
1834}
1835
1836void
1837proc_getexecutableuuid(proc_t p, unsigned char *uuidbuf, unsigned long size)
1838{
1839 if (size >= sizeof(uuid_t)) {
1840 memcpy(dst: uuidbuf, src: proc_executableuuid_addr(p), n: sizeof(uuid_t));
1841 }
1842}
1843
1844void
1845proc_getresponsibleuuid(proc_t p, unsigned char *uuidbuf, unsigned long size)
1846{
1847 if (size >= sizeof(uuid_t)) {
1848 memcpy(dst: uuidbuf, src: p->p_responsible_uuid, n: sizeof(uuid_t));
1849 }
1850}
1851
1852void
1853proc_setresponsibleuuid(proc_t p, unsigned char *uuidbuf, unsigned long size)
1854{
1855 if (p != NULL && uuidbuf != NULL && size >= sizeof(uuid_t)) {
1856 memcpy(dst: p->p_responsible_uuid, src: uuidbuf, n: sizeof(uuid_t));
1857 }
1858 return;
1859}
1860
1861/* Return vnode for executable with an iocount. Must be released with vnode_put() */
1862vnode_t
1863proc_getexecutablevnode(proc_t p)
1864{
1865 vnode_t tvp = p->p_textvp;
1866
1867 if (tvp != NULLVP) {
1868 if (vnode_getwithref(vp: tvp) == 0) {
1869 return tvp;
1870 }
1871 }
1872
1873 return NULLVP;
1874}
1875
/*
 * Similar to proc_getexecutablevnode(), but returns NULLVP if the vnode is
 * being reclaimed rather than blocking until the reclaim is done.
 */
1880vnode_t
1881proc_getexecutablevnode_noblock(proc_t p)
1882{
1883 vnode_t tvp = p->p_textvp;
1884
1885 if (tvp != NULLVP) {
1886 if (vnode_getwithref_noblock(vp: tvp) == 0) {
1887 return tvp;
1888 }
1889 }
1890
1891 return NULLVP;
1892}
1893
1894int
1895proc_gettty(proc_t p, vnode_t *vp)
1896{
1897 struct session *procsp;
1898 struct pgrp *pg;
1899 int err = EINVAL;
1900
1901 if (!p || !vp) {
1902 return EINVAL;
1903 }
1904
1905 if ((pg = proc_pgrp(p, &procsp)) != PGRP_NULL) {
1906 session_lock(sess: procsp);
1907 vnode_t ttyvp = procsp->s_ttyvp;
1908 int ttyvid = procsp->s_ttyvid;
1909 if (ttyvp) {
1910 vnode_hold(vp: ttyvp);
1911 }
1912 session_unlock(sess: procsp);
1913
1914 if (ttyvp) {
1915 if (vnode_getwithvid(ttyvp, ttyvid) == 0) {
1916 *vp = ttyvp;
1917 err = 0;
1918 }
1919 vnode_drop(vp: ttyvp);
1920 } else {
1921 err = ENOENT;
1922 }
1923
1924 pgrp_rele(pgrp: pg);
1925 }
1926
1927 return err;
1928}
1929
1930int
1931proc_gettty_dev(proc_t p, dev_t *devp)
1932{
1933 struct pgrp *pg;
1934 dev_t dev = NODEV;
1935
1936 if ((pg = proc_pgrp(p, NULL)) != PGRP_NULL) {
1937 dev = os_atomic_load(&pg->pg_session->s_ttydev, relaxed);
1938 pgrp_rele(pgrp: pg);
1939 }
1940
1941 if (dev == NODEV) {
1942 return EINVAL;
1943 }
1944
1945 *devp = dev;
1946 return 0;
1947}
1948
1949int
1950proc_selfexecutableargs(uint8_t *buf, size_t *buflen)
1951{
1952 proc_t p = current_proc();
1953
1954 // buflen must always be provided
1955 if (buflen == NULL) {
1956 return EINVAL;
1957 }
1958
1959 // If a buf is provided, there must be at least enough room to fit argc
1960 if (buf && *buflen < sizeof(p->p_argc)) {
1961 return EINVAL;
1962 }
1963
1964 if (!p->user_stack) {
1965 return EINVAL;
1966 }
1967
1968 if (buf == NULL) {
1969 *buflen = p->p_argslen + sizeof(p->p_argc);
1970 return 0;
1971 }
1972
1973 // Copy in argc to the first 4 bytes
1974 memcpy(dst: buf, src: &p->p_argc, n: sizeof(p->p_argc));
1975
1976 if (*buflen > sizeof(p->p_argc) && p->p_argslen > 0) {
1977 // See memory layout comment in kern_exec.c:exec_copyout_strings()
1978 // We want to copy starting from `p_argslen` bytes away from top of stack
1979 return copyin(p->user_stack - p->p_argslen,
1980 buf + sizeof(p->p_argc),
1981 MIN(p->p_argslen, *buflen - sizeof(p->p_argc)));
1982 } else {
1983 return 0;
1984 }
1985}
1986
1987off_t
1988proc_getexecutableoffset(proc_t p)
1989{
1990 return p->p_textoff;
1991}
1992
1993void
1994bsd_set_dependency_capable(task_t task)
1995{
1996 proc_t p = get_bsdtask_info(task);
1997
1998 if (p) {
1999 OSBitOrAtomic(P_DEPENDENCY_CAPABLE, &p->p_flag);
2000 }
2001}
2002
2003
2004#ifndef __arm__
2005int
2006IS_64BIT_PROCESS(proc_t p)
2007{
2008 if (p && (p->p_flag & P_LP64)) {
2009 return 1;
2010 } else {
2011 return 0;
2012 }
2013}
2014#endif
2015
2016SMRH_TRAITS_DEFINE_SCALAR(pid_hash_traits, struct proc, p_pid, p_hash,
2017 .domain = &smr_proc_task);
2018
2019/*
2020 * Locate a process by number
2021 */
2022proc_t
2023phash_find_locked(pid_t pid)
2024{
2025 smrh_key_t key = SMRH_SCALAR_KEY(pid);
2026
2027 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2028
2029 if (!pid) {
2030 return kernproc;
2031 }
2032
2033 return smr_hash_serialized_find(&pid_hash, key, &pid_hash_traits);
2034}
2035
2036void
2037phash_replace_locked(struct proc *old_proc, struct proc *new_proc)
2038{
2039 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2040
2041 smr_hash_serialized_replace(&pid_hash,
2042 &old_proc->p_hash, &new_proc->p_hash, &pid_hash_traits);
2043}
2044
2045void
2046phash_insert_locked(struct proc *p)
2047{
2048 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2049
2050 smr_hash_serialized_insert(&pid_hash, &p->p_hash, &pid_hash_traits);
2051}
2052
2053void
2054phash_remove_locked(struct proc *p)
2055{
2056 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2057
2058 smr_hash_serialized_remove(&pid_hash, &p->p_hash, &pid_hash_traits);
2059}
2060
2061proc_t
2062proc_find_noref_smr(int pid)
2063{
2064 smrh_key_t key = SMRH_SCALAR_KEY(pid);
2065
2066 if (__improbable(pid == 0)) {
2067 return kernproc;
2068 }
2069
2070 return smr_hash_entered_find(&pid_hash, key, &pid_hash_traits);
2071}
2072
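/*
 * Locate a live process by PID and return it with a reference held;
 * drop the reference with proc_rele(). Shadow procs left behind by
 * exec are never returned.
 */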
2073proc_t
2074proc_find(int pid)
2075{
2076 smrh_key_t key = SMRH_SCALAR_KEY(pid);
2077 proc_t p;
2078 uint32_t bits;
2079 bool shadow_proc = false;
2080
2081 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
2082
2083 if (!pid) {
2084 return proc_ref(p: kernproc, false);
2085 }
2086
2087retry:
2088 p = PROC_NULL;
2089 bits = 0;
2090 shadow_proc = false;
2091
2092 smr_proc_task_enter();
2093 p = smr_hash_entered_find(&pid_hash, key, &pid_hash_traits);
2094 if (p) {
2095 bits = proc_ref_try_fast(p);
2096 shadow_proc = !!proc_is_shadow(p);
2097 }
2098 smr_proc_task_leave();
2099
2100 /* Retry if the proc is a shadow proc */
2101 if (shadow_proc) {
2102 if (bits) {
2103 proc_rele(p);
2104 }
2105 goto retry;
2106 }
2107
2108 if (__improbable(!bits)) {
2109 return PROC_NULL;
2110 }
2111
2112 if (__improbable(proc_ref_needs_wait_for_exec(bits))) {
2113 p = proc_ref_wait_for_exec(p, bits, false);
2114 /*
2115 * Retry if exec was successful since the old proc
2116 * would have become a shadow proc and might be in
2117 * middle of exiting.
2118 */
2119 if (p == PROC_NULL || proc_is_shadow(p)) {
2120 if (p != PROC_NULL) {
2121 proc_rele(p);
2122 }
2123 goto retry;
2124 }
2125 }
2126
2127 return p;
2128}
2129
2130proc_t
2131proc_find_locked(int pid)
2132{
2133 proc_t p = PROC_NULL;
2134
2135retry:
2136 p = phash_find_locked(pid);
2137 if (p != PROC_NULL) {
2138 uint32_t bits;
2139
2140 assert(!proc_is_shadow(p));
2141
2142 bits = proc_ref_try_fast(p);
2143 if (__improbable(!bits)) {
2144 return PROC_NULL;
2145 }
2146
2147 if (__improbable(proc_ref_needs_wait_for_exec(bits))) {
2148 p = proc_ref_wait_for_exec(p, bits, true);
2149 /*
2150 * Retry if exec was successful since the old proc
2151 * would have become a shadow proc and might be in
2152 * middle of exiting.
2153 */
2154 if (p == PROC_NULL || proc_is_shadow(p)) {
2155 if (p != PROC_NULL) {
2156 proc_rele(p);
2157 }
2158 goto retry;
2159 }
2160 }
2161 }
2162
2163 return p;
2164}
2165
2166proc_t
2167proc_findthread(thread_t thread)
2168{
2169 proc_t p = PROC_NULL;
2170
2171 proc_list_lock();
2172 {
2173 p = (proc_t)(get_bsdthreadtask_info(thread));
2174 }
2175 p = proc_ref(p, true);
2176 proc_list_unlock();
2177 return p;
2178}
2179
2180
2181/*
2182 * Locate a zombie by PID
2183 */
2184__private_extern__ proc_t
2185pzfind(pid_t pid)
2186{
2187 proc_t p;
2188
2189
2190 proc_list_lock();
2191
2192 LIST_FOREACH(p, &zombproc, p_list) {
2193 if (proc_getpid(p) == pid && !proc_is_shadow(p)) {
2194 break;
2195 }
2196 }
2197
2198 proc_list_unlock();
2199
2200 return p;
2201}
2202
/*
 * Acquire a pgrp ref, if and only if the pgrp is non-empty.
 */
2206static inline bool
2207pg_ref_try(struct pgrp *pgrp)
2208{
2209 return os_ref_retain_try_mask(&pgrp->pg_refcount, PGRP_REF_BITS,
2210 PGRP_REF_EMPTY, &p_refgrp);
2211}
2212
2213static bool
2214pgrp_hash_obj_try_get(void *pgrp)
2215{
2216 return pg_ref_try(pgrp);
}

2218/*
2219 * Unconditionally acquire a pgrp ref,
2220 * regardless of whether the pgrp is empty or not.
2221 */
2222static inline struct pgrp *
2223pg_ref(struct pgrp *pgrp)
2224{
2225 os_ref_retain_mask(&pgrp->pg_refcount, PGRP_REF_BITS, &p_refgrp);
2226 return pgrp;
2227}
2228
2229SMRH_TRAITS_DEFINE_SCALAR(pgrp_hash_traits, struct pgrp, pg_id, pg_hash,
2230 .domain = &smr_proc_task,
2231 .obj_try_get = pgrp_hash_obj_try_get);
2232
2233/*
2234 * Locate a process group by number
2235 */
2236bool
2237pghash_exists_locked(pid_t pgid)
2238{
2239 smrh_key_t key = SMRH_SCALAR_KEY(pgid);
2240
2241 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2242
2243 return smr_hash_serialized_find(&pgrp_hash, key, &pgrp_hash_traits);
2244}
2245
2246void
2247pghash_insert_locked(struct pgrp *pgrp)
2248{
2249 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2250
2251 smr_hash_serialized_insert(&pgrp_hash, &pgrp->pg_hash,
2252 &pgrp_hash_traits);
2253}
2254
2255static void
2256pghash_remove_locked(struct pgrp *pgrp)
2257{
2258 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2259
2260 smr_hash_serialized_remove(&pgrp_hash, &pgrp->pg_hash,
2261 &pgrp_hash_traits);
2262}
2263
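/*
 * Locate a process group by ID and return it with a reference held;
 * release it with pgrp_rele(). Empty process groups are skipped
 * (see pg_ref_try()).
 */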
2264struct pgrp *
2265pgrp_find(pid_t pgid)
2266{
2267 smrh_key_t key = SMRH_SCALAR_KEY(pgid);
2268
2269 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
2270
2271 return smr_hash_get(&pgrp_hash, key, &pgrp_hash_traits);
2272}
2273
2274/* consumes one ref from pgrp */
2275static void
2276pgrp_add_member(struct pgrp *pgrp, struct proc *parent, struct proc *p)
2277{
2278 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2279
2280 pgrp_lock(pgrp);
2281 if (LIST_EMPTY(&pgrp->pg_members)) {
2282 os_atomic_andnot(&pgrp->pg_refcount, PGRP_REF_EMPTY, relaxed);
2283 }
2284 if (parent != PROC_NULL) {
2285 assert(pgrp == smr_serialized_load(&parent->p_pgrp));
2286 }
2287
2288 LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);
2289 pgrp_unlock(pgrp);
2290
2291 p->p_pgrpid = pgrp->pg_id;
2292 p->p_sessionid = pgrp->pg_session->s_sid;
2293 smr_serialized_store(&p->p_pgrp, pgrp);
2294}
2295
2296/* returns one ref from pgrp */
2297static void
2298pgrp_del_member(struct pgrp *pgrp, struct proc *p)
2299{
2300 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2301
2302 pgrp_lock(pgrp);
2303 LIST_REMOVE(p, p_pglist);
2304 if (LIST_EMPTY(&pgrp->pg_members)) {
2305 os_atomic_or(&pgrp->pg_refcount, PGRP_REF_EMPTY, relaxed);
2306 }
2307 pgrp_unlock(pgrp);
2308}
2309
2310void
2311pgrp_rele(struct pgrp * pgrp)
2312{
2313 if (pgrp == PGRP_NULL) {
2314 return;
2315 }
2316
2317 if (os_ref_release_mask(&pgrp->pg_refcount, PGRP_REF_BITS, &p_refgrp) == 0) {
2318 pgrp_destroy(pgrp);
2319 }
2320}
2321
2322struct session *
2323session_alloc(proc_t leader)
2324{
2325 struct session *sess;
2326
2327 sess = zalloc_flags(session_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
2328 lck_mtx_init(lck: &sess->s_mlock, grp: &proc_mlock_grp, attr: &proc_lck_attr);
2329 sess->s_leader = leader;
2330 sess->s_sid = proc_getpid(p: leader);
2331 sess->s_ttypgrpid = NO_PID;
2332 os_atomic_init(&sess->s_ttydev, NODEV);
2333 os_ref_init_mask(&sess->s_refcount, SESSION_REF_BITS,
2334 &p_refgrp, S_DEFAULT);
2335
2336 return sess;
2337}
2338
2339struct tty *
2340session_set_tty_locked(struct session *sessp, struct tty *tp)
2341{
2342 struct tty *old;
2343
2344 LCK_MTX_ASSERT(&sessp->s_mlock, LCK_MTX_ASSERT_OWNED);
2345
2346 old = sessp->s_ttyp;
2347 ttyhold(tp);
2348 sessp->s_ttyp = tp;
2349 os_atomic_store(&sessp->s_ttydev, tp->t_dev, relaxed);
2350
2351 return old;
2352}
2353
2354struct tty *
2355session_clear_tty_locked(struct session *sessp)
2356{
2357 struct tty *tp = sessp->s_ttyp;
2358
2359 LCK_MTX_ASSERT(&sessp->s_mlock, LCK_MTX_ASSERT_OWNED);
2360 sessp->s_ttyvp = NULLVP;
2361 sessp->s_ttyvid = 0;
2362 sessp->s_ttyp = TTY_NULL;
2363 sessp->s_ttypgrpid = NO_PID;
2364 os_atomic_store(&sessp->s_ttydev, NODEV, relaxed);
2365
2366 return tp;
2367}
2368
2369__attribute__((noinline))
2370static void
2371session_destroy(struct session *sess)
2372{
2373 proc_list_lock();
2374 LIST_REMOVE(sess, s_hash);
2375 proc_list_unlock();
2376
2377 /*
2378 * Either the TTY was closed,
2379 * or proc_exit() destroyed it when the leader went away
2380 */
2381 assert(sess->s_ttyp == TTY_NULL);
2382
2383 lck_mtx_destroy(lck: &sess->s_mlock, grp: &proc_mlock_grp);
2384 zfree(session_zone, sess);
2385}
2386
2387struct session *
2388session_ref(struct session *sess)
2389{
2390 os_ref_retain_mask(&sess->s_refcount, SESSION_REF_BITS, &p_refgrp);
2391 return sess;
2392}
2393
2394void
2395session_rele(struct session *sess)
2396{
2397 if (os_ref_release_mask(&sess->s_refcount, SESSION_REF_BITS, &p_refgrp) == 0) {
2398 session_destroy(sess);
2399 }
2400}
2401
2402
2403/*
2404 * Make a new process ready to become a useful member of society by making it
2405 * visible in all the right places and initializing its own lists to empty.
2406 *
2407 * Parameters: parent The parent of the process to insert
2408 * child The child process to insert
2409 * in_exec The child process is in exec
2410 *
2411 * Returns: (void)
2412 *
2413 * Notes:	Insert a child process into the parent's children list, assigning
2414 *		the child the parent process pointer and the PPID of the parent...
2415 */
2416void
2417pinsertchild(proc_t parent, proc_t child, bool in_exec)
2418{
2419 LIST_INIT(&child->p_children);
2420 proc_t sibling = parent;
2421
2422 /* For exec case, new proc is not a child of old proc, but its replacement */
2423 if (in_exec) {
2424 parent = proc_parent(p: parent);
2425 assert(parent != PROC_NULL);
2426
2427 /* Copy the ptrace flags from sibling */
2428 proc_lock(sibling);
2429 child->p_oppid = sibling->p_oppid;
2430 child->p_lflag |= (sibling->p_lflag & (P_LTRACED | P_LSIGEXC | P_LNOATTACH));
2431 proc_unlock(sibling);
2432 }
2433
2434 proc_list_lock();
2435
2436 child->p_pptr = parent;
2437 child->p_ppid = proc_getpid(p: parent);
2438 child->p_original_ppid = in_exec ? sibling->p_original_ppid : proc_getpid(p: parent);
2439 child->p_puniqueid = proc_uniqueid(p: parent);
2440 child->p_xhighbits = 0;
2441#if CONFIG_MEMORYSTATUS
2442 memorystatus_add(p: child, TRUE);
2443#endif
2444
2445	/* If the parent is initproc and p_original_ppid is not 1, then set the reparent flag */
2446 if (in_exec && parent == initproc && child->p_original_ppid != 1) {
2447 child->p_listflag |= P_LIST_DEADPARENT;
2448 }
2449
2450 parent->p_childrencnt++;
2451 LIST_INSERT_HEAD(&parent->p_children, child, p_sibling);
2452
2453 LIST_INSERT_HEAD(&allproc, child, p_list);
2454 /* mark the completion of proc creation */
2455 os_atomic_andnot(&child->p_refcount, P_REF_NEW, relaxed);
2456
2457 proc_list_unlock();
2458 if (in_exec) {
2459 proc_rele(p: parent);
2460 }
2461}
2462
2463/*
2464 * Reparent all children of old proc to new proc.
2465 *
2466 * Parameters:	old_proc	The old (original) process.
2467 *			new_proc	The new (replacement) process.
2468 *
2469 * Returns: None.
2470 */
2471void
2472p_reparentallchildren(proc_t old_proc, proc_t new_proc)
2473{
2474 proc_t child;
2475
2476 LIST_INIT(&new_proc->p_children);
2477
2478 /* Wait for parent ref to drop */
2479 proc_childdrainstart(p: old_proc);
2480
2481 /* Reparent child from old proc to new proc */
2482 while ((child = old_proc->p_children.lh_first) != NULL) {
2483 LIST_REMOVE(child, p_sibling);
2484 old_proc->p_childrencnt--;
2485 child->p_pptr = new_proc;
2486 LIST_INSERT_HEAD(&new_proc->p_children, child, p_sibling);
2487 new_proc->p_childrencnt++;
2488 }
2489
2490 new_proc->si_pid = old_proc->si_pid;
2491 new_proc->si_status = old_proc->si_status;
2492 new_proc->si_code = old_proc->si_code;
2493 new_proc->si_uid = old_proc->si_uid;
2494
2495 proc_childdrainend(p: old_proc);
2496}
2497
2498/*
2499 * Move p to a new or existing process group (and session)
2500 *
2501 * Returns: 0 Success
2502 * ESRCH No such process
2503 */
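/*
 * Illustrative mapping (a sketch of the usual BSD wiring, not a statement
 * about callers elsewhere): setsid(2) would typically reach this as
 * enterpgrp(p, proc_getpid(p), 1), creating both a new pgrp and a new
 * session, while setpgid(2) would reach it as enterpgrp(p, pgid, 0) to join
 * or create a plain process group.
 */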
2504int
2505enterpgrp(proc_t p, pid_t pgid, int mksess)
2506{
2507 struct pgrp *pgrp;
2508 struct pgrp *mypgrp;
2509 struct session *procsp;
2510
2511 pgrp = pgrp_find(pgid);
2512 mypgrp = proc_pgrp(p, &procsp);
2513
2514#if DIAGNOSTIC
2515 if (pgrp != NULL && mksess) { /* firewalls */
2516 panic("enterpgrp: setsid into non-empty pgrp");
2517 }
2518 if (SESS_LEADER(p, mypgrp->pg_session)) {
2519 panic("enterpgrp: session leader attempted setpgrp");
2520 }
2521#endif
2522 if (pgrp == PGRP_NULL) {
2523 struct session *sess;
2524 pid_t savepid = proc_getpid(p);
2525 proc_t np = PROC_NULL;
2526
2527 /*
2528 * new process group
2529 */
2530#if DIAGNOSTIC
2531 if (proc_getpid(p) != pgid) {
2532 panic("enterpgrp: new pgrp and pid != pgid");
2533 }
2534#endif
2535 if ((np = proc_find(pid: savepid)) == NULL || np != p) {
2536 if (np != PROC_NULL) {
2537 proc_rele(p: np);
2538 }
2539 pgrp_rele(pgrp: mypgrp);
2540 return ESRCH;
2541 }
2542 proc_rele(p: np);
2543
2544 pgrp = pgrp_alloc(pgid, bits: PGRP_REF_EMPTY);
2545
2546 if (mksess) {
2547 /*
2548 * new session
2549 */
2550 sess = session_alloc(leader: p);
2551
2552 bcopy(src: mypgrp->pg_session->s_login, dst: sess->s_login,
2553 n: sizeof(sess->s_login));
2554 os_atomic_andnot(&p->p_flag, P_CONTROLT, relaxed);
2555 } else {
2556 sess = session_ref(sess: procsp);
2557 }
2558
2559 proc_list_lock();
2560 pgrp->pg_session = sess;
2561 p->p_sessionid = sess->s_sid;
2562 pghash_insert_locked(pgrp);
2563 if (mksess) {
2564 LIST_INSERT_HEAD(SESSHASH(sess->s_sid), sess, s_hash);
2565 }
2566 proc_list_unlock();
2567 } else if (pgrp == mypgrp) {
2568 pgrp_rele(pgrp);
2569 pgrp_rele(pgrp: mypgrp);
2570 return 0;
2571 }
2572
2573 /*
2574 * Adjust eligibility of affected pgrps to participate in job control.
2575 * Increment eligibility counts before decrementing, otherwise we
2576 * could reach 0 spuriously during the first call.
2577 */
2578 fixjobc(p, pgrp, entering: 1);
2579 fixjobc(p, pgrp: mypgrp, entering: 0);
2580
2581 pgrp_rele(pgrp: mypgrp);
2582 pgrp_replace(p, pgrp);
2583
2584 return 0;
2585}
2586
2587/*
2588 * remove process from process group
2589 */
2590struct pgrp *
2591pgrp_leave_locked(proc_t p)
2592{
2593 struct pgrp *pg;
2594
2595 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2596
2597 pg = smr_serialized_load(&p->p_pgrp);
2598 pgrp_del_member(pgrp: pg, p);
2599 p->p_pgrpid = PGRPID_DEAD;
2600 smr_clear_store(&p->p_pgrp);
2601
2602 return pg;
2603}
2604
2605struct pgrp *
2606pgrp_enter_locked(struct proc *parent, struct proc *child)
2607{
2608 struct pgrp *pgrp;
2609
2610 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2611
2612 pgrp = pg_ref(smr_serialized_load(&parent->p_pgrp));
2613 pgrp_add_member(pgrp, parent, p: child);
2614 return pgrp;
2615}
2616
2617/*
2618 * delete a process group
2619 */
2620static void
2621pgrp_free(smr_node_t node)
2622{
2623 struct pgrp *pgrp = __container_of(node, struct pgrp, pg_smr_node);
2624
2625 zfree(pgrp_zone, pgrp);
2626}
2627
2628__attribute__((noinline))
2629static void
2630pgrp_destroy(struct pgrp *pgrp)
2631{
2632 struct session *sess;
2633
2634 assert(LIST_EMPTY(&pgrp->pg_members));
2635 assert(os_ref_get_raw_mask(&pgrp->pg_refcount) & PGRP_REF_EMPTY);
2636
2637 proc_list_lock();
2638 pghash_remove_locked(pgrp);
2639 proc_list_unlock();
2640
2641 sess = pgrp->pg_session;
2642 pgrp->pg_session = SESSION_NULL;
2643 session_rele(sess);
2644
2645 lck_mtx_destroy(lck: &pgrp->pg_mlock, grp: &proc_mlock_grp);
2646 if (os_ref_release_raw(&pgrp->pg_hashref, &p_refgrp) == 0) {
2647 smr_proc_task_call(&pgrp->pg_smr_node, sizeof(*pgrp), pgrp_free);
2648 }
2649}
2650
2651
2652 * Adjust pgrp jobc counters when the specified process changes process group.
2653 * Adjust pgrp jobc counters when specified process changes process group.
2654 * We count the number of processes in each process group that "qualify"
2655 * the group for terminal job control (those with a parent in a different
2656 * process group of the same session). If that count reaches zero, the
2657 * process group becomes orphaned. Check both the specified process'
2658 * process group and that of its children.
2659 * entering == 0 => p is leaving specified group.
2660 * entering == 1 => p is entering specified group.
2661 */
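/*
 * Illustrative scenario (an example, not part of the original comment): a
 * shell in pgrp A starts a pipeline whose members are placed in pgrp B of the
 * same session.  Each pipeline member has its parent (the shell) in a
 * different pgrp of the same session, so each member "qualifies" B and B's
 * pg_jobc stays positive.  When the last qualifying member leaves and
 * pg_jobc drops to 0, B becomes orphaned and orphanpg() may send
 * SIGHUP/SIGCONT to its stopped members.
 */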
2662int
2663fixjob_callback(proc_t p, void * arg)
2664{
2665 struct fixjob_iterargs *fp;
2666 struct pgrp * pg, *hispg;
2667 struct session * mysession, *hissess;
2668 int entering;
2669
2670 fp = (struct fixjob_iterargs *)arg;
2671 pg = fp->pg;
2672 mysession = fp->mysession;
2673 entering = fp->entering;
2674
2675 hispg = proc_pgrp(p, &hissess);
2676
2677 if (hispg != pg && hissess == mysession) {
2678 pgrp_lock(pgrp: hispg);
2679 if (entering) {
2680 hispg->pg_jobc++;
2681 pgrp_unlock(pgrp: hispg);
2682 } else if (--hispg->pg_jobc == 0) {
2683 pgrp_unlock(pgrp: hispg);
2684 orphanpg(pg: hispg);
2685 } else {
2686 pgrp_unlock(pgrp: hispg);
2687 }
2688 }
2689 pgrp_rele(pgrp: hispg);
2690
2691 return PROC_RETURNED;
2692}
2693
2694void
2695fixjobc(proc_t p, struct pgrp *pgrp, int entering)
2696{
2697 struct pgrp *hispgrp = PGRP_NULL;
2698 struct session *hissess = SESSION_NULL;
2699 struct session *mysession = pgrp->pg_session;
2700 proc_t parent;
2701 struct fixjob_iterargs fjarg;
2702 boolean_t proc_parent_self;
2703
2704 /*
2705	 * Check if p's parent is the current proc; if so, there is no need to take
2706	 * a ref.  Calling proc_parent with the current proc as the parent may
2707	 * deadlock if the current proc is exiting.
2708 */
2709 proc_parent_self = proc_parent_is_currentproc(p);
2710 if (proc_parent_self) {
2711 parent = current_proc();
2712 } else {
2713 parent = proc_parent(p);
2714 }
2715
2716 if (parent != PROC_NULL) {
2717 hispgrp = proc_pgrp(parent, &hissess);
2718 if (!proc_parent_self) {
2719 proc_rele(p: parent);
2720 }
2721 }
2722
2723 /*
2724 * Check p's parent to see whether p qualifies its own process
2725 * group; if so, adjust count for p's process group.
2726 */
2727 if (hispgrp != pgrp && hissess == mysession) {
2728 pgrp_lock(pgrp);
2729 if (entering) {
2730 pgrp->pg_jobc++;
2731 pgrp_unlock(pgrp);
2732 } else if (--pgrp->pg_jobc == 0) {
2733 pgrp_unlock(pgrp);
2734 orphanpg(pg: pgrp);
2735 } else {
2736 pgrp_unlock(pgrp);
2737 }
2738 }
2739
2740 pgrp_rele(pgrp: hispgrp);
2741
2742 /*
2743 * Check this process' children to see whether they qualify
2744 * their process groups; if so, adjust counts for children's
2745 * process groups.
2746 */
2747 fjarg.pg = pgrp;
2748 fjarg.mysession = mysession;
2749 fjarg.entering = entering;
2750 proc_childrenwalk(p, callout: fixjob_callback, arg: &fjarg);
2751}
2752
2753/*
2754 * The pidlist_* routines support the functions in this file that
2755 * walk lists of processes applying filters and callouts to the
2756 * elements of the list.
2757 *
2758 * A prior implementation used a single linear array, which can be
2759 * tricky to allocate on large systems. This implementation creates
2760 * an SLIST of modestly sized arrays of PIDS_PER_ENTRY elements.
2761 *
2762 * The array should be sized large enough to keep the overhead of
2763 * walking the list low, but small enough that blocking allocations of
2764 * pidlist_entry_t structures always succeed.
2765 */
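/*
 * Minimal usage sketch (illustrative only; it mirrors orphanpg() and
 * proc_iterate() below, and "count_candidates"/"source_lock"/"source_unlock"
 * are hypothetical placeholders for the caller's own list and lock):
 *
 *	pidlist_t pid_list, *pl = pidlist_init(&pid_list);
 *	u_int avail;
 *
 *	for (;;) {
 *		source_lock();
 *		avail = count_candidates();
 *		if (pidlist_nalloc(pl) >= avail) {
 *			break;                          // enough room; keep the lock
 *		}
 *		source_unlock();
 *		pidlist_alloc(pl, avail);               // may block; done unlocked
 *	}
 *	pidlist_set_active(pl);
 *	// ... pidlist_add_pid(pl, pid) for each candidate, then source_unlock() ...
 *	// ... SLIST_FOREACH over pl->pl_head to visit the collected pids ...
 *	pidlist_free(pl);
 */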
2766
2767#define PIDS_PER_ENTRY 1021
2768
2769typedef struct pidlist_entry {
2770 SLIST_ENTRY(pidlist_entry) pe_link;
2771 u_int pe_nused;
2772 pid_t pe_pid[PIDS_PER_ENTRY];
2773} pidlist_entry_t;
2774
2775typedef struct {
2776 SLIST_HEAD(, pidlist_entry) pl_head;
2777 struct pidlist_entry *pl_active;
2778 u_int pl_nalloc;
2779} pidlist_t;
2780
2781static __inline__ pidlist_t *
2782pidlist_init(pidlist_t *pl)
2783{
2784 SLIST_INIT(&pl->pl_head);
2785 pl->pl_active = NULL;
2786 pl->pl_nalloc = 0;
2787 return pl;
2788}
2789
2790static u_int
2791pidlist_alloc(pidlist_t *pl, u_int needed)
2792{
2793 while (pl->pl_nalloc < needed) {
2794 pidlist_entry_t *pe = kalloc_type(pidlist_entry_t,
2795 Z_WAITOK | Z_ZERO | Z_NOFAIL);
2796 SLIST_INSERT_HEAD(&pl->pl_head, pe, pe_link);
2797 pl->pl_nalloc += (sizeof(pe->pe_pid) / sizeof(pe->pe_pid[0]));
2798 }
2799 return pl->pl_nalloc;
2800}
2801
2802static void
2803pidlist_free(pidlist_t *pl)
2804{
2805 pidlist_entry_t *pe;
2806 while (NULL != (pe = SLIST_FIRST(&pl->pl_head))) {
2807 SLIST_FIRST(&pl->pl_head) = SLIST_NEXT(pe, pe_link);
2808 kfree_type(pidlist_entry_t, pe);
2809 }
2810 pl->pl_nalloc = 0;
2811}
2812
2813static __inline__ void
2814pidlist_set_active(pidlist_t *pl)
2815{
2816 pl->pl_active = SLIST_FIRST(&pl->pl_head);
2817 assert(pl->pl_active);
2818}
2819
2820static void
2821pidlist_add_pid(pidlist_t *pl, pid_t pid)
2822{
2823 pidlist_entry_t *pe = pl->pl_active;
2824 if (pe->pe_nused >= sizeof(pe->pe_pid) / sizeof(pe->pe_pid[0])) {
2825 if (NULL == (pe = SLIST_NEXT(pe, pe_link))) {
2826 panic("pidlist allocation exhausted");
2827 }
2828 pl->pl_active = pe;
2829 }
2830 pe->pe_pid[pe->pe_nused++] = pid;
2831}
2832
2833static __inline__ u_int
2834pidlist_nalloc(const pidlist_t *pl)
2835{
2836 return pl->pl_nalloc;
2837}
2838
2839/*
2840 * A process group has become orphaned; if there are any stopped processes in
2841 * the group, hang up all processes in that group.
2842 */
2843static void
2844orphanpg(struct pgrp *pgrp)
2845{
2846 pidlist_t pid_list, *pl = pidlist_init(pl: &pid_list);
2847 u_int pid_count_available = 0;
2848 proc_t p;
2849
2850 /* allocate outside of the pgrp_lock */
2851 for (;;) {
2852 pgrp_lock(pgrp);
2853
2854 boolean_t should_iterate = FALSE;
2855 pid_count_available = 0;
2856
2857 PGMEMBERS_FOREACH(pgrp, p) {
2858 pid_count_available++;
2859 if (p->p_stat == SSTOP) {
2860 should_iterate = TRUE;
2861 }
2862 }
2863 if (pid_count_available == 0 || !should_iterate) {
2864 pgrp_unlock(pgrp);
2865			goto out; /* no processes in the group OR nothing stopped */
2866 }
2867 if (pidlist_nalloc(pl) >= pid_count_available) {
2868 break;
2869 }
2870 pgrp_unlock(pgrp);
2871
2872 pidlist_alloc(pl, needed: pid_count_available);
2873 }
2874 pidlist_set_active(pl);
2875
2876 u_int pid_count = 0;
2877 PGMEMBERS_FOREACH(pgrp, p) {
2878 pidlist_add_pid(pl, pid: proc_pid(p));
2879 if (++pid_count >= pid_count_available) {
2880 break;
2881 }
2882 }
2883 pgrp_unlock(pgrp);
2884
2885 const pidlist_entry_t *pe;
2886 SLIST_FOREACH(pe, &(pl->pl_head), pe_link) {
2887 for (u_int i = 0; i < pe->pe_nused; i++) {
2888 const pid_t pid = pe->pe_pid[i];
2889 if (0 == pid) {
2890 continue; /* skip kernproc */
2891 }
2892 p = proc_find(pid);
2893 if (!p) {
2894 continue;
2895 }
2896 proc_transwait(p, locked: 0);
2897 pt_setrunnable(p);
2898 psignal(p, SIGHUP);
2899 psignal(p, SIGCONT);
2900 proc_rele(p);
2901 }
2902 }
2903out:
2904 pidlist_free(pl);
2905}
2906
2907boolean_t
2908proc_is_translated(proc_t p)
2909{
2910 return p && ((p->p_flag & P_TRANSLATED) != 0);
2911}
2912
2913
2914
2915int
2916proc_is_classic(proc_t p __unused)
2917{
2918 return 0;
2919}
2920
2921bool
2922proc_is_exotic(
2923 proc_t p)
2924{
2925 if (p == NULL) {
2926 return false;
2927 }
2928 return task_is_exotic(task: proc_task(proc: p));
2929}
2930
2931bool
2932proc_is_alien(
2933 proc_t p)
2934{
2935 if (p == NULL) {
2936 return false;
2937 }
2938 return task_is_alien(task: proc_task(proc: p));
2939}
2940
2941bool
2942proc_is_driver(proc_t p)
2943{
2944 if (p == NULL) {
2945 return false;
2946 }
2947 return task_is_driver(task: proc_task(proc: p));
2948}
2949
2950bool
2951proc_is_third_party_debuggable_driver(proc_t p)
2952{
2953#if XNU_TARGET_OS_IOS
2954 uint64_t csflags;
2955 if (proc_csflags(p, &csflags) != 0) {
2956 return false;
2957 }
2958
2959 if (proc_is_driver(p) &&
2960 !csproc_get_platform_binary(p) &&
2961 IOTaskHasEntitlement(proc_task(p), kIODriverKitEntitlementKey) &&
2962 (csflags & CS_GET_TASK_ALLOW) != 0) {
2963 return true;
2964 }
2965
2966 return false;
2967
2968#else
2969 /* On other platforms, fall back to existing rules for debugging */
2970 (void)p;
2971 return false;
2972#endif /* XNU_TARGET_OS_IOS */
2973}
2974
2975/* XXX Why does this function exist? Need to kill it off... */
2976proc_t
2977current_proc_EXTERNAL(void)
2978{
2979 return current_proc();
2980}
2981
2982int
2983proc_is_forcing_hfs_case_sensitivity(proc_t p)
2984{
2985 return (p->p_vfs_iopolicy & P_VFS_IOPOLICY_FORCE_HFS_CASE_SENSITIVITY) ? 1 : 0;
2986}
2987
2988bool
2989proc_ignores_content_protection(proc_t p)
2990{
2991 return os_atomic_load(&p->p_vfs_iopolicy, relaxed) & P_VFS_IOPOLICY_IGNORE_CONTENT_PROTECTION;
2992}
2993
2994bool
2995proc_ignores_node_permissions(proc_t p)
2996{
2997 return os_atomic_load(&p->p_vfs_iopolicy, relaxed) & P_VFS_IOPOLICY_IGNORE_NODE_PERMISSIONS;
2998}
2999
3000bool
3001proc_skip_mtime_update(proc_t p)
3002{
3003 return os_atomic_load(&p->p_vfs_iopolicy, relaxed) & P_VFS_IOPOLICY_SKIP_MTIME_UPDATE;
3004}
3005
3006bool
3007proc_allow_low_space_writes(proc_t p)
3008{
3009 return os_atomic_load(&p->p_vfs_iopolicy, relaxed) & P_VFS_IOPOLICY_ALLOW_LOW_SPACE_WRITES;
3010}
3011
3012bool
3013proc_disallow_rw_for_o_evtonly(proc_t p)
3014{
3015 return os_atomic_load(&p->p_vfs_iopolicy, relaxed) & P_VFS_IOPOLICY_DISALLOW_RW_FOR_O_EVTONLY;
3016}
3017
3018bool
3019proc_use_alternative_symlink_ea(proc_t p)
3020{
3021 return os_atomic_load(&p->p_vfs_iopolicy, relaxed) & P_VFS_IOPOLICY_ALTLINK;
3022}
3023
3024bool
3025proc_allow_nocache_write_fs_blksize(proc_t p)
3026{
3027 struct uthread *ut = get_bsdthread_info(current_thread());
3028
3029 return (ut && (ut->uu_flag & UT_FS_BLKSIZE_NOCACHE_WRITES)) ||
3030 os_atomic_load(&p->p_vfs_iopolicy, relaxed) & P_VFS_IOPOLICY_NOCACHE_WRITE_FS_BLKSIZE;
3031}
3032
3033bool
3034proc_is_rsr(proc_t p)
3035{
3036 return os_atomic_load(&p->p_ladvflag, relaxed) & P_RSR;
3037}
3038
3039#if CONFIG_COREDUMP
3040/*
3041 * proc_core_name(format, name, uid, pid)
3042 * Expand the name described in format, using name, uid, and pid.
3043 * format is a printf-like string, with four format specifiers:
3044 * %N name of process ("name")
3045 * %P process id (pid)
3046 * %U user id (uid)
3047 * %T mach_continuous_time() timestamp
3048 * For example, "%N.core" is the default; core dumps can be disabled completely
3049 * by setting the name to "/dev/null", or all core files can be stored in "/cores/%U/%N-%P".
3050 * This is controlled by the sysctl variable kern.corefile (see above).
3051 */
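/*
 * Worked example (illustrative values only): with kern.corefile set to
 * "/cores/%U/%N-%P.%T", name "Mail", uid 501 and pid 1234, the expansion loop
 * below would produce something like "/cores/501/Mail-1234.8273645591",
 * where the final component is the mach_continuous_time() value at the time
 * of the dump.
 */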
3052__private_extern__ int
3053proc_core_name(const char *format, const char * name, uid_t uid, pid_t pid, char *cf_name,
3054 size_t cf_name_len)
3055{
3056 const char *appendstr;
3057 char id_buf[sizeof(OS_STRINGIFY(INT32_MAX))]; /* Buffer for pid/uid -- max 4B */
3058 _Static_assert(sizeof(id_buf) == 11, "size mismatch");
3059 char timestamp_buf[sizeof(OS_STRINGIFY(UINT64_MAX))]; /* Buffer for timestamp, including null terminator */
3060 size_t i, l, n;
3061
3062 if (cf_name == NULL) {
3063 goto toolong;
3064 }
3065
3066 for (i = 0, n = 0; n < cf_name_len && format[i]; i++) {
3067 switch (format[i]) {
3068 case '%': /* Format character */
3069 i++;
3070 switch (format[i]) {
3071 case '%':
3072 appendstr = "%";
3073 break;
3074 case 'N': /* process name */
3075 appendstr = name;
3076 break;
3077 case 'P': /* process id */
3078 snprintf(id_buf, count: sizeof(id_buf), "%u", pid);
3079 appendstr = id_buf;
3080 break;
3081 case 'U': /* user id */
3082 snprintf(id_buf, count: sizeof(id_buf), "%u", uid);
3083 appendstr = id_buf;
3084 break;
3085 case 'T': /* timestamp */
3086 snprintf(timestamp_buf, count: sizeof(timestamp_buf), "%llu", mach_continuous_time());
3087 appendstr = timestamp_buf;
3088 break;
3089 case '\0': /* format string ended in % symbol */
3090 goto endofstring;
3091 default:
3092 appendstr = "";
3093 log(LOG_ERR,
3094 "Unknown format character %c in `%s'\n",
3095 format[i], format);
3096 }
3097 l = strlen(s: appendstr);
3098 if ((n + l) >= cf_name_len) {
3099 goto toolong;
3100 }
3101 bcopy(src: appendstr, dst: cf_name + n, n: l);
3102 n += l;
3103 break;
3104 default:
3105 cf_name[n++] = format[i];
3106 }
3107 }
3108 if (format[i] != '\0') {
3109 goto toolong;
3110 }
3111 return 0;
3112toolong:
3113 log(LOG_ERR, "pid %ld (%s), uid (%u): corename is too long\n",
3114 (long)pid, name, (uint32_t)uid);
3115 return 1;
3116endofstring:
3117 log(LOG_ERR, "pid %ld (%s), uid (%u): unexpected end of string after %% token\n",
3118 (long)pid, name, (uint32_t)uid);
3119 return 1;
3120}
3121#endif /* CONFIG_COREDUMP */
3122
3123/* Code Signing related routines */
3124
3125int
3126csops(__unused proc_t p, struct csops_args *uap, __unused int32_t *retval)
3127{
3128 return csops_internal(pid: uap->pid, ops: uap->ops, uaddr: uap->useraddr,
3129 usersize: uap->usersize, USER_ADDR_NULL);
3130}
3131
3132int
3133csops_audittoken(__unused proc_t p, struct csops_audittoken_args *uap, __unused int32_t *retval)
3134{
3135 if (uap->uaudittoken == USER_ADDR_NULL) {
3136 return EINVAL;
3137 }
3138 return csops_internal(pid: uap->pid, ops: uap->ops, uaddr: uap->useraddr,
3139 usersize: uap->usersize, uaddittoken: uap->uaudittoken);
3140}
3141
3142static int
3143csops_copy_token(const void *start, size_t length, user_size_t usize, user_addr_t uaddr)
3144{
3145 char fakeheader[8] = { 0 };
3146 int error;
3147
3148 if (usize < sizeof(fakeheader)) {
3149 return ERANGE;
3150 }
3151
3152 /* if no blob, fill in zero header */
3153 if (NULL == start) {
3154 start = fakeheader;
3155 length = sizeof(fakeheader);
3156 } else if (usize < length) {
3157 /* ... if input too short, copy out length of entitlement */
3158 uint32_t length32 = htonl((uint32_t)length);
3159 memcpy(dst: &fakeheader[4], src: &length32, n: sizeof(length32));
3160
3161 error = copyout(fakeheader, uaddr, sizeof(fakeheader));
3162 if (error == 0) {
3163			return ERANGE; /* input buffer too short, ERANGE signals that */
3164 }
3165 return error;
3166 }
3167 return copyout(start, uaddr, length);
3168}
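/*
 * The fake header above enables a two-pass pattern from user space
 * (illustrative sketch; csops(2) is the user-space entry point for
 * csops_internal() below, and the first buffer is deliberately just big
 * enough for the 8-byte header):
 *
 *	uint8_t hdr[8];
 *	if (csops(pid, CS_OPS_BLOB, hdr, sizeof(hdr)) == -1 && errno == ERANGE) {
 *		uint32_t len32;
 *		memcpy(&len32, hdr + 4, sizeof(len32));	// blob length, big-endian
 *		size_t len = ntohl(len32);
 *		void *buf = malloc(len);
 *		if (buf && csops(pid, CS_OPS_BLOB, buf, len) == 0) {
 *			// buf now holds the full blob
 *		}
 *	}
 */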
3169
3170static int
3171csops_internal(pid_t pid, int ops, user_addr_t uaddr, user_size_t usersize, user_addr_t uaudittoken)
3172{
3173 size_t usize = (size_t)CAST_DOWN(size_t, usersize);
3174 proc_t pt;
3175 int forself;
3176 int error;
3177 vnode_t tvp;
3178 off_t toff;
3179 unsigned char cdhash[SHA1_RESULTLEN];
3180 audit_token_t token;
3181 unsigned int upid = 0, uidversion = 0;
3182
3183 forself = error = 0;
3184
3185 if (pid == 0) {
3186 pid = proc_selfpid();
3187 }
3188 if (pid == proc_selfpid()) {
3189 forself = 1;
3190 }
3191
3192
3193 switch (ops) {
3194 case CS_OPS_STATUS:
3195 case CS_OPS_CDHASH:
3196 case CS_OPS_PIDOFFSET:
3197 case CS_OPS_ENTITLEMENTS_BLOB:
3198 case CS_OPS_DER_ENTITLEMENTS_BLOB:
3199 case CS_OPS_IDENTITY:
3200 case CS_OPS_BLOB:
3201 case CS_OPS_TEAMID:
3202 case CS_OPS_CLEAR_LV:
3203 case CS_OPS_VALIDATION_CATEGORY:
3204 break; /* not restricted to root */
3205 default:
3206 if (forself == 0 && kauth_cred_issuser(cred: kauth_cred_get()) != TRUE) {
3207 return EPERM;
3208 }
3209 break;
3210 }
3211
3212 pt = proc_find(pid);
3213 if (pt == PROC_NULL) {
3214 return ESRCH;
3215 }
3216
3217 upid = proc_getpid(p: pt);
3218 uidversion = proc_pidversion(p: pt);
3219 if (uaudittoken != USER_ADDR_NULL) {
3220 error = copyin(uaudittoken, &token, sizeof(audit_token_t));
3221 if (error != 0) {
3222 goto out;
3223 }
3224		/* verify the audit token pid/idversion matches the proc */
3225 if ((token.val[5] != upid) || (token.val[7] != uidversion)) {
3226 error = ESRCH;
3227 goto out;
3228 }
3229 }
3230
3231#if CONFIG_MACF
3232 switch (ops) {
3233 case CS_OPS_MARKINVALID:
3234 case CS_OPS_MARKHARD:
3235 case CS_OPS_MARKKILL:
3236 case CS_OPS_MARKRESTRICT:
3237 case CS_OPS_SET_STATUS:
3238 case CS_OPS_CLEARINSTALLER:
3239 case CS_OPS_CLEARPLATFORM:
3240 case CS_OPS_CLEAR_LV:
3241 if ((error = mac_proc_check_set_cs_info(curp: current_proc(), target: pt, op: ops))) {
3242 goto out;
3243 }
3244 break;
3245 default:
3246 if ((error = mac_proc_check_get_cs_info(curp: current_proc(), target: pt, op: ops))) {
3247 goto out;
3248 }
3249 }
3250#endif
3251
3252 switch (ops) {
3253 case CS_OPS_STATUS: {
3254 uint32_t retflags;
3255
3256 proc_lock(pt);
3257 retflags = (uint32_t)proc_getcsflags(p: pt);
3258 if (cs_process_enforcement(pt)) {
3259 retflags |= CS_ENFORCEMENT;
3260 }
3261 if (csproc_get_platform_binary(pt)) {
3262 retflags |= CS_PLATFORM_BINARY;
3263 }
3264 if (csproc_get_platform_path(pt)) {
3265 retflags |= CS_PLATFORM_PATH;
3266 }
3267		// Don't return CS_REQUIRE_LV if we turned it on via CS_FORCED_LV, but still report CS_FORCED_LV
3268 if ((proc_getcsflags(p: pt) & CS_FORCED_LV) == CS_FORCED_LV) {
3269 retflags &= (~CS_REQUIRE_LV);
3270 }
3271 proc_unlock(pt);
3272
3273 if (uaddr != USER_ADDR_NULL) {
3274 error = copyout(&retflags, uaddr, sizeof(uint32_t));
3275 }
3276 break;
3277 }
3278 case CS_OPS_MARKINVALID:
3279 proc_lock(pt);
3280 if ((proc_getcsflags(p: pt) & CS_VALID) == CS_VALID) { /* is currently valid */
3281 proc_csflags_clear(p: pt, CS_VALID); /* set invalid */
3282 cs_process_invalidated(pt);
3283 if ((proc_getcsflags(p: pt) & CS_KILL) == CS_KILL) {
3284 proc_csflags_set(p: pt, CS_KILLED);
3285 proc_unlock(pt);
3286 if (cs_debug) {
3287 printf("CODE SIGNING: marked invalid by pid %d: "
3288 "p=%d[%s] honoring CS_KILL, final status 0x%x\n",
3289 proc_selfpid(), proc_getpid(p: pt), pt->p_comm,
3290 (unsigned int)proc_getcsflags(p: pt));
3291 }
3292 psignal(p: pt, SIGKILL);
3293 } else {
3294 proc_unlock(pt);
3295 }
3296 } else {
3297 proc_unlock(pt);
3298 }
3299
3300 break;
3301
3302 case CS_OPS_MARKHARD:
3303 proc_lock(pt);
3304 proc_csflags_set(p: pt, CS_HARD);
3305 if ((proc_getcsflags(p: pt) & CS_VALID) == 0) {
3306 /* @@@ allow? reject? kill? @@@ */
3307 proc_unlock(pt);
3308 error = EINVAL;
3309 goto out;
3310 } else {
3311 proc_unlock(pt);
3312 }
3313 break;
3314
3315 case CS_OPS_MARKKILL:
3316 proc_lock(pt);
3317 proc_csflags_set(p: pt, CS_KILL);
3318 if ((proc_getcsflags(p: pt) & CS_VALID) == 0) {
3319 proc_unlock(pt);
3320 psignal(p: pt, SIGKILL);
3321 } else {
3322 proc_unlock(pt);
3323 }
3324 break;
3325
3326 case CS_OPS_PIDOFFSET:
3327 toff = pt->p_textoff;
3328 proc_rele(p: pt);
3329 error = copyout(&toff, uaddr, sizeof(toff));
3330 return error;
3331
3332 case CS_OPS_CDHASH:
3333
3334 /* pt already holds a reference on its p_textvp */
3335 tvp = pt->p_textvp;
3336 toff = pt->p_textoff;
3337
3338 if (tvp == NULLVP || usize != SHA1_RESULTLEN) {
3339 proc_rele(p: pt);
3340 return EINVAL;
3341 }
3342
3343 error = vn_getcdhash(vp: tvp, offset: toff, cdhash);
3344 proc_rele(p: pt);
3345
3346 if (error == 0) {
3347 error = copyout(cdhash, uaddr, sizeof(cdhash));
3348 }
3349
3350 return error;
3351
3352 case CS_OPS_ENTITLEMENTS_BLOB: {
3353 void *start;
3354 size_t length;
3355 struct cs_blob* blob;
3356
3357 proc_lock(pt);
3358 if ((proc_getcsflags(p: pt) & (CS_VALID | CS_DEBUGGED)) == 0) {
3359 proc_unlock(pt);
3360 error = EINVAL;
3361 goto out;
3362 }
3363 blob = csproc_get_blob(pt);
3364 proc_unlock(pt);
3365
3366 if (!blob) {
3367 error = EBADEXEC;
3368 goto out;
3369 }
3370
3371 void* osent = csblob_os_entitlements_get(csblob: blob);
3372 if (!osent) {
3373 goto out;
3374 }
3375 CS_GenericBlob* xmlblob = NULL;
3376 if (amfi->OSEntitlements_get_xml(osent, &xmlblob)) {
3377 start = (void*)xmlblob;
3378 length = (size_t)ntohl(xmlblob->length);
3379 } else {
3380 goto out;
3381 }
3382
3383 error = csops_copy_token(start, length, usize, uaddr);
3384 kfree_data(start, length);
3385 goto out;
3386 }
3387 case CS_OPS_DER_ENTITLEMENTS_BLOB: {
3388 const void *start;
3389 size_t length;
3390 struct cs_blob* blob;
3391
3392 proc_lock(pt);
3393 if ((proc_getcsflags(p: pt) & (CS_VALID | CS_DEBUGGED)) == 0) {
3394 proc_unlock(pt);
3395 error = EINVAL;
3396 goto out;
3397 }
3398 blob = csproc_get_blob(pt);
3399 proc_unlock(pt);
3400
3401 if (!blob) {
3402 error = EBADEXEC;
3403 goto out;
3404 }
3405
3406 error = csblob_get_der_entitlements(blob, (const CS_GenericBlob **)&start, &length);
3407 if (error || start == NULL) {
3408 if (amfi && csblob_os_entitlements_get(csblob: blob)) {
3409 void* osent = csblob_os_entitlements_get(csblob: blob);
3410
3411 const CS_GenericBlob* transmuted = NULL;
3412 if (amfi->OSEntitlements_get_transmuted(osent, &transmuted)) {
3413 start = transmuted;
3414 length = (size_t)ntohl(transmuted->length);
3415 } else {
3416 goto out;
3417 }
3418 } else {
3419 goto out;
3420 }
3421 }
3422
3423 error = csops_copy_token(start, length, usize, uaddr);
3424 goto out;
3425 }
3426
3427 case CS_OPS_VALIDATION_CATEGORY:
3428 {
3429 unsigned int validation_category = CS_VALIDATION_CATEGORY_INVALID;
3430 error = csproc_get_validation_category(pt, &validation_category);
3431 if (error) {
3432 goto out;
3433 }
3434 error = copyout(&validation_category, uaddr, sizeof(validation_category));
3435 break;
3436 }
3437
3438 case CS_OPS_MARKRESTRICT:
3439 proc_lock(pt);
3440 proc_csflags_set(p: pt, CS_RESTRICT);
3441 proc_unlock(pt);
3442 break;
3443
3444 case CS_OPS_SET_STATUS: {
3445 uint32_t flags;
3446
3447 if (usize < sizeof(flags)) {
3448 error = ERANGE;
3449 break;
3450 }
3451
3452 error = copyin(uaddr, &flags, sizeof(flags));
3453 if (error) {
3454 break;
3455 }
3456
3457 /* only allow setting a subset of all code sign flags */
3458 flags &=
3459 CS_HARD | CS_EXEC_SET_HARD |
3460 CS_KILL | CS_EXEC_SET_KILL |
3461 CS_RESTRICT |
3462 CS_REQUIRE_LV |
3463 CS_ENFORCEMENT | CS_EXEC_SET_ENFORCEMENT;
3464
3465 proc_lock(pt);
3466 if (proc_getcsflags(p: pt) & CS_VALID) {
3467 if ((flags & CS_ENFORCEMENT) &&
3468 !(proc_getcsflags(p: pt) & CS_ENFORCEMENT)) {
3469 vm_map_cs_enforcement_set(map: get_task_map(proc_task(proc: pt)), TRUE);
3470 }
3471 proc_csflags_set(p: pt, flags);
3472 } else {
3473 error = EINVAL;
3474 }
3475 proc_unlock(pt);
3476
3477 break;
3478 }
3479 case CS_OPS_CLEAR_LV: {
3480 /*
3481 * This option is used to remove library validation from
3482 * a running process. This is used in plugin architectures
3483 * when a program needs to load untrusted libraries. This
3484 * allows the process to maintain library validation as
3485 * long as possible, then drop it only when required.
3486 * Once a process has loaded the untrusted library,
3487 * relying on library validation in the future will
3488 * not be effective. An alternative is to re-exec
3489 * your application without library validation, or
3490 * fork an untrusted child.
3491 */
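		/*
		 * Illustrative user-space call (a sketch, not a statement of
		 * supported API usage): a plugin host holding the clear-LV
		 * entitlement that is about to dlopen() an untrusted bundle
		 * could issue
		 *
		 *	csops(getpid(), CS_OPS_CLEAR_LV, NULL, 0);
		 *
		 * uaddr/usize are ignored for this operation, as the code
		 * below shows.
		 */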
3492#if !defined(XNU_TARGET_OS_OSX)
3493 // We only support dropping library validation on macOS
3494 error = ENOTSUP;
3495#else
3496 /*
3497 * if we have the flag set, and the caller wants
3498 * to remove it, and they're entitled to, then
3499 * we remove it from the csflags
3500 *
3501 * NOTE: We are fine to poke into the task because
3502 * we get a ref to pt when we do the proc_find
3503 * at the beginning of this function.
3504 *
3505 * We also only allow altering ourselves.
3506 */
3507 if (forself == 1 && IOTaskHasEntitlement(task: proc_task(proc: pt), CLEAR_LV_ENTITLEMENT)) {
3508 proc_lock(pt);
3509 if (!(proc_getcsflags(p: pt) & CS_INSTALLER)) {
3510 proc_csflags_clear(p: pt, CS_REQUIRE_LV | CS_FORCED_LV);
3511 error = 0;
3512 } else {
3513 error = EPERM;
3514 }
3515 proc_unlock(pt);
3516 } else {
3517 error = EPERM;
3518 }
3519#endif
3520 break;
3521 }
3522 case CS_OPS_BLOB: {
3523 void *start;
3524 size_t length;
3525
3526 proc_lock(pt);
3527 if ((proc_getcsflags(p: pt) & (CS_VALID | CS_DEBUGGED)) == 0) {
3528 proc_unlock(pt);
3529 error = EINVAL;
3530 break;
3531 }
3532 proc_unlock(pt);
3533 // Don't need to lock here as not accessing CSFLAGS
3534 error = cs_blob_get(pt, &start, &length);
3535 if (error) {
3536 goto out;
3537 }
3538
3539 error = csops_copy_token(start, length, usize, uaddr);
3540 goto out;
3541 }
3542 case CS_OPS_IDENTITY:
3543 case CS_OPS_TEAMID: {
3544 const char *identity;
3545 uint8_t fakeheader[8];
3546 uint32_t idlen;
3547 size_t length;
3548
3549 /*
3550 * Make identity have a blob header to make it
3551 * easier on userland to guess the identity
3552 * length.
3553 */
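		/*
		 * Illustrative layout (example identity "ABCDE12345", 10 chars):
		 * bytes 0..3 are zero, bytes 4..7 hold htonl(8 + 11) = 19, and
		 * the NUL-terminated identity string follows at offset 8, for
		 * 19 bytes total that userland can parse like a regular blob.
		 */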
3554 if (usize < sizeof(fakeheader)) {
3555 error = ERANGE;
3556 break;
3557 }
3558 memset(s: fakeheader, c: 0, n: sizeof(fakeheader));
3559
3560 proc_lock(pt);
3561 if ((proc_getcsflags(p: pt) & (CS_VALID | CS_DEBUGGED)) == 0) {
3562 proc_unlock(pt);
3563 error = EINVAL;
3564 break;
3565 }
3566 identity = ops == CS_OPS_TEAMID ? csproc_get_teamid(pt) : cs_identity_get(pt);
3567 proc_unlock(pt);
3568
3569 if (identity == NULL) {
3570 error = ENOENT;
3571 goto out;
3572 }
3573
3574 length = strlen(s: identity) + 1; /* include NUL */
3575 idlen = htonl((uint32_t)(length + sizeof(fakeheader)));
3576 memcpy(dst: &fakeheader[4], src: &idlen, n: sizeof(idlen));
3577
3578 error = copyout(fakeheader, uaddr, sizeof(fakeheader));
3579 if (error) {
3580 goto out;
3581 }
3582
3583 if (usize < sizeof(fakeheader) + length) {
3584 error = ERANGE;
3585 } else if (usize > sizeof(fakeheader)) {
3586 error = copyout(identity, uaddr + sizeof(fakeheader), length);
3587 }
3588 goto out;
3589 }
3590
3591 case CS_OPS_CLEARINSTALLER:
3592 proc_lock(pt);
3593 proc_csflags_clear(p: pt, CS_INSTALLER | CS_DATAVAULT_CONTROLLER | CS_EXEC_INHERIT_SIP);
3594 proc_unlock(pt);
3595 break;
3596
3597 case CS_OPS_CLEARPLATFORM:
3598#if DEVELOPMENT || DEBUG
3599 if (cs_process_global_enforcement()) {
3600 error = ENOTSUP;
3601 break;
3602 }
3603
3604#if CONFIG_CSR
3605 if (csr_check(CSR_ALLOW_APPLE_INTERNAL) != 0) {
3606 error = ENOTSUP;
3607 break;
3608 }
3609#endif /* CONFIG_CSR */
3610 task_t task = proc_task(pt);
3611
3612 proc_lock(pt);
3613 proc_csflags_clear(pt, CS_PLATFORM_BINARY | CS_PLATFORM_PATH);
3614 task_set_hardened_runtime(task, false);
3615 csproc_clear_platform_binary(pt);
3616 proc_unlock(pt);
3617 break;
3618#else /* DEVELOPMENT || DEBUG */
3619 error = ENOTSUP;
3620 break;
3621#endif /* DEVELOPMENT || DEBUG */
3622
3623 default:
3624 error = EINVAL;
3625 break;
3626 }
3627out:
3628 proc_rele(p: pt);
3629 return error;
3630}
3631
3632void
3633proc_iterate(
3634 unsigned int flags,
3635 proc_iterate_fn_t callout,
3636 void *arg,
3637 proc_iterate_fn_t filterfn,
3638 void *filterarg)
3639{
3640 pidlist_t pid_list, *pl = pidlist_init(pl: &pid_list);
3641 u_int pid_count_available = 0;
3642
3643 assert(callout != NULL);
3644
3645 /* allocate outside of the proc_list_lock */
3646 for (;;) {
3647 proc_list_lock();
3648 pid_count_available = nprocs + 1; /* kernel_task not counted in nprocs */
3649 assert(pid_count_available > 0);
3650 if (pidlist_nalloc(pl) >= pid_count_available) {
3651 break;
3652 }
3653 proc_list_unlock();
3654
3655 pidlist_alloc(pl, needed: pid_count_available);
3656 }
3657 pidlist_set_active(pl);
3658
3659 /* filter pids into the pid_list */
3660
3661 u_int pid_count = 0;
3662 if (flags & PROC_ALLPROCLIST) {
3663 proc_t p;
3664 ALLPROC_FOREACH(p) {
3665 /* ignore processes that are being forked */
3666 if (p->p_stat == SIDL || proc_is_shadow(p)) {
3667 continue;
3668 }
3669 if ((filterfn != NULL) && (filterfn(p, filterarg) == 0)) {
3670 continue;
3671 }
3672 pidlist_add_pid(pl, pid: proc_pid(p));
3673 if (++pid_count >= pid_count_available) {
3674 break;
3675 }
3676 }
3677 }
3678
3679 if ((pid_count < pid_count_available) &&
3680 (flags & PROC_ZOMBPROCLIST)) {
3681 proc_t p;
3682 ZOMBPROC_FOREACH(p) {
3683 if (proc_is_shadow(p)) {
3684 continue;
3685 }
3686 if ((filterfn != NULL) && (filterfn(p, filterarg) == 0)) {
3687 continue;
3688 }
3689 pidlist_add_pid(pl, pid: proc_pid(p));
3690 if (++pid_count >= pid_count_available) {
3691 break;
3692 }
3693 }
3694 }
3695
3696 proc_list_unlock();
3697
3698 /* call callout on processes in the pid_list */
3699
3700 const pidlist_entry_t *pe;
3701 SLIST_FOREACH(pe, &(pl->pl_head), pe_link) {
3702 for (u_int i = 0; i < pe->pe_nused; i++) {
3703 const pid_t pid = pe->pe_pid[i];
3704 proc_t p = proc_find(pid);
3705 if (p) {
3706 if ((flags & PROC_NOWAITTRANS) == 0) {
3707 proc_transwait(p, locked: 0);
3708 }
3709 const int callout_ret = callout(p, arg);
3710
3711 switch (callout_ret) {
3712 case PROC_RETURNED_DONE:
3713 proc_rele(p);
3714 OS_FALLTHROUGH;
3715 case PROC_CLAIMED_DONE:
3716 goto out;
3717
3718 case PROC_RETURNED:
3719 proc_rele(p);
3720 OS_FALLTHROUGH;
3721 case PROC_CLAIMED:
3722 break;
3723 default:
3724 panic("%s: callout =%d for pid %d",
3725 __func__, callout_ret, pid);
3726 break;
3727 }
3728 } else if (flags & PROC_ZOMBPROCLIST) {
3729 p = proc_find_zombref(pid);
3730 if (!p) {
3731 continue;
3732 }
3733 const int callout_ret = callout(p, arg);
3734
3735 switch (callout_ret) {
3736 case PROC_RETURNED_DONE:
3737 proc_drop_zombref(p);
3738 OS_FALLTHROUGH;
3739 case PROC_CLAIMED_DONE:
3740 goto out;
3741
3742 case PROC_RETURNED:
3743 proc_drop_zombref(p);
3744 OS_FALLTHROUGH;
3745 case PROC_CLAIMED:
3746 break;
3747 default:
3748 panic("%s: callout =%d for zombie %d",
3749 __func__, callout_ret, pid);
3750 break;
3751 }
3752 }
3753 }
3754 }
3755out:
3756 pidlist_free(pl);
3757}
3758
3759void
3760proc_rebootscan(
3761 proc_iterate_fn_t callout,
3762 void *arg,
3763 proc_iterate_fn_t filterfn,
3764 void *filterarg)
3765{
3766 proc_t p;
3767
3768 assert(callout != NULL);
3769
3770 proc_shutdown_exitcount = 0;
3771
3772restart_foreach:
3773
3774 proc_list_lock();
3775
3776 ALLPROC_FOREACH(p) {
3777 if ((filterfn != NULL) && filterfn(p, filterarg) == 0) {
3778 continue;
3779 }
3780 p = proc_ref(p, true);
3781 if (!p) {
3782 proc_list_unlock();
3783 goto restart_foreach;
3784 }
3785
3786 proc_list_unlock();
3787
3788 proc_transwait(p, locked: 0);
3789 (void)callout(p, arg);
3790 proc_rele(p);
3791
3792 goto restart_foreach;
3793 }
3794
3795 proc_list_unlock();
3796}
3797
3798void
3799proc_childrenwalk(
3800 proc_t parent,
3801 proc_iterate_fn_t callout,
3802 void *arg)
3803{
3804 pidlist_t pid_list, *pl = pidlist_init(pl: &pid_list);
3805 u_int pid_count_available = 0;
3806
3807 assert(parent != NULL);
3808 assert(callout != NULL);
3809
3810 for (;;) {
3811 proc_list_lock();
3812 pid_count_available = parent->p_childrencnt;
3813 if (pid_count_available == 0) {
3814 proc_list_unlock();
3815 goto out;
3816 }
3817 if (pidlist_nalloc(pl) >= pid_count_available) {
3818 break;
3819 }
3820 proc_list_unlock();
3821
3822 pidlist_alloc(pl, needed: pid_count_available);
3823 }
3824 pidlist_set_active(pl);
3825
3826 u_int pid_count = 0;
3827 proc_t p;
3828 PCHILDREN_FOREACH(parent, p) {
3829 if (p->p_stat == SIDL || proc_is_shadow(p)) {
3830 continue;
3831 }
3832
3833 pidlist_add_pid(pl, pid: proc_pid(p));
3834 if (++pid_count >= pid_count_available) {
3835 break;
3836 }
3837 }
3838
3839 proc_list_unlock();
3840
3841 const pidlist_entry_t *pe;
3842 SLIST_FOREACH(pe, &(pl->pl_head), pe_link) {
3843 for (u_int i = 0; i < pe->pe_nused; i++) {
3844 const pid_t pid = pe->pe_pid[i];
3845 p = proc_find(pid);
3846 if (!p) {
3847 continue;
3848 }
3849 const int callout_ret = callout(p, arg);
3850
3851 switch (callout_ret) {
3852 case PROC_RETURNED_DONE:
3853 proc_rele(p);
3854 OS_FALLTHROUGH;
3855 case PROC_CLAIMED_DONE:
3856 goto out;
3857
3858 case PROC_RETURNED:
3859 proc_rele(p);
3860 OS_FALLTHROUGH;
3861 case PROC_CLAIMED:
3862 break;
3863 default:
3864 panic("%s: callout =%d for pid %d",
3865 __func__, callout_ret, pid);
3866 break;
3867 }
3868 }
3869 }
3870out:
3871 pidlist_free(pl);
3872}
3873
3874void
3875pgrp_iterate(
3876 struct pgrp *pgrp,
3877 proc_iterate_fn_t callout,
3878 void * arg,
3879 bool (^filterfn)(proc_t))
3880{
3881 pidlist_t pid_list, *pl = pidlist_init(pl: &pid_list);
3882 u_int pid_count_available = 0;
3883 proc_t p;
3884
3885 assert(pgrp != NULL);
3886 assert(callout != NULL);
3887
3888 for (;;) {
3889 pgrp_lock(pgrp);
3890 /*
3891 * each member has one ref + some transient holders,
3892 * this is a good enough approximation
3893 */
3894 pid_count_available = os_ref_get_count_mask(rc: &pgrp->pg_refcount,
3895 PGRP_REF_BITS);
3896 if (pidlist_nalloc(pl) >= pid_count_available) {
3897 break;
3898 }
3899 pgrp_unlock(pgrp);
3900
3901 pidlist_alloc(pl, needed: pid_count_available);
3902 }
3903 pidlist_set_active(pl);
3904
3905 const pid_t pgid = pgrp->pg_id;
3906 u_int pid_count = 0;
3907
3908 PGMEMBERS_FOREACH(pgrp, p) {
3909 if ((filterfn != NULL) && (filterfn(p) == 0)) {
3910 continue;
3911 }
3912 pidlist_add_pid(pl, pid: proc_pid(p));
3913 if (++pid_count >= pid_count_available) {
3914 break;
3915 }
3916 }
3917
3918 pgrp_unlock(pgrp);
3919
3920 const pidlist_entry_t *pe;
3921 SLIST_FOREACH(pe, &(pl->pl_head), pe_link) {
3922 for (u_int i = 0; i < pe->pe_nused; i++) {
3923 const pid_t pid = pe->pe_pid[i];
3924 if (0 == pid) {
3925 continue; /* skip kernproc */
3926 }
3927 p = proc_find(pid);
3928 if (!p) {
3929 continue;
3930 }
3931 if (p->p_pgrpid != pgid) {
3932 proc_rele(p);
3933 continue;
3934 }
3935 const int callout_ret = callout(p, arg);
3936
3937 switch (callout_ret) {
3938 case PROC_RETURNED:
3939 proc_rele(p);
3940 OS_FALLTHROUGH;
3941 case PROC_CLAIMED:
3942 break;
3943 case PROC_RETURNED_DONE:
3944 proc_rele(p);
3945 OS_FALLTHROUGH;
3946 case PROC_CLAIMED_DONE:
3947 goto out;
3948
3949 default:
3950 panic("%s: callout =%d for pid %d",
3951 __func__, callout_ret, pid);
3952 }
3953 }
3954 }
3955
3956out:
3957 pidlist_free(pl);
3958}
3959
3960/* consumes the newpg ref */
3961static void
3962pgrp_replace(struct proc *p, struct pgrp *newpg)
3963{
3964 struct pgrp *oldpg;
3965
3966 proc_list_lock();
3967 oldpg = smr_serialized_load(&p->p_pgrp);
3968 pgrp_del_member(pgrp: oldpg, p);
3969 pgrp_add_member(pgrp: newpg, PROC_NULL, p);
3970 proc_list_unlock();
3971
3972 pgrp_rele(pgrp: oldpg);
3973}
3974
3975struct pgrp *
3976pgrp_alloc(pid_t pgid, pggrp_ref_bits_t bits)
3977{
3978 struct pgrp *pgrp = zalloc_flags(pgrp_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
3979
3980 os_ref_init_mask(&pgrp->pg_refcount, PGRP_REF_BITS, &p_refgrp, bits);
3981 os_ref_init_raw(&pgrp->pg_hashref, &p_refgrp);
3982 LIST_INIT(&pgrp->pg_members);
3983 lck_mtx_init(lck: &pgrp->pg_mlock, grp: &proc_mlock_grp, attr: &proc_lck_attr);
3984 pgrp->pg_id = pgid;
3985
3986 return pgrp;
3987}
3988
3989void
3990pgrp_lock(struct pgrp * pgrp)
3991{
3992 lck_mtx_lock(lck: &pgrp->pg_mlock);
3993}
3994
3995void
3996pgrp_unlock(struct pgrp * pgrp)
3997{
3998 lck_mtx_unlock(lck: &pgrp->pg_mlock);
3999}
4000
4001struct session *
4002session_find_locked(pid_t sessid)
4003{
4004 struct session *sess;
4005
4006 LIST_FOREACH(sess, SESSHASH(sessid), s_hash) {
4007 if (sess->s_sid == sessid) {
4008 break;
4009 }
4010 }
4011
4012 return sess;
4013}
4014
4015void
4016session_replace_leader(struct proc *old_proc, struct proc *new_proc)
4017{
4018 assert(old_proc == current_proc());
4019
4020 /* If old_proc is session leader, change the leader to new proc */
4021 struct pgrp *pgrp = smr_serialized_load(&old_proc->p_pgrp);
4022 struct session *sessp = pgrp->pg_session;
4023 struct tty *ttyp = TTY_NULL;
4024
4025 if (sessp == SESSION_NULL || !SESS_LEADER(old_proc, sessp)) {
4026 return;
4027 }
4028
4029 session_lock(sess: sessp);
4030 if (sessp->s_ttyp && sessp->s_ttyp->t_session == sessp) {
4031 ttyp = sessp->s_ttyp;
4032 ttyhold(tp: ttyp);
4033 }
4034
4035 /* Do the dance to take tty lock and session lock */
4036 if (ttyp) {
4037 session_unlock(sess: sessp);
4038 tty_lock(tp: ttyp);
4039 session_lock(sess: sessp);
4040 }
4041
4042 sessp->s_leader = new_proc;
4043 session_unlock(sess: sessp);
4044
4045 if (ttyp) {
4046 tty_unlock(tp: ttyp);
4047 ttyfree(ttyp);
4048 }
4049}
4050
4051void
4052session_lock(struct session * sess)
4053{
4054 lck_mtx_lock(lck: &sess->s_mlock);
4055}
4056
4057
4058void
4059session_unlock(struct session * sess)
4060{
4061 lck_mtx_unlock(lck: &sess->s_mlock);
4062}
4063
4064struct pgrp *
4065proc_pgrp(proc_t p, struct session **sessp)
4066{
4067 struct pgrp *pgrp = PGRP_NULL;
4068 bool success = false;
4069
4070 if (__probable(p != PROC_NULL)) {
4071 smr_proc_task_enter();
4072 pgrp = smr_entered_load(&p->p_pgrp);
4073 success = pgrp == PGRP_NULL || pg_ref_try(pgrp);
4074 smr_proc_task_leave();
4075
4076 if (__improbable(!success)) {
4077 /*
4078 * We caught the process in the middle of pgrp_replace(),
4079			 * go the slow, never-failing way.
4080 */
4081 proc_list_lock();
4082 pgrp = pg_ref(smr_serialized_load(&p->p_pgrp));
4083 proc_list_unlock();
4084 }
4085 }
4086
4087 if (sessp) {
4088 *sessp = pgrp ? pgrp->pg_session : SESSION_NULL;
4089 }
4090 return pgrp;
4091}
4092
4093struct pgrp *
4094tty_pgrp_locked(struct tty *tp)
4095{
4096 struct pgrp *pg = PGRP_NULL;
4097
4098 /* either the tty_lock() or the proc_list_lock() must be held */
4099
4100 if (tp->t_pgrp) {
4101 pg = pg_ref(pgrp: tp->t_pgrp);
4102 }
4103
4104 return pg;
4105}
4106
4107int
4108proc_transstart(proc_t p, int locked, int non_blocking)
4109{
4110 if (locked == 0) {
4111 proc_lock(p);
4112 }
4113 while ((p->p_lflag & P_LINTRANSIT) == P_LINTRANSIT) {
4114 if (((p->p_lflag & P_LTRANSCOMMIT) == P_LTRANSCOMMIT) || non_blocking) {
4115 if (locked == 0) {
4116 proc_unlock(p);
4117 }
4118 return EDEADLK;
4119 }
4120 p->p_lflag |= P_LTRANSWAIT;
4121 msleep(chan: &p->p_lflag, mtx: &p->p_mlock, pri: 0, wmesg: "proc_signstart", NULL);
4122 }
4123 p->p_lflag |= P_LINTRANSIT;
4124 p->p_transholder = current_thread();
4125 if (locked == 0) {
4126 proc_unlock(p);
4127 }
4128 return 0;
4129}
4130
4131void
4132proc_transcommit(proc_t p, int locked)
4133{
4134 if (locked == 0) {
4135 proc_lock(p);
4136 }
4137
4138 assert((p->p_lflag & P_LINTRANSIT) == P_LINTRANSIT);
4139 assert(p->p_transholder == current_thread());
4140 p->p_lflag |= P_LTRANSCOMMIT;
4141
4142 if ((p->p_lflag & P_LTRANSWAIT) == P_LTRANSWAIT) {
4143 p->p_lflag &= ~P_LTRANSWAIT;
4144 wakeup(chan: &p->p_lflag);
4145 }
4146 if (locked == 0) {
4147 proc_unlock(p);
4148 }
4149}
4150
4151void
4152proc_transend(proc_t p, int locked)
4153{
4154 if (locked == 0) {
4155 proc_lock(p);
4156 }
4157
4158 p->p_lflag &= ~(P_LINTRANSIT | P_LTRANSCOMMIT);
4159 p->p_transholder = NULL;
4160
4161 if ((p->p_lflag & P_LTRANSWAIT) == P_LTRANSWAIT) {
4162 p->p_lflag &= ~P_LTRANSWAIT;
4163 wakeup(chan: &p->p_lflag);
4164 }
4165 if (locked == 0) {
4166 proc_unlock(p);
4167 }
4168}
4169
4170int
4171proc_transwait(proc_t p, int locked)
4172{
4173 if (locked == 0) {
4174 proc_lock(p);
4175 }
4176 while ((p->p_lflag & P_LINTRANSIT) == P_LINTRANSIT) {
4177 if ((p->p_lflag & P_LTRANSCOMMIT) == P_LTRANSCOMMIT && current_proc() == p) {
4178 if (locked == 0) {
4179 proc_unlock(p);
4180 }
4181 return EDEADLK;
4182 }
4183 p->p_lflag |= P_LTRANSWAIT;
4184 msleep(chan: &p->p_lflag, mtx: &p->p_mlock, pri: 0, wmesg: "proc_signstart", NULL);
4185 }
4186 if (locked == 0) {
4187 proc_unlock(p);
4188 }
4189 return 0;
4190}
4191
4192void
4193proc_klist_lock(void)
4194{
4195 lck_mtx_lock(lck: &proc_klist_mlock);
4196}
4197
4198void
4199proc_klist_unlock(void)
4200{
4201 lck_mtx_unlock(lck: &proc_klist_mlock);
4202}
4203
4204void
4205proc_knote(struct proc * p, long hint)
4206{
4207 proc_klist_lock();
4208 KNOTE(&p->p_klist, hint);
4209 proc_klist_unlock();
4210}
4211
4212void
4213proc_transfer_knotes(struct proc *old_proc, struct proc *new_proc)
4214{
4215 struct knote *kn = NULL;
4216
4217 proc_klist_lock();
4218 while ((kn = SLIST_FIRST(&old_proc->p_klist))) {
4219 KNOTE_DETACH(&old_proc->p_klist, kn);
4220 if (kn->kn_filtid == (uint8_t)~EVFILT_PROC) {
4221 kn->kn_proc = new_proc;
4222 KNOTE_ATTACH(&new_proc->p_klist, kn);
4223 } else {
4224 assert(kn->kn_filtid == (uint8_t)~EVFILT_SIGNAL);
4225 kn->kn_proc = NULL;
4226 }
4227 }
4228 proc_klist_unlock();
4229}
4230
4231void
4232proc_knote_drain(struct proc *p)
4233{
4234 struct knote *kn = NULL;
4235
4236 /*
4237 * Clear the proc's klist to avoid references after the proc is reaped.
4238 */
4239 proc_klist_lock();
4240 while ((kn = SLIST_FIRST(&p->p_klist))) {
4241 kn->kn_proc = PROC_NULL;
4242 KNOTE_DETACH(&p->p_klist, kn);
4243 }
4244 proc_klist_unlock();
4245}
4246
4247void
4248proc_setregister(proc_t p)
4249{
4250 proc_lock(p);
4251 p->p_lflag |= P_LREGISTER;
4252 proc_unlock(p);
4253}
4254
4255void
4256proc_resetregister(proc_t p)
4257{
4258 proc_lock(p);
4259 p->p_lflag &= ~P_LREGISTER;
4260 proc_unlock(p);
4261}
4262
4263bool
4264proc_get_pthread_jit_allowlist(proc_t p, bool *late_out)
4265{
4266 bool ret = false;
4267
4268 proc_lock(p);
4269 ret = (p->p_lflag & P_LPTHREADJITALLOWLIST);
4270 *late_out = (p->p_lflag & P_LPTHREADJITFREEZELATE);
4271 proc_unlock(p);
4272
4273 return ret;
4274}
4275
4276void
4277proc_set_pthread_jit_allowlist(proc_t p, bool late)
4278{
4279 proc_lock(p);
4280 p->p_lflag |= P_LPTHREADJITALLOWLIST;
4281 if (late) {
4282 p->p_lflag |= P_LPTHREADJITFREEZELATE;
4283 }
4284 proc_unlock(p);
4285}
4286
4287pid_t
4288proc_pgrpid(proc_t p)
4289{
4290 return p->p_pgrpid;
4291}
4292
4293pid_t
4294proc_sessionid(proc_t p)
4295{
4296 return p->p_sessionid;
4297}
4298
4299pid_t
4300proc_selfpgrpid()
4301{
4302 return current_proc()->p_pgrpid;
4303}
4304
4305
4306/* return control and action states */
4307int
4308proc_getpcontrol(int pid, int * pcontrolp)
4309{
4310 proc_t p;
4311
4312 p = proc_find(pid);
4313 if (p == PROC_NULL) {
4314 return ESRCH;
4315 }
4316 if (pcontrolp != NULL) {
4317 *pcontrolp = p->p_pcaction;
4318 }
4319
4320 proc_rele(p);
4321 return 0;
4322}
4323
4324int
4325proc_dopcontrol(proc_t p)
4326{
4327 int pcontrol;
4328 os_reason_t kill_reason;
4329
4330 proc_lock(p);
4331
4332 pcontrol = PROC_CONTROL_STATE(p);
4333
4334 if (PROC_ACTION_STATE(p) == 0) {
4335 switch (pcontrol) {
4336 case P_PCTHROTTLE:
4337 PROC_SETACTION_STATE(p);
4338 proc_unlock(p);
4339 printf("low swap: throttling pid %d (%s)\n", proc_getpid(p), p->p_comm);
4340 break;
4341
4342 case P_PCSUSP:
4343 PROC_SETACTION_STATE(p);
4344 proc_unlock(p);
4345 printf("low swap: suspending pid %d (%s)\n", proc_getpid(p), p->p_comm);
4346 task_suspend(target_task: proc_task(proc: p));
4347 break;
4348
4349 case P_PCKILL:
4350 PROC_SETACTION_STATE(p);
4351 proc_unlock(p);
4352 printf("low swap: killing pid %d (%s)\n", proc_getpid(p), p->p_comm);
4353 kill_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_LOWSWAP);
4354 psignal_with_reason(p, SIGKILL, signal_reason: kill_reason);
4355 break;
4356
4357 default:
4358 proc_unlock(p);
4359 }
4360 } else {
4361 proc_unlock(p);
4362 }
4363
4364 return PROC_RETURNED;
4365}
4366
4367
4368/*
4369 * Resume a throttled or suspended process. This is an internal interface that is only
4370 * used by the user-level code that presents the GUI when we run out of swap space, and
4371 * it is therefore restricted to processes with superuser privileges.
4372 */
4373
4374int
4375proc_resetpcontrol(int pid)
4376{
4377 proc_t p;
4378 int pcontrol;
4379 int error;
4380 proc_t self = current_proc();
4381
4382	/* allowed if the process has been validated to handle resource control, or if the caller is superuser */
4383 if (((self->p_lflag & P_LVMRSRCOWNER) == 0) && (error = suser(cred: kauth_cred_get(), acflag: 0))) {
4384 return error;
4385 }
4386
4387 p = proc_find(pid);
4388 if (p == PROC_NULL) {
4389 return ESRCH;
4390 }
4391
4392 proc_lock(p);
4393
4394 pcontrol = PROC_CONTROL_STATE(p);
4395
4396 if (PROC_ACTION_STATE(p) != 0) {
4397 switch (pcontrol) {
4398 case P_PCTHROTTLE:
4399 PROC_RESETACTION_STATE(p);
4400 proc_unlock(p);
4401 printf("low swap: unthrottling pid %d (%s)\n", proc_getpid(p), p->p_comm);
4402 break;
4403
4404 case P_PCSUSP:
4405 PROC_RESETACTION_STATE(p);
4406 proc_unlock(p);
4407 printf("low swap: resuming pid %d (%s)\n", proc_getpid(p), p->p_comm);
4408 task_resume(target_task: proc_task(proc: p));
4409 break;
4410
4411 case P_PCKILL:
4412 /* Huh? */
4413 PROC_SETACTION_STATE(p);
4414 proc_unlock(p);
4415 printf("low swap: attempt to unkill pid %d (%s) ignored\n", proc_getpid(p), p->p_comm);
4416 break;
4417
4418 default:
4419 proc_unlock(p);
4420 }
4421 } else {
4422 proc_unlock(p);
4423 }
4424
4425 proc_rele(p);
4426 return 0;
4427}
4428
4429
4430
4431struct no_paging_space {
4432 uint64_t pcs_max_size;
4433 uint64_t pcs_uniqueid;
4434 int pcs_pid;
4435 int pcs_proc_count;
4436 uint64_t pcs_total_size;
4437
4438 uint64_t npcs_max_size;
4439 uint64_t npcs_uniqueid;
4440 int npcs_pid;
4441 int npcs_proc_count;
4442 uint64_t npcs_total_size;
4443
4444 int apcs_proc_count;
4445 uint64_t apcs_total_size;
4446};
4447
4448
4449static int
4450proc_pcontrol_filter(proc_t p, void *arg)
4451{
4452 struct no_paging_space *nps;
4453 uint64_t compressed;
4454
4455 nps = (struct no_paging_space *)arg;
4456
4457 compressed = get_task_compressed(proc_task(proc: p));
4458
4459 if (PROC_CONTROL_STATE(p)) {
4460 if (PROC_ACTION_STATE(p) == 0) {
4461 if (compressed > nps->pcs_max_size) {
4462 nps->pcs_pid = proc_getpid(p);
4463 nps->pcs_uniqueid = proc_uniqueid(p);
4464 nps->pcs_max_size = compressed;
4465 }
4466 nps->pcs_total_size += compressed;
4467 nps->pcs_proc_count++;
4468 } else {
4469 nps->apcs_total_size += compressed;
4470 nps->apcs_proc_count++;
4471 }
4472 } else {
4473 if (compressed > nps->npcs_max_size) {
4474 nps->npcs_pid = proc_getpid(p);
4475 nps->npcs_uniqueid = proc_uniqueid(p);
4476 nps->npcs_max_size = compressed;
4477 }
4478 nps->npcs_total_size += compressed;
4479 nps->npcs_proc_count++;
4480 }
4481 return 0;
4482}
4483
4484
4485static int
4486proc_pcontrol_null(__unused proc_t p, __unused void *arg)
4487{
4488 return PROC_RETURNED;
4489}
4490
4491
4492/*
4493 * Deal with the low-on-compressor-pool-space condition... this function
4494 * gets called when we are approaching the limits of the compressor pool or
4495 * we are unable to create a new swap file.
4496 * Since this eventually creates a memory deadlock situation, we need to take action to free up
4497 * memory resources (both compressed and uncompressed) in order to prevent the system from hanging completely.
4498 * There are 2 categories of processes to deal with: those that have an action
4499 * associated with them by the task itself, and those that do not. Actionable
4500 * tasks can have one of three categories specified: ones that
4501 * can be killed immediately, ones that should be suspended, and ones that should
4502 * be throttled. Processes that do not have an action associated with them are normally
4503 * ignored unless they are utilizing such a large percentage of the compressor pool (currently 50%)
4504 * that only by killing them can we hope to put the system back into a usable state.
4505 */
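/*
 * Worked example (illustrative numbers): if the compressor currently holds
 * 1,000,000 compressed pages in total and the largest non-actionable process
 * accounts for 600,000 of them (60%, above the 50% threshold), that process
 * is killed outright; at 300,000 (30%) we would instead fall through to the
 * memorystatus/jetsam path or, failing that, to the largest actionable
 * process.
 */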
4506
4507#define NO_PAGING_SPACE_DEBUG 0
4508
4509extern uint64_t vm_compressor_pages_compressed(void);
4510
4511struct timeval last_no_space_action = {.tv_sec = 0, .tv_usec = 0};
4512
4513#define MB_SIZE (1024 * 1024ULL)
4514boolean_t memorystatus_kill_on_VM_compressor_space_shortage(boolean_t);
4515
4516extern int32_t max_kill_priority;
4517
4518int
4519no_paging_space_action()
4520{
4521 proc_t p;
4522 struct no_paging_space nps;
4523 struct timeval now;
4524 os_reason_t kill_reason;
4525
4526 /*
4527 * Throttle how often we come through here. Once every 5 seconds should be plenty.
4528 */
4529 microtime(tv: &now);
4530
4531 if (now.tv_sec <= last_no_space_action.tv_sec + 5) {
4532 return 0;
4533 }
4534
4535 /*
4536 * Examine all processes and find the biggest (biggest is based on the number of pages this
4537 * task has in the compressor pool) that has been marked to have some action
4538 * taken when swap space runs out... we also find the biggest that hasn't been marked for
4539 * action.
4540 *
4541	 * If the biggest non-actionable task is over the "dangerously big" threshold (currently 50% of
4542	 * the total number of pages held by the compressor), we go ahead and kill it since no other task
4543 * can have any real effect on the situation. Otherwise, we go after the actionable process.
4544 */
4545 bzero(s: &nps, n: sizeof(nps));
4546
4547 proc_iterate(PROC_ALLPROCLIST, callout: proc_pcontrol_null, arg: (void *)NULL, filterfn: proc_pcontrol_filter, filterarg: (void *)&nps);
4548
#if NO_PAGING_SPACE_DEBUG
	printf("low swap: npcs_proc_count = %d, npcs_total_size = %qd, npcs_max_size = %qd\n",
	    nps.npcs_proc_count, nps.npcs_total_size, nps.npcs_max_size);
	printf("low swap: pcs_proc_count = %d, pcs_total_size = %qd, pcs_max_size = %qd\n",
	    nps.pcs_proc_count, nps.pcs_total_size, nps.pcs_max_size);
	printf("low swap: apcs_proc_count = %d, apcs_total_size = %qd\n",
	    nps.apcs_proc_count, nps.apcs_total_size);
#endif
	if (nps.npcs_max_size > (vm_compressor_pages_compressed() * 50) / 100) {
		/*
		 * For now we'll knock out any task that has more than 50% of the
		 * pages held by the compressor.
		 */
		if ((p = proc_find(nps.npcs_pid)) != PROC_NULL) {
			if (nps.npcs_uniqueid == proc_uniqueid(p)) {
				/*
				 * Verify this is still the same process, in case the proc
				 * exited and the pid got reused while we were finishing the
				 * proc_iterate and getting to this point.
				 */
				last_no_space_action = now;

				printf("low swap: killing largest compressed process with pid %d (%s) and size %llu MB\n", proc_getpid(p), p->p_comm, (nps.npcs_max_size / MB_SIZE));
				kill_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_LOWSWAP);
				psignal_with_reason(p, SIGKILL, kill_reason);

				proc_rele(p);

				return 0;
			}

			proc_rele(p);
		}
	}

	/*
	 * There are processes within the jetsam bands we consider, and hence
	 * something can be killed, so ask the memorystatus thread to go ahead
	 * and kill something.
	 */
	if (memorystatus_get_proccnt_upto_priority(max_kill_priority) > 0) {
		last_no_space_action = now;
		/*
		 * TODO(jason): This is only macOS right now, but we'll need
		 * something like this on iPad...
		 */
		memorystatus_kill_on_VM_compressor_space_shortage(TRUE);
		return 1;
	}

	/*
	 * No eligible processes to kill.  So let's suspend/kill the largest
	 * process depending on its policy control specifications.
	 */

	if (nps.pcs_max_size > 0) {
		if ((p = proc_find(nps.pcs_pid)) != PROC_NULL) {
			if (nps.pcs_uniqueid == proc_uniqueid(p)) {
				/*
				 * Verify this is still the same process, in case the proc
				 * exited and the pid got reused while we were finishing the
				 * proc_iterate and getting to this point.
				 */
				last_no_space_action = now;

				proc_dopcontrol(p);

				proc_rele(p);

				return 1;
			}

			proc_rele(p);
		}
	}
	last_no_space_action = now;

	printf("low swap: unable to find any eligible processes to take action on\n");

	return 0;
}

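/*
 * proc_trace_log system call: requires the PRIV_PROC_TRACE_INSPECT
 * privilege, looks up the target by pid, verifies the caller-supplied
 * uniqueid still matches (to guard against pid reuse), and then asks the
 * task to send its trace memory.
 */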
int
proc_trace_log(__unused proc_t p, struct proc_trace_log_args *uap, __unused int *retval)
{
	int ret = 0;
	proc_t target_proc = PROC_NULL;
	pid_t target_pid = uap->pid;
	uint64_t target_uniqueid = uap->uniqueid;
	task_t target_task = NULL;

	if (priv_check_cred(kauth_cred_get(), PRIV_PROC_TRACE_INSPECT, 0)) {
		ret = EPERM;
		goto out;
	}
	target_proc = proc_find(target_pid);
	if (target_proc != PROC_NULL) {
		if (target_uniqueid != proc_uniqueid(target_proc)) {
			ret = ENOENT;
			goto out;
		}

		target_task = proc_task(target_proc);
		if (task_send_trace_memory(target_task, target_pid, target_uniqueid)) {
			ret = EINVAL;
			goto out;
		}
	} else {
		ret = ENOENT;
	}

out:
	if (target_proc != PROC_NULL) {
		proc_rele(target_proc);
	}
	return ret;
}

#if VM_SCAN_FOR_SHADOW_CHAIN
extern int vm_map_shadow_max(vm_map_t map);
int proc_shadow_max(void);
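/*
 * Walk the allproc list and return the deepest VM object shadow chain found
 * in any live task's map.  Used only when VM_SCAN_FOR_SHADOW_CHAIN is
 * configured.
 */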
int
proc_shadow_max(void)
{
	int retval, max;
	proc_t p;
	task_t task;
	vm_map_t map;

	max = 0;
	proc_list_lock();
	for (p = allproc.lh_first; (p != 0); p = p->p_list.le_next) {
		if (p->p_stat == SIDL) {
			continue;
		}
		task = proc_task(p);
		if (task == NULL) {
			continue;
		}
		map = get_task_map(task);
		if (map == NULL) {
			continue;
		}
		retval = vm_map_shadow_max(map);
		if (retval > max) {
			max = retval;
		}
	}
	proc_list_unlock();
	return max;
}
#endif /* VM_SCAN_FOR_SHADOW_CHAIN */

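/*
 * Record the pid that is "responsible" for this process.  The responsible
 * process's executable UUID is looked up and cached at assignment time as
 * well.
 */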
void proc_set_responsible_pid(proc_t target_proc, pid_t responsible_pid);
void
proc_set_responsible_pid(proc_t target_proc, pid_t responsible_pid)
{
	if (target_proc != NULL) {
		target_proc->p_responsible_pid = responsible_pid;

		// Also save the responsible UUID
		if (responsible_pid >= 0) {
			proc_t responsible_proc = proc_find(responsible_pid);
			if (responsible_proc != PROC_NULL) {
				proc_getexecutableuuid(responsible_proc, target_proc->p_responsible_uuid, sizeof(target_proc->p_responsible_uuid));
				proc_rele(responsible_proc);
			}
		}
	}
	return;
}

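/*
 * Return 1 if the process has an alternate root directory (i.e. it is
 * running chroot()ed), 0 otherwise.
 */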
int
proc_chrooted(proc_t p)
{
	int retval = 0;

	if (p) {
		proc_fdlock(p);
		retval = (p->p_fd.fd_rdir != NULL) ? 1 : 0;
		proc_fdunlock(p);
	}

	return retval;
}

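/*
 * Decide whether an EXC_RESOURCE exception should be delivered to this
 * process synchronously; currently that is the case only while the process
 * is being traced.
 */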
boolean_t
proc_send_synchronous_EXC_RESOURCE(proc_t p)
{
	if (p == PROC_NULL) {
		return FALSE;
	}

	/* Send sync EXC_RESOURCE if the process is traced */
	if (ISSET(p->p_lflag, P_LTRACED)) {
		return TRUE;
	}
	return FALSE;
}

#if CONFIG_MACF
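/*
 * Return the number of entries a syscall filter mask of the given class
 * must have: one per UNIX syscall, Mach trap, or kobject routine
 * respectively.
 */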
size_t
proc_get_syscall_filter_mask_size(int which)
{
	switch (which) {
	case SYSCALL_MASK_UNIX:
		return nsysent;
	case SYSCALL_MASK_MACH:
		return mach_trap_count;
	case SYSCALL_MASK_KOBJ:
		return mach_kobj_count;
	default:
		return 0;
	}
}

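/*
 * Install (or, with a NULL maskptr, clear) a syscall filter mask on a
 * process.  masklen must match the size reported by
 * proc_get_syscall_filter_mask_size() for the chosen class.
 *
 * Illustrative sketch of a caller (my_policy_build_mask is a hypothetical
 * helper, not part of this file):
 *
 *	size_t len = proc_get_syscall_filter_mask_size(SYSCALL_MASK_UNIX);
 *	unsigned char *mask = my_policy_build_mask(len);
 *	int err = proc_set_syscall_filter_mask(p, SYSCALL_MASK_UNIX, mask, len);
 */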
int
proc_set_syscall_filter_mask(proc_t p, int which, unsigned char *maskptr, size_t masklen)
{
#if DEVELOPMENT || DEBUG
	if (syscallfilter_disable) {
		printf("proc_set_syscall_filter_mask: attempt to set policy for pid %d, but disabled by boot-arg\n", proc_pid(p));
		return 0;
	}
#endif // DEVELOPMENT || DEBUG

	switch (which) {
	case SYSCALL_MASK_UNIX:
		if (maskptr != NULL && masklen != nsysent) {
			return EINVAL;
		}
		proc_syscall_filter_mask_set(p, maskptr);
		break;
	case SYSCALL_MASK_MACH:
		if (maskptr != NULL && masklen != (size_t)mach_trap_count) {
			return EINVAL;
		}
		mac_task_set_mach_filter_mask(proc_task(p), maskptr);
		break;
	case SYSCALL_MASK_KOBJ:
		if (maskptr != NULL && masklen != (size_t)mach_kobj_count) {
			return EINVAL;
		}
		mac_task_set_kobj_filter_mask(proc_task(p), maskptr);
		break;
	default:
		return EINVAL;
	}

	return 0;
}

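/*
 * Register the Mach trap / kobject filter evaluation callbacks with the
 * MAC framework.  The caller's structure version must match
 * SYSCALL_FILTER_CALLBACK_VERSION.
 */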
int
proc_set_syscall_filter_callbacks(syscall_filter_cbs_t cbs)
{
	if (cbs->version != SYSCALL_FILTER_CALLBACK_VERSION) {
		return EINVAL;
	}

	/* XXX register unix filter callback instead of using MACF hook. */

	if (cbs->mach_filter_cbfunc || cbs->kobj_filter_cbfunc) {
		if (mac_task_register_filter_callbacks(cbs->mach_filter_cbfunc,
		    cbs->kobj_filter_cbfunc) != 0) {
			return EPERM;
		}
	}

	return 0;
}

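/*
 * Associate a filter index with a kobject message id; only the kobject
 * class supports this mapping today.
 */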
int
proc_set_syscall_filter_index(int which, int num, int index)
{
	switch (which) {
	case SYSCALL_MASK_KOBJ:
		if (ipc_kobject_set_kobjidx(num, index) != 0) {
			return ENOENT;
		}
		break;
	default:
		return EINVAL;
	}

	return 0;
}
#endif /* CONFIG_MACF */

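/*
 * Set or query the task-level "filter messages" flag for a process; both
 * calls simply forward to the owning task.
 */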
int
proc_set_filter_message_flag(proc_t p, boolean_t flag)
{
	if (p == PROC_NULL) {
		return EINVAL;
	}

	task_set_filter_msg_flag(proc_task(p), flag);

	return 0;
}

int
proc_get_filter_message_flag(proc_t p, boolean_t *flag)
{
	if (p == PROC_NULL || flag == NULL) {
		return EINVAL;
	}

	*flag = task_get_filter_msg_flag(proc_task(p));

	return 0;
}

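/*
 * Return true if the process is currently being traced (P_LTRACED), taking
 * the proc lock to read the flag consistently.
 */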
bool
proc_is_traced(proc_t p)
{
	bool ret = FALSE;
	assert(p != PROC_NULL);
	proc_lock(p);
	if (p->p_lflag & P_LTRACED) {
		ret = TRUE;
	}
	proc_unlock(p);
	return ret;
}

#if CONFIG_PROC_RESOURCE_LIMITS
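/*
 * Install soft/hard limits on the number of open file descriptors for a
 * process.  A soft limit that is not below the hard limit is ignored, and
 * the current fd count is re-checked immediately in case a limit has
 * already been exceeded.
 */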
int
proc_set_filedesc_limits(proc_t p, int soft_limit, int hard_limit)
{
	struct filedesc *fdp = &p->p_fd;
	int retval = 0;

	proc_fdlock(p);

	if (hard_limit > 0) {
		if (soft_limit >= hard_limit) {
			soft_limit = 0;
		}
	}
	fdp->fd_nfiles_soft_limit = soft_limit;
	fdp->fd_nfiles_hard_limit = hard_limit;
	/* Make sure that current fd_nfiles hasn't already exceeded these limits */
	fd_check_limit_exceeded(fdp);

	proc_fdunlock(p);

	return retval;
}

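/*
 * Install soft/hard limits on the number of dynamic kqworkloops, and
 * re-check the current count in case a limit has already been exceeded.
 */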
int
proc_set_kqworkloop_limits(proc_t p, int soft_limit, int hard_limit)
{
	struct filedesc *fdp = &p->p_fd;
	lck_mtx_lock_spin_always(&fdp->fd_kqhashlock);

	fdp->kqwl_dyn_soft_limit = soft_limit;
	fdp->kqwl_dyn_hard_limit = hard_limit;
	/* Make sure existing limits aren't exceeded already */
	kqworkloop_check_limit_exceeded(fdp);

	lck_mtx_unlock(&fdp->fd_kqhashlock);
	return 0;
}

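/*
 * Snapshot the current fd count and limits under the fdlock and decide
 * which limit notification (if any) this thread should send; returns the
 * current fd count and reports the chosen limit via *soft_limit/*hard_limit.
 */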
static int
proc_evaluate_fd_limits_ast(proc_t p, struct filedesc *fdp, int *soft_limit, int *hard_limit)
{
	int fd_current_size, fd_soft_limit, fd_hard_limit;
	proc_fdlock(p);

	fd_current_size = fdp->fd_nfiles_open;
	fd_hard_limit = fdp->fd_nfiles_hard_limit;
	fd_soft_limit = fdp->fd_nfiles_soft_limit;

	/*
	 * If a thread is going to take action on a specific limit being exceeded,
	 * it also clears that limit out to a SENTINEL so that future threads
	 * don't evaluate it as having been exceeded again.
	 */
	if (fd_hard_limit > 0 && fd_current_size >= fd_hard_limit) {
		/* Clear our soft limit when we are sending hard limit notification */
		fd_soft_limit = 0;

		fdp->fd_nfiles_hard_limit = FD_LIMIT_SENTINEL;
	} else if (fd_soft_limit > 0 && fd_current_size >= fd_soft_limit) {
		/* Clear out hard limit when we are sending soft limit notification */
		fd_hard_limit = 0;

		fdp->fd_nfiles_soft_limit = FD_LIMIT_SENTINEL;
	} else {
		/* Neither limit was exceeded */
		fd_soft_limit = fd_hard_limit = 0;
	}

	proc_fdunlock(p);

	*soft_limit = fd_soft_limit;
	*hard_limit = fd_hard_limit;
	return fd_current_size;
}

static int
proc_evaluate_kqwl_limits_ast(struct filedesc *fdp, int *soft_limit, int *hard_limit)
{
	lck_mtx_lock_spin_always(&fdp->fd_kqhashlock);

	int kqwl_current_size = fdp->num_kqwls;
	int kqwl_soft_limit = fdp->kqwl_dyn_soft_limit;
	int kqwl_hard_limit = fdp->kqwl_dyn_hard_limit;

	/*
	 * If a thread is going to take action on a specific limit being exceeded,
	 * it also clears that limit out to a SENTINEL so that future threads
	 * don't evaluate it as having been exceeded again.
	 */
	if (kqwl_hard_limit > 0 && kqwl_current_size >= kqwl_hard_limit) {
		/* Clear our soft limit when we are sending hard limit notification */
		kqwl_soft_limit = 0;

		fdp->kqwl_dyn_hard_limit = KQWL_LIMIT_SENTINEL;
	} else if (kqwl_soft_limit > 0 && kqwl_current_size >= kqwl_soft_limit) {
		/* Clear out hard limit when we are sending soft limit notification */
		kqwl_hard_limit = 0;

		fdp->kqwl_dyn_soft_limit = KQWL_LIMIT_SENTINEL;
	} else {
		/* Neither limit was exceeded */
		kqwl_soft_limit = kqwl_hard_limit = 0;
	}

	lck_mtx_unlock(&fdp->fd_kqhashlock);

	*soft_limit = kqwl_soft_limit;
	*hard_limit = kqwl_hard_limit;
	return kqwl_current_size;
}
#endif /* CONFIG_PROC_RESOURCE_LIMITS */

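/*
 * AST handler run when a process trips one of its file-descriptor or
 * kqworkloop limits; it evaluates the fd limits first and only then the
 * kqworkloop limits, handing whichever fired off to the task layer.
 */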
void
proc_filedesc_ast(__unused task_t task)
{
#if CONFIG_PROC_RESOURCE_LIMITS
	assert(task == current_task());
	proc_t p = get_bsdtask_info(task);
	struct filedesc *fdp = &p->p_fd;

	/*
	 * At this point, we can possibly race with other threads which set the AST
	 * due to triggering the soft/hard limits for fds or kqworkloops.
	 *
	 * The first thread to reach this logic will always evaluate the hard limit
	 * for fds or kqworkloops, even if it was the one that triggered the soft
	 * limit for them.
	 *
	 * If a thread takes action on a specific limit, it will clear the limit
	 * value in the fdp with a SENTINEL to indicate to other racing threads
	 * that they no longer need to evaluate it.
	 */
	int soft_limit, hard_limit;
	int fd_current_size = proc_evaluate_fd_limits_ast(p, fdp, &soft_limit, &hard_limit);

	if (hard_limit || soft_limit) {
		return task_filedesc_ast(task, fd_current_size, soft_limit, hard_limit);
	}

	int kqwl_current_size = proc_evaluate_kqwl_limits_ast(fdp, &soft_limit, &hard_limit);
	if (hard_limit || soft_limit) {
		return task_kqworkloop_ast(task, kqwl_current_size, soft_limit, hard_limit);
	}
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
}

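/*
 * Allocate a read-only proc structure from the dedicated RO zone and seed
 * it with the supplied proc and/or task data; thereafter the element is
 * only modified through the RO-zone update routines used below.
 */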
proc_ro_t
proc_ro_alloc(proc_t p, proc_ro_data_t p_data, task_t t, task_ro_data_t t_data)
{
	proc_ro_t pr;
	struct proc_ro pr_local = {};

	pr = (proc_ro_t)zalloc_ro(ZONE_ID_PROC_RO, Z_WAITOK | Z_NOFAIL | Z_ZERO);

	if (p != PROC_NULL) {
		pr_local.pr_proc = p;
		pr_local.proc_data = *p_data;
	}

	if (t != TASK_NULL) {
		pr_local.pr_task = t;
		pr_local.task_data = *t_data;
	}

	if ((p != PROC_NULL) || (t != TASK_NULL)) {
		zalloc_ro_update_elem(ZONE_ID_PROC_RO, pr, &pr_local);
	}

	return pr;
}

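/*
 * Attach an owning task (and its read-only data) to an existing proc_ro
 * element; panics if the element already has one.
 */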
proc_ro_t
proc_ro_ref_task(proc_ro_t pr, task_t t, task_ro_data_t t_data)
{
	struct proc_ro pr_local;

	if (pr->pr_task != TASK_NULL) {
		panic("%s: proc_ro already has an owning task", __func__);
	}

	pr_local = *pr;
	pr_local.pr_task = t;
	pr_local.task_data = *t_data;

	zalloc_ro_update_elem(ZONE_ID_PROC_RO, pr, &pr_local);

	return pr;
}

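/* Atomically clear the task backpointer stored in a proc_ro element. */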
void
proc_ro_erase_task(proc_ro_t pr)
{
	zalloc_ro_update_field_atomic(ZONE_ID_PROC_RO,
	    pr, pr_task, ZRO_ATOMIC_XCHG_LONG, TASK_NULL);
}

__abortlike
static void
panic_proc_ro_proc_backref_mismatch(proc_t p, proc_ro_t ro)
{
	panic("proc_ro->proc backref mismatch: p=%p, ro=%p, "
	    "ro->pr_proc(ro)=%p", p, ro, ro->pr_proc);
}

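/*
 * Return the read-only structure for a proc, verifying both that it lives
 * in the RO zone and that its back-reference still points at this proc.
 */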
proc_ro_t
proc_get_ro(proc_t p)
{
	proc_ro_t ro = p->p_proc_ro;

	zone_require_ro(ZONE_ID_PROC_RO, sizeof(struct proc_ro), ro);
	if (__improbable(ro->pr_proc != p)) {
		panic_proc_ro_proc_backref_mismatch(p, ro);
	}

	return ro;
}

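/* Return the task backpointer stored in a proc_ro element. */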
task_t
proc_ro_task(proc_ro_t pr)
{
	return pr->pr_task;
}