1/*
2 * Copyright (c) 2013 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <sys/param.h>
30#include <sys/systm.h>
31#include <sys/kernel.h>
32#include <sys/malloc.h>
33#include <sys/proc_internal.h>
34#include <sys/proc.h>
35#include <sys/kauth.h>
36#include <sys/unistd.h>
37#include <sys/priv.h>
38
39#include <mach/mach_types.h>
40#include <mach/vm_param.h>
41#include <kern/task.h>
42#include <kern/locks.h>
43#include <kern/assert.h>
44#include <kern/sched_prim.h>
45
46#include <sys/kern_overrides.h>
47#include <sys/bsdtask_info.h>
48#include <sys/kdebug.h>
49#include <sys/sysproto.h>
50#include <sys/msgbuf.h>
51#include <sys/kern_memorystatus.h>
52
53/* Mutex for global system override state */
54static LCK_GRP_DECLARE(sys_override_mtx_grp, "system_override");
55static LCK_MTX_DECLARE(sys_override_lock, &sys_override_mtx_grp);
56
57/*
58 * Assertion counts for system properties (add new ones for each new mechanism)
59 *
60 * The assertion count management for system overrides is as follows:
61 *
62 * - All assertion counts are protected by the sys_override_lock.
63 *
64 * - Each caller of system_override() increments the assertion count for the
65 * mechanism it specified in the flags. The caller then blocks for the
66 * timeout specified in the system call.
67 *
68 * - At the end of the timeout, the caller thread wakes up and decrements the
69 * assertion count for the mechanism it originally took an assertion on.
70 *
71 * - If another caller calls the system_override() to disable the override
72 * for a mechanism, it simply disables the mechanism without changing any
73 * assertion counts. That way, the assertion counts are properly balanced.
74 *
75 * One thing to note is that a SYS_OVERRIDE_DISABLE disables the overrides
76 * for a mechanism irrespective of how many clients requested that override.
77 * That makes the implementation simpler and avoids keeping a lot of process
78 * specific state in the kernel.
79 *
80 */
static int64_t io_throttle_assert_cnt;   /* assertions held for SYS_OVERRIDE_IO_THROTTLE */
static int64_t cpu_throttle_assert_cnt;  /* assertions held for SYS_OVERRIDE_CPU_THROTTLE */
static int64_t fast_jetsam_assert_cnt;   /* assertions held for SYS_OVERRIDE_FAST_JETSAM */

/*
 * Wait channel for system override. Only its address is used: it is the
 * channel handed to msleep() while a caller is holding an override.
 */
static uint64_t sys_override_wait;
87
88/* Helper routines */
89static void system_override_begin(uint64_t flags);
90static void system_override_end(uint64_t flags);
91static void system_override_abort(uint64_t flags);
92static void system_override_callouts(uint64_t flags, boolean_t enable_override);
93static __attribute__((noinline)) int PROCESS_OVERRIDING_SYSTEM_DEFAULTS(uint64_t timeout);
94
95/* system call implementation */
96int
97system_override(__unused struct proc *p, struct system_override_args * uap, __unused int32_t *retval)
98{
99 uint64_t timeout = uap->timeout;
100 uint64_t flags = uap->flags;
101 int error = 0;
102
103 /* Check credentials for caller. Only entitled processes are allowed to make this call. */
104 if ((error = priv_check_cred(cred: kauth_cred_get(), PRIV_SYSTEM_OVERRIDE, flags: 0))) {
105 goto out;
106 }
107
108 /* Check to see if sane flags are specified. */
109 if ((flags & ~SYS_OVERRIDE_FLAGS_MASK) != 0) {
110 error = EINVAL;
111 goto out;
112 }
113
114 lck_mtx_lock(lck: &sys_override_lock);
115
116 if (flags & SYS_OVERRIDE_DISABLE) {
117 flags &= ~SYS_OVERRIDE_DISABLE;
118 system_override_abort(flags);
119 } else {
120 system_override_begin(flags);
121 error = PROCESS_OVERRIDING_SYSTEM_DEFAULTS(timeout);
122 system_override_end(flags);
123 }
124
125 lck_mtx_unlock(lck: &sys_override_lock);
126
127out:
128 return error;
129}
130
131/*
132 * Helper routines for enabling/disabling system overrides for various mechanisms.
133 * These routines should be called with the sys_override_lock held. Each subsystem
134 * which is hooked into the override service provides two routines:
135 *
136 * - void sys_override_foo_init(void);
137 * Routine to initialize the subsystem or the data needed for the override to work.
138 * This routine is optional and if a subsystem needs it, it should be invoked from
139 * init_system_override().
140 *
141 * - void sys_override_foo(boolean_t enable_override);
142 * Routine to enable/disable the override mechanism for that subsystem. A value of
143 * true indicates that the mechanism should be overridden and the special behavior
144 * should begin. A false value indicates that the subsystem should return to default
145 * behavior. This routine is mandatory and should be invoked as part of the helper
146 * routines if the flags passed in the syscall match the subsystem. Also, this
147 * routine should preferably be idempotent.
148 */
149
150static void
151system_override_callouts(uint64_t flags, boolean_t enable_override)
152{
153 switch (flags) {
154 case SYS_OVERRIDE_IO_THROTTLE:
155 if (enable_override) {
156 KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_THROTTLE, IO_THROTTLE_DISABLE) | DBG_FUNC_START,
157 proc_getpid(current_proc()), 0, 0, 0, 0);
158 } else {
159 KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_THROTTLE, IO_THROTTLE_DISABLE) | DBG_FUNC_END,
160 proc_getpid(current_proc()), 0, 0, 0, 0);
161 }
162 sys_override_io_throttle(enable_override);
163 break;
164
165 case SYS_OVERRIDE_CPU_THROTTLE:
166 if (enable_override) {
167 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_CPU_THROTTLE_DISABLE) | DBG_FUNC_START,
168 proc_getpid(current_proc()), 0, 0, 0, 0);
169 } else {
170 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_CPU_THROTTLE_DISABLE) | DBG_FUNC_END,
171 proc_getpid(current_proc()), 0, 0, 0, 0);
172 }
173 sys_override_cpu_throttle(enable_override);
174 break;
175
176 case SYS_OVERRIDE_FAST_JETSAM:
177 if (enable_override) {
178 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FAST_JETSAM) | DBG_FUNC_START,
179 proc_getpid(current_proc()), 0, 0, 0, 0);
180 } else {
181 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FAST_JETSAM) | DBG_FUNC_END,
182 proc_getpid(current_proc()), 0, 0, 0, 0);
183 }
184#if CONFIG_JETSAM
185 memorystatus_fast_jetsam_override(enable_override);
186#endif /* CONFIG_JETSAM */
187 break;
188
189 default:
190 panic("Unknown option to system_override_callouts(): %llu", flags);
191 }
192}
193
194/*
195 * system_override_begin(uint64_t flags)
196 *
197 * Routine to start a system override if the assertion count
198 * transitions from 0->1 for a specified mechanism.
199 */
200static void
201system_override_begin(uint64_t flags)
202{
203 lck_mtx_assert(lck: &sys_override_lock, LCK_MTX_ASSERT_OWNED);
204
205 if (flags & SYS_OVERRIDE_IO_THROTTLE) {
206 if (io_throttle_assert_cnt == 0) {
207 system_override_callouts(SYS_OVERRIDE_IO_THROTTLE, true);
208 }
209 io_throttle_assert_cnt++;
210 }
211
212 if (flags & SYS_OVERRIDE_CPU_THROTTLE) {
213 if (cpu_throttle_assert_cnt == 0) {
214 system_override_callouts(SYS_OVERRIDE_CPU_THROTTLE, true);
215 }
216 cpu_throttle_assert_cnt++;
217 }
218
219 if (flags & SYS_OVERRIDE_FAST_JETSAM) {
220 if (fast_jetsam_assert_cnt == 0) {
221 system_override_callouts(SYS_OVERRIDE_FAST_JETSAM, true);
222 }
223 fast_jetsam_assert_cnt++;
224 }
225}
226
227/*
228 * system_override_end(uint64_t flags)
229 *
230 * Routine to end a system override if the assertion count
231 * transitions from 1->0 for a specified mechanism.
232 */
233static void
234system_override_end(uint64_t flags)
235{
236 lck_mtx_assert(lck: &sys_override_lock, LCK_MTX_ASSERT_OWNED);
237
238 if (flags & SYS_OVERRIDE_IO_THROTTLE) {
239 assert(io_throttle_assert_cnt > 0);
240 io_throttle_assert_cnt--;
241 if (io_throttle_assert_cnt == 0) {
242 system_override_callouts(SYS_OVERRIDE_IO_THROTTLE, false);
243 }
244 }
245
246 if (flags & SYS_OVERRIDE_CPU_THROTTLE) {
247 assert(cpu_throttle_assert_cnt > 0);
248 cpu_throttle_assert_cnt--;
249 if (cpu_throttle_assert_cnt == 0) {
250 system_override_callouts(SYS_OVERRIDE_CPU_THROTTLE, false);
251 }
252 }
253
254 if (flags & SYS_OVERRIDE_FAST_JETSAM) {
255 assert(fast_jetsam_assert_cnt > 0);
256 fast_jetsam_assert_cnt--;
257 if (fast_jetsam_assert_cnt == 0) {
258 system_override_callouts(SYS_OVERRIDE_FAST_JETSAM, false);
259 }
260 }
261}
262
263/*
264 * system_override_abort(uint64_t flags)
265 *
266 * Routine to abort a system override (if one was active)
267 * irrespective of the assertion counts and number of blocked
268 * requestors.
269 */
270static void
271system_override_abort(uint64_t flags)
272{
273 lck_mtx_assert(lck: &sys_override_lock, LCK_MTX_ASSERT_OWNED);
274
275 if ((flags & SYS_OVERRIDE_IO_THROTTLE) && (io_throttle_assert_cnt > 0)) {
276 system_override_callouts(SYS_OVERRIDE_IO_THROTTLE, false);
277 }
278
279 if ((flags & SYS_OVERRIDE_CPU_THROTTLE) && (cpu_throttle_assert_cnt > 0)) {
280 system_override_callouts(SYS_OVERRIDE_CPU_THROTTLE, false);
281 }
282
283 if ((flags & SYS_OVERRIDE_FAST_JETSAM) && (fast_jetsam_assert_cnt > 0)) {
284 system_override_callouts(SYS_OVERRIDE_FAST_JETSAM, false);
285 }
286}
287
288static __attribute__((noinline)) int
289PROCESS_OVERRIDING_SYSTEM_DEFAULTS(uint64_t timeout)
290{
291 struct timespec ts;
292 ts.tv_sec = timeout / NSEC_PER_SEC;
293 ts.tv_nsec = timeout - ((long)ts.tv_sec * NSEC_PER_SEC);
294 int error = msleep(chan: (caddr_t)&sys_override_wait, mtx: &sys_override_lock, PRIBIO | PCATCH, wmesg: "system_override", ts: &ts);
295 /* msleep returns EWOULDBLOCK if timeout expires, treat that as success */
296 return (error == EWOULDBLOCK) ? 0 : error;
297}
298