1/*
2 * Copyright (c) 2006-2018 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 *
28 */
29
30#include <kern/sched_prim.h>
31#include <kern/kalloc.h>
32#include <kern/assert.h>
33#include <kern/debug.h>
34#include <kern/locks.h>
35#include <kern/task.h>
36#include <kern/thread.h>
37#include <kern/host.h>
38#include <kern/policy_internal.h>
39#include <kern/thread_call.h>
40#include <kern/thread_group.h>
41
42#include <libkern/libkern.h>
43#include <mach/coalition.h>
44#include <mach/mach_time.h>
45#include <mach/task.h>
46#include <mach/host_priv.h>
47#include <mach/mach_host.h>
48#include <os/log.h>
49#include <pexpert/pexpert.h>
50#include <sys/coalition.h>
51#include <sys/kern_event.h>
52#include <sys/kdebug.h>
53#include <sys/kdebug_kernel.h>
54#include <sys/proc.h>
55#include <sys/proc_info.h>
56#include <sys/reason.h>
57#include <sys/signal.h>
58#include <sys/signalvar.h>
59#include <sys/sysctl.h>
60#include <sys/sysproto.h>
61#include <sys/wait.h>
62#include <sys/tree.h>
63#include <sys/priv.h>
64#include <vm/vm_pageout.h>
65#include <vm/vm_protos.h>
66#include <mach/machine/sdt.h>
67#include <libkern/coreanalytics/coreanalytics.h>
68#include <libkern/section_keywords.h>
69#include <stdatomic.h>
70
71#include <IOKit/IOBSD.h>
72
73#if CONFIG_FREEZE
74#include <vm/vm_map.h>
75#endif /* CONFIG_FREEZE */
76
77#include <kern/kern_memorystatus_internal.h>
78#include <sys/kern_memorystatus.h>
79#include <sys/kern_memorystatus_freeze.h>
80#include <sys/kern_memorystatus_notify.h>
81
82#if CONFIG_JETSAM
83
84extern unsigned int memorystatus_available_pages;
85extern unsigned int memorystatus_available_pages_pressure;
86extern unsigned int memorystatus_available_pages_critical;
87extern unsigned int memorystatus_available_pages_critical_base;
88extern unsigned int memorystatus_available_pages_critical_idle_offset;
89
90#else /* CONFIG_JETSAM */
91
92extern uint64_t memorystatus_available_pages;
93extern uint64_t memorystatus_available_pages_pressure;
94extern uint64_t memorystatus_available_pages_critical;
95
96#endif /* CONFIG_JETSAM */
97
98unsigned int memorystatus_frozen_count = 0;
99unsigned int memorystatus_frozen_count_webcontent = 0;
100unsigned int memorystatus_frozen_count_xpc_service = 0;
101unsigned int memorystatus_suspended_count = 0;
102
103#if CONFIG_FREEZE
104
105static LCK_GRP_DECLARE(freezer_lck_grp, "freezer");
106static LCK_MTX_DECLARE(freezer_mutex, &freezer_lck_grp);
107
108/* Thresholds */
109unsigned int memorystatus_freeze_threshold = 0;
110unsigned int memorystatus_freeze_pages_min = 0;
111unsigned int memorystatus_freeze_pages_max = 0;
112unsigned int memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_DEFAULT;
113unsigned int memorystatus_freeze_daily_mb_max = FREEZE_DAILY_MB_MAX_DEFAULT;
114uint64_t memorystatus_freeze_budget_pages_remaining = 0; /* Remaining # of pages that can be frozen to disk */
115uint64_t memorystatus_freeze_budget_multiplier = 100; /* Multiplies the daily budget by 100/multiplier */
116boolean_t memorystatus_freeze_degradation = FALSE; /* Protected by the freezer mutex. Signals we are in a degraded freeze mode. */
117unsigned int memorystatus_freeze_max_candidate_band = FREEZE_MAX_CANDIDATE_BAND;
118
119unsigned int memorystatus_max_frozen_demotions_daily = 0;
120unsigned int memorystatus_thaw_count_demotion_threshold = 0;
121unsigned int memorystatus_min_thaw_refreeze_threshold;
122
123#if XNU_TARGET_OS_WATCH
124#define FREEZE_DYNAMIC_THREAD_DELAY_ENABLED_DEFAULT true
125#else
126#define FREEZE_DYNAMIC_THREAD_DELAY_ENABLED_DEFAULT false
127#endif
128boolean_t memorystatus_freeze_dynamic_thread_delay_enabled = FREEZE_DYNAMIC_THREAD_DELAY_ENABLED_DEFAULT;
129SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_dynamic_thread_delay_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_dynamic_thread_delay_enabled, 0, "");
130
131#define FREEZE_APPS_IDLE_DELAY_MULTIPLIER_FAST 1
132#define FREEZE_APPS_IDLE_DELAY_MULTIPLIER_SLOW 30
133#define FREEZE_APPS_IDLE_DELAY_MULTIPLIER_DEFAULT FREEZE_APPS_IDLE_DELAY_MULTIPLIER_FAST
134unsigned int memorystatus_freeze_apps_idle_delay_multiplier = FREEZE_APPS_IDLE_DELAY_MULTIPLIER_DEFAULT;
135
136#if (XNU_TARGET_OS_IOS && || XNU_TARGET_OS_WATCH
137#define FREEZE_ENABLED_DEFAULT true
138#else
139#define FREEZE_ENABLED_DEFAULT false
140#endif
141TUNABLE_WRITEABLE(bool, memorystatus_freeze_enabled, "freeze_enabled", FREEZE_ENABLED_DEFAULT);
142
int memorystatus_freeze_wakeup = 0;
/* the jetsam band which will contain P_MEMSTAT_FROZEN processes */
int memorystatus_freeze_jetsam_band = 0;

/* Max. # of XPC services per coalition we'll consider freezing. */
#define MAX_XPC_SERVICE_PIDS 10

#ifdef XNU_KERNEL_PRIVATE

unsigned int memorystatus_frozen_processes_max = 0;
unsigned int memorystatus_frozen_shared_mb = 0;
unsigned int memorystatus_frozen_shared_mb_max = 0;
/* Max. MB allowed per process to be freezer-eligible. */
unsigned int memorystatus_freeze_shared_mb_per_process_max = 0;

/* Ratio of private:shared pages for a process to be freezer-eligible. */
#if XNU_TARGET_OS_WATCH
unsigned int memorystatus_freeze_private_shared_pages_ratio = 1;
#else
unsigned int memorystatus_freeze_private_shared_pages_ratio = 2;
#endif

/* # of thaws in the current freezer interval */
unsigned int memorystatus_thaw_count = 0;
/* The number of thaws since boot */
uint64_t memorystatus_thaw_count_since_boot = 0;
/* # of processes currently thawed i.e. have state on disk & in-memory */
unsigned int memorystatus_refreeze_eligible_count = 0;

/* Aggregate freezer statistics; individual fields are exported via sysctl. */
struct memorystatus_freezer_stats_t memorystatus_freezer_stats = {0};

#endif /* XNU_KERNEL_PRIVATE */
166
167static inline boolean_t memorystatus_can_freeze_processes(void);
168static boolean_t memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low);
169static void memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused);
170static uint32_t memorystatus_freeze_calculate_new_budget(
171 unsigned int time_since_last_interval_expired_sec,
172 unsigned int burst_multiple,
173 unsigned int interval_duration_min,
174 uint32_t rollover);
175static void memorystatus_freeze_start_normal_throttle_interval(uint32_t new_budget, mach_timespec_t start_ts);
176
177static void memorystatus_set_freeze_is_enabled(bool enabled);
178static void memorystatus_disable_freeze(void);
179static bool kill_all_frozen_processes(uint64_t max_band, bool suspended_only, os_reason_t jetsam_reason, uint64_t *memory_reclaimed_out);
180
181/* Stats */
182static uint64_t memorystatus_freeze_pageouts = 0;
183
184/* Throttling */
185#define DEGRADED_WINDOW_MINS (30)
186#define NORMAL_WINDOW_MINS (24 * 60)
187
188/* Protected by the freezer_mutex */
189static throttle_interval_t throttle_intervals[] = {
190 { DEGRADED_WINDOW_MINS, 1, 0, 0, { 0, 0 }},
191 { NORMAL_WINDOW_MINS, 1, 0, 0, { 0, 0 }},
192};
193throttle_interval_t *degraded_throttle_window = &throttle_intervals[0];
194throttle_interval_t *normal_throttle_window = &throttle_intervals[1];
195uint32_t memorystatus_freeze_current_interval = 0;
196static thread_call_t freeze_interval_reset_thread_call;
197static uint32_t memorystatus_freeze_calculate_new_budget(
198 unsigned int time_since_last_interval_expired_sec,
199 unsigned int burst_multiple,
200 unsigned int interval_duration_min,
201 uint32_t rollover);
202
203struct memorystatus_freezer_candidate_list memorystatus_global_freeze_list = {NULL, 0};
204struct memorystatus_freezer_candidate_list memorystatus_global_demote_list = {NULL, 0};
205/*
206 * When enabled, freeze candidates are chosen from the memorystatus_global_freeze_list
207 * in order (as opposed to using the older LRU approach).
208 */
209#if XNU_TARGET_OS_WATCH
210#define FREEZER_USE_ORDERED_LIST_DEFAULT 1
211#else
212#define FREEZER_USE_ORDERED_LIST_DEFAULT 0
213#endif
214int memorystatus_freezer_use_ordered_list = FREEZER_USE_ORDERED_LIST_DEFAULT;
215EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freezer_use_ordered_list, &memorystatus_freezer_use_ordered_list, 0, 1, "");
216/*
217 * When enabled, demotion candidates are chosen from memorystatus_global_demotion_list
218 */
219int memorystatus_freezer_use_demotion_list = 0;
220EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freezer_use_demotion_list, &memorystatus_freezer_use_demotion_list, 0, 1, "");
221
222extern uint64_t vm_swap_get_free_space(void);
223extern boolean_t vm_swap_max_budget(uint64_t *);
224
225static void memorystatus_freeze_update_throttle(uint64_t *budget_pages_allowed);
226static void memorystatus_demote_frozen_processes(bool urgent_mode);
227
228static void memorystatus_freeze_handle_error(proc_t p, const freezer_error_code_t freezer_error_code, bool was_refreeze, pid_t pid, const coalition_t coalition, const char* log_prefix);
229static void memorystatus_freeze_out_of_slots(void);
230uint64_t memorystatus_freezer_thread_next_run_ts = 0;
231
232/* Sysctls needed for aggd stats */
233
234SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_count, 0, "");
235SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_count_webcontent, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_count_webcontent, 0, "");
236SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_count_xpc_service, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_count_xpc_service, 0, "");
237SYSCTL_UINT(_kern, OID_AUTO, memorystatus_thaw_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_thaw_count, 0, "");
238SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_thaw_count_since_boot, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_thaw_count_since_boot, "");
239SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_pageouts, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_pageouts, "");
240SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_interval, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_current_interval, 0, "");
241
242/*
243 * Force a new interval with the given budget (no rollover).
244 */
245static void
246memorystatus_freeze_force_new_interval(uint64_t new_budget)
247{
248 LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
249 mach_timespec_t now_ts;
250 clock_sec_t sec;
251 clock_nsec_t nsec;
252
253 clock_get_system_nanotime(&sec, &nsec);
254 now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
255 now_ts.tv_nsec = nsec;
256 memorystatus_freeze_start_normal_throttle_interval((uint32_t) MIN(new_budget, UINT32_MAX), now_ts);
257 /* Don't carry over any excess pageouts since we're forcing a new budget */
258 normal_throttle_window->pageouts = 0;
259 memorystatus_freeze_budget_pages_remaining = normal_throttle_window->max_pageouts;
260}
261#if DEVELOPMENT || DEBUG
262static int sysctl_memorystatus_freeze_budget_pages_remaining SYSCTL_HANDLER_ARGS
263{
264 #pragma unused(arg1, arg2, oidp)
265 int error, changed;
266 uint64_t new_budget = memorystatus_freeze_budget_pages_remaining;
267
268 lck_mtx_lock(&freezer_mutex);
269
270 error = sysctl_io_number(req, memorystatus_freeze_budget_pages_remaining, sizeof(uint64_t), &new_budget, &changed);
271 if (changed) {
272 if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
273 lck_mtx_unlock(&freezer_mutex);
274 return ENOTSUP;
275 }
276 memorystatus_freeze_force_new_interval(new_budget);
277 }
278
279 lck_mtx_unlock(&freezer_mutex);
280 return error;
281}
282
283SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freeze_budget_pages_remaining, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freeze_budget_pages_remaining, "Q", "");
284#else /* DEVELOPMENT || DEBUG */
285SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_budget_pages_remaining, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_budget_pages_remaining, "");
286#endif /* DEVELOPMENT || DEBUG */
287SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_excess_shared_memory_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_excess_shared_memory_count, "");
288SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_low_private_shared_ratio_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count, "");
289SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_no_compressor_space_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_no_compressor_space_count, "");
290SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_no_swap_space_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_no_swap_space_count, "");
291SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_below_min_pages_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_below_min_pages_count, "");
292SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_low_probability_of_use_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_low_probability_of_use_count, "");
293SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_elevated_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_elevated_count, "");
294SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_other_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_other_count, "");
295SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_process_considered_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_process_considered_count, "");
296SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_below_threshold_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_below_threshold_count, "");
297SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_skipped_full_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_skipped_full_count, "");
298SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_skipped_shared_mb_high_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count, "");
299SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_shared_pages_skipped, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_shared_pages_skipped, "");
300SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_bytes_refrozen, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_bytes_refrozen, "");
301SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_refreeze_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_refreeze_count, "");
302SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_freeze_pid_mismatches, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_freeze_pid_mismatches, "");
303SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_demote_pid_mismatches, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_demote_pid_mismatches, "");
304
305static_assert(_kMemorystatusFreezeSkipReasonMax <= UINT8_MAX);
306
/*
 * Calculates the hit rate for the freezer.
 * The hit rate is defined as the percentage of procs that are currently in the
 * freezer which we have thawed.
 * A low hit rate means we're freezing bad candidates since they're not re-used.
 *
 * frozen_count: number of processes frozen.
 * thaw_count:   number of those processes that were thawed.
 *
 * Returns an integer percentage in [0, 100] (truncated). Returns 100 when
 * frozen_count is 0, and also when thaw_count >= frozen_count: both counts
 * are read with relaxed atomics by the callers and can be momentarily out of
 * sync, so the result is clamped rather than reported as > 100.
 */
static int
calculate_thaw_percentage(uint64_t frozen_count, uint64_t thaw_count)
{
	if (frozen_count == 0 || thaw_count >= frozen_count) {
		return 100;
	}

	/*
	 * 100 * thaw_count would wrap for very large counts. In that case both
	 * operands are huge (frozen_count > thaw_count), so drop 7 low bits
	 * from each: the ratio is preserved to well within one percent and
	 * 100 * (x >> 7) always fits in 64 bits.
	 */
	if (thaw_count > UINT64_MAX / 100) {
		thaw_count >>= 7;
		frozen_count >>= 7;
	}

	return (int)(100 * thaw_count / frozen_count);
}
331
332static int
333get_thaw_percentage()
334{
335 uint64_t processes_frozen, processes_thawed;
336 processes_frozen = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
337 processes_thawed = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed, relaxed);
338 return calculate_thaw_percentage(processes_frozen, processes_thawed);
339}
340
341static int
342sysctl_memorystatus_freezer_thaw_percentage SYSCTL_HANDLER_ARGS
343{
344#pragma unused(arg1, arg2)
345 int thaw_percentage = get_thaw_percentage();
346 return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
347}
348SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage, "I", "");
349
350static int
351get_thaw_percentage_fg()
352{
353 uint64_t processes_frozen, processes_thawed_fg;
354 processes_frozen = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
355 processes_thawed_fg = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg, relaxed);
356 return calculate_thaw_percentage(processes_frozen, processes_thawed_fg);
357}
358
359static int sysctl_memorystatus_freezer_thaw_percentage_fg SYSCTL_HANDLER_ARGS
360{
361#pragma unused(arg1, arg2)
362 int thaw_percentage = get_thaw_percentage_fg();
363 return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
364}
365SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage_fg, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage_fg, "I", "");
366
367static int
368get_thaw_percentage_webcontent()
369{
370 uint64_t processes_frozen_webcontent, processes_thawed_webcontent;
371 processes_frozen_webcontent = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen_webcontent, relaxed);
372 processes_thawed_webcontent = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_webcontent, relaxed);
373 return calculate_thaw_percentage(processes_frozen_webcontent, processes_thawed_webcontent);
374}
375
376static int sysctl_memorystatus_freezer_thaw_percentage_webcontent SYSCTL_HANDLER_ARGS
377{
378#pragma unused(arg1, arg2)
379 int thaw_percentage = get_thaw_percentage_webcontent();
380 return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
381}
382SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage_webcontent, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage_webcontent, "I", "");
383
384
385static int
386get_thaw_percentage_bg()
387{
388 uint64_t processes_frozen, processes_thawed_fg, processes_thawed;
389 processes_frozen = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
390 processes_thawed = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed, relaxed);
391 processes_thawed_fg = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg, relaxed);
392 return calculate_thaw_percentage(processes_frozen, processes_thawed - processes_thawed_fg);
393}
394
395static int sysctl_memorystatus_freezer_thaw_percentage_bg SYSCTL_HANDLER_ARGS
396{
397#pragma unused(arg1, arg2)
398 int thaw_percentage = get_thaw_percentage_bg();
399 return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
400}
401SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage_bg, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage_bg, "I", "");
402
403static int
404get_thaw_percentage_fg_non_xpc_service()
405{
406 uint64_t processes_frozen, processes_frozen_xpc_service, processes_thawed_fg, processes_thawed_fg_xpc_service;
407 processes_frozen = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
408 processes_frozen_xpc_service = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen_xpc_service, relaxed);
409 processes_thawed_fg = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg, relaxed);
410 processes_thawed_fg_xpc_service = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg_xpc_service, relaxed);
411 /*
412 * Since these are all relaxed loads, it's possible (although unlikely) to read a value for
413 * frozen/thawed xpc services that's > the value for processes frozen / thawed.
414 * Clamp just in case.
415 */
416 processes_frozen_xpc_service = MIN(processes_frozen_xpc_service, processes_frozen);
417 processes_thawed_fg_xpc_service = MIN(processes_thawed_fg_xpc_service, processes_thawed_fg);
418 return calculate_thaw_percentage(processes_frozen - processes_frozen_xpc_service, processes_thawed_fg - processes_thawed_fg_xpc_service);
419}
420
421static int sysctl_memorystatus_freezer_thaw_percentage_fg_non_xpc_service SYSCTL_HANDLER_ARGS
422{
423#pragma unused(arg1, arg2)
424 int thaw_percentage = get_thaw_percentage_fg_non_xpc_service();
425 return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
426}
427
428SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage_fg_non_xpc_service, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage_fg_non_xpc_service, "I", "");
429
430#define FREEZER_ERROR_STRING_LENGTH 128
431
432EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_pages_min, &memorystatus_freeze_pages_min, 0, UINT32_MAX, "");
433EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_pages_max, &memorystatus_freeze_pages_max, 0, UINT32_MAX, "");
434EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_processes_max, &memorystatus_frozen_processes_max, 0, UINT32_MAX, "");
435EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_jetsam_band, &memorystatus_freeze_jetsam_band, JETSAM_PRIORITY_BACKGROUND, JETSAM_PRIORITY_FOREGROUND, "");
436EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_private_shared_pages_ratio, &memorystatus_freeze_private_shared_pages_ratio, 0, UINT32_MAX, "");
437EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_min_processes, &memorystatus_freeze_suspended_threshold, 0, UINT32_MAX, "");
438EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_max_candidate_band, &memorystatus_freeze_max_candidate_band, JETSAM_PRIORITY_IDLE, JETSAM_PRIORITY_FOREGROUND, "");
439static int
440sysctl_memorystatus_freeze_budget_multiplier SYSCTL_HANDLER_ARGS
441{
442#pragma unused(arg1, arg2, oidp, req)
443 int error = 0, changed = 0;
444 uint64_t val = memorystatus_freeze_budget_multiplier;
445 unsigned int new_budget;
446 clock_sec_t sec;
447 clock_nsec_t nsec;
448 mach_timespec_t now_ts;
449
450 error = sysctl_io_number(req, memorystatus_freeze_budget_multiplier, sizeof(val), &val, &changed);
451 if (error) {
452 return error;
453 }
454 if (changed) {
455 if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
456 return ENOTSUP;
457 }
458#if !(DEVELOPMENT || DEBUG)
459 if (val > 100) {
460 /* Can not increase budget on release. */
461 return EINVAL;
462 }
463#endif
464 lck_mtx_lock(&freezer_mutex);
465
466 memorystatus_freeze_budget_multiplier = val;
467 /* Start a new throttle interval with this budget multiplier */
468 new_budget = memorystatus_freeze_calculate_new_budget(0, 1, NORMAL_WINDOW_MINS, 0);
469 clock_get_system_nanotime(&sec, &nsec);
470 now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
471 now_ts.tv_nsec = nsec;
472 memorystatus_freeze_start_normal_throttle_interval(new_budget, now_ts);
473 memorystatus_freeze_budget_pages_remaining = normal_throttle_window->max_pageouts;
474
475 lck_mtx_unlock(&freezer_mutex);
476 }
477 return 0;
478}
479EXPERIMENT_FACTOR_PROC(_kern, memorystatus_freeze_budget_multiplier, CTLTYPE_QUAD | CTLFLAG_RW, 0, 0, &sysctl_memorystatus_freeze_budget_multiplier, "Q", "");
480/*
481 * max. # of frozen process demotions we will allow in our daily cycle.
482 */
483EXPERIMENT_FACTOR_UINT(_kern, memorystatus_max_freeze_demotions_daily, &memorystatus_max_frozen_demotions_daily, 0, UINT32_MAX, "");
484
485/*
486 * min # of thaws needed by a process to protect it from getting demoted into the IDLE band.
487 */
488EXPERIMENT_FACTOR_UINT(_kern, memorystatus_thaw_count_demotion_threshold, &memorystatus_thaw_count_demotion_threshold, 0, UINT32_MAX, "");
489
490/*
491 * min # of global thaws needed for us to consider refreezing these processes.
492 */
493EXPERIMENT_FACTOR_UINT(_kern, memorystatus_min_thaw_refreeze_threshold, &memorystatus_min_thaw_refreeze_threshold, 0, UINT32_MAX, "");
494
495#if DEVELOPMENT || DEBUG
496
497SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_daily_mb_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_daily_mb_max, 0, "");
498SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_degraded_mode, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_degradation, 0, "");
499SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_threshold, 0, "");
500SYSCTL_UINT(_kern, OID_AUTO, memorystatus_refreeze_eligible_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_refreeze_eligible_count, 0, "");
501
502/*
503 * Max. shared-anonymous memory in MB that can be held by frozen processes in the high jetsam band.
504 * "0" means no limit.
505 * Default is 10% of system-wide task limit.
506 */
507
508SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_frozen_shared_mb_max, 0, "");
509SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_shared_mb, 0, "");
510
511SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb_per_process_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_shared_mb_per_process_max, 0, "");
512
513boolean_t memorystatus_freeze_throttle_enabled = TRUE;
514SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_throttle_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_throttle_enabled, 0, "");
515
516/*
517 * When set to true, this keeps frozen processes in the compressor pool in memory, instead of swapping them out to disk.
518 * Exposed via the sysctl kern.memorystatus_freeze_to_memory.
519 */
520boolean_t memorystatus_freeze_to_memory = FALSE;
521SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_to_memory, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_to_memory, 0, "");
522
523#define VM_PAGES_FOR_ALL_PROCS (2)
524
525/*
526 * Manual trigger of freeze and thaw for dev / debug kernels only.
527 */
528static int
529sysctl_memorystatus_freeze SYSCTL_HANDLER_ARGS
530{
531#pragma unused(arg1, arg2)
532 int error, pid = 0;
533 proc_t p;
534 freezer_error_code_t freezer_error_code = 0;
535 pid_t pid_list[MAX_XPC_SERVICE_PIDS];
536 int ntasks = 0;
537 coalition_t coal = COALITION_NULL;
538
539 error = sysctl_handle_int(oidp, &pid, 0, req);
540 if (error || !req->newptr) {
541 return error;
542 }
543
544 if (pid == VM_PAGES_FOR_ALL_PROCS) {
545 vm_pageout_anonymous_pages();
546
547 return 0;
548 }
549
550 lck_mtx_lock(&freezer_mutex);
551 if (memorystatus_freeze_enabled == false) {
552 lck_mtx_unlock(&freezer_mutex);
553 memorystatus_log("sysctl_freeze: Freeze is DISABLED\n");
554 return ENOTSUP;
555 }
556
557again:
558 p = proc_find(pid);
559 if (p != NULL) {
560 memorystatus_freezer_stats.mfs_process_considered_count++;
561 uint32_t purgeable, wired, clean, dirty, shared;
562 uint32_t max_pages = 0, state = 0;
563
564 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
565 /*
566 * Freezer backed by the compressor and swap file(s)
567 * will hold compressed data.
568 *
569 * Set the sysctl kern.memorystatus_freeze_to_memory to true to keep compressed data from
570 * being swapped out to disk. Note that this disables freezer swap support globally,
571 * not just for the process being frozen.
572 *
573 *
574 * We don't care about the global freezer budget or the process's (min/max) budget here.
575 * The freeze sysctl is meant to force-freeze a process.
576 *
577 * We also don't update any global or process stats on this path, so that the jetsam/ freeze
578 * logic remains unaffected. The tasks we're performing here are: freeze the process, set the
579 * P_MEMSTAT_FROZEN bit, and elevate the process to a higher band (if the freezer is active).
580 */
581 max_pages = memorystatus_freeze_pages_max;
582 } else {
583 /*
584 * We only have the compressor without any swap.
585 */
586 max_pages = UINT32_MAX - 1;
587 }
588
589 proc_list_lock();
590 state = p->p_memstat_state;
591 proc_list_unlock();
592
593 /*
594 * The jetsam path also verifies that the process is a suspended App. We don't care about that here.
595 * We simply ensure that jetsam is not already working on the process and that the process has not
596 * explicitly disabled freezing.
597 */
598 if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED)) {
599 memorystatus_log_error("sysctl_freeze: p_memstat_state check failed, process is%s%s%s\n",
600 (state & P_MEMSTAT_TERMINATED) ? " terminated" : "",
601 (state & P_MEMSTAT_LOCKED) ? " locked" : "",
602 (state & P_MEMSTAT_FREEZE_DISABLED) ? " unfreezable" : "");
603
604 proc_rele(p);
605 lck_mtx_unlock(&freezer_mutex);
606 return EPERM;
607 }
608
609 KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE) | DBG_FUNC_START, memorystatus_available_pages, pid, max_pages);
610 error = task_freeze(proc_task(p), &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, FALSE /* eval only */);
611 if (!error || freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
612 memorystatus_freezer_stats.mfs_shared_pages_skipped += shared;
613 }
614 KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE) | DBG_FUNC_END, purgeable, wired, clean, dirty);
615
616 if (error) {
617 memorystatus_freeze_handle_error(p, freezer_error_code, state & P_MEMSTAT_FROZEN, pid, coal, "sysctl_freeze");
618 if (error == KERN_NO_SPACE) {
619 /* Make it easy to distinguish between failures due to low compressor/ swap space and other failures. */
620 error = ENOSPC;
621 } else {
622 error = EIO;
623 }
624 } else {
625 proc_list_lock();
626 if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == 0) {
627 p->p_memstat_state |= P_MEMSTAT_FROZEN;
628 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
629 memorystatus_frozen_count++;
630 os_atomic_inc(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
631 if (strcmp(p->p_name, "com.apple.WebKit.WebContent") == 0) {
632 memorystatus_frozen_count_webcontent++;
633 os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_frozen_webcontent), relaxed);
634 }
635 if (memorystatus_frozen_count == memorystatus_frozen_processes_max) {
636 memorystatus_freeze_out_of_slots();
637 }
638 } else {
639 // This was a re-freeze
640 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
641 memorystatus_freezer_stats.mfs_bytes_refrozen += dirty * PAGE_SIZE;
642 memorystatus_freezer_stats.mfs_refreeze_count++;
643 }
644 }
645 p->p_memstat_frozen_count++;
646
647 if (coal != NULL) {
648 /* We just froze an xpc service. Mark it as such for telemetry */
649 p->p_memstat_state |= P_MEMSTAT_FROZEN_XPC_SERVICE;
650 memorystatus_frozen_count_xpc_service++;
651 os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_frozen_xpc_service), relaxed);
652 }
653
654
655 proc_list_unlock();
656
657 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
658 /*
659 * We elevate only if we are going to swap out the data.
660 */
661 error = memorystatus_update_inactive_jetsam_priority_band(pid, MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE,
662 memorystatus_freeze_jetsam_band, TRUE);
663
664 if (error) {
665 memorystatus_log_error("sysctl_freeze: Elevating frozen process to higher jetsam band failed with %d\n", error);
666 }
667 }
668 }
669
670 if ((error == 0) && (coal == NULL)) {
671 /*
672 * We froze a process and so we check to see if it was
673 * a coalition leader and if it has XPC services that
674 * might need freezing.
675 * Only one leader can be frozen at a time and so we shouldn't
676 * enter this block more than once per call. Hence the
677 * check that 'coal' has to be NULL. We should make this an
678 * assert() or panic() once we have a much more concrete way
679 * to detect an app vs a daemon.
680 */
681
682 task_t curr_task = NULL;
683
684 curr_task = proc_task(p);
685 coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
686 if (coalition_is_leader(curr_task, coal)) {
687 ntasks = coalition_get_pid_list(coal, COALITION_ROLEMASK_XPC,
688 COALITION_SORT_DEFAULT, pid_list, MAX_XPC_SERVICE_PIDS);
689
690 if (ntasks > MAX_XPC_SERVICE_PIDS) {
691 ntasks = MAX_XPC_SERVICE_PIDS;
692 }
693 }
694 }
695
696 proc_rele(p);
697
698 while (ntasks) {
699 pid = pid_list[--ntasks];
700 goto again;
701 }
702
703 lck_mtx_unlock(&freezer_mutex);
704 return error;
705 } else {
706 memorystatus_log_error("sysctl_freeze: Invalid process\n");
707 }
708
709
710 lck_mtx_unlock(&freezer_mutex);
711 return EINVAL;
712}
713
/*
 * Registers the integer OID "memorystatus_freeze" under _kern.
 * The OID is flagged write-only (CTLFLAG_WR), locked and masked, and writes
 * are dispatched to sysctl_memorystatus_freeze().
 */
SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freeze, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_freeze, "I", "");
716
717/*
718 * Manual trigger of agressive frozen demotion for dev / debug kernels only.
719 */
720static int
721sysctl_memorystatus_demote_frozen_process SYSCTL_HANDLER_ARGS
722{
723#pragma unused(arg1, arg2)
724 int error, val;
725 /*
726 * Only demote on write to prevent demoting during `sysctl -a`.
727 * The actual value written doesn't matter.
728 */
729 error = sysctl_handle_int(oidp, &val, 0, req);
730 if (error || !req->newptr) {
731 return error;
732 }
733 if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
734 return ENOTSUP;
735 }
736 lck_mtx_lock(&freezer_mutex);
737 memorystatus_demote_frozen_processes(false);
738 lck_mtx_unlock(&freezer_mutex);
739 return 0;
740}
741
/*
 * Registers the integer OID "memorystatus_demote_frozen_processes" under
 * _kern. It is flagged write-only (CTLFLAG_WR), locked and masked, and is
 * handled by sysctl_memorystatus_demote_frozen_process().
 */
SYSCTL_PROC(_kern, OID_AUTO, memorystatus_demote_frozen_processes, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_memorystatus_demote_frozen_process, "I", "");
743
744static int
745sysctl_memorystatus_available_pages_thaw SYSCTL_HANDLER_ARGS
746{
747#pragma unused(arg1, arg2)
748
749 int error, pid = 0;
750 proc_t p;
751
752 if (memorystatus_freeze_enabled == false) {
753 return ENOTSUP;
754 }
755
756 error = sysctl_handle_int(oidp, &pid, 0, req);
757 if (error || !req->newptr) {
758 return error;
759 }
760
761 if (pid == VM_PAGES_FOR_ALL_PROCS) {
762 do_fastwake_warmup_all();
763 return 0;
764 } else {
765 p = proc_find(pid);
766 if (p != NULL) {
767 error = task_thaw(proc_task(p));
768
769 if (error) {
770 error = EIO;
771 } else {
772 /*
773 * task_thaw() succeeded.
774 *
775 * We increment memorystatus_frozen_count on the sysctl freeze path.
776 * And so we need the P_MEMSTAT_FROZEN to decrement the frozen count
777 * when this process exits.
778 *
779 * proc_list_lock();
780 * p->p_memstat_state &= ~P_MEMSTAT_FROZEN;
781 * proc_list_unlock();
782 */
783 }
784 proc_rele(p);
785 return error;
786 }
787 }
788
789 return EINVAL;
790}
791
/*
 * Registers the integer OID "memorystatus_thaw" under _kern.
 * The OID is flagged write-only (CTLFLAG_WR), locked and masked, and writes
 * are dispatched to sysctl_memorystatus_available_pages_thaw().
 */
SYSCTL_PROC(_kern, OID_AUTO, memorystatus_thaw, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_available_pages_thaw, "I", "");
794
795
/*
 * System-wide freezer state; this is the header that
 * memorystatus_freezer_get_status() places at the start of its output,
 * ahead of the per-process entries.
 */
typedef struct _global_freezable_status {
	boolean_t       freeze_pages_threshold_crossed;         /* memorystatus_available_pages < memorystatus_freeze_threshold */
	boolean_t       freeze_eligible_procs_available;        /* candidates remain (ordered list length or suspended-vs-frozen counts) */
	boolean_t       freeze_scheduled_in_future;             /* now < memorystatus_freezer_thread_next_run_ts */
}global_freezable_status_t;
801
/*
 * Per-process freezability record filled in by
 * memorystatus_freezer_get_status(), one per evaluated process.
 */
typedef struct _proc_freezable_status {
	boolean_t    freeze_has_memstat_state;  /* suspended and not terminated / locked / freeze-disabled / freeze-ignored */
	boolean_t    freeze_has_pages_min;      /* page count >= memorystatus_freeze_pages_min (forced TRUE for XPC services) */
	int        freeze_has_probability;      /* use probability from the global table; -1 when not evaluated */
	int        freeze_leader_eligible;      /* one of FREEZE_PROC_LEADER_FREEZABLE_* */
	boolean_t    freeze_attempted;          /* an eval-only task_freeze() was run for this process */
	uint32_t    p_memstat_state;            /* snapshot of the process' p_memstat_state */
	uint32_t    p_pages;                    /* page count from memorystatus_get_task_page_counts(); assigned -1 for XPC services */
	int        p_freeze_error_code;         /* freezer error code from the evaluation, if it failed */
	int        p_pid;                       /* pid of the evaluated process */
	int        p_leader_pid;                /* coalition leader pid; 0 when this isn't a coalition driven freeze */
	char        p_name[MAXCOMLEN + 1];      /* process name, NUL-terminated by strlcpy() */
}proc_freezable_status_t;
815
#define MAX_FREEZABLE_PROCESSES 200 /* Total # of processes in band 0 that we evaluate for freezability */

/*
 * For coalition based freezing evaluations, we proceed as follows:
 *  - detect that the process is a coalition member and an XPC service
 *  - mark its 'freeze_leader_eligible' field with FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN
 *  - continue its freezability evaluation assuming its leader will be freezable too
 *
 * Once we are done evaluating all processes, we do a quick run through all
 * processes and, for each coalition member XPC service, we look up the
 * 'freezable' status of its leader and iff:
 *  - the xpc service is freezable i.e. its individual freeze evaluation worked
 *  - and, its leader is also marked freezable
 * we update its 'freeze_leader_eligible' to FREEZE_PROC_LEADER_FREEZABLE_SUCCESS.
 */

/* Values stored in proc_freezable_status_t.freeze_leader_eligible. */
#define FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN   (-1)     /* XPC service whose leader has not been resolved yet */
#define FREEZE_PROC_LEADER_FREEZABLE_SUCCESS    (1)     /* leader is freezable or already frozen, or the process is its own leader */
#define FREEZE_PROC_LEADER_FREEZABLE_FAILURE    (2)     /* leader was not found among the entries or failed its evaluation */
835
836static int
837memorystatus_freezer_get_status(user_addr_t buffer, size_t buffer_size, int32_t *retval)
838{
839 uint32_t proc_count = 0, freeze_eligible_proc_considered = 0, band = 0, xpc_index = 0, leader_index = 0;
840 global_freezable_status_t *list_head;
841 proc_freezable_status_t *list_entry, *list_entry_start;
842 size_t list_size = 0, entry_count = 0;
843 proc_t p, leader_proc;
844 memstat_bucket_t *bucket;
845 uint32_t state = 0, pages = 0;
846 boolean_t try_freeze = TRUE, xpc_skip_size_probability_check = FALSE;
847 int error = 0, probability_of_use = 0;
848 pid_t leader_pid = 0;
849 struct memorystatus_freeze_list_iterator iterator;
850
851 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
852 return ENOTSUP;
853 }
854
855 bzero(&iterator, sizeof(struct memorystatus_freeze_list_iterator));
856
857 list_size = sizeof(global_freezable_status_t) + (sizeof(proc_freezable_status_t) * MAX_FREEZABLE_PROCESSES);
858
859 if (buffer_size < list_size) {
860 return EINVAL;
861 }
862
863 list_head = (global_freezable_status_t *)kalloc_data(list_size, Z_WAITOK | Z_ZERO);
864 if (list_head == NULL) {
865 return ENOMEM;
866 }
867
868 list_size = sizeof(global_freezable_status_t);
869
870 lck_mtx_lock(&freezer_mutex);
871 proc_list_lock();
872
873 uint64_t curr_time = mach_absolute_time();
874
875 list_head->freeze_pages_threshold_crossed = (memorystatus_available_pages < memorystatus_freeze_threshold);
876 if (memorystatus_freezer_use_ordered_list) {
877 list_head->freeze_eligible_procs_available = memorystatus_frozen_count < memorystatus_global_freeze_list.mfcl_length;
878 } else {
879 list_head->freeze_eligible_procs_available = ((memorystatus_suspended_count - memorystatus_frozen_count) > memorystatus_freeze_suspended_threshold);
880 }
881 list_head->freeze_scheduled_in_future = (curr_time < memorystatus_freezer_thread_next_run_ts);
882
883 list_entry_start = (proc_freezable_status_t*) ((uintptr_t)list_head + sizeof(global_freezable_status_t));
884 list_entry = list_entry_start;
885
886 bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE];
887
888 entry_count = (memorystatus_global_probabilities_size / sizeof(memorystatus_internal_probabilities_t));
889
890 if (memorystatus_freezer_use_ordered_list) {
891 while (iterator.global_freeze_list_index < memorystatus_global_freeze_list.mfcl_length) {
892 p = memorystatus_freezer_candidate_list_get_proc(
893 &memorystatus_global_freeze_list,
894 (iterator.global_freeze_list_index)++,
895 NULL);
896 if (p != PROC_NULL) {
897 break;
898 }
899 }
900 } else {
901 p = memorystatus_get_first_proc_locked(&band, FALSE);
902 }
903
904 proc_count++;
905
906 while ((proc_count <= MAX_FREEZABLE_PROCESSES) &&
907 (p) &&
908 (list_size < buffer_size)) {
909 if (isSysProc(p)) {
910 /*
911 * Daemon:- We will consider freezing it iff:
912 * - it belongs to a coalition and the leader is freeze-eligible (delayed evaluation)
913 * - its role in the coalition is XPC service.
914 *
915 * We skip memory size requirements in this case.
916 */
917
918 coalition_t coal = COALITION_NULL;
919 task_t leader_task = NULL, curr_task = NULL;
920 int task_role_in_coalition = 0;
921
922 curr_task = proc_task(p);
923 coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
924
925 if (coal == COALITION_NULL || coalition_is_leader(curr_task, coal)) {
926 /*
927 * By default, XPC services without an app
928 * will be the leader of their own single-member
929 * coalition.
930 */
931 goto skip_ineligible_xpc;
932 }
933
934 leader_task = coalition_get_leader(coal);
935 if (leader_task == TASK_NULL) {
936 /*
937 * This jetsam coalition is currently leader-less.
938 * This could happen if the app died, but XPC services
939 * have not yet exited.
940 */
941 goto skip_ineligible_xpc;
942 }
943
944 leader_proc = (proc_t)get_bsdtask_info(leader_task);
945 task_deallocate(leader_task);
946
947 if (leader_proc == PROC_NULL) {
948 /* leader task is exiting */
949 goto skip_ineligible_xpc;
950 }
951
952 task_role_in_coalition = task_coalition_role_for_type(curr_task, COALITION_TYPE_JETSAM);
953
954 if (task_role_in_coalition == COALITION_TASKROLE_XPC) {
955 xpc_skip_size_probability_check = TRUE;
956 leader_pid = proc_getpid(leader_proc);
957 goto continue_eval;
958 }
959
960skip_ineligible_xpc:
961 p = memorystatus_get_next_proc_locked(&band, p, FALSE);
962 proc_count++;
963 continue;
964 }
965
966continue_eval:
967 strlcpy(list_entry->p_name, p->p_name, MAXCOMLEN + 1);
968
969 list_entry->p_pid = proc_getpid(p);
970
971 state = p->p_memstat_state;
972
973 if ((state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED | P_MEMSTAT_FREEZE_IGNORE)) ||
974 !(state & P_MEMSTAT_SUSPENDED)) {
975 try_freeze = list_entry->freeze_has_memstat_state = FALSE;
976 } else {
977 try_freeze = list_entry->freeze_has_memstat_state = TRUE;
978 }
979
980 list_entry->p_memstat_state = state;
981
982 if (xpc_skip_size_probability_check == TRUE) {
983 /*
984 * Assuming the coalition leader is freezable
985 * we don't care re. minimum pages and probability
986 * as long as the process isn't marked P_MEMSTAT_FREEZE_DISABLED.
987 * XPC services have to be explicity opted-out of the disabled
988 * state. And we checked that state above.
989 */
990 list_entry->freeze_has_pages_min = TRUE;
991 list_entry->p_pages = -1;
992 list_entry->freeze_has_probability = -1;
993
994 list_entry->freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN;
995 list_entry->p_leader_pid = leader_pid;
996
997 xpc_skip_size_probability_check = FALSE;
998 } else {
999 list_entry->freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS; /* Apps are freeze eligible and their own leaders. */
1000 list_entry->p_leader_pid = 0; /* Setting this to 0 signifies this isn't a coalition driven freeze. */
1001
1002 memorystatus_get_task_page_counts(proc_task(p), &pages, NULL, NULL);
1003 if (pages < memorystatus_freeze_pages_min) {
1004 try_freeze = list_entry->freeze_has_pages_min = FALSE;
1005 } else {
1006 list_entry->freeze_has_pages_min = TRUE;
1007 }
1008
1009 list_entry->p_pages = pages;
1010
1011 if (entry_count) {
1012 uint32_t j = 0;
1013 for (j = 0; j < entry_count; j++) {
1014 if (strncmp(memorystatus_global_probabilities_table[j].proc_name,
1015 p->p_name,
1016 MAXCOMLEN) == 0) {
1017 probability_of_use = memorystatus_global_probabilities_table[j].use_probability;
1018 break;
1019 }
1020 }
1021
1022 list_entry->freeze_has_probability = probability_of_use;
1023
1024 try_freeze = ((probability_of_use > 0) && try_freeze);
1025 } else {
1026 list_entry->freeze_has_probability = -1;
1027 }
1028 }
1029
1030 if (try_freeze) {
1031 uint32_t purgeable, wired, clean, dirty, shared;
1032 uint32_t max_pages = 0;
1033 int freezer_error_code = 0;
1034
1035 error = task_freeze(proc_task(p), &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, TRUE /* eval only */);
1036
1037 if (error) {
1038 list_entry->p_freeze_error_code = freezer_error_code;
1039 }
1040
1041 list_entry->freeze_attempted = TRUE;
1042 }
1043
1044 list_entry++;
1045 freeze_eligible_proc_considered++;
1046
1047 list_size += sizeof(proc_freezable_status_t);
1048
1049 if (memorystatus_freezer_use_ordered_list) {
1050 p = PROC_NULL;
1051 while (iterator.global_freeze_list_index < memorystatus_global_freeze_list.mfcl_length) {
1052 p = memorystatus_freezer_candidate_list_get_proc(
1053 &memorystatus_global_freeze_list,
1054 (iterator.global_freeze_list_index)++,
1055 NULL);
1056 if (p != PROC_NULL) {
1057 break;
1058 }
1059 }
1060 } else {
1061 p = memorystatus_get_next_proc_locked(&band, p, FALSE);
1062 }
1063
1064 proc_count++;
1065 }
1066
1067 proc_list_unlock();
1068 lck_mtx_unlock(&freezer_mutex);
1069
1070 list_entry = list_entry_start;
1071
1072 for (xpc_index = 0; xpc_index < freeze_eligible_proc_considered; xpc_index++) {
1073 if (list_entry[xpc_index].freeze_leader_eligible == FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN) {
1074 leader_pid = list_entry[xpc_index].p_leader_pid;
1075
1076 leader_proc = proc_find(leader_pid);
1077
1078 if (leader_proc) {
1079 if (leader_proc->p_memstat_state & P_MEMSTAT_FROZEN) {
1080 /*
1081 * Leader has already been frozen.
1082 */
1083 list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS;
1084 proc_rele(leader_proc);
1085 continue;
1086 }
1087 proc_rele(leader_proc);
1088 }
1089
1090 for (leader_index = 0; leader_index < freeze_eligible_proc_considered; leader_index++) {
1091 if (list_entry[leader_index].p_pid == leader_pid) {
1092 if (list_entry[leader_index].freeze_attempted && list_entry[leader_index].p_freeze_error_code == 0) {
1093 list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS;
1094 } else {
1095 list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_FAILURE;
1096 list_entry[xpc_index].p_freeze_error_code = FREEZER_ERROR_GENERIC;
1097 }
1098 break;
1099 }
1100 }
1101
1102 /*
1103 * Didn't find the leader entry. This might be likely because
1104 * the leader never made it down to band 0.
1105 */
1106 if (leader_index == freeze_eligible_proc_considered) {
1107 list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_FAILURE;
1108 list_entry[xpc_index].p_freeze_error_code = FREEZER_ERROR_GENERIC;
1109 }
1110 }
1111 }
1112
1113 buffer_size = MIN(list_size, INT32_MAX);
1114
1115 error = copyout(list_head, buffer, buffer_size);
1116 if (error == 0) {
1117 *retval = (int32_t) buffer_size;
1118 } else {
1119 *retval = 0;
1120 }
1121
1122 list_size = sizeof(global_freezable_status_t) + (sizeof(proc_freezable_status_t) * MAX_FREEZABLE_PROCESSES);
1123 kfree_data(list_head, list_size);
1124
1125 memorystatus_log_debug("memorystatus_freezer_get_status: returning %d (%lu - size)\n", error, (unsigned long)list_size);
1126
1127 return error;
1128}
1129
1130#endif /* DEVELOPMENT || DEBUG */
1131
1132/*
1133 * Get a list of all processes in the freezer band which are currently frozen.
1134 * Used by powerlog to collect analytics on frozen process.
1135 */
1136static int
1137memorystatus_freezer_get_procs(user_addr_t buffer, size_t buffer_size, int32_t *retval)
1138{
1139 global_frozen_procs_t *frozen_procs = NULL;
1140 uint32_t band = memorystatus_freeze_jetsam_band;
1141 proc_t p;
1142 uint32_t state;
1143 int error;
1144 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
1145 return ENOTSUP;
1146 }
1147 if (buffer_size < sizeof(global_frozen_procs_t)) {
1148 return EINVAL;
1149 }
1150 frozen_procs = (global_frozen_procs_t *)kalloc_data(sizeof(global_frozen_procs_t), Z_WAITOK | Z_ZERO);
1151 if (frozen_procs == NULL) {
1152 return ENOMEM;
1153 }
1154
1155 proc_list_lock();
1156 p = memorystatus_get_first_proc_locked(&band, FALSE);
1157 while (p && frozen_procs->gfp_num_frozen < FREEZER_CONTROL_GET_PROCS_MAX_COUNT) {
1158 state = p->p_memstat_state;
1159 if (state & P_MEMSTAT_FROZEN) {
1160 frozen_procs->gfp_procs[frozen_procs->gfp_num_frozen].fp_pid = proc_getpid(p);
1161 strlcpy(frozen_procs->gfp_procs[frozen_procs->gfp_num_frozen].fp_name,
1162 p->p_name, sizeof(proc_name_t));
1163 frozen_procs->gfp_num_frozen++;
1164 }
1165 p = memorystatus_get_next_proc_locked(&band, p, FALSE);
1166 }
1167 proc_list_unlock();
1168
1169 buffer_size = MIN(buffer_size, sizeof(global_frozen_procs_t));
1170 error = copyout(frozen_procs, buffer, buffer_size);
1171 if (error == 0) {
1172 *retval = (int32_t) buffer_size;
1173 } else {
1174 *retval = 0;
1175 }
1176 kfree_data(frozen_procs, sizeof(global_frozen_procs_t));
1177
1178 return error;
1179}
1180
1181/*
1182 * If dasd is running an experiment that impacts their freezer candidate selection,
1183 * we record that in our telemetry.
1184 */
1185static memorystatus_freezer_trial_identifiers_v1 dasd_trial_identifiers;
1186
1187static int
1188memorystatus_freezer_set_dasd_trial_identifiers(user_addr_t buffer, size_t buffer_size, int32_t *retval)
1189{
1190 memorystatus_freezer_trial_identifiers_v1 identifiers;
1191 int error = 0;
1192
1193 if (buffer_size != sizeof(identifiers)) {
1194 return EINVAL;
1195 }
1196 error = copyin(buffer, &identifiers, sizeof(identifiers));
1197 if (error != 0) {
1198 return error;
1199 }
1200 if (identifiers.version != 1) {
1201 return EINVAL;
1202 }
1203 dasd_trial_identifiers = identifiers;
1204 *retval = 0;
1205 return error;
1206}
1207
1208/*
1209 * Reset the freezer state by wiping out all suspended frozen apps, clearing
1210 * per-process freezer state, and starting a fresh interval.
1211 */
1212static int
1213memorystatus_freezer_reset_state(int32_t *retval)
1214{
1215 uint32_t band = JETSAM_PRIORITY_IDLE;
1216 /* Don't kill above the frozen band */
1217 uint32_t kMaxBand = memorystatus_freeze_jetsam_band;
1218 proc_t next_p = PROC_NULL;
1219 uint64_t new_budget;
1220
1221 if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1222 return ENOTSUP;
1223 }
1224
1225 os_reason_t jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_GENERIC);
1226 if (jetsam_reason == OS_REASON_NULL) {
1227 memorystatus_log_error("memorystatus_freezer_reset_state -- sync: failed to allocate jetsam reason\n");
1228 }
1229 lck_mtx_lock(&freezer_mutex);
1230 kill_all_frozen_processes(kMaxBand, true, jetsam_reason, NULL);
1231 proc_list_lock();
1232
1233 /*
1234 * Clear the considered and skip reason flags on all processes
1235 * so we're starting fresh with the new policy.
1236 */
1237 next_p = memorystatus_get_first_proc_locked(&band, TRUE);
1238 while (next_p) {
1239 proc_t p = next_p;
1240 uint32_t state = p->p_memstat_state;
1241 next_p = memorystatus_get_next_proc_locked(&band, p, TRUE);
1242
1243 if (p->p_memstat_effectivepriority > kMaxBand) {
1244 break;
1245 }
1246 if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED)) {
1247 continue;
1248 }
1249
1250 p->p_memstat_state &= ~(P_MEMSTAT_FREEZE_CONSIDERED);
1251 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
1252 }
1253
1254 proc_list_unlock();
1255
1256 new_budget = memorystatus_freeze_calculate_new_budget(0, normal_throttle_window->burst_multiple, normal_throttle_window->mins, 0);
1257 memorystatus_freeze_force_new_interval(new_budget);
1258
1259 lck_mtx_unlock(&freezer_mutex);
1260 *retval = 0;
1261 return 0;
1262}
1263
1264int
1265memorystatus_freezer_control(int32_t flags, user_addr_t buffer, size_t buffer_size, int32_t *retval)
1266{
1267 int err = ENOTSUP;
1268
1269#if DEVELOPMENT || DEBUG
1270 if (flags == FREEZER_CONTROL_GET_STATUS) {
1271 err = memorystatus_freezer_get_status(buffer, buffer_size, retval);
1272 }
1273#endif /* DEVELOPMENT || DEBUG */
1274 if (flags == FREEZER_CONTROL_GET_PROCS) {
1275 err = memorystatus_freezer_get_procs(buffer, buffer_size, retval);
1276 } else if (flags == FREEZER_CONTROL_SET_DASD_TRIAL_IDENTIFIERS) {
1277 err = memorystatus_freezer_set_dasd_trial_identifiers(buffer, buffer_size, retval);
1278 } else if (flags == FREEZER_CONTROL_RESET_STATE) {
1279 err = memorystatus_freezer_reset_state(retval);
1280 }
1281
1282 return err;
1283}
1284
/* VM routines defined outside this file; both are called from memorystatus_disable_freeze(). */
extern void vm_swap_consider_defragmenting(int);
extern void vm_page_reactivate_all_throttled(void);
1287
1288static bool
1289kill_all_frozen_processes(uint64_t max_band, bool suspended_only, os_reason_t jetsam_reason, uint64_t *memory_reclaimed_out)
1290{
1291 LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
1292 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
1293
1294 unsigned int band = 0;
1295 proc_t p = PROC_NULL, next_p = PROC_NULL;
1296 pid_t pid = 0;
1297 bool retval = false, killed = false;
1298 uint32_t state;
1299 uint64_t memory_reclaimed = 0, footprint = 0, skips = 0;
1300 proc_list_lock();
1301
1302 band = JETSAM_PRIORITY_IDLE;
1303 p = PROC_NULL;
1304 next_p = PROC_NULL;
1305
1306 next_p = memorystatus_get_first_proc_locked(&band, TRUE);
1307 while (next_p) {
1308 p = next_p;
1309 next_p = memorystatus_get_next_proc_locked(&band, p, TRUE);
1310 state = p->p_memstat_state;
1311
1312 if (p->p_memstat_effectivepriority > max_band) {
1313 break;
1314 }
1315
1316 if (!(state & P_MEMSTAT_FROZEN)) {
1317 continue;
1318 }
1319
1320 if (suspended_only && !(state & P_MEMSTAT_SUSPENDED)) {
1321 continue;
1322 }
1323
1324 if (state & P_MEMSTAT_ERROR) {
1325 p->p_memstat_state &= ~P_MEMSTAT_ERROR;
1326 }
1327
1328 if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED)) {
1329 memorystatus_log("memorystatus: Skipping kill of frozen process %s (%d) because it's already exiting.\n", p->p_name, proc_getpid(p));
1330 skips++;
1331 continue;
1332 }
1333
1334 footprint = get_task_phys_footprint(proc_task(p));
1335 pid = proc_getpid(p);
1336 proc_list_unlock();
1337
1338 /* memorystatus_kill_with_jetsam_reason_sync drops a reference. */
1339 os_reason_ref(jetsam_reason);
1340 retval = memorystatus_kill_with_jetsam_reason_sync(pid, jetsam_reason);
1341 if (retval) {
1342 killed = true;
1343 memory_reclaimed += footprint;
1344 }
1345 proc_list_lock();
1346 /*
1347 * The bands might have changed when we dropped the proc list lock.
1348 * So start from the beginning.
1349 * Since we're preventing any further freezing by holding the freezer mutex,
1350 * and we skip anything we've already tried to kill this is guaranteed to terminate.
1351 */
1352 band = 0;
1353 skips = 0;
1354 next_p = memorystatus_get_first_proc_locked(&band, TRUE);
1355 }
1356
1357 assert(skips <= memorystatus_frozen_count);
1358#if DEVELOPMENT || DEBUG
1359 if (!suspended_only && max_band >= JETSAM_PRIORITY_FOREGROUND) {
1360 /*
1361 * Check that we've killed all frozen processes.
1362 * Note that they may still be exiting (represented by skips).
1363 */
1364 if (memorystatus_frozen_count - skips > 0) {
1365 assert(memorystatus_freeze_enabled == false);
1366
1367 panic("memorystatus_disable_freeze: Failed to kill all frozen processes, memorystatus_frozen_count = %d",
1368 memorystatus_frozen_count);
1369 }
1370 }
1371#endif /* DEVELOPMENT || DEBUG */
1372 if (memory_reclaimed_out) {
1373 *memory_reclaimed_out = memory_reclaimed;
1374 }
1375 proc_list_unlock();
1376 return killed;
1377}
1378
1379/*
1380 * Disables the freezer, jetsams all frozen processes,
1381 * and reclaims the swap space immediately.
1382 */
1383
1384void
1385memorystatus_disable_freeze(void)
1386{
1387 uint64_t memory_reclaimed = 0;
1388 bool killed = false;
1389 LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
1390 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
1391
1392
1393 KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE_DISABLE) | DBG_FUNC_START,
1394 memorystatus_available_pages);
1395 memorystatus_log("memorystatus: Disabling freezer. Will kill all frozen processes\n");
1396
1397 /*
1398 * We hold the freezer_mutex (preventing anything from being frozen in parallel)
1399 * and all frozen processes will be killed
1400 * by the time we release it. Setting memorystatus_freeze_enabled to false,
1401 * ensures that no new processes will be frozen once we release the mutex.
1402 *
1403 */
1404 memorystatus_freeze_enabled = false;
1405
1406 /*
1407 * Move dirty pages out from the throttle to the active queue since we're not freezing anymore.
1408 */
1409 vm_page_reactivate_all_throttled();
1410 os_reason_t jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_MEMORY_DISK_SPACE_SHORTAGE);
1411 if (jetsam_reason == OS_REASON_NULL) {
1412 memorystatus_log_error("memorystatus_disable_freeze -- sync: failed to allocate jetsam reason\n");
1413 }
1414
1415 killed = kill_all_frozen_processes(JETSAM_PRIORITY_FOREGROUND, false, jetsam_reason, &memory_reclaimed);
1416
1417 if (killed) {
1418 memorystatus_log_info("memorystatus: Killed all frozen processes.\n");
1419 vm_swap_consider_defragmenting(VM_SWAP_FLAGS_FORCE_DEFRAG | VM_SWAP_FLAGS_FORCE_RECLAIM);
1420
1421 proc_list_lock();
1422 size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) +
1423 sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_count);
1424 uint64_t timestamp_now = mach_absolute_time();
1425 memorystatus_jetsam_snapshot->notification_time = timestamp_now;
1426 memorystatus_jetsam_snapshot->js_gencount++;
1427 if (memorystatus_jetsam_snapshot_count > 0 && (memorystatus_jetsam_snapshot_last_timestamp == 0 ||
1428 timestamp_now > memorystatus_jetsam_snapshot_last_timestamp + memorystatus_jetsam_snapshot_timeout)) {
1429 proc_list_unlock();
1430 int ret = memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size));
1431 if (!ret) {
1432 proc_list_lock();
1433 memorystatus_jetsam_snapshot_last_timestamp = timestamp_now;
1434 }
1435 }
1436 proc_list_unlock();
1437 } else {
1438 memorystatus_log_info("memorystatus: No frozen processes to kill.\n");
1439 }
1440
1441 KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE_DISABLE) | DBG_FUNC_END,
1442 memorystatus_available_pages, memory_reclaimed);
1443
1444 return;
1445}
1446
1447static void
1448memorystatus_set_freeze_is_enabled(bool enabled)
1449{
1450 lck_mtx_lock(&freezer_mutex);
1451 if (enabled != memorystatus_freeze_enabled) {
1452 if (enabled) {
1453 memorystatus_freeze_enabled = true;
1454 } else {
1455 memorystatus_disable_freeze();
1456 }
1457 }
1458 lck_mtx_unlock(&freezer_mutex);
1459}
1460
1461
1462static int
1463sysctl_freeze_enabled SYSCTL_HANDLER_ARGS
1464{
1465#pragma unused(arg1, arg2)
1466 int error, val = memorystatus_freeze_enabled ? 1 : 0;
1467
1468 error = sysctl_handle_int(oidp, &val, 0, req);
1469 if (error || !req->newptr) {
1470 return error;
1471 }
1472
1473 if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1474 memorystatus_log_error("memorystatus: Failed attempt to set vm.freeze_enabled sysctl\n");
1475 return EINVAL;
1476 }
1477
1478 memorystatus_set_freeze_is_enabled(val);
1479
1480 return 0;
1481}
1482
/*
 * Registers the integer OID "freeze_enabled" under _vm. It is flagged
 * read-write with CTLFLAG_ANYBODY and is handled by sysctl_freeze_enabled().
 */
SYSCTL_PROC(_vm, OID_AUTO, freeze_enabled, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY, NULL, 0, sysctl_freeze_enabled, "I", "");
1484
1485static void
1486schedule_interval_reset(thread_call_t reset_thread_call, throttle_interval_t *interval)
1487{
1488 uint64_t interval_expiration_ns = interval->ts.tv_sec * NSEC_PER_SEC + interval->ts.tv_nsec;
1489 uint64_t interval_expiration_absolutetime;
1490 nanoseconds_to_absolutetime(interval_expiration_ns, &interval_expiration_absolutetime);
1491 memorystatus_log_info("memorystatus: scheduling new freezer interval at %llu absolute time\n", interval_expiration_absolutetime);
1492
1493 thread_call_enter_delayed(reset_thread_call, interval_expiration_absolutetime);
1494}
1495
/*
 * Trial identifiers defined outside this file; they are copied into the
 * trial_* fields of the freezer_interval analytics event.
 */
extern uuid_string_t trial_treatment_id;
extern uuid_string_t trial_experiment_id;
extern int trial_deployment_id;
1499
/*
 * Core analytics event describing one freezer interval. The fields are
 * filled in by memorystatus_freeze_record_interval_analytics().
 */
CA_EVENT(freezer_interval,
    CA_INT, budget_remaining,
    CA_INT, error_below_min_pages,
    CA_INT, error_excess_shared_memory,
    CA_INT, error_low_private_shared_ratio,
    CA_INT, error_no_compressor_space,
    CA_INT, error_no_swap_space,
    CA_INT, error_low_probability_of_use,
    CA_INT, error_elevated,
    CA_INT, error_other,
    CA_INT, frozen_count,
    CA_INT, pageouts,
    CA_INT, refreeze_average,
    CA_INT, skipped_full,
    CA_INT, skipped_shared_mb_high,
    CA_INT, swapusage,
    CA_INT, thaw_count,
    CA_INT, thaw_percentage,
    CA_INT, thaws_per_gb,
    CA_INT, trial_deployment_id,
    CA_INT, dasd_trial_deployment_id,
    CA_INT, budget_exhaustion_duration_remaining,
    CA_INT, thaw_percentage_webcontent,
    CA_INT, thaw_percentage_fg,
    CA_INT, thaw_percentage_bg,
    CA_INT, thaw_percentage_fg_non_xpc_service,
    CA_INT, fg_resume_count,
    CA_INT, unique_freeze_count,
    CA_INT, unique_thaw_count,
    CA_STATIC_STRING(CA_UUID_LEN), trial_treatment_id,
    CA_STATIC_STRING(CA_UUID_LEN), trial_experiment_id,
    CA_STATIC_STRING(CA_UUID_LEN), dasd_trial_treatment_id,
    CA_STATIC_STRING(CA_UUID_LEN), dasd_trial_experiment_id);
1533
/* Swap space accessors defined outside this file; used to compute the 'swapusage' analytics field. */
extern uint64_t vm_swap_get_total_space(void);
extern uint64_t vm_swap_get_free_space(void);
1536
1537/*
1538 * Record statistics from the expiring interval
1539 * via core analytics.
1540 */
1541static void
1542memorystatus_freeze_record_interval_analytics(void)
1543{
1544 ca_event_t event = CA_EVENT_ALLOCATE(freezer_interval);
1545 CA_EVENT_TYPE(freezer_interval) * e = event->data;
1546 e->budget_remaining = memorystatus_freeze_budget_pages_remaining * PAGE_SIZE / (1UL << 20);
1547 uint64_t process_considered_count, refrozen_count, below_threshold_count;
1548 memory_object_size_t swap_size;
1549 process_considered_count = memorystatus_freezer_stats.mfs_process_considered_count;
1550 if (process_considered_count != 0) {
1551 e->error_below_min_pages = memorystatus_freezer_stats.mfs_error_below_min_pages_count * 100 / process_considered_count;
1552 e->error_excess_shared_memory = memorystatus_freezer_stats.mfs_error_excess_shared_memory_count * 100 / process_considered_count;
1553 e->error_low_private_shared_ratio = memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count * 100 / process_considered_count;
1554 e->error_no_compressor_space = memorystatus_freezer_stats.mfs_error_no_compressor_space_count * 100 / process_considered_count;
1555 e->error_no_swap_space = memorystatus_freezer_stats.mfs_error_no_swap_space_count * 100 / process_considered_count;
1556 e->error_low_probability_of_use = memorystatus_freezer_stats.mfs_error_low_probability_of_use_count * 100 / process_considered_count;
1557 e->error_elevated = memorystatus_freezer_stats.mfs_error_elevated_count * 100 / process_considered_count;
1558 e->error_other = memorystatus_freezer_stats.mfs_error_other_count * 100 / process_considered_count;
1559 }
1560 e->frozen_count = memorystatus_frozen_count;
1561 e->pageouts = normal_throttle_window->pageouts * PAGE_SIZE / (1UL << 20);
1562 refrozen_count = memorystatus_freezer_stats.mfs_refreeze_count;
1563 if (refrozen_count != 0) {
1564 e->refreeze_average = (memorystatus_freezer_stats.mfs_bytes_refrozen / (1UL << 20)) / refrozen_count;
1565 }
1566 below_threshold_count = memorystatus_freezer_stats.mfs_below_threshold_count;
1567 if (below_threshold_count != 0) {
1568 e->skipped_full = memorystatus_freezer_stats.mfs_skipped_full_count * 100 / below_threshold_count;
1569 e->skipped_shared_mb_high = memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count * 100 / below_threshold_count;
1570 }
1571 if (VM_CONFIG_SWAP_IS_PRESENT) {
1572 swap_size = vm_swap_get_total_space();
1573 if (swap_size) {
1574 e->swapusage = vm_swap_get_free_space() * 100 / swap_size;
1575 }
1576 }
1577 e->thaw_count = memorystatus_thaw_count;
1578 e->thaw_percentage = get_thaw_percentage();
1579 e->thaw_percentage_webcontent = get_thaw_percentage_webcontent();
1580 e->thaw_percentage_fg = get_thaw_percentage_fg();
1581 e->thaw_percentage_bg = get_thaw_percentage_bg();
1582 e->thaw_percentage_fg_non_xpc_service = get_thaw_percentage_fg_non_xpc_service();
1583
1584 if (e->pageouts / (1UL << 10) != 0) {
1585 e->thaws_per_gb = memorystatus_thaw_count / (e->pageouts / (1UL << 10));
1586 }
1587 e->budget_exhaustion_duration_remaining = memorystatus_freezer_stats.mfs_budget_exhaustion_duration_remaining;
1588 e->fg_resume_count = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg, relaxed);
1589 e->unique_freeze_count = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
1590 e->unique_thaw_count = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed, relaxed);
1591
1592 /*
1593 * Record any xnu or dasd experiment information
1594 */
1595 strlcpy(e->trial_treatment_id, trial_treatment_id, CA_UUID_LEN);
1596 strlcpy(e->trial_experiment_id, trial_experiment_id, CA_UUID_LEN);
1597 e->trial_deployment_id = trial_deployment_id;
1598 strlcpy(e->dasd_trial_treatment_id, dasd_trial_identifiers.treatment_id, CA_UUID_LEN);
1599 strlcpy(e->dasd_trial_experiment_id, dasd_trial_identifiers.experiment_id, CA_UUID_LEN);
1600 e->dasd_trial_deployment_id = dasd_trial_identifiers.deployment_id;
1601
1602 CA_EVENT_SEND(event);
1603}
1604
1605static void
1606memorystatus_freeze_reset_interval(void *arg0, void *arg1)
1607{
1608#pragma unused(arg0, arg1)
1609 struct throttle_interval_t *interval = NULL;
1610 clock_sec_t sec;
1611 clock_nsec_t nsec;
1612 mach_timespec_t now_ts;
1613 uint32_t budget_rollover = 0;
1614
1615 clock_get_system_nanotime(&sec, &nsec);
1616 now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
1617 now_ts.tv_nsec = nsec;
1618 interval = normal_throttle_window;
1619
1620 /* Record analytics from the old interval before resetting. */
1621 memorystatus_freeze_record_interval_analytics();
1622
1623 lck_mtx_lock(&freezer_mutex);
1624 /* How long has it been since the previous interval expired? */
1625 mach_timespec_t expiration_period_ts = now_ts;
1626 SUB_MACH_TIMESPEC(&expiration_period_ts, &interval->ts);
1627 /* Get unused budget. Clamp to 0. We'll adjust for overused budget in the next interval. */
1628 budget_rollover = interval->pageouts > interval->max_pageouts ?
1629 0 : interval->max_pageouts - interval->pageouts;
1630
1631 memorystatus_freeze_start_normal_throttle_interval(memorystatus_freeze_calculate_new_budget(
1632 expiration_period_ts.tv_sec, interval->burst_multiple,
1633 interval->mins, budget_rollover),
1634 now_ts);
1635 memorystatus_freeze_budget_pages_remaining = interval->max_pageouts;
1636
1637 if (!memorystatus_freezer_use_demotion_list) {
1638 memorystatus_demote_frozen_processes(false); /* normal mode...don't force a demotion */
1639 }
1640 lck_mtx_unlock(&freezer_mutex);
1641}
1642
1643
1644proc_t
1645memorystatus_get_coalition_leader_and_role(proc_t p, int *role_in_coalition)
1646{
1647 coalition_t coal = COALITION_NULL;
1648 task_t leader_task = NULL, curr_task = NULL;
1649 proc_t leader_proc = PROC_NULL;
1650
1651 curr_task = proc_task(p);
1652 coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
1653
1654 if (coal == NULL || coalition_is_leader(curr_task, coal)) {
1655 return p;
1656 }
1657
1658 leader_task = coalition_get_leader(coal);
1659 if (leader_task == TASK_NULL) {
1660 /*
1661 * This jetsam coalition is currently leader-less.
1662 * This could happen if the app died, but XPC services
1663 * have not yet exited.
1664 */
1665 return PROC_NULL;
1666 }
1667
1668 leader_proc = (proc_t)get_bsdtask_info(leader_task);
1669 task_deallocate(leader_task);
1670
1671 if (leader_proc == PROC_NULL) {
1672 /* leader task is exiting */
1673 return PROC_NULL;
1674 }
1675
1676 *role_in_coalition = task_coalition_role_for_type(curr_task, COALITION_TYPE_JETSAM);
1677
1678 return leader_proc;
1679}
1680
1681bool
1682memorystatus_freeze_process_is_recommended(const proc_t p)
1683{
1684 assert(!memorystatus_freezer_use_ordered_list);
1685 int probability_of_use = 0;
1686
1687 size_t entry_count = 0, i = 0;
1688 entry_count = (memorystatus_global_probabilities_size / sizeof(memorystatus_internal_probabilities_t));
1689 if (entry_count == 0) {
1690 /*
1691 * If dasd hasn't supplied a table yet, we default to every app being eligible
1692 * for the freezer.
1693 */
1694 return true;
1695 }
1696 for (i = 0; i < entry_count; i++) {
1697 /*
1698 * NB: memorystatus_internal_probabilities.proc_name is MAXCOMLEN + 1 bytes
1699 * proc_t.p_name is 2*MAXCOMLEN + 1 bytes. So we only compare the first
1700 * MAXCOMLEN bytes here since the name in the probabilities table could
1701 * be truncated from the proc_t's p_name.
1702 */
1703 if (strncmp(memorystatus_global_probabilities_table[i].proc_name,
1704 p->p_name,
1705 MAXCOMLEN) == 0) {
1706 probability_of_use = memorystatus_global_probabilities_table[i].use_probability;
1707 break;
1708 }
1709 }
1710 return probability_of_use > 0;
1711}
1712
1713__private_extern__ void
1714memorystatus_freeze_init(void)
1715{
1716 kern_return_t result;
1717 thread_t thread;
1718
1719 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1720 /*
1721 * This is just the default value if the underlying
1722 * storage device doesn't have any specific budget.
1723 * We check with the storage layer in memorystatus_freeze_update_throttle()
1724 * before we start our freezing the first time.
1725 */
1726 memorystatus_freeze_budget_pages_remaining = (memorystatus_freeze_daily_mb_max * 1024 * 1024) / PAGE_SIZE;
1727
1728 result = kernel_thread_start(memorystatus_freeze_thread, NULL, &thread);
1729 if (result == KERN_SUCCESS) {
1730 proc_set_thread_policy(thread, TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
1731 proc_set_thread_policy(thread, TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1732 thread_set_thread_name(thread, "VM_freezer");
1733
1734 thread_deallocate(thread);
1735 } else {
1736 panic("Could not create memorystatus_freeze_thread");
1737 }
1738
1739 freeze_interval_reset_thread_call = thread_call_allocate_with_options(memorystatus_freeze_reset_interval, NULL, THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
1740 /* Start a new interval */
1741
1742 lck_mtx_lock(&freezer_mutex);
1743 uint32_t budget;
1744 budget = memorystatus_freeze_calculate_new_budget(0, normal_throttle_window->burst_multiple, normal_throttle_window->mins, 0);
1745 memorystatus_freeze_force_new_interval(budget);
1746 lck_mtx_unlock(&freezer_mutex);
1747 } else {
1748 memorystatus_freeze_budget_pages_remaining = 0;
1749 }
1750}
1751
1752void
1753memorystatus_freeze_configure_for_swap()
1754{
1755 if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1756 return;
1757 }
1758
1759 assert(memorystatus_swap_all_apps);
1760
1761 /*
1762 * We expect both a larger working set and larger individual apps
1763 * in this mode, so tune up the freezer accordingly.
1764 */
1765 memorystatus_frozen_processes_max = FREEZE_PROCESSES_MAX_SWAP_ENABLED;
1766 memorystatus_max_frozen_demotions_daily = MAX_FROZEN_PROCESS_DEMOTIONS_SWAP_ENABLED;
1767 memorystatus_freeze_pages_max = FREEZE_PAGES_MAX_SWAP_ENABLED;
1768
1769 /*
1770 * We don't have a budget when running with full app swap.
1771 * Force a new interval. memorystatus_freeze_calculate_new_budget should give us an
1772 * unlimited budget.
1773 */
1774 lck_mtx_lock(&freezer_mutex);
1775 uint32_t budget;
1776 budget = memorystatus_freeze_calculate_new_budget(0, normal_throttle_window->burst_multiple, normal_throttle_window->mins, 0);
1777 memorystatus_freeze_force_new_interval(budget);
1778 lck_mtx_unlock(&freezer_mutex);
1779}
1780
1781void
1782memorystatus_freeze_disable_swap()
1783{
1784 if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1785 return;
1786 }
1787
1788 assert(!memorystatus_swap_all_apps);
1789
1790 memorystatus_frozen_processes_max = FREEZE_PROCESSES_MAX;
1791 memorystatus_max_frozen_demotions_daily = MAX_FROZEN_PROCESS_DEMOTIONS;
1792 memorystatus_freeze_pages_max = FREEZE_PAGES_MAX;
1793
1794 /*
1795 * Calculate a new budget now that we're constrained by our daily write budget again.
1796 */
1797 lck_mtx_lock(&freezer_mutex);
1798 uint32_t budget;
1799 budget = memorystatus_freeze_calculate_new_budget(0, normal_throttle_window->burst_multiple, normal_throttle_window->mins, 0);
1800 memorystatus_freeze_force_new_interval(budget);
1801 lck_mtx_unlock(&freezer_mutex);
1802}
1803
/*
 * Freeze (or re-freeze) a single process.
 *
 * Called with both the freezer_mutex and proc_list_lock held & both will be held on return.
 * The proc_list_lock is dropped and re-taken internally around task_freeze() and
 * around the post-freeze notification / jetsam band elevation.
 *
 * @param p The process to freeze.
 * @param coal IN / OUT. May be NULL. When non-NULL and *coal is NULL on entry,
 * *coal is set to p's jetsam coalition after a successful freeze.
 * @param coalition_list OUT. When p turns out to be the leader of that coalition,
 * filled via coalition_get_pid_list() (XPC role mask, at most MAX_XPC_SERVICE_PIDS entries).
 * @param coalition_list_length OUT. Number of entries in coalition_list, clamped to
 * MAX_XPC_SERVICE_PIDS. Only written in the coalition-leader case.
 *
 * @return 0 on success.
 * EINVAL if p is not eligible for (re-)freezing or task_freeze() failed.
 * EBUSY if p is a refreeze candidate that is P_MEMSTAT_LOCKED, or proc_ref() failed.
 * ENOSPC if a first-time freeze was requested but all frozen slots are in use.
 */
static int
memorystatus_freeze_process(
	proc_t p,
	coalition_t *coal, /* IN / OUT */
	pid_t *coalition_list, /* OUT */
	unsigned int *coalition_list_length /* OUT */)
{
	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);

	kern_return_t kr;
	uint32_t purgeable, wired, clean, dirty, shared;
	uint64_t max_pages = 0;
	freezer_error_code_t freezer_error_code = 0;
	bool is_refreeze = false;
	task_t curr_task = TASK_NULL;

	/* Cache the pid up front; it is used for logging and notifications below. */
	pid_t aPid = proc_getpid(p);

	/* A process that already carries P_MEMSTAT_FROZEN is being re-frozen. */
	is_refreeze = (p->p_memstat_state & P_MEMSTAT_FROZEN) != 0;

	/* Ensure the process is eligible for (re-)freezing */
	if (is_refreeze && !memorystatus_freeze_proc_is_refreeze_eligible(p)) {
		/* Process is already frozen & hasn't been thawed. Nothing to do here. */
		return EINVAL;
	}
	if (is_refreeze) {
		/*
		 * Not currently being looked at for something.
		 */
		if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
			return EBUSY;
		}

		/*
		 * We are going to try and refreeze and so re-evaluate
		 * the process. We don't want to double count the shared
		 * memory. So deduct the old snapshot here.
		 */
		memorystatus_frozen_shared_mb -= p->p_memstat_freeze_sharedanon_pages;
		p->p_memstat_freeze_sharedanon_pages = 0;

		/* The process is consumed as a refreeze candidate from this point on. */
		p->p_memstat_state &= ~P_MEMSTAT_REFREEZE_ELIGIBLE;
		memorystatus_refreeze_eligible_count--;
	} else {
		if (!memorystatus_is_process_eligible_for_freeze(p)) {
			return EINVAL;
		}
		/* First-time freezes need a free slot; refreezes already own one. */
		if (memorystatus_frozen_count >= memorystatus_frozen_processes_max) {
			memorystatus_freeze_handle_error(p, FREEZER_ERROR_NO_SLOTS, is_refreeze, aPid, (coal ? *coal : NULL), "memorystatus_freeze_process");
			return ENOSPC;
		}
	}

	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
		/*
		 * Freezer backed by the compressor and swap file(s)
		 * will hold compressed data.
		 */

		/* Bounded by both the per-process cap and the remaining budget. */
		max_pages = MIN(memorystatus_freeze_pages_max, memorystatus_freeze_budget_pages_remaining);
	} else {
		/*
		 * We only have the compressor pool.
		 */
		max_pages = UINT32_MAX - 1;
	}

	/* Mark as locked temporarily to avoid kill */
	p->p_memstat_state |= P_MEMSTAT_LOCKED;

	/*
	 * Take a reference so p stays valid while the proc_list_lock is dropped.
	 * NOTE(review): on failure p has been overwritten with NULL, so the
	 * P_MEMSTAT_LOCKED bit set just above is not cleared on this path - confirm
	 * that this is intended.
	 */
	p = proc_ref(p, true);
	if (!p) {
		memorystatus_freezer_stats.mfs_error_other_count++;
		return EBUSY;
	}

	/* task_freeze() is called without the proc_list_lock. */
	proc_list_unlock();

	KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE) | DBG_FUNC_START, memorystatus_available_pages, aPid, max_pages);

	/* task_freeze() takes a 32-bit page limit; clamp before the cast. */
	max_pages = MIN(max_pages, UINT32_MAX);
	kr = task_freeze(proc_task(p), &purgeable, &wired, &clean, &dirty, (uint32_t) max_pages, &shared, &freezer_error_code, FALSE /* eval only */);
	if (kr == KERN_SUCCESS || freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
		memorystatus_freezer_stats.mfs_shared_pages_skipped += shared;
	}

	KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE) | DBG_FUNC_END, purgeable, wired, clean, dirty);

	memorystatus_log_debug("memorystatus_freeze_top_process: task_freeze %s for pid %d [%s] - "
	    "memorystatus_pages: %d, purgeable: %d, wired: %d, clean: %d, dirty: %d, max_pages %llu, shared %d",
	    (kr == KERN_SUCCESS) ? "SUCCEEDED" : "FAILED", aPid, (*p->p_name ? p->p_name : "(unknown)"),
	    memorystatus_available_pages, purgeable, wired, clean, dirty, max_pages, shared);

	/* Re-take the proc_list_lock before touching memstat state again. */
	proc_list_lock();

	/* Success? */
	if (KERN_SUCCESS == kr) {
		/* Payload for the kMemorystatusFreezeNote sent further down. */
		memorystatus_freeze_entry_t data = { aPid, TRUE, dirty };

		/* Account the shared pages reported by task_freeze() per process and globally. */
		p->p_memstat_freeze_sharedanon_pages += shared;

		memorystatus_frozen_shared_mb += shared;

		if (!is_refreeze) {
			/* First freeze: mark the process frozen and bump the frozen counters. */
			p->p_memstat_state |= P_MEMSTAT_FROZEN;
			p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
			memorystatus_frozen_count++;
			os_atomic_inc(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
			if (strcmp(p->p_name, "com.apple.WebKit.WebContent") == 0) {
				memorystatus_frozen_count_webcontent++;
				os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_frozen_webcontent), relaxed);
			}
			if (memorystatus_frozen_count == memorystatus_frozen_processes_max) {
				memorystatus_freeze_out_of_slots();
			}
		} else {
			// This was a re-freeze
			if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
				memorystatus_freezer_stats.mfs_bytes_refrozen += dirty * PAGE_SIZE;
				memorystatus_freezer_stats.mfs_refreeze_count++;
			}
		}

		/* Per-process freeze count; incremented for freezes and re-freezes alike. */
		p->p_memstat_frozen_count++;

		/*
		 * Still keeping the P_MEMSTAT_LOCKED bit till we are actually done elevating this frozen process
		 * to its higher jetsam band.
		 */
		proc_list_unlock();

		memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data));

		if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
			int ret;
			unsigned int i;
			ret = memorystatus_update_inactive_jetsam_priority_band(proc_getpid(p), MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE, memorystatus_freeze_jetsam_band, TRUE);

			if (ret) {
				memorystatus_log_error("Elevating the frozen process failed with %d\n", ret);
				/* not fatal */
			}

			/* Update stats */
			/* Charge the dirty pages against every throttle interval. */
			for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
				throttle_intervals[i].pageouts += dirty;
			}
		}
		/* Recompute the remaining budget now that the pageouts have been charged. */
		memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
		memorystatus_log("memorystatus: %sfreezing (%s) pid %d [%s] done, memorystatus_freeze_budget_pages_remaining %llu %sfroze %u pages\n",
		    is_refreeze ? "re" : "", ((!coal || !*coal) ? "general" : "coalition-driven"), aPid, ((p && *p->p_name) ? p->p_name : "unknown"),
		    memorystatus_freeze_budget_pages_remaining, is_refreeze ? "Re" : "", dirty);

		proc_list_lock();

		memorystatus_freeze_pageouts += dirty;

		if (memorystatus_frozen_count == (memorystatus_frozen_processes_max - 1)) {
			/*
			 * Add some eviction logic here? At some point should we
			 * jetsam a process to get back its swap space so that we
			 * can freeze a more eligible process at this moment in time?
			 */
		}

		/* Check if we just froze a coalition leader. If so, return the list of XPC services to freeze next. */
		if (coal != NULL && *coal == NULL) {
			curr_task = proc_task(p);
			*coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
			if (coalition_is_leader(curr_task, *coal)) {
				*coalition_list_length = coalition_get_pid_list(*coal, COALITION_ROLEMASK_XPC,
				    COALITION_SORT_DEFAULT, coalition_list, MAX_XPC_SERVICE_PIDS);

				/* The reported count can exceed the buffer size; only MAX_XPC_SERVICE_PIDS slots exist. */
				if (*coalition_list_length > MAX_XPC_SERVICE_PIDS) {
					*coalition_list_length = MAX_XPC_SERVICE_PIDS;
				}
			}
		} else {
			/* We just froze an xpc service. Mark it as such for telemetry */
			/*
			 * NOTE(review): this branch is also taken whenever the caller passes
			 * coal == NULL, not only when *coal was already set - confirm that
			 * every such caller really is freezing an XPC service.
			 */
			p->p_memstat_state |= P_MEMSTAT_FROZEN_XPC_SERVICE;
			memorystatus_frozen_count_xpc_service++;
			os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_frozen_xpc_service), relaxed);
		}

		/* Done: unlock the process, wake any waiter on its state and drop our reference. */
		p->p_memstat_state &= ~P_MEMSTAT_LOCKED;
		wakeup(&p->p_memstat_state);
		proc_rele(p);
		return 0;
	} else {
		if (is_refreeze) {
			if ((freezer_error_code == FREEZER_ERROR_EXCESS_SHARED_MEMORY) ||
			    (freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO)) {
				/*
				 * Keeping this prior-frozen process in this high band when
				 * we failed to re-freeze it due to bad shared memory usage
				 * could cause excessive pressure on the lower bands.
				 * We need to demote it for now. It'll get re-evaluated next
				 * time because we don't set the P_MEMSTAT_FREEZE_IGNORE
				 * bit.
				 */

				p->p_memstat_state &= ~P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND;
				memorystatus_invalidate_idle_demotion_locked(p, TRUE);
				memorystatus_update_priority_locked(p, JETSAM_PRIORITY_IDLE, TRUE, TRUE);
			}
		} else {
			/* A failed first-time freeze flags the process with P_MEMSTAT_FREEZE_IGNORE. */
			p->p_memstat_state |= P_MEMSTAT_FREEZE_IGNORE;
		}
		memorystatus_freeze_handle_error(p, freezer_error_code, p->p_memstat_state & P_MEMSTAT_FROZEN, aPid, (coal != NULL) ? *coal : NULL, "memorystatus_freeze_process");

		/* Unlock the process, wake any waiter on its state and drop our reference. */
		p->p_memstat_state &= ~P_MEMSTAT_LOCKED;
		wakeup(&p->p_memstat_state);
		proc_rele(p);

		return EINVAL;
	}
}
2025
2026/*
2027 * Synchronously freeze the passed proc. Called with a reference to the proc held.
2028 *
2029 * Doesn't deal with:
2030 * - re-freezing because this is called on a specific process and
2031 * not by the freezer thread. If that changes, we'll have to teach it about
2032 * refreezing a frozen process.
2033 *
2034 * - grouped/coalition freezing because we are hoping to deprecate this
2035 * interface as it was used by user-space to freeze particular processes. But
2036 * we have moved away from that approach to having the kernel choose the optimal
2037 * candidates to be frozen.
2038 *
2039 * Returns ENOTSUP if the freezer isn't supported on this device. Otherwise
2040 * returns EINVAL or the value returned by task_freeze().
2041 */
2042int
2043memorystatus_freeze_process_sync(proc_t p)
2044{
2045 int ret = EINVAL;
2046 boolean_t memorystatus_freeze_swap_low = FALSE;
2047
2048 if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
2049 return ENOTSUP;
2050 }
2051
2052 lck_mtx_lock(&freezer_mutex);
2053
2054 if (p == NULL) {
2055 memorystatus_log_error("memorystatus_freeze_process_sync: Invalid process\n");
2056 goto exit;
2057 }
2058
2059 if (memorystatus_freeze_enabled == false) {
2060 memorystatus_log_error("memorystatus_freeze_process_sync: Freezing is DISABLED\n");
2061 goto exit;
2062 }
2063
2064 if (!memorystatus_can_freeze(&memorystatus_freeze_swap_low)) {
2065 memorystatus_log_info("memorystatus_freeze_process_sync: Low compressor and/or low swap space...skipping freeze\n");
2066 goto exit;
2067 }
2068
2069 memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
2070 if (!memorystatus_freeze_budget_pages_remaining) {
2071 memorystatus_log_info("memorystatus_freeze_process_sync: exit with NO available budget\n");
2072 goto exit;
2073 }
2074
2075 proc_list_lock();
2076
2077 ret = memorystatus_freeze_process(p, NULL, NULL, NULL);
2078
2079exit:
2080 lck_mtx_unlock(&freezer_mutex);
2081
2082 return ret;
2083}
2084
2085proc_t
2086memorystatus_freezer_candidate_list_get_proc(
2087 struct memorystatus_freezer_candidate_list *list,
2088 size_t index,
2089 uint64_t *pid_mismatch_counter)
2090{
2091 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2092 if (list->mfcl_list == NULL || list->mfcl_length <= index) {
2093 return NULL;
2094 }
2095 memorystatus_properties_freeze_entry_v1 *entry = &list->mfcl_list[index];
2096 if (entry->pid == NO_PID) {
2097 /* Entry has been removed. */
2098 return NULL;
2099 }
2100
2101 proc_t p = proc_find_locked(entry->pid);
2102 if (p && strncmp(entry->proc_name, p->p_name, sizeof(proc_name_t)) == 0) {
2103 /*
2104 * We grab a reference when we are about to freeze the process. So drop
2105 * the reference that proc_find_locked() grabbed for us.
2106 * We also have the proc_list_lock so this process is stable.
2107 */
2108 proc_rele(p);
2109 return p;
2110 } else {
2111 if (p) {
2112 /* pid rollover. */
2113 proc_rele(p);
2114 }
2115 /*
2116 * The proc has exited since we received this list.
2117 * It may have re-launched with a new pid, so we go looking for it.
2118 */
2119 unsigned int band = JETSAM_PRIORITY_IDLE;
2120 p = memorystatus_get_first_proc_locked(&band, TRUE);
2121 while (p != NULL && band <= memorystatus_freeze_max_candidate_band) {
2122 if (strncmp(entry->proc_name, p->p_name, sizeof(proc_name_t)) == 0) {
2123 if (pid_mismatch_counter != NULL) {
2124 (*pid_mismatch_counter)++;
2125 }
2126 /* Stash the pid for faster lookup next time. */
2127 entry->pid = proc_getpid(p);
2128 return p;
2129 }
2130 p = memorystatus_get_next_proc_locked(&band, p, TRUE);
2131 }
2132 /* No match. */
2133 return NULL;
2134 }
2135}
2136
2137static size_t
2138memorystatus_freeze_pid_list(pid_t *pid_list, unsigned int num_pids)
2139{
2140 int ret = 0;
2141 size_t num_frozen = 0;
2142 while (num_pids > 0 &&
2143 memorystatus_frozen_count < memorystatus_frozen_processes_max) {
2144 pid_t pid = pid_list[--num_pids];
2145 proc_t p = proc_find_locked(pid);
2146 if (p) {
2147 proc_rele(p);
2148 ret = memorystatus_freeze_process(p, NULL, NULL, NULL);
2149 if (ret != 0) {
2150 break;
2151 }
2152 num_frozen++;
2153 }
2154 }
2155 return num_frozen;
2156}
2157
2158/*
2159 * Attempt to freeze the best candidate process.
2160 * Keep trying until we freeze something or run out of candidates.
2161 * Returns the number of processes frozen (including coalition members).
2162 */
2163static size_t
2164memorystatus_freeze_top_process(void)
2165{
2166 int freeze_ret;
2167 size_t num_frozen = 0;
2168 coalition_t coal = COALITION_NULL;
2169 pid_t pid_list[MAX_XPC_SERVICE_PIDS];
2170 unsigned int ntasks = 0;
2171 struct memorystatus_freeze_list_iterator iterator;
2172 LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2173
2174 bzero(&iterator, sizeof(struct memorystatus_freeze_list_iterator));
2175 KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE_SCAN) | DBG_FUNC_START, memorystatus_available_pages);
2176
2177 proc_list_lock();
2178 while (true) {
2179 proc_t p = memorystatus_freeze_pick_process(&iterator);
2180 if (p == PROC_NULL) {
2181 /* Nothing left to freeze */
2182 break;
2183 }
2184 freeze_ret = memorystatus_freeze_process(p, &coal, pid_list, &ntasks);
2185 if (freeze_ret == 0) {
2186 num_frozen = 1;
2187 /*
2188 * We froze a process successfully.
2189 * If it's a coalition head, freeze the coalition.
2190 * Then we're done for now.
2191 */
2192 if (coal != NULL) {
2193 num_frozen += memorystatus_freeze_pid_list(pid_list, ntasks);
2194 }
2195 break;
2196 } else {
2197 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
2198 break;
2199 }
2200 /*
2201 * Freeze failed but we're not out of space.
2202 * Keep trying to find a good candidate,
2203 * memorystatus_freeze_pick_process will not return this proc again until
2204 * we reset the iterator.
2205 */
2206 }
2207 }
2208 proc_list_unlock();
2209
2210 KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE_SCAN) | DBG_FUNC_END, memorystatus_available_pages);
2211
2212 return num_frozen;
2213}
2214
#if DEVELOPMENT || DEBUG
/*
 * For testing memorystatus_freeze_top_process.
 *
 * A write to vm.memorystatus_freeze_top_process runs one pass of
 * memorystatus_freeze_top_process() under the freezer_mutex.
 * Returns ENOTSUP when freezer swap is not active, ESRCH when nothing was
 * frozen, and 0 otherwise. A read performs no freeze.
 */
static int
sysctl_memorystatus_freeze_top_process SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	/*
	 * Initialized because sysctl_handle_int() copies this value out to the
	 * requester; an uninitialized local would expose stack contents.
	 */
	int val = 0;
	int error;
	size_t num_frozen;

	/*
	 * Only freeze on write to prevent freezing during `sysctl -a`.
	 * The actual value written doesn't matter.
	 */
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error || !req->newptr) {
		return error;
	}

	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
		return ENOTSUP;
	}

	lck_mtx_lock(&freezer_mutex);
	num_frozen = memorystatus_freeze_top_process();
	lck_mtx_unlock(&freezer_mutex);

	return (num_frozen == 0) ? ESRCH : 0;
}
SYSCTL_PROC(_vm, OID_AUTO, memorystatus_freeze_top_process, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_freeze_top_process, "I", "");
#endif /* DEVELOPMENT || DEBUG */
2248
2249static inline boolean_t
2250memorystatus_can_freeze_processes(void)
2251{
2252 boolean_t ret;
2253
2254 proc_list_lock();
2255
2256 if (memorystatus_suspended_count) {
2257 memorystatus_freeze_suspended_threshold = MIN(memorystatus_freeze_suspended_threshold, FREEZE_SUSPENDED_THRESHOLD_DEFAULT);
2258
2259 if ((memorystatus_suspended_count - memorystatus_frozen_count) > memorystatus_freeze_suspended_threshold) {
2260 ret = TRUE;
2261 } else {
2262 ret = FALSE;
2263 }
2264 } else {
2265 ret = FALSE;
2266 }
2267
2268 proc_list_unlock();
2269
2270 return ret;
2271}
2272
2273static boolean_t
2274memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low)
2275{
2276 boolean_t can_freeze = TRUE;
2277
2278 /* Only freeze if we're sufficiently low on memory; this holds off freeze right
2279 * after boot, and is generally is a no-op once we've reached steady state. */
2280 if (memorystatus_available_pages > memorystatus_freeze_threshold) {
2281 return FALSE;
2282 }
2283
2284 /* Check minimum suspended process threshold. */
2285 if (!memorystatus_can_freeze_processes()) {
2286 return FALSE;
2287 }
2288 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
2289
2290 if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
2291 /*
2292 * In-core compressor used for freezing WITHOUT on-disk swap support.
2293 */
2294 if (vm_compressor_low_on_space()) {
2295 if (*memorystatus_freeze_swap_low) {
2296 *memorystatus_freeze_swap_low = TRUE;
2297 }
2298
2299 can_freeze = FALSE;
2300 } else {
2301 if (*memorystatus_freeze_swap_low) {
2302 *memorystatus_freeze_swap_low = FALSE;
2303 }
2304
2305 can_freeze = TRUE;
2306 }
2307 } else {
2308 /*
2309 * Freezing WITH on-disk swap support.
2310 *
2311 * In-core compressor fronts the swap.
2312 */
2313 if (vm_swap_low_on_space()) {
2314 if (*memorystatus_freeze_swap_low) {
2315 *memorystatus_freeze_swap_low = TRUE;
2316 }
2317
2318 can_freeze = FALSE;
2319 }
2320 }
2321
2322 return can_freeze;
2323}
2324
2325/*
2326 * Demote the given frozen process.
2327 * Caller must hold the proc_list_lock & it will be held on return.
2328 */
2329static void
2330memorystatus_demote_frozen_process(proc_t p, bool urgent_mode __unused)
2331{
2332 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2333
2334 /* We demote to IDLE unless someone has asserted a higher priority on this process. */
2335 int maxpriority = JETSAM_PRIORITY_IDLE;
2336 p->p_memstat_state &= ~P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND;
2337 memorystatus_invalidate_idle_demotion_locked(p, TRUE);
2338
2339 maxpriority = MAX(p->p_memstat_assertionpriority, maxpriority);
2340 memorystatus_update_priority_locked(p, maxpriority, FALSE, FALSE);
2341#if DEVELOPMENT || DEBUG
2342 memorystatus_log("memorystatus_demote_frozen_process(%s) pid %d [%s]\n",
2343 (urgent_mode ? "urgent" : "normal"), (p ? proc_getpid(p) : -1), ((p && *p->p_name) ? p->p_name : "unknown"));
2344#endif /* DEVELOPMENT || DEBUG */
2345
2346 /*
2347 * The freezer thread will consider this a normal app to be frozen
2348 * because it is in the IDLE band. So we don't need the
2349 * P_MEMSTAT_REFREEZE_ELIGIBLE state here. Also, if it gets resumed
2350 * we'll correctly count it as eligible for re-freeze again.
2351 *
2352 * We don't drop the frozen count because this process still has
2353 * state on disk. So there's a chance it gets resumed and then it
2354 * should land in the higher jetsam band. For that it needs to
2355 * remain marked frozen.
2356 */
2357 if (memorystatus_freeze_proc_is_refreeze_eligible(p)) {
2358 p->p_memstat_state &= ~P_MEMSTAT_REFREEZE_ELIGIBLE;
2359 memorystatus_refreeze_eligible_count--;
2360 }
2361}
2362
2363static unsigned int
2364memorystatus_demote_frozen_processes_using_thaw_count(bool urgent_mode)
2365{
2366 unsigned int band = (unsigned int) memorystatus_freeze_jetsam_band;
2367 unsigned int demoted_proc_count = 0;
2368 proc_t p = PROC_NULL, next_p = PROC_NULL;
2369 proc_list_lock();
2370
2371 next_p = memorystatus_get_first_proc_locked(&band, FALSE);
2372 while (next_p) {
2373 p = next_p;
2374 next_p = memorystatus_get_next_proc_locked(&band, p, FALSE);
2375
2376 if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == FALSE) {
2377 continue;
2378 }
2379
2380 if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
2381 continue;
2382 }
2383
2384 if (urgent_mode) {
2385 if (!memorystatus_freeze_proc_is_refreeze_eligible(p)) {
2386 /*
2387 * This process hasn't been thawed recently and so most of
2388 * its state sits on NAND and so we skip it -- jetsamming it
2389 * won't help with memory pressure.
2390 */
2391 continue;
2392 }
2393 } else {
2394 if (p->p_memstat_thaw_count >= memorystatus_thaw_count_demotion_threshold) {
2395 /*
2396 * This process has met / exceeded our thaw count demotion threshold
2397 * and so we let it live in the higher bands.
2398 */
2399 continue;
2400 }
2401 }
2402
2403 memorystatus_demote_frozen_process(p, urgent_mode);
2404 demoted_proc_count++;
2405 if ((urgent_mode) || (demoted_proc_count == memorystatus_max_frozen_demotions_daily)) {
2406 break;
2407 }
2408 }
2409
2410 proc_list_unlock();
2411 return demoted_proc_count;
2412}
2413
2414static unsigned int
2415memorystatus_demote_frozen_processes_using_demote_list(bool urgent_mode)
2416{
2417 LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2418 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
2419 assert(memorystatus_freezer_use_demotion_list);
2420 unsigned int demoted_proc_count = 0;
2421
2422 proc_list_lock();
2423 for (size_t i = 0; i < memorystatus_global_demote_list.mfcl_length; i++) {
2424 proc_t p = memorystatus_freezer_candidate_list_get_proc(
2425 &memorystatus_global_demote_list,
2426 i,
2427 &memorystatus_freezer_stats.mfs_demote_pid_mismatches);
2428 if (p != NULL && memorystatus_freeze_proc_is_refreeze_eligible(p)) {
2429 memorystatus_demote_frozen_process(p, urgent_mode);
2430 /* Remove this entry now that it's been demoted. */
2431 memorystatus_global_demote_list.mfcl_list[i].pid = NO_PID;
2432 demoted_proc_count++;
2433 /*
2434 * We only demote one proc at a time in this mode.
2435 * This gives jetsam a chance to kill the recently demoted processes.
2436 */
2437 break;
2438 }
2439 }
2440
2441 proc_list_unlock();
2442 return demoted_proc_count;
2443}
2444
2445/*
2446 * This function evaluates if the currently frozen processes deserve
2447 * to stay in the higher jetsam band. There are 2 modes:
2448 * - 'force one == TRUE': (urgent mode)
2449 * We are out of budget and can't refreeze a process. The process's
2450 * state, if it was resumed, will stay in compressed memory. If we let it
2451 * remain up in the higher frozen jetsam band, it'll put a lot of pressure on
2452 * the lower bands. So we force-demote the least-recently-used-and-thawed
2453 * process.
2454 *
2455 * - 'force_one == FALSE': (normal mode)
2456 * If the # of thaws of a process is below our threshold, then we
2457 * will demote that process into the IDLE band.
2458 * We don't immediately kill the process here because it already has
2459 * state on disk and so it might be worth giving it another shot at
2460 * getting thawed/resumed and used.
2461 */
2462static void
2463memorystatus_demote_frozen_processes(bool urgent_mode)
2464{
2465 unsigned int demoted_proc_count = 0;
2466
2467 if (memorystatus_freeze_enabled == false) {
2468 /*
2469 * Freeze has been disabled likely to
2470 * reclaim swap space. So don't change
2471 * any state on the frozen processes.
2472 */
2473 return;
2474 }
2475
2476 /*
2477 * We have two demotion policies which can be toggled by userspace.
2478 * In non-urgent mode, the ordered list policy will
2479 * choose a demotion candidate using the list provided by dasd.
2480 * The thaw count policy will demote the oldest process that hasn't been
2481 * thawed more than memorystatus_thaw_count_demotion_threshold times.
2482 *
2483 * If urgent_mode is set, both policies will only consider demoting
2484 * processes that are re-freeze eligible. But the ordering is different.
2485 * The ordered list policy will scan in the order given by dasd.
2486 * The thaw count policy will scan through the frozen band.
2487 */
2488 if (memorystatus_freezer_use_demotion_list) {
2489 demoted_proc_count += memorystatus_demote_frozen_processes_using_demote_list(urgent_mode);
2490
2491 if (demoted_proc_count == 0 && urgent_mode) {
2492 /*
2493 * We're out of budget and the demotion list doesn't contain any valid
2494 * candidates. We still need to demote something. Fall back to scanning
2495 * the frozen band.
2496 */
2497 memorystatus_demote_frozen_processes_using_thaw_count(true);
2498 }
2499 } else {
2500 demoted_proc_count += memorystatus_demote_frozen_processes_using_thaw_count(urgent_mode);
2501 }
2502}
2503
2504/*
2505 * Calculate a new freezer budget.
2506 * @param time_since_last_interval_expired_sec How long has it been (in seconds) since the previous interval expired.
2507 * @param burst_multiple The burst_multiple for the new period
2508 * @param interval_duration_min How many minutes will the new interval be?
2509 * @param rollover The amount to rollover from the previous budget.
2510 *
2511 * @return A budget for the new interval.
2512 */
2513static uint32_t
2514memorystatus_freeze_calculate_new_budget(
2515 unsigned int time_since_last_interval_expired_sec,
2516 unsigned int burst_multiple,
2517 unsigned int interval_duration_min,
2518 uint32_t rollover)
2519{
2520 uint64_t freeze_daily_budget = 0, freeze_daily_budget_mb = 0, daily_budget_pageouts = 0, budget_missed = 0, freeze_daily_pageouts_max = 0, new_budget = 0;
2521 const static unsigned int kNumSecondsInDay = 60 * 60 * 24;
2522 /* Precision factor for days_missed. 2 decimal points. */
2523 const static unsigned int kFixedPointFactor = 100;
2524 unsigned int days_missed;
2525
2526 if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
2527 return 0;
2528 }
2529 if (memorystatus_swap_all_apps) {
2530 /*
2531 * We effectively have an unlimited budget when app swap is enabled.
2532 */
2533 memorystatus_freeze_daily_mb_max = UINT32_MAX;
2534 return UINT32_MAX;
2535 }
2536
2537 /* Get the daily budget from the storage layer */
2538 if (vm_swap_max_budget(&freeze_daily_budget)) {
2539 freeze_daily_budget_mb = freeze_daily_budget / (1024 * 1024);
2540 assert(freeze_daily_budget_mb <= UINT32_MAX);
2541 memorystatus_freeze_daily_mb_max = (unsigned int) freeze_daily_budget_mb;
2542 memorystatus_log_info("memorystatus: memorystatus_freeze_daily_mb_max set to %dMB\n", memorystatus_freeze_daily_mb_max);
2543 }
2544 /* Calculate the daily pageout budget */
2545 freeze_daily_pageouts_max = memorystatus_freeze_daily_mb_max * (1024 * 1024 / PAGE_SIZE);
2546 /* Multiply by memorystatus_freeze_budget_multiplier */
2547 freeze_daily_pageouts_max = ((kFixedPointFactor * memorystatus_freeze_budget_multiplier / 100) * freeze_daily_pageouts_max) / kFixedPointFactor;
2548
2549 daily_budget_pageouts = (burst_multiple * (((uint64_t) interval_duration_min * freeze_daily_pageouts_max) / (kNumSecondsInDay / 60)));
2550
2551 /*
2552 * Add additional budget for time since the interval expired.
2553 * For example, if the interval expired n days ago, we should get an additional n days
2554 * of budget since we didn't use any budget during those n days.
2555 */
2556 days_missed = time_since_last_interval_expired_sec * kFixedPointFactor / kNumSecondsInDay;
2557 budget_missed = days_missed * freeze_daily_pageouts_max / kFixedPointFactor;
2558 new_budget = rollover + daily_budget_pageouts + budget_missed;
2559 return (uint32_t) MIN(new_budget, UINT32_MAX);
2560}
2561
2562/*
2563 * Mark all non frozen, freezer-eligible processes as skipped for the given reason.
2564 * Used when we hit some system freeze limit and know that we won't be considering remaining processes.
2565 * If you're using this for a new reason, make sure to add it to memorystatus_freeze_init_proc so that
2566 * it gets set for new processes.
2567 * NB: These processes will retain this skip reason until they are reconsidered by memorystatus_is_process_eligible_for_freeze.
2568 */
2569static void
2570memorystatus_freeze_mark_eligible_processes_with_skip_reason(memorystatus_freeze_skip_reason_t reason, bool locked)
2571{
2572 LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2573 LCK_MTX_ASSERT(&proc_list_mlock, locked ? LCK_MTX_ASSERT_OWNED : LCK_MTX_ASSERT_NOTOWNED);
2574 unsigned int band = JETSAM_PRIORITY_IDLE;
2575 proc_t p;
2576
2577 if (!locked) {
2578 proc_list_lock();
2579 }
2580 p = memorystatus_get_first_proc_locked(&band, FALSE);
2581 while (p) {
2582 assert(p->p_memstat_effectivepriority == (int32_t) band);
2583 if (!(p->p_memstat_state & P_MEMSTAT_FROZEN) && memorystatus_is_process_eligible_for_freeze(p)) {
2584 assert(p->p_memstat_freeze_skip_reason == kMemorystatusFreezeSkipReasonNone);
2585 p->p_memstat_freeze_skip_reason = (uint8_t) reason;
2586 }
2587 p = memorystatus_get_next_proc_locked(&band, p, FALSE);
2588 }
2589 if (!locked) {
2590 proc_list_unlock();
2591 }
2592}
2593
2594/*
2595 * Called after we fail to freeze a process.
2596 * Logs the failure, marks the process with the failure reason, and updates freezer stats.
2597 */
2598static void
2599memorystatus_freeze_handle_error(
2600 proc_t p,
2601 const freezer_error_code_t freezer_error_code,
2602 bool was_refreeze,
2603 pid_t pid,
2604 const coalition_t coalition,
2605 const char* log_prefix)
2606{
2607 const char *reason;
2608 memorystatus_freeze_skip_reason_t skip_reason;
2609
2610 switch (freezer_error_code) {
2611 case FREEZER_ERROR_EXCESS_SHARED_MEMORY:
2612 memorystatus_freezer_stats.mfs_error_excess_shared_memory_count++;
2613 reason = "too much shared memory";
2614 skip_reason = kMemorystatusFreezeSkipReasonExcessSharedMemory;
2615 break;
2616 case FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO:
2617 memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count++;
2618 reason = "private-shared pages ratio";
2619 skip_reason = kMemorystatusFreezeSkipReasonLowPrivateSharedRatio;
2620 break;
2621 case FREEZER_ERROR_NO_COMPRESSOR_SPACE:
2622 memorystatus_freezer_stats.mfs_error_no_compressor_space_count++;
2623 reason = "no compressor space";
2624 skip_reason = kMemorystatusFreezeSkipReasonNoCompressorSpace;
2625 break;
2626 case FREEZER_ERROR_NO_SWAP_SPACE:
2627 memorystatus_freezer_stats.mfs_error_no_swap_space_count++;
2628 reason = "no swap space";
2629 skip_reason = kMemorystatusFreezeSkipReasonNoSwapSpace;
2630 break;
2631 case FREEZER_ERROR_NO_SLOTS:
2632 memorystatus_freezer_stats.mfs_skipped_full_count++;
2633 reason = "no slots";
2634 skip_reason = kMemorystatusFreezeSkipReasonOutOfSlots;
2635 break;
2636 default:
2637 reason = "unknown error";
2638 skip_reason = kMemorystatusFreezeSkipReasonOther;
2639 }
2640
2641 p->p_memstat_freeze_skip_reason = (uint8_t) skip_reason;
2642
2643 memorystatus_log("%s: %sfreezing (%s) pid %d [%s]...skipped (%s)\n",
2644 log_prefix, was_refreeze ? "re" : "",
2645 (coalition == NULL ? "general" : "coalition-driven"), pid,
2646 ((p && *p->p_name) ? p->p_name : "unknown"), reason);
2647}
2648
2649/*
2650 * Start a new normal throttle interval with the given budget.
2651 * Caller must hold the freezer mutex
2652 */
2653static void
2654memorystatus_freeze_start_normal_throttle_interval(uint32_t new_budget, mach_timespec_t start_ts)
2655{
2656 unsigned int band;
2657 proc_t p, next_p;
2658 LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2659 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
2660
2661 normal_throttle_window->max_pageouts = new_budget;
2662 normal_throttle_window->ts.tv_sec = normal_throttle_window->mins * 60;
2663 normal_throttle_window->ts.tv_nsec = 0;
2664 ADD_MACH_TIMESPEC(&normal_throttle_window->ts, &start_ts);
2665 /* Since we update the throttle stats pre-freeze, adjust for overshoot here */
2666 if (normal_throttle_window->pageouts > normal_throttle_window->max_pageouts) {
2667 normal_throttle_window->pageouts -= normal_throttle_window->max_pageouts;
2668 } else {
2669 normal_throttle_window->pageouts = 0;
2670 }
2671 /* Ensure the normal window is now active. */
2672 memorystatus_freeze_degradation = FALSE;
2673
2674 /*
2675 * Reset interval statistics.
2676 */
2677 memorystatus_freezer_stats.mfs_shared_pages_skipped = 0;
2678 memorystatus_freezer_stats.mfs_process_considered_count = 0;
2679 memorystatus_freezer_stats.mfs_error_below_min_pages_count = 0;
2680 memorystatus_freezer_stats.mfs_error_excess_shared_memory_count = 0;
2681 memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count = 0;
2682 memorystatus_freezer_stats.mfs_error_no_compressor_space_count = 0;
2683 memorystatus_freezer_stats.mfs_error_no_swap_space_count = 0;
2684 memorystatus_freezer_stats.mfs_error_low_probability_of_use_count = 0;
2685 memorystatus_freezer_stats.mfs_error_elevated_count = 0;
2686 memorystatus_freezer_stats.mfs_error_other_count = 0;
2687 memorystatus_freezer_stats.mfs_refreeze_count = 0;
2688 memorystatus_freezer_stats.mfs_bytes_refrozen = 0;
2689 memorystatus_freezer_stats.mfs_below_threshold_count = 0;
2690 memorystatus_freezer_stats.mfs_skipped_full_count = 0;
2691 memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count = 0;
2692 memorystatus_freezer_stats.mfs_budget_exhaustion_duration_remaining = 0;
2693 memorystatus_thaw_count = 0;
2694 os_atomic_store(&memorystatus_freezer_stats.mfs_processes_thawed, 0, release);
2695 os_atomic_store(&memorystatus_freezer_stats.mfs_processes_thawed_webcontent, 0, release);
2696 os_atomic_store(&memorystatus_freezer_stats.mfs_processes_thawed_fg, 0, release);
2697 os_atomic_store(&memorystatus_freezer_stats.mfs_processes_thawed_fg_xpc_service, 0, release);
2698 os_atomic_store(&memorystatus_freezer_stats.mfs_processes_frozen, memorystatus_frozen_count, release);
2699 os_atomic_store(&memorystatus_freezer_stats.mfs_processes_frozen_webcontent, memorystatus_frozen_count_webcontent, release);
2700 os_atomic_store(&memorystatus_freezer_stats.mfs_processes_frozen_xpc_service, memorystatus_frozen_count_xpc_service, release);
2701 os_atomic_store(&memorystatus_freezer_stats.mfs_processes_fg_resumed, 0, release);
2702 os_atomic_inc(&memorystatus_freeze_current_interval, release);
2703
2704 /* Clear the focal thaw bit */
2705 proc_list_lock();
2706 band = JETSAM_PRIORITY_IDLE;
2707 p = PROC_NULL;
2708 next_p = PROC_NULL;
2709
2710 next_p = memorystatus_get_first_proc_locked(&band, TRUE);
2711 while (next_p) {
2712 p = next_p;
2713 next_p = memorystatus_get_next_proc_locked(&band, p, TRUE);
2714
2715 if (p->p_memstat_effectivepriority > JETSAM_PRIORITY_FOREGROUND) {
2716 break;
2717 }
2718 p->p_memstat_state &= ~P_MEMSTAT_FROZEN_FOCAL_THAW;
2719 }
2720 proc_list_unlock();
2721
2722 schedule_interval_reset(freeze_interval_reset_thread_call, normal_throttle_window);
2723}
2724
2725#if DEVELOPMENT || DEBUG
2726
2727static int
2728sysctl_memorystatus_freeze_calculate_new_budget SYSCTL_HANDLER_ARGS
2729{
2730#pragma unused(arg1, arg2)
2731 int error = 0;
2732 unsigned int time_since_last_interval_expired_sec = 0;
2733 unsigned int new_budget;
2734
2735 error = sysctl_handle_int(oidp, &time_since_last_interval_expired_sec, 0, req);
2736 if (error || !req->newptr) {
2737 return error;
2738 }
2739
2740 if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
2741 return ENOTSUP;
2742 }
2743 new_budget = memorystatus_freeze_calculate_new_budget(time_since_last_interval_expired_sec, 1, NORMAL_WINDOW_MINS, 0);
2744 return copyout(&new_budget, req->oldptr, MIN(sizeof(req->oldlen), sizeof(new_budget)));
2745}
2746
2747SYSCTL_PROC(_vm, OID_AUTO, memorystatus_freeze_calculate_new_budget, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MASKED,
2748 0, 0, &sysctl_memorystatus_freeze_calculate_new_budget, "I", "");
2749
2750#endif /* DEVELOPMENT || DEBUG */
2751
2752/*
2753 * Called when we first run out of budget in an interval.
2754 * Marks idle processes as not frozen due to lack of budget.
2755 * NB: It might be worth having a CA event here.
2756 */
2757static void
2758memorystatus_freeze_out_of_budget(const struct throttle_interval_t *interval)
2759{
2760 LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2761 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
2762
2763 mach_timespec_t time_left = {0, 0};
2764 mach_timespec_t now_ts;
2765 clock_sec_t sec;
2766 clock_nsec_t nsec;
2767
2768 time_left.tv_sec = interval->ts.tv_sec;
2769 time_left.tv_nsec = 0;
2770 clock_get_system_nanotime(&sec, &nsec);
2771 now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
2772 now_ts.tv_nsec = nsec;
2773
2774 SUB_MACH_TIMESPEC(&time_left, &now_ts);
2775 memorystatus_freezer_stats.mfs_budget_exhaustion_duration_remaining = time_left.tv_sec;
2776 memorystatus_log(
2777 "memorystatus_freeze: Out of NAND write budget with %u minutes left in the current freezer interval. %u procs are frozen.\n",
2778 time_left.tv_sec / 60, memorystatus_frozen_count);
2779
2780 memorystatus_freeze_mark_eligible_processes_with_skip_reason(kMemorystatusFreezeSkipReasonOutOfBudget, false);
2781}
2782
2783/*
2784 * Called when we cross over the threshold of maximum frozen processes allowed.
2785 * Marks remaining idle processes as not frozen due to lack of slots.
2786 */
2787static void
2788memorystatus_freeze_out_of_slots(void)
2789{
2790 LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2791 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2792 assert(memorystatus_frozen_count == memorystatus_frozen_processes_max);
2793
2794 memorystatus_log(
2795 "memorystatus_freeze: Out of slots in the freezer. %u procs are frozen.\n",
2796 memorystatus_frozen_count);
2797
2798 memorystatus_freeze_mark_eligible_processes_with_skip_reason(kMemorystatusFreezeSkipReasonOutOfSlots, true);
2799}
2800
2801/*
2802 * This function will do 4 things:
2803 *
2804 * 1) check to see if we are currently in a degraded freezer mode, and if so:
2805 * - check to see if our window has expired and we should exit this mode, OR,
2806 * - return a budget based on the degraded throttle window's max. pageouts vs current pageouts.
2807 *
2808 * 2) check to see if we are in a NEW normal window and update the normal throttle window's params.
2809 *
2810 * 3) check what the current normal window allows for a budget.
2811 *
2812 * 4) calculate the current rate of pageouts for DEGRADED_WINDOW_MINS duration. If that rate is below
2813 * what we would normally expect, then we are running low on our daily budget and need to enter
2814 * degraded perf. mode.
2815 *
2816 * Caller must hold the freezer mutex
2817 * Caller must not hold the proc_list lock
2818 */
2819
2820static void
2821memorystatus_freeze_update_throttle(uint64_t *budget_pages_allowed)
2822{
2823 clock_sec_t sec;
2824 clock_nsec_t nsec;
2825 mach_timespec_t now_ts;
2826 LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2827 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
2828
2829 unsigned int freeze_daily_pageouts_max = 0;
2830 bool started_with_budget = (*budget_pages_allowed > 0);
2831
2832#if DEVELOPMENT || DEBUG
2833 if (!memorystatus_freeze_throttle_enabled) {
2834 /*
2835 * No throttling...we can use the full budget everytime.
2836 */
2837 *budget_pages_allowed = UINT64_MAX;
2838 return;
2839 }
2840#endif
2841
2842 clock_get_system_nanotime(&sec, &nsec);
2843 now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
2844 now_ts.tv_nsec = nsec;
2845
2846 struct throttle_interval_t *interval = NULL;
2847
2848 if (memorystatus_freeze_degradation == TRUE) {
2849 interval = degraded_throttle_window;
2850
2851 if (CMP_MACH_TIMESPEC(&now_ts, &interval->ts) >= 0) {
2852 interval->pageouts = 0;
2853 interval->max_pageouts = 0;
2854 } else {
2855 *budget_pages_allowed = interval->max_pageouts - interval->pageouts;
2856 }
2857 }
2858
2859 interval = normal_throttle_window;
2860
2861 /*
2862 * Current throttle window.
2863 * Deny freezing if we have no budget left.
2864 * Try graceful degradation if we are within 25% of:
2865 * - the daily budget, and
2866 * - the current budget left is below our normal budget expectations.
2867 */
2868
2869 if (memorystatus_freeze_degradation == FALSE) {
2870 if (interval->pageouts >= interval->max_pageouts) {
2871 *budget_pages_allowed = 0;
2872 if (started_with_budget) {
2873 memorystatus_freeze_out_of_budget(interval);
2874 }
2875 } else {
2876 int budget_left = interval->max_pageouts - interval->pageouts;
2877 int budget_threshold = (freeze_daily_pageouts_max * FREEZE_DEGRADATION_BUDGET_THRESHOLD) / 100;
2878
2879 mach_timespec_t time_left = {0, 0};
2880
2881 time_left.tv_sec = interval->ts.tv_sec;
2882 time_left.tv_nsec = 0;
2883
2884 SUB_MACH_TIMESPEC(&time_left, &now_ts);
2885
2886 if (budget_left <= budget_threshold) {
2887 /*
2888 * For the current normal window, calculate how much we would pageout in a DEGRADED_WINDOW_MINS duration.
2889 * And also calculate what we would pageout for the same DEGRADED_WINDOW_MINS duration if we had the full
2890 * daily pageout budget.
2891 */
2892
2893 unsigned int current_budget_rate_allowed = ((budget_left / time_left.tv_sec) / 60) * DEGRADED_WINDOW_MINS;
2894 unsigned int normal_budget_rate_allowed = (freeze_daily_pageouts_max / NORMAL_WINDOW_MINS) * DEGRADED_WINDOW_MINS;
2895
2896 /*
2897 * The current rate of pageouts is below what we would expect for
2898 * the normal rate i.e. we have below normal budget left and so...
2899 */
2900
2901 if (current_budget_rate_allowed < normal_budget_rate_allowed) {
2902 memorystatus_freeze_degradation = TRUE;
2903 degraded_throttle_window->max_pageouts = current_budget_rate_allowed;
2904 degraded_throttle_window->pageouts = 0;
2905
2906 /*
2907 * Switch over to the degraded throttle window so the budget
2908 * doled out is based on that window.
2909 */
2910 interval = degraded_throttle_window;
2911 }
2912 }
2913
2914 *budget_pages_allowed = interval->max_pageouts - interval->pageouts;
2915 }
2916 }
2917
2918 memorystatus_log_debug(
2919 "memorystatus_freeze_update_throttle_interval: throttle updated - %d frozen (%d max) within %dm; %dm remaining\n",
2920 interval->pageouts, interval->max_pageouts, interval->mins, (interval->ts.tv_sec - now_ts.tv_sec) / 60);
2921}
2922
/* Read-write kern sysctl exposing memorystatus_freeze_apps_idle_delay_multiplier. */
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_apps_idle_delay_multiplier, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_apps_idle_delay_multiplier, 0, "");

/* Set to true by memorystatus_freeze_thread once it has done its first-run setup. */
bool memorystatus_freeze_thread_init = false;
2926static void
2927memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused)
2928{
2929 static boolean_t memorystatus_freeze_swap_low = FALSE;
2930 size_t max_to_freeze = 0, num_frozen = 0, num_frozen_this_iteration = 0;
2931
2932 if (!memorystatus_freeze_thread_init) {
2933#if CONFIG_THREAD_GROUPS
2934 thread_group_vm_add();
2935#endif
2936 memorystatus_freeze_thread_init = true;
2937 }
2938
2939 max_to_freeze = memorystatus_pick_freeze_count_for_wakeup();
2940
2941 lck_mtx_lock(&freezer_mutex);
2942 if (memorystatus_freeze_enabled) {
2943 if (memorystatus_freezer_use_demotion_list && memorystatus_refreeze_eligible_count > 0) {
2944 memorystatus_demote_frozen_processes(false); /* Normal mode. Consider demoting thawed processes. */
2945 }
2946 while (num_frozen < max_to_freeze &&
2947 memorystatus_can_freeze(&memorystatus_freeze_swap_low) &&
2948 ((memorystatus_frozen_count < memorystatus_frozen_processes_max) ||
2949 (memorystatus_refreeze_eligible_count >= memorystatus_min_thaw_refreeze_threshold))) {
2950 /* Only freeze if we've not exceeded our pageout budgets.*/
2951 memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
2952
2953 if (memorystatus_freeze_budget_pages_remaining) {
2954 num_frozen_this_iteration = memorystatus_freeze_top_process();
2955 if (num_frozen_this_iteration == 0) {
2956 /* Nothing left to freeze. */
2957 break;
2958 }
2959 num_frozen += num_frozen_this_iteration;
2960 } else {
2961 memorystatus_demote_frozen_processes(true); /* urgent mode..force one demotion */
2962 break;
2963 }
2964 }
2965 }
2966
2967 /*
2968 * Give applications currently in the aging band a chance to age out into the idle band before
2969 * running the freezer again.
2970 */
2971 if (memorystatus_freeze_dynamic_thread_delay_enabled) {
2972 if ((num_frozen > 0) || (memorystatus_frozen_count == 0)) {
2973 memorystatus_freeze_apps_idle_delay_multiplier = FREEZE_APPS_IDLE_DELAY_MULTIPLIER_FAST;
2974 } else {
2975 memorystatus_freeze_apps_idle_delay_multiplier = FREEZE_APPS_IDLE_DELAY_MULTIPLIER_SLOW;
2976 }
2977 }
2978 memorystatus_freezer_thread_next_run_ts = mach_absolute_time() + (memorystatus_apps_idle_delay_time * memorystatus_freeze_apps_idle_delay_multiplier);
2979
2980 assert_wait((event_t) &memorystatus_freeze_wakeup, THREAD_UNINT);
2981 lck_mtx_unlock(&freezer_mutex);
2982
2983 thread_block((thread_continue_t) memorystatus_freeze_thread);
2984}
2985
2986int
2987memorystatus_get_process_is_freezable(pid_t pid, int *is_freezable)
2988{
2989 proc_t p = PROC_NULL;
2990
2991 if (pid == 0) {
2992 return EINVAL;
2993 }
2994
2995 p = proc_find(pid);
2996 if (!p) {
2997 return ESRCH;
2998 }
2999
3000 /*
3001 * Only allow this on the current proc for now.
3002 * We can check for privileges and allow targeting another process in the future.
3003 */
3004 if (p != current_proc()) {
3005 proc_rele(p);
3006 return EPERM;
3007 }
3008
3009 proc_list_lock();
3010 *is_freezable = ((p->p_memstat_state & P_MEMSTAT_FREEZE_DISABLED) ? 0 : 1);
3011 proc_rele(p);
3012 proc_list_unlock();
3013
3014 return 0;
3015}
3016
3017errno_t
3018memorystatus_get_process_is_frozen(pid_t pid, int *is_frozen)
3019{
3020 proc_t p = PROC_NULL;
3021
3022 if (pid == 0) {
3023 return EINVAL;
3024 }
3025
3026 /*
3027 * Only allow this on the current proc for now.
3028 * We can check for privileges and allow targeting another process in the future.
3029 */
3030 p = current_proc();
3031 if (proc_getpid(p) != pid) {
3032 return EPERM;
3033 }
3034
3035 proc_list_lock();
3036 *is_frozen = (p->p_memstat_state & P_MEMSTAT_FROZEN) != 0;
3037 proc_list_unlock();
3038
3039 return 0;
3040}
3041
3042int
3043memorystatus_set_process_is_freezable(pid_t pid, boolean_t is_freezable)
3044{
3045 proc_t p = PROC_NULL;
3046
3047 if (pid == 0) {
3048 return EINVAL;
3049 }
3050
3051 /*
3052 * To enable freezable status, you need to be root or an entitlement.
3053 */
3054 if (is_freezable &&
3055 !kauth_cred_issuser(kauth_cred_get()) &&
3056 !IOCurrentTaskHasEntitlement(MEMORYSTATUS_ENTITLEMENT)) {
3057 return EPERM;
3058 }
3059
3060 p = proc_find(pid);
3061 if (!p) {
3062 return ESRCH;
3063 }
3064
3065 /*
3066 * A process can change its own status. A coalition leader can
3067 * change the status of coalition members.
3068 * An entitled process (or root) can change anyone's status.
3069 */
3070 if (p != current_proc() &&
3071 !kauth_cred_issuser(kauth_cred_get()) &&
3072 !IOCurrentTaskHasEntitlement(MEMORYSTATUS_ENTITLEMENT)) {
3073 coalition_t coal = task_get_coalition(proc_task(p), COALITION_TYPE_JETSAM);
3074 if (!coalition_is_leader(proc_task(current_proc()), coal)) {
3075 proc_rele(p);
3076 return EPERM;
3077 }
3078 }
3079
3080 proc_list_lock();
3081 if (is_freezable == FALSE) {
3082 /* Freeze preference set to FALSE. Set the P_MEMSTAT_FREEZE_DISABLED bit. */
3083 p->p_memstat_state |= P_MEMSTAT_FREEZE_DISABLED;
3084 memorystatus_log_info("memorystatus_set_process_is_freezable: disabling freeze for pid %d [%s]\n",
3085 proc_getpid(p), (*p->p_name ? p->p_name : "unknown"));
3086 } else {
3087 p->p_memstat_state &= ~P_MEMSTAT_FREEZE_DISABLED;
3088 memorystatus_log_info("memorystatus_set_process_is_freezable: enabling freeze for pid %d [%s]\n",
3089 proc_getpid(p), (*p->p_name ? p->p_name : "unknown"));
3090 }
3091 proc_rele(p);
3092 proc_list_unlock();
3093
3094 return 0;
3095}
3096
3097/*
3098 * Called when process is created before it is added to a memorystatus bucket.
3099 */
3100void
3101memorystatus_freeze_init_proc(proc_t p)
3102{
3103 /* NB: Process is not on the memorystatus lists yet so it's safe to modify the skip reason without the freezer mutex. */
3104 if (memorystatus_freeze_budget_pages_remaining == 0) {
3105 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonOutOfBudget;
3106 } else if ((memorystatus_frozen_count >= memorystatus_frozen_processes_max)) {
3107 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonOutOfSlots;
3108 } else {
3109 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
3110 }
3111}
3112
3113
3114static int
3115sysctl_memorystatus_do_fastwake_warmup_all SYSCTL_HANDLER_ARGS
3116{
3117#pragma unused(oidp, arg1, arg2)
3118
3119 if (!req->newptr) {
3120 return EINVAL;
3121 }
3122
3123 /* Need to be root or have entitlement */
3124 if (!kauth_cred_issuser(kauth_cred_get()) && !IOCurrentTaskHasEntitlement( MEMORYSTATUS_ENTITLEMENT)) {
3125 return EPERM;
3126 }
3127
3128 if (memorystatus_freeze_enabled == false) {
3129 return ENOTSUP;
3130 }
3131
3132 if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
3133 return ENOTSUP;
3134 }
3135
3136 do_fastwake_warmup_all();
3137
3138 return 0;
3139}
3140
3141SYSCTL_PROC(_kern, OID_AUTO, memorystatus_do_fastwake_warmup_all, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
3142 0, 0, &sysctl_memorystatus_do_fastwake_warmup_all, "I", "");
3143
3144/*
3145 * Takes in a candidate list from the user_addr, validates it, and copies it into the list pointer.
3146 * Takes ownership over the original value of list.
3147 * Assumes that list is protected by the freezer_mutex.
3148 * The caller should not hold any locks.
3149 */
3150static errno_t
3151set_freezer_candidate_list(user_addr_t buffer, size_t buffer_size, struct memorystatus_freezer_candidate_list *list)
3152{
3153 errno_t error = 0;
3154 memorystatus_properties_freeze_entry_v1 *entries = NULL, *tmp_entries = NULL;
3155 size_t entry_count = 0, entries_size = 0, tmp_size = 0;
3156
3157 /* Validate the user provided list. */
3158 if ((buffer == USER_ADDR_NULL) || (buffer_size == 0)) {
3159 memorystatus_log_error("memorystatus_cmd_grp_set_freeze_priority: NULL or empty list\n");
3160 return EINVAL;
3161 }
3162
3163 if (buffer_size % sizeof(memorystatus_properties_freeze_entry_v1) != 0) {
3164 memorystatus_log_error(
3165 "memorystatus_cmd_grp_set_freeze_priority: Invalid list length (caller might have comiled agsinst invalid headers.)\n");
3166 return EINVAL;
3167 }
3168
3169 entry_count = buffer_size / sizeof(memorystatus_properties_freeze_entry_v1);
3170 entries_size = buffer_size;
3171 entries = kalloc_data(buffer_size, Z_WAITOK | Z_ZERO);
3172 if (entries == NULL) {
3173 return ENOMEM;
3174 }
3175
3176 error = copyin(buffer, entries, buffer_size);
3177 if (error != 0) {
3178 goto out;
3179 }
3180
3181#if MACH_ASSERT
3182 for (size_t i = 0; i < entry_count; i++) {
3183 memorystatus_properties_freeze_entry_v1 *entry = &entries[i];
3184 if (entry->version != 1) {
3185 memorystatus_log_error("memorystatus_cmd_grp_set_freeze_priority: Invalid entry version number.");
3186 error = EINVAL;
3187 goto out;
3188 }
3189 if (i > 0 && entry->priority >= entries[i - 1].priority) {
3190 memorystatus_log_error("memorystatus_cmd_grp_set_freeze_priority: Entry list is not in descending order.");
3191 error = EINVAL;
3192 goto out;
3193 }
3194 }
3195#endif /* MACH_ASSERT */
3196
3197 lck_mtx_lock(&freezer_mutex);
3198
3199 tmp_entries = list->mfcl_list;
3200 tmp_size = list->mfcl_length * sizeof(memorystatus_properties_freeze_entry_v1);
3201 list->mfcl_list = entries;
3202 list->mfcl_length = entry_count;
3203
3204 lck_mtx_unlock(&freezer_mutex);
3205
3206 entries = tmp_entries;
3207 entries_size = tmp_size;
3208
3209out:
3210 kfree_data(entries, entries_size);
3211 return error;
3212}
3213
/*
 * Replace memorystatus_global_freeze_list with the entries in the given user
 * buffer. Validation and return values are those of
 * set_freezer_candidate_list.
 */
errno_t
memorystatus_cmd_grp_set_freeze_list(user_addr_t buffer, size_t buffer_size)
{
	return set_freezer_candidate_list(buffer, buffer_size, &memorystatus_global_freeze_list);
}
3219
/*
 * Replace memorystatus_global_demote_list with the entries in the given user
 * buffer. Validation and return values are those of
 * set_freezer_candidate_list.
 */
errno_t
memorystatus_cmd_grp_set_demote_list(user_addr_t buffer, size_t buffer_size)
{
	return set_freezer_candidate_list(buffer, buffer_size, &memorystatus_global_demote_list);
}
3225
3226void
3227memorystatus_freezer_mark_ui_transition(proc_t p)
3228{
3229 bool frozen = false, previous_focal_thaw = false, xpc_service = false, suspended = false;
3230 proc_list_lock();
3231
3232 if (isSysProc(p)) {
3233 goto out;
3234 }
3235
3236 frozen = (p->p_memstat_state & P_MEMSTAT_FROZEN) != 0;
3237 previous_focal_thaw = (p->p_memstat_state & P_MEMSTAT_FROZEN_FOCAL_THAW) != 0;
3238 xpc_service = (p->p_memstat_state & P_MEMSTAT_FROZEN_XPC_SERVICE) != 0;
3239 suspended = (p->p_memstat_state & P_MEMSTAT_SUSPENDED) != 0;
3240 if (!suspended) {
3241 if (frozen) {
3242 if (!previous_focal_thaw) {
3243 p->p_memstat_state |= P_MEMSTAT_FROZEN_FOCAL_THAW;
3244 os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_thawed_fg), relaxed);
3245 if (xpc_service) {
3246 os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_thawed_fg_xpc_service), relaxed);
3247 }
3248 }
3249 }
3250 os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_fg_resumed), relaxed);
3251 }
3252
3253out:
3254 proc_list_unlock();
3255}
3256
3257#endif /* CONFIG_FREEZE */
3258