1/*
2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include "vm_compressor_backing_store.h"
30#include <vm/vm_pageout.h>
31#include <vm/vm_protos.h>
32
33#include <IOKit/IOHibernatePrivate.h>
34
35#include <kern/policy_internal.h>
36
37LCK_GRP_DECLARE(vm_swap_data_lock_grp, "vm_swap_data");
38LCK_MTX_DECLARE(vm_swap_data_lock, &vm_swap_data_lock_grp);
39
40#if defined(XNU_TARGET_OS_OSX)
41/*
42 * launchd explicitly turns ON swap later during boot on macOS devices.
43 */
44boolean_t compressor_store_stop_compaction = TRUE;
45#else
46boolean_t compressor_store_stop_compaction = FALSE;
47#endif
48
49boolean_t vm_swapfile_create_needed = FALSE;
50boolean_t vm_swapfile_gc_needed = FALSE;
51
52int vm_swapper_throttle = -1;
53uint64_t vm_swapout_thread_id;
54
55uint64_t vm_swap_put_failures = 0; /* Likely failed I/O. Data is still in memory. */
56uint64_t vm_swap_get_failures = 0; /* Fatal */
57uint64_t vm_swap_put_failures_no_swap_file = 0; /* Possibly not fatal because we might just need a new swapfile. */
58int vm_num_swap_files_config = 0;
59int vm_num_swap_files = 0;
60int vm_num_pinned_swap_files = 0;
61uint64_t vm_swap_volume_capacity = 0;
62int vm_swapout_thread_processed_segments = 0;
63int vm_swapout_thread_awakened = 0;
64bool vm_swapout_thread_running = FALSE;
65_Atomic bool vm_swapout_wake_pending = false;
66int vm_swapfile_create_thread_awakened = 0;
67int vm_swapfile_create_thread_running = 0;
68int vm_swapfile_gc_thread_awakened = 0;
69int vm_swapfile_gc_thread_running = 0;
70
71int64_t vm_swappin_avail = 0;
72boolean_t vm_swappin_enabled = FALSE;
73unsigned int vm_swapfile_total_segs_alloced = 0;
74unsigned int vm_swapfile_total_segs_alloced_max = 0;
75unsigned int vm_swapfile_total_segs_used = 0;
76unsigned int vm_swapfile_total_segs_used_max = 0;
77
78char swapfilename[MAX_SWAPFILENAME_LEN + 1] = SWAP_FILE_NAME;
79
80extern vm_map_t compressor_map;
81extern uint32_t c_seg_bufsize, c_seg_allocsize, c_seg_off_limit;
82
83#define SWAP_READY 0x1 /* Swap file is ready to be used */
84#define SWAP_RECLAIM 0x2 /* Swap file is marked to be reclaimed */
85#define SWAP_WANTED 0x4 /* Swap file has waiters */
86#define SWAP_REUSE 0x8 /* Swap file is on the Q and has a name. Reuse after init-ing.*/
87#define SWAP_PINNED 0x10 /* Swap file is pinned (FusionDrive) */
88
89
90struct swapfile {
91 queue_head_t swp_queue; /* list of swap files */
92 char *swp_path; /* saved pathname of swap file */
93 struct vnode *swp_vp; /* backing vnode */
94 uint64_t swp_size; /* size of this swap file */
95 uint8_t *swp_bitmap; /* bitmap showing the alloced/freed slots in the swap file */
96 unsigned int swp_pathlen; /* length of pathname */
97 unsigned int swp_nsegs; /* #segments we can use */
98 unsigned int swp_nseginuse; /* #segments in use */
99 unsigned int swp_index; /* index of this swap file */
100 unsigned int swp_flags; /* state of swap file */
101 unsigned int swp_free_hint; /* offset of 1st free chunk */
102 unsigned int swp_io_count; /* count of outstanding I/Os */
103 c_segment_t *swp_csegs; /* back pointers to the c_segments. Used during swap reclaim. */
104
105 struct trim_list *swp_delayed_trim_list_head;
106 unsigned int swp_delayed_trim_count;
107};
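
/*
 * Illustration (not part of the code): a swap "handle" (the f_offset passed
 * around below) packs the owning swapfile's index into the high bits and the
 * byte offset of the segment within that file into the low bits:
 *
 *   handle = (swf->swp_index << SWAP_DEVICE_SHIFT) | (segidx * compressed_swap_chunk_size);
 *   index  = handle >> SWAP_DEVICE_SHIFT;
 *   offset = handle & SWAP_SLOT_MASK;
 *
 * swp_bitmap tracks segment allocation with one bit per segment: segment
 * index N lives in byte (N >> 3), bit (N % 8).  vm_swapfile_for_handle(),
 * vm_swap_put() and vm_swap_free_now() below rely on this layout.
 */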
108
109queue_head_t swf_global_queue;
110boolean_t swp_trim_supported = FALSE;
111
112extern clock_sec_t dont_trim_until_ts;
113clock_sec_t vm_swapfile_last_failed_to_create_ts = 0;
114clock_sec_t vm_swapfile_last_successful_create_ts = 0;
115int vm_swapfile_can_be_created = FALSE;
116boolean_t delayed_trim_handling_in_progress = FALSE;
117
118boolean_t hibernate_in_progress_with_pinned_swap = FALSE;
119
120static void vm_swapout_thread_throttle_adjust(void);
121static void vm_swap_free_now(struct swapfile *swf, uint64_t f_offset);
122void vm_swapout_thread(void);
123static void vm_swapfile_create_thread(void);
124static void vm_swapfile_gc_thread(void);
125static void vm_swap_defragment(void);
126static void vm_swap_handle_delayed_trims(boolean_t);
127static void vm_swap_do_delayed_trim(struct swapfile *);
128static void vm_swap_wait_on_trim_handling_in_progress(void);
129static void vm_swapout_finish(c_segment_t c_seg, uint64_t f_offset, uint32_t size, kern_return_t kr);
130
131extern int vnode_getwithref(struct vnode* vp);
132
133boolean_t vm_swap_force_defrag = FALSE, vm_swap_force_reclaim = FALSE;
134
135#if !XNU_TARGET_OS_OSX
136
/*
 * For CONFIG_FREEZE, we scale the c_segments_limit based on the
 * number of swapfiles allowed. That increases wired memory overhead.
 * So we want to keep the max number of swapfiles the same on both DEV
 * and RELEASE builds so that the memory overhead is similar for
 * performance comparisons.
 */
143#define VM_MAX_SWAP_FILE_NUM 5
144#if defined(__arm64__) && defined(ARM_LARGE_MEMORY)
145#define VM_MAX_SWAP_FILE_SWAP_ENABLED_NUM (64ULL * (1ULL << 30) / MAX_SWAP_FILE_SIZE)
146#define VM_MIN_SWAP_FILE_SWAP_ENABLED_NUM (16ULL * (1ULL << 30) / MAX_SWAP_FILE_SIZE)
147#else /* defined(__arm64__) && defined(ARM_LARGE_MEMORY) */
148/*
149 * We reserve compressor pool VA at boot for the max # of swap files. If someone
150 * has enabled app swap but we're not an arm large memory device we can't hog
151 * all of the VA so we only go up to 4GB.
152 */
153#define VM_MAX_SWAP_FILE_SWAP_ENABLED_NUM (4ULL * (1ULL << 30) / MAX_SWAP_FILE_SIZE)
154#define VM_MIN_SWAP_FILE_SWAP_ENABLED_NUM (4ULL * (1ULL << 30) / MAX_SWAP_FILE_SIZE)
155#endif /* defined(__arm64__) && defined(ARM_LARGE_MEMORY) */
156#define VM_SWAP_MIN_VOLUME_CAPACITY (128ULL * (1ULL << 30))
157
158#define VM_SWAPFILE_DELAYED_TRIM_MAX 4
159
160#define VM_SWAP_SHOULD_DEFRAGMENT() (((vm_swap_force_defrag == TRUE) || (c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 16))) ? 1 : 0)
161#define VM_SWAP_SHOULD_PIN(_size) FALSE
162#define VM_SWAP_SHOULD_CREATE(cur_ts) ((vm_num_swap_files < vm_num_swap_files_config) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)vm_swapfile_hiwater_segs) && \
163 ((cur_ts - vm_swapfile_last_failed_to_create_ts) > VM_SWAPFILE_DELAYED_CREATE) ? 1 : 0)
164#define VM_SWAP_SHOULD_TRIM(swf) ((swf->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)
165
166#else /* !XNU_TARGET_OS_OSX */
167
168#define VM_MAX_SWAP_FILE_NUM 100
169#define VM_SWAPFILE_DELAYED_TRIM_MAX 128
170
171#define VM_SWAP_SHOULD_DEFRAGMENT() (((vm_swap_force_defrag == TRUE) || (c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 4))) ? 1 : 0)
172#define VM_SWAP_SHOULD_PIN(_size) (vm_swappin_avail > 0 && vm_swappin_avail >= (int64_t)(_size))
173#define VM_SWAP_SHOULD_CREATE(cur_ts) ((vm_num_swap_files < vm_num_swap_files_config) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)vm_swapfile_hiwater_segs) && \
174 ((cur_ts - vm_swapfile_last_failed_to_create_ts) > VM_SWAPFILE_DELAYED_CREATE) ? 1 : 0)
175#define VM_SWAP_SHOULD_TRIM(swf) ((swf->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)
176
177#endif /* !XNU_TARGET_OS_OSX */
178
179#define VM_SWAP_SHOULD_RECLAIM() (((vm_swap_force_reclaim == TRUE) || ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) >= swapfile_reclaim_threshold_segs)) ? 1 : 0)
180#define VM_SWAP_SHOULD_ABORT_RECLAIM() (((vm_swap_force_reclaim == FALSE) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) <= swapfile_reclam_minimum_segs)) ? 1 : 0)
181#define VM_SWAPFILE_DELAYED_CREATE 15
182
183#define VM_SWAP_BUSY() (((c_early_swapout_count + c_regular_swapout_count + c_late_swapout_count) && (vm_swapper_throttle == THROTTLE_LEVEL_COMPRESSOR_TIER0)) ? 1 : 0)
184
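/*
 * In short: VM_SWAP_SHOULD_CREATE() asks for another swapfile while we are
 * under the configured file count, the number of free (alloced but unused)
 * segments has dropped below vm_swapfile_hiwater_segs, and at least
 * VM_SWAPFILE_DELAYED_CREATE seconds have passed since the last failed
 * create.  VM_SWAP_SHOULD_RECLAIM() fires once the free segment count grows
 * past swapfile_reclaim_threshold_segs, and VM_SWAP_SHOULD_ABORT_RECLAIM()
 * stops the reclaim when it falls back to swapfile_reclam_minimum_segs.
 */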
185
186#if CHECKSUM_THE_SWAP
187extern unsigned int hash_string(char *cp, int len);
188#endif
189
190#if RECORD_THE_COMPRESSED_DATA
191boolean_t c_compressed_record_init_done = FALSE;
192int c_compressed_record_write_error = 0;
193struct vnode *c_compressed_record_vp = NULL;
194uint64_t c_compressed_record_file_offset = 0;
195void c_compressed_record_init(void);
196void c_compressed_record_write(char *, int);
197#endif
198
199extern void vm_pageout_io_throttle(void);
200
201static struct swapfile *vm_swapfile_for_handle(uint64_t);
202
203/*
204 * Called with the vm_swap_data_lock held.
205 */
206
207static struct swapfile *
208vm_swapfile_for_handle(uint64_t f_offset)
209{
210 uint64_t file_offset = 0;
211 unsigned int swapfile_index = 0;
212 struct swapfile* swf = NULL;
213
214 file_offset = (f_offset & SWAP_SLOT_MASK);
215 swapfile_index = (f_offset >> SWAP_DEVICE_SHIFT);
216
217 swf = (struct swapfile*) queue_first(&swf_global_queue);
218
219 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
220 if (swapfile_index == swf->swp_index) {
221 break;
222 }
223
224 swf = (struct swapfile*) queue_next(&swf->swp_queue);
225 }
226
227 if (queue_end(&swf_global_queue, (queue_entry_t) swf)) {
228 swf = NULL;
229 }
230
231 return swf;
232}
233
234#if ENCRYPTED_SWAP
235
236#include <libkern/crypto/aesxts.h>
237
extern int cc_rand_generate(void *, size_t); /* from libkern/crypto/rand.h */
239
240boolean_t swap_crypt_initialized;
241void swap_crypt_initialize(void);
242
243symmetric_xts xts_modectx;
244uint32_t swap_crypt_key1[8]; /* big enough for a 256 bit random key */
245uint32_t swap_crypt_key2[8]; /* big enough for a 256 bit random key */
246
247#if DEVELOPMENT || DEBUG
248boolean_t swap_crypt_xts_tested = FALSE;
249unsigned char swap_crypt_test_page_ref[4096] __attribute__((aligned(4096)));
250unsigned char swap_crypt_test_page_encrypt[4096] __attribute__((aligned(4096)));
251unsigned char swap_crypt_test_page_decrypt[4096] __attribute__((aligned(4096)));
252#endif /* DEVELOPMENT || DEBUG */
253
254unsigned long vm_page_encrypt_counter;
255unsigned long vm_page_decrypt_counter;
256
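
/*
 * Swap encryption sketch: on first use, two 256-bit keys are pulled from the
 * kernel RNG and handed to xts_start() once.  Each c_segment is then
 * encrypted/decrypted in place with AES-XTS, using the c_segment_t address as
 * the low 64 bits of the tweak (see vm_swap_encrypt/vm_swap_decrypt below).
 */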
257
258void
259swap_crypt_initialize(void)
260{
261 uint8_t *enckey1, *enckey2;
262 int keylen1, keylen2;
263 int error;
264
265 assert(swap_crypt_initialized == FALSE);
266
267 keylen1 = sizeof(swap_crypt_key1);
268 enckey1 = (uint8_t *)&swap_crypt_key1;
269 keylen2 = sizeof(swap_crypt_key2);
270 enckey2 = (uint8_t *)&swap_crypt_key2;
271
272 error = cc_rand_generate((void *)enckey1, keylen1);
273 assert(!error);
274
275 error = cc_rand_generate((void *)enckey2, keylen2);
276 assert(!error);
277
 error = xts_start(0, NULL, enckey1, keylen1, enckey2, keylen2, 0, 0, &xts_modectx);
279 assert(!error);
280
281 swap_crypt_initialized = TRUE;
282
283#if DEVELOPMENT || DEBUG
284 uint8_t *encptr;
285 uint8_t *decptr;
286 uint8_t *refptr;
287 uint8_t *iv;
288 uint64_t ivnum[2];
289 int size = 0;
290 int i = 0;
291 int rc = 0;
292
293 assert(swap_crypt_xts_tested == FALSE);
294
295 /*
296 * Validate the encryption algorithms.
297 *
298 * First initialize the test data.
299 */
300 for (i = 0; i < 4096; i++) {
301 swap_crypt_test_page_ref[i] = (char) i;
302 }
303 ivnum[0] = (uint64_t)0xaa;
304 ivnum[1] = 0;
305 iv = (uint8_t *)ivnum;
306
307 refptr = (uint8_t *)swap_crypt_test_page_ref;
308 encptr = (uint8_t *)swap_crypt_test_page_encrypt;
309 decptr = (uint8_t *)swap_crypt_test_page_decrypt;
310 size = 4096;
311
312 /* encrypt */
313 rc = xts_encrypt(refptr, size, encptr, iv, &xts_modectx);
314 assert(!rc);
315
316 /* compare result with original - should NOT match */
317 for (i = 0; i < 4096; i++) {
318 if (swap_crypt_test_page_encrypt[i] !=
319 swap_crypt_test_page_ref[i]) {
320 break;
321 }
322 }
323 assert(i != 4096);
324
325 /* decrypt */
326 rc = xts_decrypt(encptr, size, decptr, iv, &xts_modectx);
327 assert(!rc);
328
329 /* compare result with original */
330 for (i = 0; i < 4096; i++) {
331 if (swap_crypt_test_page_decrypt[i] !=
332 swap_crypt_test_page_ref[i]) {
333 panic("encryption test failed");
334 }
335 }
336 /* encrypt in place */
337 rc = xts_encrypt(decptr, size, decptr, iv, &xts_modectx);
338 assert(!rc);
339
340 /* decrypt in place */
341 rc = xts_decrypt(decptr, size, decptr, iv, &xts_modectx);
342 assert(!rc);
343
344 for (i = 0; i < 4096; i++) {
345 if (swap_crypt_test_page_decrypt[i] !=
346 swap_crypt_test_page_ref[i]) {
347 panic("in place encryption test failed");
348 }
349 }
350 swap_crypt_xts_tested = TRUE;
351#endif /* DEVELOPMENT || DEBUG */
352}
353
354
355void
356vm_swap_encrypt(c_segment_t c_seg)
357{
358 uint8_t *ptr;
359 uint8_t *iv;
360 uint64_t ivnum[2];
361 int size = 0;
362 int rc = 0;
363
364 if (swap_crypt_initialized == FALSE) {
365 swap_crypt_initialize();
366 }
367
368#if DEVELOPMENT || DEBUG
369 C_SEG_MAKE_WRITEABLE(c_seg);
370#endif
371 ptr = (uint8_t *)c_seg->c_store.c_buffer;
372 size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
373
374 ivnum[0] = (uint64_t)c_seg;
375 ivnum[1] = 0;
376 iv = (uint8_t *)ivnum;
377
 rc = xts_encrypt(ptr, size, ptr, iv, &xts_modectx);
379 assert(!rc);
380
381 vm_page_encrypt_counter += (size / PAGE_SIZE_64);
382
383#if DEVELOPMENT || DEBUG
384 C_SEG_WRITE_PROTECT(c_seg);
385#endif
386}
387
388void
389vm_swap_decrypt(c_segment_t c_seg)
390{
391 uint8_t *ptr;
392 uint8_t *iv;
393 uint64_t ivnum[2];
394 int size = 0;
395 int rc = 0;
396
397 assert(swap_crypt_initialized);
398
399#if DEVELOPMENT || DEBUG
400 C_SEG_MAKE_WRITEABLE(c_seg);
401#endif
402 ptr = (uint8_t *)c_seg->c_store.c_buffer;
403 size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
404
405 ivnum[0] = (uint64_t)c_seg;
406 ivnum[1] = 0;
407 iv = (uint8_t *)ivnum;
408
 rc = xts_decrypt(ptr, size, ptr, iv, &xts_modectx);
410 assert(!rc);
411
412 vm_page_decrypt_counter += (size / PAGE_SIZE_64);
413
414#if DEVELOPMENT || DEBUG
415 C_SEG_WRITE_PROTECT(c_seg);
416#endif
417}
418#endif /* ENCRYPTED_SWAP */
419
420uint64_t compressed_swap_chunk_size, vm_swapfile_hiwater_segs, swapfile_reclaim_threshold_segs, swapfile_reclam_minimum_segs;
421extern bool memorystatus_swap_all_apps;
422
423void
424vm_compressor_swap_init_swap_file_limit(void)
425{
426 vm_num_swap_files_config = VM_MAX_SWAP_FILE_NUM;
427#if CONFIG_JETSAM
428 if (memorystatus_swap_all_apps) {
429 if (vm_swap_volume_capacity == 0) {
430 /*
431 * Early in boot we don't know the swap volume capacity.
432 * That's fine. Reserve space for the maximum config
433 * and we'll lower this later in boot once we have the capacity.
434 */
435 vm_num_swap_files_config = VM_MAX_SWAP_FILE_SWAP_ENABLED_NUM;
436 } else {
437 static uint64_t kFixedPointFactor = 100;
438 /*
439 * Scale the max number of swap files linearly.
440 * But we can never go above VM_MAX_SWAP_FILE_SWAP_ENABLED_NUM.
441 */
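 /*
 * Worked example (illustrative only, assuming MAX_SWAP_FILE_SIZE is 1GB on
 * an ARM_LARGE_MEMORY config, i.e. min/max counts of 16 and 64 files):
 * a 256GB volume gives 256GB * 100 / 128GB = 200, then 200 * 16 / 100 = 32
 * swapfiles, which already falls inside the [16, 64] clamp applied below.
 */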
442 vm_num_swap_files_config = vm_swap_volume_capacity * kFixedPointFactor / VM_SWAP_MIN_VOLUME_CAPACITY
443 * VM_MIN_SWAP_FILE_SWAP_ENABLED_NUM / kFixedPointFactor;
444 vm_num_swap_files_config = MAX(vm_num_swap_files_config, VM_MIN_SWAP_FILE_SWAP_ENABLED_NUM);
445 vm_num_swap_files_config = MIN(vm_num_swap_files_config, VM_MAX_SWAP_FILE_SWAP_ENABLED_NUM);
446 }
447 }
448#endif /* CONFIG_JETSAM */
449#if DEVELOPMENT || DEBUG
450 typeof(vm_num_swap_files_config) parsed_vm_max_num_swap_files = 0;
451 if (PE_parse_boot_argn("vm_max_num_swap_files", &parsed_vm_max_num_swap_files, sizeof(parsed_vm_max_num_swap_files))) {
452 if (parsed_vm_max_num_swap_files > 0) {
453 vm_num_swap_files_config = parsed_vm_max_num_swap_files;
454 } else {
455 printf("WARNING: Ignoring vm_max_num_swap_files=%d boot-arg. Value must be > 0\n", parsed_vm_max_num_swap_files);
456 }
457 }
458#endif
 printf("Maximum number of VM swap files: %d\n", vm_num_swap_files_config);
460}
461
462int vm_swap_enabled = 0;
463void
464vm_compressor_swap_init(void)
465{
466 thread_t thread = NULL;
467
468 queue_init(&swf_global_queue);
469
470#if !XNU_TARGET_OS_OSX
471 /*
472 * dummy value until the swap file gets created
473 * when we drive the first c_segment_t to the
474 * swapout queue... at that time we will
475 * know the true size we have to work with
476 */
477 c_overage_swapped_limit = 16;
478#endif /* !XNU_TARGET_OS_OSX */
479
480 compressed_swap_chunk_size = c_seg_bufsize;
481 vm_swapfile_hiwater_segs = (MIN_SWAP_FILE_SIZE / compressed_swap_chunk_size);
482 swapfile_reclaim_threshold_segs = ((17 * (MAX_SWAP_FILE_SIZE / compressed_swap_chunk_size)) / 10);
483 swapfile_reclam_minimum_segs = ((13 * (MAX_SWAP_FILE_SIZE / compressed_swap_chunk_size)) / 10);
484
 if (kernel_thread_start_priority((thread_continue_t)vm_swapout_thread, NULL,
 BASEPRI_VM, &thread) != KERN_SUCCESS) {
 panic("vm_swapout_thread: create failed");
 }
 thread_set_thread_name(thread, "VM_swapout");
 vm_swapout_thread_id = thread->thread_id;
 thread_deallocate(thread);

 if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_create_thread, NULL,
 BASEPRI_VM, &thread) != KERN_SUCCESS) {
 panic("vm_swapfile_create_thread: create failed");
 }
 thread_set_thread_name(thread, "VM_swapfile_create");
 thread_deallocate(thread);

 if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_gc_thread, NULL,
 BASEPRI_VM, &thread) != KERN_SUCCESS) {
 panic("vm_swapfile_gc_thread: create failed");
 }
 thread_set_thread_name(thread, "VM_swapfile_gc");
 /*
 * Swapfile garbage collection will need to allocate memory
 * to complete its swap reclaim and in-memory compaction.
 * So allow it to dip into the reserved VM page pool.
 */
 thread_lock(thread);
 thread->options |= TH_OPT_VMPRIV;
 thread_unlock(thread);
 thread_deallocate(thread);
 proc_set_thread_policy_with_tid(kernel_task, thread->thread_id,
 TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
 proc_set_thread_policy_with_tid(kernel_task, thread->thread_id,
 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);

 vm_swap_enabled = 1;
 printf("VM Swap Subsystem is ON\n");
521}
522
523
524#if RECORD_THE_COMPRESSED_DATA
525
526void
527c_compressed_record_init()
528{
529 if (c_compressed_record_init_done == FALSE) {
530 vm_swapfile_open("/tmp/compressed_data", &c_compressed_record_vp);
531 c_compressed_record_init_done = TRUE;
532 }
533}
534
535void
536c_compressed_record_write(char *buf, int size)
537{
538 if (c_compressed_record_write_error == 0) {
539 c_compressed_record_write_error = vm_record_file_write(c_compressed_record_vp, c_compressed_record_file_offset, buf, size);
540 c_compressed_record_file_offset += size;
541 }
542}
543#endif
544
545
546int compaction_swapper_inited = 0;
547
548void
549vm_compaction_swapper_do_init(void)
550{
551 struct vnode *vp;
552 char *pathname;
553 int namelen;
554
555 if (compaction_swapper_inited) {
556 return;
557 }
558
559 if (vm_compressor_mode != VM_PAGER_COMPRESSOR_WITH_SWAP) {
560 compaction_swapper_inited = 1;
561 return;
562 }
 lck_mtx_lock(&vm_swap_data_lock);
564
565 if (!compaction_swapper_inited) {
 namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;
 pathname = kalloc_data(namelen, Z_WAITOK | Z_ZERO);
 snprintf(pathname, namelen, "%s%d", swapfilename, 0);

 vm_swapfile_open(pathname, &vp);
571
572 if (vp) {
573 if (vnode_pager_isSSD(vp) == FALSE) {
 /*
 * swap files live on an HDD, so let's make sure to start swapping
 * much earlier since we're not worried about SSD write-wear and
 * we have so little write bandwidth to work with.
 * These values were derived experimentally by running the performance
 * team's stock test for evaluating HDD performance against various
 * combinations and comparing the overall results.
 * Note that the > relationship between these 4 values must be maintained.
 */
583 if (vm_compressor_minorcompact_threshold_divisor_overridden == 0) {
584 vm_compressor_minorcompact_threshold_divisor = 15;
585 }
586 if (vm_compressor_majorcompact_threshold_divisor_overridden == 0) {
587 vm_compressor_majorcompact_threshold_divisor = 18;
588 }
589 if (vm_compressor_unthrottle_threshold_divisor_overridden == 0) {
590 vm_compressor_unthrottle_threshold_divisor = 24;
591 }
592 if (vm_compressor_catchup_threshold_divisor_overridden == 0) {
593 vm_compressor_catchup_threshold_divisor = 30;
594 }
595 }
596#if XNU_TARGET_OS_OSX
597 vnode_setswapmount(vp);
598 vm_swappin_avail = vnode_getswappin_avail(vp);
599
600 if (vm_swappin_avail) {
601 vm_swappin_enabled = TRUE;
602 }
603#endif /* XNU_TARGET_OS_OSX */
 vm_swapfile_close((uint64_t)pathname, vp);
605 }
606 kfree_data(pathname, namelen);
607
608 compaction_swapper_inited = 1;
609 }
 lck_mtx_unlock(&vm_swap_data_lock);
611}
612
613
614void
615vm_swap_consider_defragmenting(int flags)
616{
617 boolean_t force_defrag = (flags & VM_SWAP_FLAGS_FORCE_DEFRAG);
618 boolean_t force_reclaim = (flags & VM_SWAP_FLAGS_FORCE_RECLAIM);
619
620 if (compressor_store_stop_compaction == FALSE && !VM_SWAP_BUSY() &&
621 (force_defrag || force_reclaim || VM_SWAP_SHOULD_DEFRAGMENT() || VM_SWAP_SHOULD_RECLAIM())) {
622 if (!vm_swapfile_gc_thread_running || force_defrag || force_reclaim) {
 lck_mtx_lock(&vm_swap_data_lock);
624
625 if (force_defrag) {
626 vm_swap_force_defrag = TRUE;
627 }
628
629 if (force_reclaim) {
630 vm_swap_force_reclaim = TRUE;
631 }
632
633 if (!vm_swapfile_gc_thread_running) {
634 thread_wakeup((event_t) &vm_swapfile_gc_needed);
635 }
636
 lck_mtx_unlock(&vm_swap_data_lock);
638 }
639 }
640}
641
642
643int vm_swap_defragment_yielded = 0;
644int vm_swap_defragment_swapin = 0;
645int vm_swap_defragment_free = 0;
646int vm_swap_defragment_busy = 0;
647
648#if CONFIG_FREEZE
649extern int32_t c_segment_pages_compressed_incore;
650extern int32_t c_segment_pages_compressed_incore_late_swapout;
651extern uint32_t c_segment_pages_compressed_nearing_limit;
652extern uint32_t c_segment_count;
653extern uint32_t c_segments_nearing_limit;
654
655boolean_t memorystatus_kill_on_VM_compressor_space_shortage(boolean_t);
656
657extern bool freezer_incore_cseg_acct;
658#endif /* CONFIG_FREEZE */
659
660static void
661vm_swap_defragment()
662{
663 c_segment_t c_seg;
664
665 /*
666 * have to grab the master lock w/o holding
667 * any locks in spin mode
668 */
669 PAGE_REPLACEMENT_DISALLOWED(TRUE);
670
671 lck_mtx_lock_spin_always(c_list_lock);
672
673 while (!queue_empty(&c_swappedout_sparse_list_head)) {
674 if (compressor_store_stop_compaction == TRUE || VM_SWAP_BUSY()) {
675 vm_swap_defragment_yielded++;
676 break;
677 }
678 c_seg = (c_segment_t)queue_first(&c_swappedout_sparse_list_head);
679
 lck_mtx_lock_spin_always(&c_seg->c_lock);
681
682 assert(c_seg->c_state == C_ON_SWAPPEDOUTSPARSE_Q);
683
684 if (c_seg->c_busy) {
685 lck_mtx_unlock_always(c_list_lock);
686
687 PAGE_REPLACEMENT_DISALLOWED(FALSE);
688 /*
689 * c_seg_wait_on_busy consumes c_seg->c_lock
690 */
691 c_seg_wait_on_busy(c_seg);
692
693 PAGE_REPLACEMENT_DISALLOWED(TRUE);
694
695 lck_mtx_lock_spin_always(c_list_lock);
696
697 vm_swap_defragment_busy++;
698 continue;
699 }
700 if (c_seg->c_bytes_used == 0) {
701 /*
702 * c_seg_free_locked consumes the c_list_lock
703 * and c_seg->c_lock
704 */
705 C_SEG_BUSY(c_seg);
706 c_seg_free_locked(c_seg);
707
708 vm_swap_defragment_free++;
709 } else {
710 lck_mtx_unlock_always(c_list_lock);
711
712#if CONFIG_FREEZE
713 if (freezer_incore_cseg_acct) {
714 /*
715 * TODO(jason): These two are tricky because they're pre-emptive jetsams.
716 * The system is not unhealthy, but we know that it's about to become unhealthy once
717 * we do this swapin.
718 * So we're waking up the memorystatus thread to make space
719 * (hopefully) before this segment comes in.
720 *
721 * I think the compressor_backing_store needs to keep track of
722 * two new globals that will track the number of segments
723 * being swapped in due to defrag and the number of slots used
724 * in those segments.
725 * Then the health check below can be called from the memorystatus
726 * thread.
727 */
728 if ((c_seg->c_slots_used + c_segment_pages_compressed_incore) >= c_segment_pages_compressed_nearing_limit) {
729 memorystatus_kill_on_VM_compressor_space_shortage(TRUE /* async */);
730 }
731
732 uint32_t incore_seg_count = c_segment_count - c_swappedout_count - c_swappedout_sparse_count;
733 if ((incore_seg_count + 1) >= c_segments_nearing_limit) {
734 memorystatus_kill_on_VM_compressor_space_shortage(TRUE /* async */);
735 }
736 }
737#endif /* CONFIG_FREEZE */
738 if (c_seg_swapin(c_seg, TRUE, FALSE) == 0) {
739 lck_mtx_unlock_always(&c_seg->c_lock);
740 vmcs_stats.defrag_swapins += (round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset))) >> PAGE_SHIFT;
741 }
742
743 vm_swap_defragment_swapin++;
744 }
745 PAGE_REPLACEMENT_DISALLOWED(FALSE);
746
747 vm_pageout_io_throttle();
748
 /*
 * because write waiters have priority over readers,
 * dropping and immediately retaking the master lock
 * gives any thread waiting to acquire it exclusively
 * an opportunity to take it
 */
755 PAGE_REPLACEMENT_DISALLOWED(TRUE);
756
757 lck_mtx_lock_spin_always(c_list_lock);
758 }
759 lck_mtx_unlock_always(c_list_lock);
760
761 PAGE_REPLACEMENT_DISALLOWED(FALSE);
762}
763
764
765bool vm_swapfile_create_thread_inited = false;
766static void
767vm_swapfile_create_thread(void)
768{
769 clock_sec_t sec;
770 clock_nsec_t nsec;
771
772 if (!vm_swapfile_create_thread_inited) {
773#if CONFIG_THREAD_GROUPS
774 thread_group_vm_add();
775#endif /* CONFIG_THREAD_GROUPS */
776 current_thread()->options |= TH_OPT_VMPRIV;
777 vm_swapfile_create_thread_inited = true;
778 }
779
780 vm_swapfile_create_thread_awakened++;
781 vm_swapfile_create_thread_running = 1;
782
783 while (TRUE) {
784 /*
785 * walk through the list of swap files
786 * and do the delayed frees/trims for
787 * any swap file whose count of delayed
788 * frees is above the batch limit
789 */
790 vm_swap_handle_delayed_trims(FALSE);
791
 lck_mtx_lock(&vm_swap_data_lock);
793
794 if (hibernate_in_progress_with_pinned_swap == TRUE) {
795 break;
796 }
797
798 if (compressor_store_stop_compaction == TRUE) {
799 break;
800 }
801
 clock_get_system_nanotime(&sec, &nsec);
803
804 if (VM_SWAP_SHOULD_CREATE(sec) == 0) {
805 break;
806 }
807
 lck_mtx_unlock(&vm_swap_data_lock);
809
810 if (vm_swap_create_file() == FALSE) {
811 vm_swapfile_last_failed_to_create_ts = sec;
812 HIBLOG("vm_swap_create_file failed @ %lu secs\n", (unsigned long)sec);
813 } else {
814 vm_swapfile_last_successful_create_ts = sec;
815 }
816 }
817 vm_swapfile_create_thread_running = 0;
818
819 if (hibernate_in_progress_with_pinned_swap == TRUE) {
820 thread_wakeup((event_t)&hibernate_in_progress_with_pinned_swap);
821 }
822
823 if (compressor_store_stop_compaction == TRUE) {
824 thread_wakeup((event_t)&compressor_store_stop_compaction);
825 }
826
 assert_wait((event_t)&vm_swapfile_create_needed, THREAD_UNINT);

 lck_mtx_unlock(&vm_swap_data_lock);

 thread_block((thread_continue_t)vm_swapfile_create_thread);
832
833 /* NOTREACHED */
834}
835
836
837#if HIBERNATION
838
839kern_return_t
840hibernate_pin_swap(boolean_t start)
841{
842 vm_compaction_swapper_do_init();
843
844 if (start == FALSE) {
845 lck_mtx_lock(&vm_swap_data_lock);
846 hibernate_in_progress_with_pinned_swap = FALSE;
847 lck_mtx_unlock(&vm_swap_data_lock);
848
849 return KERN_SUCCESS;
850 }
851 if (vm_swappin_enabled == FALSE) {
852 return KERN_SUCCESS;
853 }
854
855 lck_mtx_lock(&vm_swap_data_lock);
856
857 hibernate_in_progress_with_pinned_swap = TRUE;
858
859 while (vm_swapfile_create_thread_running || vm_swapfile_gc_thread_running) {
860 assert_wait((event_t)&hibernate_in_progress_with_pinned_swap, THREAD_UNINT);
861
862 lck_mtx_unlock(&vm_swap_data_lock);
863
864 thread_block(THREAD_CONTINUE_NULL);
865
866 lck_mtx_lock(&vm_swap_data_lock);
867 }
868 if (vm_num_swap_files > vm_num_pinned_swap_files) {
869 hibernate_in_progress_with_pinned_swap = FALSE;
870 lck_mtx_unlock(&vm_swap_data_lock);
871
872 HIBLOG("hibernate_pin_swap failed - vm_num_swap_files = %d, vm_num_pinned_swap_files = %d\n",
873 vm_num_swap_files, vm_num_pinned_swap_files);
874 return KERN_FAILURE;
875 }
876 lck_mtx_unlock(&vm_swap_data_lock);
877
878 while (VM_SWAP_SHOULD_PIN(MAX_SWAP_FILE_SIZE)) {
879 if (vm_swap_create_file() == FALSE) {
880 break;
881 }
882 }
883 return KERN_SUCCESS;
884}
885#endif
886bool vm_swapfile_gc_thread_inited = false;
887static void
888vm_swapfile_gc_thread(void)
889{
890 boolean_t need_defragment;
891 boolean_t need_reclaim;
892
893 if (!vm_swapfile_gc_thread_inited) {
894#if CONFIG_THREAD_GROUPS
895 thread_group_vm_add();
896#endif /* CONFIG_THREAD_GROUPS */
897 vm_swapfile_gc_thread_inited = true;
898 }
899
900 vm_swapfile_gc_thread_awakened++;
901 vm_swapfile_gc_thread_running = 1;
902
903 while (TRUE) {
 lck_mtx_lock(&vm_swap_data_lock);
905
906 if (hibernate_in_progress_with_pinned_swap == TRUE) {
907 break;
908 }
909
910 if (VM_SWAP_BUSY() || compressor_store_stop_compaction == TRUE) {
911 break;
912 }
913
914 need_defragment = FALSE;
915 need_reclaim = FALSE;
916
917 if (VM_SWAP_SHOULD_DEFRAGMENT()) {
918 need_defragment = TRUE;
919 }
920
921 if (VM_SWAP_SHOULD_RECLAIM()) {
922 need_defragment = TRUE;
923 need_reclaim = TRUE;
924 }
925 if (need_defragment == FALSE && need_reclaim == FALSE) {
926 break;
927 }
928
929 vm_swap_force_defrag = FALSE;
930 vm_swap_force_reclaim = FALSE;
931
 lck_mtx_unlock(&vm_swap_data_lock);
933
934 if (need_defragment == TRUE) {
935 vm_swap_defragment();
936 }
937 if (need_reclaim == TRUE) {
938 vm_swap_reclaim();
939 }
940 }
941 vm_swapfile_gc_thread_running = 0;
942
943 if (hibernate_in_progress_with_pinned_swap == TRUE) {
944 thread_wakeup((event_t)&hibernate_in_progress_with_pinned_swap);
945 }
946
947 if (compressor_store_stop_compaction == TRUE) {
948 thread_wakeup((event_t)&compressor_store_stop_compaction);
949 }
950
 assert_wait((event_t)&vm_swapfile_gc_needed, THREAD_UNINT);

 lck_mtx_unlock(&vm_swap_data_lock);

 thread_block((thread_continue_t)vm_swapfile_gc_thread);
956
957 /* NOTREACHED */
958}
959
960
961
962#define VM_SWAPOUT_LIMIT_T2P 4
963#define VM_SWAPOUT_LIMIT_T1P 4
964#define VM_SWAPOUT_LIMIT_T0P 6
965#define VM_SWAPOUT_LIMIT_T0 8
966#define VM_SWAPOUT_LIMIT_MAX 8
967
968#define VM_SWAPOUT_START 0
969#define VM_SWAPOUT_T2_PASSIVE 1
970#define VM_SWAPOUT_T1_PASSIVE 2
971#define VM_SWAPOUT_T0_PASSIVE 3
972#define VM_SWAPOUT_T0 4
973
974int vm_swapout_state = VM_SWAPOUT_START;
975int vm_swapout_limit = 1;
976
977int vm_swapper_entered_T0 = 0;
978int vm_swapper_entered_T0P = 0;
979int vm_swapper_entered_T1P = 0;
980int vm_swapper_entered_T2P = 0;
981
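
/*
 * Throttle state machine implemented below: the swapout thread starts in
 * VM_SWAPOUT_START and immediately drops to TIER2 passive I/O.  It moves up
 * to TIER1 passive while hibernate flushing or age-targeted swapouts are in
 * progress, to TIER0 passive when SWAPPER_NEEDS_TO_UNTHROTTLE(), and to
 * non-passive TIER0 when it has to catch up; it steps back down once
 * SWAPPER_HAS_CAUGHTUP() / SWAPPER_NEEDS_TO_RETHROTTLE() say so.  Each state
 * also sets vm_swapout_limit, the number of swapout I/Os kept in flight.
 */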
982
983static void
984vm_swapout_thread_throttle_adjust(void)
985{
986 switch (vm_swapout_state) {
987 case VM_SWAPOUT_START:
988
989 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
990 vm_swapper_entered_T2P++;
991
 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
996 vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
997 vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
998
999 break;
1000
1001 case VM_SWAPOUT_T2_PASSIVE:
1002
1003 if (SWAPPER_NEEDS_TO_UNTHROTTLE()) {
1004 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER0;
1005 vm_swapper_entered_T0P++;
1006
 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1011 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
1012 vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
1013
1014 break;
1015 }
1016 if (swapout_target_age || hibernate_flushing == TRUE) {
1017 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER1;
1018 vm_swapper_entered_T1P++;
1019
 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1024 vm_swapout_limit = VM_SWAPOUT_LIMIT_T1P;
1025 vm_swapout_state = VM_SWAPOUT_T1_PASSIVE;
1026 }
1027 break;
1028
1029 case VM_SWAPOUT_T1_PASSIVE:
1030
1031 if (SWAPPER_NEEDS_TO_UNTHROTTLE()) {
1032 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER0;
1033 vm_swapper_entered_T0P++;
1034
 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1039 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
1040 vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
1041
1042 break;
1043 }
1044 if (swapout_target_age == 0 && hibernate_flushing == FALSE) {
1045 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
1046 vm_swapper_entered_T2P++;
1047
 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1052 vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
1053 vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
1054 }
1055 break;
1056
1057 case VM_SWAPOUT_T0_PASSIVE:
1058
1059 if (SWAPPER_NEEDS_TO_RETHROTTLE()) {
1060 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
1061 vm_swapper_entered_T2P++;
1062
 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1067 vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
1068 vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
1069
1070 break;
1071 }
1072 if (SWAPPER_NEEDS_TO_CATCHUP()) {
1073 vm_swapper_entered_T0++;
1074
 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_DISABLE);
1077 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0;
1078 vm_swapout_state = VM_SWAPOUT_T0;
1079 }
1080 break;
1081
1082 case VM_SWAPOUT_T0:
1083
1084 if (SWAPPER_HAS_CAUGHTUP()) {
1085 vm_swapper_entered_T0P++;
1086
 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1089 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
1090 vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
1091 }
1092 break;
1093 }
1094}
1095
1096int vm_swapout_found_empty = 0;
1097
1098struct swapout_io_completion vm_swapout_ctx[VM_SWAPOUT_LIMIT_MAX];
1099
1100int vm_swapout_soc_busy = 0;
1101int vm_swapout_soc_done = 0;
1102
1103
1104static struct swapout_io_completion *
1105vm_swapout_find_free_soc(void)
1106{
1107 int i;
1108
1109 for (i = 0; i < VM_SWAPOUT_LIMIT_MAX; i++) {
1110 if (vm_swapout_ctx[i].swp_io_busy == 0) {
1111 return &vm_swapout_ctx[i];
1112 }
1113 }
1114 assert(vm_swapout_soc_busy == VM_SWAPOUT_LIMIT_MAX);
1115
1116 return NULL;
1117}
1118
1119static struct swapout_io_completion *
1120vm_swapout_find_done_soc(void)
1121{
1122 int i;
1123
1124 if (vm_swapout_soc_done) {
1125 for (i = 0; i < VM_SWAPOUT_LIMIT_MAX; i++) {
1126 if (vm_swapout_ctx[i].swp_io_done) {
1127 return &vm_swapout_ctx[i];
1128 }
1129 }
1130 }
1131 return NULL;
1132}
1133
1134static void
1135vm_swapout_complete_soc(struct swapout_io_completion *soc)
1136{
1137 kern_return_t kr;
1138
1139 if (soc->swp_io_error) {
1140 kr = KERN_FAILURE;
1141 } else {
1142 kr = KERN_SUCCESS;
1143 }
1144
1145 lck_mtx_unlock_always(c_list_lock);
1146
1147 vm_swap_put_finish(soc->swp_swf, &soc->swp_f_offset, soc->swp_io_error, TRUE /*drop iocount*/);
 vm_swapout_finish(soc->swp_c_seg, soc->swp_f_offset, soc->swp_c_size, kr);
1149
1150 lck_mtx_lock_spin_always(c_list_lock);
1151
1152 soc->swp_io_done = 0;
1153 soc->swp_io_busy = 0;
1154
1155 vm_swapout_soc_busy--;
1156 vm_swapout_soc_done--;
1157}
1158
1159bool vm_swapout_thread_inited = false;
1160extern uint32_t c_donate_swapout_count;
1161#if CONFIG_JETSAM
1162bool memorystatus_swap_over_trigger(uint64_t adjustment_factor);
1163/*
1164 * swapout_sleep_threshold sets the percentage of the swapout threshold at which
1165 * the swap thread will stop processing the swapout queue.
1166 * By default this is 90 which means we will swap until the
1167 * swapout queue size is at 90% of the threshold to wake the swap thread.
 * By definition the queue length must be >= 100% of the threshold when the
 * swap thread is woken up. On development builds this can be adjusted with
1170 * the vm.swapout_sleep_threshold sysctl.
1171 */
1172uint32_t swapout_sleep_threshold = 90;
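/*
 * Example: if the late swapout queue must reach 100 segments before this
 * thread is woken, a swapout_sleep_threshold of 90 lets it keep draining
 * until roughly 90 segments remain, at which point it blocks again.
 */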
1173#endif /* CONFIG_JETSAM */
1174static bool
1175should_process_swapout_queue(const queue_head_t *swapout_list_head)
1176{
1177 bool process_queue = !queue_empty(swapout_list_head) &&
1178 vm_swapout_soc_busy < vm_swapout_limit &&
1179 !compressor_store_stop_compaction;
1180#if CONFIG_JETSAM
1181 if (memorystatus_swap_all_apps && swapout_list_head == &c_late_swapout_list_head) {
1182 process_queue = process_queue && memorystatus_swap_over_trigger(swapout_sleep_threshold);
1183 }
1184#endif /* CONFIG_JETSAM */
1185 return process_queue;
1186}
1187
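/*
 * Swapout pipeline implemented below: vm_swapout_thread pulls non-busy
 * c_segments off a swapout queue, optionally encrypts them, and hands them to
 * vm_swap_put() along with a free swapout_io_completion (soc) slot.  The
 * write completes asynchronously through vm_swapout_iodone(), which marks the
 * soc done and wakes this thread; vm_swapout_complete_soc() then calls
 * vm_swap_put_finish()/vm_swapout_finish() to depopulate the buffer on
 * success or requeue the segment on failure.
 */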
1188void
1189vm_swapout_thread(void)
1190{
1191 uint32_t size = 0;
1192 c_segment_t c_seg = NULL;
1193 kern_return_t kr = KERN_SUCCESS;
1194 struct swapout_io_completion *soc;
1195 queue_head_t *swapout_list_head;
1196 bool queues_empty = false;
1197
1198 if (!vm_swapout_thread_inited) {
1199#if CONFIG_THREAD_GROUPS
1200 thread_group_vm_add();
1201#endif /* CONFIG_THREAD_GROUPS */
1202 current_thread()->options |= TH_OPT_VMPRIV;
1203 vm_swapout_thread_inited = true;
1204 }
1205
1206 vm_swapout_thread_awakened++;
1207
1208 lck_mtx_lock_spin_always(c_list_lock);
1209
1210 swapout_list_head = &c_early_swapout_list_head;
1211 vm_swapout_thread_running = TRUE;
1212 os_atomic_store(&vm_swapout_wake_pending, false, relaxed);
1213again:
1214 while (should_process_swapout_queue(swapout_list_head)) {
1215 c_seg = (c_segment_t)queue_first(swapout_list_head);
1216
 lck_mtx_lock_spin_always(&c_seg->c_lock);
1218
1219 assert(c_seg->c_state == C_ON_SWAPOUT_Q);
1220
1221 if (c_seg->c_busy) {
1222 lck_mtx_unlock_always(c_list_lock);
1223
1224 c_seg_wait_on_busy(c_seg);
1225
1226 lck_mtx_lock_spin_always(c_list_lock);
1227
1228 continue;
1229 }
1230 vm_swapout_thread_processed_segments++;
1231
1232 size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
1233
1234 if (size == 0) {
1235 assert(c_seg->c_bytes_used == 0);
1236
1237 /*
1238 * c_seg_free_locked will drop the c_list_lock and
1239 * the c_seg->c_lock.
1240 */
1241 C_SEG_BUSY(c_seg);
1242 c_seg_free_locked(c_seg);
1243 c_seg = NULL;
1244
1245 vm_swapout_found_empty++;
1246 goto c_seg_is_empty;
1247 }
1248 C_SEG_BUSY(c_seg);
1249 c_seg->c_busy_swapping = 1;
1250
1251 c_seg_switch_state(c_seg, C_ON_SWAPIO_Q, FALSE);
1252
1253 lck_mtx_unlock_always(c_list_lock);
1254 lck_mtx_unlock_always(&c_seg->c_lock);
1255
1256#if CHECKSUM_THE_SWAP
1257 c_seg->cseg_hash = hash_string((char *)c_seg->c_store.c_buffer, (int)size);
1258 c_seg->cseg_swap_size = size;
1259#endif /* CHECKSUM_THE_SWAP */
1260
1261#if ENCRYPTED_SWAP
1262 vm_swap_encrypt(c_seg);
1263#endif /* ENCRYPTED_SWAP */
1264
1265 soc = vm_swapout_find_free_soc();
1266 assert(soc);
1267
1268 soc->swp_upl_ctx.io_context = (void *)soc;
1269 soc->swp_upl_ctx.io_done = (void *)vm_swapout_iodone;
1270 soc->swp_upl_ctx.io_error = 0;
1271
1272 kr = vm_swap_put((vm_offset_t)c_seg->c_store.c_buffer, &soc->swp_f_offset, size, c_seg, soc);
1273
1274 if (kr != KERN_SUCCESS) {
1275 if (soc->swp_io_done) {
1276 lck_mtx_lock_spin_always(c_list_lock);
1277
1278 soc->swp_io_done = 0;
1279 vm_swapout_soc_done--;
1280
1281 lck_mtx_unlock_always(c_list_lock);
1282 }
 vm_swapout_finish(c_seg, soc->swp_f_offset, size, kr);
1284 } else {
1285 soc->swp_io_busy = 1;
1286 vm_swapout_soc_busy++;
1287 }
1288
1289c_seg_is_empty:
1290 if (!(c_early_swapout_count + c_regular_swapout_count + c_late_swapout_count)) {
1291 vm_swap_consider_defragmenting(VM_SWAP_FLAGS_NONE);
1292 }
1293
1294 lck_mtx_lock_spin_always(c_list_lock);
1295
1296 while ((soc = vm_swapout_find_done_soc())) {
1297 vm_swapout_complete_soc(soc);
1298 }
1299 lck_mtx_unlock_always(c_list_lock);
1300
1301 vm_swapout_thread_throttle_adjust();
1302
1303 lck_mtx_lock_spin_always(c_list_lock);
1304 }
1305 while ((soc = vm_swapout_find_done_soc())) {
1306 vm_swapout_complete_soc(soc);
1307 }
1308 lck_mtx_unlock_always(c_list_lock);
1309
1310 vm_pageout_io_throttle();
1311
1312 lck_mtx_lock_spin_always(c_list_lock);
1313
1314 /*
1315 * Recheck if we have some c_segs to wakeup
1316 * post throttle. And, check to see if we
1317 * have any more swapouts needed.
1318 */
1319 if (vm_swapout_soc_done) {
1320 goto again;
1321 }
1322
1323#if XNU_TARGET_OS_OSX
1324 queues_empty = queue_empty(&c_early_swapout_list_head) && queue_empty(&c_regular_swapout_list_head) && queue_empty(&c_late_swapout_list_head);
1325#else /* XNU_TARGET_OS_OSX */
1326 queues_empty = queue_empty(&c_early_swapout_list_head) && queue_empty(&c_late_swapout_list_head);
1327#endif /* XNU_TARGET_OS_OSX */
1328
1329 if (!queues_empty) {
1330 swapout_list_head = NULL;
1331 if (!queue_empty(&c_early_swapout_list_head)) {
1332 swapout_list_head = &c_early_swapout_list_head;
1333 } else {
1334#if XNU_TARGET_OS_OSX
1335 /*
 * On macOS we _always_ process all swapout queues.
1337 */
1338 if (!queue_empty(&c_regular_swapout_list_head)) {
1339 swapout_list_head = &c_regular_swapout_list_head;
1340 } else {
1341 swapout_list_head = &c_late_swapout_list_head;
1342 }
1343#else /* XNU_TARGET_OS_OSX */
1344 /*
1345 * On non-macOS swap-capable platforms, we might want to
 * process just the early queue (Freezer) or process both
1347 * early and late queues (app swap). We processed the early
1348 * queue up above. The late Q will only be processed if the
1349 * checks in should_process_swapout_queue give the go-ahead.
1350 */
1351 swapout_list_head = &c_late_swapout_list_head;
1352#endif /* XNU_TARGET_OS_OSX */
1353 }
1354 if (swapout_list_head && should_process_swapout_queue(swapout_list_head)) {
1355 goto again;
1356 }
1357 }
1358
 assert_wait((event_t)&vm_swapout_thread, THREAD_UNINT);
1360
1361 vm_swapout_thread_running = FALSE;
1362
1363 lck_mtx_unlock_always(c_list_lock);
1364
 thread_block((thread_continue_t)vm_swapout_thread);
1366
1367 /* NOTREACHED */
1368}
1369
1370
1371void
1372vm_swapout_iodone(void *io_context, int error)
1373{
1374 struct swapout_io_completion *soc;
1375
1376 soc = (struct swapout_io_completion *)io_context;
1377
1378 lck_mtx_lock_spin_always(c_list_lock);
1379
1380 soc->swp_io_done = 1;
1381 soc->swp_io_error = error;
1382 vm_swapout_soc_done++;
1383
1384 if (!vm_swapout_thread_running) {
1385 thread_wakeup((event_t)&vm_swapout_thread);
1386 }
1387
1388 lck_mtx_unlock_always(c_list_lock);
1389}
1390
1391
1392static void
1393vm_swapout_finish(c_segment_t c_seg, uint64_t f_offset, uint32_t size, kern_return_t kr)
1394{
1395 PAGE_REPLACEMENT_DISALLOWED(TRUE);
1396
1397 if (kr == KERN_SUCCESS) {
 kernel_memory_depopulate((vm_offset_t)c_seg->c_store.c_buffer, size,
 KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR);
1400 }
1401#if ENCRYPTED_SWAP
1402 else {
1403 vm_swap_decrypt(c_seg);
1404 }
1405#endif /* ENCRYPTED_SWAP */
1406 lck_mtx_lock_spin_always(c_list_lock);
 lck_mtx_lock_spin_always(&c_seg->c_lock);
1408
1409 if (kr == KERN_SUCCESS) {
1410 int new_state = C_ON_SWAPPEDOUT_Q;
1411 boolean_t insert_head = FALSE;
1412
1413 if (hibernate_flushing == TRUE) {
1414 if (c_seg->c_generation_id >= first_c_segment_to_warm_generation_id &&
1415 c_seg->c_generation_id <= last_c_segment_to_warm_generation_id) {
1416 insert_head = TRUE;
1417 }
1418 } else if (C_SEG_ONDISK_IS_SPARSE(c_seg)) {
1419 new_state = C_ON_SWAPPEDOUTSPARSE_Q;
1420 }
1421
1422 c_seg_switch_state(c_seg, new_state, insert_head);
1423
1424 c_seg->c_store.c_swap_handle = f_offset;
1425
 counter_add(&vm_statistics_swapouts, size >> PAGE_SHIFT);
1427
1428 c_seg->c_swappedin = false;
1429
1430 if (c_seg->c_bytes_used) {
1431 OSAddAtomic64(-c_seg->c_bytes_used, &compressor_bytes_used);
1432 }
1433
1434#if CONFIG_FREEZE
1435 /*
1436 * Successful swapout. Decrement the in-core compressed pages count.
1437 */
1438 OSAddAtomic(-(c_seg->c_slots_used), &c_segment_pages_compressed_incore);
1439 assertf(c_segment_pages_compressed_incore >= 0, "-ve incore count %p 0x%x", c_seg, c_segment_pages_compressed_incore);
1440 if (c_seg->c_has_donated_pages) {
1441 OSAddAtomic(-(c_seg->c_slots_used), &c_segment_pages_compressed_incore_late_swapout);
1442 }
1443#endif /* CONFIG_FREEZE */
1444 } else {
1445 if (c_seg->c_overage_swap == TRUE) {
1446 c_seg->c_overage_swap = FALSE;
1447 c_overage_swapped_count--;
1448 }
1449
1450#if CONFIG_FREEZE
1451 if (c_seg->c_has_freezer_pages) {
1452 if (c_seg->c_task_owner) {
1453 c_seg_update_task_owner(c_seg, NULL);
1454 }
1455 /*
1456 * We failed to swapout a frozen cseg. We need
1457 * to put it back in the queues, specifically the
1458 * AGE_Q. So clear the donated bit otherwise it'll
1459 * land on the swapped_in Q.
1460 */
1461 c_seg->c_has_donated_pages = 0;
1462 c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
1463 } else
1464#endif /* CONFIG_FREEZE */
1465 {
1466 if (c_seg->c_has_donated_pages) {
1467 c_seg_switch_state(c_seg, C_ON_SWAPPEDIN_Q, FALSE);
1468 } else {
1469 c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
1470 }
1471 }
1472
1473 if (!c_seg->c_on_minorcompact_q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE) {
1474 c_seg_need_delayed_compaction(c_seg, TRUE);
1475 }
1476 }
1477 assert(c_seg->c_busy_swapping);
1478 assert(c_seg->c_busy);
1479
1480 c_seg->c_busy_swapping = 0;
1481 lck_mtx_unlock_always(c_list_lock);
1482
1483 C_SEG_WAKEUP_DONE(c_seg);
1484 lck_mtx_unlock_always(&c_seg->c_lock);
1485
1486 PAGE_REPLACEMENT_DISALLOWED(FALSE);
1487}
1488
1489
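/*
 * Swapfile creation, as implemented below: reuse a SWAP_REUSE entry if one is
 * still queued, otherwise allocate a fresh struct swapfile named
 * "<swapfilename><N>".  Preallocation starts at MAX_SWAP_FILE_SIZE and halves
 * the requested size on failure until it succeeds or drops below
 * MIN_SWAP_FILE_SIZE.
 */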
1490boolean_t
1491vm_swap_create_file()
1492{
1493 uint64_t size = 0;
1494 int namelen = 0;
1495 boolean_t swap_file_created = FALSE;
1496 boolean_t swap_file_reuse = FALSE;
1497 boolean_t swap_file_pin = FALSE;
1498 struct swapfile *swf = NULL;
1499
1500 /*
1501 * make sure we've got all the info we need
1502 * to potentially pin a swap file... we could
1503 * be swapping out due to hibernation w/o ever
1504 * having run vm_pageout_scan, which is normally
1505 * the trigger to do the init
1506 */
1507 vm_compaction_swapper_do_init();
1508
1509 /*
1510 * Any swapfile structure ready for re-use?
1511 */
1512
 lck_mtx_lock(&vm_swap_data_lock);
1514
1515 swf = (struct swapfile*) queue_first(&swf_global_queue);
1516
1517 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1518 if (swf->swp_flags == SWAP_REUSE) {
1519 swap_file_reuse = TRUE;
1520 break;
1521 }
1522 swf = (struct swapfile*) queue_next(&swf->swp_queue);
1523 }
1524
 lck_mtx_unlock(&vm_swap_data_lock);
1526
1527 if (swap_file_reuse == FALSE) {
 namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;
1529
1530 swf = kalloc_type(struct swapfile, Z_WAITOK | Z_ZERO);
1531 swf->swp_index = vm_num_swap_files + 1;
1532 swf->swp_pathlen = namelen;
1533 swf->swp_path = kalloc_data(swf->swp_pathlen, Z_WAITOK | Z_ZERO);
1534
1535 snprintf(swf->swp_path, namelen, "%s%d", swapfilename, vm_num_swap_files);
1536 }
1537
 vm_swapfile_open(swf->swp_path, &swf->swp_vp);
1539
1540 if (swf->swp_vp == NULL) {
1541 if (swap_file_reuse == FALSE) {
1542 kfree_data(swf->swp_path, swf->swp_pathlen);
1543 kfree_type(struct swapfile, swf);
1544 }
1545 return FALSE;
1546 }
1547 vm_swapfile_can_be_created = TRUE;
1548
1549 size = MAX_SWAP_FILE_SIZE;
1550
1551 while (size >= MIN_SWAP_FILE_SIZE) {
1552 swap_file_pin = VM_SWAP_SHOULD_PIN(size);
1553
 if (vm_swapfile_preallocate(swf->swp_vp, &size, &swap_file_pin) == 0) {
1555 int num_bytes_for_bitmap = 0;
1556
1557 swap_file_created = TRUE;
1558
1559 swf->swp_size = size;
1560 swf->swp_nsegs = (unsigned int) (size / compressed_swap_chunk_size);
1561 swf->swp_nseginuse = 0;
1562 swf->swp_free_hint = 0;
1563
1564 num_bytes_for_bitmap = MAX((swf->swp_nsegs >> 3), 1);
1565 /*
1566 * Allocate a bitmap that describes the
1567 * number of segments held by this swapfile.
1568 */
1569 swf->swp_bitmap = kalloc_data(num_bytes_for_bitmap,
1570 Z_WAITOK | Z_ZERO);
1571
1572 swf->swp_csegs = kalloc_type(c_segment_t, swf->swp_nsegs,
1573 Z_WAITOK | Z_ZERO);
1574
1575 /*
1576 * passing a NULL trim_list into vnode_trim_list
1577 * will return ENOTSUP if trim isn't supported
1578 * and 0 if it is
1579 */
 if (vnode_trim_list(swf->swp_vp, NULL, FALSE) == 0) {
1581 swp_trim_supported = TRUE;
1582 }
1583
 lck_mtx_lock(&vm_swap_data_lock);
1585
1586 swf->swp_flags = SWAP_READY;
1587
1588 if (swap_file_reuse == FALSE) {
1589 queue_enter(&swf_global_queue, swf, struct swapfile*, swp_queue);
1590 }
1591
1592 vm_num_swap_files++;
1593
1594 vm_swapfile_total_segs_alloced += swf->swp_nsegs;
1595 if (vm_swapfile_total_segs_alloced > vm_swapfile_total_segs_alloced_max) {
1596 vm_swapfile_total_segs_alloced_max = vm_swapfile_total_segs_alloced;
1597 }
1598
1599 if (swap_file_pin == TRUE) {
1600 vm_num_pinned_swap_files++;
1601 swf->swp_flags |= SWAP_PINNED;
1602 vm_swappin_avail -= swf->swp_size;
1603 }
1604
 lck_mtx_unlock(&vm_swap_data_lock);
1606
1607 thread_wakeup((event_t) &vm_num_swap_files);
1608#if !XNU_TARGET_OS_OSX
1609 if (vm_num_swap_files == 1) {
1610 c_overage_swapped_limit = (uint32_t)size / c_seg_bufsize;
1611
1612 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1613 c_overage_swapped_limit /= 2;
1614 }
1615 }
1616#endif /* !XNU_TARGET_OS_OSX */
1617 break;
1618 } else {
1619 size = size / 2;
1620 }
1621 }
1622 if (swap_file_created == FALSE) {
 vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);
1624
1625 swf->swp_vp = NULL;
1626
1627 if (swap_file_reuse == FALSE) {
1628 kfree_data(swf->swp_path, swf->swp_pathlen);
1629 kfree_type(struct swapfile, swf);
1630 }
1631 }
1632 return swap_file_created;
1633}
1634
1635extern void vnode_put(struct vnode* vp);
1636kern_return_t
1637vm_swap_get(c_segment_t c_seg, uint64_t f_offset, uint64_t size)
1638{
1639 struct swapfile *swf = NULL;
1640 uint64_t file_offset = 0;
1641 int retval = 0;
1642
1643 assert(c_seg->c_store.c_buffer);
1644
 lck_mtx_lock(&vm_swap_data_lock);
1646
1647 swf = vm_swapfile_for_handle(f_offset);
1648
1649 if (swf == NULL || (!(swf->swp_flags & SWAP_READY) && !(swf->swp_flags & SWAP_RECLAIM))) {
1650 vm_swap_get_failures++;
1651 retval = 1;
1652 goto done;
1653 }
1654 swf->swp_io_count++;
1655
 lck_mtx_unlock(&vm_swap_data_lock);
1657
1658#if DEVELOPMENT || DEBUG
1659 C_SEG_MAKE_WRITEABLE(c_seg);
1660#endif
1661 file_offset = (f_offset & SWAP_SLOT_MASK);
1662
 if ((retval = vnode_getwithref(swf->swp_vp)) != 0) {
 printf("vm_swap_get: vnode_getwithref on swapfile failed with %d\n", retval);
 } else {
 retval = vm_swapfile_io(swf->swp_vp, file_offset, (uint64_t)c_seg->c_store.c_buffer, (int)(size / PAGE_SIZE_64), SWAP_READ, NULL);
 vnode_put(swf->swp_vp);
1668 }
1669
1670#if DEVELOPMENT || DEBUG
1671 C_SEG_WRITE_PROTECT(c_seg);
1672#endif
1673 if (retval == 0) {
 counter_add(&vm_statistics_swapins, size >> PAGE_SHIFT);
1675 } else {
1676 vm_swap_get_failures++;
1677 }
1678
1679 /*
1680 * Free this slot in the swap structure.
1681 */
1682 vm_swap_free(f_offset);
1683
 lck_mtx_lock(&vm_swap_data_lock);
1685 swf->swp_io_count--;
1686
1687 if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {
1688 swf->swp_flags &= ~SWAP_WANTED;
1689 thread_wakeup((event_t) &swf->swp_flags);
1690 }
1691done:
 lck_mtx_unlock(&vm_swap_data_lock);
1693
1694 if (retval == 0) {
1695 return KERN_SUCCESS;
1696 } else {
1697 return KERN_FAILURE;
1698 }
1699}
1700
1701kern_return_t
1702vm_swap_put(vm_offset_t addr, uint64_t *f_offset, uint32_t size, c_segment_t c_seg, struct swapout_io_completion *soc)
1703{
1704 unsigned int segidx = 0;
1705 struct swapfile *swf = NULL;
1706 uint64_t file_offset = 0;
1707 uint64_t swapfile_index = 0;
1708 unsigned int byte_for_segidx = 0;
1709 unsigned int offset_within_byte = 0;
1710 boolean_t swf_eligible = FALSE;
1711 boolean_t waiting = FALSE;
1712 boolean_t retried = FALSE;
1713 int error = 0;
1714 clock_sec_t sec;
1715 clock_nsec_t nsec;
1716 void *upl_ctx = NULL;
1717 boolean_t drop_iocount = FALSE;
1718
1719 if (addr == 0 || f_offset == NULL || compressor_store_stop_compaction) {
1720 return KERN_FAILURE;
1721 }
1722retry:
 lck_mtx_lock(&vm_swap_data_lock);
1724
1725 swf = (struct swapfile*) queue_first(&swf_global_queue);
1726
1727 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1728 segidx = swf->swp_free_hint;
1729
1730 swf_eligible = (swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse < swf->swp_nsegs);
1731
1732 if (swf_eligible) {
1733 while (segidx < swf->swp_nsegs) {
1734 byte_for_segidx = segidx >> 3;
1735 offset_within_byte = segidx % 8;
1736
1737 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
1738 segidx++;
1739 continue;
1740 }
1741
1742 (swf->swp_bitmap)[byte_for_segidx] |= (uint8_t)(1 << offset_within_byte);
1743
1744 file_offset = segidx * compressed_swap_chunk_size;
1745 swf->swp_nseginuse++;
1746 swf->swp_io_count++;
1747 swf->swp_csegs[segidx] = c_seg;
1748
1749 swapfile_index = swf->swp_index;
1750 vm_swapfile_total_segs_used++;
1751 if (vm_swapfile_total_segs_used > vm_swapfile_total_segs_used_max) {
1752 vm_swapfile_total_segs_used_max = vm_swapfile_total_segs_used;
1753 }
1754
 clock_get_system_nanotime(&sec, &nsec);
1756
1757 if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running) {
1758 thread_wakeup((event_t) &vm_swapfile_create_needed);
1759 }
1760
 lck_mtx_unlock(&vm_swap_data_lock);
1762
1763 goto issue_io;
1764 }
1765 }
1766 swf = (struct swapfile*) queue_next(&swf->swp_queue);
1767 }
1768 assert(queue_end(&swf_global_queue, (queue_entry_t) swf));
1769
	/*
	 * we've run out of swap segments, but may not
	 * be in a position to immediately create a new swap
	 * file if we've recently failed to create due to a lack
	 * of free space in the root filesystem... we'll try
	 * to kick that create off, but in any event we're going
	 * to take a breather (up to 1 second) so that we're not caught in a tight
	 * loop back in "vm_compressor_compact_and_swap" trying to stuff
	 * segments into swap files only to have them immediately put back
	 * on the c_age queue due to vm_swap_put failing.
	 *
	 * if we're doing these puts due to a hibernation flush,
	 * no need to block... setting hibernate_no_swapspace to TRUE
	 * will cause "vm_compressor_compact_and_swap" to immediately abort
	 */
	clock_get_system_nanotime(&sec, &nsec);

	if (VM_SWAP_SHOULD_CREATE(sec)) {
		if (!vm_swapfile_create_thread_running) {
			thread_wakeup((event_t) &vm_swapfile_create_needed);
		}
		waiting = TRUE;
		assert_wait_timeout((event_t) &vm_num_swap_files, THREAD_INTERRUPTIBLE, 1000, 1000 * NSEC_PER_USEC);
	} else {
		if (hibernate_flushing) {
			hibernate_no_swapspace = TRUE;
		}
	}

	lck_mtx_unlock(&vm_swap_data_lock);

	if (waiting == TRUE) {
		thread_block(THREAD_CONTINUE_NULL);

		if (retried == FALSE && hibernate_flushing == TRUE) {
			retried = TRUE;
			goto retry;
		}
	}
	vm_swap_put_failures_no_swap_file++;

	return KERN_FAILURE;

issue_io:
	assert(c_seg->c_busy_swapping);
	assert(c_seg->c_busy);
	assert(!c_seg->c_on_minorcompact_q);

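	/*
	 * pack the swap handle the caller will hold on to: the swapfile
	 * index lives above SWAP_DEVICE_SHIFT and the byte offset within
	 * that file sits in the low bits, so vm_swapfile_for_handle() and
	 * SWAP_SLOT_MASK can take it apart again later
	 */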
	*f_offset = (swapfile_index << SWAP_DEVICE_SHIFT) | file_offset;

	if (soc) {
		soc->swp_c_seg = c_seg;
		soc->swp_c_size = size;

		soc->swp_swf = swf;

		soc->swp_io_error = 0;
		soc->swp_io_done = 0;

		upl_ctx = (void *)&soc->swp_upl_ctx;
	}

	if ((error = vnode_getwithref(swf->swp_vp)) != 0) {
		printf("vm_swap_put: vnode_getwithref on swapfile failed with %d\n", error);
	} else {
		error = vm_swapfile_io(swf->swp_vp, file_offset, addr, (int) (size / PAGE_SIZE_64), SWAP_WRITE, upl_ctx);
		drop_iocount = TRUE;
	}

	if (error || upl_ctx == NULL) {
		return vm_swap_put_finish(swf, f_offset, error, drop_iocount);
	}

	return KERN_SUCCESS;
}

kern_return_t
vm_swap_put_finish(struct swapfile *swf, uint64_t *f_offset, int error, boolean_t drop_iocount)
{
	if (drop_iocount) {
		vnode_put(swf->swp_vp);
	}

	lck_mtx_lock(&vm_swap_data_lock);

	swf->swp_io_count--;

	if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {
		swf->swp_flags &= ~SWAP_WANTED;
		thread_wakeup((event_t) &swf->swp_flags);
	}
	lck_mtx_unlock(&vm_swap_data_lock);

	if (error) {
		vm_swap_free(*f_offset);
		vm_swap_put_failures++;

		return KERN_FAILURE;
	}
	return KERN_SUCCESS;
}


static void
vm_swap_free_now(struct swapfile *swf, uint64_t f_offset)
{
	uint64_t file_offset = 0;
	unsigned int segidx = 0;


	if ((swf->swp_flags & SWAP_READY) || (swf->swp_flags & SWAP_RECLAIM)) {
		unsigned int byte_for_segidx = 0;
		unsigned int offset_within_byte = 0;

		file_offset = (f_offset & SWAP_SLOT_MASK);
		segidx = (unsigned int) (file_offset / compressed_swap_chunk_size);

		byte_for_segidx = segidx >> 3;
		offset_within_byte = segidx % 8;

		if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
			(swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);

			swf->swp_csegs[segidx] = NULL;

			swf->swp_nseginuse--;
			vm_swapfile_total_segs_used--;

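			/*
			 * keep the free hint pointing at the lowest slot that might
			 * be free so the allocation scan in vm_swap_put can start there
			 */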
			if (segidx < swf->swp_free_hint) {
				swf->swp_free_hint = segidx;
			}
		}
		if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running) {
			thread_wakeup((event_t) &vm_swapfile_gc_needed);
		}
	}
}


uint32_t vm_swap_free_now_count = 0;
uint32_t vm_swap_free_delayed_count = 0;


void
vm_swap_free(uint64_t f_offset)
{
	struct swapfile *swf = NULL;
	struct trim_list *tl = NULL;
	clock_sec_t sec;
	clock_nsec_t nsec;

	if (swp_trim_supported == TRUE) {
		tl = kalloc_type(struct trim_list, Z_WAITOK);
	}

	lck_mtx_lock(&vm_swap_data_lock);

	swf = vm_swapfile_for_handle(f_offset);

	if (swf && (swf->swp_flags & (SWAP_READY | SWAP_RECLAIM))) {
		if (swp_trim_supported == FALSE || (swf->swp_flags & SWAP_RECLAIM)) {
			/*
			 * don't delay the free if the underlying disk doesn't support
			 * trim, or we're in the midst of reclaiming this swap file since
			 * we don't want to move segments that are technically free
			 * but not yet handled by the delayed free mechanism
			 */
			vm_swap_free_now(swf, f_offset);

			vm_swap_free_now_count++;
			goto done;
		}
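		/*
		 * trim is supported and this file isn't being reclaimed, so defer
		 * the free: queue the slot on this swapfile's delayed-trim list
		 * and let vm_swap_do_delayed_trim issue the TRIM, clear the bitmap
		 * bit, and free the node in one batch later on
		 */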
		tl->tl_offset = f_offset & SWAP_SLOT_MASK;
		tl->tl_length = compressed_swap_chunk_size;

		tl->tl_next = swf->swp_delayed_trim_list_head;
		swf->swp_delayed_trim_list_head = tl;
		swf->swp_delayed_trim_count++;
		tl = NULL;

		if (VM_SWAP_SHOULD_TRIM(swf) && !vm_swapfile_create_thread_running) {
			clock_get_system_nanotime(&sec, &nsec);

			if (sec > dont_trim_until_ts) {
				thread_wakeup((event_t) &vm_swapfile_create_needed);
			}
		}
		vm_swap_free_delayed_count++;
	}
done:
	lck_mtx_unlock(&vm_swap_data_lock);

	if (tl != NULL) {
		kfree_type(struct trim_list, tl);
	}
}


static void
vm_swap_wait_on_trim_handling_in_progress()
{
	while (delayed_trim_handling_in_progress == TRUE) {
		assert_wait((event_t) &delayed_trim_handling_in_progress, THREAD_UNINT);
		lck_mtx_unlock(&vm_swap_data_lock);

		thread_block(THREAD_CONTINUE_NULL);

		lck_mtx_lock(&vm_swap_data_lock);
	}
}


static void
vm_swap_handle_delayed_trims(boolean_t force_now)
{
	struct swapfile *swf = NULL;

	/*
	 * serialize the race between us and vm_swap_reclaim...
	 * if vm_swap_reclaim wins it will turn off SWAP_READY
	 * on the victim it has chosen... we can just skip over
	 * that file since vm_swap_reclaim will first process
	 * all of the delayed trims associated with it
	 */

	if (compressor_store_stop_compaction == TRUE) {
		return;
	}

	lck_mtx_lock(&vm_swap_data_lock);

	delayed_trim_handling_in_progress = TRUE;

	lck_mtx_unlock(&vm_swap_data_lock);

	/*
	 * no need to hold the lock to walk the swf list since
	 * vm_swap_create (the only place where we add to this list)
	 * is run on the same thread as this function,
	 * and vm_swap_reclaim doesn't remove items from this list,
	 * instead marking them with SWAP_REUSE for future re-use
	 */
	swf = (struct swapfile*) queue_first(&swf_global_queue);

	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
		if ((swf->swp_flags & SWAP_READY) && (force_now == TRUE || VM_SWAP_SHOULD_TRIM(swf))) {
			assert(!(swf->swp_flags & SWAP_RECLAIM));
			vm_swap_do_delayed_trim(swf);
		}
		swf = (struct swapfile*) queue_next(&swf->swp_queue);
	}
	lck_mtx_lock(&vm_swap_data_lock);

	delayed_trim_handling_in_progress = FALSE;
	thread_wakeup((event_t) &delayed_trim_handling_in_progress);

	if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running) {
		thread_wakeup((event_t) &vm_swapfile_gc_needed);
	}

	lck_mtx_unlock(&vm_swap_data_lock);
}

static void
vm_swap_do_delayed_trim(struct swapfile *swf)
{
	struct trim_list *tl, *tl_head;
	int error;

	if (compressor_store_stop_compaction == TRUE) {
		return;
	}

	if ((error = vnode_getwithref(swf->swp_vp)) != 0) {
		printf("vm_swap_do_delayed_trim: vnode_getwithref on swapfile failed with %d\n", error);
		return;
	}

	lck_mtx_lock(&vm_swap_data_lock);

	tl_head = swf->swp_delayed_trim_list_head;
	swf->swp_delayed_trim_list_head = NULL;
	swf->swp_delayed_trim_count = 0;

	lck_mtx_unlock(&vm_swap_data_lock);

	vnode_trim_list(swf->swp_vp, tl_head, TRUE);

	(void) vnode_put(swf->swp_vp);

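	/*
	 * the TRIMs have been issued... now walk the list we detached and
	 * release each slot back to the allocator by clearing its bitmap bit
	 * (mirroring what vm_swap_free_now does for the non-trim case)
	 */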
	while ((tl = tl_head) != NULL) {
		unsigned int segidx = 0;
		unsigned int byte_for_segidx = 0;
		unsigned int offset_within_byte = 0;

		lck_mtx_lock(&vm_swap_data_lock);

		segidx = (unsigned int) (tl->tl_offset / compressed_swap_chunk_size);

		byte_for_segidx = segidx >> 3;
		offset_within_byte = segidx % 8;

		if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
			(swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);

			swf->swp_csegs[segidx] = NULL;

			swf->swp_nseginuse--;
			vm_swapfile_total_segs_used--;

			if (segidx < swf->swp_free_hint) {
				swf->swp_free_hint = segidx;
			}
		}
		lck_mtx_unlock(&vm_swap_data_lock);

		tl_head = tl->tl_next;

		kfree_type(struct trim_list, tl);
	}
}


void
vm_swap_flush()
{
	return;
}

int vm_swap_reclaim_yielded = 0;

void
vm_swap_reclaim(void)
{
	vm_offset_t addr = 0;
	unsigned int segidx = 0;
	uint64_t f_offset = 0;
	struct swapfile *swf = NULL;
	struct swapfile *smallest_swf = NULL;
	unsigned int min_nsegs = 0;
	unsigned int byte_for_segidx = 0;
	unsigned int offset_within_byte = 0;
	uint32_t c_size = 0;

	c_segment_t c_seg = NULL;

	kmem_alloc(compressor_map, (vm_offset_t *)&addr, c_seg_bufsize,
	    KMA_NOFAIL | KMA_KOBJECT | KMA_DATA, VM_KERN_MEMORY_COMPRESSOR);
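	/*
	 * 'addr' is a bounce buffer big enough for one c_segment... each live
	 * segment in the victim swapfile is read back into it and then pushed
	 * out again via vm_swap_put so it lands in one of the remaining files
	 */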

	lck_mtx_lock(&vm_swap_data_lock);

	/*
	 * if we're running the swapfile list looking for
	 * candidates with delayed trims, we need to
	 * wait before making our decision concerning
	 * the swapfile we want to reclaim
	 */
	vm_swap_wait_on_trim_handling_in_progress();

	/*
	 * from here until we knock down the SWAP_READY bit,
	 * we need to remain behind the vm_swap_data_lock...
	 * once that bit has been turned off, "vm_swap_handle_delayed_trims"
	 * will not consider this swapfile for processing
	 */
	swf = (struct swapfile*) queue_first(&swf_global_queue);
	min_nsegs = MAX_SWAP_FILE_SIZE / compressed_swap_chunk_size;
	smallest_swf = NULL;
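	/*
	 * pick the READY swapfile with the fewest segments in use...
	 * min_nsegs starts at the per-file maximum so any READY file
	 * qualifies, and reclaiming the emptiest one moves the least data
	 */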

	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
		if ((swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse <= min_nsegs)) {
			smallest_swf = swf;
			min_nsegs = swf->swp_nseginuse;
		}
		swf = (struct swapfile*) queue_next(&swf->swp_queue);
	}

	if (smallest_swf == NULL) {
		goto done;
	}

	swf = smallest_swf;


	swf->swp_flags &= ~SWAP_READY;
	swf->swp_flags |= SWAP_RECLAIM;

	if (swf->swp_delayed_trim_count) {
		lck_mtx_unlock(&vm_swap_data_lock);

		vm_swap_do_delayed_trim(swf);

		lck_mtx_lock(&vm_swap_data_lock);
	}
	segidx = 0;

	while (segidx < swf->swp_nsegs) {
ReTry_for_cseg:
		/*
		 * Wait for outgoing I/Os.
		 */
		while (swf->swp_io_count) {
			swf->swp_flags |= SWAP_WANTED;

			assert_wait((event_t) &swf->swp_flags, THREAD_UNINT);
			lck_mtx_unlock(&vm_swap_data_lock);

			thread_block(THREAD_CONTINUE_NULL);

			lck_mtx_lock(&vm_swap_data_lock);
		}
		if (compressor_store_stop_compaction == TRUE || VM_SWAP_SHOULD_ABORT_RECLAIM() || VM_SWAP_BUSY()) {
			vm_swap_reclaim_yielded++;
			break;
		}

		byte_for_segidx = segidx >> 3;
		offset_within_byte = segidx % 8;

		if (((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) == 0) {
			segidx++;
			continue;
		}

		c_seg = swf->swp_csegs[segidx];
		assert(c_seg);

		lck_mtx_lock_spin_always(&c_seg->c_lock);

		if (c_seg->c_busy) {
			/*
			 * a swapped out c_segment in the process of being freed will remain in the
			 * busy state until after the vm_swap_free is called on it... vm_swap_free
			 * takes the vm_swap_data_lock, so can't change the swap state until after
			 * we drop the vm_swap_data_lock... once we do, vm_swap_free will complete
			 * which will allow c_seg_free_locked to clear busy and wake up this thread...
			 * at that point, we re-look up the swap state which will now indicate that
			 * this c_segment no longer exists.
			 */
			c_seg->c_wanted = 1;

			assert_wait((event_t) (c_seg), THREAD_UNINT);
			lck_mtx_unlock_always(&c_seg->c_lock);

			lck_mtx_unlock(&vm_swap_data_lock);

			thread_block(THREAD_CONTINUE_NULL);

			lck_mtx_lock(&vm_swap_data_lock);

			goto ReTry_for_cseg;
		}
		(swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);

		f_offset = segidx * compressed_swap_chunk_size;

		assert(c_seg == swf->swp_csegs[segidx]);
		swf->swp_csegs[segidx] = NULL;
		swf->swp_nseginuse--;

		vm_swapfile_total_segs_used--;

		lck_mtx_unlock(&vm_swap_data_lock);

		assert(C_SEG_IS_ONDISK(c_seg));

		C_SEG_BUSY(c_seg);
		c_seg->c_busy_swapping = 1;
#if !CHECKSUM_THE_SWAP
		c_seg_trim_tail(c_seg);
#endif
		c_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));

		assert(c_size <= c_seg_bufsize && c_size);

		lck_mtx_unlock_always(&c_seg->c_lock);

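		/*
		 * read the compressed data back into the bounce buffer, then
		 * re-issue a vm_swap_put... SWAP_READY is off on this file, so
		 * the put can only land in one of the other swapfiles
		 */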
		if (vnode_getwithref(swf->swp_vp)) {
			printf("vm_swap_reclaim: vnode_getwithref on swapfile failed.\n");
			vm_swap_get_failures++;
			goto swap_io_failed;
		} else {
			if (vm_swapfile_io(swf->swp_vp, f_offset, addr, (int)(c_size / PAGE_SIZE_64), SWAP_READ, NULL)) {
				/*
				 * reading the data back in failed, so convert c_seg
				 * to a swapped in c_segment that contains no data
				 */
				c_seg_swapin_requeue(c_seg, FALSE, TRUE, FALSE);
				/*
				 * returns with c_busy_swapping cleared
				 */
				vnode_put(swf->swp_vp);
				vm_swap_get_failures++;
				goto swap_io_failed;
			}
			vnode_put(swf->swp_vp);
		}

		counter_add(&vm_statistics_swapins, c_size >> PAGE_SHIFT);
		vmcs_stats.reclaim_swapins += c_size >> PAGE_SHIFT;

		if (vm_swap_put(addr, &f_offset, c_size, c_seg, NULL)) {
			vm_offset_t c_buffer;

			/*
			 * the put failed, so convert c_seg to a fully swapped in c_segment
			 * with valid data
			 */
			c_buffer = (vm_offset_t)C_SEG_BUFFER_ADDRESS(c_seg->c_mysegno);

			kernel_memory_populate(c_buffer, c_size,
			    KMA_NOFAIL | KMA_COMPRESSOR,
			    VM_KERN_MEMORY_COMPRESSOR);

			memcpy((char *)c_buffer, (char *)addr, c_size);

			c_seg->c_store.c_buffer = (int32_t *)c_buffer;
#if ENCRYPTED_SWAP
			vm_swap_decrypt(c_seg);
#endif /* ENCRYPTED_SWAP */
			c_seg_swapin_requeue(c_seg, TRUE, TRUE, FALSE);
			/*
			 * returns with c_busy_swapping cleared
			 */
			OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used);

			goto swap_io_failed;
		}
		counter_add(&vm_statistics_swapouts, c_size >> PAGE_SHIFT);

		lck_mtx_lock_spin_always(&c_seg->c_lock);

		c_seg->c_swappedin = false;

		assert(C_SEG_IS_ONDISK(c_seg));
		/*
		 * The c_seg will now know about the new location on disk.
		 */
		c_seg->c_store.c_swap_handle = f_offset;

		assert(c_seg->c_busy_swapping);
		c_seg->c_busy_swapping = 0;
swap_io_failed:
		assert(c_seg->c_busy);
		C_SEG_WAKEUP_DONE(c_seg);

		lck_mtx_unlock_always(&c_seg->c_lock);
		lck_mtx_lock(&vm_swap_data_lock);
	}

	if (swf->swp_nseginuse) {
		swf->swp_flags &= ~SWAP_RECLAIM;
		swf->swp_flags |= SWAP_READY;

		goto done;
	}
	/*
	 * We don't remove this inactive swf from the queue.
	 * That way, we can re-use it when needed again and
	 * preserve the namespace. The delayed_trim processing
	 * is also dependent on us not removing swfs from the queue.
	 */
	//queue_remove(&swf_global_queue, swf, struct swapfile*, swp_queue);

	vm_swapfile_total_segs_alloced -= swf->swp_nsegs;

	lck_mtx_unlock(&vm_swap_data_lock);

	vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);

	kfree_type(c_segment_t, swf->swp_nsegs, swf->swp_csegs);
	kfree_data(swf->swp_bitmap, MAX((swf->swp_nsegs >> 3), 1));

	lck_mtx_lock(&vm_swap_data_lock);

	if (swf->swp_flags & SWAP_PINNED) {
		vm_num_pinned_swap_files--;
		vm_swappin_avail += swf->swp_size;
	}

	swf->swp_vp = NULL;
	swf->swp_size = 0;
	swf->swp_free_hint = 0;
	swf->swp_nsegs = 0;
	swf->swp_flags = SWAP_REUSE;

	vm_num_swap_files--;

done:
	thread_wakeup((event_t) &swf->swp_flags);
	lck_mtx_unlock(&vm_swap_data_lock);

	kmem_free(compressor_map, (vm_offset_t) addr, c_seg_bufsize);
}


uint64_t
vm_swap_get_total_space(void)
{
	uint64_t total_space = 0;

	total_space = (uint64_t)vm_swapfile_total_segs_alloced * compressed_swap_chunk_size;

	return total_space;
}

uint64_t
vm_swap_get_used_space(void)
{
	uint64_t used_space = 0;

	used_space = (uint64_t)vm_swapfile_total_segs_used * compressed_swap_chunk_size;

	return used_space;
}

uint64_t
vm_swap_get_free_space(void)
{
	return vm_swap_get_total_space() - vm_swap_get_used_space();
}

uint64_t
vm_swap_get_max_configured_space(void)
{
	int num_swap_files = (vm_num_swap_files_config ? vm_num_swap_files_config : VM_MAX_SWAP_FILE_NUM);
	return num_swap_files * MAX_SWAP_FILE_SIZE;
}

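/*
 * we consider ourselves low on swap space once the number of free
 * segments drops below an eighth of vm_swapfile_hiwater_segs and the
 * most recent swapfile create attempt failed at or after the last
 * successful one (i.e. growing the swap pool isn't working either),
 * with a couple of guards for the no-swapfiles-yet case
 */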
int
vm_swap_low_on_space(void)
{
	if (vm_num_swap_files == 0 && vm_swapfile_can_be_created == FALSE) {
		return 0;
	}

	if (((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < ((unsigned int)vm_swapfile_hiwater_segs) / 8)) {
		if (vm_num_swap_files == 0 && !SWAPPER_NEEDS_TO_UNTHROTTLE()) {
			return 0;
		}

		if (vm_swapfile_last_failed_to_create_ts >= vm_swapfile_last_successful_create_ts) {
			return 1;
		}
	}
	return 0;
}

int
vm_swap_out_of_space(void)
{
	if ((vm_num_swap_files == vm_num_swap_files_config) &&
	    ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < VM_SWAPOUT_LIMIT_MAX)) {
		/*
		 * Last swapfile and we have only space for the
		 * last few swapouts.
		 */
		return 1;
	}

	return 0;
}

boolean_t
vm_swap_files_pinned(void)
{
	boolean_t result;

	if (vm_swappin_enabled == FALSE) {
		return TRUE;
	}

	result = (vm_num_pinned_swap_files == vm_num_swap_files);

	return result;
}

#if CONFIG_FREEZE
boolean_t
vm_swap_max_budget(uint64_t *freeze_daily_budget)
{
	boolean_t use_device_value = FALSE;
	struct swapfile *swf = NULL;

	if (vm_num_swap_files) {
		lck_mtx_lock(&vm_swap_data_lock);

		swf = (struct swapfile*) queue_first(&swf_global_queue);

		if (swf) {
			while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
				if (swf->swp_flags == SWAP_READY) {
					assert(swf->swp_vp);

					if (vm_swap_vol_get_budget(swf->swp_vp, freeze_daily_budget) == 0) {
						use_device_value = TRUE;
					}
					break;
				}
				swf = (struct swapfile*) queue_next(&swf->swp_queue);
			}
		}

		lck_mtx_unlock(&vm_swap_data_lock);
	} else {
		/*
		 * This block is used for the initial budget value before any swap files
		 * are created. We create a temp swap file to get the budget.
		 */

		struct vnode *temp_vp = NULL;

		vm_swapfile_open(swapfilename, &temp_vp);

		if (temp_vp) {
			if (vm_swap_vol_get_budget(temp_vp, freeze_daily_budget) == 0) {
				use_device_value = TRUE;
			}

			vm_swapfile_close((uint64_t)&swapfilename, temp_vp);
			temp_vp = NULL;
		} else {
			*freeze_daily_budget = 0;
		}
	}

	return use_device_value;
}
#endif /* CONFIG_FREEZE */

void
vm_swap_reset_max_segs_tracking(uint64_t *alloced_max, uint64_t *used_max)
{
	lck_mtx_lock(&vm_swap_data_lock);

	*alloced_max = (uint64_t) vm_swapfile_total_segs_alloced_max * compressed_swap_chunk_size;
	*used_max = (uint64_t) vm_swapfile_total_segs_used_max * compressed_swap_chunk_size;

	vm_swapfile_total_segs_alloced_max = vm_swapfile_total_segs_alloced;
	vm_swapfile_total_segs_used_max = vm_swapfile_total_segs_used;

	lck_mtx_unlock(&vm_swap_data_lock);
}
