vm_compressor_backing_store.c source code [xnu/osfmk/vm/vm_compressor_backing_store.c]

1	/*
2	* Copyright (c) 2000-2013 Apple Inc. All rights reserved.
3	*
4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5	*
6	* This file contains Original Code and/or Modifications of Original Code
7	* as defined in and that are subject to the Apple Public Source License
8	* Version 2.0 (the 'License'). You may not use this file except in
9	* compliance with the License. The rights granted to you under the License
10	* may not be used to create, or enable the creation or redistribution of,
11	* unlawful or unlicensed copies of an Apple operating system, or to
12	* circumvent, violate, or enable the circumvention or violation of, any
13	* terms of an Apple operating system software license agreement.
14	*
15	* Please obtain a copy of the License at
16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
17	*
18	* The Original Code and all software distributed under the License are
19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23	* Please see the License for the specific language governing rights and
24	* limitations under the License.
25	*
26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27	*/
28
29	#include "vm_compressor_backing_store.h"
30	#include <vm/vm_pageout.h>
31	#include <vm/vm_protos.h>
32
33	#include <IOKit/IOHibernatePrivate.h>
34
35	#include <kern/policy_internal.h>
36
37	LCK_GRP_DECLARE(vm_swap_data_lock_grp, "vm_swap_data");
38	LCK_MTX_DECLARE(vm_swap_data_lock, &vm_swap_data_lock_grp);
39
40	#if defined(XNU_TARGET_OS_OSX)
41	/*
42	* launchd explicitly turns ON swap later during boot on macOS devices.
43	*/
44	boolean_t compressor_store_stop_compaction = TRUE;
45	#else
46	boolean_t compressor_store_stop_compaction = FALSE;
47	#endif
48
49	boolean_t vm_swapfile_create_needed = FALSE;
50	boolean_t vm_swapfile_gc_needed = FALSE;
51
52	int vm_swapper_throttle = -`1`;
53	uint64_t vm_swapout_thread_id;
54
55	uint64_t vm_swap_put_failures = `0`; / Likely failed I/O. Data is still in memory. /
56	uint64_t vm_swap_get_failures = `0`; / Fatal /
57	uint64_t vm_swap_put_failures_no_swap_file = `0`; / Possibly not fatal because we might just need a new swapfile. /
58	int vm_num_swap_files_config = `0`;
59	int vm_num_swap_files = `0`;
60	int vm_num_pinned_swap_files = `0`;
61	uint64_t vm_swap_volume_capacity = `0`;
62	int vm_swapout_thread_processed_segments = `0`;
63	int vm_swapout_thread_awakened = `0`;
64	bool vm_swapout_thread_running = FALSE;
65	_Atomic bool vm_swapout_wake_pending = false;
66	int vm_swapfile_create_thread_awakened = `0`;
67	int vm_swapfile_create_thread_running = `0`;
68	int vm_swapfile_gc_thread_awakened = `0`;
69	int vm_swapfile_gc_thread_running = `0`;
70
71	int64_t vm_swappin_avail = `0`;
72	boolean_t vm_swappin_enabled = FALSE;
73	unsigned int vm_swapfile_total_segs_alloced = `0`;
74	unsigned int vm_swapfile_total_segs_alloced_max = `0`;
75	unsigned int vm_swapfile_total_segs_used = `0`;
76	unsigned int vm_swapfile_total_segs_used_max = `0`;
77
78	char swapfilename[MAX_SWAPFILENAME_LEN + `1`] = SWAP_FILE_NAME;
79
80	extern vm_map_t compressor_map;
81	extern uint32_t c_seg_bufsize, c_seg_allocsize, c_seg_off_limit;
82
/* Bit flags stored in struct swapfile::swp_flags. */
#define SWAP_READY      0x1     /* Swap file is ready to be used */
#define SWAP_RECLAIM    0x2     /* Swap file is marked to be reclaimed */
#define SWAP_WANTED     0x4     /* Swap file has waiters */
#define SWAP_REUSE      0x8     /* Swap file is on the Q and has a name. Reuse after init-ing.*/
#define SWAP_PINNED     0x10    /* Swap file is pinned (FusionDrive) */
88
89
90	struct swapfile {
91	queue_head_t swp_queue; / list of swap files /
92	char swp_path; /* saved pathname of swap file /
93	struct vnode swp_vp; /* backing vnode /
94	uint64_t swp_size; / size of this swap file /
95	uint8_t swp_bitmap; /* bitmap showing the alloced/freed slots in the swap file /
96	unsigned int swp_pathlen; / length of pathname /
97	unsigned int swp_nsegs; / #segments we can use /
98	unsigned int swp_nseginuse; / #segments in use /
99	unsigned int swp_index; / index of this swap file /
100	unsigned int swp_flags; / state of swap file /
101	unsigned int swp_free_hint; / offset of 1st free chunk /
102	unsigned int swp_io_count; / count of outstanding I/Os /
103	c_segment_t swp_csegs; /* back pointers to the c_segments. Used during swap reclaim. /
104
105	struct trim_list *swp_delayed_trim_list_head;
106	unsigned int swp_delayed_trim_count;
107	};
108
109	queue_head_t swf_global_queue;
110	boolean_t swp_trim_supported = FALSE;
111
112	extern clock_sec_t dont_trim_until_ts;
113	clock_sec_t vm_swapfile_last_failed_to_create_ts = `0`;
114	clock_sec_t vm_swapfile_last_successful_create_ts = `0`;
115	int vm_swapfile_can_be_created = FALSE;
116	boolean_t delayed_trim_handling_in_progress = FALSE;
117
118	boolean_t hibernate_in_progress_with_pinned_swap = FALSE;
119
120	static void vm_swapout_thread_throttle_adjust(void);
121	static void vm_swap_free_now(struct swapfile *swf, uint64_t f_offset);
122	void vm_swapout_thread(void);
123	static void vm_swapfile_create_thread(void);
124	static void vm_swapfile_gc_thread(void);
125	static void vm_swap_defragment(void);
126	static void vm_swap_handle_delayed_trims(boolean_t);
127	static void vm_swap_do_delayed_trim(struct swapfile *);
128	static void vm_swap_wait_on_trim_handling_in_progress(void);
129	static void vm_swapout_finish(c_segment_t c_seg, uint64_t f_offset, uint32_t size, kern_return_t kr);
130
131	extern int vnode_getwithref(struct vnode* vp);
132
133	boolean_t vm_swap_force_defrag = FALSE, vm_swap_force_reclaim = FALSE;
134
#if !XNU_TARGET_OS_OSX

/*
 * For CONFIG_FREEZE, we scale the c_segments_limit based on the
 * number of swapfiles allowed. That increases wired memory overhead.
 * So we want to keep the max swapfiles same on both DEV/RELEASE so
 * that the memory overhead is similar for performance comparisons.
 */
#define VM_MAX_SWAP_FILE_NUM            5
#if defined(__arm64__) && defined(ARM_LARGE_MEMORY)
#define VM_MAX_SWAP_FILE_SWAP_ENABLED_NUM (64ULL * (1ULL << 30) / MAX_SWAP_FILE_SIZE)
#define VM_MIN_SWAP_FILE_SWAP_ENABLED_NUM (16ULL * (1ULL << 30) / MAX_SWAP_FILE_SIZE)
#else /* defined(__arm64__) && defined(ARM_LARGE_MEMORY) */
/*
 * We reserve compressor pool VA at boot for the max # of swap files. If someone
 * has enabled app swap but we're not an arm large memory device we can't hog
 * all of the VA so we only go up to 4GB.
 */
#define VM_MAX_SWAP_FILE_SWAP_ENABLED_NUM (4ULL * (1ULL << 30) / MAX_SWAP_FILE_SIZE)
#define VM_MIN_SWAP_FILE_SWAP_ENABLED_NUM (4ULL * (1ULL << 30) / MAX_SWAP_FILE_SIZE)
#endif /* defined(__arm64__) && defined(ARM_LARGE_MEMORY) */
#define VM_SWAP_MIN_VOLUME_CAPACITY (128ULL * (1ULL << 30))

#define VM_SWAPFILE_DELAYED_TRIM_MAX    4

#define VM_SWAP_SHOULD_DEFRAGMENT()     (((vm_swap_force_defrag == TRUE) || (c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 16))) ? 1 : 0)
#define VM_SWAP_SHOULD_PIN(_size)       FALSE
#define VM_SWAP_SHOULD_CREATE(cur_ts)   ((vm_num_swap_files < vm_num_swap_files_config) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)vm_swapfile_hiwater_segs) && \
	                                 ((cur_ts - vm_swapfile_last_failed_to_create_ts) > VM_SWAPFILE_DELAYED_CREATE) ? 1 : 0)
#define VM_SWAP_SHOULD_TRIM(swf)        (((swf)->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)

#else /* !XNU_TARGET_OS_OSX */

#define VM_MAX_SWAP_FILE_NUM            100
#define VM_SWAPFILE_DELAYED_TRIM_MAX    128

#define VM_SWAP_SHOULD_DEFRAGMENT()     (((vm_swap_force_defrag == TRUE) || (c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 4))) ? 1 : 0)
#define VM_SWAP_SHOULD_PIN(_size)       (vm_swappin_avail > 0 && vm_swappin_avail >= (int64_t)(_size))
#define VM_SWAP_SHOULD_CREATE(cur_ts)   ((vm_num_swap_files < vm_num_swap_files_config) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)vm_swapfile_hiwater_segs) && \
	                                 ((cur_ts - vm_swapfile_last_failed_to_create_ts) > VM_SWAPFILE_DELAYED_CREATE) ? 1 : 0)
#define VM_SWAP_SHOULD_TRIM(swf)        (((swf)->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)

#endif /* !XNU_TARGET_OS_OSX */

/*
 * Reclaim starts once the number of allocated-but-unused segments reaches
 * swapfile_reclaim_threshold_segs (or a reclaim was forced) and is abandoned
 * when it drops to swapfile_reclam_minimum_segs with no forced request.
 */
#define VM_SWAP_SHOULD_RECLAIM()        (((vm_swap_force_reclaim == TRUE) || ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) >= swapfile_reclaim_threshold_segs)) ? 1 : 0)
#define VM_SWAP_SHOULD_ABORT_RECLAIM()  (((vm_swap_force_reclaim == FALSE) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) <= swapfile_reclam_minimum_segs)) ? 1 : 0)
/* Seconds that must have elapsed since the last failed create (see VM_SWAP_SHOULD_CREATE). */
#define VM_SWAPFILE_DELAYED_CREATE      15

/* Swapouts are queued and the swapper is running at the TIER0 throttle level. */
#define VM_SWAP_BUSY()  (((c_early_swapout_count + c_regular_swapout_count + c_late_swapout_count) && (vm_swapper_throttle == THROTTLE_LEVEL_COMPRESSOR_TIER0)) ? 1 : 0)
184
185
#if CHECKSUM_THE_SWAP
extern unsigned int hash_string(char *cp, int len);
#endif

#if RECORD_THE_COMPRESSED_DATA
/* State for the optional recording of compressed data to a file. */
boolean_t       c_compressed_record_init_done = FALSE;
int             c_compressed_record_write_error = 0;
struct vnode    *c_compressed_record_vp = NULL;
uint64_t        c_compressed_record_file_offset = 0;
void    c_compressed_record_init(void);
void    c_compressed_record_write(char *, int);
#endif

extern void                     vm_pageout_io_throttle(void);

static struct swapfile *vm_swapfile_for_handle(uint64_t);
202
203	/*
204	* Called with the vm_swap_data_lock held.
205	*/
206
207	static struct swapfile *
208	vm_swapfile_for_handle(uint64_t f_offset)
209	{
210	uint64_t file_offset = `0`;
211	unsigned int swapfile_index = `0`;
212	struct swapfile* swf = NULL;
213
214	file_offset = (f_offset & SWAP_SLOT_MASK);
215	swapfile_index = (f_offset >> SWAP_DEVICE_SHIFT);
216
217	swf = (struct swapfile*) queue_first(&swf_global_queue);
218
219	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
220	if (swapfile_index == swf->swp_index) {
221	break;
222	}
223
224	swf = (struct swapfile*) queue_next(&swf->swp_queue);
225	}
226
227	if (queue_end(&swf_global_queue, (queue_entry_t) swf)) {
228	swf = NULL;
229	}
230
231	return swf;
232	}
233
234	#if ENCRYPTED_SWAP
235
236	#include <libkern/crypto/aesxts.h>
237
238	extern int cc_rand_generate(void , size_t); /* from libkern/cyrpto/rand.h> /
239
240	boolean_t swap_crypt_initialized;
241	void swap_crypt_initialize(void);
242
243	symmetric_xts xts_modectx;
244	uint32_t swap_crypt_key1[`8`]; / big enough for a 256 bit random key /
245	uint32_t swap_crypt_key2[`8`]; / big enough for a 256 bit random key /
246
247	#if DEVELOPMENT \|\| DEBUG
248	boolean_t swap_crypt_xts_tested = FALSE;
249	unsigned char swap_crypt_test_page_ref[`4096`] __attribute__((aligned(`4096`)));
250	unsigned char swap_crypt_test_page_encrypt[`4096`] __attribute__((aligned(`4096`)));
251	unsigned char swap_crypt_test_page_decrypt[`4096`] __attribute__((aligned(`4096`)));
252	#endif /* DEVELOPMENT \|\| DEBUG */
253
254	unsigned long vm_page_encrypt_counter;
255	unsigned long vm_page_decrypt_counter;
256
257
258	void
259	swap_crypt_initialize(void)
260	{
261	uint8_t enckey1, enckey2;
262	int keylen1, keylen2;
263	int error;
264
265	assert(swap_crypt_initialized == FALSE);
266
267	keylen1 = sizeof(swap_crypt_key1);
268	enckey1 = (uint8_t *)&swap_crypt_key1;
269	keylen2 = sizeof(swap_crypt_key2);
270	enckey2 = (uint8_t *)&swap_crypt_key2;
271
272	error = cc_rand_generate((void *)enckey1, keylen1);
273	assert(!error);
274
275	error = cc_rand_generate((void *)enckey2, keylen2);
276	assert(!error);
277
278	error = xts_start(cipher: `0`, NULL, key1: enckey1, keylen: keylen1, key2: enckey2, tweaklen: keylen2, num_rounds: `0`, options: `0`, xts: &xts_modectx);
279	assert(!error);
280
281	swap_crypt_initialized = TRUE;
282
283	#if DEVELOPMENT \|\| DEBUG
284	uint8_t *encptr;
285	uint8_t *decptr;
286	uint8_t *refptr;
287	uint8_t *iv;
288	uint64_t ivnum[`2`];
289	int size = `0`;
290	int i = `0`;
291	int rc = `0`;
292
293	assert(swap_crypt_xts_tested == FALSE);
294
295	/*
296	* Validate the encryption algorithms.
297	*
298	* First initialize the test data.
299	*/
300	for (i = `0`; i < `4096`; i++) {
301	swap_crypt_test_page_ref[i] = (char) i;
302	}
303	ivnum[`0`] = (uint64_t)`0xaa`;
304	ivnum[`1`] = `0`;
305	iv = (uint8_t *)ivnum;
306
307	refptr = (uint8_t *)swap_crypt_test_page_ref;
308	encptr = (uint8_t *)swap_crypt_test_page_encrypt;
309	decptr = (uint8_t *)swap_crypt_test_page_decrypt;
310	size = `4096`;
311
312	/ encrypt /
313	rc = xts_encrypt(refptr, size, encptr, iv, &xts_modectx);
314	assert(!rc);
315
316	/ compare result with original - should NOT match /
317	for (i = `0`; i < `4096`; i++) {
318	if (swap_crypt_test_page_encrypt[i] !=
319	swap_crypt_test_page_ref[i]) {
320	break;
321	}
322	}
323	assert(i != `4096`);
324
325	/ decrypt /
326	rc = xts_decrypt(encptr, size, decptr, iv, &xts_modectx);
327	assert(!rc);
328
329	/ compare result with original /
330	for (i = `0`; i < `4096`; i++) {
331	if (swap_crypt_test_page_decrypt[i] !=
332	swap_crypt_test_page_ref[i]) {
333	panic("encryption test failed");
334	}
335	}
336	/ encrypt in place /
337	rc = xts_encrypt(decptr, size, decptr, iv, &xts_modectx);
338	assert(!rc);
339
340	/ decrypt in place /
341	rc = xts_decrypt(decptr, size, decptr, iv, &xts_modectx);
342	assert(!rc);
343
344	for (i = `0`; i < `4096`; i++) {
345	if (swap_crypt_test_page_decrypt[i] !=
346	swap_crypt_test_page_ref[i]) {
347	panic("in place encryption test failed");
348	}
349	}
350	swap_crypt_xts_tested = TRUE;
351	#endif /* DEVELOPMENT \|\| DEBUG */
352	}
353
354
355	void
356	vm_swap_encrypt(c_segment_t c_seg)
357	{
358	uint8_t *ptr;
359	uint8_t *iv;
360	uint64_t ivnum[`2`];
361	int size = `0`;
362	int rc = `0`;
363
364	if (swap_crypt_initialized == FALSE) {
365	swap_crypt_initialize();
366	}
367
368	#if DEVELOPMENT \|\| DEBUG
369	C_SEG_MAKE_WRITEABLE(c_seg);
370	#endif
371	ptr = (uint8_t *)c_seg->c_store.c_buffer;
372	size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
373
374	ivnum[`0`] = (uint64_t)c_seg;
375	ivnum[`1`] = `0`;
376	iv = (uint8_t *)ivnum;
377
378	rc = xts_encrypt(pt: ptr, ptlen: size, ct: ptr, tweak: iv, xts: &xts_modectx);
379	assert(!rc);
380
381	vm_page_encrypt_counter += (size / PAGE_SIZE_64);
382
383	#if DEVELOPMENT \|\| DEBUG
384	C_SEG_WRITE_PROTECT(c_seg);
385	#endif
386	}
387
388	void
389	vm_swap_decrypt(c_segment_t c_seg)
390	{
391	uint8_t *ptr;
392	uint8_t *iv;
393	uint64_t ivnum[`2`];
394	int size = `0`;
395	int rc = `0`;
396
397	assert(swap_crypt_initialized);
398
399	#if DEVELOPMENT \|\| DEBUG
400	C_SEG_MAKE_WRITEABLE(c_seg);
401	#endif
402	ptr = (uint8_t *)c_seg->c_store.c_buffer;
403	size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
404
405	ivnum[`0`] = (uint64_t)c_seg;
406	ivnum[`1`] = `0`;
407	iv = (uint8_t *)ivnum;
408
409	rc = xts_decrypt(ct: ptr, ptlen: size, pt: ptr, tweak: iv, xts: &xts_modectx);
410	assert(!rc);
411
412	vm_page_decrypt_counter += (size / PAGE_SIZE_64);
413
414	#if DEVELOPMENT \|\| DEBUG
415	C_SEG_WRITE_PROTECT(c_seg);
416	#endif
417	}
418	#endif /* ENCRYPTED_SWAP */
419
420	uint64_t compressed_swap_chunk_size, vm_swapfile_hiwater_segs, swapfile_reclaim_threshold_segs, swapfile_reclam_minimum_segs;
421	extern bool memorystatus_swap_all_apps;
422
423	void
424	vm_compressor_swap_init_swap_file_limit(void)
425	{
426	vm_num_swap_files_config = VM_MAX_SWAP_FILE_NUM;
427	#if CONFIG_JETSAM
428	if (memorystatus_swap_all_apps) {
429	if (vm_swap_volume_capacity == `0`) {
430	/*
431	* Early in boot we don't know the swap volume capacity.
432	* That's fine. Reserve space for the maximum config
433	* and we'll lower this later in boot once we have the capacity.
434	*/
435	vm_num_swap_files_config = VM_MAX_SWAP_FILE_SWAP_ENABLED_NUM;
436	} else {
437	static uint64_t kFixedPointFactor = `100`;
438	/*
439	* Scale the max number of swap files linearly.
440	* But we can never go above VM_MAX_SWAP_FILE_SWAP_ENABLED_NUM.
441	*/
442	vm_num_swap_files_config = vm_swap_volume_capacity * kFixedPointFactor / VM_SWAP_MIN_VOLUME_CAPACITY
443	* VM_MIN_SWAP_FILE_SWAP_ENABLED_NUM / kFixedPointFactor;
444	vm_num_swap_files_config = MAX(vm_num_swap_files_config, VM_MIN_SWAP_FILE_SWAP_ENABLED_NUM);
445	vm_num_swap_files_config = MIN(vm_num_swap_files_config, VM_MAX_SWAP_FILE_SWAP_ENABLED_NUM);
446	}
447	}
448	#endif /* CONFIG_JETSAM */
449	#if DEVELOPMENT \|\| DEBUG
450	typeof(vm_num_swap_files_config) parsed_vm_max_num_swap_files = `0`;
451	if (PE_parse_boot_argn("vm_max_num_swap_files", &parsed_vm_max_num_swap_files, sizeof(parsed_vm_max_num_swap_files))) {
452	if (parsed_vm_max_num_swap_files > `0`) {
453	vm_num_swap_files_config = parsed_vm_max_num_swap_files;
454	} else {
455	printf("WARNING: Ignoring vm_max_num_swap_files=%d boot-arg. Value must be > 0\n", parsed_vm_max_num_swap_files);
456	}
457	}
458	#endif
459	printf(format: "Maximum number of VM swap files: %d\n", vm_num_swap_files_config);
460	}
461
462	int vm_swap_enabled = `0`;
463	void
464	vm_compressor_swap_init(void)
465	{
466	thread_t thread = NULL;
467
468	queue_init(&swf_global_queue);
469
470	#if !XNU_TARGET_OS_OSX
471	/*
472	* dummy value until the swap file gets created
473	* when we drive the first c_segment_t to the
474	* swapout queue... at that time we will
475	* know the true size we have to work with
476	*/
477	c_overage_swapped_limit = `16`;
478	#endif /* !XNU_TARGET_OS_OSX */
479
480	compressed_swap_chunk_size = c_seg_bufsize;
481	vm_swapfile_hiwater_segs = (MIN_SWAP_FILE_SIZE / compressed_swap_chunk_size);
482	swapfile_reclaim_threshold_segs = ((`17` * (MAX_SWAP_FILE_SIZE / compressed_swap_chunk_size)) / `10`);
483	swapfile_reclam_minimum_segs = ((`13` * (MAX_SWAP_FILE_SIZE / compressed_swap_chunk_size)) / `10`);
484
485	if (kernel_thread_start_priority(continuation: (thread_continue_t)vm_swapout_thread, NULL,
486	BASEPRI_VM, new_thread: &thread) != KERN_SUCCESS) {
487	panic("vm_swapout_thread: create failed");
488	}
489	thread_set_thread_name(th: thread, name: "VM_swapout");
490	vm_swapout_thread_id = thread->thread_id;
491	thread_deallocate(thread);
492
493	if (kernel_thread_start_priority(continuation: (thread_continue_t)vm_swapfile_create_thread, NULL,
494	BASEPRI_VM, new_thread: &thread) != KERN_SUCCESS) {
495	panic("vm_swapfile_create_thread: create failed");
496	}
497	thread_set_thread_name(th: thread, name: "VM_swapfile_create");
498	thread_deallocate(thread);
499
500	if (kernel_thread_start_priority(continuation: (thread_continue_t)vm_swapfile_gc_thread, NULL,
501	BASEPRI_VM, new_thread: &thread) != KERN_SUCCESS) {
502	panic("vm_swapfile_gc_thread: create failed");
503	}
504	thread_set_thread_name(th: thread, name: "VM_swapfile_gc");
505	/*
506	* Swapfile garbage collection will need to allocate memory
507	* to complete its swap reclaim and in-memory compaction.
508	* So allow it to dip into the reserved VM page pool.
509	*/
510	thread_lock(thread);
511	thread->options \|= TH_OPT_VMPRIV;
512	thread_unlock(thread);
513	thread_deallocate(thread);
514	proc_set_thread_policy_with_tid(task: kernel_task, tid: thread->thread_id,
515	TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
516	proc_set_thread_policy_with_tid(task: kernel_task, tid: thread->thread_id,
517	TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
518
519	vm_swap_enabled = `1`;
520	printf(format: "VM Swap Subsystem is ON\n");
521	}
522
523
524	#if RECORD_THE_COMPRESSED_DATA
525
526	void
527	c_compressed_record_init()
528	{
529	if (c_compressed_record_init_done == FALSE) {
530	vm_swapfile_open("/tmp/compressed_data", &c_compressed_record_vp);
531	c_compressed_record_init_done = TRUE;
532	}
533	}
534
535	void
536	c_compressed_record_write(char buf, int* size)
537	{
538	if (c_compressed_record_write_error == `0`) {
539	c_compressed_record_write_error = vm_record_file_write(c_compressed_record_vp, c_compressed_record_file_offset, buf, size);
540	c_compressed_record_file_offset += size;
541	}
542	}
543	#endif
544
545
546	int compaction_swapper_inited = `0`;
547
548	void
549	vm_compaction_swapper_do_init(void)
550	{
551	struct vnode *vp;
552	char *pathname;
553	int namelen;
554
555	if (compaction_swapper_inited) {
556	return;
557	}
558
559	if (vm_compressor_mode != VM_PAGER_COMPRESSOR_WITH_SWAP) {
560	compaction_swapper_inited = `1`;
561	return;
562	}
563	lck_mtx_lock(lck: &vm_swap_data_lock);
564
565	if (!compaction_swapper_inited) {
566	namelen = (int)strlen(s: swapfilename) + SWAPFILENAME_INDEX_LEN + `1`;
567	pathname = kalloc_data(namelen, Z_WAITOK \| Z_ZERO);
568	snprintf(pathname, namelen, "%s%d", swapfilename, `0`);
569
570	vm_swapfile_open(path: pathname, vp: &vp);
571
572	if (vp) {
573	if (vnode_pager_isSSD(vp) == FALSE) {
574	/*
575	* swap files live on an HDD, so let's make sure to start swapping
576	* much earlier since we're not worried about SSD write-wear and
577	* we have so little write bandwidth to work with
578	* these values were derived expermentially by running the performance
579	* teams stock test for evaluating HDD performance against various
580	* combinations and looking and comparing overall results.
581	* Note that the > relationship between these 4 values must be maintained
582	*/
583	if (vm_compressor_minorcompact_threshold_divisor_overridden == `0`) {
584	vm_compressor_minorcompact_threshold_divisor = `15`;
585	}
586	if (vm_compressor_majorcompact_threshold_divisor_overridden == `0`) {
587	vm_compressor_majorcompact_threshold_divisor = `18`;
588	}
589	if (vm_compressor_unthrottle_threshold_divisor_overridden == `0`) {
590	vm_compressor_unthrottle_threshold_divisor = `24`;
591	}
592	if (vm_compressor_catchup_threshold_divisor_overridden == `0`) {
593	vm_compressor_catchup_threshold_divisor = `30`;
594	}
595	}
596	#if XNU_TARGET_OS_OSX
597	vnode_setswapmount(vp);
598	vm_swappin_avail = vnode_getswappin_avail(vp);
599
600	if (vm_swappin_avail) {
601	vm_swappin_enabled = TRUE;
602	}
603	#endif /* XNU_TARGET_OS_OSX */
604	vm_swapfile_close(path: (uint64_t)pathname, vp);
605	}
606	kfree_data(pathname, namelen);
607
608	compaction_swapper_inited = `1`;
609	}
610	lck_mtx_unlock(lck: &vm_swap_data_lock);
611	}
612
613
614	void
615	vm_swap_consider_defragmenting(int flags)
616	{
617	boolean_t force_defrag = (flags & VM_SWAP_FLAGS_FORCE_DEFRAG);
618	boolean_t force_reclaim = (flags & VM_SWAP_FLAGS_FORCE_RECLAIM);
619
620	if (compressor_store_stop_compaction == FALSE && !VM_SWAP_BUSY() &&
621	(force_defrag \|\| force_reclaim \|\| VM_SWAP_SHOULD_DEFRAGMENT() \|\| VM_SWAP_SHOULD_RECLAIM())) {
622	if (!vm_swapfile_gc_thread_running \|\| force_defrag \|\| force_reclaim) {
623	lck_mtx_lock(lck: &vm_swap_data_lock);
624
625	if (force_defrag) {
626	vm_swap_force_defrag = TRUE;
627	}
628
629	if (force_reclaim) {
630	vm_swap_force_reclaim = TRUE;
631	}
632
633	if (!vm_swapfile_gc_thread_running) {
634	thread_wakeup((event_t) &vm_swapfile_gc_needed);
635	}
636
637	lck_mtx_unlock(lck: &vm_swap_data_lock);
638	}
639	}
640	}
641
642
/* Event counters maintained by vm_swap_defragment(). */
int vm_swap_defragment_yielded = 0;     /* stopped early: compaction stopped or swap busy */
int vm_swap_defragment_swapin = 0;      /* segments handed to c_seg_swapin() */
int vm_swap_defragment_free = 0;        /* empty segments freed */
int vm_swap_defragment_busy = 0;        /* waits on a busy segment */

#if CONFIG_FREEZE
extern int32_t c_segment_pages_compressed_incore;
extern int32_t c_segment_pages_compressed_incore_late_swapout;
extern uint32_t c_segment_pages_compressed_nearing_limit;
extern uint32_t c_segment_count;
extern uint32_t c_segments_nearing_limit;

boolean_t memorystatus_kill_on_VM_compressor_space_shortage(boolean_t);

extern bool freezer_incore_cseg_acct;
#endif /* CONFIG_FREEZE */
659
660	static void
661	vm_swap_defragment()
662	{
663	c_segment_t c_seg;
664
665	/*
666	* have to grab the master lock w/o holding
667	* any locks in spin mode
668	*/
669	PAGE_REPLACEMENT_DISALLOWED(TRUE);
670
671	lck_mtx_lock_spin_always(c_list_lock);
672
673	while (!queue_empty(&c_swappedout_sparse_list_head)) {
674	if (compressor_store_stop_compaction == TRUE \|\| VM_SWAP_BUSY()) {
675	vm_swap_defragment_yielded++;
676	break;
677	}
678	c_seg = (c_segment_t)queue_first(&c_swappedout_sparse_list_head);
679
680	lck_mtx_lock_spin_always(lck: &c_seg->c_lock);
681
682	assert(c_seg->c_state == C_ON_SWAPPEDOUTSPARSE_Q);
683
684	if (c_seg->c_busy) {
685	lck_mtx_unlock_always(c_list_lock);
686
687	PAGE_REPLACEMENT_DISALLOWED(FALSE);
688	/*
689	* c_seg_wait_on_busy consumes c_seg->c_lock
690	*/
691	c_seg_wait_on_busy(c_seg);
692
693	PAGE_REPLACEMENT_DISALLOWED(TRUE);
694
695	lck_mtx_lock_spin_always(c_list_lock);
696
697	vm_swap_defragment_busy++;
698	continue;
699	}
700	if (c_seg->c_bytes_used == `0`) {
701	/*
702	* c_seg_free_locked consumes the c_list_lock
703	* and c_seg->c_lock
704	*/
705	C_SEG_BUSY(c_seg);
706	c_seg_free_locked(c_seg);
707
708	vm_swap_defragment_free++;
709	} else {
710	lck_mtx_unlock_always(c_list_lock);
711
712	#if CONFIG_FREEZE
713	if (freezer_incore_cseg_acct) {
714	/*
715	* TODO(jason): These two are tricky because they're pre-emptive jetsams.
716	* The system is not unhealthy, but we know that it's about to become unhealthy once
717	* we do this swapin.
718	* So we're waking up the memorystatus thread to make space
719	* (hopefully) before this segment comes in.
720	*
721	* I think the compressor_backing_store needs to keep track of
722	* two new globals that will track the number of segments
723	* being swapped in due to defrag and the number of slots used
724	* in those segments.
725	* Then the health check below can be called from the memorystatus
726	* thread.
727	*/
728	if ((c_seg->c_slots_used + c_segment_pages_compressed_incore) >= c_segment_pages_compressed_nearing_limit) {
729	memorystatus_kill_on_VM_compressor_space_shortage(TRUE / async /);
730	}
731
732	uint32_t incore_seg_count = c_segment_count - c_swappedout_count - c_swappedout_sparse_count;
733	if ((incore_seg_count + `1`) >= c_segments_nearing_limit) {
734	memorystatus_kill_on_VM_compressor_space_shortage(TRUE / async /);
735	}
736	}
737	#endif /* CONFIG_FREEZE */
738	if (c_seg_swapin(c_seg, TRUE, FALSE) == `0`) {
739	lck_mtx_unlock_always(&c_seg->c_lock);
740	vmcs_stats.defrag_swapins += (round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset))) >> PAGE_SHIFT;
741	}
742
743	vm_swap_defragment_swapin++;
744	}
745	PAGE_REPLACEMENT_DISALLOWED(FALSE);
746
747	vm_pageout_io_throttle();
748
749	/*
750	* because write waiters have privilege over readers,
751	* dropping and immediately retaking the master lock will
752	* still allow any thread waiting to acquire the
753	* master lock exclusively an opportunity to take it
754	*/
755	PAGE_REPLACEMENT_DISALLOWED(TRUE);
756
757	lck_mtx_lock_spin_always(c_list_lock);
758	}
759	lck_mtx_unlock_always(c_list_lock);
760
761	PAGE_REPLACEMENT_DISALLOWED(FALSE);
762	}
763
764
765	bool vm_swapfile_create_thread_inited = false;
766	static void
767	vm_swapfile_create_thread(void)
768	{
769	clock_sec_t sec;
770	clock_nsec_t nsec;
771
772	if (!vm_swapfile_create_thread_inited) {
773	#if CONFIG_THREAD_GROUPS
774	thread_group_vm_add();
775	#endif /* CONFIG_THREAD_GROUPS */
776	current_thread()->options \|= TH_OPT_VMPRIV;
777	vm_swapfile_create_thread_inited = true;
778	}
779
780	vm_swapfile_create_thread_awakened++;
781	vm_swapfile_create_thread_running = `1`;
782
783	while (TRUE) {
784	/*
785	* walk through the list of swap files
786	* and do the delayed frees/trims for
787	* any swap file whose count of delayed
788	* frees is above the batch limit
789	*/
790	vm_swap_handle_delayed_trims(FALSE);
791
792	lck_mtx_lock(lck: &vm_swap_data_lock);
793
794	if (hibernate_in_progress_with_pinned_swap == TRUE) {
795	break;
796	}
797
798	if (compressor_store_stop_compaction == TRUE) {
799	break;
800	}
801
802	clock_get_system_nanotime(secs: &sec, nanosecs: &nsec);
803
804	if (VM_SWAP_SHOULD_CREATE(sec) == `0`) {
805	break;
806	}
807
808	lck_mtx_unlock(lck: &vm_swap_data_lock);
809
810	if (vm_swap_create_file() == FALSE) {
811	vm_swapfile_last_failed_to_create_ts = sec;
812	HIBLOG("vm_swap_create_file failed @ %lu secs\n", (unsigned long)sec);
813	} else {
814	vm_swapfile_last_successful_create_ts = sec;
815	}
816	}
817	vm_swapfile_create_thread_running = `0`;
818
819	if (hibernate_in_progress_with_pinned_swap == TRUE) {
820	thread_wakeup((event_t)&hibernate_in_progress_with_pinned_swap);
821	}
822
823	if (compressor_store_stop_compaction == TRUE) {
824	thread_wakeup((event_t)&compressor_store_stop_compaction);
825	}
826
827	assert_wait(event: (event_t)&vm_swapfile_create_needed, THREAD_UNINT);
828
829	lck_mtx_unlock(lck: &vm_swap_data_lock);
830
831	thread_block(continuation: (thread_continue_t)vm_swapfile_create_thread);
832
833	/ NOTREACHED /
834	}
835
836
#if HIBERNATION

/*
 * Begin (start == TRUE) or end (start == FALSE) hibernation with pinned swap.
 *
 * Ending simply clears hibernate_in_progress_with_pinned_swap.
 * Beginning is a no-op success when swap pinning is not enabled; otherwise
 * the flag is set, the swapfile create and gc threads are waited for until
 * both have left their work loops, and then:
 *  - if some swap file is not pinned, the flag is cleared again and
 *    KERN_FAILURE is returned;
 *  - else swap files are created for as long as VM_SWAP_SHOULD_PIN() holds
 *    and creation succeeds, and KERN_SUCCESS is returned.
 */
kern_return_t
hibernate_pin_swap(boolean_t start)
{
	vm_compaction_swapper_do_init();

	if (start == FALSE) {
		lck_mtx_lock(&vm_swap_data_lock);
		hibernate_in_progress_with_pinned_swap = FALSE;
		lck_mtx_unlock(&vm_swap_data_lock);

		return KERN_SUCCESS;
	}
	if (vm_swappin_enabled == FALSE) {
		return KERN_SUCCESS;
	}

	lck_mtx_lock(&vm_swap_data_lock);

	hibernate_in_progress_with_pinned_swap = TRUE;

	/* both threads issue a wakeup on this event when they see the flag and stop */
	while (vm_swapfile_create_thread_running || vm_swapfile_gc_thread_running) {
		assert_wait((event_t)&hibernate_in_progress_with_pinned_swap, THREAD_UNINT);

		lck_mtx_unlock(&vm_swap_data_lock);

		thread_block(THREAD_CONTINUE_NULL);

		lck_mtx_lock(&vm_swap_data_lock);
	}
	if (vm_num_swap_files > vm_num_pinned_swap_files) {
		hibernate_in_progress_with_pinned_swap = FALSE;
		lck_mtx_unlock(&vm_swap_data_lock);

		HIBLOG("hibernate_pin_swap failed - vm_num_swap_files = %d, vm_num_pinned_swap_files = %d\n",
		    vm_num_swap_files, vm_num_pinned_swap_files);
		return KERN_FAILURE;
	}
	lck_mtx_unlock(&vm_swap_data_lock);

	while (VM_SWAP_SHOULD_PIN(MAX_SWAP_FILE_SIZE)) {
		if (vm_swap_create_file() == FALSE) {
			break;
		}
	}
	return KERN_SUCCESS;
}
#endif
886	bool vm_swapfile_gc_thread_inited = false;
887	static void
888	vm_swapfile_gc_thread(void)
889	{
890	boolean_t need_defragment;
891	boolean_t need_reclaim;
892
893	if (!vm_swapfile_gc_thread_inited) {
894	#if CONFIG_THREAD_GROUPS
895	thread_group_vm_add();
896	#endif /* CONFIG_THREAD_GROUPS */
897	vm_swapfile_gc_thread_inited = true;
898	}
899
900	vm_swapfile_gc_thread_awakened++;
901	vm_swapfile_gc_thread_running = `1`;
902
903	while (TRUE) {
904	lck_mtx_lock(lck: &vm_swap_data_lock);
905
906	if (hibernate_in_progress_with_pinned_swap == TRUE) {
907	break;
908	}
909
910	if (VM_SWAP_BUSY() \|\| compressor_store_stop_compaction == TRUE) {
911	break;
912	}
913
914	need_defragment = FALSE;
915	need_reclaim = FALSE;
916
917	if (VM_SWAP_SHOULD_DEFRAGMENT()) {
918	need_defragment = TRUE;
919	}
920
921	if (VM_SWAP_SHOULD_RECLAIM()) {
922	need_defragment = TRUE;
923	need_reclaim = TRUE;
924	}
925	if (need_defragment == FALSE && need_reclaim == FALSE) {
926	break;
927	}
928
929	vm_swap_force_defrag = FALSE;
930	vm_swap_force_reclaim = FALSE;
931
932	lck_mtx_unlock(lck: &vm_swap_data_lock);
933
934	if (need_defragment == TRUE) {
935	vm_swap_defragment();
936	}
937	if (need_reclaim == TRUE) {
938	vm_swap_reclaim();
939	}
940	}
941	vm_swapfile_gc_thread_running = `0`;
942
943	if (hibernate_in_progress_with_pinned_swap == TRUE) {
944	thread_wakeup((event_t)&hibernate_in_progress_with_pinned_swap);
945	}
946
947	if (compressor_store_stop_compaction == TRUE) {
948	thread_wakeup((event_t)&compressor_store_stop_compaction);
949	}
950
951	assert_wait(event: (event_t)&vm_swapfile_gc_needed, THREAD_UNINT);
952
953	lck_mtx_unlock(lck: &vm_swap_data_lock);
954
955	thread_block(continuation: (thread_continue_t)vm_swapfile_gc_thread);
956
957	/ NOTREACHED /
958	}
959
960
961
/*
 * Values assigned to vm_swapout_limit by
 * vm_swapout_thread_throttle_adjust() for each swapout state.
 * VM_SWAPOUT_LIMIT_MAX also sizes the vm_swapout_ctx[] array.
 */
#define VM_SWAPOUT_LIMIT_T2P    4
#define VM_SWAPOUT_LIMIT_T1P    4
#define VM_SWAPOUT_LIMIT_T0P    6
#define VM_SWAPOUT_LIMIT_T0     8
#define VM_SWAPOUT_LIMIT_MAX    8

/* States of the swapout throttle state machine (vm_swapout_state). */
#define VM_SWAPOUT_START        0
#define VM_SWAPOUT_T2_PASSIVE   1
#define VM_SWAPOUT_T1_PASSIVE   2
#define VM_SWAPOUT_T0_PASSIVE   3
#define VM_SWAPOUT_T0           4

int vm_swapout_state = VM_SWAPOUT_START;
int vm_swapout_limit = 1;

/* Number of times each throttle state has been entered. */
int vm_swapper_entered_T0 = 0;
int vm_swapper_entered_T0P = 0;
int vm_swapper_entered_T1P = 0;
int vm_swapper_entered_T2P = 0;
981
982
983	static void
984	vm_swapout_thread_throttle_adjust(void)
985	{
986	switch (vm_swapout_state) {
987	case VM_SWAPOUT_START:
988
989	vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
990	vm_swapper_entered_T2P++;
991
992	proc_set_thread_policy_with_tid(task: kernel_task, tid: vm_swapout_thread_id,
993	TASK_POLICY_INTERNAL, TASK_POLICY_IO, value: vm_swapper_throttle);
994	proc_set_thread_policy_with_tid(task: kernel_task, tid: vm_swapout_thread_id,
995	TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
996	vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
997	vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
998
999	break;
1000
1001	case VM_SWAPOUT_T2_PASSIVE:
1002
1003	if (SWAPPER_NEEDS_TO_UNTHROTTLE()) {
1004	vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER0;
1005	vm_swapper_entered_T0P++;
1006
1007	proc_set_thread_policy_with_tid(task: kernel_task, tid: vm_swapout_thread_id,
1008	TASK_POLICY_INTERNAL, TASK_POLICY_IO, value: vm_swapper_throttle);
1009	proc_set_thread_policy_with_tid(task: kernel_task, tid: vm_swapout_thread_id,
1010	TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1011	vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
1012	vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
1013
1014	break;
1015	}
1016	if (swapout_target_age \|\| hibernate_flushing == TRUE) {
1017	vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER1;
1018	vm_swapper_entered_T1P++;
1019
1020	proc_set_thread_policy_with_tid(task: kernel_task, tid: vm_swapout_thread_id,
1021	TASK_POLICY_INTERNAL, TASK_POLICY_IO, value: vm_swapper_throttle);
1022	proc_set_thread_policy_with_tid(task: kernel_task, tid: vm_swapout_thread_id,
1023	TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1024	vm_swapout_limit = VM_SWAPOUT_LIMIT_T1P;
1025	vm_swapout_state = VM_SWAPOUT_T1_PASSIVE;
1026	}
1027	break;
1028
1029	case VM_SWAPOUT_T1_PASSIVE:
1030
1031	if (SWAPPER_NEEDS_TO_UNTHROTTLE()) {
1032	vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER0;
1033	vm_swapper_entered_T0P++;
1034
1035	proc_set_thread_policy_with_tid(task: kernel_task, tid: vm_swapout_thread_id,
1036	TASK_POLICY_INTERNAL, TASK_POLICY_IO, value: vm_swapper_throttle);
1037	proc_set_thread_policy_with_tid(task: kernel_task, tid: vm_swapout_thread_id,
1038	TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1039	vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
1040	vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
1041
1042	break;
1043	}
1044	if (swapout_target_age == `0` && hibernate_flushing == FALSE) {
1045	vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
1046	vm_swapper_entered_T2P++;
1047
1048	proc_set_thread_policy_with_tid(task: kernel_task, tid: vm_swapout_thread_id,
1049	TASK_POLICY_INTERNAL, TASK_POLICY_IO, value: vm_swapper_throttle);
1050	proc_set_thread_policy_with_tid(task: kernel_task, tid: vm_swapout_thread_id,
1051	TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1052	vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
1053	vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
1054	}
1055	break;
1056
1057	case VM_SWAPOUT_T0_PASSIVE:
1058
1059	if (SWAPPER_NEEDS_TO_RETHROTTLE()) {
1060	vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
1061	vm_swapper_entered_T2P++;
1062
1063	proc_set_thread_policy_with_tid(task: kernel_task, tid: vm_swapout_thread_id,
1064	TASK_POLICY_INTERNAL, TASK_POLICY_IO, value: vm_swapper_throttle);
1065	proc_set_thread_policy_with_tid(task: kernel_task, tid: vm_swapout_thread_id,
1066	TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1067	vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
1068	vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
1069
1070	break;
1071	}
1072	if (SWAPPER_NEEDS_TO_CATCHUP()) {
1073	vm_swapper_entered_T0++;
1074
1075	proc_set_thread_policy_with_tid(task: kernel_task, tid: vm_swapout_thread_id,
1076	TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_DISABLE);
1077	vm_swapout_limit = VM_SWAPOUT_LIMIT_T0;
1078	vm_swapout_state = VM_SWAPOUT_T0;
1079	}
1080	break;
1081
1082	case VM_SWAPOUT_T0:
1083
1084	if (SWAPPER_HAS_CAUGHTUP()) {
1085	vm_swapper_entered_T0P++;
1086
1087	proc_set_thread_policy_with_tid(task: kernel_task, tid: vm_swapout_thread_id,
1088	TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1089	vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
1090	vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
1091	}
1092	break;
1093	}
1094	}
1095
1096	int vm_swapout_found_empty = `0`;
1097
1098	struct swapout_io_completion vm_swapout_ctx[VM_SWAPOUT_LIMIT_MAX];
1099
1100	int vm_swapout_soc_busy = `0`;
1101	int vm_swapout_soc_done = `0`;
1102
1103
1104	static struct swapout_io_completion *
1105	vm_swapout_find_free_soc(void)
1106	{
1107	int i;
1108
1109	for (i = `0`; i < VM_SWAPOUT_LIMIT_MAX; i++) {
1110	if (vm_swapout_ctx[i].swp_io_busy == `0`) {
1111	return &vm_swapout_ctx[i];
1112	}
1113	}
1114	assert(vm_swapout_soc_busy == VM_SWAPOUT_LIMIT_MAX);
1115
1116	return NULL;
1117	}
1118
1119	static struct swapout_io_completion *
1120	vm_swapout_find_done_soc(void)
1121	{
1122	int i;
1123
1124	if (vm_swapout_soc_done) {
1125	for (i = `0`; i < VM_SWAPOUT_LIMIT_MAX; i++) {
1126	if (vm_swapout_ctx[i].swp_io_done) {
1127	return &vm_swapout_ctx[i];
1128	}
1129	}
1130	}
1131	return NULL;
1132	}
1133
1134	static void
1135	vm_swapout_complete_soc(struct swapout_io_completion *soc)
1136	{
1137	kern_return_t kr;
1138
1139	if (soc->swp_io_error) {
1140	kr = KERN_FAILURE;
1141	} else {
1142	kr = KERN_SUCCESS;
1143	}
1144
1145	lck_mtx_unlock_always(c_list_lock);
1146
1147	vm_swap_put_finish(soc->swp_swf, &soc->swp_f_offset, soc->swp_io_error, TRUE /drop iocount/);
1148	vm_swapout_finish(c_seg: soc->swp_c_seg, f_offset: soc->swp_f_offset, size: soc->swp_c_size, kr);
1149
1150	lck_mtx_lock_spin_always(c_list_lock);
1151
1152	soc->swp_io_done = `0`;
1153	soc->swp_io_busy = `0`;
1154
1155	vm_swapout_soc_busy--;
1156	vm_swapout_soc_done--;
1157	}
1158
1159	bool vm_swapout_thread_inited = false;
1160	extern uint32_t c_donate_swapout_count;
1161	#if CONFIG_JETSAM
1162	bool memorystatus_swap_over_trigger(uint64_t adjustment_factor);
1163	/*
1164	* swapout_sleep_threshold sets the percentage of the swapout threshold at which
1165	* the swap thread will stop processing the swapout queue.
1166	* By default this is 90 which means we will swap until the
1167	* swapout queue size is at 90% of the threshold to wake the swap thread.
1168	* By definition the queue length must be >= 100% of the threshold when the.
1169	* swap thread is woken up. On development builds this can be adjusted with
1170	* the vm.swapout_sleep_threshold sysctl.
1171	*/
1172	uint32_t swapout_sleep_threshold = `90`;
1173	#endif /* CONFIG_JETSAM */
1174	static bool
1175	should_process_swapout_queue(const queue_head_t *swapout_list_head)
1176	{
1177	bool process_queue = !queue_empty(swapout_list_head) &&
1178	vm_swapout_soc_busy < vm_swapout_limit &&
1179	!compressor_store_stop_compaction;
1180	#if CONFIG_JETSAM
1181	if (memorystatus_swap_all_apps && swapout_list_head == &c_late_swapout_list_head) {
1182	process_queue = process_queue && memorystatus_swap_over_trigger(swapout_sleep_threshold);
1183	}
1184	#endif /* CONFIG_JETSAM */
1185	return process_queue;
1186	}
1187
1188	void
1189	vm_swapout_thread(void)
1190	{
1191	uint32_t size = `0`;
1192	c_segment_t c_seg = NULL;
1193	kern_return_t kr = KERN_SUCCESS;
1194	struct swapout_io_completion *soc;
1195	queue_head_t *swapout_list_head;
1196	bool queues_empty = false;
1197
1198	if (!vm_swapout_thread_inited) {
1199	#if CONFIG_THREAD_GROUPS
1200	thread_group_vm_add();
1201	#endif /* CONFIG_THREAD_GROUPS */
1202	current_thread()->options \|= TH_OPT_VMPRIV;
1203	vm_swapout_thread_inited = true;
1204	}
1205
1206	vm_swapout_thread_awakened++;
1207
1208	lck_mtx_lock_spin_always(c_list_lock);
1209
1210	swapout_list_head = &c_early_swapout_list_head;
1211	vm_swapout_thread_running = TRUE;
1212	os_atomic_store(&vm_swapout_wake_pending, false, relaxed);
1213	again:
1214	while (should_process_swapout_queue(swapout_list_head)) {
1215	c_seg = (c_segment_t)queue_first(swapout_list_head);
1216
1217	lck_mtx_lock_spin_always(lck: &c_seg->c_lock);
1218
1219	assert(c_seg->c_state == C_ON_SWAPOUT_Q);
1220
1221	if (c_seg->c_busy) {
1222	lck_mtx_unlock_always(c_list_lock);
1223
1224	c_seg_wait_on_busy(c_seg);
1225
1226	lck_mtx_lock_spin_always(c_list_lock);
1227
1228	continue;
1229	}
1230	vm_swapout_thread_processed_segments++;
1231
1232	size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
1233
1234	if (size == `0`) {
1235	assert(c_seg->c_bytes_used == `0`);
1236
1237	/*
1238	* c_seg_free_locked will drop the c_list_lock and
1239	* the c_seg->c_lock.
1240	*/
1241	C_SEG_BUSY(c_seg);
1242	c_seg_free_locked(c_seg);
1243	c_seg = NULL;
1244
1245	vm_swapout_found_empty++;
1246	goto c_seg_is_empty;
1247	}
1248	C_SEG_BUSY(c_seg);
1249	c_seg->c_busy_swapping = `1`;
1250
1251	c_seg_switch_state(c_seg, C_ON_SWAPIO_Q, FALSE);
1252
1253	lck_mtx_unlock_always(c_list_lock);
1254	lck_mtx_unlock_always(&c_seg->c_lock);
1255
1256	#if CHECKSUM_THE_SWAP
1257	c_seg->cseg_hash = hash_string((char )c_seg->c_store.c_buffer, (int*)size);
1258	c_seg->cseg_swap_size = size;
1259	#endif /* CHECKSUM_THE_SWAP */
1260
1261	#if ENCRYPTED_SWAP
1262	vm_swap_encrypt(c_seg);
1263	#endif /* ENCRYPTED_SWAP */
1264
1265	soc = vm_swapout_find_free_soc();
1266	assert(soc);
1267
1268	soc->swp_upl_ctx.io_context = (void *)soc;
1269	soc->swp_upl_ctx.io_done = (void *)vm_swapout_iodone;
1270	soc->swp_upl_ctx.io_error = `0`;
1271
1272	kr = vm_swap_put((vm_offset_t)c_seg->c_store.c_buffer, &soc->swp_f_offset, size, c_seg, soc);
1273
1274	if (kr != KERN_SUCCESS) {
1275	if (soc->swp_io_done) {
1276	lck_mtx_lock_spin_always(c_list_lock);
1277
1278	soc->swp_io_done = `0`;
1279	vm_swapout_soc_done--;
1280
1281	lck_mtx_unlock_always(c_list_lock);
1282	}
1283	vm_swapout_finish(c_seg, f_offset: soc->swp_f_offset, size, kr);
1284	} else {
1285	soc->swp_io_busy = `1`;
1286	vm_swapout_soc_busy++;
1287	}
1288
1289	c_seg_is_empty:
1290	if (!(c_early_swapout_count + c_regular_swapout_count + c_late_swapout_count)) {
1291	vm_swap_consider_defragmenting(VM_SWAP_FLAGS_NONE);
1292	}
1293
1294	lck_mtx_lock_spin_always(c_list_lock);
1295
1296	while ((soc = vm_swapout_find_done_soc())) {
1297	vm_swapout_complete_soc(soc);
1298	}
1299	lck_mtx_unlock_always(c_list_lock);
1300
1301	vm_swapout_thread_throttle_adjust();
1302
1303	lck_mtx_lock_spin_always(c_list_lock);
1304	}
1305	while ((soc = vm_swapout_find_done_soc())) {
1306	vm_swapout_complete_soc(soc);
1307	}
1308	lck_mtx_unlock_always(c_list_lock);
1309
1310	vm_pageout_io_throttle();
1311
1312	lck_mtx_lock_spin_always(c_list_lock);
1313
1314	/*
1315	* Recheck if we have some c_segs to wakeup
1316	* post throttle. And, check to see if we
1317	* have any more swapouts needed.
1318	*/
1319	if (vm_swapout_soc_done) {
1320	goto again;
1321	}
1322
1323	#if XNU_TARGET_OS_OSX
1324	queues_empty = queue_empty(&c_early_swapout_list_head) && queue_empty(&c_regular_swapout_list_head) && queue_empty(&c_late_swapout_list_head);
1325	#else /* XNU_TARGET_OS_OSX */
1326	queues_empty = queue_empty(&c_early_swapout_list_head) && queue_empty(&c_late_swapout_list_head);
1327	#endif /* XNU_TARGET_OS_OSX */
1328
1329	if (!queues_empty) {
1330	swapout_list_head = NULL;
1331	if (!queue_empty(&c_early_swapout_list_head)) {
1332	swapout_list_head = &c_early_swapout_list_head;
1333	} else {
1334	#if XNU_TARGET_OS_OSX
1335	/*
1336	* On macOS we _always_ processs all swapout queues.
1337	*/
1338	if (!queue_empty(&c_regular_swapout_list_head)) {
1339	swapout_list_head = &c_regular_swapout_list_head;
1340	} else {
1341	swapout_list_head = &c_late_swapout_list_head;
1342	}
1343	#else /* XNU_TARGET_OS_OSX */
1344	/*
1345	* On non-macOS swap-capable platforms, we might want to
1346	* processs just the early queue (Freezer) or process both
1347	* early and late queues (app swap). We processed the early
1348	* queue up above. The late Q will only be processed if the
1349	* checks in should_process_swapout_queue give the go-ahead.
1350	*/
1351	swapout_list_head = &c_late_swapout_list_head;
1352	#endif /* XNU_TARGET_OS_OSX */
1353	}
1354	if (swapout_list_head && should_process_swapout_queue(swapout_list_head)) {
1355	goto again;
1356	}
1357	}
1358
1359	assert_wait(event: (event_t)&vm_swapout_thread, THREAD_UNINT);
1360
1361	vm_swapout_thread_running = FALSE;
1362
1363	lck_mtx_unlock_always(c_list_lock);
1364
1365	thread_block(continuation: (thread_continue_t)vm_swapout_thread);
1366
1367	/ NOTREACHED /
1368	}
1369
1370
1371	void
1372	vm_swapout_iodone(void io_context, int* error)
1373	{
1374	struct swapout_io_completion *soc;
1375
1376	soc = (struct swapout_io_completion *)io_context;
1377
1378	lck_mtx_lock_spin_always(c_list_lock);
1379
1380	soc->swp_io_done = `1`;
1381	soc->swp_io_error = error;
1382	vm_swapout_soc_done++;
1383
1384	if (!vm_swapout_thread_running) {
1385	thread_wakeup((event_t)&vm_swapout_thread);
1386	}
1387
1388	lck_mtx_unlock_always(c_list_lock);
1389	}
1390
1391
1392	static void
1393	vm_swapout_finish(c_segment_t c_seg, uint64_t f_offset, uint32_t size, kern_return_t kr)
1394	{
1395	PAGE_REPLACEMENT_DISALLOWED(TRUE);
1396
1397	if (kr == KERN_SUCCESS) {
1398	kernel_memory_depopulate(addr: (vm_offset_t)c_seg->c_store.c_buffer, size,
1399	flags: KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR);
1400	}
1401	#if ENCRYPTED_SWAP
1402	else {
1403	vm_swap_decrypt(c_seg);
1404	}
1405	#endif /* ENCRYPTED_SWAP */
1406	lck_mtx_lock_spin_always(c_list_lock);
1407	lck_mtx_lock_spin_always(lck: &c_seg->c_lock);
1408
1409	if (kr == KERN_SUCCESS) {
1410	int new_state = C_ON_SWAPPEDOUT_Q;
1411	boolean_t insert_head = FALSE;
1412
1413	if (hibernate_flushing == TRUE) {
1414	if (c_seg->c_generation_id >= first_c_segment_to_warm_generation_id &&
1415	c_seg->c_generation_id <= last_c_segment_to_warm_generation_id) {
1416	insert_head = TRUE;
1417	}
1418	} else if (C_SEG_ONDISK_IS_SPARSE(c_seg)) {
1419	new_state = C_ON_SWAPPEDOUTSPARSE_Q;
1420	}
1421
1422	c_seg_switch_state(c_seg, new_state, insert_head);
1423
1424	c_seg->c_store.c_swap_handle = f_offset;
1425
1426	counter_add(&vm_statistics_swapouts, amount: size >> PAGE_SHIFT);
1427
1428	c_seg->c_swappedin = false;
1429
1430	if (c_seg->c_bytes_used) {
1431	OSAddAtomic64(-c_seg->c_bytes_used, &compressor_bytes_used);
1432	}
1433
1434	#if CONFIG_FREEZE
1435	/*
1436	* Successful swapout. Decrement the in-core compressed pages count.
1437	*/
1438	OSAddAtomic(-(c_seg->c_slots_used), &c_segment_pages_compressed_incore);
1439	assertf(c_segment_pages_compressed_incore >= `0`, "-ve incore count %p 0x%x", c_seg, c_segment_pages_compressed_incore);
1440	if (c_seg->c_has_donated_pages) {
1441	OSAddAtomic(-(c_seg->c_slots_used), &c_segment_pages_compressed_incore_late_swapout);
1442	}
1443	#endif /* CONFIG_FREEZE */
1444	} else {
1445	if (c_seg->c_overage_swap == TRUE) {
1446	c_seg->c_overage_swap = FALSE;
1447	c_overage_swapped_count--;
1448	}
1449
1450	#if CONFIG_FREEZE
1451	if (c_seg->c_has_freezer_pages) {
1452	if (c_seg->c_task_owner) {
1453	c_seg_update_task_owner(c_seg, NULL);
1454	}
1455	/*
1456	* We failed to swapout a frozen cseg. We need
1457	* to put it back in the queues, specifically the
1458	* AGE_Q. So clear the donated bit otherwise it'll
1459	* land on the swapped_in Q.
1460	*/
1461	c_seg->c_has_donated_pages = `0`;
1462	c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
1463	} else
1464	#endif /* CONFIG_FREEZE */
1465	{
1466	if (c_seg->c_has_donated_pages) {
1467	c_seg_switch_state(c_seg, C_ON_SWAPPEDIN_Q, FALSE);
1468	} else {
1469	c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
1470	}
1471	}
1472
1473	if (!c_seg->c_on_minorcompact_q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE) {
1474	c_seg_need_delayed_compaction(c_seg, TRUE);
1475	}
1476	}
1477	assert(c_seg->c_busy_swapping);
1478	assert(c_seg->c_busy);
1479
1480	c_seg->c_busy_swapping = `0`;
1481	lck_mtx_unlock_always(c_list_lock);
1482
1483	C_SEG_WAKEUP_DONE(c_seg);
1484	lck_mtx_unlock_always(&c_seg->c_lock);
1485
1486	PAGE_REPLACEMENT_DISALLOWED(FALSE);
1487	}
1488
1489
1490	boolean_t
1491	vm_swap_create_file()
1492	{
1493	uint64_t size = `0`;
1494	int namelen = `0`;
1495	boolean_t swap_file_created = FALSE;
1496	boolean_t swap_file_reuse = FALSE;
1497	boolean_t swap_file_pin = FALSE;
1498	struct swapfile *swf = NULL;
1499
1500	/*
1501	* make sure we've got all the info we need
1502	* to potentially pin a swap file... we could
1503	* be swapping out due to hibernation w/o ever
1504	* having run vm_pageout_scan, which is normally
1505	* the trigger to do the init
1506	*/
1507	vm_compaction_swapper_do_init();
1508
1509	/*
1510	* Any swapfile structure ready for re-use?
1511	*/
1512
1513	lck_mtx_lock(lck: &vm_swap_data_lock);
1514
1515	swf = (struct swapfile*) queue_first(&swf_global_queue);
1516
1517	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1518	if (swf->swp_flags == SWAP_REUSE) {
1519	swap_file_reuse = TRUE;
1520	break;
1521	}
1522	swf = (struct swapfile*) queue_next(&swf->swp_queue);
1523	}
1524
1525	lck_mtx_unlock(lck: &vm_swap_data_lock);
1526
1527	if (swap_file_reuse == FALSE) {
1528	namelen = (int)strlen(s: swapfilename) + SWAPFILENAME_INDEX_LEN + `1`;
1529
1530	swf = kalloc_type(struct swapfile, Z_WAITOK \| Z_ZERO);
1531	swf->swp_index = vm_num_swap_files + `1`;
1532	swf->swp_pathlen = namelen;
1533	swf->swp_path = kalloc_data(swf->swp_pathlen, Z_WAITOK \| Z_ZERO);
1534
1535	snprintf(swf->swp_path, namelen, "%s%d", swapfilename, vm_num_swap_files);
1536	}
1537
1538	vm_swapfile_open(path: swf->swp_path, vp: &swf->swp_vp);
1539
1540	if (swf->swp_vp == NULL) {
1541	if (swap_file_reuse == FALSE) {
1542	kfree_data(swf->swp_path, swf->swp_pathlen);
1543	kfree_type(struct swapfile, swf);
1544	}
1545	return FALSE;
1546	}
1547	vm_swapfile_can_be_created = TRUE;
1548
1549	size = MAX_SWAP_FILE_SIZE;
1550
1551	while (size >= MIN_SWAP_FILE_SIZE) {
1552	swap_file_pin = VM_SWAP_SHOULD_PIN(size);
1553
1554	if (vm_swapfile_preallocate(vp: swf->swp_vp, size: &size, pin: &swap_file_pin) == `0`) {
1555	int num_bytes_for_bitmap = `0`;
1556
1557	swap_file_created = TRUE;
1558
1559	swf->swp_size = size;
1560	swf->swp_nsegs = (unsigned int) (size / compressed_swap_chunk_size);
1561	swf->swp_nseginuse = `0`;
1562	swf->swp_free_hint = `0`;
1563
1564	num_bytes_for_bitmap = MAX((swf->swp_nsegs >> `3`), `1`);
1565	/*
1566	* Allocate a bitmap that describes the
1567	* number of segments held by this swapfile.
1568	*/
1569	swf->swp_bitmap = kalloc_data(num_bytes_for_bitmap,
1570	Z_WAITOK \| Z_ZERO);
1571
1572	swf->swp_csegs = kalloc_type(c_segment_t, swf->swp_nsegs,
1573	Z_WAITOK \| Z_ZERO);
1574
1575	/*
1576	* passing a NULL trim_list into vnode_trim_list
1577	* will return ENOTSUP if trim isn't supported
1578	* and 0 if it is
1579	*/
1580	if (vnode_trim_list(vp: swf->swp_vp, NULL, FALSE) == `0`) {
1581	swp_trim_supported = TRUE;
1582	}
1583
1584	lck_mtx_lock(lck: &vm_swap_data_lock);
1585
1586	swf->swp_flags = SWAP_READY;
1587
1588	if (swap_file_reuse == FALSE) {
1589	queue_enter(&swf_global_queue, swf, struct swapfile*, swp_queue);
1590	}
1591
1592	vm_num_swap_files++;
1593
1594	vm_swapfile_total_segs_alloced += swf->swp_nsegs;
1595	if (vm_swapfile_total_segs_alloced > vm_swapfile_total_segs_alloced_max) {
1596	vm_swapfile_total_segs_alloced_max = vm_swapfile_total_segs_alloced;
1597	}
1598
1599	if (swap_file_pin == TRUE) {
1600	vm_num_pinned_swap_files++;
1601	swf->swp_flags \|= SWAP_PINNED;
1602	vm_swappin_avail -= swf->swp_size;
1603	}
1604
1605	lck_mtx_unlock(lck: &vm_swap_data_lock);
1606
1607	thread_wakeup((event_t) &vm_num_swap_files);
1608	#if !XNU_TARGET_OS_OSX
1609	if (vm_num_swap_files == `1`) {
1610	c_overage_swapped_limit = (uint32_t)size / c_seg_bufsize;
1611
1612	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1613	c_overage_swapped_limit /= `2`;
1614	}
1615	}
1616	#endif /* !XNU_TARGET_OS_OSX */
1617	break;
1618	} else {
1619	size = size / `2`;
1620	}
1621	}
1622	if (swap_file_created == FALSE) {
1623	vm_swapfile_close(path: (uint64_t)(swf->swp_path), vp: swf->swp_vp);
1624
1625	swf->swp_vp = NULL;
1626
1627	if (swap_file_reuse == FALSE) {
1628	kfree_data(swf->swp_path, swf->swp_pathlen);
1629	kfree_type(struct swapfile, swf);
1630	}
1631	}
1632	return swap_file_created;
1633	}
1634
1635	extern void vnode_put(struct vnode* vp);
1636	kern_return_t
1637	vm_swap_get(c_segment_t c_seg, uint64_t f_offset, uint64_t size)
1638	{
1639	struct swapfile *swf = NULL;
1640	uint64_t file_offset = `0`;
1641	int retval = `0`;
1642
1643	assert(c_seg->c_store.c_buffer);
1644
1645	lck_mtx_lock(lck: &vm_swap_data_lock);
1646
1647	swf = vm_swapfile_for_handle(f_offset);
1648
1649	if (swf == NULL \|\| (!(swf->swp_flags & SWAP_READY) && !(swf->swp_flags & SWAP_RECLAIM))) {
1650	vm_swap_get_failures++;
1651	retval = `1`;
1652	goto done;
1653	}
1654	swf->swp_io_count++;
1655
1656	lck_mtx_unlock(lck: &vm_swap_data_lock);
1657
1658	#if DEVELOPMENT \|\| DEBUG
1659	C_SEG_MAKE_WRITEABLE(c_seg);
1660	#endif
1661	file_offset = (f_offset & SWAP_SLOT_MASK);
1662
1663	if ((retval = vnode_getwithref(vp: swf->swp_vp)) != `0`) {
1664	printf(format: "vm_swap_get: vnode_getwithref on swapfile failed with %d\n", retval);
1665	} else {
1666	retval = vm_swapfile_io(vp: swf->swp_vp, offset: file_offset, start: (uint64_t)c_seg->c_store.c_buffer, npages: (int)(size / PAGE_SIZE_64), SWAP_READ, NULL);
1667	vnode_put(vp: swf->swp_vp);
1668	}
1669
1670	#if DEVELOPMENT \|\| DEBUG
1671	C_SEG_WRITE_PROTECT(c_seg);
1672	#endif
1673	if (retval == `0`) {
1674	counter_add(&vm_statistics_swapins, amount: size >> PAGE_SHIFT);
1675	} else {
1676	vm_swap_get_failures++;
1677	}
1678
1679	/*
1680	* Free this slot in the swap structure.
1681	*/
1682	vm_swap_free(f_offset);
1683
1684	lck_mtx_lock(lck: &vm_swap_data_lock);
1685	swf->swp_io_count--;
1686
1687	if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == `0`) {
1688	swf->swp_flags &= ~SWAP_WANTED;
1689	thread_wakeup((event_t) &swf->swp_flags);
1690	}
1691	done:
1692	lck_mtx_unlock(lck: &vm_swap_data_lock);
1693
1694	if (retval == `0`) {
1695	return KERN_SUCCESS;
1696	} else {
1697	return KERN_FAILURE;
1698	}
1699	}
1700
1701	kern_return_t
1702	vm_swap_put(vm_offset_t addr, uint64_t f_offset, uint32_t size, c_segment_t c_seg, struct* swapout_io_completion *soc)
1703	{
1704	unsigned int segidx = `0`;
1705	struct swapfile *swf = NULL;
1706	uint64_t file_offset = `0`;
1707	uint64_t swapfile_index = `0`;
1708	unsigned int byte_for_segidx = `0`;
1709	unsigned int offset_within_byte = `0`;
1710	boolean_t swf_eligible = FALSE;
1711	boolean_t waiting = FALSE;
1712	boolean_t retried = FALSE;
1713	int error = `0`;
1714	clock_sec_t sec;
1715	clock_nsec_t nsec;
1716	void *upl_ctx = NULL;
1717	boolean_t drop_iocount = FALSE;
1718
1719	if (addr == `0` \|\| f_offset == NULL \|\| compressor_store_stop_compaction) {
1720	return KERN_FAILURE;
1721	}
1722	retry:
1723	lck_mtx_lock(lck: &vm_swap_data_lock);
1724
1725	swf = (struct swapfile*) queue_first(&swf_global_queue);
1726
1727	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1728	segidx = swf->swp_free_hint;
1729
1730	swf_eligible = (swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse < swf->swp_nsegs);
1731
1732	if (swf_eligible) {
1733	while (segidx < swf->swp_nsegs) {
1734	byte_for_segidx = segidx >> `3`;
1735	offset_within_byte = segidx % `8`;
1736
1737	if ((swf->swp_bitmap)[byte_for_segidx] & (`1` << offset_within_byte)) {
1738	segidx++;
1739	continue;
1740	}
1741
1742	(swf->swp_bitmap)[byte_for_segidx] \|= (uint8_t)(`1` << offset_within_byte);
1743
1744	file_offset = segidx * compressed_swap_chunk_size;
1745	swf->swp_nseginuse++;
1746	swf->swp_io_count++;
1747	swf->swp_csegs[segidx] = c_seg;
1748
1749	swapfile_index = swf->swp_index;
1750	vm_swapfile_total_segs_used++;
1751	if (vm_swapfile_total_segs_used > vm_swapfile_total_segs_used_max) {
1752	vm_swapfile_total_segs_used_max = vm_swapfile_total_segs_used;
1753	}
1754
1755	clock_get_system_nanotime(secs: &sec, nanosecs: &nsec);
1756
1757	if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running) {
1758	thread_wakeup((event_t) &vm_swapfile_create_needed);
1759	}
1760
1761	lck_mtx_unlock(lck: &vm_swap_data_lock);
1762
1763	goto issue_io;
1764	}
1765	}
1766	swf = (struct swapfile*) queue_next(&swf->swp_queue);
1767	}
1768	assert(queue_end(&swf_global_queue, (queue_entry_t) swf));
1769
1770	/*
1771	* we've run out of swap segments, but may not
1772	* be in a position to immediately create a new swap
1773	* file if we've recently failed to create due to a lack
1774	* of free space in the root filesystem... we'll try
1775	* to kick that create off, but in any event we're going
1776	* to take a breather (up to 1 second) so that we're not caught in a tight
1777	* loop back in "vm_compressor_compact_and_swap" trying to stuff
1778	* segments into swap files only to have them immediately put back
1779	* on the c_age queue due to vm_swap_put failing.
1780	*
1781	* if we're doing these puts due to a hibernation flush,
1782	* no need to block... setting hibernate_no_swapspace to TRUE,
1783	* will cause "vm_compressor_compact_and_swap" to immediately abort
1784	*/
1785	clock_get_system_nanotime(secs: &sec, nanosecs: &nsec);
1786
1787	if (VM_SWAP_SHOULD_CREATE(sec)) {
1788	if (!vm_swapfile_create_thread_running) {
1789	thread_wakeup((event_t) &vm_swapfile_create_needed);
1790	}
1791	waiting = TRUE;
1792	assert_wait_timeout(event: (event_t) &vm_num_swap_files, THREAD_INTERRUPTIBLE, interval: `1000`, scale_factor: `1000` * NSEC_PER_USEC);
1793	} else {
1794	if (hibernate_flushing) {
1795	hibernate_no_swapspace = TRUE;
1796	}
1797	}
1798
1799	lck_mtx_unlock(lck: &vm_swap_data_lock);
1800
1801	if (waiting == TRUE) {
1802	thread_block(THREAD_CONTINUE_NULL);
1803
1804	if (retried == FALSE && hibernate_flushing == TRUE) {
1805	retried = TRUE;
1806	goto retry;
1807	}
1808	}
1809	vm_swap_put_failures_no_swap_file++;
1810
1811	return KERN_FAILURE;
1812
1813	issue_io:
1814	assert(c_seg->c_busy_swapping);
1815	assert(c_seg->c_busy);
1816	assert(!c_seg->c_on_minorcompact_q);
1817
1818	*f_offset = (swapfile_index << SWAP_DEVICE_SHIFT) \| file_offset;
1819
1820	if (soc) {
1821	soc->swp_c_seg = c_seg;
1822	soc->swp_c_size = size;
1823
1824	soc->swp_swf = swf;
1825
1826	soc->swp_io_error = `0`;
1827	soc->swp_io_done = `0`;
1828
1829	upl_ctx = (void *)&soc->swp_upl_ctx;
1830	}
1831
1832	if ((error = vnode_getwithref(vp: swf->swp_vp)) != `0`) {
1833	printf(format: "vm_swap_put: vnode_getwithref on swapfile failed with %d\n", error);
1834	} else {
1835	error = vm_swapfile_io(vp: swf->swp_vp, offset: file_offset, start: addr, npages: (int) (size / PAGE_SIZE_64), SWAP_WRITE, upl_ctx);
1836	drop_iocount = TRUE;
1837	}
1838
1839	if (error \|\| upl_ctx == NULL) {
1840	return vm_swap_put_finish(swf, f_offset, error, drop_iocount);
1841	}
1842
1843	return KERN_SUCCESS;
1844	}
1845
1846	kern_return_t
1847	vm_swap_put_finish(struct swapfile swf, uint64_t f_offset, int error, boolean_t drop_iocount)
1848	{
1849	if (drop_iocount) {
1850	vnode_put(vp: swf->swp_vp);
1851	}
1852
1853	lck_mtx_lock(lck: &vm_swap_data_lock);
1854
1855	swf->swp_io_count--;
1856
1857	if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == `0`) {
1858	swf->swp_flags &= ~SWAP_WANTED;
1859	thread_wakeup((event_t) &swf->swp_flags);
1860	}
1861	lck_mtx_unlock(lck: &vm_swap_data_lock);
1862
1863	if (error) {
1864	vm_swap_free(*f_offset);
1865	vm_swap_put_failures++;
1866
1867	return KERN_FAILURE;
1868	}
1869	return KERN_SUCCESS;
1870	}
1871
1872
1873	static void
1874	vm_swap_free_now(struct swapfile *swf, uint64_t f_offset)
1875	{
1876	uint64_t file_offset = `0`;
1877	unsigned int segidx = `0`;
1878
1879
1880	if ((swf->swp_flags & SWAP_READY) \|\| (swf->swp_flags & SWAP_RECLAIM)) {
1881	unsigned int byte_for_segidx = `0`;
1882	unsigned int offset_within_byte = `0`;
1883
1884	file_offset = (f_offset & SWAP_SLOT_MASK);
1885	segidx = (unsigned int) (file_offset / compressed_swap_chunk_size);
1886
1887	byte_for_segidx = segidx >> `3`;
1888	offset_within_byte = segidx % `8`;
1889
1890	if ((swf->swp_bitmap)[byte_for_segidx] & (`1` << offset_within_byte)) {
1891	(swf->swp_bitmap)[byte_for_segidx] &= ~(`1` << offset_within_byte);
1892
1893	swf->swp_csegs[segidx] = NULL;
1894
1895	swf->swp_nseginuse--;
1896	vm_swapfile_total_segs_used--;
1897
1898	if (segidx < swf->swp_free_hint) {
1899	swf->swp_free_hint = segidx;
1900	}
1901	}
1902	if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running) {
1903	thread_wakeup((event_t) &vm_swapfile_gc_needed);
1904	}
1905	}
1906	}
1907
1908
1909	uint32_t vm_swap_free_now_count = `0`;
1910	uint32_t vm_swap_free_delayed_count = `0`;
1911
1912
1913	void
1914	vm_swap_free(uint64_t f_offset)
1915	{
1916	struct swapfile *swf = NULL;
1917	struct trim_list *tl = NULL;
1918	clock_sec_t sec;
1919	clock_nsec_t nsec;
1920
1921	if (swp_trim_supported == TRUE) {
1922	tl = kalloc_type(struct trim_list, Z_WAITOK);
1923	}
1924
1925	lck_mtx_lock(lck: &vm_swap_data_lock);
1926
1927	swf = vm_swapfile_for_handle(f_offset);
1928
1929	if (swf && (swf->swp_flags & (SWAP_READY \| SWAP_RECLAIM))) {
1930	if (swp_trim_supported == FALSE \|\| (swf->swp_flags & SWAP_RECLAIM)) {
1931	/*
1932	* don't delay the free if the underlying disk doesn't support
1933	* trim, or we're in the midst of reclaiming this swap file since
1934	* we don't want to move segments that are technically free
1935	* but not yet handled by the delayed free mechanism
1936	*/
1937	vm_swap_free_now(swf, f_offset);
1938
1939	vm_swap_free_now_count++;
1940	goto done;
1941	}
1942	tl->tl_offset = f_offset & SWAP_SLOT_MASK;
1943	tl->tl_length = compressed_swap_chunk_size;
1944
1945	tl->tl_next = swf->swp_delayed_trim_list_head;
1946	swf->swp_delayed_trim_list_head = tl;
1947	swf->swp_delayed_trim_count++;
1948	tl = NULL;
1949
1950	if (VM_SWAP_SHOULD_TRIM(swf) && !vm_swapfile_create_thread_running) {
1951	clock_get_system_nanotime(secs: &sec, nanosecs: &nsec);
1952
1953	if (sec > dont_trim_until_ts) {
1954	thread_wakeup((event_t) &vm_swapfile_create_needed);
1955	}
1956	}
1957	vm_swap_free_delayed_count++;
1958	}
1959	done:
1960	lck_mtx_unlock(lck: &vm_swap_data_lock);
1961
1962	if (tl != NULL) {
1963	kfree_type(struct trim_list, tl);
1964	}
1965	}
1966
1967
1968	static void
1969	vm_swap_wait_on_trim_handling_in_progress()
1970	{
1971	while (delayed_trim_handling_in_progress == TRUE) {
1972	assert_wait(event: (event_t) &delayed_trim_handling_in_progress, THREAD_UNINT);
1973	lck_mtx_unlock(lck: &vm_swap_data_lock);
1974
1975	thread_block(THREAD_CONTINUE_NULL);
1976
1977	lck_mtx_lock(lck: &vm_swap_data_lock);
1978	}
1979	}
1980
1981
1982	static void
1983	vm_swap_handle_delayed_trims(boolean_t force_now)
1984	{
1985	struct swapfile *swf = NULL;
1986
1987	/*
1988	* serialize the race between us and vm_swap_reclaim...
1989	* if vm_swap_reclaim wins it will turn off SWAP_READY
1990	* on the victim it has chosen... we can just skip over
1991	* that file since vm_swap_reclaim will first process
1992	* all of the delayed trims associated with it
1993	*/
1994
1995	if (compressor_store_stop_compaction == TRUE) {
1996	return;
1997	}
1998
1999	lck_mtx_lock(lck: &vm_swap_data_lock);
2000
2001	delayed_trim_handling_in_progress = TRUE;
2002
2003	lck_mtx_unlock(lck: &vm_swap_data_lock);
2004
2005	/*
2006	* no need to hold the lock to walk the swf list since
2007	* vm_swap_create (the only place where we add to this list)
2008	* is run on the same thread as this function
2009	* and vm_swap_reclaim doesn't remove items from this list
2010	* instead marking them with SWAP_REUSE for future re-use
2011	*/
2012	swf = (struct swapfile*) queue_first(&swf_global_queue);
2013
2014	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
2015	if ((swf->swp_flags & SWAP_READY) && (force_now == TRUE \|\| VM_SWAP_SHOULD_TRIM(swf))) {
2016	assert(!(swf->swp_flags & SWAP_RECLAIM));
2017	vm_swap_do_delayed_trim(swf);
2018	}
2019	swf = (struct swapfile*) queue_next(&swf->swp_queue);
2020	}
2021	lck_mtx_lock(lck: &vm_swap_data_lock);
2022
2023	delayed_trim_handling_in_progress = FALSE;
2024	thread_wakeup((event_t) &delayed_trim_handling_in_progress);
2025
2026	if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running) {
2027	thread_wakeup((event_t) &vm_swapfile_gc_needed);
2028	}
2029
2030	lck_mtx_unlock(lck: &vm_swap_data_lock);
2031	}
2032
2033	static void
2034	vm_swap_do_delayed_trim(struct swapfile *swf)
2035	{
2036	struct trim_list tl, tl_head;
2037	int error;
2038
2039	if (compressor_store_stop_compaction == TRUE) {
2040	return;
2041	}
2042
2043	if ((error = vnode_getwithref(vp: swf->swp_vp)) != `0`) {
2044	printf(format: "vm_swap_do_delayed_trim: vnode_getwithref on swapfile failed with %d\n", error);
2045	return;
2046	}
2047
2048	lck_mtx_lock(lck: &vm_swap_data_lock);
2049
2050	tl_head = swf->swp_delayed_trim_list_head;
2051	swf->swp_delayed_trim_list_head = NULL;
2052	swf->swp_delayed_trim_count = `0`;
2053
2054	lck_mtx_unlock(lck: &vm_swap_data_lock);
2055
2056	vnode_trim_list(vp: swf->swp_vp, tl: tl_head, TRUE);
2057
2058	(void) vnode_put(vp: swf->swp_vp);
2059
2060	while ((tl = tl_head) != NULL) {
2061	unsigned int segidx = `0`;
2062	unsigned int byte_for_segidx = `0`;
2063	unsigned int offset_within_byte = `0`;
2064
2065	lck_mtx_lock(lck: &vm_swap_data_lock);
2066
2067	segidx = (unsigned int) (tl->tl_offset / compressed_swap_chunk_size);
2068
2069	byte_for_segidx = segidx >> `3`;
2070	offset_within_byte = segidx % `8`;
2071
2072	if ((swf->swp_bitmap)[byte_for_segidx] & (`1` << offset_within_byte)) {
2073	(swf->swp_bitmap)[byte_for_segidx] &= ~(`1` << offset_within_byte);
2074
2075	swf->swp_csegs[segidx] = NULL;
2076
2077	swf->swp_nseginuse--;
2078	vm_swapfile_total_segs_used--;
2079
2080	if (segidx < swf->swp_free_hint) {
2081	swf->swp_free_hint = segidx;
2082	}
2083	}
2084	lck_mtx_unlock(lck: &vm_swap_data_lock);
2085
2086	tl_head = tl->tl_next;
2087
2088	kfree_type(struct trim_list, tl);
2089	}
2090	}
2091
2092
/*
 * No-op: the body performs no work and returns immediately.
 */
void
vm_swap_flush()
{
}
2098
2099	int vm_swap_reclaim_yielded = `0`;
2100
2101	void
2102	vm_swap_reclaim(void)
2103	{
2104	vm_offset_t addr = `0`;
2105	unsigned int segidx = `0`;
2106	uint64_t f_offset = `0`;
2107	struct swapfile *swf = NULL;
2108	struct swapfile *smallest_swf = NULL;
2109	unsigned int min_nsegs = `0`;
2110	unsigned int byte_for_segidx = `0`;
2111	unsigned int offset_within_byte = `0`;
2112	uint32_t c_size = `0`;
2113
2114	c_segment_t c_seg = NULL;
2115
2116	kmem_alloc(map: compressor_map, addrp: (vm_offset_t *)&addr, size: c_seg_bufsize,
2117	flags: KMA_NOFAIL \| KMA_KOBJECT \| KMA_DATA, VM_KERN_MEMORY_COMPRESSOR);
2118
2119	lck_mtx_lock(lck: &vm_swap_data_lock);
2120
2121	/*
2122	* if we're running the swapfile list looking for
2123	* candidates with delayed trims, we need to
2124	* wait before making our decision concerning
2125	* the swapfile we want to reclaim
2126	*/
2127	vm_swap_wait_on_trim_handling_in_progress();
2128
2129	/*
2130	* from here until we knock down the SWAP_READY bit,
2131	* we need to remain behind the vm_swap_data_lock...
2132	* once that bit has been turned off, "vm_swap_handle_delayed_trims"
2133	* will not consider this swapfile for processing
2134	*/
2135	swf = (struct swapfile*) queue_first(&swf_global_queue);
2136	min_nsegs = MAX_SWAP_FILE_SIZE / compressed_swap_chunk_size;
2137	smallest_swf = NULL;
2138
2139	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
2140	if ((swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse <= min_nsegs)) {
2141	smallest_swf = swf;
2142	min_nsegs = swf->swp_nseginuse;
2143	}
2144	swf = (struct swapfile*) queue_next(&swf->swp_queue);
2145	}
2146
2147	if (smallest_swf == NULL) {
2148	goto done;
2149	}
2150
2151	swf = smallest_swf;
2152
2153
2154	swf->swp_flags &= ~SWAP_READY;
2155	swf->swp_flags \|= SWAP_RECLAIM;
2156
2157	if (swf->swp_delayed_trim_count) {
2158	lck_mtx_unlock(lck: &vm_swap_data_lock);
2159
2160	vm_swap_do_delayed_trim(swf);
2161
2162	lck_mtx_lock(lck: &vm_swap_data_lock);
2163	}
2164	segidx = `0`;
2165
2166	while (segidx < swf->swp_nsegs) {
2167	ReTry_for_cseg:
2168	/*
2169	* Wait for outgoing I/Os.
2170	*/
2171	while (swf->swp_io_count) {
2172	swf->swp_flags \|= SWAP_WANTED;
2173
2174	assert_wait(event: (event_t) &swf->swp_flags, THREAD_UNINT);
2175	lck_mtx_unlock(lck: &vm_swap_data_lock);
2176
2177	thread_block(THREAD_CONTINUE_NULL);
2178
2179	lck_mtx_lock(lck: &vm_swap_data_lock);
2180	}
2181	if (compressor_store_stop_compaction == TRUE \|\| VM_SWAP_SHOULD_ABORT_RECLAIM() \|\| VM_SWAP_BUSY()) {
2182	vm_swap_reclaim_yielded++;
2183	break;
2184	}
2185
2186	byte_for_segidx = segidx >> `3`;
2187	offset_within_byte = segidx % `8`;
2188
2189	if (((swf->swp_bitmap)[byte_for_segidx] & (`1` << offset_within_byte)) == `0`) {
2190	segidx++;
2191	continue;
2192	}
2193
2194	c_seg = swf->swp_csegs[segidx];
2195	assert(c_seg);
2196
2197	lck_mtx_lock_spin_always(lck: &c_seg->c_lock);
2198
2199	if (c_seg->c_busy) {
2200	/*
2201	* a swapped out c_segment in the process of being freed will remain in the
2202	* busy state until after the vm_swap_free is called on it... vm_swap_free
2203	* takes the vm_swap_data_lock, so can't change the swap state until after
2204	* we drop the vm_swap_data_lock... once we do, vm_swap_free will complete
2205	* which will allow c_seg_free_locked to clear busy and wake up this thread...
2206	* at that point, we re-look up the swap state which will now indicate that
2207	* this c_segment no longer exists.
2208	*/
2209	c_seg->c_wanted = `1`;
2210
2211	assert_wait(event: (event_t) (c_seg), THREAD_UNINT);
2212	lck_mtx_unlock_always(&c_seg->c_lock);
2213
2214	lck_mtx_unlock(lck: &vm_swap_data_lock);
2215
2216	thread_block(THREAD_CONTINUE_NULL);
2217
2218	lck_mtx_lock(lck: &vm_swap_data_lock);
2219
2220	goto ReTry_for_cseg;
2221	}
2222	(swf->swp_bitmap)[byte_for_segidx] &= ~(`1` << offset_within_byte);
2223
2224	f_offset = segidx * compressed_swap_chunk_size;
2225
2226	assert(c_seg == swf->swp_csegs[segidx]);
2227	swf->swp_csegs[segidx] = NULL;
2228	swf->swp_nseginuse--;
2229
2230	vm_swapfile_total_segs_used--;
2231
2232	lck_mtx_unlock(lck: &vm_swap_data_lock);
2233
2234	assert(C_SEG_IS_ONDISK(c_seg));
2235
2236	C_SEG_BUSY(c_seg);
2237	c_seg->c_busy_swapping = `1`;
2238	#if !CHECKSUM_THE_SWAP
2239	c_seg_trim_tail(c_seg);
2240	#endif
2241	c_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
2242
2243	assert(c_size <= c_seg_bufsize && c_size);
2244
2245	lck_mtx_unlock_always(&c_seg->c_lock);
2246
2247	if (vnode_getwithref(vp: swf->swp_vp)) {
2248	printf(format: "vm_swap_reclaim: vnode_getwithref on swapfile failed.\n");
2249	vm_swap_get_failures++;
2250	goto swap_io_failed;
2251	} else {
2252	if (vm_swapfile_io(vp: swf->swp_vp, offset: f_offset, start: addr, npages: (int)(c_size / PAGE_SIZE_64), SWAP_READ, NULL)) {
2253	/*
2254	* reading the data back in failed, so convert c_seg
2255	* to a swapped in c_segment that contains no data
2256	*/
2257	c_seg_swapin_requeue(c_seg, FALSE, TRUE, FALSE);
2258	/*
2259	* returns with c_busy_swapping cleared
2260	*/
2261	vnode_put(vp: swf->swp_vp);
2262	vm_swap_get_failures++;
2263	goto swap_io_failed;
2264	}
2265	vnode_put(vp: swf->swp_vp);
2266	}
2267
2268	counter_add(&vm_statistics_swapins, amount: c_size >> PAGE_SHIFT);
2269	vmcs_stats.reclaim_swapins += c_size >> PAGE_SHIFT;
2270
2271	if (vm_swap_put(addr, f_offset: &f_offset, size: c_size, c_seg, NULL)) {
2272	vm_offset_t c_buffer;
2273
2274	/*
2275	* the put failed, so convert c_seg to a fully swapped in c_segment
2276	* with valid data
2277	*/
2278	c_buffer = (vm_offset_t)C_SEG_BUFFER_ADDRESS(c_seg->c_mysegno);
2279
2280	kernel_memory_populate(addr: c_buffer, size: c_size,
2281	flags: KMA_NOFAIL \| KMA_COMPRESSOR,
2282	VM_KERN_MEMORY_COMPRESSOR);
2283
2284	memcpy(dst: (char )c_buffer, src: (char* *)addr, n: c_size);
2285
2286	c_seg->c_store.c_buffer = (int32_t *)c_buffer;
2287	#if ENCRYPTED_SWAP
2288	vm_swap_decrypt(c_seg);
2289	#endif /* ENCRYPTED_SWAP */
2290	c_seg_swapin_requeue(c_seg, TRUE, TRUE, FALSE);
2291	/*
2292	* returns with c_busy_swapping cleared
2293	*/
2294	OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used);
2295
2296	goto swap_io_failed;
2297	}
2298	counter_add(&vm_statistics_swapouts, amount: c_size >> PAGE_SHIFT);
2299
2300	lck_mtx_lock_spin_always(lck: &c_seg->c_lock);
2301
2302	c_seg->c_swappedin = false;
2303
2304	assert(C_SEG_IS_ONDISK(c_seg));
2305	/*
2306	* The c_seg will now know about the new location on disk.
2307	*/
2308	c_seg->c_store.c_swap_handle = f_offset;
2309
2310	assert(c_seg->c_busy_swapping);
2311	c_seg->c_busy_swapping = `0`;
2312	swap_io_failed:
2313	assert(c_seg->c_busy);
2314	C_SEG_WAKEUP_DONE(c_seg);
2315
2316	lck_mtx_unlock_always(&c_seg->c_lock);
2317	lck_mtx_lock(lck: &vm_swap_data_lock);
2318	}
2319
2320	if (swf->swp_nseginuse) {
2321	swf->swp_flags &= ~SWAP_RECLAIM;
2322	swf->swp_flags \|= SWAP_READY;
2323
2324	goto done;
2325	}
2326	/*
2327	* We don't remove this inactive swf from the queue.
2328	* That way, we can re-use it when needed again and
2329	* preserve the namespace. The delayed_trim processing
2330	* is also dependent on us not removing swfs from the queue.
2331	*/
2332	//queue_remove(&swf_global_queue, swf, struct swapfile, swp_queue);*
2333
2334	vm_swapfile_total_segs_alloced -= swf->swp_nsegs;
2335
2336	lck_mtx_unlock(lck: &vm_swap_data_lock);
2337
2338	vm_swapfile_close(path: (uint64_t)(swf->swp_path), vp: swf->swp_vp);
2339
2340	kfree_type(c_segment_t, swf->swp_nsegs, swf->swp_csegs);
2341	kfree_data(swf->swp_bitmap, MAX((swf->swp_nsegs >> `3`), `1`));
2342
2343	lck_mtx_lock(lck: &vm_swap_data_lock);
2344
2345	if (swf->swp_flags & SWAP_PINNED) {
2346	vm_num_pinned_swap_files--;
2347	vm_swappin_avail += swf->swp_size;
2348	}
2349
2350	swf->swp_vp = NULL;
2351	swf->swp_size = `0`;
2352	swf->swp_free_hint = `0`;
2353	swf->swp_nsegs = `0`;
2354	swf->swp_flags = SWAP_REUSE;
2355
2356	vm_num_swap_files--;
2357
2358	done:
2359	thread_wakeup((event_t) &swf->swp_flags);
2360	lck_mtx_unlock(lck: &vm_swap_data_lock);
2361
2362	kmem_free(map: compressor_map, addr: (vm_offset_t) addr, size: c_seg_bufsize);
2363	}
2364
2365
2366	uint64_t
2367	vm_swap_get_total_space(void)
2368	{
2369	uint64_t total_space = `0`;
2370
2371	total_space = (uint64_t)vm_swapfile_total_segs_alloced * compressed_swap_chunk_size;
2372
2373	return total_space;
2374	}
2375
2376	uint64_t
2377	vm_swap_get_used_space(void)
2378	{
2379	uint64_t used_space = `0`;
2380
2381	used_space = (uint64_t)vm_swapfile_total_segs_used * compressed_swap_chunk_size;
2382
2383	return used_space;
2384	}
2385
/*
 * Return the allocated swap space that is not in use, in bytes
 * (total space minus used space).
 */
uint64_t
vm_swap_get_free_space(void)
{
	uint64_t total_bytes = vm_swap_get_total_space();
	uint64_t used_bytes = vm_swap_get_used_space();

	return total_bytes - used_bytes;
}
2391
2392	uint64_t
2393	vm_swap_get_max_configured_space(void)
2394	{
2395	int num_swap_files = (vm_num_swap_files_config ? vm_num_swap_files_config : VM_MAX_SWAP_FILE_NUM);
2396	return num_swap_files * MAX_SWAP_FILE_SIZE;
2397	}
2398
2399	int
2400	vm_swap_low_on_space(void)
2401	{
2402	if (vm_num_swap_files == `0` && vm_swapfile_can_be_created == FALSE) {
2403	return `0`;
2404	}
2405
2406	if (((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < ((unsigned int)vm_swapfile_hiwater_segs) / `8`)) {
2407	if (vm_num_swap_files == `0` && !SWAPPER_NEEDS_TO_UNTHROTTLE()) {
2408	return `0`;
2409	}
2410
2411	if (vm_swapfile_last_failed_to_create_ts >= vm_swapfile_last_successful_create_ts) {
2412	return `1`;
2413	}
2414	}
2415	return `0`;
2416	}
2417
2418	int
2419	vm_swap_out_of_space(void)
2420	{
2421	if ((vm_num_swap_files == vm_num_swap_files_config) &&
2422	((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < VM_SWAPOUT_LIMIT_MAX)) {
2423	/*
2424	* Last swapfile and we have only space for the
2425	* last few swapouts.
2426	*/
2427	return `1`;
2428	}
2429
2430	return `0`;
2431	}
2432
2433	boolean_t
2434	vm_swap_files_pinned(void)
2435	{
2436	boolean_t result;
2437
2438	if (vm_swappin_enabled == FALSE) {
2439	return TRUE;
2440	}
2441
2442	result = (vm_num_pinned_swap_files == vm_num_swap_files);
2443
2444	return result;
2445	}
2446
#if CONFIG_FREEZE
/*
 * Query the swap volume for the freezer's daily budget.
 *
 * freeze_daily_budget: out-parameter passed through to
 *                      vm_swap_vol_get_budget(); set to 0 here only when
 *                      no swapfile exists and a temporary one cannot be
 *                      opened.
 *
 * Returns TRUE when vm_swap_vol_get_budget() succeeded (returned 0),
 * FALSE otherwise.
 *
 * When swapfiles exist, the first one with SWAP_READY set is queried
 * under vm_swap_data_lock.  The flag is tested as a bit, matching the
 * other queue walks in this file, so a ready swapfile that also carries
 * other flags is not skipped.
 */
boolean_t
vm_swap_max_budget(uint64_t *freeze_daily_budget)
{
	boolean_t       use_device_value = FALSE;
	struct swapfile *swf = NULL;

	if (vm_num_swap_files) {
		lck_mtx_lock(&vm_swap_data_lock);

		swf = (struct swapfile*) queue_first(&swf_global_queue);

		if (swf) {
			while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
				if (swf->swp_flags & SWAP_READY) {
					assert(swf->swp_vp);

					if (vm_swap_vol_get_budget(swf->swp_vp, freeze_daily_budget) == 0) {
						use_device_value = TRUE;
					}
					/* only the first ready swapfile is consulted */
					break;
				}
				swf = (struct swapfile*) queue_next(&swf->swp_queue);
			}
		}

		lck_mtx_unlock(&vm_swap_data_lock);
	} else {
		/*
		 * This block is used for the initial budget value before any swap files
		 * are created. We create a temp swap file to get the budget.
		 */

		struct vnode *temp_vp = NULL;

		vm_swapfile_open(swapfilename, &temp_vp);

		if (temp_vp) {
			if (vm_swap_vol_get_budget(temp_vp, freeze_daily_budget) == 0) {
				use_device_value = TRUE;
			}

			vm_swapfile_close((uint64_t)&swapfilename, temp_vp);
			temp_vp = NULL;
		} else {
			*freeze_daily_budget = 0;
		}
	}

	return use_device_value;
}
#endif /* CONFIG_FREEZE */
2499
2500	void
2501	vm_swap_reset_max_segs_tracking(uint64_t alloced_max, uint64_t used_max)
2502	{
2503	lck_mtx_lock(lck: &vm_swap_data_lock);
2504
2505	alloced_max = (uint64_t) vm_swapfile_total_segs_alloced_max compressed_swap_chunk_size;
2506	used_max = (uint64_t) vm_swapfile_total_segs_used_max compressed_swap_chunk_size;
2507
2508	vm_swapfile_total_segs_alloced_max = vm_swapfile_total_segs_alloced;
2509	vm_swapfile_total_segs_used_max = vm_swapfile_total_segs_used;
2510
2511	lck_mtx_unlock(lck: &vm_swap_data_lock);
2512	}
2513

Browse the source code of xnu/osfmk/vm/vm_compressor_backing_store.c