1/*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <vm/vm_compressor.h>
30
31#if CONFIG_PHANTOM_CACHE
32#include <vm/vm_phantom_cache.h>
33#endif
34
35#include <vm/vm_map.h>
36#include <vm/vm_pageout.h>
37#include <vm/memory_object.h>
38#include <vm/vm_compressor_algorithms.h>
39#include <vm/vm_compressor_backing_store.h>
40#include <vm/vm_fault.h>
41#include <vm/vm_protos.h>
42#include <mach/mach_host.h> /* for host_info() */
43#if DEVELOPMENT || DEBUG
44#include <kern/hvg_hypercall.h>
45#endif
46#include <kern/ledger.h>
47#include <kern/policy_internal.h>
48#include <kern/thread_group.h>
49#include <san/kasan.h>
50#include <os/log.h>
51#include <pexpert/pexpert.h>
52#include <pexpert/device_tree.h>
53
54#if defined(__x86_64__)
55#include <i386/misc_protos.h>
56#endif
57#if defined(__arm64__)
58#include <arm/machine_routines.h>
59#endif
60
61#include <IOKit/IOHibernatePrivate.h>
62
/*
 * The segment buffer size is a tradeoff.
 * A larger buffer leads to faster I/O throughput, better compression ratios
 * (since fewer bytes are wasted at the end of the segment),
 * and less overhead (both in time and space).
 * However, a smaller buffer causes less swap I/O when the system is
 * overcommitted, because a higher percentage of each swapped-in segment is
 * actually accessed before the segment goes back out to storage.
 *
 * So on systems without swap, a larger segment is a clear win.
 * On systems with swap, the choice is murkier. Empirically, we've
 * found that a 64KB segment provides a better tradeoff, both in terms of
 * performance and swap writes, than a 256KB segment on systems with fast SSDs
 * and a HW compression block.
 */
78#define C_SEG_BUFSIZE_ARM_SWAP (1024 * 64)
79#if XNU_TARGET_OS_OSX && defined(__arm64__)
80#define C_SEG_BUFSIZE_DEFAULT C_SEG_BUFSIZE_ARM_SWAP
81#else
82#define C_SEG_BUFSIZE_DEFAULT (1024 * 256)
#endif /* XNU_TARGET_OS_OSX && defined(__arm64__) */
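/*
 * Worked example of the swap-in granularity tradeoff described above, with
 * illustrative numbers only (4KB pages assumed; a swapped-out segment is
 * generally read back in as a unit): faulting on a single compressed page
 * whose segment is on swap pulls in the whole segment, i.e. up to 64 pages
 * of I/O for a 256KB segment but at most 16 pages for a 64KB segment, so a
 * larger fraction of what is read is likely to be used before the segment
 * goes back out.
 */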
84uint32_t c_seg_bufsize;
85
86uint32_t c_seg_max_pages, c_seg_off_limit, c_seg_allocsize, c_seg_slot_var_array_min_len;
87
88extern boolean_t vm_darkwake_mode;
89extern zone_t vm_page_zone;
90
91#if DEVELOPMENT || DEBUG
92/* sysctl defined in bsd/dev/arm64/sysctl.c */
93int do_cseg_wedge_thread(void);
94int do_cseg_unwedge_thread(void);
95static event_t debug_cseg_wait_event = NULL;
96#endif /* DEVELOPMENT || DEBUG */
97
98#if CONFIG_FREEZE
99bool freezer_incore_cseg_acct = TRUE; /* Only count incore compressed memory for jetsams. */
100void task_disown_frozen_csegs(task_t owner_task);
101#endif /* CONFIG_FREEZE */
102
103#if POPCOUNT_THE_COMPRESSED_DATA
104boolean_t popcount_c_segs = TRUE;
105
106static inline uint32_t
107vmc_pop(uintptr_t ins, int sz)
108{
109 uint32_t rv = 0;
110
111 if (__probable(popcount_c_segs == FALSE)) {
112 return 0xDEAD707C;
113 }
114
115 while (sz >= 16) {
116 uint32_t rv1, rv2;
117 uint64_t *ins64 = (uint64_t *) ins;
118 uint64_t *ins642 = (uint64_t *) (ins + 8);
119 rv1 = __builtin_popcountll(*ins64);
120 rv2 = __builtin_popcountll(*ins642);
121 rv += rv1 + rv2;
122 sz -= 16;
123 ins += 16;
124 }
125
126 while (sz >= 4) {
127 uint32_t *ins32 = (uint32_t *) ins;
128 rv += __builtin_popcount(*ins32);
129 sz -= 4;
130 ins += 4;
131 }
132
133 while (sz > 0) {
134 char *ins8 = (char *)ins;
135 rv += __builtin_popcount(*ins8);
136 sz--;
137 ins++;
138 }
139 return rv;
140}
141#endif
142
143#if VALIDATE_C_SEGMENTS
144boolean_t validate_c_segs = TRUE;
145#endif
146/*
147 * vm_compressor_mode has a hierarchy of control to set its value.
148 * boot-args are checked first, then device-tree, and finally
149 * the default value that is defined below. See vm_fault_init() for
150 * the boot-arg & device-tree code.
151 */
152
153#if !XNU_TARGET_OS_OSX
154
155#if CONFIG_FREEZE
156int vm_compressor_mode = VM_PAGER_FREEZER_DEFAULT;
157struct freezer_context freezer_context_global;
158#else /* CONFIG_FREEZE */
159int vm_compressor_mode = VM_PAGER_NOT_CONFIGURED;
160#endif /* CONFIG_FREEZE */
161
162#else /* !XNU_TARGET_OS_OSX */
163int vm_compressor_mode = VM_PAGER_COMPRESSOR_WITH_SWAP;
164
165#endif /* !XNU_TARGET_OS_OSX */
166
167TUNABLE(uint32_t, vm_compression_limit, "vm_compression_limit", 0);
168int vm_compressor_is_active = 0;
169int vm_compressor_available = 0;
170
171extern uint64_t vm_swap_get_max_configured_space(void);
172extern void vm_pageout_io_throttle(void);
173bool vm_compressor_swapout_is_ripe(void);
174
175#if CHECKSUM_THE_DATA || CHECKSUM_THE_SWAP || CHECKSUM_THE_COMPRESSED_DATA
176extern unsigned int hash_string(char *cp, int len);
177static unsigned int vmc_hash(char *, int);
178boolean_t checksum_c_segs = TRUE;
179
180unsigned int
181vmc_hash(char *cp, int len)
182{
183 if (__probable(checksum_c_segs == FALSE)) {
184 return 0xDEAD7A37;
185 }
186 return hash_string(cp, len);
187}
188#endif
189
190#define UNPACK_C_SIZE(cs) ((cs->c_size == (PAGE_SIZE-1)) ? PAGE_SIZE : cs->c_size)
191#define PACK_C_SIZE(cs, size) (cs->c_size = ((size == PAGE_SIZE) ? PAGE_SIZE - 1 : size))
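/*
 * Illustrative round trip through the macros above (assuming a 4KB
 * PAGE_SIZE): an incompressible page is recorded as PACK_C_SIZE(cs, 4096),
 * which stores 4095, keeping the value within the range the c_size field can
 * represent, and UNPACK_C_SIZE(cs) maps the stored 4095 back to 4096. Every
 * other size is stored and returned unchanged.
 */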
192
193
194struct c_sv_hash_entry {
195 union {
196 struct {
197 uint32_t c_sv_he_ref;
198 uint32_t c_sv_he_data;
199 } c_sv_he;
200 uint64_t c_sv_he_record;
201 } c_sv_he_un;
202};
203
204#define he_ref c_sv_he_un.c_sv_he.c_sv_he_ref
205#define he_data c_sv_he_un.c_sv_he.c_sv_he_data
206#define he_record c_sv_he_un.c_sv_he_record
207
208#define C_SV_HASH_MAX_MISS 32
209#define C_SV_HASH_SIZE ((1 << 10))
210#define C_SV_HASH_MASK ((1 << 10) - 1)
211
212#if CONFIG_TRACK_UNMODIFIED_ANON_PAGES
213#define C_SV_CSEG_ID ((1 << 21) - 1)
214#else /* CONFIG_TRACK_UNMODIFIED_ANON_PAGES */
215#define C_SV_CSEG_ID ((1 << 22) - 1)
216#endif /* CONFIG_TRACK_UNMODIFIED_ANON_PAGES */
217
218
219union c_segu {
220 c_segment_t c_seg;
221 uintptr_t c_segno;
222};
223
224#define C_SLOT_ASSERT_PACKABLE(ptr) \
225 VM_ASSERT_POINTER_PACKABLE((vm_offset_t)(ptr), C_SLOT_PACKED_PTR);
226
227#define C_SLOT_PACK_PTR(ptr) \
228 VM_PACK_POINTER((vm_offset_t)(ptr), C_SLOT_PACKED_PTR)
229
230#define C_SLOT_UNPACK_PTR(cslot) \
231 (c_slot_mapping_t)VM_UNPACK_POINTER((cslot)->c_packed_ptr, C_SLOT_PACKED_PTR)
232
233/* for debugging purposes */
234SECURITY_READ_ONLY_EARLY(vm_packing_params_t) c_slot_packing_params =
235 VM_PACKING_PARAMS(C_SLOT_PACKED_PTR);
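/*
 * Sketch of how the packing macros above are typically used (hypothetical
 * local names; cs is a c_slot_t and slot_ptr a c_slot_mapping_t):
 *
 *	C_SLOT_ASSERT_PACKABLE(slot_ptr);
 *	cs->c_packed_ptr = C_SLOT_PACK_PTR(slot_ptr);
 *	...
 *	c_slot_mapping_t unpacked = C_SLOT_UNPACK_PTR(cs);
 */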
236
237uint32_t c_segment_count = 0;
238uint32_t c_segment_count_max = 0;
239
240uint64_t c_generation_id = 0;
241uint64_t c_generation_id_flush_barrier;
242
243
244#define HIBERNATE_FLUSHING_SECS_TO_COMPLETE 120
245
246boolean_t hibernate_no_swapspace = FALSE;
247boolean_t hibernate_flush_timed_out = FALSE;
248clock_sec_t hibernate_flushing_deadline = 0;
249
250#if RECORD_THE_COMPRESSED_DATA
251char *c_compressed_record_sbuf;
252char *c_compressed_record_ebuf;
253char *c_compressed_record_cptr;
254#endif
255
256
257queue_head_t c_age_list_head;
258queue_head_t c_early_swappedin_list_head, c_regular_swappedin_list_head, c_late_swappedin_list_head;
259queue_head_t c_early_swapout_list_head, c_regular_swapout_list_head, c_late_swapout_list_head;
260queue_head_t c_swapio_list_head;
261queue_head_t c_swappedout_list_head;
262queue_head_t c_swappedout_sparse_list_head;
263queue_head_t c_major_list_head;
264queue_head_t c_filling_list_head;
265queue_head_t c_bad_list_head;
266
267uint32_t c_age_count = 0;
268uint32_t c_early_swappedin_count = 0, c_regular_swappedin_count = 0, c_late_swappedin_count = 0;
269uint32_t c_early_swapout_count = 0, c_regular_swapout_count = 0, c_late_swapout_count = 0;
270uint32_t c_swapio_count = 0;
271uint32_t c_swappedout_count = 0;
272uint32_t c_swappedout_sparse_count = 0;
273uint32_t c_major_count = 0;
274uint32_t c_filling_count = 0;
275uint32_t c_empty_count = 0;
276uint32_t c_bad_count = 0;
277
278
279queue_head_t c_minor_list_head;
280uint32_t c_minor_count = 0;
281
282int c_overage_swapped_count = 0;
283int c_overage_swapped_limit = 0;
284
285int c_seg_fixed_array_len;
286union c_segu *c_segments;
287vm_offset_t c_buffers;
288vm_size_t c_buffers_size;
289caddr_t c_segments_next_page;
290boolean_t c_segments_busy;
291uint32_t c_segments_available;
292uint32_t c_segments_limit;
293uint32_t c_segments_nearing_limit;
294
295uint32_t c_segment_svp_in_hash;
296uint32_t c_segment_svp_hash_succeeded;
297uint32_t c_segment_svp_hash_failed;
298uint32_t c_segment_svp_zero_compressions;
299uint32_t c_segment_svp_nonzero_compressions;
300uint32_t c_segment_svp_zero_decompressions;
301uint32_t c_segment_svp_nonzero_decompressions;
302
303uint32_t c_segment_noncompressible_pages;
304
305uint32_t c_segment_pages_compressed = 0; /* Tracks # of uncompressed pages fed into the compressor */
306#if CONFIG_FREEZE
307int32_t c_segment_pages_compressed_incore = 0; /* Tracks # of uncompressed pages fed into the compressor that are in memory */
308int32_t c_segment_pages_compressed_incore_late_swapout = 0; /* Tracks # of uncompressed pages fed into the compressor that are in memory and tagged for swapout */
309uint32_t c_segments_incore_limit = 0; /* Tracks # of segments allowed to be in-core. Based on compressor pool size */
310#endif /* CONFIG_FREEZE */
311
312uint32_t c_segment_pages_compressed_limit;
313uint32_t c_segment_pages_compressed_nearing_limit;
314uint32_t c_free_segno_head = (uint32_t)-1;
315
316uint32_t vm_compressor_minorcompact_threshold_divisor = 10;
317uint32_t vm_compressor_majorcompact_threshold_divisor = 10;
318uint32_t vm_compressor_unthrottle_threshold_divisor = 10;
319uint32_t vm_compressor_catchup_threshold_divisor = 10;
320
321uint32_t vm_compressor_minorcompact_threshold_divisor_overridden = 0;
322uint32_t vm_compressor_majorcompact_threshold_divisor_overridden = 0;
323uint32_t vm_compressor_unthrottle_threshold_divisor_overridden = 0;
324uint32_t vm_compressor_catchup_threshold_divisor_overridden = 0;
325
326#define C_SEGMENTS_PER_PAGE (PAGE_SIZE / sizeof(union c_segu))
327
328LCK_GRP_DECLARE(vm_compressor_lck_grp, "vm_compressor");
329LCK_RW_DECLARE(c_master_lock, &vm_compressor_lck_grp);
330LCK_MTX_DECLARE(c_list_lock_storage, &vm_compressor_lck_grp);
331
332boolean_t decompressions_blocked = FALSE;
333
334zone_t compressor_segment_zone;
335int c_compressor_swap_trigger = 0;
336
337uint32_t compressor_cpus;
338char *compressor_scratch_bufs;
339char *kdp_compressor_scratch_buf;
340char *kdp_compressor_decompressed_page;
341addr64_t kdp_compressor_decompressed_page_paddr;
342ppnum_t kdp_compressor_decompressed_page_ppnum;
343
344clock_sec_t start_of_sample_period_sec = 0;
345clock_nsec_t start_of_sample_period_nsec = 0;
346clock_sec_t start_of_eval_period_sec = 0;
347clock_nsec_t start_of_eval_period_nsec = 0;
348uint32_t sample_period_decompression_count = 0;
349uint32_t sample_period_compression_count = 0;
350uint32_t last_eval_decompression_count = 0;
351uint32_t last_eval_compression_count = 0;
352
353#define DECOMPRESSION_SAMPLE_MAX_AGE (60 * 30)
354
355boolean_t vm_swapout_ripe_segments = FALSE;
356uint32_t vm_ripe_target_age = (60 * 60 * 48);
357
358uint32_t swapout_target_age = 0;
359uint32_t age_of_decompressions_during_sample_period[DECOMPRESSION_SAMPLE_MAX_AGE];
360uint32_t overage_decompressions_during_sample_period = 0;
361
362
363void do_fastwake_warmup(queue_head_t *, boolean_t);
364boolean_t fastwake_warmup = FALSE;
365boolean_t fastwake_recording_in_progress = FALSE;
366clock_sec_t dont_trim_until_ts = 0;
367
368uint64_t c_segment_warmup_count;
369uint64_t first_c_segment_to_warm_generation_id = 0;
370uint64_t last_c_segment_to_warm_generation_id = 0;
371boolean_t hibernate_flushing = FALSE;
372
373int64_t c_segment_input_bytes __attribute__((aligned(8))) = 0;
374int64_t c_segment_compressed_bytes __attribute__((aligned(8))) = 0;
375int64_t compressor_bytes_used __attribute__((aligned(8))) = 0;
376
377/* Keeps track of the most recent timestamp for when major compaction finished. */
378mach_timespec_t major_compact_ts;
379
380struct c_sv_hash_entry c_segment_sv_hash_table[C_SV_HASH_SIZE] __attribute__ ((aligned(8)));
381
382static void vm_compressor_swap_trigger_thread(void);
383static void vm_compressor_do_delayed_compactions(boolean_t);
384static void vm_compressor_compact_and_swap(boolean_t);
385static void vm_compressor_process_regular_swapped_in_segments(boolean_t);
386void vm_compressor_process_special_swapped_in_segments(void);
387static void vm_compressor_process_special_swapped_in_segments_locked(void);
388
389struct vm_compressor_swapper_stats vmcs_stats;
390
391static void vm_compressor_process_major_segments(bool);
392#if XNU_TARGET_OS_OSX
393static void vm_compressor_take_paging_space_action(void);
394#endif /* XNU_TARGET_OS_OSX */
395
396void compute_swapout_target_age(void);
397
398boolean_t c_seg_major_compact(c_segment_t, c_segment_t);
399boolean_t c_seg_major_compact_ok(c_segment_t, c_segment_t);
400
401int c_seg_minor_compaction_and_unlock(c_segment_t, boolean_t);
402int c_seg_do_minor_compaction_and_unlock(c_segment_t, boolean_t, boolean_t, boolean_t);
403void c_seg_try_minor_compaction_and_unlock(c_segment_t c_seg);
404
405void c_seg_move_to_sparse_list(c_segment_t);
406void c_seg_insert_into_q(queue_head_t *, c_segment_t);
407
408uint64_t vm_available_memory(void);
409uint64_t vm_compressor_pages_compressed(void);
410uint32_t vm_compressor_pool_size(void);
411uint32_t vm_compressor_fragmentation_level(void);
412uint32_t vm_compression_ratio(void);
413
414/*
415 * indicate the need to do a major compaction if
416 * the overall set of in-use compression segments
417 * becomes sparse... on systems that support pressure
418 * driven swapping, this will also cause swapouts to
419 * be initiated.
420 */
421static inline bool
422vm_compressor_needs_to_major_compact()
423{
424 uint32_t incore_seg_count;
425
426 incore_seg_count = c_segment_count - c_swappedout_count - c_swappedout_sparse_count;
427
428 if ((c_segment_count >= (c_segments_nearing_limit / 8)) &&
429 ((incore_seg_count * c_seg_max_pages) - VM_PAGE_COMPRESSOR_COUNT) >
430 ((incore_seg_count / 8) * c_seg_max_pages)) {
431 return true;
432 }
433 return false;
434}
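/*
 * Worked example for the check above, with made-up numbers: if
 * c_seg_max_pages is 16 and there are 10,000 in-core segments (a capacity of
 * 160,000 compressor pages) while VM_PAGE_COMPRESSOR_COUNT is 130,000, the
 * unused capacity is 30,000 pages, which exceeds 1/8 of capacity (20,000),
 * so a major compaction (and, with swap, swapouts) is requested, provided
 * the total segment count is also at least 1/8 of c_segments_nearing_limit.
 */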
435
436
437uint64_t
438vm_available_memory(void)
439{
440 return ((uint64_t)AVAILABLE_NON_COMPRESSED_MEMORY) * PAGE_SIZE_64;
441}
442
443
444uint32_t
445vm_compressor_pool_size(void)
446{
447 return VM_PAGE_COMPRESSOR_COUNT;
448}
449
450uint32_t
451vm_compressor_fragmentation_level(void)
452{
453 const uint32_t incore_seg_count = c_segment_count - c_swappedout_count - c_swappedout_sparse_count;
454 if ((incore_seg_count == 0) || (c_seg_max_pages == 0)) {
455 return 0;
456 }
457 return 100 - (vm_compressor_pool_size() * 100 / (incore_seg_count * c_seg_max_pages));
458}
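/*
 * Example with hypothetical numbers: 10,000 in-core segments with
 * c_seg_max_pages = 16 give a capacity of 160,000 pages. If the pool
 * currently holds 120,000 pages (vm_compressor_pool_size()), the reported
 * fragmentation level is 100 - (120,000 * 100 / 160,000) = 25, i.e. 25% of
 * the in-core segment capacity is not occupied by compressed data.
 */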
459
460uint32_t
461vm_compression_ratio(void)
462{
463 if (vm_compressor_pool_size() == 0) {
464 return UINT32_MAX;
465 }
466 return c_segment_pages_compressed / vm_compressor_pool_size();
467}
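/*
 * Example: if 3,000,000 uncompressed pages have been fed into the compressor
 * (c_segment_pages_compressed) and the pool currently occupies 1,000,000
 * physical pages, the integer compression ratio reported is 3. An empty pool
 * reports UINT32_MAX.
 */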
468
469uint64_t
470vm_compressor_pages_compressed(void)
471{
472 return c_segment_pages_compressed * PAGE_SIZE_64;
473}
474
475bool
476vm_compressor_compressed_pages_nearing_limit(void)
477{
478 uint32_t pages = 0;
479
480#if CONFIG_FREEZE
481 pages = os_atomic_load(&c_segment_pages_compressed_incore, relaxed);
482#else /* CONFIG_FREEZE */
483 pages = c_segment_pages_compressed;
484#endif /* CONFIG_FREEZE */
485
486 return pages > c_segment_pages_compressed_nearing_limit;
487}
488
489static bool
490vm_compressor_segments_nearing_limit(void)
491{
492 uint64_t segments;
493
494#if CONFIG_FREEZE
495 if (freezer_incore_cseg_acct) {
496 if (os_sub_overflow(c_segment_count, c_swappedout_count, &segments)) {
497 segments = 0;
498 }
499 if (os_sub_overflow(segments, c_swappedout_sparse_count, &segments)) {
500 segments = 0;
501 }
502 } else {
503 segments = os_atomic_load(&c_segment_count, relaxed);
504 }
505#else /* CONFIG_FREEZE */
506 segments = c_segment_count;
507#endif /* CONFIG_FREEZE */
508
509 return segments > c_segments_nearing_limit;
510}
511
512boolean_t
513vm_compressor_low_on_space(void)
514{
515 return vm_compressor_compressed_pages_nearing_limit() ||
516 vm_compressor_segments_nearing_limit();
517}
518
519
520boolean_t
521vm_compressor_out_of_space(void)
522{
523#if CONFIG_FREEZE
524 uint64_t incore_seg_count;
525 uint32_t incore_compressed_pages;
526 if (freezer_incore_cseg_acct) {
527 if (os_sub_overflow(c_segment_count, c_swappedout_count, &incore_seg_count)) {
528 incore_seg_count = 0;
529 }
530 if (os_sub_overflow(incore_seg_count, c_swappedout_sparse_count, &incore_seg_count)) {
531 incore_seg_count = 0;
532 }
533 incore_compressed_pages = os_atomic_load(&c_segment_pages_compressed_incore, relaxed);
534 } else {
535 incore_seg_count = os_atomic_load(&c_segment_count, relaxed);
536 incore_compressed_pages = os_atomic_load(&c_segment_pages_compressed_incore, relaxed);
537 }
538
539 if ((incore_compressed_pages >= c_segment_pages_compressed_limit) ||
540 (incore_seg_count > c_segments_incore_limit)) {
541 return TRUE;
542 }
543#else /* CONFIG_FREEZE */
544 if ((c_segment_pages_compressed >= c_segment_pages_compressed_limit) ||
545 (c_segment_count >= c_segments_limit)) {
546 return TRUE;
547 }
548#endif /* CONFIG_FREEZE */
549 return FALSE;
550}
551
552bool
553vm_compressor_is_thrashing()
554{
555 compute_swapout_target_age();
556
557 if (swapout_target_age) {
558 c_segment_t c_seg;
559
560 lck_mtx_lock_spin_always(c_list_lock);
561
562 if (!queue_empty(&c_age_list_head)) {
563 c_seg = (c_segment_t) queue_first(&c_age_list_head);
564
565 if (c_seg->c_creation_ts > swapout_target_age) {
566 swapout_target_age = 0;
567 }
568 }
569 lck_mtx_unlock_always(c_list_lock);
570 }
571
572 return swapout_target_age != 0;
573}
574
575
576int
577vm_wants_task_throttled(task_t task)
578{
579 ledger_amount_t compressed;
580 if (task == kernel_task) {
581 return 0;
582 }
583
584 if (VM_CONFIG_SWAP_IS_ACTIVE) {
585 if ((vm_compressor_low_on_space() || HARD_THROTTLE_LIMIT_REACHED())) {
			ledger_get_balance(task->ledger, task_ledgers.internal_compressed, &compressed);
			compressed >>= VM_MAP_PAGE_SHIFT(task->map);
588 if ((unsigned int)compressed > (c_segment_pages_compressed / 4)) {
589 return 1;
590 }
591 }
592 }
593 return 0;
594}
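/*
 * Example of the throttling policy above, with hypothetical numbers: with
 * swap active and the compressor low on space, a task whose
 * internal_compressed ledger balance corresponds to 300,000 pages is
 * throttled when the system-wide c_segment_pages_compressed is 1,000,000,
 * since 300,000 > 1,000,000 / 4.
 */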
595
596
597#if DEVELOPMENT || DEBUG
598/*
599 * On compressor/swap exhaustion, kill the largest process regardless of
600 * its chosen process policy.
601 */
602TUNABLE(bool, kill_on_no_paging_space, "-kill_on_no_paging_space", false);
603#endif /* DEVELOPMENT || DEBUG */
604
605#if CONFIG_JETSAM
606boolean_t memorystatus_kill_on_VM_compressor_space_shortage(boolean_t);
607void memorystatus_thread_wake(void);
608extern uint32_t jetsam_kill_on_low_swap;
609bool memorystatus_disable_swap(void);
610#if CONFIG_PHANTOM_CACHE
611extern bool memorystatus_phantom_cache_pressure;
612#endif /* CONFIG_PHANTOM_CACHE */
613int compressor_thrashing_induced_jetsam = 0;
614int filecache_thrashing_induced_jetsam = 0;
615static boolean_t vm_compressor_thrashing_detected = FALSE;
616#else /* CONFIG_JETSAM */
617static uint32_t no_paging_space_action_in_progress = 0;
618extern void memorystatus_send_low_swap_note(void);
619#endif /* CONFIG_JETSAM */
620
621static void
622vm_compressor_take_paging_space_action(void)
623{
624#if CONFIG_JETSAM
625 /*
626 * On systems with both swap and jetsam,
627 * just wake up the jetsam thread and have it handle the low swap condition
628 * by killing apps.
629 */
630 if (jetsam_kill_on_low_swap) {
631 memorystatus_thread_wake();
632 }
633#else /* CONFIG_JETSAM */
634 if (no_paging_space_action_in_progress == 0) {
635 if (OSCompareAndSwap(0, 1, (UInt32 *)&no_paging_space_action_in_progress)) {
636 if (no_paging_space_action()) {
637#if DEVELOPMENT || DEBUG
638 if (kill_on_no_paging_space) {
639 /*
640 * Since we are choosing to always kill a process, we don't need the
641 * "out of application memory" dialog box in this mode. And, hence we won't
642 * send the knote.
643 */
644 no_paging_space_action_in_progress = 0;
645 return;
646 }
647#endif /* DEVELOPMENT || DEBUG */
648 memorystatus_send_low_swap_note();
649 }
650
651 no_paging_space_action_in_progress = 0;
652 }
653 }
654#endif /* !CONFIG_JETSAM */
655}
656
657
658void
659vm_decompressor_lock(void)
660{
661 PAGE_REPLACEMENT_ALLOWED(TRUE);
662
663 decompressions_blocked = TRUE;
664
665 PAGE_REPLACEMENT_ALLOWED(FALSE);
666}
667
668void
669vm_decompressor_unlock(void)
670{
671 PAGE_REPLACEMENT_ALLOWED(TRUE);
672
673 decompressions_blocked = FALSE;
674
675 PAGE_REPLACEMENT_ALLOWED(FALSE);
676
677 thread_wakeup((event_t)&decompressions_blocked);
678}
679
680static inline void
681cslot_copy(c_slot_t cdst, c_slot_t csrc)
682{
683#if CHECKSUM_THE_DATA
684 cdst->c_hash_data = csrc->c_hash_data;
685#endif
686#if CHECKSUM_THE_COMPRESSED_DATA
687 cdst->c_hash_compressed_data = csrc->c_hash_compressed_data;
688#endif
689#if POPCOUNT_THE_COMPRESSED_DATA
690 cdst->c_pop_cdata = csrc->c_pop_cdata;
691#endif
692 cdst->c_size = csrc->c_size;
693 cdst->c_packed_ptr = csrc->c_packed_ptr;
694#if defined(__arm64__)
695 cdst->c_codec = csrc->c_codec;
696#endif
697}
698
699#if XNU_TARGET_OS_OSX
700#define VM_COMPRESSOR_MAX_POOL_SIZE (192UL << 30)
701#else
702#define VM_COMPRESSOR_MAX_POOL_SIZE (0)
703#endif
704
705static vm_map_size_t compressor_size;
706static SECURITY_READ_ONLY_LATE(struct mach_vm_range) compressor_range;
707vm_map_t compressor_map;
708uint64_t compressor_pool_max_size;
709uint64_t compressor_pool_size;
710uint32_t compressor_pool_multiplier;
711
712#if DEVELOPMENT || DEBUG
713/*
714 * Compressor segments are write-protected in development/debug
715 * kernels to help debug memory corruption.
716 * In cases where performance is a concern, this can be disabled
717 * via the boot-arg "-disable_cseg_write_protection".
718 */
719boolean_t write_protect_c_segs = TRUE;
720int vm_compressor_test_seg_wp;
721uint32_t vm_ktrace_enabled;
722#endif /* DEVELOPMENT || DEBUG */
723
724#if (XNU_TARGET_OS_OSX && __arm64__)
725
726#include <IOKit/IOPlatformExpert.h>
727#include <sys/random.h>
728
729static const char *csegbufsizeExperimentProperty = "_csegbufsz_experiment";
730static thread_call_t csegbufsz_experiment_thread_call;
731
732extern boolean_t IOServiceWaitForMatchingResource(const char * property, uint64_t timeout);
733static void
734erase_csegbufsz_experiment_property(__unused void *param0, __unused void *param1)
735{
736 // Wait for NVRAM to be writable
	if (!IOServiceWaitForMatchingResource("IONVRAM", UINT64_MAX)) {
		printf("csegbufsz_experiment_property: Failed to wait for IONVRAM.");
	}

	if (!PERemoveNVRAMProperty(csegbufsizeExperimentProperty)) {
		printf("csegbufsize_experiment_property: Failed to remove %s from NVRAM.", csegbufsizeExperimentProperty);
	}
	thread_call_free(csegbufsz_experiment_thread_call);
745}
746
747static void
748erase_csegbufsz_experiment_property_async()
749{
	csegbufsz_experiment_thread_call = thread_call_allocate_with_priority(
		erase_csegbufsz_experiment_property,
		NULL,
		THREAD_CALL_PRIORITY_LOW
		);
	if (csegbufsz_experiment_thread_call == NULL) {
		printf("csegbufsize_experiment_property: Unable to allocate thread call.");
	} else {
		thread_call_enter(csegbufsz_experiment_thread_call);
	}
759 }
760}
761
762static void
763cleanup_csegbufsz_experiment(__unused void *arg0)
764{
765 char nvram = 0;
766 unsigned int len = sizeof(nvram);
	if (PEReadNVRAMProperty(csegbufsizeExperimentProperty, &nvram, &len)) {
768 erase_csegbufsz_experiment_property_async();
769 }
770}
771
772STARTUP_ARG(EARLY_BOOT, STARTUP_RANK_FIRST, cleanup_csegbufsz_experiment, NULL);
773#endif /* XNU_TARGET_OS_OSX && __arm64__ */
774
775#if CONFIG_JETSAM
776extern unsigned int memorystatus_swap_all_apps;
777#endif /* CONFIG_JETSAM */
778
779TUNABLE_DT(uint64_t, swap_vol_min_capacity, "/defaults", "kern.swap_min_capacity", "kern.swap_min_capacity", 0, TUNABLE_DT_NONE);
780
781static void
782vm_compressor_set_size(void)
783{
	/*
	 * Note that this function may be called multiple times on systems with app swap
	 * because the value of vm_swap_get_max_configured_space() and memorystatus_swap_all_apps
	 * can change based on the size of the swap volume. On these systems, we'll call
	 * this function once early in boot to reserve the maximum amount of VA required
	 * for the compressor submap and then one more time in vm_compressor_init after
	 * determining the swap volume size. The size we compute the second time around
	 * must not be larger than the first.
	 */
793 vm_size_t c_segments_arr_size = 0;
794 struct c_slot_mapping tmp_slot_ptr;
795
	/* The segment size can be overridden by a boot-arg */
	if (!PE_parse_boot_argn("vm_compressor_segment_buffer_size", &c_seg_bufsize, sizeof(c_seg_bufsize))) {
798#if CONFIG_JETSAM
799 if (memorystatus_swap_all_apps) {
800 c_seg_bufsize = C_SEG_BUFSIZE_ARM_SWAP;
801 } else {
802 c_seg_bufsize = C_SEG_BUFSIZE_DEFAULT;
803 }
804#else
805 c_seg_bufsize = C_SEG_BUFSIZE_DEFAULT;
806#endif /* CONFIG_JETSAM */
807 }
808
809 vm_compressor_swap_init_swap_file_limit();
810 if (vm_compression_limit) {
811 compressor_pool_size = ptoa_64(vm_compression_limit);
812 }
813
814 compressor_pool_max_size = C_SEG_MAX_LIMIT;
815 compressor_pool_max_size *= c_seg_bufsize;
816
817#if XNU_TARGET_OS_OSX
818
819 if (vm_compression_limit == 0) {
820 if (max_mem <= (4ULL * 1024ULL * 1024ULL * 1024ULL)) {
821 compressor_pool_size = 16ULL * max_mem;
822 } else if (max_mem <= (8ULL * 1024ULL * 1024ULL * 1024ULL)) {
823 compressor_pool_size = 8ULL * max_mem;
824 } else if (max_mem <= (32ULL * 1024ULL * 1024ULL * 1024ULL)) {
825 compressor_pool_size = 4ULL * max_mem;
826 } else {
827 compressor_pool_size = 2ULL * max_mem;
828 }
829 }
830 /*
831 * Cap the compressor pool size to a max of 192G
832 */
833 if (compressor_pool_size > VM_COMPRESSOR_MAX_POOL_SIZE) {
834 compressor_pool_size = VM_COMPRESSOR_MAX_POOL_SIZE;
835 }
836 if (max_mem <= (8ULL * 1024ULL * 1024ULL * 1024ULL)) {
837 compressor_pool_multiplier = 1;
838 } else if (max_mem <= (32ULL * 1024ULL * 1024ULL * 1024ULL)) {
839 compressor_pool_multiplier = 2;
840 } else {
841 compressor_pool_multiplier = 4;
842 }
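	/*
	 * Worked example of the macOS sizing above (no vm_compression_limit
	 * override, illustrative only): a 16GB machine falls in the "<= 32GB"
	 * bucket, so compressor_pool_size = 4 * 16GB = 64GB of VA (under the
	 * 192GB cap) with compressor_pool_multiplier = 2; a 96GB machine gets
	 * 2 * 96GB = 192GB, right at the cap, with a multiplier of 4.
	 */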
843
844#else
845
846 if (compressor_pool_max_size > max_mem) {
847 compressor_pool_max_size = max_mem;
848 }
849
850 if (vm_compression_limit == 0) {
851 compressor_pool_size = max_mem;
852 }
853
854#if XNU_TARGET_OS_WATCH
855 compressor_pool_multiplier = 2;
856#elif XNU_TARGET_OS_IOS
857 if (max_mem <= (2ULL * 1024ULL * 1024ULL * 1024ULL)) {
858 compressor_pool_multiplier = 2;
859 } else {
860 compressor_pool_multiplier = 1;
861 }
862#else
863 compressor_pool_multiplier = 1;
864#endif
865
866#endif
867
	PE_parse_boot_argn("kern.compressor_pool_multiplier", &compressor_pool_multiplier, sizeof(compressor_pool_multiplier));
869 if (compressor_pool_multiplier < 1) {
870 compressor_pool_multiplier = 1;
871 }
872
873 if (compressor_pool_size > compressor_pool_max_size) {
874 compressor_pool_size = compressor_pool_max_size;
875 }
876
877 c_seg_max_pages = (c_seg_bufsize / PAGE_SIZE);
878 c_seg_slot_var_array_min_len = c_seg_max_pages;
879
880#if !defined(__x86_64__)
881 c_seg_off_limit = (C_SEG_BYTES_TO_OFFSET((c_seg_bufsize - 512)));
882 c_seg_allocsize = (c_seg_bufsize + PAGE_SIZE);
883#else
884 c_seg_off_limit = (C_SEG_BYTES_TO_OFFSET((c_seg_bufsize - 128)));
885 c_seg_allocsize = c_seg_bufsize;
886#endif /* !defined(__x86_64__) */
887
888 c_segments_limit = (uint32_t)(compressor_pool_size / (vm_size_t)(c_seg_allocsize));
889 tmp_slot_ptr.s_cseg = c_segments_limit;
890 /* Panic on internal configs*/
891 assertf((tmp_slot_ptr.s_cseg == c_segments_limit), "vm_compressor_init: overflowed s_cseg field in c_slot_mapping with c_segno: %d", c_segments_limit);
892
893 if (tmp_slot_ptr.s_cseg != c_segments_limit) {
894 tmp_slot_ptr.s_cseg = -1;
895 c_segments_limit = tmp_slot_ptr.s_cseg - 1; /*limited by segment idx bits in c_slot_mapping*/
896 compressor_pool_size = (c_segments_limit * (vm_size_t)(c_seg_allocsize));
897 }
898
899 c_segments_nearing_limit = (uint32_t)(((uint64_t)c_segments_limit * 98ULL) / 100ULL);
900
901 c_segment_pages_compressed_limit = (c_segments_limit * (c_seg_bufsize / PAGE_SIZE) * compressor_pool_multiplier);
902
903 if (c_segment_pages_compressed_limit < (uint32_t)(max_mem / PAGE_SIZE)) {
904#if defined(XNU_TARGET_OS_WATCH)
905 c_segment_pages_compressed_limit = (uint32_t)(max_mem / PAGE_SIZE);
906#else
907 if (!vm_compression_limit) {
908 c_segment_pages_compressed_limit = (uint32_t)(max_mem / PAGE_SIZE);
909 }
910#endif
911 }
912
913 c_segment_pages_compressed_nearing_limit = (uint32_t)(((uint64_t)c_segment_pages_compressed_limit * 98ULL) / 100ULL);
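	/*
	 * Illustrative numbers for the limits above: with a 64KB c_seg_bufsize,
	 * 4KB pages and a multiplier of 1, a c_segments_limit of 100,000 allows
	 * 100,000 * 16 * 1 = 1,600,000 compressed pages, and the "nearing"
	 * threshold is 98% of that, i.e. 1,568,000 pages. If the computed limit
	 * is below max_mem / PAGE_SIZE it may be raised, per the checks above.
	 */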
914
915#if CONFIG_FREEZE
916 /*
917 * Our in-core limits are based on the size of the compressor pool.
918 * The c_segments_nearing_limit is also based on the compressor pool
919 * size and calculated above.
920 */
921 c_segments_incore_limit = c_segments_limit;
922
923 if (freezer_incore_cseg_acct) {
924 /*
925 * Add enough segments to track all frozen c_segs that can be stored in swap.
926 */
927 c_segments_limit += (uint32_t)(vm_swap_get_max_configured_space() / (vm_size_t)(c_seg_allocsize));
928 tmp_slot_ptr.s_cseg = c_segments_limit;
929 /* Panic on internal configs*/
930 assertf((tmp_slot_ptr.s_cseg == c_segments_limit), "vm_compressor_init: freezer reserve overflowed s_cseg field in c_slot_mapping with c_segno: %d", c_segments_limit);
931 }
932#endif
	/*
	 * Submap needs space for:
	 * - c_segments
	 * - c_buffers
	 * - swap reclamations -- c_seg_bufsize
	 */
939 c_segments_arr_size = vm_map_round_page((sizeof(union c_segu) * c_segments_limit), VM_MAP_PAGE_MASK(kernel_map));
940 c_buffers_size = vm_map_round_page(((vm_size_t)c_seg_allocsize * (vm_size_t)c_segments_limit), VM_MAP_PAGE_MASK(kernel_map));
941
942 compressor_size = c_segments_arr_size + c_buffers_size + c_seg_bufsize;
943
944#if RECORD_THE_COMPRESSED_DATA
945 c_compressed_record_sbuf_size = (vm_size_t)c_seg_allocsize + (PAGE_SIZE * 2);
946 compressor_size += c_compressed_record_sbuf_size;
947#endif /* RECORD_THE_COMPRESSED_DATA */
948}
949STARTUP(KMEM, STARTUP_RANK_FIRST, vm_compressor_set_size);
950
951KMEM_RANGE_REGISTER_DYNAMIC(compressor, &compressor_range, ^() {
952 return compressor_size;
953});
954
955bool
956osenvironment_is_diagnostics(void)
957{
958 DTEntry chosen;
959 const char *osenvironment;
960 unsigned int size;
	if (kSuccess == SecureDTLookupEntry(0, "/chosen", &chosen)) {
		if (kSuccess == SecureDTGetProperty(chosen, "osenvironment", (void const **) &osenvironment, &size)) {
			return strcmp(osenvironment, "diagnostics") == 0;
964 }
965 }
966 return false;
967}
968
969void
970vm_compressor_init(void)
971{
972 thread_t thread;
973#if RECORD_THE_COMPRESSED_DATA
974 vm_size_t c_compressed_record_sbuf_size = 0;
975#endif /* RECORD_THE_COMPRESSED_DATA */
976
977#if DEVELOPMENT || DEBUG || CONFIG_FREEZE
978 char bootarg_name[32];
979#endif /* DEVELOPMENT || DEBUG || CONFIG_FREEZE */
980 __unused uint64_t early_boot_compressor_size = compressor_size;
981
982#if CONFIG_JETSAM
983 if (memorystatus_swap_all_apps && osenvironment_is_diagnostics()) {
984 printf("osenvironment == \"diagnostics\". Disabling app swap.\n");
985 memorystatus_disable_swap();
986 }
987
988 if (memorystatus_swap_all_apps) {
989 /*
990 * App swap is disabled on devices with small NANDs.
991 * Now that we're no longer in early boot, we can get
992 * the NAND size and re-run vm_compressor_set_size.
993 */
994 int error = vm_swap_vol_get_capacity(SWAP_VOLUME_NAME, &vm_swap_volume_capacity);
995#if DEVELOPMENT || DEBUG
996 if (error != 0) {
997 panic("vm_compressor_init: Unable to get swap volume capacity. error=%d\n", error);
998 }
999#else
1000 if (error != 0) {
1001 os_log_with_startup_serial(OS_LOG_DEFAULT, "vm_compressor_init: Unable to get swap volume capacity. error=%d\n", error);
1002 }
1003#endif /* DEVELOPMENT || DEBUG */
1004 if (vm_swap_volume_capacity < swap_vol_min_capacity) {
1005 memorystatus_disable_swap();
1006 }
1007 /*
1008 * Resize the compressor and swap now that we know the capacity
1009 * of the swap volume.
1010 */
1011 vm_compressor_set_size();
1012 /*
1013 * We reserved a chunk of VA early in boot for the compressor submap.
1014 * We can't allocate more than that.
1015 */
1016 assert(compressor_size <= early_boot_compressor_size);
1017 }
1018#endif /* CONFIG_JETSAM */
1019
1020#if DEVELOPMENT || DEBUG
1021 if (PE_parse_boot_argn("-disable_cseg_write_protection", bootarg_name, sizeof(bootarg_name))) {
1022 write_protect_c_segs = FALSE;
1023 }
1024
1025 int vmcval = 1;
1026#if defined(XNU_TARGET_OS_WATCH)
1027 vmcval = 0;
1028#endif /* XNU_TARGET_OS_WATCH */
1029 PE_parse_boot_argn("vm_compressor_validation", &vmcval, sizeof(vmcval));
1030
1031 if (kern_feature_override(KF_COMPRSV_OVRD)) {
1032 vmcval = 0;
1033 }
1034
1035 if (vmcval == 0) {
1036#if POPCOUNT_THE_COMPRESSED_DATA
1037 popcount_c_segs = FALSE;
1038#endif
1039#if CHECKSUM_THE_DATA || CHECKSUM_THE_COMPRESSED_DATA
1040 checksum_c_segs = FALSE;
1041#endif
1042#if VALIDATE_C_SEGMENTS
1043 validate_c_segs = FALSE;
1044#endif
1045 write_protect_c_segs = FALSE;
1046 }
1047#endif /* DEVELOPMENT || DEBUG */
1048
1049#if CONFIG_FREEZE
1050 if (PE_parse_boot_argn("-disable_freezer_cseg_acct", bootarg_name, sizeof(bootarg_name))) {
1051 freezer_incore_cseg_acct = FALSE;
1052 }
1053#endif /* CONFIG_FREEZE */
1054
1055 assert((C_SEGMENTS_PER_PAGE * sizeof(union c_segu)) == PAGE_SIZE);
1056
1057#if !XNU_TARGET_OS_OSX
1058 vm_compressor_minorcompact_threshold_divisor = 20;
1059 vm_compressor_majorcompact_threshold_divisor = 30;
1060 vm_compressor_unthrottle_threshold_divisor = 40;
1061 vm_compressor_catchup_threshold_divisor = 60;
1062#else /* !XNU_TARGET_OS_OSX */
1063 if (max_mem <= (3ULL * 1024ULL * 1024ULL * 1024ULL)) {
1064 vm_compressor_minorcompact_threshold_divisor = 11;
1065 vm_compressor_majorcompact_threshold_divisor = 13;
1066 vm_compressor_unthrottle_threshold_divisor = 20;
1067 vm_compressor_catchup_threshold_divisor = 35;
1068 } else {
1069 vm_compressor_minorcompact_threshold_divisor = 20;
1070 vm_compressor_majorcompact_threshold_divisor = 25;
1071 vm_compressor_unthrottle_threshold_divisor = 35;
1072 vm_compressor_catchup_threshold_divisor = 50;
1073 }
1074#endif /* !XNU_TARGET_OS_OSX */
1075
1076 queue_init(&c_bad_list_head);
1077 queue_init(&c_age_list_head);
1078 queue_init(&c_minor_list_head);
1079 queue_init(&c_major_list_head);
1080 queue_init(&c_filling_list_head);
1081 queue_init(&c_early_swapout_list_head);
1082 queue_init(&c_regular_swapout_list_head);
1083 queue_init(&c_late_swapout_list_head);
1084 queue_init(&c_swapio_list_head);
1085 queue_init(&c_early_swappedin_list_head);
1086 queue_init(&c_regular_swappedin_list_head);
1087 queue_init(&c_late_swappedin_list_head);
1088 queue_init(&c_swappedout_list_head);
1089 queue_init(&c_swappedout_sparse_list_head);
1090
1091 c_free_segno_head = -1;
1092 c_segments_available = 0;
1093
	compressor_map = kmem_suballoc(kernel_map, &compressor_range.min_address,
	    compressor_size, VM_MAP_CREATE_NEVER_FAULTS,
	    VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE, KMS_NOFAIL | KMS_PERMANENT,
	    VM_KERN_MEMORY_COMPRESSOR).kmr_submap;

	kmem_alloc(compressor_map, (vm_offset_t *)(&c_segments),
	    (sizeof(union c_segu) * c_segments_limit),
	    KMA_NOFAIL | KMA_KOBJECT | KMA_VAONLY | KMA_PERMANENT,
	    VM_KERN_MEMORY_COMPRESSOR);
	kmem_alloc(compressor_map, &c_buffers, c_buffers_size,
	    KMA_NOFAIL | KMA_COMPRESSOR | KMA_VAONLY | KMA_PERMANENT,
	    VM_KERN_MEMORY_COMPRESSOR);
1106
1107#if DEVELOPMENT || DEBUG
1108 if (hvg_is_hcall_available(HVG_HCALL_SET_COREDUMP_DATA)) {
1109 hvg_hcall_set_coredump_data();
1110 }
1111#endif
1112
	/*
	 * Pick a segment structure size that will minimize fragmentation in
	 * zalloc by minimizing the waste at the end of a 16k run.
	 *
	 * c_seg_slot_var_array_min_len is larger on 4k systems than on 16k ones,
	 * which would make the fragmentation within a 4k page terrible. Sizing
	 * against a 16k run on all systems matches zalloc() and minimizes
	 * fragmentation.
	 */
1121 uint32_t c_segment_size = sizeof(struct c_segment) + (c_seg_slot_var_array_min_len * sizeof(struct c_slot));
1122 uint32_t cnt = (16 << 10) / c_segment_size;
1123 uint32_t frag = (16 << 10) % c_segment_size;
1124
1125 c_seg_fixed_array_len = c_seg_slot_var_array_min_len;
1126
1127 while (cnt * sizeof(struct c_slot) < frag) {
1128 c_segment_size += sizeof(struct c_slot);
1129 c_seg_fixed_array_len++;
1130 frag -= cnt * sizeof(struct c_slot);
1131 }
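	/*
	 * Example of the sizing loop above with hypothetical sizes: if the
	 * initial c_segment_size is 980 bytes and sizeof(struct c_slot) is 8,
	 * then cnt = 16384 / 980 = 16 and frag = 704. Each pass grows every
	 * segment in the 16KB run by one 8-byte slot (128 bytes across the run),
	 * so after 5 passes frag drops to 64 (< 128) and the loop stops: 5 slots
	 * that would otherwise be wasted space become part of each segment's
	 * fixed slot array.
	 */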
1132
	compressor_segment_zone = zone_create("compressor_segment",
	    c_segment_size, ZC_PGZ_USE_GUARDS | ZC_NOENCRYPT | ZC_ZFREE_CLEARMEM);
1135
1136 c_segments_busy = FALSE;
1137
1138 c_segments_next_page = (caddr_t)c_segments;
1139 vm_compressor_algorithm_init();
1140
1141 {
1142 host_basic_info_data_t hinfo;
1143 mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
1144 size_t bufsize;
1145 char *buf;
1146
1147#define BSD_HOST 1
		host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
1149
1150 compressor_cpus = hinfo.max_cpus;
1151
1152 bufsize = PAGE_SIZE;
1153 bufsize += compressor_cpus * vm_compressor_get_decode_scratch_size();
1154 /* For the KDP path */
1155 bufsize += vm_compressor_get_decode_scratch_size();
1156#if CONFIG_FREEZE
1157 bufsize += vm_compressor_get_encode_scratch_size();
1158#endif
1159#if RECORD_THE_COMPRESSED_DATA
1160 bufsize += c_compressed_record_sbuf_size;
1161#endif
1162
		kmem_alloc(kernel_map, (vm_offset_t *)&buf, bufsize,
		    KMA_DATA | KMA_NOFAIL | KMA_KOBJECT | KMA_PERMANENT,
		    VM_KERN_MEMORY_COMPRESSOR);
1166
1167 /*
1168 * kdp_compressor_decompressed_page must be page aligned because we access
1169 * it through the physical aperture by page number.
1170 */
1171 kdp_compressor_decompressed_page = buf;
		kdp_compressor_decompressed_page_paddr = kvtophys((vm_offset_t)kdp_compressor_decompressed_page);
1173 kdp_compressor_decompressed_page_ppnum = (ppnum_t) atop(kdp_compressor_decompressed_page_paddr);
1174 buf += PAGE_SIZE;
1175 bufsize -= PAGE_SIZE;
1176
1177 compressor_scratch_bufs = buf;
1178 buf += compressor_cpus * vm_compressor_get_decode_scratch_size();
1179 bufsize -= compressor_cpus * vm_compressor_get_decode_scratch_size();
1180
1181 kdp_compressor_scratch_buf = buf;
1182 buf += vm_compressor_get_decode_scratch_size();
1183 bufsize -= vm_compressor_get_decode_scratch_size();
1184
1185#if CONFIG_FREEZE
1186 freezer_context_global.freezer_ctx_compressor_scratch_buf = buf;
1187 buf += vm_compressor_get_encode_scratch_size();
1188 bufsize -= vm_compressor_get_encode_scratch_size();
1189#endif
1190
1191#if RECORD_THE_COMPRESSED_DATA
1192 c_compressed_record_sbuf = buf;
1193 c_compressed_record_cptr = buf;
1194 c_compressed_record_ebuf = c_compressed_record_sbuf + c_compressed_record_sbuf_size;
1195 buf += c_compressed_record_sbuf_size;
1196 bufsize -= c_compressed_record_sbuf_size;
1197#endif
1198 assert(bufsize == 0);
1199 }
1200
	if (kernel_thread_start_priority((thread_continue_t)vm_compressor_swap_trigger_thread, NULL,
	    BASEPRI_VM, &thread) != KERN_SUCCESS) {
1203 panic("vm_compressor_swap_trigger_thread: create failed");
1204 }
1205 thread_deallocate(thread);
1206
1207 if (vm_pageout_internal_start() != KERN_SUCCESS) {
1208 panic("vm_compressor_init: Failed to start the internal pageout thread.");
1209 }
1210 if (VM_CONFIG_SWAP_IS_PRESENT) {
1211 vm_compressor_swap_init();
1212 }
1213
1214 if (VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
1215 vm_compressor_is_active = 1;
1216 }
1217
1218 vm_compressor_available = 1;
1219
1220 vm_page_reactivate_all_throttled();
1221
	bzero(&vmcs_stats, sizeof(struct vm_compressor_swapper_stats));
1223}
1224
1225
1226#if VALIDATE_C_SEGMENTS
1227
1228static void
1229c_seg_validate(c_segment_t c_seg, boolean_t must_be_compact)
1230{
1231 uint16_t c_indx;
1232 int32_t bytes_used;
1233 uint32_t c_rounded_size;
1234 uint32_t c_size;
1235 c_slot_t cs;
1236
1237 if (__probable(validate_c_segs == FALSE)) {
1238 return;
1239 }
1240 if (c_seg->c_firstemptyslot < c_seg->c_nextslot) {
1241 c_indx = c_seg->c_firstemptyslot;
1242 cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx);
1243
1244 if (cs == NULL) {
1245 panic("c_seg_validate: no slot backing c_firstemptyslot");
1246 }
1247
1248 if (cs->c_size) {
1249 panic("c_seg_validate: c_firstemptyslot has non-zero size (%d)", cs->c_size);
1250 }
1251 }
1252 bytes_used = 0;
1253
1254 for (c_indx = 0; c_indx < c_seg->c_nextslot; c_indx++) {
1255 cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx);
1256
1257 c_size = UNPACK_C_SIZE(cs);
1258
1259 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;
1260
1261 bytes_used += c_rounded_size;
1262
1263#if CHECKSUM_THE_COMPRESSED_DATA
1264 unsigned csvhash;
1265 if (c_size && cs->c_hash_compressed_data != (csvhash = vmc_hash((char *)&c_seg->c_store.c_buffer[cs->c_offset], c_size))) {
1266 addr64_t csvphys = kvtophys((vm_offset_t)&c_seg->c_store.c_buffer[cs->c_offset]);
1267 panic("Compressed data doesn't match original %p phys: 0x%llx %d %p %d %d 0x%x 0x%x", c_seg, csvphys, cs->c_offset, cs, c_indx, c_size, cs->c_hash_compressed_data, csvhash);
1268 }
1269#endif
1270#if POPCOUNT_THE_COMPRESSED_DATA
1271 unsigned csvpop;
1272 if (c_size) {
1273 uintptr_t csvaddr = (uintptr_t) &c_seg->c_store.c_buffer[cs->c_offset];
1274 if (cs->c_pop_cdata != (csvpop = vmc_pop(csvaddr, c_size))) {
1275 panic("Compressed data popcount doesn't match original, bit distance: %d %p (phys: %p) %p %p 0x%llx 0x%x 0x%x 0x%x", (csvpop - cs->c_pop_cdata), (void *)csvaddr, (void *) kvtophys(csvaddr), c_seg, cs, (uint64_t)cs->c_offset, c_size, csvpop, cs->c_pop_cdata);
1276 }
1277 }
1278#endif
1279 }
1280
1281 if (bytes_used != c_seg->c_bytes_used) {
1282 panic("c_seg_validate: bytes_used mismatch - found %d, segment has %d", bytes_used, c_seg->c_bytes_used);
1283 }
1284
1285 if (c_seg->c_bytes_used > C_SEG_OFFSET_TO_BYTES((int32_t)c_seg->c_nextoffset)) {
1286 panic("c_seg_validate: c_bytes_used > c_nextoffset - c_nextoffset = %d, c_bytes_used = %d",
1287 (int32_t)C_SEG_OFFSET_TO_BYTES((int32_t)c_seg->c_nextoffset), c_seg->c_bytes_used);
1288 }
1289
1290 if (must_be_compact) {
1291 if (c_seg->c_bytes_used != C_SEG_OFFSET_TO_BYTES((int32_t)c_seg->c_nextoffset)) {
1292 panic("c_seg_validate: c_bytes_used doesn't match c_nextoffset - c_nextoffset = %d, c_bytes_used = %d",
1293 (int32_t)C_SEG_OFFSET_TO_BYTES((int32_t)c_seg->c_nextoffset), c_seg->c_bytes_used);
1294 }
1295 }
1296}
1297
1298#endif
1299
1300
1301void
1302c_seg_need_delayed_compaction(c_segment_t c_seg, boolean_t c_list_lock_held)
1303{
1304 boolean_t clear_busy = FALSE;
1305
1306 if (c_list_lock_held == FALSE) {
1307 if (!lck_mtx_try_lock_spin_always(c_list_lock)) {
1308 C_SEG_BUSY(c_seg);
1309
1310 lck_mtx_unlock_always(&c_seg->c_lock);
1311 lck_mtx_lock_spin_always(c_list_lock);
			lck_mtx_lock_spin_always(&c_seg->c_lock);
1313
1314 clear_busy = TRUE;
1315 }
1316 }
1317 assert(c_seg->c_state != C_IS_FILLING);
1318
1319 if (!c_seg->c_on_minorcompact_q && !(C_SEG_IS_ON_DISK_OR_SOQ(c_seg)) && !c_seg->c_has_donated_pages) {
1320 queue_enter(&c_minor_list_head, c_seg, c_segment_t, c_list);
1321 c_seg->c_on_minorcompact_q = 1;
1322 c_minor_count++;
1323 }
1324 if (c_list_lock_held == FALSE) {
1325 lck_mtx_unlock_always(c_list_lock);
1326 }
1327
1328 if (clear_busy == TRUE) {
1329 C_SEG_WAKEUP_DONE(c_seg);
1330 }
1331}
1332
1333
1334unsigned int c_seg_moved_to_sparse_list = 0;
1335
1336void
1337c_seg_move_to_sparse_list(c_segment_t c_seg)
1338{
1339 boolean_t clear_busy = FALSE;
1340
1341 if (!lck_mtx_try_lock_spin_always(c_list_lock)) {
1342 C_SEG_BUSY(c_seg);
1343
1344 lck_mtx_unlock_always(&c_seg->c_lock);
1345 lck_mtx_lock_spin_always(c_list_lock);
		lck_mtx_lock_spin_always(&c_seg->c_lock);
1347
1348 clear_busy = TRUE;
1349 }
1350 c_seg_switch_state(c_seg, C_ON_SWAPPEDOUTSPARSE_Q, FALSE);
1351
1352 c_seg_moved_to_sparse_list++;
1353
1354 lck_mtx_unlock_always(c_list_lock);
1355
1356 if (clear_busy == TRUE) {
1357 C_SEG_WAKEUP_DONE(c_seg);
1358 }
1359}
1360
1361
1362void
1363c_seg_insert_into_q(queue_head_t *qhead, c_segment_t c_seg)
1364{
1365 c_segment_t c_seg_next;
1366
1367 if (queue_empty(qhead)) {
1368 queue_enter(qhead, c_seg, c_segment_t, c_age_list);
1369 } else {
1370 c_seg_next = (c_segment_t)queue_first(qhead);
1371
1372 while (TRUE) {
1373 if (c_seg->c_generation_id < c_seg_next->c_generation_id) {
1374 queue_insert_before(qhead, c_seg, c_seg_next, c_segment_t, c_age_list);
1375 break;
1376 }
1377 c_seg_next = (c_segment_t) queue_next(&c_seg_next->c_age_list);
1378
1379 if (queue_end(qhead, (queue_entry_t) c_seg_next)) {
1380 queue_enter(qhead, c_seg, c_segment_t, c_age_list);
1381 break;
1382 }
1383 }
1384 }
1385}
1386
1387
1388int try_minor_compaction_failed = 0;
1389int try_minor_compaction_succeeded = 0;
1390
1391void
1392c_seg_try_minor_compaction_and_unlock(c_segment_t c_seg)
1393{
1394 assert(c_seg->c_on_minorcompact_q);
	/*
	 * c_seg is currently on the delayed minor compaction
	 * queue and we have c_seg locked... if we can get the
	 * c_list_lock without blocking (if we blocked we could deadlock
	 * because the lock order is c_list_lock then c_seg's lock)
	 * we'll pull it from the delayed list and free it directly
	 */
1402 if (!lck_mtx_try_lock_spin_always(c_list_lock)) {
1403 /*
1404 * c_list_lock is held, we need to bail
1405 */
1406 try_minor_compaction_failed++;
1407
1408 lck_mtx_unlock_always(&c_seg->c_lock);
1409 } else {
1410 try_minor_compaction_succeeded++;
1411
1412 C_SEG_BUSY(c_seg);
1413 c_seg_do_minor_compaction_and_unlock(c_seg, TRUE, FALSE, FALSE);
1414 }
1415}
1416
1417
1418int
1419c_seg_do_minor_compaction_and_unlock(c_segment_t c_seg, boolean_t clear_busy, boolean_t need_list_lock, boolean_t disallow_page_replacement)
1420{
1421 int c_seg_freed;
1422
1423 assert(c_seg->c_busy);
1424 assert(!C_SEG_IS_ON_DISK_OR_SOQ(c_seg));
1425
1426 /*
1427 * check for the case that can occur when we are not swapping
1428 * and this segment has been major compacted in the past
1429 * and moved to the majorcompact q to remove it from further
1430 * consideration... if the occupancy falls too low we need
1431 * to put it back on the age_q so that it will be considered
1432 * in the next major compaction sweep... if we don't do this
1433 * we will eventually run into the c_segments_limit
1434 */
1435 if (c_seg->c_state == C_ON_MAJORCOMPACT_Q && C_SEG_SHOULD_MAJORCOMPACT_NOW(c_seg)) {
1436 c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
1437 }
1438 if (!c_seg->c_on_minorcompact_q) {
1439 if (clear_busy == TRUE) {
1440 C_SEG_WAKEUP_DONE(c_seg);
1441 }
1442
1443 lck_mtx_unlock_always(&c_seg->c_lock);
1444
1445 return 0;
1446 }
1447 queue_remove(&c_minor_list_head, c_seg, c_segment_t, c_list);
1448 c_seg->c_on_minorcompact_q = 0;
1449 c_minor_count--;
1450
1451 lck_mtx_unlock_always(c_list_lock);
1452
1453 if (disallow_page_replacement == TRUE) {
1454 lck_mtx_unlock_always(&c_seg->c_lock);
1455
1456 PAGE_REPLACEMENT_DISALLOWED(TRUE);
1457
		lck_mtx_lock_spin_always(&c_seg->c_lock);
1459 }
1460 c_seg_freed = c_seg_minor_compaction_and_unlock(c_seg, clear_busy);
1461
1462 if (disallow_page_replacement == TRUE) {
1463 PAGE_REPLACEMENT_DISALLOWED(FALSE);
1464 }
1465
1466 if (need_list_lock == TRUE) {
1467 lck_mtx_lock_spin_always(c_list_lock);
1468 }
1469
1470 return c_seg_freed;
1471}
1472
1473void
1474kdp_compressor_busy_find_owner(event64_t wait_event, thread_waitinfo_t *waitinfo)
1475{
1476 c_segment_t c_seg = (c_segment_t) wait_event;
1477
	waitinfo->owner = thread_tid(c_seg->c_busy_for_thread);
1479 waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(c_seg);
1480}
1481
1482#if DEVELOPMENT || DEBUG
1483int
1484do_cseg_wedge_thread(void)
1485{
1486 struct c_segment c_seg;
1487 c_seg.c_busy_for_thread = current_thread();
1488
1489 debug_cseg_wait_event = (event_t) &c_seg;
1490
1491 thread_set_pending_block_hint(current_thread(), kThreadWaitCompressor);
1492 assert_wait((event_t) (&c_seg), THREAD_INTERRUPTIBLE);
1493
1494 thread_block(THREAD_CONTINUE_NULL);
1495
1496 return 0;
1497}
1498
1499int
1500do_cseg_unwedge_thread(void)
1501{
1502 thread_wakeup(debug_cseg_wait_event);
1503 debug_cseg_wait_event = NULL;
1504
1505 return 0;
1506}
1507#endif /* DEVELOPMENT || DEBUG */
1508
1509void
1510c_seg_wait_on_busy(c_segment_t c_seg)
1511{
1512 c_seg->c_wanted = 1;
1513
	thread_set_pending_block_hint(current_thread(), kThreadWaitCompressor);
	assert_wait((event_t) (c_seg), THREAD_UNINT);
1516
1517 lck_mtx_unlock_always(&c_seg->c_lock);
1518 thread_block(THREAD_CONTINUE_NULL);
1519}
1520
1521#if CONFIG_FREEZE
1522/*
1523 * We don't have the task lock held while updating the task's
1524 * c_seg queues. We can do that because of the following restrictions:
1525 *
1526 * - SINGLE FREEZER CONTEXT:
1527 * We 'insert' c_segs into the task list on the task_freeze path.
1528 * There can only be one such freeze in progress and the task
1529 * isn't disappearing because we have the VM map lock held throughout
1530 * and we have a reference on the proc too.
1531 *
1532 * - SINGLE TASK DISOWN CONTEXT:
1533 * We 'disown' c_segs of a task ONLY from the task_terminate context. So
1534 * we don't need the task lock but we need the c_list_lock and the
1535 * compressor master lock (shared). We also hold the individual
1536 * c_seg locks (exclusive).
1537 *
1538 * If we either:
1539 * - can't get the c_seg lock on a try, then we start again because maybe
1540 * the c_seg is part of a compaction and might get freed. So we can't trust
1541 * that linkage and need to restart our queue traversal.
 * - OR, we run into a busy c_seg (say being swapped in or freed), we
1543 * drop all locks again and wait and restart our queue traversal.
1544 *
1545 * - The new_owner_task below is currently only the kernel or NULL.
1546 *
1547 */
1548void
1549c_seg_update_task_owner(c_segment_t c_seg, task_t new_owner_task)
1550{
1551 task_t owner_task = c_seg->c_task_owner;
1552 uint64_t uncompressed_bytes = ((c_seg->c_slots_used) * PAGE_SIZE_64);
1553
1554 LCK_MTX_ASSERT(c_list_lock, LCK_MTX_ASSERT_OWNED);
1555 LCK_MTX_ASSERT(&c_seg->c_lock, LCK_MTX_ASSERT_OWNED);
1556
1557 if (owner_task) {
1558 task_update_frozen_to_swap_acct(owner_task, uncompressed_bytes, DEBIT_FROM_SWAP);
1559 queue_remove(&owner_task->task_frozen_cseg_q, c_seg,
1560 c_segment_t, c_task_list_next_cseg);
1561 }
1562
1563 if (new_owner_task) {
1564 queue_enter(&new_owner_task->task_frozen_cseg_q, c_seg,
1565 c_segment_t, c_task_list_next_cseg);
1566 task_update_frozen_to_swap_acct(new_owner_task, uncompressed_bytes, CREDIT_TO_SWAP);
1567 }
1568
1569 c_seg->c_task_owner = new_owner_task;
1570}
1571
1572void
1573task_disown_frozen_csegs(task_t owner_task)
1574{
1575 c_segment_t c_seg = NULL, next_cseg = NULL;
1576
1577again:
1578 PAGE_REPLACEMENT_DISALLOWED(TRUE);
1579 lck_mtx_lock_spin_always(c_list_lock);
1580
1581 for (c_seg = (c_segment_t) queue_first(&owner_task->task_frozen_cseg_q);
1582 !queue_end(&owner_task->task_frozen_cseg_q, (queue_entry_t) c_seg);
1583 c_seg = next_cseg) {
1584 next_cseg = (c_segment_t) queue_next(&c_seg->c_task_list_next_cseg);
1585
1586 if (!lck_mtx_try_lock_spin_always(&c_seg->c_lock)) {
1587 lck_mtx_unlock(c_list_lock);
1588 PAGE_REPLACEMENT_DISALLOWED(FALSE);
1589 goto again;
1590 }
1591
1592 if (c_seg->c_busy) {
1593 lck_mtx_unlock(c_list_lock);
1594 PAGE_REPLACEMENT_DISALLOWED(FALSE);
1595
1596 c_seg_wait_on_busy(c_seg);
1597
1598 goto again;
1599 }
1600 assert(c_seg->c_task_owner == owner_task);
1601 c_seg_update_task_owner(c_seg, kernel_task);
1602 lck_mtx_unlock_always(&c_seg->c_lock);
1603 }
1604
1605 lck_mtx_unlock(c_list_lock);
1606 PAGE_REPLACEMENT_DISALLOWED(FALSE);
1607}
1608#endif /* CONFIG_FREEZE */
1609
1610void
1611c_seg_switch_state(c_segment_t c_seg, int new_state, boolean_t insert_head)
1612{
1613 int old_state = c_seg->c_state;
1614 queue_head_t *donate_swapout_list_head, *donate_swappedin_list_head;
1615 uint32_t *donate_swapout_count, *donate_swappedin_count;
1616
	/*
	 * On macOS the donate queue is swapped first, i.e. it is the c_early_swapout
	 * queue. On other swap-capable platforms, we want to swap donated segments
	 * out last, so we use the c_late_swapout queue.
	 */
1622#if XNU_TARGET_OS_OSX
1623#if (DEVELOPMENT || DEBUG)
1624 if (new_state != C_IS_FILLING) {
1625 LCK_MTX_ASSERT(&c_seg->c_lock, LCK_MTX_ASSERT_OWNED);
1626 }
1627 LCK_MTX_ASSERT(c_list_lock, LCK_MTX_ASSERT_OWNED);
1628#endif /* DEVELOPMENT || DEBUG */
1629
1630 donate_swapout_list_head = &c_early_swapout_list_head;
1631 donate_swapout_count = &c_early_swapout_count;
1632 donate_swappedin_list_head = &c_early_swappedin_list_head;
1633 donate_swappedin_count = &c_early_swappedin_count;
1634#else /* XNU_TARGET_OS_OSX */
1635 donate_swapout_list_head = &c_late_swapout_list_head;
1636 donate_swapout_count = &c_late_swapout_count;
1637 donate_swappedin_list_head = &c_late_swappedin_list_head;
1638 donate_swappedin_count = &c_late_swappedin_count;
1639#endif /* XNU_TARGET_OS_OSX */
1640
1641 switch (old_state) {
1642 case C_IS_EMPTY:
1643 assert(new_state == C_IS_FILLING || new_state == C_IS_FREE);
1644
1645 c_empty_count--;
1646 break;
1647
1648 case C_IS_FILLING:
1649 assert(new_state == C_ON_AGE_Q || new_state == C_ON_SWAPOUT_Q);
1650
1651 queue_remove(&c_filling_list_head, c_seg, c_segment_t, c_age_list);
1652 c_filling_count--;
1653 break;
1654
1655 case C_ON_AGE_Q:
1656 assert(new_state == C_ON_SWAPOUT_Q || new_state == C_ON_MAJORCOMPACT_Q ||
1657 new_state == C_IS_FREE);
1658
1659 queue_remove(&c_age_list_head, c_seg, c_segment_t, c_age_list);
1660 c_age_count--;
1661 break;
1662
1663 case C_ON_SWAPPEDIN_Q:
1664 if (c_seg->c_has_donated_pages) {
1665 assert(new_state == C_ON_SWAPOUT_Q || new_state == C_IS_FREE);
1666 queue_remove(donate_swappedin_list_head, c_seg, c_segment_t, c_age_list);
1667 *donate_swappedin_count -= 1;
1668 } else {
1669 assert(new_state == C_ON_AGE_Q || new_state == C_IS_FREE);
1670#if CONFIG_FREEZE
1671 assert(c_seg->c_has_freezer_pages);
1672 queue_remove(&c_early_swappedin_list_head, c_seg, c_segment_t, c_age_list);
1673 c_early_swappedin_count--;
1674#else /* CONFIG_FREEZE */
1675 queue_remove(&c_regular_swappedin_list_head, c_seg, c_segment_t, c_age_list);
1676 c_regular_swappedin_count--;
1677#endif /* CONFIG_FREEZE */
1678 }
1679 break;
1680
1681 case C_ON_SWAPOUT_Q:
1682 assert(new_state == C_ON_AGE_Q || new_state == C_IS_FREE || new_state == C_IS_EMPTY || new_state == C_ON_SWAPIO_Q);
1683
1684#if CONFIG_FREEZE
1685 if (c_seg->c_has_freezer_pages) {
1686 if (c_seg->c_task_owner && (new_state != C_ON_SWAPIO_Q)) {
1687 c_seg_update_task_owner(c_seg, NULL);
1688 }
1689 queue_remove(&c_early_swapout_list_head, c_seg, c_segment_t, c_age_list);
1690 c_early_swapout_count--;
1691 } else
1692#endif /* CONFIG_FREEZE */
1693 {
1694 if (c_seg->c_has_donated_pages) {
1695 queue_remove(donate_swapout_list_head, c_seg, c_segment_t, c_age_list);
1696 *donate_swapout_count -= 1;
1697 } else {
1698 queue_remove(&c_regular_swapout_list_head, c_seg, c_segment_t, c_age_list);
1699 c_regular_swapout_count--;
1700 }
1701 }
1702
1703 if (new_state == C_ON_AGE_Q) {
1704 c_seg->c_has_donated_pages = 0;
1705 }
1706 thread_wakeup((event_t)&compaction_swapper_running);
1707 break;
1708
1709 case C_ON_SWAPIO_Q:
1710#if CONFIG_FREEZE
1711 if (c_seg->c_has_freezer_pages) {
1712 assert(new_state == C_ON_SWAPPEDOUT_Q || new_state == C_ON_SWAPPEDOUTSPARSE_Q || new_state == C_ON_AGE_Q);
1713 } else
1714#endif /* CONFIG_FREEZE */
1715 {
1716 if (c_seg->c_has_donated_pages) {
1717 assert(new_state == C_ON_SWAPPEDOUT_Q || new_state == C_ON_SWAPPEDOUTSPARSE_Q || new_state == C_ON_SWAPPEDIN_Q);
1718 } else {
1719 assert(new_state == C_ON_SWAPPEDOUT_Q || new_state == C_ON_SWAPPEDOUTSPARSE_Q || new_state == C_ON_AGE_Q);
1720 }
1721 }
1722
1723 queue_remove(&c_swapio_list_head, c_seg, c_segment_t, c_age_list);
1724 c_swapio_count--;
1725 break;
1726
1727 case C_ON_SWAPPEDOUT_Q:
1728 assert(new_state == C_ON_SWAPPEDIN_Q || new_state == C_ON_AGE_Q ||
1729 new_state == C_ON_SWAPPEDOUTSPARSE_Q ||
1730 new_state == C_ON_BAD_Q || new_state == C_IS_EMPTY || new_state == C_IS_FREE);
1731
1732 queue_remove(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list);
1733 c_swappedout_count--;
1734 break;
1735
1736 case C_ON_SWAPPEDOUTSPARSE_Q:
1737 assert(new_state == C_ON_SWAPPEDIN_Q || new_state == C_ON_AGE_Q ||
1738 new_state == C_ON_BAD_Q || new_state == C_IS_EMPTY || new_state == C_IS_FREE);
1739
1740 queue_remove(&c_swappedout_sparse_list_head, c_seg, c_segment_t, c_age_list);
1741 c_swappedout_sparse_count--;
1742 break;
1743
1744 case C_ON_MAJORCOMPACT_Q:
1745 assert(new_state == C_ON_AGE_Q || new_state == C_IS_FREE);
1746
1747 queue_remove(&c_major_list_head, c_seg, c_segment_t, c_age_list);
1748 c_major_count--;
1749 break;
1750
1751 case C_ON_BAD_Q:
1752 assert(new_state == C_IS_FREE);
1753
1754 queue_remove(&c_bad_list_head, c_seg, c_segment_t, c_age_list);
1755 c_bad_count--;
1756 break;
1757
1758 default:
1759 panic("c_seg %p has bad c_state = %d", c_seg, old_state);
1760 }
1761
1762 switch (new_state) {
1763 case C_IS_FREE:
1764 assert(old_state != C_IS_FILLING);
1765
1766 break;
1767
1768 case C_IS_EMPTY:
1769 assert(old_state == C_ON_SWAPOUT_Q || old_state == C_ON_SWAPPEDOUT_Q || old_state == C_ON_SWAPPEDOUTSPARSE_Q);
1770
1771 c_empty_count++;
1772 break;
1773
1774 case C_IS_FILLING:
1775 assert(old_state == C_IS_EMPTY);
1776
1777 queue_enter(&c_filling_list_head, c_seg, c_segment_t, c_age_list);
1778 c_filling_count++;
1779 break;
1780
1781 case C_ON_AGE_Q:
1782 assert(old_state == C_IS_FILLING || old_state == C_ON_SWAPPEDIN_Q ||
1783 old_state == C_ON_SWAPOUT_Q || old_state == C_ON_SWAPIO_Q ||
1784 old_state == C_ON_MAJORCOMPACT_Q || old_state == C_ON_SWAPPEDOUT_Q || old_state == C_ON_SWAPPEDOUTSPARSE_Q);
1785
1786 assert(!c_seg->c_has_donated_pages);
1787 if (old_state == C_IS_FILLING) {
1788 queue_enter(&c_age_list_head, c_seg, c_segment_t, c_age_list);
1789 } else {
1790 if (!queue_empty(&c_age_list_head)) {
1791 c_segment_t c_first;
1792
1793 c_first = (c_segment_t)queue_first(&c_age_list_head);
1794 c_seg->c_creation_ts = c_first->c_creation_ts;
1795 }
1796 queue_enter_first(&c_age_list_head, c_seg, c_segment_t, c_age_list);
1797 }
1798 c_age_count++;
1799 break;
1800
1801 case C_ON_SWAPPEDIN_Q:
1802 {
1803 queue_head_t *list_head;
1804
1805 assert(old_state == C_ON_SWAPPEDOUT_Q || old_state == C_ON_SWAPPEDOUTSPARSE_Q || old_state == C_ON_SWAPIO_Q);
1806 if (c_seg->c_has_donated_pages) {
1807 /* A swapout error can occur while the c_seg is still on the swapio queue */
1808 list_head = donate_swappedin_list_head;
1809 *donate_swappedin_count += 1;
1810 } else {
1811#if CONFIG_FREEZE
1812 assert(c_seg->c_has_freezer_pages);
1813 list_head = &c_early_swappedin_list_head;
1814 c_early_swappedin_count++;
1815#else /* CONFIG_FREEZE */
1816 list_head = &c_regular_swappedin_list_head;
1817 c_regular_swappedin_count++;
1818#endif /* CONFIG_FREEZE */
1819 }
1820
1821 if (insert_head == TRUE) {
1822 queue_enter_first(list_head, c_seg, c_segment_t, c_age_list);
1823 } else {
1824 queue_enter(list_head, c_seg, c_segment_t, c_age_list);
1825 }
1826 break;
1827 }
1828
1829 case C_ON_SWAPOUT_Q:
1830 {
1831 queue_head_t *list_head;
1832
1833#if CONFIG_FREEZE
1834 /*
1835 * A segment with both identities, frozen + donated pages,
1836 * is put on the early swapout queue, i.e. the frozen identity wins.
1837 * This is because when both identities are set, the donation bit
1838 * is added later, in the c_current_seg_filled path, purely for
1839 * accounting purposes.
1840 */
1841 if (c_seg->c_has_freezer_pages) {
1842 assert(old_state == C_ON_AGE_Q || old_state == C_IS_FILLING);
1843 list_head = &c_early_swapout_list_head;
1844 c_early_swapout_count++;
1845 } else
1846#endif
1847 {
1848 if (c_seg->c_has_donated_pages) {
1849 assert(old_state == C_ON_SWAPPEDIN_Q || old_state == C_IS_FILLING);
1850 list_head = donate_swapout_list_head;
1851 *donate_swapout_count += 1;
1852 } else {
1853 assert(old_state == C_ON_AGE_Q || old_state == C_IS_FILLING);
1854 list_head = &c_regular_swapout_list_head;
1855 c_regular_swapout_count++;
1856 }
1857 }
1858
1859 if (insert_head == TRUE) {
1860 queue_enter_first(list_head, c_seg, c_segment_t, c_age_list);
1861 } else {
1862 queue_enter(list_head, c_seg, c_segment_t, c_age_list);
1863 }
1864 break;
1865 }
1866
1867 case C_ON_SWAPIO_Q:
1868 assert(old_state == C_ON_SWAPOUT_Q);
1869
1870 if (insert_head == TRUE) {
1871 queue_enter_first(&c_swapio_list_head, c_seg, c_segment_t, c_age_list);
1872 } else {
1873 queue_enter(&c_swapio_list_head, c_seg, c_segment_t, c_age_list);
1874 }
1875 c_swapio_count++;
1876 break;
1877
1878 case C_ON_SWAPPEDOUT_Q:
1879 assert(old_state == C_ON_SWAPIO_Q);
1880
1881 if (insert_head == TRUE) {
1882 queue_enter_first(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list);
1883 } else {
1884 queue_enter(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list);
1885 }
1886 c_swappedout_count++;
1887 break;
1888
1889 case C_ON_SWAPPEDOUTSPARSE_Q:
1890 assert(old_state == C_ON_SWAPIO_Q || old_state == C_ON_SWAPPEDOUT_Q);
1891
1892 if (insert_head == TRUE) {
1893 queue_enter_first(&c_swappedout_sparse_list_head, c_seg, c_segment_t, c_age_list);
1894 } else {
1895 queue_enter(&c_swappedout_sparse_list_head, c_seg, c_segment_t, c_age_list);
1896 }
1897
1898 c_swappedout_sparse_count++;
1899 break;
1900
1901 case C_ON_MAJORCOMPACT_Q:
1902 assert(old_state == C_ON_AGE_Q);
1903 assert(!c_seg->c_has_donated_pages);
1904
1905 if (insert_head == TRUE) {
1906 queue_enter_first(&c_major_list_head, c_seg, c_segment_t, c_age_list);
1907 } else {
1908 queue_enter(&c_major_list_head, c_seg, c_segment_t, c_age_list);
1909 }
1910 c_major_count++;
1911 break;
1912
1913 case C_ON_BAD_Q:
1914 assert(old_state == C_ON_SWAPPEDOUT_Q || old_state == C_ON_SWAPPEDOUTSPARSE_Q);
1915
1916 if (insert_head == TRUE) {
1917 queue_enter_first(&c_bad_list_head, c_seg, c_segment_t, c_age_list);
1918 } else {
1919 queue_enter(&c_bad_list_head, c_seg, c_segment_t, c_age_list);
1920 }
1921 c_bad_count++;
1922 break;
1923
1924 default:
1925 panic("c_seg %p requesting bad c_state = %d", c_seg, new_state);
1926 }
1927 c_seg->c_state = new_state;
1928}
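/*
 * A minimal sketch of how callers in this file typically drive
 * c_seg_switch_state(), assuming the locking convention it expects (and
 * asserts on macOS DEVELOPMENT/DEBUG builds): the c_list_lock is held, and
 * the segment's own lock as well unless the new state is C_IS_FILLING.
 * The transition shown is one legal example for a non-donated segment.
 * Not compiled; illustration only.
 */
#if 0
static void
c_seg_switch_state_example(c_segment_t c_seg)
{
	lck_mtx_lock_spin_always(c_list_lock);
	lck_mtx_lock_spin_always(&c_seg->c_lock);

	/* e.g. push an aged, non-donated segment toward swap */
	c_seg_switch_state(c_seg, C_ON_SWAPOUT_Q, FALSE);

	lck_mtx_unlock_always(&c_seg->c_lock);
	lck_mtx_unlock_always(c_list_lock);
}
#endif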
1929
1930
1931
1932void
1933c_seg_free(c_segment_t c_seg)
1934{
1935 assert(c_seg->c_busy);
1936
1937 lck_mtx_unlock_always(&c_seg->c_lock);
1938 lck_mtx_lock_spin_always(c_list_lock);
1939 lck_mtx_lock_spin_always(&c_seg->c_lock);
1940
1941 c_seg_free_locked(c_seg);
1942}
1943
1944
1945void
1946c_seg_free_locked(c_segment_t c_seg)
1947{
1948 int segno;
1949 int pages_populated = 0;
1950 int32_t *c_buffer = NULL;
1951 uint64_t c_swap_handle = 0;
1952
1953 assert(c_seg->c_busy);
1954 assert(c_seg->c_slots_used == 0);
1955 assert(!c_seg->c_on_minorcompact_q);
1956 assert(!c_seg->c_busy_swapping);
1957
1958 if (c_seg->c_overage_swap == TRUE) {
1959 c_overage_swapped_count--;
1960 c_seg->c_overage_swap = FALSE;
1961 }
1962 if (!(C_SEG_IS_ONDISK(c_seg))) {
1963 c_buffer = c_seg->c_store.c_buffer;
1964 } else {
1965 c_swap_handle = c_seg->c_store.c_swap_handle;
1966 }
1967
1968 c_seg_switch_state(c_seg, C_IS_FREE, FALSE);
1969
1970 if (c_buffer) {
1971 pages_populated = (round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset))) / PAGE_SIZE;
1972 c_seg->c_store.c_buffer = NULL;
1973 } else {
1974#if CONFIG_FREEZE
1975 c_seg_update_task_owner(c_seg, NULL);
1976#endif /* CONFIG_FREEZE */
1977
1978 c_seg->c_store.c_swap_handle = (uint64_t)-1;
1979 }
1980
1981 lck_mtx_unlock_always(&c_seg->c_lock);
1982
1983 lck_mtx_unlock_always(c_list_lock);
1984
1985 if (c_buffer) {
1986 if (pages_populated) {
1987 kernel_memory_depopulate((vm_offset_t)c_buffer,
1988 ptoa(pages_populated), KMA_COMPRESSOR,
1989 VM_KERN_MEMORY_COMPRESSOR);
1990 }
1991 } else if (c_swap_handle) {
1992 /*
1993 * Free swap space on disk.
1994 */
1995 vm_swap_free(c_swap_handle);
1996 }
1997 lck_mtx_lock_spin_always(&c_seg->c_lock);
1998 /*
1999 * c_seg must remain busy until
2000 * after the call to vm_swap_free
2001 */
2002 C_SEG_WAKEUP_DONE(c_seg);
2003 lck_mtx_unlock_always(&c_seg->c_lock);
2004
2005 segno = c_seg->c_mysegno;
2006
2007 lck_mtx_lock_spin_always(c_list_lock);
2008 /*
2009 * because the c_buffer is now associated with the segno,
2010 * we can't put the segno back on the free list until
2011 * after we have depopulated the c_buffer range, or
2012 * we run the risk of depopulating a range that is
2013 * now being used in one of the compressor heads
2014 */
2015 c_segments[segno].c_segno = c_free_segno_head;
2016 c_free_segno_head = segno;
2017 c_segment_count--;
2018
2019 lck_mtx_unlock_always(c_list_lock);
2020
2021 lck_mtx_destroy(&c_seg->c_lock, &vm_compressor_lck_grp);
2022
2023 if (c_seg->c_slot_var_array_len) {
2024 kfree_type(struct c_slot, c_seg->c_slot_var_array_len,
2025 c_seg->c_slot_var_array);
2026 }
2027
2028 zfree(compressor_segment_zone, c_seg);
2029}
2030
2031#if DEVELOPMENT || DEBUG
2032int c_seg_trim_page_count = 0;
2033#endif
2034
2035void
2036c_seg_trim_tail(c_segment_t c_seg)
2037{
2038 c_slot_t cs;
2039 uint32_t c_size;
2040 uint32_t c_offset;
2041 uint32_t c_rounded_size;
2042 uint16_t current_nextslot;
2043 uint32_t current_populated_offset;
2044
2045 if (c_seg->c_bytes_used == 0) {
2046 return;
2047 }
2048 current_nextslot = c_seg->c_nextslot;
2049 current_populated_offset = c_seg->c_populated_offset;
2050
2051 while (c_seg->c_nextslot) {
2052 cs = C_SEG_SLOT_FROM_INDEX(c_seg, (c_seg->c_nextslot - 1));
2053
2054 c_size = UNPACK_C_SIZE(cs);
2055
2056 if (c_size) {
2057 if (current_nextslot != c_seg->c_nextslot) {
2058 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;
2059 c_offset = cs->c_offset + C_SEG_BYTES_TO_OFFSET(c_rounded_size);
2060
2061 c_seg->c_nextoffset = c_offset;
2062 c_seg->c_populated_offset = (c_offset + (C_SEG_BYTES_TO_OFFSET(PAGE_SIZE) - 1)) &
2063 ~(C_SEG_BYTES_TO_OFFSET(PAGE_SIZE) - 1);
2064
2065 if (c_seg->c_firstemptyslot > c_seg->c_nextslot) {
2066 c_seg->c_firstemptyslot = c_seg->c_nextslot;
2067 }
2068#if DEVELOPMENT || DEBUG
2069 c_seg_trim_page_count += ((round_page_32(C_SEG_OFFSET_TO_BYTES(current_populated_offset)) -
2070 round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset))) /
2071 PAGE_SIZE);
2072#endif
2073 }
2074 break;
2075 }
2076 c_seg->c_nextslot--;
2077 }
2078 assert(c_seg->c_nextslot);
2079}
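/*
 * Worked example of the rounding idiom used above and throughout this file,
 * assuming (hypothetically) a 4-byte slot alignment, i.e.
 * C_SEG_OFFSET_ALIGNMENT_MASK == 0x3: a 13-byte compressed slot occupies
 * (13 + 3) & ~3 = 16 bytes in the buffer, and the populated offset is then
 * rounded up to the next page boundary before anything is depopulated.
 * Not compiled; illustration only.
 */
#if 0
static void
rounding_examples(void)
{
	/* 13-byte slot rounded up to the assumed 4-byte slot alignment */
	assert(((13 + 0x3) & ~0x3) == 16);
	/* 4097 bytes rounded up to the next 4KB page boundary (4KB pages assumed) */
	assert(((4097 + (4096 - 1)) & ~(uint32_t)(4096 - 1)) == 8192);
}
#endif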
2080
2081
2082int
2083c_seg_minor_compaction_and_unlock(c_segment_t c_seg, boolean_t clear_busy)
2084{
2085 c_slot_mapping_t slot_ptr;
2086 uint32_t c_offset = 0;
2087 uint32_t old_populated_offset;
2088 uint32_t c_rounded_size;
2089 uint32_t c_size;
2090 uint16_t c_indx = 0;
2091 int i;
2092 c_slot_t c_dst;
2093 c_slot_t c_src;
2094
2095 assert(c_seg->c_busy);
2096
2097#if VALIDATE_C_SEGMENTS
2098 c_seg_validate(c_seg, FALSE);
2099#endif
2100 if (c_seg->c_bytes_used == 0) {
2101 c_seg_free(c_seg);
2102 return 1;
2103 }
2104 lck_mtx_unlock_always(&c_seg->c_lock);
2105
2106 if (c_seg->c_firstemptyslot >= c_seg->c_nextslot || C_SEG_UNUSED_BYTES(c_seg) < PAGE_SIZE) {
2107 goto done;
2108 }
2109
2110/* TODO: assert first emptyslot's c_size is actually 0 */
2111
2112#if DEVELOPMENT || DEBUG
2113 C_SEG_MAKE_WRITEABLE(c_seg);
2114#endif
2115
2116#if VALIDATE_C_SEGMENTS
2117 c_seg->c_was_minor_compacted++;
2118#endif
2119 c_indx = c_seg->c_firstemptyslot;
2120 c_dst = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx);
2121
2122 old_populated_offset = c_seg->c_populated_offset;
2123 c_offset = c_dst->c_offset;
2124
2125 for (i = c_indx + 1; i < c_seg->c_nextslot && c_offset < c_seg->c_nextoffset; i++) {
2126 c_src = C_SEG_SLOT_FROM_INDEX(c_seg, i);
2127
2128 c_size = UNPACK_C_SIZE(c_src);
2129
2130 if (c_size == 0) {
2131 continue;
2132 }
2133
2134 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;
2135/* N.B.: This memcpy may be an overlapping copy */
2136 memcpy(&c_seg->c_store.c_buffer[c_offset], &c_seg->c_store.c_buffer[c_src->c_offset], c_rounded_size);
2137
2138 cslot_copy(c_dst, c_src);
2139 c_dst->c_offset = c_offset;
2140
2141 slot_ptr = C_SLOT_UNPACK_PTR(c_dst);
2142 slot_ptr->s_cindx = c_indx;
2143
2144 c_offset += C_SEG_BYTES_TO_OFFSET(c_rounded_size);
2145 PACK_C_SIZE(c_src, 0);
2146 c_indx++;
2147
2148 c_dst = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx);
2149 }
2150 c_seg->c_firstemptyslot = c_indx;
2151 c_seg->c_nextslot = c_indx;
2152 c_seg->c_nextoffset = c_offset;
2153 c_seg->c_populated_offset = (c_offset + (C_SEG_BYTES_TO_OFFSET(PAGE_SIZE) - 1)) & ~(C_SEG_BYTES_TO_OFFSET(PAGE_SIZE) - 1);
2154 c_seg->c_bytes_unused = 0;
2155
2156#if VALIDATE_C_SEGMENTS
2157 c_seg_validate(c_seg, TRUE);
2158#endif
2159 if (old_populated_offset > c_seg->c_populated_offset) {
2160 uint32_t gc_size;
2161 int32_t *gc_ptr;
2162
2163 gc_size = C_SEG_OFFSET_TO_BYTES(old_populated_offset - c_seg->c_populated_offset);
2164 gc_ptr = &c_seg->c_store.c_buffer[c_seg->c_populated_offset];
2165
2166 kernel_memory_depopulate((vm_offset_t)gc_ptr, gc_size,
2167 KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR);
2168 }
2169
2170#if DEVELOPMENT || DEBUG
2171 C_SEG_WRITE_PROTECT(c_seg);
2172#endif
2173
2174done:
2175 if (clear_busy == TRUE) {
2176 lck_mtx_lock_spin_always(&c_seg->c_lock);
2177 C_SEG_WAKEUP_DONE(c_seg);
2178 lck_mtx_unlock_always(&c_seg->c_lock);
2179 }
2180 return 0;
2181}
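/*
 * The core of the minor compaction above is a classic "slide the live slots
 * down over the holes" pass; the real code additionally fixes up each moved
 * slot's reverse mapping (s_cindx). A simplified, self-contained model of
 * the sliding idea, using hypothetical toy types rather than the kernel's
 * real c_slot layout. Not compiled; illustration only.
 */
#if 0
struct toy_slot {
	uint32_t offset;   /* start of the slot's data in buf */
	uint32_t size;     /* 0 means the slot is free */
};

static uint32_t
toy_minor_compact(uint8_t *buf, struct toy_slot *slots, uint32_t nslots)
{
	uint32_t dst = 0, live = 0;

	for (uint32_t i = 0; i < nslots; i++) {
		if (slots[i].size == 0) {
			continue;               /* skip holes */
		}
		/* regions may overlap when sliding left, so use memmove here */
		memmove(buf + dst, buf + slots[i].offset, slots[i].size);
		slots[live].offset = dst;
		slots[live].size = slots[i].size;
		dst += slots[i].size;
		live++;
	}
	for (uint32_t i = live; i < nslots; i++) {
		slots[i].size = 0;              /* everything past 'live' is now free */
	}
	return dst;                             /* new end-of-data offset */
}
#endif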
2182
2183
2184static void
2185c_seg_alloc_nextslot(c_segment_t c_seg)
2186{
2187 struct c_slot *old_slot_array = NULL;
2188 struct c_slot *new_slot_array = NULL;
2189 int newlen;
2190 int oldlen;
2191
2192 if (c_seg->c_nextslot < c_seg_fixed_array_len) {
2193 return;
2194 }
2195
2196 if ((c_seg->c_nextslot - c_seg_fixed_array_len) >= c_seg->c_slot_var_array_len) {
2197 oldlen = c_seg->c_slot_var_array_len;
2198 old_slot_array = c_seg->c_slot_var_array;
2199
2200 if (oldlen == 0) {
2201 newlen = c_seg_slot_var_array_min_len;
2202 } else {
2203 newlen = oldlen * 2;
2204 }
2205
2206 new_slot_array = kalloc_type(struct c_slot, newlen, Z_WAITOK);
2207
2208 lck_mtx_lock_spin_always(&c_seg->c_lock);
2209
2210 if (old_slot_array) {
2211 memcpy(new_slot_array, old_slot_array,
2212 sizeof(struct c_slot) * oldlen);
2213 }
2214
2215 c_seg->c_slot_var_array_len = newlen;
2216 c_seg->c_slot_var_array = new_slot_array;
2217
2218 lck_mtx_unlock_always(&c_seg->c_lock);
2219
2220 kfree_type(struct c_slot, oldlen, old_slot_array);
2221 }
2222}
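/*
 * A sketch of the slot-array growth policy above, assuming (as the
 * early-return and growth checks suggest) that C_SEG_SLOT_FROM_INDEX() maps
 * the first c_seg_fixed_array_len slots to a fixed inline array and the
 * remainder to this variable array. The fixed_len and min_len values below
 * are hypothetical stand-ins, not the kernel's actual tunables. Not
 * compiled; illustration only.
 */
#if 0
static void
slot_array_growth_example(void)
{
	uint32_t fixed_len = 4;        /* stands in for c_seg_fixed_array_len */
	uint32_t min_len = 32;         /* stands in for c_seg_slot_var_array_min_len */
	uint32_t var_len = 0;

	for (uint32_t nextslot = 0; nextslot < 200; nextslot++) {
		if (nextslot < fixed_len) {
			continue;                       /* served by the fixed inline array */
		}
		if ((nextslot - fixed_len) >= var_len) {
			var_len = (var_len == 0) ? min_len : var_len * 2;  /* 32, 64, 128, 256 */
		}
	}
	assert(var_len == 256);         /* 196 overflow slots drive 32 -> 64 -> 128 -> 256 */
}
#endif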
2223
2224
2225#define C_SEG_MAJOR_COMPACT_STATS_MAX (30)
2226
2227struct {
2228 uint64_t asked_permission;
2229 uint64_t compactions;
2230 uint64_t moved_slots;
2231 uint64_t moved_bytes;
2232 uint64_t wasted_space_in_swapouts;
2233 uint64_t count_of_swapouts;
2234 uint64_t count_of_freed_segs;
2235 uint64_t bailed_compactions;
2236 uint64_t bytes_freed_rate_us;
2237} c_seg_major_compact_stats[C_SEG_MAJOR_COMPACT_STATS_MAX];
2238
2239int c_seg_major_compact_stats_now = 0;
2240
2241
2242#define C_MAJOR_COMPACTION_SIZE_APPROPRIATE ((c_seg_bufsize * 90) / 100)
2243
2244
2245boolean_t
2246c_seg_major_compact_ok(
2247 c_segment_t c_seg_dst,
2248 c_segment_t c_seg_src)
2249{
2250 c_seg_major_compact_stats[c_seg_major_compact_stats_now].asked_permission++;
2251
2252 if (c_seg_src->c_bytes_used >= C_MAJOR_COMPACTION_SIZE_APPROPRIATE &&
2253 c_seg_dst->c_bytes_used >= C_MAJOR_COMPACTION_SIZE_APPROPRIATE) {
2254 return FALSE;
2255 }
2256
2257 if (c_seg_dst->c_nextoffset >= c_seg_off_limit || c_seg_dst->c_nextslot >= C_SLOT_MAX_INDEX) {
2258 /*
2259 * destination segment is full... can't compact
2260 */
2261 return FALSE;
2262 }
2263
2264 return TRUE;
2265}
2266
2267
2268boolean_t
2269c_seg_major_compact(
2270 c_segment_t c_seg_dst,
2271 c_segment_t c_seg_src)
2272{
2273 c_slot_mapping_t slot_ptr;
2274 uint32_t c_rounded_size;
2275 uint32_t c_size;
2276 uint16_t dst_slot;
2277 int i;
2278 c_slot_t c_dst;
2279 c_slot_t c_src;
2280 boolean_t keep_compacting = TRUE;
2281
2282 /*
2283 * The segments are not locked, but they are both marked c_busy,
2284 * which keeps c_decompress from working on them...
2285 * we can safely allocate new pages, move compressed data
2286 * from c_seg_src to c_seg_dst and update both c_segments'
2287 * state without holding the master lock.
2288 */
2289#if DEVELOPMENT || DEBUG
2290 C_SEG_MAKE_WRITEABLE(c_seg_dst);
2291#endif
2292
2293#if VALIDATE_C_SEGMENTS
2294 c_seg_dst->c_was_major_compacted++;
2295 c_seg_src->c_was_major_donor++;
2296#endif
2297 assertf(c_seg_dst->c_has_donated_pages == c_seg_src->c_has_donated_pages, "Mismatched donation status Dst: %p, Src: %p\n", c_seg_dst, c_seg_src);
2298 c_seg_major_compact_stats[c_seg_major_compact_stats_now].compactions++;
2299
2300 dst_slot = c_seg_dst->c_nextslot;
2301
2302 for (i = 0; i < c_seg_src->c_nextslot; i++) {
2303 c_src = C_SEG_SLOT_FROM_INDEX(c_seg_src, i);
2304
2305 c_size = UNPACK_C_SIZE(c_src);
2306
2307 if (c_size == 0) {
2308 /* BATCH: move what we have so far; */
2309 continue;
2310 }
2311
2312 if (C_SEG_OFFSET_TO_BYTES(c_seg_dst->c_populated_offset - c_seg_dst->c_nextoffset) < (unsigned) c_size) {
2313 int size_to_populate;
2314
2315 /* doesn't fit */
2316 size_to_populate = c_seg_bufsize - C_SEG_OFFSET_TO_BYTES(c_seg_dst->c_populated_offset);
2317
2318 if (size_to_populate == 0) {
2319 /* can't fit */
2320 keep_compacting = FALSE;
2321 break;
2322 }
2323 if (size_to_populate > C_SEG_MAX_POPULATE_SIZE) {
2324 size_to_populate = C_SEG_MAX_POPULATE_SIZE;
2325 }
2326
2327 kernel_memory_populate(
2328 (vm_offset_t) &c_seg_dst->c_store.c_buffer[c_seg_dst->c_populated_offset],
2329 size_to_populate,
2330 KMA_NOFAIL | KMA_COMPRESSOR,
2331 VM_KERN_MEMORY_COMPRESSOR);
2332
2333 c_seg_dst->c_populated_offset += C_SEG_BYTES_TO_OFFSET(size_to_populate);
2334 assert(C_SEG_OFFSET_TO_BYTES(c_seg_dst->c_populated_offset) <= c_seg_bufsize);
2335 }
2336 c_seg_alloc_nextslot(c_seg_dst);
2337
2338 c_dst = C_SEG_SLOT_FROM_INDEX(c_seg_dst, c_seg_dst->c_nextslot);
2339
2340 memcpy(&c_seg_dst->c_store.c_buffer[c_seg_dst->c_nextoffset], &c_seg_src->c_store.c_buffer[c_src->c_offset], c_size);
2341
2342 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;
2343
2344 c_seg_major_compact_stats[c_seg_major_compact_stats_now].moved_slots++;
2345 c_seg_major_compact_stats[c_seg_major_compact_stats_now].moved_bytes += c_size;
2346
2347 cslot_copy(c_dst, c_src);
2348 c_dst->c_offset = c_seg_dst->c_nextoffset;
2349
2350 if (c_seg_dst->c_firstemptyslot == c_seg_dst->c_nextslot) {
2351 c_seg_dst->c_firstemptyslot++;
2352 }
2353 c_seg_dst->c_slots_used++;
2354 c_seg_dst->c_nextslot++;
2355 c_seg_dst->c_bytes_used += c_rounded_size;
2356 c_seg_dst->c_nextoffset += C_SEG_BYTES_TO_OFFSET(c_rounded_size);
2357
2358 PACK_C_SIZE(c_src, 0);
2359
2360 c_seg_src->c_bytes_used -= c_rounded_size;
2361 c_seg_src->c_bytes_unused += c_rounded_size;
2362 c_seg_src->c_firstemptyslot = 0;
2363
2364 assert(c_seg_src->c_slots_used);
2365 c_seg_src->c_slots_used--;
2366
2367 if (!c_seg_src->c_swappedin) {
2368 /* Pessimistically lose swappedin status when non-swappedin pages are added. */
2369 c_seg_dst->c_swappedin = false;
2370 }
2371
2372 if (c_seg_dst->c_nextoffset >= c_seg_off_limit || c_seg_dst->c_nextslot >= C_SLOT_MAX_INDEX) {
2373 /* dest segment is now full */
2374 keep_compacting = FALSE;
2375 break;
2376 }
2377 }
2378#if DEVELOPMENT || DEBUG
2379 C_SEG_WRITE_PROTECT(c_seg_dst);
2380#endif
2381 if (dst_slot < c_seg_dst->c_nextslot) {
2382 PAGE_REPLACEMENT_ALLOWED(TRUE);
2383 /*
2384 * we've now locked out c_decompress from
2385 * converting the slot passed into it into
2386 * a c_segment_t which allows us to use
2387 * the backptr to change which c_segment and
2388 * index the slot points to
2389 */
2390 while (dst_slot < c_seg_dst->c_nextslot) {
2391 c_dst = C_SEG_SLOT_FROM_INDEX(c_seg_dst, dst_slot);
2392
2393 slot_ptr = C_SLOT_UNPACK_PTR(c_dst);
2394 /* <csegno=0,indx=0> would mean "empty slot", so use csegno+1 */
2395 slot_ptr->s_cseg = c_seg_dst->c_mysegno + 1;
2396 slot_ptr->s_cindx = dst_slot++;
2397 }
2398 PAGE_REPLACEMENT_ALLOWED(FALSE);
2399 }
2400 return keep_compacting;
2401}
2402
2403
2404uint64_t
2405vm_compressor_compute_elapsed_msecs(clock_sec_t end_sec, clock_nsec_t end_nsec, clock_sec_t start_sec, clock_nsec_t start_nsec)
2406{
2407 uint64_t end_msecs;
2408 uint64_t start_msecs;
2409
2410 end_msecs = (end_sec * 1000) + end_nsec / 1000000;
2411 start_msecs = (start_sec * 1000) + start_nsec / 1000000;
2412
2413 return end_msecs - start_msecs;
2414}
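/*
 * A quick worked example of the millisecond computation above:
 * end = (7 s, 100,000,000 ns) and start = (5 s, 250,000,000 ns) give
 * 7100 - 5250 = 1850 msecs. Not compiled; illustration only.
 */
#if 0
static void
elapsed_msecs_example(void)
{
	assert(vm_compressor_compute_elapsed_msecs(7, 100000000, 5, 250000000) == 1850);
}
#endif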
2415
2416
2417
2418uint32_t compressor_eval_period_in_msecs = 250;
2419uint32_t compressor_sample_min_in_msecs = 500;
2420uint32_t compressor_sample_max_in_msecs = 10000;
2421uint32_t compressor_thrashing_threshold_per_10msecs = 50;
2422uint32_t compressor_thrashing_min_per_10msecs = 20;
2423
2424/* When true, reset sample data next chance we get. */
2425static boolean_t compressor_need_sample_reset = FALSE;
2426
2427
2428void
2429compute_swapout_target_age(void)
2430{
2431 clock_sec_t cur_ts_sec;
2432 clock_nsec_t cur_ts_nsec;
2433 uint32_t min_operations_needed_in_this_sample;
2434 uint64_t elapsed_msecs_in_eval;
2435 uint64_t elapsed_msecs_in_sample;
2436 boolean_t need_eval_reset = FALSE;
2437
2438 clock_get_system_nanotime(&cur_ts_sec, &cur_ts_nsec);
2439
2440 elapsed_msecs_in_sample = vm_compressor_compute_elapsed_msecs(cur_ts_sec, cur_ts_nsec, start_of_sample_period_sec, start_of_sample_period_nsec);
2441
2442 if (compressor_need_sample_reset ||
2443 elapsed_msecs_in_sample >= compressor_sample_max_in_msecs) {
2444 compressor_need_sample_reset = TRUE;
2445 need_eval_reset = TRUE;
2446 goto done;
2447 }
2448 elapsed_msecs_in_eval = vm_compressor_compute_elapsed_msecs(cur_ts_sec, cur_ts_nsec, start_of_eval_period_sec, start_of_eval_period_nsec);
2449
2450 if (elapsed_msecs_in_eval < compressor_eval_period_in_msecs) {
2451 goto done;
2452 }
2453 need_eval_reset = TRUE;
2454
2455 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_START, elapsed_msecs_in_eval, sample_period_compression_count, sample_period_decompression_count, 0, 0);
2456
2457 min_operations_needed_in_this_sample = (compressor_thrashing_min_per_10msecs * (uint32_t)elapsed_msecs_in_eval) / 10;
2458
2459 if ((sample_period_compression_count - last_eval_compression_count) < min_operations_needed_in_this_sample ||
2460 (sample_period_decompression_count - last_eval_decompression_count) < min_operations_needed_in_this_sample) {
2461 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, sample_period_compression_count - last_eval_compression_count,
2462 sample_period_decompression_count - last_eval_decompression_count, 0, 1, 0);
2463
2464 swapout_target_age = 0;
2465
2466 compressor_need_sample_reset = TRUE;
2467 need_eval_reset = TRUE;
2468 goto done;
2469 }
2470 last_eval_compression_count = sample_period_compression_count;
2471 last_eval_decompression_count = sample_period_decompression_count;
2472
2473 if (elapsed_msecs_in_sample < compressor_sample_min_in_msecs) {
2474 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, swapout_target_age, 0, 0, 5, 0);
2475 goto done;
2476 }
2477 if (sample_period_decompression_count > ((compressor_thrashing_threshold_per_10msecs * elapsed_msecs_in_sample) / 10)) {
2478 uint64_t running_total;
2479 uint64_t working_target;
2480 uint64_t aging_target;
2481 uint32_t oldest_age_of_csegs_sampled = 0;
2482 uint64_t working_set_approximation = 0;
2483
2484 swapout_target_age = 0;
2485
2486 working_target = (sample_period_decompression_count / 100) * 95; /* 95 percent */
2487 aging_target = (sample_period_decompression_count / 100) * 1; /* 1 percent */
2488 running_total = 0;
2489
2490 for (oldest_age_of_csegs_sampled = 0; oldest_age_of_csegs_sampled < DECOMPRESSION_SAMPLE_MAX_AGE; oldest_age_of_csegs_sampled++) {
2491 running_total += age_of_decompressions_during_sample_period[oldest_age_of_csegs_sampled];
2492
2493 working_set_approximation += oldest_age_of_csegs_sampled * age_of_decompressions_during_sample_period[oldest_age_of_csegs_sampled];
2494
2495 if (running_total >= working_target) {
2496 break;
2497 }
2498 }
2499 if (oldest_age_of_csegs_sampled < DECOMPRESSION_SAMPLE_MAX_AGE) {
2500 working_set_approximation = (working_set_approximation * 1000) / elapsed_msecs_in_sample;
2501
2502 if (working_set_approximation < VM_PAGE_COMPRESSOR_COUNT) {
2503 running_total = overage_decompressions_during_sample_period;
2504
2505 for (oldest_age_of_csegs_sampled = DECOMPRESSION_SAMPLE_MAX_AGE - 1; oldest_age_of_csegs_sampled; oldest_age_of_csegs_sampled--) {
2506 running_total += age_of_decompressions_during_sample_period[oldest_age_of_csegs_sampled];
2507
2508 if (running_total >= aging_target) {
2509 break;
2510 }
2511 }
2512 swapout_target_age = (uint32_t)cur_ts_sec - oldest_age_of_csegs_sampled;
2513
2514 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, swapout_target_age, working_set_approximation, VM_PAGE_COMPRESSOR_COUNT, 2, 0);
2515 } else {
2516 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, working_set_approximation, VM_PAGE_COMPRESSOR_COUNT, 0, 3, 0);
2517 }
2518 } else {
2519 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, working_target, running_total, 0, 4, 0);
2520 }
2521
2522 compressor_need_sample_reset = TRUE;
2523 need_eval_reset = TRUE;
2524 } else {
2525 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, sample_period_decompression_count, (compressor_thrashing_threshold_per_10msecs * elapsed_msecs_in_sample) / 10, 0, 6, 0);
2526 }
2527done:
2528 if (compressor_need_sample_reset == TRUE) {
2529 bzero(age_of_decompressions_during_sample_period, sizeof(age_of_decompressions_during_sample_period));
2530 overage_decompressions_during_sample_period = 0;
2531
2532 start_of_sample_period_sec = cur_ts_sec;
2533 start_of_sample_period_nsec = cur_ts_nsec;
2534 sample_period_decompression_count = 0;
2535 sample_period_compression_count = 0;
2536 last_eval_decompression_count = 0;
2537 last_eval_compression_count = 0;
2538 compressor_need_sample_reset = FALSE;
2539 }
2540 if (need_eval_reset == TRUE) {
2541 start_of_eval_period_sec = cur_ts_sec;
2542 start_of_eval_period_nsec = cur_ts_nsec;
2543 }
2544}
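/*
 * A compact model of the thrashing heuristic above, under hypothetical
 * numbers: with 1000 decompressions in the sample, working_target is 950
 * (95%) and aging_target is 10 (1%). Scanning the age histogram from age 0
 * up, the bucket where the running total first reaches 950 bounds the "hot"
 * working set; if that working set fits in the compressor, scanning from the
 * oldest bucket down until 1% accumulates yields the cutoff age, and
 * swapout_target_age = now - cutoff means older segments are eligible for
 * swapout. The toy buckets below are made up, and the real second pass also
 * seeds its total with overage_decompressions_during_sample_period.
 * Not compiled; illustration only.
 */
#if 0
static void
swapout_target_age_example(void)
{
	/* hypothetical histogram: decompressions per cseg age (seconds), totals 1000 */
	uint32_t hist[8] = { 400, 300, 200, 80, 12, 5, 2, 1 };
	uint64_t working_target = 950;  /* 95% of 1000 */
	uint64_t aging_target = 10;     /* 1% of 1000 */
	uint64_t running = 0;
	uint32_t age;

	/* youngest ages covering 95% of decompressions: 400, 700, 900, 980 -> age 3 */
	for (age = 0; age < 8; age++) {
		running += hist[age];
		if (running >= working_target) {
			break;
		}
	}
	assert(age == 3);

	/* oldest ages accumulating 1%: 1, 3, 8, 20 -> age 4 */
	running = 0;
	for (age = 8 - 1; age; age--) {
		running += hist[age];
		if (running >= aging_target) {
			break;
		}
	}
	assert(age == 4);
	/* swapout_target_age would then be (now - 4) */
}
#endif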
2545
2546
2547int compaction_swapper_init_now = 0;
2548int compaction_swapper_running = 0;
2549int compaction_swapper_awakened = 0;
2550int compaction_swapper_abort = 0;
2551
2552bool
2553vm_compressor_swapout_is_ripe()
2554{
2555 bool is_ripe = false;
2556 if (vm_swapout_ripe_segments == TRUE && c_overage_swapped_count < c_overage_swapped_limit) {
2557 c_segment_t c_seg;
2558 clock_sec_t now;
2559 clock_sec_t age;
2560 clock_nsec_t nsec;
2561
2562 clock_get_system_nanotime(&now, &nsec);
2563 age = 0;
2564
2565 lck_mtx_lock_spin_always(c_list_lock);
2566
2567 if (!queue_empty(&c_age_list_head)) {
2568 c_seg = (c_segment_t) queue_first(&c_age_list_head);
2569
2570 age = now - c_seg->c_creation_ts;
2571 }
2572 lck_mtx_unlock_always(c_list_lock);
2573
2574 if (age >= vm_ripe_target_age) {
2575 is_ripe = true;
2576 }
2577 }
2578 return is_ripe;
2579}
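/*
 * "Ripe" above simply means the oldest segment on the age queue has been
 * sitting compressed for at least vm_ripe_target_age seconds, while we still
 * have headroom under c_overage_swapped_limit. The concrete numbers below
 * are hypothetical. Not compiled; illustration only.
 */
#if 0
static void
swapout_ripeness_example(void)
{
	/* hypothetical: target age 1800 s, oldest segment created at t = 1000 s */
	clock_sec_t creation_ts = 1000, now = 2800, ripe_target = 1800;

	assert((now - creation_ts) >= ripe_target);     /* ripe once now >= 2800 s */
}
#endif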
2580
2581static bool
2582compressor_swapout_conditions_met(void)
2583{
2584 bool should_swap = false;
2585 if (COMPRESSOR_NEEDS_TO_SWAP()) {
2586 should_swap = true;
2587 vmcs_stats.compressor_swap_threshold_exceeded++;
2588 }
2589 if (VM_PAGE_Q_THROTTLED(&vm_pageout_queue_external) && vm_page_anonymous_count < (vm_page_inactive_count / 20)) {
2590 should_swap = true;
2591 vmcs_stats.external_q_throttled++;
2592 }
2593 if (vm_page_free_count < (vm_page_free_reserved - (COMPRESSOR_FREE_RESERVED_LIMIT * 2))) {
2594 should_swap = true;
2595 vmcs_stats.free_count_below_reserve++;
2596 }
2597 return should_swap;
2598}
2599
2600static bool
2601compressor_needs_to_swap()
2602{
2603 bool should_swap = false;
2604 if (vm_compressor_swapout_is_ripe()) {
2605 should_swap = true;
2606 goto check_if_low_space;
2607 }
2608
2609 if (VM_CONFIG_SWAP_IS_ACTIVE) {
2610 should_swap = compressor_swapout_conditions_met();
2611 if (should_swap) {
2612 goto check_if_low_space;
2613 }
2614 }
2615
2616#if (XNU_TARGET_OS_OSX && __arm64__)
2617 /*
2618 * Thrashing detection disabled.
2619 */
2620#else /* (XNU_TARGET_OS_OSX && __arm64__) */
2621
2622 if (vm_compressor_is_thrashing()) {
2623 should_swap = true;
2624 vmcs_stats.thrashing_detected++;
2625 }
2626
2627#if CONFIG_PHANTOM_CACHE
2628 if (vm_phantom_cache_check_pressure()) {
2629 os_atomic_store(&memorystatus_phantom_cache_pressure, true, release);
2630 should_swap = true;
2631 }
2632#endif
2633 if (swapout_target_age) {
2634 should_swap = true;
2635 }
2636#endif /* (XNU_TARGET_OS_OSX && __arm64__) */
2637
2638check_if_low_space:
2639
2640#if CONFIG_JETSAM
2641 if (should_swap || vm_compressor_low_on_space() == TRUE) {
2642 if (vm_compressor_thrashing_detected == FALSE) {
2643 vm_compressor_thrashing_detected = TRUE;
2644
2645 if (swapout_target_age) {
2646 compressor_thrashing_induced_jetsam++;
2647 } else if (vm_compressor_low_on_space() == TRUE) {
2648 compressor_thrashing_induced_jetsam++;
2649 } else {
2650 filecache_thrashing_induced_jetsam++;
2651 }
2652 /*
2653 * Wake up the memorystatus thread so that it can return
2654 * the system to a healthy state (by killing processes).
2655 */
2656 memorystatus_thread_wake();
2657 }
2658 /*
2659 * let the jetsam take precedence over
2660 * any major compactions we might have
2661 * been able to do... otherwise we run
2662 * the risk of doing major compactions
2663 * on segments we're about to free up
2664 * due to the jetsam activity.
2665 */
2666 should_swap = false;
2667 if (memorystatus_swap_all_apps && vm_swap_low_on_space()) {
2668 vm_compressor_take_paging_space_action();
2669 }
2670 }
2671
2672#else /* CONFIG_JETSAM */
2673 if (should_swap && vm_swap_low_on_space()) {
2674 vm_compressor_take_paging_space_action();
2675 }
2676#endif /* CONFIG_JETSAM */
2677
2678 if (should_swap == false) {
2679 /*
2680 * vm_compressor_needs_to_major_compact returns true only if we're
2681 * about to run out of available compressor segments... in this
2682 * case, we absolutely need to run a major compaction even if
2683 * we've just kicked off a jetsam or we don't otherwise need to
2684 * swap... terminating objects releases
2685 * pages back to the uncompressed cache, but does not guarantee
2686 * that we will free up even a single compression segment
2687 */
2688 should_swap = vm_compressor_needs_to_major_compact();
2689 if (should_swap) {
2690 vmcs_stats.fragmentation_detected++;
2691 }
2692 }
2693
2694 /*
2695 * returning TRUE when swap_supported == FALSE
2696 * will cause the major compaction engine to
2697 * run, but will not trigger any swapping...
2698 * segments that have been major compacted
2699 * will be moved to the majorcompact queue
2700 */
2701 return should_swap;
2702}
2703
2704#if CONFIG_JETSAM
2705/*
2706 * This function is called from the jetsam thread after killing something to
2707 * mitigate thrashing.
2708 *
2709 * We need to restart our thrashing detection heuristics since memory pressure
2710 * has potentially changed significantly, and we don't want to detect on old
2711 * data from before the jetsam.
2712 */
2713void
2714vm_thrashing_jetsam_done(void)
2715{
2716 vm_compressor_thrashing_detected = FALSE;
2717
2718 /* Were we compressor-thrashing or filecache-thrashing? */
2719 if (swapout_target_age) {
2720 swapout_target_age = 0;
2721 compressor_need_sample_reset = TRUE;
2722 }
2723#if CONFIG_PHANTOM_CACHE
2724 else {
2725 vm_phantom_cache_restart_sample();
2726 }
2727#endif
2728}
2729#endif /* CONFIG_JETSAM */
2730
2731uint32_t vm_wake_compactor_swapper_calls = 0;
2732uint32_t vm_run_compactor_already_running = 0;
2733uint32_t vm_run_compactor_empty_minor_q = 0;
2734uint32_t vm_run_compactor_did_compact = 0;
2735uint32_t vm_run_compactor_waited = 0;
2736
2737void
2738vm_run_compactor(void)
2739{
2740 if (c_segment_count == 0) {
2741 return;
2742 }
2743
2744 lck_mtx_lock_spin_always(c_list_lock);
2745
2746 if (c_minor_count == 0) {
2747 vm_run_compactor_empty_minor_q++;
2748
2749 lck_mtx_unlock_always(c_list_lock);
2750 return;
2751 }
2752 if (compaction_swapper_running) {
2753 if (vm_pageout_state.vm_restricted_to_single_processor == FALSE) {
2754 vm_run_compactor_already_running++;
2755
2756 lck_mtx_unlock_always(c_list_lock);
2757 return;
2758 }
2759 vm_run_compactor_waited++;
2760
2761 assert_wait((event_t)&compaction_swapper_running, THREAD_UNINT);
2762
2763 lck_mtx_unlock_always(c_list_lock);
2764
2765 thread_block(THREAD_CONTINUE_NULL);
2766
2767 return;
2768 }
2769 vm_run_compactor_did_compact++;
2770
2771 fastwake_warmup = FALSE;
2772 compaction_swapper_running = 1;
2773
2774 vm_compressor_do_delayed_compactions(FALSE);
2775
2776 compaction_swapper_running = 0;
2777
2778 lck_mtx_unlock_always(c_list_lock);
2779
2780 thread_wakeup((event_t)&compaction_swapper_running);
2781}
2782
2783
2784void
2785vm_wake_compactor_swapper(void)
2786{
2787 if (compaction_swapper_running || compaction_swapper_awakened || c_segment_count == 0) {
2788 return;
2789 }
2790
2791 if (c_minor_count || vm_compressor_needs_to_major_compact()) {
2792 lck_mtx_lock_spin_always(c_list_lock);
2793
2794 fastwake_warmup = FALSE;
2795
2796 if (compaction_swapper_running == 0 && compaction_swapper_awakened == 0) {
2797 vm_wake_compactor_swapper_calls++;
2798
2799 compaction_swapper_awakened = 1;
2800 thread_wakeup((event_t)&c_compressor_swap_trigger);
2801 }
2802 lck_mtx_unlock_always(c_list_lock);
2803 }
2804}
2805
2806
2807void
2808vm_consider_swapping()
2809{
2810 assert(VM_CONFIG_SWAP_IS_PRESENT);
2811
2812 lck_mtx_lock_spin_always(c_list_lock);
2813
2814 compaction_swapper_abort = 1;
2815
2816 while (compaction_swapper_running) {
2817 assert_wait((event_t)&compaction_swapper_running, THREAD_UNINT);
2818
2819 lck_mtx_unlock_always(c_list_lock);
2820
2821 thread_block(THREAD_CONTINUE_NULL);
2822
2823 lck_mtx_lock_spin_always(c_list_lock);
2824 }
2825 compaction_swapper_abort = 0;
2826 compaction_swapper_running = 1;
2827
2828 vm_swapout_ripe_segments = TRUE;
2829
2830 vm_compressor_process_major_segments(vm_swapout_ripe_segments);
2831
2832 vm_compressor_compact_and_swap(FALSE);
2833
2834 compaction_swapper_running = 0;
2835
2836 vm_swapout_ripe_segments = FALSE;
2837
2838 lck_mtx_unlock_always(c_list_lock);
2839
2840 thread_wakeup((event_t)&compaction_swapper_running);
2841}
2842
2843
2844void
2845vm_consider_waking_compactor_swapper(void)
2846{
2847 boolean_t need_wakeup = FALSE;
2848
2849 if (c_segment_count == 0) {
2850 return;
2851 }
2852
2853 if (compaction_swapper_running || compaction_swapper_awakened) {
2854 return;
2855 }
2856
2857 if (!compaction_swapper_inited && !compaction_swapper_init_now) {
2858 compaction_swapper_init_now = 1;
2859 need_wakeup = TRUE;
2860 }
2861
2862 if (c_minor_count && (COMPRESSOR_NEEDS_TO_MINOR_COMPACT())) {
2863 need_wakeup = TRUE;
2864 } else if (compressor_needs_to_swap()) {
2865 need_wakeup = TRUE;
2866 } else if (c_minor_count) {
2867 uint64_t total_bytes;
2868
2869 total_bytes = compressor_object->resident_page_count * PAGE_SIZE_64;
2870
2871 if ((total_bytes - compressor_bytes_used) > total_bytes / 10) {
2872 need_wakeup = TRUE;
2873 }
2874 }
2875 if (need_wakeup == TRUE) {
2876 lck_mtx_lock_spin_always(c_list_lock);
2877
2878 fastwake_warmup = FALSE;
2879
2880 if (compaction_swapper_running == 0 && compaction_swapper_awakened == 0) {
2881 memoryshot(VM_WAKEUP_COMPACTOR_SWAPPER, DBG_FUNC_NONE);
2882
2883 compaction_swapper_awakened = 1;
2884 thread_wakeup((event_t)&c_compressor_swap_trigger);
2885 }
2886 lck_mtx_unlock_always(c_list_lock);
2887 }
2888}
2889
2890
2891#define C_SWAPOUT_LIMIT 4
2892#define DELAYED_COMPACTIONS_PER_PASS 30
2893
2894void
2895vm_compressor_do_delayed_compactions(boolean_t flush_all)
2896{
2897 c_segment_t c_seg;
2898 int number_compacted = 0;
2899 boolean_t needs_to_swap = FALSE;
2900 uint32_t c_swapout_count = 0;
2901
2902
2903 VM_DEBUG_CONSTANT_EVENT(vm_compressor_do_delayed_compactions, VM_COMPRESSOR_DO_DELAYED_COMPACTIONS, DBG_FUNC_START, c_minor_count, flush_all, 0, 0);
2904
2905#if XNU_TARGET_OS_OSX
2906 LCK_MTX_ASSERT(c_list_lock, LCK_MTX_ASSERT_OWNED);
2907#endif /* XNU_TARGET_OS_OSX */
2908
2909 while (!queue_empty(&c_minor_list_head) && needs_to_swap == FALSE) {
2910 c_seg = (c_segment_t)queue_first(&c_minor_list_head);
2911
2912 lck_mtx_lock_spin_always(&c_seg->c_lock);
2913
2914 if (c_seg->c_busy) {
2915 lck_mtx_unlock_always(c_list_lock);
2916 c_seg_wait_on_busy(c_seg);
2917 lck_mtx_lock_spin_always(c_list_lock);
2918
2919 continue;
2920 }
2921 C_SEG_BUSY(c_seg);
2922
2923 c_seg_do_minor_compaction_and_unlock(c_seg, TRUE, FALSE, TRUE);
2924
2925 c_swapout_count = c_early_swapout_count + c_regular_swapout_count + c_late_swapout_count;
2926 if (VM_CONFIG_SWAP_IS_ACTIVE && (number_compacted++ > DELAYED_COMPACTIONS_PER_PASS)) {
2927 if ((flush_all == TRUE || compressor_needs_to_swap()) && c_swapout_count < C_SWAPOUT_LIMIT) {
2928 needs_to_swap = TRUE;
2929 }
2930
2931 number_compacted = 0;
2932 }
2933 lck_mtx_lock_spin_always(c_list_lock);
2934 }
2935
2936 VM_DEBUG_CONSTANT_EVENT(vm_compressor_do_delayed_compactions, VM_COMPRESSOR_DO_DELAYED_COMPACTIONS, DBG_FUNC_END, c_minor_count, number_compacted, needs_to_swap, 0);
2937}
2938
2939int min_csegs_per_major_compaction = DELAYED_COMPACTIONS_PER_PASS;
2940
2941static bool
2942vm_compressor_major_compact_cseg(c_segment_t c_seg, uint32_t* c_seg_considered, bool* bail_wanted_cseg, uint64_t* total_bytes_freed)
2943{
2944 /*
2945 * Major compaction
2946 */
2947 bool keep_compacting = true, fully_compacted = true;
2948 queue_head_t *list_head = NULL;
2949 c_segment_t c_seg_next;
2950 uint64_t bytes_to_free = 0, bytes_freed = 0;
2951 uint32_t number_considered = 0;
2952
2953 if (c_seg->c_state == C_ON_AGE_Q) {
2954 assert(!c_seg->c_has_donated_pages);
2955 list_head = &c_age_list_head;
2956 } else if (c_seg->c_state == C_ON_SWAPPEDIN_Q) {
2957 assert(c_seg->c_has_donated_pages);
2958 list_head = &c_late_swappedin_list_head;
2959 }
2960
2961 while (keep_compacting == TRUE) {
2962 assert(c_seg->c_busy);
2963
2964 /* look for another segment to consolidate */
2965
2966 c_seg_next = (c_segment_t) queue_next(&c_seg->c_age_list);
2967
2968 if (queue_end(list_head, (queue_entry_t)c_seg_next)) {
2969 break;
2970 }
2971
2972 assert(c_seg_next->c_state == c_seg->c_state);
2973
2974 number_considered++;
2975
2976 if (c_seg_major_compact_ok(c_seg, c_seg_next) == FALSE) {
2977 break;
2978 }
2979
2980 lck_mtx_lock_spin_always(&c_seg_next->c_lock);
2981
2982 if (c_seg_next->c_busy) {
2983 /*
2984 * We are going to block for our neighbor.
2985 * If our c_seg is wanted, we should unbusy
2986 * it because we don't know how long we might
2987 * have to block here.
2988 */
2989 if (c_seg->c_wanted) {
2990 lck_mtx_unlock_always(&c_seg_next->c_lock);
2991 fully_compacted = false;
2992 c_seg_major_compact_stats[c_seg_major_compact_stats_now].bailed_compactions++;
2993 *bail_wanted_cseg = true;
2994 break;
2995 }
2996
2997 lck_mtx_unlock_always(c_list_lock);
2998
2999 VM_DEBUG_CONSTANT_EVENT(vm_compressor_compact_and_swap, VM_COMPRESSOR_COMPACT_AND_SWAP, DBG_FUNC_NONE, 8, (void*) VM_KERNEL_ADDRPERM(c_seg_next), 0, 0);
3000
3001 c_seg_wait_on_busy(c_seg_next);
3002 lck_mtx_lock_spin_always(c_list_lock);
3003
3004 continue;
3005 }
3006 /* grab that segment */
3007 C_SEG_BUSY(c_seg_next);
3008
3009 bytes_to_free = C_SEG_OFFSET_TO_BYTES(c_seg_next->c_populated_offset);
3010 if (c_seg_do_minor_compaction_and_unlock(c_seg_next, FALSE, TRUE, TRUE)) {
3011 /*
3012 * found an empty c_segment and freed it
3013 * so we can't continue to use c_seg_next
3014 */
3015 bytes_freed += bytes_to_free;
3016 c_seg_major_compact_stats[c_seg_major_compact_stats_now].count_of_freed_segs++;
3017 continue;
3018 }
3019
3020 /* unlock the list ... */
3021 lck_mtx_unlock_always(c_list_lock);
3022
3023 /* do the major compaction */
3024
3025 keep_compacting = c_seg_major_compact(c_seg, c_seg_next);
3026
3027 VM_DEBUG_CONSTANT_EVENT(vm_compressor_compact_and_swap, VM_COMPRESSOR_COMPACT_AND_SWAP, DBG_FUNC_NONE, 9, keep_compacting, 0, 0);
3028
3029 PAGE_REPLACEMENT_DISALLOWED(TRUE);
3030
3031 lck_mtx_lock_spin_always(&c_seg_next->c_lock);
3032 /*
3033 * run a minor compaction on the donor segment
3034 * since we pulled at least some of its
3035 * data into our target... if we've emptied
3036 * it, now is a good time to free it, which
3037 * c_seg_minor_compaction_and_unlock also takes care of
3038 *
3039 * by passing TRUE, we ask for c_busy to be cleared
3040 * and c_wanted to be taken care of
3041 */
3042 bytes_to_free = C_SEG_OFFSET_TO_BYTES(c_seg_next->c_populated_offset);
3043 if (c_seg_minor_compaction_and_unlock(c_seg_next, TRUE)) {
3044 bytes_freed += bytes_to_free;
3045 c_seg_major_compact_stats[c_seg_major_compact_stats_now].count_of_freed_segs++;
3046 } else {
3047 bytes_to_free -= C_SEG_OFFSET_TO_BYTES(c_seg_next->c_populated_offset);
3048 bytes_freed += bytes_to_free;
3049 }
3050
3051 PAGE_REPLACEMENT_DISALLOWED(FALSE);
3052
3053 /* relock the list */
3054 lck_mtx_lock_spin_always(c_list_lock);
3055
3056 if (c_seg->c_wanted) {
3057 /*
3058 * Our c_seg is in demand. Let's
3059 * unbusy it and wake up the waiters
3060 * instead of continuing the compaction
3061 * because we could be in this loop
3062 * for a while.
3063 */
3064 fully_compacted = false;
3065 *bail_wanted_cseg = true;
3066 c_seg_major_compact_stats[c_seg_major_compact_stats_now].bailed_compactions++;
3067 break;
3068 }
3069 } /* major compaction */
3070
3071 VM_DEBUG_CONSTANT_EVENT(vm_compressor_compact_and_swap, VM_COMPRESSOR_COMPACT_AND_SWAP, DBG_FUNC_NONE, 10, number_considered, *bail_wanted_cseg, 0);
3072
3073 *c_seg_considered += number_considered;
3074 *total_bytes_freed += bytes_freed;
3075
3076 lck_mtx_lock_spin_always(&c_seg->c_lock);
3077 return fully_compacted;
3078}
3079
3080#define TIME_SUB(rsecs, secs, rfrac, frac, unit) \
3081 MACRO_BEGIN \
3082 if ((int)((rfrac) -= (frac)) < 0) { \
3083 (rfrac) += (unit); \
3084 (rsecs) -= 1; \
3085 } \
3086 (rsecs) -= (secs); \
3087 MACRO_END
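/*
 * TIME_SUB() is a borrow-aware (seconds, fraction) subtraction. For example,
 * subtracting (1 s, 900 ns) from (3 s, 100 ns) with unit NSEC_PER_SEC borrows
 * one second, giving (1 s, 999,999,200 ns). Not compiled; illustration only.
 */
#if 0
static void
time_sub_example(void)
{
	clock_sec_t rsecs = 3;
	clock_nsec_t rfrac = 100;

	/* (3 s, 100 ns) - (1 s, 900 ns): the fraction underflows, so borrow */
	TIME_SUB(rsecs, 1, rfrac, 900, NSEC_PER_SEC);
	assert(rsecs == 1 && rfrac == 999999200);
}
#endif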
3088
3089clock_nsec_t c_process_major_report_over_ms = 9; /* report if over 9 ms */
3090int c_process_major_yield_after = 1000; /* yield after moving 1,000 segments */
3091uint64_t c_process_major_reports = 0;
3092clock_sec_t c_process_major_max_sec = 0;
3093clock_nsec_t c_process_major_max_nsec = 0;
3094uint32_t c_process_major_peak_segcount = 0;
3095static void
3096vm_compressor_process_major_segments(bool ripe_age_only)
3097{
3098 c_segment_t c_seg = NULL;
3099 int count = 0, total = 0, breaks = 0;
3100 clock_sec_t start_sec, end_sec;
3101 clock_nsec_t start_nsec, end_nsec;
3102 clock_nsec_t report_over_ns;
3103
3104 if (queue_empty(&c_major_list_head)) {
3105 return;
3106 }
3107
3108 // printf("%s: starting to move segments from MAJORQ to AGEQ\n", __FUNCTION__);
3109 if (c_process_major_report_over_ms != 0) {
3110 report_over_ns = c_process_major_report_over_ms * NSEC_PER_MSEC;
3111 } else {
3112 report_over_ns = (clock_nsec_t)-1;
3113 }
3114
3115 if (ripe_age_only) {
3116 if (c_overage_swapped_count >= c_overage_swapped_limit) {
3117 /*
3118 * Return while we wait for the overage segments
3119 * in our queue to get pushed out first.
3120 */
3121 return;
3122 }
3123 }
3124
3125 clock_get_system_nanotime(&start_sec, &start_nsec);
3126 while (!queue_empty(&c_major_list_head)) {
3127 if (!ripe_age_only) {
3128 /*
3129 * Start from the end to preserve aging order. The newer
3130 * segments are at the tail and so need to be inserted in
3131 * the aging queue in this way so we have the older segments
3132 * at the end of the AGE_Q.
3133 */
3134 c_seg = (c_segment_t)queue_last(&c_major_list_head);
3135 } else {
3136 c_seg = (c_segment_t)queue_first(&c_major_list_head);
3137 if ((start_sec - c_seg->c_creation_ts) < vm_ripe_target_age) {
3138 /*
3139 * We have found the first segment in our queue that is not ripe. Segments after it
3140 * will be the same. So let's bail here. Return with c_list_lock held.
3141 */
3142 break;
3143 }
3144 }
3145
3146 lck_mtx_lock_spin_always(&c_seg->c_lock);
3147 c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
3148 lck_mtx_unlock_always(&c_seg->c_lock);
3149
3150 count++;
3151 if (count == c_process_major_yield_after ||
3152 queue_empty(&c_major_list_head)) {
3153 /* done or time to take a break */
3154 } else {
3155 /* keep going */
3156 continue;
3157 }
3158
3159 total += count;
3160 clock_get_system_nanotime(&end_sec, &end_nsec);
3161 TIME_SUB(end_sec, start_sec, end_nsec, start_nsec, NSEC_PER_SEC);
3162 if (end_sec > c_process_major_max_sec) {
3163 c_process_major_max_sec = end_sec;
3164 c_process_major_max_nsec = end_nsec;
3165 } else if (end_sec == c_process_major_max_sec &&
3166 end_nsec > c_process_major_max_nsec) {
3167 c_process_major_max_nsec = end_nsec;
3168 }
3169 if (total > c_process_major_peak_segcount) {
3170 c_process_major_peak_segcount = total;
3171 }
3172 if (end_sec > 0 ||
3173 end_nsec >= report_over_ns) {
3174 /* we used more than expected */
3175 c_process_major_reports++;
3176 printf("%s: moved %d/%d segments from MAJORQ to AGEQ in %lu.%09u seconds and %d breaks\n",
3177 __FUNCTION__, count, total,
3178 end_sec, end_nsec, breaks);
3179 }
3180 if (queue_empty(&c_major_list_head)) {
3181 /* done */
3182 break;
3183 }
3184 /* take a break to allow someone else to grab the lock */
3185 lck_mtx_unlock_always(c_list_lock);
3186 mutex_pause(0); /* 10 microseconds */
3187 lck_mtx_lock_spin_always(c_list_lock);
3188 /* start again */
3189 clock_get_system_nanotime(&start_sec, &start_nsec);
3190 count = 0;
3191 breaks++;
3192 }
3193}
3194
3195/*
3196 * macOS special swappable csegs -> early_swapin queue
3197 * non-macOS special swappable+non-freezer csegs -> late_swapin queue
3198 * Processing special csegs means minor compacting each cseg and then
3199 * major compacting it and putting them on the early or late
3200 * (depending on platform) swapout queue.
3201 */
3202static void
3203vm_compressor_process_special_swapped_in_segments_locked(void)
3204{
3205 c_segment_t c_seg = NULL;
3206 bool switch_state = true, bail_wanted_cseg = false;
3207 unsigned int number_considered = 0, yield_after_considered_per_pass = 0;
3208 uint64_t bytes_freed = 0;
3209 queue_head_t *special_swappedin_list_head;
3210
3211#if XNU_TARGET_OS_OSX
3212 special_swappedin_list_head = &c_early_swappedin_list_head;
3213#else /* XNU_TARGET_OS_OSX */
3214 if (memorystatus_swap_all_apps) {
3215 special_swappedin_list_head = &c_late_swappedin_list_head;
3216 } else {
3217 /* called on an unsupported config */
3218 return;
3219 }
3220#endif /* XNU_TARGET_OS_OSX */
3221
3222 yield_after_considered_per_pass = MAX(min_csegs_per_major_compaction, DELAYED_COMPACTIONS_PER_PASS);
3223 while (!queue_empty(special_swappedin_list_head)) {
3224 c_seg = (c_segment_t)queue_first(special_swappedin_list_head);
3225
3226 lck_mtx_lock_spin_always(&c_seg->c_lock);
3227
3228 if (c_seg->c_busy) {
3229 lck_mtx_unlock_always(c_list_lock);
3230 c_seg_wait_on_busy(c_seg);
3231 lck_mtx_lock_spin_always(c_list_lock);
3232 continue;
3233 }
3234
3235 C_SEG_BUSY(c_seg);
3236 lck_mtx_unlock_always(&c_seg->c_lock);
3237 lck_mtx_unlock_always(c_list_lock);
3238
3239 PAGE_REPLACEMENT_DISALLOWED(TRUE);
3240
3241 lck_mtx_lock_spin_always(&c_seg->c_lock);
3242
3243 if (c_seg_minor_compaction_and_unlock(c_seg, FALSE /*clear busy?*/)) {
3244 /*
3245 * found an empty c_segment and freed it
3246 * so go grab the next guy in the queue
3247 */
3248 PAGE_REPLACEMENT_DISALLOWED(FALSE);
3249 lck_mtx_lock_spin_always(c_list_lock);
3250 continue;
3251 }
3252
3253 PAGE_REPLACEMENT_DISALLOWED(FALSE);
3254 lck_mtx_lock_spin_always(c_list_lock);
3255
3256 switch_state = vm_compressor_major_compact_cseg(c_seg, &number_considered, &bail_wanted_cseg, &bytes_freed);
3257 assert(c_seg->c_busy);
3258 assert(!c_seg->c_on_minorcompact_q);
3259
3260 if (switch_state) {
3261 if (VM_CONFIG_SWAP_IS_ACTIVE || VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
3262 /*
3263 * Ordinarily we let swapped-in segments age out and get
3264 * major compacted with the rest of the c_segs on the ageQ.
3265 * But the early donated c_segs, if well compacted, should be
3266 * kept ready to be swapped out if needed. They typically
3267 * describe memory belonging to a leaky app (macOS) or a swap-
3268 * capable app (iPadOS); for the latter we can keep them
3269 * around longer because we control the triggers in the
3270 * memorystatus subsystem.
3271 */
3272 c_seg_switch_state(c_seg, C_ON_SWAPOUT_Q, FALSE);
3273 }
3274 }
3275
3276 C_SEG_WAKEUP_DONE(c_seg);
3277
3278 lck_mtx_unlock_always(&c_seg->c_lock);
3279
3280 if (number_considered >= yield_after_considered_per_pass) {
3281 if (bail_wanted_cseg) {
3282 /*
3283 * We stopped major compactions on a c_seg
3284 * that is wanted. Unfortunately we don't know
3285 * the priority of the waiter, but we are running
3286 * at a very high priority, so, just in case
3287 * the waiter is a critical system daemon or
3288 * UI thread, let's give up the CPU while the
3289 * system may be running a few CPU-intensive
3290 * tasks.
3291 */
3292 bail_wanted_cseg = false;
3293 lck_mtx_unlock_always(c_list_lock);
3294
3295 mutex_pause(2); /* 100us yield */
3296
3297 lck_mtx_lock_spin_always(c_list_lock);
3298 }
3299
3300 number_considered = 0;
3301 }
3302 }
3303}
3304
3305void
3306vm_compressor_process_special_swapped_in_segments(void)
3307{
3308 lck_mtx_lock_spin_always(c_list_lock);
3309 vm_compressor_process_special_swapped_in_segments_locked();
3310 lck_mtx_unlock_always(c_list_lock);
3311}
3312
3313#define C_SEGMENT_SWAPPEDIN_AGE_LIMIT 10
3314/*
3315 * Processing regular csegs means aging them.
3316 */
3317static void
3318vm_compressor_process_regular_swapped_in_segments(boolean_t flush_all)
3319{
3320 c_segment_t c_seg;
3321 clock_sec_t now;
3322 clock_nsec_t nsec;
3323
3324 clock_get_system_nanotime(&now, &nsec);
3325
3326 while (!queue_empty(&c_regular_swappedin_list_head)) {
3327 c_seg = (c_segment_t)queue_first(&c_regular_swappedin_list_head);
3328
3329 if (flush_all == FALSE && (now - c_seg->c_swappedin_ts) < C_SEGMENT_SWAPPEDIN_AGE_LIMIT) {
3330 break;
3331 }
3332
3333 lck_mtx_lock_spin_always(&c_seg->c_lock);
3334
3335 c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
3336 c_seg->c_agedin_ts = (uint32_t) now;
3337
3338 lck_mtx_unlock_always(&c_seg->c_lock);
3339 }
3340}
3341
3342
3343extern int vm_num_swap_files;
3344extern int vm_num_pinned_swap_files;
3345extern int vm_swappin_enabled;
3346
3347extern unsigned int vm_swapfile_total_segs_used;
3348extern unsigned int vm_swapfile_total_segs_alloced;
3349
3350
3351void
3352vm_compressor_flush(void)
3353{
3354 uint64_t vm_swap_put_failures_at_start;
3355 wait_result_t wait_result = 0;
3356 AbsoluteTime startTime, endTime;
3357 clock_sec_t now_sec;
3358 clock_nsec_t now_nsec;
3359 uint64_t nsec;
3360 c_segment_t c_seg, c_seg_next;
3361
3362 HIBLOG("vm_compressor_flush - starting\n");
3363
3364 clock_get_uptime(&startTime);
3365
3366 lck_mtx_lock_spin_always(c_list_lock);
3367
3368 fastwake_warmup = FALSE;
3369 compaction_swapper_abort = 1;
3370
3371 while (compaction_swapper_running) {
3372 assert_wait((event_t)&compaction_swapper_running, THREAD_UNINT);
3373
3374 lck_mtx_unlock_always(c_list_lock);
3375
3376 thread_block(THREAD_CONTINUE_NULL);
3377
3378 lck_mtx_lock_spin_always(c_list_lock);
3379 }
3380 compaction_swapper_abort = 0;
3381 compaction_swapper_running = 1;
3382
3383 hibernate_flushing = TRUE;
3384 hibernate_no_swapspace = FALSE;
3385 hibernate_flush_timed_out = FALSE;
3386 c_generation_id_flush_barrier = c_generation_id + 1000;
3387
3388 clock_get_system_nanotime(&now_sec, &now_nsec);
3389 hibernate_flushing_deadline = now_sec + HIBERNATE_FLUSHING_SECS_TO_COMPLETE;
3390
3391 vm_swap_put_failures_at_start = vm_swap_put_failures;
3392
3393 /*
3394 * We are about to hibernate and so we want all segments flushed to disk.
3395 * Segments that are on the major compaction queue won't be considered in
3396 * the vm_compressor_compact_and_swap() pass. So we need to bring them to
3397 * the ageQ for consideration.
3398 */
3399 if (!queue_empty(&c_major_list_head)) {
3400 c_seg = (c_segment_t)queue_first(&c_major_list_head);
3401
3402 while (!queue_end(&c_major_list_head, (queue_entry_t)c_seg)) {
3403 c_seg_next = (c_segment_t) queue_next(&c_seg->c_age_list);
3404 lck_mtx_lock_spin_always(&c_seg->c_lock);
3405 c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
3406 lck_mtx_unlock_always(&c_seg->c_lock);
3407 c_seg = c_seg_next;
3408 }
3409 }
3410 vm_compressor_compact_and_swap(TRUE);
3411
3412 while (!queue_empty(&c_early_swapout_list_head) || !queue_empty(&c_regular_swapout_list_head) || !queue_empty(&c_late_swapout_list_head)) {
3413 assert_wait_timeout((event_t) &compaction_swapper_running, THREAD_INTERRUPTIBLE, 5000, 1000 * NSEC_PER_USEC);
3414
3415 lck_mtx_unlock_always(c_list_lock);
3416
3417 wait_result = thread_block(THREAD_CONTINUE_NULL);
3418
3419 lck_mtx_lock_spin_always(c_list_lock);
3420
3421 if (wait_result == THREAD_TIMED_OUT) {
3422 break;
3423 }
3424 }
3425 hibernate_flushing = FALSE;
3426 compaction_swapper_running = 0;
3427
3428 if (vm_swap_put_failures > vm_swap_put_failures_at_start) {
3429 HIBLOG("vm_compressor_flush failed to clean %llu segments - vm_page_compressor_count(%d)\n",
3430 vm_swap_put_failures - vm_swap_put_failures_at_start, VM_PAGE_COMPRESSOR_COUNT);
3431 }
3432
3433 lck_mtx_unlock_always(c_list_lock);
3434
3435 thread_wakeup((event_t)&compaction_swapper_running);
3436
3437 clock_get_uptime(&endTime);
3438 SUB_ABSOLUTETIME(&endTime, &startTime);
3439 absolutetime_to_nanoseconds(endTime, &nsec);
3440
3441 HIBLOG("vm_compressor_flush completed - took %qd msecs - vm_num_swap_files = %d, vm_num_pinned_swap_files = %d, vm_swappin_enabled = %d\n",
3442 nsec / 1000000ULL, vm_num_swap_files, vm_num_pinned_swap_files, vm_swappin_enabled);
3443}
3444
3445
3446int compaction_swap_trigger_thread_awakened = 0;
3447
3448static void
3449vm_compressor_swap_trigger_thread(void)
3450{
3451 current_thread()->options |= TH_OPT_VMPRIV;
3452
3453 /*
3454 * compaction_swapper_init_now is set when the first call to
3455 * vm_consider_waking_compactor_swapper is made from
3456 * vm_pageout_scan... since this function is called upon
3457 * thread creation, we want to make sure to delay adjusting
3458 * the tunables until we are awakened via vm_pageout_scan
3459 * so that we are at a point where vm_swapfile_open will
3460 * be operating on the correct directory (in case the default
3461 * of using the VM volume is overridden by the dynamic_pager)
3462 */
3463 if (compaction_swapper_init_now) {
3464 vm_compaction_swapper_do_init();
3465
3466 if (vm_pageout_state.vm_restricted_to_single_processor == TRUE) {
3467 thread_vm_bind_group_add();
3468 }
3469#if CONFIG_THREAD_GROUPS
3470 thread_group_vm_add();
3471#endif
3472 thread_set_thread_name(current_thread(), "VM_cswap_trigger");
3473 compaction_swapper_init_now = 0;
3474 }
3475 lck_mtx_lock_spin_always(c_list_lock);
3476
3477 compaction_swap_trigger_thread_awakened++;
3478 compaction_swapper_awakened = 0;
3479
3480 if (compaction_swapper_running == 0) {
3481 compaction_swapper_running = 1;
3482
3483 vm_compressor_compact_and_swap(FALSE);
3484
3485 compaction_swapper_running = 0;
3486 }
3487 assert_wait((event_t)&c_compressor_swap_trigger, THREAD_UNINT);
3488
3489 if (compaction_swapper_running == 0) {
3490 thread_wakeup((event_t)&compaction_swapper_running);
3491 }
3492
3493 lck_mtx_unlock_always(c_list_lock);
3494
3495 thread_block((thread_continue_t)vm_compressor_swap_trigger_thread);
3496
3497 /* NOTREACHED */
3498}
3499
3500
3501void
3502vm_compressor_record_warmup_start(void)
3503{
3504 c_segment_t c_seg;
3505
3506 lck_mtx_lock_spin_always(c_list_lock);
3507
3508 if (first_c_segment_to_warm_generation_id == 0) {
3509 if (!queue_empty(&c_age_list_head)) {
3510 c_seg = (c_segment_t)queue_last(&c_age_list_head);
3511
3512 first_c_segment_to_warm_generation_id = c_seg->c_generation_id;
3513 } else {
3514 first_c_segment_to_warm_generation_id = 0;
3515 }
3516
3517 fastwake_recording_in_progress = TRUE;
3518 }
3519 lck_mtx_unlock_always(c_list_lock);
3520}
3521
3522
3523void
3524vm_compressor_record_warmup_end(void)
3525{
3526 c_segment_t c_seg;
3527
3528 lck_mtx_lock_spin_always(c_list_lock);
3529
3530 if (fastwake_recording_in_progress == TRUE) {
3531 if (!queue_empty(&c_age_list_head)) {
3532 c_seg = (c_segment_t)queue_last(&c_age_list_head);
3533
3534 last_c_segment_to_warm_generation_id = c_seg->c_generation_id;
3535 } else {
3536 last_c_segment_to_warm_generation_id = first_c_segment_to_warm_generation_id;
3537 }
3538
3539 fastwake_recording_in_progress = FALSE;
3540
3541 HIBLOG("vm_compressor_record_warmup (%qd - %qd)\n", first_c_segment_to_warm_generation_id, last_c_segment_to_warm_generation_id);
3542 }
3543 lck_mtx_unlock_always(c_list_lock);
3544}
3545
3546
3547#define DELAY_TRIM_ON_WAKE_SECS 25
3548
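/*
 * Push out the earliest time at which swapfile trims may be issued again.
 * The constant name suggests this is intended for the wake path:
 * dont_trim_until_ts is set to "now + DELAY_TRIM_ON_WAKE_SECS" and the trim
 * code is expected to compare the current uptime (in seconds) against it.
 */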
3549void
3550vm_compressor_delay_trim(void)
3551{
3552 clock_sec_t sec;
3553 clock_nsec_t nsec;
3554
	clock_get_system_nanotime(&sec, &nsec);
3556 dont_trim_until_ts = sec + DELAY_TRIM_ON_WAKE_SECS;
3557}
3558
3559
3560void
3561vm_compressor_do_warmup(void)
3562{
3563 lck_mtx_lock_spin_always(c_list_lock);
3564
3565 if (first_c_segment_to_warm_generation_id == last_c_segment_to_warm_generation_id) {
3566 first_c_segment_to_warm_generation_id = last_c_segment_to_warm_generation_id = 0;
3567
3568 lck_mtx_unlock_always(c_list_lock);
3569 return;
3570 }
3571
3572 if (compaction_swapper_running == 0 && compaction_swapper_awakened == 0) {
3573 fastwake_warmup = TRUE;
3574
3575 compaction_swapper_awakened = 1;
3576 thread_wakeup((event_t)&c_compressor_swap_trigger);
3577 }
3578 lck_mtx_unlock_always(c_list_lock);
3579}
3580
3581void
3582do_fastwake_warmup_all(void)
3583{
3584 lck_mtx_lock_spin_always(c_list_lock);
3585
3586 if (queue_empty(&c_swappedout_list_head) && queue_empty(&c_swappedout_sparse_list_head)) {
3587 lck_mtx_unlock_always(c_list_lock);
3588 return;
3589 }
3590
3591 fastwake_warmup = TRUE;
3592
3593 do_fastwake_warmup(&c_swappedout_list_head, TRUE);
3594
3595 do_fastwake_warmup(&c_swappedout_sparse_list_head, TRUE);
3596
3597 fastwake_warmup = FALSE;
3598
3599 lck_mtx_unlock_always(c_list_lock);
3600}
3601
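/*
 * Walk the given swapped-out queue and swap segments back in ahead of demand.
 * When consider_all_cseg is FALSE, only segments whose generation id falls in
 * the recorded warmup window are considered, and the walk stops early if free
 * memory drops below a quarter of AVAILABLE_MEMORY. I/O is issued at the
 * TIER2 throttle level and the pass is paced with vm_pageout_io_throttle().
 */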
3602void
3603do_fastwake_warmup(queue_head_t *c_queue, boolean_t consider_all_cseg)
3604{
3605 c_segment_t c_seg = NULL;
3606 AbsoluteTime startTime, endTime;
3607 uint64_t nsec;
3608
3609
3610 HIBLOG("vm_compressor_fastwake_warmup (%qd - %qd) - starting\n", first_c_segment_to_warm_generation_id, last_c_segment_to_warm_generation_id);
3611
	clock_get_uptime(&startTime);
3613
3614 lck_mtx_unlock_always(c_list_lock);
3615
	proc_set_thread_policy(current_thread(),
3617 TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
3618
3619 PAGE_REPLACEMENT_DISALLOWED(TRUE);
3620
3621 lck_mtx_lock_spin_always(c_list_lock);
3622
3623 while (!queue_empty(c_queue) && fastwake_warmup == TRUE) {
3624 c_seg = (c_segment_t) queue_first(c_queue);
3625
3626 if (consider_all_cseg == FALSE) {
3627 if (c_seg->c_generation_id < first_c_segment_to_warm_generation_id ||
3628 c_seg->c_generation_id > last_c_segment_to_warm_generation_id) {
3629 break;
3630 }
3631
3632 if (vm_page_free_count < (AVAILABLE_MEMORY / 4)) {
3633 break;
3634 }
3635 }
3636
		lck_mtx_lock_spin_always(&c_seg->c_lock);
3638 lck_mtx_unlock_always(c_list_lock);
3639
3640 if (c_seg->c_busy) {
3641 PAGE_REPLACEMENT_DISALLOWED(FALSE);
3642 c_seg_wait_on_busy(c_seg);
3643 PAGE_REPLACEMENT_DISALLOWED(TRUE);
3644 } else {
3645 if (c_seg_swapin(c_seg, TRUE, FALSE) == 0) {
3646 lck_mtx_unlock_always(&c_seg->c_lock);
3647 }
3648 c_segment_warmup_count++;
3649
3650 PAGE_REPLACEMENT_DISALLOWED(FALSE);
3651 vm_pageout_io_throttle();
3652 PAGE_REPLACEMENT_DISALLOWED(TRUE);
3653 }
3654 lck_mtx_lock_spin_always(c_list_lock);
3655 }
3656 lck_mtx_unlock_always(c_list_lock);
3657
3658 PAGE_REPLACEMENT_DISALLOWED(FALSE);
3659
	proc_set_thread_policy(current_thread(),
3661 TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER0);
3662
	clock_get_uptime(&endTime);
	SUB_ABSOLUTETIME(&endTime, &startTime);
	absolutetime_to_nanoseconds(endTime, &nsec);
3666
3667 HIBLOG("vm_compressor_fastwake_warmup completed - took %qd msecs\n", nsec / 1000000ULL);
3668
3669 lck_mtx_lock_spin_always(c_list_lock);
3670
3671 if (consider_all_cseg == FALSE) {
3672 first_c_segment_to_warm_generation_id = last_c_segment_to_warm_generation_id = 0;
3673 }
3674}
3675
3676extern bool vm_swapout_thread_running;
3677extern boolean_t compressor_store_stop_compaction;
3678
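/*
 * Main compaction/swap sweep. Repeatedly: run any queued minor compactions,
 * re-process swapped-in segments, then take the oldest segment off the age
 * queue, compact it and either free it, park it on the major-compact queue,
 * or hand it to the swapout path. flush_all is used by the hibernation flush
 * to push everything below the generation barrier out to swap.
 */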
3679void
3680vm_compressor_compact_and_swap(boolean_t flush_all)
3681{
3682 c_segment_t c_seg;
3683 bool switch_state, bail_wanted_cseg = false;
3684 clock_sec_t now;
3685 clock_nsec_t nsec;
3686 mach_timespec_t start_ts, end_ts;
3687 unsigned int number_considered, wanted_cseg_found, yield_after_considered_per_pass, number_yields;
3688 uint64_t bytes_freed, delta_usec;
3689 uint32_t c_swapout_count = 0;
3690
3691 VM_DEBUG_CONSTANT_EVENT(vm_compressor_compact_and_swap, VM_COMPRESSOR_COMPACT_AND_SWAP, DBG_FUNC_START, c_age_count, c_minor_count, c_major_count, vm_page_free_count);
3692
3693 if (fastwake_warmup == TRUE) {
3694 uint64_t starting_warmup_count;
3695
3696 starting_warmup_count = c_segment_warmup_count;
3697
3698 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 11) | DBG_FUNC_START, c_segment_warmup_count,
3699 first_c_segment_to_warm_generation_id, last_c_segment_to_warm_generation_id, 0, 0);
		do_fastwake_warmup(&c_swappedout_list_head, FALSE);
3701 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 11) | DBG_FUNC_END, c_segment_warmup_count, c_segment_warmup_count - starting_warmup_count, 0, 0, 0);
3702
3703 fastwake_warmup = FALSE;
3704 }
3705
3706#if (XNU_TARGET_OS_OSX && __arm64__)
3707 /*
3708 * Re-considering major csegs showed benefits on all platforms by
3709 * significantly reducing fragmentation and getting back memory.
3710 * However, on smaller devices, eg watch, there was increased power
3711 * use for the additional compactions. And the turnover in csegs on
3712 * those smaller platforms is high enough in the decompression/free
3713 * path that we can skip reconsidering them here because we already
3714 * consider them for major compaction in those paths.
3715 */
3716 vm_compressor_process_major_segments(false /*all segments and not just the ripe-aged ones*/);
3717#endif /* (XNU_TARGET_OS_OSX && __arm64__) */
3718
3719 /*
3720 * it's possible for the c_age_list_head to be empty if we
3721 * hit our limits for growing the compressor pool and we subsequently
3722 * hibernated... on the next hibernation we could see the queue as
	 * empty and not proceed even though we have a bunch of segments on
3724 * the swapped in queue that need to be dealt with.
3725 */
3726 vm_compressor_do_delayed_compactions(flush_all);
3727 vm_compressor_process_special_swapped_in_segments_locked();
3728 vm_compressor_process_regular_swapped_in_segments(flush_all);
3729
3730 /*
3731 * we only need to grab the timestamp once per
3732 * invocation of this function since the
3733 * timescale we're interested in is measured
3734 * in days
3735 */
	clock_get_system_nanotime(&now, &nsec);
3737
3738 start_ts.tv_sec = (int) now;
3739 start_ts.tv_nsec = nsec;
3740 delta_usec = 0;
3741 number_considered = 0;
3742 wanted_cseg_found = 0;
3743 number_yields = 0;
3744 bytes_freed = 0;
3745 yield_after_considered_per_pass = MAX(min_csegs_per_major_compaction, DELAYED_COMPACTIONS_PER_PASS);
3746
3747#if 0
3748 /**
3749 * SW: Need to figure out how to properly rate limit this log because it is currently way too
3750 * noisy. rdar://99379414 (Figure out how to rate limit the fragmentation level logging)
3751 */
3752 os_log(OS_LOG_DEFAULT, "memorystatus: before compaction fragmentation level %u\n", vm_compressor_fragmentation_level());
3753#endif
3754
3755 while (!queue_empty(&c_age_list_head) && !compaction_swapper_abort && !compressor_store_stop_compaction) {
3756 if (hibernate_flushing == TRUE) {
3757 clock_sec_t sec;
3758
3759 if (hibernate_should_abort()) {
3760 HIBLOG("vm_compressor_flush - hibernate_should_abort returned TRUE\n");
3761 break;
3762 }
3763 if (hibernate_no_swapspace == TRUE) {
3764 HIBLOG("vm_compressor_flush - out of swap space\n");
3765 break;
3766 }
3767 if (vm_swap_files_pinned() == FALSE) {
3768 HIBLOG("vm_compressor_flush - unpinned swap files\n");
3769 break;
3770 }
3771 if (hibernate_in_progress_with_pinned_swap == TRUE &&
3772 (vm_swapfile_total_segs_alloced == vm_swapfile_total_segs_used)) {
3773 HIBLOG("vm_compressor_flush - out of pinned swap space\n");
3774 break;
3775 }
			clock_get_system_nanotime(&sec, &nsec);
3777
3778 if (sec > hibernate_flushing_deadline) {
3779 hibernate_flush_timed_out = TRUE;
3780 HIBLOG("vm_compressor_flush - failed to finish before deadline\n");
3781 break;
3782 }
3783 }
3784
3785 c_swapout_count = c_early_swapout_count + c_regular_swapout_count + c_late_swapout_count;
3786 if (VM_CONFIG_SWAP_IS_ACTIVE && !vm_swap_out_of_space() && c_swapout_count >= C_SWAPOUT_LIMIT) {
			assert_wait_timeout((event_t) &compaction_swapper_running, THREAD_INTERRUPTIBLE, 100, 1000 * NSEC_PER_USEC);
3788
3789 if (!vm_swapout_thread_running) {
3790 thread_wakeup((event_t)&vm_swapout_thread);
3791 }
3792
3793 lck_mtx_unlock_always(c_list_lock);
3794
3795 VM_DEBUG_CONSTANT_EVENT(vm_compressor_compact_and_swap, VM_COMPRESSOR_COMPACT_AND_SWAP, DBG_FUNC_NONE, 1, c_swapout_count, 0, 0);
3796
3797 thread_block(THREAD_CONTINUE_NULL);
3798
3799 lck_mtx_lock_spin_always(c_list_lock);
3800 }
3801 /*
3802 * Minor compactions
3803 */
3804 vm_compressor_do_delayed_compactions(flush_all);
3805
3806 /*
3807 * vm_compressor_process_early_swapped_in_segments()
3808 * might be too aggressive. So OFF for now.
3809 */
3810 vm_compressor_process_regular_swapped_in_segments(flush_all);
3811
		/* Recompute because we dropped the c_list_lock above */
3813 c_swapout_count = c_early_swapout_count + c_regular_swapout_count + c_late_swapout_count;
3814 if (VM_CONFIG_SWAP_IS_ACTIVE && !vm_swap_out_of_space() && c_swapout_count >= C_SWAPOUT_LIMIT) {
3815 /*
			 * we timed out on the above thread_block;
			 * loop around and try again. The timeout
			 * lets us keep doing minor compactions
			 * to make more memory available
3821 */
3822 VM_DEBUG_CONSTANT_EVENT(vm_compressor_compact_and_swap, VM_COMPRESSOR_COMPACT_AND_SWAP, DBG_FUNC_NONE, 2, c_swapout_count, 0, 0);
3823
3824 continue;
3825 }
3826
3827 /*
3828 * Swap out segments?
3829 */
3830 if (flush_all == FALSE) {
3831 bool needs_to_swap;
3832
3833 lck_mtx_unlock_always(c_list_lock);
3834
3835 needs_to_swap = compressor_needs_to_swap();
3836
3837 lck_mtx_lock_spin_always(c_list_lock);
3838
3839 VM_DEBUG_CONSTANT_EVENT(vm_compressor_compact_and_swap, VM_COMPRESSOR_COMPACT_AND_SWAP, DBG_FUNC_NONE, 3, needs_to_swap, 0, 0);
3840
3841 if (!needs_to_swap) {
3842 break;
3843 }
3844 }
3845 if (queue_empty(&c_age_list_head)) {
3846 VM_DEBUG_CONSTANT_EVENT(vm_compressor_compact_and_swap, VM_COMPRESSOR_COMPACT_AND_SWAP, DBG_FUNC_NONE, 4, c_age_count, 0, 0);
3847 break;
3848 }
3849 c_seg = (c_segment_t) queue_first(&c_age_list_head);
3850
3851 assert(c_seg->c_state == C_ON_AGE_Q);
3852
3853 if (flush_all == TRUE && c_seg->c_generation_id > c_generation_id_flush_barrier) {
3854 VM_DEBUG_CONSTANT_EVENT(vm_compressor_compact_and_swap, VM_COMPRESSOR_COMPACT_AND_SWAP, DBG_FUNC_NONE, 5, 0, 0, 0);
3855 break;
3856 }
3857
		lck_mtx_lock_spin_always(&c_seg->c_lock);
3859
3860 if (c_seg->c_busy) {
3861 VM_DEBUG_CONSTANT_EVENT(vm_compressor_compact_and_swap, VM_COMPRESSOR_COMPACT_AND_SWAP, DBG_FUNC_NONE, 6, (void*) VM_KERNEL_ADDRPERM(c_seg), 0, 0);
3862
3863 lck_mtx_unlock_always(c_list_lock);
3864 c_seg_wait_on_busy(c_seg);
3865 lck_mtx_lock_spin_always(c_list_lock);
3866
3867 continue;
3868 }
3869 C_SEG_BUSY(c_seg);
3870
3871 if (c_seg_do_minor_compaction_and_unlock(c_seg, FALSE, TRUE, TRUE)) {
3872 /*
3873 * found an empty c_segment and freed it
3874 * so go grab the next guy in the queue
3875 */
3876 VM_DEBUG_CONSTANT_EVENT(vm_compressor_compact_and_swap, VM_COMPRESSOR_COMPACT_AND_SWAP, DBG_FUNC_NONE, 7, 0, 0, 0);
3877 c_seg_major_compact_stats[c_seg_major_compact_stats_now].count_of_freed_segs++;
3878 continue;
3879 }
3880
		switch_state = vm_compressor_major_compact_cseg(c_seg, &number_considered, &bail_wanted_cseg, &bytes_freed);
3882 if (bail_wanted_cseg) {
3883 wanted_cseg_found++;
3884 bail_wanted_cseg = false;
3885 }
3886
3887 assert(c_seg->c_busy);
3888 assert(!c_seg->c_on_minorcompact_q);
3889
3890 if (switch_state) {
3891 if (VM_CONFIG_SWAP_IS_ACTIVE) {
3892 int new_state = C_ON_SWAPOUT_Q;
3893#if (XNU_TARGET_OS_OSX && __arm64__)
3894 if (flush_all == false && compressor_swapout_conditions_met() == false) {
3895 new_state = C_ON_MAJORCOMPACT_Q;
3896 }
3897#endif /* (XNU_TARGET_OS_OSX && __arm64__) */
3898
3899 if (new_state == C_ON_SWAPOUT_Q) {
3900 /*
3901 * This mode of putting a generic c_seg on the swapout list is
3902 * only supported when we have general swapping enabled
3903 */
3904 clock_sec_t lnow;
3905 clock_nsec_t lnsec;
					clock_get_system_nanotime(&lnow, &lnsec);
3907 if (c_seg->c_agedin_ts && (lnow - c_seg->c_agedin_ts) < 30) {
3908 vmcs_stats.unripe_under_30s++;
3909 } else if (c_seg->c_agedin_ts && (lnow - c_seg->c_agedin_ts) < 60) {
3910 vmcs_stats.unripe_under_60s++;
3911 } else if (c_seg->c_agedin_ts && (lnow - c_seg->c_agedin_ts) < 300) {
3912 vmcs_stats.unripe_under_300s++;
3913 }
3914 }
3915
3916 c_seg_switch_state(c_seg, new_state, FALSE);
3917 } else {
3918 if ((vm_swapout_ripe_segments == TRUE && c_overage_swapped_count < c_overage_swapped_limit)) {
3919 assert(VM_CONFIG_SWAP_IS_PRESENT);
3920 /*
3921 * we are running compressor sweeps with swap-behind
3922 * make sure the c_seg has aged enough before swapping it
3923 * out...
3924 */
3925 if ((now - c_seg->c_creation_ts) >= vm_ripe_target_age) {
3926 c_seg->c_overage_swap = TRUE;
3927 c_overage_swapped_count++;
3928 c_seg_switch_state(c_seg, C_ON_SWAPOUT_Q, FALSE);
3929 }
3930 }
3931 }
3932 if (c_seg->c_state == C_ON_AGE_Q) {
3933 /*
3934 * this c_seg didn't get moved to the swapout queue
3935 * so we need to move it out of the way...
3936 * we just did a major compaction on it so put it
3937 * on that queue
3938 */
3939 c_seg_switch_state(c_seg, C_ON_MAJORCOMPACT_Q, FALSE);
3940 } else {
3941 c_seg_major_compact_stats[c_seg_major_compact_stats_now].wasted_space_in_swapouts += c_seg_bufsize - c_seg->c_bytes_used;
3942 c_seg_major_compact_stats[c_seg_major_compact_stats_now].count_of_swapouts++;
3943 }
3944 }
3945
3946 C_SEG_WAKEUP_DONE(c_seg);
3947
3948 lck_mtx_unlock_always(&c_seg->c_lock);
3949
3950 /*
3951 * On systems _with_ general swap, regardless of jetsam, we wake up the swapout thread here.
3952 * On systems _without_ general swap, it's the responsibility of the memorystatus
3953 * subsystem to wake up the swapper.
3954 * TODO: When we have full jetsam support on a swap enabled system, we will need to revisit
3955 * this policy.
3956 */
3957 if (VM_CONFIG_SWAP_IS_ACTIVE && c_swapout_count) {
3958 /*
3959 * We don't pause/yield here because we will either
3960 * yield below or at the top of the loop with the
3961 * assert_wait_timeout.
3962 */
3963 if (!vm_swapout_thread_running) {
3964 thread_wakeup((event_t)&vm_swapout_thread);
3965 }
3966 }
3967
3968 if (number_considered >= yield_after_considered_per_pass) {
3969 if (wanted_cseg_found) {
3970 /*
3971 * We stopped major compactions on a c_seg
3972 * that is wanted. We don't know the priority
3973 * of the waiter unfortunately but we are at
3974 * a very high priority and so, just in case
3975 * the waiter is a critical system daemon or
3976 * UI thread, let's give up the CPU in case
3977 * the system is running a few CPU intensive
3978 * tasks.
3979 */
3980 lck_mtx_unlock_always(c_list_lock);
3981
3982 mutex_pause(2); /* 100us yield */
3983
3984 number_yields++;
3985
3986 VM_DEBUG_CONSTANT_EVENT(vm_compressor_compact_and_swap, VM_COMPRESSOR_COMPACT_AND_SWAP, DBG_FUNC_NONE, 11, number_considered, number_yields, 0);
3987
3988 lck_mtx_lock_spin_always(c_list_lock);
3989 }
3990
3991 number_considered = 0;
3992 wanted_cseg_found = 0;
3993 }
3994 }
	clock_get_system_nanotime(&now, &nsec);
3996
3997 end_ts = major_compact_ts = (mach_timespec_t){.tv_sec = (int)now, .tv_nsec = nsec};
3998
3999 SUB_MACH_TIMESPEC(&end_ts, &start_ts);
4000
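	/*
	 * Convert the elapsed wall time to microseconds and subtract the ~100us
	 * we slept per yield, so the bytes-freed rate reflects time actually
	 * spent working. For example (illustrative numbers only): freeing 64MB
	 * over a 2.0s pass that yielded 1000 times gives
	 * delta_usec = 2,000,000 - 100,000 = 1,900,000, i.e. roughly 35 bytes
	 * freed per microsecond.
	 */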
4001 delta_usec = (end_ts.tv_sec * USEC_PER_SEC) + (end_ts.tv_nsec / NSEC_PER_USEC) - (number_yields * 100);
4002
4003 delta_usec = MAX(1, delta_usec); /* we could have 0 usec run if conditions weren't right */
4004
4005 c_seg_major_compact_stats[c_seg_major_compact_stats_now].bytes_freed_rate_us = (bytes_freed / delta_usec);
4006
4007 if ((c_seg_major_compact_stats_now + 1) == C_SEG_MAJOR_COMPACT_STATS_MAX) {
4008 c_seg_major_compact_stats_now = 0;
4009 } else {
4010 c_seg_major_compact_stats_now++;
4011 }
4012
4013 assert(c_seg_major_compact_stats_now < C_SEG_MAJOR_COMPACT_STATS_MAX);
4014
4015 VM_DEBUG_CONSTANT_EVENT(vm_compressor_compact_and_swap, VM_COMPRESSOR_COMPACT_AND_SWAP, DBG_FUNC_END, c_age_count, c_minor_count, c_major_count, vm_page_free_count);
4016}
4017
4018
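/*
 * Return the c_segment currently being filled for this compression context,
 * allocating and populating a fresh one if needed. Returns NULL if the
 * segment or compressed-page limits have been reached. On success the
 * segment is returned locked, with PAGE_REPLACEMENT_DISALLOWED(TRUE) in
 * effect and with enough of its buffer populated to accept more data.
 */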
4019static c_segment_t
4020c_seg_allocate(c_segment_t *current_chead)
4021{
4022 c_segment_t c_seg;
4023 int min_needed;
4024 int size_to_populate;
4025 c_segment_t *donate_queue_head;
4026
4027#if XNU_TARGET_OS_OSX
4028 if (vm_compressor_low_on_space()) {
4029 vm_compressor_take_paging_space_action();
4030 }
4031#endif /* XNU_TARGET_OS_OSX */
4032
4033 if ((c_seg = *current_chead) == NULL) {
4034 uint32_t c_segno;
4035
4036 lck_mtx_lock_spin_always(c_list_lock);
4037
4038 while (c_segments_busy == TRUE) {
			assert_wait((event_t) (&c_segments_busy), THREAD_UNINT);
4040
4041 lck_mtx_unlock_always(c_list_lock);
4042
4043 thread_block(THREAD_CONTINUE_NULL);
4044
4045 lck_mtx_lock_spin_always(c_list_lock);
4046 }
4047 if (c_free_segno_head == (uint32_t)-1) {
4048 uint32_t c_segments_available_new;
4049 uint32_t compressed_pages;
4050
4051#if CONFIG_FREEZE
4052 if (freezer_incore_cseg_acct) {
4053 compressed_pages = c_segment_pages_compressed_incore;
4054 } else {
4055 compressed_pages = c_segment_pages_compressed;
4056 }
4057#else
4058 compressed_pages = c_segment_pages_compressed;
4059#endif /* CONFIG_FREEZE */
4060
4061 if (c_segments_available >= c_segments_limit || compressed_pages >= c_segment_pages_compressed_limit) {
4062 lck_mtx_unlock_always(c_list_lock);
4063
4064 return NULL;
4065 }
4066 c_segments_busy = TRUE;
4067 lck_mtx_unlock_always(c_list_lock);
4068
			kernel_memory_populate((vm_offset_t)c_segments_next_page,
			    PAGE_SIZE, KMA_NOFAIL | KMA_KOBJECT,
			    VM_KERN_MEMORY_COMPRESSOR);
4072 c_segments_next_page += PAGE_SIZE;
4073
4074 c_segments_available_new = c_segments_available + C_SEGMENTS_PER_PAGE;
4075
4076 if (c_segments_available_new > c_segments_limit) {
4077 c_segments_available_new = c_segments_limit;
4078 }
4079
4080 for (c_segno = c_segments_available + 1; c_segno < c_segments_available_new; c_segno++) {
4081 c_segments[c_segno - 1].c_segno = c_segno;
4082 }
4083
4084 lck_mtx_lock_spin_always(c_list_lock);
4085
4086 c_segments[c_segno - 1].c_segno = c_free_segno_head;
4087 c_free_segno_head = c_segments_available;
4088 c_segments_available = c_segments_available_new;
4089
4090 c_segments_busy = FALSE;
4091 thread_wakeup((event_t) (&c_segments_busy));
4092 }
4093 c_segno = c_free_segno_head;
4094 assert(c_segno >= 0 && c_segno < c_segments_limit);
4095
4096 c_free_segno_head = (uint32_t)c_segments[c_segno].c_segno;
4097
4098 /*
4099 * do the rest of the bookkeeping now while we're still behind
4100 * the list lock and grab our generation id now into a local
4101 * so that we can install it once we have the c_seg allocated
4102 */
4103 c_segment_count++;
4104 if (c_segment_count > c_segment_count_max) {
4105 c_segment_count_max = c_segment_count;
4106 }
4107
4108 lck_mtx_unlock_always(c_list_lock);
4109
4110 c_seg = zalloc_flags(compressor_segment_zone, Z_WAITOK | Z_ZERO);
4111
4112 c_seg->c_store.c_buffer = (int32_t *)C_SEG_BUFFER_ADDRESS(c_segno);
4113
		lck_mtx_init(&c_seg->c_lock, &vm_compressor_lck_grp, LCK_ATTR_NULL);
4115
4116 c_seg->c_state = C_IS_EMPTY;
4117 c_seg->c_firstemptyslot = C_SLOT_MAX_INDEX;
4118 c_seg->c_mysegno = c_segno;
4119
4120 lck_mtx_lock_spin_always(c_list_lock);
4121 c_empty_count++;
4122 c_seg_switch_state(c_seg, C_IS_FILLING, FALSE);
4123 c_segments[c_segno].c_seg = c_seg;
4124 assert(c_segments[c_segno].c_segno > c_segments_available);
4125 lck_mtx_unlock_always(c_list_lock);
4126
4127 for (int i = 0; i < vm_pageout_state.vm_compressor_thread_count; i++) {
4128#if XNU_TARGET_OS_OSX
4129 donate_queue_head = (c_segment_t*) &(pgo_iothread_internal_state[i].current_early_swapout_chead);
4130#else /* XNU_TARGET_OS_OSX */
4131 if (memorystatus_swap_all_apps) {
4132 donate_queue_head = (c_segment_t*) &(pgo_iothread_internal_state[i].current_late_swapout_chead);
4133 } else {
4134 donate_queue_head = NULL;
4135 }
4136#endif /* XNU_TARGET_OS_OSX */
4137
4138 if (current_chead == donate_queue_head) {
4139 c_seg->c_has_donated_pages = 1;
4140 break;
4141 }
4142 }
4143
4144 *current_chead = c_seg;
4145
4146#if DEVELOPMENT || DEBUG
4147 C_SEG_MAKE_WRITEABLE(c_seg);
4148#endif
4149 }
4150 c_seg_alloc_nextslot(c_seg);
4151
4152 size_to_populate = c_seg_allocsize - C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset);
4153
4154 if (size_to_populate) {
4155 min_needed = PAGE_SIZE + (c_seg_allocsize - c_seg_bufsize);
4156
4157 if (C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset - c_seg->c_nextoffset) < (unsigned) min_needed) {
4158 if (size_to_populate > C_SEG_MAX_POPULATE_SIZE) {
4159 size_to_populate = C_SEG_MAX_POPULATE_SIZE;
4160 }
4161
4162 OSAddAtomic64(size_to_populate / PAGE_SIZE, &vm_pageout_vminfo.vm_compressor_pages_grabbed);
4163
			kernel_memory_populate(
				(vm_offset_t) &c_seg->c_store.c_buffer[c_seg->c_populated_offset],
				size_to_populate,
				KMA_NOFAIL | KMA_COMPRESSOR,
				VM_KERN_MEMORY_COMPRESSOR);
4169 } else {
4170 size_to_populate = 0;
4171 }
4172 }
4173 PAGE_REPLACEMENT_DISALLOWED(TRUE);
4174
	lck_mtx_lock_spin_always(&c_seg->c_lock);
4176
4177 if (size_to_populate) {
4178 c_seg->c_populated_offset += C_SEG_BYTES_TO_OFFSET(size_to_populate);
4179 }
4180
4181 return c_seg;
4182}
4183
4184#if DEVELOPMENT || DEBUG
4185#if CONFIG_FREEZE
4186extern boolean_t memorystatus_freeze_to_memory;
4187#endif /* CONFIG_FREEZE */
4188#endif /* DEVELOPMENT || DEBUG */
4189uint64_t c_seg_total_donated_bytes = 0; /* For testing/debugging only for now. Remove and add new counters for vm_stat.*/
4190
4191uint64_t c_seg_filled_no_contention = 0;
4192uint64_t c_seg_filled_contention = 0;
4193clock_sec_t c_seg_filled_contention_sec_max = 0;
4194clock_nsec_t c_seg_filled_contention_nsec_max = 0;
4195
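/*
 * Called with the c_seg lock held once a filling segment can't accept more
 * data: releases any untouched pages at the tail of the buffer, picks the
 * queue the segment should age on (age queue by default, swapout queue for
 * darkwake, freezer and donated segments), timestamps it and assigns its
 * generation id. Clears *current_chead so the next compression allocates a
 * new segment.
 */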
4196static void
4197c_current_seg_filled(c_segment_t c_seg, c_segment_t *current_chead)
4198{
4199 uint32_t unused_bytes;
4200 uint32_t offset_to_depopulate;
4201 int new_state = C_ON_AGE_Q;
4202 clock_sec_t sec;
4203 clock_nsec_t nsec;
4204 bool head_insert = false, wakeup_swapout_thread = false;
4205
4206 unused_bytes = trunc_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset - c_seg->c_nextoffset));
4207
4208 if (unused_bytes) {
4209 offset_to_depopulate = C_SEG_BYTES_TO_OFFSET(round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_nextoffset)));
4210
4211 /*
4212 * release the extra physical page(s) at the end of the segment
4213 */
4214 lck_mtx_unlock_always(&c_seg->c_lock);
4215
		kernel_memory_depopulate(
			(vm_offset_t) &c_seg->c_store.c_buffer[offset_to_depopulate],
			unused_bytes,
			KMA_COMPRESSOR,
			VM_KERN_MEMORY_COMPRESSOR);
4221
		lck_mtx_lock_spin_always(&c_seg->c_lock);
4223
4224 c_seg->c_populated_offset = offset_to_depopulate;
4225 }
4226 assert(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset) <= c_seg_bufsize);
4227
4228#if DEVELOPMENT || DEBUG
4229 {
4230 boolean_t c_seg_was_busy = FALSE;
4231
4232 if (!c_seg->c_busy) {
4233 C_SEG_BUSY(c_seg);
4234 } else {
4235 c_seg_was_busy = TRUE;
4236 }
4237
4238 lck_mtx_unlock_always(&c_seg->c_lock);
4239
4240 C_SEG_WRITE_PROTECT(c_seg);
4241
4242 lck_mtx_lock_spin_always(&c_seg->c_lock);
4243
4244 if (c_seg_was_busy == FALSE) {
4245 C_SEG_WAKEUP_DONE(c_seg);
4246 }
4247 }
4248#endif
4249
4250#if CONFIG_FREEZE
4251 if (current_chead == (c_segment_t*) &(freezer_context_global.freezer_ctx_chead) &&
4252 VM_CONFIG_SWAP_IS_PRESENT &&
4253 VM_CONFIG_FREEZER_SWAP_IS_ACTIVE
4254#if DEVELOPMENT || DEBUG
4255 && !memorystatus_freeze_to_memory
4256#endif /* DEVELOPMENT || DEBUG */
4257 ) {
4258 new_state = C_ON_SWAPOUT_Q;
4259 wakeup_swapout_thread = true;
4260 }
4261#endif /* CONFIG_FREEZE */
4262
4263 if (vm_darkwake_mode == TRUE) {
4264 new_state = C_ON_SWAPOUT_Q;
4265 head_insert = true;
4266 wakeup_swapout_thread = true;
4267 } else {
4268 c_segment_t *donate_queue_head;
4269 for (int i = 0; i < vm_pageout_state.vm_compressor_thread_count; i++) {
4270#if XNU_TARGET_OS_OSX
4271 donate_queue_head = (c_segment_t*) &(pgo_iothread_internal_state[i].current_early_swapout_chead);
4272#else /* XNU_TARGET_OS_OSX */
4273 donate_queue_head = (c_segment_t*) &(pgo_iothread_internal_state[i].current_late_swapout_chead);
4274#endif /* XNU_TARGET_OS_OSX */
4275
4276 if (current_chead == donate_queue_head) {
4277 assert(c_seg->c_has_donated_pages);
4278 new_state = C_ON_SWAPOUT_Q;
4279 c_seg_total_donated_bytes += c_seg->c_bytes_used;
4280 break;
4281 }
4282 }
4283 }
4284
	clock_get_system_nanotime(&sec, &nsec);
4286 c_seg->c_creation_ts = (uint32_t)sec;
4287
4288 if (!lck_mtx_try_lock_spin_always(c_list_lock)) {
4289 clock_sec_t sec2;
4290 clock_nsec_t nsec2;
4291
4292 lck_mtx_lock_spin_always(c_list_lock);
		clock_get_system_nanotime(&sec2, &nsec2);
4294 TIME_SUB(sec2, sec, nsec2, nsec, NSEC_PER_SEC);
4295 // printf("FBDP %s: head %p waited for c_list_lock for %lu.%09u seconds\n", __FUNCTION__, current_chead, sec2, nsec2);
4296 if (sec2 > c_seg_filled_contention_sec_max) {
4297 c_seg_filled_contention_sec_max = sec2;
4298 c_seg_filled_contention_nsec_max = nsec2;
4299 } else if (sec2 == c_seg_filled_contention_sec_max &&
4300 nsec2 > c_seg_filled_contention_nsec_max) {
4301 c_seg_filled_contention_nsec_max = nsec2;
4302 }
4303 c_seg_filled_contention++;
4304 } else {
4305 c_seg_filled_no_contention++;
4306 }
4307
4308#if CONFIG_FREEZE
4309 if (current_chead == (c_segment_t*) &(freezer_context_global.freezer_ctx_chead)) {
4310 if (freezer_context_global.freezer_ctx_task->donates_own_pages) {
4311 assert(!c_seg->c_has_donated_pages);
4312 c_seg->c_has_donated_pages = 1;
4313 OSAddAtomic(c_seg->c_slots_used, &c_segment_pages_compressed_incore_late_swapout);
4314 }
4315 c_seg->c_has_freezer_pages = 1;
4316 }
4317#endif /* CONFIG_FREEZE */
4318
4319 c_seg->c_generation_id = c_generation_id++;
	c_seg_switch_state(c_seg, new_state, head_insert);
4321
4322#if CONFIG_FREEZE
4323 /*
4324 * Donated segments count as frozen to swap if we go through the freezer.
4325 * TODO: What we need is a new ledger and cseg state that can describe
4326 * a frozen cseg from a donated task so we can accurately decrement it on
4327 * swapins.
4328 */
4329 if (current_chead == (c_segment_t*) &(freezer_context_global.freezer_ctx_chead) && (c_seg->c_state == C_ON_SWAPOUT_Q)) {
4330 /*
		 * darkwake and freezer can't co-exist.
4332 * We'll need to fix this accounting as a start.
4333 * And early donation c_segs are separate from frozen c_segs.
4334 */
4335 assert(vm_darkwake_mode == FALSE);
4336 c_seg_update_task_owner(c_seg, freezer_context_global.freezer_ctx_task);
4337 freezer_context_global.freezer_ctx_swapped_bytes += c_seg->c_bytes_used;
4338 }
4339#endif /* CONFIG_FREEZE */
4340
4341 if (c_seg->c_state == C_ON_AGE_Q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE) {
4342#if CONFIG_FREEZE
4343 assert(c_seg->c_task_owner == NULL);
4344#endif /* CONFIG_FREEZE */
4345 c_seg_need_delayed_compaction(c_seg, TRUE);
4346 }
4347
4348 lck_mtx_unlock_always(c_list_lock);
4349
4350 if (wakeup_swapout_thread) {
4351 /*
4352 * Darkwake and Freeze configs always
4353 * wake up the swapout thread because
4354 * the compactor thread that normally handles
4355 * it may not be running as much in these
4356 * configs.
4357 */
4358 thread_wakeup((event_t)&vm_swapout_thread);
4359 }
4360
4361 *current_chead = NULL;
4362}
4363
4364/*
4365 * returns with c_seg locked
4366 */
4367void
4368c_seg_swapin_requeue(c_segment_t c_seg, boolean_t has_data, boolean_t minor_compact_ok, boolean_t age_on_swapin_q)
4369{
4370 clock_sec_t sec;
4371 clock_nsec_t nsec;
4372
	clock_get_system_nanotime(&sec, &nsec);
4374
4375 lck_mtx_lock_spin_always(c_list_lock);
	lck_mtx_lock_spin_always(&c_seg->c_lock);
4377
4378 assert(c_seg->c_busy_swapping);
4379 assert(c_seg->c_busy);
4380
4381 c_seg->c_busy_swapping = 0;
4382
4383 if (c_seg->c_overage_swap == TRUE) {
4384 c_overage_swapped_count--;
4385 c_seg->c_overage_swap = FALSE;
4386 }
4387 if (has_data == TRUE) {
4388 if (age_on_swapin_q == TRUE || c_seg->c_has_donated_pages) {
4389#if CONFIG_FREEZE
4390 /*
4391 * If a segment has both identities, frozen and donated bits set, the donated
4392 * bit wins on the swapin path. This is because the segment is being swapped back
4393 * in and so is in demand and should be given more time to spend in memory before
4394 * being swapped back out under pressure.
4395 */
4396 if (c_seg->c_has_donated_pages) {
4397 c_seg->c_has_freezer_pages = 0;
4398 }
4399#endif /* CONFIG_FREEZE */
4400 c_seg_switch_state(c_seg, C_ON_SWAPPEDIN_Q, FALSE);
4401 } else {
4402 c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
4403 }
4404
4405 if (minor_compact_ok == TRUE && !c_seg->c_on_minorcompact_q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE) {
4406 c_seg_need_delayed_compaction(c_seg, TRUE);
4407 }
4408 } else {
4409 c_seg->c_store.c_buffer = (int32_t*) NULL;
4410 c_seg->c_populated_offset = C_SEG_BYTES_TO_OFFSET(0);
4411
4412 c_seg_switch_state(c_seg, C_ON_BAD_Q, FALSE);
4413 }
4414 c_seg->c_swappedin_ts = (uint32_t)sec;
4415 c_seg->c_swappedin = true;
4416
4417 lck_mtx_unlock_always(c_list_lock);
4418}
4419
4420
4421
4422/*
4423 * c_seg has to be locked and is returned locked if the c_seg isn't freed
 * PAGE_REPLACEMENT_DISALLOWED has to be TRUE on entry and is still TRUE on return
4425 * c_seg_swapin returns 1 if the c_seg was freed, 0 otherwise
4426 */
4427
4428int
4429c_seg_swapin(c_segment_t c_seg, boolean_t force_minor_compaction, boolean_t age_on_swapin_q)
4430{
4431 vm_offset_t addr = 0;
4432 uint32_t io_size = 0;
4433 uint64_t f_offset;
4434 thread_pri_floor_t token;
4435
4436 assert(C_SEG_IS_ONDISK(c_seg));
4437
4438#if !CHECKSUM_THE_SWAP
4439 c_seg_trim_tail(c_seg);
4440#endif
4441 io_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
4442 f_offset = c_seg->c_store.c_swap_handle;
4443
4444 C_SEG_BUSY(c_seg);
4445 c_seg->c_busy_swapping = 1;
4446
4447 /*
4448 * This thread is likely going to block for I/O.
4449 * Make sure it is ready to run when the I/O completes because
4450 * it needs to clear the busy bit on the c_seg so that other
4451 * waiting threads can make progress too.
4452 */
4453 token = thread_priority_floor_start();
4454 lck_mtx_unlock_always(&c_seg->c_lock);
4455
4456 PAGE_REPLACEMENT_DISALLOWED(FALSE);
4457
4458 addr = (vm_offset_t)C_SEG_BUFFER_ADDRESS(c_seg->c_mysegno);
4459 c_seg->c_store.c_buffer = (int32_t*) addr;
4460
	kernel_memory_populate(addr, io_size, KMA_NOFAIL | KMA_COMPRESSOR,
4462 VM_KERN_MEMORY_COMPRESSOR);
4463
4464 if (vm_swap_get(c_seg, f_offset, io_size) != KERN_SUCCESS) {
4465 PAGE_REPLACEMENT_DISALLOWED(TRUE);
4466
		kernel_memory_depopulate(addr, io_size, KMA_COMPRESSOR,
4468 VM_KERN_MEMORY_COMPRESSOR);
4469
4470 c_seg_swapin_requeue(c_seg, FALSE, TRUE, age_on_swapin_q);
4471 } else {
4472#if ENCRYPTED_SWAP
4473 vm_swap_decrypt(c_seg);
4474#endif /* ENCRYPTED_SWAP */
4475
4476#if CHECKSUM_THE_SWAP
4477 if (c_seg->cseg_swap_size != io_size) {
4478 panic("swapin size doesn't match swapout size");
4479 }
4480
4481 if (c_seg->cseg_hash != vmc_hash((char*) c_seg->c_store.c_buffer, (int)io_size)) {
4482 panic("c_seg_swapin - Swap hash mismatch");
4483 }
4484#endif /* CHECKSUM_THE_SWAP */
4485
4486 PAGE_REPLACEMENT_DISALLOWED(TRUE);
4487
		c_seg_swapin_requeue(c_seg, TRUE, force_minor_compaction == TRUE ? FALSE : TRUE, age_on_swapin_q);
4489
4490#if CONFIG_FREEZE
4491 /*
4492 * c_seg_swapin_requeue() returns with the c_seg lock held.
4493 */
4494 if (!lck_mtx_try_lock_spin_always(c_list_lock)) {
4495 assert(c_seg->c_busy);
4496
4497 lck_mtx_unlock_always(&c_seg->c_lock);
4498 lck_mtx_lock_spin_always(c_list_lock);
4499 lck_mtx_lock_spin_always(&c_seg->c_lock);
4500 }
4501
4502 if (c_seg->c_task_owner) {
4503 c_seg_update_task_owner(c_seg, NULL);
4504 }
4505
4506 lck_mtx_unlock_always(c_list_lock);
4507
4508 OSAddAtomic(c_seg->c_slots_used, &c_segment_pages_compressed_incore);
4509 if (c_seg->c_has_donated_pages) {
4510 OSAddAtomic(c_seg->c_slots_used, &c_segment_pages_compressed_incore_late_swapout);
4511 }
4512#endif /* CONFIG_FREEZE */
4513
4514 OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used);
4515
4516 if (force_minor_compaction == TRUE) {
4517 if (c_seg_minor_compaction_and_unlock(c_seg, FALSE)) {
4518 /*
4519 * c_seg was completely empty so it was freed,
4520 * so be careful not to reference it again
4521 *
4522 * Drop the boost so that the thread priority
				 * is returned to where it is supposed to be.
				 */
				thread_priority_floor_end(&token);
4526 return 1;
4527 }
4528
			lck_mtx_lock_spin_always(&c_seg->c_lock);
4530 }
4531 }
4532 C_SEG_WAKEUP_DONE(c_seg);
4533
4534 /*
4535 * Drop the boost so that the thread priority
	 * is returned to where it is supposed to be.
	 */
	thread_priority_floor_end(&token);
4539
4540 return 0;
4541}
4542
4543
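/*
 * Single-value hash: pages filled with one repeating 32-bit value are not
 * stored in a segment at all; the value lives in c_segment_sv_hash_table and
 * the slot just references it. Each entry packs a reference count and the
 * value into one 64-bit he_record so both can be updated together with a
 * single OSCompareAndSwap64.
 */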
4544static void
4545c_segment_sv_hash_drop_ref(int hash_indx)
4546{
4547 struct c_sv_hash_entry o_sv_he, n_sv_he;
4548
4549 while (1) {
4550 o_sv_he.he_record = c_segment_sv_hash_table[hash_indx].he_record;
4551
4552 n_sv_he.he_ref = o_sv_he.he_ref - 1;
4553 n_sv_he.he_data = o_sv_he.he_data;
4554
4555 if (OSCompareAndSwap64((UInt64)o_sv_he.he_record, (UInt64)n_sv_he.he_record, (UInt64 *) &c_segment_sv_hash_table[hash_indx].he_record) == TRUE) {
4556 if (n_sv_he.he_ref == 0) {
4557 OSAddAtomic(-1, &c_segment_svp_in_hash);
4558 }
4559 break;
4560 }
4561 }
4562}
4563
4564
4565static int
4566c_segment_sv_hash_insert(uint32_t data)
4567{
4568 int hash_sindx;
4569 int misses;
4570 struct c_sv_hash_entry o_sv_he, n_sv_he;
4571 boolean_t got_ref = FALSE;
4572
4573 if (data == 0) {
4574 OSAddAtomic(1, &c_segment_svp_zero_compressions);
4575 } else {
4576 OSAddAtomic(1, &c_segment_svp_nonzero_compressions);
4577 }
4578
4579 hash_sindx = data & C_SV_HASH_MASK;
4580
4581 for (misses = 0; misses < C_SV_HASH_MAX_MISS; misses++) {
4582 o_sv_he.he_record = c_segment_sv_hash_table[hash_sindx].he_record;
4583
4584 while (o_sv_he.he_data == data || o_sv_he.he_ref == 0) {
4585 n_sv_he.he_ref = o_sv_he.he_ref + 1;
4586 n_sv_he.he_data = data;
4587
4588 if (OSCompareAndSwap64((UInt64)o_sv_he.he_record, (UInt64)n_sv_he.he_record, (UInt64 *) &c_segment_sv_hash_table[hash_sindx].he_record) == TRUE) {
4589 if (n_sv_he.he_ref == 1) {
4590 OSAddAtomic(1, &c_segment_svp_in_hash);
4591 }
4592 got_ref = TRUE;
4593 break;
4594 }
4595 o_sv_he.he_record = c_segment_sv_hash_table[hash_sindx].he_record;
4596 }
4597 if (got_ref == TRUE) {
4598 break;
4599 }
4600 hash_sindx++;
4601
4602 if (hash_sindx == C_SV_HASH_SIZE) {
4603 hash_sindx = 0;
4604 }
4605 }
4606 if (got_ref == FALSE) {
4607 return -1;
4608 }
4609
4610 return hash_sindx;
4611}
4612
4613
4614#if RECORD_THE_COMPRESSED_DATA
4615
4616static void
4617c_compressed_record_data(char *src, int c_size)
4618{
4619 if ((c_compressed_record_cptr + c_size + 4) >= c_compressed_record_ebuf) {
4620 panic("c_compressed_record_cptr >= c_compressed_record_ebuf");
4621 }
4622
4623 *(int *)((void *)c_compressed_record_cptr) = c_size;
4624
4625 c_compressed_record_cptr += 4;
4626
4627 memcpy(c_compressed_record_cptr, src, c_size);
4628 c_compressed_record_cptr += c_size;
4629}
4630#endif
4631
4632
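/*
 * Compress one page at 'src' into the segment referenced by *current_chead,
 * recording where it landed in *slot_ptr. Returns 0 on success and 1 if no
 * segment could be allocated (the compressor pool is at its limit). Pages
 * that don't compress are stored verbatim; pages consisting of a single
 * repeating 32-bit value are diverted to the single-value hash when possible.
 */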
4633static int
4634c_compress_page(char *src, c_slot_mapping_t slot_ptr, c_segment_t *current_chead, char *scratch_buf)
4635{
4636 int c_size = -1;
4637 int c_rounded_size = 0;
4638 int max_csize;
4639 c_slot_t cs;
4640 c_segment_t c_seg;
4641 bool single_value = false;
4642
4643 KERNEL_DEBUG(0xe0400000 | DBG_FUNC_START, *current_chead, 0, 0, 0, 0);
4644retry:
4645 if ((c_seg = c_seg_allocate(current_chead)) == NULL) {
4646 return 1;
4647 }
4648 /*
4649 * returns with c_seg lock held
4650 * and PAGE_REPLACEMENT_DISALLOWED(TRUE)...
4651 * c_nextslot has been allocated and
4652 * c_store.c_buffer populated
4653 */
4654 assert(c_seg->c_state == C_IS_FILLING);
4655
4656 cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_seg->c_nextslot);
4657
4658 C_SLOT_ASSERT_PACKABLE(slot_ptr);
4659 cs->c_packed_ptr = C_SLOT_PACK_PTR(slot_ptr);
4660
4661 cs->c_offset = c_seg->c_nextoffset;
4662
4663 max_csize = c_seg_bufsize - C_SEG_OFFSET_TO_BYTES((int32_t)cs->c_offset);
4664
4665 if (max_csize > PAGE_SIZE) {
4666 max_csize = PAGE_SIZE;
4667 }
4668
4669#if CHECKSUM_THE_DATA
4670 cs->c_hash_data = vmc_hash(src, PAGE_SIZE);
4671#endif
4672 boolean_t incomp_copy = FALSE;
4673 int max_csize_adj = (max_csize - 4);
4674
4675 if (vm_compressor_algorithm() != VM_COMPRESSOR_DEFAULT_CODEC) {
4676#if defined(__arm64__)
4677 uint16_t ccodec = CINVALID;
4678 uint32_t inline_popcount;
4679 if (max_csize >= C_SEG_OFFSET_ALIGNMENT_BOUNDARY) {
			c_size = metacompressor((const uint8_t *) src,
			    (uint8_t *) &c_seg->c_store.c_buffer[cs->c_offset],
			    max_csize_adj, &ccodec,
			    scratch_buf, &incomp_copy, &inline_popcount);
4684 assert(inline_popcount == C_SLOT_NO_POPCOUNT);
4685
4686#if C_SEG_OFFSET_ALIGNMENT_BOUNDARY > 4
4687 if (c_size > max_csize_adj) {
4688 c_size = -1;
4689 }
4690#endif
4691 } else {
4692 c_size = -1;
4693 }
4694 assert(ccodec == CCWK || ccodec == CCLZ4);
4695 cs->c_codec = ccodec;
4696#endif
4697 } else {
4698#if defined(__arm64__)
4699 cs->c_codec = CCWK;
4700 __unreachable_ok_push
4701 if (PAGE_SIZE == 4096) {
			c_size = WKdm_compress_4k((WK_word *)(uintptr_t)src, (WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset],
			    (WK_word *)(uintptr_t)scratch_buf, max_csize_adj);
		} else {
			c_size = WKdm_compress_16k((WK_word *)(uintptr_t)src, (WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset],
			    (WK_word *)(uintptr_t)scratch_buf, max_csize_adj);
4707 }
4708 __unreachable_ok_pop
4709#else
4710 c_size = WKdm_compress_new((const WK_word *)(uintptr_t)src, (WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset],
4711 (WK_word *)(uintptr_t)scratch_buf, max_csize_adj);
4712#endif
4713 }
4714 assertf(((c_size <= max_csize_adj) && (c_size >= -1)),
4715 "c_size invalid (%d, %d), cur compressions: %d", c_size, max_csize_adj, c_segment_pages_compressed);
4716
4717 if (c_size == -1) {
4718 if (max_csize < PAGE_SIZE) {
4719 c_current_seg_filled(c_seg, current_chead);
4720 assert(*current_chead == NULL);
4721
4722 lck_mtx_unlock_always(&c_seg->c_lock);
4723 /* TODO: it may be worth requiring codecs to distinguish
4724 * between incompressible inputs and failures due to
4725 * budget exhaustion.
4726 */
4727 PAGE_REPLACEMENT_DISALLOWED(FALSE);
4728 goto retry;
4729 }
4730 c_size = PAGE_SIZE;
4731
4732 if (incomp_copy == FALSE) {
			memcpy(&c_seg->c_store.c_buffer[cs->c_offset], src, c_size);
4734 }
4735
4736 OSAddAtomic(1, &c_segment_noncompressible_pages);
4737 } else if (c_size == 0) {
4738 int hash_index;
4739
4740 /*
4741 * special case - this is a page completely full of a single 32 bit value
4742 */
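		/*
		 * If the value can be parked in the single-value hash, the slot
		 * records C_SV_CSEG_ID instead of a real segment and no segment
		 * space is consumed for this page; otherwise fall through and
		 * store the 4-byte value in the segment.
		 */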
4743 single_value = true;
		hash_index = c_segment_sv_hash_insert(*(uint32_t *)(uintptr_t)src);
4745
4746 if (hash_index != -1) {
4747 slot_ptr->s_cindx = hash_index;
4748 slot_ptr->s_cseg = C_SV_CSEG_ID;
4749#if CONFIG_TRACK_UNMODIFIED_ANON_PAGES
4750 slot_ptr->s_uncompressed = 0;
4751#endif /* CONFIG_TRACK_UNMODIFIED_ANON_PAGES */
4752
4753 OSAddAtomic(1, &c_segment_svp_hash_succeeded);
4754#if RECORD_THE_COMPRESSED_DATA
4755 c_compressed_record_data(src, 4);
4756#endif
4757 goto sv_compression;
4758 }
4759 c_size = 4;
4760
		memcpy(&c_seg->c_store.c_buffer[cs->c_offset], src, c_size);
4762
4763 OSAddAtomic(1, &c_segment_svp_hash_failed);
4764 }
4765
4766#if RECORD_THE_COMPRESSED_DATA
4767 c_compressed_record_data((char *)&c_seg->c_store.c_buffer[cs->c_offset], c_size);
4768#endif
4769#if CHECKSUM_THE_COMPRESSED_DATA
4770 cs->c_hash_compressed_data = vmc_hash((char *)&c_seg->c_store.c_buffer[cs->c_offset], c_size);
4771#endif
4772#if POPCOUNT_THE_COMPRESSED_DATA
4773 cs->c_pop_cdata = vmc_pop((uintptr_t) &c_seg->c_store.c_buffer[cs->c_offset], c_size);
4774#endif
4775 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;
4776
4777 PACK_C_SIZE(cs, c_size);
4778 c_seg->c_bytes_used += c_rounded_size;
4779 c_seg->c_nextoffset += C_SEG_BYTES_TO_OFFSET(c_rounded_size);
4780 c_seg->c_slots_used++;
4781
4782#if CONFIG_FREEZE
4783 /* TODO: should c_segment_pages_compressed be up here too? See 88598046 for details */
4784 OSAddAtomic(1, &c_segment_pages_compressed_incore);
4785 if (c_seg->c_has_donated_pages) {
4786 OSAddAtomic(1, &c_segment_pages_compressed_incore_late_swapout);
4787 }
4788#endif /* CONFIG_FREEZE */
4789
4790 slot_ptr->s_cindx = c_seg->c_nextslot++;
4791 /* <csegno=0,indx=0> would mean "empty slot", so use csegno+1 */
4792 slot_ptr->s_cseg = c_seg->c_mysegno + 1;
4793
4794#if CONFIG_TRACK_UNMODIFIED_ANON_PAGES
4795 slot_ptr->s_uncompressed = 0;
4796#endif /* CONFIG_TRACK_UNMODIFIED_ANON_PAGES */
4797
4798sv_compression:
4799 if (c_seg->c_nextoffset >= c_seg_off_limit || c_seg->c_nextslot >= C_SLOT_MAX_INDEX) {
4800 c_current_seg_filled(c_seg, current_chead);
4801 assert(*current_chead == NULL);
4802 }
4803
4804 lck_mtx_unlock_always(&c_seg->c_lock);
4805
4806 PAGE_REPLACEMENT_DISALLOWED(FALSE);
4807
4808#if RECORD_THE_COMPRESSED_DATA
4809 if ((c_compressed_record_cptr - c_compressed_record_sbuf) >= c_seg_allocsize) {
4810 c_compressed_record_write(c_compressed_record_sbuf, (int)(c_compressed_record_cptr - c_compressed_record_sbuf));
4811 c_compressed_record_cptr = c_compressed_record_sbuf;
4812 }
4813#endif
4814 if (c_size) {
4815 OSAddAtomic64(c_size, &c_segment_compressed_bytes);
4816 OSAddAtomic64(c_rounded_size, &compressor_bytes_used);
4817 }
4818 OSAddAtomic64(PAGE_SIZE, &c_segment_input_bytes);
4819
4820 OSAddAtomic(1, &c_segment_pages_compressed);
4821#if DEVELOPMENT || DEBUG
4822 if (!compressor_running_perf_test) {
4823 /*
4824 * The perf_compressor benchmark should not be able to trigger
4825 * compressor thrashing jetsams.
4826 */
4827 OSAddAtomic(1, &sample_period_compression_count);
4828 }
4829#else /* DEVELOPMENT || DEBUG */
4830 OSAddAtomic(1, &sample_period_compression_count);
4831#endif /* DEVELOPMENT || DEBUG */
4832
4833 KERNEL_DEBUG(0xe0400000 | DBG_FUNC_END, *current_chead, c_size, c_segment_input_bytes, c_segment_compressed_bytes, 0);
4834
4835 return 0;
4836}
4837
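/*
 * Fill a page at 'ddst' with a repeating 32-bit 'pattern', using the fastest
 * primitive available for the architecture.
 */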
4838static inline void
4839sv_decompress(int32_t *ddst, int32_t pattern)
4840{
4841// assert(__builtin_constant_p(PAGE_SIZE) != 0);
4842#if defined(__x86_64__)
4843 memset_word(ddst, pattern, PAGE_SIZE / sizeof(int32_t));
4844#elif defined(__arm64__)
4845 assert((PAGE_SIZE % 128) == 0);
4846 if (pattern == 0) {
4847 fill32_dczva((addr64_t)ddst, PAGE_SIZE);
4848 } else {
4849 fill32_nt((addr64_t)ddst, PAGE_SIZE, pattern);
4850 }
4851#else
4852 size_t i;
4853
4854 /* Unroll the pattern fill loop 4x to encourage the
4855 * compiler to emit NEON stores, cf.
4856 * <rdar://problem/25839866> Loop autovectorization
4857 * anomalies.
4858 */
	/*
	 * We use separate loops for each PAGE_SIZE
4860 * to allow the autovectorizer to engage, as PAGE_SIZE
4861 * may not be a constant.
4862 */
4863
4864 __unreachable_ok_push
4865 if (PAGE_SIZE == 4096) {
4866 for (i = 0; i < (4096U / sizeof(int32_t)); i += 4) {
4867 *ddst++ = pattern;
4868 *ddst++ = pattern;
4869 *ddst++ = pattern;
4870 *ddst++ = pattern;
4871 }
4872 } else {
4873 assert(PAGE_SIZE == 16384);
4874 for (i = 0; i < (int)(16384U / sizeof(int32_t)); i += 4) {
4875 *ddst++ = pattern;
4876 *ddst++ = pattern;
4877 *ddst++ = pattern;
4878 *ddst++ = pattern;
4879 }
4880 }
4881 __unreachable_ok_pop
4882#endif
4883}
4884
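/*
 * Decompress (or just free, when dst is NULL) the slot described by slot_ptr.
 * Returns 0 on success, 1 if the segment had to be swapped in first, -1 if
 * the data was lost (segment on the bad queue or codec failure), and -2 if
 * the caller asked not to block (C_DONT_BLOCK / C_KDP) and we would have had
 * to. C_KEEP leaves the compressed copy in place instead of freeing the slot.
 */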
4885static int
4886c_decompress_page(char *dst, volatile c_slot_mapping_t slot_ptr, vm_compressor_options_t flags, int *zeroslot)
4887{
4888 c_slot_t cs;
4889 c_segment_t c_seg;
4890 uint32_t c_segno;
4891 uint16_t c_indx;
4892 int c_rounded_size;
4893 uint32_t c_size;
4894 int retval = 0;
4895 boolean_t need_unlock = TRUE;
4896 boolean_t consider_defragmenting = FALSE;
4897 boolean_t kdp_mode = FALSE;
4898
4899 if (__improbable(flags & C_KDP)) {
4900 if (not_in_kdp) {
4901 panic("C_KDP passed to decompress page from outside of debugger context");
4902 }
4903
4904 assert((flags & C_KEEP) == C_KEEP);
4905 assert((flags & C_DONT_BLOCK) == C_DONT_BLOCK);
4906
4907 if ((flags & (C_DONT_BLOCK | C_KEEP)) != (C_DONT_BLOCK | C_KEEP)) {
4908 return -2;
4909 }
4910
4911 kdp_mode = TRUE;
4912 *zeroslot = 0;
4913 }
4914
4915ReTry:
4916 if (__probable(!kdp_mode)) {
4917 PAGE_REPLACEMENT_DISALLOWED(TRUE);
4918 } else {
		if (kdp_lck_rw_lock_is_acquired_exclusive(&c_master_lock)) {
4920 return -2;
4921 }
4922 }
4923
4924#if HIBERNATION
4925 /*
4926 * if hibernation is enabled, it indicates (via a call
	 * to 'vm_decompressor_lock') that no further
4928 * decompressions are allowed once it reaches
4929 * the point of flushing all of the currently dirty
4930 * anonymous memory through the compressor and out
4931 * to disk... in this state we allow freeing of compressed
4932 * pages and must honor the C_DONT_BLOCK case
4933 */
4934 if (__improbable(dst && decompressions_blocked == TRUE)) {
4935 if (flags & C_DONT_BLOCK) {
4936 if (__probable(!kdp_mode)) {
4937 PAGE_REPLACEMENT_DISALLOWED(FALSE);
4938 }
4939
4940 *zeroslot = 0;
4941 return -2;
4942 }
4943 /*
4944 * it's safe to atomically assert and block behind the
4945 * lock held in shared mode because "decompressions_blocked" is
4946 * only set and cleared and the thread_wakeup done when the lock
4947 * is held exclusively
4948 */
4949 assert_wait((event_t)&decompressions_blocked, THREAD_UNINT);
4950
4951 PAGE_REPLACEMENT_DISALLOWED(FALSE);
4952
4953 thread_block(THREAD_CONTINUE_NULL);
4954
4955 goto ReTry;
4956 }
4957#endif
4958 /* s_cseg is actually "segno+1" */
4959 c_segno = slot_ptr->s_cseg - 1;
4960
4961 if (__improbable(c_segno >= c_segments_available)) {
4962 panic("c_decompress_page: c_segno %d >= c_segments_available %d, slot_ptr(%p), slot_data(%x)",
4963 c_segno, c_segments_available, slot_ptr, *(int *)((void *)slot_ptr));
4964 }
4965
4966 if (__improbable(c_segments[c_segno].c_segno < c_segments_available)) {
4967 panic("c_decompress_page: c_segno %d is free, slot_ptr(%p), slot_data(%x)",
4968 c_segno, slot_ptr, *(int *)((void *)slot_ptr));
4969 }
4970
4971 c_seg = c_segments[c_segno].c_seg;
4972
4973 if (__probable(!kdp_mode)) {
		lck_mtx_lock_spin_always(&c_seg->c_lock);
4975 } else {
		if (kdp_lck_mtx_lock_spin_is_acquired(&c_seg->c_lock)) {
4977 return -2;
4978 }
4979 }
4980
4981 assert(c_seg->c_state != C_IS_EMPTY && c_seg->c_state != C_IS_FREE);
4982
4983 if (dst == NULL && c_seg->c_busy_swapping) {
4984 assert(c_seg->c_busy);
4985
4986 goto bypass_busy_check;
4987 }
4988 if (flags & C_DONT_BLOCK) {
4989 if (c_seg->c_busy || (C_SEG_IS_ONDISK(c_seg) && dst)) {
4990 *zeroslot = 0;
4991
4992 retval = -2;
4993 goto done;
4994 }
4995 }
4996 if (c_seg->c_busy) {
4997 PAGE_REPLACEMENT_DISALLOWED(FALSE);
4998
4999 c_seg_wait_on_busy(c_seg);
5000
5001 goto ReTry;
5002 }
5003bypass_busy_check:
5004
5005 c_indx = slot_ptr->s_cindx;
5006
5007 if (__improbable(c_indx >= c_seg->c_nextslot)) {
5008 panic("c_decompress_page: c_indx %d >= c_nextslot %d, c_seg(%p), slot_ptr(%p), slot_data(%x)",
5009 c_indx, c_seg->c_nextslot, c_seg, slot_ptr, *(int *)((void *)slot_ptr));
5010 }
5011
5012 cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx);
5013
5014 c_size = UNPACK_C_SIZE(cs);
5015
5016 if (__improbable(c_size == 0)) {
5017 panic("c_decompress_page: c_size == 0, c_seg(%p), slot_ptr(%p), slot_data(%x)",
5018 c_seg, slot_ptr, *(int *)((void *)slot_ptr));
5019 }
5020
5021 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;
5022
5023 if (dst) {
5024 uint32_t age_of_cseg;
5025 clock_sec_t cur_ts_sec;
5026 clock_nsec_t cur_ts_nsec;
5027
5028 if (C_SEG_IS_ONDISK(c_seg)) {
5029#if CONFIG_FREEZE
5030 if (freezer_incore_cseg_acct) {
5031 if ((c_seg->c_slots_used + c_segment_pages_compressed_incore) >= c_segment_pages_compressed_nearing_limit) {
5032 PAGE_REPLACEMENT_DISALLOWED(FALSE);
5033 lck_mtx_unlock_always(&c_seg->c_lock);
5034
5035 memorystatus_kill_on_VM_compressor_space_shortage(FALSE /* async */);
5036
5037 goto ReTry;
5038 }
5039
5040 uint32_t incore_seg_count = c_segment_count - c_swappedout_count - c_swappedout_sparse_count;
5041 if ((incore_seg_count + 1) >= c_segments_nearing_limit) {
5042 PAGE_REPLACEMENT_DISALLOWED(FALSE);
5043 lck_mtx_unlock_always(&c_seg->c_lock);
5044
5045 memorystatus_kill_on_VM_compressor_space_shortage(FALSE /* async */);
5046
5047 goto ReTry;
5048 }
5049 }
5050#endif /* CONFIG_FREEZE */
5051 assert(kdp_mode == FALSE);
5052 retval = c_seg_swapin(c_seg, FALSE, TRUE);
5053 assert(retval == 0);
5054
5055 retval = 1;
5056 }
5057 if (c_seg->c_state == C_ON_BAD_Q) {
5058 assert(c_seg->c_store.c_buffer == NULL);
5059 *zeroslot = 0;
5060
5061 retval = -1;
5062 goto done;
5063 }
5064
5065#if POPCOUNT_THE_COMPRESSED_DATA
5066 unsigned csvpop;
5067 uintptr_t csvaddr = (uintptr_t) &c_seg->c_store.c_buffer[cs->c_offset];
5068 if (cs->c_pop_cdata != (csvpop = vmc_pop(csvaddr, c_size))) {
5069 panic("Compressed data popcount doesn't match original, bit distance: %d %p (phys: %p) %p %p 0x%x 0x%x 0x%x 0x%x", (csvpop - cs->c_pop_cdata), (void *)csvaddr, (void *) kvtophys(csvaddr), c_seg, cs, cs->c_offset, c_size, csvpop, cs->c_pop_cdata);
5070 }
5071#endif
5072
5073#if CHECKSUM_THE_COMPRESSED_DATA
5074 unsigned csvhash;
5075 if (cs->c_hash_compressed_data != (csvhash = vmc_hash((char *)&c_seg->c_store.c_buffer[cs->c_offset], c_size))) {
5076 panic("Compressed data doesn't match original %p %p %u %u %u", c_seg, cs, c_size, cs->c_hash_compressed_data, csvhash);
5077 }
5078#endif
5079 if (c_rounded_size == PAGE_SIZE) {
5080 /*
5081 * page wasn't compressible... just copy it out
5082 */
			memcpy(dst, &c_seg->c_store.c_buffer[cs->c_offset], PAGE_SIZE);
5084 } else if (c_size == 4) {
5085 int32_t data;
5086 int32_t *dptr;
5087
5088 /*
5089 * page was populated with a single value
5090 * that didn't fit into our fast hash
5091 * so we packed it in as a single non-compressed value
5092 * that we need to populate the page with
5093 */
5094 dptr = (int32_t *)(uintptr_t)dst;
5095 data = *(int32_t *)(&c_seg->c_store.c_buffer[cs->c_offset]);
			sv_decompress(dptr, data);
5097 } else {
5098 uint32_t my_cpu_no;
5099 char *scratch_buf;
5100
5101 if (__probable(!kdp_mode)) {
5102 /*
5103 * we're behind the c_seg lock held in spin mode
5104 * which means pre-emption is disabled... therefore
5105 * the following sequence is atomic and safe
5106 */
5107 my_cpu_no = cpu_number();
5108
5109 assert(my_cpu_no < compressor_cpus);
5110
5111 scratch_buf = &compressor_scratch_bufs[my_cpu_no * vm_compressor_get_decode_scratch_size()];
5112 } else {
5113 scratch_buf = kdp_compressor_scratch_buf;
5114 }
5115
5116 if (vm_compressor_algorithm() != VM_COMPRESSOR_DEFAULT_CODEC) {
5117#if defined(__arm64__)
5118 uint16_t c_codec = cs->c_codec;
5119 uint32_t inline_popcount;
			if (!metadecompressor((const uint8_t *) &c_seg->c_store.c_buffer[cs->c_offset],
			    (uint8_t *)dst, c_size, c_codec, (void *)scratch_buf, &inline_popcount)) {
5122 retval = -1;
5123 } else {
5124 assert(inline_popcount == C_SLOT_NO_POPCOUNT);
5125 }
5126#endif
5127 } else {
5128#if defined(__arm64__)
5129 __unreachable_ok_push
5130 if (PAGE_SIZE == 4096) {
				WKdm_decompress_4k((WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset],
				    (WK_word *)(uintptr_t)dst, (WK_word *)(uintptr_t)scratch_buf, c_size);
			} else {
				WKdm_decompress_16k((WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset],
				    (WK_word *)(uintptr_t)dst, (WK_word *)(uintptr_t)scratch_buf, c_size);
5136 }
5137 __unreachable_ok_pop
5138#else
5139 WKdm_decompress_new((WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset],
5140 (WK_word *)(uintptr_t)dst, (WK_word *)(uintptr_t)scratch_buf, c_size);
5141#endif
5142 }
5143 }
5144
5145#if CHECKSUM_THE_DATA
5146 if (cs->c_hash_data != vmc_hash(dst, PAGE_SIZE)) {
5147#if defined(__arm64__)
5148 int32_t *dinput = &c_seg->c_store.c_buffer[cs->c_offset];
5149 panic("decompressed data doesn't match original cs: %p, hash: 0x%x, offset: %d, c_size: %d, c_rounded_size: %d, codec: %d, header: 0x%x 0x%x 0x%x", cs, cs->c_hash_data, cs->c_offset, c_size, c_rounded_size, cs->c_codec, *dinput, *(dinput + 1), *(dinput + 2));
5150#else
5151 panic("decompressed data doesn't match original cs: %p, hash: %d, offset: 0x%x, c_size: %d", cs, cs->c_hash_data, cs->c_offset, c_size);
5152#endif
5153 }
5154#endif
5155 if (c_seg->c_swappedin_ts == 0 && !kdp_mode) {
			clock_get_system_nanotime(&cur_ts_sec, &cur_ts_nsec);
5157
5158 age_of_cseg = (uint32_t)cur_ts_sec - c_seg->c_creation_ts;
5159 if (age_of_cseg < DECOMPRESSION_SAMPLE_MAX_AGE) {
5160 OSAddAtomic(1, &age_of_decompressions_during_sample_period[age_of_cseg]);
5161 } else {
5162 OSAddAtomic(1, &overage_decompressions_during_sample_period);
5163 }
5164
5165 OSAddAtomic(1, &sample_period_decompression_count);
5166 }
5167 }
5168#if CONFIG_FREEZE
5169 else {
5170 /*
5171 * We are freeing an uncompressed page from this c_seg and so balance the ledgers.
5172 */
5173 if (C_SEG_IS_ONDISK(c_seg)) {
5174 /*
5175 * The compression sweep feature will push out anonymous pages to disk
5176 * without going through the freezer path and so those c_segs, while
5177 * swapped out, won't have an owner.
5178 */
5179 if (c_seg->c_task_owner) {
5180 task_update_frozen_to_swap_acct(c_seg->c_task_owner, PAGE_SIZE_64, DEBIT_FROM_SWAP);
5181 }
5182
5183 /*
5184 * We are freeing a page in swap without swapping it in. We bump the in-core
5185 * count here to simulate a swapin of a page so that we can accurately
5186 * decrement it below.
5187 */
5188 OSAddAtomic(1, &c_segment_pages_compressed_incore);
5189 if (c_seg->c_has_donated_pages) {
5190 OSAddAtomic(1, &c_segment_pages_compressed_incore_late_swapout);
5191 }
5192 } else if (c_seg->c_state == C_ON_BAD_Q) {
5193 assert(c_seg->c_store.c_buffer == NULL);
5194 *zeroslot = 0;
5195
5196 retval = -1;
5197 goto done;
5198 }
5199 }
5200#endif /* CONFIG_FREEZE */
5201
5202 if (flags & C_KEEP) {
5203 *zeroslot = 0;
5204 goto done;
5205 }
5206 assert(kdp_mode == FALSE);
5207
5208 c_seg->c_bytes_unused += c_rounded_size;
5209 c_seg->c_bytes_used -= c_rounded_size;
5210
5211 assert(c_seg->c_slots_used);
5212 c_seg->c_slots_used--;
5213 if (dst && c_seg->c_swappedin) {
5214 task_t task = current_task();
5215 if (task) {
			ledger_credit(task->ledger, task_ledgers.swapins, PAGE_SIZE);
5217 }
5218 }
5219
5220 PACK_C_SIZE(cs, 0);
5221
5222 if (c_indx < c_seg->c_firstemptyslot) {
5223 c_seg->c_firstemptyslot = c_indx;
5224 }
5225
5226 OSAddAtomic(-1, &c_segment_pages_compressed);
5227#if CONFIG_FREEZE
5228 OSAddAtomic(-1, &c_segment_pages_compressed_incore);
5229 assertf(c_segment_pages_compressed_incore >= 0, "-ve incore count %p 0x%x", c_seg, c_segment_pages_compressed_incore);
5230 if (c_seg->c_has_donated_pages) {
5231 OSAddAtomic(-1, &c_segment_pages_compressed_incore_late_swapout);
5232 assertf(c_segment_pages_compressed_incore_late_swapout >= 0, "-ve lateswapout count %p 0x%x", c_seg, c_segment_pages_compressed_incore_late_swapout);
5233 }
5234#endif /* CONFIG_FREEZE */
5235
5236 if (c_seg->c_state != C_ON_BAD_Q && !(C_SEG_IS_ONDISK(c_seg))) {
5237 /*
5238 * C_SEG_IS_ONDISK == TRUE can occur when we're doing a
5239 * free of a compressed page (i.e. dst == NULL)
5240 */
5241 OSAddAtomic64(-c_rounded_size, &compressor_bytes_used);
5242 }
5243 if (c_seg->c_busy_swapping) {
5244 /*
5245 * bypass case for c_busy_swapping...
5246 * let the swapin/swapout paths deal with putting
5247 * the c_seg on the minor compaction queue if needed
5248 */
5249 assert(c_seg->c_busy);
5250 goto done;
5251 }
5252 assert(!c_seg->c_busy);
5253
5254 if (c_seg->c_state != C_IS_FILLING) {
5255 if (c_seg->c_bytes_used == 0) {
5256 if (!(C_SEG_IS_ONDISK(c_seg))) {
5257 int pages_populated;
5258
5259 pages_populated = (round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset))) / PAGE_SIZE;
5260 c_seg->c_populated_offset = C_SEG_BYTES_TO_OFFSET(0);
5261
5262 if (pages_populated) {
5263 assert(c_seg->c_state != C_ON_BAD_Q);
5264 assert(c_seg->c_store.c_buffer != NULL);
5265
5266 C_SEG_BUSY(c_seg);
5267 lck_mtx_unlock_always(&c_seg->c_lock);
5268
5269 kernel_memory_depopulate(
5270 (vm_offset_t) c_seg->c_store.c_buffer,
5271 ptoa(pages_populated),
5272 KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR);
5273 
5274 lck_mtx_lock_spin_always(&c_seg->c_lock);
5275 C_SEG_WAKEUP_DONE(c_seg);
5276 }
5277 if (!c_seg->c_on_minorcompact_q && c_seg->c_state != C_ON_SWAPIO_Q) {
5278 if (c_seg->c_state == C_ON_SWAPOUT_Q) {
5279 bool clear_busy = false;
5280 if (!lck_mtx_try_lock_spin_always(c_list_lock)) {
5281 C_SEG_BUSY(c_seg);
5282
5283 lck_mtx_unlock_always(&c_seg->c_lock);
5284 lck_mtx_lock_spin_always(c_list_lock);
5285 lck_mtx_lock_spin_always(&c_seg->c_lock);
5286 clear_busy = true;
5287 }
5288 c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
5289 if (clear_busy) {
5290 C_SEG_WAKEUP_DONE(c_seg);
5291 clear_busy = false;
5292 }
5293 lck_mtx_unlock_always(c_list_lock);
5294 }
5295 c_seg_need_delayed_compaction(c_seg, FALSE);
5296 }
5297 } else {
5298 if (c_seg->c_state != C_ON_SWAPPEDOUTSPARSE_Q) {
5299 c_seg_move_to_sparse_list(c_seg);
5300 consider_defragmenting = TRUE;
5301 }
5302 }
5303 } else if (c_seg->c_on_minorcompact_q) {
5304 assert(c_seg->c_state != C_ON_BAD_Q);
5305 assert(!C_SEG_IS_ON_DISK_OR_SOQ(c_seg));
5306
5307 if (C_SEG_SHOULD_MINORCOMPACT_NOW(c_seg)) {
5308 c_seg_try_minor_compaction_and_unlock(c_seg);
5309 need_unlock = FALSE;
5310 }
5311 } else if (!(C_SEG_IS_ONDISK(c_seg))) {
5312 if (c_seg->c_state != C_ON_BAD_Q && c_seg->c_state != C_ON_SWAPOUT_Q && c_seg->c_state != C_ON_SWAPIO_Q &&
5313 C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE) {
5314 c_seg_need_delayed_compaction(c_seg, FALSE);
5315 }
5316 } else if (c_seg->c_state != C_ON_SWAPPEDOUTSPARSE_Q && C_SEG_ONDISK_IS_SPARSE(c_seg)) {
5317 c_seg_move_to_sparse_list(c_seg);
5318 consider_defragmenting = TRUE;
5319 }
5320 }
5321done:
5322 if (__improbable(kdp_mode)) {
5323 return retval;
5324 }
5325
5326 if (need_unlock == TRUE) {
5327 lck_mtx_unlock_always(&c_seg->c_lock);
5328 }
5329
5330 PAGE_REPLACEMENT_DISALLOWED(FALSE);
5331
5332 if (consider_defragmenting == TRUE) {
5333 vm_swap_consider_defragmenting(VM_SWAP_FLAGS_NONE);
5334 }
5335
5336#if !XNU_TARGET_OS_OSX
5337 if ((c_minor_count && COMPRESSOR_NEEDS_TO_MINOR_COMPACT()) || vm_compressor_needs_to_major_compact()) {
5338 vm_wake_compactor_swapper();
5339 }
5340#endif /* !XNU_TARGET_OS_OSX */
5341
5342 return retval;
5343}
5344
5345
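/*
 * Returns true when the slot refers to data held in the compressor proper.
 * When CONFIG_TRACK_UNMODIFIED_ANON_PAGES is enabled, a slot may instead
 * reference an uncompressed copy kept on disk (s_uncompressed set), in
 * which case this returns false.
 */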
5346inline bool
5347vm_compressor_is_slot_compressed(int *slot)
5348{
5349#if !CONFIG_TRACK_UNMODIFIED_ANON_PAGES
5350#pragma unused(slot)
5351 return true;
5352#else /* !CONFIG_TRACK_UNMODIFIED_ANON_PAGES*/
5353 c_slot_mapping_t slot_ptr = (c_slot_mapping_t)slot;
5354 return !slot_ptr->s_uncompressed;
5355#endif /* !CONFIG_TRACK_UNMODIFIED_ANON_PAGES*/
5356}
5357
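/*
 * vm_compressor_get: decompress (or fetch) the page backing 'slot' into
 * physical page 'pn'.  Single-value pages are rebuilt directly from the
 * SV hash table; everything else goes through c_decompress_page().
 * Return values mirror c_decompress_page() (see the comment near the
 * bottom of this routine).
 */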
5358int
5359vm_compressor_get(ppnum_t pn, int *slot, vm_compressor_options_t flags)
5360{
5361 c_slot_mapping_t slot_ptr;
5362 char *dst;
5363 int zeroslot = 1;
5364 int retval;
5365
5366#if CONFIG_TRACK_UNMODIFIED_ANON_PAGES
5367 if (flags & C_PAGE_UNMODIFIED) {
5368 retval = vm_uncompressed_get(pn, slot, flags | C_KEEP);
5369 if (retval == 0) {
5370 os_atomic_inc(&compressor_ro_uncompressed_get, relaxed);
5371 }
5372
5373 return retval;
5374 }
5375#endif /* CONFIG_TRACK_UNMODIFIED_ANON_PAGES */
5376
5377 dst = pmap_map_compressor_page(pn);
5378 slot_ptr = (c_slot_mapping_t)slot;
5379
5380 assert(dst != NULL);
5381
5382 if (slot_ptr->s_cseg == C_SV_CSEG_ID) {
5383 int32_t data;
5384 int32_t *dptr;
5385
5386 /*
5387 * The page was populated with a single repeated value
5388 * that found a home in our single-value hash table.
5389 * Grab that value from the hash entry and use it to
5390 * repopulate the page.
5391 */
5392 dptr = (int32_t *)(uintptr_t)dst;
5393 data = c_segment_sv_hash_table[slot_ptr->s_cindx].he_data;
5394 sv_decompress(dptr, data);
5395 if (!(flags & C_KEEP)) {
5396 c_segment_sv_hash_drop_ref(slot_ptr->s_cindx);
5397
5398 OSAddAtomic(-1, &c_segment_pages_compressed);
5399 *slot = 0;
5400 }
5401 if (data) {
5402 OSAddAtomic(1, &c_segment_svp_nonzero_decompressions);
5403 } else {
5404 OSAddAtomic(1, &c_segment_svp_zero_decompressions);
5405 }
5406
5407 pmap_unmap_compressor_page(pn, dst);
5408 return 0;
5409 }
5410
5411 retval = c_decompress_page(dst, slot_ptr, flags, &zeroslot);
5412
5413 /*
5414 * zeroslot will be set to 0 by c_decompress_page if (flags & C_KEEP)
5415 * or (flags & C_DONT_BLOCK) and we found 'c_busy' or 'C_SEG_IS_ONDISK' to be TRUE
5416 */
5417 if (zeroslot) {
5418 *slot = 0;
5419 }
5420
5421 pmap_unmap_compressor_page(pn, dst);
5422
5423 /*
5424 * returns 0 if we successfully decompressed a page from a segment already in memory
5425 * returns 1 if we had to first swap in the segment, before successfully decompressing the page
5426 * returns -1 if we encountered an error swapping in the segment - decompression failed
5427 * returns -2 if (flags & C_DONT_BLOCK) and we found 'c_busy' or 'C_SEG_IS_ONDISK' to be true
5428 */
5429 return retval;
5430}
5431
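/*
 * vm_compressor_free: release the backing store for 'slot' without
 * decompressing it.  Single-value slots just drop their hash reference;
 * tracked-uncompressed slots release their on-disk copy unless
 * C_PAGE_UNMODIFIED is passed; everything else is freed via
 * c_decompress_page(NULL, ...).
 */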
5432int
5433vm_compressor_free(int *slot, vm_compressor_options_t flags)
5434{
5435 bool slot_is_compressed = vm_compressor_is_slot_compressed(slot);
5436
5437 if (slot_is_compressed) {
5438 c_slot_mapping_t slot_ptr;
5439 int zeroslot = 1;
5440 int retval = 0;
5441
5442 assert(flags == 0 || flags == C_DONT_BLOCK);
5443
5444 slot_ptr = (c_slot_mapping_t)slot;
5445
5446 if (slot_ptr->s_cseg == C_SV_CSEG_ID) {
5447 c_segment_sv_hash_drop_ref(slot_ptr->s_cindx);
5448 OSAddAtomic(-1, &c_segment_pages_compressed);
5449
5450 *slot = 0;
5451 return 0;
5452 }
5453 retval = c_decompress_page(NULL, slot_ptr, flags, &zeroslot);
5454 /*
5455 * returns 0 if we successfully freed the specified compressed page
5456 * returns -1 if we encountered an error swapping in the segment - decompression failed
5457 * returns -2 if (flags & C_DONT_BLOCK) and we found 'c_busy' set
5458 */
5459
5460 if (retval == 0) {
5461 *slot = 0;
5462 }
5463
5464 return retval;
5465 }
5466#if CONFIG_TRACK_UNMODIFIED_ANON_PAGES
5467 else {
5468 if ((flags & C_PAGE_UNMODIFIED) == 0) {
5469 /* Moving from the uncompressed state to the compressed state, so free the uncompressed copy. */
5470 vm_uncompressed_free(slot, 0);
5471 assert(*slot == 0);
5472 }
5473 }
5474#endif /* CONFIG_TRACK_UNMODIFIED_ANON_PAGES */
5475 return KERN_SUCCESS;
5476}
5477
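/*
 * vm_compressor_put: compress physical page 'pn' into the segment currently
 * being filled (*current_chead) and record its location in 'slot'.  With
 * CONFIG_TRACK_UNMODIFIED_ANON_PAGES, an unmodified page is instead written
 * out uncompressed when possible.
 */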
5478int
5479vm_compressor_put(ppnum_t pn, int *slot, void **current_chead, char *scratch_buf, bool unmodified)
5480{
5481 char *src;
5482 int retval = 0;
5483
5484#if CONFIG_TRACK_UNMODIFIED_ANON_PAGES
5485 if (unmodified) {
5486 if (*slot) {
5487 os_atomic_inc(&compressor_ro_uncompressed_skip_returned, relaxed);
5488 return retval;
5489 } else {
5490 retval = vm_uncompressed_put(pn, slot);
5491 if (retval == KERN_SUCCESS) {
5492 os_atomic_inc(&compressor_ro_uncompressed_put, relaxed);
5493 return retval;
5494 }
5495 }
5496 }
5497#else /* CONFIG_TRACK_UNMODIFIED_ANON_PAGES */
5498#pragma unused(unmodified)
5499#endif /* CONFIG_TRACK_UNMODIFIED_ANON_PAGES */
5500
5501 src = pmap_map_compressor_page(pn);
5502 assert(src != NULL);
5503
5504 retval = c_compress_page(src, (c_slot_mapping_t)slot, (c_segment_t *)current_chead, scratch_buf);
5505 pmap_unmap_compressor_page(pn, src);
5506
5507 return retval;
5508}
5509
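/*
 * vm_compressor_transfer: move ownership of a compressed slot from
 * *src_slot_p to *dst_slot_p, repointing the c_slot's packed back-pointer
 * at the destination mapping.  The compressed data itself is not moved.
 */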
5510void
5511vm_compressor_transfer(
5512 int *dst_slot_p,
5513 int *src_slot_p)
5514{
5515 c_slot_mapping_t dst_slot, src_slot;
5516 c_segment_t c_seg;
5517 uint16_t c_indx;
5518 c_slot_t cs;
5519
5520 src_slot = (c_slot_mapping_t) src_slot_p;
5521
5522 if (src_slot->s_cseg == C_SV_CSEG_ID || !vm_compressor_is_slot_compressed(src_slot_p)) {
5523 *dst_slot_p = *src_slot_p;
5524 *src_slot_p = 0;
5525 return;
5526 }
5527 dst_slot = (c_slot_mapping_t) dst_slot_p;
5528Retry:
5529 PAGE_REPLACEMENT_DISALLOWED(TRUE);
5530 /* get segment for src_slot */
5531 c_seg = c_segments[src_slot->s_cseg - 1].c_seg;
5532 /* lock segment */
5533 lck_mtx_lock_spin_always(&c_seg->c_lock);
5534 /* wait if it's busy */
5535 if (c_seg->c_busy && !c_seg->c_busy_swapping) {
5536 PAGE_REPLACEMENT_DISALLOWED(FALSE);
5537 c_seg_wait_on_busy(c_seg);
5538 goto Retry;
5539 }
5540 /* find the c_slot */
5541 c_indx = src_slot->s_cindx;
5542 cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx);
5543 /* point the c_slot back to dst_slot instead of src_slot */
5544 C_SLOT_ASSERT_PACKABLE(dst_slot);
5545 cs->c_packed_ptr = C_SLOT_PACK_PTR(dst_slot);
5546 /* transfer */
5547 *dst_slot_p = *src_slot_p;
5548 *src_slot_p = 0;
5549 lck_mtx_unlock_always(&c_seg->c_lock);
5550 PAGE_REPLACEMENT_DISALLOWED(FALSE);
5551}
5552
5553#if defined(__arm64__)
5554extern clock_sec_t vm_swapfile_last_failed_to_create_ts;
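/*
 * Panic helper used when hibernate_write_image() fails: the panic string
 * captures the compressor queue counts and swapfile state at the time of
 * the failure.
 */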
5555__attribute__((noreturn))
5556void
5557vm_panic_hibernate_write_image_failed(int err)
5558{
5559 panic("hibernate_write_image encountered error 0x%x - %u, %u, %d, %d, %d, %d, %d, %d, %d, %d, %llu, %d, %d, %d\n",
5560 err,
5561 VM_PAGE_COMPRESSOR_COUNT, vm_page_wire_count,
5562 c_age_count, c_major_count, c_minor_count, (c_early_swapout_count + c_regular_swapout_count + c_late_swapout_count), c_swappedout_sparse_count,
5563 vm_num_swap_files, vm_num_pinned_swap_files, vm_swappin_enabled, vm_swap_put_failures,
5564 (vm_swapfile_last_failed_to_create_ts ? 1:0), hibernate_no_swapspace, hibernate_flush_timed_out);
5565}
5566#endif /*(__arm64__)*/
5567
5568#if CONFIG_FREEZE
5569
5570int freezer_finished_filling = 0;
5571
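/*
 * Called by the freezer when it is done packing a task's pages, so that the
 * partially filled c_seg headed by *current_chead is sealed via
 * c_current_seg_filled() rather than left in the C_IS_FILLING state.
 */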
5572void
5573vm_compressor_finished_filling(
5574 void **current_chead)
5575{
5576 c_segment_t c_seg;
5577
5578 if ((c_seg = *(c_segment_t *)current_chead) == NULL) {
5579 return;
5580 }
5581
5582 assert(c_seg->c_state == C_IS_FILLING);
5583
5584 lck_mtx_lock_spin_always(&c_seg->c_lock);
5585
5586 c_current_seg_filled(c_seg, (c_segment_t *)current_chead);
5587
5588 lck_mtx_unlock_always(&c_seg->c_lock);
5589
5590 freezer_finished_filling++;
5591}
5592
5593
5594/*
5595 * This routine is used to transfer the compressed chunks from
5596 * the c_seg/cindx pointed to by slot_p into a new c_seg headed
5597 * by the current_chead and a new cindx within that c_seg.
5598 *
5599 * Currently, this routine is only used by the "freezer backed by
5600 * compressor with swap" mode to create a series of c_segs that
5601 * only contain compressed data belonging to one task. So, we
5602 * move a task's previously compressed data into a set of new
5603 * c_segs which will also hold the task's yet to be compressed data.
5604 */
5605
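/*
 * Illustrative sketch only (not part of the build): the freezer path is
 * expected to walk a task's existing compressed slots and call this routine
 * for each one while it also compresses the task's resident pages into the
 * same *current_chead, roughly:
 *
 *     void **chead = &task_freeze_chead;       // hypothetical per-task chead
 *     for each compressed slot_p owned by the task {
 *         if (vm_compressor_relocate(chead, slot_p) != KERN_SUCCESS)
 *             break;                            // e.g. KERN_RESOURCE_SHORTAGE
 *     }
 *     vm_compressor_finished_filling(chead);
 *
 * The names above are placeholders; the real caller lives in the freezer /
 * vm_map freezing path, not in this file.
 */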
5606kern_return_t
5607vm_compressor_relocate(
5608 void **current_chead,
5609 int *slot_p)
5610{
5611 c_slot_mapping_t slot_ptr;
5612 c_slot_mapping_t src_slot;
5613 uint32_t c_rounded_size;
5614 uint32_t c_size;
5615 uint16_t dst_slot;
5616 c_slot_t c_dst;
5617 c_slot_t c_src;
5618 uint16_t c_indx;
5619 c_segment_t c_seg_dst = NULL;
5620 c_segment_t c_seg_src = NULL;
5621 kern_return_t kr = KERN_SUCCESS;
5622
5623
5624 src_slot = (c_slot_mapping_t) slot_p;
5625
5626 if (src_slot->s_cseg == C_SV_CSEG_ID) {
5627 /*
5628 * no need to relocate... this is a page full of a single
5629 * value which is hashed to a single entry not contained
5630 * in a c_segment_t
5631 */
5632 return kr;
5633 }
5634
5635 if (vm_compressor_is_slot_compressed((int *)src_slot) == false) {
5636 /*
5637 * Unmodified anonymous pages are sitting uncompressed on disk.
5638 * So don't pull them back in again.
5639 */
5640 return kr;
5641 }
5642
5643Relookup_dst:
5644 c_seg_dst = c_seg_allocate((c_segment_t *)current_chead);
5645 /*
5646 * returns with c_seg lock held
5647 * and PAGE_REPLACEMENT_DISALLOWED(TRUE)...
5648 * c_nextslot has been allocated and
5649 * c_store.c_buffer populated
5650 */
5651 if (c_seg_dst == NULL) {
5652 /*
5653 * Out of compression segments?
5654 */
5655 kr = KERN_RESOURCE_SHORTAGE;
5656 goto out;
5657 }
5658
5659 assert(c_seg_dst->c_busy == 0);
5660
5661 C_SEG_BUSY(c_seg_dst);
5662
5663 dst_slot = c_seg_dst->c_nextslot;
5664
5665 lck_mtx_unlock_always(&c_seg_dst->c_lock);
5666
5667Relookup_src:
5668 c_seg_src = c_segments[src_slot->s_cseg - 1].c_seg;
5669
5670 assert(c_seg_dst != c_seg_src);
5671
5672 lck_mtx_lock_spin_always(&c_seg_src->c_lock);
5673
5674 if (C_SEG_IS_ON_DISK_OR_SOQ(c_seg_src) ||
5675 c_seg_src->c_state == C_IS_FILLING) {
5676 /*
5677 * Skip this page if:
5678 * a) the src c_seg is already on-disk (or on its way there)
5679 * A "thaw" can mark a process as eligible for
5680 * another freeze cycle without bringing any of
5681 * its swapped out c_segs back from disk (because
5682 * that is done on-demand).
5683 * Or, this page may be mapped elsewhere in the task's map,
5684 * and we may have marked it for swap already.
5685 *
5686 * b) Or, the src c_seg is being filled by the compressor
5687 * thread. We don't want the added latency of waiting for
5688 * this c_seg in the freeze path and so we skip it.
5689 */
5690
5691 PAGE_REPLACEMENT_DISALLOWED(FALSE);
5692
5693 lck_mtx_unlock_always(&c_seg_src->c_lock);
5694
5695 c_seg_src = NULL;
5696
5697 goto out;
5698 }
5699
5700 if (c_seg_src->c_busy) {
5701 PAGE_REPLACEMENT_DISALLOWED(FALSE);
5702 c_seg_wait_on_busy(c_seg_src);
5703
5704 c_seg_src = NULL;
5705
5706 PAGE_REPLACEMENT_DISALLOWED(TRUE);
5707
5708 goto Relookup_src;
5709 }
5710
5711 C_SEG_BUSY(c_seg_src);
5712
5713 lck_mtx_unlock_always(&c_seg_src->c_lock);
5714
5715 PAGE_REPLACEMENT_DISALLOWED(FALSE);
5716
5717 /* find the c_slot */
5718 c_indx = src_slot->s_cindx;
5719
5720 c_src = C_SEG_SLOT_FROM_INDEX(c_seg_src, c_indx);
5721
5722 c_size = UNPACK_C_SIZE(c_src);
5723
5724 assert(c_size);
5725
5726 if (c_size > (uint32_t)(c_seg_bufsize - C_SEG_OFFSET_TO_BYTES((int32_t)c_seg_dst->c_nextoffset))) {
5727 /*
5728 * This segment is full. We need a new one.
5729 */
5730
5731 PAGE_REPLACEMENT_DISALLOWED(TRUE);
5732
5733 lck_mtx_lock_spin_always(&c_seg_src->c_lock);
5734 C_SEG_WAKEUP_DONE(c_seg_src);
5735 lck_mtx_unlock_always(&c_seg_src->c_lock);
5736
5737 c_seg_src = NULL;
5738
5739 lck_mtx_lock_spin_always(&c_seg_dst->c_lock);
5740
5741 assert(c_seg_dst->c_busy);
5742 assert(c_seg_dst->c_state == C_IS_FILLING);
5743 assert(!c_seg_dst->c_on_minorcompact_q);
5744
5745 c_current_seg_filled(c_seg_dst, (c_segment_t *)current_chead);
5746 assert(*current_chead == NULL);
5747
5748 C_SEG_WAKEUP_DONE(c_seg_dst);
5749
5750 lck_mtx_unlock_always(&c_seg_dst->c_lock);
5751
5752 c_seg_dst = NULL;
5753
5754 PAGE_REPLACEMENT_DISALLOWED(FALSE);
5755
5756 goto Relookup_dst;
5757 }
5758
5759 c_dst = C_SEG_SLOT_FROM_INDEX(c_seg_dst, c_seg_dst->c_nextslot);
5760
5761 memcpy(&c_seg_dst->c_store.c_buffer[c_seg_dst->c_nextoffset], &c_seg_src->c_store.c_buffer[c_src->c_offset], c_size);
5762 /*
5763 * Is platform alignment actually necessary since wkdm aligns its output?
5764 */
5765 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;
5766
5767 cslot_copy(c_dst, c_src);
5768 c_dst->c_offset = c_seg_dst->c_nextoffset;
5769
5770 if (c_seg_dst->c_firstemptyslot == c_seg_dst->c_nextslot) {
5771 c_seg_dst->c_firstemptyslot++;
5772 }
5773
5774 c_seg_dst->c_slots_used++;
5775 c_seg_dst->c_nextslot++;
5776 c_seg_dst->c_bytes_used += c_rounded_size;
5777 c_seg_dst->c_nextoffset += C_SEG_BYTES_TO_OFFSET(c_rounded_size);
5778
5779
5780 PACK_C_SIZE(c_src, 0);
5781
5782 c_seg_src->c_bytes_used -= c_rounded_size;
5783 c_seg_src->c_bytes_unused += c_rounded_size;
5784
5785 assert(c_seg_src->c_slots_used);
5786 c_seg_src->c_slots_used--;
5787
5788 if (!c_seg_src->c_swappedin) {
5789 /* Pessimistically lose swappedin status when non-swappedin pages are added. */
5790 c_seg_dst->c_swappedin = false;
5791 }
5792
5793 if (c_indx < c_seg_src->c_firstemptyslot) {
5794 c_seg_src->c_firstemptyslot = c_indx;
5795 }
5796
5797 c_dst = C_SEG_SLOT_FROM_INDEX(c_seg_dst, dst_slot);
5798
5799 PAGE_REPLACEMENT_ALLOWED(TRUE);
5800 slot_ptr = C_SLOT_UNPACK_PTR(c_dst);
5801 /* <csegno=0,indx=0> would mean "empty slot", so use csegno+1 */
5802 slot_ptr->s_cseg = c_seg_dst->c_mysegno + 1;
5803 slot_ptr->s_cindx = dst_slot;
5804
5805 PAGE_REPLACEMENT_ALLOWED(FALSE);
5806
5807out:
5808 if (c_seg_src) {
5809 lck_mtx_lock_spin_always(&c_seg_src->c_lock);
5810
5811 C_SEG_WAKEUP_DONE(c_seg_src);
5812
5813 if (c_seg_src->c_bytes_used == 0 && c_seg_src->c_state != C_IS_FILLING) {
5814 if (!c_seg_src->c_on_minorcompact_q) {
5815 c_seg_need_delayed_compaction(c_seg_src, FALSE);
5816 }
5817 }
5818
5819 lck_mtx_unlock_always(&c_seg_src->c_lock);
5820 }
5821
5822 if (c_seg_dst) {
5823 PAGE_REPLACEMENT_DISALLOWED(TRUE);
5824
5825 lck_mtx_lock_spin_always(&c_seg_dst->c_lock);
5826
5827 if (c_seg_dst->c_nextoffset >= c_seg_off_limit || c_seg_dst->c_nextslot >= C_SLOT_MAX_INDEX) {
5828 /*
5829 * Nearing or exceeded maximum slot and offset capacity.
5830 */
5831 assert(c_seg_dst->c_busy);
5832 assert(c_seg_dst->c_state == C_IS_FILLING);
5833 assert(!c_seg_dst->c_on_minorcompact_q);
5834
5835 c_current_seg_filled(c_seg_dst, (c_segment_t *)current_chead);
5836 assert(*current_chead == NULL);
5837 }
5838
5839 C_SEG_WAKEUP_DONE(c_seg_dst);
5840
5841 lck_mtx_unlock_always(&c_seg_dst->c_lock);
5842
5843 c_seg_dst = NULL;
5844
5845 PAGE_REPLACEMENT_DISALLOWED(FALSE);
5846 }
5847
5848 return kr;
5849}
5850#endif /* CONFIG_FREEZE */
5851
5852#if DEVELOPMENT || DEBUG
5853
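/*
 * Test-only helper: flip a bit in the compressed payload backing 'slot' so
 * that a later decompression fails its integrity check.  Used by the
 * decompression_failure test; see the inline comments for the locking
 * choreography required to make the segment temporarily writable.
 */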
5854void
5855vm_compressor_inject_error(int *slot)
5856{
5857 c_slot_mapping_t slot_ptr = (c_slot_mapping_t)slot;
5858
5859 /* No error detection for single-value compression. */
5860 if (slot_ptr->s_cseg == C_SV_CSEG_ID) {
5861 printf("%s(): cannot inject errors in SV-compressed pages\n", __func__ );
5862 return;
5863 }
5864
5865 /* s_cseg is actually "segno+1" */
5866 const uint32_t c_segno = slot_ptr->s_cseg - 1;
5867
5868 assert(c_segno < c_segments_available);
5869 assert(c_segments[c_segno].c_segno >= c_segments_available);
5870
5871 const c_segment_t c_seg = c_segments[c_segno].c_seg;
5872
5873 PAGE_REPLACEMENT_DISALLOWED(TRUE);
5874
5875 lck_mtx_lock_spin_always(&c_seg->c_lock);
5876 assert(c_seg->c_state != C_IS_EMPTY && c_seg->c_state != C_IS_FREE);
5877
5878 const uint16_t c_indx = slot_ptr->s_cindx;
5879 assert(c_indx < c_seg->c_nextslot);
5880
5881 /*
5882 * To safely make this segment temporarily writable, we need to mark
5883 * the segment busy, which allows us to release the segment lock.
5884 */
5885 while (c_seg->c_busy) {
5886 c_seg_wait_on_busy(c_seg);
5887 lck_mtx_lock_spin_always(&c_seg->c_lock);
5888 }
5889 C_SEG_BUSY(c_seg);
5890
5891 bool already_writable = (c_seg->c_state == C_IS_FILLING);
5892 if (!already_writable) {
5893 /*
5894 * Protection update must be performed preemptibly, so temporarily drop
5895 * the lock. Having set c_busy will prevent most other concurrent
5896 * operations.
5897 */
5898 lck_mtx_unlock_always(&c_seg->c_lock);
5899 C_SEG_MAKE_WRITEABLE(c_seg);
5900 lck_mtx_lock_spin_always(&c_seg->c_lock);
5901 }
5902
5903 /*
5904 * Once we've released the lock following our c_state == C_IS_FILLING check,
5905 * c_current_seg_filled() can (re-)write-protect the segment. However, it
5906 * will transition from C_IS_FILLING before releasing the c_seg lock, so we
5907 * can detect this by re-checking after we've reobtained the lock.
5908 */
5909 if (already_writable && c_seg->c_state != C_IS_FILLING) {
5910 lck_mtx_unlock_always(&c_seg->c_lock);
5911 C_SEG_MAKE_WRITEABLE(c_seg);
5912 lck_mtx_lock_spin_always(&c_seg->c_lock);
5913 already_writable = false;
5914 /* Segment can't be freed while c_busy is set. */
5915 assert(c_seg->c_state != C_IS_FILLING);
5916 }
5917
5918 /*
5919 * Skip if the segment is on disk. This check can only be performed after
5920 * the final acquisition of the segment lock before we attempt to write to
5921 * the segment.
5922 */
5923 if (!C_SEG_IS_ON_DISK_OR_SOQ(c_seg)) {
5924 c_slot_t cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx);
5925 int32_t *data = &c_seg->c_store.c_buffer[cs->c_offset];
5926 /* assume that the compressed data holds at least one int32_t */
5927 assert(UNPACK_C_SIZE(cs) > sizeof(*data));
5928 /*
5929 * This bit is known to be in the payload of a MISS packet resulting from
5930 * the test pattern used in decompression_failure.c.
5931 * Flipping it should result in many corrupted bits in the test page.
5932 */
5933 data[0] ^= 0x00000100;
5934 }
5935
5936 if (!already_writable) {
5937 lck_mtx_unlock_always(&c_seg->c_lock);
5938 C_SEG_WRITE_PROTECT(c_seg);
5939 lck_mtx_lock_spin_always(&c_seg->c_lock);
5940 }
5941
5942 C_SEG_WAKEUP_DONE(c_seg);
5943 lck_mtx_unlock_always(&c_seg->c_lock);
5944
5945 PAGE_REPLACEMENT_DISALLOWED(FALSE);
5946}
5947
5948#endif /* DEVELOPMENT || DEBUG */
5949
5950
5951#if CONFIG_TRACK_UNMODIFIED_ANON_PAGES
5952
5953struct vnode;
5954extern void vm_swapfile_open(const char *path, struct vnode **vp);
5955extern int vm_swapfile_preallocate(struct vnode *vp, uint64_t *size, boolean_t *pin);
5956
5957struct vnode *uncompressed_vp0 = NULL;
5958struct vnode *uncompressed_vp1 = NULL;
5959uint32_t uncompressed_file0_free_pages = 0, uncompressed_file1_free_pages = 0;
5960uint64_t uncompressed_file0_free_offset = 0, uncompressed_file1_free_offset = 0;
5961
5962uint64_t compressor_ro_uncompressed = 0;
5963uint64_t compressor_ro_uncompressed_total_returned = 0;
5964uint64_t compressor_ro_uncompressed_skip_returned = 0;
5965uint64_t compressor_ro_uncompressed_get = 0;
5966uint64_t compressor_ro_uncompressed_put = 0;
5967uint64_t compressor_ro_uncompressed_swap_usage = 0;
5968
5969extern void vnode_put(struct vnode* vp);
5970extern int vnode_getwithref(struct vnode* vp);
5971extern int vm_swapfile_io(struct vnode *vp, uint64_t offset, uint64_t start, int npages, int flags, void *upl_ctx);
5972
5973#define MAX_OFFSET_PAGES (255)
5974uint64_t uncompressed_file0_space_bitmap[MAX_OFFSET_PAGES];
5975uint64_t uncompressed_file1_space_bitmap[MAX_OFFSET_PAGES];
5976
5977#define UNCOMPRESSED_FILEIDX_OFFSET_MASK (((uint32_t)1<<31ull) - 1)
5978#define UNCOMPRESSED_FILEIDX_SHIFT (29)
5979#define UNCOMPRESSED_FILEIDX_MASK (3)
5980#define UNCOMPRESSED_OFFSET_SHIFT (29)
5981#define UNCOMPRESSED_OFFSET_MASK (7)
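/*
 * Slot encoding for uncompressed (tracked unmodified) pages, as inferred
 * from the extract/reserve helpers below:
 *
 *   bits  0..28  byte offset of the page within the backing swap file
 *   bits 29..30  backing file index: 1 -> uncompressed_vp0, 2 -> uncompressed_vp1
 *   the remaining high bit is used by c_slot_mapping's s_uncompressed flag
 *
 * e.g. a slot value of ((1 << UNCOMPRESSED_FILEIDX_SHIFT) + 0x4000) refers
 * to the page stored at offset 0x4000 in uncompressedswap0.
 */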
5982
5983static uint32_t
5984vm_uncompressed_extract_swap_file(int slot)
5985{
5986 uint32_t fileidx = (((uint32_t)slot & UNCOMPRESSED_FILEIDX_OFFSET_MASK) >> UNCOMPRESSED_FILEIDX_SHIFT) & UNCOMPRESSED_FILEIDX_MASK;
5987 return fileidx;
5988}
5989
5990static uint32_t
5991vm_uncompressed_extract_swap_offset(int slot)
5992{
5993 return slot & (uint32_t)(~(UNCOMPRESSED_OFFSET_MASK << UNCOMPRESSED_OFFSET_SHIFT));
5994}
5995
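/*
 * Clear the bitmap bit for the page referenced by 'slot' and return it to
 * the appropriate file's free pool; the file index and offset are decoded
 * per the layout comment above.
 */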
5996static void
5997vm_uncompressed_return_space_to_swap(int slot)
5998{
5999 PAGE_REPLACEMENT_ALLOWED(TRUE);
6000 uint32_t fileidx = vm_uncompressed_extract_swap_file(slot);
6001 if (fileidx == 1) {
6002 uint32_t free_offset = vm_uncompressed_extract_swap_offset(slot);
6003 uint64_t pgidx = free_offset / PAGE_SIZE_64;
6004 uint64_t chunkidx = pgidx / 64;
6005 uint64_t chunkoffset = pgidx % 64;
6006#if DEVELOPMENT || DEBUG
6007 uint64_t vaddr = (uint64_t)&uncompressed_file0_space_bitmap[chunkidx];
6008 uint64_t maxvaddr = (uint64_t)&uncompressed_file0_space_bitmap[MAX_OFFSET_PAGES];
6009 assertf(vaddr < maxvaddr, "0x%llx 0x%llx", vaddr, maxvaddr);
6010#endif /*DEVELOPMENT || DEBUG*/
6011 assertf((uncompressed_file0_space_bitmap[chunkidx] & ((uint64_t)1 << chunkoffset)),
6012 "0x%x %llu %llu", slot, chunkidx, chunkoffset);
6013 uncompressed_file0_space_bitmap[chunkidx] &= ~((uint64_t)1 << chunkoffset);
6014 assertf(!(uncompressed_file0_space_bitmap[chunkidx] & ((uint64_t)1 << chunkoffset)),
6015 "0x%x %llu %llu", slot, chunkidx, chunkoffset);
6016
6017 uncompressed_file0_free_pages++;
6018 } else {
6019 uint32_t free_offset = vm_uncompressed_extract_swap_offset(slot);
6020 uint64_t pgidx = free_offset / PAGE_SIZE_64;
6021 uint64_t chunkidx = pgidx / 64;
6022 uint64_t chunkoffset = pgidx % 64;
6023 assertf((uncompressed_file1_space_bitmap[chunkidx] & ((uint64_t)1 << chunkoffset)),
6024 "%llu %llu", chunkidx, chunkoffset);
6025 uncompressed_file1_space_bitmap[chunkidx] &= ~((uint64_t)1 << chunkoffset);
6026
6027 uncompressed_file1_free_pages++;
6028 }
6029 compressor_ro_uncompressed_swap_usage--;
6030 PAGE_REPLACEMENT_ALLOWED(FALSE);
6031}
6032
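/*
 * First-fit allocator over the per-file page bitmaps: find the first clear
 * bit, mark it used, and encode <file index, byte offset> into a slot value
 * (see the layout comment above).  Returns -1 when both files are full.
 */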
6033static int
6034 vm_uncompressed_reserve_space_in_swap(void)
6035{
6036 int slot = 0;
6037 if (uncompressed_file0_free_pages == 0 && uncompressed_file1_free_pages == 0) {
6038 return -1;
6039 }
6040
6041 PAGE_REPLACEMENT_ALLOWED(TRUE);
6042 if (uncompressed_file0_free_pages) {
6043 uint64_t chunkidx = 0;
6044 uint64_t chunkoffset = 0;
6045 while (uncompressed_file0_space_bitmap[chunkidx] == 0xffffffffffffffff) {
6046 chunkidx++;
6047 }
6048 while (uncompressed_file0_space_bitmap[chunkidx] & ((uint64_t)1 << chunkoffset)) {
6049 chunkoffset++;
6050 }
6051
6052 assertf((uncompressed_file0_space_bitmap[chunkidx] & ((uint64_t)1 << chunkoffset)) == 0,
6053 "%llu %llu", chunkidx, chunkoffset);
6054#if DEVELOPMENT || DEBUG
6055 uint64_t vaddr = (uint64_t)&uncompressed_file0_space_bitmap[chunkidx];
6056 uint64_t maxvaddr = (uint64_t)&uncompressed_file0_space_bitmap[MAX_OFFSET_PAGES];
6057 assertf(vaddr < maxvaddr, "0x%llx 0x%llx", vaddr, maxvaddr);
6058#endif /*DEVELOPMENT || DEBUG*/
6059 uncompressed_file0_space_bitmap[chunkidx] |= ((uint64_t)1 << chunkoffset);
6060 uncompressed_file0_free_offset = ((chunkidx * 64) + chunkoffset) * PAGE_SIZE_64;
6061 assertf((uncompressed_file0_space_bitmap[chunkidx] & ((uint64_t)1 << chunkoffset)),
6062 "%llu %llu", chunkidx, chunkoffset);
6063
6064 assert(uncompressed_file0_free_offset <= (1 << UNCOMPRESSED_OFFSET_SHIFT));
6065 slot = (int)((1 << UNCOMPRESSED_FILEIDX_SHIFT) + uncompressed_file0_free_offset);
6066 uncompressed_file0_free_pages--;
6067 } else {
6068 uint64_t chunkidx = 0;
6069 uint64_t chunkoffset = 0;
6070 while (uncompressed_file1_space_bitmap[chunkidx] == 0xFFFFFFFFFFFFFFFF) {
6071 chunkidx++;
6072 }
6073 while (uncompressed_file1_space_bitmap[chunkidx] & ((uint64_t)1 << chunkoffset)) {
6074 chunkoffset++;
6075 }
6076 assert((uncompressed_file1_space_bitmap[chunkidx] & ((uint64_t)1 << chunkoffset)) == 0);
6077 uncompressed_file1_space_bitmap[chunkidx] |= ((uint64_t)1 << chunkoffset);
6078 uncompressed_file1_free_offset = ((chunkidx * 64) + chunkoffset) * PAGE_SIZE_64;
6079 slot = (int)((2 << UNCOMPRESSED_FILEIDX_SHIFT) + uncompressed_file1_free_offset);
6080 uncompressed_file1_free_pages--;
6081 }
6082 compressor_ro_uncompressed_swap_usage++;
6083 PAGE_REPLACEMENT_ALLOWED(FALSE);
6084 return slot;
6085}
6086
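/*
 * Small pool of page-sized bounce buffers used for swapfile I/O.  Callers
 * claim an entry under PAGE_REPLACEMENT_ALLOWED(TRUE); when all MAX_IO_REQ
 * entries are busy they assert_wait() on the pool and are woken via
 * thread_wakeup() when an entry is released.
 */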
6087#define MAX_IO_REQ (16)
6088struct _uncompressor_io_req {
6089 uint64_t addr;
6090 bool inuse;
6091} uncompressor_io_req[MAX_IO_REQ];
6092
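/*
 * vm_uncompressed_put: write physical page 'pn', uncompressed, to one of
 * the two dedicated swap files (created lazily on first use) and return its
 * location in 'slot' with s_uncompressed set.
 */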
6093int
6094vm_uncompressed_put(ppnum_t pn, int *slot)
6095{
6096 int retval = 0;
6097 struct vnode *uncompressed_vp = NULL;
6098 uint64_t uncompress_offset = 0;
6099
6100again:
6101 if (uncompressed_vp0 == NULL) {
6102 PAGE_REPLACEMENT_ALLOWED(TRUE);
6103 if (uncompressed_vp0 == NULL) {
6104 uint64_t size = (MAX_OFFSET_PAGES * 1024 * 1024ULL);
6105 vm_swapfile_open("/private/var/vm/uncompressedswap0", &uncompressed_vp0);
6106 if (uncompressed_vp0 == NULL) {
6107 PAGE_REPLACEMENT_ALLOWED(FALSE);
6108 return KERN_NO_ACCESS;
6109 }
6110 vm_swapfile_preallocate(uncompressed_vp0, &size, NULL);
6111 uncompressed_file0_free_pages = (uint32_t)atop(size);
6112 bzero(uncompressed_file0_space_bitmap, sizeof(uint64_t) * MAX_OFFSET_PAGES);
6113
6114 int i = 0;
6115 for (; i < MAX_IO_REQ; i++) {
6116 kmem_alloc(kernel_map, (vm_offset_t*)&uncompressor_io_req[i].addr, PAGE_SIZE_64, KMA_NOFAIL | KMA_KOBJECT, VM_KERN_MEMORY_COMPRESSOR);
6117 uncompressor_io_req[i].inuse = false;
6118 }
6119
6120 vm_swapfile_open("/private/var/vm/uncompressedswap1", &uncompressed_vp1);
6121 assert(uncompressed_vp1);
6122 vm_swapfile_preallocate(uncompressed_vp1, &size, NULL);
6123 uncompressed_file1_free_pages = (uint32_t)atop(size);
6124 bzero(uncompressed_file1_space_bitmap, sizeof(uint64_t) * MAX_OFFSET_PAGES);
6125 PAGE_REPLACEMENT_ALLOWED(FALSE);
6126 } else {
6127 PAGE_REPLACEMENT_ALLOWED(FALSE);
6128 delay(100);
6129 goto again;
6130 }
6131 }
6132
6133 int swapinfo = vm_uncompressed_reserve_space_in_swap();
6134 if (swapinfo == -1) {
6135 *slot = 0;
6136 return KERN_RESOURCE_SHORTAGE;
6137 }
6138
6139 if (vm_uncompressed_extract_swap_file(swapinfo) == 1) {
6140 uncompressed_vp = uncompressed_vp0;
6141 } else {
6142 uncompressed_vp = uncompressed_vp1;
6143 }
6144 uncompress_offset = vm_uncompressed_extract_swap_offset(swapinfo);
6145 if ((retval = vnode_getwithref(uncompressed_vp)) != 0) {
6146 os_log_error_with_startup_serial(OS_LOG_DEFAULT, "vm_uncompressed_put: vnode_getwithref on swapfile failed with %d\n", retval);
6147 } else {
6148 int i = 0;
6149retry:
6150 PAGE_REPLACEMENT_ALLOWED(TRUE);
6151 for (i = 0; i < MAX_IO_REQ; i++) {
6152 if (uncompressor_io_req[i].inuse == false) {
6153 uncompressor_io_req[i].inuse = true;
6154 break;
6155 }
6156 }
6157 if (i == MAX_IO_REQ) {
6158 assert_wait((event_t)&uncompressor_io_req, THREAD_UNINT);
6159 PAGE_REPLACEMENT_ALLOWED(FALSE);
6160 thread_block(THREAD_CONTINUE_NULL);
6161 goto retry;
6162 }
6163 PAGE_REPLACEMENT_ALLOWED(FALSE);
6164 void *addr = pmap_map_compressor_page(pn);
6165 memcpy((void*)uncompressor_io_req[i].addr, addr, PAGE_SIZE_64);
6166 pmap_unmap_compressor_page(pn, addr);
6167
6168 retval = vm_swapfile_io(uncompressed_vp, uncompress_offset, (uint64_t)uncompressor_io_req[i].addr, 1, SWAP_WRITE, NULL);
6169 if (retval) {
6170 *slot = 0;
6171 } else {
6172 *slot = (int)swapinfo;
6173 ((c_slot_mapping_t)(slot))->s_uncompressed = 1;
6174 }
6175 vnode_put(uncompressed_vp);
6176 PAGE_REPLACEMENT_ALLOWED(TRUE);
6177 uncompressor_io_req[i].inuse = false;
6178 thread_wakeup((event_t)&uncompressor_io_req);
6179 PAGE_REPLACEMENT_ALLOWED(FALSE);
6180 }
6181 return retval;
6182}
6183
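/*
 * vm_uncompressed_get: read the uncompressed page referenced by 'slot' back
 * into physical page 'pn'.  Not supported from the kernel debugger context
 * (C_KDP), since it may block on vnode and bounce-buffer acquisition.
 */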
6184int
6185 vm_uncompressed_get(ppnum_t pn, int *slot, vm_compressor_options_t flags)
6186{
6187 int retval = 0;
6188 struct vnode *uncompressed_vp = NULL;
6189 uint32_t fileidx = vm_uncompressed_extract_swap_file(*slot);
6190 uint64_t uncompress_offset = vm_uncompressed_extract_swap_offset(*slot);
6191
6192 if (__improbable(flags & C_KDP)) {
6193 return -2;
6194 }
6195
6196 if (fileidx == 1) {
6197 uncompressed_vp = uncompressed_vp0;
6198 } else {
6199 uncompressed_vp = uncompressed_vp1;
6200 }
6201
6202 if ((retval = vnode_getwithref(uncompressed_vp)) != 0) {
6203 os_log_error_with_startup_serial(OS_LOG_DEFAULT, "vm_uncompressed_put: vnode_getwithref on swapfile failed with %d\n", retval);
6204 } else {
6205 int i = 0;
6206retry:
6207 PAGE_REPLACEMENT_ALLOWED(TRUE);
6208 for (i = 0; i < MAX_IO_REQ; i++) {
6209 if (uncompressor_io_req[i].inuse == false) {
6210 uncompressor_io_req[i].inuse = true;
6211 break;
6212 }
6213 }
6214 if (i == MAX_IO_REQ) {
6215 assert_wait((event_t)&uncompressor_io_req, THREAD_UNINT);
6216 PAGE_REPLACEMENT_ALLOWED(FALSE);
6217 thread_block(THREAD_CONTINUE_NULL);
6218 goto retry;
6219 }
6220 PAGE_REPLACEMENT_ALLOWED(FALSE);
6221 retval = vm_swapfile_io(uncompressed_vp, uncompress_offset, (uint64_t)uncompressor_io_req[i].addr, 1, SWAP_READ, NULL);
6222 vnode_put(uncompressed_vp);
6223 void *addr = pmap_map_compressor_page(pn);
6224 memcpy(addr, (void*)uncompressor_io_req[i].addr, PAGE_SIZE_64);
6225 pmap_unmap_compressor_page(pn, addr);
6226 PAGE_REPLACEMENT_ALLOWED(TRUE);
6227 uncompressor_io_req[i].inuse = false;
6228 thread_wakeup((event_t)&uncompressor_io_req);
6229 PAGE_REPLACEMENT_ALLOWED(FALSE);
6230 }
6231 return retval;
6232}
6233
6234int
6235vm_uncompressed_free(int *slot, __unused vm_compressor_options_t flags)
6236{
6237 vm_uncompressed_return_space_to_swap(*slot);
6238 *slot = 0;
6239 return 0;
6240}
6241
6242#endif /*CONFIG_TRACK_UNMODIFIED_ANON_PAGES*/
6243