1/*
2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <kern/kalloc.h>
30#include <vm/vm_compressor_pager.h>
31#include <vm/vm_kern.h>
32#include <vm/vm_page.h>
33#include <vm/vm_protos.h>
34#include <vm/WKdm_new.h>
35#include <vm/vm_object.h>
36#include <vm/vm_map.h>
37#include <machine/pmap.h>
38#include <kern/locks.h>
39
40#include <sys/kdebug.h>
41
42#if defined(__arm64__)
43#include <arm/proc_reg.h>
44#endif
45
/*
 * Compressor segment geometry.
 *
 * Offsets inside a segment buffer are expressed in int32_t units (see
 * C_SEG_BYTES_TO_OFFSET) and stored in C_SEG_OFFSET_BITS-wide bit-fields.
 */
#define C_SEG_OFFSET_BITS               16
#define C_SEG_BUFSIZE                   (256 * 1024)
#define C_SEG_MAX_PAGES                 (C_SEG_BUFSIZE / PAGE_SIZE)
#define C_SEG_MAX_POPULATE_SIZE         (4 * PAGE_SIZE)

/*
 * Embedded configurations allocate one extra page per segment and stop
 * filling a segment 512 bytes (rather than 128 bytes) short of C_SEG_BUFSIZE.
 */
#if !CONFIG_EMBEDDED
#define C_SEG_ALLOCSIZE                 (C_SEG_BUFSIZE)
#define C_SEG_OFF_LIMIT                 (C_SEG_BYTES_TO_OFFSET((C_SEG_BUFSIZE - 128)))
#else
#define C_SEG_ALLOCSIZE                 (C_SEG_BUFSIZE + PAGE_SIZE)
#define C_SEG_OFF_LIMIT                 (C_SEG_BYTES_TO_OFFSET((C_SEG_BUFSIZE - 512)))
#endif
58
/*
 * Debug / integrity-check configuration switches.
 */

/* arm64 DEVELOPMENT/DEBUG builds for WatchOS force c_segment validation on. */
#if defined(__arm64__) && (DEVELOPMENT || DEBUG) && defined(PLATFORM_WatchOS)
#define VALIDATE_C_SEGMENTS (1)
#endif

/*
 * POPCOUNT_THE_COMPRESSED_DATA is only defined when the checks are enabled;
 * an undefined identifier evaluates to 0 inside an #if expression.
 */
#if !(DEBUG || COMPRESSOR_INTEGRITY_CHECKS)
#define ENABLE_SWAP_CHECKS 0
#define ENABLE_COMPRESSOR_CHECKS 0
#else
#define ENABLE_SWAP_CHECKS 1
#define ENABLE_COMPRESSOR_CHECKS 1
#define POPCOUNT_THE_COMPRESSED_DATA (1)
#endif

/* Debug swap data */
#define CHECKSUM_THE_SWAP ENABLE_SWAP_CHECKS
/* Debug compressor/decompressor data */
#define CHECKSUM_THE_DATA ENABLE_COMPRESSOR_CHECKS
/* Debug compressor/decompressor compressed data */
#define CHECKSUM_THE_COMPRESSED_DATA ENABLE_COMPRESSOR_CHECKS

/* Debug compaction (unless already forced on above) */
#if !defined(VALIDATE_C_SEGMENTS)
#define VALIDATE_C_SEGMENTS ENABLE_COMPRESSOR_CHECKS
#endif

#define RECORD_THE_COMPRESSED_DATA 0
89
90struct c_slot {
91 uint64_t c_offset:C_SEG_OFFSET_BITS,
92#if defined(__arm64__)
93 c_size:14,
94 c_codec:1,
95 c_packed_ptr:33;
96#elif defined(__arm__)
97 c_size:12,
98 c_codec:1,
99 c_packed_ptr:35;
100#else
101 c_size:12,
102 c_packed_ptr:36;
103#endif
104#if CHECKSUM_THE_DATA
105 unsigned int c_hash_data;
106#endif
107#if CHECKSUM_THE_COMPRESSED_DATA
108 unsigned int c_hash_compressed_data;
109#endif
110#if POPCOUNT_THE_COMPRESSED_DATA
111 unsigned int c_pop_cdata;
112#endif
113};
114
/*
 * Segment states, stored in the 4-bit c_segment.c_state field (the largest
 * value here, 10, fits in 4 bits).  The C_ON_* values name the queue the
 * segment is expected to be found on.
 */
#define C_IS_EMPTY 0
#define C_IS_FREE 1
#define C_IS_FILLING 2
#define C_ON_AGE_Q 3
#define C_ON_SWAPOUT_Q 4
#define C_ON_SWAPPEDOUT_Q 5
#define C_ON_SWAPPEDOUTSPARSE_Q 6
#define C_ON_SWAPPEDIN_Q 7
#define C_ON_MAJORCOMPACT_Q 8
#define C_ON_BAD_Q 9
#define C_ON_SWAPIO_Q 10
126
127
/*
 * Descriptor for one compressor segment: its lock and queue linkage, packed
 * state flags, space accounting, backing store, and the slot table.
 */
struct c_segment {
	lck_mtx_t c_lock;
	queue_chain_t c_age_list;
	queue_chain_t c_list;

#define C_SEG_MAX_LIMIT (1 << 20) /* this needs to track the size of c_mysegno */
	/* The following bit-fields total 32 bits (20+1+1+1+1+4+1+3). */
	uint32_t c_mysegno:20,
	    c_busy:1,           /* set by C_SEG_BUSY, cleared by C_SEG_WAKEUP_DONE */
	    c_busy_swapping:1,
	    c_wanted:1,         /* when set, C_SEG_WAKEUP_DONE clears it and issues thread_wakeup() on the segment */
	    c_on_minorcompact_q:1, /* can also be on the age_q, the majorcompact_q or the swappedin_q */

	    c_state:4, /* what state is the segment in which dictates which q to find it on */
	    c_overage_swap:1,
	    c_reserved:3;

	uint32_t c_creation_ts;
	uint64_t c_generation_id;

	/* Byte counts; C_SEG_ONDISK_IS_SPARSE compares c_bytes_used against c_bytes_unused. */
	int32_t c_bytes_used;
	int32_t c_bytes_unused;
	uint32_t c_slots_used;

	uint16_t c_firstemptyslot;
	uint16_t c_nextslot;
	/* Offsets in int32_t units: C_SEG_UNUSED_BYTES converts their difference with C_SEG_OFFSET_TO_BYTES. */
	uint32_t c_nextoffset;
	uint32_t c_populated_offset;

	uint32_t c_swappedin_ts;

	/*
	 * Backing store.  The two members overlay each other.
	 * NOTE(review): presumably c_buffer is valid while the segment is
	 * resident and c_swap_handle once it is swapped out -- confirm
	 * against the users of c_store.
	 */
	union {
		int32_t *c_buffer;
		uint64_t c_swap_handle;
	} c_store;

#if VALIDATE_C_SEGMENTS
	uint32_t c_was_minor_compacted;
	uint32_t c_was_major_compacted;
	uint32_t c_was_major_donor;
#endif
#if CHECKSUM_THE_SWAP
	unsigned int cseg_hash;
	unsigned int cseg_swap_size;
#endif /* CHECKSUM_THE_SWAP */

#if MACH_ASSERT
	/* Recorded by C_SEG_BUSY (current_thread()) and reset to NULL by C_SEG_WAKEUP_DONE. */
	thread_t c_busy_for_thread;
#endif /* MACH_ASSERT */

	/*
	 * Slot table.  C_SEG_SLOT_FROM_INDEX resolves indices below
	 * c_seg_fixed_array_len in c_slot_fixed_array (a zero-length trailing
	 * array, so its storage follows the struct) and all other indices in
	 * c_slot_var_array.
	 */
	int c_slot_var_array_len;
	struct c_slot *c_slot_var_array;
	struct c_slot c_slot_fixed_array[0];
};
181
182
/*
 * 32-bit locator for a compressed slot: which segment it lives in and
 * which slot within that segment.
 */
struct c_slot_mapping {
	uint32_t        s_cseg:22;      /* segment number + 1 */
	uint32_t        s_cindx:10;     /* index in the segment */
};

/* Number of distinct values representable in the 10-bit s_cindx field. */
#define C_SLOT_MAX_INDEX        (1 << 10)

typedef struct c_slot_mapping   *c_slot_mapping_t;
190
191
/* Same value as C_SEG_MAX_PAGES, i.e. the number of pages in a C_SEG_BUFSIZE buffer. */
#define C_SEG_SLOT_VAR_ARRAY_MIN_LEN C_SEG_MAX_PAGES

/* Index threshold used by C_SEG_SLOT_FROM_INDEX to choose between the fixed and variable slot arrays. */
extern int c_seg_fixed_array_len;
/* Base address used by C_SEG_BUFFER_ADDRESS to locate a segment's buffer. */
extern vm_offset_t c_buffers;
/*
 * Address of the buffer for segment number c_segno: c_buffers plus
 * c_segno strides of C_SEG_ALLOCSIZE bytes.  The argument is parenthesized
 * so that the 64-bit cast applies to the whole expression passed in.
 */
#define C_SEG_BUFFER_ADDRESS(c_segno) \
	((c_buffers + ((uint64_t)(c_segno) * (uint64_t)C_SEG_ALLOCSIZE)))

/*
 * Pointer to slot number `index` of segment `cseg`.  Indices below
 * c_seg_fixed_array_len live in c_slot_fixed_array; the remainder live in
 * c_slot_var_array, rebased to start at 0.
 *
 * Both arguments are evaluated more than once, so they must be free of
 * side effects.
 */
#define C_SEG_SLOT_FROM_INDEX(cseg, index) \
	((index) < c_seg_fixed_array_len ? \
	    &((cseg)->c_slot_fixed_array[(index)]) : \
	    &((cseg)->c_slot_var_array[(index) - c_seg_fixed_array_len]))
199
/*
 * Segment offsets are counted in int32_t units; these convert between
 * offsets and byte counts.
 */
#define C_SEG_OFFSET_TO_BYTES(off)      ((off) * (int) sizeof(int32_t))
#define C_SEG_BYTES_TO_OFFSET(bytes)    ((bytes) / (int) sizeof(int32_t))

/*
 * Bytes of `cseg` not holding data: the explicitly tracked c_bytes_unused
 * plus the populated-but-not-yet-allocated tail between c_nextoffset and
 * c_populated_offset.  `cseg` is evaluated more than once.
 */
#define C_SEG_UNUSED_BYTES(cseg) \
	((cseg)->c_bytes_unused + \
	 (C_SEG_OFFSET_TO_BYTES((cseg)->c_populated_offset - (cseg)->c_nextoffset)))
204//todo opensource
205
/*
 * Alignment applied to segment offsets.  A platform may supply its own
 * mask/boundary pair; otherwise a 4-byte boundary is used.
 */
#ifdef __PLATFORM_WKDM_ALIGNMENT_MASK__
#define C_SEG_OFFSET_ALIGNMENT_MASK     __PLATFORM_WKDM_ALIGNMENT_MASK__
#define C_SEG_OFFSET_ALIGNMENT_BOUNDARY __PLATFORM_WKDM_ALIGNMENT_BOUNDARY__
#else
#define C_SEG_OFFSET_ALIGNMENT_MASK     0x3ULL
#define C_SEG_OFFSET_ALIGNMENT_BOUNDARY 0x4
#endif
213
/*
 * Non-zero (1) when at least a quarter of the segment buffer
 * (C_SEG_BUFSIZE / 4 bytes) is reported unused by C_SEG_UNUSED_BYTES,
 * 0 otherwise.
 */
#define C_SEG_SHOULD_MINORCOMPACT_NOW(cseg) \
	(C_SEG_UNUSED_BYTES(cseg) >= (C_SEG_BUFSIZE / 4))
215
/*
 * the decision to force a c_seg to be major compacted is based on 2 criteria
 * 1) is the c_seg buffer almost empty (i.e. we have a chance to merge it with another c_seg)
 * 2) are there at least a minimum number of slots unoccupied so that we have a chance
 *    of combining this c_seg with another one.
 *
 * Evaluates to 1 when both hold, 0 otherwise:
 *   - c_bytes_unused plus the space beyond c_nextoffset is at least
 *     C_SEG_BUFSIZE / 8 bytes, and
 *   - more than C_SEG_BUFSIZE / PAGE_SIZE slot indices remain below
 *     C_SLOT_MAX_INDEX.
 *
 * Every field is read through the macro parameter; `cseg` is evaluated
 * more than once, so it must be free of side effects.
 */
#define C_SEG_SHOULD_MAJORCOMPACT_NOW(cseg) \
	(((((cseg)->c_bytes_unused + (C_SEG_BUFSIZE - C_SEG_OFFSET_TO_BYTES((cseg)->c_nextoffset))) >= (C_SEG_BUFSIZE / 8)) && \
	  ((C_SLOT_MAX_INDEX - (cseg)->c_slots_used) > (C_SEG_BUFSIZE / PAGE_SIZE))) \
	? 1 : 0)
226
/*
 * Segment-state predicates.  Each evaluates `cseg` more than once, so the
 * argument must be free of side effects.
 */

/* 1 when the segment holds fewer used bytes than unused bytes, else 0. */
#define C_SEG_ONDISK_IS_SPARSE(cseg) \
	(((cseg)->c_bytes_used < (cseg)->c_bytes_unused) ? 1 : 0)

/* True when the segment is in one of the two swapped-out states. */
#define C_SEG_IS_ONDISK(cseg) \
	(((cseg)->c_state == C_ON_SWAPPEDOUT_Q || \
	  (cseg)->c_state == C_ON_SWAPPEDOUTSPARSE_Q))

/* True when the segment is swapped out, queued for swapout, or on the swap I/O queue. */
#define C_SEG_IS_ON_DISK_OR_SOQ(cseg) \
	(((cseg)->c_state == C_ON_SWAPPEDOUT_Q || \
	  (cseg)->c_state == C_ON_SWAPPEDOUTSPARSE_Q || \
	  (cseg)->c_state == C_ON_SWAPOUT_Q || \
	  (cseg)->c_state == C_ON_SWAPIO_Q))
233
234
/*
 * Release the busy marking on `cseg` and wake any waiter.
 *
 * Asserts that the segment is currently busy, clears c_busy, and resets
 * c_busy_for_thread to NULL.  That reset is written inside an assert()
 * expression (the trailing ", TRUE" keeps the assertion satisfied);
 * c_busy_for_thread is only declared under MACH_ASSERT, so presumably the
 * store is meant to vanish together with assert() in other builds.
 * If c_wanted is set it is cleared and thread_wakeup() is issued with the
 * segment's address as the event.
 */
#define C_SEG_WAKEUP_DONE(cseg) \
	MACRO_BEGIN \
	assert((cseg)->c_busy); \
	(cseg)->c_busy = 0; \
	assert((cseg)->c_busy_for_thread != NULL); \
	assert((((cseg)->c_busy_for_thread = NULL), TRUE)); \
	if ((cseg)->c_wanted) { \
	        (cseg)->c_wanted = 0; \
	        thread_wakeup((event_t) (cseg)); \
	} \
	MACRO_END
246
/*
 * Mark `cseg` busy.
 *
 * Asserts that the segment is not already busy, sets c_busy, and records
 * current_thread() in c_busy_for_thread.  The recording is written inside
 * an assert() expression (the trailing ", TRUE" keeps the assertion
 * satisfied); c_busy_for_thread is only declared under MACH_ASSERT, so
 * presumably the store is meant to vanish together with assert() in other
 * builds.
 */
#define C_SEG_BUSY(cseg) \
	MACRO_BEGIN \
	assert((cseg)->c_busy == 0); \
	(cseg)->c_busy = 1; \
	assert((cseg)->c_busy_for_thread == NULL); \
	assert((((cseg)->c_busy_for_thread = current_thread()), TRUE)); \
	MACRO_END
254
255
256extern vm_map_t compressor_map;
257
258#if DEVELOPMENT || DEBUG
259extern boolean_t write_protect_c_segs;
260extern int vm_compressor_test_seg_wp;
261
262#define C_SEG_MAKE_WRITEABLE(cseg) \
263 MACRO_BEGIN \
264 if (write_protect_c_segs) { \
265 vm_map_protect(compressor_map, \
266 (vm_map_offset_t)cseg->c_store.c_buffer, \
267 (vm_map_offset_t)&cseg->c_store.c_buffer[C_SEG_BYTES_TO_OFFSET(C_SEG_ALLOCSIZE)],\
268 VM_PROT_READ | VM_PROT_WRITE, \
269 0); \
270 } \
271 MACRO_END
272
273#define C_SEG_WRITE_PROTECT(cseg) \
274 MACRO_BEGIN \
275 if (write_protect_c_segs) { \
276 vm_map_protect(compressor_map, \
277 (vm_map_offset_t)cseg->c_store.c_buffer, \
278 (vm_map_offset_t)&cseg->c_store.c_buffer[C_SEG_BYTES_TO_OFFSET(C_SEG_ALLOCSIZE)],\
279 VM_PROT_READ, \
280 0); \
281 } \
282 if (vm_compressor_test_seg_wp) { \
283 volatile uint32_t vmtstmp = *(volatile uint32_t *)cseg->c_store.c_buffer; \
284 *(volatile uint32_t *)cseg->c_store.c_buffer = 0xDEADABCD; \
285 (void) vmtstmp; \
286 } \
287 MACRO_END
288#endif
289
/* Pointer typedefs for the segment and slot descriptors. */
typedef struct c_segment *c_segment_t;
typedef struct c_slot *c_slot_t;
292
/*
 * Compressor, compactor and swapper entry points.  These are declarations
 * only; the implementations are not part of this header.
 */
uint64_t vm_compressor_total_compressions(void);
void vm_wake_compactor_swapper(void);
void vm_run_compactor(void);
void vm_thrashing_jetsam_done(void);
void vm_consider_waking_compactor_swapper(void);
void vm_consider_swapping(void);
void vm_compressor_flush(void);
void c_seg_free(c_segment_t);
void c_seg_free_locked(c_segment_t);
void c_seg_insert_into_age_q(c_segment_t);
void c_seg_need_delayed_compaction(c_segment_t, boolean_t);

void vm_decompressor_lock(void);
void vm_decompressor_unlock(void);

void vm_compressor_delay_trim(void);
void vm_compressor_do_warmup(void);
void vm_compressor_record_warmup_start(void);
void vm_compressor_record_warmup_end(void);

int vm_wants_task_throttled(task_t);

extern void vm_compaction_swapper_do_init(void);
extern void vm_compressor_swap_init(void);
extern void vm_compressor_init_locks(void);
/* Reader/writer lock taken shared by PAGE_REPLACEMENT_DISALLOWED(TRUE) and exclusive by PAGE_REPLACEMENT_ALLOWED(TRUE). */
extern lck_rw_t c_master_lock;
319
/*
 * Swap-side and segment-management entry points.  Declarations only;
 * vm_swap_decrypt is declared only when ENCRYPTED_SWAP is enabled.
 */
#if ENCRYPTED_SWAP
extern void vm_swap_decrypt(c_segment_t);
#endif /* ENCRYPTED_SWAP */

extern int vm_swap_low_on_space(void);
extern kern_return_t vm_swap_get(c_segment_t, uint64_t, uint64_t);
extern void vm_swap_free(uint64_t);
extern void vm_swap_consider_defragmenting(int);

extern void c_seg_swapin_requeue(c_segment_t, boolean_t, boolean_t, boolean_t);
extern int c_seg_swapin(c_segment_t, boolean_t, boolean_t);
extern void c_seg_wait_on_busy(c_segment_t);
extern void c_seg_trim_tail(c_segment_t);
extern void c_seg_switch_state(c_segment_t, int, boolean_t);
334
/*
 * Global compressor/swapper state shared across translation units.
 * Declarations only; the definitions live outside this header.
 */
extern boolean_t fastwake_recording_in_progress;
extern int compaction_swapper_inited;
extern int compaction_swapper_running;
extern uint64_t vm_swap_put_failures;

extern int c_overage_swapped_count;
extern int c_overage_swapped_limit;

/* Queue heads; c_seg_insert_into_q() below takes a pointer to a queue head. */
extern queue_head_t c_minor_list_head;
extern queue_head_t c_age_list_head;
extern queue_head_t c_swapout_list_head;
extern queue_head_t c_swappedout_list_head;
extern queue_head_t c_swappedout_sparse_list_head;

/* NOTE(review): presumably the element counts of the like-named queues above -- confirm. */
extern uint32_t c_age_count;
extern uint32_t c_swapout_count;
extern uint32_t c_swappedout_count;
extern uint32_t c_swappedout_sparse_count;

extern int64_t compressor_bytes_used;
extern uint64_t first_c_segment_to_warm_generation_id;
extern uint64_t last_c_segment_to_warm_generation_id;
extern boolean_t hibernate_flushing;
extern boolean_t hibernate_no_swapspace;
extern boolean_t hibernate_in_progress_with_pinned_swap;
extern uint32_t swapout_target_age;

extern void c_seg_insert_into_q(queue_head_t *, c_segment_t);

/*
 * Divisors consumed by the VM_PAGE_COMPRESSOR_*_THRESHOLD macros.  A value
 * of 0 is replaced there by the macro's own multiplier, so the threshold
 * then equals AVAILABLE_MEMORY.
 */
extern uint32_t vm_compressor_minorcompact_threshold_divisor;
extern uint32_t vm_compressor_majorcompact_threshold_divisor;
extern uint32_t vm_compressor_unthrottle_threshold_divisor;
extern uint32_t vm_compressor_catchup_threshold_divisor;

extern uint32_t vm_compressor_minorcompact_threshold_divisor_overridden;
extern uint32_t vm_compressor_majorcompact_threshold_divisor_overridden;
extern uint32_t vm_compressor_unthrottle_threshold_divisor_overridden;
extern uint32_t vm_compressor_catchup_threshold_divisor_overridden;

extern uint64_t vm_compressor_compute_elapsed_msecs(clock_sec_t, clock_nsec_t, clock_sec_t, clock_nsec_t);
375
/*
 * Bracket regions with respect to c_master_lock.
 *
 * PAGE_REPLACEMENT_DISALLOWED(TRUE) takes c_master_lock shared and
 * PAGE_REPLACEMENT_ALLOWED(TRUE) takes it exclusive; passing anything other
 * than TRUE to either macro releases the lock with lck_rw_done().
 *
 * `enable` is parenthesized so that an expression argument (for example a
 * conditional expression) is compared against TRUE as a whole.
 */
#define PAGE_REPLACEMENT_DISALLOWED(enable) \
	((enable) == TRUE ? lck_rw_lock_shared(&c_master_lock) : lck_rw_done(&c_master_lock))
#define PAGE_REPLACEMENT_ALLOWED(enable) \
	((enable) == TRUE ? lck_rw_lock_exclusive(&c_master_lock) : lck_rw_done(&c_master_lock))
378
379
/* Sum of the active, inactive, free and speculative page counters. */
#define AVAILABLE_NON_COMPRESSED_MEMORY (vm_page_active_count + vm_page_inactive_count + vm_page_free_count + vm_page_speculative_count)
/* AVAILABLE_NON_COMPRESSED_MEMORY plus VM_PAGE_COMPRESSOR_COUNT. */
#define AVAILABLE_MEMORY (AVAILABLE_NON_COMPRESSED_MEMORY + VM_PAGE_COMPRESSOR_COUNT)
382
/*
 * TODO, there may be a minor optimisation opportunity to replace these divisions
 * with multiplies and shifts
 *
 * By multiplying by 10, the divisors can have more precision w/o resorting to floating point... a divisor specified as 25 is in reality a divide by 2.5
 * By multiplying by 9, you get a number ~10% smaller which allows us to have another limit point derived from the same base
 * By multiplying by 11, you get a number ~10% bigger which allows us to generate a reset limit derived from the same base which is useful for hysteresis
 */
391
/*
 * Each threshold is (AVAILABLE_MEMORY * k) / divisor, where k is 9, 10 or 11
 * and the divisor is one of the vm_compressor_*_threshold_divisor variables.
 * A divisor of 0 is replaced by k itself, which makes the threshold equal to
 * AVAILABLE_MEMORY and avoids a division by zero.
 */

/* k = 10, minorcompact divisor */
#define VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD (((AVAILABLE_MEMORY) * 10) / (vm_compressor_minorcompact_threshold_divisor ? vm_compressor_minorcompact_threshold_divisor : 10))
/* k = 10, majorcompact divisor */
#define VM_PAGE_COMPRESSOR_SWAP_THRESHOLD (((AVAILABLE_MEMORY) * 10) / (vm_compressor_majorcompact_threshold_divisor ? vm_compressor_majorcompact_threshold_divisor : 10))

/* k = 10 and k = 11 on the unthrottle divisor: the unthrottle/rethrottle pair */
#define VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD (((AVAILABLE_MEMORY) * 10) / (vm_compressor_unthrottle_threshold_divisor ? vm_compressor_unthrottle_threshold_divisor : 10))
#define VM_PAGE_COMPRESSOR_SWAP_RETHROTTLE_THRESHOLD (((AVAILABLE_MEMORY) * 11) / (vm_compressor_unthrottle_threshold_divisor ? vm_compressor_unthrottle_threshold_divisor : 11))

/* k = 11, 10 and 9 on the catchup divisor */
#define VM_PAGE_COMPRESSOR_SWAP_HAS_CAUGHTUP_THRESHOLD (((AVAILABLE_MEMORY) * 11) / (vm_compressor_catchup_threshold_divisor ? vm_compressor_catchup_threshold_divisor : 11))
#define VM_PAGE_COMPRESSOR_SWAP_CATCHUP_THRESHOLD (((AVAILABLE_MEMORY) * 10) / (vm_compressor_catchup_threshold_divisor ? vm_compressor_catchup_threshold_divisor : 10))
#define VM_PAGE_COMPRESSOR_HARD_THROTTLE_THRESHOLD (((AVAILABLE_MEMORY) * 9) / (vm_compressor_catchup_threshold_divisor ? vm_compressor_catchup_threshold_divisor : 9))
401
/*
 * Predicates comparing AVAILABLE_NON_COMPRESSED_MEMORY against the
 * VM_PAGE_COMPRESSOR_*_THRESHOLD values.  Each evaluates to 1 or 0.
 *
 * CONFIG_EMBEDDED is tested by value (#if), matching the segment-size
 * selection earlier in this header, so a build that defines it as 0 takes
 * the non-embedded branch.
 */
#if CONFIG_EMBEDDED
/*
 * Embedded: swap is also needed whenever non-compressed memory drops below
 * the fixed floor AVAILABLE_NON_COMPRESSED_MIN.
 */
#define AVAILABLE_NON_COMPRESSED_MIN 20000
#define COMPRESSOR_NEEDS_TO_SWAP() (((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_SWAP_THRESHOLD) || \
	                            (AVAILABLE_NON_COMPRESSED_MEMORY < AVAILABLE_NON_COMPRESSED_MIN)) ? 1 : 0)
#else
#define COMPRESSOR_NEEDS_TO_SWAP() ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_SWAP_THRESHOLD) ? 1 : 0)
#endif

#define HARD_THROTTLE_LIMIT_REACHED() ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_HARD_THROTTLE_THRESHOLD) ? 1 : 0)
#define SWAPPER_NEEDS_TO_UNTHROTTLE() ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) ? 1 : 0)
#define SWAPPER_NEEDS_TO_RETHROTTLE() ((AVAILABLE_NON_COMPRESSED_MEMORY > VM_PAGE_COMPRESSOR_SWAP_RETHROTTLE_THRESHOLD) ? 1 : 0)
#define SWAPPER_NEEDS_TO_CATCHUP() ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_SWAP_CATCHUP_THRESHOLD) ? 1 : 0)
#define SWAPPER_HAS_CAUGHTUP() ((AVAILABLE_NON_COMPRESSED_MEMORY > VM_PAGE_COMPRESSOR_SWAP_HAS_CAUGHTUP_THRESHOLD) ? 1 : 0)
#define COMPRESSOR_NEEDS_TO_MINOR_COMPACT() ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) ? 1 : 0)
416
417
/*
 * Reserved-limit constant: 28 on embedded configurations, 128 otherwise.
 * CONFIG_EMBEDDED is tested by value (#if), matching the segment-size
 * selection earlier in this header.
 */
#if CONFIG_EMBEDDED
#define COMPRESSOR_FREE_RESERVED_LIMIT 28
#else
#define COMPRESSOR_FREE_RESERVED_LIMIT 128
#endif
423
/* Scratch-buffer size queries for the encode and decode paths (declarations only). */
uint32_t vm_compressor_get_encode_scratch_size(void);
uint32_t vm_compressor_get_decode_scratch_size(void);

/* Expands to a function call, so the size is obtained at run time on every use. */
#define COMPRESSOR_SCRATCH_BUF_SIZE vm_compressor_get_encode_scratch_size()

/* Compiled only when RECORD_THE_COMPRESSED_DATA is non-zero; this header defines it as 0. */
#if RECORD_THE_COMPRESSED_DATA
extern void c_compressed_record_init(void);
extern void c_compressed_record_write(char *, int);
#endif

/* NOTE(review): presumably the mutex guarding the segment queues declared above -- confirm. */
extern lck_mtx_t *c_list_lock;
435
/*
 * VMKDBG(x, ...): forwards its arguments to KDBG() only while
 * vm_ktrace_enabled is non-zero.  Both the flag and the macro exist only in
 * DEVELOPMENT or DEBUG builds; no fallback definition is provided here for
 * other builds.
 */
#if DEVELOPMENT || DEBUG
extern uint32_t vm_ktrace_enabled;

#define VMKDBG(x, ...) \
MACRO_BEGIN \
if (vm_ktrace_enabled) { \
	KDBG(x, ## __VA_ARGS__);\
} \
MACRO_END
#endif
446