1/*
2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <vm/vm_compressor.h>
30
31#if CONFIG_PHANTOM_CACHE
32#include <vm/vm_phantom_cache.h>
33#endif
34
35#include <vm/vm_map.h>
36#include <vm/vm_pageout.h>
37#include <vm/memory_object.h>
38#include <vm/vm_compressor_algorithms.h>
39#include <vm/vm_fault.h>
40#include <vm/vm_protos.h>
41#include <mach/mach_host.h> /* for host_info() */
42#include <kern/ledger.h>
43#include <kern/policy_internal.h>
44#include <kern/thread_group.h>
45#include <san/kasan.h>
46
47#if !CONFIG_EMBEDDED
48#include <i386/misc_protos.h>
49#endif
50
51#include <IOKit/IOHibernatePrivate.h>
52
53extern boolean_t vm_darkwake_mode;
54
55#if POPCOUNT_THE_COMPRESSED_DATA
56boolean_t popcount_c_segs = TRUE;
57
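/*
 * Population count (number of set bits) over an arbitrary buffer,
 * consumed 16 bytes at a time, then 4, then 1.  Returns the sentinel
 * value 0xDEAD707C when popcount validation of c_segs is disabled.
 */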
58static inline uint32_t vmc_pop(uintptr_t ins, int sz) {
59 uint32_t rv = 0;
60
61 if (__probable(popcount_c_segs == FALSE)) {
62 return 0xDEAD707C;
63 }
64
65 while (sz >= 16) {
66 uint32_t rv1, rv2;
67 uint64_t *ins64 = (uint64_t *) ins;
68 uint64_t *ins642 = (uint64_t *) (ins + 8);
69 rv1 = __builtin_popcountll(*ins64);
70 rv2 = __builtin_popcountll(*ins642);
71 rv += rv1 + rv2;
72 sz -= 16;
73 ins += 16;
74 }
75
76 while (sz >= 4) {
77 uint32_t *ins32 = (uint32_t *) ins;
78 rv += __builtin_popcount(*ins32);
79 sz -= 4;
80 ins += 4;
81 }
82
83 while (sz > 0) {
84 char *ins8 = (char *)ins;
85 rv += __builtin_popcount(*ins8);
86 sz--;
87 ins++;
88 }
89 return rv;
90}
91#endif
92
93#if VALIDATE_C_SEGMENTS
94boolean_t validate_c_segs = TRUE;
95#endif
96/*
97 * vm_compressor_mode has a hierarchy of control to set its value.
98 * boot-args are checked first, then device-tree, and finally
99 * the default value that is defined below. See vm_fault_init() for
100 * the boot-arg & device-tree code.
101 */
102
103#if CONFIG_EMBEDDED
104
105#if CONFIG_FREEZE
106int vm_compressor_mode = VM_PAGER_FREEZER_DEFAULT;
107
108void *freezer_chead; /* The chead used to track c_segs allocated for the exclusive use of holding just one task's compressed memory.*/
109char *freezer_compressor_scratch_buf = NULL;
110
111extern int c_freezer_swapout_page_count; /* This count keeps track of the # of compressed pages holding just one task's compressed memory on the swapout queue. This count is used during each freeze i.e. on a per-task basis.*/
112
113#else /* CONFIG_FREEZE */
114int vm_compressor_mode = VM_PAGER_NOT_CONFIGURED;
115#endif /* CONFIG_FREEZE */
116
117int vm_scale = 1;
118
119#else /* CONFIG_EMBEDDED */
120int vm_compressor_mode = VM_PAGER_COMPRESSOR_WITH_SWAP;
121int vm_scale = 16;
122
123#endif /* CONFIG_EMBEDDED */
124
125int vm_compressor_is_active = 0;
126int vm_compression_limit = 0;
127int vm_compressor_available = 0;
128
129extern void vm_pageout_io_throttle(void);
130
131#if CHECKSUM_THE_DATA || CHECKSUM_THE_SWAP || CHECKSUM_THE_COMPRESSED_DATA
132extern unsigned int hash_string(char *cp, int len);
133static unsigned int vmc_hash(char *, int);
134boolean_t checksum_c_segs = TRUE;
135
136unsigned int vmc_hash(char *cp, int len) {
137 if (__probable(checksum_c_segs == FALSE)) {
138 return 0xDEAD7A37;
139 }
140 return hash_string(cp, len);
141}
142#endif
143
144#define UNPACK_C_SIZE(cs) ((cs->c_size == (PAGE_SIZE-1)) ? PAGE_SIZE : cs->c_size)
145#define PACK_C_SIZE(cs, size) (cs->c_size = ((size == PAGE_SIZE) ? PAGE_SIZE - 1 : size))
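/*
 * Illustrative round trip (a sketch, assuming the packed c_size field
 * cannot represent PAGE_SIZE itself, which is why a full page is
 * stored as PAGE_SIZE - 1):
 *
 *	PACK_C_SIZE(cs, PAGE_SIZE);			// stores PAGE_SIZE - 1
 *	assert(UNPACK_C_SIZE(cs) == PAGE_SIZE);		// recovers PAGE_SIZE
 */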
146
147
148struct c_sv_hash_entry {
149 union {
150 struct {
151 uint32_t c_sv_he_ref;
152 uint32_t c_sv_he_data;
153 } c_sv_he;
154 uint64_t c_sv_he_record;
155
156 } c_sv_he_un;
157};
158
159#define he_ref c_sv_he_un.c_sv_he.c_sv_he_ref
160#define he_data c_sv_he_un.c_sv_he.c_sv_he_data
161#define he_record c_sv_he_un.c_sv_he_record
162
163#define C_SV_HASH_MAX_MISS 32
164#define C_SV_HASH_SIZE ((1 << 10))
165#define C_SV_HASH_MASK ((1 << 10) - 1)
166#define C_SV_CSEG_ID ((1 << 22) - 1)
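/*
 * Rough sketch of the "single value" (SV) path the entry and constants
 * above support: a page whose 32-bit words are all identical (most
 * commonly all zero) is not stored in a c_segment at all.  Its value is
 * refcounted in c_segment_sv_hash_table[] and the slot records the
 * reserved segment id C_SV_CSEG_ID, letting the decompressor rebuild
 * the page from that single value.
 */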
167
168
169union c_segu {
170 c_segment_t c_seg;
171 uintptr_t c_segno;
172};
173
174
175
176#define C_SLOT_PACK_PTR(ptr) (((uintptr_t)ptr - (uintptr_t) KERNEL_PMAP_HEAP_RANGE_START) >> 2)
177#define C_SLOT_UNPACK_PTR(cslot) ((uintptr_t)(cslot->c_packed_ptr << 2) + (uintptr_t) KERNEL_PMAP_HEAP_RANGE_START)
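/*
 * Slot backpointers are stored as a 4-byte-granular offset from
 * KERNEL_PMAP_HEAP_RANGE_START (the >> 2 drops the low bits),
 * presumably so they fit in a bit-field narrower than a full pointer.
 * vm_compressor_init() verifies that the zone map bounds survive this
 * pack/unpack round trip.
 */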
178
179
180uint32_t c_segment_count = 0;
181uint32_t c_segment_count_max = 0;
182
183uint64_t c_generation_id = 0;
184uint64_t c_generation_id_flush_barrier;
185
186
187#define HIBERNATE_FLUSHING_SECS_TO_COMPLETE 120
188
189boolean_t hibernate_no_swapspace = FALSE;
190clock_sec_t hibernate_flushing_deadline = 0;
191
192
193#if RECORD_THE_COMPRESSED_DATA
194char *c_compressed_record_sbuf;
195char *c_compressed_record_ebuf;
196char *c_compressed_record_cptr;
197#endif
198
199
200queue_head_t c_age_list_head;
201queue_head_t c_swappedin_list_head;
202queue_head_t c_swapout_list_head;
203queue_head_t c_swapio_list_head;
204queue_head_t c_swappedout_list_head;
205queue_head_t c_swappedout_sparse_list_head;
206queue_head_t c_major_list_head;
207queue_head_t c_filling_list_head;
208queue_head_t c_bad_list_head;
209
210uint32_t c_age_count = 0;
211uint32_t c_swappedin_count = 0;
212uint32_t c_swapout_count = 0;
213uint32_t c_swapio_count = 0;
214uint32_t c_swappedout_count = 0;
215uint32_t c_swappedout_sparse_count = 0;
216uint32_t c_major_count = 0;
217uint32_t c_filling_count = 0;
218uint32_t c_empty_count = 0;
219uint32_t c_bad_count = 0;
220
221
222queue_head_t c_minor_list_head;
223uint32_t c_minor_count = 0;
224
225int c_overage_swapped_count = 0;
226int c_overage_swapped_limit = 0;
227
228int c_seg_fixed_array_len;
229union c_segu *c_segments;
230vm_offset_t c_buffers;
231vm_size_t c_buffers_size;
232caddr_t c_segments_next_page;
233boolean_t c_segments_busy;
234uint32_t c_segments_available;
235uint32_t c_segments_limit;
236uint32_t c_segments_nearing_limit;
237
238uint32_t c_segment_svp_in_hash;
239uint32_t c_segment_svp_hash_succeeded;
240uint32_t c_segment_svp_hash_failed;
241uint32_t c_segment_svp_zero_compressions;
242uint32_t c_segment_svp_nonzero_compressions;
243uint32_t c_segment_svp_zero_decompressions;
244uint32_t c_segment_svp_nonzero_decompressions;
245
246uint32_t c_segment_noncompressible_pages;
247
248uint32_t c_segment_pages_compressed;
249uint32_t c_segment_pages_compressed_limit;
250uint32_t c_segment_pages_compressed_nearing_limit;
251uint32_t c_free_segno_head = (uint32_t)-1;
252
253uint32_t vm_compressor_minorcompact_threshold_divisor = 10;
254uint32_t vm_compressor_majorcompact_threshold_divisor = 10;
255uint32_t vm_compressor_unthrottle_threshold_divisor = 10;
256uint32_t vm_compressor_catchup_threshold_divisor = 10;
257
258uint32_t vm_compressor_minorcompact_threshold_divisor_overridden = 0;
259uint32_t vm_compressor_majorcompact_threshold_divisor_overridden = 0;
260uint32_t vm_compressor_unthrottle_threshold_divisor_overridden = 0;
261uint32_t vm_compressor_catchup_threshold_divisor_overridden = 0;
262
263#define C_SEGMENTS_PER_PAGE (PAGE_SIZE / sizeof(union c_segu))
264
265
266lck_grp_attr_t vm_compressor_lck_grp_attr;
267lck_attr_t vm_compressor_lck_attr;
268lck_grp_t vm_compressor_lck_grp;
269lck_mtx_t *c_list_lock;
270lck_rw_t c_master_lock;
271boolean_t decompressions_blocked = FALSE;
272
273zone_t compressor_segment_zone;
274int c_compressor_swap_trigger = 0;
275
276uint32_t compressor_cpus;
277char *compressor_scratch_bufs;
278char *kdp_compressor_scratch_buf;
279char *kdp_compressor_decompressed_page;
280addr64_t kdp_compressor_decompressed_page_paddr;
281ppnum_t kdp_compressor_decompressed_page_ppnum;
282
283clock_sec_t start_of_sample_period_sec = 0;
284clock_nsec_t start_of_sample_period_nsec = 0;
285clock_sec_t start_of_eval_period_sec = 0;
286clock_nsec_t start_of_eval_period_nsec = 0;
287uint32_t sample_period_decompression_count = 0;
288uint32_t sample_period_compression_count = 0;
289uint32_t last_eval_decompression_count = 0;
290uint32_t last_eval_compression_count = 0;
291
292#define DECOMPRESSION_SAMPLE_MAX_AGE (60 * 30)
293
294boolean_t vm_swapout_ripe_segments = FALSE;
295uint32_t vm_ripe_target_age = (60 * 60 * 48);
296
297uint32_t swapout_target_age = 0;
298uint32_t age_of_decompressions_during_sample_period[DECOMPRESSION_SAMPLE_MAX_AGE];
299uint32_t overage_decompressions_during_sample_period = 0;
300
301
302void do_fastwake_warmup(queue_head_t *, boolean_t);
303boolean_t fastwake_warmup = FALSE;
304boolean_t fastwake_recording_in_progress = FALSE;
305clock_sec_t dont_trim_until_ts = 0;
306
307uint64_t c_segment_warmup_count;
308uint64_t first_c_segment_to_warm_generation_id = 0;
309uint64_t last_c_segment_to_warm_generation_id = 0;
310boolean_t hibernate_flushing = FALSE;
311
312int64_t c_segment_input_bytes __attribute__((aligned(8))) = 0;
313int64_t c_segment_compressed_bytes __attribute__((aligned(8))) = 0;
314int64_t compressor_bytes_used __attribute__((aligned(8))) = 0;
315
316
317struct c_sv_hash_entry c_segment_sv_hash_table[C_SV_HASH_SIZE] __attribute__ ((aligned (8)));
318
319static boolean_t compressor_needs_to_swap(void);
320static void vm_compressor_swap_trigger_thread(void);
321static void vm_compressor_do_delayed_compactions(boolean_t);
322static void vm_compressor_compact_and_swap(boolean_t);
323static void vm_compressor_age_swapped_in_segments(boolean_t);
324
325#if !CONFIG_EMBEDDED
326static void vm_compressor_take_paging_space_action(void);
327#endif
328
329void compute_swapout_target_age(void);
330
331boolean_t c_seg_major_compact(c_segment_t, c_segment_t);
332boolean_t c_seg_major_compact_ok(c_segment_t, c_segment_t);
333
334int c_seg_minor_compaction_and_unlock(c_segment_t, boolean_t);
335int c_seg_do_minor_compaction_and_unlock(c_segment_t, boolean_t, boolean_t, boolean_t);
336void c_seg_try_minor_compaction_and_unlock(c_segment_t c_seg);
337
338void c_seg_move_to_sparse_list(c_segment_t);
339void c_seg_insert_into_q(queue_head_t *, c_segment_t);
340
341uint64_t vm_available_memory(void);
342uint64_t vm_compressor_pages_compressed(void);
343
344/*
345 * indicate the need to do a major compaction if
346 * the overall set of in-use compression segments
347 * becomes sparse... on systems that support pressure
348 * driven swapping, this will also cause swapouts to
349 * be initiated.
350 */
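/*
 * In rough terms: a major compaction is requested once we hold a
 * non-trivial number of segments and more than ~1/8th of the pages the
 * in-core segments could hold are not actually occupied by compressed
 * data.
 */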
351static inline boolean_t vm_compressor_needs_to_major_compact()
352{
353 uint32_t incore_seg_count;
354
355 incore_seg_count = c_segment_count - c_swappedout_count - c_swappedout_sparse_count;
356
357 if ((c_segment_count >= (c_segments_nearing_limit / 8)) &&
358 ((incore_seg_count * C_SEG_MAX_PAGES) - VM_PAGE_COMPRESSOR_COUNT) >
359 ((incore_seg_count / 8) * C_SEG_MAX_PAGES))
360 return (1);
361 return (0);
362}
363
364
365uint64_t
366vm_available_memory(void)
367{
368 return (((uint64_t)AVAILABLE_NON_COMPRESSED_MEMORY) * PAGE_SIZE_64);
369}
370
371
372uint64_t
373vm_compressor_pages_compressed(void)
374{
375 return (c_segment_pages_compressed * PAGE_SIZE_64);
376}
377
378
379boolean_t
380vm_compressor_low_on_space(void)
381{
382 if ((c_segment_pages_compressed > c_segment_pages_compressed_nearing_limit) ||
383 (c_segment_count > c_segments_nearing_limit))
384 return (TRUE);
385
386 return (FALSE);
387}
388
389
390boolean_t
391vm_compressor_out_of_space(void)
392{
393 if ((c_segment_pages_compressed >= c_segment_pages_compressed_limit) ||
394 (c_segment_count >= c_segments_limit))
395 return (TRUE);
396
397 return (FALSE);
398}
399
400
401int
402vm_wants_task_throttled(task_t task)
403{
404 if (task == kernel_task)
405 return (0);
406
407 if (VM_CONFIG_SWAP_IS_ACTIVE) {
408 if ((vm_compressor_low_on_space() || HARD_THROTTLE_LIMIT_REACHED()) &&
409 (unsigned int)pmap_compressed(task->map->pmap) > (c_segment_pages_compressed / 4))
410 return (1);
411 }
412 return (0);
413}
414
415
416#if DEVELOPMENT || DEBUG
417boolean_t kill_on_no_paging_space = FALSE; /* On compressor/swap exhaustion, kill the largest process regardless of
418 * its chosen process policy. Controlled by a boot-arg of the same name. */
419#endif /* DEVELOPMENT || DEBUG */
420
421#if !CONFIG_EMBEDDED
422
423static uint32_t no_paging_space_action_in_progress = 0;
424extern void memorystatus_send_low_swap_note(void);
425
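/*
 * Run the "out of paging space" response at most once at a time: the
 * OSCompareAndSwap on no_paging_space_action_in_progress acts as a
 * try-lock, so concurrent callers simply return rather than duplicate
 * the kill/notification work.
 */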
426static void
427vm_compressor_take_paging_space_action(void)
428{
429 if (no_paging_space_action_in_progress == 0) {
430
431 if (OSCompareAndSwap(0, 1, (UInt32 *)&no_paging_space_action_in_progress)) {
432
433 if (no_paging_space_action()) {
434#if DEVELOPMENT || DEBUG
435 if (kill_on_no_paging_space == TRUE) {
436 /*
437 * Since we are choosing to always kill a process, we don't need the
438 * "out of application memory" dialog box in this mode, and hence we don't
439 * send the knote.
440 */
441 no_paging_space_action_in_progress = 0;
442 return;
443 }
444#endif /* DEVELOPMENT || DEBUG */
445 memorystatus_send_low_swap_note();
446 }
447
448 no_paging_space_action_in_progress = 0;
449 }
450 }
451}
452#endif /* !CONFIG_EMBEDDED */
453
454
455void
456vm_compressor_init_locks(void)
457{
458 lck_grp_attr_setdefault(&vm_compressor_lck_grp_attr);
459 lck_grp_init(&vm_compressor_lck_grp, "vm_compressor", &vm_compressor_lck_grp_attr);
460 lck_attr_setdefault(&vm_compressor_lck_attr);
461
462 lck_rw_init(&c_master_lock, &vm_compressor_lck_grp, &vm_compressor_lck_attr);
463}
464
465
466void
467vm_decompressor_lock(void)
468{
469 PAGE_REPLACEMENT_ALLOWED(TRUE);
470
471 decompressions_blocked = TRUE;
472
473 PAGE_REPLACEMENT_ALLOWED(FALSE);
474}
475
476void
477vm_decompressor_unlock(void)
478{
479 PAGE_REPLACEMENT_ALLOWED(TRUE);
480
481 decompressions_blocked = FALSE;
482
483 PAGE_REPLACEMENT_ALLOWED(FALSE);
484
485 thread_wakeup((event_t)&decompressions_blocked);
486}
487
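/*
 * Copy one slot's metadata to another: size, packed backpointer, codec
 * (where present) and any configured validation hashes/popcounts.  The
 * offset is deliberately not copied; compaction assigns the destination
 * a new one.
 */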
488static inline void cslot_copy(c_slot_t cdst, c_slot_t csrc) {
489#if CHECKSUM_THE_DATA
490 cdst->c_hash_data = csrc->c_hash_data;
491#endif
492#if CHECKSUM_THE_COMPRESSED_DATA
493 cdst->c_hash_compressed_data = csrc->c_hash_compressed_data;
494#endif
495#if POPCOUNT_THE_COMPRESSED_DATA
496 cdst->c_pop_cdata = csrc->c_pop_cdata;
497#endif
498 cdst->c_size = csrc->c_size;
499 cdst->c_packed_ptr = csrc->c_packed_ptr;
500#if defined(__arm__) || defined(__arm64__)
501 cdst->c_codec = csrc->c_codec;
502#endif
503}
504
505vm_map_t compressor_map;
506uint64_t compressor_pool_max_size;
507uint64_t compressor_pool_size;
508uint32_t compressor_pool_multiplier;
509
510#if DEVELOPMENT || DEBUG
511/*
512 * Compressor segments are write-protected in development/debug
513 * kernels to help debug memory corruption.
514 * In cases where performance is a concern, this can be disabled
515 * via the boot-arg "-disable_cseg_write_protection".
516 */
517boolean_t write_protect_c_segs = TRUE;
518int vm_compressor_test_seg_wp;
519uint32_t vm_ktrace_enabled;
520#endif /* DEVELOPMENT || DEBUG */
521
522void
523vm_compressor_init(void)
524{
525 thread_t thread;
526 struct c_slot cs_dummy;
527 c_slot_t cs = &cs_dummy;
528 int c_segment_min_size;
529 int c_segment_padded_size;
530 int attempts = 1;
531 kern_return_t retval = KERN_SUCCESS;
532 vm_offset_t start_addr = 0;
533 vm_size_t c_segments_arr_size = 0, compressor_submap_size = 0;
534 vm_map_kernel_flags_t vmk_flags;
535#if RECORD_THE_COMPRESSED_DATA
536 vm_size_t c_compressed_record_sbuf_size = 0;
537#endif /* RECORD_THE_COMPRESSED_DATA */
538
539#if DEVELOPMENT || DEBUG
540 char bootarg_name[32];
541 if (PE_parse_boot_argn("-kill_on_no_paging_space", bootarg_name, sizeof (bootarg_name))) {
542 kill_on_no_paging_space = TRUE;
543 }
544 if (PE_parse_boot_argn("-disable_cseg_write_protection", bootarg_name, sizeof (bootarg_name))) {
545 write_protect_c_segs = FALSE;
546 }
547 int vmcval = 1;
548 PE_parse_boot_argn("vm_compressor_validation", &vmcval, sizeof(vmcval));
549
550 if (kern_feature_override(KF_COMPRSV_OVRD)) {
551 vmcval = 0;
552 }
553 if (vmcval == 0) {
554#if POPCOUNT_THE_COMPRESSED_DATA
555 popcount_c_segs = FALSE;
556#endif
557#if CHECKSUM_THE_DATA || CHECKSUM_THE_COMPRESSED_DATA
558 checksum_c_segs = FALSE;
559#endif
560#if VALIDATE_C_SEGMENTS
561 validate_c_segs = FALSE;
562#endif
563 write_protect_c_segs = FALSE;
564 }
565#endif /* DEVELOPMENT || DEBUG */
566
567 /*
568 * ensure that any pointer that gets created from
569 * the vm_page zone can be packed properly
570 */
571 cs->c_packed_ptr = C_SLOT_PACK_PTR(zone_map_min_address);
572
573 if (C_SLOT_UNPACK_PTR(cs) != (uintptr_t)zone_map_min_address)
574 panic("C_SLOT_UNPACK_PTR failed on zone_map_min_address - %p", (void *)zone_map_min_address);
575
576 cs->c_packed_ptr = C_SLOT_PACK_PTR(zone_map_max_address);
577
578 if (C_SLOT_UNPACK_PTR(cs) != (uintptr_t)zone_map_max_address)
579 panic("C_SLOT_UNPACK_PTR failed on zone_map_max_address - %p", (void *)zone_map_max_address);
580
581
582 assert((C_SEGMENTS_PER_PAGE * sizeof(union c_segu)) == PAGE_SIZE);
583
584 PE_parse_boot_argn("vm_compression_limit", &vm_compression_limit, sizeof (vm_compression_limit));
585
586#ifdef CONFIG_EMBEDDED
587 vm_compressor_minorcompact_threshold_divisor = 20;
588 vm_compressor_majorcompact_threshold_divisor = 30;
589 vm_compressor_unthrottle_threshold_divisor = 40;
590 vm_compressor_catchup_threshold_divisor = 60;
591#else
592 if (max_mem <= (3ULL * 1024ULL * 1024ULL * 1024ULL)) {
593 vm_compressor_minorcompact_threshold_divisor = 11;
594 vm_compressor_majorcompact_threshold_divisor = 13;
595 vm_compressor_unthrottle_threshold_divisor = 20;
596 vm_compressor_catchup_threshold_divisor = 35;
597 } else {
598 vm_compressor_minorcompact_threshold_divisor = 20;
599 vm_compressor_majorcompact_threshold_divisor = 25;
600 vm_compressor_unthrottle_threshold_divisor = 35;
601 vm_compressor_catchup_threshold_divisor = 50;
602 }
603#endif
604 /*
605 * vm_page_init_lck_grp is now responsible for calling vm_compressor_init_locks;
606 * c_master_lock needs to be available early so that "vm_page_find_contiguous" can
607 * use PAGE_REPLACEMENT_ALLOWED to coordinate with the compressor.
608 */
609
610 c_list_lock = lck_mtx_alloc_init(&vm_compressor_lck_grp, &vm_compressor_lck_attr);
611
612 queue_init(&c_bad_list_head);
613 queue_init(&c_age_list_head);
614 queue_init(&c_minor_list_head);
615 queue_init(&c_major_list_head);
616 queue_init(&c_filling_list_head);
617 queue_init(&c_swapout_list_head);
618 queue_init(&c_swapio_list_head);
619 queue_init(&c_swappedin_list_head);
620 queue_init(&c_swappedout_list_head);
621 queue_init(&c_swappedout_sparse_list_head);
622
623 c_free_segno_head = -1;
624 c_segments_available = 0;
625
626 if (vm_compression_limit)
627 compressor_pool_size = (uint64_t)vm_compression_limit * PAGE_SIZE_64;
628
629 compressor_pool_max_size = C_SEG_MAX_LIMIT;
630 compressor_pool_max_size *= C_SEG_BUFSIZE;
631
632#if defined(__x86_64__)
633
634 if (vm_compression_limit == 0) {
635
636 if (max_mem <= (4ULL * 1024ULL * 1024ULL * 1024ULL))
637 compressor_pool_size = 16ULL * max_mem;
638 else if (max_mem <= (8ULL * 1024ULL * 1024ULL * 1024ULL))
639 compressor_pool_size = 8ULL * max_mem;
640 else if (max_mem <= (32ULL * 1024ULL * 1024ULL * 1024ULL))
641 compressor_pool_size = 4ULL * max_mem;
642 else
643 compressor_pool_size = 2ULL * max_mem;
644 }
645 if (max_mem <= (8ULL * 1024ULL * 1024ULL * 1024ULL))
646 compressor_pool_multiplier = 1;
647 else if (max_mem <= (32ULL * 1024ULL * 1024ULL * 1024ULL))
648 compressor_pool_multiplier = 2;
649 else
650 compressor_pool_multiplier = 4;
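	/*
	 * Worked example (hypothetical 16GB x86_64 machine): 16GB falls in
	 * the "<= 32GB" bucket, so the pool is sized at 4 * max_mem = 64GB
	 * of compressor VA with a multiplier of 2, which is applied below
	 * when deriving c_segment_pages_compressed_limit.
	 */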
651
652#elif defined(__arm__)
653
654#define VM_RESERVE_SIZE (1024 * 1024 * 256)
655#define MAX_COMPRESSOR_POOL_SIZE (1024 * 1024 * 450)
656
657 if (compressor_pool_max_size > MAX_COMPRESSOR_POOL_SIZE)
658 compressor_pool_max_size = MAX_COMPRESSOR_POOL_SIZE;
659
660 if (vm_compression_limit == 0)
661 compressor_pool_size = ((kernel_map->max_offset - kernel_map->min_offset) - kernel_map->size) - VM_RESERVE_SIZE;
662 compressor_pool_multiplier = 1;
663#else
664 if (compressor_pool_max_size > max_mem)
665 compressor_pool_max_size = max_mem;
666
667 if (vm_compression_limit == 0)
668 compressor_pool_size = max_mem;
669 compressor_pool_multiplier = 1;
670#endif
671 if (compressor_pool_size > compressor_pool_max_size)
672 compressor_pool_size = compressor_pool_max_size;
673
674try_again:
675 c_segments_limit = (uint32_t)(compressor_pool_size / (vm_size_t)(C_SEG_ALLOCSIZE));
676 c_segments_nearing_limit = (uint32_t)(((uint64_t)c_segments_limit * 98ULL) / 100ULL);
677
678 c_segment_pages_compressed_limit = (c_segments_limit * (C_SEG_BUFSIZE / PAGE_SIZE) * compressor_pool_multiplier);
679
680 if (c_segment_pages_compressed_limit < (uint32_t)(max_mem / PAGE_SIZE))
681 c_segment_pages_compressed_limit = (uint32_t)(max_mem / PAGE_SIZE);
682
683 c_segment_pages_compressed_nearing_limit = (uint32_t)(((uint64_t)c_segment_pages_compressed_limit * 98ULL) / 100ULL);
684
685 /*
686 * Submap needs space for:
687 * - c_segments
688 * - c_buffers
689 * - swap reclamations -- C_SEG_BUFSIZE
690 */
691 c_segments_arr_size = vm_map_round_page((sizeof(union c_segu) * c_segments_limit), VM_MAP_PAGE_MASK(kernel_map));
692 c_buffers_size = vm_map_round_page(((vm_size_t)C_SEG_ALLOCSIZE * (vm_size_t)c_segments_limit), VM_MAP_PAGE_MASK(kernel_map));
693
694 compressor_submap_size = c_segments_arr_size + c_buffers_size + C_SEG_BUFSIZE;
695
696#if RECORD_THE_COMPRESSED_DATA
697 c_compressed_record_sbuf_size = (vm_size_t)C_SEG_ALLOCSIZE + (PAGE_SIZE * 2);
698 compressor_submap_size += c_compressed_record_sbuf_size;
699#endif /* RECORD_THE_COMPRESSED_DATA */
700
701 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
702 vmk_flags.vmkf_permanent = TRUE;
703 retval = kmem_suballoc(kernel_map, &start_addr, compressor_submap_size,
704 FALSE, VM_FLAGS_ANYWHERE, vmk_flags, VM_KERN_MEMORY_COMPRESSOR,
705 &compressor_map);
706
707 if (retval != KERN_SUCCESS) {
708 if (++attempts > 3)
709 panic("vm_compressor_init: kmem_suballoc failed - 0x%llx", (uint64_t)compressor_submap_size);
710
711 compressor_pool_size = compressor_pool_size / 2;
712
713 kprintf("retrying creation of the compressor submap at 0x%llx bytes\n", compressor_pool_size);
714 goto try_again;
715 }
716 if (kernel_memory_allocate(compressor_map, (vm_offset_t *)(&c_segments), (sizeof(union c_segu) * c_segments_limit), 0, KMA_KOBJECT | KMA_VAONLY | KMA_PERMANENT, VM_KERN_MEMORY_COMPRESSOR) != KERN_SUCCESS)
717 panic("vm_compressor_init: kernel_memory_allocate failed - c_segments\n");
718 if (kernel_memory_allocate(compressor_map, &c_buffers, c_buffers_size, 0, KMA_COMPRESSOR | KMA_VAONLY | KMA_PERMANENT, VM_KERN_MEMORY_COMPRESSOR) != KERN_SUCCESS)
719 panic("vm_compressor_init: kernel_memory_allocate failed - c_buffers\n");
720
721
722 c_segment_min_size = sizeof(struct c_segment) + (C_SEG_SLOT_VAR_ARRAY_MIN_LEN * sizeof(struct c_slot));
723
724 for (c_segment_padded_size = 128; c_segment_padded_size < c_segment_min_size; c_segment_padded_size = c_segment_padded_size << 1);
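	/* e.g. a hypothetical 144-byte minimum would pad up to a 256-byte segment */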
725
726 compressor_segment_zone = zinit(c_segment_padded_size, c_segments_limit * c_segment_padded_size, PAGE_SIZE, "compressor_segment");
727 zone_change(compressor_segment_zone, Z_CALLERACCT, FALSE);
728 zone_change(compressor_segment_zone, Z_NOENCRYPT, TRUE);
729
730 c_seg_fixed_array_len = (c_segment_padded_size - sizeof(struct c_segment)) / sizeof(struct c_slot);
731
732 c_segments_busy = FALSE;
733
734 c_segments_next_page = (caddr_t)c_segments;
735 vm_compressor_algorithm_init();
736
737 {
738 host_basic_info_data_t hinfo;
739 mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
740
741#define BSD_HOST 1
742 host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
743
744 compressor_cpus = hinfo.max_cpus;
745 compressor_scratch_bufs = kalloc_tag(compressor_cpus * vm_compressor_get_decode_scratch_size(), VM_KERN_MEMORY_COMPRESSOR);
746
747 kdp_compressor_scratch_buf = kalloc_tag(vm_compressor_get_decode_scratch_size(), VM_KERN_MEMORY_COMPRESSOR);
748
749 /*
750 * kdp_compressor_decompressed_page must be page-aligned because we access
751 * it through the physical aperture by page number. kalloc() does not
752 * guarantee alignment.
753 */
754 vm_offset_t addr;
755 if (kernel_memory_allocate(kernel_map, &addr, PAGE_SIZE, 0, KMA_KOBJECT, VM_KERN_MEMORY_COMPRESSOR) != KERN_SUCCESS) {
756 panic("vm_compressor_init: kernel_memory_allocate failed - kdp_compressor_decompressed_page\n");
757 }
758 assert((addr & PAGE_MASK) == 0);
759 kdp_compressor_decompressed_page = (void *)addr;
760 kdp_compressor_decompressed_page_paddr = kvtophys((vm_offset_t)kdp_compressor_decompressed_page);
761 kdp_compressor_decompressed_page_ppnum = (ppnum_t) atop(kdp_compressor_decompressed_page_paddr);
762 }
763#if CONFIG_FREEZE
764 freezer_compressor_scratch_buf = kalloc_tag(vm_compressor_get_encode_scratch_size(), VM_KERN_MEMORY_COMPRESSOR);
765#endif
766
767#if RECORD_THE_COMPRESSED_DATA
768 if (kernel_memory_allocate(compressor_map, (vm_offset_t *)&c_compressed_record_sbuf, c_compressed_record_sbuf_size, 0, KMA_KOBJECT, VM_KERN_MEMORY_COMPRESSOR) != KERN_SUCCESS)
769 panic("vm_compressor_init: kernel_memory_allocate failed - c_compressed_record_sbuf\n");
770
771 c_compressed_record_cptr = c_compressed_record_sbuf;
772 c_compressed_record_ebuf = c_compressed_record_sbuf + c_compressed_record_sbuf_size;
773#endif
774
775 if (kernel_thread_start_priority((thread_continue_t)vm_compressor_swap_trigger_thread, NULL,
776 BASEPRI_VM, &thread) != KERN_SUCCESS) {
777 panic("vm_compressor_swap_trigger_thread: create failed");
778 }
779 thread_deallocate(thread);
780
781 if (vm_pageout_internal_start() != KERN_SUCCESS) {
782 panic("vm_compressor_init: Failed to start the internal pageout thread.\n");
783 }
784 if (VM_CONFIG_SWAP_IS_PRESENT)
785 vm_compressor_swap_init();
786
787 if (VM_CONFIG_COMPRESSOR_IS_ACTIVE)
788 vm_compressor_is_active = 1;
789
790#if CONFIG_FREEZE
791 memorystatus_freeze_enabled = TRUE;
792#endif /* CONFIG_FREEZE */
793
794 vm_compressor_available = 1;
795
796 vm_page_reactivate_all_throttled();
797}
798
799
800#if VALIDATE_C_SEGMENTS
801
802static void
803c_seg_validate(c_segment_t c_seg, boolean_t must_be_compact)
804{
805 int c_indx;
806 int32_t bytes_used;
807 uint32_t c_rounded_size;
808 uint32_t c_size;
809 c_slot_t cs;
810
811 if (__probable(validate_c_segs == FALSE)) {
812 return;
813 }
814 if (c_seg->c_firstemptyslot < c_seg->c_nextslot) {
815 c_indx = c_seg->c_firstemptyslot;
816 cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx);
817
818 if (cs == NULL)
819 panic("c_seg_validate: no slot backing c_firstemptyslot");
820
821 if (cs->c_size)
822 panic("c_seg_validate: c_firstemptyslot has non-zero size (%d)\n", cs->c_size);
823 }
824 bytes_used = 0;
825
826 for (c_indx = 0; c_indx < c_seg->c_nextslot; c_indx++) {
827
828 cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx);
829
830 c_size = UNPACK_C_SIZE(cs);
831
832 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;
833
834 bytes_used += c_rounded_size;
835
836#if CHECKSUM_THE_COMPRESSED_DATA
837 unsigned csvhash;
838 if (c_size && cs->c_hash_compressed_data != (csvhash = vmc_hash((char *)&c_seg->c_store.c_buffer[cs->c_offset], c_size))) {
839 addr64_t csvphys = kvtophys((vm_offset_t)&c_seg->c_store.c_buffer[cs->c_offset]);
840 panic("Compressed data doesn't match original %p phys: 0x%llx %d %p %d %d 0x%x 0x%x", c_seg, csvphys, cs->c_offset, cs, c_indx, c_size, cs->c_hash_compressed_data, csvhash);
841 }
842#endif
843#if POPCOUNT_THE_COMPRESSED_DATA
844 unsigned csvpop;
845 if (c_size) {
846 uintptr_t csvaddr = (uintptr_t) &c_seg->c_store.c_buffer[cs->c_offset];
847 if (cs->c_pop_cdata != (csvpop = vmc_pop(csvaddr, c_size))) {
848 panic("Compressed data popcount doesn't match original, bit distance: %d %p (phys: %p) %p %p 0x%llx 0x%x 0x%x 0x%x", (csvpop - cs->c_pop_cdata), (void *)csvaddr, (void *) kvtophys(csvaddr), c_seg, cs, cs->c_offset, c_size, csvpop, cs->c_pop_cdata);
849 }
850 }
851#endif
852
853 }
854
855 if (bytes_used != c_seg->c_bytes_used)
856 panic("c_seg_validate: bytes_used mismatch - found %d, segment has %d\n", bytes_used, c_seg->c_bytes_used);
857
858 if (c_seg->c_bytes_used > C_SEG_OFFSET_TO_BYTES((int32_t)c_seg->c_nextoffset))
859 panic("c_seg_validate: c_bytes_used > c_nextoffset - c_nextoffset = %d, c_bytes_used = %d\n",
860 (int32_t)C_SEG_OFFSET_TO_BYTES((int32_t)c_seg->c_nextoffset), c_seg->c_bytes_used);
861
862 if (must_be_compact) {
863 if (c_seg->c_bytes_used != C_SEG_OFFSET_TO_BYTES((int32_t)c_seg->c_nextoffset))
864 panic("c_seg_validate: c_bytes_used doesn't match c_nextoffset - c_nextoffset = %d, c_bytes_used = %d\n",
865 (int32_t)C_SEG_OFFSET_TO_BYTES((int32_t)c_seg->c_nextoffset), c_seg->c_bytes_used);
866 }
867}
868
869#endif
870
871
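/*
 * Queue a segment for delayed minor compaction.  Lock ordering is
 * c_list_lock before the segment lock, so when the caller holds only
 * the segment lock and c_list_lock can't be taken without blocking,
 * the segment is marked busy, its lock dropped, and both locks are
 * re-taken in the proper order before it is queued.
 */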
872void
873c_seg_need_delayed_compaction(c_segment_t c_seg, boolean_t c_list_lock_held)
874{
875 boolean_t clear_busy = FALSE;
876
877 if (c_list_lock_held == FALSE) {
878 if ( !lck_mtx_try_lock_spin_always(c_list_lock)) {
879 C_SEG_BUSY(c_seg);
880
881 lck_mtx_unlock_always(&c_seg->c_lock);
882 lck_mtx_lock_spin_always(c_list_lock);
883 lck_mtx_lock_spin_always(&c_seg->c_lock);
884
885 clear_busy = TRUE;
886 }
887 }
888 assert(c_seg->c_state != C_IS_FILLING);
889
890 if (!c_seg->c_on_minorcompact_q && !(C_SEG_IS_ON_DISK_OR_SOQ(c_seg))) {
891 queue_enter(&c_minor_list_head, c_seg, c_segment_t, c_list);
892 c_seg->c_on_minorcompact_q = 1;
893 c_minor_count++;
894 }
895 if (c_list_lock_held == FALSE)
896 lck_mtx_unlock_always(c_list_lock);
897
898 if (clear_busy == TRUE)
899 C_SEG_WAKEUP_DONE(c_seg);
900}
901
902
903unsigned int c_seg_moved_to_sparse_list = 0;
904
905void
906c_seg_move_to_sparse_list(c_segment_t c_seg)
907{
908 boolean_t clear_busy = FALSE;
909
910 if ( !lck_mtx_try_lock_spin_always(c_list_lock)) {
911 C_SEG_BUSY(c_seg);
912
913 lck_mtx_unlock_always(&c_seg->c_lock);
914 lck_mtx_lock_spin_always(c_list_lock);
915 lck_mtx_lock_spin_always(&c_seg->c_lock);
916
917 clear_busy = TRUE;
918 }
919 c_seg_switch_state(c_seg, C_ON_SWAPPEDOUTSPARSE_Q, FALSE);
920
921 c_seg_moved_to_sparse_list++;
922
923 lck_mtx_unlock_always(c_list_lock);
924
925 if (clear_busy == TRUE)
926 C_SEG_WAKEUP_DONE(c_seg);
927}
928
929
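/*
 * Insert c_seg into qhead, keeping the queue ordered by ascending
 * c_generation_id via a simple linear scan from the head.
 */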
930void
931c_seg_insert_into_q(queue_head_t *qhead, c_segment_t c_seg)
932{
933 c_segment_t c_seg_next;
934
935 if (queue_empty(qhead)) {
936 queue_enter(qhead, c_seg, c_segment_t, c_age_list);
937 } else {
938 c_seg_next = (c_segment_t)queue_first(qhead);
939
940 while (TRUE) {
941
942 if (c_seg->c_generation_id < c_seg_next->c_generation_id) {
943 queue_insert_before(qhead, c_seg, c_seg_next, c_segment_t, c_age_list);
944 break;
945 }
946 c_seg_next = (c_segment_t) queue_next(&c_seg_next->c_age_list);
947
948 if (queue_end(qhead, (queue_entry_t) c_seg_next)) {
949 queue_enter(qhead, c_seg, c_segment_t, c_age_list);
950 break;
951 }
952 }
953 }
954}
955
956
957int try_minor_compaction_failed = 0;
958int try_minor_compaction_succeeded = 0;
959
960void
961c_seg_try_minor_compaction_and_unlock(c_segment_t c_seg)
962{
963
964 assert(c_seg->c_on_minorcompact_q);
965 /*
966 * c_seg is currently on the delayed minor compaction
967 * queue and we have c_seg locked... if we can get the
968 * c_list_lock w/o blocking (if we blocked we could deadlock
969 * because the lock order is c_list_lock then c_seg's lock)
970 * we'll pull it from the delayed list and compact it directly
971 */
972 if ( !lck_mtx_try_lock_spin_always(c_list_lock)) {
973 /*
974 * c_list_lock is held, we need to bail
975 */
976 try_minor_compaction_failed++;
977
978 lck_mtx_unlock_always(&c_seg->c_lock);
979 } else {
980 try_minor_compaction_succeeded++;
981
982 C_SEG_BUSY(c_seg);
983 c_seg_do_minor_compaction_and_unlock(c_seg, TRUE, FALSE, FALSE);
984 }
985}
986
987
988int
989c_seg_do_minor_compaction_and_unlock(c_segment_t c_seg, boolean_t clear_busy, boolean_t need_list_lock, boolean_t disallow_page_replacement)
990{
991 int c_seg_freed;
992
993 assert(c_seg->c_busy);
994 assert(!C_SEG_IS_ON_DISK_OR_SOQ(c_seg));
995
996 /*
997 * check for the case that can occur when we are not swapping
998 * and this segment has been major compacted in the past
999 * and moved to the majorcompact q to remove it from further
1000 * consideration... if the occupancy falls too low we need
1001 * to put it back on the age_q so that it will be considered
1002 * in the next major compaction sweep... if we don't do this
1003 * we will eventually run into the c_segments_limit
1004 */
1005 if (c_seg->c_state == C_ON_MAJORCOMPACT_Q && C_SEG_SHOULD_MAJORCOMPACT_NOW(c_seg)) {
1006
1007 c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
1008 }
1009 if (!c_seg->c_on_minorcompact_q) {
1010 if (clear_busy == TRUE)
1011 C_SEG_WAKEUP_DONE(c_seg);
1012
1013 lck_mtx_unlock_always(&c_seg->c_lock);
1014
1015 return (0);
1016 }
1017 queue_remove(&c_minor_list_head, c_seg, c_segment_t, c_list);
1018 c_seg->c_on_minorcompact_q = 0;
1019 c_minor_count--;
1020
1021 lck_mtx_unlock_always(c_list_lock);
1022
1023 if (disallow_page_replacement == TRUE) {
1024 lck_mtx_unlock_always(&c_seg->c_lock);
1025
1026 PAGE_REPLACEMENT_DISALLOWED(TRUE);
1027
1028 lck_mtx_lock_spin_always(&c_seg->c_lock);
1029 }
1030 c_seg_freed = c_seg_minor_compaction_and_unlock(c_seg, clear_busy);
1031
1032 if (disallow_page_replacement == TRUE)
1033 PAGE_REPLACEMENT_DISALLOWED(FALSE);
1034
1035 if (need_list_lock == TRUE)
1036 lck_mtx_lock_spin_always(c_list_lock);
1037
1038 return (c_seg_freed);
1039}
1040
1041
1042void
1043c_seg_wait_on_busy(c_segment_t c_seg)
1044{
1045 c_seg->c_wanted = 1;
1046 assert_wait((event_t) (c_seg), THREAD_UNINT);
1047
1048 lck_mtx_unlock_always(&c_seg->c_lock);
1049 thread_block(THREAD_CONTINUE_NULL);
1050}
1051
1052
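/*
 * Move a segment from its current state queue to new_state, keeping the
 * per-queue counts in sync.  The first switch validates that the
 * transition is legal and dequeues the segment; the second enqueues it
 * on the destination queue (at the head when insert_head is TRUE).
 * c_list_lock must be held, and the segment lock as well except when
 * switching into C_IS_FILLING.
 */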
1053void
1054c_seg_switch_state(c_segment_t c_seg, int new_state, boolean_t insert_head)
1055{
1056 int old_state = c_seg->c_state;
1057
1058#if __i386__ || __x86_64__
1059 if (new_state != C_IS_FILLING)
1060 LCK_MTX_ASSERT(&c_seg->c_lock, LCK_MTX_ASSERT_OWNED);
1061 LCK_MTX_ASSERT(c_list_lock, LCK_MTX_ASSERT_OWNED);
1062#endif
1063 switch (old_state) {
1064
1065 case C_IS_EMPTY:
1066 assert(new_state == C_IS_FILLING || new_state == C_IS_FREE);
1067
1068 c_empty_count--;
1069 break;
1070
1071 case C_IS_FILLING:
1072 assert(new_state == C_ON_AGE_Q || new_state == C_ON_SWAPOUT_Q);
1073
1074 queue_remove(&c_filling_list_head, c_seg, c_segment_t, c_age_list);
1075 c_filling_count--;
1076 break;
1077
1078 case C_ON_AGE_Q:
1079 assert(new_state == C_ON_SWAPOUT_Q || new_state == C_ON_MAJORCOMPACT_Q ||
1080 new_state == C_IS_FREE);
1081
1082 queue_remove(&c_age_list_head, c_seg, c_segment_t, c_age_list);
1083 c_age_count--;
1084 break;
1085
1086 case C_ON_SWAPPEDIN_Q:
1087 assert(new_state == C_ON_AGE_Q || new_state == C_IS_FREE);
1088
1089 queue_remove(&c_swappedin_list_head, c_seg, c_segment_t, c_age_list);
1090 c_swappedin_count--;
1091 break;
1092
1093 case C_ON_SWAPOUT_Q:
1094 assert(new_state == C_ON_AGE_Q || new_state == C_IS_FREE || new_state == C_IS_EMPTY || new_state == C_ON_SWAPIO_Q);
1095
1096 queue_remove(&c_swapout_list_head, c_seg, c_segment_t, c_age_list);
1097 thread_wakeup((event_t)&compaction_swapper_running);
1098 c_swapout_count--;
1099 break;
1100
1101 case C_ON_SWAPIO_Q:
1102 assert(new_state == C_ON_SWAPPEDOUT_Q || new_state == C_ON_SWAPPEDOUTSPARSE_Q || new_state == C_ON_AGE_Q);
1103
1104 queue_remove(&c_swapio_list_head, c_seg, c_segment_t, c_age_list);
1105 c_swapio_count--;
1106 break;
1107
1108 case C_ON_SWAPPEDOUT_Q:
1109 assert(new_state == C_ON_SWAPPEDIN_Q || new_state == C_ON_AGE_Q ||
1110 new_state == C_ON_SWAPPEDOUTSPARSE_Q ||
1111 new_state == C_ON_BAD_Q || new_state == C_IS_EMPTY || new_state == C_IS_FREE);
1112
1113 queue_remove(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list);
1114 c_swappedout_count--;
1115 break;
1116
1117 case C_ON_SWAPPEDOUTSPARSE_Q:
1118 assert(new_state == C_ON_SWAPPEDIN_Q || new_state == C_ON_AGE_Q ||
1119 new_state == C_ON_BAD_Q || new_state == C_IS_EMPTY || new_state == C_IS_FREE);
1120
1121 queue_remove(&c_swappedout_sparse_list_head, c_seg, c_segment_t, c_age_list);
1122 c_swappedout_sparse_count--;
1123 break;
1124
1125 case C_ON_MAJORCOMPACT_Q:
1126 assert(new_state == C_ON_AGE_Q || new_state == C_IS_FREE);
1127
1128 queue_remove(&c_major_list_head, c_seg, c_segment_t, c_age_list);
1129 c_major_count--;
1130 break;
1131
1132 case C_ON_BAD_Q:
1133 assert(new_state == C_IS_FREE);
1134
1135 queue_remove(&c_bad_list_head, c_seg, c_segment_t, c_age_list);
1136 c_bad_count--;
1137 break;
1138
1139 default:
1140 panic("c_seg %p has bad c_state = %d\n", c_seg, old_state);
1141 }
1142
1143 switch(new_state) {
1144 case C_IS_FREE:
1145 assert(old_state != C_IS_FILLING);
1146
1147 break;
1148
1149 case C_IS_EMPTY:
1150 assert(old_state == C_ON_SWAPOUT_Q || old_state == C_ON_SWAPPEDOUT_Q || old_state == C_ON_SWAPPEDOUTSPARSE_Q);
1151
1152 c_empty_count++;
1153 break;
1154
1155 case C_IS_FILLING:
1156 assert(old_state == C_IS_EMPTY);
1157
1158 queue_enter(&c_filling_list_head, c_seg, c_segment_t, c_age_list);
1159 c_filling_count++;
1160 break;
1161
1162 case C_ON_AGE_Q:
1163 assert(old_state == C_IS_FILLING || old_state == C_ON_SWAPPEDIN_Q ||
1164 old_state == C_ON_SWAPOUT_Q || old_state == C_ON_SWAPIO_Q ||
1165 old_state == C_ON_MAJORCOMPACT_Q || old_state == C_ON_SWAPPEDOUT_Q || old_state == C_ON_SWAPPEDOUTSPARSE_Q);
1166
1167 if (old_state == C_IS_FILLING)
1168 queue_enter(&c_age_list_head, c_seg, c_segment_t, c_age_list);
1169 else {
1170 if (!queue_empty(&c_age_list_head)) {
1171 c_segment_t c_first;
1172
1173 c_first = (c_segment_t)queue_first(&c_age_list_head);
1174 c_seg->c_creation_ts = c_first->c_creation_ts;
1175 }
1176 queue_enter_first(&c_age_list_head, c_seg, c_segment_t, c_age_list);
1177 }
1178 c_age_count++;
1179 break;
1180
1181 case C_ON_SWAPPEDIN_Q:
1182 assert(old_state == C_ON_SWAPPEDOUT_Q || old_state == C_ON_SWAPPEDOUTSPARSE_Q);
1183
1184 if (insert_head == TRUE)
1185 queue_enter_first(&c_swappedin_list_head, c_seg, c_segment_t, c_age_list);
1186 else
1187 queue_enter(&c_swappedin_list_head, c_seg, c_segment_t, c_age_list);
1188 c_swappedin_count++;
1189 break;
1190
1191 case C_ON_SWAPOUT_Q:
1192 assert(old_state == C_ON_AGE_Q || old_state == C_IS_FILLING);
1193
1194 if (insert_head == TRUE)
1195 queue_enter_first(&c_swapout_list_head, c_seg, c_segment_t, c_age_list);
1196 else
1197 queue_enter(&c_swapout_list_head, c_seg, c_segment_t, c_age_list);
1198 c_swapout_count++;
1199 break;
1200
1201 case C_ON_SWAPIO_Q:
1202 assert(old_state == C_ON_SWAPOUT_Q);
1203
1204 if (insert_head == TRUE)
1205 queue_enter_first(&c_swapio_list_head, c_seg, c_segment_t, c_age_list);
1206 else
1207 queue_enter(&c_swapio_list_head, c_seg, c_segment_t, c_age_list);
1208 c_swapio_count++;
1209 break;
1210
1211 case C_ON_SWAPPEDOUT_Q:
1212 assert(old_state == C_ON_SWAPIO_Q);
1213
1214 if (insert_head == TRUE)
1215 queue_enter_first(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list);
1216 else
1217 queue_enter(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list);
1218 c_swappedout_count++;
1219 break;
1220
1221 case C_ON_SWAPPEDOUTSPARSE_Q:
1222 assert(old_state == C_ON_SWAPIO_Q || old_state == C_ON_SWAPPEDOUT_Q);
1223
1224 if (insert_head == TRUE)
1225 queue_enter_first(&c_swappedout_sparse_list_head, c_seg, c_segment_t, c_age_list);
1226 else
1227 queue_enter(&c_swappedout_sparse_list_head, c_seg, c_segment_t, c_age_list);
1228
1229 c_swappedout_sparse_count++;
1230 break;
1231
1232 case C_ON_MAJORCOMPACT_Q:
1233 assert(old_state == C_ON_AGE_Q);
1234
1235 if (insert_head == TRUE)
1236 queue_enter_first(&c_major_list_head, c_seg, c_segment_t, c_age_list);
1237 else
1238 queue_enter(&c_major_list_head, c_seg, c_segment_t, c_age_list);
1239 c_major_count++;
1240 break;
1241
1242 case C_ON_BAD_Q:
1243 assert(old_state == C_ON_SWAPPEDOUT_Q || old_state == C_ON_SWAPPEDOUTSPARSE_Q);
1244
1245 if (insert_head == TRUE)
1246 queue_enter_first(&c_bad_list_head, c_seg, c_segment_t, c_age_list);
1247 else
1248 queue_enter(&c_bad_list_head, c_seg, c_segment_t, c_age_list);
1249 c_bad_count++;
1250 break;
1251
1252 default:
1253 panic("c_seg %p requesting bad c_state = %d\n", c_seg, new_state);
1254 }
1255 c_seg->c_state = new_state;
1256}
1257
1258
1259
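/*
 * c_seg_free() re-takes the locks in the proper order and hands off to
 * c_seg_free_locked(), which tears a busy, empty segment all the way
 * down: it returns the buffer pages or swap space, recycles the segment
 * number onto the free list, and frees the slot array and the segment
 * structure itself.
 */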
1260void
1261c_seg_free(c_segment_t c_seg)
1262{
1263 assert(c_seg->c_busy);
1264
1265 lck_mtx_unlock_always(&c_seg->c_lock);
1266 lck_mtx_lock_spin_always(c_list_lock);
1267 lck_mtx_lock_spin_always(&c_seg->c_lock);
1268
1269 c_seg_free_locked(c_seg);
1270}
1271
1272
1273void
1274c_seg_free_locked(c_segment_t c_seg)
1275{
1276 int segno;
1277 int pages_populated = 0;
1278 int32_t *c_buffer = NULL;
1279 uint64_t c_swap_handle = 0;
1280
1281 assert(c_seg->c_busy);
1282 assert(c_seg->c_slots_used == 0);
1283 assert(!c_seg->c_on_minorcompact_q);
1284 assert(!c_seg->c_busy_swapping);
1285
1286 if (c_seg->c_overage_swap == TRUE) {
1287 c_overage_swapped_count--;
1288 c_seg->c_overage_swap = FALSE;
1289 }
1290 if ( !(C_SEG_IS_ONDISK(c_seg)))
1291 c_buffer = c_seg->c_store.c_buffer;
1292 else
1293 c_swap_handle = c_seg->c_store.c_swap_handle;
1294
1295 c_seg_switch_state(c_seg, C_IS_FREE, FALSE);
1296
1297 lck_mtx_unlock_always(c_list_lock);
1298
1299 if (c_buffer) {
1300 pages_populated = (round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset))) / PAGE_SIZE;
1301 c_seg->c_store.c_buffer = NULL;
1302 } else
1303 c_seg->c_store.c_swap_handle = (uint64_t)-1;
1304
1305 lck_mtx_unlock_always(&c_seg->c_lock);
1306
1307 if (c_buffer) {
1308 if (pages_populated)
1309 kernel_memory_depopulate(compressor_map, (vm_offset_t) c_buffer, pages_populated * PAGE_SIZE, KMA_COMPRESSOR);
1310
1311 } else if (c_swap_handle) {
1312 /*
1313 * Free swap space on disk.
1314 */
1315 vm_swap_free(c_swap_handle);
1316 }
1317 lck_mtx_lock_spin_always(&c_seg->c_lock);
1318 /*
1319 * c_seg must remain busy until
1320 * after the call to vm_swap_free
1321 */
1322 C_SEG_WAKEUP_DONE(c_seg);
1323 lck_mtx_unlock_always(&c_seg->c_lock);
1324
1325 segno = c_seg->c_mysegno;
1326
1327 lck_mtx_lock_spin_always(c_list_lock);
1328 /*
1329 * because the c_buffer is now associated with the segno,
1330 * we can't put the segno back on the free list until
1331 * after we have depopulated the c_buffer range, or
1332 * we run the risk of depopulating a range that is
1333 * now being used in one of the compressor heads
1334 */
1335 c_segments[segno].c_segno = c_free_segno_head;
1336 c_free_segno_head = segno;
1337 c_segment_count--;
1338
1339 lck_mtx_unlock_always(c_list_lock);
1340
1341 lck_mtx_destroy(&c_seg->c_lock, &vm_compressor_lck_grp);
1342
1343 if (c_seg->c_slot_var_array_len)
1344 kfree(c_seg->c_slot_var_array, sizeof(struct c_slot) * c_seg->c_slot_var_array_len);
1345
1346 zfree(compressor_segment_zone, c_seg);
1347}
1348
1349#if DEVELOPMENT || DEBUG
1350int c_seg_trim_page_count = 0;
1351#endif
1352
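/*
 * Walk backwards from c_nextslot, discarding trailing empty slots and,
 * if any were dropped, pulling c_nextoffset and c_populated_offset back
 * to just past the last slot that still holds compressed data.
 */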
1353void
1354c_seg_trim_tail(c_segment_t c_seg)
1355{
1356 c_slot_t cs;
1357 uint32_t c_size;
1358 uint32_t c_offset;
1359 uint32_t c_rounded_size;
1360 uint16_t current_nextslot;
1361 uint32_t current_populated_offset;
1362
1363 if (c_seg->c_bytes_used == 0)
1364 return;
1365 current_nextslot = c_seg->c_nextslot;
1366 current_populated_offset = c_seg->c_populated_offset;
1367
1368 while (c_seg->c_nextslot) {
1369
1370 cs = C_SEG_SLOT_FROM_INDEX(c_seg, (c_seg->c_nextslot - 1));
1371
1372 c_size = UNPACK_C_SIZE(cs);
1373
1374 if (c_size) {
1375 if (current_nextslot != c_seg->c_nextslot) {
1376 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;
1377 c_offset = cs->c_offset + C_SEG_BYTES_TO_OFFSET(c_rounded_size);
1378
1379 c_seg->c_nextoffset = c_offset;
1380 c_seg->c_populated_offset = (c_offset + (C_SEG_BYTES_TO_OFFSET(PAGE_SIZE) - 1)) &
1381 ~(C_SEG_BYTES_TO_OFFSET(PAGE_SIZE) - 1);
1382
1383 if (c_seg->c_firstemptyslot > c_seg->c_nextslot)
1384 c_seg->c_firstemptyslot = c_seg->c_nextslot;
1385#if DEVELOPMENT || DEBUG
1386 c_seg_trim_page_count += ((round_page_32(C_SEG_OFFSET_TO_BYTES(current_populated_offset)) -
1387 round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset))) /
1388 PAGE_SIZE);
1389#endif
1390 }
1391 break;
1392 }
1393 c_seg->c_nextslot--;
1394 }
1395 assert(c_seg->c_nextslot);
1396}
1397
1398
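/*
 * Minor compaction: slide a segment's live slots down over the holes
 * left by freed slots (an in-place, possibly overlapping copy), fix up
 * each moved slot's backpointer, and depopulate any buffer pages beyond
 * the new populated offset.  Returns 1 if the segment turned out to be
 * empty and was freed, 0 otherwise.
 */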
1399int
1400c_seg_minor_compaction_and_unlock(c_segment_t c_seg, boolean_t clear_busy)
1401{
1402 c_slot_mapping_t slot_ptr;
1403 uint32_t c_offset = 0;
1404 uint32_t old_populated_offset;
1405 uint32_t c_rounded_size;
1406 uint32_t c_size;
1407 int c_indx = 0;
1408 int i;
1409 c_slot_t c_dst;
1410 c_slot_t c_src;
1411
1412 assert(c_seg->c_busy);
1413
1414#if VALIDATE_C_SEGMENTS
1415 c_seg_validate(c_seg, FALSE);
1416#endif
1417 if (c_seg->c_bytes_used == 0) {
1418 c_seg_free(c_seg);
1419 return (1);
1420 }
1421 lck_mtx_unlock_always(&c_seg->c_lock);
1422
1423 if (c_seg->c_firstemptyslot >= c_seg->c_nextslot || C_SEG_UNUSED_BYTES(c_seg) < PAGE_SIZE)
1424 goto done;
1425
1426/* TODO: assert first emptyslot's c_size is actually 0 */
1427
1428#if DEVELOPMENT || DEBUG
1429 C_SEG_MAKE_WRITEABLE(c_seg);
1430#endif
1431
1432#if VALIDATE_C_SEGMENTS
1433 c_seg->c_was_minor_compacted++;
1434#endif
1435 c_indx = c_seg->c_firstemptyslot;
1436 c_dst = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx);
1437
1438 old_populated_offset = c_seg->c_populated_offset;
1439 c_offset = c_dst->c_offset;
1440
1441 for (i = c_indx + 1; i < c_seg->c_nextslot && c_offset < c_seg->c_nextoffset; i++) {
1442
1443 c_src = C_SEG_SLOT_FROM_INDEX(c_seg, i);
1444
1445 c_size = UNPACK_C_SIZE(c_src);
1446
1447 if (c_size == 0)
1448 continue;
1449
1450 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;
1451/* N.B.: This memcpy may be an overlapping copy */
1452 memcpy(&c_seg->c_store.c_buffer[c_offset], &c_seg->c_store.c_buffer[c_src->c_offset], c_rounded_size);
1453
1454 cslot_copy(c_dst, c_src);
1455 c_dst->c_offset = c_offset;
1456
1457 slot_ptr = (c_slot_mapping_t)C_SLOT_UNPACK_PTR(c_dst);
1458 slot_ptr->s_cindx = c_indx;
1459
1460 c_offset += C_SEG_BYTES_TO_OFFSET(c_rounded_size);
1461 PACK_C_SIZE(c_src, 0);
1462 c_indx++;
1463
1464 c_dst = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx);
1465 }
1466 c_seg->c_firstemptyslot = c_indx;
1467 c_seg->c_nextslot = c_indx;
1468 c_seg->c_nextoffset = c_offset;
1469 c_seg->c_populated_offset = (c_offset + (C_SEG_BYTES_TO_OFFSET(PAGE_SIZE) - 1)) & ~(C_SEG_BYTES_TO_OFFSET(PAGE_SIZE) - 1);
1470 c_seg->c_bytes_unused = 0;
1471
1472#if VALIDATE_C_SEGMENTS
1473 c_seg_validate(c_seg, TRUE);
1474#endif
1475 if (old_populated_offset > c_seg->c_populated_offset) {
1476 uint32_t gc_size;
1477 int32_t *gc_ptr;
1478
1479 gc_size = C_SEG_OFFSET_TO_BYTES(old_populated_offset - c_seg->c_populated_offset);
1480 gc_ptr = &c_seg->c_store.c_buffer[c_seg->c_populated_offset];
1481
1482 kernel_memory_depopulate(compressor_map, (vm_offset_t)gc_ptr, gc_size, KMA_COMPRESSOR);
1483 }
1484
1485#if DEVELOPMENT || DEBUG
1486 C_SEG_WRITE_PROTECT(c_seg);
1487#endif
1488
1489done:
1490 if (clear_busy == TRUE) {
1491 lck_mtx_lock_spin_always(&c_seg->c_lock);
1492 C_SEG_WAKEUP_DONE(c_seg);
1493 lck_mtx_unlock_always(&c_seg->c_lock);
1494 }
1495 return (0);
1496}
1497
1498
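/*
 * Make sure a c_slot exists to back c_nextslot.  The first
 * c_seg_fixed_array_len slots live inline in the segment; beyond that,
 * a variable-length array is allocated and doubled as needed, with the
 * old contents copied under the segment lock and freed outside it.
 */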
1499static void
1500c_seg_alloc_nextslot(c_segment_t c_seg)
1501{
1502 struct c_slot *old_slot_array = NULL;
1503 struct c_slot *new_slot_array = NULL;
1504 int newlen;
1505 int oldlen;
1506
1507 if (c_seg->c_nextslot < c_seg_fixed_array_len)
1508 return;
1509
1510 if ((c_seg->c_nextslot - c_seg_fixed_array_len) >= c_seg->c_slot_var_array_len) {
1511
1512 oldlen = c_seg->c_slot_var_array_len;
1513 old_slot_array = c_seg->c_slot_var_array;
1514
1515 if (oldlen == 0)
1516 newlen = C_SEG_SLOT_VAR_ARRAY_MIN_LEN;
1517 else
1518 newlen = oldlen * 2;
1519
1520 new_slot_array = (struct c_slot *)kalloc(sizeof(struct c_slot) * newlen);
1521
1522 lck_mtx_lock_spin_always(&c_seg->c_lock);
1523
1524 if (old_slot_array)
1525 memcpy((char *)new_slot_array, (char *)old_slot_array, sizeof(struct c_slot) * oldlen);
1526
1527 c_seg->c_slot_var_array_len = newlen;
1528 c_seg->c_slot_var_array = new_slot_array;
1529
1530 lck_mtx_unlock_always(&c_seg->c_lock);
1531
1532 if (old_slot_array)
1533 kfree(old_slot_array, sizeof(struct c_slot) * oldlen);
1534 }
1535}
1536
1537
1538
1539struct {
1540 uint64_t asked_permission;
1541 uint64_t compactions;
1542 uint64_t moved_slots;
1543 uint64_t moved_bytes;
1544 uint64_t wasted_space_in_swapouts;
1545 uint64_t count_of_swapouts;
1546 uint64_t count_of_freed_segs;
1547} c_seg_major_compact_stats;
1548
1549
1550#define C_MAJOR_COMPACTION_SIZE_APPROPRIATE ((C_SEG_BUFSIZE * 90) / 100)
1551
1552
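/*
 * A source/destination pair is only worth major compacting if at least
 * one of the two is using less than ~90% of C_SEG_BUFSIZE and the
 * destination still has offset and slot headroom.
 */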
1553boolean_t
1554c_seg_major_compact_ok(
1555 c_segment_t c_seg_dst,
1556 c_segment_t c_seg_src)
1557{
1558
1559 c_seg_major_compact_stats.asked_permission++;
1560
1561 if (c_seg_src->c_bytes_used >= C_MAJOR_COMPACTION_SIZE_APPROPRIATE &&
1562 c_seg_dst->c_bytes_used >= C_MAJOR_COMPACTION_SIZE_APPROPRIATE)
1563 return (FALSE);
1564
1565 if (c_seg_dst->c_nextoffset >= C_SEG_OFF_LIMIT || c_seg_dst->c_nextslot >= C_SLOT_MAX_INDEX) {
1566 /*
1567 * destination segment is full... can't compact
1568 */
1569 return (FALSE);
1570 }
1571
1572 return (TRUE);
1573}
1574
1575
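/*
 * Move every live slot from c_seg_src into c_seg_dst, populating
 * destination buffer pages on demand and re-targeting each moved slot's
 * mapping at its new segment/index.  Returns FALSE once the destination
 * can accept no more, telling the caller to stop feeding it source
 * segments.
 */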
1576boolean_t
1577c_seg_major_compact(
1578 c_segment_t c_seg_dst,
1579 c_segment_t c_seg_src)
1580{
1581 c_slot_mapping_t slot_ptr;
1582 uint32_t c_rounded_size;
1583 uint32_t c_size;
1584 uint16_t dst_slot;
1585 int i;
1586 c_slot_t c_dst;
1587 c_slot_t c_src;
1588 boolean_t keep_compacting = TRUE;
1589
1590 /*
1591 * segments are not locked but they are both marked c_busy
1592 * which keeps c_decompress from working on them...
1593 * we can safely allocate new pages, move compressed data
1594 * from c_seg_src to c_seg_dst and update both c_segments'
1595 * state w/o holding the master lock
1596 */
1597#if DEVELOPMENT || DEBUG
1598 C_SEG_MAKE_WRITEABLE(c_seg_dst);
1599#endif
1600
1601#if VALIDATE_C_SEGMENTS
1602 c_seg_dst->c_was_major_compacted++;
1603 c_seg_src->c_was_major_donor++;
1604#endif
1605 c_seg_major_compact_stats.compactions++;
1606
1607 dst_slot = c_seg_dst->c_nextslot;
1608
1609 for (i = 0; i < c_seg_src->c_nextslot; i++) {
1610
1611 c_src = C_SEG_SLOT_FROM_INDEX(c_seg_src, i);
1612
1613 c_size = UNPACK_C_SIZE(c_src);
1614
1615 if (c_size == 0) {
1616 /* BATCH: move what we have so far; */
1617 continue;
1618 }
1619
1620 if (C_SEG_OFFSET_TO_BYTES(c_seg_dst->c_populated_offset - c_seg_dst->c_nextoffset) < (unsigned) c_size) {
1621 int size_to_populate;
1622
1623 /* doesn't fit */
1624 size_to_populate = C_SEG_BUFSIZE - C_SEG_OFFSET_TO_BYTES(c_seg_dst->c_populated_offset);
1625
1626 if (size_to_populate == 0) {
1627 /* can't fit */
1628 keep_compacting = FALSE;
1629 break;
1630 }
1631 if (size_to_populate > C_SEG_MAX_POPULATE_SIZE)
1632 size_to_populate = C_SEG_MAX_POPULATE_SIZE;
1633
1634 kernel_memory_populate(compressor_map,
1635 (vm_offset_t) &c_seg_dst->c_store.c_buffer[c_seg_dst->c_populated_offset],
1636 size_to_populate,
1637 KMA_COMPRESSOR,
1638 VM_KERN_MEMORY_COMPRESSOR);
1639
1640 c_seg_dst->c_populated_offset += C_SEG_BYTES_TO_OFFSET(size_to_populate);
1641 assert(C_SEG_OFFSET_TO_BYTES(c_seg_dst->c_populated_offset) <= C_SEG_BUFSIZE);
1642 }
1643 c_seg_alloc_nextslot(c_seg_dst);
1644
1645 c_dst = C_SEG_SLOT_FROM_INDEX(c_seg_dst, c_seg_dst->c_nextslot);
1646
1647 memcpy(&c_seg_dst->c_store.c_buffer[c_seg_dst->c_nextoffset], &c_seg_src->c_store.c_buffer[c_src->c_offset], c_size);
1648
1649 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;
1650
1651 c_seg_major_compact_stats.moved_slots++;
1652 c_seg_major_compact_stats.moved_bytes += c_size;
1653
1654 cslot_copy(c_dst, c_src);
1655 c_dst->c_offset = c_seg_dst->c_nextoffset;
1656
1657 if (c_seg_dst->c_firstemptyslot == c_seg_dst->c_nextslot)
1658 c_seg_dst->c_firstemptyslot++;
1659 c_seg_dst->c_slots_used++;
1660 c_seg_dst->c_nextslot++;
1661 c_seg_dst->c_bytes_used += c_rounded_size;
1662 c_seg_dst->c_nextoffset += C_SEG_BYTES_TO_OFFSET(c_rounded_size);
1663
1664 PACK_C_SIZE(c_src, 0);
1665
1666 c_seg_src->c_bytes_used -= c_rounded_size;
1667 c_seg_src->c_bytes_unused += c_rounded_size;
1668 c_seg_src->c_firstemptyslot = 0;
1669
1670 assert(c_seg_src->c_slots_used);
1671 c_seg_src->c_slots_used--;
1672
1673 if (c_seg_dst->c_nextoffset >= C_SEG_OFF_LIMIT || c_seg_dst->c_nextslot >= C_SLOT_MAX_INDEX) {
1674 /* dest segment is now full */
1675 keep_compacting = FALSE;
1676 break;
1677 }
1678 }
1679#if DEVELOPMENT || DEBUG
1680 C_SEG_WRITE_PROTECT(c_seg_dst);
1681#endif
1682 if (dst_slot < c_seg_dst->c_nextslot) {
1683
1684 PAGE_REPLACEMENT_ALLOWED(TRUE);
1685 /*
1686 * we've now locked out c_decompress from
1687 * converting the slot passed into it into
1688 * a c_segment_t which allows us to use
1689 * the backptr to change which c_segment and
1690 * index the slot points to
1691 */
1692 while (dst_slot < c_seg_dst->c_nextslot) {
1693
1694 c_dst = C_SEG_SLOT_FROM_INDEX(c_seg_dst, dst_slot);
1695
1696 slot_ptr = (c_slot_mapping_t)C_SLOT_UNPACK_PTR(c_dst);
1697 /* <csegno=0,indx=0> would mean "empty slot", so use csegno+1 */
1698 slot_ptr->s_cseg = c_seg_dst->c_mysegno + 1;
1699 slot_ptr->s_cindx = dst_slot++;
1700 }
1701 PAGE_REPLACEMENT_ALLOWED(FALSE);
1702 }
1703 return (keep_compacting);
1704}
1705
1706
1707uint64_t
1708vm_compressor_compute_elapsed_msecs(clock_sec_t end_sec, clock_nsec_t end_nsec, clock_sec_t start_sec, clock_nsec_t start_nsec)
1709{
1710 uint64_t end_msecs;
1711 uint64_t start_msecs;
1712
1713 end_msecs = (end_sec * 1000) + end_nsec / 1000000;
1714 start_msecs = (start_sec * 1000) + start_nsec / 1000000;
1715
1716 return (end_msecs - start_msecs);
1717}
1718
1719
1720
1721uint32_t compressor_eval_period_in_msecs = 250;
1722uint32_t compressor_sample_min_in_msecs = 500;
1723uint32_t compressor_sample_max_in_msecs = 10000;
1724uint32_t compressor_thrashing_threshold_per_10msecs = 50;
1725uint32_t compressor_thrashing_min_per_10msecs = 20;
1726
1727/* When true, reset sample data next chance we get. */
1728static boolean_t compressor_need_sample_reset = FALSE;
1729
1730
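/*
 * Thrashing detector (a rough description): roughly once per
 * compressor_eval_period_in_msecs, look at how many decompressions
 * occurred and how old the decompressed data was.  If decompression
 * traffic is heavy but the approximated working set still fits within
 * the compressor pool, swapout_target_age is set so that segments older
 * than the ages accounting for nearly all recent decompressions become
 * candidates for swapout; otherwise it is left at 0.
 */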
1731void
1732compute_swapout_target_age(void)
1733{
1734 clock_sec_t cur_ts_sec;
1735 clock_nsec_t cur_ts_nsec;
1736 uint32_t min_operations_needed_in_this_sample;
1737 uint64_t elapsed_msecs_in_eval;
1738 uint64_t elapsed_msecs_in_sample;
1739 boolean_t need_eval_reset = FALSE;
1740
1741 clock_get_system_nanotime(&cur_ts_sec, &cur_ts_nsec);
1742
1743 elapsed_msecs_in_sample = vm_compressor_compute_elapsed_msecs(cur_ts_sec, cur_ts_nsec, start_of_sample_period_sec, start_of_sample_period_nsec);
1744
1745 if (compressor_need_sample_reset ||
1746 elapsed_msecs_in_sample >= compressor_sample_max_in_msecs) {
1747 compressor_need_sample_reset = TRUE;
1748 need_eval_reset = TRUE;
1749 goto done;
1750 }
1751 elapsed_msecs_in_eval = vm_compressor_compute_elapsed_msecs(cur_ts_sec, cur_ts_nsec, start_of_eval_period_sec, start_of_eval_period_nsec);
1752
1753 if (elapsed_msecs_in_eval < compressor_eval_period_in_msecs)
1754 goto done;
1755 need_eval_reset = TRUE;
1756
1757 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_START, elapsed_msecs_in_eval, sample_period_compression_count, sample_period_decompression_count, 0, 0);
1758
1759 min_operations_needed_in_this_sample = (compressor_thrashing_min_per_10msecs * (uint32_t)elapsed_msecs_in_eval) / 10;
1760
1761 if ((sample_period_compression_count - last_eval_compression_count) < min_operations_needed_in_this_sample ||
1762 (sample_period_decompression_count - last_eval_decompression_count) < min_operations_needed_in_this_sample) {
1763
1764 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, sample_period_compression_count - last_eval_compression_count,
1765 sample_period_decompression_count - last_eval_decompression_count, 0, 1, 0);
1766
1767 swapout_target_age = 0;
1768
1769 compressor_need_sample_reset = TRUE;
1770 need_eval_reset = TRUE;
1771 goto done;
1772 }
1773 last_eval_compression_count = sample_period_compression_count;
1774 last_eval_decompression_count = sample_period_decompression_count;
1775
1776 if (elapsed_msecs_in_sample < compressor_sample_min_in_msecs) {
1777
1778 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, swapout_target_age, 0, 0, 5, 0);
1779 goto done;
1780 }
1781 if (sample_period_decompression_count > ((compressor_thrashing_threshold_per_10msecs * elapsed_msecs_in_sample) / 10)) {
1782
1783 uint64_t running_total;
1784 uint64_t working_target;
1785 uint64_t aging_target;
1786 uint32_t oldest_age_of_csegs_sampled = 0;
1787 uint64_t working_set_approximation = 0;
1788
1789 swapout_target_age = 0;
1790
1791 working_target = (sample_period_decompression_count / 100) * 95; /* 95 percent */
1792 aging_target = (sample_period_decompression_count / 100) * 1; /* 1 percent */
1793 running_total = 0;
1794
1795 for (oldest_age_of_csegs_sampled = 0; oldest_age_of_csegs_sampled < DECOMPRESSION_SAMPLE_MAX_AGE; oldest_age_of_csegs_sampled++) {
1796
1797 running_total += age_of_decompressions_during_sample_period[oldest_age_of_csegs_sampled];
1798
1799 working_set_approximation += oldest_age_of_csegs_sampled * age_of_decompressions_during_sample_period[oldest_age_of_csegs_sampled];
1800
1801 if (running_total >= working_target)
1802 break;
1803 }
1804 if (oldest_age_of_csegs_sampled < DECOMPRESSION_SAMPLE_MAX_AGE) {
1805
1806 working_set_approximation = (working_set_approximation * 1000) / elapsed_msecs_in_sample;
1807
1808 if (working_set_approximation < VM_PAGE_COMPRESSOR_COUNT) {
1809
1810 running_total = overage_decompressions_during_sample_period;
1811
1812 for (oldest_age_of_csegs_sampled = DECOMPRESSION_SAMPLE_MAX_AGE - 1; oldest_age_of_csegs_sampled; oldest_age_of_csegs_sampled--) {
1813 running_total += age_of_decompressions_during_sample_period[oldest_age_of_csegs_sampled];
1814
1815 if (running_total >= aging_target)
1816 break;
1817 }
1818 swapout_target_age = (uint32_t)cur_ts_sec - oldest_age_of_csegs_sampled;
1819
1820 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, swapout_target_age, working_set_approximation, VM_PAGE_COMPRESSOR_COUNT, 2, 0);
1821 } else {
1822 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, working_set_approximation, VM_PAGE_COMPRESSOR_COUNT, 0, 3, 0);
1823 }
1824 } else
1825 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, working_target, running_total, 0, 4, 0);
1826
1827 compressor_need_sample_reset = TRUE;
1828 need_eval_reset = TRUE;
1829 } else
1830 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, sample_period_decompression_count, (compressor_thrashing_threshold_per_10msecs * elapsed_msecs_in_sample) / 10, 0, 6, 0);
1831done:
1832 if (compressor_need_sample_reset == TRUE) {
1833 bzero(age_of_decompressions_during_sample_period, sizeof(age_of_decompressions_during_sample_period));
1834 overage_decompressions_during_sample_period = 0;
1835
1836 start_of_sample_period_sec = cur_ts_sec;
1837 start_of_sample_period_nsec = cur_ts_nsec;
1838 sample_period_decompression_count = 0;
1839 sample_period_compression_count = 0;
1840 last_eval_decompression_count = 0;
1841 last_eval_compression_count = 0;
1842 compressor_need_sample_reset = FALSE;
1843 }
1844 if (need_eval_reset == TRUE) {
1845 start_of_eval_period_sec = cur_ts_sec;
1846 start_of_eval_period_nsec = cur_ts_nsec;
1847 }
1848}
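/*
 * Sketch of the computation above (numbers illustrative): if the sample
 * saw 10,000 decompressions, working_target = 9,500 (95%) and
 * aging_target = 100 (1%).  The first scan walks the per-age histogram
 * from youngest to oldest until 95% of the decompressions are covered,
 * accumulating an approximation of the working set that is then scaled
 * by 1000 / elapsed_msecs_in_sample to a per-second figure.  Only if
 * that figure is smaller than the current compressor footprint
 * (VM_PAGE_COMPRESSOR_COUNT) does the second scan, from oldest to
 * youngest, pick the age covering the coldest 1% of decompressions;
 * swapout_target_age is then set to "now minus that age", so only
 * segments created before that point become swapout candidates.
 */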
1849
1850
1851int compaction_swapper_init_now = 0;
1852int compaction_swapper_running = 0;
1853int compaction_swapper_awakened = 0;
1854int compaction_swapper_abort = 0;
1855
1856
1857#if CONFIG_JETSAM
1858boolean_t memorystatus_kill_on_VM_compressor_thrashing(boolean_t);
1859boolean_t memorystatus_kill_on_VM_compressor_space_shortage(boolean_t);
1860boolean_t memorystatus_kill_on_FC_thrashing(boolean_t);
1861int compressor_thrashing_induced_jetsam = 0;
1862int filecache_thrashing_induced_jetsam = 0;
1863static boolean_t vm_compressor_thrashing_detected = FALSE;
1864#endif /* CONFIG_JETSAM */
1865
1866static boolean_t
1867compressor_needs_to_swap(void)
1868{
1869 boolean_t should_swap = FALSE;
1870
1871 if (vm_swapout_ripe_segments == TRUE && c_overage_swapped_count < c_overage_swapped_limit) {
1872 c_segment_t c_seg;
1873 clock_sec_t now;
1874 clock_sec_t age;
1875 clock_nsec_t nsec;
1876
1877 clock_get_system_nanotime(&now, &nsec);
1878 age = 0;
1879
1880 lck_mtx_lock_spin_always(c_list_lock);
1881
1882 if ( !queue_empty(&c_age_list_head)) {
1883 c_seg = (c_segment_t) queue_first(&c_age_list_head);
1884
1885 age = now - c_seg->c_creation_ts;
1886 }
1887 lck_mtx_unlock_always(c_list_lock);
1888
1889 if (age >= vm_ripe_target_age)
1890 return (TRUE);
1891 }
1892 if (VM_CONFIG_SWAP_IS_ACTIVE) {
1893 if (COMPRESSOR_NEEDS_TO_SWAP()) {
1894 return (TRUE);
1895 }
1896 if (VM_PAGE_Q_THROTTLED(&vm_pageout_queue_external) && vm_page_anonymous_count < (vm_page_inactive_count / 20)) {
1897 return (TRUE);
1898 }
1899 if (vm_page_free_count < (vm_page_free_reserved - (COMPRESSOR_FREE_RESERVED_LIMIT * 2)))
1900 return (TRUE);
1901 }
1902 compute_swapout_target_age();
1903
1904 if (swapout_target_age) {
1905 c_segment_t c_seg;
1906
1907 lck_mtx_lock_spin_always(c_list_lock);
1908
1909 if (!queue_empty(&c_age_list_head)) {
1910
1911 c_seg = (c_segment_t) queue_first(&c_age_list_head);
1912
1913 if (c_seg->c_creation_ts > swapout_target_age)
1914 swapout_target_age = 0;
1915 }
1916 lck_mtx_unlock_always(c_list_lock);
1917 }
1918#if CONFIG_PHANTOM_CACHE
1919 if (vm_phantom_cache_check_pressure())
1920 should_swap = TRUE;
1921#endif
1922 if (swapout_target_age)
1923 should_swap = TRUE;
1924
1925#if CONFIG_JETSAM
1926 if (should_swap || vm_compressor_low_on_space() == TRUE) {
1927
1928 if (vm_compressor_thrashing_detected == FALSE) {
1929 vm_compressor_thrashing_detected = TRUE;
1930
1931 if (swapout_target_age || vm_compressor_low_on_space() == TRUE) {
1932 if (swapout_target_age) {
1933 /* The compressor is thrashing. */
1934 memorystatus_kill_on_VM_compressor_thrashing(TRUE /* async */);
1935 } else {
1936 /* The compressor is running low on space. */
1937 memorystatus_kill_on_VM_compressor_space_shortage(TRUE /* async */);
1938 }
1939 compressor_thrashing_induced_jetsam++;
1940 } else {
1941 memorystatus_kill_on_FC_thrashing(TRUE /* async */);
1942 filecache_thrashing_induced_jetsam++;
1943 }
1944 }
1945 /*
1946 * let the jetsam take precedence over
1947 * any major compactions we might have
1948 * been able to do... otherwise we run
1949 * the risk of doing major compactions
1950 * on segments we're about to free up
1951 * due to the jetsam activity.
1952 */
1953 should_swap = FALSE;
1954 }
1955
1956#endif /* CONFIG_JETSAM */
1957
1958 if (should_swap == FALSE) {
1959 /*
1960 * vm_compressor_needs_to_major_compact returns true only if we're
1961 * about to run out of available compressor segments... in this
1962 * case, we absolutely need to run a major compaction even if
1963 * we've just kicked off a jetsam or we don't otherwise need to
1964 * swap... terminating objects releases
1965 * pages back to the uncompressed cache, but does not guarantee
1966 * that we will free up even a single compression segment
1967 */
1968 should_swap = vm_compressor_needs_to_major_compact();
1969 }
1970
1971 /*
1972 * returning TRUE when swap_supported == FALSE
1973 * will cause the major compaction engine to
1974 * run, but will not trigger any swapping...
1975 * segments that have been major compacted
1976 * will be moved to the majorcompact queue
1977 */
1978 return (should_swap);
1979}
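/*
 * Summary of the checks above, in the order they are applied (descriptive
 * only): (1) if ripe-segment swapping is enabled and the oldest segment
 * has reached vm_ripe_target_age, swap; (2) with swap active, honor
 * COMPRESSOR_NEEDS_TO_SWAP(), a throttled external queue with a small
 * anonymous page pool, or a free-page shortfall; (3) otherwise consult
 * the thrashing sampler (swapout_target_age) and, if configured, phantom
 * cache pressure; (4) under CONFIG_JETSAM a thrashing or space-shortage
 * condition triggers a jetsam instead and suppresses the swap; (5) even
 * when nothing above fires, a needed major compaction returns TRUE so
 * the compaction engine runs.
 */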
1980
1981#if CONFIG_JETSAM
1982/*
1983 * This function is called from the jetsam thread after killing something to
1984 * mitigate thrashing.
1985 *
1986 * We need to restart our thrashing detection heuristics since memory pressure
1987 * has potentially changed significantly, and we don't want to detect on old
1988 * data from before the jetsam.
1989 */
1990void
1991vm_thrashing_jetsam_done(void)
1992{
1993 vm_compressor_thrashing_detected = FALSE;
1994
1995 /* Were we compressor-thrashing or filecache-thrashing? */
1996 if (swapout_target_age) {
1997 swapout_target_age = 0;
1998 compressor_need_sample_reset = TRUE;
1999 }
2000#if CONFIG_PHANTOM_CACHE
2001 else {
2002 vm_phantom_cache_restart_sample();
2003 }
2004#endif
2005}
2006#endif /* CONFIG_JETSAM */
2007
2008uint32_t vm_wake_compactor_swapper_calls = 0;
2009uint32_t vm_run_compactor_already_running = 0;
2010uint32_t vm_run_compactor_empty_minor_q = 0;
2011uint32_t vm_run_compactor_did_compact = 0;
2012uint32_t vm_run_compactor_waited = 0;
2013
2014void
2015vm_run_compactor(void)
2016{
2017 if (c_segment_count == 0)
2018 return;
2019
2020 lck_mtx_lock_spin_always(c_list_lock);
2021
2022 if (c_minor_count == 0) {
2023 vm_run_compactor_empty_minor_q++;
2024
2025 lck_mtx_unlock_always(c_list_lock);
2026 return;
2027 }
2028 if (compaction_swapper_running) {
2029
2030 if (vm_pageout_state.vm_restricted_to_single_processor == FALSE) {
2031 vm_run_compactor_already_running++;
2032
2033 lck_mtx_unlock_always(c_list_lock);
2034 return;
2035 }
2036 vm_run_compactor_waited++;
2037
2038 assert_wait((event_t)&compaction_swapper_running, THREAD_UNINT);
2039
2040 lck_mtx_unlock_always(c_list_lock);
2041
2042 thread_block(THREAD_CONTINUE_NULL);
2043
2044 return;
2045 }
2046 vm_run_compactor_did_compact++;
2047
2048 fastwake_warmup = FALSE;
2049 compaction_swapper_running = 1;
2050
2051 vm_compressor_do_delayed_compactions(FALSE);
2052
2053 compaction_swapper_running = 0;
2054
2055 lck_mtx_unlock_always(c_list_lock);
2056
2057 thread_wakeup((event_t)&compaction_swapper_running);
2058}
2059
2060
2061void
2062vm_wake_compactor_swapper(void)
2063{
2064 if (compaction_swapper_running || compaction_swapper_awakened || c_segment_count == 0)
2065 return;
2066
2067 if (c_minor_count || vm_compressor_needs_to_major_compact()) {
2068
2069 lck_mtx_lock_spin_always(c_list_lock);
2070
2071 fastwake_warmup = FALSE;
2072
2073 if (compaction_swapper_running == 0 && compaction_swapper_awakened == 0) {
2074
2075 vm_wake_compactor_swapper_calls++;
2076
2077 compaction_swapper_awakened = 1;
2078 thread_wakeup((event_t)&c_compressor_swap_trigger);
2079 }
2080 lck_mtx_unlock_always(c_list_lock);
2081 }
2082}
2083
2084
2085void
2086vm_consider_swapping()
2087{
2088 c_segment_t c_seg, c_seg_next;
2089 clock_sec_t now;
2090 clock_nsec_t nsec;
2091
2092 assert(VM_CONFIG_SWAP_IS_PRESENT);
2093
2094 lck_mtx_lock_spin_always(c_list_lock);
2095
2096 compaction_swapper_abort = 1;
2097
2098 while (compaction_swapper_running) {
2099 assert_wait((event_t)&compaction_swapper_running, THREAD_UNINT);
2100
2101 lck_mtx_unlock_always(c_list_lock);
2102
2103 thread_block(THREAD_CONTINUE_NULL);
2104
2105 lck_mtx_lock_spin_always(c_list_lock);
2106 }
2107 compaction_swapper_abort = 0;
2108 compaction_swapper_running = 1;
2109
2110 vm_swapout_ripe_segments = TRUE;
2111
2112 if (!queue_empty(&c_major_list_head)) {
2113
2114 clock_get_system_nanotime(&now, &nsec);
2115
2116 c_seg = (c_segment_t)queue_first(&c_major_list_head);
2117
2118 while (!queue_end(&c_major_list_head, (queue_entry_t)c_seg)) {
2119
2120 if (c_overage_swapped_count >= c_overage_swapped_limit)
2121 break;
2122
2123 c_seg_next = (c_segment_t) queue_next(&c_seg->c_age_list);
2124
2125 if ((now - c_seg->c_creation_ts) >= vm_ripe_target_age) {
2126
2127 lck_mtx_lock_spin_always(&c_seg->c_lock);
2128
2129 c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
2130
2131 lck_mtx_unlock_always(&c_seg->c_lock);
2132 }
2133 c_seg = c_seg_next;
2134 }
2135 }
2136 vm_compressor_compact_and_swap(FALSE);
2137
2138 compaction_swapper_running = 0;
2139
2140 vm_swapout_ripe_segments = FALSE;
2141
2142 lck_mtx_unlock_always(c_list_lock);
2143
2144 thread_wakeup((event_t)&compaction_swapper_running);
2145}
2146
2147
2148void
2149vm_consider_waking_compactor_swapper(void)
2150{
2151 boolean_t need_wakeup = FALSE;
2152
2153 if (c_segment_count == 0)
2154 return;
2155
2156 if (compaction_swapper_running || compaction_swapper_awakened)
2157 return;
2158
2159 if (!compaction_swapper_inited && !compaction_swapper_init_now) {
2160 compaction_swapper_init_now = 1;
2161 need_wakeup = TRUE;
2162 }
2163
2164 if (c_minor_count && (COMPRESSOR_NEEDS_TO_MINOR_COMPACT())) {
2165
2166 need_wakeup = TRUE;
2167
2168 } else if (compressor_needs_to_swap()) {
2169
2170 need_wakeup = TRUE;
2171
2172 } else if (c_minor_count) {
2173 uint64_t total_bytes;
2174
2175 total_bytes = compressor_object->resident_page_count * PAGE_SIZE_64;
2176
2177 if ((total_bytes - compressor_bytes_used) > total_bytes / 10)
2178 need_wakeup = TRUE;
2179 }
2180 if (need_wakeup == TRUE) {
2181
2182 lck_mtx_lock_spin_always(c_list_lock);
2183
2184 fastwake_warmup = FALSE;
2185
2186 if (compaction_swapper_running == 0 && compaction_swapper_awakened == 0) {
2187 memoryshot(VM_WAKEUP_COMPACTOR_SWAPPER, DBG_FUNC_NONE);
2188
2189 compaction_swapper_awakened = 1;
2190 thread_wakeup((event_t)&c_compressor_swap_trigger);
2191 }
2192 lck_mtx_unlock_always(c_list_lock);
2193 }
2194}
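/*
 * Worked example of the "dirty space" wakeup above (a 4KB page size is
 * assumed purely for illustration): with 25,600 resident compressor
 * pages, total_bytes = 25,600 * 4096 = 100MB.  If compressor_bytes_used
 * is 85MB, the unused portion is 15MB, which exceeds total_bytes / 10
 * (10MB), so the compactor/swapper thread is woken to reclaim it.
 */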
2195
2196
2197#define C_SWAPOUT_LIMIT 4
2198#define DELAYED_COMPACTIONS_PER_PASS 30
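/*
 * These limits bound each pass through vm_compressor_do_delayed_compactions
 * below: after roughly DELAYED_COMPACTIONS_PER_PASS (30) minor compactions
 * with swap active, the loop re-evaluates whether a swapout is warranted
 * (or whether we're flushing) and, provided fewer than C_SWAPOUT_LIMIT (4)
 * segments are already queued for swapout, stops compacting so the
 * swapout path can make progress.
 */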
2199
2200void
2201vm_compressor_do_delayed_compactions(boolean_t flush_all)
2202{
2203 c_segment_t c_seg;
2204 int number_compacted = 0;
2205 boolean_t needs_to_swap = FALSE;
2206
2207
2208#if !CONFIG_EMBEDDED
2209 LCK_MTX_ASSERT(c_list_lock, LCK_MTX_ASSERT_OWNED);
2210#endif /* !CONFIG_EMBEDDED */
2211
2212 while (!queue_empty(&c_minor_list_head) && needs_to_swap == FALSE) {
2213
2214 c_seg = (c_segment_t)queue_first(&c_minor_list_head);
2215
2216 lck_mtx_lock_spin_always(&c_seg->c_lock);
2217
2218 if (c_seg->c_busy) {
2219
2220 lck_mtx_unlock_always(c_list_lock);
2221 c_seg_wait_on_busy(c_seg);
2222 lck_mtx_lock_spin_always(c_list_lock);
2223
2224 continue;
2225 }
2226 C_SEG_BUSY(c_seg);
2227
2228 c_seg_do_minor_compaction_and_unlock(c_seg, TRUE, FALSE, TRUE);
2229
2230 if (VM_CONFIG_SWAP_IS_ACTIVE && (number_compacted++ > DELAYED_COMPACTIONS_PER_PASS)) {
2231
2232 if ((flush_all == TRUE || compressor_needs_to_swap() == TRUE) && c_swapout_count < C_SWAPOUT_LIMIT)
2233 needs_to_swap = TRUE;
2234
2235 number_compacted = 0;
2236 }
2237 lck_mtx_lock_spin_always(c_list_lock);
2238 }
2239}
2240
2241
2242#define C_SEGMENT_SWAPPEDIN_AGE_LIMIT 10
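/*
 * Descriptive note: a segment brought back in from swap sits on the
 * swapped-in queue for at least C_SEGMENT_SWAPPEDIN_AGE_LIMIT (10)
 * seconds before vm_compressor_age_swapped_in_segments below returns it
 * to the age queue, unless flush_all forces immediate re-aging.
 */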
2243
2244static void
2245vm_compressor_age_swapped_in_segments(boolean_t flush_all)
2246{
2247 c_segment_t c_seg;
2248 clock_sec_t now;
2249 clock_nsec_t nsec;
2250
2251 clock_get_system_nanotime(&now, &nsec);
2252
2253 while (!queue_empty(&c_swappedin_list_head)) {
2254
2255 c_seg = (c_segment_t)queue_first(&c_swappedin_list_head);
2256
2257 if (flush_all == FALSE && (now - c_seg->c_swappedin_ts) < C_SEGMENT_SWAPPEDIN_AGE_LIMIT)
2258 break;
2259
2260 lck_mtx_lock_spin_always(&c_seg->c_lock);
2261
2262 c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
2263
2264 lck_mtx_unlock_always(&c_seg->c_lock);
2265 }
2266}
2267
2268
2269extern int vm_num_swap_files;
2270extern int vm_num_pinned_swap_files;
2271extern int vm_swappin_enabled;
2272
2273extern unsigned int vm_swapfile_total_segs_used;
2274extern unsigned int vm_swapfile_total_segs_alloced;
2275
2276
2277void
2278vm_compressor_flush(void)
2279{
2280 uint64_t vm_swap_put_failures_at_start;
2281 wait_result_t wait_result = 0;
2282 AbsoluteTime startTime, endTime;
2283 clock_sec_t now_sec;
2284 clock_nsec_t now_nsec;
2285 uint64_t nsec;
2286
2287 HIBLOG("vm_compressor_flush - starting\n");
2288
2289 clock_get_uptime(&startTime);
2290
2291 lck_mtx_lock_spin_always(c_list_lock);
2292
2293 fastwake_warmup = FALSE;
2294 compaction_swapper_abort = 1;
2295
2296 while (compaction_swapper_running) {
2297 assert_wait((event_t)&compaction_swapper_running, THREAD_UNINT);
2298
2299 lck_mtx_unlock_always(c_list_lock);
2300
2301 thread_block(THREAD_CONTINUE_NULL);
2302
2303 lck_mtx_lock_spin_always(c_list_lock);
2304 }
2305 compaction_swapper_abort = 0;
2306 compaction_swapper_running = 1;
2307
2308 hibernate_flushing = TRUE;
2309 hibernate_no_swapspace = FALSE;
2310 c_generation_id_flush_barrier = c_generation_id + 1000;
2311
2312 clock_get_system_nanotime(&now_sec, &now_nsec);
2313 hibernate_flushing_deadline = now_sec + HIBERNATE_FLUSHING_SECS_TO_COMPLETE;
2314
2315 vm_swap_put_failures_at_start = vm_swap_put_failures;
2316
2317 vm_compressor_compact_and_swap(TRUE);
2318
2319 while (!queue_empty(&c_swapout_list_head)) {
2320
2321 assert_wait_timeout((event_t) &compaction_swapper_running, THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);
2322
2323 lck_mtx_unlock_always(c_list_lock);
2324
2325 wait_result = thread_block(THREAD_CONTINUE_NULL);
2326
2327 lck_mtx_lock_spin_always(c_list_lock);
2328
2329 if (wait_result == THREAD_TIMED_OUT)
2330 break;
2331 }
2332 hibernate_flushing = FALSE;
2333 compaction_swapper_running = 0;
2334
2335 if (vm_swap_put_failures > vm_swap_put_failures_at_start)
2336 HIBLOG("vm_compressor_flush failed to clean %llu segments - vm_page_compressor_count(%d)\n",
2337 vm_swap_put_failures - vm_swap_put_failures_at_start, VM_PAGE_COMPRESSOR_COUNT);
2338
2339 lck_mtx_unlock_always(c_list_lock);
2340
2341 thread_wakeup((event_t)&compaction_swapper_running);
2342
2343 clock_get_uptime(&endTime);
2344 SUB_ABSOLUTETIME(&endTime, &startTime);
2345 absolutetime_to_nanoseconds(endTime, &nsec);
2346
2347 HIBLOG("vm_compressor_flush completed - took %qd msecs - vm_num_swap_files = %d, vm_num_pinned_swap_files = %d, vm_swappin_enabled = %d\n",
2348 nsec / 1000000ULL, vm_num_swap_files, vm_num_pinned_swap_files, vm_swappin_enabled);
2349}
2350
2351
2352int compaction_swap_trigger_thread_awakened = 0;
2353
2354static void
2355vm_compressor_swap_trigger_thread(void)
2356{
2357 current_thread()->options |= TH_OPT_VMPRIV;
2358
2359 /*
2360 * compaction_swapper_init_now is set when the first call to
2361 * vm_consider_waking_compactor_swapper is made from
2362 * vm_pageout_scan... since this function is called upon
2363 * thread creation, we want to make sure to delay adjusting
2364 * the tuneables until we are awakened via vm_pageout_scan
2365 * so that we are at a point where the vm_swapfile_open will
2366 * be operating on the correct directory (in case the default
2367	 * of /var/vm/ is overridden by the dynamic_pager)
2368 */
2369 if (compaction_swapper_init_now) {
2370 vm_compaction_swapper_do_init();
2371
2372 if (vm_pageout_state.vm_restricted_to_single_processor == TRUE)
2373 thread_vm_bind_group_add();
2374 thread_set_thread_name(current_thread(), "VM_cswap_trigger");
2375 compaction_swapper_init_now = 0;
2376 }
2377 lck_mtx_lock_spin_always(c_list_lock);
2378
2379 compaction_swap_trigger_thread_awakened++;
2380 compaction_swapper_awakened = 0;
2381
2382 if (compaction_swapper_running == 0) {
2383
2384 compaction_swapper_running = 1;
2385
2386 vm_compressor_compact_and_swap(FALSE);
2387
2388 compaction_swapper_running = 0;
2389 }
2390 assert_wait((event_t)&c_compressor_swap_trigger, THREAD_UNINT);
2391
2392 if (compaction_swapper_running == 0)
2393 thread_wakeup((event_t)&compaction_swapper_running);
2394
2395 lck_mtx_unlock_always(c_list_lock);
2396
2397 thread_block((thread_continue_t)vm_compressor_swap_trigger_thread);
2398
2399 /* NOTREACHED */
2400}
2401
2402
2403void
2404vm_compressor_record_warmup_start(void)
2405{
2406 c_segment_t c_seg;
2407
2408 lck_mtx_lock_spin_always(c_list_lock);
2409
2410 if (first_c_segment_to_warm_generation_id == 0) {
2411 if (!queue_empty(&c_age_list_head)) {
2412
2413 c_seg = (c_segment_t)queue_last(&c_age_list_head);
2414
2415 first_c_segment_to_warm_generation_id = c_seg->c_generation_id;
2416 } else
2417 first_c_segment_to_warm_generation_id = 0;
2418
2419 fastwake_recording_in_progress = TRUE;
2420 }
2421 lck_mtx_unlock_always(c_list_lock);
2422}
2423
2424
2425void
2426vm_compressor_record_warmup_end(void)
2427{
2428 c_segment_t c_seg;
2429
2430 lck_mtx_lock_spin_always(c_list_lock);
2431
2432 if (fastwake_recording_in_progress == TRUE) {
2433
2434 if (!queue_empty(&c_age_list_head)) {
2435
2436 c_seg = (c_segment_t)queue_last(&c_age_list_head);
2437
2438 last_c_segment_to_warm_generation_id = c_seg->c_generation_id;
2439 } else
2440 last_c_segment_to_warm_generation_id = first_c_segment_to_warm_generation_id;
2441
2442 fastwake_recording_in_progress = FALSE;
2443
2444 HIBLOG("vm_compressor_record_warmup (%qd - %qd)\n", first_c_segment_to_warm_generation_id, last_c_segment_to_warm_generation_id);
2445 }
2446 lck_mtx_unlock_always(c_list_lock);
2447}
2448
2449
2450#define DELAY_TRIM_ON_WAKE_SECS 25
2451
2452void
2453vm_compressor_delay_trim(void)
2454{
2455 clock_sec_t sec;
2456 clock_nsec_t nsec;
2457
2458 clock_get_system_nanotime(&sec, &nsec);
2459 dont_trim_until_ts = sec + DELAY_TRIM_ON_WAKE_SECS;
2460}
2461
2462
2463void
2464vm_compressor_do_warmup(void)
2465{
2466 lck_mtx_lock_spin_always(c_list_lock);
2467
2468 if (first_c_segment_to_warm_generation_id == last_c_segment_to_warm_generation_id) {
2469 first_c_segment_to_warm_generation_id = last_c_segment_to_warm_generation_id = 0;
2470
2471 lck_mtx_unlock_always(c_list_lock);
2472 return;
2473 }
2474
2475 if (compaction_swapper_running == 0 && compaction_swapper_awakened == 0) {
2476
2477 fastwake_warmup = TRUE;
2478
2479 compaction_swapper_awakened = 1;
2480 thread_wakeup((event_t)&c_compressor_swap_trigger);
2481 }
2482 lck_mtx_unlock_always(c_list_lock);
2483}
2484
2485void
2486do_fastwake_warmup_all(void)
2487{
2488
2489 lck_mtx_lock_spin_always(c_list_lock);
2490
2491 if (queue_empty(&c_swappedout_list_head) && queue_empty(&c_swappedout_sparse_list_head)) {
2492
2493 lck_mtx_unlock_always(c_list_lock);
2494 return;
2495 }
2496
2497 fastwake_warmup = TRUE;
2498
2499 do_fastwake_warmup(&c_swappedout_list_head, TRUE);
2500
2501 do_fastwake_warmup(&c_swappedout_sparse_list_head, TRUE);
2502
2503 fastwake_warmup = FALSE;
2504
2505 lck_mtx_unlock_always(c_list_lock);
2506
2507}
2508
2509void
2510do_fastwake_warmup(queue_head_t *c_queue, boolean_t consider_all_cseg)
2511{
2512 c_segment_t c_seg = NULL;
2513 AbsoluteTime startTime, endTime;
2514 uint64_t nsec;
2515
2516
2517 HIBLOG("vm_compressor_fastwake_warmup (%qd - %qd) - starting\n", first_c_segment_to_warm_generation_id, last_c_segment_to_warm_generation_id);
2518
2519 clock_get_uptime(&startTime);
2520
2521 lck_mtx_unlock_always(c_list_lock);
2522
2523 proc_set_thread_policy(current_thread(),
2524 TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
2525
2526 PAGE_REPLACEMENT_DISALLOWED(TRUE);
2527
2528 lck_mtx_lock_spin_always(c_list_lock);
2529
2530 while (!queue_empty(c_queue) && fastwake_warmup == TRUE) {
2531
2532 c_seg = (c_segment_t) queue_first(c_queue);
2533
2534 if (consider_all_cseg == FALSE) {
2535 if (c_seg->c_generation_id < first_c_segment_to_warm_generation_id ||
2536 c_seg->c_generation_id > last_c_segment_to_warm_generation_id)
2537 break;
2538
2539 if (vm_page_free_count < (AVAILABLE_MEMORY / 4))
2540 break;
2541 }
2542
2543 lck_mtx_lock_spin_always(&c_seg->c_lock);
2544 lck_mtx_unlock_always(c_list_lock);
2545
2546 if (c_seg->c_busy) {
2547 PAGE_REPLACEMENT_DISALLOWED(FALSE);
2548 c_seg_wait_on_busy(c_seg);
2549 PAGE_REPLACEMENT_DISALLOWED(TRUE);
2550 } else {
2551 if (c_seg_swapin(c_seg, TRUE, FALSE) == 0)
2552 lck_mtx_unlock_always(&c_seg->c_lock);
2553 c_segment_warmup_count++;
2554
2555 PAGE_REPLACEMENT_DISALLOWED(FALSE);
2556 vm_pageout_io_throttle();
2557 PAGE_REPLACEMENT_DISALLOWED(TRUE);
2558 }
2559 lck_mtx_lock_spin_always(c_list_lock);
2560 }
2561 lck_mtx_unlock_always(c_list_lock);
2562
2563 PAGE_REPLACEMENT_DISALLOWED(FALSE);
2564
2565 proc_set_thread_policy(current_thread(),
2566 TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER0);
2567
2568 clock_get_uptime(&endTime);
2569 SUB_ABSOLUTETIME(&endTime, &startTime);
2570 absolutetime_to_nanoseconds(endTime, &nsec);
2571
2572 HIBLOG("vm_compressor_fastwake_warmup completed - took %qd msecs\n", nsec / 1000000ULL);
2573
2574 lck_mtx_lock_spin_always(c_list_lock);
2575
2576 if (consider_all_cseg == FALSE) {
2577 first_c_segment_to_warm_generation_id = last_c_segment_to_warm_generation_id = 0;
2578 }
2579}
2580
2581
2582void
2583vm_compressor_compact_and_swap(boolean_t flush_all)
2584{
2585 c_segment_t c_seg, c_seg_next;
2586 boolean_t keep_compacting;
2587 clock_sec_t now;
2588 clock_nsec_t nsec;
2589
2590
2591 if (fastwake_warmup == TRUE) {
2592 uint64_t starting_warmup_count;
2593
2594 starting_warmup_count = c_segment_warmup_count;
2595
2596 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 11) | DBG_FUNC_START, c_segment_warmup_count,
2597 first_c_segment_to_warm_generation_id, last_c_segment_to_warm_generation_id, 0, 0);
2598 do_fastwake_warmup(&c_swappedout_list_head, FALSE);
2599 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 11) | DBG_FUNC_END, c_segment_warmup_count, c_segment_warmup_count - starting_warmup_count, 0, 0, 0);
2600
2601 fastwake_warmup = FALSE;
2602 }
2603
2604 /*
2605 * it's possible for the c_age_list_head to be empty if we
2606 * hit our limits for growing the compressor pool and we subsequently
2607 * hibernated... on the next hibernation we could see the queue as
2608	 * empty and not proceed even though we have a bunch of segments on
2609 * the swapped in queue that need to be dealt with.
2610 */
2611 vm_compressor_do_delayed_compactions(flush_all);
2612
2613 vm_compressor_age_swapped_in_segments(flush_all);
2614
2615 /*
2616 * we only need to grab the timestamp once per
2617 * invocation of this function since the
2618 * timescale we're interested in is measured
2619 * in days
2620 */
2621 clock_get_system_nanotime(&now, &nsec);
2622
2623 while (!queue_empty(&c_age_list_head) && compaction_swapper_abort == 0) {
2624
2625 if (hibernate_flushing == TRUE) {
2626 clock_sec_t sec;
2627
2628 if (hibernate_should_abort()) {
2629 HIBLOG("vm_compressor_flush - hibernate_should_abort returned TRUE\n");
2630 break;
2631 }
2632 if (hibernate_no_swapspace == TRUE) {
2633 HIBLOG("vm_compressor_flush - out of swap space\n");
2634 break;
2635 }
2636 if (vm_swap_files_pinned() == FALSE) {
2637 HIBLOG("vm_compressor_flush - unpinned swap files\n");
2638 break;
2639 }
2640 if (hibernate_in_progress_with_pinned_swap == TRUE &&
2641 (vm_swapfile_total_segs_alloced == vm_swapfile_total_segs_used)) {
2642 HIBLOG("vm_compressor_flush - out of pinned swap space\n");
2643 break;
2644 }
2645 clock_get_system_nanotime(&sec, &nsec);
2646
2647 if (sec > hibernate_flushing_deadline) {
2648 HIBLOG("vm_compressor_flush - failed to finish before deadline\n");
2649 break;
2650 }
2651 }
2652 if (c_swapout_count >= C_SWAPOUT_LIMIT) {
2653
2654 assert_wait_timeout((event_t) &compaction_swapper_running, THREAD_INTERRUPTIBLE, 100, 1000*NSEC_PER_USEC);
2655
2656 lck_mtx_unlock_always(c_list_lock);
2657
2658 thread_block(THREAD_CONTINUE_NULL);
2659
2660 lck_mtx_lock_spin_always(c_list_lock);
2661 }
2662 /*
2663 * Minor compactions
2664 */
2665 vm_compressor_do_delayed_compactions(flush_all);
2666
2667 vm_compressor_age_swapped_in_segments(flush_all);
2668
2669 if (c_swapout_count >= C_SWAPOUT_LIMIT) {
2670 /*
2671 * we timed out on the above thread_block
2672 * let's loop around and try again
2673 * the timeout allows us to continue
2674 * to do minor compactions to make
2675 * more memory available
2676 */
2677 continue;
2678 }
2679
2680 /*
2681 * Swap out segments?
2682 */
2683 if (flush_all == FALSE) {
2684 boolean_t needs_to_swap;
2685
2686 lck_mtx_unlock_always(c_list_lock);
2687
2688 needs_to_swap = compressor_needs_to_swap();
2689
2690#if !CONFIG_EMBEDDED
2691 if (needs_to_swap == TRUE && vm_swap_low_on_space())
2692 vm_compressor_take_paging_space_action();
2693#endif /* !CONFIG_EMBEDDED */
2694
2695 lck_mtx_lock_spin_always(c_list_lock);
2696
2697 if (needs_to_swap == FALSE)
2698 break;
2699 }
2700 if (queue_empty(&c_age_list_head))
2701 break;
2702 c_seg = (c_segment_t) queue_first(&c_age_list_head);
2703
2704 assert(c_seg->c_state == C_ON_AGE_Q);
2705
2706 if (flush_all == TRUE && c_seg->c_generation_id > c_generation_id_flush_barrier)
2707 break;
2708
2709 lck_mtx_lock_spin_always(&c_seg->c_lock);
2710
2711 if (c_seg->c_busy) {
2712
2713 lck_mtx_unlock_always(c_list_lock);
2714 c_seg_wait_on_busy(c_seg);
2715 lck_mtx_lock_spin_always(c_list_lock);
2716
2717 continue;
2718 }
2719 C_SEG_BUSY(c_seg);
2720
2721 if (c_seg_do_minor_compaction_and_unlock(c_seg, FALSE, TRUE, TRUE)) {
2722 /*
2723 * found an empty c_segment and freed it
2724 * so go grab the next guy in the queue
2725 */
2726 c_seg_major_compact_stats.count_of_freed_segs++;
2727 continue;
2728 }
2729 /*
2730 * Major compaction
2731 */
2732 keep_compacting = TRUE;
2733
2734 while (keep_compacting == TRUE) {
2735
2736 assert(c_seg->c_busy);
2737
2738 /* look for another segment to consolidate */
2739
2740 c_seg_next = (c_segment_t) queue_next(&c_seg->c_age_list);
2741
2742 if (queue_end(&c_age_list_head, (queue_entry_t)c_seg_next))
2743 break;
2744
2745 assert(c_seg_next->c_state == C_ON_AGE_Q);
2746
2747 if (c_seg_major_compact_ok(c_seg, c_seg_next) == FALSE)
2748 break;
2749
2750 lck_mtx_lock_spin_always(&c_seg_next->c_lock);
2751
2752 if (c_seg_next->c_busy) {
2753
2754 lck_mtx_unlock_always(c_list_lock);
2755 c_seg_wait_on_busy(c_seg_next);
2756 lck_mtx_lock_spin_always(c_list_lock);
2757
2758 continue;
2759 }
2760 /* grab that segment */
2761 C_SEG_BUSY(c_seg_next);
2762
2763 if (c_seg_do_minor_compaction_and_unlock(c_seg_next, FALSE, TRUE, TRUE)) {
2764 /*
2765 * found an empty c_segment and freed it
2766 * so we can't continue to use c_seg_next
2767 */
2768 c_seg_major_compact_stats.count_of_freed_segs++;
2769 continue;
2770 }
2771
2772 /* unlock the list ... */
2773 lck_mtx_unlock_always(c_list_lock);
2774
2775 /* do the major compaction */
2776
2777 keep_compacting = c_seg_major_compact(c_seg, c_seg_next);
2778
2779 PAGE_REPLACEMENT_DISALLOWED(TRUE);
2780
2781 lck_mtx_lock_spin_always(&c_seg_next->c_lock);
2782 /*
2783 * run a minor compaction on the donor segment
2784			 * since we pulled at least some of its
2785 * data into our target... if we've emptied
2786 * it, now is a good time to free it which
2787 * c_seg_minor_compaction_and_unlock also takes care of
2788 *
2789 * by passing TRUE, we ask for c_busy to be cleared
2790 * and c_wanted to be taken care of
2791 */
2792 if (c_seg_minor_compaction_and_unlock(c_seg_next, TRUE))
2793 c_seg_major_compact_stats.count_of_freed_segs++;
2794
2795 PAGE_REPLACEMENT_DISALLOWED(FALSE);
2796
2797 /* relock the list */
2798 lck_mtx_lock_spin_always(c_list_lock);
2799
2800 } /* major compaction */
2801
2802 lck_mtx_lock_spin_always(&c_seg->c_lock);
2803
2804 assert(c_seg->c_busy);
2805 assert(!c_seg->c_on_minorcompact_q);
2806
2807 if (VM_CONFIG_SWAP_IS_ACTIVE) {
2808 /*
2809 * This mode of putting a generic c_seg on the swapout list is
2810 * only supported when we have general swapping enabled
2811 */
2812 c_seg_switch_state(c_seg, C_ON_SWAPOUT_Q, FALSE);
2813 } else {
2814 if ((vm_swapout_ripe_segments == TRUE && c_overage_swapped_count < c_overage_swapped_limit)) {
2815
2816 assert(VM_CONFIG_SWAP_IS_PRESENT);
2817 /*
2818 * we are running compressor sweeps with swap-behind
2819 * make sure the c_seg has aged enough before swapping it
2820 * out...
2821 */
2822 if ((now - c_seg->c_creation_ts) >= vm_ripe_target_age) {
2823 c_seg->c_overage_swap = TRUE;
2824 c_overage_swapped_count++;
2825 c_seg_switch_state(c_seg, C_ON_SWAPOUT_Q, FALSE);
2826 }
2827 }
2828 }
2829 if (c_seg->c_state == C_ON_AGE_Q) {
2830 /*
2831 * this c_seg didn't get moved to the swapout queue
2832 * so we need to move it out of the way...
2833 * we just did a major compaction on it so put it
2834 * on that queue
2835 */
2836 c_seg_switch_state(c_seg, C_ON_MAJORCOMPACT_Q, FALSE);
2837 } else {
2838 c_seg_major_compact_stats.wasted_space_in_swapouts += C_SEG_BUFSIZE - c_seg->c_bytes_used;
2839 c_seg_major_compact_stats.count_of_swapouts++;
2840 }
2841 C_SEG_WAKEUP_DONE(c_seg);
2842
2843 lck_mtx_unlock_always(&c_seg->c_lock);
2844
2845 if (c_swapout_count) {
2846 lck_mtx_unlock_always(c_list_lock);
2847
2848 thread_wakeup((event_t)&c_swapout_list_head);
2849
2850 lck_mtx_lock_spin_always(c_list_lock);
2851 }
2852 }
2853}
2854
2855
2856static c_segment_t
2857c_seg_allocate(c_segment_t *current_chead)
2858{
2859 c_segment_t c_seg;
2860 int min_needed;
2861 int size_to_populate;
2862
2863#if !CONFIG_EMBEDDED
2864 if (vm_compressor_low_on_space())
2865 vm_compressor_take_paging_space_action();
2866#endif /* !CONFIG_EMBEDDED */
2867
2868 if ( (c_seg = *current_chead) == NULL ) {
2869 uint32_t c_segno;
2870
2871 lck_mtx_lock_spin_always(c_list_lock);
2872
2873 while (c_segments_busy == TRUE) {
2874 assert_wait((event_t) (&c_segments_busy), THREAD_UNINT);
2875
2876 lck_mtx_unlock_always(c_list_lock);
2877
2878 thread_block(THREAD_CONTINUE_NULL);
2879
2880 lck_mtx_lock_spin_always(c_list_lock);
2881 }
2882 if (c_free_segno_head == (uint32_t)-1) {
2883 uint32_t c_segments_available_new;
2884
2885 if (c_segments_available >= c_segments_limit || c_segment_pages_compressed >= c_segment_pages_compressed_limit) {
2886 lck_mtx_unlock_always(c_list_lock);
2887
2888 return (NULL);
2889 }
2890 c_segments_busy = TRUE;
2891 lck_mtx_unlock_always(c_list_lock);
2892
2893 kernel_memory_populate(compressor_map, (vm_offset_t)c_segments_next_page,
2894 PAGE_SIZE, KMA_KOBJECT, VM_KERN_MEMORY_COMPRESSOR);
2895 c_segments_next_page += PAGE_SIZE;
2896
2897 c_segments_available_new = c_segments_available + C_SEGMENTS_PER_PAGE;
2898
2899 if (c_segments_available_new > c_segments_limit)
2900 c_segments_available_new = c_segments_limit;
2901
2902 for (c_segno = c_segments_available + 1; c_segno < c_segments_available_new; c_segno++)
2903 c_segments[c_segno - 1].c_segno = c_segno;
2904
2905 lck_mtx_lock_spin_always(c_list_lock);
2906
2907 c_segments[c_segno - 1].c_segno = c_free_segno_head;
2908 c_free_segno_head = c_segments_available;
2909 c_segments_available = c_segments_available_new;
2910
2911 c_segments_busy = FALSE;
2912 thread_wakeup((event_t) (&c_segments_busy));
2913 }
2914 c_segno = c_free_segno_head;
2915 assert(c_segno >= 0 && c_segno < c_segments_limit);
2916
2917 c_free_segno_head = (uint32_t)c_segments[c_segno].c_segno;
2918
2919 /*
2920 * do the rest of the bookkeeping now while we're still behind
2921 * the list lock and grab our generation id now into a local
2922 * so that we can install it once we have the c_seg allocated
2923 */
2924 c_segment_count++;
2925 if (c_segment_count > c_segment_count_max)
2926 c_segment_count_max = c_segment_count;
2927
2928 lck_mtx_unlock_always(c_list_lock);
2929
2930 c_seg = (c_segment_t)zalloc(compressor_segment_zone);
2931 bzero((char *)c_seg, sizeof(struct c_segment));
2932
2933 c_seg->c_store.c_buffer = (int32_t *)C_SEG_BUFFER_ADDRESS(c_segno);
2934
2935 lck_mtx_init(&c_seg->c_lock, &vm_compressor_lck_grp, &vm_compressor_lck_attr);
2936
2937 c_seg->c_state = C_IS_EMPTY;
2938 c_seg->c_firstemptyslot = C_SLOT_MAX_INDEX;
2939 c_seg->c_mysegno = c_segno;
2940
2941 lck_mtx_lock_spin_always(c_list_lock);
2942 c_empty_count++;
2943 c_seg_switch_state(c_seg, C_IS_FILLING, FALSE);
2944 c_segments[c_segno].c_seg = c_seg;
2945 assert(c_segments[c_segno].c_segno > c_segments_available);
2946 lck_mtx_unlock_always(c_list_lock);
2947
2948 *current_chead = c_seg;
2949
2950#if DEVELOPMENT || DEBUG
2951 C_SEG_MAKE_WRITEABLE(c_seg);
2952#endif
2953
2954 }
2955 c_seg_alloc_nextslot(c_seg);
2956
2957 size_to_populate = C_SEG_ALLOCSIZE - C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset);
2958
2959 if (size_to_populate) {
2960
2961 min_needed = PAGE_SIZE + (C_SEG_ALLOCSIZE - C_SEG_BUFSIZE);
2962
2963 if (C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset - c_seg->c_nextoffset) < (unsigned) min_needed) {
2964
2965 if (size_to_populate > C_SEG_MAX_POPULATE_SIZE)
2966 size_to_populate = C_SEG_MAX_POPULATE_SIZE;
2967
2968 OSAddAtomic64(size_to_populate / PAGE_SIZE, &vm_pageout_vminfo.vm_compressor_pages_grabbed);
2969
2970 kernel_memory_populate(compressor_map,
2971 (vm_offset_t) &c_seg->c_store.c_buffer[c_seg->c_populated_offset],
2972 size_to_populate,
2973 KMA_COMPRESSOR,
2974 VM_KERN_MEMORY_COMPRESSOR);
2975 } else
2976 size_to_populate = 0;
2977 }
2978 PAGE_REPLACEMENT_DISALLOWED(TRUE);
2979
2980 lck_mtx_lock_spin_always(&c_seg->c_lock);
2981
2982 if (size_to_populate)
2983 c_seg->c_populated_offset += C_SEG_BYTES_TO_OFFSET(size_to_populate);
2984
2985 return (c_seg);
2986}
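/*
 * Population math above, sketched: a segment's buffer is backed lazily.
 * Only when the already-populated-but-unused space
 * (c_populated_offset - c_nextoffset, in bytes) drops below
 * min_needed = PAGE_SIZE + (C_SEG_ALLOCSIZE - C_SEG_BUFSIZE) do we grab
 * more physical pages, and never more than C_SEG_MAX_POPULATE_SIZE per
 * call; each grab is accounted in vm_compressor_pages_grabbed.  With a
 * hypothetical 4KB page and a hypothetical 16KB populate cap, a fresh
 * segment would be backed 16KB at a time as compressions fill it.
 */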
2987
2988
2989static void
2990c_current_seg_filled(c_segment_t c_seg, c_segment_t *current_chead)
2991{
2992 uint32_t unused_bytes;
2993 uint32_t offset_to_depopulate;
2994 int new_state = C_ON_AGE_Q;
2995 clock_sec_t sec;
2996 clock_nsec_t nsec;
2997 boolean_t head_insert = FALSE;
2998
2999 unused_bytes = trunc_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset - c_seg->c_nextoffset));
3000
3001#ifndef _OPEN_SOURCE
3002 /* TODO: The HW codec can generate, lazily, a '2nd page not mapped'
3003 * exception. So on such a platform, or platforms where we're confident
3004 * the codec does not require a buffer page to absorb trailing writes,
3005 * we can create an unmapped hole at the tail of the segment, rather
3006 * than a populated mapping. This will also guarantee that the codec
3007 * does not overwrite valid data past the edge of the segment and
3008 * thus eliminate the depopulation overhead.
3009 */
3010#endif
3011 if (unused_bytes) {
3012 offset_to_depopulate = C_SEG_BYTES_TO_OFFSET(round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_nextoffset)));
3013
3014 /*
3015 * release the extra physical page(s) at the end of the segment
3016 */
3017 lck_mtx_unlock_always(&c_seg->c_lock);
3018
3019 kernel_memory_depopulate(
3020 compressor_map,
3021 (vm_offset_t) &c_seg->c_store.c_buffer[offset_to_depopulate],
3022 unused_bytes,
3023 KMA_COMPRESSOR);
3024
3025 lck_mtx_lock_spin_always(&c_seg->c_lock);
3026
3027 c_seg->c_populated_offset = offset_to_depopulate;
3028 }
3029 assert(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset) <= C_SEG_BUFSIZE);
3030
3031#if DEVELOPMENT || DEBUG
3032 {
3033 boolean_t c_seg_was_busy = FALSE;
3034
3035 if ( !c_seg->c_busy)
3036 C_SEG_BUSY(c_seg);
3037 else
3038 c_seg_was_busy = TRUE;
3039
3040 lck_mtx_unlock_always(&c_seg->c_lock);
3041
3042 C_SEG_WRITE_PROTECT(c_seg);
3043
3044 lck_mtx_lock_spin_always(&c_seg->c_lock);
3045
3046 if (c_seg_was_busy == FALSE)
3047 C_SEG_WAKEUP_DONE(c_seg);
3048 }
3049#endif
3050
3051#if CONFIG_FREEZE
3052 if (current_chead == (c_segment_t*)&freezer_chead &&
3053 VM_CONFIG_SWAP_IS_PRESENT &&
3054 VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
3055 new_state = C_ON_SWAPOUT_Q;
3056 }
3057#endif /* CONFIG_FREEZE */
3058
3059 if (vm_darkwake_mode == TRUE) {
3060 new_state = C_ON_SWAPOUT_Q;
3061 head_insert = TRUE;
3062 }
3063
3064 clock_get_system_nanotime(&sec, &nsec);
3065 c_seg->c_creation_ts = (uint32_t)sec;
3066
3067 lck_mtx_lock_spin_always(c_list_lock);
3068
3069 c_seg->c_generation_id = c_generation_id++;
3070 c_seg_switch_state(c_seg, new_state, head_insert);
3071
3072#if CONFIG_FREEZE
3073 if (c_seg->c_state == C_ON_SWAPOUT_Q) {
3074 /*
3075		 * darkwake and freezer can't co-exist
3076 * We'll need to fix this accounting as a start.
3077 */
3078 assert(vm_darkwake_mode == FALSE);
3079 c_freezer_swapout_page_count += (C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset)) / PAGE_SIZE_64;
3080 }
3081#endif /* CONFIG_FREEZE */
3082
3083 if (c_seg->c_state == C_ON_AGE_Q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE)
3084 c_seg_need_delayed_compaction(c_seg, TRUE);
3085
3086 lck_mtx_unlock_always(c_list_lock);
3087
3088 if (c_seg->c_state == C_ON_SWAPOUT_Q)
3089 thread_wakeup((event_t)&c_swapout_list_head);
3090
3091 *current_chead = NULL;
3092}
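/*
 * Depopulation math above, sketched with a hypothetical 4KB page size:
 * if 40KB of the buffer is populated but c_nextoffset corresponds to
 * only 33KB of compressed data, unused_bytes = trunc_page(40KB - 33KB)
 * = 4KB and offset_to_depopulate corresponds to round_page(33KB) = 36KB,
 * so the final 4KB physical page is handed back to the system before
 * the segment leaves the filling state.
 */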
3093
3094
3095/*
3096 * returns with c_seg locked
3097 */
3098void
3099c_seg_swapin_requeue(c_segment_t c_seg, boolean_t has_data, boolean_t minor_compact_ok, boolean_t age_on_swapin_q)
3100{
3101 clock_sec_t sec;
3102 clock_nsec_t nsec;
3103
3104 clock_get_system_nanotime(&sec, &nsec);
3105
3106 lck_mtx_lock_spin_always(c_list_lock);
3107 lck_mtx_lock_spin_always(&c_seg->c_lock);
3108
3109 assert(c_seg->c_busy_swapping);
3110 assert(c_seg->c_busy);
3111
3112 c_seg->c_busy_swapping = 0;
3113
3114 if (c_seg->c_overage_swap == TRUE) {
3115 c_overage_swapped_count--;
3116 c_seg->c_overage_swap = FALSE;
3117 }
3118 if (has_data == TRUE) {
3119 if (age_on_swapin_q == TRUE)
3120 c_seg_switch_state(c_seg, C_ON_SWAPPEDIN_Q, FALSE);
3121 else
3122 c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
3123
3124 if (minor_compact_ok == TRUE && !c_seg->c_on_minorcompact_q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE)
3125 c_seg_need_delayed_compaction(c_seg, TRUE);
3126 } else {
3127 c_seg->c_store.c_buffer = (int32_t*) NULL;
3128 c_seg->c_populated_offset = C_SEG_BYTES_TO_OFFSET(0);
3129
3130 c_seg_switch_state(c_seg, C_ON_BAD_Q, FALSE);
3131 }
3132 c_seg->c_swappedin_ts = (uint32_t)sec;
3133
3134 lck_mtx_unlock_always(c_list_lock);
3135}
3136
3137
3138
3139/*
3140 * c_seg has to be locked and is returned locked if the c_seg isn't freed
3141 * PAGE_REPLACEMENT_DISALLOWED has to be TRUE on entry and is returned TRUE
3142 * c_seg_swapin returns 1 if the c_seg was freed, 0 otherwise
3143 */
3144
3145int
3146c_seg_swapin(c_segment_t c_seg, boolean_t force_minor_compaction, boolean_t age_on_swapin_q)
3147{
3148 vm_offset_t addr = 0;
3149 uint32_t io_size = 0;
3150 uint64_t f_offset;
3151
3152 assert(C_SEG_IS_ONDISK(c_seg));
3153
3154#if !CHECKSUM_THE_SWAP
3155 c_seg_trim_tail(c_seg);
3156#endif
3157 io_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
3158 f_offset = c_seg->c_store.c_swap_handle;
3159
3160 C_SEG_BUSY(c_seg);
3161 c_seg->c_busy_swapping = 1;
3162
3163 /*
3164 * This thread is likely going to block for I/O.
3165 * Make sure it is ready to run when the I/O completes because
3166 * it needs to clear the busy bit on the c_seg so that other
3167	 * waiting threads can make progress too. To do that, bump
3168	 * the rwlock_count so that the thread's priority is boosted.
3169 */
3170 set_thread_rwlock_boost();
3171 lck_mtx_unlock_always(&c_seg->c_lock);
3172
3173 PAGE_REPLACEMENT_DISALLOWED(FALSE);
3174
3175 addr = (vm_offset_t)C_SEG_BUFFER_ADDRESS(c_seg->c_mysegno);
3176 c_seg->c_store.c_buffer = (int32_t*) addr;
3177
3178 kernel_memory_populate(compressor_map, addr, io_size, KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR);
3179
3180 if (vm_swap_get(c_seg, f_offset, io_size) != KERN_SUCCESS) {
3181 PAGE_REPLACEMENT_DISALLOWED(TRUE);
3182
3183 kernel_memory_depopulate(compressor_map, addr, io_size, KMA_COMPRESSOR);
3184
3185 c_seg_swapin_requeue(c_seg, FALSE, TRUE, age_on_swapin_q);
3186 } else {
3187#if ENCRYPTED_SWAP
3188 vm_swap_decrypt(c_seg);
3189#endif /* ENCRYPTED_SWAP */
3190
3191#if CHECKSUM_THE_SWAP
3192 if (c_seg->cseg_swap_size != io_size)
3193 panic("swapin size doesn't match swapout size");
3194
3195 if (c_seg->cseg_hash != vmc_hash((char*) c_seg->c_store.c_buffer, (int)io_size)) {
3196 panic("c_seg_swapin - Swap hash mismatch\n");
3197 }
3198#endif /* CHECKSUM_THE_SWAP */
3199
3200 PAGE_REPLACEMENT_DISALLOWED(TRUE);
3201
3202 c_seg_swapin_requeue(c_seg, TRUE, force_minor_compaction == TRUE ? FALSE : TRUE, age_on_swapin_q);
3203
3204 OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used);
3205
3206 if (force_minor_compaction == TRUE) {
3207 if (c_seg_minor_compaction_and_unlock(c_seg, FALSE)) {
3208 /*
3209 * c_seg was completely empty so it was freed,
3210 * so be careful not to reference it again
3211 *
3212 * Drop the rwlock_count so that the thread priority
3213				 * is returned to where it is supposed to be.
3214 */
3215 clear_thread_rwlock_boost();
3216 return (1);
3217 }
3218
3219 lck_mtx_lock_spin_always(&c_seg->c_lock);
3220 }
3221 }
3222 C_SEG_WAKEUP_DONE(c_seg);
3223
3224 /*
3225 * Drop the rwlock_count so that the thread priority
3226	 * is returned to where it is supposed to be.
3227 */
3228 clear_thread_rwlock_boost();
3229
3230 return (0);
3231}
3232
3233
3234static void
3235c_segment_sv_hash_drop_ref(int hash_indx)
3236{
3237 struct c_sv_hash_entry o_sv_he, n_sv_he;
3238
3239 while (1) {
3240
3241 o_sv_he.he_record = c_segment_sv_hash_table[hash_indx].he_record;
3242
3243 n_sv_he.he_ref = o_sv_he.he_ref - 1;
3244 n_sv_he.he_data = o_sv_he.he_data;
3245
3246 if (OSCompareAndSwap64((UInt64)o_sv_he.he_record, (UInt64)n_sv_he.he_record, (UInt64 *) &c_segment_sv_hash_table[hash_indx].he_record) == TRUE) {
3247 if (n_sv_he.he_ref == 0)
3248 OSAddAtomic(-1, &c_segment_svp_in_hash);
3249 break;
3250 }
3251 }
3252}
3253
3254
3255static int
3256c_segment_sv_hash_insert(uint32_t data)
3257{
3258 int hash_sindx;
3259 int misses;
3260 struct c_sv_hash_entry o_sv_he, n_sv_he;
3261 boolean_t got_ref = FALSE;
3262
3263 if (data == 0)
3264 OSAddAtomic(1, &c_segment_svp_zero_compressions);
3265 else
3266 OSAddAtomic(1, &c_segment_svp_nonzero_compressions);
3267
3268 hash_sindx = data & C_SV_HASH_MASK;
3269
3270 for (misses = 0; misses < C_SV_HASH_MAX_MISS; misses++)
3271 {
3272 o_sv_he.he_record = c_segment_sv_hash_table[hash_sindx].he_record;
3273
3274 while (o_sv_he.he_data == data || o_sv_he.he_ref == 0) {
3275 n_sv_he.he_ref = o_sv_he.he_ref + 1;
3276 n_sv_he.he_data = data;
3277
3278 if (OSCompareAndSwap64((UInt64)o_sv_he.he_record, (UInt64)n_sv_he.he_record, (UInt64 *) &c_segment_sv_hash_table[hash_sindx].he_record) == TRUE) {
3279 if (n_sv_he.he_ref == 1)
3280 OSAddAtomic(1, &c_segment_svp_in_hash);
3281 got_ref = TRUE;
3282 break;
3283 }
3284 o_sv_he.he_record = c_segment_sv_hash_table[hash_sindx].he_record;
3285 }
3286 if (got_ref == TRUE)
3287 break;
3288 hash_sindx++;
3289
3290 if (hash_sindx == C_SV_HASH_SIZE)
3291 hash_sindx = 0;
3292 }
3293 if (got_ref == FALSE)
3294 return(-1);
3295
3296 return (hash_sindx);
3297}
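/*
 * Descriptive sketch of the single-value hash used above: each entry
 * packs a 32-bit reference count and the 32-bit page value into one
 * 64-bit record that is updated with OSCompareAndSwap64.  An insert
 * starts probing at (data & C_SV_HASH_MASK) and claims the first entry
 * that already holds the same value or whose refcount is zero; after
 * C_SV_HASH_MAX_MISS unsuccessful probes it returns -1 and the caller
 * falls back to storing the value as a 4-byte slot in a regular segment.
 */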
3298
3299
3300#if RECORD_THE_COMPRESSED_DATA
3301
3302static void
3303c_compressed_record_data(char *src, int c_size)
3304{
3305 if ((c_compressed_record_cptr + c_size + 4) >= c_compressed_record_ebuf)
3306 panic("c_compressed_record_cptr >= c_compressed_record_ebuf");
3307
3308 *(int *)((void *)c_compressed_record_cptr) = c_size;
3309
3310 c_compressed_record_cptr += 4;
3311
3312 memcpy(c_compressed_record_cptr, src, c_size);
3313 c_compressed_record_cptr += c_size;
3314}
3315#endif
3316
3317
3318static int
3319c_compress_page(char *src, c_slot_mapping_t slot_ptr, c_segment_t *current_chead, char *scratch_buf)
3320{
3321 int c_size;
3322 int c_rounded_size = 0;
3323 int max_csize;
3324 c_slot_t cs;
3325 c_segment_t c_seg;
3326
3327 KERNEL_DEBUG(0xe0400000 | DBG_FUNC_START, *current_chead, 0, 0, 0, 0);
3328retry:
3329 if ((c_seg = c_seg_allocate(current_chead)) == NULL) {
3330 return (1);
3331 }
3332 /*
3333 * returns with c_seg lock held
3334 * and PAGE_REPLACEMENT_DISALLOWED(TRUE)...
3335 * c_nextslot has been allocated and
3336 * c_store.c_buffer populated
3337 */
3338 assert(c_seg->c_state == C_IS_FILLING);
3339
3340 cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_seg->c_nextslot);
3341
3342 cs->c_packed_ptr = C_SLOT_PACK_PTR(slot_ptr);
3343 assert(slot_ptr == (c_slot_mapping_t)C_SLOT_UNPACK_PTR(cs));
3344
3345 cs->c_offset = c_seg->c_nextoffset;
3346
3347 max_csize = C_SEG_BUFSIZE - C_SEG_OFFSET_TO_BYTES((int32_t)cs->c_offset);
3348
3349 if (max_csize > PAGE_SIZE)
3350 max_csize = PAGE_SIZE;
3351
3352#if CHECKSUM_THE_DATA
3353 cs->c_hash_data = vmc_hash(src, PAGE_SIZE);
3354#endif
3355 boolean_t incomp_copy = FALSE;
3356 int max_csize_adj = (max_csize - 4);
3357
3358 if (vm_compressor_algorithm() != VM_COMPRESSOR_DEFAULT_CODEC) {
3359#if defined(__arm__) || defined(__arm64__)
3360 uint16_t ccodec = CINVALID;
3361
3362 if (max_csize >= C_SEG_OFFSET_ALIGNMENT_BOUNDARY) {
3363 c_size = metacompressor((const uint8_t *) src,
3364 (uint8_t *) &c_seg->c_store.c_buffer[cs->c_offset],
3365 max_csize_adj, &ccodec,
3366 scratch_buf, &incomp_copy);
3367#if C_SEG_OFFSET_ALIGNMENT_BOUNDARY > 4
3368 if (c_size > max_csize_adj) {
3369 c_size = -1;
3370 }
3371#endif
3372 } else {
3373 c_size = -1;
3374 }
3375 assert(ccodec == CCWK || ccodec == CCLZ4);
3376 cs->c_codec = ccodec;
3377#endif
3378 } else {
3379#if defined(__arm__) || defined(__arm64__)
3380 cs->c_codec = CCWK;
3381#endif
3382#if defined(__arm64__)
3383 __unreachable_ok_push
3384 if (PAGE_SIZE == 4096)
3385 c_size = WKdm_compress_4k((WK_word *)(uintptr_t)src, (WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset],
3386 (WK_word *)(uintptr_t)scratch_buf, max_csize_adj);
3387 else {
3388 c_size = WKdm_compress_16k((WK_word *)(uintptr_t)src, (WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset],
3389 (WK_word *)(uintptr_t)scratch_buf, max_csize_adj);
3390 }
3391 __unreachable_ok_pop
3392#else
3393 c_size = WKdm_compress_new((const WK_word *)(uintptr_t)src, (WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset],
3394 (WK_word *)(uintptr_t)scratch_buf, max_csize_adj);
3395#endif
3396 }
3397 assertf(((c_size <= max_csize_adj) && (c_size >= -1)),
3398 "c_size invalid (%d, %d), cur compressions: %d", c_size, max_csize_adj, c_segment_pages_compressed);
3399
3400 if (c_size == -1) {
3401 if (max_csize < PAGE_SIZE) {
3402 c_current_seg_filled(c_seg, current_chead);
3403 assert(*current_chead == NULL);
3404
3405 lck_mtx_unlock_always(&c_seg->c_lock);
3406 /* TODO: it may be worth requiring codecs to distinguish
3407 * between incompressible inputs and failures due to
3408 * budget exhaustion.
3409 */
3410 PAGE_REPLACEMENT_DISALLOWED(FALSE);
3411 goto retry;
3412 }
3413 c_size = PAGE_SIZE;
3414
3415 if (incomp_copy == FALSE) {
3416 memcpy(&c_seg->c_store.c_buffer[cs->c_offset], src, c_size);
3417 }
3418
3419 OSAddAtomic(1, &c_segment_noncompressible_pages);
3420
3421 } else if (c_size == 0) {
3422 int hash_index;
3423
3424 /*
3425 * special case - this is a page completely full of a single 32 bit value
3426 */
3427 hash_index = c_segment_sv_hash_insert(*(uint32_t *)(uintptr_t)src);
3428
3429 if (hash_index != -1) {
3430 slot_ptr->s_cindx = hash_index;
3431 slot_ptr->s_cseg = C_SV_CSEG_ID;
3432
3433 OSAddAtomic(1, &c_segment_svp_hash_succeeded);
3434#if RECORD_THE_COMPRESSED_DATA
3435 c_compressed_record_data(src, 4);
3436#endif
3437 goto sv_compression;
3438 }
3439 c_size = 4;
3440
3441 memcpy(&c_seg->c_store.c_buffer[cs->c_offset], src, c_size);
3442
3443 OSAddAtomic(1, &c_segment_svp_hash_failed);
3444 }
3445
3446#if RECORD_THE_COMPRESSED_DATA
3447 c_compressed_record_data((char *)&c_seg->c_store.c_buffer[cs->c_offset], c_size);
3448#endif
3449#if CHECKSUM_THE_COMPRESSED_DATA
3450 cs->c_hash_compressed_data = vmc_hash((char *)&c_seg->c_store.c_buffer[cs->c_offset], c_size);
3451#endif
3452#if POPCOUNT_THE_COMPRESSED_DATA
3453 cs->c_pop_cdata = vmc_pop((uintptr_t) &c_seg->c_store.c_buffer[cs->c_offset], c_size);
3454#endif
3455 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;
3456
3457 PACK_C_SIZE(cs, c_size);
3458 c_seg->c_bytes_used += c_rounded_size;
3459 c_seg->c_nextoffset += C_SEG_BYTES_TO_OFFSET(c_rounded_size);
3460 c_seg->c_slots_used++;
3461
3462 slot_ptr->s_cindx = c_seg->c_nextslot++;
3463 /* <csegno=0,indx=0> would mean "empty slot", so use csegno+1 */
3464 slot_ptr->s_cseg = c_seg->c_mysegno + 1;
3465
3466sv_compression:
3467 if (c_seg->c_nextoffset >= C_SEG_OFF_LIMIT || c_seg->c_nextslot >= C_SLOT_MAX_INDEX) {
3468 c_current_seg_filled(c_seg, current_chead);
3469 assert(*current_chead == NULL);
3470 }
3471 lck_mtx_unlock_always(&c_seg->c_lock);
3472
3473 PAGE_REPLACEMENT_DISALLOWED(FALSE);
3474
3475#if RECORD_THE_COMPRESSED_DATA
3476 if ((c_compressed_record_cptr - c_compressed_record_sbuf) >= C_SEG_ALLOCSIZE) {
3477 c_compressed_record_write(c_compressed_record_sbuf, (int)(c_compressed_record_cptr - c_compressed_record_sbuf));
3478 c_compressed_record_cptr = c_compressed_record_sbuf;
3479 }
3480#endif
3481 if (c_size) {
3482 OSAddAtomic64(c_size, &c_segment_compressed_bytes);
3483 OSAddAtomic64(c_rounded_size, &compressor_bytes_used);
3484 }
3485 OSAddAtomic64(PAGE_SIZE, &c_segment_input_bytes);
3486
3487 OSAddAtomic(1, &c_segment_pages_compressed);
3488 OSAddAtomic(1, &sample_period_compression_count);
3489
3490 KERNEL_DEBUG(0xe0400000 | DBG_FUNC_END, *current_chead, c_size, c_segment_input_bytes, c_segment_compressed_bytes, 0);
3491
3492 return (0);
3493}
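/*
 * Notes on the size bookkeeping above (descriptive): a codec result of -1
 * means the page did not compress within the available budget; if a full
 * page still fits in this segment it is stored uncompressed, otherwise
 * the segment is closed out and the compression retried in a fresh one.
 * A result of 0 means the page is a single repeated 32-bit value, which
 * goes to the single-value hash or, failing that, is stored as 4 bytes.
 * Whatever is stored is rounded up to the segment's offset alignment:
 * c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) &
 * ~C_SEG_OFFSET_ALIGNMENT_MASK; with a hypothetical 4-byte alignment
 * (mask 3), a 1237-byte compression consumes 1240 bytes of buffer.
 */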
3494
3495static inline void sv_decompress(int32_t *ddst, int32_t pattern) {
3496#if __x86_64__
3497 memset_word(ddst, pattern, PAGE_SIZE / sizeof(int32_t));
3498#else
3499 size_t i;
3500
3501 /* Unroll the pattern fill loop 4x to encourage the
3502 * compiler to emit NEON stores, cf.
3503 * <rdar://problem/25839866> Loop autovectorization
3504 * anomalies.
3505 * We use separate loops for each PAGE_SIZE
3506 * to allow the autovectorizer to engage, as PAGE_SIZE
3507 * is currently not a constant.
3508 */
3509
3510 __unreachable_ok_push
3511 if (PAGE_SIZE == 4096) {
3512 for (i = 0; i < (4096U / sizeof(int32_t)); i += 4) {
3513 *ddst++ = pattern;
3514 *ddst++ = pattern;
3515 *ddst++ = pattern;
3516 *ddst++ = pattern;
3517 }
3518 } else {
3519 assert(PAGE_SIZE == 16384);
3520 for (i = 0; i < (int)(16384U / sizeof(int32_t)); i += 4) {
3521 *ddst++ = pattern;
3522 *ddst++ = pattern;
3523 *ddst++ = pattern;
3524 *ddst++ = pattern;
3525 }
3526 }
3527 __unreachable_ok_pop
3528#endif
3529}
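/*
 * Illustrative use of sv_decompress: a page that was entirely filled
 * with one 32-bit pattern (say 0x00000000 or 0xDEADBEEF) is stored as
 * just that value; on decompression the pattern is replayed
 * PAGE_SIZE / sizeof(int32_t) times, via memset_word on x86_64 and via
 * the unrolled store loop above elsewhere.
 */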
3530
3531static int
3532c_decompress_page(char *dst, volatile c_slot_mapping_t slot_ptr, int flags, int *zeroslot)
3533{
3534 c_slot_t cs;
3535 c_segment_t c_seg;
3536 uint32_t c_segno;
3537 int c_indx;
3538 int c_rounded_size;
3539 uint32_t c_size;
3540 int retval = 0;
3541 boolean_t need_unlock = TRUE;
3542 boolean_t consider_defragmenting = FALSE;
3543 boolean_t kdp_mode = FALSE;
3544
3545 if (__improbable(flags & C_KDP)) {
3546 if (not_in_kdp) {
3547 panic("C_KDP passed to decompress page from outside of debugger context");
3548 }
3549
3550 assert((flags & C_KEEP) == C_KEEP);
3551 assert((flags & C_DONT_BLOCK) == C_DONT_BLOCK);
3552
3553 if ((flags & (C_DONT_BLOCK | C_KEEP)) != (C_DONT_BLOCK | C_KEEP)) {
3554 return (-2);
3555 }
3556
3557 kdp_mode = TRUE;
3558 *zeroslot = 0;
3559 }
3560
3561ReTry:
3562 if (__probable(!kdp_mode)) {
3563 PAGE_REPLACEMENT_DISALLOWED(TRUE);
3564 } else {
3565 if (kdp_lck_rw_lock_is_acquired_exclusive(&c_master_lock)) {
3566 return (-2);
3567 }
3568 }
3569
3570#if HIBERNATION
3571 /*
3572 * if hibernation is enabled, it indicates (via a call
3573	 * to 'vm_decompressor_lock') that no further
3574 * decompressions are allowed once it reaches
3575 * the point of flushing all of the currently dirty
3576 * anonymous memory through the compressor and out
3577 * to disk... in this state we allow freeing of compressed
3578 * pages and must honor the C_DONT_BLOCK case
3579 */
3580 if (__improbable(dst && decompressions_blocked == TRUE)) {
3581 if (flags & C_DONT_BLOCK) {
3582
3583 if (__probable(!kdp_mode)) {
3584 PAGE_REPLACEMENT_DISALLOWED(FALSE);
3585 }
3586
3587 *zeroslot = 0;
3588 return (-2);
3589 }
3590 /*
3591 * it's safe to atomically assert and block behind the
3592 * lock held in shared mode because "decompressions_blocked" is
3593 * only set and cleared and the thread_wakeup done when the lock
3594 * is held exclusively
3595 */
3596 assert_wait((event_t)&decompressions_blocked, THREAD_UNINT);
3597
3598 PAGE_REPLACEMENT_DISALLOWED(FALSE);
3599
3600 thread_block(THREAD_CONTINUE_NULL);
3601
3602 goto ReTry;
3603 }
3604#endif
3605 /* s_cseg is actually "segno+1" */
3606 c_segno = slot_ptr->s_cseg - 1;
3607
3608 if (__improbable(c_segno >= c_segments_available))
3609 panic("c_decompress_page: c_segno %d >= c_segments_available %d, slot_ptr(%p), slot_data(%x)",
3610 c_segno, c_segments_available, slot_ptr, *(int *)((void *)slot_ptr));
3611
3612 if (__improbable(c_segments[c_segno].c_segno < c_segments_available))
3613 panic("c_decompress_page: c_segno %d is free, slot_ptr(%p), slot_data(%x)",
3614 c_segno, slot_ptr, *(int *)((void *)slot_ptr));
3615
3616 c_seg = c_segments[c_segno].c_seg;
3617
3618 if (__probable(!kdp_mode)) {
3619 lck_mtx_lock_spin_always(&c_seg->c_lock);
3620 } else {
3621 if (kdp_lck_mtx_lock_spin_is_acquired(&c_seg->c_lock)) {
3622 return (-2);
3623 }
3624 }
3625
3626 assert(c_seg->c_state != C_IS_EMPTY && c_seg->c_state != C_IS_FREE);
3627
3628 if (dst == NULL && c_seg->c_busy_swapping) {
3629 assert(c_seg->c_busy);
3630
3631 goto bypass_busy_check;
3632 }
3633 if (flags & C_DONT_BLOCK) {
3634 if (c_seg->c_busy || (C_SEG_IS_ONDISK(c_seg) && dst)) {
3635 *zeroslot = 0;
3636
3637 retval = -2;
3638 goto done;
3639 }
3640 }
3641 if (c_seg->c_busy) {
3642
3643 PAGE_REPLACEMENT_DISALLOWED(FALSE);
3644
3645 c_seg_wait_on_busy(c_seg);
3646
3647 goto ReTry;
3648 }
3649bypass_busy_check:
3650
3651 c_indx = slot_ptr->s_cindx;
3652
3653 if (__improbable(c_indx >= c_seg->c_nextslot))
3654 panic("c_decompress_page: c_indx %d >= c_nextslot %d, c_seg(%p), slot_ptr(%p), slot_data(%x)",
3655 c_indx, c_seg->c_nextslot, c_seg, slot_ptr, *(int *)((void *)slot_ptr));
3656
3657 cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx);
3658
3659 c_size = UNPACK_C_SIZE(cs);
3660
3661 if (__improbable(c_size == 0))
3662 panic("c_decompress_page: c_size == 0, c_seg(%p), slot_ptr(%p), slot_data(%x)",
3663 c_seg, slot_ptr, *(int *)((void *)slot_ptr));
3664
3665 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;
3666
3667 if (dst) {
3668 uint32_t age_of_cseg;
3669 clock_sec_t cur_ts_sec;
3670 clock_nsec_t cur_ts_nsec;
3671
3672 if (C_SEG_IS_ONDISK(c_seg)) {
3673 assert(kdp_mode == FALSE);
3674 retval = c_seg_swapin(c_seg, FALSE, TRUE);
3675 assert(retval == 0);
3676
3677 retval = 1;
3678 }
3679 if (c_seg->c_state == C_ON_BAD_Q) {
3680 assert(c_seg->c_store.c_buffer == NULL);
3681 *zeroslot = 0;
3682
3683 retval = -1;
3684 goto done;
3685 }
3686
3687#if POPCOUNT_THE_COMPRESSED_DATA
3688 unsigned csvpop;
3689 uintptr_t csvaddr = (uintptr_t) &c_seg->c_store.c_buffer[cs->c_offset];
3690 if (cs->c_pop_cdata != (csvpop = vmc_pop(csvaddr, c_size))) {
3691 panic("Compressed data popcount doesn't match original, bit distance: %d %p (phys: %p) %p %p 0x%llx 0x%x 0x%x 0x%x", (csvpop - cs->c_pop_cdata), (void *)csvaddr, (void *) kvtophys(csvaddr), c_seg, cs, cs->c_offset, c_size, csvpop, cs->c_pop_cdata);
3692 }
3693#endif
3694
3695#if CHECKSUM_THE_COMPRESSED_DATA
3696 unsigned csvhash;
3697 if (cs->c_hash_compressed_data != (csvhash = vmc_hash((char *)&c_seg->c_store.c_buffer[cs->c_offset], c_size))) {
3698 panic("Compressed data doesn't match original %p %p %u %u %u", c_seg, cs, c_size, cs->c_hash_compressed_data, csvhash);
3699 }
3700#endif
3701 if (c_rounded_size == PAGE_SIZE) {
3702 /*
3703 * page wasn't compressible... just copy it out
3704 */
3705 memcpy(dst, &c_seg->c_store.c_buffer[cs->c_offset], PAGE_SIZE);
3706 } else if (c_size == 4) {
3707 int32_t data;
3708 int32_t *dptr;
3709
			/*
			 * page was populated with a single value
			 * that didn't find a home in our fast hash table,
			 * so it was stored as a single non-compressed 32-bit
			 * value that we now need to replicate across the page
			 */
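			/*
			 * e.g. a page whose every 32-bit word is 0x5A5A5A5A but which
			 * missed the single-value hash is stored as just those 4 bytes;
			 * sv_decompress() replays that value across the whole destination
			 * page (cf. the equivalent replication loop in vm_compressor_get() below)
			 */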
3716 dptr = (int32_t *)(uintptr_t)dst;
3717 data = *(int32_t *)(&c_seg->c_store.c_buffer[cs->c_offset]);
3718 sv_decompress(dptr, data);
3719 } else {
3720 uint32_t my_cpu_no;
3721 char *scratch_buf;
3722
3723 if (__probable(!kdp_mode)) {
3724 /*
3725 * we're behind the c_seg lock held in spin mode
3726 * which means pre-emption is disabled... therefore
3727 * the following sequence is atomic and safe
3728 */
3729 my_cpu_no = cpu_number();
3730
3731 assert(my_cpu_no < compressor_cpus);
3732
3733 scratch_buf = &compressor_scratch_bufs[my_cpu_no * vm_compressor_get_decode_scratch_size()];
3734 } else {
3735 scratch_buf = kdp_compressor_scratch_buf;
3736 }
3737
3738 if (vm_compressor_algorithm() != VM_COMPRESSOR_DEFAULT_CODEC) {
3739#if defined(__arm__) || defined(__arm64__)
3740 uint16_t c_codec = cs->c_codec;
3741 metadecompressor((const uint8_t *) &c_seg->c_store.c_buffer[cs->c_offset],
3742 (uint8_t *)dst, c_size, c_codec, (void *)scratch_buf);
3743#endif
3744 } else {
3745#if defined(__arm64__)
3746 __unreachable_ok_push
3747 if (PAGE_SIZE == 4096)
3748 WKdm_decompress_4k((WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset],
3749 (WK_word *)(uintptr_t)dst, (WK_word *)(uintptr_t)scratch_buf, c_size);
3750 else {
3751 WKdm_decompress_16k((WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset],
3752 (WK_word *)(uintptr_t)dst, (WK_word *)(uintptr_t)scratch_buf, c_size);
3753 }
3754 __unreachable_ok_pop
3755#else
3756 WKdm_decompress_new((WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset],
3757 (WK_word *)(uintptr_t)dst, (WK_word *)(uintptr_t)scratch_buf, c_size);
3758#endif
3759 }
3760 }
3761
3762#if CHECKSUM_THE_DATA
3763 if (cs->c_hash_data != vmc_hash(dst, PAGE_SIZE)) {
3764#if defined(__arm__) || defined(__arm64__)
3765 int32_t *dinput = &c_seg->c_store.c_buffer[cs->c_offset];
3766 panic("decompressed data doesn't match original cs: %p, hash: 0x%x, offset: %d, c_size: %d, c_rounded_size: %d, codec: %d, header: 0x%x 0x%x 0x%x", cs, cs->c_hash_data, cs->c_offset, c_size, c_rounded_size, cs->c_codec, *dinput, *(dinput + 1), *(dinput + 2));
3767#else
3768 panic("decompressed data doesn't match original cs: %p, hash: %d, offset: 0x%x, c_size: %d", cs, cs->c_hash_data, cs->c_offset, c_size);
3769#endif
3770 }
3771#endif
3772 if (c_seg->c_swappedin_ts == 0 && !kdp_mode) {
3773
3774 clock_get_system_nanotime(&cur_ts_sec, &cur_ts_nsec);
3775
3776 age_of_cseg = (uint32_t)cur_ts_sec - c_seg->c_creation_ts;
3777 if (age_of_cseg < DECOMPRESSION_SAMPLE_MAX_AGE)
3778 OSAddAtomic(1, &age_of_decompressions_during_sample_period[age_of_cseg]);
3779 else
3780 OSAddAtomic(1, &overage_decompressions_during_sample_period);
3781
3782 OSAddAtomic(1, &sample_period_decompression_count);
3783 }
3784 }
3785 if (flags & C_KEEP) {
3786 *zeroslot = 0;
3787 goto done;
3788 }
3789 assert(kdp_mode == FALSE);
3790
3791 c_seg->c_bytes_unused += c_rounded_size;
3792 c_seg->c_bytes_used -= c_rounded_size;
3793
3794 assert(c_seg->c_slots_used);
3795 c_seg->c_slots_used--;
3796
3797 PACK_C_SIZE(cs, 0);
3798
3799 if (c_indx < c_seg->c_firstemptyslot)
3800 c_seg->c_firstemptyslot = c_indx;
3801
3802 OSAddAtomic(-1, &c_segment_pages_compressed);
3803
3804 if (c_seg->c_state != C_ON_BAD_Q && !(C_SEG_IS_ONDISK(c_seg))) {
3805 /*
3806 * C_SEG_IS_ONDISK == TRUE can occur when we're doing a
3807 * free of a compressed page (i.e. dst == NULL)
3808 */
3809 OSAddAtomic64(-c_rounded_size, &compressor_bytes_used);
3810 }
3811 if (c_seg->c_busy_swapping) {
3812 /*
3813 * bypass case for c_busy_swapping...
3814 * let the swapin/swapout paths deal with putting
3815 * the c_seg on the minor compaction queue if needed
3816 */
3817 assert(c_seg->c_busy);
3818 goto done;
3819 }
3820 assert(!c_seg->c_busy);
3821
3822 if (c_seg->c_state != C_IS_FILLING) {
3823 if (c_seg->c_bytes_used == 0) {
3824 if ( !(C_SEG_IS_ONDISK(c_seg))) {
3825 int pages_populated;
3826
3827 pages_populated = (round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset))) / PAGE_SIZE;
3828 c_seg->c_populated_offset = C_SEG_BYTES_TO_OFFSET(0);
3829
3830 if (pages_populated) {
3831
3832 assert(c_seg->c_state != C_ON_BAD_Q);
3833 assert(c_seg->c_store.c_buffer != NULL);
3834
3835 C_SEG_BUSY(c_seg);
3836 lck_mtx_unlock_always(&c_seg->c_lock);
3837
3838 kernel_memory_depopulate(compressor_map, (vm_offset_t) c_seg->c_store.c_buffer, pages_populated * PAGE_SIZE, KMA_COMPRESSOR);
3839
3840 lck_mtx_lock_spin_always(&c_seg->c_lock);
3841 C_SEG_WAKEUP_DONE(c_seg);
3842 }
3843 if (!c_seg->c_on_minorcompact_q && c_seg->c_state != C_ON_SWAPOUT_Q && c_seg->c_state != C_ON_SWAPIO_Q)
3844 c_seg_need_delayed_compaction(c_seg, FALSE);
3845 } else {
3846 if (c_seg->c_state != C_ON_SWAPPEDOUTSPARSE_Q) {
3847
3848 c_seg_move_to_sparse_list(c_seg);
3849 consider_defragmenting = TRUE;
3850 }
3851 }
3852 } else if (c_seg->c_on_minorcompact_q) {
3853
3854 assert(c_seg->c_state != C_ON_BAD_Q);
3855 assert(!C_SEG_IS_ON_DISK_OR_SOQ(c_seg));
3856
3857 if (C_SEG_SHOULD_MINORCOMPACT_NOW(c_seg)) {
3858 c_seg_try_minor_compaction_and_unlock(c_seg);
3859 need_unlock = FALSE;
3860 }
3861 } else if ( !(C_SEG_IS_ONDISK(c_seg))) {
3862
3863 if (c_seg->c_state != C_ON_BAD_Q && c_seg->c_state != C_ON_SWAPOUT_Q && c_seg->c_state != C_ON_SWAPIO_Q &&
3864 C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE) {
3865 c_seg_need_delayed_compaction(c_seg, FALSE);
3866 }
3867 } else if (c_seg->c_state != C_ON_SWAPPEDOUTSPARSE_Q && C_SEG_ONDISK_IS_SPARSE(c_seg)) {
3868
3869 c_seg_move_to_sparse_list(c_seg);
3870 consider_defragmenting = TRUE;
3871 }
3872 }
3873done:
3874 if (__improbable(kdp_mode)) {
3875 return retval;
3876 }
3877
3878 if (need_unlock == TRUE)
3879 lck_mtx_unlock_always(&c_seg->c_lock);
3880
3881 PAGE_REPLACEMENT_DISALLOWED(FALSE);
3882
3883 if (consider_defragmenting == TRUE)
3884 vm_swap_consider_defragmenting(VM_SWAP_FLAGS_NONE);
3885
3886#if CONFIG_EMBEDDED
3887 if ((c_minor_count && COMPRESSOR_NEEDS_TO_MINOR_COMPACT()) || vm_compressor_needs_to_major_compact())
3888 vm_wake_compactor_swapper();
3889#endif
3890
3891 return (retval);
3892}
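
/*
 * Contract relied on by the wrappers below (vm_compressor_get / vm_compressor_free):
 *
 *	dst != NULL	decompress the slot's data into 'dst'
 *	dst == NULL	just free the compressed data held by the slot
 *
 *	returns	 0	decompressed/freed from a segment already in memory
 *	returns	 1	the segment had to be swapped in first
 *	returns	-1	error swapping the segment in... nothing was decompressed
 *	returns	-2	C_DONT_BLOCK was set and we would have had to block,
 *			or we were called in kdp (debugger) context and could
 *			not safely take the locks
 *
 *	*zeroslot is cleared on the paths where the caller must not zero
 *	the slot mapping (e.g. C_KEEP, or a C_DONT_BLOCK bail-out on a
 *	busy or on-disk segment).
 */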
3893
3894
3895int
3896vm_compressor_get(ppnum_t pn, int *slot, int flags)
3897{
3898 c_slot_mapping_t slot_ptr;
3899 char *dst;
3900 int zeroslot = 1;
3901 int retval;
3902
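	/*
	 * translate the physical page into a kernel virtual address via the
	 * kernel's direct physical mapping so we can decompress straight
	 * into it
	 */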
3903#if __x86_64__
3904 dst = PHYSMAP_PTOV((uint64_t)pn << (uint64_t)PAGE_SHIFT);
3905#elif __arm__ || __arm64__
3906 dst = (char *) phystokv((pmap_paddr_t)pn << PAGE_SHIFT);
3907#else
3908#error "unsupported architecture"
3909#endif
3910 slot_ptr = (c_slot_mapping_t)slot;
3911
3912 if (slot_ptr->s_cseg == C_SV_CSEG_ID) {
3913 int32_t data;
3914 int32_t *dptr;
3915
		/*
		 * page was populated with a single value
		 * that found a home in our hash table...
		 * grab that value from the hash entry and
		 * replicate it across the page
		 */
3922 dptr = (int32_t *)(uintptr_t)dst;
3923 data = c_segment_sv_hash_table[slot_ptr->s_cindx].he_data;
3924#if __x86_64__
3925 memset_word(dptr, data, PAGE_SIZE / sizeof(int32_t));
3926#else
3927 {
3928 int i;
3929
3930 for (i = 0; i < (int)(PAGE_SIZE / sizeof(int32_t)); i++)
3931 *dptr++ = data;
3932 }
3933#endif
3934 if ( !(flags & C_KEEP)) {
3935 c_segment_sv_hash_drop_ref(slot_ptr->s_cindx);
3936
3937 OSAddAtomic(-1, &c_segment_pages_compressed);
3938 *slot = 0;
3939 }
3940 if (data)
3941 OSAddAtomic(1, &c_segment_svp_nonzero_decompressions);
3942 else
3943 OSAddAtomic(1, &c_segment_svp_zero_decompressions);
3944
3945 return (0);
3946 }
3947
3948 retval = c_decompress_page(dst, slot_ptr, flags, &zeroslot);
3949
	/*
	 * zeroslot will be set to 0 by c_decompress_page if (flags & C_KEEP),
	 * or if (flags & C_DONT_BLOCK) and we found 'c_busy' or 'C_SEG_IS_ONDISK'
	 * to be TRUE
	 */
3954 if (zeroslot) {
3955 *slot = 0;
3956 }
3957 /*
3958 * returns 0 if we successfully decompressed a page from a segment already in memory
3959 * returns 1 if we had to first swap in the segment, before successfully decompressing the page
3960 * returns -1 if we encountered an error swapping in the segment - decompression failed
3961 * returns -2 if (flags & C_DONT_BLOCK) and we found 'c_busy' or 'C_SEG_IS_ONDISK' to be true
3962 */
3963 return (retval);
3964}
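
/*
 * Illustrative only (never compiled): a minimal sketch of how a hypothetical
 * caller might drive vm_compressor_get() using the return codes documented
 * above.  The real callers live in the fault/pageout paths; the function and
 * parameter names here ('example_decompress_page', 'my_slot') are made up.
 */
#if 0
static int
example_decompress_page(ppnum_t pn, int *my_slot)
{
	int retval;

	/* first attempt: don't block... on -2 the slot is left intact */
	retval = vm_compressor_get(pn, my_slot, C_DONT_BLOCK);

	if (retval == -2) {
		/* segment was busy or on disk... retry and allow blocking */
		retval = vm_compressor_get(pn, my_slot, 0);
	}
	/*
	 * 0 or 1 means 'pn' now holds the decompressed data (1 == a swapin
	 * was needed); on success *my_slot has been zeroed because the
	 * compressed copy was released (no C_KEEP)
	 */
	return retval;
}
#endif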
3965
3966
3967int
3968vm_compressor_free(int *slot, int flags)
3969{
3970 c_slot_mapping_t slot_ptr;
3971 int zeroslot = 1;
3972 int retval;
3973
3974 assert(flags == 0 || flags == C_DONT_BLOCK);
3975
3976 slot_ptr = (c_slot_mapping_t)slot;
3977
3978 if (slot_ptr->s_cseg == C_SV_CSEG_ID) {
3979
3980 c_segment_sv_hash_drop_ref(slot_ptr->s_cindx);
3981 OSAddAtomic(-1, &c_segment_pages_compressed);
3982
3983 *slot = 0;
3984 return (0);
3985 }
3986 retval = c_decompress_page(NULL, slot_ptr, flags, &zeroslot);
3987 /*
3988 * returns 0 if we successfully freed the specified compressed page
3989 * returns -2 if (flags & C_DONT_BLOCK) and we found 'c_busy' set
3990 */
3991
3992 if (retval == 0)
3993 *slot = 0;
3994 else
3995 assert(retval == -2);
3996
3997 return (retval);
3998}
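
/*
 * Illustrative only (never compiled): a hypothetical caller discarding a
 * compressed page, falling back to a blocking free if the owning segment
 * is busy, per the return codes documented above.  'example_discard_page'
 * and 'my_slot' are made-up names.
 */
#if 0
static void
example_discard_page(int *my_slot)
{
	if (vm_compressor_free(my_slot, C_DONT_BLOCK) == -2) {
		/* the owning c_seg was busy... retry, blocking this time */
		(void) vm_compressor_free(my_slot, 0);
	}
	/* on success the slot mapping (*my_slot) has been zeroed */
}
#endif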
3999
4000
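/*
 * compress the contents of physical page 'pn' into the c_seg currently
 * being filled for this caller (headed by *current_chead), recording the
 * resulting <cseg, cindx> mapping in *slot; 'scratch_buf' is the per-caller
 * compression scratch area passed straight through to c_compress_page()
 */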
4001int
4002vm_compressor_put(ppnum_t pn, int *slot, void **current_chead, char *scratch_buf)
4003{
4004 char *src;
4005 int retval;
4006
4007#if __x86_64__
4008 src = PHYSMAP_PTOV((uint64_t)pn << (uint64_t)PAGE_SHIFT);
4009#elif __arm__ || __arm64__
4010 src = (char *) phystokv((pmap_paddr_t)pn << PAGE_SHIFT);
4011#else
4012#error "unsupported architecture"
4013#endif
4014
4015 retval = c_compress_page(src, (c_slot_mapping_t)slot, (c_segment_t *)current_chead, scratch_buf);
4016
4017 return (retval);
4018}
4019
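/*
 * repoint the compressor's view of a compressed page from one slot mapping
 * to another: the c_slot holding the data gets its packed backpointer
 * re-aimed at dst_slot_p, dst_slot_p inherits the <cseg, cindx> value and
 * src_slot_p is zeroed.  Single-value pages (C_SV_CSEG_ID) have no backing
 * c_slot, so only the mapping value itself is transferred.
 */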
4020void
4021vm_compressor_transfer(
4022 int *dst_slot_p,
4023 int *src_slot_p)
4024{
4025 c_slot_mapping_t dst_slot, src_slot;
4026 c_segment_t c_seg;
4027 int c_indx;
4028 c_slot_t cs;
4029
4030 src_slot = (c_slot_mapping_t) src_slot_p;
4031
4032 if (src_slot->s_cseg == C_SV_CSEG_ID) {
4033 *dst_slot_p = *src_slot_p;
4034 *src_slot_p = 0;
4035 return;
4036 }
4037 dst_slot = (c_slot_mapping_t) dst_slot_p;
4038Retry:
4039 PAGE_REPLACEMENT_DISALLOWED(TRUE);
4040 /* get segment for src_slot */
4041 c_seg = c_segments[src_slot->s_cseg -1].c_seg;
4042 /* lock segment */
4043 lck_mtx_lock_spin_always(&c_seg->c_lock);
4044 /* wait if it's busy */
4045 if (c_seg->c_busy && !c_seg->c_busy_swapping) {
4046 PAGE_REPLACEMENT_DISALLOWED(FALSE);
4047 c_seg_wait_on_busy(c_seg);
4048 goto Retry;
4049 }
4050 /* find the c_slot */
4051 c_indx = src_slot->s_cindx;
4052 cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx);
4053 /* point the c_slot back to dst_slot instead of src_slot */
4054 cs->c_packed_ptr = C_SLOT_PACK_PTR(dst_slot);
4055 /* transfer */
4056 *dst_slot_p = *src_slot_p;
4057 *src_slot_p = 0;
4058 lck_mtx_unlock_always(&c_seg->c_lock);
4059 PAGE_REPLACEMENT_DISALLOWED(FALSE);
4060}
4061
4062#if CONFIG_FREEZE
4063
4064int freezer_finished_filling = 0;
4065
4066void
4067vm_compressor_finished_filling(
4068 void **current_chead)
4069{
4070 c_segment_t c_seg;
4071
4072 if ((c_seg = *(c_segment_t *)current_chead) == NULL)
4073 return;
4074
4075 assert(c_seg->c_state == C_IS_FILLING);
4076
4077 lck_mtx_lock_spin_always(&c_seg->c_lock);
4078
4079 c_current_seg_filled(c_seg, (c_segment_t *)current_chead);
4080
4081 lck_mtx_unlock_always(&c_seg->c_lock);
4082
4083 freezer_finished_filling++;
4084}
4085
4086
4087/*
4088 * This routine is used to transfer the compressed chunks from
4089 * the c_seg/cindx pointed to by slot_p into a new c_seg headed
4090 * by the current_chead and a new cindx within that c_seg.
4091 *
4092 * Currently, this routine is only used by the "freezer backed by
4093 * compressor with swap" mode to create a series of c_segs that
4094 * only contain compressed data belonging to one task. So, we
4095 * move a task's previously compressed data into a set of new
4096 * c_segs which will also hold the task's yet to be compressed data.
4097 */
4098
4099kern_return_t
4100vm_compressor_relocate(
4101 void **current_chead,
4102 int *slot_p)
4103{
4104 c_slot_mapping_t slot_ptr;
4105 c_slot_mapping_t src_slot;
4106 uint32_t c_rounded_size;
4107 uint32_t c_size;
4108 uint16_t dst_slot;
4109 c_slot_t c_dst;
4110 c_slot_t c_src;
4111 int c_indx;
4112 c_segment_t c_seg_dst = NULL;
4113 c_segment_t c_seg_src = NULL;
4114 kern_return_t kr = KERN_SUCCESS;
4115
4116
4117 src_slot = (c_slot_mapping_t) slot_p;
4118
4119 if (src_slot->s_cseg == C_SV_CSEG_ID) {
4120 /*
4121 * no need to relocate... this is a page full of a single
4122 * value which is hashed to a single entry not contained
4123 * in a c_segment_t
4124 */
4125 return (kr);
4126 }
4127
4128Relookup_dst:
4129 c_seg_dst = c_seg_allocate((c_segment_t *)current_chead);
4130 /*
4131 * returns with c_seg lock held
4132 * and PAGE_REPLACEMENT_DISALLOWED(TRUE)...
4133 * c_nextslot has been allocated and
4134 * c_store.c_buffer populated
4135 */
4136 if (c_seg_dst == NULL) {
4137 /*
4138 * Out of compression segments?
4139 */
4140 kr = KERN_RESOURCE_SHORTAGE;
4141 goto out;
4142 }
4143
4144 assert(c_seg_dst->c_busy == 0);
4145
4146 C_SEG_BUSY(c_seg_dst);
4147
4148 dst_slot = c_seg_dst->c_nextslot;
4149
4150 lck_mtx_unlock_always(&c_seg_dst->c_lock);
4151
4152Relookup_src:
4153 c_seg_src = c_segments[src_slot->s_cseg - 1].c_seg;
4154
4155 assert(c_seg_dst != c_seg_src);
4156
4157 lck_mtx_lock_spin_always(&c_seg_src->c_lock);
4158
4159 if (C_SEG_IS_ONDISK(c_seg_src)) {
4160
4161 /*
4162 * A "thaw" can mark a process as eligible for
4163 * another freeze cycle without bringing any of
4164 * its swapped out c_segs back from disk (because
4165 * that is done on-demand).
4166 *
4167 * If the src c_seg we find for our pre-compressed
4168 * data is already on-disk, then we are dealing
4169 * with an app's data that is already packed and
4170 * swapped out. Don't do anything.
4171 */
4172
4173 PAGE_REPLACEMENT_DISALLOWED(FALSE);
4174
4175 lck_mtx_unlock_always(&c_seg_src->c_lock);
4176
4177 c_seg_src = NULL;
4178
4179 goto out;
4180 }
4181
4182 if (c_seg_src->c_busy) {
4183
4184 PAGE_REPLACEMENT_DISALLOWED(FALSE);
4185 c_seg_wait_on_busy(c_seg_src);
4186
4187 c_seg_src = NULL;
4188
4189 PAGE_REPLACEMENT_DISALLOWED(TRUE);
4190
4191 goto Relookup_src;
4192 }
4193
4194 C_SEG_BUSY(c_seg_src);
4195
4196 lck_mtx_unlock_always(&c_seg_src->c_lock);
4197
4198 PAGE_REPLACEMENT_DISALLOWED(FALSE);
4199
4200 /* find the c_slot */
4201 c_indx = src_slot->s_cindx;
4202
4203 c_src = C_SEG_SLOT_FROM_INDEX(c_seg_src, c_indx);
4204
4205 c_size = UNPACK_C_SIZE(c_src);
4206
4207 assert(c_size);
4208
4209 if (c_size > (uint32_t)(C_SEG_BUFSIZE - C_SEG_OFFSET_TO_BYTES((int32_t)c_seg_dst->c_nextoffset))) {
4210 /*
4211 * This segment is full. We need a new one.
4212 */
4213
4214 PAGE_REPLACEMENT_DISALLOWED(TRUE);
4215
4216 lck_mtx_lock_spin_always(&c_seg_src->c_lock);
4217 C_SEG_WAKEUP_DONE(c_seg_src);
4218 lck_mtx_unlock_always(&c_seg_src->c_lock);
4219
4220 c_seg_src = NULL;
4221
4222 lck_mtx_lock_spin_always(&c_seg_dst->c_lock);
4223
4224 assert(c_seg_dst->c_busy);
4225 assert(c_seg_dst->c_state == C_IS_FILLING);
4226 assert(!c_seg_dst->c_on_minorcompact_q);
4227
4228 c_current_seg_filled(c_seg_dst, (c_segment_t *)current_chead);
4229 assert(*current_chead == NULL);
4230
4231 C_SEG_WAKEUP_DONE(c_seg_dst);
4232
4233 lck_mtx_unlock_always(&c_seg_dst->c_lock);
4234
4235 c_seg_dst = NULL;
4236
4237 PAGE_REPLACEMENT_DISALLOWED(FALSE);
4238
4239 goto Relookup_dst;
4240 }
4241
4242 c_dst = C_SEG_SLOT_FROM_INDEX(c_seg_dst, c_seg_dst->c_nextslot);
4243
4244 memcpy(&c_seg_dst->c_store.c_buffer[c_seg_dst->c_nextoffset], &c_seg_src->c_store.c_buffer[c_src->c_offset], c_size);
4245//is platform alignment actually necessary since wkdm aligns its output?
4246 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;
4247
4248 cslot_copy(c_dst, c_src);
4249 c_dst->c_offset = c_seg_dst->c_nextoffset;
4250
4251 if (c_seg_dst->c_firstemptyslot == c_seg_dst->c_nextslot)
4252 c_seg_dst->c_firstemptyslot++;
4253
4254 c_seg_dst->c_slots_used++;
4255 c_seg_dst->c_nextslot++;
4256 c_seg_dst->c_bytes_used += c_rounded_size;
4257 c_seg_dst->c_nextoffset += C_SEG_BYTES_TO_OFFSET(c_rounded_size);
4258
4259
4260 PACK_C_SIZE(c_src, 0);
4261
4262 c_seg_src->c_bytes_used -= c_rounded_size;
4263 c_seg_src->c_bytes_unused += c_rounded_size;
4264
4265 assert(c_seg_src->c_slots_used);
4266 c_seg_src->c_slots_used--;
4267
4268 if (c_indx < c_seg_src->c_firstemptyslot) {
4269 c_seg_src->c_firstemptyslot = c_indx;
4270 }
4271
4272 c_dst = C_SEG_SLOT_FROM_INDEX(c_seg_dst, dst_slot);
4273
4274 PAGE_REPLACEMENT_ALLOWED(TRUE);
4275 slot_ptr = (c_slot_mapping_t)C_SLOT_UNPACK_PTR(c_dst);
4276 /* <csegno=0,indx=0> would mean "empty slot", so use csegno+1 */
4277 slot_ptr->s_cseg = c_seg_dst->c_mysegno + 1;
4278 slot_ptr->s_cindx = dst_slot;
4279
4280 PAGE_REPLACEMENT_ALLOWED(FALSE);
4281
4282out:
4283 if (c_seg_src) {
4284
4285 lck_mtx_lock_spin_always(&c_seg_src->c_lock);
4286
4287 C_SEG_WAKEUP_DONE(c_seg_src);
4288
4289 if (c_seg_src->c_bytes_used == 0 && c_seg_src->c_state != C_IS_FILLING) {
4290 if (!c_seg_src->c_on_minorcompact_q)
4291 c_seg_need_delayed_compaction(c_seg_src, FALSE);
4292 }
4293
4294 lck_mtx_unlock_always(&c_seg_src->c_lock);
4295 }
4296
4297 if (c_seg_dst) {
4298
4299 PAGE_REPLACEMENT_DISALLOWED(TRUE);
4300
4301 lck_mtx_lock_spin_always(&c_seg_dst->c_lock);
4302
4303 if (c_seg_dst->c_nextoffset >= C_SEG_OFF_LIMIT || c_seg_dst->c_nextslot >= C_SLOT_MAX_INDEX) {
4304 /*
4305 * Nearing or exceeded maximum slot and offset capacity.
4306 */
4307 assert(c_seg_dst->c_busy);
4308 assert(c_seg_dst->c_state == C_IS_FILLING);
4309 assert(!c_seg_dst->c_on_minorcompact_q);
4310
4311 c_current_seg_filled(c_seg_dst, (c_segment_t *)current_chead);
4312 assert(*current_chead == NULL);
4313 }
4314
4315 C_SEG_WAKEUP_DONE(c_seg_dst);
4316
4317 lck_mtx_unlock_always(&c_seg_dst->c_lock);
4318
4319 c_seg_dst = NULL;
4320
4321 PAGE_REPLACEMENT_DISALLOWED(FALSE);
4322 }
4323
4324 return kr;
4325}
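
/*
 * Illustrative only (never compiled): a rough sketch of how a freeze
 * operation might drive vm_compressor_relocate() to repack one task's
 * already-compressed slots into task-private c_segs, sealing the last
 * segment with vm_compressor_finished_filling() when done.  'chead',
 * 'slot_list' and 'nslots' are made-up names -- the real freezer walks
 * the task's VM state to find the slots.
 */
#if 0
static kern_return_t
example_freeze_repack(void **chead, int **slot_list, int nslots)
{
	kern_return_t kr = KERN_SUCCESS;
	int i;

	for (i = 0; i < nslots; i++) {
		kr = vm_compressor_relocate(chead, slot_list[i]);

		if (kr == KERN_RESOURCE_SHORTAGE) {
			/* no compressor segments available... abandon the freeze */
			break;
		}
	}
	/* seal the partially filled c_seg so it can be swapped out */
	vm_compressor_finished_filling(chead);

	return kr;
}
#endif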
4326#endif /* CONFIG_FREEZE */
4327