1/*
2 * Copyright (c) 2010-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/* This module implements a hybrid/adaptive compression scheme, using WKdm where
29 * profitable and, currently, an LZ4 variant elsewhere.
30 * (Created 2016, Derek Kumar)
31 */
32#include "lz4.h"
33#include "WKdm_new.h"
34#include <vm/vm_compressor_algorithms.h>
35#include <vm/vm_compressor.h>
36
/*
 * Value of the first 32-bit word of a WKdm buffer (17185 == 0x4321) that
 * WKdm_hv() accepts without applying its header range check.
 */
#define MZV_MAGIC (17185)
38#if defined(__arm64__)
39#include <arm64/proc_reg.h>
40#endif
41
/* Alignment, in bytes, requested for the LZ4 and WKdm encode scratch members. */
#define LZ4_SCRATCH_ALIGN (64)
#define WKC_SCRATCH_ALIGN (64)
47
/*
 * Overlay for the caller-supplied encode scratch buffer handed to
 * metacompressor().  Both members start at the same address:
 *   - lz4state is passed to lz4raw_encode_buffer() as its hash table;
 *   - wkscratch is a zero-length array, so it adds no storage of its own and
 *     only provides the (aligned) start address given to WKdmC() as scratch.
 * sizeof() of this union is used by vm_compressor_get_encode_scratch_size().
 */
typedef union {
	uint8_t lz4state[lz4_encode_scratch_size]__attribute((aligned(LZ4_SCRATCH_ALIGN)));
	uint8_t wkscratch[0] __attribute((aligned(WKC_SCRATCH_ALIGN))); // TODO
} compressor_encode_scratch_t;
52
/*
 * Overlay for the caller-supplied decode scratch buffer handed to
 * metadecompressor().  Both members start at the same address:
 *   - lz4decodestate is passed to lz4raw_decode_buffer() as its work area;
 *   - wkdecompscratch is a zero-length array whose address is given to
 *     WKdmD() as scratch.
 * NOTE(review): lz4decodestate is sized with lz4_encode_scratch_size (the
 * encode constant) -- confirm that this is the intended size for decoding.
 */
typedef union {
	uint8_t lz4decodestate[lz4_encode_scratch_size]__attribute((aligned(64)));
	uint8_t wkdecompscratch[0] __attribute((aligned(64)));
} compressor_decode_scratch_t;
57
/*
 * Running state of the hybrid (WKdm/LZ4) codec selector.  It is read and
 * updated by compressor_preselect() and compressor_selector_update().
 *
 * All lz4_total_* members are monotonically increasing totals and are 32 bits
 * wide so that they wrap consistently (lz4_total_unprofitables used to be
 * 16 bits wide and wrapped after 65535 events).
 */
typedef struct {
	/* Only zero-initialized in this file. */
	uint16_t lz4_selection_run;
	/* Current run of LZ4 results that argue for preselecting LZ4. */
	uint16_t lz4_run_length;
	/* CPRESELLZ4 decisions since the counter was last reset. */
	uint16_t lz4_preselects;
	/* Total CPRESELLZ4 decisions. */
	uint32_t lz4_total_preselects;
	/* CSKIPLZ4 decisions since the counter was last reset. */
	uint16_t lz4_failure_skips;
	/* Total CSKIPLZ4 decisions. */
	uint32_t lz4_total_failure_skips;
	/* Current run of failed, unprofitable or worse-than-WK LZ4 attempts. */
	uint16_t lz4_failure_run_length;
	/* Total LZ4 attempts whose gain over WK was <= lz4_profitable_bytes. */
	uint32_t lz4_total_unprofitables;
	/* Total LZ4 attempts that were not smaller than the WK result. */
	uint32_t lz4_total_negatives;
	/* Total LZ4 attempts that returned 0 (failure). */
	uint32_t lz4_total_failures;
} compressor_state_t;
70
/*
 * Tunables consulted by the hybrid codec selector in this file.
 */
compressor_tuneables_t vmctune = {
	/*
	 * Hybrid mode: LZ4 is attempted after WKdm when WKdm failed or produced
	 * at least this many bytes.  vm_compressor_algorithm_init() raises it to
	 * 12288 on arm64 when PAGE_SIZE is 16384.
	 */
	.lz4_threshold = 2048,
	/* An LZ4 result of at most this many bytes resets lz4_run_length. */
	.wkdm_reeval_threshold = 1536,
	/* Once lz4_failure_skips reaches this, the skip and failure-run counters are reset. */
	.lz4_max_failure_skips = 0,
	/* A failure run of at least this length makes compressor_preselect() return CSKIPLZ4. */
	.lz4_max_failure_run_length = ~0U,
	/*
	 * Once lz4_preselects reaches this, the counter is reset and WK is
	 * preselected.  With 0 that comparison always holds, so CPRESELLZ4 is
	 * never returned with the default settings.
	 */
	.lz4_max_preselects = 0,
	/* An LZ4 run of at least this length makes compressor_preselect() return CPRESELLZ4. */
	.lz4_run_preselection_threshold = ~0U,
	/* LZ4 must beat WK by at least this many bytes for lz4_run_length to grow. */
	.lz4_run_continue_bytes = 0,
	/* An LZ4 gain over WK of at most this many bytes is counted as unprofitable. */
	.lz4_profitable_bytes = 0,
};
81
82compressor_state_t vmcstate = {
83 .lz4_selection_run = 0,
84 .lz4_run_length = 0,
85 .lz4_preselects = 0,
86 .lz4_total_preselects = 0,
87 .lz4_failure_skips = 0,
88 .lz4_total_failure_skips = 0,
89 .lz4_failure_run_length = 0,
90 .lz4_total_unprofitables = 0,
91 .lz4_total_negatives = 0,
92};
93
/*
 * Compression/decompression counters.  In this file they are only updated
 * through the VM_COMPRESSOR_STAT(), VM_DECOMPRESSOR_STAT() and
 * VM_COMPRESSOR_STAT_DBG() macros.
 */
compressor_stats_t compressor_stats;
95
/*
 * Decision returned by compressor_preselect() and acted upon by
 * metacompressor() in hybrid mode.
 */
enum compressor_preselect_t {
	CPRESELLZ4 = 0, /* go straight to LZ4, WKdm is not run */
	CSKIPLZ4 = 1,   /* run WKdm only, the LZ4 follow-up is suppressed */
	CPRESELWK = 2,  /* run WKdm first, then evaluate whether to also try LZ4 */
};
101
/*
 * Codec mode consulted by metacompressor() and by the scratch-size queries.
 * It is only assigned in vm_compressor_algorithm_init(), and only when built
 * for arm64.
 */
vm_compressor_mode_t vm_compressor_current_codec = VM_COMPRESSOR_DEFAULT_CODEC;

/* Not referenced in the visible part of this file. */
boolean_t vm_compressor_force_sw_wkdm = FALSE;

/* Not referenced in the visible part of this file. */
boolean_t verbose = FALSE;
107
/*
 * VM_COMPRESSOR_STAT_DBG(expr): evaluates expr only when VMDBGSTATS is
 * non-zero (DEBUG builds); otherwise it expands to an empty statement.
 *
 * The switch used to be defined as VMDBGSTAT while the #if below tested
 * VMDBGSTATS, so the statistics were compiled out unconditionally.  The old
 * spelling is kept as an alias.
 */
#define VMDBGSTATS (DEBUG)
#define VMDBGSTAT VMDBGSTATS
#if VMDBGSTATS
#define VM_COMPRESSOR_STAT_DBG(x...)                                    \
	do {                                                            \
	        (x);                                                    \
	} while(0)
#else
#define VM_COMPRESSOR_STAT_DBG(x...)                                    \
	do {                                                            \
	} while (0)
#endif
119
/*
 * VM_COMPRESSOR_STAT(expr) / VM_DECOMPRESSOR_STAT(expr): evaluate expr when
 * VMCSTATS is non-zero (DEVELOPMENT or DEBUG builds) and expand to an empty
 * statement otherwise.
 */
#define VMCSTATS (DEVELOPMENT || DEBUG)
#if !VMCSTATS
#define VM_COMPRESSOR_STAT(x...)        do { } while (0)
#define VM_DECOMPRESSOR_STAT(x...)      do { } while (0)
#else
#define VM_COMPRESSOR_STAT(x...)        do { (x); } while (0)
//TODO make atomic where needed, decompression paths
#define VM_DECOMPRESSOR_STAT(x...)      do { (x); } while (0)
#endif
139
140static inline enum compressor_preselect_t
141compressor_preselect(void)
142{
143 if (vmcstate.lz4_failure_skips >= vmctune.lz4_max_failure_skips) {
144 vmcstate.lz4_failure_skips = 0;
145 vmcstate.lz4_failure_run_length = 0;
146 }
147
148 if (vmcstate.lz4_failure_run_length >= vmctune.lz4_max_failure_run_length) {
149 vmcstate.lz4_failure_skips++;
150 vmcstate.lz4_total_failure_skips++;
151 return CSKIPLZ4;
152 }
153
154 if (vmcstate.lz4_preselects >= vmctune.lz4_max_preselects) {
155 vmcstate.lz4_preselects = 0;
156 return CPRESELWK;
157 }
158
159 if (vmcstate.lz4_run_length >= vmctune.lz4_run_preselection_threshold) {
160 vmcstate.lz4_preselects++;
161 vmcstate.lz4_total_preselects++;
162 return CPRESELLZ4;
163 }
164 return CPRESELWK;
165}
166
167static inline void
168compressor_selector_update(int lz4sz, int didwk, int wksz)
169{
170 VM_COMPRESSOR_STAT(compressor_stats.lz4_compressions++);
171
172 if (lz4sz == 0) {
173 VM_COMPRESSOR_STAT(compressor_stats.lz4_compressed_bytes += PAGE_SIZE);
174 VM_COMPRESSOR_STAT(compressor_stats.lz4_compression_failures++);
175 vmcstate.lz4_failure_run_length++;
176 VM_COMPRESSOR_STAT(vmcstate.lz4_total_failures++);
177 vmcstate.lz4_run_length = 0;
178 } else {
179 vmcstate.lz4_failure_run_length = 0;
180
181 VM_COMPRESSOR_STAT(compressor_stats.lz4_compressed_bytes += lz4sz);
182
183 if (lz4sz <= vmctune.wkdm_reeval_threshold) {
184 vmcstate.lz4_run_length = 0;
185 } else {
186 if (!didwk) {
187 vmcstate.lz4_run_length++;
188 }
189 }
190
191 if (didwk) {
192 if (__probable(wksz > lz4sz)) {
193 uint32_t lz4delta = wksz - lz4sz;
194 VM_COMPRESSOR_STAT(compressor_stats.lz4_wk_compression_delta += lz4delta);
195 if (lz4delta >= vmctune.lz4_run_continue_bytes) {
196 vmcstate.lz4_run_length++;
197 } else if (lz4delta <= vmctune.lz4_profitable_bytes) {
198 vmcstate.lz4_failure_run_length++;
199 VM_COMPRESSOR_STAT(vmcstate.lz4_total_unprofitables++);
200 vmcstate.lz4_run_length = 0;
201 } else {
202 vmcstate.lz4_run_length = 0;
203 }
204 } else {
205 VM_COMPRESSOR_STAT(compressor_stats.lz4_wk_compression_negative_delta += (lz4sz - wksz));
206 vmcstate.lz4_failure_run_length++;
207 VM_COMPRESSOR_STAT(vmcstate.lz4_total_negatives++);
208 vmcstate.lz4_run_length = 0;
209 }
210 }
211 }
212}
213
214
/*
 * Header sanity check for a WKdm buffer; active on DEVELOPMENT and DEBUG
 * builds only.  Unless the first word equals MZV_MAGIC, the kernel panics if
 * any of the first three 32-bit words has a bit set in its upper 16 bits.
 * On other builds the function does nothing.
 */
static inline void
WKdm_hv(uint32_t *wkbuf)
{
#if DEVELOPMENT || DEBUG
	const uint32_t *hdr = (const uint32_t *) wkbuf;

	if ((hdr[0] != MZV_MAGIC) && ((hdr[0] | hdr[1] | hdr[2]) & 0xFFFF0000)) {
		panic("WKdm(%p): invalid header 0x%x 0x%x 0x%x", wkbuf, hdr[0], hdr[1], hdr[2]);
	}
#else /* DEVELOPMENT || DEBUG */
	(void) wkbuf;
#endif
}
229
230//todo fix clang diagnostic
231#pragma clang diagnostic push
232#pragma clang diagnostic ignored "-Wincompatible-pointer-types"
233
234#if defined(__arm64__)
235#endif
236
237static inline bool
238WKdmD(WK_word* src_buf, WK_word* dest_buf, WK_word* scratch, unsigned int bytes,
239 __unused uint32_t *pop_count)
240{
241#if defined(__arm64__)
242#endif
243 WKdm_hv(wkbuf: src_buf);
244#if defined(__arm64__)
245#ifndef __ARM_16K_PG__
246 if (PAGE_SIZE == 4096) {
247 WKdm_decompress_4k(src_buf, dest_buf, scratch, bytes);
248 } else
249#endif /* !____ARM_16K_PG__ */
250 {
251 __unused uint64_t wdsstart;
252
253 VM_COMPRESSOR_STAT_DBG(wdsstart = mach_absolute_time());
254 WKdm_decompress_16k(src_buf, dest_buf, scratch, bytes);
255
256 VM_COMPRESSOR_STAT_DBG(compressor_stats.wks_dabstime += mach_absolute_time() - wdsstart);
257 VM_COMPRESSOR_STAT(compressor_stats.wks_decompressions++);
258 }
259#else /* !defined arm64 */
260 WKdm_decompress_new(src_buf, dest_buf, scratch, bytes);
261#endif
262 return true;
263}
/*
 * Globals that exist on DEVELOPMENT and DEBUG builds only.  Neither is
 * referenced in the visible part of this file.
 */
#if DEVELOPMENT || DEBUG
int precompy, wkswhw;
#endif
267
268static inline int
269WKdmC(WK_word* src_buf, WK_word* dest_buf, WK_word* scratch,
270 boolean_t *incomp_copy, unsigned int limit, __unused uint32_t *pop_count)
271{
272 (void)incomp_copy;
273 int wkcval;
274#if defined(__arm64__)
275#ifndef __ARM_16K_PG__
276 if (PAGE_SIZE == 4096) {
277 wkcval = WKdm_compress_4k(src_buf, dest_buf, scratch, limit);
278 } else
279#endif /* !____ARM_16K_PG__ */
280 {
281 __unused uint64_t wcswstart;
282
283 VM_COMPRESSOR_STAT_DBG(wcswstart = mach_absolute_time());
284
285 int wkswsz = WKdm_compress_16k(src_buf, dest_buf, scratch, limit);
286
287 VM_COMPRESSOR_STAT_DBG(compressor_stats.wks_cabstime += mach_absolute_time() - wcswstart);
288 VM_COMPRESSOR_STAT(compressor_stats.wks_compressions++);
289 wkcval = wkswsz;
290 }
291#else
292 wkcval = WKdm_compress_new(src_buf, dest_buf, scratch, limit);
293#endif
294 return wkcval;
295}
296
297
298int
299metacompressor(const uint8_t *in, uint8_t *cdst, int32_t outbufsz, uint16_t *codec,
300 void *cscratchin, boolean_t *incomp_copy, uint32_t *pop_count_p)
301{
302 int sz = -1;
303 int dowk = FALSE, dolz4 = FALSE, skiplz4 = FALSE;
304 int insize = PAGE_SIZE;
305 compressor_encode_scratch_t *cscratch = cscratchin;
306 /* Not all paths lead to an inline population count. */
307 uint32_t pop_count = C_SLOT_NO_POPCOUNT;
308
309 if (vm_compressor_current_codec == CMODE_WK) {
310 dowk = TRUE;
311 } else if (vm_compressor_current_codec == CMODE_LZ4) {
312 dolz4 = TRUE;
313 } else if (vm_compressor_current_codec == CMODE_HYB) {
314 enum compressor_preselect_t presel = compressor_preselect();
315 if (presel == CPRESELLZ4) {
316 dolz4 = TRUE;
317 goto lz4compress;
318 } else if (presel == CSKIPLZ4) {
319 dowk = TRUE;
320 skiplz4 = TRUE;
321 } else {
322 assert(presel == CPRESELWK);
323 dowk = TRUE;
324 }
325 }
326
327 if (dowk) {
328 *codec = CCWK;
329 VM_COMPRESSOR_STAT(compressor_stats.wk_compressions++);
330 sz = WKdmC(src_buf: in, dest_buf: cdst, scratch: &cscratch->wkscratch[0], incomp_copy, limit: outbufsz, pop_count: &pop_count);
331
332 if (sz == -1) {
333 VM_COMPRESSOR_STAT(compressor_stats.wk_compressed_bytes_total += PAGE_SIZE);
334 VM_COMPRESSOR_STAT(compressor_stats.wk_compression_failures++);
335
336 if (vm_compressor_current_codec == CMODE_HYB) {
337 goto lz4eval;
338 }
339 goto cexit;
340 } else if (sz == 0) {
341 VM_COMPRESSOR_STAT(compressor_stats.wk_sv_compressions++);
342 VM_COMPRESSOR_STAT(compressor_stats.wk_compressed_bytes_total += 4);
343 } else {
344 VM_COMPRESSOR_STAT(compressor_stats.wk_compressed_bytes_total += sz);
345 }
346 }
347lz4eval:
348 if (vm_compressor_current_codec == CMODE_HYB) {
349 if (((sz == -1) || (sz >= vmctune.lz4_threshold)) && (skiplz4 == FALSE)) {
350 dolz4 = TRUE;
351 } else {
352#if DEVELOPMENT || DEBUG
353 int wkc = (sz == -1) ? PAGE_SIZE : sz;
354#endif
355 VM_COMPRESSOR_STAT(compressor_stats.wk_compressions_exclusive++);
356 VM_COMPRESSOR_STAT(compressor_stats.wk_compressed_bytes_exclusive += wkc);
357 goto cexit;
358 }
359 }
360
361lz4compress:
362
363 if (dolz4) {
364 if (sz == -1) {
365 sz = PAGE_SIZE;
366 }
367 int wksz = sz;
368 *codec = CCLZ4;
369
370 sz = (int) lz4raw_encode_buffer(dst_buffer: cdst, dst_size: outbufsz, src_buffer: in, src_size: insize, hash_table: &cscratch->lz4state[0]);
371
372 compressor_selector_update(lz4sz: sz, didwk: dowk, wksz);
373 if (sz == 0) {
374 sz = -1;
375 goto cexit;
376 }
377 }
378cexit:
379 assert(pop_count_p != NULL);
380 *pop_count_p = pop_count;
381 return sz;
382}
383
384bool
385metadecompressor(const uint8_t *source, uint8_t *dest, uint32_t csize,
386 uint16_t ccodec, void *compressor_dscratchin, uint32_t *pop_count_p)
387{
388 int dolz4 = (ccodec == CCLZ4);
389 int rval;
390 compressor_decode_scratch_t *compressor_dscratch = compressor_dscratchin;
391 /* Not all paths lead to an inline population count. */
392 uint32_t pop_count = C_SLOT_NO_POPCOUNT;
393 bool success;
394
395 if (dolz4) {
396 rval = (int)lz4raw_decode_buffer(dst_buffer: dest, PAGE_SIZE, src_buffer: source, src_size: csize, work: &compressor_dscratch->lz4decodestate[0]);
397 VM_DECOMPRESSOR_STAT(compressor_stats.lz4_decompressions += 1);
398 VM_DECOMPRESSOR_STAT(compressor_stats.lz4_decompressed_bytes += csize);
399#if DEVELOPMENT || DEBUG
400 uint32_t *d32 = dest;
401#endif
402 assertf(rval == PAGE_SIZE, "LZ4 decode: size != pgsize %d, header: 0x%x, 0x%x, 0x%x",
403 rval, *d32, *(d32 + 1), *(d32 + 2));
404 success = (rval == PAGE_SIZE);
405 } else {
406 assert(ccodec == CCWK);
407
408 success = WKdmD(src_buf: source, dest_buf: dest, scratch: &compressor_dscratch->wkdecompscratch[0], bytes: csize, pop_count: &pop_count);
409
410 VM_DECOMPRESSOR_STAT(compressor_stats.wk_decompressions += 1);
411 VM_DECOMPRESSOR_STAT(compressor_stats.wk_decompressed_bytes += csize);
412 }
413
414 assert(pop_count_p != NULL);
415 *pop_count_p = pop_count;
416 return success;
417}
418#pragma clang diagnostic pop
419
420uint32_t
421vm_compressor_get_encode_scratch_size(void)
422{
423 if (vm_compressor_current_codec != VM_COMPRESSOR_DEFAULT_CODEC) {
424 return MAX(sizeof(compressor_encode_scratch_t), WKdm_SCRATCH_BUF_SIZE_INTERNAL);
425 } else {
426 return WKdm_SCRATCH_BUF_SIZE_INTERNAL;
427 }
428}
429
430uint32_t
431vm_compressor_get_decode_scratch_size(void)
432{
433 if (vm_compressor_current_codec != VM_COMPRESSOR_DEFAULT_CODEC) {
434 return MAX(sizeof(compressor_decode_scratch_t), WKdm_SCRATCH_BUF_SIZE_INTERNAL);
435 } else {
436 return WKdm_SCRATCH_BUF_SIZE_INTERNAL;
437 }
438}
439
440
441int
442vm_compressor_algorithm(void)
443{
444 return vm_compressor_current_codec;
445}
446
447void
448vm_compressor_algorithm_init(void)
449{
450 vm_compressor_mode_t new_codec = VM_COMPRESSOR_DEFAULT_CODEC;
451
452#if defined(__arm64__)
453 new_codec = CMODE_HYB;
454
455 if (PAGE_SIZE == 16384) {
456 vmctune.lz4_threshold = 12288;
457 }
458#endif
459
460 PE_parse_boot_argn(arg_string: "vm_compressor_codec", arg_ptr: &new_codec, max_arg: sizeof(new_codec));
461 assertf(((new_codec == VM_COMPRESSOR_DEFAULT_CODEC) || (new_codec == CMODE_WK) ||
462 (new_codec == CMODE_LZ4) || (new_codec == CMODE_HYB)),
463 "Invalid VM compression codec: %u", new_codec);
464
465#if defined(__arm64__)
466 uint32_t tmpc;
467 if (PE_parse_boot_argn(arg_string: "-vm_compressor_wk", arg_ptr: &tmpc, max_arg: sizeof(tmpc))) {
468 new_codec = VM_COMPRESSOR_DEFAULT_CODEC;
469 } else if (PE_parse_boot_argn(arg_string: "-vm_compressor_hybrid", arg_ptr: &tmpc, max_arg: sizeof(tmpc))) {
470 new_codec = CMODE_HYB;
471 }
472
473 vm_compressor_current_codec = new_codec;
474#endif /* arm/arm64 */
475}
476