1/*
2 * Copyright (c) 2010-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/* This module implements a hybrid/adaptive compression scheme, using WKdm where
29 * profitable and, currently, an LZ4 variant elsewhere.
30 * (Created 2016, Derek Kumar)
31 */
32#include "lz4.h"
33#include "WKdm_new.h"
34#include <vm/vm_compressor_algorithms.h>
35#include <vm/vm_compressor.h>
36
/* Magic word tagging a single-value ("mostly-zero-value") compressed page. */
#define MZV_MAGIC (17185)
#if defined(__arm64__)
#include <arm/proc_reg.h>
#endif

/*
 * Alignment requirements for the per-thread codec scratch areas.
 * (Previously each of these was #defined twice with identical values;
 * the redundant second pair has been removed.)
 */
#define LZ4_SCRATCH_ALIGN (64)
#define WKC_SCRATCH_ALIGN (64)

/*
 * Temporal/non-temporal copy hooks; currently plain memcpy on all
 * configurations.
 */
#define memcpy_T_NT memcpy
#define memcpy_NT_T memcpy
50
/*
 * Per-thread scratch area for the encode path: sized for the LZ4 encoder
 * state; the WKdm compressor overlays the same storage via the zero-length
 * member (see vm_compressor_get_encode_scratch_size(), which takes the MAX
 * with WKdm_SCRATCH_BUF_SIZE_INTERNAL).
 */
typedef union {
	uint8_t lz4state[lz4_encode_scratch_size]__attribute((aligned(LZ4_SCRATCH_ALIGN)));
	uint8_t wkscratch[0] __attribute((aligned(WKC_SCRATCH_ALIGN))); // TODO
} compressor_encode_scratch_t;

/*
 * Per-thread scratch area for the decode path.
 * NOTE(review): sized with lz4_encode_scratch_size, not a decode-specific
 * constant — presumably the encode size is an upper bound; confirm against
 * lz4.h.
 */
typedef union {
	uint8_t lz4decodestate[lz4_encode_scratch_size]__attribute((aligned(64)));
	uint8_t wkdecompscratch[0] __attribute((aligned(64)));
} compressor_decode_scratch_t;
60
/*
 * Mutable selector state for the hybrid (WKdm/LZ4) policy.
 * The "run"/"skip" fields implement a simple hysteresis: consecutive
 * LZ4 wins extend a run that pre-selects LZ4, while consecutive failures
 * temporarily skip LZ4 altogether (see compressor_preselect()).
 */
typedef struct {
	uint16_t lz4_selection_run;
	uint16_t lz4_run_length;          /* consecutive pages favoring LZ4 */
	uint16_t lz4_preselects;          /* LZ4 preselects since last reset */
	uint32_t lz4_total_preselects;    /* lifetime preselect count */
	uint16_t lz4_failure_skips;       /* pages skipped due to failure run */
	uint32_t lz4_total_failure_skips; /* lifetime skip count */
	uint16_t lz4_failure_run_length;  /* consecutive LZ4 losses/failures */
	uint16_t lz4_total_unprofitables; /* LZ4 wins too small to matter */
	uint32_t lz4_total_negatives;     /* LZ4 results larger than WKdm */
	uint32_t lz4_total_failures;      /* outright LZ4 encode failures */
} compressor_state_t;
73
/*
 * Tunables driving the hybrid selection policy. Defaults below may be
 * adjusted at init: vm_compressor_algorithm_init() raises lz4_threshold
 * to 12288 on 16KB-page arm64 configurations.
 */
compressor_tuneables_t vmctune = {
	.lz4_threshold = 2048,              /* WKdm result >= this => re-try page with LZ4 */
	.wkdm_reeval_threshold = 1536,      /* LZ4 result <= this resets the LZ4 run */
	.lz4_max_failure_skips = 0,
	.lz4_max_failure_run_length = ~0U,  /* effectively: never enter skip mode */
	.lz4_max_preselects = 0,
	.lz4_run_preselection_threshold = ~0U, /* effectively: never preselect LZ4 */
	.lz4_run_continue_bytes = 0,
	.lz4_profitable_bytes = 0,
};
84
/*
 * Runtime selector state; all counters start at zero.
 * Note: .lz4_total_failures is not listed — as file-scope data it is
 * zero-initialized anyway, so this is benign.
 */
compressor_state_t vmcstate = {
	.lz4_selection_run = 0,
	.lz4_run_length = 0,
	.lz4_preselects = 0,
	.lz4_total_preselects = 0,
	.lz4_failure_skips = 0,
	.lz4_total_failure_skips = 0,
	.lz4_failure_run_length = 0,
	.lz4_total_unprofitables = 0,
	.lz4_total_negatives = 0,
};
96
/* Aggregate codec statistics, updated via the VM_(DE)COMPRESSOR_STAT macros. */
compressor_stats_t compressor_stats;

/* Outcome of the per-page pre-selection pass for hybrid mode. */
enum compressor_preselect_t {
	CPRESELLZ4 = 0, /* go straight to LZ4 for this page */
	CSKIPLZ4 = 1,   /* use WKdm and do NOT fall back to LZ4 */
	CPRESELWK = 2,  /* use WKdm first; may still re-evaluate with LZ4 */
};

/* Active codec mode; may be overridden by boot-args in vm_compressor_algorithm_init(). */
vm_compressor_mode_t vm_compressor_current_codec = VM_COMPRESSOR_DEFAULT_CODEC;

/* NOTE(review): not referenced in this file — presumably consulted by the
 * platform WKdm glue elsewhere; confirm before removing. */
boolean_t vm_compressor_force_sw_wkdm = FALSE;

boolean_t verbose = FALSE;
110
/*
 * Timing-statistics hook, enabled on DEBUG builds only.
 * BUG FIX: the guard previously tested VMDBGSTATS (note the extra 'S'),
 * which is never defined, so VM_COMPRESSOR_STAT_DBG() always compiled to
 * a no-op even on DEBUG kernels. Test the macro that is actually defined.
 */
#define VMDBGSTAT (DEBUG)
#if VMDBGSTAT
#define VM_COMPRESSOR_STAT_DBG(x...) \
	do { \
		(x); \
	} while(0)
#else
#define VM_COMPRESSOR_STAT_DBG(x...) \
	do { \
	} while (0)
#endif

/* Counter-statistics hooks, enabled on DEVELOPMENT and DEBUG builds. */
#define VMCSTATS (DEVELOPMENT || DEBUG)
#if VMCSTATS
#define VM_COMPRESSOR_STAT(x...) \
	do { \
		(x); \
	} while(0)
//TODO make atomic where needed, decompression paths
#define VM_DECOMPRESSOR_STAT(x...) \
	do { \
		(x); \
	} while(0)
#else
#define VM_COMPRESSOR_STAT(x...) \
	do { \
	}while (0)
#define VM_DECOMPRESSOR_STAT(x...) \
	do { \
	}while (0)
#endif
142
143static inline enum compressor_preselect_t compressor_preselect(void) {
144 if (vmcstate.lz4_failure_skips >= vmctune.lz4_max_failure_skips) {
145 vmcstate.lz4_failure_skips = 0;
146 vmcstate.lz4_failure_run_length = 0;
147 }
148
149 if (vmcstate.lz4_failure_run_length >= vmctune.lz4_max_failure_run_length) {
150 vmcstate.lz4_failure_skips++;
151 vmcstate.lz4_total_failure_skips++;
152 return CSKIPLZ4;
153 }
154
155 if (vmcstate.lz4_preselects >= vmctune.lz4_max_preselects) {
156 vmcstate.lz4_preselects = 0;
157 return CPRESELWK;
158 }
159
160 if (vmcstate.lz4_run_length >= vmctune.lz4_run_preselection_threshold) {
161 vmcstate.lz4_preselects++;
162 vmcstate.lz4_total_preselects++;
163 return CPRESELLZ4;
164 }
165 return CPRESELWK;
166}
167
168static inline void compressor_selector_update(int lz4sz, int didwk, int wksz) {
169 VM_COMPRESSOR_STAT(compressor_stats.lz4_compressions++);
170
171 if (lz4sz == 0) {
172 VM_COMPRESSOR_STAT(compressor_stats.lz4_compressed_bytes+=PAGE_SIZE);
173 VM_COMPRESSOR_STAT(compressor_stats.lz4_compression_failures++);
174 vmcstate.lz4_failure_run_length++;
175 VM_COMPRESSOR_STAT(vmcstate.lz4_total_failures++);
176 vmcstate.lz4_run_length = 0;
177 } else {
178 vmcstate.lz4_failure_run_length = 0;
179
180 VM_COMPRESSOR_STAT(compressor_stats.lz4_compressed_bytes+=lz4sz);
181
182 if (lz4sz <= vmctune.wkdm_reeval_threshold) {
183 vmcstate.lz4_run_length = 0;
184 } else {
185 if (!didwk) {
186 vmcstate.lz4_run_length++;
187 }
188 }
189
190 if (didwk) {
191 if (__probable(wksz > lz4sz)) {
192 uint32_t lz4delta = wksz - lz4sz;
193 VM_COMPRESSOR_STAT(compressor_stats.lz4_wk_compression_delta+=lz4delta);
194 if (lz4delta >= vmctune.lz4_run_continue_bytes) {
195 vmcstate.lz4_run_length++;
196 } else if (lz4delta <= vmctune.lz4_profitable_bytes) {
197 vmcstate.lz4_failure_run_length++;
198 VM_COMPRESSOR_STAT(vmcstate.lz4_total_unprofitables++);
199 vmcstate.lz4_run_length = 0;
200 } else {
201 vmcstate.lz4_run_length = 0;
202 }
203 } else {
204 VM_COMPRESSOR_STAT(compressor_stats.lz4_wk_compression_negative_delta+=(lz4sz-wksz));
205 vmcstate.lz4_failure_run_length++;
206 VM_COMPRESSOR_STAT(vmcstate.lz4_total_negatives++);
207 vmcstate.lz4_run_length = 0;
208 }
209 }
210 }
211}
212
213
214static inline void WKdm_hv(uint32_t *wkbuf) {
215#if DEVELOPMENT || DEBUG
216 uint32_t *inw = (uint32_t *) wkbuf;
217 if (*inw != MZV_MAGIC) {
218 if ((*inw | *(inw + 1) | *(inw + 2)) & 0xFFFF0000) {
219 panic("WKdm(%p): invalid header 0x%x 0x%x 0x%x\n", wkbuf, *inw, *(inw +1), *(inw+2));
220 }
221 }
222#else /* DEVELOPMENT || DEBUG */
223 (void) wkbuf;
224#endif
225}
226
//TODO: replace this blanket -Wincompatible-pointer-types suppression with
//explicit casts at the WK_word*/uint32_t*/uint8_t* call sites below.
228#pragma clang diagnostic push
229#pragma clang diagnostic ignored "-Wincompatible-pointer-types"
230
231#if defined(__arm64__)
232#endif
233
234static inline void WKdmD(WK_word* src_buf, WK_word* dest_buf, WK_word* scratch, unsigned int bytes) {
235#if defined(__arm64__)
236#endif
237 WKdm_hv(src_buf);
238#if defined(__arm64__)
239 if (PAGE_SIZE == 4096) {
240 WKdm_decompress_4k(src_buf, dest_buf, scratch, bytes);
241 } else {
242 __unused uint64_t wdsstart;
243
244 VM_COMPRESSOR_STAT_DBG(wdsstart = mach_absolute_time());
245 WKdm_decompress_16k(src_buf, dest_buf, scratch, bytes);
246
247 VM_COMPRESSOR_STAT_DBG(compressor_stats.wks_dabstime += mach_absolute_time() - wdsstart);
248 VM_COMPRESSOR_STAT(compressor_stats.wks_decompressions++);
249 }
250#else /* !defined arm64 */
251 WKdm_decompress_new(src_buf, dest_buf, scratch, bytes);
252#endif
253}
254#if DEVELOPMENT || DEBUG
255int precompy, wkswhw;
256#endif
257
258static inline int WKdmC(WK_word* src_buf, WK_word* dest_buf, WK_word* scratch, boolean_t *incomp_copy, unsigned int limit) {
259 (void)incomp_copy;
260 int wkcval;
261#if defined(__arm64__)
262 if (PAGE_SIZE == 4096) {
263 wkcval = WKdm_compress_4k(src_buf, dest_buf, scratch, limit);
264 } else {
265 __unused uint64_t wcswstart;
266
267 VM_COMPRESSOR_STAT_DBG(wcswstart = mach_absolute_time());
268
269 int wkswsz = WKdm_compress_16k(src_buf, dest_buf, scratch, limit);
270
271 VM_COMPRESSOR_STAT_DBG(compressor_stats.wks_cabstime += mach_absolute_time() - wcswstart);
272 VM_COMPRESSOR_STAT(compressor_stats.wks_compressions++);
273 wkcval = wkswsz;
274 }
275#else
276 wkcval = WKdm_compress_new(src_buf, dest_buf, scratch, limit);
277#endif
278 return wkcval;
279}
280
281
/*
 * Compress one page ('in', PAGE_SIZE bytes) into 'cdst' (capacity
 * 'outbufsz'), choosing between WKdm and LZ4 according to the current
 * codec mode. On return, *codec is set to CCWK or CCLZ4 to record which
 * codec produced the output.
 *
 * Returns the compressed size, 0 for a WKdm single-value page, or -1 if
 * the page did not fit / could not be compressed.
 *
 * Control flow (hybrid mode): preselection may jump straight to the LZ4
 * path; otherwise WKdm runs first and its result is re-evaluated against
 * vmctune.lz4_threshold to decide whether LZ4 should also be tried.
 */
int metacompressor(const uint8_t *in, uint8_t *cdst, int32_t outbufsz, uint16_t *codec, void *cscratchin, boolean_t *incomp_copy) {
	int sz = -1;
	int dowk = FALSE, dolz4 = FALSE, skiplz4 = FALSE;
	int insize = PAGE_SIZE;
	compressor_encode_scratch_t *cscratch = cscratchin;

	/* Pick the codec(s) to attempt for this page. */
	if (vm_compressor_current_codec == CMODE_WK) {
		dowk = TRUE;
	} else if (vm_compressor_current_codec == CMODE_LZ4) {
		dolz4 = TRUE;
	} else if (vm_compressor_current_codec == CMODE_HYB) {
		enum compressor_preselect_t presel = compressor_preselect();
		if (presel == CPRESELLZ4) {
			dolz4 = TRUE;
			goto lz4compress;
		} else if (presel == CSKIPLZ4) {
			/* WKdm only; the lz4eval fallback is disabled below. */
			dowk = TRUE;
			skiplz4 = TRUE;
		} else {
			assert(presel == CPRESELWK);
			dowk = TRUE;
		}
	}

	if (dowk) {
		*codec = CCWK;
		VM_COMPRESSOR_STAT(compressor_stats.wk_compressions++);
		sz = WKdmC(in, cdst, &cscratch->wkscratch[0], incomp_copy, outbufsz);

		if (sz == -1) {
			/* WKdm failed: account a full page; hybrid mode may retry with LZ4. */
			VM_COMPRESSOR_STAT(compressor_stats.wk_compressed_bytes_total+=PAGE_SIZE);
			VM_COMPRESSOR_STAT(compressor_stats.wk_compression_failures++);

			if (vm_compressor_current_codec == CMODE_HYB) {
				goto lz4eval;
			}
			goto cexit;
		} else if (sz == 0) {
			/* Single-value page: stored as a 4-byte token. */
			VM_COMPRESSOR_STAT(compressor_stats.wk_sv_compressions++);
			VM_COMPRESSOR_STAT(compressor_stats.wk_compressed_bytes_total+=4);
		} else {
			VM_COMPRESSOR_STAT(compressor_stats.wk_compressed_bytes_total+=sz);
		}
	}
lz4eval:
	/* Hybrid: decide whether the WKdm result warrants an LZ4 attempt. */
	if (vm_compressor_current_codec == CMODE_HYB) {
		if (((sz == -1) || (sz >= vmctune.lz4_threshold)) && (skiplz4 == FALSE)) {
			dolz4 = TRUE;
		} else {
			/*
			 * 'wkc' exists only on DEVELOPMENT/DEBUG builds; its sole use
			 * (below) is inside VM_COMPRESSOR_STAT, which is compiled out
			 * under exactly the same VMCSTATS condition, so this pairs up.
			 */
#if DEVELOPMENT || DEBUG
			int wkc = (sz == -1) ? PAGE_SIZE : sz;
#endif
			VM_COMPRESSOR_STAT(compressor_stats.wk_compressions_exclusive++);
			VM_COMPRESSOR_STAT(compressor_stats.wk_compressed_bytes_exclusive+=wkc);
			goto cexit;
		}
	}

lz4compress:

	if (dolz4) {
		if (sz == -1) {
			/* No usable WKdm size: treat it as a whole page for the selector. */
			sz = PAGE_SIZE;
		}
		int wksz = sz;
		*codec = CCLZ4;

		sz = (int) lz4raw_encode_buffer(cdst, outbufsz, in, insize, &cscratch->lz4state[0]);

		/* Feed the outcome back into the hybrid-selection state. */
		compressor_selector_update(sz, dowk, wksz);
		if (sz == 0) {
			/* LZ4 could not fit the page in outbufsz. */
			sz = -1;
			goto cexit;
		}
	}
cexit:
	return sz;
}
360
/*
 * Decompress one compressed page ('source', 'csize' bytes) into 'dest'
 * (PAGE_SIZE bytes), using the codec recorded at compression time
 * ('ccodec': CCLZ4 or CCWK).
 */
void metadecompressor(const uint8_t *source, uint8_t *dest, uint32_t csize, uint16_t ccodec, void *compressor_dscratchin) {
	int dolz4 = (ccodec == CCLZ4);
	int rval;
	compressor_decode_scratch_t *compressor_dscratch = compressor_dscratchin;

	if (dolz4) {
		rval = (int)lz4raw_decode_buffer(dest, PAGE_SIZE, source, csize, &compressor_dscratch->lz4decodestate[0]);
		VM_DECOMPRESSOR_STAT(compressor_stats.lz4_decompressions+=1);
		VM_DECOMPRESSOR_STAT(compressor_stats.lz4_decompressed_bytes+=csize);
		/*
		 * d32 exists only on DEVELOPMENT/DEBUG builds; the assertf below
		 * references it, so assertf is presumably compiled out on other
		 * configurations — NOTE(review): verify the assert config matches
		 * DEVELOPMENT || DEBUG, otherwise this fails to build.
		 */
#if DEVELOPMENT || DEBUG
		uint32_t *d32 = dest;
#endif
		/* A successful LZ4 decode must yield exactly one page. */
		assertf(rval == PAGE_SIZE, "LZ4 decode: size != pgsize %d, header: 0x%x, 0x%x, 0x%x",
		    rval, *d32, *(d32+1), *(d32+2));
	} else {
		assert(ccodec == CCWK);

		WKdmD(source, dest, &compressor_dscratch->wkdecompscratch[0], csize);

		VM_DECOMPRESSOR_STAT(compressor_stats.wk_decompressions+=1);
		VM_DECOMPRESSOR_STAT(compressor_stats.wk_decompressed_bytes+=csize);
	}
}
384#pragma clang diagnostic pop
385
386uint32_t vm_compressor_get_encode_scratch_size(void) {
387 if (vm_compressor_current_codec != VM_COMPRESSOR_DEFAULT_CODEC) {
388 return MAX(sizeof(compressor_encode_scratch_t), WKdm_SCRATCH_BUF_SIZE_INTERNAL);
389 } else {
390 return WKdm_SCRATCH_BUF_SIZE_INTERNAL;
391 }
392}
393
394uint32_t vm_compressor_get_decode_scratch_size(void) {
395 if (vm_compressor_current_codec != VM_COMPRESSOR_DEFAULT_CODEC) {
396 return MAX(sizeof(compressor_decode_scratch_t), WKdm_SCRATCH_BUF_SIZE_INTERNAL);
397 } else {
398 return WKdm_SCRATCH_BUF_SIZE_INTERNAL;
399 }
400}
401
402
403int vm_compressor_algorithm(void) {
404 return vm_compressor_current_codec;
405}
406
407void vm_compressor_algorithm_init(void) {
408 vm_compressor_mode_t new_codec = VM_COMPRESSOR_DEFAULT_CODEC;
409
410#if defined(__arm64__)
411 new_codec = CMODE_HYB;
412
413 if (PAGE_SIZE == 16384) {
414 vmctune.lz4_threshold = 12288;
415 }
416#endif
417
418 PE_parse_boot_argn("vm_compressor_codec", &new_codec, sizeof(new_codec));
419 assertf(((new_codec == VM_COMPRESSOR_DEFAULT_CODEC) || (new_codec == CMODE_WK) ||
420 (new_codec == CMODE_LZ4) || (new_codec == CMODE_HYB)),
421 "Invalid VM compression codec: %u", new_codec);
422
423#if defined(__arm__)||defined(__arm64__)
424 uint32_t tmpc;
425 if (PE_parse_boot_argn("-vm_compressor_wk", &tmpc, sizeof(tmpc))) {
426 new_codec = VM_COMPRESSOR_DEFAULT_CODEC;
427 } else if (PE_parse_boot_argn("-vm_compressor_hybrid", &tmpc, sizeof(tmpc))) {
428 new_codec = CMODE_HYB;
429 }
430
431 vm_compressor_current_codec = new_codec;
432#endif /* arm/arm64 */
433}
434