1 | /* |
2 | * Copyright (c) 2010-2016 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | /* This module implements a hybrid/adaptive compression scheme, using WKdm where |
29 | * profitable and, currently, an LZ4 variant elsewhere. |
30 | * (Created 2016, Derek Kumar) |
31 | */ |
32 | #include "lz4.h" |
33 | #include "WKdm_new.h" |
34 | #include <vm/vm_compressor_algorithms.h> |
35 | #include <vm/vm_compressor.h> |
36 | |
/*
 * First-word value that WKdm_hv() accepts without inspecting the rest of
 * the buffer header (17185 == 0x4321).
 */
#define MZV_MAGIC (17185)
38 | #if defined(__arm64__) |
39 | #include <arm64/proc_reg.h> |
40 | #endif |
41 | |
/*
 * Byte alignment requested for the LZ4 and WKdm members of the encode
 * scratch union.  Each macro is defined exactly once.
 */
#define LZ4_SCRATCH_ALIGN (64)
#define WKC_SCRATCH_ALIGN (64)
47 | |
48 | typedef union { |
49 | uint8_t lz4state[lz4_encode_scratch_size]__attribute((aligned(LZ4_SCRATCH_ALIGN))); |
50 | uint8_t wkscratch[0] __attribute((aligned(WKC_SCRATCH_ALIGN))); // TODO |
51 | } compressor_encode_scratch_t; |
52 | |
53 | typedef union { |
54 | uint8_t lz4decodestate[lz4_encode_scratch_size]__attribute((aligned(64))); |
55 | uint8_t wkdecompscratch[0] __attribute((aligned(64))); |
56 | } compressor_decode_scratch_t; |
57 | |
/*
 * Running state of the hybrid codec selector.  It is read and updated by
 * compressor_preselect() and compressor_selector_update().
 */
typedef struct {
	/* Not referenced by the selector code in this file. */
	uint16_t lz4_selection_run;
	/* Current run of LZ4 results that favoured LZ4. */
	uint16_t lz4_run_length;
	/* LZ4 preselections since the counter was last reset. */
	uint16_t lz4_preselects;
	/* Total number of LZ4 preselections. */
	uint32_t lz4_total_preselects;
	/* Pages for which LZ4 was skipped since the counter was last reset. */
	uint16_t lz4_failure_skips;
	/* Total number of pages for which LZ4 was skipped. */
	uint32_t lz4_total_failure_skips;
	/* Current run of failed, unprofitable or negative LZ4 results. */
	uint16_t lz4_failure_run_length;
	/* Total LZ4 results whose gain over WKdm was not considered profitable. */
	uint16_t lz4_total_unprofitables;
	/* Total LZ4 results that were not smaller than the WKdm result. */
	uint32_t lz4_total_negatives;
	/* Total LZ4 encodes that returned 0. */
	uint32_t lz4_total_failures;
} compressor_state_t;
70 | |
71 | compressor_tuneables_t vmctune = { |
72 | .lz4_threshold = 2048, |
73 | .wkdm_reeval_threshold = 1536, |
74 | .lz4_max_failure_skips = 0, |
75 | .lz4_max_failure_run_length = ~0U, |
76 | .lz4_max_preselects = 0, |
77 | .lz4_run_preselection_threshold = ~0U, |
78 | .lz4_run_continue_bytes = 0, |
79 | .lz4_profitable_bytes = 0, |
80 | }; |
81 | |
82 | compressor_state_t vmcstate = { |
83 | .lz4_selection_run = 0, |
84 | .lz4_run_length = 0, |
85 | .lz4_preselects = 0, |
86 | .lz4_total_preselects = 0, |
87 | .lz4_failure_skips = 0, |
88 | .lz4_total_failure_skips = 0, |
89 | .lz4_failure_run_length = 0, |
90 | .lz4_total_unprofitables = 0, |
91 | .lz4_total_negatives = 0, |
92 | }; |
93 | |
94 | compressor_stats_t compressor_stats; |
95 | |
/* Outcome of compressor_preselect() for the next page in hybrid mode. */
enum compressor_preselect_t {
	CPRESELLZ4 = 0, /* go straight to LZ4 */
	CSKIPLZ4   = 1, /* run WKdm and do not try LZ4 afterwards */
	CPRESELWK  = 2, /* run WKdm first; LZ4 may still be evaluated */
};
101 | |
102 | vm_compressor_mode_t vm_compressor_current_codec = VM_COMPRESSOR_DEFAULT_CODEC; |
103 | |
104 | boolean_t vm_compressor_force_sw_wkdm = FALSE; |
105 | |
106 | boolean_t verbose = FALSE; |
107 | |
/*
 * VM_COMPRESSOR_STAT_DBG(expr) evaluates `expr` when VMDBGSTAT (that is,
 * DEBUG) is non-zero and expands to an empty statement otherwise.
 *
 * The guard tests VMDBGSTAT, the macro defined on the line above it.
 * Testing a differently spelled, undefined name would always evaluate to 0
 * and leave the debug statistics permanently compiled out.
 */
#define VMDBGSTAT (DEBUG)
#if VMDBGSTAT
#define VM_COMPRESSOR_STAT_DBG(x...)                                    \
	do {                                                            \
	        (x);                                                    \
	} while(0)
#else
#define VM_COMPRESSOR_STAT_DBG(x...)                                    \
	do {                                                            \
	} while (0)
#endif
119 | |
/*
 * Statistics hooks.  On DEVELOPMENT or DEBUG builds each macro evaluates
 * its argument once; on other builds both expand to an empty statement and
 * the argument is not evaluated.
 */
#define VMCSTATS (DEVELOPMENT || DEBUG)
#if VMCSTATS

#define VM_COMPRESSOR_STAT(x...) \
	do { (x); } while (0)

//TODO make atomic where needed, decompression paths
#define VM_DECOMPRESSOR_STAT(x...) \
	do { (x); } while (0)

#else /* !VMCSTATS */

#define VM_COMPRESSOR_STAT(x...) \
	do { } while (0)

#define VM_DECOMPRESSOR_STAT(x...) \
	do { } while (0)

#endif /* VMCSTATS */
139 | |
140 | static inline enum compressor_preselect_t |
141 | compressor_preselect(void) |
142 | { |
143 | if (vmcstate.lz4_failure_skips >= vmctune.lz4_max_failure_skips) { |
144 | vmcstate.lz4_failure_skips = 0; |
145 | vmcstate.lz4_failure_run_length = 0; |
146 | } |
147 | |
148 | if (vmcstate.lz4_failure_run_length >= vmctune.lz4_max_failure_run_length) { |
149 | vmcstate.lz4_failure_skips++; |
150 | vmcstate.lz4_total_failure_skips++; |
151 | return CSKIPLZ4; |
152 | } |
153 | |
154 | if (vmcstate.lz4_preselects >= vmctune.lz4_max_preselects) { |
155 | vmcstate.lz4_preselects = 0; |
156 | return CPRESELWK; |
157 | } |
158 | |
159 | if (vmcstate.lz4_run_length >= vmctune.lz4_run_preselection_threshold) { |
160 | vmcstate.lz4_preselects++; |
161 | vmcstate.lz4_total_preselects++; |
162 | return CPRESELLZ4; |
163 | } |
164 | return CPRESELWK; |
165 | } |
166 | |
167 | static inline void |
168 | compressor_selector_update(int lz4sz, int didwk, int wksz) |
169 | { |
170 | VM_COMPRESSOR_STAT(compressor_stats.lz4_compressions++); |
171 | |
172 | if (lz4sz == 0) { |
173 | VM_COMPRESSOR_STAT(compressor_stats.lz4_compressed_bytes += PAGE_SIZE); |
174 | VM_COMPRESSOR_STAT(compressor_stats.lz4_compression_failures++); |
175 | vmcstate.lz4_failure_run_length++; |
176 | VM_COMPRESSOR_STAT(vmcstate.lz4_total_failures++); |
177 | vmcstate.lz4_run_length = 0; |
178 | } else { |
179 | vmcstate.lz4_failure_run_length = 0; |
180 | |
181 | VM_COMPRESSOR_STAT(compressor_stats.lz4_compressed_bytes += lz4sz); |
182 | |
183 | if (lz4sz <= vmctune.wkdm_reeval_threshold) { |
184 | vmcstate.lz4_run_length = 0; |
185 | } else { |
186 | if (!didwk) { |
187 | vmcstate.lz4_run_length++; |
188 | } |
189 | } |
190 | |
191 | if (didwk) { |
192 | if (__probable(wksz > lz4sz)) { |
193 | uint32_t lz4delta = wksz - lz4sz; |
194 | VM_COMPRESSOR_STAT(compressor_stats.lz4_wk_compression_delta += lz4delta); |
195 | if (lz4delta >= vmctune.lz4_run_continue_bytes) { |
196 | vmcstate.lz4_run_length++; |
197 | } else if (lz4delta <= vmctune.lz4_profitable_bytes) { |
198 | vmcstate.lz4_failure_run_length++; |
199 | VM_COMPRESSOR_STAT(vmcstate.lz4_total_unprofitables++); |
200 | vmcstate.lz4_run_length = 0; |
201 | } else { |
202 | vmcstate.lz4_run_length = 0; |
203 | } |
204 | } else { |
205 | VM_COMPRESSOR_STAT(compressor_stats.lz4_wk_compression_negative_delta += (lz4sz - wksz)); |
206 | vmcstate.lz4_failure_run_length++; |
207 | VM_COMPRESSOR_STAT(vmcstate.lz4_total_negatives++); |
208 | vmcstate.lz4_run_length = 0; |
209 | } |
210 | } |
211 | } |
212 | } |
213 | |
214 | |
/*
 * Header check for a WKdm-compressed buffer (DEVELOPMENT/DEBUG builds only).
 *
 * A buffer whose first word equals MZV_MAGIC is accepted as-is.  Otherwise
 * the call panics if any of the first three words has a bit set in its
 * upper 16 bits.  On other builds the function does nothing.
 */
static inline void
WKdm_hv(uint32_t *wkbuf)
{
#if DEVELOPMENT || DEBUG
	if (wkbuf[0] == MZV_MAGIC) {
		return;
	}

	uint32_t header_bits = wkbuf[0] | wkbuf[1] | wkbuf[2];
	if (header_bits & 0xFFFF0000) {
		panic("WKdm(%p): invalid header 0x%x 0x%x 0x%x", wkbuf, wkbuf[0], wkbuf[1], wkbuf[2]);
	}
#else /* DEVELOPMENT || DEBUG */
	(void) wkbuf;
#endif
}
229 | |
230 | //todo fix clang diagnostic |
231 | #pragma clang diagnostic push |
232 | #pragma clang diagnostic ignored "-Wincompatible-pointer-types" |
233 | |
234 | #if defined(__arm64__) |
235 | #endif |
236 | |
237 | static inline bool |
238 | WKdmD(WK_word* src_buf, WK_word* dest_buf, WK_word* scratch, unsigned int bytes, |
239 | __unused uint32_t *pop_count) |
240 | { |
241 | #if defined(__arm64__) |
242 | #endif |
243 | WKdm_hv(wkbuf: src_buf); |
244 | #if defined(__arm64__) |
245 | #ifndef __ARM_16K_PG__ |
246 | if (PAGE_SIZE == 4096) { |
247 | WKdm_decompress_4k(src_buf, dest_buf, scratch, bytes); |
248 | } else |
249 | #endif /* !____ARM_16K_PG__ */ |
250 | { |
251 | __unused uint64_t wdsstart; |
252 | |
253 | VM_COMPRESSOR_STAT_DBG(wdsstart = mach_absolute_time()); |
254 | WKdm_decompress_16k(src_buf, dest_buf, scratch, bytes); |
255 | |
256 | VM_COMPRESSOR_STAT_DBG(compressor_stats.wks_dabstime += mach_absolute_time() - wdsstart); |
257 | VM_COMPRESSOR_STAT(compressor_stats.wks_decompressions++); |
258 | } |
259 | #else /* !defined arm64 */ |
260 | WKdm_decompress_new(src_buf, dest_buf, scratch, bytes); |
261 | #endif |
262 | return true; |
263 | } |
#if DEVELOPMENT || DEBUG
/* Development-only globals; neither is referenced by the code in this file. */
int precompy;
int wkswhw;
#endif
267 | |
268 | static inline int |
269 | WKdmC(WK_word* src_buf, WK_word* dest_buf, WK_word* scratch, |
270 | boolean_t *incomp_copy, unsigned int limit, __unused uint32_t *pop_count) |
271 | { |
272 | (void)incomp_copy; |
273 | int wkcval; |
274 | #if defined(__arm64__) |
275 | #ifndef __ARM_16K_PG__ |
276 | if (PAGE_SIZE == 4096) { |
277 | wkcval = WKdm_compress_4k(src_buf, dest_buf, scratch, limit); |
278 | } else |
279 | #endif /* !____ARM_16K_PG__ */ |
280 | { |
281 | __unused uint64_t wcswstart; |
282 | |
283 | VM_COMPRESSOR_STAT_DBG(wcswstart = mach_absolute_time()); |
284 | |
285 | int wkswsz = WKdm_compress_16k(src_buf, dest_buf, scratch, limit); |
286 | |
287 | VM_COMPRESSOR_STAT_DBG(compressor_stats.wks_cabstime += mach_absolute_time() - wcswstart); |
288 | VM_COMPRESSOR_STAT(compressor_stats.wks_compressions++); |
289 | wkcval = wkswsz; |
290 | } |
291 | #else |
292 | wkcval = WKdm_compress_new(src_buf, dest_buf, scratch, limit); |
293 | #endif |
294 | return wkcval; |
295 | } |
296 | |
297 | |
298 | int |
299 | metacompressor(const uint8_t *in, uint8_t *cdst, int32_t outbufsz, uint16_t *codec, |
300 | void *cscratchin, boolean_t *incomp_copy, uint32_t *pop_count_p) |
301 | { |
302 | int sz = -1; |
303 | int dowk = FALSE, dolz4 = FALSE, skiplz4 = FALSE; |
304 | int insize = PAGE_SIZE; |
305 | compressor_encode_scratch_t *cscratch = cscratchin; |
306 | /* Not all paths lead to an inline population count. */ |
307 | uint32_t pop_count = C_SLOT_NO_POPCOUNT; |
308 | |
309 | if (vm_compressor_current_codec == CMODE_WK) { |
310 | dowk = TRUE; |
311 | } else if (vm_compressor_current_codec == CMODE_LZ4) { |
312 | dolz4 = TRUE; |
313 | } else if (vm_compressor_current_codec == CMODE_HYB) { |
314 | enum compressor_preselect_t presel = compressor_preselect(); |
315 | if (presel == CPRESELLZ4) { |
316 | dolz4 = TRUE; |
317 | goto lz4compress; |
318 | } else if (presel == CSKIPLZ4) { |
319 | dowk = TRUE; |
320 | skiplz4 = TRUE; |
321 | } else { |
322 | assert(presel == CPRESELWK); |
323 | dowk = TRUE; |
324 | } |
325 | } |
326 | |
327 | if (dowk) { |
328 | *codec = CCWK; |
329 | VM_COMPRESSOR_STAT(compressor_stats.wk_compressions++); |
330 | sz = WKdmC(src_buf: in, dest_buf: cdst, scratch: &cscratch->wkscratch[0], incomp_copy, limit: outbufsz, pop_count: &pop_count); |
331 | |
332 | if (sz == -1) { |
333 | VM_COMPRESSOR_STAT(compressor_stats.wk_compressed_bytes_total += PAGE_SIZE); |
334 | VM_COMPRESSOR_STAT(compressor_stats.wk_compression_failures++); |
335 | |
336 | if (vm_compressor_current_codec == CMODE_HYB) { |
337 | goto lz4eval; |
338 | } |
339 | goto cexit; |
340 | } else if (sz == 0) { |
341 | VM_COMPRESSOR_STAT(compressor_stats.wk_sv_compressions++); |
342 | VM_COMPRESSOR_STAT(compressor_stats.wk_compressed_bytes_total += 4); |
343 | } else { |
344 | VM_COMPRESSOR_STAT(compressor_stats.wk_compressed_bytes_total += sz); |
345 | } |
346 | } |
347 | lz4eval: |
348 | if (vm_compressor_current_codec == CMODE_HYB) { |
349 | if (((sz == -1) || (sz >= vmctune.lz4_threshold)) && (skiplz4 == FALSE)) { |
350 | dolz4 = TRUE; |
351 | } else { |
352 | #if DEVELOPMENT || DEBUG |
353 | int wkc = (sz == -1) ? PAGE_SIZE : sz; |
354 | #endif |
355 | VM_COMPRESSOR_STAT(compressor_stats.wk_compressions_exclusive++); |
356 | VM_COMPRESSOR_STAT(compressor_stats.wk_compressed_bytes_exclusive += wkc); |
357 | goto cexit; |
358 | } |
359 | } |
360 | |
361 | lz4compress: |
362 | |
363 | if (dolz4) { |
364 | if (sz == -1) { |
365 | sz = PAGE_SIZE; |
366 | } |
367 | int wksz = sz; |
368 | *codec = CCLZ4; |
369 | |
370 | sz = (int) lz4raw_encode_buffer(dst_buffer: cdst, dst_size: outbufsz, src_buffer: in, src_size: insize, hash_table: &cscratch->lz4state[0]); |
371 | |
372 | compressor_selector_update(lz4sz: sz, didwk: dowk, wksz); |
373 | if (sz == 0) { |
374 | sz = -1; |
375 | goto cexit; |
376 | } |
377 | } |
378 | cexit: |
379 | assert(pop_count_p != NULL); |
380 | *pop_count_p = pop_count; |
381 | return sz; |
382 | } |
383 | |
384 | bool |
385 | metadecompressor(const uint8_t *source, uint8_t *dest, uint32_t csize, |
386 | uint16_t ccodec, void *compressor_dscratchin, uint32_t *pop_count_p) |
387 | { |
388 | int dolz4 = (ccodec == CCLZ4); |
389 | int rval; |
390 | compressor_decode_scratch_t *compressor_dscratch = compressor_dscratchin; |
391 | /* Not all paths lead to an inline population count. */ |
392 | uint32_t pop_count = C_SLOT_NO_POPCOUNT; |
393 | bool success; |
394 | |
395 | if (dolz4) { |
396 | rval = (int)lz4raw_decode_buffer(dst_buffer: dest, PAGE_SIZE, src_buffer: source, src_size: csize, work: &compressor_dscratch->lz4decodestate[0]); |
397 | VM_DECOMPRESSOR_STAT(compressor_stats.lz4_decompressions += 1); |
398 | VM_DECOMPRESSOR_STAT(compressor_stats.lz4_decompressed_bytes += csize); |
399 | #if DEVELOPMENT || DEBUG |
400 | uint32_t *d32 = dest; |
401 | #endif |
402 | assertf(rval == PAGE_SIZE, "LZ4 decode: size != pgsize %d, header: 0x%x, 0x%x, 0x%x" , |
403 | rval, *d32, *(d32 + 1), *(d32 + 2)); |
404 | success = (rval == PAGE_SIZE); |
405 | } else { |
406 | assert(ccodec == CCWK); |
407 | |
408 | success = WKdmD(src_buf: source, dest_buf: dest, scratch: &compressor_dscratch->wkdecompscratch[0], bytes: csize, pop_count: &pop_count); |
409 | |
410 | VM_DECOMPRESSOR_STAT(compressor_stats.wk_decompressions += 1); |
411 | VM_DECOMPRESSOR_STAT(compressor_stats.wk_decompressed_bytes += csize); |
412 | } |
413 | |
414 | assert(pop_count_p != NULL); |
415 | *pop_count_p = pop_count; |
416 | return success; |
417 | } |
418 | #pragma clang diagnostic pop |
419 | |
420 | uint32_t |
421 | vm_compressor_get_encode_scratch_size(void) |
422 | { |
423 | if (vm_compressor_current_codec != VM_COMPRESSOR_DEFAULT_CODEC) { |
424 | return MAX(sizeof(compressor_encode_scratch_t), WKdm_SCRATCH_BUF_SIZE_INTERNAL); |
425 | } else { |
426 | return WKdm_SCRATCH_BUF_SIZE_INTERNAL; |
427 | } |
428 | } |
429 | |
430 | uint32_t |
431 | vm_compressor_get_decode_scratch_size(void) |
432 | { |
433 | if (vm_compressor_current_codec != VM_COMPRESSOR_DEFAULT_CODEC) { |
434 | return MAX(sizeof(compressor_decode_scratch_t), WKdm_SCRATCH_BUF_SIZE_INTERNAL); |
435 | } else { |
436 | return WKdm_SCRATCH_BUF_SIZE_INTERNAL; |
437 | } |
438 | } |
439 | |
440 | |
441 | int |
442 | vm_compressor_algorithm(void) |
443 | { |
444 | return vm_compressor_current_codec; |
445 | } |
446 | |
447 | void |
448 | vm_compressor_algorithm_init(void) |
449 | { |
450 | vm_compressor_mode_t new_codec = VM_COMPRESSOR_DEFAULT_CODEC; |
451 | |
452 | #if defined(__arm64__) |
453 | new_codec = CMODE_HYB; |
454 | |
455 | if (PAGE_SIZE == 16384) { |
456 | vmctune.lz4_threshold = 12288; |
457 | } |
458 | #endif |
459 | |
460 | PE_parse_boot_argn(arg_string: "vm_compressor_codec" , arg_ptr: &new_codec, max_arg: sizeof(new_codec)); |
461 | assertf(((new_codec == VM_COMPRESSOR_DEFAULT_CODEC) || (new_codec == CMODE_WK) || |
462 | (new_codec == CMODE_LZ4) || (new_codec == CMODE_HYB)), |
463 | "Invalid VM compression codec: %u" , new_codec); |
464 | |
465 | #if defined(__arm64__) |
466 | uint32_t tmpc; |
467 | if (PE_parse_boot_argn(arg_string: "-vm_compressor_wk" , arg_ptr: &tmpc, max_arg: sizeof(tmpc))) { |
468 | new_codec = VM_COMPRESSOR_DEFAULT_CODEC; |
469 | } else if (PE_parse_boot_argn(arg_string: "-vm_compressor_hybrid" , arg_ptr: &tmpc, max_arg: sizeof(tmpc))) { |
470 | new_codec = CMODE_HYB; |
471 | } |
472 | |
473 | vm_compressor_current_codec = new_codec; |
474 | #endif /* arm/arm64 */ |
475 | } |
476 | |