1 | /* |
2 | * Copyright (c) 2016-2016 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | |
29 | // LZ4_RAW buffer API |
30 | // EB May 2015 |
31 | // Mar 2016 Imported from the Compression project, with minor optimisations and |
32 | // early abort detection (Derek Kumar) |
33 | |
34 | #include <string.h> |
35 | #include "lz4.h" |
36 | |
37 | size_t |
38 | lz4raw_decode_buffer(uint8_t * __restrict dst_buffer, size_t dst_size, |
39 | const uint8_t * __restrict src_buffer, size_t src_size, |
40 | void * __restrict work __attribute__((unused))) |
41 | { |
42 | const uint8_t * src = src_buffer; |
43 | uint8_t * dst = dst_buffer; |
44 | |
45 | // Go fast if we can, keeping away from the end of buffers |
46 | #if LZ4_ENABLE_ASSEMBLY_DECODE |
47 | if (dst_size > LZ4_GOFAST_SAFETY_MARGIN && src_size > LZ4_GOFAST_SAFETY_MARGIN) { |
48 | if (lz4_decode_asm(dst_ptr: &dst, dst_begin: dst_buffer, dst_end: dst_buffer + dst_size - LZ4_GOFAST_SAFETY_MARGIN, src_ptr: &src, src_end: src_buffer + src_size - LZ4_GOFAST_SAFETY_MARGIN)) { |
49 | return 0; // FAIL |
50 | } |
51 | } |
52 | #endif |
53 | //DRKTODO: Can the 'C' "safety" decode be eliminated for 4/16K fixed-sized buffers? |
54 | |
55 | // Finish safe |
56 | if (lz4_decode(dst_ptr: &dst, dst_begin: dst_buffer, dst_end: dst_buffer + dst_size, src_ptr: &src, src_end: src_buffer + src_size)) { |
57 | return 0; // FAIL |
58 | } |
59 | return (size_t)(dst - dst_buffer); // bytes produced |
60 | } |
61 | // Debug flags |
62 | #if LZ4DEBUG |
63 | #define DEBUG_LZ4_ENCODE_ERRORS (1) |
64 | #define DEBUG_LZ4_DECODE_ERRORS (1) |
65 | #endif |
66 | |
67 | #if DEBUG_LZ4_ENCODE_ERRORS |
68 | #endif |
69 | |
70 | #if !LZ4_ENABLE_ASSEMBLY_ENCODE |
71 | |
72 | #if defined(__x86_64__) || defined(__x86_64h__) |
73 | # define LZ4_MATCH_SEARCH_INIT_SIZE 32 |
74 | # define LZ4_MATCH_SEARCH_LOOP_SIZE 32 |
75 | #else |
76 | # define LZ4_MATCH_SEARCH_INIT_SIZE 8 |
77 | # define LZ4_MATCH_SEARCH_LOOP_SIZE 8 |
78 | #endif |
79 | |
80 | // Return hash for 4-byte sequence X |
81 | static inline uint32_t |
82 | lz4_hash(uint32_t x) |
83 | { |
84 | return (x * 2654435761U) >> (32 - LZ4_COMPRESS_HASH_BITS); |
85 | } |
86 | |
87 | // Store 0xfff..fff at *PTR |
88 | static inline void |
89 | lz4_fill16(uint8_t * ptr) |
90 | { |
91 | store8(ptr, -1); |
92 | store8(ptr + 8, -1); |
93 | } |
94 | |
95 | // Return number of matching bytes 0..4 at positions A and B. |
96 | static inline size_t |
97 | lz4_nmatch4(const uint8_t * a, const uint8_t * b) |
98 | { |
99 | uint32_t x = load4(a) ^ load4(b); |
100 | return (x == 0)?4:(__builtin_ctzl(x) >> 3); |
101 | } |
102 | |
103 | // Return number of matching bytes 0..8 at positions A and B. |
104 | static inline size_t |
105 | lz4_nmatch8(const uint8_t * a, const uint8_t * b) |
106 | { |
107 | uint64_t x = load8(a) ^ load8(b); |
108 | return (x == 0)?8:(__builtin_ctzll(x) >> 3); |
109 | } |
110 | |
111 | // Return number of matching bytes 0..16 at positions A and B. |
112 | static inline size_t |
113 | lz4_nmatch16(const uint8_t * a, const uint8_t * b) |
114 | { |
115 | size_t n = lz4_nmatch8(a, b); |
116 | return (n == 8)?(8 + lz4_nmatch8(a + 8, b + 8)):n; |
117 | } |
118 | |
119 | // Return number of matching bytes 0..32 at positions A and B. |
120 | static inline size_t |
121 | lz4_nmatch32(const uint8_t * a, const uint8_t * b) |
122 | { |
123 | size_t n = lz4_nmatch16(a, b); |
124 | return (n == 16)?(16 + lz4_nmatch16(a + 16, b + 16)):n; |
125 | } |
126 | |
127 | // Return number of matching bytes 0..64 at positions A and B. |
128 | static inline size_t |
129 | lz4_nmatch64(const uint8_t * a, const uint8_t * b) |
130 | { |
131 | size_t n = lz4_nmatch32(a, b); |
132 | return (n == 32)?(32 + lz4_nmatch32(a + 32, b + 32)):n; |
133 | } |
134 | |
135 | // Compile-time selection, return number of matching bytes 0..N at positions A and B. |
136 | static inline size_t |
137 | lz4_nmatch(int N, const uint8_t * a, const uint8_t * b) |
138 | { |
139 | switch (N) { |
140 | case 4: return lz4_nmatch4(a, b); |
141 | case 8: return lz4_nmatch8(a, b); |
142 | case 16: return lz4_nmatch16(a, b); |
143 | case 32: return lz4_nmatch32(a, b); |
144 | case 64: return lz4_nmatch64(a, b); |
145 | } |
146 | __builtin_trap(); // FAIL |
147 | } |
148 | |
149 | // Store LENGTH in DST using the literal_length/match_length extension scheme: X is the sum of all bytes until we reach a byte < 0xff. |
150 | // We are allowed to access a constant number of bytes above DST_END. |
151 | // Return incremented DST pointer on success, and 0 on failure |
152 | static inline uint8_t * |
153 | lz4_store_length(uint8_t * dst, const uint8_t * const end, uint32_t L) |
154 | { |
155 | (void)end; |
156 | while (L >= 17 * 255) { |
157 | lz4_fill16(dst); |
158 | dst += 16; |
159 | L -= 16 * 255; |
160 | } |
161 | lz4_fill16(dst); |
162 | //DRKTODO verify these modulos/divisions are optimally handled by clang |
163 | dst += L / 255; |
164 | *dst++ = L % 255; |
165 | return dst; |
166 | } |
167 | |
168 | static inline uint32_t |
169 | clamp(uint32_t x, uint32_t max) |
170 | __attribute__((overloadable)) |
171 | { |
172 | return x > max ? max : x; |
173 | } |
174 | |
175 | static inline uint8_t * |
176 | copy_literal(uint8_t *dst, const uint8_t * restrict src, uint32_t L) |
177 | { |
178 | uint8_t *end = dst + L; |
179 | { copy16(dst, src); dst += 16; src += 16; } |
180 | while (dst < end) { |
181 | copy32(dst, src); dst += 32; src += 32; |
182 | } |
183 | return end; |
184 | } |
185 | |
186 | static uint8_t * |
187 | lz4_emit_match(uint32_t L, uint32_t M, uint32_t D, |
188 | uint8_t * restrict dst, |
189 | const uint8_t * const end, |
190 | const uint8_t * restrict src) |
191 | { |
192 | // The LZ4 encoding scheme requires that M is at least 4, because |
193 | // the actual value stored by the encoding is M - 4. Check this |
194 | // requirement for debug builds. |
195 | assert(M >= 4 && "LZ4 encoding requires that M is at least 4" ); |
196 | // Having checked that M >= 4, translate M by four. |
197 | M -= 4; |
198 | // Similarly, we must have D < 2**16, because we use only two bytes |
199 | // to represent the value of D in the encoding. |
200 | assert(D <= USHRT_MAX && "LZ4 encoding requries that D can be stored in two bytes." ); |
201 | // Construct the command byte by clamping both L and M to 0 ... 15 |
202 | // and packing them into a single byte, and store it. |
203 | *dst++ = clamp(L, 15) << 4 | clamp(M, 15); |
204 | // If L is 15 or greater, we need to encode extra literal length bytes. |
205 | if (L >= 15) { |
206 | dst = lz4_store_length(dst, end, L - 15); |
207 | if (dst == 0 || dst + L >= end) { |
208 | return NULL; |
209 | } |
210 | } |
211 | // Copy the literal itself from src to dst. |
212 | dst = copy_literal(dst, src, L); |
213 | // Store match distance. |
214 | store2(dst, D); dst += 2; |
215 | // If M is 15 or greater, we need to encode extra match length bytes. |
216 | if (M >= 15) { |
217 | dst = lz4_store_length(dst, end, M - 15); |
218 | if (dst == 0) { |
219 | return NULL; |
220 | } |
221 | } |
222 | return dst; |
223 | } |
224 | |
225 | /* #ifndef LZ4_EARLY_ABORT */ |
226 | /* #define LZ4_EARLY_ABORT (1) */ |
227 | /* #endif */ |
228 | |
229 | #if LZ4_EARLY_ABORT |
230 | int lz4_do_early_abort = 1; |
231 | int lz4_early_aborts = 0; |
232 | #define LZ4_EARLY_ABORT_EVAL (448) |
233 | #define LZ4_EARLY_ABORT_MIN_COMPRESSION_FACTOR (20) |
234 | #endif /* LZ4_EARLY_ABORT */ |
235 | |
236 | void |
237 | lz4_encode_2gb(uint8_t ** dst_ptr, |
238 | size_t dst_size, |
239 | const uint8_t ** src_ptr, |
240 | const uint8_t * src_begin, |
241 | size_t src_size, |
242 | lz4_hash_entry_t hash_table[LZ4_COMPRESS_HASH_ENTRIES], |
243 | int skip_final_literals) |
244 | { |
245 | uint8_t *dst = *dst_ptr; // current output stream position |
246 | uint8_t *end = dst + dst_size - LZ4_GOFAST_SAFETY_MARGIN; |
247 | const uint8_t *src = *src_ptr; // current input stream literal to encode |
248 | const uint8_t *src_end = src + src_size - LZ4_GOFAST_SAFETY_MARGIN; |
249 | const uint8_t *match_begin = 0; // first byte of matched sequence |
250 | const uint8_t *match_end = 0; // first byte after matched sequence |
251 | #if LZ4_EARLY_ABORT |
252 | uint8_t * const dst_begin = dst; |
253 | uint32_t lz4_do_abort_eval = lz4_do_early_abort; |
254 | #endif |
255 | |
256 | while (dst < end) { |
257 | ptrdiff_t match_distance = 0; |
258 | for (match_begin = src; match_begin < src_end; match_begin += 1) { |
259 | const uint32_t pos = (uint32_t)(match_begin - src_begin); |
260 | const uint32_t w0 = load4(match_begin); |
261 | const uint32_t w1 = load4(match_begin + 1); |
262 | const uint32_t w2 = load4(match_begin + 2); |
263 | const uint32_t w3 = load4(match_begin + 3); |
264 | const int i0 = lz4_hash(w0); |
265 | const int i1 = lz4_hash(w1); |
266 | const int i2 = lz4_hash(w2); |
267 | const int i3 = lz4_hash(w3); |
268 | const uint8_t *c0 = src_begin + hash_table[i0].offset; |
269 | const uint8_t *c1 = src_begin + hash_table[i1].offset; |
270 | const uint8_t *c2 = src_begin + hash_table[i2].offset; |
271 | const uint8_t *c3 = src_begin + hash_table[i3].offset; |
272 | const uint32_t m0 = hash_table[i0].word; |
273 | const uint32_t m1 = hash_table[i1].word; |
274 | const uint32_t m2 = hash_table[i2].word; |
275 | const uint32_t m3 = hash_table[i3].word; |
276 | hash_table[i0].offset = pos; |
277 | hash_table[i0].word = w0; |
278 | hash_table[i1].offset = pos + 1; |
279 | hash_table[i1].word = w1; |
280 | |
281 | hash_table[i2].offset = pos + 2; |
282 | hash_table[i2].word = w2; |
283 | hash_table[i3].offset = pos + 3; |
284 | hash_table[i3].word = w3; |
285 | |
286 | match_distance = (match_begin - c0); |
287 | if (w0 == m0 && match_distance < 0x10000 && match_distance > 0) { |
288 | match_end = match_begin + 4; |
289 | goto EXPAND_FORWARD; |
290 | } |
291 | |
292 | match_begin++; |
293 | match_distance = (match_begin - c1); |
294 | if (w1 == m1 && match_distance < 0x10000 && match_distance > 0) { |
295 | match_end = match_begin + 4; |
296 | goto EXPAND_FORWARD; |
297 | } |
298 | |
299 | match_begin++; |
300 | match_distance = (match_begin - c2); |
301 | if (w2 == m2 && match_distance < 0x10000 && match_distance > 0) { |
302 | match_end = match_begin + 4; |
303 | goto EXPAND_FORWARD; |
304 | } |
305 | |
306 | match_begin++; |
307 | match_distance = (match_begin - c3); |
308 | if (w3 == m3 && match_distance < 0x10000 && match_distance > 0) { |
309 | match_end = match_begin + 4; |
310 | goto EXPAND_FORWARD; |
311 | } |
312 | |
313 | #if LZ4_EARLY_ABORT |
314 | //DRKTODO: Evaluate unrolling further. 2xunrolling had some modest benefits |
315 | if (lz4_do_abort_eval && ((pos) >= LZ4_EARLY_ABORT_EVAL)) { |
316 | ptrdiff_t dstd = dst - dst_begin; |
317 | |
318 | if (dstd == 0) { |
319 | lz4_early_aborts++; |
320 | return; |
321 | } |
322 | |
323 | /* if (dstd >= pos) { */ |
324 | /* return; */ |
325 | /* } */ |
326 | /* ptrdiff_t cbytes = pos - dstd; */ |
327 | /* if ((cbytes * LZ4_EARLY_ABORT_MIN_COMPRESSION_FACTOR) > pos) { */ |
328 | /* return; */ |
329 | /* } */ |
330 | lz4_do_abort_eval = 0; |
331 | } |
332 | #endif |
333 | } |
334 | |
335 | if (skip_final_literals) { |
336 | *src_ptr = src; *dst_ptr = dst; return; |
337 | } // do not emit the final literal sequence |
338 | |
339 | // Emit a trailing literal that covers the remainder of the source buffer, |
340 | // if we can do so without exceeding the bounds of the destination buffer. |
341 | size_t src_remaining = src_end + LZ4_GOFAST_SAFETY_MARGIN - src; |
342 | if (src_remaining < 15) { |
343 | *dst++ = (uint8_t)(src_remaining << 4); |
344 | memcpy(dst, src, 16); dst += src_remaining; |
345 | } else { |
346 | *dst++ = 0xf0; |
347 | dst = lz4_store_length(dst, end, (uint32_t)(src_remaining - 15)); |
348 | if (dst == 0 || dst + src_remaining >= end) { |
349 | return; |
350 | } |
351 | memcpy(dst, src, src_remaining); dst += src_remaining; |
352 | } |
353 | *dst_ptr = dst; |
354 | *src_ptr = src + src_remaining; |
355 | return; |
356 | |
357 | EXPAND_FORWARD: |
358 | |
359 | // Expand match forward |
360 | { |
361 | const uint8_t * ref_end = match_end - match_distance; |
362 | while (match_end < src_end) { |
363 | size_t n = lz4_nmatch(LZ4_MATCH_SEARCH_LOOP_SIZE, ref_end, match_end); |
364 | if (n < LZ4_MATCH_SEARCH_LOOP_SIZE) { |
365 | match_end += n; break; |
366 | } |
367 | match_end += LZ4_MATCH_SEARCH_LOOP_SIZE; |
368 | ref_end += LZ4_MATCH_SEARCH_LOOP_SIZE; |
369 | } |
370 | } |
371 | |
372 | // Expand match backward |
373 | { |
374 | // match_begin_min = max(src_begin + match_distance,literal) |
375 | const uint8_t * match_begin_min = src_begin + match_distance; |
376 | match_begin_min = (match_begin_min < src)?src:match_begin_min; |
377 | const uint8_t * ref_begin = match_begin - match_distance; |
378 | |
379 | while (match_begin > match_begin_min && ref_begin[-1] == match_begin[-1]) { |
380 | match_begin -= 1; ref_begin -= 1; |
381 | } |
382 | } |
383 | |
384 | // Emit match |
385 | dst = lz4_emit_match((uint32_t)(match_begin - src), (uint32_t)(match_end - match_begin), (uint32_t)match_distance, dst, end, src); |
386 | if (!dst) { |
387 | return; |
388 | } |
389 | |
390 | // Update state |
391 | src = match_end; |
392 | |
393 | // Update return values to include the last fully encoded match |
394 | *dst_ptr = dst; |
395 | *src_ptr = src; |
396 | } |
397 | } |
398 | |
399 | #endif |
400 | |
401 | size_t |
402 | lz4raw_encode_buffer(uint8_t * __restrict dst_buffer, size_t dst_size, |
403 | const uint8_t * __restrict src_buffer, size_t src_size, |
404 | lz4_hash_entry_t hash_table[LZ4_COMPRESS_HASH_ENTRIES]) |
405 | { |
406 | // Initialize hash table |
407 | const lz4_hash_entry_t HASH_FILL = { .offset = 0x80000000, .word = 0x0 }; |
408 | |
409 | const uint8_t * src = src_buffer; |
410 | uint8_t * dst = dst_buffer; |
411 | |
412 | // We need several blocks because our base function is limited to 2GB input |
413 | const size_t BLOCK_SIZE = 0x7ffff000; |
414 | while (src_size > 0) { |
415 | //DRKTODO either implement pattern4 or figure out optimal unroll |
416 | //DRKTODO: bizarrely, with plain O3 the compiler generates a single |
417 | //DRKTODO: scalar STP per loop iteration with the stock loop |
418 | //DRKTODO If hand unrolled, it switches to NEON store pairs |
419 | // Reset hash table for each block |
420 | /* #if __STDC_HOSTED__ */ |
421 | /* memset_pattern8(hash_table, &HASH_FILL, lz4_encode_scratch_size); */ |
422 | /* #else */ |
423 | /* for (int i=0;i<LZ4_COMPRESS_HASH_ENTRIES;i++) hash_table[i] = HASH_FILL; */ |
424 | /* #endif */ |
425 | |
426 | for (int i = 0; i < LZ4_COMPRESS_HASH_ENTRIES;) { |
427 | hash_table[i++] = HASH_FILL; |
428 | hash_table[i++] = HASH_FILL; |
429 | hash_table[i++] = HASH_FILL; |
430 | hash_table[i++] = HASH_FILL; |
431 | } |
432 | |
433 | // Bytes to encode in this block |
434 | const size_t src_to_encode = src_size > BLOCK_SIZE ? BLOCK_SIZE : src_size; |
435 | |
436 | // Run the encoder, only the last block emits final literals. Allows concatenation of encoded payloads. |
437 | // Blocks are encoded independently, so src_begin is set to each block origin instead of src_buffer |
438 | uint8_t * dst_start = dst; |
439 | const uint8_t * src_start = src; |
440 | lz4_encode_2gb(dst_ptr: &dst, dst_size, src_ptr: &src, src_begin: src, src_size: src_to_encode, hash_table, skip_final_literals: src_to_encode < src_size); |
441 | |
442 | // Check progress |
443 | size_t dst_used = dst - dst_start; |
444 | size_t src_used = src - src_start; // src_used <= src_to_encode |
445 | if (src_to_encode == src_size && src_used < src_to_encode) { |
446 | return 0; // FAIL to encode last block |
447 | } |
448 | // Note that there is a potential problem here in case of non compressible data requiring more blocks. |
449 | // We may end up here with src_used very small, or even 0, and will not be able to make progress during |
450 | // compression. We FAIL unless the length of literals remaining at the end is small enough. |
451 | if (src_to_encode < src_size && src_to_encode - src_used >= (1 << 16)) { |
452 | return 0; // FAIL too many literals |
453 | } |
454 | // Update counters (SRC and DST already have been updated) |
455 | src_size -= src_used; |
456 | dst_size -= dst_used; |
457 | } |
458 | |
459 | return (size_t)(dst - dst_buffer); // bytes produced |
460 | } |
461 | |
462 | typedef uint32_t lz4_uint128 __attribute__((ext_vector_type(4))) __attribute__((__aligned__(1))); |
463 | |
464 | int |
465 | lz4_decode(uint8_t ** dst_ptr, |
466 | uint8_t * dst_begin, |
467 | uint8_t * dst_end, |
468 | const uint8_t ** src_ptr, |
469 | const uint8_t * src_end) |
470 | { |
471 | uint8_t * dst = *dst_ptr; |
472 | const uint8_t * src = *src_ptr; |
473 | |
474 | // Require dst_end > dst. |
475 | if (dst_end <= dst) { |
476 | goto OUT_FULL; |
477 | } |
478 | |
479 | while (src < src_end) { |
480 | // Keep last good position |
481 | *src_ptr = src; |
482 | *dst_ptr = dst; |
483 | |
484 | uint8_t cmd = *src++; // 1 byte encoding literal+(match-4) length: LLLLMMMM |
485 | uint32_t literalLength = (cmd >> 4) & 15; // 0..15 |
486 | uint32_t matchLength = 4 + (cmd & 15); // 4..19 |
487 | |
488 | // extra bytes for literalLength |
489 | if (__improbable(literalLength == 15)) { |
490 | uint8_t s; |
491 | do { |
492 | #if DEBUG_LZ4_DECODE_ERRORS |
493 | if (__improbable(src >= src_end)) { |
494 | printf("Truncated SRC literal length\n" ); |
495 | } |
496 | #endif |
497 | if (__improbable(src >= src_end)) { |
498 | goto IN_FAIL; // unexpected end of input (1 byte needed) |
499 | } |
500 | s = *src++; |
501 | literalLength += s; |
502 | } while (__improbable(s == 255)); |
503 | } |
504 | |
505 | // copy literal |
506 | #if DEBUG_LZ4_DECODE_ERRORS |
507 | if (__improbable(literalLength > (size_t)(src_end - src))) { |
508 | printf("Truncated SRC literal\n" ); |
509 | } |
510 | #endif |
511 | if (__improbable(literalLength > (size_t)(src_end - src))) { |
512 | goto IN_FAIL; |
513 | } |
514 | if (__improbable(literalLength > (size_t)(dst_end - dst))) { |
515 | // literal will take us past the end of the destination buffer, |
516 | // so we can only copy part of it. |
517 | literalLength = (uint32_t)(dst_end - dst); |
518 | memcpy(dst, src, n: literalLength); |
519 | dst += literalLength; |
520 | goto OUT_FULL; |
521 | } |
522 | memcpy(dst, src, n: literalLength); |
523 | src += literalLength; |
524 | dst += literalLength; |
525 | |
526 | if (__improbable(src >= src_end)) { |
527 | goto OUT_FULL; // valid end of stream |
528 | } |
529 | #if DEBUG_LZ4_DECODE_ERRORS |
530 | if (__improbable(2 > (size_t)(src_end - src))) { |
531 | printf("Truncated SRC distance\n" ); |
532 | } |
533 | #endif |
534 | if (__improbable(2 > (size_t)(src_end - src))) { |
535 | goto IN_FAIL; // unexpected end of input (2 bytes needed) |
536 | } |
537 | //DRKTODO: this causes an alignment increase warning (legitimate?) |
538 | //DRKTODO: cast of char * to uint16_t* |
539 | #pragma clang diagnostic push |
540 | #pragma clang diagnostic ignored "-Wcast-align" |
541 | |
542 | // match distance |
543 | uint64_t matchDistance = *(const uint16_t *)src; // 0x0000 <= matchDistance <= 0xffff |
544 | #pragma clang diagnostic pop |
545 | src += 2; |
546 | #if DEBUG_LZ4_DECODE_ERRORS |
547 | if (matchDistance == 0) { |
548 | printf("Invalid match distance D = 0\n" ); |
549 | } |
550 | #endif |
551 | if (__improbable(matchDistance == 0)) { |
552 | goto IN_FAIL; // 0x0000 invalid |
553 | } |
554 | uint8_t * ref = dst - matchDistance; |
555 | #if DEBUG_LZ4_DECODE_ERRORS |
556 | if (__improbable(ref < dst_begin)) { |
557 | printf("Invalid reference D=0x%llx dst_begin=%p dst=%p dst_end=%p\n" , matchDistance, dst_begin, dst, dst_end); |
558 | } |
559 | #endif |
560 | if (__improbable(ref < dst_begin)) { |
561 | goto OUT_FAIL; // out of range |
562 | } |
563 | // extra bytes for matchLength |
564 | if (__improbable(matchLength == 19)) { |
565 | uint8_t s; |
566 | do { |
567 | #if DEBUG_LZ4_DECODE_ERRORS |
568 | if (__improbable(src >= src_end)) { |
569 | printf("Truncated SRC match length\n" ); |
570 | } |
571 | #endif |
572 | if (__improbable(src >= src_end)) { |
573 | goto IN_FAIL; // unexpected end of input (1 byte needed) |
574 | } |
575 | s = *src++; |
576 | matchLength += s; |
577 | } while (__improbable(s == 255)); |
578 | } |
579 | |
580 | // copy match (may overlap) |
581 | if (__improbable(matchLength > (size_t)(dst_end - dst))) { |
582 | // match will take us past the end of the destination buffer, |
583 | // so we can only copy part of it. |
584 | matchLength = (uint32_t)(dst_end - dst); |
585 | for (uint32_t i = 0; i < matchLength; ++i) { |
586 | dst[i] = ref[i]; |
587 | } |
588 | dst += matchLength; |
589 | goto OUT_FULL; |
590 | } |
591 | for (uint64_t i = 0; i < matchLength; i++) { |
592 | dst[i] = ref[i]; |
593 | } |
594 | dst += matchLength; |
595 | } |
596 | |
597 | // We reached the end of the input buffer after a full instruction |
598 | OUT_FULL: |
599 | // Or we reached the end of the output buffer |
600 | *dst_ptr = dst; |
601 | *src_ptr = src; |
602 | return 0; |
603 | |
604 | // Error conditions |
605 | OUT_FAIL: |
606 | IN_FAIL: |
607 | return 1; // FAIL |
608 | } |
609 | |