1 | /* |
2 | * Copyright (c) 2016-2016 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | |
29 | // LZ4_RAW buffer API |
30 | // EB May 2015 |
31 | // Mar 2016 Imported from the Compression project, with minor optimisations and |
32 | // early abort detection (Derek Kumar) |
33 | |
34 | #include "lz4.h" |
35 | #define memcpy __builtin_memcpy |
36 | |
37 | size_t lz4raw_decode_buffer(uint8_t * __restrict dst_buffer, size_t dst_size, |
38 | const uint8_t * __restrict src_buffer, size_t src_size, |
39 | void * __restrict work __attribute__((unused))) |
40 | { |
41 | const uint8_t * src = src_buffer; |
42 | uint8_t * dst = dst_buffer; |
43 | |
44 | // Go fast if we can, keeping away from the end of buffers |
45 | #if LZ4_ENABLE_ASSEMBLY_DECODE |
46 | if (dst_size > LZ4_GOFAST_SAFETY_MARGIN && src_size > LZ4_GOFAST_SAFETY_MARGIN) |
47 | { |
48 | if (lz4_decode_asm(&dst, dst_buffer, dst_buffer + dst_size - LZ4_GOFAST_SAFETY_MARGIN, &src, src_buffer + src_size - LZ4_GOFAST_SAFETY_MARGIN)) |
49 | return 0; // FAIL |
50 | } |
51 | #endif |
52 | //DRKTODO: Can the 'C' "safety" decode be eliminated for 4/16K fixed-sized buffers? |
53 | |
54 | // Finish safe |
55 | if (lz4_decode(&dst, dst_buffer, dst_buffer + dst_size, &src, src_buffer + src_size)) |
56 | return 0; // FAIL |
57 | |
58 | return (size_t)(dst - dst_buffer); // bytes produced |
59 | } |
60 | // Debug flags |
61 | #if LZ4DEBUG |
62 | #define DEBUG_LZ4_ENCODE_ERRORS (1) |
63 | #define DEBUG_LZ4_DECODE_ERRORS (1) |
64 | #endif |
65 | |
66 | #if DEBUG_LZ4_ENCODE_ERRORS |
67 | #endif |
68 | |
69 | #if !LZ4_ENABLE_ASSEMBLY_ENCODE |
70 | |
// Number of bytes compared per probe when extending a match forward.
// Wider (32-byte) probes are used on x86_64; other targets use 8-byte
// probes — presumably tuned for unaligned-load throughput (not verified here).
#if defined(__x86_64__) || defined(__x86_64h__)
# define LZ4_MATCH_SEARCH_INIT_SIZE 32
# define LZ4_MATCH_SEARCH_LOOP_SIZE 32
#else
# define LZ4_MATCH_SEARCH_INIT_SIZE 8
# define LZ4_MATCH_SEARCH_LOOP_SIZE 8
#endif
78 | |
79 | // Return hash for 4-byte sequence X |
80 | static inline uint32_t lz4_hash(uint32_t x) { return (x * 2654435761U) >> (32 - LZ4_COMPRESS_HASH_BITS); } |
81 | |
82 | // Store 0xfff..fff at *PTR |
83 | static inline void lz4_fill16(uint8_t * ptr) |
84 | { |
85 | store8(ptr,-1); |
86 | store8(ptr+8,-1); |
87 | } |
88 | |
89 | // Return number of matching bytes 0..4 at positions A and B. |
90 | static inline size_t lz4_nmatch4(const uint8_t * a,const uint8_t * b) |
91 | { |
92 | uint32_t x = load4(a) ^ load4(b); |
93 | return (x == 0)?4:(__builtin_ctzl(x) >> 3); |
94 | } |
95 | |
96 | // Return number of matching bytes 0..8 at positions A and B. |
97 | static inline size_t lz4_nmatch8(const uint8_t * a,const uint8_t * b) |
98 | { |
99 | uint64_t x = load8(a) ^ load8(b); |
100 | return (x == 0)?8:(__builtin_ctzll(x) >> 3); |
101 | } |
102 | |
103 | // Return number of matching bytes 0..16 at positions A and B. |
104 | static inline size_t lz4_nmatch16(const uint8_t * a,const uint8_t * b) |
105 | { |
106 | size_t n = lz4_nmatch8(a,b); |
107 | return (n == 8)?(8 + lz4_nmatch8(a+8,b+8)):n; |
108 | } |
109 | |
110 | // Return number of matching bytes 0..32 at positions A and B. |
111 | static inline size_t lz4_nmatch32(const uint8_t * a,const uint8_t * b) |
112 | { |
113 | size_t n = lz4_nmatch16(a,b); |
114 | return (n == 16)?(16 + lz4_nmatch16(a+16,b+16)):n; |
115 | } |
116 | |
117 | // Return number of matching bytes 0..64 at positions A and B. |
118 | static inline size_t lz4_nmatch64(const uint8_t * a,const uint8_t * b) |
119 | { |
120 | size_t n = lz4_nmatch32(a,b); |
121 | return (n == 32)?(32 + lz4_nmatch32(a+32,b+32)):n; |
122 | } |
123 | |
124 | // Compile-time selection, return number of matching bytes 0..N at positions A and B. |
125 | static inline size_t lz4_nmatch(int N, const uint8_t * a, const uint8_t * b) |
126 | { |
127 | switch (N) { |
128 | case 4: return lz4_nmatch4(a,b); |
129 | case 8: return lz4_nmatch8(a,b); |
130 | case 16: return lz4_nmatch16(a,b); |
131 | case 32: return lz4_nmatch32(a,b); |
132 | case 64: return lz4_nmatch64(a,b); |
133 | } |
134 | __builtin_trap(); // FAIL |
135 | } |
136 | |
// Store LENGTH in DST using the literal_length/match_length extension scheme: X is the sum of all bytes until we reach a byte < 0xff.
// We are allowed to access a constant number of bytes above DST_END.
// Return incremented DST pointer on success, and 0 on failure
static inline uint8_t *lz4_store_length(uint8_t * dst, const uint8_t * const end, uint32_t L) {
  (void)end; // bounds are guaranteed by the callers' safety margin
  // Bulk phase: each iteration emits 16 bytes of 0xff (worth 16*255 of
  // length). Looping while L >= 17*255 (not 16*255) guarantees the tail
  // below needs at most 16 bytes of 0xff plus one terminator byte.
  while (L >= 17*255) {
    lz4_fill16(dst);
    dst += 16;
    L -= 16*255;
  }
  // Tail: L < 17*255 here, so L/255 <= 16 — one fill16 covers all the
  // remaining 0xff bytes; the final byte (< 0xff) terminates the run.
  lz4_fill16(dst);
  //DRKTODO verify these modulos/divisions are optimally handled by clang
  dst += L/255;
  *dst++ = L%255;
  return dst;
}
153 | |
154 | static inline uint32_t clamp(uint32_t x, uint32_t max) __attribute__((overloadable)) { return x > max ? max : x; } |
155 | |
// Copy L literal bytes from SRC to DST, in 16- then 32-byte chunks.
// NOTE(review): deliberately over-copies — may write up to ~31 bytes past
// DST+L (and always at least 16 bytes, even when L < 16). Callers must
// guarantee that much slack in both buffers (LZ4_GOFAST_SAFETY_MARGIN).
// Returns DST + L, i.e. the logical (not over-copied) end position.
static inline uint8_t *copy_literal(uint8_t *dst, const uint8_t * restrict src, uint32_t L) {
  uint8_t *end = dst + L;
  // Unconditional first 16 bytes covers the common short literal in one shot.
  { copy16(dst, src); dst += 16; src += 16; }
  // Then 32 bytes at a time until we have passed END.
  while (dst < end) { copy32(dst, src); dst += 32; src += 32; }
  return end;
}
162 | |
163 | static uint8_t *lz4_emit_match(uint32_t L, uint32_t M, uint32_t D, |
164 | uint8_t * restrict dst, |
165 | const uint8_t * const end, |
166 | const uint8_t * restrict src) { |
167 | // The LZ4 encoding scheme requires that M is at least 4, because |
168 | // the actual value stored by the encoding is M - 4. Check this |
169 | // requirement for debug builds. |
170 | assert(M >= 4 && "LZ4 encoding requires that M is at least 4" ); |
171 | // Having checked that M >= 4, translate M by four. |
172 | M -= 4; |
173 | // Similarly, we must have D < 2**16, because we use only two bytes |
174 | // to represent the value of D in the encoding. |
175 | assert(D <= USHRT_MAX && "LZ4 encoding requries that D can be stored in two bytes." ); |
176 | // Construct the command byte by clamping both L and M to 0 ... 15 |
177 | // and packing them into a single byte, and store it. |
178 | *dst++ = clamp(L, 15) << 4 | clamp(M, 15); |
179 | // If L is 15 or greater, we need to encode extra literal length bytes. |
180 | if (L >= 15) { |
181 | dst = lz4_store_length(dst, end, L - 15); |
182 | if (dst == 0 || dst + L >= end) return NULL; |
183 | } |
184 | // Copy the literal itself from src to dst. |
185 | dst = copy_literal(dst, src, L); |
186 | // Store match distance. |
187 | store2(dst, D); dst += 2; |
188 | // If M is 15 or greater, we need to encode extra match length bytes. |
189 | if (M >= 15) { |
190 | dst = lz4_store_length(dst, end, M - 15); |
191 | if (dst == 0) return NULL; |
192 | } |
193 | return dst; |
194 | } |
195 | |
196 | /* #ifndef LZ4_EARLY_ABORT */ |
197 | /* #define LZ4_EARLY_ABORT (1) */ |
198 | /* #endif */ |
199 | |
#if LZ4_EARLY_ABORT
// Runtime switch: when non-zero, lz4_encode_2gb samples compressibility
// early and abandons buffers that produce no matches.
int lz4_do_early_abort = 1;
// Counter: number of encodes abandoned by the early-abort heuristic.
int lz4_early_aborts = 0;
// Input bytes to examine before evaluating the early-abort heuristic.
#define LZ4_EARLY_ABORT_EVAL (448)
// NOTE(review): defined but not referenced in the active code below
// (only in the commented-out ratio check inside lz4_encode_2gb).
#define LZ4_EARLY_ABORT_MIN_COMPRESSION_FACTOR (20)
#endif /* LZ4_EARLY_ABORT */
206 | |
// Encode one block of at most 2GB from *src_ptr into *dst_ptr.
// - hash_table holds candidate match positions; offsets stored in it are
//   relative to src_begin.
// - On return, *dst_ptr/*src_ptr are advanced past the last fully encoded
//   match; unless skip_final_literals is set, the remaining input is also
//   emitted as a trailing literal run.
// - Both buffers must extend at least LZ4_GOFAST_SAFETY_MARGIN bytes
//   beyond the sizes given (the helpers over-read/over-write).
void lz4_encode_2gb(uint8_t ** dst_ptr,
                    size_t dst_size,
                    const uint8_t ** src_ptr,
                    const uint8_t * src_begin,
                    size_t src_size,
                    lz4_hash_entry_t hash_table[LZ4_COMPRESS_HASH_ENTRIES],
                    int skip_final_literals)
{
  uint8_t *dst = *dst_ptr; // current output stream position
  uint8_t *end = dst + dst_size - LZ4_GOFAST_SAFETY_MARGIN;
  const uint8_t *src = *src_ptr; // current input stream literal to encode
  const uint8_t *src_end = src + src_size - LZ4_GOFAST_SAFETY_MARGIN;
  const uint8_t *match_begin = 0; // first byte of matched sequence
  const uint8_t *match_end = 0; // first byte after matched sequence
#if LZ4_EARLY_ABORT
  uint8_t * const dst_begin = dst;
  uint32_t lz4_do_abort_eval = lz4_do_early_abort;
#endif

  while (dst < end)
  {
    ptrdiff_t match_distance = 0;
    // Match search, unrolled 4x: hash the 4-byte words at four consecutive
    // positions, look each up in the hash table, then install the new
    // positions. Combined with the three match_begin++ below, each loop
    // iteration advances 4 input bytes.
    for (match_begin = src; match_begin < src_end; match_begin += 1) {
      const uint32_t pos = (uint32_t)(match_begin - src_begin);
      const uint32_t w0 = load4(match_begin);
      const uint32_t w1 = load4(match_begin + 1);
      const uint32_t w2 = load4(match_begin + 2);
      const uint32_t w3 = load4(match_begin + 3);
      const int i0 = lz4_hash(w0);
      const int i1 = lz4_hash(w1);
      const int i2 = lz4_hash(w2);
      const int i3 = lz4_hash(w3);
      // Candidate positions and the words that were stored with them
      // (the stored word lets us reject hash collisions cheaply).
      const uint8_t *c0 = src_begin + hash_table[i0].offset;
      const uint8_t *c1 = src_begin + hash_table[i1].offset;
      const uint8_t *c2 = src_begin + hash_table[i2].offset;
      const uint8_t *c3 = src_begin + hash_table[i3].offset;
      const uint32_t m0 = hash_table[i0].word;
      const uint32_t m1 = hash_table[i1].word;
      const uint32_t m2 = hash_table[i2].word;
      const uint32_t m3 = hash_table[i3].word;
      // Install the four new positions before testing the candidates.
      hash_table[i0].offset = pos;
      hash_table[i0].word = w0;
      hash_table[i1].offset = pos + 1;
      hash_table[i1].word = w1;

      hash_table[i2].offset = pos + 2;
      hash_table[i2].word = w2;
      hash_table[i3].offset = pos + 3;
      hash_table[i3].word = w3;

      // A usable match must have identical word contents and a distance
      // inside the 64KB window (1..0xffff).
      match_distance = (match_begin - c0);
      if (w0 == m0 && match_distance < 0x10000 && match_distance > 0) {
        match_end = match_begin + 4;
        goto EXPAND_FORWARD;
      }

      match_begin++;
      match_distance = (match_begin - c1);
      if (w1 == m1 && match_distance < 0x10000 && match_distance > 0) {
        match_end = match_begin + 4;
        goto EXPAND_FORWARD;
      }

      match_begin++;
      match_distance = (match_begin - c2);
      if (w2 == m2 && match_distance < 0x10000 && match_distance > 0) {
        match_end = match_begin + 4;
        goto EXPAND_FORWARD;
      }

      match_begin++;
      match_distance = (match_begin - c3);
      if (w3 == m3 && match_distance < 0x10000 && match_distance > 0) {
        match_end = match_begin + 4;
        goto EXPAND_FORWARD;
      }

#if LZ4_EARLY_ABORT
      //DRKTODO: Evaluate unrolling further. 2xunrolling had some modest benefits
      // After LZ4_EARLY_ABORT_EVAL input bytes with no output produced
      // (dstd == 0 means no match has been emitted yet), assume the data
      // is incompressible and give up. Otherwise stop re-evaluating.
      if (lz4_do_abort_eval && ((pos) >= LZ4_EARLY_ABORT_EVAL)) {
        ptrdiff_t dstd = dst - dst_begin;

        if (dstd == 0) {
          lz4_early_aborts++;
          return;
        }

        /* if (dstd >= pos) { */
        /*   return; */
        /* } */
        /* ptrdiff_t cbytes = pos - dstd; */
        /* if ((cbytes * LZ4_EARLY_ABORT_MIN_COMPRESSION_FACTOR) > pos) { */
        /*   return; */
        /* } */
        lz4_do_abort_eval = 0;
      }
#endif
    }

    if (skip_final_literals) { *src_ptr = src; *dst_ptr = dst; return; } // do not emit the final literal sequence

    // Emit a trailing literal that covers the remainder of the source buffer,
    // if we can do so without exceeding the bounds of the destination buffer.
    size_t src_remaining = src_end + LZ4_GOFAST_SAFETY_MARGIN - src;
    if (src_remaining < 15) {
      // Short literal: length fits in the command byte; the fixed 16-byte
      // copy relies on the destination safety margin.
      *dst++ = (uint8_t)(src_remaining << 4);
      memcpy(dst, src, 16); dst += src_remaining;
    } else {
      // Long literal: command byte 0xf0 plus extension length bytes.
      *dst++ = 0xf0;
      dst = lz4_store_length(dst, end, (uint32_t)(src_remaining - 15));
      if (dst == 0 || dst + src_remaining >= end) return;
      memcpy(dst, src, src_remaining); dst += src_remaining;
    }
    *dst_ptr = dst;
    *src_ptr = src + src_remaining;
    return;

  EXPAND_FORWARD:

    // Expand match forward, LZ4_MATCH_SEARCH_LOOP_SIZE bytes per probe.
    {
      const uint8_t * ref_end = match_end - match_distance;
      while (match_end < src_end)
      {
        size_t n = lz4_nmatch(LZ4_MATCH_SEARCH_LOOP_SIZE, ref_end, match_end);
        if (n < LZ4_MATCH_SEARCH_LOOP_SIZE) { match_end += n; break; }
        match_end += LZ4_MATCH_SEARCH_LOOP_SIZE;
        ref_end += LZ4_MATCH_SEARCH_LOOP_SIZE;
      }
    }

    // Expand match backward, absorbing pending literal bytes into the
    // match where the reference also agrees.
    {
      // match_begin_min = max(src_begin + match_distance,literal)
      const uint8_t * match_begin_min = src_begin + match_distance;
      match_begin_min = (match_begin_min < src)?src:match_begin_min;
      const uint8_t * ref_begin = match_begin - match_distance;

      while (match_begin > match_begin_min && ref_begin[-1] == match_begin[-1] ) { match_begin -= 1; ref_begin -= 1; }
    }

    // Emit match: literals from src up to match_begin, then the match itself.
    dst = lz4_emit_match((uint32_t)(match_begin - src), (uint32_t)(match_end - match_begin), (uint32_t)match_distance, dst, end, src);
    if (!dst) return;

    // Update state
    src = match_end;

    // Update return values to include the last fully encoded match
    *dst_ptr = dst;
    *src_ptr = src;
  }
}
360 | |
361 | #endif |
362 | |
// LZ4_RAW encode: compress src_buffer into dst_buffer, using hash_table
// as scratch state. Input is processed in sub-2GB blocks so the 32-bit
// offsets in lz4_encode_2gb's hash table stay valid.
// Returns the number of bytes produced, or 0 on failure (output did not
// fit, or too many uncompressible trailing literals at a block boundary).
size_t lz4raw_encode_buffer(uint8_t * __restrict dst_buffer, size_t dst_size,
                            const uint8_t * __restrict src_buffer, size_t src_size,
                            lz4_hash_entry_t hash_table[LZ4_COMPRESS_HASH_ENTRIES])
{
  // Initialize hash table
  // Sentinel entry: offset 0x80000000 lies outside any sub-2GB block, so
  // a fresh entry can never pass the distance checks in the match search.
  const lz4_hash_entry_t HASH_FILL = { .offset = 0x80000000, .word = 0x0 };

  const uint8_t * src = src_buffer;
  uint8_t * dst = dst_buffer;

  // We need several blocks because our base function is limited to 2GB input
  const size_t BLOCK_SIZE = 0x7ffff000;
  while (src_size > 0)
  {
    //DRKTODO either implement pattern4 or figure out optimal unroll
    //DRKTODO: bizarrely, with plain O3 the compiler generates a single
    //DRKTODO: scalar STP per loop iteration with the stock loop
    //DRKTODO If hand unrolled, it switches to NEON store pairs
    // Reset hash table for each block
    /* #if __STDC_HOSTED__ */
    /* memset_pattern8(hash_table, &HASH_FILL, lz4_encode_scratch_size); */
    /* #else */
    /* for (int i=0;i<LZ4_COMPRESS_HASH_ENTRIES;i++) hash_table[i] = HASH_FILL; */
    /* #endif */

    // 4x-unrolled fill. NOTE(review): assumes LZ4_COMPRESS_HASH_ENTRIES
    // is a multiple of 4 — confirm against lz4.h.
    for (int i=0;i<LZ4_COMPRESS_HASH_ENTRIES;) {
      hash_table[i++] = HASH_FILL;
      hash_table[i++] = HASH_FILL;
      hash_table[i++] = HASH_FILL;
      hash_table[i++] = HASH_FILL;
    }

    // Bytes to encode in this block
    const size_t src_to_encode = src_size > BLOCK_SIZE ? BLOCK_SIZE : src_size;

    // Run the encoder, only the last block emits final literals. Allows concatenation of encoded payloads.
    // Blocks are encoded independently, so src_begin is set to each block origin instead of src_buffer
    uint8_t * dst_start = dst;
    const uint8_t * src_start = src;
    lz4_encode_2gb(&dst, dst_size, &src, src, src_to_encode, hash_table, src_to_encode < src_size);

    // Check progress
    size_t dst_used = dst - dst_start;
    size_t src_used = src - src_start; // src_used <= src_to_encode
    if (src_to_encode == src_size && src_used < src_to_encode) return 0; // FAIL to encode last block

    // Note that there is a potential problem here in case of non compressible data requiring more blocks.
    // We may end up here with src_used very small, or even 0, and will not be able to make progress during
    // compression. We FAIL unless the length of literals remaining at the end is small enough.
    if (src_to_encode < src_size && src_to_encode - src_used >= (1<<16)) return 0; // FAIL too many literals

    // Update counters (SRC and DST already have been updated)
    src_size -= src_used;
    dst_size -= dst_used;
  }

  return (size_t)(dst - dst_buffer); // bytes produced
}
421 | |
// 16-byte vector type with 1-byte alignment (clang ext_vector_type),
// suitable for unaligned 16-byte loads/stores.
// NOTE(review): not referenced in this chunk — presumably used by code
// elsewhere in the file or kept for the decoder's copy helpers; verify.
typedef uint32_t lz4_uint128 __attribute__((ext_vector_type(4))) __attribute__((__aligned__(1)));
423 | |
// Safe (fully bounds-checked) C decoder for LZ4 raw streams.
// Decodes from *src_ptr (up to src_end) into *dst_ptr (dst_begin..dst_end);
// dst_begin is only used to validate back-references.
// On success returns 0 with *dst_ptr/*src_ptr advanced — either the whole
// input was consumed or the output buffer filled up (output is truncated,
// not failed). Returns 1 on malformed/truncated input; the last good
// positions were already stored back into *dst_ptr/*src_ptr each iteration.
int lz4_decode(uint8_t ** dst_ptr,
               uint8_t * dst_begin,
               uint8_t * dst_end,
               const uint8_t ** src_ptr,
               const uint8_t * src_end)
{
  uint8_t * dst = *dst_ptr;
  const uint8_t * src = *src_ptr;

  // Require dst_end > dst.
  if (dst_end <= dst) goto OUT_FULL;

  while (src < src_end)
  {
    // Keep last good position
    *src_ptr = src;
    *dst_ptr = dst;

    uint8_t cmd = *src++; // 1 byte encoding literal+(match-4) length: LLLLMMMM
    uint32_t literalLength = (cmd >> 4) & 15; // 0..15
    uint32_t matchLength = 4 + (cmd & 15); // 4..19

    // extra bytes for literalLength
    // (literal length 15 means: add following bytes until one is < 255)
    if (__improbable(literalLength == 15))
    {
      uint8_t s;
      do {
#if DEBUG_LZ4_DECODE_ERRORS
        if (__improbable(src >= src_end)) printf("Truncated SRC literal length\n" );
#endif
        if (__improbable(src >= src_end)) goto IN_FAIL; // unexpected end of input (1 byte needed)
        s = *src++;
        literalLength += s;
      } while (__improbable(s == 255));
    }

    // copy literal
#if DEBUG_LZ4_DECODE_ERRORS
    if (__improbable(literalLength > (size_t)(src_end - src))) printf("Truncated SRC literal\n" );
#endif
    if (__improbable(literalLength > (size_t)(src_end - src))) goto IN_FAIL;
    if (__improbable(literalLength > (size_t)(dst_end - dst))) {
      // literal will take us past the end of the destination buffer,
      // so we can only copy part of it.
      literalLength = (uint32_t)(dst_end - dst);
      memcpy(dst, src, literalLength);
      dst += literalLength;
      goto OUT_FULL;
    }
    memcpy(dst,src,literalLength);
    src += literalLength;
    dst += literalLength;

    // A stream may legitimately end right after a literal run.
    if (__improbable(src >= src_end)) goto OUT_FULL; // valid end of stream
#if DEBUG_LZ4_DECODE_ERRORS
    if (__improbable(2 > (size_t)(src_end - src))) printf("Truncated SRC distance\n" );
#endif
    if (__improbable(2 > (size_t)(src_end - src))) goto IN_FAIL; // unexpected end of input (2 bytes needed)

    //DRKTODO: this causes an alignment increase warning (legitimate?)
    //DRKTODO: cast of char * to uint16_t*
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wcast-align"

    // match distance
    uint64_t matchDistance = *(const uint16_t *)src; // 0x0000 <= matchDistance <= 0xffff
#pragma clang diagnostic pop
    src += 2;
#if DEBUG_LZ4_DECODE_ERRORS
    if (matchDistance == 0) printf("Invalid match distance D = 0\n" );
#endif
    if (__improbable(matchDistance == 0)) goto IN_FAIL; // 0x0000 invalid
    uint8_t * ref = dst - matchDistance;
#if DEBUG_LZ4_DECODE_ERRORS
    if (__improbable(ref < dst_begin)) printf("Invalid reference D=0x%llx dst_begin=%p dst=%p dst_end=%p\n" ,matchDistance,dst_begin,dst,dst_end);
#endif
    if (__improbable(ref < dst_begin)) goto OUT_FAIL; // out of range

    // extra bytes for matchLength
    // (encoded match length 15, i.e. matchLength 19, means: add following
    // bytes until one is < 255)
    if (__improbable(matchLength == 19))
    {
      uint8_t s;
      do {
#if DEBUG_LZ4_DECODE_ERRORS
        if (__improbable(src >= src_end)) printf("Truncated SRC match length\n" );
#endif
        if (__improbable(src >= src_end)) goto IN_FAIL; // unexpected end of input (1 byte needed)
        s = *src++;
        matchLength += s;
      } while (__improbable(s == 255));
    }

    // copy match (may overlap)
    // Byte-by-byte copy is required: when matchDistance < matchLength the
    // source region overlaps the bytes being produced (run-length effect).
    if (__improbable(matchLength > (size_t)(dst_end - dst))) {
      // match will take us past the end of the destination buffer,
      // so we can only copy part of it.
      matchLength = (uint32_t)(dst_end - dst);
      for (uint32_t i=0; i<matchLength; ++i) dst[i] = ref[i];
      dst += matchLength;
      goto OUT_FULL;
    }
    for (uint64_t i=0;i<matchLength;i++) dst[i] = ref[i];
    dst += matchLength;
  }

  // We reached the end of the input buffer after a full instruction
OUT_FULL:
  // Or we reached the end of the output buffer
  *dst_ptr = dst;
  *src_ptr = src;
  return 0;

  // Error conditions
OUT_FAIL:
IN_FAIL:
  return 1; // FAIL
}
541 | |