1 | /* |
2 | * Copyright (c) 2021 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | |
29 | #ifndef __COMPRESSION_H |
30 | #define __COMPRESSION_H |
31 | |
32 | #include <stdint.h> |
33 | #include <stddef.h> |
34 | #include <os/base.h> |
35 | |
36 | /*! |
37 | * @enum compression_algorithm_t |
38 | * @abstract Tag used to select a compression algorithm. |
39 | * @discussion Further details on the supported formats, and their implementation: |
40 | * |
41 | * - LZ4 is an extremely high-performance compressor. The open source version |
42 | * is already one of the fastest compressors of which we are aware, and we |
43 | * have optimized it still further in our implementation. The encoded format |
44 | * we produce and consume is compatible with the open source version, except |
45 | * that we add a very simple frame to the raw stream to allow some additional |
46 | * validation and functionality. |
47 | * |
48 | * The frame is documented here so that you can easily wrap another LZ4 |
49 | * encoder/decoder to produce/consume the same data stream if necessary. An |
50 | * LZ4 encoded buffer is a sequence of blocks, each of which begins with a |
51 | * header. There are three possible headers: |
52 | * |
53 | * a "compressed block header" is (hex) 62 76 34 31, followed by the |
54 | * size in bytes of the decoded (plaintext) data represented by the |
55 | * block and the size (in bytes) of the encoded data stored in the |
56 | * block. Both size fields are stored as (possibly unaligned) 32-bit |
57 | * little-endian values. The compressed block header is followed |
58 | * immediately by the actual lz4-encoded data stream. |
59 | * |
60 | * an "uncompressed block header" is (hex) 62 76 34 2d, followed by the |
61 | * size of the data stored in the uncompressed block as a (possibly |
62 | * unaligned) 32-bit little-endian value. The uncompressed block header |
63 | * is followed immediately by the uncompressed data buffer of the |
64 | * specified size. |
65 | * |
66 | * an "end of stream header" is (hex) 62 76 34 24, and marks the end |
67 | * of the lz4 frame. No further data may be written or read beyond |
68 | * this header. |
69 | * |
70 | * - SMB (Server Message Block) is a protocol for sharing files, printers |
71 | * and other abstractions over a computer network. SMB supports compression |
72 | * to speed up transfers. The following SMB compression algorithms are |
73 | * supported: |
74 | * |
75 | * ---------------|---------|---------|-------|--------------------------- |
76 | * Algorithm | Encoder | Decoder | Ratio | Encoder / decoder memory |
77 | * ---------------|---------|---------|-------|--------------------------- |
78 | * LZ77 | fastest | fastest | 2.3x | 66 KB / 0 KB |
79 | * LZ77+Huffman | slowest | slowest | 2.8x | 172 KB / 6 KB |
80 | * LZNT1 | fast | fastest | 2.0x | 33 KB / 0 KB |
81 | * ---------------|---------|---------|-------|--------------------------- |
82 | */ |
83 | |
typedef enum {
	/* Available through both the buffer API and the stream API. */
	COMPRESSION_LZ4       = 0x100, /* LZ4 + simple frame format */

	/* Available through the buffer API only. */
	COMPRESSION_LZ4_RAW   = 0x101, /* LZ4 */
	COMPRESSION_SMB_LZNT1 = 0xC00, /* SMB LZNT1 */
	COMPRESSION_SMB_LZ77  = 0xC10, /* SMB LZ77 */
	COMPRESSION_SMB_LZ77H = 0xC20, /* SMB LZ77-HUFF */
} compression_algorithm_t;
91 | |
92 | // ================================================================================================================= |
93 | #pragma mark - Buffer API |
94 | |
95 | /*! |
96 | * @abstract Get the minimum scratch buffer size for the specified compression algorithm encoder. |
97 | * @param algorithm The compression algorithm for which the scratch space will be used. |
98 | * @return The number of bytes to allocate as a scratch buffer for use to encode with the specified |
99 | * compression algorithm. This number may be 0. |
100 | */ |
101 | typedef size_t (*compression_encode_scratch_buffer_size_proc) |
102 | (compression_algorithm_t algorithm); |
103 | |
104 | /*! |
105 | * @abstract Compresses a buffer. |
106 | * @param dst_buffer Pointer to the first byte of the destination buffer. |
107 | * @param dst_size Size of the destination buffer in bytes. |
108 | * @param src_buffer Pointer to the first byte of the source buffer. |
109 | * @param src_size Size of the source buffer in bytes. |
110 | * @param scratch_buffer A pointer to scratch space that the routine can use for temporary |
111 | * storage during compression. To determine how much space to allocate for this |
112 | * scratch space, call compression_encode_scratch_buffer_size(algorithm). Scratch space |
113 | * may be re-used across multiple (serial) calls to _encode and _decode. |
114 | * Can be NULL, if an algorithm does not need any scratch space. |
115 | * @param algorithm The compression algorithm to be used. |
116 | * @return The number of bytes written to the destination buffer if the input is |
117 | * is successfully compressed. If the entire input cannot be compressed to fit |
118 | * into the provided destination buffer, or an error occurs, 0 is returned. |
119 | */ |
120 | typedef size_t (*compression_encode_buffer_proc) |
121 | (uint8_t* dst_buffer, size_t dst_size, |
122 | const uint8_t* src_buffer, size_t src_size, |
123 | void* scratch_buffer, compression_algorithm_t algorithm); |
124 | |
125 | /*! |
126 | * @abstract Get the minimum scratch buffer size for the specified compression algorithm decoder. |
127 | * @param algorithm The compression algorithm for which the scratch space will be used. |
128 | * @return The number of bytes to allocate as a scratch buffer for use to decode with the specified |
129 | * compression algorithm. This number may be 0. |
130 | */ |
131 | typedef size_t (*compression_decode_scratch_buffer_size_proc) |
132 | (compression_algorithm_t algorithm); |
133 | |
134 | /*! |
135 | * @abstract Decompresses a buffer. |
136 | * @param dst_buffer Pointer to the first byte of the destination buffer. |
137 | * @param dst_size Size of the destination buffer in bytes. |
138 | * @param src_buffer Pointer to the first byte of the source buffer. |
139 | * @param src_size Size of the source buffer in bytes. |
140 | * @param scratch_buffer A pointer to scratch space that the routine can use for temporary |
141 | * storage during decompression. To determine how much space to allocate for this |
142 | * scratch space, call compression_decode_scratch_buffer_size(algorithm). Scratch space |
143 | * may be re-used across multiple (serial) calls to _encode and _decode. |
144 | * Can be NULL, if an algorithm does not need any scratch space. |
145 | * @param algorithm The compression algorithm to be used. |
146 | * @return The number of bytes written to the destination buffer if the input is |
147 | * is successfully decompressed. If there is not enough space in the destination |
148 | * buffer to hold the entire expanded output, only the first dst_size bytes will |
149 | * be written to the buffer and dst_size is returned. Note that this behavior |
150 | * differs from that of compression_encode. If an error occurs, 0 is returned. |
151 | * SMB algorithms do not support truncated decodes. |
152 | * SMB algorithms expect src_size to be exactly the size of the compressed input. |
153 | */ |
154 | typedef size_t (*compression_decode_buffer_proc) |
155 | (uint8_t* dst_buffer, size_t dst_size, |
156 | const uint8_t* src_buffer, size_t src_size, |
157 | void* scratch_buffer, compression_algorithm_t algorithm); |
158 | |
159 | // ================================================================================================================= |
160 | #pragma mark - Stream API |
161 | |
/* Status codes returned by the compression_stream functions. */
typedef enum {
	COMPRESSION_STATUS_ERROR = -1,
	COMPRESSION_STATUS_OK    = 0,
	COMPRESSION_STATUS_END   = 1,
} compression_status_t;
168 | |
/* Direction in which a compression_stream operates. */
typedef enum {
	/* Encode to a compressed stream */
	COMPRESSION_STREAM_ENCODE = 0,
	/* Decode from a compressed stream */
	COMPRESSION_STREAM_DECODE = 1,
} compression_stream_operation_t;
173 | |
/* Flag bits accepted in the flags argument of compression_stream_process. */
typedef enum {
	COMPRESSION_STREAM_FINALIZE = 0x0001,
} compression_stream_flags_t;
178 | |
typedef struct {
	/*
	 * Buffer descriptors, managed jointly by the caller and the library.
	 * The caller must set dst_ptr, dst_size, src_ptr, and src_size so that
	 * they describe valid memory buffers before each call to
	 * compression_stream_process. Before returning,
	 * compression_stream_process updates these fields to account for the
	 * bytes of the src and dst buffers that were successfully processed.
	 */
	uint8_t       *dst_ptr;
	size_t         dst_size;
	const uint8_t *src_ptr;
	size_t         src_size;

	/*
	 * Stream state object, managed by the compression_stream functions.
	 * Never access this field directly.
	 */
	void          *state;
} compression_stream_t;
197 | |
198 | /* There are two critical features of the stream interfaces: |
199 | * |
200 | * - They allow encoding and decoding to be resumed from where it ended |
201 | * when the end of a source or destination block was reached. |
202 | * |
203 | * - When resuming, the new source and destination blocks need not be |
204 | * contiguous with earlier blocks in the stream; all necessary state |
205 | * to resume compression is represented by the compression_stream_t object. |
206 | * |
207 | * These two properties enable tasks like: |
208 | * |
209 | * - Decoding a compressed stream into a buffer with the ability to grow |
210 | * the buffer and resume decoding if the expanded stream is too large |
211 | * to fit without repeating any work. |
212 | * |
213 | * - Encoding a stream as pieces of it become available without ever needing |
214 | * to create an allocation large enough to hold all the uncompressed data. |
215 | * |
216 | * The basic workflow for using the stream interface is as follows: |
217 | * |
218 | * 1. initialize the state of your compression_stream object by calling |
219 | * compression_stream_init with the operation parameter set to specify |
220 | * whether you will be encoding or decoding, and the chosen algorithm |
221 | * specified by the algorithm parameter. This will allocate storage |
222 | * for the state that allows encoding or decoding to be resumed |
223 | * across calls. |
224 | * |
 * 2. set the dst_ptr, dst_size, src_ptr, and src_size fields of
 *    the compression_stream object to point to the next blocks to be
 *    processed.
228 | * |
 * 3. call compression_stream_process. If no further input will be added
 *    to the stream via subsequent calls, COMPRESSION_STREAM_FINALIZE should
 *    be set in the flags argument.
231 | * If compression_stream_process returns COMPRESSION_STATUS_END, there |
232 | * will be no further output from the stream. |
233 | * |
234 | * 4. repeat steps 2 and 3 as necessary to process the entire stream. |
235 | * |
236 | * 5. call compression_stream_destroy to free the state object in the |
237 | * compression_stream. |
238 | */ |
239 | |
240 | /*! |
241 | * @abstract Initialize a compression_stream for |
242 | * encoding (if operation is COMPRESSION_STREAM_ENCODE) or |
243 | * decoding (if operation is COMPRESSION_STREAM_DECODE). |
244 | * @param stream Pointer to the compression_stream object to be initialized. |
245 | * @param operation Specifies whether the stream is to initialized for encoding or decoding. |
246 | * Must be either COMPRESSION_STREAM_ENCODE or COMPRESSION_STREAM_DECODE. |
247 | * @param algorithm The compression algorithm to be used. Must be one of the values specified |
248 | * in the compression_algorithm_t enum. |
249 | * @discussion This call initializes all fields of the compression_stream to zero, except for state; |
250 | * this routine allocates storage to capture the internal state of the encoding or decoding |
251 | * process so that it may be resumed. This storage is tracked via the state parameter. |
252 | * @return COMPRESSION_STATUS_OK if the stream was successfully initialized, or |
253 | * COMPRESSION_STATUS_ERROR if an error occurred. |
254 | */ |
255 | typedef compression_status_t (*compression_stream_init_proc) |
256 | (compression_stream_t* stream, |
257 | compression_stream_operation_t operation, |
258 | compression_algorithm_t algorithm); |
259 | |
260 | /*! |
261 | * @abstract Functionally equivalent to compression_stream_destroy then compression_stream_init, but keeps the allocated state buffer. |
262 | * @return Status of the virtual compression_stream_init call |
263 | */ |
264 | typedef compression_status_t (*compression_stream_reinit_proc) |
265 | (compression_stream_t* stream, |
266 | compression_stream_operation_t operation, |
267 | compression_algorithm_t algorithm); |
268 | |
269 | /*! |
270 | * @abstract Cleans up state information stored in a compression_stream object. |
271 | * @discussion Use this to free memory allocated by compression_stream_init. After calling |
272 | * this function, you will need to re-init the compression_stream object before |
273 | * using it again. |
274 | */ |
275 | typedef compression_status_t (*compression_stream_destroy_proc) |
276 | (compression_stream_t* stream); |
277 | |
278 | /*! |
279 | * @abstract Encodes or decodes a block of the stream. |
280 | * @param stream Pointer to the compression_stream object to be operated on. Before calling |
281 | * this function, you must initialize the stream object by calling |
282 | * compression_stream_init, and setting the user-managed fields to describe your |
283 | * input and output buffers. When compression_stream_process returns, those |
284 | * fields will have been updated to account for the bytes that were successfully |
285 | * encoded or decoded in the course of its operation. |
286 | * @param flags Binary OR of zero or more compression_stream_flags: |
287 | * COMPRESSION_STREAM_FINALIZE |
288 | * If set, indicates that no further input will be added to the stream, and |
289 | * thus that the end of stream should be indicated if the input block is |
290 | * completely processed. |
291 | * @discussion Processes the buffers described by the stream object until the source buffer |
292 | * becomes empty, or the destination buffer becomes full, or the entire stream is |
293 | * processed, or an error is encountered. |
294 | * @return When encoding COMPRESSION_STATUS_END is returned only if all input has been |
295 | * read from the source, all output (including an end-of-stream marker) has been |
296 | * written to the destination, and COMPRESSION_STREAM_FINALIZE bit is set. |
297 | * |
 *         When decoding COMPRESSION_STATUS_END is returned only if all input (including
 *         an end-of-stream marker) has been read from the source, and all output has
 *         been written to the destination.
301 | * |
302 | * COMPRESSION_STATUS_OK is returned if all data in the source buffer is consumed, |
303 | * or all space in the destination buffer is used. In that case, further calls |
304 | * to compression_stream_process are expected, providing more data in the source |
305 | * buffer, or more space in the destination buffer. |
306 | * |
307 | * COMPRESSION_STATUS_ERROR is returned if an error is encountered (if the |
308 | * encoded data is corrupted, for example). |
309 | * |
310 | * When decoding a valid stream, the end of stream will be detected from the contents |
311 | * of the input, and COMPRESSION_STATUS_END will be returned in that case, even if |
312 | * COMPRESSION_STREAM_FINALIZE is not set, or more input is provided. |
313 | * |
314 | * When decoding a corrupted or truncated stream, if COMPRESSION_STREAM_FINALIZE is not |
315 | * set to notify the decoder that no more input is coming, the decoder will not consume |
316 | * or produce any data, and return COMPRESSION_STATUS_OK. In that case, the client code |
317 | * will call compression_stream_process again with the same state, entering an infinite loop. |
318 | * To avoid this, it is strongly advised to always set COMPRESSION_STREAM_FINALIZE when |
319 | * no more input is expected, for both encoding and decoding. |
320 | */ |
321 | typedef compression_status_t (*compression_stream_process_proc) |
322 | (compression_stream_t* stream, int flags); |
323 | |
/*!
 * @abstract Identifies the compression algorithm from the first 4 bytes of compressed data.
 * @param data  Pointer to the 4 bytes found at the beginning of the compressed data.
 * @discussion Determines which compression algorithm was used to generate the given data bytes.
 * @return A valid compression_algorithm_t value on success, or -1 when the data bytes do not
 *         correspond to any supported algorithm.
 */
typedef int (*compression_stream_identify_algorithm_proc)(
	const uint8_t *data);
332 | |
333 | // ================================================================================================================= |
334 | #pragma mark - Kernel interface |
335 | |
336 | typedef struct{ |
337 | // Stream API |
338 | compression_stream_init_proc compression_stream_init; |
339 | compression_stream_reinit_proc compression_stream_reinit; |
340 | compression_stream_destroy_proc compression_stream_destroy; |
341 | compression_stream_process_proc compression_stream_process; |
342 | compression_stream_identify_algorithm_proc compression_stream_identify_algorithm; |
343 | |
344 | // Buffer API |
345 | compression_encode_scratch_buffer_size_proc compression_encode_scratch_buffer_size; |
346 | compression_encode_buffer_proc compression_encode_buffer; |
347 | compression_decode_scratch_buffer_size_proc compression_decode_scratch_buffer_size; |
348 | compression_decode_buffer_proc compression_decode_buffer; |
349 | } compression_ki_t; |
350 | |
351 | __BEGIN_DECLS |
352 | |
353 | /** |
354 | * @abstract The compression interface that was registered. |
355 | */ |
356 | extern const compression_ki_t * compression_ki_ptr; |
357 | |
358 | /** |
359 | * @abstract Registers the compression kext interface for use within the kernel proper. |
360 | * @param ki The interface to register. |
361 | * @discussion This routine may only be called once and must be called before late-const has been applied to kernel memory. |
362 | */ |
363 | OS_EXPORT OS_NONNULL1 |
364 | void compression_interface_register(const compression_ki_t *ki); |
365 | |
#if PRIVATE

/* Callback type: takes no arguments and returns nothing. */
typedef void (*registration_callback_t)(void);

/*
 * Sets the registration callback.
 * NOTE(review): presumably the callback is invoked once a compression
 * interface has been registered - confirm against the implementation.
 */
void
compression_interface_set_registration_callback(registration_callback_t callback);

#endif /* PRIVATE */
373 | |
374 | __END_DECLS |
375 | |
376 | #endif // __COMPRESSION_H |
377 | |