1/*
2 * Copyright (c) 2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#ifndef __COMPRESSION_H
30#define __COMPRESSION_H
31
32#include <stdint.h>
33#include <stddef.h>
34#include <os/base.h>
35
36/*!
37 * @enum compression_algorithm_t
38 * @abstract Tag used to select a compression algorithm.
39 * @discussion Further details on the supported formats, and their implementation:
40 *
41 * - LZ4 is an extremely high-performance compressor. The open source version
42 * is already one of the fastest compressors of which we are aware, and we
43 * have optimized it still further in our implementation. The encoded format
44 * we produce and consume is compatible with the open source version, except
45 * that we add a very simple frame to the raw stream to allow some additional
46 * validation and functionality.
47 *
48 * The frame is documented here so that you can easily wrap another LZ4
49 * encoder/decoder to produce/consume the same data stream if necessary. An
50 * LZ4 encoded buffer is a sequence of blocks, each of which begins with a
51 * header. There are three possible headers:
52 *
53 * a "compressed block header" is (hex) 62 76 34 31, followed by the
54 * size in bytes of the decoded (plaintext) data represented by the
55 * block and the size (in bytes) of the encoded data stored in the
56 * block. Both size fields are stored as (possibly unaligned) 32-bit
57 * little-endian values. The compressed block header is followed
58 * immediately by the actual lz4-encoded data stream.
59 *
60 * an "uncompressed block header" is (hex) 62 76 34 2d, followed by the
61 * size of the data stored in the uncompressed block as a (possibly
62 * unaligned) 32-bit little-endian value. The uncompressed block header
63 * is followed immediately by the uncompressed data buffer of the
64 * specified size.
65 *
66 * an "end of stream header" is (hex) 62 76 34 24, and marks the end
67 * of the lz4 frame. No further data may be written or read beyond
68 * this header.
69 *
70 * - SMB (Server Message Block) is a protocol for sharing files, printers
71 * and other abstractions over a computer network. SMB supports compression
72 * to speed up transfers. The following SMB compression algorithms are
73 * supported:
74 *
75 * ---------------|---------|---------|-------|---------------------------
76 * Algorithm | Encoder | Decoder | Ratio | Encoder / decoder memory
77 * ---------------|---------|---------|-------|---------------------------
78 * LZ77 | fastest | fastest | 2.3x | 66 KB / 0 KB
79 * LZ77+Huffman | slowest | slowest | 2.8x | 172 KB / 6 KB
80 * LZNT1 | fast | fastest | 2.0x | 33 KB / 0 KB
81 * ---------------|---------|---------|-------|---------------------------
82 */
83
typedef enum {
    /* LZ4 + simple frame format (buffer + stream API) */
    COMPRESSION_LZ4       = 0x100,
    /* LZ4 (buffer API only) */
    COMPRESSION_LZ4_RAW   = 0x101,
    /* SMB LZNT1 (buffer API only) */
    COMPRESSION_SMB_LZNT1 = 0xC00,
    /* SMB LZ77 (buffer API only) */
    COMPRESSION_SMB_LZ77  = 0xC10,
    /* SMB LZ77-HUFF (buffer API only) */
    COMPRESSION_SMB_LZ77H = 0xC20,
} compression_algorithm_t;
91
92// =================================================================================================================
93#pragma mark - Buffer API
94
95/*!
96 * @abstract Get the minimum scratch buffer size for the specified compression algorithm encoder.
97 * @param algorithm The compression algorithm for which the scratch space will be used.
98 * @return The number of bytes to allocate as a scratch buffer for use to encode with the specified
99 * compression algorithm. This number may be 0.
100 */
101typedef size_t (*compression_encode_scratch_buffer_size_proc)
102(compression_algorithm_t algorithm);
103
104/*!
105 * @abstract Compresses a buffer.
106 * @param dst_buffer Pointer to the first byte of the destination buffer.
107 * @param dst_size Size of the destination buffer in bytes.
108 * @param src_buffer Pointer to the first byte of the source buffer.
109 * @param src_size Size of the source buffer in bytes.
110 * @param scratch_buffer A pointer to scratch space that the routine can use for temporary
111 * storage during compression. To determine how much space to allocate for this
112 * scratch space, call compression_encode_scratch_buffer_size(algorithm). Scratch space
113 * may be re-used across multiple (serial) calls to _encode and _decode.
114 * Can be NULL, if an algorithm does not need any scratch space.
115 * @param algorithm The compression algorithm to be used.
116 * @return The number of bytes written to the destination buffer if the input is
117 * is successfully compressed. If the entire input cannot be compressed to fit
118 * into the provided destination buffer, or an error occurs, 0 is returned.
119 */
120typedef size_t (*compression_encode_buffer_proc)
121(uint8_t* dst_buffer, size_t dst_size,
122 const uint8_t* src_buffer, size_t src_size,
123 void* scratch_buffer, compression_algorithm_t algorithm);
124
125/*!
126 * @abstract Get the minimum scratch buffer size for the specified compression algorithm decoder.
127 * @param algorithm The compression algorithm for which the scratch space will be used.
128 * @return The number of bytes to allocate as a scratch buffer for use to decode with the specified
129 * compression algorithm. This number may be 0.
130 */
131typedef size_t (*compression_decode_scratch_buffer_size_proc)
132(compression_algorithm_t algorithm);
133
134/*!
135 * @abstract Decompresses a buffer.
136 * @param dst_buffer Pointer to the first byte of the destination buffer.
137 * @param dst_size Size of the destination buffer in bytes.
138 * @param src_buffer Pointer to the first byte of the source buffer.
139 * @param src_size Size of the source buffer in bytes.
140 * @param scratch_buffer A pointer to scratch space that the routine can use for temporary
141 * storage during decompression. To determine how much space to allocate for this
142 * scratch space, call compression_decode_scratch_buffer_size(algorithm). Scratch space
143 * may be re-used across multiple (serial) calls to _encode and _decode.
144 * Can be NULL, if an algorithm does not need any scratch space.
145 * @param algorithm The compression algorithm to be used.
146 * @return The number of bytes written to the destination buffer if the input is
147 * is successfully decompressed. If there is not enough space in the destination
148 * buffer to hold the entire expanded output, only the first dst_size bytes will
149 * be written to the buffer and dst_size is returned. Note that this behavior
150 * differs from that of compression_encode. If an error occurs, 0 is returned.
151 * SMB algorithms do not support truncated decodes.
152 * SMB algorithms expect src_size to be exactly the size of the compressed input.
153 */
154typedef size_t (*compression_decode_buffer_proc)
155(uint8_t* dst_buffer, size_t dst_size,
156 const uint8_t* src_buffer, size_t src_size,
157 void* scratch_buffer, compression_algorithm_t algorithm);
158
159// =================================================================================================================
160#pragma mark - Stream API
161
/* Status codes returned by the compression_stream functions. */
typedef enum {
    /* An error occurred */
    COMPRESSION_STATUS_ERROR = -1,
    /* Success; for compression_stream_process, further calls are expected */
    COMPRESSION_STATUS_OK    = 0,
    /* compression_stream_process: there will be no further output from the stream */
    COMPRESSION_STATUS_END   = 1,
} compression_status_t;
168
/* Direction in which a compression_stream operates. */
typedef enum {
    /* Encode to a compressed stream */
    COMPRESSION_STREAM_ENCODE = 0,
    /* Decode from a compressed stream */
    COMPRESSION_STREAM_DECODE = 1,
} compression_stream_operation_t;
173
/* Bit values accepted in the flags argument of compression_stream_process. */
typedef enum {
    /* No further input will be added to the stream */
    COMPRESSION_STREAM_FINALIZE = 0x0001,
} compression_stream_flags_t;
178
typedef struct {
    /*
     * Management of the dst_ptr, dst_size, src_ptr, and src_size fields
     * is partially your responsibility: initialize them to describe
     * valid memory buffers before calling compression_stream_process.
     * Before it returns, compression_stream_process updates these
     * fields to account for the bytes of the src and dst buffers that
     * were successfully processed.
     */
    uint8_t       *dst_ptr;
    size_t         dst_size;
    const uint8_t *src_ptr;
    size_t         src_size;

    /*
     * Stream state object, managed by the compression_stream functions.
     * You should not ever directly access this field.
     */
    void          *state;
} compression_stream_t;
197
198/* There are two critical features of the stream interfaces:
199 *
200 * - They allow encoding and decoding to be resumed from where it ended
201 * when the end of a source or destination block was reached.
202 *
203 * - When resuming, the new source and destination blocks need not be
204 * contiguous with earlier blocks in the stream; all necessary state
205 * to resume compression is represented by the compression_stream_t object.
206 *
207 * These two properties enable tasks like:
208 *
209 * - Decoding a compressed stream into a buffer with the ability to grow
210 * the buffer and resume decoding if the expanded stream is too large
211 * to fit without repeating any work.
212 *
213 * - Encoding a stream as pieces of it become available without ever needing
214 * to create an allocation large enough to hold all the uncompressed data.
215 *
216 * The basic workflow for using the stream interface is as follows:
217 *
218 * 1. initialize the state of your compression_stream object by calling
219 * compression_stream_init with the operation parameter set to specify
220 * whether you will be encoding or decoding, and the chosen algorithm
221 * specified by the algorithm parameter. This will allocate storage
222 * for the state that allows encoding or decoding to be resumed
223 * across calls.
224 *
 *    2. set the dst_ptr, dst_size, src_ptr, and src_size fields of
 *       the compression_stream object to point to the next blocks to be
 *       processed.
228 *
 *    3. call compression_stream_process. If no further input will be added
 *       to the stream via subsequent calls, COMPRESSION_STREAM_FINALIZE
 *       should be set in the flags argument.
 *       If compression_stream_process returns COMPRESSION_STATUS_END, there
 *       will be no further output from the stream.
233 *
234 * 4. repeat steps 2 and 3 as necessary to process the entire stream.
235 *
236 * 5. call compression_stream_destroy to free the state object in the
237 * compression_stream.
238 */
239
240/*!
241 * @abstract Initialize a compression_stream for
242 * encoding (if operation is COMPRESSION_STREAM_ENCODE) or
243 * decoding (if operation is COMPRESSION_STREAM_DECODE).
244 * @param stream Pointer to the compression_stream object to be initialized.
245 * @param operation Specifies whether the stream is to initialized for encoding or decoding.
246 * Must be either COMPRESSION_STREAM_ENCODE or COMPRESSION_STREAM_DECODE.
247 * @param algorithm The compression algorithm to be used. Must be one of the values specified
248 * in the compression_algorithm_t enum.
249 * @discussion This call initializes all fields of the compression_stream to zero, except for state;
250 * this routine allocates storage to capture the internal state of the encoding or decoding
251 * process so that it may be resumed. This storage is tracked via the state parameter.
252 * @return COMPRESSION_STATUS_OK if the stream was successfully initialized, or
253 * COMPRESSION_STATUS_ERROR if an error occurred.
254 */
255typedef compression_status_t (*compression_stream_init_proc)
256(compression_stream_t* stream,
257 compression_stream_operation_t operation,
258 compression_algorithm_t algorithm);
259
260/*!
261 * @abstract Functionally equivalent to compression_stream_destroy then compression_stream_init, but keeps the allocated state buffer.
262 * @return Status of the virtual compression_stream_init call
263 */
264typedef compression_status_t (*compression_stream_reinit_proc)
265(compression_stream_t* stream,
266 compression_stream_operation_t operation,
267 compression_algorithm_t algorithm);
268
269/*!
270 * @abstract Cleans up state information stored in a compression_stream object.
271 * @discussion Use this to free memory allocated by compression_stream_init. After calling
272 * this function, you will need to re-init the compression_stream object before
273 * using it again.
274 */
275typedef compression_status_t (*compression_stream_destroy_proc)
276(compression_stream_t* stream);
277
278/*!
279 * @abstract Encodes or decodes a block of the stream.
280 * @param stream Pointer to the compression_stream object to be operated on. Before calling
281 * this function, you must initialize the stream object by calling
282 * compression_stream_init, and setting the user-managed fields to describe your
283 * input and output buffers. When compression_stream_process returns, those
284 * fields will have been updated to account for the bytes that were successfully
285 * encoded or decoded in the course of its operation.
286 * @param flags Binary OR of zero or more compression_stream_flags:
287 * COMPRESSION_STREAM_FINALIZE
288 * If set, indicates that no further input will be added to the stream, and
289 * thus that the end of stream should be indicated if the input block is
290 * completely processed.
291 * @discussion Processes the buffers described by the stream object until the source buffer
292 * becomes empty, or the destination buffer becomes full, or the entire stream is
293 * processed, or an error is encountered.
294 * @return When encoding COMPRESSION_STATUS_END is returned only if all input has been
295 * read from the source, all output (including an end-of-stream marker) has been
296 * written to the destination, and COMPRESSION_STREAM_FINALIZE bit is set.
297 *
298 * When decoding COMPRESSION_STATUS_END is returned only if all input (including
299 * and end-of-stream marker) has been read from the source, and all output has
300 * been written to the destination.
301 *
302 * COMPRESSION_STATUS_OK is returned if all data in the source buffer is consumed,
303 * or all space in the destination buffer is used. In that case, further calls
304 * to compression_stream_process are expected, providing more data in the source
305 * buffer, or more space in the destination buffer.
306 *
307 * COMPRESSION_STATUS_ERROR is returned if an error is encountered (if the
308 * encoded data is corrupted, for example).
309 *
310 * When decoding a valid stream, the end of stream will be detected from the contents
311 * of the input, and COMPRESSION_STATUS_END will be returned in that case, even if
312 * COMPRESSION_STREAM_FINALIZE is not set, or more input is provided.
313 *
314 * When decoding a corrupted or truncated stream, if COMPRESSION_STREAM_FINALIZE is not
315 * set to notify the decoder that no more input is coming, the decoder will not consume
316 * or produce any data, and return COMPRESSION_STATUS_OK. In that case, the client code
317 * will call compression_stream_process again with the same state, entering an infinite loop.
318 * To avoid this, it is strongly advised to always set COMPRESSION_STREAM_FINALIZE when
319 * no more input is expected, for both encoding and decoding.
320 */
321typedef compression_status_t (*compression_stream_process_proc)
322(compression_stream_t* stream, int flags);
323
/*!
 * @abstract Determines the compression algorithm from the first 4 bytes of compressed data.
 * @param data Points to 4 bytes at the beginning of the compressed data.
 * @discussion Identifies which compression algorithm was used to generate the given
 *  data bytes.
 * @return A valid compression_algorithm_t on success, or -1 if the data bytes do not
 *  correspond to any supported algorithm.
 */
typedef int (*compression_stream_identify_algorithm_proc)(
    const uint8_t *data);
332
333// =================================================================================================================
334#pragma mark - Kernel interface
335
336typedef struct{
337 // Stream API
338 compression_stream_init_proc compression_stream_init;
339 compression_stream_reinit_proc compression_stream_reinit;
340 compression_stream_destroy_proc compression_stream_destroy;
341 compression_stream_process_proc compression_stream_process;
342 compression_stream_identify_algorithm_proc compression_stream_identify_algorithm;
343
344 // Buffer API
345 compression_encode_scratch_buffer_size_proc compression_encode_scratch_buffer_size;
346 compression_encode_buffer_proc compression_encode_buffer;
347 compression_decode_scratch_buffer_size_proc compression_decode_scratch_buffer_size;
348 compression_decode_buffer_proc compression_decode_buffer;
349} compression_ki_t;
350
351__BEGIN_DECLS
352
353/**
354 * @abstract The compression interface that was registered.
355 */
356extern const compression_ki_t * compression_ki_ptr;
357
358/**
359 * @abstract Registers the compression kext interface for use within the kernel proper.
360 * @param ki The interface to register.
361 * @discussion This routine may only be called once and must be called before late-const has been applied to kernel memory.
362 */
363OS_EXPORT OS_NONNULL1
364void compression_interface_register(const compression_ki_t *ki);
365
366#if PRIVATE
367
/* Callback type: takes no arguments and returns nothing. */
typedef void (*registration_callback_t)(void);

/*
 * Sets the registration callback.
 * NOTE(review): presumably the callback is invoked when the compression
 * interface is registered -- confirm against the implementation.
 */
void
compression_interface_set_registration_callback(registration_callback_t callback);
371
372#endif /* PRIVATE */
373
374__END_DECLS
375
376#endif // __COMPRESSION_H
377