1 /* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 // vim: expandtab:ts=8:sw=4:softtabstop=4:
3 /**
4  * \file        lzma/base.h
5  * \brief       Data types and functions used in many places in liblzma API
6  */
7 
8 /*
9  * Author: Lasse Collin
10  *
11  * This file has been put into the public domain.
12  * You can do whatever you want with this file.
13  *
14  * See ../lzma.h for information about liblzma as a whole.
15  */
16 
17 #ifndef LZMA_H_INTERNAL
18 #	error Never include this file directly. Use <lzma.h> instead.
19 #endif
20 
21 
22 /**
23  * \brief       Boolean
24  *
25  * This is here because C89 doesn't have stdbool.h. To set a value for
26  * variables having type lzma_bool, you can use
27  *   - C99's `true' and `false' from stdbool.h;
28  *   - C++'s internal `true' and `false'; or
29  *   - integers one (true) and zero (false).
30  */
31 typedef unsigned char lzma_bool;
32 
33 
34 /**
35  * \brief       Type of reserved enumeration variable in structures
36  *
37  * To avoid breaking library ABI when new features are added, several
38  * structures contain extra variables that may be used in future. Since
39  * sizeof(enum) can be different than sizeof(int), and sizeof(enum) may
40  * even vary depending on the range of enumeration constants, we specify
41  * a separate type to be used for reserved enumeration variables. All
42  * enumeration constants in liblzma API will be non-negative and less
43  * than 128, which should guarantee that the ABI won't break even when
44  * new constants are added to existing enumerations.
45  */
46 typedef enum {
47 	LZMA_RESERVED_ENUM      = 0
48 } lzma_reserved_enum;
49 
50 
51 /**
52  * \brief       Return values used by several functions in liblzma
53  *
54  * Check the descriptions of specific functions to find out which return
55  * values they can return. With some functions the return values may have
56  * more specific meanings than described here; those differences are
57  * described per-function basis.
58  */
59 typedef enum {
60 	LZMA_OK                 = 0,
61 		/**<
62 		 * \brief       Operation completed successfully
63 		 */
64 
65 	LZMA_STREAM_END         = 1,
66 		/**<
67 		 * \brief       End of stream was reached
68 		 *
69 		 * In encoder, LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, or
70 		 * LZMA_FINISH was finished. In decoder, this indicates
71 		 * that all the data was successfully decoded.
72 		 *
73 		 * In all cases, when LZMA_STREAM_END is returned, the last
74 		 * output bytes should be picked from strm->next_out.
75 		 */
76 
77 	LZMA_NO_CHECK           = 2,
78 		/**<
79 		 * \brief       Input stream has no integrity check
80 		 *
81 		 * This return value can be returned only if the
82 		 * LZMA_TELL_NO_CHECK flag was used when initializing
83 		 * the decoder. LZMA_NO_CHECK is just a warning, and
84 		 * the decoding can be continued normally.
85 		 *
86 		 * It is possible to call lzma_get_check() immediatelly after
87 		 * lzma_code has returned LZMA_NO_CHECK. The result will
88 		 * naturally be LZMA_CHECK_NONE, but the possibility to call
89 		 * lzma_get_check() may be convenient in some applications.
90 		 */
91 
92 	LZMA_UNSUPPORTED_CHECK  = 3,
93 		/**<
94 		 * \brief       Cannot calculate the integrity check
95 		 *
96 		 * The usage of this return value is different in encoders
97 		 * and decoders.
98 		 *
99 		 * Encoders can return this value only from the initialization
100 		 * function. If initialization fails with this value, the
101 		 * encoding cannot be done, because there's no way to produce
102 		 * output with the correct integrity check.
103 		 *
104 		 * Decoders can return this value only from lzma_code() and
105 		 * only if the LZMA_TELL_UNSUPPORTED_CHECK flag was used when
106 		 * initializing the decoder. The decoding can still be
107 		 * continued normally even if the check type is unsupported,
108 		 * but naturally the check will not be validated, and possible
109 		 * errors may go undetected.
110 		 *
111 		 * With decoder, it is possible to call lzma_get_check()
112 		 * immediatelly after lzma_code() has returned
113 		 * LZMA_UNSUPPORTED_CHECK. This way it is possible to find
114 		 * out what the unsupported Check ID was.
115 		 */
116 
117 	LZMA_GET_CHECK          = 4,
118 		/**<
119 		 * \brief       Integrity check type is now available
120 		 *
121 		 * This value can be returned only by the lzma_code() function
122 		 * and only if the decoder was initialized with the
123 		 * LZMA_TELL_ANY_CHECK flag. LZMA_GET_CHECK tells the
124 		 * application that it may now call lzma_get_check() to find
125 		 * out the Check ID. This can be used, for example, to
126 		 * implement a decoder that accepts only files that have
127 		 * strong enough integrity check.
128 		 */
129 
130 	LZMA_MEM_ERROR          = 5,
131 		/**<
132 		 * \brief       Cannot allocate memory
133 		 *
134 		 * Memory allocation failed, or the size of the allocation
135 		 * would be greater than SIZE_MAX.
136 		 *
137 		 * Due to internal implementation reasons, the coding cannot
138 		 * be continued even if more memory were made available after
139 		 * LZMA_MEM_ERROR.
140 		 */
141 
142 	LZMA_MEMLIMIT_ERROR     = 6,
143 		/**
144 		 * \brief       Memory usage limit was reached
145 		 *
146 		 * Decoder would need more memory than allowed by the
147 		 * specified memory usage limit. To continue decoding,
148 		 * the memory usage limit has to be increased with
149 		 * lzma_memlimit_set().
150 		 */
151 
152 	LZMA_FORMAT_ERROR       = 7,
153 		/**<
154 		 * \brief       File format not recognized
155 		 *
156 		 * The decoder did not recognize the input as supported file
157 		 * format. This error can occur, for example, when trying to
158 		 * decode .lzma format file with lzma_stream_decoder,
159 		 * because lzma_stream_decoder accepts only the .xz format.
160 		 */
161 
162 	LZMA_OPTIONS_ERROR      = 8,
163 		/**<
164 		 * \brief       Invalid or unsupported options
165 		 *
166 		 * Invalid or unsupported options, for example
167 		 *  - unsupported filter(s) or filter options; or
168 		 *  - reserved bits set in headers (decoder only).
169 		 *
170 		 * Rebuilding liblzma with more features enabled, or
171 		 * upgrading to a newer version of liblzma may help.
172 		 */
173 
174 	LZMA_DATA_ERROR         = 9,
175 		/**<
176 		 * \brief       Data is corrupt
177 		 *
178 		 * The usage of this return value is different in encoders
179 		 * and decoders. In both encoder and decoder, the coding
180 		 * cannot continue after this error.
181 		 *
182 		 * Encoders return this if size limits of the target file
183 		 * format would be exceeded. These limits are huge, thus
184 		 * getting this error from an encoder is mostly theoretical.
185 		 * For example, the maximum compressed and uncompressed
186 		 * size of a .xz Stream is roughly 8 EiB (2^63 bytes).
187 		 *
188 		 * Decoders return this error if the input data is corrupt.
189 		 * This can mean, for example, invalid CRC32 in headers
190 		 * or invalid check of uncompressed data.
191 		 */
192 
193 	LZMA_BUF_ERROR          = 10,
194 		/**<
195 		 * \brief       No progress is possible
196 		 *
197 		 * This error code is returned when the coder cannot consume
198 		 * any new input and produce any new output. The most common
199 		 * reason for this error is that the input stream being
200 		 * decoded is truncated or corrupt.
201 		 *
202 		 * This error is not fatal. Coding can be continued normally
203 		 * by providing more input and/or more output space, if
204 		 * possible.
205 		 *
206 		 * Typically the first call to lzma_code() that can do no
207 		 * progress returns LZMA_OK instead of LZMA_BUF_ERROR. Only
208 		 * the second consecutive call doing no progress will return
209 		 * LZMA_BUF_ERROR. This is intentional.
210 		 *
211 		 * With zlib, Z_BUF_ERROR may be returned even if the
212 		 * application is doing nothing wrong, so apps will need
213 		 * to handle Z_BUF_ERROR specially. The above hack
214 		 * guarantees that liblzma never returns LZMA_BUF_ERROR
215 		 * to properly written applications unless the input file
216 		 * is truncated or corrupt. This should simplify the
217 		 * applications a little.
218 		 */
219 
220 	LZMA_PROG_ERROR         = 11,
221 		/**<
222 		 * \brief       Programming error
223 		 *
224 		 * This indicates that the arguments given to the function are
225 		 * invalid or the internal state of the decoder is corrupt.
226 		 *   - Function arguments are invalid or the structures
227 		 *     pointed by the argument pointers are invalid
228 		 *     e.g. if strm->next_out has been set to NULL and
229 		 *     strm->avail_out > 0 when calling lzma_code().
230 		 *   - lzma_* functions have been called in wrong order
231 		 *     e.g. lzma_code() was called right after lzma_end().
232 		 *   - If errors occur randomly, the reason might be flaky
233 		 *     hardware.
234 		 *
235 		 * If you think that your code is correct, this error code
236 		 * can be a sign of a bug in liblzma. See the documentation
237 		 * how to report bugs.
238 		 */
239 } lzma_ret;
240 
241 
242 /**
243  * \brief       The `action' argument for lzma_code()
244  *
245  * After the first use of LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, or LZMA_FINISH,
246  * the same `action' must is used until lzma_code() returns LZMA_STREAM_END.
247  * Also, the amount of input (that is, strm->avail_in) must not be modified
248  * by the application until lzma_code() returns LZMA_STREAM_END. Changing the
249  * `action' or modifying the amount of input will make lzma_code() return
250  * LZMA_PROG_ERROR.
251  */
252 typedef enum {
253 	LZMA_RUN = 0,
254 		/**<
255 		 * \brief       Continue coding
256 		 *
257 		 * Encoder: Encode as much input as possible. Some internal
258 		 * buffering will probably be done (depends on the filter
259 		 * chain in use), which causes latency: the input used won't
260 		 * usually be decodeable from the output of the same
261 		 * lzma_code() call.
262 		 *
263 		 * Decoder: Decode as much input as possible and produce as
264 		 * much output as possible.
265 		 */
266 
267 	LZMA_SYNC_FLUSH = 1,
268 		/**<
269 		 * \brief       Make all the input available at output
270 		 *
271 		 * Normally the encoder introduces some latency.
272 		 * LZMA_SYNC_FLUSH forces all the buffered data to be
273 		 * available at output without resetting the internal
274 		 * state of the encoder. This way it is possible to use
275 		 * compressed stream for example for communication over
276 		 * network.
277 		 *
278 		 * Only some filters support LZMA_SYNC_FLUSH. Trying to use
279 		 * LZMA_SYNC_FLUSH with filters that don't support it will
280 		 * make lzma_code() return LZMA_OPTIONS_ERROR. For example,
281 		 * LZMA1 doesn't support LZMA_SYNC_FLUSH but LZMA2 does.
282 		 *
283 		 * Using LZMA_SYNC_FLUSH very often can dramatically reduce
284 		 * the compression ratio. With some filters (for example,
285 		 * LZMA2), finetuning the compression options may help
286 		 * mitigate this problem significantly.
287 		 *
288 		 * Decoders don't support LZMA_SYNC_FLUSH.
289 		 */
290 
291 	LZMA_FULL_FLUSH = 2,
292 		/**<
293 		 * \brief       Make all the input available at output
294 		 *
295 		 * Finish encoding of the current Block. All the input
296 		 * data going to the current Block must have been given
297 		 * to the encoder (the last bytes can still be pending in
298 		 * next_in). Call lzma_code() with LZMA_FULL_FLUSH until
299 		 * it returns LZMA_STREAM_END. Then continue normally with
300 		 * LZMA_RUN or finish the Stream with LZMA_FINISH.
301 		 *
302 		 * This action is currently supported only by Stream encoder
303 		 * and easy encoder (which uses Stream encoder). If there is
304 		 * no unfinished Block, no empty Block is created.
305 		 */
306 
307 	LZMA_FINISH = 3
308 		/**<
309 		 * \brief       Finish the coding operation
310 		 *
311 		 * Finishes the coding operation. All the input data must
312 		 * have been given to the encoder (the last bytes can still
313 		 * be pending in next_in). Call lzma_code() with LZMA_FINISH
314 		 * until it returns LZMA_STREAM_END. Once LZMA_FINISH has
315 		 * been used, the amount of input must no longer be changed
316 		 * by the application.
317 		 *
318 		 * When decoding, using LZMA_FINISH is optional unless the
319 		 * LZMA_CONCATENATED flag was used when the decoder was
320 		 * initialized. When LZMA_CONCATENATED was not used, the only
321 		 * effect of LZMA_FINISH is that the amount of input must not
322 		 * be changed just like in the encoder.
323 		 */
324 } lzma_action;
325 
326 
327 /**
328  * \brief       Custom functions for memory handling
329  *
330  * A pointer to lzma_allocator may be passed via lzma_stream structure
331  * to liblzma, and some advanced functions take a pointer to lzma_allocator
332  * as a separate function argument. The library will use the functions
333  * specified in lzma_allocator for memory handling instead of the default
334  * malloc() and free(). C++ users should note that the custom memory
335  * handling functions must not throw exceptions.
336  *
337  * liblzma doesn't make an internal copy of lzma_allocator. Thus, it is
338  * OK to change these function pointers in the middle of the coding
339  * process, but obviously it must be done carefully to make sure that the
340  * replacement `free' can deallocate memory allocated by the earlier
341  * `alloc' function(s).
342  */
343 typedef struct {
344 	/**
345 	 * \brief       Pointer to a custom memory allocation function
346 	 *
347 	 * If you don't want a custom allocator, but still want
348 	 * custom free(), set this to NULL and liblzma will use
349 	 * the standard malloc().
350 	 *
351 	 * \param       opaque  lzma_allocator.opaque (see below)
352 	 * \param       nmemb   Number of elements like in calloc(). liblzma
353 	 *                      will always set nmemb to 1, so it is safe to
354 	 *                      ignore nmemb in a custom allocator if you like.
355 	 *                      The nmemb argument exists only for
356 	 *                      compatibility with zlib and libbzip2.
357 	 * \param       size    Size of an element in bytes.
358 	 *                      liblzma never sets this to zero.
359 	 *
360 	 * \return      Pointer to the beginning of a memory block of
361 	 *              `size' bytes, or NULL if allocation fails
362 	 *              for some reason. When allocation fails, functions
363 	 *              of liblzma return LZMA_MEM_ERROR.
364 	 *
365 	 * The allocator should not waste time zeroing the allocated buffers.
366 	 * This is not only about speed, but also memory usage, since the
367 	 * operating system kernel doesn't necessarily allocate the requested
368 	 * memory in physical memory until it is actually used. With small
369 	 * input files, liblzma may actually need only a fraction of the
370 	 * memory that it requested for allocation.
371 	 *
372 	 * \note        LZMA_MEM_ERROR is also used when the size of the
373 	 *              allocation would be greater than SIZE_MAX. Thus,
374 	 *              don't assume that the custom allocator must have
375 	 *              returned NULL if some function from liblzma
376 	 *              returns LZMA_MEM_ERROR.
377 	 */
378 	void *(LZMA_API_CALL *alloc)(void *opaque, size_t nmemb, size_t size);
379 
380 	/**
381 	 * \brief       Pointer to a custom memory freeing function
382 	 *
383 	 * If you don't want a custom freeing function, but still
384 	 * want a custom allocator, set this to NULL and liblzma
385 	 * will use the standard free().
386 	 *
387 	 * \param       opaque  lzma_allocator.opaque (see below)
388 	 * \param       ptr     Pointer returned by lzma_allocator.alloc(),
389 	 *                      or when it is set to NULL, a pointer returned
390 	 *                      by the standard malloc().
391 	 */
392 	void (LZMA_API_CALL *free)(void *opaque, void *ptr);
393 
394 	/**
395 	 * \brief       Pointer passed to .alloc() and .free()
396 	 *
397 	 * opaque is passed as the first argument to lzma_allocator.alloc()
398 	 * and lzma_allocator.free(). This intended to ease implementing
399 	 * custom memory allocation functions for use with liblzma.
400 	 *
401 	 * If you don't need this, you should set this to NULL.
402 	 */
403 	void *opaque;
404 
405 } lzma_allocator;
406 
407 
408 /**
409  * \brief       Internal data structure
410  *
411  * The contents of this structure is not visible outside the library.
412  */
413 typedef struct lzma_internal_s lzma_internal;
414 
415 
416 /**
417  * \brief       Passing data to and from liblzma
418  *
419  * The lzma_stream structure is used for
420  *  - passing pointers to input and output buffers to liblzma;
421  *  - defining custom memory hander functions; and
422  *  - holding a pointer to coder-specific internal data structures.
423  *
424  * Typical usage:
425  *
426  *  - After allocating lzma_stream (on stack or with malloc()), it must be
427  *    initialized to LZMA_STREAM_INIT (see LZMA_STREAM_INIT for details).
428  *
429  *  - Initialize a coder to the lzma_stream, for example by using
430  *    lzma_easy_encoder() or lzma_auto_decoder(). Some notes:
431  *      - In contrast to zlib, strm->next_in and strm->next_out are
432  *        ignored by all initialization functions, thus it is safe
433  *        to not initialize them yet.
434  *      - The initialization functions always set strm->total_in and
435  *        strm->total_out to zero.
436  *      - If the initialization function fails, no memory is left allocated
437  *        that would require freeing with lzma_end() even if some memory was
438  *        associated with the lzma_stream structure when the initialization
439  *        function was called.
440  *
441  *  - Use lzma_code() to do the actual work.
442  *
443  *  - Once the coding has been finished, the existing lzma_stream can be
444  *    reused. It is OK to reuse lzma_stream with different initialization
445  *    function without calling lzma_end() first. Old allocations are
446  *    automatically freed.
447  *
448  *  - Finally, use lzma_end() to free the allocated memory. lzma_end() never
449  *    frees the lzma_stream structure itself.
450  *
451  * Application may modify the values of total_in and total_out as it wants.
452  * They are updated by liblzma to match the amount of data read and
453  * written, but aren't used for anything else.
454  */
455 typedef struct {
456 	const uint8_t *next_in; /**< Pointer to the next input byte. */
457 	size_t avail_in;    /**< Number of available input bytes in next_in. */
458 	uint64_t total_in;  /**< Total number of bytes read by liblzma. */
459 
460 	uint8_t *next_out;  /**< Pointer to the next output position. */
461 	size_t avail_out;   /**< Amount of free space in next_out. */
462 	uint64_t total_out; /**< Total number of bytes written by liblzma. */
463 
464 	/**
465 	 * \brief       Custom memory allocation functions
466 	 *
467 	 * In most cases this is NULL which makes liblzma use
468 	 * the standard malloc() and free().
469 	 */
470 	lzma_allocator *allocator;
471 
472 	/** Internal state is not visible to applications. */
473 	lzma_internal *internal;
474 
475 	/*
476 	 * Reserved space to allow possible future extensions without
477 	 * breaking the ABI. Excluding the initialization of this structure,
478 	 * you should not touch these, because the names of these variables
479 	 * may change.
480 	 */
481 	void *reserved_ptr1;
482 	void *reserved_ptr2;
483 	uint64_t reserved_int1;
484 	uint64_t reserved_int2;
485 	lzma_reserved_enum reserved_enum1;
486 	lzma_reserved_enum reserved_enum2;
487 
488 } lzma_stream;
489 
490 
491 /**
492  * \brief       Initialization for lzma_stream
493  *
494  * When you declare an instance of lzma_stream, you can immediatelly
495  * initialize it so that initialization functions know that no memory
496  * has been allocated yet:
497  *
498  *     lzma_stream strm = LZMA_STREAM_INIT;
499  *
500  * If you need to initialize a dynamically allocated lzma_stream, you can use
501  * memset(strm_pointer, 0, sizeof(lzma_stream)). Strictly speaking, this
502  * violates the C standard since NULL may have different internal
503  * representation than zero, but it should be portable enough in practice.
504  * Anyway, for maximum portability, you can use something like this:
505  *
506  *     lzma_stream tmp = LZMA_STREAM_INIT;
507  *     *strm = tmp;
508  */
509 #define LZMA_STREAM_INIT \
510 	{ NULL, 0, 0, NULL, 0, 0, NULL, NULL, \
511 	NULL, NULL, 0, 0, LZMA_RESERVED_ENUM, LZMA_RESERVED_ENUM }
512 
513 
514 /**
515  * \brief       Encode or decode data
516  *
517  * Once the lzma_stream has been successfully initialized (e.g. with
518  * lzma_stream_encoder()), the actual encoding or decoding is done
519  * using this function. The application has to update strm->next_in,
520  * strm->avail_in, strm->next_out, and strm->avail_out to pass input
521  * to and get output from liblzma.
522  *
523  * See the description of the coder-specific initialization function to find
524  * out what `action' values are supported by the coder.
525  */
526 extern LZMA_API(lzma_ret) lzma_code(lzma_stream *strm, lzma_action action)
527 		lzma_nothrow lzma_attr_warn_unused_result;
528 
529 
530 /**
531  * \brief       Free memory allocated for the coder data structures
532  *
533  * \param       strm    Pointer to lzma_stream that is at least initialized
534  *                      with LZMA_STREAM_INIT.
535  *
536  * After lzma_end(strm), strm->internal is guaranteed to be NULL. No other
537  * members of the lzma_stream structure are touched.
538  *
539  * \note        zlib indicates an error if application end()s unfinished
540  *              stream structure. liblzma doesn't do this, and assumes that
541  *              application knows what it is doing.
542  */
543 extern LZMA_API(void) lzma_end(lzma_stream *strm) lzma_nothrow;
544 
545 
546 /**
547  * \brief       Get the memory usage of decoder filter chain
548  *
549  * This function is currently supported only when *strm has been initialized
550  * with a function that takes a memlimit argument. With other functions, you
551  * should use e.g. lzma_raw_encoder_memusage() or lzma_raw_decoder_memusage()
552  * to estimate the memory requirements.
553  *
554  * This function is useful e.g. after LZMA_MEMLIMIT_ERROR to find out how big
555  * the memory usage limit should have been to decode the input. Note that
556  * this may give misleading information if decoding .xz Streams that have
557  * multiple Blocks, because each Block can have different memory requirements.
558  *
559  * \return      Rough estimate of how much memory is currently allocated
560  *              for the filter decoders. If no filter chain is currently
561  *              allocated, some non-zero value is still returned, which is
562  *              less than or equal to what any filter chain would indicate
563  *              as its memory requirement.
564  *
565  *              If this function isn't supported by *strm or some other error
566  *              occurs, zero is returned.
567  */
568 extern LZMA_API(uint64_t) lzma_memusage(const lzma_stream *strm)
569 		lzma_nothrow lzma_attr_pure;
570 
571 
572 /**
573  * \brief       Get the current memory usage limit
574  *
575  * This function is supported only when *strm has been initialized with
576  * a function that takes a memlimit argument.
577  *
578  * \return      On success, the current memory usage limit is returned
579  *              (always non-zero). On error, zero is returned.
580  */
581 extern LZMA_API(uint64_t) lzma_memlimit_get(const lzma_stream *strm)
582 		lzma_nothrow lzma_attr_pure;
583 
584 
585 /**
586  * \brief       Set the memory usage limit
587  *
588  * This function is supported only when *strm has been initialized with
589  * a function that takes a memlimit argument.
590  *
591  * \return      - LZMA_OK: New memory usage limit successfully set.
592  *              - LZMA_MEMLIMIT_ERROR: The new limit is too small.
593  *                The limit was not changed.
594  *              - LZMA_PROG_ERROR: Invalid arguments, e.g. *strm doesn't
595  *                support memory usage limit or memlimit was zero.
596  */
597 extern LZMA_API(lzma_ret) lzma_memlimit_set(
598 		lzma_stream *strm, uint64_t memlimit) lzma_nothrow;
599