1 /* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */ 2 // vim: expandtab:ts=8:sw=4:softtabstop=4: 3 /** 4 * \file lzma/base.h 5 * \brief Data types and functions used in many places in liblzma API 6 */ 7 8 /* 9 * Author: Lasse Collin 10 * 11 * This file has been put into the public domain. 12 * You can do whatever you want with this file. 13 * 14 * See ../lzma.h for information about liblzma as a whole. 15 */ 16 17 #ifndef LZMA_H_INTERNAL 18 # error Never include this file directly. Use <lzma.h> instead. 19 #endif 20 21 22 /** 23 * \brief Boolean 24 * 25 * This is here because C89 doesn't have stdbool.h. To set a value for 26 * variables having type lzma_bool, you can use 27 * - C99's `true' and `false' from stdbool.h; 28 * - C++'s internal `true' and `false'; or 29 * - integers one (true) and zero (false). 30 */ 31 typedef unsigned char lzma_bool; 32 33 34 /** 35 * \brief Type of reserved enumeration variable in structures 36 * 37 * To avoid breaking library ABI when new features are added, several 38 * structures contain extra variables that may be used in future. Since 39 * sizeof(enum) can be different than sizeof(int), and sizeof(enum) may 40 * even vary depending on the range of enumeration constants, we specify 41 * a separate type to be used for reserved enumeration variables. All 42 * enumeration constants in liblzma API will be non-negative and less 43 * than 128, which should guarantee that the ABI won't break even when 44 * new constants are added to existing enumerations. 45 */ 46 typedef enum { 47 LZMA_RESERVED_ENUM = 0 48 } lzma_reserved_enum; 49 50 51 /** 52 * \brief Return values used by several functions in liblzma 53 * 54 * Check the descriptions of specific functions to find out which return 55 * values they can return. With some functions the return values may have 56 * more specific meanings than described here; those differences are 57 * described per-function basis. 58 */ 59 typedef enum { 60 LZMA_OK = 0, 61 /**< 62 * \brief Operation completed successfully 63 */ 64 65 LZMA_STREAM_END = 1, 66 /**< 67 * \brief End of stream was reached 68 * 69 * In encoder, LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, or 70 * LZMA_FINISH was finished. In decoder, this indicates 71 * that all the data was successfully decoded. 72 * 73 * In all cases, when LZMA_STREAM_END is returned, the last 74 * output bytes should be picked from strm->next_out. 75 */ 76 77 LZMA_NO_CHECK = 2, 78 /**< 79 * \brief Input stream has no integrity check 80 * 81 * This return value can be returned only if the 82 * LZMA_TELL_NO_CHECK flag was used when initializing 83 * the decoder. LZMA_NO_CHECK is just a warning, and 84 * the decoding can be continued normally. 85 * 86 * It is possible to call lzma_get_check() immediatelly after 87 * lzma_code has returned LZMA_NO_CHECK. The result will 88 * naturally be LZMA_CHECK_NONE, but the possibility to call 89 * lzma_get_check() may be convenient in some applications. 90 */ 91 92 LZMA_UNSUPPORTED_CHECK = 3, 93 /**< 94 * \brief Cannot calculate the integrity check 95 * 96 * The usage of this return value is different in encoders 97 * and decoders. 98 * 99 * Encoders can return this value only from the initialization 100 * function. If initialization fails with this value, the 101 * encoding cannot be done, because there's no way to produce 102 * output with the correct integrity check. 103 * 104 * Decoders can return this value only from lzma_code() and 105 * only if the LZMA_TELL_UNSUPPORTED_CHECK flag was used when 106 * initializing the decoder. The decoding can still be 107 * continued normally even if the check type is unsupported, 108 * but naturally the check will not be validated, and possible 109 * errors may go undetected. 110 * 111 * With decoder, it is possible to call lzma_get_check() 112 * immediatelly after lzma_code() has returned 113 * LZMA_UNSUPPORTED_CHECK. This way it is possible to find 114 * out what the unsupported Check ID was. 115 */ 116 117 LZMA_GET_CHECK = 4, 118 /**< 119 * \brief Integrity check type is now available 120 * 121 * This value can be returned only by the lzma_code() function 122 * and only if the decoder was initialized with the 123 * LZMA_TELL_ANY_CHECK flag. LZMA_GET_CHECK tells the 124 * application that it may now call lzma_get_check() to find 125 * out the Check ID. This can be used, for example, to 126 * implement a decoder that accepts only files that have 127 * strong enough integrity check. 128 */ 129 130 LZMA_MEM_ERROR = 5, 131 /**< 132 * \brief Cannot allocate memory 133 * 134 * Memory allocation failed, or the size of the allocation 135 * would be greater than SIZE_MAX. 136 * 137 * Due to internal implementation reasons, the coding cannot 138 * be continued even if more memory were made available after 139 * LZMA_MEM_ERROR. 140 */ 141 142 LZMA_MEMLIMIT_ERROR = 6, 143 /** 144 * \brief Memory usage limit was reached 145 * 146 * Decoder would need more memory than allowed by the 147 * specified memory usage limit. To continue decoding, 148 * the memory usage limit has to be increased with 149 * lzma_memlimit_set(). 150 */ 151 152 LZMA_FORMAT_ERROR = 7, 153 /**< 154 * \brief File format not recognized 155 * 156 * The decoder did not recognize the input as supported file 157 * format. This error can occur, for example, when trying to 158 * decode .lzma format file with lzma_stream_decoder, 159 * because lzma_stream_decoder accepts only the .xz format. 160 */ 161 162 LZMA_OPTIONS_ERROR = 8, 163 /**< 164 * \brief Invalid or unsupported options 165 * 166 * Invalid or unsupported options, for example 167 * - unsupported filter(s) or filter options; or 168 * - reserved bits set in headers (decoder only). 169 * 170 * Rebuilding liblzma with more features enabled, or 171 * upgrading to a newer version of liblzma may help. 172 */ 173 174 LZMA_DATA_ERROR = 9, 175 /**< 176 * \brief Data is corrupt 177 * 178 * The usage of this return value is different in encoders 179 * and decoders. In both encoder and decoder, the coding 180 * cannot continue after this error. 181 * 182 * Encoders return this if size limits of the target file 183 * format would be exceeded. These limits are huge, thus 184 * getting this error from an encoder is mostly theoretical. 185 * For example, the maximum compressed and uncompressed 186 * size of a .xz Stream is roughly 8 EiB (2^63 bytes). 187 * 188 * Decoders return this error if the input data is corrupt. 189 * This can mean, for example, invalid CRC32 in headers 190 * or invalid check of uncompressed data. 191 */ 192 193 LZMA_BUF_ERROR = 10, 194 /**< 195 * \brief No progress is possible 196 * 197 * This error code is returned when the coder cannot consume 198 * any new input and produce any new output. The most common 199 * reason for this error is that the input stream being 200 * decoded is truncated or corrupt. 201 * 202 * This error is not fatal. Coding can be continued normally 203 * by providing more input and/or more output space, if 204 * possible. 205 * 206 * Typically the first call to lzma_code() that can do no 207 * progress returns LZMA_OK instead of LZMA_BUF_ERROR. Only 208 * the second consecutive call doing no progress will return 209 * LZMA_BUF_ERROR. This is intentional. 210 * 211 * With zlib, Z_BUF_ERROR may be returned even if the 212 * application is doing nothing wrong, so apps will need 213 * to handle Z_BUF_ERROR specially. The above hack 214 * guarantees that liblzma never returns LZMA_BUF_ERROR 215 * to properly written applications unless the input file 216 * is truncated or corrupt. This should simplify the 217 * applications a little. 218 */ 219 220 LZMA_PROG_ERROR = 11, 221 /**< 222 * \brief Programming error 223 * 224 * This indicates that the arguments given to the function are 225 * invalid or the internal state of the decoder is corrupt. 226 * - Function arguments are invalid or the structures 227 * pointed by the argument pointers are invalid 228 * e.g. if strm->next_out has been set to NULL and 229 * strm->avail_out > 0 when calling lzma_code(). 230 * - lzma_* functions have been called in wrong order 231 * e.g. lzma_code() was called right after lzma_end(). 232 * - If errors occur randomly, the reason might be flaky 233 * hardware. 234 * 235 * If you think that your code is correct, this error code 236 * can be a sign of a bug in liblzma. See the documentation 237 * how to report bugs. 238 */ 239 } lzma_ret; 240 241 242 /** 243 * \brief The `action' argument for lzma_code() 244 * 245 * After the first use of LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, or LZMA_FINISH, 246 * the same `action' must is used until lzma_code() returns LZMA_STREAM_END. 247 * Also, the amount of input (that is, strm->avail_in) must not be modified 248 * by the application until lzma_code() returns LZMA_STREAM_END. Changing the 249 * `action' or modifying the amount of input will make lzma_code() return 250 * LZMA_PROG_ERROR. 251 */ 252 typedef enum { 253 LZMA_RUN = 0, 254 /**< 255 * \brief Continue coding 256 * 257 * Encoder: Encode as much input as possible. Some internal 258 * buffering will probably be done (depends on the filter 259 * chain in use), which causes latency: the input used won't 260 * usually be decodeable from the output of the same 261 * lzma_code() call. 262 * 263 * Decoder: Decode as much input as possible and produce as 264 * much output as possible. 265 */ 266 267 LZMA_SYNC_FLUSH = 1, 268 /**< 269 * \brief Make all the input available at output 270 * 271 * Normally the encoder introduces some latency. 272 * LZMA_SYNC_FLUSH forces all the buffered data to be 273 * available at output without resetting the internal 274 * state of the encoder. This way it is possible to use 275 * compressed stream for example for communication over 276 * network. 277 * 278 * Only some filters support LZMA_SYNC_FLUSH. Trying to use 279 * LZMA_SYNC_FLUSH with filters that don't support it will 280 * make lzma_code() return LZMA_OPTIONS_ERROR. For example, 281 * LZMA1 doesn't support LZMA_SYNC_FLUSH but LZMA2 does. 282 * 283 * Using LZMA_SYNC_FLUSH very often can dramatically reduce 284 * the compression ratio. With some filters (for example, 285 * LZMA2), finetuning the compression options may help 286 * mitigate this problem significantly. 287 * 288 * Decoders don't support LZMA_SYNC_FLUSH. 289 */ 290 291 LZMA_FULL_FLUSH = 2, 292 /**< 293 * \brief Make all the input available at output 294 * 295 * Finish encoding of the current Block. All the input 296 * data going to the current Block must have been given 297 * to the encoder (the last bytes can still be pending in 298 * next_in). Call lzma_code() with LZMA_FULL_FLUSH until 299 * it returns LZMA_STREAM_END. Then continue normally with 300 * LZMA_RUN or finish the Stream with LZMA_FINISH. 301 * 302 * This action is currently supported only by Stream encoder 303 * and easy encoder (which uses Stream encoder). If there is 304 * no unfinished Block, no empty Block is created. 305 */ 306 307 LZMA_FINISH = 3 308 /**< 309 * \brief Finish the coding operation 310 * 311 * Finishes the coding operation. All the input data must 312 * have been given to the encoder (the last bytes can still 313 * be pending in next_in). Call lzma_code() with LZMA_FINISH 314 * until it returns LZMA_STREAM_END. Once LZMA_FINISH has 315 * been used, the amount of input must no longer be changed 316 * by the application. 317 * 318 * When decoding, using LZMA_FINISH is optional unless the 319 * LZMA_CONCATENATED flag was used when the decoder was 320 * initialized. When LZMA_CONCATENATED was not used, the only 321 * effect of LZMA_FINISH is that the amount of input must not 322 * be changed just like in the encoder. 323 */ 324 } lzma_action; 325 326 327 /** 328 * \brief Custom functions for memory handling 329 * 330 * A pointer to lzma_allocator may be passed via lzma_stream structure 331 * to liblzma, and some advanced functions take a pointer to lzma_allocator 332 * as a separate function argument. The library will use the functions 333 * specified in lzma_allocator for memory handling instead of the default 334 * malloc() and free(). C++ users should note that the custom memory 335 * handling functions must not throw exceptions. 336 * 337 * liblzma doesn't make an internal copy of lzma_allocator. Thus, it is 338 * OK to change these function pointers in the middle of the coding 339 * process, but obviously it must be done carefully to make sure that the 340 * replacement `free' can deallocate memory allocated by the earlier 341 * `alloc' function(s). 342 */ 343 typedef struct { 344 /** 345 * \brief Pointer to a custom memory allocation function 346 * 347 * If you don't want a custom allocator, but still want 348 * custom free(), set this to NULL and liblzma will use 349 * the standard malloc(). 350 * 351 * \param opaque lzma_allocator.opaque (see below) 352 * \param nmemb Number of elements like in calloc(). liblzma 353 * will always set nmemb to 1, so it is safe to 354 * ignore nmemb in a custom allocator if you like. 355 * The nmemb argument exists only for 356 * compatibility with zlib and libbzip2. 357 * \param size Size of an element in bytes. 358 * liblzma never sets this to zero. 359 * 360 * \return Pointer to the beginning of a memory block of 361 * `size' bytes, or NULL if allocation fails 362 * for some reason. When allocation fails, functions 363 * of liblzma return LZMA_MEM_ERROR. 364 * 365 * The allocator should not waste time zeroing the allocated buffers. 366 * This is not only about speed, but also memory usage, since the 367 * operating system kernel doesn't necessarily allocate the requested 368 * memory in physical memory until it is actually used. With small 369 * input files, liblzma may actually need only a fraction of the 370 * memory that it requested for allocation. 371 * 372 * \note LZMA_MEM_ERROR is also used when the size of the 373 * allocation would be greater than SIZE_MAX. Thus, 374 * don't assume that the custom allocator must have 375 * returned NULL if some function from liblzma 376 * returns LZMA_MEM_ERROR. 377 */ 378 void *(LZMA_API_CALL *alloc)(void *opaque, size_t nmemb, size_t size); 379 380 /** 381 * \brief Pointer to a custom memory freeing function 382 * 383 * If you don't want a custom freeing function, but still 384 * want a custom allocator, set this to NULL and liblzma 385 * will use the standard free(). 386 * 387 * \param opaque lzma_allocator.opaque (see below) 388 * \param ptr Pointer returned by lzma_allocator.alloc(), 389 * or when it is set to NULL, a pointer returned 390 * by the standard malloc(). 391 */ 392 void (LZMA_API_CALL *free)(void *opaque, void *ptr); 393 394 /** 395 * \brief Pointer passed to .alloc() and .free() 396 * 397 * opaque is passed as the first argument to lzma_allocator.alloc() 398 * and lzma_allocator.free(). This intended to ease implementing 399 * custom memory allocation functions for use with liblzma. 400 * 401 * If you don't need this, you should set this to NULL. 402 */ 403 void *opaque; 404 405 } lzma_allocator; 406 407 408 /** 409 * \brief Internal data structure 410 * 411 * The contents of this structure is not visible outside the library. 412 */ 413 typedef struct lzma_internal_s lzma_internal; 414 415 416 /** 417 * \brief Passing data to and from liblzma 418 * 419 * The lzma_stream structure is used for 420 * - passing pointers to input and output buffers to liblzma; 421 * - defining custom memory hander functions; and 422 * - holding a pointer to coder-specific internal data structures. 423 * 424 * Typical usage: 425 * 426 * - After allocating lzma_stream (on stack or with malloc()), it must be 427 * initialized to LZMA_STREAM_INIT (see LZMA_STREAM_INIT for details). 428 * 429 * - Initialize a coder to the lzma_stream, for example by using 430 * lzma_easy_encoder() or lzma_auto_decoder(). Some notes: 431 * - In contrast to zlib, strm->next_in and strm->next_out are 432 * ignored by all initialization functions, thus it is safe 433 * to not initialize them yet. 434 * - The initialization functions always set strm->total_in and 435 * strm->total_out to zero. 436 * - If the initialization function fails, no memory is left allocated 437 * that would require freeing with lzma_end() even if some memory was 438 * associated with the lzma_stream structure when the initialization 439 * function was called. 440 * 441 * - Use lzma_code() to do the actual work. 442 * 443 * - Once the coding has been finished, the existing lzma_stream can be 444 * reused. It is OK to reuse lzma_stream with different initialization 445 * function without calling lzma_end() first. Old allocations are 446 * automatically freed. 447 * 448 * - Finally, use lzma_end() to free the allocated memory. lzma_end() never 449 * frees the lzma_stream structure itself. 450 * 451 * Application may modify the values of total_in and total_out as it wants. 452 * They are updated by liblzma to match the amount of data read and 453 * written, but aren't used for anything else. 454 */ 455 typedef struct { 456 const uint8_t *next_in; /**< Pointer to the next input byte. */ 457 size_t avail_in; /**< Number of available input bytes in next_in. */ 458 uint64_t total_in; /**< Total number of bytes read by liblzma. */ 459 460 uint8_t *next_out; /**< Pointer to the next output position. */ 461 size_t avail_out; /**< Amount of free space in next_out. */ 462 uint64_t total_out; /**< Total number of bytes written by liblzma. */ 463 464 /** 465 * \brief Custom memory allocation functions 466 * 467 * In most cases this is NULL which makes liblzma use 468 * the standard malloc() and free(). 469 */ 470 lzma_allocator *allocator; 471 472 /** Internal state is not visible to applications. */ 473 lzma_internal *internal; 474 475 /* 476 * Reserved space to allow possible future extensions without 477 * breaking the ABI. Excluding the initialization of this structure, 478 * you should not touch these, because the names of these variables 479 * may change. 480 */ 481 void *reserved_ptr1; 482 void *reserved_ptr2; 483 uint64_t reserved_int1; 484 uint64_t reserved_int2; 485 lzma_reserved_enum reserved_enum1; 486 lzma_reserved_enum reserved_enum2; 487 488 } lzma_stream; 489 490 491 /** 492 * \brief Initialization for lzma_stream 493 * 494 * When you declare an instance of lzma_stream, you can immediatelly 495 * initialize it so that initialization functions know that no memory 496 * has been allocated yet: 497 * 498 * lzma_stream strm = LZMA_STREAM_INIT; 499 * 500 * If you need to initialize a dynamically allocated lzma_stream, you can use 501 * memset(strm_pointer, 0, sizeof(lzma_stream)). Strictly speaking, this 502 * violates the C standard since NULL may have different internal 503 * representation than zero, but it should be portable enough in practice. 504 * Anyway, for maximum portability, you can use something like this: 505 * 506 * lzma_stream tmp = LZMA_STREAM_INIT; 507 * *strm = tmp; 508 */ 509 #define LZMA_STREAM_INIT \ 510 { NULL, 0, 0, NULL, 0, 0, NULL, NULL, \ 511 NULL, NULL, 0, 0, LZMA_RESERVED_ENUM, LZMA_RESERVED_ENUM } 512 513 514 /** 515 * \brief Encode or decode data 516 * 517 * Once the lzma_stream has been successfully initialized (e.g. with 518 * lzma_stream_encoder()), the actual encoding or decoding is done 519 * using this function. The application has to update strm->next_in, 520 * strm->avail_in, strm->next_out, and strm->avail_out to pass input 521 * to and get output from liblzma. 522 * 523 * See the description of the coder-specific initialization function to find 524 * out what `action' values are supported by the coder. 525 */ 526 extern LZMA_API(lzma_ret) lzma_code(lzma_stream *strm, lzma_action action) 527 lzma_nothrow lzma_attr_warn_unused_result; 528 529 530 /** 531 * \brief Free memory allocated for the coder data structures 532 * 533 * \param strm Pointer to lzma_stream that is at least initialized 534 * with LZMA_STREAM_INIT. 535 * 536 * After lzma_end(strm), strm->internal is guaranteed to be NULL. No other 537 * members of the lzma_stream structure are touched. 538 * 539 * \note zlib indicates an error if application end()s unfinished 540 * stream structure. liblzma doesn't do this, and assumes that 541 * application knows what it is doing. 542 */ 543 extern LZMA_API(void) lzma_end(lzma_stream *strm) lzma_nothrow; 544 545 546 /** 547 * \brief Get the memory usage of decoder filter chain 548 * 549 * This function is currently supported only when *strm has been initialized 550 * with a function that takes a memlimit argument. With other functions, you 551 * should use e.g. lzma_raw_encoder_memusage() or lzma_raw_decoder_memusage() 552 * to estimate the memory requirements. 553 * 554 * This function is useful e.g. after LZMA_MEMLIMIT_ERROR to find out how big 555 * the memory usage limit should have been to decode the input. Note that 556 * this may give misleading information if decoding .xz Streams that have 557 * multiple Blocks, because each Block can have different memory requirements. 558 * 559 * \return Rough estimate of how much memory is currently allocated 560 * for the filter decoders. If no filter chain is currently 561 * allocated, some non-zero value is still returned, which is 562 * less than or equal to what any filter chain would indicate 563 * as its memory requirement. 564 * 565 * If this function isn't supported by *strm or some other error 566 * occurs, zero is returned. 567 */ 568 extern LZMA_API(uint64_t) lzma_memusage(const lzma_stream *strm) 569 lzma_nothrow lzma_attr_pure; 570 571 572 /** 573 * \brief Get the current memory usage limit 574 * 575 * This function is supported only when *strm has been initialized with 576 * a function that takes a memlimit argument. 577 * 578 * \return On success, the current memory usage limit is returned 579 * (always non-zero). On error, zero is returned. 580 */ 581 extern LZMA_API(uint64_t) lzma_memlimit_get(const lzma_stream *strm) 582 lzma_nothrow lzma_attr_pure; 583 584 585 /** 586 * \brief Set the memory usage limit 587 * 588 * This function is supported only when *strm has been initialized with 589 * a function that takes a memlimit argument. 590 * 591 * \return - LZMA_OK: New memory usage limit successfully set. 592 * - LZMA_MEMLIMIT_ERROR: The new limit is too small. 593 * The limit was not changed. 594 * - LZMA_PROG_ERROR: Invalid arguments, e.g. *strm doesn't 595 * support memory usage limit or memlimit was zero. 596 */ 597 extern LZMA_API(lzma_ret) lzma_memlimit_set( 598 lzma_stream *strm, uint64_t memlimit) lzma_nothrow; 599