1 /*- 2 * Copyright (c) 2008 Joerg Sonnenberger 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 /*- 27 * Copyright (c) 1985, 1986, 1992, 1993 28 * The Regents of the University of California. All rights reserved. 29 * 30 * This code is derived from software contributed to Berkeley by 31 * Diomidis Spinellis and James A. Woods, derived from original 32 * work by Spencer Thomas and Joseph Orost. 33 * 34 * Redistribution and use in source and binary forms, with or without 35 * modification, are permitted provided that the following conditions 36 * are met: 37 * 1. Redistributions of source code must retain the above copyright 38 * notice, this list of conditions and the following disclaimer. 39 * 2. Redistributions in binary form must reproduce the above copyright 40 * notice, this list of conditions and the following disclaimer in the 41 * documentation and/or other materials provided with the distribution. 42 * 3. Neither the name of the University nor the names of its contributors 43 * may be used to endorse or promote products derived from this software 44 * without specific prior written permission. 45 * 46 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 47 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 48 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 49 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 50 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 51 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 52 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 53 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 54 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 55 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 56 * SUCH DAMAGE. 57 */ 58 59 #include "archive_platform.h" 60 61 __FBSDID("$FreeBSD: head/lib/libarchive/archive_write_set_compression_compress.c 201111 2009-12-28 03:33:05Z kientzle $"); 62 63 #ifdef HAVE_ERRNO_H 64 #include <errno.h> 65 #endif 66 #ifdef HAVE_STDLIB_H 67 #include <stdlib.h> 68 #endif 69 #ifdef HAVE_STRING_H 70 #include <string.h> 71 #endif 72 73 #include "archive.h" 74 #include "archive_private.h" 75 #include "archive_write_private.h" 76 77 #define HSIZE 69001 /* 95% occupancy */ 78 #define HSHIFT 8 /* 8 - trunc(log2(HSIZE / 65536)) */ 79 #define CHECK_GAP 10000 /* Ratio check interval. */ 80 81 #define MAXCODE(bits) ((1 << (bits)) - 1) 82 83 /* 84 * the next two codes should not be changed lightly, as they must not 85 * lie within the contiguous general code space. 86 */ 87 #define FIRST 257 /* First free entry. */ 88 #define CLEAR 256 /* Table clear output code. */ 89 90 struct private_data { 91 int64_t in_count, out_count, checkpoint; 92 93 int code_len; /* Number of bits/code. */ 94 int cur_maxcode; /* Maximum code, given n_bits. */ 95 int max_maxcode; /* Should NEVER generate this code. */ 96 int hashtab [HSIZE]; 97 unsigned short codetab [HSIZE]; 98 int first_free; /* First unused entry. */ 99 int compress_ratio; 100 101 int cur_code, cur_fcode; 102 103 int bit_offset; 104 unsigned char bit_buf; 105 106 unsigned char *compressed; 107 size_t compressed_buffer_size; 108 size_t compressed_offset; 109 }; 110 111 static int archive_compressor_compress_open(struct archive_write_filter *); 112 static int archive_compressor_compress_write(struct archive_write_filter *, 113 const void *, size_t); 114 static int archive_compressor_compress_close(struct archive_write_filter *); 115 static int archive_compressor_compress_free(struct archive_write_filter *); 116 117 #if ARCHIVE_VERSION_NUMBER < 4000000 118 int 119 archive_write_set_compression_compress(struct archive *a) 120 { 121 __archive_write_filters_free(a); 122 return (archive_write_add_filter_compress(a)); 123 } 124 #endif 125 126 /* 127 * Add a compress filter to this write handle. 128 */ 129 int 130 archive_write_add_filter_compress(struct archive *_a) 131 { 132 struct archive_write *a = (struct archive_write *)_a; 133 struct archive_write_filter *f = __archive_write_allocate_filter(_a); 134 135 archive_check_magic(&a->archive, ARCHIVE_WRITE_MAGIC, 136 ARCHIVE_STATE_NEW, "archive_write_add_filter_compress"); 137 f->open = &archive_compressor_compress_open; 138 f->code = ARCHIVE_FILTER_COMPRESS; 139 f->name = "compress"; 140 return (ARCHIVE_OK); 141 } 142 143 /* 144 * Setup callback. 145 */ 146 static int 147 archive_compressor_compress_open(struct archive_write_filter *f) 148 { 149 int ret; 150 struct private_data *state; 151 size_t bs = 65536, bpb; 152 153 f->code = ARCHIVE_FILTER_COMPRESS; 154 f->name = "compress"; 155 156 ret = __archive_write_open_filter(f->next_filter); 157 if (ret != ARCHIVE_OK) 158 return (ret); 159 160 state = (struct private_data *)calloc(1, sizeof(*state)); 161 if (state == NULL) { 162 archive_set_error(f->archive, ENOMEM, 163 "Can't allocate data for compression"); 164 return (ARCHIVE_FATAL); 165 } 166 167 if (f->archive->magic == ARCHIVE_WRITE_MAGIC) { 168 /* Buffer size should be a multiple number of the of bytes 169 * per block for performance. */ 170 bpb = archive_write_get_bytes_per_block(f->archive); 171 if (bpb > bs) 172 bs = bpb; 173 else if (bpb != 0) 174 bs -= bs % bpb; 175 } 176 state->compressed_buffer_size = bs; 177 state->compressed = malloc(state->compressed_buffer_size); 178 179 if (state->compressed == NULL) { 180 archive_set_error(f->archive, ENOMEM, 181 "Can't allocate data for compression buffer"); 182 free(state); 183 return (ARCHIVE_FATAL); 184 } 185 186 f->write = archive_compressor_compress_write; 187 f->close = archive_compressor_compress_close; 188 f->free = archive_compressor_compress_free; 189 190 state->max_maxcode = 0x10000; /* Should NEVER generate this code. */ 191 state->in_count = 0; /* Length of input. */ 192 state->bit_buf = 0; 193 state->bit_offset = 0; 194 state->out_count = 3; /* Includes 3-byte header mojo. */ 195 state->compress_ratio = 0; 196 state->checkpoint = CHECK_GAP; 197 state->code_len = 9; 198 state->cur_maxcode = MAXCODE(state->code_len); 199 state->first_free = FIRST; 200 201 memset(state->hashtab, 0xff, sizeof(state->hashtab)); 202 203 /* Prime output buffer with a gzip header. */ 204 state->compressed[0] = 0x1f; /* Compress */ 205 state->compressed[1] = 0x9d; 206 state->compressed[2] = 0x90; /* Block mode, 16bit max */ 207 state->compressed_offset = 3; 208 209 f->data = state; 210 return (0); 211 } 212 213 /*- 214 * Output the given code. 215 * Inputs: 216 * code: A n_bits-bit integer. If == -1, then EOF. This assumes 217 * that n_bits <= (long)wordsize - 1. 218 * Outputs: 219 * Outputs code to the file. 220 * Assumptions: 221 * Chars are 8 bits long. 222 * Algorithm: 223 * Maintain a BITS character long buffer (so that 8 codes will 224 * fit in it exactly). Use the VAX insv instruction to insert each 225 * code in turn. When the buffer fills up empty it and start over. 226 */ 227 228 static const unsigned char rmask[9] = 229 {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff}; 230 231 static int 232 output_byte(struct archive_write_filter *f, unsigned char c) 233 { 234 struct private_data *state = f->data; 235 236 state->compressed[state->compressed_offset++] = c; 237 ++state->out_count; 238 239 if (state->compressed_buffer_size == state->compressed_offset) { 240 int ret = __archive_write_filter(f->next_filter, 241 state->compressed, state->compressed_buffer_size); 242 if (ret != ARCHIVE_OK) 243 return ARCHIVE_FATAL; 244 state->compressed_offset = 0; 245 } 246 247 return ARCHIVE_OK; 248 } 249 250 static int 251 output_code(struct archive_write_filter *f, int ocode) 252 { 253 struct private_data *state = f->data; 254 int bits, ret, clear_flg, bit_offset; 255 256 clear_flg = ocode == CLEAR; 257 258 /* 259 * Since ocode is always >= 8 bits, only need to mask the first 260 * hunk on the left. 261 */ 262 bit_offset = state->bit_offset % 8; 263 state->bit_buf |= (ocode << bit_offset) & 0xff; 264 output_byte(f, state->bit_buf); 265 266 bits = state->code_len - (8 - bit_offset); 267 ocode >>= 8 - bit_offset; 268 /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ 269 if (bits >= 8) { 270 output_byte(f, ocode & 0xff); 271 ocode >>= 8; 272 bits -= 8; 273 } 274 /* Last bits. */ 275 state->bit_offset += state->code_len; 276 state->bit_buf = ocode & rmask[bits]; 277 if (state->bit_offset == state->code_len * 8) 278 state->bit_offset = 0; 279 280 /* 281 * If the next entry is going to be too big for the ocode size, 282 * then increase it, if possible. 283 */ 284 if (clear_flg || state->first_free > state->cur_maxcode) { 285 /* 286 * Write the whole buffer, because the input side won't 287 * discover the size increase until after it has read it. 288 */ 289 if (state->bit_offset > 0) { 290 while (state->bit_offset < state->code_len * 8) { 291 ret = output_byte(f, state->bit_buf); 292 if (ret != ARCHIVE_OK) 293 return ret; 294 state->bit_offset += 8; 295 state->bit_buf = 0; 296 } 297 } 298 state->bit_buf = 0; 299 state->bit_offset = 0; 300 301 if (clear_flg) { 302 state->code_len = 9; 303 state->cur_maxcode = MAXCODE(state->code_len); 304 } else { 305 state->code_len++; 306 if (state->code_len == 16) 307 state->cur_maxcode = state->max_maxcode; 308 else 309 state->cur_maxcode = MAXCODE(state->code_len); 310 } 311 } 312 313 return (ARCHIVE_OK); 314 } 315 316 static int 317 output_flush(struct archive_write_filter *f) 318 { 319 struct private_data *state = f->data; 320 int ret; 321 322 /* At EOF, write the rest of the buffer. */ 323 if (state->bit_offset % 8) { 324 state->code_len = (state->bit_offset % 8 + 7) / 8; 325 ret = output_byte(f, state->bit_buf); 326 if (ret != ARCHIVE_OK) 327 return ret; 328 } 329 330 return (ARCHIVE_OK); 331 } 332 333 /* 334 * Write data to the compressed stream. 335 */ 336 static int 337 archive_compressor_compress_write(struct archive_write_filter *f, 338 const void *buff, size_t length) 339 { 340 struct private_data *state = (struct private_data *)f->data; 341 int i; 342 int ratio; 343 int c, disp, ret; 344 const unsigned char *bp; 345 346 if (length == 0) 347 return ARCHIVE_OK; 348 349 bp = buff; 350 351 if (state->in_count == 0) { 352 state->cur_code = *bp++; 353 ++state->in_count; 354 --length; 355 } 356 357 while (length--) { 358 c = *bp++; 359 state->in_count++; 360 state->cur_fcode = (c << 16) + state->cur_code; 361 i = ((c << HSHIFT) ^ state->cur_code); /* Xor hashing. */ 362 363 if (state->hashtab[i] == state->cur_fcode) { 364 state->cur_code = state->codetab[i]; 365 continue; 366 } 367 if (state->hashtab[i] < 0) /* Empty slot. */ 368 goto nomatch; 369 /* Secondary hash (after G. Knott). */ 370 if (i == 0) 371 disp = 1; 372 else 373 disp = HSIZE - i; 374 probe: 375 if ((i -= disp) < 0) 376 i += HSIZE; 377 378 if (state->hashtab[i] == state->cur_fcode) { 379 state->cur_code = state->codetab[i]; 380 continue; 381 } 382 if (state->hashtab[i] >= 0) 383 goto probe; 384 nomatch: 385 ret = output_code(f, state->cur_code); 386 if (ret != ARCHIVE_OK) 387 return ret; 388 state->cur_code = c; 389 if (state->first_free < state->max_maxcode) { 390 state->codetab[i] = state->first_free++; /* code -> hashtable */ 391 state->hashtab[i] = state->cur_fcode; 392 continue; 393 } 394 if (state->in_count < state->checkpoint) 395 continue; 396 397 state->checkpoint = state->in_count + CHECK_GAP; 398 399 if (state->in_count <= 0x007fffff && state->out_count != 0) 400 ratio = (int)(state->in_count * 256 / state->out_count); 401 else if ((ratio = (int)(state->out_count / 256)) == 0) 402 ratio = 0x7fffffff; 403 else 404 ratio = (int)(state->in_count / ratio); 405 406 if (ratio > state->compress_ratio) 407 state->compress_ratio = ratio; 408 else { 409 state->compress_ratio = 0; 410 memset(state->hashtab, 0xff, sizeof(state->hashtab)); 411 state->first_free = FIRST; 412 ret = output_code(f, CLEAR); 413 if (ret != ARCHIVE_OK) 414 return ret; 415 } 416 } 417 418 return (ARCHIVE_OK); 419 } 420 421 422 /* 423 * Finish the compression... 424 */ 425 static int 426 archive_compressor_compress_close(struct archive_write_filter *f) 427 { 428 struct private_data *state = (struct private_data *)f->data; 429 int ret, ret2; 430 431 ret = output_code(f, state->cur_code); 432 if (ret != ARCHIVE_OK) 433 goto cleanup; 434 ret = output_flush(f); 435 if (ret != ARCHIVE_OK) 436 goto cleanup; 437 438 /* Write the last block */ 439 ret = __archive_write_filter(f->next_filter, 440 state->compressed, state->compressed_offset); 441 cleanup: 442 ret2 = __archive_write_close_filter(f->next_filter); 443 if (ret > ret2) 444 ret = ret2; 445 free(state->compressed); 446 free(state); 447 return (ret); 448 } 449 450 static int 451 archive_compressor_compress_free(struct archive_write_filter *f) 452 { 453 (void)f; /* UNUSED */ 454 return (ARCHIVE_OK); 455 } 456