1 /*- 2 * Copyright (c) 2003-2007 Tim Kientzle 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 /* 27 * This file contains the "essential" portions of the read API, that 28 * is, stuff that will probably always be used by any client that 29 * actually needs to read an archive. Optional pieces have been, as 30 * far as possible, separated out into separate files to avoid 31 * needlessly bloating statically-linked clients. 32 */ 33 34 #include "archive_platform.h" 35 __FBSDID("$FreeBSD: head/lib/libarchive/archive_read.c 201157 2009-12-29 05:30:23Z kientzle $"); 36 37 #ifdef HAVE_ERRNO_H 38 #include <errno.h> 39 #endif 40 #include <stdio.h> 41 #ifdef HAVE_STDLIB_H 42 #include <stdlib.h> 43 #endif 44 #ifdef HAVE_STRING_H 45 #include <string.h> 46 #endif 47 #ifdef HAVE_UNISTD_H 48 #include <unistd.h> 49 #endif 50 51 #include "archive.h" 52 #include "archive_entry.h" 53 #include "archive_private.h" 54 #include "archive_read_private.h" 55 56 #define minimum(a, b) (a < b ? a : b) 57 58 static int build_stream(struct archive_read *); 59 static int choose_format(struct archive_read *); 60 static int cleanup_filters(struct archive_read *); 61 static struct archive_vtable *archive_read_vtable(void); 62 static int _archive_read_close(struct archive *); 63 static int _archive_read_finish(struct archive *); 64 65 static struct archive_vtable * 66 archive_read_vtable(void) 67 { 68 static struct archive_vtable av; 69 static int inited = 0; 70 71 if (!inited) { 72 av.archive_finish = _archive_read_finish; 73 av.archive_close = _archive_read_close; 74 } 75 return (&av); 76 } 77 78 /* 79 * Allocate, initialize and return a struct archive object. 80 */ 81 struct archive * 82 archive_read_new(void) 83 { 84 struct archive_read *a; 85 86 a = (struct archive_read *)malloc(sizeof(*a)); 87 if (a == NULL) 88 return (NULL); 89 memset(a, 0, sizeof(*a)); 90 a->archive.magic = ARCHIVE_READ_MAGIC; 91 92 a->archive.state = ARCHIVE_STATE_NEW; 93 a->entry = archive_entry_new(); 94 a->archive.vtable = archive_read_vtable(); 95 96 return (&a->archive); 97 } 98 99 /* 100 * Record the do-not-extract-to file. This belongs in archive_read_extract.c. 101 */ 102 void 103 archive_read_extract_set_skip_file(struct archive *_a, dev_t d, ino_t i) 104 { 105 struct archive_read *a = (struct archive_read *)_a; 106 __archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_ANY, 107 "archive_read_extract_set_skip_file"); 108 a->skip_file_dev = d; 109 a->skip_file_ino = i; 110 } 111 112 /* 113 * Set read options for the format. 114 */ 115 int 116 archive_read_set_format_options(struct archive *_a, const char *s) 117 { 118 struct archive_read *a; 119 struct archive_format_descriptor *format; 120 char key[64], val[64]; 121 char *valp; 122 size_t i; 123 int len, r; 124 125 __archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, 126 "archive_read_set_format_options"); 127 128 if (s == NULL || *s == '\0') 129 return (ARCHIVE_OK); 130 a = (struct archive_read *)_a; 131 __archive_check_magic(&a->archive, ARCHIVE_READ_MAGIC, 132 ARCHIVE_STATE_NEW, "archive_read_set_format_options"); 133 len = 0; 134 for (i = 0; i < sizeof(a->formats)/sizeof(a->formats[0]); i++) { 135 format = &a->formats[i]; 136 if (format == NULL || format->options == NULL || 137 format->name == NULL) 138 /* This format does not support option. */ 139 continue; 140 141 while ((len = __archive_parse_options(s, format->name, 142 sizeof(key), key, sizeof(val), val)) > 0) { 143 valp = val[0] == '\0' ? NULL : val; 144 a->format = format; 145 r = format->options(a, key, valp); 146 a->format = NULL; 147 if (r == ARCHIVE_FATAL) 148 return (r); 149 s += len; 150 } 151 } 152 if (len < 0) { 153 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 154 "Illegal format options."); 155 return (ARCHIVE_WARN); 156 } 157 return (ARCHIVE_OK); 158 } 159 160 /* 161 * Set read options for the filter. 162 */ 163 int 164 archive_read_set_filter_options(struct archive *_a, const char *s) 165 { 166 struct archive_read *a; 167 struct archive_read_filter *filter; 168 struct archive_read_filter_bidder *bidder; 169 char key[64], val[64]; 170 int len, r; 171 172 __archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, 173 "archive_read_set_filter_options"); 174 175 if (s == NULL || *s == '\0') 176 return (ARCHIVE_OK); 177 a = (struct archive_read *)_a; 178 __archive_check_magic(&a->archive, ARCHIVE_READ_MAGIC, 179 ARCHIVE_STATE_NEW, "archive_read_set_filter_options"); 180 len = 0; 181 for (filter = a->filter; filter != NULL; filter = filter->upstream) { 182 bidder = filter->bidder; 183 if (bidder == NULL) 184 continue; 185 if (bidder->options == NULL) 186 /* This bidder does not support option */ 187 continue; 188 while ((len = __archive_parse_options(s, filter->name, 189 sizeof(key), key, sizeof(val), val)) > 0) { 190 if (val[0] == '\0') 191 r = bidder->options(bidder, key, NULL); 192 else 193 r = bidder->options(bidder, key, val); 194 if (r == ARCHIVE_FATAL) 195 return (r); 196 s += len; 197 } 198 } 199 if (len < 0) { 200 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 201 "Illegal format options."); 202 return (ARCHIVE_WARN); 203 } 204 return (ARCHIVE_OK); 205 } 206 207 /* 208 * Set read options for the format and the filter. 209 */ 210 int 211 archive_read_set_options(struct archive *_a, const char *s) 212 { 213 int r; 214 215 __archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, 216 "archive_read_set_options"); 217 archive_clear_error(_a); 218 219 r = archive_read_set_format_options(_a, s); 220 if (r != ARCHIVE_OK) 221 return (r); 222 r = archive_read_set_filter_options(_a, s); 223 if (r != ARCHIVE_OK) 224 return (r); 225 return (ARCHIVE_OK); 226 } 227 228 /* 229 * Open the archive 230 */ 231 int 232 archive_read_open(struct archive *a, void *client_data, 233 archive_open_callback *client_opener, archive_read_callback *client_reader, 234 archive_close_callback *client_closer) 235 { 236 /* Old archive_read_open() is just a thin shell around 237 * archive_read_open2. */ 238 return archive_read_open2(a, client_data, client_opener, 239 client_reader, NULL, client_closer); 240 } 241 242 static ssize_t 243 client_read_proxy(struct archive_read_filter *self, const void **buff) 244 { 245 ssize_t r; 246 r = (self->archive->client.reader)(&self->archive->archive, 247 self->data, buff); 248 self->archive->archive.raw_position += r; 249 return (r); 250 } 251 252 static int64_t 253 client_skip_proxy(struct archive_read_filter *self, int64_t request) 254 { 255 int64_t ask, get, total; 256 /* Limit our maximum seek request to 1GB on platforms 257 * with 32-bit off_t (such as Windows). */ 258 int64_t skip_limit = ((int64_t)1) << (sizeof(off_t) * 8 - 2); 259 260 if (self->archive->client.skipper == NULL) 261 return (0); 262 total = 0; 263 for (;;) { 264 ask = request; 265 if (ask > skip_limit) 266 ask = skip_limit; 267 get = (self->archive->client.skipper)(&self->archive->archive, 268 self->data, ask); 269 if (get == 0) 270 return (total); 271 request -= get; 272 self->archive->archive.raw_position += get; 273 total += get; 274 } 275 } 276 277 static int 278 client_close_proxy(struct archive_read_filter *self) 279 { 280 int r = ARCHIVE_OK; 281 282 if (self->archive->client.closer != NULL) 283 r = (self->archive->client.closer)((struct archive *)self->archive, 284 self->data); 285 self->data = NULL; 286 return (r); 287 } 288 289 290 int 291 archive_read_open2(struct archive *_a, void *client_data, 292 archive_open_callback *client_opener, 293 archive_read_callback *client_reader, 294 archive_skip_callback *client_skipper, 295 archive_close_callback *client_closer) 296 { 297 struct archive_read *a = (struct archive_read *)_a; 298 struct archive_read_filter *filter; 299 int e; 300 301 __archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, 302 "archive_read_open"); 303 archive_clear_error(&a->archive); 304 305 if (client_reader == NULL) 306 __archive_errx(1, 307 "No reader function provided to archive_read_open"); 308 309 /* Open data source. */ 310 if (client_opener != NULL) { 311 e =(client_opener)(&a->archive, client_data); 312 if (e != 0) { 313 /* If the open failed, call the closer to clean up. */ 314 if (client_closer) 315 (client_closer)(&a->archive, client_data); 316 return (e); 317 } 318 } 319 320 /* Save the client functions and mock up the initial source. */ 321 a->client.reader = client_reader; 322 a->client.skipper = client_skipper; 323 a->client.closer = client_closer; 324 325 filter = calloc(1, sizeof(*filter)); 326 if (filter == NULL) 327 return (ARCHIVE_FATAL); 328 filter->bidder = NULL; 329 filter->upstream = NULL; 330 filter->archive = a; 331 filter->data = client_data; 332 filter->read = client_read_proxy; 333 filter->skip = client_skip_proxy; 334 filter->close = client_close_proxy; 335 filter->name = "none"; 336 filter->code = ARCHIVE_COMPRESSION_NONE; 337 a->filter = filter; 338 339 /* Build out the input pipeline. */ 340 e = build_stream(a); 341 if (e == ARCHIVE_OK) 342 a->archive.state = ARCHIVE_STATE_HEADER; 343 344 return (e); 345 } 346 347 /* 348 * Allow each registered stream transform to bid on whether 349 * it wants to handle this stream. Repeat until we've finished 350 * building the pipeline. 351 */ 352 static int 353 build_stream(struct archive_read *a) 354 { 355 int number_bidders, i, bid, best_bid; 356 struct archive_read_filter_bidder *bidder, *best_bidder; 357 struct archive_read_filter *filter; 358 ssize_t avail; 359 int r; 360 361 for (;;) { 362 number_bidders = sizeof(a->bidders) / sizeof(a->bidders[0]); 363 364 best_bid = 0; 365 best_bidder = NULL; 366 367 bidder = a->bidders; 368 for (i = 0; i < number_bidders; i++, bidder++) { 369 if (bidder->bid != NULL) { 370 bid = (bidder->bid)(bidder, a->filter); 371 if (bid > best_bid) { 372 best_bid = bid; 373 best_bidder = bidder; 374 } 375 } 376 } 377 378 /* If no bidder, we're done. */ 379 if (best_bidder == NULL) { 380 a->archive.compression_name = a->filter->name; 381 a->archive.compression_code = a->filter->code; 382 return (ARCHIVE_OK); 383 } 384 385 filter 386 = (struct archive_read_filter *)calloc(1, sizeof(*filter)); 387 if (filter == NULL) 388 return (ARCHIVE_FATAL); 389 filter->bidder = best_bidder; 390 filter->archive = a; 391 filter->upstream = a->filter; 392 r = (best_bidder->init)(filter); 393 if (r != ARCHIVE_OK) { 394 free(filter); 395 return (r); 396 } 397 a->filter = filter; 398 /* Verify the filter by asking it for some data. */ 399 __archive_read_filter_ahead(filter, 1, &avail); 400 if (avail < 0) { 401 cleanup_filters(a); 402 return (ARCHIVE_FATAL); 403 } 404 } 405 } 406 407 /* 408 * Read header of next entry. 409 */ 410 int 411 archive_read_next_header2(struct archive *_a, struct archive_entry *entry) 412 { 413 struct archive_read *a = (struct archive_read *)_a; 414 int slot, ret; 415 416 __archive_check_magic(_a, ARCHIVE_READ_MAGIC, 417 ARCHIVE_STATE_HEADER | ARCHIVE_STATE_DATA, 418 "archive_read_next_header"); 419 420 ++_a->file_count; 421 archive_entry_clear(entry); 422 archive_clear_error(&a->archive); 423 424 /* 425 * If no format has yet been chosen, choose one. 426 */ 427 if (a->format == NULL) { 428 slot = choose_format(a); 429 if (slot < 0) { 430 a->archive.state = ARCHIVE_STATE_FATAL; 431 return (ARCHIVE_FATAL); 432 } 433 a->format = &(a->formats[slot]); 434 } 435 436 /* 437 * If client didn't consume entire data, skip any remainder 438 * (This is especially important for GNU incremental directories.) 439 */ 440 if (a->archive.state == ARCHIVE_STATE_DATA) { 441 ret = archive_read_data_skip(&a->archive); 442 if (ret == ARCHIVE_EOF) { 443 archive_set_error(&a->archive, EIO, "Premature end-of-file."); 444 a->archive.state = ARCHIVE_STATE_FATAL; 445 return (ARCHIVE_FATAL); 446 } 447 if (ret != ARCHIVE_OK) 448 return (ret); 449 } 450 451 /* Record start-of-header. */ 452 a->header_position = a->archive.file_position; 453 454 ret = (a->format->read_header)(a, entry); 455 456 /* 457 * EOF and FATAL are persistent at this layer. By 458 * modifying the state, we guarantee that future calls to 459 * read a header or read data will fail. 460 */ 461 switch (ret) { 462 case ARCHIVE_EOF: 463 a->archive.state = ARCHIVE_STATE_EOF; 464 break; 465 case ARCHIVE_OK: 466 a->archive.state = ARCHIVE_STATE_DATA; 467 break; 468 case ARCHIVE_WARN: 469 a->archive.state = ARCHIVE_STATE_DATA; 470 break; 471 case ARCHIVE_RETRY: 472 break; 473 case ARCHIVE_FATAL: 474 a->archive.state = ARCHIVE_STATE_FATAL; 475 break; 476 } 477 478 a->read_data_output_offset = 0; 479 a->read_data_remaining = 0; 480 return (ret); 481 } 482 483 int 484 archive_read_next_header(struct archive *_a, struct archive_entry **entryp) 485 { 486 int ret; 487 struct archive_read *a = (struct archive_read *)_a; 488 *entryp = NULL; 489 ret = archive_read_next_header2(_a, a->entry); 490 *entryp = a->entry; 491 return ret; 492 } 493 494 /* 495 * Allow each registered format to bid on whether it wants to handle 496 * the next entry. Return index of winning bidder. 497 */ 498 static int 499 choose_format(struct archive_read *a) 500 { 501 int slots; 502 int i; 503 int bid, best_bid; 504 int best_bid_slot; 505 506 slots = sizeof(a->formats) / sizeof(a->formats[0]); 507 best_bid = -1; 508 best_bid_slot = -1; 509 510 /* Set up a->format and a->pformat_data for convenience of bidders. */ 511 a->format = &(a->formats[0]); 512 for (i = 0; i < slots; i++, a->format++) { 513 if (a->format->bid) { 514 bid = (a->format->bid)(a); 515 if (bid == ARCHIVE_FATAL) 516 return (ARCHIVE_FATAL); 517 if ((bid > best_bid) || (best_bid_slot < 0)) { 518 best_bid = bid; 519 best_bid_slot = i; 520 } 521 } 522 } 523 524 /* 525 * There were no bidders; this is a serious programmer error 526 * and demands a quick and definitive abort. 527 */ 528 if (best_bid_slot < 0) 529 __archive_errx(1, "No formats were registered; you must " 530 "invoke at least one archive_read_support_format_XXX " 531 "function in order to successfully read an archive."); 532 533 /* 534 * There were bidders, but no non-zero bids; this means we 535 * can't support this stream. 536 */ 537 if (best_bid < 1) { 538 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 539 "Unrecognized archive format"); 540 return (ARCHIVE_FATAL); 541 } 542 543 return (best_bid_slot); 544 } 545 546 /* 547 * Return the file offset (within the uncompressed data stream) where 548 * the last header started. 549 */ 550 int64_t 551 archive_read_header_position(struct archive *_a) 552 { 553 struct archive_read *a = (struct archive_read *)_a; 554 __archive_check_magic(_a, ARCHIVE_READ_MAGIC, 555 ARCHIVE_STATE_ANY, "archive_read_header_position"); 556 return (a->header_position); 557 } 558 559 /* 560 * Read data from an archive entry, using a read(2)-style interface. 561 * This is a convenience routine that just calls 562 * archive_read_data_block and copies the results into the client 563 * buffer, filling any gaps with zero bytes. Clients using this 564 * API can be completely ignorant of sparse-file issues; sparse files 565 * will simply be padded with nulls. 566 * 567 * DO NOT intermingle calls to this function and archive_read_data_block 568 * to read a single entry body. 569 */ 570 ssize_t 571 archive_read_data(struct archive *_a, void *buff, size_t s) 572 { 573 struct archive_read *a = (struct archive_read *)_a; 574 char *dest; 575 const void *read_buf; 576 size_t bytes_read; 577 size_t len; 578 int r; 579 580 bytes_read = 0; 581 dest = (char *)buff; 582 583 while (s > 0) { 584 if (a->read_data_remaining == 0) { 585 read_buf = a->read_data_block; 586 r = archive_read_data_block(&a->archive, &read_buf, 587 &a->read_data_remaining, &a->read_data_offset); 588 a->read_data_block = read_buf; 589 if (r == ARCHIVE_EOF) 590 return (bytes_read); 591 /* 592 * Error codes are all negative, so the status 593 * return here cannot be confused with a valid 594 * byte count. (ARCHIVE_OK is zero.) 595 */ 596 if (r < ARCHIVE_OK) 597 return (r); 598 } 599 600 if (a->read_data_offset < a->read_data_output_offset) { 601 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 602 "Encountered out-of-order sparse blocks"); 603 return (ARCHIVE_RETRY); 604 } 605 606 /* Compute the amount of zero padding needed. */ 607 if (a->read_data_output_offset + (off_t)s < 608 a->read_data_offset) { 609 len = s; 610 } else if (a->read_data_output_offset < 611 a->read_data_offset) { 612 len = a->read_data_offset - 613 a->read_data_output_offset; 614 } else 615 len = 0; 616 617 /* Add zeroes. */ 618 memset(dest, 0, len); 619 s -= len; 620 a->read_data_output_offset += len; 621 dest += len; 622 bytes_read += len; 623 624 /* Copy data if there is any space left. */ 625 if (s > 0) { 626 len = a->read_data_remaining; 627 if (len > s) 628 len = s; 629 memcpy(dest, a->read_data_block, len); 630 s -= len; 631 a->read_data_block += len; 632 a->read_data_remaining -= len; 633 a->read_data_output_offset += len; 634 a->read_data_offset += len; 635 dest += len; 636 bytes_read += len; 637 } 638 } 639 return (bytes_read); 640 } 641 642 #if ARCHIVE_API_VERSION < 3 643 /* 644 * Obsolete function provided for compatibility only. Note that the API 645 * of this function doesn't allow the caller to detect if the remaining 646 * data from the archive entry is shorter than the buffer provided, or 647 * even if an error occurred while reading data. 648 */ 649 int 650 archive_read_data_into_buffer(struct archive *a, void *d, ssize_t len) 651 { 652 653 archive_read_data(a, d, len); 654 return (ARCHIVE_OK); 655 } 656 #endif 657 658 /* 659 * Skip over all remaining data in this entry. 660 */ 661 int 662 archive_read_data_skip(struct archive *_a) 663 { 664 struct archive_read *a = (struct archive_read *)_a; 665 int r; 666 const void *buff; 667 size_t size; 668 off_t offset; 669 670 __archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_DATA, 671 "archive_read_data_skip"); 672 673 if (a->format->read_data_skip != NULL) 674 r = (a->format->read_data_skip)(a); 675 else { 676 while ((r = archive_read_data_block(&a->archive, 677 &buff, &size, &offset)) 678 == ARCHIVE_OK) 679 ; 680 } 681 682 if (r == ARCHIVE_EOF) 683 r = ARCHIVE_OK; 684 685 a->archive.state = ARCHIVE_STATE_HEADER; 686 return (r); 687 } 688 689 /* 690 * Read the next block of entry data from the archive. 691 * This is a zero-copy interface; the client receives a pointer, 692 * size, and file offset of the next available block of data. 693 * 694 * Returns ARCHIVE_OK if the operation is successful, ARCHIVE_EOF if 695 * the end of entry is encountered. 696 */ 697 int 698 archive_read_data_block(struct archive *_a, 699 const void **buff, size_t *size, off_t *offset) 700 { 701 struct archive_read *a = (struct archive_read *)_a; 702 __archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_DATA, 703 "archive_read_data_block"); 704 705 if (a->format->read_data == NULL) { 706 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 707 "Internal error: " 708 "No format_read_data_block function registered"); 709 return (ARCHIVE_FATAL); 710 } 711 712 return (a->format->read_data)(a, buff, size, offset); 713 } 714 715 /* 716 * Close the file and release most resources. 717 * 718 * Be careful: client might just call read_new and then read_finish. 719 * Don't assume we actually read anything or performed any non-trivial 720 * initialization. 721 */ 722 static int 723 _archive_read_close(struct archive *_a) 724 { 725 struct archive_read *a = (struct archive_read *)_a; 726 int r = ARCHIVE_OK, r1 = ARCHIVE_OK; 727 size_t i, n; 728 729 __archive_check_magic(&a->archive, ARCHIVE_READ_MAGIC, 730 ARCHIVE_STATE_ANY, "archive_read_close"); 731 archive_clear_error(&a->archive); 732 a->archive.state = ARCHIVE_STATE_CLOSED; 733 734 735 /* Call cleanup functions registered by optional components. */ 736 if (a->cleanup_archive_extract != NULL) 737 r = (a->cleanup_archive_extract)(a); 738 739 /* TODO: Clean up the formatters. */ 740 741 /* Release the filter objects. */ 742 r1 = cleanup_filters(a); 743 if (r1 < r) 744 r = r1; 745 746 /* Release the bidder objects. */ 747 n = sizeof(a->bidders)/sizeof(a->bidders[0]); 748 for (i = 0; i < n; i++) { 749 if (a->bidders[i].free != NULL) { 750 r1 = (a->bidders[i].free)(&a->bidders[i]); 751 if (r1 < r) 752 r = r1; 753 } 754 } 755 756 return (r); 757 } 758 759 static int 760 cleanup_filters(struct archive_read *a) 761 { 762 int r = ARCHIVE_OK; 763 /* Clean up the filter pipeline. */ 764 while (a->filter != NULL) { 765 struct archive_read_filter *t = a->filter->upstream; 766 if (a->filter->close != NULL) { 767 int r1 = (a->filter->close)(a->filter); 768 if (r1 < r) 769 r = r1; 770 } 771 free(a->filter->buffer); 772 free(a->filter); 773 a->filter = t; 774 } 775 return r; 776 } 777 778 /* 779 * Release memory and other resources. 780 */ 781 static int 782 _archive_read_finish(struct archive *_a) 783 { 784 struct archive_read *a = (struct archive_read *)_a; 785 int i; 786 int slots; 787 int r = ARCHIVE_OK; 788 789 __archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_ANY, 790 "archive_read_finish"); 791 if (a->archive.state != ARCHIVE_STATE_CLOSED) 792 r = archive_read_close(&a->archive); 793 794 /* Cleanup format-specific data. */ 795 slots = sizeof(a->formats) / sizeof(a->formats[0]); 796 for (i = 0; i < slots; i++) { 797 a->format = &(a->formats[i]); 798 if (a->formats[i].cleanup) 799 (a->formats[i].cleanup)(a); 800 } 801 802 archive_string_free(&a->archive.error_string); 803 if (a->entry) 804 archive_entry_free(a->entry); 805 a->archive.magic = 0; 806 free(a); 807 #if ARCHIVE_API_VERSION > 1 808 return (r); 809 #endif 810 } 811 812 /* 813 * Used internally by read format handlers to register their bid and 814 * initialization functions. 815 */ 816 int 817 __archive_read_register_format(struct archive_read *a, 818 void *format_data, 819 const char *name, 820 int (*bid)(struct archive_read *), 821 int (*options)(struct archive_read *, const char *, const char *), 822 int (*read_header)(struct archive_read *, struct archive_entry *), 823 int (*read_data)(struct archive_read *, const void **, size_t *, off_t *), 824 int (*read_data_skip)(struct archive_read *), 825 int (*cleanup)(struct archive_read *)) 826 { 827 int i, number_slots; 828 829 __archive_check_magic(&a->archive, 830 ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, 831 "__archive_read_register_format"); 832 833 number_slots = sizeof(a->formats) / sizeof(a->formats[0]); 834 835 for (i = 0; i < number_slots; i++) { 836 if (a->formats[i].bid == bid) 837 return (ARCHIVE_WARN); /* We've already installed */ 838 if (a->formats[i].bid == NULL) { 839 a->formats[i].bid = bid; 840 a->formats[i].options = options; 841 a->formats[i].read_header = read_header; 842 a->formats[i].read_data = read_data; 843 a->formats[i].read_data_skip = read_data_skip; 844 a->formats[i].cleanup = cleanup; 845 a->formats[i].data = format_data; 846 a->formats[i].name = name; 847 return (ARCHIVE_OK); 848 } 849 } 850 851 __archive_errx(1, "Not enough slots for format registration"); 852 return (ARCHIVE_FATAL); /* Never actually called. */ 853 } 854 855 /* 856 * Used internally by decompression routines to register their bid and 857 * initialization functions. 858 */ 859 struct archive_read_filter_bidder * 860 __archive_read_get_bidder(struct archive_read *a) 861 { 862 int i, number_slots; 863 864 __archive_check_magic(&a->archive, 865 ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, 866 "__archive_read_get_bidder"); 867 868 number_slots = sizeof(a->bidders) / sizeof(a->bidders[0]); 869 870 for (i = 0; i < number_slots; i++) { 871 if (a->bidders[i].bid == NULL) { 872 memset(a->bidders + i, 0, sizeof(a->bidders[0])); 873 return (a->bidders + i); 874 } 875 } 876 877 __archive_errx(1, "Not enough slots for compression registration"); 878 return (NULL); /* Never actually executed. */ 879 } 880 881 /* 882 * The next three functions comprise the peek/consume internal I/O 883 * system used by archive format readers. This system allows fairly 884 * flexible read-ahead and allows the I/O code to operate in a 885 * zero-copy manner most of the time. 886 * 887 * In the ideal case, filters generate blocks of data 888 * and __archive_read_ahead() just returns pointers directly into 889 * those blocks. Then __archive_read_consume() just bumps those 890 * pointers. Only if your request would span blocks does the I/O 891 * layer use a copy buffer to provide you with a contiguous block of 892 * data. The __archive_read_skip() is an optimization; it scans ahead 893 * very quickly (it usually translates into a seek() operation if 894 * you're reading uncompressed disk files). 895 * 896 * A couple of useful idioms: 897 * * "I just want some data." Ask for 1 byte and pay attention to 898 * the "number of bytes available" from __archive_read_ahead(). 899 * You can consume more than you asked for; you just can't consume 900 * more than is available. If you consume everything that's 901 * immediately available, the next read_ahead() call will pull 902 * the next block. 903 * * "I want to output a large block of data." As above, ask for 1 byte, 904 * emit all that's available (up to whatever limit you have), then 905 * repeat until you're done. 906 * * "I want to peek ahead by a large amount." Ask for 4k or so, then 907 * double and repeat until you get an error or have enough. Note 908 * that the I/O layer will likely end up expanding its copy buffer 909 * to fit your request, so use this technique cautiously. This 910 * technique is used, for example, by some of the format tasting 911 * code that has uncertain look-ahead needs. 912 * 913 * TODO: Someday, provide a more generic __archive_read_seek() for 914 * those cases where it's useful. This is tricky because there are lots 915 * of cases where seek() is not available (reading gzip data from a 916 * network socket, for instance), so there needs to be a good way to 917 * communicate whether seek() is available and users of that interface 918 * need to use non-seeking strategies whenever seek() is not available. 919 */ 920 921 /* 922 * Looks ahead in the input stream: 923 * * If 'avail' pointer is provided, that returns number of bytes available 924 * in the current buffer, which may be much larger than requested. 925 * * If end-of-file, *avail gets set to zero. 926 * * If error, *avail gets error code. 927 * * If request can be met, returns pointer to data, returns NULL 928 * if request is not met. 929 * 930 * Note: If you just want "some data", ask for 1 byte and pay attention 931 * to *avail, which will have the actual amount available. If you 932 * know exactly how many bytes you need, just ask for that and treat 933 * a NULL return as an error. 934 * 935 * Important: This does NOT move the file pointer. See 936 * __archive_read_consume() below. 937 */ 938 939 /* 940 * This is tricky. We need to provide our clients with pointers to 941 * contiguous blocks of memory but we want to avoid copying whenever 942 * possible. 943 * 944 * Mostly, this code returns pointers directly into the block of data 945 * provided by the client_read routine. It can do this unless the 946 * request would split across blocks. In that case, we have to copy 947 * into an internal buffer to combine reads. 948 */ 949 const void * 950 __archive_read_ahead(struct archive_read *a, size_t min, ssize_t *avail) 951 { 952 return (__archive_read_filter_ahead(a->filter, min, avail)); 953 } 954 955 const void * 956 __archive_read_filter_ahead(struct archive_read_filter *filter, 957 size_t min, ssize_t *avail) 958 { 959 ssize_t bytes_read; 960 size_t tocopy; 961 962 if (filter->fatal) { 963 if (avail) 964 *avail = ARCHIVE_FATAL; 965 return (NULL); 966 } 967 968 /* 969 * Keep pulling more data until we can satisfy the request. 970 */ 971 for (;;) { 972 973 /* 974 * If we can satisfy from the copy buffer (and the 975 * copy buffer isn't empty), we're done. In particular, 976 * note that min == 0 is a perfectly well-defined 977 * request. 978 */ 979 if (filter->avail >= min && filter->avail > 0) { 980 if (avail != NULL) 981 *avail = filter->avail; 982 return (filter->next); 983 } 984 985 /* 986 * We can satisfy directly from client buffer if everything 987 * currently in the copy buffer is still in the client buffer. 988 */ 989 if (filter->client_total >= filter->client_avail + filter->avail 990 && filter->client_avail + filter->avail >= min) { 991 /* "Roll back" to client buffer. */ 992 filter->client_avail += filter->avail; 993 filter->client_next -= filter->avail; 994 /* Copy buffer is now empty. */ 995 filter->avail = 0; 996 filter->next = filter->buffer; 997 /* Return data from client buffer. */ 998 if (avail != NULL) 999 *avail = filter->client_avail; 1000 return (filter->client_next); 1001 } 1002 1003 /* Move data forward in copy buffer if necessary. */ 1004 if (filter->next > filter->buffer && 1005 filter->next + min > filter->buffer + filter->buffer_size) { 1006 if (filter->avail > 0) 1007 memmove(filter->buffer, filter->next, filter->avail); 1008 filter->next = filter->buffer; 1009 } 1010 1011 /* If we've used up the client data, get more. */ 1012 if (filter->client_avail <= 0) { 1013 if (filter->end_of_file) { 1014 if (avail != NULL) 1015 *avail = 0; 1016 return (NULL); 1017 } 1018 bytes_read = (filter->read)(filter, 1019 &filter->client_buff); 1020 if (bytes_read < 0) { /* Read error. */ 1021 filter->client_total = filter->client_avail = 0; 1022 filter->client_next = filter->client_buff = NULL; 1023 filter->fatal = 1; 1024 if (avail != NULL) 1025 *avail = ARCHIVE_FATAL; 1026 return (NULL); 1027 } 1028 if (bytes_read == 0) { /* Premature end-of-file. */ 1029 filter->client_total = filter->client_avail = 0; 1030 filter->client_next = filter->client_buff = NULL; 1031 filter->end_of_file = 1; 1032 /* Return whatever we do have. */ 1033 if (avail != NULL) 1034 *avail = filter->avail; 1035 return (NULL); 1036 } 1037 filter->position += bytes_read; 1038 filter->client_total = bytes_read; 1039 filter->client_avail = filter->client_total; 1040 filter->client_next = filter->client_buff; 1041 } 1042 else 1043 { 1044 /* 1045 * We can't satisfy the request from the copy 1046 * buffer or the existing client data, so we 1047 * need to copy more client data over to the 1048 * copy buffer. 1049 */ 1050 1051 /* Ensure the buffer is big enough. */ 1052 if (min > filter->buffer_size) { 1053 size_t s, t; 1054 char *p; 1055 1056 /* Double the buffer; watch for overflow. */ 1057 s = t = filter->buffer_size; 1058 if (s == 0) 1059 s = min; 1060 while (s < min) { 1061 t *= 2; 1062 if (t <= s) { /* Integer overflow! */ 1063 archive_set_error( 1064 &filter->archive->archive, 1065 ENOMEM, 1066 "Unable to allocate copy buffer"); 1067 filter->fatal = 1; 1068 if (avail != NULL) 1069 *avail = ARCHIVE_FATAL; 1070 return (NULL); 1071 } 1072 s = t; 1073 } 1074 /* Now s >= min, so allocate a new buffer. */ 1075 p = (char *)malloc(s); 1076 if (p == NULL) { 1077 archive_set_error( 1078 &filter->archive->archive, 1079 ENOMEM, 1080 "Unable to allocate copy buffer"); 1081 filter->fatal = 1; 1082 if (avail != NULL) 1083 *avail = ARCHIVE_FATAL; 1084 return (NULL); 1085 } 1086 /* Move data into newly-enlarged buffer. */ 1087 if (filter->avail > 0) 1088 memmove(p, filter->next, filter->avail); 1089 free(filter->buffer); 1090 filter->next = filter->buffer = p; 1091 filter->buffer_size = s; 1092 } 1093 1094 /* We can add client data to copy buffer. */ 1095 /* First estimate: copy to fill rest of buffer. */ 1096 tocopy = (filter->buffer + filter->buffer_size) 1097 - (filter->next + filter->avail); 1098 /* Don't waste time buffering more than we need to. */ 1099 if (tocopy + filter->avail > min) 1100 tocopy = min - filter->avail; 1101 /* Don't copy more than is available. */ 1102 if (tocopy > filter->client_avail) 1103 tocopy = filter->client_avail; 1104 1105 memcpy(filter->next + filter->avail, filter->client_next, 1106 tocopy); 1107 /* Remove this data from client buffer. */ 1108 filter->client_next += tocopy; 1109 filter->client_avail -= tocopy; 1110 /* add it to copy buffer. */ 1111 filter->avail += tocopy; 1112 } 1113 } 1114 } 1115 1116 /* 1117 * Move the file pointer forward. This should be called after 1118 * __archive_read_ahead() returns data to you. Don't try to move 1119 * ahead by more than the amount of data available according to 1120 * __archive_read_ahead(). 1121 */ 1122 /* 1123 * Mark the appropriate data as used. Note that the request here will 1124 * often be much smaller than the size of the previous read_ahead 1125 * request. 1126 */ 1127 ssize_t 1128 __archive_read_consume(struct archive_read *a, size_t request) 1129 { 1130 ssize_t r; 1131 r = __archive_read_filter_consume(a->filter, request); 1132 a->archive.file_position += r; 1133 return (r); 1134 } 1135 1136 ssize_t 1137 __archive_read_filter_consume(struct archive_read_filter * filter, 1138 size_t request) 1139 { 1140 if (filter->avail > 0) { 1141 /* Read came from copy buffer. */ 1142 filter->next += request; 1143 filter->avail -= request; 1144 } else { 1145 /* Read came from client buffer. */ 1146 filter->client_next += request; 1147 filter->client_avail -= request; 1148 } 1149 return (request); 1150 } 1151 1152 /* 1153 * Move the file pointer ahead by an arbitrary amount. If you're 1154 * reading uncompressed data from a disk file, this will actually 1155 * translate into a seek() operation. Even in cases where seek() 1156 * isn't feasible, this at least pushes the read-and-discard loop 1157 * down closer to the data source. 1158 */ 1159 int64_t 1160 __archive_read_skip(struct archive_read *a, int64_t request) 1161 { 1162 int64_t skipped = __archive_read_skip_lenient(a, request); 1163 if (skipped == request) 1164 return (skipped); 1165 /* We hit EOF before we satisfied the skip request. */ 1166 if (skipped < 0) // Map error code to 0 for error message below. 1167 skipped = 0; 1168 archive_set_error(&a->archive, 1169 ARCHIVE_ERRNO_MISC, 1170 "Truncated input file (needed %jd bytes, only %jd available)", 1171 (intmax_t)request, (intmax_t)skipped); 1172 return (ARCHIVE_FATAL); 1173 } 1174 1175 int64_t 1176 __archive_read_skip_lenient(struct archive_read *a, int64_t request) 1177 { 1178 int64_t skipped = __archive_read_filter_skip(a->filter, request); 1179 if (skipped > 0) 1180 a->archive.file_position += skipped; 1181 return (skipped); 1182 } 1183 1184 int64_t 1185 __archive_read_filter_skip(struct archive_read_filter *filter, int64_t request) 1186 { 1187 int64_t bytes_skipped, total_bytes_skipped = 0; 1188 size_t min; 1189 1190 if (filter->fatal) 1191 return (-1); 1192 /* 1193 * If there is data in the buffers already, use that first. 1194 */ 1195 if (filter->avail > 0) { 1196 min = minimum(request, (off_t)filter->avail); 1197 bytes_skipped = __archive_read_filter_consume(filter, min); 1198 request -= bytes_skipped; 1199 total_bytes_skipped += bytes_skipped; 1200 } 1201 if (filter->client_avail > 0) { 1202 min = minimum(request, (int64_t)filter->client_avail); 1203 bytes_skipped = __archive_read_filter_consume(filter, min); 1204 request -= bytes_skipped; 1205 total_bytes_skipped += bytes_skipped; 1206 } 1207 if (request == 0) 1208 return (total_bytes_skipped); 1209 /* 1210 * If a client_skipper was provided, try that first. 1211 */ 1212 #if ARCHIVE_API_VERSION < 2 1213 if ((filter->skip != NULL) && (request < SSIZE_MAX)) { 1214 #else 1215 if (filter->skip != NULL) { 1216 #endif 1217 bytes_skipped = (filter->skip)(filter, request); 1218 if (bytes_skipped < 0) { /* error */ 1219 filter->client_total = filter->client_avail = 0; 1220 filter->client_next = filter->client_buff = NULL; 1221 filter->fatal = 1; 1222 return (bytes_skipped); 1223 } 1224 total_bytes_skipped += bytes_skipped; 1225 request -= bytes_skipped; 1226 filter->client_next = filter->client_buff; 1227 filter->client_avail = filter->client_total = 0; 1228 } 1229 /* 1230 * Note that client_skipper will usually not satisfy the 1231 * full request (due to low-level blocking concerns), 1232 * so even if client_skipper is provided, we may still 1233 * have to use ordinary reads to finish out the request. 1234 */ 1235 while (request > 0) { 1236 ssize_t bytes_read; 1237 (void)__archive_read_filter_ahead(filter, 1, &bytes_read); 1238 if (bytes_read < 0) 1239 return (bytes_read); 1240 if (bytes_read == 0) { 1241 return (total_bytes_skipped); 1242 } 1243 min = (size_t)(minimum(bytes_read, request)); 1244 bytes_read = __archive_read_filter_consume(filter, min); 1245 total_bytes_skipped += bytes_read; 1246 request -= bytes_read; 1247 } 1248 return (total_bytes_skipped); 1249 } 1250