1 /*- 2 * Copyright (c) 2003-2007 Tim Kientzle 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 /* 27 * This file contains the "essential" portions of the read API, that 28 * is, stuff that will probably always be used by any client that 29 * actually needs to read an archive. Optional pieces have been, as 30 * far as possible, separated out into separate files to avoid 31 * needlessly bloating statically-linked clients. 
32 */ 33 34 #include "archive_platform.h" 35 __FBSDID("$FreeBSD: src/lib/libarchive/archive_read.c,v 1.39 2008/12/06 06:45:15 kientzle Exp $"); 36 37 #ifdef HAVE_ERRNO_H 38 #include <errno.h> 39 #endif 40 #include <stdio.h> 41 #ifdef HAVE_STDLIB_H 42 #include <stdlib.h> 43 #endif 44 #ifdef HAVE_STRING_H 45 #include <string.h> 46 #endif 47 #ifdef HAVE_UNISTD_H 48 #include <unistd.h> 49 #endif 50 51 #include "archive.h" 52 #include "archive_entry.h" 53 #include "archive_private.h" 54 #include "archive_read_private.h" 55 56 #define minimum(a, b) (a < b ? a : b) 57 58 static int build_stream(struct archive_read *); 59 static int choose_format(struct archive_read *); 60 static struct archive_vtable *archive_read_vtable(void); 61 static int _archive_read_close(struct archive *); 62 static int _archive_read_finish(struct archive *); 63 64 static struct archive_vtable * 65 archive_read_vtable(void) 66 { 67 static struct archive_vtable av; 68 static int inited = 0; 69 70 if (!inited) { 71 av.archive_finish = _archive_read_finish; 72 av.archive_close = _archive_read_close; 73 } 74 return (&av); 75 } 76 77 /* 78 * Allocate, initialize and return a struct archive object. 79 */ 80 struct archive * 81 archive_read_new(void) 82 { 83 struct archive_read *a; 84 85 a = (struct archive_read *)malloc(sizeof(*a)); 86 if (a == NULL) 87 return (NULL); 88 memset(a, 0, sizeof(*a)); 89 a->archive.magic = ARCHIVE_READ_MAGIC; 90 91 a->archive.state = ARCHIVE_STATE_NEW; 92 a->entry = archive_entry_new(); 93 a->archive.vtable = archive_read_vtable(); 94 95 return (&a->archive); 96 } 97 98 /* 99 * Record the do-not-extract-to file. This belongs in archive_read_extract.c. 
100 */ 101 void 102 archive_read_extract_set_skip_file(struct archive *_a, dev_t d, ino_t i) 103 { 104 struct archive_read *a = (struct archive_read *)_a; 105 __archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_ANY, 106 "archive_read_extract_set_skip_file"); 107 a->skip_file_dev = d; 108 a->skip_file_ino = i; 109 } 110 111 /* 112 * Set read options for the format. 113 */ 114 int 115 archive_read_set_format_options(struct archive *_a, const char *s) 116 { 117 struct archive_read *a; 118 struct archive_format_descriptor *format; 119 char key[64], val[64]; 120 char *valp; 121 size_t i; 122 int len, r; 123 124 if (s == NULL || *s == '\0') 125 return (ARCHIVE_OK); 126 a = (struct archive_read *)_a; 127 __archive_check_magic(&a->archive, ARCHIVE_READ_MAGIC, 128 ARCHIVE_STATE_NEW, "archive_read_set_format_options"); 129 len = 0; 130 for (i = 0; i < sizeof(a->formats)/sizeof(a->formats[0]); i++) { 131 format = &a->formats[i]; 132 if (format == NULL || format->options == NULL || 133 format->name == NULL) 134 /* This format does not support option. */ 135 continue; 136 137 while ((len = __archive_parse_options(s, format->name, 138 sizeof(key), key, sizeof(val), val)) > 0) { 139 valp = val[0] == '\0' ? NULL : val; 140 a->format = format; 141 r = format->options(a, key, valp); 142 a->format = NULL; 143 if (r == ARCHIVE_FATAL) 144 return (r); 145 s += len; 146 } 147 } 148 if (len < 0) { 149 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 150 "Illegal format options."); 151 return (ARCHIVE_WARN); 152 } 153 return (ARCHIVE_OK); 154 } 155 156 /* 157 * Set read options for the filter. 
158 */ 159 int 160 archive_read_set_filter_options(struct archive *_a, const char *s) 161 { 162 struct archive_read *a; 163 struct archive_read_filter *filter; 164 struct archive_read_filter_bidder *bidder; 165 char key[64], val[64]; 166 int len, r; 167 168 if (s == NULL || *s == '\0') 169 return (ARCHIVE_OK); 170 a = (struct archive_read *)_a; 171 __archive_check_magic(&a->archive, ARCHIVE_READ_MAGIC, 172 ARCHIVE_STATE_NEW, "archive_read_set_filter_options"); 173 filter = a->filter; 174 len = 0; 175 for (filter = a->filter; filter != NULL; filter = filter->upstream) { 176 bidder = filter->bidder; 177 if (bidder == NULL) 178 continue; 179 if (bidder->options == NULL) 180 /* This bidder does not support option */ 181 continue; 182 while ((len = __archive_parse_options(s, filter->name, 183 sizeof(key), key, sizeof(val), val)) > 0) { 184 if (val[0] == '\0') 185 r = bidder->options(bidder, key, NULL); 186 else 187 r = bidder->options(bidder, key, val); 188 if (r == ARCHIVE_FATAL) 189 return (r); 190 s += len; 191 } 192 } 193 if (len < 0) { 194 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 195 "Illegal format options."); 196 return (ARCHIVE_WARN); 197 } 198 return (ARCHIVE_OK); 199 } 200 201 /* 202 * Set read options for the format and the filter. 203 */ 204 int 205 archive_read_set_options(struct archive *_a, const char *s) 206 { 207 int r; 208 209 r = archive_read_set_format_options(_a, s); 210 if (r != ARCHIVE_OK) 211 return (r); 212 r = archive_read_set_filter_options(_a, s); 213 if (r != ARCHIVE_OK) 214 return (r); 215 return (ARCHIVE_OK); 216 } 217 218 /* 219 * Open the archive 220 */ 221 int 222 archive_read_open(struct archive *a, void *client_data, 223 archive_open_callback *client_opener, archive_read_callback *client_reader, 224 archive_close_callback *client_closer) 225 { 226 /* Old archive_read_open() is just a thin shell around 227 * archive_read_open2. 
*/ 228 return archive_read_open2(a, client_data, client_opener, 229 client_reader, NULL, client_closer); 230 } 231 232 static ssize_t 233 client_read_proxy(struct archive_read_filter *self, const void **buff) 234 { 235 ssize_t r; 236 r = (self->archive->client.reader)(&self->archive->archive, 237 self->data, buff); 238 self->archive->archive.raw_position += r; 239 return (r); 240 } 241 242 static int64_t 243 client_skip_proxy(struct archive_read_filter *self, int64_t request) 244 { 245 int64_t ask, get, total; 246 /* Limit our maximum seek request to 1GB on platforms 247 * with 32-bit off_t (such as Windows). */ 248 int64_t skip_limit = ((int64_t)1) << (sizeof(off_t) * 8 - 2); 249 250 if (self->archive->client.skipper == NULL) 251 return (0); 252 total = 0; 253 for (;;) { 254 ask = request; 255 if (ask > skip_limit) 256 ask = skip_limit; 257 get = (self->archive->client.skipper)(&self->archive->archive, 258 self->data, ask); 259 if (get == 0) 260 return (total); 261 request -= get; 262 self->archive->archive.raw_position += get; 263 total += get; 264 } 265 } 266 267 static int 268 client_close_proxy(struct archive_read_filter *self) 269 { 270 int r = ARCHIVE_OK; 271 272 if (self->archive->client.closer != NULL) 273 r = (self->archive->client.closer)((struct archive *)self->archive, 274 self->data); 275 self->data = NULL; 276 return (r); 277 } 278 279 280 int 281 archive_read_open2(struct archive *_a, void *client_data, 282 archive_open_callback *client_opener, 283 archive_read_callback *client_reader, 284 archive_skip_callback *client_skipper, 285 archive_close_callback *client_closer) 286 { 287 struct archive_read *a = (struct archive_read *)_a; 288 struct archive_read_filter *filter; 289 int e; 290 291 __archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, 292 "archive_read_open"); 293 294 if (client_reader == NULL) 295 __archive_errx(1, 296 "No reader function provided to archive_read_open"); 297 298 /* Open data source. 
*/ 299 if (client_opener != NULL) { 300 e =(client_opener)(&a->archive, client_data); 301 if (e != 0) { 302 /* If the open failed, call the closer to clean up. */ 303 if (client_closer) 304 (client_closer)(&a->archive, client_data); 305 return (e); 306 } 307 } 308 309 /* Save the client functions and mock up the initial source. */ 310 a->client.reader = client_reader; 311 a->client.skipper = client_skipper; 312 a->client.closer = client_closer; 313 314 filter = calloc(1, sizeof(*filter)); 315 if (filter == NULL) 316 return (ARCHIVE_FATAL); 317 filter->bidder = NULL; 318 filter->upstream = NULL; 319 filter->archive = a; 320 filter->data = client_data; 321 filter->read = client_read_proxy; 322 filter->skip = client_skip_proxy; 323 filter->close = client_close_proxy; 324 filter->name = "none"; 325 filter->code = ARCHIVE_COMPRESSION_NONE; 326 a->filter = filter; 327 328 /* Build out the input pipeline. */ 329 e = build_stream(a); 330 if (e == ARCHIVE_OK) 331 a->archive.state = ARCHIVE_STATE_HEADER; 332 333 return (e); 334 } 335 336 /* 337 * Allow each registered stream transform to bid on whether 338 * it wants to handle this stream. Repeat until we've finished 339 * building the pipeline. 340 */ 341 static int 342 build_stream(struct archive_read *a) 343 { 344 int number_bidders, i, bid, best_bid; 345 struct archive_read_filter_bidder *bidder, *best_bidder; 346 struct archive_read_filter *filter; 347 int r; 348 349 for (;;) { 350 number_bidders = sizeof(a->bidders) / sizeof(a->bidders[0]); 351 352 best_bid = 0; 353 best_bidder = NULL; 354 355 bidder = a->bidders; 356 for (i = 0; i < number_bidders; i++, bidder++) { 357 if (bidder->bid != NULL) { 358 bid = (bidder->bid)(bidder, a->filter); 359 if (bid > best_bid) { 360 best_bid = bid; 361 best_bidder = bidder; 362 } 363 } 364 } 365 366 /* If no bidder, we're done. 
*/ 367 if (best_bidder == NULL) { 368 a->archive.compression_name = a->filter->name; 369 a->archive.compression_code = a->filter->code; 370 return (ARCHIVE_OK); 371 } 372 373 filter 374 = (struct archive_read_filter *)calloc(1, sizeof(*filter)); 375 if (filter == NULL) 376 return (ARCHIVE_FATAL); 377 filter->bidder = best_bidder; 378 filter->archive = a; 379 filter->upstream = a->filter; 380 r = (best_bidder->init)(filter); 381 if (r != ARCHIVE_OK) { 382 free(filter); 383 return (r); 384 } 385 a->filter = filter; 386 } 387 } 388 389 /* 390 * Read header of next entry. 391 */ 392 int 393 archive_read_next_header2(struct archive *_a, struct archive_entry *entry) 394 { 395 struct archive_read *a = (struct archive_read *)_a; 396 int slot, ret; 397 398 __archive_check_magic(_a, ARCHIVE_READ_MAGIC, 399 ARCHIVE_STATE_HEADER | ARCHIVE_STATE_DATA, 400 "archive_read_next_header"); 401 402 archive_entry_clear(entry); 403 archive_clear_error(&a->archive); 404 405 /* 406 * If no format has yet been chosen, choose one. 407 */ 408 if (a->format == NULL) { 409 slot = choose_format(a); 410 if (slot < 0) { 411 a->archive.state = ARCHIVE_STATE_FATAL; 412 return (ARCHIVE_FATAL); 413 } 414 a->format = &(a->formats[slot]); 415 } 416 417 /* 418 * If client didn't consume entire data, skip any remainder 419 * (This is especially important for GNU incremental directories.) 420 */ 421 if (a->archive.state == ARCHIVE_STATE_DATA) { 422 ret = archive_read_data_skip(&a->archive); 423 if (ret == ARCHIVE_EOF) { 424 archive_set_error(&a->archive, EIO, "Premature end-of-file."); 425 a->archive.state = ARCHIVE_STATE_FATAL; 426 return (ARCHIVE_FATAL); 427 } 428 if (ret != ARCHIVE_OK) 429 return (ret); 430 } 431 432 /* Record start-of-header. */ 433 a->header_position = a->archive.file_position; 434 435 ret = (a->format->read_header)(a, entry); 436 437 /* 438 * EOF and FATAL are persistent at this layer. 
By 439 * modifying the state, we guarantee that future calls to 440 * read a header or read data will fail. 441 */ 442 switch (ret) { 443 case ARCHIVE_EOF: 444 a->archive.state = ARCHIVE_STATE_EOF; 445 break; 446 case ARCHIVE_OK: 447 a->archive.state = ARCHIVE_STATE_DATA; 448 break; 449 case ARCHIVE_WARN: 450 a->archive.state = ARCHIVE_STATE_DATA; 451 break; 452 case ARCHIVE_RETRY: 453 break; 454 case ARCHIVE_FATAL: 455 a->archive.state = ARCHIVE_STATE_FATAL; 456 break; 457 } 458 459 a->read_data_output_offset = 0; 460 a->read_data_remaining = 0; 461 return (ret); 462 } 463 464 int 465 archive_read_next_header(struct archive *_a, struct archive_entry **entryp) 466 { 467 int ret; 468 struct archive_read *a = (struct archive_read *)_a; 469 *entryp = NULL; 470 ret = archive_read_next_header2(_a, a->entry); 471 *entryp = a->entry; 472 return ret; 473 } 474 475 /* 476 * Allow each registered format to bid on whether it wants to handle 477 * the next entry. Return index of winning bidder. 478 */ 479 static int 480 choose_format(struct archive_read *a) 481 { 482 int slots; 483 int i; 484 int bid, best_bid; 485 int best_bid_slot; 486 487 slots = sizeof(a->formats) / sizeof(a->formats[0]); 488 best_bid = -1; 489 best_bid_slot = -1; 490 491 /* Set up a->format and a->pformat_data for convenience of bidders. */ 492 a->format = &(a->formats[0]); 493 for (i = 0; i < slots; i++, a->format++) { 494 if (a->format->bid) { 495 bid = (a->format->bid)(a); 496 if (bid == ARCHIVE_FATAL) 497 return (ARCHIVE_FATAL); 498 if ((bid > best_bid) || (best_bid_slot < 0)) { 499 best_bid = bid; 500 best_bid_slot = i; 501 } 502 } 503 } 504 505 /* 506 * There were no bidders; this is a serious programmer error 507 * and demands a quick and definitive abort. 
508 */ 509 if (best_bid_slot < 0) 510 __archive_errx(1, "No formats were registered; you must " 511 "invoke at least one archive_read_support_format_XXX " 512 "function in order to successfully read an archive."); 513 514 /* 515 * There were bidders, but no non-zero bids; this means we 516 * can't support this stream. 517 */ 518 if (best_bid < 1) { 519 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 520 "Unrecognized archive format"); 521 return (ARCHIVE_FATAL); 522 } 523 524 return (best_bid_slot); 525 } 526 527 /* 528 * Return the file offset (within the uncompressed data stream) where 529 * the last header started. 530 */ 531 int64_t 532 archive_read_header_position(struct archive *_a) 533 { 534 struct archive_read *a = (struct archive_read *)_a; 535 __archive_check_magic(_a, ARCHIVE_READ_MAGIC, 536 ARCHIVE_STATE_ANY, "archive_read_header_position"); 537 return (a->header_position); 538 } 539 540 /* 541 * Read data from an archive entry, using a read(2)-style interface. 542 * This is a convenience routine that just calls 543 * archive_read_data_block and copies the results into the client 544 * buffer, filling any gaps with zero bytes. Clients using this 545 * API can be completely ignorant of sparse-file issues; sparse files 546 * will simply be padded with nulls. 547 * 548 * DO NOT intermingle calls to this function and archive_read_data_block 549 * to read a single entry body. 
550 */ 551 ssize_t 552 archive_read_data(struct archive *_a, void *buff, size_t s) 553 { 554 struct archive_read *a = (struct archive_read *)_a; 555 char *dest; 556 const void *read_buf; 557 size_t bytes_read; 558 size_t len; 559 int r; 560 561 bytes_read = 0; 562 dest = (char *)buff; 563 564 while (s > 0) { 565 if (a->read_data_remaining == 0) { 566 read_buf = a->read_data_block; 567 r = archive_read_data_block(&a->archive, &read_buf, 568 &a->read_data_remaining, &a->read_data_offset); 569 a->read_data_block = read_buf; 570 if (r == ARCHIVE_EOF) 571 return (bytes_read); 572 /* 573 * Error codes are all negative, so the status 574 * return here cannot be confused with a valid 575 * byte count. (ARCHIVE_OK is zero.) 576 */ 577 if (r < ARCHIVE_OK) 578 return (r); 579 } 580 581 if (a->read_data_offset < a->read_data_output_offset) { 582 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 583 "Encountered out-of-order sparse blocks"); 584 return (ARCHIVE_RETRY); 585 } 586 587 /* Compute the amount of zero padding needed. */ 588 if (a->read_data_output_offset + (off_t)s < 589 a->read_data_offset) { 590 len = s; 591 } else if (a->read_data_output_offset < 592 a->read_data_offset) { 593 len = a->read_data_offset - 594 a->read_data_output_offset; 595 } else 596 len = 0; 597 598 /* Add zeroes. */ 599 memset(dest, 0, len); 600 s -= len; 601 a->read_data_output_offset += len; 602 dest += len; 603 bytes_read += len; 604 605 /* Copy data if there is any space left. */ 606 if (s > 0) { 607 len = a->read_data_remaining; 608 if (len > s) 609 len = s; 610 memcpy(dest, a->read_data_block, len); 611 s -= len; 612 a->read_data_block += len; 613 a->read_data_remaining -= len; 614 a->read_data_output_offset += len; 615 a->read_data_offset += len; 616 dest += len; 617 bytes_read += len; 618 } 619 } 620 return (bytes_read); 621 } 622 623 #if ARCHIVE_API_VERSION < 3 624 /* 625 * Obsolete function provided for compatibility only. 
Note that the API 626 * of this function doesn't allow the caller to detect if the remaining 627 * data from the archive entry is shorter than the buffer provided, or 628 * even if an error occurred while reading data. 629 */ 630 int 631 archive_read_data_into_buffer(struct archive *a, void *d, ssize_t len) 632 { 633 634 archive_read_data(a, d, len); 635 return (ARCHIVE_OK); 636 } 637 #endif 638 639 /* 640 * Skip over all remaining data in this entry. 641 */ 642 int 643 archive_read_data_skip(struct archive *_a) 644 { 645 struct archive_read *a = (struct archive_read *)_a; 646 int r; 647 const void *buff; 648 size_t size; 649 off_t offset; 650 651 __archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_DATA, 652 "archive_read_data_skip"); 653 654 if (a->format->read_data_skip != NULL) 655 r = (a->format->read_data_skip)(a); 656 else { 657 while ((r = archive_read_data_block(&a->archive, 658 &buff, &size, &offset)) 659 == ARCHIVE_OK) 660 ; 661 } 662 663 if (r == ARCHIVE_EOF) 664 r = ARCHIVE_OK; 665 666 a->archive.state = ARCHIVE_STATE_HEADER; 667 return (r); 668 } 669 670 /* 671 * Read the next block of entry data from the archive. 672 * This is a zero-copy interface; the client receives a pointer, 673 * size, and file offset of the next available block of data. 674 * 675 * Returns ARCHIVE_OK if the operation is successful, ARCHIVE_EOF if 676 * the end of entry is encountered. 
677 */ 678 int 679 archive_read_data_block(struct archive *_a, 680 const void **buff, size_t *size, off_t *offset) 681 { 682 struct archive_read *a = (struct archive_read *)_a; 683 __archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_DATA, 684 "archive_read_data_block"); 685 686 if (a->format->read_data == NULL) { 687 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 688 "Internal error: " 689 "No format_read_data_block function registered"); 690 return (ARCHIVE_FATAL); 691 } 692 693 return (a->format->read_data)(a, buff, size, offset); 694 } 695 696 /* 697 * Close the file and release most resources. 698 * 699 * Be careful: client might just call read_new and then read_finish. 700 * Don't assume we actually read anything or performed any non-trivial 701 * initialization. 702 */ 703 static int 704 _archive_read_close(struct archive *_a) 705 { 706 struct archive_read *a = (struct archive_read *)_a; 707 int r = ARCHIVE_OK, r1 = ARCHIVE_OK; 708 size_t i, n; 709 710 __archive_check_magic(&a->archive, ARCHIVE_READ_MAGIC, 711 ARCHIVE_STATE_ANY, "archive_read_close"); 712 archive_clear_error(&a->archive); 713 a->archive.state = ARCHIVE_STATE_CLOSED; 714 715 716 /* Call cleanup functions registered by optional components. */ 717 if (a->cleanup_archive_extract != NULL) 718 r = (a->cleanup_archive_extract)(a); 719 720 /* TODO: Clean up the formatters. */ 721 722 /* Clean up the filter pipeline. */ 723 while (a->filter != NULL) { 724 struct archive_read_filter *t = a->filter->upstream; 725 if (a->filter->close != NULL) { 726 r1 = (a->filter->close)(a->filter); 727 if (r1 < r) 728 r = r1; 729 } 730 free(a->filter->buffer); 731 free(a->filter); 732 a->filter = t; 733 } 734 735 /* Release the bidder objects. 
*/ 736 n = sizeof(a->bidders)/sizeof(a->bidders[0]); 737 for (i = 0; i < n; i++) { 738 if (a->bidders[i].free != NULL) { 739 r1 = (a->bidders[i].free)(&a->bidders[i]); 740 if (r1 < r) 741 r = r1; 742 } 743 } 744 745 return (r); 746 } 747 748 /* 749 * Release memory and other resources. 750 */ 751 int 752 _archive_read_finish(struct archive *_a) 753 { 754 struct archive_read *a = (struct archive_read *)_a; 755 int i; 756 int slots; 757 int r = ARCHIVE_OK; 758 759 __archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_ANY, 760 "archive_read_finish"); 761 if (a->archive.state != ARCHIVE_STATE_CLOSED) 762 r = archive_read_close(&a->archive); 763 764 /* Cleanup format-specific data. */ 765 slots = sizeof(a->formats) / sizeof(a->formats[0]); 766 for (i = 0; i < slots; i++) { 767 a->format = &(a->formats[i]); 768 if (a->formats[i].cleanup) 769 (a->formats[i].cleanup)(a); 770 } 771 772 archive_string_free(&a->archive.error_string); 773 if (a->entry) 774 archive_entry_free(a->entry); 775 a->archive.magic = 0; 776 free(a); 777 #if ARCHIVE_API_VERSION > 1 778 return (r); 779 #endif 780 } 781 782 /* 783 * Used internally by read format handlers to register their bid and 784 * initialization functions. 
785 */ 786 int 787 __archive_read_register_format(struct archive_read *a, 788 void *format_data, 789 const char *name, 790 int (*bid)(struct archive_read *), 791 int (*options)(struct archive_read *, const char *, const char *), 792 int (*read_header)(struct archive_read *, struct archive_entry *), 793 int (*read_data)(struct archive_read *, const void **, size_t *, off_t *), 794 int (*read_data_skip)(struct archive_read *), 795 int (*cleanup)(struct archive_read *)) 796 { 797 int i, number_slots; 798 799 __archive_check_magic(&a->archive, 800 ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, 801 "__archive_read_register_format"); 802 803 number_slots = sizeof(a->formats) / sizeof(a->formats[0]); 804 805 for (i = 0; i < number_slots; i++) { 806 if (a->formats[i].bid == bid) 807 return (ARCHIVE_WARN); /* We've already installed */ 808 if (a->formats[i].bid == NULL) { 809 a->formats[i].bid = bid; 810 a->formats[i].options = options; 811 a->formats[i].read_header = read_header; 812 a->formats[i].read_data = read_data; 813 a->formats[i].read_data_skip = read_data_skip; 814 a->formats[i].cleanup = cleanup; 815 a->formats[i].data = format_data; 816 a->formats[i].name = name; 817 return (ARCHIVE_OK); 818 } 819 } 820 821 __archive_errx(1, "Not enough slots for format registration"); 822 return (ARCHIVE_FATAL); /* Never actually called. */ 823 } 824 825 /* 826 * Used internally by decompression routines to register their bid and 827 * initialization functions. 
828 */ 829 struct archive_read_filter_bidder * 830 __archive_read_get_bidder(struct archive_read *a) 831 { 832 int i, number_slots; 833 834 __archive_check_magic(&a->archive, 835 ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, 836 "__archive_read_get_bidder"); 837 838 number_slots = sizeof(a->bidders) / sizeof(a->bidders[0]); 839 840 for (i = 0; i < number_slots; i++) { 841 if (a->bidders[i].bid == NULL) { 842 memset(a->bidders + i, 0, sizeof(a->bidders[0])); 843 return (a->bidders + i); 844 } 845 } 846 847 __archive_errx(1, "Not enough slots for compression registration"); 848 return (NULL); /* Never actually executed. */ 849 } 850 851 /* 852 * The next three functions comprise the peek/consume internal I/O 853 * system used by archive format readers. This system allows fairly 854 * flexible read-ahead and allows the I/O code to operate in a 855 * zero-copy manner most of the time. 856 * 857 * In the ideal case, filters generate blocks of data 858 * and __archive_read_ahead() just returns pointers directly into 859 * those blocks. Then __archive_read_consume() just bumps those 860 * pointers. Only if your request would span blocks does the I/O 861 * layer use a copy buffer to provide you with a contiguous block of 862 * data. The __archive_read_skip() is an optimization; it scans ahead 863 * very quickly (it usually translates into a seek() operation if 864 * you're reading uncompressed disk files). 865 * 866 * A couple of useful idioms: 867 * * "I just want some data." Ask for 1 byte and pay attention to 868 * the "number of bytes available" from __archive_read_ahead(). 869 * You can consume more than you asked for; you just can't consume 870 * more than is available. If you consume everything that's 871 * immediately available, the next read_ahead() call will pull 872 * the next block. 873 * * "I want to output a large block of data." As above, ask for 1 byte, 874 * emit all that's available (up to whatever limit you have), then 875 * repeat until you're done. 
876 * * "I want to peek ahead by a large amount." Ask for 4k or so, then 877 * double and repeat until you get an error or have enough. Note 878 * that the I/O layer will likely end up expanding its copy buffer 879 * to fit your request, so use this technique cautiously. This 880 * technique is used, for example, by some of the format tasting 881 * code that has uncertain look-ahead needs. 882 * 883 * TODO: Someday, provide a more generic __archive_read_seek() for 884 * those cases where it's useful. This is tricky because there are lots 885 * of cases where seek() is not available (reading gzip data from a 886 * network socket, for instance), so there needs to be a good way to 887 * communicate whether seek() is available and users of that interface 888 * need to use non-seeking strategies whenever seek() is not available. 889 */ 890 891 /* 892 * Looks ahead in the input stream: 893 * * If 'avail' pointer is provided, that returns number of bytes available 894 * in the current buffer, which may be much larger than requested. 895 * * If end-of-file, *avail gets set to zero. 896 * * If error, *avail gets error code. 897 * * If request can be met, returns pointer to data, returns NULL 898 * if request is not met. 899 * 900 * Note: If you just want "some data", ask for 1 byte and pay attention 901 * to *avail, which will have the actual amount available. If you 902 * know exactly how many bytes you need, just ask for that and treat 903 * a NULL return as an error. 904 * 905 * Important: This does NOT move the file pointer. See 906 * __archive_read_consume() below. 907 */ 908 909 /* 910 * This is tricky. We need to provide our clients with pointers to 911 * contiguous blocks of memory but we want to avoid copying whenever 912 * possible. 913 * 914 * Mostly, this code returns pointers directly into the block of data 915 * provided by the client_read routine. It can do this unless the 916 * request would split across blocks. 
In that case, we have to copy
 * into an internal buffer to combine reads.
 */
/* Peek at the output of the archive's top-most filter (a->filter). */
const void *
__archive_read_ahead(struct archive_read *a, size_t min, ssize_t *avail)
{
	return (__archive_read_filter_ahead(a->filter, min, avail));
}

/*
 * Peek at least 'min' contiguous bytes of this filter's output without
 * consuming them.
 *
 * On success returns a pointer to the data and, if 'avail' is not NULL,
 * stores the number of bytes usable at that pointer.  Returns NULL when
 * the request cannot be met; *avail is then
 *   - ARCHIVE_FATAL if the filter is (or has just become) fatal,
 *   - 0 if end-of-file had already been recorded by an earlier call,
 *   - the number of bytes held in the copy buffer if this call is the
 *     one that hit end-of-file.
 *
 * Two sources are juggled: the block most recently returned by
 * filter->read (client_buff/client_next/client_avail/client_total) and
 * the filter's own copy buffer (buffer/next/avail/buffer_size).
 */
const void *
__archive_read_filter_ahead(struct archive_read_filter *filter,
    size_t min, ssize_t *avail)
{
	ssize_t bytes_read;
	size_t tocopy;

	/* A fatal error is sticky: every later call fails the same way. */
	if (filter->fatal) {
		if (avail)
			*avail = ARCHIVE_FATAL;
		return (NULL);
	}

	/*
	 * Keep pulling more data until we can satisfy the request.
	 */
	for (;;) {

		/*
		 * If we can satisfy from the copy buffer, we're done.
		 */
		if (filter->avail >= min) {
			if (avail != NULL)
				*avail = filter->avail;
			return (filter->next);
		}

		/*
		 * We can satisfy directly from client buffer if everything
		 * currently in the copy buffer is still in the client buffer.
		 */
		if (filter->client_total >= filter->client_avail + filter->avail
		    && filter->client_avail + filter->avail >= min) {
			/* "Roll back" to client buffer. */
			filter->client_avail += filter->avail;
			filter->client_next -= filter->avail;
			/* Copy buffer is now empty. */
			filter->avail = 0;
			filter->next = filter->buffer;
			/* Return data from client buffer. */
			if (avail != NULL)
				*avail = filter->client_avail;
			return (filter->client_next);
		}

		/* Move data forward in copy buffer if necessary. */
		if (filter->next > filter->buffer &&
		    filter->next + min > filter->buffer + filter->buffer_size) {
			if (filter->avail > 0)
				memmove(filter->buffer, filter->next, filter->avail);
			filter->next = filter->buffer;
		}

		/* If we've used up the client data, get more. */
		if (filter->client_avail <= 0) {
			if (filter->end_of_file) {
				if (avail != NULL)
					*avail = 0;
				return (NULL);
			}
			bytes_read = (filter->read)(filter,
			    &filter->client_buff);
			if (bytes_read < 0) {		/* Read error. */
				filter->client_total = filter->client_avail = 0;
				filter->client_next = filter->client_buff = NULL;
				filter->fatal = 1;
				if (avail != NULL)
					*avail = ARCHIVE_FATAL;
				return (NULL);
			}
			if (bytes_read == 0) {	/* Premature end-of-file. */
				filter->client_total = filter->client_avail = 0;
				filter->client_next = filter->client_buff = NULL;
				filter->end_of_file = 1;
				/* Return whatever we do have. */
				if (avail != NULL)
					*avail = filter->avail;
				return (NULL);
			}
			/* A fresh block arrived; make it the client buffer. */
			filter->position += bytes_read;
			filter->client_total = bytes_read;
			filter->client_avail = filter->client_total;
			filter->client_next = filter->client_buff;
		}
		else
		{
			/*
			 * We can't satisfy the request from the copy
			 * buffer or the existing client data, so we
			 * need to copy more client data over to the
			 * copy buffer.
			 */

			/* Ensure the buffer is big enough. */
			if (min > filter->buffer_size) {
				size_t s, t;
				char *p;

				/* Double the buffer; watch for overflow. */
				s = t = filter->buffer_size;
				/* No buffer yet: size it to the request. */
				if (s == 0)
					s = min;
				while (s < min) {
					t *= 2;
					if (t <= s) { /* Integer overflow! */
						archive_set_error(
							&filter->archive->archive,
							ENOMEM,
						    "Unable to allocate copy buffer");
						filter->fatal = 1;
						if (avail != NULL)
							*avail = ARCHIVE_FATAL;
						return (NULL);
					}
					s = t;
				}
				/* Now s >= min, so allocate a new buffer. */
				p = (char *)malloc(s);
				if (p == NULL) {
					archive_set_error(
						&filter->archive->archive,
						ENOMEM,
					    "Unable to allocate copy buffer");
					filter->fatal = 1;
					if (avail != NULL)
						*avail = ARCHIVE_FATAL;
					return (NULL);
				}
				/* Move data into newly-enlarged buffer. */
				if (filter->avail > 0)
					memmove(p, filter->next, filter->avail);
				free(filter->buffer);
				filter->next = filter->buffer = p;
				filter->buffer_size = s;
			}

			/* We can add client data to copy buffer. */
			/* First estimate: copy to fill rest of buffer. */
			tocopy = (filter->buffer + filter->buffer_size)
			    - (filter->next + filter->avail);
			/* Don't waste time buffering more than we need to. */
			if (tocopy + filter->avail > min)
				tocopy = min - filter->avail;
			/* Don't copy more than is available. */
			if (tocopy > filter->client_avail)
				tocopy = filter->client_avail;

			memcpy(filter->next + filter->avail, filter->client_next,
			    tocopy);
			/* Remove this data from client buffer. */
			filter->client_next += tocopy;
			filter->client_avail -= tocopy;
			/* add it to copy buffer. */
			filter->avail += tocopy;
		}
	}
}

/*
 * Move the file pointer forward.  This should be called after
 * __archive_read_ahead() returns data to you.  Don't try to move
 * ahead by more than the amount of data available according to
 * __archive_read_ahead().
 */
/*
 * Mark the appropriate data as used.  Note that the request here will
 * often be much smaller than the size of the previous read_ahead
 * request.
1093 */ 1094 ssize_t 1095 __archive_read_consume(struct archive_read *a, size_t request) 1096 { 1097 ssize_t r; 1098 r = __archive_read_filter_consume(a->filter, request); 1099 a->archive.file_position += r; 1100 return (r); 1101 } 1102 1103 ssize_t 1104 __archive_read_filter_consume(struct archive_read_filter * filter, 1105 size_t request) 1106 { 1107 if (filter->avail > 0) { 1108 /* Read came from copy buffer. */ 1109 filter->next += request; 1110 filter->avail -= request; 1111 } else { 1112 /* Read came from client buffer. */ 1113 filter->client_next += request; 1114 filter->client_avail -= request; 1115 } 1116 return (request); 1117 } 1118 1119 /* 1120 * Move the file pointer ahead by an arbitrary amount. If you're 1121 * reading uncompressed data from a disk file, this will actually 1122 * translate into a seek() operation. Even in cases where seek() 1123 * isn't feasible, this at least pushes the read-and-discard loop 1124 * down closer to the data source. 1125 */ 1126 int64_t 1127 __archive_read_skip(struct archive_read *a, int64_t request) 1128 { 1129 return (__archive_read_filter_skip(a->filter, request)); 1130 } 1131 1132 int64_t 1133 __archive_read_filter_skip(struct archive_read_filter *filter, int64_t request) 1134 { 1135 int64_t bytes_skipped, total_bytes_skipped = 0; 1136 size_t min; 1137 1138 if (filter->fatal) 1139 return (-1); 1140 /* 1141 * If there is data in the buffers already, use that first. 
1142 */ 1143 if (filter->avail > 0) { 1144 min = minimum(request, (off_t)filter->avail); 1145 bytes_skipped = __archive_read_consume(filter->archive, min); 1146 request -= bytes_skipped; 1147 total_bytes_skipped += bytes_skipped; 1148 } 1149 if (filter->client_avail > 0) { 1150 min = minimum(request, (int64_t)filter->client_avail); 1151 bytes_skipped = __archive_read_consume(filter->archive, min); 1152 request -= bytes_skipped; 1153 total_bytes_skipped += bytes_skipped; 1154 } 1155 if (request == 0) 1156 return (total_bytes_skipped); 1157 /* 1158 * If a client_skipper was provided, try that first. 1159 */ 1160 #if ARCHIVE_API_VERSION < 2 1161 if ((filter->skip != NULL) && (request < SSIZE_MAX)) { 1162 #else 1163 if (filter->skip != NULL) { 1164 #endif 1165 bytes_skipped = (filter->skip)(filter, request); 1166 if (bytes_skipped < 0) { /* error */ 1167 filter->client_total = filter->client_avail = 0; 1168 filter->client_next = filter->client_buff = NULL; 1169 filter->fatal = 1; 1170 return (bytes_skipped); 1171 } 1172 filter->archive->archive.file_position += bytes_skipped; 1173 total_bytes_skipped += bytes_skipped; 1174 request -= bytes_skipped; 1175 filter->client_next = filter->client_buff; 1176 filter->client_avail = filter->client_total = 0; 1177 } 1178 /* 1179 * Note that client_skipper will usually not satisfy the 1180 * full request (due to low-level blocking concerns), 1181 * so even if client_skipper is provided, we may still 1182 * have to use ordinary reads to finish out the request. 1183 */ 1184 while (request > 0) { 1185 const void* dummy_buffer; 1186 ssize_t bytes_read; 1187 dummy_buffer = __archive_read_ahead(filter->archive, 1188 1, &bytes_read); 1189 if (bytes_read < 0) 1190 return (bytes_read); 1191 if (bytes_read == 0) { 1192 /* We hit EOF before we satisfied the skip request. 
*/ 1193 archive_set_error(&filter->archive->archive, 1194 ARCHIVE_ERRNO_MISC, 1195 "Truncated input file (need to skip %jd bytes)", 1196 (intmax_t)request); 1197 return (ARCHIVE_FATAL); 1198 } 1199 min = (size_t)(minimum(bytes_read, request)); 1200 bytes_read = __archive_read_consume(filter->archive, min); 1201 total_bytes_skipped += bytes_read; 1202 request -= bytes_read; 1203 } 1204 return (total_bytes_skipped); 1205 } 1206