1 /*- 2 * Copyright (c) 2003-2007 Tim Kientzle 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 /* 27 * This file contains the "essential" portions of the read API, that 28 * is, stuff that will probably always be used by any client that 29 * actually needs to read an archive. Optional pieces have been, as 30 * far as possible, separated out into separate files to avoid 31 * needlessly bloating statically-linked clients. 32 */ 33 34 #include "archive_platform.h" 35 __FBSDID("$FreeBSD: src/lib/libarchive/archive_read.c,v 1.39 2008/12/06 06:45:15 kientzle Exp $"); 36 37 #ifdef HAVE_ERRNO_H 38 #include <errno.h> 39 #endif 40 #include <stdio.h> 41 #ifdef HAVE_STDLIB_H 42 #include <stdlib.h> 43 #endif 44 #ifdef HAVE_STRING_H 45 #include <string.h> 46 #endif 47 #ifdef HAVE_UNISTD_H 48 #include <unistd.h> 49 #endif 50 51 #include "archive.h" 52 #include "archive_entry.h" 53 #include "archive_private.h" 54 #include "archive_read_private.h" 55 56 #define minimum(a, b) (a < b ? a : b) 57 58 static int build_stream(struct archive_read *); 59 static int choose_format(struct archive_read *); 60 static struct archive_vtable *archive_read_vtable(void); 61 static int _archive_read_close(struct archive *); 62 static int _archive_read_finish(struct archive *); 63 64 static struct archive_vtable * 65 archive_read_vtable(void) 66 { 67 static struct archive_vtable av; 68 static int inited = 0; 69 70 if (!inited) { 71 av.archive_finish = _archive_read_finish; 72 av.archive_close = _archive_read_close; 73 } 74 return (&av); 75 } 76 77 /* 78 * Allocate, initialize and return a struct archive object. 79 */ 80 struct archive * 81 archive_read_new(void) 82 { 83 struct archive_read *a; 84 85 a = (struct archive_read *)malloc(sizeof(*a)); 86 if (a == NULL) 87 return (NULL); 88 memset(a, 0, sizeof(*a)); 89 a->archive.magic = ARCHIVE_READ_MAGIC; 90 91 a->archive.state = ARCHIVE_STATE_NEW; 92 a->entry = archive_entry_new(); 93 a->archive.vtable = archive_read_vtable(); 94 95 return (&a->archive); 96 } 97 98 /* 99 * Record the do-not-extract-to file. This belongs in archive_read_extract.c. 100 */ 101 void 102 archive_read_extract_set_skip_file(struct archive *_a, dev_t d, ino_t i) 103 { 104 struct archive_read *a = (struct archive_read *)_a; 105 __archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_ANY, 106 "archive_read_extract_set_skip_file"); 107 a->skip_file_dev = d; 108 a->skip_file_ino = i; 109 } 110 111 /* 112 * Set read options for the format. 113 */ 114 int 115 archive_read_set_format_options(struct archive *_a, const char *s) 116 { 117 struct archive_read *a; 118 struct archive_format_descriptor *format; 119 char key[64], val[64]; 120 size_t i; 121 int len, r; 122 123 if (s == NULL || *s == '\0') 124 return (ARCHIVE_OK); 125 a = (struct archive_read *)_a; 126 __archive_check_magic(&a->archive, ARCHIVE_READ_MAGIC, 127 ARCHIVE_STATE_NEW, "archive_read_set_format_options"); 128 len = 0; 129 for (i = 0; i < sizeof(a->formats)/sizeof(a->formats[0]); i++) { 130 format = &a->formats[i]; 131 if (format == NULL || format->options == NULL || 132 format->name == NULL) 133 /* This format does not support option. */ 134 continue; 135 136 while ((len = __archive_parse_options(s, format->name, 137 sizeof(key), key, sizeof(val), val)) > 0) { 138 if (val[0] == '\0') 139 r = format->options(a, key, NULL); 140 else 141 r = format->options(a, key, val); 142 if (r == ARCHIVE_FATAL) 143 return (r); 144 s += len; 145 } 146 } 147 if (len < 0) { 148 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 149 "Illegal format options."); 150 return (ARCHIVE_WARN); 151 } 152 return (ARCHIVE_OK); 153 } 154 155 /* 156 * Set read options for the filter. 157 */ 158 int 159 archive_read_set_filter_options(struct archive *_a, const char *s) 160 { 161 struct archive_read *a; 162 struct archive_read_filter *filter; 163 struct archive_read_filter_bidder *bidder; 164 char key[64], val[64]; 165 int len, r; 166 167 if (s == NULL || *s == '\0') 168 return (ARCHIVE_OK); 169 a = (struct archive_read *)_a; 170 __archive_check_magic(&a->archive, ARCHIVE_READ_MAGIC, 171 ARCHIVE_STATE_NEW, "archive_read_set_filter_options"); 172 filter = a->filter; 173 len = 0; 174 for (filter = a->filter; filter != NULL; filter = filter->upstream) { 175 bidder = filter->bidder; 176 if (bidder == NULL) 177 continue; 178 if (bidder->options == NULL) 179 /* This bidder does not support option */ 180 continue; 181 while ((len = __archive_parse_options(s, filter->name, 182 sizeof(key), key, sizeof(val), val)) > 0) { 183 if (val[0] == '\0') 184 r = bidder->options(bidder, key, NULL); 185 else 186 r = bidder->options(bidder, key, val); 187 if (r == ARCHIVE_FATAL) 188 return (r); 189 s += len; 190 } 191 } 192 if (len < 0) { 193 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 194 "Illegal format options."); 195 return (ARCHIVE_WARN); 196 } 197 return (ARCHIVE_OK); 198 } 199 200 /* 201 * Set read options for the format and the filter. 202 */ 203 int 204 archive_read_set_options(struct archive *_a, const char *s) 205 { 206 int r; 207 208 r = archive_read_set_format_options(_a, s); 209 if (r != ARCHIVE_OK) 210 return (r); 211 r = archive_read_set_filter_options(_a, s); 212 if (r != ARCHIVE_OK) 213 return (r); 214 return (ARCHIVE_OK); 215 } 216 217 /* 218 * Open the archive 219 */ 220 int 221 archive_read_open(struct archive *a, void *client_data, 222 archive_open_callback *client_opener, archive_read_callback *client_reader, 223 archive_close_callback *client_closer) 224 { 225 /* Old archive_read_open() is just a thin shell around 226 * archive_read_open2. */ 227 return archive_read_open2(a, client_data, client_opener, 228 client_reader, NULL, client_closer); 229 } 230 231 static ssize_t 232 client_read_proxy(struct archive_read_filter *self, const void **buff) 233 { 234 ssize_t r; 235 r = (self->archive->client.reader)(&self->archive->archive, 236 self->data, buff); 237 self->archive->archive.raw_position += r; 238 return (r); 239 } 240 241 static int64_t 242 client_skip_proxy(struct archive_read_filter *self, int64_t request) 243 { 244 int64_t r; 245 if (self->archive->client.skipper == NULL) 246 return (0); 247 r = (self->archive->client.skipper)(&self->archive->archive, 248 self->data, request); 249 self->archive->archive.raw_position += r; 250 return (r); 251 } 252 253 static int 254 client_close_proxy(struct archive_read_filter *self) 255 { 256 int r = ARCHIVE_OK; 257 258 if (self->archive->client.closer != NULL) 259 r = (self->archive->client.closer)((struct archive *)self->archive, 260 self->data); 261 self->data = NULL; 262 return (r); 263 } 264 265 266 int 267 archive_read_open2(struct archive *_a, void *client_data, 268 archive_open_callback *client_opener, 269 archive_read_callback *client_reader, 270 archive_skip_callback *client_skipper, 271 archive_close_callback *client_closer) 272 { 273 struct archive_read *a = (struct archive_read *)_a; 274 struct archive_read_filter *filter; 275 int e; 276 277 __archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, 278 "archive_read_open"); 279 280 if (client_reader == NULL) 281 __archive_errx(1, 282 "No reader function provided to archive_read_open"); 283 284 /* Open data source. */ 285 if (client_opener != NULL) { 286 e =(client_opener)(&a->archive, client_data); 287 if (e != 0) { 288 /* If the open failed, call the closer to clean up. */ 289 if (client_closer) 290 (client_closer)(&a->archive, client_data); 291 return (e); 292 } 293 } 294 295 /* Save the client functions and mock up the initial source. */ 296 a->client.reader = client_reader; 297 a->client.skipper = client_skipper; 298 a->client.closer = client_closer; 299 300 filter = calloc(1, sizeof(*filter)); 301 if (filter == NULL) 302 return (ARCHIVE_FATAL); 303 filter->bidder = NULL; 304 filter->upstream = NULL; 305 filter->archive = a; 306 filter->data = client_data; 307 filter->read = client_read_proxy; 308 filter->skip = client_skip_proxy; 309 filter->close = client_close_proxy; 310 filter->name = "none"; 311 filter->code = ARCHIVE_COMPRESSION_NONE; 312 a->filter = filter; 313 314 /* Build out the input pipeline. */ 315 e = build_stream(a); 316 if (e == ARCHIVE_OK) 317 a->archive.state = ARCHIVE_STATE_HEADER; 318 319 return (e); 320 } 321 322 /* 323 * Allow each registered stream transform to bid on whether 324 * it wants to handle this stream. Repeat until we've finished 325 * building the pipeline. 326 */ 327 static int 328 build_stream(struct archive_read *a) 329 { 330 int number_bidders, i, bid, best_bid; 331 struct archive_read_filter_bidder *bidder, *best_bidder; 332 struct archive_read_filter *filter; 333 int r; 334 335 for (;;) { 336 number_bidders = sizeof(a->bidders) / sizeof(a->bidders[0]); 337 338 best_bid = 0; 339 best_bidder = NULL; 340 341 bidder = a->bidders; 342 for (i = 0; i < number_bidders; i++, bidder++) { 343 if (bidder->bid != NULL) { 344 bid = (bidder->bid)(bidder, a->filter); 345 if (bid > best_bid) { 346 best_bid = bid; 347 best_bidder = bidder; 348 } 349 } 350 } 351 352 /* If no bidder, we're done. */ 353 if (best_bidder == NULL) { 354 a->archive.compression_name = a->filter->name; 355 a->archive.compression_code = a->filter->code; 356 return (ARCHIVE_OK); 357 } 358 359 filter 360 = (struct archive_read_filter *)calloc(1, sizeof(*filter)); 361 if (filter == NULL) 362 return (ARCHIVE_FATAL); 363 filter->bidder = best_bidder; 364 filter->archive = a; 365 filter->upstream = a->filter; 366 r = (best_bidder->init)(filter); 367 if (r != ARCHIVE_OK) { 368 free(filter); 369 return (r); 370 } 371 a->filter = filter; 372 } 373 } 374 375 /* 376 * Read header of next entry. 377 */ 378 int 379 archive_read_next_header2(struct archive *_a, struct archive_entry *entry) 380 { 381 struct archive_read *a = (struct archive_read *)_a; 382 int slot, ret; 383 384 __archive_check_magic(_a, ARCHIVE_READ_MAGIC, 385 ARCHIVE_STATE_HEADER | ARCHIVE_STATE_DATA, 386 "archive_read_next_header"); 387 388 archive_entry_clear(entry); 389 archive_clear_error(&a->archive); 390 391 /* 392 * If no format has yet been chosen, choose one. 393 */ 394 if (a->format == NULL) { 395 slot = choose_format(a); 396 if (slot < 0) { 397 a->archive.state = ARCHIVE_STATE_FATAL; 398 return (ARCHIVE_FATAL); 399 } 400 a->format = &(a->formats[slot]); 401 } 402 403 /* 404 * If client didn't consume entire data, skip any remainder 405 * (This is especially important for GNU incremental directories.) 406 */ 407 if (a->archive.state == ARCHIVE_STATE_DATA) { 408 ret = archive_read_data_skip(&a->archive); 409 if (ret == ARCHIVE_EOF) { 410 archive_set_error(&a->archive, EIO, "Premature end-of-file."); 411 a->archive.state = ARCHIVE_STATE_FATAL; 412 return (ARCHIVE_FATAL); 413 } 414 if (ret != ARCHIVE_OK) 415 return (ret); 416 } 417 418 /* Record start-of-header. */ 419 a->header_position = a->archive.file_position; 420 421 ret = (a->format->read_header)(a, entry); 422 423 /* 424 * EOF and FATAL are persistent at this layer. By 425 * modifying the state, we guarantee that future calls to 426 * read a header or read data will fail. 427 */ 428 switch (ret) { 429 case ARCHIVE_EOF: 430 a->archive.state = ARCHIVE_STATE_EOF; 431 break; 432 case ARCHIVE_OK: 433 a->archive.state = ARCHIVE_STATE_DATA; 434 break; 435 case ARCHIVE_WARN: 436 a->archive.state = ARCHIVE_STATE_DATA; 437 break; 438 case ARCHIVE_RETRY: 439 break; 440 case ARCHIVE_FATAL: 441 a->archive.state = ARCHIVE_STATE_FATAL; 442 break; 443 } 444 445 a->read_data_output_offset = 0; 446 a->read_data_remaining = 0; 447 return (ret); 448 } 449 450 int 451 archive_read_next_header(struct archive *_a, struct archive_entry **entryp) 452 { 453 int ret; 454 struct archive_read *a = (struct archive_read *)_a; 455 *entryp = NULL; 456 ret = archive_read_next_header2(_a, a->entry); 457 *entryp = a->entry; 458 return ret; 459 } 460 461 /* 462 * Allow each registered format to bid on whether it wants to handle 463 * the next entry. Return index of winning bidder. 464 */ 465 static int 466 choose_format(struct archive_read *a) 467 { 468 int slots; 469 int i; 470 int bid, best_bid; 471 int best_bid_slot; 472 473 slots = sizeof(a->formats) / sizeof(a->formats[0]); 474 best_bid = -1; 475 best_bid_slot = -1; 476 477 /* Set up a->format and a->pformat_data for convenience of bidders. */ 478 a->format = &(a->formats[0]); 479 for (i = 0; i < slots; i++, a->format++) { 480 if (a->format->bid) { 481 bid = (a->format->bid)(a); 482 if (bid == ARCHIVE_FATAL) 483 return (ARCHIVE_FATAL); 484 if ((bid > best_bid) || (best_bid_slot < 0)) { 485 best_bid = bid; 486 best_bid_slot = i; 487 } 488 } 489 } 490 491 /* 492 * There were no bidders; this is a serious programmer error 493 * and demands a quick and definitive abort. 494 */ 495 if (best_bid_slot < 0) 496 __archive_errx(1, "No formats were registered; you must " 497 "invoke at least one archive_read_support_format_XXX " 498 "function in order to successfully read an archive."); 499 500 /* 501 * There were bidders, but no non-zero bids; this means we 502 * can't support this stream. 503 */ 504 if (best_bid < 1) { 505 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 506 "Unrecognized archive format"); 507 return (ARCHIVE_FATAL); 508 } 509 510 return (best_bid_slot); 511 } 512 513 /* 514 * Return the file offset (within the uncompressed data stream) where 515 * the last header started. 516 */ 517 int64_t 518 archive_read_header_position(struct archive *_a) 519 { 520 struct archive_read *a = (struct archive_read *)_a; 521 __archive_check_magic(_a, ARCHIVE_READ_MAGIC, 522 ARCHIVE_STATE_ANY, "archive_read_header_position"); 523 return (a->header_position); 524 } 525 526 /* 527 * Read data from an archive entry, using a read(2)-style interface. 528 * This is a convenience routine that just calls 529 * archive_read_data_block and copies the results into the client 530 * buffer, filling any gaps with zero bytes. Clients using this 531 * API can be completely ignorant of sparse-file issues; sparse files 532 * will simply be padded with nulls. 533 * 534 * DO NOT intermingle calls to this function and archive_read_data_block 535 * to read a single entry body. 536 */ 537 ssize_t 538 archive_read_data(struct archive *_a, void *buff, size_t s) 539 { 540 struct archive_read *a = (struct archive_read *)_a; 541 char *dest; 542 const void *read_buf; 543 size_t bytes_read; 544 size_t len; 545 int r; 546 547 bytes_read = 0; 548 dest = (char *)buff; 549 550 while (s > 0) { 551 if (a->read_data_remaining == 0) { 552 read_buf = a->read_data_block; 553 r = archive_read_data_block(&a->archive, &read_buf, 554 &a->read_data_remaining, &a->read_data_offset); 555 a->read_data_block = read_buf; 556 if (r == ARCHIVE_EOF) 557 return (bytes_read); 558 /* 559 * Error codes are all negative, so the status 560 * return here cannot be confused with a valid 561 * byte count. (ARCHIVE_OK is zero.) 562 */ 563 if (r < ARCHIVE_OK) 564 return (r); 565 } 566 567 if (a->read_data_offset < a->read_data_output_offset) { 568 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 569 "Encountered out-of-order sparse blocks"); 570 return (ARCHIVE_RETRY); 571 } 572 573 /* Compute the amount of zero padding needed. */ 574 if (a->read_data_output_offset + (off_t)s < 575 a->read_data_offset) { 576 len = s; 577 } else if (a->read_data_output_offset < 578 a->read_data_offset) { 579 len = a->read_data_offset - 580 a->read_data_output_offset; 581 } else 582 len = 0; 583 584 /* Add zeroes. */ 585 memset(dest, 0, len); 586 s -= len; 587 a->read_data_output_offset += len; 588 dest += len; 589 bytes_read += len; 590 591 /* Copy data if there is any space left. */ 592 if (s > 0) { 593 len = a->read_data_remaining; 594 if (len > s) 595 len = s; 596 memcpy(dest, a->read_data_block, len); 597 s -= len; 598 a->read_data_block += len; 599 a->read_data_remaining -= len; 600 a->read_data_output_offset += len; 601 a->read_data_offset += len; 602 dest += len; 603 bytes_read += len; 604 } 605 } 606 return (bytes_read); 607 } 608 609 #if ARCHIVE_API_VERSION < 3 610 /* 611 * Obsolete function provided for compatibility only. Note that the API 612 * of this function doesn't allow the caller to detect if the remaining 613 * data from the archive entry is shorter than the buffer provided, or 614 * even if an error occurred while reading data. 615 */ 616 int 617 archive_read_data_into_buffer(struct archive *a, void *d, ssize_t len) 618 { 619 620 archive_read_data(a, d, len); 621 return (ARCHIVE_OK); 622 } 623 #endif 624 625 /* 626 * Skip over all remaining data in this entry. 627 */ 628 int 629 archive_read_data_skip(struct archive *_a) 630 { 631 struct archive_read *a = (struct archive_read *)_a; 632 int r; 633 const void *buff; 634 size_t size; 635 off_t offset; 636 637 __archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_DATA, 638 "archive_read_data_skip"); 639 640 if (a->format->read_data_skip != NULL) 641 r = (a->format->read_data_skip)(a); 642 else { 643 while ((r = archive_read_data_block(&a->archive, 644 &buff, &size, &offset)) 645 == ARCHIVE_OK) 646 ; 647 } 648 649 if (r == ARCHIVE_EOF) 650 r = ARCHIVE_OK; 651 652 a->archive.state = ARCHIVE_STATE_HEADER; 653 return (r); 654 } 655 656 /* 657 * Read the next block of entry data from the archive. 658 * This is a zero-copy interface; the client receives a pointer, 659 * size, and file offset of the next available block of data. 660 * 661 * Returns ARCHIVE_OK if the operation is successful, ARCHIVE_EOF if 662 * the end of entry is encountered. 663 */ 664 int 665 archive_read_data_block(struct archive *_a, 666 const void **buff, size_t *size, off_t *offset) 667 { 668 struct archive_read *a = (struct archive_read *)_a; 669 __archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_DATA, 670 "archive_read_data_block"); 671 672 if (a->format->read_data == NULL) { 673 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 674 "Internal error: " 675 "No format_read_data_block function registered"); 676 return (ARCHIVE_FATAL); 677 } 678 679 return (a->format->read_data)(a, buff, size, offset); 680 } 681 682 /* 683 * Close the file and release most resources. 684 * 685 * Be careful: client might just call read_new and then read_finish. 686 * Don't assume we actually read anything or performed any non-trivial 687 * initialization. 688 */ 689 static int 690 _archive_read_close(struct archive *_a) 691 { 692 struct archive_read *a = (struct archive_read *)_a; 693 int r = ARCHIVE_OK, r1 = ARCHIVE_OK; 694 size_t i, n; 695 696 __archive_check_magic(&a->archive, ARCHIVE_READ_MAGIC, 697 ARCHIVE_STATE_ANY, "archive_read_close"); 698 archive_clear_error(&a->archive); 699 a->archive.state = ARCHIVE_STATE_CLOSED; 700 701 702 /* Call cleanup functions registered by optional components. */ 703 if (a->cleanup_archive_extract != NULL) 704 r = (a->cleanup_archive_extract)(a); 705 706 /* TODO: Clean up the formatters. */ 707 708 /* Clean up the filter pipeline. */ 709 while (a->filter != NULL) { 710 struct archive_read_filter *t = a->filter->upstream; 711 if (a->filter->close != NULL) { 712 r1 = (a->filter->close)(a->filter); 713 if (r1 < r) 714 r = r1; 715 } 716 free(a->filter->buffer); 717 free(a->filter); 718 a->filter = t; 719 } 720 721 /* Release the bidder objects. */ 722 n = sizeof(a->bidders)/sizeof(a->bidders[0]); 723 for (i = 0; i < n; i++) { 724 if (a->bidders[i].free != NULL) { 725 r1 = (a->bidders[i].free)(&a->bidders[i]); 726 if (r1 < r) 727 r = r1; 728 } 729 } 730 731 return (r); 732 } 733 734 /* 735 * Release memory and other resources. 736 */ 737 int 738 _archive_read_finish(struct archive *_a) 739 { 740 struct archive_read *a = (struct archive_read *)_a; 741 int i; 742 int slots; 743 int r = ARCHIVE_OK; 744 745 __archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_ANY, 746 "archive_read_finish"); 747 if (a->archive.state != ARCHIVE_STATE_CLOSED) 748 r = archive_read_close(&a->archive); 749 750 /* Cleanup format-specific data. */ 751 slots = sizeof(a->formats) / sizeof(a->formats[0]); 752 for (i = 0; i < slots; i++) { 753 a->format = &(a->formats[i]); 754 if (a->formats[i].cleanup) 755 (a->formats[i].cleanup)(a); 756 } 757 758 archive_string_free(&a->archive.error_string); 759 if (a->entry) 760 archive_entry_free(a->entry); 761 a->archive.magic = 0; 762 free(a); 763 #if ARCHIVE_API_VERSION > 1 764 return (r); 765 #endif 766 } 767 768 /* 769 * Used internally by read format handlers to register their bid and 770 * initialization functions. 771 */ 772 int 773 __archive_read_register_format(struct archive_read *a, 774 void *format_data, 775 const char *name, 776 int (*bid)(struct archive_read *), 777 int (*options)(struct archive_read *, const char *, const char *), 778 int (*read_header)(struct archive_read *, struct archive_entry *), 779 int (*read_data)(struct archive_read *, const void **, size_t *, off_t *), 780 int (*read_data_skip)(struct archive_read *), 781 int (*cleanup)(struct archive_read *)) 782 { 783 int i, number_slots; 784 785 __archive_check_magic(&a->archive, 786 ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, 787 "__archive_read_register_format"); 788 789 number_slots = sizeof(a->formats) / sizeof(a->formats[0]); 790 791 for (i = 0; i < number_slots; i++) { 792 if (a->formats[i].bid == bid) 793 return (ARCHIVE_WARN); /* We've already installed */ 794 if (a->formats[i].bid == NULL) { 795 a->formats[i].bid = bid; 796 a->formats[i].options = options; 797 a->formats[i].read_header = read_header; 798 a->formats[i].read_data = read_data; 799 a->formats[i].read_data_skip = read_data_skip; 800 a->formats[i].cleanup = cleanup; 801 a->formats[i].data = format_data; 802 a->formats[i].name = name; 803 return (ARCHIVE_OK); 804 } 805 } 806 807 __archive_errx(1, "Not enough slots for format registration"); 808 return (ARCHIVE_FATAL); /* Never actually called. */ 809 } 810 811 /* 812 * Used internally by decompression routines to register their bid and 813 * initialization functions. 814 */ 815 struct archive_read_filter_bidder * 816 __archive_read_get_bidder(struct archive_read *a) 817 { 818 int i, number_slots; 819 820 __archive_check_magic(&a->archive, 821 ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, 822 "__archive_read_get_bidder"); 823 824 number_slots = sizeof(a->bidders) / sizeof(a->bidders[0]); 825 826 for (i = 0; i < number_slots; i++) { 827 if (a->bidders[i].bid == NULL) { 828 memset(a->bidders + i, 0, sizeof(a->bidders[0])); 829 return (a->bidders + i); 830 } 831 } 832 833 __archive_errx(1, "Not enough slots for compression registration"); 834 return (NULL); /* Never actually executed. */ 835 } 836 837 /* 838 * The next three functions comprise the peek/consume internal I/O 839 * system used by archive format readers. This system allows fairly 840 * flexible read-ahead and allows the I/O code to operate in a 841 * zero-copy manner most of the time. 842 * 843 * In the ideal case, filters generate blocks of data 844 * and __archive_read_ahead() just returns pointers directly into 845 * those blocks. Then __archive_read_consume() just bumps those 846 * pointers. Only if your request would span blocks does the I/O 847 * layer use a copy buffer to provide you with a contiguous block of 848 * data. The __archive_read_skip() is an optimization; it scans ahead 849 * very quickly (it usually translates into a seek() operation if 850 * you're reading uncompressed disk files). 851 * 852 * A couple of useful idioms: 853 * * "I just want some data." Ask for 1 byte and pay attention to 854 * the "number of bytes available" from __archive_read_ahead(). 855 * You can consume more than you asked for; you just can't consume 856 * more than is available. If you consume everything that's 857 * immediately available, the next read_ahead() call will pull 858 * the next block. 859 * * "I want to output a large block of data." As above, ask for 1 byte, 860 * emit all that's available (up to whatever limit you have), then 861 * repeat until you're done. 862 * * "I want to peek ahead by a large amount." Ask for 4k or so, then 863 * double and repeat until you get an error or have enough. Note 864 * that the I/O layer will likely end up expanding its copy buffer 865 * to fit your request, so use this technique cautiously. This 866 * technique is used, for example, by some of the format tasting 867 * code that has uncertain look-ahead needs. 868 * 869 * TODO: Someday, provide a more generic __archive_read_seek() for 870 * those cases where it's useful. This is tricky because there are lots 871 * of cases where seek() is not available (reading gzip data from a 872 * network socket, for instance), so there needs to be a good way to 873 * communicate whether seek() is available and users of that interface 874 * need to use non-seeking strategies whenever seek() is not available. 875 */ 876 877 /* 878 * Looks ahead in the input stream: 879 * * If 'avail' pointer is provided, that returns number of bytes available 880 * in the current buffer, which may be much larger than requested. 881 * * If end-of-file, *avail gets set to zero. 882 * * If error, *avail gets error code. 883 * * If request can be met, returns pointer to data, returns NULL 884 * if request is not met. 885 * 886 * Note: If you just want "some data", ask for 1 byte and pay attention 887 * to *avail, which will have the actual amount available. If you 888 * know exactly how many bytes you need, just ask for that and treat 889 * a NULL return as an error. 890 * 891 * Important: This does NOT move the file pointer. See 892 * __archive_read_consume() below. 893 */ 894 895 /* 896 * This is tricky. We need to provide our clients with pointers to 897 * contiguous blocks of memory but we want to avoid copying whenever 898 * possible. 899 * 900 * Mostly, this code returns pointers directly into the block of data 901 * provided by the client_read routine. It can do this unless the 902 * request would split across blocks. In that case, we have to copy 903 * into an internal buffer to combine reads. 904 */ 905 const void * 906 __archive_read_ahead(struct archive_read *a, size_t min, ssize_t *avail) 907 { 908 return (__archive_read_filter_ahead(a->filter, min, avail)); 909 } 910 911 const void * 912 __archive_read_filter_ahead(struct archive_read_filter *filter, 913 size_t min, ssize_t *avail) 914 { 915 ssize_t bytes_read; 916 size_t tocopy; 917 918 if (filter->fatal) { 919 if (avail) 920 *avail = ARCHIVE_FATAL; 921 return (NULL); 922 } 923 924 /* 925 * Keep pulling more data until we can satisfy the request. 926 */ 927 for (;;) { 928 929 /* 930 * If we can satisfy from the copy buffer, we're done. 931 */ 932 if (filter->avail >= min) { 933 if (avail != NULL) 934 *avail = filter->avail; 935 return (filter->next); 936 } 937 938 /* 939 * We can satisfy directly from client buffer if everything 940 * currently in the copy buffer is still in the client buffer. 941 */ 942 if (filter->client_total >= filter->client_avail + filter->avail 943 && filter->client_avail + filter->avail >= min) { 944 /* "Roll back" to client buffer. */ 945 filter->client_avail += filter->avail; 946 filter->client_next -= filter->avail; 947 /* Copy buffer is now empty. */ 948 filter->avail = 0; 949 filter->next = filter->buffer; 950 /* Return data from client buffer. */ 951 if (avail != NULL) 952 *avail = filter->client_avail; 953 return (filter->client_next); 954 } 955 956 /* Move data forward in copy buffer if necessary. */ 957 if (filter->next > filter->buffer && 958 filter->next + min > filter->buffer + filter->buffer_size) { 959 if (filter->avail > 0) 960 memmove(filter->buffer, filter->next, filter->avail); 961 filter->next = filter->buffer; 962 } 963 964 /* If we've used up the client data, get more. */ 965 if (filter->client_avail <= 0) { 966 if (filter->end_of_file) { 967 if (avail != NULL) 968 *avail = 0; 969 return (NULL); 970 } 971 bytes_read = (filter->read)(filter, 972 &filter->client_buff); 973 if (bytes_read < 0) { /* Read error. */ 974 filter->client_total = filter->client_avail = 0; 975 filter->client_next = filter->client_buff = NULL; 976 filter->fatal = 1; 977 if (avail != NULL) 978 *avail = ARCHIVE_FATAL; 979 return (NULL); 980 } 981 if (bytes_read == 0) { /* Premature end-of-file. */ 982 filter->client_total = filter->client_avail = 0; 983 filter->client_next = filter->client_buff = NULL; 984 filter->end_of_file = 1; 985 /* Return whatever we do have. */ 986 if (avail != NULL) 987 *avail = filter->avail; 988 return (NULL); 989 } 990 filter->position += bytes_read; 991 filter->client_total = bytes_read; 992 filter->client_avail = filter->client_total; 993 filter->client_next = filter->client_buff; 994 } 995 else 996 { 997 /* 998 * We can't satisfy the request from the copy 999 * buffer or the existing client data, so we 1000 * need to copy more client data over to the 1001 * copy buffer. 1002 */ 1003 1004 /* Ensure the buffer is big enough. */ 1005 if (min > filter->buffer_size) { 1006 size_t s, t; 1007 char *p; 1008 1009 /* Double the buffer; watch for overflow. */ 1010 s = t = filter->buffer_size; 1011 if (s == 0) 1012 s = min; 1013 while (s < min) { 1014 t *= 2; 1015 if (t <= s) { /* Integer overflow! */ 1016 archive_set_error( 1017 &filter->archive->archive, 1018 ENOMEM, 1019 "Unable to allocate copy buffer"); 1020 filter->fatal = 1; 1021 if (avail != NULL) 1022 *avail = ARCHIVE_FATAL; 1023 return (NULL); 1024 } 1025 s = t; 1026 } 1027 /* Now s >= min, so allocate a new buffer. */ 1028 p = (char *)malloc(s); 1029 if (p == NULL) { 1030 archive_set_error( 1031 &filter->archive->archive, 1032 ENOMEM, 1033 "Unable to allocate copy buffer"); 1034 filter->fatal = 1; 1035 if (avail != NULL) 1036 *avail = ARCHIVE_FATAL; 1037 return (NULL); 1038 } 1039 /* Move data into newly-enlarged buffer. */ 1040 if (filter->avail > 0) 1041 memmove(p, filter->next, filter->avail); 1042 free(filter->buffer); 1043 filter->next = filter->buffer = p; 1044 filter->buffer_size = s; 1045 } 1046 1047 /* We can add client data to copy buffer. */ 1048 /* First estimate: copy to fill rest of buffer. */ 1049 tocopy = (filter->buffer + filter->buffer_size) 1050 - (filter->next + filter->avail); 1051 /* Don't waste time buffering more than we need to. */ 1052 if (tocopy + filter->avail > min) 1053 tocopy = min - filter->avail; 1054 /* Don't copy more than is available. */ 1055 if (tocopy > filter->client_avail) 1056 tocopy = filter->client_avail; 1057 1058 memcpy(filter->next + filter->avail, filter->client_next, 1059 tocopy); 1060 /* Remove this data from client buffer. */ 1061 filter->client_next += tocopy; 1062 filter->client_avail -= tocopy; 1063 /* add it to copy buffer. */ 1064 filter->avail += tocopy; 1065 } 1066 } 1067 } 1068 1069 /* 1070 * Move the file pointer forward. This should be called after 1071 * __archive_read_ahead() returns data to you. Don't try to move 1072 * ahead by more than the amount of data available according to 1073 * __archive_read_ahead(). 1074 */ 1075 /* 1076 * Mark the appropriate data as used. Note that the request here will 1077 * often be much smaller than the size of the previous read_ahead 1078 * request. 1079 */ 1080 ssize_t 1081 __archive_read_consume(struct archive_read *a, size_t request) 1082 { 1083 ssize_t r; 1084 r = __archive_read_filter_consume(a->filter, request); 1085 a->archive.file_position += r; 1086 return (r); 1087 } 1088 1089 ssize_t 1090 __archive_read_filter_consume(struct archive_read_filter * filter, 1091 size_t request) 1092 { 1093 if (filter->avail > 0) { 1094 /* Read came from copy buffer. */ 1095 filter->next += request; 1096 filter->avail -= request; 1097 } else { 1098 /* Read came from client buffer. */ 1099 filter->client_next += request; 1100 filter->client_avail -= request; 1101 } 1102 return (request); 1103 } 1104 1105 /* 1106 * Move the file pointer ahead by an arbitrary amount. If you're 1107 * reading uncompressed data from a disk file, this will actually 1108 * translate into a seek() operation. Even in cases where seek() 1109 * isn't feasible, this at least pushes the read-and-discard loop 1110 * down closer to the data source. 1111 */ 1112 int64_t 1113 __archive_read_skip(struct archive_read *a, int64_t request) 1114 { 1115 return (__archive_read_filter_skip(a->filter, request)); 1116 } 1117 1118 int64_t 1119 __archive_read_filter_skip(struct archive_read_filter *filter, int64_t request) 1120 { 1121 off_t bytes_skipped, total_bytes_skipped = 0; 1122 size_t min; 1123 1124 if (filter->fatal) 1125 return (-1); 1126 /* 1127 * If there is data in the buffers already, use that first. 1128 */ 1129 if (filter->avail > 0) { 1130 min = minimum(request, (off_t)filter->avail); 1131 bytes_skipped = __archive_read_consume(filter->archive, min); 1132 request -= bytes_skipped; 1133 total_bytes_skipped += bytes_skipped; 1134 } 1135 if (filter->client_avail > 0) { 1136 min = minimum(request, (off_t)filter->client_avail); 1137 bytes_skipped = __archive_read_consume(filter->archive, min); 1138 request -= bytes_skipped; 1139 total_bytes_skipped += bytes_skipped; 1140 } 1141 if (request == 0) 1142 return (total_bytes_skipped); 1143 /* 1144 * If a client_skipper was provided, try that first. 1145 */ 1146 #if ARCHIVE_API_VERSION < 2 1147 if ((filter->skip != NULL) && (request < SSIZE_MAX)) { 1148 #else 1149 if (filter->skip != NULL) { 1150 #endif 1151 bytes_skipped = (filter->skip)(filter, request); 1152 if (bytes_skipped < 0) { /* error */ 1153 filter->client_total = filter->client_avail = 0; 1154 filter->client_next = filter->client_buff = NULL; 1155 filter->fatal = 1; 1156 return (bytes_skipped); 1157 } 1158 filter->archive->archive.file_position += bytes_skipped; 1159 total_bytes_skipped += bytes_skipped; 1160 request -= bytes_skipped; 1161 filter->client_next = filter->client_buff; 1162 filter->client_avail = filter->client_total = 0; 1163 } 1164 /* 1165 * Note that client_skipper will usually not satisfy the 1166 * full request (due to low-level blocking concerns), 1167 * so even if client_skipper is provided, we may still 1168 * have to use ordinary reads to finish out the request. 1169 */ 1170 while (request > 0) { 1171 const void* dummy_buffer; 1172 ssize_t bytes_read; 1173 dummy_buffer = __archive_read_ahead(filter->archive, 1174 1, &bytes_read); 1175 if (bytes_read < 0) 1176 return (bytes_read); 1177 if (bytes_read == 0) { 1178 /* We hit EOF before we satisfied the skip request. */ 1179 archive_set_error(&filter->archive->archive, 1180 ARCHIVE_ERRNO_MISC, 1181 "Truncated input file (need to skip %jd bytes)", 1182 (intmax_t)request); 1183 return (ARCHIVE_FATAL); 1184 } 1185 min = (size_t)(minimum(bytes_read, request)); 1186 bytes_read = __archive_read_consume(filter->archive, min); 1187 total_bytes_skipped += bytes_read; 1188 request -= bytes_read; 1189 } 1190 return (total_bytes_skipped); 1191 } 1192