/*-
 * Copyright (c) 2003-2011 Tim Kientzle
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * This file contains the "essential" portions of the read API, that
 * is, stuff that will probably always be used by any client that
 * actually needs to read an archive.  Optional pieces have been, as
 * far as possible, separated out into separate files to avoid
 * needlessly bloating statically-linked clients.
32 */ 33 34 #include "archive_platform.h" 35 __FBSDID("$FreeBSD: head/lib/libarchive/archive_read.c 201157 2009-12-29 05:30:23Z kientzle $"); 36 37 #ifdef HAVE_ERRNO_H 38 #include <errno.h> 39 #endif 40 #include <stdio.h> 41 #ifdef HAVE_STDLIB_H 42 #include <stdlib.h> 43 #endif 44 #ifdef HAVE_STRING_H 45 #include <string.h> 46 #endif 47 #ifdef HAVE_UNISTD_H 48 #include <unistd.h> 49 #endif 50 51 #include "archive.h" 52 #include "archive_entry.h" 53 #include "archive_private.h" 54 #include "archive_read_private.h" 55 56 #define minimum(a, b) (a < b ? a : b) 57 58 static int choose_filters(struct archive_read *); 59 static int choose_format(struct archive_read *); 60 static void free_filters(struct archive_read *); 61 static int close_filters(struct archive_read *); 62 static struct archive_vtable *archive_read_vtable(void); 63 static int64_t _archive_filter_bytes(struct archive *, int); 64 static int _archive_filter_code(struct archive *, int); 65 static const char *_archive_filter_name(struct archive *, int); 66 static int _archive_filter_count(struct archive *); 67 static int _archive_read_close(struct archive *); 68 static int _archive_read_data_block(struct archive *, 69 const void **, size_t *, int64_t *); 70 static int _archive_read_free(struct archive *); 71 static int _archive_read_next_header(struct archive *, 72 struct archive_entry **); 73 static int _archive_read_next_header2(struct archive *, 74 struct archive_entry *); 75 static int64_t advance_file_pointer(struct archive_read_filter *, int64_t); 76 77 static struct archive_vtable * 78 archive_read_vtable(void) 79 { 80 static struct archive_vtable av; 81 static int inited = 0; 82 83 if (!inited) { 84 av.archive_filter_bytes = _archive_filter_bytes; 85 av.archive_filter_code = _archive_filter_code; 86 av.archive_filter_name = _archive_filter_name; 87 av.archive_filter_count = _archive_filter_count; 88 av.archive_read_data_block = _archive_read_data_block; 89 av.archive_read_next_header = 
_archive_read_next_header; 90 av.archive_read_next_header2 = _archive_read_next_header2; 91 av.archive_free = _archive_read_free; 92 av.archive_close = _archive_read_close; 93 inited = 1; 94 } 95 return (&av); 96 } 97 98 /* 99 * Allocate, initialize and return a struct archive object. 100 */ 101 struct archive * 102 archive_read_new(void) 103 { 104 struct archive_read *a; 105 106 a = (struct archive_read *)malloc(sizeof(*a)); 107 if (a == NULL) 108 return (NULL); 109 memset(a, 0, sizeof(*a)); 110 a->archive.magic = ARCHIVE_READ_MAGIC; 111 112 a->archive.state = ARCHIVE_STATE_NEW; 113 a->entry = archive_entry_new2(&a->archive); 114 a->archive.vtable = archive_read_vtable(); 115 116 return (&a->archive); 117 } 118 119 /* 120 * Record the do-not-extract-to file. This belongs in archive_read_extract.c. 121 */ 122 void 123 archive_read_extract_set_skip_file(struct archive *_a, int64_t d, int64_t i) 124 { 125 struct archive_read *a = (struct archive_read *)_a; 126 127 if (ARCHIVE_OK != __archive_check_magic(_a, ARCHIVE_READ_MAGIC, 128 ARCHIVE_STATE_ANY, "archive_read_extract_set_skip_file")) 129 return; 130 a->skip_file_set = 1; 131 a->skip_file_dev = d; 132 a->skip_file_ino = i; 133 } 134 135 /* 136 * Open the archive 137 */ 138 int 139 archive_read_open(struct archive *a, void *client_data, 140 archive_open_callback *client_opener, archive_read_callback *client_reader, 141 archive_close_callback *client_closer) 142 { 143 /* Old archive_read_open() is just a thin shell around 144 * archive_read_open1. 
*/ 145 archive_read_set_open_callback(a, client_opener); 146 archive_read_set_read_callback(a, client_reader); 147 archive_read_set_close_callback(a, client_closer); 148 archive_read_set_callback_data(a, client_data); 149 return archive_read_open1(a); 150 } 151 152 153 int 154 archive_read_open2(struct archive *a, void *client_data, 155 archive_open_callback *client_opener, 156 archive_read_callback *client_reader, 157 archive_skip_callback *client_skipper, 158 archive_close_callback *client_closer) 159 { 160 /* Old archive_read_open2() is just a thin shell around 161 * archive_read_open1. */ 162 archive_read_set_callback_data(a, client_data); 163 archive_read_set_open_callback(a, client_opener); 164 archive_read_set_read_callback(a, client_reader); 165 archive_read_set_skip_callback(a, client_skipper); 166 archive_read_set_close_callback(a, client_closer); 167 return archive_read_open1(a); 168 } 169 170 static ssize_t 171 client_read_proxy(struct archive_read_filter *self, const void **buff) 172 { 173 ssize_t r; 174 r = (self->archive->client.reader)(&self->archive->archive, 175 self->data, buff); 176 return (r); 177 } 178 179 static int64_t 180 client_skip_proxy(struct archive_read_filter *self, int64_t request) 181 { 182 if (request < 0) 183 __archive_errx(1, "Negative skip requested."); 184 if (request == 0) 185 return 0; 186 187 if (self->archive->client.skipper != NULL) { 188 /* Seek requests over 1GiB are broken down into 189 * multiple seeks. This avoids overflows when the 190 * requests get passed through 32-bit arguments. 
*/ 191 int64_t skip_limit = (int64_t)1 << 30; 192 int64_t total = 0; 193 for (;;) { 194 int64_t get, ask = request; 195 if (ask > skip_limit) 196 ask = skip_limit; 197 get = (self->archive->client.skipper)(&self->archive->archive, 198 self->data, ask); 199 if (get == 0) 200 return (total); 201 request -= get; 202 total += get; 203 } 204 return total; 205 } else if (self->archive->client.seeker != NULL 206 && request > 64 * 1024) { 207 /* If the client provided a seeker but not a skipper, 208 * we can use the seeker to skip forward. 209 * 210 * Note: This isn't always a good idea. The client 211 * skipper is allowed to skip by less than requested 212 * if it needs to maintain block alignment. The 213 * seeker is not allowed to play such games, so using 214 * the seeker here may be a performance loss compared 215 * to just reading and discarding. That's why we 216 * only do this for skips of over 64k. 217 */ 218 int64_t before = self->position; 219 int64_t after = (self->archive->client.seeker)(&self->archive->archive, 220 self->data, request, SEEK_CUR); 221 if (after != before + request) 222 return ARCHIVE_FATAL; 223 return after - before; 224 } 225 return 0; 226 } 227 228 static int64_t 229 client_seek_proxy(struct archive_read_filter *self, int64_t offset, int whence) 230 { 231 /* DO NOT use the skipper here! If we transparently handled 232 * forward seek here by using the skipper, that will break 233 * other libarchive code that assumes a successful forward 234 * seek means it can also seek backwards. 
235 */ 236 if (self->archive->client.seeker == NULL) 237 return (ARCHIVE_FAILED); 238 return (self->archive->client.seeker)(&self->archive->archive, 239 self->data, offset, whence); 240 } 241 242 static int 243 client_close_proxy(struct archive_read_filter *self) 244 { 245 int r = ARCHIVE_OK; 246 247 if (self->archive->client.closer != NULL) 248 r = (self->archive->client.closer)((struct archive *)self->archive, 249 self->data); 250 return (r); 251 } 252 253 int 254 archive_read_set_open_callback(struct archive *_a, 255 archive_open_callback *client_opener) 256 { 257 struct archive_read *a = (struct archive_read *)_a; 258 archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, 259 "archive_read_set_open_callback"); 260 a->client.opener = client_opener; 261 return ARCHIVE_OK; 262 } 263 264 int 265 archive_read_set_read_callback(struct archive *_a, 266 archive_read_callback *client_reader) 267 { 268 struct archive_read *a = (struct archive_read *)_a; 269 archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, 270 "archive_read_set_read_callback"); 271 a->client.reader = client_reader; 272 return ARCHIVE_OK; 273 } 274 275 int 276 archive_read_set_skip_callback(struct archive *_a, 277 archive_skip_callback *client_skipper) 278 { 279 struct archive_read *a = (struct archive_read *)_a; 280 archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, 281 "archive_read_set_skip_callback"); 282 a->client.skipper = client_skipper; 283 return ARCHIVE_OK; 284 } 285 286 int 287 archive_read_set_seek_callback(struct archive *_a, 288 archive_seek_callback *client_seeker) 289 { 290 struct archive_read *a = (struct archive_read *)_a; 291 archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, 292 "archive_read_set_seek_callback"); 293 a->client.seeker = client_seeker; 294 return ARCHIVE_OK; 295 } 296 297 int 298 archive_read_set_close_callback(struct archive *_a, 299 archive_close_callback *client_closer) 300 { 301 struct archive_read *a = (struct 
archive_read *)_a; 302 archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, 303 "archive_read_set_close_callback"); 304 a->client.closer = client_closer; 305 return ARCHIVE_OK; 306 } 307 308 int 309 archive_read_set_callback_data(struct archive *_a, void *client_data) 310 { 311 struct archive_read *a = (struct archive_read *)_a; 312 archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, 313 "archive_read_set_callback_data"); 314 a->client.data = client_data; 315 return ARCHIVE_OK; 316 } 317 318 int 319 archive_read_open1(struct archive *_a) 320 { 321 struct archive_read *a = (struct archive_read *)_a; 322 struct archive_read_filter *filter; 323 int slot, e; 324 325 archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, 326 "archive_read_open"); 327 archive_clear_error(&a->archive); 328 329 if (a->client.reader == NULL) { 330 archive_set_error(&a->archive, EINVAL, 331 "No reader function provided to archive_read_open"); 332 a->archive.state = ARCHIVE_STATE_FATAL; 333 return (ARCHIVE_FATAL); 334 } 335 336 /* Open data source. */ 337 if (a->client.opener != NULL) { 338 e =(a->client.opener)(&a->archive, a->client.data); 339 if (e != 0) { 340 /* If the open failed, call the closer to clean up. */ 341 if (a->client.closer) 342 (a->client.closer)(&a->archive, a->client.data); 343 return (e); 344 } 345 } 346 347 filter = calloc(1, sizeof(*filter)); 348 if (filter == NULL) 349 return (ARCHIVE_FATAL); 350 filter->bidder = NULL; 351 filter->upstream = NULL; 352 filter->archive = a; 353 filter->data = a->client.data; 354 filter->read = client_read_proxy; 355 filter->skip = client_skip_proxy; 356 filter->seek = client_seek_proxy; 357 filter->close = client_close_proxy; 358 filter->name = "none"; 359 filter->code = ARCHIVE_COMPRESSION_NONE; 360 a->filter = filter; 361 362 /* Build out the input pipeline. 
*/ 363 e = choose_filters(a); 364 if (e < ARCHIVE_WARN) { 365 a->archive.state = ARCHIVE_STATE_FATAL; 366 return (ARCHIVE_FATAL); 367 } 368 369 slot = choose_format(a); 370 if (slot < 0) { 371 close_filters(a); 372 a->archive.state = ARCHIVE_STATE_FATAL; 373 return (ARCHIVE_FATAL); 374 } 375 a->format = &(a->formats[slot]); 376 377 a->archive.state = ARCHIVE_STATE_HEADER; 378 return (e); 379 } 380 381 /* 382 * Allow each registered stream transform to bid on whether 383 * it wants to handle this stream. Repeat until we've finished 384 * building the pipeline. 385 */ 386 static int 387 choose_filters(struct archive_read *a) 388 { 389 int number_bidders, i, bid, best_bid; 390 struct archive_read_filter_bidder *bidder, *best_bidder; 391 struct archive_read_filter *filter; 392 ssize_t avail; 393 int r; 394 395 for (;;) { 396 number_bidders = sizeof(a->bidders) / sizeof(a->bidders[0]); 397 398 best_bid = 0; 399 best_bidder = NULL; 400 401 bidder = a->bidders; 402 for (i = 0; i < number_bidders; i++, bidder++) { 403 if (bidder->bid != NULL) { 404 bid = (bidder->bid)(bidder, a->filter); 405 if (bid > best_bid) { 406 best_bid = bid; 407 best_bidder = bidder; 408 } 409 } 410 } 411 412 /* If no bidder, we're done. */ 413 if (best_bidder == NULL) { 414 /* Verify the filter by asking it for some data. 
*/ 415 __archive_read_filter_ahead(a->filter, 1, &avail); 416 if (avail < 0) { 417 close_filters(a); 418 free_filters(a); 419 return (ARCHIVE_FATAL); 420 } 421 a->archive.compression_name = a->filter->name; 422 a->archive.compression_code = a->filter->code; 423 return (ARCHIVE_OK); 424 } 425 426 filter 427 = (struct archive_read_filter *)calloc(1, sizeof(*filter)); 428 if (filter == NULL) 429 return (ARCHIVE_FATAL); 430 filter->bidder = best_bidder; 431 filter->archive = a; 432 filter->upstream = a->filter; 433 a->filter = filter; 434 r = (best_bidder->init)(a->filter); 435 if (r != ARCHIVE_OK) { 436 close_filters(a); 437 free_filters(a); 438 return (ARCHIVE_FATAL); 439 } 440 } 441 } 442 443 /* 444 * Read header of next entry. 445 */ 446 static int 447 _archive_read_next_header2(struct archive *_a, struct archive_entry *entry) 448 { 449 struct archive_read *a = (struct archive_read *)_a; 450 int r1 = ARCHIVE_OK, r2; 451 452 archive_check_magic(_a, ARCHIVE_READ_MAGIC, 453 ARCHIVE_STATE_HEADER | ARCHIVE_STATE_DATA, 454 "archive_read_next_header"); 455 456 archive_entry_clear(entry); 457 archive_clear_error(&a->archive); 458 459 /* 460 * If client didn't consume entire data, skip any remainder 461 * (This is especially important for GNU incremental directories.) 462 */ 463 if (a->archive.state == ARCHIVE_STATE_DATA) { 464 r1 = archive_read_data_skip(&a->archive); 465 if (r1 == ARCHIVE_EOF) 466 archive_set_error(&a->archive, EIO, 467 "Premature end-of-file."); 468 if (r1 == ARCHIVE_EOF || r1 == ARCHIVE_FATAL) { 469 a->archive.state = ARCHIVE_STATE_FATAL; 470 return (ARCHIVE_FATAL); 471 } 472 } 473 474 /* Record start-of-header offset in uncompressed stream. */ 475 a->header_position = a->filter->position; 476 477 ++_a->file_count; 478 r2 = (a->format->read_header)(a, entry); 479 480 /* 481 * EOF and FATAL are persistent at this layer. By 482 * modifying the state, we guarantee that future calls to 483 * read a header or read data will fail. 
484 */ 485 switch (r2) { 486 case ARCHIVE_EOF: 487 a->archive.state = ARCHIVE_STATE_EOF; 488 --_a->file_count;/* Revert a file counter. */ 489 break; 490 case ARCHIVE_OK: 491 a->archive.state = ARCHIVE_STATE_DATA; 492 break; 493 case ARCHIVE_WARN: 494 a->archive.state = ARCHIVE_STATE_DATA; 495 break; 496 case ARCHIVE_RETRY: 497 break; 498 case ARCHIVE_FATAL: 499 a->archive.state = ARCHIVE_STATE_FATAL; 500 break; 501 } 502 503 a->read_data_output_offset = 0; 504 a->read_data_remaining = 0; 505 /* EOF always wins; otherwise return the worst error. */ 506 return (r2 < r1 || r2 == ARCHIVE_EOF) ? r2 : r1; 507 } 508 509 int 510 _archive_read_next_header(struct archive *_a, struct archive_entry **entryp) 511 { 512 int ret; 513 struct archive_read *a = (struct archive_read *)_a; 514 *entryp = NULL; 515 ret = _archive_read_next_header2(_a, a->entry); 516 *entryp = a->entry; 517 return ret; 518 } 519 520 /* 521 * Allow each registered format to bid on whether it wants to handle 522 * the next entry. Return index of winning bidder. 523 */ 524 static int 525 choose_format(struct archive_read *a) 526 { 527 int slots; 528 int i; 529 int bid, best_bid; 530 int best_bid_slot; 531 532 slots = sizeof(a->formats) / sizeof(a->formats[0]); 533 best_bid = -1; 534 best_bid_slot = -1; 535 536 /* Set up a->format for convenience of bidders. */ 537 a->format = &(a->formats[0]); 538 for (i = 0; i < slots; i++, a->format++) { 539 if (a->format->bid) { 540 bid = (a->format->bid)(a, best_bid); 541 if (bid == ARCHIVE_FATAL) 542 return (ARCHIVE_FATAL); 543 if (a->filter->position != 0) 544 __archive_read_seek(a, 0, SEEK_SET); 545 if ((bid > best_bid) || (best_bid_slot < 0)) { 546 best_bid = bid; 547 best_bid_slot = i; 548 } 549 } 550 } 551 552 /* 553 * There were no bidders; this is a serious programmer error 554 * and demands a quick and definitive abort. 
555 */ 556 if (best_bid_slot < 0) { 557 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 558 "No formats registered"); 559 return (ARCHIVE_FATAL); 560 } 561 562 /* 563 * There were bidders, but no non-zero bids; this means we 564 * can't support this stream. 565 */ 566 if (best_bid < 1) { 567 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 568 "Unrecognized archive format"); 569 return (ARCHIVE_FATAL); 570 } 571 572 return (best_bid_slot); 573 } 574 575 /* 576 * Return the file offset (within the uncompressed data stream) where 577 * the last header started. 578 */ 579 int64_t 580 archive_read_header_position(struct archive *_a) 581 { 582 struct archive_read *a = (struct archive_read *)_a; 583 archive_check_magic(_a, ARCHIVE_READ_MAGIC, 584 ARCHIVE_STATE_ANY, "archive_read_header_position"); 585 return (a->header_position); 586 } 587 588 /* 589 * Read data from an archive entry, using a read(2)-style interface. 590 * This is a convenience routine that just calls 591 * archive_read_data_block and copies the results into the client 592 * buffer, filling any gaps with zero bytes. Clients using this 593 * API can be completely ignorant of sparse-file issues; sparse files 594 * will simply be padded with nulls. 595 * 596 * DO NOT intermingle calls to this function and archive_read_data_block 597 * to read a single entry body. 
598 */ 599 ssize_t 600 archive_read_data(struct archive *_a, void *buff, size_t s) 601 { 602 struct archive_read *a = (struct archive_read *)_a; 603 char *dest; 604 const void *read_buf; 605 size_t bytes_read; 606 size_t len; 607 int r; 608 609 bytes_read = 0; 610 dest = (char *)buff; 611 612 while (s > 0) { 613 if (a->read_data_remaining == 0) { 614 read_buf = a->read_data_block; 615 r = _archive_read_data_block(&a->archive, &read_buf, 616 &a->read_data_remaining, &a->read_data_offset); 617 a->read_data_block = read_buf; 618 if (r == ARCHIVE_EOF) 619 return (bytes_read); 620 /* 621 * Error codes are all negative, so the status 622 * return here cannot be confused with a valid 623 * byte count. (ARCHIVE_OK is zero.) 624 */ 625 if (r < ARCHIVE_OK) 626 return (r); 627 } 628 629 if (a->read_data_offset < a->read_data_output_offset) { 630 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 631 "Encountered out-of-order sparse blocks"); 632 return (ARCHIVE_RETRY); 633 } 634 635 /* Compute the amount of zero padding needed. */ 636 if (a->read_data_output_offset + s < 637 a->read_data_offset) { 638 len = s; 639 } else if (a->read_data_output_offset < 640 a->read_data_offset) { 641 len = a->read_data_offset - 642 a->read_data_output_offset; 643 } else 644 len = 0; 645 646 /* Add zeroes. */ 647 memset(dest, 0, len); 648 s -= len; 649 a->read_data_output_offset += len; 650 dest += len; 651 bytes_read += len; 652 653 /* Copy data if there is any space left. */ 654 if (s > 0) { 655 len = a->read_data_remaining; 656 if (len > s) 657 len = s; 658 memcpy(dest, a->read_data_block, len); 659 s -= len; 660 a->read_data_block += len; 661 a->read_data_remaining -= len; 662 a->read_data_output_offset += len; 663 a->read_data_offset += len; 664 dest += len; 665 bytes_read += len; 666 } 667 } 668 return (bytes_read); 669 } 670 671 /* 672 * Skip over all remaining data in this entry. 
673 */ 674 int 675 archive_read_data_skip(struct archive *_a) 676 { 677 struct archive_read *a = (struct archive_read *)_a; 678 int r; 679 const void *buff; 680 size_t size; 681 int64_t offset; 682 683 archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_DATA, 684 "archive_read_data_skip"); 685 686 if (a->format->read_data_skip != NULL) 687 r = (a->format->read_data_skip)(a); 688 else { 689 while ((r = archive_read_data_block(&a->archive, 690 &buff, &size, &offset)) 691 == ARCHIVE_OK) 692 ; 693 } 694 695 if (r == ARCHIVE_EOF) 696 r = ARCHIVE_OK; 697 698 a->archive.state = ARCHIVE_STATE_HEADER; 699 return (r); 700 } 701 702 /* 703 * Read the next block of entry data from the archive. 704 * This is a zero-copy interface; the client receives a pointer, 705 * size, and file offset of the next available block of data. 706 * 707 * Returns ARCHIVE_OK if the operation is successful, ARCHIVE_EOF if 708 * the end of entry is encountered. 709 */ 710 static int 711 _archive_read_data_block(struct archive *_a, 712 const void **buff, size_t *size, int64_t *offset) 713 { 714 struct archive_read *a = (struct archive_read *)_a; 715 archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_DATA, 716 "archive_read_data_block"); 717 718 if (a->format->read_data == NULL) { 719 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 720 "Internal error: " 721 "No format_read_data_block function registered"); 722 return (ARCHIVE_FATAL); 723 } 724 725 return (a->format->read_data)(a, buff, size, offset); 726 } 727 728 static int 729 close_filters(struct archive_read *a) 730 { 731 struct archive_read_filter *f = a->filter; 732 int r = ARCHIVE_OK; 733 /* Close each filter in the pipeline. 
*/ 734 while (f != NULL) { 735 struct archive_read_filter *t = f->upstream; 736 if (!f->closed && f->close != NULL) { 737 int r1 = (f->close)(f); 738 f->closed = 1; 739 if (r1 < r) 740 r = r1; 741 } 742 free(f->buffer); 743 f->buffer = NULL; 744 f = t; 745 } 746 return r; 747 } 748 749 static void 750 free_filters(struct archive_read *a) 751 { 752 while (a->filter != NULL) { 753 struct archive_read_filter *t = a->filter->upstream; 754 free(a->filter); 755 a->filter = t; 756 } 757 } 758 759 /* 760 * return the count of # of filters in use 761 */ 762 static int 763 _archive_filter_count(struct archive *_a) 764 { 765 struct archive_read *a = (struct archive_read *)_a; 766 struct archive_read_filter *p = a->filter; 767 int count = 0; 768 while(p) { 769 count++; 770 p = p->upstream; 771 } 772 return count; 773 } 774 775 /* 776 * Close the file and all I/O. 777 */ 778 static int 779 _archive_read_close(struct archive *_a) 780 { 781 struct archive_read *a = (struct archive_read *)_a; 782 int r = ARCHIVE_OK, r1 = ARCHIVE_OK; 783 784 archive_check_magic(&a->archive, ARCHIVE_READ_MAGIC, 785 ARCHIVE_STATE_ANY | ARCHIVE_STATE_FATAL, "archive_read_close"); 786 if (a->archive.state == ARCHIVE_STATE_CLOSED) 787 return (ARCHIVE_OK); 788 archive_clear_error(&a->archive); 789 a->archive.state = ARCHIVE_STATE_CLOSED; 790 791 /* TODO: Clean up the formatters. */ 792 793 /* Release the filter objects. */ 794 r1 = close_filters(a); 795 if (r1 < r) 796 r = r1; 797 798 return (r); 799 } 800 801 /* 802 * Release memory and other resources. 
803 */ 804 static int 805 _archive_read_free(struct archive *_a) 806 { 807 struct archive_read *a = (struct archive_read *)_a; 808 int i, n; 809 int slots; 810 int r = ARCHIVE_OK; 811 812 if (_a == NULL) 813 return (ARCHIVE_OK); 814 archive_check_magic(_a, ARCHIVE_READ_MAGIC, 815 ARCHIVE_STATE_ANY | ARCHIVE_STATE_FATAL, "archive_read_free"); 816 if (a->archive.state != ARCHIVE_STATE_CLOSED 817 && a->archive.state != ARCHIVE_STATE_FATAL) 818 r = archive_read_close(&a->archive); 819 820 /* Call cleanup functions registered by optional components. */ 821 if (a->cleanup_archive_extract != NULL) 822 r = (a->cleanup_archive_extract)(a); 823 824 /* Cleanup format-specific data. */ 825 slots = sizeof(a->formats) / sizeof(a->formats[0]); 826 for (i = 0; i < slots; i++) { 827 a->format = &(a->formats[i]); 828 if (a->formats[i].cleanup) 829 (a->formats[i].cleanup)(a); 830 } 831 832 /* Free the filters */ 833 free_filters(a); 834 835 /* Release the bidder objects. */ 836 n = sizeof(a->bidders)/sizeof(a->bidders[0]); 837 for (i = 0; i < n; i++) { 838 if (a->bidders[i].free != NULL) { 839 int r1 = (a->bidders[i].free)(&a->bidders[i]); 840 if (r1 < r) 841 r = r1; 842 } 843 } 844 845 archive_string_free(&a->archive.error_string); 846 if (a->entry) 847 archive_entry_free(a->entry); 848 a->archive.magic = 0; 849 __archive_clean(&a->archive); 850 free(a); 851 return (r); 852 } 853 854 static struct archive_read_filter * 855 get_filter(struct archive *_a, int n) 856 { 857 struct archive_read *a = (struct archive_read *)_a; 858 struct archive_read_filter *f = a->filter; 859 /* We use n == -1 for 'the last filter', which is always the client proxy. 
*/ 860 if (n == -1 && f != NULL) { 861 struct archive_read_filter *last = f; 862 f = f->upstream; 863 while (f != NULL) { 864 last = f; 865 f = f->upstream; 866 } 867 return (last); 868 } 869 if (n < 0) 870 return NULL; 871 while (n > 0 && f != NULL) { 872 f = f->upstream; 873 --n; 874 } 875 return (f); 876 } 877 878 static int 879 _archive_filter_code(struct archive *_a, int n) 880 { 881 struct archive_read_filter *f = get_filter(_a, n); 882 return f == NULL ? -1 : f->code; 883 } 884 885 static const char * 886 _archive_filter_name(struct archive *_a, int n) 887 { 888 struct archive_read_filter *f = get_filter(_a, n); 889 return f == NULL ? NULL : f->name; 890 } 891 892 static int64_t 893 _archive_filter_bytes(struct archive *_a, int n) 894 { 895 struct archive_read_filter *f = get_filter(_a, n); 896 return f == NULL ? -1 : f->position; 897 } 898 899 /* 900 * Used internally by read format handlers to register their bid and 901 * initialization functions. 902 */ 903 int 904 __archive_read_register_format(struct archive_read *a, 905 void *format_data, 906 const char *name, 907 int (*bid)(struct archive_read *, int), 908 int (*options)(struct archive_read *, const char *, const char *), 909 int (*read_header)(struct archive_read *, struct archive_entry *), 910 int (*read_data)(struct archive_read *, const void **, size_t *, int64_t *), 911 int (*read_data_skip)(struct archive_read *), 912 int (*cleanup)(struct archive_read *)) 913 { 914 int i, number_slots; 915 916 archive_check_magic(&a->archive, 917 ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, 918 "__archive_read_register_format"); 919 920 number_slots = sizeof(a->formats) / sizeof(a->formats[0]); 921 922 for (i = 0; i < number_slots; i++) { 923 if (a->formats[i].bid == bid) 924 return (ARCHIVE_WARN); /* We've already installed */ 925 if (a->formats[i].bid == NULL) { 926 a->formats[i].bid = bid; 927 a->formats[i].options = options; 928 a->formats[i].read_header = read_header; 929 a->formats[i].read_data = read_data; 930 
a->formats[i].read_data_skip = read_data_skip; 931 a->formats[i].cleanup = cleanup; 932 a->formats[i].data = format_data; 933 a->formats[i].name = name; 934 return (ARCHIVE_OK); 935 } 936 } 937 938 archive_set_error(&a->archive, ENOMEM, 939 "Not enough slots for format registration"); 940 return (ARCHIVE_FATAL); 941 } 942 943 /* 944 * Used internally by decompression routines to register their bid and 945 * initialization functions. 946 */ 947 int 948 __archive_read_get_bidder(struct archive_read *a, 949 struct archive_read_filter_bidder **bidder) 950 { 951 int i, number_slots; 952 953 number_slots = sizeof(a->bidders) / sizeof(a->bidders[0]); 954 955 for (i = 0; i < number_slots; i++) { 956 if (a->bidders[i].bid == NULL) { 957 memset(a->bidders + i, 0, sizeof(a->bidders[0])); 958 *bidder = (a->bidders + i); 959 return (ARCHIVE_OK); 960 } 961 } 962 963 archive_set_error(&a->archive, ENOMEM, 964 "Not enough slots for filter registration"); 965 return (ARCHIVE_FATAL); 966 } 967 968 /* 969 * The next section implements the peek/consume internal I/O 970 * system used by archive readers. This system allows simple 971 * read-ahead for consumers while preserving zero-copy operation 972 * most of the time. 973 * 974 * The two key operations: 975 * * The read-ahead function returns a pointer to a block of data 976 * that satisfies a minimum request. 977 * * The consume function advances the file pointer. 978 * 979 * In the ideal case, filters generate blocks of data 980 * and __archive_read_ahead() just returns pointers directly into 981 * those blocks. Then __archive_read_consume() just bumps those 982 * pointers. Only if your request would span blocks does the I/O 983 * layer use a copy buffer to provide you with a contiguous block of 984 * data. 985 * 986 * A couple of useful idioms: 987 * * "I just want some data." Ask for 1 byte and pay attention to 988 * the "number of bytes available" from __archive_read_ahead(). 989 * Consume whatever you actually use. 
 *  * "I want to output a large block of data."  As above, ask for 1 byte,
 *    emit all that's available (up to whatever limit you have), consume
 *    it all, then repeat until you're done.  This effectively means that
 *    you're passing along the blocks that came from your provider.
 *  * "I want to peek ahead by a large amount."  Ask for 4k or so, then
 *    double and repeat until you get an error or have enough.  Note
 *    that the I/O layer will likely end up expanding its copy buffer
 *    to fit your request, so use this technique cautiously.  This
 *    technique is used, for example, by some of the format tasting
 *    code that has uncertain look-ahead needs.
 */

/*
 * Looks ahead in the input stream:
 *  * If an 'avail' pointer is provided, *avail receives the number of
 *    bytes available in the current buffer, which may be much larger
 *    than requested.
 *  * If end-of-file, *avail gets set to zero.
 *  * If error, *avail gets error code.
 *  * If request can be met, returns pointer to data.
 *  * If minimum request cannot be met, returns NULL.
 *
 * Note: If you just want "some data", ask for 1 byte and pay attention
 * to *avail, which will have the actual amount available.  If you
 * know exactly how many bytes you need, just ask for that and treat
 * a NULL return as an error.
 *
 * Important: This does NOT move the file pointer.  See
 * __archive_read_consume() below.
1018 */ 1019 const void * 1020 __archive_read_ahead(struct archive_read *a, size_t min, ssize_t *avail) 1021 { 1022 return (__archive_read_filter_ahead(a->filter, min, avail)); 1023 } 1024 1025 const void * 1026 __archive_read_filter_ahead(struct archive_read_filter *filter, 1027 size_t min, ssize_t *avail) 1028 { 1029 ssize_t bytes_read; 1030 size_t tocopy; 1031 1032 if (filter->fatal) { 1033 if (avail) 1034 *avail = ARCHIVE_FATAL; 1035 return (NULL); 1036 } 1037 1038 /* 1039 * Keep pulling more data until we can satisfy the request. 1040 */ 1041 for (;;) { 1042 1043 /* 1044 * If we can satisfy from the copy buffer (and the 1045 * copy buffer isn't empty), we're done. In particular, 1046 * note that min == 0 is a perfectly well-defined 1047 * request. 1048 */ 1049 if (filter->avail >= min && filter->avail > 0) { 1050 if (avail != NULL) 1051 *avail = filter->avail; 1052 return (filter->next); 1053 } 1054 1055 /* 1056 * We can satisfy directly from client buffer if everything 1057 * currently in the copy buffer is still in the client buffer. 1058 */ 1059 if (filter->client_total >= filter->client_avail + filter->avail 1060 && filter->client_avail + filter->avail >= min) { 1061 /* "Roll back" to client buffer. */ 1062 filter->client_avail += filter->avail; 1063 filter->client_next -= filter->avail; 1064 /* Copy buffer is now empty. */ 1065 filter->avail = 0; 1066 filter->next = filter->buffer; 1067 /* Return data from client buffer. */ 1068 if (avail != NULL) 1069 *avail = filter->client_avail; 1070 return (filter->client_next); 1071 } 1072 1073 /* Move data forward in copy buffer if necessary. */ 1074 if (filter->next > filter->buffer && 1075 filter->next + min > filter->buffer + filter->buffer_size) { 1076 if (filter->avail > 0) 1077 memmove(filter->buffer, filter->next, filter->avail); 1078 filter->next = filter->buffer; 1079 } 1080 1081 /* If we've used up the client data, get more. 
*/ 1082 if (filter->client_avail <= 0) { 1083 if (filter->end_of_file) { 1084 if (avail != NULL) 1085 *avail = 0; 1086 return (NULL); 1087 } 1088 bytes_read = (filter->read)(filter, 1089 &filter->client_buff); 1090 if (bytes_read < 0) { /* Read error. */ 1091 filter->client_total = filter->client_avail = 0; 1092 filter->client_next = filter->client_buff = NULL; 1093 filter->fatal = 1; 1094 if (avail != NULL) 1095 *avail = ARCHIVE_FATAL; 1096 return (NULL); 1097 } 1098 if (bytes_read == 0) { /* Premature end-of-file. */ 1099 filter->client_total = filter->client_avail = 0; 1100 filter->client_next = filter->client_buff = NULL; 1101 filter->end_of_file = 1; 1102 /* Return whatever we do have. */ 1103 if (avail != NULL) 1104 *avail = filter->avail; 1105 return (NULL); 1106 } 1107 filter->client_total = bytes_read; 1108 filter->client_avail = filter->client_total; 1109 filter->client_next = filter->client_buff; 1110 } 1111 else 1112 { 1113 /* 1114 * We can't satisfy the request from the copy 1115 * buffer or the existing client data, so we 1116 * need to copy more client data over to the 1117 * copy buffer. 1118 */ 1119 1120 /* Ensure the buffer is big enough. */ 1121 if (min > filter->buffer_size) { 1122 size_t s, t; 1123 char *p; 1124 1125 /* Double the buffer; watch for overflow. */ 1126 s = t = filter->buffer_size; 1127 if (s == 0) 1128 s = min; 1129 while (s < min) { 1130 t *= 2; 1131 if (t <= s) { /* Integer overflow! */ 1132 archive_set_error( 1133 &filter->archive->archive, 1134 ENOMEM, 1135 "Unable to allocate copy buffer"); 1136 filter->fatal = 1; 1137 if (avail != NULL) 1138 *avail = ARCHIVE_FATAL; 1139 return (NULL); 1140 } 1141 s = t; 1142 } 1143 /* Now s >= min, so allocate a new buffer. 
*/ 1144 p = (char *)malloc(s); 1145 if (p == NULL) { 1146 archive_set_error( 1147 &filter->archive->archive, 1148 ENOMEM, 1149 "Unable to allocate copy buffer"); 1150 filter->fatal = 1; 1151 if (avail != NULL) 1152 *avail = ARCHIVE_FATAL; 1153 return (NULL); 1154 } 1155 /* Move data into newly-enlarged buffer. */ 1156 if (filter->avail > 0) 1157 memmove(p, filter->next, filter->avail); 1158 free(filter->buffer); 1159 filter->next = filter->buffer = p; 1160 filter->buffer_size = s; 1161 } 1162 1163 /* We can add client data to copy buffer. */ 1164 /* First estimate: copy to fill rest of buffer. */ 1165 tocopy = (filter->buffer + filter->buffer_size) 1166 - (filter->next + filter->avail); 1167 /* Don't waste time buffering more than we need to. */ 1168 if (tocopy + filter->avail > min) 1169 tocopy = min - filter->avail; 1170 /* Don't copy more than is available. */ 1171 if (tocopy > filter->client_avail) 1172 tocopy = filter->client_avail; 1173 1174 memcpy(filter->next + filter->avail, filter->client_next, 1175 tocopy); 1176 /* Remove this data from client buffer. */ 1177 filter->client_next += tocopy; 1178 filter->client_avail -= tocopy; 1179 /* add it to copy buffer. */ 1180 filter->avail += tocopy; 1181 } 1182 } 1183 } 1184 1185 /* 1186 * Move the file pointer forward. 1187 */ 1188 int64_t 1189 __archive_read_consume(struct archive_read *a, int64_t request) 1190 { 1191 return (__archive_read_filter_consume(a->filter, request)); 1192 } 1193 1194 int64_t 1195 __archive_read_filter_consume(struct archive_read_filter * filter, 1196 int64_t request) 1197 { 1198 int64_t skipped; 1199 1200 if (request == 0) 1201 return 0; 1202 1203 skipped = advance_file_pointer(filter, request); 1204 if (skipped == request) 1205 return (skipped); 1206 /* We hit EOF before we satisfied the skip request. */ 1207 if (skipped < 0) /* Map error code to 0 for error message below. 
*/ 1208 skipped = 0; 1209 archive_set_error(&filter->archive->archive, 1210 ARCHIVE_ERRNO_MISC, 1211 "Truncated input file (needed %jd bytes, only %jd available)", 1212 (intmax_t)request, (intmax_t)skipped); 1213 return (ARCHIVE_FATAL); 1214 } 1215 1216 /* 1217 * Advance the file pointer by the amount requested. 1218 * Returns the amount actually advanced, which may be less than the 1219 * request if EOF is encountered first. 1220 * Returns a negative value if there's an I/O error. 1221 */ 1222 static int64_t 1223 advance_file_pointer(struct archive_read_filter *filter, int64_t request) 1224 { 1225 int64_t bytes_skipped, total_bytes_skipped = 0; 1226 ssize_t bytes_read; 1227 size_t min; 1228 1229 if (filter->fatal) 1230 return (-1); 1231 1232 /* Use up the copy buffer first. */ 1233 if (filter->avail > 0) { 1234 min = minimum(request, (int64_t)filter->avail); 1235 filter->next += min; 1236 filter->avail -= min; 1237 request -= min; 1238 filter->position += min; 1239 total_bytes_skipped += min; 1240 } 1241 1242 /* Then use up the client buffer. */ 1243 if (filter->client_avail > 0) { 1244 min = minimum(request, (int64_t)filter->client_avail); 1245 filter->client_next += min; 1246 filter->client_avail -= min; 1247 request -= min; 1248 filter->position += min; 1249 total_bytes_skipped += min; 1250 } 1251 if (request == 0) 1252 return (total_bytes_skipped); 1253 1254 /* If there's an optimized skip function, use it. */ 1255 if (filter->skip != NULL) { 1256 bytes_skipped = (filter->skip)(filter, request); 1257 if (bytes_skipped < 0) { /* error */ 1258 filter->fatal = 1; 1259 return (bytes_skipped); 1260 } 1261 filter->position += bytes_skipped; 1262 total_bytes_skipped += bytes_skipped; 1263 request -= bytes_skipped; 1264 if (request == 0) 1265 return (total_bytes_skipped); 1266 } 1267 1268 /* Use ordinary reads as necessary to complete the request. 
*/ 1269 for (;;) { 1270 bytes_read = (filter->read)(filter, &filter->client_buff); 1271 if (bytes_read < 0) { 1272 filter->client_buff = NULL; 1273 filter->fatal = 1; 1274 return (bytes_read); 1275 } 1276 1277 if (bytes_read == 0) { 1278 filter->client_buff = NULL; 1279 filter->end_of_file = 1; 1280 return (total_bytes_skipped); 1281 } 1282 1283 if (bytes_read >= request) { 1284 filter->client_next = 1285 ((const char *)filter->client_buff) + request; 1286 filter->client_avail = bytes_read - request; 1287 filter->client_total = bytes_read; 1288 total_bytes_skipped += request; 1289 filter->position += request; 1290 return (total_bytes_skipped); 1291 } 1292 1293 filter->position += bytes_read; 1294 total_bytes_skipped += bytes_read; 1295 request -= bytes_read; 1296 } 1297 } 1298 1299 /** 1300 * Returns ARCHIVE_FAILED if seeking isn't supported. 1301 */ 1302 int64_t 1303 __archive_read_seek(struct archive_read *a, int64_t offset, int whence) 1304 { 1305 return __archive_read_filter_seek(a->filter, offset, whence); 1306 } 1307 1308 int64_t 1309 __archive_read_filter_seek(struct archive_read_filter *filter, int64_t offset, int whence) 1310 { 1311 int64_t r; 1312 1313 if (filter->closed || filter->fatal) 1314 return (ARCHIVE_FATAL); 1315 if (filter->seek == NULL) 1316 return (ARCHIVE_FAILED); 1317 r = filter->seek(filter, offset, whence); 1318 if (r >= 0) { 1319 /* 1320 * Ouch. Clearing the buffer like this hurts, especially 1321 * at bid time. A lot of our efficiency at bid time comes 1322 * from having bidders reuse the data we've already read. 1323 * 1324 * TODO: If the seek request is in data we already 1325 * have, then don't call the seek callback. 1326 * 1327 * TODO: Zip seeks to end-of-file at bid time. If 1328 * other formats also start doing this, we may need to 1329 * find a way for clients to fudge the seek offset to 1330 * a block boundary. 1331 * 1332 * Hmmm... If whence was SEEK_END, we know the file 1333 * size is (r - offset). 
Can we use that to simplify 1334 * the TODO items above? 1335 */ 1336 filter->avail = filter->client_avail = 0; 1337 filter->next = filter->buffer; 1338 filter->position = r; 1339 filter->end_of_file = 0; 1340 } 1341 return r; 1342 } 1343