1 /*-
2  * Copyright (c) 2003-2007 Tim Kientzle
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 /*
27  * This file contains the "essential" portions of the read API, that
28  * is, stuff that will probably always be used by any client that
29  * actually needs to read an archive.  Optional pieces have been, as
30  * far as possible, separated out into separate files to avoid
31  * needlessly bloating statically-linked clients.
32  */
33 
34 #include "archive_platform.h"
35 __FBSDID("$FreeBSD: src/lib/libarchive/archive_read.c,v 1.39 2008/12/06 06:45:15 kientzle Exp $");
36 
37 #ifdef HAVE_ERRNO_H
38 #include <errno.h>
39 #endif
40 #include <stdio.h>
41 #ifdef HAVE_STDLIB_H
42 #include <stdlib.h>
43 #endif
44 #ifdef HAVE_STRING_H
45 #include <string.h>
46 #endif
47 #ifdef HAVE_UNISTD_H
48 #include <unistd.h>
49 #endif
50 
51 #include "archive.h"
52 #include "archive_entry.h"
53 #include "archive_private.h"
54 #include "archive_read_private.h"
55 
/*
 * Smaller of two values.  Each argument and the whole expansion are
 * parenthesized so that operands containing lower-precedence operators
 * (e.g. `x & mask` or a conditional expression) expand correctly.
 * Note: an argument may still be evaluated more than once, so do not
 * pass expressions with side effects.
 */
#define minimum(a, b) ((a) < (b) ? (a) : (b))
57 
/*
 * Forward declarations for file-local helpers that are referenced
 * before their definitions appear below.
 */
/* Assembles the filter pipeline by polling the registered bidders. */
static int	build_stream(struct archive_read *);
/* Polls the registered formats; returns the winning slot index. */
static int	choose_format(struct archive_read *);
/* Returns the vtable installed on every read handle. */
static struct archive_vtable *archive_read_vtable(void);
/* vtable entry: close the archive. */
static int	_archive_read_close(struct archive *);
/* vtable entry: close (if needed) and free the archive. */
static int	_archive_read_finish(struct archive *);
63 
64 static struct archive_vtable *
65 archive_read_vtable(void)
66 {
67 	static struct archive_vtable av;
68 	static int inited = 0;
69 
70 	if (!inited) {
71 		av.archive_finish = _archive_read_finish;
72 		av.archive_close = _archive_read_close;
73 	}
74 	return (&av);
75 }
76 
77 /*
78  * Allocate, initialize and return a struct archive object.
79  */
80 struct archive *
81 archive_read_new(void)
82 {
83 	struct archive_read *a;
84 
85 	a = (struct archive_read *)malloc(sizeof(*a));
86 	if (a == NULL)
87 		return (NULL);
88 	memset(a, 0, sizeof(*a));
89 	a->archive.magic = ARCHIVE_READ_MAGIC;
90 
91 	a->archive.state = ARCHIVE_STATE_NEW;
92 	a->entry = archive_entry_new();
93 	a->archive.vtable = archive_read_vtable();
94 
95 	return (&a->archive);
96 }
97 
98 /*
99  * Record the do-not-extract-to file. This belongs in archive_read_extract.c.
100  */
101 void
102 archive_read_extract_set_skip_file(struct archive *_a, dev_t d, ino_t i)
103 {
104 	struct archive_read *a = (struct archive_read *)_a;
105 	__archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_ANY,
106 	    "archive_read_extract_set_skip_file");
107 	a->skip_file_dev = d;
108 	a->skip_file_ino = i;
109 }
110 
111 /*
112  * Set read options for the format.
113  */
114 int
115 archive_read_set_format_options(struct archive *_a, const char *s)
116 {
117 	struct archive_read *a;
118 	struct archive_format_descriptor *format;
119 	char key[64], val[64];
120 	char *valp;
121 	size_t i;
122 	int len, r;
123 
124 	if (s == NULL || *s == '\0')
125 		return (ARCHIVE_OK);
126 	a = (struct archive_read *)_a;
127 	__archive_check_magic(&a->archive, ARCHIVE_READ_MAGIC,
128 	    ARCHIVE_STATE_NEW, "archive_read_set_format_options");
129 	len = 0;
130 	for (i = 0; i < sizeof(a->formats)/sizeof(a->formats[0]); i++) {
131 		format = &a->formats[i];
132 		if (format == NULL || format->options == NULL ||
133 		    format->name == NULL)
134 			/* This format does not support option. */
135 			continue;
136 
137 		while ((len = __archive_parse_options(s, format->name,
138 		    sizeof(key), key, sizeof(val), val)) > 0) {
139 			valp = val[0] == '\0' ? NULL : val;
140 			a->format = format;
141 			r = format->options(a, key, valp);
142 			a->format = NULL;
143 			if (r == ARCHIVE_FATAL)
144 				return (r);
145 			s += len;
146 		}
147 	}
148 	if (len < 0) {
149 		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
150 		    "Illegal format options.");
151 		return (ARCHIVE_WARN);
152 	}
153 	return (ARCHIVE_OK);
154 }
155 
156 /*
157  * Set read options for the filter.
158  */
159 int
160 archive_read_set_filter_options(struct archive *_a, const char *s)
161 {
162 	struct archive_read *a;
163 	struct archive_read_filter *filter;
164 	struct archive_read_filter_bidder *bidder;
165 	char key[64], val[64];
166 	int len, r;
167 
168 	if (s == NULL || *s == '\0')
169 		return (ARCHIVE_OK);
170 	a = (struct archive_read *)_a;
171 	__archive_check_magic(&a->archive, ARCHIVE_READ_MAGIC,
172 	    ARCHIVE_STATE_NEW, "archive_read_set_filter_options");
173 	filter = a->filter;
174 	len = 0;
175 	for (filter = a->filter; filter != NULL; filter = filter->upstream) {
176 		bidder = filter->bidder;
177 		if (bidder == NULL)
178 			continue;
179 		if (bidder->options == NULL)
180 			/* This bidder does not support option */
181 			continue;
182 		while ((len = __archive_parse_options(s, filter->name,
183 		    sizeof(key), key, sizeof(val), val)) > 0) {
184 			if (val[0] == '\0')
185 				r = bidder->options(bidder, key, NULL);
186 			else
187 				r = bidder->options(bidder, key, val);
188 			if (r == ARCHIVE_FATAL)
189 				return (r);
190 			s += len;
191 		}
192 	}
193 	if (len < 0) {
194 		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
195 		    "Illegal format options.");
196 		return (ARCHIVE_WARN);
197 	}
198 	return (ARCHIVE_OK);
199 }
200 
201 /*
202  * Set read options for the format and the filter.
203  */
204 int
205 archive_read_set_options(struct archive *_a, const char *s)
206 {
207 	int r;
208 
209 	r = archive_read_set_format_options(_a, s);
210 	if (r != ARCHIVE_OK)
211 		return (r);
212 	r = archive_read_set_filter_options(_a, s);
213 	if (r != ARCHIVE_OK)
214 		return (r);
215 	return (ARCHIVE_OK);
216 }
217 
218 /*
219  * Open the archive
220  */
221 int
222 archive_read_open(struct archive *a, void *client_data,
223     archive_open_callback *client_opener, archive_read_callback *client_reader,
224     archive_close_callback *client_closer)
225 {
226 	/* Old archive_read_open() is just a thin shell around
227 	 * archive_read_open2. */
228 	return archive_read_open2(a, client_data, client_opener,
229 	    client_reader, NULL, client_closer);
230 }
231 
232 static ssize_t
233 client_read_proxy(struct archive_read_filter *self, const void **buff)
234 {
235 	ssize_t r;
236 	r = (self->archive->client.reader)(&self->archive->archive,
237 	    self->data, buff);
238 	self->archive->archive.raw_position += r;
239 	return (r);
240 }
241 
242 static int64_t
243 client_skip_proxy(struct archive_read_filter *self, int64_t request)
244 {
245 	int64_t ask, get, total;
246 	/* Limit our maximum seek request to 1GB on platforms
247 	* with 32-bit off_t (such as Windows). */
248 	int64_t skip_limit = ((int64_t)1) << (sizeof(off_t) * 8 - 2);
249 
250 	if (self->archive->client.skipper == NULL)
251 		return (0);
252 	total = 0;
253 	for (;;) {
254 		ask = request;
255 		if (ask > skip_limit)
256 			ask = skip_limit;
257 		get = (self->archive->client.skipper)(&self->archive->archive,
258 			self->data, ask);
259 		if (get == 0)
260 			return (total);
261 		request -= get;
262 		self->archive->archive.raw_position += get;
263 		total += get;
264 	}
265 }
266 
267 static int
268 client_close_proxy(struct archive_read_filter *self)
269 {
270 	int r = ARCHIVE_OK;
271 
272 	if (self->archive->client.closer != NULL)
273 		r = (self->archive->client.closer)((struct archive *)self->archive,
274 		    self->data);
275 	self->data = NULL;
276 	return (r);
277 }
278 
279 
280 int
281 archive_read_open2(struct archive *_a, void *client_data,
282     archive_open_callback *client_opener,
283     archive_read_callback *client_reader,
284     archive_skip_callback *client_skipper,
285     archive_close_callback *client_closer)
286 {
287 	struct archive_read *a = (struct archive_read *)_a;
288 	struct archive_read_filter *filter;
289 	int e;
290 
291 	__archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW,
292 	    "archive_read_open");
293 
294 	if (client_reader == NULL)
295 		__archive_errx(1,
296 		    "No reader function provided to archive_read_open");
297 
298 	/* Open data source. */
299 	if (client_opener != NULL) {
300 		e =(client_opener)(&a->archive, client_data);
301 		if (e != 0) {
302 			/* If the open failed, call the closer to clean up. */
303 			if (client_closer)
304 				(client_closer)(&a->archive, client_data);
305 			return (e);
306 		}
307 	}
308 
309 	/* Save the client functions and mock up the initial source. */
310 	a->client.reader = client_reader;
311 	a->client.skipper = client_skipper;
312 	a->client.closer = client_closer;
313 
314 	filter = calloc(1, sizeof(*filter));
315 	if (filter == NULL)
316 		return (ARCHIVE_FATAL);
317 	filter->bidder = NULL;
318 	filter->upstream = NULL;
319 	filter->archive = a;
320 	filter->data = client_data;
321 	filter->read = client_read_proxy;
322 	filter->skip = client_skip_proxy;
323 	filter->close = client_close_proxy;
324 	filter->name = "none";
325 	filter->code = ARCHIVE_COMPRESSION_NONE;
326 	a->filter = filter;
327 
328 	/* Build out the input pipeline. */
329 	e = build_stream(a);
330 	if (e == ARCHIVE_OK)
331 		a->archive.state = ARCHIVE_STATE_HEADER;
332 
333 	return (e);
334 }
335 
336 /*
337  * Allow each registered stream transform to bid on whether
338  * it wants to handle this stream.  Repeat until we've finished
339  * building the pipeline.
340  */
341 static int
342 build_stream(struct archive_read *a)
343 {
344 	int number_bidders, i, bid, best_bid;
345 	struct archive_read_filter_bidder *bidder, *best_bidder;
346 	struct archive_read_filter *filter;
347 	int r;
348 
349 	for (;;) {
350 		number_bidders = sizeof(a->bidders) / sizeof(a->bidders[0]);
351 
352 		best_bid = 0;
353 		best_bidder = NULL;
354 
355 		bidder = a->bidders;
356 		for (i = 0; i < number_bidders; i++, bidder++) {
357 			if (bidder->bid != NULL) {
358 				bid = (bidder->bid)(bidder, a->filter);
359 				if (bid > best_bid) {
360 					best_bid = bid;
361 					best_bidder = bidder;
362 				}
363 			}
364 		}
365 
366 		/* If no bidder, we're done. */
367 		if (best_bidder == NULL) {
368 			a->archive.compression_name = a->filter->name;
369 			a->archive.compression_code = a->filter->code;
370 			return (ARCHIVE_OK);
371 		}
372 
373 		filter
374 		    = (struct archive_read_filter *)calloc(1, sizeof(*filter));
375 		if (filter == NULL)
376 			return (ARCHIVE_FATAL);
377 		filter->bidder = best_bidder;
378 		filter->archive = a;
379 		filter->upstream = a->filter;
380 		r = (best_bidder->init)(filter);
381 		if (r != ARCHIVE_OK) {
382 			free(filter);
383 			return (r);
384 		}
385 		a->filter = filter;
386 	}
387 }
388 
389 /*
390  * Read header of next entry.
391  */
392 int
393 archive_read_next_header2(struct archive *_a, struct archive_entry *entry)
394 {
395 	struct archive_read *a = (struct archive_read *)_a;
396 	int slot, ret;
397 
398 	__archive_check_magic(_a, ARCHIVE_READ_MAGIC,
399 	    ARCHIVE_STATE_HEADER | ARCHIVE_STATE_DATA,
400 	    "archive_read_next_header");
401 
402 	archive_entry_clear(entry);
403 	archive_clear_error(&a->archive);
404 
405 	/*
406 	 * If no format has yet been chosen, choose one.
407 	 */
408 	if (a->format == NULL) {
409 		slot = choose_format(a);
410 		if (slot < 0) {
411 			a->archive.state = ARCHIVE_STATE_FATAL;
412 			return (ARCHIVE_FATAL);
413 		}
414 		a->format = &(a->formats[slot]);
415 	}
416 
417 	/*
418 	 * If client didn't consume entire data, skip any remainder
419 	 * (This is especially important for GNU incremental directories.)
420 	 */
421 	if (a->archive.state == ARCHIVE_STATE_DATA) {
422 		ret = archive_read_data_skip(&a->archive);
423 		if (ret == ARCHIVE_EOF) {
424 			archive_set_error(&a->archive, EIO, "Premature end-of-file.");
425 			a->archive.state = ARCHIVE_STATE_FATAL;
426 			return (ARCHIVE_FATAL);
427 		}
428 		if (ret != ARCHIVE_OK)
429 			return (ret);
430 	}
431 
432 	/* Record start-of-header. */
433 	a->header_position = a->archive.file_position;
434 
435 	ret = (a->format->read_header)(a, entry);
436 
437 	/*
438 	 * EOF and FATAL are persistent at this layer.  By
439 	 * modifying the state, we guarantee that future calls to
440 	 * read a header or read data will fail.
441 	 */
442 	switch (ret) {
443 	case ARCHIVE_EOF:
444 		a->archive.state = ARCHIVE_STATE_EOF;
445 		break;
446 	case ARCHIVE_OK:
447 		a->archive.state = ARCHIVE_STATE_DATA;
448 		break;
449 	case ARCHIVE_WARN:
450 		a->archive.state = ARCHIVE_STATE_DATA;
451 		break;
452 	case ARCHIVE_RETRY:
453 		break;
454 	case ARCHIVE_FATAL:
455 		a->archive.state = ARCHIVE_STATE_FATAL;
456 		break;
457 	}
458 
459 	a->read_data_output_offset = 0;
460 	a->read_data_remaining = 0;
461 	return (ret);
462 }
463 
464 int
465 archive_read_next_header(struct archive *_a, struct archive_entry **entryp)
466 {
467 	int ret;
468 	struct archive_read *a = (struct archive_read *)_a;
469 	*entryp = NULL;
470 	ret = archive_read_next_header2(_a, a->entry);
471 	*entryp = a->entry;
472 	return ret;
473 }
474 
475 /*
476  * Allow each registered format to bid on whether it wants to handle
477  * the next entry.  Return index of winning bidder.
478  */
479 static int
480 choose_format(struct archive_read *a)
481 {
482 	int slots;
483 	int i;
484 	int bid, best_bid;
485 	int best_bid_slot;
486 
487 	slots = sizeof(a->formats) / sizeof(a->formats[0]);
488 	best_bid = -1;
489 	best_bid_slot = -1;
490 
491 	/* Set up a->format and a->pformat_data for convenience of bidders. */
492 	a->format = &(a->formats[0]);
493 	for (i = 0; i < slots; i++, a->format++) {
494 		if (a->format->bid) {
495 			bid = (a->format->bid)(a);
496 			if (bid == ARCHIVE_FATAL)
497 				return (ARCHIVE_FATAL);
498 			if ((bid > best_bid) || (best_bid_slot < 0)) {
499 				best_bid = bid;
500 				best_bid_slot = i;
501 			}
502 		}
503 	}
504 
505 	/*
506 	 * There were no bidders; this is a serious programmer error
507 	 * and demands a quick and definitive abort.
508 	 */
509 	if (best_bid_slot < 0)
510 		__archive_errx(1, "No formats were registered; you must "
511 		    "invoke at least one archive_read_support_format_XXX "
512 		    "function in order to successfully read an archive.");
513 
514 	/*
515 	 * There were bidders, but no non-zero bids; this means we
516 	 * can't support this stream.
517 	 */
518 	if (best_bid < 1) {
519 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
520 		    "Unrecognized archive format");
521 		return (ARCHIVE_FATAL);
522 	}
523 
524 	return (best_bid_slot);
525 }
526 
527 /*
528  * Return the file offset (within the uncompressed data stream) where
529  * the last header started.
530  */
531 int64_t
532 archive_read_header_position(struct archive *_a)
533 {
534 	struct archive_read *a = (struct archive_read *)_a;
535 	__archive_check_magic(_a, ARCHIVE_READ_MAGIC,
536 	    ARCHIVE_STATE_ANY, "archive_read_header_position");
537 	return (a->header_position);
538 }
539 
/*
 * Read data from an archive entry, using a read(2)-style interface.
 * This is a convenience routine that just calls
 * archive_read_data_block and copies the results into the client
 * buffer, filling any gaps with zero bytes.  Clients using this
 * API can be completely ignorant of sparse-file issues; sparse files
 * will simply be padded with nulls.
 *
 * Parameters:
 *   _a   - read handle.
 *   buff - destination buffer.
 *   s    - capacity of buff in bytes.
 *
 * Returns the number of bytes stored in buff (which may be less than
 * s when the end of the entry is reached), the negative status from
 * archive_read_data_block on failure, or ARCHIVE_RETRY when a block
 * starts before the current output offset.  Note that on those error
 * returns any bytes already copied during this call are not reported.
 *
 * DO NOT intermingle calls to this function and archive_read_data_block
 * to read a single entry body.
 */
ssize_t
archive_read_data(struct archive *_a, void *buff, size_t s)
{
	struct archive_read *a = (struct archive_read *)_a;
	char	*dest;
	const void *read_buf;
	size_t	 bytes_read;
	size_t	 len;
	int	 r;

	bytes_read = 0;
	dest = (char *)buff;

	/* Loop until the caller's buffer is full or the entry ends. */
	while (s > 0) {
		/* Current block exhausted: fetch the next one. */
		if (a->read_data_remaining == 0) {
			read_buf = a->read_data_block;
			r = archive_read_data_block(&a->archive, &read_buf,
			    &a->read_data_remaining, &a->read_data_offset);
			a->read_data_block = read_buf;
			if (r == ARCHIVE_EOF)
				return (bytes_read);
			/*
			 * Error codes are all negative, so the status
			 * return here cannot be confused with a valid
			 * byte count.  (ARCHIVE_OK is zero.)
			 */
			if (r < ARCHIVE_OK)
				return (r);
		}

		/* A block may not begin before what was already emitted. */
		if (a->read_data_offset < a->read_data_output_offset) {
			archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
			    "Encountered out-of-order sparse blocks");
			return (ARCHIVE_RETRY);
		}

		/*
		 * Compute the amount of zero padding needed: the gap
		 * between the output offset and the start of the block,
		 * capped at the space left in the caller's buffer.
		 */
		if (a->read_data_output_offset + (off_t)s <
		    a->read_data_offset) {
			len = s;
		} else if (a->read_data_output_offset <
		    a->read_data_offset) {
			len = a->read_data_offset -
			    a->read_data_output_offset;
		} else
			len = 0;

		/* Add zeroes. */
		memset(dest, 0, len);
		s -= len;
		a->read_data_output_offset += len;
		dest += len;
		bytes_read += len;

		/* Copy data if there is any space left. */
		if (s > 0) {
			len = a->read_data_remaining;
			if (len > s)
				len = s;
			memcpy(dest, a->read_data_block, len);
			s -= len;
			/* Advance both the block cursor and the offsets. */
			a->read_data_block += len;
			a->read_data_remaining -= len;
			a->read_data_output_offset += len;
			a->read_data_offset += len;
			dest += len;
			bytes_read += len;
		}
	}
	return (bytes_read);
}
622 
623 #if ARCHIVE_API_VERSION < 3
624 /*
625  * Obsolete function provided for compatibility only.  Note that the API
626  * of this function doesn't allow the caller to detect if the remaining
627  * data from the archive entry is shorter than the buffer provided, or
628  * even if an error occurred while reading data.
629  */
630 int
631 archive_read_data_into_buffer(struct archive *a, void *d, ssize_t len)
632 {
633 
634 	archive_read_data(a, d, len);
635 	return (ARCHIVE_OK);
636 }
637 #endif
638 
639 /*
640  * Skip over all remaining data in this entry.
641  */
642 int
643 archive_read_data_skip(struct archive *_a)
644 {
645 	struct archive_read *a = (struct archive_read *)_a;
646 	int r;
647 	const void *buff;
648 	size_t size;
649 	off_t offset;
650 
651 	__archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_DATA,
652 	    "archive_read_data_skip");
653 
654 	if (a->format->read_data_skip != NULL)
655 		r = (a->format->read_data_skip)(a);
656 	else {
657 		while ((r = archive_read_data_block(&a->archive,
658 			    &buff, &size, &offset))
659 		    == ARCHIVE_OK)
660 			;
661 	}
662 
663 	if (r == ARCHIVE_EOF)
664 		r = ARCHIVE_OK;
665 
666 	a->archive.state = ARCHIVE_STATE_HEADER;
667 	return (r);
668 }
669 
670 /*
671  * Read the next block of entry data from the archive.
672  * This is a zero-copy interface; the client receives a pointer,
673  * size, and file offset of the next available block of data.
674  *
675  * Returns ARCHIVE_OK if the operation is successful, ARCHIVE_EOF if
676  * the end of entry is encountered.
677  */
678 int
679 archive_read_data_block(struct archive *_a,
680     const void **buff, size_t *size, off_t *offset)
681 {
682 	struct archive_read *a = (struct archive_read *)_a;
683 	__archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_DATA,
684 	    "archive_read_data_block");
685 
686 	if (a->format->read_data == NULL) {
687 		archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
688 		    "Internal error: "
689 		    "No format_read_data_block function registered");
690 		return (ARCHIVE_FATAL);
691 	}
692 
693 	return (a->format->read_data)(a, buff, size, offset);
694 }
695 
696 /*
697  * Close the file and release most resources.
698  *
699  * Be careful: client might just call read_new and then read_finish.
700  * Don't assume we actually read anything or performed any non-trivial
701  * initialization.
702  */
703 static int
704 _archive_read_close(struct archive *_a)
705 {
706 	struct archive_read *a = (struct archive_read *)_a;
707 	int r = ARCHIVE_OK, r1 = ARCHIVE_OK;
708 	size_t i, n;
709 
710 	__archive_check_magic(&a->archive, ARCHIVE_READ_MAGIC,
711 	    ARCHIVE_STATE_ANY, "archive_read_close");
712 	archive_clear_error(&a->archive);
713 	a->archive.state = ARCHIVE_STATE_CLOSED;
714 
715 
716 	/* Call cleanup functions registered by optional components. */
717 	if (a->cleanup_archive_extract != NULL)
718 		r = (a->cleanup_archive_extract)(a);
719 
720 	/* TODO: Clean up the formatters. */
721 
722 	/* Clean up the filter pipeline. */
723 	while (a->filter != NULL) {
724 		struct archive_read_filter *t = a->filter->upstream;
725 		if (a->filter->close != NULL) {
726 			r1 = (a->filter->close)(a->filter);
727 			if (r1 < r)
728 				r = r1;
729 		}
730 		free(a->filter->buffer);
731 		free(a->filter);
732 		a->filter = t;
733 	}
734 
735 	/* Release the bidder objects. */
736 	n = sizeof(a->bidders)/sizeof(a->bidders[0]);
737 	for (i = 0; i < n; i++) {
738 		if (a->bidders[i].free != NULL) {
739 			r1 = (a->bidders[i].free)(&a->bidders[i]);
740 			if (r1 < r)
741 				r = r1;
742 		}
743 	}
744 
745 	return (r);
746 }
747 
748 /*
749  * Release memory and other resources.
750  */
751 int
752 _archive_read_finish(struct archive *_a)
753 {
754 	struct archive_read *a = (struct archive_read *)_a;
755 	int i;
756 	int slots;
757 	int r = ARCHIVE_OK;
758 
759 	__archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_ANY,
760 	    "archive_read_finish");
761 	if (a->archive.state != ARCHIVE_STATE_CLOSED)
762 		r = archive_read_close(&a->archive);
763 
764 	/* Cleanup format-specific data. */
765 	slots = sizeof(a->formats) / sizeof(a->formats[0]);
766 	for (i = 0; i < slots; i++) {
767 		a->format = &(a->formats[i]);
768 		if (a->formats[i].cleanup)
769 			(a->formats[i].cleanup)(a);
770 	}
771 
772 	archive_string_free(&a->archive.error_string);
773 	if (a->entry)
774 		archive_entry_free(a->entry);
775 	a->archive.magic = 0;
776 	free(a);
777 #if ARCHIVE_API_VERSION > 1
778 	return (r);
779 #endif
780 }
781 
782 /*
783  * Used internally by read format handlers to register their bid and
784  * initialization functions.
785  */
786 int
787 __archive_read_register_format(struct archive_read *a,
788     void *format_data,
789     const char *name,
790     int (*bid)(struct archive_read *),
791     int (*options)(struct archive_read *, const char *, const char *),
792     int (*read_header)(struct archive_read *, struct archive_entry *),
793     int (*read_data)(struct archive_read *, const void **, size_t *, off_t *),
794     int (*read_data_skip)(struct archive_read *),
795     int (*cleanup)(struct archive_read *))
796 {
797 	int i, number_slots;
798 
799 	__archive_check_magic(&a->archive,
800 	    ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW,
801 	    "__archive_read_register_format");
802 
803 	number_slots = sizeof(a->formats) / sizeof(a->formats[0]);
804 
805 	for (i = 0; i < number_slots; i++) {
806 		if (a->formats[i].bid == bid)
807 			return (ARCHIVE_WARN); /* We've already installed */
808 		if (a->formats[i].bid == NULL) {
809 			a->formats[i].bid = bid;
810 			a->formats[i].options = options;
811 			a->formats[i].read_header = read_header;
812 			a->formats[i].read_data = read_data;
813 			a->formats[i].read_data_skip = read_data_skip;
814 			a->formats[i].cleanup = cleanup;
815 			a->formats[i].data = format_data;
816 			a->formats[i].name = name;
817 			return (ARCHIVE_OK);
818 		}
819 	}
820 
821 	__archive_errx(1, "Not enough slots for format registration");
822 	return (ARCHIVE_FATAL); /* Never actually called. */
823 }
824 
825 /*
826  * Used internally by decompression routines to register their bid and
827  * initialization functions.
828  */
829 struct archive_read_filter_bidder *
830 __archive_read_get_bidder(struct archive_read *a)
831 {
832 	int i, number_slots;
833 
834 	__archive_check_magic(&a->archive,
835 	    ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW,
836 	    "__archive_read_get_bidder");
837 
838 	number_slots = sizeof(a->bidders) / sizeof(a->bidders[0]);
839 
840 	for (i = 0; i < number_slots; i++) {
841 		if (a->bidders[i].bid == NULL) {
842 			memset(a->bidders + i, 0, sizeof(a->bidders[0]));
843 			return (a->bidders + i);
844 		}
845 	}
846 
847 	__archive_errx(1, "Not enough slots for compression registration");
848 	return (NULL); /* Never actually executed. */
849 }
850 
851 /*
852  * The next three functions comprise the peek/consume internal I/O
853  * system used by archive format readers.  This system allows fairly
854  * flexible read-ahead and allows the I/O code to operate in a
855  * zero-copy manner most of the time.
856  *
857  * In the ideal case, filters generate blocks of data
858  * and __archive_read_ahead() just returns pointers directly into
859  * those blocks.  Then __archive_read_consume() just bumps those
860  * pointers.  Only if your request would span blocks does the I/O
861  * layer use a copy buffer to provide you with a contiguous block of
862  * data.  The __archive_read_skip() is an optimization; it scans ahead
863  * very quickly (it usually translates into a seek() operation if
864  * you're reading uncompressed disk files).
865  *
866  * A couple of useful idioms:
867  *  * "I just want some data."  Ask for 1 byte and pay attention to
868  *    the "number of bytes available" from __archive_read_ahead().
869  *    You can consume more than you asked for; you just can't consume
870  *    more than is available.  If you consume everything that's
871  *    immediately available, the next read_ahead() call will pull
872  *    the next block.
873  *  * "I want to output a large block of data."  As above, ask for 1 byte,
874  *    emit all that's available (up to whatever limit you have), then
875  *    repeat until you're done.
876  *  * "I want to peek ahead by a large amount."  Ask for 4k or so, then
877  *    double and repeat until you get an error or have enough.  Note
878  *    that the I/O layer will likely end up expanding its copy buffer
879  *    to fit your request, so use this technique cautiously.  This
880  *    technique is used, for example, by some of the format tasting
881  *    code that has uncertain look-ahead needs.
882  *
883  * TODO: Someday, provide a more generic __archive_read_seek() for
884  * those cases where it's useful.  This is tricky because there are lots
885  * of cases where seek() is not available (reading gzip data from a
886  * network socket, for instance), so there needs to be a good way to
887  * communicate whether seek() is available and users of that interface
888  * need to use non-seeking strategies whenever seek() is not available.
889  */
890 
891 /*
892  * Looks ahead in the input stream:
893  *  * If 'avail' pointer is provided, that returns number of bytes available
894  *    in the current buffer, which may be much larger than requested.
895  *  * If end-of-file, *avail gets set to zero.
896  *  * If error, *avail gets error code.
897  *  * If request can be met, returns pointer to data, returns NULL
898  *    if request is not met.
899  *
900  * Note: If you just want "some data", ask for 1 byte and pay attention
901  * to *avail, which will have the actual amount available.  If you
902  * know exactly how many bytes you need, just ask for that and treat
903  * a NULL return as an error.
904  *
905  * Important:  This does NOT move the file pointer.  See
906  * __archive_read_consume() below.
907  */
908 
909 /*
910  * This is tricky.  We need to provide our clients with pointers to
911  * contiguous blocks of memory but we want to avoid copying whenever
912  * possible.
913  *
914  * Mostly, this code returns pointers directly into the block of data
915  * provided by the client_read routine.  It can do this unless the
916  * request would split across blocks.  In that case, we have to copy
917  * into an internal buffer to combine reads.
918  */
919 const void *
920 __archive_read_ahead(struct archive_read *a, size_t min, ssize_t *avail)
921 {
922 	return (__archive_read_filter_ahead(a->filter, min, avail));
923 }
924 
925 const void *
926 __archive_read_filter_ahead(struct archive_read_filter *filter,
927     size_t min, ssize_t *avail)
928 {
929 	ssize_t bytes_read;
930 	size_t tocopy;
931 
932 	if (filter->fatal) {
933 		if (avail)
934 			*avail = ARCHIVE_FATAL;
935 		return (NULL);
936 	}
937 
938 	/*
939 	 * Keep pulling more data until we can satisfy the request.
940 	 */
941 	for (;;) {
942 
943 		/*
944 		 * If we can satisfy from the copy buffer, we're done.
945 		 */
946 		if (filter->avail >= min) {
947 			if (avail != NULL)
948 				*avail = filter->avail;
949 			return (filter->next);
950 		}
951 
952 		/*
953 		 * We can satisfy directly from client buffer if everything
954 		 * currently in the copy buffer is still in the client buffer.
955 		 */
956 		if (filter->client_total >= filter->client_avail + filter->avail
957 		    && filter->client_avail + filter->avail >= min) {
958 			/* "Roll back" to client buffer. */
959 			filter->client_avail += filter->avail;
960 			filter->client_next -= filter->avail;
961 			/* Copy buffer is now empty. */
962 			filter->avail = 0;
963 			filter->next = filter->buffer;
964 			/* Return data from client buffer. */
965 			if (avail != NULL)
966 				*avail = filter->client_avail;
967 			return (filter->client_next);
968 		}
969 
970 		/* Move data forward in copy buffer if necessary. */
971 		if (filter->next > filter->buffer &&
972 		    filter->next + min > filter->buffer + filter->buffer_size) {
973 			if (filter->avail > 0)
974 				memmove(filter->buffer, filter->next, filter->avail);
975 			filter->next = filter->buffer;
976 		}
977 
978 		/* If we've used up the client data, get more. */
979 		if (filter->client_avail <= 0) {
980 			if (filter->end_of_file) {
981 				if (avail != NULL)
982 					*avail = 0;
983 				return (NULL);
984 			}
985 			bytes_read = (filter->read)(filter,
986 			    &filter->client_buff);
987 			if (bytes_read < 0) {		/* Read error. */
988 				filter->client_total = filter->client_avail = 0;
989 				filter->client_next = filter->client_buff = NULL;
990 				filter->fatal = 1;
991 				if (avail != NULL)
992 					*avail = ARCHIVE_FATAL;
993 				return (NULL);
994 			}
995 			if (bytes_read == 0) {	/* Premature end-of-file. */
996 				filter->client_total = filter->client_avail = 0;
997 				filter->client_next = filter->client_buff = NULL;
998 				filter->end_of_file = 1;
999 				/* Return whatever we do have. */
1000 				if (avail != NULL)
1001 					*avail = filter->avail;
1002 				return (NULL);
1003 			}
1004 			filter->position += bytes_read;
1005 			filter->client_total = bytes_read;
1006 			filter->client_avail = filter->client_total;
1007 			filter->client_next = filter->client_buff;
1008 		}
1009 		else
1010 		{
1011 			/*
1012 			 * We can't satisfy the request from the copy
1013 			 * buffer or the existing client data, so we
1014 			 * need to copy more client data over to the
1015 			 * copy buffer.
1016 			 */
1017 
1018 			/* Ensure the buffer is big enough. */
1019 			if (min > filter->buffer_size) {
1020 				size_t s, t;
1021 				char *p;
1022 
1023 				/* Double the buffer; watch for overflow. */
1024 				s = t = filter->buffer_size;
1025 				if (s == 0)
1026 					s = min;
1027 				while (s < min) {
1028 					t *= 2;
1029 					if (t <= s) { /* Integer overflow! */
1030 						archive_set_error(
1031 							&filter->archive->archive,
1032 							ENOMEM,
1033 						    "Unable to allocate copy buffer");
1034 						filter->fatal = 1;
1035 						if (avail != NULL)
1036 							*avail = ARCHIVE_FATAL;
1037 						return (NULL);
1038 					}
1039 					s = t;
1040 				}
1041 				/* Now s >= min, so allocate a new buffer. */
1042 				p = (char *)malloc(s);
1043 				if (p == NULL) {
1044 					archive_set_error(
1045 						&filter->archive->archive,
1046 						ENOMEM,
1047 					    "Unable to allocate copy buffer");
1048 					filter->fatal = 1;
1049 					if (avail != NULL)
1050 						*avail = ARCHIVE_FATAL;
1051 					return (NULL);
1052 				}
1053 				/* Move data into newly-enlarged buffer. */
1054 				if (filter->avail > 0)
1055 					memmove(p, filter->next, filter->avail);
1056 				free(filter->buffer);
1057 				filter->next = filter->buffer = p;
1058 				filter->buffer_size = s;
1059 			}
1060 
1061 			/* We can add client data to copy buffer. */
1062 			/* First estimate: copy to fill rest of buffer. */
1063 			tocopy = (filter->buffer + filter->buffer_size)
1064 			    - (filter->next + filter->avail);
1065 			/* Don't waste time buffering more than we need to. */
1066 			if (tocopy + filter->avail > min)
1067 				tocopy = min - filter->avail;
1068 			/* Don't copy more than is available. */
1069 			if (tocopy > filter->client_avail)
1070 				tocopy = filter->client_avail;
1071 
1072 			memcpy(filter->next + filter->avail, filter->client_next,
1073 			    tocopy);
1074 			/* Remove this data from client buffer. */
1075 			filter->client_next += tocopy;
1076 			filter->client_avail -= tocopy;
1077 			/* add it to copy buffer. */
1078 			filter->avail += tocopy;
1079 		}
1080 	}
1081 }
1082 
1083 /*
1084  * Move the file pointer forward.  This should be called after
1085  * __archive_read_ahead() returns data to you.  Don't try to move
1086  * ahead by more than the amount of data available according to
1087  * __archive_read_ahead().
1088  */
1089 /*
1090  * Mark the appropriate data as used.  Note that the request here will
1091  * often be much smaller than the size of the previous read_ahead
1092  * request.
1093  */
1094 ssize_t
1095 __archive_read_consume(struct archive_read *a, size_t request)
1096 {
1097 	ssize_t r;
1098 	r = __archive_read_filter_consume(a->filter, request);
1099 	a->archive.file_position += r;
1100 	return (r);
1101 }
1102 
1103 ssize_t
1104 __archive_read_filter_consume(struct archive_read_filter * filter,
1105     size_t request)
1106 {
1107 	if (filter->avail > 0) {
1108 		/* Read came from copy buffer. */
1109 		filter->next += request;
1110 		filter->avail -= request;
1111 	} else {
1112 		/* Read came from client buffer. */
1113 		filter->client_next += request;
1114 		filter->client_avail -= request;
1115 	}
1116 	return (request);
1117 }
1118 
1119 /*
1120  * Move the file pointer ahead by an arbitrary amount.  If you're
1121  * reading uncompressed data from a disk file, this will actually
1122  * translate into a seek() operation.  Even in cases where seek()
1123  * isn't feasible, this at least pushes the read-and-discard loop
1124  * down closer to the data source.
1125  */
1126 int64_t
1127 __archive_read_skip(struct archive_read *a, int64_t request)
1128 {
1129 	return (__archive_read_filter_skip(a->filter, request));
1130 }
1131 
1132 int64_t
1133 __archive_read_filter_skip(struct archive_read_filter *filter, int64_t request)
1134 {
1135 	int64_t bytes_skipped, total_bytes_skipped = 0;
1136 	size_t min;
1137 
1138 	if (filter->fatal)
1139 		return (-1);
1140 	/*
1141 	 * If there is data in the buffers already, use that first.
1142 	 */
1143 	if (filter->avail > 0) {
1144 		min = minimum(request, (off_t)filter->avail);
1145 		bytes_skipped = __archive_read_consume(filter->archive, min);
1146 		request -= bytes_skipped;
1147 		total_bytes_skipped += bytes_skipped;
1148 	}
1149 	if (filter->client_avail > 0) {
1150 		min = minimum(request, (int64_t)filter->client_avail);
1151 		bytes_skipped = __archive_read_consume(filter->archive, min);
1152 		request -= bytes_skipped;
1153 		total_bytes_skipped += bytes_skipped;
1154 	}
1155 	if (request == 0)
1156 		return (total_bytes_skipped);
1157 	/*
1158 	 * If a client_skipper was provided, try that first.
1159 	 */
1160 #if ARCHIVE_API_VERSION < 2
1161 	if ((filter->skip != NULL) && (request < SSIZE_MAX)) {
1162 #else
1163 	if (filter->skip != NULL) {
1164 #endif
1165 		bytes_skipped = (filter->skip)(filter, request);
1166 		if (bytes_skipped < 0) {	/* error */
1167 			filter->client_total = filter->client_avail = 0;
1168 			filter->client_next = filter->client_buff = NULL;
1169 			filter->fatal = 1;
1170 			return (bytes_skipped);
1171 		}
1172 		filter->archive->archive.file_position += bytes_skipped;
1173 		total_bytes_skipped += bytes_skipped;
1174 		request -= bytes_skipped;
1175 		filter->client_next = filter->client_buff;
1176 		filter->client_avail = filter->client_total = 0;
1177 	}
1178 	/*
1179 	 * Note that client_skipper will usually not satisfy the
1180 	 * full request (due to low-level blocking concerns),
1181 	 * so even if client_skipper is provided, we may still
1182 	 * have to use ordinary reads to finish out the request.
1183 	 */
1184 	while (request > 0) {
1185 		const void* dummy_buffer;
1186 		ssize_t bytes_read;
1187 		dummy_buffer = __archive_read_ahead(filter->archive,
1188 		    1, &bytes_read);
1189 		if (bytes_read < 0)
1190 			return (bytes_read);
1191 		if (bytes_read == 0) {
1192 			/* We hit EOF before we satisfied the skip request. */
1193 			archive_set_error(&filter->archive->archive,
1194 			    ARCHIVE_ERRNO_MISC,
1195 			    "Truncated input file (need to skip %jd bytes)",
1196 			    (intmax_t)request);
1197 			return (ARCHIVE_FATAL);
1198 		}
1199 		min = (size_t)(minimum(bytes_read, request));
1200 		bytes_read = __archive_read_consume(filter->archive, min);
1201 		total_bytes_skipped += bytes_read;
1202 		request -= bytes_read;
1203 	}
1204 	return (total_bytes_skipped);
1205 }
1206