1.\" Copyright (c) 2003-2007 Tim Kientzle
2.\" All rights reserved.
3.\"
4.\" Redistribution and use in source and binary forms, with or without
5.\" modification, are permitted provided that the following conditions
6.\" are met:
7.\" 1. Redistributions of source code must retain the above copyright
8.\"    notice, this list of conditions and the following disclaimer.
9.\" 2. Redistributions in binary form must reproduce the above copyright
10.\"    notice, this list of conditions and the following disclaimer in the
11.\"    documentation and/or other materials provided with the distribution.
12.\"
13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23.\" SUCH DAMAGE.
24.\"
25.\" $FreeBSD: src/lib/libarchive/archive_read.3,v 1.37 2008/05/26 17:00:22 kientzle Exp $
26.\"
27.Dd August 19, 2006
28.Dt archive_read 3
29.Os
30.Sh NAME
31.Nm archive_read_new ,
32.Nm archive_read_set_filter_options ,
33.Nm archive_read_set_format_options ,
34.Nm archive_read_set_options ,
35.Nm archive_read_support_compression_all ,
36.Nm archive_read_support_compression_bzip2 ,
37.Nm archive_read_support_compression_compress ,
38.Nm archive_read_support_compression_gzip ,
39.Nm archive_read_support_compression_none ,
40.Nm archive_read_support_compression_program ,
41.Nm archive_read_support_format_all ,
42.Nm archive_read_support_format_cpio ,
43.Nm archive_read_support_format_empty ,
44.Nm archive_read_support_format_iso9660 ,
45.Nm archive_read_support_format_tar ,
46.Nm archive_read_support_format_zip ,
47.Nm archive_read_open ,
48.Nm archive_read_open2 ,
49.Nm archive_read_open_fd ,
50.Nm archive_read_open_FILE ,
51.Nm archive_read_open_filename ,
52.Nm archive_read_open_memory ,
53.Nm archive_read_next_header ,
54.Nm archive_read_next_header2 ,
55.Nm archive_read_data ,
56.Nm archive_read_data_block ,
57.Nm archive_read_data_skip ,
58.\" #if ARCHIVE_API_VERSION < 3
59.Nm archive_read_data_into_buffer ,
60.\" #endif
61.Nm archive_read_data_into_fd ,
62.Nm archive_read_extract ,
63.Nm archive_read_extract2 ,
64.Nm archive_read_extract_set_progress_callback ,
65.Nm archive_read_close ,
66.Nm archive_read_finish
67.Nd functions for reading streaming archives
68.Sh SYNOPSIS
69.In archive.h
70.Ft struct archive *
71.Fn archive_read_new "void"
72.Ft int
73.Fn archive_read_support_compression_all "struct archive *"
74.Ft int
75.Fn archive_read_support_compression_bzip2 "struct archive *"
76.Ft int
77.Fn archive_read_support_compression_compress "struct archive *"
78.Ft int
79.Fn archive_read_support_compression_gzip "struct archive *"
80.Ft int
81.Fn archive_read_support_compression_none "struct archive *"
82.Ft int
83.Fo archive_read_support_compression_program
84.Fa "struct archive *"
85.Fa "const char *cmd"
86.Fc
87.Ft int
88.Fn archive_read_support_format_all "struct archive *"
89.Ft int
90.Fn archive_read_support_format_cpio "struct archive *"
91.Ft int
92.Fn archive_read_support_format_empty "struct archive *"
93.Ft int
94.Fn archive_read_support_format_iso9660 "struct archive *"
95.Ft int
96.Fn archive_read_support_format_tar "struct archive *"
97.Ft int
98.Fn archive_read_support_format_zip "struct archive *"
99.Ft int
100.Fn archive_read_set_filter_options "struct archive *" "const char *"
101.Ft int
102.Fn archive_read_set_format_options "struct archive *" "const char *"
103.Ft int
104.Fn archive_read_set_options "struct archive *" "const char *"
105.Ft int
106.Fo archive_read_open
107.Fa "struct archive *"
108.Fa "void *client_data"
109.Fa "archive_open_callback *"
110.Fa "archive_read_callback *"
111.Fa "archive_close_callback *"
112.Fc
113.Ft int
114.Fo archive_read_open2
115.Fa "struct archive *"
116.Fa "void *client_data"
117.Fa "archive_open_callback *"
118.Fa "archive_read_callback *"
119.Fa "archive_skip_callback *"
120.Fa "archive_close_callback *"
121.Fc
122.Ft int
123.Fn archive_read_open_FILE "struct archive *" "FILE *file"
124.Ft int
125.Fn archive_read_open_fd "struct archive *" "int fd" "size_t block_size"
126.Ft int
127.Fo archive_read_open_filename
128.Fa "struct archive *"
129.Fa "const char *filename"
130.Fa "size_t block_size"
131.Fc
132.Ft int
133.Fn archive_read_open_memory "struct archive *" "void *buff" "size_t size"
134.Ft int
135.Fn archive_read_next_header "struct archive *" "struct archive_entry **"
136.Ft int
137.Fn archive_read_next_header2 "struct archive *" "struct archive_entry *"
138.Ft ssize_t
139.Fn archive_read_data "struct archive *" "void *buff" "size_t len"
140.Ft int
141.Fo archive_read_data_block
142.Fa "struct archive *"
143.Fa "const void **buff"
144.Fa "size_t *len"
145.Fa "off_t *offset"
146.Fc
147.Ft int
148.Fn archive_read_data_skip "struct archive *"
149.\" #if ARCHIVE_API_VERSION < 3
150.Ft int
151.Fn archive_read_data_into_buffer "struct archive *" "void *" "ssize_t len"
152.\" #endif
153.Ft int
154.Fn archive_read_data_into_fd "struct archive *" "int fd"
155.Ft int
156.Fo archive_read_extract
157.Fa "struct archive *"
158.Fa "struct archive_entry *"
159.Fa "int flags"
160.Fc
161.Ft int
162.Fo archive_read_extract2
163.Fa "struct archive *src"
164.Fa "struct archive_entry *"
165.Fa "struct archive *dest"
166.Fc
167.Ft void
168.Fo archive_read_extract_set_progress_callback
169.Fa "struct archive *"
170.Fa "void (*func)(void *)"
171.Fa "void *user_data"
172.Fc
173.Ft int
174.Fn archive_read_close "struct archive *"
175.Ft int
176.Fn archive_read_finish "struct archive *"
177.Sh DESCRIPTION
178These functions provide a complete API for reading streaming archives.
179The general process is to first create the
180.Tn struct archive
181object, set options, initialize the reader, iterate over the archive
182headers and associated data, then close the archive and release all
183resources.
184The following summary describes the functions in approximately the
185order they would be used:
186.Bl -tag -compact -width indent
187.It Fn archive_read_new
188Allocates and initializes a
189.Tn struct archive
190object suitable for reading from an archive.
191.It Xo
192.Fn archive_read_support_compression_all ,
193.Fn archive_read_support_compression_bzip2 ,
194.Fn archive_read_support_compression_compress ,
195.Fn archive_read_support_compression_gzip ,
196.Fn archive_read_support_compression_none
197.Xc
198Enables auto-detection code and decompression support for the
199specified compression.
200Note that
201.Dq none
202is always enabled by default.
203For convenience,
204.Fn archive_read_support_compression_all
205enables all available decompression code.
206.It Fn archive_read_support_compression_program
207Data is fed through the specified external program before being dearchived.
208Note that this disables automatic detection of the compression format,
209so it makes no sense to specify this in conjunction with any other
210decompression option.
211.It Xo
212.Fn archive_read_support_format_all ,
213.Fn archive_read_support_format_cpio ,
214.Fn archive_read_support_format_empty ,
215.Fn archive_read_support_format_iso9660 ,
216.Fn archive_read_support_format_tar ,
217.Fn archive_read_support_format_zip
218.Xc
219Enables support---including auto-detection code---for the
220specified archive format.
221For example,
222.Fn archive_read_support_format_tar
223enables support for a variety of standard tar formats, old-style tar,
224ustar, pax interchange format, and many common variants.
225For convenience,
226.Fn archive_read_support_format_all
227enables support for all available formats.
228Only empty archives are supported by default.
229.It Xo
230.Fn archive_read_set_filter_options ,
231.Fn archive_read_set_format_options ,
232.Fn archive_read_set_options
233.Xc
234Specifies options that will be passed to currently-registered
235filters (including decompression filters) and/or format readers.
236The argument is a comma-separated list of individual options.
237Individual options have one of the following forms:
238.Bl -tag -compact -width indent
239.It Ar option=value
240The option/value pair will be provided to every module.
241Modules that do not accept an option with this name will ignore it.
242.It Ar option
243The option will be provided to every module with a value of
244.Dq 1 .
245.It Ar !option
246The option will be provided to every module with a NULL value.
247.It Ar module:option=value , Ar module:option , Ar module:!option
248As above, but the corresponding option and value will be provided
249only to modules whose name matches
250.Ar module .
251.El
252The return value will be
253.Cm ARCHIVE_OK
254if any module accepts the option, or
255.Cm ARCHIVE_WARN
256if no module accepts the option, or
257.Cm ARCHIVE_FATAL
258if there was a fatal error while attempting to process the option.
259.Pp
260The currently supported options are:
261.Bl -tag -compact -width indent
262.It Format iso9660
263.Bl -tag -compact -width indent
264.It Cm joliet
265Support Joliet extensions.
266Defaults to enabled, use
267.Cm !joliet
268to disable.
269.El
270.El
271.It Fn archive_read_open
272The same as
273.Fn archive_read_open2 ,
274except that the skip callback is assumed to be
275.Dv NULL .
276.It Fn archive_read_open2
277Freeze the settings, open the archive, and prepare for reading entries.
278This is the most generic version of this call, which accepts
279four callback functions.
280Most clients will want to use
281.Fn archive_read_open_filename ,
282.Fn archive_read_open_FILE ,
283.Fn archive_read_open_fd ,
284or
285.Fn archive_read_open_memory
286instead.
287The library invokes the client-provided functions to obtain
288raw bytes from the archive.
289.It Fn archive_read_open_FILE
290Like
291.Fn archive_read_open ,
292except that it accepts a
293.Ft "FILE *"
294pointer.
295This function should not be used with tape drives or other devices
296that require strict I/O blocking.
297.It Fn archive_read_open_fd
298Like
299.Fn archive_read_open ,
300except that it accepts a file descriptor and block size rather than
301a set of function pointers.
302Note that the file descriptor will not be automatically closed at
303end-of-archive.
304This function is safe for use with tape drives or other blocked devices.
305.It Fn archive_read_open_file
306This is a deprecated synonym for
307.Fn archive_read_open_filename .
308.It Fn archive_read_open_filename
309Like
310.Fn archive_read_open ,
311except that it accepts a simple filename and a block size.
312A NULL filename represents standard input.
313This function is safe for use with tape drives or other blocked devices.
314.It Fn archive_read_open_memory
315Like
316.Fn archive_read_open ,
317except that it accepts a pointer and size of a block of
318memory containing the archive data.
319.It Fn archive_read_next_header
320Read the header for the next entry and return a pointer to
321a
322.Tn struct archive_entry .
323This is a convenience wrapper around
324.Fn archive_read_next_header2
325that uses an internal
326.Tn struct archive_entry
327object.
328.It Fn archive_read_next_header2
329Read the header for the next entry and populate the provided
330.Tn struct archive_entry .
331.It Fn archive_read_data
332Read data associated with the header just read.
333Internally, this is a convenience function that calls
334.Fn archive_read_data_block
335and fills any gaps with nulls so that callers see a single
336continuous stream of data.
337.It Fn archive_read_data_block
338Return the next available block of data for this entry.
339Unlike
340.Fn archive_read_data ,
341the
342.Fn archive_read_data_block
343function avoids copying data and allows you to correctly handle
344sparse files, as supported by some archive formats.
345The library guarantees that offsets will increase and that blocks
346will not overlap.
347Note that the blocks returned from this function can be much larger
348than the block size read from disk, due to compression
349and internal buffer optimizations.
350.It Fn archive_read_data_skip
351A convenience function that repeatedly calls
352.Fn archive_read_data_block
353to skip all of the data for this archive entry.
354.\" #if ARCHIVE_API_VERSION < 3
355.It Fn archive_read_data_into_buffer
356This function is deprecated and will be removed.
357Use
358.Fn archive_read_data
359instead.
360.\" #endif
361.It Fn archive_read_data_into_fd
362A convenience function that repeatedly calls
363.Fn archive_read_data_block
364to copy the entire entry to the provided file descriptor.
365.It Fn archive_read_extract , Fn archive_read_extract_set_skip_file
366A convenience function that wraps the corresponding
367.Xr archive_write_disk 3
368interfaces.
369The first call to
370.Fn archive_read_extract
371creates a restore object using
372.Xr archive_write_disk_new 3
373and
374.Xr archive_write_disk_set_standard_lookup 3 ,
375then transparently invokes
376.Xr archive_write_disk_set_options 3 ,
377.Xr archive_write_header 3 ,
378.Xr archive_write_data 3 ,
379and
380.Xr archive_write_finish_entry 3
381to create the entry on disk and copy data into it.
382The
383.Va flags
384argument is passed unmodified to
385.Xr archive_write_disk_set_options 3 .
386.It Fn archive_read_extract2
387This is another version of
388.Fn archive_read_extract
389that allows you to provide your own restore object.
390In particular, this allows you to override the standard lookup functions
391using
392.Xr archive_write_disk_set_group_lookup 3 ,
393and
394.Xr archive_write_disk_set_user_lookup 3 .
395Note that
396.Fn archive_read_extract2
397does not accept a
398.Va flags
399argument; you should use
400.Fn archive_write_disk_set_options
401to set the restore options yourself.
402.It Fn archive_read_extract_set_progress_callback
403Sets a pointer to a user-defined callback that can be used
404for updating progress displays during extraction.
405The progress function will be invoked during the extraction of large
406regular files.
407The progress function will be invoked with the pointer provided to this call.
408Generally, the data pointed to should include a reference to the archive
409object and the archive_entry object so that various statistics
410can be retrieved for the progress display.
411.It Fn archive_read_close
412Complete the archive and invoke the close callback.
413.It Fn archive_read_finish
414Invokes
415.Fn archive_read_close
416if it was not invoked manually, then releases all resources.
417Note: In libarchive 1.x, this function was declared to return
418.Ft void ,
419which made it impossible to detect certain errors when
420.Fn archive_read_close
421was invoked implicitly from this function.
422The declaration is corrected beginning with libarchive 2.0.
423.El
424.Pp
425Note that the library determines most of the relevant information about
426the archive by inspection.
427In particular, it automatically detects
428.Xr gzip 1
429or
430.Xr bzip2 1
431compression and transparently performs the appropriate decompression.
432It also automatically detects the archive format.
433.Pp
434A complete description of the
435.Tn struct archive
436and
437.Tn struct archive_entry
438objects can be found in the overview manual page for
439.Xr libarchive 3 .
440.Sh CLIENT CALLBACKS
441The callback functions must match the following prototypes:
442.Bl -item -offset indent
443.It
444.Ft typedef ssize_t
445.Fo archive_read_callback
446.Fa "struct archive *"
447.Fa "void *client_data"
448.Fa "const void **buffer"
449.Fc
450.It
451.\" #if ARCHIVE_API_VERSION < 2
452.Ft typedef int
453.Fo archive_skip_callback
454.Fa "struct archive *"
455.Fa "void *client_data"
456.Fa "size_t request"
457.Fc
458.\" #else
459.\" .Ft typedef off_t
460.\" .Fo archive_skip_callback
461.\" .Fa "struct archive *"
462.\" .Fa "void *client_data"
463.\" .Fa "off_t request"
464.\" .Fc
465.\" #endif
466.It
467.Ft typedef int
468.Fn archive_open_callback "struct archive *" "void *client_data"
469.It
470.Ft typedef int
471.Fn archive_close_callback "struct archive *" "void *client_data"
472.El
473.Pp
474The open callback is invoked by
475.Fn archive_read_open .
476It should return
477.Cm ARCHIVE_OK
478if the underlying file or data source is successfully
479opened.
480If the open fails, it should call
481.Fn archive_set_error
482to register an error code and message and return
483.Cm ARCHIVE_FATAL .
484.Pp
485The read callback is invoked whenever the library
486requires raw bytes from the archive.
487The read callback should read data into a buffer,
488set the
489.Li const void **buffer
490argument to point to the available data, and
491return a count of the number of bytes available.
492The library will invoke the read callback again
493only after it has consumed this data.
494The library imposes no constraints on the size
495of the data blocks returned.
496On end-of-file, the read callback should
497return zero.
498On error, the read callback should invoke
499.Fn archive_set_error
500to register an error code and message and
501return -1.
502.Pp
503The skip callback is invoked when the
504library wants to ignore a block of data.
505The return value is the number of bytes actually
506skipped, which may differ from the request.
507If the callback cannot skip data, it should return
508zero.
509If the skip callback is not provided (the
510function pointer is
511.Dv NULL ),
512the library will invoke the read function
513instead and simply discard the result.
514A skip callback can provide significant
515performance gains when reading uncompressed
516archives from slow disk drives or other media
517that can skip quickly.
518.Pp
519The close callback is invoked by archive_read_close when
520the archive processing is complete.
521The callback should return
522.Cm ARCHIVE_OK
523on success.
524On failure, the callback should invoke
525.Fn archive_set_error
526to register an error code and message and
527return
528.Cm ARCHIVE_FATAL .
529.Sh EXAMPLE
530The following illustrates basic usage of the library.
531In this example,
532the callback functions are simply wrappers around the standard
533.Xr open 2 ,
534.Xr read 2 ,
535and
536.Xr close 2
537system calls.
538.Bd -literal -offset indent
539void
540list_archive(const char *name)
541{
542  struct mydata *mydata;
543  struct archive *a;
544  struct archive_entry *entry;
545
546  mydata = malloc(sizeof(struct mydata));
547  a = archive_read_new();
548  mydata->name = name;
549  archive_read_support_compression_all(a);
550  archive_read_support_format_all(a);
551  archive_read_open(a, mydata, myopen, myread, myclose);
552  while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
553    printf("%s\\n",archive_entry_pathname(entry));
554    archive_read_data_skip(a);
555  }
556  archive_read_finish(a);
557  free(mydata);
558}
559
560ssize_t
561myread(struct archive *a, void *client_data, const void **buff)
562{
563  struct mydata *mydata = client_data;
564
565  *buff = mydata->buff;
566  return (read(mydata->fd, mydata->buff, 10240));
567}
568
569int
570myopen(struct archive *a, void *client_data)
571{
572  struct mydata *mydata = client_data;
573
574  mydata->fd = open(mydata->name, O_RDONLY);
575  return (mydata->fd >= 0 ? ARCHIVE_OK : ARCHIVE_FATAL);
576}
577
578int
579myclose(struct archive *a, void *client_data)
580{
581  struct mydata *mydata = client_data;
582
583  if (mydata->fd > 0)
584    close(mydata->fd);
585  return (ARCHIVE_OK);
586}
587.Ed
588.Sh RETURN VALUES
589Most functions return zero on success, non-zero on error.
590The possible return codes include:
591.Cm ARCHIVE_OK
592(the operation succeeded),
593.Cm ARCHIVE_WARN
594(the operation succeeded but a non-critical error was encountered),
595.Cm ARCHIVE_EOF
596(end-of-archive was encountered),
597.Cm ARCHIVE_RETRY
598(the operation failed but can be retried),
599and
600.Cm ARCHIVE_FATAL
601(there was a fatal error; the archive should be closed immediately).
602Detailed error codes and textual descriptions are available from the
603.Fn archive_errno
604and
605.Fn archive_error_string
606functions.
607.Pp
608.Fn archive_read_new
609returns a pointer to a freshly allocated
610.Tn struct archive
611object.
612It returns
613.Dv NULL
614on error.
615.Pp
616.Fn archive_read_data
617returns a count of bytes actually read or zero at the end of the entry.
618On error, a value of
619.Cm ARCHIVE_FATAL ,
620.Cm ARCHIVE_WARN ,
621or
622.Cm ARCHIVE_RETRY
623is returned and an error code and textual description can be retrieved from the
624.Fn archive_errno
625and
626.Fn archive_error_string
627functions.
628.Pp
629The library expects the client callbacks to behave similarly.
630If there is an error, you can use
631.Fn archive_set_error
632to set an appropriate error code and description,
633then return one of the non-zero values above.
634(Note that the value eventually returned to the client may
635not be the same; many errors that are not critical at the level
636of basic I/O can prevent the archive from being properly read,
637thus most I/O errors eventually cause
638.Cm ARCHIVE_FATAL
639to be returned.)
640.\" .Sh ERRORS
641.Sh SEE ALSO
642.Xr tar 1 ,
643.Xr libarchive 3 ,
644.Xr archive_util 3 ,
645.Xr tar 5
646.Sh HISTORY
647The
648.Nm libarchive
649library first appeared in
650.Fx 5.3 .
651.Sh AUTHORS
652.An -nosplit
653The
654.Nm libarchive
655library was written by
656.An Tim Kientzle Aq kientzle@acm.org .
657.Sh BUGS
658Many traditional archiver programs treat
659empty files as valid empty archives.
660For example, many implementations of
661.Xr tar 1
662allow you to append entries to an empty file.
663Of course, it is impossible to determine the format of an empty file
664by inspecting the contents, so this library treats empty files as
665having a special
666.Dq empty
667format.
668