1.\" Copyright (c) 2003-2007 Tim Kientzle
2.\" All rights reserved.
3.\"
4.\" Redistribution and use in source and binary forms, with or without
5.\" modification, are permitted provided that the following conditions
6.\" are met:
7.\" 1. Redistributions of source code must retain the above copyright
8.\"    notice, this list of conditions and the following disclaimer.
9.\" 2. Redistributions in binary form must reproduce the above copyright
10.\"    notice, this list of conditions and the following disclaimer in the
11.\"    documentation and/or other materials provided with the distribution.
12.\"
13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23.\" SUCH DAMAGE.
24.\"
25.\" $FreeBSD: head/lib/libarchive/archive_read.3 191595 2009-04-27 20:13:13Z kientzle $
26.\"
27.Dd April 13, 2009
28.Dt archive_read 3
29.Os
30.Sh NAME
31.Nm archive_read_new ,
32.Nm archive_read_set_filter_options ,
33.Nm archive_read_set_format_options ,
34.Nm archive_read_set_options ,
35.Nm archive_read_support_compression_all ,
36.Nm archive_read_support_compression_bzip2 ,
37.Nm archive_read_support_compression_compress ,
38.Nm archive_read_support_compression_gzip ,
39.Nm archive_read_support_compression_lzma ,
40.Nm archive_read_support_compression_none ,
41.Nm archive_read_support_compression_xz ,
42.Nm archive_read_support_compression_program ,
43.Nm archive_read_support_compression_program_signature ,
44.Nm archive_read_support_format_all ,
45.Nm archive_read_support_format_ar ,
46.Nm archive_read_support_format_cpio ,
47.Nm archive_read_support_format_empty ,
48.Nm archive_read_support_format_iso9660 ,
49.Nm archive_read_support_format_mtree ,
50.Nm archive_read_support_format_raw ,
51.Nm archive_read_support_format_tar ,
52.Nm archive_read_support_format_zip ,
53.Nm archive_read_open ,
54.Nm archive_read_open2 ,
55.Nm archive_read_open_fd ,
56.Nm archive_read_open_FILE ,
57.Nm archive_read_open_filename ,
58.Nm archive_read_open_memory ,
59.Nm archive_read_next_header ,
60.Nm archive_read_next_header2 ,
61.Nm archive_read_data ,
62.Nm archive_read_data_block ,
63.Nm archive_read_data_skip ,
64.\" #if ARCHIVE_API_VERSION < 3
65.Nm archive_read_data_into_buffer ,
66.\" #endif
67.Nm archive_read_data_into_fd ,
68.Nm archive_read_extract ,
69.Nm archive_read_extract2 ,
70.Nm archive_read_extract_set_progress_callback ,
71.Nm archive_read_close ,
72.Nm archive_read_finish
73.Nd functions for reading streaming archives
74.Sh SYNOPSIS
75.In archive.h
76.Ft struct archive *
77.Fn archive_read_new "void"
78.Ft int
79.Fn archive_read_support_compression_all "struct archive *"
80.Ft int
81.Fn archive_read_support_compression_bzip2 "struct archive *"
82.Ft int
83.Fn archive_read_support_compression_compress "struct archive *"
84.Ft int
85.Fn archive_read_support_compression_gzip "struct archive *"
86.Ft int
87.Fn archive_read_support_compression_lzma "struct archive *"
88.Ft int
89.Fn archive_read_support_compression_none "struct archive *"
90.Ft int
91.Fn archive_read_support_compression_xz "struct archive *"
92.Ft int
93.Fo archive_read_support_compression_program
94.Fa "struct archive *"
95.Fa "const char *cmd"
96.Fc
97.Ft int
98.Fo archive_read_support_compression_program_signature
99.Fa "struct archive *"
100.Fa "const char *cmd"
101.Fa "const void *signature"
102.Fa "size_t signature_length"
103.Fc
104.Ft int
105.Fn archive_read_support_format_all "struct archive *"
106.Ft int
107.Fn archive_read_support_format_ar "struct archive *"
108.Ft int
109.Fn archive_read_support_format_cpio "struct archive *"
110.Ft int
111.Fn archive_read_support_format_empty "struct archive *"
112.Ft int
113.Fn archive_read_support_format_iso9660 "struct archive *"
114.Ft int
115.Fn archive_read_support_format_mtree "struct archive *"
116.Ft int
117.Fn archive_read_support_format_raw "struct archive *"
118.Ft int
119.Fn archive_read_support_format_tar "struct archive *"
120.Ft int
121.Fn archive_read_support_format_zip "struct archive *"
122.Ft int
123.Fn archive_read_set_filter_options "struct archive *" "const char *"
124.Ft int
125.Fn archive_read_set_format_options "struct archive *" "const char *"
126.Ft int
127.Fn archive_read_set_options "struct archive *" "const char *"
128.Ft int
129.Fo archive_read_open
130.Fa "struct archive *"
131.Fa "void *client_data"
132.Fa "archive_open_callback *"
133.Fa "archive_read_callback *"
134.Fa "archive_close_callback *"
135.Fc
136.Ft int
137.Fo archive_read_open2
138.Fa "struct archive *"
139.Fa "void *client_data"
140.Fa "archive_open_callback *"
141.Fa "archive_read_callback *"
142.Fa "archive_skip_callback *"
143.Fa "archive_close_callback *"
144.Fc
145.Ft int
146.Fn archive_read_open_FILE "struct archive *" "FILE *file"
147.Ft int
148.Fn archive_read_open_fd "struct archive *" "int fd" "size_t block_size"
149.Ft int
150.Fo archive_read_open_filename
151.Fa "struct archive *"
152.Fa "const char *filename"
153.Fa "size_t block_size"
154.Fc
155.Ft int
156.Fn archive_read_open_memory "struct archive *" "void *buff" "size_t size"
157.Ft int
158.Fn archive_read_next_header "struct archive *" "struct archive_entry **"
159.Ft int
160.Fn archive_read_next_header2 "struct archive *" "struct archive_entry *"
161.Ft ssize_t
162.Fn archive_read_data "struct archive *" "void *buff" "size_t len"
163.Ft int
164.Fo archive_read_data_block
165.Fa "struct archive *"
166.Fa "const void **buff"
167.Fa "size_t *len"
168.Fa "off_t *offset"
169.Fc
170.Ft int
171.Fn archive_read_data_skip "struct archive *"
172.\" #if ARCHIVE_API_VERSION < 3
173.Ft int
174.Fn archive_read_data_into_buffer "struct archive *" "void *" "ssize_t len"
175.\" #endif
176.Ft int
177.Fn archive_read_data_into_fd "struct archive *" "int fd"
178.Ft int
179.Fo archive_read_extract
180.Fa "struct archive *"
181.Fa "struct archive_entry *"
182.Fa "int flags"
183.Fc
184.Ft int
185.Fo archive_read_extract2
186.Fa "struct archive *src"
187.Fa "struct archive_entry *"
188.Fa "struct archive *dest"
189.Fc
190.Ft void
191.Fo archive_read_extract_set_progress_callback
192.Fa "struct archive *"
193.Fa "void (*func)(void *)"
194.Fa "void *user_data"
195.Fc
196.Ft int
197.Fn archive_read_close "struct archive *"
198.Ft int
199.Fn archive_read_finish "struct archive *"
200.Sh DESCRIPTION
201These functions provide a complete API for reading streaming archives.
202The general process is to first create the
203.Tn struct archive
204object, set options, initialize the reader, iterate over the archive
205headers and associated data, then close the archive and release all
206resources.
207The following summary describes the functions in approximately the
208order they would be used:
209.Bl -tag -compact -width indent
210.It Fn archive_read_new
211Allocates and initializes a
212.Tn struct archive
213object suitable for reading from an archive.
214.It Xo
215.Fn archive_read_support_compression_bzip2 ,
216.Fn archive_read_support_compression_compress ,
217.Fn archive_read_support_compression_gzip ,
218.Fn archive_read_support_compression_lzma ,
219.Fn archive_read_support_compression_none ,
220.Fn archive_read_support_compression_xz
221.Xc
222Enables auto-detection code and decompression support for the
223specified compression.
224Returns
225.Cm ARCHIVE_OK
226if the compression is fully supported, or
227.Cm ARCHIVE_WARN
228if the compression is supported only through an external program.
229Note that decompression using an external program is usually slower than
230decompression through built-in libraries.
231Note that
232.Dq none
233is always enabled by default.
234.It Fn archive_read_support_compression_all
235Enables all available decompression filters.
236.It Fn archive_read_support_compression_program
237Data is fed through the specified external program before being dearchived.
238Note that this disables automatic detection of the compression format,
239so it makes no sense to specify this in conjunction with any other
240decompression option.
241.It Fn archive_read_support_compression_program_signature
242This feeds data through the specified external program
243but only if the initial bytes of the data match the specified
244signature value.
245.It Xo
246.Fn archive_read_support_format_all ,
247.Fn archive_read_support_format_ar ,
248.Fn archive_read_support_format_cpio ,
249.Fn archive_read_support_format_empty ,
250.Fn archive_read_support_format_iso9660 ,
251.Fn archive_read_support_format_mtree ,
252.Fn archive_read_support_format_tar ,
253.Fn archive_read_support_format_zip
254.Xc
255Enables support---including auto-detection code---for the
256specified archive format.
257For example,
258.Fn archive_read_support_format_tar
259enables support for a variety of standard tar formats, old-style tar,
260ustar, pax interchange format, and many common variants.
261For convenience,
262.Fn archive_read_support_format_all
263enables support for all available formats.
264Only empty archives are supported by default.
265.It Fn archive_read_support_format_raw
266The
267.Dq raw
268format handler allows libarchive to be used to read arbitrary data.
269It treats any data stream as an archive with a single entry.
270The pathname of this entry is
271.Dq data ;
272all other entry fields are unset.
273This is not enabled by
274.Fn archive_read_support_format_all
275in order to avoid erroneous handling of damaged archives.
276.It Xo
277.Fn archive_read_set_filter_options ,
278.Fn archive_read_set_format_options ,
279.Fn archive_read_set_options
280.Xc
281Specifies options that will be passed to currently-registered
282filters (including decompression filters) and/or format readers.
283The argument is a comma-separated list of individual options.
284Individual options have one of the following forms:
285.Bl -tag -compact -width indent
286.It Ar option=value
287The option/value pair will be provided to every module.
288Modules that do not accept an option with this name will ignore it.
289.It Ar option
290The option will be provided to every module with a value of
291.Dq 1 .
292.It Ar !option
293The option will be provided to every module with a NULL value.
294.It Ar module:option=value , Ar module:option , Ar module:!option
295As above, but the corresponding option and value will be provided
296only to modules whose name matches
297.Ar module .
298.El
299The return value will be
300.Cm ARCHIVE_OK
301if any module accepts the option, or
302.Cm ARCHIVE_WARN
303if no module accepted the option, or
304.Cm ARCHIVE_FATAL
305if there was a fatal error while attempting to process the option.
306.Pp
307The currently supported options are:
308.Bl -tag -compact -width indent
309.It Format iso9660
310.Bl -tag -compact -width indent
311.It Cm joliet
312Support Joliet extensions.
313Defaults to enabled, use
314.Cm !joliet
315to disable.
316.El
317.El
318.It Fn archive_read_open
319The same as
320.Fn archive_read_open2 ,
321except that the skip callback is assumed to be
322.Dv NULL .
323.It Fn archive_read_open2
324Freeze the settings, open the archive, and prepare for reading entries.
325This is the most generic version of this call, which accepts
326four callback functions.
327Most clients will want to use
328.Fn archive_read_open_filename ,
329.Fn archive_read_open_FILE ,
330.Fn archive_read_open_fd ,
331or
332.Fn archive_read_open_memory
333instead.
334The library invokes the client-provided functions to obtain
335raw bytes from the archive.
336.It Fn archive_read_open_FILE
337Like
338.Fn archive_read_open ,
339except that it accepts a
340.Ft "FILE *"
341pointer.
342This function should not be used with tape drives or other devices
343that require strict I/O blocking.
344.It Fn archive_read_open_fd
345Like
346.Fn archive_read_open ,
347except that it accepts a file descriptor and block size rather than
348a set of function pointers.
349Note that the file descriptor will not be automatically closed at
350end-of-archive.
351This function is safe for use with tape drives or other blocked devices.
352.It Fn archive_read_open_file
353This is a deprecated synonym for
354.Fn archive_read_open_filename .
355.It Fn archive_read_open_filename
356Like
357.Fn archive_read_open ,
358except that it accepts a simple filename and a block size.
359A NULL filename represents standard input.
360This function is safe for use with tape drives or other blocked devices.
361.It Fn archive_read_open_memory
362Like
363.Fn archive_read_open ,
364except that it accepts a pointer and size of a block of
365memory containing the archive data.
366.It Fn archive_read_next_header
367Read the header for the next entry and return a pointer to
368a
369.Tn struct archive_entry .
370This is a convenience wrapper around
371.Fn archive_read_next_header2
372that reuses an internal
373.Tn struct archive_entry
374object for each request.
375.It Fn archive_read_next_header2
376Read the header for the next entry and populate the provided
377.Tn struct archive_entry .
378.It Fn archive_read_data
379Read data associated with the header just read.
380Internally, this is a convenience function that calls
381.Fn archive_read_data_block
382and fills any gaps with nulls so that callers see a single
383continuous stream of data.
384.It Fn archive_read_data_block
385Return the next available block of data for this entry.
386Unlike
387.Fn archive_read_data ,
388the
389.Fn archive_read_data_block
390function avoids copying data and allows you to correctly handle
391sparse files, as supported by some archive formats.
392The library guarantees that offsets will increase and that blocks
393will not overlap.
394Note that the blocks returned from this function can be much larger
395than the block size read from disk, due to compression
396and internal buffer optimizations.
397.It Fn archive_read_data_skip
398A convenience function that repeatedly calls
399.Fn archive_read_data_block
400to skip all of the data for this archive entry.
401.\" #if ARCHIVE_API_VERSION < 3
402.It Fn archive_read_data_into_buffer
403This function is deprecated and will be removed.
404Use
405.Fn archive_read_data
406instead.
407.\" #endif
408.It Fn archive_read_data_into_fd
409A convenience function that repeatedly calls
410.Fn archive_read_data_block
411to copy the entire entry to the provided file descriptor.
412.It Fn archive_read_extract , Fn archive_read_extract_set_skip_file
413A convenience function that wraps the corresponding
414.Xr archive_write_disk 3
415interfaces.
416The first call to
417.Fn archive_read_extract
418creates a restore object using
419.Xr archive_write_disk_new 3
420and
421.Xr archive_write_disk_set_standard_lookup 3 ,
422then transparently invokes
423.Xr archive_write_disk_set_options 3 ,
424.Xr archive_write_header 3 ,
425.Xr archive_write_data 3 ,
426and
427.Xr archive_write_finish_entry 3
428to create the entry on disk and copy data into it.
429The
430.Va flags
431argument is passed unmodified to
432.Xr archive_write_disk_set_options 3 .
433.It Fn archive_read_extract2
434This is another version of
435.Fn archive_read_extract
436that allows you to provide your own restore object.
437In particular, this allows you to override the standard lookup functions
438using
439.Xr archive_write_disk_set_group_lookup 3
440and
441.Xr archive_write_disk_set_user_lookup 3 .
442Note that
443.Fn archive_read_extract2
444does not accept a
445.Va flags
446argument; you should use
447.Fn archive_write_disk_set_options
448to set the restore options yourself.
449.It Fn archive_read_extract_set_progress_callback
450Sets a pointer to a user-defined callback that can be used
451for updating progress displays during extraction.
452The progress function will be invoked during the extraction of large
453regular files.
454The progress function will be invoked with the pointer provided to this call.
455Generally, the data pointed to should include a reference to the archive
456object and the archive_entry object so that various statistics
457can be retrieved for the progress display.
458.It Fn archive_read_close
459Complete the archive and invoke the close callback.
460.It Fn archive_read_finish
461Invokes
462.Fn archive_read_close
463if it was not invoked manually, then release all resources.
464Note: In libarchive 1.x, this function was declared to return
465.Ft void ,
466which made it impossible to detect certain errors when
467.Fn archive_read_close
468was invoked implicitly from this function.
469The declaration is corrected beginning with libarchive 2.0.
470.El
471.Pp
472Note that the library determines most of the relevant information about
473the archive by inspection.
474In particular, it automatically detects
475.Xr gzip 1
476or
477.Xr bzip2 1
478compression and transparently performs the appropriate decompression.
479It also automatically detects the archive format.
480.Pp
481A complete description of the
482.Tn struct archive
483and
484.Tn struct archive_entry
485objects can be found in the overview manual page for
486.Xr libarchive 3 .
487.Sh CLIENT CALLBACKS
488The callback functions must match the following prototypes:
489.Bl -item -offset indent
490.It
491.Ft typedef ssize_t
492.Fo archive_read_callback
493.Fa "struct archive *"
494.Fa "void *client_data"
495.Fa "const void **buffer"
496.Fc
497.It
498.\" #if ARCHIVE_API_VERSION < 2
499.Ft typedef int
500.Fo archive_skip_callback
501.Fa "struct archive *"
502.Fa "void *client_data"
503.Fa "size_t request"
504.Fc
505.\" #else
506.\" .Ft typedef off_t
507.\" .Fo archive_skip_callback
508.\" .Fa "struct archive *"
509.\" .Fa "void *client_data"
510.\" .Fa "off_t request"
511.\" .Fc
512.\" #endif
513.It
514.Ft typedef int
515.Fn archive_open_callback "struct archive *" "void *client_data"
516.It
517.Ft typedef int
518.Fn archive_close_callback "struct archive *" "void *client_data"
519.El
520.Pp
521The open callback is invoked by
522.Fn archive_read_open .
523It should return
524.Cm ARCHIVE_OK
525if the underlying file or data source is successfully
526opened.
527If the open fails, it should call
528.Fn archive_set_error
529to register an error code and message and return
530.Cm ARCHIVE_FATAL .
531.Pp
532The read callback is invoked whenever the library
533requires raw bytes from the archive.
534The read callback should read data into a buffer,
535set the
536.Li const void **buffer
537argument to point to the available data, and
538return a count of the number of bytes available.
539The library will invoke the read callback again
540only after it has consumed this data.
541The library imposes no constraints on the size
542of the data blocks returned.
543On end-of-file, the read callback should
544return zero.
545On error, the read callback should invoke
546.Fn archive_set_error
547to register an error code and message and
548return -1.
549.Pp
550The skip callback is invoked when the
551library wants to ignore a block of data.
552The return value is the number of bytes actually
553skipped, which may differ from the request.
554If the callback cannot skip data, it should return
555zero.
556If the skip callback is not provided (the
557function pointer is
558.Dv NULL ),
559the library will invoke the read function
560instead and simply discard the result.
561A skip callback can provide significant
562performance gains when reading uncompressed
563archives from slow disk drives or other media
564that can skip quickly.
565.Pp
566The close callback is invoked by archive_read_close when
567the archive processing is complete.
568The callback should return
569.Cm ARCHIVE_OK
570on success.
571On failure, the callback should invoke
572.Fn archive_set_error
573to register an error code and message and
574return
575.Cm ARCHIVE_FATAL .
576.Sh EXAMPLE
577The following illustrates basic usage of the library.
578In this example,
579the callback functions are simply wrappers around the standard
580.Xr open 2 ,
581.Xr read 2 ,
582and
583.Xr close 2
584system calls.
585.Bd -literal -offset indent
586void
587list_archive(const char *name)
588{
589  struct mydata *mydata;
590  struct archive *a;
591  struct archive_entry *entry;
592
593  mydata = malloc(sizeof(struct mydata));
594  a = archive_read_new();
595  mydata->name = name;
596  archive_read_support_compression_all(a);
597  archive_read_support_format_all(a);
598  archive_read_open(a, mydata, myopen, myread, myclose);
599  while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
600    printf("%s\en",archive_entry_pathname(entry));
601    archive_read_data_skip(a);
602  }
603  archive_read_finish(a);
604  free(mydata);
605}
606
607ssize_t
608myread(struct archive *a, void *client_data, const void **buff)
609{
610  struct mydata *mydata = client_data;
611
612  *buff = mydata->buff;
613  return (read(mydata->fd, mydata->buff, 10240));
614}
615
616int
617myopen(struct archive *a, void *client_data)
618{
619  struct mydata *mydata = client_data;
620
621  mydata->fd = open(mydata->name, O_RDONLY);
622  return (mydata->fd >= 0 ? ARCHIVE_OK : ARCHIVE_FATAL);
623}
624
625int
626myclose(struct archive *a, void *client_data)
627{
628  struct mydata *mydata = client_data;
629
630  if (mydata->fd >= 0)
631    close(mydata->fd);
632  return (ARCHIVE_OK);
633}
634.Ed
635.Sh RETURN VALUES
636Most functions return zero on success, non-zero on error.
637The possible return codes include:
638.Cm ARCHIVE_OK
639(the operation succeeded),
640.Cm ARCHIVE_WARN
641(the operation succeeded but a non-critical error was encountered),
642.Cm ARCHIVE_EOF
643(end-of-archive was encountered),
644.Cm ARCHIVE_RETRY
645(the operation failed but can be retried),
646and
647.Cm ARCHIVE_FATAL
648(there was a fatal error; the archive should be closed immediately).
649Detailed error codes and textual descriptions are available from the
650.Fn archive_errno
651and
652.Fn archive_error_string
653functions.
654.Pp
655.Fn archive_read_new
656returns a pointer to a freshly allocated
657.Tn struct archive
658object.
659It returns
660.Dv NULL
661on error.
662.Pp
663.Fn archive_read_data
664returns a count of bytes actually read or zero at the end of the entry.
665On error, a value of
666.Cm ARCHIVE_FATAL ,
667.Cm ARCHIVE_WARN ,
668or
669.Cm ARCHIVE_RETRY
670is returned and an error code and textual description can be retrieved from the
671.Fn archive_errno
672and
673.Fn archive_error_string
674functions.
675.Pp
676The library expects the client callbacks to behave similarly.
677If there is an error, you can use
678.Fn archive_set_error
679to set an appropriate error code and description,
680then return one of the non-zero values above.
681(Note that the value eventually returned to the client may
682not be the same; many errors that are not critical at the level
683of basic I/O can prevent the archive from being properly read,
684thus most I/O errors eventually cause
685.Cm ARCHIVE_FATAL
686to be returned.)
687.\" .Sh ERRORS
688.Sh SEE ALSO
689.Xr tar 1 ,
690.Xr libarchive 3 ,
691.Xr archive_util 3 ,
692.Xr tar 5
693.Sh HISTORY
694The
695.Nm libarchive
696library first appeared in
697.Fx 5.3 .
698.Sh AUTHORS
699.An -nosplit
700The
701.Nm libarchive
702library was written by
703.An Tim Kientzle Aq kientzle@acm.org .
704.Sh BUGS
705Many traditional archiver programs treat
706empty files as valid empty archives.
707For example, many implementations of
708.Xr tar 1
709allow you to append entries to an empty file.
710Of course, it is impossible to determine the format of an empty file
711by inspecting the contents, so this library treats empty files as
712having a special
713.Dq empty
714format.
715