1.\" Copyright (c) 2003-2007 Tim Kientzle
2.\" All rights reserved.
3.\"
4.\" Redistribution and use in source and binary forms, with or without
5.\" modification, are permitted provided that the following conditions
6.\" are met:
7.\" 1. Redistributions of source code must retain the above copyright
8.\"    notice, this list of conditions and the following disclaimer.
9.\" 2. Redistributions in binary form must reproduce the above copyright
10.\"    notice, this list of conditions and the following disclaimer in the
11.\"    documentation and/or other materials provided with the distribution.
12.\"
13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23.\" SUCH DAMAGE.
24.\"
25.\" $FreeBSD: head/lib/libarchive/archive_read.3 191595 2009-04-27 20:13:13Z kientzle $
26.\"
27.Dd April 13, 2009
28.Dt ARCHIVE_READ 3
29.Os
30.Sh NAME
31.Nm archive_read_new ,
32.Nm archive_read_set_filter_options ,
33.Nm archive_read_set_format_options ,
34.Nm archive_read_set_options ,
35.Nm archive_read_support_compression_all ,
36.Nm archive_read_support_compression_bzip2 ,
37.Nm archive_read_support_compression_compress ,
38.Nm archive_read_support_compression_gzip ,
39.Nm archive_read_support_compression_lzma ,
40.Nm archive_read_support_compression_none ,
41.Nm archive_read_support_compression_xz ,
42.Nm archive_read_support_compression_program ,
43.Nm archive_read_support_compression_program_signature ,
44.Nm archive_read_support_format_all ,
45.Nm archive_read_support_format_ar ,
46.Nm archive_read_support_format_cpio ,
47.Nm archive_read_support_format_empty ,
48.Nm archive_read_support_format_iso9660 ,
49.Nm archive_read_support_format_mtree ,
50.Nm archive_read_support_format_raw ,
51.Nm archive_read_support_format_tar ,
52.Nm archive_read_support_format_zip ,
53.Nm archive_read_open ,
54.Nm archive_read_open2 ,
55.Nm archive_read_open_fd ,
56.Nm archive_read_open_FILE ,
57.Nm archive_read_open_filename ,
58.Nm archive_read_open_memory ,
59.Nm archive_read_next_header ,
60.Nm archive_read_next_header2 ,
61.Nm archive_read_data ,
62.Nm archive_read_data_block ,
63.Nm archive_read_data_skip ,
64.\" #if ARCHIVE_API_VERSION < 3
65.Nm archive_read_data_into_buffer ,
66.\" #endif
67.Nm archive_read_data_into_fd ,
68.Nm archive_read_extract ,
69.Nm archive_read_extract2 ,
70.Nm archive_read_extract_set_progress_callback ,
71.Nm archive_read_close ,
72.Nm archive_read_finish
73.Nd functions for reading streaming archives
74.Sh SYNOPSIS
75.In archive.h
76.Ft struct archive *
77.Fn archive_read_new "void"
78.Ft int
79.Fn archive_read_support_compression_all "struct archive *"
80.Ft int
81.Fn archive_read_support_compression_bzip2 "struct archive *"
82.Ft int
83.Fn archive_read_support_compression_compress "struct archive *"
84.Ft int
85.Fn archive_read_support_compression_gzip "struct archive *"
86.Ft int
87.Fn archive_read_support_compression_lzma "struct archive *"
88.Ft int
89.Fn archive_read_support_compression_none "struct archive *"
90.Ft int
91.Fn archive_read_support_compression_xz "struct archive *"
92.Ft int
93.Fo archive_read_support_compression_program
94.Fa "struct archive *"
95.Fa "const char *cmd"
96.Fc
97.Ft int
98.Fo archive_read_support_compression_program_signature
99.Fa "struct archive *"
100.Fa "const char *cmd"
101.Fa "const void *signature"
102.Fa "size_t signature_length"
103.Fc
104.Ft int
105.Fn archive_read_support_format_all "struct archive *"
106.Ft int
107.Fn archive_read_support_format_ar "struct archive *"
108.Ft int
109.Fn archive_read_support_format_cpio "struct archive *"
110.Ft int
111.Fn archive_read_support_format_empty "struct archive *"
112.Ft int
113.Fn archive_read_support_format_iso9660 "struct archive *"
114.Ft int
115.Fn archive_read_support_format_mtree "struct archive *"
116.Ft int
117.Fn archive_read_support_format_raw "struct archive *"
118.Ft int
119.Fn archive_read_support_format_tar "struct archive *"
120.Ft int
121.Fn archive_read_support_format_zip "struct archive *"
122.Ft int
123.Fn archive_read_set_filter_options "struct archive *" "const char *"
124.Ft int
125.Fn archive_read_set_format_options "struct archive *" "const char *"
126.Ft int
127.Fn archive_read_set_options "struct archive *" "const char *"
128.Ft int
129.Fo archive_read_open
130.Fa "struct archive *"
131.Fa "void *client_data"
132.Fa "archive_open_callback *"
133.Fa "archive_read_callback *"
134.Fa "archive_close_callback *"
135.Fc
136.Ft int
137.Fo archive_read_open2
138.Fa "struct archive *"
139.Fa "void *client_data"
140.Fa "archive_open_callback *"
141.Fa "archive_read_callback *"
142.Fa "archive_skip_callback *"
143.Fa "archive_close_callback *"
144.Fc
145.Ft int
146.Fn archive_read_open_FILE "struct archive *" "FILE *file"
147.Ft int
148.Fn archive_read_open_fd "struct archive *" "int fd" "size_t block_size"
149.Ft int
150.Fo archive_read_open_filename
151.Fa "struct archive *"
152.Fa "const char *filename"
153.Fa "size_t block_size"
154.Fc
155.Ft int
156.Fn archive_read_open_memory "struct archive *" "void *buff" "size_t size"
157.Ft int
158.Fn archive_read_next_header "struct archive *" "struct archive_entry **"
159.Ft int
160.Fn archive_read_next_header2 "struct archive *" "struct archive_entry *"
161.Ft ssize_t
162.Fn archive_read_data "struct archive *" "void *buff" "size_t len"
163.Ft int
164.Fo archive_read_data_block
165.Fa "struct archive *"
166.Fa "const void **buff"
167.Fa "size_t *len"
168.Fa "off_t *offset"
169.Fc
170.Ft int
171.Fn archive_read_data_skip "struct archive *"
172.\" #if ARCHIVE_API_VERSION < 3
173.Ft int
174.Fn archive_read_data_into_buffer "struct archive *" "void *" "ssize_t len"
175.\" #endif
176.Ft int
177.Fn archive_read_data_into_fd "struct archive *" "int fd"
178.Ft int
179.Fo archive_read_extract
180.Fa "struct archive *"
181.Fa "struct archive_entry *"
182.Fa "int flags"
183.Fc
184.Ft int
185.Fo archive_read_extract2
186.Fa "struct archive *src"
187.Fa "struct archive_entry *"
188.Fa "struct archive *dest"
189.Fc
190.Ft void
191.Fo archive_read_extract_set_progress_callback
192.Fa "struct archive *"
193.Fa "void (*func)(void *)"
194.Fa "void *user_data"
195.Fc
196.Ft int
197.Fn archive_read_close "struct archive *"
198.Ft int
199.Fn archive_read_finish "struct archive *"
200.Sh DESCRIPTION
201These functions provide a complete API for reading streaming archives.
202The general process is to first create the
203.Tn struct archive
204object, set options, initialize the reader, iterate over the archive
205headers and associated data, then close the archive and release all
206resources.
207The following summary describes the functions in approximately the
208order they would be used:
209.Bl -tag -compact -width indent
210.It Fn archive_read_new
211Allocates and initializes a
212.Tn struct archive
213object suitable for reading from an archive.
214.It Fn archive_read_support_compression_bzip2 , \
215Fn archive_read_support_compression_compress , \
216Fn archive_read_support_compression_gzip , \
217Fn archive_read_support_compression_lzma , \
218Fn archive_read_support_compression_none , \
219Fn archive_read_support_compression_xz
220Enables auto-detection code and decompression support for the
221specified compression.
222Returns
223.Cm ARCHIVE_OK
224if the compression is fully supported, or
225.Cm ARCHIVE_WARN
226if the compression is supported only through an external program.
227Note that decompression using an external program is usually slower than
228decompression through built-in libraries.
229Note that
230.Dq none
231is always enabled by default.
232.It Fn archive_read_support_compression_all
233Enables all available decompression filters.
234.It Fn archive_read_support_compression_program
235Data is fed through the specified external program before being dearchived.
236Note that this disables automatic detection of the compression format,
237so it makes no sense to specify this in conjunction with any other
238decompression option.
239.It Fn archive_read_support_compression_program_signature
240This feeds data through the specified external program
241but only if the initial bytes of the data match the specified
242signature value.
243.It Fn archive_read_support_format_all , \
244Fn archive_read_support_format_ar , \
245Fn archive_read_support_format_cpio , \
246Fn archive_read_support_format_empty , \
247Fn archive_read_support_format_iso9660 , \
248Fn archive_read_support_format_mtree , \
249Fn archive_read_support_format_tar , \
250Fn archive_read_support_format_zip
251Enables support---including auto-detection code---for the
252specified archive format.
253For example,
254.Fn archive_read_support_format_tar
255enables support for a variety of standard tar formats, old-style tar,
256ustar, pax interchange format, and many common variants.
257For convenience,
258.Fn archive_read_support_format_all
259enables support for all available formats.
260Only empty archives are supported by default.
261.It Fn archive_read_support_format_raw
262The
263.Dq raw
264format handler allows libarchive to be used to read arbitrary data.
265It treats any data stream as an archive with a single entry.
266The pathname of this entry is
267.Dq data ;
268all other entry fields are unset.
269This is not enabled by
270.Fn archive_read_support_format_all
271in order to avoid erroneous handling of damaged archives.
272.It Fn archive_read_set_filter_options , \
273Fn archive_read_set_format_options , \
274Fn archive_read_set_options
275Specifies options that will be passed to currently-registered
276filters (including decompression filters) and/or format readers.
277The argument is a comma-separated list of individual options.
278Individual options have one of the following forms:
279.Bl -tag -compact -width indent
280.It Ar option=value
281The option/value pair will be provided to every module.
282Modules that do not accept an option with this name will ignore it.
283.It Ar option
284The option will be provided to every module with a value of
285.Dq 1 .
286.It Ar !option
287The option will be provided to every module with a NULL value.
288.It Ar module:option=value , Ar module:option , Ar module:!option
289As above, but the corresponding option and value will be provided
290only to modules whose name matches
291.Ar module .
292.El
293The return value will be
294.Cm ARCHIVE_OK
295if any module accepts the option, or
296.Cm ARCHIVE_WARN
297if no module accepts the option, or
298.Cm ARCHIVE_FATAL
299if there was a fatal error while attempting to process the option.
300.Pp
301The currently supported options are:
302.Bl -tag -compact -width indent
303.It Format iso9660
304.Bl -tag -compact -width indent
305.It Cm joliet
306Support Joliet extensions.
307Defaults to enabled, use
308.Cm !joliet
309to disable.
310.El
311.El
312.It Fn archive_read_open
313The same as
314.Fn archive_read_open2 ,
315except that the skip callback is assumed to be
316.Dv NULL .
317.It Fn archive_read_open2
318Freeze the settings, open the archive, and prepare for reading entries.
319This is the most generic version of this call, which accepts
320four callback functions.
321Most clients will want to use
322.Fn archive_read_open_filename ,
323.Fn archive_read_open_FILE ,
324.Fn archive_read_open_fd ,
325or
326.Fn archive_read_open_memory
327instead.
328The library invokes the client-provided functions to obtain
329raw bytes from the archive.
330.It Fn archive_read_open_FILE
331Like
332.Fn archive_read_open ,
333except that it accepts a
334.Ft "FILE *"
335pointer.
336This function should not be used with tape drives or other devices
337that require strict I/O blocking.
338.It Fn archive_read_open_fd
339Like
340.Fn archive_read_open ,
341except that it accepts a file descriptor and block size rather than
342a set of function pointers.
343Note that the file descriptor will not be automatically closed at
344end-of-archive.
345This function is safe for use with tape drives or other blocked devices.
346.It Fn archive_read_open_file
347This is a deprecated synonym for
348.Fn archive_read_open_filename .
349.It Fn archive_read_open_filename
350Like
351.Fn archive_read_open ,
352except that it accepts a simple filename and a block size.
353A NULL filename represents standard input.
354This function is safe for use with tape drives or other blocked devices.
355.It Fn archive_read_open_memory
356Like
357.Fn archive_read_open ,
358except that it accepts a pointer and size of a block of
359memory containing the archive data.
360.It Fn archive_read_next_header
361Read the header for the next entry and return a pointer to
362a
363.Tn struct archive_entry .
364This is a convenience wrapper around
365.Fn archive_read_next_header2
366that reuses an internal
367.Tn struct archive_entry
368object for each request.
369.It Fn archive_read_next_header2
370Read the header for the next entry and populate the provided
371.Tn struct archive_entry .
372.It Fn archive_read_data
373Read data associated with the header just read.
374Internally, this is a convenience function that calls
375.Fn archive_read_data_block
376and fills any gaps with nulls so that callers see a single
377continuous stream of data.
378.It Fn archive_read_data_block
379Return the next available block of data for this entry.
380Unlike
381.Fn archive_read_data ,
382the
383.Fn archive_read_data_block
384function avoids copying data and allows you to correctly handle
385sparse files, as supported by some archive formats.
386The library guarantees that offsets will increase and that blocks
387will not overlap.
388Note that the blocks returned from this function can be much larger
389than the block size read from disk, due to compression
390and internal buffer optimizations.
391.It Fn archive_read_data_skip
392A convenience function that repeatedly calls
393.Fn archive_read_data_block
394to skip all of the data for this archive entry.
395.\" #if ARCHIVE_API_VERSION < 3
396.It Fn archive_read_data_into_buffer
397This function is deprecated and will be removed.
398Use
399.Fn archive_read_data
400instead.
401.\" #endif
402.It Fn archive_read_data_into_fd
403A convenience function that repeatedly calls
404.Fn archive_read_data_block
405to copy the entire entry to the provided file descriptor.
406.It Fn archive_read_extract , Fn archive_read_extract_set_skip_file
407A convenience function that wraps the corresponding
408.Xr archive_write_disk 3
409interfaces.
410The first call to
411.Fn archive_read_extract
412creates a restore object using
413.Xr archive_write_disk_new 3
414and
415.Xr archive_write_disk_set_standard_lookup 3 ,
416then transparently invokes
417.Xr archive_write_disk_set_options 3 ,
418.Xr archive_write_header 3 ,
419.Xr archive_write_data 3 ,
420and
421.Xr archive_write_finish_entry 3
422to create the entry on disk and copy data into it.
423The
424.Va flags
425argument is passed unmodified to
426.Xr archive_write_disk_set_options 3 .
427.It Fn archive_read_extract2
428This is another version of
429.Fn archive_read_extract
430that allows you to provide your own restore object.
431In particular, this allows you to override the standard lookup functions
432using
433.Xr archive_write_disk_set_group_lookup 3
434and
435.Xr archive_write_disk_set_user_lookup 3 .
436Note that
437.Fn archive_read_extract2
438does not accept a
439.Va flags
440argument; you should use
441.Fn archive_write_disk_set_options
442to set the restore options yourself.
443.It Fn archive_read_extract_set_progress_callback
444Sets a pointer to a user-defined callback that can be used
445for updating progress displays during extraction.
446The progress function will be invoked during the extraction of large
447regular files.
448The progress function will be invoked with the pointer provided to this call.
449Generally, the data pointed to should include a reference to the archive
450object and the archive_entry object so that various statistics
451can be retrieved for the progress display.
452.It Fn archive_read_close
453Complete the archive and invoke the close callback.
454.It Fn archive_read_finish
455Invokes
456.Fn archive_read_close
457if it was not invoked manually, then release all resources.
458Note: In libarchive 1.x, this function was declared to return
459.Ft void ,
460which made it impossible to detect certain errors when
461.Fn archive_read_close
462was invoked implicitly from this function.
463The declaration is corrected beginning with libarchive 2.0.
464.El
465.Pp
466Note that the library determines most of the relevant information about
467the archive by inspection.
468In particular, it automatically detects
469.Xr gzip 1
470or
471.Xr bzip2 1
472compression and transparently performs the appropriate decompression.
473It also automatically detects the archive format.
474.Pp
475A complete description of the
476.Tn struct archive
477and
478.Tn struct archive_entry
479objects can be found in the overview manual page for
480.Xr libarchive 3 .
481.Sh CLIENT CALLBACKS
482The callback functions must match the following prototypes:
483.Bl -item -offset indent
484.It
485.Ft typedef ssize_t
486.Fo archive_read_callback
487.Fa "struct archive *"
488.Fa "void *client_data"
489.Fa "const void **buffer"
490.Fc
491.It
492.\" #if ARCHIVE_API_VERSION < 2
493.Ft typedef int
494.Fo archive_skip_callback
495.Fa "struct archive *"
496.Fa "void *client_data"
497.Fa "size_t request"
498.Fc
499.\" #else
500.\" .Ft typedef off_t
501.\" .Fo archive_skip_callback
502.\" .Fa "struct archive *"
503.\" .Fa "void *client_data"
504.\" .Fa "off_t request"
505.\" .Fc
506.\" #endif
507.It
508.Ft typedef int
509.Fn archive_open_callback "struct archive *" "void *client_data"
510.It
511.Ft typedef int
512.Fn archive_close_callback "struct archive *" "void *client_data"
513.El
514.Pp
515The open callback is invoked by
516.Fn archive_read_open .
517It should return
518.Cm ARCHIVE_OK
519if the underlying file or data source is successfully
520opened.
521If the open fails, it should call
522.Fn archive_set_error
523to register an error code and message and return
524.Cm ARCHIVE_FATAL .
525.Pp
526The read callback is invoked whenever the library
527requires raw bytes from the archive.
528The read callback should read data into a buffer,
529set the
530.Li const void **buffer
531argument to point to the available data, and
532return a count of the number of bytes available.
533The library will invoke the read callback again
534only after it has consumed this data.
535The library imposes no constraints on the size
536of the data blocks returned.
537On end-of-file, the read callback should
538return zero.
539On error, the read callback should invoke
540.Fn archive_set_error
541to register an error code and message and
542return -1.
543.Pp
544The skip callback is invoked when the
545library wants to ignore a block of data.
546The return value is the number of bytes actually
547skipped, which may differ from the request.
548If the callback cannot skip data, it should return
549zero.
550If the skip callback is not provided (the
551function pointer is
552.Dv NULL ),
553the library will invoke the read function
554instead and simply discard the result.
555A skip callback can provide significant
556performance gains when reading uncompressed
557archives from slow disk drives or other media
558that can skip quickly.
559.Pp
560The close callback is invoked by archive_read_close when
561the archive processing is complete.
562The callback should return
563.Cm ARCHIVE_OK
564on success.
565On failure, the callback should invoke
566.Fn archive_set_error
567to register an error code and message and
568return
569.Cm ARCHIVE_FATAL .
570.Sh EXAMPLE
571The following illustrates basic usage of the library.
572In this example,
573the callback functions are simply wrappers around the standard
574.Xr open 2 ,
575.Xr read 2 ,
576and
577.Xr close 2
578system calls.
579.Bd -literal -offset indent
580void
581list_archive(const char *name)
582{
583  struct mydata *mydata;
584  struct archive *a;
585  struct archive_entry *entry;
586
587  mydata = malloc(sizeof(struct mydata));
588  a = archive_read_new();
589  mydata->name = name;
590  archive_read_support_compression_all(a);
591  archive_read_support_format_all(a);
592  archive_read_open(a, mydata, myopen, myread, myclose);
593  while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
594    printf("%s\en",archive_entry_pathname(entry));
595    archive_read_data_skip(a);
596  }
597  archive_read_finish(a);
598  free(mydata);
599}
600
601ssize_t
602myread(struct archive *a, void *client_data, const void **buff)
603{
604  struct mydata *mydata = client_data;
605
606  *buff = mydata->buff;
607  return (read(mydata->fd, mydata->buff, 10240));
608}
609
610int
611myopen(struct archive *a, void *client_data)
612{
613  struct mydata *mydata = client_data;
614
615  mydata->fd = open(mydata->name, O_RDONLY);
616  return (mydata->fd >= 0 ? ARCHIVE_OK : ARCHIVE_FATAL);
617}
618
619int
620myclose(struct archive *a, void *client_data)
621{
622  struct mydata *mydata = client_data;
623
624  if (mydata->fd > 0)
625    close(mydata->fd);
626  return (ARCHIVE_OK);
627}
628.Ed
629.Sh RETURN VALUES
630Most functions return zero on success, non-zero on error.
631The possible return codes include:
632.Cm ARCHIVE_OK
633(the operation succeeded),
634.Cm ARCHIVE_WARN
635(the operation succeeded but a non-critical error was encountered),
636.Cm ARCHIVE_EOF
637(end-of-archive was encountered),
638.Cm ARCHIVE_RETRY
639(the operation failed but can be retried),
640and
641.Cm ARCHIVE_FATAL
642(there was a fatal error; the archive should be closed immediately).
643Detailed error codes and textual descriptions are available from the
644.Fn archive_errno
645and
646.Fn archive_error_string
647functions.
648.Pp
649.Fn archive_read_new
650returns a pointer to a freshly allocated
651.Tn struct archive
652object.
653It returns
654.Dv NULL
655on error.
656.Pp
657.Fn archive_read_data
658returns a count of bytes actually read or zero at the end of the entry.
659On error, a value of
660.Cm ARCHIVE_FATAL ,
661.Cm ARCHIVE_WARN ,
662or
663.Cm ARCHIVE_RETRY
664is returned and an error code and textual description can be retrieved from the
665.Fn archive_errno
666and
667.Fn archive_error_string
668functions.
669.Pp
670The library expects the client callbacks to behave similarly.
671If there is an error, you can use
672.Fn archive_set_error
673to set an appropriate error code and description,
674then return one of the non-zero values above.
675(Note that the value eventually returned to the client may
676not be the same; many errors that are not critical at the level
677of basic I/O can prevent the archive from being properly read,
678thus most I/O errors eventually cause
679.Cm ARCHIVE_FATAL
680to be returned.)
681.\" .Sh ERRORS
682.Sh SEE ALSO
683.Xr tar 1 ,
684.Xr libarchive 3 ,
685.Xr archive_util 3 ,
686.Xr tar 5
687.Sh HISTORY
688The
689.Nm libarchive
690library first appeared in
691.Fx 5.3 .
692.Sh AUTHORS
693.An -nosplit
694The
695.Nm libarchive
696library was written by
697.An Tim Kientzle Aq Mt kientzle@acm.org .
698.Sh BUGS
699Many traditional archiver programs treat
700empty files as valid empty archives.
701For example, many implementations of
702.Xr tar 1
703allow you to append entries to an empty file.
704Of course, it is impossible to determine the format of an empty file
705by inspecting the contents, so this library treats empty files as
706having a special
707.Dq empty
708format.
709