1.\" Copyright (c) 2003-2007 Tim Kientzle 2.\" All rights reserved. 3.\" 4.\" Redistribution and use in source and binary forms, with or without 5.\" modification, are permitted provided that the following conditions 6.\" are met: 7.\" 1. Redistributions of source code must retain the above copyright 8.\" notice, this list of conditions and the following disclaimer. 9.\" 2. Redistributions in binary form must reproduce the above copyright 10.\" notice, this list of conditions and the following disclaimer in the 11.\" documentation and/or other materials provided with the distribution. 12.\" 13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23.\" SUCH DAMAGE. 
24.\" 25.\" $FreeBSD: src/lib/libarchive/archive_read.3,v 1.37 2008/05/26 17:00:22 kientzle Exp $ 26.\" 27.Dd August 19, 2006 28.Dt archive_read 3 29.Os 30.Sh NAME 31.Nm archive_read_new , 32.Nm archive_read_set_filter_options , 33.Nm archive_read_set_format_options , 34.Nm archive_read_set_options , 35.Nm archive_read_support_compression_all , 36.Nm archive_read_support_compression_bzip2 , 37.Nm archive_read_support_compression_compress , 38.Nm archive_read_support_compression_gzip , 39.Nm archive_read_support_compression_none , 40.Nm archive_read_support_compression_program , 41.Nm archive_read_support_format_all , 42.Nm archive_read_support_format_cpio , 43.Nm archive_read_support_format_empty , 44.Nm archive_read_support_format_iso9660 , 45.Nm archive_read_support_format_tar , 46.Nm archive_read_support_format_zip , 47.Nm archive_read_open , 48.Nm archive_read_open2 , 49.Nm archive_read_open_fd , 50.Nm archive_read_open_FILE , 51.Nm archive_read_open_filename , 52.Nm archive_read_open_memory , 53.Nm archive_read_next_header , 54.Nm archive_read_next_header2 , 55.Nm archive_read_data , 56.Nm archive_read_data_block , 57.Nm archive_read_data_skip , 58.\" #if ARCHIVE_API_VERSION < 3 59.Nm archive_read_data_into_buffer , 60.\" #endif 61.Nm archive_read_data_into_fd , 62.Nm archive_read_extract , 63.Nm archive_read_extract2 , 64.Nm archive_read_extract_set_progress_callback , 65.Nm archive_read_close , 66.Nm archive_read_finish 67.Nd functions for reading streaming archives 68.Sh SYNOPSIS 69.In archive.h 70.Ft struct archive * 71.Fn archive_read_new "void" 72.Ft int 73.Fn archive_read_support_compression_all "struct archive *" 74.Ft int 75.Fn archive_read_support_compression_bzip2 "struct archive *" 76.Ft int 77.Fn archive_read_support_compression_compress "struct archive *" 78.Ft int 79.Fn archive_read_support_compression_gzip "struct archive *" 80.Ft int 81.Fn archive_read_support_compression_none "struct archive *" 82.Ft int 83.Fo 
archive_read_support_compression_program 84.Fa "struct archive *" 85.Fa "const char *cmd" 86.Fc 87.Ft int 88.Fn archive_read_support_format_all "struct archive *" 89.Ft int 90.Fn archive_read_support_format_cpio "struct archive *" 91.Ft int 92.Fn archive_read_support_format_empty "struct archive *" 93.Ft int 94.Fn archive_read_support_format_iso9660 "struct archive *" 95.Ft int 96.Fn archive_read_support_format_tar "struct archive *" 97.Ft int 98.Fn archive_read_support_format_zip "struct archive *" 99.Ft int 100.Fn archive_read_set_filter_options "struct archive *" "const char *" 101.Ft int 102.Fn archive_read_set_format_options "struct archive *" "const char *" 103.Ft int 104.Fn archive_read_set_options "struct archive *" "const char *" 105.Ft int 106.Fo archive_read_open 107.Fa "struct archive *" 108.Fa "void *client_data" 109.Fa "archive_open_callback *" 110.Fa "archive_read_callback *" 111.Fa "archive_close_callback *" 112.Fc 113.Ft int 114.Fo archive_read_open2 115.Fa "struct archive *" 116.Fa "void *client_data" 117.Fa "archive_open_callback *" 118.Fa "archive_read_callback *" 119.Fa "archive_skip_callback *" 120.Fa "archive_close_callback *" 121.Fc 122.Ft int 123.Fn archive_read_open_FILE "struct archive *" "FILE *file" 124.Ft int 125.Fn archive_read_open_fd "struct archive *" "int fd" "size_t block_size" 126.Ft int 127.Fo archive_read_open_filename 128.Fa "struct archive *" 129.Fa "const char *filename" 130.Fa "size_t block_size" 131.Fc 132.Ft int 133.Fn archive_read_open_memory "struct archive *" "void *buff" "size_t size" 134.Ft int 135.Fn archive_read_next_header "struct archive *" "struct archive_entry **" 136.Ft int 137.Fn archive_read_next_header2 "struct archive *" "struct archive_entry *" 138.Ft ssize_t 139.Fn archive_read_data "struct archive *" "void *buff" "size_t len" 140.Ft int 141.Fo archive_read_data_block 142.Fa "struct archive *" 143.Fa "const void **buff" 144.Fa "size_t *len" 145.Fa "off_t *offset" 146.Fc 147.Ft int 148.Fn 
archive_read_data_skip "struct archive *" 149.\" #if ARCHIVE_API_VERSION < 3 150.Ft int 151.Fn archive_read_data_into_buffer "struct archive *" "void *" "ssize_t len" 152.\" #endif 153.Ft int 154.Fn archive_read_data_into_fd "struct archive *" "int fd" 155.Ft int 156.Fo archive_read_extract 157.Fa "struct archive *" 158.Fa "struct archive_entry *" 159.Fa "int flags" 160.Fc 161.Ft int 162.Fo archive_read_extract2 163.Fa "struct archive *src" 164.Fa "struct archive_entry *" 165.Fa "struct archive *dest" 166.Fc 167.Ft void 168.Fo archive_read_extract_set_progress_callback 169.Fa "struct archive *" 170.Fa "void (*func)(void *)" 171.Fa "void *user_data" 172.Fc 173.Ft int 174.Fn archive_read_close "struct archive *" 175.Ft int 176.Fn archive_read_finish "struct archive *" 177.Sh DESCRIPTION 178These functions provide a complete API for reading streaming archives. 179The general process is to first create the 180.Tn struct archive 181object, set options, initialize the reader, iterate over the archive 182headers and associated data, then close the archive and release all 183resources. 184The following summary describes the functions in approximately the 185order they would be used: 186.Bl -tag -compact -width indent 187.It Fn archive_read_new 188Allocates and initializes a 189.Tn struct archive 190object suitable for reading from an archive. 191.It Xo 192.Fn archive_read_support_compression_all , 193.Fn archive_read_support_compression_bzip2 , 194.Fn archive_read_support_compression_compress , 195.Fn archive_read_support_compression_gzip , 196.Fn archive_read_support_compression_none 197.Xc 198Enables auto-detection code and decompression support for the 199specified compression. 200Note that 201.Dq none 202is always enabled by default. 203For convenience, 204.Fn archive_read_support_compression_all 205enables all available decompression code. 206.It Fn archive_read_support_compression_program 207Data is fed through the specified external program before being dearchived. 
208Note that this disables automatic detection of the compression format, 209so it makes no sense to specify this in conjunction with any other 210decompression option. 211.It Xo 212.Fn archive_read_support_format_all , 213.Fn archive_read_support_format_cpio , 214.Fn archive_read_support_format_empty , 215.Fn archive_read_support_format_iso9660 , 216.Fn archive_read_support_format_tar , 217.Fn archive_read_support_format_zip 218.Xc 219Enables support---including auto-detection code---for the 220specified archive format. 221For example, 222.Fn archive_read_support_format_tar 223enables support for a variety of standard tar formats, old-style tar, 224ustar, pax interchange format, and many common variants. 225For convenience, 226.Fn archive_read_support_format_all 227enables support for all available formats. 228Only empty archives are supported by default. 229.It Xo 230.Fn archive_read_set_filter_options , 231.Fn archive_read_set_format_options , 232.Fn archive_read_set_options 233.Xc 234Specifies options that will be passed to currently-registered 235filters (including decompression filters) and/or format readers. 236The argument is a comma-separated list of individual options. 237Individual options have one of the following forms: 238.Bl -tag -compact -width indent 239.It Ar option=value 240The option/value pair will be provided to every module. 241Modules that do not accept an option with this name will ignore it. 242.It Ar option 243The option will be provided to every module with a value of 244.Dq 1 . 245.It Ar !option 246The option will be provided to every module with a NULL value. 247.It Ar module:option=value , Ar module:option , Ar module:!option 248As above, but the corresponding option and value will be provided 249only to modules whose name matches 250.Ar module . 
251.El 252The return value will be 253.Cm ARCHIVE_OK 254if any module accepts the option, or 255.Cm ARCHIVE_WARN 256if no module accepted the option, or 257.Cm ARCHIVE_FATAL 258if there was a fatal error while attempting to process the option. 259.Pp 260The currently supported options are: 261.Bl -tag -compact -width indent 262.It Format iso9660 263.Bl -tag -compact -width indent 264.It Cm joliet 265Support Joliet extensions. 266Defaults to enabled, use 267.Cm !joliet 268to disable. 269.El 270.El 271.It Fn archive_read_open 272The same as 273.Fn archive_read_open2 , 274except that the skip callback is assumed to be 275.Dv NULL . 276.It Fn archive_read_open2 277Freeze the settings, open the archive, and prepare for reading entries. 278This is the most generic version of this call, which accepts 279four callback functions. 280Most clients will want to use 281.Fn archive_read_open_filename , 282.Fn archive_read_open_FILE , 283.Fn archive_read_open_fd , 284or 285.Fn archive_read_open_memory 286instead. 287The library invokes the client-provided functions to obtain 288raw bytes from the archive. 289.It Fn archive_read_open_FILE 290Like 291.Fn archive_read_open , 292except that it accepts a 293.Ft "FILE *" 294pointer. 295This function should not be used with tape drives or other devices 296that require strict I/O blocking. 297.It Fn archive_read_open_fd 298Like 299.Fn archive_read_open , 300except that it accepts a file descriptor and block size rather than 301a set of function pointers. 302Note that the file descriptor will not be automatically closed at 303end-of-archive. 304This function is safe for use with tape drives or other blocked devices. 305.It Fn archive_read_open_file 306This is a deprecated synonym for 307.Fn archive_read_open_filename . 308.It Fn archive_read_open_filename 309Like 310.Fn archive_read_open , 311except that it accepts a simple filename and a block size. 312A NULL filename represents standard input. 
313This function is safe for use with tape drives or other blocked devices. 314.It Fn archive_read_open_memory 315Like 316.Fn archive_read_open , 317except that it accepts a pointer and size of a block of 318memory containing the archive data. 319.It Fn archive_read_next_header 320Read the header for the next entry and return a pointer to 321a 322.Tn struct archive_entry . 323This is a convenience wrapper around 324.Fn archive_read_next_header2 325that uses an internal 326.Tn struct archive_entry 327object. 328.It Fn archive_read_next_header2 329Read the header for the next entry and populate the provided 330.Tn struct archive_entry . 331.It Fn archive_read_data 332Read data associated with the header just read. 333Internally, this is a convenience function that calls 334.Fn archive_read_data_block 335and fills any gaps with nulls so that callers see a single 336continuous stream of data. 337.It Fn archive_read_data_block 338Return the next available block of data for this entry. 339Unlike 340.Fn archive_read_data , 341the 342.Fn archive_read_data_block 343function avoids copying data and allows you to correctly handle 344sparse files, as supported by some archive formats. 345The library guarantees that offsets will increase and that blocks 346will not overlap. 347Note that the blocks returned from this function can be much larger 348than the block size read from disk, due to compression 349and internal buffer optimizations. 350.It Fn archive_read_data_skip 351A convenience function that repeatedly calls 352.Fn archive_read_data_block 353to skip all of the data for this archive entry. 354.\" #if ARCHIVE_API_VERSION < 3 355.It Fn archive_read_data_into_buffer 356This function is deprecated and will be removed. 357Use 358.Fn archive_read_data 359instead. 360.\" #endif 361.It Fn archive_read_data_into_fd 362A convenience function that repeatedly calls 363.Fn archive_read_data_block 364to copy the entire entry to the provided file descriptor. 
365.It Fn archive_read_extract , Fn archive_read_extract_set_skip_file 366A convenience function that wraps the corresponding 367.Xr archive_write_disk 3 368interfaces. 369The first call to 370.Fn archive_read_extract 371creates a restore object using 372.Xr archive_write_disk_new 3 373and 374.Xr archive_write_disk_set_standard_lookup 3 , 375then transparently invokes 376.Xr archive_write_disk_set_options 3 , 377.Xr archive_write_header 3 , 378.Xr archive_write_data 3 , 379and 380.Xr archive_write_finish_entry 3 381to create the entry on disk and copy data into it. 382The 383.Va flags 384argument is passed unmodified to 385.Xr archive_write_disk_set_options 3 . 386.It Fn archive_read_extract2 387This is another version of 388.Fn archive_read_extract 389that allows you to provide your own restore object. 390In particular, this allows you to override the standard lookup functions 391using 392.Xr archive_write_disk_set_group_lookup 3 , 393and 394.Xr archive_write_disk_set_user_lookup 3 . 395Note that 396.Fn archive_read_extract2 397does not accept a 398.Va flags 399argument; you should use 400.Fn archive_write_disk_set_options 401to set the restore options yourself. 402.It Fn archive_read_extract_set_progress_callback 403Sets a pointer to a user-defined callback that can be used 404for updating progress displays during extraction. 405The progress function will be invoked during the extraction of large 406regular files. 407The progress function will be invoked with the pointer provided to this call. 408Generally, the data pointed to should include a reference to the archive 409object and the archive_entry object so that various statistics 410can be retrieved for the progress display. 411.It Fn archive_read_close 412Complete the archive and invoke the close callback. 413.It Fn archive_read_finish 414Invokes 415.Fn archive_read_close 416if it was not invoked manually, then release all resources. 
417Note: In libarchive 1.x, this function was declared to return 418.Ft void , 419which made it impossible to detect certain errors when 420.Fn archive_read_close 421was invoked implicitly from this function. 422The declaration is corrected beginning with libarchive 2.0. 423.El 424.Pp 425Note that the library determines most of the relevant information about 426the archive by inspection. 427In particular, it automatically detects 428.Xr gzip 1 429or 430.Xr bzip2 1 431compression and transparently performs the appropriate decompression. 432It also automatically detects the archive format. 433.Pp 434A complete description of the 435.Tn struct archive 436and 437.Tn struct archive_entry 438objects can be found in the overview manual page for 439.Xr libarchive 3 . 440.Sh CLIENT CALLBACKS 441The callback functions must match the following prototypes: 442.Bl -item -offset indent 443.It 444.Ft typedef ssize_t 445.Fo archive_read_callback 446.Fa "struct archive *" 447.Fa "void *client_data" 448.Fa "const void **buffer" 449.Fc 450.It 451.\" #if ARCHIVE_API_VERSION < 2 452.Ft typedef int 453.Fo archive_skip_callback 454.Fa "struct archive *" 455.Fa "void *client_data" 456.Fa "size_t request" 457.Fc 458.\" #else 459.\" .Ft typedef off_t 460.\" .Fo archive_skip_callback 461.\" .Fa "struct archive *" 462.\" .Fa "void *client_data" 463.\" .Fa "off_t request" 464.\" .Fc 465.\" #endif 466.It 467.Ft typedef int 468.Fn archive_open_callback "struct archive *" "void *client_data" 469.It 470.Ft typedef int 471.Fn archive_close_callback "struct archive *" "void *client_data" 472.El 473.Pp 474The open callback is invoked by 475.Fn archive_read_open . 476It should return 477.Cm ARCHIVE_OK 478if the underlying file or data source is successfully 479opened. 480If the open fails, it should call 481.Fn archive_set_error 482to register an error code and message and return 483.Cm ARCHIVE_FATAL . 484.Pp 485The read callback is invoked whenever the library 486requires raw bytes from the archive. 
487The read callback should read data into a buffer, 488set the 489.Li const void **buffer 490argument to point to the available data, and 491return a count of the number of bytes available. 492The library will invoke the read callback again 493only after it has consumed this data. 494The library imposes no constraints on the size 495of the data blocks returned. 496On end-of-file, the read callback should 497return zero. 498On error, the read callback should invoke 499.Fn archive_set_error 500to register an error code and message and 501return -1. 502.Pp 503The skip callback is invoked when the 504library wants to ignore a block of data. 505The return value is the number of bytes actually 506skipped, which may differ from the request. 507If the callback cannot skip data, it should return 508zero. 509If the skip callback is not provided (the 510function pointer is 511.Dv NULL ), 512the library will invoke the read function 513instead and simply discard the result. 514A skip callback can provide significant 515performance gains when reading uncompressed 516archives from slow disk drives or other media 517that can skip quickly. 518.Pp 519The close callback is invoked by archive_read_close when 520the archive processing is complete. 521The callback should return 522.Cm ARCHIVE_OK 523on success. 524On failure, the callback should invoke 525.Fn archive_set_error 526to register an error code and message and 527return 528.Cm ARCHIVE_FATAL . 529.Sh EXAMPLE 530The following illustrates basic usage of the library. 531In this example, 532the callback functions are simply wrappers around the standard 533.Xr open 2 , 534.Xr read 2 , 535and 536.Xr close 2 537system calls. 
538.Bd -literal -offset indent 539void 540list_archive(const char *name) 541{ 542 struct mydata *mydata; 543 struct archive *a; 544 struct archive_entry *entry; 545 546 mydata = malloc(sizeof(struct mydata)); 547 a = archive_read_new(); 548 mydata->name = name; 549 archive_read_support_compression_all(a); 550 archive_read_support_format_all(a); 551 archive_read_open(a, mydata, myopen, myread, myclose); 552 while (archive_read_next_header(a, &entry) == ARCHIVE_OK) { 553 printf("%s\\n",archive_entry_pathname(entry)); 554 archive_read_data_skip(a); 555 } 556 archive_read_finish(a); 557 free(mydata); 558} 559 560ssize_t 561myread(struct archive *a, void *client_data, const void **buff) 562{ 563 struct mydata *mydata = client_data; 564 565 *buff = mydata->buff; 566 return (read(mydata->fd, mydata->buff, 10240)); 567} 568 569int 570myopen(struct archive *a, void *client_data) 571{ 572 struct mydata *mydata = client_data; 573 574 mydata->fd = open(mydata->name, O_RDONLY); 575 return (mydata->fd >= 0 ? ARCHIVE_OK : ARCHIVE_FATAL); 576} 577 578int 579myclose(struct archive *a, void *client_data) 580{ 581 struct mydata *mydata = client_data; 582 583 if (mydata->fd > 0) 584 close(mydata->fd); 585 return (ARCHIVE_OK); 586} 587.Ed 588.Sh RETURN VALUES 589Most functions return zero on success, non-zero on error. 590The possible return codes include: 591.Cm ARCHIVE_OK 592(the operation succeeded), 593.Cm ARCHIVE_WARN 594(the operation succeeded but a non-critical error was encountered), 595.Cm ARCHIVE_EOF 596(end-of-archive was encountered), 597.Cm ARCHIVE_RETRY 598(the operation failed but can be retried), 599and 600.Cm ARCHIVE_FATAL 601(there was a fatal error; the archive should be closed immediately). 602Detailed error codes and textual descriptions are available from the 603.Fn archive_errno 604and 605.Fn archive_error_string 606functions. 607.Pp 608.Fn archive_read_new 609returns a pointer to a freshly allocated 610.Tn struct archive 611object. 
612It returns 613.Dv NULL 614on error. 615.Pp 616.Fn archive_read_data 617returns a count of bytes actually read or zero at the end of the entry. 618On error, a value of 619.Cm ARCHIVE_FATAL , 620.Cm ARCHIVE_WARN , 621or 622.Cm ARCHIVE_RETRY 623is returned and an error code and textual description can be retrieved from the 624.Fn archive_errno 625and 626.Fn archive_error_string 627functions. 628.Pp 629The library expects the client callbacks to behave similarly. 630If there is an error, you can use 631.Fn archive_set_error 632to set an appropriate error code and description, 633then return one of the non-zero values above. 634(Note that the value eventually returned to the client may 635not be the same; many errors that are not critical at the level 636of basic I/O can prevent the archive from being properly read, 637thus most I/O errors eventually cause 638.Cm ARCHIVE_FATAL 639to be returned.) 640.\" .Sh ERRORS 641.Sh SEE ALSO 642.Xr tar 1 , 643.Xr libarchive 3 , 644.Xr archive_util 3 , 645.Xr tar 5 646.Sh HISTORY 647The 648.Nm libarchive 649library first appeared in 650.Fx 5.3 . 651.Sh AUTHORS 652.An -nosplit 653The 654.Nm libarchive 655library was written by 656.An Tim Kientzle Aq kientzle@acm.org . 657.Sh BUGS 658Many traditional archiver programs treat 659empty files as valid empty archives. 660For example, many implementations of 661.Xr tar 1 662allow you to append entries to an empty file. 663Of course, it is impossible to determine the format of an empty file 664by inspecting the contents, so this library treats empty files as 665having a special 666.Dq empty 667format. 668