1.\" Copyright (c) 2003-2007 Tim Kientzle 2.\" All rights reserved. 3.\" 4.\" Redistribution and use in source and binary forms, with or without 5.\" modification, are permitted provided that the following conditions 6.\" are met: 7.\" 1. Redistributions of source code must retain the above copyright 8.\" notice, this list of conditions and the following disclaimer. 9.\" 2. Redistributions in binary form must reproduce the above copyright 10.\" notice, this list of conditions and the following disclaimer in the 11.\" documentation and/or other materials provided with the distribution. 12.\" 13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23.\" SUCH DAMAGE. 
24.\" 25.\" $FreeBSD: head/lib/libarchive/archive_read.3 191595 2009-04-27 20:13:13Z kientzle $ 26.\" 27.Dd April 13, 2009 28.Dt archive_read 3 29.Os 30.Sh NAME 31.Nm archive_read_new , 32.Nm archive_read_set_filter_options , 33.Nm archive_read_set_format_options , 34.Nm archive_read_set_options , 35.Nm archive_read_support_compression_all , 36.Nm archive_read_support_compression_bzip2 , 37.Nm archive_read_support_compression_compress , 38.Nm archive_read_support_compression_gzip , 39.Nm archive_read_support_compression_lzma , 40.Nm archive_read_support_compression_none , 41.Nm archive_read_support_compression_xz , 42.Nm archive_read_support_compression_program , 43.Nm archive_read_support_compression_program_signature , 44.Nm archive_read_support_format_all , 45.Nm archive_read_support_format_ar , 46.Nm archive_read_support_format_cpio , 47.Nm archive_read_support_format_empty , 48.Nm archive_read_support_format_iso9660 , 49.Nm archive_read_support_format_mtree , 50.Nm archive_read_support_format_raw , 51.Nm archive_read_support_format_tar , 52.Nm archive_read_support_format_zip , 53.Nm archive_read_open , 54.Nm archive_read_open2 , 55.Nm archive_read_open_fd , 56.Nm archive_read_open_FILE , 57.Nm archive_read_open_filename , 58.Nm archive_read_open_memory , 59.Nm archive_read_next_header , 60.Nm archive_read_next_header2 , 61.Nm archive_read_data , 62.Nm archive_read_data_block , 63.Nm archive_read_data_skip , 64.\" #if ARCHIVE_API_VERSION < 3 65.Nm archive_read_data_into_buffer , 66.\" #endif 67.Nm archive_read_data_into_fd , 68.Nm archive_read_extract , 69.Nm archive_read_extract2 , 70.Nm archive_read_extract_set_progress_callback , 71.Nm archive_read_close , 72.Nm archive_read_finish 73.Nd functions for reading streaming archives 74.Sh SYNOPSIS 75.In archive.h 76.Ft struct archive * 77.Fn archive_read_new "void" 78.Ft int 79.Fn archive_read_support_compression_all "struct archive *" 80.Ft int 81.Fn archive_read_support_compression_bzip2 "struct archive *" 82.Ft 
int 83.Fn archive_read_support_compression_compress "struct archive *" 84.Ft int 85.Fn archive_read_support_compression_gzip "struct archive *" 86.Ft int 87.Fn archive_read_support_compression_lzma "struct archive *" 88.Ft int 89.Fn archive_read_support_compression_none "struct archive *" 90.Ft int 91.Fn archive_read_support_compression_xz "struct archive *" 92.Ft int 93.Fo archive_read_support_compression_program 94.Fa "struct archive *" 95.Fa "const char *cmd" 96.Fc 97.Ft int 98.Fo archive_read_support_compression_program_signature 99.Fa "struct archive *" 100.Fa "const char *cmd" 101.Fa "const void *signature" 102.Fa "size_t signature_length" 103.Fc 104.Ft int 105.Fn archive_read_support_format_all "struct archive *" 106.Ft int 107.Fn archive_read_support_format_ar "struct archive *" 108.Ft int 109.Fn archive_read_support_format_cpio "struct archive *" 110.Ft int 111.Fn archive_read_support_format_empty "struct archive *" 112.Ft int 113.Fn archive_read_support_format_iso9660 "struct archive *" 114.Ft int 115.Fn archive_read_support_format_mtree "struct archive *" 116.Ft int 117.Fn archive_read_support_format_raw "struct archive *" 118.Ft int 119.Fn archive_read_support_format_tar "struct archive *" 120.Ft int 121.Fn archive_read_support_format_zip "struct archive *" 122.Ft int 123.Fn archive_read_set_filter_options "struct archive *" "const char *" 124.Ft int 125.Fn archive_read_set_format_options "struct archive *" "const char *" 126.Ft int 127.Fn archive_read_set_options "struct archive *" "const char *" 128.Ft int 129.Fo archive_read_open 130.Fa "struct archive *" 131.Fa "void *client_data" 132.Fa "archive_open_callback *" 133.Fa "archive_read_callback *" 134.Fa "archive_close_callback *" 135.Fc 136.Ft int 137.Fo archive_read_open2 138.Fa "struct archive *" 139.Fa "void *client_data" 140.Fa "archive_open_callback *" 141.Fa "archive_read_callback *" 142.Fa "archive_skip_callback *" 143.Fa "archive_close_callback *" 144.Fc 145.Ft int 146.Fn 
archive_read_open_FILE "struct archive *" "FILE *file" 147.Ft int 148.Fn archive_read_open_fd "struct archive *" "int fd" "size_t block_size" 149.Ft int 150.Fo archive_read_open_filename 151.Fa "struct archive *" 152.Fa "const char *filename" 153.Fa "size_t block_size" 154.Fc 155.Ft int 156.Fn archive_read_open_memory "struct archive *" "void *buff" "size_t size" 157.Ft int 158.Fn archive_read_next_header "struct archive *" "struct archive_entry **" 159.Ft int 160.Fn archive_read_next_header2 "struct archive *" "struct archive_entry *" 161.Ft ssize_t 162.Fn archive_read_data "struct archive *" "void *buff" "size_t len" 163.Ft int 164.Fo archive_read_data_block 165.Fa "struct archive *" 166.Fa "const void **buff" 167.Fa "size_t *len" 168.Fa "off_t *offset" 169.Fc 170.Ft int 171.Fn archive_read_data_skip "struct archive *" 172.\" #if ARCHIVE_API_VERSION < 3 173.Ft int 174.Fn archive_read_data_into_buffer "struct archive *" "void *" "ssize_t len" 175.\" #endif 176.Ft int 177.Fn archive_read_data_into_fd "struct archive *" "int fd" 178.Ft int 179.Fo archive_read_extract 180.Fa "struct archive *" 181.Fa "struct archive_entry *" 182.Fa "int flags" 183.Fc 184.Ft int 185.Fo archive_read_extract2 186.Fa "struct archive *src" 187.Fa "struct archive_entry *" 188.Fa "struct archive *dest" 189.Fc 190.Ft void 191.Fo archive_read_extract_set_progress_callback 192.Fa "struct archive *" 193.Fa "void (*func)(void *)" 194.Fa "void *user_data" 195.Fc 196.Ft int 197.Fn archive_read_close "struct archive *" 198.Ft int 199.Fn archive_read_finish "struct archive *" 200.Sh DESCRIPTION 201These functions provide a complete API for reading streaming archives. 202The general process is to first create the 203.Tn struct archive 204object, set options, initialize the reader, iterate over the archive 205headers and associated data, then close the archive and release all 206resources. 
207The following summary describes the functions in approximately the 208order they would be used: 209.Bl -tag -compact -width indent 210.It Fn archive_read_new 211Allocates and initializes a 212.Tn struct archive 213object suitable for reading from an archive. 214.It Xo 215.Fn archive_read_support_compression_bzip2 , 216.Fn archive_read_support_compression_compress , 217.Fn archive_read_support_compression_gzip , 218.Fn archive_read_support_compression_lzma , 219.Fn archive_read_support_compression_none , 220.Fn archive_read_support_compression_xz 221.Xc 222Enables auto-detection code and decompression support for the 223specified compression. 224Returns 225.Cm ARCHIVE_OK 226if the compression is fully supported, or 227.Cm ARCHIVE_WARN 228if the compression is supported only through an external program. 229Note that decompression using an external program is usually slower than 230decompression through built-in libraries. 231Note that 232.Dq none 233is always enabled by default. 234.It Fn archive_read_support_compression_all 235Enables all available decompression filters. 236.It Fn archive_read_support_compression_program 237Data is fed through the specified external program before being dearchived. 238Note that this disables automatic detection of the compression format, 239so it makes no sense to specify this in conjunction with any other 240decompression option. 241.It Fn archive_read_support_compression_program_signature 242This feeds data through the specified external program 243but only if the initial bytes of the data match the specified 244signature value. 
245.It Xo 246.Fn archive_read_support_format_all , 247.Fn archive_read_support_format_ar , 248.Fn archive_read_support_format_cpio , 249.Fn archive_read_support_format_empty , 250.Fn archive_read_support_format_iso9660 , 251.Fn archive_read_support_format_mtree , 252.Fn archive_read_support_format_tar , 253.Fn archive_read_support_format_zip 254.Xc 255Enables support---including auto-detection code---for the 256specified archive format. 257For example, 258.Fn archive_read_support_format_tar 259enables support for a variety of standard tar formats, old-style tar, 260ustar, pax interchange format, and many common variants. 261For convenience, 262.Fn archive_read_support_format_all 263enables support for all available formats. 264Only empty archives are supported by default. 265.It Fn archive_read_support_format_raw 266The 267.Dq raw 268format handler allows libarchive to be used to read arbitrary data. 269It treats any data stream as an archive with a single entry. 270The pathname of this entry is 271.Dq data ; 272all other entry fields are unset. 273This is not enabled by 274.Fn archive_read_support_format_all 275in order to avoid erroneous handling of damaged archives. 276.It Xo 277.Fn archive_read_set_filter_options , 278.Fn archive_read_set_format_options , 279.Fn archive_read_set_options 280.Xc 281Specifies options that will be passed to currently-registered 282filters (including decompression filters) and/or format readers. 283The argument is a comma-separated list of individual options. 284Individual options have one of the following forms: 285.Bl -tag -compact -width indent 286.It Ar option=value 287The option/value pair will be provided to every module. 288Modules that do not accept an option with this name will ignore it. 289.It Ar option 290The option will be provided to every module with a value of 291.Dq 1 . 292.It Ar !option 293The option will be provided to every module with a NULL value. 
294.It Ar module:option=value , Ar module:option , Ar module:!option 295As above, but the corresponding option and value will be provided 296only to modules whose name matches 297.Ar module . 298.El 299The return value will be 300.Cm ARCHIVE_OK 301if any module accepts the option, or 302.Cm ARCHIVE_WARN 303if no module accepted the option, or 304.Cm ARCHIVE_FATAL 305if there was a fatal error while attempting to process the option. 306.Pp 307The currently supported options are: 308.Bl -tag -compact -width indent 309.It Format iso9660 310.Bl -tag -compact -width indent 311.It Cm joliet 312Support Joliet extensions. 313Defaults to enabled, use 314.Cm !joliet 315to disable. 316.El 317.El 318.It Fn archive_read_open 319The same as 320.Fn archive_read_open2 , 321except that the skip callback is assumed to be 322.Dv NULL . 323.It Fn archive_read_open2 324Freeze the settings, open the archive, and prepare for reading entries. 325This is the most generic version of this call, which accepts 326four callback functions. 327Most clients will want to use 328.Fn archive_read_open_filename , 329.Fn archive_read_open_FILE , 330.Fn archive_read_open_fd , 331or 332.Fn archive_read_open_memory 333instead. 334The library invokes the client-provided functions to obtain 335raw bytes from the archive. 336.It Fn archive_read_open_FILE 337Like 338.Fn archive_read_open , 339except that it accepts a 340.Ft "FILE *" 341pointer. 342This function should not be used with tape drives or other devices 343that require strict I/O blocking. 344.It Fn archive_read_open_fd 345Like 346.Fn archive_read_open , 347except that it accepts a file descriptor and block size rather than 348a set of function pointers. 349Note that the file descriptor will not be automatically closed at 350end-of-archive. 351This function is safe for use with tape drives or other blocked devices. 352.It Fn archive_read_open_file 353This is a deprecated synonym for 354.Fn archive_read_open_filename . 
355.It Fn archive_read_open_filename 356Like 357.Fn archive_read_open , 358except that it accepts a simple filename and a block size. 359A NULL filename represents standard input. 360This function is safe for use with tape drives or other blocked devices. 361.It Fn archive_read_open_memory 362Like 363.Fn archive_read_open , 364except that it accepts a pointer and size of a block of 365memory containing the archive data. 366.It Fn archive_read_next_header 367Read the header for the next entry and return a pointer to 368a 369.Tn struct archive_entry . 370This is a convenience wrapper around 371.Fn archive_read_next_header2 372that reuses an internal 373.Tn struct archive_entry 374object for each request. 375.It Fn archive_read_next_header2 376Read the header for the next entry and populate the provided 377.Tn struct archive_entry . 378.It Fn archive_read_data 379Read data associated with the header just read. 380Internally, this is a convenience function that calls 381.Fn archive_read_data_block 382and fills any gaps with nulls so that callers see a single 383continuous stream of data. 384.It Fn archive_read_data_block 385Return the next available block of data for this entry. 386Unlike 387.Fn archive_read_data , 388the 389.Fn archive_read_data_block 390function avoids copying data and allows you to correctly handle 391sparse files, as supported by some archive formats. 392The library guarantees that offsets will increase and that blocks 393will not overlap. 394Note that the blocks returned from this function can be much larger 395than the block size read from disk, due to compression 396and internal buffer optimizations. 397.It Fn archive_read_data_skip 398A convenience function that repeatedly calls 399.Fn archive_read_data_block 400to skip all of the data for this archive entry. 401.\" #if ARCHIVE_API_VERSION < 3 402.It Fn archive_read_data_into_buffer 403This function is deprecated and will be removed. 404Use 405.Fn archive_read_data 406instead. 
407.\" #endif 408.It Fn archive_read_data_into_fd 409A convenience function that repeatedly calls 410.Fn archive_read_data_block 411to copy the entire entry to the provided file descriptor. 412.It Fn archive_read_extract , Fn archive_read_extract_set_skip_file 413A convenience function that wraps the corresponding 414.Xr archive_write_disk 3 415interfaces. 416The first call to 417.Fn archive_read_extract 418creates a restore object using 419.Xr archive_write_disk_new 3 420and 421.Xr archive_write_disk_set_standard_lookup 3 , 422then transparently invokes 423.Xr archive_write_disk_set_options 3 , 424.Xr archive_write_header 3 , 425.Xr archive_write_data 3 , 426and 427.Xr archive_write_finish_entry 3 428to create the entry on disk and copy data into it. 429The 430.Va flags 431argument is passed unmodified to 432.Xr archive_write_disk_set_options 3 . 433.It Fn archive_read_extract2 434This is another version of 435.Fn archive_read_extract 436that allows you to provide your own restore object. 437In particular, this allows you to override the standard lookup functions 438using 439.Xr archive_write_disk_set_group_lookup 3 , 440and 441.Xr archive_write_disk_set_user_lookup 3 . 442Note that 443.Fn archive_read_extract2 444does not accept a 445.Va flags 446argument; you should use 447.Fn archive_write_disk_set_options 448to set the restore options yourself. 449.It Fn archive_read_extract_set_progress_callback 450Sets a pointer to a user-defined callback that can be used 451for updating progress displays during extraction. 452The progress function will be invoked during the extraction of large 453regular files. 454The progress function will be invoked with the pointer provided to this call. 455Generally, the data pointed to should include a reference to the archive 456object and the archive_entry object so that various statistics 457can be retrieved for the progress display. 458.It Fn archive_read_close 459Complete the archive and invoke the close callback. 
460.It Fn archive_read_finish 461Invokes 462.Fn archive_read_close 463if it was not invoked manually, then releases all resources. 464Note: In libarchive 1.x, this function was declared to return 465.Ft void , 466which made it impossible to detect certain errors when 467.Fn archive_read_close 468was invoked implicitly from this function. 469The declaration is corrected beginning with libarchive 2.0. 470.El 471.Pp 472Note that the library determines most of the relevant information about 473the archive by inspection. 474In particular, it automatically detects 475.Xr gzip 1 476or 477.Xr bzip2 1 478compression and transparently performs the appropriate decompression. 479It also automatically detects the archive format. 480.Pp 481A complete description of the 482.Tn struct archive 483and 484.Tn struct archive_entry 485objects can be found in the overview manual page for 486.Xr libarchive 3 . 487.Sh CLIENT CALLBACKS 488The callback functions must match the following prototypes: 489.Bl -item -offset indent 490.It 491.Ft typedef ssize_t 492.Fo archive_read_callback 493.Fa "struct archive *" 494.Fa "void *client_data" 495.Fa "const void **buffer" 496.Fc 497.It 498.\" #if ARCHIVE_API_VERSION < 2 499.Ft typedef int 500.Fo archive_skip_callback 501.Fa "struct archive *" 502.Fa "void *client_data" 503.Fa "size_t request" 504.Fc 505.\" #else 506.\" .Ft typedef off_t 507.\" .Fo archive_skip_callback 508.\" .Fa "struct archive *" 509.\" .Fa "void *client_data" 510.\" .Fa "off_t request" 511.\" .Fc 512.\" #endif 513.It 514.Ft typedef int 515.Fn archive_open_callback "struct archive *" "void *client_data" 516.It 517.Ft typedef int 518.Fn archive_close_callback "struct archive *" "void *client_data" 519.El 520.Pp 521The open callback is invoked by 522.Fn archive_read_open . 523It should return 524.Cm ARCHIVE_OK 525if the underlying file or data source is successfully 526opened. 
527If the open fails, it should call 528.Fn archive_set_error 529to register an error code and message and return 530.Cm ARCHIVE_FATAL . 531.Pp 532The read callback is invoked whenever the library 533requires raw bytes from the archive. 534The read callback should read data into a buffer, 535set the 536.Li const void **buffer 537argument to point to the available data, and 538return a count of the number of bytes available. 539The library will invoke the read callback again 540only after it has consumed this data. 541The library imposes no constraints on the size 542of the data blocks returned. 543On end-of-file, the read callback should 544return zero. 545On error, the read callback should invoke 546.Fn archive_set_error 547to register an error code and message and 548return -1. 549.Pp 550The skip callback is invoked when the 551library wants to ignore a block of data. 552The return value is the number of bytes actually 553skipped, which may differ from the request. 554If the callback cannot skip data, it should return 555zero. 556If the skip callback is not provided (the 557function pointer is 558.Dv NULL ), 559the library will invoke the read function 560instead and simply discard the result. 561A skip callback can provide significant 562performance gains when reading uncompressed 563archives from slow disk drives or other media 564that can skip quickly. 565.Pp 566The close callback is invoked by archive_read_close when 567the archive processing is complete. 568The callback should return 569.Cm ARCHIVE_OK 570on success. 571On failure, the callback should invoke 572.Fn archive_set_error 573to register an error code and message and 574return 575.Cm ARCHIVE_FATAL . 576.Sh EXAMPLE 577The following illustrates basic usage of the library. 578In this example, 579the callback functions are simply wrappers around the standard 580.Xr open 2 , 581.Xr read 2 , 582and 583.Xr close 2 584system calls. 
585.Bd -literal -offset indent 586void 587list_archive(const char *name) 588{ 589 struct mydata *mydata; 590 struct archive *a; 591 struct archive_entry *entry; 592 593 mydata = malloc(sizeof(struct mydata)); 594 a = archive_read_new(); 595 mydata->name = name; 596 archive_read_support_compression_all(a); 597 archive_read_support_format_all(a); 598 archive_read_open(a, mydata, myopen, myread, myclose); 599 while (archive_read_next_header(a, &entry) == ARCHIVE_OK) { 600 printf("%s\en",archive_entry_pathname(entry)); 601 archive_read_data_skip(a); 602 } 603 archive_read_finish(a); 604 free(mydata); 605} 606 607ssize_t 608myread(struct archive *a, void *client_data, const void **buff) 609{ 610 struct mydata *mydata = client_data; 611 612 *buff = mydata->buff; 613 return (read(mydata->fd, mydata->buff, 10240)); 614} 615 616int 617myopen(struct archive *a, void *client_data) 618{ 619 struct mydata *mydata = client_data; 620 621 mydata->fd = open(mydata->name, O_RDONLY); 622 return (mydata->fd >= 0 ? ARCHIVE_OK : ARCHIVE_FATAL); 623} 624 625int 626myclose(struct archive *a, void *client_data) 627{ 628 struct mydata *mydata = client_data; 629 630 if (mydata->fd > 0) 631 close(mydata->fd); 632 return (ARCHIVE_OK); 633} 634.Ed 635.Sh RETURN VALUES 636Most functions return zero on success, non-zero on error. 637The possible return codes include: 638.Cm ARCHIVE_OK 639(the operation succeeded), 640.Cm ARCHIVE_WARN 641(the operation succeeded but a non-critical error was encountered), 642.Cm ARCHIVE_EOF 643(end-of-archive was encountered), 644.Cm ARCHIVE_RETRY 645(the operation failed but can be retried), 646and 647.Cm ARCHIVE_FATAL 648(there was a fatal error; the archive should be closed immediately). 649Detailed error codes and textual descriptions are available from the 650.Fn archive_errno 651and 652.Fn archive_error_string 653functions. 654.Pp 655.Fn archive_read_new 656returns a pointer to a freshly allocated 657.Tn struct archive 658object. 
659It returns 660.Dv NULL 661on error. 662.Pp 663.Fn archive_read_data 664returns a count of bytes actually read or zero at the end of the entry. 665On error, a value of 666.Cm ARCHIVE_FATAL , 667.Cm ARCHIVE_WARN , 668or 669.Cm ARCHIVE_RETRY 670is returned and an error code and textual description can be retrieved from the 671.Fn archive_errno 672and 673.Fn archive_error_string 674functions. 675.Pp 676The library expects the client callbacks to behave similarly. 677If there is an error, you can use 678.Fn archive_set_error 679to set an appropriate error code and description, 680then return one of the non-zero values above. 681(Note that the value eventually returned to the client may 682not be the same; many errors that are not critical at the level 683of basic I/O can prevent the archive from being properly read, 684thus most I/O errors eventually cause 685.Cm ARCHIVE_FATAL 686to be returned.) 687.\" .Sh ERRORS 688.Sh SEE ALSO 689.Xr tar 1 , 690.Xr archive 3 , 691.Xr archive_util 3 , 692.Xr tar 5 693.Sh HISTORY 694The 695.Nm libarchive 696library first appeared in 697.Fx 5.3 . 698.Sh AUTHORS 699.An -nosplit 700The 701.Nm libarchive 702library was written by 703.An Tim Kientzle Aq kientzle@acm.org . 704.Sh BUGS 705Many traditional archiver programs treat 706empty files as valid empty archives. 707For example, many implementations of 708.Xr tar 1 709allow you to append entries to an empty file. 710Of course, it is impossible to determine the format of an empty file 711by inspecting the contents, so this library treats empty files as 712having a special 713.Dq empty 714format. 715