1.\" Copyright (c) 2003-2007 Tim Kientzle 2.\" All rights reserved. 3.\" 4.\" Redistribution and use in source and binary forms, with or without 5.\" modification, are permitted provided that the following conditions 6.\" are met: 7.\" 1. Redistributions of source code must retain the above copyright 8.\" notice, this list of conditions and the following disclaimer. 9.\" 2. Redistributions in binary form must reproduce the above copyright 10.\" notice, this list of conditions and the following disclaimer in the 11.\" documentation and/or other materials provided with the distribution. 12.\" 13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23.\" SUCH DAMAGE. 
24.\" 25.\" $FreeBSD: head/lib/libarchive/archive_read.3 191595 2009-04-27 20:13:13Z kientzle $ 26.\" 27.Dd April 13, 2009 28.Dt archive_read 3 29.Os 30.Sh NAME 31.Nm archive_read_new , 32.Nm archive_read_set_filter_options , 33.Nm archive_read_set_format_options , 34.Nm archive_read_set_options , 35.Nm archive_read_support_compression_all , 36.Nm archive_read_support_compression_bzip2 , 37.Nm archive_read_support_compression_compress , 38.Nm archive_read_support_compression_gzip , 39.Nm archive_read_support_compression_lzma , 40.Nm archive_read_support_compression_none , 41.Nm archive_read_support_compression_xz , 42.Nm archive_read_support_compression_program , 43.Nm archive_read_support_compression_program_signature , 44.Nm archive_read_support_format_all , 45.Nm archive_read_support_format_ar , 46.Nm archive_read_support_format_cpio , 47.Nm archive_read_support_format_empty , 48.Nm archive_read_support_format_iso9660 , 49.Nm archive_read_support_format_mtree , 50.Nm archive_read_support_format_raw , 51.Nm archive_read_support_format_tar , 52.Nm archive_read_support_format_zip , 53.Nm archive_read_open , 54.Nm archive_read_open2 , 55.Nm archive_read_open_fd , 56.Nm archive_read_open_FILE , 57.Nm archive_read_open_filename , 58.Nm archive_read_open_memory , 59.Nm archive_read_next_header , 60.Nm archive_read_next_header2 , 61.Nm archive_read_data , 62.Nm archive_read_data_block , 63.Nm archive_read_data_skip , 64.\" #if ARCHIVE_API_VERSION < 3 65.Nm archive_read_data_into_buffer , 66.\" #endif 67.Nm archive_read_data_into_fd , 68.Nm archive_read_extract , 69.Nm archive_read_extract2 , 70.Nm archive_read_extract_set_progress_callback , 71.Nm archive_read_close , 72.Nm archive_read_finish 73.Nd functions for reading streaming archives 74.Sh SYNOPSIS 75.In archive.h 76.Ft struct archive * 77.Fn archive_read_new "void" 78.Ft int 79.Fn archive_read_support_compression_all "struct archive *" 80.Ft int 81.Fn archive_read_support_compression_bzip2 "struct archive *" 82.Ft 
int 83.Fn archive_read_support_compression_compress "struct archive *" 84.Ft int 85.Fn archive_read_support_compression_gzip "struct archive *" 86.Ft int 87.Fn archive_read_support_compression_lzma "struct archive *" 88.Ft int 89.Fn archive_read_support_compression_none "struct archive *" 90.Ft int 91.Fn archive_read_support_compression_xz "struct archive *" 92.Ft int 93.Fo archive_read_support_compression_program 94.Fa "struct archive *" 95.Fa "const char *cmd" 96.Fc 97.Ft int 98.Fo archive_read_support_compression_program_signature 99.Fa "struct archive *" 100.Fa "const char *cmd" 101.Fa "const void *signature" 102.Fa "size_t signature_length" 103.Fc 104.Ft int 105.Fn archive_read_support_format_all "struct archive *" 106.Ft int 107.Fn archive_read_support_format_ar "struct archive *" 108.Ft int 109.Fn archive_read_support_format_cpio "struct archive *" 110.Ft int 111.Fn archive_read_support_format_empty "struct archive *" 112.Ft int 113.Fn archive_read_support_format_iso9660 "struct archive *" 114.Ft int 115.Fn archive_read_support_format_mtree "struct archive *" 116.Ft int 117.Fn archive_read_support_format_raw "struct archive *" 118.Ft int 119.Fn archive_read_support_format_tar "struct archive *" 120.Ft int 121.Fn archive_read_support_format_zip "struct archive *" 122.Ft int 123.Fn archive_read_set_filter_options "struct archive *" "const char *" 124.Ft int 125.Fn archive_read_set_format_options "struct archive *" "const char *" 126.Ft int 127.Fn archive_read_set_options "struct archive *" "const char *" 128.Ft int 129.Fo archive_read_open 130.Fa "struct archive *" 131.Fa "void *client_data" 132.Fa "archive_open_callback *" 133.Fa "archive_read_callback *" 134.Fa "archive_close_callback *" 135.Fc 136.Ft int 137.Fo archive_read_open2 138.Fa "struct archive *" 139.Fa "void *client_data" 140.Fa "archive_open_callback *" 141.Fa "archive_read_callback *" 142.Fa "archive_skip_callback *" 143.Fa "archive_close_callback *" 144.Fc 145.Ft int 146.Fn 
archive_read_open_FILE "struct archive *" "FILE *file" 147.Ft int 148.Fn archive_read_open_fd "struct archive *" "int fd" "size_t block_size" 149.Ft int 150.Fo archive_read_open_filename 151.Fa "struct archive *" 152.Fa "const char *filename" 153.Fa "size_t block_size" 154.Fc 155.Ft int 156.Fn archive_read_open_memory "struct archive *" "void *buff" "size_t size" 157.Ft int 158.Fn archive_read_next_header "struct archive *" "struct archive_entry **" 159.Ft int 160.Fn archive_read_next_header2 "struct archive *" "struct archive_entry *" 161.Ft ssize_t 162.Fn archive_read_data "struct archive *" "void *buff" "size_t len" 163.Ft int 164.Fo archive_read_data_block 165.Fa "struct archive *" 166.Fa "const void **buff" 167.Fa "size_t *len" 168.Fa "off_t *offset" 169.Fc 170.Ft int 171.Fn archive_read_data_skip "struct archive *" 172.\" #if ARCHIVE_API_VERSION < 3 173.Ft int 174.Fn archive_read_data_into_buffer "struct archive *" "void *" "ssize_t len" 175.\" #endif 176.Ft int 177.Fn archive_read_data_into_fd "struct archive *" "int fd" 178.Ft int 179.Fo archive_read_extract 180.Fa "struct archive *" 181.Fa "struct archive_entry *" 182.Fa "int flags" 183.Fc 184.Ft int 185.Fo archive_read_extract2 186.Fa "struct archive *src" 187.Fa "struct archive_entry *" 188.Fa "struct archive *dest" 189.Fc 190.Ft void 191.Fo archive_read_extract_set_progress_callback 192.Fa "struct archive *" 193.Fa "void (*func)(void *)" 194.Fa "void *user_data" 195.Fc 196.Ft int 197.Fn archive_read_close "struct archive *" 198.Ft int 199.Fn archive_read_finish "struct archive *" 200.Sh DESCRIPTION 201These functions provide a complete API for reading streaming archives. 202The general process is to first create the 203.Tn struct archive 204object, set options, initialize the reader, iterate over the archive 205headers and associated data, then close the archive and release all 206resources. 
207The following summary describes the functions in approximately the 208order they would be used: 209.Bl -tag -compact -width indent 210.It Fn archive_read_new 211Allocates and initializes a 212.Tn struct archive 213object suitable for reading from an archive. 214.It Fn archive_read_support_compression_bzip2 , \ 215Fn archive_read_support_compression_compress , \ 216Fn archive_read_support_compression_gzip , \ 217Fn archive_read_support_compression_lzma , \ 218Fn archive_read_support_compression_none , \ 219Fn archive_read_support_compression_xz 220Enables auto-detection code and decompression support for the 221specified compression. 222Returns 223.Cm ARCHIVE_OK 224if the compression is fully supported, or 225.Cm ARCHIVE_WARN 226if the compression is supported only through an external program. 227Note that decompression using an external program is usually slower than 228decompression through built-in libraries. 229Note that 230.Dq none 231is always enabled by default. 232.It Fn archive_read_support_compression_all 233Enables all available decompression filters. 234.It Fn archive_read_support_compression_program 235Data is fed through the specified external program before being dearchived. 236Note that this disables automatic detection of the compression format, 237so it makes no sense to specify this in conjunction with any other 238decompression option. 239.It Fn archive_read_support_compression_program_signature 240This feeds data through the specified external program 241but only if the initial bytes of the data match the specified 242signature value. 
243.It Fn archive_read_support_format_all , \ 244Fn archive_read_support_format_ar , \ 245Fn archive_read_support_format_cpio , \ 246Fn archive_read_support_format_empty , \ 247Fn archive_read_support_format_iso9660 , \ 248Fn archive_read_support_format_mtree , \ 249Fn archive_read_support_format_tar , \ 250Fn archive_read_support_format_zip 251Enables support---including auto-detection code---for the 252specified archive format. 253For example, 254.Fn archive_read_support_format_tar 255enables support for a variety of standard tar formats, old-style tar, 256ustar, pax interchange format, and many common variants. 257For convenience, 258.Fn archive_read_support_format_all 259enables support for all available formats. 260Only empty archives are supported by default. 261.It Fn archive_read_support_format_raw 262The 263.Dq raw 264format handler allows libarchive to be used to read arbitrary data. 265It treats any data stream as an archive with a single entry. 266The pathname of this entry is 267.Dq data ; 268all other entry fields are unset. 269This is not enabled by 270.Fn archive_read_support_format_all 271in order to avoid erroneous handling of damaged archives. 272.It Fn archive_read_set_filter_options , \ 273Fn archive_read_set_format_options , \ 274Fn archive_read_set_options 275Specifies options that will be passed to currently-registered 276filters (including decompression filters) and/or format readers. 277The argument is a comma-separated list of individual options. 278Individual options have one of the following forms: 279.Bl -tag -compact -width indent 280.It Ar option=value 281The option/value pair will be provided to every module. 282Modules that do not accept an option with this name will ignore it. 283.It Ar option 284The option will be provided to every module with a value of 285.Dq 1 . 286.It Ar !option 287The option will be provided to every module with a NULL value. 
288.It Ar module:option=value , Ar module:option , Ar module:!option 289As above, but the corresponding option and value will be provided 290only to modules whose name matches 291.Ar module . 292.El 293The return value will be 294.Cm ARCHIVE_OK 295if any module accepts the option, or 296.Cm ARCHIVE_WARN 297if no module accepted the option, or 298.Cm ARCHIVE_FATAL 299if there was a fatal error while attempting to process the option. 300.Pp 301The currently supported options are: 302.Bl -tag -compact -width indent 303.It Format iso9660 304.Bl -tag -compact -width indent 305.It Cm joliet 306Support Joliet extensions. 307Defaults to enabled, use 308.Cm !joliet 309to disable. 310.El 311.El 312.It Fn archive_read_open 313The same as 314.Fn archive_read_open2 , 315except that the skip callback is assumed to be 316.Dv NULL . 317.It Fn archive_read_open2 318Freeze the settings, open the archive, and prepare for reading entries. 319This is the most generic version of this call, which accepts 320four callback functions. 321Most clients will want to use 322.Fn archive_read_open_filename , 323.Fn archive_read_open_FILE , 324.Fn archive_read_open_fd , 325or 326.Fn archive_read_open_memory 327instead. 328The library invokes the client-provided functions to obtain 329raw bytes from the archive. 330.It Fn archive_read_open_FILE 331Like 332.Fn archive_read_open , 333except that it accepts a 334.Ft "FILE *" 335pointer. 336This function should not be used with tape drives or other devices 337that require strict I/O blocking. 338.It Fn archive_read_open_fd 339Like 340.Fn archive_read_open , 341except that it accepts a file descriptor and block size rather than 342a set of function pointers. 343Note that the file descriptor will not be automatically closed at 344end-of-archive. 345This function is safe for use with tape drives or other blocked devices. 346.It Fn archive_read_open_file 347This is a deprecated synonym for 348.Fn archive_read_open_filename . 
349.It Fn archive_read_open_filename 350Like 351.Fn archive_read_open , 352except that it accepts a simple filename and a block size. 353A NULL filename represents standard input. 354This function is safe for use with tape drives or other blocked devices. 355.It Fn archive_read_open_memory 356Like 357.Fn archive_read_open , 358except that it accepts a pointer and size of a block of 359memory containing the archive data. 360.It Fn archive_read_next_header 361Read the header for the next entry and return a pointer to 362a 363.Tn struct archive_entry . 364This is a convenience wrapper around 365.Fn archive_read_next_header2 366that reuses an internal 367.Tn struct archive_entry 368object for each request. 369.It Fn archive_read_next_header2 370Read the header for the next entry and populate the provided 371.Tn struct archive_entry . 372.It Fn archive_read_data 373Read data associated with the header just read. 374Internally, this is a convenience function that calls 375.Fn archive_read_data_block 376and fills any gaps with nulls so that callers see a single 377continuous stream of data. 378.It Fn archive_read_data_block 379Return the next available block of data for this entry. 380Unlike 381.Fn archive_read_data , 382the 383.Fn archive_read_data_block 384function avoids copying data and allows you to correctly handle 385sparse files, as supported by some archive formats. 386The library guarantees that offsets will increase and that blocks 387will not overlap. 388Note that the blocks returned from this function can be much larger 389than the block size read from disk, due to compression 390and internal buffer optimizations. 391.It Fn archive_read_data_skip 392A convenience function that repeatedly calls 393.Fn archive_read_data_block 394to skip all of the data for this archive entry. 395.\" #if ARCHIVE_API_VERSION < 3 396.It Fn archive_read_data_into_buffer 397This function is deprecated and will be removed. 398Use 399.Fn archive_read_data 400instead. 
401.\" #endif 402.It Fn archive_read_data_into_fd 403A convenience function that repeatedly calls 404.Fn archive_read_data_block 405to copy the entire entry to the provided file descriptor. 406.It Fn archive_read_extract , Fn archive_read_extract_set_skip_file 407A convenience function that wraps the corresponding 408.Xr archive_write_disk 3 409interfaces. 410The first call to 411.Fn archive_read_extract 412creates a restore object using 413.Xr archive_write_disk_new 3 414and 415.Xr archive_write_disk_set_standard_lookup 3 , 416then transparently invokes 417.Xr archive_write_disk_set_options 3 , 418.Xr archive_write_header 3 , 419.Xr archive_write_data 3 , 420and 421.Xr archive_write_finish_entry 3 422to create the entry on disk and copy data into it. 423The 424.Va flags 425argument is passed unmodified to 426.Xr archive_write_disk_set_options 3 . 427.It Fn archive_read_extract2 428This is another version of 429.Fn archive_read_extract 430that allows you to provide your own restore object. 431In particular, this allows you to override the standard lookup functions 432using 433.Xr archive_write_disk_set_group_lookup 3 , 434and 435.Xr archive_write_disk_set_user_lookup 3 . 436Note that 437.Fn archive_read_extract2 438does not accept a 439.Va flags 440argument; you should use 441.Fn archive_write_disk_set_options 442to set the restore options yourself. 443.It Fn archive_read_extract_set_progress_callback 444Sets a pointer to a user-defined callback that can be used 445for updating progress displays during extraction. 446The progress function will be invoked during the extraction of large 447regular files. 448The progress function will be invoked with the pointer provided to this call. 449Generally, the data pointed to should include a reference to the archive 450object and the archive_entry object so that various statistics 451can be retrieved for the progress display. 452.It Fn archive_read_close 453Complete the archive and invoke the close callback. 
454.It Fn archive_read_finish 455Invokes 456.Fn archive_read_close 457if it was not invoked manually, then release all resources. 458Note: In libarchive 1.x, this function was declared to return 459.Ft void , 460which made it impossible to detect certain errors when 461.Fn archive_read_close 462was invoked implicitly from this function. 463The declaration is corrected beginning with libarchive 2.0. 464.El 465.Pp 466Note that the library determines most of the relevant information about 467the archive by inspection. 468In particular, it automatically detects 469.Xr gzip 1 470or 471.Xr bzip2 1 472compression and transparently performs the appropriate decompression. 473It also automatically detects the archive format. 474.Pp 475A complete description of the 476.Tn struct archive 477and 478.Tn struct archive_entry 479objects can be found in the overview manual page for 480.Xr libarchive 3 . 481.Sh CLIENT CALLBACKS 482The callback functions must match the following prototypes: 483.Bl -item -offset indent 484.It 485.Ft typedef ssize_t 486.Fo archive_read_callback 487.Fa "struct archive *" 488.Fa "void *client_data" 489.Fa "const void **buffer" 490.Fc 491.It 492.\" #if ARCHIVE_API_VERSION < 2 493.Ft typedef int 494.Fo archive_skip_callback 495.Fa "struct archive *" 496.Fa "void *client_data" 497.Fa "size_t request" 498.Fc 499.\" #else 500.\" .Ft typedef off_t 501.\" .Fo archive_skip_callback 502.\" .Fa "struct archive *" 503.\" .Fa "void *client_data" 504.\" .Fa "off_t request" 505.\" .Fc 506.\" #endif 507.It 508.Ft typedef int 509.Fn archive_open_callback "struct archive *" "void *client_data" 510.It 511.Ft typedef int 512.Fn archive_close_callback "struct archive *" "void *client_data" 513.El 514.Pp 515The open callback is invoked by 516.Fn archive_open . 517It should return 518.Cm ARCHIVE_OK 519if the underlying file or data source is successfully 520opened. 
521If the open fails, it should call 522.Fn archive_set_error 523to register an error code and message and return 524.Cm ARCHIVE_FATAL . 525.Pp 526The read callback is invoked whenever the library 527requires raw bytes from the archive. 528The read callback should read data into a buffer, 529set the 530.Li const void **buffer 531argument to point to the available data, and 532return a count of the number of bytes available. 533The library will invoke the read callback again 534only after it has consumed this data. 535The library imposes no constraints on the size 536of the data blocks returned. 537On end-of-file, the read callback should 538return zero. 539On error, the read callback should invoke 540.Fn archive_set_error 541to register an error code and message and 542return -1. 543.Pp 544The skip callback is invoked when the 545library wants to ignore a block of data. 546The return value is the number of bytes actually 547skipped, which may differ from the request. 548If the callback cannot skip data, it should return 549zero. 550If the skip callback is not provided (the 551function pointer is 552.Dv NULL ), 553the library will invoke the read function 554instead and simply discard the result. 555A skip callback can provide significant 556performance gains when reading uncompressed 557archives from slow disk drives or other media 558that can skip quickly. 559.Pp 560The close callback is invoked by archive_close when 561the archive processing is complete. 562The callback should return 563.Cm ARCHIVE_OK 564on success. 565On failure, the callback should invoke 566.Fn archive_set_error 567to register an error code and message and 568return 569.Cm ARCHIVE_FATAL . 570.Sh EXAMPLE 571The following illustrates basic usage of the library. 572In this example, 573the callback functions are simply wrappers around the standard 574.Xr open 2 , 575.Xr read 2 , 576and 577.Xr close 2 578system calls. 
579.Bd -literal -offset indent 580void 581list_archive(const char *name) 582{ 583 struct mydata *mydata; 584 struct archive *a; 585 struct archive_entry *entry; 586 587 mydata = malloc(sizeof(struct mydata)); 588 a = archive_read_new(); 589 mydata->name = name; 590 archive_read_support_compression_all(a); 591 archive_read_support_format_all(a); 592 archive_read_open(a, mydata, myopen, myread, myclose); 593 while (archive_read_next_header(a, &entry) == ARCHIVE_OK) { 594 printf("%s\en",archive_entry_pathname(entry)); 595 archive_read_data_skip(a); 596 } 597 archive_read_finish(a); 598 free(mydata); 599} 600 601ssize_t 602myread(struct archive *a, void *client_data, const void **buff) 603{ 604 struct mydata *mydata = client_data; 605 606 *buff = mydata->buff; 607 return (read(mydata->fd, mydata->buff, 10240)); 608} 609 610int 611myopen(struct archive *a, void *client_data) 612{ 613 struct mydata *mydata = client_data; 614 615 mydata->fd = open(mydata->name, O_RDONLY); 616 return (mydata->fd >= 0 ? ARCHIVE_OK : ARCHIVE_FATAL); 617} 618 619int 620myclose(struct archive *a, void *client_data) 621{ 622 struct mydata *mydata = client_data; 623 624 if (mydata->fd > 0) 625 close(mydata->fd); 626 return (ARCHIVE_OK); 627} 628.Ed 629.Sh RETURN VALUES 630Most functions return zero on success, non-zero on error. 631The possible return codes include: 632.Cm ARCHIVE_OK 633(the operation succeeded), 634.Cm ARCHIVE_WARN 635(the operation succeeded but a non-critical error was encountered), 636.Cm ARCHIVE_EOF 637(end-of-archive was encountered), 638.Cm ARCHIVE_RETRY 639(the operation failed but can be retried), 640and 641.Cm ARCHIVE_FATAL 642(there was a fatal error; the archive should be closed immediately). 643Detailed error codes and textual descriptions are available from the 644.Fn archive_errno 645and 646.Fn archive_error_string 647functions. 648.Pp 649.Fn archive_read_new 650returns a pointer to a freshly allocated 651.Tn struct archive 652object. 
653It returns 654.Dv NULL 655on error. 656.Pp 657.Fn archive_read_data 658returns a count of bytes actually read or zero at the end of the entry. 659On error, a value of 660.Cm ARCHIVE_FATAL , 661.Cm ARCHIVE_WARN , 662or 663.Cm ARCHIVE_RETRY 664is returned and an error code and textual description can be retrieved from the 665.Fn archive_errno 666and 667.Fn archive_error_string 668functions. 669.Pp 670The library expects the client callbacks to behave similarly. 671If there is an error, you can use 672.Fn archive_set_error 673to set an appropriate error code and description, 674then return one of the non-zero values above. 675(Note that the value eventually returned to the client may 676not be the same; many errors that are not critical at the level 677of basic I/O can prevent the archive from being properly read, 678thus most I/O errors eventually cause 679.Cm ARCHIVE_FATAL 680to be returned.) 681.\" .Sh ERRORS 682.Sh SEE ALSO 683.Xr tar 1 , 684.Xr archive 3 , 685.Xr archive_util 3 , 686.Xr tar 5 687.Sh HISTORY 688The 689.Nm libarchive 690library first appeared in 691.Fx 5.3 . 692.Sh AUTHORS 693.An -nosplit 694The 695.Nm libarchive 696library was written by 697.An Tim Kientzle Aq kientzle@acm.org . 698.Sh BUGS 699Many traditional archiver programs treat 700empty files as valid empty archives. 701For example, many implementations of 702.Xr tar 1 703allow you to append entries to an empty file. 704Of course, it is impossible to determine the format of an empty file 705by inspecting the contents, so this library treats empty files as 706having a special 707.Dq empty 708format. 709