1.\" Copyright (c) 2003-2009 Tim Kientzle
2.\" All rights reserved.
3.\"
4.\" Redistribution and use in source and binary forms, with or without
5.\" modification, are permitted provided that the following conditions
6.\" are met:
7.\" 1. Redistributions of source code must retain the above copyright
8.\"    notice, this list of conditions and the following disclaimer.
9.\" 2. Redistributions in binary form must reproduce the above copyright
10.\"    notice, this list of conditions and the following disclaimer in the
11.\"    documentation and/or other materials provided with the distribution.
12.\"
13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23.\" SUCH DAMAGE.
24.\"
25.\" $FreeBSD$
26.\"
27.Dd April 3, 2017
28.Dt ARCHIVE_READ_DISK 3
29.Os
30.Sh NAME
31.Nm archive_read_disk_new ,
32.Nm archive_read_disk_open ,
33.Nm archive_read_disk_open_w ,
34.Nm archive_read_disk_set_behavior ,
35.Nm archive_read_disk_set_symlink_logical ,
36.Nm archive_read_disk_set_symlink_physical ,
37.Nm archive_read_disk_set_symlink_hybrid ,
38.Nm archive_read_disk_entry_from_file ,
39.Nm archive_read_disk_gname ,
40.Nm archive_read_disk_uname ,
41.Nm archive_read_disk_set_uname_lookup ,
42.Nm archive_read_disk_set_gname_lookup ,
43.Nm archive_read_disk_set_standard_lookup ,
44.Nm archive_read_disk_descend ,
45.Nm archive_read_disk_can_descend ,
46.Nm archive_read_disk_current_filesystem ,
47.Nm archive_read_disk_current_filesystem_is_synthetic ,
48.Nm archive_read_disk_current_filesystem_is_remote ,
49.Nm archive_read_disk_set_matching ,
50.Nm archive_read_disk_set_metadata_filter_callback ,
51.Nd functions for reading objects from disk
52.Sh LIBRARY
53Streaming Archive Library (libarchive, -larchive)
54.Sh SYNOPSIS
55.In archive.h
56.Ft struct archive *
57.Fn archive_read_disk_new "void"
58.Ft int
59.Fn archive_read_disk_open "struct archive *" "const char *"
60.Ft int
61.Fn archive_read_disk_open_w "struct archive *" "const wchar_t *"
62.Ft int
63.Fn archive_read_disk_set_behavior "struct archive *" "int"
64.Ft int
65.Fn archive_read_disk_set_symlink_logical "struct archive *"
66.Ft int
67.Fn archive_read_disk_set_symlink_physical "struct archive *"
68.Ft int
69.Fn archive_read_disk_set_symlink_hybrid "struct archive *"
70.Ft const char *
71.Fn archive_read_disk_gname "struct archive *" "gid_t"
72.Ft const char *
73.Fn archive_read_disk_uname "struct archive *" "uid_t"
74.Ft int
75.Fo archive_read_disk_set_gname_lookup
76.Fa "struct archive *"
77.Fa "void *"
78.Fa "const char *(*lookup)(void *, gid_t)"
79.Fa "void (*cleanup)(void *)"
80.Fc
81.Ft int
82.Fo archive_read_disk_set_uname_lookup
83.Fa "struct archive *"
84.Fa "void *"
85.Fa "const char *(*lookup)(void *, uid_t)"
86.Fa "void (*cleanup)(void *)"
87.Fc
88.Ft int
89.Fn archive_read_disk_set_standard_lookup "struct archive *"
90.Ft int
91.Fo archive_read_disk_entry_from_file
92.Fa "struct archive *"
93.Fa "struct archive_entry *"
94.Fa "int fd"
95.Fa "const struct stat *"
96.Fc
97.Ft int
98.Fn archive_read_disk_descend "struct archive *"
99.Ft int
100.Fn archive_read_disk_can_descend "struct archive *"
101.Ft int
102.Fn archive_read_disk_current_filesystem "struct archive *"
103.Ft int
104.Fn archive_read_disk_current_filesystem_is_synthetic "struct archive *"
105.Ft int
106.Fn archive_read_disk_current_filesystem_is_remote "struct archive *"
107.Ft int
108.Fo archive_read_disk_set_matching
109.Fa "struct archive *"
110.Fa "struct archive *"
.Fa "void (*excluded_func)(struct archive *, void *, struct archive_entry *)"
112.Fa "void *"
113.Fc
114.Ft int
115.Fo archive_read_disk_set_metadata_filter_callback
116.Fa "struct archive *"
117.Fa "int (*metadata_filter_func)(struct archive *, void*, struct archive_entry *)"
118.Fa "void *"
119.Fc
120.Sh DESCRIPTION
121These functions provide an API for reading information about
122objects on disk.
123In particular, they provide an interface for populating
124.Tn struct archive_entry
125objects.
126.Bl -tag -width indent
127.It Fn archive_read_disk_new
128Allocates and initializes a
129.Tn struct archive
130object suitable for reading object information from disk.
131.It Fn archive_read_disk_open
132Opens the file or directory from the given path and prepares the
133.Tn struct archive
134to read it from disk.
135.It Fn archive_read_disk_open_w
Opens the file or directory at the given path, specified as a wide character string, and prepares the
137.Tn struct archive
138to read it from disk.
139.It Fn archive_read_disk_set_behavior
140Configures various behavior options when reading entries from disk.
141The flags field consists of a bitwise OR of one or more of the
142following values:
143.Bl -tag -compact -width "indent"
144.It Cm ARCHIVE_READDISK_HONOR_NODUMP
145Skip files and directories with the nodump file attribute (file flag) set.
146By default, the nodump file attribute is ignored.
147.It Cm ARCHIVE_READDISK_MAC_COPYFILE
148Mac OS X specific.
149Read metadata (ACLs and extended attributes) with
150.Xr copyfile 3 .
By default, metadata is not read using
152.Xr copyfile 3 .
153.It Cm ARCHIVE_READDISK_NO_ACL
154Do not read Access Control Lists.
155By default, ACLs are read from disk.
156.It Cm ARCHIVE_READDISK_NO_FFLAGS
157Do not read file attributes (file flags).
158By default, file attributes are read from disk.
159See
160.Xr chattr 1
161.Pq Linux
162or
163.Xr chflags 1
164.Pq FreeBSD, Mac OS X
165for more information on file attributes.
166.It Cm ARCHIVE_READDISK_NO_TRAVERSE_MOUNTS
167Do not traverse mount points.
168By default, mount points are traversed.
169.It Cm ARCHIVE_READDISK_NO_XATTR
170Do not read extended file attributes (xattrs).
171By default, extended file attributes are read from disk.
172See
173.Xr xattr 7
174.Pq Linux ,
175.Xr xattr 2
176.Pq Mac OS X ,
177or
178.Xr getextattr 8
179.Pq FreeBSD
180for more information on extended file attributes.
181.It Cm ARCHIVE_READDISK_RESTORE_ATIME
182Restore access time of traversed files.
183By default, access time of traversed files is not restored.
184.It Cm ARCHIVE_READDISK_NO_SPARSE
185Do not read sparse file information.
186By default, sparse file information is read from disk.
187.El
188.It Xo
189.Fn archive_read_disk_set_symlink_logical ,
190.Fn archive_read_disk_set_symlink_physical ,
191.Fn archive_read_disk_set_symlink_hybrid
192.Xc
193This sets the mode used for handling symbolic links.
194The
195.Dq logical
196mode follows all symbolic links.
197The
198.Dq physical
199mode does not follow any symbolic links.
200The
201.Dq hybrid
202mode currently behaves identically to the
203.Dq logical
204mode.
205.It Xo
206.Fn archive_read_disk_gname ,
207.Fn archive_read_disk_uname
208.Xc
Returns a group or user name given a gid or uid value.
210By default, these always return a NULL string.
211.It Xo
212.Fn archive_read_disk_set_gname_lookup ,
213.Fn archive_read_disk_set_uname_lookup
214.Xc
215These allow you to override the functions used for
216user and group name lookups.
217You may also provide a
218.Tn void *
219pointer to a private data structure and a cleanup function for
220that data.
221The cleanup function will be invoked when the
222.Tn struct archive
223object is destroyed or when new lookup functions are registered.
224.It Fn archive_read_disk_set_standard_lookup
225This convenience function installs a standard set of user
226and group name lookup functions.
227These functions use
228.Xr getpwuid 3
229and
230.Xr getgrgid 3
231to convert ids to names, defaulting to NULL if the names cannot
232be looked up.
233These functions also implement a simple memory cache to reduce
234the number of calls to
235.Xr getpwuid 3
236and
237.Xr getgrgid 3 .
238.It Fn archive_read_disk_entry_from_file
239Populates a
240.Tn struct archive_entry
241object with information about a particular file.
242The
243.Tn archive_entry
244object must have already been created with
245.Xr archive_entry_new 3
246and at least one of the source path or path fields must already be set.
247(If both are set, the source path will be used.)
248.Pp
249Information is read from disk using the path name from the
250.Tn struct archive_entry
251object.
252If a file descriptor is provided, some information will be obtained using
253that file descriptor, on platforms that support the appropriate
254system calls.
255.Pp
256If a pointer to a
257.Tn struct stat
258is provided, information from that structure will be used instead
259of reading from the disk where appropriate.
260This can provide performance benefits in scenarios where
261.Tn struct stat
262information has already been read from the disk as a side effect
263of some other operation.
264(For example, directory traversal libraries often provide this information.)
265.Pp
266Where necessary, user and group ids are converted to user and group names
267using the currently-registered lookup functions above.
268This affects the file ownership fields and ACL values in the
269.Tn struct archive_entry
270object.
271.It Fn archive_read_disk_descend
272If the current entry can be descended, this function will mark the directory as the next entry for
273.Xr archive_read_header 3
274to visit.
275.It Fn archive_read_disk_can_descend
276Returns 1 if the current entry is an unvisited directory and 0 otherwise.
277.It Fn archive_read_disk_current_filesystem
Returns the index of the most recent filesystem entry that has been visited through archive_read_disk.
279.It Fn archive_read_disk_current_filesystem_is_synthetic
280Returns 1 if the current filesystem is a virtual filesystem. Returns 0 if the current filesystem is not a virtual filesystem. Returns -1 if it is unknown.
281.It Fn archive_read_disk_current_filesystem_is_remote
282Returns 1 if the current filesystem is a remote filesystem. Returns 0 if the current filesystem is not a remote filesystem. Returns -1 if it is unknown.
283.It Fn archive_read_disk_set_matching
284Allows the caller to set
285.Tn struct archive
286*_ma to compare each entry during
287.Xr archive_read_header 3
288calls. If matched based on calls to
289.Tn archive_match_path_excluded ,
290.Tn archive_match_time_excluded ,
291or
292.Tn archive_match_owner_excluded ,
then the callback function specified by the _excluded_func parameter will execute. This function will receive data provided to the fourth parameter, void *_client_data.
294.It Fn archive_read_disk_set_metadata_filter_callback
295Allows the caller to set a callback function during calls to
296.Xr archive_read_header 3
to filter out metadata for each entry. The callback function receives the
298.Tn struct archive
299object, void* custom filter data, and the
300.Tn struct archive_entry .
301If the callback function returns an error, ARCHIVE_RETRY will be returned and the entry will not be further processed.
302.El
303More information about the
304.Va struct archive
305object and the overall design of the library can be found in the
306.Xr libarchive 3
307overview.
308.Sh EXAMPLES
309The following illustrates basic usage of the library by
310showing how to use it to copy an item on disk into an archive.
311.Bd -literal -offset indent
/*
 * Copy one on-disk item into the archive being written through "a".
 *
 *   a    - archive opened for writing; receives the header and data.
 *   name - path of the file to add; also used as the entry pathname.
 *
 * Returns nothing.  If the file cannot be opened the function returns
 * without writing anything.
 */
void
file_to_archive(struct archive *a, const char *name)
{
  char buff[8192];
  ssize_t bytes_read;
  struct archive *ard;
  struct archive_entry *entry;
  int fd;

  /* Open first, so that a failure leaves nothing to release. */
  fd = open(name, O_RDONLY);
  if (fd < 0)
     return;
  ard = archive_read_disk_new();
  archive_read_disk_set_standard_lookup(ard);
  entry = archive_entry_new();
  archive_entry_copy_pathname(entry, name);
  archive_read_disk_entry_from_file(ard, entry, fd, NULL);
  archive_write_header(a, entry);
  /* read() reports errors as -1, so the result must stay signed. */
  while ((bytes_read = read(fd, buff, sizeof(buff))) > 0)
    archive_write_data(a, buff, (size_t)bytes_read);
  archive_write_finish_entry(a);
  archive_read_free(ard);
  archive_entry_free(entry);
  /* The descriptor is owned by this function, not by libarchive. */
  close(fd);
}
336.Ed
337.Sh RETURN VALUES
338Most functions return
339.Cm ARCHIVE_OK
340(zero) on success, or one of several negative
341error codes for errors.
342Specific error codes include:
343.Cm ARCHIVE_RETRY
344for operations that might succeed if retried,
345.Cm ARCHIVE_WARN
346for unusual conditions that do not prevent further operations, and
347.Cm ARCHIVE_FATAL
348for serious errors that make remaining operations impossible.
349.Pp
350.Fn archive_read_disk_new
351returns a pointer to a newly-allocated
352.Tn struct archive
353object or NULL if the allocation failed for any reason.
354.Pp
355.Fn archive_read_disk_gname
356and
357.Fn archive_read_disk_uname
358return
359.Tn const char *
360pointers to the textual name or NULL if the lookup failed for any reason.
361The returned pointer points to internal storage that
362may be reused on the next call to either of these functions;
363callers should copy the string if they need to continue accessing it.
364.\"
365.Sh ERRORS
366Detailed error codes and textual descriptions are available from the
367.Fn archive_errno
368and
369.Fn archive_error_string
370functions.
371.\"
372.Sh SEE ALSO
373.Xr tar 1 ,
374.Xr archive_read 3 ,
375.Xr archive_util 3 ,
376.Xr archive_write 3 ,
377.Xr archive_write_disk 3 ,
378.Xr libarchive 3
379.Sh HISTORY
380The
381.Nm libarchive
382library first appeared in
383.Fx 5.3 .
384The
385.Nm archive_read_disk
386interface was added to
387.Nm libarchive 2.6
388and first appeared in
389.Fx 8.0 .
390.Sh AUTHORS
391.An -nosplit
392The
393.Nm libarchive
394library was written by
395.An Tim Kientzle Aq kientzle@FreeBSD.org .
396.Sh BUGS
397The
398.Dq standard
399user name and group name lookup functions are not the defaults because
400.Xr getgrgid 3
401and
402.Xr getpwuid 3
403are sometimes too large for particular applications.
404The current design allows the application author to use a more
405compact implementation when appropriate.
406.Pp
407The full list of metadata read from disk by
408.Fn archive_read_disk_entry_from_file
409is necessarily system-dependent.
410.Pp
411The
412.Fn archive_read_disk_entry_from_file
413function reads as much information as it can from disk.
414Some method should be provided to limit this so that clients who
415do not need ACLs, for instance, can avoid the extra work needed
416to look up such information.
417.Pp
418This API should provide a set of methods for walking a directory tree.
419That would make it a direct parallel of the
420.Xr archive_read 3
421API.
422When such methods are implemented, the
423.Dq hybrid
424symbolic link mode will make sense.
425