1 /*-
2  * Copyright (c) 2003-2010 Tim Kientzle
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #include "archive_platform.h"
27 __FBSDID("$FreeBSD: head/lib/libarchive/archive_read_open_filename.c 201093 2009-12-28 02:28:44Z kientzle $");
28 
29 #ifdef HAVE_SYS_IOCTL_H
30 #include <sys/ioctl.h>
31 #endif
32 #ifdef HAVE_SYS_STAT_H
33 #include <sys/stat.h>
34 #endif
35 #ifdef HAVE_ERRNO_H
36 #include <errno.h>
37 #endif
38 #ifdef HAVE_FCNTL_H
39 #include <fcntl.h>
40 #endif
41 #ifdef HAVE_IO_H
42 #include <io.h>
43 #endif
44 #ifdef HAVE_STDLIB_H
45 #include <stdlib.h>
46 #endif
47 #ifdef HAVE_STRING_H
48 #include <string.h>
49 #endif
50 #ifdef HAVE_UNISTD_H
51 #include <unistd.h>
52 #endif
53 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
54 #include <sys/disk.h>
55 #elif defined(__NetBSD__) || defined(__OpenBSD__)
56 #include <sys/disklabel.h>
57 #include <sys/dkio.h>
58 #elif defined(__DragonFly__)
59 #include <sys/diskslice.h>
60 #endif
61 
62 #include "archive.h"
63 #include "archive_string.h"
64 
/*
 * Fall back to a no-op flag value when the included headers do not
 * define O_BINARY, so that the open() calls in this file can OR it
 * into their flags unconditionally.
 */
#ifndef O_BINARY
#define O_BINARY 0
#endif
68 
/*
 * Client data handed to the read, skip, seek and close callbacks
 * defined in this file.  It is allocated with extra trailing space so
 * that the full filename can be stored in the final member.
 */
struct read_file_data {
	int	 fd;		/* Descriptor being read (0 for stdin). */
	size_t	 block_size;	/* Size of 'buffer' and of each read(). */
	void	*buffer;	/* I/O buffer of block_size bytes. */
	mode_t	 st_mode;  /* Mode bits for opened file. */
	char	 use_lseek;	/* Non-zero: file_skip() may use lseek(). */
	/* Says which member of 'filename' (if any) holds the name. */
	enum fnt_e { FNT_STDIN, FNT_MBS, FNT_WCS } filename_type;
	/*
	 * Declared with one element each, but the allocation is
	 * enlarged by the length of the name, so the whole
	 * NUL-terminated string lives here.
	 */
	union {
		char	 m[1];/* MBS filename. */
		wchar_t	 w[1];/* WCS filename. */
	} filename; /* Must be last! */
};
81 
/*
 * Forward declarations for the file-local helpers and callbacks
 * defined later in this file.
 */
static int	file_close(struct archive *, void *);
static int	file_open_filename(struct archive *, enum fnt_e, const void *,
		    size_t);
static ssize_t	file_read(struct archive *, void *, const void **buff);
static int64_t	file_seek(struct archive *, void *, int64_t request, int);
static int64_t	file_skip(struct archive *, void *, int64_t request);
static int64_t	file_skip_lseek(struct archive *, void *, int64_t request);
89 
/*
 * Alternate name for archive_read_open_filename(): forwards all three
 * arguments unchanged and returns whatever that function returns.
 */
int
archive_read_open_file(struct archive *a, const char *filename,
    size_t block_size)
{
	int status;

	status = archive_read_open_filename(a, filename, block_size);
	return (status);
}
96 
97 int
98 archive_read_open_filename(struct archive *a, const char *filename,
99     size_t block_size)
100 {
101 	enum fnt_e filename_type;
102 
103 	if (filename == NULL || filename[0] == '\0') {
104 		filename_type = FNT_STDIN;
105 	} else
106 		filename_type = FNT_MBS;
107 	return (file_open_filename(a, filename_type, filename, block_size));
108 }
109 
110 int
111 archive_read_open_filename_w(struct archive *a, const wchar_t *wfilename,
112     size_t block_size)
113 {
114 	enum fnt_e filename_type;
115 
116 	if (wfilename == NULL || wfilename[0] == L'\0') {
117 		filename_type = FNT_STDIN;
118 	} else {
119 #if defined(_WIN32) && !defined(__CYGWIN__)
120 		filename_type = FNT_WCS;
121 #else
122 		/*
123 		 * POSIX system does not support a wchar_t interface for
124 		 * open() system call, so we have to translate a whcar_t
125 		 * filename to multi-byte one and use it.
126 		 */
127 		struct archive_string fn;
128 		int r;
129 
130 		archive_string_init(&fn);
131 		if (archive_string_append_from_wcs(&fn, wfilename,
132 		    wcslen(wfilename)) != 0) {
133 			if (errno == ENOMEM)
134 				archive_set_error(a, errno,
135 				    "Can't allocate memory");
136 			else
137 				archive_set_error(a, EINVAL,
138 				    "Failed to convert a wide-character"
139 				    " filename to a multi-byte filename");
140 			archive_string_free(&fn);
141 			return (ARCHIVE_FATAL);
142 		}
143 		r = file_open_filename(a, FNT_MBS, fn.s, block_size);
144 		archive_string_free(&fn);
145 		return (r);
146 #endif
147 	}
148 	return (file_open_filename(a, filename_type, wfilename, block_size));
149 }
150 
151 static int
152 file_open_filename(struct archive *a, enum fnt_e filename_type,
153     const void *_filename, size_t block_size)
154 {
155 	struct stat st;
156 	struct read_file_data *mine;
157 	void *buffer;
158 	const char *filename = NULL;
159 	const wchar_t *wfilename = NULL;
160 	int fd;
161 	int is_disk_like = 0;
162 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
163 	off_t mediasize = 0; /* FreeBSD-specific, so off_t okay here. */
164 #elif defined(__NetBSD__) || defined(__OpenBSD__)
165 	struct disklabel dl;
166 #elif defined(__DragonFly__)
167 	struct partinfo pi;
168 #endif
169 
170 	archive_clear_error(a);
171 	if (filename_type == FNT_STDIN) {
172 		/* We used to delegate stdin support by
173 		 * directly calling archive_read_open_fd(a,0,block_size)
174 		 * here, but that doesn't (and shouldn't) handle the
175 		 * end-of-file flush when reading stdout from a pipe.
176 		 * Basically, read_open_fd() is intended for folks who
177 		 * are willing to handle such details themselves.  This
178 		 * API is intended to be a little smarter for folks who
179 		 * want easy handling of the common case.
180 		 */
181 		fd = 0;
182 #if defined(__CYGWIN__) || defined(_WIN32)
183 		setmode(0, O_BINARY);
184 #endif
185 		filename = "";
186 	} else if (filename_type == FNT_MBS) {
187 		filename = (const char *)_filename;
188 		fd = open(filename, O_RDONLY | O_BINARY);
189 		if (fd < 0) {
190 			archive_set_error(a, errno,
191 			    "Failed to open '%s'", filename);
192 			return (ARCHIVE_FATAL);
193 		}
194 	} else {
195 #if defined(_WIN32) && !defined(__CYGWIN__)
196 		wfilename = (const wchar_t *)_filename;
197 		fd = _wopen(wfilename, O_RDONLY | O_BINARY);
198 		if (fd < 0 && errno == ENOENT) {
199 			wchar_t *fullpath;
200 			fullpath = __la_win_permissive_name_w(wfilename);
201 			if (fullpath != NULL) {
202 				fd = _wopen(fullpath, O_RDONLY | O_BINARY);
203 				free(fullpath);
204 			}
205 		}
206 		if (fd < 0) {
207 			archive_set_error(a, errno,
208 			    "Failed to open '%S'", wfilename);
209 			return (ARCHIVE_FATAL);
210 		}
211 #else
212 		archive_set_error(a, ARCHIVE_ERRNO_MISC,
213 		    "Unexpedted operation in archive_read_open_filename");
214 		return (ARCHIVE_FATAL);
215 #endif
216 	}
217 	if (fstat(fd, &st) != 0) {
218 		if (filename_type == FNT_WCS)
219 			archive_set_error(a, errno, "Can't stat '%S'",
220 			    wfilename);
221 		else
222 			archive_set_error(a, errno, "Can't stat '%s'",
223 			    filename);
224 		return (ARCHIVE_FATAL);
225 	}
226 
227 	/*
228 	 * Determine whether the input looks like a disk device or a
229 	 * tape device.  The results are used below to select an I/O
230 	 * strategy:
231 	 *  = "disk-like" devices support arbitrary lseek() and will
232 	 *    support I/O requests of any size.  So we get easy skipping
233 	 *    and can cheat on block sizes to get better performance.
234 	 *  = "tape-like" devices require strict blocking and use
235 	 *    specialized ioctls for seeking.
236 	 *  = "socket-like" devices cannot seek at all but can improve
237 	 *    performance by using nonblocking I/O to read "whatever is
238 	 *    available right now".
239 	 *
240 	 * Right now, we only specially recognize disk-like devices,
241 	 * but it should be straightforward to add probes and strategy
242 	 * here for tape-like and socket-like devices.
243 	 */
244 	if (S_ISREG(st.st_mode)) {
245 		/* Safety:  Tell the extractor not to overwrite the input. */
246 		archive_read_extract_set_skip_file(a, st.st_dev, st.st_ino);
247 		/* Regular files act like disks. */
248 		is_disk_like = 1;
249 	}
250 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
251 	/* FreeBSD: if it supports DIOCGMEDIASIZE ioctl, it's disk-like. */
252 	else if (S_ISCHR(st.st_mode) &&
253 	    ioctl(fd, DIOCGMEDIASIZE, &mediasize) == 0 &&
254 	    mediasize > 0) {
255 		is_disk_like = 1;
256 	}
257 #elif defined(__NetBSD__) || defined(__OpenBSD__)
258 	/* Net/OpenBSD: if it supports DIOCGDINFO ioctl, it's disk-like. */
259 	else if ((S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) &&
260 	    ioctl(fd, DIOCGDINFO, &dl) == 0 &&
261 	    dl.d_partitions[DISKPART(st.st_rdev)].p_size > 0) {
262 		is_disk_like = 1;
263 	}
264 #elif defined(__DragonFly__)
265 	/* DragonFly BSD:  if it supports DIOCGPART ioctl, it's disk-like. */
266 	else if (S_ISCHR(st.st_mode) &&
267 	    ioctl(fd, DIOCGPART, &pi) == 0 &&
268 	    pi.media_size > 0) {
269 		is_disk_like = 1;
270 	}
271 #elif defined(__linux__)
272 	/* Linux:  All block devices are disk-like. */
273 	else if (S_ISBLK(st.st_mode) &&
274 	    lseek(fd, 0, SEEK_CUR) == 0 &&
275 	    lseek(fd, 0, SEEK_SET) == 0 &&
276 	    lseek(fd, 0, SEEK_END) > 0 &&
277 	    lseek(fd, 0, SEEK_SET) == 0) {
278 		is_disk_like = 1;
279 	}
280 #endif
281 	/* TODO: Add an "is_tape_like" variable and appropriate tests. */
282 
283 	if (filename_type == FNT_WCS)
284 		mine = (struct read_file_data *)calloc(1,
285 		    sizeof(*mine) + wcslen(wfilename) * sizeof(wchar_t));
286 	else
287 		mine = (struct read_file_data *)calloc(1,
288 		    sizeof(*mine) + strlen(filename));
289 	/* Disk-like devices prefer power-of-two block sizes.  */
290 	/* Use provided block_size as a guide so users have some control. */
291 	if (is_disk_like) {
292 		size_t new_block_size = 64 * 1024;
293 		while (new_block_size < block_size
294 		    && new_block_size < 64 * 1024 * 1024)
295 			new_block_size *= 2;
296 		block_size = new_block_size;
297 	}
298 	buffer = malloc(block_size);
299 	if (mine == NULL || buffer == NULL) {
300 		archive_set_error(a, ENOMEM, "No memory");
301 		free(mine);
302 		free(buffer);
303 		return (ARCHIVE_FATAL);
304 	}
305 	if (filename_type == FNT_WCS)
306 		wcscpy(mine->filename.w, wfilename);
307 	else
308 		strcpy(mine->filename.m, filename);
309 	mine->filename_type = filename_type;
310 	mine->block_size = block_size;
311 	mine->buffer = buffer;
312 	mine->fd = fd;
313 	/* Remember mode so close can decide whether to flush. */
314 	mine->st_mode = st.st_mode;
315 
316 	/* Disk-like inputs can use lseek(). */
317 	if (is_disk_like) {
318 		archive_read_set_seek_callback(a, file_seek);
319 		mine->use_lseek = 1;
320 	}
321 
322 	archive_read_set_read_callback(a, file_read);
323 	archive_read_set_skip_callback(a, file_skip);
324 	archive_read_set_close_callback(a, file_close);
325 	archive_read_set_callback_data(a, mine);
326 	return (archive_read_open1(a));
327 }
328 
329 static ssize_t
330 file_read(struct archive *a, void *client_data, const void **buff)
331 {
332 	struct read_file_data *mine = (struct read_file_data *)client_data;
333 	ssize_t bytes_read;
334 
335 	/* TODO: If a recent lseek() operation has left us
336 	 * mis-aligned, read and return a short block to try to get
337 	 * us back in alignment. */
338 
339 	/* TODO: Someday, try mmap() here; if that succeeds, give
340 	 * the entire file to libarchive as a single block.  That
341 	 * could be a lot faster than block-by-block manual I/O. */
342 
343 	/* TODO: We might be able to improve performance on pipes and
344 	 * sockets by setting non-blocking I/O and just accepting
345 	 * whatever we get here instead of waiting for a full block
346 	 * worth of data. */
347 
348 	*buff = mine->buffer;
349 	for (;;) {
350 		bytes_read = read(mine->fd, mine->buffer, mine->block_size);
351 		if (bytes_read < 0) {
352 			if (errno == EINTR)
353 				continue;
354 			else if (mine->filename_type == FNT_STDIN)
355 				archive_set_error(a, errno,
356 				    "Error reading stdin");
357 			else if (mine->filename_type == FNT_MBS)
358 				archive_set_error(a, errno,
359 				    "Error reading '%s'", mine->filename.m);
360 			else
361 				archive_set_error(a, errno,
362 				    "Error reading '%S'", mine->filename.w);
363 		}
364 		return (bytes_read);
365 	}
366 }
367 
368 /*
369  * Regular files and disk-like block devices can use simple lseek
370  * without needing to round the request to the block size.
371  *
372  * TODO: This can leave future reads mis-aligned.  Since we know the
373  * offset here, we should store it and use it in file_read() above
374  * to determine whether we should perform a short read to get back
375  * into alignment.  Long series of mis-aligned reads can negatively
376  * impact disk throughput.  (Of course, the performance impact should
377  * be carefully tested; extra code complexity is only worthwhile if
378  * it does provide measurable improvement.)
379  *
380  * TODO: Be lazy about the actual seek.  There are a few pathological
381  * cases where libarchive makes a bunch of seek requests in a row
382  * without any intervening reads.  This isn't a huge performance
383  * problem, since the kernel handles seeks lazily already, but
384  * it would be very slightly faster if we simply remembered the
385  * seek request here and then actually performed the seek at the
386  * top of the read callback above.
387  */
388 static int64_t
389 file_skip_lseek(struct archive *a, void *client_data, int64_t request)
390 {
391 	struct read_file_data *mine = (struct read_file_data *)client_data;
392 #if defined(_WIN32) && !defined(__CYGWIN__)
393 	/* We use _lseeki64() on Windows. */
394 	int64_t old_offset, new_offset;
395 #else
396 	off_t old_offset, new_offset;
397 #endif
398 
399 	/* We use off_t here because lseek() is declared that way. */
400 
401 	/* TODO: Deal with case where off_t isn't 64 bits.
402 	 * This shouldn't be a problem on Linux or other POSIX
403 	 * systems, since the configuration logic for libarchive
404 	 * tries to obtain a 64-bit off_t.  It's still an issue
405 	 * on Windows, though, so it might suffice to just use
406 	 * _lseeki64() on Windows.
407 	 */
408 	if ((old_offset = lseek(mine->fd, 0, SEEK_CUR)) >= 0 &&
409 	    (new_offset = lseek(mine->fd, request, SEEK_CUR)) >= 0)
410 		return (new_offset - old_offset);
411 
412 	/* If lseek() fails, don't bother trying again. */
413 	mine->use_lseek = 0;
414 
415 	/* Let libarchive recover with read+discard */
416 	if (errno == ESPIPE)
417 		return (0);
418 
419 	/* If the input is corrupted or truncated, fail. */
420 	if (mine->filename_type == FNT_STDIN)
421 		archive_set_error(a, errno, "Error seeking in stdin");
422 	else if (mine->filename_type == FNT_MBS)
423 		archive_set_error(a, errno, "Error seeking in '%s'",
424 		    mine->filename.m);
425 	else
426 		archive_set_error(a, errno, "Error seeking in '%S'",
427 		    mine->filename.w);
428 	return (-1);
429 }
430 
431 
432 /*
433  * TODO: Implement another file_skip_XXXX that uses MTIO ioctls to
434  * accelerate operation on tape drives.
435  */
436 
437 static int64_t
438 file_skip(struct archive *a, void *client_data, int64_t request)
439 {
440 	struct read_file_data *mine = (struct read_file_data *)client_data;
441 
442 	/* Delegate skip requests. */
443 	if (mine->use_lseek)
444 		return (file_skip_lseek(a, client_data, request));
445 
446 	/* If we can't skip, return 0; libarchive will read+discard instead. */
447 	return (0);
448 }
449 
450 /*
451  * TODO: Store the offset and use it in the read callback.
452  */
453 static int64_t
454 file_seek(struct archive *a, void *client_data, int64_t request, int whence)
455 {
456 	struct read_file_data *mine = (struct read_file_data *)client_data;
457 	int64_t r;
458 
459 	/* We use off_t here because lseek() is declared that way. */
460 	/* See above for notes about when off_t is less than 64 bits. */
461 	r = lseek(mine->fd, request, whence);
462 	if (r >= 0)
463 		return r;
464 
465 	/* If the input is corrupted or truncated, fail. */
466 	if (mine->filename_type == FNT_STDIN)
467 		archive_set_error(a, errno, "Error seeking in stdin");
468 	else if (mine->filename_type == FNT_MBS)
469 		archive_set_error(a, errno, "Error seeking in '%s'",
470 		    mine->filename.m);
471 	else
472 		archive_set_error(a, errno, "Error seeking in '%S'",
473 		    mine->filename.w);
474 	return (ARCHIVE_FATAL);
475 }
476 
477 static int
478 file_close(struct archive *a, void *client_data)
479 {
480 	struct read_file_data *mine = (struct read_file_data *)client_data;
481 
482 	(void)a; /* UNUSED */
483 
484 	/* Only flush and close if open succeeded. */
485 	if (mine->fd >= 0) {
486 		/*
487 		 * Sometimes, we should flush the input before closing.
488 		 *   Regular files: faster to just close without flush.
489 		 *   Disk-like devices:  Ditto.
490 		 *   Tapes: must not flush (user might need to
491 		 *      read the "next" item on a non-rewind device).
492 		 *   Pipes and sockets:  must flush (otherwise, the
493 		 *      program feeding the pipe or socket may complain).
494 		 * Here, I flush everything except for regular files and
495 		 * device nodes.
496 		 */
497 		if (!S_ISREG(mine->st_mode)
498 		    && !S_ISCHR(mine->st_mode)
499 		    && !S_ISBLK(mine->st_mode)) {
500 			ssize_t bytesRead;
501 			do {
502 				bytesRead = read(mine->fd, mine->buffer,
503 				    mine->block_size);
504 			} while (bytesRead > 0);
505 		}
506 		/* If a named file was opened, then it needs to be closed. */
507 		if (mine->filename_type != FNT_STDIN)
508 			close(mine->fd);
509 	}
510 	free(mine->buffer);
511 	free(mine);
512 	return (ARCHIVE_OK);
513 }
514