1 /*-
2  * Copyright (c) 2003-2010 Tim Kientzle
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #include "archive_platform.h"
27 __FBSDID("$FreeBSD: head/lib/libarchive/archive_read_open_filename.c 201093 2009-12-28 02:28:44Z kientzle $");
28 
29 #ifdef HAVE_SYS_IOCTL_H
30 #include <sys/ioctl.h>
31 #endif
32 #ifdef HAVE_SYS_STAT_H
33 #include <sys/stat.h>
34 #endif
35 #ifdef HAVE_ERRNO_H
36 #include <errno.h>
37 #endif
38 #ifdef HAVE_FCNTL_H
39 #include <fcntl.h>
40 #endif
41 #ifdef HAVE_IO_H
42 #include <io.h>
43 #endif
44 #ifdef HAVE_STDLIB_H
45 #include <stdlib.h>
46 #endif
47 #ifdef HAVE_STRING_H
48 #include <string.h>
49 #endif
50 #ifdef HAVE_UNISTD_H
51 #include <unistd.h>
52 #endif
53 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
54 #include <sys/disk.h>
55 #elif defined(__NetBSD__) || defined(__OpenBSD__)
56 #include <sys/disklabel.h>
57 #include <sys/dkio.h>
58 #elif defined(__DragonFly__)
59 #include <sys/diskslice.h>
60 #endif
61 
62 #include "archive.h"
63 #include "archive_private.h"
64 #include "archive_string.h"
65 
66 #ifndef O_BINARY
67 #define O_BINARY 0
68 #endif
69 #ifndef O_CLOEXEC
70 #define O_CLOEXEC	0
71 #endif
72 
/*
 * Per-file client data handed to the read/skip/seek/close callbacks.
 *
 * The structure is allocated with extra bytes past its end so that the
 * trailing 'filename' union can hold the complete, NUL-terminated name;
 * the one-element arrays only reserve room for the terminator.
 */
struct read_file_data {
	/* Descriptor being read; -1 whenever no file is currently open. */
	int	 fd;
	/* Size in bytes of 'buffer' and of each read() request. */
	size_t	 block_size;
	/* I/O buffer: allocated by file_open(), released by file_close2(). */
	void	*buffer;
	mode_t	 st_mode;  /* Mode bits for opened file. */
	/* Non-zero while skip requests may be satisfied with lseek(). */
	char	 use_lseek;
	/* Selects stdin or which member of 'filename' is valid. */
	enum fnt_e { FNT_STDIN, FNT_MBS, FNT_WCS } filename_type;
	union {
		char	 m[1];/* MBS filename. */
		wchar_t	 w[1];/* WCS filename. */
	} filename; /* Must be last! */
};
85 
/*
 * Forward declarations of the client callbacks defined later in this
 * file.  In each of them the 'void *' client data argument is cast to
 * a 'struct read_file_data *'.
 */
static int	file_open(struct archive *, void *);
static int	file_close(struct archive *, void *);
static int file_close2(struct archive *, void *);
static int file_switch(struct archive *, void *, void *);
static ssize_t	file_read(struct archive *, void *, const void **buff);
static int64_t	file_seek(struct archive *, void *, int64_t request, int);
static int64_t	file_skip(struct archive *, void *, int64_t request);
static int64_t	file_skip_lseek(struct archive *, void *, int64_t request);
94 
/*
 * Thin alias for archive_read_open_filename(): the arguments are
 * forwarded untouched and its status is returned as-is.
 */
int
archive_read_open_file(struct archive *a, const char *filename,
    size_t block_size)
{
	int status;

	status = archive_read_open_filename(a, filename, block_size);
	return (status);
}
101 
/*
 * Open a single file by name.  The name is wrapped in a two-slot,
 * NULL-terminated list and handed to archive_read_open_filenames(),
 * whose status is returned.
 */
int
archive_read_open_filename(struct archive *a, const char *filename,
    size_t block_size)
{
	const char *name_list[2];

	name_list[1] = NULL;		/* list terminator */
	name_list[0] = filename;	/* the only entry */
	return (archive_read_open_filenames(a, name_list, block_size));
}
111 
112 int
113 archive_read_open_filenames(struct archive *a, const char **filenames,
114     size_t block_size)
115 {
116 	struct read_file_data *mine;
117 	const char *filename = NULL;
118 	if (filenames)
119 		filename = *(filenames++);
120 
121 	archive_clear_error(a);
122 	do
123 	{
124 		if (filename == NULL)
125 			filename = "";
126 		mine = (struct read_file_data *)calloc(1,
127 			sizeof(*mine) + strlen(filename));
128 		if (mine == NULL)
129 			goto no_memory;
130 		strcpy(mine->filename.m, filename);
131 		mine->block_size = block_size;
132 		mine->fd = -1;
133 		mine->buffer = NULL;
134 		mine->st_mode = mine->use_lseek = 0;
135 		if (filename == NULL || filename[0] == '\0') {
136 			mine->filename_type = FNT_STDIN;
137 		} else
138 			mine->filename_type = FNT_MBS;
139 		if (archive_read_append_callback_data(a, mine) != (ARCHIVE_OK))
140 			return (ARCHIVE_FATAL);
141 		if (filenames == NULL)
142 			break;
143 		filename = *(filenames++);
144 	} while (filename != NULL && filename[0] != '\0');
145 	archive_read_set_open_callback(a, file_open);
146 	archive_read_set_read_callback(a, file_read);
147 	archive_read_set_skip_callback(a, file_skip);
148 	archive_read_set_close_callback(a, file_close);
149 	archive_read_set_switch_callback(a, file_switch);
150 	archive_read_set_seek_callback(a, file_seek);
151 
152 	return (archive_read_open1(a));
153 no_memory:
154 	archive_set_error(a, ENOMEM, "No memory");
155 	return (ARCHIVE_FATAL);
156 }
157 
158 int
159 archive_read_open_filename_w(struct archive *a, const wchar_t *wfilename,
160     size_t block_size)
161 {
162 	struct read_file_data *mine = (struct read_file_data *)calloc(1,
163 		sizeof(*mine) + wcslen(wfilename) * sizeof(wchar_t));
164 	if (!mine)
165 	{
166 		archive_set_error(a, ENOMEM, "No memory");
167 		return (ARCHIVE_FATAL);
168 	}
169 	mine->fd = -1;
170 	mine->block_size = block_size;
171 
172 	if (wfilename == NULL || wfilename[0] == L'\0') {
173 		mine->filename_type = FNT_STDIN;
174 	} else {
175 #if defined(_WIN32) && !defined(__CYGWIN__)
176 		mine->filename_type = FNT_WCS;
177 		wcscpy(mine->filename.w, wfilename);
178 #else
179 		/*
180 		 * POSIX system does not support a wchar_t interface for
181 		 * open() system call, so we have to translate a wchar_t
182 		 * filename to multi-byte one and use it.
183 		 */
184 		struct archive_string fn;
185 
186 		archive_string_init(&fn);
187 		if (archive_string_append_from_wcs(&fn, wfilename,
188 		    wcslen(wfilename)) != 0) {
189 			if (errno == ENOMEM)
190 				archive_set_error(a, errno,
191 				    "Can't allocate memory");
192 			else
193 				archive_set_error(a, EINVAL,
194 				    "Failed to convert a wide-character"
195 				    " filename to a multi-byte filename");
196 			archive_string_free(&fn);
197 			free(mine);
198 			return (ARCHIVE_FATAL);
199 		}
200 		mine->filename_type = FNT_MBS;
201 		strcpy(mine->filename.m, fn.s);
202 		archive_string_free(&fn);
203 #endif
204 	}
205 	if (archive_read_append_callback_data(a, mine) != (ARCHIVE_OK))
206 		return (ARCHIVE_FATAL);
207 	archive_read_set_open_callback(a, file_open);
208 	archive_read_set_read_callback(a, file_read);
209 	archive_read_set_skip_callback(a, file_skip);
210 	archive_read_set_close_callback(a, file_close);
211 	archive_read_set_switch_callback(a, file_switch);
212 	archive_read_set_seek_callback(a, file_seek);
213 
214 	return (archive_read_open1(a));
215 }
216 
217 static int
218 file_open(struct archive *a, void *client_data)
219 {
220 	struct stat st;
221 	struct read_file_data *mine = (struct read_file_data *)client_data;
222 	void *buffer;
223 	const char *filename = NULL;
224 	const wchar_t *wfilename = NULL;
225 	int fd;
226 	int is_disk_like = 0;
227 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
228 	off_t mediasize = 0; /* FreeBSD-specific, so off_t okay here. */
229 #elif defined(__NetBSD__) || defined(__OpenBSD__)
230 	struct disklabel dl;
231 #elif defined(__DragonFly__)
232 	struct partinfo pi;
233 #endif
234 
235 	archive_clear_error(a);
236 	if (mine->filename_type == FNT_STDIN) {
237 		/* We used to delegate stdin support by
238 		 * directly calling archive_read_open_fd(a,0,block_size)
239 		 * here, but that doesn't (and shouldn't) handle the
240 		 * end-of-file flush when reading stdout from a pipe.
241 		 * Basically, read_open_fd() is intended for folks who
242 		 * are willing to handle such details themselves.  This
243 		 * API is intended to be a little smarter for folks who
244 		 * want easy handling of the common case.
245 		 */
246 		fd = 0;
247 #if defined(__CYGWIN__) || defined(_WIN32)
248 		setmode(0, O_BINARY);
249 #endif
250 		filename = "";
251 	} else if (mine->filename_type == FNT_MBS) {
252 		filename = mine->filename.m;
253 		fd = open(filename, O_RDONLY | O_BINARY | O_CLOEXEC);
254 		__archive_ensure_cloexec_flag(fd);
255 		if (fd < 0) {
256 			archive_set_error(a, errno,
257 			    "Failed to open '%s'", filename);
258 			return (ARCHIVE_FATAL);
259 		}
260 	} else {
261 #if defined(_WIN32) && !defined(__CYGWIN__)
262 		wfilename = mine->filename.w;
263 		fd = _wopen(wfilename, O_RDONLY | O_BINARY);
264 		if (fd < 0 && errno == ENOENT) {
265 			wchar_t *fullpath;
266 			fullpath = __la_win_permissive_name_w(wfilename);
267 			if (fullpath != NULL) {
268 				fd = _wopen(fullpath, O_RDONLY | O_BINARY);
269 				free(fullpath);
270 			}
271 		}
272 		if (fd < 0) {
273 			archive_set_error(a, errno,
274 			    "Failed to open '%S'", wfilename);
275 			return (ARCHIVE_FATAL);
276 		}
277 #else
278 		archive_set_error(a, ARCHIVE_ERRNO_MISC,
279 		    "Unexpedted operation in archive_read_open_filename");
280 		return (ARCHIVE_FATAL);
281 #endif
282 	}
283 	if (fstat(fd, &st) != 0) {
284 		if (mine->filename_type == FNT_WCS)
285 			archive_set_error(a, errno, "Can't stat '%S'",
286 			    wfilename);
287 		else
288 			archive_set_error(a, errno, "Can't stat '%s'",
289 			    filename);
290 		return (ARCHIVE_FATAL);
291 	}
292 
293 	/*
294 	 * Determine whether the input looks like a disk device or a
295 	 * tape device.  The results are used below to select an I/O
296 	 * strategy:
297 	 *  = "disk-like" devices support arbitrary lseek() and will
298 	 *    support I/O requests of any size.  So we get easy skipping
299 	 *    and can cheat on block sizes to get better performance.
300 	 *  = "tape-like" devices require strict blocking and use
301 	 *    specialized ioctls for seeking.
302 	 *  = "socket-like" devices cannot seek at all but can improve
303 	 *    performance by using nonblocking I/O to read "whatever is
304 	 *    available right now".
305 	 *
306 	 * Right now, we only specially recognize disk-like devices,
307 	 * but it should be straightforward to add probes and strategy
308 	 * here for tape-like and socket-like devices.
309 	 */
310 	if (S_ISREG(st.st_mode)) {
311 		/* Safety:  Tell the extractor not to overwrite the input. */
312 		archive_read_extract_set_skip_file(a, st.st_dev, st.st_ino);
313 		/* Regular files act like disks. */
314 		is_disk_like = 1;
315 	}
316 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
317 	/* FreeBSD: if it supports DIOCGMEDIASIZE ioctl, it's disk-like. */
318 	else if (S_ISCHR(st.st_mode) &&
319 	    ioctl(fd, DIOCGMEDIASIZE, &mediasize) == 0 &&
320 	    mediasize > 0) {
321 		is_disk_like = 1;
322 	}
323 #elif defined(__NetBSD__) || defined(__OpenBSD__)
324 	/* Net/OpenBSD: if it supports DIOCGDINFO ioctl, it's disk-like. */
325 	else if ((S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) &&
326 	    ioctl(fd, DIOCGDINFO, &dl) == 0 &&
327 	    dl.d_partitions[DISKPART(st.st_rdev)].p_size > 0) {
328 		is_disk_like = 1;
329 	}
330 #elif defined(__DragonFly__)
331 	/* DragonFly BSD:  if it supports DIOCGPART ioctl, it's disk-like. */
332 	else if (S_ISCHR(st.st_mode) &&
333 	    ioctl(fd, DIOCGPART, &pi) == 0 &&
334 	    pi.media_size > 0) {
335 		is_disk_like = 1;
336 	}
337 #elif defined(__linux__)
338 	/* Linux:  All block devices are disk-like. */
339 	else if (S_ISBLK(st.st_mode) &&
340 	    lseek(fd, 0, SEEK_CUR) == 0 &&
341 	    lseek(fd, 0, SEEK_SET) == 0 &&
342 	    lseek(fd, 0, SEEK_END) > 0 &&
343 	    lseek(fd, 0, SEEK_SET) == 0) {
344 		is_disk_like = 1;
345 	}
346 #endif
347 	/* TODO: Add an "is_tape_like" variable and appropriate tests. */
348 
349 	/* Disk-like devices prefer power-of-two block sizes.  */
350 	/* Use provided block_size as a guide so users have some control. */
351 	if (is_disk_like) {
352 		size_t new_block_size = 64 * 1024;
353 		while (new_block_size < mine->block_size
354 		    && new_block_size < 64 * 1024 * 1024)
355 			new_block_size *= 2;
356 		mine->block_size = new_block_size;
357 	}
358 	buffer = malloc(mine->block_size);
359 	if (mine == NULL || buffer == NULL) {
360 		archive_set_error(a, ENOMEM, "No memory");
361 		free(mine);
362 		free(buffer);
363 		return (ARCHIVE_FATAL);
364 	}
365 	mine->buffer = buffer;
366 	mine->fd = fd;
367 	/* Remember mode so close can decide whether to flush. */
368 	mine->st_mode = st.st_mode;
369 
370 	/* Disk-like inputs can use lseek(). */
371 	if (is_disk_like)
372 		mine->use_lseek = 1;
373 
374 	return (ARCHIVE_OK);
375 }
376 
377 static ssize_t
378 file_read(struct archive *a, void *client_data, const void **buff)
379 {
380 	struct read_file_data *mine = (struct read_file_data *)client_data;
381 	ssize_t bytes_read;
382 
383 	/* TODO: If a recent lseek() operation has left us
384 	 * mis-aligned, read and return a short block to try to get
385 	 * us back in alignment. */
386 
387 	/* TODO: Someday, try mmap() here; if that succeeds, give
388 	 * the entire file to libarchive as a single block.  That
389 	 * could be a lot faster than block-by-block manual I/O. */
390 
391 	/* TODO: We might be able to improve performance on pipes and
392 	 * sockets by setting non-blocking I/O and just accepting
393 	 * whatever we get here instead of waiting for a full block
394 	 * worth of data. */
395 
396 	*buff = mine->buffer;
397 	for (;;) {
398 		bytes_read = read(mine->fd, mine->buffer, mine->block_size);
399 		if (bytes_read < 0) {
400 			if (errno == EINTR)
401 				continue;
402 			else if (mine->filename_type == FNT_STDIN)
403 				archive_set_error(a, errno,
404 				    "Error reading stdin");
405 			else if (mine->filename_type == FNT_MBS)
406 				archive_set_error(a, errno,
407 				    "Error reading '%s'", mine->filename.m);
408 			else
409 				archive_set_error(a, errno,
410 				    "Error reading '%S'", mine->filename.w);
411 		}
412 		return (bytes_read);
413 	}
414 }
415 
416 /*
417  * Regular files and disk-like block devices can use simple lseek
418  * without needing to round the request to the block size.
419  *
420  * TODO: This can leave future reads mis-aligned.  Since we know the
421  * offset here, we should store it and use it in file_read() above
422  * to determine whether we should perform a short read to get back
423  * into alignment.  Long series of mis-aligned reads can negatively
424  * impact disk throughput.  (Of course, the performance impact should
425  * be carefully tested; extra code complexity is only worthwhile if
426  * it does provide measurable improvement.)
427  *
428  * TODO: Be lazy about the actual seek.  There are a few pathological
429  * cases where libarchive makes a bunch of seek requests in a row
430  * without any intervening reads.  This isn't a huge performance
431  * problem, since the kernel handles seeks lazily already, but
432  * it would be very slightly faster if we simply remembered the
433  * seek request here and then actually performed the seek at the
434  * top of the read callback above.
435  */
436 static int64_t
437 file_skip_lseek(struct archive *a, void *client_data, int64_t request)
438 {
439 	struct read_file_data *mine = (struct read_file_data *)client_data;
440 #if defined(_WIN32) && !defined(__CYGWIN__)
441 	/* We use _lseeki64() on Windows. */
442 	int64_t old_offset, new_offset;
443 #else
444 	off_t old_offset, new_offset;
445 #endif
446 
447 	/* We use off_t here because lseek() is declared that way. */
448 
449 	/* TODO: Deal with case where off_t isn't 64 bits.
450 	 * This shouldn't be a problem on Linux or other POSIX
451 	 * systems, since the configuration logic for libarchive
452 	 * tries to obtain a 64-bit off_t.
453 	 */
454 	if ((old_offset = lseek(mine->fd, 0, SEEK_CUR)) >= 0 &&
455 	    (new_offset = lseek(mine->fd, request, SEEK_CUR)) >= 0)
456 		return (new_offset - old_offset);
457 
458 	/* If lseek() fails, don't bother trying again. */
459 	mine->use_lseek = 0;
460 
461 	/* Let libarchive recover with read+discard */
462 	if (errno == ESPIPE)
463 		return (0);
464 
465 	/* If the input is corrupted or truncated, fail. */
466 	if (mine->filename_type == FNT_STDIN)
467 		archive_set_error(a, errno, "Error seeking in stdin");
468 	else if (mine->filename_type == FNT_MBS)
469 		archive_set_error(a, errno, "Error seeking in '%s'",
470 		    mine->filename.m);
471 	else
472 		archive_set_error(a, errno, "Error seeking in '%S'",
473 		    mine->filename.w);
474 	return (-1);
475 }
476 
477 
478 /*
479  * TODO: Implement another file_skip_XXXX that uses MTIO ioctls to
480  * accelerate operation on tape drives.
481  */
482 
483 static int64_t
484 file_skip(struct archive *a, void *client_data, int64_t request)
485 {
486 	struct read_file_data *mine = (struct read_file_data *)client_data;
487 
488 	/* Delegate skip requests. */
489 	if (mine->use_lseek)
490 		return (file_skip_lseek(a, client_data, request));
491 
492 	/* If we can't skip, return 0; libarchive will read+discard instead. */
493 	return (0);
494 }
495 
496 /*
497  * TODO: Store the offset and use it in the read callback.
498  */
499 static int64_t
500 file_seek(struct archive *a, void *client_data, int64_t request, int whence)
501 {
502 	struct read_file_data *mine = (struct read_file_data *)client_data;
503 	int64_t r;
504 
505 	/* We use off_t here because lseek() is declared that way. */
506 	/* See above for notes about when off_t is less than 64 bits. */
507 	r = lseek(mine->fd, request, whence);
508 	if (r >= 0)
509 		return r;
510 
511 	/* If the input is corrupted or truncated, fail. */
512 	if (mine->filename_type == FNT_STDIN)
513 		archive_set_error(a, errno, "Error seeking in stdin");
514 	else if (mine->filename_type == FNT_MBS)
515 		archive_set_error(a, errno, "Error seeking in '%s'",
516 		    mine->filename.m);
517 	else
518 		archive_set_error(a, errno, "Error seeking in '%S'",
519 		    mine->filename.w);
520 	return (ARCHIVE_FATAL);
521 }
522 
523 static int
524 file_close2(struct archive *a, void *client_data)
525 {
526 	struct read_file_data *mine = (struct read_file_data *)client_data;
527 
528 	(void)a; /* UNUSED */
529 
530 	/* Only flush and close if open succeeded. */
531 	if (mine->fd >= 0) {
532 		/*
533 		 * Sometimes, we should flush the input before closing.
534 		 *   Regular files: faster to just close without flush.
535 		 *   Disk-like devices:  Ditto.
536 		 *   Tapes: must not flush (user might need to
537 		 *      read the "next" item on a non-rewind device).
538 		 *   Pipes and sockets:  must flush (otherwise, the
539 		 *      program feeding the pipe or socket may complain).
540 		 * Here, I flush everything except for regular files and
541 		 * device nodes.
542 		 */
543 		if (!S_ISREG(mine->st_mode)
544 		    && !S_ISCHR(mine->st_mode)
545 		    && !S_ISBLK(mine->st_mode)) {
546 			ssize_t bytesRead;
547 			do {
548 				bytesRead = read(mine->fd, mine->buffer,
549 				    mine->block_size);
550 			} while (bytesRead > 0);
551 		}
552 		/* If a named file was opened, then it needs to be closed. */
553 		if (mine->filename_type != FNT_STDIN)
554 			close(mine->fd);
555 	}
556 	free(mine->buffer);
557 	mine->buffer = NULL;
558 	mine->fd = -1;
559 	return (ARCHIVE_OK);
560 }
561 
562 static int
563 file_close(struct archive *a, void *client_data)
564 {
565 	struct read_file_data *mine = (struct read_file_data *)client_data;
566 	file_close2(a, client_data);
567 	free(mine);
568 	return (ARCHIVE_OK);
569 }
570 
/*
 * Switch callback: shut down I/O on the first record (the record
 * itself stays allocated) and open the file described by the second.
 * Returns the status of file_open() for the second record.
 */
static int
file_switch(struct archive *a, void *client_data1, void *client_data2)
{
	int status;

	(void)file_close2(a, client_data1);
	status = file_open(a, client_data2);
	return status;
}
577