1 /*- 2 * Copyright (c) 2003-2010 Tim Kientzle 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #include "archive_platform.h" 27 __FBSDID("$FreeBSD: head/lib/libarchive/archive_read_open_filename.c 201093 2009-12-28 02:28:44Z kientzle $"); 28 29 #ifdef HAVE_SYS_IOCTL_H 30 #include <sys/ioctl.h> 31 #endif 32 #ifdef HAVE_SYS_STAT_H 33 #include <sys/stat.h> 34 #endif 35 #ifdef HAVE_ERRNO_H 36 #include <errno.h> 37 #endif 38 #ifdef HAVE_FCNTL_H 39 #include <fcntl.h> 40 #endif 41 #ifdef HAVE_IO_H 42 #include <io.h> 43 #endif 44 #ifdef HAVE_STDLIB_H 45 #include <stdlib.h> 46 #endif 47 #ifdef HAVE_STRING_H 48 #include <string.h> 49 #endif 50 #ifdef HAVE_UNISTD_H 51 #include <unistd.h> 52 #endif 53 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) 54 #include <sys/disk.h> 55 #elif defined(__NetBSD__) || defined(__OpenBSD__) 56 #include <sys/disklabel.h> 57 #include <sys/dkio.h> 58 #elif defined(__DragonFly__) 59 #include <sys/diskslice.h> 60 #endif 61 62 #include "archive.h" 63 #include "archive_string.h" 64 65 #ifndef O_BINARY 66 #define O_BINARY 0 67 #endif 68 69 struct read_file_data { 70 int fd; 71 size_t block_size; 72 void *buffer; 73 mode_t st_mode; /* Mode bits for opened file. */ 74 char use_lseek; 75 enum fnt_e { FNT_STDIN, FNT_MBS, FNT_WCS } filename_type; 76 union { 77 char m[1];/* MBS filename. */ 78 wchar_t w[1];/* WCS filename. */ 79 } filename; /* Must be last! */ 80 }; 81 82 static int file_close(struct archive *, void *); 83 static int file_open_filename(struct archive *, enum fnt_e, const void *, 84 size_t); 85 static ssize_t file_read(struct archive *, void *, const void **buff); 86 static int64_t file_seek(struct archive *, void *, int64_t request, int); 87 static int64_t file_skip(struct archive *, void *, int64_t request); 88 static int64_t file_skip_lseek(struct archive *, void *, int64_t request); 89 90 int 91 archive_read_open_file(struct archive *a, const char *filename, 92 size_t block_size) 93 { 94 return (archive_read_open_filename(a, filename, block_size)); 95 } 96 97 int 98 archive_read_open_filename(struct archive *a, const char *filename, 99 size_t block_size) 100 { 101 enum fnt_e filename_type; 102 103 if (filename == NULL || filename[0] == '\0') { 104 filename_type = FNT_STDIN; 105 } else 106 filename_type = FNT_MBS; 107 return (file_open_filename(a, filename_type, filename, block_size)); 108 } 109 110 int 111 archive_read_open_filename_w(struct archive *a, const wchar_t *wfilename, 112 size_t block_size) 113 { 114 enum fnt_e filename_type; 115 116 if (wfilename == NULL || wfilename[0] == L'\0') { 117 filename_type = FNT_STDIN; 118 } else { 119 #if defined(_WIN32) && !defined(__CYGWIN__) 120 filename_type = FNT_WCS; 121 #else 122 /* 123 * POSIX system does not support a wchar_t interface for 124 * open() system call, so we have to translate a whcar_t 125 * filename to multi-byte one and use it. 126 */ 127 struct archive_string fn; 128 int r; 129 130 archive_string_init(&fn); 131 if (archive_string_append_from_wcs(&fn, wfilename, 132 wcslen(wfilename)) != 0) { 133 if (errno == ENOMEM) 134 archive_set_error(a, errno, 135 "Can't allocate memory"); 136 else 137 archive_set_error(a, EINVAL, 138 "Failed to convert a wide-character" 139 " filename to a multi-byte filename"); 140 archive_string_free(&fn); 141 return (ARCHIVE_FATAL); 142 } 143 r = file_open_filename(a, FNT_MBS, fn.s, block_size); 144 archive_string_free(&fn); 145 return (r); 146 #endif 147 } 148 return (file_open_filename(a, filename_type, wfilename, block_size)); 149 } 150 151 static int 152 file_open_filename(struct archive *a, enum fnt_e filename_type, 153 const void *_filename, size_t block_size) 154 { 155 struct stat st; 156 struct read_file_data *mine; 157 void *buffer; 158 const char *filename = NULL; 159 const wchar_t *wfilename = NULL; 160 int fd; 161 int is_disk_like = 0; 162 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) 163 off_t mediasize = 0; /* FreeBSD-specific, so off_t okay here. */ 164 #elif defined(__NetBSD__) || defined(__OpenBSD__) 165 struct disklabel dl; 166 #elif defined(__DragonFly__) 167 struct partinfo pi; 168 #endif 169 170 archive_clear_error(a); 171 if (filename_type == FNT_STDIN) { 172 /* We used to delegate stdin support by 173 * directly calling archive_read_open_fd(a,0,block_size) 174 * here, but that doesn't (and shouldn't) handle the 175 * end-of-file flush when reading stdout from a pipe. 176 * Basically, read_open_fd() is intended for folks who 177 * are willing to handle such details themselves. This 178 * API is intended to be a little smarter for folks who 179 * want easy handling of the common case. 180 */ 181 fd = 0; 182 #if defined(__CYGWIN__) || defined(_WIN32) 183 setmode(0, O_BINARY); 184 #endif 185 filename = ""; 186 } else if (filename_type == FNT_MBS) { 187 filename = (const char *)_filename; 188 fd = open(filename, O_RDONLY | O_BINARY); 189 if (fd < 0) { 190 archive_set_error(a, errno, 191 "Failed to open '%s'", filename); 192 return (ARCHIVE_FATAL); 193 } 194 } else { 195 #if defined(_WIN32) && !defined(__CYGWIN__) 196 wfilename = (const wchar_t *)_filename; 197 fd = _wopen(wfilename, O_RDONLY | O_BINARY); 198 if (fd < 0 && errno == ENOENT) { 199 wchar_t *fullpath; 200 fullpath = __la_win_permissive_name_w(wfilename); 201 if (fullpath != NULL) { 202 fd = _wopen(fullpath, O_RDONLY | O_BINARY); 203 free(fullpath); 204 } 205 } 206 if (fd < 0) { 207 archive_set_error(a, errno, 208 "Failed to open '%S'", wfilename); 209 return (ARCHIVE_FATAL); 210 } 211 #else 212 archive_set_error(a, ARCHIVE_ERRNO_MISC, 213 "Unexpedted operation in archive_read_open_filename"); 214 return (ARCHIVE_FATAL); 215 #endif 216 } 217 if (fstat(fd, &st) != 0) { 218 if (filename_type == FNT_WCS) 219 archive_set_error(a, errno, "Can't stat '%S'", 220 wfilename); 221 else 222 archive_set_error(a, errno, "Can't stat '%s'", 223 filename); 224 return (ARCHIVE_FATAL); 225 } 226 227 /* 228 * Determine whether the input looks like a disk device or a 229 * tape device. The results are used below to select an I/O 230 * strategy: 231 * = "disk-like" devices support arbitrary lseek() and will 232 * support I/O requests of any size. So we get easy skipping 233 * and can cheat on block sizes to get better performance. 234 * = "tape-like" devices require strict blocking and use 235 * specialized ioctls for seeking. 236 * = "socket-like" devices cannot seek at all but can improve 237 * performance by using nonblocking I/O to read "whatever is 238 * available right now". 239 * 240 * Right now, we only specially recognize disk-like devices, 241 * but it should be straightforward to add probes and strategy 242 * here for tape-like and socket-like devices. 243 */ 244 if (S_ISREG(st.st_mode)) { 245 /* Safety: Tell the extractor not to overwrite the input. */ 246 archive_read_extract_set_skip_file(a, st.st_dev, st.st_ino); 247 /* Regular files act like disks. */ 248 is_disk_like = 1; 249 } 250 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) 251 /* FreeBSD: if it supports DIOCGMEDIASIZE ioctl, it's disk-like. */ 252 else if (S_ISCHR(st.st_mode) && 253 ioctl(fd, DIOCGMEDIASIZE, &mediasize) == 0 && 254 mediasize > 0) { 255 is_disk_like = 1; 256 } 257 #elif defined(__NetBSD__) || defined(__OpenBSD__) 258 /* Net/OpenBSD: if it supports DIOCGDINFO ioctl, it's disk-like. */ 259 else if ((S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) && 260 ioctl(fd, DIOCGDINFO, &dl) == 0 && 261 dl.d_partitions[DISKPART(st.st_rdev)].p_size > 0) { 262 is_disk_like = 1; 263 } 264 #elif defined(__DragonFly__) 265 /* DragonFly BSD: if it supports DIOCGPART ioctl, it's disk-like. */ 266 else if (S_ISCHR(st.st_mode) && 267 ioctl(fd, DIOCGPART, &pi) == 0 && 268 pi.media_size > 0) { 269 is_disk_like = 1; 270 } 271 #elif defined(__linux__) 272 /* Linux: All block devices are disk-like. */ 273 else if (S_ISBLK(st.st_mode) && 274 lseek(fd, 0, SEEK_CUR) == 0 && 275 lseek(fd, 0, SEEK_SET) == 0 && 276 lseek(fd, 0, SEEK_END) > 0 && 277 lseek(fd, 0, SEEK_SET) == 0) { 278 is_disk_like = 1; 279 } 280 #endif 281 /* TODO: Add an "is_tape_like" variable and appropriate tests. */ 282 283 if (filename_type == FNT_WCS) 284 mine = (struct read_file_data *)calloc(1, 285 sizeof(*mine) + wcslen(wfilename) * sizeof(wchar_t)); 286 else 287 mine = (struct read_file_data *)calloc(1, 288 sizeof(*mine) + strlen(filename)); 289 /* Disk-like devices prefer power-of-two block sizes. */ 290 /* Use provided block_size as a guide so users have some control. */ 291 if (is_disk_like) { 292 size_t new_block_size = 64 * 1024; 293 while (new_block_size < block_size 294 && new_block_size < 64 * 1024 * 1024) 295 new_block_size *= 2; 296 block_size = new_block_size; 297 } 298 buffer = malloc(block_size); 299 if (mine == NULL || buffer == NULL) { 300 archive_set_error(a, ENOMEM, "No memory"); 301 free(mine); 302 free(buffer); 303 return (ARCHIVE_FATAL); 304 } 305 if (filename_type == FNT_WCS) 306 wcscpy(mine->filename.w, wfilename); 307 else 308 strcpy(mine->filename.m, filename); 309 mine->filename_type = filename_type; 310 mine->block_size = block_size; 311 mine->buffer = buffer; 312 mine->fd = fd; 313 /* Remember mode so close can decide whether to flush. */ 314 mine->st_mode = st.st_mode; 315 316 /* Disk-like inputs can use lseek(). */ 317 if (is_disk_like) { 318 archive_read_set_seek_callback(a, file_seek); 319 mine->use_lseek = 1; 320 } 321 322 archive_read_set_read_callback(a, file_read); 323 archive_read_set_skip_callback(a, file_skip); 324 archive_read_set_close_callback(a, file_close); 325 archive_read_set_callback_data(a, mine); 326 return (archive_read_open1(a)); 327 } 328 329 static ssize_t 330 file_read(struct archive *a, void *client_data, const void **buff) 331 { 332 struct read_file_data *mine = (struct read_file_data *)client_data; 333 ssize_t bytes_read; 334 335 /* TODO: If a recent lseek() operation has left us 336 * mis-aligned, read and return a short block to try to get 337 * us back in alignment. */ 338 339 /* TODO: Someday, try mmap() here; if that succeeds, give 340 * the entire file to libarchive as a single block. That 341 * could be a lot faster than block-by-block manual I/O. */ 342 343 /* TODO: We might be able to improve performance on pipes and 344 * sockets by setting non-blocking I/O and just accepting 345 * whatever we get here instead of waiting for a full block 346 * worth of data. */ 347 348 *buff = mine->buffer; 349 for (;;) { 350 bytes_read = read(mine->fd, mine->buffer, mine->block_size); 351 if (bytes_read < 0) { 352 if (errno == EINTR) 353 continue; 354 else if (mine->filename_type == FNT_STDIN) 355 archive_set_error(a, errno, 356 "Error reading stdin"); 357 else if (mine->filename_type == FNT_MBS) 358 archive_set_error(a, errno, 359 "Error reading '%s'", mine->filename.m); 360 else 361 archive_set_error(a, errno, 362 "Error reading '%S'", mine->filename.w); 363 } 364 return (bytes_read); 365 } 366 } 367 368 /* 369 * Regular files and disk-like block devices can use simple lseek 370 * without needing to round the request to the block size. 371 * 372 * TODO: This can leave future reads mis-aligned. Since we know the 373 * offset here, we should store it and use it in file_read() above 374 * to determine whether we should perform a short read to get back 375 * into alignment. Long series of mis-aligned reads can negatively 376 * impact disk throughput. (Of course, the performance impact should 377 * be carefully tested; extra code complexity is only worthwhile if 378 * it does provide measurable improvement.) 379 * 380 * TODO: Be lazy about the actual seek. There are a few pathological 381 * cases where libarchive makes a bunch of seek requests in a row 382 * without any intervening reads. This isn't a huge performance 383 * problem, since the kernel handles seeks lazily already, but 384 * it would be very slightly faster if we simply remembered the 385 * seek request here and then actually performed the seek at the 386 * top of the read callback above. 387 */ 388 static int64_t 389 file_skip_lseek(struct archive *a, void *client_data, int64_t request) 390 { 391 struct read_file_data *mine = (struct read_file_data *)client_data; 392 #if defined(_WIN32) && !defined(__CYGWIN__) 393 /* We use _lseeki64() on Windows. */ 394 int64_t old_offset, new_offset; 395 #else 396 off_t old_offset, new_offset; 397 #endif 398 399 /* We use off_t here because lseek() is declared that way. */ 400 401 /* TODO: Deal with case where off_t isn't 64 bits. 402 * This shouldn't be a problem on Linux or other POSIX 403 * systems, since the configuration logic for libarchive 404 * tries to obtain a 64-bit off_t. It's still an issue 405 * on Windows, though, so it might suffice to just use 406 * _lseeki64() on Windows. 407 */ 408 if ((old_offset = lseek(mine->fd, 0, SEEK_CUR)) >= 0 && 409 (new_offset = lseek(mine->fd, request, SEEK_CUR)) >= 0) 410 return (new_offset - old_offset); 411 412 /* If lseek() fails, don't bother trying again. */ 413 mine->use_lseek = 0; 414 415 /* Let libarchive recover with read+discard */ 416 if (errno == ESPIPE) 417 return (0); 418 419 /* If the input is corrupted or truncated, fail. */ 420 if (mine->filename_type == FNT_STDIN) 421 archive_set_error(a, errno, "Error seeking in stdin"); 422 else if (mine->filename_type == FNT_MBS) 423 archive_set_error(a, errno, "Error seeking in '%s'", 424 mine->filename.m); 425 else 426 archive_set_error(a, errno, "Error seeking in '%S'", 427 mine->filename.w); 428 return (-1); 429 } 430 431 432 /* 433 * TODO: Implement another file_skip_XXXX that uses MTIO ioctls to 434 * accelerate operation on tape drives. 435 */ 436 437 static int64_t 438 file_skip(struct archive *a, void *client_data, int64_t request) 439 { 440 struct read_file_data *mine = (struct read_file_data *)client_data; 441 442 /* Delegate skip requests. */ 443 if (mine->use_lseek) 444 return (file_skip_lseek(a, client_data, request)); 445 446 /* If we can't skip, return 0; libarchive will read+discard instead. */ 447 return (0); 448 } 449 450 /* 451 * TODO: Store the offset and use it in the read callback. 452 */ 453 static int64_t 454 file_seek(struct archive *a, void *client_data, int64_t request, int whence) 455 { 456 struct read_file_data *mine = (struct read_file_data *)client_data; 457 int64_t r; 458 459 /* We use off_t here because lseek() is declared that way. */ 460 /* See above for notes about when off_t is less than 64 bits. */ 461 r = lseek(mine->fd, request, whence); 462 if (r >= 0) 463 return r; 464 465 /* If the input is corrupted or truncated, fail. */ 466 if (mine->filename_type == FNT_STDIN) 467 archive_set_error(a, errno, "Error seeking in stdin"); 468 else if (mine->filename_type == FNT_MBS) 469 archive_set_error(a, errno, "Error seeking in '%s'", 470 mine->filename.m); 471 else 472 archive_set_error(a, errno, "Error seeking in '%S'", 473 mine->filename.w); 474 return (ARCHIVE_FATAL); 475 } 476 477 static int 478 file_close(struct archive *a, void *client_data) 479 { 480 struct read_file_data *mine = (struct read_file_data *)client_data; 481 482 (void)a; /* UNUSED */ 483 484 /* Only flush and close if open succeeded. */ 485 if (mine->fd >= 0) { 486 /* 487 * Sometimes, we should flush the input before closing. 488 * Regular files: faster to just close without flush. 489 * Disk-like devices: Ditto. 490 * Tapes: must not flush (user might need to 491 * read the "next" item on a non-rewind device). 492 * Pipes and sockets: must flush (otherwise, the 493 * program feeding the pipe or socket may complain). 494 * Here, I flush everything except for regular files and 495 * device nodes. 496 */ 497 if (!S_ISREG(mine->st_mode) 498 && !S_ISCHR(mine->st_mode) 499 && !S_ISBLK(mine->st_mode)) { 500 ssize_t bytesRead; 501 do { 502 bytesRead = read(mine->fd, mine->buffer, 503 mine->block_size); 504 } while (bytesRead > 0); 505 } 506 /* If a named file was opened, then it needs to be closed. */ 507 if (mine->filename_type != FNT_STDIN) 508 close(mine->fd); 509 } 510 free(mine->buffer); 511 free(mine); 512 return (ARCHIVE_OK); 513 } 514