1 /*- 2 * Copyright (c) 2003-2010 Tim Kientzle 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #include "archive_platform.h" 27 28 #ifdef HAVE_SYS_IOCTL_H 29 #include <sys/ioctl.h> 30 #endif 31 #ifdef HAVE_SYS_STAT_H 32 #include <sys/stat.h> 33 #endif 34 #ifdef HAVE_ERRNO_H 35 #include <errno.h> 36 #endif 37 #ifdef HAVE_FCNTL_H 38 #include <fcntl.h> 39 #endif 40 #ifdef HAVE_IO_H 41 #include <io.h> 42 #endif 43 #ifdef HAVE_STDLIB_H 44 #include <stdlib.h> 45 #endif 46 #ifdef HAVE_STRING_H 47 #include <string.h> 48 #endif 49 #ifdef HAVE_UNISTD_H 50 #include <unistd.h> 51 #endif 52 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) 53 #include <sys/disk.h> 54 #elif defined(__NetBSD__) || defined(__OpenBSD__) 55 #include <sys/disklabel.h> 56 #include <sys/dkio.h> 57 #elif defined(__DragonFly__) 58 #include <sys/diskslice.h> 59 #endif 60 61 #include "archive.h" 62 #include "archive_private.h" 63 #include "archive_string.h" 64 65 #ifndef O_BINARY 66 #define O_BINARY 0 67 #endif 68 #ifndef O_CLOEXEC 69 #define O_CLOEXEC 0 70 #endif 71 72 struct read_file_data { 73 int fd; 74 size_t block_size; 75 void *buffer; 76 mode_t st_mode; /* Mode bits for opened file. */ 77 char use_lseek; 78 enum fnt_e { FNT_STDIN, FNT_MBS, FNT_WCS } filename_type; 79 union { 80 char m[1];/* MBS filename. */ 81 wchar_t w[1];/* WCS filename. */ 82 } filename; /* Must be last! */ 83 }; 84 85 static int file_open(struct archive *, void *); 86 static int file_close(struct archive *, void *); 87 static int file_close2(struct archive *, void *); 88 static int file_switch(struct archive *, void *, void *); 89 static ssize_t file_read(struct archive *, void *, const void **buff); 90 static int64_t file_seek(struct archive *, void *, int64_t request, int); 91 static int64_t file_skip(struct archive *, void *, int64_t request); 92 static int64_t file_skip_lseek(struct archive *, void *, int64_t request); 93 94 int 95 archive_read_open_file(struct archive *a, const char *filename, 96 size_t block_size) 97 { 98 return (archive_read_open_filename(a, filename, block_size)); 99 } 100 101 int 102 archive_read_open_filename(struct archive *a, const char *filename, 103 size_t block_size) 104 { 105 const char *filenames[2]; 106 filenames[0] = filename; 107 filenames[1] = NULL; 108 return archive_read_open_filenames(a, filenames, block_size); 109 } 110 111 int 112 archive_read_open_filenames(struct archive *a, const char **filenames, 113 size_t block_size) 114 { 115 struct read_file_data *mine; 116 const char *filename = NULL; 117 if (filenames) 118 filename = *(filenames++); 119 120 archive_clear_error(a); 121 do 122 { 123 if (filename == NULL) 124 filename = ""; 125 mine = (struct read_file_data *)calloc(1, 126 sizeof(*mine) + strlen(filename)); 127 if (mine == NULL) 128 goto no_memory; 129 strcpy(mine->filename.m, filename); 130 mine->block_size = block_size; 131 mine->fd = -1; 132 mine->buffer = NULL; 133 mine->st_mode = mine->use_lseek = 0; 134 if (filename == NULL || filename[0] == '\0') { 135 mine->filename_type = FNT_STDIN; 136 } else 137 mine->filename_type = FNT_MBS; 138 if (archive_read_append_callback_data(a, mine) != (ARCHIVE_OK)) 139 return (ARCHIVE_FATAL); 140 if (filenames == NULL) 141 break; 142 filename = *(filenames++); 143 } while (filename != NULL && filename[0] != '\0'); 144 archive_read_set_open_callback(a, file_open); 145 archive_read_set_read_callback(a, file_read); 146 archive_read_set_skip_callback(a, file_skip); 147 archive_read_set_close_callback(a, file_close); 148 archive_read_set_switch_callback(a, file_switch); 149 archive_read_set_seek_callback(a, file_seek); 150 151 return (archive_read_open1(a)); 152 no_memory: 153 archive_set_error(a, ENOMEM, "No memory"); 154 return (ARCHIVE_FATAL); 155 } 156 157 /* 158 * This function is an implementation detail of archive_read_open_filename_w, 159 * which is exposed as a separate API on Windows. 160 */ 161 #if !defined(_WIN32) || defined(__CYGWIN__) 162 static 163 #endif 164 int 165 archive_read_open_filenames_w(struct archive *a, const wchar_t **wfilenames, 166 size_t block_size) 167 { 168 struct read_file_data *mine; 169 const wchar_t *wfilename = NULL; 170 if (wfilenames) 171 wfilename = *(wfilenames++); 172 173 archive_clear_error(a); 174 do 175 { 176 if (wfilename == NULL) 177 wfilename = L""; 178 mine = (struct read_file_data *)calloc(1, 179 sizeof(*mine) + wcslen(wfilename) * sizeof(wchar_t)); 180 if (mine == NULL) 181 goto no_memory; 182 mine->block_size = block_size; 183 mine->fd = -1; 184 185 if (wfilename == NULL || wfilename[0] == L'\0') { 186 mine->filename_type = FNT_STDIN; 187 } else { 188 #if defined(_WIN32) && !defined(__CYGWIN__) 189 mine->filename_type = FNT_WCS; 190 wcscpy(mine->filename.w, wfilename); 191 #else 192 /* 193 * POSIX system does not support a wchar_t interface for 194 * open() system call, so we have to translate a wchar_t 195 * filename to multi-byte one and use it. 196 */ 197 struct archive_string fn; 198 199 archive_string_init(&fn); 200 if (archive_string_append_from_wcs(&fn, wfilename, 201 wcslen(wfilename)) != 0) { 202 if (errno == ENOMEM) 203 archive_set_error(a, errno, 204 "Can't allocate memory"); 205 else 206 archive_set_error(a, EINVAL, 207 "Failed to convert a wide-character" 208 " filename to a multi-byte filename"); 209 archive_string_free(&fn); 210 free(mine); 211 return (ARCHIVE_FATAL); 212 } 213 mine->filename_type = FNT_MBS; 214 strcpy(mine->filename.m, fn.s); 215 archive_string_free(&fn); 216 #endif 217 } 218 if (archive_read_append_callback_data(a, mine) != (ARCHIVE_OK)) 219 return (ARCHIVE_FATAL); 220 if (wfilenames == NULL) 221 break; 222 wfilename = *(wfilenames++); 223 } while (wfilename != NULL && wfilename[0] != '\0'); 224 archive_read_set_open_callback(a, file_open); 225 archive_read_set_read_callback(a, file_read); 226 archive_read_set_skip_callback(a, file_skip); 227 archive_read_set_close_callback(a, file_close); 228 archive_read_set_switch_callback(a, file_switch); 229 archive_read_set_seek_callback(a, file_seek); 230 231 return (archive_read_open1(a)); 232 no_memory: 233 archive_set_error(a, ENOMEM, "No memory"); 234 return (ARCHIVE_FATAL); 235 } 236 237 int 238 archive_read_open_filename_w(struct archive *a, const wchar_t *wfilename, 239 size_t block_size) 240 { 241 const wchar_t *wfilenames[2]; 242 wfilenames[0] = wfilename; 243 wfilenames[1] = NULL; 244 return archive_read_open_filenames_w(a, wfilenames, block_size); 245 } 246 247 static int 248 file_open(struct archive *a, void *client_data) 249 { 250 struct stat st; 251 struct read_file_data *mine = (struct read_file_data *)client_data; 252 void *buffer; 253 const char *filename = NULL; 254 #if defined(_WIN32) && !defined(__CYGWIN__) 255 const wchar_t *wfilename = NULL; 256 #endif 257 int fd = -1; 258 int is_disk_like = 0; 259 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) 260 off_t mediasize = 0; /* FreeBSD-specific, so off_t okay here. */ 261 #elif defined(__NetBSD__) || defined(__OpenBSD__) 262 struct disklabel dl; 263 #elif defined(__DragonFly__) 264 struct partinfo pi; 265 #endif 266 267 archive_clear_error(a); 268 if (mine->filename_type == FNT_STDIN) { 269 /* We used to delegate stdin support by 270 * directly calling archive_read_open_fd(a,0,block_size) 271 * here, but that doesn't (and shouldn't) handle the 272 * end-of-file flush when reading stdout from a pipe. 273 * Basically, read_open_fd() is intended for folks who 274 * are willing to handle such details themselves. This 275 * API is intended to be a little smarter for folks who 276 * want easy handling of the common case. 277 */ 278 fd = 0; 279 #if defined(__CYGWIN__) || defined(_WIN32) 280 setmode(0, O_BINARY); 281 #endif 282 filename = ""; 283 } else if (mine->filename_type == FNT_MBS) { 284 filename = mine->filename.m; 285 fd = open(filename, O_RDONLY | O_BINARY | O_CLOEXEC); 286 __archive_ensure_cloexec_flag(fd); 287 if (fd < 0) { 288 archive_set_error(a, errno, 289 "Failed to open '%s'", filename); 290 return (ARCHIVE_FATAL); 291 } 292 } else { 293 #if defined(_WIN32) && !defined(__CYGWIN__) 294 wfilename = mine->filename.w; 295 fd = _wopen(wfilename, O_RDONLY | O_BINARY); 296 if (fd < 0 && errno == ENOENT) { 297 wchar_t *fullpath; 298 fullpath = __la_win_permissive_name_w(wfilename); 299 if (fullpath != NULL) { 300 fd = _wopen(fullpath, O_RDONLY | O_BINARY); 301 free(fullpath); 302 } 303 } 304 if (fd < 0) { 305 archive_set_error(a, errno, 306 "Failed to open '%S'", wfilename); 307 return (ARCHIVE_FATAL); 308 } 309 #else 310 archive_set_error(a, ARCHIVE_ERRNO_MISC, 311 "Unexpedted operation in archive_read_open_filename"); 312 goto fail; 313 #endif 314 } 315 if (fstat(fd, &st) != 0) { 316 #if defined(_WIN32) && !defined(__CYGWIN__) 317 if (mine->filename_type == FNT_WCS) 318 archive_set_error(a, errno, "Can't stat '%S'", 319 wfilename); 320 else 321 #endif 322 archive_set_error(a, errno, "Can't stat '%s'", 323 filename); 324 goto fail; 325 } 326 327 /* 328 * Determine whether the input looks like a disk device or a 329 * tape device. The results are used below to select an I/O 330 * strategy: 331 * = "disk-like" devices support arbitrary lseek() and will 332 * support I/O requests of any size. So we get easy skipping 333 * and can cheat on block sizes to get better performance. 334 * = "tape-like" devices require strict blocking and use 335 * specialized ioctls for seeking. 336 * = "socket-like" devices cannot seek at all but can improve 337 * performance by using nonblocking I/O to read "whatever is 338 * available right now". 339 * 340 * Right now, we only specially recognize disk-like devices, 341 * but it should be straightforward to add probes and strategy 342 * here for tape-like and socket-like devices. 343 */ 344 if (S_ISREG(st.st_mode)) { 345 /* Safety: Tell the extractor not to overwrite the input. */ 346 archive_read_extract_set_skip_file(a, st.st_dev, st.st_ino); 347 /* Regular files act like disks. */ 348 is_disk_like = 1; 349 } 350 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) 351 /* FreeBSD: if it supports DIOCGMEDIASIZE ioctl, it's disk-like. */ 352 else if (S_ISCHR(st.st_mode) && 353 ioctl(fd, DIOCGMEDIASIZE, &mediasize) == 0 && 354 mediasize > 0) { 355 is_disk_like = 1; 356 } 357 #elif defined(__NetBSD__) || defined(__OpenBSD__) 358 /* Net/OpenBSD: if it supports DIOCGDINFO ioctl, it's disk-like. */ 359 else if ((S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) && 360 ioctl(fd, DIOCGDINFO, &dl) == 0 && 361 dl.d_partitions[DISKPART(st.st_rdev)].p_size > 0) { 362 is_disk_like = 1; 363 } 364 #elif defined(__DragonFly__) 365 /* DragonFly BSD: if it supports DIOCGPART ioctl, it's disk-like. */ 366 else if (S_ISCHR(st.st_mode) && 367 ioctl(fd, DIOCGPART, &pi) == 0 && 368 pi.media_size > 0) { 369 is_disk_like = 1; 370 } 371 #elif defined(__linux__) 372 /* Linux: All block devices are disk-like. */ 373 else if (S_ISBLK(st.st_mode) && 374 lseek(fd, 0, SEEK_CUR) == 0 && 375 lseek(fd, 0, SEEK_SET) == 0 && 376 lseek(fd, 0, SEEK_END) > 0 && 377 lseek(fd, 0, SEEK_SET) == 0) { 378 is_disk_like = 1; 379 } 380 #endif 381 /* TODO: Add an "is_tape_like" variable and appropriate tests. */ 382 383 /* Disk-like devices prefer power-of-two block sizes. */ 384 /* Use provided block_size as a guide so users have some control. */ 385 if (is_disk_like) { 386 size_t new_block_size = 64 * 1024; 387 while (new_block_size < mine->block_size 388 && new_block_size < 64 * 1024 * 1024) 389 new_block_size *= 2; 390 mine->block_size = new_block_size; 391 } 392 buffer = malloc(mine->block_size); 393 if (buffer == NULL) { 394 archive_set_error(a, ENOMEM, "No memory"); 395 goto fail; 396 } 397 mine->buffer = buffer; 398 mine->fd = fd; 399 /* Remember mode so close can decide whether to flush. */ 400 mine->st_mode = st.st_mode; 401 402 /* Disk-like inputs can use lseek(). */ 403 if (is_disk_like) 404 mine->use_lseek = 1; 405 406 return (ARCHIVE_OK); 407 fail: 408 /* 409 * Don't close file descriptors not opened or ones pointing referring 410 * to `FNT_STDIN`. 411 */ 412 if (fd != -1 && fd != 0) 413 close(fd); 414 return (ARCHIVE_FATAL); 415 } 416 417 static ssize_t 418 file_read(struct archive *a, void *client_data, const void **buff) 419 { 420 struct read_file_data *mine = (struct read_file_data *)client_data; 421 ssize_t bytes_read; 422 423 /* TODO: If a recent lseek() operation has left us 424 * mis-aligned, read and return a short block to try to get 425 * us back in alignment. */ 426 427 /* TODO: Someday, try mmap() here; if that succeeds, give 428 * the entire file to libarchive as a single block. That 429 * could be a lot faster than block-by-block manual I/O. */ 430 431 /* TODO: We might be able to improve performance on pipes and 432 * sockets by setting non-blocking I/O and just accepting 433 * whatever we get here instead of waiting for a full block 434 * worth of data. */ 435 436 *buff = mine->buffer; 437 for (;;) { 438 bytes_read = read(mine->fd, mine->buffer, mine->block_size); 439 if (bytes_read < 0) { 440 if (errno == EINTR) 441 continue; 442 else if (mine->filename_type == FNT_STDIN) 443 archive_set_error(a, errno, 444 "Error reading stdin"); 445 else if (mine->filename_type == FNT_MBS) 446 archive_set_error(a, errno, 447 "Error reading '%s'", mine->filename.m); 448 else 449 archive_set_error(a, errno, 450 "Error reading '%S'", mine->filename.w); 451 } 452 return (bytes_read); 453 } 454 } 455 456 /* 457 * Regular files and disk-like block devices can use simple lseek 458 * without needing to round the request to the block size. 459 * 460 * TODO: This can leave future reads mis-aligned. Since we know the 461 * offset here, we should store it and use it in file_read() above 462 * to determine whether we should perform a short read to get back 463 * into alignment. Long series of mis-aligned reads can negatively 464 * impact disk throughput. (Of course, the performance impact should 465 * be carefully tested; extra code complexity is only worthwhile if 466 * it does provide measurable improvement.) 467 * 468 * TODO: Be lazy about the actual seek. There are a few pathological 469 * cases where libarchive makes a bunch of seek requests in a row 470 * without any intervening reads. This isn't a huge performance 471 * problem, since the kernel handles seeks lazily already, but 472 * it would be very slightly faster if we simply remembered the 473 * seek request here and then actually performed the seek at the 474 * top of the read callback above. 475 */ 476 static int64_t 477 file_skip_lseek(struct archive *a, void *client_data, int64_t request) 478 { 479 struct read_file_data *mine = (struct read_file_data *)client_data; 480 #if defined(_WIN32) && !defined(__CYGWIN__) 481 /* We use _lseeki64() on Windows. */ 482 int64_t old_offset, new_offset; 483 #else 484 off_t old_offset, new_offset; 485 #endif 486 487 /* We use off_t here because lseek() is declared that way. */ 488 489 /* TODO: Deal with case where off_t isn't 64 bits. 490 * This shouldn't be a problem on Linux or other POSIX 491 * systems, since the configuration logic for libarchive 492 * tries to obtain a 64-bit off_t. 493 */ 494 if ((old_offset = lseek(mine->fd, 0, SEEK_CUR)) >= 0 && 495 (new_offset = lseek(mine->fd, request, SEEK_CUR)) >= 0) 496 return (new_offset - old_offset); 497 498 /* If lseek() fails, don't bother trying again. */ 499 mine->use_lseek = 0; 500 501 /* Let libarchive recover with read+discard */ 502 if (errno == ESPIPE) 503 return (0); 504 505 /* If the input is corrupted or truncated, fail. */ 506 if (mine->filename_type == FNT_STDIN) 507 archive_set_error(a, errno, "Error seeking in stdin"); 508 else if (mine->filename_type == FNT_MBS) 509 archive_set_error(a, errno, "Error seeking in '%s'", 510 mine->filename.m); 511 else 512 archive_set_error(a, errno, "Error seeking in '%S'", 513 mine->filename.w); 514 return (-1); 515 } 516 517 518 /* 519 * TODO: Implement another file_skip_XXXX that uses MTIO ioctls to 520 * accelerate operation on tape drives. 521 */ 522 523 static int64_t 524 file_skip(struct archive *a, void *client_data, int64_t request) 525 { 526 struct read_file_data *mine = (struct read_file_data *)client_data; 527 528 /* Delegate skip requests. */ 529 if (mine->use_lseek) 530 return (file_skip_lseek(a, client_data, request)); 531 532 /* If we can't skip, return 0; libarchive will read+discard instead. */ 533 return (0); 534 } 535 536 /* 537 * TODO: Store the offset and use it in the read callback. 538 */ 539 static int64_t 540 file_seek(struct archive *a, void *client_data, int64_t request, int whence) 541 { 542 struct read_file_data *mine = (struct read_file_data *)client_data; 543 int64_t r; 544 545 /* We use off_t here because lseek() is declared that way. */ 546 /* See above for notes about when off_t is less than 64 bits. */ 547 r = lseek(mine->fd, request, whence); 548 if (r >= 0) 549 return r; 550 551 /* If the input is corrupted or truncated, fail. */ 552 if (mine->filename_type == FNT_STDIN) 553 archive_set_error(a, errno, "Error seeking in stdin"); 554 else if (mine->filename_type == FNT_MBS) 555 archive_set_error(a, errno, "Error seeking in '%s'", 556 mine->filename.m); 557 else 558 archive_set_error(a, errno, "Error seeking in '%S'", 559 mine->filename.w); 560 return (ARCHIVE_FATAL); 561 } 562 563 static int 564 file_close2(struct archive *a, void *client_data) 565 { 566 struct read_file_data *mine = (struct read_file_data *)client_data; 567 568 (void)a; /* UNUSED */ 569 570 /* Only flush and close if open succeeded. */ 571 if (mine->fd >= 0) { 572 /* 573 * Sometimes, we should flush the input before closing. 574 * Regular files: faster to just close without flush. 575 * Disk-like devices: Ditto. 576 * Tapes: must not flush (user might need to 577 * read the "next" item on a non-rewind device). 578 * Pipes and sockets: must flush (otherwise, the 579 * program feeding the pipe or socket may complain). 580 * Here, I flush everything except for regular files and 581 * device nodes. 582 */ 583 if (!S_ISREG(mine->st_mode) 584 && !S_ISCHR(mine->st_mode) 585 && !S_ISBLK(mine->st_mode)) { 586 ssize_t bytesRead; 587 do { 588 bytesRead = read(mine->fd, mine->buffer, 589 mine->block_size); 590 } while (bytesRead > 0); 591 } 592 /* If a named file was opened, then it needs to be closed. */ 593 if (mine->filename_type != FNT_STDIN) 594 close(mine->fd); 595 } 596 free(mine->buffer); 597 mine->buffer = NULL; 598 mine->fd = -1; 599 return (ARCHIVE_OK); 600 } 601 602 static int 603 file_close(struct archive *a, void *client_data) 604 { 605 struct read_file_data *mine = (struct read_file_data *)client_data; 606 file_close2(a, client_data); 607 free(mine); 608 return (ARCHIVE_OK); 609 } 610 611 static int 612 file_switch(struct archive *a, void *client_data1, void *client_data2) 613 { 614 file_close2(a, client_data1); 615 return file_open(a, client_data2); 616 } 617