1 /*- 2 * Copyright (c) 2003-2010 Tim Kientzle 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #include "archive_platform.h" 27 __FBSDID("$FreeBSD: head/lib/libarchive/archive_read_open_filename.c 201093 2009-12-28 02:28:44Z kientzle $"); 28 29 #ifdef HAVE_SYS_IOCTL_H 30 #include <sys/ioctl.h> 31 #endif 32 #ifdef HAVE_SYS_STAT_H 33 #include <sys/stat.h> 34 #endif 35 #ifdef HAVE_ERRNO_H 36 #include <errno.h> 37 #endif 38 #ifdef HAVE_FCNTL_H 39 #include <fcntl.h> 40 #endif 41 #ifdef HAVE_IO_H 42 #include <io.h> 43 #endif 44 #ifdef HAVE_STDLIB_H 45 #include <stdlib.h> 46 #endif 47 #ifdef HAVE_STRING_H 48 #include <string.h> 49 #endif 50 #ifdef HAVE_UNISTD_H 51 #include <unistd.h> 52 #endif 53 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) 54 #include <sys/disk.h> 55 #elif defined(__NetBSD__) || defined(__OpenBSD__) 56 #include <sys/disklabel.h> 57 #include <sys/dkio.h> 58 #elif defined(__DragonFly__) 59 #include <sys/diskslice.h> 60 #endif 61 62 #include "archive.h" 63 #include "archive_private.h" 64 #include "archive_string.h" 65 66 #ifndef O_BINARY 67 #define O_BINARY 0 68 #endif 69 #ifndef O_CLOEXEC 70 #define O_CLOEXEC 0 71 #endif 72 73 struct read_file_data { 74 int fd; 75 size_t block_size; 76 void *buffer; 77 mode_t st_mode; /* Mode bits for opened file. */ 78 char use_lseek; 79 enum fnt_e { FNT_STDIN, FNT_MBS, FNT_WCS } filename_type; 80 union { 81 char m[1];/* MBS filename. */ 82 wchar_t w[1];/* WCS filename. */ 83 } filename; /* Must be last! */ 84 }; 85 86 static int file_open(struct archive *, void *); 87 static int file_close(struct archive *, void *); 88 static int file_close2(struct archive *, void *); 89 static int file_switch(struct archive *, void *, void *); 90 static ssize_t file_read(struct archive *, void *, const void **buff); 91 static int64_t file_seek(struct archive *, void *, int64_t request, int); 92 static int64_t file_skip(struct archive *, void *, int64_t request); 93 static int64_t file_skip_lseek(struct archive *, void *, int64_t request); 94 95 int 96 archive_read_open_file(struct archive *a, const char *filename, 97 size_t block_size) 98 { 99 return (archive_read_open_filename(a, filename, block_size)); 100 } 101 102 int 103 archive_read_open_filename(struct archive *a, const char *filename, 104 size_t block_size) 105 { 106 const char *filenames[2] = { filename, NULL }; 107 return archive_read_open_filenames(a, filenames, block_size); 108 } 109 110 int 111 archive_read_open_filenames(struct archive *a, const char **filenames, 112 size_t block_size) 113 { 114 struct read_file_data *mine; 115 const char *filename = NULL; 116 if (filenames) 117 filename = *(filenames++); 118 119 archive_clear_error(a); 120 do 121 { 122 if (filename == NULL) 123 filename = ""; 124 mine = (struct read_file_data *)calloc(1, 125 sizeof(*mine) + strlen(filename)); 126 if (mine == NULL) 127 goto no_memory; 128 strcpy(mine->filename.m, filename); 129 mine->block_size = block_size; 130 mine->fd = -1; 131 mine->buffer = NULL; 132 mine->st_mode = mine->use_lseek = 0; 133 if (filename == NULL || filename[0] == '\0') { 134 mine->filename_type = FNT_STDIN; 135 } else 136 mine->filename_type = FNT_MBS; 137 if (archive_read_append_callback_data(a, mine) != (ARCHIVE_OK)) 138 return (ARCHIVE_FATAL); 139 if (filenames == NULL) 140 break; 141 filename = *(filenames++); 142 } while (filename != NULL && filename[0] != '\0'); 143 archive_read_set_open_callback(a, file_open); 144 archive_read_set_read_callback(a, file_read); 145 archive_read_set_skip_callback(a, file_skip); 146 archive_read_set_close_callback(a, file_close); 147 archive_read_set_switch_callback(a, file_switch); 148 archive_read_set_seek_callback(a, file_seek); 149 150 return (archive_read_open1(a)); 151 no_memory: 152 archive_set_error(a, ENOMEM, "No memory"); 153 return (ARCHIVE_FATAL); 154 } 155 156 int 157 archive_read_open_filename_w(struct archive *a, const wchar_t *wfilename, 158 size_t block_size) 159 { 160 struct read_file_data *mine = (struct read_file_data *)calloc(1, 161 sizeof(*mine) + wcslen(wfilename) * sizeof(wchar_t)); 162 if (!mine) 163 { 164 archive_set_error(a, ENOMEM, "No memory"); 165 return (ARCHIVE_FATAL); 166 } 167 mine->fd = -1; 168 mine->block_size = block_size; 169 170 if (wfilename == NULL || wfilename[0] == L'\0') { 171 mine->filename_type = FNT_STDIN; 172 } else { 173 #if defined(_WIN32) && !defined(__CYGWIN__) 174 mine->filename_type = FNT_WCS; 175 wcscpy(mine->filename.w, wfilename); 176 #else 177 /* 178 * POSIX system does not support a wchar_t interface for 179 * open() system call, so we have to translate a whcar_t 180 * filename to multi-byte one and use it. 181 */ 182 struct archive_string fn; 183 184 archive_string_init(&fn); 185 if (archive_string_append_from_wcs(&fn, wfilename, 186 wcslen(wfilename)) != 0) { 187 if (errno == ENOMEM) 188 archive_set_error(a, errno, 189 "Can't allocate memory"); 190 else 191 archive_set_error(a, EINVAL, 192 "Failed to convert a wide-character" 193 " filename to a multi-byte filename"); 194 archive_string_free(&fn); 195 free(mine); 196 return (ARCHIVE_FATAL); 197 } 198 mine->filename_type = FNT_MBS; 199 strcpy(mine->filename.m, fn.s); 200 archive_string_free(&fn); 201 #endif 202 } 203 if (archive_read_append_callback_data(a, mine) != (ARCHIVE_OK)) 204 return (ARCHIVE_FATAL); 205 archive_read_set_open_callback(a, file_open); 206 archive_read_set_read_callback(a, file_read); 207 archive_read_set_skip_callback(a, file_skip); 208 archive_read_set_close_callback(a, file_close); 209 archive_read_set_switch_callback(a, file_switch); 210 archive_read_set_seek_callback(a, file_seek); 211 212 return (archive_read_open1(a)); 213 } 214 215 static int 216 file_open(struct archive *a, void *client_data) 217 { 218 struct stat st; 219 struct read_file_data *mine = (struct read_file_data *)client_data; 220 void *buffer; 221 const char *filename = NULL; 222 const wchar_t *wfilename = NULL; 223 int fd; 224 int is_disk_like = 0; 225 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) 226 off_t mediasize = 0; /* FreeBSD-specific, so off_t okay here. */ 227 #elif defined(__NetBSD__) || defined(__OpenBSD__) 228 struct disklabel dl; 229 #elif defined(__DragonFly__) 230 struct partinfo pi; 231 #endif 232 233 archive_clear_error(a); 234 if (mine->filename_type == FNT_STDIN) { 235 /* We used to delegate stdin support by 236 * directly calling archive_read_open_fd(a,0,block_size) 237 * here, but that doesn't (and shouldn't) handle the 238 * end-of-file flush when reading stdout from a pipe. 239 * Basically, read_open_fd() is intended for folks who 240 * are willing to handle such details themselves. This 241 * API is intended to be a little smarter for folks who 242 * want easy handling of the common case. 243 */ 244 fd = 0; 245 #if defined(__CYGWIN__) || defined(_WIN32) 246 setmode(0, O_BINARY); 247 #endif 248 filename = ""; 249 } else if (mine->filename_type == FNT_MBS) { 250 filename = mine->filename.m; 251 fd = open(filename, O_RDONLY | O_BINARY | O_CLOEXEC); 252 __archive_ensure_cloexec_flag(fd); 253 if (fd < 0) { 254 archive_set_error(a, errno, 255 "Failed to open '%s'", filename); 256 return (ARCHIVE_FATAL); 257 } 258 } else { 259 #if defined(_WIN32) && !defined(__CYGWIN__) 260 wfilename = mine->filename.w; 261 fd = _wopen(wfilename, O_RDONLY | O_BINARY); 262 if (fd < 0 && errno == ENOENT) { 263 wchar_t *fullpath; 264 fullpath = __la_win_permissive_name_w(wfilename); 265 if (fullpath != NULL) { 266 fd = _wopen(fullpath, O_RDONLY | O_BINARY); 267 free(fullpath); 268 } 269 } 270 if (fd < 0) { 271 archive_set_error(a, errno, 272 "Failed to open '%S'", wfilename); 273 return (ARCHIVE_FATAL); 274 } 275 #else 276 archive_set_error(a, ARCHIVE_ERRNO_MISC, 277 "Unexpedted operation in archive_read_open_filename"); 278 return (ARCHIVE_FATAL); 279 #endif 280 } 281 if (fstat(fd, &st) != 0) { 282 if (mine->filename_type == FNT_WCS) 283 archive_set_error(a, errno, "Can't stat '%S'", 284 wfilename); 285 else 286 archive_set_error(a, errno, "Can't stat '%s'", 287 filename); 288 return (ARCHIVE_FATAL); 289 } 290 291 /* 292 * Determine whether the input looks like a disk device or a 293 * tape device. The results are used below to select an I/O 294 * strategy: 295 * = "disk-like" devices support arbitrary lseek() and will 296 * support I/O requests of any size. So we get easy skipping 297 * and can cheat on block sizes to get better performance. 298 * = "tape-like" devices require strict blocking and use 299 * specialized ioctls for seeking. 300 * = "socket-like" devices cannot seek at all but can improve 301 * performance by using nonblocking I/O to read "whatever is 302 * available right now". 303 * 304 * Right now, we only specially recognize disk-like devices, 305 * but it should be straightforward to add probes and strategy 306 * here for tape-like and socket-like devices. 307 */ 308 if (S_ISREG(st.st_mode)) { 309 /* Safety: Tell the extractor not to overwrite the input. */ 310 archive_read_extract_set_skip_file(a, st.st_dev, st.st_ino); 311 /* Regular files act like disks. */ 312 is_disk_like = 1; 313 } 314 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) 315 /* FreeBSD: if it supports DIOCGMEDIASIZE ioctl, it's disk-like. */ 316 else if (S_ISCHR(st.st_mode) && 317 ioctl(fd, DIOCGMEDIASIZE, &mediasize) == 0 && 318 mediasize > 0) { 319 is_disk_like = 1; 320 } 321 #elif defined(__NetBSD__) || defined(__OpenBSD__) 322 /* Net/OpenBSD: if it supports DIOCGDINFO ioctl, it's disk-like. */ 323 else if ((S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) && 324 ioctl(fd, DIOCGDINFO, &dl) == 0 && 325 dl.d_partitions[DISKPART(st.st_rdev)].p_size > 0) { 326 is_disk_like = 1; 327 } 328 #elif defined(__DragonFly__) 329 /* DragonFly BSD: if it supports DIOCGPART ioctl, it's disk-like. */ 330 else if (S_ISCHR(st.st_mode) && 331 ioctl(fd, DIOCGPART, &pi) == 0 && 332 pi.media_size > 0) { 333 is_disk_like = 1; 334 } 335 #elif defined(__linux__) 336 /* Linux: All block devices are disk-like. */ 337 else if (S_ISBLK(st.st_mode) && 338 lseek(fd, 0, SEEK_CUR) == 0 && 339 lseek(fd, 0, SEEK_SET) == 0 && 340 lseek(fd, 0, SEEK_END) > 0 && 341 lseek(fd, 0, SEEK_SET) == 0) { 342 is_disk_like = 1; 343 } 344 #endif 345 /* TODO: Add an "is_tape_like" variable and appropriate tests. */ 346 347 /* Disk-like devices prefer power-of-two block sizes. */ 348 /* Use provided block_size as a guide so users have some control. */ 349 if (is_disk_like) { 350 size_t new_block_size = 64 * 1024; 351 while (new_block_size < mine->block_size 352 && new_block_size < 64 * 1024 * 1024) 353 new_block_size *= 2; 354 mine->block_size = new_block_size; 355 } 356 buffer = malloc(mine->block_size); 357 if (mine == NULL || buffer == NULL) { 358 archive_set_error(a, ENOMEM, "No memory"); 359 free(mine); 360 free(buffer); 361 return (ARCHIVE_FATAL); 362 } 363 mine->buffer = buffer; 364 mine->fd = fd; 365 /* Remember mode so close can decide whether to flush. */ 366 mine->st_mode = st.st_mode; 367 368 /* Disk-like inputs can use lseek(). */ 369 if (is_disk_like) 370 mine->use_lseek = 1; 371 372 return (ARCHIVE_OK); 373 } 374 375 static ssize_t 376 file_read(struct archive *a, void *client_data, const void **buff) 377 { 378 struct read_file_data *mine = (struct read_file_data *)client_data; 379 ssize_t bytes_read; 380 381 /* TODO: If a recent lseek() operation has left us 382 * mis-aligned, read and return a short block to try to get 383 * us back in alignment. */ 384 385 /* TODO: Someday, try mmap() here; if that succeeds, give 386 * the entire file to libarchive as a single block. That 387 * could be a lot faster than block-by-block manual I/O. */ 388 389 /* TODO: We might be able to improve performance on pipes and 390 * sockets by setting non-blocking I/O and just accepting 391 * whatever we get here instead of waiting for a full block 392 * worth of data. */ 393 394 *buff = mine->buffer; 395 for (;;) { 396 bytes_read = read(mine->fd, mine->buffer, mine->block_size); 397 if (bytes_read < 0) { 398 if (errno == EINTR) 399 continue; 400 else if (mine->filename_type == FNT_STDIN) 401 archive_set_error(a, errno, 402 "Error reading stdin"); 403 else if (mine->filename_type == FNT_MBS) 404 archive_set_error(a, errno, 405 "Error reading '%s'", mine->filename.m); 406 else 407 archive_set_error(a, errno, 408 "Error reading '%S'", mine->filename.w); 409 } 410 return (bytes_read); 411 } 412 } 413 414 /* 415 * Regular files and disk-like block devices can use simple lseek 416 * without needing to round the request to the block size. 417 * 418 * TODO: This can leave future reads mis-aligned. Since we know the 419 * offset here, we should store it and use it in file_read() above 420 * to determine whether we should perform a short read to get back 421 * into alignment. Long series of mis-aligned reads can negatively 422 * impact disk throughput. (Of course, the performance impact should 423 * be carefully tested; extra code complexity is only worthwhile if 424 * it does provide measurable improvement.) 425 * 426 * TODO: Be lazy about the actual seek. There are a few pathological 427 * cases where libarchive makes a bunch of seek requests in a row 428 * without any intervening reads. This isn't a huge performance 429 * problem, since the kernel handles seeks lazily already, but 430 * it would be very slightly faster if we simply remembered the 431 * seek request here and then actually performed the seek at the 432 * top of the read callback above. 433 */ 434 static int64_t 435 file_skip_lseek(struct archive *a, void *client_data, int64_t request) 436 { 437 struct read_file_data *mine = (struct read_file_data *)client_data; 438 #if defined(_WIN32) && !defined(__CYGWIN__) 439 /* We use _lseeki64() on Windows. */ 440 int64_t old_offset, new_offset; 441 #else 442 off_t old_offset, new_offset; 443 #endif 444 445 /* We use off_t here because lseek() is declared that way. */ 446 447 /* TODO: Deal with case where off_t isn't 64 bits. 448 * This shouldn't be a problem on Linux or other POSIX 449 * systems, since the configuration logic for libarchive 450 * tries to obtain a 64-bit off_t. 451 */ 452 if ((old_offset = lseek(mine->fd, 0, SEEK_CUR)) >= 0 && 453 (new_offset = lseek(mine->fd, request, SEEK_CUR)) >= 0) 454 return (new_offset - old_offset); 455 456 /* If lseek() fails, don't bother trying again. */ 457 mine->use_lseek = 0; 458 459 /* Let libarchive recover with read+discard */ 460 if (errno == ESPIPE) 461 return (0); 462 463 /* If the input is corrupted or truncated, fail. */ 464 if (mine->filename_type == FNT_STDIN) 465 archive_set_error(a, errno, "Error seeking in stdin"); 466 else if (mine->filename_type == FNT_MBS) 467 archive_set_error(a, errno, "Error seeking in '%s'", 468 mine->filename.m); 469 else 470 archive_set_error(a, errno, "Error seeking in '%S'", 471 mine->filename.w); 472 return (-1); 473 } 474 475 476 /* 477 * TODO: Implement another file_skip_XXXX that uses MTIO ioctls to 478 * accelerate operation on tape drives. 479 */ 480 481 static int64_t 482 file_skip(struct archive *a, void *client_data, int64_t request) 483 { 484 struct read_file_data *mine = (struct read_file_data *)client_data; 485 486 /* Delegate skip requests. */ 487 if (mine->use_lseek) 488 return (file_skip_lseek(a, client_data, request)); 489 490 /* If we can't skip, return 0; libarchive will read+discard instead. */ 491 return (0); 492 } 493 494 /* 495 * TODO: Store the offset and use it in the read callback. 496 */ 497 static int64_t 498 file_seek(struct archive *a, void *client_data, int64_t request, int whence) 499 { 500 struct read_file_data *mine = (struct read_file_data *)client_data; 501 int64_t r; 502 503 /* We use off_t here because lseek() is declared that way. */ 504 /* See above for notes about when off_t is less than 64 bits. */ 505 r = lseek(mine->fd, request, whence); 506 if (r >= 0) 507 return r; 508 509 /* If the input is corrupted or truncated, fail. */ 510 if (mine->filename_type == FNT_STDIN) 511 archive_set_error(a, errno, "Error seeking in stdin"); 512 else if (mine->filename_type == FNT_MBS) 513 archive_set_error(a, errno, "Error seeking in '%s'", 514 mine->filename.m); 515 else 516 archive_set_error(a, errno, "Error seeking in '%S'", 517 mine->filename.w); 518 return (ARCHIVE_FATAL); 519 } 520 521 static int 522 file_close2(struct archive *a, void *client_data) 523 { 524 struct read_file_data *mine = (struct read_file_data *)client_data; 525 526 (void)a; /* UNUSED */ 527 528 /* Only flush and close if open succeeded. */ 529 if (mine->fd >= 0) { 530 /* 531 * Sometimes, we should flush the input before closing. 532 * Regular files: faster to just close without flush. 533 * Disk-like devices: Ditto. 534 * Tapes: must not flush (user might need to 535 * read the "next" item on a non-rewind device). 536 * Pipes and sockets: must flush (otherwise, the 537 * program feeding the pipe or socket may complain). 538 * Here, I flush everything except for regular files and 539 * device nodes. 540 */ 541 if (!S_ISREG(mine->st_mode) 542 && !S_ISCHR(mine->st_mode) 543 && !S_ISBLK(mine->st_mode)) { 544 ssize_t bytesRead; 545 do { 546 bytesRead = read(mine->fd, mine->buffer, 547 mine->block_size); 548 } while (bytesRead > 0); 549 } 550 /* If a named file was opened, then it needs to be closed. */ 551 if (mine->filename_type != FNT_STDIN) 552 close(mine->fd); 553 } 554 free(mine->buffer); 555 mine->buffer = NULL; 556 mine->fd = -1; 557 return (ARCHIVE_OK); 558 } 559 560 static int 561 file_close(struct archive *a, void *client_data) 562 { 563 struct read_file_data *mine = (struct read_file_data *)client_data; 564 file_close2(a, client_data); 565 free(mine); 566 return (ARCHIVE_OK); 567 } 568 569 static int 570 file_switch(struct archive *a, void *client_data1, void *client_data2) 571 { 572 file_close2(a, client_data1); 573 return file_open(a, client_data2); 574 } 575