1 /*- 2 * Copyright (c) 2007 Kai Wang 3 * Copyright (c) 2007 Tim Kientzle 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer 11 * in this position and unchanged. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 #include "archive_platform.h" 29 __FBSDID("$FreeBSD: head/lib/libarchive/archive_read_support_format_ar.c 201101 2009-12-28 03:06:27Z kientzle $"); 30 31 #ifdef HAVE_SYS_STAT_H 32 #include <sys/stat.h> 33 #endif 34 #ifdef HAVE_ERRNO_H 35 #include <errno.h> 36 #endif 37 #ifdef HAVE_STDLIB_H 38 #include <stdlib.h> 39 #endif 40 #ifdef HAVE_STRING_H 41 #include <string.h> 42 #endif 43 #ifdef HAVE_LIMITS_H 44 #include <limits.h> 45 #endif 46 47 #include "archive.h" 48 #include "archive_entry.h" 49 #include "archive_private.h" 50 #include "archive_read_private.h" 51 52 struct ar { 53 int64_t entry_bytes_remaining; 54 /* unconsumed is purely to track data we've gotten from readahead, 55 * but haven't yet marked as consumed. Must be paired with 56 * entry_bytes_remaining usage/modification. 57 */ 58 size_t entry_bytes_unconsumed; 59 int64_t entry_offset; 60 int64_t entry_padding; 61 char *strtab; 62 size_t strtab_size; 63 char read_global_header; 64 }; 65 66 /* 67 * Define structure of the "ar" header. 68 */ 69 #define AR_name_offset 0 70 #define AR_name_size 16 71 #define AR_date_offset 16 72 #define AR_date_size 12 73 #define AR_uid_offset 28 74 #define AR_uid_size 6 75 #define AR_gid_offset 34 76 #define AR_gid_size 6 77 #define AR_mode_offset 40 78 #define AR_mode_size 8 79 #define AR_size_offset 48 80 #define AR_size_size 10 81 #define AR_fmag_offset 58 82 #define AR_fmag_size 2 83 84 static int archive_read_format_ar_bid(struct archive_read *a, int); 85 static int archive_read_format_ar_cleanup(struct archive_read *a); 86 static int archive_read_format_ar_read_data(struct archive_read *a, 87 const void **buff, size_t *size, int64_t *offset); 88 static int archive_read_format_ar_skip(struct archive_read *a); 89 static int archive_read_format_ar_read_header(struct archive_read *a, 90 struct archive_entry *e); 91 static uint64_t ar_atol8(const char *p, unsigned char_cnt); 92 static uint64_t ar_atol10(const char *p, unsigned char_cnt); 93 static int ar_parse_gnu_filename_table(struct archive_read *a); 94 static int ar_parse_common_header(struct ar *ar, struct archive_entry *, 95 const char *h); 96 97 int 98 archive_read_support_format_ar(struct archive *_a) 99 { 100 struct archive_read *a = (struct archive_read *)_a; 101 struct ar *ar; 102 int r; 103 104 archive_check_magic(_a, ARCHIVE_READ_MAGIC, 105 ARCHIVE_STATE_NEW, "archive_read_support_format_ar"); 106 107 ar = (struct ar *)malloc(sizeof(*ar)); 108 if (ar == NULL) { 109 archive_set_error(&a->archive, ENOMEM, 110 "Can't allocate ar data"); 111 return (ARCHIVE_FATAL); 112 } 113 memset(ar, 0, sizeof(*ar)); 114 ar->strtab = NULL; 115 116 r = __archive_read_register_format(a, 117 ar, 118 "ar", 119 archive_read_format_ar_bid, 120 NULL, 121 archive_read_format_ar_read_header, 122 archive_read_format_ar_read_data, 123 archive_read_format_ar_skip, 124 NULL, 125 archive_read_format_ar_cleanup, 126 NULL, 127 NULL); 128 129 if (r != ARCHIVE_OK) { 130 free(ar); 131 return (r); 132 } 133 return (ARCHIVE_OK); 134 } 135 136 static int 137 archive_read_format_ar_cleanup(struct archive_read *a) 138 { 139 struct ar *ar; 140 141 ar = (struct ar *)(a->format->data); 142 if (ar->strtab) 143 free(ar->strtab); 144 free(ar); 145 (a->format->data) = NULL; 146 return (ARCHIVE_OK); 147 } 148 149 static int 150 archive_read_format_ar_bid(struct archive_read *a, int best_bid) 151 { 152 const void *h; 153 154 (void)best_bid; /* UNUSED */ 155 156 /* 157 * Verify the 8-byte file signature. 158 * TODO: Do we need to check more than this? 159 */ 160 if ((h = __archive_read_ahead(a, 8, NULL)) == NULL) 161 return (-1); 162 if (memcmp(h, "!<arch>\n", 8) == 0) { 163 return (64); 164 } 165 return (-1); 166 } 167 168 static int 169 _ar_read_header(struct archive_read *a, struct archive_entry *entry, 170 struct ar *ar, const char *h, size_t *unconsumed) 171 { 172 char filename[AR_name_size + 1]; 173 uint64_t number; /* Used to hold parsed numbers before validation. */ 174 size_t bsd_name_length, entry_size; 175 char *p, *st; 176 const void *b; 177 int r; 178 179 /* Verify the magic signature on the file header. */ 180 if (strncmp(h + AR_fmag_offset, "`\n", 2) != 0) { 181 archive_set_error(&a->archive, EINVAL, 182 "Incorrect file header signature"); 183 return (ARCHIVE_FATAL); 184 } 185 186 /* Copy filename into work buffer. */ 187 strncpy(filename, h + AR_name_offset, AR_name_size); 188 filename[AR_name_size] = '\0'; 189 190 /* 191 * Guess the format variant based on the filename. 192 */ 193 if (a->archive.archive_format == ARCHIVE_FORMAT_AR) { 194 /* We don't already know the variant, so let's guess. */ 195 /* 196 * Biggest clue is presence of '/': GNU starts special 197 * filenames with '/', appends '/' as terminator to 198 * non-special names, so anything with '/' should be 199 * GNU except for BSD long filenames. 200 */ 201 if (strncmp(filename, "#1/", 3) == 0) 202 a->archive.archive_format = ARCHIVE_FORMAT_AR_BSD; 203 else if (strchr(filename, '/') != NULL) 204 a->archive.archive_format = ARCHIVE_FORMAT_AR_GNU; 205 else if (strncmp(filename, "__.SYMDEF", 9) == 0) 206 a->archive.archive_format = ARCHIVE_FORMAT_AR_BSD; 207 /* 208 * XXX Do GNU/SVR4 'ar' programs ever omit trailing '/' 209 * if name exactly fills 16-byte field? If so, we 210 * can't assume entries without '/' are BSD. XXX 211 */ 212 } 213 214 /* Update format name from the code. */ 215 if (a->archive.archive_format == ARCHIVE_FORMAT_AR_GNU) 216 a->archive.archive_format_name = "ar (GNU/SVR4)"; 217 else if (a->archive.archive_format == ARCHIVE_FORMAT_AR_BSD) 218 a->archive.archive_format_name = "ar (BSD)"; 219 else 220 a->archive.archive_format_name = "ar"; 221 222 /* 223 * Remove trailing spaces from the filename. GNU and BSD 224 * variants both pad filename area out with spaces. 225 * This will only be wrong if GNU/SVR4 'ar' implementations 226 * omit trailing '/' for 16-char filenames and we have 227 * a 16-char filename that ends in ' '. 228 */ 229 p = filename + AR_name_size - 1; 230 while (p >= filename && *p == ' ') { 231 *p = '\0'; 232 p--; 233 } 234 235 /* 236 * Remove trailing slash unless first character is '/'. 237 * (BSD entries never end in '/', so this will only trim 238 * GNU-format entries. GNU special entries start with '/' 239 * and are not terminated in '/', so we don't trim anything 240 * that starts with '/'.) 241 */ 242 if (filename[0] != '/' && p > filename && *p == '/') { 243 *p = '\0'; 244 } 245 246 if (p < filename) { 247 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 248 "Found entry with empty filename"); 249 return (ARCHIVE_FATAL); 250 } 251 252 /* 253 * '//' is the GNU filename table. 254 * Later entries can refer to names in this table. 255 */ 256 if (strcmp(filename, "//") == 0) { 257 /* This must come before any call to _read_ahead. */ 258 ar_parse_common_header(ar, entry, h); 259 archive_entry_copy_pathname(entry, filename); 260 archive_entry_set_filetype(entry, AE_IFREG); 261 /* Get the size of the filename table. */ 262 number = ar_atol10(h + AR_size_offset, AR_size_size); 263 if (number > SIZE_MAX) { 264 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 265 "Filename table too large"); 266 return (ARCHIVE_FATAL); 267 } 268 entry_size = (size_t)number; 269 if (entry_size == 0) { 270 archive_set_error(&a->archive, EINVAL, 271 "Invalid string table"); 272 return (ARCHIVE_FATAL); 273 } 274 if (ar->strtab != NULL) { 275 archive_set_error(&a->archive, EINVAL, 276 "More than one string tables exist"); 277 return (ARCHIVE_FATAL); 278 } 279 280 /* Read the filename table into memory. */ 281 st = malloc(entry_size); 282 if (st == NULL) { 283 archive_set_error(&a->archive, ENOMEM, 284 "Can't allocate filename table buffer"); 285 return (ARCHIVE_FATAL); 286 } 287 ar->strtab = st; 288 ar->strtab_size = entry_size; 289 290 if (*unconsumed) { 291 __archive_read_consume(a, *unconsumed); 292 *unconsumed = 0; 293 } 294 295 if ((b = __archive_read_ahead(a, entry_size, NULL)) == NULL) 296 return (ARCHIVE_FATAL); 297 memcpy(st, b, entry_size); 298 __archive_read_consume(a, entry_size); 299 /* All contents are consumed. */ 300 ar->entry_bytes_remaining = 0; 301 archive_entry_set_size(entry, ar->entry_bytes_remaining); 302 303 /* Parse the filename table. */ 304 return (ar_parse_gnu_filename_table(a)); 305 } 306 307 /* 308 * GNU variant handles long filenames by storing /<number> 309 * to indicate a name stored in the filename table. 310 * XXX TODO: Verify that it's all digits... Don't be fooled 311 * by "/9xyz" XXX 312 */ 313 if (filename[0] == '/' && filename[1] >= '0' && filename[1] <= '9') { 314 number = ar_atol10(h + AR_name_offset + 1, AR_name_size - 1); 315 /* 316 * If we can't look up the real name, warn and return 317 * the entry with the wrong name. 318 */ 319 if (ar->strtab == NULL || number > ar->strtab_size) { 320 archive_set_error(&a->archive, EINVAL, 321 "Can't find long filename for GNU/SVR4 archive entry"); 322 archive_entry_copy_pathname(entry, filename); 323 /* Parse the time, owner, mode, size fields. */ 324 ar_parse_common_header(ar, entry, h); 325 return (ARCHIVE_FATAL); 326 } 327 328 archive_entry_copy_pathname(entry, &ar->strtab[(size_t)number]); 329 /* Parse the time, owner, mode, size fields. */ 330 return (ar_parse_common_header(ar, entry, h)); 331 } 332 333 /* 334 * BSD handles long filenames by storing "#1/" followed by the 335 * length of filename as a decimal number, then prepends the 336 * the filename to the file contents. 337 */ 338 if (strncmp(filename, "#1/", 3) == 0) { 339 /* Parse the time, owner, mode, size fields. */ 340 /* This must occur before _read_ahead is called again. */ 341 ar_parse_common_header(ar, entry, h); 342 343 /* Parse the size of the name, adjust the file size. */ 344 number = ar_atol10(h + AR_name_offset + 3, AR_name_size - 3); 345 bsd_name_length = (size_t)number; 346 /* Guard against the filename + trailing NUL 347 * overflowing a size_t and against the filename size 348 * being larger than the entire entry. */ 349 if (number > (uint64_t)(bsd_name_length + 1) 350 || (int64_t)bsd_name_length > ar->entry_bytes_remaining) { 351 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 352 "Bad input file size"); 353 return (ARCHIVE_FATAL); 354 } 355 ar->entry_bytes_remaining -= bsd_name_length; 356 /* Adjust file size reported to client. */ 357 archive_entry_set_size(entry, ar->entry_bytes_remaining); 358 359 if (*unconsumed) { 360 __archive_read_consume(a, *unconsumed); 361 *unconsumed = 0; 362 } 363 364 /* Read the long name into memory. */ 365 if ((b = __archive_read_ahead(a, bsd_name_length, NULL)) == NULL) { 366 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 367 "Truncated input file"); 368 return (ARCHIVE_FATAL); 369 } 370 /* Store it in the entry. */ 371 p = (char *)malloc(bsd_name_length + 1); 372 if (p == NULL) { 373 archive_set_error(&a->archive, ENOMEM, 374 "Can't allocate fname buffer"); 375 return (ARCHIVE_FATAL); 376 } 377 strncpy(p, b, bsd_name_length); 378 p[bsd_name_length] = '\0'; 379 380 __archive_read_consume(a, bsd_name_length); 381 382 archive_entry_copy_pathname(entry, p); 383 free(p); 384 return (ARCHIVE_OK); 385 } 386 387 /* 388 * "/" is the SVR4/GNU archive symbol table. 389 */ 390 if (strcmp(filename, "/") == 0) { 391 archive_entry_copy_pathname(entry, "/"); 392 /* Parse the time, owner, mode, size fields. */ 393 r = ar_parse_common_header(ar, entry, h); 394 /* Force the file type to a regular file. */ 395 archive_entry_set_filetype(entry, AE_IFREG); 396 return (r); 397 } 398 399 /* 400 * "__.SYMDEF" is a BSD archive symbol table. 401 */ 402 if (strcmp(filename, "__.SYMDEF") == 0) { 403 archive_entry_copy_pathname(entry, filename); 404 /* Parse the time, owner, mode, size fields. */ 405 return (ar_parse_common_header(ar, entry, h)); 406 } 407 408 /* 409 * Otherwise, this is a standard entry. The filename 410 * has already been trimmed as much as possible, based 411 * on our current knowledge of the format. 412 */ 413 archive_entry_copy_pathname(entry, filename); 414 return (ar_parse_common_header(ar, entry, h)); 415 } 416 417 static int 418 archive_read_format_ar_read_header(struct archive_read *a, 419 struct archive_entry *entry) 420 { 421 struct ar *ar = (struct ar*)(a->format->data); 422 size_t unconsumed; 423 const void *header_data; 424 int ret; 425 426 if (!ar->read_global_header) { 427 /* 428 * We are now at the beginning of the archive, 429 * so we need first consume the ar global header. 430 */ 431 __archive_read_consume(a, 8); 432 ar->read_global_header = 1; 433 /* Set a default format code for now. */ 434 a->archive.archive_format = ARCHIVE_FORMAT_AR; 435 } 436 437 /* Read the header for the next file entry. */ 438 if ((header_data = __archive_read_ahead(a, 60, NULL)) == NULL) 439 /* Broken header. */ 440 return (ARCHIVE_EOF); 441 442 unconsumed = 60; 443 444 ret = _ar_read_header(a, entry, ar, (const char *)header_data, &unconsumed); 445 446 if (unconsumed) 447 __archive_read_consume(a, unconsumed); 448 449 return ret; 450 } 451 452 453 static int 454 ar_parse_common_header(struct ar *ar, struct archive_entry *entry, 455 const char *h) 456 { 457 uint64_t n; 458 459 /* Copy remaining header */ 460 archive_entry_set_mtime(entry, 461 (time_t)ar_atol10(h + AR_date_offset, AR_date_size), 0L); 462 archive_entry_set_uid(entry, 463 (uid_t)ar_atol10(h + AR_uid_offset, AR_uid_size)); 464 archive_entry_set_gid(entry, 465 (gid_t)ar_atol10(h + AR_gid_offset, AR_gid_size)); 466 archive_entry_set_mode(entry, 467 (mode_t)ar_atol8(h + AR_mode_offset, AR_mode_size)); 468 n = ar_atol10(h + AR_size_offset, AR_size_size); 469 470 ar->entry_offset = 0; 471 ar->entry_padding = n % 2; 472 archive_entry_set_size(entry, n); 473 ar->entry_bytes_remaining = n; 474 return (ARCHIVE_OK); 475 } 476 477 static int 478 archive_read_format_ar_read_data(struct archive_read *a, 479 const void **buff, size_t *size, int64_t *offset) 480 { 481 ssize_t bytes_read; 482 struct ar *ar; 483 484 ar = (struct ar *)(a->format->data); 485 486 if (ar->entry_bytes_unconsumed) { 487 __archive_read_consume(a, ar->entry_bytes_unconsumed); 488 ar->entry_bytes_unconsumed = 0; 489 } 490 491 if (ar->entry_bytes_remaining > 0) { 492 *buff = __archive_read_ahead(a, 1, &bytes_read); 493 if (bytes_read == 0) { 494 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 495 "Truncated ar archive"); 496 return (ARCHIVE_FATAL); 497 } 498 if (bytes_read < 0) 499 return (ARCHIVE_FATAL); 500 if (bytes_read > ar->entry_bytes_remaining) 501 bytes_read = (ssize_t)ar->entry_bytes_remaining; 502 *size = bytes_read; 503 ar->entry_bytes_unconsumed = bytes_read; 504 *offset = ar->entry_offset; 505 ar->entry_offset += bytes_read; 506 ar->entry_bytes_remaining -= bytes_read; 507 return (ARCHIVE_OK); 508 } else { 509 int64_t skipped = __archive_read_consume(a, ar->entry_padding); 510 if (skipped >= 0) { 511 ar->entry_padding -= skipped; 512 } 513 if (ar->entry_padding) { 514 if (skipped >= 0) { 515 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 516 "Truncated ar archive- failed consuming padding"); 517 } 518 return (ARCHIVE_FATAL); 519 } 520 *buff = NULL; 521 *size = 0; 522 *offset = ar->entry_offset; 523 return (ARCHIVE_EOF); 524 } 525 } 526 527 static int 528 archive_read_format_ar_skip(struct archive_read *a) 529 { 530 int64_t bytes_skipped; 531 struct ar* ar; 532 533 ar = (struct ar *)(a->format->data); 534 535 bytes_skipped = __archive_read_consume(a, 536 ar->entry_bytes_remaining + ar->entry_padding 537 + ar->entry_bytes_unconsumed); 538 if (bytes_skipped < 0) 539 return (ARCHIVE_FATAL); 540 541 ar->entry_bytes_remaining = 0; 542 ar->entry_bytes_unconsumed = 0; 543 ar->entry_padding = 0; 544 545 return (ARCHIVE_OK); 546 } 547 548 static int 549 ar_parse_gnu_filename_table(struct archive_read *a) 550 { 551 struct ar *ar; 552 char *p; 553 size_t size; 554 555 ar = (struct ar*)(a->format->data); 556 size = ar->strtab_size; 557 558 for (p = ar->strtab; p < ar->strtab + size - 1; ++p) { 559 if (*p == '/') { 560 *p++ = '\0'; 561 if (*p != '\n') 562 goto bad_string_table; 563 *p = '\0'; 564 } 565 } 566 /* 567 * GNU ar always pads the table to an even size. 568 * The pad character is either '\n' or '`'. 569 */ 570 if (p != ar->strtab + size && *p != '\n' && *p != '`') 571 goto bad_string_table; 572 573 /* Enforce zero termination. */ 574 ar->strtab[size - 1] = '\0'; 575 576 return (ARCHIVE_OK); 577 578 bad_string_table: 579 archive_set_error(&a->archive, EINVAL, 580 "Invalid string table"); 581 free(ar->strtab); 582 ar->strtab = NULL; 583 return (ARCHIVE_FATAL); 584 } 585 586 static uint64_t 587 ar_atol8(const char *p, unsigned char_cnt) 588 { 589 uint64_t l, limit, last_digit_limit; 590 unsigned int digit, base; 591 592 base = 8; 593 limit = UINT64_MAX / base; 594 last_digit_limit = UINT64_MAX % base; 595 596 while ((*p == ' ' || *p == '\t') && char_cnt-- > 0) 597 p++; 598 599 l = 0; 600 digit = *p - '0'; 601 while (*p >= '0' && digit < base && char_cnt-- > 0) { 602 if (l>limit || (l == limit && digit > last_digit_limit)) { 603 l = UINT64_MAX; /* Truncate on overflow. */ 604 break; 605 } 606 l = (l * base) + digit; 607 digit = *++p - '0'; 608 } 609 return (l); 610 } 611 612 static uint64_t 613 ar_atol10(const char *p, unsigned char_cnt) 614 { 615 uint64_t l, limit, last_digit_limit; 616 unsigned int base, digit; 617 618 base = 10; 619 limit = UINT64_MAX / base; 620 last_digit_limit = UINT64_MAX % base; 621 622 while ((*p == ' ' || *p == '\t') && char_cnt-- > 0) 623 p++; 624 l = 0; 625 digit = *p - '0'; 626 while (*p >= '0' && digit < base && char_cnt-- > 0) { 627 if (l > limit || (l == limit && digit > last_digit_limit)) { 628 l = UINT64_MAX; /* Truncate on overflow. */ 629 break; 630 } 631 l = (l * base) + digit; 632 digit = *++p - '0'; 633 } 634 return (l); 635 } 636