1 /*- 2 * Copyright (c) 2007 Kai Wang 3 * Copyright (c) 2007 Tim Kientzle 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer 11 * in this position and unchanged. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 #include "archive_platform.h" 29 __FBSDID("$FreeBSD: head/lib/libarchive/archive_read_support_format_ar.c 201101 2009-12-28 03:06:27Z kientzle $"); 30 31 #ifdef HAVE_SYS_STAT_H 32 #include <sys/stat.h> 33 #endif 34 #ifdef HAVE_ERRNO_H 35 #include <errno.h> 36 #endif 37 #ifdef HAVE_STDLIB_H 38 #include <stdlib.h> 39 #endif 40 #ifdef HAVE_STRING_H 41 #include <string.h> 42 #endif 43 #ifdef HAVE_LIMITS_H 44 #include <limits.h> 45 #endif 46 47 #include "archive.h" 48 #include "archive_entry.h" 49 #include "archive_private.h" 50 #include "archive_read_private.h" 51 52 struct ar { 53 int64_t entry_bytes_remaining; 54 /* unconsumed is purely to track data we've gotten from readahead, 55 * but haven't yet marked as consumed. Must be paired with 56 * entry_bytes_remaining usage/modification. 57 */ 58 size_t entry_bytes_unconsumed; 59 int64_t entry_offset; 60 int64_t entry_padding; 61 char *strtab; 62 size_t strtab_size; 63 char read_global_header; 64 }; 65 66 /* 67 * Define structure of the "ar" header. 68 */ 69 #define AR_name_offset 0 70 #define AR_name_size 16 71 #define AR_date_offset 16 72 #define AR_date_size 12 73 #define AR_uid_offset 28 74 #define AR_uid_size 6 75 #define AR_gid_offset 34 76 #define AR_gid_size 6 77 #define AR_mode_offset 40 78 #define AR_mode_size 8 79 #define AR_size_offset 48 80 #define AR_size_size 10 81 #define AR_fmag_offset 58 82 #define AR_fmag_size 2 83 84 static int archive_read_format_ar_bid(struct archive_read *a, int); 85 static int archive_read_format_ar_cleanup(struct archive_read *a); 86 static int archive_read_format_ar_read_data(struct archive_read *a, 87 const void **buff, size_t *size, int64_t *offset); 88 static int archive_read_format_ar_skip(struct archive_read *a); 89 static int archive_read_format_ar_read_header(struct archive_read *a, 90 struct archive_entry *e); 91 static uint64_t ar_atol8(const char *p, unsigned char_cnt); 92 static uint64_t ar_atol10(const char *p, unsigned char_cnt); 93 static int ar_parse_gnu_filename_table(struct archive_read *a); 94 static int ar_parse_common_header(struct ar *ar, struct archive_entry *, 95 const char *h); 96 97 int 98 archive_read_support_format_ar(struct archive *_a) 99 { 100 struct archive_read *a = (struct archive_read *)_a; 101 struct ar *ar; 102 int r; 103 104 archive_check_magic(_a, ARCHIVE_READ_MAGIC, 105 ARCHIVE_STATE_NEW, "archive_read_support_format_ar"); 106 107 ar = (struct ar *)malloc(sizeof(*ar)); 108 if (ar == NULL) { 109 archive_set_error(&a->archive, ENOMEM, 110 "Can't allocate ar data"); 111 return (ARCHIVE_FATAL); 112 } 113 memset(ar, 0, sizeof(*ar)); 114 ar->strtab = NULL; 115 116 r = __archive_read_register_format(a, 117 ar, 118 "ar", 119 archive_read_format_ar_bid, 120 NULL, 121 archive_read_format_ar_read_header, 122 archive_read_format_ar_read_data, 123 archive_read_format_ar_skip, 124 archive_read_format_ar_cleanup); 125 126 if (r != ARCHIVE_OK) { 127 free(ar); 128 return (r); 129 } 130 return (ARCHIVE_OK); 131 } 132 133 static int 134 archive_read_format_ar_cleanup(struct archive_read *a) 135 { 136 struct ar *ar; 137 138 ar = (struct ar *)(a->format->data); 139 if (ar->strtab) 140 free(ar->strtab); 141 free(ar); 142 (a->format->data) = NULL; 143 return (ARCHIVE_OK); 144 } 145 146 static int 147 archive_read_format_ar_bid(struct archive_read *a, int best_bid) 148 { 149 const void *h; 150 151 (void)best_bid; /* UNUSED */ 152 153 /* 154 * Verify the 8-byte file signature. 155 * TODO: Do we need to check more than this? 156 */ 157 if ((h = __archive_read_ahead(a, 8, NULL)) == NULL) 158 return (-1); 159 if (memcmp(h, "!<arch>\n", 8) == 0) { 160 return (64); 161 } 162 return (-1); 163 } 164 165 static int 166 _ar_read_header(struct archive_read *a, struct archive_entry *entry, 167 struct ar *ar, const char *h, size_t *unconsumed) 168 { 169 char filename[AR_name_size + 1]; 170 uint64_t number; /* Used to hold parsed numbers before validation. */ 171 size_t bsd_name_length, entry_size; 172 char *p, *st; 173 const void *b; 174 int r; 175 176 /* Verify the magic signature on the file header. */ 177 if (strncmp(h + AR_fmag_offset, "`\n", 2) != 0) { 178 archive_set_error(&a->archive, EINVAL, 179 "Incorrect file header signature"); 180 return (ARCHIVE_WARN); 181 } 182 183 /* Copy filename into work buffer. */ 184 strncpy(filename, h + AR_name_offset, AR_name_size); 185 filename[AR_name_size] = '\0'; 186 187 /* 188 * Guess the format variant based on the filename. 189 */ 190 if (a->archive.archive_format == ARCHIVE_FORMAT_AR) { 191 /* We don't already know the variant, so let's guess. */ 192 /* 193 * Biggest clue is presence of '/': GNU starts special 194 * filenames with '/', appends '/' as terminator to 195 * non-special names, so anything with '/' should be 196 * GNU except for BSD long filenames. 197 */ 198 if (strncmp(filename, "#1/", 3) == 0) 199 a->archive.archive_format = ARCHIVE_FORMAT_AR_BSD; 200 else if (strchr(filename, '/') != NULL) 201 a->archive.archive_format = ARCHIVE_FORMAT_AR_GNU; 202 else if (strncmp(filename, "__.SYMDEF", 9) == 0) 203 a->archive.archive_format = ARCHIVE_FORMAT_AR_BSD; 204 /* 205 * XXX Do GNU/SVR4 'ar' programs ever omit trailing '/' 206 * if name exactly fills 16-byte field? If so, we 207 * can't assume entries without '/' are BSD. XXX 208 */ 209 } 210 211 /* Update format name from the code. */ 212 if (a->archive.archive_format == ARCHIVE_FORMAT_AR_GNU) 213 a->archive.archive_format_name = "ar (GNU/SVR4)"; 214 else if (a->archive.archive_format == ARCHIVE_FORMAT_AR_BSD) 215 a->archive.archive_format_name = "ar (BSD)"; 216 else 217 a->archive.archive_format_name = "ar"; 218 219 /* 220 * Remove trailing spaces from the filename. GNU and BSD 221 * variants both pad filename area out with spaces. 222 * This will only be wrong if GNU/SVR4 'ar' implementations 223 * omit trailing '/' for 16-char filenames and we have 224 * a 16-char filename that ends in ' '. 225 */ 226 p = filename + AR_name_size - 1; 227 while (p >= filename && *p == ' ') { 228 *p = '\0'; 229 p--; 230 } 231 232 /* 233 * Remove trailing slash unless first character is '/'. 234 * (BSD entries never end in '/', so this will only trim 235 * GNU-format entries. GNU special entries start with '/' 236 * and are not terminated in '/', so we don't trim anything 237 * that starts with '/'.) 238 */ 239 if (filename[0] != '/' && *p == '/') 240 *p = '\0'; 241 242 /* 243 * '//' is the GNU filename table. 244 * Later entries can refer to names in this table. 245 */ 246 if (strcmp(filename, "//") == 0) { 247 /* This must come before any call to _read_ahead. */ 248 ar_parse_common_header(ar, entry, h); 249 archive_entry_copy_pathname(entry, filename); 250 archive_entry_set_filetype(entry, AE_IFREG); 251 /* Get the size of the filename table. */ 252 number = ar_atol10(h + AR_size_offset, AR_size_size); 253 if (number > SIZE_MAX) { 254 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 255 "Filename table too large"); 256 return (ARCHIVE_FATAL); 257 } 258 entry_size = (size_t)number; 259 if (entry_size == 0) { 260 archive_set_error(&a->archive, EINVAL, 261 "Invalid string table"); 262 return (ARCHIVE_WARN); 263 } 264 if (ar->strtab != NULL) { 265 archive_set_error(&a->archive, EINVAL, 266 "More than one string tables exist"); 267 return (ARCHIVE_WARN); 268 } 269 270 /* Read the filename table into memory. */ 271 st = malloc(entry_size); 272 if (st == NULL) { 273 archive_set_error(&a->archive, ENOMEM, 274 "Can't allocate filename table buffer"); 275 return (ARCHIVE_FATAL); 276 } 277 ar->strtab = st; 278 ar->strtab_size = entry_size; 279 280 if (*unconsumed) { 281 __archive_read_consume(a, *unconsumed); 282 *unconsumed = 0; 283 } 284 285 if ((b = __archive_read_ahead(a, entry_size, NULL)) == NULL) 286 return (ARCHIVE_FATAL); 287 memcpy(st, b, entry_size); 288 __archive_read_consume(a, entry_size); 289 /* All contents are consumed. */ 290 ar->entry_bytes_remaining = 0; 291 archive_entry_set_size(entry, ar->entry_bytes_remaining); 292 293 /* Parse the filename table. */ 294 return (ar_parse_gnu_filename_table(a)); 295 } 296 297 /* 298 * GNU variant handles long filenames by storing /<number> 299 * to indicate a name stored in the filename table. 300 * XXX TODO: Verify that it's all digits... Don't be fooled 301 * by "/9xyz" XXX 302 */ 303 if (filename[0] == '/' && filename[1] >= '0' && filename[1] <= '9') { 304 number = ar_atol10(h + AR_name_offset + 1, AR_name_size - 1); 305 /* 306 * If we can't look up the real name, warn and return 307 * the entry with the wrong name. 308 */ 309 if (ar->strtab == NULL || number > ar->strtab_size) { 310 archive_set_error(&a->archive, EINVAL, 311 "Can't find long filename for entry"); 312 archive_entry_copy_pathname(entry, filename); 313 /* Parse the time, owner, mode, size fields. */ 314 ar_parse_common_header(ar, entry, h); 315 return (ARCHIVE_WARN); 316 } 317 318 archive_entry_copy_pathname(entry, &ar->strtab[(size_t)number]); 319 /* Parse the time, owner, mode, size fields. */ 320 return (ar_parse_common_header(ar, entry, h)); 321 } 322 323 /* 324 * BSD handles long filenames by storing "#1/" followed by the 325 * length of filename as a decimal number, then prepends the 326 * the filename to the file contents. 327 */ 328 if (strncmp(filename, "#1/", 3) == 0) { 329 /* Parse the time, owner, mode, size fields. */ 330 /* This must occur before _read_ahead is called again. */ 331 ar_parse_common_header(ar, entry, h); 332 333 /* Parse the size of the name, adjust the file size. */ 334 number = ar_atol10(h + AR_name_offset + 3, AR_name_size - 3); 335 bsd_name_length = (size_t)number; 336 /* Guard against the filename + trailing NUL 337 * overflowing a size_t and against the filename size 338 * being larger than the entire entry. */ 339 if (number > (uint64_t)(bsd_name_length + 1) 340 || (int64_t)bsd_name_length > ar->entry_bytes_remaining) { 341 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 342 "Bad input file size"); 343 return (ARCHIVE_FATAL); 344 } 345 ar->entry_bytes_remaining -= bsd_name_length; 346 /* Adjust file size reported to client. */ 347 archive_entry_set_size(entry, ar->entry_bytes_remaining); 348 349 if (*unconsumed) { 350 __archive_read_consume(a, *unconsumed); 351 *unconsumed = 0; 352 } 353 354 /* Read the long name into memory. */ 355 if ((b = __archive_read_ahead(a, bsd_name_length, NULL)) == NULL) { 356 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 357 "Truncated input file"); 358 return (ARCHIVE_FATAL); 359 } 360 /* Store it in the entry. */ 361 p = (char *)malloc(bsd_name_length + 1); 362 if (p == NULL) { 363 archive_set_error(&a->archive, ENOMEM, 364 "Can't allocate fname buffer"); 365 return (ARCHIVE_FATAL); 366 } 367 strncpy(p, b, bsd_name_length); 368 p[bsd_name_length] = '\0'; 369 370 __archive_read_consume(a, bsd_name_length); 371 372 archive_entry_copy_pathname(entry, p); 373 free(p); 374 return (ARCHIVE_OK); 375 } 376 377 /* 378 * "/" is the SVR4/GNU archive symbol table. 379 */ 380 if (strcmp(filename, "/") == 0) { 381 archive_entry_copy_pathname(entry, "/"); 382 /* Parse the time, owner, mode, size fields. */ 383 r = ar_parse_common_header(ar, entry, h); 384 /* Force the file type to a regular file. */ 385 archive_entry_set_filetype(entry, AE_IFREG); 386 return (r); 387 } 388 389 /* 390 * "__.SYMDEF" is a BSD archive symbol table. 391 */ 392 if (strcmp(filename, "__.SYMDEF") == 0) { 393 archive_entry_copy_pathname(entry, filename); 394 /* Parse the time, owner, mode, size fields. */ 395 return (ar_parse_common_header(ar, entry, h)); 396 } 397 398 /* 399 * Otherwise, this is a standard entry. The filename 400 * has already been trimmed as much as possible, based 401 * on our current knowledge of the format. 402 */ 403 archive_entry_copy_pathname(entry, filename); 404 return (ar_parse_common_header(ar, entry, h)); 405 } 406 407 static int 408 archive_read_format_ar_read_header(struct archive_read *a, 409 struct archive_entry *entry) 410 { 411 struct ar *ar = (struct ar*)(a->format->data); 412 size_t unconsumed; 413 const void *header_data; 414 int ret; 415 416 if (!ar->read_global_header) { 417 /* 418 * We are now at the beginning of the archive, 419 * so we need first consume the ar global header. 420 */ 421 __archive_read_consume(a, 8); 422 ar->read_global_header = 1; 423 /* Set a default format code for now. */ 424 a->archive.archive_format = ARCHIVE_FORMAT_AR; 425 } 426 427 /* Read the header for the next file entry. */ 428 if ((header_data = __archive_read_ahead(a, 60, NULL)) == NULL) 429 /* Broken header. */ 430 return (ARCHIVE_EOF); 431 432 unconsumed = 60; 433 434 ret = _ar_read_header(a, entry, ar, (const char *)header_data, &unconsumed); 435 436 if (unconsumed) 437 __archive_read_consume(a, unconsumed); 438 439 return ret; 440 } 441 442 443 static int 444 ar_parse_common_header(struct ar *ar, struct archive_entry *entry, 445 const char *h) 446 { 447 uint64_t n; 448 449 /* Copy remaining header */ 450 archive_entry_set_mtime(entry, 451 (time_t)ar_atol10(h + AR_date_offset, AR_date_size), 0L); 452 archive_entry_set_uid(entry, 453 (uid_t)ar_atol10(h + AR_uid_offset, AR_uid_size)); 454 archive_entry_set_gid(entry, 455 (gid_t)ar_atol10(h + AR_gid_offset, AR_gid_size)); 456 archive_entry_set_mode(entry, 457 (mode_t)ar_atol8(h + AR_mode_offset, AR_mode_size)); 458 n = ar_atol10(h + AR_size_offset, AR_size_size); 459 460 ar->entry_offset = 0; 461 ar->entry_padding = n % 2; 462 archive_entry_set_size(entry, n); 463 ar->entry_bytes_remaining = n; 464 return (ARCHIVE_OK); 465 } 466 467 static int 468 archive_read_format_ar_read_data(struct archive_read *a, 469 const void **buff, size_t *size, int64_t *offset) 470 { 471 ssize_t bytes_read; 472 struct ar *ar; 473 474 ar = (struct ar *)(a->format->data); 475 476 if (ar->entry_bytes_unconsumed) { 477 __archive_read_consume(a, ar->entry_bytes_unconsumed); 478 ar->entry_bytes_unconsumed = 0; 479 } 480 481 if (ar->entry_bytes_remaining > 0) { 482 *buff = __archive_read_ahead(a, 1, &bytes_read); 483 if (bytes_read == 0) { 484 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 485 "Truncated ar archive"); 486 return (ARCHIVE_FATAL); 487 } 488 if (bytes_read < 0) 489 return (ARCHIVE_FATAL); 490 if (bytes_read > ar->entry_bytes_remaining) 491 bytes_read = (ssize_t)ar->entry_bytes_remaining; 492 *size = bytes_read; 493 ar->entry_bytes_unconsumed = bytes_read; 494 *offset = ar->entry_offset; 495 ar->entry_offset += bytes_read; 496 ar->entry_bytes_remaining -= bytes_read; 497 return (ARCHIVE_OK); 498 } else { 499 int64_t skipped = __archive_read_consume(a, ar->entry_padding); 500 if (skipped >= 0) { 501 ar->entry_padding -= skipped; 502 } 503 if (ar->entry_padding) { 504 if (skipped >= 0) { 505 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 506 "Truncated ar archive- failed consuming padding"); 507 } 508 return (ARCHIVE_FATAL); 509 } 510 *buff = NULL; 511 *size = 0; 512 *offset = ar->entry_offset; 513 return (ARCHIVE_EOF); 514 } 515 } 516 517 static int 518 archive_read_format_ar_skip(struct archive_read *a) 519 { 520 int64_t bytes_skipped; 521 struct ar* ar; 522 523 ar = (struct ar *)(a->format->data); 524 525 bytes_skipped = __archive_read_consume(a, 526 ar->entry_bytes_remaining + ar->entry_padding 527 + ar->entry_bytes_unconsumed); 528 if (bytes_skipped < 0) 529 return (ARCHIVE_FATAL); 530 531 ar->entry_bytes_remaining = 0; 532 ar->entry_bytes_unconsumed = 0; 533 ar->entry_padding = 0; 534 535 return (ARCHIVE_OK); 536 } 537 538 static int 539 ar_parse_gnu_filename_table(struct archive_read *a) 540 { 541 struct ar *ar; 542 char *p; 543 size_t size; 544 545 ar = (struct ar*)(a->format->data); 546 size = ar->strtab_size; 547 548 for (p = ar->strtab; p < ar->strtab + size - 1; ++p) { 549 if (*p == '/') { 550 *p++ = '\0'; 551 if (*p != '\n') 552 goto bad_string_table; 553 *p = '\0'; 554 } 555 } 556 /* 557 * GNU ar always pads the table to an even size. 558 * The pad character is either '\n' or '`'. 559 */ 560 if (p != ar->strtab + size && *p != '\n' && *p != '`') 561 goto bad_string_table; 562 563 /* Enforce zero termination. */ 564 ar->strtab[size - 1] = '\0'; 565 566 return (ARCHIVE_OK); 567 568 bad_string_table: 569 archive_set_error(&a->archive, EINVAL, 570 "Invalid string table"); 571 free(ar->strtab); 572 ar->strtab = NULL; 573 return (ARCHIVE_WARN); 574 } 575 576 static uint64_t 577 ar_atol8(const char *p, unsigned char_cnt) 578 { 579 uint64_t l, limit, last_digit_limit; 580 unsigned int digit, base; 581 582 base = 8; 583 limit = UINT64_MAX / base; 584 last_digit_limit = UINT64_MAX % base; 585 586 while ((*p == ' ' || *p == '\t') && char_cnt-- > 0) 587 p++; 588 589 l = 0; 590 digit = *p - '0'; 591 while (*p >= '0' && digit < base && char_cnt-- > 0) { 592 if (l>limit || (l == limit && digit > last_digit_limit)) { 593 l = UINT64_MAX; /* Truncate on overflow. */ 594 break; 595 } 596 l = (l * base) + digit; 597 digit = *++p - '0'; 598 } 599 return (l); 600 } 601 602 static uint64_t 603 ar_atol10(const char *p, unsigned char_cnt) 604 { 605 uint64_t l, limit, last_digit_limit; 606 unsigned int base, digit; 607 608 base = 10; 609 limit = UINT64_MAX / base; 610 last_digit_limit = UINT64_MAX % base; 611 612 while ((*p == ' ' || *p == '\t') && char_cnt-- > 0) 613 p++; 614 l = 0; 615 digit = *p - '0'; 616 while (*p >= '0' && digit < base && char_cnt-- > 0) { 617 if (l > limit || (l == limit && digit > last_digit_limit)) { 618 l = UINT64_MAX; /* Truncate on overflow. */ 619 break; 620 } 621 l = (l * base) + digit; 622 digit = *++p - '0'; 623 } 624 return (l); 625 } 626