1 /*- 2 * Copyright (c) 2007 Kai Wang 3 * Copyright (c) 2007 Tim Kientzle 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer 11 * in this position and unchanged. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 #include "archive_platform.h" 29 __FBSDID("$FreeBSD: head/lib/libarchive/archive_read_support_format_ar.c 201101 2009-12-28 03:06:27Z kientzle $"); 30 31 #ifdef HAVE_SYS_STAT_H 32 #include <sys/stat.h> 33 #endif 34 #ifdef HAVE_ERRNO_H 35 #include <errno.h> 36 #endif 37 #ifdef HAVE_STDLIB_H 38 #include <stdlib.h> 39 #endif 40 #ifdef HAVE_STRING_H 41 #include <string.h> 42 #endif 43 #ifdef HAVE_LIMITS_H 44 #include <limits.h> 45 #endif 46 47 #include "archive.h" 48 #include "archive_entry.h" 49 #include "archive_private.h" 50 #include "archive_read_private.h" 51 52 struct ar { 53 off_t entry_bytes_remaining; 54 off_t entry_offset; 55 off_t entry_padding; 56 char *strtab; 57 size_t strtab_size; 58 }; 59 60 /* 61 * Define structure of the "ar" header. 62 */ 63 #define AR_name_offset 0 64 #define AR_name_size 16 65 #define AR_date_offset 16 66 #define AR_date_size 12 67 #define AR_uid_offset 28 68 #define AR_uid_size 6 69 #define AR_gid_offset 34 70 #define AR_gid_size 6 71 #define AR_mode_offset 40 72 #define AR_mode_size 8 73 #define AR_size_offset 48 74 #define AR_size_size 10 75 #define AR_fmag_offset 58 76 #define AR_fmag_size 2 77 78 static int archive_read_format_ar_bid(struct archive_read *a); 79 static int archive_read_format_ar_cleanup(struct archive_read *a); 80 static int archive_read_format_ar_read_data(struct archive_read *a, 81 const void **buff, size_t *size, off_t *offset); 82 static int archive_read_format_ar_skip(struct archive_read *a); 83 static int archive_read_format_ar_read_header(struct archive_read *a, 84 struct archive_entry *e); 85 static uint64_t ar_atol8(const char *p, unsigned char_cnt); 86 static uint64_t ar_atol10(const char *p, unsigned char_cnt); 87 static int ar_parse_gnu_filename_table(struct archive_read *a); 88 static int ar_parse_common_header(struct ar *ar, struct archive_entry *, 89 const char *h); 90 91 int 92 archive_read_support_format_ar(struct archive *_a) 93 { 94 struct archive_read *a = (struct archive_read *)_a; 95 struct ar *ar; 96 int r; 97 98 ar = (struct ar *)malloc(sizeof(*ar)); 99 if (ar == NULL) { 100 archive_set_error(&a->archive, ENOMEM, 101 "Can't allocate ar data"); 102 return (ARCHIVE_FATAL); 103 } 104 memset(ar, 0, sizeof(*ar)); 105 ar->strtab = NULL; 106 107 r = __archive_read_register_format(a, 108 ar, 109 "ar", 110 archive_read_format_ar_bid, 111 NULL, 112 archive_read_format_ar_read_header, 113 archive_read_format_ar_read_data, 114 archive_read_format_ar_skip, 115 archive_read_format_ar_cleanup); 116 117 if (r != ARCHIVE_OK) { 118 free(ar); 119 return (r); 120 } 121 return (ARCHIVE_OK); 122 } 123 124 static int 125 archive_read_format_ar_cleanup(struct archive_read *a) 126 { 127 struct ar *ar; 128 129 ar = (struct ar *)(a->format->data); 130 if (ar->strtab) 131 free(ar->strtab); 132 free(ar); 133 (a->format->data) = NULL; 134 return (ARCHIVE_OK); 135 } 136 137 static int 138 archive_read_format_ar_bid(struct archive_read *a) 139 { 140 const void *h; 141 142 if (a->archive.archive_format != 0 && 143 (a->archive.archive_format & ARCHIVE_FORMAT_BASE_MASK) != 144 ARCHIVE_FORMAT_AR) 145 return(0); 146 147 /* 148 * Verify the 8-byte file signature. 149 * TODO: Do we need to check more than this? 150 */ 151 if ((h = __archive_read_ahead(a, 8, NULL)) == NULL) 152 return (-1); 153 if (strncmp((const char*)h, "!<arch>\n", 8) == 0) { 154 return (64); 155 } 156 return (-1); 157 } 158 159 static int 160 archive_read_format_ar_read_header(struct archive_read *a, 161 struct archive_entry *entry) 162 { 163 char filename[AR_name_size + 1]; 164 struct ar *ar; 165 uint64_t number; /* Used to hold parsed numbers before validation. */ 166 ssize_t bytes_read; 167 size_t bsd_name_length, entry_size; 168 char *p, *st; 169 const void *b; 170 const char *h; 171 int r; 172 173 ar = (struct ar*)(a->format->data); 174 175 if (a->archive.file_position == 0) { 176 /* 177 * We are now at the beginning of the archive, 178 * so we need first consume the ar global header. 179 */ 180 __archive_read_consume(a, 8); 181 /* Set a default format code for now. */ 182 a->archive.archive_format = ARCHIVE_FORMAT_AR; 183 } 184 185 /* Read the header for the next file entry. */ 186 if ((b = __archive_read_ahead(a, 60, &bytes_read)) == NULL) 187 /* Broken header. */ 188 return (ARCHIVE_EOF); 189 __archive_read_consume(a, 60); 190 h = (const char *)b; 191 192 /* Verify the magic signature on the file header. */ 193 if (strncmp(h + AR_fmag_offset, "`\n", 2) != 0) { 194 archive_set_error(&a->archive, EINVAL, 195 "Incorrect file header signature"); 196 return (ARCHIVE_WARN); 197 } 198 199 /* Copy filename into work buffer. */ 200 strncpy(filename, h + AR_name_offset, AR_name_size); 201 filename[AR_name_size] = '\0'; 202 203 /* 204 * Guess the format variant based on the filename. 205 */ 206 if (a->archive.archive_format == ARCHIVE_FORMAT_AR) { 207 /* We don't already know the variant, so let's guess. */ 208 /* 209 * Biggest clue is presence of '/': GNU starts special 210 * filenames with '/', appends '/' as terminator to 211 * non-special names, so anything with '/' should be 212 * GNU except for BSD long filenames. 213 */ 214 if (strncmp(filename, "#1/", 3) == 0) 215 a->archive.archive_format = ARCHIVE_FORMAT_AR_BSD; 216 else if (strchr(filename, '/') != NULL) 217 a->archive.archive_format = ARCHIVE_FORMAT_AR_GNU; 218 else if (strncmp(filename, "__.SYMDEF", 9) == 0) 219 a->archive.archive_format = ARCHIVE_FORMAT_AR_BSD; 220 /* 221 * XXX Do GNU/SVR4 'ar' programs ever omit trailing '/' 222 * if name exactly fills 16-byte field? If so, we 223 * can't assume entries without '/' are BSD. XXX 224 */ 225 } 226 227 /* Update format name from the code. */ 228 if (a->archive.archive_format == ARCHIVE_FORMAT_AR_GNU) 229 a->archive.archive_format_name = "ar (GNU/SVR4)"; 230 else if (a->archive.archive_format == ARCHIVE_FORMAT_AR_BSD) 231 a->archive.archive_format_name = "ar (BSD)"; 232 else 233 a->archive.archive_format_name = "ar"; 234 235 /* 236 * Remove trailing spaces from the filename. GNU and BSD 237 * variants both pad filename area out with spaces. 238 * This will only be wrong if GNU/SVR4 'ar' implementations 239 * omit trailing '/' for 16-char filenames and we have 240 * a 16-char filename that ends in ' '. 241 */ 242 p = filename + AR_name_size - 1; 243 while (p >= filename && *p == ' ') { 244 *p = '\0'; 245 p--; 246 } 247 248 /* 249 * Remove trailing slash unless first character is '/'. 250 * (BSD entries never end in '/', so this will only trim 251 * GNU-format entries. GNU special entries start with '/' 252 * and are not terminated in '/', so we don't trim anything 253 * that starts with '/'.) 254 */ 255 if (filename[0] != '/' && *p == '/') 256 *p = '\0'; 257 258 /* 259 * '//' is the GNU filename table. 260 * Later entries can refer to names in this table. 261 */ 262 if (strcmp(filename, "//") == 0) { 263 /* This must come before any call to _read_ahead. */ 264 ar_parse_common_header(ar, entry, h); 265 archive_entry_copy_pathname(entry, filename); 266 archive_entry_set_filetype(entry, AE_IFREG); 267 /* Get the size of the filename table. */ 268 number = ar_atol10(h + AR_size_offset, AR_size_size); 269 if (number > SIZE_MAX) { 270 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 271 "Filename table too large"); 272 return (ARCHIVE_FATAL); 273 } 274 entry_size = (size_t)number; 275 if (entry_size == 0) { 276 archive_set_error(&a->archive, EINVAL, 277 "Invalid string table"); 278 return (ARCHIVE_WARN); 279 } 280 if (ar->strtab != NULL) { 281 archive_set_error(&a->archive, EINVAL, 282 "More than one string tables exist"); 283 return (ARCHIVE_WARN); 284 } 285 286 /* Read the filename table into memory. */ 287 st = malloc(entry_size); 288 if (st == NULL) { 289 archive_set_error(&a->archive, ENOMEM, 290 "Can't allocate filename table buffer"); 291 return (ARCHIVE_FATAL); 292 } 293 ar->strtab = st; 294 ar->strtab_size = entry_size; 295 if ((b = __archive_read_ahead(a, entry_size, NULL)) == NULL) 296 return (ARCHIVE_FATAL); 297 memcpy(st, b, entry_size); 298 __archive_read_consume(a, entry_size); 299 /* All contents are consumed. */ 300 ar->entry_bytes_remaining = 0; 301 archive_entry_set_size(entry, ar->entry_bytes_remaining); 302 303 /* Parse the filename table. */ 304 return (ar_parse_gnu_filename_table(a)); 305 } 306 307 /* 308 * GNU variant handles long filenames by storing /<number> 309 * to indicate a name stored in the filename table. 310 * XXX TODO: Verify that it's all digits... Don't be fooled 311 * by "/9xyz" XXX 312 */ 313 if (filename[0] == '/' && filename[1] >= '0' && filename[1] <= '9') { 314 number = ar_atol10(h + AR_name_offset + 1, AR_name_size - 1); 315 /* 316 * If we can't look up the real name, warn and return 317 * the entry with the wrong name. 318 */ 319 if (ar->strtab == NULL || number > ar->strtab_size) { 320 archive_set_error(&a->archive, EINVAL, 321 "Can't find long filename for entry"); 322 archive_entry_copy_pathname(entry, filename); 323 /* Parse the time, owner, mode, size fields. */ 324 ar_parse_common_header(ar, entry, h); 325 return (ARCHIVE_WARN); 326 } 327 328 archive_entry_copy_pathname(entry, &ar->strtab[(size_t)number]); 329 /* Parse the time, owner, mode, size fields. */ 330 return (ar_parse_common_header(ar, entry, h)); 331 } 332 333 /* 334 * BSD handles long filenames by storing "#1/" followed by the 335 * length of filename as a decimal number, then prepends the 336 * the filename to the file contents. 337 */ 338 if (strncmp(filename, "#1/", 3) == 0) { 339 /* Parse the time, owner, mode, size fields. */ 340 /* This must occur before _read_ahead is called again. */ 341 ar_parse_common_header(ar, entry, h); 342 343 /* Parse the size of the name, adjust the file size. */ 344 number = ar_atol10(h + AR_name_offset + 3, AR_name_size - 3); 345 bsd_name_length = (size_t)number; 346 /* Guard against the filename + trailing NUL 347 * overflowing a size_t and against the filename size 348 * being larger than the entire entry. */ 349 if (number > (uint64_t)(bsd_name_length + 1) 350 || (off_t)bsd_name_length > ar->entry_bytes_remaining) { 351 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 352 "Bad input file size"); 353 return (ARCHIVE_FATAL); 354 } 355 ar->entry_bytes_remaining -= bsd_name_length; 356 /* Adjust file size reported to client. */ 357 archive_entry_set_size(entry, ar->entry_bytes_remaining); 358 359 /* Read the long name into memory. */ 360 if ((b = __archive_read_ahead(a, bsd_name_length, NULL)) == NULL) { 361 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 362 "Truncated input file"); 363 return (ARCHIVE_FATAL); 364 } 365 __archive_read_consume(a, bsd_name_length); 366 367 /* Store it in the entry. */ 368 p = (char *)malloc(bsd_name_length + 1); 369 if (p == NULL) { 370 archive_set_error(&a->archive, ENOMEM, 371 "Can't allocate fname buffer"); 372 return (ARCHIVE_FATAL); 373 } 374 strncpy(p, b, bsd_name_length); 375 p[bsd_name_length] = '\0'; 376 archive_entry_copy_pathname(entry, p); 377 free(p); 378 return (ARCHIVE_OK); 379 } 380 381 /* 382 * "/" is the SVR4/GNU archive symbol table. 383 */ 384 if (strcmp(filename, "/") == 0) { 385 archive_entry_copy_pathname(entry, "/"); 386 /* Parse the time, owner, mode, size fields. */ 387 r = ar_parse_common_header(ar, entry, h); 388 /* Force the file type to a regular file. */ 389 archive_entry_set_filetype(entry, AE_IFREG); 390 return (r); 391 } 392 393 /* 394 * "__.SYMDEF" is a BSD archive symbol table. 395 */ 396 if (strcmp(filename, "__.SYMDEF") == 0) { 397 archive_entry_copy_pathname(entry, filename); 398 /* Parse the time, owner, mode, size fields. */ 399 return (ar_parse_common_header(ar, entry, h)); 400 } 401 402 /* 403 * Otherwise, this is a standard entry. The filename 404 * has already been trimmed as much as possible, based 405 * on our current knowledge of the format. 406 */ 407 archive_entry_copy_pathname(entry, filename); 408 return (ar_parse_common_header(ar, entry, h)); 409 } 410 411 static int 412 ar_parse_common_header(struct ar *ar, struct archive_entry *entry, 413 const char *h) 414 { 415 uint64_t n; 416 417 /* Copy remaining header */ 418 archive_entry_set_mtime(entry, 419 (time_t)ar_atol10(h + AR_date_offset, AR_date_size), 0L); 420 archive_entry_set_uid(entry, 421 (uid_t)ar_atol10(h + AR_uid_offset, AR_uid_size)); 422 archive_entry_set_gid(entry, 423 (gid_t)ar_atol10(h + AR_gid_offset, AR_gid_size)); 424 archive_entry_set_mode(entry, 425 (mode_t)ar_atol8(h + AR_mode_offset, AR_mode_size)); 426 n = ar_atol10(h + AR_size_offset, AR_size_size); 427 428 ar->entry_offset = 0; 429 ar->entry_padding = n % 2; 430 archive_entry_set_size(entry, n); 431 ar->entry_bytes_remaining = n; 432 return (ARCHIVE_OK); 433 } 434 435 static int 436 archive_read_format_ar_read_data(struct archive_read *a, 437 const void **buff, size_t *size, off_t *offset) 438 { 439 ssize_t bytes_read; 440 struct ar *ar; 441 442 ar = (struct ar *)(a->format->data); 443 444 if (ar->entry_bytes_remaining > 0) { 445 *buff = __archive_read_ahead(a, 1, &bytes_read); 446 if (bytes_read == 0) { 447 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 448 "Truncated ar archive"); 449 return (ARCHIVE_FATAL); 450 } 451 if (bytes_read < 0) 452 return (ARCHIVE_FATAL); 453 if (bytes_read > ar->entry_bytes_remaining) 454 bytes_read = (ssize_t)ar->entry_bytes_remaining; 455 *size = bytes_read; 456 *offset = ar->entry_offset; 457 ar->entry_offset += bytes_read; 458 ar->entry_bytes_remaining -= bytes_read; 459 __archive_read_consume(a, (size_t)bytes_read); 460 return (ARCHIVE_OK); 461 } else { 462 while (ar->entry_padding > 0) { 463 *buff = __archive_read_ahead(a, 1, &bytes_read); 464 if (bytes_read <= 0) 465 return (ARCHIVE_FATAL); 466 if (bytes_read > ar->entry_padding) 467 bytes_read = (ssize_t)ar->entry_padding; 468 __archive_read_consume(a, (size_t)bytes_read); 469 ar->entry_padding -= bytes_read; 470 } 471 *buff = NULL; 472 *size = 0; 473 *offset = ar->entry_offset; 474 return (ARCHIVE_EOF); 475 } 476 } 477 478 static int 479 archive_read_format_ar_skip(struct archive_read *a) 480 { 481 off_t bytes_skipped; 482 struct ar* ar; 483 484 ar = (struct ar *)(a->format->data); 485 486 bytes_skipped = __archive_read_skip(a, 487 ar->entry_bytes_remaining + ar->entry_padding); 488 if (bytes_skipped < 0) 489 return (ARCHIVE_FATAL); 490 491 ar->entry_bytes_remaining = 0; 492 ar->entry_padding = 0; 493 494 return (ARCHIVE_OK); 495 } 496 497 static int 498 ar_parse_gnu_filename_table(struct archive_read *a) 499 { 500 struct ar *ar; 501 char *p; 502 size_t size; 503 504 ar = (struct ar*)(a->format->data); 505 size = ar->strtab_size; 506 507 for (p = ar->strtab; p < ar->strtab + size - 1; ++p) { 508 if (*p == '/') { 509 *p++ = '\0'; 510 if (*p != '\n') 511 goto bad_string_table; 512 *p = '\0'; 513 } 514 } 515 /* 516 * GNU ar always pads the table to an even size. 517 * The pad character is either '\n' or '`'. 518 */ 519 if (p != ar->strtab + size && *p != '\n' && *p != '`') 520 goto bad_string_table; 521 522 /* Enforce zero termination. */ 523 ar->strtab[size - 1] = '\0'; 524 525 return (ARCHIVE_OK); 526 527 bad_string_table: 528 archive_set_error(&a->archive, EINVAL, 529 "Invalid string table"); 530 free(ar->strtab); 531 ar->strtab = NULL; 532 return (ARCHIVE_WARN); 533 } 534 535 static uint64_t 536 ar_atol8(const char *p, unsigned char_cnt) 537 { 538 uint64_t l, limit, last_digit_limit; 539 unsigned int digit, base; 540 541 base = 8; 542 limit = UINT64_MAX / base; 543 last_digit_limit = UINT64_MAX % base; 544 545 while ((*p == ' ' || *p == '\t') && char_cnt-- > 0) 546 p++; 547 548 l = 0; 549 digit = *p - '0'; 550 while (*p >= '0' && digit < base && char_cnt-- > 0) { 551 if (l>limit || (l == limit && digit > last_digit_limit)) { 552 l = UINT64_MAX; /* Truncate on overflow. */ 553 break; 554 } 555 l = (l * base) + digit; 556 digit = *++p - '0'; 557 } 558 return (l); 559 } 560 561 static uint64_t 562 ar_atol10(const char *p, unsigned char_cnt) 563 { 564 uint64_t l, limit, last_digit_limit; 565 unsigned int base, digit; 566 567 base = 10; 568 limit = UINT64_MAX / base; 569 last_digit_limit = UINT64_MAX % base; 570 571 while ((*p == ' ' || *p == '\t') && char_cnt-- > 0) 572 p++; 573 l = 0; 574 digit = *p - '0'; 575 while (*p >= '0' && digit < base && char_cnt-- > 0) { 576 if (l > limit || (l == limit && digit > last_digit_limit)) { 577 l = UINT64_MAX; /* Truncate on overflow. */ 578 break; 579 } 580 l = (l * base) + digit; 581 digit = *++p - '0'; 582 } 583 return (l); 584 } 585