/*-
 * Copyright (c) 2003-2007 Tim Kientzle
 * Copyright (c) 2011-2012 Michihiro NAKAJIMA
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "archive_platform.h"
__FBSDID("$FreeBSD: head/lib/libarchive/archive_read_support_format_tar.c 201161 2009-12-29 05:44:39Z kientzle $");

#ifdef HAVE_ERRNO_H
#include <errno.h>
#endif
#include <stddef.h>
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_STRING_H
#include <string.h>
#endif

#include "archive.h"
#include "archive_acl_private.h" /* For ACL parsing routines. */
#include "archive_entry.h"
#include "archive_entry_locale.h"
#include "archive_private.h"
#include "archive_read_private.h"

/* Smaller of two values; note that each argument is evaluated twice. */
#define tar_min(a,b) ((a) < (b) ? (a) : (b))

/*
 * Layout of POSIX 'ustar' tar header.
 *
 * The fields below add up to 500 bytes; headers are read as 512-byte
 * records.  The checksum field occupies bytes 148..155 of the record,
 * which is the range checksum() replaces with blanks when summing.
 */
struct archive_entry_header_ustar {
	char	name[100];
	char	mode[8];
	char	uid[8];
	char	gid[8];
	char	size[12];
	char	mtime[12];
	char	checksum[8];
	char	typeflag[1];
	char	linkname[100];	/* "old format" header ends here */
	char	magic[6];	/* For POSIX: "ustar\0" */
	char	version[2];	/* For POSIX: "00" */
	char	uname[32];
	char	gname[32];
	char	rdevmajor[8];
	char	rdevminor[8];
	char	prefix[155];
};

/*
 * Structure of GNU tar header
 */
/* One (offset, length) pair of the sparse map stored in a GNU header. */
struct gnu_sparse {
	char	offset[12];
	char	numbytes[12];
};

struct archive_entry_header_gnutar {
	char	name[100];
	char	mode[8];
	char	uid[8];
	char	gid[8];
	char	size[12];
	char	mtime[12];
	char	checksum[8];
	char	typeflag[1];
	char	linkname[100];
	char	magic[8];  /* "ustar  \0" (note blank/blank/null at end) */
	char	uname[32];
	char	gname[32];
	char	rdevmajor[8];
	char	rdevminor[8];
	char	atime[12];
	char	ctime[12];
	char	offset[12];
	char	longnames[4];
	char	unused[1];
	struct gnu_sparse sparse[4];
	char	isextended[1];
	char	realsize[12];
	/*
	 * Old GNU format doesn't use POSIX 'prefix' field; they use
	 * the 'L' (longname) entry instead.
	 */
};

/*
 * Data specific to this format.
 */
/*
 * One node of the singly linked list describing the current entry's
 * body.  The read-data callback walks the list from the head and frees
 * nodes whose 'remaining' count has reached zero.
 */
struct sparse_block {
	struct sparse_block	*next;
	int64_t	offset;		/* Offset reported to the client for this run. */
	int64_t	remaining;	/* Bytes of this run not yet handed out. */
	int hole;		/* Non-zero: run is read past, not returned. */
};

/* Per-archive reader state, allocated when the format is registered. */
struct tar {
	struct archive_string	 acl_text;	/* Body of a Solaris 'A' header. */
	struct archive_string	 entry_pathname;
	/* For "GNU.sparse.name" and other similar path extensions. */
	struct archive_string	 entry_pathname_override;
	struct archive_string	 entry_linkpath;
	struct archive_string	 entry_uname;
	struct archive_string	 entry_gname;
	struct archive_string	 longlink;	/* Body of a 'K' header. */
	struct archive_string	 longname;	/* Body of an 'L' header. */
	struct archive_string	 pax_header;
	struct archive_string	 pax_global;
	struct archive_string	 line;
	int			 pax_hdrcharset_binary;
	/* Nesting depth of tar_read_header(); limited to 32. */
	int			 header_recursion_depth;
	/* Body bytes of the current entry not yet handed to the client. */
	int64_t			 entry_bytes_remaining;
	int64_t			 entry_offset;
	/* Padding after the body; consumed at end of entry or on skip. */
	int64_t			 entry_padding;
	/* Bytes returned by the last data read but not yet consumed. */
	int64_t 		 entry_bytes_unconsumed;
	/* Reset to -1 ("unset") for every entry; reported as the EOF offset. */
	int64_t			 realsize;
	struct sparse_block	*sparse_list;
	struct sparse_block	*sparse_last;
	int64_t			 sparse_offset;
	int64_t			 sparse_numbytes;
	int			 sparse_gnu_major;
	int			 sparse_gnu_minor;
	/* Set while a GNU sparse map still has to be read from the body. */
	char			 sparse_gnu_pending;

	struct archive_string	 localname;	/* Scratch copy of the ACL text. */
	struct archive_string_conv *opt_sconv;	/* From the "hdrcharset" option. */
	struct archive_string_conv *sconv;	/* Conversion used for this entry. */
	struct archive_string_conv *sconv_acl;	/* UTF-8 conversion for ACLs. */
	struct archive_string_conv *sconv_default; /* Lazily created default. */
	int			 init_default_conversion;
	int			 compat_2x;	/* Set by the "compat-2x" option. */
};

static int	archive_block_is_null(const char *p);
static char	*base64_decode(const char *, size_t, size_t *);
static int	gnu_add_sparse_entry(struct archive_read *, struct tar *,
		    int64_t offset, int64_t remaining);

static void	gnu_clear_sparse_list(struct tar *);
static int	gnu_sparse_old_read(struct archive_read *, struct tar *,
		    const struct archive_entry_header_gnutar *header, size_t *);
static int	gnu_sparse_old_parse(struct archive_read *, struct tar *,
		    const struct gnu_sparse *sparse, int length);
static int	gnu_sparse_01_parse(struct archive_read *, struct tar *,
		    const char *);
static ssize_t	gnu_sparse_10_read(struct archive_read *, struct tar *,
		    size_t *);
static int	header_Solaris_ACL(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *, size_t *);
static int	header_common(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *);
static int	header_old_tar(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *);
static int	header_pax_extensions(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *, size_t *);
static int	header_pax_global(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, size_t *);
static int	header_longlink(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, size_t *);
static int	header_longname(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, size_t *);
static int	read_mac_metadata_blob(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, size_t *);
static int	header_volume(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, size_t *);
static int	header_ustar(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h);
static int	header_gnutar(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, size_t *);
static int	archive_read_format_tar_bid(struct archive_read *, int);
static int	archive_read_format_tar_options(struct archive_read *,
		    const char *, const char *);
static int	archive_read_format_tar_cleanup(struct archive_read *);
static int	archive_read_format_tar_read_data(struct archive_read *a,
		    const void **buff, size_t *size, int64_t *offset);
static int	archive_read_format_tar_skip(struct archive_read *a);
static int	archive_read_format_tar_read_header(struct archive_read *,
		    struct archive_entry *);
static int	checksum(struct archive_read *, const void *);
static int	pax_attribute(struct archive_read *, struct tar *,
		    struct archive_entry *, char *key, char *value);
static int	pax_header(struct archive_read *, struct tar *,
		    struct archive_entry *, char *attr);
static void	pax_time(const char *, int64_t *sec, long *nanos);
static ssize_t readline(struct archive_read *, struct tar *, const char **, 208 ssize_t limit, size_t *); 209 static int read_body_to_string(struct archive_read *, struct tar *, 210 struct archive_string *, const void *h, size_t *); 211 static int solaris_sparse_parse(struct archive_read *, struct tar *, 212 struct archive_entry *, const char *); 213 static int64_t tar_atol(const char *, unsigned); 214 static int64_t tar_atol10(const char *, unsigned); 215 static int64_t tar_atol256(const char *, unsigned); 216 static int64_t tar_atol8(const char *, unsigned); 217 static int tar_read_header(struct archive_read *, struct tar *, 218 struct archive_entry *, size_t *); 219 static int tohex(int c); 220 static char *url_decode(const char *); 221 static void tar_flush_unconsumed(struct archive_read *, size_t *); 222 223 224 int 225 archive_read_support_format_gnutar(struct archive *a) 226 { 227 archive_check_magic(a, ARCHIVE_READ_MAGIC, 228 ARCHIVE_STATE_NEW, "archive_read_support_format_gnutar"); 229 return (archive_read_support_format_tar(a)); 230 } 231 232 233 int 234 archive_read_support_format_tar(struct archive *_a) 235 { 236 struct archive_read *a = (struct archive_read *)_a; 237 struct tar *tar; 238 int r; 239 240 archive_check_magic(_a, ARCHIVE_READ_MAGIC, 241 ARCHIVE_STATE_NEW, "archive_read_support_format_tar"); 242 243 tar = (struct tar *)calloc(1, sizeof(*tar)); 244 if (tar == NULL) { 245 archive_set_error(&a->archive, ENOMEM, 246 "Can't allocate tar data"); 247 return (ARCHIVE_FATAL); 248 } 249 250 r = __archive_read_register_format(a, tar, "tar", 251 archive_read_format_tar_bid, 252 archive_read_format_tar_options, 253 archive_read_format_tar_read_header, 254 archive_read_format_tar_read_data, 255 archive_read_format_tar_skip, 256 archive_read_format_tar_cleanup); 257 258 if (r != ARCHIVE_OK) 259 free(tar); 260 return (ARCHIVE_OK); 261 } 262 263 static int 264 archive_read_format_tar_cleanup(struct archive_read *a) 265 { 266 struct tar *tar; 267 268 tar = 
(struct tar *)(a->format->data); 269 gnu_clear_sparse_list(tar); 270 archive_string_free(&tar->acl_text); 271 archive_string_free(&tar->entry_pathname); 272 archive_string_free(&tar->entry_pathname_override); 273 archive_string_free(&tar->entry_linkpath); 274 archive_string_free(&tar->entry_uname); 275 archive_string_free(&tar->entry_gname); 276 archive_string_free(&tar->line); 277 archive_string_free(&tar->pax_global); 278 archive_string_free(&tar->pax_header); 279 archive_string_free(&tar->longname); 280 archive_string_free(&tar->longlink); 281 archive_string_free(&tar->localname); 282 free(tar); 283 (a->format->data) = NULL; 284 return (ARCHIVE_OK); 285 } 286 287 288 static int 289 archive_read_format_tar_bid(struct archive_read *a, int best_bid) 290 { 291 int bid; 292 const char *h; 293 const struct archive_entry_header_ustar *header; 294 295 (void)best_bid; /* UNUSED */ 296 297 bid = 0; 298 299 /* Now let's look at the actual header and see if it matches. */ 300 h = __archive_read_ahead(a, 512, NULL); 301 if (h == NULL) 302 return (-1); 303 304 /* If it's an end-of-archive mark, we can handle it. */ 305 if (h[0] == 0 && archive_block_is_null(h)) { 306 /* 307 * Usually, I bid the number of bits verified, but 308 * in this case, 4096 seems excessive so I picked 10 as 309 * an arbitrary but reasonable-seeming value. 310 */ 311 return (10); 312 } 313 314 /* If it's not an end-of-archive mark, it must have a valid checksum.*/ 315 if (!checksum(a, h)) 316 return (0); 317 bid += 48; /* Checksum is usually 6 octal digits. */ 318 319 header = (const struct archive_entry_header_ustar *)h; 320 321 /* Recognize POSIX formats. */ 322 if ((memcmp(header->magic, "ustar\0", 6) == 0) 323 && (memcmp(header->version, "00", 2) == 0)) 324 bid += 56; 325 326 /* Recognize GNU tar format. */ 327 if ((memcmp(header->magic, "ustar ", 6) == 0) 328 && (memcmp(header->version, " \0", 2) == 0)) 329 bid += 56; 330 331 /* Type flag must be null, digit or A-Z, a-z. 
*/ 332 if (header->typeflag[0] != 0 && 333 !( header->typeflag[0] >= '0' && header->typeflag[0] <= '9') && 334 !( header->typeflag[0] >= 'A' && header->typeflag[0] <= 'Z') && 335 !( header->typeflag[0] >= 'a' && header->typeflag[0] <= 'z') ) 336 return (0); 337 bid += 2; /* 6 bits of variation in an 8-bit field leaves 2 bits. */ 338 339 /* Sanity check: Look at first byte of mode field. */ 340 switch (255 & (unsigned)header->mode[0]) { 341 case 0: case 255: 342 /* Base-256 value: No further verification possible! */ 343 break; 344 case ' ': /* Not recommended, but not illegal, either. */ 345 break; 346 case '0': case '1': case '2': case '3': 347 case '4': case '5': case '6': case '7': 348 /* Octal Value. */ 349 /* TODO: Check format of remainder of this field. */ 350 break; 351 default: 352 /* Not a valid mode; bail out here. */ 353 return (0); 354 } 355 /* TODO: Sanity test uid/gid/size/mtime/rdevmajor/rdevminor fields. */ 356 357 return (bid); 358 } 359 360 static int 361 archive_read_format_tar_options(struct archive_read *a, 362 const char *key, const char *val) 363 { 364 struct tar *tar; 365 int ret = ARCHIVE_FAILED; 366 367 tar = (struct tar *)(a->format->data); 368 if (strcmp(key, "compat-2x") == 0) { 369 /* Handle UTF-8 filnames as libarchive 2.x */ 370 tar->compat_2x = (val != NULL)?1:0; 371 tar->init_default_conversion = tar->compat_2x; 372 return (ARCHIVE_OK); 373 } else if (strcmp(key, "hdrcharset") == 0) { 374 if (val == NULL || val[0] == 0) 375 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 376 "tar: hdrcharset option needs a character-set name"); 377 else { 378 tar->opt_sconv = 379 archive_string_conversion_from_charset( 380 &a->archive, val, 0); 381 if (tar->opt_sconv != NULL) 382 ret = ARCHIVE_OK; 383 else 384 ret = ARCHIVE_FATAL; 385 } 386 return (ret); 387 } 388 389 /* Note: The "warn" return is just to inform the options 390 * supervisor that we didn't handle it. It will generate 391 * a suitable error if no one used this option. 
*/ 392 return (ARCHIVE_WARN); 393 } 394 395 /* utility function- this exists to centralize the logic of tracking 396 * how much unconsumed data we have floating around, and to consume 397 * anything outstanding since we're going to do read_aheads 398 */ 399 static void 400 tar_flush_unconsumed(struct archive_read *a, size_t *unconsumed) 401 { 402 if (*unconsumed) { 403 /* 404 void *data = (void *)__archive_read_ahead(a, *unconsumed, NULL); 405 * this block of code is to poison claimed unconsumed space, ensuring 406 * things break if it is in use still. 407 * currently it WILL break things, so enable it only for debugging this issue 408 if (data) { 409 memset(data, 0xff, *unconsumed); 410 } 411 */ 412 __archive_read_consume(a, *unconsumed); 413 *unconsumed = 0; 414 } 415 } 416 417 /* 418 * The function invoked by archive_read_next_header(). This 419 * just sets up a few things and then calls the internal 420 * tar_read_header() function below. 421 */ 422 static int 423 archive_read_format_tar_read_header(struct archive_read *a, 424 struct archive_entry *entry) 425 { 426 /* 427 * When converting tar archives to cpio archives, it is 428 * essential that each distinct file have a distinct inode 429 * number. To simplify this, we keep a static count here to 430 * assign fake dev/inode numbers to each tar entry. Note that 431 * pax format archives may overwrite this with something more 432 * useful. 433 * 434 * Ideally, we would track every file read from the archive so 435 * that we could assign the same dev/ino pair to hardlinks, 436 * but the memory required to store a complete lookup table is 437 * probably not worthwhile just to support the relatively 438 * obscure tar->cpio conversion case. 439 */ 440 static int default_inode; 441 static int default_dev; 442 struct tar *tar; 443 const char *p; 444 int r; 445 size_t l, unconsumed = 0; 446 447 /* Assign default device/inode values. */ 448 archive_entry_set_dev(entry, 1 + default_dev); /* Don't use zero. 
*/ 449 archive_entry_set_ino(entry, ++default_inode); /* Don't use zero. */ 450 /* Limit generated st_ino number to 16 bits. */ 451 if (default_inode >= 0xffff) { 452 ++default_dev; 453 default_inode = 0; 454 } 455 456 tar = (struct tar *)(a->format->data); 457 tar->entry_offset = 0; 458 gnu_clear_sparse_list(tar); 459 tar->realsize = -1; /* Mark this as "unset" */ 460 461 /* Setup default string conversion. */ 462 tar->sconv = tar->opt_sconv; 463 if (tar->sconv == NULL) { 464 if (!tar->init_default_conversion) { 465 tar->sconv_default = 466 archive_string_default_conversion_for_read(&(a->archive)); 467 tar->init_default_conversion = 1; 468 } 469 tar->sconv = tar->sconv_default; 470 } 471 472 r = tar_read_header(a, tar, entry, &unconsumed); 473 474 tar_flush_unconsumed(a, &unconsumed); 475 476 /* 477 * "non-sparse" files are really just sparse files with 478 * a single block. 479 */ 480 if (tar->sparse_list == NULL) { 481 if (gnu_add_sparse_entry(a, tar, 0, tar->entry_bytes_remaining) 482 != ARCHIVE_OK) 483 return (ARCHIVE_FATAL); 484 } else { 485 struct sparse_block *sb; 486 487 for (sb = tar->sparse_list; sb != NULL; sb = sb->next) { 488 if (!sb->hole) 489 archive_entry_sparse_add_entry(entry, 490 sb->offset, sb->remaining); 491 } 492 } 493 494 if (r == ARCHIVE_OK) { 495 /* 496 * "Regular" entry with trailing '/' is really 497 * directory: This is needed for certain old tar 498 * variants and even for some broken newer ones. 
499 */ 500 const wchar_t *wp; 501 wp = archive_entry_pathname_w(entry); 502 if (wp != NULL) { 503 l = wcslen(wp); 504 if (archive_entry_filetype(entry) == AE_IFREG 505 && wp[l-1] == L'/') 506 archive_entry_set_filetype(entry, AE_IFDIR); 507 } else { 508 p = archive_entry_pathname(entry); 509 if (p == NULL) 510 return (ARCHIVE_FAILED); 511 l = strlen(p); 512 if (archive_entry_filetype(entry) == AE_IFREG 513 && p[l-1] == '/') 514 archive_entry_set_filetype(entry, AE_IFDIR); 515 } 516 } 517 return (r); 518 } 519 520 static int 521 archive_read_format_tar_read_data(struct archive_read *a, 522 const void **buff, size_t *size, int64_t *offset) 523 { 524 ssize_t bytes_read; 525 struct tar *tar; 526 struct sparse_block *p; 527 528 tar = (struct tar *)(a->format->data); 529 530 for (;;) { 531 /* Remove exhausted entries from sparse list. */ 532 while (tar->sparse_list != NULL && 533 tar->sparse_list->remaining == 0) { 534 p = tar->sparse_list; 535 tar->sparse_list = p->next; 536 free(p); 537 } 538 539 if (tar->entry_bytes_unconsumed) { 540 __archive_read_consume(a, tar->entry_bytes_unconsumed); 541 tar->entry_bytes_unconsumed = 0; 542 } 543 544 /* If we're at end of file, return EOF. */ 545 if (tar->sparse_list == NULL || 546 tar->entry_bytes_remaining == 0) { 547 if (__archive_read_consume(a, tar->entry_padding) < 0) 548 return (ARCHIVE_FATAL); 549 tar->entry_padding = 0; 550 *buff = NULL; 551 *size = 0; 552 *offset = tar->realsize; 553 return (ARCHIVE_EOF); 554 } 555 556 *buff = __archive_read_ahead(a, 1, &bytes_read); 557 if (bytes_read < 0) 558 return (ARCHIVE_FATAL); 559 if (*buff == NULL) { 560 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 561 "Truncated tar archive"); 562 return (ARCHIVE_FATAL); 563 } 564 if (bytes_read > tar->entry_bytes_remaining) 565 bytes_read = (ssize_t)tar->entry_bytes_remaining; 566 /* Don't read more than is available in the 567 * current sparse block. 
*/ 568 if (tar->sparse_list->remaining < bytes_read) 569 bytes_read = (ssize_t)tar->sparse_list->remaining; 570 *size = bytes_read; 571 *offset = tar->sparse_list->offset; 572 tar->sparse_list->remaining -= bytes_read; 573 tar->sparse_list->offset += bytes_read; 574 tar->entry_bytes_remaining -= bytes_read; 575 tar->entry_bytes_unconsumed = bytes_read; 576 577 if (!tar->sparse_list->hole) 578 return (ARCHIVE_OK); 579 /* Current is hole data and skip this. */ 580 } 581 } 582 583 static int 584 archive_read_format_tar_skip(struct archive_read *a) 585 { 586 int64_t bytes_skipped; 587 struct tar* tar; 588 589 tar = (struct tar *)(a->format->data); 590 591 bytes_skipped = __archive_read_consume(a, 592 tar->entry_bytes_remaining + tar->entry_padding + 593 tar->entry_bytes_unconsumed); 594 if (bytes_skipped < 0) 595 return (ARCHIVE_FATAL); 596 597 tar->entry_bytes_remaining = 0; 598 tar->entry_bytes_unconsumed = 0; 599 tar->entry_padding = 0; 600 601 /* Free the sparse list. */ 602 gnu_clear_sparse_list(tar); 603 604 return (ARCHIVE_OK); 605 } 606 607 /* 608 * This function recursively interprets all of the headers associated 609 * with a single entry. 610 */ 611 static int 612 tar_read_header(struct archive_read *a, struct tar *tar, 613 struct archive_entry *entry, size_t *unconsumed) 614 { 615 ssize_t bytes; 616 int err; 617 const char *h; 618 const struct archive_entry_header_ustar *header; 619 620 tar_flush_unconsumed(a, unconsumed); 621 622 /* Read 512-byte header record */ 623 h = __archive_read_ahead(a, 512, &bytes); 624 if (bytes < 0) 625 return (bytes); 626 if (bytes == 0) { /* EOF at a block boundary. */ 627 /* Some writers do omit the block of nulls. <sigh> */ 628 return (ARCHIVE_EOF); 629 } 630 if (bytes < 512) { /* Short block at EOF; this is bad. */ 631 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 632 "Truncated tar archive"); 633 return (ARCHIVE_FATAL); 634 } 635 *unconsumed = 512; 636 637 /* Check for end-of-archive mark. 
*/ 638 if (h[0] == 0 && archive_block_is_null(h)) { 639 /* Try to consume a second all-null record, as well. */ 640 tar_flush_unconsumed(a, unconsumed); 641 h = __archive_read_ahead(a, 512, NULL); 642 if (h != NULL) 643 __archive_read_consume(a, 512); 644 archive_clear_error(&a->archive); 645 if (a->archive.archive_format_name == NULL) { 646 a->archive.archive_format = ARCHIVE_FORMAT_TAR; 647 a->archive.archive_format_name = "tar"; 648 } 649 return (ARCHIVE_EOF); 650 } 651 652 /* 653 * Note: If the checksum fails and we return ARCHIVE_RETRY, 654 * then the client is likely to just retry. This is a very 655 * crude way to search for the next valid header! 656 * 657 * TODO: Improve this by implementing a real header scan. 658 */ 659 if (!checksum(a, h)) { 660 tar_flush_unconsumed(a, unconsumed); 661 archive_set_error(&a->archive, EINVAL, "Damaged tar archive"); 662 return (ARCHIVE_RETRY); /* Retryable: Invalid header */ 663 } 664 665 if (++tar->header_recursion_depth > 32) { 666 tar_flush_unconsumed(a, unconsumed); 667 archive_set_error(&a->archive, EINVAL, "Too many special headers"); 668 return (ARCHIVE_WARN); 669 } 670 671 /* Determine the format variant. */ 672 header = (const struct archive_entry_header_ustar *)h; 673 674 switch(header->typeflag[0]) { 675 case 'A': /* Solaris tar ACL */ 676 a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; 677 a->archive.archive_format_name = "Solaris tar"; 678 err = header_Solaris_ACL(a, tar, entry, h, unconsumed); 679 break; 680 case 'g': /* POSIX-standard 'g' header. 
*/ 681 a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; 682 a->archive.archive_format_name = "POSIX pax interchange format"; 683 err = header_pax_global(a, tar, entry, h, unconsumed); 684 break; 685 case 'K': /* Long link name (GNU tar, others) */ 686 err = header_longlink(a, tar, entry, h, unconsumed); 687 break; 688 case 'L': /* Long filename (GNU tar, others) */ 689 err = header_longname(a, tar, entry, h, unconsumed); 690 break; 691 case 'V': /* GNU volume header */ 692 err = header_volume(a, tar, entry, h, unconsumed); 693 break; 694 case 'X': /* Used by SUN tar; same as 'x'. */ 695 a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; 696 a->archive.archive_format_name = 697 "POSIX pax interchange format (Sun variant)"; 698 err = header_pax_extensions(a, tar, entry, h, unconsumed); 699 break; 700 case 'x': /* POSIX-standard 'x' header. */ 701 a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; 702 a->archive.archive_format_name = "POSIX pax interchange format"; 703 err = header_pax_extensions(a, tar, entry, h, unconsumed); 704 break; 705 default: 706 if (memcmp(header->magic, "ustar \0", 8) == 0) { 707 a->archive.archive_format = ARCHIVE_FORMAT_TAR_GNUTAR; 708 a->archive.archive_format_name = "GNU tar format"; 709 err = header_gnutar(a, tar, entry, h, unconsumed); 710 } else if (memcmp(header->magic, "ustar", 5) == 0) { 711 if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) { 712 a->archive.archive_format = ARCHIVE_FORMAT_TAR_USTAR; 713 a->archive.archive_format_name = "POSIX ustar format"; 714 } 715 err = header_ustar(a, tar, entry, h); 716 } else { 717 a->archive.archive_format = ARCHIVE_FORMAT_TAR; 718 a->archive.archive_format_name = "tar (non-POSIX)"; 719 err = header_old_tar(a, tar, entry, h); 720 } 721 } 722 if (err == ARCHIVE_FATAL) 723 return (err); 724 725 tar_flush_unconsumed(a, unconsumed); 726 727 h = NULL; 728 header = NULL; 729 730 --tar->header_recursion_depth; 731 /* Yuck. 
Apple's design here ends up storing long pathname 732 * extensions for both the AppleDouble extension entry and the 733 * regular entry. 734 */ 735 /* TODO: Should this be disabled on non-Mac platforms? */ 736 if ((err == ARCHIVE_WARN || err == ARCHIVE_OK) && 737 tar->header_recursion_depth == 0) { 738 int err2 = read_mac_metadata_blob(a, tar, entry, h, unconsumed); 739 if (err2 < err) 740 err = err2; 741 } 742 743 /* We return warnings or success as-is. Anything else is fatal. */ 744 if (err == ARCHIVE_WARN || err == ARCHIVE_OK) { 745 if (tar->sparse_gnu_pending) { 746 if (tar->sparse_gnu_major == 1 && 747 tar->sparse_gnu_minor == 0) { 748 ssize_t bytes_read; 749 750 tar->sparse_gnu_pending = 0; 751 /* Read initial sparse map. */ 752 bytes_read = gnu_sparse_10_read(a, tar, unconsumed); 753 tar->entry_bytes_remaining -= bytes_read; 754 if (bytes_read < 0) 755 return (bytes_read); 756 } else { 757 archive_set_error(&a->archive, 758 ARCHIVE_ERRNO_MISC, 759 "Unrecognized GNU sparse file format"); 760 return (ARCHIVE_WARN); 761 } 762 tar->sparse_gnu_pending = 0; 763 } 764 return (err); 765 } 766 if (err == ARCHIVE_EOF) 767 /* EOF when recursively reading a header is bad. */ 768 archive_set_error(&a->archive, EINVAL, "Damaged tar archive"); 769 return (ARCHIVE_FATAL); 770 } 771 772 /* 773 * Return true if block checksum is correct. 774 */ 775 static int 776 checksum(struct archive_read *a, const void *h) 777 { 778 const unsigned char *bytes; 779 const struct archive_entry_header_ustar *header; 780 int check, i, sum; 781 782 (void)a; /* UNUSED */ 783 bytes = (const unsigned char *)h; 784 header = (const struct archive_entry_header_ustar *)h; 785 786 /* 787 * Test the checksum. Note that POSIX specifies _unsigned_ 788 * bytes for this calculation. 
789 */ 790 sum = (int)tar_atol(header->checksum, sizeof(header->checksum)); 791 check = 0; 792 for (i = 0; i < 148; i++) 793 check += (unsigned char)bytes[i]; 794 for (; i < 156; i++) 795 check += 32; 796 for (; i < 512; i++) 797 check += (unsigned char)bytes[i]; 798 if (sum == check) 799 return (1); 800 801 /* 802 * Repeat test with _signed_ bytes, just in case this archive 803 * was created by an old BSD, Solaris, or HP-UX tar with a 804 * broken checksum calculation. 805 */ 806 check = 0; 807 for (i = 0; i < 148; i++) 808 check += (signed char)bytes[i]; 809 for (; i < 156; i++) 810 check += 32; 811 for (; i < 512; i++) 812 check += (signed char)bytes[i]; 813 if (sum == check) 814 return (1); 815 816 return (0); 817 } 818 819 /* 820 * Return true if this block contains only nulls. 821 */ 822 static int 823 archive_block_is_null(const char *p) 824 { 825 unsigned i; 826 827 for (i = 0; i < 512; i++) 828 if (*p++) 829 return (0); 830 return (1); 831 } 832 833 /* 834 * Interpret 'A' Solaris ACL header 835 */ 836 static int 837 header_Solaris_ACL(struct archive_read *a, struct tar *tar, 838 struct archive_entry *entry, const void *h, size_t *unconsumed) 839 { 840 const struct archive_entry_header_ustar *header; 841 size_t size; 842 int err; 843 int64_t type; 844 char *acl, *p; 845 846 /* 847 * read_body_to_string adds a NUL terminator, but we need a little 848 * more to make sure that we don't overrun acl_text later. 849 */ 850 header = (const struct archive_entry_header_ustar *)h; 851 size = (size_t)tar_atol(header->size, sizeof(header->size)); 852 err = read_body_to_string(a, tar, &(tar->acl_text), h, unconsumed); 853 if (err != ARCHIVE_OK) 854 return (err); 855 856 /* Recursively read next header */ 857 err = tar_read_header(a, tar, entry, unconsumed); 858 if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN)) 859 return (err); 860 861 /* TODO: Examine the first characters to see if this 862 * is an AIX ACL descriptor. 
We'll likely never support 863 * them, but it would be polite to recognize and warn when 864 * we do see them. */ 865 866 /* Leading octal number indicates ACL type and number of entries. */ 867 p = acl = tar->acl_text.s; 868 type = 0; 869 while (*p != '\0' && p < acl + size) { 870 if (*p < '0' || *p > '7') { 871 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 872 "Malformed Solaris ACL attribute (invalid digit)"); 873 return(ARCHIVE_WARN); 874 } 875 type <<= 3; 876 type += *p - '0'; 877 if (type > 077777777) { 878 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 879 "Malformed Solaris ACL attribute (count too large)"); 880 return (ARCHIVE_WARN); 881 } 882 p++; 883 } 884 switch ((int)type & ~0777777) { 885 case 01000000: 886 /* POSIX.1e ACL */ 887 break; 888 case 03000000: 889 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 890 "Solaris NFSv4 ACLs not supported"); 891 return (ARCHIVE_WARN); 892 default: 893 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 894 "Malformed Solaris ACL attribute (unsupported type %o)", 895 (int)type); 896 return (ARCHIVE_WARN); 897 } 898 p++; 899 900 if (p >= acl + size) { 901 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 902 "Malformed Solaris ACL attribute (body overflow)"); 903 return(ARCHIVE_WARN); 904 } 905 906 /* ACL text is null-terminated; find the end. 
*/ 907 size -= (p - acl); 908 acl = p; 909 910 while (*p != '\0' && p < acl + size) 911 p++; 912 913 if (tar->sconv_acl == NULL) { 914 tar->sconv_acl = archive_string_conversion_from_charset( 915 &(a->archive), "UTF-8", 1); 916 if (tar->sconv_acl == NULL) 917 return (ARCHIVE_FATAL); 918 } 919 archive_strncpy(&(tar->localname), acl, p - acl); 920 err = archive_acl_parse_l(archive_entry_acl(entry), 921 tar->localname.s, ARCHIVE_ENTRY_ACL_TYPE_ACCESS, tar->sconv_acl); 922 if (err != ARCHIVE_OK) { 923 if (errno == ENOMEM) { 924 archive_set_error(&a->archive, ENOMEM, 925 "Can't allocate memory for ACL"); 926 } else 927 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 928 "Malformed Solaris ACL attribute (unparsable)"); 929 } 930 return (err); 931 } 932 933 /* 934 * Interpret 'K' long linkname header. 935 */ 936 static int 937 header_longlink(struct archive_read *a, struct tar *tar, 938 struct archive_entry *entry, const void *h, size_t *unconsumed) 939 { 940 int err; 941 942 err = read_body_to_string(a, tar, &(tar->longlink), h, unconsumed); 943 if (err != ARCHIVE_OK) 944 return (err); 945 err = tar_read_header(a, tar, entry, unconsumed); 946 if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN)) 947 return (err); 948 /* Set symlink if symlink already set, else hardlink. */ 949 archive_entry_copy_link(entry, tar->longlink.s); 950 return (ARCHIVE_OK); 951 } 952 953 static int 954 set_conversion_failed_error(struct archive_read *a, 955 struct archive_string_conv *sconv, const char *name) 956 { 957 if (errno == ENOMEM) { 958 archive_set_error(&a->archive, ENOMEM, 959 "Can't allocate memory for %s", name); 960 return (ARCHIVE_FATAL); 961 } 962 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 963 "%s can't be converted from %s to current locale.", 964 name, archive_string_conversion_charset_name(sconv)); 965 return (ARCHIVE_WARN); 966 } 967 968 /* 969 * Interpret 'L' long filename header. 
 */
/*
 * Reads the header body into tar->longname, parses the "real" header
 * that follows, and then replaces the entry's pathname with the long
 * name, converted with the entry's string conversion.
 *
 * Returns the nested header's result, or the result of
 * set_conversion_failed_error() if the name cannot be converted.
 */
static int
header_longname(struct archive_read *a, struct tar *tar,
    struct archive_entry *entry, const void *h, size_t *unconsumed)
{
	int err;

	err = read_body_to_string(a, tar, &(tar->longname), h, unconsumed);
	if (err != ARCHIVE_OK)
		return (err);
	/* Read and parse "real" header, then override name. */
	err = tar_read_header(a, tar, entry, unconsumed);
	if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
		return (err);
	if (archive_entry_copy_pathname_l(entry, tar->longname.s,
	    archive_strlen(&(tar->longname)), tar->sconv) != 0)
		err = set_conversion_failed_error(a, tar->sconv, "Pathname");
	return (err);
}


/*
 * Interpret 'V' GNU tar volume header.
 *
 * The volume header itself is ignored; the result is that of the next
 * header.
 */
static int
header_volume(struct archive_read *a, struct tar *tar,
    struct archive_entry *entry, const void *h, size_t *unconsumed)
{
	(void)h;

	/* Just skip this and read the next header. */
	return (tar_read_header(a, tar, entry, unconsumed));
}

/*
 * Read body of an archive entry into an archive_string object.
 *
 * The body length comes from the size field of header 'h' and must be
 * between 0 and 1048576 bytes.  On success 'as' holds the body plus a
 * terminating NUL, and '*unconsumed' is set to the body length rounded
 * up to a whole number of 512-byte records; those bytes have been read
 * ahead but not yet consumed.
 *
 * Returns ARCHIVE_OK, or ARCHIVE_FATAL if the size is out of range, the
 * string cannot be grown, or the body cannot be read.
 */
static int
read_body_to_string(struct archive_read *a, struct tar *tar,
    struct archive_string *as, const void *h, size_t *unconsumed)
{
	int64_t size;
	const struct archive_entry_header_ustar *header;
	const void *src;

	(void)tar; /* UNUSED */
	header = (const struct archive_entry_header_ustar *)h;
	size  = tar_atol(header->size, sizeof(header->size));
	if ((size > 1048576) || (size < 0)) {
		archive_set_error(&a->archive, EINVAL,
		    "Special header too large");
		return (ARCHIVE_FATAL);
	}

	/* Fail if we can't make our buffer big enough. */
	if (archive_string_ensure(as, (size_t)size+1) == NULL) {
		archive_set_error(&a->archive, ENOMEM,
		    "No memory");
		return (ARCHIVE_FATAL);
	}

	/* 'h' is not used past this point; the flush consumes pending bytes. */
	tar_flush_unconsumed(a, unconsumed);

	/* Read the body into the string. */
	/* Round the length up to a multiple of 512. */
	*unconsumed = (size_t)((size + 511) & ~ 511);
	src = __archive_read_ahead(a, *unconsumed, NULL);
	if (src == NULL) {
		/* Nothing was read ahead, so nothing is left to consume. */
		*unconsumed = 0;
		return (ARCHIVE_FATAL);
	}
	memcpy(as->s, src, (size_t)size);
	as->s[size] = '\0';
	as->length = (size_t)size;
	return (ARCHIVE_OK);
}

/*
 * Parse out common header elements.
 *
 * This would be the same as header_old_tar, except that the
 * filename is handled slightly differently for old and POSIX
 * entries  (POSIX entries support a 'prefix').  This factoring
 * allows header_old_tar and header_ustar
 * to handle filenames differently, while still putting most of the
 * common parsing into one place.
 */
static int
header_common(struct archive_read *a, struct tar *tar,
    struct archive_entry *entry, const void *h)
{
	const struct archive_entry_header_ustar	*header;
	char	tartype;
	int     err = ARCHIVE_OK;

	header = (const struct archive_entry_header_ustar *)h;
	if (header->linkname[0])
		archive_strncpy(&(tar->entry_linkpath),
		    header->linkname, sizeof(header->linkname));
	else
		archive_string_empty(&(tar->entry_linkpath));

	/* Parse out the numeric fields (all are octal) */
	archive_entry_set_mode(entry,
		(mode_t)tar_atol(header->mode, sizeof(header->mode)));
	archive_entry_set_uid(entry, tar_atol(header->uid, sizeof(header->uid)));
	archive_entry_set_gid(entry, tar_atol(header->gid, sizeof(header->gid)));
	tar->entry_bytes_remaining = tar_atol(header->size, sizeof(header->size));
	if (tar->entry_bytes_remaining < 0) {
		tar->entry_bytes_remaining = 0;
		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
		    "Tar entry has negative size?");
		err = ARCHIVE_WARN;
	}
	tar->realsize = tar->entry_bytes_remaining;
	archive_entry_set_size(entry, tar->entry_bytes_remaining);
	archive_entry_set_mtime(entry, tar_atol(header->mtime, sizeof(header->mtime)), 0);

	/* Handle the tar type flag appropriately. */
	tartype = header->typeflag[0];

	switch (tartype) {
	case '1': /* Hard link */
		if (archive_entry_copy_hardlink_l(entry, tar->entry_linkpath.s,
		    archive_strlen(&(tar->entry_linkpath)), tar->sconv) != 0) {
			err = set_conversion_failed_error(a, tar->sconv,
			    "Linkname");
			if (err == ARCHIVE_FATAL)
				return (err);
		}
		/*
		 * The following may seem odd, but: Technically, tar
		 * does not store the file type for a "hard link"
		 * entry, only the fact that it is a hard link.  So, I
		 * leave the type zero normally.  But, pax interchange
		 * format allows hard links to have data, which
		 * implies that the underlying entry is a regular
		 * file.
		 */
		if (archive_entry_size(entry) > 0)
			archive_entry_set_filetype(entry, AE_IFREG);

		/*
		 * A tricky point: Traditionally, tar readers have
		 * ignored the size field when reading hardlink
		 * entries, and some writers put non-zero sizes even
		 * though the body is empty.  POSIX blessed this
		 * convention in the 1988 standard, but broke with
		 * this tradition in 2001 by permitting hardlink
		 * entries to store valid bodies in pax interchange
		 * format, but not in ustar format.  Since there is no
		 * hard and fast way to distinguish pax interchange
		 * from earlier archives (the 'x' and 'g' entries are
		 * optional, after all), we need a heuristic.
		 */
		if (archive_entry_size(entry) == 0) {
			/* If the size is already zero, we're done. */
		}  else if (a->archive.archive_format
		    == ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) {
			/* Definitely pax extended; must obey hardlink size. */
		} else if (a->archive.archive_format == ARCHIVE_FORMAT_TAR
		    || a->archive.archive_format == ARCHIVE_FORMAT_TAR_GNUTAR)
		{
			/* Old-style or GNU tar: we must ignore the size.
*/ 1133 archive_entry_set_size(entry, 0); 1134 tar->entry_bytes_remaining = 0; 1135 } else if (archive_read_format_tar_bid(a, 50) > 50) { 1136 /* 1137 * We don't know if it's pax: If the bid 1138 * function sees a valid ustar header 1139 * immediately following, then let's ignore 1140 * the hardlink size. 1141 */ 1142 archive_entry_set_size(entry, 0); 1143 tar->entry_bytes_remaining = 0; 1144 } 1145 /* 1146 * TODO: There are still two cases I'd like to handle: 1147 * = a ustar non-pax archive with a hardlink entry at 1148 * end-of-archive. (Look for block of nulls following?) 1149 * = a pax archive that has not seen any pax headers 1150 * and has an entry which is a hardlink entry storing 1151 * a body containing an uncompressed tar archive. 1152 * The first is worth addressing; I don't see any reliable 1153 * way to deal with the second possibility. 1154 */ 1155 break; 1156 case '2': /* Symlink */ 1157 archive_entry_set_filetype(entry, AE_IFLNK); 1158 archive_entry_set_size(entry, 0); 1159 tar->entry_bytes_remaining = 0; 1160 if (archive_entry_copy_symlink_l(entry, tar->entry_linkpath.s, 1161 archive_strlen(&(tar->entry_linkpath)), tar->sconv) != 0) { 1162 err = set_conversion_failed_error(a, tar->sconv, 1163 "Linkname"); 1164 if (err == ARCHIVE_FATAL) 1165 return (err); 1166 } 1167 break; 1168 case '3': /* Character device */ 1169 archive_entry_set_filetype(entry, AE_IFCHR); 1170 archive_entry_set_size(entry, 0); 1171 tar->entry_bytes_remaining = 0; 1172 break; 1173 case '4': /* Block device */ 1174 archive_entry_set_filetype(entry, AE_IFBLK); 1175 archive_entry_set_size(entry, 0); 1176 tar->entry_bytes_remaining = 0; 1177 break; 1178 case '5': /* Dir */ 1179 archive_entry_set_filetype(entry, AE_IFDIR); 1180 archive_entry_set_size(entry, 0); 1181 tar->entry_bytes_remaining = 0; 1182 break; 1183 case '6': /* FIFO device */ 1184 archive_entry_set_filetype(entry, AE_IFIFO); 1185 archive_entry_set_size(entry, 0); 1186 tar->entry_bytes_remaining = 0; 1187 break; 1188 
case 'D': /* GNU incremental directory type */ 1189 /* 1190 * No special handling is actually required here. 1191 * It might be nice someday to preprocess the file list and 1192 * provide it to the client, though. 1193 */ 1194 archive_entry_set_filetype(entry, AE_IFDIR); 1195 break; 1196 case 'M': /* GNU "Multi-volume" (remainder of file from last archive)*/ 1197 /* 1198 * As far as I can tell, this is just like a regular file 1199 * entry, except that the contents should be _appended_ to 1200 * the indicated file at the indicated offset. This may 1201 * require some API work to fully support. 1202 */ 1203 break; 1204 case 'N': /* Old GNU "long filename" entry. */ 1205 /* The body of this entry is a script for renaming 1206 * previously-extracted entries. Ugh. It will never 1207 * be supported by libarchive. */ 1208 archive_entry_set_filetype(entry, AE_IFREG); 1209 break; 1210 case 'S': /* GNU sparse files */ 1211 /* 1212 * Sparse files are really just regular files with 1213 * sparse information in the extended area. 1214 */ 1215 /* FALLTHROUGH */ 1216 default: /* Regular file and non-standard types */ 1217 /* 1218 * Per POSIX: non-recognized types should always be 1219 * treated as regular files. 1220 */ 1221 archive_entry_set_filetype(entry, AE_IFREG); 1222 break; 1223 } 1224 return (err); 1225 } 1226 1227 /* 1228 * Parse out header elements for "old-style" tar archives. 1229 */ 1230 static int 1231 header_old_tar(struct archive_read *a, struct tar *tar, 1232 struct archive_entry *entry, const void *h) 1233 { 1234 const struct archive_entry_header_ustar *header; 1235 int err = ARCHIVE_OK, err2; 1236 1237 /* Copy filename over (to ensure null termination). 
*/ 1238 header = (const struct archive_entry_header_ustar *)h; 1239 if (archive_entry_copy_pathname_l(entry, 1240 header->name, sizeof(header->name), tar->sconv) != 0) { 1241 err = set_conversion_failed_error(a, tar->sconv, "Pathname"); 1242 if (err == ARCHIVE_FATAL) 1243 return (err); 1244 } 1245 1246 /* Grab rest of common fields */ 1247 err2 = header_common(a, tar, entry, h); 1248 if (err > err2) 1249 err = err2; 1250 1251 tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); 1252 return (err); 1253 } 1254 1255 /* 1256 * Read a Mac AppleDouble-encoded blob of file metadata, 1257 * if there is one. 1258 */ 1259 static int 1260 read_mac_metadata_blob(struct archive_read *a, struct tar *tar, 1261 struct archive_entry *entry, const void *h, size_t *unconsumed) 1262 { 1263 int64_t size; 1264 const void *data; 1265 const char *p, *name; 1266 const wchar_t *wp, *wname; 1267 1268 (void)h; /* UNUSED */ 1269 1270 wname = wp = archive_entry_pathname_w(entry); 1271 if (wp != NULL) { 1272 /* Find the last path element. */ 1273 for (; *wp != L'\0'; ++wp) { 1274 if (wp[0] == '/' && wp[1] != L'\0') 1275 wname = wp + 1; 1276 } 1277 /* 1278 * If last path element starts with "._", then 1279 * this is a Mac extension. 1280 */ 1281 if (wname[0] != L'.' || wname[1] != L'_' || wname[2] == L'\0') 1282 return ARCHIVE_OK; 1283 } else { 1284 /* Find the last path element. */ 1285 name = p = archive_entry_pathname(entry); 1286 if (p == NULL) 1287 return (ARCHIVE_FAILED); 1288 for (; *p != '\0'; ++p) { 1289 if (p[0] == '/' && p[1] != '\0') 1290 name = p + 1; 1291 } 1292 /* 1293 * If last path element starts with "._", then 1294 * this is a Mac extension. 1295 */ 1296 if (name[0] != '.' || name[1] != '_' || name[2] == '\0') 1297 return ARCHIVE_OK; 1298 } 1299 1300 /* Read the body as a Mac OS metadata blob. */ 1301 size = archive_entry_size(entry); 1302 1303 /* 1304 * TODO: Look beyond the body here to peek at the next header. 
1305 * If it's a regular header (not an extension header) 1306 * that has the wrong name, just return the current 1307 * entry as-is, without consuming the body here. 1308 * That would reduce the risk of us mis-identifying 1309 * an ordinary file that just happened to have 1310 * a name starting with "._". 1311 * 1312 * Q: Is the above idea really possible? Even 1313 * when there are GNU or pax extension entries? 1314 */ 1315 data = __archive_read_ahead(a, (size_t)size, NULL); 1316 if (data == NULL) { 1317 *unconsumed = 0; 1318 return (ARCHIVE_FATAL); 1319 } 1320 archive_entry_copy_mac_metadata(entry, data, (size_t)size); 1321 *unconsumed = (size_t)((size + 511) & ~ 511); 1322 tar_flush_unconsumed(a, unconsumed); 1323 return (tar_read_header(a, tar, entry, unconsumed)); 1324 } 1325 1326 /* 1327 * Parse a file header for a pax extended archive entry. 1328 */ 1329 static int 1330 header_pax_global(struct archive_read *a, struct tar *tar, 1331 struct archive_entry *entry, const void *h, size_t *unconsumed) 1332 { 1333 int err; 1334 1335 err = read_body_to_string(a, tar, &(tar->pax_global), h, unconsumed); 1336 if (err != ARCHIVE_OK) 1337 return (err); 1338 err = tar_read_header(a, tar, entry, unconsumed); 1339 return (err); 1340 } 1341 1342 static int 1343 header_pax_extensions(struct archive_read *a, struct tar *tar, 1344 struct archive_entry *entry, const void *h, size_t *unconsumed) 1345 { 1346 int err, err2; 1347 1348 err = read_body_to_string(a, tar, &(tar->pax_header), h, unconsumed); 1349 if (err != ARCHIVE_OK) 1350 return (err); 1351 1352 /* Parse the next header. */ 1353 err = tar_read_header(a, tar, entry, unconsumed); 1354 if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN)) 1355 return (err); 1356 1357 /* 1358 * TODO: Parse global/default options into 'entry' struct here 1359 * before handling file-specific options. 
1360 * 1361 * This design (parse standard header, then overwrite with pax 1362 * extended attribute data) usually works well, but isn't ideal; 1363 * it would be better to parse the pax extended attributes first 1364 * and then skip any fields in the standard header that were 1365 * defined in the pax header. 1366 */ 1367 err2 = pax_header(a, tar, entry, tar->pax_header.s); 1368 err = err_combine(err, err2); 1369 tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); 1370 return (err); 1371 } 1372 1373 1374 /* 1375 * Parse a file header for a Posix "ustar" archive entry. This also 1376 * handles "pax" or "extended ustar" entries. 1377 */ 1378 static int 1379 header_ustar(struct archive_read *a, struct tar *tar, 1380 struct archive_entry *entry, const void *h) 1381 { 1382 const struct archive_entry_header_ustar *header; 1383 struct archive_string *as; 1384 int err = ARCHIVE_OK, r; 1385 1386 header = (const struct archive_entry_header_ustar *)h; 1387 1388 /* Copy name into an internal buffer to ensure null-termination. */ 1389 as = &(tar->entry_pathname); 1390 if (header->prefix[0]) { 1391 archive_strncpy(as, header->prefix, sizeof(header->prefix)); 1392 if (as->s[archive_strlen(as) - 1] != '/') 1393 archive_strappend_char(as, '/'); 1394 archive_strncat(as, header->name, sizeof(header->name)); 1395 } else { 1396 archive_strncpy(as, header->name, sizeof(header->name)); 1397 } 1398 if (archive_entry_copy_pathname_l(entry, as->s, archive_strlen(as), 1399 tar->sconv) != 0) { 1400 err = set_conversion_failed_error(a, tar->sconv, "Pathname"); 1401 if (err == ARCHIVE_FATAL) 1402 return (err); 1403 } 1404 1405 /* Handle rest of common fields. */ 1406 r = header_common(a, tar, entry, h); 1407 if (r == ARCHIVE_FATAL) 1408 return (r); 1409 if (r < err) 1410 err = r; 1411 1412 /* Handle POSIX ustar fields. 
*/ 1413 if (archive_entry_copy_uname_l(entry, 1414 header->uname, sizeof(header->uname), tar->sconv) != 0) { 1415 err = set_conversion_failed_error(a, tar->sconv, "Uname"); 1416 if (err == ARCHIVE_FATAL) 1417 return (err); 1418 } 1419 1420 if (archive_entry_copy_gname_l(entry, 1421 header->gname, sizeof(header->gname), tar->sconv) != 0) { 1422 err = set_conversion_failed_error(a, tar->sconv, "Gname"); 1423 if (err == ARCHIVE_FATAL) 1424 return (err); 1425 } 1426 1427 /* Parse out device numbers only for char and block specials. */ 1428 if (header->typeflag[0] == '3' || header->typeflag[0] == '4') { 1429 archive_entry_set_rdevmajor(entry, (dev_t) 1430 tar_atol(header->rdevmajor, sizeof(header->rdevmajor))); 1431 archive_entry_set_rdevminor(entry, (dev_t) 1432 tar_atol(header->rdevminor, sizeof(header->rdevminor))); 1433 } 1434 1435 tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); 1436 1437 return (err); 1438 } 1439 1440 1441 /* 1442 * Parse the pax extended attributes record. 1443 * 1444 * Returns non-zero if there's an error in the data. 1445 */ 1446 static int 1447 pax_header(struct archive_read *a, struct tar *tar, 1448 struct archive_entry *entry, char *attr) 1449 { 1450 size_t attr_length, l, line_length; 1451 char *p; 1452 char *key, *value; 1453 struct archive_string *as; 1454 struct archive_string_conv *sconv; 1455 int err, err2; 1456 1457 attr_length = strlen(attr); 1458 tar->pax_hdrcharset_binary = 0; 1459 archive_string_empty(&(tar->entry_gname)); 1460 archive_string_empty(&(tar->entry_linkpath)); 1461 archive_string_empty(&(tar->entry_pathname)); 1462 archive_string_empty(&(tar->entry_pathname_override)); 1463 archive_string_empty(&(tar->entry_uname)); 1464 err = ARCHIVE_OK; 1465 while (attr_length > 0) { 1466 /* Parse decimal length field at start of line. */ 1467 line_length = 0; 1468 l = attr_length; 1469 p = attr; /* Record start of line. 
*/ 1470 while (l>0) { 1471 if (*p == ' ') { 1472 p++; 1473 l--; 1474 break; 1475 } 1476 if (*p < '0' || *p > '9') { 1477 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1478 "Ignoring malformed pax extended attributes"); 1479 return (ARCHIVE_WARN); 1480 } 1481 line_length *= 10; 1482 line_length += *p - '0'; 1483 if (line_length > 999999) { 1484 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1485 "Rejecting pax extended attribute > 1MB"); 1486 return (ARCHIVE_WARN); 1487 } 1488 p++; 1489 l--; 1490 } 1491 1492 /* 1493 * Parsed length must be no bigger than available data, 1494 * at least 1, and the last character of the line must 1495 * be '\n'. 1496 */ 1497 if (line_length > attr_length 1498 || line_length < 1 1499 || attr[line_length - 1] != '\n') 1500 { 1501 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1502 "Ignoring malformed pax extended attribute"); 1503 return (ARCHIVE_WARN); 1504 } 1505 1506 /* Null-terminate the line. */ 1507 attr[line_length - 1] = '\0'; 1508 1509 /* Find end of key and null terminate it. */ 1510 key = p; 1511 if (key[0] == '=') 1512 return (-1); 1513 while (*p && *p != '=') 1514 ++p; 1515 if (*p == '\0') { 1516 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1517 "Invalid pax extended attributes"); 1518 return (ARCHIVE_WARN); 1519 } 1520 *p = '\0'; 1521 1522 /* Identify null-terminated 'value' portion. */ 1523 value = p + 1; 1524 1525 /* Identify this attribute and set it in the entry. */ 1526 err2 = pax_attribute(a, tar, entry, key, value); 1527 if (err2 == ARCHIVE_FATAL) 1528 return (err2); 1529 err = err_combine(err, err2); 1530 1531 /* Skip to next line */ 1532 attr += line_length; 1533 attr_length -= line_length; 1534 } 1535 1536 /* 1537 * PAX format uses UTF-8 as default charset for its metadata 1538 * unless hdrcharset=BINARY is present in its header. 
1539 * We apply the charset specified by the hdrcharset option only 1540 * when the hdrcharset attribute(in PAX header) is BINARY because 1541 * we respect the charset described in PAX header and BINARY also 1542 * means that metadata(filename,uname and gname) character-set 1543 * is unknown. 1544 */ 1545 if (tar->pax_hdrcharset_binary) 1546 sconv = tar->opt_sconv; 1547 else { 1548 sconv = archive_string_conversion_from_charset( 1549 &(a->archive), "UTF-8", 1); 1550 if (sconv == NULL) 1551 return (ARCHIVE_FATAL); 1552 if (tar->compat_2x) 1553 archive_string_conversion_set_opt(sconv, 1554 SCONV_SET_OPT_UTF8_LIBARCHIVE2X); 1555 } 1556 1557 if (archive_strlen(&(tar->entry_gname)) > 0) { 1558 if (archive_entry_copy_gname_l(entry, tar->entry_gname.s, 1559 archive_strlen(&(tar->entry_gname)), sconv) != 0) { 1560 err = set_conversion_failed_error(a, sconv, "Gname"); 1561 if (err == ARCHIVE_FATAL) 1562 return (err); 1563 /* Use a converted an original name. */ 1564 archive_entry_copy_gname(entry, tar->entry_gname.s); 1565 } 1566 } 1567 if (archive_strlen(&(tar->entry_linkpath)) > 0) { 1568 if (archive_entry_copy_link_l(entry, tar->entry_linkpath.s, 1569 archive_strlen(&(tar->entry_linkpath)), sconv) != 0) { 1570 err = set_conversion_failed_error(a, sconv, "Linkname"); 1571 if (err == ARCHIVE_FATAL) 1572 return (err); 1573 /* Use a converted an original name. */ 1574 archive_entry_copy_link(entry, tar->entry_linkpath.s); 1575 } 1576 } 1577 /* 1578 * Some extensions (such as the GNU sparse file extensions) 1579 * deliberately store a synthetic name under the regular 'path' 1580 * attribute and the real file name under a different attribute. 1581 * Since we're supposed to not care about the order, we 1582 * have no choice but to store all of the various filenames 1583 * we find and figure it all out afterwards. This is the 1584 * figuring out part. 
1585 */ 1586 as = NULL; 1587 if (archive_strlen(&(tar->entry_pathname_override)) > 0) 1588 as = &(tar->entry_pathname_override); 1589 else if (archive_strlen(&(tar->entry_pathname)) > 0) 1590 as = &(tar->entry_pathname); 1591 if (as != NULL) { 1592 if (archive_entry_copy_pathname_l(entry, as->s, 1593 archive_strlen(as), sconv) != 0) { 1594 err = set_conversion_failed_error(a, sconv, "Pathname"); 1595 if (err == ARCHIVE_FATAL) 1596 return (err); 1597 /* Use a converted an original name. */ 1598 archive_entry_copy_pathname(entry, as->s); 1599 } 1600 } 1601 if (archive_strlen(&(tar->entry_uname)) > 0) { 1602 if (archive_entry_copy_uname_l(entry, tar->entry_uname.s, 1603 archive_strlen(&(tar->entry_uname)), sconv) != 0) { 1604 err = set_conversion_failed_error(a, sconv, "Uname"); 1605 if (err == ARCHIVE_FATAL) 1606 return (err); 1607 /* Use a converted an original name. */ 1608 archive_entry_copy_uname(entry, tar->entry_uname.s); 1609 } 1610 } 1611 return (err); 1612 } 1613 1614 static int 1615 pax_attribute_xattr(struct archive_entry *entry, 1616 char *name, char *value) 1617 { 1618 char *name_decoded; 1619 void *value_decoded; 1620 size_t value_len; 1621 1622 if (strlen(name) < 18 || (memcmp(name, "LIBARCHIVE.xattr.", 17)) != 0) 1623 return 3; 1624 1625 name += 17; 1626 1627 /* URL-decode name */ 1628 name_decoded = url_decode(name); 1629 if (name_decoded == NULL) 1630 return 2; 1631 1632 /* Base-64 decode value */ 1633 value_decoded = base64_decode(value, strlen(value), &value_len); 1634 if (value_decoded == NULL) { 1635 free(name_decoded); 1636 return 1; 1637 } 1638 1639 archive_entry_xattr_add_entry(entry, name_decoded, 1640 value_decoded, value_len); 1641 1642 free(name_decoded); 1643 free(value_decoded); 1644 return 0; 1645 } 1646 1647 /* 1648 * Parse a single key=value attribute. key/value pointers are 1649 * assumed to point into reasonably long-lived storage. 1650 * 1651 * Note that POSIX reserves all-lowercase keywords. 
Vendor-specific 1652 * extensions should always have keywords of the form "VENDOR.attribute" 1653 * In particular, it's quite feasible to support many different 1654 * vendor extensions here. I'm using "LIBARCHIVE" for extensions 1655 * unique to this library. 1656 * 1657 * Investigate other vendor-specific extensions and see if 1658 * any of them look useful. 1659 */ 1660 static int 1661 pax_attribute(struct archive_read *a, struct tar *tar, 1662 struct archive_entry *entry, char *key, char *value) 1663 { 1664 int64_t s; 1665 long n; 1666 int err = ARCHIVE_OK, r; 1667 1668 if (value == NULL) 1669 value = ""; /* Disable compiler warning; do not pass 1670 * NULL pointer to strlen(). */ 1671 switch (key[0]) { 1672 case 'G': 1673 /* GNU "0.0" sparse pax format. */ 1674 if (strcmp(key, "GNU.sparse.numblocks") == 0) { 1675 tar->sparse_offset = -1; 1676 tar->sparse_numbytes = -1; 1677 tar->sparse_gnu_major = 0; 1678 tar->sparse_gnu_minor = 0; 1679 } 1680 if (strcmp(key, "GNU.sparse.offset") == 0) { 1681 tar->sparse_offset = tar_atol10(value, strlen(value)); 1682 if (tar->sparse_numbytes != -1) { 1683 if (gnu_add_sparse_entry(a, tar, 1684 tar->sparse_offset, tar->sparse_numbytes) 1685 != ARCHIVE_OK) 1686 return (ARCHIVE_FATAL); 1687 tar->sparse_offset = -1; 1688 tar->sparse_numbytes = -1; 1689 } 1690 } 1691 if (strcmp(key, "GNU.sparse.numbytes") == 0) { 1692 tar->sparse_numbytes = tar_atol10(value, strlen(value)); 1693 if (tar->sparse_numbytes != -1) { 1694 if (gnu_add_sparse_entry(a, tar, 1695 tar->sparse_offset, tar->sparse_numbytes) 1696 != ARCHIVE_OK) 1697 return (ARCHIVE_FATAL); 1698 tar->sparse_offset = -1; 1699 tar->sparse_numbytes = -1; 1700 } 1701 } 1702 if (strcmp(key, "GNU.sparse.size") == 0) { 1703 tar->realsize = tar_atol10(value, strlen(value)); 1704 archive_entry_set_size(entry, tar->realsize); 1705 } 1706 1707 /* GNU "0.1" sparse pax format. 
*/ 1708 if (strcmp(key, "GNU.sparse.map") == 0) { 1709 tar->sparse_gnu_major = 0; 1710 tar->sparse_gnu_minor = 1; 1711 if (gnu_sparse_01_parse(a, tar, value) != ARCHIVE_OK) 1712 return (ARCHIVE_WARN); 1713 } 1714 1715 /* GNU "1.0" sparse pax format */ 1716 if (strcmp(key, "GNU.sparse.major") == 0) { 1717 tar->sparse_gnu_major = (int)tar_atol10(value, strlen(value)); 1718 tar->sparse_gnu_pending = 1; 1719 } 1720 if (strcmp(key, "GNU.sparse.minor") == 0) { 1721 tar->sparse_gnu_minor = (int)tar_atol10(value, strlen(value)); 1722 tar->sparse_gnu_pending = 1; 1723 } 1724 if (strcmp(key, "GNU.sparse.name") == 0) { 1725 /* 1726 * The real filename; when storing sparse 1727 * files, GNU tar puts a synthesized name into 1728 * the regular 'path' attribute in an attempt 1729 * to limit confusion. ;-) 1730 */ 1731 archive_strcpy(&(tar->entry_pathname_override), value); 1732 } 1733 if (strcmp(key, "GNU.sparse.realsize") == 0) { 1734 tar->realsize = tar_atol10(value, strlen(value)); 1735 archive_entry_set_size(entry, tar->realsize); 1736 } 1737 break; 1738 case 'L': 1739 /* Our extensions */ 1740 /* TODO: Handle arbitrary extended attributes... 
*/ 1741 /* 1742 if (strcmp(key, "LIBARCHIVE.xxxxxxx") == 0) 1743 archive_entry_set_xxxxxx(entry, value); 1744 */ 1745 if (strcmp(key, "LIBARCHIVE.creationtime") == 0) { 1746 pax_time(value, &s, &n); 1747 archive_entry_set_birthtime(entry, s, n); 1748 } 1749 if (memcmp(key, "LIBARCHIVE.xattr.", 17) == 0) 1750 pax_attribute_xattr(entry, key, value); 1751 break; 1752 case 'S': 1753 /* We support some keys used by the "star" archiver */ 1754 if (strcmp(key, "SCHILY.acl.access") == 0) { 1755 if (tar->sconv_acl == NULL) { 1756 tar->sconv_acl = 1757 archive_string_conversion_from_charset( 1758 &(a->archive), "UTF-8", 1); 1759 if (tar->sconv_acl == NULL) 1760 return (ARCHIVE_FATAL); 1761 } 1762 1763 r = archive_acl_parse_l(archive_entry_acl(entry), 1764 value, ARCHIVE_ENTRY_ACL_TYPE_ACCESS, 1765 tar->sconv_acl); 1766 if (r != ARCHIVE_OK) { 1767 err = r; 1768 if (err == ARCHIVE_FATAL) { 1769 archive_set_error(&a->archive, ENOMEM, 1770 "Can't allocate memory for " 1771 "SCHILY.acl.access"); 1772 return (err); 1773 } 1774 archive_set_error(&a->archive, 1775 ARCHIVE_ERRNO_MISC, 1776 "Parse error: SCHILY.acl.access"); 1777 } 1778 } else if (strcmp(key, "SCHILY.acl.default") == 0) { 1779 if (tar->sconv_acl == NULL) { 1780 tar->sconv_acl = 1781 archive_string_conversion_from_charset( 1782 &(a->archive), "UTF-8", 1); 1783 if (tar->sconv_acl == NULL) 1784 return (ARCHIVE_FATAL); 1785 } 1786 1787 r = archive_acl_parse_l(archive_entry_acl(entry), 1788 value, ARCHIVE_ENTRY_ACL_TYPE_DEFAULT, 1789 tar->sconv_acl); 1790 if (r != ARCHIVE_OK) { 1791 err = r; 1792 if (err == ARCHIVE_FATAL) { 1793 archive_set_error(&a->archive, ENOMEM, 1794 "Can't allocate memory for " 1795 "SCHILY.acl.default"); 1796 return (err); 1797 } 1798 archive_set_error(&a->archive, 1799 ARCHIVE_ERRNO_MISC, 1800 "Parse error: SCHILY.acl.default"); 1801 } 1802 } else if (strcmp(key, "SCHILY.devmajor") == 0) { 1803 archive_entry_set_rdevmajor(entry, 1804 (dev_t)tar_atol10(value, strlen(value))); 1805 } else if 
(strcmp(key, "SCHILY.devminor") == 0) { 1806 archive_entry_set_rdevminor(entry, 1807 (dev_t)tar_atol10(value, strlen(value))); 1808 } else if (strcmp(key, "SCHILY.fflags") == 0) { 1809 archive_entry_copy_fflags_text(entry, value); 1810 } else if (strcmp(key, "SCHILY.dev") == 0) { 1811 archive_entry_set_dev(entry, 1812 (dev_t)tar_atol10(value, strlen(value))); 1813 } else if (strcmp(key, "SCHILY.ino") == 0) { 1814 archive_entry_set_ino(entry, 1815 tar_atol10(value, strlen(value))); 1816 } else if (strcmp(key, "SCHILY.nlink") == 0) { 1817 archive_entry_set_nlink(entry, (unsigned) 1818 tar_atol10(value, strlen(value))); 1819 } else if (strcmp(key, "SCHILY.realsize") == 0) { 1820 tar->realsize = tar_atol10(value, strlen(value)); 1821 archive_entry_set_size(entry, tar->realsize); 1822 } else if (strcmp(key, "SUN.holesdata") == 0) { 1823 /* A Solaris extension for sparse. */ 1824 r = solaris_sparse_parse(a, tar, entry, value); 1825 if (r < err) { 1826 if (r == ARCHIVE_FATAL) 1827 return (r); 1828 err = r; 1829 archive_set_error(&a->archive, 1830 ARCHIVE_ERRNO_MISC, 1831 "Parse error: SUN.holesdata"); 1832 } 1833 } 1834 break; 1835 case 'a': 1836 if (strcmp(key, "atime") == 0) { 1837 pax_time(value, &s, &n); 1838 archive_entry_set_atime(entry, s, n); 1839 } 1840 break; 1841 case 'c': 1842 if (strcmp(key, "ctime") == 0) { 1843 pax_time(value, &s, &n); 1844 archive_entry_set_ctime(entry, s, n); 1845 } else if (strcmp(key, "charset") == 0) { 1846 /* TODO: Publish charset information in entry. */ 1847 } else if (strcmp(key, "comment") == 0) { 1848 /* TODO: Publish comment in entry. */ 1849 } 1850 break; 1851 case 'g': 1852 if (strcmp(key, "gid") == 0) { 1853 archive_entry_set_gid(entry, 1854 tar_atol10(value, strlen(value))); 1855 } else if (strcmp(key, "gname") == 0) { 1856 archive_strcpy(&(tar->entry_gname), value); 1857 } 1858 break; 1859 case 'h': 1860 if (strcmp(key, "hdrcharset") == 0) { 1861 if (strcmp(value, "BINARY") == 0) 1862 /* Binary mode. 
*/ 1863 tar->pax_hdrcharset_binary = 1; 1864 else if (strcmp(value, "ISO-IR 10646 2000 UTF-8") == 0) 1865 tar->pax_hdrcharset_binary = 0; 1866 } 1867 break; 1868 case 'l': 1869 /* pax interchange doesn't distinguish hardlink vs. symlink. */ 1870 if (strcmp(key, "linkpath") == 0) { 1871 archive_strcpy(&(tar->entry_linkpath), value); 1872 } 1873 break; 1874 case 'm': 1875 if (strcmp(key, "mtime") == 0) { 1876 pax_time(value, &s, &n); 1877 archive_entry_set_mtime(entry, s, n); 1878 } 1879 break; 1880 case 'p': 1881 if (strcmp(key, "path") == 0) { 1882 archive_strcpy(&(tar->entry_pathname), value); 1883 } 1884 break; 1885 case 'r': 1886 /* POSIX has reserved 'realtime.*' */ 1887 break; 1888 case 's': 1889 /* POSIX has reserved 'security.*' */ 1890 /* Someday: if (strcmp(key, "security.acl") == 0) { ... } */ 1891 if (strcmp(key, "size") == 0) { 1892 /* "size" is the size of the data in the entry. */ 1893 tar->entry_bytes_remaining 1894 = tar_atol10(value, strlen(value)); 1895 /* 1896 * But, "size" is not necessarily the size of 1897 * the file on disk; if this is a sparse file, 1898 * the disk size may have already been set from 1899 * GNU.sparse.realsize or GNU.sparse.size or 1900 * an old GNU header field or SCHILY.realsize 1901 * or .... 
1902 */ 1903 if (tar->realsize < 0) { 1904 archive_entry_set_size(entry, 1905 tar->entry_bytes_remaining); 1906 tar->realsize 1907 = tar->entry_bytes_remaining; 1908 } 1909 } 1910 break; 1911 case 'u': 1912 if (strcmp(key, "uid") == 0) { 1913 archive_entry_set_uid(entry, 1914 tar_atol10(value, strlen(value))); 1915 } else if (strcmp(key, "uname") == 0) { 1916 archive_strcpy(&(tar->entry_uname), value); 1917 } 1918 break; 1919 } 1920 return (err); 1921 } 1922 1923 1924 1925 /* 1926 * parse a decimal time value, which may include a fractional portion 1927 */ 1928 static void 1929 pax_time(const char *p, int64_t *ps, long *pn) 1930 { 1931 char digit; 1932 int64_t s; 1933 unsigned long l; 1934 int sign; 1935 int64_t limit, last_digit_limit; 1936 1937 limit = INT64_MAX / 10; 1938 last_digit_limit = INT64_MAX % 10; 1939 1940 s = 0; 1941 sign = 1; 1942 if (*p == '-') { 1943 sign = -1; 1944 p++; 1945 } 1946 while (*p >= '0' && *p <= '9') { 1947 digit = *p - '0'; 1948 if (s > limit || 1949 (s == limit && digit > last_digit_limit)) { 1950 s = INT64_MAX; 1951 break; 1952 } 1953 s = (s * 10) + digit; 1954 ++p; 1955 } 1956 1957 *ps = s * sign; 1958 1959 /* Calculate nanoseconds. */ 1960 *pn = 0; 1961 1962 if (*p != '.') 1963 return; 1964 1965 l = 100000000UL; 1966 do { 1967 ++p; 1968 if (*p >= '0' && *p <= '9') 1969 *pn += (*p - '0') * l; 1970 else 1971 break; 1972 } while (l /= 10); 1973 } 1974 1975 /* 1976 * Parse GNU tar header 1977 */ 1978 static int 1979 header_gnutar(struct archive_read *a, struct tar *tar, 1980 struct archive_entry *entry, const void *h, size_t *unconsumed) 1981 { 1982 const struct archive_entry_header_gnutar *header; 1983 int64_t t; 1984 int err = ARCHIVE_OK; 1985 1986 /* 1987 * GNU header is like POSIX ustar, except 'prefix' is 1988 * replaced with some other fields. This also means the 1989 * filename is stored as in old-style archives. 1990 */ 1991 1992 /* Grab fields common to all tar variants. 
*/ 1993 err = header_common(a, tar, entry, h); 1994 if (err == ARCHIVE_FATAL) 1995 return (err); 1996 1997 /* Copy filename over (to ensure null termination). */ 1998 header = (const struct archive_entry_header_gnutar *)h; 1999 if (archive_entry_copy_pathname_l(entry, 2000 header->name, sizeof(header->name), tar->sconv) != 0) { 2001 err = set_conversion_failed_error(a, tar->sconv, "Pathname"); 2002 if (err == ARCHIVE_FATAL) 2003 return (err); 2004 } 2005 2006 /* Fields common to ustar and GNU */ 2007 /* XXX Can the following be factored out since it's common 2008 * to ustar and gnu tar? Is it okay to move it down into 2009 * header_common, perhaps? */ 2010 if (archive_entry_copy_uname_l(entry, 2011 header->uname, sizeof(header->uname), tar->sconv) != 0) { 2012 err = set_conversion_failed_error(a, tar->sconv, "Uname"); 2013 if (err == ARCHIVE_FATAL) 2014 return (err); 2015 } 2016 2017 if (archive_entry_copy_gname_l(entry, 2018 header->gname, sizeof(header->gname), tar->sconv) != 0) { 2019 err = set_conversion_failed_error(a, tar->sconv, "Gname"); 2020 if (err == ARCHIVE_FATAL) 2021 return (err); 2022 } 2023 2024 /* Parse out device numbers only for char and block specials */ 2025 if (header->typeflag[0] == '3' || header->typeflag[0] == '4') { 2026 archive_entry_set_rdevmajor(entry, (dev_t) 2027 tar_atol(header->rdevmajor, sizeof(header->rdevmajor))); 2028 archive_entry_set_rdevminor(entry, (dev_t) 2029 tar_atol(header->rdevminor, sizeof(header->rdevminor))); 2030 } else 2031 archive_entry_set_rdev(entry, 0); 2032 2033 tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); 2034 2035 /* Grab GNU-specific fields. 
*/ 2036 t = tar_atol(header->atime, sizeof(header->atime)); 2037 if (t > 0) 2038 archive_entry_set_atime(entry, t, 0); 2039 t = tar_atol(header->ctime, sizeof(header->ctime)); 2040 if (t > 0) 2041 archive_entry_set_ctime(entry, t, 0); 2042 2043 if (header->realsize[0] != 0) { 2044 tar->realsize 2045 = tar_atol(header->realsize, sizeof(header->realsize)); 2046 archive_entry_set_size(entry, tar->realsize); 2047 } 2048 2049 if (header->sparse[0].offset[0] != 0) { 2050 if (gnu_sparse_old_read(a, tar, header, unconsumed) 2051 != ARCHIVE_OK) 2052 return (ARCHIVE_FATAL); 2053 } else { 2054 if (header->isextended[0] != 0) { 2055 /* XXX WTF? XXX */ 2056 } 2057 } 2058 2059 return (err); 2060 } 2061 2062 static int 2063 gnu_add_sparse_entry(struct archive_read *a, struct tar *tar, 2064 int64_t offset, int64_t remaining) 2065 { 2066 struct sparse_block *p; 2067 2068 p = (struct sparse_block *)malloc(sizeof(*p)); 2069 if (p == NULL) { 2070 archive_set_error(&a->archive, ENOMEM, "Out of memory"); 2071 return (ARCHIVE_FATAL); 2072 } 2073 memset(p, 0, sizeof(*p)); 2074 if (tar->sparse_last != NULL) 2075 tar->sparse_last->next = p; 2076 else 2077 tar->sparse_list = p; 2078 tar->sparse_last = p; 2079 p->offset = offset; 2080 p->remaining = remaining; 2081 return (ARCHIVE_OK); 2082 } 2083 2084 static void 2085 gnu_clear_sparse_list(struct tar *tar) 2086 { 2087 struct sparse_block *p; 2088 2089 while (tar->sparse_list != NULL) { 2090 p = tar->sparse_list; 2091 tar->sparse_list = p->next; 2092 free(p); 2093 } 2094 tar->sparse_last = NULL; 2095 } 2096 2097 /* 2098 * GNU tar old-format sparse data. 2099 * 2100 * GNU old-format sparse data is stored in a fixed-field 2101 * format. Offset/size values are 11-byte octal fields (same 2102 * format as 'size' field in ustart header). These are 2103 * stored in the header, allocating subsequent header blocks 2104 * as needed. 
Extending the header in this way is a pretty 2105 * severe POSIX violation; this design has earned GNU tar a 2106 * lot of criticism. 2107 */ 2108 2109 static int 2110 gnu_sparse_old_read(struct archive_read *a, struct tar *tar, 2111 const struct archive_entry_header_gnutar *header, size_t *unconsumed) 2112 { 2113 ssize_t bytes_read; 2114 const void *data; 2115 struct extended { 2116 struct gnu_sparse sparse[21]; 2117 char isextended[1]; 2118 char padding[7]; 2119 }; 2120 const struct extended *ext; 2121 2122 if (gnu_sparse_old_parse(a, tar, header->sparse, 4) != ARCHIVE_OK) 2123 return (ARCHIVE_FATAL); 2124 if (header->isextended[0] == 0) 2125 return (ARCHIVE_OK); 2126 2127 do { 2128 tar_flush_unconsumed(a, unconsumed); 2129 data = __archive_read_ahead(a, 512, &bytes_read); 2130 if (bytes_read < 0) 2131 return (ARCHIVE_FATAL); 2132 if (bytes_read < 512) { 2133 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2134 "Truncated tar archive " 2135 "detected while reading sparse file data"); 2136 return (ARCHIVE_FATAL); 2137 } 2138 *unconsumed = 512; 2139 ext = (const struct extended *)data; 2140 if (gnu_sparse_old_parse(a, tar, ext->sparse, 21) != ARCHIVE_OK) 2141 return (ARCHIVE_FATAL); 2142 } while (ext->isextended[0] != 0); 2143 if (tar->sparse_list != NULL) 2144 tar->entry_offset = tar->sparse_list->offset; 2145 return (ARCHIVE_OK); 2146 } 2147 2148 static int 2149 gnu_sparse_old_parse(struct archive_read *a, struct tar *tar, 2150 const struct gnu_sparse *sparse, int length) 2151 { 2152 while (length > 0 && sparse->offset[0] != 0) { 2153 if (gnu_add_sparse_entry(a, tar, 2154 tar_atol(sparse->offset, sizeof(sparse->offset)), 2155 tar_atol(sparse->numbytes, sizeof(sparse->numbytes))) 2156 != ARCHIVE_OK) 2157 return (ARCHIVE_FATAL); 2158 sparse++; 2159 length--; 2160 } 2161 return (ARCHIVE_OK); 2162 } 2163 2164 /* 2165 * GNU tar sparse format 0.0 2166 * 2167 * Beginning with GNU tar 1.15, sparse files are stored using 2168 * information in the pax extended 
header. The GNU tar maintainers 2169 * have gone through a number of variations in the process of working 2170 * out this scheme; fortunately, they're all numbered. 2171 * 2172 * Sparse format 0.0 uses attribute GNU.sparse.numblocks to store the 2173 * number of blocks, and GNU.sparse.offset/GNU.sparse.numbytes to 2174 * store offset/size for each block. The repeated instances of these 2175 * latter fields violate the pax specification (which frowns on 2176 * duplicate keys), so this format was quickly replaced. 2177 */ 2178 2179 /* 2180 * GNU tar sparse format 0.1 2181 * 2182 * This version replaced the offset/numbytes attributes with 2183 * a single "map" attribute that stored a list of integers. This 2184 * format had two problems: First, the "map" attribute could be very 2185 * long, which caused problems for some implementations. More 2186 * importantly, the sparse data was lost when extracted by archivers 2187 * that didn't recognize this extension. 2188 */ 2189 2190 static int 2191 gnu_sparse_01_parse(struct archive_read *a, struct tar *tar, const char *p) 2192 { 2193 const char *e; 2194 int64_t offset = -1, size = -1; 2195 2196 for (;;) { 2197 e = p; 2198 while (*e != '\0' && *e != ',') { 2199 if (*e < '0' || *e > '9') 2200 return (ARCHIVE_WARN); 2201 e++; 2202 } 2203 if (offset < 0) { 2204 offset = tar_atol10(p, e - p); 2205 if (offset < 0) 2206 return (ARCHIVE_WARN); 2207 } else { 2208 size = tar_atol10(p, e - p); 2209 if (size < 0) 2210 return (ARCHIVE_WARN); 2211 if (gnu_add_sparse_entry(a, tar, offset, size) 2212 != ARCHIVE_OK) 2213 return (ARCHIVE_FATAL); 2214 offset = -1; 2215 } 2216 if (*e == '\0') 2217 return (ARCHIVE_OK); 2218 p = e + 1; 2219 } 2220 } 2221 2222 /* 2223 * GNU tar sparse format 1.0 2224 * 2225 * The idea: The offset/size data is stored as a series of base-10 2226 * ASCII numbers prepended to the file data, so that dearchivers that 2227 * don't support this format will extract the block map along with the 2228 * data and a separate 
post-process can restore the sparseness. 2229 * 2230 * Unfortunately, GNU tar 1.16 had a bug that added unnecessary 2231 * padding to the body of the file when using this format. GNU tar 2232 * 1.17 corrected this bug without bumping the version number, so 2233 * it's not possible to support both variants. This code supports 2234 * the later variant at the expense of not supporting the former. 2235 * 2236 * This variant also replaced GNU.sparse.size with GNU.sparse.realsize 2237 * and introduced the GNU.sparse.major/GNU.sparse.minor attributes. 2238 */ 2239 2240 /* 2241 * Read the next line from the input, and parse it as a decimal 2242 * integer followed by '\n'. Returns positive integer value or 2243 * negative on error. 2244 */ 2245 static int64_t 2246 gnu_sparse_10_atol(struct archive_read *a, struct tar *tar, 2247 int64_t *remaining, size_t *unconsumed) 2248 { 2249 int64_t l, limit, last_digit_limit; 2250 const char *p; 2251 ssize_t bytes_read; 2252 int base, digit; 2253 2254 base = 10; 2255 limit = INT64_MAX / base; 2256 last_digit_limit = INT64_MAX % base; 2257 2258 /* 2259 * Skip any lines starting with '#'; GNU tar specs 2260 * don't require this, but they should. 2261 */ 2262 do { 2263 bytes_read = readline(a, tar, &p, 2264 (ssize_t)tar_min(*remaining, 100), unconsumed); 2265 if (bytes_read <= 0) 2266 return (ARCHIVE_FATAL); 2267 *remaining -= bytes_read; 2268 } while (p[0] == '#'); 2269 2270 l = 0; 2271 while (bytes_read > 0) { 2272 if (*p == '\n') 2273 return (l); 2274 if (*p < '0' || *p >= '0' + base) 2275 return (ARCHIVE_WARN); 2276 digit = *p - '0'; 2277 if (l > limit || (l == limit && digit > last_digit_limit)) 2278 l = INT64_MAX; /* Truncate on overflow. */ 2279 else 2280 l = (l * base) + digit; 2281 p++; 2282 bytes_read--; 2283 } 2284 /* TODO: Error message. */ 2285 return (ARCHIVE_WARN); 2286 } 2287 2288 /* 2289 * Returns length (in bytes) of the sparse data description 2290 * that was read. 
2291 */ 2292 static ssize_t 2293 gnu_sparse_10_read(struct archive_read *a, struct tar *tar, size_t *unconsumed) 2294 { 2295 ssize_t bytes_read; 2296 int entries; 2297 int64_t offset, size, to_skip, remaining; 2298 2299 /* Clear out the existing sparse list. */ 2300 gnu_clear_sparse_list(tar); 2301 2302 remaining = tar->entry_bytes_remaining; 2303 2304 /* Parse entries. */ 2305 entries = (int)gnu_sparse_10_atol(a, tar, &remaining, unconsumed); 2306 if (entries < 0) 2307 return (ARCHIVE_FATAL); 2308 /* Parse the individual entries. */ 2309 while (entries-- > 0) { 2310 /* Parse offset/size */ 2311 offset = gnu_sparse_10_atol(a, tar, &remaining, unconsumed); 2312 if (offset < 0) 2313 return (ARCHIVE_FATAL); 2314 size = gnu_sparse_10_atol(a, tar, &remaining, unconsumed); 2315 if (size < 0) 2316 return (ARCHIVE_FATAL); 2317 /* Add a new sparse entry. */ 2318 if (gnu_add_sparse_entry(a, tar, offset, size) != ARCHIVE_OK) 2319 return (ARCHIVE_FATAL); 2320 } 2321 /* Skip rest of block... */ 2322 tar_flush_unconsumed(a, unconsumed); 2323 bytes_read = (ssize_t)(tar->entry_bytes_remaining - remaining); 2324 to_skip = 0x1ff & -bytes_read; 2325 if (to_skip != __archive_read_consume(a, to_skip)) 2326 return (ARCHIVE_FATAL); 2327 return ((ssize_t)(bytes_read + to_skip)); 2328 } 2329 2330 /* 2331 * Solaris pax extension for a sparse file. This is recorded with the 2332 * data and hole pairs. The way recording sparse information by Solaris' 2333 * pax simply indicates where data and sparse are, so the stored contents 2334 * consist of both data and hole. 
2335 */ 2336 static int 2337 solaris_sparse_parse(struct archive_read *a, struct tar *tar, 2338 struct archive_entry *entry, const char *p) 2339 { 2340 const char *e; 2341 int64_t start, end; 2342 int hole = 1; 2343 2344 (void)entry; /* UNUSED */ 2345 2346 end = 0; 2347 if (*p == ' ') 2348 p++; 2349 else 2350 return (ARCHIVE_WARN); 2351 for (;;) { 2352 e = p; 2353 while (*e != '\0' && *e != ' ') { 2354 if (*e < '0' || *e > '9') 2355 return (ARCHIVE_WARN); 2356 e++; 2357 } 2358 start = end; 2359 end = tar_atol10(p, e - p); 2360 if (end < 0) 2361 return (ARCHIVE_WARN); 2362 if (start < end) { 2363 if (gnu_add_sparse_entry(a, tar, start, 2364 end - start) != ARCHIVE_OK) 2365 return (ARCHIVE_FATAL); 2366 tar->sparse_last->hole = hole; 2367 } 2368 if (*e == '\0') 2369 return (ARCHIVE_OK); 2370 p = e + 1; 2371 hole = hole == 0; 2372 } 2373 } 2374 2375 /*- 2376 * Convert text->integer. 2377 * 2378 * Traditional tar formats (including POSIX) specify base-8 for 2379 * all of the standard numeric fields. This is a significant limitation 2380 * in practice: 2381 * = file size is limited to 8GB 2382 * = rdevmajor and rdevminor are limited to 21 bits 2383 * = uid/gid are limited to 21 bits 2384 * 2385 * There are two workarounds for this: 2386 * = pax extended headers, which use variable-length string fields 2387 * = GNU tar and STAR both allow either base-8 or base-256 in 2388 * most fields. The high bit is set to indicate base-256. 2389 * 2390 * On read, this implementation supports both extensions. 2391 */ 2392 static int64_t 2393 tar_atol(const char *p, unsigned char_cnt) 2394 { 2395 /* 2396 * Technically, GNU tar considers a field to be in base-256 2397 * only if the first byte is 0xff or 0x80. 2398 */ 2399 if (*p & 0x80) 2400 return (tar_atol256(p, char_cnt)); 2401 return (tar_atol8(p, char_cnt)); 2402 } 2403 2404 /* 2405 * Note that this implementation does not (and should not!) 
obey 2406 * locale settings; you cannot simply substitute strtol here, since 2407 * it does obey locale. 2408 */ 2409 static int64_t 2410 tar_atol8(const char *p, unsigned char_cnt) 2411 { 2412 int64_t l, limit, last_digit_limit; 2413 int digit, sign, base; 2414 2415 base = 8; 2416 limit = INT64_MAX / base; 2417 last_digit_limit = INT64_MAX % base; 2418 2419 while (*p == ' ' || *p == '\t') 2420 p++; 2421 if (*p == '-') { 2422 sign = -1; 2423 p++; 2424 } else 2425 sign = 1; 2426 2427 l = 0; 2428 digit = *p - '0'; 2429 while (digit >= 0 && digit < base && char_cnt-- > 0) { 2430 if (l>limit || (l == limit && digit > last_digit_limit)) { 2431 l = INT64_MAX; /* Truncate on overflow. */ 2432 break; 2433 } 2434 l = (l * base) + digit; 2435 digit = *++p - '0'; 2436 } 2437 return (sign < 0) ? -l : l; 2438 } 2439 2440 /* 2441 * Note that this implementation does not (and should not!) obey 2442 * locale settings; you cannot simply substitute strtol here, since 2443 * it does obey locale. 2444 */ 2445 static int64_t 2446 tar_atol10(const char *p, unsigned char_cnt) 2447 { 2448 int64_t l, limit, last_digit_limit; 2449 int base, digit, sign; 2450 2451 base = 10; 2452 limit = INT64_MAX / base; 2453 last_digit_limit = INT64_MAX % base; 2454 2455 while (*p == ' ' || *p == '\t') 2456 p++; 2457 if (*p == '-') { 2458 sign = -1; 2459 p++; 2460 } else 2461 sign = 1; 2462 2463 l = 0; 2464 digit = *p - '0'; 2465 while (digit >= 0 && digit < base && char_cnt-- > 0) { 2466 if (l > limit || (l == limit && digit > last_digit_limit)) { 2467 l = INT64_MAX; /* Truncate on overflow. */ 2468 break; 2469 } 2470 l = (l * base) + digit; 2471 digit = *++p - '0'; 2472 } 2473 return (sign < 0) ? -l : l; 2474 } 2475 2476 /* 2477 * Parse a base-256 integer. This is just a straight signed binary 2478 * value in big-endian order, except that the high-order bit is 2479 * ignored. 
2480 */ 2481 static int64_t 2482 tar_atol256(const char *_p, unsigned char_cnt) 2483 { 2484 int64_t l, upper_limit, lower_limit; 2485 const unsigned char *p = (const unsigned char *)_p; 2486 2487 upper_limit = INT64_MAX / 256; 2488 lower_limit = INT64_MIN / 256; 2489 2490 /* Pad with 1 or 0 bits, depending on sign. */ 2491 if ((0x40 & *p) == 0x40) 2492 l = (int64_t)-1; 2493 else 2494 l = 0; 2495 l = (l << 6) | (0x3f & *p++); 2496 while (--char_cnt > 0) { 2497 if (l > upper_limit) { 2498 l = INT64_MAX; /* Truncate on overflow */ 2499 break; 2500 } else if (l < lower_limit) { 2501 l = INT64_MIN; 2502 break; 2503 } 2504 l = (l << 8) | (0xff & (int64_t)*p++); 2505 } 2506 return (l); 2507 } 2508 2509 /* 2510 * Returns length of line (including trailing newline) 2511 * or negative on error. 'start' argument is updated to 2512 * point to first character of line. This avoids copying 2513 * when possible. 2514 */ 2515 static ssize_t 2516 readline(struct archive_read *a, struct tar *tar, const char **start, 2517 ssize_t limit, size_t *unconsumed) 2518 { 2519 ssize_t bytes_read; 2520 ssize_t total_size = 0; 2521 const void *t; 2522 const char *s; 2523 void *p; 2524 2525 tar_flush_unconsumed(a, unconsumed); 2526 2527 t = __archive_read_ahead(a, 1, &bytes_read); 2528 if (bytes_read <= 0) 2529 return (ARCHIVE_FATAL); 2530 s = t; /* Start of line? */ 2531 p = memchr(t, '\n', bytes_read); 2532 /* If we found '\n' in the read buffer, return pointer to that. */ 2533 if (p != NULL) { 2534 bytes_read = 1 + ((const char *)p) - s; 2535 if (bytes_read > limit) { 2536 archive_set_error(&a->archive, 2537 ARCHIVE_ERRNO_FILE_FORMAT, 2538 "Line too long"); 2539 return (ARCHIVE_FATAL); 2540 } 2541 *unconsumed = bytes_read; 2542 *start = s; 2543 return (bytes_read); 2544 } 2545 *unconsumed = bytes_read; 2546 /* Otherwise, we need to accumulate in a line buffer. 
*/ 2547 for (;;) { 2548 if (total_size + bytes_read > limit) { 2549 archive_set_error(&a->archive, 2550 ARCHIVE_ERRNO_FILE_FORMAT, 2551 "Line too long"); 2552 return (ARCHIVE_FATAL); 2553 } 2554 if (archive_string_ensure(&tar->line, total_size + bytes_read) == NULL) { 2555 archive_set_error(&a->archive, ENOMEM, 2556 "Can't allocate working buffer"); 2557 return (ARCHIVE_FATAL); 2558 } 2559 memcpy(tar->line.s + total_size, t, bytes_read); 2560 tar_flush_unconsumed(a, unconsumed); 2561 total_size += bytes_read; 2562 /* If we found '\n', clean up and return. */ 2563 if (p != NULL) { 2564 *start = tar->line.s; 2565 return (total_size); 2566 } 2567 /* Read some more. */ 2568 t = __archive_read_ahead(a, 1, &bytes_read); 2569 if (bytes_read <= 0) 2570 return (ARCHIVE_FATAL); 2571 s = t; /* Start of line? */ 2572 p = memchr(t, '\n', bytes_read); 2573 /* If we found '\n', trim the read. */ 2574 if (p != NULL) { 2575 bytes_read = 1 + ((const char *)p) - s; 2576 } 2577 *unconsumed = bytes_read; 2578 } 2579 } 2580 2581 /* 2582 * base64_decode - Base64 decode 2583 * 2584 * This accepts most variations of base-64 encoding, including: 2585 * * with or without line breaks 2586 * * with or without the final group padded with '=' or '_' characters 2587 * (The most economical Base-64 variant does not pad the last group and 2588 * omits line breaks; RFC1341 used for MIME requires both.) 
// (end of the base64_decode description) */
/*
 * s/len:   the encoded text; it need not be NUL-terminated.
 * out_len: receives the number of decoded bytes.
 *
 * Returns a malloc()ed buffer that the caller must free, or NULL (with
 * *out_len set to 0) if allocation fails.  The buffer is not
 * NUL-terminated.  Bytes that are not base-64 digits are skipped; a
 * '=' or '_' ends decoding; a trailing group of a single digit yields
 * no output.
 *
 * The lookup table is rebuilt on every call and kept on the stack, so
 * the function has no shared mutable state.
 */
static char *
base64_decode(const char *s, size_t len, size_t *out_len)
{
	static const unsigned char digits[64] = {
		'A','B','C','D','E','F','G','H','I','J','K','L','M','N',
		'O','P','Q','R','S','T','U','V','W','X','Y','Z','a','b',
		'c','d','e','f','g','h','i','j','k','l','m','n','o','p',
		'q','r','s','t','u','v','w','x','y','z','0','1','2','3',
		'4','5','6','7','8','9','+','/' };
	unsigned char decode_table[128];
	char *out, *d;
	const unsigned char *src = (const unsigned char *)s;
	unsigned i;

	/* Map each digit to its 6-bit value; 0xff marks "not a digit". */
	memset(decode_table, 0xff, sizeof(decode_table));
	for (i = 0; i < sizeof(digits); i++)
		decode_table[digits[i]] = (unsigned char)i;

	/* Allocate enough space to hold the entire output. */
	/* Note that we may not use all of this... */
	out = (char *)malloc(len - len / 4 + 1);
	if (out == NULL) {
		*out_len = 0;
		return (NULL);
	}
	d = out;

	while (len > 0) {
		/* Collect the next group of (up to) four characters. */
		int v = 0;
		int group_size = 0;
		while (group_size < 4 && len > 0) {
			/* '=' or '_' padding indicates final group. */
			if (*src == '=' || *src == '_') {
				len = 0;
				break;
			}
			/* Skip illegal characters (including line breaks) */
			if (*src > 127 || *src < 32
			    || decode_table[*src] == 0xff) {
				len--;
				src++;
				continue;
			}
			v <<= 6;
			v |= decode_table[*src++];
			len --;
			group_size++;
		}
		/* Align a short group properly. */
		v <<= 6 * (4 - group_size);
		/* Unpack the group we just collected. */
		switch (group_size) {
		case 4: d[2] = v & 0xff;
			/* FALLTHROUGH */
		case 3: d[1] = (v >> 8) & 0xff;
			/* FALLTHROUGH */
		case 2: d[0] = (v >> 16) & 0xff;
			break;
		case 1: /* this is invalid! */
			break;
		}
		/* 4 digits give 3 bytes, 3 give 2, 2 give 1, fewer give 0. */
		d += group_size * 3 / 4;
	}

	*out_len = d - out;
	return (out);
}

/*
 * Value of the hexadecimal digit c (either case), or -1 if c is not a
 * hexadecimal digit.
 */
static int
tohex(int c)
{
	if (c >= '0' && c <= '9')
		return (c - '0');
	else if (c >= 'A' && c <= 'F')
		return (c - 'A' + 10);
	else if (c >= 'a' && c <= 'f')
		return (c - 'a' + 10);
	else
		return (-1);
}

/*
 * Decode %XX escapes in the NUL-terminated string 'in'.
 *
 * Returns a malloc()ed, NUL-terminated copy that the caller must free,
 * or NULL if allocation fails.  A '%' that is not followed by two
 * hexadecimal digits is copied through unchanged.
 */
static char *
url_decode(const char *in)
{
	char *out, *d;
	const char *s;

	/* The result is never longer than the input. */
	out = (char *)malloc(strlen(in) + 1);
	if (out == NULL)
		return (NULL);
	for (s = in, d = out; *s != '\0'; ) {
		if (s[0] == '%' && s[1] != '\0' && s[2] != '\0') {
			/* Try to convert % escape */
			int digit1 = tohex(s[1]);
			int digit2 = tohex(s[2]);
			if (digit1 >= 0 && digit2 >= 0) {
				/* Looks good, consume three chars */
				s += 3;
				/* Convert output */
				*d++ = ((digit1 << 4) | digit2);
				continue;
			}
			/* Else fall through and treat '%' as normal char */
		}
		*d++ = *s++;
	}
	*d = '\0';
	return (out);
}