1 /*- 2 * Copyright (c) 2003-2007 Tim Kientzle 3 * Copyright (c) 2008 Joerg Sonnenberger 4 * Copyright (c) 2011-2012 Michihiro NAKAJIMA 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 #include "archive_platform.h" 29 __FBSDID("$FreeBSD: head/lib/libarchive/archive_read_support_format_mtree.c 201165 2009-12-29 05:52:13Z kientzle $"); 30 31 #ifdef HAVE_SYS_STAT_H 32 #include <sys/stat.h> 33 #endif 34 #ifdef HAVE_ERRNO_H 35 #include <errno.h> 36 #endif 37 #ifdef HAVE_FCNTL_H 38 #include <fcntl.h> 39 #endif 40 #include <stddef.h> 41 /* #include <stdint.h> */ /* See archive_platform.h */ 42 #ifdef HAVE_STDLIB_H 43 #include <stdlib.h> 44 #endif 45 #ifdef HAVE_STRING_H 46 #include <string.h> 47 #endif 48 #ifdef HAVE_CTYPE_H 49 #include <ctype.h> 50 #endif 51 52 #include "archive.h" 53 #include "archive_entry.h" 54 #include "archive_private.h" 55 #include "archive_rb.h" 56 #include "archive_read_private.h" 57 #include "archive_string.h" 58 #include "archive_pack_dev.h" 59 60 #ifndef O_BINARY 61 #define O_BINARY 0 62 #endif 63 #ifndef O_CLOEXEC 64 #define O_CLOEXEC 0 65 #endif 66 67 #define MTREE_HAS_DEVICE 0x0001 68 #define MTREE_HAS_FFLAGS 0x0002 69 #define MTREE_HAS_GID 0x0004 70 #define MTREE_HAS_GNAME 0x0008 71 #define MTREE_HAS_MTIME 0x0010 72 #define MTREE_HAS_NLINK 0x0020 73 #define MTREE_HAS_PERM 0x0040 74 #define MTREE_HAS_SIZE 0x0080 75 #define MTREE_HAS_TYPE 0x0100 76 #define MTREE_HAS_UID 0x0200 77 #define MTREE_HAS_UNAME 0x0400 78 79 #define MTREE_HAS_OPTIONAL 0x0800 80 #define MTREE_HAS_NOCHANGE 0x1000 /* FreeBSD specific */ 81 82 #define MAX_LINE_LEN (1024 * 1024) 83 84 struct mtree_option { 85 struct mtree_option *next; 86 char *value; 87 }; 88 89 struct mtree_entry { 90 struct archive_rb_node rbnode; 91 struct mtree_entry *next_dup; 92 struct mtree_entry *next; 93 struct mtree_option *options; 94 char *name; 95 char full; 96 char used; 97 }; 98 99 struct mtree { 100 struct archive_string line; 101 size_t buffsize; 102 char *buff; 103 int64_t offset; 104 int fd; 105 int archive_format; 106 const char *archive_format_name; 107 struct mtree_entry *entries; 108 struct mtree_entry *this_entry; 109 struct archive_rb_tree entry_rbtree; 110 struct archive_string current_dir; 111 struct archive_string contents_name; 112 113 struct archive_entry_linkresolver *resolver; 114 struct archive_rb_tree rbtree; 115 116 int64_t cur_size; 117 char checkfs; 118 }; 119 120 static int bid_keycmp(const char *, const char *, ssize_t); 121 static int cleanup(struct archive_read *); 122 static int detect_form(struct archive_read *, int *); 123 static int mtree_bid(struct archive_read *, int); 124 static int parse_file(struct archive_read *, struct archive_entry *, 125 struct mtree *, struct mtree_entry *, int *); 126 static void parse_escapes(char *, struct mtree_entry *); 127 static int parse_line(struct archive_read *, struct archive_entry *, 128 struct mtree *, struct mtree_entry *, int *); 129 static int parse_keyword(struct archive_read *, struct mtree *, 130 struct archive_entry *, struct mtree_option *, int *); 131 static int read_data(struct archive_read *a, 132 const void **buff, size_t *size, int64_t *offset); 133 static ssize_t readline(struct archive_read *, struct mtree *, char **, ssize_t); 134 static int skip(struct archive_read *a); 135 static int read_header(struct archive_read *, 136 struct archive_entry *); 137 static int64_t mtree_atol(char **, int base); 138 139 /* 140 * There's no standard for TIME_T_MAX/TIME_T_MIN. So we compute them 141 * here. TODO: Move this to configure time, but be careful 142 * about cross-compile environments. 143 */ 144 static int64_t 145 get_time_t_max(void) 146 { 147 #if defined(TIME_T_MAX) 148 return TIME_T_MAX; 149 #else 150 /* ISO C allows time_t to be a floating-point type, 151 but POSIX requires an integer type. The following 152 should work on any system that follows the POSIX 153 conventions. */ 154 if (((time_t)0) < ((time_t)-1)) { 155 /* Time_t is unsigned */ 156 return (~(time_t)0); 157 } else { 158 /* Time_t is signed. */ 159 /* Assume it's the same as int64_t or int32_t */ 160 if (sizeof(time_t) == sizeof(int64_t)) { 161 return (time_t)INT64_MAX; 162 } else { 163 return (time_t)INT32_MAX; 164 } 165 } 166 #endif 167 } 168 169 static int64_t 170 get_time_t_min(void) 171 { 172 #if defined(TIME_T_MIN) 173 return TIME_T_MIN; 174 #else 175 if (((time_t)0) < ((time_t)-1)) { 176 /* Time_t is unsigned */ 177 return (time_t)0; 178 } else { 179 /* Time_t is signed. */ 180 if (sizeof(time_t) == sizeof(int64_t)) { 181 return (time_t)INT64_MIN; 182 } else { 183 return (time_t)INT32_MIN; 184 } 185 } 186 #endif 187 } 188 189 static int 190 archive_read_format_mtree_options(struct archive_read *a, 191 const char *key, const char *val) 192 { 193 struct mtree *mtree; 194 195 mtree = (struct mtree *)(a->format->data); 196 if (strcmp(key, "checkfs") == 0) { 197 /* Allows to read information missing from the mtree from the file system */ 198 if (val == NULL || val[0] == 0) { 199 mtree->checkfs = 0; 200 } else { 201 mtree->checkfs = 1; 202 } 203 return (ARCHIVE_OK); 204 } 205 206 /* Note: The "warn" return is just to inform the options 207 * supervisor that we didn't handle it. It will generate 208 * a suitable error if no one used this option. */ 209 return (ARCHIVE_WARN); 210 } 211 212 static void 213 free_options(struct mtree_option *head) 214 { 215 struct mtree_option *next; 216 217 for (; head != NULL; head = next) { 218 next = head->next; 219 free(head->value); 220 free(head); 221 } 222 } 223 224 static int 225 mtree_cmp_node(const struct archive_rb_node *n1, 226 const struct archive_rb_node *n2) 227 { 228 const struct mtree_entry *e1 = (const struct mtree_entry *)n1; 229 const struct mtree_entry *e2 = (const struct mtree_entry *)n2; 230 231 return (strcmp(e1->name, e2->name)); 232 } 233 234 static int 235 mtree_cmp_key(const struct archive_rb_node *n, const void *key) 236 { 237 const struct mtree_entry *e = (const struct mtree_entry *)n; 238 239 return (strcmp(e->name, key)); 240 } 241 242 int 243 archive_read_support_format_mtree(struct archive *_a) 244 { 245 static const struct archive_rb_tree_ops rb_ops = { 246 mtree_cmp_node, mtree_cmp_key, 247 }; 248 struct archive_read *a = (struct archive_read *)_a; 249 struct mtree *mtree; 250 int r; 251 252 archive_check_magic(_a, ARCHIVE_READ_MAGIC, 253 ARCHIVE_STATE_NEW, "archive_read_support_format_mtree"); 254 255 mtree = (struct mtree *)calloc(1, sizeof(*mtree)); 256 if (mtree == NULL) { 257 archive_set_error(&a->archive, ENOMEM, 258 "Can't allocate mtree data"); 259 return (ARCHIVE_FATAL); 260 } 261 mtree->checkfs = 0; 262 mtree->fd = -1; 263 264 __archive_rb_tree_init(&mtree->rbtree, &rb_ops); 265 266 r = __archive_read_register_format(a, mtree, "mtree", 267 mtree_bid, archive_read_format_mtree_options, read_header, read_data, skip, NULL, cleanup, NULL, NULL); 268 269 if (r != ARCHIVE_OK) 270 free(mtree); 271 return (ARCHIVE_OK); 272 } 273 274 static int 275 cleanup(struct archive_read *a) 276 { 277 struct mtree *mtree; 278 struct mtree_entry *p, *q; 279 280 mtree = (struct mtree *)(a->format->data); 281 282 p = mtree->entries; 283 while (p != NULL) { 284 q = p->next; 285 free(p->name); 286 free_options(p->options); 287 free(p); 288 p = q; 289 } 290 archive_string_free(&mtree->line); 291 archive_string_free(&mtree->current_dir); 292 archive_string_free(&mtree->contents_name); 293 archive_entry_linkresolver_free(mtree->resolver); 294 295 free(mtree->buff); 296 free(mtree); 297 (a->format->data) = NULL; 298 return (ARCHIVE_OK); 299 } 300 301 static ssize_t 302 get_line_size(const char *b, ssize_t avail, ssize_t *nlsize) 303 { 304 ssize_t len; 305 306 len = 0; 307 while (len < avail) { 308 switch (*b) { 309 case '\0':/* Non-ascii character or control character. */ 310 if (nlsize != NULL) 311 *nlsize = 0; 312 return (-1); 313 case '\r': 314 if (avail-len > 1 && b[1] == '\n') { 315 if (nlsize != NULL) 316 *nlsize = 2; 317 return (len+2); 318 } 319 /* FALL THROUGH */ 320 case '\n': 321 if (nlsize != NULL) 322 *nlsize = 1; 323 return (len+1); 324 default: 325 b++; 326 len++; 327 break; 328 } 329 } 330 if (nlsize != NULL) 331 *nlsize = 0; 332 return (avail); 333 } 334 335 /* 336 * <---------------- ravail ---------------------> 337 * <-- diff ------> <--- avail -----------------> 338 * <---- len -----------> 339 * | Previous lines | line being parsed nl extra | 340 * ^ 341 * b 342 * 343 */ 344 static ssize_t 345 next_line(struct archive_read *a, 346 const char **b, ssize_t *avail, ssize_t *ravail, ssize_t *nl) 347 { 348 ssize_t len; 349 int quit; 350 351 quit = 0; 352 if (*avail == 0) { 353 *nl = 0; 354 len = 0; 355 } else 356 len = get_line_size(*b, *avail, nl); 357 /* 358 * Read bytes more while it does not reach the end of line. 359 */ 360 while (*nl == 0 && len == *avail && !quit) { 361 ssize_t diff = *ravail - *avail; 362 size_t nbytes_req = (*ravail+1023) & ~1023U; 363 ssize_t tested; 364 365 /* 366 * Place an arbitrary limit on the line length. 367 * mtree is almost free-form input and without line length limits, 368 * it can consume a lot of memory. 369 */ 370 if (len >= MAX_LINE_LEN) 371 return (-1); 372 373 /* Increase reading bytes if it is not enough to at least 374 * new two lines. */ 375 if (nbytes_req < (size_t)*ravail + 160) 376 nbytes_req <<= 1; 377 378 *b = __archive_read_ahead(a, nbytes_req, avail); 379 if (*b == NULL) { 380 if (*ravail >= *avail) 381 return (0); 382 /* Reading bytes reaches the end of file. */ 383 *b = __archive_read_ahead(a, *avail, avail); 384 quit = 1; 385 } 386 *ravail = *avail; 387 *b += diff; 388 *avail -= diff; 389 tested = len;/* Skip some bytes we already determinated. */ 390 len = get_line_size(*b + len, *avail - len, nl); 391 if (len >= 0) 392 len += tested; 393 } 394 return (len); 395 } 396 397 /* 398 * Compare characters with a mtree keyword. 399 * Returns the length of a mtree keyword if matched. 400 * Returns 0 if not matched. 401 */ 402 static int 403 bid_keycmp(const char *p, const char *key, ssize_t len) 404 { 405 int match_len = 0; 406 407 while (len > 0 && *p && *key) { 408 if (*p == *key) { 409 --len; 410 ++p; 411 ++key; 412 ++match_len; 413 continue; 414 } 415 return (0);/* Not match */ 416 } 417 if (*key != '\0') 418 return (0);/* Not match */ 419 420 /* A following character should be specified characters */ 421 if (p[0] == '=' || p[0] == ' ' || p[0] == '\t' || 422 p[0] == '\n' || p[0] == '\r' || 423 (p[0] == '\\' && (p[1] == '\n' || p[1] == '\r'))) 424 return (match_len); 425 return (0);/* Not match */ 426 } 427 428 /* 429 * Test whether the characters 'p' has is mtree keyword. 430 * Returns the length of a detected keyword. 431 * Returns 0 if any keywords were not found. 432 */ 433 static int 434 bid_keyword(const char *p, ssize_t len) 435 { 436 static const char * const keys_c[] = { 437 "content", "contents", "cksum", NULL 438 }; 439 static const char * const keys_df[] = { 440 "device", "flags", NULL 441 }; 442 static const char * const keys_g[] = { 443 "gid", "gname", NULL 444 }; 445 static const char * const keys_il[] = { 446 "ignore", "inode", "link", NULL 447 }; 448 static const char * const keys_m[] = { 449 "md5", "md5digest", "mode", NULL 450 }; 451 static const char * const keys_no[] = { 452 "nlink", "nochange", "optional", NULL 453 }; 454 static const char * const keys_r[] = { 455 "resdevice", "rmd160", "rmd160digest", NULL 456 }; 457 static const char * const keys_s[] = { 458 "sha1", "sha1digest", 459 "sha256", "sha256digest", 460 "sha384", "sha384digest", 461 "sha512", "sha512digest", 462 "size", NULL 463 }; 464 static const char * const keys_t[] = { 465 "tags", "time", "type", NULL 466 }; 467 static const char * const keys_u[] = { 468 "uid", "uname", NULL 469 }; 470 const char * const *keys; 471 int i; 472 473 switch (*p) { 474 case 'c': keys = keys_c; break; 475 case 'd': case 'f': keys = keys_df; break; 476 case 'g': keys = keys_g; break; 477 case 'i': case 'l': keys = keys_il; break; 478 case 'm': keys = keys_m; break; 479 case 'n': case 'o': keys = keys_no; break; 480 case 'r': keys = keys_r; break; 481 case 's': keys = keys_s; break; 482 case 't': keys = keys_t; break; 483 case 'u': keys = keys_u; break; 484 default: return (0);/* Unknown key */ 485 } 486 487 for (i = 0; keys[i] != NULL; i++) { 488 int l = bid_keycmp(p, keys[i], len); 489 if (l > 0) 490 return (l); 491 } 492 return (0);/* Unknown key */ 493 } 494 495 /* 496 * Test whether there is a set of mtree keywords. 497 * Returns the number of keyword. 498 * Returns -1 if we got incorrect sequence. 499 * This function expects a set of "<space characters>keyword=value". 500 * When "unset" is specified, expects a set of "<space characters>keyword". 501 */ 502 static int 503 bid_keyword_list(const char *p, ssize_t len, int unset, int last_is_path) 504 { 505 int l; 506 int keycnt = 0; 507 508 while (len > 0 && *p) { 509 int blank = 0; 510 511 /* Test whether there are blank characters in the line. */ 512 while (len >0 && (*p == ' ' || *p == '\t')) { 513 ++p; 514 --len; 515 blank = 1; 516 } 517 if (*p == '\n' || *p == '\r') 518 break; 519 if (p[0] == '\\' && (p[1] == '\n' || p[1] == '\r')) 520 break; 521 if (!blank && !last_is_path) /* No blank character. */ 522 return (-1); 523 if (last_is_path && len == 0) 524 return (keycnt); 525 526 if (unset) { 527 l = bid_keycmp(p, "all", len); 528 if (l > 0) 529 return (1); 530 } 531 /* Test whether there is a correct key in the line. */ 532 l = bid_keyword(p, len); 533 if (l == 0) 534 return (-1);/* Unknown keyword was found. */ 535 p += l; 536 len -= l; 537 keycnt++; 538 539 /* Skip value */ 540 if (*p == '=') { 541 int value = 0; 542 ++p; 543 --len; 544 while (len > 0 && *p != ' ' && *p != '\t') { 545 ++p; 546 --len; 547 value = 1; 548 } 549 /* A keyword should have a its value unless 550 * "/unset" operation. */ 551 if (!unset && value == 0) 552 return (-1); 553 } 554 } 555 return (keycnt); 556 } 557 558 static int 559 bid_entry(const char *p, ssize_t len, ssize_t nl, int *last_is_path) 560 { 561 int f = 0; 562 static const unsigned char safe_char[256] = { 563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 00 - 0F */ 564 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 10 - 1F */ 565 /* !"$%&'()*+,-./ EXCLUSION:( )(#) */ 566 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 20 - 2F */ 567 /* 0123456789:;<>? EXCLUSION:(=) */ 568 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, /* 30 - 3F */ 569 /* @ABCDEFGHIJKLMNO */ 570 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 40 - 4F */ 571 /* PQRSTUVWXYZ[\]^_ */ 572 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 50 - 5F */ 573 /* `abcdefghijklmno */ 574 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 60 - 6F */ 575 /* pqrstuvwxyz{|}~ */ 576 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* 70 - 7F */ 577 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 80 - 8F */ 578 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 90 - 9F */ 579 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* A0 - AF */ 580 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* B0 - BF */ 581 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* C0 - CF */ 582 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* D0 - DF */ 583 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* E0 - EF */ 584 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* F0 - FF */ 585 }; 586 ssize_t ll; 587 const char *pp = p; 588 const char * const pp_end = pp + len; 589 590 *last_is_path = 0; 591 /* 592 * Skip the path-name which is quoted. 593 */ 594 for (;pp < pp_end; ++pp) { 595 if (!safe_char[*(const unsigned char *)pp]) { 596 if (*pp != ' ' && *pp != '\t' && *pp != '\r' 597 && *pp != '\n') 598 f = 0; 599 break; 600 } 601 f = 1; 602 } 603 ll = pp_end - pp; 604 605 /* If a path-name was not found at the first, try to check 606 * a mtree format(a.k.a form D) ``NetBSD's mtree -D'' creates, 607 * which places the path-name at the last. */ 608 if (f == 0) { 609 const char *pb = p + len - nl; 610 int name_len = 0; 611 int slash; 612 613 /* The form D accepts only a single line for an entry. */ 614 if (pb-2 >= p && 615 pb[-1] == '\\' && (pb[-2] == ' ' || pb[-2] == '\t')) 616 return (-1); 617 if (pb-1 >= p && pb[-1] == '\\') 618 return (-1); 619 620 slash = 0; 621 while (p <= --pb && *pb != ' ' && *pb != '\t') { 622 if (!safe_char[*(const unsigned char *)pb]) 623 return (-1); 624 name_len++; 625 /* The pathname should have a slash in this 626 * format. */ 627 if (*pb == '/') 628 slash = 1; 629 } 630 if (name_len == 0 || slash == 0) 631 return (-1); 632 /* If '/' is placed at the first in this field, this is not 633 * a valid filename. */ 634 if (pb[1] == '/') 635 return (-1); 636 ll = len - nl - name_len; 637 pp = p; 638 *last_is_path = 1; 639 } 640 641 return (bid_keyword_list(pp, ll, 0, *last_is_path)); 642 } 643 644 #define MAX_BID_ENTRY 3 645 646 static int 647 mtree_bid(struct archive_read *a, int best_bid) 648 { 649 const char *signature = "#mtree"; 650 const char *p; 651 652 (void)best_bid; /* UNUSED */ 653 654 /* Now let's look at the actual header and see if it matches. */ 655 p = __archive_read_ahead(a, strlen(signature), NULL); 656 if (p == NULL) 657 return (-1); 658 659 if (memcmp(p, signature, strlen(signature)) == 0) 660 return (8 * (int)strlen(signature)); 661 662 /* 663 * There is not a mtree signature. Let's try to detect mtree format. 664 */ 665 return (detect_form(a, NULL)); 666 } 667 668 static int 669 detect_form(struct archive_read *a, int *is_form_d) 670 { 671 const char *p; 672 ssize_t avail, ravail; 673 ssize_t detected_bytes = 0, len, nl; 674 int entry_cnt = 0, multiline = 0; 675 int form_D = 0;/* The archive is generated by `NetBSD mtree -D' 676 * (In this source we call it `form D') . */ 677 678 if (is_form_d != NULL) 679 *is_form_d = 0; 680 p = __archive_read_ahead(a, 1, &avail); 681 if (p == NULL) 682 return (-1); 683 ravail = avail; 684 for (;;) { 685 len = next_line(a, &p, &avail, &ravail, &nl); 686 /* The terminal character of the line should be 687 * a new line character, '\r\n' or '\n'. */ 688 if (len <= 0 || nl == 0) 689 break; 690 if (!multiline) { 691 /* Leading whitespace is never significant, 692 * ignore it. */ 693 while (len > 0 && (*p == ' ' || *p == '\t')) { 694 ++p; 695 --avail; 696 --len; 697 } 698 /* Skip comment or empty line. */ 699 if (p[0] == '#' || p[0] == '\n' || p[0] == '\r') { 700 p += len; 701 avail -= len; 702 continue; 703 } 704 } else { 705 /* A continuance line; the terminal 706 * character of previous line was '\' character. */ 707 if (bid_keyword_list(p, len, 0, 0) <= 0) 708 break; 709 if (multiline == 1) 710 detected_bytes += len; 711 if (p[len-nl-1] != '\\') { 712 if (multiline == 1 && 713 ++entry_cnt >= MAX_BID_ENTRY) 714 break; 715 multiline = 0; 716 } 717 p += len; 718 avail -= len; 719 continue; 720 } 721 if (p[0] != '/') { 722 int last_is_path, keywords; 723 724 keywords = bid_entry(p, len, nl, &last_is_path); 725 if (keywords >= 0) { 726 detected_bytes += len; 727 if (form_D == 0) { 728 if (last_is_path) 729 form_D = 1; 730 else if (keywords > 0) 731 /* This line is not `form D'. */ 732 form_D = -1; 733 } else if (form_D == 1) { 734 if (!last_is_path && keywords > 0) 735 /* This this is not `form D' 736 * and We cannot accept mixed 737 * format. */ 738 break; 739 } 740 if (!last_is_path && p[len-nl-1] == '\\') 741 /* This line continues. */ 742 multiline = 1; 743 else { 744 /* We've got plenty of correct lines 745 * to assume that this file is a mtree 746 * format. */ 747 if (++entry_cnt >= MAX_BID_ENTRY) 748 break; 749 } 750 } else 751 break; 752 } else if (len > 4 && strncmp(p, "/set", 4) == 0) { 753 if (bid_keyword_list(p+4, len-4, 0, 0) <= 0) 754 break; 755 /* This line continues. */ 756 if (p[len-nl-1] == '\\') 757 multiline = 2; 758 } else if (len > 6 && strncmp(p, "/unset", 6) == 0) { 759 if (bid_keyword_list(p+6, len-6, 1, 0) <= 0) 760 break; 761 /* This line continues. */ 762 if (p[len-nl-1] == '\\') 763 multiline = 2; 764 } else 765 break; 766 767 /* Test next line. */ 768 p += len; 769 avail -= len; 770 } 771 if (entry_cnt >= MAX_BID_ENTRY || (entry_cnt > 0 && len == 0)) { 772 if (is_form_d != NULL) { 773 if (form_D == 1) 774 *is_form_d = 1; 775 } 776 return (32); 777 } 778 779 return (0); 780 } 781 782 /* 783 * The extended mtree format permits multiple lines specifying 784 * attributes for each file. For those entries, only the last line 785 * is actually used. Practically speaking, that means we have 786 * to read the entire mtree file into memory up front. 787 * 788 * The parsing is done in two steps. First, it is decided if a line 789 * changes the global defaults and if it is, processed accordingly. 790 * Otherwise, the options of the line are merged with the current 791 * global options. 792 */ 793 static int 794 add_option(struct archive_read *a, struct mtree_option **global, 795 const char *value, size_t len) 796 { 797 struct mtree_option *opt; 798 799 if ((opt = malloc(sizeof(*opt))) == NULL) { 800 archive_set_error(&a->archive, errno, "Can't allocate memory"); 801 return (ARCHIVE_FATAL); 802 } 803 if ((opt->value = malloc(len + 1)) == NULL) { 804 free(opt); 805 archive_set_error(&a->archive, errno, "Can't allocate memory"); 806 return (ARCHIVE_FATAL); 807 } 808 memcpy(opt->value, value, len); 809 opt->value[len] = '\0'; 810 opt->next = *global; 811 *global = opt; 812 return (ARCHIVE_OK); 813 } 814 815 static void 816 remove_option(struct mtree_option **global, const char *value, size_t len) 817 { 818 struct mtree_option *iter, *last; 819 820 last = NULL; 821 for (iter = *global; iter != NULL; last = iter, iter = iter->next) { 822 if (strncmp(iter->value, value, len) == 0 && 823 (iter->value[len] == '\0' || 824 iter->value[len] == '=')) 825 break; 826 } 827 if (iter == NULL) 828 return; 829 if (last == NULL) 830 *global = iter->next; 831 else 832 last->next = iter->next; 833 834 free(iter->value); 835 free(iter); 836 } 837 838 static int 839 process_global_set(struct archive_read *a, 840 struct mtree_option **global, const char *line) 841 { 842 const char *next, *eq; 843 size_t len; 844 int r; 845 846 line += 4; 847 for (;;) { 848 next = line + strspn(line, " \t\r\n"); 849 if (*next == '\0') 850 return (ARCHIVE_OK); 851 line = next; 852 next = line + strcspn(line, " \t\r\n"); 853 eq = strchr(line, '='); 854 if (eq > next) 855 len = next - line; 856 else 857 len = eq - line; 858 859 remove_option(global, line, len); 860 r = add_option(a, global, line, next - line); 861 if (r != ARCHIVE_OK) 862 return (r); 863 line = next; 864 } 865 } 866 867 static int 868 process_global_unset(struct archive_read *a, 869 struct mtree_option **global, const char *line) 870 { 871 const char *next; 872 size_t len; 873 874 line += 6; 875 if (strchr(line, '=') != NULL) { 876 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 877 "/unset shall not contain `='"); 878 return ARCHIVE_FATAL; 879 } 880 881 for (;;) { 882 next = line + strspn(line, " \t\r\n"); 883 if (*next == '\0') 884 return (ARCHIVE_OK); 885 line = next; 886 len = strcspn(line, " \t\r\n"); 887 888 if (len == 3 && strncmp(line, "all", 3) == 0) { 889 free_options(*global); 890 *global = NULL; 891 } else { 892 remove_option(global, line, len); 893 } 894 895 line += len; 896 } 897 } 898 899 static int 900 process_add_entry(struct archive_read *a, struct mtree *mtree, 901 struct mtree_option **global, const char *line, ssize_t line_len, 902 struct mtree_entry **last_entry, int is_form_d) 903 { 904 struct mtree_entry *entry; 905 struct mtree_option *iter; 906 const char *next, *eq, *name, *end; 907 size_t name_len, len; 908 int r, i; 909 910 if ((entry = malloc(sizeof(*entry))) == NULL) { 911 archive_set_error(&a->archive, errno, "Can't allocate memory"); 912 return (ARCHIVE_FATAL); 913 } 914 entry->next = NULL; 915 entry->options = NULL; 916 entry->name = NULL; 917 entry->used = 0; 918 entry->full = 0; 919 920 /* Add this entry to list. */ 921 if (*last_entry == NULL) 922 mtree->entries = entry; 923 else 924 (*last_entry)->next = entry; 925 *last_entry = entry; 926 927 if (is_form_d) { 928 /* Filename is last item on line. */ 929 /* Adjust line_len to trim trailing whitespace */ 930 while (line_len > 0) { 931 char last_character = line[line_len - 1]; 932 if (last_character == '\r' 933 || last_character == '\n' 934 || last_character == '\t' 935 || last_character == ' ') { 936 line_len--; 937 } else { 938 break; 939 } 940 } 941 /* Name starts after the last whitespace separator */ 942 name = line; 943 for (i = 0; i < line_len; i++) { 944 if (line[i] == '\r' 945 || line[i] == '\n' 946 || line[i] == '\t' 947 || line[i] == ' ') { 948 name = line + i + 1; 949 } 950 } 951 name_len = line + line_len - name; 952 end = name; 953 } else { 954 /* Filename is first item on line */ 955 name_len = strcspn(line, " \t\r\n"); 956 name = line; 957 line += name_len; 958 end = line + line_len; 959 } 960 /* name/name_len is the name within the line. */ 961 /* line..end brackets the entire line except the name */ 962 963 if ((entry->name = malloc(name_len + 1)) == NULL) { 964 archive_set_error(&a->archive, errno, "Can't allocate memory"); 965 return (ARCHIVE_FATAL); 966 } 967 968 memcpy(entry->name, name, name_len); 969 entry->name[name_len] = '\0'; 970 parse_escapes(entry->name, entry); 971 972 entry->next_dup = NULL; 973 if (entry->full) { 974 if (!__archive_rb_tree_insert_node(&mtree->rbtree, &entry->rbnode)) { 975 struct mtree_entry *alt; 976 alt = (struct mtree_entry *)__archive_rb_tree_find_node( 977 &mtree->rbtree, entry->name); 978 while (alt->next_dup) 979 alt = alt->next_dup; 980 alt->next_dup = entry; 981 } 982 } 983 984 for (iter = *global; iter != NULL; iter = iter->next) { 985 r = add_option(a, &entry->options, iter->value, 986 strlen(iter->value)); 987 if (r != ARCHIVE_OK) 988 return (r); 989 } 990 991 for (;;) { 992 next = line + strspn(line, " \t\r\n"); 993 if (*next == '\0') 994 return (ARCHIVE_OK); 995 if (next >= end) 996 return (ARCHIVE_OK); 997 line = next; 998 next = line + strcspn(line, " \t\r\n"); 999 eq = strchr(line, '='); 1000 if (eq == NULL || eq > next) 1001 len = next - line; 1002 else 1003 len = eq - line; 1004 1005 remove_option(&entry->options, line, len); 1006 r = add_option(a, &entry->options, line, next - line); 1007 if (r != ARCHIVE_OK) 1008 return (r); 1009 line = next; 1010 } 1011 } 1012 1013 static int 1014 read_mtree(struct archive_read *a, struct mtree *mtree) 1015 { 1016 ssize_t len; 1017 uintmax_t counter; 1018 char *p, *s; 1019 struct mtree_option *global; 1020 struct mtree_entry *last_entry; 1021 int r, is_form_d; 1022 1023 mtree->archive_format = ARCHIVE_FORMAT_MTREE; 1024 mtree->archive_format_name = "mtree"; 1025 1026 global = NULL; 1027 last_entry = NULL; 1028 1029 (void)detect_form(a, &is_form_d); 1030 1031 for (counter = 1; ; ++counter) { 1032 r = ARCHIVE_OK; 1033 len = readline(a, mtree, &p, 65536); 1034 if (len == 0) { 1035 mtree->this_entry = mtree->entries; 1036 free_options(global); 1037 return (ARCHIVE_OK); 1038 } 1039 if (len < 0) { 1040 free_options(global); 1041 return ((int)len); 1042 } 1043 /* Leading whitespace is never significant, ignore it. */ 1044 while (*p == ' ' || *p == '\t') { 1045 ++p; 1046 --len; 1047 } 1048 /* Skip content lines and blank lines. */ 1049 if (*p == '#') 1050 continue; 1051 if (*p == '\r' || *p == '\n' || *p == '\0') 1052 continue; 1053 /* Non-printable characters are not allowed */ 1054 for (s = p;s < p + len - 1; s++) { 1055 if (!isprint(*s)) { 1056 r = ARCHIVE_FATAL; 1057 break; 1058 } 1059 } 1060 if (r != ARCHIVE_OK) 1061 break; 1062 if (*p != '/') { 1063 r = process_add_entry(a, mtree, &global, p, len, 1064 &last_entry, is_form_d); 1065 } else if (len > 4 && strncmp(p, "/set", 4) == 0) { 1066 if (p[4] != ' ' && p[4] != '\t') 1067 break; 1068 r = process_global_set(a, &global, p); 1069 } else if (len > 6 && strncmp(p, "/unset", 6) == 0) { 1070 if (p[6] != ' ' && p[6] != '\t') 1071 break; 1072 r = process_global_unset(a, &global, p); 1073 } else 1074 break; 1075 1076 if (r != ARCHIVE_OK) { 1077 free_options(global); 1078 return r; 1079 } 1080 } 1081 1082 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1083 "Can't parse line %ju", counter); 1084 free_options(global); 1085 return (ARCHIVE_FATAL); 1086 } 1087 1088 /* 1089 * Read in the entire mtree file into memory on the first request. 1090 * Then use the next unused file to satisfy each header request. 1091 */ 1092 static int 1093 read_header(struct archive_read *a, struct archive_entry *entry) 1094 { 1095 struct mtree *mtree; 1096 char *p; 1097 int r, use_next; 1098 1099 mtree = (struct mtree *)(a->format->data); 1100 1101 if (mtree->fd >= 0) { 1102 close(mtree->fd); 1103 mtree->fd = -1; 1104 } 1105 1106 if (mtree->entries == NULL) { 1107 mtree->resolver = archive_entry_linkresolver_new(); 1108 if (mtree->resolver == NULL) 1109 return ARCHIVE_FATAL; 1110 archive_entry_linkresolver_set_strategy(mtree->resolver, 1111 ARCHIVE_FORMAT_MTREE); 1112 r = read_mtree(a, mtree); 1113 if (r != ARCHIVE_OK) 1114 return (r); 1115 } 1116 1117 a->archive.archive_format = mtree->archive_format; 1118 a->archive.archive_format_name = mtree->archive_format_name; 1119 1120 for (;;) { 1121 if (mtree->this_entry == NULL) 1122 return (ARCHIVE_EOF); 1123 if (strcmp(mtree->this_entry->name, "..") == 0) { 1124 mtree->this_entry->used = 1; 1125 if (archive_strlen(&mtree->current_dir) > 0) { 1126 /* Roll back current path. */ 1127 p = mtree->current_dir.s 1128 + mtree->current_dir.length - 1; 1129 while (p >= mtree->current_dir.s && *p != '/') 1130 --p; 1131 if (p >= mtree->current_dir.s) 1132 --p; 1133 mtree->current_dir.length 1134 = p - mtree->current_dir.s + 1; 1135 } 1136 } 1137 if (!mtree->this_entry->used) { 1138 use_next = 0; 1139 r = parse_file(a, entry, mtree, mtree->this_entry, 1140 &use_next); 1141 if (use_next == 0) 1142 return (r); 1143 } 1144 mtree->this_entry = mtree->this_entry->next; 1145 } 1146 } 1147 1148 /* 1149 * A single file can have multiple lines contribute specifications. 1150 * Parse as many lines as necessary, then pull additional information 1151 * from a backing file on disk as necessary. 1152 */ 1153 static int 1154 parse_file(struct archive_read *a, struct archive_entry *entry, 1155 struct mtree *mtree, struct mtree_entry *mentry, int *use_next) 1156 { 1157 const char *path; 1158 struct stat st_storage, *st; 1159 struct mtree_entry *mp; 1160 struct archive_entry *sparse_entry; 1161 int r = ARCHIVE_OK, r1, parsed_kws; 1162 1163 mentry->used = 1; 1164 1165 /* Initialize reasonable defaults. */ 1166 archive_entry_set_filetype(entry, AE_IFREG); 1167 archive_entry_set_size(entry, 0); 1168 archive_string_empty(&mtree->contents_name); 1169 1170 /* Parse options from this line. */ 1171 parsed_kws = 0; 1172 r = parse_line(a, entry, mtree, mentry, &parsed_kws); 1173 1174 if (mentry->full) { 1175 archive_entry_copy_pathname(entry, mentry->name); 1176 /* 1177 * "Full" entries are allowed to have multiple lines 1178 * and those lines aren't required to be adjacent. We 1179 * don't support multiple lines for "relative" entries 1180 * nor do we make any attempt to merge data from 1181 * separate "relative" and "full" entries. (Merging 1182 * "relative" and "full" entries would require dealing 1183 * with pathname canonicalization, which is a very 1184 * tricky subject.) 1185 */ 1186 mp = (struct mtree_entry *)__archive_rb_tree_find_node( 1187 &mtree->rbtree, mentry->name); 1188 for (; mp; mp = mp->next_dup) { 1189 if (mp->full && !mp->used) { 1190 /* Later lines override earlier ones. */ 1191 mp->used = 1; 1192 r1 = parse_line(a, entry, mtree, mp, &parsed_kws); 1193 if (r1 < r) 1194 r = r1; 1195 } 1196 } 1197 } else { 1198 /* 1199 * Relative entries require us to construct 1200 * the full path and possibly update the 1201 * current directory. 1202 */ 1203 size_t n = archive_strlen(&mtree->current_dir); 1204 if (n > 0) 1205 archive_strcat(&mtree->current_dir, "/"); 1206 archive_strcat(&mtree->current_dir, mentry->name); 1207 archive_entry_copy_pathname(entry, mtree->current_dir.s); 1208 if (archive_entry_filetype(entry) != AE_IFDIR) 1209 mtree->current_dir.length = n; 1210 } 1211 1212 if (mtree->checkfs) { 1213 /* 1214 * Try to open and stat the file to get the real size 1215 * and other file info. It would be nice to avoid 1216 * this here so that getting a listing of an mtree 1217 * wouldn't require opening every referenced contents 1218 * file. But then we wouldn't know the actual 1219 * contents size, so I don't see a really viable way 1220 * around this. (Also, we may want to someday pull 1221 * other unspecified info from the contents file on 1222 * disk.) 1223 */ 1224 mtree->fd = -1; 1225 if (archive_strlen(&mtree->contents_name) > 0) 1226 path = mtree->contents_name.s; 1227 else 1228 path = archive_entry_pathname(entry); 1229 1230 if (archive_entry_filetype(entry) == AE_IFREG || 1231 archive_entry_filetype(entry) == AE_IFDIR) { 1232 mtree->fd = open(path, O_RDONLY | O_BINARY | O_CLOEXEC); 1233 __archive_ensure_cloexec_flag(mtree->fd); 1234 if (mtree->fd == -1 && 1235 (errno != ENOENT || 1236 archive_strlen(&mtree->contents_name) > 0)) { 1237 archive_set_error(&a->archive, errno, 1238 "Can't open %s", path); 1239 r = ARCHIVE_WARN; 1240 } 1241 } 1242 1243 st = &st_storage; 1244 if (mtree->fd >= 0) { 1245 if (fstat(mtree->fd, st) == -1) { 1246 archive_set_error(&a->archive, errno, 1247 "Could not fstat %s", path); 1248 r = ARCHIVE_WARN; 1249 /* If we can't stat it, don't keep it open. */ 1250 close(mtree->fd); 1251 mtree->fd = -1; 1252 st = NULL; 1253 } 1254 } else if (lstat(path, st) == -1) { 1255 st = NULL; 1256 } 1257 1258 /* 1259 * Check for a mismatch between the type in the specification 1260 * and the type of the contents object on disk. 1261 */ 1262 if (st != NULL) { 1263 if (((st->st_mode & S_IFMT) == S_IFREG && 1264 archive_entry_filetype(entry) == AE_IFREG) 1265 #ifdef S_IFLNK 1266 ||((st->st_mode & S_IFMT) == S_IFLNK && 1267 archive_entry_filetype(entry) == AE_IFLNK) 1268 #endif 1269 #ifdef S_IFSOCK 1270 ||((st->st_mode & S_IFSOCK) == S_IFSOCK && 1271 archive_entry_filetype(entry) == AE_IFSOCK) 1272 #endif 1273 #ifdef S_IFCHR 1274 ||((st->st_mode & S_IFMT) == S_IFCHR && 1275 archive_entry_filetype(entry) == AE_IFCHR) 1276 #endif 1277 #ifdef S_IFBLK 1278 ||((st->st_mode & S_IFMT) == S_IFBLK && 1279 archive_entry_filetype(entry) == AE_IFBLK) 1280 #endif 1281 ||((st->st_mode & S_IFMT) == S_IFDIR && 1282 archive_entry_filetype(entry) == AE_IFDIR) 1283 #ifdef S_IFIFO 1284 ||((st->st_mode & S_IFMT) == S_IFIFO && 1285 archive_entry_filetype(entry) == AE_IFIFO) 1286 #endif 1287 ) { 1288 /* Types match. */ 1289 } else { 1290 /* Types don't match; bail out gracefully. */ 1291 if (mtree->fd >= 0) 1292 close(mtree->fd); 1293 mtree->fd = -1; 1294 if (parsed_kws & MTREE_HAS_OPTIONAL) { 1295 /* It's not an error for an optional 1296 * entry to not match disk. */ 1297 *use_next = 1; 1298 } else if (r == ARCHIVE_OK) { 1299 archive_set_error(&a->archive, 1300 ARCHIVE_ERRNO_MISC, 1301 "mtree specification has different" 1302 " type for %s", 1303 archive_entry_pathname(entry)); 1304 r = ARCHIVE_WARN; 1305 } 1306 return (r); 1307 } 1308 } 1309 1310 /* 1311 * If there is a contents file on disk, pick some of the 1312 * metadata from that file. For most of these, we only 1313 * set it from the contents if it wasn't already parsed 1314 * from the specification. 1315 */ 1316 if (st != NULL) { 1317 if (((parsed_kws & MTREE_HAS_DEVICE) == 0 || 1318 (parsed_kws & MTREE_HAS_NOCHANGE) != 0) && 1319 (archive_entry_filetype(entry) == AE_IFCHR || 1320 archive_entry_filetype(entry) == AE_IFBLK)) 1321 archive_entry_set_rdev(entry, st->st_rdev); 1322 if ((parsed_kws & (MTREE_HAS_GID | MTREE_HAS_GNAME)) 1323 == 0 || 1324 (parsed_kws & MTREE_HAS_NOCHANGE) != 0) 1325 archive_entry_set_gid(entry, st->st_gid); 1326 if ((parsed_kws & (MTREE_HAS_UID | MTREE_HAS_UNAME)) 1327 == 0 || 1328 (parsed_kws & MTREE_HAS_NOCHANGE) != 0) 1329 archive_entry_set_uid(entry, st->st_uid); 1330 if ((parsed_kws & MTREE_HAS_MTIME) == 0 || 1331 (parsed_kws & MTREE_HAS_NOCHANGE) != 0) { 1332 #if HAVE_STRUCT_STAT_ST_MTIMESPEC_TV_NSEC 1333 archive_entry_set_mtime(entry, st->st_mtime, 1334 st->st_mtimespec.tv_nsec); 1335 #elif HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC 1336 archive_entry_set_mtime(entry, st->st_mtime, 1337 st->st_mtim.tv_nsec); 1338 #elif HAVE_STRUCT_STAT_ST_MTIME_N 1339 archive_entry_set_mtime(entry, st->st_mtime, 1340 st->st_mtime_n); 1341 #elif HAVE_STRUCT_STAT_ST_UMTIME 1342 archive_entry_set_mtime(entry, st->st_mtime, 1343 st->st_umtime*1000); 1344 #elif HAVE_STRUCT_STAT_ST_MTIME_USEC 1345 archive_entry_set_mtime(entry, st->st_mtime, 1346 st->st_mtime_usec*1000); 1347 #else 1348 archive_entry_set_mtime(entry, st->st_mtime, 0); 1349 #endif 1350 } 1351 if ((parsed_kws & MTREE_HAS_NLINK) == 0 || 1352 (parsed_kws & MTREE_HAS_NOCHANGE) != 0) 1353 archive_entry_set_nlink(entry, st->st_nlink); 1354 if ((parsed_kws & MTREE_HAS_PERM) == 0 || 1355 (parsed_kws & MTREE_HAS_NOCHANGE) != 0) 1356 archive_entry_set_perm(entry, st->st_mode); 1357 if ((parsed_kws & MTREE_HAS_SIZE) == 0 || 1358 (parsed_kws & MTREE_HAS_NOCHANGE) != 0) 1359 archive_entry_set_size(entry, st->st_size); 1360 archive_entry_set_ino(entry, st->st_ino); 1361 archive_entry_set_dev(entry, st->st_dev); 1362 1363 archive_entry_linkify(mtree->resolver, &entry, 1364 &sparse_entry); 1365 } else if (parsed_kws & MTREE_HAS_OPTIONAL) { 1366 /* 1367 * Couldn't open the entry, stat it or the on-disk type 1368 * didn't match. If this entry is optional, just 1369 * ignore it and read the next header entry. 1370 */ 1371 *use_next = 1; 1372 return ARCHIVE_OK; 1373 } 1374 } 1375 1376 mtree->cur_size = archive_entry_size(entry); 1377 mtree->offset = 0; 1378 1379 return r; 1380 } 1381 1382 /* 1383 * Each line contains a sequence of keywords. 1384 */ 1385 static int 1386 parse_line(struct archive_read *a, struct archive_entry *entry, 1387 struct mtree *mtree, struct mtree_entry *mp, int *parsed_kws) 1388 { 1389 struct mtree_option *iter; 1390 int r = ARCHIVE_OK, r1; 1391 1392 for (iter = mp->options; iter != NULL; iter = iter->next) { 1393 r1 = parse_keyword(a, mtree, entry, iter, parsed_kws); 1394 if (r1 < r) 1395 r = r1; 1396 } 1397 if (r == ARCHIVE_OK && (*parsed_kws & MTREE_HAS_TYPE) == 0) { 1398 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1399 "Missing type keyword in mtree specification"); 1400 return (ARCHIVE_WARN); 1401 } 1402 return (r); 1403 } 1404 1405 /* 1406 * Device entries have one of the following forms: 1407 * - raw dev_t 1408 * - format,major,minor[,subdevice] 1409 * When parsing succeeded, `pdev' will contain the appropriate dev_t value. 1410 */ 1411 1412 /* strsep() is not in C90, but strcspn() is. */ 1413 /* Taken from http://unixpapa.com/incnote/string.html */ 1414 static char * 1415 la_strsep(char **sp, const char *sep) 1416 { 1417 char *p, *s; 1418 if (sp == NULL || *sp == NULL || **sp == '\0') 1419 return(NULL); 1420 s = *sp; 1421 p = s + strcspn(s, sep); 1422 if (*p != '\0') 1423 *p++ = '\0'; 1424 *sp = p; 1425 return(s); 1426 } 1427 1428 static int 1429 parse_device(dev_t *pdev, struct archive *a, char *val) 1430 { 1431 #define MAX_PACK_ARGS 3 1432 unsigned long numbers[MAX_PACK_ARGS]; 1433 char *p, *dev; 1434 int argc; 1435 pack_t *pack; 1436 dev_t result; 1437 const char *error = NULL; 1438 1439 memset(pdev, 0, sizeof(*pdev)); 1440 if ((dev = strchr(val, ',')) != NULL) { 1441 /* 1442 * Device's major/minor are given in a specified format. 1443 * Decode and pack it accordingly. 1444 */ 1445 *dev++ = '\0'; 1446 if ((pack = pack_find(val)) == NULL) { 1447 archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, 1448 "Unknown format `%s'", val); 1449 return ARCHIVE_WARN; 1450 } 1451 argc = 0; 1452 while ((p = la_strsep(&dev, ",")) != NULL) { 1453 if (*p == '\0') { 1454 archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, 1455 "Missing number"); 1456 return ARCHIVE_WARN; 1457 } 1458 if (argc >= MAX_PACK_ARGS) { 1459 archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, 1460 "Too many arguments"); 1461 return ARCHIVE_WARN; 1462 } 1463 numbers[argc++] = (unsigned long)mtree_atol(&p, 0); 1464 } 1465 if (argc < 2) { 1466 archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, 1467 "Not enough arguments"); 1468 return ARCHIVE_WARN; 1469 } 1470 result = (*pack)(argc, numbers, &error); 1471 if (error != NULL) { 1472 archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT, 1473 "%s", error); 1474 return ARCHIVE_WARN; 1475 } 1476 } else { 1477 /* file system raw value. */ 1478 result = (dev_t)mtree_atol(&val, 0); 1479 } 1480 *pdev = result; 1481 return ARCHIVE_OK; 1482 #undef MAX_PACK_ARGS 1483 } 1484 1485 /* 1486 * Parse a single keyword and its value. 1487 */ 1488 static int 1489 parse_keyword(struct archive_read *a, struct mtree *mtree, 1490 struct archive_entry *entry, struct mtree_option *opt, int *parsed_kws) 1491 { 1492 char *val, *key; 1493 1494 key = opt->value; 1495 1496 if (*key == '\0') 1497 return (ARCHIVE_OK); 1498 1499 if (strcmp(key, "nochange") == 0) { 1500 *parsed_kws |= MTREE_HAS_NOCHANGE; 1501 return (ARCHIVE_OK); 1502 } 1503 if (strcmp(key, "optional") == 0) { 1504 *parsed_kws |= MTREE_HAS_OPTIONAL; 1505 return (ARCHIVE_OK); 1506 } 1507 if (strcmp(key, "ignore") == 0) { 1508 /* 1509 * The mtree processing is not recursive, so 1510 * recursion will only happen for explicitly listed 1511 * entries. 1512 */ 1513 return (ARCHIVE_OK); 1514 } 1515 1516 val = strchr(key, '='); 1517 if (val == NULL) { 1518 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1519 "Malformed attribute \"%s\" (%d)", key, key[0]); 1520 return (ARCHIVE_WARN); 1521 } 1522 1523 *val = '\0'; 1524 ++val; 1525 1526 switch (key[0]) { 1527 case 'c': 1528 if (strcmp(key, "content") == 0 1529 || strcmp(key, "contents") == 0) { 1530 parse_escapes(val, NULL); 1531 archive_strcpy(&mtree->contents_name, val); 1532 break; 1533 } 1534 if (strcmp(key, "cksum") == 0) 1535 break; 1536 __LA_FALLTHROUGH; 1537 case 'd': 1538 if (strcmp(key, "device") == 0) { 1539 /* stat(2) st_rdev field, e.g. the major/minor IDs 1540 * of a char/block special file */ 1541 int r; 1542 dev_t dev; 1543 1544 *parsed_kws |= MTREE_HAS_DEVICE; 1545 r = parse_device(&dev, &a->archive, val); 1546 if (r == ARCHIVE_OK) 1547 archive_entry_set_rdev(entry, dev); 1548 return r; 1549 } 1550 __LA_FALLTHROUGH; 1551 case 'f': 1552 if (strcmp(key, "flags") == 0) { 1553 *parsed_kws |= MTREE_HAS_FFLAGS; 1554 archive_entry_copy_fflags_text(entry, val); 1555 break; 1556 } 1557 __LA_FALLTHROUGH; 1558 case 'g': 1559 if (strcmp(key, "gid") == 0) { 1560 *parsed_kws |= MTREE_HAS_GID; 1561 archive_entry_set_gid(entry, mtree_atol(&val, 10)); 1562 break; 1563 } 1564 if (strcmp(key, "gname") == 0) { 1565 *parsed_kws |= MTREE_HAS_GNAME; 1566 archive_entry_copy_gname(entry, val); 1567 break; 1568 } 1569 __LA_FALLTHROUGH; 1570 case 'i': 1571 if (strcmp(key, "inode") == 0) { 1572 archive_entry_set_ino(entry, mtree_atol(&val, 10)); 1573 break; 1574 } 1575 __LA_FALLTHROUGH; 1576 case 'l': 1577 if (strcmp(key, "link") == 0) { 1578 archive_entry_copy_symlink(entry, val); 1579 break; 1580 } 1581 __LA_FALLTHROUGH; 1582 case 'm': 1583 if (strcmp(key, "md5") == 0 || strcmp(key, "md5digest") == 0) 1584 break; 1585 if (strcmp(key, "mode") == 0) { 1586 if (val[0] >= '0' && val[0] <= '7') { 1587 *parsed_kws |= MTREE_HAS_PERM; 1588 archive_entry_set_perm(entry, 1589 (mode_t)mtree_atol(&val, 8)); 1590 } else { 1591 archive_set_error(&a->archive, 1592 ARCHIVE_ERRNO_FILE_FORMAT, 1593 "Symbolic or non-octal mode \"%s\" unsupported", val); 1594 return ARCHIVE_WARN; 1595 } 1596 break; 1597 } 1598 __LA_FALLTHROUGH; 1599 case 'n': 1600 if (strcmp(key, "nlink") == 0) { 1601 *parsed_kws |= MTREE_HAS_NLINK; 1602 archive_entry_set_nlink(entry, 1603 (unsigned int)mtree_atol(&val, 10)); 1604 break; 1605 } 1606 __LA_FALLTHROUGH; 1607 case 'r': 1608 if (strcmp(key, "resdevice") == 0) { 1609 /* stat(2) st_dev field, e.g. the device ID where the 1610 * inode resides */ 1611 int r; 1612 dev_t dev; 1613 1614 r = parse_device(&dev, &a->archive, val); 1615 if (r == ARCHIVE_OK) 1616 archive_entry_set_dev(entry, dev); 1617 return r; 1618 } 1619 if (strcmp(key, "rmd160") == 0 || 1620 strcmp(key, "rmd160digest") == 0) 1621 break; 1622 __LA_FALLTHROUGH; 1623 case 's': 1624 if (strcmp(key, "sha1") == 0 || strcmp(key, "sha1digest") == 0) 1625 break; 1626 if (strcmp(key, "sha256") == 0 || 1627 strcmp(key, "sha256digest") == 0) 1628 break; 1629 if (strcmp(key, "sha384") == 0 || 1630 strcmp(key, "sha384digest") == 0) 1631 break; 1632 if (strcmp(key, "sha512") == 0 || 1633 strcmp(key, "sha512digest") == 0) 1634 break; 1635 if (strcmp(key, "size") == 0) { 1636 archive_entry_set_size(entry, mtree_atol(&val, 10)); 1637 break; 1638 } 1639 __LA_FALLTHROUGH; 1640 case 't': 1641 if (strcmp(key, "tags") == 0) { 1642 /* 1643 * Comma delimited list of tags. 1644 * Ignore the tags for now, but the interface 1645 * should be extended to allow inclusion/exclusion. 1646 */ 1647 break; 1648 } 1649 if (strcmp(key, "time") == 0) { 1650 int64_t m; 1651 int64_t my_time_t_max = get_time_t_max(); 1652 int64_t my_time_t_min = get_time_t_min(); 1653 long ns = 0; 1654 1655 *parsed_kws |= MTREE_HAS_MTIME; 1656 m = mtree_atol(&val, 10); 1657 /* Replicate an old mtree bug: 1658 * 123456789.1 represents 123456789 1659 * seconds and 1 nanosecond. */ 1660 if (*val == '.') { 1661 ++val; 1662 ns = (long)mtree_atol(&val, 10); 1663 if (ns < 0) 1664 ns = 0; 1665 else if (ns > 999999999) 1666 ns = 999999999; 1667 } 1668 if (m > my_time_t_max) 1669 m = my_time_t_max; 1670 else if (m < my_time_t_min) 1671 m = my_time_t_min; 1672 archive_entry_set_mtime(entry, (time_t)m, ns); 1673 break; 1674 } 1675 if (strcmp(key, "type") == 0) { 1676 switch (val[0]) { 1677 case 'b': 1678 if (strcmp(val, "block") == 0) { 1679 archive_entry_set_filetype(entry, AE_IFBLK); 1680 break; 1681 } 1682 __LA_FALLTHROUGH; 1683 case 'c': 1684 if (strcmp(val, "char") == 0) { 1685 archive_entry_set_filetype(entry, 1686 AE_IFCHR); 1687 break; 1688 } 1689 __LA_FALLTHROUGH; 1690 case 'd': 1691 if (strcmp(val, "dir") == 0) { 1692 archive_entry_set_filetype(entry, 1693 AE_IFDIR); 1694 break; 1695 } 1696 __LA_FALLTHROUGH; 1697 case 'f': 1698 if (strcmp(val, "fifo") == 0) { 1699 archive_entry_set_filetype(entry, 1700 AE_IFIFO); 1701 break; 1702 } 1703 if (strcmp(val, "file") == 0) { 1704 archive_entry_set_filetype(entry, 1705 AE_IFREG); 1706 break; 1707 } 1708 __LA_FALLTHROUGH; 1709 case 'l': 1710 if (strcmp(val, "link") == 0) { 1711 archive_entry_set_filetype(entry, 1712 AE_IFLNK); 1713 break; 1714 } 1715 __LA_FALLTHROUGH; 1716 default: 1717 archive_set_error(&a->archive, 1718 ARCHIVE_ERRNO_FILE_FORMAT, 1719 "Unrecognized file type \"%s\"; " 1720 "assuming \"file\"", val); 1721 archive_entry_set_filetype(entry, AE_IFREG); 1722 return (ARCHIVE_WARN); 1723 } 1724 *parsed_kws |= MTREE_HAS_TYPE; 1725 break; 1726 } 1727 __LA_FALLTHROUGH; 1728 case 'u': 1729 if (strcmp(key, "uid") == 0) { 1730 *parsed_kws |= MTREE_HAS_UID; 1731 archive_entry_set_uid(entry, mtree_atol(&val, 10)); 1732 break; 1733 } 1734 if (strcmp(key, "uname") == 0) { 1735 *parsed_kws |= MTREE_HAS_UNAME; 1736 archive_entry_copy_uname(entry, val); 1737 break; 1738 } 1739 __LA_FALLTHROUGH; 1740 default: 1741 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1742 "Unrecognized key %s=%s", key, val); 1743 return (ARCHIVE_WARN); 1744 } 1745 return (ARCHIVE_OK); 1746 } 1747 1748 static int 1749 read_data(struct archive_read *a, const void **buff, size_t *size, 1750 int64_t *offset) 1751 { 1752 size_t bytes_to_read; 1753 ssize_t bytes_read; 1754 struct mtree *mtree; 1755 1756 mtree = (struct mtree *)(a->format->data); 1757 if (mtree->fd < 0) { 1758 *buff = NULL; 1759 *offset = 0; 1760 *size = 0; 1761 return (ARCHIVE_EOF); 1762 } 1763 if (mtree->buff == NULL) { 1764 mtree->buffsize = 64 * 1024; 1765 mtree->buff = malloc(mtree->buffsize); 1766 if (mtree->buff == NULL) { 1767 archive_set_error(&a->archive, ENOMEM, 1768 "Can't allocate memory"); 1769 return (ARCHIVE_FATAL); 1770 } 1771 } 1772 1773 *buff = mtree->buff; 1774 *offset = mtree->offset; 1775 if ((int64_t)mtree->buffsize > mtree->cur_size - mtree->offset) 1776 bytes_to_read = (size_t)(mtree->cur_size - mtree->offset); 1777 else 1778 bytes_to_read = mtree->buffsize; 1779 bytes_read = read(mtree->fd, mtree->buff, bytes_to_read); 1780 if (bytes_read < 0) { 1781 archive_set_error(&a->archive, errno, "Can't read"); 1782 return (ARCHIVE_WARN); 1783 } 1784 if (bytes_read == 0) { 1785 *size = 0; 1786 return (ARCHIVE_EOF); 1787 } 1788 mtree->offset += bytes_read; 1789 *size = bytes_read; 1790 return (ARCHIVE_OK); 1791 } 1792 1793 /* Skip does nothing except possibly close the contents file. */ 1794 static int 1795 skip(struct archive_read *a) 1796 { 1797 struct mtree *mtree; 1798 1799 mtree = (struct mtree *)(a->format->data); 1800 if (mtree->fd >= 0) { 1801 close(mtree->fd); 1802 mtree->fd = -1; 1803 } 1804 return (ARCHIVE_OK); 1805 } 1806 1807 /* 1808 * Since parsing backslash sequences always makes strings shorter, 1809 * we can always do this conversion in-place. 1810 */ 1811 static void 1812 parse_escapes(char *src, struct mtree_entry *mentry) 1813 { 1814 char *dest = src; 1815 char c; 1816 1817 if (mentry != NULL && strcmp(src, ".") == 0) 1818 mentry->full = 1; 1819 1820 while (*src != '\0') { 1821 c = *src++; 1822 if (c == '/' && mentry != NULL) 1823 mentry->full = 1; 1824 if (c == '\\') { 1825 switch (src[0]) { 1826 case '0': 1827 if (src[1] < '0' || src[1] > '7') { 1828 c = 0; 1829 ++src; 1830 break; 1831 } 1832 /* FALLTHROUGH */ 1833 case '1': 1834 case '2': 1835 case '3': 1836 if (src[1] >= '0' && src[1] <= '7' && 1837 src[2] >= '0' && src[2] <= '7') { 1838 c = (src[0] - '0') << 6; 1839 c |= (src[1] - '0') << 3; 1840 c |= (src[2] - '0'); 1841 src += 3; 1842 } 1843 break; 1844 case 'a': 1845 c = '\a'; 1846 ++src; 1847 break; 1848 case 'b': 1849 c = '\b'; 1850 ++src; 1851 break; 1852 case 'f': 1853 c = '\f'; 1854 ++src; 1855 break; 1856 case 'n': 1857 c = '\n'; 1858 ++src; 1859 break; 1860 case 'r': 1861 c = '\r'; 1862 ++src; 1863 break; 1864 case 's': 1865 c = ' '; 1866 ++src; 1867 break; 1868 case 't': 1869 c = '\t'; 1870 ++src; 1871 break; 1872 case 'v': 1873 c = '\v'; 1874 ++src; 1875 break; 1876 case '\\': 1877 c = '\\'; 1878 ++src; 1879 break; 1880 } 1881 } 1882 *dest++ = c; 1883 } 1884 *dest = '\0'; 1885 } 1886 1887 /* Parse a hex digit. */ 1888 static int 1889 parsedigit(char c) 1890 { 1891 if (c >= '0' && c <= '9') 1892 return c - '0'; 1893 else if (c >= 'a' && c <= 'f') 1894 return c - 'a'; 1895 else if (c >= 'A' && c <= 'F') 1896 return c - 'A'; 1897 else 1898 return -1; 1899 } 1900 1901 /* 1902 * Note that this implementation does not (and should not!) obey 1903 * locale settings; you cannot simply substitute strtol here, since 1904 * it does obey locale. 1905 */ 1906 static int64_t 1907 mtree_atol(char **p, int base) 1908 { 1909 int64_t l, limit; 1910 int digit, last_digit_limit; 1911 1912 if (base == 0) { 1913 if (**p != '0') 1914 base = 10; 1915 else if ((*p)[1] == 'x' || (*p)[1] == 'X') { 1916 *p += 2; 1917 base = 16; 1918 } else { 1919 base = 8; 1920 } 1921 } 1922 1923 if (**p == '-') { 1924 limit = INT64_MIN / base; 1925 last_digit_limit = INT64_MIN % base; 1926 ++(*p); 1927 1928 l = 0; 1929 digit = parsedigit(**p); 1930 while (digit >= 0 && digit < base) { 1931 if (l < limit || (l == limit && digit > last_digit_limit)) 1932 return INT64_MIN; 1933 l = (l * base) - digit; 1934 digit = parsedigit(*++(*p)); 1935 } 1936 return l; 1937 } else { 1938 limit = INT64_MAX / base; 1939 last_digit_limit = INT64_MAX % base; 1940 1941 l = 0; 1942 digit = parsedigit(**p); 1943 while (digit >= 0 && digit < base) { 1944 if (l > limit || (l == limit && digit > last_digit_limit)) 1945 return INT64_MAX; 1946 l = (l * base) + digit; 1947 digit = parsedigit(*++(*p)); 1948 } 1949 return l; 1950 } 1951 } 1952 1953 /* 1954 * Returns length of line (including trailing newline) 1955 * or negative on error. 'start' argument is updated to 1956 * point to first character of line. 1957 */ 1958 static ssize_t 1959 readline(struct archive_read *a, struct mtree *mtree, char **start, 1960 ssize_t limit) 1961 { 1962 ssize_t bytes_read; 1963 ssize_t total_size = 0; 1964 ssize_t find_off = 0; 1965 const void *t; 1966 void *nl; 1967 char *u; 1968 1969 /* Accumulate line in a line buffer. */ 1970 for (;;) { 1971 /* Read some more. */ 1972 t = __archive_read_ahead(a, 1, &bytes_read); 1973 if (t == NULL) 1974 return (0); 1975 if (bytes_read < 0) 1976 return (ARCHIVE_FATAL); 1977 nl = memchr(t, '\n', bytes_read); 1978 /* If we found '\n', trim the read to end exactly there. */ 1979 if (nl != NULL) { 1980 bytes_read = ((const char *)nl) - ((const char *)t) + 1; 1981 } 1982 if (total_size + bytes_read + 1 > limit) { 1983 archive_set_error(&a->archive, 1984 ARCHIVE_ERRNO_FILE_FORMAT, 1985 "Line too long"); 1986 return (ARCHIVE_FATAL); 1987 } 1988 if (archive_string_ensure(&mtree->line, 1989 total_size + bytes_read + 1) == NULL) { 1990 archive_set_error(&a->archive, ENOMEM, 1991 "Can't allocate working buffer"); 1992 return (ARCHIVE_FATAL); 1993 } 1994 /* Append new bytes to string. */ 1995 memcpy(mtree->line.s + total_size, t, bytes_read); 1996 __archive_read_consume(a, bytes_read); 1997 total_size += bytes_read; 1998 mtree->line.s[total_size] = '\0'; 1999 2000 for (u = mtree->line.s + find_off; *u; ++u) { 2001 if (u[0] == '\n') { 2002 /* Ends with unescaped newline. */ 2003 *start = mtree->line.s; 2004 return total_size; 2005 } else if (u[0] == '#') { 2006 /* Ends with comment sequence #...\n */ 2007 if (nl == NULL) { 2008 /* But we've not found the \n yet */ 2009 break; 2010 } 2011 } else if (u[0] == '\\') { 2012 if (u[1] == '\n') { 2013 /* Trim escaped newline. */ 2014 total_size -= 2; 2015 mtree->line.s[total_size] = '\0'; 2016 break; 2017 } else if (u[1] != '\0') { 2018 /* Skip the two-char escape sequence */ 2019 ++u; 2020 } 2021 } 2022 } 2023 find_off = u - mtree->line.s; 2024 } 2025 } 2026