1 /*
2 * libdpkg - Debian packaging suite library routines
3 * tarfn.c - tar archive extraction functions
4 *
5 * Copyright © 1995 Bruce Perens
6 * Copyright © 2007-2011, 2013-2017 Guillem Jover <guillem@debian.org>
7 *
8 * This is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program. If not, see <https://www.gnu.org/licenses/>.
20 */
21
22 #include <config.h>
23 #include <compat.h>
24
25 #if HAVE_SYS_SYSMACROS_H
26 #include <sys/sysmacros.h>
27 #endif
28 #include <sys/stat.h>
29
30 #include <errno.h>
31 #include <string.h>
32 #include <pwd.h>
33 #include <grp.h>
34 #include <unistd.h>
35 #include <inttypes.h>
36 #include <stdlib.h>
37 #include <stdio.h>
38
39 #include <dpkg/macros.h>
40 #include <dpkg/dpkg.h>
41 #include <dpkg/i18n.h>
42 #include <dpkg/error.h>
43 #include <dpkg/tarfn.h>
44
/* Contents of the magic field (magic plus version bytes) for ustar and GNU
 * headers. tar_header_decode() only compares the first 6 bytes. */
#define TAR_MAGIC_USTAR "ustar\0" "00"
#define TAR_MAGIC_GNU "ustar " " \0"

/* Evaluates to true when the integer type t is signed: for an unsigned type
 * (t)-1 is the largest value, so 0 < (t)-1 holds. */
#define TAR_TYPE_SIGNED(t) (!((t)0 < (t)-1))

/* Smallest value representable in the integer type t. For signed types this
 * is the bitwise complement of the maximum. */
#define TAR_TYPE_MIN(t) \
	(TAR_TYPE_SIGNED(t) ? \
	 ~(t)TAR_TYPE_MAX(t) : \
	 (t)0)
/* Largest value representable in the integer type t. The signed case builds
 * the value in two steps so that the shift never reaches the sign bit; the
 * computation assumes 8 bits per byte. */
#define TAR_TYPE_MAX(t) \
	(TAR_TYPE_SIGNED(t) ? \
	 ((((t)1 << (sizeof(t) * 8 - 2)) - 1) * 2 + 1) : \
	 ~(t)0)

/* Parse the header field str as an unsigned/signed number limited to the
 * range of type. str must be an array (not a pointer), because sizeof(str)
 * is used as the field width. Failures are reported through errno by
 * tar_atoul() and tar_atosl(). */
#define TAR_ATOUL(str, type) \
	(type)tar_atoul(str, sizeof(str), TAR_TYPE_MAX(type))
#define TAR_ATOSL(str, type) \
	(type)tar_atosl(str, sizeof(str), TAR_TYPE_MIN(type), TAR_TYPE_MAX(type))
63
/*
 * Raw layout of a tar header record as it is read from the archive.
 *
 * Every member is a fixed-size character array (or a single character), so
 * the numeric members hold text that has to be converted with TAR_ATOUL() or
 * TAR_ATOSL(), and the string members are not guaranteed to be
 * NUL-terminated when they are completely filled.
 */
struct tar_header {
	char name[100];		/* Entry pathname. */
	char mode[8];		/* Permission bits (numeric field). */
	char uid[8];		/* Owner user ID (numeric field). */
	char gid[8];		/* Owner group ID (numeric field). */
	char size[12];		/* Entry size (numeric field). */
	char mtime[12];		/* Modification time (numeric field). */
	char checksum[8];	/* Header checksum (octal). */
	char linkflag;		/* Entry type, read as enum tar_filetype. */
	char linkname[100];	/* Link target. */

	/* Only valid on ustar and gnu. */
	char magic[8];		/* Format magic, see TAR_MAGIC_*. */
	char user[32];		/* Owner user name. */
	char group[32];		/* Owner group name. */
	char devmajor[8];	/* Device major number (numeric field). */
	char devminor[8];	/* Device minor number (numeric field). */

	/* Only valid on ustar. */
	char prefix[155];	/* Leading part of long pathnames. */
};
85
/**
 * Set errno and hand back a value in a single expression.
 *
 * Lets the parsers write "return tar_ret_errno(code, value);" so that the
 * error state and the result are always updated together.
 *
 * @param err The value to store into errno (0 to mark success).
 * @param ret The value to return unchanged.
 *
 * @return The ret argument.
 */
static inline uintmax_t
tar_ret_errno(int err, uintmax_t ret)
{
	errno = err;

	return ret;
}
92
/**
 * Convert an ASCII octal field to an uintmax_t.
 *
 * The field may start with padding spaces, followed by octal digits, and is
 * terminated either by the end of the field or by NUL/space padding. A field
 * that holds only NUL/space padding after the optional leading spaces (but
 * is not made entirely of spaces) converts to 0.
 *
 * errno is always set: to 0 on success, to EINVAL if the field consists only
 * of spaces or has garbage after the digits, and to ERANGE if a non-octal
 * character appears within the digits or the value does not fit into an
 * uintmax_t.
 *
 * @param s    The start of the field (does not need to be NUL-terminated).
 * @param size The size of the field in bytes.
 *
 * @return The converted value; 0 on malformed input, UINTMAX_MAX on overflow.
 */
static uintmax_t
tar_atol8(const char *s, size_t size)
{
	const char *end = s + size;
	uintmax_t n = 0;

	/* Old implementations might precede the value with spaces. */
	while (s < end && *s == ' ')
		s++;

	if (s == end)
		return tar_ret_errno(EINVAL, 0);

	/* Accumulate digits up to the first terminator. */
	for (; s < end && *s != '\0' && *s != ' '; s++) {
		if (*s < '0' || *s > '7')
			return tar_ret_errno(ERANGE, 0);
		/* Shifting in another digit would drop the top bits. */
		if (n > (UINTMAX_MAX >> 3))
			return tar_ret_errno(ERANGE, UINTMAX_MAX);
		n = (n << 3) | (uintmax_t)(*s - '0');
	}

	/* Anything after the value must be padding. */
	for (; s < end; s++) {
		if (*s != '\0' && *s != ' ')
			return tar_ret_errno(EINVAL, 0);
	}

	return tar_ret_errno(0, n);
}
128
/**
 * Convert a base-256 two's-complement field to an uintmax_t.
 *
 * The first byte acts as a marker: 0x80 selects a positive number, anything
 * else is taken as the start of a negative one. Bytes that do not fit into
 * an uintmax_t must all be pure sign extension, otherwise the conversion
 * fails with ERANGE.
 *
 * errno is always set: to 0 on success, to ERANGE on overflow.
 *
 * @param s    The start of the field.
 * @param size The size of the field in bytes (must not be 0).
 * @param min  The value to return on negative overflow.
 * @param max  The value to return on positive overflow.
 *
 * @return The converted value as a two's-complement bit pattern.
 */
static uintmax_t
tar_atol256(const char *s, size_t size, intmax_t min, uintmax_t max)
{
	const unsigned char *p = (const unsigned char *)s;
	unsigned char byte = *p++;
	uintmax_t value;
	int fill;

	/* The encoding always sets the first bit to one, so that it can be
	 * distinguished from the ASCII encoding. A positive number gets that
	 * marker cleared, a negative number starts from an all-ones value. */
	if (byte == 0x80) {
		byte = 0;
		value = 0;
	} else {
		value = ~(uintmax_t)0;
	}
	fill = byte;

	/* Every byte in excess of the result width must equal the fill byte. */
	for (; size > sizeof(uintmax_t); size--) {
		if (byte != fill)
			return tar_ret_errno(ERANGE, fill ? (uintmax_t)min : max);
		byte = *p++;
	}

	/* The most significant retained byte must carry the same sign. */
	if ((byte & 0x80) != (fill & 0x80))
		return tar_ret_errno(ERANGE, fill ? (uintmax_t)min : max);

	/* Shift in the remaining bytes, most significant first. */
	value = (value << 8) | byte;
	while (--size != 0) {
		byte = *p++;
		value = (value << 8) | byte;
	}

	return tar_ret_errno(0, value);
}
169
/**
 * Convert a numeric tar header field, selecting the encoding from its
 * first byte.
 *
 * A leading 0x80 or 0xff byte selects the base-256 parser, anything else is
 * handed to the octal parser. errno is set by the selected parser.
 *
 * @param s    The start of the field.
 * @param size The size of the field in bytes.
 * @param min  The value returned by the base-256 parser on negative overflow.
 * @param max  The value returned by the base-256 parser on positive overflow.
 *
 * @return The converted value.
 */
static uintmax_t
tar_atol(const char *s, size_t size, intmax_t min, uintmax_t max)
{
	const unsigned char lead = (unsigned char)*s;

	switch (lead) {
	case 0x80:
	case 0xff:
		/* Long two's-complement base-256 number, positive or
		 * negative. */
		return tar_atol256(s, size, min, max);
	default:
		return tar_atol8(s, size);
	}
}
182
/**
 * Convert a numeric tar header field to an unsigned value with an upper
 * bound.
 *
 * errno is set by the underlying parser, and additionally to ERANGE when the
 * converted value is larger than max.
 *
 * @param s    The start of the field.
 * @param size The size of the field in bytes.
 * @param max  The largest acceptable value.
 *
 * @return The converted value, or UINTMAX_MAX if it exceeds max.
 */
uintmax_t
tar_atoul(const char *s, size_t size, uintmax_t max)
{
	const uintmax_t value = tar_atol(s, size, 0, UINTMAX_MAX);

	return (value <= max) ? value : tar_ret_errno(ERANGE, UINTMAX_MAX);
}
193
/**
 * Convert a numeric tar header field to a signed value within bounds.
 *
 * errno is set by the underlying parser, and additionally to ERANGE when the
 * converted value falls outside of [min, max].
 *
 * @param s    The start of the field.
 * @param size The size of the field in bytes.
 * @param min  The smallest acceptable value.
 * @param max  The largest acceptable value.
 *
 * @return The converted value, INTMAX_MIN if it is below min, or INTMAX_MAX
 *         if it is above max.
 */
intmax_t
tar_atosl(const char *s, size_t size, intmax_t min, intmax_t max)
{
	const intmax_t value = tar_atol(s, size, INTMAX_MIN, INTMAX_MAX);

	if (value < min)
		return tar_ret_errno(ERANGE, INTMAX_MIN);
	else if (value > max)
		return tar_ret_errno(ERANGE, INTMAX_MAX);
	else
		return value;
}
206
207 static char *
tar_header_get_prefix_name(struct tar_header * h)208 tar_header_get_prefix_name(struct tar_header *h)
209 {
210 return str_fmt("%.*s/%.*s", (int)sizeof(h->prefix), h->prefix,
211 (int)sizeof(h->name), h->name);
212 }
213
214 static mode_t
tar_header_get_unix_mode(struct tar_header * h)215 tar_header_get_unix_mode(struct tar_header *h)
216 {
217 mode_t mode;
218 enum tar_filetype type;
219
220 type = (enum tar_filetype)h->linkflag;
221
222 switch (type) {
223 case TAR_FILETYPE_FILE0:
224 case TAR_FILETYPE_FILE:
225 case TAR_FILETYPE_HARDLINK:
226 mode = S_IFREG;
227 break;
228 case TAR_FILETYPE_SYMLINK:
229 mode = S_IFLNK;
230 break;
231 case TAR_FILETYPE_DIR:
232 mode = S_IFDIR;
233 break;
234 case TAR_FILETYPE_CHARDEV:
235 mode = S_IFCHR;
236 break;
237 case TAR_FILETYPE_BLOCKDEV:
238 mode = S_IFBLK;
239 break;
240 case TAR_FILETYPE_FIFO:
241 mode = S_IFIFO;
242 break;
243 default:
244 mode = 0;
245 break;
246 }
247
248 mode |= TAR_ATOUL(h->mode, mode_t);
249
250 return mode;
251 }
252
253 static long
tar_header_checksum(struct tar_header * h)254 tar_header_checksum(struct tar_header *h)
255 {
256 unsigned char *s = (unsigned char *)h;
257 unsigned int i;
258 const size_t checksum_offset = offsetof(struct tar_header, checksum);
259 long sum;
260
261 /* Treat checksum field as all blank. */
262 sum = ' ' * sizeof(h->checksum);
263
264 for (i = checksum_offset; i > 0; i--)
265 sum += *s++;
266
267 /* Skip the real checksum field. */
268 s += sizeof(h->checksum);
269
270 for (i = TARBLKSZ - checksum_offset - sizeof(h->checksum); i > 0; i--)
271 sum += *s++;
272
273 return sum;
274 }
275
276 static int
tar_header_decode(struct tar_header * h,struct tar_entry * d,struct dpkg_error * err)277 tar_header_decode(struct tar_header *h, struct tar_entry *d, struct dpkg_error *err)
278 {
279 long checksum;
280
281 errno = 0;
282
283 if (memcmp(h->magic, TAR_MAGIC_GNU, 6) == 0)
284 d->format = TAR_FORMAT_GNU;
285 else if (memcmp(h->magic, TAR_MAGIC_USTAR, 6) == 0)
286 d->format = TAR_FORMAT_USTAR;
287 else
288 d->format = TAR_FORMAT_OLD;
289
290 d->type = (enum tar_filetype)h->linkflag;
291 if (d->type == TAR_FILETYPE_FILE0)
292 d->type = TAR_FILETYPE_FILE;
293
294 /* Concatenate prefix and name to support ustar style long names. */
295 if (d->format == TAR_FORMAT_USTAR && h->prefix[0] != '\0')
296 d->name = tar_header_get_prefix_name(h);
297 else
298 d->name = m_strndup(h->name, sizeof(h->name));
299 d->linkname = m_strndup(h->linkname, sizeof(h->linkname));
300 d->stat.mode = tar_header_get_unix_mode(h);
301 /* Even though off_t is signed, we use an unsigned parser here because
302 * negative offsets are not allowed. */
303 d->size = TAR_ATOUL(h->size, off_t);
304 if (errno)
305 return dpkg_put_errno(err, _("invalid tar header size field"));
306 d->mtime = TAR_ATOSL(h->mtime, time_t);
307 if (errno)
308 return dpkg_put_errno(err, _("invalid tar header mtime field"));
309
310 if (d->type == TAR_FILETYPE_CHARDEV || d->type == TAR_FILETYPE_BLOCKDEV)
311 d->dev = makedev(TAR_ATOUL(h->devmajor, dev_t),
312 TAR_ATOUL(h->devminor, dev_t));
313 else
314 d->dev = makedev(0, 0);
315
316 if (*h->user)
317 d->stat.uname = m_strndup(h->user, sizeof(h->user));
318 else
319 d->stat.uname = NULL;
320 d->stat.uid = TAR_ATOUL(h->uid, uid_t);
321 if (errno)
322 return dpkg_put_errno(err, _("invalid tar header uid field"));
323
324 if (*h->group)
325 d->stat.gname = m_strndup(h->group, sizeof(h->group));
326 else
327 d->stat.gname = NULL;
328 d->stat.gid = TAR_ATOUL(h->gid, gid_t);
329 if (errno)
330 return dpkg_put_errno(err, _("invalid tar header gid field"));
331
332 checksum = tar_atol8(h->checksum, sizeof(h->checksum));
333 if (errno)
334 return dpkg_put_errno(err, _("invalid tar header checksum field"));
335
336 if (tar_header_checksum(h) != checksum)
337 return dpkg_put_error(err, _("invalid tar header checksum"));
338
339 return 0;
340 }
341
342 /**
343 * Decode a GNU longlink or longname from the tar archive.
344 *
345 * The way the GNU long{link,name} stuff works is like this:
346 *
347 * - The first header is a “dummy” header that contains the size of the
348 * filename.
349 * - The next N headers contain the filename.
350 * - After the headers with the filename comes the “real” header with a
351 * bogus name or link.
352 */
353 static int
tar_gnu_long(struct tar_archive * tar,struct tar_entry * te,char ** longp)354 tar_gnu_long(struct tar_archive *tar, struct tar_entry *te, char **longp)
355 {
356 char buf[TARBLKSZ];
357 char *bp;
358 int status = 0;
359 int long_read;
360
361 free(*longp);
362 *longp = bp = m_malloc(te->size);
363
364 for (long_read = te->size; long_read > 0; long_read -= TARBLKSZ) {
365 int copysize;
366
367 status = tar->ops->read(tar, buf, TARBLKSZ);
368 if (status == TARBLKSZ)
369 status = 0;
370 else {
371 /* Read partial header record? */
372 if (status > 0) {
373 errno = 0;
374 status = dpkg_put_error(&tar->err,
375 _("partially read tar header"));
376 }
377
378 /* If we didn't get TARBLKSZ bytes read, punt. */
379 break;
380 }
381
382 copysize = min(long_read, TARBLKSZ);
383 memcpy(bp, buf, copysize);
384 bp += copysize;
385 }
386
387 return status;
388 }
389
390 static void
tar_entry_copy(struct tar_entry * dst,struct tar_entry * src)391 tar_entry_copy(struct tar_entry *dst, struct tar_entry *src)
392 {
393 memcpy(dst, src, sizeof(struct tar_entry));
394
395 dst->name = m_strdup(src->name);
396 dst->linkname = m_strdup(src->linkname);
397
398 if (src->stat.uname)
399 dst->stat.uname = m_strdup(src->stat.uname);
400 if (src->stat.gname)
401 dst->stat.gname = m_strdup(src->stat.gname);
402 }
403
404 static void
tar_entry_destroy(struct tar_entry * te)405 tar_entry_destroy(struct tar_entry *te)
406 {
407 free(te->name);
408 free(te->linkname);
409 free(te->stat.uname);
410 free(te->stat.gname);
411
412 memset(te, 0, sizeof(*te));
413 }
414
/*
 * Node of the singly linked list of symlink entries that tar_extractor()
 * collects while reading the archive and only creates once every other
 * entry has been processed.
 */
struct tar_symlink_entry {
	/* Next queued symlink, or NULL at the end of the list. */
	struct tar_symlink_entry *next;
	/* Deep copy of the symlink entry, made with tar_entry_copy(). */
	struct tar_entry h;
};
419
420 /**
421 * Update the tar entry from system information.
422 *
423 * Normalize UID and GID relative to the current system.
424 */
425 void
tar_entry_update_from_system(struct tar_entry * te)426 tar_entry_update_from_system(struct tar_entry *te)
427 {
428 struct passwd *passwd;
429 struct group *group;
430
431 if (te->stat.uname) {
432 passwd = getpwnam(te->stat.uname);
433 if (passwd)
434 te->stat.uid = passwd->pw_uid;
435 }
436 if (te->stat.gname) {
437 group = getgrnam(te->stat.gname);
438 if (group)
439 te->stat.gid = group->gr_gid;
440 }
441 }
442
443 int
tar_extractor(struct tar_archive * tar)444 tar_extractor(struct tar_archive *tar)
445 {
446 int status;
447 char buffer[TARBLKSZ];
448 struct tar_entry h;
449
450 char *next_long_name, *next_long_link;
451 struct tar_symlink_entry *symlink_head, *symlink_tail, *symlink_node;
452
453 next_long_name = NULL;
454 next_long_link = NULL;
455 symlink_tail = symlink_head = NULL;
456
457 h.name = NULL;
458 h.linkname = NULL;
459 h.stat.uname = NULL;
460 h.stat.gname = NULL;
461
462 while ((status = tar->ops->read(tar, buffer, TARBLKSZ)) == TARBLKSZ) {
463 int name_len;
464
465 if (tar_header_decode((struct tar_header *)buffer, &h, &tar->err) < 0) {
466 if (h.name[0] == '\0') {
467 /* End Of Tape. */
468 status = 0;
469 } else {
470 status = -1;
471 }
472 tar_entry_destroy(&h);
473 break;
474 }
475 if (h.type != TAR_FILETYPE_GNU_LONGLINK &&
476 h.type != TAR_FILETYPE_GNU_LONGNAME) {
477 if (next_long_name)
478 h.name = next_long_name;
479
480 if (next_long_link)
481 h.linkname = next_long_link;
482
483 next_long_link = NULL;
484 next_long_name = NULL;
485 }
486
487 if (h.name[0] == '\0') {
488 status = dpkg_put_error(&tar->err,
489 _("invalid tar header with empty name field"));
490 errno = 0;
491 tar_entry_destroy(&h);
492 break;
493 }
494
495 name_len = strlen(h.name);
496
497 switch (h.type) {
498 case TAR_FILETYPE_FILE:
499 /* Compatibility with pre-ANSI ustar. */
500 if (h.name[name_len - 1] != '/') {
501 status = tar->ops->extract_file(tar, &h);
502 break;
503 }
504 /* Else, fall through. */
505 case TAR_FILETYPE_DIR:
506 if (h.name[name_len - 1] == '/') {
507 h.name[name_len - 1] = '\0';
508 }
509 status = tar->ops->mkdir(tar, &h);
510 break;
511 case TAR_FILETYPE_HARDLINK:
512 status = tar->ops->link(tar, &h);
513 break;
514 case TAR_FILETYPE_SYMLINK:
515 symlink_node = m_malloc(sizeof(*symlink_node));
516 symlink_node->next = NULL;
517 tar_entry_copy(&symlink_node->h, &h);
518
519 if (symlink_head)
520 symlink_tail->next = symlink_node;
521 else
522 symlink_head = symlink_node;
523 symlink_tail = symlink_node;
524 status = 0;
525 break;
526 case TAR_FILETYPE_CHARDEV:
527 case TAR_FILETYPE_BLOCKDEV:
528 case TAR_FILETYPE_FIFO:
529 status = tar->ops->mknod(tar, &h);
530 break;
531 case TAR_FILETYPE_GNU_LONGLINK:
532 status = tar_gnu_long(tar, &h, &next_long_link);
533 break;
534 case TAR_FILETYPE_GNU_LONGNAME:
535 status = tar_gnu_long(tar, &h, &next_long_name);
536 break;
537 case TAR_FILETYPE_GNU_VOLUME:
538 case TAR_FILETYPE_GNU_MULTIVOL:
539 case TAR_FILETYPE_GNU_SPARSE:
540 case TAR_FILETYPE_GNU_DUMPDIR:
541 status = dpkg_put_error(&tar->err,
542 _("unsupported GNU tar header type '%c'"),
543 h.type);
544 errno = 0;
545 break;
546 case TAR_FILETYPE_SOLARIS_EXTENDED:
547 case TAR_FILETYPE_SOLARIS_ACL:
548 status = dpkg_put_error(&tar->err,
549 _("unsupported Solaris tar header type '%c'"),
550 h.type);
551 errno = 0;
552 break;
553 case TAR_FILETYPE_PAX_GLOBAL:
554 case TAR_FILETYPE_PAX_EXTENDED:
555 status = dpkg_put_error(&tar->err,
556 _("unsupported PAX tar header type '%c'"),
557 h.type);
558 errno = 0;
559 break;
560 default:
561 status = dpkg_put_error(&tar->err,
562 _("unknown tar header type '%c'"),
563 h.type);
564 errno = 0;
565 }
566 tar_entry_destroy(&h);
567 if (status != 0)
568 /* Pass on status from coroutine. */
569 break;
570 }
571
572 while (symlink_head) {
573 symlink_node = symlink_head->next;
574 if (status == 0)
575 status = tar->ops->symlink(tar, &symlink_head->h);
576 tar_entry_destroy(&symlink_head->h);
577 free(symlink_head);
578 symlink_head = symlink_node;
579 }
580 /* Make sure we free the long names, in case of a bogus or truncated
581 * tar archive with long entries not followed by a normal entry. */
582 free(next_long_name);
583 free(next_long_link);
584
585 if (status > 0) {
586 status = dpkg_put_error(&tar->err,
587 _("partially read tar header"));
588 errno = 0;
589 }
590
591 /* Return whatever I/O function returned. */
592 return status;
593 }
594