1 /*
2  * libdpkg - Debian packaging suite library routines
3  * tarfn.c - tar archive extraction functions
4  *
5  * Copyright © 1995 Bruce Perens
6  * Copyright © 2007-2011, 2013-2017 Guillem Jover <guillem@debian.org>
7  *
8  * This is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * This is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program.  If not, see <https://www.gnu.org/licenses/>.
20  */
21 
22 #include <config.h>
23 #include <compat.h>
24 
25 #if HAVE_SYS_SYSMACROS_H
26 #include <sys/sysmacros.h>
27 #endif
28 #include <sys/stat.h>
29 
30 #include <errno.h>
31 #include <string.h>
32 #include <pwd.h>
33 #include <grp.h>
34 #include <unistd.h>
35 #include <inttypes.h>
36 #include <stdlib.h>
37 #include <stdio.h>
38 
39 #include <dpkg/macros.h>
40 #include <dpkg/dpkg.h>
41 #include <dpkg/i18n.h>
42 #include <dpkg/error.h>
43 #include <dpkg/tarfn.h>
44 
/* Contents of the header magic field for the ustar and GNU formats.
 * tar_header_decode() only compares the first 6 bytes of these. */
#define TAR_MAGIC_USTAR "ustar\0" "00"
#define TAR_MAGIC_GNU   "ustar "  " \0"

/* Evaluates to non-zero when the integer type t is signed, that is when
 * (t)-1 does not compare greater than (t)0. */
#define TAR_TYPE_SIGNED(t)	(!((t)0 < (t)-1))

/* Smallest value of the integer type t: the bitwise complement of the
 * maximum for signed types, and zero for unsigned types. */
#define TAR_TYPE_MIN(t) \
	(TAR_TYPE_SIGNED(t) ? \
	 ~(t)TAR_TYPE_MAX(t) : \
	 (t)0)
/* Largest value of the integer type t. For signed types it is built as
 * (2^(bits - 2) - 1) * 2 + 1 so that no intermediate step overflows; the
 * bit count is computed as sizeof(t) * 8. */
#define TAR_TYPE_MAX(t) \
	(TAR_TYPE_SIGNED(t) ? \
	 ((((t)1 << (sizeof(t) * 8 - 2)) - 1) * 2 + 1) : \
	 ~(t)0)

/* Parse the header field str as an unsigned number bounded by the maximum
 * of type, and cast the result to type. The field width is taken with
 * sizeof(str), so str must be an array and not a pointer. */
#define TAR_ATOUL(str, type) \
	(type)tar_atoul(str, sizeof(str), TAR_TYPE_MAX(type))
/* Same as TAR_ATOUL, but parse a signed number bounded by both the minimum
 * and the maximum of type. */
#define TAR_ATOSL(str, type) \
	(type)tar_atosl(str, sizeof(str), TAR_TYPE_MIN(type), TAR_TYPE_MAX(type))
63 
/**
 * Raw layout of a tar header record as found in the archive.
 *
 * All members except linkflag are fixed-width character arrays. The code
 * in this file never assumes they are NUL-terminated, it always passes
 * their size along (sizeof, m_strndup(), "%.*s").
 */
struct tar_header {
	/* Pathname of the entry. */
	char name[100];
	/* File mode bits, numeric field. */
	char mode[8];
	/* Numeric owner user ID. */
	char uid[8];
	/* Numeric owner group ID. */
	char gid[8];
	/* Entry size, numeric field. */
	char size[12];
	/* Modification time, numeric field (parsed as signed). */
	char mtime[12];
	/* Stored header checksum, parsed as an octal number. */
	char checksum[8];
	/* Entry type, interpreted as an enum tar_filetype value. */
	char linkflag;
	/* Link target name. */
	char linkname[100];

	/* Only valid on ustar and gnu. */
	/* Format magic, compared against the TAR_MAGIC_* strings. */
	char magic[8];
	/* Owner user name, ignored when empty. */
	char user[32];
	/* Owner group name, ignored when empty. */
	char group[32];
	/* Device major number, only parsed for character/block devices. */
	char devmajor[8];
	/* Device minor number, only parsed for character/block devices. */
	char devminor[8];

	/* Only valid on ustar. */
	/* Pathname prefix, joined to name with a '/' when not empty. */
	char prefix[155];
};
85 
/**
 * Set errno and return a value in a single expression.
 *
 * @param err The value to store into errno (0 clears it).
 * @param ret The value to hand back unchanged.
 *
 * @return The ret argument.
 */
static inline uintmax_t
tar_ret_errno(int err, uintmax_t ret)
{
	return (errno = err, ret);
}
92 
/**
 * Convert an ASCII octal field to an uintmax_t.
 *
 * The digits may be preceded by spaces, and may be followed by any mix of
 * spaces and NUL bytes up to the end of the field. A field that starts
 * with a terminator right after the optional leading spaces parses as 0.
 *
 * errno is always set on return: to 0 on success, to EINVAL if the field
 * is empty or contains only spaces, or has data after the terminator, and
 * to ERANGE if a character that is not an octal digit is found among the
 * digits.
 *
 * @param s    The field to parse, not necessarily NUL-terminated.
 * @param size The field size in bytes.
 *
 * @return The parsed value, or 0 on error.
 */
static uintmax_t
tar_atol8(const char *s, size_t size)
{
	const char *end = s + size;
	uintmax_t n = 0;

	/* Old implementations might precede the value with spaces. */
	while (s < end && *s == ' ')
		s++;

	/* An empty or all-blank field does not carry any value. */
	if (s == end)
		return tar_ret_errno(EINVAL, 0);

	/* Accumulate the octal digits up to the first terminator. */
	while (s < end) {
		if (*s == '\0' || *s == ' ')
			break;
		if (*s < '0' || *s > '7')
			return tar_ret_errno(ERANGE, 0);
		n = (n * 010) + (*s++ - '0');
	}

	/* Only terminators are allowed in the remainder of the field. Once
	 * this loop finishes the whole field has been consumed, so no
	 * further check is needed. */
	while (s < end) {
		if (*s != '\0' && *s != ' ')
			return tar_ret_errno(EINVAL, 0);
		s++;
	}

	return tar_ret_errno(0, n);
}
128 
/**
 * Convert a base-256 two-complement field to an integer.
 *
 * The result is returned as the uintmax_t bit pattern of the number.
 * errno is set to 0 on success and to ERANGE when the number does not fit
 * in sizeof(uintmax_t) bytes.
 *
 * @param s    The field to parse; its first byte is the encoding marker.
 * @param size The field size in bytes.
 * @param min  The value to return when a negative number overflows.
 * @param max  The value to return when a positive number overflows.
 *
 * @return The decoded value, or min (converted to uintmax_t) or max on
 *         overflow.
 */
static uintmax_t
tar_atol256(const char *s, size_t size, intmax_t min, uintmax_t max)
{
	const unsigned char *p = (const unsigned char *)s;
	unsigned char byte = *p++;
	uintmax_t value;
	uintmax_t clamp;
	int fill;

	/* The first bit is always set by the encoding to tell it apart from
	 * the ASCII one. A lone 0x80 marks a positive number, so the marker
	 * gets cleared; anything else is taken as negative and the
	 * accumulator starts as all ones. */
	if (byte == 0x80) {
		byte = 0;
		value = 0;
	} else {
		value = ~(uintmax_t)0;
	}
	fill = byte;
	clamp = fill ? (uintmax_t)min : max;

	/* Bytes that do not fit in the accumulator must be pure sign fill,
	 * otherwise the number overflows. */
	for (; size > sizeof(uintmax_t); size--) {
		if (byte != fill)
			return tar_ret_errno(ERANGE, clamp);
		byte = *p++;
	}

	/* The top bit of the first significant byte must match the sign. */
	if ((byte ^ fill) & 0x80)
		return tar_ret_errno(ERANGE, clamp);

	/* Fold the remaining bytes in, most significant first, without
	 * reading past the last byte of the field. */
	value = (value << 8) | byte;
	while (--size != 0)
		value = (value << 8) | *p++;

	return tar_ret_errno(0, value);
}
169 
/**
 * Parse a numeric tar header field in either supported encoding.
 *
 * A field whose first byte is 0x80 or 0xff is decoded as a two-complement
 * base-256 number (positive or negative), anything else is decoded as
 * ASCII octal.
 *
 * @param s    The field to parse.
 * @param size The field size in bytes.
 * @param min  The overflow value for negative base-256 numbers.
 * @param max  The overflow value for positive base-256 numbers.
 *
 * @return The value from the selected parser, which also sets errno.
 */
static uintmax_t
tar_atol(const char *s, size_t size, intmax_t min, uintmax_t max)
{
	const unsigned char marker = *(const unsigned char *)s;

	switch (marker) {
	case 0x80:
	case 0xff:
		return tar_atol256(s, size, min, max);
	default:
		return tar_atol8(s, size);
	}
}
182 
/**
 * Parse a tar header field as an unsigned number with an upper bound.
 *
 * @param s    The field to parse.
 * @param size The field size in bytes.
 * @param max  The largest value accepted.
 *
 * @return The parsed value, with errno as left by the parser; or
 *         UINTMAX_MAX with errno set to ERANGE if the value exceeds max.
 */
uintmax_t
tar_atoul(const char *s, size_t size, uintmax_t max)
{
	const uintmax_t value = tar_atol(s, size, 0, UINTMAX_MAX);

	return (value > max) ? tar_ret_errno(ERANGE, UINTMAX_MAX) : value;
}
193 
/**
 * Parse a tar header field as a signed number within a range.
 *
 * @param s    The field to parse.
 * @param size The field size in bytes.
 * @param min  The smallest value accepted.
 * @param max  The largest value accepted.
 *
 * @return The parsed value, with errno as left by the parser; or
 *         INTMAX_MIN or INTMAX_MAX with errno set to ERANGE if the value
 *         is below min or above max respectively.
 */
intmax_t
tar_atosl(const char *s, size_t size, intmax_t min, intmax_t max)
{
	intmax_t value = tar_atol(s, size, INTMAX_MIN, INTMAX_MAX);

	if (value < min)
		value = tar_ret_errno(ERANGE, INTMAX_MIN);
	else if (value > max)
		value = tar_ret_errno(ERANGE, INTMAX_MAX);

	return value;
}
206 
207 static char *
tar_header_get_prefix_name(struct tar_header * h)208 tar_header_get_prefix_name(struct tar_header *h)
209 {
210 	return str_fmt("%.*s/%.*s", (int)sizeof(h->prefix), h->prefix,
211 	               (int)sizeof(h->name), h->name);
212 }
213 
214 static mode_t
tar_header_get_unix_mode(struct tar_header * h)215 tar_header_get_unix_mode(struct tar_header *h)
216 {
217 	mode_t mode;
218 	enum tar_filetype type;
219 
220 	type = (enum tar_filetype)h->linkflag;
221 
222 	switch (type) {
223 	case TAR_FILETYPE_FILE0:
224 	case TAR_FILETYPE_FILE:
225 	case TAR_FILETYPE_HARDLINK:
226 		mode = S_IFREG;
227 		break;
228 	case TAR_FILETYPE_SYMLINK:
229 		mode = S_IFLNK;
230 		break;
231 	case TAR_FILETYPE_DIR:
232 		mode = S_IFDIR;
233 		break;
234 	case TAR_FILETYPE_CHARDEV:
235 		mode = S_IFCHR;
236 		break;
237 	case TAR_FILETYPE_BLOCKDEV:
238 		mode = S_IFBLK;
239 		break;
240 	case TAR_FILETYPE_FIFO:
241 		mode = S_IFIFO;
242 		break;
243 	default:
244 		mode = 0;
245 		break;
246 	}
247 
248 	mode |= TAR_ATOUL(h->mode, mode_t);
249 
250 	return mode;
251 }
252 
253 static long
tar_header_checksum(struct tar_header * h)254 tar_header_checksum(struct tar_header *h)
255 {
256 	unsigned char *s = (unsigned char *)h;
257 	unsigned int i;
258 	const size_t checksum_offset = offsetof(struct tar_header, checksum);
259 	long sum;
260 
261 	/* Treat checksum field as all blank. */
262 	sum = ' ' * sizeof(h->checksum);
263 
264 	for (i = checksum_offset; i > 0; i--)
265 		sum += *s++;
266 
267 	/* Skip the real checksum field. */
268 	s += sizeof(h->checksum);
269 
270 	for (i = TARBLKSZ - checksum_offset - sizeof(h->checksum); i > 0; i--)
271 		sum += *s++;
272 
273 	return sum;
274 }
275 
276 static int
tar_header_decode(struct tar_header * h,struct tar_entry * d,struct dpkg_error * err)277 tar_header_decode(struct tar_header *h, struct tar_entry *d, struct dpkg_error *err)
278 {
279 	long checksum;
280 
281 	errno = 0;
282 
283 	if (memcmp(h->magic, TAR_MAGIC_GNU, 6) == 0)
284 		d->format = TAR_FORMAT_GNU;
285 	else if (memcmp(h->magic, TAR_MAGIC_USTAR, 6) == 0)
286 		d->format = TAR_FORMAT_USTAR;
287 	else
288 		d->format = TAR_FORMAT_OLD;
289 
290 	d->type = (enum tar_filetype)h->linkflag;
291 	if (d->type == TAR_FILETYPE_FILE0)
292 		d->type = TAR_FILETYPE_FILE;
293 
294 	/* Concatenate prefix and name to support ustar style long names. */
295 	if (d->format == TAR_FORMAT_USTAR && h->prefix[0] != '\0')
296 		d->name = tar_header_get_prefix_name(h);
297 	else
298 		d->name = m_strndup(h->name, sizeof(h->name));
299 	d->linkname = m_strndup(h->linkname, sizeof(h->linkname));
300 	d->stat.mode = tar_header_get_unix_mode(h);
301 	/* Even though off_t is signed, we use an unsigned parser here because
302 	 * negative offsets are not allowed. */
303 	d->size = TAR_ATOUL(h->size, off_t);
304 	if (errno)
305 		return dpkg_put_errno(err, _("invalid tar header size field"));
306 	d->mtime = TAR_ATOSL(h->mtime, time_t);
307 	if (errno)
308 		return dpkg_put_errno(err, _("invalid tar header mtime field"));
309 
310 	if (d->type == TAR_FILETYPE_CHARDEV || d->type == TAR_FILETYPE_BLOCKDEV)
311 		d->dev = makedev(TAR_ATOUL(h->devmajor, dev_t),
312 		                 TAR_ATOUL(h->devminor, dev_t));
313 	else
314 		d->dev = makedev(0, 0);
315 
316 	if (*h->user)
317 		d->stat.uname = m_strndup(h->user, sizeof(h->user));
318 	else
319 		d->stat.uname = NULL;
320 	d->stat.uid = TAR_ATOUL(h->uid, uid_t);
321 	if (errno)
322 		return dpkg_put_errno(err, _("invalid tar header uid field"));
323 
324 	if (*h->group)
325 		d->stat.gname = m_strndup(h->group, sizeof(h->group));
326 	else
327 		d->stat.gname = NULL;
328 	d->stat.gid = TAR_ATOUL(h->gid, gid_t);
329 	if (errno)
330 		return dpkg_put_errno(err, _("invalid tar header gid field"));
331 
332 	checksum = tar_atol8(h->checksum, sizeof(h->checksum));
333 	if (errno)
334 		return dpkg_put_errno(err, _("invalid tar header checksum field"));
335 
336 	if (tar_header_checksum(h) != checksum)
337 		return dpkg_put_error(err, _("invalid tar header checksum"));
338 
339 	return 0;
340 }
341 
342 /**
343  * Decode a GNU longlink or longname from the tar archive.
344  *
345  * The way the GNU long{link,name} stuff works is like this:
346  *
347  * - The first header is a “dummy” header that contains the size of the
348  *   filename.
349  * - The next N headers contain the filename.
350  * - After the headers with the filename comes the “real” header with a
351  *   bogus name or link.
352  */
353 static int
tar_gnu_long(struct tar_archive * tar,struct tar_entry * te,char ** longp)354 tar_gnu_long(struct tar_archive *tar, struct tar_entry *te, char **longp)
355 {
356 	char buf[TARBLKSZ];
357 	char *bp;
358 	int status = 0;
359 	int long_read;
360 
361 	free(*longp);
362 	*longp = bp = m_malloc(te->size);
363 
364 	for (long_read = te->size; long_read > 0; long_read -= TARBLKSZ) {
365 		int copysize;
366 
367 		status = tar->ops->read(tar, buf, TARBLKSZ);
368 		if (status == TARBLKSZ)
369 			status = 0;
370 		else {
371 			/* Read partial header record? */
372 			if (status > 0) {
373 				errno = 0;
374 				status = dpkg_put_error(&tar->err,
375 				                        _("partially read tar header"));
376 			}
377 
378 			/* If we didn't get TARBLKSZ bytes read, punt. */
379 			break;
380 		}
381 
382 		copysize = min(long_read, TARBLKSZ);
383 		memcpy(bp, buf, copysize);
384 		bp += copysize;
385 	}
386 
387 	return status;
388 }
389 
390 static void
tar_entry_copy(struct tar_entry * dst,struct tar_entry * src)391 tar_entry_copy(struct tar_entry *dst, struct tar_entry *src)
392 {
393 	memcpy(dst, src, sizeof(struct tar_entry));
394 
395 	dst->name = m_strdup(src->name);
396 	dst->linkname = m_strdup(src->linkname);
397 
398 	if (src->stat.uname)
399 		dst->stat.uname = m_strdup(src->stat.uname);
400 	if (src->stat.gname)
401 		dst->stat.gname = m_strdup(src->stat.gname);
402 }
403 
404 static void
tar_entry_destroy(struct tar_entry * te)405 tar_entry_destroy(struct tar_entry *te)
406 {
407 	free(te->name);
408 	free(te->linkname);
409 	free(te->stat.uname);
410 	free(te->stat.gname);
411 
412 	memset(te, 0, sizeof(*te));
413 }
414 
/**
 * Node of the singly linked list of symlink entries that tar_extractor()
 * queues while reading the archive, and creates once the read loop is
 * over.
 */
struct tar_symlink_entry {
	/* Next queued symlink, or NULL on the last node. */
	struct tar_symlink_entry *next;
	/* Deep copy of the tar entry describing the symlink. */
	struct tar_entry h;
};
419 
420 /**
421  * Update the tar entry from system information.
422  *
423  * Normalize UID and GID relative to the current system.
424  */
425 void
tar_entry_update_from_system(struct tar_entry * te)426 tar_entry_update_from_system(struct tar_entry *te)
427 {
428 	struct passwd *passwd;
429 	struct group *group;
430 
431 	if (te->stat.uname) {
432 		passwd = getpwnam(te->stat.uname);
433 		if (passwd)
434 			te->stat.uid = passwd->pw_uid;
435 	}
436 	if (te->stat.gname) {
437 		group = getgrnam(te->stat.gname);
438 		if (group)
439 			te->stat.gid = group->gr_gid;
440 	}
441 }
442 
443 int
tar_extractor(struct tar_archive * tar)444 tar_extractor(struct tar_archive *tar)
445 {
446 	int status;
447 	char buffer[TARBLKSZ];
448 	struct tar_entry h;
449 
450 	char *next_long_name, *next_long_link;
451 	struct tar_symlink_entry *symlink_head, *symlink_tail, *symlink_node;
452 
453 	next_long_name = NULL;
454 	next_long_link = NULL;
455 	symlink_tail = symlink_head = NULL;
456 
457 	h.name = NULL;
458 	h.linkname = NULL;
459 	h.stat.uname = NULL;
460 	h.stat.gname = NULL;
461 
462 	while ((status = tar->ops->read(tar, buffer, TARBLKSZ)) == TARBLKSZ) {
463 		int name_len;
464 
465 		if (tar_header_decode((struct tar_header *)buffer, &h, &tar->err) < 0) {
466 			if (h.name[0] == '\0') {
467 				/* End Of Tape. */
468 				status = 0;
469 			} else {
470 				status = -1;
471 			}
472 			tar_entry_destroy(&h);
473 			break;
474 		}
475 		if (h.type != TAR_FILETYPE_GNU_LONGLINK &&
476 		    h.type != TAR_FILETYPE_GNU_LONGNAME) {
477 			if (next_long_name)
478 				h.name = next_long_name;
479 
480 			if (next_long_link)
481 				h.linkname = next_long_link;
482 
483 			next_long_link = NULL;
484 			next_long_name = NULL;
485 		}
486 
487 		if (h.name[0] == '\0') {
488 			status = dpkg_put_error(&tar->err,
489 			                        _("invalid tar header with empty name field"));
490 			errno = 0;
491 			tar_entry_destroy(&h);
492 			break;
493 		}
494 
495 		name_len = strlen(h.name);
496 
497 		switch (h.type) {
498 		case TAR_FILETYPE_FILE:
499 			/* Compatibility with pre-ANSI ustar. */
500 			if (h.name[name_len - 1] != '/') {
501 				status = tar->ops->extract_file(tar, &h);
502 				break;
503 			}
504 			/* Else, fall through. */
505 		case TAR_FILETYPE_DIR:
506 			if (h.name[name_len - 1] == '/') {
507 				h.name[name_len - 1] = '\0';
508 			}
509 			status = tar->ops->mkdir(tar, &h);
510 			break;
511 		case TAR_FILETYPE_HARDLINK:
512 			status = tar->ops->link(tar, &h);
513 			break;
514 		case TAR_FILETYPE_SYMLINK:
515 			symlink_node = m_malloc(sizeof(*symlink_node));
516 			symlink_node->next = NULL;
517 			tar_entry_copy(&symlink_node->h, &h);
518 
519 			if (symlink_head)
520 				symlink_tail->next = symlink_node;
521 			else
522 				symlink_head = symlink_node;
523 			symlink_tail = symlink_node;
524 			status = 0;
525 			break;
526 		case TAR_FILETYPE_CHARDEV:
527 		case TAR_FILETYPE_BLOCKDEV:
528 		case TAR_FILETYPE_FIFO:
529 			status = tar->ops->mknod(tar, &h);
530 			break;
531 		case TAR_FILETYPE_GNU_LONGLINK:
532 			status = tar_gnu_long(tar, &h, &next_long_link);
533 			break;
534 		case TAR_FILETYPE_GNU_LONGNAME:
535 			status = tar_gnu_long(tar, &h, &next_long_name);
536 			break;
537 		case TAR_FILETYPE_GNU_VOLUME:
538 		case TAR_FILETYPE_GNU_MULTIVOL:
539 		case TAR_FILETYPE_GNU_SPARSE:
540 		case TAR_FILETYPE_GNU_DUMPDIR:
541 			status = dpkg_put_error(&tar->err,
542 			                        _("unsupported GNU tar header type '%c'"),
543 			                        h.type);
544 			errno = 0;
545 			break;
546 		case TAR_FILETYPE_SOLARIS_EXTENDED:
547 		case TAR_FILETYPE_SOLARIS_ACL:
548 			status = dpkg_put_error(&tar->err,
549 			                        _("unsupported Solaris tar header type '%c'"),
550 			                        h.type);
551 			errno = 0;
552 			break;
553 		case TAR_FILETYPE_PAX_GLOBAL:
554 		case TAR_FILETYPE_PAX_EXTENDED:
555 			status = dpkg_put_error(&tar->err,
556 			                        _("unsupported PAX tar header type '%c'"),
557 			                        h.type);
558 			errno = 0;
559 			break;
560 		default:
561 			status = dpkg_put_error(&tar->err,
562 			                        _("unknown tar header type '%c'"),
563 			                        h.type);
564 			errno = 0;
565 		}
566 		tar_entry_destroy(&h);
567 		if (status != 0)
568 			/* Pass on status from coroutine. */
569 			break;
570 	}
571 
572 	while (symlink_head) {
573 		symlink_node = symlink_head->next;
574 		if (status == 0)
575 			status = tar->ops->symlink(tar, &symlink_head->h);
576 		tar_entry_destroy(&symlink_head->h);
577 		free(symlink_head);
578 		symlink_head = symlink_node;
579 	}
580 	/* Make sure we free the long names, in case of a bogus or truncated
581 	 * tar archive with long entries not followed by a normal entry. */
582 	free(next_long_name);
583 	free(next_long_link);
584 
585 	if (status > 0) {
586 		status = dpkg_put_error(&tar->err,
587 		                        _("partially read tar header"));
588 		errno = 0;
589 	}
590 
591 	/* Return whatever I/O function returned. */
592 	return status;
593 }
594