xref: /dragonfly/contrib/libarchive/tar/util.c (revision 7eedf208)
1 /*-
2  * Copyright (c) 2003-2007 Tim Kientzle
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #include "bsdtar_platform.h"
27 __FBSDID("$FreeBSD: src/usr.bin/tar/util.c,v 1.23 2008/12/15 06:00:25 kientzle Exp $");
28 
29 #ifdef HAVE_SYS_STAT_H
30 #include <sys/stat.h>
31 #endif
32 #ifdef HAVE_SYS_TYPES_H
33 #include <sys/types.h>  /* Linux doesn't define mode_t, etc. in sys/stat.h. */
34 #endif
35 #include <ctype.h>
36 #ifdef HAVE_ERRNO_H
37 #include <errno.h>
38 #endif
39 #ifdef HAVE_IO_H
40 #include <io.h>
41 #endif
42 #ifdef HAVE_STDARG_H
43 #include <stdarg.h>
44 #endif
45 #ifdef HAVE_STDINT_H
46 #include <stdint.h>
47 #endif
48 #include <stdio.h>
49 #ifdef HAVE_STDLIB_H
50 #include <stdlib.h>
51 #endif
52 #ifdef HAVE_STRING_H
53 #include <string.h>
54 #endif
55 #ifdef HAVE_WCTYPE_H
56 #include <wctype.h>
57 #else
58 /* If we don't have wctype, we need to hack up some version of iswprint(). */
59 #define	iswprint isprint
60 #endif
61 
62 #include "bsdtar.h"
63 #include "err.h"
64 
65 static size_t	bsdtar_expand_char(char *, size_t, char);
66 static const char *strip_components(const char *path, int elements);
67 
68 #if defined(_WIN32) && !defined(__CYGWIN__)
69 #define	read _read
70 #endif
71 
72 /* TODO:  Hack up a version of mbtowc for platforms with no wide
73  * character support at all.  I think the following might suffice,
74  * but it needs careful testing.
75  * #if !HAVE_MBTOWC
76  * #define	mbtowc(wcp, p, n) ((*wcp = *p), 1)
77  * #endif
78  */
79 
/*
 * Render one byte as printable ASCII.
 *
 * The result is written starting at buff[offset]; the return value is
 * the number of bytes written (between 1 and 4).  Printable characters
 * other than backslash are copied through unchanged.  Anything else
 * becomes a backslash escape: a C-style letter escape where one
 * exists, a doubled backslash for backslash itself, or a three-digit
 * octal escape otherwise.
 *
 * No NUL terminator is written and no bounds checking is done, so the
 * caller must guarantee at least 4 free bytes at buff[offset].
 */
static size_t
bsdtar_expand_char(char *buff, size_t offset, char c)
{
	size_t i = offset;

	if (isprint((unsigned char)c) && c != '\\')
		buff[i++] = c;
	else {
		buff[i++] = '\\';
		switch (c) {
		case '\a': buff[i++] = 'a'; break;
		case '\b': buff[i++] = 'b'; break;
		case '\f': buff[i++] = 'f'; break;
		case '\n': buff[i++] = 'n'; break;
#if '\r' != '\n'
		/* On some platforms, \n and \r are the same. */
		case '\r': buff[i++] = 'r'; break;
#endif
		case '\t': buff[i++] = 't'; break;
		case '\v': buff[i++] = 'v'; break;
		case '\\': buff[i++] = '\\'; break;
		default: {
			/* Emit exactly three octal digits by hand so that
			 * nothing (not even a NUL) lands beyond the bytes
			 * we report to the caller. */
			unsigned int v = 0xFFu & (unsigned int)(unsigned char)c;

			buff[i++] = (char)('0' + ((v >> 6) & 7u));
			buff[i++] = (char)('0' + ((v >> 3) & 7u));
			buff[i++] = (char)('0' + (v & 7u));
			break;
		}
		}
	}

	return (i - offset);
}

/*
 * Print a string, taking care with any non-printable characters.
 *
 * The printf-style arguments are formatted first; the result is then
 * written to f with every non-printable character (and every
 * backslash) replaced by a backslash escape.
 *
 * Note that we use a stack-allocated buffer to receive the formatted
 * string if we can.  This is partly performance (avoiding a call to
 * malloc()), partly out of expedience (we have to call vsnprintf()
 * before malloc() anyway to find out how big a buffer we need; we may
 * as well point that first call at a small local buffer in case it
 * works), but mostly for safety (so we can use this to print messages
 * about out-of-memory conditions).  If no heap buffer can be obtained,
 * the message is truncated to what fits in the stack buffer.
 */
void
safe_fprintf(FILE *f, const char *fmt, ...)
{
	char fmtbuff_stack[256]; /* Place to format the printf() string. */
	char outbuff[256]; /* Buffer for outgoing characters. */
	char *fmtbuff_heap; /* If fmtbuff_stack is too small, we use malloc */
	char *fmtbuff;  /* Pointer to fmtbuff_stack or fmtbuff_heap. */
	int fmtbuff_length;
	int length, n;
	int printable;
	va_list ap;
	const char *p;
	size_t i;
	wchar_t wc;
	char try_wc;

	/* Use a stack-allocated buffer if we can, for speed and safety. */
	fmtbuff_heap = NULL;
	fmtbuff_length = (int)sizeof(fmtbuff_stack);
	fmtbuff = fmtbuff_stack;

	/* Try formatting into the stack buffer. */
	va_start(ap, fmt);
	length = vsnprintf(fmtbuff, (size_t)fmtbuff_length, fmt, ap);
	va_end(ap);

	/* If the result was too large, allocate a buffer on the heap. */
	while (length < 0 || length >= fmtbuff_length) {
		if (length >= fmtbuff_length)
			fmtbuff_length = length + 1;
		else if (fmtbuff_length < 8192)
			fmtbuff_length *= 2;
		else if (fmtbuff_length < 1000000)
			fmtbuff_length += fmtbuff_length / 4;
		else {
			/* vsnprintf() keeps reporting failure even with a
			 * very large buffer.  Stop growing (this also keeps
			 * the size arithmetic far from signed overflow) and
			 * print whatever is in the heap buffer, which is
			 * necessarily allocated once we are this big. */
			length = fmtbuff_length;
			fmtbuff_heap[length - 1] = '\0';
			break;
		}
		free(fmtbuff_heap);
		fmtbuff_heap = malloc((size_t)fmtbuff_length);

		/* Reformat the result into the heap buffer if we can. */
		if (fmtbuff_heap != NULL) {
			fmtbuff = fmtbuff_heap;
			va_start(ap, fmt);
			length = vsnprintf(fmtbuff, (size_t)fmtbuff_length,
			    fmt, ap);
			va_end(ap);
		} else {
			/* Fall back to the truncated string in
			 * fmtbuff_stack.  fmtbuff must be reset explicitly:
			 * after an earlier successful pass it would still
			 * point at the heap buffer we just freed. */
			fmtbuff = fmtbuff_stack;
			fmtbuff_stack[sizeof(fmtbuff_stack) - 1] = '\0';
			length = (int)sizeof(fmtbuff_stack) - 1;
			break;
		}
	}

	/* Note: mbrtowc() has a cleaner API, but mbtowc() seems a bit
	 * more portable, so we use that here instead. */
	(void)mbtowc(NULL, NULL, 1); /* Reset the shift state. */

	/* Write data, expanding unprintable characters. */
	p = fmtbuff;
	i = 0;
	try_wc = 1;
	while (*p != '\0') {

		/* Convert to wide char, test if the wide char is printable
		 * in the current locale.  A non-positive result, or a
		 * character too long to ever fit in outbuff once expanded,
		 * is handled as a conversion failure. */
		if (try_wc &&
		    (n = mbtowc(&wc, p, (size_t)length)) > 0 &&
		    (size_t)n < sizeof(outbuff) / 4) {
			length -= n;
			printable = (iswprint(wc) && wc != L'\\');
		} else {
			/* After any conversion failure, don't bother
			 * trying to convert the rest; go byte by byte. */
			try_wc = 0;
			n = 1;
			printable = 0;
		}

		/* Each input byte expands to at most 4 output bytes.  Dump
		 * the buffer first if this character might not fit with
		 * room left over for the terminating NUL. */
		if (i + 4 * (size_t)n >= sizeof(outbuff)) {
			outbuff[i] = '\0';
			fprintf(f, "%s", outbuff);
			i = 0;
		}

		while (n-- > 0) {
			if (printable)
				outbuff[i++] = *p++;
			else
				i += bsdtar_expand_char(outbuff, i, *p++);
		}
	}
	outbuff[i] = '\0';
	fprintf(f, "%s", outbuff);

	/* Release the heap formatting buffer, if any (free(NULL) is OK). */
	free(fmtbuff_heap);
}
227 
/*
 * Ask a yes/no question and report the answer.
 *
 * The printf-style question is written to stderr followed by the
 * prompt " (y/N)? ".  The reply is then read from file descriptor 2.
 * Returns 1 only when the first non-whitespace character of the reply
 * is 'y' or 'Y'; any other reply, an empty reply, or end-of-file
 * yields 0.  A failed read prints a message and exits with status 1.
 */
int
yes(const char *fmt, ...)
{
	char reply[32];
	const char *cursor;
	ssize_t nread;
	va_list args;

	/* Show the caller's question, then the standard prompt. */
	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);
	fprintf(stderr, " (y/N)? ");
	fflush(stderr);

	/* Leave one byte spare for the terminator added below. */
	nread = read(2, reply, sizeof(reply) - 1);
	if (nread < 0) {
	  fprintf(stderr, "Keyboard read failed\n");
	  exit(1);
	}
	if (nread == 0)
		return (0);
	reply[nread] = 0;

	/* Skip leading whitespace; the next character decides. */
	cursor = reply;
	while (*cursor != '\0' && isspace((unsigned char)*cursor))
		cursor++;

	return (*cursor == 'y' || *cursor == 'Y');
}
266 
267 /*-
268  * The logic here for -C <dir> attempts to avoid
269  * chdir() as long as possible.  For example:
270  * "-C /foo -C /bar file"          needs chdir("/bar") but not chdir("/foo")
271  * "-C /foo -C bar file"           needs chdir("/foo/bar")
272  * "-C /foo -C bar /file1"         does not need chdir()
273  * "-C /foo -C bar /file1 file2"   needs chdir("/foo/bar") before file2
274  *
275  * The only correct way to handle this is to record a "pending" chdir
276  * request and combine multiple requests intelligently until we
277  * need to process a non-absolute file.  set_chdir() adds the new dir
278  * to the pending list; do_chdir() actually executes any pending chdir.
279  *
280  * This way, programs that build tar command lines don't have to worry
281  * about -C with non-existent directories; such requests will only
282  * fail if the directory must be accessed.
283  *
284  */
285 void
286 set_chdir(struct bsdtar *bsdtar, const char *newdir)
287 {
288 #if defined(_WIN32) && !defined(__CYGWIN__)
289 	if (newdir[0] == '/' || newdir[0] == '\\' ||
290 	    /* Detect this type, for example, "C:\" or "C:/" */
291 	    (((newdir[0] >= 'a' && newdir[0] <= 'z') ||
292 	      (newdir[0] >= 'A' && newdir[0] <= 'Z')) &&
293 	    newdir[1] == ':' && (newdir[2] == '/' || newdir[2] == '\\'))) {
294 #else
295 	if (newdir[0] == '/') {
296 #endif
297 		/* The -C /foo -C /bar case; dump first one. */
298 		free(bsdtar->pending_chdir);
299 		bsdtar->pending_chdir = NULL;
300 	}
301 	if (bsdtar->pending_chdir == NULL)
302 		/* Easy case: no previously-saved dir. */
303 		bsdtar->pending_chdir = strdup(newdir);
304 	else {
305 		/* The -C /foo -C bar case; concatenate */
306 		char *old_pending = bsdtar->pending_chdir;
307 		size_t old_len = strlen(old_pending);
308 		bsdtar->pending_chdir = malloc(old_len + strlen(newdir) + 2);
309 		if (old_pending[old_len - 1] == '/')
310 			old_pending[old_len - 1] = '\0';
311 		if (bsdtar->pending_chdir != NULL)
312 			sprintf(bsdtar->pending_chdir, "%s/%s",
313 			    old_pending, newdir);
314 		free(old_pending);
315 	}
316 	if (bsdtar->pending_chdir == NULL)
317 		lafe_errc(1, errno, "No memory");
318 }
319 
320 void
321 do_chdir(struct bsdtar *bsdtar)
322 {
323 	if (bsdtar->pending_chdir == NULL)
324 		return;
325 
326 	if (chdir(bsdtar->pending_chdir) != 0) {
327 		lafe_errc(1, 0, "could not chdir to '%s'\n",
328 		    bsdtar->pending_chdir);
329 	}
330 	free(bsdtar->pending_chdir);
331 	bsdtar->pending_chdir = NULL;
332 }
333 
/*
 * Skip the first `elements` path components of p.
 *
 * Returns a pointer into p at the first character after the skipped
 * components and any path separators that follow them, or NULL when
 * the path is too short or nothing is left after skipping.
 */
static const char *
strip_components(const char *p, int elements)
{
	int remaining = elements;

	/* Consume characters until enough separators have gone by. */
	while (remaining > 0) {
		char c = *p++;

		if (c == '\0') {
			/* Path is too short, skip it. */
			return (NULL);
		}
		if (c == '/'
#if defined(_WIN32) && !defined(__CYGWIN__)
		    || c == '\\' /* Support \ path sep on Windows ONLY. */
#endif
		    )
			remaining--;
	}

	/* Step over any further separators.  This covers paths with
	 * extra trailing separators, and also the case where we stopped
	 * in the middle of a run such as "//" (which would otherwise
	 * leave a result that looks like an absolute path). */
	while (*p == '/'
#if defined(_WIN32) && !defined(__CYGWIN__)
	    || *p == '\\' /* Support \ path sep on Windows ONLY. */
#endif
	    )
		++p;

	/* Nothing left means there is no entry to extract. */
	if (*p == '\0')
		return (NULL);
	return (p);
}
372 
373 /*
374  * Handle --strip-components and any future path-rewriting options.
375  * Returns non-zero if the pathname should not be extracted.
376  *
377  * TODO: Support pax-style regex path rewrites.
378  */
379 int
380 edit_pathname(struct bsdtar *bsdtar, struct archive_entry *entry)
381 {
382 	const char *name = archive_entry_pathname(entry);
383 #if HAVE_REGEX_H
384 	char *subst_name;
385 	int r;
386 
387 	r = apply_substitution(bsdtar, name, &subst_name, 0, 0);
388 	if (r == -1) {
389 		lafe_warnc(0, "Invalid substitution, skipping entry");
390 		return 1;
391 	}
392 	if (r == 1) {
393 		archive_entry_copy_pathname(entry, subst_name);
394 		if (*subst_name == '\0') {
395 			free(subst_name);
396 			return -1;
397 		} else
398 			free(subst_name);
399 		name = archive_entry_pathname(entry);
400 	}
401 
402 	if (archive_entry_hardlink(entry)) {
403 		r = apply_substitution(bsdtar, archive_entry_hardlink(entry), &subst_name, 0, 1);
404 		if (r == -1) {
405 			lafe_warnc(0, "Invalid substitution, skipping entry");
406 			return 1;
407 		}
408 		if (r == 1) {
409 			archive_entry_copy_hardlink(entry, subst_name);
410 			free(subst_name);
411 		}
412 	}
413 	if (archive_entry_symlink(entry) != NULL) {
414 		r = apply_substitution(bsdtar, archive_entry_symlink(entry), &subst_name, 1, 0);
415 		if (r == -1) {
416 			lafe_warnc(0, "Invalid substitution, skipping entry");
417 			return 1;
418 		}
419 		if (r == 1) {
420 			archive_entry_copy_symlink(entry, subst_name);
421 			free(subst_name);
422 		}
423 	}
424 #endif
425 
426 	/* Strip leading dir names as per --strip-components option. */
427 	if (bsdtar->strip_components > 0) {
428 		const char *linkname = archive_entry_hardlink(entry);
429 
430 		name = strip_components(name, bsdtar->strip_components);
431 		if (name == NULL)
432 			return (1);
433 
434 		if (linkname != NULL) {
435 			linkname = strip_components(linkname,
436 			    bsdtar->strip_components);
437 			if (linkname == NULL)
438 				return (1);
439 			archive_entry_copy_hardlink(entry, linkname);
440 		}
441 	}
442 
443 	/* By default, don't write or restore absolute pathnames. */
444 	if (!bsdtar->option_absolute_paths) {
445 		const char *rp, *p = name;
446 		int slashonly = 1;
447 
448 		/* Remove leading "//./" or "//?/" or "//?/UNC/"
449 		 * (absolute path prefixes used by Windows API) */
450 		if ((p[0] == '/' || p[0] == '\\') &&
451 		    (p[1] == '/' || p[1] == '\\') &&
452 		    (p[2] == '.' || p[2] == '?') &&
453 		    (p[3] == '/' || p[3] == '\\'))
454 		{
455 			if (p[2] == '?' &&
456 			    (p[4] == 'U' || p[4] == 'u') &&
457 			    (p[5] == 'N' || p[5] == 'n') &&
458 			    (p[6] == 'C' || p[6] == 'c') &&
459 			    (p[7] == '/' || p[7] == '\\'))
460 				p += 8;
461 			else
462 				p += 4;
463 			slashonly = 0;
464 		}
465 		do {
466 			rp = p;
467 			/* Remove leading drive letter from archives created
468 			 * on Windows. */
469 			if (((p[0] >= 'a' && p[0] <= 'z') ||
470 			     (p[0] >= 'A' && p[0] <= 'Z')) &&
471 				 p[1] == ':') {
472 				p += 2;
473 				slashonly = 0;
474 			}
475 			/* Remove leading "/../", "//", etc. */
476 			while (p[0] == '/' || p[0] == '\\') {
477 				if (p[1] == '.' && p[2] == '.' &&
478 					(p[3] == '/' || p[3] == '\\')) {
479 					p += 3; /* Remove "/..", leave "/"
480 							 * for next pass. */
481 					slashonly = 0;
482 				} else
483 					p += 1; /* Remove "/". */
484 			}
485 		} while (rp != p);
486 
487 		if (p != name && !bsdtar->warned_lead_slash) {
488 			/* Generate a warning the first time this happens. */
489 			if (slashonly)
490 				lafe_warnc(0,
491 				    "Removing leading '%c' from member names",
492 				    name[0]);
493 			else
494 				lafe_warnc(0,
495 				    "Removing leading drive letter from "
496 				    "member names");
497 			bsdtar->warned_lead_slash = 1;
498 		}
499 
500 		/* Special case: Stripping everything yields ".". */
501 		if (*p == '\0')
502 			name = ".";
503 		else
504 			name = p;
505 	} else {
506 		/* Strip redundant leading '/' characters. */
507 		while (name[0] == '/' && name[1] == '/')
508 			name++;
509 	}
510 
511 	/* Safely replace name in archive_entry. */
512 	if (name != archive_entry_pathname(entry)) {
513 		char *q = strdup(name);
514 		archive_entry_copy_pathname(entry, q);
515 		free(q);
516 	}
517 	return (0);
518 }
519 
/*
 * It would be nice to just use printf() for formatting large numbers,
 * but the compatibility problems are quite a headache.  Hence the
 * following simple utility function.
 *
 * Formats n0 in decimal and returns a pointer to the text.  The text
 * lives in a static buffer, so each call overwrites the result of the
 * previous one and the function is not reentrant.
 */
const char *
tar_i64toa(int64_t n0)
{
	/* 19 digits + sign + NUL fit with room to spare. */
	static char buff[24];
	/* Take the magnitude in unsigned arithmetic: negating INT64_MIN
	 * as a signed value would overflow. */
	uint64_t n = n0 < 0 ? (uint64_t)0 - (uint64_t)n0 : (uint64_t)n0;
	char *p = buff + sizeof(buff);

	/* Build the string backwards from the end of the buffer. */
	*--p = '\0';
	do {
		*--p = (char)('0' + (int)(n % 10));
		n /= 10;
	} while (n != 0);
	if (n0 < 0)
		*--p = '-';
	return p;
}
540 
/*
 * Like strcmp(), but try to be a little more aware of the fact that
 * we're comparing two paths.  Right now, it just handles leading
 * "./" and trailing '/' specially, so that "a/b/" == "./a/b"
 *
 * Returns 0 when the paths are considered equal; otherwise the
 * difference between the first pair of differing bytes, compared as
 * unsigned char.
 *
 * TODO: Make this better, so that "./a//b/./c/" == "a/b/c"
 * TODO: After this works, push it down into libarchive.
 * TODO: Publish the path normalization routines in libarchive so
 * that bsdtar can normalize paths and use fast strcmp() instead
 * of this.
 *
 * Note: This is currently only used within write.c, so should
 * not handle \ path separators.
 */

int
pathcmp(const char *a, const char *b)
{
	const unsigned char *ua, *ub;

	/* A leading "./" is ignored unless it is the entire path. */
	if (a[0] == '.' && a[1] == '/' && a[2] != '\0')
		a += 2;
	if (b[0] == '.' && b[1] == '/' && b[2] != '\0')
		b += 2;

	/* Walk the common prefix; identical strings compare equal. */
	for (; *a == *b; a++, b++) {
		if (*a == '\0')
			return (0);
	}

	/* A single trailing '/' on just one side is not a difference. */
	if (*b == '\0' && a[0] == '/' && a[1] == '\0')
		return (0);
	if (*a == '\0' && b[0] == '/' && b[1] == '\0')
		return (0);

	/* They're really different, return the correct sign. */
	ua = (const unsigned char *)a;
	ub = (const unsigned char *)b;
	return (*ua - *ub);
}
582