1 /*-
2  * Copyright (c) 2003-2007 Tim Kientzle
3  * Copyright (c) 2011-2012 Michihiro NAKAJIMA
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include "archive_platform.h"
28 
29 #ifdef HAVE_ERRNO_H
30 #include <errno.h>
31 #endif
32 #include <stdio.h>
33 #ifdef HAVE_STDLIB_H
34 #include <stdlib.h>
35 #endif
36 #ifdef HAVE_STRING_H
37 #include <string.h>
38 #endif
39 
40 #include "archive.h"
41 #include "archive_entry.h"
42 #include "archive_entry_locale.h"
43 #include "archive_private.h"
44 #include "archive_write_private.h"
45 #include "archive_write_set_format_private.h"
46 
/* Format callbacks installed on the archive_write handle. */
static int	archive_write_binary_options(struct archive_write *a,
		    const char *key, const char *val);
static int	archive_write_binary_header(struct archive_write *a,
		    struct archive_entry *entry);
static ssize_t	archive_write_binary_data(struct archive_write *a,
		    const void *buff, size_t s);
static int	archive_write_binary_finish_entry(struct archive_write *a);
static int	archive_write_binary_close(struct archive_write *a);
static int	archive_write_binary_free(struct archive_write *a);

/* Shared by the header callback and the end-of-archive trailer. */
static int	write_header(struct archive_write *a,
		    struct archive_entry *entry);
57 
/* Per-archive state of the binary cpio writer. */
struct cpio {
	/*
	 * Bytes still owed for the body of the current entry; set when
	 * the header is written (rounded up to an even count) and
	 * consumed by the data and finish-entry callbacks.
	 */
	uint64_t	  entry_bytes_remaining;

	/* Most recently assigned synthetic inode number. */
	int64_t		  ino_next;

	/* Table mapping original inode numbers to synthetic ones. */
	struct {
		int64_t	old;
		int	new;
	} *ino_list;
	size_t		  ino_list_size;	/* Allocated slots. */
	size_t		  ino_list_next;	/* Slots in use. */

	/* Conversion chosen with the "hdrcharset" option, if any. */
	struct archive_string_conv *opt_sconv;
	/* Lazily fetched default conversion and its "fetched" flag. */
	struct archive_string_conv *sconv_default;
	int		  init_default_conversion;
};
71 
/*
 * On-disk header of a binary cpio entry.  The fields add up to
 * exactly HSIZE bytes, so the struct must be packed: no padding may
 * be inserted in front of the two 32-bit members.
 */

#define HSIZE 26

#if defined(__GNUC__)
#define PACKED(x) x __attribute__((packed))
#elif defined(_MSC_VER)
#define PACKED(x) __pragma(pack(push, 1)) x __pragma(pack(pop))
#else
#define PACKED(x) x
#endif

PACKED(struct cpio_binary_header {
	uint16_t	h_magic;	/* Format magic, octal 070707. */
	uint16_t	h_dev;		/* Device number of the entry. */
	uint16_t	h_ino;		/* Synthetic inode number. */
	uint16_t	h_mode;		/* File type and permission bits. */
	uint16_t	h_uid;		/* Owner user id. */
	uint16_t	h_gid;		/* Owner group id. */
	uint16_t	h_nlink;	/* Link count. */
	uint16_t	h_majmin;	/* rdev for block/char devices, else 0. */
	uint32_t	h_mtime;	/* Modification time. */
	uint16_t	h_namesize;	/* Pathname length including the NUL. */
	uint32_t	h_filesize;	/* Body length (file data or link target). */
});
97 
98 /* Back in the day, the 7th Edition cpio.c had this, to
99  * adapt to, as the comment said, "VAX, Interdata, ...":
100  *
101  * union { long l; short s[2]; char c[4]; } U;
102  * #define MKSHORT(v,lv) {U.l=1L;if(U.c[0]) U.l=lv,v[0]=U.s[1],v[1]=U.s[0]; else U.l=lv,v[0]=U.s[0],v[1]=U.s[1];}
103  * long mklong(v)
104  * short v[];
105  * {
106  *         U.l = 1;
107  *         if(U.c[0])
108  *                 U.s[0] = v[1], U.s[1] = v[0];
109  *         else
110  *                 U.s[0] = v[0], U.s[1] = v[1];
111  *         return U.l;
112  * }
113  *
114  * Of course, that assumes that all machines have little-endian shorts,
115  * and just adapts the others to the special endianness of the PDP-11.
116  *
117  * Now, we could do this:
118  *
119  * union { uint32_t l; uint16_t s[2]; uint8_t c[4]; } U;
120  * #define PUTI16(v,sv) {U.s[0]=1;if(U.c[0]) v=sv; else U.s[0]=sv,U.c[2]=U.c[1],U.c[3]=U.c[0],v=U.s[1];}
 * #define PUTI32(v,lv) {uint8_t Ut;U.l=1;if(U.c[0]) U.l=lv,v[0]=U.s[1],v[1]=U.s[0]; else U.l=lv,Ut=U.c[0],U.c[0]=U.c[1],U.c[1]=Ut,Ut=U.c[2],U.c[2]=U.c[3],U.c[3]=Ut,v[0]=U.s[0],v[1]=U.s[1];}
122  *
123  * ...but it feels a little better to do it like this:
124  */
125 
/*
 * Return `in` arranged so that its in-memory bytes are in
 * little-endian order: unchanged on a little-endian host,
 * byte-swapped otherwise.
 */
static uint16_t
la_swap16(uint16_t in)
{
	union {
		uint16_t word[2];
		uint8_t byte[4];
	} probe;

	/* A host that stores the low byte first needs no conversion. */
	probe.word[0] = 1;
	if (probe.byte[0] != 0)
		return (in);

	/* Otherwise exchange the high and low bytes. */
	return ((uint16_t)(((in & 0x00ffu) << 8) | ((in >> 8) & 0x00ffu)));
}
142 
/*
 * Return `in` arranged so that its in-memory bytes follow the
 * PDP-11 layout used by binary cpio: the high 16-bit half first,
 * each half stored little-endian.
 */
static uint32_t
la_swap32(uint32_t in)
{
	union {
		uint32_t word;
		uint8_t byte[4];
	} probe;
	union {
		uint32_t word;
		uint16_t half[2];
	} val;

	probe.word = 1;
	val.word = in;

	if (probe.byte[0] != 0) {
		/* Little-endian host: only the two halves trade places. */
		uint16_t low = val.half[0];

		val.half[0] = val.half[1];
		val.half[1] = low;
	} else if (probe.byte[3] != 0) {
		/* Big-endian host: halves stay put, bytes within each swap. */
		val.half[0] = la_swap16(val.half[0]);
		val.half[1] = la_swap16(val.half[1]);
	}
	/* A PDP-endian host already has the wanted layout. */

	return (val.word);
}
165 
166 /*
167  * Set output format to the selected binary variant
168  */
169 static int
170 archive_write_set_format_cpio_binary(struct archive *_a, int format)
171 {
172 	struct archive_write *a = (struct archive_write *)_a;
173 	struct cpio *cpio;
174 
175 	if (sizeof(struct cpio_binary_header) != HSIZE) {
176 		archive_set_error(&a->archive, EINVAL,
177 				  "Binary cpio format not supported on this platform");
178 		return (ARCHIVE_FATAL);
179 	}
180 
181 	archive_check_magic(_a, ARCHIVE_WRITE_MAGIC,
182 	    ARCHIVE_STATE_NEW, "archive_write_set_format_cpio_binary");
183 
184 	/* If someone else was already registered, unregister them. */
185 	if (a->format_free != NULL)
186 		(a->format_free)(a);
187 
188 	cpio = (struct cpio *)calloc(1, sizeof(*cpio));
189 	if (cpio == NULL) {
190 		archive_set_error(&a->archive, ENOMEM, "Can't allocate cpio data");
191 		return (ARCHIVE_FATAL);
192 	}
193 	a->format_data = cpio;
194 	a->format_name = "cpio";
195 	a->format_options = archive_write_binary_options;
196 	a->format_write_header = archive_write_binary_header;
197 	a->format_write_data = archive_write_binary_data;
198 	a->format_finish_entry = archive_write_binary_finish_entry;
199 	a->format_close = archive_write_binary_close;
200 	a->format_free = archive_write_binary_free;
201 	a->archive.archive_format = format;
202 	switch (format) {
203 	case ARCHIVE_FORMAT_CPIO_PWB:
204 		a->archive.archive_format_name = "PWB cpio";
205 		break;
206 	case ARCHIVE_FORMAT_CPIO_BIN_LE:
207 		a->archive.archive_format_name = "7th Edition cpio";
208 		break;
209 	default:
210 		archive_set_error(&a->archive, EINVAL, "binary format must be 'pwb' or 'bin'");
211 		return (ARCHIVE_FATAL);
212 	}
213 	return (ARCHIVE_OK);
214 }
215 
216 /*
217  * Set output format to PWB (6th Edition) binary format
218  */
219 int
220 archive_write_set_format_cpio_pwb(struct archive *_a)
221 {
222 	return archive_write_set_format_cpio_binary(_a, ARCHIVE_FORMAT_CPIO_PWB);
223 }
224 
225 /*
226  * Set output format to 7th Edition binary format
227  */
228 int
229 archive_write_set_format_cpio_bin(struct archive *_a)
230 {
231 	return archive_write_set_format_cpio_binary(_a, ARCHIVE_FORMAT_CPIO_BIN_LE);
232 }
233 
234 static int
235 archive_write_binary_options(struct archive_write *a, const char *key,
236     const char *val)
237 {
238 	struct cpio *cpio = (struct cpio *)a->format_data;
239 	int ret = ARCHIVE_FAILED;
240 
241 	if (strcmp(key, "hdrcharset")  == 0) {
242 		if (val == NULL || val[0] == 0)
243 			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
244 			    "%s: hdrcharset option needs a character-set name",
245 			    a->format_name);
246 		else {
247 			cpio->opt_sconv = archive_string_conversion_to_charset(
248 			    &a->archive, val, 0);
249 			if (cpio->opt_sconv != NULL)
250 				ret = ARCHIVE_OK;
251 			else
252 				ret = ARCHIVE_FATAL;
253 		}
254 		return (ret);
255 	}
256 
257 	/* Note: The "warn" return is just to inform the options
258 	 * supervisor that we didn't handle it.  It will generate
259 	 * a suitable error if no one used this option. */
260 	return (ARCHIVE_WARN);
261 }
262 
263 /*
264  * Ino values are as long as 64 bits on some systems; cpio format
265  * only allows 16 bits and relies on the ino values to identify hardlinked
266  * files.  So, we can't merely "hash" the ino numbers since collisions
267  * would corrupt the archive.  Instead, we generate synthetic ino values
268  * to store in the archive and maintain a map of original ino values to
269  * synthetic ones so we can preserve hardlink information.
270  *
271  * TODO: Make this more efficient.  It's not as bad as it looks (most
272  * files don't have any hardlinks and we don't do any work here for those),
273  * but it wouldn't be hard to do better.
274  *
275  * TODO: Work with dev/ino pairs here instead of just ino values.
276  */
277 static int
278 synthesize_ino_value(struct cpio *cpio, struct archive_entry *entry)
279 {
280 	int64_t ino = archive_entry_ino64(entry);
281 	int ino_new;
282 	size_t i;
283 
284 	/*
285 	 * If no index number was given, don't assign one.  In
286 	 * particular, this handles the end-of-archive marker
287 	 * correctly by giving it a zero index value.  (This is also
288 	 * why we start our synthetic index numbers with one below.)
289 	 */
290 	if (ino == 0)
291 		return (0);
292 
293 	/* Don't store a mapping if we don't need to. */
294 	if (archive_entry_nlink(entry) < 2) {
295 		return (int)(++cpio->ino_next);
296 	}
297 
298 	/* Look up old ino; if we have it, this is a hardlink
299 	 * and we reuse the same value. */
300 	for (i = 0; i < cpio->ino_list_next; ++i) {
301 		if (cpio->ino_list[i].old == ino)
302 			return (cpio->ino_list[i].new);
303 	}
304 
305 	/* Assign a new index number. */
306 	ino_new = (int)(++cpio->ino_next);
307 
308 	/* Ensure space for the new mapping. */
309 	if (cpio->ino_list_size <= cpio->ino_list_next) {
310 		size_t newsize = cpio->ino_list_size < 512
311 		    ? 512 : cpio->ino_list_size * 2;
312 		void *newlist = realloc(cpio->ino_list,
313 		    sizeof(cpio->ino_list[0]) * newsize);
314 		if (newlist == NULL)
315 			return (-1);
316 
317 		cpio->ino_list_size = newsize;
318 		cpio->ino_list = newlist;
319 	}
320 
321 	/* Record and return the new value. */
322 	cpio->ino_list[cpio->ino_list_next].old = ino;
323 	cpio->ino_list[cpio->ino_list_next].new = ino_new;
324 	++cpio->ino_list_next;
325 	return (ino_new);
326 }
327 
328 
329 static struct archive_string_conv *
330 get_sconv(struct archive_write *a)
331 {
332 	struct cpio *cpio;
333 	struct archive_string_conv *sconv;
334 
335 	cpio = (struct cpio *)a->format_data;
336 	sconv = cpio->opt_sconv;
337 	if (sconv == NULL) {
338 		if (!cpio->init_default_conversion) {
339 			cpio->sconv_default =
340 			    archive_string_default_conversion_for_write(
341 			      &(a->archive));
342 			cpio->init_default_conversion = 1;
343 		}
344 		sconv = cpio->sconv_default;
345 	}
346 	return (sconv);
347 }
348 
349 static int
350 archive_write_binary_header(struct archive_write *a, struct archive_entry *entry)
351 {
352 	const char *path;
353 	size_t len;
354 
355 	if (archive_entry_filetype(entry) == 0 && archive_entry_hardlink(entry) == NULL) {
356 		archive_set_error(&a->archive, -1, "Filetype required");
357 		return (ARCHIVE_FAILED);
358 	}
359 
360 	if (archive_entry_pathname_l(entry, &path, &len, get_sconv(a)) != 0
361 	    && errno == ENOMEM) {
362 		archive_set_error(&a->archive, ENOMEM,
363 		    "Can't allocate memory for Pathname");
364 		return (ARCHIVE_FATAL);
365 	}
366 	if (len == 0 || path == NULL || path[0] == '\0') {
367 		archive_set_error(&a->archive, -1, "Pathname required");
368 		return (ARCHIVE_FAILED);
369 	}
370 
371 	if (!archive_entry_size_is_set(entry) || archive_entry_size(entry) < 0) {
372 		archive_set_error(&a->archive, -1, "Size required");
373 		return (ARCHIVE_FAILED);
374 	}
375 	return write_header(a, entry);
376 }
377 
378 static int
379 write_header(struct archive_write *a, struct archive_entry *entry)
380 {
381 	struct cpio *cpio;
382 	const char *p, *path;
383 	int pathlength, ret, ret_final;
384 	int64_t	ino;
385 	struct cpio_binary_header h;
386 	struct archive_string_conv *sconv;
387 	struct archive_entry *entry_main;
388 	size_t len;
389 
390 	cpio = (struct cpio *)a->format_data;
391 	ret_final = ARCHIVE_OK;
392 	sconv = get_sconv(a);
393 
394 #if defined(_WIN32) && !defined(__CYGWIN__)
395 	/* Make sure the path separators in pathname, hardlink and symlink
396 	 * are all slash '/', not the Windows path separator '\'. */
397 	entry_main = __la_win_entry_in_posix_pathseparator(entry);
398 	if (entry_main == NULL) {
399 		archive_set_error(&a->archive, ENOMEM,
400 		    "Can't allocate ustar data");
401 		return(ARCHIVE_FATAL);
402 	}
403 	if (entry != entry_main)
404 		entry = entry_main;
405 	else
406 		entry_main = NULL;
407 #else
408 	entry_main = NULL;
409 #endif
410 
411 	ret = archive_entry_pathname_l(entry, &path, &len, sconv);
412 	if (ret != 0) {
413 		if (errno == ENOMEM) {
414 			archive_set_error(&a->archive, ENOMEM,
415 			    "Can't allocate memory for Pathname");
416 			ret_final = ARCHIVE_FATAL;
417 			goto exit_write_header;
418 		}
419 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
420 		    "Can't translate pathname '%s' to %s",
421 		    archive_entry_pathname(entry),
422 		    archive_string_conversion_charset_name(sconv));
423 		ret_final = ARCHIVE_WARN;
424 	}
425 	/* Include trailing null */
426 	pathlength = (int)len + 1;
427 
428 	h.h_magic = la_swap16(070707);
429 	h.h_dev = la_swap16(archive_entry_dev(entry));
430 
431 	ino = synthesize_ino_value(cpio, entry);
432 	if (ino < 0) {
433 		archive_set_error(&a->archive, ENOMEM,
434 		    "No memory for ino translation table");
435 		ret_final = ARCHIVE_FATAL;
436 		goto exit_write_header;
437 	} else if (ino > 077777) {
438 		archive_set_error(&a->archive, ERANGE,
439 		    "Too many files for this cpio format");
440 		ret_final = ARCHIVE_FATAL;
441 		goto exit_write_header;
442 	}
443 	h.h_ino = la_swap16((uint16_t)ino);
444 
445 	h.h_mode = archive_entry_mode(entry);
446 	if (((h.h_mode & AE_IFMT) == AE_IFSOCK) || ((h.h_mode & AE_IFMT) == AE_IFIFO)) {
447 		archive_set_error(&a->archive, EINVAL,
448 				  "sockets and fifos cannot be represented in the binary cpio formats");
449 		ret_final = ARCHIVE_FATAL;
450 		goto exit_write_header;
451 	}
452 	if (a->archive.archive_format == ARCHIVE_FORMAT_CPIO_PWB) {
453 		if ((h.h_mode & AE_IFMT) == AE_IFLNK) {
454 			archive_set_error(&a->archive, EINVAL,
455 					  "symbolic links cannot be represented in the PWB cpio format");
456 			ret_final = ARCHIVE_FATAL;
457 			goto exit_write_header;
458 		}
459 		/* we could turn off AE_IFREG here, but it does no harm, */
460 		/* and allows v7 cpio to read the entry without confusion */
461 	}
462 	h.h_mode = la_swap16(h.h_mode);
463 
464 	h.h_uid = la_swap16((uint16_t)archive_entry_uid(entry));
465 	h.h_gid = la_swap16((uint16_t)archive_entry_gid(entry));
466 	h.h_nlink = la_swap16((uint16_t)archive_entry_nlink(entry));
467 
468 	if (archive_entry_filetype(entry) == AE_IFBLK
469 	    || archive_entry_filetype(entry) == AE_IFCHR)
470 		h.h_majmin = la_swap16(archive_entry_rdev(entry));
471 	else
472 		h.h_majmin = 0;
473 
474 	h.h_mtime = la_swap32((uint32_t)archive_entry_mtime(entry));
475 	h.h_namesize = la_swap16(pathlength);
476 
477 	/* Non-regular files don't store bodies. */
478 	if (archive_entry_filetype(entry) != AE_IFREG)
479 		archive_entry_set_size(entry, 0);
480 
481 	/* Symlinks get the link written as the body of the entry. */
482 	ret = archive_entry_symlink_l(entry, &p, &len, sconv);
483 	if (ret != 0) {
484 		if (errno == ENOMEM) {
485 			archive_set_error(&a->archive, ENOMEM,
486 			    "Can't allocate memory for Linkname");
487 			ret_final = ARCHIVE_FATAL;
488 			goto exit_write_header;
489 		}
490 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
491 		    "Can't translate linkname '%s' to %s",
492 		    archive_entry_symlink(entry),
493 		    archive_string_conversion_charset_name(sconv));
494 		ret_final = ARCHIVE_WARN;
495 	}
496 
497 	if (len > 0 && p != NULL  &&  *p != '\0') {
498 		if (a->archive.archive_format == ARCHIVE_FORMAT_CPIO_PWB) {
499 			archive_set_error(&a->archive, EINVAL,
500 					  "symlinks are not supported by UNIX V6 or by PWB cpio");
501 			ret_final = ARCHIVE_FATAL;
502 			goto exit_write_header;
503 		}
504 		h.h_filesize = la_swap32((uint32_t)strlen(p)); /* symlink */
505 	} else {
506 		if ((a->archive.archive_format == ARCHIVE_FORMAT_CPIO_PWB) &&
507 		    (archive_entry_size(entry) > 256*256*256-1)) {
508 			archive_set_error(&a->archive, ERANGE,
509 					  "File is too large for PWB binary cpio format.");
510 			ret_final = ARCHIVE_FAILED;
511 			goto exit_write_header;
512 		} else if (archive_entry_size(entry) > INT32_MAX) {
513 			archive_set_error(&a->archive, ERANGE,
514 					  "File is too large for binary cpio format.");
515 			ret_final = ARCHIVE_FAILED;
516 			goto exit_write_header;
517 		}
518 		h.h_filesize = la_swap32((uint32_t)archive_entry_size(entry)); /* file */
519 	}
520 
521 	ret = __archive_write_output(a, &h, HSIZE);
522 	if (ret != ARCHIVE_OK) {
523 		ret_final = ARCHIVE_FATAL;
524 		goto exit_write_header;
525 	}
526 
527 	ret = __archive_write_output(a, path, pathlength);
528 	if ((ret == ARCHIVE_OK) && ((pathlength % 2) != 0))
529 		ret = __archive_write_nulls(a, 1);
530 	if (ret != ARCHIVE_OK) {
531 		ret_final = ARCHIVE_FATAL;
532 		goto exit_write_header;
533 	}
534 
535 	cpio->entry_bytes_remaining = archive_entry_size(entry);
536 	if ((cpio->entry_bytes_remaining % 2) != 0)
537 		cpio->entry_bytes_remaining++;
538 
539 	/* Write the symlink now. */
540 	if (p != NULL  &&  *p != '\0') {
541 		ret = __archive_write_output(a, p, strlen(p));
542 		if ((ret == ARCHIVE_OK) && ((strlen(p) % 2) != 0))
543 			ret = __archive_write_nulls(a, 1);
544 		if (ret != ARCHIVE_OK) {
545 			ret_final = ARCHIVE_FATAL;
546 			goto exit_write_header;
547 		}
548 	}
549 
550 exit_write_header:
551 	archive_entry_free(entry_main);
552 	return (ret_final);
553 }
554 
555 static ssize_t
556 archive_write_binary_data(struct archive_write *a, const void *buff, size_t s)
557 {
558 	struct cpio *cpio;
559 	int ret;
560 
561 	cpio = (struct cpio *)a->format_data;
562 	if (s > cpio->entry_bytes_remaining)
563 		s = (size_t)cpio->entry_bytes_remaining;
564 
565 	ret = __archive_write_output(a, buff, s);
566 	cpio->entry_bytes_remaining -= s;
567 	if (ret >= 0)
568 		return (s);
569 	else
570 		return (ret);
571 }
572 
573 static int
574 archive_write_binary_close(struct archive_write *a)
575 {
576 	int er;
577 	struct archive_entry *trailer;
578 
579 	trailer = archive_entry_new2(NULL);
580 	/* nlink = 1 here for GNU cpio compat. */
581 	archive_entry_set_nlink(trailer, 1);
582 	archive_entry_set_size(trailer, 0);
583 	archive_entry_set_pathname(trailer, "TRAILER!!!");
584 	er = write_header(a, trailer);
585 	archive_entry_free(trailer);
586 	return (er);
587 }
588 
589 static int
590 archive_write_binary_free(struct archive_write *a)
591 {
592 	struct cpio *cpio;
593 
594 	cpio = (struct cpio *)a->format_data;
595 	free(cpio->ino_list);
596 	free(cpio);
597 	a->format_data = NULL;
598 	return (ARCHIVE_OK);
599 }
600 
601 static int
602 archive_write_binary_finish_entry(struct archive_write *a)
603 {
604 	struct cpio *cpio;
605 
606 	cpio = (struct cpio *)a->format_data;
607 	return (__archive_write_nulls(a,
608 		(size_t)cpio->entry_bytes_remaining));
609 }
610