1 /*-
2  * Copyright (c) 2003-2007 Tim Kientzle
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 #include "test.h"
26 __FBSDID("$FreeBSD: head/lib/libarchive/test/test_tar_large.c 201247 2009-12-30 05:59:21Z kientzle $");
27 
28 #include <errno.h>
29 #include <stdlib.h>
30 #include <string.h>
31 
32 /*
33  * This is a somewhat tricky test that verifies the ability to
34  * write and read very large entries to tar archives.  It
35  * writes entries from 2GB up to 1TB to an archive in memory.
36  * The memory storage here carefully avoids actually storing
37  * any part of the file bodies, so it runs very quickly and requires
38  * very little memory.  If you're willing to wait a few minutes,
39  * you should be able to exercise petabyte entries with this code.
40  */
41 
/*
 * Each file is built up by duplicating the following block.
 * test_tar_large allocates it and fills it with a fixed byte
 * pattern; memory_write() recognizes file data by checking whether
 * the buffer it is handed points into this block.
 */
static size_t filedatasize;	/* Size of the template block, in bytes. */
static void *filedata;		/* Start of the template block. */
47 
/*
 * We store the archive as blocks of data generated by libarchive,
 * each possibly followed by bytes of file data.  Only the
 * libarchive-generated bytes are copied into memory; the file data
 * that follows a block is recorded as a byte count alone.
 */
struct memblock {
	struct memblock *next;	/* Next block in the list, or NULL. */
	size_t	size;		/* Number of bytes held in buff. */
	void *buff;		/* Private copy of the header/metadata bytes. */
	int64_t filebytes;	/* File-data bytes that follow this block. */
};
58 
/*
 * The total memory store is just a list of memblocks plus
 * some accounting overhead.  The same structure is passed as the
 * client data to both the write callback and the read callbacks.
 */
struct memdata {
	int64_t filebytes;	/* Reader: file bytes still to hand out before the next block. */
	void *buff;		/* Reader: buffer returned by the last read; freed on the next one. */
	struct memblock *first;	/* Head of the list; the next block to be read. */
	struct memblock *last;	/* Tail of the list; where the writer appends. */
};
69 
/*
 * Binary size units.  Every unit is typed int64_t so that expressions
 * such as "4 * GB" are evaluated in 64 bits and cannot overflow int.
 */
#define KB ((int64_t)1024)
#define MB (KB * (int64_t)1024)
#define GB (MB * (int64_t)1024)
#define TB (GB * (int64_t)1024)
75 
/*
 * Forward declarations of the client callbacks handed to libarchive.
 * The signature of the skip callback depends on the version of
 * libarchive being compiled against.
 */
#if ARCHIVE_VERSION_NUMBER < 2000000
static ssize_t	memory_read_skip(struct archive *, void *, size_t request);
#else
static off_t	memory_read_skip(struct archive *, void *, off_t request);
#endif
static ssize_t	memory_read(struct archive *, void *, const void **buff);
static ssize_t	memory_write(struct archive *, void *, const void *, size_t);
84 
85 static ssize_t
86 memory_write(struct archive *a, void *_private, const void *buff, size_t size)
87 {
88 	struct memdata *private = _private;
89 	struct memblock *block;
90 
91 	(void)a;
92 
93 	/*
94 	 * Since libarchive tries to behave in a zero-copy manner, if
95 	 * you give a pointer to filedata to the library, a pointer
96 	 * into that data will (usually) pop out here.  This way, we
97 	 * can tell the difference between filedata and library header
98 	 * and metadata.
99 	 */
100 	if ((const char *)filedata <= (const char *)buff
101 	    && (const char *)buff < (const char *)filedata + filedatasize) {
102 		/* We don't need to store a block of file data. */
103 		private->last->filebytes += (int64_t)size;
104 	} else {
105 		/* Yes, we're assuming the very first write is metadata. */
106 		/* It's header or metadata, copy and save it. */
107 		block = (struct memblock *)malloc(sizeof(*block));
108 		memset(block, 0, sizeof(*block));
109 		block->size = size;
110 		block->buff = malloc(size);
111 		memcpy(block->buff, buff, size);
112 		if (private->last == NULL) {
113 			private->first = private->last = block;
114 		} else {
115 			private->last->next = block;
116 			private->last = block;
117 		}
118 		block->next = NULL;
119 	}
120 	return ((long)size);
121 }
122 
123 static ssize_t
124 memory_read(struct archive *a, void *_private, const void **buff)
125 {
126 	struct memdata *private = _private;
127 	struct memblock *block;
128 	ssize_t size;
129 
130 	(void)a;
131 
132 	free(private->buff);
133 	private->buff = NULL;
134 	if (private->first == NULL) {
135 		private->last = NULL;
136 		return (ARCHIVE_EOF);
137 	}
138 	if (private->filebytes > 0) {
139 		/*
140 		 * We're returning file bytes, simulate it by
141 		 * passing blocks from the template data.
142 		 */
143 		if (private->filebytes > (int64_t)filedatasize)
144 			size = (ssize_t)filedatasize;
145 		else
146 			size = (ssize_t)private->filebytes;
147 		private->filebytes -= size;
148 		*buff = filedata;
149 	} else {
150 		/*
151 		 * We need to get some real data to return.
152 		 */
153 		block = private->first;
154 		private->first = block->next;
155 		size = (ssize_t)block->size;
156 		if (block->buff != NULL) {
157 			private->buff = block->buff;
158 			*buff = block->buff;
159 		} else {
160 			private->buff = NULL;
161 			*buff = filedata;
162 		}
163 		private->filebytes = block->filebytes;
164 		free(block);
165 	}
166 	return (size);
167 }
168 
169 
#if ARCHIVE_VERSION_NUMBER < 2000000
/*
 * Skip callback for libarchive 1.x: never skips anything, so it
 * always reports that zero bytes were skipped.
 */
static ssize_t
memory_read_skip(struct archive *a, void *private, size_t skip)
{
	/* None of the arguments are used. */
	(void)skip;
	(void)private;
	(void)a;

	return (0);
}
#else
/*
 * Skip callback for libarchive 2.x and later: skip over simulated
 * file bytes without handing them out.  At most the number of file
 * bytes still pending is skipped; stored blocks are never skipped.
 * Returns the number of bytes actually skipped.
 */
static off_t
memory_read_skip(struct archive *a, void *_private, off_t skip)
{
	struct memdata *md = _private;

	(void)a; /* UNUSED */

	/* Nothing left in the archive at all. */
	if (md->first == NULL) {
		md->last = NULL;
		return (0);
	}

	/* No file bytes pending: the next read must return a real block. */
	if (md->filebytes <= 0)
		return (0);

	/* Clamp the request to the file bytes that remain. */
	if (skip > md->filebytes)
		skip = (off_t)md->filebytes;
	md->filebytes -= skip;
	return (skip);
}
#endif
201 
202 DEFINE_TEST(test_tar_large)
203 {
204 	/* The sizes of the entries we're going to generate. */
205 	static int64_t tests[] = {
206 		/* Test for 32-bit signed overflow. */
207 		2 * GB - 1, 2 * GB, 2 * GB + 1,
208 		/* Test for 32-bit unsigned overflow. */
209 		4 * GB - 1, 4 * GB, 4 * GB + 1,
210 		/* 8GB is the "official" max for ustar. */
211 		8 * GB - 1, 8 * GB, 8 * GB + 1,
212 		/* Bend ustar a tad and you can get 64GB (12 octal digits). */
213 		64 * GB - 1, 64 * GB,
214 		/* And larger entries that require non-ustar extensions. */
215 		256 * GB, 1 * TB, 0 };
216 	int i;
217 	char namebuff[64];
218 	struct memdata memdata;
219 	struct archive_entry *ae;
220 	struct archive *a;
221 	int64_t  filesize;
222 	size_t writesize;
223 
224 	filedatasize = (size_t)(1 * MB);
225 	filedata = malloc(filedatasize);
226 	memset(filedata, 0xAA, filedatasize);
227 	memset(&memdata, 0, sizeof(memdata));
228 
229 	/*
230 	 * Open an archive for writing.
231 	 */
232 	a = archive_write_new();
233 	archive_write_set_format_pax_restricted(a);
234 	archive_write_set_bytes_per_block(a, 0); /* No buffering. */
235 	archive_write_open(a, &memdata, NULL, memory_write, NULL);
236 
237 	/*
238 	 * Write a series of large files to it.
239 	 */
240 	for (i = 0; tests[i] != 0; i++) {
241 		assert((ae = archive_entry_new()) != NULL);
242 		sprintf(namebuff, "file_%d", i);
243 		archive_entry_copy_pathname(ae, namebuff);
244 		archive_entry_set_mode(ae, S_IFREG | 0755);
245 		filesize = tests[i];
246 
247 		archive_entry_set_size(ae, filesize);
248 
249 		assertA(0 == archive_write_header(a, ae));
250 		archive_entry_free(ae);
251 
252 		/*
253 		 * Write the actual data to the archive.
254 		 */
255 		while (filesize > 0) {
256 			writesize = filedatasize;
257 			if ((int64_t)writesize > filesize)
258 				writesize = (size_t)filesize;
259 			assertA((int)writesize
260 			    == archive_write_data(a, filedata, writesize));
261 			filesize -= writesize;
262 		}
263 	}
264 
265 	assert((ae = archive_entry_new()) != NULL);
266 	archive_entry_copy_pathname(ae, "lastfile");
267 	archive_entry_set_mode(ae, S_IFREG | 0755);
268 	assertA(0 == archive_write_header(a, ae));
269 	archive_entry_free(ae);
270 
271 
272 	/* Close out the archive. */
273 	assertA(0 == archive_write_close(a));
274 #if ARCHIVE_VERSION_NUMBER < 2000000
275 	archive_write_finish(a);
276 #else
277 	assertA(0 == archive_write_finish(a));
278 #endif
279 
280 	/*
281 	 * Open the same archive for reading.
282 	 */
283 	a = archive_read_new();
284 	archive_read_support_format_tar(a);
285 	archive_read_open2(a, &memdata, NULL,
286 	    memory_read, memory_read_skip, NULL);
287 
288 	/*
289 	 * Read entries back.
290 	 */
291 	for (i = 0; tests[i] > 0; i++) {
292 		assertEqualIntA(a, 0, archive_read_next_header(a, &ae));
293 		sprintf(namebuff, "file_%d", i);
294 		assertEqualString(namebuff, archive_entry_pathname(ae));
295 		assert(tests[i] == archive_entry_size(ae));
296 	}
297 	assertEqualIntA(a, 0, archive_read_next_header(a, &ae));
298 	assertEqualString("lastfile", archive_entry_pathname(ae));
299 
300 	assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
301 
302 	/* Close out the archive. */
303 	assertA(0 == archive_read_close(a));
304 #if ARCHIVE_VERSION_NUMBER < 2000000
305 	archive_read_finish(a);
306 #else
307 	assertA(0 == archive_read_finish(a));
308 #endif
309 
310 	free(memdata.buff);
311 	free(filedata);
312 }
313