1 /*-
2  * Copyright (c) 2010-2012 Michihiro NAKAJIMA
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 #include "test.h"
26 __FBSDID("$FreeBSD$");
27 
28 #ifdef HAVE_SYS_IOCTL_H
29 #include <sys/ioctl.h>
30 #endif
31 #ifdef HAVE_SYS_PARAM_H
32 #include <sys/param.h>
33 #endif
34 #ifdef HAVE_FCNTL_H
35 #include <fcntl.h>
36 #endif
37 #ifdef HAVE_LIMITS_H
38 #include <limits.h>
39 #endif
40 #ifdef HAVE_UNISTD_H
41 #include <unistd.h>
42 #endif
43 #ifdef HAVE_LINUX_TYPES_H
44 #include <linux/types.h>
45 #endif
46 #ifdef HAVE_LINUX_FIEMAP_H
47 #include <linux/fiemap.h>
48 #endif
49 #ifdef HAVE_LINUX_FS_H
50 #include <linux/fs.h>
51 #endif
52 
/* The logic that compares the sparse file data read from disk against
 * the specification is a little involved.  Set DEBUG to 1 to have the
 * progress dumped to stderr. */
56 #define DEBUG 0
57 
58 /*
59  * NOTE: On FreeBSD and Solaris, this test needs ZFS.
60  * You may perform this test as
61  * 'TMPDIR=<a directory on the ZFS> libarchive_test'.
62  */
63 
/*
 * One segment of a sparse file layout.  A layout is an array of these
 * terminated by an entry whose type is END.
 */
struct sparse {
	enum {
		DATA,	/* Segment is written out as real bytes. */
		HOLE,	/* Segment is skipped over, leaving a hole. */
		END	/* Terminator; marks the end of the layout. */
	} type;
	size_t	size;	/* Length of the segment in bytes. */
};
68 
69 static void create_sparse_file(const char *, const struct sparse *);
70 
71 #if defined(_WIN32) && !defined(__CYGWIN__)
72 #include <winioctl.h>
73 /*
74  * Create a sparse file on Windows.
75  */
76 
77 #if !defined(PATH_MAX)
78 #define	PATH_MAX	MAX_PATH
79 #endif
80 #if !defined(__BORLANDC__)
81 #define getcwd _getcwd
82 #endif
83 
84 static int
85 is_sparse_supported(const char *path)
86 {
87 	char root[MAX_PATH+1];
88 	char vol[MAX_PATH+1];
89 	char sys[MAX_PATH+1];
90 	DWORD flags;
91 	BOOL r;
92 
93 	strncpy(root, path, sizeof(root)-1);
94 	if (((root[0] >= 'c' && root[0] <= 'z') ||
95 	    (root[0] >= 'C' && root[0] <= 'Z')) &&
96 		root[1] == ':' &&
97 	    (root[2] == '\\' || root[2] == '/'))
98 		root[3] = '\0';
99 	else
100 		return (0);
101 	assertEqualInt((r = GetVolumeInformation(root, vol,
102 	    sizeof(vol), NULL, NULL, &flags, sys, sizeof(sys))), 1);
103 	return (r != 0 && (flags & FILE_SUPPORTS_SPARSE_FILES) != 0);
104 }
105 
106 static void
107 create_sparse_file(const char *path, const struct sparse *s)
108 {
109 	char buff[1024];
110 	HANDLE handle;
111 	DWORD dmy;
112 
113 	memset(buff, ' ', sizeof(buff));
114 
115 	handle = CreateFileA(path, GENERIC_WRITE, 0,
116 	    NULL, CREATE_NEW, FILE_ATTRIBUTE_NORMAL,
117 	    NULL);
118 	assert(handle != INVALID_HANDLE_VALUE);
119 	assert(DeviceIoControl(handle, FSCTL_SET_SPARSE, NULL, 0,
120 	    NULL, 0, &dmy, NULL) != 0);
121 
122 	size_t offsetSoFar = 0;
123 
124 	while (s->type != END) {
125 		if (s->type == HOLE) {
126 			LARGE_INTEGER fileOffset, beyondOffset, distanceToMove;
127 			fileOffset.QuadPart = offsetSoFar;
128 			beyondOffset.QuadPart = offsetSoFar + s->size;
129 			distanceToMove.QuadPart = s->size;
130 
131 			FILE_ZERO_DATA_INFORMATION zeroInformation;
132 			zeroInformation.FileOffset = fileOffset;
133 			zeroInformation.BeyondFinalZero = beyondOffset;
134 
135 			DWORD bytesReturned;
136 			assert(SetFilePointerEx(handle, distanceToMove,
137 				NULL, FILE_CURRENT) != 0);
138 			assert(SetEndOfFile(handle) != 0);
139 			assert(DeviceIoControl(handle, FSCTL_SET_ZERO_DATA, &zeroInformation,
140 				sizeof(FILE_ZERO_DATA_INFORMATION), NULL, 0, &bytesReturned, NULL) != 0);
141 		} else {
142 			DWORD w, wr;
143 			size_t size;
144 
145 			size = s->size;
146 			while (size) {
147 				if (size > sizeof(buff))
148 					w = sizeof(buff);
149 				else
150 					w = (DWORD)size;
151 				assert(WriteFile(handle, buff, w, &wr, NULL) != 0);
152 				size -= wr;
153 			}
154 		}
155 		offsetSoFar += s->size;
156 		s++;
157 	}
158 	assertEqualInt(CloseHandle(handle), 1);
159 }
160 
161 #else
162 
163 #if defined(HAVE_LINUX_FIEMAP_H)
164 /*
165  * FIEMAP, which can detect 'hole' of a sparse file, has
166  * been supported from 2.6.28
167  */
168 
169 static int
170 is_sparse_supported_fiemap(const char *path)
171 {
172 	const struct sparse sparse_file[] = {
173  		/* This hole size is too small to create a sparse
174 		 * files for almost filesystem. */
175 		{ HOLE,	 1024 }, { DATA, 10240 },
176 		{ END,	0 }
177 	};
178 	int fd, r;
179 	struct fiemap *fm;
180 	char buff[1024];
181 	const char *testfile = "can_sparse";
182 
183 	(void)path; /* UNUSED */
184 	memset(buff, 0, sizeof(buff));
185 	create_sparse_file(testfile, sparse_file);
186 	fd = open(testfile,  O_RDWR);
187 	if (fd < 0)
188 		return (0);
189 	fm = (struct fiemap *)buff;
190 	fm->fm_start = 0;
191 	fm->fm_length = ~0ULL;;
192 	fm->fm_flags = FIEMAP_FLAG_SYNC;
193 	fm->fm_extent_count = (sizeof(buff) - sizeof(*fm))/
194 		sizeof(struct fiemap_extent);
195 	r = ioctl(fd, FS_IOC_FIEMAP, fm);
196 	close(fd);
197 	unlink(testfile);
198 	return (r >= 0);
199 }
200 
201 #if !defined(SEEK_HOLE) || !defined(SEEK_DATA)
/*
 * Hole reporting check for builds where FIEMAP is the only probe:
 * the answer is whatever the FIEMAP probe says.
 */
static int
is_sparse_supported(const char *path)
{
	const int supported = is_sparse_supported_fiemap(path);

	return (supported);
}
207 #endif
208 #endif
209 
210 #if defined(_PC_MIN_HOLE_SIZE)
211 
212 /*
213  * FreeBSD and Solaris can detect 'hole' of a sparse file
214  * through lseek(HOLE) on ZFS. (UFS does not support yet)
215  */
216 
217 static int
218 is_sparse_supported(const char *path)
219 {
220 	return (pathconf(path, _PC_MIN_HOLE_SIZE) > 0);
221 }
222 
223 #elif defined(SEEK_HOLE) && defined(SEEK_DATA)
224 
225 static int
226 is_sparse_supported(const char *path)
227 {
228 	const struct sparse sparse_file[] = {
229  		/* This hole size is too small to create a sparse
230 		 * files for almost filesystem. */
231 		{ HOLE,	 1024 }, { DATA, 10240 },
232 		{ END,	0 }
233 	};
234 	int fd, r;
235 	const char *testfile = "can_sparse";
236 
237 	(void)path; /* UNUSED */
238 	create_sparse_file(testfile, sparse_file);
239 	fd = open(testfile,  O_RDWR);
240 	if (fd < 0)
241 		return (0);
242 	r = lseek(fd, 0, SEEK_HOLE);
243 	close(fd);
244 	unlink(testfile);
245 #if defined(HAVE_LINUX_FIEMAP_H)
246 	if (r < 0)
247 		return (is_sparse_supported_fiemap(path));
248 #endif
249 	return (r >= 0);
250 }
251 
252 #elif !defined(HAVE_LINUX_FIEMAP_H)
253 
/*
 * Other systems may not have an API such as lseek(HOLE) that can detect
 * a 'hole' in a sparse file, so hole reporting is always treated as
 * unsupported here.
 */

static int
is_sparse_supported(const char *path)
{
	const int supported = 0;

	(void)path; /* UNUSED */
	return (supported);
}
265 
266 #endif
267 
268 /*
269  * Create a sparse file on POSIX like system.
270  */
271 
272 static void
273 create_sparse_file(const char *path, const struct sparse *s)
274 {
275 	char buff[1024];
276 	int fd;
277 	size_t total_size = 0;
278 	const struct sparse *cur = s;
279 
280 	memset(buff, ' ', sizeof(buff));
281 	assert((fd = open(path, O_CREAT | O_WRONLY, 0600)) != -1);
282 
283 	/* Handle holes at the end by extending the file */
284 	while (cur->type != END) {
285 		total_size += cur->size;
286 		++cur;
287 	}
288 	assert(ftruncate(fd, total_size) != -1);
289 
290 	while (s->type != END) {
291 		if (s->type == HOLE) {
292 			assert(lseek(fd, s->size, SEEK_CUR) != (off_t)-1);
293 		} else {
294 			size_t w, size;
295 
296 			size = s->size;
297 			while (size) {
298 				if (size > sizeof(buff))
299 					w = sizeof(buff);
300 				else
301 					w = size;
302 				assert(write(fd, buff, w) != (ssize_t)-1);
303 				size -= w;
304 			}
305 		}
306 		s++;
307 	}
308 	close(fd);
309 }
310 
311 #endif
312 
/*
 * Sparse test with directory traversals.
 *
 * Creates `path` with the layout `sparse` (an END-terminated array),
 * opens it through archive_read_disk_open() on `a`, and walks the data
 * blocks returned by archive_read_data_block().  Each block is compared
 * against the layout: bytes belonging to HOLE segments must be '\0' and
 * bytes belonging to DATA segments must be ' '.
 *
 * A hole is counted whenever a block starts beyond the end of the
 * previous block, plus one more if the last block ends before the entry
 * size.  The total must equal `expected_holes`, and the layout must have
 * been consumed up to its END entry with a total length equal to the
 * entry size.
 *
 * The archive is closed, but not freed, before returning.
 */
static void
verify_sparse_file(struct archive *a, const char *path,
    const struct sparse *sparse, int expected_holes)
{
	struct archive_entry *ae;
	const void *buff;
	size_t bytes_read;
	/*
	 * offset:          file offset of the block just read.
	 * expected_offset: file offset where the current layout segment starts.
	 * last_offset:     file offset just past the previous block.
	 */
	int64_t offset, expected_offset, last_offset;
	int holes_seen = 0;

	create_sparse_file(path, sparse);
	assert((ae = archive_entry_new()) != NULL);
	assertEqualIntA(a, ARCHIVE_OK, archive_read_disk_open(a, path));
	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header2(a, ae));

	expected_offset = 0;
	last_offset = 0;
	while (ARCHIVE_OK == archive_read_data_block(a, &buff, &bytes_read,
	    &offset)) {
		/* First byte of the block that has not been checked yet. */
		const char *start = buff;
#if DEBUG
		fprintf(stderr, "%s: bytes_read=%d offset=%d\n", path, (int)bytes_read, (int)offset);
#endif
		/* A gap since the end of the previous block is a hole. */
		if (offset > last_offset) {
			++holes_seen;
		}
		/* Blocks entirely before the data we just read. */
		while (expected_offset + (int64_t)sparse->size < offset) {
#if DEBUG
			fprintf(stderr, "    skipping expected_offset=%d, size=%d\n", (int)expected_offset, (int)sparse->size);
#endif
			/* Must be holes. */
			assert(sparse->type == HOLE);
			expected_offset += sparse->size;
			++sparse;
		}
		/* Block that overlaps beginning of data */
		if (expected_offset < offset
		    && expected_offset + (int64_t)sparse->size <= offset + (int64_t)bytes_read) {
			/* End of this segment, as a position inside buff. */
			const char *end = (const char *)buff + (expected_offset - offset) + (size_t)sparse->size;
#if DEBUG
			fprintf(stderr, "    overlapping hole expected_offset=%d, size=%d\n", (int)expected_offset, (int)sparse->size);
#endif
			/* Must be a hole, overlap must be filled with '\0' */
			if (assert(sparse->type == HOLE)) {
				assertMemoryFilledWith(start, end - start, '\0');
			}
			start = end;
			expected_offset += sparse->size;
			++sparse;
		}
		/* Blocks completely contained in data we just read. */
		while (expected_offset + (int64_t)sparse->size <= offset + (int64_t)bytes_read) {
			const char *end = (const char *)buff + (expected_offset - offset) + (size_t)sparse->size;
			if (sparse->type == HOLE) {
#if DEBUG
				fprintf(stderr, "    contained hole expected_offset=%d, size=%d\n", (int)expected_offset, (int)sparse->size);
#endif

				/* verify data corresponding to hole is '\0' */
				if (end > (const char *)buff + bytes_read) {
					end = (const char *)buff + bytes_read;
				}
				assertMemoryFilledWith(start, end - start, '\0');
				start = end;
				expected_offset += sparse->size;
				++sparse;
			} else if (sparse->type == DATA) {
#if DEBUG
				fprintf(stderr, "    contained data expected_offset=%d, size=%d\n", (int)expected_offset, (int)sparse->size);
#endif
				/* verify data corresponding to a DATA segment is ' ' */
				if (assert(expected_offset + sparse->size <= offset + bytes_read)) {
					assert(start == (const char *)buff + (size_t)(expected_offset - offset));
					assertMemoryFilledWith(start, end - start, ' ');
				}
				start = end;
				expected_offset += sparse->size;
				++sparse;
			} else {
				/* END entry: the layout is exhausted. */
				break;
			}
		}
		/* Block that overlaps end of data */
		if (expected_offset < offset + (int64_t)bytes_read) {
			const char *end = (const char *)buff + bytes_read;
#if DEBUG
			fprintf(stderr, "    trailing overlap expected_offset=%d, size=%d\n", (int)expected_offset, (int)sparse->size);
#endif
			/* Must be a hole, overlap must be filled with '\0' */
			if (assert(sparse->type == HOLE)) {
				assertMemoryFilledWith(start, end - start, '\0');
			}
		}
		/* Remember where this block ended for the next gap check. */
		last_offset = offset + bytes_read;
	}
	/* Count a hole at EOF? */
	if (last_offset < archive_entry_size(ae)) {
		++holes_seen;
	}

	/* Verify blocks after last read */
	while (sparse->type == HOLE) {
		expected_offset += sparse->size;
		++sparse;
	}
	assert(sparse->type == END);
	assertEqualInt(expected_offset, archive_entry_size(ae));

	/* NOTE(review): path is passed as the message itself; if failure()
	 * takes a printf-style format, "%s" would be safer -- confirm. */
	failure(path);
	assertEqualInt(holes_seen, expected_holes);

	assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a));
	archive_entry_free(ae);
}
431 
432 #if defined(_WIN32) && !defined(__CYGWIN__)
433 #define	close		_close
434 #define	open		_open
435 #endif
436 
437 /*
438  * Sparse test without directory traversals.
439  */
440 static void
441 verify_sparse_file2(struct archive *a, const char *path,
442     const struct sparse *sparse, int blocks, int preopen)
443 {
444 	struct archive_entry *ae;
445 	int fd;
446 
447 	(void)sparse; /* UNUSED */
448 	assert((ae = archive_entry_new()) != NULL);
449 	archive_entry_set_pathname(ae, path);
450 	if (preopen)
451 		fd = open(path, O_RDONLY | O_BINARY);
452 	else
453 		fd = -1;
454 	assertEqualIntA(a, ARCHIVE_OK,
455 	    archive_read_disk_entry_from_file(a, ae, fd, NULL));
456 	if (fd >= 0)
457 		close(fd);
458 	/* Verify the number of holes only, not its offset nor its
459 	 * length because those alignments are deeply dependence on
460 	 * its filesystem. */
461 	failure(path);
462 	assertEqualInt(blocks, archive_entry_sparse_count(ae));
463 	archive_entry_free(ae);
464 }
465 
/*
 * Check that sparse entries which together cover the whole file do not
 * make the entry sparse: a 10 KiB entry is given ten consecutive
 * 1 KiB sparse blocks and must then report a sparse count of 0.
 */
static void
test_sparse_whole_file_data(void)
{
	struct archive_entry *ae;
	int64_t offset;
	int i;

	assert((ae = archive_entry_new()) != NULL);
	archive_entry_set_size(ae, 1024*10);

	/*
	 * Add sparse block data up to the file size.
	 */
	offset = 0;
	for (i = 0; i < 10; i++) {
		archive_entry_sparse_add_entry(ae, offset, 1024);
		offset += 1024;
	}

	failure("There should be no sparse");
	assertEqualInt(0, archive_entry_sparse_count(ae));
	archive_entry_free(ae);
}
489 
490 DEFINE_TEST(test_sparse_basic)
491 {
492 	char *cwd;
493 	struct archive *a;
494 	/*
495 	 * The alignment of the hole of sparse files deeply depends
496 	 * on filesystem. In my experience, sparse_file2 test with
497 	 * 204800 bytes hole size did not pass on ZFS and the result
498 	 * of that test seemed the size was too small, thus you should
499 	 * keep a hole size more than 409600 bytes to pass this test
500 	 * on all platform.
501 	 */
502 	const struct sparse sparse_file0[] = {
503 		// 0             // 1024
504 		{ DATA,	 1024 }, { HOLE,   2048000 },
505 		// 2049024       // 2051072
506 		{ DATA,	 2048 }, { HOLE,   2048000 },
507 		// 4099072       // 4103168
508 		{ DATA,	 4096 }, { HOLE,  20480000 },
509 		// 24583168      // 24591360
510 		{ DATA,	 8192 }, { HOLE, 204800000 },
511 		// 229391360     // 229391361
512 		{ DATA,     1 }, { END,	0 }
513 	};
514 	const struct sparse sparse_file1[] = {
515 		{ HOLE,	409600 }, { DATA, 1 },
516 		{ HOLE,	409600 }, { DATA, 1 },
517 		{ HOLE,	409600 }, { END,  0 }
518 	};
519 	const struct sparse sparse_file2[] = {
520 		{ HOLE,	409600 * 1 }, { DATA, 1024 },
521 		{ HOLE,	409600 * 2 }, { DATA, 1024 },
522 		{ HOLE,	409600 * 3 }, { DATA, 1024 },
523 		{ HOLE,	409600 * 4 }, { DATA, 1024 },
524 		{ HOLE,	409600 * 5 }, { DATA, 1024 },
525 		{ HOLE,	409600 * 6 }, { DATA, 1024 },
526 		{ HOLE,	409600 * 7 }, { DATA, 1024 },
527 		{ HOLE,	409600 * 8 }, { DATA, 1024 },
528 		{ HOLE,	409600 * 9 }, { DATA, 1024 },
529 		{ HOLE,	409600 * 10}, { DATA, 1024 },/* 10 */
530 		{ HOLE,	409600 * 1 }, { DATA, 1024 * 1 },
531 		{ HOLE,	409600 * 2 }, { DATA, 1024 * 2 },
532 		{ HOLE,	409600 * 3 }, { DATA, 1024 * 3 },
533 		{ HOLE,	409600 * 4 }, { DATA, 1024 * 4 },
534 		{ HOLE,	409600 * 5 }, { DATA, 1024 * 5 },
535 		{ HOLE,	409600 * 6 }, { DATA, 1024 * 6 },
536 		{ HOLE,	409600 * 7 }, { DATA, 1024 * 7 },
537 		{ HOLE,	409600 * 8 }, { DATA, 1024 * 8 },
538 		{ HOLE,	409600 * 9 }, { DATA, 1024 * 9 },
539 		{ HOLE,	409600 * 10}, { DATA, 1024 * 10},/* 20 */
540 		{ END,	0 }
541 	};
542 	const struct sparse sparse_file3[] = {
543  		/* This hole size is too small to create a sparse file */
544 		{ HOLE,	 1 }, { DATA, 10240 },
545 		{ HOLE,	 1 }, { DATA, 10240 },
546 		{ HOLE,	 1 }, { DATA, 10240 },
547 		{ END,	0 }
548 	};
549 
550 	/*
551 	 * Test for the case that sparse data indicates just the whole file
552 	 * data.
553 	 */
554 	test_sparse_whole_file_data();
555 
556 	/* Check if the filesystem where CWD on can
557 	 * report the number of the holes of a sparse file. */
558 #ifdef PATH_MAX
559 	cwd = getcwd(NULL, PATH_MAX);/* Solaris getcwd needs the size. */
560 #else
561 	cwd = getcwd(NULL, 0);
562 #endif
563 	if (!assert(cwd != NULL))
564 		return;
565 	if (!is_sparse_supported(cwd)) {
566 		free(cwd);
567 		skipping("This filesystem or platform do not support "
568 		    "the reporting of the holes of a sparse file through "
569 		    "API such as lseek(HOLE)");
570 		return;
571 	}
572 
573 	/*
574 	 * Get sparse data through directory traversals.
575 	 */
576 	assert((a = archive_read_disk_new()) != NULL);
577 
578 	verify_sparse_file(a, "file0", sparse_file0, 4);
579 	verify_sparse_file(a, "file1", sparse_file1, 3);
580 	verify_sparse_file(a, "file2", sparse_file2, 20);
581 	/* Encoded non sparse; expect a data block but no sparse entries. */
582 	verify_sparse_file(a, "file3", sparse_file3, 0);
583 
584 	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
585 
586 	/*
587 	 * Get sparse data through archive_read_disk_entry_from_file().
588 	 */
589 	assert((a = archive_read_disk_new()) != NULL);
590 
591 	verify_sparse_file2(a, "file0", sparse_file0, 5, 0);
592 	verify_sparse_file2(a, "file0", sparse_file0, 5, 1);
593 
594 	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
595 	free(cwd);
596 }
597 
598 DEFINE_TEST(test_fully_sparse_files)
599 {
600 	char *cwd;
601 	struct archive *a;
602 
603 	const struct sparse sparse_file[] = {
604 		{ HOLE, 409600 }, { END, 0 }
605 	};
606 	/* Check if the filesystem where CWD on can
607 	 * report the number of the holes of a sparse file. */
608 #ifdef PATH_MAX
609 	cwd = getcwd(NULL, PATH_MAX);/* Solaris getcwd needs the size. */
610 #else
611 	cwd = getcwd(NULL, 0);
612 #endif
613 	if (!assert(cwd != NULL))
614 		return;
615 	if (!is_sparse_supported(cwd)) {
616 		free(cwd);
617 		skipping("This filesystem or platform do not support "
618 		    "the reporting of the holes of a sparse file through "
619 		    "API such as lseek(HOLE)");
620 		return;
621 	}
622 
623 	assert((a = archive_read_disk_new()) != NULL);
624 
625 	/* Fully sparse files are encoded with a zero-length "data" block. */
626 	verify_sparse_file(a, "file0", sparse_file, 1);
627 
628 	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
629 	free(cwd);
630 }
631