xref: /minix/minix/tests/test74.c (revision ebfedea0)
1 /* Test 74 - mmap functionality & regression test.
2  *
3  * This test tests some basic functionality of mmap, and also some
4  * cases that are quite complex for the system to handle.
5  *
6  * Memory pages are generally made available on demand. Memory copying
7  * is done by the kernel. As the kernel may encounter pagefaults in
8  * legitimate memory ranges (e.g. pages that aren't mapped; pages that
9  * are mapped RO as they are COW), it cooperates with VM to make the
10  * mappings and let the copy succeed transparently.
11  *
12  * With file-mapped ranges this can result in a deadlock, if care is
13  * not taken, as the copy might be requested by VFS or an FS. This test
14  * triggers as many of these states as possible to ensure they are
15  * successful or (where appropriate) fail gracefully, i.e. without
16  * deadlock.
17  *
18  * To do this, system calls are done with source or target buffers with
19  * missing or readonly mappings, both anonymous and file-mapped. The
20  * cache is flushed before mmap() so that we know the mappings should
21  * not be present at mmap() time. Then e.g. a read() or write() is
22  * executed with that buffer as target. This triggers a FS copying
23  * to or from a missing range that it itself is needed to map in first.
24  * VFS detects this, requests VM to map in the pages, which does so with
25  * the help of another VFS thread and the FS, and then re-issues the
26  * request to the FS.
27  *
28  * Another case is when VFS itself does such a copy. This is actually
29  * unusual as filenames are already faulted in by the requesting process
30  * in libc by strlen(). select() allows such a case, however, so this
31  * is tested too. We are satisfied if the call completes.
32  */
33 
34 #include <sys/types.h>
35 #include <sys/mman.h>
36 #include <sys/ioctl.h>
37 #include <sys/ioc_memory.h>
38 #include <sys/param.h>
39 #include <minix/paths.h>
40 #include <stdio.h>
41 #include <assert.h>
42 #include <string.h>
43 #include <stdlib.h>
44 #include <unistd.h>
45 #include <fcntl.h>
46 #include <dirent.h>
47 
48 #include "common.h"
49 #include "testcache.h"
50 
/* No errors are tolerated: this makes every e() fatal. */
int max_error = 0;
52 
53 int
54 dowriteblock(int b, int blocksize, u32_t seed, char *data)
55 {
56 	u64_t offset;
57 	int fd;
58 
59 	get_fd_offset(b, blocksize, &offset, &fd);
60 
61 	if(pwrite(fd, data, blocksize, offset) < blocksize) {
62 		perror("pwrite");
63 		return -1;
64 	}
65 
66 	return blocksize;
67 }
68 
69 int
70 readblock(int b, int blocksize, u32_t seed, char *data)
71 {
72 	u64_t offset;
73 	int fd;
74 	char *mmapdata;
75 	int pread_first = random() % 2;
76 
77 	get_fd_offset(b, blocksize, &offset, &fd);
78 
79 	if(pread_first) {
80 		if(pread(fd, data, blocksize, offset) < blocksize) {
81 			perror("pread");
82 			return -1;
83 		}
84 	}
85 
86 	if((mmapdata = mmap(NULL, blocksize, PROT_READ, MAP_PRIVATE | MAP_FILE,
87 		fd, offset)) == MAP_FAILED) {
88 		perror("mmap");
89 		return -1;
90 	}
91 
92 	if(!pread_first) {
93 		if(pread(fd, data, blocksize, offset) < blocksize) {
94 			perror("pread");
95 			return -1;
96 		}
97 	}
98 
99 	if(memcmp(mmapdata, data, blocksize)) {
100 		fprintf(stderr, "readblock: mmap, pread mismatch\n");
101 		return -1;
102 	}
103 
104 	if(munmap(mmapdata, blocksize) < 0) {
105 		perror("munmap");
106 		return -1;
107 	}
108 
109 	return blocksize;
110 }
111 
/*
 * Deliberately empty.
 * NOTE(review): presumably a callback expected by the shared test code;
 * confirm against testcache.h.
 */
void
testend(void)
{
}
113 
114 static void do_read(void *buf, int fd, int writable)
115 {
116 	ssize_t ret;
117 	size_t n = PAGE_SIZE;
118 	struct stat sb;
119 	if(fstat(fd, &sb) < 0) e(1);
120 	if(S_ISDIR(sb.st_mode)) return;
121 	ret = read(fd, buf, n);
122 
123 	/* if the buffer is writable, it should succeed */
124 	if(writable) { if(ret != n) e(3); return; }
125 
126 	/* if the buffer is not writable, it should fail with EFAULT */
127 	if(ret >= 0) e(4);
128 	if(errno != EFAULT) e(5);
129 }
130 
131 static void do_write(void *buf, int fd, int writable)
132 {
133 	size_t n = PAGE_SIZE;
134 	struct stat sb;
135 	if(fstat(fd, &sb) < 0) e(1);
136 	if(S_ISDIR(sb.st_mode)) return;
137 	if(write(fd, buf, n) != n) e(3);
138 }
139 
/*
 * Experiment: fstat() 'fd' with 'buf' as the result buffer.  The call must
 * succeed when 'buf' is writable and fail with EFAULT when it is not.
 */
static void do_stat(void *buf, int fd, int writable)
{
	int r = fstat(fd, (struct stat *) buf);

	if(!writable) {
		/* result buffer cannot be written: expect EFAULT */
		if(r >= 0) e(4);
		if(errno != EFAULT) e(5);
		return;
	}

	/* result buffer can be written: the call has to work */
	if(r < 0) e(3);
}
152 
153 static void do_getdents(void *buf, int fd, int writable)
154 {
155 	struct stat sb;
156 	int r;
157 	if(fstat(fd, &sb) < 0) e(1);
158 	if(!S_ISDIR(sb.st_mode)) return;	/* OK */
159 	r = getdents(fd, buf, PAGE_SIZE);
160 	if(writable) { if(r < 0) e(3); return; }
161 
162 	/* should fail with EFAULT if buf is not */
163 	if(r >= 0) e(4);
164 	if(errno != EFAULT) e(5);
165 }
166 
/*
 * Experiment: readlink() with 'buf' as the path argument.  The result is
 * ignored on purpose: the call only has to fail gracefully.
 */
static void do_readlink1(void *buf, int fd, int writable)
{
	char linkdata[200];

	(void) readlink(buf, linkdata, sizeof(linkdata));
}
173 
174 #define NODENAME	"a"
175 #define TARGETNAME	"b"
176 
177 static void do_readlink2(void *buf, int fd, int writable)
178 {
179 	ssize_t rl;
180 	unlink(NODENAME);
181 	if(symlink(TARGETNAME, NODENAME) < 0) e(1);
182 	rl=readlink(NODENAME, buf, sizeof(buf));
183 
184 	/* if buf is writable, it should succeed, with a certain result */
185 	if(writable) {
186 		if(rl < 0) e(2);
187 		((char *) buf)[rl] = '\0';
188 		if(strcmp(buf, TARGETNAME)) {
189 			fprintf(stderr, "readlink: expected %s, got %s\n",
190 				TARGETNAME, (char *)buf);
191 			e(3);
192 		}
193 		return;
194 	}
195 
196 	/* if buf is not writable, it should fail with EFAULT */
197 	if(rl >= 0) e(4);
198 
199 	if(errno != EFAULT) e(5);
200 }
201 
202 static void do_symlink1(void *buf, int fd, int writable)
203 {
204 	int r;
205 	/* the system call just has to fail gracefully */
206 	r = symlink(buf, NODENAME);
207 }
208 
209 static void do_symlink2(void *buf, int fd, int writable)
210 {
211 	int r;
212 	/* the system call just has to fail gracefully */
213 	r = symlink(NODENAME, buf);
214 }
215 
/*
 * Experiment: open() with 'buf' as the path argument.  The call only has to
 * fail gracefully; should it succeed, the new descriptor is closed again.
 */
static void do_open(void *buf, int fd, int writable)
{
	int opened = open(buf, O_RDONLY);

	if(opened < 0)
		return;

	close(opened);
}
223 
/*
 * Experiment: select() with 'buf' as the read descriptor set.  The result
 * is ignored on purpose: the call only has to fail gracefully.
 */
static void do_select1(void *buf, int fd, int writable)
{
	struct timeval timeout;

	timeout.tv_sec = 0;
	timeout.tv_usec = 200000;	/* 0.2 sec */

	(void) select(1, buf, NULL, NULL, &timeout);
}
231 
/*
 * Experiment: select() with 'buf' as the write descriptor set.  The result
 * is ignored on purpose: the call only has to fail gracefully.
 */
static void do_select2(void *buf, int fd, int writable)
{
	struct timeval timeout;

	timeout.tv_sec = 0;
	timeout.tv_usec = 200000;	/* 0.2 sec */

	(void) select(1, NULL, buf, NULL, &timeout);
}
239 
/*
 * Experiment: select() with 'buf' as the exceptional-condition descriptor
 * set.  The result is ignored on purpose: the call only has to fail
 * gracefully.
 */
static void do_select3(void *buf, int fd, int writable)
{
	struct timeval timeout;

	timeout.tv_sec = 0;
	timeout.tv_usec = 200000;	/* 0.2 sec */

	(void) select(1, NULL, NULL, buf, &timeout);
}
247 
248 static void fillfile(int fd, int size)
249 {
250 	char *buf = malloc(size);
251 
252 	if(size < 1 || size % PAGE_SIZE || !buf) { e(1); }
253 	memset(buf, 'A', size);
254 	buf[50] = '\0';	/* so it can be used as a filename arg */
255 	buf[size-1] = '\0';
256 	if(write(fd, buf, size) != size) { e(2); }
257 	if(lseek(fd, SEEK_SET, 0) < 0) { e(3); }
258 	free(buf);
259 }
260 
261 static void make_buffers(int size,
262 	int *ret_fd_rw, int *ret_fd_ro,
263 	void **filebuf_rw, void **filebuf_ro, void **anonbuf)
264 {
265 	char fn_rw[] = "testfile_rw.XXXXXX", fn_ro[] = "testfile_ro.XXXXXX";
266 	*ret_fd_rw = mkstemp(fn_rw);
267 	*ret_fd_ro = mkstemp(fn_ro);
268 
269 	if(size < 1 || size % PAGE_SIZE) { e(2); }
270 	if(*ret_fd_rw < 0) { e(1); }
271 	if(*ret_fd_ro < 0) { e(1); }
272 	fillfile(*ret_fd_rw, size);
273 	fillfile(*ret_fd_ro, size);
274 	if(fcntl(*ret_fd_rw, F_FLUSH_FS_CACHE) < 0) { e(4); }
275 	if(fcntl(*ret_fd_ro, F_FLUSH_FS_CACHE) < 0) { e(4); }
276 
277 	if((*filebuf_rw = mmap(0, size, PROT_READ | PROT_WRITE,
278 		MAP_PRIVATE | MAP_FILE, *ret_fd_rw, 0)) == MAP_FAILED) {
279 		e(5);
280 		quit();
281 	}
282 
283 	if((*filebuf_ro = mmap(0, size, PROT_READ,
284 		MAP_PRIVATE | MAP_FILE, *ret_fd_ro, 0)) == MAP_FAILED) {
285 		e(5);
286 		quit();
287 	}
288 
289 	if((*anonbuf = mmap(0, size, PROT_READ | PROT_WRITE,
290 		MAP_PRIVATE | MAP_ANON, -1, 0)) == MAP_FAILED) {
291 		e(6);
292 		quit();
293 	}
294 
295 	if(unlink(fn_rw) < 0) { e(12); }
296 	if(unlink(fn_ro) < 0) { e(12); }
297 }
298 
299 static void forget_buffers(void *buf1, void *buf2, void *buf3, int fd1, int fd2, int size)
300 {
301 	if(munmap(buf1, size) < 0) { e(1); }
302 	if(munmap(buf2, size) < 0) { e(2); }
303 	if(munmap(buf3, size) < 0) { e(2); }
304 	if(fcntl(fd1, F_FLUSH_FS_CACHE) < 0) { e(3); }
305 	if(fcntl(fd2, F_FLUSH_FS_CACHE) < 0) { e(3); }
306 	if(close(fd1) < 0) { e(4); }
307 	if(close(fd2) < 0) { e(4); }
308 }
309 
310 #define NEXPERIMENTS 12
311 struct {
312 	void (*do_operation)(void * buf, int fd, int writable);
313 } experiments[NEXPERIMENTS] = {
314 	{ do_read },
315 	{ do_write },
316 	{ do_stat },
317 	{ do_getdents },
318 	{ do_readlink1 },
319 	{ do_readlink2 },
320 	{ do_symlink1 },
321 	{ do_symlink2 },
322 	{ do_open, },
323 	{ do_select1 },
324 	{ do_select2 },
325 	{ do_select3 },
326 };
327 
328 static void test_memory_types_vs_operations(void)
329 {
330 #define NFDS 4
331 #define BUFSIZE (10 * PAGE_SIZE)
332 	int exp, fds[NFDS];
333 	int f = 0, size = BUFSIZE;
334 
335 	/* open some test fd's */
336 #define OPEN(fn, mode) { assert(f >= 0 && f < NFDS); \
337 	fds[f] = open(fn, mode); if(fds[f] < 0) { e(2); } f++; }
338 	OPEN("regular", O_RDWR | O_CREAT);
339 	OPEN(".", O_RDONLY);
340 	OPEN("/dev/ram", O_RDWR);
341 	OPEN("/dev/zero", O_RDWR);
342 
343 	/* make sure the regular file has plenty of size to play with */
344 	fillfile(fds[0], BUFSIZE);
345 
346 	/* and the ramdisk too */
347         if(ioctl(fds[2], MIOCRAMSIZE, &size) < 0) { e(3); }
348 
349 	for(exp = 0; exp < NEXPERIMENTS; exp++) {
350 		for(f = 0; f < NFDS; f++) {
351 			void *anonmem, *filemem_rw, *filemem_ro;
352 			int buffd_rw, buffd_ro;
353 
354 			make_buffers(BUFSIZE, &buffd_rw, &buffd_ro,
355 				&filemem_rw, &filemem_ro, &anonmem);
356 
357 			if(lseek(fds[f], 0, SEEK_SET) != 0) { e(10); }
358 			experiments[exp].do_operation(anonmem, fds[f], 1);
359 
360 			if(lseek(fds[f], 0, SEEK_SET) != 0) { e(11); }
361 			experiments[exp].do_operation(filemem_rw, fds[f], 1);
362 
363 			if(lseek(fds[f], 0, SEEK_SET) != 0) { e(12); }
364 			experiments[exp].do_operation(filemem_ro, fds[f], 0);
365 
366 			forget_buffers(filemem_rw, filemem_ro, anonmem, buffd_rw, buffd_ro, BUFSIZE);
367 		}
368 	}
369 }
370 
371 static void basic_regression(void)
372 {
373 	int fd, fd1, fd2;
374 	ssize_t rb, wr;
375 	char buf[PAGE_SIZE*2];
376 	void *block, *block1, *block2;
377 #define BLOCKSIZE (PAGE_SIZE*10)
378 	block = mmap(0, BLOCKSIZE, PROT_READ | PROT_WRITE,
379 		MAP_PRIVATE | MAP_ANON, -1, 0);
380 
381 	if(block == MAP_FAILED) { e(1); }
382 
383 	memset(block, 0, BLOCKSIZE);
384 
385 	/* shrink from bottom */
386 	munmap(block, PAGE_SIZE);
387 
388 	/* Next test: use a system call write() to access a block of
389 	 * unavailable file-mapped memory.
390 	 *
391 	 * This is a thorny corner case to make succeed transparently
392 	 * because
393 	 *   (1) it is a filesystem that is doing the memory access
394 	 *       (copy from the constblock1 range in this process to the
395 	 *       FS) but is also the FS needed to satisfy the range if it
396 	 *       isn't in the cache.
397 	 *   (2) there are two separate memory regions involved, requiring
398 	 *       separate VFS requests from VM to properly satisfy, requiring
399 	 *       some complex state to be kept.
400 	 */
401 
402 	fd1 = open("../testsh1", O_RDONLY);
403 	fd2 = open("../testsh2", O_RDONLY);
404 	if(fd1 < 0 || fd2 < 0) { e(2); }
405 
406 	/* just check that we can't mmap() a file writable */
407 	if(mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FILE, fd1, 0) != MAP_FAILED) {
408 		e(1);
409 	}
410 
411 	/* check that we can mmap() a file MAP_SHARED readonly */
412 	if(mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED | MAP_FILE, fd1, 0) == MAP_FAILED) {
413 		e(1);
414 	}
415 
416 	/* clear cache of files before mmap so pages won't be present already */
417 	if(fcntl(fd1, F_FLUSH_FS_CACHE) < 0) { e(1); }
418 	if(fcntl(fd2, F_FLUSH_FS_CACHE) < 0) { e(1); }
419 
420 #define LOCATION1 (void *) 0x90000000
421 #define LOCATION2 ((void *)((char *)LOCATION1 + PAGE_SIZE))
422 	block1 = mmap(LOCATION1, PAGE_SIZE, PROT_READ, MAP_PRIVATE | MAP_FILE, fd1, 0);
423 	if(block1 == MAP_FAILED) { e(4); }
424 	if(block1 != LOCATION1) { e(5); }
425 
426 	block2 = mmap(LOCATION2, PAGE_SIZE, PROT_READ, MAP_PRIVATE | MAP_FILE, fd2, 0);
427 	if(block2 == MAP_FAILED) { e(10); }
428 	if(block2 != LOCATION2) { e(11); }
429 
430 	unlink("testfile");
431 	fd = open("testfile", O_CREAT | O_RDWR);
432 	if(fd < 0) { e(15); }
433 
434 	/* write() using the mmap()ped memory as buffer */
435 
436 	if((wr=write(fd, LOCATION1, sizeof(buf))) != sizeof(buf)) {
437 		fprintf(stderr, "wrote %zd bytes instead of %zd\n",
438 			wr, sizeof(buf));
439 		e(20);
440 		quit();
441 	}
442 
443 	/* verify written contents */
444 
445 	if((rb=pread(fd, buf, sizeof(buf), 0)) != sizeof(buf)) {
446 		if(rb < 0) perror("pread");
447 		fprintf(stderr, "wrote %zd bytes\n", wr);
448 		fprintf(stderr, "read %zd bytes instead of %zd\n",
449 			rb, sizeof(buf));
450 		e(21);
451 		quit();
452 	}
453 
454 	if(memcmp(buf, LOCATION1, sizeof(buf))) {
455 		e(22);
456 		quit();
457 	}
458 
459 	close(fd);
460 	close(fd1);
461 	close(fd2);
462 
463 }
464 
465 /*
466  * Test mmap on none-dev file systems - file systems that do not have a buffer
467  * cache and therefore have to fake mmap support.  We use procfs as target.
468  * The idea is that while we succeed in mapping in /proc/uptime, we also get
469  * a new uptime value every time we map in the page -- VM must not cache it.
470  */
471 static void
472 nonedev_regression(void)
473 {
474 	int fd, fd2;
475 	char *buf;
476 	unsigned long uptime1, uptime2, uptime3;
477 
478 	subtest++;
479 
480 	if ((fd = open(_PATH_PROC "uptime", O_RDONLY)) < 0) e(1);
481 
482 	buf = mmap(NULL, 4096, PROT_READ, MAP_PRIVATE | MAP_FILE, fd, 0);
483 	if (buf == MAP_FAILED) e(2);
484 
485 	if (buf[4095] != 0) e(3);
486 
487 	if ((uptime1 = atoi(buf)) == 0) e(4);
488 
489 	if (munmap(buf, 4096) != 0) e(5);
490 
491 	sleep(2);
492 
493 	buf = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FILE,
494 	    fd, 0);
495 	if (buf == MAP_FAILED) e(6);
496 
497 	if (buf[4095] != 0) e(7);
498 
499 	if ((uptime2 = atoi(buf)) == 0) e(8);
500 
501 	if (uptime1 == uptime2) e(9);
502 
503 	if (munmap(buf, 4096) != 0) e(10);
504 
505 	sleep(2);
506 
507 	buf = mmap(NULL, 4096, PROT_READ, MAP_SHARED | MAP_FILE, fd, 0);
508 	if (buf == MAP_FAILED) e(11);
509 
510 	if (buf[4095] != 0) e(12);
511 
512 	if ((uptime3 = atoi(buf)) == 0) e(13);
513 
514 	if (uptime1 == uptime3) e(14);
515 	if (uptime2 == uptime3) e(15);
516 
517 	if (munmap(buf, 4096) != 0) e(16);
518 
519 	/* Also test page faults not incurred by the process itself. */
520 	if ((fd2 = open("testfile", O_CREAT | O_TRUNC | O_WRONLY)) < 0) e(17);
521 
522 	if (unlink("testfile") != 0) e(18);
523 
524 	buf = mmap(NULL, 4096, PROT_READ, MAP_SHARED | MAP_FILE, fd, 0);
525 	if (buf == MAP_FAILED) e(19);
526 
527 	if (write(fd2, buf, 10) != 10) e(20);
528 
529 	if (munmap(buf, 4096) != 0) e(21);
530 
531 	close(fd2);
532 	close(fd);
533 }
534 
535 /*
536  * Regression test for a nasty memory-mapped file corruption bug, which is not
537  * easy to reproduce but, before being solved, did occur in practice every once
538  * in a while.  The executive summary is that through stale inode associations,
539  * VM could end up using an old block to satisfy a memory mapping.
540  *
541  * This subtest relies on a number of assumptions regarding allocation and
542  * reuse of inode numbers and blocks.  These assumptions hold for MFS but
543  * possibly no other file system.  However, if the subtest's assumptions are
544  * not met, it will simply succeed.
545  */
546 static void
547 corruption_regression(void)
548 {
549 	char *ptr, *buf;
550 	struct statvfs sf;
551 	struct stat st;
552 	size_t block_size;
553 	off_t size;
554 	int fd, fd2;
555 
556 	subtest = 1;
557 
558 	if (statvfs(".", &sf) != 0) e(0);
559 	block_size = sf.f_bsize;
560 
561 	if ((buf = malloc(block_size * 2)) == NULL) e(0);
562 
563 	/*
564 	 * We first need a file that is just large enough that it requires the
565 	 * allocation of a metadata block - an indirect block - when more data
566 	 * is written to it.  This is fileA.  We keep it open throughout the
567 	 * test so we can unlink it immediately.
568 	 */
569 	if ((fd = open("fileA", O_CREAT | O_TRUNC | O_WRONLY, 0600)) == -1)
570 		e(0);
571 	if (unlink("fileA") != 0) e(0);
572 
573 	/*
574 	 * Write to fileA until its next block requires the allocation of an
575 	 * additional metadata block - an indirect block.
576 	 */
577 	size = 0;
578 	memset(buf, 'A', block_size);
579 	do {
580 		/*
581 		 * Repeatedly write an extra block, until the file consists of
582 		 * more blocks than just the file data.
583 		 */
584 		if (write(fd, buf, block_size) != block_size) e(0);
585 		size += block_size;
586 		if (size >= block_size * 64) {
587 			/*
588 			 * It doesn't look like this is going to work.
589 			 * Skip this subtest altogether.
590 			 */
591 			if (close(fd) != 0) e(0);
592 			free(buf);
593 
594 			return;
595 		}
596 		if (fstat(fd, &st) != 0) e(0);
597 	} while (st.st_blocks * 512 == size);
598 
599 	/* Once we get there, go one step back by truncating by one block. */
600 	size -= block_size; /* for MFS, size will end up being 7*block_size */
601 	if (ftruncate(fd, size) != 0) e(0);
602 
603 	/*
604 	 * Create a first file, fileB, and write two blocks to it.  FileB's
605 	 * blocks are going to end up in the secondary VM cache, associated to
606 	 * fileB's inode number (and two different offsets within the file).
607 	 * The block cache does not know about files getting deleted, so we can
608 	 * unlink fileB immediately after creating it.  So far so good.
609 	 */
610 	if ((fd2 = open("fileB", O_CREAT | O_TRUNC | O_WRONLY, 0600)) == -1)
611 		e(0);
612 	if (unlink("fileB") != 0) e(0);
613 	memset(buf, 'B', block_size * 2);
614 	if (write(fd2, buf, block_size * 2) != block_size * 2) e(0);
615 	if (close(fd2) != 0) e(0);
616 
617 	/*
618 	 * Write one extra block to fileA, hoping that this causes allocation
619 	 * of a metadata block as well.  This is why we tried to get fileA to
620 	 * the point that one more block would also require the allocation of a
621 	 * metadata block.  Our intent is to recycle the blocks that we just
622 	 * allocated and freed for fileB.  As of writing, for the metadata
623 	 * block, this will *not* break the association with fileB's inode,
624 	 * which by itself is not a problem, yet crucial to reproducing
625 	 * the actual problem a bit later.  Note that the test does not rely on
626 	 * whether the file system allocates the data block or the metadata
627 	 * block first, although it does need reverse deallocation (see below).
628 	 */
629 	memset(buf, 'A', block_size);
630 	if (write(fd, buf, block_size) != block_size) e(0);
631 
632 	/*
633 	 * Create a new file, fileC, which recycles the inode number of fileB,
634 	 * but uses two new blocks to store its data.  These new blocks will
635 	 * get associated to the fileB inode number, and one of them will
636 	 * thereby eclipse (but not remove) the association of fileA's metadata
637 	 * block to the inode of fileB.
638 	 */
639 	if ((fd2 = open("fileC", O_CREAT | O_TRUNC | O_WRONLY, 0600)) == -1)
640 		e(0);
641 	if (unlink("fileC") != 0) e(0);
642 	memset(buf, 'C', block_size * 2);
643 	if (write(fd2, buf, block_size * 2) != block_size * 2) e(0);
644 	if (close(fd2) != 0) e(0);
645 
646 	/*
647 	 * Free up the extra fileA blocks for reallocation, in particular
648 	 * including the metadata block.  Again, this will not affect the
649 	 * contents of the VM cache in any way.  FileA's metadata block remains
650 	 * cached in VM, with the inode association for fileB's block.
651 	 */
652 	if (ftruncate(fd, size) != 0) e(0);
653 
654 	/*
655 	 * Now create yet one more file, fileD, which also recycles the inode
656 	 * number of fileB and fileC.  Write two blocks to it; these blocks
657 	 * should recycle the blocks we just freed.  One of these is fileA's
658 	 * just-freed metadata block, for which the new inode association will
659 	 * be equal to the inode association it had already (as long as blocks
660 	 * are freed in reverse order of their allocation, which happens to be
661 	 * the case for MFS).  As a result, the block is not updated in the VM
662 	 * cache, and VM will therefore continue to see the inode association
663 	 * for the corresponding block of fileC which is still in the VM cache.
664 	 */
665 	if ((fd2 = open("fileD", O_CREAT | O_TRUNC | O_RDWR, 0600)) == -1)
666 		e(0);
667 	memset(buf, 'D', block_size * 2);
668 	if (write(fd2, buf, block_size * 2) != block_size * 2) e(0);
669 
670 	ptr = mmap(NULL, block_size * 2, PROT_READ, MAP_FILE, fd2, 0);
671 	if (ptr == MAP_FAILED) e(0);
672 
673 	/*
674 	 * Finally, we can test the issue.  Since fileC's block is still the
675 	 * block for which VM has the corresponding inode association, VM will
676 	 * now find and map in fileC's block, instead of fileD's block.  The
677 	 * result is that we get a memory-mapped area with stale contents,
678 	 * different from those of the underlying file.
679 	 */
680 	if (memcmp(buf, ptr, block_size * 2)) e(0);
681 
682 	/* Clean up. */
683 	if (munmap(ptr, block_size * 2) != 0) e(0);
684 
685 	if (close(fd2) != 0) e(0);
686 	if (unlink("fileD") != 0) e(0);
687 
688 	if (close(fd) != 0) e(0);
689 
690 	free(buf);
691 }
692 
693 /*
694  * Test mmap on file holes.  Holes are a tricky case with the current VM
695  * implementation.  There are two main issues.  First, whenever a file data
696  * block is freed, VM has to know about this, or it will later blindly map in
697  * the old data.  This, file systems explicitly tell VM (through libminixfs)
698  * whenever a block is freed, upon which VM cache forgets the block.  Second,
699  * blocks are accessed primarily by a <dev,dev_off> pair and only additionally
700  * by a <ino,ino_off> pair.  Holes have no meaningful value for the first pair,
701  * but do need to be registered in VM with the second pair, or accessing them
702  * will generate a segmentation fault.  Thus, file systems explicitly tell VM
703  * (through libminixfs) when a hole is being peeked; libminixfs currently fakes
704  * a device offset to make this work.
705  */
706 static void
707 hole_regression(void)
708 {
709 	struct statvfs st;
710 	size_t block_size;
711 	char *buf;
712 	int fd;
713 
714 	if (statvfs(".", &st) < 0) e(1);
715 
716 	block_size = st.f_bsize;
717 
718 	if ((buf = malloc(block_size)) == NULL) e(2);
719 
720 	if ((fd = open("testfile", O_CREAT | O_TRUNC | O_RDWR)) < 0) e(3);
721 
722 	if (unlink("testfile") != 0) e(4);
723 
724 	/*
725 	 * We perform the test twice, in a not-so-perfect attempt to test the
726 	 * two aspects independently.  The first part immediately creates a
727 	 * hole, and is supposed to fail only if reporting holes to VM does not
728 	 * work.  However, it may also fail if a page for a previous file with
729 	 * the same inode number as "testfile" is still in the VM cache.
730 	 */
731 	memset(buf, 12, block_size);
732 
733 	if (write(fd, buf, block_size) != block_size) e(5);
734 
735 	if (lseek(fd, block_size * 2, SEEK_CUR) != block_size * 3) e(6);
736 
737 	memset(buf, 78, block_size);
738 
739 	if (write(fd, buf, block_size) != block_size) e(7);
740 
741 	free(buf);
742 
743 	if ((buf = mmap(NULL, 4 * block_size, PROT_READ, MAP_SHARED | MAP_FILE,
744 	    fd, 0)) == MAP_FAILED) e(8);
745 
746 	if (buf[0 * block_size] != 12 || buf[1 * block_size - 1] != 12) e(9);
747 	if (buf[1 * block_size] !=  0 || buf[2 * block_size - 1] !=  0) e(10);
748 	if (buf[2 * block_size] !=  0 || buf[3 * block_size - 1] !=  0) e(11);
749 	if (buf[3 * block_size] != 78 || buf[4 * block_size - 1] != 78) e(12);
750 
751 	if (munmap(buf, 4 * block_size) != 0) e(13);
752 
753 	/*
754 	 * The second part first creates file content and only turns part of it
755 	 * into a file hole, thus ensuring that VM has previously cached pages
756 	 * for the blocks that are freed.  The test will fail if VM keeps the
757 	 * pages around in its cache.
758 	 */
759 	if ((buf = malloc(block_size)) == NULL) e(14);
760 
761 	if (lseek(fd, block_size, SEEK_SET) != block_size) e(15);
762 
763 	memset(buf, 34, block_size);
764 
765 	if (write(fd, buf, block_size) != block_size) e(16);
766 
767 	memset(buf, 56, block_size);
768 
769 	if (write(fd, buf, block_size) != block_size) e(17);
770 
771 	if (ftruncate(fd, block_size) != 0) e(18);
772 
773 	if (lseek(fd, block_size * 3, SEEK_SET) != block_size * 3) e(19);
774 
775 	memset(buf, 78, block_size);
776 
777 	if (write(fd, buf, block_size) != block_size) e(20);
778 
779 	free(buf);
780 
781 	if ((buf = mmap(NULL, 4 * block_size, PROT_READ, MAP_SHARED | MAP_FILE,
782 	    fd, 0)) == MAP_FAILED) e(21);
783 
784 	if (buf[0 * block_size] != 12 || buf[1 * block_size - 1] != 12) e(22);
785 	if (buf[1 * block_size] !=  0 || buf[2 * block_size - 1] !=  0) e(23);
786 	if (buf[2 * block_size] !=  0 || buf[3 * block_size - 1] !=  0) e(24);
787 	if (buf[3 * block_size] != 78 || buf[4 * block_size - 1] != 78) e(25);
788 
789 	if (munmap(buf, 4 * block_size) != 0) e(26);
790 
791 	close(fd);
792 }
793 
794 int
795 main(int argc, char *argv[])
796 {
797 	int i, iter = 2;
798 
799 	start(74);
800 
801 	basic_regression();
802 
803 	nonedev_regression();
804 
805 	/*
806 	 * Any inode or block allocation happening concurrently with this
807 	 * subtest will make the subtest succeed without testing the actual
808 	 * issue.  Thus, repeat the subtest a fair number of times.
809 	 */
810 	for (i = 0; i < 10; i++)
811 		corruption_regression();
812 
813 	hole_regression();
814 
815 	test_memory_types_vs_operations();
816 
817 	makefiles(MAXFILES);
818 
819 	cachequiet(!bigflag);
820 	if(bigflag) iter = 3;
821 
822 	/* Try various combinations working set sizes
823 	 * and block sizes in order to specifically
824 	 * target the primary cache, then primary+secondary
825 	 * cache, then primary+secondary cache+secondary
826 	 * cache eviction.
827 	 */
828 
829 	if(dotest(PAGE_SIZE,    100, iter)) e(5);
830 	if(dotest(PAGE_SIZE*2,  100, iter)) e(2);
831 	if(dotest(PAGE_SIZE*3,  100, iter)) e(3);
832 	if(dotest(PAGE_SIZE,  20000, iter)) e(5);
833 
834 	if(bigflag) {
835 		u32_t totalmem, freemem, cachedmem;
836 		if(dotest(PAGE_SIZE,  150000, iter)) e(5);
837 		getmem(&totalmem, &freemem, &cachedmem);
838 		if(dotest(PAGE_SIZE,  totalmem*1.5, iter)) e(6);
839 	}
840 
841 	quit();
842 
843 	return 0;
844 }
845 
846