1 /* Test 74 - mmap functionality & regression test. 2 * 3 * This test tests some basic functionality of mmap, and also some 4 * cases that are quite complex for the system to handle. 5 * 6 * Memory pages are generally made available on demand. Memory copying 7 * is done by the kernel. As the kernel may encounter pagefaults in 8 * legitimate memory ranges (e.g. pages that aren't mapped; pages that 9 * are mapped RO as they are COW), it cooperates with VM to make the 10 * mappings and let the copy succeed transparently. 11 * 12 * With file-mapped ranges this can result in a deadlock, if care is 13 * not taken, as the copy might be request by VFS or an FS. This test 14 * triggers as many of these states as possible to ensure they are 15 * successful or (where appropriate) fail gracefully, i.e. without 16 * deadlock. 17 * 18 * To do this, system calls are done with source or target buffers with 19 * missing or readonly mappings, both anonymous and file-mapped. The 20 * cache is flushed before mmap() so that we know the mappings should 21 * not be present on mmap() time. Then e.g. a read() or write() is 22 * executed with that buffer as target. This triggers a FS copying 23 * to or from a missing range that it itself is needed to map in first. 24 * VFS detects this, requests VM to map in the pages, which does so with 25 * the help of another VFS thread and the FS, and then re-issues the 26 * request to the FS. 27 * 28 * Another case is the VFS itself does such a copy. This is actually 29 * unusual as filenames are already faulted in by the requesting process 30 * in libc by strlen(). select() allows such a case, however, so this 31 * is tested too. We are satisfied if the call completes. 32 */ 33 34 #include <sys/types.h> 35 #include <sys/mman.h> 36 #include <sys/ioctl.h> 37 #include <sys/ioc_memory.h> 38 #include <sys/param.h> 39 #include <minix/paths.h> 40 #include <stdio.h> 41 #include <assert.h> 42 #include <string.h> 43 #include <stdlib.h> 44 #include <unistd.h> 45 #include <fcntl.h> 46 #include <dirent.h> 47 48 #include "common.h" 49 #include "testcache.h" 50 51 int max_error = 0; /* make all e()'s fatal */ 52 53 int 54 dowriteblock(int b, int blocksize, u32_t seed, char *data) 55 { 56 u64_t offset; 57 int fd; 58 59 get_fd_offset(b, blocksize, &offset, &fd); 60 61 if(pwrite(fd, data, blocksize, offset) < blocksize) { 62 perror("pwrite"); 63 return -1; 64 } 65 66 return blocksize; 67 } 68 69 int 70 readblock(int b, int blocksize, u32_t seed, char *data) 71 { 72 u64_t offset; 73 int fd; 74 char *mmapdata; 75 int pread_first = random() % 2; 76 77 get_fd_offset(b, blocksize, &offset, &fd); 78 79 if(pread_first) { 80 if(pread(fd, data, blocksize, offset) < blocksize) { 81 perror("pread"); 82 return -1; 83 } 84 } 85 86 if((mmapdata = mmap(NULL, blocksize, PROT_READ, MAP_PRIVATE | MAP_FILE, 87 fd, offset)) == MAP_FAILED) { 88 perror("mmap"); 89 return -1; 90 } 91 92 if(!pread_first) { 93 if(pread(fd, data, blocksize, offset) < blocksize) { 94 perror("pread"); 95 return -1; 96 } 97 } 98 99 if(memcmp(mmapdata, data, blocksize)) { 100 fprintf(stderr, "readblock: mmap, pread mismatch\n"); 101 return -1; 102 } 103 104 if(munmap(mmapdata, blocksize) < 0) { 105 perror("munmap"); 106 return -1; 107 } 108 109 return blocksize; 110 } 111 112 void testend(void) { } 113 114 static void do_read(void *buf, int fd, int writable) 115 { 116 ssize_t ret; 117 size_t n = PAGE_SIZE; 118 struct stat sb; 119 if(fstat(fd, &sb) < 0) e(1); 120 if(S_ISDIR(sb.st_mode)) return; 121 ret = read(fd, buf, n); 122 123 /* if the buffer is writable, it should succeed */ 124 if(writable) { if(ret != n) e(3); return; } 125 126 /* if the buffer is not writable, it should fail with EFAULT */ 127 if(ret >= 0) e(4); 128 if(errno != EFAULT) e(5); 129 } 130 131 static void do_write(void *buf, int fd, int writable) 132 { 133 size_t n = PAGE_SIZE; 134 struct stat sb; 135 if(fstat(fd, &sb) < 0) e(1); 136 if(S_ISDIR(sb.st_mode)) return; 137 if(write(fd, buf, n) != n) e(3); 138 } 139 140 static void do_stat(void *buf, int fd, int writable) 141 { 142 int r; 143 r = fstat(fd, (struct stat *) buf); 144 145 /* should succeed if buf is writable */ 146 if(writable) { if(r < 0) e(3); return; } 147 148 /* should fail with EFAULT if buf is not */ 149 if(r >= 0) e(4); 150 if(errno != EFAULT) e(5); 151 } 152 153 static void do_getdents(void *buf, int fd, int writable) 154 { 155 struct stat sb; 156 int r; 157 if(fstat(fd, &sb) < 0) e(1); 158 if(!S_ISDIR(sb.st_mode)) return; /* OK */ 159 r = getdents(fd, buf, PAGE_SIZE); 160 if(writable) { if(r < 0) e(3); return; } 161 162 /* should fail with EFAULT if buf is not */ 163 if(r >= 0) e(4); 164 if(errno != EFAULT) e(5); 165 } 166 167 static void do_readlink1(void *buf, int fd, int writable) 168 { 169 char target[200]; 170 /* the system call just has to fail gracefully */ 171 readlink(buf, target, sizeof(target)); 172 } 173 174 #define NODENAME "a" 175 #define TARGETNAME "b" 176 177 static void do_readlink2(void *buf, int fd, int writable) 178 { 179 ssize_t rl; 180 unlink(NODENAME); 181 if(symlink(TARGETNAME, NODENAME) < 0) e(1); 182 rl=readlink(NODENAME, buf, sizeof(buf)); 183 184 /* if buf is writable, it should succeed, with a certain result */ 185 if(writable) { 186 if(rl < 0) e(2); 187 ((char *) buf)[rl] = '\0'; 188 if(strcmp(buf, TARGETNAME)) { 189 fprintf(stderr, "readlink: expected %s, got %s\n", 190 TARGETNAME, (char *)buf); 191 e(3); 192 } 193 return; 194 } 195 196 /* if buf is not writable, it should fail with EFAULT */ 197 if(rl >= 0) e(4); 198 199 if(errno != EFAULT) e(5); 200 } 201 202 static void do_symlink1(void *buf, int fd, int writable) 203 { 204 int r; 205 /* the system call just has to fail gracefully */ 206 r = symlink(buf, NODENAME); 207 } 208 209 static void do_symlink2(void *buf, int fd, int writable) 210 { 211 int r; 212 /* the system call just has to fail gracefully */ 213 r = symlink(NODENAME, buf); 214 } 215 216 static void do_open(void *buf, int fd, int writable) 217 { 218 int r; 219 /* the system call just has to fail gracefully */ 220 r = open(buf, O_RDONLY); 221 if(r >= 0) close(r); 222 } 223 224 static void do_select1(void *buf, int fd, int writable) 225 { 226 int r; 227 struct timeval timeout = { 0, 200000 }; /* 0.2 sec */ 228 /* the system call just has to fail gracefully */ 229 r = select(1, buf, NULL, NULL, &timeout); 230 } 231 232 static void do_select2(void *buf, int fd, int writable) 233 { 234 int r; 235 struct timeval timeout = { 0, 200000 }; /* 1 sec */ 236 /* the system call just has to fail gracefully */ 237 r = select(1, NULL, buf, NULL, &timeout); 238 } 239 240 static void do_select3(void *buf, int fd, int writable) 241 { 242 int r; 243 struct timeval timeout = { 0, 200000 }; /* 1 sec */ 244 /* the system call just has to fail gracefully */ 245 r = select(1, NULL, NULL, buf, &timeout); 246 } 247 248 static void fillfile(int fd, int size) 249 { 250 char *buf = malloc(size); 251 252 if(size < 1 || size % PAGE_SIZE || !buf) { e(1); } 253 memset(buf, 'A', size); 254 buf[50] = '\0'; /* so it can be used as a filename arg */ 255 buf[size-1] = '\0'; 256 if(write(fd, buf, size) != size) { e(2); } 257 if(lseek(fd, SEEK_SET, 0) < 0) { e(3); } 258 free(buf); 259 } 260 261 static void make_buffers(int size, 262 int *ret_fd_rw, int *ret_fd_ro, 263 void **filebuf_rw, void **filebuf_ro, void **anonbuf) 264 { 265 char fn_rw[] = "testfile_rw.XXXXXX", fn_ro[] = "testfile_ro.XXXXXX"; 266 *ret_fd_rw = mkstemp(fn_rw); 267 *ret_fd_ro = mkstemp(fn_ro); 268 269 if(size < 1 || size % PAGE_SIZE) { e(2); } 270 if(*ret_fd_rw < 0) { e(1); } 271 if(*ret_fd_ro < 0) { e(1); } 272 fillfile(*ret_fd_rw, size); 273 fillfile(*ret_fd_ro, size); 274 if(fcntl(*ret_fd_rw, F_FLUSH_FS_CACHE) < 0) { e(4); } 275 if(fcntl(*ret_fd_ro, F_FLUSH_FS_CACHE) < 0) { e(4); } 276 277 if((*filebuf_rw = mmap(0, size, PROT_READ | PROT_WRITE, 278 MAP_PRIVATE | MAP_FILE, *ret_fd_rw, 0)) == MAP_FAILED) { 279 e(5); 280 quit(); 281 } 282 283 if((*filebuf_ro = mmap(0, size, PROT_READ, 284 MAP_PRIVATE | MAP_FILE, *ret_fd_ro, 0)) == MAP_FAILED) { 285 e(5); 286 quit(); 287 } 288 289 if((*anonbuf = mmap(0, size, PROT_READ | PROT_WRITE, 290 MAP_PRIVATE | MAP_ANON, -1, 0)) == MAP_FAILED) { 291 e(6); 292 quit(); 293 } 294 295 if(unlink(fn_rw) < 0) { e(12); } 296 if(unlink(fn_ro) < 0) { e(12); } 297 } 298 299 static void forget_buffers(void *buf1, void *buf2, void *buf3, int fd1, int fd2, int size) 300 { 301 if(munmap(buf1, size) < 0) { e(1); } 302 if(munmap(buf2, size) < 0) { e(2); } 303 if(munmap(buf3, size) < 0) { e(2); } 304 if(fcntl(fd1, F_FLUSH_FS_CACHE) < 0) { e(3); } 305 if(fcntl(fd2, F_FLUSH_FS_CACHE) < 0) { e(3); } 306 if(close(fd1) < 0) { e(4); } 307 if(close(fd2) < 0) { e(4); } 308 } 309 310 #define NEXPERIMENTS 12 311 struct { 312 void (*do_operation)(void * buf, int fd, int writable); 313 } experiments[NEXPERIMENTS] = { 314 { do_read }, 315 { do_write }, 316 { do_stat }, 317 { do_getdents }, 318 { do_readlink1 }, 319 { do_readlink2 }, 320 { do_symlink1 }, 321 { do_symlink2 }, 322 { do_open, }, 323 { do_select1 }, 324 { do_select2 }, 325 { do_select3 }, 326 }; 327 328 static void test_memory_types_vs_operations(void) 329 { 330 #define NFDS 4 331 #define BUFSIZE (10 * PAGE_SIZE) 332 int exp, fds[NFDS]; 333 int f = 0, size = BUFSIZE; 334 335 /* open some test fd's */ 336 #define OPEN(fn, mode) { assert(f >= 0 && f < NFDS); \ 337 fds[f] = open(fn, mode); if(fds[f] < 0) { e(2); } f++; } 338 OPEN("regular", O_RDWR | O_CREAT); 339 OPEN(".", O_RDONLY); 340 OPEN("/dev/ram", O_RDWR); 341 OPEN("/dev/zero", O_RDWR); 342 343 /* make sure the regular file has plenty of size to play with */ 344 fillfile(fds[0], BUFSIZE); 345 346 /* and the ramdisk too */ 347 if(ioctl(fds[2], MIOCRAMSIZE, &size) < 0) { e(3); } 348 349 for(exp = 0; exp < NEXPERIMENTS; exp++) { 350 for(f = 0; f < NFDS; f++) { 351 void *anonmem, *filemem_rw, *filemem_ro; 352 int buffd_rw, buffd_ro; 353 354 make_buffers(BUFSIZE, &buffd_rw, &buffd_ro, 355 &filemem_rw, &filemem_ro, &anonmem); 356 357 if(lseek(fds[f], 0, SEEK_SET) != 0) { e(10); } 358 experiments[exp].do_operation(anonmem, fds[f], 1); 359 360 if(lseek(fds[f], 0, SEEK_SET) != 0) { e(11); } 361 experiments[exp].do_operation(filemem_rw, fds[f], 1); 362 363 if(lseek(fds[f], 0, SEEK_SET) != 0) { e(12); } 364 experiments[exp].do_operation(filemem_ro, fds[f], 0); 365 366 forget_buffers(filemem_rw, filemem_ro, anonmem, buffd_rw, buffd_ro, BUFSIZE); 367 } 368 } 369 } 370 371 static void basic_regression(void) 372 { 373 int fd, fd1, fd2; 374 ssize_t rb, wr; 375 char buf[PAGE_SIZE*2]; 376 void *block, *block1, *block2; 377 #define BLOCKSIZE (PAGE_SIZE*10) 378 block = mmap(0, BLOCKSIZE, PROT_READ | PROT_WRITE, 379 MAP_PRIVATE | MAP_ANON, -1, 0); 380 381 if(block == MAP_FAILED) { e(1); } 382 383 memset(block, 0, BLOCKSIZE); 384 385 /* shrink from bottom */ 386 munmap(block, PAGE_SIZE); 387 388 /* Next test: use a system call write() to access a block of 389 * unavailable file-mapped memory. 390 * 391 * This is a thorny corner case to make succeed transparently 392 * because 393 * (1) it is a filesystem that is doing the memory access 394 * (copy from the constblock1 range in this process to the 395 * FS) but is also the FS needed to satisfy the range if it 396 * isn't in the cache. 397 * (2) there are two separate memory regions involved, requiring 398 * separate VFS requests from VM to properly satisfy, requiring 399 * some complex state to be kept. 400 */ 401 402 fd1 = open("../testsh1", O_RDONLY); 403 fd2 = open("../testsh2", O_RDONLY); 404 if(fd1 < 0 || fd2 < 0) { e(2); } 405 406 /* just check that we can't mmap() a file writable */ 407 if(mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FILE, fd1, 0) != MAP_FAILED) { 408 e(1); 409 } 410 411 /* check that we can mmap() a file MAP_SHARED readonly */ 412 if(mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED | MAP_FILE, fd1, 0) == MAP_FAILED) { 413 e(1); 414 } 415 416 /* clear cache of files before mmap so pages won't be present already */ 417 if(fcntl(fd1, F_FLUSH_FS_CACHE) < 0) { e(1); } 418 if(fcntl(fd2, F_FLUSH_FS_CACHE) < 0) { e(1); } 419 420 #define LOCATION1 (void *) 0x90000000 421 #define LOCATION2 ((void *)((char *)LOCATION1 + PAGE_SIZE)) 422 block1 = mmap(LOCATION1, PAGE_SIZE, PROT_READ, MAP_PRIVATE | MAP_FILE, fd1, 0); 423 if(block1 == MAP_FAILED) { e(4); } 424 if(block1 != LOCATION1) { e(5); } 425 426 block2 = mmap(LOCATION2, PAGE_SIZE, PROT_READ, MAP_PRIVATE | MAP_FILE, fd2, 0); 427 if(block2 == MAP_FAILED) { e(10); } 428 if(block2 != LOCATION2) { e(11); } 429 430 unlink("testfile"); 431 fd = open("testfile", O_CREAT | O_RDWR); 432 if(fd < 0) { e(15); } 433 434 /* write() using the mmap()ped memory as buffer */ 435 436 if((wr=write(fd, LOCATION1, sizeof(buf))) != sizeof(buf)) { 437 fprintf(stderr, "wrote %zd bytes instead of %zd\n", 438 wr, sizeof(buf)); 439 e(20); 440 quit(); 441 } 442 443 /* verify written contents */ 444 445 if((rb=pread(fd, buf, sizeof(buf), 0)) != sizeof(buf)) { 446 if(rb < 0) perror("pread"); 447 fprintf(stderr, "wrote %zd bytes\n", wr); 448 fprintf(stderr, "read %zd bytes instead of %zd\n", 449 rb, sizeof(buf)); 450 e(21); 451 quit(); 452 } 453 454 if(memcmp(buf, LOCATION1, sizeof(buf))) { 455 e(22); 456 quit(); 457 } 458 459 close(fd); 460 close(fd1); 461 close(fd2); 462 463 } 464 465 /* 466 * Test mmap on none-dev file systems - file systems that do not have a buffer 467 * cache and therefore have to fake mmap support. We use procfs as target. 468 * The idea is that while we succeed in mapping in /proc/uptime, we also get 469 * a new uptime value every time we map in the page -- VM must not cache it. 470 */ 471 static void 472 nonedev_regression(void) 473 { 474 int fd, fd2; 475 char *buf; 476 unsigned long uptime1, uptime2, uptime3; 477 478 subtest++; 479 480 if ((fd = open(_PATH_PROC "uptime", O_RDONLY)) < 0) e(1); 481 482 buf = mmap(NULL, 4096, PROT_READ, MAP_PRIVATE | MAP_FILE, fd, 0); 483 if (buf == MAP_FAILED) e(2); 484 485 if (buf[4095] != 0) e(3); 486 487 if ((uptime1 = atoi(buf)) == 0) e(4); 488 489 if (munmap(buf, 4096) != 0) e(5); 490 491 sleep(2); 492 493 buf = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FILE, 494 fd, 0); 495 if (buf == MAP_FAILED) e(6); 496 497 if (buf[4095] != 0) e(7); 498 499 if ((uptime2 = atoi(buf)) == 0) e(8); 500 501 if (uptime1 == uptime2) e(9); 502 503 if (munmap(buf, 4096) != 0) e(10); 504 505 sleep(2); 506 507 buf = mmap(NULL, 4096, PROT_READ, MAP_SHARED | MAP_FILE, fd, 0); 508 if (buf == MAP_FAILED) e(11); 509 510 if (buf[4095] != 0) e(12); 511 512 if ((uptime3 = atoi(buf)) == 0) e(13); 513 514 if (uptime1 == uptime3) e(14); 515 if (uptime2 == uptime3) e(15); 516 517 if (munmap(buf, 4096) != 0) e(16); 518 519 /* Also test page faults not incurred by the process itself. */ 520 if ((fd2 = open("testfile", O_CREAT | O_TRUNC | O_WRONLY)) < 0) e(17); 521 522 if (unlink("testfile") != 0) e(18); 523 524 buf = mmap(NULL, 4096, PROT_READ, MAP_SHARED | MAP_FILE, fd, 0); 525 if (buf == MAP_FAILED) e(19); 526 527 if (write(fd2, buf, 10) != 10) e(20); 528 529 if (munmap(buf, 4096) != 0) e(21); 530 531 close(fd2); 532 close(fd); 533 } 534 535 /* 536 * Regression test for a nasty memory-mapped file corruption bug, which is not 537 * easy to reproduce but, before being solved, did occur in practice every once 538 * in a while. The executive summary is that through stale inode associations, 539 * VM could end up using an old block to satisfy a memory mapping. 540 * 541 * This subtest relies on a number of assumptions regarding allocation and 542 * reuse of inode numbers and blocks. These assumptions hold for MFS but 543 * possibly no other file system. However, if the subtest's assumptions are 544 * not met, it will simply succeed. 545 */ 546 static void 547 corruption_regression(void) 548 { 549 char *ptr, *buf; 550 struct statvfs sf; 551 struct stat st; 552 size_t block_size; 553 off_t size; 554 int fd, fd2; 555 556 subtest = 1; 557 558 if (statvfs(".", &sf) != 0) e(0); 559 block_size = sf.f_bsize; 560 561 if ((buf = malloc(block_size * 2)) == NULL) e(0); 562 563 /* 564 * We first need a file that is just large enough that it requires the 565 * allocation of a metadata block - an indirect block - when more data 566 * is written to it. This is fileA. We keep it open throughout the 567 * test so we can unlink it immediately. 568 */ 569 if ((fd = open("fileA", O_CREAT | O_TRUNC | O_WRONLY, 0600)) == -1) 570 e(0); 571 if (unlink("fileA") != 0) e(0); 572 573 /* 574 * Write to fileA until its next block requires the allocation of an 575 * additional metadata block - an indirect block. 576 */ 577 size = 0; 578 memset(buf, 'A', block_size); 579 do { 580 /* 581 * Repeatedly write an extra block, until the file consists of 582 * more blocks than just the file data. 583 */ 584 if (write(fd, buf, block_size) != block_size) e(0); 585 size += block_size; 586 if (size >= block_size * 64) { 587 /* 588 * It doesn't look like this is going to work. 589 * Skip this subtest altogether. 590 */ 591 if (close(fd) != 0) e(0); 592 free(buf); 593 594 return; 595 } 596 if (fstat(fd, &st) != 0) e(0); 597 } while (st.st_blocks * 512 == size); 598 599 /* Once we get there, go one step back by truncating by one block. */ 600 size -= block_size; /* for MFS, size will end up being 7*block_size */ 601 if (ftruncate(fd, size) != 0) e(0); 602 603 /* 604 * Create a first file, fileB, and write two blocks to it. FileB's 605 * blocks are going to end up in the secondary VM cache, associated to 606 * fileB's inode number (and two different offsets within the file). 607 * The block cache does not know about files getting deleted, so we can 608 * unlink fileB immediately after creating it. So far so good. 609 */ 610 if ((fd2 = open("fileB", O_CREAT | O_TRUNC | O_WRONLY, 0600)) == -1) 611 e(0); 612 if (unlink("fileB") != 0) e(0); 613 memset(buf, 'B', block_size * 2); 614 if (write(fd2, buf, block_size * 2) != block_size * 2) e(0); 615 if (close(fd2) != 0) e(0); 616 617 /* 618 * Write one extra block to fileA, hoping that this causes allocation 619 * of a metadata block as well. This is why we tried to get fileA to 620 * the point that one more block would also require the allocation of a 621 * metadata block. Our intent is to recycle the blocks that we just 622 * allocated and freed for fileB. As of writing, for the metadata 623 * block, this will *not* break the association with fileB's inode, 624 * which by itself is not a problem, yet crucial to reproducing 625 * the actual problem a bit later. Note that the test does not rely on 626 * whether the file system allocates the data block or the metadata 627 * block first, although it does need reverse deallocation (see below). 628 */ 629 memset(buf, 'A', block_size); 630 if (write(fd, buf, block_size) != block_size) e(0); 631 632 /* 633 * Create a new file, fileC, which recycles the inode number of fileB, 634 * but uses two new blocks to store its data. These new blocks will 635 * get associated to the fileB inode number, and one of them will 636 * thereby eclipse (but not remove) the association of fileA's metadata 637 * block to the inode of fileB. 638 */ 639 if ((fd2 = open("fileC", O_CREAT | O_TRUNC | O_WRONLY, 0600)) == -1) 640 e(0); 641 if (unlink("fileC") != 0) e(0); 642 memset(buf, 'C', block_size * 2); 643 if (write(fd2, buf, block_size * 2) != block_size * 2) e(0); 644 if (close(fd2) != 0) e(0); 645 646 /* 647 * Free up the extra fileA blocks for reallocation, in particular 648 * including the metadata block. Again, this will not affect the 649 * contents of the VM cache in any way. FileA's metadata block remains 650 * cached in VM, with the inode association for fileB's block. 651 */ 652 if (ftruncate(fd, size) != 0) e(0); 653 654 /* 655 * Now create yet one more file, fileD, which also recycles the inode 656 * number of fileB and fileC. Write two blocks to it; these blocks 657 * should recycle the blocks we just freed. One of these is fileA's 658 * just-freed metadata block, for which the new inode association will 659 * be equal to the inode association it had already (as long as blocks 660 * are freed in reverse order of their allocation, which happens to be 661 * the case for MFS). As a result, the block is not updated in the VM 662 * cache, and VM will therefore continue to see the inode association 663 * for the corresponding block of fileC which is still in the VM cache. 664 */ 665 if ((fd2 = open("fileD", O_CREAT | O_TRUNC | O_RDWR, 0600)) == -1) 666 e(0); 667 memset(buf, 'D', block_size * 2); 668 if (write(fd2, buf, block_size * 2) != block_size * 2) e(0); 669 670 ptr = mmap(NULL, block_size * 2, PROT_READ, MAP_FILE, fd2, 0); 671 if (ptr == MAP_FAILED) e(0); 672 673 /* 674 * Finally, we can test the issue. Since fileC's block is still the 675 * block for which VM has the corresponding inode association, VM will 676 * now find and map in fileC's block, instead of fileD's block. The 677 * result is that we get a memory-mapped area with stale contents, 678 * different from those of the underlying file. 679 */ 680 if (memcmp(buf, ptr, block_size * 2)) e(0); 681 682 /* Clean up. */ 683 if (munmap(ptr, block_size * 2) != 0) e(0); 684 685 if (close(fd2) != 0) e(0); 686 if (unlink("fileD") != 0) e(0); 687 688 if (close(fd) != 0) e(0); 689 690 free(buf); 691 } 692 693 /* 694 * Test mmap on file holes. Holes are a tricky case with the current VM 695 * implementation. There are two main issues. First, whenever a file data 696 * block is freed, VM has to know about this, or it will later blindly map in 697 * the old data. This, file systems explicitly tell VM (through libminixfs) 698 * whenever a block is freed, upon which VM cache forgets the block. Second, 699 * blocks are accessed primarily by a <dev,dev_off> pair and only additionally 700 * by a <ino,ino_off> pair. Holes have no meaningful value for the first pair, 701 * but do need to be registered in VM with the second pair, or accessing them 702 * will generate a segmentation fault. Thus, file systems explicitly tell VM 703 * (through libminixfs) when a hole is being peeked; libminixfs currently fakes 704 * a device offset to make this work. 705 */ 706 static void 707 hole_regression(void) 708 { 709 struct statvfs st; 710 size_t block_size; 711 char *buf; 712 int fd; 713 714 if (statvfs(".", &st) < 0) e(1); 715 716 block_size = st.f_bsize; 717 718 if ((buf = malloc(block_size)) == NULL) e(2); 719 720 if ((fd = open("testfile", O_CREAT | O_TRUNC | O_RDWR)) < 0) e(3); 721 722 if (unlink("testfile") != 0) e(4); 723 724 /* 725 * We perform the test twice, in a not-so-perfect attempt to test the 726 * two aspects independently. The first part immediately creates a 727 * hole, and is supposed to fail only if reporting holes to VM does not 728 * work. However, it may also fail if a page for a previous file with 729 * the same inode number as "testfile" is still in the VM cache. 730 */ 731 memset(buf, 12, block_size); 732 733 if (write(fd, buf, block_size) != block_size) e(5); 734 735 if (lseek(fd, block_size * 2, SEEK_CUR) != block_size * 3) e(6); 736 737 memset(buf, 78, block_size); 738 739 if (write(fd, buf, block_size) != block_size) e(7); 740 741 free(buf); 742 743 if ((buf = mmap(NULL, 4 * block_size, PROT_READ, MAP_SHARED | MAP_FILE, 744 fd, 0)) == MAP_FAILED) e(8); 745 746 if (buf[0 * block_size] != 12 || buf[1 * block_size - 1] != 12) e(9); 747 if (buf[1 * block_size] != 0 || buf[2 * block_size - 1] != 0) e(10); 748 if (buf[2 * block_size] != 0 || buf[3 * block_size - 1] != 0) e(11); 749 if (buf[3 * block_size] != 78 || buf[4 * block_size - 1] != 78) e(12); 750 751 if (munmap(buf, 4 * block_size) != 0) e(13); 752 753 /* 754 * The second part first creates file content and only turns part of it 755 * into a file hole, thus ensuring that VM has previously cached pages 756 * for the blocks that are freed. The test will fail if VM keeps the 757 * pages around in its cache. 758 */ 759 if ((buf = malloc(block_size)) == NULL) e(14); 760 761 if (lseek(fd, block_size, SEEK_SET) != block_size) e(15); 762 763 memset(buf, 34, block_size); 764 765 if (write(fd, buf, block_size) != block_size) e(16); 766 767 memset(buf, 56, block_size); 768 769 if (write(fd, buf, block_size) != block_size) e(17); 770 771 if (ftruncate(fd, block_size) != 0) e(18); 772 773 if (lseek(fd, block_size * 3, SEEK_SET) != block_size * 3) e(19); 774 775 memset(buf, 78, block_size); 776 777 if (write(fd, buf, block_size) != block_size) e(20); 778 779 free(buf); 780 781 if ((buf = mmap(NULL, 4 * block_size, PROT_READ, MAP_SHARED | MAP_FILE, 782 fd, 0)) == MAP_FAILED) e(21); 783 784 if (buf[0 * block_size] != 12 || buf[1 * block_size - 1] != 12) e(22); 785 if (buf[1 * block_size] != 0 || buf[2 * block_size - 1] != 0) e(23); 786 if (buf[2 * block_size] != 0 || buf[3 * block_size - 1] != 0) e(24); 787 if (buf[3 * block_size] != 78 || buf[4 * block_size - 1] != 78) e(25); 788 789 if (munmap(buf, 4 * block_size) != 0) e(26); 790 791 close(fd); 792 } 793 794 int 795 main(int argc, char *argv[]) 796 { 797 int i, iter = 2; 798 799 start(74); 800 801 basic_regression(); 802 803 nonedev_regression(); 804 805 /* 806 * Any inode or block allocation happening concurrently with this 807 * subtest will make the subtest succeed without testing the actual 808 * issue. Thus, repeat the subtest a fair number of times. 809 */ 810 for (i = 0; i < 10; i++) 811 corruption_regression(); 812 813 hole_regression(); 814 815 test_memory_types_vs_operations(); 816 817 makefiles(MAXFILES); 818 819 cachequiet(!bigflag); 820 if(bigflag) iter = 3; 821 822 /* Try various combinations working set sizes 823 * and block sizes in order to specifically 824 * target the primary cache, then primary+secondary 825 * cache, then primary+secondary cache+secondary 826 * cache eviction. 827 */ 828 829 if(dotest(PAGE_SIZE, 100, iter)) e(5); 830 if(dotest(PAGE_SIZE*2, 100, iter)) e(2); 831 if(dotest(PAGE_SIZE*3, 100, iter)) e(3); 832 if(dotest(PAGE_SIZE, 20000, iter)) e(5); 833 834 if(bigflag) { 835 u32_t totalmem, freemem, cachedmem; 836 if(dotest(PAGE_SIZE, 150000, iter)) e(5); 837 getmem(&totalmem, &freemem, &cachedmem); 838 if(dotest(PAGE_SIZE, totalmem*1.5, iter)) e(6); 839 } 840 841 quit(); 842 843 return 0; 844 } 845 846