1 2 #define _SYSTEM 3 4 #include <assert.h> 5 #include <errno.h> 6 #include <math.h> 7 #include <stdlib.h> 8 9 #include <machine/vmparam.h> 10 11 #include <sys/param.h> 12 #include <sys/mman.h> 13 14 #include <minix/dmap.h> 15 #include <minix/libminixfs.h> 16 #include <minix/syslib.h> 17 #include <minix/sysutil.h> 18 #include <minix/u64.h> 19 #include <minix/bdev.h> 20 21 #define BUFHASH(b) ((b) % nr_bufs) 22 #define MARKCLEAN lmfs_markclean 23 24 #define MINBUFS 6 /* minimal no of bufs for sanity check */ 25 26 static struct buf *front; /* points to least recently used free block */ 27 static struct buf *rear; /* points to most recently used free block */ 28 static unsigned int bufs_in_use;/* # bufs currently in use (not on free list)*/ 29 30 static void rm_lru(struct buf *bp); 31 static void read_block(struct buf *); 32 static void flushall(dev_t dev); 33 static void freeblock(struct buf *bp); 34 static void cache_heuristic_check(int major); 35 36 static int vmcache = 0; /* are we using vm's secondary cache? (initially not) */ 37 38 static struct buf *buf; 39 static struct buf **buf_hash; /* the buffer hash table */ 40 static unsigned int nr_bufs; 41 static int may_use_vmcache; 42 43 static int fs_block_size = PAGE_SIZE; /* raw i/o block size */ 44 45 static int rdwt_err; 46 47 static int quiet = 0; 48 49 void lmfs_setquiet(int q) { quiet = q; } 50 51 static u32_t fs_bufs_heuristic(int minbufs, u32_t btotal, u64_t bfree, 52 int blocksize, dev_t majordev) 53 { 54 struct vm_stats_info vsi; 55 int bufs; 56 u32_t kbytes_used_fs, kbytes_total_fs, kbcache, kb_fsmax; 57 u32_t kbytes_remain_mem; 58 u64_t bused; 59 60 bused = btotal-bfree; 61 62 /* set a reasonable cache size; cache at most a certain 63 * portion of the used FS, and at most a certain %age of remaining 64 * memory 65 */ 66 if(vm_info_stats(&vsi) != OK) { 67 bufs = 1024; 68 if(!quiet) 69 printf("fslib: heuristic info fail: default to %d bufs\n", bufs); 70 return bufs; 71 } 72 73 /* remaining free memory is unused memory plus memory in used for cache, 74 * as the cache can be evicted 75 */ 76 kbytes_remain_mem = (u64_t)(vsi.vsi_free + vsi.vsi_cached) * 77 vsi.vsi_pagesize / 1024; 78 79 /* check fs usage. */ 80 kbytes_used_fs = (unsigned long)(((u64_t)bused * blocksize) / 1024); 81 kbytes_total_fs = (unsigned long)(((u64_t)btotal * blocksize) / 1024); 82 83 /* heuristic for a desired cache size based on FS usage; 84 * but never bigger than half of the total filesystem 85 */ 86 kb_fsmax = sqrt_approx(kbytes_used_fs)*40; 87 kb_fsmax = MIN(kb_fsmax, kbytes_total_fs/2); 88 89 /* heuristic for a maximum usage - 10% of remaining memory */ 90 kbcache = MIN(kbytes_remain_mem/10, kb_fsmax); 91 bufs = kbcache * 1024 / blocksize; 92 93 /* but we simply need MINBUFS no matter what */ 94 if(bufs < minbufs) 95 bufs = minbufs; 96 97 return bufs; 98 } 99 100 void lmfs_blockschange(dev_t dev, int delta) 101 { 102 /* Change the number of allocated blocks by 'delta.' 103 * Also accumulate the delta since the last cache re-evaluation. 104 * If it is outside a certain band, ask the cache library to 105 * re-evaluate the cache size. 106 */ 107 static int bitdelta = 0; 108 bitdelta += delta; 109 #define BANDKB (10*1024) /* recheck cache every 10MB change */ 110 if(bitdelta*fs_block_size/1024 > BANDKB || 111 bitdelta*fs_block_size/1024 < -BANDKB) { 112 lmfs_cache_reevaluate(dev); 113 bitdelta = 0; 114 } 115 } 116 117 void lmfs_markdirty(struct buf *bp) 118 { 119 bp->lmfs_flags |= VMMC_DIRTY; 120 } 121 122 void lmfs_markclean(struct buf *bp) 123 { 124 bp->lmfs_flags &= ~VMMC_DIRTY; 125 } 126 127 int lmfs_isclean(struct buf *bp) 128 { 129 return !(bp->lmfs_flags & VMMC_DIRTY); 130 } 131 132 dev_t lmfs_dev(struct buf *bp) 133 { 134 return bp->lmfs_dev; 135 } 136 137 int lmfs_bytes(struct buf *bp) 138 { 139 return bp->lmfs_bytes; 140 } 141 142 static void free_unused_blocks(void) 143 { 144 struct buf *bp; 145 146 int freed = 0, bytes = 0; 147 printf("libminixfs: freeing; %d blocks in use\n", bufs_in_use); 148 for(bp = &buf[0]; bp < &buf[nr_bufs]; bp++) { 149 if(bp->lmfs_bytes > 0 && bp->lmfs_count == 0) { 150 freed++; 151 bytes += bp->lmfs_bytes; 152 freeblock(bp); 153 } 154 } 155 printf("libminixfs: freeing; %d blocks, %d bytes\n", freed, bytes); 156 } 157 158 static void lmfs_alloc_block(struct buf *bp) 159 { 160 int len; 161 ASSERT(!bp->data); 162 ASSERT(bp->lmfs_bytes == 0); 163 164 len = roundup(fs_block_size, PAGE_SIZE); 165 166 if((bp->data = mmap(0, fs_block_size, 167 PROT_READ|PROT_WRITE, MAP_PREALLOC|MAP_ANON, -1, 0)) == MAP_FAILED) { 168 free_unused_blocks(); 169 if((bp->data = mmap(0, fs_block_size, PROT_READ|PROT_WRITE, 170 MAP_PREALLOC|MAP_ANON, -1, 0)) == MAP_FAILED) { 171 panic("libminixfs: could not allocate block"); 172 } 173 } 174 assert(bp->data); 175 bp->lmfs_bytes = fs_block_size; 176 bp->lmfs_needsetcache = 1; 177 } 178 179 /*===========================================================================* 180 * lmfs_get_block * 181 *===========================================================================*/ 182 struct buf *lmfs_get_block(register dev_t dev, register block_t block, 183 int only_search) 184 { 185 return lmfs_get_block_ino(dev, block, only_search, VMC_NO_INODE, 0); 186 } 187 188 void munmap_t(void *a, int len) 189 { 190 vir_bytes av = (vir_bytes) a; 191 assert(a); 192 assert(a != MAP_FAILED); 193 assert(len > 0); 194 assert(!(av % PAGE_SIZE)); 195 196 len = roundup(len, PAGE_SIZE); 197 198 assert(!(len % PAGE_SIZE)); 199 200 if(munmap(a, len) < 0) 201 panic("libminixfs cache: munmap failed"); 202 } 203 204 static void raisecount(struct buf *bp) 205 { 206 assert(bufs_in_use >= 0); 207 ASSERT(bp->lmfs_count >= 0); 208 bp->lmfs_count++; 209 if(bp->lmfs_count == 1) bufs_in_use++; 210 assert(bufs_in_use > 0); 211 } 212 213 static void lowercount(struct buf *bp) 214 { 215 assert(bufs_in_use > 0); 216 ASSERT(bp->lmfs_count > 0); 217 bp->lmfs_count--; 218 if(bp->lmfs_count == 0) bufs_in_use--; 219 assert(bufs_in_use >= 0); 220 } 221 222 static void freeblock(struct buf *bp) 223 { 224 ASSERT(bp->lmfs_count == 0); 225 /* If the block taken is dirty, make it clean by writing it to the disk. 226 * Avoid hysteresis by flushing all other dirty blocks for the same device. 227 */ 228 if (bp->lmfs_dev != NO_DEV) { 229 if (!lmfs_isclean(bp)) flushall(bp->lmfs_dev); 230 assert(bp->lmfs_bytes == fs_block_size); 231 bp->lmfs_dev = NO_DEV; 232 } 233 234 /* Fill in block's parameters and add it to the hash chain where it goes. */ 235 MARKCLEAN(bp); /* NO_DEV blocks may be marked dirty */ 236 if(bp->lmfs_bytes > 0) { 237 assert(bp->data); 238 munmap_t(bp->data, bp->lmfs_bytes); 239 bp->lmfs_bytes = 0; 240 bp->data = NULL; 241 } else assert(!bp->data); 242 } 243 244 /*===========================================================================* 245 * lmfs_get_block_ino * 246 *===========================================================================*/ 247 struct buf *lmfs_get_block_ino(dev_t dev, block_t block, int only_search, 248 ino_t ino, u64_t ino_off) 249 { 250 /* Check to see if the requested block is in the block cache. If so, return 251 * a pointer to it. If not, evict some other block and fetch it (unless 252 * 'only_search' is 1). All the blocks in the cache that are not in use 253 * are linked together in a chain, with 'front' pointing to the least recently 254 * used block and 'rear' to the most recently used block. If 'only_search' is 255 * 1, the block being requested will be overwritten in its entirety, so it is 256 * only necessary to see if it is in the cache; if it is not, any free buffer 257 * will do. It is not necessary to actually read the block in from disk. 258 * If 'only_search' is PREFETCH, the block need not be read from the disk, 259 * and the device is not to be marked on the block, so callers can tell if 260 * the block returned is valid. 261 * In addition to the LRU chain, there is also a hash chain to link together 262 * blocks whose block numbers end with the same bit strings, for fast lookup. 263 */ 264 265 int b; 266 static struct buf *bp; 267 u64_t dev_off = (u64_t) block * fs_block_size; 268 struct buf *prev_ptr; 269 270 assert(buf_hash); 271 assert(buf); 272 assert(nr_bufs > 0); 273 274 ASSERT(fs_block_size > 0); 275 276 assert(dev != NO_DEV); 277 278 if((ino_off % fs_block_size)) { 279 280 printf("cache: unaligned lmfs_get_block_ino ino_off %llu\n", 281 ino_off); 282 util_stacktrace(); 283 } 284 285 /* Search the hash chain for (dev, block). */ 286 b = BUFHASH(block); 287 bp = buf_hash[b]; 288 while (bp != NULL) { 289 if (bp->lmfs_blocknr == block && bp->lmfs_dev == dev) { 290 if(bp->lmfs_flags & VMMC_EVICTED) { 291 /* We had it but VM evicted it; invalidate it. */ 292 ASSERT(bp->lmfs_count == 0); 293 ASSERT(!(bp->lmfs_flags & VMMC_BLOCK_LOCKED)); 294 ASSERT(!(bp->lmfs_flags & VMMC_DIRTY)); 295 bp->lmfs_dev = NO_DEV; 296 bp->lmfs_bytes = 0; 297 bp->data = NULL; 298 break; 299 } 300 /* Block needed has been found. */ 301 if (bp->lmfs_count == 0) { 302 rm_lru(bp); 303 ASSERT(bp->lmfs_needsetcache == 0); 304 ASSERT(!(bp->lmfs_flags & VMMC_BLOCK_LOCKED)); 305 bp->lmfs_flags |= VMMC_BLOCK_LOCKED; 306 } 307 raisecount(bp); 308 ASSERT(bp->lmfs_bytes == fs_block_size); 309 ASSERT(bp->lmfs_dev == dev); 310 ASSERT(bp->lmfs_dev != NO_DEV); 311 ASSERT(bp->lmfs_flags & VMMC_BLOCK_LOCKED); 312 ASSERT(bp->data); 313 314 if(ino != VMC_NO_INODE) { 315 if(bp->lmfs_inode == VMC_NO_INODE 316 || bp->lmfs_inode != ino 317 || bp->lmfs_inode_offset != ino_off) { 318 bp->lmfs_inode = ino; 319 bp->lmfs_inode_offset = ino_off; 320 bp->lmfs_needsetcache = 1; 321 } 322 } 323 324 return(bp); 325 } else { 326 /* This block is not the one sought. */ 327 bp = bp->lmfs_hash; /* move to next block on hash chain */ 328 } 329 } 330 331 /* Desired block is not on available chain. Find a free block to use. */ 332 if(bp) { 333 ASSERT(bp->lmfs_flags & VMMC_EVICTED); 334 } else { 335 if ((bp = front) == NULL) panic("all buffers in use: %d", nr_bufs); 336 } 337 assert(bp); 338 339 rm_lru(bp); 340 341 /* Remove the block that was just taken from its hash chain. */ 342 b = BUFHASH(bp->lmfs_blocknr); 343 prev_ptr = buf_hash[b]; 344 if (prev_ptr == bp) { 345 buf_hash[b] = bp->lmfs_hash; 346 } else { 347 /* The block just taken is not on the front of its hash chain. */ 348 while (prev_ptr->lmfs_hash != NULL) 349 if (prev_ptr->lmfs_hash == bp) { 350 prev_ptr->lmfs_hash = bp->lmfs_hash; /* found it */ 351 break; 352 } else { 353 prev_ptr = prev_ptr->lmfs_hash; /* keep looking */ 354 } 355 } 356 357 freeblock(bp); 358 359 bp->lmfs_inode = ino; 360 bp->lmfs_inode_offset = ino_off; 361 362 bp->lmfs_flags = VMMC_BLOCK_LOCKED; 363 bp->lmfs_needsetcache = 0; 364 bp->lmfs_dev = dev; /* fill in device number */ 365 bp->lmfs_blocknr = block; /* fill in block number */ 366 ASSERT(bp->lmfs_count == 0); 367 raisecount(bp); 368 b = BUFHASH(bp->lmfs_blocknr); 369 bp->lmfs_hash = buf_hash[b]; 370 371 buf_hash[b] = bp; /* add to hash list */ 372 373 assert(dev != NO_DEV); 374 375 /* Block is not found in our cache, but we do want it 376 * if it's in the vm cache. 377 */ 378 assert(!bp->data); 379 assert(!bp->lmfs_bytes); 380 if(vmcache) { 381 if((bp->data = vm_map_cacheblock(dev, dev_off, ino, ino_off, 382 &bp->lmfs_flags, fs_block_size)) != MAP_FAILED) { 383 bp->lmfs_bytes = fs_block_size; 384 ASSERT(!bp->lmfs_needsetcache); 385 return bp; 386 } 387 } 388 bp->data = NULL; 389 390 /* Not in the cache; reserve memory for its contents. */ 391 392 lmfs_alloc_block(bp); 393 394 assert(bp->data); 395 396 if(only_search == PREFETCH) { 397 /* PREFETCH: don't do i/o. */ 398 bp->lmfs_dev = NO_DEV; 399 } else if (only_search == NORMAL) { 400 read_block(bp); 401 } else if(only_search == NO_READ) { 402 /* This block will be overwritten by new contents. */ 403 } else 404 panic("unexpected only_search value: %d", only_search); 405 406 assert(bp->data); 407 408 return(bp); /* return the newly acquired block */ 409 } 410 411 /*===========================================================================* 412 * lmfs_put_block * 413 *===========================================================================*/ 414 void lmfs_put_block( 415 struct buf *bp, /* pointer to the buffer to be released */ 416 int block_type /* INODE_BLOCK, DIRECTORY_BLOCK, or whatever */ 417 ) 418 { 419 /* Return a block to the list of available blocks. Depending on 'block_type' 420 * it may be put on the front or rear of the LRU chain. Blocks that are 421 * expected to be needed again shortly (e.g., partially full data blocks) 422 * go on the rear; blocks that are unlikely to be needed again shortly 423 * (e.g., full data blocks) go on the front. Blocks whose loss can hurt 424 * the integrity of the file system (e.g., inode blocks) are written to 425 * disk immediately if they are dirty. 426 */ 427 dev_t dev; 428 off_t dev_off; 429 int r; 430 431 if (bp == NULL) return; /* it is easier to check here than in caller */ 432 433 dev = bp->lmfs_dev; 434 435 dev_off = (off_t) bp->lmfs_blocknr * fs_block_size; 436 437 lowercount(bp); 438 if (bp->lmfs_count != 0) return; /* block is still in use */ 439 440 /* Put this block back on the LRU chain. */ 441 if (dev == DEV_RAM || (block_type & ONE_SHOT)) { 442 /* Block probably won't be needed quickly. Put it on front of chain. 443 * It will be the next block to be evicted from the cache. 444 */ 445 bp->lmfs_prev = NULL; 446 bp->lmfs_next = front; 447 if (front == NULL) 448 rear = bp; /* LRU chain was empty */ 449 else 450 front->lmfs_prev = bp; 451 front = bp; 452 } 453 else { 454 /* Block probably will be needed quickly. Put it on rear of chain. 455 * It will not be evicted from the cache for a long time. 456 */ 457 bp->lmfs_prev = rear; 458 bp->lmfs_next = NULL; 459 if (rear == NULL) 460 front = bp; 461 else 462 rear->lmfs_next = bp; 463 rear = bp; 464 } 465 466 assert(bp->lmfs_flags & VMMC_BLOCK_LOCKED); 467 bp->lmfs_flags &= ~VMMC_BLOCK_LOCKED; 468 469 /* block has sensible content - if necesary, identify it to VM */ 470 if(vmcache && bp->lmfs_needsetcache && dev != NO_DEV) { 471 if((r=vm_set_cacheblock(bp->data, dev, dev_off, 472 bp->lmfs_inode, bp->lmfs_inode_offset, 473 &bp->lmfs_flags, fs_block_size)) != OK) { 474 if(r == ENOSYS) { 475 printf("libminixfs: ENOSYS, disabling VM calls\n"); 476 vmcache = 0; 477 } else { 478 panic("libminixfs: setblock of %p dev 0x%llx off " 479 "0x%llx failed\n", bp->data, dev, dev_off); 480 } 481 } 482 } 483 bp->lmfs_needsetcache = 0; 484 485 } 486 487 void lmfs_cache_reevaluate(dev_t dev) 488 { 489 if(bufs_in_use == 0 && dev != NO_DEV) { 490 /* if the cache isn't in use any more, we could resize it. */ 491 cache_heuristic_check(major(dev)); 492 } 493 } 494 495 /*===========================================================================* 496 * read_block * 497 *===========================================================================*/ 498 static void read_block( 499 struct buf *bp /* buffer pointer */ 500 ) 501 { 502 /* Read or write a disk block. This is the only routine in which actual disk 503 * I/O is invoked. If an error occurs, a message is printed here, but the error 504 * is not reported to the caller. If the error occurred while purging a block 505 * from the cache, it is not clear what the caller could do about it anyway. 506 */ 507 int r, op_failed; 508 off_t pos; 509 dev_t dev = bp->lmfs_dev; 510 511 op_failed = 0; 512 513 assert(dev != NO_DEV); 514 515 ASSERT(bp->lmfs_bytes == fs_block_size); 516 ASSERT(fs_block_size > 0); 517 518 pos = (off_t)bp->lmfs_blocknr * fs_block_size; 519 if(fs_block_size > PAGE_SIZE) { 520 #define MAXPAGES 20 521 vir_bytes blockrem, vaddr = (vir_bytes) bp->data; 522 int p = 0; 523 static iovec_t iovec[MAXPAGES]; 524 blockrem = fs_block_size; 525 while(blockrem > 0) { 526 vir_bytes chunk = blockrem >= PAGE_SIZE ? PAGE_SIZE : blockrem; 527 iovec[p].iov_addr = vaddr; 528 iovec[p].iov_size = chunk; 529 vaddr += chunk; 530 blockrem -= chunk; 531 p++; 532 } 533 r = bdev_gather(dev, pos, iovec, p, BDEV_NOFLAGS); 534 } else { 535 r = bdev_read(dev, pos, bp->data, fs_block_size, 536 BDEV_NOFLAGS); 537 } 538 if (r < 0) { 539 printf("fs cache: I/O error on device %d/%d, block %u\n", 540 major(dev), minor(dev), bp->lmfs_blocknr); 541 op_failed = 1; 542 } else if (r != (ssize_t) fs_block_size) { 543 r = END_OF_FILE; 544 op_failed = 1; 545 } 546 547 if (op_failed) { 548 bp->lmfs_dev = NO_DEV; /* invalidate block */ 549 550 /* Report read errors to interested parties. */ 551 rdwt_err = r; 552 } 553 554 } 555 556 /*===========================================================================* 557 * lmfs_invalidate * 558 *===========================================================================*/ 559 void lmfs_invalidate( 560 dev_t device /* device whose blocks are to be purged */ 561 ) 562 { 563 /* Remove all the blocks belonging to some device from the cache. */ 564 565 register struct buf *bp; 566 567 for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++) { 568 if (bp->lmfs_dev == device) { 569 assert(bp->data); 570 assert(bp->lmfs_bytes > 0); 571 munmap_t(bp->data, bp->lmfs_bytes); 572 bp->lmfs_dev = NO_DEV; 573 bp->lmfs_bytes = 0; 574 bp->data = NULL; 575 } 576 } 577 578 vm_clear_cache(device); 579 } 580 581 /*===========================================================================* 582 * flushall * 583 *===========================================================================*/ 584 static void flushall(dev_t dev) 585 { 586 /* Flush all dirty blocks for one device. */ 587 588 register struct buf *bp; 589 static struct buf **dirty; /* static so it isn't on stack */ 590 static unsigned int dirtylistsize = 0; 591 int ndirty; 592 593 if(dirtylistsize != nr_bufs) { 594 if(dirtylistsize > 0) { 595 assert(dirty != NULL); 596 free(dirty); 597 } 598 if(!(dirty = malloc(sizeof(dirty[0])*nr_bufs))) 599 panic("couldn't allocate dirty buf list"); 600 dirtylistsize = nr_bufs; 601 } 602 603 for (bp = &buf[0], ndirty = 0; bp < &buf[nr_bufs]; bp++) { 604 if (!lmfs_isclean(bp) && bp->lmfs_dev == dev) { 605 dirty[ndirty++] = bp; 606 } 607 } 608 609 lmfs_rw_scattered(dev, dirty, ndirty, WRITING); 610 } 611 612 /*===========================================================================* 613 * lmfs_rw_scattered * 614 *===========================================================================*/ 615 void lmfs_rw_scattered( 616 dev_t dev, /* major-minor device number */ 617 struct buf **bufq, /* pointer to array of buffers */ 618 int bufqsize, /* number of buffers */ 619 int rw_flag /* READING or WRITING */ 620 ) 621 { 622 /* Read or write scattered data from a device. */ 623 624 register struct buf *bp; 625 int gap; 626 register int i; 627 register iovec_t *iop; 628 static iovec_t iovec[NR_IOREQS]; 629 off_t pos; 630 int iov_per_block; 631 int start_in_use = bufs_in_use, start_bufqsize = bufqsize; 632 633 assert(bufqsize >= 0); 634 if(bufqsize == 0) return; 635 636 /* for READING, check all buffers on the list are obtained and held 637 * (count > 0) 638 */ 639 if (rw_flag == READING) { 640 for(i = 0; i < bufqsize; i++) { 641 assert(bufq[i] != NULL); 642 assert(bufq[i]->lmfs_count > 0); 643 } 644 645 /* therefore they are all 'in use' and must be at least this many */ 646 assert(start_in_use >= start_bufqsize); 647 } 648 649 assert(dev != NO_DEV); 650 assert(fs_block_size > 0); 651 iov_per_block = roundup(fs_block_size, PAGE_SIZE) / PAGE_SIZE; 652 assert(iov_per_block < NR_IOREQS); 653 654 /* (Shell) sort buffers on lmfs_blocknr. */ 655 gap = 1; 656 do 657 gap = 3 * gap + 1; 658 while (gap <= bufqsize); 659 while (gap != 1) { 660 int j; 661 gap /= 3; 662 for (j = gap; j < bufqsize; j++) { 663 for (i = j - gap; 664 i >= 0 && bufq[i]->lmfs_blocknr > bufq[i + gap]->lmfs_blocknr; 665 i -= gap) { 666 bp = bufq[i]; 667 bufq[i] = bufq[i + gap]; 668 bufq[i + gap] = bp; 669 } 670 } 671 } 672 673 /* Set up I/O vector and do I/O. The result of bdev I/O is OK if everything 674 * went fine, otherwise the error code for the first failed transfer. 675 */ 676 while (bufqsize > 0) { 677 int nblocks = 0, niovecs = 0; 678 int r; 679 for (iop = iovec; nblocks < bufqsize; nblocks++) { 680 int p; 681 vir_bytes vdata, blockrem; 682 bp = bufq[nblocks]; 683 if (bp->lmfs_blocknr != (block_t) bufq[0]->lmfs_blocknr + nblocks) 684 break; 685 if(niovecs >= NR_IOREQS-iov_per_block) break; 686 vdata = (vir_bytes) bp->data; 687 blockrem = fs_block_size; 688 for(p = 0; p < iov_per_block; p++) { 689 vir_bytes chunk = blockrem < PAGE_SIZE ? blockrem : PAGE_SIZE; 690 iop->iov_addr = vdata; 691 iop->iov_size = chunk; 692 vdata += PAGE_SIZE; 693 blockrem -= chunk; 694 iop++; 695 niovecs++; 696 } 697 assert(p == iov_per_block); 698 assert(blockrem == 0); 699 } 700 701 assert(nblocks > 0); 702 assert(niovecs > 0); 703 704 pos = (off_t)bufq[0]->lmfs_blocknr * fs_block_size; 705 if (rw_flag == READING) 706 r = bdev_gather(dev, pos, iovec, niovecs, BDEV_NOFLAGS); 707 else 708 r = bdev_scatter(dev, pos, iovec, niovecs, BDEV_NOFLAGS); 709 710 /* Harvest the results. The driver may have returned an error, or it 711 * may have done less than what we asked for. 712 */ 713 if (r < 0) { 714 printf("fs cache: I/O error %d on device %d/%d, block %u\n", 715 r, major(dev), minor(dev), bufq[0]->lmfs_blocknr); 716 } 717 for (i = 0; i < nblocks; i++) { 718 bp = bufq[i]; 719 if (r < (ssize_t) fs_block_size) { 720 /* Transfer failed. */ 721 if (i == 0) { 722 bp->lmfs_dev = NO_DEV; /* Invalidate block */ 723 } 724 break; 725 } 726 if (rw_flag == READING) { 727 bp->lmfs_dev = dev; /* validate block */ 728 lmfs_put_block(bp, PARTIAL_DATA_BLOCK); 729 } else { 730 MARKCLEAN(bp); 731 } 732 r -= fs_block_size; 733 } 734 735 bufq += i; 736 bufqsize -= i; 737 738 if (rw_flag == READING) { 739 /* Don't bother reading more than the device is willing to 740 * give at this time. Don't forget to release those extras. 741 */ 742 while (bufqsize > 0) { 743 lmfs_put_block(*bufq++, PARTIAL_DATA_BLOCK); 744 bufqsize--; 745 } 746 } 747 if (rw_flag == WRITING && i == 0) { 748 /* We're not making progress, this means we might keep 749 * looping. Buffers remain dirty if un-written. Buffers are 750 * lost if invalidate()d or LRU-removed while dirty. This 751 * is better than keeping unwritable blocks around forever.. 752 */ 753 break; 754 } 755 } 756 757 if(rw_flag == READING) { 758 assert(start_in_use >= start_bufqsize); 759 760 /* READING callers assume all bufs are released. */ 761 assert(start_in_use - start_bufqsize == bufs_in_use); 762 } 763 } 764 765 /*===========================================================================* 766 * rm_lru * 767 *===========================================================================*/ 768 static void rm_lru(struct buf *bp) 769 { 770 /* Remove a block from its LRU chain. */ 771 struct buf *next_ptr, *prev_ptr; 772 773 next_ptr = bp->lmfs_next; /* successor on LRU chain */ 774 prev_ptr = bp->lmfs_prev; /* predecessor on LRU chain */ 775 if (prev_ptr != NULL) 776 prev_ptr->lmfs_next = next_ptr; 777 else 778 front = next_ptr; /* this block was at front of chain */ 779 780 if (next_ptr != NULL) 781 next_ptr->lmfs_prev = prev_ptr; 782 else 783 rear = prev_ptr; /* this block was at rear of chain */ 784 } 785 786 /*===========================================================================* 787 * cache_resize * 788 *===========================================================================*/ 789 static void cache_resize(unsigned int blocksize, unsigned int bufs) 790 { 791 struct buf *bp; 792 793 assert(blocksize > 0); 794 assert(bufs >= MINBUFS); 795 796 for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++) 797 if(bp->lmfs_count != 0) panic("change blocksize with buffer in use"); 798 799 lmfs_buf_pool(bufs); 800 801 fs_block_size = blocksize; 802 } 803 804 static void cache_heuristic_check(int major) 805 { 806 int bufs, d; 807 u64_t btotal, bfree, bused; 808 809 fs_blockstats(&btotal, &bfree, &bused); 810 811 bufs = fs_bufs_heuristic(10, btotal, bfree, 812 fs_block_size, major); 813 814 /* set the cache to the new heuristic size if the new one 815 * is more than 10% off from the current one. 816 */ 817 d = bufs-nr_bufs; 818 if(d < 0) d = -d; 819 if(d*100/nr_bufs > 10) { 820 cache_resize(fs_block_size, bufs); 821 } 822 } 823 824 /*===========================================================================* 825 * lmfs_set_blocksize * 826 *===========================================================================*/ 827 void lmfs_set_blocksize(int new_block_size, int major) 828 { 829 cache_resize(new_block_size, MINBUFS); 830 cache_heuristic_check(major); 831 832 /* Decide whether to use seconday cache or not. 833 * Only do this if 834 * - it's available, and 835 * - use of it hasn't been disabled for this fs, and 836 * - our main FS device isn't a memory device 837 */ 838 839 vmcache = 0; 840 841 if(may_use_vmcache && !(new_block_size % PAGE_SIZE)) 842 vmcache = 1; 843 } 844 845 /*===========================================================================* 846 * lmfs_buf_pool * 847 *===========================================================================*/ 848 void lmfs_buf_pool(int new_nr_bufs) 849 { 850 /* Initialize the buffer pool. */ 851 register struct buf *bp; 852 853 assert(new_nr_bufs >= MINBUFS); 854 855 if(nr_bufs > 0) { 856 assert(buf); 857 (void) fs_sync(); 858 for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++) { 859 if(bp->data) { 860 assert(bp->lmfs_bytes > 0); 861 munmap_t(bp->data, bp->lmfs_bytes); 862 } 863 } 864 } 865 866 if(buf) 867 free(buf); 868 869 if(!(buf = calloc(sizeof(buf[0]), new_nr_bufs))) 870 panic("couldn't allocate buf list (%d)", new_nr_bufs); 871 872 if(buf_hash) 873 free(buf_hash); 874 if(!(buf_hash = calloc(sizeof(buf_hash[0]), new_nr_bufs))) 875 panic("couldn't allocate buf hash list (%d)", new_nr_bufs); 876 877 nr_bufs = new_nr_bufs; 878 879 bufs_in_use = 0; 880 front = &buf[0]; 881 rear = &buf[nr_bufs - 1]; 882 883 for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++) { 884 bp->lmfs_blocknr = NO_BLOCK; 885 bp->lmfs_dev = NO_DEV; 886 bp->lmfs_next = bp + 1; 887 bp->lmfs_prev = bp - 1; 888 bp->data = NULL; 889 bp->lmfs_bytes = 0; 890 } 891 front->lmfs_prev = NULL; 892 rear->lmfs_next = NULL; 893 894 for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++) bp->lmfs_hash = bp->lmfs_next; 895 buf_hash[0] = front; 896 } 897 898 int lmfs_bufs_in_use(void) 899 { 900 return bufs_in_use; 901 } 902 903 int lmfs_nr_bufs(void) 904 { 905 return nr_bufs; 906 } 907 908 void lmfs_flushall(void) 909 { 910 struct buf *bp; 911 for(bp = &buf[0]; bp < &buf[nr_bufs]; bp++) 912 if(bp->lmfs_dev != NO_DEV && !lmfs_isclean(bp)) 913 flushall(bp->lmfs_dev); 914 } 915 916 int lmfs_fs_block_size(void) 917 { 918 return fs_block_size; 919 } 920 921 void lmfs_may_use_vmcache(int ok) 922 { 923 may_use_vmcache = ok; 924 } 925 926 void lmfs_reset_rdwt_err(void) 927 { 928 rdwt_err = OK; 929 } 930 931 int lmfs_rdwt_err(void) 932 { 933 return rdwt_err; 934 } 935 936 int lmfs_do_bpeek(message *m) 937 { 938 block_t startblock, b, limitblock; 939 dev_t dev = m->m_vfs_fs_breadwrite.device; 940 off_t extra, pos = m->m_vfs_fs_breadwrite.seek_pos; 941 size_t len = m->m_vfs_fs_breadwrite.nbytes; 942 struct buf *bp; 943 944 assert(m->m_type == REQ_BPEEK); 945 assert(fs_block_size > 0); 946 assert(dev != NO_DEV); 947 948 if(!vmcache) { return ENXIO; } 949 950 assert(!(fs_block_size % PAGE_SIZE)); 951 952 if((extra=(pos % fs_block_size))) { 953 pos -= extra; 954 len += extra; 955 } 956 957 len = roundup(len, fs_block_size); 958 959 startblock = pos/fs_block_size; 960 limitblock = startblock + len/fs_block_size; 961 962 for(b = startblock; b < limitblock; b++) { 963 bp = lmfs_get_block(dev, b, NORMAL); 964 assert(bp); 965 lmfs_put_block(bp, FULL_DATA_BLOCK); 966 } 967 968 return OK; 969 } 970