1 2 #define _SYSTEM 3 4 #include <assert.h> 5 #include <errno.h> 6 #include <math.h> 7 #include <stdlib.h> 8 9 #include <machine/vmparam.h> 10 11 #include <sys/param.h> 12 #include <sys/mman.h> 13 14 #include <minix/dmap.h> 15 #include <minix/libminixfs.h> 16 #include <minix/syslib.h> 17 #include <minix/sysutil.h> 18 #include <minix/u64.h> 19 #include <minix/bdev.h> 20 21 #define BUFHASH(b) ((b) % nr_bufs) 22 #define MARKCLEAN lmfs_markclean 23 24 #define MINBUFS 6 /* minimal no of bufs for sanity check */ 25 26 static struct buf *front; /* points to least recently used free block */ 27 static struct buf *rear; /* points to most recently used free block */ 28 static unsigned int bufs_in_use;/* # bufs currently in use (not on free list)*/ 29 30 static void rm_lru(struct buf *bp); 31 static void read_block(struct buf *); 32 static void freeblock(struct buf *bp); 33 static void cache_heuristic_check(int major); 34 35 static int vmcache = 0; /* are we using vm's secondary cache? (initially not) */ 36 37 static struct buf *buf; 38 static struct buf **buf_hash; /* the buffer hash table */ 39 static unsigned int nr_bufs; 40 static int may_use_vmcache; 41 42 static size_t fs_block_size = PAGE_SIZE; /* raw i/o block size */ 43 44 static int rdwt_err; 45 46 static int quiet = 0; 47 48 void lmfs_setquiet(int q) { quiet = q; } 49 50 static u32_t fs_bufs_heuristic(int minbufs, u32_t btotal, u64_t bfree, 51 int blocksize, dev_t majordev) 52 { 53 struct vm_stats_info vsi; 54 int bufs; 55 u32_t kbytes_used_fs, kbytes_total_fs, kbcache, kb_fsmax; 56 u32_t kbytes_remain_mem; 57 u64_t bused; 58 59 bused = btotal-bfree; 60 61 /* set a reasonable cache size; cache at most a certain 62 * portion of the used FS, and at most a certain %age of remaining 63 * memory 64 */ 65 if(vm_info_stats(&vsi) != OK) { 66 bufs = 1024; 67 if(!quiet) 68 printf("fslib: heuristic info fail: default to %d bufs\n", bufs); 69 return bufs; 70 } 71 72 /* remaining free memory is unused memory plus memory in used for cache, 73 * as the cache can be evicted 74 */ 75 kbytes_remain_mem = (u64_t)(vsi.vsi_free + vsi.vsi_cached) * 76 vsi.vsi_pagesize / 1024; 77 78 /* check fs usage. */ 79 kbytes_used_fs = (unsigned long)(((u64_t)bused * blocksize) / 1024); 80 kbytes_total_fs = (unsigned long)(((u64_t)btotal * blocksize) / 1024); 81 82 /* heuristic for a desired cache size based on FS usage; 83 * but never bigger than half of the total filesystem 84 */ 85 kb_fsmax = sqrt_approx(kbytes_used_fs)*40; 86 kb_fsmax = MIN(kb_fsmax, kbytes_total_fs/2); 87 88 /* heuristic for a maximum usage - 10% of remaining memory */ 89 kbcache = MIN(kbytes_remain_mem/10, kb_fsmax); 90 bufs = kbcache * 1024 / blocksize; 91 92 /* but we simply need MINBUFS no matter what */ 93 if(bufs < minbufs) 94 bufs = minbufs; 95 96 return bufs; 97 } 98 99 void lmfs_blockschange(dev_t dev, int delta) 100 { 101 /* Change the number of allocated blocks by 'delta.' 102 * Also accumulate the delta since the last cache re-evaluation. 103 * If it is outside a certain band, ask the cache library to 104 * re-evaluate the cache size. 105 */ 106 static int bitdelta = 0; 107 bitdelta += delta; 108 #define BANDKB (10*1024) /* recheck cache every 10MB change */ 109 if(bitdelta*(int)fs_block_size/1024 > BANDKB || 110 bitdelta*(int)fs_block_size/1024 < -BANDKB) { 111 lmfs_cache_reevaluate(dev); 112 bitdelta = 0; 113 } 114 } 115 116 void lmfs_markdirty(struct buf *bp) 117 { 118 bp->lmfs_flags |= VMMC_DIRTY; 119 } 120 121 void lmfs_markclean(struct buf *bp) 122 { 123 bp->lmfs_flags &= ~VMMC_DIRTY; 124 } 125 126 int lmfs_isclean(struct buf *bp) 127 { 128 return !(bp->lmfs_flags & VMMC_DIRTY); 129 } 130 131 dev_t lmfs_dev(struct buf *bp) 132 { 133 return bp->lmfs_dev; 134 } 135 136 int lmfs_bytes(struct buf *bp) 137 { 138 return bp->lmfs_bytes; 139 } 140 141 static void free_unused_blocks(void) 142 { 143 struct buf *bp; 144 145 int freed = 0, bytes = 0; 146 printf("libminixfs: freeing; %d blocks in use\n", bufs_in_use); 147 for(bp = &buf[0]; bp < &buf[nr_bufs]; bp++) { 148 if(bp->lmfs_bytes > 0 && bp->lmfs_count == 0) { 149 freed++; 150 bytes += bp->lmfs_bytes; 151 freeblock(bp); 152 } 153 } 154 printf("libminixfs: freeing; %d blocks, %d bytes\n", freed, bytes); 155 } 156 157 static void lmfs_alloc_block(struct buf *bp) 158 { 159 int len; 160 ASSERT(!bp->data); 161 ASSERT(bp->lmfs_bytes == 0); 162 163 len = roundup(fs_block_size, PAGE_SIZE); 164 165 if((bp->data = mmap(0, fs_block_size, 166 PROT_READ|PROT_WRITE, MAP_PREALLOC|MAP_ANON, -1, 0)) == MAP_FAILED) { 167 free_unused_blocks(); 168 if((bp->data = mmap(0, fs_block_size, PROT_READ|PROT_WRITE, 169 MAP_PREALLOC|MAP_ANON, -1, 0)) == MAP_FAILED) { 170 panic("libminixfs: could not allocate block"); 171 } 172 } 173 assert(bp->data); 174 bp->lmfs_bytes = fs_block_size; 175 bp->lmfs_needsetcache = 1; 176 } 177 178 /*===========================================================================* 179 * lmfs_get_block * 180 *===========================================================================*/ 181 struct buf *lmfs_get_block(register dev_t dev, register block_t block, 182 int only_search) 183 { 184 return lmfs_get_block_ino(dev, block, only_search, VMC_NO_INODE, 0); 185 } 186 187 static void munmap_t(void *a, int len) 188 { 189 vir_bytes av = (vir_bytes) a; 190 assert(a); 191 assert(a != MAP_FAILED); 192 assert(len > 0); 193 assert(!(av % PAGE_SIZE)); 194 195 len = roundup(len, PAGE_SIZE); 196 197 assert(!(len % PAGE_SIZE)); 198 199 if(munmap(a, len) < 0) 200 panic("libminixfs cache: munmap failed"); 201 } 202 203 static void raisecount(struct buf *bp) 204 { 205 assert(bufs_in_use >= 0); 206 ASSERT(bp->lmfs_count >= 0); 207 bp->lmfs_count++; 208 if(bp->lmfs_count == 1) bufs_in_use++; 209 assert(bufs_in_use > 0); 210 } 211 212 static void lowercount(struct buf *bp) 213 { 214 assert(bufs_in_use > 0); 215 ASSERT(bp->lmfs_count > 0); 216 bp->lmfs_count--; 217 if(bp->lmfs_count == 0) bufs_in_use--; 218 assert(bufs_in_use >= 0); 219 } 220 221 static void freeblock(struct buf *bp) 222 { 223 ASSERT(bp->lmfs_count == 0); 224 /* If the block taken is dirty, make it clean by writing it to the disk. 225 * Avoid hysteresis by flushing all other dirty blocks for the same device. 226 */ 227 if (bp->lmfs_dev != NO_DEV) { 228 if (!lmfs_isclean(bp)) lmfs_flushdev(bp->lmfs_dev); 229 assert(bp->lmfs_bytes == fs_block_size); 230 bp->lmfs_dev = NO_DEV; 231 } 232 233 /* Fill in block's parameters and add it to the hash chain where it goes. */ 234 MARKCLEAN(bp); /* NO_DEV blocks may be marked dirty */ 235 if(bp->lmfs_bytes > 0) { 236 assert(bp->data); 237 munmap_t(bp->data, bp->lmfs_bytes); 238 bp->lmfs_bytes = 0; 239 bp->data = NULL; 240 } else assert(!bp->data); 241 } 242 243 /*===========================================================================* 244 * lmfs_get_block_ino * 245 *===========================================================================*/ 246 struct buf *lmfs_get_block_ino(dev_t dev, block_t block, int only_search, 247 ino_t ino, u64_t ino_off) 248 { 249 /* Check to see if the requested block is in the block cache. If so, return 250 * a pointer to it. If not, evict some other block and fetch it (unless 251 * 'only_search' is 1). All the blocks in the cache that are not in use 252 * are linked together in a chain, with 'front' pointing to the least recently 253 * used block and 'rear' to the most recently used block. If 'only_search' is 254 * 1, the block being requested will be overwritten in its entirety, so it is 255 * only necessary to see if it is in the cache; if it is not, any free buffer 256 * will do. It is not necessary to actually read the block in from disk. 257 * If 'only_search' is PREFETCH, the block need not be read from the disk, 258 * and the device is not to be marked on the block, so callers can tell if 259 * the block returned is valid. 260 * In addition to the LRU chain, there is also a hash chain to link together 261 * blocks whose block numbers end with the same bit strings, for fast lookup. 262 */ 263 264 int b; 265 static struct buf *bp; 266 u64_t dev_off = (u64_t) block * fs_block_size; 267 struct buf *prev_ptr; 268 269 assert(buf_hash); 270 assert(buf); 271 assert(nr_bufs > 0); 272 273 ASSERT(fs_block_size > 0); 274 275 assert(dev != NO_DEV); 276 277 if((ino_off % fs_block_size)) { 278 279 printf("cache: unaligned lmfs_get_block_ino ino_off %llu\n", 280 ino_off); 281 util_stacktrace(); 282 } 283 284 /* Search the hash chain for (dev, block). */ 285 b = BUFHASH(block); 286 bp = buf_hash[b]; 287 while (bp != NULL) { 288 if (bp->lmfs_blocknr == block && bp->lmfs_dev == dev) { 289 if(bp->lmfs_flags & VMMC_EVICTED) { 290 /* We had it but VM evicted it; invalidate it. */ 291 ASSERT(bp->lmfs_count == 0); 292 ASSERT(!(bp->lmfs_flags & VMMC_BLOCK_LOCKED)); 293 ASSERT(!(bp->lmfs_flags & VMMC_DIRTY)); 294 bp->lmfs_dev = NO_DEV; 295 bp->lmfs_bytes = 0; 296 bp->data = NULL; 297 break; 298 } 299 /* Block needed has been found. */ 300 if (bp->lmfs_count == 0) { 301 rm_lru(bp); 302 ASSERT(bp->lmfs_needsetcache == 0); 303 ASSERT(!(bp->lmfs_flags & VMMC_BLOCK_LOCKED)); 304 bp->lmfs_flags |= VMMC_BLOCK_LOCKED; 305 } 306 raisecount(bp); 307 ASSERT(bp->lmfs_bytes == fs_block_size); 308 ASSERT(bp->lmfs_dev == dev); 309 ASSERT(bp->lmfs_dev != NO_DEV); 310 ASSERT(bp->lmfs_flags & VMMC_BLOCK_LOCKED); 311 ASSERT(bp->data); 312 313 if(ino != VMC_NO_INODE) { 314 if(bp->lmfs_inode == VMC_NO_INODE 315 || bp->lmfs_inode != ino 316 || bp->lmfs_inode_offset != ino_off) { 317 bp->lmfs_inode = ino; 318 bp->lmfs_inode_offset = ino_off; 319 bp->lmfs_needsetcache = 1; 320 } 321 } 322 323 return(bp); 324 } else { 325 /* This block is not the one sought. */ 326 bp = bp->lmfs_hash; /* move to next block on hash chain */ 327 } 328 } 329 330 /* Desired block is not on available chain. Find a free block to use. */ 331 if(bp) { 332 ASSERT(bp->lmfs_flags & VMMC_EVICTED); 333 } else { 334 if ((bp = front) == NULL) panic("all buffers in use: %d", nr_bufs); 335 } 336 assert(bp); 337 338 rm_lru(bp); 339 340 /* Remove the block that was just taken from its hash chain. */ 341 b = BUFHASH(bp->lmfs_blocknr); 342 prev_ptr = buf_hash[b]; 343 if (prev_ptr == bp) { 344 buf_hash[b] = bp->lmfs_hash; 345 } else { 346 /* The block just taken is not on the front of its hash chain. */ 347 while (prev_ptr->lmfs_hash != NULL) 348 if (prev_ptr->lmfs_hash == bp) { 349 prev_ptr->lmfs_hash = bp->lmfs_hash; /* found it */ 350 break; 351 } else { 352 prev_ptr = prev_ptr->lmfs_hash; /* keep looking */ 353 } 354 } 355 356 freeblock(bp); 357 358 bp->lmfs_inode = ino; 359 bp->lmfs_inode_offset = ino_off; 360 361 bp->lmfs_flags = VMMC_BLOCK_LOCKED; 362 bp->lmfs_needsetcache = 0; 363 bp->lmfs_dev = dev; /* fill in device number */ 364 bp->lmfs_blocknr = block; /* fill in block number */ 365 ASSERT(bp->lmfs_count == 0); 366 raisecount(bp); 367 b = BUFHASH(bp->lmfs_blocknr); 368 bp->lmfs_hash = buf_hash[b]; 369 370 buf_hash[b] = bp; /* add to hash list */ 371 372 assert(dev != NO_DEV); 373 374 /* Block is not found in our cache, but we do want it 375 * if it's in the vm cache. 376 */ 377 assert(!bp->data); 378 assert(!bp->lmfs_bytes); 379 if(vmcache) { 380 if((bp->data = vm_map_cacheblock(dev, dev_off, ino, ino_off, 381 &bp->lmfs_flags, fs_block_size)) != MAP_FAILED) { 382 bp->lmfs_bytes = fs_block_size; 383 ASSERT(!bp->lmfs_needsetcache); 384 return bp; 385 } 386 } 387 bp->data = NULL; 388 389 /* Not in the cache; reserve memory for its contents. */ 390 391 lmfs_alloc_block(bp); 392 393 assert(bp->data); 394 395 if(only_search == PREFETCH) { 396 /* PREFETCH: don't do i/o. */ 397 bp->lmfs_dev = NO_DEV; 398 } else if (only_search == NORMAL) { 399 read_block(bp); 400 } else if(only_search == NO_READ) { 401 /* This block will be overwritten by new contents. */ 402 } else 403 panic("unexpected only_search value: %d", only_search); 404 405 assert(bp->data); 406 407 return(bp); /* return the newly acquired block */ 408 } 409 410 /*===========================================================================* 411 * lmfs_put_block * 412 *===========================================================================*/ 413 void lmfs_put_block( 414 struct buf *bp, /* pointer to the buffer to be released */ 415 int block_type /* INODE_BLOCK, DIRECTORY_BLOCK, or whatever */ 416 ) 417 { 418 /* Return a block to the list of available blocks. Depending on 'block_type' 419 * it may be put on the front or rear of the LRU chain. Blocks that are 420 * expected to be needed again shortly (e.g., partially full data blocks) 421 * go on the rear; blocks that are unlikely to be needed again shortly 422 * (e.g., full data blocks) go on the front. Blocks whose loss can hurt 423 * the integrity of the file system (e.g., inode blocks) are written to 424 * disk immediately if they are dirty. 425 */ 426 dev_t dev; 427 off_t dev_off; 428 int r; 429 430 if (bp == NULL) return; /* it is easier to check here than in caller */ 431 432 dev = bp->lmfs_dev; 433 434 dev_off = (off_t) bp->lmfs_blocknr * fs_block_size; 435 436 lowercount(bp); 437 if (bp->lmfs_count != 0) return; /* block is still in use */ 438 439 /* Put this block back on the LRU chain. */ 440 if (dev == DEV_RAM || (block_type & ONE_SHOT)) { 441 /* Block probably won't be needed quickly. Put it on front of chain. 442 * It will be the next block to be evicted from the cache. 443 */ 444 bp->lmfs_prev = NULL; 445 bp->lmfs_next = front; 446 if (front == NULL) 447 rear = bp; /* LRU chain was empty */ 448 else 449 front->lmfs_prev = bp; 450 front = bp; 451 } 452 else { 453 /* Block probably will be needed quickly. Put it on rear of chain. 454 * It will not be evicted from the cache for a long time. 455 */ 456 bp->lmfs_prev = rear; 457 bp->lmfs_next = NULL; 458 if (rear == NULL) 459 front = bp; 460 else 461 rear->lmfs_next = bp; 462 rear = bp; 463 } 464 465 assert(bp->lmfs_flags & VMMC_BLOCK_LOCKED); 466 bp->lmfs_flags &= ~VMMC_BLOCK_LOCKED; 467 468 /* block has sensible content - if necesary, identify it to VM */ 469 if(vmcache && bp->lmfs_needsetcache && dev != NO_DEV) { 470 if((r=vm_set_cacheblock(bp->data, dev, dev_off, 471 bp->lmfs_inode, bp->lmfs_inode_offset, 472 &bp->lmfs_flags, fs_block_size, 0)) != OK) { 473 if(r == ENOSYS) { 474 printf("libminixfs: ENOSYS, disabling VM calls\n"); 475 vmcache = 0; 476 } else { 477 panic("libminixfs: setblock of %p dev 0x%llx off " 478 "0x%llx failed\n", bp->data, dev, dev_off); 479 } 480 } 481 } 482 bp->lmfs_needsetcache = 0; 483 484 } 485 486 void lmfs_cache_reevaluate(dev_t dev) 487 { 488 if(bufs_in_use == 0 && dev != NO_DEV) { 489 /* if the cache isn't in use any more, we could resize it. */ 490 cache_heuristic_check(major(dev)); 491 } 492 } 493 494 /*===========================================================================* 495 * read_block * 496 *===========================================================================*/ 497 static void read_block( 498 struct buf *bp /* buffer pointer */ 499 ) 500 { 501 /* Read or write a disk block. This is the only routine in which actual disk 502 * I/O is invoked. If an error occurs, a message is printed here, but the error 503 * is not reported to the caller. If the error occurred while purging a block 504 * from the cache, it is not clear what the caller could do about it anyway. 505 */ 506 int r, op_failed; 507 off_t pos; 508 dev_t dev = bp->lmfs_dev; 509 510 op_failed = 0; 511 512 assert(dev != NO_DEV); 513 514 ASSERT(bp->lmfs_bytes == fs_block_size); 515 ASSERT(fs_block_size > 0); 516 517 pos = (off_t)bp->lmfs_blocknr * fs_block_size; 518 if(fs_block_size > PAGE_SIZE) { 519 #define MAXPAGES 20 520 vir_bytes blockrem, vaddr = (vir_bytes) bp->data; 521 int p = 0; 522 static iovec_t iovec[MAXPAGES]; 523 blockrem = fs_block_size; 524 while(blockrem > 0) { 525 vir_bytes chunk = blockrem >= PAGE_SIZE ? PAGE_SIZE : blockrem; 526 iovec[p].iov_addr = vaddr; 527 iovec[p].iov_size = chunk; 528 vaddr += chunk; 529 blockrem -= chunk; 530 p++; 531 } 532 r = bdev_gather(dev, pos, iovec, p, BDEV_NOFLAGS); 533 } else { 534 r = bdev_read(dev, pos, bp->data, fs_block_size, 535 BDEV_NOFLAGS); 536 } 537 if (r < 0) { 538 printf("fs cache: I/O error on device %d/%d, block %u\n", 539 major(dev), minor(dev), bp->lmfs_blocknr); 540 op_failed = 1; 541 } else if (r != (ssize_t) fs_block_size) { 542 r = END_OF_FILE; 543 op_failed = 1; 544 } 545 546 if (op_failed) { 547 bp->lmfs_dev = NO_DEV; /* invalidate block */ 548 549 /* Report read errors to interested parties. */ 550 rdwt_err = r; 551 } 552 553 } 554 555 /*===========================================================================* 556 * lmfs_invalidate * 557 *===========================================================================*/ 558 void lmfs_invalidate( 559 dev_t device /* device whose blocks are to be purged */ 560 ) 561 { 562 /* Remove all the blocks belonging to some device from the cache. */ 563 564 register struct buf *bp; 565 566 for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++) { 567 if (bp->lmfs_dev == device) { 568 assert(bp->data); 569 assert(bp->lmfs_bytes > 0); 570 munmap_t(bp->data, bp->lmfs_bytes); 571 bp->lmfs_dev = NO_DEV; 572 bp->lmfs_bytes = 0; 573 bp->data = NULL; 574 } 575 } 576 577 vm_clear_cache(device); 578 } 579 580 /*===========================================================================* 581 * lmfs_flushdev * 582 *===========================================================================*/ 583 void lmfs_flushdev(dev_t dev) 584 { 585 /* Flush all dirty blocks for one device. */ 586 587 register struct buf *bp; 588 static struct buf **dirty; /* static so it isn't on stack */ 589 static unsigned int dirtylistsize = 0; 590 int ndirty; 591 592 if(dirtylistsize != nr_bufs) { 593 if(dirtylistsize > 0) { 594 assert(dirty != NULL); 595 free(dirty); 596 } 597 if(!(dirty = malloc(sizeof(dirty[0])*nr_bufs))) 598 panic("couldn't allocate dirty buf list"); 599 dirtylistsize = nr_bufs; 600 } 601 602 for (bp = &buf[0], ndirty = 0; bp < &buf[nr_bufs]; bp++) { 603 if (!lmfs_isclean(bp) && bp->lmfs_dev == dev) { 604 dirty[ndirty++] = bp; 605 } 606 } 607 608 lmfs_rw_scattered(dev, dirty, ndirty, WRITING); 609 } 610 611 /*===========================================================================* 612 * lmfs_rw_scattered * 613 *===========================================================================*/ 614 void lmfs_rw_scattered( 615 dev_t dev, /* major-minor device number */ 616 struct buf **bufq, /* pointer to array of buffers */ 617 int bufqsize, /* number of buffers */ 618 int rw_flag /* READING or WRITING */ 619 ) 620 { 621 /* Read or write scattered data from a device. */ 622 623 register struct buf *bp; 624 int gap; 625 register int i; 626 register iovec_t *iop; 627 static iovec_t iovec[NR_IOREQS]; 628 off_t pos; 629 int iov_per_block; 630 unsigned int start_in_use = bufs_in_use, start_bufqsize = bufqsize; 631 632 assert(bufqsize >= 0); 633 if(bufqsize == 0) return; 634 635 /* for READING, check all buffers on the list are obtained and held 636 * (count > 0) 637 */ 638 if (rw_flag == READING) { 639 for(i = 0; i < bufqsize; i++) { 640 assert(bufq[i] != NULL); 641 assert(bufq[i]->lmfs_count > 0); 642 } 643 644 /* therefore they are all 'in use' and must be at least this many */ 645 assert(start_in_use >= start_bufqsize); 646 } 647 648 assert(dev != NO_DEV); 649 assert(fs_block_size > 0); 650 iov_per_block = roundup(fs_block_size, PAGE_SIZE) / PAGE_SIZE; 651 assert(iov_per_block < NR_IOREQS); 652 653 /* (Shell) sort buffers on lmfs_blocknr. */ 654 gap = 1; 655 do 656 gap = 3 * gap + 1; 657 while (gap <= bufqsize); 658 while (gap != 1) { 659 int j; 660 gap /= 3; 661 for (j = gap; j < bufqsize; j++) { 662 for (i = j - gap; 663 i >= 0 && bufq[i]->lmfs_blocknr > bufq[i + gap]->lmfs_blocknr; 664 i -= gap) { 665 bp = bufq[i]; 666 bufq[i] = bufq[i + gap]; 667 bufq[i + gap] = bp; 668 } 669 } 670 } 671 672 /* Set up I/O vector and do I/O. The result of bdev I/O is OK if everything 673 * went fine, otherwise the error code for the first failed transfer. 674 */ 675 while (bufqsize > 0) { 676 int nblocks = 0, niovecs = 0; 677 int r; 678 for (iop = iovec; nblocks < bufqsize; nblocks++) { 679 int p; 680 vir_bytes vdata, blockrem; 681 bp = bufq[nblocks]; 682 if (bp->lmfs_blocknr != (block_t) bufq[0]->lmfs_blocknr + nblocks) 683 break; 684 if(niovecs >= NR_IOREQS-iov_per_block) break; 685 vdata = (vir_bytes) bp->data; 686 blockrem = fs_block_size; 687 for(p = 0; p < iov_per_block; p++) { 688 vir_bytes chunk = blockrem < PAGE_SIZE ? blockrem : PAGE_SIZE; 689 iop->iov_addr = vdata; 690 iop->iov_size = chunk; 691 vdata += PAGE_SIZE; 692 blockrem -= chunk; 693 iop++; 694 niovecs++; 695 } 696 assert(p == iov_per_block); 697 assert(blockrem == 0); 698 } 699 700 assert(nblocks > 0); 701 assert(niovecs > 0); 702 703 pos = (off_t)bufq[0]->lmfs_blocknr * fs_block_size; 704 if (rw_flag == READING) 705 r = bdev_gather(dev, pos, iovec, niovecs, BDEV_NOFLAGS); 706 else 707 r = bdev_scatter(dev, pos, iovec, niovecs, BDEV_NOFLAGS); 708 709 /* Harvest the results. The driver may have returned an error, or it 710 * may have done less than what we asked for. 711 */ 712 if (r < 0) { 713 printf("fs cache: I/O error %d on device %d/%d, block %u\n", 714 r, major(dev), minor(dev), bufq[0]->lmfs_blocknr); 715 } 716 for (i = 0; i < nblocks; i++) { 717 bp = bufq[i]; 718 if (r < (ssize_t) fs_block_size) { 719 /* Transfer failed. */ 720 if (i == 0) { 721 bp->lmfs_dev = NO_DEV; /* Invalidate block */ 722 } 723 break; 724 } 725 if (rw_flag == READING) { 726 bp->lmfs_dev = dev; /* validate block */ 727 lmfs_put_block(bp, PARTIAL_DATA_BLOCK); 728 } else { 729 MARKCLEAN(bp); 730 } 731 r -= fs_block_size; 732 } 733 734 bufq += i; 735 bufqsize -= i; 736 737 if (rw_flag == READING) { 738 /* Don't bother reading more than the device is willing to 739 * give at this time. Don't forget to release those extras. 740 */ 741 while (bufqsize > 0) { 742 lmfs_put_block(*bufq++, PARTIAL_DATA_BLOCK); 743 bufqsize--; 744 } 745 } 746 if (rw_flag == WRITING && i == 0) { 747 /* We're not making progress, this means we might keep 748 * looping. Buffers remain dirty if un-written. Buffers are 749 * lost if invalidate()d or LRU-removed while dirty. This 750 * is better than keeping unwritable blocks around forever.. 751 */ 752 break; 753 } 754 } 755 756 if(rw_flag == READING) { 757 assert(start_in_use >= start_bufqsize); 758 759 /* READING callers assume all bufs are released. */ 760 assert(start_in_use - start_bufqsize == bufs_in_use); 761 } 762 } 763 764 /*===========================================================================* 765 * rm_lru * 766 *===========================================================================*/ 767 static void rm_lru(struct buf *bp) 768 { 769 /* Remove a block from its LRU chain. */ 770 struct buf *next_ptr, *prev_ptr; 771 772 next_ptr = bp->lmfs_next; /* successor on LRU chain */ 773 prev_ptr = bp->lmfs_prev; /* predecessor on LRU chain */ 774 if (prev_ptr != NULL) 775 prev_ptr->lmfs_next = next_ptr; 776 else 777 front = next_ptr; /* this block was at front of chain */ 778 779 if (next_ptr != NULL) 780 next_ptr->lmfs_prev = prev_ptr; 781 else 782 rear = prev_ptr; /* this block was at rear of chain */ 783 } 784 785 /*===========================================================================* 786 * cache_resize * 787 *===========================================================================*/ 788 static void cache_resize(unsigned int blocksize, unsigned int bufs) 789 { 790 struct buf *bp; 791 792 assert(blocksize > 0); 793 assert(bufs >= MINBUFS); 794 795 for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++) 796 if(bp->lmfs_count != 0) panic("change blocksize with buffer in use"); 797 798 lmfs_buf_pool(bufs); 799 800 fs_block_size = blocksize; 801 } 802 803 static void cache_heuristic_check(int major) 804 { 805 int bufs, d; 806 u64_t btotal, bfree, bused; 807 808 fs_blockstats(&btotal, &bfree, &bused); 809 810 bufs = fs_bufs_heuristic(10, btotal, bfree, 811 fs_block_size, major); 812 813 /* set the cache to the new heuristic size if the new one 814 * is more than 10% off from the current one. 815 */ 816 d = bufs-nr_bufs; 817 if(d < 0) d = -d; 818 if(d*100/nr_bufs > 10) { 819 cache_resize(fs_block_size, bufs); 820 } 821 } 822 823 /*===========================================================================* 824 * lmfs_set_blocksize * 825 *===========================================================================*/ 826 void lmfs_set_blocksize(int new_block_size, int major) 827 { 828 cache_resize(new_block_size, MINBUFS); 829 cache_heuristic_check(major); 830 831 /* Decide whether to use seconday cache or not. 832 * Only do this if 833 * - it's available, and 834 * - use of it hasn't been disabled for this fs, and 835 * - our main FS device isn't a memory device 836 */ 837 838 vmcache = 0; 839 840 if(may_use_vmcache && !(new_block_size % PAGE_SIZE)) 841 vmcache = 1; 842 } 843 844 /*===========================================================================* 845 * lmfs_buf_pool * 846 *===========================================================================*/ 847 void lmfs_buf_pool(int new_nr_bufs) 848 { 849 /* Initialize the buffer pool. */ 850 register struct buf *bp; 851 852 assert(new_nr_bufs >= MINBUFS); 853 854 if(nr_bufs > 0) { 855 assert(buf); 856 lmfs_flushall(); 857 for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++) { 858 if(bp->data) { 859 assert(bp->lmfs_bytes > 0); 860 munmap_t(bp->data, bp->lmfs_bytes); 861 } 862 } 863 } 864 865 if(buf) 866 free(buf); 867 868 if(!(buf = calloc(sizeof(buf[0]), new_nr_bufs))) 869 panic("couldn't allocate buf list (%d)", new_nr_bufs); 870 871 if(buf_hash) 872 free(buf_hash); 873 if(!(buf_hash = calloc(sizeof(buf_hash[0]), new_nr_bufs))) 874 panic("couldn't allocate buf hash list (%d)", new_nr_bufs); 875 876 nr_bufs = new_nr_bufs; 877 878 bufs_in_use = 0; 879 front = &buf[0]; 880 rear = &buf[nr_bufs - 1]; 881 882 for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++) { 883 bp->lmfs_blocknr = NO_BLOCK; 884 bp->lmfs_dev = NO_DEV; 885 bp->lmfs_next = bp + 1; 886 bp->lmfs_prev = bp - 1; 887 bp->data = NULL; 888 bp->lmfs_bytes = 0; 889 } 890 front->lmfs_prev = NULL; 891 rear->lmfs_next = NULL; 892 893 for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++) bp->lmfs_hash = bp->lmfs_next; 894 buf_hash[0] = front; 895 } 896 897 int lmfs_bufs_in_use(void) 898 { 899 return bufs_in_use; 900 } 901 902 int lmfs_nr_bufs(void) 903 { 904 return nr_bufs; 905 } 906 907 void lmfs_flushall(void) 908 { 909 struct buf *bp; 910 for(bp = &buf[0]; bp < &buf[nr_bufs]; bp++) 911 if(bp->lmfs_dev != NO_DEV && !lmfs_isclean(bp)) 912 lmfs_flushdev(bp->lmfs_dev); 913 } 914 915 int lmfs_fs_block_size(void) 916 { 917 return fs_block_size; 918 } 919 920 void lmfs_may_use_vmcache(int ok) 921 { 922 may_use_vmcache = ok; 923 } 924 925 void lmfs_reset_rdwt_err(void) 926 { 927 rdwt_err = OK; 928 } 929 930 int lmfs_rdwt_err(void) 931 { 932 return rdwt_err; 933 } 934