1 #include "fs.h" 2 #include <stddef.h> 3 #include <string.h> 4 #include <stdlib.h> 5 #include "buf.h" 6 #include "inode.h" 7 #include "super.h" 8 #include <sys/param.h> 9 #include <sys/dirent.h> 10 #include <assert.h> 11 12 13 static struct buf *rahead(struct inode *rip, block_t baseblock, u64_t 14 position, unsigned bytes_ahead); 15 static int rw_chunk(struct inode *rip, u64_t position, unsigned off, 16 size_t chunk, unsigned left, int call, struct fsdriver_data *data, 17 unsigned buf_off, unsigned int block_size, int *completed); 18 19 20 /*===========================================================================* 21 * fs_readwrite * 22 *===========================================================================*/ 23 ssize_t fs_readwrite(ino_t ino_nr, struct fsdriver_data *data, size_t nrbytes, 24 off_t position, int call) 25 { 26 int r; 27 int regular; 28 off_t f_size, bytes_left; 29 size_t off, cum_io, block_size, chunk; 30 mode_t mode_word; 31 int completed; 32 struct inode *rip; 33 34 r = OK; 35 36 /* Find the inode referred */ 37 if ((rip = find_inode(fs_dev, ino_nr)) == NULL) 38 return(EINVAL); 39 40 mode_word = rip->i_mode & I_TYPE; 41 regular = (mode_word == I_REGULAR); 42 43 /* Determine blocksize */ 44 block_size = rip->i_sp->s_block_size; 45 f_size = rip->i_size; 46 47 /* If this is file i/o, check we can write */ 48 if (call == FSC_WRITE) { 49 if(rip->i_sp->s_rd_only) 50 return EROFS; 51 52 /* Check in advance to see if file will grow too big. */ 53 if (position > (off_t) (rip->i_sp->s_max_size - nrbytes)) 54 return(EFBIG); 55 56 /* Clear the zone containing present EOF if hole about 57 * to be created. This is necessary because all unwritten 58 * blocks prior to the EOF must read as zeros. 59 */ 60 if(position > f_size) clear_zone(rip, f_size, 0); 61 } 62 63 cum_io = 0; 64 /* Split the transfer into chunks that don't span two blocks. */ 65 while (nrbytes > 0) { 66 off = ((unsigned int) position) % block_size; /* offset in blk*/ 67 chunk = block_size - off; 68 if (chunk > nrbytes) 69 chunk = nrbytes; 70 71 if (call != FSC_WRITE) { 72 bytes_left = f_size - position; 73 if (position >= f_size) break; /* we are beyond EOF */ 74 if (chunk > (unsigned int) bytes_left) chunk = bytes_left; 75 } 76 77 /* Read or write 'chunk' bytes. */ 78 r = rw_chunk(rip, ((u64_t)((unsigned long)position)), off, chunk, 79 nrbytes, call, data, cum_io, block_size, &completed); 80 81 if (r != OK) break; 82 83 /* Update counters and pointers. */ 84 nrbytes -= chunk; /* bytes yet to be read */ 85 cum_io += chunk; /* bytes read so far */ 86 position += (off_t) chunk; /* position within the file */ 87 } 88 89 /* On write, update file size and access time. */ 90 if (call == FSC_WRITE) { 91 if (regular || mode_word == I_DIRECTORY) { 92 if (position > f_size) rip->i_size = position; 93 } 94 } 95 96 rip->i_seek = NO_SEEK; 97 98 if (r != OK) 99 return r; 100 101 /* even on a ROFS, writing to a device node on it is fine, 102 * just don't update the inode stats for it. And dito for reading. 103 */ 104 if (!rip->i_sp->s_rd_only) { 105 if (call == FSC_READ) rip->i_update |= ATIME; 106 if (call == FSC_WRITE) rip->i_update |= CTIME | MTIME; 107 IN_MARKDIRTY(rip); /* inode is thus now dirty */ 108 } 109 110 return cum_io; 111 } 112 113 114 /*===========================================================================* 115 * rw_chunk * 116 *===========================================================================*/ 117 static int rw_chunk(rip, position, off, chunk, left, call, data, buf_off, 118 block_size, completed) 119 register struct inode *rip; /* pointer to inode for file to be rd/wr */ 120 u64_t position; /* position within file to read or write */ 121 unsigned off; /* off within the current block */ 122 size_t chunk; /* number of bytes to read or write */ 123 unsigned left; /* max number of bytes wanted after position */ 124 int call; /* FSC_READ, FSC_WRITE, or FSC_PEEK */ 125 struct fsdriver_data *data; /* structure for (remote) user buffer */ 126 unsigned buf_off; /* offset in user buffer */ 127 unsigned int block_size; /* block size of FS operating on */ 128 int *completed; /* number of bytes copied */ 129 { 130 /* Read or write (part of) a block. */ 131 struct buf *bp = NULL; 132 register int r = OK; 133 int n; 134 block_t b; 135 dev_t dev; 136 ino_t ino = VMC_NO_INODE; 137 u64_t ino_off = rounddown(position, block_size); 138 139 *completed = 0; 140 141 if (ex64hi(position) != 0) 142 panic("rw_chunk: position too high"); 143 b = read_map(rip, (off_t) ex64lo(position), 0); 144 dev = rip->i_dev; 145 ino = rip->i_num; 146 assert(ino != VMC_NO_INODE); 147 148 if (b == NO_BLOCK) { 149 if (call == FSC_READ) { 150 /* Reading from a nonexistent block. Must read as all zeros.*/ 151 r = fsdriver_zero(data, buf_off, chunk); 152 if(r != OK) { 153 printf("MFS: fsdriver_zero failed\n"); 154 } 155 return r; 156 } else if (call == FSC_PEEK) { 157 /* Peeking a nonexistent block. Report to VM. */ 158 lmfs_zero_block_ino(dev, ino, ino_off); 159 return OK; 160 } else { 161 /* Writing to a nonexistent block. 162 * Create and enter in inode. 163 */ 164 if ((bp = new_block(rip, (off_t) ex64lo(position))) == NULL) 165 return(err_code); 166 } 167 } else if (call != FSC_WRITE) { 168 /* Read and read ahead if convenient. */ 169 bp = rahead(rip, b, position, left); 170 } else { 171 /* Normally an existing block to be partially overwritten is first read 172 * in. However, a full block need not be read in. If it is already in 173 * the cache, acquire it, otherwise just acquire a free buffer. 174 */ 175 n = (chunk == block_size ? NO_READ : NORMAL); 176 if (off == 0 && (off_t) ex64lo(position) >= rip->i_size) 177 n = NO_READ; 178 assert(ino != VMC_NO_INODE); 179 assert(!(ino_off % block_size)); 180 if ((r = lmfs_get_block_ino(&bp, dev, b, n, ino, ino_off)) != OK) 181 panic("MFS: error getting block (%llu,%u): %d", dev, b, r); 182 } 183 184 /* In all cases, bp now points to a valid buffer. */ 185 assert(bp != NULL); 186 187 if (call == FSC_WRITE && chunk != block_size && 188 (off_t) ex64lo(position) >= rip->i_size && off == 0) { 189 zero_block(bp); 190 } 191 192 if (call == FSC_READ) { 193 /* Copy a chunk from the block buffer to user space. */ 194 r = fsdriver_copyout(data, buf_off, b_data(bp)+off, chunk); 195 } else if (call == FSC_WRITE) { 196 /* Copy a chunk from user space to the block buffer. */ 197 r = fsdriver_copyin(data, buf_off, b_data(bp)+off, chunk); 198 MARKDIRTY(bp); 199 } 200 201 put_block(bp); 202 203 return(r); 204 } 205 206 207 /*===========================================================================* 208 * read_map * 209 *===========================================================================*/ 210 block_t read_map(rip, position, opportunistic) 211 register struct inode *rip; /* ptr to inode to map from */ 212 off_t position; /* position in file whose blk wanted */ 213 int opportunistic; /* if nonzero, only use cache for metadata */ 214 { 215 /* Given an inode and a position within the corresponding file, locate the 216 * block (not zone) number in which that position is to be found and return it. 217 */ 218 219 struct buf *bp; 220 zone_t z; 221 int scale, boff, index, zind; 222 unsigned int dzones, nr_indirects; 223 block_t b; 224 unsigned long excess, zone, block_pos; 225 int iomode; 226 227 iomode = opportunistic ? PEEK : NORMAL; 228 229 scale = rip->i_sp->s_log_zone_size; /* for block-zone conversion */ 230 block_pos = position/rip->i_sp->s_block_size; /* relative blk # in file */ 231 zone = block_pos >> scale; /* position's zone */ 232 boff = (int) (block_pos - (zone << scale) ); /* relative blk # within zone */ 233 dzones = rip->i_ndzones; 234 nr_indirects = rip->i_nindirs; 235 236 /* Is 'position' to be found in the inode itself? */ 237 if (zone < dzones) { 238 zind = (int) zone; /* index should be an int */ 239 z = rip->i_zone[zind]; 240 if (z == NO_ZONE) return(NO_BLOCK); 241 b = (block_t) ((z << scale) + boff); 242 return(b); 243 } 244 245 /* It is not in the inode, so it must be single or double indirect. */ 246 excess = zone - dzones; /* first Vx_NR_DZONES don't count */ 247 248 if (excess < nr_indirects) { 249 /* 'position' can be located via the single indirect block. */ 250 z = rip->i_zone[dzones]; 251 } else { 252 /* 'position' can be located via the double indirect block. */ 253 if ( (z = rip->i_zone[dzones+1]) == NO_ZONE) return(NO_BLOCK); 254 excess -= nr_indirects; /* single indir doesn't count*/ 255 b = (block_t) z << scale; 256 ASSERT(rip->i_dev != NO_DEV); 257 index = (int) (excess/nr_indirects); 258 if ((unsigned int) index > rip->i_nindirs) 259 return(NO_BLOCK); /* Can't go beyond double indirects */ 260 bp = get_block(rip->i_dev, b, iomode); /* get double indirect block */ 261 if (bp == NULL) 262 return NO_BLOCK; /* peeking failed */ 263 z = rd_indir(bp, index); /* z= zone for single*/ 264 put_block(bp); /* release double ind block */ 265 excess = excess % nr_indirects; /* index into single ind blk */ 266 } 267 268 /* 'z' is zone num for single indirect block; 'excess' is index into it. */ 269 if (z == NO_ZONE) return(NO_BLOCK); 270 b = (block_t) z << scale; /* b is blk # for single ind */ 271 bp = get_block(rip->i_dev, b, iomode); /* get single indirect block */ 272 if (bp == NULL) 273 return NO_BLOCK; /* peeking failed */ 274 z = rd_indir(bp, (int) excess); /* get block pointed to */ 275 put_block(bp); /* release single indir blk */ 276 if (z == NO_ZONE) return(NO_BLOCK); 277 b = (block_t) ((z << scale) + boff); 278 return(b); 279 } 280 281 struct buf *get_block_map(register struct inode *rip, u64_t position) 282 { 283 struct buf *bp; 284 int r, block_size; 285 block_t b = read_map(rip, position, 0); /* get block number */ 286 if(b == NO_BLOCK) 287 return NULL; 288 block_size = get_block_size(rip->i_dev); 289 position = rounddown(position, block_size); 290 assert(rip->i_num != VMC_NO_INODE); 291 if ((r = lmfs_get_block_ino(&bp, rip->i_dev, b, NORMAL, rip->i_num, 292 position)) != OK) 293 panic("MFS: error getting block (%llu,%u): %d", 294 rip->i_dev, b, r); 295 return bp; 296 } 297 298 /*===========================================================================* 299 * rd_indir * 300 *===========================================================================*/ 301 zone_t rd_indir(bp, index) 302 struct buf *bp; /* pointer to indirect block */ 303 int index; /* index into *bp */ 304 { 305 struct super_block *sp; 306 zone_t zone; 307 308 if(bp == NULL) 309 panic("rd_indir() on NULL"); 310 311 sp = &superblock; 312 313 /* read a zone from an indirect block */ 314 assert(sp->s_version == V3); 315 zone = (zone_t) conv4(sp->s_native, (long) b_v2_ind(bp)[index]); 316 317 if (zone != NO_ZONE && 318 (zone < (zone_t) sp->s_firstdatazone || zone >= sp->s_zones)) { 319 printf("Illegal zone number %ld in indirect block, index %d\n", 320 (long) zone, index); 321 panic("check file system"); 322 } 323 324 return(zone); 325 } 326 327 /*===========================================================================* 328 * rahead * 329 *===========================================================================*/ 330 static struct buf *rahead(rip, baseblock, position, bytes_ahead) 331 register struct inode *rip; /* pointer to inode for file to be read */ 332 block_t baseblock; /* block at current position */ 333 u64_t position; /* position within file */ 334 unsigned bytes_ahead; /* bytes beyond position for immediate use */ 335 { 336 /* Fetch a block from the cache or the device. If a physical read is 337 * required, prefetch as many more blocks as convenient into the cache. 338 * This usually covers bytes_ahead and is at least BLOCKS_MINIMUM. 339 * The device driver may decide it knows better and stop reading at a 340 * cylinder boundary (or after an error). Rw_scattered() puts an optional 341 * flag on all reads to allow this. 342 */ 343 /* Minimum number of blocks to prefetch. */ 344 # define BLOCKS_MINIMUM 32 345 int r, scale, read_q_size; 346 unsigned int blocks_ahead, fragment, block_size; 347 block_t block, blocks_left; 348 off_t ind1_pos; 349 dev_t dev; 350 struct buf *bp; 351 static block64_t read_q[LMFS_MAX_PREFETCH]; 352 u64_t position_running; 353 354 dev = rip->i_dev; 355 assert(dev != NO_DEV); 356 357 block_size = get_block_size(dev); 358 359 block = baseblock; 360 361 fragment = position % block_size; 362 position -= fragment; 363 position_running = position; 364 bytes_ahead += fragment; 365 blocks_ahead = (bytes_ahead + block_size - 1) / block_size; 366 367 r = lmfs_get_block_ino(&bp, dev, block, PEEK, rip->i_num, position); 368 if (r == OK) 369 return(bp); 370 if (r != ENOENT) 371 panic("MFS: error getting block (%llu,%u): %d", dev, block, r); 372 373 /* The best guess for the number of blocks to prefetch: A lot. 374 * It is impossible to tell what the device looks like, so we don't even 375 * try to guess the geometry, but leave it to the driver. 376 * 377 * The floppy driver can read a full track with no rotational delay, and it 378 * avoids reading partial tracks if it can, so handing it enough buffers to 379 * read two tracks is perfect. (Two, because some diskette types have 380 * an odd number of sectors per track, so a block may span tracks.) 381 * 382 * The disk drivers don't try to be smart. With todays disks it is 383 * impossible to tell what the real geometry looks like, so it is best to 384 * read as much as you can. With luck the caching on the drive allows 385 * for a little time to start the next read. 386 * 387 * The current solution below is a bit of a hack, it just reads blocks from 388 * the current file position hoping that more of the file can be found. A 389 * better solution must look at the already available zone pointers and 390 * indirect blocks (but don't call read_map!). 391 */ 392 393 blocks_left = (block_t) (rip->i_size-ex64lo(position)+(block_size-1)) / 394 block_size; 395 396 /* Go for the first indirect block if we are in its neighborhood. */ 397 scale = rip->i_sp->s_log_zone_size; 398 ind1_pos = (off_t) rip->i_ndzones * (block_size << scale); 399 if ((off_t) ex64lo(position) <= ind1_pos && rip->i_size > ind1_pos) { 400 blocks_ahead++; 401 blocks_left++; 402 } 403 404 /* Read at least the minimum number of blocks, but not after a seek. */ 405 if (blocks_ahead < BLOCKS_MINIMUM && rip->i_seek == NO_SEEK) 406 blocks_ahead = BLOCKS_MINIMUM; 407 408 /* Can't go past end of file. */ 409 if (blocks_ahead > blocks_left) blocks_ahead = blocks_left; 410 411 /* No more than the maximum request. */ 412 if (blocks_ahead > LMFS_MAX_PREFETCH) blocks_ahead = LMFS_MAX_PREFETCH; 413 414 read_q_size = 0; 415 416 /* Acquire block buffers. */ 417 for (;;) { 418 block_t thisblock; 419 read_q[read_q_size++] = block; 420 421 if (--blocks_ahead == 0) break; 422 423 block++; 424 position_running += block_size; 425 426 thisblock = read_map(rip, (off_t) ex64lo(position_running), 1); 427 if (thisblock != NO_BLOCK) { 428 r = lmfs_get_block_ino(&bp, dev, thisblock, PEEK, rip->i_num, 429 position_running); 430 block = thisblock; 431 } else 432 r = lmfs_get_block(&bp, dev, block, PEEK); 433 434 if (r == OK) { 435 /* Oops, block already in the cache, get out. */ 436 put_block(bp); 437 break; 438 } 439 if (r != ENOENT) 440 panic("MFS: error getting block (%llu,%u): %d", dev, block, r); 441 } 442 lmfs_prefetch(dev, read_q, read_q_size); 443 444 r = lmfs_get_block_ino(&bp, dev, baseblock, NORMAL, rip->i_num, position); 445 if (r != OK) 446 panic("MFS: error getting block (%llu,%u): %d", dev, baseblock, r); 447 return bp; 448 } 449 450 451 /*===========================================================================* 452 * fs_getdents * 453 *===========================================================================*/ 454 ssize_t fs_getdents(ino_t ino_nr, struct fsdriver_data *data, size_t bytes, 455 off_t *posp) 456 { 457 #define GETDENTS_BUFSIZE (sizeof(struct dirent) + MFS_NAME_MAX + 1) 458 #define GETDENTS_ENTRIES 8 459 static char getdents_buf[GETDENTS_BUFSIZE * GETDENTS_ENTRIES]; 460 struct fsdriver_dentry fsdentry; 461 struct inode *rip, *entrip; 462 int r, done; 463 unsigned int block_size, len, type; 464 off_t pos, off, block_pos, new_pos, ent_pos; 465 struct buf *bp; 466 struct direct *dp; 467 char *cp; 468 469 /* Check whether the position is properly aligned */ 470 pos = *posp; 471 if( (unsigned int) pos % DIR_ENTRY_SIZE) 472 return(ENOENT); 473 474 if( (rip = get_inode(fs_dev, ino_nr)) == NULL) 475 return(EINVAL); 476 477 block_size = rip->i_sp->s_block_size; 478 off = (pos % block_size); /* Offset in block */ 479 block_pos = pos - off; 480 done = FALSE; /* Stop processing directory blocks when done is set */ 481 482 fsdriver_dentry_init(&fsdentry, data, bytes, getdents_buf, 483 sizeof(getdents_buf)); 484 485 /* The default position for the next request is EOF. If the user's buffer 486 * fills up before EOF, new_pos will be modified. */ 487 new_pos = rip->i_size; 488 489 r = 0; 490 491 for(; block_pos < rip->i_size; block_pos += block_size) { 492 /* Since directories don't have holes, 'bp' cannot be NULL. */ 493 bp = get_block_map(rip, block_pos); /* get a dir block */ 494 assert(bp != NULL); 495 496 /* Search a directory block. */ 497 if (block_pos < pos) 498 dp = &b_dir(bp)[off / DIR_ENTRY_SIZE]; 499 else 500 dp = &b_dir(bp)[0]; 501 for (; dp < &b_dir(bp)[NR_DIR_ENTRIES(block_size)]; dp++) { 502 if (dp->mfs_d_ino == 0) 503 continue; /* Entry is not in use */ 504 505 /* Compute the length of the name */ 506 cp = memchr(dp->mfs_d_name, '\0', sizeof(dp->mfs_d_name)); 507 if (cp == NULL) 508 len = sizeof(dp->mfs_d_name); 509 else 510 len = cp - (dp->mfs_d_name); 511 512 /* Need the position of this entry in the directory */ 513 ent_pos = block_pos + ((char *) dp - (char *) bp->data); 514 515 /* We also need(?) the file type of the target inode. */ 516 if (!(entrip = get_inode(fs_dev, (ino_t) dp->mfs_d_ino))) 517 panic("unexpected get_inode failure"); 518 type = IFTODT(entrip->i_mode); 519 put_inode(entrip); 520 521 /* MFS does not store file types in its directory entries, and 522 * fetching the mode from the inode is seriously expensive. 523 * Userland should always be prepared to receive DT_UNKNOWN. 524 */ 525 r = fsdriver_dentry_add(&fsdentry, (ino_t) dp->mfs_d_ino, 526 dp->mfs_d_name, len, type); 527 528 /* If the user buffer is full, or an error occurred, stop. */ 529 if (r <= 0) { 530 done = TRUE; 531 532 /* Record the position of this entry, it is the 533 * starting point of the next request (unless the 534 * postion is modified with lseek). 535 */ 536 new_pos = ent_pos; 537 break; 538 } 539 } 540 541 put_block(bp); 542 if (done) 543 break; 544 } 545 546 if (r >= 0 && (r = fsdriver_dentry_finish(&fsdentry)) >= 0) { 547 *posp = new_pos; 548 if(!rip->i_sp->s_rd_only) { 549 rip->i_update |= ATIME; 550 IN_MARKDIRTY(rip); 551 } 552 } 553 554 put_inode(rip); /* release the inode */ 555 return(r); 556 } 557