1 /* Created (MFS based): 2 * February 2010 (Evgeniy Ivanov) 3 */ 4 5 #include "fs.h" 6 #include <stddef.h> 7 #include <string.h> 8 #include <stdlib.h> 9 #include "buf.h" 10 #include "inode.h" 11 #include "super.h" 12 #include <sys/param.h> 13 #include <assert.h> 14 15 16 static struct buf *rahead(struct inode *rip, block_t baseblock, u64_t 17 position, unsigned bytes_ahead); 18 static int rw_chunk(struct inode *rip, u64_t position, unsigned off, 19 size_t chunk, unsigned left, int call, struct fsdriver_data *data, 20 unsigned buf_off, unsigned int block_size, int *completed); 21 22 /*===========================================================================* 23 * fs_readwrite * 24 *===========================================================================*/ 25 ssize_t fs_readwrite(ino_t ino_nr, struct fsdriver_data *data, size_t nrbytes, 26 off_t position, int call) 27 { 28 int r; 29 int regular; 30 off_t f_size, bytes_left; 31 size_t off, cum_io, block_size, chunk; 32 mode_t mode_word; 33 int completed; 34 struct inode *rip; 35 36 r = OK; 37 38 /* Find the inode referred */ 39 if ((rip = find_inode(fs_dev, ino_nr)) == NULL) 40 return(EINVAL); 41 42 mode_word = rip->i_mode & I_TYPE; 43 regular = (mode_word == I_REGULAR); 44 45 /* Determine blocksize */ 46 block_size = rip->i_sp->s_block_size; 47 f_size = rip->i_size; 48 if (f_size < 0) f_size = MAX_FILE_POS; 49 50 lmfs_reset_rdwt_err(); 51 52 if (call == FSC_WRITE) { 53 /* Check in advance to see if file will grow too big. */ 54 if (position > (off_t) (rip->i_sp->s_max_size - nrbytes)) 55 return(EFBIG); 56 } 57 58 cum_io = 0; 59 /* Split the transfer into chunks that don't span two blocks. */ 60 while (nrbytes != 0) { 61 off = (unsigned int) (position % block_size);/* offset in blk*/ 62 chunk = block_size - off; 63 if (chunk > nrbytes) 64 chunk = nrbytes; 65 66 if (call == FSC_READ) { 67 bytes_left = f_size - position; 68 if (position >= f_size) break; /* we are beyond EOF */ 69 if (chunk > bytes_left) chunk = (int) bytes_left; 70 } 71 72 /* Read or write 'chunk' bytes. */ 73 r = rw_chunk(rip, ((u64_t)((unsigned long)position)), off, chunk, 74 nrbytes, call, data, cum_io, block_size, &completed); 75 76 if (r != OK) break; /* EOF reached */ 77 if (lmfs_rdwt_err() < 0) break; 78 79 /* Update counters and pointers. */ 80 nrbytes -= chunk; /* bytes yet to be read */ 81 cum_io += chunk; /* bytes read so far */ 82 position += (off_t) chunk; /* position within the file */ 83 } 84 85 /* On write, update file size and access time. */ 86 if (call == FSC_WRITE) { 87 if (regular || mode_word == I_DIRECTORY) { 88 if (position > f_size) rip->i_size = position; 89 } 90 } 91 92 rip->i_seek = NO_SEEK; 93 94 if (lmfs_rdwt_err() != OK) r = lmfs_rdwt_err(); /* check for disk error */ 95 if (lmfs_rdwt_err() == END_OF_FILE) r = OK; 96 97 if (r != OK) 98 return r; 99 100 if (call == FSC_READ) rip->i_update |= ATIME; 101 if (call == FSC_WRITE) rip->i_update |= CTIME | MTIME; 102 rip->i_dirt = IN_DIRTY; /* inode is thus now dirty */ 103 104 return(cum_io); 105 } 106 107 108 /*===========================================================================* 109 * rw_chunk * 110 *===========================================================================*/ 111 static int rw_chunk(rip, position, off, chunk, left, call, data, buf_off, 112 block_size, completed) 113 register struct inode *rip; /* pointer to inode for file to be rd/wr */ 114 u64_t position; /* position within file to read or write */ 115 unsigned off; /* off within the current block */ 116 size_t chunk; /* number of bytes to read or write */ 117 unsigned left; /* max number of bytes wanted after position */ 118 int call; /* FSC_READ, FSC_WRITE, or FSC_PEEK */ 119 struct fsdriver_data *data; /* structure for (remote) user buffer */ 120 unsigned buf_off; /* offset in user buffer */ 121 unsigned int block_size; /* block size of FS operating on */ 122 int *completed; /* number of bytes copied */ 123 { 124 /* Read or write (part of) a block. */ 125 126 register struct buf *bp = NULL; 127 register int r = OK; 128 int n; 129 block_t b; 130 dev_t dev; 131 ino_t ino = VMC_NO_INODE; 132 u64_t ino_off = rounddown(position, block_size); 133 134 *completed = 0; 135 136 if (ex64hi(position) != 0) 137 panic("rw_chunk: position too high"); 138 b = read_map(rip, (off_t) ex64lo(position), 0); 139 dev = rip->i_dev; 140 ino = rip->i_num; 141 assert(ino != VMC_NO_INODE); 142 143 if (b == NO_BLOCK) { 144 if (call == FSC_READ) { 145 /* Reading from a nonexistent block. Must read as all zeros.*/ 146 r = fsdriver_zero(data, buf_off, chunk); 147 if(r != OK) { 148 printf("ext2fs: fsdriver_zero failed\n"); 149 } 150 return r; 151 } else { 152 /* Writing to or peeking a nonexistent block. 153 * Create and enter in inode. 154 */ 155 if ((bp = new_block(rip, (off_t) ex64lo(position))) == NULL) 156 return(err_code); 157 } 158 } else if (call != FSC_WRITE) { 159 /* Read and read ahead if convenient. */ 160 bp = rahead(rip, b, position, left); 161 } else { 162 /* Normally an existing block to be partially overwritten is first read 163 * in. However, a full block need not be read in. If it is already in 164 * the cache, acquire it, otherwise just acquire a free buffer. 165 */ 166 n = (chunk == block_size ? NO_READ : NORMAL); 167 if (off == 0 && (off_t) ex64lo(position) >= rip->i_size) 168 n = NO_READ; 169 assert(ino != VMC_NO_INODE); 170 assert(!(ino_off % block_size)); 171 bp = lmfs_get_block_ino(dev, b, n, ino, ino_off); 172 } 173 174 /* In all cases, bp now points to a valid buffer. */ 175 if (bp == NULL) 176 panic("bp not valid in rw_chunk, this can't happen"); 177 178 if (call == FSC_WRITE && chunk != block_size && 179 (off_t) ex64lo(position) >= rip->i_size && off == 0) { 180 zero_block(bp); 181 } 182 183 if (call == FSC_READ) { 184 /* Copy a chunk from the block buffer to user space. */ 185 r = fsdriver_copyout(data, buf_off, b_data(bp)+off, chunk); 186 } else if (call == FSC_WRITE) { 187 /* Copy a chunk from user space to the block buffer. */ 188 r = fsdriver_copyin(data, buf_off, b_data(bp)+off, chunk); 189 lmfs_markdirty(bp); 190 } 191 192 n = (off + chunk == block_size ? FULL_DATA_BLOCK : PARTIAL_DATA_BLOCK); 193 put_block(bp, n); 194 195 return(r); 196 } 197 198 199 /*===========================================================================* 200 * read_map * 201 *===========================================================================*/ 202 block_t read_map(rip, position, opportunistic) 203 register struct inode *rip; /* ptr to inode to map from */ 204 off_t position; /* position in file whose blk wanted */ 205 int opportunistic; 206 { 207 /* Given an inode and a position within the corresponding file, locate the 208 * block number in which that position is to be found and return it. 209 */ 210 211 struct buf *bp; 212 int mindex; 213 block_t b; 214 unsigned long excess, block_pos; 215 static char first_time = TRUE; 216 static long addr_in_block; 217 static long addr_in_block2; 218 static long doub_ind_s; 219 static long triple_ind_s; 220 static long out_range_s; 221 int iomode = NORMAL; 222 223 if(opportunistic) iomode = PREFETCH; 224 225 if (first_time) { 226 addr_in_block = rip->i_sp->s_block_size / BLOCK_ADDRESS_BYTES; 227 addr_in_block2 = addr_in_block * addr_in_block; 228 doub_ind_s = EXT2_NDIR_BLOCKS + addr_in_block; 229 triple_ind_s = doub_ind_s + addr_in_block2; 230 out_range_s = triple_ind_s + addr_in_block2 * addr_in_block; 231 first_time = FALSE; 232 } 233 234 block_pos = position / rip->i_sp->s_block_size; /* relative blk # in file */ 235 236 /* Is 'position' to be found in the inode itself? */ 237 if (block_pos < EXT2_NDIR_BLOCKS) 238 return(rip->i_block[block_pos]); 239 240 /* It is not in the inode, so it must be single, double or triple indirect */ 241 if (block_pos < doub_ind_s) { 242 b = rip->i_block[EXT2_NDIR_BLOCKS]; /* address of single indirect block */ 243 mindex = block_pos - EXT2_NDIR_BLOCKS; 244 } else if (block_pos >= out_range_s) { /* TODO: do we need it? */ 245 return(NO_BLOCK); 246 } else { 247 /* double or triple indirect block. At first if it's triple, 248 * find double indirect block. 249 */ 250 excess = block_pos - doub_ind_s; 251 b = rip->i_block[EXT2_DIND_BLOCK]; 252 if (block_pos >= triple_ind_s) { 253 b = rip->i_block[EXT2_TIND_BLOCK]; 254 if (b == NO_BLOCK) return(NO_BLOCK); 255 bp = get_block(rip->i_dev, b, NORMAL); /* get triple ind block */ 256 ASSERT(lmfs_dev(bp) != NO_DEV); 257 ASSERT(lmfs_dev(bp) == rip->i_dev); 258 excess = block_pos - triple_ind_s; 259 mindex = excess / addr_in_block2; 260 b = rd_indir(bp, mindex); /* num of double ind block */ 261 put_block(bp, INDIRECT_BLOCK); /* release triple ind block */ 262 excess = excess % addr_in_block2; 263 } 264 if (b == NO_BLOCK) return(NO_BLOCK); 265 bp = get_block(rip->i_dev, b, iomode); /* get double indirect block */ 266 if(opportunistic && lmfs_dev(bp) == NO_DEV) { 267 put_block(bp, INDIRECT_BLOCK); 268 return NO_BLOCK; 269 } 270 ASSERT(lmfs_dev(bp) != NO_DEV); 271 ASSERT(lmfs_dev(bp) == rip->i_dev); 272 mindex = excess / addr_in_block; 273 b = rd_indir(bp, mindex); /* num of single ind block */ 274 put_block(bp, INDIRECT_BLOCK); /* release double ind block */ 275 mindex = excess % addr_in_block; /* index into single ind blk */ 276 } 277 if (b == NO_BLOCK) return(NO_BLOCK); 278 bp = get_block(rip->i_dev, b, iomode); /* get single indirect block */ 279 if(opportunistic && lmfs_dev(bp) == NO_DEV) { 280 put_block(bp, INDIRECT_BLOCK); 281 return NO_BLOCK; 282 } 283 284 ASSERT(lmfs_dev(bp) != NO_DEV); 285 ASSERT(lmfs_dev(bp) == rip->i_dev); 286 b = rd_indir(bp, mindex); 287 put_block(bp, INDIRECT_BLOCK); /* release single ind block */ 288 289 return(b); 290 } 291 292 struct buf *get_block_map(register struct inode *rip, u64_t position) 293 { 294 block_t b = read_map(rip, position, 0); /* get block number */ 295 int block_size = get_block_size(rip->i_dev); 296 if(b == NO_BLOCK) 297 return NULL; 298 position = rounddown(position, block_size); 299 assert(rip->i_num != VMC_NO_INODE); 300 return lmfs_get_block_ino(rip->i_dev, b, NORMAL, rip->i_num, position); 301 } 302 303 /*===========================================================================* 304 * rd_indir * 305 *===========================================================================*/ 306 block_t rd_indir(bp, mindex) 307 struct buf *bp; /* pointer to indirect block */ 308 int mindex; /* index into *bp */ 309 { 310 if (bp == NULL) 311 panic("rd_indir() on NULL"); 312 /* TODO: use conv call */ 313 return conv4(le_CPU, b_ind(bp)[mindex]); 314 } 315 316 317 /*===========================================================================* 318 * rahead * 319 *===========================================================================*/ 320 static struct buf *rahead(rip, baseblock, position, bytes_ahead) 321 register struct inode *rip; /* pointer to inode for file to be read */ 322 block_t baseblock; /* block at current position */ 323 u64_t position; /* position within file */ 324 unsigned bytes_ahead; /* bytes beyond position for immediate use */ 325 { 326 /* Fetch a block from the cache or the device. If a physical read is 327 * required, prefetch as many more blocks as convenient into the cache. 328 * This usually covers bytes_ahead and is at least BLOCKS_MINIMUM. 329 * The device driver may decide it knows better and stop reading at a 330 * cylinder boundary (or after an error). Rw_scattered() puts an optional 331 * flag on all reads to allow this. 332 */ 333 /* Minimum number of blocks to prefetch. */ 334 # define BLOCKS_MINIMUM (nr_bufs < 50 ? 18 : 32) 335 int nr_bufs = lmfs_nr_bufs(); 336 int read_q_size; 337 unsigned int blocks_ahead, fragment, block_size; 338 block_t block, blocks_left; 339 off_t ind1_pos; 340 dev_t dev; 341 struct buf *bp = NULL; 342 static unsigned int readqsize = 0; 343 static struct buf **read_q = NULL; 344 u64_t position_running; 345 346 if(readqsize != nr_bufs) { 347 if(readqsize > 0) { 348 assert(read_q != NULL); 349 free(read_q); 350 read_q = NULL; 351 readqsize = 0; 352 } 353 354 assert(readqsize == 0); 355 assert(read_q == NULL); 356 357 if(!(read_q = malloc(sizeof(read_q[0])*nr_bufs))) 358 panic("couldn't allocate read_q"); 359 readqsize = nr_bufs; 360 } 361 362 dev = rip->i_dev; 363 assert(dev != NO_DEV); 364 block_size = get_block_size(dev); 365 366 block = baseblock; 367 368 fragment = position % block_size; 369 position -= fragment; 370 position_running = position; 371 bytes_ahead += fragment; 372 blocks_ahead = (bytes_ahead + block_size - 1) / block_size; 373 374 bp = lmfs_get_block_ino(dev, block, PREFETCH, rip->i_num, position); 375 assert(bp != NULL); 376 if (lmfs_dev(bp) != NO_DEV) return(bp); 377 378 /* The best guess for the number of blocks to prefetch: A lot. 379 * It is impossible to tell what the device looks like, so we don't even 380 * try to guess the geometry, but leave it to the driver. 381 * 382 * The floppy driver can read a full track with no rotational delay, and it 383 * avoids reading partial tracks if it can, so handing it enough buffers to 384 * read two tracks is perfect. (Two, because some diskette types have 385 * an odd number of sectors per track, so a block may span tracks.) 386 * 387 * The disk drivers don't try to be smart. With todays disks it is 388 * impossible to tell what the real geometry looks like, so it is best to 389 * read as much as you can. With luck the caching on the drive allows 390 * for a little time to start the next read. 391 * 392 * The current solution below is a bit of a hack, it just reads blocks from 393 * the current file position hoping that more of the file can be found. A 394 * better solution must look at the already available 395 * indirect blocks (but don't call read_map!). 396 */ 397 398 blocks_left = (block_t) (rip->i_size-ex64lo(position)+(block_size-1)) / 399 block_size; 400 401 /* Go for the first indirect block if we are in its neighborhood. */ 402 ind1_pos = (EXT2_NDIR_BLOCKS) * block_size; 403 if ((off_t) ex64lo(position) <= ind1_pos && rip->i_size > ind1_pos) { 404 blocks_ahead++; 405 blocks_left++; 406 } 407 408 /* No more than the maximum request. */ 409 if (blocks_ahead > NR_IOREQS) blocks_ahead = NR_IOREQS; 410 411 /* Read at least the minimum number of blocks, but not after a seek. */ 412 if (blocks_ahead < BLOCKS_MINIMUM && rip->i_seek == NO_SEEK) 413 blocks_ahead = BLOCKS_MINIMUM; 414 415 /* Can't go past end of file. */ 416 if (blocks_ahead > blocks_left) blocks_ahead = blocks_left; 417 418 read_q_size = 0; 419 420 /* Acquire block buffers. */ 421 for (;;) { 422 block_t thisblock; 423 read_q[read_q_size++] = bp; 424 425 if (--blocks_ahead == 0) break; 426 427 /* Don't trash the cache, leave 4 free. */ 428 if (lmfs_bufs_in_use() >= nr_bufs - 4) break; 429 430 block++; 431 position_running += block_size; 432 433 thisblock = read_map(rip, (off_t) ex64lo(position_running), 1); 434 if (thisblock != NO_BLOCK) { 435 bp = lmfs_get_block_ino(dev, thisblock, PREFETCH, rip->i_num, 436 position_running); 437 } else { 438 bp = get_block(dev, block, PREFETCH); 439 } 440 if (lmfs_dev(bp) != NO_DEV) { 441 /* Oops, block already in the cache, get out. */ 442 put_block(bp, FULL_DATA_BLOCK); 443 break; 444 } 445 } 446 lmfs_rw_scattered(dev, read_q, read_q_size, READING); 447 448 return(lmfs_get_block_ino(dev, baseblock, NORMAL, rip->i_num, position)); 449 } 450 451 452 /*===========================================================================* 453 * get_dtype * 454 *===========================================================================*/ 455 static unsigned int get_dtype(struct ext2_disk_dir_desc *dp) 456 { 457 /* Return the type of the file identified by the given directory entry. */ 458 459 if (!HAS_INCOMPAT_FEATURE(superblock, INCOMPAT_FILETYPE)) 460 return DT_UNKNOWN; 461 462 switch (dp->d_file_type) { 463 case EXT2_FT_REG_FILE: return DT_REG; 464 case EXT2_FT_DIR: return DT_DIR; 465 case EXT2_FT_SYMLINK: return DT_LNK; 466 case EXT2_FT_BLKDEV: return DT_BLK; 467 case EXT2_FT_CHRDEV: return DT_CHR; 468 case EXT2_FT_FIFO: return DT_FIFO; 469 default: return DT_UNKNOWN; 470 } 471 } 472 473 /*===========================================================================* 474 * fs_getdents * 475 *===========================================================================*/ 476 ssize_t fs_getdents(ino_t ino_nr, struct fsdriver_data *data, size_t bytes, 477 off_t *posp) 478 { 479 #define GETDENTS_BUFSIZE (sizeof(struct dirent) + EXT2_NAME_MAX + 1) 480 #define GETDENTS_ENTRIES 8 481 static char getdents_buf[GETDENTS_BUFSIZE * GETDENTS_ENTRIES]; 482 struct fsdriver_dentry fsdentry; 483 struct inode *rip; 484 int r, done; 485 unsigned int block_size, len; 486 off_t pos, off, block_pos, new_pos, ent_pos; 487 struct buf *bp; 488 struct ext2_disk_dir_desc *d_desc; 489 ino_t child_nr; 490 491 /* Check whether the position is properly aligned */ 492 pos = *posp; 493 if ((unsigned int) pos % DIR_ENTRY_ALIGN) 494 return(ENOENT); 495 496 if ((rip = get_inode(fs_dev, ino_nr)) == NULL) 497 return(EINVAL); 498 499 block_size = rip->i_sp->s_block_size; 500 off = (pos % block_size); /* Offset in block */ 501 block_pos = pos - off; 502 done = FALSE; /* Stop processing directory blocks when done is set */ 503 504 fsdriver_dentry_init(&fsdentry, data, bytes, getdents_buf, 505 sizeof(getdents_buf)); 506 507 /* The default position for the next request is EOF. If the user's buffer 508 * fills up before EOF, new_pos will be modified. */ 509 new_pos = rip->i_size; 510 511 r = 0; 512 513 for (; block_pos < rip->i_size; block_pos += block_size) { 514 off_t temp_pos = block_pos; 515 /* Since directories don't have holes, 'bp' cannot be NULL. */ 516 bp = get_block_map(rip, block_pos); /* get a dir block */ 517 assert(bp != NULL); 518 assert(bp != NULL); 519 520 /* Search a directory block. */ 521 d_desc = (struct ext2_disk_dir_desc*) &b_data(bp); 522 523 /* we need to seek to entry at off bytes. 524 * when NEXT_DISC_DIR_POS == block_size it's last dentry. 525 */ 526 for (; temp_pos + conv2(le_CPU, d_desc->d_rec_len) <= pos 527 && NEXT_DISC_DIR_POS(d_desc, &b_data(bp)) < block_size; 528 d_desc = NEXT_DISC_DIR_DESC(d_desc)) { 529 temp_pos += conv2(le_CPU, d_desc->d_rec_len); 530 } 531 532 for (; CUR_DISC_DIR_POS(d_desc, &b_data(bp)) < block_size; 533 d_desc = NEXT_DISC_DIR_DESC(d_desc)) { 534 if (d_desc->d_ino == 0) 535 continue; /* Entry is not in use */ 536 537 len = d_desc->d_name_len; 538 assert(len <= NAME_MAX); 539 assert(len <= EXT2_NAME_MAX); 540 541 /* Need the position of this entry in the directory */ 542 ent_pos = block_pos + ((char *)d_desc - b_data(bp)); 543 544 child_nr = (ino_t) conv4(le_CPU, d_desc->d_ino); 545 r = fsdriver_dentry_add(&fsdentry, child_nr, d_desc->d_name, 546 len, get_dtype(d_desc)); 547 548 /* If the user buffer is full, or an error occurred, stop. */ 549 if (r <= 0) { 550 done = TRUE; 551 552 /* Record the position of this entry, it is the 553 * starting point of the next request (unless the 554 * position is modified with lseek). 555 */ 556 new_pos = ent_pos; 557 break; 558 } 559 } 560 561 put_block(bp, DIRECTORY_BLOCK); 562 if (done) 563 break; 564 } 565 566 if (r >= 0 && (r = fsdriver_dentry_finish(&fsdentry)) >= 0) { 567 *posp = new_pos; 568 rip->i_update |= ATIME; 569 rip->i_dirt = IN_DIRTY; 570 } 571 572 put_inode(rip); /* release the inode */ 573 return(r); 574 } 575