1 /* Created (MFS based): 2 * February 2010 (Evgeniy Ivanov) 3 */ 4 5 #include "fs.h" 6 #include <stddef.h> 7 #include <string.h> 8 #include <stdlib.h> 9 #include "buf.h" 10 #include "inode.h" 11 #include "super.h" 12 #include <sys/param.h> 13 #include <sys/dirent.h> 14 #include <assert.h> 15 16 17 static struct buf *rahead(struct inode *rip, block_t baseblock, u64_t 18 position, unsigned bytes_ahead); 19 static int rw_chunk(struct inode *rip, u64_t position, unsigned off, 20 size_t chunk, unsigned left, int call, struct fsdriver_data *data, 21 unsigned buf_off, unsigned int block_size, int *completed); 22 23 /*===========================================================================* 24 * fs_readwrite * 25 *===========================================================================*/ 26 ssize_t fs_readwrite(ino_t ino_nr, struct fsdriver_data *data, size_t nrbytes, 27 off_t position, int call) 28 { 29 int r; 30 int regular; 31 off_t f_size, bytes_left; 32 size_t off, cum_io, block_size, chunk; 33 mode_t mode_word; 34 int completed; 35 struct inode *rip; 36 37 r = OK; 38 39 /* Find the inode referred */ 40 if ((rip = find_inode(fs_dev, ino_nr)) == NULL) 41 return(EINVAL); 42 43 mode_word = rip->i_mode & I_TYPE; 44 regular = (mode_word == I_REGULAR); 45 46 /* Determine blocksize */ 47 block_size = rip->i_sp->s_block_size; 48 f_size = rip->i_size; 49 if (f_size < 0) f_size = MAX_FILE_POS; 50 51 if (call == FSC_WRITE) { 52 /* Check in advance to see if file will grow too big. */ 53 if (position > (off_t) (rip->i_sp->s_max_size - nrbytes)) 54 return(EFBIG); 55 } 56 57 cum_io = 0; 58 /* Split the transfer into chunks that don't span two blocks. */ 59 while (nrbytes != 0) { 60 off = (unsigned int) (position % block_size);/* offset in blk*/ 61 chunk = block_size - off; 62 if (chunk > nrbytes) 63 chunk = nrbytes; 64 65 if (call == FSC_READ) { 66 bytes_left = f_size - position; 67 if (position >= f_size) break; /* we are beyond EOF */ 68 if (chunk > bytes_left) chunk = (int) bytes_left; 69 } 70 71 /* Read or write 'chunk' bytes. */ 72 r = rw_chunk(rip, ((u64_t)((unsigned long)position)), off, chunk, 73 nrbytes, call, data, cum_io, block_size, &completed); 74 75 if (r != OK) break; 76 77 /* Update counters and pointers. */ 78 nrbytes -= chunk; /* bytes yet to be read */ 79 cum_io += chunk; /* bytes read so far */ 80 position += (off_t) chunk; /* position within the file */ 81 } 82 83 /* On write, update file size and access time. */ 84 if (call == FSC_WRITE) { 85 if (regular || mode_word == I_DIRECTORY) { 86 if (position > f_size) rip->i_size = position; 87 } 88 } 89 90 rip->i_seek = NO_SEEK; 91 92 if (r != OK) 93 return r; 94 95 if (call == FSC_READ) rip->i_update |= ATIME; 96 if (call == FSC_WRITE) rip->i_update |= CTIME | MTIME; 97 rip->i_dirt = IN_DIRTY; /* inode is thus now dirty */ 98 99 return(cum_io); 100 } 101 102 103 /*===========================================================================* 104 * rw_chunk * 105 *===========================================================================*/ 106 static int rw_chunk(rip, position, off, chunk, left, call, data, buf_off, 107 block_size, completed) 108 register struct inode *rip; /* pointer to inode for file to be rd/wr */ 109 u64_t position; /* position within file to read or write */ 110 unsigned off; /* off within the current block */ 111 size_t chunk; /* number of bytes to read or write */ 112 unsigned left; /* max number of bytes wanted after position */ 113 int call; /* FSC_READ, FSC_WRITE, or FSC_PEEK */ 114 struct fsdriver_data *data; /* structure for (remote) user buffer */ 115 unsigned buf_off; /* offset in user buffer */ 116 unsigned int block_size; /* block size of FS operating on */ 117 int *completed; /* number of bytes copied */ 118 { 119 /* Read or write (part of) a block. */ 120 121 struct buf *bp = NULL; 122 register int r = OK; 123 int n; 124 block_t b; 125 dev_t dev; 126 ino_t ino = VMC_NO_INODE; 127 u64_t ino_off = rounddown(position, block_size); 128 129 *completed = 0; 130 131 if (ex64hi(position) != 0) 132 panic("rw_chunk: position too high"); 133 b = read_map(rip, (off_t) ex64lo(position), 0); 134 dev = rip->i_dev; 135 ino = rip->i_num; 136 assert(ino != VMC_NO_INODE); 137 138 if (b == NO_BLOCK) { 139 if (call == FSC_READ) { 140 /* Reading from a nonexistent block. Must read as all zeros.*/ 141 r = fsdriver_zero(data, buf_off, chunk); 142 if(r != OK) { 143 printf("ext2fs: fsdriver_zero failed\n"); 144 } 145 return r; 146 } else if (call == FSC_PEEK) { 147 /* Peeking a nonexistent block. Report to VM. */ 148 lmfs_zero_block_ino(dev, ino, ino_off); 149 return OK; 150 } else { 151 /* Writing to a nonexistent block. 152 * Create and enter in inode. 153 */ 154 if ((bp = new_block(rip, (off_t) ex64lo(position))) == NULL) 155 return(err_code); 156 } 157 } else if (call != FSC_WRITE) { 158 /* Read and read ahead if convenient. */ 159 bp = rahead(rip, b, position, left); 160 } else { 161 /* Normally an existing block to be partially overwritten is first read 162 * in. However, a full block need not be read in. If it is already in 163 * the cache, acquire it, otherwise just acquire a free buffer. 164 */ 165 n = (chunk == block_size ? NO_READ : NORMAL); 166 if (off == 0 && (off_t) ex64lo(position) >= rip->i_size) 167 n = NO_READ; 168 assert(ino != VMC_NO_INODE); 169 assert(!(ino_off % block_size)); 170 if ((r = lmfs_get_block_ino(&bp, dev, b, n, ino, ino_off)) != OK) 171 panic("ext2: error getting block (%llu,%u): %d", dev, b, r); 172 } 173 174 /* In all cases, bp now points to a valid buffer. */ 175 if (bp == NULL) 176 panic("bp not valid in rw_chunk, this can't happen"); 177 178 if (call == FSC_WRITE && chunk != block_size && 179 (off_t) ex64lo(position) >= rip->i_size && off == 0) { 180 zero_block(bp); 181 } 182 183 if (call == FSC_READ) { 184 /* Copy a chunk from the block buffer to user space. */ 185 r = fsdriver_copyout(data, buf_off, b_data(bp)+off, chunk); 186 } else if (call == FSC_WRITE) { 187 /* Copy a chunk from user space to the block buffer. */ 188 r = fsdriver_copyin(data, buf_off, b_data(bp)+off, chunk); 189 lmfs_markdirty(bp); 190 } 191 192 put_block(bp); 193 194 return(r); 195 } 196 197 198 /*===========================================================================* 199 * read_map * 200 *===========================================================================*/ 201 block_t read_map(rip, position, opportunistic) 202 register struct inode *rip; /* ptr to inode to map from */ 203 off_t position; /* position in file whose blk wanted */ 204 int opportunistic; 205 { 206 /* Given an inode and a position within the corresponding file, locate the 207 * block number in which that position is to be found and return it. 208 */ 209 210 struct buf *bp; 211 int mindex; 212 block_t b; 213 unsigned long excess, block_pos; 214 static char first_time = TRUE; 215 static long addr_in_block; 216 static long addr_in_block2; 217 static long doub_ind_s; 218 static long triple_ind_s; 219 static long out_range_s; 220 int iomode; 221 222 iomode = opportunistic ? PEEK : NORMAL; 223 224 if (first_time) { 225 addr_in_block = rip->i_sp->s_block_size / BLOCK_ADDRESS_BYTES; 226 addr_in_block2 = addr_in_block * addr_in_block; 227 doub_ind_s = EXT2_NDIR_BLOCKS + addr_in_block; 228 triple_ind_s = doub_ind_s + addr_in_block2; 229 out_range_s = triple_ind_s + addr_in_block2 * addr_in_block; 230 first_time = FALSE; 231 } 232 233 block_pos = position / rip->i_sp->s_block_size; /* relative blk # in file */ 234 235 /* Is 'position' to be found in the inode itself? */ 236 if (block_pos < EXT2_NDIR_BLOCKS) 237 return(rip->i_block[block_pos]); 238 239 /* It is not in the inode, so it must be single, double or triple indirect */ 240 if (block_pos < doub_ind_s) { 241 b = rip->i_block[EXT2_NDIR_BLOCKS]; /* address of single indirect block */ 242 mindex = block_pos - EXT2_NDIR_BLOCKS; 243 } else if (block_pos >= out_range_s) { /* TODO: do we need it? */ 244 return(NO_BLOCK); 245 } else { 246 /* double or triple indirect block. At first if it's triple, 247 * find double indirect block. 248 */ 249 excess = block_pos - doub_ind_s; 250 b = rip->i_block[EXT2_DIND_BLOCK]; 251 if (block_pos >= triple_ind_s) { 252 b = rip->i_block[EXT2_TIND_BLOCK]; 253 if (b == NO_BLOCK) return(NO_BLOCK); 254 bp = get_block(rip->i_dev, b, NORMAL); /* get triple ind block */ 255 excess = block_pos - triple_ind_s; 256 mindex = excess / addr_in_block2; 257 b = rd_indir(bp, mindex); /* num of double ind block */ 258 put_block(bp); /* release triple ind block */ 259 excess = excess % addr_in_block2; 260 } 261 if (b == NO_BLOCK) return(NO_BLOCK); 262 bp = get_block(rip->i_dev, b, iomode); /* get double indirect block */ 263 if (bp == NULL) 264 return NO_BLOCK; /* peeking failed */ 265 mindex = excess / addr_in_block; 266 b = rd_indir(bp, mindex); /* num of single ind block */ 267 put_block(bp); /* release double ind block */ 268 mindex = excess % addr_in_block; /* index into single ind blk */ 269 } 270 if (b == NO_BLOCK) return(NO_BLOCK); 271 bp = get_block(rip->i_dev, b, iomode); /* get single indirect block */ 272 if (bp == NULL) 273 return NO_BLOCK; /* peeking failed */ 274 275 b = rd_indir(bp, mindex); 276 put_block(bp); /* release single ind block */ 277 278 return(b); 279 } 280 281 struct buf *get_block_map(register struct inode *rip, u64_t position) 282 { 283 struct buf *bp; 284 int r, block_size; 285 block_t b = read_map(rip, position, 0); /* get block number */ 286 if(b == NO_BLOCK) 287 return NULL; 288 block_size = get_block_size(rip->i_dev); 289 position = rounddown(position, block_size); 290 assert(rip->i_num != VMC_NO_INODE); 291 if ((r = lmfs_get_block_ino(&bp, rip->i_dev, b, NORMAL, rip->i_num, 292 position)) != OK) 293 panic("ext2: error getting block (%llu,%u): %d", 294 rip->i_dev, b, r); 295 return bp; 296 } 297 298 /*===========================================================================* 299 * rd_indir * 300 *===========================================================================*/ 301 block_t rd_indir(bp, mindex) 302 struct buf *bp; /* pointer to indirect block */ 303 int mindex; /* index into *bp */ 304 { 305 if (bp == NULL) 306 panic("rd_indir() on NULL"); 307 /* TODO: use conv call */ 308 return conv4(le_CPU, b_ind(bp)[mindex]); 309 } 310 311 312 /*===========================================================================* 313 * rahead * 314 *===========================================================================*/ 315 static struct buf *rahead(rip, baseblock, position, bytes_ahead) 316 register struct inode *rip; /* pointer to inode for file to be read */ 317 block_t baseblock; /* block at current position */ 318 u64_t position; /* position within file */ 319 unsigned bytes_ahead; /* bytes beyond position for immediate use */ 320 { 321 /* Fetch a block from the cache or the device. If a physical read is 322 * required, prefetch as many more blocks as convenient into the cache. 323 * This usually covers bytes_ahead and is at least BLOCKS_MINIMUM. 324 * The device driver may decide it knows better and stop reading at a 325 * cylinder boundary (or after an error). Rw_scattered() puts an optional 326 * flag on all reads to allow this. 327 */ 328 /* Minimum number of blocks to prefetch. */ 329 # define BLOCKS_MINIMUM 32 330 int r, read_q_size; 331 unsigned int blocks_ahead, fragment, block_size; 332 block_t block, blocks_left; 333 off_t ind1_pos; 334 dev_t dev; 335 struct buf *bp = NULL; 336 static block64_t read_q[LMFS_MAX_PREFETCH]; 337 u64_t position_running; 338 339 dev = rip->i_dev; 340 assert(dev != NO_DEV); 341 block_size = get_block_size(dev); 342 343 block = baseblock; 344 345 fragment = position % block_size; 346 position -= fragment; 347 position_running = position; 348 bytes_ahead += fragment; 349 blocks_ahead = (bytes_ahead + block_size - 1) / block_size; 350 351 r = lmfs_get_block_ino(&bp, dev, block, PEEK, rip->i_num, position); 352 if (r == OK) 353 return(bp); 354 if (r != ENOENT) 355 panic("ext2: error getting block (%llu,%u): %d", dev, block, r); 356 357 /* The best guess for the number of blocks to prefetch: A lot. 358 * It is impossible to tell what the device looks like, so we don't even 359 * try to guess the geometry, but leave it to the driver. 360 * 361 * The floppy driver can read a full track with no rotational delay, and it 362 * avoids reading partial tracks if it can, so handing it enough buffers to 363 * read two tracks is perfect. (Two, because some diskette types have 364 * an odd number of sectors per track, so a block may span tracks.) 365 * 366 * The disk drivers don't try to be smart. With todays disks it is 367 * impossible to tell what the real geometry looks like, so it is best to 368 * read as much as you can. With luck the caching on the drive allows 369 * for a little time to start the next read. 370 * 371 * The current solution below is a bit of a hack, it just reads blocks from 372 * the current file position hoping that more of the file can be found. A 373 * better solution must look at the already available 374 * indirect blocks (but don't call read_map!). 375 */ 376 377 blocks_left = (block_t) (rip->i_size-ex64lo(position)+(block_size-1)) / 378 block_size; 379 380 /* Go for the first indirect block if we are in its neighborhood. */ 381 ind1_pos = (EXT2_NDIR_BLOCKS) * block_size; 382 if ((off_t) ex64lo(position) <= ind1_pos && rip->i_size > ind1_pos) { 383 blocks_ahead++; 384 blocks_left++; 385 } 386 387 /* Read at least the minimum number of blocks, but not after a seek. */ 388 if (blocks_ahead < BLOCKS_MINIMUM && rip->i_seek == NO_SEEK) 389 blocks_ahead = BLOCKS_MINIMUM; 390 391 /* Can't go past end of file. */ 392 if (blocks_ahead > blocks_left) blocks_ahead = blocks_left; 393 394 /* No more than the maximum request. */ 395 if (blocks_ahead > LMFS_MAX_PREFETCH) blocks_ahead = LMFS_MAX_PREFETCH; 396 397 read_q_size = 0; 398 399 /* Acquire block buffers. */ 400 for (;;) { 401 block_t thisblock; 402 read_q[read_q_size++] = block; 403 404 if (--blocks_ahead == 0) break; 405 406 block++; 407 position_running += block_size; 408 409 thisblock = read_map(rip, (off_t) ex64lo(position_running), 1); 410 if (thisblock != NO_BLOCK) { 411 r = lmfs_get_block_ino(&bp, dev, thisblock, PEEK, rip->i_num, 412 position_running); 413 block = thisblock; 414 } else 415 r = lmfs_get_block(&bp, dev, block, PEEK); 416 417 if (r == OK) { 418 /* Oops, block already in the cache, get out. */ 419 put_block(bp); 420 break; 421 } 422 if (r != ENOENT) 423 panic("ext2: error getting block (%llu,%u): %d", dev, block, 424 r); 425 } 426 lmfs_prefetch(dev, read_q, read_q_size); 427 428 r = lmfs_get_block_ino(&bp, dev, baseblock, NORMAL, rip->i_num, position); 429 if (r != OK) 430 panic("ext2: error getting block (%llu,%u): %d", dev, baseblock, r); 431 return bp; 432 } 433 434 435 /*===========================================================================* 436 * get_dtype * 437 *===========================================================================*/ 438 static unsigned int get_dtype(struct ext2_disk_dir_desc *dp) 439 { 440 /* Return the type of the file identified by the given directory entry. */ 441 442 if (!HAS_INCOMPAT_FEATURE(superblock, INCOMPAT_FILETYPE)) 443 return DT_UNKNOWN; 444 445 switch (dp->d_file_type) { 446 case EXT2_FT_REG_FILE: return DT_REG; 447 case EXT2_FT_DIR: return DT_DIR; 448 case EXT2_FT_SYMLINK: return DT_LNK; 449 case EXT2_FT_BLKDEV: return DT_BLK; 450 case EXT2_FT_CHRDEV: return DT_CHR; 451 case EXT2_FT_FIFO: return DT_FIFO; 452 default: return DT_UNKNOWN; 453 } 454 } 455 456 /*===========================================================================* 457 * fs_getdents * 458 *===========================================================================*/ 459 ssize_t fs_getdents(ino_t ino_nr, struct fsdriver_data *data, size_t bytes, 460 off_t *posp) 461 { 462 #define GETDENTS_BUFSIZE (sizeof(struct dirent) + EXT2_NAME_MAX + 1) 463 #define GETDENTS_ENTRIES 8 464 static char getdents_buf[GETDENTS_BUFSIZE * GETDENTS_ENTRIES]; 465 struct fsdriver_dentry fsdentry; 466 struct inode *rip; 467 int r, done; 468 unsigned int block_size, len; 469 off_t pos, off, block_pos, new_pos, ent_pos; 470 struct buf *bp; 471 struct ext2_disk_dir_desc *d_desc; 472 ino_t child_nr; 473 474 /* Check whether the position is properly aligned */ 475 pos = *posp; 476 if ((unsigned int) pos % DIR_ENTRY_ALIGN) 477 return(ENOENT); 478 479 if ((rip = get_inode(fs_dev, ino_nr)) == NULL) 480 return(EINVAL); 481 482 block_size = rip->i_sp->s_block_size; 483 off = (pos % block_size); /* Offset in block */ 484 block_pos = pos - off; 485 done = FALSE; /* Stop processing directory blocks when done is set */ 486 487 fsdriver_dentry_init(&fsdentry, data, bytes, getdents_buf, 488 sizeof(getdents_buf)); 489 490 /* The default position for the next request is EOF. If the user's buffer 491 * fills up before EOF, new_pos will be modified. */ 492 new_pos = rip->i_size; 493 494 r = 0; 495 496 for (; block_pos < rip->i_size; block_pos += block_size) { 497 off_t temp_pos = block_pos; 498 /* Since directories don't have holes, 'bp' cannot be NULL. */ 499 bp = get_block_map(rip, block_pos); /* get a dir block */ 500 assert(bp != NULL); 501 assert(bp != NULL); 502 503 /* Search a directory block. */ 504 d_desc = (struct ext2_disk_dir_desc*) &b_data(bp); 505 506 /* we need to seek to entry at off bytes. 507 * when NEXT_DISC_DIR_POS == block_size it's last dentry. 508 */ 509 for (; temp_pos + conv2(le_CPU, d_desc->d_rec_len) <= pos 510 && NEXT_DISC_DIR_POS(d_desc, &b_data(bp)) < block_size; 511 d_desc = NEXT_DISC_DIR_DESC(d_desc)) { 512 temp_pos += conv2(le_CPU, d_desc->d_rec_len); 513 } 514 515 for (; CUR_DISC_DIR_POS(d_desc, &b_data(bp)) < block_size; 516 d_desc = NEXT_DISC_DIR_DESC(d_desc)) { 517 if (d_desc->d_ino == 0) 518 continue; /* Entry is not in use */ 519 520 len = d_desc->d_name_len; 521 assert(len <= NAME_MAX); 522 assert(len <= EXT2_NAME_MAX); 523 524 /* Need the position of this entry in the directory */ 525 ent_pos = block_pos + ((char *)d_desc - b_data(bp)); 526 527 child_nr = (ino_t) conv4(le_CPU, d_desc->d_ino); 528 r = fsdriver_dentry_add(&fsdentry, child_nr, d_desc->d_name, 529 len, get_dtype(d_desc)); 530 531 /* If the user buffer is full, or an error occurred, stop. */ 532 if (r <= 0) { 533 done = TRUE; 534 535 /* Record the position of this entry, it is the 536 * starting point of the next request (unless the 537 * position is modified with lseek). 538 */ 539 new_pos = ent_pos; 540 break; 541 } 542 } 543 544 put_block(bp); 545 if (done) 546 break; 547 } 548 549 if (r >= 0 && (r = fsdriver_dentry_finish(&fsdentry)) >= 0) { 550 *posp = new_pos; 551 rip->i_update |= ATIME; 552 rip->i_dirt = IN_DIRTY; 553 } 554 555 put_inode(rip); /* release the inode */ 556 return(r); 557 } 558