xref: /minix/minix/fs/ext2/read.c (revision 0a6a1f1d)
1 /* Created (MFS based):
2  *   February 2010 (Evgeniy Ivanov)
3  */
4 
5 #include "fs.h"
6 #include <stddef.h>
7 #include <string.h>
8 #include <stdlib.h>
9 #include "buf.h"
10 #include "inode.h"
11 #include "super.h"
12 #include <sys/param.h>
13 #include <sys/dirent.h>
14 #include <assert.h>
15 
16 
17 static struct buf *rahead(struct inode *rip, block_t baseblock, u64_t
18 	position, unsigned bytes_ahead);
19 static int rw_chunk(struct inode *rip, u64_t position, unsigned off,
20 	size_t chunk, unsigned left, int call, struct fsdriver_data *data,
21 	unsigned buf_off, unsigned int block_size, int *completed);
22 
23 /*===========================================================================*
24  *				fs_readwrite				     *
25  *===========================================================================*/
26 ssize_t fs_readwrite(ino_t ino_nr, struct fsdriver_data *data, size_t nrbytes,
27 	off_t position, int call)
28 {
29   int r;
30   int regular;
31   off_t f_size, bytes_left;
32   size_t off, cum_io, block_size, chunk;
33   mode_t mode_word;
34   int completed;
35   struct inode *rip;
36 
37   r = OK;
38 
39   /* Find the inode referred */
40   if ((rip = find_inode(fs_dev, ino_nr)) == NULL)
41 	return(EINVAL);
42 
43   mode_word = rip->i_mode & I_TYPE;
44   regular = (mode_word == I_REGULAR);
45 
46   /* Determine blocksize */
47   block_size = rip->i_sp->s_block_size;
48   f_size = rip->i_size;
49   if (f_size < 0) f_size = MAX_FILE_POS;
50 
51   if (call == FSC_WRITE) {
52 	/* Check in advance to see if file will grow too big. */
53 	if (position > (off_t) (rip->i_sp->s_max_size - nrbytes))
54 		return(EFBIG);
55   }
56 
57   cum_io = 0;
58   /* Split the transfer into chunks that don't span two blocks. */
59   while (nrbytes != 0) {
60 	off = (unsigned int) (position % block_size);/* offset in blk*/
61 	chunk = block_size - off;
62 	if (chunk > nrbytes)
63 		chunk = nrbytes;
64 
65 	if (call == FSC_READ) {
66 		bytes_left = f_size - position;
67 		if (position >= f_size) break;        /* we are beyond EOF */
68 		if (chunk > bytes_left) chunk = (int) bytes_left;
69 	}
70 
71 	/* Read or write 'chunk' bytes. */
72 	r = rw_chunk(rip, ((u64_t)((unsigned long)position)), off, chunk,
73 		nrbytes, call, data, cum_io, block_size, &completed);
74 
75 	if (r != OK) break;
76 
77 	/* Update counters and pointers. */
78 	nrbytes -= chunk;     /* bytes yet to be read */
79 	cum_io += chunk;      /* bytes read so far */
80 	position += (off_t) chunk;    /* position within the file */
81   }
82 
83   /* On write, update file size and access time. */
84   if (call == FSC_WRITE) {
85 	if (regular || mode_word == I_DIRECTORY) {
86 		if (position > f_size) rip->i_size = position;
87         }
88   }
89 
90   rip->i_seek = NO_SEEK;
91 
92   if (r != OK)
93 	return r;
94 
95   if (call == FSC_READ) rip->i_update |= ATIME;
96   if (call == FSC_WRITE) rip->i_update |= CTIME | MTIME;
97   rip->i_dirt = IN_DIRTY;          /* inode is thus now dirty */
98 
99   return(cum_io);
100 }
101 
102 
103 /*===========================================================================*
104  *				rw_chunk				     *
105  *===========================================================================*/
106 static int rw_chunk(rip, position, off, chunk, left, call, data, buf_off,
107 	block_size, completed)
108 register struct inode *rip;     /* pointer to inode for file to be rd/wr */
109 u64_t position;                 /* position within file to read or write */
110 unsigned off;                   /* off within the current block */
111 size_t chunk;                   /* number of bytes to read or write */
112 unsigned left;                  /* max number of bytes wanted after position */
113 int call;                       /* FSC_READ, FSC_WRITE, or FSC_PEEK */
114 struct fsdriver_data *data;     /* structure for (remote) user buffer */
115 unsigned buf_off;               /* offset in user buffer */
116 unsigned int block_size;        /* block size of FS operating on */
117 int *completed;                 /* number of bytes copied */
118 {
119 /* Read or write (part of) a block. */
120 
121   struct buf *bp = NULL;
122   register int r = OK;
123   int n;
124   block_t b;
125   dev_t dev;
126   ino_t ino = VMC_NO_INODE;
127   u64_t ino_off = rounddown(position, block_size);
128 
129   *completed = 0;
130 
131   if (ex64hi(position) != 0)
132 	panic("rw_chunk: position too high");
133   b = read_map(rip, (off_t) ex64lo(position), 0);
134   dev = rip->i_dev;
135   ino = rip->i_num;
136   assert(ino != VMC_NO_INODE);
137 
138   if (b == NO_BLOCK) {
139 	if (call == FSC_READ) {
140 		/* Reading from a nonexistent block.  Must read as all zeros.*/
141 		r = fsdriver_zero(data, buf_off, chunk);
142 		if(r != OK) {
143 			printf("ext2fs: fsdriver_zero failed\n");
144 		}
145 		return r;
146 	} else if (call == FSC_PEEK) {
147 		/* Peeking a nonexistent block. Report to VM. */
148 		lmfs_zero_block_ino(dev, ino, ino_off);
149 		return OK;
150 	} else {
151                /* Writing to a nonexistent block.
152                 * Create and enter in inode.
153                 */
154 		if ((bp = new_block(rip, (off_t) ex64lo(position))) == NULL)
155 			return(err_code);
156         }
157   } else if (call != FSC_WRITE) {
158 	/* Read and read ahead if convenient. */
159 	bp = rahead(rip, b, position, left);
160   } else {
161 	/* Normally an existing block to be partially overwritten is first read
162 	 * in.  However, a full block need not be read in.  If it is already in
163 	 * the cache, acquire it, otherwise just acquire a free buffer.
164          */
165 	n = (chunk == block_size ? NO_READ : NORMAL);
166 	if (off == 0 && (off_t) ex64lo(position) >= rip->i_size)
167 		n = NO_READ;
168 	assert(ino != VMC_NO_INODE);
169 	assert(!(ino_off % block_size));
170 	if ((r = lmfs_get_block_ino(&bp, dev, b, n, ino, ino_off)) != OK)
171 		panic("ext2: error getting block (%llu,%u): %d", dev, b, r);
172   }
173 
174   /* In all cases, bp now points to a valid buffer. */
175   if (bp == NULL)
176 	panic("bp not valid in rw_chunk, this can't happen");
177 
178   if (call == FSC_WRITE && chunk != block_size &&
179       (off_t) ex64lo(position) >= rip->i_size && off == 0) {
180 	zero_block(bp);
181   }
182 
183   if (call == FSC_READ) {
184 	/* Copy a chunk from the block buffer to user space. */
185 	r = fsdriver_copyout(data, buf_off, b_data(bp)+off, chunk);
186   } else if (call == FSC_WRITE) {
187 	/* Copy a chunk from user space to the block buffer. */
188 	r = fsdriver_copyin(data, buf_off, b_data(bp)+off, chunk);
189 	lmfs_markdirty(bp);
190   }
191 
192   put_block(bp);
193 
194   return(r);
195 }
196 
197 
198 /*===========================================================================*
199  *				read_map				     *
200  *===========================================================================*/
201 block_t read_map(rip, position, opportunistic)
202 register struct inode *rip;     /* ptr to inode to map from */
203 off_t position;                 /* position in file whose blk wanted */
204 int opportunistic;
205 {
206 /* Given an inode and a position within the corresponding file, locate the
207  * block number in which that position is to be found and return it.
208  */
209 
210   struct buf *bp;
211   int mindex;
212   block_t b;
213   unsigned long excess, block_pos;
214   static char first_time = TRUE;
215   static long addr_in_block;
216   static long addr_in_block2;
217   static long doub_ind_s;
218   static long triple_ind_s;
219   static long out_range_s;
220   int iomode;
221 
222   iomode = opportunistic ? PEEK : NORMAL;
223 
224   if (first_time) {
225 	addr_in_block = rip->i_sp->s_block_size / BLOCK_ADDRESS_BYTES;
226 	addr_in_block2 = addr_in_block * addr_in_block;
227 	doub_ind_s = EXT2_NDIR_BLOCKS + addr_in_block;
228 	triple_ind_s = doub_ind_s + addr_in_block2;
229 	out_range_s = triple_ind_s + addr_in_block2 * addr_in_block;
230 	first_time = FALSE;
231   }
232 
233   block_pos = position / rip->i_sp->s_block_size; /* relative blk # in file */
234 
235   /* Is 'position' to be found in the inode itself? */
236   if (block_pos < EXT2_NDIR_BLOCKS)
237 	return(rip->i_block[block_pos]);
238 
239   /* It is not in the inode, so it must be single, double or triple indirect */
240   if (block_pos < doub_ind_s) {
241 	b = rip->i_block[EXT2_NDIR_BLOCKS]; /* address of single indirect block */
242 	mindex = block_pos - EXT2_NDIR_BLOCKS;
243   } else if (block_pos >= out_range_s) { /* TODO: do we need it? */
244 	return(NO_BLOCK);
245   } else {
246 	/* double or triple indirect block. At first if it's triple,
247 	 * find double indirect block.
248 	 */
249 	excess = block_pos - doub_ind_s;
250 	b = rip->i_block[EXT2_DIND_BLOCK];
251 	if (block_pos >= triple_ind_s) {
252 		b = rip->i_block[EXT2_TIND_BLOCK];
253 		if (b == NO_BLOCK) return(NO_BLOCK);
254 		bp = get_block(rip->i_dev, b, NORMAL); /* get triple ind block */
255 		excess = block_pos - triple_ind_s;
256 		mindex = excess / addr_in_block2;
257 		b = rd_indir(bp, mindex);	/* num of double ind block */
258 		put_block(bp);			/* release triple ind block */
259 		excess = excess % addr_in_block2;
260 	}
261 	if (b == NO_BLOCK) return(NO_BLOCK);
262 	bp = get_block(rip->i_dev, b, iomode); /* get double indirect block */
263 	if (bp == NULL)
264 		return NO_BLOCK;		/* peeking failed */
265 	mindex = excess / addr_in_block;
266 	b = rd_indir(bp, mindex);	/* num of single ind block */
267 	put_block(bp);				/* release double ind block */
268 	mindex = excess % addr_in_block;	/* index into single ind blk */
269   }
270   if (b == NO_BLOCK) return(NO_BLOCK);
271   bp = get_block(rip->i_dev, b, iomode);       /* get single indirect block */
272   if (bp == NULL)
273 	return NO_BLOCK;			/* peeking failed */
274 
275   b = rd_indir(bp, mindex);
276   put_block(bp);				/* release single ind block */
277 
278   return(b);
279 }
280 
281 struct buf *get_block_map(register struct inode *rip, u64_t position)
282 {
283 	struct buf *bp;
284 	int r, block_size;
285 	block_t b = read_map(rip, position, 0);	/* get block number */
286 	if(b == NO_BLOCK)
287 		return NULL;
288 	block_size = get_block_size(rip->i_dev);
289 	position = rounddown(position, block_size);
290 	assert(rip->i_num != VMC_NO_INODE);
291 	if ((r = lmfs_get_block_ino(&bp, rip->i_dev, b, NORMAL, rip->i_num,
292 	    position)) != OK)
293 		panic("ext2: error getting block (%llu,%u): %d",
294 		    rip->i_dev, b, r);
295 	return bp;
296 }
297 
298 /*===========================================================================*
299  *				rd_indir				     *
300  *===========================================================================*/
301 block_t rd_indir(bp, mindex)
302 struct buf *bp;                 /* pointer to indirect block */
303 int mindex;                      /* index into *bp */
304 {
305   if (bp == NULL)
306 	panic("rd_indir() on NULL");
307   /* TODO: use conv call */
308   return conv4(le_CPU, b_ind(bp)[mindex]);
309 }
310 
311 
312 /*===========================================================================*
313  *				rahead					     *
314  *===========================================================================*/
315 static struct buf *rahead(rip, baseblock, position, bytes_ahead)
316 register struct inode *rip;     /* pointer to inode for file to be read */
317 block_t baseblock;              /* block at current position */
318 u64_t position;                 /* position within file */
319 unsigned bytes_ahead;           /* bytes beyond position for immediate use */
320 {
321 /* Fetch a block from the cache or the device.  If a physical read is
322  * required, prefetch as many more blocks as convenient into the cache.
323  * This usually covers bytes_ahead and is at least BLOCKS_MINIMUM.
324  * The device driver may decide it knows better and stop reading at a
325  * cylinder boundary (or after an error).  Rw_scattered() puts an optional
326  * flag on all reads to allow this.
327  */
328 /* Minimum number of blocks to prefetch. */
329 # define BLOCKS_MINIMUM		32
330   int r, read_q_size;
331   unsigned int blocks_ahead, fragment, block_size;
332   block_t block, blocks_left;
333   off_t ind1_pos;
334   dev_t dev;
335   struct buf *bp = NULL;
336   static block64_t read_q[LMFS_MAX_PREFETCH];
337   u64_t position_running;
338 
339   dev = rip->i_dev;
340   assert(dev != NO_DEV);
341   block_size = get_block_size(dev);
342 
343   block = baseblock;
344 
345   fragment = position % block_size;
346   position -= fragment;
347   position_running = position;
348   bytes_ahead += fragment;
349   blocks_ahead = (bytes_ahead + block_size - 1) / block_size;
350 
351   r = lmfs_get_block_ino(&bp, dev, block, PEEK, rip->i_num, position);
352   if (r == OK)
353 	return(bp);
354   if (r != ENOENT)
355 	panic("ext2: error getting block (%llu,%u): %d", dev, block, r);
356 
357   /* The best guess for the number of blocks to prefetch:  A lot.
358    * It is impossible to tell what the device looks like, so we don't even
359    * try to guess the geometry, but leave it to the driver.
360    *
361    * The floppy driver can read a full track with no rotational delay, and it
362    * avoids reading partial tracks if it can, so handing it enough buffers to
363    * read two tracks is perfect.  (Two, because some diskette types have
364    * an odd number of sectors per track, so a block may span tracks.)
365    *
366    * The disk drivers don't try to be smart.  With todays disks it is
367    * impossible to tell what the real geometry looks like, so it is best to
368    * read as much as you can.  With luck the caching on the drive allows
369    * for a little time to start the next read.
370    *
371    * The current solution below is a bit of a hack, it just reads blocks from
372    * the current file position hoping that more of the file can be found.  A
373    * better solution must look at the already available
374    * indirect blocks (but don't call read_map!).
375    */
376 
377   blocks_left = (block_t) (rip->i_size-ex64lo(position)+(block_size-1)) /
378                                                                 block_size;
379 
380   /* Go for the first indirect block if we are in its neighborhood. */
381   ind1_pos = (EXT2_NDIR_BLOCKS) * block_size;
382   if ((off_t) ex64lo(position) <= ind1_pos && rip->i_size > ind1_pos) {
383 	blocks_ahead++;
384 	blocks_left++;
385   }
386 
387   /* Read at least the minimum number of blocks, but not after a seek. */
388   if (blocks_ahead < BLOCKS_MINIMUM && rip->i_seek == NO_SEEK)
389 	blocks_ahead = BLOCKS_MINIMUM;
390 
391   /* Can't go past end of file. */
392   if (blocks_ahead > blocks_left) blocks_ahead = blocks_left;
393 
394   /* No more than the maximum request. */
395   if (blocks_ahead > LMFS_MAX_PREFETCH) blocks_ahead = LMFS_MAX_PREFETCH;
396 
397   read_q_size = 0;
398 
399   /* Acquire block buffers. */
400   for (;;) {
401   	block_t thisblock;
402 	read_q[read_q_size++] = block;
403 
404 	if (--blocks_ahead == 0) break;
405 
406 	block++;
407 	position_running += block_size;
408 
409 	thisblock = read_map(rip, (off_t) ex64lo(position_running), 1);
410 	if (thisblock != NO_BLOCK) {
411 		r = lmfs_get_block_ino(&bp, dev, thisblock, PEEK, rip->i_num,
412 		    position_running);
413 		block = thisblock;
414 	} else
415 		r = lmfs_get_block(&bp, dev, block, PEEK);
416 
417 	if (r == OK) {
418 		/* Oops, block already in the cache, get out. */
419 		put_block(bp);
420 		break;
421 	}
422 	if (r != ENOENT)
423 		panic("ext2: error getting block (%llu,%u): %d", dev, block,
424 		    r);
425   }
426   lmfs_prefetch(dev, read_q, read_q_size);
427 
428   r = lmfs_get_block_ino(&bp, dev, baseblock, NORMAL, rip->i_num, position);
429   if (r != OK)
430 	panic("ext2: error getting block (%llu,%u): %d", dev, baseblock, r);
431   return bp;
432 }
433 
434 
435 /*===========================================================================*
436  *				get_dtype				     *
437  *===========================================================================*/
438 static unsigned int get_dtype(struct ext2_disk_dir_desc *dp)
439 {
440 /* Return the type of the file identified by the given directory entry. */
441 
442   if (!HAS_INCOMPAT_FEATURE(superblock, INCOMPAT_FILETYPE))
443 	return DT_UNKNOWN;
444 
445   switch (dp->d_file_type) {
446   case EXT2_FT_REG_FILE:	return DT_REG;
447   case EXT2_FT_DIR:		return DT_DIR;
448   case EXT2_FT_SYMLINK:		return DT_LNK;
449   case EXT2_FT_BLKDEV:		return DT_BLK;
450   case EXT2_FT_CHRDEV:		return DT_CHR;
451   case EXT2_FT_FIFO:		return DT_FIFO;
452   default:			return DT_UNKNOWN;
453   }
454 }
455 
456 /*===========================================================================*
457  *				fs_getdents				     *
458  *===========================================================================*/
459 ssize_t fs_getdents(ino_t ino_nr, struct fsdriver_data *data, size_t bytes,
460 	off_t *posp)
461 {
462 #define GETDENTS_BUFSIZE (sizeof(struct dirent) + EXT2_NAME_MAX + 1)
463 #define GETDENTS_ENTRIES	8
464   static char getdents_buf[GETDENTS_BUFSIZE * GETDENTS_ENTRIES];
465   struct fsdriver_dentry fsdentry;
466   struct inode *rip;
467   int r, done;
468   unsigned int block_size, len;
469   off_t pos, off, block_pos, new_pos, ent_pos;
470   struct buf *bp;
471   struct ext2_disk_dir_desc *d_desc;
472   ino_t child_nr;
473 
474   /* Check whether the position is properly aligned */
475   pos = *posp;
476   if ((unsigned int) pos % DIR_ENTRY_ALIGN)
477 	return(ENOENT);
478 
479   if ((rip = get_inode(fs_dev, ino_nr)) == NULL)
480 	return(EINVAL);
481 
482   block_size = rip->i_sp->s_block_size;
483   off = (pos % block_size);             /* Offset in block */
484   block_pos = pos - off;
485   done = FALSE;       /* Stop processing directory blocks when done is set */
486 
487   fsdriver_dentry_init(&fsdentry, data, bytes, getdents_buf,
488 	sizeof(getdents_buf));
489 
490   /* The default position for the next request is EOF. If the user's buffer
491    * fills up before EOF, new_pos will be modified. */
492   new_pos = rip->i_size;
493 
494   r = 0;
495 
496   for (; block_pos < rip->i_size; block_pos += block_size) {
497 	off_t temp_pos = block_pos;
498         /* Since directories don't have holes, 'bp' cannot be NULL. */
499         bp = get_block_map(rip, block_pos);     /* get a dir block */
500         assert(bp != NULL);
501 	assert(bp != NULL);
502 
503 	/* Search a directory block. */
504 	d_desc = (struct ext2_disk_dir_desc*) &b_data(bp);
505 
506 	/* we need to seek to entry at off bytes.
507 	* when NEXT_DISC_DIR_POS == block_size it's last dentry.
508 	*/
509 	for (; temp_pos + conv2(le_CPU, d_desc->d_rec_len) <= pos
510 	       && NEXT_DISC_DIR_POS(d_desc, &b_data(bp)) < block_size;
511 	       d_desc = NEXT_DISC_DIR_DESC(d_desc)) {
512 		temp_pos += conv2(le_CPU, d_desc->d_rec_len);
513 	}
514 
515 	for (; CUR_DISC_DIR_POS(d_desc, &b_data(bp)) < block_size;
516 	     d_desc = NEXT_DISC_DIR_DESC(d_desc)) {
517 		if (d_desc->d_ino == 0)
518 			continue; /* Entry is not in use */
519 
520 		len = d_desc->d_name_len;
521 		assert(len <= NAME_MAX);
522 		assert(len <= EXT2_NAME_MAX);
523 
524 		/* Need the position of this entry in the directory */
525 		ent_pos = block_pos + ((char *)d_desc - b_data(bp));
526 
527 		child_nr = (ino_t) conv4(le_CPU, d_desc->d_ino);
528 		r = fsdriver_dentry_add(&fsdentry, child_nr, d_desc->d_name,
529 			len, get_dtype(d_desc));
530 
531 		/* If the user buffer is full, or an error occurred, stop. */
532 		if (r <= 0) {
533 			done = TRUE;
534 
535 			/* Record the position of this entry, it is the
536 			 * starting point of the next request (unless the
537 			 * position is modified with lseek).
538 			 */
539 			new_pos = ent_pos;
540 			break;
541 		}
542 	}
543 
544 	put_block(bp);
545 	if (done)
546 		break;
547   }
548 
549   if (r >= 0 && (r = fsdriver_dentry_finish(&fsdentry)) >= 0) {
550 	*posp = new_pos;
551 	rip->i_update |= ATIME;
552 	rip->i_dirt = IN_DIRTY;
553   }
554 
555   put_inode(rip);               /* release the inode */
556   return(r);
557 }
558