xref: /minix/minix/fs/ext2/read.c (revision 83133719)
1 /* Created (MFS based):
2  *   February 2010 (Evgeniy Ivanov)
3  */
4 
5 #include "fs.h"
6 #include <stddef.h>
7 #include <string.h>
8 #include <stdlib.h>
9 #include "buf.h"
10 #include "inode.h"
11 #include "super.h"
12 #include <sys/param.h>
13 #include <assert.h>
14 
15 
16 static struct buf *rahead(struct inode *rip, block_t baseblock, u64_t
17 	position, unsigned bytes_ahead);
18 static int rw_chunk(struct inode *rip, u64_t position, unsigned off,
19 	size_t chunk, unsigned left, int call, struct fsdriver_data *data,
20 	unsigned buf_off, unsigned int block_size, int *completed);
21 
22 /*===========================================================================*
23  *				fs_readwrite				     *
24  *===========================================================================*/
25 ssize_t fs_readwrite(ino_t ino_nr, struct fsdriver_data *data, size_t nrbytes,
26 	off_t position, int call)
27 {
28   int r;
29   int regular;
30   off_t f_size, bytes_left;
31   size_t off, cum_io, block_size, chunk;
32   mode_t mode_word;
33   int completed;
34   struct inode *rip;
35 
36   r = OK;
37 
38   /* Find the inode referred */
39   if ((rip = find_inode(fs_dev, ino_nr)) == NULL)
40 	return(EINVAL);
41 
42   mode_word = rip->i_mode & I_TYPE;
43   regular = (mode_word == I_REGULAR);
44 
45   /* Determine blocksize */
46   block_size = rip->i_sp->s_block_size;
47   f_size = rip->i_size;
48   if (f_size < 0) f_size = MAX_FILE_POS;
49 
50   lmfs_reset_rdwt_err();
51 
52   if (call == FSC_WRITE) {
53 	/* Check in advance to see if file will grow too big. */
54 	if (position > (off_t) (rip->i_sp->s_max_size - nrbytes))
55 		return(EFBIG);
56   }
57 
58   cum_io = 0;
59   /* Split the transfer into chunks that don't span two blocks. */
60   while (nrbytes != 0) {
61 	off = (unsigned int) (position % block_size);/* offset in blk*/
62 	chunk = block_size - off;
63 	if (chunk > nrbytes)
64 		chunk = nrbytes;
65 
66 	if (call == FSC_READ) {
67 		bytes_left = f_size - position;
68 		if (position >= f_size) break;        /* we are beyond EOF */
69 		if (chunk > bytes_left) chunk = (int) bytes_left;
70 	}
71 
72 	/* Read or write 'chunk' bytes. */
73 	r = rw_chunk(rip, ((u64_t)((unsigned long)position)), off, chunk,
74 		nrbytes, call, data, cum_io, block_size, &completed);
75 
76 	if (r != OK) break;   /* EOF reached */
77 	if (lmfs_rdwt_err() < 0) break;
78 
79 	/* Update counters and pointers. */
80 	nrbytes -= chunk;     /* bytes yet to be read */
81 	cum_io += chunk;      /* bytes read so far */
82 	position += (off_t) chunk;    /* position within the file */
83   }
84 
85   /* On write, update file size and access time. */
86   if (call == FSC_WRITE) {
87 	if (regular || mode_word == I_DIRECTORY) {
88 		if (position > f_size) rip->i_size = position;
89         }
90   }
91 
92   rip->i_seek = NO_SEEK;
93 
94   if (lmfs_rdwt_err() != OK) r = lmfs_rdwt_err(); /* check for disk error */
95   if (lmfs_rdwt_err() == END_OF_FILE) r = OK;
96 
97   if (r != OK)
98 	return r;
99 
100   if (call == FSC_READ) rip->i_update |= ATIME;
101   if (call == FSC_WRITE) rip->i_update |= CTIME | MTIME;
102   rip->i_dirt = IN_DIRTY;          /* inode is thus now dirty */
103 
104   return(cum_io);
105 }
106 
107 
108 /*===========================================================================*
109  *				rw_chunk				     *
110  *===========================================================================*/
111 static int rw_chunk(rip, position, off, chunk, left, call, data, buf_off,
112 	block_size, completed)
113 register struct inode *rip;     /* pointer to inode for file to be rd/wr */
114 u64_t position;                 /* position within file to read or write */
115 unsigned off;                   /* off within the current block */
116 size_t chunk;                   /* number of bytes to read or write */
117 unsigned left;                  /* max number of bytes wanted after position */
118 int call;                       /* FSC_READ, FSC_WRITE, or FSC_PEEK */
119 struct fsdriver_data *data;     /* structure for (remote) user buffer */
120 unsigned buf_off;               /* offset in user buffer */
121 unsigned int block_size;        /* block size of FS operating on */
122 int *completed;                 /* number of bytes copied */
123 {
124 /* Read or write (part of) a block. */
125 
126   register struct buf *bp = NULL;
127   register int r = OK;
128   int n;
129   block_t b;
130   dev_t dev;
131   ino_t ino = VMC_NO_INODE;
132   u64_t ino_off = rounddown(position, block_size);
133 
134   *completed = 0;
135 
136   if (ex64hi(position) != 0)
137 	panic("rw_chunk: position too high");
138   b = read_map(rip, (off_t) ex64lo(position), 0);
139   dev = rip->i_dev;
140   ino = rip->i_num;
141   assert(ino != VMC_NO_INODE);
142 
143   if (b == NO_BLOCK) {
144 	if (call == FSC_READ) {
145 		/* Reading from a nonexistent block.  Must read as all zeros.*/
146 		r = fsdriver_zero(data, buf_off, chunk);
147 		if(r != OK) {
148 			printf("ext2fs: fsdriver_zero failed\n");
149 		}
150 		return r;
151 	} else {
152                /* Writing to or peeking a nonexistent block.
153                 * Create and enter in inode.
154                 */
155 		if ((bp = new_block(rip, (off_t) ex64lo(position))) == NULL)
156 			return(err_code);
157         }
158   } else if (call != FSC_WRITE) {
159 	/* Read and read ahead if convenient. */
160 	bp = rahead(rip, b, position, left);
161   } else {
162 	/* Normally an existing block to be partially overwritten is first read
163 	 * in.  However, a full block need not be read in.  If it is already in
164 	 * the cache, acquire it, otherwise just acquire a free buffer.
165          */
166 	n = (chunk == block_size ? NO_READ : NORMAL);
167 	if (off == 0 && (off_t) ex64lo(position) >= rip->i_size)
168 		n = NO_READ;
169 	assert(ino != VMC_NO_INODE);
170 	assert(!(ino_off % block_size));
171 	bp = lmfs_get_block_ino(dev, b, n, ino, ino_off);
172   }
173 
174   /* In all cases, bp now points to a valid buffer. */
175   if (bp == NULL)
176 	panic("bp not valid in rw_chunk, this can't happen");
177 
178   if (call == FSC_WRITE && chunk != block_size &&
179       (off_t) ex64lo(position) >= rip->i_size && off == 0) {
180 	zero_block(bp);
181   }
182 
183   if (call == FSC_READ) {
184 	/* Copy a chunk from the block buffer to user space. */
185 	r = fsdriver_copyout(data, buf_off, b_data(bp)+off, chunk);
186   } else if (call == FSC_WRITE) {
187 	/* Copy a chunk from user space to the block buffer. */
188 	r = fsdriver_copyin(data, buf_off, b_data(bp)+off, chunk);
189 	lmfs_markdirty(bp);
190   }
191 
192   n = (off + chunk == block_size ? FULL_DATA_BLOCK : PARTIAL_DATA_BLOCK);
193   put_block(bp, n);
194 
195   return(r);
196 }
197 
198 
199 /*===========================================================================*
200  *				read_map				     *
201  *===========================================================================*/
202 block_t read_map(rip, position, opportunistic)
203 register struct inode *rip;     /* ptr to inode to map from */
204 off_t position;                 /* position in file whose blk wanted */
205 int opportunistic;
206 {
207 /* Given an inode and a position within the corresponding file, locate the
208  * block number in which that position is to be found and return it.
209  */
210 
211   struct buf *bp;
212   int mindex;
213   block_t b;
214   unsigned long excess, block_pos;
215   static char first_time = TRUE;
216   static long addr_in_block;
217   static long addr_in_block2;
218   static long doub_ind_s;
219   static long triple_ind_s;
220   static long out_range_s;
221   int iomode = NORMAL;
222 
223   if(opportunistic) iomode = PREFETCH;
224 
225   if (first_time) {
226 	addr_in_block = rip->i_sp->s_block_size / BLOCK_ADDRESS_BYTES;
227 	addr_in_block2 = addr_in_block * addr_in_block;
228 	doub_ind_s = EXT2_NDIR_BLOCKS + addr_in_block;
229 	triple_ind_s = doub_ind_s + addr_in_block2;
230 	out_range_s = triple_ind_s + addr_in_block2 * addr_in_block;
231 	first_time = FALSE;
232   }
233 
234   block_pos = position / rip->i_sp->s_block_size; /* relative blk # in file */
235 
236   /* Is 'position' to be found in the inode itself? */
237   if (block_pos < EXT2_NDIR_BLOCKS)
238 	return(rip->i_block[block_pos]);
239 
240   /* It is not in the inode, so it must be single, double or triple indirect */
241   if (block_pos < doub_ind_s) {
242 	b = rip->i_block[EXT2_NDIR_BLOCKS]; /* address of single indirect block */
243 	mindex = block_pos - EXT2_NDIR_BLOCKS;
244   } else if (block_pos >= out_range_s) { /* TODO: do we need it? */
245 	return(NO_BLOCK);
246   } else {
247 	/* double or triple indirect block. At first if it's triple,
248 	 * find double indirect block.
249 	 */
250 	excess = block_pos - doub_ind_s;
251 	b = rip->i_block[EXT2_DIND_BLOCK];
252 	if (block_pos >= triple_ind_s) {
253 		b = rip->i_block[EXT2_TIND_BLOCK];
254 		if (b == NO_BLOCK) return(NO_BLOCK);
255 		bp = get_block(rip->i_dev, b, NORMAL); /* get triple ind block */
256 		ASSERT(lmfs_dev(bp) != NO_DEV);
257 		ASSERT(lmfs_dev(bp) == rip->i_dev);
258 		excess = block_pos - triple_ind_s;
259 		mindex = excess / addr_in_block2;
260 		b = rd_indir(bp, mindex);	/* num of double ind block */
261 		put_block(bp, INDIRECT_BLOCK);	/* release triple ind block */
262 		excess = excess % addr_in_block2;
263 	}
264 	if (b == NO_BLOCK) return(NO_BLOCK);
265 	bp = get_block(rip->i_dev, b, iomode); /* get double indirect block */
266 	if(opportunistic && lmfs_dev(bp) == NO_DEV) {
267 		put_block(bp, INDIRECT_BLOCK);
268 		return NO_BLOCK;
269 	}
270 	ASSERT(lmfs_dev(bp) != NO_DEV);
271 	ASSERT(lmfs_dev(bp) == rip->i_dev);
272 	mindex = excess / addr_in_block;
273 	b = rd_indir(bp, mindex);	/* num of single ind block */
274 	put_block(bp, INDIRECT_BLOCK);	/* release double ind block */
275 	mindex = excess % addr_in_block;	/* index into single ind blk */
276   }
277   if (b == NO_BLOCK) return(NO_BLOCK);
278   bp = get_block(rip->i_dev, b, iomode);       /* get single indirect block */
279   if(opportunistic && lmfs_dev(bp) == NO_DEV) {
280        put_block(bp, INDIRECT_BLOCK);
281        return NO_BLOCK;
282   }
283 
284   ASSERT(lmfs_dev(bp) != NO_DEV);
285   ASSERT(lmfs_dev(bp) == rip->i_dev);
286   b = rd_indir(bp, mindex);
287   put_block(bp, INDIRECT_BLOCK);	/* release single ind block */
288 
289   return(b);
290 }
291 
292 struct buf *get_block_map(register struct inode *rip, u64_t position)
293 {
294 	block_t b = read_map(rip, position, 0);	/* get block number */
295 	int block_size = get_block_size(rip->i_dev);
296 	if(b == NO_BLOCK)
297 		return NULL;
298 	position = rounddown(position, block_size);
299 	assert(rip->i_num != VMC_NO_INODE);
300 	return lmfs_get_block_ino(rip->i_dev, b, NORMAL, rip->i_num, position);
301 }
302 
303 /*===========================================================================*
304  *				rd_indir				     *
305  *===========================================================================*/
306 block_t rd_indir(bp, mindex)
307 struct buf *bp;                 /* pointer to indirect block */
308 int mindex;                      /* index into *bp */
309 {
310   if (bp == NULL)
311 	panic("rd_indir() on NULL");
312   /* TODO: use conv call */
313   return conv4(le_CPU, b_ind(bp)[mindex]);
314 }
315 
316 
317 /*===========================================================================*
318  *				rahead					     *
319  *===========================================================================*/
320 static struct buf *rahead(rip, baseblock, position, bytes_ahead)
321 register struct inode *rip;     /* pointer to inode for file to be read */
322 block_t baseblock;              /* block at current position */
323 u64_t position;                 /* position within file */
324 unsigned bytes_ahead;           /* bytes beyond position for immediate use */
325 {
326 /* Fetch a block from the cache or the device.  If a physical read is
327  * required, prefetch as many more blocks as convenient into the cache.
328  * This usually covers bytes_ahead and is at least BLOCKS_MINIMUM.
329  * The device driver may decide it knows better and stop reading at a
330  * cylinder boundary (or after an error).  Rw_scattered() puts an optional
331  * flag on all reads to allow this.
332  */
333 /* Minimum number of blocks to prefetch. */
334 # define BLOCKS_MINIMUM		(nr_bufs < 50 ? 18 : 32)
335   int nr_bufs = lmfs_nr_bufs();
336   int read_q_size;
337   unsigned int blocks_ahead, fragment, block_size;
338   block_t block, blocks_left;
339   off_t ind1_pos;
340   dev_t dev;
341   struct buf *bp = NULL;
342   static unsigned int readqsize = 0;
343   static struct buf **read_q = NULL;
344   u64_t position_running;
345 
346   if(readqsize != nr_bufs) {
347 	if(readqsize > 0) {
348 		assert(read_q != NULL);
349 		free(read_q);
350 		read_q = NULL;
351 		readqsize = 0;
352 	}
353 
354 	assert(readqsize == 0);
355 	assert(read_q == NULL);
356 
357 	if(!(read_q = malloc(sizeof(read_q[0])*nr_bufs)))
358 		panic("couldn't allocate read_q");
359 	readqsize = nr_bufs;
360   }
361 
362   dev = rip->i_dev;
363   assert(dev != NO_DEV);
364   block_size = get_block_size(dev);
365 
366   block = baseblock;
367 
368   fragment = position % block_size;
369   position -= fragment;
370   position_running = position;
371   bytes_ahead += fragment;
372   blocks_ahead = (bytes_ahead + block_size - 1) / block_size;
373 
374   bp = lmfs_get_block_ino(dev, block, PREFETCH, rip->i_num, position);
375   assert(bp != NULL);
376   if (lmfs_dev(bp) != NO_DEV) return(bp);
377 
378   /* The best guess for the number of blocks to prefetch:  A lot.
379    * It is impossible to tell what the device looks like, so we don't even
380    * try to guess the geometry, but leave it to the driver.
381    *
382    * The floppy driver can read a full track with no rotational delay, and it
383    * avoids reading partial tracks if it can, so handing it enough buffers to
384    * read two tracks is perfect.  (Two, because some diskette types have
385    * an odd number of sectors per track, so a block may span tracks.)
386    *
387    * The disk drivers don't try to be smart.  With todays disks it is
388    * impossible to tell what the real geometry looks like, so it is best to
389    * read as much as you can.  With luck the caching on the drive allows
390    * for a little time to start the next read.
391    *
392    * The current solution below is a bit of a hack, it just reads blocks from
393    * the current file position hoping that more of the file can be found.  A
394    * better solution must look at the already available
395    * indirect blocks (but don't call read_map!).
396    */
397 
398   blocks_left = (block_t) (rip->i_size-ex64lo(position)+(block_size-1)) /
399                                                                 block_size;
400 
401   /* Go for the first indirect block if we are in its neighborhood. */
402   ind1_pos = (EXT2_NDIR_BLOCKS) * block_size;
403   if ((off_t) ex64lo(position) <= ind1_pos && rip->i_size > ind1_pos) {
404 	blocks_ahead++;
405 	blocks_left++;
406   }
407 
408   /* No more than the maximum request. */
409   if (blocks_ahead > NR_IOREQS) blocks_ahead = NR_IOREQS;
410 
411   /* Read at least the minimum number of blocks, but not after a seek. */
412   if (blocks_ahead < BLOCKS_MINIMUM && rip->i_seek == NO_SEEK)
413 	blocks_ahead = BLOCKS_MINIMUM;
414 
415   /* Can't go past end of file. */
416   if (blocks_ahead > blocks_left) blocks_ahead = blocks_left;
417 
418   read_q_size = 0;
419 
420   /* Acquire block buffers. */
421   for (;;) {
422   	block_t thisblock;
423 	read_q[read_q_size++] = bp;
424 
425 	if (--blocks_ahead == 0) break;
426 
427 	/* Don't trash the cache, leave 4 free. */
428 	if (lmfs_bufs_in_use() >= nr_bufs - 4) break;
429 
430 	block++;
431 	position_running += block_size;
432 
433 	thisblock = read_map(rip, (off_t) ex64lo(position_running), 1);
434 	if (thisblock != NO_BLOCK) {
435 		bp = lmfs_get_block_ino(dev, thisblock, PREFETCH, rip->i_num,
436 			position_running);
437 	} else {
438 		bp = get_block(dev, block, PREFETCH);
439 	}
440 	if (lmfs_dev(bp) != NO_DEV) {
441 		/* Oops, block already in the cache, get out. */
442 		put_block(bp, FULL_DATA_BLOCK);
443 		break;
444 	}
445   }
446   lmfs_rw_scattered(dev, read_q, read_q_size, READING);
447 
448   return(lmfs_get_block_ino(dev, baseblock, NORMAL, rip->i_num, position));
449 }
450 
451 
452 /*===========================================================================*
453  *				get_dtype				     *
454  *===========================================================================*/
455 static unsigned int get_dtype(struct ext2_disk_dir_desc *dp)
456 {
457 /* Return the type of the file identified by the given directory entry. */
458 
459   if (!HAS_INCOMPAT_FEATURE(superblock, INCOMPAT_FILETYPE))
460 	return DT_UNKNOWN;
461 
462   switch (dp->d_file_type) {
463   case EXT2_FT_REG_FILE:	return DT_REG;
464   case EXT2_FT_DIR:		return DT_DIR;
465   case EXT2_FT_SYMLINK:		return DT_LNK;
466   case EXT2_FT_BLKDEV:		return DT_BLK;
467   case EXT2_FT_CHRDEV:		return DT_CHR;
468   case EXT2_FT_FIFO:		return DT_FIFO;
469   default:			return DT_UNKNOWN;
470   }
471 }
472 
473 /*===========================================================================*
474  *				fs_getdents				     *
475  *===========================================================================*/
476 ssize_t fs_getdents(ino_t ino_nr, struct fsdriver_data *data, size_t bytes,
477 	off_t *posp)
478 {
479 #define GETDENTS_BUFSIZE (sizeof(struct dirent) + EXT2_NAME_MAX + 1)
480 #define GETDENTS_ENTRIES	8
481   static char getdents_buf[GETDENTS_BUFSIZE * GETDENTS_ENTRIES];
482   struct fsdriver_dentry fsdentry;
483   struct inode *rip;
484   int r, done;
485   unsigned int block_size, len;
486   off_t pos, off, block_pos, new_pos, ent_pos;
487   struct buf *bp;
488   struct ext2_disk_dir_desc *d_desc;
489   ino_t child_nr;
490 
491   /* Check whether the position is properly aligned */
492   pos = *posp;
493   if ((unsigned int) pos % DIR_ENTRY_ALIGN)
494 	return(ENOENT);
495 
496   if ((rip = get_inode(fs_dev, ino_nr)) == NULL)
497 	return(EINVAL);
498 
499   block_size = rip->i_sp->s_block_size;
500   off = (pos % block_size);             /* Offset in block */
501   block_pos = pos - off;
502   done = FALSE;       /* Stop processing directory blocks when done is set */
503 
504   fsdriver_dentry_init(&fsdentry, data, bytes, getdents_buf,
505 	sizeof(getdents_buf));
506 
507   /* The default position for the next request is EOF. If the user's buffer
508    * fills up before EOF, new_pos will be modified. */
509   new_pos = rip->i_size;
510 
511   r = 0;
512 
513   for (; block_pos < rip->i_size; block_pos += block_size) {
514 	off_t temp_pos = block_pos;
515         /* Since directories don't have holes, 'bp' cannot be NULL. */
516         bp = get_block_map(rip, block_pos);     /* get a dir block */
517         assert(bp != NULL);
518 	assert(bp != NULL);
519 
520 	/* Search a directory block. */
521 	d_desc = (struct ext2_disk_dir_desc*) &b_data(bp);
522 
523 	/* we need to seek to entry at off bytes.
524 	* when NEXT_DISC_DIR_POS == block_size it's last dentry.
525 	*/
526 	for (; temp_pos + conv2(le_CPU, d_desc->d_rec_len) <= pos
527 	       && NEXT_DISC_DIR_POS(d_desc, &b_data(bp)) < block_size;
528 	       d_desc = NEXT_DISC_DIR_DESC(d_desc)) {
529 		temp_pos += conv2(le_CPU, d_desc->d_rec_len);
530 	}
531 
532 	for (; CUR_DISC_DIR_POS(d_desc, &b_data(bp)) < block_size;
533 	     d_desc = NEXT_DISC_DIR_DESC(d_desc)) {
534 		if (d_desc->d_ino == 0)
535 			continue; /* Entry is not in use */
536 
537 		len = d_desc->d_name_len;
538 		assert(len <= NAME_MAX);
539 		assert(len <= EXT2_NAME_MAX);
540 
541 		/* Need the position of this entry in the directory */
542 		ent_pos = block_pos + ((char *)d_desc - b_data(bp));
543 
544 		child_nr = (ino_t) conv4(le_CPU, d_desc->d_ino);
545 		r = fsdriver_dentry_add(&fsdentry, child_nr, d_desc->d_name,
546 			len, get_dtype(d_desc));
547 
548 		/* If the user buffer is full, or an error occurred, stop. */
549 		if (r <= 0) {
550 			done = TRUE;
551 
552 			/* Record the position of this entry, it is the
553 			 * starting point of the next request (unless the
554 			 * position is modified with lseek).
555 			 */
556 			new_pos = ent_pos;
557 			break;
558 		}
559 	}
560 
561 	put_block(bp, DIRECTORY_BLOCK);
562 	if (done)
563 		break;
564   }
565 
566   if (r >= 0 && (r = fsdriver_dentry_finish(&fsdentry)) >= 0) {
567 	*posp = new_pos;
568 	rip->i_update |= ATIME;
569 	rip->i_dirt = IN_DIRTY;
570   }
571 
572   put_inode(rip);               /* release the inode */
573   return(r);
574 }
575