xref: /minix/minix/fs/mfs/read.c (revision 0a6a1f1d)
1 #include "fs.h"
2 #include <stddef.h>
3 #include <string.h>
4 #include <stdlib.h>
5 #include "buf.h"
6 #include "inode.h"
7 #include "super.h"
8 #include <sys/param.h>
9 #include <sys/dirent.h>
10 #include <assert.h>
11 
12 
13 static struct buf *rahead(struct inode *rip, block_t baseblock, u64_t
14 	position, unsigned bytes_ahead);
15 static int rw_chunk(struct inode *rip, u64_t position, unsigned off,
16 	size_t chunk, unsigned left, int call, struct fsdriver_data *data,
17 	unsigned buf_off, unsigned int block_size, int *completed);
18 
19 
20 /*===========================================================================*
21  *				fs_readwrite				     *
22  *===========================================================================*/
23 ssize_t fs_readwrite(ino_t ino_nr, struct fsdriver_data *data, size_t nrbytes,
24 	off_t position, int call)
25 {
26   int r;
27   int regular;
28   off_t f_size, bytes_left;
29   size_t off, cum_io, block_size, chunk;
30   mode_t mode_word;
31   int completed;
32   struct inode *rip;
33 
34   r = OK;
35 
36   /* Find the inode referred */
37   if ((rip = find_inode(fs_dev, ino_nr)) == NULL)
38 	return(EINVAL);
39 
40   mode_word = rip->i_mode & I_TYPE;
41   regular = (mode_word == I_REGULAR);
42 
43   /* Determine blocksize */
44   block_size = rip->i_sp->s_block_size;
45   f_size = rip->i_size;
46 
47   /* If this is file i/o, check we can write */
48   if (call == FSC_WRITE) {
49   	  if(rip->i_sp->s_rd_only)
50 		  return EROFS;
51 
52 	  /* Check in advance to see if file will grow too big. */
53 	  if (position > (off_t) (rip->i_sp->s_max_size - nrbytes))
54 		  return(EFBIG);
55 
56 	  /* Clear the zone containing present EOF if hole about
57 	   * to be created.  This is necessary because all unwritten
58 	   * blocks prior to the EOF must read as zeros.
59 	   */
60 	  if(position > f_size) clear_zone(rip, f_size, 0);
61   }
62 
63   cum_io = 0;
64   /* Split the transfer into chunks that don't span two blocks. */
65   while (nrbytes > 0) {
66 	  off = ((unsigned int) position) % block_size; /* offset in blk*/
67 	  chunk = block_size - off;
68 	  if (chunk > nrbytes)
69 		chunk = nrbytes;
70 
71 	  if (call != FSC_WRITE) {
72 		  bytes_left = f_size - position;
73 		  if (position >= f_size) break;	/* we are beyond EOF */
74 		  if (chunk > (unsigned int) bytes_left) chunk = bytes_left;
75 	  }
76 
77 	  /* Read or write 'chunk' bytes. */
78 	  r = rw_chunk(rip, ((u64_t)((unsigned long)position)), off, chunk,
79 		nrbytes, call, data, cum_io, block_size, &completed);
80 
81 	  if (r != OK) break;
82 
83 	  /* Update counters and pointers. */
84 	  nrbytes -= chunk;	/* bytes yet to be read */
85 	  cum_io += chunk;	/* bytes read so far */
86 	  position += (off_t) chunk;	/* position within the file */
87   }
88 
89   /* On write, update file size and access time. */
90   if (call == FSC_WRITE) {
91 	  if (regular || mode_word == I_DIRECTORY) {
92 		  if (position > f_size) rip->i_size = position;
93 	  }
94   }
95 
96   rip->i_seek = NO_SEEK;
97 
98   if (r != OK)
99 	return r;
100 
101   /* even on a ROFS, writing to a device node on it is fine,
102    * just don't update the inode stats for it. And dito for reading.
103    */
104   if (!rip->i_sp->s_rd_only) {
105 	  if (call == FSC_READ) rip->i_update |= ATIME;
106 	  if (call == FSC_WRITE) rip->i_update |= CTIME | MTIME;
107 	  IN_MARKDIRTY(rip);		/* inode is thus now dirty */
108   }
109 
110   return cum_io;
111 }
112 
113 
114 /*===========================================================================*
115  *				rw_chunk				     *
116  *===========================================================================*/
117 static int rw_chunk(rip, position, off, chunk, left, call, data, buf_off,
118 	block_size, completed)
119 register struct inode *rip;	/* pointer to inode for file to be rd/wr */
120 u64_t position;			/* position within file to read or write */
121 unsigned off;			/* off within the current block */
122 size_t chunk;			/* number of bytes to read or write */
123 unsigned left;			/* max number of bytes wanted after position */
124 int call;			/* FSC_READ, FSC_WRITE, or FSC_PEEK */
125 struct fsdriver_data *data;	/* structure for (remote) user buffer */
126 unsigned buf_off;		/* offset in user buffer */
127 unsigned int block_size;	/* block size of FS operating on */
128 int *completed;			/* number of bytes copied */
129 {
130 /* Read or write (part of) a block. */
131   struct buf *bp = NULL;
132   register int r = OK;
133   int n;
134   block_t b;
135   dev_t dev;
136   ino_t ino = VMC_NO_INODE;
137   u64_t ino_off = rounddown(position, block_size);
138 
139   *completed = 0;
140 
141   if (ex64hi(position) != 0)
142 	panic("rw_chunk: position too high");
143   b = read_map(rip, (off_t) ex64lo(position), 0);
144   dev = rip->i_dev;
145   ino = rip->i_num;
146   assert(ino != VMC_NO_INODE);
147 
148   if (b == NO_BLOCK) {
149 	if (call == FSC_READ) {
150 		/* Reading from a nonexistent block.  Must read as all zeros.*/
151 		r = fsdriver_zero(data, buf_off, chunk);
152 		if(r != OK) {
153 			printf("MFS: fsdriver_zero failed\n");
154 		}
155 		return r;
156 	} else if (call == FSC_PEEK) {
157 		/* Peeking a nonexistent block. Report to VM. */
158 		lmfs_zero_block_ino(dev, ino, ino_off);
159 		return OK;
160 	} else {
161 		/* Writing to a nonexistent block.
162 		 * Create and enter in inode.
163 		 */
164 		if ((bp = new_block(rip, (off_t) ex64lo(position))) == NULL)
165 			return(err_code);
166 	}
167   } else if (call != FSC_WRITE) {
168 	/* Read and read ahead if convenient. */
169 	bp = rahead(rip, b, position, left);
170   } else {
171 	/* Normally an existing block to be partially overwritten is first read
172 	 * in.  However, a full block need not be read in.  If it is already in
173 	 * the cache, acquire it, otherwise just acquire a free buffer.
174 	 */
175 	n = (chunk == block_size ? NO_READ : NORMAL);
176 	if (off == 0 && (off_t) ex64lo(position) >= rip->i_size)
177 		n = NO_READ;
178 	assert(ino != VMC_NO_INODE);
179 	assert(!(ino_off % block_size));
180 	if ((r = lmfs_get_block_ino(&bp, dev, b, n, ino, ino_off)) != OK)
181 		panic("MFS: error getting block (%llu,%u): %d", dev, b, r);
182   }
183 
184   /* In all cases, bp now points to a valid buffer. */
185   assert(bp != NULL);
186 
187   if (call == FSC_WRITE && chunk != block_size &&
188       (off_t) ex64lo(position) >= rip->i_size && off == 0) {
189 	zero_block(bp);
190   }
191 
192   if (call == FSC_READ) {
193 	/* Copy a chunk from the block buffer to user space. */
194 	r = fsdriver_copyout(data, buf_off, b_data(bp)+off, chunk);
195   } else if (call == FSC_WRITE) {
196 	/* Copy a chunk from user space to the block buffer. */
197 	r = fsdriver_copyin(data, buf_off, b_data(bp)+off, chunk);
198 	MARKDIRTY(bp);
199   }
200 
201   put_block(bp);
202 
203   return(r);
204 }
205 
206 
207 /*===========================================================================*
208  *				read_map				     *
209  *===========================================================================*/
210 block_t read_map(rip, position, opportunistic)
211 register struct inode *rip;	/* ptr to inode to map from */
212 off_t position;			/* position in file whose blk wanted */
213 int opportunistic;		/* if nonzero, only use cache for metadata */
214 {
215 /* Given an inode and a position within the corresponding file, locate the
216  * block (not zone) number in which that position is to be found and return it.
217  */
218 
219   struct buf *bp;
220   zone_t z;
221   int scale, boff, index, zind;
222   unsigned int dzones, nr_indirects;
223   block_t b;
224   unsigned long excess, zone, block_pos;
225   int iomode;
226 
227   iomode = opportunistic ? PEEK : NORMAL;
228 
229   scale = rip->i_sp->s_log_zone_size;	/* for block-zone conversion */
230   block_pos = position/rip->i_sp->s_block_size;	/* relative blk # in file */
231   zone = block_pos >> scale;	/* position's zone */
232   boff = (int) (block_pos - (zone << scale) ); /* relative blk # within zone */
233   dzones = rip->i_ndzones;
234   nr_indirects = rip->i_nindirs;
235 
236   /* Is 'position' to be found in the inode itself? */
237   if (zone < dzones) {
238 	zind = (int) zone;	/* index should be an int */
239 	z = rip->i_zone[zind];
240 	if (z == NO_ZONE) return(NO_BLOCK);
241 	b = (block_t) ((z << scale) + boff);
242 	return(b);
243   }
244 
245   /* It is not in the inode, so it must be single or double indirect. */
246   excess = zone - dzones;	/* first Vx_NR_DZONES don't count */
247 
248   if (excess < nr_indirects) {
249 	/* 'position' can be located via the single indirect block. */
250 	z = rip->i_zone[dzones];
251   } else {
252 	/* 'position' can be located via the double indirect block. */
253 	if ( (z = rip->i_zone[dzones+1]) == NO_ZONE) return(NO_BLOCK);
254 	excess -= nr_indirects;			/* single indir doesn't count*/
255 	b = (block_t) z << scale;
256 	ASSERT(rip->i_dev != NO_DEV);
257 	index = (int) (excess/nr_indirects);
258 	if ((unsigned int) index > rip->i_nindirs)
259 		return(NO_BLOCK);	/* Can't go beyond double indirects */
260 	bp = get_block(rip->i_dev, b, iomode); /* get double indirect block */
261 	if (bp == NULL)
262 		return NO_BLOCK;		/* peeking failed */
263 	z = rd_indir(bp, index);		/* z= zone for single*/
264 	put_block(bp);				/* release double ind block */
265 	excess = excess % nr_indirects;		/* index into single ind blk */
266   }
267 
268   /* 'z' is zone num for single indirect block; 'excess' is index into it. */
269   if (z == NO_ZONE) return(NO_BLOCK);
270   b = (block_t) z << scale;			/* b is blk # for single ind */
271   bp = get_block(rip->i_dev, b, iomode);	/* get single indirect block */
272   if (bp == NULL)
273 	return NO_BLOCK;			/* peeking failed */
274   z = rd_indir(bp, (int) excess);		/* get block pointed to */
275   put_block(bp);				/* release single indir blk */
276   if (z == NO_ZONE) return(NO_BLOCK);
277   b = (block_t) ((z << scale) + boff);
278   return(b);
279 }
280 
281 struct buf *get_block_map(register struct inode *rip, u64_t position)
282 {
283 	struct buf *bp;
284 	int r, block_size;
285 	block_t b = read_map(rip, position, 0);	/* get block number */
286 	if(b == NO_BLOCK)
287 		return NULL;
288 	block_size = get_block_size(rip->i_dev);
289 	position = rounddown(position, block_size);
290 	assert(rip->i_num != VMC_NO_INODE);
291 	if ((r = lmfs_get_block_ino(&bp, rip->i_dev, b, NORMAL, rip->i_num,
292 	    position)) != OK)
293 		panic("MFS: error getting block (%llu,%u): %d",
294 		    rip->i_dev, b, r);
295 	return bp;
296 }
297 
298 /*===========================================================================*
299  *				rd_indir				     *
300  *===========================================================================*/
301 zone_t rd_indir(bp, index)
302 struct buf *bp;			/* pointer to indirect block */
303 int index;			/* index into *bp */
304 {
305   struct super_block *sp;
306   zone_t zone;
307 
308   if(bp == NULL)
309 	panic("rd_indir() on NULL");
310 
311   sp = &superblock;
312 
313   /* read a zone from an indirect block */
314   assert(sp->s_version == V3);
315   zone = (zone_t) conv4(sp->s_native, (long) b_v2_ind(bp)[index]);
316 
317   if (zone != NO_ZONE &&
318 		(zone < (zone_t) sp->s_firstdatazone || zone >= sp->s_zones)) {
319 	printf("Illegal zone number %ld in indirect block, index %d\n",
320 	       (long) zone, index);
321 	panic("check file system");
322   }
323 
324   return(zone);
325 }
326 
327 /*===========================================================================*
328  *				rahead					     *
329  *===========================================================================*/
330 static struct buf *rahead(rip, baseblock, position, bytes_ahead)
331 register struct inode *rip;	/* pointer to inode for file to be read */
332 block_t baseblock;		/* block at current position */
333 u64_t position;			/* position within file */
334 unsigned bytes_ahead;		/* bytes beyond position for immediate use */
335 {
336 /* Fetch a block from the cache or the device.  If a physical read is
337  * required, prefetch as many more blocks as convenient into the cache.
338  * This usually covers bytes_ahead and is at least BLOCKS_MINIMUM.
339  * The device driver may decide it knows better and stop reading at a
340  * cylinder boundary (or after an error).  Rw_scattered() puts an optional
341  * flag on all reads to allow this.
342  */
343 /* Minimum number of blocks to prefetch. */
344 # define BLOCKS_MINIMUM		32
345   int r, scale, read_q_size;
346   unsigned int blocks_ahead, fragment, block_size;
347   block_t block, blocks_left;
348   off_t ind1_pos;
349   dev_t dev;
350   struct buf *bp;
351   static block64_t read_q[LMFS_MAX_PREFETCH];
352   u64_t position_running;
353 
354   dev = rip->i_dev;
355   assert(dev != NO_DEV);
356 
357   block_size = get_block_size(dev);
358 
359   block = baseblock;
360 
361   fragment = position % block_size;
362   position -= fragment;
363   position_running = position;
364   bytes_ahead += fragment;
365   blocks_ahead = (bytes_ahead + block_size - 1) / block_size;
366 
367   r = lmfs_get_block_ino(&bp, dev, block, PEEK, rip->i_num, position);
368   if (r == OK)
369 	return(bp);
370   if (r != ENOENT)
371 	panic("MFS: error getting block (%llu,%u): %d", dev, block, r);
372 
373   /* The best guess for the number of blocks to prefetch:  A lot.
374    * It is impossible to tell what the device looks like, so we don't even
375    * try to guess the geometry, but leave it to the driver.
376    *
377    * The floppy driver can read a full track with no rotational delay, and it
378    * avoids reading partial tracks if it can, so handing it enough buffers to
379    * read two tracks is perfect.  (Two, because some diskette types have
380    * an odd number of sectors per track, so a block may span tracks.)
381    *
382    * The disk drivers don't try to be smart.  With todays disks it is
383    * impossible to tell what the real geometry looks like, so it is best to
384    * read as much as you can.  With luck the caching on the drive allows
385    * for a little time to start the next read.
386    *
387    * The current solution below is a bit of a hack, it just reads blocks from
388    * the current file position hoping that more of the file can be found.  A
389    * better solution must look at the already available zone pointers and
390    * indirect blocks (but don't call read_map!).
391    */
392 
393   blocks_left = (block_t) (rip->i_size-ex64lo(position)+(block_size-1)) /
394 								block_size;
395 
396   /* Go for the first indirect block if we are in its neighborhood. */
397   scale = rip->i_sp->s_log_zone_size;
398   ind1_pos = (off_t) rip->i_ndzones * (block_size << scale);
399   if ((off_t) ex64lo(position) <= ind1_pos && rip->i_size > ind1_pos) {
400 	blocks_ahead++;
401 	blocks_left++;
402   }
403 
404   /* Read at least the minimum number of blocks, but not after a seek. */
405   if (blocks_ahead < BLOCKS_MINIMUM && rip->i_seek == NO_SEEK)
406 	blocks_ahead = BLOCKS_MINIMUM;
407 
408   /* Can't go past end of file. */
409   if (blocks_ahead > blocks_left) blocks_ahead = blocks_left;
410 
411   /* No more than the maximum request. */
412   if (blocks_ahead > LMFS_MAX_PREFETCH) blocks_ahead = LMFS_MAX_PREFETCH;
413 
414   read_q_size = 0;
415 
416   /* Acquire block buffers. */
417   for (;;) {
418   	block_t thisblock;
419 	read_q[read_q_size++] = block;
420 
421 	if (--blocks_ahead == 0) break;
422 
423 	block++;
424 	position_running += block_size;
425 
426 	thisblock = read_map(rip, (off_t) ex64lo(position_running), 1);
427 	if (thisblock != NO_BLOCK) {
428 		r = lmfs_get_block_ino(&bp, dev, thisblock, PEEK, rip->i_num,
429 		    position_running);
430 		block = thisblock;
431 	} else
432 		r = lmfs_get_block(&bp, dev, block, PEEK);
433 
434 	if (r == OK) {
435 		/* Oops, block already in the cache, get out. */
436 		put_block(bp);
437 		break;
438 	}
439 	if (r != ENOENT)
440 		panic("MFS: error getting block (%llu,%u): %d", dev, block, r);
441   }
442   lmfs_prefetch(dev, read_q, read_q_size);
443 
444   r = lmfs_get_block_ino(&bp, dev, baseblock, NORMAL, rip->i_num, position);
445   if (r != OK)
446 	panic("MFS: error getting block (%llu,%u): %d", dev, baseblock, r);
447   return bp;
448 }
449 
450 
/*===========================================================================*
 *				fs_getdents				     *
 *===========================================================================*/
ssize_t fs_getdents(ino_t ino_nr, struct fsdriver_data *data, size_t bytes,
	off_t *posp)
{
/* Retrieve directory entries from the directory 'ino_nr', starting at byte
 * position *posp, into the caller's buffer of 'bytes' bytes.  On success,
 * *posp is updated to the position of the next unprocessed entry.
 */
#define GETDENTS_BUFSIZE	(sizeof(struct dirent) + MFS_NAME_MAX + 1)
#define GETDENTS_ENTRIES	8
  static char getdents_buf[GETDENTS_BUFSIZE * GETDENTS_ENTRIES];
  struct fsdriver_dentry fsdentry;
  struct inode *dirp, *entp;
  int r, stop;
  unsigned int bsize, namelen, dtype;
  off_t pos, blk_off, blk_pos, next_pos, ent_pos;
  struct buf *bp;
  struct direct *dp;
  char *nul;

  /* Directory positions must be aligned to the on-disk entry size. */
  pos = *posp;
  if ((unsigned int) pos % DIR_ENTRY_SIZE)
	  return(ENOENT);

  if ((dirp = get_inode(fs_dev, ino_nr)) == NULL)
	  return(EINVAL);

  bsize = dirp->i_sp->s_block_size;
  blk_off = (pos % bsize);		/* offset within the first block */
  blk_pos = pos - blk_off;
  stop = FALSE;		/* set when directory processing must end */

  fsdriver_dentry_init(&fsdentry, data, bytes, getdents_buf,
	sizeof(getdents_buf));

  /* By default, the next request starts at EOF.  If the user's buffer
   * fills up before EOF, next_pos is set to the first unprocessed entry.
   */
  next_pos = dirp->i_size;

  r = 0;

  for (; blk_pos < dirp->i_size; blk_pos += bsize) {
	/* Directories have no holes, so 'bp' cannot be NULL. */
	bp = get_block_map(dirp, blk_pos);	/* get a directory block */
	assert(bp != NULL);

	/* In the first block, skip entries preceding the start position. */
	if (blk_pos < pos)
		dp = &b_dir(bp)[blk_off / DIR_ENTRY_SIZE];
	else
		dp = &b_dir(bp)[0];
	for (; dp < &b_dir(bp)[NR_DIR_ENTRIES(bsize)]; dp++) {
		if (dp->mfs_d_ino == 0)
			continue;	/* entry is not in use */

		/* Names are NUL-terminated only when shorter than the
		 * on-disk field, so bound the scan by the field size.
		 */
		nul = memchr(dp->mfs_d_name, '\0', sizeof(dp->mfs_d_name));
		if (nul == NULL)
			namelen = sizeof(dp->mfs_d_name);
		else
			namelen = nul - (dp->mfs_d_name);

		/* Position of this entry within the directory. */
		ent_pos = blk_pos + ((char *) dp - (char *) bp->data);

		/* MFS does not store file types in its directory entries,
		 * so look up the target inode to obtain one.  NOTE(review):
		 * this per-entry lookup is expensive, and userland should
		 * be prepared to receive DT_UNKNOWN in any case.
		 */
		if (!(entp = get_inode(fs_dev, (ino_t) dp->mfs_d_ino)))
			panic("unexpected get_inode failure");
		dtype = IFTODT(entp->i_mode);
		put_inode(entp);

		r = fsdriver_dentry_add(&fsdentry, (ino_t) dp->mfs_d_ino,
			dp->mfs_d_name, namelen, dtype);

		/* Stop when the user buffer is full or an error occurred. */
		if (r <= 0) {
			stop = TRUE;

			/* Record this entry's position: it is the starting
			 * point of the next request (unless the position
			 * is modified with lseek).
			 */
			next_pos = ent_pos;
			break;
		}
	}

	put_block(bp);
	if (stop)
		break;
  }

  if (r >= 0 && (r = fsdriver_dentry_finish(&fsdentry)) >= 0) {
	  *posp = next_pos;
	  if (!dirp->i_sp->s_rd_only) {
		  dirp->i_update |= ATIME;
		  IN_MARKDIRTY(dirp);
	  }
  }

  put_inode(dirp);		/* release the directory inode */
  return(r);
}
557