xref: /minix/minix/servers/vfs/read.c (revision 90b80121)
1 /* This file contains the heart of the mechanism used to read (and write)
2  * files.  Read and write requests are split up into chunks that do not cross
3  * block boundaries.  Each chunk is then processed in turn.  Reads on special
4  * files are also detected and handled.
5  *
6  * The entry points into this file are
7  *   do_read:	 perform the READ system call by calling read_write
8  *   do_getdents: read entries from a directory (GETDENTS)
9  *   read_write: actually do the work of READ and WRITE
10  *
11  */
12 
13 #include "fs.h"
14 #include <minix/callnr.h>
15 #include <minix/com.h>
16 #include <minix/u64.h>
17 #include <minix/vfsif.h>
18 #include <assert.h>
19 #include <sys/dirent.h>
20 #include <fcntl.h>
21 #include <unistd.h>
22 #include "file.h"
23 #include "vnode.h"
24 #include "vmnt.h"
25 
26 
27 /*===========================================================================*
28  *				do_read					     *
29  *===========================================================================*/
30 int do_read(void)
31 {
32 
33   /*
34    * This field is currently reserved for internal usage only, and must be set
35    * to zero by the caller.  We may use it for future SA_RESTART support just
36    * like we are using it internally now.
37    */
38   if (job_m_in.m_lc_vfs_readwrite.cum_io != 0)
39 	return(EINVAL);
40 
41   return(do_read_write_peek(READING, job_m_in.m_lc_vfs_readwrite.fd,
42 	job_m_in.m_lc_vfs_readwrite.buf, job_m_in.m_lc_vfs_readwrite.len));
43 }
44 
45 
/*===========================================================================*
 *				lock_bsf				     *
 *===========================================================================*/
void lock_bsf(void)
{
/* Acquire the global block-special-file lock.  If the lock is contended,
 * detach from the worker thread before blocking, so that the worker
 * infrastructure can keep scheduling other jobs while we wait.
 */
  struct worker_thread *org_self;

  /* Fast path: lock was free, we now hold it. */
  if (mutex_trylock(&bsf_lock) == 0)
	return;

  /* Slow path: suspend this worker, then block on the mutex. */
  org_self = worker_suspend();

  if (mutex_lock(&bsf_lock) != 0)
	panic("unable to lock block special file lock");

  /* Lock obtained; reattach to the worker thread. */
  worker_resume(org_self);
}
63 
64 /*===========================================================================*
65  *				unlock_bsf				     *
66  *===========================================================================*/
67 void unlock_bsf(void)
68 {
69   if (mutex_unlock(&bsf_lock) != 0)
70 	panic("failed to unlock block special file lock");
71 }
72 
73 /*===========================================================================*
74  *				check_bsf				     *
75  *===========================================================================*/
76 void check_bsf_lock(void)
77 {
78 	int r = mutex_trylock(&bsf_lock);
79 
80 	if (r == -EBUSY)
81 		panic("bsf_lock locked");
82 	else if (r != 0)
83 		panic("bsf_lock weird state");
84 
85 	/* r == 0 */
86 	unlock_bsf();
87 }
88 
89 /*===========================================================================*
90  *				actual_read_write_peek			     *
91  *===========================================================================*/
92 int actual_read_write_peek(struct fproc *rfp, int rw_flag, int fd,
93 	vir_bytes buf, size_t nbytes)
94 {
95 /* Perform read(fd, buffer, nbytes) or write(fd, buffer, nbytes) call. */
96   struct filp *f;
97   tll_access_t locktype;
98   int r;
99   int ro = 1;
100 
101   if(rw_flag == WRITING) ro = 0;
102 
103   locktype = rw_flag == WRITING ? VNODE_WRITE : VNODE_READ;
104   if ((f = get_filp2(rfp, fd, locktype)) == NULL)
105 	return(err_code);
106 
107   assert(f->filp_count > 0);
108 
109   if (((f->filp_mode) & (ro ? R_BIT : W_BIT)) == 0) {
110 	unlock_filp(f);
111 	return(EBADF);
112   }
113   if (nbytes == 0) {
114 	unlock_filp(f);
115 	return(0);	/* so char special files need not check for 0*/
116   }
117 
118   r = read_write(rfp, rw_flag, fd, f, buf, nbytes, who_e);
119 
120   unlock_filp(f);
121   return(r);
122 }
123 
/*===========================================================================*
 *				do_read_write_peek			     *
 *===========================================================================*/
int do_read_write_peek(int rw_flag, int fd, vir_bytes buf, size_t nbytes)
{
	/* Convenience wrapper: perform the transfer on behalf of the
	 * current process (the global 'fp').
	 */
	return actual_read_write_peek(fp, rw_flag, fd, buf, nbytes);
}
131 
/*===========================================================================*
 *				read_write				     *
 *===========================================================================*/
int read_write(struct fproc *rfp, int rw_flag, int fd, struct filp *f,
	vir_bytes buf, size_t size, endpoint_t for_e)
{
/* Perform a read, write, or peek on the already-locked filp 'f' of process
 * 'rfp', dispatching on the file type: pipe, character special, socket,
 * block special, or regular file.  'buf' and 'size' describe the buffer in
 * the address space of endpoint 'for_e'.  On success, the number of bytes
 * transferred is returned; otherwise a negative error code, or SUSPEND if
 * the caller was blocked.
 */
  register struct vnode *vp;
  off_t position, res_pos;
  size_t cum_io, res_cum_io;
  size_t cum_io_incr;
  int op, r;
  dev_t dev;

  position = f->filp_pos;
  vp = f->filp_vno;
  r = OK;
  cum_io = 0;	/* total bytes transferred so far */

  assert(rw_flag == READING || rw_flag == WRITING || rw_flag == PEEKING);

  /* The byte count must fit in the (signed) return value. */
  if (size > SSIZE_MAX) return(EINVAL);

  if (S_ISFIFO(vp->v_mode)) {		/* Pipes */
	if(rw_flag == PEEKING) {
	  	printf("read_write: peek on pipe makes no sense\n");
		return EINVAL;
	}
	assert(fd != -1);
	op = (rw_flag == READING ? VFS_READ : VFS_WRITE);
	/* rw_pipe handles suspension and position bookkeeping itself. */
	r = rw_pipe(rw_flag, for_e, f, op, fd, buf, size, 0 /*cum_io*/);
  } else if (S_ISCHR(vp->v_mode)) {	/* Character special files. */
	if(rw_flag == PEEKING) {
	  	printf("read_write: peek on char device makes no sense\n");
		return EINVAL;
	}

	if (vp->v_sdev == NO_DEV)
		panic("VFS: read_write tries to access char dev NO_DEV");

	dev = vp->v_sdev;
	op = (rw_flag == READING ? CDEV_READ : CDEV_WRITE);

	r = cdev_io(op, dev, for_e, buf, position, size, f->filp_flags);
	if (r >= 0) {
		/* This should no longer happen: all calls are asynchronous. */
		printf("VFS: I/O to device %llx succeeded immediately!?\n", dev);
		cum_io = r;
		position += r;
		r = OK;
	} else if (r == SUSPEND) {
		/* FIXME: multiple read/write operations on a single filp
		 * should be serialized. They currently aren't; in order to
		 * achieve a similar effect, we optimistically advance the file
		 * position here. This works under the following assumptions:
		 * - character drivers that use the seek position at all,
		 *   expose a view of a statically-sized range of bytes, i.e.,
		 *   they are basically byte-granular block devices;
		 * - if short I/O or an error is returned, all subsequent calls
		 *   will return (respectively) EOF and an error;
		 * - the application never checks its own file seek position,
		 *   or does not care that it may end up having seeked beyond
		 *   the number of bytes it has actually read;
		 * - communication to the character driver is FIFO (this one
		 *   is actually true! whew).
		 * Many improvements are possible here, but in the end,
		 * anything short of queuing concurrent operations will be
		 * suboptimal - so we settle for this hack for now.
		 */
		position += size;
	}
  } else if (S_ISSOCK(vp->v_mode)) {	/* Sockets */
	if (rw_flag == PEEKING) {
		printf("VFS: read_write tries to peek on sock dev\n");
		return EINVAL;
	}

	if (vp->v_sdev == NO_DEV)
		panic("VFS: read_write tries to access sock dev NO_DEV");

	r = sdev_readwrite(vp->v_sdev, buf, size, 0, 0, 0, 0, 0, rw_flag,
	    f->filp_flags, 0);
  } else if (S_ISBLK(vp->v_mode)) {	/* Block special files. */
	if (vp->v_sdev == NO_DEV)
		panic("VFS: read_write tries to access block dev NO_DEV");

	/* All block special file I/O is serialized through one global lock. */
	lock_bsf();

	if(rw_flag == PEEKING) {
		r = req_bpeek(vp->v_bfs_e, vp->v_sdev, position, size);
	} else {
		r = req_breadwrite(vp->v_bfs_e, for_e, vp->v_sdev, position,
		       size, buf, rw_flag, &res_pos, &res_cum_io);
		if (r == OK) {
			position = res_pos;
			cum_io += res_cum_io;
		}
	}

	unlock_bsf();
  } else {				/* Regular files */
	if (rw_flag == WRITING) {
		/* Check for O_APPEND flag. */
		if (f->filp_flags & O_APPEND) position = vp->v_size;
	}

	/* Issue request */
	if(rw_flag == PEEKING) {
		r = req_peek(vp->v_fs_e, vp->v_inode_nr, position, size);
	} else {
		off_t new_pos;
		r = req_readwrite(vp->v_fs_e, vp->v_inode_nr, position,
			rw_flag, for_e, buf, size, &new_pos,
			&cum_io_incr);

		if (r >= 0) {
			position = new_pos;
			cum_io += cum_io_incr;
		}
        }
  }

  /* On write, update file size and access time. */
  if (rw_flag == WRITING) {
	if (S_ISREG(vp->v_mode) || S_ISDIR(vp->v_mode)) {
		if (position > vp->v_size) {
			vp->v_size = position;
		}
	}
  }

  /* Store the (possibly advanced) position back into the filp. */
  f->filp_pos = position;

  if (r == EPIPE && rw_flag == WRITING) {
	/* Process is writing, but there is no reader. Tell the kernel to
	 * generate a SIGPIPE signal.
	 */
	if (!(f->filp_flags & O_NOSIGPIPE)) {
		sys_kill(rfp->fp_endpoint, SIGPIPE);
	}
  }

  if (r == OK) {
	return(cum_io);
  }
  return(r);
}
278 
279 /*===========================================================================*
280  *				do_getdents				     *
281  *===========================================================================*/
282 int do_getdents(void)
283 {
284 /* Perform the getdents(fd, buf, size) system call. */
285   int fd, r = OK;
286   off_t new_pos;
287   vir_bytes buf;
288   size_t size;
289   register struct filp *rfilp;
290 
291   /* This field must always be set to zero for getdents(). */
292   if (job_m_in.m_lc_vfs_readwrite.cum_io != 0)
293 	return(EINVAL);
294 
295   fd = job_m_in.m_lc_vfs_readwrite.fd;
296   buf = job_m_in.m_lc_vfs_readwrite.buf;
297   size = job_m_in.m_lc_vfs_readwrite.len;
298 
299   /* Is the file descriptor valid? */
300   if ( (rfilp = get_filp(fd, VNODE_READ)) == NULL)
301 	return(err_code);
302 
303   if (!(rfilp->filp_mode & R_BIT))
304 	r = EBADF;
305   else if (!S_ISDIR(rfilp->filp_vno->v_mode))
306 	r = EBADF;
307 
308   if (r == OK) {
309 	r = req_getdents(rfilp->filp_vno->v_fs_e, rfilp->filp_vno->v_inode_nr,
310 	    rfilp->filp_pos, buf, size, &new_pos, 0);
311 
312 	if (r > 0) rfilp->filp_pos = new_pos;
313   }
314 
315   unlock_filp(rfilp);
316   return(r);
317 }
318 
319 
/*===========================================================================*
 *				rw_pipe					     *
 *===========================================================================*/
int rw_pipe(int rw_flag, endpoint_t usr_e, struct filp *f, int callnr, int fd,
	vir_bytes buf, size_t nbytes, size_t cum_io)
{
/* Perform a read or write on pipe filp 'f' on behalf of endpoint 'usr_e'.
 * 'cum_io' is the number of bytes already transferred by an earlier,
 * partially completed attempt of the same call.  Returns the cumulative
 * byte count on success, SUSPEND if the caller was blocked, or a negative
 * error code.
 */
  int r, oflags, partial_pipe = FALSE;
  size_t size;
  size_t cum_io_incr;
  struct vnode *vp;
  off_t  position, new_pos;

  /* Must make sure we're operating on locked filp and vnode */
  assert(tll_locked_by_me(&f->filp_vno->v_lock));
  assert(mutex_trylock(&f->filp_lock) == -EDEADLK);

  oflags = f->filp_flags;
  vp = f->filp_vno;
  position = 0;	/* Not actually used */

  assert(rw_flag == READING || rw_flag == WRITING);

  /* pipe_check returns the number of bytes that may be transferred now,
   * SUSPEND to block the caller, or zero/negative for EOF or an error.
   */
  r = pipe_check(f, rw_flag, oflags, nbytes, 0);
  if (r <= 0) {
	if (r == SUSPEND)
		pipe_suspend(callnr, fd, buf, nbytes, cum_io);

	/* If pipe_check returns an error instead of suspending the call, we
	 * return that error, even if we are resuming a partially completed
	 * operation (ie, a large blocking write), to match NetBSD's behavior.
	 */
	return(r);
  }

  size = r;
  if (size < nbytes) partial_pipe = TRUE;

  /* Truncate read request at size. */
  if (rw_flag == READING && size > vp->v_size) {
	size = vp->v_size;
  }

  if (vp->v_mapfs_e == 0)
	panic("unmapped pipe");

  /* Let the FS holding the pipe data perform the actual transfer. */
  r = req_readwrite(vp->v_mapfs_e, vp->v_mapinode_nr, position, rw_flag, usr_e,
		    buf, size, &new_pos, &cum_io_incr);

  if (r != OK) {
	assert(r != SUSPEND);
	return(r);
  }

  /* Advance the running totals by the bytes actually transferred. */
  cum_io += cum_io_incr;
  buf += cum_io_incr;
  nbytes -= cum_io_incr;

  /* v_size is decreased by reads and increased by writes here, i.e. it
   * tracks the number of bytes currently buffered in the pipe.
   */
  if (rw_flag == READING)
	vp->v_size -= cum_io_incr;
  else
	vp->v_size += cum_io_incr;

  if (partial_pipe) {
	/* partial write on pipe with */
	/* O_NONBLOCK, return write count */
	if (!(oflags & O_NONBLOCK)) {
		/* partial write on pipe with nbytes > PIPE_BUF, non-atomic */
		pipe_suspend(callnr, fd, buf, nbytes, cum_io);
		return(SUSPEND);
	}
  }

  return(cum_io);
}
394