/* This file contains the heart of the mechanism used to read (and write)
 * files.  Read and write requests are split up into chunks that do not cross
 * block boundaries.  Each chunk is then processed in turn.  Reads on special
 * files are also detected and handled.
 *
 * The entry points into this file are
 *   do_read:	  perform the READ system call by calling read_write
 *   do_getdents: read entries from a directory (GETDENTS)
 *   read_write:  actually do the work of READ and WRITE
 *
 */

#include "fs.h"
#include <minix/callnr.h>
#include <minix/com.h>
#include <minix/u64.h>
#include <minix/vfsif.h>
#include <assert.h>
#include <sys/dirent.h>
#include <fcntl.h>
#include <unistd.h>
#include "file.h"
#include "vnode.h"
#include "vmnt.h"


/*===========================================================================*
 *				do_read					     *
 *===========================================================================*/
int do_read(void)
{

  /*
   * This field is currently reserved for internal usage only, and must be set
   * to zero by the caller.  We may use it for future SA_RESTART support just
   * like we are using it internally now.
   */
  if (job_m_in.m_lc_vfs_readwrite.cum_io != 0)
	return(EINVAL);

  return(do_read_write_peek(READING, job_m_in.m_lc_vfs_readwrite.fd,
	job_m_in.m_lc_vfs_readwrite.buf, job_m_in.m_lc_vfs_readwrite.len));
}


/*===========================================================================*
 *				lock_bsf				     *
 *===========================================================================*/
void lock_bsf(void)
{
/* Acquire the global block-special-file lock.  If the lock is contended,
 * suspend this worker thread while blocking on it, so that the VFS can keep
 * processing other requests in the meantime.
 */
  struct worker_thread *org_self;

  /* Fast path: lock was free, no need to suspend the worker. */
  if (mutex_trylock(&bsf_lock) == 0)
	return;

  org_self = worker_suspend();

  if (mutex_lock(&bsf_lock) != 0)
	panic("unable to lock block special file lock");

  worker_resume(org_self);
}

/*===========================================================================*
 *				unlock_bsf				     *
 *===========================================================================*/
void unlock_bsf(void)
{
/* Release the global block-special-file lock. */
  if (mutex_unlock(&bsf_lock) != 0)
	panic("failed to unlock block special file lock");
}

/*===========================================================================*
 *				check_bsf_lock				     *
 *===========================================================================*/
void check_bsf_lock(void)
{
/* Sanity check: panic if the block-special-file lock is currently held.
 * On success the trylock above acquired the lock, so release it again.
 */
  int r = mutex_trylock(&bsf_lock);

  if (r == -EBUSY)
	panic("bsf_lock locked");
  else if (r != 0)
	panic("bsf_lock weird state");

  /* r == 0: we now hold the lock ourselves; drop it again. */
  unlock_bsf();
}

/*===========================================================================*
 *				actual_read_write_peek			     *
 *===========================================================================*/
int actual_read_write_peek(struct fproc *rfp, int rw_flag, int fd,
	vir_bytes buf, size_t nbytes)
{
/* Perform read(fd, buffer, nbytes) or write(fd, buffer, nbytes) call.
 * Looks up and locks the filp for 'fd' in process 'rfp', validates the open
 * mode against the requested operation, and delegates to read_write().
 */
  struct filp *f;
  tll_access_t locktype;
  int r;
  int ro = 1;	/* nonzero = this is a read-side operation (READING/PEEKING) */

  if(rw_flag == WRITING) ro = 0;

  /* Writes need an exclusive vnode lock; reads/peeks share it. */
  locktype = rw_flag == WRITING ? VNODE_WRITE : VNODE_READ;
  if ((f = get_filp2(rfp, fd, locktype)) == NULL)
	return(err_code);

  assert(f->filp_count > 0);

  /* The file must have been opened for the requested direction. */
  if (((f->filp_mode) & (ro ? R_BIT : W_BIT)) == 0) {
	unlock_filp(f);
	return(EBADF);
  }
  if (nbytes == 0) {
	unlock_filp(f);
	return(0);	/* so char special files need not check for 0 */
  }

  r = read_write(rfp, rw_flag, fd, f, buf, nbytes, who_e);

  unlock_filp(f);
  return(r);
}

/*===========================================================================*
 *				do_read_write_peek			     *
 *===========================================================================*/
int do_read_write_peek(int rw_flag, int fd, vir_bytes buf, size_t nbytes)
{
/* Convenience wrapper: perform the operation on behalf of the current
 * process ('fp').
 */
  return actual_read_write_peek(fp, rw_flag, fd, buf, nbytes);
}

/*===========================================================================*
 *				read_write				     *
 *===========================================================================*/
int read_write(struct fproc *rfp, int rw_flag, int fd, struct filp *f,
	vir_bytes buf, size_t size, endpoint_t for_e)
{
/* Carry out a READ, WRITE or PEEK on filp 'f', dispatching on the vnode's
 * file type (pipe, character special, socket, block special, regular).
 * 'buf'/'size' describe the user buffer in endpoint 'for_e'.  On success the
 * number of bytes transferred is returned and f->filp_pos is advanced;
 * otherwise a negative error code is returned.
 */
  register struct vnode *vp;
  off_t position, res_pos;
  size_t cum_io, res_cum_io;
  size_t cum_io_incr;
  int op, r;
  dev_t dev;

  position = f->filp_pos;
  vp = f->filp_vno;
  r = OK;
  cum_io = 0;

  assert(rw_flag == READING || rw_flag == WRITING || rw_flag == PEEKING);

  /* POSIX allows rejecting transfers larger than SSIZE_MAX outright. */
  if (size > SSIZE_MAX) return(EINVAL);

  if (S_ISFIFO(vp->v_mode)) {		/* Pipes */
	if(rw_flag == PEEKING) {
	  	printf("read_write: peek on pipe makes no sense\n");
		return EINVAL;
	}
	assert(fd != -1);
	op = (rw_flag == READING ? VFS_READ : VFS_WRITE);
	r = rw_pipe(rw_flag, for_e, f, op, fd, buf, size, 0 /*cum_io*/);
  } else if (S_ISCHR(vp->v_mode)) {	/* Character special files. */
	if(rw_flag == PEEKING) {
	  	printf("read_write: peek on char device makes no sense\n");
		return EINVAL;
	}

	if (vp->v_sdev == NO_DEV)
		panic("VFS: read_write tries to access char dev NO_DEV");

	dev = vp->v_sdev;
	op = (rw_flag == READING ? CDEV_READ : CDEV_WRITE);

	r = cdev_io(op, dev, for_e, buf, position, size, f->filp_flags);
	if (r >= 0) {
		/* This should no longer happen: all calls are asynchronous. */
		printf("VFS: I/O to device %llx succeeded immediately!?\n", dev);
		cum_io = r;
		position += r;
		r = OK;
	} else if (r == SUSPEND) {
		/* FIXME: multiple read/write operations on a single filp
		 * should be serialized.  They currently aren't; in order to
		 * achieve a similar effect, we optimistically advance the file
		 * position here.  This works under the following assumptions:
		 * - character drivers that use the seek position at all,
		 *   expose a view of a statically-sized range of bytes, i.e.,
		 *   they are basically byte-granular block devices;
		 * - if short I/O or an error is returned, all subsequent calls
		 *   will return (respectively) EOF and an error;
		 * - the application never checks its own file seek position,
		 *   or does not care that it may end up having seeked beyond
		 *   the number of bytes it has actually read;
		 * - communication to the character driver is FIFO (this one
		 *   is actually true! whew).
		 * Many improvements are possible here, but in the end,
		 * anything short of queuing concurrent operations will be
		 * suboptimal - so we settle for this hack for now.
		 */
		position += size;
	}
  } else if (S_ISSOCK(vp->v_mode)) {	/* Sockets. */
	if (rw_flag == PEEKING) {
		printf("VFS: read_write tries to peek on sock dev\n");
		return EINVAL;
	}

	if (vp->v_sdev == NO_DEV)
		panic("VFS: read_write tries to access sock dev NO_DEV");

	r = sdev_readwrite(vp->v_sdev, buf, size, 0, 0, 0, 0, 0, rw_flag,
		f->filp_flags, 0);
  } else if (S_ISBLK(vp->v_mode)) {	/* Block special files. */
	if (vp->v_sdev == NO_DEV)
		panic("VFS: read_write tries to access block dev NO_DEV");

	/* Serialize all block-special-file I/O under the global bsf lock. */
	lock_bsf();

	if(rw_flag == PEEKING) {
		r = req_bpeek(vp->v_bfs_e, vp->v_sdev, position, size);
	} else {
		r = req_breadwrite(vp->v_bfs_e, for_e, vp->v_sdev, position,
			size, buf, rw_flag, &res_pos, &res_cum_io);
		if (r == OK) {
			position = res_pos;
			cum_io += res_cum_io;
		}
	}

	unlock_bsf();
  } else {				/* Regular files */
	if (rw_flag == WRITING) {
		/* Check for O_APPEND flag: append writes start at EOF. */
		if (f->filp_flags & O_APPEND) position = vp->v_size;
	}

	/* Issue request to the file system that holds the inode. */
	if(rw_flag == PEEKING) {
		r = req_peek(vp->v_fs_e, vp->v_inode_nr, position, size);
	} else {
		off_t new_pos;
		r = req_readwrite(vp->v_fs_e, vp->v_inode_nr, position,
			rw_flag, for_e, buf, size, &new_pos,
			&cum_io_incr);

		if (r >= 0) {
			position = new_pos;
			cum_io += cum_io_incr;
		}
	}
  }

  /* On write, grow the cached file size if the position moved past it.
   * NOTE(review): despite what older comments suggested, only the size is
   * updated here; timestamps are not touched in this function.
   */
  if (rw_flag == WRITING) {
	if (S_ISREG(vp->v_mode) || S_ISDIR(vp->v_mode)) {
		if (position > vp->v_size) {
			vp->v_size = position;
		}
	}
  }

  f->filp_pos = position;

  if (r == EPIPE && rw_flag == WRITING) {
	/* Process is writing, but there is no reader.  Tell the kernel to
	 * generate a SIGPIPE signal.
	 */
	if (!(f->filp_flags & O_NOSIGPIPE)) {
		sys_kill(rfp->fp_endpoint, SIGPIPE);
	}
  }

  if (r == OK) {
	return(cum_io);
  }
  return(r);
}

/*===========================================================================*
 *				do_getdents				     *
 *===========================================================================*/
int do_getdents(void)
{
/* Perform the getdents(fd, buf, size) system call. */
  int fd, r = OK;
  off_t new_pos;
  vir_bytes buf;
  size_t size;
  register struct filp *rfilp;

  /* This field must always be set to zero for getdents(). */
  if (job_m_in.m_lc_vfs_readwrite.cum_io != 0)
	return(EINVAL);

  fd = job_m_in.m_lc_vfs_readwrite.fd;
  buf = job_m_in.m_lc_vfs_readwrite.buf;
  size = job_m_in.m_lc_vfs_readwrite.len;

  /* Is the file descriptor valid? */
  if ( (rfilp = get_filp(fd, VNODE_READ)) == NULL)
	return(err_code);

  /* Must be a directory that is open for reading. */
  if (!(rfilp->filp_mode & R_BIT))
	r = EBADF;
  else if (!S_ISDIR(rfilp->filp_vno->v_mode))
	r = EBADF;

  if (r == OK) {
	r = req_getdents(rfilp->filp_vno->v_fs_e, rfilp->filp_vno->v_inode_nr,
		rfilp->filp_pos, buf, size, &new_pos, 0);

	/* On success (bytes returned), advance the directory position. */
	if (r > 0) rfilp->filp_pos = new_pos;
  }

  unlock_filp(rfilp);
  return(r);
}


/*===========================================================================*
 *				rw_pipe					     *
 *===========================================================================*/
int rw_pipe(int rw_flag, endpoint_t usr_e, struct filp *f, int callnr, int fd,
	vir_bytes buf, size_t nbytes, size_t cum_io)
{
/* Perform a read or write on pipe filp 'f' on behalf of endpoint 'usr_e'.
 * 'cum_io' is the number of bytes already transferred by an earlier,
 * partially completed attempt of the same call (0 on a fresh call).  May
 * suspend the caller (returning SUSPEND) when the pipe is full/empty.
 */
  int r, oflags, partial_pipe = FALSE;
  size_t size;
  size_t cum_io_incr;
  struct vnode *vp;
  off_t position, new_pos;

  /* Must make sure we're operating on locked filp and vnode */
  assert(tll_locked_by_me(&f->filp_vno->v_lock));
  assert(mutex_trylock(&f->filp_lock) == -EDEADLK);

  oflags = f->filp_flags;
  vp = f->filp_vno;
  position = 0;	/* Not actually used */

  assert(rw_flag == READING || rw_flag == WRITING);

  /* pipe_check returns the number of bytes that may be transferred now,
   * SUSPEND if the caller must block, or an error.
   */
  r = pipe_check(f, rw_flag, oflags, nbytes, 0);
  if (r <= 0) {
	if (r == SUSPEND)
		pipe_suspend(callnr, fd, buf, nbytes, cum_io);

	/* If pipe_check returns an error instead of suspending the call, we
	 * return that error, even if we are resuming a partially completed
	 * operation (ie, a large blocking write), to match NetBSD's behavior.
	 */
	return(r);
  }

  size = r;
  if (size < nbytes) partial_pipe = TRUE;

  /* Truncate read request at size.  For pipes, v_size is used here as the
   * number of bytes currently buffered in the pipe (it is decremented on
   * reads and incremented on writes below).
   */
  if (rw_flag == READING && size > vp->v_size) {
	size = vp->v_size;
  }

  if (vp->v_mapfs_e == 0)
	panic("unmapped pipe");

  r = req_readwrite(vp->v_mapfs_e, vp->v_mapinode_nr, position, rw_flag, usr_e,
	buf, size, &new_pos, &cum_io_incr);

  if (r != OK) {
	assert(r != SUSPEND);
	return(r);
  }

  /* Account for the bytes just transferred. */
  cum_io += cum_io_incr;
  buf += cum_io_incr;
  nbytes -= cum_io_incr;

  if (rw_flag == READING)
	vp->v_size -= cum_io_incr;
  else
	vp->v_size += cum_io_incr;

  if (partial_pipe) {
	/* Partial transfer.  With O_NONBLOCK, return the partial count;
	 * otherwise suspend to complete the rest later.
	 */
	if (!(oflags & O_NONBLOCK)) {
		/* partial write on pipe with nbytes > PIPE_BUF, non-atomic */
		pipe_suspend(callnr, fd, buf, nbytes, cum_io);
		return(SUSPEND);
	}
  }

  return(cum_io);
}