/*	$OpenBSD: ffs_vnops.c,v 1.102 2024/02/03 18:51:58 beck Exp $	*/
/*	$NetBSD: ffs_vnops.c,v 1.7 1996/05/11 18:27:24 mycroft Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ffs_vnops.c	8.10 (Berkeley) 8/10/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/kernel.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/signalvar.h>
#include <sys/pool.h>
#include <sys/event.h>
#include <sys/specdev.h>

#include <miscfs/fifofs/fifo.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/dir.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufsmount.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>
const struct vops ffs_vops = {
	.vop_lookup	= ufs_lookup,
	.vop_create	= ufs_create,
	.vop_mknod	= ufs_mknod,
	.vop_open	= ufs_open,
	.vop_close	= ufs_close,
	.vop_access	= ufs_access,
	.vop_getattr	= ufs_getattr,
	.vop_setattr	= ufs_setattr,
	.vop_read	= ffs_read,
	.vop_write	= ffs_write,
	.vop_ioctl	= ufs_ioctl,
	.vop_kqfilter	= ufs_kqfilter,
	.vop_revoke	= vop_generic_revoke,
	.vop_fsync	= ffs_fsync,
	.vop_remove	= ufs_remove,
	.vop_link	= ufs_link,
	.vop_rename	= ufs_rename,
	.vop_mkdir	= ufs_mkdir,
	.vop_rmdir	= ufs_rmdir,
	.vop_symlink	= ufs_symlink,
	.vop_readdir	= ufs_readdir,
	.vop_readlink	= ufs_readlink,
	.vop_abortop	= vop_generic_abortop,
	.vop_inactive	= ufs_inactive,
	.vop_reclaim	= ffs_reclaim,
	.vop_lock	= ufs_lock,
	.vop_unlock	= ufs_unlock,
	.vop_bmap	= ufs_bmap,
	.vop_strategy	= ufs_strategy,
	.vop_print	= ufs_print,
	.vop_islocked	= ufs_islocked,
	.vop_pathconf	= ufs_pathconf,
	.vop_advlock	= ufs_advlock,
	.vop_bwrite	= vop_generic_bwrite
};

const struct vops ffs_specvops = {
	.vop_close	= ufsspec_close,
	.vop_access	= ufs_access,
	.vop_getattr	= ufs_getattr,
	.vop_setattr	= ufs_setattr,
	.vop_read	= ufsspec_read,
	.vop_write	= ufsspec_write,
	.vop_fsync	= ffs_fsync,
	.vop_inactive	= ufs_inactive,
	.vop_reclaim	= ffs_reclaim,
	.vop_lock	= ufs_lock,
	.vop_unlock	= ufs_unlock,
	.vop_print	= ufs_print,
	.vop_islocked	= ufs_islocked,

	/* XXX: Keep in sync with spec_vops */
	.vop_lookup	= vop_generic_lookup,
	.vop_create	= vop_generic_badop,
	.vop_mknod	= vop_generic_badop,
	.vop_open	= spec_open,
	.vop_ioctl	= spec_ioctl,
	.vop_kqfilter	= spec_kqfilter,
	.vop_revoke	= vop_generic_revoke,
	.vop_remove	= vop_generic_badop,
	.vop_link	= vop_generic_badop,
	.vop_rename	= vop_generic_badop,
	.vop_mkdir	= vop_generic_badop,
	.vop_rmdir	= vop_generic_badop,
	.vop_symlink	= vop_generic_badop,
	.vop_readdir	= vop_generic_badop,
	.vop_readlink	= vop_generic_badop,
	.vop_abortop	= vop_generic_badop,
	.vop_bmap	= vop_generic_bmap,
	.vop_strategy	= spec_strategy,
	.vop_pathconf	= spec_pathconf,
	.vop_advlock	= spec_advlock,
	.vop_bwrite	= vop_generic_bwrite,
};

#ifdef FIFO
const struct vops ffs_fifovops = {
	.vop_close	= ufsfifo_close,
	.vop_access	= ufs_access,
	.vop_getattr	= ufs_getattr,
	.vop_setattr	= ufs_setattr,
	.vop_read	= ufsfifo_read,
	.vop_write	= ufsfifo_write,
	.vop_fsync	= ffs_fsync,
	.vop_inactive	= ufs_inactive,
	.vop_reclaim	= ffsfifo_reclaim,
	.vop_lock	= ufs_lock,
	.vop_unlock	= ufs_unlock,
	.vop_print	= ufs_print,
	.vop_islocked	= ufs_islocked,
	.vop_bwrite	= vop_generic_bwrite,

	/* XXX: Keep in sync with fifo_vops */
	.vop_lookup	= vop_generic_lookup,
	.vop_create	= vop_generic_badop,
	.vop_mknod	= vop_generic_badop,
	.vop_open	= fifo_open,
	.vop_ioctl	= fifo_ioctl,
	.vop_kqfilter	= fifo_kqfilter,
	.vop_revoke	= vop_generic_revoke,
	.vop_remove	= vop_generic_badop,
	.vop_link	= vop_generic_badop,
	.vop_rename	= vop_generic_badop,
	.vop_mkdir	= vop_generic_badop,
	.vop_rmdir	= vop_generic_badop,
	.vop_symlink	= vop_generic_badop,
	.vop_readdir	= vop_generic_badop,
	.vop_readlink	= vop_generic_badop,
	.vop_abortop	= vop_generic_badop,
	.vop_bmap	= vop_generic_bmap,
	.vop_strategy	= vop_generic_badop,
	.vop_pathconf	= fifo_pathconf,
	.vop_advlock	= fifo_advlock
};
#endif /* FIFO */

/*
 * Vnode op for reading.
 */
int
ffs_read(void *v)
{
	struct vop_read_args *ap = v;
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	struct fs *fs;
	struct buf *bp;
	daddr_t lbn, nextlbn;
	off_t bytesinfile;
	int size, xfersize, blkoffset;
	mode_t mode;
	int error;

	vp = ap->a_vp;
	ip = VTOI(vp);
	mode = DIP(ip, mode);
	uio = ap->a_uio;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("ffs_read: mode");

	if (vp->v_type == VLNK) {
		if (DIP(ip, size) < ip->i_ump->um_maxsymlinklen)
			panic("ffs_read: short symlink");
	} else if (vp->v_type != VREG && vp->v_type != VDIR)
		panic("ffs_read: type %d", vp->v_type);
#endif
	fs = ip->i_fs;
	if (uio->uio_offset < 0)
		return (EINVAL);
	if (uio->uio_resid == 0)
		return (0);

	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = DIP(ip, size) - uio->uio_offset) <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;
		size = fs->fs_bsize;	/* WAS blksize(fs, ip, lbn); */
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = fs->fs_bsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

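		/*
		 * Choose a read strategy: a plain bread() when this is
		 * the last block of the file, a cluster read when the
		 * access pattern looks sequential (the previous read
		 * ended at the preceding logical block) or more than
		 * one block remains to be transferred, and a plain
		 * bread() otherwise.
		 */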
		if (lblktosize(fs, nextlbn) >= DIP(ip, size))
			error = bread(vp, lbn, size, &bp);
		else if (lbn - 1 == ip->i_ci.ci_lastr ||
		    uio->uio_resid > xfersize) {
			error = bread_cluster(vp, lbn, size, &bp);
		} else
			error = bread(vp, lbn, size, &bp);

		if (error)
			break;
		ip->i_ci.ci_lastr = lbn;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}
		error = uiomove(bp->b_data + blkoffset, xfersize, uio);
		if (error)
			break;
		brelse(bp);
	}
	if (bp != NULL)
		brelse(bp);
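	/*
	 * Mark the inode for an access-time update unless the
	 * filesystem is mounted noatime; if the inode is already due
	 * for a change or update, record the access in any case.
	 */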
	if (!(vp->v_mount->mnt_flag & MNT_NOATIME) ||
	    (ip->i_flag & (IN_CHANGE | IN_UPDATE))) {
		ip->i_flag |= IN_ACCESS;
	}
	return (error);
}

/*
 * Vnode op for writing.
 */
int
ffs_write(void *v)
{
	struct vop_write_args *ap = v;
	struct vnode *vp;
	struct uio *uio;
	struct inode *ip;
	struct fs *fs;
	struct buf *bp;
	daddr_t lbn;
	off_t osize;
	int blkoffset, error, extended, flags, ioflag, size, xfersize;
	size_t resid;
	ssize_t overrun;

	extended = 0;
	ioflag = ap->a_ioflag;
	uio = ap->a_uio;
	vp = ap->a_vp;
	ip = VTOI(vp);

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("ffs_write: mode");
#endif

	/*
	 * If writing 0 bytes, succeed and do not change
	 * update time or file offset (standards compliance)
	 */
	if (uio->uio_resid == 0)
		return (0);

	switch (vp->v_type) {
	case VREG:
		if (ioflag & IO_APPEND)
			uio->uio_offset = DIP(ip, size);
		if ((DIP(ip, flags) & APPEND) && uio->uio_offset != DIP(ip, size))
			return (EPERM);
		/* FALLTHROUGH */
	case VLNK:
		break;
	case VDIR:
		if ((ioflag & IO_SYNC) == 0)
			panic("ffs_write: nonsync dir write");
		break;
	default:
		panic("ffs_write: type %d", vp->v_type);
	}

	fs = ip->i_fs;
	if (uio->uio_offset < 0 ||
	    (u_int64_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize)
		return (EFBIG);

	/* do the filesize rlimit check */
	if ((error = vn_fsizechk(vp, uio, ioflag, &overrun)))
		return (error);

	resid = uio->uio_resid;
	osize = DIP(ip, size);
	flags = ioflag & IO_SYNC ? B_SYNC : 0;

	for (error = 0; uio->uio_resid > 0;) {
		lbn = lblkno(fs, uio->uio_offset);
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = fs->fs_bsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
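		/*
		 * For a partial-block write, have UFS_BUF_ALLOC() fill
		 * in the rest of the block (by reading or clearing it)
		 * so the untouched part holds valid data; a full-block
		 * write overwrites everything, so skip that work.
		 */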
		if (fs->fs_bsize > xfersize)
			flags |= B_CLRBUF;
		else
			flags &= ~B_CLRBUF;

		if ((error = UFS_BUF_ALLOC(ip, uio->uio_offset, xfersize,
		    ap->a_cred, flags, &bp)) != 0)
			break;
		if (uio->uio_offset + xfersize > DIP(ip, size)) {
			DIP_ASSIGN(ip, size, uio->uio_offset + xfersize);
			uvm_vnp_setsize(vp, DIP(ip, size));
			extended = 1;
		}
		(void)uvm_vnp_uncache(vp);

		size = blksize(fs, ip, lbn) - bp->b_resid;
		if (size < xfersize)
			xfersize = size;

		error = uiomove(bp->b_data + blkoffset, xfersize, uio);
		/*
		 * If the buffer is not already filled and we encounter an
		 * error while trying to fill it, we have to clear out any
		 * garbage data from the pages instantiated for the buffer.
		 * If we do not, a failed uiomove() during a write can leave
		 * the prior contents of the pages exposed to a userland mmap.
		 *
		 * Note that we don't need to clear buffers that were
		 * allocated with the B_CLRBUF flag set.
		 */
		if (error != 0 && !(flags & B_CLRBUF))
			memset(bp->b_data + blkoffset, 0, xfersize);

		if (ioflag & IO_NOCACHE)
			bp->b_flags |= B_NOCACHE;

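		/*
		 * Push the buffer out: synchronously for IO_SYNC
		 * writes, asynchronously once a full block has been
		 * filled, and as a delayed write for a partial block
		 * that may still grow before it must go to disk.
		 */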
		if (ioflag & IO_SYNC)
			(void)bwrite(bp);
		else if (xfersize + blkoffset == fs->fs_bsize) {
			bawrite(bp);
		} else
			bdwrite(bp);

		if (error || xfersize == 0)
			break;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0 &&
	    !vnoperm(vp))
		DIP_ASSIGN(ip, mode, DIP(ip, mode) & ~(ISUID | ISGID));
	if (resid > uio->uio_resid)
		VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0));
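	/*
	 * On error with IO_UNIT set, back out the partial write:
	 * truncate the file to its original size and restore the uio
	 * so the caller observes an all-or-nothing result.
	 */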
	if (error) {
		if (ioflag & IO_UNIT) {
			(void)UFS_TRUNCATE(ip, osize,
			    ioflag & IO_SYNC, ap->a_cred);
			uio->uio_offset -= resid - uio->uio_resid;
			uio->uio_resid = resid;
		}
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) {
		error = UFS_UPDATE(ip, 1);
	}
	/* correct the result for writes clamped by vn_fsizechk() */
	uio->uio_resid += overrun;
	return (error);
}

/*
 * Synch an open file.
 */
int
ffs_fsync(void *v)
{
	struct vop_fsync_args *ap = v;
	struct vnode *vp = ap->a_vp;
	struct buf *bp, *nbp;
	int s, error, passes, skipmeta;

	/*
	 * Flush all dirty buffers associated with a vnode.
	 */
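	/*
	 * Allow one pass for the data blocks plus one per level of
	 * indirect blocks.  On a synchronous flush, skip metadata
	 * (negative logical block numbers) on the first pass so the
	 * data blocks go out before the metadata that references them.
	 */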
	passes = NIADDR + 1;
	skipmeta = 0;
	if (ap->a_waitfor == MNT_WAIT)
		skipmeta = 1;
	s = splbio();
loop:
	LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
		bp->b_flags &= ~B_SCANNED;
	}
	LIST_FOREACH_SAFE(bp, &vp->v_dirtyblkhd, b_vnbufs, nbp) {
		/*
		 * Reasons to skip this buffer: it has already been considered
		 * on this pass, this pass is the first time through on a
		 * synchronous flush request and the buffer being considered
		 * is metadata, the buffer has dependencies that will cause
		 * it to be redirtied and it has not already been deferred,
		 * or it is already being written.
		 */
		if (bp->b_flags & (B_BUSY | B_SCANNED))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("ffs_fsync: not dirty");
		if (skipmeta && bp->b_lblkno < 0)
			continue;

		bremfree(bp);
		buf_acquire(bp);
		bp->b_flags |= B_SCANNED;
		splx(s);
		/*
		 * On our final pass through, do all I/O synchronously
		 * so that we can find out if our flush is failing
		 * because of write errors.
		 */
		if (passes > 0 || ap->a_waitfor != MNT_WAIT)
			(void) bawrite(bp);
		else if ((error = bwrite(bp)) != 0)
			return (error);
		s = splbio();
		/*
		 * Since we may have slept during the I/O, we need
		 * to start from a known point.
		 */
		nbp = LIST_FIRST(&vp->v_dirtyblkhd);
	}
	if (skipmeta) {
		skipmeta = 0;
		goto loop;
	}
	if (ap->a_waitfor == MNT_WAIT) {
		vwaitforio(vp, 0, "ffs_fsync", INFSLP);

		/*
		 * Ensure that any filesystem metadata associated
		 * with the vnode has been written.
		 */
		splx(s);
		/* XXX softdep was here. reconsider this locking dance */
		s = splbio();
		if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
			/*
			 * Block devices associated with filesystems may
			 * have new I/O requests posted for them even if
			 * the vnode is locked, so no amount of trying will
			 * get them clean.  Thus we give block devices a
			 * good effort, then just give up.  For all other file
			 * types, go around and try again until it is clean.
			 */
			if (passes > 0) {
				passes -= 1;
				goto loop;
			}
#ifdef DIAGNOSTIC
			if (vp->v_type != VBLK)
				vprint("ffs_fsync: dirty", vp);
#endif
		}
	}
	splx(s);
	return (UFS_UPDATE(VTOI(vp), ap->a_waitfor == MNT_WAIT));
}

/*
 * Reclaim an inode so that it can be used for other purposes.
 */
int
ffs_reclaim(void *v)
{
	struct vop_reclaim_args *ap = v;
	struct vnode *vp = ap->a_vp;
	struct inode *ip = VTOI(vp);
	int error;

	if ((error = ufs_reclaim(vp)) != 0)
		return (error);

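	/*
	 * i_din1 and i_din2 name the same union member in the inode,
	 * so a non-NULL i_din1 covers the FFS2 case as well; return
	 * the dinode to whichever pool it was allocated from.
	 */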
	if (ip->i_din1 != NULL) {
#ifdef FFS2
		if (ip->i_ump->um_fstype == UM_UFS2)
			pool_put(&ffs_dinode2_pool, ip->i_din2);
		else
#endif
			pool_put(&ffs_dinode1_pool, ip->i_din1);
	}

	pool_put(&ffs_ino_pool, ip);

	vp->v_data = NULL;

	return (0);
}

#ifdef FIFO
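/*
 * Reclaim a FIFO: release the FIFO-specific state, then reclaim the
 * inode like any other FFS vnode.
 */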
int
ffsfifo_reclaim(void *v)
{
	fifo_reclaim(v);
	return (ffs_reclaim(v));
}
#endif