1 /* $NetBSD: vfs_subr.c,v 1.500 2023/04/30 08:46:11 riastradh Exp $ */
2
3 /*-
4 * Copyright (c) 1997, 1998, 2004, 2005, 2007, 2008, 2019, 2020
5 * The NetBSD Foundation, Inc.
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to The NetBSD Foundation
9 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
10 * NASA Ames Research Center, by Charles M. Hannum, by Andrew Doran,
11 * by Marshall Kirk McKusick and Greg Ganger at the University of Michigan.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 * POSSIBILITY OF SUCH DAMAGE.
33 */
34
35 /*
36 * Copyright (c) 1989, 1993
37 * The Regents of the University of California. All rights reserved.
38 * (c) UNIX System Laboratories, Inc.
39 * All or some portions of this file are derived from material licensed
40 * to the University of California by American Telephone and Telegraph
41 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
42 * the permission of UNIX System Laboratories, Inc.
43 *
44 * Redistribution and use in source and binary forms, with or without
45 * modification, are permitted provided that the following conditions
46 * are met:
47 * 1. Redistributions of source code must retain the above copyright
48 * notice, this list of conditions and the following disclaimer.
49 * 2. Redistributions in binary form must reproduce the above copyright
50 * notice, this list of conditions and the following disclaimer in the
51 * documentation and/or other materials provided with the distribution.
52 * 3. Neither the name of the University nor the names of its contributors
53 * may be used to endorse or promote products derived from this software
54 * without specific prior written permission.
55 *
56 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
57 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
58 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
59 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
60 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
61 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
62 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
64 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
65 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
66 * SUCH DAMAGE.
67 *
68 * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94
69 */
70
71 #include <sys/cdefs.h>
72 __KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.500 2023/04/30 08:46:11 riastradh Exp $");
73
74 #ifdef _KERNEL_OPT
75 #include "opt_compat_43.h"
76 #include "opt_compat_netbsd.h"
77 #include "opt_ddb.h"
78 #endif
79
80 #include <sys/param.h>
81 #include <sys/types.h>
82
83 #include <sys/buf.h>
84 #include <sys/conf.h>
85 #include <sys/dirent.h>
86 #include <sys/errno.h>
87 #include <sys/filedesc.h>
88 #include <sys/fstrans.h>
89 #include <sys/kauth.h>
90 #include <sys/kernel.h>
91 #include <sys/kmem.h>
92 #include <sys/module.h>
93 #include <sys/mount.h>
94 #include <sys/namei.h>
95 #include <sys/stat.h>
96 #include <sys/syscallargs.h>
97 #include <sys/sysctl.h>
98 #include <sys/systm.h>
99 #include <sys/vnode_impl.h>
100
101 #include <miscfs/deadfs/deadfs.h>
102 #include <miscfs/genfs/genfs.h>
103 #include <miscfs/specfs/specdev.h>
104
105 #include <uvm/uvm_ddb.h>
106
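/*
 * Statically-defined DTrace (SDT) probes describing syncer worklist
 * operations and the periodic sync passes performed by sched_sync() below.
 */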
107 SDT_PROBE_DEFINE3(vfs, syncer, worklist, vnode__add,
108 "struct vnode *"/*vp*/,
109 "int"/*delayx*/,
110 "int"/*slot*/);
111 SDT_PROBE_DEFINE4(vfs, syncer, worklist, vnode__update,
112 "struct vnode *"/*vp*/,
113 "int"/*delayx*/,
114 "int"/*oslot*/,
115 "int"/*nslot*/);
116 SDT_PROBE_DEFINE1(vfs, syncer, worklist, vnode__remove,
117 "struct vnode *"/*vp*/);
118
119 SDT_PROBE_DEFINE3(vfs, syncer, worklist, mount__add,
120 "struct mount *"/*mp*/,
121 "int"/*vdelay*/,
122 "int"/*slot*/);
123 SDT_PROBE_DEFINE4(vfs, syncer, worklist, mount__update,
124 "struct mount *"/*vp*/,
125 "int"/*vdelay*/,
126 "int"/*oslot*/,
127 "int"/*nslot*/);
128 SDT_PROBE_DEFINE1(vfs, syncer, worklist, mount__remove,
129 "struct mount *"/*mp*/);
130
131 SDT_PROBE_DEFINE1(vfs, syncer, sync, start,
132 "int"/*starttime*/);
133 SDT_PROBE_DEFINE1(vfs, syncer, sync, mount__start,
134 "struct mount *"/*mp*/);
135 SDT_PROBE_DEFINE2(vfs, syncer, sync, mount__done,
136 "struct mount *"/*mp*/,
137 "int"/*error*/);
138 SDT_PROBE_DEFINE1(vfs, syncer, sync, mount__skip,
139 "struct mount *"/*mp*/);
140 SDT_PROBE_DEFINE1(vfs, syncer, sync, vnode__start,
141 "struct vnode *"/*vp*/);
142 SDT_PROBE_DEFINE2(vfs, syncer, sync, vnode__done,
143 "struct vnode *"/*vp*/,
144 "int"/*error*/);
145 SDT_PROBE_DEFINE2(vfs, syncer, sync, vnode__fail__lock,
146 "struct vnode *"/*vp*/,
147 "int"/*error*/);
148 SDT_PROBE_DEFINE2(vfs, syncer, sync, vnode__fail__vget,
149 "struct vnode *"/*vp*/,
150 "int"/*error*/);
151 SDT_PROBE_DEFINE2(vfs, syncer, sync, done,
152 "int"/*starttime*/,
153 "int"/*endtime*/);
154
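/*
 * Conversion tables between the file type bits of st_mode (S_IFMT >> 12)
 * and enum vtype, used by the IFTOVT() and VTTOIF() macros.
 */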
155 const enum vtype iftovt_tab[16] = {
156 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
157 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
158 };
159 const int vttoif_tab[9] = {
160 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
161 S_IFSOCK, S_IFIFO, S_IFMT,
162 };
163
164 /*
165 * Insq/Remq for the vnode usage lists.
166 */
167 #define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs)
168 #define bufremvn(bp) { \
169 LIST_REMOVE(bp, b_vnbufs); \
170 (bp)->b_vnbufs.le_next = NOLIST; \
171 }
172
173 int doforce = 1; /* 1 => permit forcible unmounting */
174
175 /*
176 * Local declarations.
177 */
178
179 static void vn_initialize_syncerd(void);
180
181 /*
182 * Initialize the vnode management data structures.
183 */
184 void
185 vntblinit(void)
186 {
187
188 vn_initialize_syncerd();
189 vfs_mount_sysinit();
190 vfs_vnode_sysinit();
191 }
192
193 /*
194 * Flush out and invalidate all buffers associated with a vnode.
195 * Called with the underlying vnode locked, which should prevent new dirty
196 * buffers from being queued.
197 */
198 int
199 vinvalbuf(struct vnode *vp, int flags, kauth_cred_t cred, struct lwp *l,
200 bool catch_p, int slptimeo)
201 {
202 struct buf *bp, *nbp;
203 int error;
204 int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO |
205 (flags & V_SAVE ? PGO_CLEANIT | PGO_RECLAIM : 0);
206
207 /* XXXUBC this doesn't look at flags or slp* */
208 rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);
209 error = VOP_PUTPAGES(vp, 0, 0, flushflags);
210 if (error) {
211 return error;
212 }
213
214 if (flags & V_SAVE) {
215 error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0);
216 if (error)
217 return (error);
218 KASSERT(LIST_EMPTY(&vp->v_dirtyblkhd));
219 }
220
221 mutex_enter(&bufcache_lock);
222 restart:
223 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
224 KASSERT(bp->b_vp == vp);
225 nbp = LIST_NEXT(bp, b_vnbufs);
226 error = bbusy(bp, catch_p, slptimeo, NULL);
227 if (error != 0) {
228 if (error == EPASSTHROUGH)
229 goto restart;
230 mutex_exit(&bufcache_lock);
231 return (error);
232 }
233 brelsel(bp, BC_INVAL | BC_VFLUSH);
234 }
235
236 for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
237 KASSERT(bp->b_vp == vp);
238 nbp = LIST_NEXT(bp, b_vnbufs);
239 error = bbusy(bp, catch_p, slptimeo, NULL);
240 if (error != 0) {
241 if (error == EPASSTHROUGH)
242 goto restart;
243 mutex_exit(&bufcache_lock);
244 return (error);
245 }
246 /*
247 * XXX Since there are no node locks for NFS, I believe
248 * there is a slight chance that a delayed write will
249 * occur while sleeping just above, so check for it.
250 */
251 if ((bp->b_oflags & BO_DELWRI) && (flags & V_SAVE)) {
252 #ifdef DEBUG
253 printf("buffer still DELWRI\n");
254 #endif
255 bp->b_cflags |= BC_BUSY | BC_VFLUSH;
256 mutex_exit(&bufcache_lock);
257 VOP_BWRITE(bp->b_vp, bp);
258 mutex_enter(&bufcache_lock);
259 goto restart;
260 }
261 brelsel(bp, BC_INVAL | BC_VFLUSH);
262 }
263
264 #ifdef DIAGNOSTIC
265 if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
266 panic("vinvalbuf: flush failed, vp %p", vp);
267 #endif
268
269 mutex_exit(&bufcache_lock);
270
271 return (0);
272 }
273
274 /*
275 * Destroy any in core blocks past the truncation length.
276 * Called with the underlying vnode locked, which should prevent new dirty
277 * buffers from being queued.
278 */
279 int
280 vtruncbuf(struct vnode *vp, daddr_t lbn, bool catch_p, int slptimeo)
281 {
282 struct buf *bp, *nbp;
283 int error;
284 voff_t off;
285
286 off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift);
287 rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);
288 error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO);
289 if (error) {
290 return error;
291 }
292
293 mutex_enter(&bufcache_lock);
294 restart:
295 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
296 KASSERT(bp->b_vp == vp);
297 nbp = LIST_NEXT(bp, b_vnbufs);
298 if (bp->b_lblkno < lbn)
299 continue;
300 error = bbusy(bp, catch_p, slptimeo, NULL);
301 if (error != 0) {
302 if (error == EPASSTHROUGH)
303 goto restart;
304 mutex_exit(&bufcache_lock);
305 return (error);
306 }
307 brelsel(bp, BC_INVAL | BC_VFLUSH);
308 }
309
310 for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
311 KASSERT(bp->b_vp == vp);
312 nbp = LIST_NEXT(bp, b_vnbufs);
313 if (bp->b_lblkno < lbn)
314 continue;
315 error = bbusy(bp, catch_p, slptimeo, NULL);
316 if (error != 0) {
317 if (error == EPASSTHROUGH)
318 goto restart;
319 mutex_exit(&bufcache_lock);
320 return (error);
321 }
322 brelsel(bp, BC_INVAL | BC_VFLUSH);
323 }
324 mutex_exit(&bufcache_lock);
325
326 return (0);
327 }
328
329 /*
330 * Flush all dirty buffers from a vnode.
331 * Called with the underlying vnode locked, which should prevent new dirty
332 * buffers from being queued.
333 */
334 int
335 vflushbuf(struct vnode *vp, int flags)
336 {
337 struct buf *bp, *nbp;
338 int error, pflags;
339 bool dirty, sync;
340
341 sync = (flags & FSYNC_WAIT) != 0;
342 pflags = PGO_CLEANIT | PGO_ALLPAGES |
343 (sync ? PGO_SYNCIO : 0) |
344 ((flags & FSYNC_LAZY) ? PGO_LAZY : 0);
345 rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);
346 (void) VOP_PUTPAGES(vp, 0, 0, pflags);
347
348 loop:
349 mutex_enter(&bufcache_lock);
350 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
351 KASSERT(bp->b_vp == vp);
352 nbp = LIST_NEXT(bp, b_vnbufs);
353 if ((bp->b_cflags & BC_BUSY))
354 continue;
355 if ((bp->b_oflags & BO_DELWRI) == 0)
356 panic("vflushbuf: not dirty, bp %p", bp);
357 bp->b_cflags |= BC_BUSY | BC_VFLUSH;
358 mutex_exit(&bufcache_lock);
359 /*
360 * Wait for I/O associated with indirect blocks to complete,
361 * since there is no way to quickly wait for them below.
362 */
363 if (bp->b_vp == vp || !sync)
364 (void) bawrite(bp);
365 else {
366 error = bwrite(bp);
367 if (error)
368 return error;
369 }
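		/*
		 * bufcache_lock was dropped for the write above, so the
		 * buffer list may have changed; rescan it from the head.
		 */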
370 goto loop;
371 }
372 mutex_exit(&bufcache_lock);
373
374 if (!sync)
375 return 0;
376
377 mutex_enter(vp->v_interlock);
378 while (vp->v_numoutput != 0)
379 cv_wait(&vp->v_cv, vp->v_interlock);
380 dirty = !LIST_EMPTY(&vp->v_dirtyblkhd);
381 mutex_exit(vp->v_interlock);
382
383 if (dirty) {
384 vprint("vflushbuf: dirty", vp);
385 goto loop;
386 }
387
388 return 0;
389 }
390
391 /*
392 * Create a vnode for a block device.
393 * Used for root filesystem and swap areas.
394 * Also used for memory file system special devices.
395 */
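/*
 * Note that the vnode is created on dead_rootmount, so these special
 * vnodes are not associated with any on-disk file system.
 */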
396 int
397 bdevvp(dev_t dev, vnode_t **vpp)
398 {
399 struct vattr va;
400
401 vattr_null(&va);
402 va.va_type = VBLK;
403 va.va_rdev = dev;
404
405 return vcache_new(dead_rootmount, NULL, &va, NOCRED, NULL, vpp);
406 }
407
408 /*
409 * Create a vnode for a character device.
410 * Used for kernfs and some console handling.
411 */
412 int
413 cdevvp(dev_t dev, vnode_t **vpp)
414 {
415 struct vattr va;
416
417 vattr_null(&va);
418 va.va_type = VCHR;
419 va.va_rdev = dev;
420
421 return vcache_new(dead_rootmount, NULL, &va, NOCRED, NULL, vpp);
422 }
423
424 /*
425 * Associate a buffer with a vnode. There must already be a hold on
426 * the vnode.
427 */
428 void
429 bgetvp(struct vnode *vp, struct buf *bp)
430 {
431
432 KASSERT(bp->b_vp == NULL);
433 KASSERT(bp->b_objlock == &buffer_lock);
434 KASSERT(mutex_owned(vp->v_interlock));
435 KASSERT(mutex_owned(&bufcache_lock));
436 KASSERT((bp->b_cflags & BC_BUSY) != 0);
437 KASSERT(!cv_has_waiters(&bp->b_done));
438
439 vholdl(vp);
440 bp->b_vp = vp;
441 if (vp->v_type == VBLK || vp->v_type == VCHR)
442 bp->b_dev = vp->v_rdev;
443 else
444 bp->b_dev = NODEV;
445
446 /*
447 * Insert onto list for new vnode.
448 */
449 bufinsvn(bp, &vp->v_cleanblkhd);
450 bp->b_objlock = vp->v_interlock;
451 }
452
453 /*
454 * Disassociate a buffer from a vnode.
455 */
456 void
457 brelvp(struct buf *bp)
458 {
459 struct vnode *vp = bp->b_vp;
460
461 KASSERT(vp != NULL);
462 KASSERT(bp->b_objlock == vp->v_interlock);
463 KASSERT(mutex_owned(vp->v_interlock));
464 KASSERT(mutex_owned(&bufcache_lock));
465 KASSERT((bp->b_cflags & BC_BUSY) != 0);
466 KASSERT(!cv_has_waiters(&bp->b_done));
467
468 /*
469 * Delete from old vnode list, if on one.
470 */
471 if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
472 bufremvn(bp);
473
474 if ((vp->v_iflag & (VI_ONWORKLST | VI_PAGES)) == VI_ONWORKLST &&
475 LIST_FIRST(&vp->v_dirtyblkhd) == NULL)
476 vn_syncer_remove_from_worklist(vp);
477
478 bp->b_objlock = &buffer_lock;
479 bp->b_vp = NULL;
480 holdrelel(vp);
481 }
482
483 /*
484 * Reassign a buffer from one vnode list to another.
485 * The list reassignment must be within the same vnode.
486 * Used to assign file specific control information
487 * (indirect blocks) to the list to which they belong.
488 */
489 void
490 reassignbuf(struct buf *bp, struct vnode *vp)
491 {
492 struct buflists *listheadp;
493 int delayx;
494
495 KASSERT(mutex_owned(&bufcache_lock));
496 KASSERT(bp->b_objlock == vp->v_interlock);
497 KASSERT(mutex_owned(vp->v_interlock));
498 KASSERT((bp->b_cflags & BC_BUSY) != 0);
499
500 /*
501 * Delete from old vnode list, if on one.
502 */
503 if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
504 bufremvn(bp);
505
506 /*
507 * If dirty, put on list of dirty buffers;
508 * otherwise insert onto list of clean buffers.
509 */
510 if ((bp->b_oflags & BO_DELWRI) == 0) {
511 listheadp = &vp->v_cleanblkhd;
512 if ((vp->v_iflag & (VI_ONWORKLST | VI_PAGES)) ==
513 VI_ONWORKLST &&
514 LIST_FIRST(&vp->v_dirtyblkhd) == NULL)
515 vn_syncer_remove_from_worklist(vp);
516 } else {
517 listheadp = &vp->v_dirtyblkhd;
518 if ((vp->v_iflag & VI_ONWORKLST) == 0) {
519 switch (vp->v_type) {
520 case VDIR:
521 delayx = dirdelay;
522 break;
523 case VBLK:
524 if (spec_node_getmountedfs(vp) != NULL) {
525 delayx = metadelay;
526 break;
527 }
528 /* fall through */
529 default:
530 delayx = filedelay;
531 break;
532 }
533 if (!vp->v_mount ||
534 (vp->v_mount->mnt_flag & MNT_ASYNC) == 0)
535 vn_syncer_add_to_worklist(vp, delayx);
536 }
537 }
538 bufinsvn(bp, listheadp);
539 }
540
541 /*
542 * Lookup a vnode by device number and return it referenced.
543 */
544 int
545 vfinddev(dev_t dev, enum vtype type, vnode_t **vpp)
546 {
547
548 return (spec_node_lookup_by_dev(type, dev, VDEAD_NOWAIT, vpp) == 0);
549 }
550
551 /*
552 * Revoke all the vnodes corresponding to the specified minor number
553 * range (endpoints inclusive) of the specified major.
554 */
555 void
556 vdevgone(int maj, int minl, int minh, enum vtype type)
557 {
558 vnode_t *vp;
559 dev_t dev;
560 int mn;
561
562 for (mn = minl; mn <= minh; mn++) {
563 dev = makedev(maj, mn);
564 /*
565 * Notify anyone trying to get at this device that it
566 * has been detached, and then revoke it.
567 */
568 switch (type) {
569 case VBLK:
570 bdev_detached(dev);
571 break;
572 case VCHR:
573 cdev_detached(dev);
574 break;
575 default:
576 panic("invalid specnode type: %d", type);
577 }
578 /*
579 * Passing 0 as flags, instead of VDEAD_NOWAIT, means
580 * spec_node_lookup_by_dev will wait for vnodes it
581 * finds concurrently being revoked before returning.
582 */
583 while (spec_node_lookup_by_dev(type, dev, 0, &vp) == 0) {
584 VOP_REVOKE(vp, REVOKEALL);
585 vrele(vp);
586 }
587 }
588 }
589
590 /*
591 * The filesystem synchronizer mechanism - syncer.
592 *
593 * It is useful to delay writes of file data and filesystem metadata for
594 * a certain amount of time so that quickly created and deleted files need
595 * not waste disk bandwidth being created and removed. To implement this,
596 * vnodes are appended to a "workitem" queue.
597 *
598 * Most pending metadata should not wait for more than ten seconds. Thus,
599 * metadata on mounted block devices is delayed only about a third of the
600 * time that file data is delayed. Directory updates are also more critical,
601 * so they are delayed only about half the time that file data is delayed.
602 *
603 * There are SYNCER_MAXDELAY queues that are processed in a round-robin
604 * manner at a rate of one each second (driven off the filesystem syncer
605 * thread). The syncer_delayno variable indicates the next queue that is
606 * to be processed. Items that need to be processed soon are placed in
607 * this queue:
608 *
609 * syncer_workitem_pending[syncer_delayno]
610 *
611 * A delay of e.g. fifteen seconds is done by placing the request fifteen
612 * entries later in the queue:
613 *
614 * syncer_workitem_pending[(syncer_delayno + 15) % syncer_last]
615 *
616 * The flag VI_ONWORKLST indicates that a vnode is currently on the work queue.
617 */
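/*
 * Illustrative example (assuming syncer_last == SYNCER_MAXDELAY + 2 == 34):
 * with syncer_delayno == 30, a vnode queued with a delay of 10 seconds is
 * placed in syncer_workitem_pending[(30 + 10) % 34], i.e. slot 6, which the
 * syncer reaches about ten one-second wheel turns later.
 */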
618
619 #define SYNCER_MAXDELAY 32
620
621 typedef TAILQ_HEAD(synclist, vnode_impl) synclist_t;
622
623 static void vn_syncer_add1(struct vnode *, int);
624 static void sysctl_vfs_syncfs_setup(struct sysctllog **);
625
626 /*
627 * Defines and variables for the syncer process.
628 */
629 int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */
630 time_t syncdelay = 30; /* max time to delay syncing data */
631 time_t filedelay = 30; /* time to delay syncing files */
632 time_t dirdelay = 15; /* time to delay syncing directories */
633 time_t metadelay = 10; /* time to delay syncing metadata */
634 time_t lockdelay = 1; /* time to delay if locking fails */
635
636 static kmutex_t syncer_data_lock; /* short term lock on data structs */
637
638 static int syncer_delayno = 0;
639 static long syncer_last;
640 static synclist_t * syncer_workitem_pending;
641
642 static void
643 vn_initialize_syncerd(void)
644 {
645 int i;
646
647 syncer_last = SYNCER_MAXDELAY + 2;
648
649 sysctl_vfs_syncfs_setup(NULL);
650
651 syncer_workitem_pending =
652 kmem_alloc(syncer_last * sizeof (struct synclist), KM_SLEEP);
653
654 for (i = 0; i < syncer_last; i++)
655 TAILQ_INIT(&syncer_workitem_pending[i]);
656
657 mutex_init(&syncer_data_lock, MUTEX_DEFAULT, IPL_NONE);
658 }
659
660 /*
661 * Return delay factor appropriate for the given file system. For
662 * WAPBL we use the sync vnode to burst out metadata updates: sync
663 * those file systems more frequently.
664 */
665 static inline int
666 sync_delay(struct mount *mp)
667 {
668
669 return mp->mnt_wapbl != NULL ? metadelay : syncdelay;
670 }
671
672 /*
673 * Compute the next slot index from delay.
674 */
675 static inline int
676 sync_delay_slot(int delayx)
677 {
678
679 if (delayx > syncer_maxdelay - 2)
680 delayx = syncer_maxdelay - 2;
681 return (syncer_delayno + delayx) % syncer_last;
682 }
683
684 /*
685 * Add an item to the syncer work queue.
686 */
687 static void
688 vn_syncer_add1(struct vnode *vp, int delayx)
689 {
690 synclist_t *slp;
691 vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
692
693 KASSERT(mutex_owned(&syncer_data_lock));
694
695 if (vp->v_iflag & VI_ONWORKLST) {
696 /*
697 * Remove in order to adjust the position of the vnode.
698 * Note: called from sched_sync(), which will not hold
699 * interlock, therefore we cannot modify v_iflag here.
700 */
701 slp = &syncer_workitem_pending[vip->vi_synclist_slot];
702 TAILQ_REMOVE(slp, vip, vi_synclist);
703 } else {
704 KASSERT(mutex_owned(vp->v_interlock));
705 vp->v_iflag |= VI_ONWORKLST;
706 }
707
708 vip->vi_synclist_slot = sync_delay_slot(delayx);
709
710 slp = &syncer_workitem_pending[vip->vi_synclist_slot];
711 TAILQ_INSERT_TAIL(slp, vip, vi_synclist);
712 }
713
714 void
715 vn_syncer_add_to_worklist(struct vnode *vp, int delayx)
716 {
717 vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
718
719 KASSERT(mutex_owned(vp->v_interlock));
720
721 mutex_enter(&syncer_data_lock);
722 vn_syncer_add1(vp, delayx);
723 SDT_PROBE3(vfs, syncer, worklist, vnode__add,
724 vp, delayx, vip->vi_synclist_slot);
725 mutex_exit(&syncer_data_lock);
726 }
727
728 /*
729 * Remove an item from the syncer work queue.
730 */
731 void
732 vn_syncer_remove_from_worklist(struct vnode *vp)
733 {
734 synclist_t *slp;
735 vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
736
737 KASSERT(mutex_owned(vp->v_interlock));
738
739 if (vp->v_iflag & VI_ONWORKLST) {
740 mutex_enter(&syncer_data_lock);
741 SDT_PROBE1(vfs, syncer, worklist, vnode__remove, vp);
742 vp->v_iflag &= ~VI_ONWORKLST;
743 slp = &syncer_workitem_pending[vip->vi_synclist_slot];
744 TAILQ_REMOVE(slp, vip, vi_synclist);
745 mutex_exit(&syncer_data_lock);
746 }
747 }
748
749 /*
750 * Add this mount point to the syncer.
751 */
752 void
753 vfs_syncer_add_to_worklist(struct mount *mp)
754 {
755 static int start, incr, next;
756 int vdelay;
757
758 KASSERT(mutex_owned(mp->mnt_updating));
759 KASSERT((mp->mnt_iflag & IMNT_ONWORKLIST) == 0);
760
761 /*
762 * We attempt to scatter the mount points on the list
763 * so that they will go off at evenly distributed times
764 * even if all the filesystems are mounted at once.
765 */
766
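	/*
	 * The static start/incr/next triple walks the wheel in a binary
	 * subdivision pattern (with syncer_maxdelay == 32: 16; 8, 24;
	 * 4, 12, 20, 28; ...), halving start and incr whenever next wraps
	 * and resetting them once they reach zero, so later mounts land
	 * between the slots chosen for earlier ones.
	 */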
767 next += incr;
768 if (next == 0 || next > syncer_maxdelay) {
769 start /= 2;
770 incr /= 2;
771 if (start == 0) {
772 start = syncer_maxdelay / 2;
773 incr = syncer_maxdelay;
774 }
775 next = start;
776 }
777 mp->mnt_iflag |= IMNT_ONWORKLIST;
778 vdelay = sync_delay(mp);
779 mp->mnt_synclist_slot = vdelay > 0 ? next % vdelay : 0;
780 SDT_PROBE3(vfs, syncer, worklist, mount__add,
781 mp, vdelay, mp->mnt_synclist_slot);
782 }
783
784 /*
785 * Remove the mount point from the syncer.
786 */
787 void
788 vfs_syncer_remove_from_worklist(struct mount *mp)
789 {
790
791 KASSERT(mutex_owned(mp->mnt_updating));
792 KASSERT((mp->mnt_iflag & IMNT_ONWORKLIST) != 0);
793
794 SDT_PROBE1(vfs, syncer, worklist, mount__remove, mp);
795 mp->mnt_iflag &= ~IMNT_ONWORKLIST;
796 }
797
798 /*
799 * Try lazy sync, return true on success.
800 */
801 static bool
802 lazy_sync_vnode(struct vnode *vp)
803 {
804 bool synced;
805 int error;
806
807 KASSERT(mutex_owned(&syncer_data_lock));
808
809 synced = false;
810 if ((error = vcache_tryvget(vp)) == 0) {
811 mutex_exit(&syncer_data_lock);
812 if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT)) == 0) {
813 synced = true;
814 SDT_PROBE1(vfs, syncer, sync, vnode__start, vp);
815 error = VOP_FSYNC(vp, curlwp->l_cred,
816 FSYNC_LAZY, 0, 0);
817 SDT_PROBE2(vfs, syncer, sync, vnode__done, vp, error);
818 vput(vp);
819 } else {
820 SDT_PROBE2(vfs, syncer, sync, vnode__fail__lock,
821 vp, error);
822 vrele(vp);
823 }
824 mutex_enter(&syncer_data_lock);
825 } else {
826 SDT_PROBE2(vfs, syncer, sync, vnode__fail__vget, vp, error);
827 }
828 return synced;
829 }
830
831 /*
832 * System filesystem synchronizer daemon.
833 */
834 void
835 sched_sync(void *arg)
836 {
837 mount_iterator_t *iter;
838 synclist_t *slp;
839 struct vnode_impl *vi;
840 struct vnode *vp;
841 struct mount *mp;
842 time_t starttime, endtime;
843 int vdelay, oslot, nslot, delayx;
844 bool synced;
845 int error;
846
847 for (;;) {
848 starttime = time_second;
849 SDT_PROBE1(vfs, syncer, sync, start, starttime);
850
851 /*
852 * Sync mounts whose dirty time has expired.
853 */
854 mountlist_iterator_init(&iter);
855 while ((mp = mountlist_iterator_trynext(iter)) != NULL) {
856 if ((mp->mnt_iflag & IMNT_ONWORKLIST) == 0 ||
857 mp->mnt_synclist_slot != syncer_delayno) {
858 SDT_PROBE1(vfs, syncer, sync, mount__skip,
859 mp);
860 continue;
861 }
862
863 vdelay = sync_delay(mp);
864 oslot = mp->mnt_synclist_slot;
865 nslot = sync_delay_slot(vdelay);
866 mp->mnt_synclist_slot = nslot;
867 SDT_PROBE4(vfs, syncer, worklist, mount__update,
868 mp, vdelay, oslot, nslot);
869
870 SDT_PROBE1(vfs, syncer, sync, mount__start, mp);
871 error = VFS_SYNC(mp, MNT_LAZY, curlwp->l_cred);
872 SDT_PROBE2(vfs, syncer, sync, mount__done,
873 mp, error);
874 }
875 mountlist_iterator_destroy(iter);
876
877 mutex_enter(&syncer_data_lock);
878
879 /*
880 * Push files whose dirty time has expired.
881 */
882 slp = &syncer_workitem_pending[syncer_delayno];
883 syncer_delayno += 1;
884 if (syncer_delayno >= syncer_last)
885 syncer_delayno = 0;
886
887 while ((vi = TAILQ_FIRST(slp)) != NULL) {
888 vp = VIMPL_TO_VNODE(vi);
889 synced = lazy_sync_vnode(vp);
890
891 /*
892 * XXX The vnode may have been recycled, in which
893 * case it may have a new identity.
894 */
895 vi = TAILQ_FIRST(slp);
896 if (vi != NULL && VIMPL_TO_VNODE(vi) == vp) {
897 /*
898 * Put us back on the worklist. The worklist
899 * routine will remove us from our current
900 * position and then add us back in at a later
901 * position.
902 *
903 * Try again sooner rather than later if
904 * we were unable to lock the vnode. Lock
905 * failure should not prevent us from doing
906 * the sync "soon".
907 *
908 * If we managed to lock and sync it, yet
909 * arrive back here, lazy sync is likely still in progress and
910 * so the vnode still has dirty metadata.
911 * syncdelay is mainly to get this vnode out
912 * of the way so we do not consider it again
913 * "soon" in this loop, so the delay time is
914 * not critical as long as it is not "soon".
915 * While write-back strategy is the file
916 * system's domain, we expect write-back to
917 * occur no later than syncdelay seconds
918 * into the future.
919 */
920 delayx = synced ? syncdelay : lockdelay;
921 oslot = vi->vi_synclist_slot;
922 vn_syncer_add1(vp, delayx);
923 nslot = vi->vi_synclist_slot;
924 SDT_PROBE4(vfs, syncer, worklist,
925 vnode__update,
926 vp, delayx, oslot, nslot);
927 }
928 }
929
930 endtime = time_second;
931
932 SDT_PROBE2(vfs, syncer, sync, done, starttime, endtime);
933
934 /*
935 * If it has taken us less than a second to process the
936 * current work, then wait. Otherwise start right over
937 * again. We can still lose time if any single round
938 * takes more than two seconds, but it does not really
939 * matter as we are just trying to generally pace the
940 * filesystem activity.
941 */
942 if (endtime == starttime) {
943 kpause("syncer", false, hz, &syncer_data_lock);
944 }
945 mutex_exit(&syncer_data_lock);
946 }
947 }
948
949 static void
950 sysctl_vfs_syncfs_setup(struct sysctllog **clog)
951 {
952 const struct sysctlnode *rnode, *cnode;
953
954 sysctl_createv(clog, 0, NULL, &rnode,
955 CTLFLAG_PERMANENT,
956 CTLTYPE_NODE, "sync",
957 SYSCTL_DESCR("syncer options"),
958 NULL, 0, NULL, 0,
959 CTL_VFS, CTL_CREATE, CTL_EOL);
960
961 sysctl_createv(clog, 0, &rnode, &cnode,
962 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
963 CTLTYPE_QUAD, "delay",
964 SYSCTL_DESCR("max time to delay syncing data"),
965 NULL, 0, &syncdelay, 0,
966 CTL_CREATE, CTL_EOL);
967
968 sysctl_createv(clog, 0, &rnode, &cnode,
969 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
970 CTLTYPE_QUAD, "filedelay",
971 SYSCTL_DESCR("time to delay syncing files"),
972 NULL, 0, &filedelay, 0,
973 CTL_CREATE, CTL_EOL);
974
975 sysctl_createv(clog, 0, &rnode, &cnode,
976 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
977 CTLTYPE_QUAD, "dirdelay",
978 SYSCTL_DESCR("time to delay syncing directories"),
979 NULL, 0, &dirdelay, 0,
980 CTL_CREATE, CTL_EOL);
981
982 sysctl_createv(clog, 0, &rnode, &cnode,
983 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
984 CTLTYPE_QUAD, "metadelay",
985 SYSCTL_DESCR("time to delay syncing metadata"),
986 NULL, 0, &metadelay, 0,
987 CTL_CREATE, CTL_EOL);
988 }
989
990 /*
991 * sysctl helper routine to return list of supported fstypes
992 */
993 int
994 sysctl_vfs_generic_fstypes(SYSCTLFN_ARGS)
995 {
996 char bf[sizeof(((struct statvfs *)NULL)->f_fstypename)];
997 char *where = oldp;
998 struct vfsops *v;
999 size_t needed, left, slen;
1000 int error, first;
1001
1002 if (newp != NULL)
1003 return (EPERM);
1004 if (namelen != 0)
1005 return (EINVAL);
1006
1007 first = 1;
1008 error = 0;
1009 needed = 0;
1010 left = *oldlenp;
1011
1012 sysctl_unlock();
1013 mutex_enter(&vfs_list_lock);
1014 LIST_FOREACH(v, &vfs_list, vfs_list) {
1015 if (where == NULL)
1016 needed += strlen(v->vfs_name) + 1;
1017 else {
1018 memset(bf, 0, sizeof(bf));
1019 if (first) {
1020 strncpy(bf, v->vfs_name, sizeof(bf));
1021 first = 0;
1022 } else {
1023 bf[0] = ' ';
1024 strncpy(bf + 1, v->vfs_name, sizeof(bf) - 1);
1025 }
1026 bf[sizeof(bf)-1] = '\0';
1027 slen = strlen(bf);
1028 if (left < slen + 1)
1029 break;
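			/*
			 * Hold a reference so this vfsops entry cannot be
			 * detached while vfs_list_lock is dropped across the
			 * (potentially faulting) copyout.
			 */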
1030 v->vfs_refcount++;
1031 mutex_exit(&vfs_list_lock);
1032 /* +1 to copy out the trailing NUL byte */
1033 error = copyout(bf, where, slen + 1);
1034 mutex_enter(&vfs_list_lock);
1035 v->vfs_refcount--;
1036 if (error)
1037 break;
1038 where += slen;
1039 needed += slen;
1040 left -= slen;
1041 }
1042 }
1043 mutex_exit(&vfs_list_lock);
1044 sysctl_relock();
1045 *oldlenp = needed;
1046 return (error);
1047 }
1048
1049 int kinfo_vdebug = 1;
1050 int kinfo_vgetfailed;
1051
1052 #define KINFO_VNODESLOP 10
1053
1054 /*
1055 * Dump vnode list (via sysctl).
1056 * Copyout address of vnode followed by vnode.
1057 */
1058 int
1059 sysctl_kern_vnode(SYSCTLFN_ARGS)
1060 {
1061 char *where = oldp;
1062 size_t *sizep = oldlenp;
1063 struct mount *mp;
1064 vnode_t *vp, vbuf;
1065 mount_iterator_t *iter;
1066 struct vnode_iterator *marker;
1067 char *bp = where;
1068 char *ewhere;
1069 int error;
1070
1071 if (namelen != 0)
1072 return (EOPNOTSUPP);
1073 if (newp != NULL)
1074 return (EPERM);
1075
1076 #define VPTRSZ sizeof(vnode_t *)
1077 #define VNODESZ sizeof(vnode_t)
1078 if (where == NULL) {
1079 *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
1080 return (0);
1081 }
1082 ewhere = where + *sizep;
1083
1084 sysctl_unlock();
1085 mountlist_iterator_init(&iter);
1086 while ((mp = mountlist_iterator_next(iter)) != NULL) {
1087 vfs_vnode_iterator_init(mp, &marker);
1088 while ((vp = vfs_vnode_iterator_next(marker, NULL, NULL))) {
1089 if (bp + VPTRSZ + VNODESZ > ewhere) {
1090 vrele(vp);
1091 vfs_vnode_iterator_destroy(marker);
1092 mountlist_iterator_destroy(iter);
1093 sysctl_relock();
1094 *sizep = bp - where;
1095 return (ENOMEM);
1096 }
1097 memcpy(&vbuf, vp, VNODESZ);
1098 if ((error = copyout(&vp, bp, VPTRSZ)) ||
1099 (error = copyout(&vbuf, bp + VPTRSZ, VNODESZ))) {
1100 vrele(vp);
1101 vfs_vnode_iterator_destroy(marker);
1102 mountlist_iterator_destroy(iter);
1103 sysctl_relock();
1104 return (error);
1105 }
1106 vrele(vp);
1107 bp += VPTRSZ + VNODESZ;
1108 }
1109 vfs_vnode_iterator_destroy(marker);
1110 }
1111 mountlist_iterator_destroy(iter);
1112 sysctl_relock();
1113
1114 *sizep = bp - where;
1115 return (0);
1116 }
1117
1118 /*
1119 * Set vnode attributes to VNOVAL
1120 */
1121 void
1122 vattr_null(struct vattr *vap)
1123 {
1124
1125 memset(vap, 0, sizeof(*vap));
1126
1127 vap->va_type = VNON;
1128
1129 /*
1130 * Assign each member individually so that this remains safe even if
1131 * the size and signedness of the members differ.
1132 */
1133 vap->va_mode = VNOVAL;
1134 vap->va_nlink = VNOVAL;
1135 vap->va_uid = VNOVAL;
1136 vap->va_gid = VNOVAL;
1137 vap->va_fsid = VNOVAL;
1138 vap->va_fileid = VNOVAL;
1139 vap->va_size = VNOVAL;
1140 vap->va_blocksize = VNOVAL;
1141 vap->va_atime.tv_sec =
1142 vap->va_mtime.tv_sec =
1143 vap->va_ctime.tv_sec =
1144 vap->va_birthtime.tv_sec = VNOVAL;
1145 vap->va_atime.tv_nsec =
1146 vap->va_mtime.tv_nsec =
1147 vap->va_ctime.tv_nsec =
1148 vap->va_birthtime.tv_nsec = VNOVAL;
1149 vap->va_gen = VNOVAL;
1150 vap->va_flags = VNOVAL;
1151 vap->va_rdev = VNOVAL;
1152 vap->va_bytes = VNOVAL;
1153 }
1154
1155 /*
1156 * Vnode state to string.
1157 */
1158 const char *
1159 vstate_name(enum vnode_state state)
1160 {
1161
1162 switch (state) {
1163 case VS_ACTIVE:
1164 return "ACTIVE";
1165 case VS_MARKER:
1166 return "MARKER";
1167 case VS_LOADING:
1168 return "LOADING";
1169 case VS_LOADED:
1170 return "LOADED";
1171 case VS_BLOCKED:
1172 return "BLOCKED";
1173 case VS_RECLAIMING:
1174 return "RECLAIMING";
1175 case VS_RECLAIMED:
1176 return "RECLAIMED";
1177 default:
1178 return "ILLEGAL";
1179 }
1180 }
1181
1182 /*
1183 * Print a description of a vnode (common part).
1184 */
1185 static void
1186 vprint_common(struct vnode *vp, const char *prefix,
1187 void (*pr)(const char *, ...) __printflike(1, 2))
1188 {
1189 int n;
1190 char bf[96];
1191 const uint8_t *cp;
1192 vnode_impl_t *vip;
1193 const char * const vnode_tags[] = { VNODE_TAGS };
1194 const char * const vnode_types[] = { VNODE_TYPES };
1195 const char vnode_flagbits[] = VNODE_FLAGBITS;
1196
1197 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
1198 #define ARRAY_PRINT(idx, arr) \
1199 ((unsigned int)(idx) < ARRAY_SIZE(arr) ? (arr)[(idx)] : "UNKNOWN")
1200
1201 vip = VNODE_TO_VIMPL(vp);
1202
1203 snprintb(bf, sizeof(bf),
1204 vnode_flagbits, vp->v_iflag | vp->v_vflag | vp->v_uflag);
1205
1206 (*pr)("vnode %p flags %s\n", vp, bf);
1207 (*pr)("%stag %s(%d) type %s(%d) mount %p typedata %p\n", prefix,
1208 ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag,
1209 ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type,
1210 vp->v_mount, vp->v_mountedhere);
1211 (*pr)("%susecount %d writecount %d holdcount %d\n", prefix,
1212 vrefcnt(vp), vp->v_writecount, vp->v_holdcnt);
1213 (*pr)("%ssize %" PRIx64 " writesize %" PRIx64 " numoutput %d\n",
1214 prefix, vp->v_size, vp->v_writesize, vp->v_numoutput);
1215 (*pr)("%sdata %p lock %p\n", prefix, vp->v_data, &vip->vi_lock);
1216
1217 (*pr)("%sstate %s key(%p %zd)", prefix, vstate_name(vip->vi_state),
1218 vip->vi_key.vk_mount, vip->vi_key.vk_key_len);
1219 n = vip->vi_key.vk_key_len;
1220 cp = vip->vi_key.vk_key;
1221 while (n-- > 0)
1222 (*pr)(" %02x", *cp++);
1223 (*pr)("\n");
1224 (*pr)("%slrulisthd %p\n", prefix, vip->vi_lrulisthd);
1225
1226 #undef ARRAY_PRINT
1227 #undef ARRAY_SIZE
1228 }
1229
1230 /*
1231 * Print out a description of a vnode.
1232 */
1233 void
1234 vprint(const char *label, struct vnode *vp)
1235 {
1236
1237 if (label != NULL)
1238 printf("%s: ", label);
1239 vprint_common(vp, "\t", printf);
1240 if (vp->v_data != NULL) {
1241 printf("\t");
1242 VOP_PRINT(vp);
1243 }
1244 }
1245
1246 /*
1247 * Given a file system name, look up the vfsops for that
1248 * file system, or return NULL if the file system isn't present
1249 * in the kernel.
1250 */
1251 struct vfsops *
1252 vfs_getopsbyname(const char *name)
1253 {
1254 struct vfsops *v;
1255
1256 mutex_enter(&vfs_list_lock);
1257 LIST_FOREACH(v, &vfs_list, vfs_list) {
1258 if (strcmp(v->vfs_name, name) == 0)
1259 break;
1260 }
1261 if (v != NULL)
1262 v->vfs_refcount++;
1263 mutex_exit(&vfs_list_lock);
1264
1265 return (v);
1266 }
1267
1268 void
1269 copy_statvfs_info(struct statvfs *sbp, const struct mount *mp)
1270 {
1271 const struct statvfs *mbp;
1272
1273 if (sbp == (mbp = &mp->mnt_stat))
1274 return;
1275
1276 (void)memcpy(&sbp->f_fsidx, &mbp->f_fsidx, sizeof(sbp->f_fsidx));
1277 sbp->f_fsid = mbp->f_fsid;
1278 sbp->f_owner = mbp->f_owner;
1279 sbp->f_flag = mbp->f_flag;
1280 sbp->f_syncwrites = mbp->f_syncwrites;
1281 sbp->f_asyncwrites = mbp->f_asyncwrites;
1282 sbp->f_syncreads = mbp->f_syncreads;
1283 sbp->f_asyncreads = mbp->f_asyncreads;
1284 (void)memcpy(sbp->f_spare, mbp->f_spare, sizeof(mbp->f_spare));
1285 (void)memcpy(sbp->f_fstypename, mbp->f_fstypename,
1286 sizeof(sbp->f_fstypename));
1287 (void)memcpy(sbp->f_mntonname, mbp->f_mntonname,
1288 sizeof(sbp->f_mntonname));
1289 (void)memcpy(sbp->f_mntfromname, mp->mnt_stat.f_mntfromname,
1290 sizeof(sbp->f_mntfromname));
1291 (void)memcpy(sbp->f_mntfromlabel, mp->mnt_stat.f_mntfromlabel,
1292 sizeof(sbp->f_mntfromlabel));
1293 sbp->f_namemax = mbp->f_namemax;
1294 }
1295
1296 int
1297 set_statvfs_info(const char *onp, int ukon, const char *fromp, int ukfrom,
1298 const char *vfsname, struct mount *mp, struct lwp *l)
1299 {
1300 int error;
1301 size_t size;
1302 struct statvfs *sfs = &mp->mnt_stat;
1303 int (*fun)(const void *, void *, size_t, size_t *);
1304
1305 (void)strlcpy(mp->mnt_stat.f_fstypename, vfsname,
1306 sizeof(mp->mnt_stat.f_fstypename));
1307
1308 if (onp) {
1309 struct cwdinfo *cwdi = l->l_proc->p_cwdi;
1310 fun = (ukon == UIO_SYSSPACE) ? copystr : copyinstr;
1311 if (cwdi->cwdi_rdir != NULL) {
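			/*
			 * The caller is chrooted: prepend the path from the
			 * real root to its root directory so that the
			 * recorded f_mntonname is meaningful system-wide.
			 */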
1312 size_t len;
1313 char *bp;
1314 char *path = PNBUF_GET();
1315
1316 bp = path + MAXPATHLEN;
1317 *--bp = '\0';
1318 rw_enter(&cwdi->cwdi_lock, RW_READER);
1319 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp,
1320 path, MAXPATHLEN / 2, 0, l);
1321 rw_exit(&cwdi->cwdi_lock);
1322 if (error) {
1323 PNBUF_PUT(path);
1324 return error;
1325 }
1326
1327 len = strlen(bp);
1328 if (len > sizeof(sfs->f_mntonname) - 1)
1329 len = sizeof(sfs->f_mntonname) - 1;
1330 (void)strncpy(sfs->f_mntonname, bp, len);
1331 PNBUF_PUT(path);
1332
1333 if (len < sizeof(sfs->f_mntonname) - 1) {
1334 error = (*fun)(onp, &sfs->f_mntonname[len],
1335 sizeof(sfs->f_mntonname) - len - 1, &size);
1336 if (error)
1337 return error;
1338 size += len;
1339 } else {
1340 size = len;
1341 }
1342 } else {
1343 error = (*fun)(onp, &sfs->f_mntonname,
1344 sizeof(sfs->f_mntonname) - 1, &size);
1345 if (error)
1346 return error;
1347 }
1348 (void)memset(sfs->f_mntonname + size, 0,
1349 sizeof(sfs->f_mntonname) - size);
1350 }
1351
1352 if (fromp) {
1353 fun = (ukfrom == UIO_SYSSPACE) ? copystr : copyinstr;
1354 error = (*fun)(fromp, sfs->f_mntfromname,
1355 sizeof(sfs->f_mntfromname) - 1, &size);
1356 if (error)
1357 return error;
1358 (void)memset(sfs->f_mntfromname + size, 0,
1359 sizeof(sfs->f_mntfromname) - size);
1360 }
1361 return 0;
1362 }
1363
1364 /*
1365 * Knob to control the precision of file timestamps:
1366 *
1367 * 0 = seconds only; nanoseconds zeroed.
1368 * 1 = seconds and nanoseconds, accurate within 1/HZ.
1369 * 2 = seconds and nanoseconds, truncated to microseconds.
1370 * >=3 = seconds and nanoseconds, maximum precision.
1371 */
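/*
 * Note: getnanotime() (TSP_HZ) returns a cheap cached timestamp, while
 * microtime()/nanotime() (TSP_USEC/TSP_NSEC) read the timecounter hardware
 * on every call, trading overhead for precision.
 */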
1372 enum { TSP_SEC, TSP_HZ, TSP_USEC, TSP_NSEC };
1373
1374 int vfs_timestamp_precision __read_mostly = TSP_NSEC;
1375
1376 void
1377 vfs_timestamp(struct timespec *tsp)
1378 {
1379 struct timeval tv;
1380
1381 switch (vfs_timestamp_precision) {
1382 case TSP_SEC:
1383 tsp->tv_sec = time_second;
1384 tsp->tv_nsec = 0;
1385 break;
1386 case TSP_HZ:
1387 getnanotime(tsp);
1388 break;
1389 case TSP_USEC:
1390 microtime(&tv);
1391 TIMEVAL_TO_TIMESPEC(&tv, tsp);
1392 break;
1393 case TSP_NSEC:
1394 default:
1395 nanotime(tsp);
1396 break;
1397 }
1398 }
1399
1400 /*
1401 * The purpose of this routine is to remove granularity from accmode_t,
1402 * reducing it into standard unix access bits - VEXEC, VREAD, VWRITE,
1403 * VADMIN and VAPPEND.
1404 *
1405 * If it returns 0, the caller is supposed to continue with the usual
1406 * access checks using 'accmode' as modified by this routine. If it
1407 * returns nonzero value, the caller is supposed to return that value
1408 * as errno.
1409 *
1410 * Note that after this routine runs, accmode may be zero.
1411 */
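/*
 * At a glance: VEXPLICIT_DENY clears the whole mask and succeeds, VDELETE
 * and VDELETE_CHILD fail with EPERM, and any VADMIN_PERMS bits collapse
 * into plain VADMIN.
 */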
1412 int
1413 vfs_unixify_accmode(accmode_t *accmode)
1414 {
1415 /*
1416 * There is no way to specify explicit "deny" rule using
1417 * file mode or POSIX.1e ACLs.
1418 */
1419 if (*accmode & VEXPLICIT_DENY) {
1420 *accmode = 0;
1421 return (0);
1422 }
1423
1424 /*
1425 * None of these can be translated into usual access bits.
1426 * Also, the common case for NFSv4 ACLs is to not contain
1427 * either of these bits. Caller should check for VWRITE
1428 * on the containing directory instead.
1429 */
1430 if (*accmode & (VDELETE_CHILD | VDELETE))
1431 return (EPERM);
1432
1433 if (*accmode & VADMIN_PERMS) {
1434 *accmode &= ~VADMIN_PERMS;
1435 *accmode |= VADMIN;
1436 }
1437
1438 /*
1439 * There is no way to deny VREAD_ATTRIBUTES, VREAD_ACL
1440 * or VSYNCHRONIZE using file mode or POSIX.1e ACL.
1441 */
1442 *accmode &= ~(VSTAT_PERMS | VSYNCHRONIZE);
1443
1444 return (0);
1445 }
1446
1447 time_t rootfstime; /* recorded root fs time, if known */
1448 void
1449 setrootfstime(time_t t)
1450 {
1451 rootfstime = t;
1452 }
1453
1454 static const uint8_t vttodt_tab[ ] = {
1455 [VNON] = DT_UNKNOWN,
1456 [VREG] = DT_REG,
1457 [VDIR] = DT_DIR,
1458 [VBLK] = DT_BLK,
1459 [VCHR] = DT_CHR,
1460 [VLNK] = DT_LNK,
1461 [VSOCK] = DT_SOCK,
1462 [VFIFO] = DT_FIFO,
1463 [VBAD] = DT_UNKNOWN
1464 };
1465
1466 uint8_t
1467 vtype2dt(enum vtype vt)
1468 {
1469
1470 CTASSERT(VBAD == __arraycount(vttodt_tab) - 1);
1471 return vttodt_tab[vt];
1472 }
1473
1474 int
1475 VFS_MOUNT(struct mount *mp, const char *a, void *b, size_t *c)
1476 {
1477 int mpsafe = mp->mnt_iflag & IMNT_MPSAFE;
1478 int error;
1479
1480 /*
1481 * Note: The first time through, the vfs_mount function may set
1482 * IMNT_MPSAFE, so we have to cache it on entry in order to
1483 * avoid leaking a kernel lock.
1484 *
1485 * XXX Maybe the MPSAFE bit should be set in struct vfsops and
1486 * not in struct mount.
1487 */
1488 if (!mpsafe) {
1489 KERNEL_LOCK(1, NULL);
1490 }
1491 error = (*(mp->mnt_op->vfs_mount))(mp, a, b, c);
1492 if (!mpsafe) {
1493 KERNEL_UNLOCK_ONE(NULL);
1494 }
1495
1496 return error;
1497 }
1498
1499 int
1500 VFS_START(struct mount *mp, int a)
1501 {
1502 int error;
1503
1504 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1505 KERNEL_LOCK(1, NULL);
1506 }
1507 error = (*(mp->mnt_op->vfs_start))(mp, a);
1508 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1509 KERNEL_UNLOCK_ONE(NULL);
1510 }
1511
1512 return error;
1513 }
1514
1515 int
1516 VFS_UNMOUNT(struct mount *mp, int a)
1517 {
1518 int error;
1519
1520 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1521 KERNEL_LOCK(1, NULL);
1522 }
1523 error = (*(mp->mnt_op->vfs_unmount))(mp, a);
1524 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1525 KERNEL_UNLOCK_ONE(NULL);
1526 }
1527
1528 return error;
1529 }
1530
1531 int
1532 VFS_ROOT(struct mount *mp, int lktype, struct vnode **a)
1533 {
1534 int error;
1535
1536 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1537 KERNEL_LOCK(1, NULL);
1538 }
1539 error = (*(mp->mnt_op->vfs_root))(mp, lktype, a);
1540 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1541 KERNEL_UNLOCK_ONE(NULL);
1542 }
1543
1544 return error;
1545 }
1546
1547 int
1548 VFS_QUOTACTL(struct mount *mp, struct quotactl_args *args)
1549 {
1550 int error;
1551
1552 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1553 KERNEL_LOCK(1, NULL);
1554 }
1555 error = (*(mp->mnt_op->vfs_quotactl))(mp, args);
1556 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1557 KERNEL_UNLOCK_ONE(NULL);
1558 }
1559
1560 return error;
1561 }
1562
1563 int
1564 VFS_STATVFS(struct mount *mp, struct statvfs *a)
1565 {
1566 int error;
1567
1568 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1569 KERNEL_LOCK(1, NULL);
1570 }
1571 error = (*(mp->mnt_op->vfs_statvfs))(mp, a);
1572 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1573 KERNEL_UNLOCK_ONE(NULL);
1574 }
1575
1576 return error;
1577 }
1578
1579 int
1580 VFS_SYNC(struct mount *mp, int a, struct kauth_cred *b)
1581 {
1582 int error;
1583
1584 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1585 KERNEL_LOCK(1, NULL);
1586 }
1587 error = (*(mp->mnt_op->vfs_sync))(mp, a, b);
1588 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1589 KERNEL_UNLOCK_ONE(NULL);
1590 }
1591
1592 return error;
1593 }
1594
1595 int
1596 VFS_FHTOVP(struct mount *mp, struct fid *a, int b, struct vnode **c)
1597 {
1598 int error;
1599
1600 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1601 KERNEL_LOCK(1, NULL);
1602 }
1603 error = (*(mp->mnt_op->vfs_fhtovp))(mp, a, b, c);
1604 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1605 KERNEL_UNLOCK_ONE(NULL);
1606 }
1607
1608 return error;
1609 }
1610
1611 int
1612 VFS_VPTOFH(struct vnode *vp, struct fid *a, size_t *b)
1613 {
1614 int error;
1615
1616 if ((vp->v_vflag & VV_MPSAFE) == 0) {
1617 KERNEL_LOCK(1, NULL);
1618 }
1619 error = (*(vp->v_mount->mnt_op->vfs_vptofh))(vp, a, b);
1620 if ((vp->v_vflag & VV_MPSAFE) == 0) {
1621 KERNEL_UNLOCK_ONE(NULL);
1622 }
1623
1624 return error;
1625 }
1626
1627 int
1628 VFS_SNAPSHOT(struct mount *mp, struct vnode *a, struct timespec *b)
1629 {
1630 int error;
1631
1632 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1633 KERNEL_LOCK(1, NULL);
1634 }
1635 error = (*(mp->mnt_op->vfs_snapshot))(mp, a, b);
1636 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1637 KERNEL_UNLOCK_ONE(NULL);
1638 }
1639
1640 return error;
1641 }
1642
1643 int
1644 VFS_EXTATTRCTL(struct mount *mp, int a, struct vnode *b, int c, const char *d)
1645 {
1646 int error;
1647
1648 KERNEL_LOCK(1, NULL); /* XXXSMP check ffs */
1649 error = (*(mp->mnt_op->vfs_extattrctl))(mp, a, b, c, d);
1650 KERNEL_UNLOCK_ONE(NULL); /* XXX */
1651
1652 return error;
1653 }
1654
1655 int
1656 VFS_SUSPENDCTL(struct mount *mp, int a)
1657 {
1658 int error;
1659
1660 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1661 KERNEL_LOCK(1, NULL);
1662 }
1663 error = (*(mp->mnt_op->vfs_suspendctl))(mp, a);
1664 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
1665 KERNEL_UNLOCK_ONE(NULL);
1666 }
1667
1668 return error;
1669 }
1670
1671 #if defined(DDB) || defined(DEBUGPRINT)
1672 static const char buf_flagbits[] = BUF_FLAGBITS;
1673
1674 void
1675 vfs_buf_print(struct buf *bp, int full, void (*pr)(const char *, ...))
1676 {
1677 char bf[1024];
1678
1679 (*pr)(" vp %p lblkno 0x%"PRIx64" blkno 0x%"PRIx64" rawblkno 0x%"
1680 PRIx64 " dev 0x%x\n",
1681 bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_rawblkno, bp->b_dev);
1682
1683 snprintb(bf, sizeof(bf),
1684 buf_flagbits, bp->b_flags | bp->b_oflags | bp->b_cflags);
1685 (*pr)(" error %d flags %s\n", bp->b_error, bf);
1686
1687 (*pr)(" bufsize 0x%lx bcount 0x%lx resid 0x%lx\n",
1688 bp->b_bufsize, bp->b_bcount, bp->b_resid);
1689 (*pr)(" data %p saveaddr %p\n",
1690 bp->b_data, bp->b_saveaddr);
1691 (*pr)(" iodone %p objlock %p\n", bp->b_iodone, bp->b_objlock);
1692 }
1693
1694 void
1695 vfs_vnode_print(struct vnode *vp, int full, void (*pr)(const char *, ...))
1696 {
1697
1698 uvm_object_printit(&vp->v_uobj, full, pr);
1699 (*pr)("\n");
1700 vprint_common(vp, "", pr);
1701 if (full) {
1702 struct buf *bp;
1703
1704 (*pr)("clean bufs:\n");
1705 LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) {
1706 (*pr)(" bp %p\n", bp);
1707 vfs_buf_print(bp, full, pr);
1708 }
1709
1710 (*pr)("dirty bufs:\n");
1711 LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
1712 (*pr)(" bp %p\n", bp);
1713 vfs_buf_print(bp, full, pr);
1714 }
1715 }
1716 }
1717
1718 void
1719 vfs_vnode_lock_print(void *vlock, int full, void (*pr)(const char *, ...))
1720 {
1721 struct mount *mp;
1722 vnode_impl_t *vip;
1723
1724 for (mp = _mountlist_next(NULL); mp; mp = _mountlist_next(mp)) {
1725 TAILQ_FOREACH(vip, &mp->mnt_vnodelist, vi_mntvnodes) {
1726 if (&vip->vi_lock == vlock ||
1727 VIMPL_TO_VNODE(vip)->v_interlock == vlock)
1728 vfs_vnode_print(VIMPL_TO_VNODE(vip), full, pr);
1729 }
1730 }
1731 }
1732
1733 void
1734 vfs_mount_print_all(int full, void (*pr)(const char *, ...))
1735 {
1736 struct mount *mp;
1737 for (mp = _mountlist_next(NULL); mp; mp = _mountlist_next(mp))
1738 vfs_mount_print(mp, full, pr);
1739 }
1740
1741 void
1742 vfs_mount_print(struct mount *mp, int full, void (*pr)(const char *, ...))
1743 {
1744 char sbuf[256];
1745
1746 (*pr)("vnodecovered = %p data = %p\n",
1747 mp->mnt_vnodecovered, mp->mnt_data);
1748
1749 (*pr)("fs_bshift %d dev_bshift = %d\n",
1750 mp->mnt_fs_bshift, mp->mnt_dev_bshift);
1751
1752 snprintb(sbuf, sizeof(sbuf), __MNT_FLAG_BITS, mp->mnt_flag);
1753 (*pr)("flag = %s\n", sbuf);
1754
1755 snprintb(sbuf, sizeof(sbuf), __IMNT_FLAG_BITS, mp->mnt_iflag);
1756 (*pr)("iflag = %s\n", sbuf);
1757
1758 (*pr)("refcnt = %d updating @ %p\n", mp->mnt_refcnt, mp->mnt_updating);
1759
1760 (*pr)("statvfs cache:\n");
1761 (*pr)("\tbsize = %lu\n", mp->mnt_stat.f_bsize);
1762 (*pr)("\tfrsize = %lu\n", mp->mnt_stat.f_frsize);
1763 (*pr)("\tiosize = %lu\n", mp->mnt_stat.f_iosize);
1764
1765 (*pr)("\tblocks = %"PRIu64"\n", mp->mnt_stat.f_blocks);
1766 (*pr)("\tbfree = %"PRIu64"\n", mp->mnt_stat.f_bfree);
1767 (*pr)("\tbavail = %"PRIu64"\n", mp->mnt_stat.f_bavail);
1768 (*pr)("\tbresvd = %"PRIu64"\n", mp->mnt_stat.f_bresvd);
1769
1770 (*pr)("\tfiles = %"PRIu64"\n", mp->mnt_stat.f_files);
1771 (*pr)("\tffree = %"PRIu64"\n", mp->mnt_stat.f_ffree);
1772 (*pr)("\tfavail = %"PRIu64"\n", mp->mnt_stat.f_favail);
1773 (*pr)("\tfresvd = %"PRIu64"\n", mp->mnt_stat.f_fresvd);
1774
1775 (*pr)("\tf_fsidx = { 0x%"PRIx32", 0x%"PRIx32" }\n",
1776 mp->mnt_stat.f_fsidx.__fsid_val[0],
1777 mp->mnt_stat.f_fsidx.__fsid_val[1]);
1778
1779 (*pr)("\towner = %"PRIu32"\n", mp->mnt_stat.f_owner);
1780 (*pr)("\tnamemax = %lu\n", mp->mnt_stat.f_namemax);
1781
1782 snprintb(sbuf, sizeof(sbuf), __MNT_FLAG_BITS, mp->mnt_stat.f_flag);
1783
1784 (*pr)("\tflag = %s\n", sbuf);
1785 (*pr)("\tsyncwrites = %" PRIu64 "\n", mp->mnt_stat.f_syncwrites);
1786 (*pr)("\tasyncwrites = %" PRIu64 "\n", mp->mnt_stat.f_asyncwrites);
1787 (*pr)("\tsyncreads = %" PRIu64 "\n", mp->mnt_stat.f_syncreads);
1788 (*pr)("\tasyncreads = %" PRIu64 "\n", mp->mnt_stat.f_asyncreads);
1789 (*pr)("\tfstypename = %s\n", mp->mnt_stat.f_fstypename);
1790 (*pr)("\tmntonname = %s\n", mp->mnt_stat.f_mntonname);
1791 (*pr)("\tmntfromname = %s\n", mp->mnt_stat.f_mntfromname);
1792
1793 {
1794 int cnt = 0;
1795 vnode_t *vp;
1796 vnode_impl_t *vip;
1797 (*pr)("locked vnodes =");
1798 TAILQ_FOREACH(vip, &mp->mnt_vnodelist, vi_mntvnodes) {
1799 vp = VIMPL_TO_VNODE(vip);
1800 if (VOP_ISLOCKED(vp)) {
1801 if ((++cnt % 6) == 0) {
1802 (*pr)(" %p,\n\t", vp);
1803 } else {
1804 (*pr)(" %p,", vp);
1805 }
1806 }
1807 }
1808 (*pr)("\n");
1809 }
1810
1811 if (full) {
1812 int cnt = 0;
1813 vnode_t *vp;
1814 vnode_impl_t *vip;
1815 (*pr)("all vnodes =");
1816 TAILQ_FOREACH(vip, &mp->mnt_vnodelist, vi_mntvnodes) {
1817 vp = VIMPL_TO_VNODE(vip);
1818 if (!TAILQ_NEXT(vip, vi_mntvnodes)) {
1819 (*pr)(" %p", vp);
1820 } else if ((++cnt % 6) == 0) {
1821 (*pr)(" %p,\n\t", vp);
1822 } else {
1823 (*pr)(" %p,", vp);
1824 }
1825 }
1826 (*pr)("\n");
1827 }
1828 }
1829
1830 /*
1831 * List all of the locked vnodes in the system.
1832 */
1833 void printlockedvnodes(void);
1834
1835 void
1836 printlockedvnodes(void)
1837 {
1838 struct mount *mp;
1839 vnode_t *vp;
1840 vnode_impl_t *vip;
1841
1842 printf("Locked vnodes\n");
1843 for (mp = _mountlist_next(NULL); mp; mp = _mountlist_next(mp)) {
1844 TAILQ_FOREACH(vip, &mp->mnt_vnodelist, vi_mntvnodes) {
1845 vp = VIMPL_TO_VNODE(vip);
1846 if (VOP_ISLOCKED(vp))
1847 vprint(NULL, vp);
1848 }
1849 }
1850 }
1851
1852 #endif /* DDB || DEBUGPRINT */
1853