1 /* $OpenBSD: vfs_sync.c,v 1.73 2024/10/18 05:52:32 miod Exp $ */
2
3 /*
4 * Portions of this code are:
5 *
6 * Copyright (c) 1989, 1993
7 * The Regents of the University of California. All rights reserved.
8 * (c) UNIX System Laboratories, Inc.
9 * All or some portions of this file are derived from material licensed
10 * to the University of California by American Telephone and Telegraph
11 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
12 * the permission of UNIX System Laboratories, Inc.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * 1. Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution.
22 * 3. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 */
38
39 /*
40 * Syncer daemon
41 */
42
43 #include <sys/queue.h>
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/proc.h>
47 #include <sys/mount.h>
48 #include <sys/vnode.h>
49 #include <sys/lock.h>
50 #include <sys/malloc.h>
51 #include <sys/time.h>
52
/*
 * Syncer tunables and shared state.
 *
 * Delays are expressed in worklist slots; the syncer thread drains one
 * slot per pass and paces itself to roughly one pass per second.
 */
#define SYNCER_MAXDELAY	32	/* maximum sync delay time */
#define SYNCER_DEFAULT	30	/* default sync delay time */

/* Number of worklist slots; recomputed by vn_initialize_syncerd(). */
int syncer_maxdelay = SYNCER_MAXDELAY;
/* Delay applied when a vnode is (re)queued after a sync attempt. */
int syncdelay = SYNCER_DEFAULT;

/* Index of the next worklist slot the syncer thread will process. */
int syncer_delayno = 0;
/* Index mask filled in by hashinit(); used to wrap slot numbers. */
long syncer_mask;

/* The worklist proper: an array of vnode lists, one per slot. */
LIST_HEAD(synclist, vnode);
static struct synclist *syncer_workitem_pending;

/* Not referenced in this file; presumably set by whoever starts the thread. */
struct proc *syncerproc;
/* Its address is the wait channel syncer_thread() sleeps on. */
int syncer_chan;
68
69 /*
70 * The workitem queue.
71 *
72 * It is useful to delay writes of file data and filesystem metadata
73 * for tens of seconds so that quickly created and deleted files need
74 * not waste disk bandwidth being created and removed. To realize this,
75 * we append vnodes to a "workitem" queue. When running with a soft
76 * updates implementation, most pending metadata dependencies should
77 * not wait for more than a few seconds. Thus, mounted block devices
78 * are delayed only about half the time that file data is delayed.
79 * Similarly, directory updates are more critical, so are only delayed
80 * about a third the time that file data is delayed. Thus, there are
81 * SYNCER_MAXDELAY queues that are processed round-robin at a rate of
82 * one each second (driven off the filesystem syncer process). The
83 * syncer_delayno variable indicates the next queue that is to be processed.
84 * Items that need to be processed soon are placed in this queue:
85 *
86 * syncer_workitem_pending[syncer_delayno]
87 *
88 * A delay of fifteen seconds is done by placing the request fifteen
89 * entries later in the queue:
90 *
91 * syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask]
92 *
93 */
94
95 void
vn_initialize_syncerd(void)96 vn_initialize_syncerd(void)
97 {
98 syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, M_WAITOK,
99 &syncer_mask);
100 syncer_maxdelay = syncer_mask + 1;
101 }
102
103 /*
104 * Add an item to the syncer work queue.
105 */
106 void
vn_syncer_add_to_worklist(struct vnode * vp,int delay)107 vn_syncer_add_to_worklist(struct vnode *vp, int delay)
108 {
109 int s, slot;
110
111 if (delay > syncer_maxdelay - 2)
112 delay = syncer_maxdelay - 2;
113 slot = (syncer_delayno + delay) & syncer_mask;
114
115 s = splbio();
116 if (vp->v_bioflag & VBIOONSYNCLIST)
117 LIST_REMOVE(vp, v_synclist);
118
119 vp->v_bioflag |= VBIOONSYNCLIST;
120 LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist);
121 splx(s);
122 }
123
124 /*
125 * System filesystem synchronizer daemon.
126 */
127 void
syncer_thread(void * arg)128 syncer_thread(void *arg)
129 {
130 uint64_t elapsed, start;
131 struct proc *p = curproc;
132 struct synclist *slp;
133 struct vnode *vp;
134 int s;
135
136 for (;;) {
137 start = getnsecuptime();
138
139 /*
140 * Push files whose dirty time has expired.
141 */
142 s = splbio();
143 slp = &syncer_workitem_pending[syncer_delayno];
144
145 syncer_delayno += 1;
146 if (syncer_delayno == syncer_maxdelay)
147 syncer_delayno = 0;
148
149 while ((vp = LIST_FIRST(slp)) != NULL) {
150 if (vget(vp, LK_EXCLUSIVE | LK_NOWAIT)) {
151 /*
152 * If we fail to get the lock, we move this
153 * vnode one second ahead in time.
154 * XXX - no good, but the best we can do.
155 */
156 vn_syncer_add_to_worklist(vp, 1);
157 continue;
158 }
159 splx(s);
160 (void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p);
161 vput(vp);
162 s = splbio();
163 if (LIST_FIRST(slp) == vp) {
164 /*
165 * Note: disk vps can remain on the
166 * worklist too with no dirty blocks, but
167 * since sync_fsync() moves it to a different
168 * slot we are safe.
169 */
170 #ifdef DIAGNOSTIC
171 if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL &&
172 vp->v_type != VBLK) {
173 vprint("fsync failed", vp);
174 if (vp->v_mount != NULL)
175 printf("mounted on: %s\n",
176 vp->v_mount->mnt_stat.f_mntonname);
177 panic("%s: fsync failed", __func__);
178 }
179 #endif /* DIAGNOSTIC */
180 /*
181 * Put us back on the worklist. The worklist
182 * routine will remove us from our current
183 * position and then add us back in at a later
184 * position.
185 */
186 vn_syncer_add_to_worklist(vp, syncdelay);
187 }
188
189 sched_pause(yield);
190 }
191
192 splx(s);
193
194 /*
195 * If it has taken us less than a second to process the
196 * current work, then wait. Otherwise start right over
197 * again. We can still lose time if any single round
198 * takes more than two seconds, but it does not really
199 * matter as we are just trying to generally pace the
200 * filesystem activity.
201 */
202 elapsed = getnsecuptime() - start;
203 if (elapsed < SEC_TO_NSEC(1)) {
204 tsleep_nsec(&syncer_chan, PPAUSE, "syncer",
205 SEC_TO_NSEC(1) - elapsed);
206 }
207 }
208 }
209
210 /* Routine to create and manage a filesystem syncer vnode. */
211 int sync_fsync(void *);
212 int sync_inactive(void *);
213 int sync_print(void *);
214
215 const struct vops sync_vops = {
216 .vop_close = nullop,
217 .vop_fsync = sync_fsync,
218 .vop_inactive = sync_inactive,
219 .vop_reclaim = nullop,
220 .vop_lock = nullop,
221 .vop_unlock = nullop,
222 .vop_islocked = nullop,
223 .vop_print = sync_print,
224
225 .vop_abortop = NULL,
226 .vop_access = NULL,
227 .vop_advlock = NULL,
228 .vop_bmap = NULL,
229 .vop_bwrite = NULL,
230 .vop_create = NULL,
231 .vop_getattr = NULL,
232 .vop_ioctl = NULL,
233 .vop_link = NULL,
234 .vop_lookup = NULL,
235 .vop_mknod = NULL,
236 .vop_open = NULL,
237 .vop_pathconf = NULL,
238 .vop_read = NULL,
239 .vop_readdir = NULL,
240 .vop_readlink = NULL,
241 .vop_remove = eopnotsupp,
242 .vop_rename = NULL,
243 .vop_revoke = NULL,
244 .vop_mkdir = NULL,
245 .vop_rmdir = NULL,
246 .vop_setattr = NULL,
247 .vop_strategy = NULL,
248 .vop_symlink = NULL,
249 .vop_write = NULL,
250 .vop_kqfilter = NULL
251 };
252
253 /*
254 * Create a new filesystem syncer vnode for the specified mount point.
255 */
256 int
vfs_allocate_syncvnode(struct mount * mp)257 vfs_allocate_syncvnode(struct mount *mp)
258 {
259 struct vnode *vp;
260 static long start, incr, next;
261 int error;
262
263 /* Allocate a new vnode */
264 if ((error = getnewvnode(VT_VFS, mp, &sync_vops, &vp)) != 0) {
265 mp->mnt_syncer = NULL;
266 return (error);
267 }
268 vp->v_writecount = 1;
269 vp->v_type = VNON;
270 /*
271 * Place the vnode onto the syncer worklist. We attempt to
272 * scatter them about on the list so that they will go off
273 * at evenly distributed times even if all the filesystems
274 * are mounted at once.
275 */
276 next += incr;
277 if (next == 0 || next > syncer_maxdelay) {
278 start /= 2;
279 incr /= 2;
280 if (start == 0) {
281 start = syncer_maxdelay / 2;
282 incr = syncer_maxdelay;
283 }
284 next = start;
285 }
286 vn_syncer_add_to_worklist(vp, next);
287 mp->mnt_syncer = vp;
288 return (0);
289 }
290
291 /*
292 * Do a lazy sync of the filesystem.
293 */
294 int
sync_fsync(void * v)295 sync_fsync(void *v)
296 {
297 struct vop_fsync_args *ap = v;
298 struct vnode *syncvp = ap->a_vp;
299 struct mount *mp = syncvp->v_mount;
300 int asyncflag;
301
302 /*
303 * We only need to do something if this is a lazy evaluation.
304 */
305 if (ap->a_waitfor != MNT_LAZY)
306 return (0);
307
308 /*
309 * Move ourselves to the back of the sync list.
310 */
311 vn_syncer_add_to_worklist(syncvp, syncdelay);
312
313 /*
314 * Walk the list of vnodes pushing all that are dirty and
315 * not already on the sync list.
316 */
317 if (vfs_busy(mp, VB_READ|VB_NOWAIT) == 0) {
318 asyncflag = mp->mnt_flag & MNT_ASYNC;
319 mp->mnt_flag &= ~MNT_ASYNC;
320 VFS_SYNC(mp, MNT_LAZY, 0, ap->a_cred, ap->a_p);
321 if (asyncflag)
322 mp->mnt_flag |= MNT_ASYNC;
323 vfs_unbusy(mp);
324 }
325
326 return (0);
327 }
328
329 /*
330 * The syncer vnode is no longer needed and is being decommissioned.
331 */
332 int
sync_inactive(void * v)333 sync_inactive(void *v)
334 {
335 struct vop_inactive_args *ap = v;
336
337 struct vnode *vp = ap->a_vp;
338 int s;
339
340 if (vp->v_usecount == 0) {
341 VOP_UNLOCK(vp);
342 return (0);
343 }
344
345 vp->v_mount->mnt_syncer = NULL;
346
347 s = splbio();
348
349 LIST_REMOVE(vp, v_synclist);
350 vp->v_bioflag &= ~VBIOONSYNCLIST;
351
352 splx(s);
353
354 vp->v_writecount = 0;
355 vput(vp);
356
357 return (0);
358 }
359
/*
 * VOP_PRINT handler for a syncer vnode.
 *
 * v: unused.
 *
 * Prints a fixed one-line tag when the kernel is built with DEBUG,
 * DIAGNOSTIC or VFSLCKDEBUG; otherwise does nothing.  Always returns 0.
 */
int
sync_print(void *v)
{
#if defined(DEBUG) || defined(DIAGNOSTIC) || defined(VFSLCKDEBUG)
	printf("syncer vnode\n");
#endif
	return (0);
}
372