/*	$NetBSD: vfs_trans.c,v 1.70 2022/11/04 11:20:39 hannken Exp $	*/

/*-
 * Copyright (c) 2007, 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Juergen Hannken-Illjes.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.70 2022/11/04 11:20:39 hannken Exp $");

/*
 * File system transaction operations.
 */
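
/*
 * A rough sketch of typical use by a caller (error handling and the
 * actual operation omitted):
 *
 *	fstrans_start(mp);
 *	...				operate on the file system
 *	fstrans_done(mp);
 *
 * vfs_suspend() and vfs_resume() drive the state changes that these
 * transactions synchronize against.
 */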

#ifdef _KERNEL_OPT
#include "opt_ddb.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/buf.h>
#include <sys/hash.h>
#include <sys/kmem.h>
#include <sys/mount.h>
#include <sys/pserialize.h>
#include <sys/vnode.h>
#include <sys/fstrans.h>
#include <sys/proc.h>
#include <sys/pool.h>

#include <miscfs/deadfs/deadfs.h>
#include <miscfs/specfs/specdev.h>

#define FSTRANS_MOUNT_HASHSIZE	32

enum fstrans_lock_type {
	FSTRANS_LAZY,			/* Granted while not suspended */
	FSTRANS_SHARED			/* Granted while not suspending */
};

struct fscow_handler {
	LIST_ENTRY(fscow_handler) ch_list;
	int (*ch_func)(void *, struct buf *, bool);
	void *ch_arg;
};
struct fstrans_lwp_info {
	struct fstrans_lwp_info *fli_succ;
	struct lwp *fli_self;
	struct mount *fli_mount;
	struct fstrans_lwp_info *fli_alias;
	struct fstrans_mount_info *fli_mountinfo;
	int fli_trans_cnt;
	int fli_alias_cnt;
	int fli_cow_cnt;
	enum fstrans_lock_type fli_lock_type;
	LIST_ENTRY(fstrans_lwp_info) fli_list;
};
struct fstrans_mount_info {
	enum fstrans_state fmi_state;
	unsigned int fmi_ref_cnt;
	bool fmi_gone;
	bool fmi_cow_change;
	SLIST_ENTRY(fstrans_mount_info) fmi_hash;
	LIST_HEAD(, fscow_handler) fmi_cow_handler;
	struct mount *fmi_mount;
	struct fstrans_mount_info *fmi_lower_info;
	struct lwp *fmi_owner;
};
SLIST_HEAD(fstrans_mount_hashhead, fstrans_mount_info);

static kmutex_t vfs_suspend_lock	/* Serialize suspensions. */
    __cacheline_aligned;
static kmutex_t fstrans_lock		/* Fstrans big lock. */
    __cacheline_aligned;
static kcondvar_t fstrans_state_cv;	/* Fstrans or cow state changed. */
static kcondvar_t fstrans_count_cv;	/* Fstrans or cow count changed. */
static pserialize_t fstrans_psz;	/* Pserialize state. */
static LIST_HEAD(fstrans_lwp_head, fstrans_lwp_info) fstrans_fli_head;
					/* List of all fstrans_lwp_info. */
static pool_cache_t fstrans_lwp_cache;	/* Cache of fstrans_lwp_info. */

static u_long fstrans_mount_hashmask;
static struct fstrans_mount_hashhead *fstrans_mount_hashtab;
static int fstrans_gone_count;	/* Number of fstrans_mount_info gone. */

static inline uint32_t fstrans_mount_hash(struct mount *);
static inline struct fstrans_mount_info *fstrans_mount_get(struct mount *);
static void fstrans_mount_dtor(struct fstrans_mount_info *);
static void fstrans_clear_lwp_info(void);
static inline struct fstrans_lwp_info *
    fstrans_get_lwp_info(struct mount *, bool);
static struct fstrans_lwp_info *fstrans_alloc_lwp_info(struct mount *);
static int fstrans_lwp_pcc(void *, void *, int);
static void fstrans_lwp_pcd(void *, void *);
static inline int _fstrans_start(struct mount *, enum fstrans_lock_type, int);
static bool grant_lock(const struct fstrans_mount_info *,
    const enum fstrans_lock_type);
static bool state_change_done(const struct fstrans_mount_info *);
static bool cow_state_change_done(const struct fstrans_mount_info *);
static void cow_change_enter(struct fstrans_mount_info *);
static void cow_change_done(struct fstrans_mount_info *);

/*
 * Initialize.
 */
void
fstrans_init(void)
{

	mutex_init(&vfs_suspend_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&fstrans_lock, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&fstrans_state_cv, "fstchg");
	cv_init(&fstrans_count_cv, "fstcnt");
	fstrans_psz = pserialize_create();
	LIST_INIT(&fstrans_fli_head);
	fstrans_lwp_cache = pool_cache_init(sizeof(struct fstrans_lwp_info),
	    coherency_unit, 0, 0, "fstlwp", NULL, IPL_NONE,
	    fstrans_lwp_pcc, fstrans_lwp_pcd, NULL);
	KASSERT(fstrans_lwp_cache != NULL);
	fstrans_mount_hashtab = hashinit(FSTRANS_MOUNT_HASHSIZE, HASH_SLIST,
	    true, &fstrans_mount_hashmask);
}

/*
 * pool_cache constructor for fstrans_lwp_info.  Updating the global list
 * produces cache misses on MP.  Minimise this by keeping free entries
 * on the list.
 */
int
fstrans_lwp_pcc(void *arg, void *obj, int flags)
{
	struct fstrans_lwp_info *fli = obj;

	memset(fli, 0, sizeof(*fli));

	mutex_enter(&fstrans_lock);
	LIST_INSERT_HEAD(&fstrans_fli_head, fli, fli_list);
	mutex_exit(&fstrans_lock);

	return 0;
}

/*
 * pool_cache destructor
 */
void
fstrans_lwp_pcd(void *arg, void *obj)
{
	struct fstrans_lwp_info *fli = obj;

	mutex_enter(&fstrans_lock);
	LIST_REMOVE(fli, fli_list);
	mutex_exit(&fstrans_lock);
}

/*
 * Deallocate lwp state.
 */
void
fstrans_lwp_dtor(lwp_t *l)
{
	struct fstrans_lwp_info *fli, *fli_next;

	if (l->l_fstrans == NULL)
		return;

	mutex_enter(&fstrans_lock);
	for (fli = l->l_fstrans; fli; fli = fli_next) {
		KASSERT(fli->fli_trans_cnt == 0);
		KASSERT(fli->fli_cow_cnt == 0);
		KASSERT(fli->fli_self == l);
		if (fli->fli_mount != NULL)
			fstrans_mount_dtor(fli->fli_mountinfo);
		fli_next = fli->fli_succ;
		fli->fli_alias_cnt = 0;
		fli->fli_mount = NULL;
		fli->fli_alias = NULL;
		fli->fli_mountinfo = NULL;
		fli->fli_self = NULL;
	}
	mutex_exit(&fstrans_lock);

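	/*
	 * The entries have been cleared above; return them to the
	 * pool cache without holding fstrans_lock.
	 */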
	for (fli = l->l_fstrans; fli; fli = fli_next) {
		fli_next = fli->fli_succ;
		pool_cache_put(fstrans_lwp_cache, fli);
	}
	l->l_fstrans = NULL;
}

/*
 * mount pointer to hash
 */
static inline uint32_t
fstrans_mount_hash(struct mount *mp)
{

	return hash32_buf(&mp, sizeof(mp), HASH32_BUF_INIT) &
	    fstrans_mount_hashmask;
}

/*
 * retrieve fstrans_mount_info by mount or NULL
 */
static inline struct fstrans_mount_info *
fstrans_mount_get(struct mount *mp)
{
	uint32_t indx;
	struct fstrans_mount_info *fmi, *fmi_lower;

	KASSERT(mutex_owned(&fstrans_lock));

	indx = fstrans_mount_hash(mp);
	SLIST_FOREACH(fmi, &fstrans_mount_hashtab[indx], fmi_hash) {
		if (fmi->fmi_mount == mp) {
			if (__predict_false(mp->mnt_lower != NULL &&
			    fmi->fmi_lower_info == NULL)) {
				/*
				 * Intern the lower/lowest mount into
				 * this mount info on first lookup.
				 */
				KASSERT(fmi->fmi_ref_cnt == 1);

				fmi_lower = fstrans_mount_get(mp->mnt_lower);
				if (fmi_lower && fmi_lower->fmi_lower_info)
					fmi_lower = fmi_lower->fmi_lower_info;
				if (fmi_lower == NULL)
					return NULL;
				fmi->fmi_lower_info = fmi_lower;
				fmi->fmi_lower_info->fmi_ref_cnt += 1;
			}
			return fmi;
		}
	}

	return NULL;
}

/*
 * Dereference mount state.
 */
static void
fstrans_mount_dtor(struct fstrans_mount_info *fmi)
{

	KASSERT(mutex_owned(&fstrans_lock));

	KASSERT(fmi != NULL);
	fmi->fmi_ref_cnt -= 1;
	if (__predict_true(fmi->fmi_ref_cnt > 0)) {
		return;
	}

	KASSERT(fmi->fmi_state == FSTRANS_NORMAL);
	KASSERT(LIST_FIRST(&fmi->fmi_cow_handler) == NULL);
	KASSERT(fmi->fmi_owner == NULL);

	if (fmi->fmi_lower_info)
		fstrans_mount_dtor(fmi->fmi_lower_info);

	KASSERT(fstrans_gone_count > 0);
	fstrans_gone_count -= 1;

	KASSERT(fmi->fmi_mount->mnt_lower == NULL);

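	/*
	 * This was the last reference and the file system is already
	 * unmounted: free the struct mount together with the mount info.
	 */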
	kmem_free(fmi->fmi_mount, sizeof(*fmi->fmi_mount));
	kmem_free(fmi, sizeof(*fmi));
}

/*
 * Allocate mount state.
 */
int
fstrans_mount(struct mount *mp)
{
	uint32_t indx;
	struct fstrans_mount_info *newfmi;

	indx = fstrans_mount_hash(mp);

	newfmi = kmem_alloc(sizeof(*newfmi), KM_SLEEP);
	newfmi->fmi_state = FSTRANS_NORMAL;
	newfmi->fmi_ref_cnt = 1;
	newfmi->fmi_gone = false;
	LIST_INIT(&newfmi->fmi_cow_handler);
	newfmi->fmi_cow_change = false;
	newfmi->fmi_mount = mp;
	newfmi->fmi_lower_info = NULL;
	newfmi->fmi_owner = NULL;

	mutex_enter(&fstrans_lock);
	SLIST_INSERT_HEAD(&fstrans_mount_hashtab[indx], newfmi, fmi_hash);
	mutex_exit(&fstrans_lock);

	return 0;
}

/*
 * Deallocate mount state.
 */
void
fstrans_unmount(struct mount *mp)
{
	uint32_t indx;
	struct fstrans_mount_info *fmi;

	indx = fstrans_mount_hash(mp);

	mutex_enter(&fstrans_lock);
	fmi = fstrans_mount_get(mp);
	KASSERT(fmi != NULL);
	fmi->fmi_gone = true;
	SLIST_REMOVE(&fstrans_mount_hashtab[indx],
	    fmi, fstrans_mount_info, fmi_hash);
	fstrans_gone_count += 1;
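	/* Drop the reference taken by fstrans_mount(). */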
	fstrans_mount_dtor(fmi);
	mutex_exit(&fstrans_lock);
}

/*
 * Clear mount entries whose mount is gone.
 */
static void
fstrans_clear_lwp_info(void)
{
	struct fstrans_lwp_info **p, *fli, *tofree = NULL;

	/*
	 * Scan our list clearing entries whose mount is gone.
	 */
	mutex_enter(&fstrans_lock);
	for (p = &curlwp->l_fstrans; *p; ) {
		fli = *p;
		if (fli->fli_mount != NULL &&
		    fli->fli_mountinfo->fmi_gone &&
		    fli->fli_trans_cnt == 0 &&
		    fli->fli_cow_cnt == 0 &&
		    fli->fli_alias_cnt == 0) {
			*p = (*p)->fli_succ;
			fstrans_mount_dtor(fli->fli_mountinfo);
			if (fli->fli_alias) {
				KASSERT(fli->fli_alias->fli_alias_cnt > 0);
				fli->fli_alias->fli_alias_cnt--;
			}
			fli->fli_mount = NULL;
			fli->fli_alias = NULL;
			fli->fli_mountinfo = NULL;
			fli->fli_self = NULL;
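			/* Restart the scan from the head of the list. */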
			p = &curlwp->l_fstrans;
			fli->fli_succ = tofree;
			tofree = fli;
		} else {
			p = &(*p)->fli_succ;
		}
	}
#ifdef DIAGNOSTIC
	for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ)
		if (fli->fli_alias != NULL)
			KASSERT(fli->fli_alias->fli_self == curlwp);
#endif /* DIAGNOSTIC */
	mutex_exit(&fstrans_lock);

	while (tofree != NULL) {
		fli = tofree;
		tofree = fli->fli_succ;
		pool_cache_put(fstrans_lwp_cache, fli);
	}
}


/*
 * Allocate and return per lwp info for this mount.
 */
static struct fstrans_lwp_info *
fstrans_alloc_lwp_info(struct mount *mp)
{
	struct fstrans_lwp_info *fli, *fli_lower;
	struct fstrans_mount_info *fmi;

	for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) {
		if (fli->fli_mount == mp)
			return fli;
	}

	/*
	 * Lookup mount info and get lower mount per lwp info.
	 */
	mutex_enter(&fstrans_lock);
	fmi = fstrans_mount_get(mp);
	if (fmi == NULL) {
		mutex_exit(&fstrans_lock);
		return NULL;
	}
	fmi->fmi_ref_cnt += 1;
	mutex_exit(&fstrans_lock);

	if (fmi->fmi_lower_info) {
		fli_lower =
		    fstrans_alloc_lwp_info(fmi->fmi_lower_info->fmi_mount);
		if (fli_lower == NULL) {
			mutex_enter(&fstrans_lock);
			fstrans_mount_dtor(fmi);
			mutex_exit(&fstrans_lock);

			return NULL;
		}
	} else {
		fli_lower = NULL;
	}

	/*
	 * Allocate a new entry.
	 */
	fli = pool_cache_get(fstrans_lwp_cache, PR_WAITOK);
	KASSERT(fli->fli_trans_cnt == 0);
	KASSERT(fli->fli_cow_cnt == 0);
	KASSERT(fli->fli_alias_cnt == 0);
	KASSERT(fli->fli_mount == NULL);
	KASSERT(fli->fli_alias == NULL);
	KASSERT(fli->fli_mountinfo == NULL);
	KASSERT(fli->fli_self == NULL);

	/*
	 * Attach the mount info and alias.
	 */

	fli->fli_self = curlwp;
	fli->fli_mount = mp;
	fli->fli_mountinfo = fmi;

	fli->fli_succ = curlwp->l_fstrans;
	curlwp->l_fstrans = fli;

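	/*
	 * For a layered file system record the lower fli as an alias
	 * and hand it back, so the caller operates on the lowest mount.
	 */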
	if (fli_lower) {
		fli->fli_alias = fli_lower;
		fli->fli_alias->fli_alias_cnt++;
		fli = fli->fli_alias;
	}

	return fli;
}

/*
 * Retrieve the per lwp info for this mount, allocating if necessary.
 */
static inline struct fstrans_lwp_info *
fstrans_get_lwp_info(struct mount *mp, bool do_alloc)
{
	struct fstrans_lwp_info *fli;

	/*
	 * Scan our list for a match.
	 */
	for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) {
		if (fli->fli_mount == mp) {
			KASSERT(mp->mnt_lower == NULL ||
			    fli->fli_alias != NULL);
			if (fli->fli_alias != NULL)
				fli = fli->fli_alias;
			break;
		}
	}

	if (do_alloc) {
		if (__predict_false(fli == NULL))
			fli = fstrans_alloc_lwp_info(mp);
	}

	return fli;
}

/*
 * Check if this lock type is granted at this state.
 */
static bool
grant_lock(const struct fstrans_mount_info *fmi,
    const enum fstrans_lock_type type)
{

	if (__predict_true(fmi->fmi_state == FSTRANS_NORMAL))
		return true;
	if (fmi->fmi_owner == curlwp)
		return true;
	if (fmi->fmi_state == FSTRANS_SUSPENDING && type == FSTRANS_LAZY)
		return true;

	return false;
}

/*
 * Start a transaction.  If this thread already has a transaction on this
 * file system, increment the reference counter.
 */
static inline int
_fstrans_start(struct mount *mp, enum fstrans_lock_type lock_type, int wait)
{
	int s;
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;

	ASSERT_SLEEPABLE();

	fli = fstrans_get_lwp_info(mp, true);
	if (fli == NULL)
		return 0;
	fmi = fli->fli_mountinfo;

	if (fli->fli_trans_cnt > 0) {
		fli->fli_trans_cnt += 1;

		return 0;
	}

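	/*
	 * Fast path: grant the transaction without taking fstrans_lock,
	 * relying on pserialize to synchronize with state changes.
	 */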
	s = pserialize_read_enter();
	if (__predict_true(grant_lock(fmi, lock_type))) {
		fli->fli_trans_cnt = 1;
		fli->fli_lock_type = lock_type;
		pserialize_read_exit(s);

		return 0;
	}
	pserialize_read_exit(s);

	if (! wait)
		return EBUSY;

	mutex_enter(&fstrans_lock);
	while (! grant_lock(fmi, lock_type))
		cv_wait(&fstrans_state_cv, &fstrans_lock);
	fli->fli_trans_cnt = 1;
	fli->fli_lock_type = lock_type;
	mutex_exit(&fstrans_lock);

	return 0;
}

void
fstrans_start(struct mount *mp)
{
	int error __diagused;

	error = _fstrans_start(mp, FSTRANS_SHARED, 1);
	KASSERT(error == 0);
}

int
fstrans_start_nowait(struct mount *mp)
{

	return _fstrans_start(mp, FSTRANS_SHARED, 0);
}

void
fstrans_start_lazy(struct mount *mp)
{
	int error __diagused;

	error = _fstrans_start(mp, FSTRANS_LAZY, 1);
	KASSERT(error == 0);
}

/*
 * Finish a transaction.
 */
void
fstrans_done(struct mount *mp)
{
	int s;
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;

	fli = fstrans_get_lwp_info(mp, false);
	if (fli == NULL)
		return;
	fmi = fli->fli_mountinfo;
	KASSERT(fli->fli_trans_cnt > 0);

	if (fli->fli_trans_cnt > 1) {
		fli->fli_trans_cnt -= 1;

		return;
	}

	if (__predict_false(fstrans_gone_count > 0))
		fstrans_clear_lwp_info();

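	/*
	 * Fast path: drop the last reference without fstrans_lock while
	 * the file system is neither suspending nor suspended.
	 */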
	s = pserialize_read_enter();
	if (__predict_true(fmi->fmi_state == FSTRANS_NORMAL)) {
		fli->fli_trans_cnt = 0;
		pserialize_read_exit(s);

		return;
	}
	pserialize_read_exit(s);

	mutex_enter(&fstrans_lock);
	fli->fli_trans_cnt = 0;
	cv_signal(&fstrans_count_cv);
	mutex_exit(&fstrans_lock);
}

/*
 * Check if we hold a lock.
 */
int
fstrans_held(struct mount *mp)
{
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;

	KASSERT(mp != dead_rootmount);

	fli = fstrans_get_lwp_info(mp, false);
	if (fli == NULL)
		return 0;
	fmi = fli->fli_mountinfo;

	return (fli->fli_trans_cnt > 0 || fmi->fmi_owner == curlwp);
}

/*
 * Check if this thread has an exclusive lock.
 */
int
fstrans_is_owner(struct mount *mp)
{
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;

	KASSERT(mp != dead_rootmount);

	fli = fstrans_get_lwp_info(mp, false);
	if (fli == NULL)
		return 0;
	fmi = fli->fli_mountinfo;

	return (fmi->fmi_owner == curlwp);
}

/*
 * True, if no thread is in a transaction not granted at the current state.
 */
static bool
state_change_done(const struct fstrans_mount_info *fmi)
{
	struct fstrans_lwp_info *fli;

	KASSERT(mutex_owned(&fstrans_lock));

	LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
		if (fli->fli_mountinfo != fmi)
			continue;
		if (fli->fli_trans_cnt == 0)
			continue;
		if (fli->fli_self == curlwp)
			continue;
		if (grant_lock(fmi, fli->fli_lock_type))
			continue;

		return false;
	}

	return true;
}

/*
 * Set new file system state.
 */
int
fstrans_setstate(struct mount *mp, enum fstrans_state new_state)
{
	int error;
	enum fstrans_state old_state;
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;

	KASSERT(mp != dead_rootmount);

	fli = fstrans_get_lwp_info(mp, true);
	if (fli == NULL)
		return ENOENT;
	fmi = fli->fli_mountinfo;
	old_state = fmi->fmi_state;
	if (old_state == new_state)
		return 0;

	mutex_enter(&fstrans_lock);
	fmi->fmi_state = new_state;
	pserialize_perform(fstrans_psz);

	/*
	 * All threads see the new state now.
	 * Wait for transactions invalid at this state to leave.
	 */
	error = 0;
	while (! state_change_done(fmi)) {
		error = cv_wait_sig(&fstrans_count_cv, &fstrans_lock);
		if (error) {
			new_state = fmi->fmi_state = FSTRANS_NORMAL;
			break;
		}
	}
	if (old_state != new_state) {
		if (old_state == FSTRANS_NORMAL) {
			KASSERT(fmi->fmi_owner == NULL);
			fmi->fmi_owner = curlwp;
		}
		if (new_state == FSTRANS_NORMAL) {
			KASSERT(fmi->fmi_owner == curlwp);
			fmi->fmi_owner = NULL;
		}
	}
	cv_broadcast(&fstrans_state_cv);
	mutex_exit(&fstrans_lock);

	return error;
}

/*
 * Get current file system state.
 */
enum fstrans_state
fstrans_getstate(struct mount *mp)
{
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;

	KASSERT(mp != dead_rootmount);

	fli = fstrans_get_lwp_info(mp, true);
	KASSERT(fli != NULL);
	fmi = fli->fli_mountinfo;

	return fmi->fmi_state;
}

/*
 * Request a filesystem to suspend all operations.
 */
int
vfs_suspend(struct mount *mp, int nowait)
{
	struct fstrans_lwp_info *fli;
	int error;

	if (mp == dead_rootmount)
		return EOPNOTSUPP;

	fli = fstrans_get_lwp_info(mp, true);
	if (fli == NULL)
		return ENOENT;

	if (nowait) {
		if (!mutex_tryenter(&vfs_suspend_lock))
			return EWOULDBLOCK;
	} else
		mutex_enter(&vfs_suspend_lock);

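	/*
	 * For a layered mount fli refers to the lowest file system,
	 * so the suspension is taken on the bottom of the stack.
	 */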
	if ((error = VFS_SUSPENDCTL(fli->fli_mount, SUSPEND_SUSPEND)) != 0) {
		mutex_exit(&vfs_suspend_lock);
		return error;
	}

	if ((mp->mnt_iflag & IMNT_GONE) != 0) {
		vfs_resume(mp);
		return ENOENT;
	}

	return 0;
}

/*
 * Request a filesystem to resume all operations.
 */
void
vfs_resume(struct mount *mp)
{
	struct fstrans_lwp_info *fli;

	KASSERT(mp != dead_rootmount);

	fli = fstrans_get_lwp_info(mp, false);
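	/* Resume the lowest file system of a layered mount. */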
	mp = fli->fli_mount;

	VFS_SUSPENDCTL(mp, SUSPEND_RESUME);
	mutex_exit(&vfs_suspend_lock);
}


/*
 * True, if no thread is running a cow handler.
 */
static bool
cow_state_change_done(const struct fstrans_mount_info *fmi)
{
	struct fstrans_lwp_info *fli;

	KASSERT(mutex_owned(&fstrans_lock));
	KASSERT(fmi->fmi_cow_change);

	LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
		if (fli->fli_mount != fmi->fmi_mount)
			continue;
		if (fli->fli_cow_cnt == 0)
			continue;

		return false;
	}

	return true;
}

/*
 * Prepare for changing this mount's cow list.
 * Returns with fstrans_lock locked.
 */
static void
cow_change_enter(struct fstrans_mount_info *fmi)
{

	mutex_enter(&fstrans_lock);

	/*
	 * Wait for other threads changing the list.
	 */
	while (fmi->fmi_cow_change)
		cv_wait(&fstrans_state_cv, &fstrans_lock);

	/*
	 * Wait until all threads are aware of a state change.
	 */
	fmi->fmi_cow_change = true;
	pserialize_perform(fstrans_psz);

	while (! cow_state_change_done(fmi))
		cv_wait(&fstrans_count_cv, &fstrans_lock);
}

/*
 * Done changing this mount's cow list.
 */
static void
cow_change_done(struct fstrans_mount_info *fmi)
{

	KASSERT(mutex_owned(&fstrans_lock));

	fmi->fmi_cow_change = false;
	pserialize_perform(fstrans_psz);

	cv_broadcast(&fstrans_state_cv);

	mutex_exit(&fstrans_lock);
}

/*
 * Add a handler to this mount.
 */
int
fscow_establish(struct mount *mp, int (*func)(void *, struct buf *, bool),
    void *arg)
{
	struct fstrans_mount_info *fmi;
	struct fscow_handler *newch;

	KASSERT(mp != dead_rootmount);

	mutex_enter(&fstrans_lock);
	fmi = fstrans_mount_get(mp);
	KASSERT(fmi != NULL);
	fmi->fmi_ref_cnt += 1;
	mutex_exit(&fstrans_lock);

	newch = kmem_alloc(sizeof(*newch), KM_SLEEP);
	newch->ch_func = func;
	newch->ch_arg = arg;

	cow_change_enter(fmi);
	LIST_INSERT_HEAD(&fmi->fmi_cow_handler, newch, ch_list);
	cow_change_done(fmi);

	return 0;
}

/*
 * Remove a handler from this mount.
 */
int
fscow_disestablish(struct mount *mp, int (*func)(void *, struct buf *, bool),
    void *arg)
{
	struct fstrans_mount_info *fmi;
	struct fscow_handler *hp = NULL;

	KASSERT(mp != dead_rootmount);

	mutex_enter(&fstrans_lock);
	fmi = fstrans_mount_get(mp);
	KASSERT(fmi != NULL);
	mutex_exit(&fstrans_lock);

	cow_change_enter(fmi);
	LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list)
		if (hp->ch_func == func && hp->ch_arg == arg)
			break;
	if (hp != NULL) {
		LIST_REMOVE(hp, ch_list);
		kmem_free(hp, sizeof(*hp));
	}
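	/* Drop the reference taken by fscow_establish(). */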
	fstrans_mount_dtor(fmi);
	cow_change_done(fmi);

	return hp ? 0 : EINVAL;
}

/*
 * Check for need to copy block that is about to be written.
 */
int
fscow_run(struct buf *bp, bool data_valid)
{
	int error, s;
	struct mount *mp;
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;
	struct fscow_handler *hp;

	/*
	 * First check if we need to run the copy-on-write handler.
	 */
	if ((bp->b_flags & B_COWDONE))
		return 0;
	if (bp->b_vp == NULL) {
		bp->b_flags |= B_COWDONE;
		return 0;
	}
	if (bp->b_vp->v_type == VBLK)
		mp = spec_node_getmountedfs(bp->b_vp);
	else
		mp = bp->b_vp->v_mount;
	if (mp == NULL || mp == dead_rootmount) {
		bp->b_flags |= B_COWDONE;
		return 0;
	}

	fli = fstrans_get_lwp_info(mp, true);
	KASSERT(fli != NULL);
	fmi = fli->fli_mountinfo;

	/*
	 * On a non-recursed run check if other threads
	 * want to change the list.
	 */
	if (fli->fli_cow_cnt == 0) {
		s = pserialize_read_enter();
		if (__predict_false(fmi->fmi_cow_change)) {
			pserialize_read_exit(s);
			mutex_enter(&fstrans_lock);
			while (fmi->fmi_cow_change)
				cv_wait(&fstrans_state_cv, &fstrans_lock);
			fli->fli_cow_cnt = 1;
			mutex_exit(&fstrans_lock);
		} else {
			fli->fli_cow_cnt = 1;
			pserialize_read_exit(s);
		}
	} else
		fli->fli_cow_cnt += 1;

	/*
	 * Run all copy-on-write handlers, stop on error.
	 */
	error = 0;
	LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list)
		if ((error = (*hp->ch_func)(hp->ch_arg, bp, data_valid)) != 0)
			break;
	if (error == 0)
		bp->b_flags |= B_COWDONE;

	/*
	 * Check if other threads want to change the list.
	 */
	if (fli->fli_cow_cnt > 1) {
		fli->fli_cow_cnt -= 1;
	} else {
		s = pserialize_read_enter();
		if (__predict_false(fmi->fmi_cow_change)) {
			pserialize_read_exit(s);
			mutex_enter(&fstrans_lock);
			fli->fli_cow_cnt = 0;
			cv_signal(&fstrans_count_cv);
			mutex_exit(&fstrans_lock);
		} else {
			fli->fli_cow_cnt = 0;
			pserialize_read_exit(s);
		}
	}

	return error;
}

#if defined(DDB)
void fstrans_dump(int);

static void
fstrans_print_lwp(struct proc *p, struct lwp *l, int verbose)
{
	char prefix[9];
	struct fstrans_lwp_info *fli;

	snprintf(prefix, sizeof(prefix), "%d.%d", p->p_pid, l->l_lid);
	LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
		if (fli->fli_self != l)
			continue;
		if (fli->fli_trans_cnt == 0 && fli->fli_cow_cnt == 0) {
			if (! verbose)
				continue;
		}
		printf("%-8s", prefix);
		if (verbose)
			printf(" @%p", fli);
		if (fli->fli_mount == dead_rootmount)
			printf(" <dead>");
		else if (fli->fli_mount != NULL)
			printf(" (%s)", fli->fli_mount->mnt_stat.f_mntonname);
		else
			printf(" NULL");
		if (fli->fli_alias != NULL) {
			struct mount *amp = fli->fli_alias->fli_mount;

			printf(" alias");
			if (verbose)
				printf(" @%p", fli->fli_alias);
			if (amp == NULL)
				printf(" NULL");
			else
				printf(" (%s)", amp->mnt_stat.f_mntonname);
		}
		if (fli->fli_mountinfo && fli->fli_mountinfo->fmi_gone)
			printf(" gone");
		if (fli->fli_trans_cnt == 0) {
			printf(" -");
		} else {
			switch (fli->fli_lock_type) {
			case FSTRANS_LAZY:
				printf(" lazy");
				break;
			case FSTRANS_SHARED:
				printf(" shared");
				break;
			default:
				printf(" %#x", fli->fli_lock_type);
				break;
			}
		}
		printf(" %d cow %d alias %d\n",
		    fli->fli_trans_cnt, fli->fli_cow_cnt, fli->fli_alias_cnt);
		prefix[0] = '\0';
	}
}

static void
fstrans_print_mount(struct mount *mp, int verbose)
{
	uint32_t indx;
	struct fstrans_mount_info *fmi;

	indx = fstrans_mount_hash(mp);
	SLIST_FOREACH(fmi, &fstrans_mount_hashtab[indx], fmi_hash)
		if (fmi->fmi_mount == mp)
			break;

	if (!verbose && (fmi == NULL || fmi->fmi_state == FSTRANS_NORMAL))
		return;

	printf("%-16s ", mp->mnt_stat.f_mntonname);
	if (fmi == NULL) {
		printf("(null)\n");
		return;
	}
	printf("owner %p ", fmi->fmi_owner);
	switch (fmi->fmi_state) {
	case FSTRANS_NORMAL:
		printf("state normal\n");
		break;
	case FSTRANS_SUSPENDING:
		printf("state suspending\n");
		break;
	case FSTRANS_SUSPENDED:
		printf("state suspended\n");
		break;
	default:
		printf("state %#x\n", fmi->fmi_state);
		break;
	}
}

void
fstrans_dump(int full)
{
	const struct proclist_desc *pd;
	struct proc *p;
	struct lwp *l;
	struct mount *mp;

	printf("Fstrans locks by lwp:\n");
	for (pd = proclists; pd->pd_list != NULL; pd++)
		PROCLIST_FOREACH(p, pd->pd_list)
			LIST_FOREACH(l, &p->p_lwps, l_sibling)
				fstrans_print_lwp(p, l, full == 1);

	printf("Fstrans state by mount:\n");
	for (mp = _mountlist_next(NULL); mp; mp = _mountlist_next(mp))
		fstrans_print_mount(mp, full == 1);
}
#endif /* defined(DDB) */