1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2004 Poul-Henning Kamp
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/bio.h>
32 #include <sys/kernel.h>
33 #include <sys/lock.h>
34 #include <sys/malloc.h>
35 #include <sys/mutex.h>
36 #include <sys/sbuf.h>
37 #include <sys/vnode.h>
38 #include <sys/mount.h>
39
40 #include <geom/geom.h>
41 #include <geom/geom_vfs.h>
42
43 /*
44 * subroutines for use by filesystems.
45 *
46 * XXX: should maybe live somewhere else ?
47 */
48 #include <sys/buf.h>
49
/*
 * Per-geom private state.  sc_mtx protects the in-flight request count
 * and the orphan/ENXIO bookkeeping shared between g_vfs_strategy(),
 * g_vfs_done() and g_vfs_orphan().
 */
struct g_vfs_softc {
	struct mtx	 sc_mtx;		/* protects the fields below */
	struct bufobj	*sc_bo;			/* bufobj we are servicing */
	struct g_event	*sc_event;		/* pre-allocated destroy event,
						   handed off by g_vfs_orphan()
						   when I/O is still in flight */
	int		 sc_active;		/* number of bios in flight */
	bool		 sc_orphaned;		/* provider has orphaned us */
	int		 sc_enxio_active;	/* convert all errors to ENXIO */
	int		 sc_enxio_reported;	/* ENXIO already logged once */
};
59
/*
 * Buffer operations vector installed on bufobjs opened through
 * g_vfs_open(): strategy is routed into GEOM via g_vfs_strategy(),
 * the remaining operations use the generic buf implementations.
 */
static struct buf_ops __g_vfs_bufops = {
	.bop_name =	"GEOM_VFS",
	.bop_write =	bufwrite,
	.bop_strategy =	g_vfs_strategy,
	.bop_sync =	bufsync,
	.bop_bdflush =	bufbdflush
};

struct buf_ops *g_vfs_bufops = &__g_vfs_bufops;
69
/* Called by GEOM when the underlying provider goes away. */
static g_orphan_t g_vfs_orphan;

static struct g_class g_vfs_class = {
	.name =		"VFS",
	.version =	G_VERSION,
	.orphan =	g_vfs_orphan,
};

DECLARE_GEOM_CLASS(g_vfs_class, g_vfs);
79
80 static void
g_vfs_destroy(void * arg,int flags __unused)81 g_vfs_destroy(void *arg, int flags __unused)
82 {
83 struct g_consumer *cp;
84
85 g_topology_assert();
86 cp = arg;
87 if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
88 g_access(cp, -cp->acr, -cp->acw, -cp->ace);
89 g_detach(cp);
90 if (cp->geom->softc == NULL)
91 g_wither_geom(cp->geom, ENXIO);
92 }
93
94 static void
g_vfs_done(struct bio * bip)95 g_vfs_done(struct bio *bip)
96 {
97 struct g_consumer *cp;
98 struct g_event *event;
99 struct g_vfs_softc *sc;
100 struct buf *bp;
101 int destroy;
102 struct mount *mp;
103 struct vnode *vp;
104 struct cdev *cdevp;
105
106 /*
107 * Collect statistics on synchronous and asynchronous read
108 * and write counts for disks that have associated filesystems.
109 */
110 bp = bip->bio_caller2;
111 vp = bp->b_vp;
112 if (vp != NULL) {
113 /*
114 * If not a disk vnode, use its associated mount point
115 * otherwise use the mountpoint associated with the disk.
116 */
117 VI_LOCK(vp);
118 if (vp->v_type != VCHR ||
119 (cdevp = vp->v_rdev) == NULL ||
120 cdevp->si_devsw == NULL ||
121 (cdevp->si_devsw->d_flags & D_DISK) == 0)
122 mp = vp->v_mount;
123 else
124 mp = cdevp->si_mountpt;
125 if (mp != NULL) {
126 if (bp->b_iocmd == BIO_READ) {
127 if (BUF_DISOWNED(bp))
128 mp->mnt_stat.f_asyncreads++;
129 else
130 mp->mnt_stat.f_syncreads++;
131 } else if (bp->b_iocmd == BIO_WRITE) {
132 if (BUF_DISOWNED(bp))
133 mp->mnt_stat.f_asyncwrites++;
134 else
135 mp->mnt_stat.f_syncwrites++;
136 }
137 }
138 VI_UNLOCK(vp);
139 }
140
141 cp = bip->bio_from;
142 sc = cp->geom->softc;
143 if (bip->bio_error != 0 && bip->bio_error != EOPNOTSUPP) {
144 if ((bp->b_xflags & BX_CVTENXIO) != 0) {
145 if (atomic_cmpset_int(&sc->sc_enxio_active, 0, 1))
146 printf("g_vfs_done(): %s converting all errors to ENXIO\n",
147 bip->bio_to->name);
148 }
149 if (sc->sc_enxio_active)
150 bip->bio_error = ENXIO;
151 if (bip->bio_error != ENXIO ||
152 atomic_cmpset_int(&sc->sc_enxio_reported, 0, 1)) {
153 g_print_bio("g_vfs_done():", bip, "error = %d%s",
154 bip->bio_error,
155 bip->bio_error != ENXIO ? "" :
156 " supressing further ENXIO");
157 }
158 }
159 bp->b_error = bip->bio_error;
160 bp->b_ioflags = bip->bio_flags;
161 if (bip->bio_error)
162 bp->b_ioflags |= BIO_ERROR;
163 bp->b_resid = bp->b_bcount - bip->bio_completed;
164 g_destroy_bio(bip);
165
166 mtx_lock(&sc->sc_mtx);
167 destroy = ((--sc->sc_active) == 0 && sc->sc_orphaned);
168 if (destroy) {
169 event = sc->sc_event;
170 sc->sc_event = NULL;
171 } else
172 event = NULL;
173 mtx_unlock(&sc->sc_mtx);
174 if (destroy)
175 g_post_event_ep(g_vfs_destroy, cp, event, NULL);
176
177 bufdone(bp);
178 }
179
180 void
g_vfs_strategy(struct bufobj * bo,struct buf * bp)181 g_vfs_strategy(struct bufobj *bo, struct buf *bp)
182 {
183 struct g_vfs_softc *sc;
184 struct g_consumer *cp;
185 struct bio *bip;
186
187 cp = bo->bo_private;
188 sc = cp->geom->softc;
189
190 /*
191 * If the provider has orphaned us, just return ENXIO.
192 */
193 mtx_lock(&sc->sc_mtx);
194 if (sc->sc_orphaned || sc->sc_enxio_active) {
195 mtx_unlock(&sc->sc_mtx);
196 bp->b_error = ENXIO;
197 bp->b_ioflags |= BIO_ERROR;
198 bufdone(bp);
199 return;
200 }
201 sc->sc_active++;
202 mtx_unlock(&sc->sc_mtx);
203
204 bip = g_alloc_bio();
205 bip->bio_cmd = bp->b_iocmd;
206 bip->bio_offset = bp->b_iooffset;
207 bip->bio_length = bp->b_bcount;
208 bdata2bio(bp, bip);
209 if ((bp->b_flags & B_BARRIER) != 0) {
210 bip->bio_flags |= BIO_ORDERED;
211 bp->b_flags &= ~B_BARRIER;
212 }
213 if (bp->b_iocmd == BIO_SPEEDUP)
214 bip->bio_flags |= bp->b_ioflags;
215 bip->bio_done = g_vfs_done;
216 bip->bio_caller2 = bp;
217 #if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING)
218 buf_track(bp, __func__);
219 bip->bio_track_bp = bp;
220 #endif
221 g_io_request(bip, cp);
222 }
223
/*
 * Orphan method: the underlying provider is going away.  Mark the softc
 * orphaned so g_vfs_strategy() rejects new I/O.  If nothing is in flight
 * we can tear the consumer down right here; otherwise pre-allocate the
 * event and hand it to the softc so g_vfs_done() can post the destroy
 * when the last request completes (it cannot sleep for an allocation).
 */
static void
g_vfs_orphan(struct g_consumer *cp)
{
	struct g_geom *gp;
	struct g_event *event;
	struct g_vfs_softc *sc;
	int destroy;

	g_topology_assert();

	gp = cp->geom;
	g_trace(G_T_TOPOLOGY, "g_vfs_orphan(%p(%s))", cp, gp->name);
	sc = gp->softc;
	/* g_vfs_close() already detached the softc; nothing to do. */
	if (sc == NULL)
		return;
	/* Allocate before taking the mutex: M_WAITOK may sleep. */
	event = g_alloc_event(M_WAITOK);
	mtx_lock(&sc->sc_mtx);
	KASSERT(sc->sc_event == NULL, ("g_vfs %p already has an event", sc));
	sc->sc_orphaned = true;
	destroy = (sc->sc_active == 0);
	if (!destroy) {
		/* Ownership of the event passes to g_vfs_done(). */
		sc->sc_event = event;
		event = NULL;
	}
	mtx_unlock(&sc->sc_mtx);
	if (destroy) {
		g_free(event);
		g_vfs_destroy(cp, 0);
	}

	/*
	 * Do not destroy the geom.  Filesystem will do that during unmount.
	 */
}
258
259 int
g_vfs_open(struct vnode * vp,struct g_consumer ** cpp,const char * fsname,int wr)260 g_vfs_open(struct vnode *vp, struct g_consumer **cpp, const char *fsname, int wr)
261 {
262 struct g_geom *gp;
263 struct g_provider *pp;
264 struct g_consumer *cp;
265 struct g_vfs_softc *sc;
266 struct bufobj *bo;
267 int error;
268
269 g_topology_assert();
270
271 *cpp = NULL;
272 bo = &vp->v_bufobj;
273 if (bo->bo_private != vp)
274 return (EBUSY);
275
276 pp = g_dev_getprovider(vp->v_rdev);
277 if (pp == NULL)
278 return (ENOENT);
279 gp = g_new_geomf(&g_vfs_class, "%s.%s", fsname, pp->name);
280 sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
281 mtx_init(&sc->sc_mtx, "g_vfs", NULL, MTX_DEF);
282 sc->sc_bo = bo;
283 gp->softc = sc;
284 cp = g_new_consumer(gp);
285 error = g_attach(cp, pp);
286 if (error) {
287 g_wither_geom(gp, ENXIO);
288 return (error);
289 }
290 error = g_access(cp, 1, wr, wr);
291 if (error) {
292 g_wither_geom(gp, ENXIO);
293 return (error);
294 }
295 /*
296 * Mediasize might not be set until first access (see g_disk_access()),
297 * That's why we check it here and not earlier.
298 */
299 if (pp->mediasize == 0) {
300 (void)g_access(cp, -1, -wr, -wr);
301 g_wither_geom(gp, ENXIO);
302 return (ENXIO);
303 }
304 vnode_create_disk_vobject(vp, pp->mediasize, curthread);
305 *cpp = cp;
306 cp->private = vp;
307 cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
308 bo->bo_ops = g_vfs_bufops;
309 bo->bo_private = cp;
310 bo->bo_bsize = pp->sectorsize;
311
312 return (error);
313 }
314
/*
 * Close a consumer opened by g_vfs_open(): flush and invalidate the
 * bufobj's buffers, restore the bufobj's private pointer to the vnode
 * saved in cp->private, and free the softc.  Unless the provider
 * already orphaned us (in which case g_vfs_done()/g_vfs_destroy() own
 * the final teardown), drop access and wither the geom here.
 * Must be called with the topology lock held.
 */
void
g_vfs_close(struct g_consumer *cp)
{
	struct g_geom *gp;
	struct g_vfs_softc *sc;

	g_topology_assert();

	gp = cp->geom;
	sc = gp->softc;
	bufobj_invalbuf(sc->sc_bo, V_SAVE, 0, 0);
	/* cp->private holds the vnode stashed there by g_vfs_open(). */
	sc->sc_bo->bo_private = cp->private;
	/* Detach the softc so g_vfs_destroy()/g_vfs_orphan() skip it. */
	gp->softc = NULL;
	mtx_destroy(&sc->sc_mtx);
	if (!sc->sc_orphaned || cp->provider == NULL)
		g_wither_geom_close(gp, ENXIO);
	KASSERT(sc->sc_event == NULL, ("g_vfs %p event is non-NULL", sc));
	g_free(sc);
}
334