xref: /freebsd/sys/geom/union/g_union.c (revision 716fd348)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2022 Marshall Kirk McKusick <mckusick@mckusick.com>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/param.h>
29 #include <sys/bio.h>
30 #include <sys/buf.h>
31 #include <sys/ctype.h>
32 #include <sys/kernel.h>
33 #include <sys/lock.h>
34 #include <sys/malloc.h>
35 #include <sys/module.h>
36 #include <sys/reboot.h>
37 #include <sys/rwlock.h>
38 #include <sys/sbuf.h>
39 #include <sys/sysctl.h>
40 
41 #include <geom/geom.h>
42 #include <geom/geom_dbg.h>
43 #include <geom/union/g_union.h>
44 
/* Reference the kern.geom sysctl node, which is defined outside this file. */
SYSCTL_DECL(_kern_geom);
/* kern.geom.union: parent node for the union class's sysctl variables. */
static SYSCTL_NODE(_kern_geom, OID_AUTO, union, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "GEOM_UNION stuff");
/*
 * Debug level, exported read/write as kern.geom.union.debug.
 * NOTE(review): presumably consulted by the G_UNION_DEBUG() and
 * G_UNION_LOGREQ() macros from g_union.h -- confirm there.
 */
static u_int g_union_debug = 0;
SYSCTL_UINT(_kern_geom_union, OID_AUTO, debug, CTLFLAG_RW, &g_union_debug, 0,
    "Debug level");
51 
/*
 * Forward declarations of the class methods that are installed in
 * g_union_class below.
 */
static void g_union_config(struct gctl_req *req, struct g_class *mp,
    const char *verb);
static g_access_t g_union_access;
static g_start_t g_union_start;
static g_dumpconf_t g_union_dumpconf;
static g_orphan_t g_union_orphan;
static int g_union_destroy_geom(struct gctl_req *req, struct g_class *mp,
    struct g_geom *gp);
static g_provgone_t g_union_providergone;
static g_resize_t g_union_resize;
62 
/*
 * GEOM class descriptor for the union class.  It names the class and
 * registers the handlers for control requests, access changes, I/O start,
 * configuration dumps, orphaning, geom destruction, provider removal and
 * resizing.
 */
struct g_class g_union_class = {
	.name = G_UNION_CLASS_NAME,
	.version = G_VERSION,
	.ctlreq = g_union_config,
	.access = g_union_access,
	.start = g_union_start,
	.dumpconf = g_union_dumpconf,
	.orphan = g_union_orphan,
	.destroy_geom = g_union_destroy_geom,
	.providergone = g_union_providergone,
	.resize = g_union_resize,
};
75 
/*
 * Prototypes for the file-local helpers: the per-verb control handlers
 * (g_union_ctl_*), their argument and lookup utilities, and the I/O path
 * routines.  For the g_union_ctl_* handlers the unnamed bool parameter is
 * the "verbose" flag.
 */
static void g_union_ctl_create(struct gctl_req *req, struct g_class *mp, bool);
static intmax_t g_union_fetcharg(struct gctl_req *req, const char *name);
static bool g_union_verify_nprefix(const char *name);
static void g_union_ctl_destroy(struct gctl_req *req, struct g_class *mp, bool);
static struct g_geom *g_union_find_geom(struct g_class *mp, const char *name);
static void g_union_ctl_reset(struct gctl_req *req, struct g_class *mp, bool);
static void g_union_ctl_revert(struct gctl_req *req, struct g_class *mp, bool);
static void g_union_revert(struct g_union_softc *sc);
static void g_union_doio(struct g_union_wip *wip);
static void g_union_ctl_commit(struct gctl_req *req, struct g_class *mp, bool);
static void g_union_setmap(struct bio *bp, struct g_union_softc *sc);
static bool g_union_getmap(struct bio *bp, struct g_union_softc *sc,
	off_t *len2read);
static void g_union_done(struct bio *bp);
static void g_union_kerneldump(struct bio *bp, struct g_union_softc *sc);
static int g_union_dumper(void *, void *, off_t, size_t);
static int g_union_destroy(struct gctl_req *req, struct g_geom *gp, bool force);
93 
94 /*
95  * Operate on union-specific configuration commands.
96  */
97 static void
98 g_union_config(struct gctl_req *req, struct g_class *mp, const char *verb)
99 {
100 	uint32_t *version, *verbose;
101 
102 	g_topology_assert();
103 
104 	version = gctl_get_paraml(req, "version", sizeof(*version));
105 	if (version == NULL) {
106 		gctl_error(req, "No '%s' argument.", "version");
107 		return;
108 	}
109 	if (*version != G_UNION_VERSION) {
110 		gctl_error(req, "Userland and kernel parts are out of sync.");
111 		return;
112 	}
113 	verbose = gctl_get_paraml(req, "verbose", sizeof(*verbose));
114 	if (verbose == NULL) {
115 		gctl_error(req, "No '%s' argument.", "verbose");
116 		return;
117 	}
118 	if (strcmp(verb, "create") == 0) {
119 		g_union_ctl_create(req, mp, *verbose);
120 		return;
121 	} else if (strcmp(verb, "destroy") == 0) {
122 		g_union_ctl_destroy(req, mp, *verbose);
123 		return;
124 	} else if (strcmp(verb, "reset") == 0) {
125 		g_union_ctl_reset(req, mp, *verbose);
126 		return;
127 	} else if (strcmp(verb, "revert") == 0) {
128 		g_union_ctl_revert(req, mp, *verbose);
129 		return;
130 	} else if (strcmp(verb, "commit") == 0) {
131 		g_union_ctl_commit(req, mp, *verbose);
132 		return;
133 	}
134 
135 	gctl_error(req, "Unknown verb.");
136 }
137 
138 /*
139  * Create a union device.
140  */
141 static void
142 g_union_ctl_create(struct gctl_req *req, struct g_class *mp, bool verbose)
143 {
144 	struct g_provider *upperpp, *lowerpp, *newpp;
145 	struct g_consumer *uppercp, *lowercp;
146 	struct g_union_softc *sc;
147 	struct g_geom_alias *gap;
148 	struct g_geom *gp;
149 	intmax_t offset, secsize, size, needed;
150 	const char *gunionname;
151 	int *nargs, error, i, n;
152 	char name[64];
153 
154 	g_topology_assert();
155 
156 	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
157 	if (nargs == NULL) {
158 		gctl_error(req, "No '%s' argument.", "nargs");
159 		return;
160 	}
161 	if (*nargs < 2) {
162 		gctl_error(req, "Missing device(s).");
163 		return;
164 	}
165 	if (*nargs > 2) {
166 		gctl_error(req, "Extra device(s).");
167 		return;
168 	}
169 
170 	offset = g_union_fetcharg(req, "offset");
171 	size = g_union_fetcharg(req, "size");
172 	secsize = g_union_fetcharg(req, "secsize");
173 	gunionname = gctl_get_asciiparam(req, "gunionname");
174 
175 	upperpp = gctl_get_provider(req, "arg0");
176 	lowerpp = gctl_get_provider(req, "arg1");
177 	if (upperpp == NULL || lowerpp == NULL)
178 		/* error message provided by gctl_get_provider() */
179 		return;
180 	/* Create the union */
181 	if (secsize == 0)
182 		secsize = lowerpp->sectorsize;
183 	else if ((secsize % lowerpp->sectorsize) != 0) {
184 		gctl_error(req, "Sector size %jd is not a multiple of lower "
185 		    "provider %s's %jd sector size.", (intmax_t)secsize,
186 		    lowerpp->name, (intmax_t)lowerpp->sectorsize);
187 		return;
188 	}
189 	if (secsize > maxphys) {
190 		gctl_error(req, "Too big secsize %jd for lower provider %s.",
191 		    (intmax_t)secsize, lowerpp->name);
192 		return;
193 	}
194 	if (secsize % upperpp->sectorsize != 0) {
195 		gctl_error(req, "Sector size %jd is not a multiple of upper "
196 		    "provider %s's %jd sector size.", (intmax_t)secsize,
197 		    upperpp->name, (intmax_t)upperpp->sectorsize);
198 		return;
199 	}
200 	if ((offset % secsize) != 0) {
201 		gctl_error(req, "Offset %jd is not a multiple of lower "
202 		    "provider %s's %jd sector size.", (intmax_t)offset,
203 		    lowerpp->name, (intmax_t)lowerpp->sectorsize);
204 		return;
205 	}
206 	if (size == 0)
207 		size = lowerpp->mediasize - offset;
208 	else
209 		size -= offset;
210 	if ((size % secsize) != 0) {
211 		gctl_error(req, "Size %jd is not a multiple of sector size "
212 		    "%jd.", (intmax_t)size, (intmax_t)secsize);
213 		return;
214 	}
215 	if (offset + size < lowerpp->mediasize) {
216 		gctl_error(req, "Size %jd is too small for lower provider %s, "
217 		    "needs %jd.", (intmax_t)(offset + size), lowerpp->name,
218 		    lowerpp->mediasize);
219 		return;
220 	}
221 	if (size > upperpp->mediasize) {
222 		gctl_error(req, "Upper provider %s size (%jd) is too small, "
223 		    "needs %jd.", upperpp->name, (intmax_t)upperpp->mediasize,
224 		    (intmax_t)size);
225 		return;
226 	}
227 	if (gunionname != NULL && !g_union_verify_nprefix(gunionname)) {
228 		gctl_error(req, "Gunion name %s must be alphanumeric.",
229 		    gunionname);
230 		return;
231 	}
232 	if (gunionname != NULL) {
233 		n = snprintf(name, sizeof(name), "%s%s", gunionname,
234 		    G_UNION_SUFFIX);
235 	} else {
236 		n = snprintf(name, sizeof(name), "%s-%s%s", upperpp->name,
237 		    lowerpp->name, G_UNION_SUFFIX);
238 	}
239 	if (n <= 0 || n >= sizeof(name)) {
240 		gctl_error(req, "Invalid provider name.");
241 		return;
242 	}
243 	LIST_FOREACH(gp, &mp->geom, geom) {
244 		if (strcmp(gp->name, name) == 0) {
245 			gctl_error(req, "Provider %s already exists.", name);
246 			return;
247 		}
248 	}
249 	gp = g_new_geomf(mp, "%s", name);
250 	sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
251 	rw_init(&sc->sc_rwlock, "gunion");
252 	TAILQ_INIT(&sc->sc_wiplist);
253 	sc->sc_offset = offset;
254 	sc->sc_size = size;
255 	sc->sc_sectorsize = secsize;
256 	sc->sc_reads = 0;
257 	sc->sc_writes = 0;
258 	sc->sc_deletes = 0;
259 	sc->sc_getattrs = 0;
260 	sc->sc_flushes = 0;
261 	sc->sc_speedups = 0;
262 	sc->sc_cmd0s = 0;
263 	sc->sc_cmd1s = 0;
264 	sc->sc_cmd2s = 0;
265 	sc->sc_readbytes = 0;
266 	sc->sc_wrotebytes = 0;
267 	sc->sc_writemap_memory = 0;
268 	gp->softc = sc;
269 
270 	newpp = g_new_providerf(gp, "%s", gp->name);
271 	newpp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE;
272 	newpp->mediasize = size;
273 	newpp->sectorsize = secsize;
274 	LIST_FOREACH(gap, &upperpp->aliases, ga_next)
275 		g_provider_add_alias(newpp, "%s%s", gap->ga_alias,
276 		    G_UNION_SUFFIX);
277 	LIST_FOREACH(gap, &lowerpp->aliases, ga_next)
278 		g_provider_add_alias(newpp, "%s%s", gap->ga_alias,
279 		    G_UNION_SUFFIX);
280 	lowercp = g_new_consumer(gp);
281 	lowercp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
282 	if ((error = g_attach(lowercp, lowerpp)) != 0) {
283 		gctl_error(req, "Error %d: cannot attach to provider %s.",
284 		    error, lowerpp->name);
285 		goto fail1;
286 	}
287 	/* request read and exclusive access for lower */
288 	if ((error = g_access(lowercp, 1, 0, 1)) != 0) {
289 		gctl_error(req, "Error %d: cannot obtain exclusive access to "
290 		    "%s.\n\tMust be unmounted or mounted read-only.", error,
291 		    lowerpp->name);
292 		goto fail2;
293 	}
294 	uppercp = g_new_consumer(gp);
295 	uppercp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
296 	if ((error = g_attach(uppercp, upperpp)) != 0) {
297 		gctl_error(req, "Error %d: cannot attach to provider %s.",
298 		    error, upperpp->name);
299 		goto fail3;
300 	}
301 	/* request read, write, and exclusive access for upper */
302 	if ((error = g_access(uppercp, 1, 1, 1)) != 0) {
303 		gctl_error(req, "Error %d: cannot obtain write access to %s.",
304 		    error, upperpp->name);
305 		goto fail4;
306 	}
307 	sc->sc_uppercp = uppercp;
308 	sc->sc_lowercp = lowercp;
309 
310 	newpp->flags |= (upperpp->flags & G_PF_ACCEPT_UNMAPPED) &
311 	    (lowerpp->flags & G_PF_ACCEPT_UNMAPPED);
312 	g_error_provider(newpp, 0);
313 	/*
314 	 * Allocate the map that tracks the sectors that have been written
315 	 * to the top layer. We use a 2-level hierarchy as that lets us
316 	 * map up to 1 petabyte using allocations of less than 33 Mb
317 	 * when using 4K byte sectors (or 268 Mb with 512 byte sectors).
318 	 *
319 	 * We totally populate the leaf nodes rather than allocating them
320 	 * as they are first used because their usage occurs in the
321 	 * g_union_start() routine that may be running in the g_down
322 	 * thread which cannot sleep.
323 	 */
324 	sc->sc_map_size = roundup(size / secsize, BITS_PER_ENTRY);
325 	needed = sc->sc_map_size / BITS_PER_ENTRY;
326 	for (sc->sc_root_size = 1;
327 	     sc->sc_root_size * sc->sc_root_size < needed;
328 	     sc->sc_root_size++)
329 		continue;
330 	sc->sc_writemap_root = g_malloc(sc->sc_root_size * sizeof(uint64_t *),
331 	    M_WAITOK | M_ZERO);
332 	sc->sc_leaf_size = sc->sc_root_size;
333 	sc->sc_bits_per_leaf = sc->sc_leaf_size * BITS_PER_ENTRY;
334 	sc->sc_leafused = g_malloc(roundup(sc->sc_root_size, BITS_PER_ENTRY),
335 	    M_WAITOK | M_ZERO);
336 	for (i = 0; i < sc->sc_root_size; i++)
337 		sc->sc_writemap_root[i] =
338 		    g_malloc(sc->sc_leaf_size * sizeof(uint64_t),
339 		    M_WAITOK | M_ZERO);
340 	sc->sc_writemap_memory =
341 	    (sc->sc_root_size + sc->sc_root_size * sc->sc_leaf_size) *
342 	    sizeof(uint64_t) + roundup(sc->sc_root_size, BITS_PER_ENTRY);
343 	if (verbose)
344 		gctl_error(req, "Device %s created with memory map size %jd.",
345 		    gp->name, (intmax_t)sc->sc_writemap_memory);
346 	G_UNION_DEBUG(1, "Device %s created with memory map size %jd.",
347 	    gp->name, (intmax_t)sc->sc_writemap_memory);
348 	return;
349 
350 fail4:
351 	g_detach(uppercp);
352 fail3:
353 	g_destroy_consumer(uppercp);
354 	g_access(lowercp, -1, 0, -1);
355 fail2:
356 	g_detach(lowercp);
357 fail1:
358 	g_destroy_consumer(lowercp);
359 	g_destroy_provider(newpp);
360 	g_destroy_geom(gp);
361 }
362 
363 /*
364  * Fetch named option and verify that it is positive.
365  */
366 static intmax_t
367 g_union_fetcharg(struct gctl_req *req, const char *name)
368 {
369 	intmax_t *val;
370 
371 	val = gctl_get_paraml_opt(req, name, sizeof(*val));
372 	if (val == NULL)
373 		return (0);
374 	if (*val >= 0)
375 		return (*val);
376 	gctl_error(req, "Invalid '%s': negative value, using default.", name);
377 	return (0);
378 }
379 
380 /*
381  * Verify that a name is alphanumeric.
382  */
383 static bool
384 g_union_verify_nprefix(const char *name)
385 {
386 	int i;
387 
388 	for (i = 0; i < strlen(name); i++) {
389 		if (isalpha(name[i]) == 0 && isdigit(name[i]) == 0) {
390 			return (false);
391 		}
392 	}
393 	return (true);
394 }
395 
396 /*
397  * Destroy a union device.
398  */
399 static void
400 g_union_ctl_destroy(struct gctl_req *req, struct g_class *mp, bool verbose)
401 {
402 	int *nargs, *force, error, i;
403 	struct g_geom *gp;
404 	const char *name;
405 	char param[16];
406 
407 	g_topology_assert();
408 
409 	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
410 	if (nargs == NULL) {
411 		gctl_error(req, "No '%s' argument.", "nargs");
412 		return;
413 	}
414 	if (*nargs <= 0) {
415 		gctl_error(req, "Missing device(s).");
416 		return;
417 	}
418 	force = gctl_get_paraml(req, "force", sizeof(*force));
419 	if (force == NULL) {
420 		gctl_error(req, "No 'force' argument.");
421 		return;
422 	}
423 
424 	for (i = 0; i < *nargs; i++) {
425 		snprintf(param, sizeof(param), "arg%d", i);
426 		name = gctl_get_asciiparam(req, param);
427 		if (name == NULL) {
428 			gctl_msg(req, "No '%s' argument.", param);
429 			continue;
430 		}
431 		if (strncmp(name, _PATH_DEV, strlen(_PATH_DEV)) == 0)
432 			name += strlen(_PATH_DEV);
433 		gp = g_union_find_geom(mp, name);
434 		if (gp == NULL) {
435 			gctl_msg(req, "Device %s is invalid.", name);
436 			continue;
437 		}
438 		error = g_union_destroy(verbose ? req : NULL, gp, *force);
439 		if (error != 0)
440 			gctl_msg(req, "Error %d: cannot destroy device %s.",
441 			    error, gp->name);
442 	}
443 	gctl_post_messages(req);
444 }
445 
446 /*
447  * Find a union geom.
448  */
449 static struct g_geom *
450 g_union_find_geom(struct g_class *mp, const char *name)
451 {
452 	struct g_geom *gp;
453 
454 	LIST_FOREACH(gp, &mp->geom, geom) {
455 		if (strcmp(gp->name, name) == 0)
456 			return (gp);
457 	}
458 	return (NULL);
459 }
460 
461 /*
462  * Zero out all the statistics associated with a union device.
463  */
464 static void
465 g_union_ctl_reset(struct gctl_req *req, struct g_class *mp, bool verbose)
466 {
467 	struct g_union_softc *sc;
468 	struct g_provider *pp;
469 	struct g_geom *gp;
470 	char param[16];
471 	int i, *nargs;
472 
473 	g_topology_assert();
474 
475 	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
476 	if (nargs == NULL) {
477 		gctl_error(req, "No '%s' argument.", "nargs");
478 		return;
479 	}
480 	if (*nargs <= 0) {
481 		gctl_error(req, "Missing device(s).");
482 		return;
483 	}
484 
485 	for (i = 0; i < *nargs; i++) {
486 		snprintf(param, sizeof(param), "arg%d", i);
487 		pp = gctl_get_provider(req, param);
488 		if (pp == NULL) {
489 			gctl_msg(req, "No '%s' argument.", param);
490 			continue;
491 		}
492 		gp = pp->geom;
493 		if (gp->class != mp) {
494 			gctl_msg(req, "Provider %s is invalid.",
495 			    pp->name);
496 			continue;
497 		}
498 		sc = gp->softc;
499 		sc->sc_reads = 0;
500 		sc->sc_writes = 0;
501 		sc->sc_deletes = 0;
502 		sc->sc_getattrs = 0;
503 		sc->sc_flushes = 0;
504 		sc->sc_speedups = 0;
505 		sc->sc_cmd0s = 0;
506 		sc->sc_cmd1s = 0;
507 		sc->sc_cmd2s = 0;
508 		sc->sc_readbytes = 0;
509 		sc->sc_wrotebytes = 0;
510 		if (verbose)
511 			gctl_msg(req, "Device %s has been reset.", pp->name);
512 		G_UNION_DEBUG(1, "Device %s has been reset.", pp->name);
513 	}
514 	gctl_post_messages(req);
515 }
516 
517 /*
518  * Revert all write requests made to the top layer of the union.
519  */
520 static void
521 g_union_ctl_revert(struct gctl_req *req, struct g_class *mp, bool verbose)
522 {
523 	struct g_union_softc *sc;
524 	struct g_provider *pp;
525 	struct g_geom *gp;
526 	char param[16];
527 	int i, *nargs;
528 
529 	g_topology_assert();
530 
531 	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
532 	if (nargs == NULL) {
533 		gctl_error(req, "No '%s' argument.", "nargs");
534 		return;
535 	}
536 	if (*nargs <= 0) {
537 		gctl_error(req, "Missing device(s).");
538 		return;
539 	}
540 
541 	for (i = 0; i < *nargs; i++) {
542 		snprintf(param, sizeof(param), "arg%d", i);
543 		pp = gctl_get_provider(req, param);
544 		if (pp == NULL) {
545 			gctl_msg(req, "No '%s' argument.", param);
546 			continue;
547 		}
548 		gp = pp->geom;
549 		if (gp->class != mp) {
550 			gctl_msg(req, "Provider %s is invalid.", pp->name);
551 			continue;
552 		}
553 		sc = gp->softc;
554 		if (g_union_get_writelock(sc) != 0) {
555 			gctl_msg(req, "Revert already in progress for "
556 			    "provider %s.", pp->name);
557 			continue;
558 		}
559 		/*
560 		 * No mount or other use of union is allowed.
561 		 */
562 		if (pp->acr > 0 || pp->acw > 0 || pp->ace > 0) {
563 			gctl_msg(req, "Unable to get exclusive access for "
564 			    "reverting of %s;\n\t%s cannot be mounted or "
565 			    "otherwise open during a revert.",
566 			     pp->name, pp->name);
567 			g_union_rel_writelock(sc);
568 			continue;
569 		}
570 		g_union_revert(sc);
571 		g_union_rel_writelock(sc);
572 		if (verbose)
573 			gctl_msg(req, "Device %s has been reverted.", pp->name);
574 		G_UNION_DEBUG(1, "Device %s has been reverted.", pp->name);
575 	}
576 	gctl_post_messages(req);
577 }
578 
579 /*
580  * Revert union writes by zero'ing out the writemap.
581  */
582 static void
583 g_union_revert(struct g_union_softc *sc)
584 {
585 	int i;
586 
587 	G_WLOCK(sc);
588 	for (i = 0; i < sc->sc_root_size; i++)
589 		memset(sc->sc_writemap_root[i], 0,
590 		    sc->sc_leaf_size * sizeof(uint64_t));
591 	memset(sc->sc_leafused, 0, roundup(sc->sc_root_size, BITS_PER_ENTRY));
592 	G_WUNLOCK(sc);
593 }
594 
595 /*
596  * Commit all the writes made in the top layer to the lower layer.
597  */
598 static void
599 g_union_ctl_commit(struct gctl_req *req, struct g_class *mp, bool verbose)
600 {
601 	struct g_union_softc *sc;
602 	struct g_provider *pp, *lowerpp;
603 	struct g_consumer *lowercp;
604 	struct g_geom *gp;
605 	struct bio *bp;
606 	char param[16];
607 	off_t len2rd, len2wt, savelen;
608 	int i, error, error1, *nargs, *force, *reboot;
609 
610 	g_topology_assert();
611 
612 	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
613 	if (nargs == NULL) {
614 		gctl_error(req, "No '%s' argument.", "nargs");
615 		return;
616 	}
617 	if (*nargs <= 0) {
618 		gctl_error(req, "Missing device(s).");
619 		return;
620 	}
621 	force = gctl_get_paraml(req, "force", sizeof(*force));
622 	if (force == NULL) {
623 		gctl_error(req, "No 'force' argument.");
624 		return;
625 	}
626 	reboot = gctl_get_paraml(req, "reboot", sizeof(*reboot));
627 	if (reboot == NULL) {
628 		gctl_error(req, "No 'reboot' argument.");
629 		return;
630 	}
631 
632 	/* Get a bio buffer to do our I/O */
633 	bp = g_alloc_bio();
634 	bp->bio_data = g_malloc(MAXBSIZE, M_WAITOK);
635 	bp->bio_done = biodone;
636 	for (i = 0; i < *nargs; i++) {
637 		snprintf(param, sizeof(param), "arg%d", i);
638 		pp = gctl_get_provider(req, param);
639 		if (pp == NULL) {
640 			gctl_msg(req, "No '%s' argument.", param);
641 			continue;
642 		}
643 		gp = pp->geom;
644 		if (gp->class != mp) {
645 			gctl_msg(req, "Provider %s is invalid.", pp->name);
646 			continue;
647 		}
648 		sc = gp->softc;
649 		if (g_union_get_writelock(sc) != 0) {
650 			gctl_msg(req, "Commit already in progress for "
651 			    "provider %s.", pp->name);
652 			continue;
653 		}
654 
655 		/* upgrade to write access for lower */
656 		lowercp = sc->sc_lowercp;
657 		lowerpp = lowercp->provider;
658 		/*
659 		 * No mount or other use of union is allowed, unless the
660 		 * -f flag is given which allows read-only mount or usage.
661 		 */
662 		if ((*force == false && pp->acr > 0) || pp->acw > 0 ||
663 		     pp->ace > 0) {
664 			gctl_msg(req, "Unable to get exclusive access for "
665 			    "writing of %s.\n\tNote that %s cannot be mounted "
666 			    "or otherwise\n\topen during a commit unless the "
667 			    "-f flag is used.", pp->name, pp->name);
668 			g_union_rel_writelock(sc);
669 			continue;
670 		}
671 		/*
672 		 * No mount or other use of lower media is allowed, unless the
673 		 * -f flag is given which allows read-only mount or usage.
674 		 */
675 		if ((*force == false && lowerpp->acr > lowercp->acr) ||
676 		     lowerpp->acw > lowercp->acw ||
677 		     lowerpp->ace > lowercp->ace) {
678 			gctl_msg(req, "provider %s is unable to get "
679 			    "exclusive access to %s\n\tfor writing. Note that "
680 			    "%s cannot be mounted or otherwise open\n\tduring "
681 			    "a commit unless the -f flag is used.", pp->name,
682 			    lowerpp->name, lowerpp->name);
683 			g_union_rel_writelock(sc);
684 			continue;
685 		}
686 		if ((error = g_access(lowercp, 0, 1, 0)) != 0) {
687 			gctl_msg(req, "Error %d: provider %s is unable to "
688 			    "access %s for writing.", error, pp->name,
689 			    lowerpp->name);
690 			g_union_rel_writelock(sc);
691 			continue;
692 		}
693 		g_topology_unlock();
694 		/* Loop over write map copying across written blocks */
695 		bp->bio_offset = 0;
696 		bp->bio_length = sc->sc_map_size * sc->sc_sectorsize;
697 		G_RLOCK(sc);
698 		error = 0;
699 		while (bp->bio_length > 0) {
700 			if (!g_union_getmap(bp, sc, &len2rd)) {
701 				/* not written, so skip */
702 				bp->bio_offset += len2rd;
703 				bp->bio_length -= len2rd;
704 				continue;
705 			}
706 			G_RUNLOCK(sc);
707 			/* need to read then write len2rd sectors */
708 			for ( ; len2rd > 0; len2rd -= len2wt) {
709 				/* limit ourselves to MAXBSIZE size I/Os */
710 				len2wt = len2rd;
711 				if (len2wt > MAXBSIZE)
712 					len2wt = MAXBSIZE;
713 				savelen = bp->bio_length;
714 				bp->bio_length = len2wt;
715 				bp->bio_cmd = BIO_READ;
716 				g_io_request(bp, sc->sc_uppercp);
717 				if ((error = biowait(bp, "rdunion")) != 0) {
718 					gctl_msg(req, "Commit read error %d "
719 					    "in provider %s, commit aborted.",
720 					    error, pp->name);
721 					goto cleanup;
722 				}
723 				bp->bio_flags &= ~BIO_DONE;
724 				bp->bio_cmd = BIO_WRITE;
725 				g_io_request(bp, lowercp);
726 				if ((error = biowait(bp, "wtunion")) != 0) {
727 					gctl_msg(req, "Commit write error %d "
728 					    "in provider %s, commit aborted.",
729 					    error, pp->name);
730 					goto cleanup;
731 				}
732 				bp->bio_flags &= ~BIO_DONE;
733 				bp->bio_offset += len2wt;
734 				bp->bio_length = savelen - len2wt;
735 			}
736 			G_RLOCK(sc);
737 		}
738 		G_RUNLOCK(sc);
739 		/* clear the write map */
740 		g_union_revert(sc);
741 cleanup:
742 		g_topology_lock();
743 		/* return lower to previous access */
744 		if ((error1 = g_access(lowercp, 0, -1, 0)) != 0) {
745 			G_UNION_DEBUG(2, "Error %d: device %s could not reset "
746 			    "access to %s (r=0 w=-1 e=0).", error1, pp->name,
747 			    lowerpp->name);
748 		}
749 		g_union_rel_writelock(sc);
750 		if (error == 0 && verbose)
751 			gctl_msg(req, "Device %s has been committed.",
752 			    pp->name);
753 		G_UNION_DEBUG(1, "Device %s has been committed.", pp->name);
754 	}
755 	gctl_post_messages(req);
756 	g_free(bp->bio_data);
757 	g_destroy_bio(bp);
758 	if (*reboot)
759 		kern_reboot(RB_AUTOBOOT);
760 }
761 
762 /*
763  * Generally allow access unless a commit is in progress.
764  */
765 static int
766 g_union_access(struct g_provider *pp, int r, int w, int e)
767 {
768 	struct g_union_softc *sc;
769 
770 	sc = pp->geom->softc;
771 	if (sc == NULL) {
772 		if (r <= 0 && w <= 0 && e <= 0)
773 			return (0);
774 		return (ENXIO);
775 	}
776 	r += pp->acr;
777 	w += pp->acw;
778 	e += pp->ace;
779 	if (g_union_get_writelock(sc) != 0) {
780 		if ((pp->acr + pp->acw + pp->ace) > 0 && (r + w + e) == 0)
781 			return (0);
782 		return (EBUSY);
783 	}
784 	g_union_rel_writelock(sc);
785 	return (0);
786 }
787 
788 /*
789  * Initiate an I/O operation on the union device.
790  */
791 static void
792 g_union_start(struct bio *bp)
793 {
794 	struct g_union_softc *sc;
795 	struct g_union_wip *wip;
796 	struct bio *cbp;
797 
798 	sc = bp->bio_to->geom->softc;
799 	if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
800 		wip = g_malloc(sizeof(*wip), M_NOWAIT);
801 		if (wip == NULL) {
802 			g_io_deliver(bp, ENOMEM);
803 			return;
804 		}
805 		TAILQ_INIT(&wip->wip_waiting);
806 		wip->wip_bp = bp;
807 		wip->wip_sc = sc;
808 		wip->wip_start = bp->bio_offset + sc->sc_offset;
809 		wip->wip_end = wip->wip_start + bp->bio_length - 1;
810 		wip->wip_numios = 1;
811 		wip->wip_error = 0;
812 		g_union_doio(wip);
813 		return;
814 	}
815 
816 	/*
817 	 * All commands other than read and write are passed through to
818 	 * the upper-level device since it is writable and thus able to
819 	 * respond to delete, flush, and speedup requests.
820 	 */
821 	cbp = g_clone_bio(bp);
822 	if (cbp == NULL) {
823 		g_io_deliver(bp, ENOMEM);
824 		return;
825 	}
826 	cbp->bio_offset = bp->bio_offset + sc->sc_offset;
827 	cbp->bio_done = g_std_done;
828 
829 	switch (cbp->bio_cmd) {
830 	case BIO_DELETE:
831 		G_UNION_LOGREQ(cbp, "Delete request received.");
832 		atomic_add_long(&sc->sc_deletes, 1);
833 		break;
834 	case BIO_GETATTR:
835 		G_UNION_LOGREQ(cbp, "Getattr request received.");
836 		atomic_add_long(&sc->sc_getattrs, 1);
837 		if (strcmp(cbp->bio_attribute, "GEOM::kerneldump") != 0)
838 			/* forward the GETATTR to the lower-level device */
839 			break;
840 		g_union_kerneldump(bp, sc);
841 		return;
842 	case BIO_FLUSH:
843 		G_UNION_LOGREQ(cbp, "Flush request received.");
844 		atomic_add_long(&sc->sc_flushes, 1);
845 		break;
846 	case BIO_SPEEDUP:
847 		G_UNION_LOGREQ(cbp, "Speedup request received.");
848 		atomic_add_long(&sc->sc_speedups, 1);
849 		break;
850 	case BIO_CMD0:
851 		G_UNION_LOGREQ(cbp, "Cmd0 request received.");
852 		atomic_add_long(&sc->sc_cmd0s, 1);
853 		break;
854 	case BIO_CMD1:
855 		G_UNION_LOGREQ(cbp, "Cmd1 request received.");
856 		atomic_add_long(&sc->sc_cmd1s, 1);
857 		break;
858 	case BIO_CMD2:
859 		G_UNION_LOGREQ(cbp, "Cmd2 request received.");
860 		atomic_add_long(&sc->sc_cmd2s, 1);
861 		break;
862 	default:
863 		G_UNION_LOGREQ(cbp, "Unknown (%d) request received.",
864 		    cbp->bio_cmd);
865 		break;
866 	}
867 	g_io_request(cbp, sc->sc_uppercp);
868 }
869 
870 /*
871  * Initiate a read or write operation on the union device.
872  */
873 static void
874 g_union_doio(struct g_union_wip *wip)
875 {
876 	struct g_union_softc *sc;
877 	struct g_consumer *cp, *firstcp;
878 	struct g_union_wip *activewip;
879 	struct bio *cbp, *firstbp;
880 	off_t rdlen, len2rd, offset;
881 	int iocnt, needstoblock;
882 	char *level;
883 
884 	/*
885 	 * To maintain consistency, we cannot allow concurrent reads
886 	 * or writes to the same block.
887 	 *
888 	 * A work-in-progress (wip) structure is allocated for each
889 	 * read or write request. All active requests are kept on the
890 	 * softc sc_wiplist. As each request arrives, it is checked to
891 	 * see if it overlaps any of the active entries. If it does not
892 	 * overlap, then it is added to the active list and initiated.
893 	 * If it does overlap an active entry, it is added to the
894 	 * wip_waiting list for the active entry that it overlaps.
895 	 * When an active entry completes, it restarts all the requests
896 	 * on its wip_waiting list.
897 	 */
898 	sc = wip->wip_sc;
899 	G_WLOCK(sc);
900 	TAILQ_FOREACH(activewip, &sc->sc_wiplist, wip_next) {
901 		if (wip->wip_end < activewip->wip_start ||
902 		    wip->wip_start > activewip->wip_end)
903 			continue;
904 		needstoblock = 1;
905 		if (wip->wip_bp->bio_cmd == BIO_WRITE)
906 			if (activewip->wip_bp->bio_cmd == BIO_WRITE)
907 				sc->sc_writeblockwrite += 1;
908 			else
909 				sc->sc_readblockwrite += 1;
910 		else
911 			if (activewip->wip_bp->bio_cmd == BIO_WRITE)
912 				sc->sc_writeblockread += 1;
913 			else {
914 				sc->sc_readcurrentread += 1;
915 				needstoblock = 0;
916 			}
917 		/* Put request on a waiting list if necessary */
918 		if (needstoblock) {
919 			TAILQ_INSERT_TAIL(&activewip->wip_waiting, wip,
920 			    wip_next);
921 			G_WUNLOCK(sc);
922 			return;
923 		}
924 	}
925 	/* Put request on the active list */
926 	TAILQ_INSERT_TAIL(&sc->sc_wiplist, wip, wip_next);
927 
928 	/*
929 	 * Process I/O requests that have been cleared to go.
930 	 */
931 	cbp = g_clone_bio(wip->wip_bp);
932 	if (cbp == NULL) {
933 		TAILQ_REMOVE(&sc->sc_wiplist, wip, wip_next);
934 		G_WUNLOCK(sc);
935 		KASSERT(TAILQ_FIRST(&wip->wip_waiting) == NULL,
936 		    ("g_union_doio: non-empty work-in-progress waiting queue"));
937 		g_io_deliver(wip->wip_bp, ENOMEM);
938 		g_free(wip);
939 		return;
940 	}
941 	G_WUNLOCK(sc);
942 	cbp->bio_caller1 = wip;
943 	cbp->bio_done = g_union_done;
944 	cbp->bio_offset = wip->wip_start;
945 
946 	/*
947 	 * Writes are always done to the top level. The blocks that
948 	 * are written are recorded in the bitmap when the I/O completes.
949 	 */
950 	if (cbp->bio_cmd == BIO_WRITE) {
951 		G_UNION_LOGREQ(cbp, "Sending %jd byte write request to upper "
952 		    "level.", cbp->bio_length);
953 		atomic_add_long(&sc->sc_writes, 1);
954 		atomic_add_long(&sc->sc_wrotebytes, cbp->bio_length);
955 		g_io_request(cbp, sc->sc_uppercp);
956 		return;
957 	}
958 	/*
959 	 * The usual read case is that we either read the top layer
960 	 * if the block has been previously written or the bottom layer
961 	 * if it has not been written. However, it is possible that
962 	 * only part of the block has been written, For example we may
963 	 * have written a UFS/FFS file fragment comprising several
964 	 * sectors out of an 8-sector block.  Here, if the entire
965 	 * 8-sector block is read for example by a snapshot needing
966 	 * to copy the full block, then we need to read the written
967 	 * sectors from the upper level and the unwritten sectors from
968 	 * the lower level. We do this by alternately reading from the
969 	 * top and bottom layers until we complete the read. We
970 	 * simplify for the common case to just do the I/O and return.
971 	 */
972 	atomic_add_long(&sc->sc_reads, 1);
973 	atomic_add_long(&sc->sc_readbytes, cbp->bio_length);
974 	rdlen = cbp->bio_length;
975 	offset = 0;
976 	for (iocnt = 0; ; iocnt++) {
977 		if (g_union_getmap(cbp, sc, &len2rd)) {
978 			/* read top */
979 			cp = sc->sc_uppercp;
980 			level = "upper";
981 		} else {
982 			/* read bottom */
983 			cp = sc->sc_lowercp;
984 			level = "lower";
985 		}
986 		/* Check if only a single read is required */
987 		if (iocnt == 0 && rdlen == len2rd) {
988 			G_UNION_LOGREQLVL((cp == sc->sc_uppercp) ?
989 			    3 : 4, cbp, "Sending %jd byte read "
990 			    "request to %s level.", len2rd, level);
991 			g_io_request(cbp, cp);
992 			return;
993 		}
994 		cbp->bio_length = len2rd;
995 		if ((cbp->bio_flags & BIO_UNMAPPED) != 0)
996 			cbp->bio_ma_offset += offset;
997 		else
998 			cbp->bio_data += offset;
999 		offset += len2rd;
1000 		rdlen -= len2rd;
1001 		G_UNION_LOGREQLVL(3, cbp, "Sending %jd byte read "
1002 		    "request to %s level.", len2rd, level);
1003 		/*
1004 		 * To avoid prematurely notifying our consumer
1005 		 * that their I/O has completed, we have to delay
1006 		 * issuing our first I/O request until we have
1007 		 * issued all the additional I/O requests.
1008 		 */
1009 		if (iocnt > 0) {
1010 			atomic_add_long(&wip->wip_numios, 1);
1011 			g_io_request(cbp, cp);
1012 		} else {
1013 			firstbp = cbp;
1014 			firstcp = cp;
1015 		}
1016 		if (rdlen == 0)
1017 			break;
1018 		/* set up for next read */
1019 		cbp = g_clone_bio(wip->wip_bp);
1020 		if (cbp == NULL) {
1021 			wip->wip_error = ENOMEM;
1022 			atomic_add_long(&wip->wip_numios, -1);
1023 			break;
1024 		}
1025 		cbp->bio_caller1 = wip;
1026 		cbp->bio_done = g_union_done;
1027 		cbp->bio_offset += offset;
1028 		cbp->bio_length = rdlen;
1029 		atomic_add_long(&sc->sc_reads, 1);
1030 	}
1031 	/* We have issued all our I/O, so start the first one */
1032 	g_io_request(firstbp, firstcp);
1033 	return;
1034 }
1035 
1036 /*
1037  * Used when completing a union I/O operation.
1038  */
1039 static void
1040 g_union_done(struct bio *bp)
1041 {
1042 	struct g_union_wip *wip, *waitingwip;
1043 	struct g_union_softc *sc;
1044 
1045 	wip = bp->bio_caller1;
1046 	if (wip->wip_error != 0 && bp->bio_error == 0)
1047 		bp->bio_error = wip->wip_error;
1048 	wip->wip_error = 0;
1049 	if (atomic_fetchadd_long(&wip->wip_numios, -1) == 1) {
1050 		sc = wip->wip_sc;
1051 		G_WLOCK(sc);
1052 		if (bp->bio_cmd == BIO_WRITE)
1053 			g_union_setmap(bp, sc);
1054 		TAILQ_REMOVE(&sc->sc_wiplist, wip, wip_next);
1055 		G_WUNLOCK(sc);
1056 		while ((waitingwip = TAILQ_FIRST(&wip->wip_waiting)) != NULL) {
1057 			TAILQ_REMOVE(&wip->wip_waiting, waitingwip, wip_next);
1058 			g_union_doio(waitingwip);
1059 		}
1060 		g_free(wip);
1061 	}
1062 	g_std_done(bp);
1063 }
1064 
1065 /*
1066  * Record blocks that have been written in the map.
1067  */
1068 static void
1069 g_union_setmap(struct bio *bp, struct g_union_softc *sc)
1070 {
1071 	size_t root_idx;
1072 	uint64_t **leaf;
1073 	uint64_t *wordp;
1074 	off_t start, numsec;
1075 
1076 	G_WLOCKOWNED(sc);
1077 	KASSERT(bp->bio_offset % sc->sc_sectorsize == 0,
1078 	    ("g_union_setmap: offset not on sector boundry"));
1079 	KASSERT(bp->bio_length % sc->sc_sectorsize == 0,
1080 	    ("g_union_setmap: length not a multiple of sectors"));
1081 	start = bp->bio_offset / sc->sc_sectorsize;
1082 	numsec = bp->bio_length / sc->sc_sectorsize;
1083 	KASSERT(start + numsec <= sc->sc_map_size,
1084 	    ("g_union_setmap: block %jd is out of range", start + numsec));
1085 	for ( ; numsec > 0; numsec--, start++) {
1086 		root_idx = start / sc->sc_bits_per_leaf;
1087 		leaf = &sc->sc_writemap_root[root_idx];
1088 		wordp = &(*leaf)
1089 		    [(start % sc->sc_bits_per_leaf) / BITS_PER_ENTRY];
1090 		*wordp |= 1ULL << (start % BITS_PER_ENTRY);
1091 		sc->sc_leafused[root_idx / BITS_PER_ENTRY] |=
1092 		    1ULL << (root_idx % BITS_PER_ENTRY);
1093 	}
1094 }
1095 
1096 /*
1097  * Check map to determine whether blocks have been written.
1098  *
1099  * Return true if they have been written so should be read from the top
1100  * layer. Return false if they have not been written so should be read
1101  * from the bottom layer. Return in len2read the bytes to be read. See
1102  * the comment above the BIO_READ implementation in g_union_start() for
1103  * an explantion of why len2read may be shorter than the buffer length.
1104  */
1105 static bool
1106 g_union_getmap(struct bio *bp, struct g_union_softc *sc, off_t *len2read)
1107 {
1108 	off_t start, numsec, leafresid, bitloc;
1109 	bool first, maptype, retval;
1110 	uint64_t *leaf, word;
1111 	size_t root_idx;
1112 
1113 	KASSERT(bp->bio_offset % sc->sc_sectorsize == 0,
1114 	    ("g_union_getmap: offset not on sector boundry"));
1115 	KASSERT(bp->bio_length % sc->sc_sectorsize == 0,
1116 	    ("g_union_getmap: length not a multiple of sectors"));
1117 	start = bp->bio_offset / sc->sc_sectorsize;
1118 	numsec = bp->bio_length / sc->sc_sectorsize;
1119 	G_UNION_DEBUG(4, "g_union_getmap: check %jd sectors starting at %jd\n",
1120 	    numsec, start);
1121 	KASSERT(start + numsec <= sc->sc_map_size,
1122 	    ("g_union_getmap: block %jd is out of range", start + numsec));
1123 		root_idx = start / sc->sc_bits_per_leaf;
1124 	first = true;
1125 	maptype = false;
1126 	while (numsec > 0) {
1127 		/* Check first if the leaf records any written sectors */
1128 		root_idx = start / sc->sc_bits_per_leaf;
1129 		leafresid = sc->sc_bits_per_leaf -
1130 		    (start % sc->sc_bits_per_leaf);
1131 		if (((sc->sc_leafused[root_idx / BITS_PER_ENTRY]) &
1132 		    (1ULL << (root_idx % BITS_PER_ENTRY))) == 0) {
1133 			if (first) {
1134 				maptype = false;
1135 				first = false;
1136 			}
1137 			if (maptype)
1138 				break;
1139 			numsec -= leafresid;
1140 			start += leafresid;
1141 			continue;
1142 		}
1143 		/* Check up to a word boundry, then check word by word */
1144 		leaf = sc->sc_writemap_root[root_idx];
1145 		word = leaf[(start % sc->sc_bits_per_leaf) / BITS_PER_ENTRY];
1146 		bitloc = start % BITS_PER_ENTRY;
1147 		if (bitloc == 0 && (word == 0 || word == ~0)) {
1148 			if (first) {
1149 				if (word == 0)
1150 					maptype = false;
1151 				else
1152 					maptype = true;
1153 				first = false;
1154 			}
1155 			if ((word == 0 && maptype) ||
1156 			    (word == ~0 && !maptype))
1157 				break;
1158 			numsec -= BITS_PER_ENTRY;
1159 			start += BITS_PER_ENTRY;
1160 			continue;
1161 		}
1162 		for ( ; bitloc < BITS_PER_ENTRY; bitloc ++) {
1163 			retval = (word & (1ULL << bitloc)) != 0;
1164 			if (first) {
1165 				maptype = retval;
1166 				first = false;
1167 			}
1168 			if (maptype == retval) {
1169 				numsec--;
1170 				start++;
1171 				continue;
1172 			}
1173 			goto out;
1174 		}
1175 	}
1176 out:
1177 	if (numsec < 0) {
1178 		start += numsec;
1179 		numsec = 0;
1180 	}
1181 	*len2read = bp->bio_length - (numsec * sc->sc_sectorsize);
1182 	G_UNION_DEBUG(maptype ? 3 : 4,
1183 	    "g_union_getmap: return maptype %swritten for %jd "
1184 	    "sectors ending at %jd\n", maptype ? "" : "NOT ",
1185 	    *len2read / sc->sc_sectorsize, start - 1);
1186 	return (maptype);
1187 }
1188 
1189 /*
1190  * Fill in details for a BIO_GETATTR request.
1191  */
1192 static void
1193 g_union_kerneldump(struct bio *bp, struct g_union_softc *sc)
1194 {
1195 	struct g_kerneldump *gkd;
1196 	struct g_geom *gp;
1197 	struct g_provider *pp;
1198 
1199 	gkd = (struct g_kerneldump *)bp->bio_data;
1200 	gp = bp->bio_to->geom;
1201 	g_trace(G_T_TOPOLOGY, "%s(%s, %jd, %jd)", __func__, gp->name,
1202 	    (intmax_t)gkd->offset, (intmax_t)gkd->length);
1203 
1204 	pp = LIST_FIRST(&gp->provider);
1205 
1206 	gkd->di.dumper = g_union_dumper;
1207 	gkd->di.priv = sc;
1208 	gkd->di.blocksize = pp->sectorsize;
1209 	gkd->di.maxiosize = DFLTPHYS;
1210 	gkd->di.mediaoffset = sc->sc_offset + gkd->offset;
1211 	if (gkd->offset > sc->sc_size) {
1212 		g_io_deliver(bp, ENODEV);
1213 		return;
1214 	}
1215 	if (gkd->offset + gkd->length > sc->sc_size)
1216 		gkd->length = sc->sc_size - gkd->offset;
1217 	gkd->di.mediasize = gkd->length;
1218 	g_io_deliver(bp, 0);
1219 }
1220 
/*
 * Dump routine installed by g_union_kerneldump().
 *
 * Nothing is written: all of the arguments are ignored and every call
 * reports success by returning 0.
 */
static int
g_union_dumper(void *priv, void *virtual, off_t offset, size_t length)
{
	/* None of the arguments are used. */
	(void)priv;
	(void)virtual;
	(void)offset;
	(void)length;

	return (0);
}
1230 
1231 /*
1232  * List union statistics.
1233  */
1234 static void
1235 g_union_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
1236     struct g_consumer *cp, struct g_provider *pp)
1237 {
1238 	struct g_union_softc *sc;
1239 
1240 	if (pp != NULL || cp != NULL || gp->softc == NULL)
1241 		return;
1242 	sc = gp->softc;
1243 	sbuf_printf(sb, "%s<Reads>%ju</Reads>\n", indent,
1244 	    (uintmax_t)sc->sc_reads);
1245 	sbuf_printf(sb, "%s<Writes>%ju</Writes>\n", indent,
1246 	    (uintmax_t)sc->sc_writes);
1247 	sbuf_printf(sb, "%s<Deletes>%ju</Deletes>\n", indent,
1248 	    (uintmax_t)sc->sc_deletes);
1249 	sbuf_printf(sb, "%s<Getattrs>%ju</Getattrs>\n", indent,
1250 	    (uintmax_t)sc->sc_getattrs);
1251 	sbuf_printf(sb, "%s<Flushes>%ju</Flushes>\n", indent,
1252 	    (uintmax_t)sc->sc_flushes);
1253 	sbuf_printf(sb, "%s<Speedups>%ju</Speedups>\n", indent,
1254 	    (uintmax_t)sc->sc_speedups);
1255 	sbuf_printf(sb, "%s<Cmd0s>%ju</Cmd0s>\n", indent,
1256 	    (uintmax_t)sc->sc_cmd0s);
1257 	sbuf_printf(sb, "%s<Cmd1s>%ju</Cmd1s>\n", indent,
1258 	    (uintmax_t)sc->sc_cmd1s);
1259 	sbuf_printf(sb, "%s<Cmd2s>%ju</Cmd2s>\n", indent,
1260 	    (uintmax_t)sc->sc_cmd2s);
1261 	sbuf_printf(sb, "%s<ReadCurrentRead>%ju</ReadCurrentRead>\n", indent,
1262 	    (uintmax_t)sc->sc_readcurrentread);
1263 	sbuf_printf(sb, "%s<ReadBlockWrite>%ju</ReadBlockWrite>\n", indent,
1264 	    (uintmax_t)sc->sc_readblockwrite);
1265 	sbuf_printf(sb, "%s<WriteBlockRead>%ju</WriteBlockRead>\n", indent,
1266 	    (uintmax_t)sc->sc_writeblockread);
1267 	sbuf_printf(sb, "%s<WriteBlockWrite>%ju</WriteBlockWrite>\n", indent,
1268 	    (uintmax_t)sc->sc_writeblockwrite);
1269 	sbuf_printf(sb, "%s<ReadBytes>%ju</ReadBytes>\n", indent,
1270 	    (uintmax_t)sc->sc_readbytes);
1271 	sbuf_printf(sb, "%s<WroteBytes>%ju</WroteBytes>\n", indent,
1272 	    (uintmax_t)sc->sc_wrotebytes);
1273 	sbuf_printf(sb, "%s<Offset>%jd</Offset>\n", indent,
1274 	    (intmax_t)sc->sc_offset);
1275 }
1276 
1277 /*
1278  * Clean up an orphaned geom.
1279  */
1280 static void
1281 g_union_orphan(struct g_consumer *cp)
1282 {
1283 
1284 	g_topology_assert();
1285 	g_union_destroy(NULL, cp->geom, true);
1286 }
1287 
1288 /*
1289  * Clean up a union geom.
1290  */
1291 static int
1292 g_union_destroy_geom(struct gctl_req *req, struct g_class *mp,
1293     struct g_geom *gp)
1294 {
1295 
1296 	return (g_union_destroy(NULL, gp, false));
1297 }
1298 
1299 /*
1300  * Clean up a union device.
1301  */
1302 static int
1303 g_union_destroy(struct gctl_req *req, struct g_geom *gp, bool force)
1304 {
1305 	struct g_union_softc *sc;
1306 	struct g_provider *pp;
1307 	int error;
1308 
1309 	g_topology_assert();
1310 	sc = gp->softc;
1311 	if (sc == NULL)
1312 		return (ENXIO);
1313 	pp = LIST_FIRST(&gp->provider);
1314 	if ((sc->sc_flags & DOING_COMMIT) != 0 ||
1315 	    (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0))) {
1316 		if (force) {
1317 			if (req != NULL)
1318 				gctl_msg(req, "Device %s is still in use, "
1319 				    "so is being forcibly removed.", gp->name);
1320 			G_UNION_DEBUG(1, "Device %s is still in use, so "
1321 			    "is being forcibly removed.", gp->name);
1322 		} else {
1323 			if (req != NULL)
1324 				gctl_msg(req, "Device %s is still open "
1325 				    "(r=%d w=%d e=%d).", gp->name, pp->acr,
1326 				    pp->acw, pp->ace);
1327 			G_UNION_DEBUG(1, "Device %s is still open "
1328 			    "(r=%d w=%d e=%d).", gp->name, pp->acr,
1329 			    pp->acw, pp->ace);
1330 			return (EBUSY);
1331 		}
1332 	} else {
1333 		if (req != NULL)
1334 			gctl_msg(req, "Device %s removed.", gp->name);
1335 		G_UNION_DEBUG(1, "Device %s removed.", gp->name);
1336 	}
1337 	/* Close consumers */
1338 	if ((error = g_access(sc->sc_lowercp, -1, 0, -1)) != 0)
1339 		G_UNION_DEBUG(2, "Error %d: device %s could not reset access "
1340 		    "to %s.", error, gp->name, sc->sc_lowercp->provider->name);
1341 	if ((error = g_access(sc->sc_uppercp, -1, -1, -1)) != 0)
1342 		G_UNION_DEBUG(2, "Error %d: device %s could not reset access "
1343 		    "to %s.", error, gp->name, sc->sc_uppercp->provider->name);
1344 
1345 	g_wither_geom(gp, ENXIO);
1346 
1347 	return (0);
1348 }
1349 
1350 /*
1351  * Clean up a union provider.
1352  */
1353 static void
1354 g_union_providergone(struct g_provider *pp)
1355 {
1356 	struct g_geom *gp;
1357 	struct g_union_softc *sc;
1358 	size_t i;
1359 
1360 	gp = pp->geom;
1361 	sc = gp->softc;
1362 	gp->softc = NULL;
1363 	for (i = 0; i < sc->sc_root_size; i++)
1364 		g_free(sc->sc_writemap_root[i]);
1365 	g_free(sc->sc_writemap_root);
1366 	g_free(sc->sc_leafused);
1367 	rw_destroy(&sc->sc_rwlock);
1368 	g_free(sc);
1369 }
1370 
1371 /*
1372  * Respond to a resized provider.
1373  */
1374 static void
1375 g_union_resize(struct g_consumer *cp)
1376 {
1377 	struct g_union_softc *sc;
1378 	struct g_geom *gp;
1379 
1380 	g_topology_assert();
1381 
1382 	gp = cp->geom;
1383 	sc = gp->softc;
1384 
1385 	/*
1386 	 * If size has gotten bigger, ignore it and just keep using
1387 	 * the space we already had. Otherwise we are done.
1388 	 */
1389 	if (sc->sc_size < cp->provider->mediasize - sc->sc_offset)
1390 		return;
1391 	g_union_destroy(NULL, gp, true);
1392 }
1393 
1394 DECLARE_GEOM_CLASS(g_union_class, g_union);
1395 MODULE_VERSION(geom_union, 0);
1396