1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd@FreeBSD.org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/kernel.h>
32 #include <sys/module.h>
33 #include <sys/lock.h>
34 #include <sys/mutex.h>
35 #include <sys/bio.h>
36 #include <sys/sbuf.h>
37 #include <sys/sysctl.h>
38 #include <sys/malloc.h>
39 #include <vm/uma.h>
40 #include <geom/geom.h>
41 #include <geom/geom_dbg.h>
42 #include <geom/stripe/g_stripe.h>
43
FEATURE(geom_stripe, "GEOM striping support");

/* Malloc type used for the softc and the per-device disk array. */
static MALLOC_DEFINE(M_STRIPE, "stripe_data", "GEOM_STRIPE Data");

/*
 * UMA zone of maxphys-sized buffers used by "fast" mode; created in
 * g_stripe_init() and destroyed in g_stripe_fini().
 */
static uma_zone_t g_stripe_zone;

/* Forward declarations for functions used before their definition. */
static int g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force);
static int g_stripe_destroy_geom(struct gctl_req *req, struct g_class *mp,
    struct g_geom *gp);

static g_taste_t g_stripe_taste;
static g_ctl_req_t g_stripe_config;
static g_dumpconf_t g_stripe_dumpconf;
static g_init_t g_stripe_init;
static g_fini_t g_stripe_fini;

/* GEOM class descriptor wiring the class-level methods of this file. */
struct g_class g_stripe_class = {
	.name = G_STRIPE_CLASS_NAME,
	.version = G_VERSION,
	.ctlreq = g_stripe_config,
	.taste = g_stripe_taste,
	.destroy_geom = g_stripe_destroy_geom,
	.init = g_stripe_init,
	.fini = g_stripe_fini
};

/* kern.geom.stripe sysctl tree. */
SYSCTL_DECL(_kern_geom);
static SYSCTL_NODE(_kern_geom, OID_AUTO, stripe, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "GEOM_STRIPE stuff");
/* Verbosity threshold; presumably consumed by G_STRIPE_DEBUG() -- confirm. */
static u_int g_stripe_debug = 0;
SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, debug, CTLFLAG_RWTUN, &g_stripe_debug, 0,
    "Debug level");
/*
 * Non-zero lets g_stripe_start() try g_stripe_start_fast() and keeps
 * g_stripe_check_and_run() from advertising G_PF_ACCEPT_UNMAPPED.
 */
static int g_stripe_fast = 0;
SYSCTL_INT(_kern_geom_stripe, OID_AUTO, fast,
    CTLFLAG_RWTUN, &g_stripe_fast, 0,
    "Fast, but memory-consuming, mode");
/*
 * Byte limit for "fast" mode buffers.  Initialised in g_stripe_init(),
 * where it is rounded down to a multiple of maxphys and applied to
 * g_stripe_zone as an item limit.
 */
static u_long g_stripe_maxmem;
SYSCTL_ULONG(_kern_geom_stripe, OID_AUTO, maxmem,
    CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &g_stripe_maxmem, 0,
    "Maximum memory that can be allocated in \"fast\" mode (in bytes)");
/* Incremented by g_stripe_start() whenever g_stripe_start_fast() fails. */
static u_int g_stripe_fast_failed = 0;
SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, fast_failed, CTLFLAG_RD,
    &g_stripe_fast_failed, 0, "How many times \"fast\" mode failed");
87
/*
 * Greatest Common Divisor of a and b (Euclid's algorithm).
 * Returns a unchanged when b is 0, so gcd(0, 0) is 0.
 */
static u_int
gcd(u_int a, u_int b)
{
	u_int rem;

	while (b != 0) {
		rem = a % b;
		a = b;
		b = rem;
	}
	return (a);
}
103
104 /*
105 * Least Common Multiple.
106 */
107 static u_int
lcm(u_int a,u_int b)108 lcm(u_int a, u_int b)
109 {
110
111 return ((a * b) / gcd(a, b));
112 }
113
114 static void
g_stripe_init(struct g_class * mp __unused)115 g_stripe_init(struct g_class *mp __unused)
116 {
117
118 g_stripe_maxmem = maxphys * 100;
119 TUNABLE_ULONG_FETCH("kern.geom.stripe.maxmem,", &g_stripe_maxmem);
120 g_stripe_zone = uma_zcreate("g_stripe_zone", maxphys, NULL, NULL,
121 NULL, NULL, 0, 0);
122 g_stripe_maxmem -= g_stripe_maxmem % maxphys;
123 uma_zone_set_max(g_stripe_zone, g_stripe_maxmem / maxphys);
124 }
125
126 static void
g_stripe_fini(struct g_class * mp __unused)127 g_stripe_fini(struct g_class *mp __unused)
128 {
129
130 uma_zdestroy(g_stripe_zone);
131 }
132
133 /*
134 * Return the number of valid disks.
135 */
136 static u_int
g_stripe_nvalid(struct g_stripe_softc * sc)137 g_stripe_nvalid(struct g_stripe_softc *sc)
138 {
139 u_int i, no;
140
141 no = 0;
142 for (i = 0; i < sc->sc_ndisks; i++) {
143 if (sc->sc_disks[i] != NULL)
144 no++;
145 }
146
147 return (no);
148 }
149
150 static void
g_stripe_remove_disk(struct g_consumer * cp)151 g_stripe_remove_disk(struct g_consumer *cp)
152 {
153 struct g_stripe_softc *sc;
154
155 g_topology_assert();
156 KASSERT(cp != NULL, ("Non-valid disk in %s.", __func__));
157 sc = (struct g_stripe_softc *)cp->geom->softc;
158 KASSERT(sc != NULL, ("NULL sc in %s.", __func__));
159
160 if (cp->private == NULL) {
161 G_STRIPE_DEBUG(0, "Disk %s removed from %s.",
162 cp->provider->name, sc->sc_name);
163 cp->private = (void *)(uintptr_t)-1;
164 }
165
166 if (sc->sc_provider != NULL) {
167 G_STRIPE_DEBUG(0, "Device %s deactivated.",
168 sc->sc_provider->name);
169 g_wither_provider(sc->sc_provider, ENXIO);
170 sc->sc_provider = NULL;
171 }
172
173 if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
174 return;
175 sc->sc_disks[cp->index] = NULL;
176 cp->index = 0;
177 g_detach(cp);
178 g_destroy_consumer(cp);
179 /* If there are no valid disks anymore, remove device. */
180 if (LIST_EMPTY(&sc->sc_geom->consumer))
181 g_stripe_destroy(sc, 1);
182 }
183
184 static void
g_stripe_orphan(struct g_consumer * cp)185 g_stripe_orphan(struct g_consumer *cp)
186 {
187 struct g_stripe_softc *sc;
188 struct g_geom *gp;
189
190 g_topology_assert();
191 gp = cp->geom;
192 sc = gp->softc;
193 if (sc == NULL)
194 return;
195
196 g_stripe_remove_disk(cp);
197 }
198
199 static int
g_stripe_access(struct g_provider * pp,int dr,int dw,int de)200 g_stripe_access(struct g_provider *pp, int dr, int dw, int de)
201 {
202 struct g_consumer *cp1, *cp2, *tmp;
203 struct g_stripe_softc *sc __diagused;
204 struct g_geom *gp;
205 int error;
206
207 g_topology_assert();
208 gp = pp->geom;
209 sc = gp->softc;
210 KASSERT(sc != NULL, ("NULL sc in %s.", __func__));
211
212 /* On first open, grab an extra "exclusive" bit */
213 if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0)
214 de++;
215 /* ... and let go of it on last close */
216 if ((pp->acr + dr) == 0 && (pp->acw + dw) == 0 && (pp->ace + de) == 0)
217 de--;
218
219 LIST_FOREACH_SAFE(cp1, &gp->consumer, consumer, tmp) {
220 error = g_access(cp1, dr, dw, de);
221 if (error != 0)
222 goto fail;
223 if (cp1->acr == 0 && cp1->acw == 0 && cp1->ace == 0 &&
224 cp1->private != NULL) {
225 g_stripe_remove_disk(cp1); /* May destroy geom. */
226 }
227 }
228 return (0);
229
230 fail:
231 LIST_FOREACH(cp2, &gp->consumer, consumer) {
232 if (cp1 == cp2)
233 break;
234 g_access(cp2, -dr, -dw, -de);
235 }
236 return (error);
237 }
238
239 static void
g_stripe_copy(struct g_stripe_softc * sc,char * src,char * dst,off_t offset,off_t length,int mode)240 g_stripe_copy(struct g_stripe_softc *sc, char *src, char *dst, off_t offset,
241 off_t length, int mode)
242 {
243 off_t stripesize;
244 size_t len;
245
246 stripesize = sc->sc_stripesize;
247 len = (size_t)(stripesize - (offset & (stripesize - 1)));
248 do {
249 bcopy(src, dst, len);
250 if (mode) {
251 dst += len + stripesize * (sc->sc_ndisks - 1);
252 src += len;
253 } else {
254 dst += len;
255 src += len + stripesize * (sc->sc_ndisks - 1);
256 }
257 length -= len;
258 KASSERT(length >= 0,
259 ("Length < 0 (stripesize=%ju, offset=%ju, length=%jd).",
260 (uintmax_t)stripesize, (uintmax_t)offset, (intmax_t)length));
261 if (length > stripesize)
262 len = stripesize;
263 else
264 len = length;
265 } while (length > 0);
266 }
267
268 static void
g_stripe_done(struct bio * bp)269 g_stripe_done(struct bio *bp)
270 {
271 struct g_stripe_softc *sc;
272 struct bio *pbp;
273
274 pbp = bp->bio_parent;
275 sc = pbp->bio_to->geom->softc;
276 if (bp->bio_cmd == BIO_READ && bp->bio_caller1 != NULL) {
277 g_stripe_copy(sc, bp->bio_data, bp->bio_caller1, bp->bio_offset,
278 bp->bio_length, 1);
279 bp->bio_data = bp->bio_caller1;
280 bp->bio_caller1 = NULL;
281 }
282 mtx_lock(&sc->sc_lock);
283 if (pbp->bio_error == 0)
284 pbp->bio_error = bp->bio_error;
285 pbp->bio_completed += bp->bio_completed;
286 pbp->bio_inbed++;
287 if (pbp->bio_children == pbp->bio_inbed) {
288 mtx_unlock(&sc->sc_lock);
289 if (pbp->bio_driver1 != NULL)
290 uma_zfree(g_stripe_zone, pbp->bio_driver1);
291 if (bp->bio_cmd == BIO_SPEEDUP)
292 pbp->bio_completed = pbp->bio_length;
293 g_io_deliver(pbp, pbp->bio_error);
294 } else
295 mtx_unlock(&sc->sc_lock);
296 g_destroy_bio(bp);
297 }
298
299 static int
g_stripe_start_fast(struct bio * bp,u_int no,off_t offset,off_t length)300 g_stripe_start_fast(struct bio *bp, u_int no, off_t offset, off_t length)
301 {
302 TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue);
303 struct g_stripe_softc *sc;
304 char *addr, *data = NULL;
305 struct bio *cbp;
306 off_t stripesize;
307 u_int nparts = 0;
308 int error;
309
310 sc = bp->bio_to->geom->softc;
311
312 addr = bp->bio_data;
313 stripesize = sc->sc_stripesize;
314
315 cbp = g_clone_bio(bp);
316 if (cbp == NULL) {
317 error = ENOMEM;
318 goto failure;
319 }
320 TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
321 nparts++;
322 /*
323 * Fill in the component buf structure.
324 */
325 cbp->bio_done = g_stripe_done;
326 cbp->bio_offset = offset;
327 cbp->bio_data = addr;
328 cbp->bio_caller1 = NULL;
329 cbp->bio_length = length;
330 cbp->bio_caller2 = sc->sc_disks[no];
331
332 /* offset -= offset % stripesize; */
333 offset -= offset & (stripesize - 1);
334 addr += length;
335 length = bp->bio_length - length;
336 for (no++; length > 0; no++, length -= stripesize, addr += stripesize) {
337 if (no > sc->sc_ndisks - 1) {
338 no = 0;
339 offset += stripesize;
340 }
341 if (nparts >= sc->sc_ndisks) {
342 cbp = TAILQ_NEXT(cbp, bio_queue);
343 if (cbp == NULL)
344 cbp = TAILQ_FIRST(&queue);
345 nparts++;
346 /*
347 * Update bio structure.
348 */
349 /*
350 * MIN() is in case when
351 * (bp->bio_length % sc->sc_stripesize) != 0.
352 */
353 cbp->bio_length += MIN(stripesize, length);
354 if (cbp->bio_caller1 == NULL) {
355 cbp->bio_caller1 = cbp->bio_data;
356 cbp->bio_data = NULL;
357 if (data == NULL) {
358 data = uma_zalloc(g_stripe_zone,
359 M_NOWAIT);
360 if (data == NULL) {
361 error = ENOMEM;
362 goto failure;
363 }
364 }
365 }
366 } else {
367 cbp = g_clone_bio(bp);
368 if (cbp == NULL) {
369 error = ENOMEM;
370 goto failure;
371 }
372 TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
373 nparts++;
374 /*
375 * Fill in the component buf structure.
376 */
377 cbp->bio_done = g_stripe_done;
378 cbp->bio_offset = offset;
379 cbp->bio_data = addr;
380 cbp->bio_caller1 = NULL;
381 /*
382 * MIN() is in case when
383 * (bp->bio_length % sc->sc_stripesize) != 0.
384 */
385 cbp->bio_length = MIN(stripesize, length);
386 cbp->bio_caller2 = sc->sc_disks[no];
387 }
388 }
389 if (data != NULL)
390 bp->bio_driver1 = data;
391 /*
392 * Fire off all allocated requests!
393 */
394 while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
395 struct g_consumer *cp;
396
397 TAILQ_REMOVE(&queue, cbp, bio_queue);
398 cp = cbp->bio_caller2;
399 cbp->bio_caller2 = NULL;
400 cbp->bio_to = cp->provider;
401 if (cbp->bio_caller1 != NULL) {
402 cbp->bio_data = data;
403 if (bp->bio_cmd == BIO_WRITE) {
404 g_stripe_copy(sc, cbp->bio_caller1, data,
405 cbp->bio_offset, cbp->bio_length, 0);
406 }
407 data += cbp->bio_length;
408 }
409 G_STRIPE_LOGREQ(cbp, "Sending request.");
410 g_io_request(cbp, cp);
411 }
412 return (0);
413 failure:
414 if (data != NULL)
415 uma_zfree(g_stripe_zone, data);
416 while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
417 TAILQ_REMOVE(&queue, cbp, bio_queue);
418 if (cbp->bio_caller1 != NULL) {
419 cbp->bio_data = cbp->bio_caller1;
420 cbp->bio_caller1 = NULL;
421 }
422 bp->bio_children--;
423 g_destroy_bio(cbp);
424 }
425 return (error);
426 }
427
428 static int
g_stripe_start_economic(struct bio * bp,u_int no,off_t offset,off_t length)429 g_stripe_start_economic(struct bio *bp, u_int no, off_t offset, off_t length)
430 {
431 TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue);
432 struct g_stripe_softc *sc;
433 off_t stripesize;
434 struct bio *cbp;
435 char *addr;
436 int error;
437
438 sc = bp->bio_to->geom->softc;
439
440 stripesize = sc->sc_stripesize;
441
442 cbp = g_clone_bio(bp);
443 if (cbp == NULL) {
444 error = ENOMEM;
445 goto failure;
446 }
447 TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
448 /*
449 * Fill in the component buf structure.
450 */
451 if (bp->bio_length == length)
452 cbp->bio_done = g_std_done; /* Optimized lockless case. */
453 else
454 cbp->bio_done = g_stripe_done;
455 cbp->bio_offset = offset;
456 cbp->bio_length = length;
457 if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
458 bp->bio_ma_n = round_page(bp->bio_ma_offset +
459 bp->bio_length) / PAGE_SIZE;
460 addr = NULL;
461 } else
462 addr = bp->bio_data;
463 cbp->bio_caller2 = sc->sc_disks[no];
464
465 /* offset -= offset % stripesize; */
466 offset -= offset & (stripesize - 1);
467 if (bp->bio_cmd != BIO_DELETE)
468 addr += length;
469 length = bp->bio_length - length;
470 for (no++; length > 0; no++, length -= stripesize) {
471 if (no > sc->sc_ndisks - 1) {
472 no = 0;
473 offset += stripesize;
474 }
475 cbp = g_clone_bio(bp);
476 if (cbp == NULL) {
477 error = ENOMEM;
478 goto failure;
479 }
480 TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
481
482 /*
483 * Fill in the component buf structure.
484 */
485 cbp->bio_done = g_stripe_done;
486 cbp->bio_offset = offset;
487 /*
488 * MIN() is in case when
489 * (bp->bio_length % sc->sc_stripesize) != 0.
490 */
491 cbp->bio_length = MIN(stripesize, length);
492 if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
493 cbp->bio_ma_offset += (uintptr_t)addr;
494 cbp->bio_ma += cbp->bio_ma_offset / PAGE_SIZE;
495 cbp->bio_ma_offset %= PAGE_SIZE;
496 cbp->bio_ma_n = round_page(cbp->bio_ma_offset +
497 cbp->bio_length) / PAGE_SIZE;
498 } else
499 cbp->bio_data = addr;
500
501 cbp->bio_caller2 = sc->sc_disks[no];
502
503 if (bp->bio_cmd != BIO_DELETE)
504 addr += stripesize;
505 }
506 /*
507 * Fire off all allocated requests!
508 */
509 while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
510 struct g_consumer *cp;
511
512 TAILQ_REMOVE(&queue, cbp, bio_queue);
513 cp = cbp->bio_caller2;
514 cbp->bio_caller2 = NULL;
515 cbp->bio_to = cp->provider;
516 G_STRIPE_LOGREQ(cbp, "Sending request.");
517 g_io_request(cbp, cp);
518 }
519 return (0);
520 failure:
521 while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
522 TAILQ_REMOVE(&queue, cbp, bio_queue);
523 bp->bio_children--;
524 g_destroy_bio(cbp);
525 }
526 return (error);
527 }
528
529 static void
g_stripe_pushdown(struct g_stripe_softc * sc,struct bio * bp)530 g_stripe_pushdown(struct g_stripe_softc *sc, struct bio *bp)
531 {
532 struct bio_queue_head queue;
533 struct g_consumer *cp;
534 struct bio *cbp;
535 u_int no;
536
537 bioq_init(&queue);
538 for (no = 0; no < sc->sc_ndisks; no++) {
539 cbp = g_clone_bio(bp);
540 if (cbp == NULL) {
541 for (cbp = bioq_first(&queue); cbp != NULL;
542 cbp = bioq_first(&queue)) {
543 bioq_remove(&queue, cbp);
544 g_destroy_bio(cbp);
545 }
546 if (bp->bio_error == 0)
547 bp->bio_error = ENOMEM;
548 g_io_deliver(bp, bp->bio_error);
549 return;
550 }
551 bioq_insert_tail(&queue, cbp);
552 cbp->bio_done = g_stripe_done;
553 cbp->bio_caller2 = sc->sc_disks[no];
554 cbp->bio_to = sc->sc_disks[no]->provider;
555 }
556 for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
557 bioq_remove(&queue, cbp);
558 G_STRIPE_LOGREQ(cbp, "Sending request.");
559 cp = cbp->bio_caller2;
560 cbp->bio_caller2 = NULL;
561 g_io_request(cbp, cp);
562 }
563 }
564
565 static void
g_stripe_start(struct bio * bp)566 g_stripe_start(struct bio *bp)
567 {
568 off_t offset, start, length, nstripe, stripesize;
569 struct g_stripe_softc *sc;
570 u_int no;
571 int error, fast = 0;
572
573 sc = bp->bio_to->geom->softc;
574 /*
575 * If sc == NULL, provider's error should be set and g_stripe_start()
576 * should not be called at all.
577 */
578 KASSERT(sc != NULL,
579 ("Provider's error should be set (error=%d)(device=%s).",
580 bp->bio_to->error, bp->bio_to->name));
581
582 G_STRIPE_LOGREQ(bp, "Request received.");
583
584 switch (bp->bio_cmd) {
585 case BIO_READ:
586 case BIO_WRITE:
587 case BIO_DELETE:
588 break;
589 case BIO_SPEEDUP:
590 case BIO_FLUSH:
591 g_stripe_pushdown(sc, bp);
592 return;
593 case BIO_GETATTR:
594 if (!strcmp(bp->bio_attribute, "GEOM::candelete")) {
595 int val = (sc->sc_flags & G_STRIPE_FLAG_CANDELETE) != 0;
596 g_handleattr(bp, "GEOM::candelete", &val, sizeof(val));
597 return;
598 }
599 /* otherwise: To which provider it should be delivered? */
600 default:
601 g_io_deliver(bp, EOPNOTSUPP);
602 return;
603 }
604
605 stripesize = sc->sc_stripesize;
606
607 /*
608 * Calculations are quite messy, but fast I hope.
609 */
610
611 /* Stripe number. */
612 /* nstripe = bp->bio_offset / stripesize; */
613 nstripe = bp->bio_offset >> (off_t)sc->sc_stripebits;
614 /* Disk number. */
615 no = nstripe % sc->sc_ndisks;
616 /* Start position in stripe. */
617 /* start = bp->bio_offset % stripesize; */
618 start = bp->bio_offset & (stripesize - 1);
619 /* Start position in disk. */
620 /* offset = (nstripe / sc->sc_ndisks) * stripesize + start; */
621 offset = ((nstripe / sc->sc_ndisks) << sc->sc_stripebits) + start;
622 /* Length of data to operate. */
623 length = MIN(bp->bio_length, stripesize - start);
624
625 /*
626 * Do use "fast" mode when:
627 * 1. "Fast" mode is ON.
628 * and
629 * 2. Request size is less than or equal to maxphys,
630 * which should always be true.
631 * and
632 * 3. Request size is bigger than stripesize * ndisks. If it isn't,
633 * there will be no need to send more than one I/O request to
634 * a provider, so there is nothing to optmize.
635 * and
636 * 4. Request is not unmapped.
637 * and
638 * 5. It is not a BIO_DELETE.
639 */
640 if (g_stripe_fast && bp->bio_length <= maxphys &&
641 bp->bio_length >= stripesize * sc->sc_ndisks &&
642 (bp->bio_flags & BIO_UNMAPPED) == 0 &&
643 bp->bio_cmd != BIO_DELETE) {
644 fast = 1;
645 }
646 error = 0;
647 if (fast) {
648 error = g_stripe_start_fast(bp, no, offset, length);
649 if (error != 0)
650 g_stripe_fast_failed++;
651 }
652 /*
653 * Do use "economic" when:
654 * 1. "Economic" mode is ON.
655 * or
656 * 2. "Fast" mode failed. It can only fail if there is no memory.
657 */
658 if (!fast || error != 0)
659 error = g_stripe_start_economic(bp, no, offset, length);
660 if (error != 0) {
661 if (bp->bio_error == 0)
662 bp->bio_error = error;
663 g_io_deliver(bp, bp->bio_error);
664 }
665 }
666
667 static void
g_stripe_check_and_run(struct g_stripe_softc * sc)668 g_stripe_check_and_run(struct g_stripe_softc *sc)
669 {
670 struct g_provider *dp;
671 off_t mediasize, ms;
672 u_int no, sectorsize = 0;
673
674 g_topology_assert();
675 if (g_stripe_nvalid(sc) != sc->sc_ndisks)
676 return;
677
678 sc->sc_provider = g_new_providerf(sc->sc_geom, "stripe/%s",
679 sc->sc_name);
680 sc->sc_provider->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE;
681 if (g_stripe_fast == 0)
682 sc->sc_provider->flags |= G_PF_ACCEPT_UNMAPPED;
683 /*
684 * Find the smallest disk.
685 */
686 mediasize = sc->sc_disks[0]->provider->mediasize;
687 if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC)
688 mediasize -= sc->sc_disks[0]->provider->sectorsize;
689 mediasize -= mediasize % sc->sc_stripesize;
690 sectorsize = sc->sc_disks[0]->provider->sectorsize;
691 for (no = 1; no < sc->sc_ndisks; no++) {
692 dp = sc->sc_disks[no]->provider;
693 ms = dp->mediasize;
694 if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC)
695 ms -= dp->sectorsize;
696 ms -= ms % sc->sc_stripesize;
697 if (ms < mediasize)
698 mediasize = ms;
699 sectorsize = lcm(sectorsize, dp->sectorsize);
700
701 /* A provider underneath us doesn't support unmapped */
702 if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) {
703 G_STRIPE_DEBUG(1, "Cancelling unmapped "
704 "because of %s.", dp->name);
705 sc->sc_provider->flags &= ~G_PF_ACCEPT_UNMAPPED;
706 }
707 }
708 sc->sc_provider->sectorsize = sectorsize;
709 sc->sc_provider->mediasize = mediasize * sc->sc_ndisks;
710 sc->sc_provider->stripesize = sc->sc_stripesize;
711 sc->sc_provider->stripeoffset = 0;
712 g_error_provider(sc->sc_provider, 0);
713
714 G_STRIPE_DEBUG(0, "Device %s activated.", sc->sc_provider->name);
715 }
716
717 static int
g_stripe_read_metadata(struct g_consumer * cp,struct g_stripe_metadata * md)718 g_stripe_read_metadata(struct g_consumer *cp, struct g_stripe_metadata *md)
719 {
720 struct g_provider *pp;
721 u_char *buf;
722 int error;
723
724 g_topology_assert();
725
726 error = g_access(cp, 1, 0, 0);
727 if (error != 0)
728 return (error);
729 pp = cp->provider;
730 g_topology_unlock();
731 buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
732 &error);
733 g_topology_lock();
734 g_access(cp, -1, 0, 0);
735 if (buf == NULL)
736 return (error);
737
738 /* Decode metadata. */
739 stripe_metadata_decode(buf, md);
740 g_free(buf);
741
742 return (0);
743 }
744
745 /*
746 * Add disk to given device.
747 */
748 static int
g_stripe_add_disk(struct g_stripe_softc * sc,struct g_provider * pp,u_int no)749 g_stripe_add_disk(struct g_stripe_softc *sc, struct g_provider *pp, u_int no)
750 {
751 struct g_consumer *cp, *fcp;
752 struct g_geom *gp;
753 int error;
754
755 g_topology_assert();
756 /* Metadata corrupted? */
757 if (no >= sc->sc_ndisks)
758 return (EINVAL);
759
760 /* Check if disk is not already attached. */
761 if (sc->sc_disks[no] != NULL)
762 return (EEXIST);
763
764 gp = sc->sc_geom;
765 fcp = LIST_FIRST(&gp->consumer);
766
767 cp = g_new_consumer(gp);
768 cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
769 cp->private = NULL;
770 cp->index = no;
771 error = g_attach(cp, pp);
772 if (error != 0) {
773 g_destroy_consumer(cp);
774 return (error);
775 }
776
777 if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) {
778 error = g_access(cp, fcp->acr, fcp->acw, fcp->ace);
779 if (error != 0) {
780 g_detach(cp);
781 g_destroy_consumer(cp);
782 return (error);
783 }
784 }
785 if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) {
786 struct g_stripe_metadata md;
787
788 /* Reread metadata. */
789 error = g_stripe_read_metadata(cp, &md);
790 if (error != 0)
791 goto fail;
792
793 if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0 ||
794 strcmp(md.md_name, sc->sc_name) != 0 ||
795 md.md_id != sc->sc_id) {
796 G_STRIPE_DEBUG(0, "Metadata on %s changed.", pp->name);
797 goto fail;
798 }
799 }
800
801 sc->sc_disks[no] = cp;
802
803 /* cascade candelete */
804 error = g_access(cp, 1, 0, 0);
805 if (error == 0) {
806 int can_delete;
807
808 error = g_getattr("GEOM::candelete", cp, &can_delete);
809 if (error == 0 && can_delete != 0)
810 sc->sc_flags |= G_STRIPE_FLAG_CANDELETE;
811 G_STRIPE_DEBUG(1, "Provider %s candelete %i.", pp->name,
812 can_delete);
813 g_access(cp, -1, 0, 0);
814 }
815
816 G_STRIPE_DEBUG(0, "Disk %s attached to %s.", pp->name, sc->sc_name);
817 g_stripe_check_and_run(sc);
818
819 return (0);
820 fail:
821 if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0))
822 g_access(cp, -fcp->acr, -fcp->acw, -fcp->ace);
823 g_detach(cp);
824 g_destroy_consumer(cp);
825 return (error);
826 }
827
828 static struct g_geom *
g_stripe_create(struct g_class * mp,const struct g_stripe_metadata * md,u_int type)829 g_stripe_create(struct g_class *mp, const struct g_stripe_metadata *md,
830 u_int type)
831 {
832 struct g_stripe_softc *sc;
833 struct g_geom *gp;
834 u_int no;
835
836 g_topology_assert();
837 G_STRIPE_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
838 md->md_id);
839
840 /* Two disks is minimum. */
841 if (md->md_all < 2) {
842 G_STRIPE_DEBUG(0, "Too few disks defined for %s.", md->md_name);
843 return (NULL);
844 }
845 #if 0
846 /* Stripe size have to be grater than or equal to sector size. */
847 if (md->md_stripesize < sectorsize) {
848 G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name);
849 return (NULL);
850 }
851 #endif
852 /* Stripe size have to be power of 2. */
853 if (!powerof2(md->md_stripesize)) {
854 G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name);
855 return (NULL);
856 }
857
858 /* Check for duplicate unit */
859 LIST_FOREACH(gp, &mp->geom, geom) {
860 sc = gp->softc;
861 if (sc != NULL && strcmp(sc->sc_name, md->md_name) == 0) {
862 G_STRIPE_DEBUG(0, "Device %s already configured.",
863 sc->sc_name);
864 return (NULL);
865 }
866 }
867 gp = g_new_geomf(mp, "%s", md->md_name);
868 sc = malloc(sizeof(*sc), M_STRIPE, M_WAITOK | M_ZERO);
869 gp->start = g_stripe_start;
870 gp->spoiled = g_stripe_orphan;
871 gp->orphan = g_stripe_orphan;
872 gp->access = g_stripe_access;
873 gp->dumpconf = g_stripe_dumpconf;
874
875 sc->sc_id = md->md_id;
876 sc->sc_stripesize = md->md_stripesize;
877 sc->sc_stripebits = bitcount32(sc->sc_stripesize - 1);
878 sc->sc_ndisks = md->md_all;
879 sc->sc_disks = malloc(sizeof(struct g_consumer *) * sc->sc_ndisks,
880 M_STRIPE, M_WAITOK | M_ZERO);
881 for (no = 0; no < sc->sc_ndisks; no++)
882 sc->sc_disks[no] = NULL;
883 sc->sc_type = type;
884 mtx_init(&sc->sc_lock, "gstripe lock", NULL, MTX_DEF);
885
886 gp->softc = sc;
887 sc->sc_geom = gp;
888 sc->sc_provider = NULL;
889
890 G_STRIPE_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id);
891
892 return (gp);
893 }
894
895 static int
g_stripe_destroy(struct g_stripe_softc * sc,boolean_t force)896 g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force)
897 {
898 struct g_provider *pp;
899 struct g_consumer *cp, *cp1;
900 struct g_geom *gp;
901
902 g_topology_assert();
903
904 if (sc == NULL)
905 return (ENXIO);
906
907 pp = sc->sc_provider;
908 if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
909 if (force) {
910 G_STRIPE_DEBUG(0, "Device %s is still open, so it "
911 "can't be definitely removed.", pp->name);
912 } else {
913 G_STRIPE_DEBUG(1,
914 "Device %s is still open (r%dw%de%d).", pp->name,
915 pp->acr, pp->acw, pp->ace);
916 return (EBUSY);
917 }
918 }
919
920 gp = sc->sc_geom;
921 LIST_FOREACH_SAFE(cp, &gp->consumer, consumer, cp1) {
922 g_stripe_remove_disk(cp);
923 if (cp1 == NULL)
924 return (0); /* Recursion happened. */
925 }
926 if (!LIST_EMPTY(&gp->consumer))
927 return (EINPROGRESS);
928
929 gp->softc = NULL;
930 KASSERT(sc->sc_provider == NULL, ("Provider still exists? (device=%s)",
931 gp->name));
932 free(sc->sc_disks, M_STRIPE);
933 mtx_destroy(&sc->sc_lock);
934 free(sc, M_STRIPE);
935 G_STRIPE_DEBUG(0, "Device %s destroyed.", gp->name);
936 g_wither_geom(gp, ENXIO);
937 return (0);
938 }
939
940 static int
g_stripe_destroy_geom(struct gctl_req * req __unused,struct g_class * mp __unused,struct g_geom * gp)941 g_stripe_destroy_geom(struct gctl_req *req __unused,
942 struct g_class *mp __unused, struct g_geom *gp)
943 {
944 struct g_stripe_softc *sc;
945
946 sc = gp->softc;
947 return (g_stripe_destroy(sc, 0));
948 }
949
950 static struct g_geom *
g_stripe_taste(struct g_class * mp,struct g_provider * pp,int flags __unused)951 g_stripe_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
952 {
953 struct g_stripe_metadata md;
954 struct g_stripe_softc *sc;
955 struct g_consumer *cp;
956 struct g_geom *gp;
957 int error;
958
959 g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
960 g_topology_assert();
961
962 /* Skip providers that are already open for writing. */
963 if (pp->acw > 0)
964 return (NULL);
965
966 G_STRIPE_DEBUG(3, "Tasting %s.", pp->name);
967
968 gp = g_new_geomf(mp, "stripe:taste");
969 gp->start = g_stripe_start;
970 gp->access = g_stripe_access;
971 gp->orphan = g_stripe_orphan;
972 cp = g_new_consumer(gp);
973 cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
974 error = g_attach(cp, pp);
975 if (error == 0) {
976 error = g_stripe_read_metadata(cp, &md);
977 g_detach(cp);
978 }
979 g_destroy_consumer(cp);
980 g_destroy_geom(gp);
981 if (error != 0)
982 return (NULL);
983 gp = NULL;
984
985 if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0)
986 return (NULL);
987 if (md.md_version > G_STRIPE_VERSION) {
988 printf("geom_stripe.ko module is too old to handle %s.\n",
989 pp->name);
990 return (NULL);
991 }
992 /*
993 * Backward compatibility:
994 */
995 /* There was no md_provider field in earlier versions of metadata. */
996 if (md.md_version < 2)
997 bzero(md.md_provider, sizeof(md.md_provider));
998 /* There was no md_provsize field in earlier versions of metadata. */
999 if (md.md_version < 3)
1000 md.md_provsize = pp->mediasize;
1001
1002 if (md.md_provider[0] != '\0' &&
1003 !g_compare_names(md.md_provider, pp->name))
1004 return (NULL);
1005 if (md.md_provsize != pp->mediasize)
1006 return (NULL);
1007
1008 /*
1009 * Let's check if device already exists.
1010 */
1011 sc = NULL;
1012 LIST_FOREACH(gp, &mp->geom, geom) {
1013 sc = gp->softc;
1014 if (sc == NULL)
1015 continue;
1016 if (sc->sc_type != G_STRIPE_TYPE_AUTOMATIC)
1017 continue;
1018 if (strcmp(md.md_name, sc->sc_name) != 0)
1019 continue;
1020 if (md.md_id != sc->sc_id)
1021 continue;
1022 break;
1023 }
1024 if (gp != NULL) {
1025 G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
1026 error = g_stripe_add_disk(sc, pp, md.md_no);
1027 if (error != 0) {
1028 G_STRIPE_DEBUG(0,
1029 "Cannot add disk %s to %s (error=%d).", pp->name,
1030 gp->name, error);
1031 return (NULL);
1032 }
1033 } else {
1034 gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_AUTOMATIC);
1035 if (gp == NULL) {
1036 G_STRIPE_DEBUG(0, "Cannot create device %s.",
1037 md.md_name);
1038 return (NULL);
1039 }
1040 sc = gp->softc;
1041 G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
1042 error = g_stripe_add_disk(sc, pp, md.md_no);
1043 if (error != 0) {
1044 G_STRIPE_DEBUG(0,
1045 "Cannot add disk %s to %s (error=%d).", pp->name,
1046 gp->name, error);
1047 g_stripe_destroy(sc, 1);
1048 return (NULL);
1049 }
1050 }
1051
1052 return (gp);
1053 }
1054
1055 static void
g_stripe_ctl_create(struct gctl_req * req,struct g_class * mp)1056 g_stripe_ctl_create(struct gctl_req *req, struct g_class *mp)
1057 {
1058 u_int attached, no;
1059 struct g_stripe_metadata md;
1060 struct g_provider *pp;
1061 struct g_stripe_softc *sc;
1062 struct g_geom *gp;
1063 struct sbuf *sb;
1064 off_t *stripesize;
1065 const char *name;
1066 char param[16];
1067 int *nargs;
1068
1069 g_topology_assert();
1070 nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
1071 if (nargs == NULL) {
1072 gctl_error(req, "No '%s' argument.", "nargs");
1073 return;
1074 }
1075 if (*nargs <= 2) {
1076 gctl_error(req, "Too few arguments.");
1077 return;
1078 }
1079
1080 strlcpy(md.md_magic, G_STRIPE_MAGIC, sizeof(md.md_magic));
1081 md.md_version = G_STRIPE_VERSION;
1082 name = gctl_get_asciiparam(req, "arg0");
1083 if (name == NULL) {
1084 gctl_error(req, "No 'arg%u' argument.", 0);
1085 return;
1086 }
1087 strlcpy(md.md_name, name, sizeof(md.md_name));
1088 md.md_id = arc4random();
1089 md.md_no = 0;
1090 md.md_all = *nargs - 1;
1091 stripesize = gctl_get_paraml(req, "stripesize", sizeof(*stripesize));
1092 if (stripesize == NULL) {
1093 gctl_error(req, "No '%s' argument.", "stripesize");
1094 return;
1095 }
1096 md.md_stripesize = (uint32_t)*stripesize;
1097 bzero(md.md_provider, sizeof(md.md_provider));
1098 /* This field is not important here. */
1099 md.md_provsize = 0;
1100
1101 /* Check all providers are valid */
1102 for (no = 1; no < *nargs; no++) {
1103 snprintf(param, sizeof(param), "arg%u", no);
1104 pp = gctl_get_provider(req, param);
1105 if (pp == NULL)
1106 return;
1107 }
1108
1109 gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_MANUAL);
1110 if (gp == NULL) {
1111 gctl_error(req, "Can't configure %s.", md.md_name);
1112 return;
1113 }
1114
1115 sc = gp->softc;
1116 sb = sbuf_new_auto();
1117 sbuf_printf(sb, "Can't attach disk(s) to %s:", gp->name);
1118 for (attached = 0, no = 1; no < *nargs; no++) {
1119 snprintf(param, sizeof(param), "arg%u", no);
1120 pp = gctl_get_provider(req, param);
1121 if (pp == NULL) {
1122 name = gctl_get_asciiparam(req, param);
1123 MPASS(name != NULL);
1124 sbuf_printf(sb, " %s", name);
1125 continue;
1126 }
1127 if (g_stripe_add_disk(sc, pp, no - 1) != 0) {
1128 G_STRIPE_DEBUG(1, "Disk %u (%s) not attached to %s.",
1129 no, pp->name, gp->name);
1130 sbuf_printf(sb, " %s", pp->name);
1131 continue;
1132 }
1133 attached++;
1134 }
1135 sbuf_finish(sb);
1136 if (md.md_all != attached) {
1137 g_stripe_destroy(gp->softc, 1);
1138 gctl_error(req, "%s", sbuf_data(sb));
1139 }
1140 sbuf_delete(sb);
1141 }
1142
1143 static struct g_stripe_softc *
g_stripe_find_device(struct g_class * mp,const char * name)1144 g_stripe_find_device(struct g_class *mp, const char *name)
1145 {
1146 struct g_stripe_softc *sc;
1147 struct g_geom *gp;
1148
1149 LIST_FOREACH(gp, &mp->geom, geom) {
1150 sc = gp->softc;
1151 if (sc == NULL)
1152 continue;
1153 if (strcmp(sc->sc_name, name) == 0)
1154 return (sc);
1155 }
1156 return (NULL);
1157 }
1158
1159 static void
g_stripe_ctl_destroy(struct gctl_req * req,struct g_class * mp)1160 g_stripe_ctl_destroy(struct gctl_req *req, struct g_class *mp)
1161 {
1162 struct g_stripe_softc *sc;
1163 int *force, *nargs, error;
1164 const char *name;
1165 char param[16];
1166 u_int i;
1167
1168 g_topology_assert();
1169
1170 nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
1171 if (nargs == NULL) {
1172 gctl_error(req, "No '%s' argument.", "nargs");
1173 return;
1174 }
1175 if (*nargs <= 0) {
1176 gctl_error(req, "Missing device(s).");
1177 return;
1178 }
1179 force = gctl_get_paraml(req, "force", sizeof(*force));
1180 if (force == NULL) {
1181 gctl_error(req, "No '%s' argument.", "force");
1182 return;
1183 }
1184
1185 for (i = 0; i < (u_int)*nargs; i++) {
1186 snprintf(param, sizeof(param), "arg%u", i);
1187 name = gctl_get_asciiparam(req, param);
1188 if (name == NULL) {
1189 gctl_error(req, "No 'arg%u' argument.", i);
1190 return;
1191 }
1192 sc = g_stripe_find_device(mp, name);
1193 if (sc == NULL) {
1194 gctl_error(req, "No such device: %s.", name);
1195 return;
1196 }
1197 error = g_stripe_destroy(sc, *force);
1198 if (error != 0) {
1199 gctl_error(req, "Cannot destroy device %s (error=%d).",
1200 sc->sc_name, error);
1201 return;
1202 }
1203 }
1204 }
1205
1206 static void
g_stripe_config(struct gctl_req * req,struct g_class * mp,const char * verb)1207 g_stripe_config(struct gctl_req *req, struct g_class *mp, const char *verb)
1208 {
1209 uint32_t *version;
1210
1211 g_topology_assert();
1212
1213 version = gctl_get_paraml(req, "version", sizeof(*version));
1214 if (version == NULL) {
1215 gctl_error(req, "No '%s' argument.", "version");
1216 return;
1217 }
1218 if (*version != G_STRIPE_VERSION) {
1219 gctl_error(req, "Userland and kernel parts are out of sync.");
1220 return;
1221 }
1222
1223 if (strcmp(verb, "create") == 0) {
1224 g_stripe_ctl_create(req, mp);
1225 return;
1226 } else if (strcmp(verb, "destroy") == 0 ||
1227 strcmp(verb, "stop") == 0) {
1228 g_stripe_ctl_destroy(req, mp);
1229 return;
1230 }
1231
1232 gctl_error(req, "Unknown verb.");
1233 }
1234
1235 static void
g_stripe_dumpconf(struct sbuf * sb,const char * indent,struct g_geom * gp,struct g_consumer * cp,struct g_provider * pp)1236 g_stripe_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
1237 struct g_consumer *cp, struct g_provider *pp)
1238 {
1239 struct g_stripe_softc *sc;
1240
1241 sc = gp->softc;
1242 if (sc == NULL)
1243 return;
1244 if (pp != NULL) {
1245 /* Nothing here. */
1246 } else if (cp != NULL) {
1247 sbuf_printf(sb, "%s<Number>%u</Number>\n", indent,
1248 (u_int)cp->index);
1249 } else {
1250 sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
1251 sbuf_printf(sb, "%s<Stripesize>%ju</Stripesize>\n", indent,
1252 (uintmax_t)sc->sc_stripesize);
1253 sbuf_printf(sb, "%s<Type>", indent);
1254 switch (sc->sc_type) {
1255 case G_STRIPE_TYPE_AUTOMATIC:
1256 sbuf_cat(sb, "AUTOMATIC");
1257 break;
1258 case G_STRIPE_TYPE_MANUAL:
1259 sbuf_cat(sb, "MANUAL");
1260 break;
1261 default:
1262 sbuf_cat(sb, "UNKNOWN");
1263 break;
1264 }
1265 sbuf_cat(sb, "</Type>\n");
1266 sbuf_printf(sb, "%s<Status>Total=%u, Online=%u</Status>\n",
1267 indent, sc->sc_ndisks, g_stripe_nvalid(sc));
1268 sbuf_printf(sb, "%s<State>", indent);
1269 if (sc->sc_provider != NULL && sc->sc_provider->error == 0)
1270 sbuf_cat(sb, "UP");
1271 else
1272 sbuf_cat(sb, "DOWN");
1273 sbuf_cat(sb, "</State>\n");
1274 }
1275 }
1276
/*
 * Class and module registration: declares g_stripe_class as a GEOM class
 * under the module name g_stripe, and records version 0 for geom_stripe.
 * NOTE(review): exact semantics come from the DECLARE_GEOM_CLASS and
 * MODULE_VERSION macro definitions, which are not visible here.
 */
DECLARE_GEOM_CLASS(g_stripe_class, g_stripe);
MODULE_VERSION(geom_stripe, 0);
1279