xref: /freebsd/sys/geom/vinum/geom_vinum_raid5.c (revision fdafd315)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2004, 2007 Lukas Ertl
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/param.h>
30 #include <sys/bio.h>
31 #include <sys/lock.h>
32 #include <sys/malloc.h>
33 #include <sys/systm.h>
34 
35 #include <geom/geom.h>
36 #include <geom/geom_dbg.h>
37 #include <geom/vinum/geom_vinum_var.h>
38 #include <geom/vinum/geom_vinum_raid5.h>
39 #include <geom/vinum/geom_vinum.h>
40 
41 static int		gv_raid5_offset(struct gv_plex *, off_t, off_t,
42 			    off_t *, off_t *, int *, int *, int);
43 static struct bio *	gv_raid5_clone_bio(struct bio *, struct gv_sd *,
44 			    struct gv_raid5_packet *, caddr_t, int);
45 static int	gv_raid5_request(struct gv_plex *, struct gv_raid5_packet *,
46 		    struct bio *, caddr_t, off_t, off_t, int *);
47 static int	gv_raid5_check(struct gv_plex *, struct gv_raid5_packet *,
48 		    struct bio *, caddr_t, off_t, off_t);
49 static int	gv_raid5_rebuild(struct gv_plex *, struct gv_raid5_packet *,
50 		    struct bio *, caddr_t, off_t, off_t);
51 
52 struct gv_raid5_packet *
gv_raid5_start(struct gv_plex * p,struct bio * bp,caddr_t addr,off_t boff,off_t bcount)53 gv_raid5_start(struct gv_plex *p, struct bio *bp, caddr_t addr, off_t boff,
54     off_t bcount)
55 {
56 	struct bio *cbp;
57 	struct gv_raid5_packet *wp, *wp2;
58 	struct gv_bioq *bq, *bq2;
59 	int err, delay;
60 
61 	delay = 0;
62 	wp = g_malloc(sizeof(*wp), M_WAITOK | M_ZERO);
63 	wp->bio = bp;
64 	wp->waiting = NULL;
65 	wp->parity = NULL;
66 	TAILQ_INIT(&wp->bits);
67 
68 	if (bp->bio_pflags & GV_BIO_REBUILD)
69 		err = gv_raid5_rebuild(p, wp, bp, addr, boff, bcount);
70 	else if (bp->bio_pflags & GV_BIO_CHECK)
71 		err = gv_raid5_check(p, wp, bp, addr, boff, bcount);
72 	else
73 		err = gv_raid5_request(p, wp, bp, addr, boff, bcount, &delay);
74 
75 	/* Means we have a delayed request. */
76 	if (delay) {
77 		g_free(wp);
78 		return (NULL);
79 	}
80 
81 	/*
82 	 * Building the sub-request failed, we probably need to clean up a lot.
83 	 */
84 	if (err) {
85 		G_VINUM_LOGREQ(0, bp, "raid5 plex request failed.");
86 		TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
87 			TAILQ_REMOVE(&wp->bits, bq, queue);
88 			g_free(bq);
89 		}
90 		if (wp->waiting != NULL) {
91 			if (wp->waiting->bio_cflags & GV_BIO_MALLOC)
92 				g_free(wp->waiting->bio_data);
93 			gv_drive_done(wp->waiting->bio_caller1);
94 			g_destroy_bio(wp->waiting);
95 		}
96 		if (wp->parity != NULL) {
97 			if (wp->parity->bio_cflags & GV_BIO_MALLOC)
98 				g_free(wp->parity->bio_data);
99 			gv_drive_done(wp->parity->bio_caller1);
100 			g_destroy_bio(wp->parity);
101 		}
102 		g_free(wp);
103 
104 		TAILQ_FOREACH_SAFE(wp, &p->packets, list, wp2) {
105 			if (wp->bio != bp)
106 				continue;
107 
108 			TAILQ_REMOVE(&p->packets, wp, list);
109 			TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
110 				TAILQ_REMOVE(&wp->bits, bq, queue);
111 				g_free(bq);
112 			}
113 			g_free(wp);
114 		}
115 
116 		cbp = bioq_takefirst(p->bqueue);
117 		while (cbp != NULL) {
118 			if (cbp->bio_cflags & GV_BIO_MALLOC)
119 				g_free(cbp->bio_data);
120 			gv_drive_done(cbp->bio_caller1);
121 			g_destroy_bio(cbp);
122 			cbp = bioq_takefirst(p->bqueue);
123 		}
124 
125 		/* If internal, stop and reset state. */
126 		if (bp->bio_pflags & GV_BIO_INTERNAL) {
127 			if (bp->bio_pflags & GV_BIO_MALLOC)
128 				g_free(bp->bio_data);
129 			g_destroy_bio(bp);
130 			/* Reset flags. */
131 			p->flags &= ~(GV_PLEX_SYNCING | GV_PLEX_REBUILDING |
132 			    GV_PLEX_GROWING);
133 			return (NULL);
134 		}
135 		g_io_deliver(bp, err);
136 		return (NULL);
137 	}
138 
139 	return (wp);
140 }
141 
142 /*
143  * Check if the stripe that the work packet wants is already being used by
144  * some other work packet.
145  */
146 int
gv_stripe_active(struct gv_plex * p,struct bio * bp)147 gv_stripe_active(struct gv_plex *p, struct bio *bp)
148 {
149 	struct gv_raid5_packet *wp, *owp;
150 	int overlap;
151 
152 	wp = bp->bio_caller2;
153 	if (wp->lockbase == -1)
154 		return (0);
155 
156 	overlap = 0;
157 	TAILQ_FOREACH(owp, &p->packets, list) {
158 		if (owp == wp)
159 			break;
160 		if ((wp->lockbase >= owp->lockbase) &&
161 		    (wp->lockbase <= owp->lockbase + owp->length)) {
162 			overlap++;
163 			break;
164 		}
165 		if ((wp->lockbase <= owp->lockbase) &&
166 		    (wp->lockbase + wp->length >= owp->lockbase)) {
167 			overlap++;
168 			break;
169 		}
170 	}
171 
172 	return (overlap);
173 }
174 
175 static int
gv_raid5_check(struct gv_plex * p,struct gv_raid5_packet * wp,struct bio * bp,caddr_t addr,off_t boff,off_t bcount)176 gv_raid5_check(struct gv_plex *p, struct gv_raid5_packet *wp, struct bio *bp,
177     caddr_t addr, off_t boff, off_t bcount)
178 {
179 	struct gv_sd *parity, *s;
180 	struct gv_bioq *bq;
181 	struct bio *cbp;
182 	int i, psdno;
183 	off_t real_len, real_off;
184 
185 	if (p == NULL || LIST_EMPTY(&p->subdisks))
186 		return (ENXIO);
187 
188 	gv_raid5_offset(p, boff, bcount, &real_off, &real_len, NULL, &psdno, 1);
189 
190 	/* Find the right subdisk. */
191 	parity = NULL;
192 	i = 0;
193 	LIST_FOREACH(s, &p->subdisks, in_plex) {
194 		if (i == psdno) {
195 			parity = s;
196 			break;
197 		}
198 		i++;
199 	}
200 
201 	/* Parity stripe not found. */
202 	if (parity == NULL)
203 		return (ENXIO);
204 
205 	if (parity->state != GV_SD_UP)
206 		return (ENXIO);
207 
208 	wp->length = real_len;
209 	wp->data = addr;
210 	wp->lockbase = real_off;
211 
212 	/* Read all subdisks. */
213 	LIST_FOREACH(s, &p->subdisks, in_plex) {
214 		/* Skip the parity subdisk. */
215 		if (s == parity)
216 			continue;
217 		/* Skip growing subdisks. */
218 		if (s->flags & GV_SD_GROW)
219 			continue;
220 
221 		cbp = gv_raid5_clone_bio(bp, s, wp, NULL, 1);
222 		if (cbp == NULL)
223 			return (ENOMEM);
224 		cbp->bio_cmd = BIO_READ;
225 
226 		bioq_insert_tail(p->bqueue, cbp);
227 
228 		bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
229 		bq->bp = cbp;
230 		TAILQ_INSERT_TAIL(&wp->bits, bq, queue);
231 	}
232 
233 	/* Read the parity data. */
234 	cbp = gv_raid5_clone_bio(bp, parity, wp, NULL, 1);
235 	if (cbp == NULL)
236 		return (ENOMEM);
237 	cbp->bio_cmd = BIO_READ;
238 	wp->waiting = cbp;
239 
240 	/*
241 	 * In case we want to rebuild the parity, create an extra BIO to write
242 	 * it out.  It also acts as buffer for the XOR operations.
243 	 */
244 	cbp = gv_raid5_clone_bio(bp, parity, wp, addr, 1);
245 	if (cbp == NULL)
246 		return (ENOMEM);
247 	wp->parity = cbp;
248 
249 	return (0);
250 }
251 
252 /* Rebuild a degraded RAID5 plex. */
253 static int
gv_raid5_rebuild(struct gv_plex * p,struct gv_raid5_packet * wp,struct bio * bp,caddr_t addr,off_t boff,off_t bcount)254 gv_raid5_rebuild(struct gv_plex *p, struct gv_raid5_packet *wp, struct bio *bp,
255     caddr_t addr, off_t boff, off_t bcount)
256 {
257 	struct gv_sd *broken, *s;
258 	struct gv_bioq *bq;
259 	struct bio *cbp;
260 	off_t real_len, real_off;
261 
262 	if (p == NULL || LIST_EMPTY(&p->subdisks))
263 		return (ENXIO);
264 
265 	gv_raid5_offset(p, boff, bcount, &real_off, &real_len, NULL, NULL, 1);
266 
267 	/* Find the right subdisk. */
268 	broken = NULL;
269 	LIST_FOREACH(s, &p->subdisks, in_plex) {
270 		if (s->state != GV_SD_UP)
271 			broken = s;
272 	}
273 
274 	/* Broken stripe not found. */
275 	if (broken == NULL)
276 		return (ENXIO);
277 
278 	switch (broken->state) {
279 	case GV_SD_UP:
280 		return (EINVAL);
281 
282 	case GV_SD_STALE:
283 		if (!(bp->bio_pflags & GV_BIO_REBUILD))
284 			return (ENXIO);
285 
286 		G_VINUM_DEBUG(1, "sd %s is reviving", broken->name);
287 		gv_set_sd_state(broken, GV_SD_REVIVING, GV_SETSTATE_FORCE);
288 		/* Set this bit now, but should be set at end. */
289 		broken->flags |= GV_SD_CANGOUP;
290 		break;
291 
292 	case GV_SD_REVIVING:
293 		break;
294 
295 	default:
296 		/* All other subdisk states mean it's not accessible. */
297 		return (ENXIO);
298 	}
299 
300 	wp->length = real_len;
301 	wp->data = addr;
302 	wp->lockbase = real_off;
303 
304 	KASSERT(wp->length >= 0, ("gv_rebuild_raid5: wp->length < 0"));
305 
306 	/* Read all subdisks. */
307 	LIST_FOREACH(s, &p->subdisks, in_plex) {
308 		/* Skip the broken subdisk. */
309 		if (s == broken)
310 			continue;
311 
312 		/* Skip growing subdisks. */
313 		if (s->flags & GV_SD_GROW)
314 			continue;
315 
316 		cbp = gv_raid5_clone_bio(bp, s, wp, NULL, 1);
317 		if (cbp == NULL)
318 			return (ENOMEM);
319 		cbp->bio_cmd = BIO_READ;
320 
321 		bioq_insert_tail(p->bqueue, cbp);
322 
323 		bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
324 		bq->bp = cbp;
325 		TAILQ_INSERT_TAIL(&wp->bits, bq, queue);
326 	}
327 
328 	/* Write the parity data. */
329 	cbp = gv_raid5_clone_bio(bp, broken, wp, NULL, 1);
330 	if (cbp == NULL)
331 		return (ENOMEM);
332 	wp->parity = cbp;
333 
334 	p->synced = boff;
335 
336 	/* Post notification that we're finished. */
337 	return (0);
338 }
339 
340 /* Build a request group to perform (part of) a RAID5 request. */
341 static int
gv_raid5_request(struct gv_plex * p,struct gv_raid5_packet * wp,struct bio * bp,caddr_t addr,off_t boff,off_t bcount,int * delay)342 gv_raid5_request(struct gv_plex *p, struct gv_raid5_packet *wp,
343     struct bio *bp, caddr_t addr, off_t boff, off_t bcount, int *delay)
344 {
345 	struct gv_sd *broken, *original, *parity, *s;
346 	struct gv_bioq *bq;
347 	struct bio *cbp;
348 	int i, psdno, sdno, type, grow;
349 	off_t real_len, real_off;
350 
351 	if (p == NULL || LIST_EMPTY(&p->subdisks))
352 		return (ENXIO);
353 
354 	/* We are optimistic and assume that this request will be OK. */
355 #define	REQ_TYPE_NORMAL		0
356 #define	REQ_TYPE_DEGRADED	1
357 #define	REQ_TYPE_NOPARITY	2
358 
359 	type = REQ_TYPE_NORMAL;
360 	original = parity = broken = NULL;
361 
362 	/* XXX: The resize won't crash with rebuild or sync, but we should still
363 	 * be aware of it. Also this should perhaps be done on rebuild/check as
364 	 * well?
365 	 */
366 	/* If we're over, we must use the old. */
367 	if (boff >= p->synced) {
368 		grow = 1;
369 	/* Or if over the resized offset, we use all drives. */
370 	} else if (boff + bcount <= p->synced) {
371 		grow = 0;
372 	/* Else, we're in the middle, and must wait a bit. */
373 	} else {
374 		bioq_disksort(p->rqueue, bp);
375 		*delay = 1;
376 		return (0);
377 	}
378 	gv_raid5_offset(p, boff, bcount, &real_off, &real_len,
379 	    &sdno, &psdno, grow);
380 
381 	/* Find the right subdisks. */
382 	i = 0;
383 	LIST_FOREACH(s, &p->subdisks, in_plex) {
384 		if (i == sdno)
385 			original = s;
386 		if (i == psdno)
387 			parity = s;
388 		if (s->state != GV_SD_UP)
389 			broken = s;
390 		i++;
391 	}
392 
393 	if ((original == NULL) || (parity == NULL))
394 		return (ENXIO);
395 
396 	/* Our data stripe is missing. */
397 	if (original->state != GV_SD_UP)
398 		type = REQ_TYPE_DEGRADED;
399 
400 	/* If synchronizing request, just write it if disks are stale. */
401 	if (original->state == GV_SD_STALE && parity->state == GV_SD_STALE &&
402 	    bp->bio_pflags & GV_BIO_SYNCREQ && bp->bio_cmd == BIO_WRITE) {
403 		type = REQ_TYPE_NORMAL;
404 	/* Our parity stripe is missing. */
405 	} else if (parity->state != GV_SD_UP) {
406 		/* We cannot take another failure if we're already degraded. */
407 		if (type != REQ_TYPE_NORMAL)
408 			return (ENXIO);
409 		else
410 			type = REQ_TYPE_NOPARITY;
411 	}
412 
413 	wp->length = real_len;
414 	wp->data = addr;
415 	wp->lockbase = real_off;
416 
417 	KASSERT(wp->length >= 0, ("gv_build_raid5_request: wp->length < 0"));
418 
419 	if ((p->flags & GV_PLEX_REBUILDING) && (boff + real_len < p->synced))
420 		type = REQ_TYPE_NORMAL;
421 
422 	if ((p->flags & GV_PLEX_REBUILDING) && (boff + real_len >= p->synced)) {
423 		bioq_disksort(p->rqueue, bp);
424 		*delay = 1;
425 		return (0);
426 	}
427 
428 	switch (bp->bio_cmd) {
429 	case BIO_READ:
430 		/*
431 		 * For a degraded read we need to read in all stripes except
432 		 * the broken one plus the parity stripe and then recalculate
433 		 * the desired data.
434 		 */
435 		if (type == REQ_TYPE_DEGRADED) {
436 			bzero(wp->data, wp->length);
437 			LIST_FOREACH(s, &p->subdisks, in_plex) {
438 				/* Skip the broken subdisk. */
439 				if (s == broken)
440 					continue;
441 				/* Skip growing if within offset. */
442 				if (grow && s->flags & GV_SD_GROW)
443 					continue;
444 				cbp = gv_raid5_clone_bio(bp, s, wp, NULL, 1);
445 				if (cbp == NULL)
446 					return (ENOMEM);
447 
448 				bioq_insert_tail(p->bqueue, cbp);
449 
450 				bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
451 				bq->bp = cbp;
452 				TAILQ_INSERT_TAIL(&wp->bits, bq, queue);
453 			}
454 
455 		/* A normal read can be fulfilled with the original subdisk. */
456 		} else {
457 			cbp = gv_raid5_clone_bio(bp, original, wp, addr, 0);
458 			if (cbp == NULL)
459 				return (ENOMEM);
460 
461 			bioq_insert_tail(p->bqueue, cbp);
462 		}
463 		wp->lockbase = -1;
464 
465 		break;
466 
467 	case BIO_WRITE:
468 		/*
469 		 * A degraded write means we cannot write to the original data
470 		 * subdisk.  Thus we need to read in all valid stripes,
471 		 * recalculate the parity from the original data, and then
472 		 * write the parity stripe back out.
473 		 */
474 		if (type == REQ_TYPE_DEGRADED) {
475 			/* Read all subdisks. */
476 			LIST_FOREACH(s, &p->subdisks, in_plex) {
477 				/* Skip the broken and the parity subdisk. */
478 				if ((s == broken) || (s == parity))
479 					continue;
480 				/* Skip growing if within offset. */
481 				if (grow && s->flags & GV_SD_GROW)
482 					continue;
483 
484 				cbp = gv_raid5_clone_bio(bp, s, wp, NULL, 1);
485 				if (cbp == NULL)
486 					return (ENOMEM);
487 				cbp->bio_cmd = BIO_READ;
488 
489 				bioq_insert_tail(p->bqueue, cbp);
490 
491 				bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
492 				bq->bp = cbp;
493 				TAILQ_INSERT_TAIL(&wp->bits, bq, queue);
494 			}
495 
496 			/* Write the parity data. */
497 			cbp = gv_raid5_clone_bio(bp, parity, wp, NULL, 1);
498 			if (cbp == NULL)
499 				return (ENOMEM);
500 			bcopy(addr, cbp->bio_data, wp->length);
501 			wp->parity = cbp;
502 
503 		/*
504 		 * When the parity stripe is missing we just write out the data.
505 		 */
506 		} else if (type == REQ_TYPE_NOPARITY) {
507 			cbp = gv_raid5_clone_bio(bp, original, wp, addr, 1);
508 			if (cbp == NULL)
509 				return (ENOMEM);
510 
511 			bioq_insert_tail(p->bqueue, cbp);
512 
513 			bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
514 			bq->bp = cbp;
515 			TAILQ_INSERT_TAIL(&wp->bits, bq, queue);
516 
517 		/*
518 		 * A normal write request goes to the original subdisk, then we
519 		 * read in all other stripes, recalculate the parity and write
520 		 * out the parity again.
521 		 */
522 		} else {
523 			/* Read old parity. */
524 			cbp = gv_raid5_clone_bio(bp, parity, wp, NULL, 1);
525 			if (cbp == NULL)
526 				return (ENOMEM);
527 			cbp->bio_cmd = BIO_READ;
528 
529 			bioq_insert_tail(p->bqueue, cbp);
530 
531 			bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
532 			bq->bp = cbp;
533 			TAILQ_INSERT_TAIL(&wp->bits, bq, queue);
534 
535 			/* Read old data. */
536 			cbp = gv_raid5_clone_bio(bp, original, wp, NULL, 1);
537 			if (cbp == NULL)
538 				return (ENOMEM);
539 			cbp->bio_cmd = BIO_READ;
540 
541 			bioq_insert_tail(p->bqueue, cbp);
542 
543 			bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
544 			bq->bp = cbp;
545 			TAILQ_INSERT_TAIL(&wp->bits, bq, queue);
546 
547 			/* Write new data. */
548 			cbp = gv_raid5_clone_bio(bp, original, wp, addr, 1);
549 			if (cbp == NULL)
550 				return (ENOMEM);
551 
552 			/*
553 			 * We must not write the new data until the old data
554 			 * was read, so hold this BIO back until we're ready
555 			 * for it.
556 			 */
557 			wp->waiting = cbp;
558 
559 			/* The final bio for the parity. */
560 			cbp = gv_raid5_clone_bio(bp, parity, wp, NULL, 1);
561 			if (cbp == NULL)
562 				return (ENOMEM);
563 
564 			/* Remember that this is the BIO for the parity data. */
565 			wp->parity = cbp;
566 		}
567 		break;
568 
569 	default:
570 		return (EINVAL);
571 	}
572 
573 	return (0);
574 }
575 
576 /*
577  * Calculate the offsets in the various subdisks for a RAID5 request. Also take
578  * care of new subdisks in an expanded RAID5 array.
579  * XXX: This assumes that the new subdisks are inserted after the others (which
580  * is okay as long as plex_offset is larger). If subdisks are inserted into the
581  * plexlist before, we get problems.
582  */
583 static int
gv_raid5_offset(struct gv_plex * p,off_t boff,off_t bcount,off_t * real_off,off_t * real_len,int * sdno,int * psdno,int growing)584 gv_raid5_offset(struct gv_plex *p, off_t boff, off_t bcount, off_t *real_off,
585     off_t *real_len, int *sdno, int *psdno, int growing)
586 {
587 	struct gv_sd *s;
588 	int sd, psd, sdcount;
589 	off_t len_left, stripeend, stripeoff, stripestart;
590 
591 	sdcount = p->sdcount;
592 	if (growing) {
593 		LIST_FOREACH(s, &p->subdisks, in_plex) {
594 			if (s->flags & GV_SD_GROW)
595 				sdcount--;
596 		}
597 	}
598 
599 	/* The number of the subdisk containing the parity stripe. */
600 	psd = sdcount - 1 - ( boff / (p->stripesize * (sdcount - 1))) %
601 	    sdcount;
602 	KASSERT(psd >= 0, ("gv_raid5_offset: psdno < 0"));
603 
604 	/* Offset of the start address from the start of the stripe. */
605 	stripeoff = boff % (p->stripesize * (sdcount - 1));
606 	KASSERT(stripeoff >= 0, ("gv_raid5_offset: stripeoff < 0"));
607 
608 	/* The number of the subdisk where the stripe resides. */
609 	sd = stripeoff / p->stripesize;
610 	KASSERT(sd >= 0, ("gv_raid5_offset: sdno < 0"));
611 
612 	/* At or past parity subdisk. */
613 	if (sd >= psd)
614 		sd++;
615 
616 	/* The offset of the stripe on this subdisk. */
617 	stripestart = (boff - stripeoff) / (sdcount - 1);
618 	KASSERT(stripestart >= 0, ("gv_raid5_offset: stripestart < 0"));
619 
620 	stripeoff %= p->stripesize;
621 
622 	/* The offset of the request on this subdisk. */
623 	*real_off = stripestart + stripeoff;
624 
625 	stripeend = stripestart + p->stripesize;
626 	len_left = stripeend - *real_off;
627 	KASSERT(len_left >= 0, ("gv_raid5_offset: len_left < 0"));
628 
629 	*real_len = (bcount <= len_left) ? bcount : len_left;
630 
631 	if (sdno != NULL)
632 		*sdno = sd;
633 	if (psdno != NULL)
634 		*psdno = psd;
635 
636 	return (0);
637 }
638 
639 static struct bio *
gv_raid5_clone_bio(struct bio * bp,struct gv_sd * s,struct gv_raid5_packet * wp,caddr_t addr,int use_wp)640 gv_raid5_clone_bio(struct bio *bp, struct gv_sd *s, struct gv_raid5_packet *wp,
641     caddr_t addr, int use_wp)
642 {
643 	struct bio *cbp;
644 
645 	cbp = g_clone_bio(bp);
646 	if (cbp == NULL)
647 		return (NULL);
648 	if (addr == NULL) {
649 		cbp->bio_data = g_malloc(wp->length, M_WAITOK | M_ZERO);
650 		cbp->bio_cflags |= GV_BIO_MALLOC;
651 	} else
652 		cbp->bio_data = addr;
653 	cbp->bio_offset = wp->lockbase + s->drive_offset;
654 	cbp->bio_length = wp->length;
655 	cbp->bio_done = gv_done;
656 	cbp->bio_caller1 = s;
657 	s->drive_sc->active++;
658 	if (use_wp)
659 		cbp->bio_caller2 = wp;
660 
661 	return (cbp);
662 }
663