xref: /freebsd/sys/geom/vinum/geom_vinum_raid5.c (revision b00ab754)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2004, 2007 Lukas Ertl
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/bio.h>
34 #include <sys/lock.h>
35 #include <sys/malloc.h>
36 #include <sys/systm.h>
37 
38 #include <geom/geom.h>
39 #include <geom/vinum/geom_vinum_var.h>
40 #include <geom/vinum/geom_vinum_raid5.h>
41 #include <geom/vinum/geom_vinum.h>
42 
43 static int		gv_raid5_offset(struct gv_plex *, off_t, off_t,
44 			    off_t *, off_t *, int *, int *, int);
45 static struct bio *	gv_raid5_clone_bio(struct bio *, struct gv_sd *,
46 			    struct gv_raid5_packet *, caddr_t, int);
47 static int	gv_raid5_request(struct gv_plex *, struct gv_raid5_packet *,
48 		    struct bio *, caddr_t, off_t, off_t, int *);
49 static int	gv_raid5_check(struct gv_plex *, struct gv_raid5_packet *,
50 		    struct bio *, caddr_t, off_t, off_t);
51 static int	gv_raid5_rebuild(struct gv_plex *, struct gv_raid5_packet *,
52 		    struct bio *, caddr_t, off_t, off_t);
53 
54 struct gv_raid5_packet *
55 gv_raid5_start(struct gv_plex *p, struct bio *bp, caddr_t addr, off_t boff,
56     off_t bcount)
57 {
58 	struct bio *cbp;
59 	struct gv_raid5_packet *wp, *wp2;
60 	struct gv_bioq *bq, *bq2;
61 	int err, delay;
62 
63 	delay = 0;
64 	wp = g_malloc(sizeof(*wp), M_WAITOK | M_ZERO);
65 	wp->bio = bp;
66 	wp->waiting = NULL;
67 	wp->parity = NULL;
68 	TAILQ_INIT(&wp->bits);
69 
70 	if (bp->bio_pflags & GV_BIO_REBUILD)
71 		err = gv_raid5_rebuild(p, wp, bp, addr, boff, bcount);
72 	else if (bp->bio_pflags & GV_BIO_CHECK)
73 		err = gv_raid5_check(p, wp, bp, addr, boff, bcount);
74 	else
75 		err = gv_raid5_request(p, wp, bp, addr, boff, bcount, &delay);
76 
77 	/* Means we have a delayed request. */
78 	if (delay) {
79 		g_free(wp);
80 		return (NULL);
81 	}
82 
83 	/*
84 	 * Building the sub-request failed, we probably need to clean up a lot.
85 	 */
86 	if (err) {
87 		G_VINUM_LOGREQ(0, bp, "raid5 plex request failed.");
88 		TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
89 			TAILQ_REMOVE(&wp->bits, bq, queue);
90 			g_free(bq);
91 		}
92 		if (wp->waiting != NULL) {
93 			if (wp->waiting->bio_cflags & GV_BIO_MALLOC)
94 				g_free(wp->waiting->bio_data);
95 			g_destroy_bio(wp->waiting);
96 		}
97 		if (wp->parity != NULL) {
98 			if (wp->parity->bio_cflags & GV_BIO_MALLOC)
99 				g_free(wp->parity->bio_data);
100 			g_destroy_bio(wp->parity);
101 		}
102 		g_free(wp);
103 
104 		TAILQ_FOREACH_SAFE(wp, &p->packets, list, wp2) {
105 			if (wp->bio != bp)
106 				continue;
107 
108 			TAILQ_REMOVE(&p->packets, wp, list);
109 			TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
110 				TAILQ_REMOVE(&wp->bits, bq, queue);
111 				g_free(bq);
112 			}
113 			g_free(wp);
114 		}
115 
116 		cbp = bioq_takefirst(p->bqueue);
117 		while (cbp != NULL) {
118 			if (cbp->bio_cflags & GV_BIO_MALLOC)
119 				g_free(cbp->bio_data);
120 			g_destroy_bio(cbp);
121 			cbp = bioq_takefirst(p->bqueue);
122 		}
123 
124 		/* If internal, stop and reset state. */
125 		if (bp->bio_pflags & GV_BIO_INTERNAL) {
126 			if (bp->bio_pflags & GV_BIO_MALLOC)
127 				g_free(bp->bio_data);
128 			g_destroy_bio(bp);
129 			/* Reset flags. */
130 			p->flags &= ~(GV_PLEX_SYNCING | GV_PLEX_REBUILDING |
131 			    GV_PLEX_GROWING);
132 			return (NULL);
133 		}
134 		g_io_deliver(bp, err);
135 		return (NULL);
136 	}
137 
138 	return (wp);
139 }
140 
141 /*
142  * Check if the stripe that the work packet wants is already being used by
143  * some other work packet.
144  */
145 int
146 gv_stripe_active(struct gv_plex *p, struct bio *bp)
147 {
148 	struct gv_raid5_packet *wp, *owp;
149 	int overlap;
150 
151 	wp = bp->bio_caller2;
152 	if (wp->lockbase == -1)
153 		return (0);
154 
155 	overlap = 0;
156 	TAILQ_FOREACH(owp, &p->packets, list) {
157 		if (owp == wp)
158 			break;
159 		if ((wp->lockbase >= owp->lockbase) &&
160 		    (wp->lockbase <= owp->lockbase + owp->length)) {
161 			overlap++;
162 			break;
163 		}
164 		if ((wp->lockbase <= owp->lockbase) &&
165 		    (wp->lockbase + wp->length >= owp->lockbase)) {
166 			overlap++;
167 			break;
168 		}
169 	}
170 
171 	return (overlap);
172 }
173 
174 static int
175 gv_raid5_check(struct gv_plex *p, struct gv_raid5_packet *wp, struct bio *bp,
176     caddr_t addr, off_t boff, off_t bcount)
177 {
178 	struct gv_sd *parity, *s;
179 	struct gv_bioq *bq;
180 	struct bio *cbp;
181 	int i, psdno;
182 	off_t real_len, real_off;
183 
184 	if (p == NULL || LIST_EMPTY(&p->subdisks))
185 		return (ENXIO);
186 
187 	gv_raid5_offset(p, boff, bcount, &real_off, &real_len, NULL, &psdno, 1);
188 
189 	/* Find the right subdisk. */
190 	parity = NULL;
191 	i = 0;
192 	LIST_FOREACH(s, &p->subdisks, in_plex) {
193 		if (i == psdno) {
194 			parity = s;
195 			break;
196 		}
197 		i++;
198 	}
199 
200 	/* Parity stripe not found. */
201 	if (parity == NULL)
202 		return (ENXIO);
203 
204 	if (parity->state != GV_SD_UP)
205 		return (ENXIO);
206 
207 	wp->length = real_len;
208 	wp->data = addr;
209 	wp->lockbase = real_off;
210 
211 	/* Read all subdisks. */
212 	LIST_FOREACH(s, &p->subdisks, in_plex) {
213 		/* Skip the parity subdisk. */
214 		if (s == parity)
215 			continue;
216 		/* Skip growing subdisks. */
217 		if (s->flags & GV_SD_GROW)
218 			continue;
219 
220 		cbp = gv_raid5_clone_bio(bp, s, wp, NULL, 1);
221 		if (cbp == NULL)
222 			return (ENOMEM);
223 		cbp->bio_cmd = BIO_READ;
224 
225 		bioq_insert_tail(p->bqueue, cbp);
226 
227 		bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
228 		bq->bp = cbp;
229 		TAILQ_INSERT_TAIL(&wp->bits, bq, queue);
230 	}
231 
232 	/* Read the parity data. */
233 	cbp = gv_raid5_clone_bio(bp, parity, wp, NULL, 1);
234 	if (cbp == NULL)
235 		return (ENOMEM);
236 	cbp->bio_cmd = BIO_READ;
237 	wp->waiting = cbp;
238 
239 	/*
240 	 * In case we want to rebuild the parity, create an extra BIO to write
241 	 * it out.  It also acts as buffer for the XOR operations.
242 	 */
243 	cbp = gv_raid5_clone_bio(bp, parity, wp, addr, 1);
244 	if (cbp == NULL)
245 		return (ENOMEM);
246 	wp->parity = cbp;
247 
248 	return (0);
249 }
250 
251 /* Rebuild a degraded RAID5 plex. */
252 static int
253 gv_raid5_rebuild(struct gv_plex *p, struct gv_raid5_packet *wp, struct bio *bp,
254     caddr_t addr, off_t boff, off_t bcount)
255 {
256 	struct gv_sd *broken, *s;
257 	struct gv_bioq *bq;
258 	struct bio *cbp;
259 	off_t real_len, real_off;
260 
261 	if (p == NULL || LIST_EMPTY(&p->subdisks))
262 		return (ENXIO);
263 
264 	gv_raid5_offset(p, boff, bcount, &real_off, &real_len, NULL, NULL, 1);
265 
266 	/* Find the right subdisk. */
267 	broken = NULL;
268 	LIST_FOREACH(s, &p->subdisks, in_plex) {
269 		if (s->state != GV_SD_UP)
270 			broken = s;
271 	}
272 
273 	/* Broken stripe not found. */
274 	if (broken == NULL)
275 		return (ENXIO);
276 
277 	switch (broken->state) {
278 	case GV_SD_UP:
279 		return (EINVAL);
280 
281 	case GV_SD_STALE:
282 		if (!(bp->bio_pflags & GV_BIO_REBUILD))
283 			return (ENXIO);
284 
285 		G_VINUM_DEBUG(1, "sd %s is reviving", broken->name);
286 		gv_set_sd_state(broken, GV_SD_REVIVING, GV_SETSTATE_FORCE);
287 		/* Set this bit now, but should be set at end. */
288 		broken->flags |= GV_SD_CANGOUP;
289 		break;
290 
291 	case GV_SD_REVIVING:
292 		break;
293 
294 	default:
295 		/* All other subdisk states mean it's not accessible. */
296 		return (ENXIO);
297 	}
298 
299 	wp->length = real_len;
300 	wp->data = addr;
301 	wp->lockbase = real_off;
302 
303 	KASSERT(wp->length >= 0, ("gv_rebuild_raid5: wp->length < 0"));
304 
305 	/* Read all subdisks. */
306 	LIST_FOREACH(s, &p->subdisks, in_plex) {
307 		/* Skip the broken subdisk. */
308 		if (s == broken)
309 			continue;
310 
311 		/* Skip growing subdisks. */
312 		if (s->flags & GV_SD_GROW)
313 			continue;
314 
315 		cbp = gv_raid5_clone_bio(bp, s, wp, NULL, 1);
316 		if (cbp == NULL)
317 			return (ENOMEM);
318 		cbp->bio_cmd = BIO_READ;
319 
320 		bioq_insert_tail(p->bqueue, cbp);
321 
322 		bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
323 		bq->bp = cbp;
324 		TAILQ_INSERT_TAIL(&wp->bits, bq, queue);
325 	}
326 
327 	/* Write the parity data. */
328 	cbp = gv_raid5_clone_bio(bp, broken, wp, NULL, 1);
329 	if (cbp == NULL)
330 		return (ENOMEM);
331 	wp->parity = cbp;
332 
333 	p->synced = boff;
334 
335 	/* Post notification that we're finished. */
336 	return (0);
337 }
338 
339 /* Build a request group to perform (part of) a RAID5 request. */
340 static int
341 gv_raid5_request(struct gv_plex *p, struct gv_raid5_packet *wp,
342     struct bio *bp, caddr_t addr, off_t boff, off_t bcount, int *delay)
343 {
344 	struct g_geom *gp;
345 	struct gv_sd *broken, *original, *parity, *s;
346 	struct gv_bioq *bq;
347 	struct bio *cbp;
348 	int i, psdno, sdno, type, grow;
349 	off_t real_len, real_off;
350 
351 	gp = bp->bio_to->geom;
352 
353 	if (p == NULL || LIST_EMPTY(&p->subdisks))
354 		return (ENXIO);
355 
356 	/* We are optimistic and assume that this request will be OK. */
357 #define	REQ_TYPE_NORMAL		0
358 #define	REQ_TYPE_DEGRADED	1
359 #define	REQ_TYPE_NOPARITY	2
360 
361 	type = REQ_TYPE_NORMAL;
362 	original = parity = broken = NULL;
363 
364 	/* XXX: The resize won't crash with rebuild or sync, but we should still
365 	 * be aware of it. Also this should perhaps be done on rebuild/check as
366 	 * well?
367 	 */
368 	/* If we're over, we must use the old. */
369 	if (boff >= p->synced) {
370 		grow = 1;
371 	/* Or if over the resized offset, we use all drives. */
372 	} else if (boff + bcount <= p->synced) {
373 		grow = 0;
374 	/* Else, we're in the middle, and must wait a bit. */
375 	} else {
376 		bioq_disksort(p->rqueue, bp);
377 		*delay = 1;
378 		return (0);
379 	}
380 	gv_raid5_offset(p, boff, bcount, &real_off, &real_len,
381 	    &sdno, &psdno, grow);
382 
383 	/* Find the right subdisks. */
384 	i = 0;
385 	LIST_FOREACH(s, &p->subdisks, in_plex) {
386 		if (i == sdno)
387 			original = s;
388 		if (i == psdno)
389 			parity = s;
390 		if (s->state != GV_SD_UP)
391 			broken = s;
392 		i++;
393 	}
394 
395 	if ((original == NULL) || (parity == NULL))
396 		return (ENXIO);
397 
398 	/* Our data stripe is missing. */
399 	if (original->state != GV_SD_UP)
400 		type = REQ_TYPE_DEGRADED;
401 
402 	/* If synchronizing request, just write it if disks are stale. */
403 	if (original->state == GV_SD_STALE && parity->state == GV_SD_STALE &&
404 	    bp->bio_pflags & GV_BIO_SYNCREQ && bp->bio_cmd == BIO_WRITE) {
405 		type = REQ_TYPE_NORMAL;
406 	/* Our parity stripe is missing. */
407 	} else if (parity->state != GV_SD_UP) {
408 		/* We cannot take another failure if we're already degraded. */
409 		if (type != REQ_TYPE_NORMAL)
410 			return (ENXIO);
411 		else
412 			type = REQ_TYPE_NOPARITY;
413 	}
414 
415 	wp->length = real_len;
416 	wp->data = addr;
417 	wp->lockbase = real_off;
418 
419 	KASSERT(wp->length >= 0, ("gv_build_raid5_request: wp->length < 0"));
420 
421 	if ((p->flags & GV_PLEX_REBUILDING) && (boff + real_len < p->synced))
422 		type = REQ_TYPE_NORMAL;
423 
424 	if ((p->flags & GV_PLEX_REBUILDING) && (boff + real_len >= p->synced)) {
425 		bioq_disksort(p->rqueue, bp);
426 		*delay = 1;
427 		return (0);
428 	}
429 
430 	switch (bp->bio_cmd) {
431 	case BIO_READ:
432 		/*
433 		 * For a degraded read we need to read in all stripes except
434 		 * the broken one plus the parity stripe and then recalculate
435 		 * the desired data.
436 		 */
437 		if (type == REQ_TYPE_DEGRADED) {
438 			bzero(wp->data, wp->length);
439 			LIST_FOREACH(s, &p->subdisks, in_plex) {
440 				/* Skip the broken subdisk. */
441 				if (s == broken)
442 					continue;
443 				/* Skip growing if within offset. */
444 				if (grow && s->flags & GV_SD_GROW)
445 					continue;
446 				cbp = gv_raid5_clone_bio(bp, s, wp, NULL, 1);
447 				if (cbp == NULL)
448 					return (ENOMEM);
449 
450 				bioq_insert_tail(p->bqueue, cbp);
451 
452 				bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
453 				bq->bp = cbp;
454 				TAILQ_INSERT_TAIL(&wp->bits, bq, queue);
455 			}
456 
457 		/* A normal read can be fulfilled with the original subdisk. */
458 		} else {
459 			cbp = gv_raid5_clone_bio(bp, original, wp, addr, 0);
460 			if (cbp == NULL)
461 				return (ENOMEM);
462 
463 			bioq_insert_tail(p->bqueue, cbp);
464 		}
465 		wp->lockbase = -1;
466 
467 		break;
468 
469 	case BIO_WRITE:
470 		/*
471 		 * A degraded write means we cannot write to the original data
472 		 * subdisk.  Thus we need to read in all valid stripes,
473 		 * recalculate the parity from the original data, and then
474 		 * write the parity stripe back out.
475 		 */
476 		if (type == REQ_TYPE_DEGRADED) {
477 			/* Read all subdisks. */
478 			LIST_FOREACH(s, &p->subdisks, in_plex) {
479 				/* Skip the broken and the parity subdisk. */
480 				if ((s == broken) || (s == parity))
481 					continue;
482 				/* Skip growing if within offset. */
483 				if (grow && s->flags & GV_SD_GROW)
484 					continue;
485 
486 				cbp = gv_raid5_clone_bio(bp, s, wp, NULL, 1);
487 				if (cbp == NULL)
488 					return (ENOMEM);
489 				cbp->bio_cmd = BIO_READ;
490 
491 				bioq_insert_tail(p->bqueue, cbp);
492 
493 				bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
494 				bq->bp = cbp;
495 				TAILQ_INSERT_TAIL(&wp->bits, bq, queue);
496 			}
497 
498 			/* Write the parity data. */
499 			cbp = gv_raid5_clone_bio(bp, parity, wp, NULL, 1);
500 			if (cbp == NULL)
501 				return (ENOMEM);
502 			bcopy(addr, cbp->bio_data, wp->length);
503 			wp->parity = cbp;
504 
505 		/*
506 		 * When the parity stripe is missing we just write out the data.
507 		 */
508 		} else if (type == REQ_TYPE_NOPARITY) {
509 			cbp = gv_raid5_clone_bio(bp, original, wp, addr, 1);
510 			if (cbp == NULL)
511 				return (ENOMEM);
512 
513 			bioq_insert_tail(p->bqueue, cbp);
514 
515 			bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
516 			bq->bp = cbp;
517 			TAILQ_INSERT_TAIL(&wp->bits, bq, queue);
518 
519 		/*
520 		 * A normal write request goes to the original subdisk, then we
521 		 * read in all other stripes, recalculate the parity and write
522 		 * out the parity again.
523 		 */
524 		} else {
525 			/* Read old parity. */
526 			cbp = gv_raid5_clone_bio(bp, parity, wp, NULL, 1);
527 			if (cbp == NULL)
528 				return (ENOMEM);
529 			cbp->bio_cmd = BIO_READ;
530 
531 			bioq_insert_tail(p->bqueue, cbp);
532 
533 			bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
534 			bq->bp = cbp;
535 			TAILQ_INSERT_TAIL(&wp->bits, bq, queue);
536 
537 			/* Read old data. */
538 			cbp = gv_raid5_clone_bio(bp, original, wp, NULL, 1);
539 			if (cbp == NULL)
540 				return (ENOMEM);
541 			cbp->bio_cmd = BIO_READ;
542 
543 			bioq_insert_tail(p->bqueue, cbp);
544 
545 			bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
546 			bq->bp = cbp;
547 			TAILQ_INSERT_TAIL(&wp->bits, bq, queue);
548 
549 			/* Write new data. */
550 			cbp = gv_raid5_clone_bio(bp, original, wp, addr, 1);
551 			if (cbp == NULL)
552 				return (ENOMEM);
553 
554 			/*
555 			 * We must not write the new data until the old data
556 			 * was read, so hold this BIO back until we're ready
557 			 * for it.
558 			 */
559 			wp->waiting = cbp;
560 
561 			/* The final bio for the parity. */
562 			cbp = gv_raid5_clone_bio(bp, parity, wp, NULL, 1);
563 			if (cbp == NULL)
564 				return (ENOMEM);
565 
566 			/* Remember that this is the BIO for the parity data. */
567 			wp->parity = cbp;
568 		}
569 		break;
570 
571 	default:
572 		return (EINVAL);
573 	}
574 
575 	return (0);
576 }
577 
578 /*
579  * Calculate the offsets in the various subdisks for a RAID5 request. Also take
580  * care of new subdisks in an expanded RAID5 array.
581  * XXX: This assumes that the new subdisks are inserted after the others (which
582  * is okay as long as plex_offset is larger). If subdisks are inserted into the
583  * plexlist before, we get problems.
584  */
585 static int
586 gv_raid5_offset(struct gv_plex *p, off_t boff, off_t bcount, off_t *real_off,
587     off_t *real_len, int *sdno, int *psdno, int growing)
588 {
589 	struct gv_sd *s;
590 	int sd, psd, sdcount;
591 	off_t len_left, stripeend, stripeoff, stripestart;
592 
593 	sdcount = p->sdcount;
594 	if (growing) {
595 		LIST_FOREACH(s, &p->subdisks, in_plex) {
596 			if (s->flags & GV_SD_GROW)
597 				sdcount--;
598 		}
599 	}
600 
601 	/* The number of the subdisk containing the parity stripe. */
602 	psd = sdcount - 1 - ( boff / (p->stripesize * (sdcount - 1))) %
603 	    sdcount;
604 	KASSERT(psdno >= 0, ("gv_raid5_offset: psdno < 0"));
605 
606 	/* Offset of the start address from the start of the stripe. */
607 	stripeoff = boff % (p->stripesize * (sdcount - 1));
608 	KASSERT(stripeoff >= 0, ("gv_raid5_offset: stripeoff < 0"));
609 
610 	/* The number of the subdisk where the stripe resides. */
611 	sd = stripeoff / p->stripesize;
612 	KASSERT(sdno >= 0, ("gv_raid5_offset: sdno < 0"));
613 
614 	/* At or past parity subdisk. */
615 	if (sd >= psd)
616 		sd++;
617 
618 	/* The offset of the stripe on this subdisk. */
619 	stripestart = (boff - stripeoff) / (sdcount - 1);
620 	KASSERT(stripestart >= 0, ("gv_raid5_offset: stripestart < 0"));
621 
622 	stripeoff %= p->stripesize;
623 
624 	/* The offset of the request on this subdisk. */
625 	*real_off = stripestart + stripeoff;
626 
627 	stripeend = stripestart + p->stripesize;
628 	len_left = stripeend - *real_off;
629 	KASSERT(len_left >= 0, ("gv_raid5_offset: len_left < 0"));
630 
631 	*real_len = (bcount <= len_left) ? bcount : len_left;
632 
633 	if (sdno != NULL)
634 		*sdno = sd;
635 	if (psdno != NULL)
636 		*psdno = psd;
637 
638 	return (0);
639 }
640 
641 static struct bio *
642 gv_raid5_clone_bio(struct bio *bp, struct gv_sd *s, struct gv_raid5_packet *wp,
643     caddr_t addr, int use_wp)
644 {
645 	struct bio *cbp;
646 
647 	cbp = g_clone_bio(bp);
648 	if (cbp == NULL)
649 		return (NULL);
650 	if (addr == NULL) {
651 		cbp->bio_data = g_malloc(wp->length, M_WAITOK | M_ZERO);
652 		cbp->bio_cflags |= GV_BIO_MALLOC;
653 	} else
654 		cbp->bio_data = addr;
655 	cbp->bio_offset = wp->lockbase + s->drive_offset;
656 	cbp->bio_length = wp->length;
657 	cbp->bio_done = gv_done;
658 	cbp->bio_caller1 = s;
659 	if (use_wp)
660 		cbp->bio_caller2 = wp;
661 
662 	return (cbp);
663 }
664