/*-
 * Copyright (c) 2007 Lukas Ertl
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
2573679edcSLukas Ertl */ 2673679edcSLukas Ertl 2773679edcSLukas Ertl #include <sys/cdefs.h> 2873679edcSLukas Ertl __FBSDID("$FreeBSD$"); 2973679edcSLukas Ertl 3073679edcSLukas Ertl #include <sys/param.h> 3173679edcSLukas Ertl #include <sys/bio.h> 3273679edcSLukas Ertl #include <sys/lock.h> 3373679edcSLukas Ertl #include <sys/malloc.h> 3473679edcSLukas Ertl #include <sys/systm.h> 3573679edcSLukas Ertl 3673679edcSLukas Ertl #include <geom/geom.h> 3773679edcSLukas Ertl #include <geom/vinum/geom_vinum_var.h> 3873679edcSLukas Ertl #include <geom/vinum/geom_vinum.h> 3973679edcSLukas Ertl 40c0b9797aSUlf Lilleengen void 41c0b9797aSUlf Lilleengen gv_volume_flush(struct gv_volume *v) 42c0b9797aSUlf Lilleengen { 43c0b9797aSUlf Lilleengen struct gv_softc *sc; 44c0b9797aSUlf Lilleengen struct bio *bp; 4567e3ab6eSLukas Ertl 46c0b9797aSUlf Lilleengen KASSERT(v != NULL, ("NULL v")); 47c0b9797aSUlf Lilleengen sc = v->vinumconf; 48c0b9797aSUlf Lilleengen KASSERT(sc != NULL, ("NULL sc")); 49c0b9797aSUlf Lilleengen 50c0b9797aSUlf Lilleengen bp = bioq_takefirst(v->wqueue); 51c0b9797aSUlf Lilleengen while (bp != NULL) { 52c0b9797aSUlf Lilleengen gv_volume_start(sc, bp); 53c0b9797aSUlf Lilleengen bp = bioq_takefirst(v->wqueue); 54c0b9797aSUlf Lilleengen } 55c0b9797aSUlf Lilleengen } 56c0b9797aSUlf Lilleengen 57c0b9797aSUlf Lilleengen void 58c0b9797aSUlf Lilleengen gv_volume_start(struct gv_softc *sc, struct bio *bp) 5973679edcSLukas Ertl { 6073679edcSLukas Ertl struct g_geom *gp; 6199b536d8SLukas Ertl struct gv_volume *v; 62c0b9797aSUlf Lilleengen struct gv_plex *p, *lp; 63c0b9797aSUlf Lilleengen int numwrites; 6473679edcSLukas Ertl 65c0b9797aSUlf Lilleengen gp = sc->geom; 66c0b9797aSUlf Lilleengen v = bp->bio_to->private; 67c0b9797aSUlf Lilleengen if (v == NULL || v->state != GV_VOL_UP) { 6867e3ab6eSLukas Ertl g_io_deliver(bp, ENXIO); 6967e3ab6eSLukas Ertl return; 7067e3ab6eSLukas Ertl } 7167e3ab6eSLukas Ertl 7267e3ab6eSLukas Ertl switch (bp->bio_cmd) { 7367e3ab6eSLukas Ertl case 
BIO_READ: 74d8688e11SLukas Ertl /* 75c0b9797aSUlf Lilleengen * Try to find a good plex where we can send the request to, 76c0b9797aSUlf Lilleengen * round-robin-style. The plex either has to be up, or it's a 77c0b9797aSUlf Lilleengen * degraded RAID5 plex. Check if we have delayed requests. Put 78c0b9797aSUlf Lilleengen * this request on the delayed queue if so. This makes sure that 79c0b9797aSUlf Lilleengen * we don't read old values. 80d8688e11SLukas Ertl */ 81c0b9797aSUlf Lilleengen if (bioq_first(v->wqueue) != NULL) { 82c0b9797aSUlf Lilleengen bioq_insert_tail(v->wqueue, bp); 83c0b9797aSUlf Lilleengen break; 84c0b9797aSUlf Lilleengen } 857ad68986SLukas Ertl lp = v->last_read_plex; 867ad68986SLukas Ertl if (lp == NULL) 877ad68986SLukas Ertl lp = LIST_FIRST(&v->plexes); 887ad68986SLukas Ertl p = LIST_NEXT(lp, in_volume); 897ad68986SLukas Ertl if (p == NULL) 907ad68986SLukas Ertl p = LIST_FIRST(&v->plexes); 91c0b9797aSUlf Lilleengen do { 92c0b9797aSUlf Lilleengen if (p == NULL) { 93c0b9797aSUlf Lilleengen p = lp; 94c0b9797aSUlf Lilleengen break; 95c0b9797aSUlf Lilleengen } 96d8688e11SLukas Ertl if ((p->state > GV_PLEX_DEGRADED) || 97d8688e11SLukas Ertl (p->state >= GV_PLEX_DEGRADED && 98d8688e11SLukas Ertl p->org == GV_PLEX_RAID5)) 9967e3ab6eSLukas Ertl break; 1007ad68986SLukas Ertl p = LIST_NEXT(p, in_volume); 101c0b9797aSUlf Lilleengen if (p == NULL) 102c0b9797aSUlf Lilleengen p = LIST_FIRST(&v->plexes); 1037ad68986SLukas Ertl } while (p != lp); 1047ad68986SLukas Ertl 105c0b9797aSUlf Lilleengen if ((p == NULL) || 1067ad68986SLukas Ertl (p->org == GV_PLEX_RAID5 && p->state < GV_PLEX_DEGRADED) || 107fdb9eda8SLukas Ertl (p->org != GV_PLEX_RAID5 && p->state <= GV_PLEX_DEGRADED)) { 108d8688e11SLukas Ertl g_io_deliver(bp, ENXIO); 109d8688e11SLukas Ertl return; 110d8688e11SLukas Ertl } 1117ad68986SLukas Ertl v->last_read_plex = p; 11267e3ab6eSLukas Ertl 113c0b9797aSUlf Lilleengen /* Hand it down to the plex logic. 
*/ 114c0b9797aSUlf Lilleengen gv_plex_start(p, bp); 11567e3ab6eSLukas Ertl break; 11667e3ab6eSLukas Ertl 11767e3ab6eSLukas Ertl case BIO_WRITE: 11867e3ab6eSLukas Ertl case BIO_DELETE: 119c0b9797aSUlf Lilleengen /* Delay write-requests if any plex is synchronizing. */ 120c0b9797aSUlf Lilleengen LIST_FOREACH(p, &v->plexes, in_volume) { 121c0b9797aSUlf Lilleengen if (p->flags & GV_PLEX_SYNCING) { 122c0b9797aSUlf Lilleengen bioq_insert_tail(v->wqueue, bp); 123c0b9797aSUlf Lilleengen return; 124c0b9797aSUlf Lilleengen } 125c0b9797aSUlf Lilleengen } 126c0b9797aSUlf Lilleengen 127c0b9797aSUlf Lilleengen numwrites = 0; 128c0b9797aSUlf Lilleengen /* Give the BIO to each plex of this volume. */ 12967e3ab6eSLukas Ertl LIST_FOREACH(p, &v->plexes, in_volume) { 13067e3ab6eSLukas Ertl if (p->state < GV_PLEX_DEGRADED) 13167e3ab6eSLukas Ertl continue; 132c0b9797aSUlf Lilleengen gv_plex_start(p, bp); 133c0b9797aSUlf Lilleengen numwrites++; 1347ad68986SLukas Ertl } 135c0b9797aSUlf Lilleengen if (numwrites == 0) 136c0b9797aSUlf Lilleengen g_io_deliver(bp, ENXIO); 13767e3ab6eSLukas Ertl break; 13867e3ab6eSLukas Ertl } 13973679edcSLukas Ertl } 14073679edcSLukas Ertl 141c0b9797aSUlf Lilleengen void 142c0b9797aSUlf Lilleengen gv_bio_done(struct gv_softc *sc, struct bio *bp) 14373679edcSLukas Ertl { 14473679edcSLukas Ertl struct gv_volume *v; 14573679edcSLukas Ertl struct gv_plex *p; 146c0b9797aSUlf Lilleengen struct gv_sd *s; 14773679edcSLukas Ertl 148c0b9797aSUlf Lilleengen s = bp->bio_caller1; 149c0b9797aSUlf Lilleengen KASSERT(s != NULL, ("gv_bio_done: NULL s")); 150c0b9797aSUlf Lilleengen p = s->plex_sc; 151c0b9797aSUlf Lilleengen KASSERT(p != NULL, ("gv_bio_done: NULL p")); 152c0b9797aSUlf Lilleengen v = p->vol_sc; 153c0b9797aSUlf Lilleengen KASSERT(v != NULL, ("gv_bio_done: NULL v")); 15473679edcSLukas Ertl 155c0b9797aSUlf Lilleengen switch (p->org) { 156c0b9797aSUlf Lilleengen case GV_PLEX_CONCAT: 157c0b9797aSUlf Lilleengen case GV_PLEX_STRIPED: 158c0b9797aSUlf Lilleengen 
gv_plex_normal_done(p, bp); 159c0b9797aSUlf Lilleengen break; 160c0b9797aSUlf Lilleengen case GV_PLEX_RAID5: 161c0b9797aSUlf Lilleengen gv_plex_raid5_done(p, bp); 162c0b9797aSUlf Lilleengen break; 1634328802cSLukas Ertl } 1644328802cSLukas Ertl } 165