xref: /dragonfly/sys/dev/raid/vinum/vinumstate.c (revision f2c43266)
1 /*-
2  * Copyright (c) 1997, 1998, 1999
3  *	Nan Yang Computer Services Limited.  All rights reserved.
4  *
5  *  Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project.
6  *
7  *  Written by Greg Lehey
8  *
9  *  This software is distributed under the so-called ``Berkeley
10  *  License'':
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. All advertising materials mentioning features or use of this software
21  *    must display the following acknowledgement:
22  *	This product includes software developed by Nan Yang Computer
23  *      Services Limited.
24  * 4. Neither the name of the Company nor the names of its contributors
25  *    may be used to endorse or promote products derived from this software
26  *    without specific prior written permission.
27  *
28  * This software is provided ``as is'', and any express or implied
29  * warranties, including, but not limited to, the implied warranties of
30  * merchantability and fitness for a particular purpose are disclaimed.
31  * In no event shall the company or contributors be liable for any
32  * direct, indirect, incidental, special, exemplary, or consequential
33  * damages (including, but not limited to, procurement of substitute
34  * goods or services; loss of use, data, or profits; or business
35  * interruption) however caused and on any theory of liability, whether
36  * in contract, strict liability, or tort (including negligence or
37  * otherwise) arising in any way out of the use of this software, even if
38  * advised of the possibility of such damage.
39  *
40  * $Id: vinumstate.c,v 2.18 2000/05/10 07:30:50 grog Exp grog $
41  * $FreeBSD: src/sys/dev/vinum/vinumstate.c,v 1.28.2.2 2000/06/08 02:00:23 grog Exp $
42  */
43 
44 #include "vinumhdr.h"
45 #include "request.h"
46 
47 /* Update drive state */
48 /* Return 1 if the state changes, otherwise 0 */
49 int
50 set_drive_state(int driveno, enum drivestate newstate, enum setstateflags flags)
51 {
52     union daemoninfo di;
53     struct drive *drive = &DRIVE[driveno];
54     int oldstate = drive->state;
55     int sdno;
56 
57     if (drive->state == drive_unallocated)		    /* no drive to do anything with, */
58 	return 0;
59 
60     if (newstate == oldstate)				    /* don't change it if it's not different */
61 	return 1;					    /* all OK */
62     if ((newstate == drive_down)			    /* the drive's going down */
63     &&(!(flags & setstate_force))
64 	&& (drive->opencount != 0))			    /* we can't do it */
65 	return 0;					    /* don't do it */
66     drive->state = newstate;				    /* set the state */
67     if (drive->label.name[0] != '\0')			    /* we have a name, */
68 	log(LOG_INFO,
69 	    "vinum: drive %s is %s\n",
70 	    drive->label.name,
71 	    drive_state(drive->state));
72     if (drive->state != oldstate) {			    /* state has changed */
73 	for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) { /* find this drive's subdisks */
74 	    if ((SD[sdno].state >= sd_referenced)
75 		&& (SD[sdno].driveno == driveno))	    /* belongs to this drive */
76 		update_sd_state(sdno);			    /* update the state */
77 	}
78     }
79     if (newstate == drive_up) {				    /* want to bring it up */
80 	if ((drive->flags & VF_OPEN) == 0)		    /* should be open, but we're not */
81 	    init_drive(drive, 1);			    /* which changes the state again */
82     } else {						    /* taking it down or worse */
83 	di.drive = drive;
84 	queue_daemon_request(daemonrq_closedrive, di);	    /* get the daemon to close it */
85     }
86     if ((flags & setstate_configuring) == 0)		    /* configuring? */
87 	save_config();					    /* no: save the updated configuration now */
88     return 1;
89 }
90 
91 /*
92  * Try to set the subdisk state.  Return 1 if state changed to
93  * what we wanted, -1 if it changed to something else, and 0
94  * if no change.
95  *
96  * This routine is called both from the user (up, down states only)
97  * and internally.
98  *
99  * The setstate_force bit in the flags enables the state change even
100  * if it could be dangerous to data consistency.  It shouldn't allow
101  * nonsense.
102  */
103 int
104 set_sd_state(int sdno, enum sdstate newstate, enum setstateflags flags)
105 {
106     struct sd *sd = &SD[sdno];
107     struct plex *plex;
108     struct volume *vol;
109     int oldstate = sd->state;
110     int status = 1;					    /* status to return */
111 
112     if (newstate == oldstate)				    /* already there, */
113 	return 1;
114     else if (sd->state == sd_unallocated)		    /* no subdisk to do anything with, */
115 	return 0;					    /* can't do it */
116 
117     if (sd->driveoffset < 0) {				    /* not allocated space */
118 	sd->state = sd_down;
119 	if (newstate != sd_down) {
120 	    if (sd->plexno >= 0)
121 		sdstatemap(&PLEX[sd->plexno]);		    /* count up subdisks */
122 	    return -1;
123 	}
124     } else {						    /* space allocated */
125 	switch (newstate) {
126 	case sd_down:					    /* take it down? */
127 	    /*
128 	     * If we're attached to a plex, and we're
129 	     * not reborn, we won't go down without
130 	     * use of force.
131 	     */
132 	    if (!(flags & setstate_force)
133 		&& (sd->plexno >= 0)
134 		&& (sd->state != sd_reborn))
135 		return 0;				    /* don't do it */
136 	    break;
137 
138 	case sd_initialized:
139 	    if ((sd->state == sd_initializing)		    /* we were initializing */
140 	    ||(flags & setstate_force))			    /* or we forced it */
141 		break;
142 	    return 0;					    /* can't do it otherwise */
143 
144 	case sd_up:
145 	    if (DRIVE[sd->driveno].state != drive_up)	    /* can't bring the sd up if the drive isn't, */
146 		return 0;				    /* not even by force */
147 	    if (flags & setstate_force)			    /* forcing it, */
148 		break;					    /* just do it, and damn the consequences */
149 	    switch (sd->state) {
150 		/*
151 		 * Perform the necessary tests.  To allow
152 		 * the state transition, just break out of
153 		 * the switch.
154 		 */
155 	    case sd_crashed:
156 	    case sd_reborn:
157 	    case sd_down:				    /* been down, no data lost */
158 		/*
159 		 * If we're associated with a plex, and
160 		 * the plex isn't up, or we're the only
161 		 * subdisk in the plex, we can do it.
162 		 */
163 		if ((sd->plexno >= 0)
164 		    && (((PLEX[sd->plexno].state < plex_firstup)
165 			    || (PLEX[sd->plexno].subdisks > 1))))
166 		    break;				    /* do it */
167 		if (oldstate != sd_reborn) {
168 		    sd->state = sd_reborn;		    /* here it is again */
169 		    log(LOG_INFO,
170 			"vinum: %s is %s, not %s\n",
171 			sd->name,
172 			sd_state(sd->state),
173 			sd_state(newstate));
174 		}
175 		status = -1;
176 		break;
177 
178 	    case sd_init:				    /* brand new */
179 		if (flags & setstate_configuring)	    /* we're doing this while configuring */
180 		    break;
181 		/* otherwise it's like being empty */
182 		/* FALLTHROUGH */
183 
184 	    case sd_empty:
185 	    case sd_initialized:
186 		/*
187 		 * If we're not part of a plex, or the
188 		 * plex is not part of a volume with other
189 		 * plexes which are up, we can come up
190 		 * without being inconsistent.
191 		 *
192 		 * If we're part of a parity plex, we'll
193 		 * come up if the caller uses force.  This
194 		 * is the way we bring them up after
195 		 * initialization.
196 		 */
197 		if ((sd->plexno < 0)
198 		    || ((vpstate(&PLEX[sd->plexno]) & volplex_otherup) == 0)
199 		    || (isparity((&PLEX[sd->plexno]))
200 			&& (flags & setstate_force)))
201 		    break;
202 
203 		/* Otherwise it's just out of date */
204 		/* FALLTHROUGH */
205 
206 	    case sd_stale:				    /* out of date info, need reviving */
207 	    case sd_obsolete:
208 		/*
209 
210 		 * 1.  If the subdisk is not part of a
211 		 *     plex, bring it up, don't revive.
212 		 *
213 		 * 2.  If the subdisk is part of a
214 		 *     one-plex volume or an unattached
215 		 *     plex, and it's not RAID-4 or
216 		 *     RAID-5, we *can't revive*.  The
217 		 *     subdisk doesn't change its state.
218 		 *
219 		 * 3.  If the subdisk is part of a
220 		 *     one-plex volume or an unattached
221 		 *     plex, and it's RAID-4 or RAID-5,
222 		 *     but more than one subdisk is down,
223 		 *     we *still can't revive*.  The
224 		 *     subdisk doesn't change its state.
225 		 *
226 		 * 4.  If the subdisk is part of a
227 		 *     multi-plex volume, we'll change to
228 		 *     reviving and let the revive
229 		 *     routines find out whether it will
230 		 *     work or not.  If they don't, the
231 		 *     revive stops with an error message,
232 		 *     but the state doesn't change
233 		 *     (FWIW).
234 		 */
235 		if (sd->plexno < 0)			    /* no plex associated, */
236 		    break;				    /* bring it up */
237 		plex = &PLEX[sd->plexno];
238 		if (plex->volno >= 0)			    /* have a volume */
239 		    vol = &VOL[plex->volno];
240 		else
241 		    vol = NULL;
242 		/*
243 		 * We can't do it if:
244 		 *
245 		 * 1: we don't have a volume
246 		 * 2: we're the only plex in the volume
247 		 * 3: we're a RAID-4 or RAID-5 plex, and
248 		 *    more than one subdisk is down.
249 		 */
250 		if (((vol == NULL)
251 			|| (vol->plexes == 1))
252 		    && ((!isparity(plex))
253 			|| (plex->sddowncount > 1))) {
254 		    if (sd->state == sd_initializing)	    /* it's finished initializing  */
255 			sd->state = sd_initialized;
256 		    else
257 			return 0;			    /* can't do it */
258 		} else {
259 		    sd->state = sd_reviving;		    /* put in reviving state */
260 		    sd->revived = 0;			    /* nothing done yet */
261 		    status = EAGAIN;			    /* need to repeat */
262 		}
263 		break;
264 
265 	    case sd_reviving:
266 		if (flags & setstate_force)		    /* insist, */
267 		    break;
268 		return EAGAIN;				    /* no, try again */
269 
270 	    default:					    /* can't do it */
271 		/*
272 		 * There's no way to bring subdisks up directly from
273 		 * other states.  First they need to be initialized
274 		 * or revived.
275 		 */
276 		return 0;
277 	    }
278 	    break;
279 
280 	default:					    /* other ones, only internal with force */
281 	    if ((flags & setstate_force) == 0)		    /* no force?  What's this? */
282 		return 0;				    /* don't do it */
283 	}
284     }
285     if (status == 1) {					    /* we can do it, */
286 	sd->state = newstate;
287 	if (flags & setstate_force)
288 	    log(LOG_INFO, "vinum: %s is %s by force\n", sd->name, sd_state(sd->state));
289 	else
290 	    log(LOG_INFO, "vinum: %s is %s\n", sd->name, sd_state(sd->state));
291     } else						    /* we don't get here with status 0 */
292 	log(LOG_INFO,
293 	    "vinum: %s is %s, not %s\n",
294 	    sd->name,
295 	    sd_state(sd->state),
296 	    sd_state(newstate));
297     if (sd->plexno >= 0)				    /* we belong to a plex */
298 	update_plex_state(sd->plexno);			    /* update plex state */
299     if ((flags & setstate_configuring) == 0)		    /* save config now */
300 	save_config();
301     return status;
302 }
303 
304 /*
305  * Set the state of a plex dependent on its subdisks.
306  * This time round, we'll let plex state just reflect
307  * aggregate subdisk state, so this becomes an order of
308  * magnitude less complicated.  In particular, ignore
309  * the requested state.
310  */
311 int
312 set_plex_state(int plexno, enum plexstate state, enum setstateflags flags)
313 {
314     struct plex *plex;					    /* point to our plex */
315     enum plexstate oldstate;
316     enum volplexstate vps;				    /* how do we compare with the other plexes? */
317 
318     plex = &PLEX[plexno];				    /* point to our plex */
319     oldstate = plex->state;
320 
321     /* If the plex isn't allocated, we can't do it. */
322     if (plex->state == plex_unallocated)
323 	return 0;
324 
325     /*
326      * If it's already in the the state we want,
327      * and it's not up, just return.  If it's up,
328      * we still need to do some housekeeping.
329      */
330     if ((state == oldstate)
331 	&& (state != plex_up))
332 	return 1;
333     vps = vpstate(plex);				    /* how do we compare with the other plexes? */
334     switch (state) {
335 	/*
336 	 * We can't bring the plex up, even by force,
337 	 * unless it's ready.  update_plex_state
338 	 * checks that.
339 	 */
340     case plex_up:					    /* bring the plex up */
341 	update_plex_state(plex->plexno);		    /* it'll come up if it can */
342 	break;
343 
344     case plex_down:					    /* want to take it down */
345 	/*
346 	 * If we're the only one, or the only one
347 	 * which is up, we need force to do it.
348 	 */
349 	if (((vps == volplex_onlyus)
350 		|| (vps == volplex_onlyusup))
351 	    && (!(flags & setstate_force)))
352 	    return 0;					    /* can't do it */
353 	plex->state = state;				    /* do it */
354 	invalidate_subdisks(plex, sd_down);		    /* and down all up subdisks */
355 	break;
356 
357 	/*
358 	 * This is only requested internally.
359 	 * Trust ourselves
360 	 */
361     case plex_faulty:
362 	plex->state = state;				    /* do it */
363 	invalidate_subdisks(plex, sd_crashed);		    /* and crash all up subdisks */
364 	break;
365 
366     case plex_initializing:
367 	/* XXX consider what safeguards we need here */
368 	if ((flags & setstate_force) == 0)
369 	    return 0;
370 	plex->state = state;				    /* do it */
371 	break;
372 
373 	/* What's this? */
374     default:
375 	return 0;
376     }
377     if (plex->state != oldstate)			    /* we've changed, */
378 	log(LOG_INFO,					    /* tell them about it */
379 	    "vinum: %s is %s\n",
380 	    plex->name,
381 	    plex_state(plex->state));
382     /*
383      * Now see what we have left, and whether
384      * we're taking the volume down
385      */
386     if (plex->volno >= 0)				    /* we have a volume */
387 	update_volume_state(plex->volno);		    /* update its state */
388     if ((flags & setstate_configuring) == 0)		    /* save config now */
389 	save_config();					    /* yes: save the updated configuration */
390     return 1;
391 }
392 
393 /* Update the state of a plex dependent on its plexes. */
394 int
395 set_volume_state(int volno, enum volumestate state, enum setstateflags flags)
396 {
397     struct volume *vol = &VOL[volno];			    /* point to our volume */
398 
399     if (vol->state == volume_unallocated)		    /* no volume to do anything with, */
400 	return 0;
401     if (vol->state == state)				    /* we're there already */
402 	return 1;
403 
404     if (state == volume_up)				    /* want to come up */
405 	update_volume_state(volno);
406     else if (state == volume_down) {			    /* want to go down */
407 	if (((vol->flags & VF_OPEN) == 0)		    /* not open */
408 	||((flags & setstate_force) != 0)) {		    /* or we're forcing */
409 	    vol->state = volume_down;
410 	    log(LOG_INFO,
411 		"vinum: volume %s is %s\n",
412 		vol->name,
413 		volume_state(vol->state));
414 	    if ((flags & setstate_configuring) == 0)	    /* save config now */
415 		save_config();				    /* yes: save the updated configuration */
416 	    return 1;
417 	}
418     }
419     return 0;						    /* no change */
420 }
421 
422 /* Set the state of a subdisk based on its environment */
423 void
424 update_sd_state(int sdno)
425 {
426     struct sd *sd;
427     struct drive *drive;
428     enum sdstate oldstate;
429 
430     sd = &SD[sdno];
431     oldstate = sd->state;
432     drive = &DRIVE[sd->driveno];
433 
434     if (drive->state == drive_up) {
435 	switch (sd->state) {
436 	case sd_down:
437 	case sd_crashed:
438 	    sd->state = sd_reborn;			    /* back up again with no loss */
439 	    break;
440 
441 	default:
442 	    break;
443 	}
444     } else {						    /* down or worse */
445 	switch (sd->state) {
446 	case sd_up:
447 	case sd_reborn:
448 	case sd_reviving:
449 	case sd_empty:
450 	    sd->state = sd_crashed;			    /* lost our drive */
451 	    break;
452 
453 	default:
454 	    break;
455 	}
456     }
457     if (sd->state != oldstate)				    /* state has changed, */
458 	log(LOG_INFO,					    /* say so */
459 	    "vinum: %s is %s\n",
460 	    sd->name,
461 	    sd_state(sd->state));
462     if (sd->plexno >= 0)				    /* we're part of a plex, */
463 	update_plex_state(sd->plexno);			    /* update its state */
464 }
465 
466 /*
467  * Force a plex and all its subdisks
468  * into an 'up' state.  This is a helper
469  * for update_plex_state.
470  */
471 void
472 forceup(int plexno)
473 {
474     struct plex *plex;
475     int sdno;
476 
477     plex = &PLEX[plexno];				    /* point to the plex */
478     plex->state = plex_up;				    /* and bring it up */
479 
480     /* change the subdisks to up state */
481     for (sdno = 0; sdno < plex->subdisks; sdno++) {
482 	SD[plex->sdnos[sdno]].state = sd_up;
483 	log(LOG_INFO,					    /* tell them about it */
484 	    "vinum: %s is up\n",
485 	    SD[plex->sdnos[sdno]].name);
486     }
487 }
488 
489 /* Set the state of a plex based on its environment */
490 void
491 update_plex_state(int plexno)
492 {
493     struct plex *plex;					    /* point to our plex */
494     enum plexstate oldstate;
495     enum sdstates statemap;				    /* get a map of the subdisk states */
496     enum volplexstate vps;				    /* how do we compare with the other plexes? */
497 
498     plex = &PLEX[plexno];				    /* point to our plex */
499     oldstate = plex->state;
500     statemap = sdstatemap(plex);			    /* get a map of the subdisk states */
501     vps = vpstate(plex);				    /* how do we compare with the other plexes? */
502 
503     if (statemap & sd_initstate)			    /* something initializing? */
504 	plex->state = plex_initializing;		    /* yup, that makes the plex the same */
505     else if (statemap == sd_upstate)
506 	/*
507 	 * All the subdisks are up.  This also means that
508 	 * they are consistent, so we can just bring
509 	 * the plex up
510 	 */
511 	plex->state = plex_up;
512     else if (isparity(plex)				    /* RAID-4 or RAID-5 plex */
513     &&(plex->sddowncount == 1))				    /* and exactly one subdisk down */
514 	plex->state = plex_degraded;			    /* limping a bit */
515     else if (((statemap & ~sd_downstate) == sd_emptystate)  /* all subdisks empty */
516     ||((statemap & ~sd_downstate)
517 	    == (statemap & ~sd_downstate & (sd_initializedstate | sd_upstate)))) {
518 	if ((vps & volplex_otherup) == 0) {		    /* no other plex is up */
519 	    struct volume *vol = &VOL[plex->volno];	    /* possible volume to which it points */
520 
521 	    /*
522 	     * If we're a striped or concat plex
523 	     * associated with a volume, none of whose
524 	     * plexes are up, and we're new and untested,
525 	     * and the volume has the setupstate bit set,
526 	     * we can pretend to be in a consistent state.
527 	     *
528 	     * We need to do this in one swell foop: on
529 	     * the next call we will no longer be just
530 	     * empty.
531 	     *
532 	     * This code assumes that all the other plexes
533 	     * are also capable of coming up (i.e. all the
534 	     * sds are up), but that's OK: we'll come back
535 	     * to this function for the remaining plexes
536 	     * in the volume.
537 	     */
538 	    if ((plex->state == plex_init)
539 		&& (plex->volno >= 0)
540 		&& (vol->flags & VF_CONFIG_SETUPSTATE)) {
541 		for (plexno = 0; plexno < vol->plexes; plexno++)
542 		    forceup(VOL[plex->volno].plex[plexno]);
543 	    } else if ((statemap == sd_initializedstate)    /* if it's initialized (not empty) */
544 ||(plex->organization == plex_concat)			    /* and we're not RAID-4 or RAID-5 */
545 	    ||(plex->organization == plex_striped))
546 		forceup(plexno);			    /* we'll do it */
547 	    /*
548 	     * This leaves a case where things don't get
549 	     * done: the plex is RAID-4 or RAID-5, and
550 	     * the subdisks are all empty.  They need to
551 	     * be initialized first.
552 	     */
553 	} else {
554 	    if (statemap == sd_upstate)			    /* all subdisks up */
555 		plex->state = plex_up;			    /* we can come up too */
556 	    else
557 		plex->state = plex_faulty;
558 	}
559     } else if ((statemap & (sd_upstate | sd_rebornstate)) == statemap) /* all up or reborn */
560 	plex->state = plex_flaky;
561     else if (statemap & (sd_upstate | sd_rebornstate))	    /* some up or reborn */
562 	plex->state = plex_corrupt;			    /* corrupt */
563     else if (statemap & (sd_initstate | sd_emptystate))	    /* some subdisks empty or initializing */
564 	plex->state = plex_initializing;
565     else						    /* nothing at all up */
566 	plex->state = plex_faulty;
567 
568     if (plex->state != oldstate)			    /* state has changed, */
569 	log(LOG_INFO,					    /* tell them about it */
570 	    "vinum: %s is %s\n",
571 	    plex->name,
572 	    plex_state(plex->state));
573     if (plex->volno >= 0)				    /* we're part of a volume, */
574 	update_volume_state(plex->volno);		    /* update its state */
575 }
576 
577 /* Set volume state based on its components */
578 void
579 update_volume_state(int volno)
580 {
581     struct volume *vol;					    /* our volume */
582     int plexno;
583     enum volumestate oldstate;
584 
585     vol = &VOL[volno];					    /* point to our volume */
586     oldstate = vol->state;
587 
588     for (plexno = 0; plexno < vol->plexes; plexno++) {
589 	struct plex *plex = &PLEX[vol->plex[plexno]];	    /* point to the plex */
590 	if (plex->state >= plex_corrupt) {		    /* something accessible, */
591 	    vol->state = volume_up;
592 	    break;
593 	}
594     }
595     if (plexno == vol->plexes)				    /* didn't find an up plex */
596 	vol->state = volume_down;
597 
598     if (vol->state != oldstate) {			    /* state changed */
599 	log(LOG_INFO, "vinum: %s is %s\n", vol->name, volume_state(vol->state));
600 	save_config();					    /* save the updated configuration */
601     }
602 }
603 
604 /*
605  * Called from request routines when they find
606  * a subdisk which is not kosher.  Decide whether
607  * it warrants changing the state.  Return
608  * REQUEST_DOWN if we can't use the subdisk,
609  * REQUEST_OK if we can.
610  */
611 /*
612  * A prior version of this function checked the plex
613  * state as well.  At the moment, consider plex states
614  * information for the user only.  We'll ignore them
615  * and use the subdisk state only.  The last version of
616  * this file with the old logic was 2.7. XXX
617  */
618 enum requeststatus
619 checksdstate(struct sd *sd, struct request *rq, vinum_off_t diskaddr, vinum_off_t diskend)
620 {
621     struct plex *plex = &PLEX[sd->plexno];
622     int writeop = (rq->bio->bio_buf->b_cmd != BUF_CMD_READ);	    /* note if we're writing */
623 
624     switch (sd->state) {
625 	/* We shouldn't get called if the subdisk is up */
626     case sd_up:
627 	return REQUEST_OK;
628 
629     case sd_reviving:
630 	/*
631 	 * Access to a reviving subdisk depends on the
632 	 * organization of the plex:
633 	 *
634 	 * - If it's concatenated, access the subdisk
635 	 *   up to its current revive point.  If we
636 	 *   want to write to the subdisk overlapping
637 	 *   the current revive block, set the
638 	 *   conflict flag in the request, asking the
639 	 *   caller to put the request on the wait
640 	 *   list, which will be attended to by
641 	 *   revive_block when it's done.
642 	 * - if it's striped, we can't do it (we could
643 	 *   do some hairy calculations, but it's
644 	 *   unlikely to work).
645 	 * - if it's RAID-4 or RAID-5, we can do it as
646 	 *   long as only one subdisk is down
647 	 */
648 	if (plex->organization == plex_striped)		    /* plex is striped, */
649 	    return REQUEST_DOWN;
650 
651 	else if (isparity(plex)) {			    /* RAID-4 or RAID-5 plex */
652 	    if (plex->sddowncount > 1)			    /* with more than one sd down, */
653 		return REQUEST_DOWN;
654 	    else
655 		/*
656 		 * XXX We shouldn't do this if we can find a
657 		 * better way.  Check the other plexes
658 		 * first, and return a DOWN if another
659 		 * plex will do it better
660 		 */
661 		return REQUEST_OK;			    /* OK, we'll find a way */
662 	}
663 	if (diskaddr > (sd->revived
664 		+ sd->plexoffset
665 		+ (sd->revive_blocksize >> DEV_BSHIFT)))    /* we're beyond the end */
666 	    return REQUEST_DOWN;
667 	else if (diskend > (sd->revived + sd->plexoffset)) { /* we finish beyond the end */
668 	    if (writeop) {
669 		rq->flags |= XFR_REVIVECONFLICT;	    /* note a potential conflict */
670 		rq->sdno = sd->sdno;			    /* and which sd last caused it */
671 	    } else
672 		return REQUEST_DOWN;
673 	}
674 	return REQUEST_OK;
675 
676     case sd_reborn:
677 	if (writeop)
678 	    return REQUEST_OK;				    /* always write to a reborn disk */
679 	else						    /* don't allow a read */
680 	    /*
681 	       * Handle the mapping.  We don't want to reject
682 	       * a read request to a reborn subdisk if that's
683 	       * all we have. XXX
684 	     */
685 	    return REQUEST_DOWN;
686 
687     case sd_down:
688 	if (writeop)					    /* writing to a consistent down disk */
689 	    set_sd_state(sd->sdno, sd_obsolete, setstate_force); /* it's not consistent now */
690 	return REQUEST_DOWN;
691 
692     case sd_crashed:
693 	if (writeop)					    /* writing to a consistent down disk */
694 	    set_sd_state(sd->sdno, sd_stale, setstate_force); /* it's not consistent now */
695 	return REQUEST_DOWN;
696 
697     default:
698 	return REQUEST_DOWN;
699     }
700 }
701 
702 /* return a state map for the subdisks of a plex */
703 enum sdstates
704 sdstatemap(struct plex *plex)
705 {
706     int sdno;
707     enum sdstates statemap = 0;				    /* note the states we find */
708 
709     plex->sddowncount = 0;				    /* no subdisks down yet */
710     for (sdno = 0; sdno < plex->subdisks; sdno++) {
711 	struct sd *sd = &SD[plex->sdnos[sdno]];		    /* point to the subdisk */
712 
713 	switch (sd->state) {
714 	case sd_empty:
715 	    statemap |= sd_emptystate;
716 	    (plex->sddowncount)++;			    /* another unusable subdisk */
717 	    break;
718 
719 	case sd_init:
720 	    statemap |= sd_initstate;
721 	    (plex->sddowncount)++;			    /* another unusable subdisk */
722 	    break;
723 
724 	case sd_down:
725 	    statemap |= sd_downstate;
726 	    (plex->sddowncount)++;			    /* another unusable subdisk */
727 	    break;
728 
729 	case sd_crashed:
730 	    statemap |= sd_crashedstate;
731 	    (plex->sddowncount)++;			    /* another unusable subdisk */
732 	    break;
733 
734 	case sd_obsolete:
735 	    statemap |= sd_obsoletestate;
736 	    (plex->sddowncount)++;			    /* another unusable subdisk */
737 	    break;
738 
739 	case sd_stale:
740 	    statemap |= sd_stalestate;
741 	    (plex->sddowncount)++;			    /* another unusable subdisk */
742 	    break;
743 
744 	case sd_reborn:
745 	    statemap |= sd_rebornstate;
746 	    break;
747 
748 	case sd_up:
749 	    statemap |= sd_upstate;
750 	    break;
751 
752 	case sd_initializing:
753 	    statemap |= sd_initstate;
754 	    (plex->sddowncount)++;			    /* another unusable subdisk */
755 	    break;
756 
757 	case sd_initialized:
758 	    statemap |= sd_initializedstate;
759 	    (plex->sddowncount)++;			    /* another unusable subdisk */
760 	    break;
761 
762 	case sd_unallocated:
763 	case sd_uninit:
764 	case sd_reviving:
765 	case sd_referenced:
766 	    statemap |= sd_otherstate;
767 	    (plex->sddowncount)++;			    /* another unusable subdisk */
768 	}
769     }
770     return statemap;
771 }
772 
773 /* determine the state of the volume relative to this plex */
774 enum volplexstate
775 vpstate(struct plex *plex)
776 {
777     struct volume *vol;
778     enum volplexstate state = volplex_onlyusdown;	    /* state to return */
779     int plexno;
780 
781     if (plex->volno < 0) {				    /* not associated with a volume */
782 	if (plex->state > plex_degraded)
783 	    return volplex_onlyus;			    /* just us */
784 	else
785 	    return volplex_onlyusdown;			    /* assume the worst */
786     }
787     vol = &VOL[plex->volno];				    /* point to our volume */
788     for (plexno = 0; plexno < vol->plexes; plexno++) {
789 	if (&PLEX[vol->plex[plexno]] == plex) {		    /* us */
790 	    if (PLEX[vol->plex[plexno]].state >= plex_degraded)	/* are we up? */
791 		state |= volplex_onlyus;		    /* yes */
792 	} else {
793 	    if (PLEX[vol->plex[plexno]].state >= plex_degraded)	/* not us */
794 		state |= volplex_otherup;		    /* and when they were up, they were up */
795 	    else
796 		state |= volplex_alldown;		    /* and when they were down, they were down */
797 	}
798     }
799     return state;					    /* and when they were only halfway up */
800 }							    /* they were neither up nor down */
801 
/* Return 1 if every bit which is set in b is also set in a, otherwise 0 */
int allset(int a, int b);

int
allset(int a, int b)
{
    int missing = ~a & b;				    /* bits of b which a lacks */

    return missing == 0;
}
810 
811 /* Invalidate the subdisks belonging to a plex */
812 void
813 invalidate_subdisks(struct plex *plex, enum sdstate state)
814 {
815     int sdno;
816 
817     for (sdno = 0; sdno < plex->subdisks; sdno++) {	    /* for each subdisk */
818 	struct sd *sd = &SD[plex->sdnos[sdno]];
819 
820 	switch (sd->state) {
821 	case sd_unallocated:
822 	case sd_uninit:
823 	case sd_init:
824 	case sd_initializing:
825 	case sd_initialized:
826 	case sd_empty:
827 	case sd_obsolete:
828 	case sd_stale:
829 	case sd_crashed:
830 	case sd_down:
831 	case sd_referenced:
832 	    break;
833 
834 	case sd_reviving:
835 	case sd_reborn:
836 	case sd_up:
837 	    set_sd_state(plex->sdnos[sdno], state, setstate_force);
838 	}
839     }
840 }
841 
842 /*
843  * Start an object, in other words do what we can to get it up.
844  * This is called from vinumioctl (VINUMSTART).
845  * Return error indications via ioctl_reply
846  */
847 void
848 start_object(struct vinum_ioctl_msg *data)
849 {
850     int status;
851     int objindex = data->index;				    /* data gets overwritten */
852     struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* format for returning replies */
853     enum setstateflags flags;
854 
855     if (data->force != 0)				    /* are we going to use force? */
856 	flags = setstate_force;				    /* yes */
857     else
858 	flags = setstate_none;				    /* no */
859 
860     switch (data->type) {
861     case drive_object:
862 	status = set_drive_state(objindex, drive_up, flags);
863 	if (DRIVE[objindex].state != drive_up)		    /* set status on whether we really did it */
864 	    ioctl_reply->error = EBUSY;
865 	else
866 	    ioctl_reply->error = 0;
867 	break;
868 
869     case sd_object:
870 	if (DRIVE[SD[objindex].driveno].state != drive_up) {
871 	    ioctl_reply->error = EIO;
872 	    strcpy(ioctl_reply->msg, "Drive is down");
873 	    return;
874 	}
875 	if (data->blocksize)
876 	    SD[objindex].revive_blocksize = data->blocksize;
877 	if ((SD[objindex].state == sd_reviving)		    /* reviving, */
878 	||(SD[objindex].state == sd_stale)) {		    /* or stale, will revive */
879 	    SD[objindex].state = sd_reviving;		    /* make sure we're reviving */
880 	    ioctl_reply->error = revive_block(objindex);    /* revive another block */
881 	    ioctl_reply->msg[0] = '\0';			    /* no comment */
882 	    return;
883 	} else if (SD[objindex].state == sd_initializing) { /* initializing, */
884 	    if (data->blocksize)
885 		SD[objindex].init_blocksize = data->blocksize;
886 	    ioctl_reply->error = initsd(objindex, data->verify); /* initialize another block */
887 	    ioctl_reply->msg[0] = '\0';			    /* no comment */
888 	    return;
889 	}
890 	status = set_sd_state(objindex, sd_up, flags);	    /* set state */
891 	if (status != EAGAIN) {				    /* not first revive or initialize, */
892 	    if (SD[objindex].state != sd_up)		    /* set status on whether we really did it */
893 		ioctl_reply->error = EBUSY;
894 	    else
895 		ioctl_reply->error = 0;
896 	} else
897 	    ioctl_reply->error = status;
898 	break;
899 
900     case plex_object:
901 	status = set_plex_state(objindex, plex_up, flags);
902 	if (PLEX[objindex].state != plex_up)		    /* set status on whether we really did it */
903 	    ioctl_reply->error = EBUSY;
904 	else
905 	    ioctl_reply->error = 0;
906 	break;
907 
908     case volume_object:
909 	status = set_volume_state(objindex, volume_up, flags);
910 	if (VOL[objindex].state != volume_up)		    /* set status on whether we really did it */
911 	    ioctl_reply->error = EBUSY;
912 	else
913 	    ioctl_reply->error = 0;
914 	break;
915 
916     default:
917 	ioctl_reply->error = EINVAL;
918 	strcpy(ioctl_reply->msg, "Invalid object type");
919 	return;
920     }
921     /*
922      * There's no point in saying anything here:
923      * the userland program does it better
924      */
925     ioctl_reply->msg[0] = '\0';
926 }
927 
928 /*
929  * Stop an object, in other words do what we can to get it down
930  * This is called from vinumioctl (VINUMSTOP).
931  * Return error indications via ioctl_reply.
932  */
933 void
934 stop_object(struct vinum_ioctl_msg *data)
935 {
936     int status = 1;
937     int objindex = data->index;				    /* save the number from change */
938     struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* format for returning replies */
939 
940     switch (data->type) {
941     case drive_object:
942 	status = set_drive_state(objindex, drive_down, data->force);
943 	break;
944 
945     case sd_object:
946 	status = set_sd_state(objindex, sd_down, data->force);
947 	break;
948 
949     case plex_object:
950 	status = set_plex_state(objindex, plex_down, data->force);
951 	break;
952 
953     case volume_object:
954 	status = set_volume_state(objindex, volume_down, data->force);
955 	break;
956 
957     default:
958 	ioctl_reply->error = EINVAL;
959 	strcpy(ioctl_reply->msg, "Invalid object type");
960 	return;
961     }
962     ioctl_reply->msg[0] = '\0';
963     if (status == 0)					    /* couldn't do it */
964 	ioctl_reply->error = EBUSY;
965     else
966 	ioctl_reply->error = 0;
967 }
968 
969 /*
970  * VINUM_SETSTATE ioctl: set an object state.
971  * msg is the message passed by the user.
972  */
973 void
974 setstate(struct vinum_ioctl_msg *msg)
975 {
976     int sdno;
977     struct sd *sd;
978     struct plex *plex;
979     struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) msg; /* format for returning replies */
980 
981     switch (msg->state) {
982     case object_down:
983 	stop_object(msg);
984 	break;
985 
986     case object_initializing:
987 	switch (msg->type) {
988 	case sd_object:
989 	    sd = &SD[msg->index];
990 	    if ((msg->index >= vinum_conf.subdisks_allocated)
991 		|| (sd->state <= sd_referenced)) {
992 		ksprintf(ioctl_reply->msg, "Invalid subdisk %d", msg->index);
993 		ioctl_reply->error = EFAULT;
994 		return;
995 	    }
996 	    set_sd_state(msg->index, sd_initializing, msg->force);
997 	    if (sd->state != sd_initializing) {
998 		strcpy(ioctl_reply->msg, "Can't set state");
999 		ioctl_reply->error = EBUSY;
1000 	    } else
1001 		ioctl_reply->error = 0;
1002 	    break;
1003 
1004 	case plex_object:
1005 	    plex = &PLEX[msg->index];
1006 	    if ((msg->index >= vinum_conf.plexes_allocated)
1007 		|| (plex->state <= plex_unallocated)) {
1008 		ksprintf(ioctl_reply->msg, "Invalid plex %d", msg->index);
1009 		ioctl_reply->error = EFAULT;
1010 		return;
1011 	    }
1012 	    set_plex_state(msg->index, plex_initializing, msg->force);
1013 	    if (plex->state != plex_initializing) {
1014 		strcpy(ioctl_reply->msg, "Can't set state");
1015 		ioctl_reply->error = EBUSY;
1016 	    } else {
1017 		ioctl_reply->error = 0;
1018 		for (sdno = 0; sdno < plex->subdisks; sdno++) {
1019 		    sd = &SD[plex->sdnos[sdno]];
1020 		    set_sd_state(plex->sdnos[sdno], sd_initializing, msg->force);
1021 		    if (sd->state != sd_initializing) {
1022 			strcpy(ioctl_reply->msg, "Can't set state");
1023 			ioctl_reply->error = EBUSY;
1024 			break;
1025 		    }
1026 		}
1027 	    }
1028 	    break;
1029 
1030 	default:
1031 	    strcpy(ioctl_reply->msg, "Invalid object");
1032 	    ioctl_reply->error = EINVAL;
1033 	}
1034 	break;
1035 
1036     case object_initialized:
1037 	if (msg->type == sd_object) {
1038 	    sd = &SD[msg->index];
1039 	    if ((msg->index >= vinum_conf.subdisks_allocated)
1040 		|| (sd->state <= sd_referenced)) {
1041 		ksprintf(ioctl_reply->msg, "Invalid subdisk %d", msg->index);
1042 		ioctl_reply->error = EFAULT;
1043 		return;
1044 	    }
1045 	    set_sd_state(msg->index, sd_initialized, msg->force);
1046 	    if (sd->state != sd_initializing) {
1047 		strcpy(ioctl_reply->msg, "Can't set state");
1048 		ioctl_reply->error = EBUSY;
1049 	    } else
1050 		ioctl_reply->error = 0;
1051 	} else {
1052 	    strcpy(ioctl_reply->msg, "Invalid object");
1053 	    ioctl_reply->error = EINVAL;
1054 	}
1055 	break;
1056 
1057     case object_up:
1058 	start_object(msg);
1059     }
1060 }
1061 
1062 /*
1063  * Brute force set state function.  Don't look at
1064  * any dependencies, just do it.  This is mainly
1065  * intended for testing and recovery.
1066  */
1067 void
1068 setstate_by_force(struct vinum_ioctl_msg *msg)
1069 {
1070     struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) msg; /* format for returning replies */
1071 
1072     switch (msg->type) {
1073     case drive_object:
1074 	DRIVE[msg->index].state = msg->state;
1075 	break;
1076 
1077     case sd_object:
1078 	SD[msg->index].state = msg->state;
1079 	break;
1080 
1081     case plex_object:
1082 	PLEX[msg->index].state = msg->state;
1083 	break;
1084 
1085     case volume_object:
1086 	VOL[msg->index].state = msg->state;
1087 	break;
1088 
1089     default:
1090 	break;
1091     }
1092     ioctl_reply->error = 0;
1093 }
1094