xref: /dragonfly/sys/dev/raid/vinum/vinumstate.c (revision 38a690d7)
1 /*-
2  * Copyright (c) 1997, 1998, 1999
3  *	Nan Yang Computer Services Limited.  All rights reserved.
4  *
5  *  Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project.
6  *
7  *  Written by Greg Lehey
8  *
9  *  This software is distributed under the so-called ``Berkeley
10  *  License'':
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. All advertising materials mentioning features or use of this software
21  *    must display the following acknowledgement:
22  *	This product includes software developed by Nan Yang Computer
23  *      Services Limited.
24  * 4. Neither the name of the Company nor the names of its contributors
25  *    may be used to endorse or promote products derived from this software
26  *    without specific prior written permission.
27  *
28  * This software is provided ``as is'', and any express or implied
29  * warranties, including, but not limited to, the implied warranties of
30  * merchantability and fitness for a particular purpose are disclaimed.
31  * In no event shall the company or contributors be liable for any
32  * direct, indirect, incidental, special, exemplary, or consequential
33  * damages (including, but not limited to, procurement of substitute
34  * goods or services; loss of use, data, or profits; or business
35  * interruption) however caused and on any theory of liability, whether
36  * in contract, strict liability, or tort (including negligence or
37  * otherwise) arising in any way out of the use of this software, even if
38  * advised of the possibility of such damage.
39  *
40  * $Id: vinumstate.c,v 2.18 2000/05/10 07:30:50 grog Exp grog $
41  * $FreeBSD: src/sys/dev/vinum/vinumstate.c,v 1.28.2.2 2000/06/08 02:00:23 grog Exp $
42  * $DragonFly: src/sys/dev/raid/vinum/vinumstate.c,v 1.3 2003/08/07 21:17:10 dillon Exp $
43  */
44 
45 #include "vinumhdr.h"
46 #include "request.h"
47 
48 /* Update drive state */
49 /* Return 1 if the state changes, otherwise 0 */
50 int
51 set_drive_state(int driveno, enum drivestate newstate, enum setstateflags flags)
52 {
53     struct drive *drive = &DRIVE[driveno];
54     int oldstate = drive->state;
55     int sdno;
56 
57     if (drive->state == drive_unallocated)		    /* no drive to do anything with, */
58 	return 0;
59 
60     if (newstate == oldstate)				    /* don't change it if it's not different */
61 	return 1;					    /* all OK */
62     if ((newstate == drive_down)			    /* the drive's going down */
63     &&(!(flags & setstate_force))
64 	&& (drive->opencount != 0))			    /* we can't do it */
65 	return 0;					    /* don't do it */
66     drive->state = newstate;				    /* set the state */
67     if (drive->label.name[0] != '\0')			    /* we have a name, */
68 	log(LOG_INFO,
69 	    "vinum: drive %s is %s\n",
70 	    drive->label.name,
71 	    drive_state(drive->state));
72     if (drive->state != oldstate) {			    /* state has changed */
73 	for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) { /* find this drive's subdisks */
74 	    if ((SD[sdno].state >= sd_referenced)
75 		&& (SD[sdno].driveno == driveno))	    /* belongs to this drive */
76 		update_sd_state(sdno);			    /* update the state */
77 	}
78     }
79     if (newstate == drive_up) {				    /* want to bring it up */
80 	if ((drive->flags & VF_OPEN) == 0)		    /* should be open, but we're not */
81 	    init_drive(drive, 1);			    /* which changes the state again */
82     } else						    /* taking it down or worse */
83 	queue_daemon_request(daemonrq_closedrive,	    /* get the daemon to close it */
84 	    (union daemoninfo) drive);
85     if ((flags & setstate_configuring) == 0)		    /* configuring? */
86 	save_config();					    /* no: save the updated configuration now */
87     return 1;
88 }
89 
90 /*
91  * Try to set the subdisk state.  Return 1 if state changed to
92  * what we wanted, -1 if it changed to something else, and 0
93  * if no change.
94  *
95  * This routine is called both from the user (up, down states only)
96  * and internally.
97  *
98  * The setstate_force bit in the flags enables the state change even
99  * if it could be dangerous to data consistency.  It shouldn't allow
100  * nonsense.
101  */
102 int
103 set_sd_state(int sdno, enum sdstate newstate, enum setstateflags flags)
104 {
105     struct sd *sd = &SD[sdno];
106     struct plex *plex;
107     struct volume *vol;
108     int oldstate = sd->state;
109     int status = 1;					    /* status to return */
110 
111     if (newstate == oldstate)				    /* already there, */
112 	return 1;
113     else if (sd->state == sd_unallocated)		    /* no subdisk to do anything with, */
114 	return 0;					    /* can't do it */
115 
116     if (sd->driveoffset < 0) {				    /* not allocated space */
117 	sd->state = sd_down;
118 	if (newstate != sd_down) {
119 	    if (sd->plexno >= 0)
120 		sdstatemap(&PLEX[sd->plexno]);		    /* count up subdisks */
121 	    return -1;
122 	}
123     } else {						    /* space allocated */
124 	switch (newstate) {
125 	case sd_down:					    /* take it down? */
126 	    /*
127 	     * If we're attached to a plex, and we're
128 	     * not reborn, we won't go down without
129 	     * use of force.
130 	     */
131 	    if ((!flags & setstate_force)
132 		&& (sd->plexno >= 0)
133 		&& (sd->state != sd_reborn))
134 		return 0;				    /* don't do it */
135 	    break;
136 
137 	case sd_initialized:
138 	    if ((sd->state == sd_initializing)		    /* we were initializing */
139 	    ||(flags & setstate_force))			    /* or we forced it */
140 		break;
141 	    return 0;					    /* can't do it otherwise */
142 
143 	case sd_up:
144 	    if (DRIVE[sd->driveno].state != drive_up)	    /* can't bring the sd up if the drive isn't, */
145 		return 0;				    /* not even by force */
146 	    if (flags & setstate_force)			    /* forcing it, */
147 		break;					    /* just do it, and damn the consequences */
148 	    switch (sd->state) {
149 		/*
150 		 * Perform the necessary tests.  To allow
151 		 * the state transition, just break out of
152 		 * the switch.
153 		 */
154 	    case sd_crashed:
155 	    case sd_reborn:
156 	    case sd_down:				    /* been down, no data lost */
157 		/*
158 		 * If we're associated with a plex, and
159 		 * the plex isn't up, or we're the only
160 		 * subdisk in the plex, we can do it.
161 		 */
162 		if ((sd->plexno >= 0)
163 		    && (((PLEX[sd->plexno].state < plex_firstup)
164 			    || (PLEX[sd->plexno].subdisks > 1))))
165 		    break;				    /* do it */
166 		if (oldstate != sd_reborn) {
167 		    sd->state = sd_reborn;		    /* here it is again */
168 		    log(LOG_INFO,
169 			"vinum: %s is %s, not %s\n",
170 			sd->name,
171 			sd_state(sd->state),
172 			sd_state(newstate));
173 		}
174 		status = -1;
175 		break;
176 
177 	    case sd_init:				    /* brand new */
178 		if (flags & setstate_configuring)	    /* we're doing this while configuring */
179 		    break;
180 		/* otherwise it's like being empty */
181 		/* FALLTHROUGH */
182 
183 	    case sd_empty:
184 	    case sd_initialized:
185 		/*
186 		 * If we're not part of a plex, or the
187 		 * plex is not part of a volume with other
188 		 * plexes which are up, we can come up
189 		 * without being inconsistent.
190 		 *
191 		 * If we're part of a parity plex, we'll
192 		 * come up if the caller uses force.  This
193 		 * is the way we bring them up after
194 		 * initialization.
195 		 */
196 		if ((sd->plexno < 0)
197 		    || ((vpstate(&PLEX[sd->plexno]) & volplex_otherup) == 0)
198 		    || (isparity((&PLEX[sd->plexno]))
199 			&& (flags & setstate_force)))
200 		    break;
201 
202 		/* Otherwise it's just out of date */
203 		/* FALLTHROUGH */
204 
205 	    case sd_stale:				    /* out of date info, need reviving */
206 	    case sd_obsolete:
207 		/*
208 
209 		 * 1.  If the subdisk is not part of a
210 		 *     plex, bring it up, don't revive.
211 		 *
212 		 * 2.  If the subdisk is part of a
213 		 *     one-plex volume or an unattached
214 		 *     plex, and it's not RAID-4 or
215 		 *     RAID-5, we *can't revive*.  The
216 		 *     subdisk doesn't change its state.
217 		 *
218 		 * 3.  If the subdisk is part of a
219 		 *     one-plex volume or an unattached
220 		 *     plex, and it's RAID-4 or RAID-5,
221 		 *     but more than one subdisk is down,
222 		 *     we *still can't revive*.  The
223 		 *     subdisk doesn't change its state.
224 		 *
225 		 * 4.  If the subdisk is part of a
226 		 *     multi-plex volume, we'll change to
227 		 *     reviving and let the revive
228 		 *     routines find out whether it will
229 		 *     work or not.  If they don't, the
230 		 *     revive stops with an error message,
231 		 *     but the state doesn't change
232 		 *     (FWIW).
233 		 */
234 		if (sd->plexno < 0)			    /* no plex associated, */
235 		    break;				    /* bring it up */
236 		plex = &PLEX[sd->plexno];
237 		if (plex->volno >= 0)			    /* have a volume */
238 		    vol = &VOL[plex->volno];
239 		else
240 		    vol = NULL;
241 		/*
242 		 * We can't do it if:
243 		 *
244 		 * 1: we don't have a volume
245 		 * 2: we're the only plex in the volume
246 		 * 3: we're a RAID-4 or RAID-5 plex, and
247 		 *    more than one subdisk is down.
248 		 */
249 		if (((vol == NULL)
250 			|| (vol->plexes == 1))
251 		    && ((!isparity(plex))
252 			|| (plex->sddowncount > 1))) {
253 		    if (sd->state == sd_initializing)	    /* it's finished initializing  */
254 			sd->state = sd_initialized;
255 		    else
256 			return 0;			    /* can't do it */
257 		} else {
258 		    sd->state = sd_reviving;		    /* put in reviving state */
259 		    sd->revived = 0;			    /* nothing done yet */
260 		    status = EAGAIN;			    /* need to repeat */
261 		}
262 		break;
263 
264 	    case sd_reviving:
265 		if (flags & setstate_force)		    /* insist, */
266 		    break;
267 		return EAGAIN;				    /* no, try again */
268 
269 	    default:					    /* can't do it */
270 		/*
271 		 * There's no way to bring subdisks up directly from
272 		 * other states.  First they need to be initialized
273 		 * or revived.
274 		 */
275 		return 0;
276 	    }
277 	    break;
278 
279 	default:					    /* other ones, only internal with force */
280 	    if ((flags & setstate_force) == 0)		    /* no force?  What's this? */
281 		return 0;				    /* don't do it */
282 	}
283     }
284     if (status == 1) {					    /* we can do it, */
285 	sd->state = newstate;
286 	if (flags & setstate_force)
287 	    log(LOG_INFO, "vinum: %s is %s by force\n", sd->name, sd_state(sd->state));
288 	else
289 	    log(LOG_INFO, "vinum: %s is %s\n", sd->name, sd_state(sd->state));
290     } else						    /* we don't get here with status 0 */
291 	log(LOG_INFO,
292 	    "vinum: %s is %s, not %s\n",
293 	    sd->name,
294 	    sd_state(sd->state),
295 	    sd_state(newstate));
296     if (sd->plexno >= 0)				    /* we belong to a plex */
297 	update_plex_state(sd->plexno);			    /* update plex state */
298     if ((flags & setstate_configuring) == 0)		    /* save config now */
299 	save_config();
300     return status;
301 }
302 
303 /*
304  * Set the state of a plex dependent on its subdisks.
305  * This time round, we'll let plex state just reflect
306  * aggregate subdisk state, so this becomes an order of
307  * magnitude less complicated.  In particular, ignore
308  * the requested state.
309  */
310 int
311 set_plex_state(int plexno, enum plexstate state, enum setstateflags flags)
312 {
313     struct plex *plex;					    /* point to our plex */
314     enum plexstate oldstate;
315     enum volplexstate vps;				    /* how do we compare with the other plexes? */
316 
317     plex = &PLEX[plexno];				    /* point to our plex */
318     oldstate = plex->state;
319 
320     /* If the plex isn't allocated, we can't do it. */
321     if (plex->state == plex_unallocated)
322 	return 0;
323 
324     /*
325      * If it's already in the the state we want,
326      * and it's not up, just return.  If it's up,
327      * we still need to do some housekeeping.
328      */
329     if ((state == oldstate)
330 	&& (state != plex_up))
331 	return 1;
332     vps = vpstate(plex);				    /* how do we compare with the other plexes? */
333     switch (state) {
334 	/*
335 	 * We can't bring the plex up, even by force,
336 	 * unless it's ready.  update_plex_state
337 	 * checks that.
338 	 */
339     case plex_up:					    /* bring the plex up */
340 	update_plex_state(plex->plexno);		    /* it'll come up if it can */
341 	break;
342 
343     case plex_down:					    /* want to take it down */
344 	/*
345 	 * If we're the only one, or the only one
346 	 * which is up, we need force to do it.
347 	 */
348 	if (((vps == volplex_onlyus)
349 		|| (vps == volplex_onlyusup))
350 	    && (!(flags & setstate_force)))
351 	    return 0;					    /* can't do it */
352 	plex->state = state;				    /* do it */
353 	invalidate_subdisks(plex, sd_down);		    /* and down all up subdisks */
354 	break;
355 
356 	/*
357 	 * This is only requested internally.
358 	 * Trust ourselves
359 	 */
360     case plex_faulty:
361 	plex->state = state;				    /* do it */
362 	invalidate_subdisks(plex, sd_crashed);		    /* and crash all up subdisks */
363 	break;
364 
365     case plex_initializing:
366 	/* XXX consider what safeguards we need here */
367 	if ((flags & setstate_force) == 0)
368 	    return 0;
369 	plex->state = state;				    /* do it */
370 	break;
371 
372 	/* What's this? */
373     default:
374 	return 0;
375     }
376     if (plex->state != oldstate)			    /* we've changed, */
377 	log(LOG_INFO,					    /* tell them about it */
378 	    "vinum: %s is %s\n",
379 	    plex->name,
380 	    plex_state(plex->state));
381     /*
382      * Now see what we have left, and whether
383      * we're taking the volume down
384      */
385     if (plex->volno >= 0)				    /* we have a volume */
386 	update_volume_state(plex->volno);		    /* update its state */
387     if ((flags & setstate_configuring) == 0)		    /* save config now */
388 	save_config();					    /* yes: save the updated configuration */
389     return 1;
390 }
391 
392 /* Update the state of a plex dependent on its plexes. */
393 int
394 set_volume_state(int volno, enum volumestate state, enum setstateflags flags)
395 {
396     struct volume *vol = &VOL[volno];			    /* point to our volume */
397 
398     if (vol->state == volume_unallocated)		    /* no volume to do anything with, */
399 	return 0;
400     if (vol->state == state)				    /* we're there already */
401 	return 1;
402 
403     if (state == volume_up)				    /* want to come up */
404 	update_volume_state(volno);
405     else if (state == volume_down) {			    /* want to go down */
406 	if (((vol->flags & VF_OPEN) == 0)		    /* not open */
407 	||((flags & setstate_force) != 0)) {		    /* or we're forcing */
408 	    vol->state = volume_down;
409 	    log(LOG_INFO,
410 		"vinum: volume %s is %s\n",
411 		vol->name,
412 		volume_state(vol->state));
413 	    if ((flags & setstate_configuring) == 0)	    /* save config now */
414 		save_config();				    /* yes: save the updated configuration */
415 	    return 1;
416 	}
417     }
418     return 0;						    /* no change */
419 }
420 
421 /* Set the state of a subdisk based on its environment */
422 void
423 update_sd_state(int sdno)
424 {
425     struct sd *sd;
426     struct drive *drive;
427     enum sdstate oldstate;
428 
429     sd = &SD[sdno];
430     oldstate = sd->state;
431     drive = &DRIVE[sd->driveno];
432 
433     if (drive->state == drive_up) {
434 	switch (sd->state) {
435 	case sd_down:
436 	case sd_crashed:
437 	    sd->state = sd_reborn;			    /* back up again with no loss */
438 	    break;
439 
440 	default:
441 	    break;
442 	}
443     } else {						    /* down or worse */
444 	switch (sd->state) {
445 	case sd_up:
446 	case sd_reborn:
447 	case sd_reviving:
448 	case sd_empty:
449 	    sd->state = sd_crashed;			    /* lost our drive */
450 	    break;
451 
452 	default:
453 	    break;
454 	}
455     }
456     if (sd->state != oldstate)				    /* state has changed, */
457 	log(LOG_INFO,					    /* say so */
458 	    "vinum: %s is %s\n",
459 	    sd->name,
460 	    sd_state(sd->state));
461     if (sd->plexno >= 0)				    /* we're part of a plex, */
462 	update_plex_state(sd->plexno);			    /* update its state */
463 }
464 
465 /*
466  * Force a plex and all its subdisks
467  * into an 'up' state.  This is a helper
468  * for update_plex_state.
469  */
470 void
471 forceup(int plexno)
472 {
473     struct plex *plex;
474     int sdno;
475 
476     plex = &PLEX[plexno];				    /* point to the plex */
477     plex->state = plex_up;				    /* and bring it up */
478 
479     /* change the subdisks to up state */
480     for (sdno = 0; sdno < plex->subdisks; sdno++) {
481 	SD[plex->sdnos[sdno]].state = sd_up;
482 	log(LOG_INFO,					    /* tell them about it */
483 	    "vinum: %s is up\n",
484 	    SD[plex->sdnos[sdno]].name);
485     }
486 }
487 
488 /* Set the state of a plex based on its environment */
489 void
490 update_plex_state(int plexno)
491 {
492     struct plex *plex;					    /* point to our plex */
493     enum plexstate oldstate;
494     enum sdstates statemap;				    /* get a map of the subdisk states */
495     enum volplexstate vps;				    /* how do we compare with the other plexes? */
496 
497     plex = &PLEX[plexno];				    /* point to our plex */
498     oldstate = plex->state;
499     statemap = sdstatemap(plex);			    /* get a map of the subdisk states */
500     vps = vpstate(plex);				    /* how do we compare with the other plexes? */
501 
502     if (statemap & sd_initstate)			    /* something initializing? */
503 	plex->state = plex_initializing;		    /* yup, that makes the plex the same */
504     else if (statemap == sd_upstate)
505 	/*
506 	 * All the subdisks are up.  This also means that
507 	 * they are consistent, so we can just bring
508 	 * the plex up
509 	 */
510 	plex->state = plex_up;
511     else if (isparity(plex)				    /* RAID-4 or RAID-5 plex */
512     &&(plex->sddowncount == 1))				    /* and exactly one subdisk down */
513 	plex->state = plex_degraded;			    /* limping a bit */
514     else if (((statemap & ~sd_downstate) == sd_emptystate)  /* all subdisks empty */
515     ||((statemap & ~sd_downstate)
516 	    == (statemap & ~sd_downstate & (sd_initializedstate | sd_upstate)))) {
517 	if ((vps & volplex_otherup) == 0) {		    /* no other plex is up */
518 	    struct volume *vol = &VOL[plex->volno];	    /* possible volume to which it points */
519 
520 	    /*
521 	     * If we're a striped or concat plex
522 	     * associated with a volume, none of whose
523 	     * plexes are up, and we're new and untested,
524 	     * and the volume has the setupstate bit set,
525 	     * we can pretend to be in a consistent state.
526 	     *
527 	     * We need to do this in one swell foop: on
528 	     * the next call we will no longer be just
529 	     * empty.
530 	     *
531 	     * This code assumes that all the other plexes
532 	     * are also capable of coming up (i.e. all the
533 	     * sds are up), but that's OK: we'll come back
534 	     * to this function for the remaining plexes
535 	     * in the volume.
536 	     */
537 	    if ((plex->state == plex_init)
538 		&& (plex->volno >= 0)
539 		&& (vol->flags & VF_CONFIG_SETUPSTATE)) {
540 		for (plexno = 0; plexno < vol->plexes; plexno++)
541 		    forceup(VOL[plex->volno].plex[plexno]);
542 	    } else if ((statemap == sd_initializedstate)    /* if it's initialized (not empty) */
543 ||(plex->organization == plex_concat)			    /* and we're not RAID-4 or RAID-5 */
544 	    ||(plex->organization == plex_striped))
545 		forceup(plexno);			    /* we'll do it */
546 	    /*
547 	     * This leaves a case where things don't get
548 	     * done: the plex is RAID-4 or RAID-5, and
549 	     * the subdisks are all empty.  They need to
550 	     * be initialized first.
551 	     */
552 	} else {
553 	    if (statemap == sd_upstate)			    /* all subdisks up */
554 		plex->state = plex_up;			    /* we can come up too */
555 	    else
556 		plex->state = plex_faulty;
557 	}
558     } else if ((statemap & (sd_upstate | sd_rebornstate)) == statemap) /* all up or reborn */
559 	plex->state = plex_flaky;
560     else if (statemap & (sd_upstate | sd_rebornstate))	    /* some up or reborn */
561 	plex->state = plex_corrupt;			    /* corrupt */
562     else if (statemap & (sd_initstate | sd_emptystate))	    /* some subdisks empty or initializing */
563 	plex->state = plex_initializing;
564     else						    /* nothing at all up */
565 	plex->state = plex_faulty;
566 
567     if (plex->state != oldstate)			    /* state has changed, */
568 	log(LOG_INFO,					    /* tell them about it */
569 	    "vinum: %s is %s\n",
570 	    plex->name,
571 	    plex_state(plex->state));
572     if (plex->volno >= 0)				    /* we're part of a volume, */
573 	update_volume_state(plex->volno);		    /* update its state */
574 }
575 
576 /* Set volume state based on its components */
577 void
578 update_volume_state(int volno)
579 {
580     struct volume *vol;					    /* our volume */
581     int plexno;
582     enum volumestate oldstate;
583 
584     vol = &VOL[volno];					    /* point to our volume */
585     oldstate = vol->state;
586 
587     for (plexno = 0; plexno < vol->plexes; plexno++) {
588 	struct plex *plex = &PLEX[vol->plex[plexno]];	    /* point to the plex */
589 	if (plex->state >= plex_corrupt) {		    /* something accessible, */
590 	    vol->state = volume_up;
591 	    break;
592 	}
593     }
594     if (plexno == vol->plexes)				    /* didn't find an up plex */
595 	vol->state = volume_down;
596 
597     if (vol->state != oldstate) {			    /* state changed */
598 	log(LOG_INFO, "vinum: %s is %s\n", vol->name, volume_state(vol->state));
599 	save_config();					    /* save the updated configuration */
600     }
601 }
602 
603 /*
604  * Called from request routines when they find
605  * a subdisk which is not kosher.  Decide whether
606  * it warrants changing the state.  Return
607  * REQUEST_DOWN if we can't use the subdisk,
608  * REQUEST_OK if we can.
609  */
610 /*
611  * A prior version of this function checked the plex
612  * state as well.  At the moment, consider plex states
613  * information for the user only.  We'll ignore them
614  * and use the subdisk state only.  The last version of
615  * this file with the old logic was 2.7. XXX
616  */
617 enum requeststatus
618 checksdstate(struct sd *sd, struct request *rq, daddr_t diskaddr, daddr_t diskend)
619 {
620     struct plex *plex = &PLEX[sd->plexno];
621     int writeop = (rq->bp->b_flags & B_READ) == 0;	    /* note if we're writing */
622 
623     switch (sd->state) {
624 	/* We shouldn't get called if the subdisk is up */
625     case sd_up:
626 	return REQUEST_OK;
627 
628     case sd_reviving:
629 	/*
630 	 * Access to a reviving subdisk depends on the
631 	 * organization of the plex:
632 	 *
633 	 * - If it's concatenated, access the subdisk
634 	 *   up to its current revive point.  If we
635 	 *   want to write to the subdisk overlapping
636 	 *   the current revive block, set the
637 	 *   conflict flag in the request, asking the
638 	 *   caller to put the request on the wait
639 	 *   list, which will be attended to by
640 	 *   revive_block when it's done.
641 	 * - if it's striped, we can't do it (we could
642 	 *   do some hairy calculations, but it's
643 	 *   unlikely to work).
644 	 * - if it's RAID-4 or RAID-5, we can do it as
645 	 *   long as only one subdisk is down
646 	 */
647 	if (plex->organization == plex_striped)		    /* plex is striped, */
648 	    return REQUEST_DOWN;
649 
650 	else if (isparity(plex)) {			    /* RAID-4 or RAID-5 plex */
651 	    if (plex->sddowncount > 1)			    /* with more than one sd down, */
652 		return REQUEST_DOWN;
653 	    else
654 		/*
655 		 * XXX We shouldn't do this if we can find a
656 		 * better way.  Check the other plexes
657 		 * first, and return a DOWN if another
658 		 * plex will do it better
659 		 */
660 		return REQUEST_OK;			    /* OK, we'll find a way */
661 	}
662 	if (diskaddr > (sd->revived
663 		+ sd->plexoffset
664 		+ (sd->revive_blocksize >> DEV_BSHIFT)))    /* we're beyond the end */
665 	    return REQUEST_DOWN;
666 	else if (diskend > (sd->revived + sd->plexoffset)) { /* we finish beyond the end */
667 	    if (writeop) {
668 		rq->flags |= XFR_REVIVECONFLICT;	    /* note a potential conflict */
669 		rq->sdno = sd->sdno;			    /* and which sd last caused it */
670 	    } else
671 		return REQUEST_DOWN;
672 	}
673 	return REQUEST_OK;
674 
675     case sd_reborn:
676 	if (writeop)
677 	    return REQUEST_OK;				    /* always write to a reborn disk */
678 	else						    /* don't allow a read */
679 	    /*
680 	       * Handle the mapping.  We don't want to reject
681 	       * a read request to a reborn subdisk if that's
682 	       * all we have. XXX
683 	     */
684 	    return REQUEST_DOWN;
685 
686     case sd_down:
687 	if (writeop)					    /* writing to a consistent down disk */
688 	    set_sd_state(sd->sdno, sd_obsolete, setstate_force); /* it's not consistent now */
689 	return REQUEST_DOWN;
690 
691     case sd_crashed:
692 	if (writeop)					    /* writing to a consistent down disk */
693 	    set_sd_state(sd->sdno, sd_stale, setstate_force); /* it's not consistent now */
694 	return REQUEST_DOWN;
695 
696     default:
697 	return REQUEST_DOWN;
698     }
699 }
700 
701 /* return a state map for the subdisks of a plex */
702 enum sdstates
703 sdstatemap(struct plex *plex)
704 {
705     int sdno;
706     enum sdstates statemap = 0;				    /* note the states we find */
707 
708     plex->sddowncount = 0;				    /* no subdisks down yet */
709     for (sdno = 0; sdno < plex->subdisks; sdno++) {
710 	struct sd *sd = &SD[plex->sdnos[sdno]];		    /* point to the subdisk */
711 
712 	switch (sd->state) {
713 	case sd_empty:
714 	    statemap |= sd_emptystate;
715 	    (plex->sddowncount)++;			    /* another unusable subdisk */
716 	    break;
717 
718 	case sd_init:
719 	    statemap |= sd_initstate;
720 	    (plex->sddowncount)++;			    /* another unusable subdisk */
721 	    break;
722 
723 	case sd_down:
724 	    statemap |= sd_downstate;
725 	    (plex->sddowncount)++;			    /* another unusable subdisk */
726 	    break;
727 
728 	case sd_crashed:
729 	    statemap |= sd_crashedstate;
730 	    (plex->sddowncount)++;			    /* another unusable subdisk */
731 	    break;
732 
733 	case sd_obsolete:
734 	    statemap |= sd_obsoletestate;
735 	    (plex->sddowncount)++;			    /* another unusable subdisk */
736 	    break;
737 
738 	case sd_stale:
739 	    statemap |= sd_stalestate;
740 	    (plex->sddowncount)++;			    /* another unusable subdisk */
741 	    break;
742 
743 	case sd_reborn:
744 	    statemap |= sd_rebornstate;
745 	    break;
746 
747 	case sd_up:
748 	    statemap |= sd_upstate;
749 	    break;
750 
751 	case sd_initializing:
752 	    statemap |= sd_initstate;
753 	    (plex->sddowncount)++;			    /* another unusable subdisk */
754 	    break;
755 
756 	case sd_initialized:
757 	    statemap |= sd_initializedstate;
758 	    (plex->sddowncount)++;			    /* another unusable subdisk */
759 	    break;
760 
761 	case sd_unallocated:
762 	case sd_uninit:
763 	case sd_reviving:
764 	case sd_referenced:
765 	    statemap |= sd_otherstate;
766 	    (plex->sddowncount)++;			    /* another unusable subdisk */
767 	}
768     }
769     return statemap;
770 }
771 
772 /* determine the state of the volume relative to this plex */
773 enum volplexstate
774 vpstate(struct plex *plex)
775 {
776     struct volume *vol;
777     enum volplexstate state = volplex_onlyusdown;	    /* state to return */
778     int plexno;
779 
780     if (plex->volno < 0) {				    /* not associated with a volume */
781 	if (plex->state > plex_degraded)
782 	    return volplex_onlyus;			    /* just us */
783 	else
784 	    return volplex_onlyusdown;			    /* assume the worst */
785     }
786     vol = &VOL[plex->volno];				    /* point to our volume */
787     for (plexno = 0; plexno < vol->plexes; plexno++) {
788 	if (&PLEX[vol->plex[plexno]] == plex) {		    /* us */
789 	    if (PLEX[vol->plex[plexno]].state >= plex_degraded)	/* are we up? */
790 		state |= volplex_onlyus;		    /* yes */
791 	} else {
792 	    if (PLEX[vol->plex[plexno]].state >= plex_degraded)	/* not us */
793 		state |= volplex_otherup;		    /* and when they were up, they were up */
794 	    else
795 		state |= volplex_alldown;		    /* and when they were down, they were down */
796 	}
797     }
798     return state;					    /* and when they were only halfway up */
799 }							    /* they were neither up nor down */
800 
/*
 * Return 1 if every bit set in b is also set in a, otherwise 0.
 * An empty b is trivially contained in any a.
 */
int allset(int a, int b);

int
allset(int a, int b)
{
    int missing = b & ~a;				    /* bits wanted but not present */

    return missing == 0;
}
809 
810 /* Invalidate the subdisks belonging to a plex */
811 void
812 invalidate_subdisks(struct plex *plex, enum sdstate state)
813 {
814     int sdno;
815 
816     for (sdno = 0; sdno < plex->subdisks; sdno++) {	    /* for each subdisk */
817 	struct sd *sd = &SD[plex->sdnos[sdno]];
818 
819 	switch (sd->state) {
820 	case sd_unallocated:
821 	case sd_uninit:
822 	case sd_init:
823 	case sd_initializing:
824 	case sd_initialized:
825 	case sd_empty:
826 	case sd_obsolete:
827 	case sd_stale:
828 	case sd_crashed:
829 	case sd_down:
830 	case sd_referenced:
831 	    break;
832 
833 	case sd_reviving:
834 	case sd_reborn:
835 	case sd_up:
836 	    set_sd_state(plex->sdnos[sdno], state, setstate_force);
837 	}
838     }
839 }
840 
841 /*
842  * Start an object, in other words do what we can to get it up.
843  * This is called from vinumioctl (VINUMSTART).
844  * Return error indications via ioctl_reply
845  */
846 void
847 start_object(struct vinum_ioctl_msg *data)
848 {
849     int status;
850     int objindex = data->index;				    /* data gets overwritten */
851     struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* format for returning replies */
852     enum setstateflags flags;
853 
854     if (data->force != 0)				    /* are we going to use force? */
855 	flags = setstate_force;				    /* yes */
856     else
857 	flags = setstate_none;				    /* no */
858 
859     switch (data->type) {
860     case drive_object:
861 	status = set_drive_state(objindex, drive_up, flags);
862 	if (DRIVE[objindex].state != drive_up)		    /* set status on whether we really did it */
863 	    ioctl_reply->error = EBUSY;
864 	else
865 	    ioctl_reply->error = 0;
866 	break;
867 
868     case sd_object:
869 	if (DRIVE[SD[objindex].driveno].state != drive_up) {
870 	    ioctl_reply->error = EIO;
871 	    strcpy(ioctl_reply->msg, "Drive is down");
872 	    return;
873 	}
874 	if (data->blocksize)
875 	    SD[objindex].revive_blocksize = data->blocksize;
876 	if ((SD[objindex].state == sd_reviving)		    /* reviving, */
877 	||(SD[objindex].state == sd_stale)) {		    /* or stale, will revive */
878 	    SD[objindex].state = sd_reviving;		    /* make sure we're reviving */
879 	    ioctl_reply->error = revive_block(objindex);    /* revive another block */
880 	    ioctl_reply->msg[0] = '\0';			    /* no comment */
881 	    return;
882 	} else if (SD[objindex].state == sd_initializing) { /* initializing, */
883 	    if (data->blocksize)
884 		SD[objindex].init_blocksize = data->blocksize;
885 	    ioctl_reply->error = initsd(objindex, data->verify); /* initialize another block */
886 	    ioctl_reply->msg[0] = '\0';			    /* no comment */
887 	    return;
888 	}
889 	status = set_sd_state(objindex, sd_up, flags);	    /* set state */
890 	if (status != EAGAIN) {				    /* not first revive or initialize, */
891 	    if (SD[objindex].state != sd_up)		    /* set status on whether we really did it */
892 		ioctl_reply->error = EBUSY;
893 	    else
894 		ioctl_reply->error = 0;
895 	} else
896 	    ioctl_reply->error = status;
897 	break;
898 
899     case plex_object:
900 	status = set_plex_state(objindex, plex_up, flags);
901 	if (PLEX[objindex].state != plex_up)		    /* set status on whether we really did it */
902 	    ioctl_reply->error = EBUSY;
903 	else
904 	    ioctl_reply->error = 0;
905 	break;
906 
907     case volume_object:
908 	status = set_volume_state(objindex, volume_up, flags);
909 	if (VOL[objindex].state != volume_up)		    /* set status on whether we really did it */
910 	    ioctl_reply->error = EBUSY;
911 	else
912 	    ioctl_reply->error = 0;
913 	break;
914 
915     default:
916 	ioctl_reply->error = EINVAL;
917 	strcpy(ioctl_reply->msg, "Invalid object type");
918 	return;
919     }
920     /*
921      * There's no point in saying anything here:
922      * the userland program does it better
923      */
924     ioctl_reply->msg[0] = '\0';
925 }
926 
927 /*
928  * Stop an object, in other words do what we can to get it down
929  * This is called from vinumioctl (VINUMSTOP).
930  * Return error indications via ioctl_reply.
931  */
932 void
933 stop_object(struct vinum_ioctl_msg *data)
934 {
935     int status = 1;
936     int objindex = data->index;				    /* save the number from change */
937     struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* format for returning replies */
938 
939     switch (data->type) {
940     case drive_object:
941 	status = set_drive_state(objindex, drive_down, data->force);
942 	break;
943 
944     case sd_object:
945 	status = set_sd_state(objindex, sd_down, data->force);
946 	break;
947 
948     case plex_object:
949 	status = set_plex_state(objindex, plex_down, data->force);
950 	break;
951 
952     case volume_object:
953 	status = set_volume_state(objindex, volume_down, data->force);
954 	break;
955 
956     default:
957 	ioctl_reply->error = EINVAL;
958 	strcpy(ioctl_reply->msg, "Invalid object type");
959 	return;
960     }
961     ioctl_reply->msg[0] = '\0';
962     if (status == 0)					    /* couldn't do it */
963 	ioctl_reply->error = EBUSY;
964     else
965 	ioctl_reply->error = 0;
966 }
967 
968 /*
969  * VINUM_SETSTATE ioctl: set an object state.
970  * msg is the message passed by the user.
971  */
972 void
973 setstate(struct vinum_ioctl_msg *msg)
974 {
975     int sdno;
976     struct sd *sd;
977     struct plex *plex;
978     struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) msg; /* format for returning replies */
979 
980     switch (msg->state) {
981     case object_down:
982 	stop_object(msg);
983 	break;
984 
985     case object_initializing:
986 	switch (msg->type) {
987 	case sd_object:
988 	    sd = &SD[msg->index];
989 	    if ((msg->index >= vinum_conf.subdisks_allocated)
990 		|| (sd->state <= sd_referenced)) {
991 		sprintf(ioctl_reply->msg, "Invalid subdisk %d", msg->index);
992 		ioctl_reply->error = EFAULT;
993 		return;
994 	    }
995 	    set_sd_state(msg->index, sd_initializing, msg->force);
996 	    if (sd->state != sd_initializing) {
997 		strcpy(ioctl_reply->msg, "Can't set state");
998 		ioctl_reply->error = EBUSY;
999 	    } else
1000 		ioctl_reply->error = 0;
1001 	    break;
1002 
1003 	case plex_object:
1004 	    plex = &PLEX[msg->index];
1005 	    if ((msg->index >= vinum_conf.plexes_allocated)
1006 		|| (plex->state <= plex_unallocated)) {
1007 		sprintf(ioctl_reply->msg, "Invalid plex %d", msg->index);
1008 		ioctl_reply->error = EFAULT;
1009 		return;
1010 	    }
1011 	    set_plex_state(msg->index, plex_initializing, msg->force);
1012 	    if (plex->state != plex_initializing) {
1013 		strcpy(ioctl_reply->msg, "Can't set state");
1014 		ioctl_reply->error = EBUSY;
1015 	    } else {
1016 		ioctl_reply->error = 0;
1017 		for (sdno = 0; sdno < plex->subdisks; sdno++) {
1018 		    sd = &SD[plex->sdnos[sdno]];
1019 		    set_sd_state(plex->sdnos[sdno], sd_initializing, msg->force);
1020 		    if (sd->state != sd_initializing) {
1021 			strcpy(ioctl_reply->msg, "Can't set state");
1022 			ioctl_reply->error = EBUSY;
1023 			break;
1024 		    }
1025 		}
1026 	    }
1027 	    break;
1028 
1029 	default:
1030 	    strcpy(ioctl_reply->msg, "Invalid object");
1031 	    ioctl_reply->error = EINVAL;
1032 	}
1033 	break;
1034 
1035     case object_initialized:
1036 	if (msg->type == sd_object) {
1037 	    sd = &SD[msg->index];
1038 	    if ((msg->index >= vinum_conf.subdisks_allocated)
1039 		|| (sd->state <= sd_referenced)) {
1040 		sprintf(ioctl_reply->msg, "Invalid subdisk %d", msg->index);
1041 		ioctl_reply->error = EFAULT;
1042 		return;
1043 	    }
1044 	    set_sd_state(msg->index, sd_initialized, msg->force);
1045 	    if (sd->state != sd_initializing) {
1046 		strcpy(ioctl_reply->msg, "Can't set state");
1047 		ioctl_reply->error = EBUSY;
1048 	    } else
1049 		ioctl_reply->error = 0;
1050 	} else {
1051 	    strcpy(ioctl_reply->msg, "Invalid object");
1052 	    ioctl_reply->error = EINVAL;
1053 	}
1054 	break;
1055 
1056     case object_up:
1057 	start_object(msg);
1058     }
1059 }
1060 
1061 /*
1062  * Brute force set state function.  Don't look at
1063  * any dependencies, just do it.  This is mainly
1064  * intended for testing and recovery.
1065  */
1066 void
1067 setstate_by_force(struct vinum_ioctl_msg *msg)
1068 {
1069     struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) msg; /* format for returning replies */
1070 
1071     switch (msg->type) {
1072     case drive_object:
1073 	DRIVE[msg->index].state = msg->state;
1074 	break;
1075 
1076     case sd_object:
1077 	SD[msg->index].state = msg->state;
1078 	break;
1079 
1080     case plex_object:
1081 	PLEX[msg->index].state = msg->state;
1082 	break;
1083 
1084     case volume_object:
1085 	VOL[msg->index].state = msg->state;
1086 	break;
1087 
1088     default:
1089     }
1090     ioctl_reply->error = 0;
1091 }
1092 /* Local Variables: */
1093 /* fill-column: 50 */
1094 /* End: */
1095