1 /*- 2 * Copyright (c) 1997, 1998, 1999 3 * Nan Yang Computer Services Limited. All rights reserved. 4 * 5 * Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project. 6 * 7 * Written by Greg Lehey 8 * 9 * This software is distributed under the so-called ``Berkeley 10 * License'': 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. All advertising materials mentioning features or use of this software 21 * must display the following acknowledgement: 22 * This product includes software developed by Nan Yang Computer 23 * Services Limited. 24 * 4. Neither the name of the Company nor the names of its contributors 25 * may be used to endorse or promote products derived from this software 26 * without specific prior written permission. 27 * 28 * This software is provided ``as is'', and any express or implied 29 * warranties, including, but not limited to, the implied warranties of 30 * merchantability and fitness for a particular purpose are disclaimed. 31 * In no event shall the company or contributors be liable for any 32 * direct, indirect, incidental, special, exemplary, or consequential 33 * damages (including, but not limited to, procurement of substitute 34 * goods or services; loss of use, data, or profits; or business 35 * interruption) however caused and on any theory of liability, whether 36 * in contract, strict liability, or tort (including negligence or 37 * otherwise) arising in any way out of the use of this software, even if 38 * advised of the possibility of such damage. 39 * 40 * $Id: vinumstate.c,v 2.18 2000/05/10 07:30:50 grog Exp grog $ 41 * $FreeBSD: src/sys/dev/vinum/vinumstate.c,v 1.28.2.2 2000/06/08 02:00:23 grog Exp $ 42 * $DragonFly: src/sys/dev/raid/vinum/vinumstate.c,v 1.6 2006/04/30 17:22:17 dillon Exp $ 43 */ 44 45 #include "vinumhdr.h" 46 #include "request.h" 47 48 /* Update drive state */ 49 /* Return 1 if the state changes, otherwise 0 */ 50 int 51 set_drive_state(int driveno, enum drivestate newstate, enum setstateflags flags) 52 { 53 struct drive *drive = &DRIVE[driveno]; 54 int oldstate = drive->state; 55 int sdno; 56 57 if (drive->state == drive_unallocated) /* no drive to do anything with, */ 58 return 0; 59 60 if (newstate == oldstate) /* don't change it if it's not different */ 61 return 1; /* all OK */ 62 if ((newstate == drive_down) /* the drive's going down */ 63 &&(!(flags & setstate_force)) 64 && (drive->opencount != 0)) /* we can't do it */ 65 return 0; /* don't do it */ 66 drive->state = newstate; /* set the state */ 67 if (drive->label.name[0] != '\0') /* we have a name, */ 68 log(LOG_INFO, 69 "vinum: drive %s is %s\n", 70 drive->label.name, 71 drive_state(drive->state)); 72 if (drive->state != oldstate) { /* state has changed */ 73 for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) { /* find this drive's subdisks */ 74 if ((SD[sdno].state >= sd_referenced) 75 && (SD[sdno].driveno == driveno)) /* belongs to this drive */ 76 update_sd_state(sdno); /* update the state */ 77 } 78 } 79 if (newstate == drive_up) { /* want to bring it up */ 80 if ((drive->flags & VF_OPEN) == 0) /* should be open, but we're not */ 81 init_drive(drive, 1); /* which changes the state again */ 82 } else /* taking it down or worse */ 83 queue_daemon_request(daemonrq_closedrive, /* get the daemon to close it */ 84 (union daemoninfo) drive); 85 if ((flags & setstate_configuring) == 0) /* configuring? */ 86 save_config(); /* no: save the updated configuration now */ 87 return 1; 88 } 89 90 /* 91 * Try to set the subdisk state. Return 1 if state changed to 92 * what we wanted, -1 if it changed to something else, and 0 93 * if no change. 94 * 95 * This routine is called both from the user (up, down states only) 96 * and internally. 97 * 98 * The setstate_force bit in the flags enables the state change even 99 * if it could be dangerous to data consistency. It shouldn't allow 100 * nonsense. 101 */ 102 int 103 set_sd_state(int sdno, enum sdstate newstate, enum setstateflags flags) 104 { 105 struct sd *sd = &SD[sdno]; 106 struct plex *plex; 107 struct volume *vol; 108 int oldstate = sd->state; 109 int status = 1; /* status to return */ 110 111 if (newstate == oldstate) /* already there, */ 112 return 1; 113 else if (sd->state == sd_unallocated) /* no subdisk to do anything with, */ 114 return 0; /* can't do it */ 115 116 if (sd->driveoffset < 0) { /* not allocated space */ 117 sd->state = sd_down; 118 if (newstate != sd_down) { 119 if (sd->plexno >= 0) 120 sdstatemap(&PLEX[sd->plexno]); /* count up subdisks */ 121 return -1; 122 } 123 } else { /* space allocated */ 124 switch (newstate) { 125 case sd_down: /* take it down? */ 126 /* 127 * If we're attached to a plex, and we're 128 * not reborn, we won't go down without 129 * use of force. 130 */ 131 if ((!flags & setstate_force) 132 && (sd->plexno >= 0) 133 && (sd->state != sd_reborn)) 134 return 0; /* don't do it */ 135 break; 136 137 case sd_initialized: 138 if ((sd->state == sd_initializing) /* we were initializing */ 139 ||(flags & setstate_force)) /* or we forced it */ 140 break; 141 return 0; /* can't do it otherwise */ 142 143 case sd_up: 144 if (DRIVE[sd->driveno].state != drive_up) /* can't bring the sd up if the drive isn't, */ 145 return 0; /* not even by force */ 146 if (flags & setstate_force) /* forcing it, */ 147 break; /* just do it, and damn the consequences */ 148 switch (sd->state) { 149 /* 150 * Perform the necessary tests. To allow 151 * the state transition, just break out of 152 * the switch. 153 */ 154 case sd_crashed: 155 case sd_reborn: 156 case sd_down: /* been down, no data lost */ 157 /* 158 * If we're associated with a plex, and 159 * the plex isn't up, or we're the only 160 * subdisk in the plex, we can do it. 161 */ 162 if ((sd->plexno >= 0) 163 && (((PLEX[sd->plexno].state < plex_firstup) 164 || (PLEX[sd->plexno].subdisks > 1)))) 165 break; /* do it */ 166 if (oldstate != sd_reborn) { 167 sd->state = sd_reborn; /* here it is again */ 168 log(LOG_INFO, 169 "vinum: %s is %s, not %s\n", 170 sd->name, 171 sd_state(sd->state), 172 sd_state(newstate)); 173 } 174 status = -1; 175 break; 176 177 case sd_init: /* brand new */ 178 if (flags & setstate_configuring) /* we're doing this while configuring */ 179 break; 180 /* otherwise it's like being empty */ 181 /* FALLTHROUGH */ 182 183 case sd_empty: 184 case sd_initialized: 185 /* 186 * If we're not part of a plex, or the 187 * plex is not part of a volume with other 188 * plexes which are up, we can come up 189 * without being inconsistent. 190 * 191 * If we're part of a parity plex, we'll 192 * come up if the caller uses force. This 193 * is the way we bring them up after 194 * initialization. 195 */ 196 if ((sd->plexno < 0) 197 || ((vpstate(&PLEX[sd->plexno]) & volplex_otherup) == 0) 198 || (isparity((&PLEX[sd->plexno])) 199 && (flags & setstate_force))) 200 break; 201 202 /* Otherwise it's just out of date */ 203 /* FALLTHROUGH */ 204 205 case sd_stale: /* out of date info, need reviving */ 206 case sd_obsolete: 207 /* 208 209 * 1. If the subdisk is not part of a 210 * plex, bring it up, don't revive. 211 * 212 * 2. If the subdisk is part of a 213 * one-plex volume or an unattached 214 * plex, and it's not RAID-4 or 215 * RAID-5, we *can't revive*. The 216 * subdisk doesn't change its state. 217 * 218 * 3. If the subdisk is part of a 219 * one-plex volume or an unattached 220 * plex, and it's RAID-4 or RAID-5, 221 * but more than one subdisk is down, 222 * we *still can't revive*. The 223 * subdisk doesn't change its state. 224 * 225 * 4. If the subdisk is part of a 226 * multi-plex volume, we'll change to 227 * reviving and let the revive 228 * routines find out whether it will 229 * work or not. If they don't, the 230 * revive stops with an error message, 231 * but the state doesn't change 232 * (FWIW). 233 */ 234 if (sd->plexno < 0) /* no plex associated, */ 235 break; /* bring it up */ 236 plex = &PLEX[sd->plexno]; 237 if (plex->volno >= 0) /* have a volume */ 238 vol = &VOL[plex->volno]; 239 else 240 vol = NULL; 241 /* 242 * We can't do it if: 243 * 244 * 1: we don't have a volume 245 * 2: we're the only plex in the volume 246 * 3: we're a RAID-4 or RAID-5 plex, and 247 * more than one subdisk is down. 248 */ 249 if (((vol == NULL) 250 || (vol->plexes == 1)) 251 && ((!isparity(plex)) 252 || (plex->sddowncount > 1))) { 253 if (sd->state == sd_initializing) /* it's finished initializing */ 254 sd->state = sd_initialized; 255 else 256 return 0; /* can't do it */ 257 } else { 258 sd->state = sd_reviving; /* put in reviving state */ 259 sd->revived = 0; /* nothing done yet */ 260 status = EAGAIN; /* need to repeat */ 261 } 262 break; 263 264 case sd_reviving: 265 if (flags & setstate_force) /* insist, */ 266 break; 267 return EAGAIN; /* no, try again */ 268 269 default: /* can't do it */ 270 /* 271 * There's no way to bring subdisks up directly from 272 * other states. First they need to be initialized 273 * or revived. 274 */ 275 return 0; 276 } 277 break; 278 279 default: /* other ones, only internal with force */ 280 if ((flags & setstate_force) == 0) /* no force? What's this? */ 281 return 0; /* don't do it */ 282 } 283 } 284 if (status == 1) { /* we can do it, */ 285 sd->state = newstate; 286 if (flags & setstate_force) 287 log(LOG_INFO, "vinum: %s is %s by force\n", sd->name, sd_state(sd->state)); 288 else 289 log(LOG_INFO, "vinum: %s is %s\n", sd->name, sd_state(sd->state)); 290 } else /* we don't get here with status 0 */ 291 log(LOG_INFO, 292 "vinum: %s is %s, not %s\n", 293 sd->name, 294 sd_state(sd->state), 295 sd_state(newstate)); 296 if (sd->plexno >= 0) /* we belong to a plex */ 297 update_plex_state(sd->plexno); /* update plex state */ 298 if ((flags & setstate_configuring) == 0) /* save config now */ 299 save_config(); 300 return status; 301 } 302 303 /* 304 * Set the state of a plex dependent on its subdisks. 305 * This time round, we'll let plex state just reflect 306 * aggregate subdisk state, so this becomes an order of 307 * magnitude less complicated. In particular, ignore 308 * the requested state. 309 */ 310 int 311 set_plex_state(int plexno, enum plexstate state, enum setstateflags flags) 312 { 313 struct plex *plex; /* point to our plex */ 314 enum plexstate oldstate; 315 enum volplexstate vps; /* how do we compare with the other plexes? */ 316 317 plex = &PLEX[plexno]; /* point to our plex */ 318 oldstate = plex->state; 319 320 /* If the plex isn't allocated, we can't do it. */ 321 if (plex->state == plex_unallocated) 322 return 0; 323 324 /* 325 * If it's already in the the state we want, 326 * and it's not up, just return. If it's up, 327 * we still need to do some housekeeping. 328 */ 329 if ((state == oldstate) 330 && (state != plex_up)) 331 return 1; 332 vps = vpstate(plex); /* how do we compare with the other plexes? */ 333 switch (state) { 334 /* 335 * We can't bring the plex up, even by force, 336 * unless it's ready. update_plex_state 337 * checks that. 338 */ 339 case plex_up: /* bring the plex up */ 340 update_plex_state(plex->plexno); /* it'll come up if it can */ 341 break; 342 343 case plex_down: /* want to take it down */ 344 /* 345 * If we're the only one, or the only one 346 * which is up, we need force to do it. 347 */ 348 if (((vps == volplex_onlyus) 349 || (vps == volplex_onlyusup)) 350 && (!(flags & setstate_force))) 351 return 0; /* can't do it */ 352 plex->state = state; /* do it */ 353 invalidate_subdisks(plex, sd_down); /* and down all up subdisks */ 354 break; 355 356 /* 357 * This is only requested internally. 358 * Trust ourselves 359 */ 360 case plex_faulty: 361 plex->state = state; /* do it */ 362 invalidate_subdisks(plex, sd_crashed); /* and crash all up subdisks */ 363 break; 364 365 case plex_initializing: 366 /* XXX consider what safeguards we need here */ 367 if ((flags & setstate_force) == 0) 368 return 0; 369 plex->state = state; /* do it */ 370 break; 371 372 /* What's this? */ 373 default: 374 return 0; 375 } 376 if (plex->state != oldstate) /* we've changed, */ 377 log(LOG_INFO, /* tell them about it */ 378 "vinum: %s is %s\n", 379 plex->name, 380 plex_state(plex->state)); 381 /* 382 * Now see what we have left, and whether 383 * we're taking the volume down 384 */ 385 if (plex->volno >= 0) /* we have a volume */ 386 update_volume_state(plex->volno); /* update its state */ 387 if ((flags & setstate_configuring) == 0) /* save config now */ 388 save_config(); /* yes: save the updated configuration */ 389 return 1; 390 } 391 392 /* Update the state of a plex dependent on its plexes. */ 393 int 394 set_volume_state(int volno, enum volumestate state, enum setstateflags flags) 395 { 396 struct volume *vol = &VOL[volno]; /* point to our volume */ 397 398 if (vol->state == volume_unallocated) /* no volume to do anything with, */ 399 return 0; 400 if (vol->state == state) /* we're there already */ 401 return 1; 402 403 if (state == volume_up) /* want to come up */ 404 update_volume_state(volno); 405 else if (state == volume_down) { /* want to go down */ 406 if (((vol->flags & VF_OPEN) == 0) /* not open */ 407 ||((flags & setstate_force) != 0)) { /* or we're forcing */ 408 vol->state = volume_down; 409 log(LOG_INFO, 410 "vinum: volume %s is %s\n", 411 vol->name, 412 volume_state(vol->state)); 413 if ((flags & setstate_configuring) == 0) /* save config now */ 414 save_config(); /* yes: save the updated configuration */ 415 return 1; 416 } 417 } 418 return 0; /* no change */ 419 } 420 421 /* Set the state of a subdisk based on its environment */ 422 void 423 update_sd_state(int sdno) 424 { 425 struct sd *sd; 426 struct drive *drive; 427 enum sdstate oldstate; 428 429 sd = &SD[sdno]; 430 oldstate = sd->state; 431 drive = &DRIVE[sd->driveno]; 432 433 if (drive->state == drive_up) { 434 switch (sd->state) { 435 case sd_down: 436 case sd_crashed: 437 sd->state = sd_reborn; /* back up again with no loss */ 438 break; 439 440 default: 441 break; 442 } 443 } else { /* down or worse */ 444 switch (sd->state) { 445 case sd_up: 446 case sd_reborn: 447 case sd_reviving: 448 case sd_empty: 449 sd->state = sd_crashed; /* lost our drive */ 450 break; 451 452 default: 453 break; 454 } 455 } 456 if (sd->state != oldstate) /* state has changed, */ 457 log(LOG_INFO, /* say so */ 458 "vinum: %s is %s\n", 459 sd->name, 460 sd_state(sd->state)); 461 if (sd->plexno >= 0) /* we're part of a plex, */ 462 update_plex_state(sd->plexno); /* update its state */ 463 } 464 465 /* 466 * Force a plex and all its subdisks 467 * into an 'up' state. This is a helper 468 * for update_plex_state. 469 */ 470 void 471 forceup(int plexno) 472 { 473 struct plex *plex; 474 int sdno; 475 476 plex = &PLEX[plexno]; /* point to the plex */ 477 plex->state = plex_up; /* and bring it up */ 478 479 /* change the subdisks to up state */ 480 for (sdno = 0; sdno < plex->subdisks; sdno++) { 481 SD[plex->sdnos[sdno]].state = sd_up; 482 log(LOG_INFO, /* tell them about it */ 483 "vinum: %s is up\n", 484 SD[plex->sdnos[sdno]].name); 485 } 486 } 487 488 /* Set the state of a plex based on its environment */ 489 void 490 update_plex_state(int plexno) 491 { 492 struct plex *plex; /* point to our plex */ 493 enum plexstate oldstate; 494 enum sdstates statemap; /* get a map of the subdisk states */ 495 enum volplexstate vps; /* how do we compare with the other plexes? */ 496 497 plex = &PLEX[plexno]; /* point to our plex */ 498 oldstate = plex->state; 499 statemap = sdstatemap(plex); /* get a map of the subdisk states */ 500 vps = vpstate(plex); /* how do we compare with the other plexes? */ 501 502 if (statemap & sd_initstate) /* something initializing? */ 503 plex->state = plex_initializing; /* yup, that makes the plex the same */ 504 else if (statemap == sd_upstate) 505 /* 506 * All the subdisks are up. This also means that 507 * they are consistent, so we can just bring 508 * the plex up 509 */ 510 plex->state = plex_up; 511 else if (isparity(plex) /* RAID-4 or RAID-5 plex */ 512 &&(plex->sddowncount == 1)) /* and exactly one subdisk down */ 513 plex->state = plex_degraded; /* limping a bit */ 514 else if (((statemap & ~sd_downstate) == sd_emptystate) /* all subdisks empty */ 515 ||((statemap & ~sd_downstate) 516 == (statemap & ~sd_downstate & (sd_initializedstate | sd_upstate)))) { 517 if ((vps & volplex_otherup) == 0) { /* no other plex is up */ 518 struct volume *vol = &VOL[plex->volno]; /* possible volume to which it points */ 519 520 /* 521 * If we're a striped or concat plex 522 * associated with a volume, none of whose 523 * plexes are up, and we're new and untested, 524 * and the volume has the setupstate bit set, 525 * we can pretend to be in a consistent state. 526 * 527 * We need to do this in one swell foop: on 528 * the next call we will no longer be just 529 * empty. 530 * 531 * This code assumes that all the other plexes 532 * are also capable of coming up (i.e. all the 533 * sds are up), but that's OK: we'll come back 534 * to this function for the remaining plexes 535 * in the volume. 536 */ 537 if ((plex->state == plex_init) 538 && (plex->volno >= 0) 539 && (vol->flags & VF_CONFIG_SETUPSTATE)) { 540 for (plexno = 0; plexno < vol->plexes; plexno++) 541 forceup(VOL[plex->volno].plex[plexno]); 542 } else if ((statemap == sd_initializedstate) /* if it's initialized (not empty) */ 543 ||(plex->organization == plex_concat) /* and we're not RAID-4 or RAID-5 */ 544 ||(plex->organization == plex_striped)) 545 forceup(plexno); /* we'll do it */ 546 /* 547 * This leaves a case where things don't get 548 * done: the plex is RAID-4 or RAID-5, and 549 * the subdisks are all empty. They need to 550 * be initialized first. 551 */ 552 } else { 553 if (statemap == sd_upstate) /* all subdisks up */ 554 plex->state = plex_up; /* we can come up too */ 555 else 556 plex->state = plex_faulty; 557 } 558 } else if ((statemap & (sd_upstate | sd_rebornstate)) == statemap) /* all up or reborn */ 559 plex->state = plex_flaky; 560 else if (statemap & (sd_upstate | sd_rebornstate)) /* some up or reborn */ 561 plex->state = plex_corrupt; /* corrupt */ 562 else if (statemap & (sd_initstate | sd_emptystate)) /* some subdisks empty or initializing */ 563 plex->state = plex_initializing; 564 else /* nothing at all up */ 565 plex->state = plex_faulty; 566 567 if (plex->state != oldstate) /* state has changed, */ 568 log(LOG_INFO, /* tell them about it */ 569 "vinum: %s is %s\n", 570 plex->name, 571 plex_state(plex->state)); 572 if (plex->volno >= 0) /* we're part of a volume, */ 573 update_volume_state(plex->volno); /* update its state */ 574 } 575 576 /* Set volume state based on its components */ 577 void 578 update_volume_state(int volno) 579 { 580 struct volume *vol; /* our volume */ 581 int plexno; 582 enum volumestate oldstate; 583 584 vol = &VOL[volno]; /* point to our volume */ 585 oldstate = vol->state; 586 587 for (plexno = 0; plexno < vol->plexes; plexno++) { 588 struct plex *plex = &PLEX[vol->plex[plexno]]; /* point to the plex */ 589 if (plex->state >= plex_corrupt) { /* something accessible, */ 590 vol->state = volume_up; 591 break; 592 } 593 } 594 if (plexno == vol->plexes) /* didn't find an up plex */ 595 vol->state = volume_down; 596 597 if (vol->state != oldstate) { /* state changed */ 598 log(LOG_INFO, "vinum: %s is %s\n", vol->name, volume_state(vol->state)); 599 save_config(); /* save the updated configuration */ 600 } 601 } 602 603 /* 604 * Called from request routines when they find 605 * a subdisk which is not kosher. Decide whether 606 * it warrants changing the state. Return 607 * REQUEST_DOWN if we can't use the subdisk, 608 * REQUEST_OK if we can. 609 */ 610 /* 611 * A prior version of this function checked the plex 612 * state as well. At the moment, consider plex states 613 * information for the user only. We'll ignore them 614 * and use the subdisk state only. The last version of 615 * this file with the old logic was 2.7. XXX 616 */ 617 enum requeststatus 618 checksdstate(struct sd *sd, struct request *rq, daddr_t diskaddr, daddr_t diskend) 619 { 620 struct plex *plex = &PLEX[sd->plexno]; 621 int writeop = (rq->bio->bio_buf->b_cmd != BUF_CMD_READ); /* note if we're writing */ 622 623 switch (sd->state) { 624 /* We shouldn't get called if the subdisk is up */ 625 case sd_up: 626 return REQUEST_OK; 627 628 case sd_reviving: 629 /* 630 * Access to a reviving subdisk depends on the 631 * organization of the plex: 632 * 633 * - If it's concatenated, access the subdisk 634 * up to its current revive point. If we 635 * want to write to the subdisk overlapping 636 * the current revive block, set the 637 * conflict flag in the request, asking the 638 * caller to put the request on the wait 639 * list, which will be attended to by 640 * revive_block when it's done. 641 * - if it's striped, we can't do it (we could 642 * do some hairy calculations, but it's 643 * unlikely to work). 644 * - if it's RAID-4 or RAID-5, we can do it as 645 * long as only one subdisk is down 646 */ 647 if (plex->organization == plex_striped) /* plex is striped, */ 648 return REQUEST_DOWN; 649 650 else if (isparity(plex)) { /* RAID-4 or RAID-5 plex */ 651 if (plex->sddowncount > 1) /* with more than one sd down, */ 652 return REQUEST_DOWN; 653 else 654 /* 655 * XXX We shouldn't do this if we can find a 656 * better way. Check the other plexes 657 * first, and return a DOWN if another 658 * plex will do it better 659 */ 660 return REQUEST_OK; /* OK, we'll find a way */ 661 } 662 if (diskaddr > (sd->revived 663 + sd->plexoffset 664 + (sd->revive_blocksize >> DEV_BSHIFT))) /* we're beyond the end */ 665 return REQUEST_DOWN; 666 else if (diskend > (sd->revived + sd->plexoffset)) { /* we finish beyond the end */ 667 if (writeop) { 668 rq->flags |= XFR_REVIVECONFLICT; /* note a potential conflict */ 669 rq->sdno = sd->sdno; /* and which sd last caused it */ 670 } else 671 return REQUEST_DOWN; 672 } 673 return REQUEST_OK; 674 675 case sd_reborn: 676 if (writeop) 677 return REQUEST_OK; /* always write to a reborn disk */ 678 else /* don't allow a read */ 679 /* 680 * Handle the mapping. We don't want to reject 681 * a read request to a reborn subdisk if that's 682 * all we have. XXX 683 */ 684 return REQUEST_DOWN; 685 686 case sd_down: 687 if (writeop) /* writing to a consistent down disk */ 688 set_sd_state(sd->sdno, sd_obsolete, setstate_force); /* it's not consistent now */ 689 return REQUEST_DOWN; 690 691 case sd_crashed: 692 if (writeop) /* writing to a consistent down disk */ 693 set_sd_state(sd->sdno, sd_stale, setstate_force); /* it's not consistent now */ 694 return REQUEST_DOWN; 695 696 default: 697 return REQUEST_DOWN; 698 } 699 } 700 701 /* return a state map for the subdisks of a plex */ 702 enum sdstates 703 sdstatemap(struct plex *plex) 704 { 705 int sdno; 706 enum sdstates statemap = 0; /* note the states we find */ 707 708 plex->sddowncount = 0; /* no subdisks down yet */ 709 for (sdno = 0; sdno < plex->subdisks; sdno++) { 710 struct sd *sd = &SD[plex->sdnos[sdno]]; /* point to the subdisk */ 711 712 switch (sd->state) { 713 case sd_empty: 714 statemap |= sd_emptystate; 715 (plex->sddowncount)++; /* another unusable subdisk */ 716 break; 717 718 case sd_init: 719 statemap |= sd_initstate; 720 (plex->sddowncount)++; /* another unusable subdisk */ 721 break; 722 723 case sd_down: 724 statemap |= sd_downstate; 725 (plex->sddowncount)++; /* another unusable subdisk */ 726 break; 727 728 case sd_crashed: 729 statemap |= sd_crashedstate; 730 (plex->sddowncount)++; /* another unusable subdisk */ 731 break; 732 733 case sd_obsolete: 734 statemap |= sd_obsoletestate; 735 (plex->sddowncount)++; /* another unusable subdisk */ 736 break; 737 738 case sd_stale: 739 statemap |= sd_stalestate; 740 (plex->sddowncount)++; /* another unusable subdisk */ 741 break; 742 743 case sd_reborn: 744 statemap |= sd_rebornstate; 745 break; 746 747 case sd_up: 748 statemap |= sd_upstate; 749 break; 750 751 case sd_initializing: 752 statemap |= sd_initstate; 753 (plex->sddowncount)++; /* another unusable subdisk */ 754 break; 755 756 case sd_initialized: 757 statemap |= sd_initializedstate; 758 (plex->sddowncount)++; /* another unusable subdisk */ 759 break; 760 761 case sd_unallocated: 762 case sd_uninit: 763 case sd_reviving: 764 case sd_referenced: 765 statemap |= sd_otherstate; 766 (plex->sddowncount)++; /* another unusable subdisk */ 767 } 768 } 769 return statemap; 770 } 771 772 /* determine the state of the volume relative to this plex */ 773 enum volplexstate 774 vpstate(struct plex *plex) 775 { 776 struct volume *vol; 777 enum volplexstate state = volplex_onlyusdown; /* state to return */ 778 int plexno; 779 780 if (plex->volno < 0) { /* not associated with a volume */ 781 if (plex->state > plex_degraded) 782 return volplex_onlyus; /* just us */ 783 else 784 return volplex_onlyusdown; /* assume the worst */ 785 } 786 vol = &VOL[plex->volno]; /* point to our volume */ 787 for (plexno = 0; plexno < vol->plexes; plexno++) { 788 if (&PLEX[vol->plex[plexno]] == plex) { /* us */ 789 if (PLEX[vol->plex[plexno]].state >= plex_degraded) /* are we up? */ 790 state |= volplex_onlyus; /* yes */ 791 } else { 792 if (PLEX[vol->plex[plexno]].state >= plex_degraded) /* not us */ 793 state |= volplex_otherup; /* and when they were up, they were up */ 794 else 795 state |= volplex_alldown; /* and when they were down, they were down */ 796 } 797 } 798 return state; /* and when they were only halfway up */ 799 } /* they were neither up nor down */ 800 801 /* Check if all bits b are set in a */ 802 int allset(int a, int b); 803 804 int 805 allset(int a, int b) 806 { 807 return (a & b) == b; 808 } 809 810 /* Invalidate the subdisks belonging to a plex */ 811 void 812 invalidate_subdisks(struct plex *plex, enum sdstate state) 813 { 814 int sdno; 815 816 for (sdno = 0; sdno < plex->subdisks; sdno++) { /* for each subdisk */ 817 struct sd *sd = &SD[plex->sdnos[sdno]]; 818 819 switch (sd->state) { 820 case sd_unallocated: 821 case sd_uninit: 822 case sd_init: 823 case sd_initializing: 824 case sd_initialized: 825 case sd_empty: 826 case sd_obsolete: 827 case sd_stale: 828 case sd_crashed: 829 case sd_down: 830 case sd_referenced: 831 break; 832 833 case sd_reviving: 834 case sd_reborn: 835 case sd_up: 836 set_sd_state(plex->sdnos[sdno], state, setstate_force); 837 } 838 } 839 } 840 841 /* 842 * Start an object, in other words do what we can to get it up. 843 * This is called from vinumioctl (VINUMSTART). 844 * Return error indications via ioctl_reply 845 */ 846 void 847 start_object(struct vinum_ioctl_msg *data) 848 { 849 int status; 850 int objindex = data->index; /* data gets overwritten */ 851 struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* format for returning replies */ 852 enum setstateflags flags; 853 854 if (data->force != 0) /* are we going to use force? */ 855 flags = setstate_force; /* yes */ 856 else 857 flags = setstate_none; /* no */ 858 859 switch (data->type) { 860 case drive_object: 861 status = set_drive_state(objindex, drive_up, flags); 862 if (DRIVE[objindex].state != drive_up) /* set status on whether we really did it */ 863 ioctl_reply->error = EBUSY; 864 else 865 ioctl_reply->error = 0; 866 break; 867 868 case sd_object: 869 if (DRIVE[SD[objindex].driveno].state != drive_up) { 870 ioctl_reply->error = EIO; 871 strcpy(ioctl_reply->msg, "Drive is down"); 872 return; 873 } 874 if (data->blocksize) 875 SD[objindex].revive_blocksize = data->blocksize; 876 if ((SD[objindex].state == sd_reviving) /* reviving, */ 877 ||(SD[objindex].state == sd_stale)) { /* or stale, will revive */ 878 SD[objindex].state = sd_reviving; /* make sure we're reviving */ 879 ioctl_reply->error = revive_block(objindex); /* revive another block */ 880 ioctl_reply->msg[0] = '\0'; /* no comment */ 881 return; 882 } else if (SD[objindex].state == sd_initializing) { /* initializing, */ 883 if (data->blocksize) 884 SD[objindex].init_blocksize = data->blocksize; 885 ioctl_reply->error = initsd(objindex, data->verify); /* initialize another block */ 886 ioctl_reply->msg[0] = '\0'; /* no comment */ 887 return; 888 } 889 status = set_sd_state(objindex, sd_up, flags); /* set state */ 890 if (status != EAGAIN) { /* not first revive or initialize, */ 891 if (SD[objindex].state != sd_up) /* set status on whether we really did it */ 892 ioctl_reply->error = EBUSY; 893 else 894 ioctl_reply->error = 0; 895 } else 896 ioctl_reply->error = status; 897 break; 898 899 case plex_object: 900 status = set_plex_state(objindex, plex_up, flags); 901 if (PLEX[objindex].state != plex_up) /* set status on whether we really did it */ 902 ioctl_reply->error = EBUSY; 903 else 904 ioctl_reply->error = 0; 905 break; 906 907 case volume_object: 908 status = set_volume_state(objindex, volume_up, flags); 909 if (VOL[objindex].state != volume_up) /* set status on whether we really did it */ 910 ioctl_reply->error = EBUSY; 911 else 912 ioctl_reply->error = 0; 913 break; 914 915 default: 916 ioctl_reply->error = EINVAL; 917 strcpy(ioctl_reply->msg, "Invalid object type"); 918 return; 919 } 920 /* 921 * There's no point in saying anything here: 922 * the userland program does it better 923 */ 924 ioctl_reply->msg[0] = '\0'; 925 } 926 927 /* 928 * Stop an object, in other words do what we can to get it down 929 * This is called from vinumioctl (VINUMSTOP). 930 * Return error indications via ioctl_reply. 931 */ 932 void 933 stop_object(struct vinum_ioctl_msg *data) 934 { 935 int status = 1; 936 int objindex = data->index; /* save the number from change */ 937 struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* format for returning replies */ 938 939 switch (data->type) { 940 case drive_object: 941 status = set_drive_state(objindex, drive_down, data->force); 942 break; 943 944 case sd_object: 945 status = set_sd_state(objindex, sd_down, data->force); 946 break; 947 948 case plex_object: 949 status = set_plex_state(objindex, plex_down, data->force); 950 break; 951 952 case volume_object: 953 status = set_volume_state(objindex, volume_down, data->force); 954 break; 955 956 default: 957 ioctl_reply->error = EINVAL; 958 strcpy(ioctl_reply->msg, "Invalid object type"); 959 return; 960 } 961 ioctl_reply->msg[0] = '\0'; 962 if (status == 0) /* couldn't do it */ 963 ioctl_reply->error = EBUSY; 964 else 965 ioctl_reply->error = 0; 966 } 967 968 /* 969 * VINUM_SETSTATE ioctl: set an object state. 970 * msg is the message passed by the user. 971 */ 972 void 973 setstate(struct vinum_ioctl_msg *msg) 974 { 975 int sdno; 976 struct sd *sd; 977 struct plex *plex; 978 struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) msg; /* format for returning replies */ 979 980 switch (msg->state) { 981 case object_down: 982 stop_object(msg); 983 break; 984 985 case object_initializing: 986 switch (msg->type) { 987 case sd_object: 988 sd = &SD[msg->index]; 989 if ((msg->index >= vinum_conf.subdisks_allocated) 990 || (sd->state <= sd_referenced)) { 991 sprintf(ioctl_reply->msg, "Invalid subdisk %d", msg->index); 992 ioctl_reply->error = EFAULT; 993 return; 994 } 995 set_sd_state(msg->index, sd_initializing, msg->force); 996 if (sd->state != sd_initializing) { 997 strcpy(ioctl_reply->msg, "Can't set state"); 998 ioctl_reply->error = EBUSY; 999 } else 1000 ioctl_reply->error = 0; 1001 break; 1002 1003 case plex_object: 1004 plex = &PLEX[msg->index]; 1005 if ((msg->index >= vinum_conf.plexes_allocated) 1006 || (plex->state <= plex_unallocated)) { 1007 sprintf(ioctl_reply->msg, "Invalid plex %d", msg->index); 1008 ioctl_reply->error = EFAULT; 1009 return; 1010 } 1011 set_plex_state(msg->index, plex_initializing, msg->force); 1012 if (plex->state != plex_initializing) { 1013 strcpy(ioctl_reply->msg, "Can't set state"); 1014 ioctl_reply->error = EBUSY; 1015 } else { 1016 ioctl_reply->error = 0; 1017 for (sdno = 0; sdno < plex->subdisks; sdno++) { 1018 sd = &SD[plex->sdnos[sdno]]; 1019 set_sd_state(plex->sdnos[sdno], sd_initializing, msg->force); 1020 if (sd->state != sd_initializing) { 1021 strcpy(ioctl_reply->msg, "Can't set state"); 1022 ioctl_reply->error = EBUSY; 1023 break; 1024 } 1025 } 1026 } 1027 break; 1028 1029 default: 1030 strcpy(ioctl_reply->msg, "Invalid object"); 1031 ioctl_reply->error = EINVAL; 1032 } 1033 break; 1034 1035 case object_initialized: 1036 if (msg->type == sd_object) { 1037 sd = &SD[msg->index]; 1038 if ((msg->index >= vinum_conf.subdisks_allocated) 1039 || (sd->state <= sd_referenced)) { 1040 sprintf(ioctl_reply->msg, "Invalid subdisk %d", msg->index); 1041 ioctl_reply->error = EFAULT; 1042 return; 1043 } 1044 set_sd_state(msg->index, sd_initialized, msg->force); 1045 if (sd->state != sd_initializing) { 1046 strcpy(ioctl_reply->msg, "Can't set state"); 1047 ioctl_reply->error = EBUSY; 1048 } else 1049 ioctl_reply->error = 0; 1050 } else { 1051 strcpy(ioctl_reply->msg, "Invalid object"); 1052 ioctl_reply->error = EINVAL; 1053 } 1054 break; 1055 1056 case object_up: 1057 start_object(msg); 1058 } 1059 } 1060 1061 /* 1062 * Brute force set state function. Don't look at 1063 * any dependencies, just do it. This is mainly 1064 * intended for testing and recovery. 1065 */ 1066 void 1067 setstate_by_force(struct vinum_ioctl_msg *msg) 1068 { 1069 struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) msg; /* format for returning replies */ 1070 1071 switch (msg->type) { 1072 case drive_object: 1073 DRIVE[msg->index].state = msg->state; 1074 break; 1075 1076 case sd_object: 1077 SD[msg->index].state = msg->state; 1078 break; 1079 1080 case plex_object: 1081 PLEX[msg->index].state = msg->state; 1082 break; 1083 1084 case volume_object: 1085 VOL[msg->index].state = msg->state; 1086 break; 1087 1088 default: 1089 break; 1090 } 1091 ioctl_reply->error = 0; 1092 } 1093 /* Local Variables: */ 1094 /* fill-column: 50 */ 1095 /* End: */ 1096