1 /*- 2 * Copyright (c) 1997, 1998, 1999 3 * Nan Yang Computer Services Limited. All rights reserved. 4 * 5 * Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project. 6 * 7 * Written by Greg Lehey 8 * 9 * This software is distributed under the so-called ``Berkeley 10 * License'': 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. All advertising materials mentioning features or use of this software 21 * must display the following acknowledgement: 22 * This product includes software developed by Nan Yang Computer 23 * Services Limited. 24 * 4. Neither the name of the Company nor the names of its contributors 25 * may be used to endorse or promote products derived from this software 26 * without specific prior written permission. 27 * 28 * This software is provided ``as is'', and any express or implied 29 * warranties, including, but not limited to, the implied warranties of 30 * merchantability and fitness for a particular purpose are disclaimed. 31 * In no event shall the company or contributors be liable for any 32 * direct, indirect, incidental, special, exemplary, or consequential 33 * damages (including, but not limited to, procurement of substitute 34 * goods or services; loss of use, data, or profits; or business 35 * interruption) however caused and on any theory of liability, whether 36 * in contract, strict liability, or tort (including negligence or 37 * otherwise) arising in any way out of the use of this software, even if 38 * advised of the possibility of such damage. 39 * 40 * $Id: vinumstate.c,v 2.18 2000/05/10 07:30:50 grog Exp grog $ 41 * $FreeBSD: src/sys/dev/vinum/vinumstate.c,v 1.28.2.2 2000/06/08 02:00:23 grog Exp $ 42 */ 43 44 #include "vinumhdr.h" 45 #include "request.h" 46 47 /* Update drive state */ 48 /* Return 1 if the state changes, otherwise 0 */ 49 int 50 set_drive_state(int driveno, enum drivestate newstate, enum setstateflags flags) 51 { 52 union daemoninfo di; 53 struct drive *drive = &DRIVE[driveno]; 54 int oldstate = drive->state; 55 int sdno; 56 57 if (drive->state == drive_unallocated) /* no drive to do anything with, */ 58 return 0; 59 60 if (newstate == oldstate) /* don't change it if it's not different */ 61 return 1; /* all OK */ 62 if ((newstate == drive_down) /* the drive's going down */ 63 &&(!(flags & setstate_force)) 64 && (drive->opencount != 0)) /* we can't do it */ 65 return 0; /* don't do it */ 66 drive->state = newstate; /* set the state */ 67 if (drive->label.name[0] != '\0') /* we have a name, */ 68 log(LOG_INFO, 69 "vinum: drive %s is %s\n", 70 drive->label.name, 71 drive_state(drive->state)); 72 if (drive->state != oldstate) { /* state has changed */ 73 for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) { /* find this drive's subdisks */ 74 if ((SD[sdno].state >= sd_referenced) 75 && (SD[sdno].driveno == driveno)) /* belongs to this drive */ 76 update_sd_state(sdno); /* update the state */ 77 } 78 } 79 if (newstate == drive_up) { /* want to bring it up */ 80 if ((drive->flags & VF_OPEN) == 0) /* should be open, but we're not */ 81 init_drive(drive, 1); /* which changes the state again */ 82 } else { /* taking it down or worse */ 83 di.drive = drive; 84 queue_daemon_request(daemonrq_closedrive, di); /* get the daemon to close it */ 85 } 86 if ((flags & setstate_configuring) == 0) /* configuring? */ 87 save_config(); /* no: save the updated configuration now */ 88 return 1; 89 } 90 91 /* 92 * Try to set the subdisk state. Return 1 if state changed to 93 * what we wanted, -1 if it changed to something else, and 0 94 * if no change. 95 * 96 * This routine is called both from the user (up, down states only) 97 * and internally. 98 * 99 * The setstate_force bit in the flags enables the state change even 100 * if it could be dangerous to data consistency. It shouldn't allow 101 * nonsense. 102 */ 103 int 104 set_sd_state(int sdno, enum sdstate newstate, enum setstateflags flags) 105 { 106 struct sd *sd = &SD[sdno]; 107 struct plex *plex; 108 struct volume *vol; 109 int oldstate = sd->state; 110 int status = 1; /* status to return */ 111 112 if (newstate == oldstate) /* already there, */ 113 return 1; 114 else if (sd->state == sd_unallocated) /* no subdisk to do anything with, */ 115 return 0; /* can't do it */ 116 117 if (sd->driveoffset < 0) { /* not allocated space */ 118 sd->state = sd_down; 119 if (newstate != sd_down) { 120 if (sd->plexno >= 0) 121 sdstatemap(&PLEX[sd->plexno]); /* count up subdisks */ 122 return -1; 123 } 124 } else { /* space allocated */ 125 switch (newstate) { 126 case sd_down: /* take it down? */ 127 /* 128 * If we're attached to a plex, and we're 129 * not reborn, we won't go down without 130 * use of force. 131 */ 132 if (!(flags & setstate_force) 133 && (sd->plexno >= 0) 134 && (sd->state != sd_reborn)) 135 return 0; /* don't do it */ 136 break; 137 138 case sd_initialized: 139 if ((sd->state == sd_initializing) /* we were initializing */ 140 ||(flags & setstate_force)) /* or we forced it */ 141 break; 142 return 0; /* can't do it otherwise */ 143 144 case sd_up: 145 if (DRIVE[sd->driveno].state != drive_up) /* can't bring the sd up if the drive isn't, */ 146 return 0; /* not even by force */ 147 if (flags & setstate_force) /* forcing it, */ 148 break; /* just do it, and damn the consequences */ 149 switch (sd->state) { 150 /* 151 * Perform the necessary tests. To allow 152 * the state transition, just break out of 153 * the switch. 154 */ 155 case sd_crashed: 156 case sd_reborn: 157 case sd_down: /* been down, no data lost */ 158 /* 159 * If we're associated with a plex, and 160 * the plex isn't up, or we're the only 161 * subdisk in the plex, we can do it. 162 */ 163 if ((sd->plexno >= 0) 164 && (((PLEX[sd->plexno].state < plex_firstup) 165 || (PLEX[sd->plexno].subdisks > 1)))) 166 break; /* do it */ 167 if (oldstate != sd_reborn) { 168 sd->state = sd_reborn; /* here it is again */ 169 log(LOG_INFO, 170 "vinum: %s is %s, not %s\n", 171 sd->name, 172 sd_state(sd->state), 173 sd_state(newstate)); 174 } 175 status = -1; 176 break; 177 178 case sd_init: /* brand new */ 179 if (flags & setstate_configuring) /* we're doing this while configuring */ 180 break; 181 /* otherwise it's like being empty */ 182 /* FALLTHROUGH */ 183 184 case sd_empty: 185 case sd_initialized: 186 /* 187 * If we're not part of a plex, or the 188 * plex is not part of a volume with other 189 * plexes which are up, we can come up 190 * without being inconsistent. 191 * 192 * If we're part of a parity plex, we'll 193 * come up if the caller uses force. This 194 * is the way we bring them up after 195 * initialization. 196 */ 197 if ((sd->plexno < 0) 198 || ((vpstate(&PLEX[sd->plexno]) & volplex_otherup) == 0) 199 || (isparity((&PLEX[sd->plexno])) 200 && (flags & setstate_force))) 201 break; 202 203 /* Otherwise it's just out of date */ 204 /* FALLTHROUGH */ 205 206 case sd_stale: /* out of date info, need reviving */ 207 case sd_obsolete: 208 /* 209 210 * 1. If the subdisk is not part of a 211 * plex, bring it up, don't revive. 212 * 213 * 2. If the subdisk is part of a 214 * one-plex volume or an unattached 215 * plex, and it's not RAID-4 or 216 * RAID-5, we *can't revive*. The 217 * subdisk doesn't change its state. 218 * 219 * 3. If the subdisk is part of a 220 * one-plex volume or an unattached 221 * plex, and it's RAID-4 or RAID-5, 222 * but more than one subdisk is down, 223 * we *still can't revive*. The 224 * subdisk doesn't change its state. 225 * 226 * 4. If the subdisk is part of a 227 * multi-plex volume, we'll change to 228 * reviving and let the revive 229 * routines find out whether it will 230 * work or not. If they don't, the 231 * revive stops with an error message, 232 * but the state doesn't change 233 * (FWIW). 234 */ 235 if (sd->plexno < 0) /* no plex associated, */ 236 break; /* bring it up */ 237 plex = &PLEX[sd->plexno]; 238 if (plex->volno >= 0) /* have a volume */ 239 vol = &VOL[plex->volno]; 240 else 241 vol = NULL; 242 /* 243 * We can't do it if: 244 * 245 * 1: we don't have a volume 246 * 2: we're the only plex in the volume 247 * 3: we're a RAID-4 or RAID-5 plex, and 248 * more than one subdisk is down. 249 */ 250 if (((vol == NULL) 251 || (vol->plexes == 1)) 252 && ((!isparity(plex)) 253 || (plex->sddowncount > 1))) { 254 if (sd->state == sd_initializing) /* it's finished initializing */ 255 sd->state = sd_initialized; 256 else 257 return 0; /* can't do it */ 258 } else { 259 sd->state = sd_reviving; /* put in reviving state */ 260 sd->revived = 0; /* nothing done yet */ 261 status = EAGAIN; /* need to repeat */ 262 } 263 break; 264 265 case sd_reviving: 266 if (flags & setstate_force) /* insist, */ 267 break; 268 return EAGAIN; /* no, try again */ 269 270 default: /* can't do it */ 271 /* 272 * There's no way to bring subdisks up directly from 273 * other states. First they need to be initialized 274 * or revived. 275 */ 276 return 0; 277 } 278 break; 279 280 default: /* other ones, only internal with force */ 281 if ((flags & setstate_force) == 0) /* no force? What's this? */ 282 return 0; /* don't do it */ 283 } 284 } 285 if (status == 1) { /* we can do it, */ 286 sd->state = newstate; 287 if (flags & setstate_force) 288 log(LOG_INFO, "vinum: %s is %s by force\n", sd->name, sd_state(sd->state)); 289 else 290 log(LOG_INFO, "vinum: %s is %s\n", sd->name, sd_state(sd->state)); 291 } else /* we don't get here with status 0 */ 292 log(LOG_INFO, 293 "vinum: %s is %s, not %s\n", 294 sd->name, 295 sd_state(sd->state), 296 sd_state(newstate)); 297 if (sd->plexno >= 0) /* we belong to a plex */ 298 update_plex_state(sd->plexno); /* update plex state */ 299 if ((flags & setstate_configuring) == 0) /* save config now */ 300 save_config(); 301 return status; 302 } 303 304 /* 305 * Set the state of a plex dependent on its subdisks. 306 * This time round, we'll let plex state just reflect 307 * aggregate subdisk state, so this becomes an order of 308 * magnitude less complicated. In particular, ignore 309 * the requested state. 310 */ 311 int 312 set_plex_state(int plexno, enum plexstate state, enum setstateflags flags) 313 { 314 struct plex *plex; /* point to our plex */ 315 enum plexstate oldstate; 316 enum volplexstate vps; /* how do we compare with the other plexes? */ 317 318 plex = &PLEX[plexno]; /* point to our plex */ 319 oldstate = plex->state; 320 321 /* If the plex isn't allocated, we can't do it. */ 322 if (plex->state == plex_unallocated) 323 return 0; 324 325 /* 326 * If it's already in the the state we want, 327 * and it's not up, just return. If it's up, 328 * we still need to do some housekeeping. 329 */ 330 if ((state == oldstate) 331 && (state != plex_up)) 332 return 1; 333 vps = vpstate(plex); /* how do we compare with the other plexes? */ 334 switch (state) { 335 /* 336 * We can't bring the plex up, even by force, 337 * unless it's ready. update_plex_state 338 * checks that. 339 */ 340 case plex_up: /* bring the plex up */ 341 update_plex_state(plex->plexno); /* it'll come up if it can */ 342 break; 343 344 case plex_down: /* want to take it down */ 345 /* 346 * If we're the only one, or the only one 347 * which is up, we need force to do it. 348 */ 349 if (((vps == volplex_onlyus) 350 || (vps == volplex_onlyusup)) 351 && (!(flags & setstate_force))) 352 return 0; /* can't do it */ 353 plex->state = state; /* do it */ 354 invalidate_subdisks(plex, sd_down); /* and down all up subdisks */ 355 break; 356 357 /* 358 * This is only requested internally. 359 * Trust ourselves 360 */ 361 case plex_faulty: 362 plex->state = state; /* do it */ 363 invalidate_subdisks(plex, sd_crashed); /* and crash all up subdisks */ 364 break; 365 366 case plex_initializing: 367 /* XXX consider what safeguards we need here */ 368 if ((flags & setstate_force) == 0) 369 return 0; 370 plex->state = state; /* do it */ 371 break; 372 373 /* What's this? */ 374 default: 375 return 0; 376 } 377 if (plex->state != oldstate) /* we've changed, */ 378 log(LOG_INFO, /* tell them about it */ 379 "vinum: %s is %s\n", 380 plex->name, 381 plex_state(plex->state)); 382 /* 383 * Now see what we have left, and whether 384 * we're taking the volume down 385 */ 386 if (plex->volno >= 0) /* we have a volume */ 387 update_volume_state(plex->volno); /* update its state */ 388 if ((flags & setstate_configuring) == 0) /* save config now */ 389 save_config(); /* yes: save the updated configuration */ 390 return 1; 391 } 392 393 /* Update the state of a plex dependent on its plexes. */ 394 int 395 set_volume_state(int volno, enum volumestate state, enum setstateflags flags) 396 { 397 struct volume *vol = &VOL[volno]; /* point to our volume */ 398 399 if (vol->state == volume_unallocated) /* no volume to do anything with, */ 400 return 0; 401 if (vol->state == state) /* we're there already */ 402 return 1; 403 404 if (state == volume_up) /* want to come up */ 405 update_volume_state(volno); 406 else if (state == volume_down) { /* want to go down */ 407 if (((vol->flags & VF_OPEN) == 0) /* not open */ 408 ||((flags & setstate_force) != 0)) { /* or we're forcing */ 409 vol->state = volume_down; 410 log(LOG_INFO, 411 "vinum: volume %s is %s\n", 412 vol->name, 413 volume_state(vol->state)); 414 if ((flags & setstate_configuring) == 0) /* save config now */ 415 save_config(); /* yes: save the updated configuration */ 416 return 1; 417 } 418 } 419 return 0; /* no change */ 420 } 421 422 /* Set the state of a subdisk based on its environment */ 423 void 424 update_sd_state(int sdno) 425 { 426 struct sd *sd; 427 struct drive *drive; 428 enum sdstate oldstate; 429 430 sd = &SD[sdno]; 431 oldstate = sd->state; 432 drive = &DRIVE[sd->driveno]; 433 434 if (drive->state == drive_up) { 435 switch (sd->state) { 436 case sd_down: 437 case sd_crashed: 438 sd->state = sd_reborn; /* back up again with no loss */ 439 break; 440 441 default: 442 break; 443 } 444 } else { /* down or worse */ 445 switch (sd->state) { 446 case sd_up: 447 case sd_reborn: 448 case sd_reviving: 449 case sd_empty: 450 sd->state = sd_crashed; /* lost our drive */ 451 break; 452 453 default: 454 break; 455 } 456 } 457 if (sd->state != oldstate) /* state has changed, */ 458 log(LOG_INFO, /* say so */ 459 "vinum: %s is %s\n", 460 sd->name, 461 sd_state(sd->state)); 462 if (sd->plexno >= 0) /* we're part of a plex, */ 463 update_plex_state(sd->plexno); /* update its state */ 464 } 465 466 /* 467 * Force a plex and all its subdisks 468 * into an 'up' state. This is a helper 469 * for update_plex_state. 470 */ 471 void 472 forceup(int plexno) 473 { 474 struct plex *plex; 475 int sdno; 476 477 plex = &PLEX[plexno]; /* point to the plex */ 478 plex->state = plex_up; /* and bring it up */ 479 480 /* change the subdisks to up state */ 481 for (sdno = 0; sdno < plex->subdisks; sdno++) { 482 SD[plex->sdnos[sdno]].state = sd_up; 483 log(LOG_INFO, /* tell them about it */ 484 "vinum: %s is up\n", 485 SD[plex->sdnos[sdno]].name); 486 } 487 } 488 489 /* Set the state of a plex based on its environment */ 490 void 491 update_plex_state(int plexno) 492 { 493 struct plex *plex; /* point to our plex */ 494 enum plexstate oldstate; 495 enum sdstates statemap; /* get a map of the subdisk states */ 496 enum volplexstate vps; /* how do we compare with the other plexes? */ 497 498 plex = &PLEX[plexno]; /* point to our plex */ 499 oldstate = plex->state; 500 statemap = sdstatemap(plex); /* get a map of the subdisk states */ 501 vps = vpstate(plex); /* how do we compare with the other plexes? */ 502 503 if (statemap & sd_initstate) /* something initializing? */ 504 plex->state = plex_initializing; /* yup, that makes the plex the same */ 505 else if (statemap == sd_upstate) 506 /* 507 * All the subdisks are up. This also means that 508 * they are consistent, so we can just bring 509 * the plex up 510 */ 511 plex->state = plex_up; 512 else if (isparity(plex) /* RAID-4 or RAID-5 plex */ 513 &&(plex->sddowncount == 1)) /* and exactly one subdisk down */ 514 plex->state = plex_degraded; /* limping a bit */ 515 else if (((statemap & ~sd_downstate) == sd_emptystate) /* all subdisks empty */ 516 ||((statemap & ~sd_downstate) 517 == (statemap & ~sd_downstate & (sd_initializedstate | sd_upstate)))) { 518 if ((vps & volplex_otherup) == 0) { /* no other plex is up */ 519 struct volume *vol = &VOL[plex->volno]; /* possible volume to which it points */ 520 521 /* 522 * If we're a striped or concat plex 523 * associated with a volume, none of whose 524 * plexes are up, and we're new and untested, 525 * and the volume has the setupstate bit set, 526 * we can pretend to be in a consistent state. 527 * 528 * We need to do this in one swell foop: on 529 * the next call we will no longer be just 530 * empty. 531 * 532 * This code assumes that all the other plexes 533 * are also capable of coming up (i.e. all the 534 * sds are up), but that's OK: we'll come back 535 * to this function for the remaining plexes 536 * in the volume. 537 */ 538 if ((plex->state == plex_init) 539 && (plex->volno >= 0) 540 && (vol->flags & VF_CONFIG_SETUPSTATE)) { 541 for (plexno = 0; plexno < vol->plexes; plexno++) 542 forceup(VOL[plex->volno].plex[plexno]); 543 } else if ((statemap == sd_initializedstate) /* if it's initialized (not empty) */ 544 ||(plex->organization == plex_concat) /* and we're not RAID-4 or RAID-5 */ 545 ||(plex->organization == plex_striped)) 546 forceup(plexno); /* we'll do it */ 547 /* 548 * This leaves a case where things don't get 549 * done: the plex is RAID-4 or RAID-5, and 550 * the subdisks are all empty. They need to 551 * be initialized first. 552 */ 553 } else { 554 if (statemap == sd_upstate) /* all subdisks up */ 555 plex->state = plex_up; /* we can come up too */ 556 else 557 plex->state = plex_faulty; 558 } 559 } else if ((statemap & (sd_upstate | sd_rebornstate)) == statemap) /* all up or reborn */ 560 plex->state = plex_flaky; 561 else if (statemap & (sd_upstate | sd_rebornstate)) /* some up or reborn */ 562 plex->state = plex_corrupt; /* corrupt */ 563 else if (statemap & (sd_initstate | sd_emptystate)) /* some subdisks empty or initializing */ 564 plex->state = plex_initializing; 565 else /* nothing at all up */ 566 plex->state = plex_faulty; 567 568 if (plex->state != oldstate) /* state has changed, */ 569 log(LOG_INFO, /* tell them about it */ 570 "vinum: %s is %s\n", 571 plex->name, 572 plex_state(plex->state)); 573 if (plex->volno >= 0) /* we're part of a volume, */ 574 update_volume_state(plex->volno); /* update its state */ 575 } 576 577 /* Set volume state based on its components */ 578 void 579 update_volume_state(int volno) 580 { 581 struct volume *vol; /* our volume */ 582 int plexno; 583 enum volumestate oldstate; 584 585 vol = &VOL[volno]; /* point to our volume */ 586 oldstate = vol->state; 587 588 for (plexno = 0; plexno < vol->plexes; plexno++) { 589 struct plex *plex = &PLEX[vol->plex[plexno]]; /* point to the plex */ 590 if (plex->state >= plex_corrupt) { /* something accessible, */ 591 vol->state = volume_up; 592 break; 593 } 594 } 595 if (plexno == vol->plexes) /* didn't find an up plex */ 596 vol->state = volume_down; 597 598 if (vol->state != oldstate) { /* state changed */ 599 log(LOG_INFO, "vinum: %s is %s\n", vol->name, volume_state(vol->state)); 600 save_config(); /* save the updated configuration */ 601 } 602 } 603 604 /* 605 * Called from request routines when they find 606 * a subdisk which is not kosher. Decide whether 607 * it warrants changing the state. Return 608 * REQUEST_DOWN if we can't use the subdisk, 609 * REQUEST_OK if we can. 610 */ 611 /* 612 * A prior version of this function checked the plex 613 * state as well. At the moment, consider plex states 614 * information for the user only. We'll ignore them 615 * and use the subdisk state only. The last version of 616 * this file with the old logic was 2.7. XXX 617 */ 618 enum requeststatus 619 checksdstate(struct sd *sd, struct request *rq, vinum_off_t diskaddr, vinum_off_t diskend) 620 { 621 struct plex *plex = &PLEX[sd->plexno]; 622 int writeop = (rq->bio->bio_buf->b_cmd != BUF_CMD_READ); /* note if we're writing */ 623 624 switch (sd->state) { 625 /* We shouldn't get called if the subdisk is up */ 626 case sd_up: 627 return REQUEST_OK; 628 629 case sd_reviving: 630 /* 631 * Access to a reviving subdisk depends on the 632 * organization of the plex: 633 * 634 * - If it's concatenated, access the subdisk 635 * up to its current revive point. If we 636 * want to write to the subdisk overlapping 637 * the current revive block, set the 638 * conflict flag in the request, asking the 639 * caller to put the request on the wait 640 * list, which will be attended to by 641 * revive_block when it's done. 642 * - if it's striped, we can't do it (we could 643 * do some hairy calculations, but it's 644 * unlikely to work). 645 * - if it's RAID-4 or RAID-5, we can do it as 646 * long as only one subdisk is down 647 */ 648 if (plex->organization == plex_striped) /* plex is striped, */ 649 return REQUEST_DOWN; 650 651 else if (isparity(plex)) { /* RAID-4 or RAID-5 plex */ 652 if (plex->sddowncount > 1) /* with more than one sd down, */ 653 return REQUEST_DOWN; 654 else 655 /* 656 * XXX We shouldn't do this if we can find a 657 * better way. Check the other plexes 658 * first, and return a DOWN if another 659 * plex will do it better 660 */ 661 return REQUEST_OK; /* OK, we'll find a way */ 662 } 663 if (diskaddr > (sd->revived 664 + sd->plexoffset 665 + (sd->revive_blocksize >> DEV_BSHIFT))) /* we're beyond the end */ 666 return REQUEST_DOWN; 667 else if (diskend > (sd->revived + sd->plexoffset)) { /* we finish beyond the end */ 668 if (writeop) { 669 rq->flags |= XFR_REVIVECONFLICT; /* note a potential conflict */ 670 rq->sdno = sd->sdno; /* and which sd last caused it */ 671 } else 672 return REQUEST_DOWN; 673 } 674 return REQUEST_OK; 675 676 case sd_reborn: 677 if (writeop) 678 return REQUEST_OK; /* always write to a reborn disk */ 679 else /* don't allow a read */ 680 /* 681 * Handle the mapping. We don't want to reject 682 * a read request to a reborn subdisk if that's 683 * all we have. XXX 684 */ 685 return REQUEST_DOWN; 686 687 case sd_down: 688 if (writeop) /* writing to a consistent down disk */ 689 set_sd_state(sd->sdno, sd_obsolete, setstate_force); /* it's not consistent now */ 690 return REQUEST_DOWN; 691 692 case sd_crashed: 693 if (writeop) /* writing to a consistent down disk */ 694 set_sd_state(sd->sdno, sd_stale, setstate_force); /* it's not consistent now */ 695 return REQUEST_DOWN; 696 697 default: 698 return REQUEST_DOWN; 699 } 700 } 701 702 /* return a state map for the subdisks of a plex */ 703 enum sdstates 704 sdstatemap(struct plex *plex) 705 { 706 int sdno; 707 enum sdstates statemap = 0; /* note the states we find */ 708 709 plex->sddowncount = 0; /* no subdisks down yet */ 710 for (sdno = 0; sdno < plex->subdisks; sdno++) { 711 struct sd *sd = &SD[plex->sdnos[sdno]]; /* point to the subdisk */ 712 713 switch (sd->state) { 714 case sd_empty: 715 statemap |= sd_emptystate; 716 (plex->sddowncount)++; /* another unusable subdisk */ 717 break; 718 719 case sd_init: 720 statemap |= sd_initstate; 721 (plex->sddowncount)++; /* another unusable subdisk */ 722 break; 723 724 case sd_down: 725 statemap |= sd_downstate; 726 (plex->sddowncount)++; /* another unusable subdisk */ 727 break; 728 729 case sd_crashed: 730 statemap |= sd_crashedstate; 731 (plex->sddowncount)++; /* another unusable subdisk */ 732 break; 733 734 case sd_obsolete: 735 statemap |= sd_obsoletestate; 736 (plex->sddowncount)++; /* another unusable subdisk */ 737 break; 738 739 case sd_stale: 740 statemap |= sd_stalestate; 741 (plex->sddowncount)++; /* another unusable subdisk */ 742 break; 743 744 case sd_reborn: 745 statemap |= sd_rebornstate; 746 break; 747 748 case sd_up: 749 statemap |= sd_upstate; 750 break; 751 752 case sd_initializing: 753 statemap |= sd_initstate; 754 (plex->sddowncount)++; /* another unusable subdisk */ 755 break; 756 757 case sd_initialized: 758 statemap |= sd_initializedstate; 759 (plex->sddowncount)++; /* another unusable subdisk */ 760 break; 761 762 case sd_unallocated: 763 case sd_uninit: 764 case sd_reviving: 765 case sd_referenced: 766 statemap |= sd_otherstate; 767 (plex->sddowncount)++; /* another unusable subdisk */ 768 } 769 } 770 return statemap; 771 } 772 773 /* determine the state of the volume relative to this plex */ 774 enum volplexstate 775 vpstate(struct plex *plex) 776 { 777 struct volume *vol; 778 enum volplexstate state = volplex_onlyusdown; /* state to return */ 779 int plexno; 780 781 if (plex->volno < 0) { /* not associated with a volume */ 782 if (plex->state > plex_degraded) 783 return volplex_onlyus; /* just us */ 784 else 785 return volplex_onlyusdown; /* assume the worst */ 786 } 787 vol = &VOL[plex->volno]; /* point to our volume */ 788 for (plexno = 0; plexno < vol->plexes; plexno++) { 789 if (&PLEX[vol->plex[plexno]] == plex) { /* us */ 790 if (PLEX[vol->plex[plexno]].state >= plex_degraded) /* are we up? */ 791 state |= volplex_onlyus; /* yes */ 792 } else { 793 if (PLEX[vol->plex[plexno]].state >= plex_degraded) /* not us */ 794 state |= volplex_otherup; /* and when they were up, they were up */ 795 else 796 state |= volplex_alldown; /* and when they were down, they were down */ 797 } 798 } 799 return state; /* and when they were only halfway up */ 800 } /* they were neither up nor down */ 801 802 /* Check if all bits b are set in a */ 803 int allset(int a, int b); 804 805 int 806 allset(int a, int b) 807 { 808 return (a & b) == b; 809 } 810 811 /* Invalidate the subdisks belonging to a plex */ 812 void 813 invalidate_subdisks(struct plex *plex, enum sdstate state) 814 { 815 int sdno; 816 817 for (sdno = 0; sdno < plex->subdisks; sdno++) { /* for each subdisk */ 818 struct sd *sd = &SD[plex->sdnos[sdno]]; 819 820 switch (sd->state) { 821 case sd_unallocated: 822 case sd_uninit: 823 case sd_init: 824 case sd_initializing: 825 case sd_initialized: 826 case sd_empty: 827 case sd_obsolete: 828 case sd_stale: 829 case sd_crashed: 830 case sd_down: 831 case sd_referenced: 832 break; 833 834 case sd_reviving: 835 case sd_reborn: 836 case sd_up: 837 set_sd_state(plex->sdnos[sdno], state, setstate_force); 838 } 839 } 840 } 841 842 /* 843 * Start an object, in other words do what we can to get it up. 844 * This is called from vinumioctl (VINUMSTART). 845 * Return error indications via ioctl_reply 846 */ 847 void 848 start_object(struct vinum_ioctl_msg *data) 849 { 850 int status; 851 int objindex = data->index; /* data gets overwritten */ 852 struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* format for returning replies */ 853 enum setstateflags flags; 854 855 if (data->force != 0) /* are we going to use force? */ 856 flags = setstate_force; /* yes */ 857 else 858 flags = setstate_none; /* no */ 859 860 switch (data->type) { 861 case drive_object: 862 status = set_drive_state(objindex, drive_up, flags); 863 if (DRIVE[objindex].state != drive_up) /* set status on whether we really did it */ 864 ioctl_reply->error = EBUSY; 865 else 866 ioctl_reply->error = 0; 867 break; 868 869 case sd_object: 870 if (DRIVE[SD[objindex].driveno].state != drive_up) { 871 ioctl_reply->error = EIO; 872 strcpy(ioctl_reply->msg, "Drive is down"); 873 return; 874 } 875 if (data->blocksize) 876 SD[objindex].revive_blocksize = data->blocksize; 877 if ((SD[objindex].state == sd_reviving) /* reviving, */ 878 ||(SD[objindex].state == sd_stale)) { /* or stale, will revive */ 879 SD[objindex].state = sd_reviving; /* make sure we're reviving */ 880 ioctl_reply->error = revive_block(objindex); /* revive another block */ 881 ioctl_reply->msg[0] = '\0'; /* no comment */ 882 return; 883 } else if (SD[objindex].state == sd_initializing) { /* initializing, */ 884 if (data->blocksize) 885 SD[objindex].init_blocksize = data->blocksize; 886 ioctl_reply->error = initsd(objindex, data->verify); /* initialize another block */ 887 ioctl_reply->msg[0] = '\0'; /* no comment */ 888 return; 889 } 890 status = set_sd_state(objindex, sd_up, flags); /* set state */ 891 if (status != EAGAIN) { /* not first revive or initialize, */ 892 if (SD[objindex].state != sd_up) /* set status on whether we really did it */ 893 ioctl_reply->error = EBUSY; 894 else 895 ioctl_reply->error = 0; 896 } else 897 ioctl_reply->error = status; 898 break; 899 900 case plex_object: 901 status = set_plex_state(objindex, plex_up, flags); 902 if (PLEX[objindex].state != plex_up) /* set status on whether we really did it */ 903 ioctl_reply->error = EBUSY; 904 else 905 ioctl_reply->error = 0; 906 break; 907 908 case volume_object: 909 status = set_volume_state(objindex, volume_up, flags); 910 if (VOL[objindex].state != volume_up) /* set status on whether we really did it */ 911 ioctl_reply->error = EBUSY; 912 else 913 ioctl_reply->error = 0; 914 break; 915 916 default: 917 ioctl_reply->error = EINVAL; 918 strcpy(ioctl_reply->msg, "Invalid object type"); 919 return; 920 } 921 /* 922 * There's no point in saying anything here: 923 * the userland program does it better 924 */ 925 ioctl_reply->msg[0] = '\0'; 926 } 927 928 /* 929 * Stop an object, in other words do what we can to get it down 930 * This is called from vinumioctl (VINUMSTOP). 931 * Return error indications via ioctl_reply. 932 */ 933 void 934 stop_object(struct vinum_ioctl_msg *data) 935 { 936 int status = 1; 937 int objindex = data->index; /* save the number from change */ 938 struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* format for returning replies */ 939 940 switch (data->type) { 941 case drive_object: 942 status = set_drive_state(objindex, drive_down, data->force); 943 break; 944 945 case sd_object: 946 status = set_sd_state(objindex, sd_down, data->force); 947 break; 948 949 case plex_object: 950 status = set_plex_state(objindex, plex_down, data->force); 951 break; 952 953 case volume_object: 954 status = set_volume_state(objindex, volume_down, data->force); 955 break; 956 957 default: 958 ioctl_reply->error = EINVAL; 959 strcpy(ioctl_reply->msg, "Invalid object type"); 960 return; 961 } 962 ioctl_reply->msg[0] = '\0'; 963 if (status == 0) /* couldn't do it */ 964 ioctl_reply->error = EBUSY; 965 else 966 ioctl_reply->error = 0; 967 } 968 969 /* 970 * VINUM_SETSTATE ioctl: set an object state. 971 * msg is the message passed by the user. 972 */ 973 void 974 setstate(struct vinum_ioctl_msg *msg) 975 { 976 int sdno; 977 struct sd *sd; 978 struct plex *plex; 979 struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) msg; /* format for returning replies */ 980 981 switch (msg->state) { 982 case object_down: 983 stop_object(msg); 984 break; 985 986 case object_initializing: 987 switch (msg->type) { 988 case sd_object: 989 sd = &SD[msg->index]; 990 if ((msg->index >= vinum_conf.subdisks_allocated) 991 || (sd->state <= sd_referenced)) { 992 ksprintf(ioctl_reply->msg, "Invalid subdisk %d", msg->index); 993 ioctl_reply->error = EFAULT; 994 return; 995 } 996 set_sd_state(msg->index, sd_initializing, msg->force); 997 if (sd->state != sd_initializing) { 998 strcpy(ioctl_reply->msg, "Can't set state"); 999 ioctl_reply->error = EBUSY; 1000 } else 1001 ioctl_reply->error = 0; 1002 break; 1003 1004 case plex_object: 1005 plex = &PLEX[msg->index]; 1006 if ((msg->index >= vinum_conf.plexes_allocated) 1007 || (plex->state <= plex_unallocated)) { 1008 ksprintf(ioctl_reply->msg, "Invalid plex %d", msg->index); 1009 ioctl_reply->error = EFAULT; 1010 return; 1011 } 1012 set_plex_state(msg->index, plex_initializing, msg->force); 1013 if (plex->state != plex_initializing) { 1014 strcpy(ioctl_reply->msg, "Can't set state"); 1015 ioctl_reply->error = EBUSY; 1016 } else { 1017 ioctl_reply->error = 0; 1018 for (sdno = 0; sdno < plex->subdisks; sdno++) { 1019 sd = &SD[plex->sdnos[sdno]]; 1020 set_sd_state(plex->sdnos[sdno], sd_initializing, msg->force); 1021 if (sd->state != sd_initializing) { 1022 strcpy(ioctl_reply->msg, "Can't set state"); 1023 ioctl_reply->error = EBUSY; 1024 break; 1025 } 1026 } 1027 } 1028 break; 1029 1030 default: 1031 strcpy(ioctl_reply->msg, "Invalid object"); 1032 ioctl_reply->error = EINVAL; 1033 } 1034 break; 1035 1036 case object_initialized: 1037 if (msg->type == sd_object) { 1038 sd = &SD[msg->index]; 1039 if ((msg->index >= vinum_conf.subdisks_allocated) 1040 || (sd->state <= sd_referenced)) { 1041 ksprintf(ioctl_reply->msg, "Invalid subdisk %d", msg->index); 1042 ioctl_reply->error = EFAULT; 1043 return; 1044 } 1045 set_sd_state(msg->index, sd_initialized, msg->force); 1046 if (sd->state != sd_initializing) { 1047 strcpy(ioctl_reply->msg, "Can't set state"); 1048 ioctl_reply->error = EBUSY; 1049 } else 1050 ioctl_reply->error = 0; 1051 } else { 1052 strcpy(ioctl_reply->msg, "Invalid object"); 1053 ioctl_reply->error = EINVAL; 1054 } 1055 break; 1056 1057 case object_up: 1058 start_object(msg); 1059 } 1060 } 1061 1062 /* 1063 * Brute force set state function. Don't look at 1064 * any dependencies, just do it. This is mainly 1065 * intended for testing and recovery. 1066 */ 1067 void 1068 setstate_by_force(struct vinum_ioctl_msg *msg) 1069 { 1070 struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) msg; /* format for returning replies */ 1071 1072 switch (msg->type) { 1073 case drive_object: 1074 DRIVE[msg->index].state = msg->state; 1075 break; 1076 1077 case sd_object: 1078 SD[msg->index].state = msg->state; 1079 break; 1080 1081 case plex_object: 1082 PLEX[msg->index].state = msg->state; 1083 break; 1084 1085 case volume_object: 1086 VOL[msg->index].state = msg->state; 1087 break; 1088 1089 default: 1090 break; 1091 } 1092 ioctl_reply->error = 0; 1093 } 1094