/*	$NetBSD: rf_disks.c,v 1.40 2002/10/22 03:15:28 oster Exp $	*/
/*-
 * Copyright (c) 1999 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Greg Oster
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Author: Mark Holland
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
63 */ 64 65 /*************************************************************** 66 * rf_disks.c -- code to perform operations on the actual disks 67 ***************************************************************/ 68 69 #include <sys/cdefs.h> 70 __KERNEL_RCSID(0, "$NetBSD: rf_disks.c,v 1.40 2002/10/22 03:15:28 oster Exp $"); 71 72 #include <dev/raidframe/raidframevar.h> 73 74 #include "rf_raid.h" 75 #include "rf_alloclist.h" 76 #include "rf_utils.h" 77 #include "rf_general.h" 78 #include "rf_options.h" 79 #include "rf_kintf.h" 80 #include "rf_netbsd.h" 81 82 #include <sys/param.h> 83 #include <sys/systm.h> 84 #include <sys/proc.h> 85 #include <sys/ioctl.h> 86 #include <sys/fcntl.h> 87 #include <sys/vnode.h> 88 89 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *); 90 static void rf_print_label_status( RF_Raid_t *, int, int, char *, 91 RF_ComponentLabel_t *); 92 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *, 93 RF_ComponentLabel_t *, int, int ); 94 95 #define DPRINTF6(a,b,c,d,e,f) if (rf_diskDebug) printf(a,b,c,d,e,f) 96 #define DPRINTF7(a,b,c,d,e,f,g) if (rf_diskDebug) printf(a,b,c,d,e,f,g) 97 98 /************************************************************************** 99 * 100 * initialize the disks comprising the array 101 * 102 * We want the spare disks to have regular row,col numbers so that we can 103 * easily substitue a spare for a failed disk. But, the driver code assumes 104 * throughout that the array contains numRow by numCol _non-spare_ disks, so 105 * it's not clear how to fit in the spares. This is an unfortunate holdover 106 * from raidSim. The quick and dirty fix is to make row zero bigger than the 107 * rest, and put all the spares in it. This probably needs to get changed 108 * eventually. 
109 * 110 **************************************************************************/ 111 112 int 113 rf_ConfigureDisks( listp, raidPtr, cfgPtr ) 114 RF_ShutdownList_t **listp; 115 RF_Raid_t *raidPtr; 116 RF_Config_t *cfgPtr; 117 { 118 RF_RaidDisk_t **disks; 119 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL; 120 RF_RowCol_t r, c; 121 int bs, ret; 122 unsigned i, count, foundone = 0, numFailuresThisRow; 123 int force; 124 125 force = cfgPtr->force; 126 127 ret = rf_AllocDiskStructures(raidPtr, cfgPtr); 128 if (ret) 129 goto fail; 130 131 disks = raidPtr->Disks; 132 133 for (r = 0; r < raidPtr->numRow; r++) { 134 numFailuresThisRow = 0; 135 for (c = 0; c < raidPtr->numCol; c++) { 136 ret = rf_ConfigureDisk(raidPtr, 137 &cfgPtr->devnames[r][c][0], 138 &disks[r][c], r, c); 139 140 if (ret) 141 goto fail; 142 143 if (disks[r][c].status == rf_ds_optimal) { 144 raidread_component_label( 145 raidPtr->raid_cinfo[r][c].ci_dev, 146 raidPtr->raid_cinfo[r][c].ci_vp, 147 &raidPtr->raid_cinfo[r][c].ci_label); 148 } 149 150 if (disks[r][c].status != rf_ds_optimal) { 151 numFailuresThisRow++; 152 } else { 153 if (disks[r][c].numBlocks < min_numblks) 154 min_numblks = disks[r][c].numBlocks; 155 DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", 156 r, c, disks[r][c].devname, 157 (long int) disks[r][c].numBlocks, 158 disks[r][c].blockSize, 159 (long int) disks[r][c].numBlocks * 160 disks[r][c].blockSize / 1024 / 1024); 161 } 162 } 163 /* XXX fix for n-fault tolerant */ 164 /* XXX this should probably check to see how many failures 165 we can handle for this configuration! 
*/ 166 if (numFailuresThisRow > 0) 167 raidPtr->status[r] = rf_rs_degraded; 168 } 169 170 /* all disks must be the same size & have the same block size, bs must 171 * be a power of 2 */ 172 bs = 0; 173 for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) { 174 for (c = 0; !foundone && c < raidPtr->numCol; c++) { 175 if (disks[r][c].status == rf_ds_optimal) { 176 bs = disks[r][c].blockSize; 177 foundone = 1; 178 } 179 } 180 } 181 if (!foundone) { 182 RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n"); 183 ret = EINVAL; 184 goto fail; 185 } 186 for (count = 0, i = 1; i; i <<= 1) 187 if (bs & i) 188 count++; 189 if (count != 1) { 190 RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs); 191 ret = EINVAL; 192 goto fail; 193 } 194 195 if (rf_CheckLabels( raidPtr, cfgPtr )) { 196 printf("raid%d: There were fatal errors\n", raidPtr->raidid); 197 if (force != 0) { 198 printf("raid%d: Fatal errors being ignored.\n", 199 raidPtr->raidid); 200 } else { 201 ret = EINVAL; 202 goto fail; 203 } 204 } 205 206 for (r = 0; r < raidPtr->numRow; r++) { 207 for (c = 0; c < raidPtr->numCol; c++) { 208 if (disks[r][c].status == rf_ds_optimal) { 209 if (disks[r][c].blockSize != bs) { 210 RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n", r, c); 211 ret = EINVAL; 212 goto fail; 213 } 214 if (disks[r][c].numBlocks != min_numblks) { 215 RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n", 216 r, c, (int) min_numblks); 217 disks[r][c].numBlocks = min_numblks; 218 } 219 } 220 } 221 } 222 223 raidPtr->sectorsPerDisk = min_numblks; 224 raidPtr->logBytesPerSector = ffs(bs) - 1; 225 raidPtr->bytesPerSector = bs; 226 raidPtr->sectorMask = bs - 1; 227 return (0); 228 229 fail: 230 231 rf_UnconfigureVnodes( raidPtr ); 232 233 return (ret); 234 } 235 236 237 /**************************************************************************** 238 * set up the data structures describing the spare disks in the 
array 239 * recall from the above comment that the spare disk descriptors are stored 240 * in row zero, which is specially expanded to hold them. 241 ****************************************************************************/ 242 int 243 rf_ConfigureSpareDisks( listp, raidPtr, cfgPtr ) 244 RF_ShutdownList_t ** listp; 245 RF_Raid_t * raidPtr; 246 RF_Config_t * cfgPtr; 247 { 248 int i, ret; 249 unsigned int bs; 250 RF_RaidDisk_t *disks; 251 int num_spares_done; 252 253 num_spares_done = 0; 254 255 /* The space for the spares should have already been allocated by 256 * ConfigureDisks() */ 257 258 disks = &raidPtr->Disks[0][raidPtr->numCol]; 259 for (i = 0; i < raidPtr->numSpare; i++) { 260 ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0], 261 &disks[i], 0, raidPtr->numCol + i); 262 if (ret) 263 goto fail; 264 if (disks[i].status != rf_ds_optimal) { 265 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n", 266 &cfgPtr->spare_names[i][0]); 267 } else { 268 disks[i].status = rf_ds_spare; /* change status to 269 * spare */ 270 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i, 271 disks[i].devname, 272 (long int) disks[i].numBlocks, disks[i].blockSize, 273 (long int) disks[i].numBlocks * 274 disks[i].blockSize / 1024 / 1024); 275 } 276 num_spares_done++; 277 } 278 279 /* check sizes and block sizes on spare disks */ 280 bs = 1 << raidPtr->logBytesPerSector; 281 for (i = 0; i < raidPtr->numSpare; i++) { 282 if (disks[i].blockSize != bs) { 283 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs); 284 ret = EINVAL; 285 goto fail; 286 } 287 if (disks[i].numBlocks < raidPtr->sectorsPerDisk) { 288 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n", 289 disks[i].devname, disks[i].blockSize, 290 (long int) raidPtr->sectorsPerDisk); 291 ret = EINVAL; 292 goto fail; 293 } else 294 if (disks[i].numBlocks > 
raidPtr->sectorsPerDisk) { 295 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk); 296 297 disks[i].numBlocks = raidPtr->sectorsPerDisk; 298 } 299 } 300 301 return (0); 302 303 fail: 304 305 /* Release the hold on the main components. We've failed to allocate 306 * a spare, and since we're failing, we need to free things.. 307 308 XXX failing to allocate a spare is *not* that big of a deal... 309 We *can* survive without it, if need be, esp. if we get hot 310 adding working. 311 312 If we don't fail out here, then we need a way to remove this spare... 313 that should be easier to do here than if we are "live"... 314 315 */ 316 317 rf_UnconfigureVnodes( raidPtr ); 318 319 return (ret); 320 } 321 322 static int 323 rf_AllocDiskStructures(raidPtr, cfgPtr) 324 RF_Raid_t *raidPtr; 325 RF_Config_t *cfgPtr; 326 { 327 RF_RaidDisk_t **disks; 328 int ret; 329 int r; 330 331 RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *), 332 (RF_RaidDisk_t **), raidPtr->cleanupList); 333 if (disks == NULL) { 334 ret = ENOMEM; 335 goto fail; 336 } 337 raidPtr->Disks = disks; 338 /* get space for the device-specific stuff... */ 339 RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow, 340 sizeof(struct raidcinfo *), (struct raidcinfo **), 341 raidPtr->cleanupList); 342 if (raidPtr->raid_cinfo == NULL) { 343 ret = ENOMEM; 344 goto fail; 345 } 346 347 for (r = 0; r < raidPtr->numRow; r++) { 348 /* We allocate RF_MAXSPARE on the first row so that we 349 have room to do hot-swapping of spares */ 350 RF_CallocAndAdd(disks[r], raidPtr->numCol 351 + ((r == 0) ? RF_MAXSPARE : 0), 352 sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *), 353 raidPtr->cleanupList); 354 if (disks[r] == NULL) { 355 ret = ENOMEM; 356 goto fail; 357 } 358 /* get more space for device specific stuff.. */ 359 RF_CallocAndAdd(raidPtr->raid_cinfo[r], 360 raidPtr->numCol + ((r == 0) ? 
raidPtr->numSpare : 0), 361 sizeof(struct raidcinfo), (struct raidcinfo *), 362 raidPtr->cleanupList); 363 if (raidPtr->raid_cinfo[r] == NULL) { 364 ret = ENOMEM; 365 goto fail; 366 } 367 } 368 return(0); 369 fail: 370 rf_UnconfigureVnodes( raidPtr ); 371 372 return(ret); 373 } 374 375 376 /* configure a single disk during auto-configuration at boot */ 377 int 378 rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config) 379 RF_Raid_t *raidPtr; 380 RF_Config_t *cfgPtr; 381 RF_AutoConfig_t *auto_config; 382 { 383 RF_RaidDisk_t **disks; 384 RF_RaidDisk_t *diskPtr; 385 RF_RowCol_t r, c; 386 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL; 387 int bs, ret; 388 int numFailuresThisRow; 389 RF_AutoConfig_t *ac; 390 int parity_good; 391 int mod_counter; 392 int mod_counter_found; 393 394 #if DEBUG 395 printf("Starting autoconfiguration of RAID set...\n"); 396 #endif 397 398 ret = rf_AllocDiskStructures(raidPtr, cfgPtr); 399 if (ret) 400 goto fail; 401 402 disks = raidPtr->Disks; 403 404 /* assume the parity will be fine.. */ 405 parity_good = RF_RAID_CLEAN; 406 407 /* Check for mod_counters that are too low */ 408 mod_counter_found = 0; 409 mod_counter = 0; 410 ac = auto_config; 411 while(ac!=NULL) { 412 if (mod_counter_found==0) { 413 mod_counter = ac->clabel->mod_counter; 414 mod_counter_found = 1; 415 } else { 416 if (ac->clabel->mod_counter > mod_counter) { 417 mod_counter = ac->clabel->mod_counter; 418 } 419 } 420 ac->flag = 0; /* clear the general purpose flag */ 421 ac = ac->next; 422 } 423 424 bs = 0; 425 for (r = 0; r < raidPtr->numRow; r++) { 426 numFailuresThisRow = 0; 427 for (c = 0; c < raidPtr->numCol; c++) { 428 diskPtr = &disks[r][c]; 429 430 /* find this row/col in the autoconfig */ 431 #if DEBUG 432 printf("Looking for %d,%d in autoconfig\n",r,c); 433 #endif 434 ac = auto_config; 435 while(ac!=NULL) { 436 if (ac->clabel==NULL) { 437 /* big-time bad news. 
*/ 438 goto fail; 439 } 440 if ((ac->clabel->row == r) && 441 (ac->clabel->column == c) && 442 (ac->clabel->mod_counter == mod_counter)) { 443 /* it's this one... */ 444 /* flag it as 'used', so we don't 445 free it later. */ 446 ac->flag = 1; 447 #if DEBUG 448 printf("Found: %s at %d,%d\n", 449 ac->devname,r,c); 450 #endif 451 452 break; 453 } 454 ac=ac->next; 455 } 456 457 if (ac==NULL) { 458 /* we didn't find an exact match with a 459 correct mod_counter above... can we 460 find one with an incorrect mod_counter 461 to use instead? (this one, if we find 462 it, will be marked as failed once the 463 set configures) 464 */ 465 466 ac = auto_config; 467 while(ac!=NULL) { 468 if (ac->clabel==NULL) { 469 /* big-time bad news. */ 470 goto fail; 471 } 472 if ((ac->clabel->row == r) && 473 (ac->clabel->column == c)) { 474 /* it's this one... 475 flag it as 'used', so we 476 don't free it later. */ 477 ac->flag = 1; 478 #if DEBUG 479 printf("Found(low mod_counter): %s at %d,%d\n", 480 ac->devname,r,c); 481 #endif 482 483 break; 484 } 485 ac=ac->next; 486 } 487 } 488 489 490 491 if (ac!=NULL) { 492 /* Found it. Configure it.. */ 493 diskPtr->blockSize = ac->clabel->blockSize; 494 diskPtr->numBlocks = ac->clabel->numBlocks; 495 /* Note: rf_protectedSectors is already 496 factored into numBlocks here */ 497 raidPtr->raid_cinfo[r][c].ci_vp = ac->vp; 498 raidPtr->raid_cinfo[r][c].ci_dev = ac->dev; 499 500 memcpy(&raidPtr->raid_cinfo[r][c].ci_label, 501 ac->clabel, sizeof(*ac->clabel)); 502 sprintf(diskPtr->devname, "/dev/%s", 503 ac->devname); 504 505 /* note the fact that this component was 506 autoconfigured. You'll need this info 507 later. 
Trust me :) */ 508 diskPtr->auto_configured = 1; 509 diskPtr->dev = ac->dev; 510 511 /* 512 * we allow the user to specify that 513 * only a fraction of the disks should 514 * be used this is just for debug: it 515 * speeds up the parity scan 516 */ 517 518 diskPtr->numBlocks = diskPtr->numBlocks * 519 rf_sizePercentage / 100; 520 521 /* XXX these will get set multiple times, 522 but since we're autoconfiguring, they'd 523 better be always the same each time! 524 If not, this is the least of your worries */ 525 526 bs = diskPtr->blockSize; 527 min_numblks = diskPtr->numBlocks; 528 529 /* this gets done multiple times, but that's 530 fine -- the serial number will be the same 531 for all components, guaranteed */ 532 raidPtr->serial_number = 533 ac->clabel->serial_number; 534 /* check the last time the label 535 was modified */ 536 if (ac->clabel->mod_counter != 537 mod_counter) { 538 /* Even though we've filled in all 539 of the above, we don't trust 540 this component since it's 541 modification counter is not 542 in sync with the rest, and we really 543 consider it to be failed. */ 544 disks[r][c].status = rf_ds_failed; 545 numFailuresThisRow++; 546 } else { 547 if (ac->clabel->clean != 548 RF_RAID_CLEAN) { 549 parity_good = RF_RAID_DIRTY; 550 } 551 } 552 } else { 553 /* Didn't find it at all!! 554 Component must really be dead */ 555 disks[r][c].status = rf_ds_failed; 556 sprintf(disks[r][c].devname,"component%d", 557 r * raidPtr->numCol + c); 558 numFailuresThisRow++; 559 } 560 } 561 /* XXX fix for n-fault tolerant */ 562 /* XXX this should probably check to see how many failures 563 we can handle for this configuration! 
*/ 564 if (numFailuresThisRow > 0) 565 raidPtr->status[r] = rf_rs_degraded; 566 } 567 568 /* close the device for the ones that didn't get used */ 569 570 ac = auto_config; 571 while(ac!=NULL) { 572 if (ac->flag == 0) { 573 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY); 574 VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED, 0); 575 vput(ac->vp); 576 ac->vp = NULL; 577 #if DEBUG 578 printf("Released %s from auto-config set.\n", 579 ac->devname); 580 #endif 581 } 582 ac = ac->next; 583 } 584 585 raidPtr->mod_counter = mod_counter; 586 587 /* note the state of the parity, if any */ 588 raidPtr->parity_good = parity_good; 589 raidPtr->sectorsPerDisk = min_numblks; 590 raidPtr->logBytesPerSector = ffs(bs) - 1; 591 raidPtr->bytesPerSector = bs; 592 raidPtr->sectorMask = bs - 1; 593 return (0); 594 595 fail: 596 597 rf_UnconfigureVnodes( raidPtr ); 598 599 return (ret); 600 601 } 602 603 /* configure a single disk in the array */ 604 int 605 rf_ConfigureDisk(raidPtr, buf, diskPtr, row, col) 606 RF_Raid_t *raidPtr; 607 char *buf; 608 RF_RaidDisk_t *diskPtr; 609 RF_RowCol_t row; 610 RF_RowCol_t col; 611 { 612 char *p; 613 struct partinfo dpart; 614 struct vnode *vp; 615 struct vattr va; 616 struct proc *proc; 617 int error; 618 619 p = rf_find_non_white(buf); 620 if (p[strlen(p) - 1] == '\n') { 621 /* strip off the newline */ 622 p[strlen(p) - 1] = '\0'; 623 } 624 (void) strcpy(diskPtr->devname, p); 625 626 proc = raidPtr->engine_thread; 627 628 /* Let's start by claiming the component is fine and well... */ 629 diskPtr->status = rf_ds_optimal; 630 631 raidPtr->raid_cinfo[row][col].ci_vp = NULL; 632 raidPtr->raid_cinfo[row][col].ci_dev = NULL; 633 634 error = raidlookup(diskPtr->devname, proc, &vp); 635 if (error) { 636 printf("raidlookup on device: %s failed!\n", diskPtr->devname); 637 if (error == ENXIO) { 638 /* the component isn't there... 
must be dead :-( */ 639 diskPtr->status = rf_ds_failed; 640 } else { 641 return (error); 642 } 643 } 644 if (diskPtr->status == rf_ds_optimal) { 645 646 if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) { 647 return (error); 648 } 649 error = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart, 650 FREAD, proc->p_ucred, proc); 651 if (error) { 652 return (error); 653 } 654 655 diskPtr->blockSize = dpart.disklab->d_secsize; 656 657 diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors; 658 diskPtr->partitionSize = dpart.part->p_size; 659 660 raidPtr->raid_cinfo[row][col].ci_vp = vp; 661 raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev; 662 663 /* This component was not automatically configured */ 664 diskPtr->auto_configured = 0; 665 diskPtr->dev = va.va_rdev; 666 667 /* we allow the user to specify that only a fraction of the 668 * disks should be used this is just for debug: it speeds up 669 * the parity scan */ 670 diskPtr->numBlocks = diskPtr->numBlocks * 671 rf_sizePercentage / 100; 672 } 673 return (0); 674 } 675 676 static void 677 rf_print_label_status( raidPtr, row, column, dev_name, ci_label ) 678 RF_Raid_t *raidPtr; 679 int row; 680 int column; 681 char *dev_name; 682 RF_ComponentLabel_t *ci_label; 683 { 684 685 printf("raid%d: Component %s being configured at row: %d col: %d\n", 686 raidPtr->raidid, dev_name, row, column ); 687 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n", 688 ci_label->row, ci_label->column, 689 ci_label->num_rows, ci_label->num_columns); 690 printf(" Version: %d Serial Number: %d Mod Counter: %d\n", 691 ci_label->version, ci_label->serial_number, 692 ci_label->mod_counter); 693 printf(" Clean: %s Status: %d\n", 694 ci_label->clean ? 
"Yes" : "No", ci_label->status ); 695 } 696 697 static int rf_check_label_vitals( raidPtr, row, column, dev_name, ci_label, 698 serial_number, mod_counter ) 699 RF_Raid_t *raidPtr; 700 int row; 701 int column; 702 char *dev_name; 703 RF_ComponentLabel_t *ci_label; 704 int serial_number; 705 int mod_counter; 706 { 707 int fatal_error = 0; 708 709 if (serial_number != ci_label->serial_number) { 710 printf("%s has a different serial number: %d %d\n", 711 dev_name, serial_number, ci_label->serial_number); 712 fatal_error = 1; 713 } 714 if (mod_counter != ci_label->mod_counter) { 715 printf("%s has a different modfication count: %d %d\n", 716 dev_name, mod_counter, ci_label->mod_counter); 717 } 718 719 if (row != ci_label->row) { 720 printf("Row out of alignment for: %s\n", dev_name); 721 fatal_error = 1; 722 } 723 if (column != ci_label->column) { 724 printf("Column out of alignment for: %s\n", dev_name); 725 fatal_error = 1; 726 } 727 if (raidPtr->numRow != ci_label->num_rows) { 728 printf("Number of rows do not match for: %s\n", dev_name); 729 fatal_error = 1; 730 } 731 if (raidPtr->numCol != ci_label->num_columns) { 732 printf("Number of columns do not match for: %s\n", dev_name); 733 fatal_error = 1; 734 } 735 if (ci_label->clean == 0) { 736 /* it's not clean, but that's not fatal */ 737 printf("%s is not clean!\n", dev_name); 738 } 739 return(fatal_error); 740 } 741 742 743 /* 744 745 rf_CheckLabels() - check all the component labels for consistency. 746 Return an error if there is anything major amiss. 
747 748 */ 749 750 int 751 rf_CheckLabels( raidPtr, cfgPtr ) 752 RF_Raid_t *raidPtr; 753 RF_Config_t *cfgPtr; 754 { 755 int r,c; 756 char *dev_name; 757 RF_ComponentLabel_t *ci_label; 758 int serial_number = 0; 759 int mod_number = 0; 760 int fatal_error = 0; 761 int mod_values[4]; 762 int mod_count[4]; 763 int ser_values[4]; 764 int ser_count[4]; 765 int num_ser; 766 int num_mod; 767 int i; 768 int found; 769 int hosed_row; 770 int hosed_column; 771 int too_fatal; 772 int parity_good; 773 int force; 774 775 hosed_row = -1; 776 hosed_column = -1; 777 too_fatal = 0; 778 force = cfgPtr->force; 779 780 /* 781 We're going to try to be a little intelligent here. If one 782 component's label is bogus, and we can identify that it's the 783 *only* one that's gone, we'll mark it as "failed" and allow 784 the configuration to proceed. This will be the *only* case 785 that we'll proceed if there would be (otherwise) fatal errors. 786 787 Basically we simply keep a count of how many components had 788 what serial number. If all but one agree, we simply mark 789 the disagreeing component as being failed, and allow 790 things to come up "normally". 791 792 We do this first for serial numbers, and then for "mod_counter". 
793 794 */ 795 796 num_ser = 0; 797 num_mod = 0; 798 for (r = 0; r < raidPtr->numRow && !fatal_error ; r++) { 799 for (c = 0; c < raidPtr->numCol; c++) { 800 ci_label = &raidPtr->raid_cinfo[r][c].ci_label; 801 found=0; 802 for(i=0;i<num_ser;i++) { 803 if (ser_values[i] == ci_label->serial_number) { 804 ser_count[i]++; 805 found=1; 806 break; 807 } 808 } 809 if (!found) { 810 ser_values[num_ser] = ci_label->serial_number; 811 ser_count[num_ser] = 1; 812 num_ser++; 813 if (num_ser>2) { 814 fatal_error = 1; 815 break; 816 } 817 } 818 found=0; 819 for(i=0;i<num_mod;i++) { 820 if (mod_values[i] == ci_label->mod_counter) { 821 mod_count[i]++; 822 found=1; 823 break; 824 } 825 } 826 if (!found) { 827 mod_values[num_mod] = ci_label->mod_counter; 828 mod_count[num_mod] = 1; 829 num_mod++; 830 if (num_mod>2) { 831 fatal_error = 1; 832 break; 833 } 834 } 835 } 836 } 837 #if DEBUG 838 printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid); 839 for(i=0;i<num_ser;i++) { 840 printf("%d %d\n", ser_values[i], ser_count[i]); 841 } 842 printf("raid%d: Summary of mod counters:\n", raidPtr->raidid); 843 for(i=0;i<num_mod;i++) { 844 printf("%d %d\n", mod_values[i], mod_count[i]); 845 } 846 #endif 847 serial_number = ser_values[0]; 848 if (num_ser == 2) { 849 if ((ser_count[0] == 1) || (ser_count[1] == 1)) { 850 /* Locate the maverick component */ 851 if (ser_count[1] > ser_count[0]) { 852 serial_number = ser_values[1]; 853 } 854 for (r = 0; r < raidPtr->numRow; r++) { 855 for (c = 0; c < raidPtr->numCol; c++) { 856 ci_label = &raidPtr->raid_cinfo[r][c].ci_label; 857 if (serial_number != 858 ci_label->serial_number) { 859 hosed_row = r; 860 hosed_column = c; 861 break; 862 } 863 } 864 } 865 printf("Hosed component: %s\n", 866 &cfgPtr->devnames[hosed_row][hosed_column][0]); 867 if (!force) { 868 /* we'll fail this component, as if there are 869 other major errors, we arn't forcing things 870 and we'll abort the config anyways */ 871 raidPtr->Disks[hosed_row][hosed_column].status 
872 = rf_ds_failed; 873 raidPtr->numFailures++; 874 raidPtr->status[hosed_row] = rf_rs_degraded; 875 } 876 } else { 877 too_fatal = 1; 878 } 879 if (cfgPtr->parityConfig == '0') { 880 /* We've identified two different serial numbers. 881 RAID 0 can't cope with that, so we'll punt */ 882 too_fatal = 1; 883 } 884 885 } 886 887 /* record the serial number for later. If we bail later, setting 888 this doesn't matter, otherwise we've got the best guess at the 889 correct serial number */ 890 raidPtr->serial_number = serial_number; 891 892 mod_number = mod_values[0]; 893 if (num_mod == 2) { 894 if ((mod_count[0] == 1) || (mod_count[1] == 1)) { 895 /* Locate the maverick component */ 896 if (mod_count[1] > mod_count[0]) { 897 mod_number = mod_values[1]; 898 } else if (mod_count[1] < mod_count[0]) { 899 mod_number = mod_values[0]; 900 } else { 901 /* counts of different modification values 902 are the same. Assume greater value is 903 the correct one, all other things 904 considered */ 905 if (mod_values[0] > mod_values[1]) { 906 mod_number = mod_values[0]; 907 } else { 908 mod_number = mod_values[1]; 909 } 910 911 } 912 for (r = 0; r < raidPtr->numRow && !too_fatal ; r++) { 913 for (c = 0; c < raidPtr->numCol; c++) { 914 ci_label = &raidPtr->raid_cinfo[r][c].ci_label; 915 if (mod_number != 916 ci_label->mod_counter) { 917 if ( ( hosed_row == r ) && 918 ( hosed_column == c )) { 919 /* same one. Can 920 deal with it. 
*/ 921 } else { 922 hosed_row = r; 923 hosed_column = c; 924 if (num_ser != 1) { 925 too_fatal = 1; 926 break; 927 } 928 } 929 } 930 } 931 } 932 printf("Hosed component: %s\n", 933 &cfgPtr->devnames[hosed_row][hosed_column][0]); 934 if (!force) { 935 /* we'll fail this component, as if there are 936 other major errors, we arn't forcing things 937 and we'll abort the config anyways */ 938 if (raidPtr->Disks[hosed_row][hosed_column].status != rf_ds_failed) { 939 raidPtr->Disks[hosed_row][hosed_column].status 940 = rf_ds_failed; 941 raidPtr->numFailures++; 942 raidPtr->status[hosed_row] = rf_rs_degraded; 943 } 944 } 945 } else { 946 too_fatal = 1; 947 } 948 if (cfgPtr->parityConfig == '0') { 949 /* We've identified two different mod counters. 950 RAID 0 can't cope with that, so we'll punt */ 951 too_fatal = 1; 952 } 953 } 954 955 raidPtr->mod_counter = mod_number; 956 957 if (too_fatal) { 958 /* we've had both a serial number mismatch, and a mod_counter 959 mismatch -- and they involved two different components!! 960 Bail -- make things fail so that the user must force 961 the issue... 
*/ 962 hosed_row = -1; 963 hosed_column = -1; 964 } 965 966 if (num_ser > 2) { 967 printf("raid%d: Too many different serial numbers!\n", 968 raidPtr->raidid); 969 } 970 971 if (num_mod > 2) { 972 printf("raid%d: Too many different mod counters!\n", 973 raidPtr->raidid); 974 } 975 976 /* we start by assuming the parity will be good, and flee from 977 that notion at the slightest sign of trouble */ 978 979 parity_good = RF_RAID_CLEAN; 980 for (r = 0; r < raidPtr->numRow; r++) { 981 for (c = 0; c < raidPtr->numCol; c++) { 982 dev_name = &cfgPtr->devnames[r][c][0]; 983 ci_label = &raidPtr->raid_cinfo[r][c].ci_label; 984 985 if ((r == hosed_row) && (c == hosed_column)) { 986 printf("raid%d: Ignoring %s\n", 987 raidPtr->raidid, dev_name); 988 } else { 989 rf_print_label_status( raidPtr, r, c, 990 dev_name, ci_label ); 991 if (rf_check_label_vitals( raidPtr, r, c, 992 dev_name, ci_label, 993 serial_number, 994 mod_number )) { 995 fatal_error = 1; 996 } 997 if (ci_label->clean != RF_RAID_CLEAN) { 998 parity_good = RF_RAID_DIRTY; 999 } 1000 } 1001 } 1002 } 1003 if (fatal_error) { 1004 parity_good = RF_RAID_DIRTY; 1005 } 1006 1007 /* we note the state of the parity */ 1008 raidPtr->parity_good = parity_good; 1009 1010 return(fatal_error); 1011 } 1012 1013 int 1014 rf_add_hot_spare(raidPtr, sparePtr) 1015 RF_Raid_t *raidPtr; 1016 RF_SingleComponent_t *sparePtr; 1017 { 1018 RF_RaidDisk_t *disks; 1019 RF_DiskQueue_t *spareQueues; 1020 int ret; 1021 unsigned int bs; 1022 int spare_number; 1023 1024 ret=0; 1025 1026 if (raidPtr->numSpare >= RF_MAXSPARE) { 1027 RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare); 1028 return(EINVAL); 1029 } 1030 1031 RF_LOCK_MUTEX(raidPtr->mutex); 1032 while (raidPtr->adding_hot_spare==1) { 1033 ltsleep(&(raidPtr->adding_hot_spare), PRIBIO, "raidhs", 0, 1034 &(raidPtr->mutex)); 1035 } 1036 raidPtr->adding_hot_spare=1; 1037 RF_UNLOCK_MUTEX(raidPtr->mutex); 1038 1039 /* the beginning of the spares... 
*/ 1040 disks = &raidPtr->Disks[0][raidPtr->numCol]; 1041 1042 spare_number = raidPtr->numSpare; 1043 1044 ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name, 1045 &disks[spare_number], 0, 1046 raidPtr->numCol + spare_number); 1047 1048 if (ret) 1049 goto fail; 1050 if (disks[spare_number].status != rf_ds_optimal) { 1051 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n", 1052 sparePtr->component_name); 1053 ret=EINVAL; 1054 goto fail; 1055 } else { 1056 disks[spare_number].status = rf_ds_spare; 1057 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", spare_number, 1058 disks[spare_number].devname, 1059 (long int) disks[spare_number].numBlocks, 1060 disks[spare_number].blockSize, 1061 (long int) disks[spare_number].numBlocks * 1062 disks[spare_number].blockSize / 1024 / 1024); 1063 } 1064 1065 1066 /* check sizes and block sizes on the spare disk */ 1067 bs = 1 << raidPtr->logBytesPerSector; 1068 if (disks[spare_number].blockSize != bs) { 1069 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs); 1070 ret = EINVAL; 1071 goto fail; 1072 } 1073 if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) { 1074 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n", 1075 disks[spare_number].devname, 1076 disks[spare_number].blockSize, 1077 (long int) raidPtr->sectorsPerDisk); 1078 ret = EINVAL; 1079 goto fail; 1080 } else { 1081 if (disks[spare_number].numBlocks > 1082 raidPtr->sectorsPerDisk) { 1083 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[spare_number].devname, 1084 (long int) raidPtr->sectorsPerDisk); 1085 1086 disks[spare_number].numBlocks = raidPtr->sectorsPerDisk; 1087 } 1088 } 1089 1090 spareQueues = &raidPtr->Queues[0][raidPtr->numCol]; 1091 ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number], 1092 0, raidPtr->numCol + spare_number, 1093 
raidPtr->qType, 1094 raidPtr->sectorsPerDisk, 1095 raidPtr->Disks[0][raidPtr->numCol + 1096 spare_number].dev, 1097 raidPtr->maxOutstanding, 1098 &raidPtr->shutdownList, 1099 raidPtr->cleanupList); 1100 1101 RF_LOCK_MUTEX(raidPtr->mutex); 1102 raidPtr->numSpare++; 1103 RF_UNLOCK_MUTEX(raidPtr->mutex); 1104 1105 fail: 1106 RF_LOCK_MUTEX(raidPtr->mutex); 1107 raidPtr->adding_hot_spare=0; 1108 wakeup(&(raidPtr->adding_hot_spare)); 1109 RF_UNLOCK_MUTEX(raidPtr->mutex); 1110 1111 return(ret); 1112 } 1113 1114 int 1115 rf_remove_hot_spare(raidPtr,sparePtr) 1116 RF_Raid_t *raidPtr; 1117 RF_SingleComponent_t *sparePtr; 1118 { 1119 int spare_number; 1120 1121 1122 if (raidPtr->numSpare==0) { 1123 printf("No spares to remove!\n"); 1124 return(EINVAL); 1125 } 1126 1127 spare_number = sparePtr->column; 1128 1129 return(EINVAL); /* XXX not implemented yet */ 1130 #if 0 1131 if (spare_number < 0 || spare_number > raidPtr->numSpare) { 1132 return(EINVAL); 1133 } 1134 1135 /* verify that this spare isn't in use... */ 1136 1137 1138 1139 1140 /* it's gone.. */ 1141 1142 raidPtr->numSpare--; 1143 1144 return(0); 1145 #endif 1146 } 1147 1148 1149 int 1150 rf_delete_component(raidPtr,component) 1151 RF_Raid_t *raidPtr; 1152 RF_SingleComponent_t *component; 1153 { 1154 RF_RaidDisk_t *disks; 1155 1156 if ((component->row < 0) || 1157 (component->row >= raidPtr->numRow) || 1158 (component->column < 0) || 1159 (component->column >= raidPtr->numCol)) { 1160 return(EINVAL); 1161 } 1162 1163 disks = &raidPtr->Disks[component->row][component->column]; 1164 1165 /* 1. This component must be marked as 'failed' */ 1166 1167 return(EINVAL); /* Not implemented yet. */ 1168 } 1169 1170 int 1171 rf_incorporate_hot_spare(raidPtr,component) 1172 RF_Raid_t *raidPtr; 1173 RF_SingleComponent_t *component; 1174 { 1175 1176 /* Issues here include how to 'move' this in if there is IO 1177 taking place (e.g. component queues and such) */ 1178 1179 return(EINVAL); /* Not implemented yet. */ 1180 } 1181