1 /* $NetBSD: rf_netbsdkintf.c,v 1.120 2002/05/23 15:12:59 leo Exp $ */ 2 /*- 3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to The NetBSD Foundation 7 * by Greg Oster; Jason R. Thorpe. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the NetBSD 20 * Foundation, Inc. and its contributors. 21 * 4. Neither the name of The NetBSD Foundation nor the names of its 22 * contributors may be used to endorse or promote products derived 23 * from this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 28 * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35 * POSSIBILITY OF SUCH DAMAGE. 36 */ 37 38 /* 39 * Copyright (c) 1988 University of Utah. 40 * Copyright (c) 1990, 1993 41 * The Regents of the University of California. All rights reserved. 42 * 43 * This code is derived from software contributed to Berkeley by 44 * the Systems Programming Group of the University of Utah Computer 45 * Science Department. 46 * 47 * Redistribution and use in source and binary forms, with or without 48 * modification, are permitted provided that the following conditions 49 * are met: 50 * 1. Redistributions of source code must retain the above copyright 51 * notice, this list of conditions and the following disclaimer. 52 * 2. Redistributions in binary form must reproduce the above copyright 53 * notice, this list of conditions and the following disclaimer in the 54 * documentation and/or other materials provided with the distribution. 55 * 3. All advertising materials mentioning features or use of this software 56 * must display the following acknowledgement: 57 * This product includes software developed by the University of 58 * California, Berkeley and its contributors. 59 * 4. Neither the name of the University nor the names of its contributors 60 * may be used to endorse or promote products derived from this software 61 * without specific prior written permission. 
62 * 63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 73 * SUCH DAMAGE. 74 * 75 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 76 * 77 * @(#)cd.c 8.2 (Berkeley) 11/16/93 78 */ 79 80 81 82 83 /* 84 * Copyright (c) 1995 Carnegie-Mellon University. 85 * All rights reserved. 86 * 87 * Authors: Mark Holland, Jim Zelenka 88 * 89 * Permission to use, copy, modify and distribute this software and 90 * its documentation is hereby granted, provided that both the copyright 91 * notice and this permission notice appear in all copies of the 92 * software, derivative works or modified versions, and any portions 93 * thereof, and that both notices appear in supporting documentation. 94 * 95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 98 * 99 * Carnegie Mellon requests users of this software to return to 100 * 101 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 102 * School of Computer Science 103 * Carnegie Mellon University 104 * Pittsburgh PA 15213-3890 105 * 106 * any improvements or extensions that they make and grant Carnegie the 107 * rights to redistribute these changes. 
108 */ 109 110 /*********************************************************** 111 * 112 * rf_kintf.c -- the kernel interface routines for RAIDframe 113 * 114 ***********************************************************/ 115 116 #include <sys/cdefs.h> 117 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.120 2002/05/23 15:12:59 leo Exp $"); 118 119 #include <sys/param.h> 120 #include <sys/errno.h> 121 #include <sys/pool.h> 122 #include <sys/queue.h> 123 #include <sys/disk.h> 124 #include <sys/device.h> 125 #include <sys/stat.h> 126 #include <sys/ioctl.h> 127 #include <sys/fcntl.h> 128 #include <sys/systm.h> 129 #include <sys/namei.h> 130 #include <sys/vnode.h> 131 #include <sys/disklabel.h> 132 #include <sys/conf.h> 133 #include <sys/lock.h> 134 #include <sys/buf.h> 135 #include <sys/user.h> 136 #include <sys/reboot.h> 137 138 #include <dev/raidframe/raidframevar.h> 139 #include <dev/raidframe/raidframeio.h> 140 #include "raid.h" 141 #include "opt_raid_autoconfig.h" 142 #include "rf_raid.h" 143 #include "rf_copyback.h" 144 #include "rf_dag.h" 145 #include "rf_dagflags.h" 146 #include "rf_desc.h" 147 #include "rf_diskqueue.h" 148 #include "rf_acctrace.h" 149 #include "rf_etimer.h" 150 #include "rf_general.h" 151 #include "rf_debugMem.h" 152 #include "rf_kintf.h" 153 #include "rf_options.h" 154 #include "rf_driver.h" 155 #include "rf_parityscan.h" 156 #include "rf_debugprint.h" 157 #include "rf_threadstuff.h" 158 159 int rf_kdebug_level = 0; 160 161 #ifdef DEBUG 162 #define db1_printf(a) if (rf_kdebug_level > 0) printf a 163 #else /* DEBUG */ 164 #define db1_printf(a) { } 165 #endif /* DEBUG */ 166 167 static RF_Raid_t **raidPtrs; /* global raid device descriptors */ 168 169 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex) 170 171 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a 172 * spare table */ 173 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from 174 * installation process */ 175 176 /* prototypes */ 177 static void 
KernelWakeupFunc(struct buf * bp);
static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
		   dev_t dev, RF_SectorNum_t startSect,
		   RF_SectorCount_t numSect, caddr_t buf,
		   void (*cbFunc) (struct buf *), void *cbArg,
		   int logBytesPerSector, struct proc * b_proc);
static void raidinit(RF_Raid_t *);

/* Block/character device switch and autoconf entry points. */
void raidattach(int);
int raidsize(dev_t);
int raidopen(dev_t, int, int, struct proc *);
int raidclose(dev_t, int, int, struct proc *);
int raidioctl(dev_t, u_long, caddr_t, int, struct proc *);
int raidwrite(dev_t, struct uio *, int);
int raidread(dev_t, struct uio *, int);
void raidstrategy(struct buf *);
int raiddump(dev_t, daddr_t, caddr_t, size_t);

/*
 * Pilfered from ccd.c
 */

/*
 * Wrapper around a component-level I/O buffer; one is allocated from
 * raidframe_cbufpool for each component transfer, linking the component
 * buf back to the originating request.
 */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int rf_flags;		/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};

/* component buffer pool */
struct pool raidframe_cbufpool;

/* Allocate/release a struct raidbuf from the component buffer pool. */
#define RAIDGETBUF(rs) pool_get(&raidframe_cbufpool, PR_NOWAIT)
#define RAIDPUTBUF(rs, cbp) pool_put(&raidframe_cbufpool, cbp)

/* XXX Not sure if the following should be replacing the raidPtrs above,
   or if it should be used in conjunction with that...
*/

/* Per-unit software state for each RAID pseudo-device. */
struct raid_softc {
	int sc_flags;		/* flags */
	int sc_cflags;		/* configuration flags */
	size_t sc_size;		/* size of the raid device */
	char sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct buf_queue buf_queue;	/* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

#define raidunit(x)	DISKUNIT(x)
int numraid = 0;

/*
 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes, and in degraded mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even it if is used immediately, that's still 128K, which when multiplied
 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
 *
 * Now in degraded mode, for example, a 64K read on the above setup may
 * require data reconstruction, which will require *all* of the 4 remaining
 * disks to participate -- 4 * 32K/disk == 128K again.
 */

#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif

/* The raw-partition device node used when accessing the disklabel. */
#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))

/* declared here, and made public, for the benefit of KVM stuff..
*/
struct raid_softc *raid_softc;

/* disklabel handling */
static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
				struct disklabel *);
static void raidgetdisklabel(dev_t);
static void raidmakedisklabel(struct raid_softc *);

/* per-unit open/close serialization */
static int raidlock(struct raid_softc *);
static void raidunlock(struct raid_softc *);

static void rf_markalldirty(RF_Raid_t *);
void rf_mountroot_hook(struct device *);

/* Fake struct device entries, used when a RAID set is the root device. */
struct device *raidrootdev;

/* worker-thread entry points for long-running operations */
void rf_ReconThread(struct rf_recon_req *);
/* XXX what I want is: */
/*void rf_ReconThread(RF_Raid_t *raidPtr);  */
void rf_RewriteParityThread(RF_Raid_t *raidPtr);
void rf_CopybackThread(RF_Raid_t *raidPtr);
void rf_ReconstructInPlaceThread(struct rf_recon_req *);
void rf_buildroothack(void *);

/* component autoconfiguration helpers */
RF_AutoConfig_t *rf_find_raid_components(void);
RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
static int rf_reasonable_label(RF_ComponentLabel_t *);
void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
int rf_set_autoconfig(RF_Raid_t *, int);
int rf_set_rootpartition(RF_Raid_t *, int);
void rf_release_all_vps(RF_ConfigSet_t *);
void rf_cleanup_config_set(RF_ConfigSet_t *);
int rf_have_enough_components(RF_ConfigSet_t *);
int rf_auto_config_set(RF_ConfigSet_t *, int *);

static int raidautoconfig = 0;	/* Debugging, mostly.  Set to 0 to not
				   allow autoconfig to take place.
				   Note that this is overridden by having
				   RAID_AUTOCONFIG as an option in the
				   kernel config file.
*/

/*
 * Attach entry point: allocate and initialize the per-unit data
 * structures for `num' RAID pseudo-devices, boot the RAIDframe core,
 * and (if enabled) start the autoconfiguration of any RAID sets
 * found on the system.
 */
void
raidattach(num)
	int num;
{
	int raidID;
	int i, rc;
	RF_AutoConfig_t *ac_list;	/* autoconfig list */
	RF_ConfigSet_t *config_sets;

#ifdef DEBUG
	printf("raidattach: Asked for %d units\n", num);
#endif

	if (num <= 0) {
#ifdef DIAGNOSTIC
		panic("raidattach: count <= 0");
#endif
		return;
	}
	/* This is where all the initialization stuff gets done. */

	numraid = num;

	/* Make some space for requested number of units... */

	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
	if (raidPtrs == NULL) {
		panic("raidPtrs is NULL!!\n");
	}

	/* Initialize the component buffer pool. */
	pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
	    0, 0, "raidpl", NULL);

	rc = rf_mutex_init(&rf_sparet_wait_mutex);
	if (rc) {
		RF_PANIC();
	}

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;

	for (i = 0; i < num; i++)
		raidPtrs[i] = NULL;
	rc = rf_BootRaidframe();
	if (rc == 0)
		printf("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!\n");

	/* put together some datastructures like the CCD device does.. This
	 * lets us lock the device and what-not when it gets opened.
	 */

	raid_softc = (struct raid_softc *)
	    malloc(num * sizeof(struct raid_softc),
		   M_RAIDFRAME, M_NOWAIT);
	if (raid_softc == NULL) {
		printf("WARNING: no memory for RAIDframe driver\n");
		return;
	}

	memset(raid_softc, 0, num * sizeof(struct raid_softc));

	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
					      M_RAIDFRAME, M_NOWAIT);
	if (raidrootdev == NULL) {
		panic("No memory for RAIDframe driver!!?!?!\n");
	}

	for (raidID = 0; raidID < num; raidID++) {
		BUFQ_INIT(&raid_softc[raidID].buf_queue);

		/* Fill in a fake struct device for each unit, so a RAID
		   set can later be nominated as the boot device. */
		raidrootdev[raidID].dv_class = DV_DISK;
		raidrootdev[raidID].dv_cfdata = NULL;
		raidrootdev[raidID].dv_unit = raidID;
		raidrootdev[raidID].dv_parent = NULL;
		raidrootdev[raidID].dv_flags = 0;
		sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);

		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
			  (RF_Raid_t *));
		if (raidPtrs[raidID] == NULL) {
			/* Partial failure: remember how many units we
			   actually managed to set up. */
			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
			numraid = raidID;
			return;
		}
	}

#ifdef RAID_AUTOCONFIG
	raidautoconfig = 1;
#endif

	if (raidautoconfig) {
		/* 1. locate all RAID components on the system */

#if DEBUG
		printf("Searching for raid components...\n");
#endif
		ac_list = rf_find_raid_components();

		/* 2. sort them into their respective sets */

		config_sets = rf_create_auto_sets(ac_list);

		/* 3. evaluate each set and configure the valid ones
		   This gets done in rf_buildroothack() */

		/* schedule the creation of the thread to do the
		   "/ on RAID" stuff */

		kthread_create(rf_buildroothack,config_sets);

#if 0
		mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
#endif
	}

}

/*
 * Kernel-thread entry point (scheduled by raidattach): walk the list of
 * autodetected configuration sets, configure each eligible one, and if
 * exactly one configured set is marked rootable, nominate it as the
 * boot device.  All sets and their resources are released afterwards.
 */
void
rf_buildroothack(arg)
	void *arg;
{
	RF_ConfigSet_t *config_sets = arg;
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int retcode;
	int raidID;
	int rootID;
	int num_root;

	rootID = 0;
	num_root = 0;
	cset = config_sets;
	while(cset != NULL ) {
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure==1) {
			retcode = rf_auto_config_set(cset,&raidID);
			if (!retcode) {
				if (cset->rootable) {
					rootID = raidID;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
#if DEBUG
				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
#endif
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}
	if (boothowto & RB_ASKNAME) {
		/* We don't auto-config... */
	} else {
		/* They didn't ask, and we found something bootable... */

		if (num_root == 1) {
			booted_device = &raidrootdev[rootID];
		} else if (num_root > 1) {
			/* we can't guess.. require the user to answer...
			 */
			boothowto |= RB_ASKNAME;
		}
	}
}


/*
 * Return the size (in DEV_BSIZE units) of the given partition, for
 * swap/dump purposes.  Only FS_SWAP partitions have a usable size;
 * everything else (or any error) yields -1.
 */
int
raidsize(dev)
	dev_t dev;
{
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, unit, omask, size;

	unit = raidunit(dev);
	if (unit >= numraid)
		return (-1);
	rs = &raid_softc[unit];

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (-1);

	part = DISKPART(dev);
	omask = rs->sc_dkdev.dk_openmask & (1 << part);
	lp = rs->sc_dkdev.dk_label;

	/* Transiently open the device if nobody else has it open. */
	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
		return (-1);

	if (lp->d_partitions[part].p_fstype != FS_SWAP)
		size = -1;
	else
		size = lp->d_partitions[part].p_size *
		    (lp->d_secsize / DEV_BSIZE);

	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
		return (-1);

	return (size);

}

/* Crash-dump entry point: dumping to a RAID set is not supported. */
int
raiddump(dev, blkno, va, size)
	dev_t dev;
	daddr_t blkno;
	caddr_t va;
	size_t size;
{
	/* Not implemented. */
	return ENXIO;
}
/* ARGSUSED */
/*
 * Open entry point: validate the unit and partition, (re)read the
 * disklabel on first open, record the open in the appropriate open
 * mask, and mark all components dirty on the first open of a
 * configured set.
 */
int
raidopen(dev, flags, fmt, p)
	dev_t dev;
	int flags, fmt;
	struct proc *p;
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	db1_printf(("Opening raid device number: %d partition: %d\n",
	    unit, part));


	/* First open of a configured set: refresh the disklabel. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		db1_printf(("Not a raw partition..\n"));
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
		    (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			db1_printf(("Bailing out...\n"));
			return (error);
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return (error);


}
/* ARGSUSED */
/*
 * Close entry point: clear the open mask bit for this partition; on
 * last close of a configured set, write final component labels, and
 * if the system is shutting down, shut the RAID set down too.
 */
int
raidclose(dev, flags, fmt, p)
	dev_t dev;
	int flags, fmt;
	struct proc *p;
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	int error = 0;
	int part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */
#if 0
		printf("Last one on raid%d. Updating status.\n",unit);
#endif
		rf_update_component_labels(raidPtrs[unit],
		    RF_FINAL_COMPONENT_UPDATE);
		if (doing_shutdown) {
			/* last one, and we're going down, so
			   lights out for this RAID set too. */
			error = rf_Shutdown(raidPtrs[unit]);

			/* It's no longer initialized... */
			rs->sc_flags &= ~RAIDF_INITED;

			/* Detach the disk. */
			disk_detach(&rs->sc_dkdev);
		}
	}

	raidunlock(rs);
	return (0);

}

/*
 * Strategy entry point: validate the request against the unit state
 * and disklabel bounds, then queue the buf on the per-unit queue and
 * poke the RAIDframe engine.  Errors are reported by completing the
 * buf with B_ERROR via biodone().
 */
void
raidstrategy(bp)
	struct buf *bp;
{
	int s;

	unsigned int raidID = raidunit(bp->b_dev);
	RF_Raid_t *raidPtr;
	struct raid_softc *rs = &raid_softc[raidID];
	struct disklabel *lp;
	int wlabel;

	if ((rs->sc_flags & RAIDF_INITED) ==0) {
		bp->b_error = ENXIO;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return;
	}
	if (raidID >= numraid || !raidPtrs[raidID]) {
		bp->b_error = ENODEV;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return;
	}
	raidPtr = raidPtrs[raidID];
	if (!raidPtr->valid) {
		bp->b_error = ENODEV;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return;
	}
	if (bp->b_bcount == 0) {
		/* zero-length transfer: nothing to do */
		db1_printf(("b_bcount is zero..\n"));
		biodone(bp);
		return;
	}
	lp = rs->sc_dkdev.dk_label;

	/*
	 * Do bounds checking and adjust transfer.  If there's an
	 * error, the bounds check will flag that for us.
	 */

	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
	if (DISKPART(bp->b_dev) != RAW_PART)
		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
			db1_printf(("Bounds check failed!!:%d %d\n",
			    (int) bp->b_blkno, (int) wlabel));
			biodone(bp);
			return;
		}
	s = splbio();

	bp->b_resid = 0;

	/* stuff it onto our queue */
	BUFQ_INSERT_TAIL(&rs->buf_queue, bp);

	raidstart(raidPtrs[raidID]);

	splx(s);
}
/* ARGSUSED */
/* Character-device read: hand off to raidstrategy via physio(). */
int
raidread(dev, uio, flags)
	dev_t dev;
	struct uio *uio;
	int flags;
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	int part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (ENXIO);
	part = DISKPART(dev);

	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));

	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));

}
/* ARGSUSED */
/* Character-device write: hand off to raidstrategy via physio(). */
int
raidwrite(dev, uio, flags)
	dev_t dev;
	struct uio *uio;
	int flags;
{
	int unit = raidunit(dev);
	struct raid_softc *rs;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (ENXIO);
	db1_printf(("raidwrite\n"));
	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));

}

int
raidioctl(dev, cmd, data, flag, p)
	dev_t dev;
	u_long cmd;
	caddr_t data;
	int flag;
	struct proc *p;
{
	int unit = raidunit(dev);
	int error = 0;
	int part, pmask;
	struct raid_softc *rs;
	RF_Config_t *k_cfg, *u_cfg;
	RF_Raid_t *raidPtr;
	RF_RaidDisk_t *diskPtr;
	RF_AccTotals_t *totals;
	RF_DeviceConfig_t *d_cfg, **ucfgp;
	u_char *specific_buf;
	int retcode = 0;
	int row;
	int column;
	struct rf_recon_req *rrcopy, *rr;
	RF_ComponentLabel_t *clabel;
	RF_ComponentLabel_t ci_label;
RF_ComponentLabel_t **clabel_ptr; 797 RF_SingleComponent_t *sparePtr,*componentPtr; 798 RF_SingleComponent_t hot_spare; 799 RF_SingleComponent_t component; 800 RF_ProgressInfo_t progressInfo, **progressInfoPtr; 801 int i, j, d; 802 #ifdef __HAVE_OLD_DISKLABEL 803 struct disklabel newlabel; 804 #endif 805 806 if (unit >= numraid) 807 return (ENXIO); 808 rs = &raid_softc[unit]; 809 raidPtr = raidPtrs[unit]; 810 811 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev, 812 (int) DISKPART(dev), (int) unit, (int) cmd)); 813 814 /* Must be open for writes for these commands... */ 815 switch (cmd) { 816 case DIOCSDINFO: 817 case DIOCWDINFO: 818 #ifdef __HAVE_OLD_DISKLABEL 819 case ODIOCWDINFO: 820 case ODIOCSDINFO: 821 #endif 822 case DIOCWLABEL: 823 if ((flag & FWRITE) == 0) 824 return (EBADF); 825 } 826 827 /* Must be initialized for these... */ 828 switch (cmd) { 829 case DIOCGDINFO: 830 case DIOCSDINFO: 831 case DIOCWDINFO: 832 #ifdef __HAVE_OLD_DISKLABEL 833 case ODIOCGDINFO: 834 case ODIOCWDINFO: 835 case ODIOCSDINFO: 836 case ODIOCGDEFLABEL: 837 #endif 838 case DIOCGPART: 839 case DIOCWLABEL: 840 case DIOCGDEFLABEL: 841 case RAIDFRAME_SHUTDOWN: 842 case RAIDFRAME_REWRITEPARITY: 843 case RAIDFRAME_GET_INFO: 844 case RAIDFRAME_RESET_ACCTOTALS: 845 case RAIDFRAME_GET_ACCTOTALS: 846 case RAIDFRAME_KEEP_ACCTOTALS: 847 case RAIDFRAME_GET_SIZE: 848 case RAIDFRAME_FAIL_DISK: 849 case RAIDFRAME_COPYBACK: 850 case RAIDFRAME_CHECK_RECON_STATUS: 851 case RAIDFRAME_CHECK_RECON_STATUS_EXT: 852 case RAIDFRAME_GET_COMPONENT_LABEL: 853 case RAIDFRAME_SET_COMPONENT_LABEL: 854 case RAIDFRAME_ADD_HOT_SPARE: 855 case RAIDFRAME_REMOVE_HOT_SPARE: 856 case RAIDFRAME_INIT_LABELS: 857 case RAIDFRAME_REBUILD_IN_PLACE: 858 case RAIDFRAME_CHECK_PARITY: 859 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: 860 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: 861 case RAIDFRAME_CHECK_COPYBACK_STATUS: 862 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: 863 case RAIDFRAME_SET_AUTOCONFIG: 864 case 
RAIDFRAME_SET_ROOT: 865 case RAIDFRAME_DELETE_COMPONENT: 866 case RAIDFRAME_INCORPORATE_HOT_SPARE: 867 if ((rs->sc_flags & RAIDF_INITED) == 0) 868 return (ENXIO); 869 } 870 871 switch (cmd) { 872 873 /* configure the system */ 874 case RAIDFRAME_CONFIGURE: 875 876 if (raidPtr->valid) { 877 /* There is a valid RAID set running on this unit! */ 878 printf("raid%d: Device already configured!\n",unit); 879 return(EINVAL); 880 } 881 882 /* copy-in the configuration information */ 883 /* data points to a pointer to the configuration structure */ 884 885 u_cfg = *((RF_Config_t **) data); 886 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *)); 887 if (k_cfg == NULL) { 888 return (ENOMEM); 889 } 890 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg, 891 sizeof(RF_Config_t)); 892 if (retcode) { 893 RF_Free(k_cfg, sizeof(RF_Config_t)); 894 db1_printf(("rf_ioctl: retcode=%d copyin.1\n", 895 retcode)); 896 return (retcode); 897 } 898 /* allocate a buffer for the layout-specific data, and copy it 899 * in */ 900 if (k_cfg->layoutSpecificSize) { 901 if (k_cfg->layoutSpecificSize > 10000) { 902 /* sanity check */ 903 RF_Free(k_cfg, sizeof(RF_Config_t)); 904 return (EINVAL); 905 } 906 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize, 907 (u_char *)); 908 if (specific_buf == NULL) { 909 RF_Free(k_cfg, sizeof(RF_Config_t)); 910 return (ENOMEM); 911 } 912 retcode = copyin(k_cfg->layoutSpecific, 913 (caddr_t) specific_buf, 914 k_cfg->layoutSpecificSize); 915 if (retcode) { 916 RF_Free(k_cfg, sizeof(RF_Config_t)); 917 RF_Free(specific_buf, 918 k_cfg->layoutSpecificSize); 919 db1_printf(("rf_ioctl: retcode=%d copyin.2\n", 920 retcode)); 921 return (retcode); 922 } 923 } else 924 specific_buf = NULL; 925 k_cfg->layoutSpecific = specific_buf; 926 927 /* should do some kind of sanity check on the configuration. 928 * Store the sum of all the bytes in the last byte? 
*/ 929 930 /* configure the system */ 931 932 /* 933 * Clear the entire RAID descriptor, just to make sure 934 * there is no stale data left in the case of a 935 * reconfiguration 936 */ 937 memset((char *) raidPtr, 0, sizeof(RF_Raid_t)); 938 raidPtr->raidid = unit; 939 940 retcode = rf_Configure(raidPtr, k_cfg, NULL); 941 942 if (retcode == 0) { 943 944 /* allow this many simultaneous IO's to 945 this RAID device */ 946 raidPtr->openings = RAIDOUTSTANDING; 947 948 raidinit(raidPtr); 949 rf_markalldirty(raidPtr); 950 } 951 /* free the buffers. No return code here. */ 952 if (k_cfg->layoutSpecificSize) { 953 RF_Free(specific_buf, k_cfg->layoutSpecificSize); 954 } 955 RF_Free(k_cfg, sizeof(RF_Config_t)); 956 957 return (retcode); 958 959 /* shutdown the system */ 960 case RAIDFRAME_SHUTDOWN: 961 962 if ((error = raidlock(rs)) != 0) 963 return (error); 964 965 /* 966 * If somebody has a partition mounted, we shouldn't 967 * shutdown. 968 */ 969 970 part = DISKPART(dev); 971 pmask = (1 << part); 972 if ((rs->sc_dkdev.dk_openmask & ~pmask) || 973 ((rs->sc_dkdev.dk_bopenmask & pmask) && 974 (rs->sc_dkdev.dk_copenmask & pmask))) { 975 raidunlock(rs); 976 return (EBUSY); 977 } 978 979 retcode = rf_Shutdown(raidPtr); 980 981 /* It's no longer initialized... */ 982 rs->sc_flags &= ~RAIDF_INITED; 983 984 /* Detach the disk. 
*/ 985 disk_detach(&rs->sc_dkdev); 986 987 raidunlock(rs); 988 989 return (retcode); 990 case RAIDFRAME_GET_COMPONENT_LABEL: 991 clabel_ptr = (RF_ComponentLabel_t **) data; 992 /* need to read the component label for the disk indicated 993 by row,column in clabel */ 994 995 /* For practice, let's get it directly fromdisk, rather 996 than from the in-core copy */ 997 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ), 998 (RF_ComponentLabel_t *)); 999 if (clabel == NULL) 1000 return (ENOMEM); 1001 1002 memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t)); 1003 1004 retcode = copyin( *clabel_ptr, clabel, 1005 sizeof(RF_ComponentLabel_t)); 1006 1007 if (retcode) { 1008 RF_Free( clabel, sizeof(RF_ComponentLabel_t)); 1009 return(retcode); 1010 } 1011 1012 row = clabel->row; 1013 column = clabel->column; 1014 1015 if ((row < 0) || (row >= raidPtr->numRow) || 1016 (column < 0) || (column >= raidPtr->numCol + 1017 raidPtr->numSpare)) { 1018 RF_Free( clabel, sizeof(RF_ComponentLabel_t)); 1019 return(EINVAL); 1020 } 1021 1022 raidread_component_label(raidPtr->Disks[row][column].dev, 1023 raidPtr->raid_cinfo[row][column].ci_vp, 1024 clabel ); 1025 1026 retcode = copyout((caddr_t) clabel, 1027 (caddr_t) *clabel_ptr, 1028 sizeof(RF_ComponentLabel_t)); 1029 RF_Free( clabel, sizeof(RF_ComponentLabel_t)); 1030 return (retcode); 1031 1032 case RAIDFRAME_SET_COMPONENT_LABEL: 1033 clabel = (RF_ComponentLabel_t *) data; 1034 1035 /* XXX check the label for valid stuff... */ 1036 /* Note that some things *should not* get modified -- 1037 the user should be re-initing the labels instead of 1038 trying to patch things. 
1039 */ 1040 1041 printf("Got component label:\n"); 1042 printf("Version: %d\n",clabel->version); 1043 printf("Serial Number: %d\n",clabel->serial_number); 1044 printf("Mod counter: %d\n",clabel->mod_counter); 1045 printf("Row: %d\n", clabel->row); 1046 printf("Column: %d\n", clabel->column); 1047 printf("Num Rows: %d\n", clabel->num_rows); 1048 printf("Num Columns: %d\n", clabel->num_columns); 1049 printf("Clean: %d\n", clabel->clean); 1050 printf("Status: %d\n", clabel->status); 1051 1052 row = clabel->row; 1053 column = clabel->column; 1054 1055 if ((row < 0) || (row >= raidPtr->numRow) || 1056 (column < 0) || (column >= raidPtr->numCol)) { 1057 return(EINVAL); 1058 } 1059 1060 /* XXX this isn't allowed to do anything for now :-) */ 1061 1062 /* XXX and before it is, we need to fill in the rest 1063 of the fields!?!?!?! */ 1064 #if 0 1065 raidwrite_component_label( 1066 raidPtr->Disks[row][column].dev, 1067 raidPtr->raid_cinfo[row][column].ci_vp, 1068 clabel ); 1069 #endif 1070 return (0); 1071 1072 case RAIDFRAME_INIT_LABELS: 1073 clabel = (RF_ComponentLabel_t *) data; 1074 /* 1075 we only want the serial number from 1076 the above. We get all the rest of the information 1077 from the config that was used to create this RAID 1078 set. 
1079 */ 1080 1081 raidPtr->serial_number = clabel->serial_number; 1082 1083 raid_init_component_label(raidPtr, &ci_label); 1084 ci_label.serial_number = clabel->serial_number; 1085 1086 for(row=0;row<raidPtr->numRow;row++) { 1087 ci_label.row = row; 1088 for(column=0;column<raidPtr->numCol;column++) { 1089 diskPtr = &raidPtr->Disks[row][column]; 1090 if (!RF_DEAD_DISK(diskPtr->status)) { 1091 ci_label.partitionSize = diskPtr->partitionSize; 1092 ci_label.column = column; 1093 raidwrite_component_label( 1094 raidPtr->Disks[row][column].dev, 1095 raidPtr->raid_cinfo[row][column].ci_vp, 1096 &ci_label ); 1097 } 1098 } 1099 } 1100 1101 return (retcode); 1102 case RAIDFRAME_SET_AUTOCONFIG: 1103 d = rf_set_autoconfig(raidPtr, *(int *) data); 1104 printf("New autoconfig value is: %d\n", d); 1105 *(int *) data = d; 1106 return (retcode); 1107 1108 case RAIDFRAME_SET_ROOT: 1109 d = rf_set_rootpartition(raidPtr, *(int *) data); 1110 printf("New rootpartition value is: %d\n", d); 1111 *(int *) data = d; 1112 return (retcode); 1113 1114 /* initialize all parity */ 1115 case RAIDFRAME_REWRITEPARITY: 1116 1117 if (raidPtr->Layout.map->faultsTolerated == 0) { 1118 /* Parity for RAID 0 is trivially correct */ 1119 raidPtr->parity_good = RF_RAID_CLEAN; 1120 return(0); 1121 } 1122 1123 if (raidPtr->parity_rewrite_in_progress == 1) { 1124 /* Re-write is already in progress! 
*/ 1125 return(EINVAL); 1126 } 1127 1128 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread, 1129 rf_RewriteParityThread, 1130 raidPtr,"raid_parity"); 1131 return (retcode); 1132 1133 1134 case RAIDFRAME_ADD_HOT_SPARE: 1135 sparePtr = (RF_SingleComponent_t *) data; 1136 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t)); 1137 retcode = rf_add_hot_spare(raidPtr, &hot_spare); 1138 return(retcode); 1139 1140 case RAIDFRAME_REMOVE_HOT_SPARE: 1141 return(retcode); 1142 1143 case RAIDFRAME_DELETE_COMPONENT: 1144 componentPtr = (RF_SingleComponent_t *)data; 1145 memcpy( &component, componentPtr, 1146 sizeof(RF_SingleComponent_t)); 1147 retcode = rf_delete_component(raidPtr, &component); 1148 return(retcode); 1149 1150 case RAIDFRAME_INCORPORATE_HOT_SPARE: 1151 componentPtr = (RF_SingleComponent_t *)data; 1152 memcpy( &component, componentPtr, 1153 sizeof(RF_SingleComponent_t)); 1154 retcode = rf_incorporate_hot_spare(raidPtr, &component); 1155 return(retcode); 1156 1157 case RAIDFRAME_REBUILD_IN_PLACE: 1158 1159 if (raidPtr->Layout.map->faultsTolerated == 0) { 1160 /* Can't do this on a RAID 0!! */ 1161 return(EINVAL); 1162 } 1163 1164 if (raidPtr->recon_in_progress == 1) { 1165 /* a reconstruct is already in progress! 
*/ 1166 return(EINVAL); 1167 } 1168 1169 componentPtr = (RF_SingleComponent_t *) data; 1170 memcpy( &component, componentPtr, 1171 sizeof(RF_SingleComponent_t)); 1172 row = component.row; 1173 column = component.column; 1174 printf("Rebuild: %d %d\n",row, column); 1175 if ((row < 0) || (row >= raidPtr->numRow) || 1176 (column < 0) || (column >= raidPtr->numCol)) { 1177 return(EINVAL); 1178 } 1179 1180 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); 1181 if (rrcopy == NULL) 1182 return(ENOMEM); 1183 1184 rrcopy->raidPtr = (void *) raidPtr; 1185 rrcopy->row = row; 1186 rrcopy->col = column; 1187 1188 retcode = RF_CREATE_THREAD(raidPtr->recon_thread, 1189 rf_ReconstructInPlaceThread, 1190 rrcopy,"raid_reconip"); 1191 return(retcode); 1192 1193 case RAIDFRAME_GET_INFO: 1194 if (!raidPtr->valid) 1195 return (ENODEV); 1196 ucfgp = (RF_DeviceConfig_t **) data; 1197 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t), 1198 (RF_DeviceConfig_t *)); 1199 if (d_cfg == NULL) 1200 return (ENOMEM); 1201 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t)); 1202 d_cfg->rows = raidPtr->numRow; 1203 d_cfg->cols = raidPtr->numCol; 1204 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol; 1205 if (d_cfg->ndevs >= RF_MAX_DISKS) { 1206 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1207 return (ENOMEM); 1208 } 1209 d_cfg->nspares = raidPtr->numSpare; 1210 if (d_cfg->nspares >= RF_MAX_DISKS) { 1211 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1212 return (ENOMEM); 1213 } 1214 d_cfg->maxqdepth = raidPtr->maxQueueDepth; 1215 d = 0; 1216 for (i = 0; i < d_cfg->rows; i++) { 1217 for (j = 0; j < d_cfg->cols; j++) { 1218 d_cfg->devs[d] = raidPtr->Disks[i][j]; 1219 d++; 1220 } 1221 } 1222 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) { 1223 d_cfg->spares[i] = raidPtr->Disks[0][j]; 1224 } 1225 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp, 1226 sizeof(RF_DeviceConfig_t)); 1227 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1228 1229 return (retcode); 1230 1231 case 
RAIDFRAME_CHECK_PARITY: 1232 *(int *) data = raidPtr->parity_good; 1233 return (0); 1234 1235 case RAIDFRAME_RESET_ACCTOTALS: 1236 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals)); 1237 return (0); 1238 1239 case RAIDFRAME_GET_ACCTOTALS: 1240 totals = (RF_AccTotals_t *) data; 1241 *totals = raidPtr->acc_totals; 1242 return (0); 1243 1244 case RAIDFRAME_KEEP_ACCTOTALS: 1245 raidPtr->keep_acc_totals = *(int *)data; 1246 return (0); 1247 1248 case RAIDFRAME_GET_SIZE: 1249 *(int *) data = raidPtr->totalSectors; 1250 return (0); 1251 1252 /* fail a disk & optionally start reconstruction */ 1253 case RAIDFRAME_FAIL_DISK: 1254 1255 if (raidPtr->Layout.map->faultsTolerated == 0) { 1256 /* Can't do this on a RAID 0!! */ 1257 return(EINVAL); 1258 } 1259 1260 rr = (struct rf_recon_req *) data; 1261 1262 if (rr->row < 0 || rr->row >= raidPtr->numRow 1263 || rr->col < 0 || rr->col >= raidPtr->numCol) 1264 return (EINVAL); 1265 1266 printf("raid%d: Failing the disk: row: %d col: %d\n", 1267 unit, rr->row, rr->col); 1268 1269 /* make a copy of the recon request so that we don't rely on 1270 * the user's buffer */ 1271 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); 1272 if (rrcopy == NULL) 1273 return(ENOMEM); 1274 memcpy(rrcopy, rr, sizeof(*rr)); 1275 rrcopy->raidPtr = (void *) raidPtr; 1276 1277 retcode = RF_CREATE_THREAD(raidPtr->recon_thread, 1278 rf_ReconThread, 1279 rrcopy,"raid_recon"); 1280 return (0); 1281 1282 /* invoke a copyback operation after recon on whatever disk 1283 * needs it, if any */ 1284 case RAIDFRAME_COPYBACK: 1285 1286 if (raidPtr->Layout.map->faultsTolerated == 0) { 1287 /* This makes no sense on a RAID 0!! */ 1288 return(EINVAL); 1289 } 1290 1291 if (raidPtr->copyback_in_progress == 1) { 1292 /* Copyback is already in progress! 
*/ 1293 return(EINVAL); 1294 } 1295 1296 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread, 1297 rf_CopybackThread, 1298 raidPtr,"raid_copyback"); 1299 return (retcode); 1300 1301 /* return the percentage completion of reconstruction */ 1302 case RAIDFRAME_CHECK_RECON_STATUS: 1303 if (raidPtr->Layout.map->faultsTolerated == 0) { 1304 /* This makes no sense on a RAID 0, so tell the 1305 user it's done. */ 1306 *(int *) data = 100; 1307 return(0); 1308 } 1309 row = 0; /* XXX we only consider a single row... */ 1310 if (raidPtr->status[row] != rf_rs_reconstructing) 1311 *(int *) data = 100; 1312 else 1313 *(int *) data = raidPtr->reconControl[row]->percentComplete; 1314 return (0); 1315 case RAIDFRAME_CHECK_RECON_STATUS_EXT: 1316 progressInfoPtr = (RF_ProgressInfo_t **) data; 1317 row = 0; /* XXX we only consider a single row... */ 1318 if (raidPtr->status[row] != rf_rs_reconstructing) { 1319 progressInfo.remaining = 0; 1320 progressInfo.completed = 100; 1321 progressInfo.total = 100; 1322 } else { 1323 progressInfo.total = 1324 raidPtr->reconControl[row]->numRUsTotal; 1325 progressInfo.completed = 1326 raidPtr->reconControl[row]->numRUsComplete; 1327 progressInfo.remaining = progressInfo.total - 1328 progressInfo.completed; 1329 } 1330 retcode = copyout((caddr_t) &progressInfo, 1331 (caddr_t) *progressInfoPtr, 1332 sizeof(RF_ProgressInfo_t)); 1333 return (retcode); 1334 1335 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: 1336 if (raidPtr->Layout.map->faultsTolerated == 0) { 1337 /* This makes no sense on a RAID 0, so tell the 1338 user it's done. 
*/ 1339 *(int *) data = 100; 1340 return(0); 1341 } 1342 if (raidPtr->parity_rewrite_in_progress == 1) { 1343 *(int *) data = 100 * 1344 raidPtr->parity_rewrite_stripes_done / 1345 raidPtr->Layout.numStripe; 1346 } else { 1347 *(int *) data = 100; 1348 } 1349 return (0); 1350 1351 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: 1352 progressInfoPtr = (RF_ProgressInfo_t **) data; 1353 if (raidPtr->parity_rewrite_in_progress == 1) { 1354 progressInfo.total = raidPtr->Layout.numStripe; 1355 progressInfo.completed = 1356 raidPtr->parity_rewrite_stripes_done; 1357 progressInfo.remaining = progressInfo.total - 1358 progressInfo.completed; 1359 } else { 1360 progressInfo.remaining = 0; 1361 progressInfo.completed = 100; 1362 progressInfo.total = 100; 1363 } 1364 retcode = copyout((caddr_t) &progressInfo, 1365 (caddr_t) *progressInfoPtr, 1366 sizeof(RF_ProgressInfo_t)); 1367 return (retcode); 1368 1369 case RAIDFRAME_CHECK_COPYBACK_STATUS: 1370 if (raidPtr->Layout.map->faultsTolerated == 0) { 1371 /* This makes no sense on a RAID 0 */ 1372 *(int *) data = 100; 1373 return(0); 1374 } 1375 if (raidPtr->copyback_in_progress == 1) { 1376 *(int *) data = 100 * raidPtr->copyback_stripes_done / 1377 raidPtr->Layout.numStripe; 1378 } else { 1379 *(int *) data = 100; 1380 } 1381 return (0); 1382 1383 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: 1384 progressInfoPtr = (RF_ProgressInfo_t **) data; 1385 if (raidPtr->copyback_in_progress == 1) { 1386 progressInfo.total = raidPtr->Layout.numStripe; 1387 progressInfo.completed = 1388 raidPtr->copyback_stripes_done; 1389 progressInfo.remaining = progressInfo.total - 1390 progressInfo.completed; 1391 } else { 1392 progressInfo.remaining = 0; 1393 progressInfo.completed = 100; 1394 progressInfo.total = 100; 1395 } 1396 retcode = copyout((caddr_t) &progressInfo, 1397 (caddr_t) *progressInfoPtr, 1398 sizeof(RF_ProgressInfo_t)); 1399 return (retcode); 1400 1401 /* the sparetable daemon calls this to wait for the kernel to 1402 * need a spare 
table. this ioctl does not return until a 1403 * spare table is needed. XXX -- calling mpsleep here in the 1404 * ioctl code is almost certainly wrong and evil. -- XXX XXX 1405 * -- I should either compute the spare table in the kernel, 1406 * or have a different -- XXX XXX -- interface (a different 1407 * character device) for delivering the table -- XXX */ 1408 #if 0 1409 case RAIDFRAME_SPARET_WAIT: 1410 RF_LOCK_MUTEX(rf_sparet_wait_mutex); 1411 while (!rf_sparet_wait_queue) 1412 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE); 1413 waitreq = rf_sparet_wait_queue; 1414 rf_sparet_wait_queue = rf_sparet_wait_queue->next; 1415 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); 1416 1417 /* structure assignment */ 1418 *((RF_SparetWait_t *) data) = *waitreq; 1419 1420 RF_Free(waitreq, sizeof(*waitreq)); 1421 return (0); 1422 1423 /* wakes up a process waiting on SPARET_WAIT and puts an error 1424 * code in it that will cause the dameon to exit */ 1425 case RAIDFRAME_ABORT_SPARET_WAIT: 1426 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *)); 1427 waitreq->fcol = -1; 1428 RF_LOCK_MUTEX(rf_sparet_wait_mutex); 1429 waitreq->next = rf_sparet_wait_queue; 1430 rf_sparet_wait_queue = waitreq; 1431 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); 1432 wakeup(&rf_sparet_wait_queue); 1433 return (0); 1434 1435 /* used by the spare table daemon to deliver a spare table 1436 * into the kernel */ 1437 case RAIDFRAME_SEND_SPARET: 1438 1439 /* install the spare table */ 1440 retcode = rf_SetSpareTable(raidPtr, *(void **) data); 1441 1442 /* respond to the requestor. 
the return status of the spare 1443 * table installation is passed in the "fcol" field */ 1444 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *)); 1445 waitreq->fcol = retcode; 1446 RF_LOCK_MUTEX(rf_sparet_wait_mutex); 1447 waitreq->next = rf_sparet_resp_queue; 1448 rf_sparet_resp_queue = waitreq; 1449 wakeup(&rf_sparet_resp_queue); 1450 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); 1451 1452 return (retcode); 1453 #endif 1454 1455 default: 1456 break; /* fall through to the os-specific code below */ 1457 1458 } 1459 1460 if (!raidPtr->valid) 1461 return (EINVAL); 1462 1463 /* 1464 * Add support for "regular" device ioctls here. 1465 */ 1466 1467 switch (cmd) { 1468 case DIOCGDINFO: 1469 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label); 1470 break; 1471 #ifdef __HAVE_OLD_DISKLABEL 1472 case ODIOCGDINFO: 1473 newlabel = *(rs->sc_dkdev.dk_label); 1474 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1475 return ENOTTY; 1476 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1477 break; 1478 #endif 1479 1480 case DIOCGPART: 1481 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label; 1482 ((struct partinfo *) data)->part = 1483 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)]; 1484 break; 1485 1486 case DIOCWDINFO: 1487 case DIOCSDINFO: 1488 #ifdef __HAVE_OLD_DISKLABEL 1489 case ODIOCWDINFO: 1490 case ODIOCSDINFO: 1491 #endif 1492 { 1493 struct disklabel *lp; 1494 #ifdef __HAVE_OLD_DISKLABEL 1495 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 1496 memset(&newlabel, 0, sizeof newlabel); 1497 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 1498 lp = &newlabel; 1499 } else 1500 #endif 1501 lp = (struct disklabel *)data; 1502 1503 if ((error = raidlock(rs)) != 0) 1504 return (error); 1505 1506 rs->sc_flags |= RAIDF_LABELLING; 1507 1508 error = setdisklabel(rs->sc_dkdev.dk_label, 1509 lp, 0, rs->sc_dkdev.dk_cpulabel); 1510 if (error == 0) { 1511 if (cmd == DIOCWDINFO 1512 #ifdef __HAVE_OLD_DISKLABEL 1513 || cmd == ODIOCWDINFO 1514 #endif 1515 ) 1516 
					error = writedisklabel(RAIDLABELDEV(dev),
					    raidstrategy, rs->sc_dkdev.dk_label,
					    rs->sc_dkdev.dk_cpulabel);
		}
		rs->sc_flags &= ~RAIDF_LABELLING;

		raidunlock(rs);

		if (error)
			return (error);
		break;
	}

	case DIOCWLABEL:
		/* Enable or disable writes to the in-use label area. */
		if (*(int *) data != 0)
			rs->sc_flags |= RAIDF_WLABEL;
		else
			rs->sc_flags &= ~RAIDF_WLABEL;
		break;

	case DIOCGDEFLABEL:
		/* Fabricate and return a default disklabel for this set. */
		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDEFLABEL:
		raidgetdefaultlabel(raidPtr, rs, &newlabel);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	default:
		retcode = ENOTTY;
	}
	return (retcode);

}


/* raidinit -- complete the rest of the initialization for the
   RAIDframe device: mark the softc as initialized, name the unit,
   and attach the disk structure. */


static void
raidinit(raidPtr)
	RF_Raid_t *raidPtr;
{
	struct raid_softc *rs;
	int     unit;

	unit = raidPtr->raidid;

	rs = &raid_softc[unit];

	/* XXX should check return code first... */
	rs->sc_flags |= RAIDF_INITED;

	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */

	rs->sc_dkdev.dk_name = rs->sc_xname;

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */

	disk_attach(&rs->sc_dkdev);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	rs->sc_size = raidPtr->totalSectors;

}

/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
 * XXX
 *
 * XXX This code is not currently used. GO
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int     retcode;

	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* mpsleep unlocks the mutex */
	/* NOTE(review): the comment above predates the switch to tsleep();
	 * confirm the wait mutex is really released across the sleep. */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		       "raidframe getsparetable", 0);
	}
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
/* a wrapper around rf_DoAccess that extracts appropriate info from the
 * bp & passes it down.
 * any calls originating in the kernel must use non-blocking I/O
 * do some extra sanity checking to return "appropriate" error values for
 * certain conditions (to make some standard utilities work)
 *
 * Formerly known as: rf_DoAccessKernel
 */
void
raidstart(raidPtr)
	RF_Raid_t *raidPtr;
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	int     retcode;
	struct partition *pp;
	daddr_t blocknum;
	int     unit;
	struct raid_softc *rs;
	int     do_async;
	struct buf *bp;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* quick check to see if anything has died recently */
	RF_LOCK_MUTEX(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		raidPtr->numNewFailures--;
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);

	/* Drain the buffer queue for as long as we have openings left.
	 * The mutex is dropped while each request is prepared and
	 * re-taken before the openings check at the top of the loop. */
	RF_LOCK_MUTEX(raidPtr->mutex);
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}
		BUFQ_REMOVE(&rs->buf_queue, bp);

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		/* NOTE(review): "1 ||" forces this debug branch on
		 * unconditionally -- looks like a leftover. */
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		/* Reject requests that run past the end of the set or
		 * whose arithmetic wrapped around. */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* Reject transfers that are not a multiple of the sector
		 * size. */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				      do_async, raid_addr, num_blocks,
				      bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}




/* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */

int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	int     op = (req->type == RF_IO_TYPE_READ) ?
	    B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int     unit;
	int     s;

	s=0;
	/* s = splbio();*/ /* want to test this */
	/* XXX along with the vnode, we also need the softc associated with
	 * this device.. */

	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}
	rs = &raid_softc[unit];

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!!  Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */

	/* Clear any stale error state left on the buffer. */
	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	LIST_INIT(&raidbp->rf_buf.b_dep);

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this?? */

		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		/* Set up the shadow buf for the component I/O; completion
		 * fires KernelWakeupFunc. */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		       op | bp->b_flags, queue->rf_cinfo->ci_dev,
		       req->sectorOffset, req->numSector,
		       req->buf, KernelWakeupFunc, (void *) req,
		       queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				    (long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d row %d col %d\n",
			    req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			    (int) req->sectorOffset, (int) req->numSector,
			    (int) (req->numSector <<
				   queue->raidPtr->logBytesPerSector),
			    (int) queue->raidPtr->logBytesPerSector));
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	/* splx(s); */ /* want to test this */
	return (0);
}
/* this is the callback function associated with an I/O invoked from
   kernel code.
 */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	struct raid_softc *rs;
	int     unit;
	int     s;

	s = splbio();
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	bp = raidbp->rf_obp;

	queue = (RF_DiskQueue_t *) req->queue;

	/* Propagate any component error back to the original buffer. */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */


	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error.  Marking %s as failed.\n",
			       unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	rs = &raid_softc[unit];
	RAIDPUTBUF(rs, raidbp);

	/* Tell the queue layer and the original requester we're done. */
	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);

	splx(s);
}



/*
 * initialize a buf structure for doing an I/O in the kernel.
 */
static void
InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
       logBytesPerSector, b_proc)
	struct buf *bp;
	struct vnode *b_vp;
	unsigned rw_flag;
	dev_t   dev;
	RF_SectorNum_t startSect;
	RF_SectorCount_t numSect;
	caddr_t buf;
	void  (*cbFunc) (struct buf *);
	void   *cbArg;
	int     logBytesPerSector;
	struct proc *b_proc;
{
	/* bp->b_flags = B_PHYS | rw_flag; */
	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
	bp->b_bcount = numSect << logBytesPerSector;
	bp->b_bufsize = bp->b_bcount;
	bp->b_error = 0;
	bp->b_dev = dev;
	bp->b_data = buf;
	bp->b_blkno = startSect;
	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
	if (bp->b_bcount == 0) {
		panic("bp->b_bcount is zero in InitBP!!\n");
	}
	bp->b_proc = b_proc;
	bp->b_iodone = cbFunc;
	bp->b_vp = b_vp;

}

/* Fill in a default disklabel for a RAID set of raidPtr's geometry. */
static void
raidgetdefaultlabel(raidPtr, rs, lp)
	RF_Raid_t *raidPtr;
	struct raid_softc *rs;
	struct disklabel *lp;
{
	db1_printf(("Building a default label...\n"));
	memset(lp, 0, sizeof(*lp));

	/* fabricate a label...
	 */
	lp->d_secperunit = raidPtr->totalSectors;
	lp->d_secsize = raidPtr->bytesPerSector;
	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	lp->d_ntracks = 4 * raidPtr->numCol;
	lp->d_ncylinders = raidPtr->totalSectors /
	    (lp->d_nsectors * lp->d_ntracks);
	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;

	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
	lp->d_type = DTYPE_RAID;
	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
	lp->d_rpm = 3600;
	lp->d_interleave = 1;
	lp->d_flags = 0;

	/* only the raw partition, covering the whole set */
	lp->d_partitions[RAW_PART].p_offset = 0;
	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
	lp->d_npartitions = RAW_PART + 1;

	lp->d_magic = DISKMAGIC;
	lp->d_magic2 = DISKMAGIC;
	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);

}
/*
 * Read the disklabel from the raid device.  If one is not present, fake one
 * up.
 */
static void
raidgetdisklabel(dev)
	dev_t   dev;
{
	int     unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	char   *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	memset(clp, 0, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* start from a fabricated default in case the read fails */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		raidmakedisklabel(rs);
	else {
		int     i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * the same components are used, and an old disklabel may be
		 * used if one is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("WARNING: %s: "
			    "total sector size in disklabel (%d) != "
			    "the size of raid (%ld)\n", rs->sc_xname,
			    lp->d_secperunit, (long) rs->sc_size);
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("WARNING: %s: end of partition `%c' "
				       "exceeds the size of raid (%ld)\n",
				       rs->sc_xname, 'a' + i, (long) rs->sc_size);
		}
	}

}
/*
 * Take care of things one might want to take care of in the event
 * that a disklabel isn't present.
 */
static void
raidmakedisklabel(rs)
	struct raid_softc *rs;
{
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	db1_printf(("Making a label..\n"));

	/*
	 * For historical reasons, if there's no disklabel present
	 * the raw partition must be marked FS_BSDFFS.
	 */

	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;

	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));

	lp->d_checksum = dkcksum(lp);
}
/*
 * Lookup the provided name in the filesystem.  If the file exists,
 * is a valid block device, and isn't being used by anyone else,
 * set *vpp to the file's vnode.
 * You'll find the original of this in ccd.c
 */
int
raidlookup(path, p, vpp)
	char   *path;
	struct proc *p;
	struct vnode **vpp;	/* result */
{
	struct nameidata nd;
	struct vnode *vp;
	struct vattr va;
	int     error;

	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
#ifdef DEBUG
		printf("RAIDframe: vn_open returned %d\n", error);
#endif
		return (error);
	}
	vp = nd.ni_vp;
	/* refuse a vnode that is already open elsewhere */
	if (vp->v_usecount > 1) {
		VOP_UNLOCK(vp, 0);
		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
		return (EBUSY);
	}
	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
		VOP_UNLOCK(vp, 0);
		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
		return (error);
	}
	/* XXX: eventually we should handle VREG, too. */
	if (va.va_type != VBLK) {
		VOP_UNLOCK(vp, 0);
		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
		return (ENOTBLK);
	}
	VOP_UNLOCK(vp, 0);
	*vpp = vp;
	return (0);
}
/*
 * Wait interruptibly for an exclusive lock.
 *
 * XXX
 * Several drivers do this; it should be abstracted and made MP-safe.
 * (Hmm... where have we seen this warning before :->  GO )
 */
static int
raidlock(rs)
	struct raid_softc *rs;
{
	int     error;

	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
		rs->sc_flags |= RAIDF_WANTED;
		if ((error =
			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
			return (error);
	}
	rs->sc_flags |= RAIDF_LOCKED;
	return (0);
}
/*
 * Unlock and wake up any waiters.
 */
static void
raidunlock(rs)
	struct raid_softc *rs;
{

	rs->sc_flags &= ~RAIDF_LOCKED;
	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
		rs->sc_flags &= ~RAIDF_WANTED;
		wakeup(rs);
	}
}


#define RF_COMPONENT_INFO_OFFSET  16384	/* bytes */
#define RF_COMPONENT_INFO_SIZE     1024	/* bytes */

/* Rewrite the component label with clean == RF_RAID_CLEAN. */
int
raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
{
	RF_ComponentLabel_t clabel;
	raidread_component_label(dev, b_vp, &clabel);
	clabel.mod_counter = mod_counter;
	clabel.clean = RF_RAID_CLEAN;
	raidwrite_component_label(dev, b_vp, &clabel);
	return(0);
}


/* Rewrite the component label with clean == RF_RAID_DIRTY. */
int
raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
{
	RF_ComponentLabel_t clabel;
	raidread_component_label(dev, b_vp, &clabel);
	clabel.mod_counter = mod_counter;
	clabel.clean = RF_RAID_DIRTY;
	raidwrite_component_label(dev, b_vp, &clabel);
	return(0);
}

/* ARGSUSED */
int
raidread_component_label(dev, b_vp, clabel)
	dev_t   dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *clabel;
{
	struct buf *bp;
	int     error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	if (b_vp == NULL) {
		/* For whatever reason, this component is not valid.
		   Don't try to read a component label from it. */
		return(EINVAL);
	}

	/* get a block of the appropriate size...
*/ 2228 bp = geteblk((int)RF_COMPONENT_INFO_SIZE); 2229 bp->b_dev = dev; 2230 2231 /* get our ducks in a row for the read */ 2232 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE; 2233 bp->b_bcount = RF_COMPONENT_INFO_SIZE; 2234 bp->b_flags |= B_READ; 2235 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE; 2236 2237 (*bdevsw[major(bp->b_dev)].d_strategy)(bp); 2238 2239 error = biowait(bp); 2240 2241 if (!error) { 2242 memcpy(clabel, bp->b_data, 2243 sizeof(RF_ComponentLabel_t)); 2244 #if 0 2245 rf_print_component_label( clabel ); 2246 #endif 2247 } else { 2248 #if 0 2249 printf("Failed to read RAID component label!\n"); 2250 #endif 2251 } 2252 2253 brelse(bp); 2254 return(error); 2255 } 2256 /* ARGSUSED */ 2257 int 2258 raidwrite_component_label(dev, b_vp, clabel) 2259 dev_t dev; 2260 struct vnode *b_vp; 2261 RF_ComponentLabel_t *clabel; 2262 { 2263 struct buf *bp; 2264 int error; 2265 2266 /* get a block of the appropriate size... */ 2267 bp = geteblk((int)RF_COMPONENT_INFO_SIZE); 2268 bp->b_dev = dev; 2269 2270 /* get our ducks in a row for the write */ 2271 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE; 2272 bp->b_bcount = RF_COMPONENT_INFO_SIZE; 2273 bp->b_flags |= B_WRITE; 2274 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE; 2275 2276 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE ); 2277 2278 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t)); 2279 2280 (*bdevsw[major(bp->b_dev)].d_strategy)(bp); 2281 error = biowait(bp); 2282 brelse(bp); 2283 if (error) { 2284 #if 1 2285 printf("Failed to write RAID component info!\n"); 2286 #endif 2287 } 2288 2289 return(error); 2290 } 2291 2292 void 2293 rf_markalldirty(raidPtr) 2294 RF_Raid_t *raidPtr; 2295 { 2296 RF_ComponentLabel_t clabel; 2297 int r,c; 2298 2299 raidPtr->mod_counter++; 2300 for (r = 0; r < raidPtr->numRow; r++) { 2301 for (c = 0; c < raidPtr->numCol; c++) { 2302 /* we don't want to touch (at all) a disk that has 2303 failed */ 2304 if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) { 2305 
raidread_component_label( 2306 raidPtr->Disks[r][c].dev, 2307 raidPtr->raid_cinfo[r][c].ci_vp, 2308 &clabel); 2309 if (clabel.status == rf_ds_spared) { 2310 /* XXX do something special... 2311 but whatever you do, don't 2312 try to access it!! */ 2313 } else { 2314 #if 0 2315 clabel.status = 2316 raidPtr->Disks[r][c].status; 2317 raidwrite_component_label( 2318 raidPtr->Disks[r][c].dev, 2319 raidPtr->raid_cinfo[r][c].ci_vp, 2320 &clabel); 2321 #endif 2322 raidmarkdirty( 2323 raidPtr->Disks[r][c].dev, 2324 raidPtr->raid_cinfo[r][c].ci_vp, 2325 raidPtr->mod_counter); 2326 } 2327 } 2328 } 2329 } 2330 /* printf("Component labels marked dirty.\n"); */ 2331 #if 0 2332 for( c = 0; c < raidPtr->numSpare ; c++) { 2333 sparecol = raidPtr->numCol + c; 2334 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) { 2335 /* 2336 2337 XXX this is where we get fancy and map this spare 2338 into it's correct spot in the array. 2339 2340 */ 2341 /* 2342 2343 we claim this disk is "optimal" if it's 2344 rf_ds_used_spare, as that means it should be 2345 directly substitutable for the disk it replaced. 2346 We note that too... 
2347 2348 */ 2349 2350 for(i=0;i<raidPtr->numRow;i++) { 2351 for(j=0;j<raidPtr->numCol;j++) { 2352 if ((raidPtr->Disks[i][j].spareRow == 2353 r) && 2354 (raidPtr->Disks[i][j].spareCol == 2355 sparecol)) { 2356 srow = r; 2357 scol = sparecol; 2358 break; 2359 } 2360 } 2361 } 2362 2363 raidread_component_label( 2364 raidPtr->Disks[r][sparecol].dev, 2365 raidPtr->raid_cinfo[r][sparecol].ci_vp, 2366 &clabel); 2367 /* make sure status is noted */ 2368 clabel.version = RF_COMPONENT_LABEL_VERSION; 2369 clabel.mod_counter = raidPtr->mod_counter; 2370 clabel.serial_number = raidPtr->serial_number; 2371 clabel.row = srow; 2372 clabel.column = scol; 2373 clabel.num_rows = raidPtr->numRow; 2374 clabel.num_columns = raidPtr->numCol; 2375 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/ 2376 clabel.status = rf_ds_optimal; 2377 raidwrite_component_label( 2378 raidPtr->Disks[r][sparecol].dev, 2379 raidPtr->raid_cinfo[r][sparecol].ci_vp, 2380 &clabel); 2381 raidmarkclean( raidPtr->Disks[r][sparecol].dev, 2382 raidPtr->raid_cinfo[r][sparecol].ci_vp); 2383 } 2384 } 2385 2386 #endif 2387 } 2388 2389 2390 void 2391 rf_update_component_labels(raidPtr, final) 2392 RF_Raid_t *raidPtr; 2393 int final; 2394 { 2395 RF_ComponentLabel_t clabel; 2396 int sparecol; 2397 int r,c; 2398 int i,j; 2399 int srow, scol; 2400 2401 srow = -1; 2402 scol = -1; 2403 2404 /* XXX should do extra checks to make sure things really are clean, 2405 rather than blindly setting the clean bit... 
*/ 2406 2407 raidPtr->mod_counter++; 2408 2409 for (r = 0; r < raidPtr->numRow; r++) { 2410 for (c = 0; c < raidPtr->numCol; c++) { 2411 if (raidPtr->Disks[r][c].status == rf_ds_optimal) { 2412 raidread_component_label( 2413 raidPtr->Disks[r][c].dev, 2414 raidPtr->raid_cinfo[r][c].ci_vp, 2415 &clabel); 2416 /* make sure status is noted */ 2417 clabel.status = rf_ds_optimal; 2418 /* bump the counter */ 2419 clabel.mod_counter = raidPtr->mod_counter; 2420 2421 raidwrite_component_label( 2422 raidPtr->Disks[r][c].dev, 2423 raidPtr->raid_cinfo[r][c].ci_vp, 2424 &clabel); 2425 if (final == RF_FINAL_COMPONENT_UPDATE) { 2426 if (raidPtr->parity_good == RF_RAID_CLEAN) { 2427 raidmarkclean( 2428 raidPtr->Disks[r][c].dev, 2429 raidPtr->raid_cinfo[r][c].ci_vp, 2430 raidPtr->mod_counter); 2431 } 2432 } 2433 } 2434 /* else we don't touch it.. */ 2435 } 2436 } 2437 2438 for( c = 0; c < raidPtr->numSpare ; c++) { 2439 sparecol = raidPtr->numCol + c; 2440 /* Need to ensure that the reconstruct actually completed! */ 2441 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) { 2442 /* 2443 2444 we claim this disk is "optimal" if it's 2445 rf_ds_used_spare, as that means it should be 2446 directly substitutable for the disk it replaced. 2447 We note that too... 2448 2449 */ 2450 2451 for(i=0;i<raidPtr->numRow;i++) { 2452 for(j=0;j<raidPtr->numCol;j++) { 2453 if ((raidPtr->Disks[i][j].spareRow == 2454 0) && 2455 (raidPtr->Disks[i][j].spareCol == 2456 sparecol)) { 2457 srow = i; 2458 scol = j; 2459 break; 2460 } 2461 } 2462 } 2463 2464 /* XXX shouldn't *really* need this... 
*/ 2465 raidread_component_label( 2466 raidPtr->Disks[0][sparecol].dev, 2467 raidPtr->raid_cinfo[0][sparecol].ci_vp, 2468 &clabel); 2469 /* make sure status is noted */ 2470 2471 raid_init_component_label(raidPtr, &clabel); 2472 2473 clabel.mod_counter = raidPtr->mod_counter; 2474 clabel.row = srow; 2475 clabel.column = scol; 2476 clabel.status = rf_ds_optimal; 2477 2478 raidwrite_component_label( 2479 raidPtr->Disks[0][sparecol].dev, 2480 raidPtr->raid_cinfo[0][sparecol].ci_vp, 2481 &clabel); 2482 if (final == RF_FINAL_COMPONENT_UPDATE) { 2483 if (raidPtr->parity_good == RF_RAID_CLEAN) { 2484 raidmarkclean( raidPtr->Disks[0][sparecol].dev, 2485 raidPtr->raid_cinfo[0][sparecol].ci_vp, 2486 raidPtr->mod_counter); 2487 } 2488 } 2489 } 2490 } 2491 /* printf("Component labels updated\n"); */ 2492 } 2493 2494 void 2495 rf_close_component(raidPtr, vp, auto_configured) 2496 RF_Raid_t *raidPtr; 2497 struct vnode *vp; 2498 int auto_configured; 2499 { 2500 struct proc *p; 2501 2502 p = raidPtr->engine_thread; 2503 2504 if (vp != NULL) { 2505 if (auto_configured == 1) { 2506 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2507 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0); 2508 vput(vp); 2509 2510 } else { 2511 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p); 2512 } 2513 } else { 2514 printf("vnode was NULL\n"); 2515 } 2516 } 2517 2518 2519 void 2520 rf_UnconfigureVnodes(raidPtr) 2521 RF_Raid_t *raidPtr; 2522 { 2523 int r,c; 2524 struct proc *p; 2525 struct vnode *vp; 2526 int acd; 2527 2528 2529 /* We take this opportunity to close the vnodes like we should.. 
*/ 2530 2531 p = raidPtr->engine_thread; 2532 2533 for (r = 0; r < raidPtr->numRow; r++) { 2534 for (c = 0; c < raidPtr->numCol; c++) { 2535 printf("Closing vnode for row: %d col: %d\n", r, c); 2536 vp = raidPtr->raid_cinfo[r][c].ci_vp; 2537 acd = raidPtr->Disks[r][c].auto_configured; 2538 rf_close_component(raidPtr, vp, acd); 2539 raidPtr->raid_cinfo[r][c].ci_vp = NULL; 2540 raidPtr->Disks[r][c].auto_configured = 0; 2541 } 2542 } 2543 for (r = 0; r < raidPtr->numSpare; r++) { 2544 printf("Closing vnode for spare: %d\n", r); 2545 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp; 2546 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured; 2547 rf_close_component(raidPtr, vp, acd); 2548 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL; 2549 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0; 2550 } 2551 } 2552 2553 2554 void 2555 rf_ReconThread(req) 2556 struct rf_recon_req *req; 2557 { 2558 int s; 2559 RF_Raid_t *raidPtr; 2560 2561 s = splbio(); 2562 raidPtr = (RF_Raid_t *) req->raidPtr; 2563 raidPtr->recon_in_progress = 1; 2564 2565 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col, 2566 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0)); 2567 2568 /* XXX get rid of this! we don't need it at all.. */ 2569 RF_Free(req, sizeof(*req)); 2570 2571 raidPtr->recon_in_progress = 0; 2572 splx(s); 2573 2574 /* That's all... */ 2575 kthread_exit(0); /* does not return */ 2576 } 2577 2578 void 2579 rf_RewriteParityThread(raidPtr) 2580 RF_Raid_t *raidPtr; 2581 { 2582 int retcode; 2583 int s; 2584 2585 raidPtr->parity_rewrite_in_progress = 1; 2586 s = splbio(); 2587 retcode = rf_RewriteParity(raidPtr); 2588 splx(s); 2589 if (retcode) { 2590 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid); 2591 } else { 2592 /* set the clean bit! 
If we shutdown correctly, 2593 the clean bit on each component label will get 2594 set */ 2595 raidPtr->parity_good = RF_RAID_CLEAN; 2596 } 2597 raidPtr->parity_rewrite_in_progress = 0; 2598 2599 /* Anyone waiting for us to stop? If so, inform them... */ 2600 if (raidPtr->waitShutdown) { 2601 wakeup(&raidPtr->parity_rewrite_in_progress); 2602 } 2603 2604 /* That's all... */ 2605 kthread_exit(0); /* does not return */ 2606 } 2607 2608 2609 void 2610 rf_CopybackThread(raidPtr) 2611 RF_Raid_t *raidPtr; 2612 { 2613 int s; 2614 2615 raidPtr->copyback_in_progress = 1; 2616 s = splbio(); 2617 rf_CopybackReconstructedData(raidPtr); 2618 splx(s); 2619 raidPtr->copyback_in_progress = 0; 2620 2621 /* That's all... */ 2622 kthread_exit(0); /* does not return */ 2623 } 2624 2625 2626 void 2627 rf_ReconstructInPlaceThread(req) 2628 struct rf_recon_req *req; 2629 { 2630 int retcode; 2631 int s; 2632 RF_Raid_t *raidPtr; 2633 2634 s = splbio(); 2635 raidPtr = req->raidPtr; 2636 raidPtr->recon_in_progress = 1; 2637 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col); 2638 RF_Free(req, sizeof(*req)); 2639 raidPtr->recon_in_progress = 0; 2640 splx(s); 2641 2642 /* That's all... */ 2643 kthread_exit(0); /* does not return */ 2644 } 2645 2646 void 2647 rf_mountroot_hook(dev) 2648 struct device *dev; 2649 { 2650 2651 } 2652 2653 2654 RF_AutoConfig_t * 2655 rf_find_raid_components() 2656 { 2657 struct devnametobdevmaj *dtobdm; 2658 struct vnode *vp; 2659 struct disklabel label; 2660 struct device *dv; 2661 char *cd_name; 2662 dev_t dev; 2663 int error; 2664 int i; 2665 int good_one; 2666 RF_ComponentLabel_t *clabel; 2667 RF_AutoConfig_t *ac_list; 2668 RF_AutoConfig_t *ac; 2669 2670 2671 /* initialize the AutoConfig list */ 2672 ac_list = NULL; 2673 2674 /* we begin by trolling through *all* the devices on the system */ 2675 2676 for (dv = alldevs.tqh_first; dv != NULL; 2677 dv = dv->dv_list.tqe_next) { 2678 2679 /* we are only interested in disks... 
*/ 2680 if (dv->dv_class != DV_DISK) 2681 continue; 2682 2683 /* we don't care about floppies... */ 2684 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) { 2685 continue; 2686 } 2687 /* hdfd is the Atari/Hades floppy driver */ 2688 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"hdfd")) { 2689 continue; 2690 } 2691 2692 /* need to find the device_name_to_block_device_major stuff */ 2693 cd_name = dv->dv_cfdata->cf_driver->cd_name; 2694 dtobdm = dev_name2blk; 2695 while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) { 2696 dtobdm++; 2697 } 2698 2699 /* get a vnode for the raw partition of this disk */ 2700 2701 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART); 2702 if (bdevvp(dev, &vp)) 2703 panic("RAID can't alloc vnode"); 2704 2705 error = VOP_OPEN(vp, FREAD, NOCRED, 0); 2706 2707 if (error) { 2708 /* "Who cares." Continue looking 2709 for something that exists*/ 2710 vput(vp); 2711 continue; 2712 } 2713 2714 /* Ok, the disk exists. Go get the disklabel. */ 2715 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label, 2716 FREAD, NOCRED, 0); 2717 if (error) { 2718 /* 2719 * XXX can't happen - open() would 2720 * have errored out (or faked up one) 2721 */ 2722 printf("can't get label for dev %s%c (%d)!?!?\n", 2723 dv->dv_xname, 'a' + RAW_PART, error); 2724 } 2725 2726 /* don't need this any more. We'll allocate it again 2727 a little later if we really do... */ 2728 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2729 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0); 2730 vput(vp); 2731 2732 for (i=0; i < label.d_npartitions; i++) { 2733 /* We only support partitions marked as RAID */ 2734 if (label.d_partitions[i].p_fstype != FS_RAID) 2735 continue; 2736 2737 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i); 2738 if (bdevvp(dev, &vp)) 2739 panic("RAID can't alloc vnode"); 2740 2741 error = VOP_OPEN(vp, FREAD, NOCRED, 0); 2742 if (error) { 2743 /* Whatever... 
*/ 2744 vput(vp); 2745 continue; 2746 } 2747 2748 good_one = 0; 2749 2750 clabel = (RF_ComponentLabel_t *) 2751 malloc(sizeof(RF_ComponentLabel_t), 2752 M_RAIDFRAME, M_NOWAIT); 2753 if (clabel == NULL) { 2754 /* XXX CLEANUP HERE */ 2755 printf("RAID auto config: out of memory!\n"); 2756 return(NULL); /* XXX probably should panic? */ 2757 } 2758 2759 if (!raidread_component_label(dev, vp, clabel)) { 2760 /* Got the label. Does it look reasonable? */ 2761 if (rf_reasonable_label(clabel) && 2762 (clabel->partitionSize <= 2763 label.d_partitions[i].p_size)) { 2764 #if DEBUG 2765 printf("Component on: %s%c: %d\n", 2766 dv->dv_xname, 'a'+i, 2767 label.d_partitions[i].p_size); 2768 rf_print_component_label(clabel); 2769 #endif 2770 /* if it's reasonable, add it, 2771 else ignore it. */ 2772 ac = (RF_AutoConfig_t *) 2773 malloc(sizeof(RF_AutoConfig_t), 2774 M_RAIDFRAME, 2775 M_NOWAIT); 2776 if (ac == NULL) { 2777 /* XXX should panic?? */ 2778 return(NULL); 2779 } 2780 2781 sprintf(ac->devname, "%s%c", 2782 dv->dv_xname, 'a'+i); 2783 ac->dev = dev; 2784 ac->vp = vp; 2785 ac->clabel = clabel; 2786 ac->next = ac_list; 2787 ac_list = ac; 2788 good_one = 1; 2789 } 2790 } 2791 if (!good_one) { 2792 /* cleanup */ 2793 free(clabel, M_RAIDFRAME); 2794 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2795 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0); 2796 vput(vp); 2797 } 2798 } 2799 } 2800 return(ac_list); 2801 } 2802 2803 static int 2804 rf_reasonable_label(clabel) 2805 RF_ComponentLabel_t *clabel; 2806 { 2807 2808 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) || 2809 (clabel->version==RF_COMPONENT_LABEL_VERSION)) && 2810 ((clabel->clean == RF_RAID_CLEAN) || 2811 (clabel->clean == RF_RAID_DIRTY)) && 2812 clabel->row >=0 && 2813 clabel->column >= 0 && 2814 clabel->num_rows > 0 && 2815 clabel->num_columns > 0 && 2816 clabel->row < clabel->num_rows && 2817 clabel->column < clabel->num_columns && 2818 clabel->blockSize > 0 && 2819 clabel->numBlocks > 0) { 2820 /* label looks reasonable 
enough... */ 2821 return(1); 2822 } 2823 return(0); 2824 } 2825 2826 2827 void 2828 rf_print_component_label(clabel) 2829 RF_ComponentLabel_t *clabel; 2830 { 2831 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n", 2832 clabel->row, clabel->column, 2833 clabel->num_rows, clabel->num_columns); 2834 printf(" Version: %d Serial Number: %d Mod Counter: %d\n", 2835 clabel->version, clabel->serial_number, 2836 clabel->mod_counter); 2837 printf(" Clean: %s Status: %d\n", 2838 clabel->clean ? "Yes" : "No", clabel->status ); 2839 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n", 2840 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU); 2841 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n", 2842 (char) clabel->parityConfig, clabel->blockSize, 2843 clabel->numBlocks); 2844 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" ); 2845 printf(" Contains root partition: %s\n", 2846 clabel->root_partition ? "Yes" : "No" ); 2847 printf(" Last configured as: raid%d\n", clabel->last_unit ); 2848 #if 0 2849 printf(" Config order: %d\n", clabel->config_order); 2850 #endif 2851 2852 } 2853 2854 RF_ConfigSet_t * 2855 rf_create_auto_sets(ac_list) 2856 RF_AutoConfig_t *ac_list; 2857 { 2858 RF_AutoConfig_t *ac; 2859 RF_ConfigSet_t *config_sets; 2860 RF_ConfigSet_t *cset; 2861 RF_AutoConfig_t *ac_next; 2862 2863 2864 config_sets = NULL; 2865 2866 /* Go through the AutoConfig list, and figure out which components 2867 belong to what sets. */ 2868 ac = ac_list; 2869 while(ac!=NULL) { 2870 /* we're going to putz with ac->next, so save it here 2871 for use at the end of the loop */ 2872 ac_next = ac->next; 2873 2874 if (config_sets == NULL) { 2875 /* will need at least this one... 
*/ 2876 config_sets = (RF_ConfigSet_t *) 2877 malloc(sizeof(RF_ConfigSet_t), 2878 M_RAIDFRAME, M_NOWAIT); 2879 if (config_sets == NULL) { 2880 panic("rf_create_auto_sets: No memory!\n"); 2881 } 2882 /* this one is easy :) */ 2883 config_sets->ac = ac; 2884 config_sets->next = NULL; 2885 config_sets->rootable = 0; 2886 ac->next = NULL; 2887 } else { 2888 /* which set does this component fit into? */ 2889 cset = config_sets; 2890 while(cset!=NULL) { 2891 if (rf_does_it_fit(cset, ac)) { 2892 /* looks like it matches... */ 2893 ac->next = cset->ac; 2894 cset->ac = ac; 2895 break; 2896 } 2897 cset = cset->next; 2898 } 2899 if (cset==NULL) { 2900 /* didn't find a match above... new set..*/ 2901 cset = (RF_ConfigSet_t *) 2902 malloc(sizeof(RF_ConfigSet_t), 2903 M_RAIDFRAME, M_NOWAIT); 2904 if (cset == NULL) { 2905 panic("rf_create_auto_sets: No memory!\n"); 2906 } 2907 cset->ac = ac; 2908 ac->next = NULL; 2909 cset->next = config_sets; 2910 cset->rootable = 0; 2911 config_sets = cset; 2912 } 2913 } 2914 ac = ac_next; 2915 } 2916 2917 2918 return(config_sets); 2919 } 2920 2921 static int 2922 rf_does_it_fit(cset, ac) 2923 RF_ConfigSet_t *cset; 2924 RF_AutoConfig_t *ac; 2925 { 2926 RF_ComponentLabel_t *clabel1, *clabel2; 2927 2928 /* If this one matches the *first* one in the set, that's good 2929 enough, since the other members of the set would have been 2930 through here too... */ 2931 /* note that we are not checking partitionSize here.. 2932 2933 Note that we are also not checking the mod_counters here. 2934 If everything else matches execpt the mod_counter, that's 2935 good enough for this test. We will deal with the mod_counters 2936 a little later in the autoconfiguration process. 2937 2938 (clabel1->mod_counter == clabel2->mod_counter) && 2939 2940 The reason we don't check for this is that failed disks 2941 will have lower modification counts. 
If those disks are 2942 not added to the set they used to belong to, then they will 2943 form their own set, which may result in 2 different sets, 2944 for example, competing to be configured at raid0, and 2945 perhaps competing to be the root filesystem set. If the 2946 wrong ones get configured, or both attempt to become /, 2947 weird behaviour and or serious lossage will occur. Thus we 2948 need to bring them into the fold here, and kick them out at 2949 a later point. 2950 2951 */ 2952 2953 clabel1 = cset->ac->clabel; 2954 clabel2 = ac->clabel; 2955 if ((clabel1->version == clabel2->version) && 2956 (clabel1->serial_number == clabel2->serial_number) && 2957 (clabel1->num_rows == clabel2->num_rows) && 2958 (clabel1->num_columns == clabel2->num_columns) && 2959 (clabel1->sectPerSU == clabel2->sectPerSU) && 2960 (clabel1->SUsPerPU == clabel2->SUsPerPU) && 2961 (clabel1->SUsPerRU == clabel2->SUsPerRU) && 2962 (clabel1->parityConfig == clabel2->parityConfig) && 2963 (clabel1->maxOutstanding == clabel2->maxOutstanding) && 2964 (clabel1->blockSize == clabel2->blockSize) && 2965 (clabel1->numBlocks == clabel2->numBlocks) && 2966 (clabel1->autoconfigure == clabel2->autoconfigure) && 2967 (clabel1->root_partition == clabel2->root_partition) && 2968 (clabel1->last_unit == clabel2->last_unit) && 2969 (clabel1->config_order == clabel2->config_order)) { 2970 /* if it get's here, it almost *has* to be a match */ 2971 } else { 2972 /* it's not consistent with somebody in the set.. 2973 punt */ 2974 return(0); 2975 } 2976 /* all was fine.. it must fit... 
*/ 2977 return(1); 2978 } 2979 2980 int 2981 rf_have_enough_components(cset) 2982 RF_ConfigSet_t *cset; 2983 { 2984 RF_AutoConfig_t *ac; 2985 RF_AutoConfig_t *auto_config; 2986 RF_ComponentLabel_t *clabel; 2987 int r,c; 2988 int num_rows; 2989 int num_cols; 2990 int num_missing; 2991 int mod_counter; 2992 int mod_counter_found; 2993 int even_pair_failed; 2994 char parity_type; 2995 2996 2997 /* check to see that we have enough 'live' components 2998 of this set. If so, we can configure it if necessary */ 2999 3000 num_rows = cset->ac->clabel->num_rows; 3001 num_cols = cset->ac->clabel->num_columns; 3002 parity_type = cset->ac->clabel->parityConfig; 3003 3004 /* XXX Check for duplicate components!?!?!? */ 3005 3006 /* Determine what the mod_counter is supposed to be for this set. */ 3007 3008 mod_counter_found = 0; 3009 mod_counter = 0; 3010 ac = cset->ac; 3011 while(ac!=NULL) { 3012 if (mod_counter_found==0) { 3013 mod_counter = ac->clabel->mod_counter; 3014 mod_counter_found = 1; 3015 } else { 3016 if (ac->clabel->mod_counter > mod_counter) { 3017 mod_counter = ac->clabel->mod_counter; 3018 } 3019 } 3020 ac = ac->next; 3021 } 3022 3023 num_missing = 0; 3024 auto_config = cset->ac; 3025 3026 for(r=0; r<num_rows; r++) { 3027 even_pair_failed = 0; 3028 for(c=0; c<num_cols; c++) { 3029 ac = auto_config; 3030 while(ac!=NULL) { 3031 if ((ac->clabel->row == r) && 3032 (ac->clabel->column == c) && 3033 (ac->clabel->mod_counter == mod_counter)) { 3034 /* it's this one... */ 3035 #if DEBUG 3036 printf("Found: %s at %d,%d\n", 3037 ac->devname,r,c); 3038 #endif 3039 break; 3040 } 3041 ac=ac->next; 3042 } 3043 if (ac==NULL) { 3044 /* Didn't find one here! */ 3045 /* special case for RAID 1, especially 3046 where there are more than 2 3047 components (where RAIDframe treats 3048 things a little differently :( ) */ 3049 if (parity_type == '1') { 3050 if (c%2 == 0) { /* even component */ 3051 even_pair_failed = 1; 3052 } else { /* odd component. 
If 3053 we're failed, and 3054 so is the even 3055 component, it's 3056 "Good Night, Charlie" */ 3057 if (even_pair_failed == 1) { 3058 return(0); 3059 } 3060 } 3061 } else { 3062 /* normal accounting */ 3063 num_missing++; 3064 } 3065 } 3066 if ((parity_type == '1') && (c%2 == 1)) { 3067 /* Just did an even component, and we didn't 3068 bail.. reset the even_pair_failed flag, 3069 and go on to the next component.... */ 3070 even_pair_failed = 0; 3071 } 3072 } 3073 } 3074 3075 clabel = cset->ac->clabel; 3076 3077 if (((clabel->parityConfig == '0') && (num_missing > 0)) || 3078 ((clabel->parityConfig == '4') && (num_missing > 1)) || 3079 ((clabel->parityConfig == '5') && (num_missing > 1))) { 3080 /* XXX this needs to be made *much* more general */ 3081 /* Too many failures */ 3082 return(0); 3083 } 3084 /* otherwise, all is well, and we've got enough to take a kick 3085 at autoconfiguring this set */ 3086 return(1); 3087 } 3088 3089 void 3090 rf_create_configuration(ac,config,raidPtr) 3091 RF_AutoConfig_t *ac; 3092 RF_Config_t *config; 3093 RF_Raid_t *raidPtr; 3094 { 3095 RF_ComponentLabel_t *clabel; 3096 int i; 3097 3098 clabel = ac->clabel; 3099 3100 /* 1. Fill in the common stuff */ 3101 config->numRow = clabel->num_rows; 3102 config->numCol = clabel->num_columns; 3103 config->numSpare = 0; /* XXX should this be set here? */ 3104 config->sectPerSU = clabel->sectPerSU; 3105 config->SUsPerPU = clabel->SUsPerPU; 3106 config->SUsPerRU = clabel->SUsPerRU; 3107 config->parityConfig = clabel->parityConfig; 3108 /* XXX... */ 3109 strcpy(config->diskQueueType,"fifo"); 3110 config->maxOutstandingDiskReqs = clabel->maxOutstanding; 3111 config->layoutSpecificSize = 0; /* XXX ?? 
*/ 3112 3113 while(ac!=NULL) { 3114 /* row/col values will be in range due to the checks 3115 in reasonable_label() */ 3116 strcpy(config->devnames[ac->clabel->row][ac->clabel->column], 3117 ac->devname); 3118 ac = ac->next; 3119 } 3120 3121 for(i=0;i<RF_MAXDBGV;i++) { 3122 config->debugVars[i][0] = NULL; 3123 } 3124 } 3125 3126 int 3127 rf_set_autoconfig(raidPtr, new_value) 3128 RF_Raid_t *raidPtr; 3129 int new_value; 3130 { 3131 RF_ComponentLabel_t clabel; 3132 struct vnode *vp; 3133 dev_t dev; 3134 int row, column; 3135 3136 raidPtr->autoconfigure = new_value; 3137 for(row=0; row<raidPtr->numRow; row++) { 3138 for(column=0; column<raidPtr->numCol; column++) { 3139 if (raidPtr->Disks[row][column].status == 3140 rf_ds_optimal) { 3141 dev = raidPtr->Disks[row][column].dev; 3142 vp = raidPtr->raid_cinfo[row][column].ci_vp; 3143 raidread_component_label(dev, vp, &clabel); 3144 clabel.autoconfigure = new_value; 3145 raidwrite_component_label(dev, vp, &clabel); 3146 } 3147 } 3148 } 3149 return(new_value); 3150 } 3151 3152 int 3153 rf_set_rootpartition(raidPtr, new_value) 3154 RF_Raid_t *raidPtr; 3155 int new_value; 3156 { 3157 RF_ComponentLabel_t clabel; 3158 struct vnode *vp; 3159 dev_t dev; 3160 int row, column; 3161 3162 raidPtr->root_partition = new_value; 3163 for(row=0; row<raidPtr->numRow; row++) { 3164 for(column=0; column<raidPtr->numCol; column++) { 3165 if (raidPtr->Disks[row][column].status == 3166 rf_ds_optimal) { 3167 dev = raidPtr->Disks[row][column].dev; 3168 vp = raidPtr->raid_cinfo[row][column].ci_vp; 3169 raidread_component_label(dev, vp, &clabel); 3170 clabel.root_partition = new_value; 3171 raidwrite_component_label(dev, vp, &clabel); 3172 } 3173 } 3174 } 3175 return(new_value); 3176 } 3177 3178 void 3179 rf_release_all_vps(cset) 3180 RF_ConfigSet_t *cset; 3181 { 3182 RF_AutoConfig_t *ac; 3183 3184 ac = cset->ac; 3185 while(ac!=NULL) { 3186 /* Close the vp, and give it back */ 3187 if (ac->vp) { 3188 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY); 3189 
VOP_CLOSE(ac->vp, FREAD, NOCRED, 0); 3190 vput(ac->vp); 3191 ac->vp = NULL; 3192 } 3193 ac = ac->next; 3194 } 3195 } 3196 3197 3198 void 3199 rf_cleanup_config_set(cset) 3200 RF_ConfigSet_t *cset; 3201 { 3202 RF_AutoConfig_t *ac; 3203 RF_AutoConfig_t *next_ac; 3204 3205 ac = cset->ac; 3206 while(ac!=NULL) { 3207 next_ac = ac->next; 3208 /* nuke the label */ 3209 free(ac->clabel, M_RAIDFRAME); 3210 /* cleanup the config structure */ 3211 free(ac, M_RAIDFRAME); 3212 /* "next.." */ 3213 ac = next_ac; 3214 } 3215 /* and, finally, nuke the config set */ 3216 free(cset, M_RAIDFRAME); 3217 } 3218 3219 3220 void 3221 raid_init_component_label(raidPtr, clabel) 3222 RF_Raid_t *raidPtr; 3223 RF_ComponentLabel_t *clabel; 3224 { 3225 /* current version number */ 3226 clabel->version = RF_COMPONENT_LABEL_VERSION; 3227 clabel->serial_number = raidPtr->serial_number; 3228 clabel->mod_counter = raidPtr->mod_counter; 3229 clabel->num_rows = raidPtr->numRow; 3230 clabel->num_columns = raidPtr->numCol; 3231 clabel->clean = RF_RAID_DIRTY; /* not clean */ 3232 clabel->status = rf_ds_optimal; /* "It's good!" 
*/ 3233 3234 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit; 3235 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU; 3236 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU; 3237 3238 clabel->blockSize = raidPtr->bytesPerSector; 3239 clabel->numBlocks = raidPtr->sectorsPerDisk; 3240 3241 /* XXX not portable */ 3242 clabel->parityConfig = raidPtr->Layout.map->parityConfig; 3243 clabel->maxOutstanding = raidPtr->maxOutstanding; 3244 clabel->autoconfigure = raidPtr->autoconfigure; 3245 clabel->root_partition = raidPtr->root_partition; 3246 clabel->last_unit = raidPtr->raidid; 3247 clabel->config_order = raidPtr->config_order; 3248 } 3249 3250 int 3251 rf_auto_config_set(cset,unit) 3252 RF_ConfigSet_t *cset; 3253 int *unit; 3254 { 3255 RF_Raid_t *raidPtr; 3256 RF_Config_t *config; 3257 int raidID; 3258 int retcode; 3259 3260 printf("RAID autoconfigure\n"); 3261 3262 retcode = 0; 3263 *unit = -1; 3264 3265 /* 1. Create a config structure */ 3266 3267 config = (RF_Config_t *)malloc(sizeof(RF_Config_t), 3268 M_RAIDFRAME, 3269 M_NOWAIT); 3270 if (config==NULL) { 3271 printf("Out of mem!?!?\n"); 3272 /* XXX do something more intelligent here. */ 3273 return(1); 3274 } 3275 3276 memset(config, 0, sizeof(RF_Config_t)); 3277 3278 /* XXX raidID needs to be set correctly.. */ 3279 3280 /* 3281 2. Figure out what RAID ID this one is supposed to live at 3282 See if we can get the same RAID dev that it was configured 3283 on last time.. 3284 */ 3285 3286 raidID = cset->ac->clabel->last_unit; 3287 if ((raidID < 0) || (raidID >= numraid)) { 3288 /* let's not wander off into lala land. */ 3289 raidID = numraid - 1; 3290 } 3291 if (raidPtrs[raidID]->valid != 0) { 3292 3293 /* 3294 Nope... Go looking for an alternative... 3295 Start high so we don't immediately use raid0 if that's 3296 not taken. 3297 */ 3298 3299 for(raidID = numraid - 1; raidID >= 0; raidID--) { 3300 if (raidPtrs[raidID]->valid == 0) { 3301 /* can use this one! 
*/ 3302 break; 3303 } 3304 } 3305 } 3306 3307 if (raidID < 0) { 3308 /* punt... */ 3309 printf("Unable to auto configure this set!\n"); 3310 printf("(Out of RAID devs!)\n"); 3311 return(1); 3312 } 3313 printf("Configuring raid%d:\n",raidID); 3314 raidPtr = raidPtrs[raidID]; 3315 3316 /* XXX all this stuff should be done SOMEWHERE ELSE! */ 3317 raidPtr->raidid = raidID; 3318 raidPtr->openings = RAIDOUTSTANDING; 3319 3320 /* 3. Build the configuration structure */ 3321 rf_create_configuration(cset->ac, config, raidPtr); 3322 3323 /* 4. Do the configuration */ 3324 retcode = rf_Configure(raidPtr, config, cset->ac); 3325 3326 if (retcode == 0) { 3327 3328 raidinit(raidPtrs[raidID]); 3329 3330 rf_markalldirty(raidPtrs[raidID]); 3331 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */ 3332 if (cset->ac->clabel->root_partition==1) { 3333 /* everything configured just fine. Make a note 3334 that this set is eligible to be root. */ 3335 cset->rootable = 1; 3336 /* XXX do this here? */ 3337 raidPtrs[raidID]->root_partition = 1; 3338 } 3339 } 3340 3341 /* 5. Cleanup */ 3342 free(config, M_RAIDFRAME); 3343 3344 *unit = raidID; 3345 return(retcode); 3346 } 3347 3348 void 3349 rf_disk_unbusy(desc) 3350 RF_RaidAccessDesc_t *desc; 3351 { 3352 struct buf *bp; 3353 3354 bp = (struct buf *)desc->bp; 3355 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev, 3356 (bp->b_bcount - bp->b_resid)); 3357 } 3358