1 /* $OpenBSD: softraid.c,v 1.429 2022/12/21 09:54:23 kn Exp $ */ 2 /* 3 * Copyright (c) 2007, 2008, 2009 Marco Peereboom <marco@peereboom.us> 4 * Copyright (c) 2008 Chris Kuethe <ckuethe@openbsd.org> 5 * Copyright (c) 2009 Joel Sing <jsing@openbsd.org> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 */ 19 20 #include "bio.h" 21 22 #include <sys/param.h> 23 #include <sys/systm.h> 24 #include <sys/buf.h> 25 #include <sys/device.h> 26 #include <sys/ioctl.h> 27 #include <sys/malloc.h> 28 #include <sys/pool.h> 29 #include <sys/kernel.h> 30 #include <sys/disk.h> 31 #include <sys/rwlock.h> 32 #include <sys/queue.h> 33 #include <sys/fcntl.h> 34 #include <sys/disklabel.h> 35 #include <sys/vnode.h> 36 #include <sys/lock.h> 37 #include <sys/mount.h> 38 #include <sys/sensors.h> 39 #include <sys/stat.h> 40 #include <sys/conf.h> 41 #include <sys/uio.h> 42 #include <sys/task.h> 43 #include <sys/kthread.h> 44 #include <sys/dkio.h> 45 #include <sys/stdint.h> 46 47 #include <scsi/scsi_all.h> 48 #include <scsi/scsiconf.h> 49 #include <scsi/scsi_disk.h> 50 51 #include <dev/softraidvar.h> 52 53 #ifdef HIBERNATE 54 #include <lib/libsa/aes_xts.h> 55 #include <sys/hibernate.h> 56 #include <scsi/sdvar.h> 57 #endif /* HIBERNATE */ 58 59 /* #define SR_FANCY_STATS */ 60 61 #ifdef SR_DEBUG 62 #define SR_FANCY_STATS 63 uint32_t sr_debug = 0 64 /* | SR_D_CMD */ 65 /* | SR_D_MISC */ 66 /* | SR_D_INTR */ 67 /* | SR_D_IOCTL */ 68 /* | SR_D_CCB */ 69 /* | SR_D_WU */ 70 /* | SR_D_META */ 71 /* | SR_D_DIS */ 72 /* | SR_D_STATE */ 73 /* | SR_D_REBUILD */ 74 ; 75 #endif 76 77 struct sr_softc *softraid0; 78 struct sr_uuid sr_bootuuid; 79 u_int8_t sr_bootkey[SR_CRYPTO_MAXKEYBYTES]; 80 81 int sr_match(struct device *, void *, void *); 82 void sr_attach(struct device *, struct device *, void *); 83 int sr_detach(struct device *, int); 84 void sr_map_root(void); 85 86 const struct cfattach softraid_ca = { 87 sizeof(struct sr_softc), sr_match, sr_attach, sr_detach, 88 }; 89 90 struct cfdriver softraid_cd = { 91 NULL, "softraid", DV_DULL 92 }; 93 94 /* scsi & discipline */ 95 void sr_scsi_cmd(struct scsi_xfer *); 96 int sr_scsi_probe(struct scsi_link *); 97 int sr_scsi_ioctl(struct scsi_link *, u_long, 98 caddr_t, int); 99 int sr_bio_ioctl(struct device *, u_long, caddr_t); 100 int sr_bio_handler(struct sr_softc *, 101 struct sr_discipline *, u_long, struct bio *); 102 int sr_ioctl_inq(struct sr_softc *, struct bioc_inq *); 103 int sr_ioctl_vol(struct sr_softc *, struct bioc_vol *); 104 int sr_ioctl_disk(struct sr_softc *, struct bioc_disk *); 105 int sr_ioctl_setstate(struct sr_softc *, 106 struct bioc_setstate *); 107 int sr_ioctl_createraid(struct sr_softc *, 108 struct bioc_createraid *, int, void *); 109 int sr_ioctl_deleteraid(struct sr_softc *, 110 struct sr_discipline *, struct bioc_deleteraid *); 111 int sr_ioctl_discipline(struct sr_softc *, 112 struct sr_discipline *, struct bioc_discipline *); 113 int sr_ioctl_installboot(struct sr_softc *, 114 struct sr_discipline *, struct bioc_installboot *); 115 void sr_chunks_unwind(struct sr_softc *, 116 struct sr_chunk_head *); 117 void sr_discipline_free(struct sr_discipline *); 118 void sr_discipline_shutdown(struct sr_discipline *, int, int); 119 int sr_discipline_init(struct sr_discipline *, int); 120 int sr_alloc_resources(struct sr_discipline *); 121 void sr_free_resources(struct sr_discipline *); 122 void sr_set_chunk_state(struct sr_discipline *, int, int); 123 void sr_set_vol_state(struct sr_discipline *); 124 125 /* utility functions */ 126 void sr_shutdown(int); 127 void sr_uuid_generate(struct sr_uuid *); 128 char *sr_uuid_format(struct sr_uuid *); 129 void sr_uuid_print(struct sr_uuid *, int); 130 void sr_checksum_print(u_int8_t *); 131 int sr_boot_assembly(struct sr_softc *); 132 int sr_already_assembled(struct sr_discipline *); 133 int sr_hotspare(struct sr_softc *, dev_t); 134 void sr_hotspare_rebuild(struct sr_discipline *); 135 int sr_rebuild_init(struct sr_discipline *, dev_t, int); 136 void sr_rebuild_start(void *); 137 void sr_rebuild_thread(void *); 138 void sr_rebuild(struct sr_discipline *); 139 void sr_roam_chunks(struct sr_discipline *); 140 int sr_chunk_in_use(struct sr_softc *, dev_t); 141 int sr_rw(struct sr_softc *, dev_t, char *, size_t, 142 daddr_t, long); 143 void sr_wu_done_callback(void *); 144 struct sr_discipline *sr_find_discipline(struct sr_softc *sc, const char *); 145 146 /* don't include these on RAMDISK */ 147 #ifndef SMALL_KERNEL 148 void sr_sensors_refresh(void *); 149 int sr_sensors_create(struct sr_discipline *); 150 void sr_sensors_delete(struct sr_discipline *); 151 #endif 152 153 /* metadata */ 154 int sr_meta_probe(struct sr_discipline *, dev_t *, int); 155 int sr_meta_attach(struct sr_discipline *, int, int); 156 int sr_meta_rw(struct sr_discipline *, dev_t, void *, long); 157 int sr_meta_clear(struct sr_discipline *); 158 void sr_meta_init(struct sr_discipline *, int, int); 159 void sr_meta_init_complete(struct sr_discipline *); 160 void sr_meta_opt_handler(struct sr_discipline *, 161 struct sr_meta_opt_hdr *); 162 163 /* hotplug magic */ 164 void sr_disk_attach(struct disk *, int); 165 166 struct sr_hotplug_list { 167 void (*sh_hotplug)(struct sr_discipline *, 168 struct disk *, int); 169 struct sr_discipline *sh_sd; 170 171 SLIST_ENTRY(sr_hotplug_list) shl_link; 172 }; 173 SLIST_HEAD(sr_hotplug_list_head, sr_hotplug_list); 174 175 struct sr_hotplug_list_head sr_hotplug_callbacks; 176 extern void (*softraid_disk_attach)(struct disk *, int); 177 178 /* scsi glue */ 179 const struct scsi_adapter sr_switch = { 180 sr_scsi_cmd, NULL, sr_scsi_probe, NULL, sr_scsi_ioctl 181 }; 182 183 /* native metadata format */ 184 int sr_meta_native_bootprobe(struct sr_softc *, dev_t, 185 struct sr_boot_chunk_head *); 186 #define SR_META_NOTCLAIMED (0) 187 #define SR_META_CLAIMED (1) 188 int sr_meta_native_probe(struct sr_softc *, 189 struct sr_chunk *); 190 int sr_meta_native_attach(struct sr_discipline *, int); 191 int sr_meta_native_write(struct sr_discipline *, dev_t, 192 struct sr_metadata *,void *); 193 194 #ifdef SR_DEBUG 195 void sr_meta_print(struct sr_metadata *); 196 #else 197 #define sr_meta_print(m) 198 #endif 199 200 /* the metadata driver should remain stateless */ 201 struct sr_meta_driver { 202 daddr_t smd_offset; /* metadata location */ 203 u_int32_t smd_size; /* size of metadata */ 204 205 int (*smd_probe)(struct sr_softc *, 206 struct sr_chunk *); 207 int (*smd_attach)(struct sr_discipline *, int); 208 int (*smd_detach)(struct sr_discipline *); 209 int (*smd_read)(struct sr_discipline *, dev_t, 210 struct sr_metadata *, void *); 211 int (*smd_write)(struct sr_discipline *, dev_t, 212 struct sr_metadata *, void *); 213 int (*smd_validate)(struct sr_discipline *, 214 struct sr_metadata *, void *); 215 } smd[] = { 216 { SR_META_OFFSET, SR_META_SIZE * DEV_BSIZE, 217 sr_meta_native_probe, sr_meta_native_attach, NULL, 218 sr_meta_native_read, sr_meta_native_write, NULL }, 219 { 0, 0, NULL, NULL, NULL, NULL } 220 }; 221 222 int 223 sr_meta_attach(struct sr_discipline *sd, int chunk_no, int force) 224 { 225 struct sr_softc *sc = sd->sd_sc; 226 struct sr_chunk_head *cl; 227 struct sr_chunk *ch_entry, *chunk1, *chunk2; 228 int rv = 1, i = 0; 229 230 DNPRINTF(SR_D_META, "%s: sr_meta_attach(%d)\n", DEVNAME(sc), chunk_no); 231 232 /* in memory copy of metadata */ 233 sd->sd_meta = malloc(SR_META_SIZE * DEV_BSIZE, M_DEVBUF, 234 M_ZERO | M_NOWAIT); 235 if (!sd->sd_meta) { 236 sr_error(sc, "could not allocate memory for metadata"); 237 goto bad; 238 } 239 240 if (sd->sd_meta_type != SR_META_F_NATIVE) { 241 /* in memory copy of foreign metadata */ 242 sd->sd_meta_foreign = malloc(smd[sd->sd_meta_type].smd_size, 243 M_DEVBUF, M_ZERO | M_NOWAIT); 244 if (!sd->sd_meta_foreign) { 245 /* unwind frees sd_meta */ 246 sr_error(sc, "could not allocate memory for foreign " 247 "metadata"); 248 goto bad; 249 } 250 } 251 252 /* we have a valid list now create an array index */ 253 cl = &sd->sd_vol.sv_chunk_list; 254 sd->sd_vol.sv_chunks = mallocarray(chunk_no, sizeof(struct sr_chunk *), 255 M_DEVBUF, M_WAITOK | M_ZERO); 256 257 /* fill out chunk array */ 258 i = 0; 259 SLIST_FOREACH(ch_entry, cl, src_link) 260 sd->sd_vol.sv_chunks[i++] = ch_entry; 261 262 /* attach metadata */ 263 if (smd[sd->sd_meta_type].smd_attach(sd, force)) 264 goto bad; 265 266 /* Force chunks into correct order now that metadata is attached. */ 267 SLIST_INIT(cl); 268 for (i = 0; i < chunk_no; i++) { 269 ch_entry = sd->sd_vol.sv_chunks[i]; 270 chunk2 = NULL; 271 SLIST_FOREACH(chunk1, cl, src_link) { 272 if (chunk1->src_meta.scmi.scm_chunk_id > 273 ch_entry->src_meta.scmi.scm_chunk_id) 274 break; 275 chunk2 = chunk1; 276 } 277 if (chunk2 == NULL) 278 SLIST_INSERT_HEAD(cl, ch_entry, src_link); 279 else 280 SLIST_INSERT_AFTER(chunk2, ch_entry, src_link); 281 } 282 i = 0; 283 SLIST_FOREACH(ch_entry, cl, src_link) 284 sd->sd_vol.sv_chunks[i++] = ch_entry; 285 286 rv = 0; 287 bad: 288 return (rv); 289 } 290 291 int 292 sr_meta_probe(struct sr_discipline *sd, dev_t *dt, int no_chunk) 293 { 294 struct sr_softc *sc = sd->sd_sc; 295 struct vnode *vn; 296 struct sr_chunk *ch_entry, *ch_prev = NULL; 297 struct sr_chunk_head *cl; 298 char devname[32]; 299 int i, d, type, found, prevf, error; 300 dev_t dev; 301 302 DNPRINTF(SR_D_META, "%s: sr_meta_probe(%d)\n", DEVNAME(sc), no_chunk); 303 304 if (no_chunk == 0) 305 goto unwind; 306 307 cl = &sd->sd_vol.sv_chunk_list; 308 309 for (d = 0, prevf = SR_META_F_INVALID; d < no_chunk; d++) { 310 ch_entry = malloc(sizeof(struct sr_chunk), M_DEVBUF, 311 M_WAITOK | M_ZERO); 312 /* keep disks in user supplied order */ 313 if (ch_prev) 314 SLIST_INSERT_AFTER(ch_prev, ch_entry, src_link); 315 else 316 SLIST_INSERT_HEAD(cl, ch_entry, src_link); 317 ch_prev = ch_entry; 318 dev = dt[d]; 319 ch_entry->src_dev_mm = dev; 320 321 if (dev == NODEV) { 322 ch_entry->src_meta.scm_status = BIOC_SDOFFLINE; 323 continue; 324 } else { 325 sr_meta_getdevname(sc, dev, devname, sizeof(devname)); 326 if (bdevvp(dev, &vn)) { 327 sr_error(sc, "sr_meta_probe: cannot allocate " 328 "vnode"); 329 goto unwind; 330 } 331 332 /* 333 * XXX leaving dev open for now; move this to attach 334 * and figure out the open/close dance for unwind. 335 */ 336 error = VOP_OPEN(vn, FREAD | FWRITE, NOCRED, curproc); 337 if (error) { 338 DNPRINTF(SR_D_META,"%s: sr_meta_probe can't " 339 "open %s\n", DEVNAME(sc), devname); 340 vput(vn); 341 goto unwind; 342 } 343 344 strlcpy(ch_entry->src_devname, devname, 345 sizeof(ch_entry->src_devname)); 346 ch_entry->src_vn = vn; 347 } 348 349 /* determine if this is a device we understand */ 350 for (i = 0, found = SR_META_F_INVALID; smd[i].smd_probe; i++) { 351 type = smd[i].smd_probe(sc, ch_entry); 352 if (type == SR_META_F_INVALID) 353 continue; 354 else { 355 found = type; 356 break; 357 } 358 } 359 360 if (found == SR_META_F_INVALID) 361 goto unwind; 362 if (prevf == SR_META_F_INVALID) 363 prevf = found; 364 if (prevf != found) { 365 DNPRINTF(SR_D_META, "%s: prevf != found\n", 366 DEVNAME(sc)); 367 goto unwind; 368 } 369 } 370 371 return (prevf); 372 unwind: 373 return (SR_META_F_INVALID); 374 } 375 376 void 377 sr_meta_getdevname(struct sr_softc *sc, dev_t dev, char *buf, int size) 378 { 379 int maj, unit, part; 380 char *name; 381 382 DNPRINTF(SR_D_META, "%s: sr_meta_getdevname(%p, %d)\n", 383 DEVNAME(sc), buf, size); 384 385 if (!buf) 386 return; 387 388 maj = major(dev); 389 part = DISKPART(dev); 390 unit = DISKUNIT(dev); 391 392 name = findblkname(maj); 393 if (name == NULL) 394 return; 395 396 snprintf(buf, size, "%s%d%c", name, unit, part + 'a'); 397 } 398 399 int 400 sr_rw(struct sr_softc *sc, dev_t dev, char *buf, size_t size, daddr_t blkno, 401 long flags) 402 { 403 struct vnode *vp; 404 struct buf b; 405 size_t bufsize, dma_bufsize; 406 int rv = 1; 407 char *dma_buf; 408 int s; 409 410 DNPRINTF(SR_D_MISC, "%s: sr_rw(0x%x, %p, %zu, %lld 0x%lx)\n", 411 DEVNAME(sc), dev, buf, size, (long long)blkno, flags); 412 413 dma_bufsize = (size > MAXPHYS) ? MAXPHYS : size; 414 dma_buf = dma_alloc(dma_bufsize, PR_WAITOK); 415 416 if (bdevvp(dev, &vp)) { 417 printf("%s: sr_rw: failed to allocate vnode\n", DEVNAME(sc)); 418 goto done; 419 } 420 421 while (size > 0) { 422 DNPRINTF(SR_D_MISC, "%s: dma_buf %p, size %zu, blkno %lld)\n", 423 DEVNAME(sc), dma_buf, size, (long long)blkno); 424 425 bufsize = (size > MAXPHYS) ? MAXPHYS : size; 426 if (flags == B_WRITE) 427 memcpy(dma_buf, buf, bufsize); 428 429 bzero(&b, sizeof(b)); 430 b.b_flags = flags | B_PHYS; 431 b.b_proc = curproc; 432 b.b_dev = dev; 433 b.b_iodone = NULL; 434 b.b_error = 0; 435 b.b_blkno = blkno; 436 b.b_data = dma_buf; 437 b.b_bcount = bufsize; 438 b.b_bufsize = bufsize; 439 b.b_resid = bufsize; 440 b.b_vp = vp; 441 442 if ((b.b_flags & B_READ) == 0) { 443 s = splbio(); 444 vp->v_numoutput++; 445 splx(s); 446 } 447 448 LIST_INIT(&b.b_dep); 449 VOP_STRATEGY(vp, &b); 450 biowait(&b); 451 452 if (b.b_flags & B_ERROR) { 453 printf("%s: I/O error %d on dev 0x%x at block %llu\n", 454 DEVNAME(sc), b.b_error, dev, b.b_blkno); 455 goto done; 456 } 457 458 if (flags == B_READ) 459 memcpy(buf, dma_buf, bufsize); 460 461 size -= bufsize; 462 buf += bufsize; 463 blkno += howmany(bufsize, DEV_BSIZE); 464 } 465 466 rv = 0; 467 468 done: 469 if (vp) 470 vput(vp); 471 472 dma_free(dma_buf, dma_bufsize); 473 474 return (rv); 475 } 476 477 int 478 sr_meta_rw(struct sr_discipline *sd, dev_t dev, void *md, long flags) 479 { 480 int rv = 1; 481 482 DNPRINTF(SR_D_META, "%s: sr_meta_rw(0x%x, %p, 0x%lx)\n", 483 DEVNAME(sd->sd_sc), dev, md, flags); 484 485 if (md == NULL) { 486 printf("%s: sr_meta_rw: invalid metadata pointer\n", 487 DEVNAME(sd->sd_sc)); 488 goto done; 489 } 490 491 rv = sr_rw(sd->sd_sc, dev, md, SR_META_SIZE * DEV_BSIZE, 492 SR_META_OFFSET, flags); 493 494 done: 495 return (rv); 496 } 497 498 int 499 sr_meta_clear(struct sr_discipline *sd) 500 { 501 struct sr_softc *sc = sd->sd_sc; 502 struct sr_chunk_head *cl = &sd->sd_vol.sv_chunk_list; 503 struct sr_chunk *ch_entry; 504 void *m; 505 int rv = 1; 506 507 DNPRINTF(SR_D_META, "%s: sr_meta_clear\n", DEVNAME(sc)); 508 509 if (sd->sd_meta_type != SR_META_F_NATIVE) { 510 sr_error(sc, "cannot clear foreign metadata"); 511 goto done; 512 } 513 514 m = malloc(SR_META_SIZE * DEV_BSIZE, M_DEVBUF, M_WAITOK | M_ZERO); 515 SLIST_FOREACH(ch_entry, cl, src_link) { 516 if (sr_meta_native_write(sd, ch_entry->src_dev_mm, m, NULL)) { 517 /* XXX mark disk offline */ 518 DNPRINTF(SR_D_META, "%s: sr_meta_clear failed to " 519 "clear %s\n", DEVNAME(sc), ch_entry->src_devname); 520 rv++; 521 continue; 522 } 523 bzero(&ch_entry->src_meta, sizeof(ch_entry->src_meta)); 524 } 525 526 bzero(sd->sd_meta, SR_META_SIZE * DEV_BSIZE); 527 528 free(m, M_DEVBUF, SR_META_SIZE * DEV_BSIZE); 529 rv = 0; 530 done: 531 return (rv); 532 } 533 534 void 535 sr_meta_init(struct sr_discipline *sd, int level, int no_chunk) 536 { 537 struct sr_softc *sc = sd->sd_sc; 538 struct sr_metadata *sm = sd->sd_meta; 539 struct sr_chunk_head *cl = &sd->sd_vol.sv_chunk_list; 540 struct sr_meta_chunk *scm; 541 struct sr_chunk *chunk; 542 int cid = 0; 543 u_int64_t max_chunk_sz = 0, min_chunk_sz = 0; 544 u_int32_t secsize = DEV_BSIZE; 545 546 DNPRINTF(SR_D_META, "%s: sr_meta_init\n", DEVNAME(sc)); 547 548 if (!sm) 549 return; 550 551 /* Initialise volume metadata. */ 552 sm->ssdi.ssd_magic = SR_MAGIC; 553 sm->ssdi.ssd_version = SR_META_VERSION; 554 sm->ssdi.ssd_vol_flags = sd->sd_meta_flags; 555 sm->ssdi.ssd_volid = 0; 556 sm->ssdi.ssd_chunk_no = no_chunk; 557 sm->ssdi.ssd_level = level; 558 559 sm->ssd_data_blkno = SR_DATA_OFFSET; 560 sm->ssd_ondisk = 0; 561 562 sr_uuid_generate(&sm->ssdi.ssd_uuid); 563 564 /* Initialise chunk metadata and get min/max chunk sizes & secsize. */ 565 SLIST_FOREACH(chunk, cl, src_link) { 566 scm = &chunk->src_meta; 567 scm->scmi.scm_size = chunk->src_size; 568 scm->scmi.scm_chunk_id = cid++; 569 scm->scm_status = BIOC_SDONLINE; 570 scm->scmi.scm_volid = 0; 571 strlcpy(scm->scmi.scm_devname, chunk->src_devname, 572 sizeof(scm->scmi.scm_devname)); 573 memcpy(&scm->scmi.scm_uuid, &sm->ssdi.ssd_uuid, 574 sizeof(scm->scmi.scm_uuid)); 575 sr_checksum(sc, scm, &scm->scm_checksum, 576 sizeof(scm->scm_checksum)); 577 578 if (min_chunk_sz == 0) 579 min_chunk_sz = scm->scmi.scm_size; 580 if (chunk->src_secsize > secsize) 581 secsize = chunk->src_secsize; 582 min_chunk_sz = MIN(min_chunk_sz, scm->scmi.scm_size); 583 max_chunk_sz = MAX(max_chunk_sz, scm->scmi.scm_size); 584 } 585 586 sm->ssdi.ssd_secsize = secsize; 587 588 /* Equalize chunk sizes. */ 589 SLIST_FOREACH(chunk, cl, src_link) 590 chunk->src_meta.scmi.scm_coerced_size = min_chunk_sz; 591 592 sd->sd_vol.sv_chunk_minsz = min_chunk_sz; 593 sd->sd_vol.sv_chunk_maxsz = max_chunk_sz; 594 } 595 596 void 597 sr_meta_init_complete(struct sr_discipline *sd) 598 { 599 #ifdef SR_DEBUG 600 struct sr_softc *sc = sd->sd_sc; 601 #endif 602 struct sr_metadata *sm = sd->sd_meta; 603 604 DNPRINTF(SR_D_META, "%s: sr_meta_complete\n", DEVNAME(sc)); 605 606 /* Complete initialisation of volume metadata. */ 607 strlcpy(sm->ssdi.ssd_vendor, "OPENBSD", sizeof(sm->ssdi.ssd_vendor)); 608 snprintf(sm->ssdi.ssd_product, sizeof(sm->ssdi.ssd_product), 609 "SR %s", sd->sd_name); 610 snprintf(sm->ssdi.ssd_revision, sizeof(sm->ssdi.ssd_revision), 611 "%03d", sm->ssdi.ssd_version); 612 } 613 614 void 615 sr_meta_opt_handler(struct sr_discipline *sd, struct sr_meta_opt_hdr *om) 616 { 617 if (om->som_type != SR_OPT_BOOT) 618 panic("unknown optional metadata type"); 619 } 620 621 void 622 sr_meta_save_callback(void *xsd) 623 { 624 struct sr_discipline *sd = xsd; 625 int s; 626 627 s = splbio(); 628 629 if (sr_meta_save(sd, SR_META_DIRTY)) 630 printf("%s: save metadata failed\n", DEVNAME(sd->sd_sc)); 631 632 sd->sd_must_flush = 0; 633 splx(s); 634 } 635 636 int 637 sr_meta_save(struct sr_discipline *sd, u_int32_t flags) 638 { 639 struct sr_softc *sc = sd->sd_sc; 640 struct sr_metadata *sm = sd->sd_meta, *m; 641 struct sr_meta_driver *s; 642 struct sr_chunk *src; 643 struct sr_meta_chunk *cm; 644 struct sr_workunit wu; 645 struct sr_meta_opt_hdr *omh; 646 struct sr_meta_opt_item *omi; 647 int i; 648 649 DNPRINTF(SR_D_META, "%s: sr_meta_save %s\n", 650 DEVNAME(sc), sd->sd_meta->ssd_devname); 651 652 if (!sm) { 653 printf("%s: no in memory copy of metadata\n", DEVNAME(sc)); 654 goto bad; 655 } 656 657 /* meta scratchpad */ 658 s = &smd[sd->sd_meta_type]; 659 m = malloc(SR_META_SIZE * DEV_BSIZE, M_DEVBUF, M_ZERO | M_NOWAIT); 660 if (!m) { 661 printf("%s: could not allocate metadata scratch area\n", 662 DEVNAME(sc)); 663 goto bad; 664 } 665 666 /* from here on out metadata is updated */ 667 restart: 668 sm->ssd_ondisk++; 669 sm->ssd_meta_flags = flags; 670 memcpy(m, sm, sizeof(*m)); 671 672 /* Chunk metadata. */ 673 cm = (struct sr_meta_chunk *)(m + 1); 674 for (i = 0; i < sm->ssdi.ssd_chunk_no; i++) { 675 src = sd->sd_vol.sv_chunks[i]; 676 memcpy(cm, &src->src_meta, sizeof(*cm)); 677 cm++; 678 } 679 680 /* Optional metadata. */ 681 omh = (struct sr_meta_opt_hdr *)(cm); 682 SLIST_FOREACH(omi, &sd->sd_meta_opt, omi_link) { 683 DNPRINTF(SR_D_META, "%s: saving optional metadata type %u with " 684 "length %u\n", DEVNAME(sc), omi->omi_som->som_type, 685 omi->omi_som->som_length); 686 bzero(&omi->omi_som->som_checksum, MD5_DIGEST_LENGTH); 687 sr_checksum(sc, omi->omi_som, &omi->omi_som->som_checksum, 688 omi->omi_som->som_length); 689 memcpy(omh, omi->omi_som, omi->omi_som->som_length); 690 omh = (struct sr_meta_opt_hdr *)((u_int8_t *)omh + 691 omi->omi_som->som_length); 692 } 693 694 for (i = 0; i < sm->ssdi.ssd_chunk_no; i++) { 695 src = sd->sd_vol.sv_chunks[i]; 696 697 /* skip disks that are offline */ 698 if (src->src_meta.scm_status == BIOC_SDOFFLINE) 699 continue; 700 701 /* calculate metadata checksum for correct chunk */ 702 m->ssdi.ssd_chunk_id = i; 703 sr_checksum(sc, m, &m->ssd_checksum, 704 sizeof(struct sr_meta_invariant)); 705 706 #ifdef SR_DEBUG 707 DNPRINTF(SR_D_META, "%s: sr_meta_save %s: volid: %d " 708 "chunkid: %d checksum: ", 709 DEVNAME(sc), src->src_meta.scmi.scm_devname, 710 m->ssdi.ssd_volid, m->ssdi.ssd_chunk_id); 711 712 if (sr_debug & SR_D_META) 713 sr_checksum_print((u_int8_t *)&m->ssd_checksum); 714 DNPRINTF(SR_D_META, "\n"); 715 sr_meta_print(m); 716 #endif 717 718 /* translate and write to disk */ 719 if (s->smd_write(sd, src->src_dev_mm, m, NULL /* XXX */)) { 720 printf("%s: could not write metadata to %s\n", 721 DEVNAME(sc), src->src_devname); 722 /* restart the meta write */ 723 src->src_meta.scm_status = BIOC_SDOFFLINE; 724 /* XXX recalculate volume status */ 725 goto restart; 726 } 727 } 728 729 /* not all disciplines have sync */ 730 if (sd->sd_scsi_sync) { 731 bzero(&wu, sizeof(wu)); 732 wu.swu_flags |= SR_WUF_FAKE; 733 wu.swu_dis = sd; 734 sd->sd_scsi_sync(&wu); 735 } 736 free(m, M_DEVBUF, SR_META_SIZE * DEV_BSIZE); 737 return (0); 738 bad: 739 return (1); 740 } 741 742 int 743 sr_meta_read(struct sr_discipline *sd) 744 { 745 struct sr_softc *sc = sd->sd_sc; 746 struct sr_chunk_head *cl = &sd->sd_vol.sv_chunk_list; 747 struct sr_metadata *sm; 748 struct sr_chunk *ch_entry; 749 struct sr_meta_chunk *cp; 750 struct sr_meta_driver *s; 751 void *fm = NULL; 752 int no_disk = 0, got_meta = 0; 753 754 DNPRINTF(SR_D_META, "%s: sr_meta_read\n", DEVNAME(sc)); 755 756 sm = malloc(SR_META_SIZE * DEV_BSIZE, M_DEVBUF, M_WAITOK | M_ZERO); 757 s = &smd[sd->sd_meta_type]; 758 if (sd->sd_meta_type != SR_META_F_NATIVE) 759 fm = malloc(s->smd_size, M_DEVBUF, M_WAITOK | M_ZERO); 760 761 cp = (struct sr_meta_chunk *)(sm + 1); 762 SLIST_FOREACH(ch_entry, cl, src_link) { 763 /* skip disks that are offline */ 764 if (ch_entry->src_meta.scm_status == BIOC_SDOFFLINE) { 765 DNPRINTF(SR_D_META, 766 "%s: %s chunk marked offline, spoofing status\n", 767 DEVNAME(sc), ch_entry->src_devname); 768 cp++; /* adjust chunk pointer to match failure */ 769 continue; 770 } else if (s->smd_read(sd, ch_entry->src_dev_mm, sm, fm)) { 771 /* read and translate */ 772 /* XXX mark chunk offline, elsewhere!! */ 773 ch_entry->src_meta.scm_status = BIOC_SDOFFLINE; 774 cp++; /* adjust chunk pointer to match failure */ 775 DNPRINTF(SR_D_META, "%s: sr_meta_read failed\n", 776 DEVNAME(sc)); 777 continue; 778 } 779 780 if (sm->ssdi.ssd_magic != SR_MAGIC) { 781 DNPRINTF(SR_D_META, "%s: sr_meta_read !SR_MAGIC\n", 782 DEVNAME(sc)); 783 continue; 784 } 785 786 /* validate metadata */ 787 if (sr_meta_validate(sd, ch_entry->src_dev_mm, sm, fm)) { 788 DNPRINTF(SR_D_META, "%s: invalid metadata\n", 789 DEVNAME(sc)); 790 no_disk = -1; 791 goto done; 792 } 793 794 /* assume first chunk contains metadata */ 795 if (got_meta == 0) { 796 sr_meta_opt_load(sc, sm, &sd->sd_meta_opt); 797 memcpy(sd->sd_meta, sm, sizeof(*sd->sd_meta)); 798 got_meta = 1; 799 } 800 801 memcpy(&ch_entry->src_meta, cp, sizeof(ch_entry->src_meta)); 802 803 no_disk++; 804 cp++; 805 } 806 807 free(sm, M_DEVBUF, SR_META_SIZE * DEV_BSIZE); 808 free(fm, M_DEVBUF, s->smd_size); 809 810 done: 811 DNPRINTF(SR_D_META, "%s: sr_meta_read found %d parts\n", DEVNAME(sc), 812 no_disk); 813 return (no_disk); 814 } 815 816 void 817 sr_meta_opt_load(struct sr_softc *sc, struct sr_metadata *sm, 818 struct sr_meta_opt_head *som) 819 { 820 struct sr_meta_opt_hdr *omh; 821 struct sr_meta_opt_item *omi; 822 u_int8_t checksum[MD5_DIGEST_LENGTH]; 823 int i; 824 825 /* Process optional metadata. */ 826 omh = (struct sr_meta_opt_hdr *)((u_int8_t *)(sm + 1) + 827 sizeof(struct sr_meta_chunk) * sm->ssdi.ssd_chunk_no); 828 for (i = 0; i < sm->ssdi.ssd_opt_no; i++) { 829 830 omi = malloc(sizeof(struct sr_meta_opt_item), M_DEVBUF, 831 M_WAITOK | M_ZERO); 832 SLIST_INSERT_HEAD(som, omi, omi_link); 833 834 if (omh->som_length == 0) { 835 836 /* Load old fixed length optional metadata. */ 837 DNPRINTF(SR_D_META, "%s: old optional metadata of type " 838 "%u\n", DEVNAME(sc), omh->som_type); 839 840 /* Validate checksum. */ 841 sr_checksum(sc, (void *)omh, &checksum, 842 SR_OLD_META_OPT_SIZE - MD5_DIGEST_LENGTH); 843 if (bcmp(&checksum, (void *)omh + SR_OLD_META_OPT_MD5, 844 sizeof(checksum))) 845 panic("%s: invalid optional metadata checksum", 846 DEVNAME(sc)); 847 848 /* Determine correct length. */ 849 switch (omh->som_type) { 850 case SR_OPT_CRYPTO: 851 omh->som_length = sizeof(struct sr_meta_crypto); 852 break; 853 case SR_OPT_BOOT: 854 omh->som_length = sizeof(struct sr_meta_boot); 855 break; 856 case SR_OPT_KEYDISK: 857 omh->som_length = 858 sizeof(struct sr_meta_keydisk); 859 break; 860 default: 861 panic("unknown old optional metadata type %u", 862 omh->som_type); 863 } 864 865 omi->omi_som = malloc(omh->som_length, M_DEVBUF, 866 M_WAITOK | M_ZERO); 867 memcpy((u_int8_t *)omi->omi_som + sizeof(*omi->omi_som), 868 (u_int8_t *)omh + SR_OLD_META_OPT_OFFSET, 869 omh->som_length - sizeof(*omi->omi_som)); 870 omi->omi_som->som_type = omh->som_type; 871 omi->omi_som->som_length = omh->som_length; 872 873 omh = (struct sr_meta_opt_hdr *)((void *)omh + 874 SR_OLD_META_OPT_SIZE); 875 } else { 876 877 /* Load variable length optional metadata. */ 878 DNPRINTF(SR_D_META, "%s: optional metadata of type %u, " 879 "length %u\n", DEVNAME(sc), omh->som_type, 880 omh->som_length); 881 omi->omi_som = malloc(omh->som_length, M_DEVBUF, 882 M_WAITOK | M_ZERO); 883 memcpy(omi->omi_som, omh, omh->som_length); 884 885 /* Validate checksum. */ 886 memcpy(&checksum, &omi->omi_som->som_checksum, 887 MD5_DIGEST_LENGTH); 888 bzero(&omi->omi_som->som_checksum, MD5_DIGEST_LENGTH); 889 sr_checksum(sc, omi->omi_som, 890 &omi->omi_som->som_checksum, omh->som_length); 891 if (bcmp(&checksum, &omi->omi_som->som_checksum, 892 sizeof(checksum))) 893 panic("%s: invalid optional metadata checksum", 894 DEVNAME(sc)); 895 896 omh = (struct sr_meta_opt_hdr *)((void *)omh + 897 omh->som_length); 898 } 899 } 900 } 901 902 int 903 sr_meta_validate(struct sr_discipline *sd, dev_t dev, struct sr_metadata *sm, 904 void *fm) 905 { 906 struct sr_softc *sc = sd->sd_sc; 907 struct sr_meta_driver *s; 908 #ifdef SR_DEBUG 909 struct sr_meta_chunk *mc; 910 #endif 911 u_int8_t checksum[MD5_DIGEST_LENGTH]; 912 char devname[32]; 913 int rv = 1; 914 915 DNPRINTF(SR_D_META, "%s: sr_meta_validate(%p)\n", DEVNAME(sc), sm); 916 917 sr_meta_getdevname(sc, dev, devname, sizeof(devname)); 918 919 s = &smd[sd->sd_meta_type]; 920 if (sd->sd_meta_type != SR_META_F_NATIVE) 921 if (s->smd_validate(sd, sm, fm)) { 922 sr_error(sc, "invalid foreign metadata"); 923 goto done; 924 } 925 926 /* 927 * at this point all foreign metadata has been translated to the native 928 * format and will be treated just like the native format 929 */ 930 931 if (sm->ssdi.ssd_magic != SR_MAGIC) { 932 sr_error(sc, "not valid softraid metadata"); 933 goto done; 934 } 935 936 /* Verify metadata checksum. */ 937 sr_checksum(sc, sm, &checksum, sizeof(struct sr_meta_invariant)); 938 if (bcmp(&checksum, &sm->ssd_checksum, sizeof(checksum))) { 939 sr_error(sc, "invalid metadata checksum"); 940 goto done; 941 } 942 943 /* Handle changes between versions. */ 944 if (sm->ssdi.ssd_version == 3) { 945 946 /* 947 * Version 3 - update metadata version and fix up data blkno 948 * value since this did not exist in version 3. 949 */ 950 if (sm->ssd_data_blkno == 0) 951 sm->ssd_data_blkno = SR_META_V3_DATA_OFFSET; 952 sm->ssdi.ssd_secsize = DEV_BSIZE; 953 954 } else if (sm->ssdi.ssd_version == 4) { 955 956 /* 957 * Version 4 - original metadata format did not store 958 * data blkno so fix this up if necessary. 959 */ 960 if (sm->ssd_data_blkno == 0) 961 sm->ssd_data_blkno = SR_DATA_OFFSET; 962 sm->ssdi.ssd_secsize = DEV_BSIZE; 963 964 } else if (sm->ssdi.ssd_version == 5) { 965 966 /* 967 * Version 5 - variable length optional metadata. Migration 968 * from earlier fixed length optional metadata is handled 969 * in sr_meta_read(). 970 */ 971 sm->ssdi.ssd_secsize = DEV_BSIZE; 972 973 } else if (sm->ssdi.ssd_version == SR_META_VERSION) { 974 975 /* 976 * Version 6 - store & report a sector size. 977 */ 978 979 } else { 980 981 sr_error(sc, "cannot read metadata version %u on %s, " 982 "expected version %u or earlier", 983 sm->ssdi.ssd_version, devname, SR_META_VERSION); 984 goto done; 985 986 } 987 988 /* Update version number and revision string. */ 989 sm->ssdi.ssd_version = SR_META_VERSION; 990 snprintf(sm->ssdi.ssd_revision, sizeof(sm->ssdi.ssd_revision), 991 "%03d", SR_META_VERSION); 992 993 #ifdef SR_DEBUG 994 /* warn if disk changed order */ 995 mc = (struct sr_meta_chunk *)(sm + 1); 996 if (strncmp(mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname, devname, 997 sizeof(mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname))) 998 DNPRINTF(SR_D_META, "%s: roaming device %s -> %s\n", 999 DEVNAME(sc), mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname, 1000 devname); 1001 #endif 1002 1003 /* we have meta data on disk */ 1004 DNPRINTF(SR_D_META, "%s: sr_meta_validate valid metadata %s\n", 1005 DEVNAME(sc), devname); 1006 1007 rv = 0; 1008 done: 1009 return (rv); 1010 } 1011 1012 int 1013 sr_meta_native_bootprobe(struct sr_softc *sc, dev_t devno, 1014 struct sr_boot_chunk_head *bch) 1015 { 1016 struct vnode *vn; 1017 struct disklabel label; 1018 struct sr_metadata *md = NULL; 1019 struct sr_discipline *fake_sd = NULL; 1020 struct sr_boot_chunk *bc; 1021 char devname[32]; 1022 dev_t chrdev, rawdev; 1023 int error, i; 1024 int rv = SR_META_NOTCLAIMED; 1025 1026 DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe\n", DEVNAME(sc)); 1027 1028 /* 1029 * Use character raw device to avoid SCSI complaints about missing 1030 * media on removable media devices. 1031 */ 1032 chrdev = blktochr(devno); 1033 rawdev = MAKEDISKDEV(major(chrdev), DISKUNIT(devno), RAW_PART); 1034 if (cdevvp(rawdev, &vn)) { 1035 sr_error(sc, "sr_meta_native_bootprobe: cannot allocate vnode"); 1036 goto done; 1037 } 1038 1039 /* open device */ 1040 error = VOP_OPEN(vn, FREAD, NOCRED, curproc); 1041 if (error) { 1042 DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe open " 1043 "failed\n", DEVNAME(sc)); 1044 vput(vn); 1045 goto done; 1046 } 1047 1048 /* get disklabel */ 1049 error = VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&label, FREAD, NOCRED, 1050 curproc); 1051 if (error) { 1052 DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe ioctl " 1053 "failed\n", DEVNAME(sc)); 1054 VOP_CLOSE(vn, FREAD, NOCRED, curproc); 1055 vput(vn); 1056 goto done; 1057 } 1058 1059 /* we are done, close device */ 1060 error = VOP_CLOSE(vn, FREAD, NOCRED, curproc); 1061 if (error) { 1062 DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe close " 1063 "failed\n", DEVNAME(sc)); 1064 vput(vn); 1065 goto done; 1066 } 1067 vput(vn); 1068 1069 md = malloc(SR_META_SIZE * DEV_BSIZE, M_DEVBUF, M_ZERO | M_NOWAIT); 1070 if (md == NULL) { 1071 sr_error(sc, "not enough memory for metadata buffer"); 1072 goto done; 1073 } 1074 1075 /* create fake sd to use utility functions */ 1076 fake_sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, 1077 M_ZERO | M_NOWAIT); 1078 if (fake_sd == NULL) { 1079 sr_error(sc, "not enough memory for fake discipline"); 1080 goto done; 1081 } 1082 fake_sd->sd_sc = sc; 1083 fake_sd->sd_meta_type = SR_META_F_NATIVE; 1084 1085 for (i = 0; i < MAXPARTITIONS; i++) { 1086 if (label.d_partitions[i].p_fstype != FS_RAID) 1087 continue; 1088 1089 /* open partition */ 1090 rawdev = MAKEDISKDEV(major(devno), DISKUNIT(devno), i); 1091 if (bdevvp(rawdev, &vn)) { 1092 sr_error(sc, "sr_meta_native_bootprobe: cannot " 1093 "allocate vnode for partition"); 1094 goto done; 1095 } 1096 error = VOP_OPEN(vn, FREAD, NOCRED, curproc); 1097 if (error) { 1098 DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe " 1099 "open failed, partition %d\n", 1100 DEVNAME(sc), i); 1101 vput(vn); 1102 continue; 1103 } 1104 1105 if (sr_meta_native_read(fake_sd, rawdev, md, NULL)) { 1106 sr_error(sc, "native bootprobe could not read native " 1107 "metadata"); 1108 VOP_CLOSE(vn, FREAD, NOCRED, curproc); 1109 vput(vn); 1110 continue; 1111 } 1112 1113 /* are we a softraid partition? */ 1114 if (md->ssdi.ssd_magic != SR_MAGIC) { 1115 VOP_CLOSE(vn, FREAD, NOCRED, curproc); 1116 vput(vn); 1117 continue; 1118 } 1119 1120 sr_meta_getdevname(sc, rawdev, devname, sizeof(devname)); 1121 if (sr_meta_validate(fake_sd, rawdev, md, NULL) == 0) { 1122 /* XXX fix M_WAITOK, this is boot time */ 1123 bc = malloc(sizeof(struct sr_boot_chunk), 1124 M_DEVBUF, M_WAITOK | M_ZERO); 1125 bc->sbc_metadata = malloc(sizeof(struct sr_metadata), 1126 M_DEVBUF, M_WAITOK | M_ZERO); 1127 memcpy(bc->sbc_metadata, md, sizeof(struct sr_metadata)); 1128 bc->sbc_mm = rawdev; 1129 SLIST_INSERT_HEAD(bch, bc, sbc_link); 1130 rv = SR_META_CLAIMED; 1131 } 1132 1133 /* we are done, close partition */ 1134 VOP_CLOSE(vn, FREAD, NOCRED, curproc); 1135 vput(vn); 1136 } 1137 1138 done: 1139 free(fake_sd, M_DEVBUF, sizeof(struct sr_discipline)); 1140 free(md, M_DEVBUF, SR_META_SIZE * DEV_BSIZE); 1141 1142 return (rv); 1143 } 1144 1145 int 1146 sr_boot_assembly(struct sr_softc *sc) 1147 { 1148 struct sr_boot_volume_head bvh; 1149 struct sr_boot_chunk_head bch, kdh; 1150 struct sr_boot_volume *bv, *bv1, *bv2; 1151 struct sr_boot_chunk *bc, *bcnext, *bc1, *bc2; 1152 struct sr_disk_head sdklist; 1153 struct sr_disk *sdk; 1154 struct disk *dk; 1155 struct bioc_createraid bcr; 1156 struct sr_meta_chunk *hm; 1157 struct sr_chunk_head *cl; 1158 struct sr_chunk *hotspare, *chunk, *last; 1159 u_int64_t *ondisk = NULL; 1160 dev_t *devs = NULL; 1161 void *data; 1162 char devname[32]; 1163 int rv = 0, i; 1164 1165 DNPRINTF(SR_D_META, "%s: sr_boot_assembly\n", DEVNAME(sc)); 1166 1167 SLIST_INIT(&sdklist); 1168 SLIST_INIT(&bvh); 1169 SLIST_INIT(&bch); 1170 SLIST_INIT(&kdh); 1171 1172 dk = TAILQ_FIRST(&disklist); 1173 while (dk != NULL) { 1174 1175 /* See if this disk has been checked. */ 1176 SLIST_FOREACH(sdk, &sdklist, sdk_link) 1177 if (sdk->sdk_devno == dk->dk_devno) 1178 break; 1179 1180 if (sdk != NULL || dk->dk_devno == NODEV) { 1181 dk = TAILQ_NEXT(dk, dk_link); 1182 continue; 1183 } 1184 1185 /* Add this disk to the list that we've checked. */ 1186 sdk = malloc(sizeof(struct sr_disk), M_DEVBUF, 1187 M_NOWAIT | M_ZERO); 1188 if (sdk == NULL) 1189 goto unwind; 1190 sdk->sdk_devno = dk->dk_devno; 1191 SLIST_INSERT_HEAD(&sdklist, sdk, sdk_link); 1192 1193 /* Only check sd(4) and wd(4) devices. */ 1194 if (strncmp(dk->dk_name, "sd", 2) && 1195 strncmp(dk->dk_name, "wd", 2)) { 1196 dk = TAILQ_NEXT(dk, dk_link); 1197 continue; 1198 } 1199 1200 /* native softraid uses partitions */ 1201 rw_enter_write(&sc->sc_lock); 1202 bio_status_init(&sc->sc_status, &sc->sc_dev); 1203 sr_meta_native_bootprobe(sc, dk->dk_devno, &bch); 1204 rw_exit_write(&sc->sc_lock); 1205 1206 /* probe non-native disks if native failed. */ 1207 1208 /* Restart scan since we may have slept. */ 1209 dk = TAILQ_FIRST(&disklist); 1210 } 1211 1212 /* 1213 * Create a list of volumes and associate chunks with each volume. 1214 */ 1215 for (bc = SLIST_FIRST(&bch); bc != NULL; bc = bcnext) { 1216 1217 bcnext = SLIST_NEXT(bc, sbc_link); 1218 SLIST_REMOVE(&bch, bc, sr_boot_chunk, sbc_link); 1219 bc->sbc_chunk_id = bc->sbc_metadata->ssdi.ssd_chunk_id; 1220 1221 /* Handle key disks separately. */ 1222 if (bc->sbc_metadata->ssdi.ssd_level == SR_KEYDISK_LEVEL) { 1223 SLIST_INSERT_HEAD(&kdh, bc, sbc_link); 1224 continue; 1225 } 1226 1227 SLIST_FOREACH(bv, &bvh, sbv_link) { 1228 if (bcmp(&bc->sbc_metadata->ssdi.ssd_uuid, 1229 &bv->sbv_uuid, 1230 sizeof(bc->sbc_metadata->ssdi.ssd_uuid)) == 0) 1231 break; 1232 } 1233 1234 if (bv == NULL) { 1235 bv = malloc(sizeof(struct sr_boot_volume), 1236 M_DEVBUF, M_NOWAIT | M_ZERO); 1237 if (bv == NULL) { 1238 printf("%s: failed to allocate boot volume\n", 1239 DEVNAME(sc)); 1240 goto unwind; 1241 } 1242 1243 bv->sbv_level = bc->sbc_metadata->ssdi.ssd_level; 1244 bv->sbv_volid = bc->sbc_metadata->ssdi.ssd_volid; 1245 bv->sbv_chunk_no = bc->sbc_metadata->ssdi.ssd_chunk_no; 1246 bv->sbv_flags = bc->sbc_metadata->ssdi.ssd_vol_flags; 1247 memcpy(&bv->sbv_uuid, &bc->sbc_metadata->ssdi.ssd_uuid, 1248 sizeof(bc->sbc_metadata->ssdi.ssd_uuid)); 1249 SLIST_INIT(&bv->sbv_chunks); 1250 1251 /* Maintain volume order. */ 1252 bv2 = NULL; 1253 SLIST_FOREACH(bv1, &bvh, sbv_link) { 1254 if (bv1->sbv_volid > bv->sbv_volid) 1255 break; 1256 bv2 = bv1; 1257 } 1258 if (bv2 == NULL) { 1259 DNPRINTF(SR_D_META, "%s: insert volume %u " 1260 "at head\n", DEVNAME(sc), bv->sbv_volid); 1261 SLIST_INSERT_HEAD(&bvh, bv, sbv_link); 1262 } else { 1263 DNPRINTF(SR_D_META, "%s: insert volume %u " 1264 "after %u\n", DEVNAME(sc), bv->sbv_volid, 1265 bv2->sbv_volid); 1266 SLIST_INSERT_AFTER(bv2, bv, sbv_link); 1267 } 1268 } 1269 1270 /* Maintain chunk order. */ 1271 bc2 = NULL; 1272 SLIST_FOREACH(bc1, &bv->sbv_chunks, sbc_link) { 1273 if (bc1->sbc_chunk_id > bc->sbc_chunk_id) 1274 break; 1275 bc2 = bc1; 1276 } 1277 if (bc2 == NULL) { 1278 DNPRINTF(SR_D_META, "%s: volume %u insert chunk %u " 1279 "at head\n", DEVNAME(sc), bv->sbv_volid, 1280 bc->sbc_chunk_id); 1281 SLIST_INSERT_HEAD(&bv->sbv_chunks, bc, sbc_link); 1282 } else { 1283 DNPRINTF(SR_D_META, "%s: volume %u insert chunk %u " 1284 "after %u\n", DEVNAME(sc), bv->sbv_volid, 1285 bc->sbc_chunk_id, bc2->sbc_chunk_id); 1286 SLIST_INSERT_AFTER(bc2, bc, sbc_link); 1287 } 1288 1289 bv->sbv_chunks_found++; 1290 } 1291 1292 /* Allocate memory for device and ondisk version arrays. */ 1293 devs = mallocarray(BIOC_CRMAXLEN, sizeof(dev_t), M_DEVBUF, 1294 M_NOWAIT); 1295 if (devs == NULL) { 1296 printf("%s: failed to allocate device array\n", DEVNAME(sc)); 1297 goto unwind; 1298 } 1299 ondisk = mallocarray(BIOC_CRMAXLEN, sizeof(u_int64_t), M_DEVBUF, 1300 M_NOWAIT); 1301 if (ondisk == NULL) { 1302 printf("%s: failed to allocate ondisk array\n", DEVNAME(sc)); 1303 goto unwind; 1304 } 1305 1306 /* 1307 * Assemble hotspare "volumes". 1308 */ 1309 SLIST_FOREACH(bv, &bvh, sbv_link) { 1310 1311 /* Check if this is a hotspare "volume". */ 1312 if (bv->sbv_level != SR_HOTSPARE_LEVEL || 1313 bv->sbv_chunk_no != 1) 1314 continue; 1315 1316 #ifdef SR_DEBUG 1317 DNPRINTF(SR_D_META, "%s: assembling hotspare volume ", 1318 DEVNAME(sc)); 1319 if (sr_debug & SR_D_META) 1320 sr_uuid_print(&bv->sbv_uuid, 0); 1321 DNPRINTF(SR_D_META, " volid %u with %u chunks\n", 1322 bv->sbv_volid, bv->sbv_chunk_no); 1323 #endif 1324 1325 /* Create hotspare chunk metadata. */ 1326 hotspare = malloc(sizeof(struct sr_chunk), M_DEVBUF, 1327 M_NOWAIT | M_ZERO); 1328 if (hotspare == NULL) { 1329 printf("%s: failed to allocate hotspare\n", 1330 DEVNAME(sc)); 1331 goto unwind; 1332 } 1333 1334 bc = SLIST_FIRST(&bv->sbv_chunks); 1335 sr_meta_getdevname(sc, bc->sbc_mm, devname, sizeof(devname)); 1336 hotspare->src_dev_mm = bc->sbc_mm; 1337 strlcpy(hotspare->src_devname, devname, 1338 sizeof(hotspare->src_devname)); 1339 hotspare->src_size = bc->sbc_metadata->ssdi.ssd_size; 1340 1341 hm = &hotspare->src_meta; 1342 hm->scmi.scm_volid = SR_HOTSPARE_VOLID; 1343 hm->scmi.scm_chunk_id = 0; 1344 hm->scmi.scm_size = bc->sbc_metadata->ssdi.ssd_size; 1345 hm->scmi.scm_coerced_size = bc->sbc_metadata->ssdi.ssd_size; 1346 strlcpy(hm->scmi.scm_devname, devname, 1347 sizeof(hm->scmi.scm_devname)); 1348 memcpy(&hm->scmi.scm_uuid, &bc->sbc_metadata->ssdi.ssd_uuid, 1349 sizeof(struct sr_uuid)); 1350 1351 sr_checksum(sc, hm, &hm->scm_checksum, 1352 sizeof(struct sr_meta_chunk_invariant)); 1353 1354 hm->scm_status = BIOC_SDHOTSPARE; 1355 1356 /* Add chunk to hotspare list. */ 1357 rw_enter_write(&sc->sc_hs_lock); 1358 cl = &sc->sc_hotspare_list; 1359 if (SLIST_EMPTY(cl)) 1360 SLIST_INSERT_HEAD(cl, hotspare, src_link); 1361 else { 1362 SLIST_FOREACH(chunk, cl, src_link) 1363 last = chunk; 1364 SLIST_INSERT_AFTER(last, hotspare, src_link); 1365 } 1366 sc->sc_hotspare_no++; 1367 rw_exit_write(&sc->sc_hs_lock); 1368 1369 } 1370 1371 /* 1372 * Assemble RAID volumes. 1373 */ 1374 SLIST_FOREACH(bv, &bvh, sbv_link) { 1375 1376 bzero(&bcr, sizeof(bcr)); 1377 data = NULL; 1378 1379 /* Check if this is a hotspare "volume". */ 1380 if (bv->sbv_level == SR_HOTSPARE_LEVEL && 1381 bv->sbv_chunk_no == 1) 1382 continue; 1383 1384 /* 1385 * Skip volumes that are marked as no auto assemble, unless 1386 * this was the volume which we actually booted from. 1387 */ 1388 if (bcmp(&sr_bootuuid, &bv->sbv_uuid, sizeof(sr_bootuuid)) != 0) 1389 if (bv->sbv_flags & BIOC_SCNOAUTOASSEMBLE) 1390 continue; 1391 1392 #ifdef SR_DEBUG 1393 DNPRINTF(SR_D_META, "%s: assembling volume ", DEVNAME(sc)); 1394 if (sr_debug & SR_D_META) 1395 sr_uuid_print(&bv->sbv_uuid, 0); 1396 DNPRINTF(SR_D_META, " volid %u with %u chunks\n", 1397 bv->sbv_volid, bv->sbv_chunk_no); 1398 #endif 1399 1400 /* 1401 * If this is a crypto volume, try to find a matching 1402 * key disk... 1403 */ 1404 bcr.bc_key_disk = NODEV; 1405 if (bv->sbv_level == 'C' || bv->sbv_level == 0x1C) { 1406 SLIST_FOREACH(bc, &kdh, sbc_link) { 1407 if (bcmp(&bc->sbc_metadata->ssdi.ssd_uuid, 1408 &bv->sbv_uuid, 1409 sizeof(bc->sbc_metadata->ssdi.ssd_uuid)) 1410 == 0) 1411 bcr.bc_key_disk = bc->sbc_mm; 1412 } 1413 } 1414 1415 for (i = 0; i < BIOC_CRMAXLEN; i++) { 1416 devs[i] = NODEV; /* mark device as illegal */ 1417 ondisk[i] = 0; 1418 } 1419 1420 SLIST_FOREACH(bc, &bv->sbv_chunks, sbc_link) { 1421 if (devs[bc->sbc_chunk_id] != NODEV) { 1422 bv->sbv_chunks_found--; 1423 sr_meta_getdevname(sc, bc->sbc_mm, devname, 1424 sizeof(devname)); 1425 printf("%s: found duplicate chunk %u for " 1426 "volume %u on device %s\n", DEVNAME(sc), 1427 bc->sbc_chunk_id, bv->sbv_volid, devname); 1428 } 1429 1430 if (devs[bc->sbc_chunk_id] == NODEV || 1431 bc->sbc_metadata->ssd_ondisk > 1432 ondisk[bc->sbc_chunk_id]) { 1433 devs[bc->sbc_chunk_id] = bc->sbc_mm; 1434 ondisk[bc->sbc_chunk_id] = 1435 bc->sbc_metadata->ssd_ondisk; 1436 DNPRINTF(SR_D_META, "%s: using ondisk " 1437 "metadata version %llu for chunk %u\n", 1438 DEVNAME(sc), ondisk[bc->sbc_chunk_id], 1439 bc->sbc_chunk_id); 1440 } 1441 } 1442 1443 if (bv->sbv_chunk_no != bv->sbv_chunks_found) { 1444 printf("%s: not all chunks were provided; " 1445 "attempting to bring volume %d online\n", 1446 DEVNAME(sc), bv->sbv_volid); 1447 } 1448 1449 bcr.bc_level = bv->sbv_level; 1450 bcr.bc_dev_list_len = bv->sbv_chunk_no * sizeof(dev_t); 1451 bcr.bc_dev_list = devs; 1452 bcr.bc_flags = BIOC_SCDEVT | 1453 (bv->sbv_flags & BIOC_SCNOAUTOASSEMBLE); 1454 1455 if ((bv->sbv_level == 'C' || bv->sbv_level == 0x1C) && 1456 bcmp(&sr_bootuuid, &bv->sbv_uuid, sizeof(sr_bootuuid)) == 0) 1457 data = sr_bootkey; 1458 1459 rw_enter_write(&sc->sc_lock); 1460 bio_status_init(&sc->sc_status, &sc->sc_dev); 1461 sr_ioctl_createraid(sc, &bcr, 0, data); 1462 rw_exit_write(&sc->sc_lock); 1463 1464 rv++; 1465 } 1466 1467 /* done with metadata */ 1468 unwind: 1469 /* Free boot volumes and associated chunks. */ 1470 for (bv1 = SLIST_FIRST(&bvh); bv1 != NULL; bv1 = bv2) { 1471 bv2 = SLIST_NEXT(bv1, sbv_link); 1472 for (bc1 = SLIST_FIRST(&bv1->sbv_chunks); bc1 != NULL; 1473 bc1 = bc2) { 1474 bc2 = SLIST_NEXT(bc1, sbc_link); 1475 free(bc1->sbc_metadata, M_DEVBUF, 1476 sizeof(*bc1->sbc_metadata)); 1477 free(bc1, M_DEVBUF, sizeof(*bc1)); 1478 } 1479 free(bv1, M_DEVBUF, sizeof(*bv1)); 1480 } 1481 /* Free keydisks chunks. */ 1482 for (bc1 = SLIST_FIRST(&kdh); bc1 != NULL; bc1 = bc2) { 1483 bc2 = SLIST_NEXT(bc1, sbc_link); 1484 free(bc1->sbc_metadata, M_DEVBUF, sizeof(*bc1->sbc_metadata)); 1485 free(bc1, M_DEVBUF, sizeof(*bc1)); 1486 } 1487 /* Free unallocated chunks. */ 1488 for (bc1 = SLIST_FIRST(&bch); bc1 != NULL; bc1 = bc2) { 1489 bc2 = SLIST_NEXT(bc1, sbc_link); 1490 free(bc1->sbc_metadata, M_DEVBUF, sizeof(*bc1->sbc_metadata)); 1491 free(bc1, M_DEVBUF, sizeof(*bc1)); 1492 } 1493 1494 while (!SLIST_EMPTY(&sdklist)) { 1495 sdk = SLIST_FIRST(&sdklist); 1496 SLIST_REMOVE_HEAD(&sdklist, sdk_link); 1497 free(sdk, M_DEVBUF, sizeof(*sdk)); 1498 } 1499 1500 free(devs, M_DEVBUF, BIOC_CRMAXLEN * sizeof(dev_t)); 1501 free(ondisk, M_DEVBUF, BIOC_CRMAXLEN * sizeof(u_int64_t)); 1502 1503 return (rv); 1504 } 1505 1506 void 1507 sr_map_root(void) 1508 { 1509 struct sr_softc *sc = softraid0; 1510 struct sr_discipline *sd; 1511 struct sr_meta_opt_item *omi; 1512 struct sr_meta_boot *sbm; 1513 u_char duid[8]; 1514 int i; 1515 1516 if (sc == NULL) 1517 return; 1518 1519 DNPRINTF(SR_D_MISC, "%s: sr_map_root\n", DEVNAME(sc)); 1520 1521 bzero(duid, sizeof(duid)); 1522 if (bcmp(rootduid, duid, sizeof(duid)) == 0) { 1523 DNPRINTF(SR_D_MISC, "%s: root duid is zero\n", DEVNAME(sc)); 1524 return; 1525 } 1526 1527 TAILQ_FOREACH(sd, &sc->sc_dis_list, sd_link) { 1528 SLIST_FOREACH(omi, &sd->sd_meta_opt, omi_link) { 1529 if (omi->omi_som->som_type != SR_OPT_BOOT) 1530 continue; 1531 sbm = (struct sr_meta_boot *)omi->omi_som; 1532 for (i = 0; i < SR_MAX_BOOT_DISKS; i++) { 1533 if (bcmp(rootduid, sbm->sbm_boot_duid[i], 1534 sizeof(rootduid)) == 0) { 1535 memcpy(rootduid, sbm->sbm_root_duid, 1536 sizeof(rootduid)); 1537 DNPRINTF(SR_D_MISC, "%s: root duid " 1538 "mapped to %s\n", DEVNAME(sc), 1539 duid_format(rootduid)); 1540 return; 1541 } 1542 } 1543 } 1544 } 1545 } 1546 1547 int 1548 sr_meta_native_probe(struct sr_softc *sc, struct sr_chunk *ch_entry) 1549 { 1550 struct disklabel label; 1551 char *devname; 1552 int error, part; 1553 u_int64_t size; 1554 1555 DNPRINTF(SR_D_META, "%s: sr_meta_native_probe(%s)\n", 1556 DEVNAME(sc), ch_entry->src_devname); 1557 1558 devname = ch_entry->src_devname; 1559 part = DISKPART(ch_entry->src_dev_mm); 1560 1561 /* get disklabel */ 1562 error = VOP_IOCTL(ch_entry->src_vn, DIOCGDINFO, (caddr_t)&label, FREAD, 1563 NOCRED, curproc); 1564 if (error) { 1565 DNPRINTF(SR_D_META, "%s: %s can't obtain disklabel\n", 1566 DEVNAME(sc), devname); 1567 goto unwind; 1568 } 1569 memcpy(ch_entry->src_duid, label.d_uid, sizeof(ch_entry->src_duid)); 1570 1571 /* make sure the partition is of the right type */ 1572 if (label.d_partitions[part].p_fstype != FS_RAID) { 1573 DNPRINTF(SR_D_META, 1574 "%s: %s partition not of type RAID (%d)\n", DEVNAME(sc), 1575 devname, 1576 label.d_partitions[part].p_fstype); 1577 goto unwind; 1578 } 1579 1580 size = DL_SECTOBLK(&label, DL_GETPSIZE(&label.d_partitions[part])); 1581 if (size <= SR_DATA_OFFSET) { 1582 DNPRINTF(SR_D_META, "%s: %s partition too small\n", DEVNAME(sc), 1583 devname); 1584 goto unwind; 1585 } 1586 size -= SR_DATA_OFFSET; 1587 if (size > INT64_MAX) { 1588 DNPRINTF(SR_D_META, "%s: %s partition too large\n", DEVNAME(sc), 1589 devname); 1590 goto unwind; 1591 } 1592 ch_entry->src_size = size; 1593 ch_entry->src_secsize = label.d_secsize; 1594 1595 DNPRINTF(SR_D_META, "%s: probe found %s size %lld\n", DEVNAME(sc), 1596 devname, (long long)size); 1597 1598 return (SR_META_F_NATIVE); 1599 unwind: 1600 DNPRINTF(SR_D_META, "%s: invalid device: %s\n", DEVNAME(sc), 1601 devname ? devname : "nodev"); 1602 return (SR_META_F_INVALID); 1603 } 1604 1605 int 1606 sr_meta_native_attach(struct sr_discipline *sd, int force) 1607 { 1608 struct sr_softc *sc = sd->sd_sc; 1609 struct sr_chunk_head *cl = &sd->sd_vol.sv_chunk_list; 1610 struct sr_metadata *md = NULL; 1611 struct sr_chunk *ch_entry, *ch_next; 1612 struct sr_uuid uuid; 1613 u_int64_t version = 0; 1614 int sr, not_sr, rv = 1, d, expected = -1, old_meta = 0; 1615 1616 DNPRINTF(SR_D_META, "%s: sr_meta_native_attach\n", DEVNAME(sc)); 1617 1618 md = malloc(SR_META_SIZE * DEV_BSIZE, M_DEVBUF, M_ZERO | M_NOWAIT); 1619 if (md == NULL) { 1620 sr_error(sc, "not enough memory for metadata buffer"); 1621 goto bad; 1622 } 1623 1624 bzero(&uuid, sizeof uuid); 1625 1626 sr = not_sr = d = 0; 1627 SLIST_FOREACH(ch_entry, cl, src_link) { 1628 if (ch_entry->src_dev_mm == NODEV) 1629 continue; 1630 1631 if (sr_meta_native_read(sd, ch_entry->src_dev_mm, md, NULL)) { 1632 sr_error(sc, "could not read native metadata"); 1633 goto bad; 1634 } 1635 1636 if (md->ssdi.ssd_magic == SR_MAGIC) { 1637 sr++; 1638 ch_entry->src_meta.scmi.scm_chunk_id = 1639 md->ssdi.ssd_chunk_id; 1640 if (d == 0) { 1641 memcpy(&uuid, &md->ssdi.ssd_uuid, sizeof uuid); 1642 expected = md->ssdi.ssd_chunk_no; 1643 version = md->ssd_ondisk; 1644 d++; 1645 continue; 1646 } else if (bcmp(&md->ssdi.ssd_uuid, &uuid, 1647 sizeof uuid)) { 1648 sr_error(sc, "not part of the same volume"); 1649 goto bad; 1650 } 1651 if (md->ssd_ondisk != version) { 1652 old_meta++; 1653 version = MAX(md->ssd_ondisk, version); 1654 } 1655 } else 1656 not_sr++; 1657 } 1658 1659 if (sr && not_sr && !force) { 1660 sr_error(sc, "not all chunks are of the native metadata " 1661 "format"); 1662 goto bad; 1663 } 1664 1665 /* mixed metadata versions; mark bad disks offline */ 1666 if (old_meta) { 1667 d = 0; 1668 for (ch_entry = SLIST_FIRST(cl); ch_entry != NULL; 1669 ch_entry = ch_next, d++) { 1670 ch_next = SLIST_NEXT(ch_entry, src_link); 1671 1672 /* XXX do we want to read this again? */ 1673 if (ch_entry->src_dev_mm == NODEV) 1674 panic("src_dev_mm == NODEV"); 1675 if (sr_meta_native_read(sd, ch_entry->src_dev_mm, md, 1676 NULL)) 1677 sr_warn(sc, "could not read native metadata"); 1678 if (md->ssd_ondisk != version) 1679 sd->sd_vol.sv_chunks[d]->src_meta.scm_status = 1680 BIOC_SDOFFLINE; 1681 } 1682 } 1683 1684 if (expected != sr && !force && expected != -1) { 1685 DNPRINTF(SR_D_META, "%s: not all chunks were provided, trying " 1686 "anyway\n", DEVNAME(sc)); 1687 } 1688 1689 rv = 0; 1690 bad: 1691 free(md, M_DEVBUF, SR_META_SIZE * DEV_BSIZE); 1692 return (rv); 1693 } 1694 1695 int 1696 sr_meta_native_read(struct sr_discipline *sd, dev_t dev, 1697 struct sr_metadata *md, void *fm) 1698 { 1699 #ifdef SR_DEBUG 1700 struct sr_softc *sc = sd->sd_sc; 1701 #endif 1702 DNPRINTF(SR_D_META, "%s: sr_meta_native_read(0x%x, %p)\n", 1703 DEVNAME(sc), dev, md); 1704 1705 return (sr_meta_rw(sd, dev, md, B_READ)); 1706 } 1707 1708 int 1709 sr_meta_native_write(struct sr_discipline *sd, dev_t dev, 1710 struct sr_metadata *md, void *fm) 1711 { 1712 #ifdef SR_DEBUG 1713 struct sr_softc *sc = sd->sd_sc; 1714 #endif 1715 DNPRINTF(SR_D_META, "%s: sr_meta_native_write(0x%x, %p)\n", 1716 DEVNAME(sc), dev, md); 1717 1718 return (sr_meta_rw(sd, dev, md, B_WRITE)); 1719 } 1720 1721 void 1722 sr_hotplug_register(struct sr_discipline *sd, void *func) 1723 { 1724 struct sr_hotplug_list *mhe; 1725 1726 DNPRINTF(SR_D_MISC, "%s: sr_hotplug_register: %p\n", 1727 DEVNAME(sd->sd_sc), func); 1728 1729 /* make sure we aren't on the list yet */ 1730 SLIST_FOREACH(mhe, &sr_hotplug_callbacks, shl_link) 1731 if (mhe->sh_hotplug == func) 1732 return; 1733 1734 mhe = malloc(sizeof(struct sr_hotplug_list), M_DEVBUF, 1735 M_WAITOK | M_ZERO); 1736 mhe->sh_hotplug = func; 1737 mhe->sh_sd = sd; 1738 SLIST_INSERT_HEAD(&sr_hotplug_callbacks, mhe, shl_link); 1739 } 1740 1741 void 1742 sr_hotplug_unregister(struct sr_discipline *sd, void *func) 1743 { 1744 struct sr_hotplug_list *mhe; 1745 1746 DNPRINTF(SR_D_MISC, "%s: sr_hotplug_unregister: %s %p\n", 1747 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, func); 1748 1749 /* make sure we are on the list yet */ 1750 SLIST_FOREACH(mhe, &sr_hotplug_callbacks, shl_link) { 1751 if (mhe->sh_hotplug == func) 1752 break; 1753 } 1754 if (mhe != NULL) { 1755 SLIST_REMOVE(&sr_hotplug_callbacks, mhe, 1756 sr_hotplug_list, shl_link); 1757 free(mhe, M_DEVBUF, sizeof(*mhe)); 1758 } 1759 } 1760 1761 void 1762 sr_disk_attach(struct disk *diskp, int action) 1763 { 1764 struct sr_hotplug_list *mhe; 1765 1766 SLIST_FOREACH(mhe, &sr_hotplug_callbacks, shl_link) 1767 if (mhe->sh_sd->sd_ready) 1768 mhe->sh_hotplug(mhe->sh_sd, diskp, action); 1769 } 1770 1771 int 1772 sr_match(struct device *parent, void *match, void *aux) 1773 { 1774 return (1); 1775 } 1776 1777 void 1778 sr_attach(struct device *parent, struct device *self, void *aux) 1779 { 1780 struct sr_softc *sc = (void *)self; 1781 struct scsibus_attach_args saa; 1782 1783 DNPRINTF(SR_D_MISC, "\n%s: sr_attach", DEVNAME(sc)); 1784 1785 if (softraid0 == NULL) 1786 softraid0 = sc; 1787 1788 rw_init(&sc->sc_lock, "sr_lock"); 1789 rw_init(&sc->sc_hs_lock, "sr_hs_lock"); 1790 1791 SLIST_INIT(&sr_hotplug_callbacks); 1792 TAILQ_INIT(&sc->sc_dis_list); 1793 SLIST_INIT(&sc->sc_hotspare_list); 1794 1795 #if NBIO > 0 1796 if (bio_register(&sc->sc_dev, sr_bio_ioctl) != 0) 1797 printf("%s: controller registration failed", DEVNAME(sc)); 1798 #endif /* NBIO > 0 */ 1799 1800 #ifndef SMALL_KERNEL 1801 strlcpy(sc->sc_sensordev.xname, DEVNAME(sc), 1802 sizeof(sc->sc_sensordev.xname)); 1803 sensordev_install(&sc->sc_sensordev); 1804 #endif /* SMALL_KERNEL */ 1805 1806 printf("\n"); 1807 1808 saa.saa_adapter_softc = sc; 1809 saa.saa_adapter = &sr_switch; 1810 saa.saa_adapter_target = SDEV_NO_ADAPTER_TARGET; 1811 saa.saa_adapter_buswidth = SR_MAX_LD; 1812 saa.saa_luns = 1; 1813 saa.saa_openings = 0; 1814 saa.saa_pool = NULL; 1815 saa.saa_quirks = saa.saa_flags = 0; 1816 saa.saa_wwpn = saa.saa_wwnn = 0; 1817 1818 sc->sc_scsibus = (struct scsibus_softc *)config_found(&sc->sc_dev, &saa, 1819 scsiprint); 1820 1821 softraid_disk_attach = sr_disk_attach; 1822 1823 sr_boot_assembly(sc); 1824 1825 explicit_bzero(sr_bootkey, sizeof(sr_bootkey)); 1826 } 1827 1828 int 1829 sr_detach(struct device *self, int flags) 1830 { 1831 struct sr_softc *sc = (void *)self; 1832 int rv; 1833 1834 DNPRINTF(SR_D_MISC, "%s: sr_detach\n", DEVNAME(sc)); 1835 1836 softraid_disk_attach = NULL; 1837 1838 sr_shutdown(0); 1839 1840 #ifndef SMALL_KERNEL 1841 if (sc->sc_sensor_task != NULL) 1842 sensor_task_unregister(sc->sc_sensor_task); 1843 sensordev_deinstall(&sc->sc_sensordev); 1844 #endif /* SMALL_KERNEL */ 1845 1846 if (sc->sc_scsibus != NULL) { 1847 rv = config_detach((struct device *)sc->sc_scsibus, flags); 1848 if (rv != 0) 1849 return (rv); 1850 sc->sc_scsibus = NULL; 1851 } 1852 1853 return (0); 1854 } 1855 1856 void 1857 sr_info(struct sr_softc *sc, const char *fmt, ...) 1858 { 1859 va_list ap; 1860 1861 rw_assert_wrlock(&sc->sc_lock); 1862 1863 va_start(ap, fmt); 1864 bio_status(&sc->sc_status, 0, BIO_MSG_INFO, fmt, &ap); 1865 va_end(ap); 1866 } 1867 1868 void 1869 sr_warn(struct sr_softc *sc, const char *fmt, ...) 1870 { 1871 va_list ap; 1872 1873 rw_assert_wrlock(&sc->sc_lock); 1874 1875 va_start(ap, fmt); 1876 bio_status(&sc->sc_status, 1, BIO_MSG_WARN, fmt, &ap); 1877 va_end(ap); 1878 } 1879 1880 void 1881 sr_error(struct sr_softc *sc, const char *fmt, ...) 1882 { 1883 va_list ap; 1884 1885 rw_assert_wrlock(&sc->sc_lock); 1886 1887 va_start(ap, fmt); 1888 bio_status(&sc->sc_status, 1, BIO_MSG_ERROR, fmt, &ap); 1889 va_end(ap); 1890 } 1891 1892 int 1893 sr_ccb_alloc(struct sr_discipline *sd) 1894 { 1895 struct sr_ccb *ccb; 1896 int i; 1897 1898 if (!sd) 1899 return (1); 1900 1901 DNPRINTF(SR_D_CCB, "%s: sr_ccb_alloc\n", DEVNAME(sd->sd_sc)); 1902 1903 if (sd->sd_ccb) 1904 return (1); 1905 1906 sd->sd_ccb = mallocarray(sd->sd_max_wu, 1907 sd->sd_max_ccb_per_wu * sizeof(struct sr_ccb), 1908 M_DEVBUF, M_WAITOK | M_ZERO); 1909 TAILQ_INIT(&sd->sd_ccb_freeq); 1910 for (i = 0; i < sd->sd_max_wu * sd->sd_max_ccb_per_wu; i++) { 1911 ccb = &sd->sd_ccb[i]; 1912 ccb->ccb_dis = sd; 1913 sr_ccb_put(ccb); 1914 } 1915 1916 DNPRINTF(SR_D_CCB, "%s: sr_ccb_alloc ccb: %d\n", 1917 DEVNAME(sd->sd_sc), sd->sd_max_wu * sd->sd_max_ccb_per_wu); 1918 1919 return (0); 1920 } 1921 1922 void 1923 sr_ccb_free(struct sr_discipline *sd) 1924 { 1925 struct sr_ccb *ccb; 1926 1927 if (!sd) 1928 return; 1929 1930 DNPRINTF(SR_D_CCB, "%s: sr_ccb_free %p\n", DEVNAME(sd->sd_sc), sd); 1931 1932 while ((ccb = TAILQ_FIRST(&sd->sd_ccb_freeq)) != NULL) 1933 TAILQ_REMOVE(&sd->sd_ccb_freeq, ccb, ccb_link); 1934 1935 free(sd->sd_ccb, M_DEVBUF, sd->sd_max_wu * sd->sd_max_ccb_per_wu * 1936 sizeof(struct sr_ccb)); 1937 } 1938 1939 struct sr_ccb * 1940 sr_ccb_get(struct sr_discipline *sd) 1941 { 1942 struct sr_ccb *ccb; 1943 int s; 1944 1945 s = splbio(); 1946 1947 ccb = TAILQ_FIRST(&sd->sd_ccb_freeq); 1948 if (ccb) { 1949 TAILQ_REMOVE(&sd->sd_ccb_freeq, ccb, ccb_link); 1950 ccb->ccb_state = SR_CCB_INPROGRESS; 1951 } 1952 1953 splx(s); 1954 1955 DNPRINTF(SR_D_CCB, "%s: sr_ccb_get: %p\n", DEVNAME(sd->sd_sc), 1956 ccb); 1957 1958 return (ccb); 1959 } 1960 1961 void 1962 sr_ccb_put(struct sr_ccb *ccb) 1963 { 1964 struct sr_discipline *sd = ccb->ccb_dis; 1965 int s; 1966 1967 DNPRINTF(SR_D_CCB, "%s: sr_ccb_put: %p\n", DEVNAME(sd->sd_sc), 1968 ccb); 1969 1970 s = splbio(); 1971 1972 ccb->ccb_wu = NULL; 1973 ccb->ccb_state = SR_CCB_FREE; 1974 ccb->ccb_target = -1; 1975 ccb->ccb_opaque = NULL; 1976 1977 TAILQ_INSERT_TAIL(&sd->sd_ccb_freeq, ccb, ccb_link); 1978 1979 splx(s); 1980 } 1981 1982 struct sr_ccb * 1983 sr_ccb_rw(struct sr_discipline *sd, int chunk, daddr_t blkno, 1984 long len, u_int8_t *data, int xsflags, int ccbflags) 1985 { 1986 struct sr_chunk *sc = sd->sd_vol.sv_chunks[chunk]; 1987 struct sr_ccb *ccb = NULL; 1988 int s; 1989 1990 ccb = sr_ccb_get(sd); 1991 if (ccb == NULL) 1992 goto out; 1993 1994 ccb->ccb_flags = ccbflags; 1995 ccb->ccb_target = chunk; 1996 1997 ccb->ccb_buf.b_flags = B_PHYS | B_CALL; 1998 if (ISSET(xsflags, SCSI_DATA_IN)) 1999 ccb->ccb_buf.b_flags |= B_READ; 2000 else 2001 ccb->ccb_buf.b_flags |= B_WRITE; 2002 2003 ccb->ccb_buf.b_blkno = blkno + sd->sd_meta->ssd_data_blkno; 2004 ccb->ccb_buf.b_bcount = len; 2005 ccb->ccb_buf.b_bufsize = len; 2006 ccb->ccb_buf.b_resid = len; 2007 ccb->ccb_buf.b_data = data; 2008 ccb->ccb_buf.b_error = 0; 2009 ccb->ccb_buf.b_iodone = sd->sd_scsi_intr; 2010 ccb->ccb_buf.b_proc = curproc; 2011 ccb->ccb_buf.b_dev = sc->src_dev_mm; 2012 ccb->ccb_buf.b_vp = sc->src_vn; 2013 ccb->ccb_buf.b_bq = NULL; 2014 2015 if (!ISSET(ccb->ccb_buf.b_flags, B_READ)) { 2016 s = splbio(); 2017 ccb->ccb_buf.b_vp->v_numoutput++; 2018 splx(s); 2019 } 2020 2021 LIST_INIT(&ccb->ccb_buf.b_dep); 2022 2023 DNPRINTF(SR_D_DIS, "%s: %s %s ccb " 2024 "b_bcount %ld b_blkno %lld b_flags 0x%0lx b_data %p\n", 2025 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, sd->sd_name, 2026 ccb->ccb_buf.b_bcount, (long long)ccb->ccb_buf.b_blkno, 2027 ccb->ccb_buf.b_flags, ccb->ccb_buf.b_data); 2028 2029 out: 2030 return ccb; 2031 } 2032 2033 void 2034 sr_ccb_done(struct sr_ccb *ccb) 2035 { 2036 struct sr_workunit *wu = ccb->ccb_wu; 2037 struct sr_discipline *sd = wu->swu_dis; 2038 struct sr_softc *sc = sd->sd_sc; 2039 2040 DNPRINTF(SR_D_INTR, "%s: %s %s ccb done b_bcount %ld b_resid %zu" 2041 " b_flags 0x%0lx block %lld target %d\n", 2042 DEVNAME(sc), sd->sd_meta->ssd_devname, sd->sd_name, 2043 ccb->ccb_buf.b_bcount, ccb->ccb_buf.b_resid, ccb->ccb_buf.b_flags, 2044 (long long)ccb->ccb_buf.b_blkno, ccb->ccb_target); 2045 2046 splassert(IPL_BIO); 2047 2048 if (ccb->ccb_target == -1) 2049 panic("%s: invalid target on wu: %p", DEVNAME(sc), wu); 2050 2051 if (ccb->ccb_buf.b_flags & B_ERROR) { 2052 DNPRINTF(SR_D_INTR, "%s: i/o error on block %lld target %d\n", 2053 DEVNAME(sc), (long long)ccb->ccb_buf.b_blkno, 2054 ccb->ccb_target); 2055 if (ISSET(sd->sd_capabilities, SR_CAP_REDUNDANT)) 2056 sd->sd_set_chunk_state(sd, ccb->ccb_target, 2057 BIOC_SDOFFLINE); 2058 else 2059 printf("%s: %s: i/o error %d @ %s block %lld\n", 2060 DEVNAME(sc), sd->sd_meta->ssd_devname, 2061 ccb->ccb_buf.b_error, sd->sd_name, 2062 (long long)ccb->ccb_buf.b_blkno); 2063 ccb->ccb_state = SR_CCB_FAILED; 2064 wu->swu_ios_failed++; 2065 } else { 2066 ccb->ccb_state = SR_CCB_OK; 2067 wu->swu_ios_succeeded++; 2068 } 2069 2070 wu->swu_ios_complete++; 2071 } 2072 2073 int 2074 sr_wu_alloc(struct sr_discipline *sd) 2075 { 2076 struct sr_workunit *wu; 2077 int i, no_wu; 2078 2079 DNPRINTF(SR_D_WU, "%s: sr_wu_alloc %p %d\n", DEVNAME(sd->sd_sc), 2080 sd, sd->sd_max_wu); 2081 2082 no_wu = sd->sd_max_wu; 2083 sd->sd_wu_pending = no_wu; 2084 2085 mtx_init(&sd->sd_wu_mtx, IPL_BIO); 2086 TAILQ_INIT(&sd->sd_wu); 2087 TAILQ_INIT(&sd->sd_wu_freeq); 2088 TAILQ_INIT(&sd->sd_wu_pendq); 2089 TAILQ_INIT(&sd->sd_wu_defq); 2090 2091 for (i = 0; i < no_wu; i++) { 2092 wu = malloc(sd->sd_wu_size, M_DEVBUF, M_WAITOK | M_ZERO); 2093 TAILQ_INSERT_TAIL(&sd->sd_wu, wu, swu_next); 2094 TAILQ_INIT(&wu->swu_ccb); 2095 wu->swu_dis = sd; 2096 task_set(&wu->swu_task, sr_wu_done_callback, wu); 2097 sr_wu_put(sd, wu); 2098 } 2099 2100 return (0); 2101 } 2102 2103 void 2104 sr_wu_free(struct sr_discipline *sd) 2105 { 2106 struct sr_workunit *wu; 2107 2108 DNPRINTF(SR_D_WU, "%s: sr_wu_free %p\n", DEVNAME(sd->sd_sc), sd); 2109 2110 while ((wu = TAILQ_FIRST(&sd->sd_wu_freeq)) != NULL) 2111 TAILQ_REMOVE(&sd->sd_wu_freeq, wu, swu_link); 2112 while ((wu = TAILQ_FIRST(&sd->sd_wu_pendq)) != NULL) 2113 TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link); 2114 while ((wu = TAILQ_FIRST(&sd->sd_wu_defq)) != NULL) 2115 TAILQ_REMOVE(&sd->sd_wu_defq, wu, swu_link); 2116 2117 while ((wu = TAILQ_FIRST(&sd->sd_wu)) != NULL) { 2118 TAILQ_REMOVE(&sd->sd_wu, wu, swu_next); 2119 free(wu, M_DEVBUF, sd->sd_wu_size); 2120 } 2121 } 2122 2123 void * 2124 sr_wu_get(void *xsd) 2125 { 2126 struct sr_discipline *sd = (struct sr_discipline *)xsd; 2127 struct sr_workunit *wu; 2128 2129 mtx_enter(&sd->sd_wu_mtx); 2130 wu = TAILQ_FIRST(&sd->sd_wu_freeq); 2131 if (wu) { 2132 TAILQ_REMOVE(&sd->sd_wu_freeq, wu, swu_link); 2133 sd->sd_wu_pending++; 2134 } 2135 mtx_leave(&sd->sd_wu_mtx); 2136 2137 DNPRINTF(SR_D_WU, "%s: sr_wu_get: %p\n", DEVNAME(sd->sd_sc), wu); 2138 2139 return (wu); 2140 } 2141 2142 void 2143 sr_wu_put(void *xsd, void *xwu) 2144 { 2145 struct sr_discipline *sd = (struct sr_discipline *)xsd; 2146 struct sr_workunit *wu = (struct sr_workunit *)xwu; 2147 2148 DNPRINTF(SR_D_WU, "%s: sr_wu_put: %p\n", DEVNAME(sd->sd_sc), wu); 2149 2150 sr_wu_release_ccbs(wu); 2151 sr_wu_init(sd, wu); 2152 2153 mtx_enter(&sd->sd_wu_mtx); 2154 TAILQ_INSERT_TAIL(&sd->sd_wu_freeq, wu, swu_link); 2155 sd->sd_wu_pending--; 2156 mtx_leave(&sd->sd_wu_mtx); 2157 } 2158 2159 void 2160 sr_wu_init(struct sr_discipline *sd, struct sr_workunit *wu) 2161 { 2162 int s; 2163 2164 s = splbio(); 2165 if (wu->swu_cb_active == 1) 2166 panic("%s: sr_wu_init got active wu", DEVNAME(sd->sd_sc)); 2167 splx(s); 2168 2169 wu->swu_xs = NULL; 2170 wu->swu_state = SR_WU_FREE; 2171 wu->swu_flags = 0; 2172 wu->swu_blk_start = 0; 2173 wu->swu_blk_end = 0; 2174 wu->swu_collider = NULL; 2175 } 2176 2177 void 2178 sr_wu_enqueue_ccb(struct sr_workunit *wu, struct sr_ccb *ccb) 2179 { 2180 struct sr_discipline *sd = wu->swu_dis; 2181 int s; 2182 2183 s = splbio(); 2184 if (wu->swu_cb_active == 1) 2185 panic("%s: sr_wu_enqueue_ccb got active wu", 2186 DEVNAME(sd->sd_sc)); 2187 ccb->ccb_wu = wu; 2188 wu->swu_io_count++; 2189 TAILQ_INSERT_TAIL(&wu->swu_ccb, ccb, ccb_link); 2190 splx(s); 2191 } 2192 2193 void 2194 sr_wu_release_ccbs(struct sr_workunit *wu) 2195 { 2196 struct sr_ccb *ccb; 2197 2198 /* Return all ccbs that are associated with this workunit. */ 2199 while ((ccb = TAILQ_FIRST(&wu->swu_ccb)) != NULL) { 2200 TAILQ_REMOVE(&wu->swu_ccb, ccb, ccb_link); 2201 sr_ccb_put(ccb); 2202 } 2203 2204 wu->swu_io_count = 0; 2205 wu->swu_ios_complete = 0; 2206 wu->swu_ios_failed = 0; 2207 wu->swu_ios_succeeded = 0; 2208 } 2209 2210 void 2211 sr_wu_done(struct sr_workunit *wu) 2212 { 2213 struct sr_discipline *sd = wu->swu_dis; 2214 2215 DNPRINTF(SR_D_INTR, "%s: sr_wu_done count %d completed %d failed %d\n", 2216 DEVNAME(sd->sd_sc), wu->swu_io_count, wu->swu_ios_complete, 2217 wu->swu_ios_failed); 2218 2219 if (wu->swu_ios_complete < wu->swu_io_count) 2220 return; 2221 2222 task_add(sd->sd_taskq, &wu->swu_task); 2223 } 2224 2225 void 2226 sr_wu_done_callback(void *xwu) 2227 { 2228 struct sr_workunit *wu = xwu; 2229 struct sr_discipline *sd = wu->swu_dis; 2230 struct scsi_xfer *xs = wu->swu_xs; 2231 struct sr_workunit *wup; 2232 int s; 2233 2234 /* 2235 * The SR_WUF_DISCIPLINE or SR_WUF_REBUILD flag must be set if 2236 * the work unit is not associated with a scsi_xfer. 2237 */ 2238 KASSERT(xs != NULL || 2239 (wu->swu_flags & (SR_WUF_DISCIPLINE|SR_WUF_REBUILD))); 2240 2241 s = splbio(); 2242 2243 if (xs != NULL) { 2244 if (wu->swu_ios_failed) 2245 xs->error = XS_DRIVER_STUFFUP; 2246 else 2247 xs->error = XS_NOERROR; 2248 } 2249 2250 if (sd->sd_scsi_wu_done) { 2251 if (sd->sd_scsi_wu_done(wu) == SR_WU_RESTART) 2252 goto done; 2253 } 2254 2255 /* Remove work unit from pending queue. */ 2256 TAILQ_FOREACH(wup, &sd->sd_wu_pendq, swu_link) 2257 if (wup == wu) 2258 break; 2259 if (wup == NULL) 2260 panic("%s: wu %p not on pending queue", 2261 DEVNAME(sd->sd_sc), wu); 2262 TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link); 2263 2264 if (wu->swu_collider) { 2265 if (wu->swu_ios_failed) 2266 sr_raid_recreate_wu(wu->swu_collider); 2267 2268 /* XXX Should the collider be failed if this xs failed? */ 2269 sr_raid_startwu(wu->swu_collider); 2270 } 2271 2272 /* 2273 * If a discipline provides its own sd_scsi_done function, then it 2274 * is responsible for calling sr_scsi_done() once I/O is complete. 2275 */ 2276 if (wu->swu_flags & SR_WUF_REBUILD) 2277 wu->swu_flags |= SR_WUF_REBUILDIOCOMP; 2278 if (wu->swu_flags & SR_WUF_WAKEUP) 2279 wakeup(wu); 2280 if (sd->sd_scsi_done) 2281 sd->sd_scsi_done(wu); 2282 else if (wu->swu_flags & SR_WUF_DISCIPLINE) 2283 sr_scsi_wu_put(sd, wu); 2284 else if (!(wu->swu_flags & SR_WUF_REBUILD)) 2285 sr_scsi_done(sd, xs); 2286 2287 done: 2288 splx(s); 2289 } 2290 2291 struct sr_workunit * 2292 sr_scsi_wu_get(struct sr_discipline *sd, int flags) 2293 { 2294 return scsi_io_get(&sd->sd_iopool, flags); 2295 } 2296 2297 void 2298 sr_scsi_wu_put(struct sr_discipline *sd, struct sr_workunit *wu) 2299 { 2300 scsi_io_put(&sd->sd_iopool, wu); 2301 2302 if (sd->sd_sync && sd->sd_wu_pending == 0) 2303 wakeup(sd); 2304 } 2305 2306 void 2307 sr_scsi_done(struct sr_discipline *sd, struct scsi_xfer *xs) 2308 { 2309 DNPRINTF(SR_D_DIS, "%s: sr_scsi_done: xs %p\n", DEVNAME(sd->sd_sc), xs); 2310 2311 if (xs->error == XS_NOERROR) 2312 xs->resid = 0; 2313 2314 scsi_done(xs); 2315 2316 if (sd->sd_sync && sd->sd_wu_pending == 0) 2317 wakeup(sd); 2318 } 2319 2320 void 2321 sr_scsi_cmd(struct scsi_xfer *xs) 2322 { 2323 struct scsi_link *link = xs->sc_link; 2324 struct sr_softc *sc = link->bus->sb_adapter_softc; 2325 struct sr_workunit *wu = xs->io; 2326 struct sr_discipline *sd; 2327 2328 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd target %d xs %p flags %#x\n", 2329 DEVNAME(sc), link->target, xs, xs->flags); 2330 2331 sd = sc->sc_targets[link->target]; 2332 if (sd == NULL) 2333 panic("%s: sr_scsi_cmd NULL discipline", DEVNAME(sc)); 2334 2335 if (sd->sd_deleted) { 2336 printf("%s: %s device is being deleted, failing io\n", 2337 DEVNAME(sc), sd->sd_meta->ssd_devname); 2338 goto stuffup; 2339 } 2340 2341 /* scsi layer *can* re-send wu without calling sr_wu_put(). */ 2342 sr_wu_release_ccbs(wu); 2343 sr_wu_init(sd, wu); 2344 wu->swu_state = SR_WU_INPROGRESS; 2345 wu->swu_xs = xs; 2346 2347 switch (xs->cmd.opcode) { 2348 case READ_COMMAND: 2349 case READ_10: 2350 case READ_16: 2351 case WRITE_COMMAND: 2352 case WRITE_10: 2353 case WRITE_16: 2354 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: READ/WRITE %02x\n", 2355 DEVNAME(sc), xs->cmd.opcode); 2356 if (sd->sd_scsi_rw(wu)) 2357 goto stuffup; 2358 break; 2359 2360 case SYNCHRONIZE_CACHE: 2361 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: SYNCHRONIZE_CACHE\n", 2362 DEVNAME(sc)); 2363 if (sd->sd_scsi_sync(wu)) 2364 goto stuffup; 2365 goto complete; 2366 2367 case TEST_UNIT_READY: 2368 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: TEST_UNIT_READY\n", 2369 DEVNAME(sc)); 2370 if (sd->sd_scsi_tur(wu)) 2371 goto stuffup; 2372 goto complete; 2373 2374 case START_STOP: 2375 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: START_STOP\n", 2376 DEVNAME(sc)); 2377 if (sd->sd_scsi_start_stop(wu)) 2378 goto stuffup; 2379 goto complete; 2380 2381 case INQUIRY: 2382 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: INQUIRY\n", 2383 DEVNAME(sc)); 2384 if (sd->sd_scsi_inquiry(wu)) 2385 goto stuffup; 2386 goto complete; 2387 2388 case READ_CAPACITY: 2389 case READ_CAPACITY_16: 2390 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd READ CAPACITY 0x%02x\n", 2391 DEVNAME(sc), xs->cmd.opcode); 2392 if (sd->sd_scsi_read_cap(wu)) 2393 goto stuffup; 2394 goto complete; 2395 2396 case REQUEST_SENSE: 2397 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd REQUEST SENSE\n", 2398 DEVNAME(sc)); 2399 if (sd->sd_scsi_req_sense(wu)) 2400 goto stuffup; 2401 goto complete; 2402 2403 default: 2404 DNPRINTF(SR_D_CMD, "%s: unsupported scsi command %x\n", 2405 DEVNAME(sc), xs->cmd.opcode); 2406 /* XXX might need to add generic function to handle others */ 2407 goto stuffup; 2408 } 2409 2410 return; 2411 stuffup: 2412 if (sd->sd_scsi_sense.error_code) { 2413 xs->error = XS_SENSE; 2414 memcpy(&xs->sense, &sd->sd_scsi_sense, sizeof(xs->sense)); 2415 bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense)); 2416 } else { 2417 xs->error = XS_DRIVER_STUFFUP; 2418 } 2419 complete: 2420 sr_scsi_done(sd, xs); 2421 } 2422 2423 int 2424 sr_scsi_probe(struct scsi_link *link) 2425 { 2426 struct sr_softc *sc = link->bus->sb_adapter_softc; 2427 struct sr_discipline *sd; 2428 2429 KASSERT(link->target < SR_MAX_LD && link->lun == 0); 2430 2431 sd = sc->sc_targets[link->target]; 2432 if (sd == NULL) 2433 return (ENODEV); 2434 2435 link->pool = &sd->sd_iopool; 2436 if (sd->sd_openings) 2437 link->openings = sd->sd_openings(sd); 2438 else 2439 link->openings = sd->sd_max_wu; 2440 2441 return (0); 2442 } 2443 2444 int 2445 sr_scsi_ioctl(struct scsi_link *link, u_long cmd, caddr_t addr, int flag) 2446 { 2447 struct sr_softc *sc = link->bus->sb_adapter_softc; 2448 struct sr_discipline *sd; 2449 2450 sd = sc->sc_targets[link->target]; 2451 if (sd == NULL) 2452 return (ENODEV); 2453 2454 DNPRINTF(SR_D_IOCTL, "%s: %s sr_scsi_ioctl cmd: %#lx\n", 2455 DEVNAME(sc), sd->sd_meta->ssd_devname, cmd); 2456 2457 /* Pass bio ioctls through to the bio handler. */ 2458 if (IOCGROUP(cmd) == 'B') 2459 return (sr_bio_handler(sc, sd, cmd, (struct bio *)addr)); 2460 2461 switch (cmd) { 2462 case DIOCGCACHE: 2463 case DIOCSCACHE: 2464 return (EOPNOTSUPP); 2465 default: 2466 return (ENOTTY); 2467 } 2468 } 2469 2470 int 2471 sr_bio_ioctl(struct device *dev, u_long cmd, caddr_t addr) 2472 { 2473 struct sr_softc *sc = (struct sr_softc *) dev; 2474 DNPRINTF(SR_D_IOCTL, "%s: sr_bio_ioctl\n", DEVNAME(sc)); 2475 2476 return sr_bio_handler(sc, NULL, cmd, (struct bio *)addr); 2477 } 2478 2479 int 2480 sr_bio_handler(struct sr_softc *sc, struct sr_discipline *sd, u_long cmd, 2481 struct bio *bio) 2482 { 2483 int rv = 0; 2484 2485 DNPRINTF(SR_D_IOCTL, "%s: sr_bio_handler ", DEVNAME(sc)); 2486 2487 rw_enter_write(&sc->sc_lock); 2488 2489 bio_status_init(&sc->sc_status, &sc->sc_dev); 2490 2491 switch (cmd) { 2492 case BIOCINQ: 2493 DNPRINTF(SR_D_IOCTL, "inq\n"); 2494 rv = sr_ioctl_inq(sc, (struct bioc_inq *)bio); 2495 break; 2496 2497 case BIOCVOL: 2498 DNPRINTF(SR_D_IOCTL, "vol\n"); 2499 rv = sr_ioctl_vol(sc, (struct bioc_vol *)bio); 2500 break; 2501 2502 case BIOCDISK: 2503 DNPRINTF(SR_D_IOCTL, "disk\n"); 2504 rv = sr_ioctl_disk(sc, (struct bioc_disk *)bio); 2505 break; 2506 2507 case BIOCALARM: 2508 DNPRINTF(SR_D_IOCTL, "alarm\n"); 2509 /*rv = sr_ioctl_alarm(sc, (struct bioc_alarm *)bio); */ 2510 break; 2511 2512 case BIOCBLINK: 2513 DNPRINTF(SR_D_IOCTL, "blink\n"); 2514 /*rv = sr_ioctl_blink(sc, (struct bioc_blink *)bio); */ 2515 break; 2516 2517 case BIOCSETSTATE: 2518 DNPRINTF(SR_D_IOCTL, "setstate\n"); 2519 rv = sr_ioctl_setstate(sc, (struct bioc_setstate *)bio); 2520 break; 2521 2522 case BIOCCREATERAID: 2523 DNPRINTF(SR_D_IOCTL, "createraid\n"); 2524 rv = sr_ioctl_createraid(sc, (struct bioc_createraid *)bio, 2525 1, NULL); 2526 break; 2527 2528 case BIOCDELETERAID: 2529 DNPRINTF(SR_D_IOCTL, "deleteraid\n"); 2530 rv = sr_ioctl_deleteraid(sc, sd, (struct bioc_deleteraid *)bio); 2531 break; 2532 2533 case BIOCDISCIPLINE: 2534 DNPRINTF(SR_D_IOCTL, "discipline\n"); 2535 rv = sr_ioctl_discipline(sc, sd, (struct bioc_discipline *)bio); 2536 break; 2537 2538 case BIOCINSTALLBOOT: 2539 DNPRINTF(SR_D_IOCTL, "installboot\n"); 2540 rv = sr_ioctl_installboot(sc, sd, 2541 (struct bioc_installboot *)bio); 2542 break; 2543 2544 default: 2545 DNPRINTF(SR_D_IOCTL, "invalid ioctl\n"); 2546 rv = ENOTTY; 2547 } 2548 2549 sc->sc_status.bs_status = (rv ? BIO_STATUS_ERROR : BIO_STATUS_SUCCESS); 2550 2551 if (sc->sc_status.bs_msg_count > 0) 2552 rv = 0; 2553 2554 memcpy(&bio->bio_status, &sc->sc_status, sizeof(struct bio_status)); 2555 2556 rw_exit_write(&sc->sc_lock); 2557 2558 return (rv); 2559 } 2560 2561 int 2562 sr_ioctl_inq(struct sr_softc *sc, struct bioc_inq *bi) 2563 { 2564 struct sr_discipline *sd; 2565 int vol = 0, disk = 0; 2566 2567 TAILQ_FOREACH(sd, &sc->sc_dis_list, sd_link) { 2568 vol++; 2569 disk += sd->sd_meta->ssdi.ssd_chunk_no; 2570 } 2571 2572 strlcpy(bi->bi_dev, sc->sc_dev.dv_xname, sizeof(bi->bi_dev)); 2573 bi->bi_novol = vol + sc->sc_hotspare_no; 2574 bi->bi_nodisk = disk + sc->sc_hotspare_no; 2575 2576 return (0); 2577 } 2578 2579 int 2580 sr_ioctl_vol(struct sr_softc *sc, struct bioc_vol *bv) 2581 { 2582 int vol = -1, rv = EINVAL; 2583 struct sr_discipline *sd; 2584 struct sr_chunk *hotspare; 2585 2586 TAILQ_FOREACH(sd, &sc->sc_dis_list, sd_link) { 2587 vol++; 2588 if (vol != bv->bv_volid) 2589 continue; 2590 2591 bv->bv_status = sd->sd_vol_status; 2592 bv->bv_size = sd->sd_meta->ssdi.ssd_size << DEV_BSHIFT; 2593 bv->bv_level = sd->sd_meta->ssdi.ssd_level; 2594 bv->bv_nodisk = sd->sd_meta->ssdi.ssd_chunk_no; 2595 2596 #ifdef CRYPTO 2597 if (sd->sd_meta->ssdi.ssd_level == 'C' && 2598 sd->mds.mdd_crypto.key_disk != NULL) 2599 bv->bv_nodisk++; 2600 else if (sd->sd_meta->ssdi.ssd_level == 0x1C && 2601 sd->mds.mdd_raid1c.sr1c_crypto.key_disk != NULL) 2602 bv->bv_nodisk++; 2603 #endif 2604 if (bv->bv_status == BIOC_SVREBUILD) 2605 bv->bv_percent = sr_rebuild_percent(sd); 2606 2607 strlcpy(bv->bv_dev, sd->sd_meta->ssd_devname, 2608 sizeof(bv->bv_dev)); 2609 strlcpy(bv->bv_vendor, sd->sd_meta->ssdi.ssd_vendor, 2610 sizeof(bv->bv_vendor)); 2611 rv = 0; 2612 goto done; 2613 } 2614 2615 /* Check hotspares list. */ 2616 SLIST_FOREACH(hotspare, &sc->sc_hotspare_list, src_link) { 2617 vol++; 2618 if (vol != bv->bv_volid) 2619 continue; 2620 2621 bv->bv_status = BIOC_SVONLINE; 2622 bv->bv_size = hotspare->src_meta.scmi.scm_size << DEV_BSHIFT; 2623 bv->bv_level = -1; /* Hotspare. */ 2624 bv->bv_nodisk = 1; 2625 strlcpy(bv->bv_dev, hotspare->src_meta.scmi.scm_devname, 2626 sizeof(bv->bv_dev)); 2627 strlcpy(bv->bv_vendor, hotspare->src_meta.scmi.scm_devname, 2628 sizeof(bv->bv_vendor)); 2629 rv = 0; 2630 goto done; 2631 } 2632 2633 done: 2634 return (rv); 2635 } 2636 2637 int 2638 sr_ioctl_disk(struct sr_softc *sc, struct bioc_disk *bd) 2639 { 2640 struct sr_discipline *sd; 2641 struct sr_chunk *src, *hotspare; 2642 int vol = -1, rv = EINVAL; 2643 2644 if (bd->bd_diskid < 0) 2645 goto done; 2646 2647 TAILQ_FOREACH(sd, &sc->sc_dis_list, sd_link) { 2648 vol++; 2649 if (vol != bd->bd_volid) 2650 continue; 2651 2652 if (bd->bd_diskid < sd->sd_meta->ssdi.ssd_chunk_no) 2653 src = sd->sd_vol.sv_chunks[bd->bd_diskid]; 2654 #ifdef CRYPTO 2655 else if (bd->bd_diskid == sd->sd_meta->ssdi.ssd_chunk_no && 2656 sd->sd_meta->ssdi.ssd_level == 'C' && 2657 sd->mds.mdd_crypto.key_disk != NULL) 2658 src = sd->mds.mdd_crypto.key_disk; 2659 else if (bd->bd_diskid == sd->sd_meta->ssdi.ssd_chunk_no && 2660 sd->sd_meta->ssdi.ssd_level == 0x1C && 2661 sd->mds.mdd_raid1c.sr1c_crypto.key_disk != NULL) 2662 src = sd->mds.mdd_crypto.key_disk; 2663 #endif 2664 else 2665 break; 2666 2667 bd->bd_status = src->src_meta.scm_status; 2668 bd->bd_size = src->src_meta.scmi.scm_size << DEV_BSHIFT; 2669 bd->bd_channel = vol; 2670 bd->bd_target = bd->bd_diskid; 2671 strlcpy(bd->bd_vendor, src->src_meta.scmi.scm_devname, 2672 sizeof(bd->bd_vendor)); 2673 rv = 0; 2674 goto done; 2675 } 2676 2677 /* Check hotspares list. */ 2678 SLIST_FOREACH(hotspare, &sc->sc_hotspare_list, src_link) { 2679 vol++; 2680 if (vol != bd->bd_volid) 2681 continue; 2682 2683 if (bd->bd_diskid != 0) 2684 break; 2685 2686 bd->bd_status = hotspare->src_meta.scm_status; 2687 bd->bd_size = hotspare->src_meta.scmi.scm_size << DEV_BSHIFT; 2688 bd->bd_channel = vol; 2689 bd->bd_target = bd->bd_diskid; 2690 strlcpy(bd->bd_vendor, hotspare->src_meta.scmi.scm_devname, 2691 sizeof(bd->bd_vendor)); 2692 rv = 0; 2693 goto done; 2694 } 2695 2696 done: 2697 return (rv); 2698 } 2699 2700 int 2701 sr_ioctl_setstate(struct sr_softc *sc, struct bioc_setstate *bs) 2702 { 2703 int rv = EINVAL; 2704 int vol = -1, found, c; 2705 struct sr_discipline *sd; 2706 struct sr_chunk *ch_entry; 2707 struct sr_chunk_head *cl; 2708 2709 if (bs->bs_other_id_type == BIOC_SSOTHER_UNUSED) 2710 goto done; 2711 2712 if (bs->bs_status == BIOC_SSHOTSPARE) { 2713 rv = sr_hotspare(sc, (dev_t)bs->bs_other_id); 2714 goto done; 2715 } 2716 2717 TAILQ_FOREACH(sd, &sc->sc_dis_list, sd_link) { 2718 vol++; 2719 if (vol == bs->bs_volid) 2720 break; 2721 } 2722 if (sd == NULL) 2723 goto done; 2724 2725 switch (bs->bs_status) { 2726 case BIOC_SSOFFLINE: 2727 /* Take chunk offline */ 2728 found = c = 0; 2729 cl = &sd->sd_vol.sv_chunk_list; 2730 SLIST_FOREACH(ch_entry, cl, src_link) { 2731 if (ch_entry->src_dev_mm == bs->bs_other_id) { 2732 found = 1; 2733 break; 2734 } 2735 c++; 2736 } 2737 if (found == 0) { 2738 sr_error(sc, "chunk not part of array"); 2739 goto done; 2740 } 2741 2742 /* XXX: check current state first */ 2743 sd->sd_set_chunk_state(sd, c, BIOC_SDOFFLINE); 2744 2745 if (sr_meta_save(sd, SR_META_DIRTY)) { 2746 sr_error(sc, "could not save metadata for %s", 2747 sd->sd_meta->ssd_devname); 2748 goto done; 2749 } 2750 rv = 0; 2751 break; 2752 2753 case BIOC_SDSCRUB: 2754 break; 2755 2756 case BIOC_SSREBUILD: 2757 rv = sr_rebuild_init(sd, (dev_t)bs->bs_other_id, 0); 2758 break; 2759 2760 default: 2761 sr_error(sc, "unsupported state request %d", bs->bs_status); 2762 } 2763 2764 done: 2765 return (rv); 2766 } 2767 2768 int 2769 sr_chunk_in_use(struct sr_softc *sc, dev_t dev) 2770 { 2771 struct sr_discipline *sd; 2772 struct sr_chunk *chunk; 2773 int i; 2774 2775 DNPRINTF(SR_D_MISC, "%s: sr_chunk_in_use(%d)\n", DEVNAME(sc), dev); 2776 2777 if (dev == NODEV) 2778 return BIOC_SDINVALID; 2779 2780 /* See if chunk is already in use. */ 2781 TAILQ_FOREACH(sd, &sc->sc_dis_list, sd_link) { 2782 for (i = 0; i < sd->sd_meta->ssdi.ssd_chunk_no; i++) { 2783 chunk = sd->sd_vol.sv_chunks[i]; 2784 if (chunk->src_dev_mm == dev) 2785 return chunk->src_meta.scm_status; 2786 } 2787 } 2788 2789 /* Check hotspares list. */ 2790 SLIST_FOREACH(chunk, &sc->sc_hotspare_list, src_link) 2791 if (chunk->src_dev_mm == dev) 2792 return chunk->src_meta.scm_status; 2793 2794 return BIOC_SDINVALID; 2795 } 2796 2797 int 2798 sr_hotspare(struct sr_softc *sc, dev_t dev) 2799 { 2800 struct sr_discipline *sd = NULL; 2801 struct sr_metadata *sm = NULL; 2802 struct sr_meta_chunk *hm; 2803 struct sr_chunk_head *cl; 2804 struct sr_chunk *chunk, *last, *hotspare = NULL; 2805 struct sr_uuid uuid; 2806 struct disklabel label; 2807 struct vnode *vn; 2808 u_int64_t size; 2809 char devname[32]; 2810 int rv = EINVAL; 2811 int c, part, open = 0; 2812 2813 /* 2814 * Add device to global hotspares list. 2815 */ 2816 2817 sr_meta_getdevname(sc, dev, devname, sizeof(devname)); 2818 2819 /* Make sure chunk is not already in use. */ 2820 c = sr_chunk_in_use(sc, dev); 2821 if (c != BIOC_SDINVALID && c != BIOC_SDOFFLINE) { 2822 if (c == BIOC_SDHOTSPARE) 2823 sr_error(sc, "%s is already a hotspare", devname); 2824 else 2825 sr_error(sc, "%s is already in use", devname); 2826 goto done; 2827 } 2828 2829 /* XXX - See if there is an existing degraded volume... */ 2830 2831 /* Open device. */ 2832 if (bdevvp(dev, &vn)) { 2833 sr_error(sc, "sr_hotspare: cannot allocate vnode"); 2834 goto done; 2835 } 2836 if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED, curproc)) { 2837 DNPRINTF(SR_D_META,"%s: sr_hotspare cannot open %s\n", 2838 DEVNAME(sc), devname); 2839 vput(vn); 2840 goto fail; 2841 } 2842 open = 1; /* close dev on error */ 2843 2844 /* Get partition details. */ 2845 part = DISKPART(dev); 2846 if (VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&label, FREAD, 2847 NOCRED, curproc)) { 2848 DNPRINTF(SR_D_META, "%s: sr_hotspare ioctl failed\n", 2849 DEVNAME(sc)); 2850 goto fail; 2851 } 2852 if (label.d_partitions[part].p_fstype != FS_RAID) { 2853 sr_error(sc, "%s partition not of type RAID (%d)", 2854 devname, label.d_partitions[part].p_fstype); 2855 goto fail; 2856 } 2857 2858 /* Calculate partition size. */ 2859 size = DL_SECTOBLK(&label, DL_GETPSIZE(&label.d_partitions[part])); 2860 if (size <= SR_DATA_OFFSET) { 2861 DNPRINTF(SR_D_META, "%s: %s partition too small\n", DEVNAME(sc), 2862 devname); 2863 goto fail; 2864 } 2865 size -= SR_DATA_OFFSET; 2866 if (size > INT64_MAX) { 2867 DNPRINTF(SR_D_META, "%s: %s partition too large\n", DEVNAME(sc), 2868 devname); 2869 goto fail; 2870 } 2871 2872 /* 2873 * Create and populate chunk metadata. 2874 */ 2875 2876 sr_uuid_generate(&uuid); 2877 hotspare = malloc(sizeof(struct sr_chunk), M_DEVBUF, M_WAITOK | M_ZERO); 2878 2879 hotspare->src_dev_mm = dev; 2880 hotspare->src_vn = vn; 2881 strlcpy(hotspare->src_devname, devname, sizeof(hm->scmi.scm_devname)); 2882 hotspare->src_size = size; 2883 2884 hm = &hotspare->src_meta; 2885 hm->scmi.scm_volid = SR_HOTSPARE_VOLID; 2886 hm->scmi.scm_chunk_id = 0; 2887 hm->scmi.scm_size = size; 2888 hm->scmi.scm_coerced_size = size; 2889 strlcpy(hm->scmi.scm_devname, devname, sizeof(hm->scmi.scm_devname)); 2890 memcpy(&hm->scmi.scm_uuid, &uuid, sizeof(struct sr_uuid)); 2891 2892 sr_checksum(sc, hm, &hm->scm_checksum, 2893 sizeof(struct sr_meta_chunk_invariant)); 2894 2895 hm->scm_status = BIOC_SDHOTSPARE; 2896 2897 /* 2898 * Create and populate our own discipline and metadata. 2899 */ 2900 2901 sm = malloc(sizeof(struct sr_metadata), M_DEVBUF, M_WAITOK | M_ZERO); 2902 sm->ssdi.ssd_magic = SR_MAGIC; 2903 sm->ssdi.ssd_version = SR_META_VERSION; 2904 sm->ssd_ondisk = 0; 2905 sm->ssdi.ssd_vol_flags = 0; 2906 memcpy(&sm->ssdi.ssd_uuid, &uuid, sizeof(struct sr_uuid)); 2907 sm->ssdi.ssd_chunk_no = 1; 2908 sm->ssdi.ssd_volid = SR_HOTSPARE_VOLID; 2909 sm->ssdi.ssd_level = SR_HOTSPARE_LEVEL; 2910 sm->ssdi.ssd_size = size; 2911 sm->ssdi.ssd_secsize = label.d_secsize; 2912 strlcpy(sm->ssdi.ssd_vendor, "OPENBSD", sizeof(sm->ssdi.ssd_vendor)); 2913 snprintf(sm->ssdi.ssd_product, sizeof(sm->ssdi.ssd_product), 2914 "SR %s", "HOTSPARE"); 2915 snprintf(sm->ssdi.ssd_revision, sizeof(sm->ssdi.ssd_revision), 2916 "%03d", SR_META_VERSION); 2917 2918 sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, M_WAITOK | M_ZERO); 2919 sd->sd_sc = sc; 2920 sd->sd_meta = sm; 2921 sd->sd_meta_type = SR_META_F_NATIVE; 2922 sd->sd_vol_status = BIOC_SVONLINE; 2923 strlcpy(sd->sd_name, "HOTSPARE", sizeof(sd->sd_name)); 2924 SLIST_INIT(&sd->sd_meta_opt); 2925 2926 /* Add chunk to volume. */ 2927 sd->sd_vol.sv_chunks = malloc(sizeof(struct sr_chunk *), M_DEVBUF, 2928 M_WAITOK | M_ZERO); 2929 sd->sd_vol.sv_chunks[0] = hotspare; 2930 SLIST_INIT(&sd->sd_vol.sv_chunk_list); 2931 SLIST_INSERT_HEAD(&sd->sd_vol.sv_chunk_list, hotspare, src_link); 2932 2933 /* Save metadata. */ 2934 if (sr_meta_save(sd, SR_META_DIRTY)) { 2935 sr_error(sc, "could not save metadata to %s", devname); 2936 goto fail; 2937 } 2938 2939 /* 2940 * Add chunk to hotspare list. 2941 */ 2942 rw_enter_write(&sc->sc_hs_lock); 2943 cl = &sc->sc_hotspare_list; 2944 if (SLIST_EMPTY(cl)) 2945 SLIST_INSERT_HEAD(cl, hotspare, src_link); 2946 else { 2947 SLIST_FOREACH(chunk, cl, src_link) 2948 last = chunk; 2949 SLIST_INSERT_AFTER(last, hotspare, src_link); 2950 } 2951 sc->sc_hotspare_no++; 2952 rw_exit_write(&sc->sc_hs_lock); 2953 2954 rv = 0; 2955 goto done; 2956 2957 fail: 2958 free(hotspare, M_DEVBUF, sizeof(*hotspare)); 2959 2960 done: 2961 if (sd) 2962 free(sd->sd_vol.sv_chunks, M_DEVBUF, 2963 sizeof(sd->sd_vol.sv_chunks)); 2964 free(sd, M_DEVBUF, sizeof(*sd)); 2965 free(sm, M_DEVBUF, sizeof(*sm)); 2966 if (open) { 2967 VOP_CLOSE(vn, FREAD | FWRITE, NOCRED, curproc); 2968 vput(vn); 2969 } 2970 2971 return (rv); 2972 } 2973 2974 void 2975 sr_hotspare_rebuild_callback(void *xsd) 2976 { 2977 struct sr_discipline *sd = xsd; 2978 sr_hotspare_rebuild(sd); 2979 } 2980 2981 void 2982 sr_hotspare_rebuild(struct sr_discipline *sd) 2983 { 2984 struct sr_softc *sc = sd->sd_sc; 2985 struct sr_chunk_head *cl; 2986 struct sr_chunk *hotspare, *chunk = NULL; 2987 struct sr_workunit *wu; 2988 struct sr_ccb *ccb; 2989 int i, s, cid, busy; 2990 2991 /* 2992 * Attempt to locate a hotspare and initiate rebuild. 2993 */ 2994 2995 /* Find first offline chunk. */ 2996 for (cid = 0; cid < sd->sd_meta->ssdi.ssd_chunk_no; cid++) { 2997 if (sd->sd_vol.sv_chunks[cid]->src_meta.scm_status == 2998 BIOC_SDOFFLINE) { 2999 chunk = sd->sd_vol.sv_chunks[cid]; 3000 break; 3001 } 3002 } 3003 if (chunk == NULL) { 3004 printf("%s: no offline chunk found on %s!\n", 3005 DEVNAME(sc), sd->sd_meta->ssd_devname); 3006 return; 3007 } 3008 3009 /* See if we have a suitable hotspare... */ 3010 rw_enter_write(&sc->sc_hs_lock); 3011 cl = &sc->sc_hotspare_list; 3012 SLIST_FOREACH(hotspare, cl, src_link) 3013 if (hotspare->src_size >= chunk->src_size && 3014 hotspare->src_secsize <= sd->sd_meta->ssdi.ssd_secsize) 3015 break; 3016 3017 if (hotspare != NULL) { 3018 3019 printf("%s: %s volume degraded, will attempt to " 3020 "rebuild on hotspare %s\n", DEVNAME(sc), 3021 sd->sd_meta->ssd_devname, hotspare->src_devname); 3022 3023 /* 3024 * Ensure that all pending I/O completes on the failed chunk 3025 * before trying to initiate a rebuild. 3026 */ 3027 i = 0; 3028 do { 3029 busy = 0; 3030 3031 s = splbio(); 3032 TAILQ_FOREACH(wu, &sd->sd_wu_pendq, swu_link) { 3033 TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) { 3034 if (ccb->ccb_target == cid) 3035 busy = 1; 3036 } 3037 } 3038 TAILQ_FOREACH(wu, &sd->sd_wu_defq, swu_link) { 3039 TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) { 3040 if (ccb->ccb_target == cid) 3041 busy = 1; 3042 } 3043 } 3044 splx(s); 3045 3046 if (busy) { 3047 tsleep_nsec(sd, PRIBIO, "sr_hotspare", 3048 SEC_TO_NSEC(1)); 3049 i++; 3050 } 3051 3052 } while (busy && i < 120); 3053 3054 DNPRINTF(SR_D_META, "%s: waited %i seconds for I/O to " 3055 "complete on failed chunk %s\n", DEVNAME(sc), 3056 i, chunk->src_devname); 3057 3058 if (busy) { 3059 printf("%s: pending I/O failed to complete on " 3060 "failed chunk %s, hotspare rebuild aborted...\n", 3061 DEVNAME(sc), chunk->src_devname); 3062 goto done; 3063 } 3064 3065 s = splbio(); 3066 rw_enter_write(&sc->sc_lock); 3067 bio_status_init(&sc->sc_status, &sc->sc_dev); 3068 if (sr_rebuild_init(sd, hotspare->src_dev_mm, 1) == 0) { 3069 3070 /* Remove hotspare from available list. */ 3071 sc->sc_hotspare_no--; 3072 SLIST_REMOVE(cl, hotspare, sr_chunk, src_link); 3073 free(hotspare, M_DEVBUF, sizeof(*hotspare)); 3074 3075 } 3076 rw_exit_write(&sc->sc_lock); 3077 splx(s); 3078 } 3079 done: 3080 rw_exit_write(&sc->sc_hs_lock); 3081 } 3082 3083 int 3084 sr_rebuild_init(struct sr_discipline *sd, dev_t dev, int hotspare) 3085 { 3086 struct sr_softc *sc = sd->sd_sc; 3087 struct sr_chunk *chunk = NULL; 3088 struct sr_meta_chunk *meta; 3089 struct disklabel label; 3090 struct vnode *vn; 3091 u_int64_t size; 3092 int64_t csize; 3093 char devname[32]; 3094 int rv = EINVAL, open = 0; 3095 int cid, i, part, status; 3096 3097 /* 3098 * Attempt to initiate a rebuild onto the specified device. 3099 */ 3100 3101 if (!(sd->sd_capabilities & SR_CAP_REBUILD)) { 3102 sr_error(sc, "discipline does not support rebuild"); 3103 goto done; 3104 } 3105 3106 /* make sure volume is in the right state */ 3107 if (sd->sd_vol_status == BIOC_SVREBUILD) { 3108 sr_error(sc, "rebuild already in progress"); 3109 goto done; 3110 } 3111 if (sd->sd_vol_status != BIOC_SVDEGRADED) { 3112 sr_error(sc, "volume not degraded"); 3113 goto done; 3114 } 3115 3116 /* Find first offline chunk. */ 3117 for (cid = 0; cid < sd->sd_meta->ssdi.ssd_chunk_no; cid++) { 3118 if (sd->sd_vol.sv_chunks[cid]->src_meta.scm_status == 3119 BIOC_SDOFFLINE) { 3120 chunk = sd->sd_vol.sv_chunks[cid]; 3121 break; 3122 } 3123 } 3124 if (chunk == NULL) { 3125 sr_error(sc, "no offline chunks available to rebuild"); 3126 goto done; 3127 } 3128 3129 /* Get coerced size from another online chunk. */ 3130 csize = 0; 3131 for (i = 0; i < sd->sd_meta->ssdi.ssd_chunk_no; i++) { 3132 if (sd->sd_vol.sv_chunks[i]->src_meta.scm_status == 3133 BIOC_SDONLINE) { 3134 meta = &sd->sd_vol.sv_chunks[i]->src_meta; 3135 csize = meta->scmi.scm_coerced_size; 3136 break; 3137 } 3138 } 3139 if (csize == 0) { 3140 sr_error(sc, "no online chunks available for rebuild"); 3141 goto done; 3142 } 3143 3144 sr_meta_getdevname(sc, dev, devname, sizeof(devname)); 3145 if (bdevvp(dev, &vn)) { 3146 printf("%s: sr_rebuild_init: can't allocate vnode\n", 3147 DEVNAME(sc)); 3148 goto done; 3149 } 3150 if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED, curproc)) { 3151 DNPRINTF(SR_D_META,"%s: sr_ioctl_setstate can't " 3152 "open %s\n", DEVNAME(sc), devname); 3153 vput(vn); 3154 goto done; 3155 } 3156 open = 1; /* close dev on error */ 3157 3158 /* Get disklabel and check partition. */ 3159 part = DISKPART(dev); 3160 if (VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&label, FREAD, 3161 NOCRED, curproc)) { 3162 DNPRINTF(SR_D_META, "%s: sr_ioctl_setstate ioctl failed\n", 3163 DEVNAME(sc)); 3164 goto done; 3165 } 3166 if (label.d_partitions[part].p_fstype != FS_RAID) { 3167 sr_error(sc, "%s partition not of type RAID (%d)", 3168 devname, label.d_partitions[part].p_fstype); 3169 goto done; 3170 } 3171 3172 /* Is the partition large enough? */ 3173 size = DL_SECTOBLK(&label, DL_GETPSIZE(&label.d_partitions[part])); 3174 if (size <= sd->sd_meta->ssd_data_blkno) { 3175 sr_error(sc, "%s: %s partition too small", DEVNAME(sc), 3176 devname); 3177 goto done; 3178 } 3179 size -= sd->sd_meta->ssd_data_blkno; 3180 if (size > INT64_MAX) { 3181 sr_error(sc, "%s: %s partition too large", DEVNAME(sc), 3182 devname); 3183 goto done; 3184 } 3185 if (size < csize) { 3186 sr_error(sc, "%s partition too small, at least %lld bytes " 3187 "required", devname, (long long)(csize << DEV_BSHIFT)); 3188 goto done; 3189 } else if (size > csize) 3190 sr_warn(sc, "%s partition too large, wasting %lld bytes", 3191 devname, (long long)((size - csize) << DEV_BSHIFT)); 3192 if (label.d_secsize > sd->sd_meta->ssdi.ssd_secsize) { 3193 sr_error(sc, "%s sector size too large, <= %u bytes " 3194 "required", devname, sd->sd_meta->ssdi.ssd_secsize); 3195 goto done; 3196 } 3197 3198 /* Ensure that this chunk is not already in use. */ 3199 status = sr_chunk_in_use(sc, dev); 3200 if (status != BIOC_SDINVALID && status != BIOC_SDOFFLINE && 3201 !(hotspare && status == BIOC_SDHOTSPARE)) { 3202 sr_error(sc, "%s is already in use", devname); 3203 goto done; 3204 } 3205 3206 /* Reset rebuild counter since we rebuilding onto a new chunk. */ 3207 sd->sd_meta->ssd_rebuild = 0; 3208 3209 open = 0; /* leave dev open from here on out */ 3210 3211 /* Fix up chunk. */ 3212 memcpy(chunk->src_duid, label.d_uid, sizeof(chunk->src_duid)); 3213 chunk->src_dev_mm = dev; 3214 chunk->src_vn = vn; 3215 3216 /* Reconstruct metadata. */ 3217 meta = &chunk->src_meta; 3218 meta->scmi.scm_volid = sd->sd_meta->ssdi.ssd_volid; 3219 meta->scmi.scm_chunk_id = cid; 3220 strlcpy(meta->scmi.scm_devname, devname, 3221 sizeof(meta->scmi.scm_devname)); 3222 meta->scmi.scm_size = size; 3223 meta->scmi.scm_coerced_size = csize; 3224 memcpy(&meta->scmi.scm_uuid, &sd->sd_meta->ssdi.ssd_uuid, 3225 sizeof(meta->scmi.scm_uuid)); 3226 sr_checksum(sc, meta, &meta->scm_checksum, 3227 sizeof(struct sr_meta_chunk_invariant)); 3228 3229 sd->sd_set_chunk_state(sd, cid, BIOC_SDREBUILD); 3230 3231 if (sr_meta_save(sd, SR_META_DIRTY)) { 3232 sr_error(sc, "could not save metadata to %s", devname); 3233 open = 1; 3234 goto done; 3235 } 3236 3237 sr_warn(sc, "rebuild of %s started on %s", 3238 sd->sd_meta->ssd_devname, devname); 3239 3240 sd->sd_reb_abort = 0; 3241 kthread_create_deferred(sr_rebuild_start, sd); 3242 3243 rv = 0; 3244 done: 3245 if (open) { 3246 VOP_CLOSE(vn, FREAD | FWRITE, NOCRED, curproc); 3247 vput(vn); 3248 } 3249 3250 return (rv); 3251 } 3252 3253 int 3254 sr_rebuild_percent(struct sr_discipline *sd) 3255 { 3256 daddr_t rb, sz; 3257 3258 sz = sd->sd_meta->ssdi.ssd_size; 3259 rb = sd->sd_meta->ssd_rebuild; 3260 3261 if (rb > 0) 3262 return (100 - ((sz * 100 - rb * 100) / sz) - 1); 3263 3264 return (0); 3265 } 3266 3267 void 3268 sr_roam_chunks(struct sr_discipline *sd) 3269 { 3270 struct sr_softc *sc = sd->sd_sc; 3271 struct sr_chunk *chunk; 3272 struct sr_meta_chunk *meta; 3273 int roamed = 0; 3274 3275 /* Have any chunks roamed? */ 3276 SLIST_FOREACH(chunk, &sd->sd_vol.sv_chunk_list, src_link) { 3277 meta = &chunk->src_meta; 3278 if (strncmp(meta->scmi.scm_devname, chunk->src_devname, 3279 sizeof(meta->scmi.scm_devname))) { 3280 3281 printf("%s: roaming device %s -> %s\n", DEVNAME(sc), 3282 meta->scmi.scm_devname, chunk->src_devname); 3283 3284 strlcpy(meta->scmi.scm_devname, chunk->src_devname, 3285 sizeof(meta->scmi.scm_devname)); 3286 3287 roamed++; 3288 } 3289 } 3290 3291 if (roamed) 3292 sr_meta_save(sd, SR_META_DIRTY); 3293 } 3294 3295 int 3296 sr_ioctl_createraid(struct sr_softc *sc, struct bioc_createraid *bc, 3297 int user, void *data) 3298 { 3299 struct sr_meta_opt_item *omi; 3300 struct sr_chunk_head *cl; 3301 struct sr_discipline *sd = NULL; 3302 struct sr_chunk *ch_entry; 3303 struct scsi_link *link; 3304 struct device *dev; 3305 char *uuid, devname[32]; 3306 dev_t *dt = NULL; 3307 int i, no_chunk, rv = EINVAL, target, vol; 3308 int no_meta; 3309 3310 DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_createraid(%d)\n", 3311 DEVNAME(sc), user); 3312 3313 /* user input */ 3314 if (bc->bc_dev_list_len > BIOC_CRMAXLEN) 3315 goto unwind; 3316 3317 dt = malloc(bc->bc_dev_list_len, M_DEVBUF, M_WAITOK | M_ZERO); 3318 if (user) { 3319 if (copyin(bc->bc_dev_list, dt, bc->bc_dev_list_len) != 0) 3320 goto unwind; 3321 } else 3322 memcpy(dt, bc->bc_dev_list, bc->bc_dev_list_len); 3323 3324 /* Initialise discipline. */ 3325 sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, M_WAITOK | M_ZERO); 3326 sd->sd_sc = sc; 3327 SLIST_INIT(&sd->sd_meta_opt); 3328 sd->sd_taskq = taskq_create("srdis", 1, IPL_BIO, 0); 3329 if (sd->sd_taskq == NULL) { 3330 sr_error(sc, "could not create discipline taskq"); 3331 goto unwind; 3332 } 3333 if (sr_discipline_init(sd, bc->bc_level)) { 3334 sr_error(sc, "could not initialize discipline"); 3335 goto unwind; 3336 } 3337 3338 no_chunk = bc->bc_dev_list_len / sizeof(dev_t); 3339 cl = &sd->sd_vol.sv_chunk_list; 3340 SLIST_INIT(cl); 3341 3342 /* Ensure that chunks are not already in use. */ 3343 for (i = 0; i < no_chunk; i++) { 3344 if (sr_chunk_in_use(sc, dt[i]) != BIOC_SDINVALID) { 3345 sr_meta_getdevname(sc, dt[i], devname, sizeof(devname)); 3346 sr_error(sc, "chunk %s already in use", devname); 3347 goto unwind; 3348 } 3349 } 3350 3351 sd->sd_meta_type = sr_meta_probe(sd, dt, no_chunk); 3352 if (sd->sd_meta_type == SR_META_F_INVALID) { 3353 sr_error(sc, "invalid metadata format"); 3354 goto unwind; 3355 } 3356 3357 if (sr_meta_attach(sd, no_chunk, bc->bc_flags & BIOC_SCFORCE)) 3358 goto unwind; 3359 3360 /* force the raid volume by clearing metadata region */ 3361 if (bc->bc_flags & BIOC_SCFORCE) { 3362 /* make sure disk isn't up and running */ 3363 if (sr_meta_read(sd)) 3364 if (sr_already_assembled(sd)) { 3365 uuid = sr_uuid_format( 3366 &sd->sd_meta->ssdi.ssd_uuid); 3367 sr_error(sc, "disk %s is currently in use; " 3368 "cannot force create", uuid); 3369 free(uuid, M_DEVBUF, 37); 3370 goto unwind; 3371 } 3372 3373 if (sr_meta_clear(sd)) { 3374 sr_error(sc, "failed to clear metadata"); 3375 goto unwind; 3376 } 3377 } 3378 3379 no_meta = sr_meta_read(sd); 3380 if (no_meta == -1) { 3381 3382 /* Corrupt metadata on one or more chunks. */ 3383 sr_error(sc, "one of the chunks has corrupt metadata; " 3384 "aborting assembly"); 3385 goto unwind; 3386 3387 } else if (no_meta == 0) { 3388 3389 /* Initialise volume and chunk metadata. */ 3390 sr_meta_init(sd, bc->bc_level, no_chunk); 3391 sd->sd_vol_status = BIOC_SVONLINE; 3392 sd->sd_meta_flags = bc->bc_flags & BIOC_SCNOAUTOASSEMBLE; 3393 if (sd->sd_create) { 3394 if ((i = sd->sd_create(sd, bc, no_chunk, 3395 sd->sd_vol.sv_chunk_minsz))) { 3396 rv = i; 3397 goto unwind; 3398 } 3399 } 3400 sr_meta_init_complete(sd); 3401 3402 DNPRINTF(SR_D_IOCTL, 3403 "%s: sr_ioctl_createraid: vol_size: %lld\n", 3404 DEVNAME(sc), sd->sd_meta->ssdi.ssd_size); 3405 3406 /* Warn if we've wasted chunk space due to coercing. */ 3407 if ((sd->sd_capabilities & SR_CAP_NON_COERCED) == 0 && 3408 sd->sd_vol.sv_chunk_minsz != sd->sd_vol.sv_chunk_maxsz) 3409 sr_warn(sc, "chunk sizes are not equal; up to %llu " 3410 "blocks wasted per chunk", 3411 sd->sd_vol.sv_chunk_maxsz - 3412 sd->sd_vol.sv_chunk_minsz); 3413 3414 } else { 3415 3416 /* Ensure we are assembling the correct # of chunks. */ 3417 if (bc->bc_level == 0x1C && 3418 sd->sd_meta->ssdi.ssd_chunk_no > no_chunk) { 3419 sr_warn(sc, "trying to bring up %s degraded", 3420 sd->sd_meta->ssd_devname); 3421 } else if (sd->sd_meta->ssdi.ssd_chunk_no != no_chunk) { 3422 sr_error(sc, "volume chunk count does not match metadata " 3423 "chunk count"); 3424 goto unwind; 3425 } 3426 3427 /* Ensure metadata level matches requested assembly level. */ 3428 if (sd->sd_meta->ssdi.ssd_level != bc->bc_level) { 3429 sr_error(sc, "volume level does not match metadata " 3430 "level"); 3431 goto unwind; 3432 } 3433 3434 if (sr_already_assembled(sd)) { 3435 uuid = sr_uuid_format(&sd->sd_meta->ssdi.ssd_uuid); 3436 sr_error(sc, "disk %s already assembled", uuid); 3437 free(uuid, M_DEVBUF, 37); 3438 goto unwind; 3439 } 3440 3441 if (user == 0 && sd->sd_meta_flags & BIOC_SCNOAUTOASSEMBLE) { 3442 DNPRINTF(SR_D_META, "%s: disk not auto assembled from " 3443 "metadata\n", DEVNAME(sc)); 3444 goto unwind; 3445 } 3446 3447 if (no_meta != no_chunk) 3448 sr_warn(sc, "trying to bring up %s degraded", 3449 sd->sd_meta->ssd_devname); 3450 3451 if (sd->sd_meta->ssd_meta_flags & SR_META_DIRTY) 3452 sr_warn(sc, "%s was not shutdown properly", 3453 sd->sd_meta->ssd_devname); 3454 3455 SLIST_FOREACH(omi, &sd->sd_meta_opt, omi_link) 3456 if (sd->sd_meta_opt_handler == NULL || 3457 sd->sd_meta_opt_handler(sd, omi->omi_som) != 0) 3458 sr_meta_opt_handler(sd, omi->omi_som); 3459 3460 if (sd->sd_assemble) { 3461 if ((i = sd->sd_assemble(sd, bc, no_chunk, data))) { 3462 rv = i; 3463 goto unwind; 3464 } 3465 } 3466 3467 DNPRINTF(SR_D_META, "%s: disk assembled from metadata\n", 3468 DEVNAME(sc)); 3469 3470 } 3471 3472 /* Metadata MUST be fully populated by this point. */ 3473 TAILQ_INSERT_TAIL(&sc->sc_dis_list, sd, sd_link); 3474 3475 /* Allocate all resources. */ 3476 if ((rv = sd->sd_alloc_resources(sd))) 3477 goto unwind; 3478 3479 /* Adjust flags if necessary. */ 3480 if ((sd->sd_capabilities & SR_CAP_AUTO_ASSEMBLE) && 3481 (bc->bc_flags & BIOC_SCNOAUTOASSEMBLE) != 3482 (sd->sd_meta->ssdi.ssd_vol_flags & BIOC_SCNOAUTOASSEMBLE)) { 3483 sd->sd_meta->ssdi.ssd_vol_flags &= ~BIOC_SCNOAUTOASSEMBLE; 3484 sd->sd_meta->ssdi.ssd_vol_flags |= 3485 bc->bc_flags & BIOC_SCNOAUTOASSEMBLE; 3486 } 3487 3488 if (sd->sd_capabilities & SR_CAP_SYSTEM_DISK) { 3489 /* Initialise volume state. */ 3490 sd->sd_set_vol_state(sd); 3491 if (sd->sd_vol_status == BIOC_SVOFFLINE) { 3492 sr_error(sc, "%s is offline, will not be brought " 3493 "online", sd->sd_meta->ssd_devname); 3494 goto unwind; 3495 } 3496 3497 /* Setup SCSI iopool. */ 3498 scsi_iopool_init(&sd->sd_iopool, sd, sr_wu_get, sr_wu_put); 3499 3500 /* 3501 * All checks passed - return ENXIO if volume cannot be created. 3502 */ 3503 rv = ENXIO; 3504 3505 /* 3506 * Find a free target. 3507 * 3508 * XXX: We reserve sd_target == 0 to indicate the 3509 * discipline is not linked into sc->sc_targets, so begin 3510 * the search with target = 1. 3511 */ 3512 for (target = 1; target < SR_MAX_LD; target++) 3513 if (sc->sc_targets[target] == NULL) 3514 break; 3515 if (target == SR_MAX_LD) { 3516 sr_error(sc, "no free target for %s", 3517 sd->sd_meta->ssd_devname); 3518 goto unwind; 3519 } 3520 3521 /* Clear sense data. */ 3522 bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense)); 3523 3524 /* Attach discipline and get midlayer to probe it. */ 3525 sd->sd_target = target; 3526 sc->sc_targets[target] = sd; 3527 if (scsi_probe_lun(sc->sc_scsibus, target, 0) != 0) { 3528 sr_error(sc, "scsi_probe_lun failed"); 3529 sc->sc_targets[target] = NULL; 3530 sd->sd_target = 0; 3531 goto unwind; 3532 } 3533 3534 link = scsi_get_link(sc->sc_scsibus, target, 0); 3535 if (link == NULL) 3536 goto unwind; 3537 3538 dev = link->device_softc; 3539 DNPRINTF(SR_D_IOCTL, "%s: sr device added: %s at target %d\n", 3540 DEVNAME(sc), dev->dv_xname, sd->sd_target); 3541 3542 /* XXX - Count volumes, not targets. */ 3543 for (i = 0, vol = -1; i <= sd->sd_target; i++) 3544 if (sc->sc_targets[i]) 3545 vol++; 3546 3547 rv = 0; 3548 3549 if (sd->sd_meta->ssd_devname[0] != '\0' && 3550 strncmp(sd->sd_meta->ssd_devname, dev->dv_xname, 3551 sizeof(dev->dv_xname))) 3552 sr_warn(sc, "volume %s is roaming, it used to be %s, " 3553 "updating metadata", dev->dv_xname, 3554 sd->sd_meta->ssd_devname); 3555 3556 /* Populate remaining volume metadata. */ 3557 sd->sd_meta->ssdi.ssd_volid = vol; 3558 strlcpy(sd->sd_meta->ssd_devname, dev->dv_xname, 3559 sizeof(sd->sd_meta->ssd_devname)); 3560 3561 sr_info(sc, "%s volume attached as %s", 3562 sd->sd_name, sd->sd_meta->ssd_devname); 3563 3564 /* Update device name on any roaming chunks. */ 3565 sr_roam_chunks(sd); 3566 3567 #ifndef SMALL_KERNEL 3568 if (sr_sensors_create(sd)) 3569 sr_warn(sc, "unable to create sensor for %s", 3570 dev->dv_xname); 3571 #endif /* SMALL_KERNEL */ 3572 } else { 3573 /* This volume does not attach as a system disk. */ 3574 ch_entry = SLIST_FIRST(cl); /* XXX */ 3575 strlcpy(sd->sd_meta->ssd_devname, ch_entry->src_devname, 3576 sizeof(sd->sd_meta->ssd_devname)); 3577 3578 if (sd->sd_start_discipline(sd)) 3579 goto unwind; 3580 } 3581 3582 /* Save current metadata to disk. */ 3583 rv = sr_meta_save(sd, SR_META_DIRTY); 3584 3585 if (sd->sd_vol_status == BIOC_SVREBUILD) 3586 kthread_create_deferred(sr_rebuild_start, sd); 3587 3588 sd->sd_ready = 1; 3589 3590 free(dt, M_DEVBUF, bc->bc_dev_list_len); 3591 3592 return (rv); 3593 3594 unwind: 3595 free(dt, M_DEVBUF, bc->bc_dev_list_len); 3596 3597 sr_discipline_shutdown(sd, 0, 0); 3598 3599 if (rv == EAGAIN) 3600 rv = 0; 3601 3602 return (rv); 3603 } 3604 3605 int 3606 sr_ioctl_deleteraid(struct sr_softc *sc, struct sr_discipline *sd, 3607 struct bioc_deleteraid *bd) 3608 { 3609 int rv = 1; 3610 3611 DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_deleteraid %s\n", 3612 DEVNAME(sc), bd->bd_dev); 3613 3614 if (sd == NULL && (sd = sr_find_discipline(sc, bd->bd_dev)) == NULL) { 3615 sr_error(sc, "volume %s not found", bd->bd_dev); 3616 goto bad; 3617 } 3618 3619 /* 3620 * XXX Better check for mounted file systems and refuse to detach any 3621 * volume that is actively in use. 3622 */ 3623 if (bcmp(&sr_bootuuid, &sd->sd_meta->ssdi.ssd_uuid, 3624 sizeof(sr_bootuuid)) == 0) { 3625 sr_error(sc, "refusing to delete boot volume"); 3626 goto bad; 3627 } 3628 3629 sd->sd_deleted = 1; 3630 sd->sd_meta->ssdi.ssd_vol_flags = BIOC_SCNOAUTOASSEMBLE; 3631 sr_discipline_shutdown(sd, 1, 0); 3632 3633 rv = 0; 3634 bad: 3635 return (rv); 3636 } 3637 3638 int 3639 sr_ioctl_discipline(struct sr_softc *sc, struct sr_discipline *sd, 3640 struct bioc_discipline *bd) 3641 { 3642 int rv = 1; 3643 3644 /* Dispatch a discipline specific ioctl. */ 3645 3646 DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_discipline %s\n", DEVNAME(sc), 3647 bd->bd_dev); 3648 3649 if (sd == NULL && (sd = sr_find_discipline(sc, bd->bd_dev)) == NULL) { 3650 sr_error(sc, "volume %s not found", bd->bd_dev); 3651 goto bad; 3652 } 3653 3654 if (sd->sd_ioctl_handler) 3655 rv = sd->sd_ioctl_handler(sd, bd); 3656 3657 bad: 3658 return (rv); 3659 } 3660 3661 int 3662 sr_ioctl_installboot(struct sr_softc *sc, struct sr_discipline *sd, 3663 struct bioc_installboot *bb) 3664 { 3665 void *bootblk = NULL, *bootldr = NULL; 3666 struct sr_chunk *chunk; 3667 struct sr_meta_opt_item *omi; 3668 struct sr_meta_boot *sbm; 3669 struct disk *dk; 3670 u_int32_t bbs = 0, bls = 0, secsize; 3671 u_char duid[8]; 3672 int rv = EINVAL; 3673 int i; 3674 3675 DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_installboot %s\n", DEVNAME(sc), 3676 bb->bb_dev); 3677 3678 if (sd == NULL && (sd = sr_find_discipline(sc, bb->bb_dev)) == NULL) { 3679 sr_error(sc, "volume %s not found", bb->bb_dev); 3680 goto done; 3681 } 3682 3683 TAILQ_FOREACH(dk, &disklist, dk_link) 3684 if (!strncmp(dk->dk_name, bb->bb_dev, sizeof(bb->bb_dev))) 3685 break; 3686 if (dk == NULL || dk->dk_label == NULL || 3687 duid_iszero(dk->dk_label->d_uid)) { 3688 sr_error(sc, "failed to get DUID for softraid volume"); 3689 goto done; 3690 } 3691 memcpy(duid, dk->dk_label->d_uid, sizeof(duid)); 3692 3693 /* Ensure that boot storage area is large enough. */ 3694 if (sd->sd_meta->ssd_data_blkno < (SR_BOOT_OFFSET + SR_BOOT_SIZE)) { 3695 sr_error(sc, "insufficient boot storage"); 3696 goto done; 3697 } 3698 3699 if (bb->bb_bootblk_size > SR_BOOT_BLOCKS_SIZE * DEV_BSIZE) { 3700 sr_error(sc, "boot block too large (%d > %d)", 3701 bb->bb_bootblk_size, SR_BOOT_BLOCKS_SIZE * DEV_BSIZE); 3702 goto done; 3703 } 3704 3705 if (bb->bb_bootldr_size > SR_BOOT_LOADER_SIZE * DEV_BSIZE) { 3706 sr_error(sc, "boot loader too large (%d > %d)", 3707 bb->bb_bootldr_size, SR_BOOT_LOADER_SIZE * DEV_BSIZE); 3708 goto done; 3709 } 3710 3711 secsize = sd->sd_meta->ssdi.ssd_secsize; 3712 3713 /* Copy in boot block. */ 3714 bbs = howmany(bb->bb_bootblk_size, secsize) * secsize; 3715 bootblk = malloc(bbs, M_DEVBUF, M_WAITOK | M_ZERO); 3716 if (copyin(bb->bb_bootblk, bootblk, bb->bb_bootblk_size) != 0) 3717 goto done; 3718 3719 /* Copy in boot loader. */ 3720 bls = howmany(bb->bb_bootldr_size, secsize) * secsize; 3721 bootldr = malloc(bls, M_DEVBUF, M_WAITOK | M_ZERO); 3722 if (copyin(bb->bb_bootldr, bootldr, bb->bb_bootldr_size) != 0) 3723 goto done; 3724 3725 /* Create or update optional meta for bootable volumes. */ 3726 SLIST_FOREACH(omi, &sd->sd_meta_opt, omi_link) 3727 if (omi->omi_som->som_type == SR_OPT_BOOT) 3728 break; 3729 if (omi == NULL) { 3730 omi = malloc(sizeof(struct sr_meta_opt_item), M_DEVBUF, 3731 M_WAITOK | M_ZERO); 3732 omi->omi_som = malloc(sizeof(struct sr_meta_boot), M_DEVBUF, 3733 M_WAITOK | M_ZERO); 3734 omi->omi_som->som_type = SR_OPT_BOOT; 3735 omi->omi_som->som_length = sizeof(struct sr_meta_boot); 3736 SLIST_INSERT_HEAD(&sd->sd_meta_opt, omi, omi_link); 3737 sd->sd_meta->ssdi.ssd_opt_no++; 3738 } 3739 sbm = (struct sr_meta_boot *)omi->omi_som; 3740 3741 memcpy(sbm->sbm_root_duid, duid, sizeof(sbm->sbm_root_duid)); 3742 bzero(&sbm->sbm_boot_duid, sizeof(sbm->sbm_boot_duid)); 3743 sbm->sbm_bootblk_size = bbs; 3744 sbm->sbm_bootldr_size = bls; 3745 3746 DNPRINTF(SR_D_IOCTL, "sr_ioctl_installboot: root duid is %s\n", 3747 duid_format(sbm->sbm_root_duid)); 3748 3749 /* Save boot block and boot loader to each chunk. */ 3750 for (i = 0; i < sd->sd_meta->ssdi.ssd_chunk_no; i++) { 3751 3752 chunk = sd->sd_vol.sv_chunks[i]; 3753 if (chunk->src_meta.scm_status != BIOC_SDONLINE && 3754 chunk->src_meta.scm_status != BIOC_SDREBUILD) 3755 continue; 3756 3757 if (i < SR_MAX_BOOT_DISKS) 3758 memcpy(&sbm->sbm_boot_duid[i], chunk->src_duid, 3759 sizeof(sbm->sbm_boot_duid[i])); 3760 3761 /* Save boot blocks. */ 3762 DNPRINTF(SR_D_IOCTL, 3763 "sr_ioctl_installboot: saving boot block to %s " 3764 "(%u bytes)\n", chunk->src_devname, bbs); 3765 3766 if (sr_rw(sc, chunk->src_dev_mm, bootblk, bbs, 3767 SR_BOOT_BLOCKS_OFFSET, B_WRITE)) { 3768 sr_error(sc, "failed to write boot block"); 3769 goto done; 3770 } 3771 3772 /* Save boot loader.*/ 3773 DNPRINTF(SR_D_IOCTL, 3774 "sr_ioctl_installboot: saving boot loader to %s " 3775 "(%u bytes)\n", chunk->src_devname, bls); 3776 3777 if (sr_rw(sc, chunk->src_dev_mm, bootldr, bls, 3778 SR_BOOT_LOADER_OFFSET, B_WRITE)) { 3779 sr_error(sc, "failed to write boot loader"); 3780 goto done; 3781 } 3782 } 3783 3784 /* XXX - Install boot block on disk - MD code. */ 3785 3786 /* Mark volume as bootable and save metadata. */ 3787 sd->sd_meta->ssdi.ssd_vol_flags |= BIOC_SCBOOTABLE; 3788 if (sr_meta_save(sd, SR_META_DIRTY)) { 3789 sr_error(sc, "could not save metadata to %s", DEVNAME(sc)); 3790 goto done; 3791 } 3792 3793 rv = 0; 3794 3795 done: 3796 free(bootblk, M_DEVBUF, bbs); 3797 free(bootldr, M_DEVBUF, bls); 3798 3799 return (rv); 3800 } 3801 3802 void 3803 sr_chunks_unwind(struct sr_softc *sc, struct sr_chunk_head *cl) 3804 { 3805 struct sr_chunk *ch_entry, *ch_next; 3806 3807 DNPRINTF(SR_D_IOCTL, "%s: sr_chunks_unwind\n", DEVNAME(sc)); 3808 3809 if (!cl) 3810 return; 3811 3812 for (ch_entry = SLIST_FIRST(cl); ch_entry != NULL; ch_entry = ch_next) { 3813 ch_next = SLIST_NEXT(ch_entry, src_link); 3814 3815 DNPRINTF(SR_D_IOCTL, "%s: sr_chunks_unwind closing: %s\n", 3816 DEVNAME(sc), ch_entry->src_devname); 3817 if (ch_entry->src_vn) { 3818 /* 3819 * XXX - explicitly lock the vnode until we can resolve 3820 * the problem introduced by vnode aliasing... specfs 3821 * has no locking, whereas ufs/ffs does! 3822 */ 3823 vn_lock(ch_entry->src_vn, LK_EXCLUSIVE | LK_RETRY); 3824 VOP_CLOSE(ch_entry->src_vn, FREAD | FWRITE, NOCRED, 3825 curproc); 3826 vput(ch_entry->src_vn); 3827 } 3828 free(ch_entry, M_DEVBUF, sizeof(*ch_entry)); 3829 } 3830 SLIST_INIT(cl); 3831 } 3832 3833 void 3834 sr_discipline_free(struct sr_discipline *sd) 3835 { 3836 struct sr_softc *sc; 3837 struct sr_discipline *sdtmp1; 3838 struct sr_meta_opt_head *som; 3839 struct sr_meta_opt_item *omi, *omi_next; 3840 3841 if (!sd) 3842 return; 3843 3844 sc = sd->sd_sc; 3845 3846 DNPRINTF(SR_D_DIS, "%s: sr_discipline_free %s\n", 3847 DEVNAME(sc), 3848 sd->sd_meta ? sd->sd_meta->ssd_devname : "nodev"); 3849 if (sd->sd_free_resources) 3850 sd->sd_free_resources(sd); 3851 free(sd->sd_vol.sv_chunks, M_DEVBUF, 0); 3852 free(sd->sd_meta, M_DEVBUF, SR_META_SIZE * DEV_BSIZE); 3853 free(sd->sd_meta_foreign, M_DEVBUF, smd[sd->sd_meta_type].smd_size); 3854 3855 som = &sd->sd_meta_opt; 3856 for (omi = SLIST_FIRST(som); omi != NULL; omi = omi_next) { 3857 omi_next = SLIST_NEXT(omi, omi_link); 3858 free(omi->omi_som, M_DEVBUF, 0); 3859 free(omi, M_DEVBUF, sizeof(*omi)); 3860 } 3861 3862 if (sd->sd_target != 0) { 3863 KASSERT(sc->sc_targets[sd->sd_target] == sd); 3864 sc->sc_targets[sd->sd_target] = NULL; 3865 } 3866 3867 TAILQ_FOREACH(sdtmp1, &sc->sc_dis_list, sd_link) { 3868 if (sdtmp1 == sd) 3869 break; 3870 } 3871 if (sdtmp1 != NULL) 3872 TAILQ_REMOVE(&sc->sc_dis_list, sd, sd_link); 3873 3874 explicit_bzero(sd, sizeof *sd); 3875 free(sd, M_DEVBUF, sizeof(*sd)); 3876 } 3877 3878 void 3879 sr_discipline_shutdown(struct sr_discipline *sd, int meta_save, int dying) 3880 { 3881 struct sr_softc *sc; 3882 int ret, s; 3883 3884 if (!sd) 3885 return; 3886 sc = sd->sd_sc; 3887 3888 DNPRINTF(SR_D_DIS, "%s: sr_discipline_shutdown %s\n", DEVNAME(sc), 3889 sd->sd_meta ? sd->sd_meta->ssd_devname : "nodev"); 3890 3891 /* If rebuilding, abort rebuild and drain I/O. */ 3892 if (sd->sd_reb_active) { 3893 sd->sd_reb_abort = 1; 3894 while (sd->sd_reb_active) 3895 tsleep_nsec(sd, PWAIT, "sr_shutdown", MSEC_TO_NSEC(1)); 3896 } 3897 3898 if (meta_save) 3899 sr_meta_save(sd, 0); 3900 3901 s = splbio(); 3902 3903 sd->sd_ready = 0; 3904 3905 /* make sure there isn't a sync pending and yield */ 3906 wakeup(sd); 3907 while (sd->sd_sync || sd->sd_must_flush) { 3908 ret = tsleep_nsec(&sd->sd_sync, MAXPRI, "sr_down", 3909 SEC_TO_NSEC(60)); 3910 if (ret == EWOULDBLOCK) 3911 break; 3912 } 3913 if (dying == -1) { 3914 sd->sd_ready = 1; 3915 splx(s); 3916 return; 3917 } 3918 3919 #ifndef SMALL_KERNEL 3920 sr_sensors_delete(sd); 3921 #endif /* SMALL_KERNEL */ 3922 3923 if (sd->sd_target != 0) 3924 scsi_detach_lun(sc->sc_scsibus, sd->sd_target, 0, 3925 dying ? 0 : DETACH_FORCE); 3926 3927 sr_chunks_unwind(sc, &sd->sd_vol.sv_chunk_list); 3928 3929 if (sd->sd_taskq) 3930 taskq_destroy(sd->sd_taskq); 3931 3932 sr_discipline_free(sd); 3933 3934 splx(s); 3935 } 3936 3937 int 3938 sr_discipline_init(struct sr_discipline *sd, int level) 3939 { 3940 int rv = 1; 3941 3942 /* Initialise discipline function pointers with defaults. */ 3943 sd->sd_alloc_resources = sr_alloc_resources; 3944 sd->sd_assemble = NULL; 3945 sd->sd_create = NULL; 3946 sd->sd_free_resources = sr_free_resources; 3947 sd->sd_ioctl_handler = NULL; 3948 sd->sd_openings = NULL; 3949 sd->sd_meta_opt_handler = NULL; 3950 sd->sd_rebuild = sr_rebuild; 3951 sd->sd_scsi_inquiry = sr_raid_inquiry; 3952 sd->sd_scsi_read_cap = sr_raid_read_cap; 3953 sd->sd_scsi_tur = sr_raid_tur; 3954 sd->sd_scsi_req_sense = sr_raid_request_sense; 3955 sd->sd_scsi_start_stop = sr_raid_start_stop; 3956 sd->sd_scsi_sync = sr_raid_sync; 3957 sd->sd_scsi_rw = NULL; 3958 sd->sd_scsi_intr = sr_raid_intr; 3959 sd->sd_scsi_wu_done = NULL; 3960 sd->sd_scsi_done = NULL; 3961 sd->sd_set_chunk_state = sr_set_chunk_state; 3962 sd->sd_set_vol_state = sr_set_vol_state; 3963 sd->sd_start_discipline = NULL; 3964 3965 task_set(&sd->sd_meta_save_task, sr_meta_save_callback, sd); 3966 task_set(&sd->sd_hotspare_rebuild_task, sr_hotspare_rebuild_callback, 3967 sd); 3968 3969 sd->sd_wu_size = sizeof(struct sr_workunit); 3970 switch (level) { 3971 case 0: 3972 sr_raid0_discipline_init(sd); 3973 break; 3974 case 1: 3975 sr_raid1_discipline_init(sd); 3976 break; 3977 case 5: 3978 sr_raid5_discipline_init(sd); 3979 break; 3980 case 6: 3981 sr_raid6_discipline_init(sd); 3982 break; 3983 #ifdef CRYPTO 3984 case 'C': 3985 sr_crypto_discipline_init(sd); 3986 break; 3987 case 0x1C: 3988 sr_raid1c_discipline_init(sd); 3989 break; 3990 #endif 3991 case 'c': 3992 sr_concat_discipline_init(sd); 3993 break; 3994 default: 3995 goto bad; 3996 } 3997 3998 rv = 0; 3999 bad: 4000 return (rv); 4001 } 4002 4003 int 4004 sr_raid_inquiry(struct sr_workunit *wu) 4005 { 4006 struct sr_discipline *sd = wu->swu_dis; 4007 struct scsi_xfer *xs = wu->swu_xs; 4008 struct scsi_inquiry *cdb = (struct scsi_inquiry *)&xs->cmd; 4009 struct scsi_inquiry_data inq; 4010 4011 DNPRINTF(SR_D_DIS, "%s: sr_raid_inquiry\n", DEVNAME(sd->sd_sc)); 4012 4013 if (xs->cmdlen != sizeof(*cdb)) 4014 return (EINVAL); 4015 4016 if (ISSET(cdb->flags, SI_EVPD)) 4017 return (EOPNOTSUPP); 4018 4019 bzero(&inq, sizeof(inq)); 4020 inq.device = T_DIRECT; 4021 inq.dev_qual2 = 0; 4022 inq.version = SCSI_REV_2; 4023 inq.response_format = SID_SCSI2_RESPONSE; 4024 inq.additional_length = SID_SCSI2_ALEN; 4025 inq.flags |= SID_CmdQue; 4026 strlcpy(inq.vendor, sd->sd_meta->ssdi.ssd_vendor, 4027 sizeof(inq.vendor)); 4028 strlcpy(inq.product, sd->sd_meta->ssdi.ssd_product, 4029 sizeof(inq.product)); 4030 strlcpy(inq.revision, sd->sd_meta->ssdi.ssd_revision, 4031 sizeof(inq.revision)); 4032 scsi_copy_internal_data(xs, &inq, sizeof(inq)); 4033 4034 return (0); 4035 } 4036 4037 int 4038 sr_raid_read_cap(struct sr_workunit *wu) 4039 { 4040 struct sr_discipline *sd = wu->swu_dis; 4041 struct scsi_xfer *xs = wu->swu_xs; 4042 struct scsi_read_cap_data rcd; 4043 struct scsi_read_cap_data_16 rcd16; 4044 u_int64_t addr; 4045 int rv = 1; 4046 u_int32_t secsize; 4047 4048 DNPRINTF(SR_D_DIS, "%s: sr_raid_read_cap\n", DEVNAME(sd->sd_sc)); 4049 4050 secsize = sd->sd_meta->ssdi.ssd_secsize; 4051 4052 addr = ((sd->sd_meta->ssdi.ssd_size * DEV_BSIZE) / secsize) - 1; 4053 if (xs->cmd.opcode == READ_CAPACITY) { 4054 bzero(&rcd, sizeof(rcd)); 4055 if (addr > 0xffffffffllu) 4056 _lto4b(0xffffffff, rcd.addr); 4057 else 4058 _lto4b(addr, rcd.addr); 4059 _lto4b(secsize, rcd.length); 4060 scsi_copy_internal_data(xs, &rcd, sizeof(rcd)); 4061 rv = 0; 4062 } else if (xs->cmd.opcode == READ_CAPACITY_16) { 4063 bzero(&rcd16, sizeof(rcd16)); 4064 _lto8b(addr, rcd16.addr); 4065 _lto4b(secsize, rcd16.length); 4066 scsi_copy_internal_data(xs, &rcd16, sizeof(rcd16)); 4067 rv = 0; 4068 } 4069 4070 return (rv); 4071 } 4072 4073 int 4074 sr_raid_tur(struct sr_workunit *wu) 4075 { 4076 struct sr_discipline *sd = wu->swu_dis; 4077 4078 DNPRINTF(SR_D_DIS, "%s: sr_raid_tur\n", DEVNAME(sd->sd_sc)); 4079 4080 if (sd->sd_vol_status == BIOC_SVOFFLINE) { 4081 sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT; 4082 sd->sd_scsi_sense.flags = SKEY_NOT_READY; 4083 sd->sd_scsi_sense.add_sense_code = 0x04; 4084 sd->sd_scsi_sense.add_sense_code_qual = 0x11; 4085 sd->sd_scsi_sense.extra_len = 4; 4086 return (1); 4087 } else if (sd->sd_vol_status == BIOC_SVINVALID) { 4088 sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT; 4089 sd->sd_scsi_sense.flags = SKEY_HARDWARE_ERROR; 4090 sd->sd_scsi_sense.add_sense_code = 0x05; 4091 sd->sd_scsi_sense.add_sense_code_qual = 0x00; 4092 sd->sd_scsi_sense.extra_len = 4; 4093 return (1); 4094 } 4095 4096 return (0); 4097 } 4098 4099 int 4100 sr_raid_request_sense(struct sr_workunit *wu) 4101 { 4102 struct sr_discipline *sd = wu->swu_dis; 4103 struct scsi_xfer *xs = wu->swu_xs; 4104 4105 DNPRINTF(SR_D_DIS, "%s: sr_raid_request_sense\n", 4106 DEVNAME(sd->sd_sc)); 4107 4108 /* use latest sense data */ 4109 memcpy(&xs->sense, &sd->sd_scsi_sense, sizeof(xs->sense)); 4110 4111 /* clear sense data */ 4112 bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense)); 4113 4114 return (0); 4115 } 4116 4117 int 4118 sr_raid_start_stop(struct sr_workunit *wu) 4119 { 4120 struct scsi_xfer *xs = wu->swu_xs; 4121 struct scsi_start_stop *ss = (struct scsi_start_stop *)&xs->cmd; 4122 4123 DNPRINTF(SR_D_DIS, "%s: sr_raid_start_stop\n", 4124 DEVNAME(wu->swu_dis->sd_sc)); 4125 4126 if (!ss) 4127 return (1); 4128 4129 /* 4130 * do nothing! 4131 * a softraid discipline should always reflect correct status 4132 */ 4133 return (0); 4134 } 4135 4136 int 4137 sr_raid_sync(struct sr_workunit *wu) 4138 { 4139 struct sr_discipline *sd = wu->swu_dis; 4140 int s, ret, rv = 0, ios; 4141 4142 DNPRINTF(SR_D_DIS, "%s: sr_raid_sync\n", DEVNAME(sd->sd_sc)); 4143 4144 /* when doing a fake sync don't count the wu */ 4145 ios = (wu->swu_flags & SR_WUF_FAKE) ? 0 : 1; 4146 4147 s = splbio(); 4148 sd->sd_sync = 1; 4149 while (sd->sd_wu_pending > ios) { 4150 ret = tsleep_nsec(sd, PRIBIO, "sr_sync", SEC_TO_NSEC(15)); 4151 if (ret == EWOULDBLOCK) { 4152 DNPRINTF(SR_D_DIS, "%s: sr_raid_sync timeout\n", 4153 DEVNAME(sd->sd_sc)); 4154 rv = 1; 4155 break; 4156 } 4157 } 4158 sd->sd_sync = 0; 4159 splx(s); 4160 4161 wakeup(&sd->sd_sync); 4162 4163 return (rv); 4164 } 4165 4166 void 4167 sr_raid_intr(struct buf *bp) 4168 { 4169 struct sr_ccb *ccb = (struct sr_ccb *)bp; 4170 struct sr_workunit *wu = ccb->ccb_wu; 4171 #ifdef SR_DEBUG 4172 struct sr_discipline *sd = wu->swu_dis; 4173 struct scsi_xfer *xs = wu->swu_xs; 4174 #endif 4175 int s; 4176 4177 DNPRINTF(SR_D_INTR, "%s: %s %s intr bp %p xs %p\n", 4178 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, sd->sd_name, bp, xs); 4179 4180 s = splbio(); 4181 sr_ccb_done(ccb); 4182 sr_wu_done(wu); 4183 splx(s); 4184 } 4185 4186 void 4187 sr_schedule_wu(struct sr_workunit *wu) 4188 { 4189 struct sr_discipline *sd = wu->swu_dis; 4190 struct sr_workunit *wup; 4191 int s; 4192 4193 DNPRINTF(SR_D_WU, "sr_schedule_wu: schedule wu %p state %i " 4194 "flags 0x%x\n", wu, wu->swu_state, wu->swu_flags); 4195 4196 KASSERT(wu->swu_io_count > 0); 4197 4198 s = splbio(); 4199 4200 /* Construct the work unit, do not schedule it. */ 4201 if (wu->swu_state == SR_WU_CONSTRUCT) 4202 goto queued; 4203 4204 /* Deferred work unit being reconstructed, do not start. */ 4205 if (wu->swu_state == SR_WU_REQUEUE) 4206 goto queued; 4207 4208 /* Current work unit failed, restart. */ 4209 if (wu->swu_state == SR_WU_RESTART) 4210 goto start; 4211 4212 if (wu->swu_state != SR_WU_INPROGRESS) 4213 panic("sr_schedule_wu: work unit not in progress (state %i)", 4214 wu->swu_state); 4215 4216 /* Walk queue backwards and fill in collider if we have one. */ 4217 TAILQ_FOREACH_REVERSE(wup, &sd->sd_wu_pendq, sr_wu_list, swu_link) { 4218 if (wu->swu_blk_end < wup->swu_blk_start || 4219 wup->swu_blk_end < wu->swu_blk_start) 4220 continue; 4221 4222 /* Defer work unit due to LBA collision. */ 4223 DNPRINTF(SR_D_WU, "sr_schedule_wu: deferring work unit %p\n", 4224 wu); 4225 wu->swu_state = SR_WU_DEFERRED; 4226 while (wup->swu_collider) 4227 wup = wup->swu_collider; 4228 wup->swu_collider = wu; 4229 TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu, swu_link); 4230 sd->sd_wu_collisions++; 4231 goto queued; 4232 } 4233 4234 start: 4235 sr_raid_startwu(wu); 4236 4237 queued: 4238 splx(s); 4239 } 4240 4241 void 4242 sr_raid_startwu(struct sr_workunit *wu) 4243 { 4244 struct sr_discipline *sd = wu->swu_dis; 4245 struct sr_ccb *ccb; 4246 4247 DNPRINTF(SR_D_WU, "sr_raid_startwu: start wu %p\n", wu); 4248 4249 splassert(IPL_BIO); 4250 4251 if (wu->swu_state == SR_WU_DEFERRED) { 4252 TAILQ_REMOVE(&sd->sd_wu_defq, wu, swu_link); 4253 wu->swu_state = SR_WU_INPROGRESS; 4254 } 4255 4256 if (wu->swu_state != SR_WU_RESTART) 4257 TAILQ_INSERT_TAIL(&sd->sd_wu_pendq, wu, swu_link); 4258 4259 /* Start all of the individual I/Os. */ 4260 if (wu->swu_cb_active == 1) 4261 panic("%s: sr_startwu_callback", DEVNAME(sd->sd_sc)); 4262 wu->swu_cb_active = 1; 4263 4264 TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) 4265 VOP_STRATEGY(ccb->ccb_buf.b_vp, &ccb->ccb_buf); 4266 4267 wu->swu_cb_active = 0; 4268 } 4269 4270 void 4271 sr_raid_recreate_wu(struct sr_workunit *wu) 4272 { 4273 struct sr_discipline *sd = wu->swu_dis; 4274 struct sr_workunit *wup = wu; 4275 4276 /* 4277 * Recreate a work unit by releasing the associated CCBs and reissuing 4278 * the SCSI I/O request. This process is then repeated for all of the 4279 * colliding work units. 4280 */ 4281 do { 4282 sr_wu_release_ccbs(wup); 4283 4284 wup->swu_state = SR_WU_REQUEUE; 4285 if (sd->sd_scsi_rw(wup)) 4286 panic("could not requeue I/O"); 4287 4288 wup = wup->swu_collider; 4289 } while (wup); 4290 } 4291 4292 int 4293 sr_alloc_resources(struct sr_discipline *sd) 4294 { 4295 if (sr_wu_alloc(sd)) { 4296 sr_error(sd->sd_sc, "unable to allocate work units"); 4297 return (ENOMEM); 4298 } 4299 if (sr_ccb_alloc(sd)) { 4300 sr_error(sd->sd_sc, "unable to allocate ccbs"); 4301 return (ENOMEM); 4302 } 4303 4304 return (0); 4305 } 4306 4307 void 4308 sr_free_resources(struct sr_discipline *sd) 4309 { 4310 sr_wu_free(sd); 4311 sr_ccb_free(sd); 4312 } 4313 4314 void 4315 sr_set_chunk_state(struct sr_discipline *sd, int c, int new_state) 4316 { 4317 int old_state, s; 4318 4319 DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_set_chunk_state %d -> %d\n", 4320 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, 4321 sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state); 4322 4323 /* ok to go to splbio since this only happens in error path */ 4324 s = splbio(); 4325 old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status; 4326 4327 /* multiple IOs to the same chunk that fail will come through here */ 4328 if (old_state == new_state) 4329 goto done; 4330 4331 switch (old_state) { 4332 case BIOC_SDONLINE: 4333 if (new_state == BIOC_SDOFFLINE) 4334 break; 4335 else 4336 goto die; 4337 break; 4338 4339 case BIOC_SDOFFLINE: 4340 goto die; 4341 4342 default: 4343 die: 4344 splx(s); /* XXX */ 4345 panic("%s: %s: %s: invalid chunk state transition %d -> %d", 4346 DEVNAME(sd->sd_sc), 4347 sd->sd_meta->ssd_devname, 4348 sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, 4349 old_state, new_state); 4350 /* NOTREACHED */ 4351 } 4352 4353 sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state; 4354 sd->sd_set_vol_state(sd); 4355 4356 sd->sd_must_flush = 1; 4357 task_add(systq, &sd->sd_meta_save_task); 4358 done: 4359 splx(s); 4360 } 4361 4362 void 4363 sr_set_vol_state(struct sr_discipline *sd) 4364 { 4365 int states[SR_MAX_STATES]; 4366 int new_state, i, nd; 4367 int old_state = sd->sd_vol_status; 4368 u_int32_t s; 4369 4370 DNPRINTF(SR_D_STATE, "%s: %s: sr_set_vol_state\n", 4371 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname); 4372 4373 nd = sd->sd_meta->ssdi.ssd_chunk_no; 4374 4375 for (i = 0; i < SR_MAX_STATES; i++) 4376 states[i] = 0; 4377 4378 for (i = 0; i < nd; i++) { 4379 s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status; 4380 if (s >= SR_MAX_STATES) 4381 panic("%s: %s: %s: invalid chunk state", 4382 DEVNAME(sd->sd_sc), 4383 sd->sd_meta->ssd_devname, 4384 sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname); 4385 states[s]++; 4386 } 4387 4388 if (states[BIOC_SDONLINE] == nd) 4389 new_state = BIOC_SVONLINE; 4390 else 4391 new_state = BIOC_SVOFFLINE; 4392 4393 DNPRINTF(SR_D_STATE, "%s: %s: sr_set_vol_state %d -> %d\n", 4394 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, 4395 old_state, new_state); 4396 4397 switch (old_state) { 4398 case BIOC_SVONLINE: 4399 if (new_state == BIOC_SVOFFLINE || new_state == BIOC_SVONLINE) 4400 break; 4401 else 4402 goto die; 4403 break; 4404 4405 case BIOC_SVOFFLINE: 4406 /* XXX this might be a little too much */ 4407 goto die; 4408 4409 default: 4410 die: 4411 panic("%s: %s: invalid volume state transition %d -> %d", 4412 DEVNAME(sd->sd_sc), 4413 sd->sd_meta->ssd_devname, 4414 old_state, new_state); 4415 /* NOTREACHED */ 4416 } 4417 4418 sd->sd_vol_status = new_state; 4419 } 4420 4421 void * 4422 sr_block_get(struct sr_discipline *sd, long length) 4423 { 4424 return dma_alloc(length, PR_NOWAIT | PR_ZERO); 4425 } 4426 4427 void 4428 sr_block_put(struct sr_discipline *sd, void *ptr, int length) 4429 { 4430 dma_free(ptr, length); 4431 } 4432 4433 void 4434 sr_checksum_print(u_int8_t *md5) 4435 { 4436 int i; 4437 4438 for (i = 0; i < MD5_DIGEST_LENGTH; i++) 4439 printf("%02x", md5[i]); 4440 } 4441 4442 void 4443 sr_checksum(struct sr_softc *sc, void *src, void *md5, u_int32_t len) 4444 { 4445 MD5_CTX ctx; 4446 4447 DNPRINTF(SR_D_MISC, "%s: sr_checksum(%p %p %d)\n", DEVNAME(sc), src, 4448 md5, len); 4449 4450 MD5Init(&ctx); 4451 MD5Update(&ctx, src, len); 4452 MD5Final(md5, &ctx); 4453 } 4454 4455 void 4456 sr_uuid_generate(struct sr_uuid *uuid) 4457 { 4458 arc4random_buf(uuid->sui_id, sizeof(uuid->sui_id)); 4459 /* UUID version 4: random */ 4460 uuid->sui_id[6] &= 0x0f; 4461 uuid->sui_id[6] |= 0x40; 4462 /* RFC4122 variant */ 4463 uuid->sui_id[8] &= 0x3f; 4464 uuid->sui_id[8] |= 0x80; 4465 } 4466 4467 char * 4468 sr_uuid_format(struct sr_uuid *uuid) 4469 { 4470 char *uuidstr; 4471 4472 uuidstr = malloc(37, M_DEVBUF, M_WAITOK); 4473 4474 snprintf(uuidstr, 37, 4475 "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-" 4476 "%02x%02x%02x%02x%02x%02x", 4477 uuid->sui_id[0], uuid->sui_id[1], 4478 uuid->sui_id[2], uuid->sui_id[3], 4479 uuid->sui_id[4], uuid->sui_id[5], 4480 uuid->sui_id[6], uuid->sui_id[7], 4481 uuid->sui_id[8], uuid->sui_id[9], 4482 uuid->sui_id[10], uuid->sui_id[11], 4483 uuid->sui_id[12], uuid->sui_id[13], 4484 uuid->sui_id[14], uuid->sui_id[15]); 4485 4486 return uuidstr; 4487 } 4488 4489 void 4490 sr_uuid_print(struct sr_uuid *uuid, int cr) 4491 { 4492 char *uuidstr; 4493 4494 uuidstr = sr_uuid_format(uuid); 4495 printf("%s%s", uuidstr, (cr ? "\n" : "")); 4496 free(uuidstr, M_DEVBUF, 37); 4497 } 4498 4499 int 4500 sr_already_assembled(struct sr_discipline *sd) 4501 { 4502 struct sr_softc *sc = sd->sd_sc; 4503 struct sr_discipline *sdtmp; 4504 4505 TAILQ_FOREACH(sdtmp, &sc->sc_dis_list, sd_link) { 4506 if (!bcmp(&sd->sd_meta->ssdi.ssd_uuid, 4507 &sdtmp->sd_meta->ssdi.ssd_uuid, 4508 sizeof(sd->sd_meta->ssdi.ssd_uuid))) 4509 return (1); 4510 } 4511 4512 return (0); 4513 } 4514 4515 int32_t 4516 sr_validate_stripsize(u_int32_t b) 4517 { 4518 int s = 0; 4519 4520 if (b % DEV_BSIZE) 4521 return (-1); 4522 4523 while ((b & 1) == 0) { 4524 b >>= 1; 4525 s++; 4526 } 4527 4528 /* only multiple of twos */ 4529 b >>= 1; 4530 if (b) 4531 return(-1); 4532 4533 return (s); 4534 } 4535 4536 void 4537 sr_quiesce(void) 4538 { 4539 struct sr_softc *sc = softraid0; 4540 struct sr_discipline *sd, *nsd; 4541 4542 if (sc == NULL) 4543 return; 4544 4545 /* Shutdown disciplines in reverse attach order. */ 4546 TAILQ_FOREACH_REVERSE_SAFE(sd, &sc->sc_dis_list, 4547 sr_discipline_list, sd_link, nsd) 4548 sr_discipline_shutdown(sd, 1, -1); 4549 } 4550 4551 void 4552 sr_shutdown(int dying) 4553 { 4554 struct sr_softc *sc = softraid0; 4555 struct sr_discipline *sd; 4556 4557 if (sc == NULL) 4558 return; 4559 4560 DNPRINTF(SR_D_MISC, "%s: sr_shutdown\n", DEVNAME(sc)); 4561 4562 /* 4563 * Since softraid is not under mainbus, we have to explicitly 4564 * notify its children that the power is going down, so they 4565 * can execute their shutdown hooks. 4566 */ 4567 config_suspend((struct device *)sc, DVACT_POWERDOWN); 4568 4569 /* Shutdown disciplines in reverse attach order. */ 4570 while ((sd = TAILQ_LAST(&sc->sc_dis_list, sr_discipline_list)) != NULL) 4571 sr_discipline_shutdown(sd, 1, dying); 4572 } 4573 4574 int 4575 sr_validate_io(struct sr_workunit *wu, daddr_t *blkno, char *func) 4576 { 4577 struct sr_discipline *sd = wu->swu_dis; 4578 struct scsi_xfer *xs = wu->swu_xs; 4579 int rv = 1; 4580 4581 DNPRINTF(SR_D_DIS, "%s: %s 0x%02x\n", DEVNAME(sd->sd_sc), func, 4582 xs->cmd.opcode); 4583 4584 if (sd->sd_meta->ssd_data_blkno == 0) 4585 panic("invalid data blkno"); 4586 4587 if (sd->sd_vol_status == BIOC_SVOFFLINE) { 4588 DNPRINTF(SR_D_DIS, "%s: %s device offline\n", 4589 DEVNAME(sd->sd_sc), func); 4590 goto bad; 4591 } 4592 4593 if (xs->datalen == 0) { 4594 printf("%s: %s: illegal block count for %s\n", 4595 DEVNAME(sd->sd_sc), func, sd->sd_meta->ssd_devname); 4596 goto bad; 4597 } 4598 4599 if (xs->cmdlen == 10) 4600 *blkno = _4btol(((struct scsi_rw_10 *)&xs->cmd)->addr); 4601 else if (xs->cmdlen == 16) 4602 *blkno = _8btol(((struct scsi_rw_16 *)&xs->cmd)->addr); 4603 else if (xs->cmdlen == 6) 4604 *blkno = _3btol(((struct scsi_rw *)&xs->cmd)->addr); 4605 else { 4606 printf("%s: %s: illegal cmdlen for %s\n", 4607 DEVNAME(sd->sd_sc), func, sd->sd_meta->ssd_devname); 4608 goto bad; 4609 } 4610 4611 *blkno *= (sd->sd_meta->ssdi.ssd_secsize / DEV_BSIZE); 4612 4613 wu->swu_blk_start = *blkno; 4614 wu->swu_blk_end = *blkno + (xs->datalen >> DEV_BSHIFT) - 1; 4615 4616 if (wu->swu_blk_end > sd->sd_meta->ssdi.ssd_size) { 4617 DNPRINTF(SR_D_DIS, "%s: %s out of bounds start: %lld " 4618 "end: %lld length: %d\n", 4619 DEVNAME(sd->sd_sc), func, (long long)wu->swu_blk_start, 4620 (long long)wu->swu_blk_end, xs->datalen); 4621 4622 sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT | 4623 SSD_ERRCODE_VALID; 4624 sd->sd_scsi_sense.flags = SKEY_ILLEGAL_REQUEST; 4625 sd->sd_scsi_sense.add_sense_code = 0x21; 4626 sd->sd_scsi_sense.add_sense_code_qual = 0x00; 4627 sd->sd_scsi_sense.extra_len = 4; 4628 goto bad; 4629 } 4630 4631 rv = 0; 4632 bad: 4633 return (rv); 4634 } 4635 4636 void 4637 sr_rebuild_start(void *arg) 4638 { 4639 struct sr_discipline *sd = arg; 4640 struct sr_softc *sc = sd->sd_sc; 4641 4642 DNPRINTF(SR_D_REBUILD, "%s: %s starting rebuild thread\n", 4643 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname); 4644 4645 if (kthread_create(sr_rebuild_thread, sd, &sd->sd_background_proc, 4646 DEVNAME(sc)) != 0) 4647 printf("%s: unable to start background operation\n", 4648 DEVNAME(sc)); 4649 } 4650 4651 void 4652 sr_rebuild_thread(void *arg) 4653 { 4654 struct sr_discipline *sd = arg; 4655 4656 DNPRINTF(SR_D_REBUILD, "%s: %s rebuild thread started\n", 4657 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname); 4658 4659 sd->sd_reb_active = 1; 4660 sd->sd_rebuild(sd); 4661 sd->sd_reb_active = 0; 4662 4663 kthread_exit(0); 4664 } 4665 4666 void 4667 sr_rebuild(struct sr_discipline *sd) 4668 { 4669 struct sr_softc *sc = sd->sd_sc; 4670 u_int64_t sz, whole_blk, partial_blk, blk, restart; 4671 daddr_t lba; 4672 struct sr_workunit *wu_r, *wu_w; 4673 struct scsi_xfer xs_r, xs_w; 4674 struct scsi_rw_16 *cr, *cw; 4675 int c, s, slept, percent = 0, old_percent = -1; 4676 u_int8_t *buf; 4677 4678 whole_blk = sd->sd_meta->ssdi.ssd_size / SR_REBUILD_IO_SIZE; 4679 partial_blk = sd->sd_meta->ssdi.ssd_size % SR_REBUILD_IO_SIZE; 4680 4681 restart = sd->sd_meta->ssd_rebuild / SR_REBUILD_IO_SIZE; 4682 if (restart > whole_blk) { 4683 printf("%s: bogus rebuild restart offset, starting from 0\n", 4684 DEVNAME(sc)); 4685 restart = 0; 4686 } 4687 if (restart) { 4688 /* 4689 * XXX there is a hole here; there is a possibility that we 4690 * had a restart however the chunk that was supposed to 4691 * be rebuilt is no longer valid; we can reach this situation 4692 * when a rebuild is in progress and the box crashes and 4693 * on reboot the rebuild chunk is different (like zero'd or 4694 * replaced). We need to check the uuid of the chunk that is 4695 * being rebuilt to assert this. 4696 */ 4697 percent = sr_rebuild_percent(sd); 4698 printf("%s: resuming rebuild on %s at %d%%\n", 4699 DEVNAME(sc), sd->sd_meta->ssd_devname, percent); 4700 } 4701 4702 /* currently this is 64k therefore we can use dma_alloc */ 4703 buf = dma_alloc(SR_REBUILD_IO_SIZE << DEV_BSHIFT, PR_WAITOK); 4704 for (blk = restart; blk <= whole_blk; blk++) { 4705 lba = blk * SR_REBUILD_IO_SIZE; 4706 sz = SR_REBUILD_IO_SIZE; 4707 if (blk == whole_blk) { 4708 if (partial_blk == 0) 4709 break; 4710 sz = partial_blk; 4711 } 4712 4713 /* get some wu */ 4714 wu_r = sr_scsi_wu_get(sd, 0); 4715 wu_w = sr_scsi_wu_get(sd, 0); 4716 4717 DNPRINTF(SR_D_REBUILD, "%s: %s rebuild wu_r %p, wu_w %p\n", 4718 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, wu_r, wu_w); 4719 4720 /* setup read io */ 4721 bzero(&xs_r, sizeof xs_r); 4722 xs_r.error = XS_NOERROR; 4723 xs_r.flags = SCSI_DATA_IN; 4724 xs_r.datalen = sz << DEV_BSHIFT; 4725 xs_r.data = buf; 4726 xs_r.cmdlen = sizeof(*cr); 4727 cr = (struct scsi_rw_16 *)&xs_r.cmd; 4728 cr->opcode = READ_16; 4729 _lto4b(sz, cr->length); 4730 _lto8b(lba, cr->addr); 4731 wu_r->swu_state = SR_WU_CONSTRUCT; 4732 wu_r->swu_flags |= SR_WUF_REBUILD; 4733 wu_r->swu_xs = &xs_r; 4734 if (sd->sd_scsi_rw(wu_r)) { 4735 printf("%s: could not create read io\n", 4736 DEVNAME(sc)); 4737 goto fail; 4738 } 4739 4740 /* setup write io */ 4741 bzero(&xs_w, sizeof xs_w); 4742 xs_w.error = XS_NOERROR; 4743 xs_w.flags = SCSI_DATA_OUT; 4744 xs_w.datalen = sz << DEV_BSHIFT; 4745 xs_w.data = buf; 4746 xs_w.cmdlen = sizeof(*cw); 4747 cw = (struct scsi_rw_16 *)&xs_w.cmd; 4748 cw->opcode = WRITE_16; 4749 _lto4b(sz, cw->length); 4750 _lto8b(lba, cw->addr); 4751 wu_w->swu_state = SR_WU_CONSTRUCT; 4752 wu_w->swu_flags |= SR_WUF_REBUILD | SR_WUF_WAKEUP; 4753 wu_w->swu_xs = &xs_w; 4754 if (sd->sd_scsi_rw(wu_w)) { 4755 printf("%s: could not create write io\n", 4756 DEVNAME(sc)); 4757 goto fail; 4758 } 4759 4760 /* 4761 * collide with the read io so that we get automatically 4762 * started when the read is done 4763 */ 4764 wu_w->swu_state = SR_WU_DEFERRED; 4765 wu_r->swu_collider = wu_w; 4766 s = splbio(); 4767 TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu_w, swu_link); 4768 splx(s); 4769 4770 DNPRINTF(SR_D_REBUILD, "%s: %s rebuild scheduling wu_r %p\n", 4771 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, wu_r); 4772 4773 wu_r->swu_state = SR_WU_INPROGRESS; 4774 sr_schedule_wu(wu_r); 4775 4776 /* wait for write completion */ 4777 slept = 0; 4778 while ((wu_w->swu_flags & SR_WUF_REBUILDIOCOMP) == 0) { 4779 tsleep_nsec(wu_w, PRIBIO, "sr_rebuild", INFSLP); 4780 slept = 1; 4781 } 4782 /* yield if we didn't sleep */ 4783 if (slept == 0) 4784 tsleep_nsec(sc, PWAIT, "sr_yield", MSEC_TO_NSEC(1)); 4785 4786 sr_scsi_wu_put(sd, wu_r); 4787 sr_scsi_wu_put(sd, wu_w); 4788 4789 sd->sd_meta->ssd_rebuild = lba; 4790 4791 /* XXX - this should be based on size, not percentage. */ 4792 /* save metadata every percent */ 4793 percent = sr_rebuild_percent(sd); 4794 if (percent != old_percent && blk != whole_blk) { 4795 if (sr_meta_save(sd, SR_META_DIRTY)) 4796 printf("%s: could not save metadata to %s\n", 4797 DEVNAME(sc), sd->sd_meta->ssd_devname); 4798 old_percent = percent; 4799 } 4800 4801 if (sd->sd_reb_abort) 4802 goto abort; 4803 } 4804 4805 /* all done */ 4806 sd->sd_meta->ssd_rebuild = 0; 4807 for (c = 0; c < sd->sd_meta->ssdi.ssd_chunk_no; c++) { 4808 if (sd->sd_vol.sv_chunks[c]->src_meta.scm_status == 4809 BIOC_SDREBUILD) { 4810 sd->sd_set_chunk_state(sd, c, BIOC_SDONLINE); 4811 break; 4812 } 4813 } 4814 4815 abort: 4816 if (sr_meta_save(sd, SR_META_DIRTY)) 4817 printf("%s: could not save metadata to %s\n", 4818 DEVNAME(sc), sd->sd_meta->ssd_devname); 4819 fail: 4820 dma_free(buf, SR_REBUILD_IO_SIZE << DEV_BSHIFT); 4821 } 4822 4823 struct sr_discipline * 4824 sr_find_discipline(struct sr_softc *sc, const char *devname) 4825 { 4826 struct sr_discipline *sd; 4827 4828 TAILQ_FOREACH(sd, &sc->sc_dis_list, sd_link) 4829 if (!strncmp(sd->sd_meta->ssd_devname, devname, 4830 sizeof(sd->sd_meta->ssd_devname))) 4831 break; 4832 return sd; 4833 } 4834 4835 #ifndef SMALL_KERNEL 4836 int 4837 sr_sensors_create(struct sr_discipline *sd) 4838 { 4839 struct sr_softc *sc = sd->sd_sc; 4840 int rv = 1; 4841 4842 DNPRINTF(SR_D_STATE, "%s: %s: sr_sensors_create\n", 4843 DEVNAME(sc), sd->sd_meta->ssd_devname); 4844 4845 sd->sd_vol.sv_sensor.type = SENSOR_DRIVE; 4846 sd->sd_vol.sv_sensor.status = SENSOR_S_UNKNOWN; 4847 strlcpy(sd->sd_vol.sv_sensor.desc, sd->sd_meta->ssd_devname, 4848 sizeof(sd->sd_vol.sv_sensor.desc)); 4849 4850 sensor_attach(&sc->sc_sensordev, &sd->sd_vol.sv_sensor); 4851 sd->sd_vol.sv_sensor_attached = 1; 4852 4853 if (sc->sc_sensor_task == NULL) { 4854 sc->sc_sensor_task = sensor_task_register(sc, 4855 sr_sensors_refresh, 10); 4856 if (sc->sc_sensor_task == NULL) 4857 goto bad; 4858 } 4859 4860 rv = 0; 4861 bad: 4862 return (rv); 4863 } 4864 4865 void 4866 sr_sensors_delete(struct sr_discipline *sd) 4867 { 4868 DNPRINTF(SR_D_STATE, "%s: sr_sensors_delete\n", DEVNAME(sd->sd_sc)); 4869 4870 if (sd->sd_vol.sv_sensor_attached) 4871 sensor_detach(&sd->sd_sc->sc_sensordev, &sd->sd_vol.sv_sensor); 4872 } 4873 4874 void 4875 sr_sensors_refresh(void *arg) 4876 { 4877 struct sr_softc *sc = arg; 4878 struct sr_volume *sv; 4879 struct sr_discipline *sd; 4880 4881 DNPRINTF(SR_D_STATE, "%s: sr_sensors_refresh\n", DEVNAME(sc)); 4882 4883 TAILQ_FOREACH(sd, &sc->sc_dis_list, sd_link) { 4884 sv = &sd->sd_vol; 4885 4886 switch(sd->sd_vol_status) { 4887 case BIOC_SVOFFLINE: 4888 sv->sv_sensor.value = SENSOR_DRIVE_FAIL; 4889 sv->sv_sensor.status = SENSOR_S_CRIT; 4890 break; 4891 4892 case BIOC_SVDEGRADED: 4893 sv->sv_sensor.value = SENSOR_DRIVE_PFAIL; 4894 sv->sv_sensor.status = SENSOR_S_WARN; 4895 break; 4896 4897 case BIOC_SVREBUILD: 4898 sv->sv_sensor.value = SENSOR_DRIVE_REBUILD; 4899 sv->sv_sensor.status = SENSOR_S_WARN; 4900 break; 4901 4902 case BIOC_SVSCRUB: 4903 case BIOC_SVONLINE: 4904 sv->sv_sensor.value = SENSOR_DRIVE_ONLINE; 4905 sv->sv_sensor.status = SENSOR_S_OK; 4906 break; 4907 4908 default: 4909 sv->sv_sensor.value = 0; /* unknown */ 4910 sv->sv_sensor.status = SENSOR_S_UNKNOWN; 4911 } 4912 } 4913 } 4914 #endif /* SMALL_KERNEL */ 4915 4916 #ifdef SR_FANCY_STATS 4917 void sr_print_stats(void); 4918 4919 void 4920 sr_print_stats(void) 4921 { 4922 struct sr_softc *sc = softraid0; 4923 struct sr_discipline *sd; 4924 4925 if (sc == NULL) { 4926 printf("no softraid softc found\n"); 4927 return; 4928 } 4929 4930 TAILQ_FOREACH(sd, &sc->sc_dis_list, sd_link) { 4931 printf("%s: ios pending %d, collisions %llu\n", 4932 sd->sd_meta->ssd_devname, 4933 sd->sd_wu_pending, 4934 sd->sd_wu_collisions); 4935 } 4936 } 4937 #endif /* SR_FANCY_STATS */ 4938 4939 #ifdef SR_DEBUG 4940 void 4941 sr_meta_print(struct sr_metadata *m) 4942 { 4943 int i; 4944 struct sr_meta_chunk *mc; 4945 struct sr_meta_opt_hdr *omh; 4946 4947 if (!(sr_debug & SR_D_META)) 4948 return; 4949 4950 printf("\tssd_magic 0x%llx\n", m->ssdi.ssd_magic); 4951 printf("\tssd_version %d\n", m->ssdi.ssd_version); 4952 printf("\tssd_vol_flags 0x%x\n", m->ssdi.ssd_vol_flags); 4953 printf("\tssd_uuid "); 4954 sr_uuid_print(&m->ssdi.ssd_uuid, 1); 4955 printf("\tssd_chunk_no %d\n", m->ssdi.ssd_chunk_no); 4956 printf("\tssd_chunk_id %d\n", m->ssdi.ssd_chunk_id); 4957 printf("\tssd_opt_no %d\n", m->ssdi.ssd_opt_no); 4958 printf("\tssd_volid %d\n", m->ssdi.ssd_volid); 4959 printf("\tssd_level %d\n", m->ssdi.ssd_level); 4960 printf("\tssd_size %lld\n", m->ssdi.ssd_size); 4961 printf("\tssd_devname %s\n", m->ssd_devname); 4962 printf("\tssd_vendor %s\n", m->ssdi.ssd_vendor); 4963 printf("\tssd_product %s\n", m->ssdi.ssd_product); 4964 printf("\tssd_revision %s\n", m->ssdi.ssd_revision); 4965 printf("\tssd_strip_size %d\n", m->ssdi.ssd_strip_size); 4966 printf("\tssd_checksum "); 4967 sr_checksum_print(m->ssd_checksum); 4968 printf("\n"); 4969 printf("\tssd_meta_flags 0x%x\n", m->ssd_meta_flags); 4970 printf("\tssd_ondisk %llu\n", m->ssd_ondisk); 4971 4972 mc = (struct sr_meta_chunk *)(m + 1); 4973 for (i = 0; i < m->ssdi.ssd_chunk_no; i++, mc++) { 4974 printf("\t\tscm_volid %d\n", mc->scmi.scm_volid); 4975 printf("\t\tscm_chunk_id %d\n", mc->scmi.scm_chunk_id); 4976 printf("\t\tscm_devname %s\n", mc->scmi.scm_devname); 4977 printf("\t\tscm_size %lld\n", mc->scmi.scm_size); 4978 printf("\t\tscm_coerced_size %lld\n",mc->scmi.scm_coerced_size); 4979 printf("\t\tscm_uuid "); 4980 sr_uuid_print(&mc->scmi.scm_uuid, 1); 4981 printf("\t\tscm_checksum "); 4982 sr_checksum_print(mc->scm_checksum); 4983 printf("\n"); 4984 printf("\t\tscm_status %d\n", mc->scm_status); 4985 } 4986 4987 omh = (struct sr_meta_opt_hdr *)((u_int8_t *)(m + 1) + 4988 sizeof(struct sr_meta_chunk) * m->ssdi.ssd_chunk_no); 4989 for (i = 0; i < m->ssdi.ssd_opt_no; i++) { 4990 printf("\t\t\tsom_type %d\n", omh->som_type); 4991 printf("\t\t\tsom_checksum "); 4992 sr_checksum_print(omh->som_checksum); 4993 printf("\n"); 4994 omh = (struct sr_meta_opt_hdr *)((void *)omh + 4995 omh->som_length); 4996 } 4997 } 4998 4999 void 5000 sr_dump_block(void *blk, int len) 5001 { 5002 uint8_t *b = blk; 5003 int i, j, c; 5004 5005 for (i = 0; i < len; i += 16) { 5006 for (j = 0; j < 16; j++) 5007 printf("%.2x ", b[i + j]); 5008 printf(" "); 5009 for (j = 0; j < 16; j++) { 5010 c = b[i + j]; 5011 if (c < ' ' || c > 'z' || i + j > len) 5012 c = '.'; 5013 printf("%c", c); 5014 } 5015 printf("\n"); 5016 } 5017 } 5018 5019 void 5020 sr_dump_mem(u_int8_t *p, int len) 5021 { 5022 int i; 5023 5024 for (i = 0; i < len; i++) 5025 printf("%02x ", *p++); 5026 printf("\n"); 5027 } 5028 5029 #endif /* SR_DEBUG */ 5030 5031 #ifdef HIBERNATE 5032 /* 5033 * Side-effect free (no malloc, printf, pool, splx) softraid crypto writer. 5034 * 5035 * This function must perform the following: 5036 * 1. Determine the underlying device's own side-effect free I/O function 5037 * (eg, ahci_hibernate_io, wd_hibernate_io, etc). 5038 * 2. Store enough information in the provided page argument for subsequent 5039 * I/O calls (such as the crypto discipline structure for the keys, the 5040 * offset of the softraid partition on the underlying disk, as well as 5041 * the offset of the swap partition within the crypto volume. 5042 * 3. Encrypt the incoming data using the sr_discipline keys, then pass 5043 * the request to the underlying device's own I/O function. 5044 */ 5045 int 5046 sr_hibernate_io(dev_t dev, daddr_t blkno, vaddr_t addr, size_t size, int op, void *page) 5047 { 5048 /* Struct for stashing data obtained on HIB_INIT. 5049 * XXX 5050 * We share the page with the underlying device's own 5051 * side-effect free I/O function, so we pad our data to 5052 * the end of the page. Presently this does not overlap 5053 * with either of the two other side-effect free i/o 5054 * functions (ahci/wd). 5055 */ 5056 struct { 5057 char pad[3072]; 5058 struct sr_discipline *srd; 5059 hibio_fn subfn; /* underlying device i/o fn */ 5060 dev_t subdev; /* underlying device dev_t */ 5061 daddr_t sr_swapoff; /* ofs of swap part in sr volume */ 5062 char buf[DEV_BSIZE]; /* encryption performed into this buf */ 5063 } *my = page; 5064 extern struct cfdriver sd_cd; 5065 char errstr[128], *dl_ret; 5066 struct sr_chunk *schunk; 5067 struct sd_softc *sd; 5068 struct aes_xts_ctx ctx; 5069 struct sr_softc *sc; 5070 struct device *dv; 5071 daddr_t key_blkno; 5072 uint32_t sub_raidoff; /* ofs of sr part in underlying dev */ 5073 struct disklabel dl; 5074 struct partition *pp; 5075 size_t i, j; 5076 u_char iv[8]; 5077 5078 /* 5079 * In HIB_INIT, we are passed the swap partition size and offset 5080 * in 'size' and 'blkno' respectively. These are relative to the 5081 * start of the softraid partition, and we need to save these 5082 * for later translation to the underlying device's layout. 5083 */ 5084 if (op == HIB_INIT) { 5085 dv = disk_lookup(&sd_cd, DISKUNIT(dev)); 5086 sd = (struct sd_softc *)dv; 5087 sc = (struct sr_softc *)dv->dv_parent->dv_parent; 5088 5089 /* 5090 * Look up the sr discipline. This is used to determine 5091 * if we are SR crypto and what the underlying device is. 5092 */ 5093 my->srd = sc->sc_targets[sd->sc_link->target]; 5094 DNPRINTF(SR_D_MISC, "sr_hibernate_io: discipline is %s\n", 5095 my->srd->sd_name); 5096 if (strncmp(my->srd->sd_name, "CRYPTO", 5097 sizeof(my->srd->sd_name))) 5098 return (ENOTSUP); 5099 5100 /* Find the underlying device */ 5101 schunk = my->srd->sd_vol.sv_chunks[0]; 5102 my->subdev = schunk->src_dev_mm; 5103 5104 /* 5105 * Find the appropriate underlying device side effect free 5106 * I/O function, based on the type of device it is. 5107 */ 5108 my->subfn = get_hibernate_io_function(my->subdev); 5109 if (!my->subfn) 5110 return (ENODEV); 5111 5112 /* 5113 * Find blkno where this raid partition starts on 5114 * the underlying disk. 5115 */ 5116 dl_ret = disk_readlabel(&dl, my->subdev, errstr, 5117 sizeof(errstr)); 5118 if (dl_ret) { 5119 printf("Hibernate error reading disklabel: %s\n", dl_ret); 5120 return (ENOTSUP); 5121 } 5122 5123 pp = &dl.d_partitions[DISKPART(my->subdev)]; 5124 if (pp->p_fstype != FS_RAID || DL_GETPSIZE(pp) == 0) 5125 return (ENOTSUP); 5126 5127 /* Find the blkno of the SR part in the underlying device */ 5128 sub_raidoff = my->srd->sd_meta->ssd_data_blkno + 5129 DL_SECTOBLK(&dl, DL_GETPOFFSET(pp)); 5130 DNPRINTF(SR_D_MISC,"sr_hibernate_io: blk trans ofs: %d blks\n", 5131 sub_raidoff); 5132 5133 /* Save the blkno of the swap partition in the SR disk */ 5134 my->sr_swapoff = blkno; 5135 5136 /* Initialize the sub-device */ 5137 return my->subfn(my->subdev, sub_raidoff + blkno, 5138 addr, size, op, page); 5139 } 5140 5141 /* Hibernate only uses (and we only support) writes */ 5142 if (op != HIB_W) 5143 return (ENOTSUP); 5144 5145 /* 5146 * Blocks act as the IV for the encryption. These block numbers 5147 * are relative to the start of the sr partition, but the 'blkno' 5148 * passed above is relative to the start of the swap partition 5149 * inside the sr partition, so bias appropriately. 5150 */ 5151 key_blkno = my->sr_swapoff + blkno; 5152 5153 /* Process each disk block one at a time. */ 5154 for (i = 0; i < size; i += DEV_BSIZE) { 5155 int res; 5156 5157 bzero(&ctx, sizeof(ctx)); 5158 5159 /* 5160 * Set encryption key (from the sr discipline stashed 5161 * during HIB_INIT. This code is based on the softraid 5162 * bootblock code. 5163 */ 5164 aes_xts_setkey(&ctx, my->srd->mds.mdd_crypto.scr_key[0], 64); 5165 /* We encrypt DEV_BSIZE bytes at a time in my->buf */ 5166 memcpy(my->buf, ((char *)addr) + i, DEV_BSIZE); 5167 5168 /* Block number is the IV */ 5169 memcpy(&iv, &key_blkno, sizeof(key_blkno)); 5170 aes_xts_reinit(&ctx, iv); 5171 5172 /* Encrypt DEV_BSIZE bytes, AES_XTS_BLOCKSIZE bytes at a time */ 5173 for (j = 0; j < DEV_BSIZE; j += AES_XTS_BLOCKSIZE) 5174 aes_xts_encrypt(&ctx, my->buf + j); 5175 5176 /* 5177 * Write one block out from my->buf to the underlying device 5178 * using its own side-effect free I/O function. 5179 */ 5180 res = my->subfn(my->subdev, blkno + (i / DEV_BSIZE), 5181 (vaddr_t)(my->buf), DEV_BSIZE, op, page); 5182 if (res != 0) 5183 return (res); 5184 key_blkno++; 5185 } 5186 return (0); 5187 } 5188 #endif /* HIBERNATE */ 5189