1 /* $OpenBSD: softraid_raid1.c,v 1.48 2013/03/31 13:31:44 jsing Exp $ */ 2 /* 3 * Copyright (c) 2007 Marco Peereboom <marco@peereboom.us> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 18 #include "bio.h" 19 20 #include <sys/param.h> 21 #include <sys/systm.h> 22 #include <sys/buf.h> 23 #include <sys/device.h> 24 #include <sys/ioctl.h> 25 #include <sys/proc.h> 26 #include <sys/malloc.h> 27 #include <sys/kernel.h> 28 #include <sys/disk.h> 29 #include <sys/rwlock.h> 30 #include <sys/queue.h> 31 #include <sys/fcntl.h> 32 #include <sys/disklabel.h> 33 #include <sys/mount.h> 34 #include <sys/sensors.h> 35 #include <sys/stat.h> 36 #include <sys/conf.h> 37 #include <sys/uio.h> 38 39 #include <scsi/scsi_all.h> 40 #include <scsi/scsiconf.h> 41 #include <scsi/scsi_disk.h> 42 43 #include <dev/softraidvar.h> 44 #include <dev/rndvar.h> 45 46 /* RAID 1 functions. */ 47 int sr_raid1_create(struct sr_discipline *, struct bioc_createraid *, 48 int, int64_t); 49 int sr_raid1_assemble(struct sr_discipline *, struct bioc_createraid *, 50 int, void *); 51 int sr_raid1_init(struct sr_discipline *sd); 52 int sr_raid1_rw(struct sr_workunit *); 53 void sr_raid1_intr(struct buf *); 54 void sr_raid1_set_chunk_state(struct sr_discipline *, int, int); 55 void sr_raid1_set_vol_state(struct sr_discipline *); 56 57 /* Discipline initialisation. */ 58 void 59 sr_raid1_discipline_init(struct sr_discipline *sd) 60 { 61 /* Fill out discipline members. */ 62 sd->sd_type = SR_MD_RAID1; 63 strlcpy(sd->sd_name, "RAID 1", sizeof(sd->sd_name)); 64 sd->sd_capabilities = SR_CAP_SYSTEM_DISK | SR_CAP_AUTO_ASSEMBLE | 65 SR_CAP_REBUILD | SR_CAP_REDUNDANT; 66 sd->sd_max_wu = SR_RAID1_NOWU; 67 68 /* Setup discipline specific function pointers. */ 69 sd->sd_assemble = sr_raid1_assemble; 70 sd->sd_create = sr_raid1_create; 71 sd->sd_scsi_rw = sr_raid1_rw; 72 sd->sd_scsi_intr = sr_raid1_intr; 73 sd->sd_set_chunk_state = sr_raid1_set_chunk_state; 74 sd->sd_set_vol_state = sr_raid1_set_vol_state; 75 } 76 77 int 78 sr_raid1_create(struct sr_discipline *sd, struct bioc_createraid *bc, 79 int no_chunk, int64_t coerced_size) 80 { 81 82 if (no_chunk < 2) { 83 sr_error(sd->sd_sc, "RAID 1 requires two or more chunks"); 84 return EINVAL; 85 } 86 87 sd->sd_meta->ssdi.ssd_size = coerced_size; 88 89 return sr_raid1_init(sd); 90 } 91 92 int 93 sr_raid1_assemble(struct sr_discipline *sd, struct bioc_createraid *bc, 94 int no_chunk, void *data) 95 { 96 return sr_raid1_init(sd); 97 } 98 99 int 100 sr_raid1_init(struct sr_discipline *sd) 101 { 102 sd->sd_max_ccb_per_wu = sd->sd_meta->ssdi.ssd_chunk_no; 103 104 return 0; 105 } 106 107 void 108 sr_raid1_set_chunk_state(struct sr_discipline *sd, int c, int new_state) 109 { 110 int old_state, s; 111 112 DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n", 113 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, 114 sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state); 115 116 /* ok to go to splbio since this only happens in error path */ 117 s = splbio(); 118 old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status; 119 120 /* multiple IOs to the same chunk that fail will come through here */ 121 if (old_state == new_state) 122 goto done; 123 124 switch (old_state) { 125 case BIOC_SDONLINE: 126 switch (new_state) { 127 case BIOC_SDOFFLINE: 128 case BIOC_SDSCRUB: 129 break; 130 default: 131 goto die; 132 } 133 break; 134 135 case BIOC_SDOFFLINE: 136 switch (new_state) { 137 case BIOC_SDREBUILD: 138 case BIOC_SDHOTSPARE: 139 break; 140 default: 141 goto die; 142 } 143 break; 144 145 case BIOC_SDSCRUB: 146 if (new_state == BIOC_SDONLINE) { 147 ; 148 } else 149 goto die; 150 break; 151 152 case BIOC_SDREBUILD: 153 switch (new_state) { 154 case BIOC_SDONLINE: 155 break; 156 case BIOC_SDOFFLINE: 157 /* Abort rebuild since the rebuild chunk disappeared. */ 158 sd->sd_reb_abort = 1; 159 break; 160 default: 161 goto die; 162 } 163 break; 164 165 case BIOC_SDHOTSPARE: 166 switch (new_state) { 167 case BIOC_SDOFFLINE: 168 case BIOC_SDREBUILD: 169 break; 170 default: 171 goto die; 172 } 173 break; 174 175 default: 176 die: 177 splx(s); /* XXX */ 178 panic("%s: %s: %s: invalid chunk state transition " 179 "%d -> %d\n", DEVNAME(sd->sd_sc), 180 sd->sd_meta->ssd_devname, 181 sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, 182 old_state, new_state); 183 /* NOTREACHED */ 184 } 185 186 sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state; 187 sd->sd_set_vol_state(sd); 188 189 sd->sd_must_flush = 1; 190 workq_add_task(NULL, 0, sr_meta_save_callback, sd, NULL); 191 done: 192 splx(s); 193 } 194 195 void 196 sr_raid1_set_vol_state(struct sr_discipline *sd) 197 { 198 int states[SR_MAX_STATES]; 199 int new_state, i, s, nd; 200 int old_state = sd->sd_vol_status; 201 202 DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n", 203 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname); 204 205 nd = sd->sd_meta->ssdi.ssd_chunk_no; 206 207 #ifdef SR_DEBUG 208 for (i = 0; i < nd; i++) 209 DNPRINTF(SR_D_STATE, "%s: chunk %d status = %u\n", 210 DEVNAME(sd->sd_sc), i, 211 sd->sd_vol.sv_chunks[i]->src_meta.scm_status); 212 #endif 213 214 for (i = 0; i < SR_MAX_STATES; i++) 215 states[i] = 0; 216 217 for (i = 0; i < nd; i++) { 218 s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status; 219 if (s >= SR_MAX_STATES) 220 panic("%s: %s: %s: invalid chunk state", 221 DEVNAME(sd->sd_sc), 222 sd->sd_meta->ssd_devname, 223 sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname); 224 states[s]++; 225 } 226 227 if (states[BIOC_SDONLINE] == nd) 228 new_state = BIOC_SVONLINE; 229 else if (states[BIOC_SDONLINE] == 0) 230 new_state = BIOC_SVOFFLINE; 231 else if (states[BIOC_SDSCRUB] != 0) 232 new_state = BIOC_SVSCRUB; 233 else if (states[BIOC_SDREBUILD] != 0) 234 new_state = BIOC_SVREBUILD; 235 else if (states[BIOC_SDOFFLINE] != 0) 236 new_state = BIOC_SVDEGRADED; 237 else { 238 DNPRINTF(SR_D_STATE, "%s: invalid volume state, old state " 239 "was %d\n", DEVNAME(sd->sd_sc), old_state); 240 panic("invalid volume state"); 241 } 242 243 DNPRINTF(SR_D_STATE, "%s: %s: sr_raid1_set_vol_state %d -> %d\n", 244 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, 245 old_state, new_state); 246 247 switch (old_state) { 248 case BIOC_SVONLINE: 249 switch (new_state) { 250 case BIOC_SVONLINE: /* can go to same state */ 251 case BIOC_SVOFFLINE: 252 case BIOC_SVDEGRADED: 253 case BIOC_SVREBUILD: /* happens on boot */ 254 break; 255 default: 256 goto die; 257 } 258 break; 259 260 case BIOC_SVOFFLINE: 261 /* XXX this might be a little too much */ 262 goto die; 263 264 case BIOC_SVSCRUB: 265 switch (new_state) { 266 case BIOC_SVONLINE: 267 case BIOC_SVOFFLINE: 268 case BIOC_SVDEGRADED: 269 case BIOC_SVSCRUB: /* can go to same state */ 270 break; 271 default: 272 goto die; 273 } 274 break; 275 276 case BIOC_SVBUILDING: 277 switch (new_state) { 278 case BIOC_SVONLINE: 279 case BIOC_SVOFFLINE: 280 case BIOC_SVBUILDING: /* can go to the same state */ 281 break; 282 default: 283 goto die; 284 } 285 break; 286 287 case BIOC_SVREBUILD: 288 switch (new_state) { 289 case BIOC_SVONLINE: 290 case BIOC_SVOFFLINE: 291 case BIOC_SVDEGRADED: 292 case BIOC_SVREBUILD: /* can go to the same state */ 293 break; 294 default: 295 goto die; 296 } 297 break; 298 299 case BIOC_SVDEGRADED: 300 switch (new_state) { 301 case BIOC_SVOFFLINE: 302 case BIOC_SVREBUILD: 303 case BIOC_SVDEGRADED: /* can go to the same state */ 304 break; 305 default: 306 goto die; 307 } 308 break; 309 310 default: 311 die: 312 panic("%s: %s: invalid volume state transition " 313 "%d -> %d\n", DEVNAME(sd->sd_sc), 314 sd->sd_meta->ssd_devname, 315 old_state, new_state); 316 /* NOTREACHED */ 317 } 318 319 sd->sd_vol_status = new_state; 320 321 /* If we have just become degraded, look for a hotspare. */ 322 if (new_state == BIOC_SVDEGRADED) 323 workq_add_task(NULL, 0, sr_hotspare_rebuild_callback, sd, NULL); 324 } 325 326 int 327 sr_raid1_rw(struct sr_workunit *wu) 328 { 329 struct sr_discipline *sd = wu->swu_dis; 330 struct scsi_xfer *xs = wu->swu_xs; 331 struct sr_ccb *ccb; 332 struct sr_chunk *scp; 333 int ios, chunk, i, s, rt; 334 daddr64_t blk; 335 336 /* blk and scsi error will be handled by sr_validate_io */ 337 if (sr_validate_io(wu, &blk, "sr_raid1_rw")) 338 goto bad; 339 340 /* calculate physical block */ 341 blk += sd->sd_meta->ssd_data_offset; 342 343 if (xs->flags & SCSI_DATA_IN) 344 ios = 1; 345 else 346 ios = sd->sd_meta->ssdi.ssd_chunk_no; 347 348 for (i = 0; i < ios; i++) { 349 if (xs->flags & SCSI_DATA_IN) { 350 rt = 0; 351 ragain: 352 /* interleave reads */ 353 chunk = sd->mds.mdd_raid1.sr1_counter++ % 354 sd->sd_meta->ssdi.ssd_chunk_no; 355 scp = sd->sd_vol.sv_chunks[chunk]; 356 switch (scp->src_meta.scm_status) { 357 case BIOC_SDONLINE: 358 case BIOC_SDSCRUB: 359 break; 360 361 case BIOC_SDOFFLINE: 362 case BIOC_SDREBUILD: 363 case BIOC_SDHOTSPARE: 364 if (rt++ < sd->sd_meta->ssdi.ssd_chunk_no) 365 goto ragain; 366 367 /* FALLTHROUGH */ 368 default: 369 /* volume offline */ 370 printf("%s: is offline, cannot read\n", 371 DEVNAME(sd->sd_sc)); 372 goto bad; 373 } 374 } else { 375 /* writes go on all working disks */ 376 chunk = i; 377 scp = sd->sd_vol.sv_chunks[chunk]; 378 switch (scp->src_meta.scm_status) { 379 case BIOC_SDONLINE: 380 case BIOC_SDSCRUB: 381 case BIOC_SDREBUILD: 382 break; 383 384 case BIOC_SDHOTSPARE: /* should never happen */ 385 case BIOC_SDOFFLINE: 386 continue; 387 388 default: 389 goto bad; 390 } 391 } 392 393 ccb = sr_ccb_rw(sd, chunk, blk, xs->datalen, xs->data, 394 xs->flags, 0); 395 if (!ccb) { 396 /* should never happen but handle more gracefully */ 397 printf("%s: %s: too many ccbs queued\n", 398 DEVNAME(sd->sd_sc), 399 sd->sd_meta->ssd_devname); 400 goto bad; 401 } 402 sr_wu_enqueue_ccb(wu, ccb); 403 } 404 405 s = splbio(); 406 407 /* rebuild io, let rebuild routine deal with it */ 408 if (wu->swu_flags & SR_WUF_REBUILD) 409 goto queued; 410 411 /* current io failed, restart */ 412 if (wu->swu_state == SR_WU_RESTART) 413 goto start; 414 415 /* deferred io failed, don't restart */ 416 if (wu->swu_state == SR_WU_REQUEUE) 417 goto queued; 418 419 if (sr_check_io_collision(wu)) 420 goto queued; 421 422 start: 423 sr_raid_startwu(wu); 424 queued: 425 splx(s); 426 return (0); 427 bad: 428 /* wu is unwound by sr_wu_put */ 429 return (1); 430 } 431 432 void 433 sr_raid1_intr(struct buf *bp) 434 { 435 struct sr_ccb *ccb = (struct sr_ccb *)bp; 436 struct sr_workunit *wu = ccb->ccb_wu, *wup; 437 struct sr_discipline *sd = wu->swu_dis; 438 struct scsi_xfer *xs = wu->swu_xs; 439 struct sr_softc *sc = sd->sd_sc; 440 int s; 441 442 DNPRINTF(SR_D_INTR, "%s: sr_intr bp %x xs %x\n", 443 DEVNAME(sc), bp, xs); 444 445 s = splbio(); 446 447 sr_ccb_done(ccb); 448 449 DNPRINTF(SR_D_INTR, "%s: sr_intr: comp: %d count: %d failed: %d\n", 450 DEVNAME(sc), wu->swu_ios_complete, wu->swu_io_count, 451 wu->swu_ios_failed); 452 453 if (wu->swu_ios_complete < wu->swu_io_count) 454 goto done; 455 456 xs->error = XS_NOERROR; 457 458 /* if all ios failed, retry reads and give up on writes */ 459 if (wu->swu_ios_failed == wu->swu_ios_complete) { 460 if (xs->flags & SCSI_DATA_IN) { 461 printf("%s: retrying read on block %lld\n", 462 DEVNAME(sc), ccb->ccb_buf.b_blkno); 463 if (wu->swu_cb_active == 1) 464 panic("%s: sr_raid1_intr_cb", 465 DEVNAME(sd->sd_sc)); 466 sr_wu_release_ccbs(wu); 467 wu->swu_state = SR_WU_RESTART; 468 if (sd->sd_scsi_rw(wu) == 0) 469 goto done; 470 xs->error = XS_DRIVER_STUFFUP; 471 } else { 472 printf("%s: permanently failing write on block %lld\n", 473 DEVNAME(sc), ccb->ccb_buf.b_blkno); 474 xs->error = XS_DRIVER_STUFFUP; 475 } 476 } 477 478 TAILQ_FOREACH(wup, &sd->sd_wu_pendq, swu_link) 479 if (wu == wup) 480 break; 481 482 if (wup == NULL) 483 panic("%s: wu %p not on pending queue", 484 DEVNAME(sd->sd_sc), wu); 485 486 /* wu on pendq, remove */ 487 TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link); 488 489 if (wu->swu_collider) { 490 if (wu->swu_ios_failed) 491 sr_raid_recreate_wu(wu->swu_collider); 492 493 /* XXX Should the collider be failed if this xs failed? */ 494 /* restart deferred wu */ 495 wu->swu_collider->swu_state = SR_WU_INPROGRESS; 496 TAILQ_REMOVE(&sd->sd_wu_defq, wu->swu_collider, swu_link); 497 sr_raid_startwu(wu->swu_collider); 498 } 499 500 if (wu->swu_flags & SR_WUF_REBUILD) 501 wu->swu_flags |= SR_WUF_REBUILDIOCOMP; 502 if (wu->swu_flags & SR_WUF_WAKEUP) 503 wakeup(wu); 504 if (!(wu->swu_flags & SR_WUF_REBUILD)) 505 sr_scsi_done(sd, xs); 506 507 done: 508 splx(s); 509 } 510