1 /* $OpenBSD: softraid_raid1.c,v 1.63 2015/07/21 03:30:51 krw Exp $ */ 2 /* 3 * Copyright (c) 2007 Marco Peereboom <marco@peereboom.us> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 18 #include "bio.h" 19 20 #include <sys/param.h> 21 #include <sys/systm.h> 22 #include <sys/buf.h> 23 #include <sys/device.h> 24 #include <sys/ioctl.h> 25 #include <sys/malloc.h> 26 #include <sys/kernel.h> 27 #include <sys/disk.h> 28 #include <sys/rwlock.h> 29 #include <sys/queue.h> 30 #include <sys/fcntl.h> 31 #include <sys/mount.h> 32 #include <sys/sensors.h> 33 #include <sys/stat.h> 34 #include <sys/task.h> 35 #include <sys/conf.h> 36 #include <sys/uio.h> 37 38 #include <scsi/scsi_all.h> 39 #include <scsi/scsiconf.h> 40 #include <scsi/scsi_disk.h> 41 42 #include <dev/softraidvar.h> 43 44 /* RAID 1 functions. */ 45 int sr_raid1_create(struct sr_discipline *, struct bioc_createraid *, 46 int, int64_t); 47 int sr_raid1_assemble(struct sr_discipline *, struct bioc_createraid *, 48 int, void *); 49 int sr_raid1_init(struct sr_discipline *sd); 50 int sr_raid1_rw(struct sr_workunit *); 51 int sr_raid1_wu_done(struct sr_workunit *); 52 void sr_raid1_set_chunk_state(struct sr_discipline *, int, int); 53 void sr_raid1_set_vol_state(struct sr_discipline *); 54 55 /* Discipline initialisation. */ 56 void 57 sr_raid1_discipline_init(struct sr_discipline *sd) 58 { 59 /* Fill out discipline members. */ 60 sd->sd_type = SR_MD_RAID1; 61 strlcpy(sd->sd_name, "RAID 1", sizeof(sd->sd_name)); 62 sd->sd_capabilities = SR_CAP_SYSTEM_DISK | SR_CAP_AUTO_ASSEMBLE | 63 SR_CAP_REBUILD | SR_CAP_REDUNDANT; 64 sd->sd_max_wu = SR_RAID1_NOWU; 65 66 /* Setup discipline specific function pointers. */ 67 sd->sd_assemble = sr_raid1_assemble; 68 sd->sd_create = sr_raid1_create; 69 sd->sd_scsi_rw = sr_raid1_rw; 70 sd->sd_scsi_wu_done = sr_raid1_wu_done; 71 sd->sd_set_chunk_state = sr_raid1_set_chunk_state; 72 sd->sd_set_vol_state = sr_raid1_set_vol_state; 73 } 74 75 int 76 sr_raid1_create(struct sr_discipline *sd, struct bioc_createraid *bc, 77 int no_chunk, int64_t coerced_size) 78 { 79 if (no_chunk < 2) { 80 sr_error(sd->sd_sc, "%s requires two or more chunks", 81 sd->sd_name); 82 return EINVAL; 83 } 84 85 sd->sd_meta->ssdi.ssd_size = coerced_size; 86 87 return sr_raid1_init(sd); 88 } 89 90 int 91 sr_raid1_assemble(struct sr_discipline *sd, struct bioc_createraid *bc, 92 int no_chunk, void *data) 93 { 94 return sr_raid1_init(sd); 95 } 96 97 int 98 sr_raid1_init(struct sr_discipline *sd) 99 { 100 sd->sd_max_ccb_per_wu = sd->sd_meta->ssdi.ssd_chunk_no; 101 102 return 0; 103 } 104 105 void 106 sr_raid1_set_chunk_state(struct sr_discipline *sd, int c, int new_state) 107 { 108 int old_state, s; 109 110 DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n", 111 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, 112 sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state); 113 114 /* ok to go to splbio since this only happens in error path */ 115 s = splbio(); 116 old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status; 117 118 /* multiple IOs to the same chunk that fail will come through here */ 119 if (old_state == new_state) 120 goto done; 121 122 switch (old_state) { 123 case BIOC_SDONLINE: 124 switch (new_state) { 125 case BIOC_SDOFFLINE: 126 case BIOC_SDSCRUB: 127 break; 128 default: 129 goto die; 130 } 131 break; 132 133 case BIOC_SDOFFLINE: 134 switch (new_state) { 135 case BIOC_SDREBUILD: 136 case BIOC_SDHOTSPARE: 137 break; 138 default: 139 goto die; 140 } 141 break; 142 143 case BIOC_SDSCRUB: 144 if (new_state == BIOC_SDONLINE) { 145 ; 146 } else 147 goto die; 148 break; 149 150 case BIOC_SDREBUILD: 151 switch (new_state) { 152 case BIOC_SDONLINE: 153 break; 154 case BIOC_SDOFFLINE: 155 /* Abort rebuild since the rebuild chunk disappeared. */ 156 sd->sd_reb_abort = 1; 157 break; 158 default: 159 goto die; 160 } 161 break; 162 163 case BIOC_SDHOTSPARE: 164 switch (new_state) { 165 case BIOC_SDOFFLINE: 166 case BIOC_SDREBUILD: 167 break; 168 default: 169 goto die; 170 } 171 break; 172 173 default: 174 die: 175 splx(s); /* XXX */ 176 panic("%s: %s: %s: invalid chunk state transition " 177 "%d -> %d\n", DEVNAME(sd->sd_sc), 178 sd->sd_meta->ssd_devname, 179 sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, 180 old_state, new_state); 181 /* NOTREACHED */ 182 } 183 184 sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state; 185 sd->sd_set_vol_state(sd); 186 187 sd->sd_must_flush = 1; 188 task_add(systq, &sd->sd_meta_save_task); 189 done: 190 splx(s); 191 } 192 193 void 194 sr_raid1_set_vol_state(struct sr_discipline *sd) 195 { 196 int states[SR_MAX_STATES]; 197 int new_state, i, s, nd; 198 int old_state = sd->sd_vol_status; 199 200 DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n", 201 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname); 202 203 nd = sd->sd_meta->ssdi.ssd_chunk_no; 204 205 #ifdef SR_DEBUG 206 for (i = 0; i < nd; i++) 207 DNPRINTF(SR_D_STATE, "%s: chunk %d status = %u\n", 208 DEVNAME(sd->sd_sc), i, 209 sd->sd_vol.sv_chunks[i]->src_meta.scm_status); 210 #endif 211 212 for (i = 0; i < SR_MAX_STATES; i++) 213 states[i] = 0; 214 215 for (i = 0; i < nd; i++) { 216 s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status; 217 if (s >= SR_MAX_STATES) 218 panic("%s: %s: %s: invalid chunk state", 219 DEVNAME(sd->sd_sc), 220 sd->sd_meta->ssd_devname, 221 sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname); 222 states[s]++; 223 } 224 225 if (states[BIOC_SDONLINE] == nd) 226 new_state = BIOC_SVONLINE; 227 else if (states[BIOC_SDONLINE] == 0) 228 new_state = BIOC_SVOFFLINE; 229 else if (states[BIOC_SDSCRUB] != 0) 230 new_state = BIOC_SVSCRUB; 231 else if (states[BIOC_SDREBUILD] != 0) 232 new_state = BIOC_SVREBUILD; 233 else if (states[BIOC_SDOFFLINE] != 0) 234 new_state = BIOC_SVDEGRADED; 235 else { 236 DNPRINTF(SR_D_STATE, "%s: invalid volume state, old state " 237 "was %d\n", DEVNAME(sd->sd_sc), old_state); 238 panic("invalid volume state"); 239 } 240 241 DNPRINTF(SR_D_STATE, "%s: %s: sr_raid1_set_vol_state %d -> %d\n", 242 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, 243 old_state, new_state); 244 245 switch (old_state) { 246 case BIOC_SVONLINE: 247 switch (new_state) { 248 case BIOC_SVONLINE: /* can go to same state */ 249 case BIOC_SVOFFLINE: 250 case BIOC_SVDEGRADED: 251 case BIOC_SVREBUILD: /* happens on boot */ 252 break; 253 default: 254 goto die; 255 } 256 break; 257 258 case BIOC_SVOFFLINE: 259 /* XXX this might be a little too much */ 260 goto die; 261 262 case BIOC_SVDEGRADED: 263 switch (new_state) { 264 case BIOC_SVOFFLINE: 265 case BIOC_SVREBUILD: 266 case BIOC_SVDEGRADED: /* can go to the same state */ 267 break; 268 default: 269 goto die; 270 } 271 break; 272 273 case BIOC_SVBUILDING: 274 switch (new_state) { 275 case BIOC_SVONLINE: 276 case BIOC_SVOFFLINE: 277 case BIOC_SVBUILDING: /* can go to the same state */ 278 break; 279 default: 280 goto die; 281 } 282 break; 283 284 case BIOC_SVSCRUB: 285 switch (new_state) { 286 case BIOC_SVONLINE: 287 case BIOC_SVOFFLINE: 288 case BIOC_SVDEGRADED: 289 case BIOC_SVSCRUB: /* can go to same state */ 290 break; 291 default: 292 goto die; 293 } 294 break; 295 296 case BIOC_SVREBUILD: 297 switch (new_state) { 298 case BIOC_SVONLINE: 299 case BIOC_SVOFFLINE: 300 case BIOC_SVDEGRADED: 301 case BIOC_SVREBUILD: /* can go to the same state */ 302 break; 303 default: 304 goto die; 305 } 306 break; 307 308 default: 309 die: 310 panic("%s: %s: invalid volume state transition " 311 "%d -> %d\n", DEVNAME(sd->sd_sc), 312 sd->sd_meta->ssd_devname, 313 old_state, new_state); 314 /* NOTREACHED */ 315 } 316 317 sd->sd_vol_status = new_state; 318 319 /* If we have just become degraded, look for a hotspare. */ 320 if (new_state == BIOC_SVDEGRADED) 321 task_add(systq, &sd->sd_hotspare_rebuild_task); 322 } 323 324 int 325 sr_raid1_rw(struct sr_workunit *wu) 326 { 327 struct sr_discipline *sd = wu->swu_dis; 328 struct scsi_xfer *xs = wu->swu_xs; 329 struct sr_ccb *ccb; 330 struct sr_chunk *scp; 331 int ios, chunk, i, rt; 332 daddr_t blkno; 333 334 /* blkno and scsi error will be handled by sr_validate_io */ 335 if (sr_validate_io(wu, &blkno, "sr_raid1_rw")) 336 goto bad; 337 338 if (xs->flags & SCSI_DATA_IN) 339 ios = 1; 340 else 341 ios = sd->sd_meta->ssdi.ssd_chunk_no; 342 343 for (i = 0; i < ios; i++) { 344 if (xs->flags & SCSI_DATA_IN) { 345 rt = 0; 346 ragain: 347 /* interleave reads */ 348 chunk = sd->mds.mdd_raid1.sr1_counter++ % 349 sd->sd_meta->ssdi.ssd_chunk_no; 350 scp = sd->sd_vol.sv_chunks[chunk]; 351 switch (scp->src_meta.scm_status) { 352 case BIOC_SDONLINE: 353 case BIOC_SDSCRUB: 354 break; 355 356 case BIOC_SDOFFLINE: 357 case BIOC_SDREBUILD: 358 case BIOC_SDHOTSPARE: 359 if (rt++ < sd->sd_meta->ssdi.ssd_chunk_no) 360 goto ragain; 361 362 /* FALLTHROUGH */ 363 default: 364 /* volume offline */ 365 printf("%s: is offline, cannot read\n", 366 DEVNAME(sd->sd_sc)); 367 goto bad; 368 } 369 } else { 370 /* writes go on all working disks */ 371 chunk = i; 372 scp = sd->sd_vol.sv_chunks[chunk]; 373 switch (scp->src_meta.scm_status) { 374 case BIOC_SDONLINE: 375 case BIOC_SDSCRUB: 376 case BIOC_SDREBUILD: 377 break; 378 379 case BIOC_SDHOTSPARE: /* should never happen */ 380 case BIOC_SDOFFLINE: 381 continue; 382 383 default: 384 goto bad; 385 } 386 } 387 388 ccb = sr_ccb_rw(sd, chunk, blkno, xs->datalen, xs->data, 389 xs->flags, 0); 390 if (!ccb) { 391 /* should never happen but handle more gracefully */ 392 printf("%s: %s: too many ccbs queued\n", 393 DEVNAME(sd->sd_sc), 394 sd->sd_meta->ssd_devname); 395 goto bad; 396 } 397 sr_wu_enqueue_ccb(wu, ccb); 398 } 399 400 sr_schedule_wu(wu); 401 402 return (0); 403 404 bad: 405 /* wu is unwound by sr_wu_put */ 406 return (1); 407 } 408 409 int 410 sr_raid1_wu_done(struct sr_workunit *wu) 411 { 412 struct sr_discipline *sd = wu->swu_dis; 413 struct scsi_xfer *xs = wu->swu_xs; 414 415 /* If at least one I/O succeeded, we are okay. */ 416 if (wu->swu_ios_succeeded > 0) { 417 xs->error = XS_NOERROR; 418 return SR_WU_OK; 419 } 420 421 /* If all I/O failed, retry reads and give up on writes. */ 422 if (xs->flags & SCSI_DATA_IN) { 423 printf("%s: retrying read on block %lld\n", 424 sd->sd_meta->ssd_devname, (long long)wu->swu_blk_start); 425 if (wu->swu_cb_active == 1) 426 panic("%s: sr_raid1_intr_cb", 427 DEVNAME(sd->sd_sc)); 428 sr_wu_release_ccbs(wu); 429 wu->swu_state = SR_WU_RESTART; 430 if (sd->sd_scsi_rw(wu) == 0) 431 return SR_WU_RESTART; 432 } else { 433 printf("%s: permanently failing write on block %lld\n", 434 sd->sd_meta->ssd_devname, (long long)wu->swu_blk_start); 435 } 436 437 wu->swu_state = SR_WU_FAILED; 438 xs->error = XS_DRIVER_STUFFUP; 439 440 return SR_WU_FAILED; 441 } 442