/*	$OpenBSD: softraid_raid6.c,v 1.63 2014/09/14 14:17:24 jsg Exp $ */
/*
 * Copyright (c) 2009 Marco Peereboom <marco@peereboom.us>
 * Copyright (c) 2009 Jordan Hargrave <jordan@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "bio.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/device.h>
#include <sys/ioctl.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/disk.h>
#include <sys/rwlock.h>
#include <sys/queue.h>
#include <sys/fcntl.h>
#include <sys/disklabel.h>
#include <sys/mount.h>
#include <sys/sensors.h>
#include <sys/stat.h>
#include <sys/task.h>
#include <sys/conf.h>
#include <sys/uio.h>

#include <scsi/scsi_all.h>
#include <scsi/scsiconf.h>
#include <scsi/scsi_disk.h>

#include <dev/softraidvar.h>
#include <dev/rndvar.h>

/*
 * GF(2^8) lookup tables, filled in by gf_init()/gf_premul():
 *  - gf_map[g]: per-generator 256-entry multiplication table, allocated
 *    lazily by gf_premul(); NULL until first use of generator g.
 *  - gf_pow: power table, duplicated over 2*255 entries so gf_mul() can
 *    index gf_log[a] + gf_log[b] without a modulo (see gf_init()).
 *  - gf_log: discrete-log table; gf_log[0] is set to 512 as an
 *    out-of-range sentinel so multiplies by zero fall into the
 *    zero-filled tail of gf_pow and yield 0.
 */
uint8_t *gf_map[256];
uint8_t gf_pow[768];
int gf_log[256];

/* RAID 6 functions.
 */
int	sr_raid6_create(struct sr_discipline *, struct bioc_createraid *,
	    int, int64_t);
int	sr_raid6_assemble(struct sr_discipline *, struct bioc_createraid *,
	    int, void *);
int	sr_raid6_init(struct sr_discipline *);
int	sr_raid6_rw(struct sr_workunit *);
int	sr_raid6_openings(struct sr_discipline *);
void	sr_raid6_intr(struct buf *);
int	sr_raid6_wu_done(struct sr_workunit *);
void	sr_raid6_set_chunk_state(struct sr_discipline *, int, int);
void	sr_raid6_set_vol_state(struct sr_discipline *);

void	sr_raid6_xorp(void *, void *, int);
void	sr_raid6_xorq(void *, void *, int, int);
int	sr_raid6_addio(struct sr_workunit *wu, int, daddr_t, daddr_t,
	    void *, int, int, void *, void *, int);
void	sr_raid6_scrub(struct sr_discipline *);
int	sr_failio(struct sr_workunit *);

void	gf_init(void);
uint8_t gf_inv(uint8_t);
int	gf_premul(uint8_t);
uint8_t gf_mul(uint8_t, uint8_t);

/*
 * Per-stripe failure flags collected in sr_raid6_rw(): which of the
 * addressed data chunk (X), a second data chunk (Y), the P parity or
 * the Q parity chunk is offline/rebuilding.
 */
#define SR_NOFAIL	0x00
#define SR_FAILX	(1L << 0)
#define SR_FAILY	(1L << 1)
#define SR_FAILP	(1L << 2)
#define SR_FAILQ	(1L << 3)

/*
 * Per-CCB context hung off ccb_opaque; sr_raid6_intr() folds the
 * completed read into pbuf (XOR parity) and/or qbuf (GF multiply by
 * generator gn, then XOR), then frees this struct.
 */
struct sr_raid6_opaque {
	int	 gn;	/* GF(2^8) generator for the q-parity fold */
	void	*pbuf;	/* destination for XOR (P) accumulation, or NULL */
	void	*qbuf;	/* destination for GF (Q) accumulation, or NULL */
};

/* discipline initialisation. */
void
sr_raid6_discipline_init(struct sr_discipline *sd)
{
	/* Initialize GF256 tables. */
	gf_init();

	/* Fill out discipline members. */
	sd->sd_type = SR_MD_RAID6;
	strlcpy(sd->sd_name, "RAID 6", sizeof(sd->sd_name));
	sd->sd_capabilities = SR_CAP_SYSTEM_DISK | SR_CAP_AUTO_ASSEMBLE |
	    SR_CAP_REDUNDANT;
	sd->sd_max_wu = SR_RAID6_NOWU;

	/* Setup discipline specific function pointers. */
	sd->sd_assemble = sr_raid6_assemble;
	sd->sd_create = sr_raid6_create;
	sd->sd_openings = sr_raid6_openings;
	sd->sd_scsi_rw = sr_raid6_rw;
	sd->sd_scsi_intr = sr_raid6_intr;
	sd->sd_scsi_wu_done = sr_raid6_wu_done;
	sd->sd_set_chunk_state = sr_raid6_set_chunk_state;
	sd->sd_set_vol_state = sr_raid6_set_vol_state;
}

/*
 * Create a new RAID 6 volume: needs at least 4 chunks; usable size is
 * the coerced chunk size rounded down to a strip-size multiple, times
 * the number of data chunks (no_chunk - 2, since two chunks hold P/Q).
 */
int
sr_raid6_create(struct sr_discipline *sd, struct bioc_createraid *bc,
    int no_chunk, int64_t coerced_size)
{
	if (no_chunk < 4) {
		sr_error(sd->sd_sc, "%s requires four or more chunks",
		    sd->sd_name);
		return EINVAL;
	}

	/*
	 * XXX add variable strip size later even though MAXPHYS is really
	 * the clever value, users like * to tinker with that type of stuff.
	 */
	sd->sd_meta->ssdi.ssd_strip_size = MAXPHYS;
	sd->sd_meta->ssdi.ssd_size = (coerced_size &
	    ~(((u_int64_t)sd->sd_meta->ssdi.ssd_strip_size >>
	    DEV_BSHIFT) - 1)) * (no_chunk - 2);

	return sr_raid6_init(sd);
}

/* Assemble an existing volume; metadata is already loaded. */
int
sr_raid6_assemble(struct sr_discipline *sd, struct bioc_createraid *bc,
    int no_chunk, void *data)
{
	return sr_raid6_init(sd);
}

/* Common create/assemble initialisation of runtime values. */
int
sr_raid6_init(struct sr_discipline *sd)
{
	/* Initialise runtime values.
	 */
	sd->mds.mdd_raid6.sr6_strip_bits =
	    sr_validate_stripsize(sd->sd_meta->ssdi.ssd_strip_size);
	if (sd->mds.mdd_raid6.sr6_strip_bits == -1) {
		sr_error(sd->sd_sc, "invalid strip size");
		return EINVAL;
	}

	/* only if stripsize <= MAXPHYS */
	sd->sd_max_ccb_per_wu = max(6, 2 * sd->sd_meta->ssdi.ssd_chunk_no);

	return 0;
}

int
sr_raid6_openings(struct sr_discipline *sd)
{
	return (sd->sd_max_wu >> 1); /* 2 wu's per IO */
}

/*
 * Move chunk c to new_state, enforcing the legal chunk state
 * transitions; panics on an illegal transition.  On a real change the
 * volume state is recomputed and a metadata flush is scheduled.
 */
void
sr_raid6_set_chunk_state(struct sr_discipline *sd, int c, int new_state)
{
	int old_state, s;

	/* XXX this is for RAID 0 */
	DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n",
	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
	    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state);

	/* ok to go to splbio since this only happens in error path */
	s = splbio();
	old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status;

	/* multiple IOs to the same chunk that fail will come through here */
	if (old_state == new_state)
		goto done;

	switch (old_state) {
	case BIOC_SDONLINE:
		switch (new_state) {
		case BIOC_SDOFFLINE:
		case BIOC_SDSCRUB:
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SDOFFLINE:
		if (new_state == BIOC_SDREBUILD) {
			;
		} else
			goto die;
		break;

	case BIOC_SDSCRUB:
		switch (new_state) {
		case BIOC_SDONLINE:
		case BIOC_SDOFFLINE:
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SDREBUILD:
		switch (new_state) {
		case BIOC_SDONLINE:
		case BIOC_SDOFFLINE:
			break;
		default:
			goto die;
		}
		break;

	default:
die:
		splx(s); /* XXX */
		panic("%s: %s: %s: invalid chunk state transition "
		    "%d -> %d", DEVNAME(sd->sd_sc),
		    sd->sd_meta->ssd_devname,
		    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname,
		    old_state, new_state);
		/* NOTREACHED */
	}

	sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state;
	sd->sd_set_vol_state(sd);

	sd->sd_must_flush = 1;
	task_add(systq, &sd->sd_meta_save_task);
done:
	splx(s);
}

/*
 * Recompute the volume state from the chunk states.  RAID 6 tolerates
 * two missing chunks: fewer than nd-2 online chunks means offline,
 * anything between nd-2 and nd (with no scrub/rebuild in progress)
 * means degraded.
 */
void
sr_raid6_set_vol_state(struct sr_discipline *sd)
{
	int states[SR_MAX_STATES];
	int new_state, i, s, nd;
	int old_state = sd->sd_vol_status;

	/* XXX this is for RAID 0 */

	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n",
	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname);

	nd = sd->sd_meta->ssdi.ssd_chunk_no;

	for (i = 0; i < SR_MAX_STATES; i++)
		states[i] = 0;

	/* Tally chunk states; an out-of-range state is fatal. */
	for (i = 0; i < nd; i++) {
		s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status;
		if (s >= SR_MAX_STATES)
			panic("%s: %s: %s: invalid chunk state",
			    DEVNAME(sd->sd_sc),
			    sd->sd_meta->ssd_devname,
			    sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname);
		states[s]++;
	}

	if (states[BIOC_SDONLINE] == nd)
		new_state = BIOC_SVONLINE;
	else if (states[BIOC_SDONLINE] < nd - 2)
		new_state = BIOC_SVOFFLINE;
	else if (states[BIOC_SDSCRUB] != 0)
		new_state = BIOC_SVSCRUB;
	else if (states[BIOC_SDREBUILD] != 0)
		new_state = BIOC_SVREBUILD;
	else if (states[BIOC_SDONLINE] < nd)
		new_state = BIOC_SVDEGRADED;
	else {
		printf("old_state = %d, ", old_state);
		for (i = 0; i < nd; i++)
			printf("%d = %d, ", i,
			    sd->sd_vol.sv_chunks[i]->src_meta.scm_status);
		panic("invalid new_state");
	}

	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state %d -> %d\n",
	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
	    old_state, new_state);

	switch (old_state) {
	case BIOC_SVONLINE:
		switch (new_state) {
		case BIOC_SVONLINE: /* can go to same state */
		case BIOC_SVOFFLINE:
		case BIOC_SVDEGRADED:
		case BIOC_SVREBUILD: /* happens on boot */
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SVOFFLINE:
		/* XXX this might be a little too much
		 */
		goto die;

	case BIOC_SVDEGRADED:
		switch (new_state) {
		case BIOC_SVOFFLINE:
		case BIOC_SVREBUILD:
		case BIOC_SVDEGRADED: /* can go to the same state */
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SVBUILDING:
		switch (new_state) {
		case BIOC_SVONLINE:
		case BIOC_SVOFFLINE:
		case BIOC_SVBUILDING: /* can go to the same state */
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SVSCRUB:
		switch (new_state) {
		case BIOC_SVONLINE:
		case BIOC_SVOFFLINE:
		case BIOC_SVDEGRADED:
		case BIOC_SVSCRUB: /* can go to same state */
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SVREBUILD:
		switch (new_state) {
		case BIOC_SVONLINE:
		case BIOC_SVOFFLINE:
		case BIOC_SVDEGRADED:
		case BIOC_SVREBUILD: /* can go to the same state */
			break;
		default:
			goto die;
		}
		break;

	default:
die:
		panic("%s: %s: invalid volume state transition %d -> %d",
		    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
		    old_state, new_state);
		/* NOTREACHED */
	}

	sd->sd_vol_status = new_state;
}

/* modes:
 *   readq: sr_raid6_addio(i, lba, length, NULL, SCSI_DATA_IN,
 *	    0, qbuf, NULL, 0);
 *   readp: sr_raid6_addio(i, lba, length, NULL, SCSI_DATA_IN,
 *	    0, pbuf, NULL, 0);
 *   readx: sr_raid6_addio(i, lba, length, NULL, SCSI_DATA_IN,
 *	    0, pbuf, qbuf, gf_pow[i]);
 */

/*
 * Map a SCSI read/write onto the chunks of the volume, one strip at a
 * time.  Reads from healthy chunks are issued directly; reads hitting
 * failed chunks are reconstructed from the survivors via P/Q algebra.
 * Writes are split into a read work unit (old data + old parities)
 * that collides with the write work unit carrying new data and the
 * recomputed P and Q strips.
 */
int
sr_raid6_rw(struct sr_workunit *wu)
{
	struct sr_workunit *wu_r = NULL;
	struct sr_discipline *sd = wu->swu_dis;
	struct scsi_xfer *xs = wu->swu_xs;
	struct sr_chunk *scp;
	int s, fail, i, gxinv, pxinv;
	daddr_t blk, lba;
	int64_t chunk_offs, lbaoffs, phys_offs, strip_offs;
	int64_t strip_no, strip_size, strip_bits;
	int64_t fchunk, no_chunk, chunk, qchunk, pchunk;
	int64_t length, datalen, row_size;
	void *pbuf, *data, *qbuf;

	/* blk and scsi error will be handled by
sr_validate_io */ 389 if (sr_validate_io(wu, &blk, "sr_raid6_rw")) 390 goto bad; 391 392 strip_size = sd->sd_meta->ssdi.ssd_strip_size; 393 strip_bits = sd->mds.mdd_raid6.sr6_strip_bits; 394 no_chunk = sd->sd_meta->ssdi.ssd_chunk_no - 2; 395 row_size = (no_chunk << strip_bits) >> DEV_BSHIFT; 396 397 data = xs->data; 398 datalen = xs->datalen; 399 lbaoffs = blk << DEV_BSHIFT; 400 401 if (xs->flags & SCSI_DATA_OUT) { 402 if ((wu_r = sr_scsi_wu_get(sd, SCSI_NOSLEEP)) == NULL){ 403 printf("%s: can't get wu_r", DEVNAME(sd->sd_sc)); 404 goto bad; 405 } 406 wu_r->swu_state = SR_WU_INPROGRESS; 407 wu_r->swu_flags |= SR_WUF_DISCIPLINE; 408 } 409 410 wu->swu_blk_start = 0; 411 while (datalen != 0) { 412 strip_no = lbaoffs >> strip_bits; 413 strip_offs = lbaoffs & (strip_size - 1); 414 chunk_offs = (strip_no / no_chunk) << strip_bits; 415 phys_offs = chunk_offs + strip_offs + 416 (sd->sd_meta->ssd_data_offset << DEV_BSHIFT); 417 418 /* get size remaining in this stripe */ 419 length = MIN(strip_size - strip_offs, datalen); 420 421 /* map disk offset to parity/data drive */ 422 chunk = strip_no % no_chunk; 423 424 qchunk = (no_chunk + 1) - ((strip_no / no_chunk) % (no_chunk+2)); 425 if (qchunk == 0) 426 pchunk = no_chunk + 1; 427 else 428 pchunk = qchunk - 1; 429 if (chunk >= pchunk) 430 chunk++; 431 if (chunk >= qchunk) 432 chunk++; 433 434 lba = phys_offs >> DEV_BSHIFT; 435 436 /* XXX big hammer.. 
exclude I/O from entire stripe */ 437 if (wu->swu_blk_start == 0) 438 wu->swu_blk_start = (strip_no / no_chunk) * row_size; 439 wu->swu_blk_end = (strip_no / no_chunk) * row_size + (row_size - 1); 440 441 fail = 0; 442 fchunk = -1; 443 444 /* Get disk-fail flags */ 445 for (i=0; i< no_chunk+2; i++) { 446 scp = sd->sd_vol.sv_chunks[i]; 447 switch (scp->src_meta.scm_status) { 448 case BIOC_SDOFFLINE: 449 case BIOC_SDREBUILD: 450 case BIOC_SDHOTSPARE: 451 if (i == qchunk) 452 fail |= SR_FAILQ; 453 else if (i == pchunk) 454 fail |= SR_FAILP; 455 else if (i == chunk) 456 fail |= SR_FAILX; 457 else { 458 /* dual data-disk failure */ 459 fail |= SR_FAILY; 460 fchunk = i; 461 } 462 break; 463 } 464 } 465 if (xs->flags & SCSI_DATA_IN) { 466 if (!(fail & SR_FAILX)) { 467 /* drive is good. issue single read request */ 468 if (sr_raid6_addio(wu, chunk, lba, length, 469 data, xs->flags, 0, NULL, NULL, 0)) 470 goto bad; 471 } else if (fail & SR_FAILP) { 472 /* Dx, P failed */ 473 printf("Disk %llx offline, " 474 "regenerating Dx+P\n", chunk); 475 476 gxinv = gf_inv(gf_pow[chunk]); 477 478 /* Calculate: Dx = (Q^Dz*gz)*inv(gx) */ 479 memset(data, 0, length); 480 if (sr_raid6_addio(wu, qchunk, lba, length, 481 NULL, SCSI_DATA_IN, 0, NULL, data, gxinv)) 482 goto bad; 483 484 /* Read Dz * gz * inv(gx) */ 485 for (i = 0; i < no_chunk+2; i++) { 486 if (i == qchunk || i == pchunk || i == chunk) 487 continue; 488 489 if (sr_raid6_addio(wu, i, lba, length, 490 NULL, SCSI_DATA_IN, 0, NULL, data, 491 gf_mul(gf_pow[i], gxinv))) 492 goto bad; 493 } 494 495 /* data will contain correct value on completion */ 496 } else if (fail & SR_FAILY) { 497 /* Dx, Dy failed */ 498 printf("Disk %llx & %llx offline, " 499 "regenerating Dx+Dy\n", chunk, fchunk); 500 501 gxinv = gf_inv(gf_pow[chunk] ^ gf_pow[fchunk]); 502 pxinv = gf_mul(gf_pow[fchunk], gxinv); 503 504 /* read Q * inv(gx + gy) */ 505 memset(data, 0, length); 506 if (sr_raid6_addio(wu, qchunk, lba, length, 507 NULL, SCSI_DATA_IN, 0, NULL, data, 
gxinv)) 508 goto bad; 509 510 /* read P * gy * inv(gx + gy) */ 511 if (sr_raid6_addio(wu, pchunk, lba, length, 512 NULL, SCSI_DATA_IN, 0, NULL, data, pxinv)) 513 goto bad; 514 515 /* Calculate: Dx*gx^Dy*gy = Q^(Dz*gz) ; Dx^Dy = P^Dz 516 * Q: sr_raid6_xorp(qbuf, --, length); 517 * P: sr_raid6_xorp(pbuf, --, length); 518 * Dz: sr_raid6_xorp(pbuf, --, length); 519 * sr_raid6_xorq(qbuf, --, length, gf_pow[i]); 520 */ 521 for (i = 0; i < no_chunk+2; i++) { 522 if (i == qchunk || i == pchunk || 523 i == chunk || i == fchunk) 524 continue; 525 526 /* read Dz * (gz + gy) * inv(gx + gy) */ 527 if (sr_raid6_addio(wu, i, lba, length, 528 NULL, SCSI_DATA_IN, 0, NULL, data, 529 pxinv ^ gf_mul(gf_pow[i], gxinv))) 530 goto bad; 531 } 532 } else { 533 /* Two cases: single disk (Dx) or (Dx+Q) 534 * Dx = Dz ^ P (same as RAID5) 535 */ 536 printf("Disk %llx offline, " 537 "regenerating Dx%s\n", chunk, 538 fail & SR_FAILQ ? "+Q" : " single"); 539 540 /* Calculate: Dx = P^Dz 541 * P: sr_raid6_xorp(data, ---, length); 542 * Dz: sr_raid6_xorp(data, ---, length); 543 */ 544 memset(data, 0, length); 545 for (i = 0; i < no_chunk+2; i++) { 546 if (i != chunk && i != qchunk) { 547 /* Read Dz */ 548 if (sr_raid6_addio(wu, i, lba, 549 length, NULL, SCSI_DATA_IN, 550 0, data, NULL, 0)) 551 goto bad; 552 } 553 } 554 555 /* data will contain correct value on completion */ 556 } 557 } else { 558 /* XXX handle writes to failed/offline disk? */ 559 if (fail & (SR_FAILX|SR_FAILQ|SR_FAILP)) 560 goto bad; 561 562 /* 563 * initialize pbuf with contents of new data to be 564 * written. This will be XORed with old data and old 565 * parity in the intr routine. The result in pbuf 566 * is the new parity data. 
567 */ 568 qbuf = sr_block_get(sd, length); 569 if (qbuf == NULL) 570 goto bad; 571 572 pbuf = sr_block_get(sd, length); 573 if (pbuf == NULL) 574 goto bad; 575 576 /* Calculate P = Dn; Q = gn * Dn */ 577 if (gf_premul(gf_pow[chunk])) 578 goto bad; 579 sr_raid6_xorp(pbuf, data, length); 580 sr_raid6_xorq(qbuf, data, length, gf_pow[chunk]); 581 582 /* Read old data: P ^= Dn' ; Q ^= (gn * Dn') */ 583 if (sr_raid6_addio(wu_r, chunk, lba, length, NULL, 584 SCSI_DATA_IN, 0, pbuf, qbuf, gf_pow[chunk])) 585 goto bad; 586 587 /* Read old xor-parity: P ^= P' */ 588 if (sr_raid6_addio(wu_r, pchunk, lba, length, NULL, 589 SCSI_DATA_IN, 0, pbuf, NULL, 0)) 590 goto bad; 591 592 /* Read old q-parity: Q ^= Q' */ 593 if (sr_raid6_addio(wu_r, qchunk, lba, length, NULL, 594 SCSI_DATA_IN, 0, qbuf, NULL, 0)) 595 goto bad; 596 597 /* write new data */ 598 if (sr_raid6_addio(wu, chunk, lba, length, data, 599 xs->flags, 0, NULL, NULL, 0)) 600 goto bad; 601 602 /* write new xor-parity */ 603 if (sr_raid6_addio(wu, pchunk, lba, length, pbuf, 604 xs->flags, SR_CCBF_FREEBUF, NULL, NULL, 0)) 605 goto bad; 606 607 /* write new q-parity */ 608 if (sr_raid6_addio(wu, qchunk, lba, length, qbuf, 609 xs->flags, SR_CCBF_FREEBUF, NULL, NULL, 0)) 610 goto bad; 611 } 612 613 /* advance to next block */ 614 lbaoffs += length; 615 datalen -= length; 616 data += length; 617 } 618 619 s = splbio(); 620 if (wu_r) { 621 /* collide write request with reads */ 622 wu_r->swu_blk_start = wu->swu_blk_start; 623 wu_r->swu_blk_end = wu->swu_blk_end; 624 625 wu->swu_state = SR_WU_DEFERRED; 626 wu_r->swu_collider = wu; 627 TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu, swu_link); 628 629 wu = wu_r; 630 } 631 splx(s); 632 633 sr_schedule_wu(wu); 634 635 return (0); 636 bad: 637 /* XXX - can leak pbuf/qbuf on error. 
	 */
	/* wu is unwound by sr_wu_put */
	if (wu_r)
		sr_scsi_wu_put(sd, wu_r);
	return (1);
}

/* Handle failure I/O completion */
int
sr_failio(struct sr_workunit *wu)
{
	struct sr_discipline *sd = wu->swu_dis;
	struct sr_ccb *ccb;

	if (!(wu->swu_flags & SR_WUF_FAIL))
		return (0);

	/* Wu is a 'fake'.. don't do real I/O just intr */
	TAILQ_INSERT_TAIL(&sd->sd_wu_pendq, wu, swu_link);
	TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link)
		sr_raid6_intr(&ccb->ccb_buf);
	return (1);
}

/*
 * Per-CCB completion: fold the just-read data into the P and/or Q
 * accumulation buffers attached via ccb_opaque, release any temporary
 * data buffer, then hand the CCB back to common work-unit completion.
 * Runs at splbio.
 */
void
sr_raid6_intr(struct buf *bp)
{
	struct sr_ccb *ccb = (struct sr_ccb *)bp;
	struct sr_workunit *wu = ccb->ccb_wu;
	struct sr_discipline *sd = wu->swu_dis;
	struct sr_raid6_opaque *pq = ccb->ccb_opaque;
	int s;

	DNPRINTF(SR_D_INTR, "%s: sr_raid6_intr bp %p xs %p\n",
	    DEVNAME(sd->sd_sc), bp, wu->swu_xs);

	s = splbio();
	sr_ccb_done(ccb);

	/* XOR data to result. */
	if (ccb->ccb_state == SR_CCB_OK && pq) {
		if (pq->pbuf)
			/* Calculate xor-parity */
			sr_raid6_xorp(pq->pbuf, ccb->ccb_buf.b_data,
			    ccb->ccb_buf.b_bcount);
		if (pq->qbuf)
			/* Calculate q-parity */
			sr_raid6_xorq(pq->qbuf, ccb->ccb_buf.b_data,
			    ccb->ccb_buf.b_bcount, pq->gn);
		free(pq, M_DEVBUF, 0);
		ccb->ccb_opaque = NULL;
	}

	/* Free allocated data buffer. */
	if (ccb->ccb_flags & SR_CCBF_FREEBUF) {
		sr_block_put(sd, ccb->ccb_buf.b_data, ccb->ccb_buf.b_bcount);
		ccb->ccb_buf.b_data = NULL;
	}

	sr_wu_done(wu);
	splx(s);
}

/*
 * Work-unit completion: reads that had any success are retried on
 * failure; writes that fail are failed permanently.  Discipline-internal
 * work units report success unconditionally (no error propagation).
 */
int
sr_raid6_wu_done(struct sr_workunit *wu)
{
	struct sr_discipline *sd = wu->swu_dis;
	struct scsi_xfer *xs = wu->swu_xs;

	/* XXX - we have no way of propagating errors... */
	if (wu->swu_flags & SR_WUF_DISCIPLINE)
		return SR_WU_OK;

	/* XXX - This is insufficient for RAID 6. */
	if (wu->swu_ios_succeeded > 0) {
		xs->error = XS_NOERROR;
		return SR_WU_OK;
	}

	if (xs->flags & SCSI_DATA_IN) {
		printf("%s: retrying read on block %lld\n",
		    sd->sd_meta->ssd_devname, (long long)wu->swu_blk_start);
		sr_wu_release_ccbs(wu);
		wu->swu_state = SR_WU_RESTART;
		if (sd->sd_scsi_rw(wu) == 0)
			return SR_WU_RESTART;
	} else {
		printf("%s: permanently fail write on block %lld\n",
		    sd->sd_meta->ssd_devname, (long long)wu->swu_blk_start);
	}

	wu->swu_state = SR_WU_FAILED;
	xs->error = XS_DRIVER_STUFFUP;

	return SR_WU_FAILED;
}

/*
 * Build one CCB for the given chunk and queue it on the work unit.
 * If data is NULL a scratch buffer is allocated and marked for freeing
 * on completion.  If pbuf/qbuf are given, the interrupt handler will
 * XOR the read data into pbuf and GF-multiply-accumulate it (by
 * generator gn) into qbuf via the sr_raid6_opaque attached here.
 */
int
sr_raid6_addio(struct sr_workunit *wu, int chunk, daddr_t blkno,
    daddr_t len, void *data, int xsflags, int ccbflags, void *pbuf,
    void *qbuf, int gn)
{
	struct sr_discipline *sd = wu->swu_dis;
	struct sr_ccb *ccb;
	struct sr_raid6_opaque *pqbuf;

	DNPRINTF(SR_D_DIS, "sr_raid6_addio: %s %d.%llx %llx %p:%p\n",
	    (xsflags & SCSI_DATA_IN) ? "read" : "write", chunk,
	    (long long)blkno, (long long)len,
	    pbuf, qbuf);

	/* Allocate temporary buffer. */
	if (data == NULL) {
		data = sr_block_get(sd, len);
		if (data == NULL)
			return (-1);
		ccbflags |= SR_CCBF_FREEBUF;
	}

	ccb = sr_ccb_rw(sd, chunk, blkno, len, data, xsflags, ccbflags);
	if (ccb == NULL) {
		if (ccbflags & SR_CCBF_FREEBUF)
			sr_block_put(sd, data, len);
		return (-1);
	}
	if (pbuf || qbuf) {
		/* XXX - can leak data and ccb on failure. */
		if (qbuf && gf_premul(gn))
			return (-1);

		/* XXX - should be preallocated? */
		pqbuf = malloc(sizeof(struct sr_raid6_opaque),
		    M_DEVBUF, M_ZERO | M_NOWAIT);
		if (pqbuf == NULL) {
			sr_ccb_put(ccb);
			return (-1);
		}
		pqbuf->pbuf = pbuf;
		pqbuf->qbuf = qbuf;
		pqbuf->gn = gn;
		ccb->ccb_opaque = pqbuf;
	}
	sr_wu_enqueue_ccb(wu, ccb);

	return (0);
}

/* Perform RAID6 parity calculation.
785 * P=xor parity, Q=GF256 parity, D=data, gn=disk# */ 786 void 787 sr_raid6_xorp(void *p, void *d, int len) 788 { 789 uint32_t *pbuf = p, *data = d; 790 791 len >>= 2; 792 while (len--) 793 *pbuf++ ^= *data++; 794 } 795 796 void 797 sr_raid6_xorq(void *q, void *d, int len, int gn) 798 { 799 uint32_t *qbuf = q, *data = d, x; 800 uint8_t *gn_map = gf_map[gn]; 801 802 len >>= 2; 803 while (len--) { 804 x = *data++; 805 *qbuf++ ^= (((uint32_t)gn_map[x & 0xff]) | 806 ((uint32_t)gn_map[(x >> 8) & 0xff] << 8) | 807 ((uint32_t)gn_map[(x >> 16) & 0xff] << 16) | 808 ((uint32_t)gn_map[(x >> 24) & 0xff] << 24)); 809 } 810 } 811 812 /* Create GF256 log/pow tables: polynomial = 0x11D */ 813 void 814 gf_init(void) 815 { 816 int i; 817 uint8_t p = 1; 818 819 /* use 2N pow table to avoid using % in multiply */ 820 for (i=0; i<256; i++) { 821 gf_log[p] = i; 822 gf_pow[i] = gf_pow[i+255] = p; 823 p = ((p << 1) ^ ((p & 0x80) ? 0x1D : 0x00)); 824 } 825 gf_log[0] = 512; 826 } 827 828 uint8_t 829 gf_inv(uint8_t a) 830 { 831 return gf_pow[255 - gf_log[a]]; 832 } 833 834 uint8_t 835 gf_mul(uint8_t a, uint8_t b) 836 { 837 return gf_pow[gf_log[a] + gf_log[b]]; 838 } 839 840 /* Precalculate multiplication tables for drive gn */ 841 int 842 gf_premul(uint8_t gn) 843 { 844 int i; 845 846 if (gf_map[gn] != NULL) 847 return (0); 848 849 if ((gf_map[gn] = malloc(256, M_DEVBUF, M_ZERO | M_NOWAIT)) == NULL) 850 return (-1); 851 852 for (i=0; i<256; i++) 853 gf_map[gn][i] = gf_pow[gf_log[i] + gf_log[gn]]; 854 return (0); 855 } 856