1 /* $OpenBSD: softraid_raid6.c,v 1.72 2021/05/16 15:12:37 deraadt Exp $ */ 2 /* 3 * Copyright (c) 2009 Marco Peereboom <marco@peereboom.us> 4 * Copyright (c) 2009 Jordan Hargrave <jordan@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include "bio.h" 20 21 #include <sys/param.h> 22 #include <sys/systm.h> 23 #include <sys/buf.h> 24 #include <sys/device.h> 25 #include <sys/ioctl.h> 26 #include <sys/malloc.h> 27 #include <sys/kernel.h> 28 #include <sys/disk.h> 29 #include <sys/rwlock.h> 30 #include <sys/queue.h> 31 #include <sys/fcntl.h> 32 #include <sys/mount.h> 33 #include <sys/sensors.h> 34 #include <sys/stat.h> 35 #include <sys/task.h> 36 #include <sys/conf.h> 37 #include <sys/uio.h> 38 39 #include <scsi/scsi_all.h> 40 #include <scsi/scsiconf.h> 41 #include <scsi/scsi_disk.h> 42 43 #include <dev/softraidvar.h> 44 45 uint8_t *gf_map[256]; 46 uint8_t gf_pow[768]; 47 int gf_log[256]; 48 49 /* RAID 6 functions. 
*/
int	sr_raid6_create(struct sr_discipline *, struct bioc_createraid *,
	    int, int64_t);
int	sr_raid6_assemble(struct sr_discipline *, struct bioc_createraid *,
	    int, void *);
int	sr_raid6_init(struct sr_discipline *);
int	sr_raid6_rw(struct sr_workunit *);
int	sr_raid6_openings(struct sr_discipline *);
void	sr_raid6_intr(struct buf *);
int	sr_raid6_wu_done(struct sr_workunit *);
void	sr_raid6_set_chunk_state(struct sr_discipline *, int, int);
void	sr_raid6_set_vol_state(struct sr_discipline *);

void	sr_raid6_xorp(void *, void *, int);
void	sr_raid6_xorq(void *, void *, int, int);
int	sr_raid6_addio(struct sr_workunit *wu, int, daddr_t, long,
	    void *, int, int, void *, void *, int);
void	sr_raid6_scrub(struct sr_discipline *);
int	sr_failio(struct sr_workunit *);

void	gf_init(void);
uint8_t	gf_inv(uint8_t);
int	gf_premul(uint8_t);
uint8_t	gf_mul(uint8_t, uint8_t);

/* Per-stripe failure flags: which of Dx/Dy/P/Q is unavailable. */
#define SR_NOFAIL	0x00
#define SR_FAILX	(1L << 0)
#define SR_FAILY	(1L << 1)
#define SR_FAILP	(1L << 2)
#define SR_FAILQ	(1L << 3)

/*
 * Per-ccb completion context: on I/O completion the interrupt handler
 * XORs the transferred data into pbuf (plain parity) and/or, scaled by
 * GF(256) factor gn, into qbuf (q-parity).  NULL pointers are skipped.
 */
struct sr_raid6_opaque {
	int	gn;
	void	*pbuf;
	void	*qbuf;
};

/* discipline initialisation. */
void
sr_raid6_discipline_init(struct sr_discipline *sd)
{
	/* Initialize GF256 tables. */
	gf_init();

	/* Fill out discipline members. */
	sd->sd_type = SR_MD_RAID6;
	strlcpy(sd->sd_name, "RAID 6", sizeof(sd->sd_name));
	sd->sd_capabilities = SR_CAP_SYSTEM_DISK | SR_CAP_AUTO_ASSEMBLE |
	    SR_CAP_REDUNDANT;
	sd->sd_max_wu = SR_RAID6_NOWU;

	/* Setup discipline specific function pointers. */
	sd->sd_assemble = sr_raid6_assemble;
	sd->sd_create = sr_raid6_create;
	sd->sd_openings = sr_raid6_openings;
	sd->sd_scsi_rw = sr_raid6_rw;
	sd->sd_scsi_intr = sr_raid6_intr;
	sd->sd_scsi_wu_done = sr_raid6_wu_done;
	sd->sd_set_chunk_state = sr_raid6_set_chunk_state;
	sd->sd_set_vol_state = sr_raid6_set_vol_state;
}

/*
 * Create a new RAID 6 volume.  Two chunks hold P/Q parity, so at least
 * four chunks are required; the usable size is the coerced per-chunk
 * size (rounded down to a strip-size multiple) times (no_chunk - 2).
 * Returns 0 on success or EINVAL.
 */
int
sr_raid6_create(struct sr_discipline *sd, struct bioc_createraid *bc,
    int no_chunk, int64_t coerced_size)
{
	if (no_chunk < 4) {
		sr_error(sd->sd_sc, "%s requires four or more chunks",
		    sd->sd_name);
		return EINVAL;
	}

	/*
	 * XXX add variable strip size later even though MAXPHYS is really
	 * the clever value, users like * to tinker with that type of stuff.
	 */
	sd->sd_meta->ssdi.ssd_strip_size = MAXPHYS;
	sd->sd_meta->ssdi.ssd_size = (coerced_size &
	    ~(((u_int64_t)sd->sd_meta->ssdi.ssd_strip_size >>
	    DEV_BSHIFT) - 1)) * (no_chunk - 2);

	return sr_raid6_init(sd);
}

/* Assemble an existing volume; all runtime setup is in sr_raid6_init(). */
int
sr_raid6_assemble(struct sr_discipline *sd, struct bioc_createraid *bc,
    int no_chunk, void *data)
{
	return sr_raid6_init(sd);
}

/*
 * Derive runtime values from the metadata: the strip-size shift and the
 * maximum number of ccbs a single work unit may need.  Returns 0 or EINVAL.
 */
int
sr_raid6_init(struct sr_discipline *sd)
{
	/* Initialise runtime values. */
	sd->mds.mdd_raid6.sr6_strip_bits =
	    sr_validate_stripsize(sd->sd_meta->ssdi.ssd_strip_size);
	if (sd->mds.mdd_raid6.sr6_strip_bits == -1) {
		sr_error(sd->sd_sc, "invalid strip size");
		return EINVAL;
	}

	/* only if stripsize <= MAXPHYS */
	sd->sd_max_ccb_per_wu = max(6, 2 * sd->sd_meta->ssdi.ssd_chunk_no);

	return 0;
}

/* Writes consume two work units (read + write), so halve the openings. */
int
sr_raid6_openings(struct sr_discipline *sd)
{
	return (sd->sd_max_wu >> 1); /* 2 wu's per IO */
}

/*
 * Transition chunk c to new_state.  Only the transitions enumerated in
 * the switch are legal; anything else panics.  On a real change the
 * volume state is recomputed and a metadata flush is scheduled.
 */
void
sr_raid6_set_chunk_state(struct sr_discipline *sd, int c, int new_state)
{
	int			old_state, s;

	/* XXX this is for RAID 0 */
	DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n",
	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
	    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state);

	/* ok to go to splbio since this only happens in error path */
	s = splbio();
	old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status;

	/* multiple IOs to the same chunk that fail will come through here */
	if (old_state == new_state)
		goto done;

	switch (old_state) {
	case BIOC_SDONLINE:
		switch (new_state) {
		case BIOC_SDOFFLINE:
		case BIOC_SDSCRUB:
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SDOFFLINE:
		if (new_state == BIOC_SDREBUILD) {
			;
		} else
			goto die;
		break;

	case BIOC_SDSCRUB:
		switch (new_state) {
		case BIOC_SDONLINE:
		case BIOC_SDOFFLINE:
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SDREBUILD:
		switch (new_state) {
		case BIOC_SDONLINE:
		case BIOC_SDOFFLINE:
			break;
		default:
			goto die;
		}
		break;

	default:
die:
		splx(s); /* XXX */
		panic("%s: %s: %s: invalid chunk state transition %d -> %d",
		    DEVNAME(sd->sd_sc),
		    sd->sd_meta->ssd_devname,
		    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname,
		    old_state, new_state);
		/* NOTREACHED */
	}

	sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state;
	sd->sd_set_vol_state(sd);

	sd->sd_must_flush = 1;
	task_add(systq, &sd->sd_meta_save_task);
done:
	splx(s);
}

/*
 * Recompute the volume state from the per-chunk states.  RAID 6 tolerates
 * up to two missing chunks, hence the "< nd - 2" test for offline.  Panics
 * if the chunk states imply an illegal volume state transition.
 */
void
sr_raid6_set_vol_state(struct sr_discipline *sd)
{
	int			states[SR_MAX_STATES];
	int			new_state, i, s, nd;
	int			old_state = sd->sd_vol_status;

	/* XXX this is for RAID 0 */

	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n",
	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname);

	nd = sd->sd_meta->ssdi.ssd_chunk_no;

	for (i = 0; i < SR_MAX_STATES; i++)
		states[i] = 0;

	/* Tally chunks by state. */
	for (i = 0; i < nd; i++) {
		s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status;
		if (s >= SR_MAX_STATES)
			panic("%s: %s: %s: invalid chunk state",
			    DEVNAME(sd->sd_sc),
			    sd->sd_meta->ssd_devname,
			    sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname);
		states[s]++;
	}

	if (states[BIOC_SDONLINE] == nd)
		new_state = BIOC_SVONLINE;
	else if (states[BIOC_SDONLINE] < nd - 2)
		new_state = BIOC_SVOFFLINE;
	else if (states[BIOC_SDSCRUB] != 0)
		new_state = BIOC_SVSCRUB;
	else if (states[BIOC_SDREBUILD] != 0)
		new_state = BIOC_SVREBUILD;
	else if (states[BIOC_SDONLINE] < nd)
		new_state = BIOC_SVDEGRADED;
	else {
		printf("old_state = %d, ", old_state);
		for (i = 0; i < nd; i++)
			printf("%d = %d, ", i,
			    sd->sd_vol.sv_chunks[i]->src_meta.scm_status);
		panic("invalid new_state");
	}

	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state %d -> %d\n",
	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
	    old_state, new_state);

	switch (old_state) {
	case BIOC_SVONLINE:
		switch (new_state) {
		case BIOC_SVONLINE: /* can go to same state */
		case BIOC_SVOFFLINE:
		case BIOC_SVDEGRADED:
		case BIOC_SVREBUILD: /* happens on boot */
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SVOFFLINE:
		/* XXX this might be a little too much */
		goto die;

	case BIOC_SVDEGRADED:
		switch (new_state) {
		case BIOC_SVOFFLINE:
		case BIOC_SVREBUILD:
		case BIOC_SVDEGRADED: /* can go to the same state */
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SVBUILDING:
		switch (new_state) {
		case BIOC_SVONLINE:
		case BIOC_SVOFFLINE:
		case BIOC_SVBUILDING: /* can go to the same state */
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SVSCRUB:
		switch (new_state) {
		case BIOC_SVONLINE:
		case BIOC_SVOFFLINE:
		case BIOC_SVDEGRADED:
		case BIOC_SVSCRUB: /* can go to same state */
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SVREBUILD:
		switch (new_state) {
		case BIOC_SVONLINE:
		case BIOC_SVOFFLINE:
		case BIOC_SVDEGRADED:
		case BIOC_SVREBUILD: /* can go to the same state */
			break;
		default:
			goto die;
		}
		break;

	default:
die:
		panic("%s: %s: invalid volume state transition %d -> %d",
		    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
		    old_state, new_state);
		/* NOTREACHED */
	}

	sd->sd_vol_status = new_state;
}

/* modes:
 *   readq: sr_raid6_addio(i, lba, length, NULL, SCSI_DATA_IN,
 *		0, qbuf, NULL, 0);
 *   readp: sr_raid6_addio(i, lba, length, NULL, SCSI_DATA_IN,
 *		0, pbuf, NULL, 0);
 *   readx: sr_raid6_addio(i, lba, length, NULL, SCSI_DATA_IN,
 *		0, pbuf, qbuf, gf_pow[i]);
 */

/*
 * Start I/O on a work unit: split the transfer into strip-sized pieces,
 * map each piece to its data chunk and the rotating P/Q parity chunks,
 * then queue plain reads, GF(256) reconstruction reads when chunks are
 * offline, or a read-modify-write sequence for writes (wu_r reads old
 * data/parity, wu writes the new data/parity once wu_r completes).
 * Returns 0 on success, 1 on failure.
 */
int
sr_raid6_rw(struct sr_workunit *wu)
{
	struct sr_workunit	*wu_r = NULL;
	struct sr_discipline	*sd = wu->swu_dis;
	struct scsi_xfer	*xs = wu->swu_xs;
	struct sr_chunk		*scp;
	int			s, fail, i, gxinv, pxinv;
	daddr_t			blkno, lba;
	int64_t			chunk_offs, lbaoffs, offset, strip_offs;
	int64_t			strip_no, strip_size, strip_bits, row_size;
	int64_t			fchunk, no_chunk, chunk, qchunk, pchunk;
	long			length, datalen;
	void			*pbuf, *data, *qbuf;

	/* blkno and scsi error will be handled by
	   sr_validate_io */
	if (sr_validate_io(wu, &blkno, "sr_raid6_rw"))
		goto bad;

	strip_size = sd->sd_meta->ssdi.ssd_strip_size;
	strip_bits = sd->mds.mdd_raid6.sr6_strip_bits;
	/* Two of the chunks hold P and Q parity. */
	no_chunk = sd->sd_meta->ssdi.ssd_chunk_no - 2;
	row_size = (no_chunk << strip_bits) >> DEV_BSHIFT;

	data = xs->data;
	datalen = xs->datalen;
	lbaoffs = blkno << DEV_BSHIFT;

	if (xs->flags & SCSI_DATA_OUT) {
		/* Writes need a second wu to read old data/parity first. */
		if ((wu_r = sr_scsi_wu_get(sd, SCSI_NOSLEEP)) == NULL){
			printf("%s: can't get wu_r", DEVNAME(sd->sd_sc));
			goto bad;
		}
		wu_r->swu_state = SR_WU_INPROGRESS;
		wu_r->swu_flags |= SR_WUF_DISCIPLINE;
	}

	wu->swu_blk_start = 0;
	while (datalen != 0) {
		strip_no = lbaoffs >> strip_bits;
		strip_offs = lbaoffs & (strip_size - 1);
		chunk_offs = (strip_no / no_chunk) << strip_bits;
		offset = chunk_offs + strip_offs;

		/* get size remaining in this stripe */
		length = MIN(strip_size - strip_offs, datalen);

		/* map disk offset to parity/data drive */
		chunk = strip_no % no_chunk;

		/*
		 * Q rotates one position per row over all no_chunk+2 drives;
		 * P occupies the slot just before Q (wrapping around).  Data
		 * chunk indices are then shifted past the parity slots.
		 */
		qchunk = (no_chunk + 1) - ((strip_no / no_chunk) % (no_chunk+2));
		if (qchunk == 0)
			pchunk = no_chunk + 1;
		else
			pchunk = qchunk - 1;
		if (chunk >= pchunk)
			chunk++;
		if (chunk >= qchunk)
			chunk++;

		lba = offset >> DEV_BSHIFT;

		/* XXX big hammer.. exclude I/O from entire stripe */
		if (wu->swu_blk_start == 0)
			wu->swu_blk_start = (strip_no / no_chunk) * row_size;
		wu->swu_blk_end = (strip_no / no_chunk) * row_size + (row_size - 1);

		fail = 0;
		fchunk = -1;

		/* Get disk-fail flags */
		for (i=0; i< no_chunk+2; i++) {
			scp = sd->sd_vol.sv_chunks[i];
			switch (scp->src_meta.scm_status) {
			case BIOC_SDOFFLINE:
			case BIOC_SDREBUILD:
			case BIOC_SDHOTSPARE:
				if (i == qchunk)
					fail |= SR_FAILQ;
				else if (i == pchunk)
					fail |= SR_FAILP;
				else if (i == chunk)
					fail |= SR_FAILX;
				else {
					/* dual data-disk failure */
					fail |= SR_FAILY;
					fchunk = i;
				}
				break;
			}
		}
		if (xs->flags & SCSI_DATA_IN) {
			if (!(fail & SR_FAILX)) {
				/* drive is good. issue single read request */
				if (sr_raid6_addio(wu, chunk, lba, length,
				    data, xs->flags, 0, NULL, NULL, 0))
					goto bad;
			} else if (fail & SR_FAILP) {
				/* Dx, P failed */
				printf("Disk %llx offline, "
				    "regenerating Dx+P\n", chunk);

				gxinv = gf_inv(gf_pow[chunk]);

				/* Calculate: Dx = (Q^Dz*gz)*inv(gx) */
				memset(data, 0, length);
				if (sr_raid6_addio(wu, qchunk, lba, length,
				    NULL, SCSI_DATA_IN, 0, NULL, data, gxinv))
					goto bad;

				/* Read Dz * gz * inv(gx) */
				for (i = 0; i < no_chunk+2; i++) {
					if (i == qchunk || i == pchunk || i == chunk)
						continue;

					if (sr_raid6_addio(wu, i, lba, length,
					    NULL, SCSI_DATA_IN, 0, NULL, data,
					    gf_mul(gf_pow[i], gxinv)))
						goto bad;
				}

				/* data will contain correct value on completion */
			} else if (fail & SR_FAILY) {
				/* Dx, Dy failed */
				printf("Disk %llx & %llx offline, "
				    "regenerating Dx+Dy\n", chunk, fchunk);

				gxinv = gf_inv(gf_pow[chunk] ^ gf_pow[fchunk]);
				pxinv = gf_mul(gf_pow[fchunk], gxinv);

				/* read Q * inv(gx + gy) */
				memset(data, 0, length);
				if (sr_raid6_addio(wu, qchunk, lba, length,
				    NULL, SCSI_DATA_IN, 0, NULL, data, gxinv))
					goto bad;

				/* read P * gy * inv(gx + gy) */
				if (sr_raid6_addio(wu, pchunk, lba, length,
				    NULL, SCSI_DATA_IN, 0, NULL, data, pxinv))
					goto bad;

				/* Calculate: Dx*gx^Dy*gy = Q^(Dz*gz) ; Dx^Dy = P^Dz
				 *   Q:  sr_raid6_xorp(qbuf, --, length);
				 *   P:  sr_raid6_xorp(pbuf, --, length);
				 *   Dz: sr_raid6_xorp(pbuf, --, length);
				 *	 sr_raid6_xorq(qbuf, --, length, gf_pow[i]);
				 */
				for (i = 0; i < no_chunk+2; i++) {
					if (i == qchunk || i == pchunk ||
					    i == chunk || i == fchunk)
						continue;

					/* read Dz * (gz + gy) * inv(gx + gy) */
					if (sr_raid6_addio(wu, i, lba, length,
					    NULL, SCSI_DATA_IN, 0, NULL, data,
					    pxinv ^ gf_mul(gf_pow[i], gxinv)))
						goto bad;
				}
			} else {
				/* Two cases: single disk (Dx) or (Dx+Q)
				 *   Dx = Dz ^ P (same as RAID5)
				 */
				printf("Disk %llx offline, "
				    "regenerating Dx%s\n", chunk,
				    fail & SR_FAILQ ? "+Q" : " single");

				/* Calculate: Dx = P^Dz
				 *   P:  sr_raid6_xorp(data, ---, length);
				 *   Dz: sr_raid6_xorp(data, ---, length);
				 */
				memset(data, 0, length);
				for (i = 0; i < no_chunk+2; i++) {
					if (i != chunk && i != qchunk) {
						/* Read Dz */
						if (sr_raid6_addio(wu, i, lba,
						    length, NULL, SCSI_DATA_IN,
						    0, data, NULL, 0))
							goto bad;
					}
				}

				/* data will contain correct value on completion */
			}
		} else {
			/* XXX handle writes to failed/offline disk? */
			if (fail & (SR_FAILX|SR_FAILQ|SR_FAILP))
				goto bad;

			/*
			 * initialize pbuf with contents of new data to be
			 * written. This will be XORed with old data and old
			 * parity in the intr routine. The result in pbuf
			 * is the new parity data.
			 */
			qbuf = sr_block_get(sd, length);
			if (qbuf == NULL)
				goto bad;

			pbuf = sr_block_get(sd, length);
			if (pbuf == NULL)
				goto bad;

			/* Calculate P = Dn; Q = gn * Dn */
			if (gf_premul(gf_pow[chunk]))
				goto bad;
			sr_raid6_xorp(pbuf, data, length);
			sr_raid6_xorq(qbuf, data, length, gf_pow[chunk]);

			/* Read old data: P ^= Dn' ; Q ^= (gn * Dn') */
			if (sr_raid6_addio(wu_r, chunk, lba, length, NULL,
			    SCSI_DATA_IN, 0, pbuf, qbuf, gf_pow[chunk]))
				goto bad;

			/* Read old xor-parity: P ^= P' */
			if (sr_raid6_addio(wu_r, pchunk, lba, length, NULL,
			    SCSI_DATA_IN, 0, pbuf, NULL, 0))
				goto bad;

			/* Read old q-parity: Q ^= Q' */
			if (sr_raid6_addio(wu_r, qchunk, lba, length, NULL,
			    SCSI_DATA_IN, 0, qbuf, NULL, 0))
				goto bad;

			/* write new data */
			if (sr_raid6_addio(wu, chunk, lba, length, data,
			    xs->flags, 0, NULL, NULL, 0))
				goto bad;

			/* write new xor-parity */
			if (sr_raid6_addio(wu, pchunk, lba, length, pbuf,
			    xs->flags, SR_CCBF_FREEBUF, NULL, NULL, 0))
				goto bad;

			/* write new q-parity */
			if (sr_raid6_addio(wu, qchunk, lba, length, qbuf,
			    xs->flags, SR_CCBF_FREEBUF, NULL, NULL, 0))
				goto bad;
		}

		/* advance to next block */
		lbaoffs += length;
		datalen -= length;
		data += length;
	}

	s = splbio();
	if (wu_r) {
		/* collide write request with reads */
		wu_r->swu_blk_start = wu->swu_blk_start;
		wu_r->swu_blk_end = wu->swu_blk_end;

		/* Defer the write wu until the read wu (wu_r) completes. */
		wu->swu_state = SR_WU_DEFERRED;
		wu_r->swu_collider = wu;
		TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu, swu_link);

		wu = wu_r;
	}
	splx(s);

	sr_schedule_wu(wu);

	return (0);
bad:
	/* XXX - can leak pbuf/qbuf on error.
*/
	/* wu is unwound by sr_wu_put */
	if (wu_r)
		sr_scsi_wu_put(sd, wu_r);
	return (1);
}

/* Handle failure I/O completion */
int
sr_failio(struct sr_workunit *wu)
{
	struct sr_discipline	*sd = wu->swu_dis;
	struct sr_ccb		*ccb;

	if (!(wu->swu_flags & SR_WUF_FAIL))
		return (0);

	/* Wu is a 'fake'.. don't do real I/O just intr */
	TAILQ_INSERT_TAIL(&sd->sd_wu_pendq, wu, swu_link);
	TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link)
		sr_raid6_intr(&ccb->ccb_buf);
	return (1);
}

/*
 * I/O completion: fold the transferred data into the p/q accumulators
 * recorded in ccb_opaque (if any), release temporary buffers and mark
 * the ccb done on the work unit.
 */
void
sr_raid6_intr(struct buf *bp)
{
	struct sr_ccb		*ccb = (struct sr_ccb *)bp;
	struct sr_workunit	*wu = ccb->ccb_wu;
	struct sr_discipline	*sd = wu->swu_dis;
	struct sr_raid6_opaque	*pq = ccb->ccb_opaque;
	int			s;

	DNPRINTF(SR_D_INTR, "%s: sr_raid6_intr bp %p xs %p\n",
	    DEVNAME(sd->sd_sc), bp, wu->swu_xs);

	s = splbio();
	sr_ccb_done(ccb);

	/* XOR data to result. */
	if (ccb->ccb_state == SR_CCB_OK && pq) {
		if (pq->pbuf)
			/* Calculate xor-parity */
			sr_raid6_xorp(pq->pbuf, ccb->ccb_buf.b_data,
			    ccb->ccb_buf.b_bcount);
		if (pq->qbuf)
			/* Calculate q-parity */
			sr_raid6_xorq(pq->qbuf, ccb->ccb_buf.b_data,
			    ccb->ccb_buf.b_bcount, pq->gn);
		/* XXX - could pass sizeof(*pq) as the free size. */
		free(pq, M_DEVBUF, 0);
		ccb->ccb_opaque = NULL;
	}

	/* Free allocated data buffer. */
	if (ccb->ccb_flags & SR_CCBF_FREEBUF) {
		sr_block_put(sd, ccb->ccb_buf.b_data, ccb->ccb_buf.b_bcount);
		ccb->ccb_buf.b_data = NULL;
	}

	sr_wu_done(wu);
	splx(s);
}

/*
 * Work-unit completion: decide whether the wu succeeded, should be
 * retried (reads) or permanently failed (writes).
 */
int
sr_raid6_wu_done(struct sr_workunit *wu)
{
	struct sr_discipline	*sd = wu->swu_dis;
	struct scsi_xfer	*xs = wu->swu_xs;

	/* XXX - we have no way of propagating errors... */
	if (wu->swu_flags & SR_WUF_DISCIPLINE)
		return SR_WU_OK;

	/* XXX - This is insufficient for RAID 6. */
	if (wu->swu_ios_succeeded > 0) {
		xs->error = XS_NOERROR;
		return SR_WU_OK;
	}

	if (xs->flags & SCSI_DATA_IN) {
		printf("%s: retrying read on block %lld\n",
		    sd->sd_meta->ssd_devname, (long long)wu->swu_blk_start);
		sr_wu_release_ccbs(wu);
		wu->swu_state = SR_WU_RESTART;
		if (sd->sd_scsi_rw(wu) == 0)
			return SR_WU_RESTART;
	} else {
		printf("%s: permanently fail write on block %lld\n",
		    sd->sd_meta->ssd_devname, (long long)wu->swu_blk_start);
	}

	wu->swu_state = SR_WU_FAILED;
	xs->error = XS_DRIVER_STUFFUP;

	return SR_WU_FAILED;
}

/*
 * Build one ccb for a chunk I/O and enqueue it on the work unit.
 * If data is NULL a temporary buffer is allocated and freed by the
 * interrupt handler (SR_CCBF_FREEBUF).  pbuf/qbuf, with multiplier gn,
 * are the accumulators into which the interrupt handler XORs the result.
 * Returns 0 on success, -1 on allocation failure.
 */
int
sr_raid6_addio(struct sr_workunit *wu, int chunk, daddr_t blkno,
    long len, void *data, int xsflags, int ccbflags, void *pbuf,
    void *qbuf, int gn)
{
	struct sr_discipline	*sd = wu->swu_dis;
	struct sr_ccb		*ccb;
	struct sr_raid6_opaque	*pqbuf;

	DNPRINTF(SR_D_DIS, "sr_raid6_addio: %s %d.%lld %ld %p:%p\n",
	    (xsflags & SCSI_DATA_IN) ? "read" : "write", chunk,
	    (long long)blkno, len, pbuf, qbuf);

	/* Allocate temporary buffer. */
	if (data == NULL) {
		data = sr_block_get(sd, len);
		if (data == NULL)
			return (-1);
		ccbflags |= SR_CCBF_FREEBUF;
	}

	ccb = sr_ccb_rw(sd, chunk, blkno, len, data, xsflags, ccbflags);
	if (ccb == NULL) {
		if (ccbflags & SR_CCBF_FREEBUF)
			sr_block_put(sd, data, len);
		return (-1);
	}
	if (pbuf || qbuf) {
		/* XXX - can leak data and ccb on failure. */
		if (qbuf && gf_premul(gn))
			return (-1);

		/* XXX - should be preallocated? */
		pqbuf = malloc(sizeof(struct sr_raid6_opaque),
		    M_DEVBUF, M_ZERO | M_NOWAIT);
		if (pqbuf == NULL) {
			sr_ccb_put(ccb);
			return (-1);
		}
		pqbuf->pbuf = pbuf;
		pqbuf->qbuf = qbuf;
		pqbuf->gn = gn;
		ccb->ccb_opaque = pqbuf;
	}
	sr_wu_enqueue_ccb(wu, ccb);

	return (0);
}

/* Perform RAID6 parity calculation.
781 * P=xor parity, Q=GF256 parity, D=data, gn=disk# */ 782 void 783 sr_raid6_xorp(void *p, void *d, int len) 784 { 785 uint32_t *pbuf = p, *data = d; 786 787 len >>= 2; 788 while (len--) 789 *pbuf++ ^= *data++; 790 } 791 792 void 793 sr_raid6_xorq(void *q, void *d, int len, int gn) 794 { 795 uint32_t *qbuf = q, *data = d, x; 796 uint8_t *gn_map = gf_map[gn]; 797 798 len >>= 2; 799 while (len--) { 800 x = *data++; 801 *qbuf++ ^= (((uint32_t)gn_map[x & 0xff]) | 802 ((uint32_t)gn_map[(x >> 8) & 0xff] << 8) | 803 ((uint32_t)gn_map[(x >> 16) & 0xff] << 16) | 804 ((uint32_t)gn_map[(x >> 24) & 0xff] << 24)); 805 } 806 } 807 808 /* Create GF256 log/pow tables: polynomial = 0x11D */ 809 void 810 gf_init(void) 811 { 812 int i; 813 uint8_t p = 1; 814 815 /* use 2N pow table to avoid using % in multiply */ 816 for (i=0; i<256; i++) { 817 gf_log[p] = i; 818 gf_pow[i] = gf_pow[i+255] = p; 819 p = ((p << 1) ^ ((p & 0x80) ? 0x1D : 0x00)); 820 } 821 gf_log[0] = 512; 822 } 823 824 uint8_t 825 gf_inv(uint8_t a) 826 { 827 return gf_pow[255 - gf_log[a]]; 828 } 829 830 uint8_t 831 gf_mul(uint8_t a, uint8_t b) 832 { 833 return gf_pow[gf_log[a] + gf_log[b]]; 834 } 835 836 /* Precalculate multiplication tables for drive gn */ 837 int 838 gf_premul(uint8_t gn) 839 { 840 int i; 841 842 if (gf_map[gn] != NULL) 843 return (0); 844 845 if ((gf_map[gn] = malloc(256, M_DEVBUF, M_ZERO | M_NOWAIT)) == NULL) 846 return (-1); 847 848 for (i=0; i<256; i++) 849 gf_map[gn][i] = gf_pow[gf_log[i] + gf_log[gn]]; 850 return (0); 851 } 852