1 /* $NetBSD: rf_pq.c,v 1.17 2019/10/10 03:43:59 christos Exp $ */
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: Daniel Stodolsky
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29 /*
30 * Code for RAID level 6 (P + Q) disk array architecture.
31 */
32
33 #include <sys/cdefs.h>
34 __KERNEL_RCSID(0, "$NetBSD: rf_pq.c,v 1.17 2019/10/10 03:43:59 christos Exp $");
35
36 #include "rf_archs.h"
37
38 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0)
39
40 #include <dev/raidframe/raidframevar.h>
41
42 #include "rf_raid.h"
43 #include "rf_dag.h"
44 #include "rf_dagffrd.h"
45 #include "rf_dagffwr.h"
46 #include "rf_dagdegrd.h"
47 #include "rf_dagdegwr.h"
48 #include "rf_dagutils.h"
49 #include "rf_dagfuncs.h"
50 #include "rf_etimer.h"
51 #include "rf_pqdeg.h"
52 #include "rf_general.h"
53 #include "rf_map.h"
54 #include "rf_pq.h"
55
56 RF_RedFuncs_t rf_pFuncs = {rf_RegularONPFunc, "Regular Old-New P", rf_SimpleONPFunc, "Simple Old-New P"};
57 RF_RedFuncs_t rf_pRecoveryFuncs = {rf_RecoveryPFunc, "Recovery P Func", rf_RecoveryPFunc, "Recovery P Func"};
58
59 void
rf_RegularONPFunc(RF_DagNode_t * node)60 rf_RegularONPFunc(RF_DagNode_t *node)
61 {
62 rf_RegularXorFunc(node);
63 }
64 /*
65 same as simpleONQ func, but the coefficient is always 1
66 */
67
68 void
rf_SimpleONPFunc(RF_DagNode_t * node)69 rf_SimpleONPFunc(RF_DagNode_t *node)
70 {
71 rf_SimpleXorFunc(node);
72 }
73
74 void
rf_RecoveryPFunc(RF_DagNode_t * node)75 rf_RecoveryPFunc(RF_DagNode_t *node)
76 {
77 rf_RecoveryXorFunc(node);
78 }
79
80 void
rf_RegularPFunc(RF_DagNode_t * node)81 rf_RegularPFunc(RF_DagNode_t *node)
82 {
83 rf_RegularXorFunc(node);
84 }
85 #endif /* (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0) */
86 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
87
88 static void
89 QDelta(char *dest, char *obuf, char *nbuf, unsigned length,
90 unsigned char coeff);
91 static void
92 rf_InvertQ(unsigned long *qbuf, unsigned long *abuf,
93 unsigned length, unsigned coeff);
94
95 RF_RedFuncs_t rf_qFuncs = {rf_RegularONQFunc, "Regular Old-New Q", rf_SimpleONQFunc, "Simple Old-New Q"};
96 RF_RedFuncs_t rf_qRecoveryFuncs = {rf_RecoveryQFunc, "Recovery Q Func", rf_RecoveryQFunc, "Recovery Q Func"};
97 RF_RedFuncs_t rf_pqRecoveryFuncs = {rf_RecoveryPQFunc, "Recovery PQ Func", rf_RecoveryPQFunc, "Recovery PQ Func"};
98
99 void
rf_PQDagSelect(RF_Raid_t * raidPtr,RF_IoType_t type,RF_AccessStripeMap_t * asmap,RF_VoidFuncPtr * createFunc)100 rf_PQDagSelect(
101 RF_Raid_t * raidPtr,
102 RF_IoType_t type,
103 RF_AccessStripeMap_t * asmap,
104 RF_VoidFuncPtr * createFunc)
105 {
106 RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
107 unsigned ndfail = asmap->numDataFailed;
108 unsigned npfail = asmap->numParityFailed;
109 unsigned ntfail = npfail + ndfail;
110
111 RF_ASSERT(RF_IO_IS_R_OR_W(type));
112 if (ntfail > 2) {
113 RF_ERRORMSG("more than two disks failed in a single group! Aborting I/O operation.\n");
114 *createFunc = NULL;
115 return;
116 }
117 /* ok, we can do this I/O */
118 if (type == RF_IO_TYPE_READ) {
119 switch (ndfail) {
120 case 0:
121 /* fault free read */
122 *createFunc = (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG; /* same as raid 5 */
123 break;
124 case 1:
125 /* lost a single data unit */
126 /* two cases: (1) parity is not lost. do a normal raid
127 * 5 reconstruct read. (2) parity is lost. do a
128 * reconstruct read using "q". */
129 if (ntfail == 2) { /* also lost redundancy */
130 if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY)
131 *createFunc = (RF_VoidFuncPtr) rf_PQ_110_CreateReadDAG;
132 else
133 *createFunc = (RF_VoidFuncPtr) rf_PQ_101_CreateReadDAG;
134 } else {
135 /* P and Q are ok. But is there a failure in
136 * some unaccessed data unit? */
137 if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
138 *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG;
139 else
140 *createFunc = (RF_VoidFuncPtr) rf_PQ_100_CreateReadDAG;
141 }
142 break;
143 case 2:
144 /* lost two data units */
145 *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG;
146 break;
147 }
148 return;
149 }
150 /* a write */
151 switch (ntfail) {
152 case 0: /* fault free */
153 if (rf_suppressLocksAndLargeWrites ||
154 (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) ||
155 (asmap->parityInfo->next != NULL) || (asmap->qInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) {
156
157 *createFunc = (RF_VoidFuncPtr) rf_PQCreateSmallWriteDAG;
158 } else {
159 *createFunc = (RF_VoidFuncPtr) rf_PQCreateLargeWriteDAG;
160 }
161 break;
162
163 case 1: /* single disk fault */
164 if (npfail == 1) {
165 RF_ASSERT((asmap->failedPDAs[0]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q));
166 if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q) { /* q died, treat like
167 * normal mode raid5
168 * write. */
169 if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
170 || rf_NumFailedDataUnitsInStripe(raidPtr, asmap))
171 *createFunc = (RF_VoidFuncPtr) rf_PQ_001_CreateSmallWriteDAG;
172 else
173 *createFunc = (RF_VoidFuncPtr) rf_PQ_001_CreateLargeWriteDAG;
174 } else {/* parity died, small write only updating Q */
175 if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
176 || rf_NumFailedDataUnitsInStripe(raidPtr, asmap))
177 *createFunc = (RF_VoidFuncPtr) rf_PQ_010_CreateSmallWriteDAG;
178 else
179 *createFunc = (RF_VoidFuncPtr) rf_PQ_010_CreateLargeWriteDAG;
180 }
181 } else { /* data missing. Do a P reconstruct write if
182 * only a single data unit is lost in the
183 * stripe, otherwise a PQ reconstruct write. */
184 if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
185 *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG;
186 else
187 *createFunc = (RF_VoidFuncPtr) rf_PQ_100_CreateWriteDAG;
188 }
189 break;
190
191 case 2: /* two disk faults */
192 switch (npfail) {
193 case 2: /* both p and q dead */
194 *createFunc = (RF_VoidFuncPtr) rf_PQ_011_CreateWriteDAG;
195 break;
196 case 1: /* either p or q and dead data */
197 RF_ASSERT(asmap->failedPDAs[0]->type == RF_PDA_TYPE_DATA);
198 RF_ASSERT((asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q));
199 if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q)
200 *createFunc = (RF_VoidFuncPtr) rf_PQ_101_CreateWriteDAG;
201 else
202 *createFunc = (RF_VoidFuncPtr) rf_PQ_110_CreateWriteDAG;
203 break;
204 case 0: /* double data loss */
205 *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG;
206 break;
207 }
208 break;
209
210 default: /* more than 2 disk faults */
211 *createFunc = NULL;
212 RF_PANIC();
213 }
214 return;
215 }
216 /*
217 Used as a stop gap info function
218 */
219 #if 0
220 static void
221 PQOne(RF_Raid_t *raidPtr, int *nSucc, int *nAnte, RF_AccessStripeMap_t *asmap)
222 {
223 *nSucc = *nAnte = 1;
224 }
225
226 static void
227 PQOneTwo(RF_Raid_t *raidPtr, int *nSucc, int *nAnte, RF_AccessStripeMap_t *asmap)
228 {
229 *nSucc = 1;
230 *nAnte = 2;
231 }
232 #endif
233
RF_CREATE_DAG_FUNC_DECL(rf_PQCreateLargeWriteDAG)234 RF_CREATE_DAG_FUNC_DECL(rf_PQCreateLargeWriteDAG)
235 {
236 rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2,
237 rf_RegularPQFunc, RF_FALSE);
238 }
239
240 int
rf_RegularONQFunc(RF_DagNode_t * node)241 rf_RegularONQFunc(RF_DagNode_t *node)
242 {
243 int np = node->numParams;
244 int d;
245 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
246 int i;
247 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
248 RF_Etimer_t timer;
249 char *qbuf, *qpbuf;
250 char *obuf, *nbuf;
251 RF_PhysDiskAddr_t *old, *new;
252 unsigned long coeff;
253 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
254
255 RF_ETIMER_START(timer);
256
257 d = (np - 3) / 4;
258 RF_ASSERT(4 * d + 3 == np);
259 qbuf = (char *) node->params[2 * d + 1].p; /* q buffer */
260 for (i = 0; i < d; i++) {
261 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
262 obuf = (char *) node->params[2 * i + 1].p;
263 new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
264 nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
265 RF_ASSERT(new->numSector == old->numSector);
266 RF_ASSERT(new->raidAddress == old->raidAddress);
267 /* the stripe unit within the stripe tells us the coefficient
268 * to use for the multiply. */
269 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress);
270 /* compute the data unit offset within the column, then add
271 * one */
272 coeff = (coeff % raidPtr->Layout.numDataCol);
273 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU);
274 QDelta(qpbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
275 }
276
277 RF_ETIMER_STOP(timer);
278 RF_ETIMER_EVAL(timer);
279 tracerec->q_us += RF_ETIMER_VAL_US(timer);
280 rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no
281 * I/O in this node */
282 return (0);
283 }
284 /*
285 See the SimpleXORFunc for the difference between a simple and regular func.
286 These Q functions should be used for
287
288 new q = Q(data,old data,old q)
289
290 style updates and not for
291
292 q = ( new data, new data, .... )
293
294 computations.
295
296 The simple q takes 2(2d+1)+1 params, where d is the number
297 of stripes written. The order of params is
298 old data pda_0, old data buffer_0, old data pda_1, old data buffer_1, ... old data pda_d, old data buffer_d
299 [2d] old q pda_0, old q buffer
   [2d+2]  new data pda_0, new data buffer_0, ... new data pda_d, new data buffer_d
301 raidPtr
302 */
303
304 int
rf_SimpleONQFunc(RF_DagNode_t * node)305 rf_SimpleONQFunc(RF_DagNode_t *node)
306 {
307 int np = node->numParams;
308 int d;
309 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
310 int i;
311 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
312 RF_Etimer_t timer;
313 char *qbuf;
314 char *obuf, *nbuf;
315 RF_PhysDiskAddr_t *old, *new;
316 unsigned long coeff;
317
318 RF_ETIMER_START(timer);
319
320 d = (np - 3) / 4;
321 RF_ASSERT(4 * d + 3 == np);
322 qbuf = (char *) node->params[2 * d + 1].p; /* q buffer */
323 for (i = 0; i < d; i++) {
324 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
325 obuf = (char *) node->params[2 * i + 1].p;
326 new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
327 nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
328 RF_ASSERT(new->numSector == old->numSector);
329 RF_ASSERT(new->raidAddress == old->raidAddress);
330 /* the stripe unit within the stripe tells us the coefficient
331 * to use for the multiply. */
332 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress);
333 /* compute the data unit offset within the column, then add
334 * one */
335 coeff = (coeff % raidPtr->Layout.numDataCol);
336 QDelta(qbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
337 }
338
339 RF_ETIMER_STOP(timer);
340 RF_ETIMER_EVAL(timer);
341 tracerec->q_us += RF_ETIMER_VAL_US(timer);
342 rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no
343 * I/O in this node */
344 return (0);
345 }
RF_CREATE_DAG_FUNC_DECL(rf_PQCreateSmallWriteDAG)346 RF_CREATE_DAG_FUNC_DECL(rf_PQCreateSmallWriteDAG)
347 {
348 rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pFuncs, &rf_qFuncs);
349 }
350
351 static void RegularQSubr(RF_DagNode_t *node, char *qbuf);
352
353 static void
RegularQSubr(RF_DagNode_t * node,char * qbuf)354 RegularQSubr(RF_DagNode_t *node, char *qbuf)
355 {
356 int np = node->numParams;
357 int d;
358 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
359 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
360 int i;
361 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
362 RF_Etimer_t timer;
363 char *obuf, *qpbuf;
364 RF_PhysDiskAddr_t *old;
365 unsigned long coeff;
366
367 RF_ETIMER_START(timer);
368
369 d = (np - 1) / 2;
370 RF_ASSERT(2 * d + 1 == np);
371 for (i = 0; i < d; i++) {
372 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
373 obuf = (char *) node->params[2 * i + 1].p;
374 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
375 /* compute the data unit offset within the column, then add
376 * one */
377 coeff = (coeff % raidPtr->Layout.numDataCol);
378 /* the input buffers may not all be aligned with the start of
379 * the stripe. so shift by their sector offset within the
380 * stripe unit */
381 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU);
382 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
383 }
384
385 RF_ETIMER_STOP(timer);
386 RF_ETIMER_EVAL(timer);
387 tracerec->q_us += RF_ETIMER_VAL_US(timer);
388 }
389 /*
390 used in degraded writes.
391 */
392
393 static void DegrQSubr(RF_DagNode_t *node);
394
395 static void
DegrQSubr(RF_DagNode_t * node)396 DegrQSubr(RF_DagNode_t *node)
397 {
398 int np = node->numParams;
399 int d;
400 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
401 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
402 int i;
403 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
404 RF_Etimer_t timer;
405 char *qbuf = node->results[1];
406 char *obuf, *qpbuf;
407 RF_PhysDiskAddr_t *old;
408 unsigned long coeff;
409 unsigned fail_start;
410 int j;
411
412 old = (RF_PhysDiskAddr_t *) node->params[np - 2].p;
413 fail_start = old->startSector % secPerSU;
414
415 RF_ETIMER_START(timer);
416
417 d = (np - 2) / 2;
418 RF_ASSERT(2 * d + 2 == np);
419 for (i = 0; i < d; i++) {
420 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
421 obuf = (char *) node->params[2 * i + 1].p;
422 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
423 /* compute the data unit offset within the column, then add
424 * one */
425 coeff = (coeff % raidPtr->Layout.numDataCol);
426 /* the input buffers may not all be aligned with the start of
427 * the stripe. so shift by their sector offset within the
428 * stripe unit */
429 j = old->startSector % secPerSU;
430 RF_ASSERT(j >= fail_start);
431 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start);
432 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
433 }
434
435 RF_ETIMER_STOP(timer);
436 RF_ETIMER_EVAL(timer);
437 tracerec->q_us += RF_ETIMER_VAL_US(timer);
438 }
439 /*
440 Called by large write code to compute the new parity and the new q.
441
442 structure of the params:
443
444 pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d ( d = numDataCol
445 raidPtr
446
447 for a total of 2d+1 arguments.
448 The result buffers results[0], results[1] are the buffers for the p and q,
449 respectively.
450
451 We compute Q first, then compute P. The P calculation may try to reuse
452 one of the input buffers for its output, so if we computed P first, we would
453 corrupt the input for the q calculation.
454 */
455
456 int
rf_RegularPQFunc(RF_DagNode_t * node)457 rf_RegularPQFunc(RF_DagNode_t *node)
458 {
459 RegularQSubr(node, node->results[1]);
460 return (rf_RegularXorFunc(node)); /* does the wakeup */
461 }
462
463 int
rf_RegularQFunc(RF_DagNode_t * node)464 rf_RegularQFunc(RF_DagNode_t *node)
465 {
466 /* Almost ... adjust Qsubr args */
467 RegularQSubr(node, node->results[0]);
468 rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no
469 * I/O in this node */
470 return (0);
471 }
472 /*
473 Called by singly degraded write code to compute the new parity and the new q.
474
475 structure of the params:
476
477 pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d
478 failedPDA raidPtr
479
480 for a total of 2d+2 arguments.
481 The result buffers results[0], results[1] are the buffers for the parity and q,
482 respectively.
483
484 We compute Q first, then compute parity. The parity calculation may try to reuse
485 one of the input buffers for its output, so if we computed parity first, we would
486 corrupt the input for the q calculation.
487
488 We treat this identically to the regularPQ case, ignoring the failedPDA extra argument.
489 */
490
491 void
rf_Degraded_100_PQFunc(RF_DagNode_t * node)492 rf_Degraded_100_PQFunc(RF_DagNode_t *node)
493 {
494 int np = node->numParams;
495
496 RF_ASSERT(np >= 2);
497 DegrQSubr(node);
498 rf_RecoveryXorFunc(node);
499 }
500
501
502 /*
503 The two below are used when reading a stripe with a single lost data unit.
504 The parameters are
505
506 pda_0, buffer_0, .... pda_n, buffer_n, P pda, P buffer, failedPDA, raidPtr
507
508 and results[0] contains the data buffer. Which is originally zero-filled.
509
510 */
511
512 /* this Q func is used by the degraded-mode dag functions to recover lost data.
513 * the second-to-last parameter is the PDA for the failed portion of the access.
514 * the code here looks at this PDA and assumes that the xor target buffer is
515 * equal in size to the number of sectors in the failed PDA. It then uses
516 * the other PDAs in the parameter list to determine where within the target
517 * buffer the corresponding data should be xored.
518 *
519 * Recall the basic equation is
520 *
521 * Q = ( data_1 + 2 * data_2 ... + k * data_k ) mod 256
522 *
523 * so to recover data_j we need
524 *
525 * J data_j = (Q - data_1 - 2 data_2 ....- k* data_k) mod 256
526 *
527 * So the coefficient for each buffer is (255 - data_col), and j should be initialized by
528 * copying Q into it. Then we need to do a table lookup to convert to solve
529 * data_j /= J
530 *
531 *
532 */
533 int
rf_RecoveryQFunc(RF_DagNode_t * node)534 rf_RecoveryQFunc(RF_DagNode_t *node)
535 {
536 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
537 RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
538 RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
539 int i;
540 RF_PhysDiskAddr_t *pda;
541 RF_RaidAddr_t suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
542 char *srcbuf, *destbuf;
543 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
544 RF_Etimer_t timer;
545 unsigned long coeff;
546
547 RF_ETIMER_START(timer);
548 /* start by copying Q into the buffer */
549 memcpy(node->results[0], node->params[node->numParams - 3].p,
550 rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
551 for (i = 0; i < node->numParams - 4; i += 2) {
552 RF_ASSERT(node->params[i + 1].p != node->results[0]);
553 pda = (RF_PhysDiskAddr_t *) node->params[i].p;
554 srcbuf = (char *) node->params[i + 1].p;
555 suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
556 destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
557 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), pda->raidAddress);
558 /* compute the data unit offset within the column */
559 coeff = (coeff % raidPtr->Layout.numDataCol);
560 rf_IncQ((unsigned long *) destbuf, (unsigned long *) srcbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
561 }
562 /* Do the nasty inversion now */
563 coeff = (rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), failedPDA->startSector) % raidPtr->Layout.numDataCol);
564 rf_InvertQ(node->results[0], node->results[0], rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
565 RF_ETIMER_STOP(timer);
566 RF_ETIMER_EVAL(timer);
567 tracerec->q_us += RF_ETIMER_VAL_US(timer);
568 rf_GenericWakeupFunc(node, 0);
569 return (0);
570 }
571
572 int
rf_RecoveryPQFunc(RF_DagNode_t * node)573 rf_RecoveryPQFunc(RF_DagNode_t *node)
574 {
575 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
576 printf("raid%d: Recovery from PQ not implemented.\n",raidPtr->raidid);
577 return (1);
578 }
579 /*
580 Degraded write Q subroutine.
581 Used when P is dead.
582 Large-write style Q computation.
583 Parameters
584
585 (pda,buf),(pda,buf),.....,(failedPDA,bufPtr),failedPDA,raidPtr.
586
587 We ignore failedPDA.
588
589 This is a "simple style" recovery func.
590 */
591
592 void
rf_PQ_DegradedWriteQFunc(RF_DagNode_t * node)593 rf_PQ_DegradedWriteQFunc(RF_DagNode_t *node)
594 {
595 int np = node->numParams;
596 int d;
597 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
598 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
599 int i;
600 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
601 RF_Etimer_t timer;
602 char *qbuf = node->results[0];
603 char *obuf, *qpbuf;
604 RF_PhysDiskAddr_t *old;
605 unsigned long coeff;
606 int fail_start, j;
607
608 old = (RF_PhysDiskAddr_t *) node->params[np - 2].p;
609 fail_start = old->startSector % secPerSU;
610
611 RF_ETIMER_START(timer);
612
613 d = (np - 2) / 2;
614 RF_ASSERT(2 * d + 2 == np);
615
616 for (i = 0; i < d; i++) {
617 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
618 obuf = (char *) node->params[2 * i + 1].p;
619 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
620 /* compute the data unit offset within the column, then add
621 * one */
622 coeff = (coeff % raidPtr->Layout.numDataCol);
623 j = old->startSector % secPerSU;
624 RF_ASSERT(j >= fail_start);
625 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start);
626 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
627 }
628
629 RF_ETIMER_STOP(timer);
630 RF_ETIMER_EVAL(timer);
631 tracerec->q_us += RF_ETIMER_VAL_US(timer);
632 rf_GenericWakeupFunc(node, 0);
633 }
634
635
636
637
638 /* Q computations */
639
640 /*
   coeff - column;
642
643 compute dest ^= qfor[28-coeff][rn[coeff+1] a]
644
645 on 5-bit basis;
646 length in bytes;
647 */
648
649 void
rf_IncQ(unsigned long * dest,unsigned long * buf,unsigned length,unsigned coeff)650 rf_IncQ(unsigned long *dest, unsigned long *buf, unsigned length, unsigned coeff)
651 {
652 unsigned long a, d, new;
653 unsigned long a1, a2;
654 unsigned int *q = &(rf_qfor[28 - coeff][0]);
655 unsigned r = rf_rn[coeff + 1];
656
657 #define EXTRACT(a,i) ((a >> (5L*i)) & 0x1f)
658 #define INSERT(a,i) (a << (5L*i))
659
660 length /= 8;
661 /* 13 5 bit quants in a 64 bit word */
662 while (length) {
663 a = *buf++;
664 d = *dest;
665 a1 = EXTRACT(a, 0) ^ r;
666 a2 = EXTRACT(a, 1) ^ r;
667 new = INSERT(a2, 1) | a1;
668 a1 = EXTRACT(a, 2) ^ r;
669 a2 = EXTRACT(a, 3) ^ r;
670 a1 = q[a1];
671 a2 = q[a2];
672 new = new | INSERT(a1, 2) | INSERT(a2, 3);
673 a1 = EXTRACT(a, 4) ^ r;
674 a2 = EXTRACT(a, 5) ^ r;
675 a1 = q[a1];
676 a2 = q[a2];
677 new = new | INSERT(a1, 4) | INSERT(a2, 5);
678 a1 = EXTRACT(a, 5) ^ r;
679 a2 = EXTRACT(a, 6) ^ r;
680 a1 = q[a1];
681 a2 = q[a2];
682 new = new | INSERT(a1, 5) | INSERT(a2, 6);
683 #if RF_LONGSHIFT > 2
684 a1 = EXTRACT(a, 7) ^ r;
685 a2 = EXTRACT(a, 8) ^ r;
686 a1 = q[a1];
687 a2 = q[a2];
688 new = new | INSERT(a1, 7) | INSERT(a2, 8);
689 a1 = EXTRACT(a, 9) ^ r;
690 a2 = EXTRACT(a, 10) ^ r;
691 a1 = q[a1];
692 a2 = q[a2];
693 new = new | INSERT(a1, 9) | INSERT(a2, 10);
694 a1 = EXTRACT(a, 11) ^ r;
695 a2 = EXTRACT(a, 12) ^ r;
696 a1 = q[a1];
697 a2 = q[a2];
698 new = new | INSERT(a1, 11) | INSERT(a2, 12);
699 #endif /* RF_LONGSHIFT > 2 */
700 d ^= new;
701 *dest++ = d;
702 length--;
703 }
704 }
705 /*
706 compute
707
708 dest ^= rf_qfor[28-coeff][rf_rn[coeff+1] (old^new) ]
709
710 on a five bit basis.
711 optimization: compute old ^ new on 64 bit basis.
712
713 length in bytes.
714 */
715
716 static void
QDelta(char * dest,char * obuf,char * nbuf,unsigned length,unsigned char coeff)717 QDelta(
718 char *dest,
719 char *obuf,
720 char *nbuf,
721 unsigned length,
722 unsigned char coeff)
723 {
724 unsigned long a, d, new;
725 unsigned long a1, a2;
726 unsigned int *q = &(rf_qfor[28 - coeff][0]);
727 unsigned int r = rf_rn[coeff + 1];
728
729 r = a1 = a2 = new = d = a = 0; /* XXX for now... */
730 q = NULL; /* XXX for now */
731
732 #ifdef _KERNEL
733 /* PQ in kernel currently not supported because the encoding/decoding
734 * table is not present */
735 memset(dest, 0, length);
736 #else /* KERNEL */
737 /* this code probably doesn't work and should be rewritten -wvcii */
738 /* 13 5 bit quants in a 64 bit word */
739 length /= 8;
740 while (length) {
741 a = *obuf++; /* XXX need to reorg to avoid cache conflicts */
742 a ^= *nbuf++;
743 d = *dest;
744 a1 = EXTRACT(a, 0) ^ r;
745 a2 = EXTRACT(a, 1) ^ r;
746 a1 = q[a1];
747 a2 = q[a2];
748 new = INSERT(a2, 1) | a1;
749 a1 = EXTRACT(a, 2) ^ r;
750 a2 = EXTRACT(a, 3) ^ r;
751 a1 = q[a1];
752 a2 = q[a2];
753 new = new | INSERT(a1, 2) | INSERT(a2, 3);
754 a1 = EXTRACT(a, 4) ^ r;
755 a2 = EXTRACT(a, 5) ^ r;
756 a1 = q[a1];
757 a2 = q[a2];
758 new = new | INSERT(a1, 4) | INSERT(a2, 5);
759 a1 = EXTRACT(a, 5) ^ r;
760 a2 = EXTRACT(a, 6) ^ r;
761 a1 = q[a1];
762 a2 = q[a2];
763 new = new | INSERT(a1, 5) | INSERT(a2, 6);
764 #if RF_LONGSHIFT > 2
765 a1 = EXTRACT(a, 7) ^ r;
766 a2 = EXTRACT(a, 8) ^ r;
767 a1 = q[a1];
768 a2 = q[a2];
769 new = new | INSERT(a1, 7) | INSERT(a2, 8);
770 a1 = EXTRACT(a, 9) ^ r;
771 a2 = EXTRACT(a, 10) ^ r;
772 a1 = q[a1];
773 a2 = q[a2];
774 new = new | INSERT(a1, 9) | INSERT(a2, 10);
775 a1 = EXTRACT(a, 11) ^ r;
776 a2 = EXTRACT(a, 12) ^ r;
777 a1 = q[a1];
778 a2 = q[a2];
779 new = new | INSERT(a1, 11) | INSERT(a2, 12);
780 #endif /* RF_LONGSHIFT > 2 */
781 d ^= new;
782 *dest++ = d;
783 length--;
784 }
785 #endif /* _KERNEL */
786 }
787 /*
788 recover columns a and b from the given p and q into
789 bufs abuf and bbuf. All bufs are word aligned.
790 Length is in bytes.
791 */
792
793
794 /*
795 * XXX
796 *
797 * Everything about this seems wrong.
798 */
799 void
rf_PQ_recover(unsigned long * pbuf,unsigned long * qbuf,unsigned long * abuf,unsigned long * bbuf,unsigned length,unsigned coeff_a,unsigned coeff_b)800 rf_PQ_recover(unsigned long *pbuf, unsigned long *qbuf, unsigned long *abuf, unsigned long *bbuf, unsigned length, unsigned coeff_a, unsigned coeff_b)
801 {
802 unsigned long p, q, a, a0, a1;
803 int col = (29 * coeff_a) + coeff_b;
804 unsigned char *q0 = &(rf_qinv[col][0]);
805
806 length /= 8;
807 while (length) {
808 p = *pbuf++;
809 q = *qbuf++;
810 a0 = EXTRACT(p, 0);
811 a1 = EXTRACT(q, 0);
812 a = q0[a0 << 5 | a1];
813 #define MF(i) \
814 a0 = EXTRACT(p,i); \
815 a1 = EXTRACT(q,i); \
816 a = a | INSERT(q0[a0<<5 | a1],i)
817
818 MF(1);
819 MF(2);
820 MF(3);
821 MF(4);
822 MF(5);
823 MF(6);
824 #if 0
825 MF(7);
826 MF(8);
827 MF(9);
828 MF(10);
829 MF(11);
830 MF(12);
831 #endif /* 0 */
832 *abuf++ = a;
833 *bbuf++ = a ^ p;
834 length--;
835 }
836 }
837 /*
838 Lost parity and a data column. Recover that data column.
839 Assume col coeff is lost. Let q the contents of Q after
840 all surviving data columns have been q-xored out of it.
841 Then we have the equation
842
843 q[28-coeff][a_i ^ r_i+1] = q
844
845 but q is cyclic with period 31.
846 So q[3+coeff][q[28-coeff][a_i ^ r_{i+1}]] =
847 q[31][a_i ^ r_{i+1}] = a_i ^ r_{i+1} .
848
849 so a_i = r_{coeff+1} ^ q[3+coeff][q]
850
851 The routine is passed q buffer and the buffer
   the data is to be recovered into. They can be the same.
853 */
854
855
856
857 static void
rf_InvertQ(unsigned long * qbuf,unsigned long * abuf,unsigned length,unsigned coeff)858 rf_InvertQ(
859 unsigned long *qbuf,
860 unsigned long *abuf,
861 unsigned length,
862 unsigned coeff)
863 {
864 unsigned long a, new;
865 unsigned long a1, a2;
866 unsigned int *q = &(rf_qfor[3 + coeff][0]);
867 unsigned r = rf_rn[coeff + 1];
868
869 /* 13 5 bit quants in a 64 bit word */
870 length /= 8;
871 while (length) {
872 a = *qbuf++;
873 a1 = EXTRACT(a, 0);
874 a2 = EXTRACT(a, 1);
875 a1 = r ^ q[a1];
876 a2 = r ^ q[a2];
877 new = INSERT(a2, 1) | a1;
878 #define M(i,j) \
879 a1 = EXTRACT(a,i); \
880 a2 = EXTRACT(a,j); \
881 a1 = r ^ q[a1]; \
882 a2 = r ^ q[a2]; \
883 new = new | INSERT(a1,i) | INSERT(a2,j)
884
885 M(2, 3);
886 M(4, 5);
887 M(5, 6);
888 #if RF_LONGSHIFT > 2
889 M(7, 8);
890 M(9, 10);
891 M(11, 12);
892 #endif /* RF_LONGSHIFT > 2 */
893 *abuf++ = new;
894 length--;
895 }
896 }
897 #endif /* (RF_INCLUDE_DECL_PQ > 0) ||
898 * (RF_INCLUDE_RAID6 > 0) */
899