1 /*	$NetBSD: rf_evenodd_dagfuncs.c,v 1.25 2022/02/16 22:00:56 andvar Exp $	*/
2 /*
3  * Copyright (c) 1995 Carnegie-Mellon University.
4  * All rights reserved.
5  *
6  * Author: ChangMing Wu
7  *
8  * Permission to use, copy, modify and distribute this software and
9  * its documentation is hereby granted, provided that both the copyright
10  * notice and this permission notice appear in all copies of the
11  * software, derivative works or modified versions, and any portions
12  * thereof, and that both notices appear in supporting documentation.
13  *
14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17  *
18  * Carnegie Mellon requests users of this software to return to
19  *
20  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21  *  School of Computer Science
22  *  Carnegie Mellon University
23  *  Pittsburgh PA 15213-3890
24  *
25  * any improvements or extensions that they make and grant Carnegie the
26  * rights to redistribute these changes.
27  */
28 
29 /*
30  * Code for RAID-EVENODD  architecture.
31  */
32 
33 #include <sys/cdefs.h>
34 __KERNEL_RCSID(0, "$NetBSD: rf_evenodd_dagfuncs.c,v 1.25 2022/02/16 22:00:56 andvar Exp $");
35 
36 #include "rf_archs.h"
37 
38 #ifdef _KERNEL_OPT
39 #include "opt_raid_diagnostic.h"
40 #endif
41 
42 #if RF_INCLUDE_EVENODD > 0
43 
44 #include <dev/raidframe/raidframevar.h>
45 
46 #include "rf_raid.h"
47 #include "rf_dag.h"
48 #include "rf_dagffrd.h"
49 #include "rf_dagffwr.h"
50 #include "rf_dagdegrd.h"
51 #include "rf_dagdegwr.h"
52 #include "rf_dagutils.h"
53 #include "rf_dagfuncs.h"
54 #include "rf_etimer.h"
55 #include "rf_general.h"
56 #include "rf_parityscan.h"
57 #include "rf_evenodd.h"
58 #include "rf_evenodd_dagfuncs.h"
59 
60 /* These redundant functions are for small write */
61 RF_RedFuncs_t rf_EOSmallWritePFuncs = {rf_RegularXorFunc, "Regular Old-New P", rf_SimpleXorFunc, "Simple Old-New P"};
62 RF_RedFuncs_t rf_EOSmallWriteEFuncs = {rf_RegularONEFunc, "Regular Old-New E", rf_SimpleONEFunc, "Regular Old-New E"};
63 /* These redundant functions are for degraded read */
64 RF_RedFuncs_t rf_eoPRecoveryFuncs = {rf_RecoveryXorFunc, "Recovery Xr", rf_RecoveryXorFunc, "Recovery Xr"};
65 RF_RedFuncs_t rf_eoERecoveryFuncs = {rf_RecoveryEFunc, "Recovery E Func", rf_RecoveryEFunc, "Recovery E Func"};
66 /**********************************************************************************************
 *   The following encoding node functions are used in EO_000_CreateLargeWriteDAG
68  **********************************************************************************************/
69 void
rf_RegularPEFunc(RF_DagNode_t * node)70 rf_RegularPEFunc(RF_DagNode_t *node)
71 {
72 	rf_RegularESubroutine(node, node->results[1]);
73 	rf_RegularXorFunc(node);/* does the wakeup here! */
74 }
75 
76 
77 /************************************************************************************************
 *  For EO_001_CreateSmallWriteDAG, there are (i) RegularONEFunc() and (ii) SimpleONEFunc() to
 *  be used. The former is used when the write accesses at least a full stripe unit's worth of
 *  sectors. The latter is used when the write accesses two stripe units but the total number
 *  of sectors is less than the sectors per SU. In this case, the accesses of parity and 'E'
 *  appear as disconnected areas in their stripe units, and the parity write and 'E' write are
 *  each divided into two distinct writes (four in total). This simple old-new write and
 *  regular old-new write happen as in RAID-5.
84  ************************************************************************************************/
85 
86 /* Algorithm:
87      1. Store the difference of old data and new data in the Rod buffer.
88      2. then encode this buffer into the buffer which already have old 'E' information inside it,
89 	the result can be shown to be the new 'E' information.
90      3. xor the Wnd buffer into the difference buffer to recover the  original old data.
91    Here we have another alternative: to allocate a temporary buffer for storing the difference of
92    old data and new data, then encode temp buf into old 'E' buf to form new 'E', but this approach
93    take the same speed as the previous, and need more memory.
94 */
95 void
rf_RegularONEFunc(RF_DagNode_t * node)96 rf_RegularONEFunc(RF_DagNode_t *node)
97 {
98 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
99 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
100 	int     EpdaIndex = (node->numParams - 1) / 2 - 1;	/* the parameter of node
101 								 * where you can find
102 								 * e-pda */
103 	int     i, k;
104 	int     suoffset, length;
105 	RF_RowCol_t scol;
106 	char   *srcbuf, *destbuf;
107 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
108 	RF_Etimer_t timer;
109 	RF_PhysDiskAddr_t *pda;
110 #ifdef RAID_DIAGNOSTIC
111 	RF_PhysDiskAddr_t *EPDA =
112 	    (RF_PhysDiskAddr_t *) node->params[EpdaIndex].p;
113 	int     ESUOffset = rf_StripeUnitOffset(layoutPtr, EPDA->startSector);
114 
115 	RF_ASSERT(EPDA->type == RF_PDA_TYPE_Q);
116 	RF_ASSERT(ESUOffset == 0);
117 #endif /* RAID_DIAGNOSTIC */
118 
119 	RF_ETIMER_START(timer);
120 
121 	/* Xor the Wnd buffer into Rod buffer, the difference of old data and
122 	 * new data is stored in Rod buffer */
123 	for (k = 0; k < EpdaIndex; k += 2) {
124 		length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector);
125 		rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length);
126 	}
127 	/* Start to encoding the buffer storing the difference of old data and
128 	 * new data into 'E' buffer  */
129 	for (i = 0; i < EpdaIndex; i += 2)
130 		if (node->params[i + 1].p != node->results[0]) {	/* results[0] is buf ptr
131 									 * of E */
132 			pda = (RF_PhysDiskAddr_t *) node->params[i].p;
133 			srcbuf = (char *) node->params[i + 1].p;
134 			scol = rf_EUCol(layoutPtr, pda->raidAddress);
135 			suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
136 			destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset);
137 			rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
138 		}
139 	/* Recover the original old data to be used by parity encoding
140 	 * function in XorNode */
141 	for (k = 0; k < EpdaIndex; k += 2) {
142 		length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector);
143 		rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length);
144 	}
145 	RF_ETIMER_STOP(timer);
146 	RF_ETIMER_EVAL(timer);
147 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
148 	rf_GenericWakeupFunc(node, 0);
149 }
150 
151 void
rf_SimpleONEFunc(RF_DagNode_t * node)152 rf_SimpleONEFunc(RF_DagNode_t *node)
153 {
154 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
155 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
156 	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
157 	int     retcode = 0;
158 	char   *srcbuf, *destbuf;
159 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
160 	int     length;
161 	RF_RowCol_t scol;
162 	RF_Etimer_t timer;
163 
164 	RF_ASSERT(((RF_PhysDiskAddr_t *) node->params[2].p)->type == RF_PDA_TYPE_Q);
165 	if (node->dagHdr->status == rf_enable) {
166 		RF_ETIMER_START(timer);
167 		length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[4].p)->numSector);	/* this is a pda of
168 														 * writeDataNodes */
169 		/* bxor to buffer of readDataNodes */
170 		retcode = rf_bxor(node->params[5].p, node->params[1].p, length);
171 		/* find out the corresponding column in encoding matrix for
172 		 * write column to be encoded into redundant disk 'E' */
173 		scol = rf_EUCol(layoutPtr, pda->raidAddress);
174 		srcbuf = node->params[1].p;
175 		destbuf = node->params[3].p;
176 		/* Start encoding process */
177 		rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
178 		rf_bxor(node->params[5].p, node->params[1].p, length);
179 		RF_ETIMER_STOP(timer);
180 		RF_ETIMER_EVAL(timer);
181 		tracerec->q_us += RF_ETIMER_VAL_US(timer);
182 
183 	}
184 	rf_GenericWakeupFunc(node, retcode);	/* call wake func
185 						 * explicitly since no
186 						 * I/O in this node */
187 }
188 
189 
190 /****** called by rf_RegularPEFunc(node) and rf_RegularEFunc(node) in f.f. large write  ********/
191 void
rf_RegularESubroutine(RF_DagNode_t * node,char * ebuf)192 rf_RegularESubroutine(RF_DagNode_t *node, char *ebuf)
193 {
194 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
195 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
196 	RF_PhysDiskAddr_t *pda;
197 	int     i, suoffset;
198 	RF_RowCol_t scol;
199 	char   *srcbuf, *destbuf;
200 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
201 	RF_Etimer_t timer;
202 
203 	RF_ETIMER_START(timer);
204 	for (i = 0; i < node->numParams - 2; i += 2) {
205 		RF_ASSERT(node->params[i + 1].p != ebuf);
206 		pda = (RF_PhysDiskAddr_t *) node->params[i].p;
207 		suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
208 		scol = rf_EUCol(layoutPtr, pda->raidAddress);
209 		srcbuf = (char *) node->params[i + 1].p;
210 		destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset);
211 		rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
212 	}
213 	RF_ETIMER_STOP(timer);
214 	RF_ETIMER_EVAL(timer);
215 	tracerec->xor_us += RF_ETIMER_VAL_US(timer);
216 }
217 
218 
219 /*******************************************************************************************
220  *			 Used in  EO_001_CreateLargeWriteDAG
221  ******************************************************************************************/
222 void
rf_RegularEFunc(RF_DagNode_t * node)223 rf_RegularEFunc(RF_DagNode_t *node)
224 {
225 	rf_RegularESubroutine(node, node->results[0]);
226 	rf_GenericWakeupFunc(node, 0);
227 }
228 /*******************************************************************************************
 * This degraded function allows only two cases:
 *  1. when the write accesses the full failed stripe unit, in which case the access
 *     can cover more than one stripe unit.
232  *  2. when write access only part of the failed SU, we assume accesses of more than
233  *     one stripe unit is not allowed so that the write can be dealt with like a
234  *     large write.
235  *  The following function is based on these assumptions. So except in the second case,
236  *  it looks the same as a large write encoding function. But this is not exactly the
237  *  normal way for doing a degraded write, since raidframe have to break cases of access
238  *  other than the above two into smaller accesses. We may have to change
239  *  DegrESubroutine in the future.
240  *******************************************************************************************/
241 void
rf_DegrESubroutine(RF_DagNode_t * node,char * ebuf)242 rf_DegrESubroutine(RF_DagNode_t *node, char *ebuf)
243 {
244 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
245 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
246 	RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
247 	RF_PhysDiskAddr_t *pda;
248 	int     i, suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
249 	RF_RowCol_t scol;
250 	char   *srcbuf, *destbuf;
251 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
252 	RF_Etimer_t timer;
253 
254 	RF_ETIMER_START(timer);
255 	for (i = 0; i < node->numParams - 2; i += 2) {
256 		RF_ASSERT(node->params[i + 1].p != ebuf);
257 		pda = (RF_PhysDiskAddr_t *) node->params[i].p;
258 		suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
259 		scol = rf_EUCol(layoutPtr, pda->raidAddress);
260 		srcbuf = (char *) node->params[i + 1].p;
261 		destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
262 		rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
263 	}
264 
265 	RF_ETIMER_STOP(timer);
266 	RF_ETIMER_EVAL(timer);
267 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
268 }
269 
270 
271 /**************************************************************************************
272  * This function is used in case where one data disk failed and both redundant disks
273  * alive. It is used in the EO_100_CreateWriteDAG. Note: if there is another disk
274  * failed in the stripe but not accessed at this time, then we should, instead, use
275  * the rf_EOWriteDoubleRecoveryFunc().
276  **************************************************************************************/
277 void
rf_Degraded_100_EOFunc(RF_DagNode_t * node)278 rf_Degraded_100_EOFunc(RF_DagNode_t *node)
279 {
280 	rf_DegrESubroutine(node, node->results[1]);
281 	rf_RecoveryXorFunc(node);	/* does the wakeup here! */
282 }
283 /**************************************************************************************
284  * This function is to encode one sector in one of the data disks to the E disk.
285  * However, in evenodd this function can also be used as decoding function to recover
286  * data from dead disk in the case of parity failure and a single data failure.
287  **************************************************************************************/
288 void
rf_e_EncOneSect(RF_RowCol_t srcLogicCol,char * srcSecbuf,RF_RowCol_t destLogicCol,char * destSecbuf,int bytesPerSector)289 rf_e_EncOneSect(
290     RF_RowCol_t srcLogicCol,
291     char *srcSecbuf,
292     RF_RowCol_t destLogicCol,
293     char *destSecbuf,
294     int bytesPerSector)
295 {
296 	int     S_index;	/* index of the EU in the src col which need
297 				 * be Xored into all EUs in a dest sector */
298 	int     numRowInEncMatix = (RF_EO_MATRIX_DIM) - 1;
299 	RF_RowCol_t j, indexInDest,	/* row index of an encoding unit in
300 					 * the destination column of encoding
301 					 * matrix */
302 	        indexInSrc;	/* row index of an encoding unit in the source
303 				 * column used for recovery */
304 	int     bytesPerEU = bytesPerSector / numRowInEncMatix;
305 
306 #if RF_EO_MATRIX_DIM > 17
307 	int     shortsPerEU = bytesPerEU / sizeof(short);
308 	short  *destShortBuf, *srcShortBuf1, *srcShortBuf2;
309 	short temp1;
310 #elif RF_EO_MATRIX_DIM == 17
311 	int     longsPerEU = bytesPerEU / sizeof(long);
312 	long   *destLongBuf, *srcLongBuf1, *srcLongBuf2;
313 	long temp1;
314 #endif
315 
316 #if RF_EO_MATRIX_DIM > 17
317 	RF_ASSERT(sizeof(short) == 2 || sizeof(short) == 1);
318 	RF_ASSERT(bytesPerEU % sizeof(short) == 0);
319 #elif RF_EO_MATRIX_DIM == 17
320 	RF_ASSERT(sizeof(long) == 8 || sizeof(long) == 4);
321 	RF_ASSERT(bytesPerEU % sizeof(long) == 0);
322 #endif
323 
324 	S_index = rf_EO_Mod((RF_EO_MATRIX_DIM - 1 + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
325 #if RF_EO_MATRIX_DIM > 17
326 	srcShortBuf1 = (short *) (srcSecbuf + S_index * bytesPerEU);
327 #elif RF_EO_MATRIX_DIM == 17
328 	srcLongBuf1 = (long *) (srcSecbuf + S_index * bytesPerEU);
329 #endif
330 
331 	for (indexInDest = 0; indexInDest < numRowInEncMatix; indexInDest++) {
332 		indexInSrc = rf_EO_Mod((indexInDest + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
333 
334 #if RF_EO_MATRIX_DIM > 17
335 		destShortBuf = (short *) (destSecbuf + indexInDest * bytesPerEU);
336 		srcShortBuf2 = (short *) (srcSecbuf + indexInSrc * bytesPerEU);
337 		for (j = 0; j < shortsPerEU; j++) {
338 			temp1 = destShortBuf[j] ^ srcShortBuf1[j];
339 			/* note: S_index won't be at the end row for any src
340 			 * col! */
341 			if (indexInSrc != RF_EO_MATRIX_DIM - 1)
342 				destShortBuf[j] = (srcShortBuf2[j]) ^ temp1;
343 			/* if indexInSrc is at the end row, ie.
344 			 * RF_EO_MATRIX_DIM -1, then all elements are zero! */
345 			else
346 				destShortBuf[j] = temp1;
347 		}
348 
349 #elif RF_EO_MATRIX_DIM == 17
350 		destLongBuf = (long *) (destSecbuf + indexInDest * bytesPerEU);
351 		srcLongBuf2 = (long *) (srcSecbuf + indexInSrc * bytesPerEU);
352 		for (j = 0; j < longsPerEU; j++) {
353 			temp1 = destLongBuf[j] ^ srcLongBuf1[j];
354 			if (indexInSrc != RF_EO_MATRIX_DIM - 1)
355 				destLongBuf[j] = (srcLongBuf2[j]) ^ temp1;
356 			else
357 				destLongBuf[j] = temp1;
358 		}
359 #endif
360 	}
361 }
362 
363 void
rf_e_encToBuf(RF_Raid_t * raidPtr,RF_RowCol_t srcLogicCol,char * srcbuf,RF_RowCol_t destLogicCol,char * destbuf,int numSector)364 rf_e_encToBuf(
365     RF_Raid_t * raidPtr,
366     RF_RowCol_t srcLogicCol,
367     char *srcbuf,
368     RF_RowCol_t destLogicCol,
369     char *destbuf,
370     int numSector)
371 {
372 	int     i, bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
373 
374 	for (i = 0; i < numSector; i++) {
375 		rf_e_EncOneSect(srcLogicCol, srcbuf, destLogicCol, destbuf, bytesPerSector);
376 		srcbuf += bytesPerSector;
377 		destbuf += bytesPerSector;
378 	}
379 }
380 /**************************************************************************************
 * When the parity disk and one data disk have failed, we use the second
 * redundant information, 'E', to recover the data of the dead disk. This
 * function is used in the recovery node for EO_110_CreateReadDAG
384  **************************************************************************************/
385 void
rf_RecoveryEFunc(RF_DagNode_t * node)386 rf_RecoveryEFunc(RF_DagNode_t *node)
387 {
388 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
389 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
390 	RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
391 	RF_RowCol_t scol,	/* source logical column */
392 	        fcol = rf_EUCol(layoutPtr, failedPDA->raidAddress);	/* logical column of
393 									 * failed SU */
394 	int     i;
395 	RF_PhysDiskAddr_t *pda;
396 	int     suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
397 	char   *srcbuf, *destbuf;
398 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
399 	RF_Etimer_t timer;
400 
401 	memset(node->results[0], 0,
402 	    rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
403 	if (node->dagHdr->status == rf_enable) {
404 		RF_ETIMER_START(timer);
405 		for (i = 0; i < node->numParams - 2; i += 2)
406 			if (node->params[i + 1].p != node->results[0]) {
407 				pda = (RF_PhysDiskAddr_t *) node->params[i].p;
408 				if (i == node->numParams - 4)
409 					scol = RF_EO_MATRIX_DIM - 2;	/* the column of
410 									 * redundant E */
411 				else
412 					scol = rf_EUCol(layoutPtr, pda->raidAddress);
413 				srcbuf = (char *) node->params[i + 1].p;
414 				suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
415 				destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
416 				rf_e_encToBuf(raidPtr, scol, srcbuf, fcol, destbuf, pda->numSector);
417 			}
418 		RF_ETIMER_STOP(timer);
419 		RF_ETIMER_EVAL(timer);
420 		tracerec->xor_us += RF_ETIMER_VAL_US(timer);
421 	}
422 	rf_GenericWakeupFunc(node, 0);	/* node execute successfully */
423 }
424 /**************************************************************************************
 * This function is used in the case where one data disk and the parity have failed.
426  * (in EO_110_CreateWriteDAG )
427  **************************************************************************************/
428 void
rf_EO_DegradedWriteEFunc(RF_DagNode_t * node)429 rf_EO_DegradedWriteEFunc(RF_DagNode_t * node)
430 {
431 	rf_DegrESubroutine(node, node->results[0]);
432 	rf_GenericWakeupFunc(node, 0);
433 }
434 
435 
436 
437 /**************************************************************************************
438  *  		THE FUNCTION IS FOR DOUBLE DEGRADED READ AND WRITE CASES
439  **************************************************************************************/
440 
441 void
rf_doubleEOdecode(RF_Raid_t * raidPtr,char ** rrdbuf,char ** dest,RF_RowCol_t * fcol,char * pbuf,char * ebuf)442 rf_doubleEOdecode(
443     RF_Raid_t * raidPtr,
444     char **rrdbuf,
445     char **dest,
446     RF_RowCol_t * fcol,
447     char *pbuf,
448     char *ebuf)
449 {
450 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
451 	int     i, j, k, f1, f2, row;
452 	int     rrdrow, erow, count = 0;
453 	int     bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
454 	int     numRowInEncMatix = (RF_EO_MATRIX_DIM) - 1;
455 #if 0
456 	int     pcol = (RF_EO_MATRIX_DIM) - 1;
457 #endif
458 	int     ecol = (RF_EO_MATRIX_DIM) - 2;
459 	int     bytesPerEU = bytesPerSector / numRowInEncMatix;
460 	int     numDataCol = layoutPtr->numDataCol;
461 #if RF_EO_MATRIX_DIM > 17
462 	int     shortsPerEU = bytesPerEU / sizeof(short);
463 	short  *rrdbuf_current, *pbuf_current, *ebuf_current;
464 	short  *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current;
465 	short *temp;
466 	short  *P;
467 
468 	RF_ASSERT(bytesPerEU % sizeof(short) == 0);
469 #elif RF_EO_MATRIX_DIM == 17
470 	int     longsPerEU = bytesPerEU / sizeof(long);
471 	long   *rrdbuf_current, *pbuf_current, *ebuf_current;
472 	long   *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current;
473 	long *temp;
474 	long   *P;
475 
476 	RF_ASSERT(bytesPerEU % sizeof(long) == 0);
477 #endif
478 	P = RF_Malloc(bytesPerEU);
479 	temp = RF_Malloc(bytesPerEU);
480 	RF_ASSERT(*((long *) dest[0]) == 0);
481 	RF_ASSERT(*((long *) dest[1]) == 0);
482 	RF_ASSERT(*P == 0);
483 	/* calculate the 'P' parameter, which, not parity, is the Xor of all
484 	 * elements in the last two columns, ie. 'E' and 'parity' columns, see
485 	 * the Ref. paper by Blaum, et al 1993  */
486 	for (i = 0; i < numRowInEncMatix; i++)
487 		for (k = 0; k < longsPerEU; k++) {
488 #if RF_EO_MATRIX_DIM > 17
489 			ebuf_current = ((short *) ebuf) + i * shortsPerEU + k;
490 			pbuf_current = ((short *) pbuf) + i * shortsPerEU + k;
491 #elif RF_EO_MATRIX_DIM == 17
492 			ebuf_current = ((long *) ebuf) + i * longsPerEU + k;
493 			pbuf_current = ((long *) pbuf) + i * longsPerEU + k;
494 #endif
495 			P[k] ^= *ebuf_current;
496 			P[k] ^= *pbuf_current;
497 		}
498 	RF_ASSERT(fcol[0] != fcol[1]);
499 	if (fcol[0] < fcol[1]) {
500 #if RF_EO_MATRIX_DIM > 17
501 		dest_smaller = (short *) (dest[0]);
502 		dest_larger = (short *) (dest[1]);
503 #elif RF_EO_MATRIX_DIM == 17
504 		dest_smaller = (long *) (dest[0]);
505 		dest_larger = (long *) (dest[1]);
506 #endif
507 		f1 = fcol[0];
508 		f2 = fcol[1];
509 	} else {
510 #if RF_EO_MATRIX_DIM > 17
511 		dest_smaller = (short *) (dest[1]);
512 		dest_larger = (short *) (dest[0]);
513 #elif RF_EO_MATRIX_DIM == 17
514 		dest_smaller = (long *) (dest[1]);
515 		dest_larger = (long *) (dest[0]);
516 #endif
517 		f1 = fcol[1];
518 		f2 = fcol[0];
519 	}
520 	row = (RF_EO_MATRIX_DIM) - 1;
521 	while ((row = rf_EO_Mod((row + f1 - f2), RF_EO_MATRIX_DIM)) != ((RF_EO_MATRIX_DIM) - 1)) {
522 #if RF_EO_MATRIX_DIM > 17
523 		dest_larger_current = dest_larger + row * shortsPerEU;
524 		dest_smaller_current = dest_smaller + row * shortsPerEU;
525 #elif RF_EO_MATRIX_DIM == 17
526 		dest_larger_current = dest_larger + row * longsPerEU;
527 		dest_smaller_current = dest_smaller + row * longsPerEU;
528 #endif
529 		/**    Do the diagonal recovery. Initially, temp[k] = (failed 1),
530 		       which is the failed data in the column which has smaller col index. **/
531 		/* step 1:  ^(SUM of nonfailed in-diagonal A(rrdrow,0..m-3))         */
532 		for (j = 0; j < numDataCol; j++) {
533 			if (j == f1 || j == f2)
534 				continue;
535 			rrdrow = rf_EO_Mod((row + f2 - j), RF_EO_MATRIX_DIM);
536 			if (rrdrow != (RF_EO_MATRIX_DIM) - 1) {
537 #if RF_EO_MATRIX_DIM > 17
538 				rrdbuf_current = (short *) (rrdbuf[j]) + rrdrow * shortsPerEU;
539 				for (k = 0; k < shortsPerEU; k++)
540 					temp[k] ^= *(rrdbuf_current + k);
541 #elif RF_EO_MATRIX_DIM == 17
542 				rrdbuf_current = (long *) (rrdbuf[j]) + rrdrow * longsPerEU;
543 				for (k = 0; k < longsPerEU; k++)
544 					temp[k] ^= *(rrdbuf_current + k);
545 #endif
546 			}
547 		}
548 		/* step 2:  ^E(erow,m-2), If erow is at the bottom row, don't
549 		 * Xor into it  E(erow,m-2) = (principle diagonal) ^ (failed
550 		 * 1) ^ (failed 2) ^ ( SUM of nonfailed in-diagonal
551 		 * A(rrdrow,0..m-3) ) After this step, temp[k] = (principle
552 		 * diagonal) ^ (failed 2)       */
553 
554 		erow = rf_EO_Mod((row + f2 - ecol), (RF_EO_MATRIX_DIM));
555 		if (erow != (RF_EO_MATRIX_DIM) - 1) {
556 #if RF_EO_MATRIX_DIM > 17
557 			ebuf_current = (short *) ebuf + shortsPerEU * erow;
558 			for (k = 0; k < shortsPerEU; k++)
559 				temp[k] ^= *(ebuf_current + k);
560 #elif RF_EO_MATRIX_DIM == 17
561 			ebuf_current = (long *) ebuf + longsPerEU * erow;
562 			for (k = 0; k < longsPerEU; k++)
563 				temp[k] ^= *(ebuf_current + k);
564 #endif
565 		}
566 		/* step 3: ^P to obtain the failed data (failed 2).  P can be
567 		 * proved to be actually  (principle diagonal)  After this
568 		 * step, temp[k] = (failed 2), the failed data to be recovered */
569 #if RF_EO_MATRIX_DIM > 17
570 		for (k = 0; k < shortsPerEU; k++)
571 			temp[k] ^= P[k];
572 		/* Put the data to the destination buffer                              */
573 		for (k = 0; k < shortsPerEU; k++)
574 			dest_larger_current[k] = temp[k];
575 #elif RF_EO_MATRIX_DIM == 17
576 		for (k = 0; k < longsPerEU; k++)
577 			temp[k] ^= P[k];
578 		/* Put the data to the destination buffer                              */
579 		for (k = 0; k < longsPerEU; k++)
580 			dest_larger_current[k] = temp[k];
581 #endif
582 
583 		/**          THE FOLLOWING DO THE HORIZONTAL XOR                **/
584 		/* step 1:  ^(SUM of A(row,0..m-3)), ie. all nonfailed data
585 		 * columns    */
586 		for (j = 0; j < numDataCol; j++) {
587 			if (j == f1 || j == f2)
588 				continue;
589 #if RF_EO_MATRIX_DIM > 17
590 			rrdbuf_current = (short *) (rrdbuf[j]) + row * shortsPerEU;
591 			for (k = 0; k < shortsPerEU; k++)
592 				temp[k] ^= *(rrdbuf_current + k);
593 #elif RF_EO_MATRIX_DIM == 17
594 			rrdbuf_current = (long *) (rrdbuf[j]) + row * longsPerEU;
595 			for (k = 0; k < longsPerEU; k++)
596 				temp[k] ^= *(rrdbuf_current + k);
597 #endif
598 		}
599 		/* step 2: ^A(row,m-1) */
600 		/* step 3: Put the data to the destination buffer                             	 */
601 #if RF_EO_MATRIX_DIM > 17
602 		pbuf_current = (short *) pbuf + shortsPerEU * row;
603 		for (k = 0; k < shortsPerEU; k++)
604 			temp[k] ^= *(pbuf_current + k);
605 		for (k = 0; k < shortsPerEU; k++)
606 			dest_smaller_current[k] = temp[k];
607 #elif RF_EO_MATRIX_DIM == 17
608 		pbuf_current = (long *) pbuf + longsPerEU * row;
609 		for (k = 0; k < longsPerEU; k++)
610 			temp[k] ^= *(pbuf_current + k);
611 		for (k = 0; k < longsPerEU; k++)
612 			dest_smaller_current[k] = temp[k];
613 #endif
614 		count++;
615 	}
616 	/* Check if all Encoding Unit in the data buffer have been decoded,
617 	 * according EvenOdd theory, if "RF_EO_MATRIX_DIM" is a prime number,
618 	 * this algorithm will covered all buffer 				 */
619 	RF_ASSERT(count == numRowInEncMatix);
620 	RF_Free((char *) P, bytesPerEU);
621 	RF_Free((char *) temp, bytesPerEU);
622 }
623 
624 
625 /***************************************************************************************
626 * 	This function is called by double degraded read
627 * 	EO_200_CreateReadDAG
628 *
629 ***************************************************************************************/
630 void
rf_EvenOddDoubleRecoveryFunc(RF_DagNode_t * node)631 rf_EvenOddDoubleRecoveryFunc(RF_DagNode_t *node)
632 {
633 	int     ndataParam = 0;
634 	int     np = node->numParams;
635 	RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p;
636 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
637 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
638 	int     i, prm, sector, nresults = node->numResults;
639 	RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
640 	unsigned sosAddr;
641 	int     mallc_one = 0, mallc_two = 0;	/* flags to indicate if
642 						 * memory is allocated */
643 	int     bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
644 	RF_PhysDiskAddr_t *ppda, *ppda2, *epda, *epda2, *pda, *pda0, *pda1,
645 	        npda;
646 	RF_RowCol_t fcol[2], fsuoff[2], fsuend[2], numDataCol = layoutPtr->numDataCol;
647 	char  **buf, *ebuf, *pbuf, *dest[2];
648 	long   *suoff = NULL, *suend = NULL, *prmToCol = NULL,
649 	    psuoff = 0, esuoff = 0;
650 	RF_SectorNum_t startSector, endSector;
651 	RF_Etimer_t timer;
652 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
653 
654 	RF_ETIMER_START(timer);
655 
656 	/* Find out the number of parameters which are pdas for data
657 	 * information */
658 	for (i = 0; i <= np; i++)
659 		if (((RF_PhysDiskAddr_t *) node->params[i].p)->type != RF_PDA_TYPE_DATA) {
660 			ndataParam = i;
661 			break;
662 		}
663 	buf = RF_Malloc(numDataCol * sizeof(*buf));
664 	if (ndataParam != 0) {
665 		suoff = RF_Malloc(ndataParam * sizeof(*suoff));
666 		suend = RF_Malloc(ndataParam * sizeof(*suend));
667 		prmToCol = RF_Malloc(ndataParam * sizeof(*prmToCol));
668 	}
669 	if (asmap->failedPDAs[1] &&
670 	    (asmap->failedPDAs[1]->numSector + asmap->failedPDAs[0]->numSector < secPerSU)) {
671 		RF_ASSERT(0);	/* currently, no support for this situation */
672 		ppda = node->params[np - 6].p;
673 		ppda2 = node->params[np - 5].p;
674 		RF_ASSERT(ppda2->type == RF_PDA_TYPE_PARITY);
675 		epda = node->params[np - 4].p;
676 		epda2 = node->params[np - 3].p;
677 		RF_ASSERT(epda2->type == RF_PDA_TYPE_Q);
678 	} else {
679 		ppda = node->params[np - 4].p;
680 		epda = node->params[np - 3].p;
681 		psuoff = rf_StripeUnitOffset(layoutPtr, ppda->startSector);
682 		esuoff = rf_StripeUnitOffset(layoutPtr, epda->startSector);
683 		RF_ASSERT(psuoff == esuoff);
684 	}
685 	/*
686             the followings have three goals:
687             1. determine the startSector to begin decoding and endSector to end decoding.
688             2. determine the column numbers of the two failed disks.
689             3. determine the offset and end offset of the access within each failed stripe unit.
690          */
691 	if (nresults == 1) {
692 		/* find the startSector to begin decoding */
693 		pda = node->results[0];
694 		memset(pda->bufPtr, 0, bytesPerSector * pda->numSector);
695 		fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
696 		fsuend[0] = fsuoff[0] + pda->numSector;
697 		fsuoff[1] = 0;
698 		fsuend[1] = 0;
699 		startSector = fsuoff[0];
700 		endSector = fsuend[0];
701 
702 		/* find out the column of failed disk being accessed */
703 		fcol[0] = rf_EUCol(layoutPtr, pda->raidAddress);
704 
705 		/* find out the other failed column not accessed */
706 		sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
707 		for (i = 0; i < numDataCol; i++) {
708 			npda.raidAddress = sosAddr + (i * secPerSU);
709 			(raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.col), &(npda.startSector), 0);
710 			/* skip over dead disks */
711 			if (RF_DEAD_DISK(raidPtr->Disks[npda.col].status))
712 				if (i != fcol[0])
713 					break;
714 		}
715 		RF_ASSERT(i < numDataCol);
716 		fcol[1] = i;
717 	} else {
718 		RF_ASSERT(nresults == 2);
719 		pda0 = node->results[0];
720 		memset(pda0->bufPtr, 0, bytesPerSector * pda0->numSector);
721 		pda1 = node->results[1];
722 		memset(pda1->bufPtr, 0, bytesPerSector * pda1->numSector);
723 		/* determine the failed column numbers of the two failed
724 		 * disks. */
725 		fcol[0] = rf_EUCol(layoutPtr, pda0->raidAddress);
726 		fcol[1] = rf_EUCol(layoutPtr, pda1->raidAddress);
727 		/* determine the offset and end offset of the access within
728 		 * each failed stripe unit. */
729 		fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda0->startSector);
730 		fsuend[0] = fsuoff[0] + pda0->numSector;
731 		fsuoff[1] = rf_StripeUnitOffset(layoutPtr, pda1->startSector);
732 		fsuend[1] = fsuoff[1] + pda1->numSector;
733 		/* determine the startSector to begin decoding */
734 		startSector = RF_MIN(pda0->startSector, pda1->startSector);
735 		/* determine the endSector to end decoding */
736 		endSector = RF_MAX(fsuend[0], fsuend[1]);
737 	}
738 	/*
739 	      assign the beginning sector and the end sector for each parameter
740 	      find out the corresponding column # for each parameter
741         */
742 	for (prm = 0; prm < ndataParam; prm++) {
743 		pda = node->params[prm].p;
744 		suoff[prm] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
745 		suend[prm] = suoff[prm] + pda->numSector;
746 		prmToCol[prm] = rf_EUCol(layoutPtr, pda->raidAddress);
747 	}
748 	/* 'sector' is the sector for the current decoding algorithm. For each
749 	 * sector in the failed SU, find out the corresponding parameters that
750 	 * cover the current sector and that are needed for decoding of this
751 	 * sector in failed SU. 2.  Find out if sector is in the shadow of any
752 	 * accessed failed SU. If not, malloc a temporary space of a sector in
753 	 * size. */
754 	for (sector = startSector; sector < endSector; sector++) {
755 		if (nresults == 2)
756 			if (!(fsuoff[0] <= sector && sector < fsuend[0]) && !(fsuoff[1] <= sector && sector < fsuend[1]))
757 				continue;
758 		for (prm = 0; prm < ndataParam; prm++)
759 			if (suoff[prm] <= sector && sector < suend[prm])
760 				buf[(prmToCol[prm])] = (char *)((RF_PhysDiskAddr_t *) node->params[prm].p)->bufPtr +
761 				    rf_RaidAddressToByte(raidPtr, sector - suoff[prm]);
762 		/* find out if sector is in the shadow of any accessed failed
763 		 * SU. If yes, assign dest[0], dest[1] to point at suitable
764 		 * position of the buffer corresponding to failed SUs. if no,
765 		 * malloc a temporary space of a sector in size for
766 		 * destination of decoding. */
767 		RF_ASSERT(nresults == 1 || nresults == 2);
768 		if (nresults == 1) {
769 			dest[0] = (char *)((RF_PhysDiskAddr_t *) node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]);
770 			/* Always malloc temp buffer to dest[1]  */
771 			dest[1] = RF_Malloc(bytesPerSector);
772 			mallc_two = 1;
773 		} else {
774 			if (fsuoff[0] <= sector && sector < fsuend[0])
775 				dest[0] = (char *)((RF_PhysDiskAddr_t *) node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]);
776 			else {
777 				dest[0] = RF_Malloc(bytesPerSector);
778 				mallc_one = 1;
779 			}
780 			if (fsuoff[1] <= sector && sector < fsuend[1])
781 				dest[1] = (char *)((RF_PhysDiskAddr_t *) node->results[1])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[1]);
782 			else {
783 				dest[1] = RF_Malloc(bytesPerSector);
784 				mallc_two = 1;
785 			}
786 			RF_ASSERT(mallc_one == 0 || mallc_two == 0);
787 		}
788 		pbuf = (char *)ppda->bufPtr + rf_RaidAddressToByte(raidPtr, sector - psuoff);
789 		ebuf = (char *)epda->bufPtr + rf_RaidAddressToByte(raidPtr, sector - esuoff);
790 		/*
791 	         * After finish finding all needed sectors, call doubleEOdecode function for decoding
792 	         * one sector to destination.
793 	         */
794 		rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf);
795 		/* free all allocated memory, and mark flag to indicate no
796 		 * memory is being allocated */
797 		if (mallc_one == 1)
798 			RF_Free(dest[0], bytesPerSector);
799 		if (mallc_two == 1)
800 			RF_Free(dest[1], bytesPerSector);
801 		mallc_one = mallc_two = 0;
802 	}
803 	RF_Free(buf, numDataCol * sizeof(char *));
804 	if (ndataParam != 0) {
805 		RF_Free(suoff, ndataParam * sizeof(long));
806 		RF_Free(suend, ndataParam * sizeof(long));
807 		RF_Free(prmToCol, ndataParam * sizeof(long));
808 	}
809 	RF_ETIMER_STOP(timer);
810 	RF_ETIMER_EVAL(timer);
811 	if (tracerec) {
812 		tracerec->q_us += RF_ETIMER_VAL_US(timer);
813 	}
814 	rf_GenericWakeupFunc(node, 0);
815 }
816 
817 
818 /* currently, only access of one of the two failed SU is allowed in this function.
819  * also, asmap->numStripeUnitsAccessed is limited to be one, the RaidFrame will break large access into
820  * many accesses of single stripe unit.
821  */
822 
823 void
rf_EOWriteDoubleRecoveryFunc(RF_DagNode_t * node)824 rf_EOWriteDoubleRecoveryFunc(RF_DagNode_t *node)
825 {
826 	int     np = node->numParams;
827 	RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p;
828 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
829 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
830 	RF_SectorNum_t sector;
831 	RF_RowCol_t col, scol;
832 	int     prm, i, j;
833 	RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
834 	unsigned sosAddr;
835 	unsigned bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
836 	RF_int64 numbytes;
837 	RF_SectorNum_t startSector, endSector;
838 	RF_PhysDiskAddr_t *ppda, *epda, *pda, *fpda, npda;
839 	RF_RowCol_t fcol[2], numDataCol = layoutPtr->numDataCol;
840 	char  **buf;		/* buf[0], buf[1], buf[2], ...etc. point to
841 				 * buffer storing data read from col0, col1,
842 				 * col2 */
843 	char   *ebuf, *pbuf, *dest[2], *olddata[2];
844 	RF_Etimer_t timer;
845 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
846 
847 	RF_ASSERT(asmap->numDataFailed == 1);	/* currently only support this
848 						 * case, the other failed SU
849 						 * is not being accessed */
850 	RF_ETIMER_START(timer);
851 	buf = RF_Malloc(numDataCol * sizeof(*buf));
852 
853 	ppda = node->results[0];/* Instead of being buffers, node->results[0]
854 				 * and [1] are Ppda and Epda  */
855 	epda = node->results[1];
856 	fpda = asmap->failedPDAs[0];
857 
858 	/* First, recovery the failed old SU using EvenOdd double decoding      */
859 	/* determine the startSector and endSector for decoding */
860 	startSector = rf_StripeUnitOffset(layoutPtr, fpda->startSector);
861 	endSector = startSector + fpda->numSector;
862 	/* Assign buf[col] pointers to point to each non-failed column  and
863 	 * initialize the pbuf and ebuf to point at the beginning of each
864 	 * source buffers and destination buffers */
865 	for (prm = 0; prm < numDataCol - 2; prm++) {
866 		pda = (RF_PhysDiskAddr_t *) node->params[prm].p;
867 		col = rf_EUCol(layoutPtr, pda->raidAddress);
868 		buf[col] = pda->bufPtr;
869 	}
870 	/* pbuf and ebuf:  they will change values as double recovery decoding
871 	 * goes on */
872 	pbuf = ppda->bufPtr;
873 	ebuf = epda->bufPtr;
874 	/* find out the logical column numbers in the encoding matrix of the
875 	 * two failed columns */
876 	fcol[0] = rf_EUCol(layoutPtr, fpda->raidAddress);
877 
878 	/* find out the other failed column not accessed this time */
879 	sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
880 	for (i = 0; i < numDataCol; i++) {
881 		npda.raidAddress = sosAddr + (i * secPerSU);
882 		(raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.col), &(npda.startSector), 0);
883 		/* skip over dead disks */
884 		if (RF_DEAD_DISK(raidPtr->Disks[npda.col].status))
885 			if (i != fcol[0])
886 				break;
887 	}
888 	RF_ASSERT(i < numDataCol);
889 	fcol[1] = i;
890 	/* assign temporary space to put recovered failed SU */
891 	numbytes = fpda->numSector * bytesPerSector;
892 	olddata[0] = RF_Malloc(numbytes);
893 	olddata[1] = RF_Malloc(numbytes);
894 	dest[0] = olddata[0];
895 	dest[1] = olddata[1];
896 	/* Begin the recovery decoding, initially buf[j],  ebuf, pbuf, dest[j]
897 	 * have already pointed at the beginning of each source buffers and
898 	 * destination buffers */
899 	for (sector = startSector, i = 0; sector < endSector; sector++, i++) {
900 		rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf);
901 		for (j = 0; j < numDataCol; j++)
902 			if ((j != fcol[0]) && (j != fcol[1]))
903 				buf[j] += bytesPerSector;
904 		dest[0] += bytesPerSector;
905 		dest[1] += bytesPerSector;
906 		ebuf += bytesPerSector;
907 		pbuf += bytesPerSector;
908 	}
909 	/* after recovery, the buffer pointed by olddata[0] is the old failed
910 	 * data. With new writing data and this old data, use small write to
911 	 * calculate the new redundant information */
912 	/* node->params[ 0, ... PDAPerDisk * (numDataCol - 2)-1 ] are Pdas of
913 	 * Rrd; params[ PDAPerDisk*(numDataCol - 2), ... PDAPerDisk*numDataCol
914 	 * -1 ] are Pdas of Rp, ( Rp2 ), Re, ( Re2 ) ; params[
915 	 * PDAPerDisk*numDataCol, ... PDAPerDisk*numDataCol
916 	 * +asmap->numStripeUnitsAccessed -asmap->numDataFailed-1] are Pdas of
917 	 * wudNodes; For current implementation, we assume the simplest case:
918 	 * asmap->numStripeUnitsAccessed == 1 and asmap->numDataFailed == 1
919 	 * ie. PDAPerDisk = 1 then node->params[numDataCol] must be the new
920 	 * data to be written to the failed disk. We first bxor the new data
921 	 * into the old recovered data, then do the same things as small
922 	 * write. */
923 
924 	rf_bxor(((RF_PhysDiskAddr_t *) node->params[numDataCol].p)->bufPtr, olddata[0], numbytes);
925 	/* do new 'E' calculation  */
926 	/* find out the corresponding column in encoding matrix for write
927 	 * column to be encoded into redundant disk 'E' */
928 	scol = rf_EUCol(layoutPtr, fpda->raidAddress);
929 	/* olddata[0] now is source buffer pointer; epda->bufPtr is the dest
930 	 * buffer pointer               */
931 	rf_e_encToBuf(raidPtr, scol, olddata[0], RF_EO_MATRIX_DIM - 2, epda->bufPtr, fpda->numSector);
932 
933 	/* do new 'P' calculation  */
934 	rf_bxor(olddata[0], ppda->bufPtr, numbytes);
935 	/* Free the allocated buffer  */
936 	RF_Free(olddata[0], numbytes);
937 	RF_Free(olddata[1], numbytes);
938 	RF_Free(buf, numDataCol * sizeof(char *));
939 
940 	RF_ETIMER_STOP(timer);
941 	RF_ETIMER_EVAL(timer);
942 	if (tracerec) {
943 		tracerec->q_us += RF_ETIMER_VAL_US(timer);
944 	}
945 	rf_GenericWakeupFunc(node, 0);
946 }
947 #endif				/* RF_INCLUDE_EVENODD > 0 */
948