1 /*
2  * This includes forward error correction code based on Vandermonde matrices
3  * 980624
4  * (C) 1997-98 Luigi Rizzo (luigi@iet.unipi.it)
5  *
6  * Portions derived from code by Phil Karn (karn@ka9q.ampr.org),
7  * Robert Morelos-Zaragoza (robert@spectra.eng.hawaii.edu) and Hari
8  * Thirumoorthy (harit@spectra.eng.hawaii.edu), Aug 1995
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  *
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above
17  *    copyright notice, this list of conditions and the following
18  *    disclaimer in the documentation and/or other materials
19  *    provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
23  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
26  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
28  * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
30  * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
32  * OF SUCH DAMAGE.
33  */
34 
35 
36 #include "normEncoderRS8.h"
37 #include "protoDebug.h"
38 
39 #ifdef SIMULATE
40 #include "normMessage.h"
41 #endif // SIMULATE
42 
43 /*
44  * The first part of the file here implements linear algebra in GF.
45  *
46  * gf is the type used to store an element of the Galois Field.
47  * Must constain at least GF_BITS bits.
48  *
49  * Note: unsigned char will work up to GF(256) but int seems to run
50  * faster on the Pentium. We use int whenever have to deal with an
51  * index, since they are generally faster.
52  */
53 
54 #define GF_BITS  8	                    // 8-bit RS code
55 #if (GF_BITS < 2)  || (GF_BITS > 16)
56 #error "GF_BITS must be 2 .. 16"
57 #endif
58 #if (GF_BITS <= 8)
59 typedef UINT8 gf;
60 #else
61 typedef UINT16 gf;
62 #endif
63 #define	GF_SIZE ((1 << GF_BITS) - 1)	// powers of alpha
64 
65 
66 /*
67  * Primitive polynomials - see Lin & Costello, Appendix A,
68  * and  Lee & Messerschmitt, p. 453.
69  */
70 
71 static const char *allPp[] =
72 {                        // GF_BITS   Polynomial
73     NULL,		         //   0      no code
74     NULL,		         //   1      no code
75     "111",		         //   2      1+x+x^2
76     "1101",		         //   3      1+x+x^3
77     "11001",		     //   4      1+x+x^4
78     "101001",		     //   5      1+x^2+x^5
79     "1100001",		     //   6      1+x+x^6
80     "10010001",		     //   7      1 + x^3 + x^7
81     "101110001",	     //   8      1+x^2+x^3+x^4+x^8
82     "1000100001",	     //   9      1+x^4+x^9
83     "10010000001",	     //  10      1+x^3+x^10
84     "101000000001",	     //  11      1+x^2+x^11
85     "1100101000001",	 //  12      1+x+x^4+x^6+x^12
86     "11011000000001",	 //  13      1+x+x^3+x^4+x^13
87     "110000100010001",	 //  14      1+x+x^6+x^10+x^14
88     "1100000000000001",  //  15      1+x+x^15
89     "11010000000010001"  //  16      1+x+x^3+x^12+x^16
90 };
91 
92 
93 /*
94  * To speed up computations, we have tables for logarithm, exponent
95  * and inverse of a number. If GF_BITS <= 8, we use a table for
96  * multiplication as well (it takes 64K, no big deal even on a PDA,
97  * especially because it can be pre-initialized an put into a ROM!),
98  * otherwhise we use a table of logarithms.
99  * In any case the macro gf_mul(x,y) takes care of multiplications.
100  */
101 
102 static gf gf_exp[2*GF_SIZE];	// index->poly form conversion table
103 static int gf_log[GF_SIZE + 1];	// Poly->index form conversion table
104 static gf inverse[GF_SIZE+1];	// inverse of field elem.
105 				                // inv[\alpha**i]=\alpha**(GF_SIZE-i-1)
106 
107 // modnn(x) computes x % GF_SIZE, where GF_SIZE is 2**GF_BITS - 1,
108 // without a slow divide.
modnn(int x)109 static inline gf modnn(int x)
110 {
111     while (x >= GF_SIZE)
112     {
113 	    x -= GF_SIZE;
114 	    x = (x >> GF_BITS) + (x & GF_SIZE);
115     }
116     return x;
117 }  // end modnn()
118 
119 #define SWAP(a,b,t) {t tmp; tmp=a; a=b; b=tmp;}
120 
121 /*
122  * gf_mul(x,y) multiplies two numbers. If GF_BITS<=8, it is much
123  * faster to use a multiplication table.
124  *
125  * USE_GF_MULC, GF_MULC0(c) and GF_ADDMULC(x) can be used when multiplying
126  * many numbers by the same constant. In this case the first
127  * call sets the constant, and others perform the multiplications.
128  * A value related to the multiplication is held in a local variable
129  * declared with USE_GF_MULC . See usage in addmul1().
130  */
131 #if (GF_BITS <= 8)
132 
133 static gf gf_mul_table[GF_SIZE + 1][GF_SIZE + 1];
134 
135 #define gf_mul(x,y) gf_mul_table[x][y]
136 #define USE_GF_MULC register gf * __gf_mulc_
137 #define GF_MULC0(c) __gf_mulc_ = gf_mul_table[c]
138 #define GF_ADDMULC(dst, x) dst ^= __gf_mulc_[x]
139 
init_mul_table()140 static void init_mul_table()
141 {
142     for (int i = 0; i <= GF_SIZE; i++)
143     {
144 	    for (int j = 0; j <= GF_SIZE; j++)
145 	        gf_mul_table[i][j] = gf_exp[modnn(gf_log[i] + gf_log[j]) ] ;
146     }
147     for (int j = 0; j <= GF_SIZE; j++)
148 	    gf_mul_table[0][j] = gf_mul_table[j][0] = 0;
149 }
150 
151 #else	/* GF_BITS > 8 */
152 
gf_mul(int x,int y)153 inline gf gf_mul(int x, int y)
154 {
155     if ((0 == x) || (0 == y)) return 0;
156     return gf_exp[gf_log[x] + gf_log[y] ] ;
157 }
158 
159 #define init_mul_table()
160 #define USE_GF_MULC register gf * __gf_mulc_
161 #define GF_MULC0(c) __gf_mulc_ = &gf_exp[ gf_log[c] ]
162 #define GF_ADDMULC(dst, x) { if (x) dst ^= __gf_mulc_[ gf_log[x] ] ; }
163 
164 #endif  // if/else (GF_BITS <= 8)
165 
166 /*
167  * Generate GF(2**m) from the irreducible polynomial p(X) in p[0]..p[m]
168  * Lookup tables:
169  *     index->polynomial form		gf_exp[] contains j= \alpha^i;
170  *     polynomial form -> index form	gf_log[ j = \alpha^i ] = i
171  * \alpha=x is the primitive element of GF(2^m)
172  *
173  * For efficiency, gf_exp[] has size 2*GF_SIZE, so that a simple
174  * multiplication of two numbers can be resolved without calling modnn
175  */
176 
177 #define NEW_GF_MATRIX(rows, cols) (new gf[rows*cols])
178 
179 /*
180  * initialize the data structures used for computations in GF.
181  */
generate_gf()182 static void generate_gf()
183 {
184     const char *Pp =  allPp[GF_BITS] ;
185 
186     gf mask = 1;	/* x ** 0 = 1 */
187     gf_exp[GF_BITS] = 0; /* will be updated at the end of the 1st loop */
188     /*
189      * first, generate the (polynomial representation of) powers of \alpha,
190      * which are stored in gf_exp[i] = \alpha ** i .
191      * At the same time build gf_log[gf_exp[i]] = i .
192      * The first GF_BITS powers are simply bits shifted to the left.
193      */
194     for (int i = 0; i < GF_BITS; i++, mask <<= 1 )
195     {
196 	    gf_exp[i] = mask;
197 	    gf_log[gf_exp[i]] = i;
198 	    /*
199 	     * If Pp[i] == 1 then \alpha ** i occurs in poly-repr
200 	     * gf_exp[GF_BITS] = \alpha ** GF_BITS
201 	     */
202 	    if ( Pp[i] == '1' )
203 	        gf_exp[GF_BITS] ^= mask;
204     }
205     /*
206      * now gf_exp[GF_BITS] = \alpha ** GF_BITS is complete, so can als
207      * compute its inverse.
208      */
209     gf_log[gf_exp[GF_BITS]] = GF_BITS;
210     /*
211      * Poly-repr of \alpha ** (i+1) is given by poly-repr of
212      * \alpha ** i shifted left one-bit and accounting for any
213      * \alpha ** GF_BITS term that may occur when poly-repr of
214      * \alpha ** i is shifted.
215      */
216     mask = 1 << (GF_BITS - 1 ) ;
217     for (int i = GF_BITS + 1; i < GF_SIZE; i++)
218     {
219 	    if (gf_exp[i - 1] >= mask)
220 	        gf_exp[i] = gf_exp[GF_BITS] ^ ((gf_exp[i - 1] ^ mask) << 1);
221 	    else
222 	        gf_exp[i] = gf_exp[i - 1] << 1;
223 	    gf_log[gf_exp[i]] = i;
224     }
225     /*
226      * log(0) is not defined, so use a special value
227      */
228     gf_log[0] =	GF_SIZE ;
229     /* set the extended gf_exp values for fast multiply */
230     for (int i = 0 ; i < GF_SIZE ; i++)
231 	    gf_exp[i + GF_SIZE] = gf_exp[i] ;
232 
233     /*
234      * again special cases. 0 has no inverse. This used to
235      * be initialized to GF_SIZE, but it should make no difference
236      * since noone is supposed to read from here.
237      */
238     inverse[0] = 0 ;
239     inverse[1] = 1;
240     for (int i = 2; i <= GF_SIZE; i++)
241 	    inverse[i] = gf_exp[GF_SIZE-gf_log[i]];
242 }  // end generate_gf()
243 
244 
245 /*
246  * Various linear algebra operations that i use often.
247  */
248 
249 /*
250  * addmul() computes dst[] = dst[] + c * src[]
251  * This is used often, so better optimize it! Currently the loop is
252  * unrolled 16 times, a good value for 486 and pentium-class machines.
253  * The case c=0 is also optimized, whereas c=1 is not. These
254  * calls are unfrequent in my typical apps so I did not bother.
255  *
256  * Note that gcc on
257  */
258 #define addmul(dst, src, c, sz) \
259     if (c != 0) addmul1(dst, src, c, sz)
260 #define UNROLL 16 /* 1, 4, 8, 16 */
261 
addmul1(gf * dst1,gf * src1,gf c,int sz)262 static void addmul1(gf* dst1, gf* src1, gf c, int sz)
263 {
264     USE_GF_MULC ;
265     register gf* dst = dst1;
266     register gf* src = src1 ;
267     gf* lim = &dst[sz - UNROLL + 1] ;
268 
269     GF_MULC0(c) ;
270 
271 #if (UNROLL > 1) /* unrolling by 8/16 is quite effective on the pentium */
272     for (; dst < lim ; dst += UNROLL, src += UNROLL )
273     {
274         GF_ADDMULC( dst[0] , src[0] );
275 	    GF_ADDMULC( dst[1] , src[1] );
276 	    GF_ADDMULC( dst[2] , src[2] );
277 	    GF_ADDMULC( dst[3] , src[3] );
278 #if (UNROLL > 4)
279 	    GF_ADDMULC( dst[4] , src[4] );
280 	    GF_ADDMULC( dst[5] , src[5] );
281 	    GF_ADDMULC( dst[6] , src[6] );
282 	    GF_ADDMULC( dst[7] , src[7] );
283 #endif
284 #if (UNROLL > 8)
285 	    GF_ADDMULC( dst[8] , src[8] );
286 	    GF_ADDMULC( dst[9] , src[9] );
287 	    GF_ADDMULC( dst[10] , src[10] );
288 	    GF_ADDMULC( dst[11] , src[11] );
289 	    GF_ADDMULC( dst[12] , src[12] );
290 	    GF_ADDMULC( dst[13] , src[13] );
291 	    GF_ADDMULC( dst[14] , src[14] );
292         GF_ADDMULC( dst[15] , src[15] );
293 #endif
294     }
295 #endif
296     lim += UNROLL - 1 ;
297     for (; dst < lim; dst++, src++ )		/* final components */
298 	    GF_ADDMULC( *dst , *src );
299 }  // end addmul1()
300 
301 
302 // computes C = AB where A is n*k, B is k*m, C is n*m
matmul(gf * a,gf * b,gf * c,int n,int k,int m)303 static void matmul(gf* a, gf* b, gf* c, int n, int k, int m)
304 {
305     int row, col, i ;
306 
307     for (row = 0; row < n ; row++)
308     {
309 	    for (col = 0; col < m ; col++)
310         {
311 	        gf* pa = &a[ row * k ];
312 	        gf* pb = &b[ col ];
313 	        gf acc = 0 ;
314 	        for (i = 0; i < k ; i++, pa++, pb += m)
315 		        acc ^= gf_mul( *pa, *pb ) ;
316 	        c[row * m + col] = acc ;
317 	    }
318     }
319 }  // end matmul()
320 
321 
invert_vdm(gf * src,int k)322 static int invert_vdm(gf* src, int k)
323 {
324     gf t, xx;
325 
326     if (k == 1)return 0; // degenerate case, matrix must be p^0 = 1
327 
328     /*
329      * c holds the coefficient of P(x) = Prod (x - p_i), i=0..k-1
330      * b holds the coefficient for the matrix inversion
331      */
332     gf* c = NEW_GF_MATRIX(1, k);
333     gf* b = NEW_GF_MATRIX(1, k);
334     gf* p = NEW_GF_MATRIX(1, k);
335 
336     int i, j;
337     for (j = 1, i = 0 ; i < k ; i++, j+=k )
338     {
339 	    c[i] = 0 ;
340 	    p[i] = src[j] ;    /* p[i] */
341     }
342     /*
343      * construct coeffs. recursively. We know c[k] = 1 (implicit)
344      * and start P_0 = x - p_0, then at each stage multiply by
345      * x - p_i generating P_i = x P_{i-1} - p_i P_{i-1}
346      * After k steps we are done.
347      */
348     c[k-1] = p[0] ;	/* really -p(0), but x = -x in GF(2^m) */
349     for (i = 1 ; i < k ; i++)
350     {
351 	    gf p_i = p[i] ; /* see above comment */
352 	    for (j = k - 1  - ( i - 1 ) ; j < k-1 ; j++ )
353 	        c[j] ^= gf_mul( p_i, c[j+1] ) ;
354 	    c[k-1] ^= p_i ;
355     }
356     for (int row = 0 ; row < k ; row++)
357     {
358 	    /*
359 	     * synthetic division etc.
360 	     */
361 	    xx = p[row] ;
362 	    t = 1 ;
363 	    b[k-1] = 1 ; /* this is in fact c[k] */
364 	    for (i = k - 2 ; i >= 0 ; i-- )
365         {
366 	        b[i] = c[i+1] ^ gf_mul(xx, b[i+1]) ;
367 	        t = gf_mul(xx, t) ^ b[i] ;
368 	    }
369 	    for (int col = 0 ; col < k ; col++ )
370 	        src[col*k + row] = gf_mul(inverse[t], b[col] );
371     }
372     delete[] c;
373     delete[] b;
374     delete[] p;
375     return 0 ;
376 }  // end invert_vdm()
377 
378 
379 static bool fec_initialized = false;
init_fec()380 static void init_fec()
381 {
382     if (!fec_initialized)
383     {
384         generate_gf();
385         init_mul_table();
386         fec_initialized = true;
387     }
388 }
389 
NormEncoderRS8()390 NormEncoderRS8::NormEncoderRS8()
391  : enc_matrix(NULL)
392 {
393 }
394 
~NormEncoderRS8()395 NormEncoderRS8::~NormEncoderRS8()
396 {
397     Destroy();
398 }
399 
Init(unsigned int numData,unsigned int numParity,UINT16 vecSizeMax)400 bool NormEncoderRS8::Init(unsigned int numData, unsigned int numParity, UINT16 vecSizeMax)
401 {
402 #ifdef SIMULATE
403     vecSizeMax = MIN(SIM_PAYLOAD_MAX, vecSizeMax);
404 #endif // SIMULATE
405     if ((numData + numParity) > GF_SIZE)
406     {
407         PLOG(PL_FATAL, "NormEncoderRS8::Init() error: numData/numParity exceeds code limits\n");
408         return false;
409     }
410 
411     if (NULL != enc_matrix)
412     {
413         delete[] enc_matrix;
414         enc_matrix = NULL;
415     }
416     init_fec();
417     int n = numData + numParity;
418     int k = numData;
419     enc_matrix = (UINT8*)NEW_GF_MATRIX(n, k);
420     if (NULL != enc_matrix)
421     {
422         gf* tmpMatrix = NEW_GF_MATRIX(n, k);
423         if (NULL == tmpMatrix)
424         {
425             PLOG(PL_FATAL, "NormEncoderRS8::Init() error: new  tmpMatrix error: %s\n", GetErrorString());
426             delete[] enc_matrix;
427             enc_matrix = NULL;
428             return false;
429         }
430         // Fill the matrix with powers of field elements, starting from 0.
431         // The first row is special, cannot be computed with exp. table.
432         tmpMatrix[0] = 1 ;
433         for (int col = 1; col < k ; col++)
434 	        tmpMatrix[col] = 0 ;
435         for (gf* p = tmpMatrix + k, row = 0; row < n-1 ; row++, p += k)
436         {
437 	        for (int col = 0 ; col < k ; col ++ )
438 	            p[col] = gf_exp[modnn(row*col)];
439         }
440 
441 
442         // Quick code to build systematic matrix: invert the top
443         // k*k vandermonde matrix, multiply right the bottom n-k rows
444         // by the inverse, and construct the identity matrix at the top.
445         invert_vdm(tmpMatrix, k); /* much faster than invert_mat */
446         matmul(tmpMatrix + k*k, tmpMatrix, ((gf*)enc_matrix) + k*k, n - k, k, k);
447         // the upper matrix is I so do not bother with a slow multiply
448         memset(enc_matrix, 0, k*k*sizeof(gf));
449         for (gf* p = (gf*)enc_matrix, col = 0 ; col < k ; col++, p += k+1 )
450 	        *p = 1 ;
451         delete[] tmpMatrix;
452         ndata = numData;
453         npar = numParity;
454         vector_size = vecSizeMax;
455         return true;
456     }
457     else
458     {
459         PLOG(PL_FATAL, "NormEncoderRS8::Init() error: new enc_matrix error: %s\n", GetErrorString());
460         return false;
461     }
462 }  // end NormEncoderRS8::Init()
463 
Destroy()464 void NormEncoderRS8::Destroy()
465 {
466     if (NULL != enc_matrix)
467     {
468         delete[] enc_matrix;
469         enc_matrix = NULL;
470     }
471 }  // end NormEncoderRS8::Destroy()
472 
Encode(unsigned int segmentId,const char * dataVector,char ** parityVectorList)473 void NormEncoderRS8::Encode(unsigned int segmentId, const char* dataVector, char** parityVectorList)
474 {
475     for (unsigned int i = 0; i < npar; i++)
476     {
477         // Update each parity vector
478         gf* fec = (gf*)parityVectorList[i];
479         gf* p = ((gf*)enc_matrix) + ((i+ndata)*ndata);
480         unsigned int nelements = (GF_BITS > 8) ? vector_size / 2 : vector_size;
481         addmul(fec, (gf*)dataVector, p[segmentId], nelements);
482     }
483 }  // end NormEncoderRS8::Encode()
484 
485 
NormDecoderRS8()486 NormDecoderRS8::NormDecoderRS8()
487  : enc_matrix(NULL), dec_matrix(NULL),
488    parity_loc(NULL), inv_ndxc(NULL), inv_ndxr(NULL),
489    inv_pivt(NULL), inv_id_row(NULL), inv_temp_row(NULL)
490 {
491 }
492 
~NormDecoderRS8()493 NormDecoderRS8::~NormDecoderRS8()
494 {
495     Destroy();
496 }
497 
Destroy()498 void NormDecoderRS8::Destroy()
499 {
500     if (NULL != enc_matrix)
501     {
502         delete[] enc_matrix;
503         enc_matrix = NULL;
504     }
505     if (NULL != dec_matrix)
506     {
507         delete[] dec_matrix;
508         dec_matrix = NULL;
509     }
510     if (NULL != parity_loc)
511     {
512         delete[] parity_loc;
513         parity_loc = NULL;
514     }
515     if (NULL != inv_ndxc)
516     {
517         delete[] inv_ndxc;
518         inv_ndxc = NULL;
519     }
520     if (NULL != inv_ndxr)
521     {
522         delete[] inv_ndxr;
523         inv_ndxr = NULL;
524     }
525     if (NULL != inv_pivt)
526     {
527         delete[] inv_pivt;
528         inv_pivt = NULL;
529     }
530     if (NULL != inv_id_row)
531     {
532         delete[] inv_id_row;
533         inv_id_row = NULL;
534     }
535     if (NULL != inv_temp_row)
536     {
537         delete[] inv_temp_row;
538         inv_temp_row = NULL;
539     }
540 }  // end NormDecoderRS8::Destroy()
541 
Init(unsigned int numData,unsigned int numParity,UINT16 vecSizeMax)542 bool NormDecoderRS8::Init(unsigned int numData, unsigned int numParity, UINT16 vecSizeMax)
543 {
544 #ifdef SIMULATE
545     vecSizeMax = MIN(SIM_PAYLOAD_MAX, vecSizeMax);
546 #endif // SIMULATE
547     if ((numData + numParity) > GF_SIZE)
548     {
549         PLOG(PL_FATAL, "NormEncoderRS8::Init() error: numData/numParity exceeds code limits\n");
550         return false;
551     }
552 
553     init_fec();
554     Destroy();
555 
556     int n = numData + numParity;
557     int k = numData;
558 
559     if (NULL == (inv_ndxc = new unsigned int[k]))
560     {
561         PLOG(PL_FATAL, "NormDecoderRS8::Init() new inv_indxc error: %s\n", GetErrorString());
562         Destroy();
563         return false;
564     }
565 
566     if (NULL == (inv_ndxr = new unsigned int[k]))
567     {
568         PLOG(PL_FATAL, "NormDecoderRS8::Init() new inv_inv_ndxr error: %s\n", GetErrorString());
569         Destroy();
570         return false;
571     }
572 
573     if (NULL == (inv_pivt = new unsigned int[k]))
574     {
575         PLOG(PL_FATAL, "NormDecoderRS8::Init() new inv_pivt error: %s\n", GetErrorString());
576         Destroy();
577         return false;
578     }
579 
580     if (NULL == (inv_id_row = (UINT8*)NEW_GF_MATRIX(1, k)))
581     {
582         PLOG(PL_FATAL, "NormDecoderRS8::Init() new inv_id_row error: %s\n", GetErrorString());
583         Destroy();
584         return false;
585     }
586 
587     if (NULL == (inv_temp_row = (UINT8*)NEW_GF_MATRIX(1, k)))
588     {
589         PLOG(PL_FATAL, "NormDecoderRS8::Init() new inv_temp_row error: %s\n", GetErrorString());
590         Destroy();
591         return false;
592     }
593 
594     if (NULL == (parity_loc = new unsigned int[numParity]))
595     {
596         PLOG(PL_FATAL, "NormDecoderRS8::Init() error: new parity_loc error: %s\n", GetErrorString());
597         Destroy();
598         return false;
599     }
600 
601     if (NULL == (dec_matrix = (UINT8*)NEW_GF_MATRIX(k, k)))
602     {
603         PLOG(PL_FATAL, "NormDecoderRS8::Init() error: new dec_matrix error: %s\n", GetErrorString());
604         Destroy();
605         return false;
606     }
607 
608     if (NULL == (enc_matrix = (UINT8*)NEW_GF_MATRIX(n, k)))
609     {
610         PLOG(PL_FATAL, "NormDecoderRS8::Init() error: new enc_matrix error: %s\n", GetErrorString());
611         Destroy();
612         return false;
613     }
614 
615 
616     gf* tmpMatrix = NEW_GF_MATRIX(n, k);
617     if (NULL == tmpMatrix)
618     {
619         PLOG(PL_FATAL, "NormEncoderRS8::Init() error: new  tmpMatrix error: %s\n", GetErrorString());
620         delete[] enc_matrix;
621         enc_matrix = NULL;
622         return false;
623     }
624     // Fill the matrix with powers of field elements, starting from 0.
625     // The first row is special, cannot be computed with exp. table.
626     tmpMatrix[0] = 1 ;
627     for (int col = 1; col < k ; col++)
628 	    tmpMatrix[col] = 0 ;
629     for (gf* p = tmpMatrix + k, row = 0; row < n-1 ; row++, p += k)
630     {
631 	    for (int col = 0 ; col < k ; col ++ )
632 	        p[col] = gf_exp[modnn(row*col)];
633     }
634 
635     // Quick code to build systematic matrix: invert the top
636     // k*k vandermonde matrix, multiply right the bottom n-k rows
637     // by the inverse, and construct the identity matrix at the top.
638     invert_vdm(tmpMatrix, k); /* much faster than invert_mat */
639     matmul(tmpMatrix + k*k, tmpMatrix, ((gf*)enc_matrix) + k*k, n - k, k, k);
640     // the upper matrix is I so do not bother with a slow multiply
641     memset(enc_matrix, 0, k*k*sizeof(gf));
642     for (gf* p = (gf*)enc_matrix, col = 0 ; col < k ; col++, p += k+1 )
643 	    *p = 1 ;
644     delete[] tmpMatrix;
645     ndata = numData;
646     npar = numParity;
647     vector_size = vecSizeMax;
648     return true;
649 }  // end NormDecoderRS8::Init()
650 
651 
Decode(char ** vectorList,unsigned int numData,unsigned int erasureCount,unsigned int * erasureLocs)652 int NormDecoderRS8::Decode(char** vectorList, unsigned int numData,  unsigned int erasureCount, unsigned int* erasureLocs)
653 {
654     unsigned int bsz = ndata + npar;
655     // 1) Build decoding matrix for the given set of segments & erasures
656     unsigned int nextErasure = 0;
657     unsigned int ne = 0;
658     unsigned int sourceErasureCount = 0;
659     unsigned int parityCount = 0;
660     for (unsigned int i = 0;  i < bsz; i++)
661     {
662         if (i < numData)
663         {
664             if ((nextErasure < erasureCount) && (i == erasureLocs[nextErasure]))
665             {
666                 nextErasure++;
667                 sourceErasureCount++;
668             }
669             else
670             {
671                 // set identity row for segments we have
672                 gf* p = ((gf*)dec_matrix) + ndata*i;
673                 memset(p, 0, ndata*sizeof(gf));
674                 p[i] = 1;
675             }
676         }
677         else if (i < ndata)
678         {
679             // set identity row for assumed zero segments (shortened code)
680             gf* p = ((gf*)dec_matrix) + ndata*i;
681             memset(p, 0, ndata*sizeof(gf));
682             p[i] = 1;
683             // Also keep track of where the non-erased parity segment are
684             // for the shortened code
685             if ((nextErasure < erasureCount) && (i == erasureLocs[nextErasure]))
686             {
687                 nextErasure++;
688             }
689             else if (parityCount < sourceErasureCount)
690             {
691                 parity_loc[parityCount++] = i;
692                 // Copy appropriate enc_matric parity row to dec_matrix erasureRow
693                 gf* p = ((gf*)dec_matrix) + ndata*erasureLocs[ne++];
694                 memcpy(p, ((gf*)enc_matrix) + (ndata-numData+i)*ndata, ndata*sizeof(gf));
695             }
696 
697         }
698         else if (parityCount < sourceErasureCount)
699         {
700             if ((nextErasure < erasureCount) && (i == erasureLocs[nextErasure]))
701             {
702                 nextErasure++;
703             }
704             else
705             {
706                 ASSERT(parityCount < npar);
707                 parity_loc[parityCount++] = i;
708                 // Copy appropriate enc_matric parity row to dec_matrix erasureRow
709                 gf* p = ((gf*)dec_matrix) + ndata*erasureLocs[ne++];
710                 memcpy(p, ((gf*)enc_matrix) + (ndata-numData+i)*ndata, ndata*sizeof(gf));
711             }
712         }
713         else
714         {
715             break;
716         }
717 
718     }
719     ASSERT(ne == sourceErasureCount);
720     // 2) Invert the decoding matrix
721     if (!InvertDecodingMatrix())
722     {
723 	    PLOG(PL_FATAL, "NormDecoderRS8::Decode() error: couldn't invert dec_matrix ?!\n");
724         return 0;
725     }
726 
727     // 3) Decode
728     for (unsigned int e = 0; e < erasureCount; e++)
729     {
730         // Calculate missing segments (erasures) using dec_matrix and non-erasures
731         unsigned int row = erasureLocs[e];
732         if (row >= numData) break; // don't bother filling in parity segments
733         unsigned int col = 0;
734         unsigned int nextErasure = 0;
735         unsigned int nelements = (GF_BITS > 8) ? vector_size/2 : vector_size;
736         for (unsigned int i  = 0; i < numData; i++)
737         {
738             if ((nextErasure < erasureCount) && (i == erasureLocs[nextErasure]))
739             {
740                 // Use parity segments in place of erased vector in decoding
741                 addmul((gf*)vectorList[row], (gf*)vectorList[parity_loc[nextErasure]], ((gf*)dec_matrix)[row*ndata + col], nelements);
742                 col++;
743                 nextErasure++;  // point to next erasure
744             }
745             else if (i < numData)
746             {
747                 addmul((gf*)vectorList[row], (gf*)vectorList[i], ((gf*)dec_matrix)[row*ndata + col], nelements);
748                 col++;
749             }
750             else
751             {
752                 ASSERT(0);
753             }
754         }
755     }
756     return erasureCount ;
757 }  // end NormDecoderRS8::Decode()
758 
759 
760 
761 /*
762  * NormDecoderRS8::InvertDecodingMatrix() takes a matrix and produces its inverse
763  * k is the size of the matrix. (Gauss-Jordan, adapted from Numerical Recipes in C)
764  * Return non-zero if singular.
765  */
InvertDecodingMatrix()766 bool NormDecoderRS8::InvertDecodingMatrix()
767 {
768     gf* src = (gf*)dec_matrix;
769     unsigned int k = ndata;
770 
771     memset(inv_id_row, 0, k*sizeof(gf));
772     // inv_pivt marks elements already used as pivots.
773     memset(inv_pivt, 0, k*sizeof(unsigned int));
774 
775     for (unsigned int col = 0; col < k ; col++)
776     {
777 	    /*
778 	     * Zeroing column 'col', look for a non-zero element.
779 	     * First try on the diagonal, if it fails, look elsewhere.
780 	     */
781 	    int irow = -1;
782         int icol = -1 ;
783 	    if (inv_pivt[col] != 1 && src[col*k + col] != 0)
784         {
785 	        irow = col ;
786 	        icol = col ;
787 	        goto found_piv ;
788 	    }
789 	    for (unsigned int row = 0 ; row < k ; row++)
790         {
791 	        if (inv_pivt[row] != 1)
792             {
793 		        for (unsigned int ix = 0 ; ix < k ; ix++)
794                 {
795 		            if (inv_pivt[ix] == 0)
796                     {
797 			            if (src[row*k + ix] != 0)
798                         {
799 			                irow = row ;
800 			                icol = ix ;
801 			                goto found_piv ;
802 			            }
803 		            }
804                     else if (inv_pivt[ix] > 1)
805                     {
806 			            PLOG(PL_FATAL, "NormDecoderRS8::InvertDecodingMatrix() error: singular matrix!\n");
807 			            return false;
808 		            }
809 		        }
810 	        }
811 	    }
812 	    if (icol == -1)
813         {
814             PLOG(PL_FATAL, "NormDecoderRS8::InvertDecodingMatrix() error: pivot not found!\n");
815 	        return false;
816 	    }
817     found_piv:
818 	    ++(inv_pivt[icol]) ;
819 	    /*
820 	     * swap rows irow and icol, so afterwards the diagonal
821 	     * element will be correct. Rarely done, not worth
822 	     * optimizing.
823 	    */
824 	    if (irow != icol)
825         {
826 	        for (unsigned int ix = 0 ; ix < k ; ix++ )
827 		        SWAP(src[irow*k + ix], src[icol*k + ix], gf);
828 	    }
829 	    inv_ndxr[col] = irow ;
830 	    inv_ndxc[col] = icol ;
831 	    gf* pivotRow = &src[icol*k] ;
832 	    gf c = pivotRow[icol] ;
833 	    if (c == 0)
834         {
835 	        PLOG(PL_FATAL, "NormDecoderRS8::InvertDecodingMatrix() error: singular matrix!\n");
836 	        return false;
837 	    }
838 	    if (c != 1 ) /* otherwhise this is a NOP */
839         {
840 	        /*
841 	         * this is done often , but optimizing is not so
842 	         * fruitful, at least in the obvious ways (unrolling)
843 	         */
844 	        c = inverse[ c ] ;
845 	        pivotRow[icol] = 1 ;
846 	        for (unsigned int ix = 0 ; ix < k ; ix++ )
847 		        pivotRow[ix] = gf_mul(c, pivotRow[ix] );
848 	    }
849 	    /*
850 	     * from all rows, remove multiples of the selected row
851 	     * to zero the relevant entry (in fact, the entry is not zero
852 	     * because we know it must be zero).
853 	     * (Here, if we know that the pivot_row is the identity,
854 	     * we can optimize the addmul).
855 	     */
856 	    inv_id_row[icol] = 1;
857 	    if (0 != memcmp(pivotRow, inv_id_row, k*sizeof(gf)))
858         {
859 	        for (gf* p = src, ix = 0 ; ix < k ; ix++, p += k )
860             {
861 		        if (ix != icol)
862                 {
863 		            c = p[icol] ;
864 		            p[icol] = 0 ;
865 		            addmul(p, pivotRow, c, k );
866 		        }
867 	        }
868 	    }
869 	    inv_id_row[icol] = 0;
870     }  // end for (col = 0; col < k ; col++)
871 
872     for (int col = k - 1 ; col >= 0 ; col-- )
873     {
874 	    if (inv_ndxr[col] >= k)
875         {
876 	        PLOG(PL_ERROR, "NormDecoderRS8::InvertDecodingMatrix() error: AARGH, inv_ndxr[col] %d\n", inv_ndxr[col]);
877         }
878 	    else if (inv_ndxc[col] >= k)
879         {
880 	        PLOG(PL_ERROR, "NormDecoderRS8::InvertDecodingMatrix() error: AARGH, indxc[col] %d\n", inv_ndxc[col]);
881         }
882 	    else if (inv_ndxr[col] != inv_ndxc[col] )
883         {
884 	        for (unsigned int row = 0 ; row < k ; row++ )
885 		        SWAP( src[row*k + inv_ndxr[col]], src[row*k + inv_ndxc[col]], gf) ;
886 	    }
887     }
888     return true;
889 }  // end NormDecoderRS8::InvertDecodingMatrix()
890 
891