/*
 * This includes forward error correction code based on Vandermonde matrices
 * 980624
 * (C) 1997-98 Luigi Rizzo (luigi@iet.unipi.it)
 *
 * Portions derived from code by Phil Karn (karn@ka9q.ampr.org),
 * Robert Morelos-Zaragoza (robert@spectra.eng.hawaii.edu) and Hari
 * Thirumoorthy (harit@spectra.eng.hawaii.edu), Aug 1995
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer in the documentation and/or other materials
 *    provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
 * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
 * OF SUCH DAMAGE.
 */


#include "normEncoderRS16.h"
#include "protoDebug.h"
#ifdef SIMULATE
#include "normMessage.h"
#endif // SIMULATE

/*
 * The first part of the file here implements linear algebra in GF.
 *
 * gf is the type used to store an element of the Galois Field.
 * It must contain at least GF_BITS bits.
 *
 * Note: unsigned char will work up to GF(256) but int seems to run
 * faster on the Pentium. We use int whenever we have to deal with an
 * index, since indices are generally faster.
 */

#define GF_BITS  16                     // 16-bit RS code
#if ((GF_BITS < 2) || (GF_BITS > 16))
#error "GF_BITS must be 2 .. 16"
#endif
#if (GF_BITS <= 8)
typedef UINT8 gf;
#else
typedef UINT16 gf;
#endif
#define GF_SIZE ((1 << GF_BITS) - 1)    // powers of alpha


/*
 * Primitive polynomials - see Lin & Costello, Appendix A,
 * and Lee & Messerschmitt, p. 453.
 */

static const char *allPp[] =
{                        // GF_BITS   Polynomial
    NULL,                //   0      no code
    NULL,                //   1      no code
    "111",               //   2      1+x+x^2
    "1101",              //   3      1+x+x^3
    "11001",             //   4      1+x+x^4
    "101001",            //   5      1+x^2+x^5
    "1100001",           //   6      1+x+x^6
    "10010001",          //   7      1+x^3+x^7
    "101110001",         //   8      1+x^2+x^3+x^4+x^8
    "1000100001",        //   9      1+x^4+x^9
    "10010000001",       //  10      1+x^3+x^10
    "101000000001",      //  11      1+x^2+x^11
    "1100101000001",     //  12      1+x+x^4+x^6+x^12
    "11011000000001",    //  13      1+x+x^3+x^4+x^13
    "110000100010001",   //  14      1+x+x^6+x^10+x^14
    "1100000000000001",  //  15      1+x+x^15
    "11010000000010001"  //  16      1+x+x^3+x^12+x^16
};


/*
 * To speed up computations, we have tables for logarithm, exponent
 * and inverse of a number. If GF_BITS <= 8, we use a table for
 * multiplication as well (it takes 64K, no big deal even on a PDA,
 * especially because it can be pre-initialized and put into a ROM!),
 * otherwise we use a table of logarithms.
 * In any case the macro gf_mul(x,y) takes care of multiplications.
 */

static gf gf_exp[2*GF_SIZE];     // index->poly form conversion table
static int gf_log[GF_SIZE + 1];  // poly->index form conversion table
static gf inverse[GF_SIZE+1];    // inverse of field elem.
                                 // inverse[\alpha**i] = \alpha**(GF_SIZE-i)

// modnn(x) computes x % GF_SIZE, where GF_SIZE is 2**GF_BITS - 1,
// without a slow divide.
static inline gf modnn(int x)
{
    while (x >= GF_SIZE)
    {
        x -= GF_SIZE;
        x = (x >> GF_BITS) + (x & GF_SIZE);
    }
    return x;
}  // end modnn()
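
/*
 * Illustration (not specific to this build, which uses GF_BITS = 16): in the
 * smaller field GF(2^4) with primitive polynomial 1 + x + x^4, the element
 * alpha = x satisfies alpha^4 = alpha + 1, so the first exponent-table
 * entries are gf_exp[0..5] = {1, 2, 4, 8, 3, 6}.  A product such as
 * 4 * 8 (= alpha^2 * alpha^3) is then computed purely by table lookup:
 *     gf_mul(4, 8) = gf_exp[gf_log[4] + gf_log[8]] = gf_exp[2 + 3] = 6
 * gf_exp[] is allocated with size 2*GF_SIZE precisely so that the sum of two
 * logarithms can be used as an index without a modnn() reduction.
 */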

#define SWAP(a,b,t) {t tmp; tmp=a; a=b; b=tmp;}

/*
 * gf_mul(x,y) multiplies two numbers. If GF_BITS<=8, it is much
 * faster to use a multiplication table.
 *
 * USE_GF_MULC, GF_MULC0(c) and GF_ADDMULC(dst, x) can be used when
 * multiplying many numbers by the same constant. In this case the first
 * call sets the constant, and the others perform the multiplications.
 * A value related to the multiplication is held in a local variable
 * declared with USE_GF_MULC. See usage in addmul1().
 */
#if (GF_BITS <= 8)

static gf gf_mul_table[GF_SIZE + 1][GF_SIZE + 1];

#define gf_mul(x,y) gf_mul_table[x][y]
#define USE_GF_MULC register gf * __gf_mulc_
#define GF_MULC0(c) __gf_mulc_ = gf_mul_table[c]
#define GF_ADDMULC(dst, x) dst ^= __gf_mulc_[x]

static void init_mul_table()
{
    for (int i = 0; i <= GF_SIZE; i++)
    {
        for (int j = 0; j <= GF_SIZE; j++)
            gf_mul_table[i][j] = gf_exp[modnn(gf_log[i] + gf_log[j])];
    }
    for (int j = 0; j <= GF_SIZE; j++)
        gf_mul_table[0][j] = gf_mul_table[j][0] = 0;
}

#else  /* GF_BITS > 8 */

inline gf gf_mul(int x, int y)
{
    if ((0 == x) || (0 == y)) return 0;
    return gf_exp[gf_log[x] + gf_log[y]];
}

#define init_mul_table()
#define USE_GF_MULC register gf * __gf_mulc_
#define GF_MULC0(c) __gf_mulc_ = &gf_exp[ gf_log[c] ]
#define GF_ADDMULC(dst, x) { if (x) dst ^= __gf_mulc_[ gf_log[x] ] ; }

#endif  // if/else (GF_BITS <= 8)

/*
 * Generate GF(2**m) from the irreducible polynomial p(X) in p[0]..p[m]
 * Lookup tables:
 *     index form -> polynomial form:   gf_exp[] contains j = \alpha^i
 *     polynomial form -> index form:   gf_log[ j = \alpha^i ] = i
 * \alpha = x is the primitive element of GF(2^m)
 *
 * For efficiency, gf_exp[] has size 2*GF_SIZE, so that a simple
 * multiplication of two numbers can be resolved without calling modnn().
 */

#define NEW_GF_MATRIX(rows, cols) (new gf[rows*cols])

/*
 * Initialize the data structures used for computations in GF.
 */
static void generate_gf()
{
    const char *Pp = allPp[GF_BITS];

    gf mask = 1;          /* x ** 0 = 1 */
    gf_exp[GF_BITS] = 0;  /* will be updated at the end of the 1st loop */
    /*
     * first, generate the (polynomial representation of) powers of \alpha,
     * which are stored in gf_exp[i] = \alpha ** i .
     * At the same time build gf_log[gf_exp[i]] = i .
     * The first GF_BITS powers are simply bits shifted to the left.
     */
    for (int i = 0; i < GF_BITS; i++, mask <<= 1)
    {
        gf_exp[i] = mask;
        gf_log[gf_exp[i]] = i;
        /*
         * If Pp[i] == 1 then \alpha ** i occurs in the poly-repr of
         * gf_exp[GF_BITS] = \alpha ** GF_BITS
         */
        if (Pp[i] == '1')
            gf_exp[GF_BITS] ^= mask;
    }
    /*
     * now gf_exp[GF_BITS] = \alpha ** GF_BITS is complete, so we can also
     * compute its inverse.
     */
    gf_log[gf_exp[GF_BITS]] = GF_BITS;
    /*
     * The poly-repr of \alpha ** (i+1) is given by the poly-repr of
     * \alpha ** i shifted left one bit, accounting for any
     * \alpha ** GF_BITS term that may occur when the poly-repr of
     * \alpha ** i is shifted.
     */
    mask = 1 << (GF_BITS - 1);
    for (int i = GF_BITS + 1; i < GF_SIZE; i++)
    {
        if (gf_exp[i - 1] >= mask)
            gf_exp[i] = gf_exp[GF_BITS] ^ ((gf_exp[i - 1] ^ mask) << 1);
        else
            gf_exp[i] = gf_exp[i - 1] << 1;
        gf_log[gf_exp[i]] = i;
    }
    /*
     * log(0) is not defined, so use a special value
     */
    gf_log[0] = GF_SIZE;
    /* set the extended gf_exp values for fast multiply */
    for (int i = 0; i < GF_SIZE; i++)
        gf_exp[i + GF_SIZE] = gf_exp[i];

    /*
     * again special cases. 0 has no inverse. This used to
     * be initialized to GF_SIZE, but it should make no difference
     * since no one is supposed to read from here.
     */
    inverse[0] = 0;
    inverse[1] = 1;
    for (int i = 2; i <= GF_SIZE; i++)
        inverse[i] = gf_exp[GF_SIZE - gf_log[i]];
}  // end generate_gf()
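
/*
 * A minimal sanity-check sketch (hypothetical, kept out of the build with
 * "#if 0"): after generate_gf() has run, the log/exp tables are inverses of
 * each other on the nonzero field elements and inverse[] really does hold
 * multiplicative inverses.  The function name is illustrative only.
 */
#if 0
static void check_gf_tables()
{
    for (int i = 1; i <= GF_SIZE; i++)
    {
        ASSERT(gf_exp[gf_log[i]] == i);      // log and exp agree on nonzero elements
        ASSERT(1 == gf_mul(i, inverse[i]));  // inverse[] inverts every nonzero element
    }
}
#endif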


/*
 * Various linear algebra operations that I use often.
 */

/*
 * addmul() computes dst[] = dst[] + c * src[]
 * This is used often, so better optimize it! Currently the loop is
 * unrolled 16 times, a good value for 486 and pentium-class machines.
 * The case c=0 is also optimized, whereas c=1 is not. These
 * calls are infrequent in my typical apps so I did not bother.
 */
#define addmul(dst, src, c, sz) \
    if (c != 0) addmul1(dst, src, c, sz)
#define UNROLL 16 /* 1, 4, 8, 16 */

static void addmul1(gf* dst1, gf* src1, gf c, int sz)
{
    USE_GF_MULC;
    register gf* dst = dst1;
    register gf* src = src1;
    gf* lim = &dst[sz - UNROLL + 1];

    GF_MULC0(c);

#if (UNROLL > 1) /* unrolling by 8/16 is quite effective on the pentium */
    for (; dst < lim; dst += UNROLL, src += UNROLL)
    {
        GF_ADDMULC( dst[0] , src[0] );
        GF_ADDMULC( dst[1] , src[1] );
        GF_ADDMULC( dst[2] , src[2] );
        GF_ADDMULC( dst[3] , src[3] );
#if (UNROLL > 4)
        GF_ADDMULC( dst[4] , src[4] );
        GF_ADDMULC( dst[5] , src[5] );
        GF_ADDMULC( dst[6] , src[6] );
        GF_ADDMULC( dst[7] , src[7] );
#endif
#if (UNROLL > 8)
        GF_ADDMULC( dst[8] , src[8] );
        GF_ADDMULC( dst[9] , src[9] );
        GF_ADDMULC( dst[10] , src[10] );
        GF_ADDMULC( dst[11] , src[11] );
        GF_ADDMULC( dst[12] , src[12] );
        GF_ADDMULC( dst[13] , src[13] );
        GF_ADDMULC( dst[14] , src[14] );
        GF_ADDMULC( dst[15] , src[15] );
#endif
    }
#endif
    lim += UNROLL - 1;
    for (; dst < lim; dst++, src++)  /* final components */
        GF_ADDMULC( *dst , *src );
}  // end addmul1()
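
/*
 * For reference, a plain (non-unrolled) equivalent of addmul() is sketched
 * below; it is a hypothetical illustration kept out of the build with
 * "#if 0", not an alternative entry point.  addmul1() above is simply this
 * loop unrolled and driven by the GF_MULC0()/GF_ADDMULC() constant-multiplier
 * macros.
 */
#if 0
static void addmul_reference(gf* dst, const gf* src, gf c, int sz)
{
    if (0 == c) return;               // addmul() skips the call entirely when c == 0
    for (int i = 0; i < sz; i++)
        dst[i] ^= gf_mul(c, src[i]);  // dst[] += c * src[] in GF(2^GF_BITS)
}
#endif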


// computes C = AB where A is n*k, B is k*m, C is n*m
static void matmul(gf* a, gf* b, gf* c, int n, int k, int m)
{
    int row, col, i;

    for (row = 0; row < n; row++)
    {
        for (col = 0; col < m; col++)
        {
            gf* pa = &a[row * k];
            gf* pb = &b[col];
            gf acc = 0;
            for (i = 0; i < k; i++, pa++, pb += m)
                acc ^= gf_mul(*pa, *pb);
            c[row * m + col] = acc;
        }
    }
}  // end matmul()

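/*
 * invert_vdm() inverts, in place, the k*k Vandermonde matrix held in src[],
 * whose i-th row is (1, p_i, p_i^2, ..., p_i^(k-1)); the p_i are read from
 * the second column of src.  This specialized routine is much faster than
 * the generic Gauss-Jordan inversion used in InvertDecodingMatrix() below.
 */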
static int invert_vdm(gf* src, int k)
{
    gf t, xx;

    if (k == 1) return 0;  // degenerate case, matrix must be p^0 = 1

    /*
     * c holds the coefficients of P(x) = Prod (x - p_i), i=0..k-1
     * b holds the coefficients for the matrix inversion
     */
    gf* c = NEW_GF_MATRIX(1, k);
    gf* b = NEW_GF_MATRIX(1, k);
    gf* p = NEW_GF_MATRIX(1, k);

    int i, j;
    for (j = 1, i = 0; i < k; i++, j += k)
    {
        c[i] = 0;
        p[i] = src[j];    /* p[i] */
    }
    /*
     * construct coeffs. recursively. We know c[k] = 1 (implicit)
     * and start P_0 = x - p_0, then at each stage multiply by
     * x - p_i generating P_i = x P_{i-1} - p_i P_{i-1}
     * After k steps we are done.
     */
    c[k-1] = p[0];  /* really -p(0), but x = -x in GF(2^m) */
    for (i = 1; i < k; i++)
    {
        gf p_i = p[i];  /* see above comment */
        for (j = k - 1 - (i - 1); j < k - 1; j++)
            c[j] ^= gf_mul(p_i, c[j+1]);
        c[k-1] ^= p_i;
    }
    for (int row = 0; row < k; row++)
    {
        /*
         * synthetic division etc.
         */
        xx = p[row];
        t = 1;
        b[k-1] = 1;  /* this is in fact c[k] */
        for (i = k - 2; i >= 0; i--)
        {
            b[i] = c[i+1] ^ gf_mul(xx, b[i+1]);
            t = gf_mul(xx, t) ^ b[i];
        }
        for (int col = 0; col < k; col++)
            src[col*k + row] = gf_mul(inverse[t], b[col]);
    }
    delete[] c;
    delete[] b;
    delete[] p;
    return 0;
}  // end invert_vdm()


static bool fec_initialized = false;
static void init_fec()
{
    if (!fec_initialized)
    {
        generate_gf();
        init_mul_table();
        fec_initialized = true;
    }
}

NormEncoderRS16::NormEncoderRS16()
 : enc_matrix(NULL)
{
}

NormEncoderRS16::~NormEncoderRS16()
{
    Destroy();
}

bool NormEncoderRS16::Init(unsigned int numData, unsigned int numParity, UINT16 vecSizeMax)
{
#ifdef SIMULATE
    vecSizeMax = MIN(SIM_PAYLOAD_MAX, vecSizeMax);
#endif // SIMULATE
    if ((numData + numParity) > GF_SIZE)
    {
        PLOG(PL_FATAL, "NormEncoderRS16::Init() error: numData/numParity exceeds code limits\n");
        return false;
    }

    if (NULL != enc_matrix)
    {
        delete[] enc_matrix;
        enc_matrix = NULL;
    }
    init_fec();
    int n = numData + numParity;
    int k = numData;
    enc_matrix = (UINT8*)NEW_GF_MATRIX(n, k);
    if (NULL != enc_matrix)
    {
        gf* tmpMatrix = NEW_GF_MATRIX(n, k);
        if (NULL == tmpMatrix)
        {
            PLOG(PL_FATAL, "NormEncoderRS16::Init() error: new tmpMatrix error: %s\n", GetErrorString());
            delete[] enc_matrix;
            enc_matrix = NULL;
            return false;
        }
        // Fill the matrix with powers of field elements, starting from 0.
        // The first row is special and cannot be computed from the exp. table.
        tmpMatrix[0] = 1;
        for (int col = 1; col < k; col++)
            tmpMatrix[col] = 0;
        for (gf* p = tmpMatrix + k, row = 0; row < n-1; row++, p += k)
        {
            for (int col = 0; col < k; col++)
                p[col] = gf_exp[modnn(row*col)];
        }

        // Quick code to build the systematic matrix: invert the top
        // k*k Vandermonde matrix, right-multiply the bottom n-k rows
        // by the inverse, and construct the identity matrix at the top.
        invert_vdm(tmpMatrix, k);  /* much faster than invert_mat */
        matmul(tmpMatrix + k*k, tmpMatrix, ((gf*)enc_matrix) + k*k, n - k, k, k);
        // the upper matrix is I so do not bother with a slow multiply
        memset(enc_matrix, 0, k*k*sizeof(gf));
        for (gf* p = (gf*)enc_matrix, col = 0; col < k; col++, p += k+1)
            *p = 1;
        delete[] tmpMatrix;
        ndata = numData;
        npar = numParity;
        vector_size = vecSizeMax;
        return true;
    }
    else
    {
        PLOG(PL_FATAL, "NormEncoderRS16::Init() error: new enc_matrix error: %s\n", GetErrorString());
        return false;
    }
}  // end NormEncoderRS16::Init()

void NormEncoderRS16::Destroy()
{
    if (NULL != enc_matrix)
    {
        delete[] enc_matrix;
        enc_matrix = NULL;
    }
}  // end NormEncoderRS16::Destroy()

void NormEncoderRS16::Encode(unsigned int segmentId, const char* dataVector, char** parityVectorList)
{
    for (unsigned int i = 0; i < npar; i++)
    {
        // Update each parity vector
        gf* fec = (gf*)parityVectorList[i];
        gf* p = ((gf*)enc_matrix) + ((i+ndata)*ndata);
        unsigned int nelements = (GF_BITS > 8) ? vector_size / 2 : vector_size;
        addmul(fec, (gf*)dataVector, p[segmentId], nelements);
    }
}  // end NormEncoderRS16::Encode()
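
/*
 * A minimal usage sketch (hypothetical, kept out of the build with "#if 0"):
 * Encode() only accumulates into the parity buffers, so the caller is
 * expected to zero them before the first call for a block and then invoke
 * Encode() once per data segment with that segment's index.  The helper
 * name, buffer ownership, and segment size handling here are illustrative
 * assumptions, not part of the NORM API.
 */
#if 0
static bool ExampleEncodeBlock(NormEncoderRS16& encoder,
                               char**           dataVectors,    // numData segments of segSize bytes
                               char**           parityVectors,  // numParity segments of segSize bytes
                               unsigned int     numData,
                               unsigned int     numParity,
                               UINT16           segSize)
{
    if (!encoder.Init(numData, numParity, segSize)) return false;
    for (unsigned int j = 0; j < numParity; j++)
        memset(parityVectors[j], 0, segSize);  // parity accumulates, so start from zero
    for (unsigned int i = 0; i < numData; i++)
        encoder.Encode(i, dataVectors[i], parityVectors);
    return true;
}
#endif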


NormDecoderRS16::NormDecoderRS16()
 : enc_matrix(NULL), dec_matrix(NULL),
   parity_loc(NULL), inv_ndxc(NULL), inv_ndxr(NULL),
   inv_pivt(NULL), inv_id_row(NULL), inv_temp_row(NULL)
{
}

NormDecoderRS16::~NormDecoderRS16()
{
    Destroy();
}

void NormDecoderRS16::Destroy()
{
    if (NULL != enc_matrix)
    {
        delete[] enc_matrix;
        enc_matrix = NULL;
    }
    if (NULL != dec_matrix)
    {
        delete[] dec_matrix;
        dec_matrix = NULL;
    }
    if (NULL != parity_loc)
    {
        delete[] parity_loc;
        parity_loc = NULL;
    }
    if (NULL != inv_ndxc)
    {
        delete[] inv_ndxc;
        inv_ndxc = NULL;
    }
    if (NULL != inv_ndxr)
    {
        delete[] inv_ndxr;
        inv_ndxr = NULL;
    }
    if (NULL != inv_pivt)
    {
        delete[] inv_pivt;
        inv_pivt = NULL;
    }
    if (NULL != inv_id_row)
    {
        delete[] inv_id_row;
        inv_id_row = NULL;
    }
    if (NULL != inv_temp_row)
    {
        delete[] inv_temp_row;
        inv_temp_row = NULL;
    }
}  // end NormDecoderRS16::Destroy()

bool NormDecoderRS16::Init(unsigned int numData, unsigned int numParity, UINT16 vecSizeMax)
{
#ifdef SIMULATE
    vecSizeMax = MIN(SIM_PAYLOAD_MAX, vecSizeMax);
#endif // SIMULATE
    if ((numData + numParity) > GF_SIZE)
    {
        PLOG(PL_FATAL, "NormDecoderRS16::Init() error: numData/numParity exceeds code limits\n");
        return false;
    }

    init_fec();
    Destroy();

    int n = numData + numParity;
    int k = numData;

    if (NULL == (inv_ndxc = new unsigned int[k]))
    {
        PLOG(PL_FATAL, "NormDecoderRS16::Init() new inv_ndxc error: %s\n", GetErrorString());
        Destroy();
        return false;
    }

    if (NULL == (inv_ndxr = new unsigned int[k]))
    {
        PLOG(PL_FATAL, "NormDecoderRS16::Init() new inv_ndxr error: %s\n", GetErrorString());
        Destroy();
        return false;
    }

    if (NULL == (inv_pivt = new unsigned int[k]))
    {
        PLOG(PL_FATAL, "NormDecoderRS16::Init() new inv_pivt error: %s\n", GetErrorString());
        Destroy();
        return false;
    }

    if (NULL == (inv_id_row = (UINT8*)NEW_GF_MATRIX(1, k)))
    {
        PLOG(PL_FATAL, "NormDecoderRS16::Init() new inv_id_row error: %s\n", GetErrorString());
        Destroy();
        return false;
    }

    if (NULL == (inv_temp_row = (UINT8*)NEW_GF_MATRIX(1, k)))
    {
        PLOG(PL_FATAL, "NormDecoderRS16::Init() new inv_temp_row error: %s\n", GetErrorString());
        Destroy();
        return false;
    }

    if (NULL == (parity_loc = new unsigned int[numParity]))
    {
        PLOG(PL_FATAL, "NormDecoderRS16::Init() error: new parity_loc error: %s\n", GetErrorString());
        Destroy();
        return false;
    }

    if (NULL == (dec_matrix = (UINT8*)NEW_GF_MATRIX(k, k)))
    {
        PLOG(PL_FATAL, "NormDecoderRS16::Init() error: new dec_matrix error: %s\n", GetErrorString());
        Destroy();
        return false;
    }

    if (NULL == (enc_matrix = (UINT8*)NEW_GF_MATRIX(n, k)))
    {
        PLOG(PL_FATAL, "NormDecoderRS16::Init() error: new enc_matrix error: %s\n", GetErrorString());
        Destroy();
        return false;
    }

    gf* tmpMatrix = NEW_GF_MATRIX(n, k);
    if (NULL == tmpMatrix)
    {
        PLOG(PL_FATAL, "NormDecoderRS16::Init() error: new tmpMatrix error: %s\n", GetErrorString());
        delete[] enc_matrix;
        enc_matrix = NULL;
        return false;
    }
    // Fill the matrix with powers of field elements, starting from 0.
    // The first row is special and cannot be computed from the exp. table.
    tmpMatrix[0] = 1;
    for (int col = 1; col < k; col++)
        tmpMatrix[col] = 0;
    for (gf* p = tmpMatrix + k, row = 0; row < n-1; row++, p += k)
    {
        for (int col = 0; col < k; col++)
            p[col] = gf_exp[modnn(row*col)];
    }

    // Quick code to build the systematic matrix: invert the top
    // k*k Vandermonde matrix, right-multiply the bottom n-k rows
    // by the inverse, and construct the identity matrix at the top.
    invert_vdm(tmpMatrix, k);  /* much faster than invert_mat */
    matmul(tmpMatrix + k*k, tmpMatrix, ((gf*)enc_matrix) + k*k, n - k, k, k);
    // the upper matrix is I so do not bother with a slow multiply
    memset(enc_matrix, 0, k*k*sizeof(gf));
    for (gf* p = (gf*)enc_matrix, col = 0; col < k; col++, p += k+1)
        *p = 1;
    delete[] tmpMatrix;
    ndata = numData;
    npar = numParity;
    vector_size = vecSizeMax;
    return true;
}  // end NormDecoderRS16::Init()

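/*
 * NormDecoderRS16::Decode() reconstructs erased data segments in place.
 * As the loop below assumes, "erasureLocs" must list the erased positions in
 * ascending order (they are matched against increasing block indices), and
 * "vectorList" is indexed by block position: the numData data vectors first,
 * followed by the parity vectors.  Erased data vectors are rebuilt purely by
 * accumulation (addmul), so the caller is expected to provide them as
 * zero-filled buffers.  Returns erasureCount, or 0 if the decoding matrix
 * could not be inverted.
 */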
int NormDecoderRS16::Decode(char** vectorList, unsigned int numData, unsigned int erasureCount, unsigned int* erasureLocs)
{
    unsigned int bsz = ndata + npar;
    // 1) Build decoding matrix for the given set of segments & erasures
    unsigned int nextErasure = 0;
    unsigned int ne = 0;
    unsigned int sourceErasureCount = 0;
    unsigned int parityCount = 0;
    for (unsigned int i = 0; i < bsz; i++)
    {
        if (i < numData)
        {
            if ((nextErasure < erasureCount) && (i == erasureLocs[nextErasure]))
            {
                nextErasure++;
                sourceErasureCount++;
            }
            else
            {
                // set identity row for segments we have
                gf* p = ((gf*)dec_matrix) + ndata*i;
                memset(p, 0, ndata*sizeof(gf));
                p[i] = 1;
            }
        }
        else if (i < ndata)
        {
            // set identity row for assumed zero segments (shortened code)
            gf* p = ((gf*)dec_matrix) + ndata*i;
            memset(p, 0, ndata*sizeof(gf));
            p[i] = 1;
            // Also keep track of where the non-erased parity segments are
            // for the shortened code
            if ((nextErasure < erasureCount) && (i == erasureLocs[nextErasure]))
            {
                nextErasure++;
            }
            else if (parityCount < sourceErasureCount)
            {
                parity_loc[parityCount++] = i;
                // Copy the appropriate enc_matrix parity row to the dec_matrix erasure row
                gf* p = ((gf*)dec_matrix) + ndata*erasureLocs[ne++];
                memcpy(p, ((gf*)enc_matrix) + (ndata-numData+i)*ndata, ndata*sizeof(gf));
            }
        }
        else if (parityCount < sourceErasureCount)
        {
            if ((nextErasure < erasureCount) && (i == erasureLocs[nextErasure]))
            {
                nextErasure++;
            }
            else
            {
                ASSERT(parityCount < npar);
                parity_loc[parityCount++] = i;
                // Copy the appropriate enc_matrix parity row to the dec_matrix erasure row
                gf* p = ((gf*)dec_matrix) + ndata*erasureLocs[ne++];
                memcpy(p, ((gf*)enc_matrix) + (ndata-numData+i)*ndata, ndata*sizeof(gf));
            }
        }
        else
        {
            break;
        }
    }
    ASSERT(ne == sourceErasureCount);
    // 2) Invert the decoding matrix
    if (!InvertDecodingMatrix())
    {
        PLOG(PL_FATAL, "NormDecoderRS16::Decode() error: couldn't invert dec_matrix (numData:%d erasureCount:%d) ?!\n", numData, erasureCount);
        return 0;
    }

    // 3) Decode
    for (unsigned int e = 0; e < erasureCount; e++)
    {
        // Calculate missing segments (erasures) using dec_matrix and non-erasures
        unsigned int row = erasureLocs[e];
        if (row >= numData) break;  // don't bother filling in parity segments
        unsigned int col = 0;
        unsigned int nextErasure = 0;
        unsigned int nelements = (GF_BITS > 8) ? vector_size/2 : vector_size;
        for (unsigned int i = 0; i < numData; i++)
        {
            if ((nextErasure < erasureCount) && (i == erasureLocs[nextErasure]))
            {
                // Use parity segments in place of erased vectors in decoding
                addmul((gf*)vectorList[row], (gf*)vectorList[parity_loc[nextErasure]], ((gf*)dec_matrix)[row*ndata + col], nelements);
                col++;
                nextErasure++;  // point to next erasure
            }
            else if (i < numData)
            {
                addmul((gf*)vectorList[row], (gf*)vectorList[i], ((gf*)dec_matrix)[row*ndata + col], nelements);
                col++;
            }
            else
            {
                ASSERT(0);
            }
        }
    }
    return erasureCount;
}  // end NormDecoderRS16::Decode()



/*
 * NormDecoderRS16::InvertDecodingMatrix() inverts dec_matrix in place, where
 * k (= ndata) is the size of the matrix.  (Gauss-Jordan, adapted from
 * Numerical Recipes in C.)  Returns false if the matrix is singular.
 */
bool NormDecoderRS16::InvertDecodingMatrix()
{
    gf* src = (gf*)dec_matrix;
    unsigned int k = ndata;

    memset(inv_id_row, 0, k*sizeof(gf));
    // inv_pivt marks elements already used as pivots.
    memset(inv_pivt, 0, k*sizeof(unsigned int));

    for (unsigned int col = 0; col < k; col++)
    {
        /*
         * Zeroing column 'col', look for a non-zero element.
         * First try on the diagonal, if it fails, look elsewhere.
         */
        int irow = -1;
        int icol = -1;
        if (inv_pivt[col] != 1 && src[col*k + col] != 0)
        {
            irow = col;
            icol = col;
            goto found_piv;
        }
        for (int row = 0; row < k; row++)
        {
            if (inv_pivt[row] != 1)
            {
                for (unsigned int ix = 0; ix < k; ix++)
                {
                    if (inv_pivt[ix] == 0)
                    {
                        if (src[row*k + ix] != 0)
                        {
                            irow = row;
                            icol = ix;
                            goto found_piv;
                        }
                    }
                    else if (inv_pivt[ix] > 1)
                    {
                        PLOG(PL_FATAL, "NormDecoderRS16::InvertDecodingMatrix() error: singular matrix!\n");
                        return false;
                    }
                }
            }
        }
        if (icol == -1)
        {
            PLOG(PL_FATAL, "NormDecoderRS16::InvertDecodingMatrix() error: pivot not found!\n");
            return false;
        }
    found_piv:
        ++(inv_pivt[icol]);
        /*
         * swap rows irow and icol, so afterwards the diagonal
         * element will be correct. Rarely done, not worth
         * optimizing.
         */
        if (irow != icol)
        {
            for (unsigned int ix = 0; ix < k; ix++)
                SWAP(src[irow*k + ix], src[icol*k + ix], gf);
        }
        inv_ndxr[col] = irow;
        inv_ndxc[col] = icol;
        gf* pivotRow = &src[icol*k];
        gf c = pivotRow[icol];
        if (c == 0)
        {
            PLOG(PL_FATAL, "NormDecoderRS16::InvertDecodingMatrix() error: singular matrix!\n");
            return false;
        }
        if (c != 1)  /* otherwise this is a NOP */
        {
            /*
             * this is done often, but optimizing is not so
             * fruitful, at least in the obvious ways (unrolling)
             */
            c = inverse[c];
            pivotRow[icol] = 1;
            for (unsigned int ix = 0; ix < k; ix++)
                pivotRow[ix] = gf_mul(c, pivotRow[ix]);
        }
        /*
         * from all rows, remove multiples of the selected row
         * to zero the relevant entry (in fact, the entry is not zero
         * because we know it must be zero).
         * (Here, if we know that the pivot_row is the identity,
         * we can optimize the addmul).
         */
        inv_id_row[icol] = 1;
        if (0 != memcmp(pivotRow, inv_id_row, k*sizeof(gf)))
        {
            for (gf* p = src, ix = 0; ix < k; ix++, p += k)
            {
                if (ix != icol)
                {
                    c = p[icol];
                    p[icol] = 0;
                    addmul(p, pivotRow, c, k);
                }
            }
        }
        inv_id_row[icol] = 0;
    }  // end for (col = 0; col < k; col++)

    for (int col = k - 1; col >= 0; col--)
    {
        if (inv_ndxr[col] >= k)
        {
            PLOG(PL_ERROR, "NormDecoderRS16::InvertDecodingMatrix() error: AARGH, inv_ndxr[col] %d\n", inv_ndxr[col]);
        }
        else if (inv_ndxc[col] >= k)
        {
            PLOG(PL_ERROR, "NormDecoderRS16::InvertDecodingMatrix() error: AARGH, inv_ndxc[col] %d\n", inv_ndxc[col]);
        }
        else if (inv_ndxr[col] != inv_ndxc[col])
        {
            for (int row = 0; row < k; row++)
                SWAP(src[row*k + inv_ndxr[col]], src[row*k + inv_ndxc[col]], gf);
        }
    }
    return true;
}  // end NormDecoderRS16::InvertDecodingMatrix()