1 #include <m4ri/djb.h>
2 #include <m4rie/blm.h>
3 #include <m4rie/mzd_ptr.h>
4 
5 
_mzd_ptr_apply_blm_mzd(mzd_t ** X,const mzd_t ** A,const mzd_t ** B,const blm_t * f)6 void _mzd_ptr_apply_blm_mzd(mzd_t **X, const mzd_t **A, const mzd_t **B, const blm_t *f) {
7   assert((f->H!=NULL) & (f->F!=NULL) & (f->G!=NULL) &
8          (f->H->ncols == f->F->nrows) &
9          (f->F->nrows == f->G->nrows));
10 
11   mzd_t *t0 = mzd_init(A[0]->nrows, B[0]->ncols);
12   mzd_t *t1 = mzd_init(A[0]->nrows, A[0]->ncols);
13   mzd_t *t2 = mzd_init(B[0]->nrows, B[0]->ncols);
14 
15   for(rci_t i=0; i < f->F->nrows; i++) {
16     mzd_set_ui(t1, 0);
17     for(rci_t j=0; j < f->F->ncols; j++) {
18       if(mzd_read_bit(f->F, i, j)) {
19         mzd_add(t1, t1, A[j]);
20       }
21     }
22 
23     mzd_set_ui(t2, 0);
24     for(rci_t j=0; j < f->G->ncols; j++) {
25       if(mzd_read_bit(f->G, i, j)) {
26         mzd_add(t2, t2, B[j]);
27       }
28     }
29 
30 
31     mzd_mul(t0, t1, t2, 0);
32 
33     for(rci_t j=0; j < f->H->nrows; j++)
34       if(mzd_read_bit(f->H, j, i))
35         _mzd_ptr_add_modred(NULL, t0, X, j);
36   }
37 
38   mzd_free(t0);
39   mzd_free(t1);
40   mzd_free(t2);
41 }
42 
_blm_djb_compile(blm_t * f)43 blm_t *_blm_djb_compile(blm_t *f) {
44   assert((f->f == NULL) && (f->g == NULL) && (f->h == NULL));
45   assert((f->F != NULL) && (f->G != NULL) && (f->H != NULL));
46 
47 
48   mzd_t *F = mzd_copy(NULL, f->F);
49   f->f = djb_compile(F);
50   mzd_free(F);
51   if(mzd_equal(f->F, f->G))
52     f->g = f->f;
53   else {
54     mzd_t *G = mzd_copy(NULL, f->G);
55     f->g = djb_compile(G);
56     mzd_free(G);
57   }
58   mzd_t *H = mzd_copy(NULL, f->H);
59   f->h = djb_compile(H);
60   mzd_free(H);
61 
62   return f;
63 }
64 
_mzd_ptr_apply_blm_djb(mzd_t ** X,const mzd_t ** A,const mzd_t ** B,const blm_t * f)65 void _mzd_ptr_apply_blm_djb(mzd_t **X, const mzd_t **A, const mzd_t **B, const blm_t *f) {
66   assert((f->H!=NULL) & (f->F!=NULL) & (f->G!=NULL) &   \
67          (f->H->ncols == f->F->nrows) &                 \
68          (f->F->nrows == f->G->nrows));
69 
70 
71   mzd_t **t0 = (mzd_t**)m4ri_mm_malloc(sizeof(mzd_t*)*f->F->nrows);
72   mzd_t **t1 = (mzd_t**)m4ri_mm_malloc(sizeof(mzd_t*)*f->F->nrows);
73   mzd_t **t2 = (mzd_t**)m4ri_mm_malloc(sizeof(mzd_t*)*f->F->nrows);
74 
75   for(rci_t i=0; i<f->F->nrows; i++) {
76     t1[i] = mzd_init(A[0]->nrows, A[0]->ncols);
77     t2[i] = mzd_init(B[0]->nrows, B[0]->ncols);
78   }
79 
80   djb_apply_mzd_ptr(f->f, t1, A);
81   djb_apply_mzd_ptr(f->g, t2, B);
82 
83   for(rci_t i=0; i<f->F->nrows; i++) {
84     t0[i] = mzd_init(A[0]->nrows, B[0]->ncols);
85     mzd_mul(t0[i], t1[i], t2[i], 0);
86     mzd_free(t1[i]);
87     mzd_free(t2[i]);
88   }
89 
90 
91 
92   djb_apply_mzd_ptr(f->h, X, (const mzd_t**)t0);
93 
94   for(rci_t i=0; i<f->F->nrows; i++) {
95     mzd_free(t0[i]);
96   }
97 
98   m4ri_mm_free(t0);
99   m4ri_mm_free(t1);
100   m4ri_mm_free(t2);
101 }
102 
103 
104 
crt_init(const deg_t f_len,const deg_t g_len)105 int *crt_init(const deg_t f_len, const deg_t g_len) {
106   int *p_best = (int*)m4ri_mm_calloc(M4RIE_CRT_LEN, sizeof(int));
107   int  c_best = f_len * g_len;
108 
109   int *p = (int*)m4ri_mm_calloc(M4RIE_CRT_LEN, sizeof(int));
110 
111   for(deg_t omega=0; omega<8; omega++) {
112 
113     deg_t deg_need = f_len+g_len-1-omega;
114     deg_t deg_have = 0;
115     deg_t deg_poly = 1;
116 
117     p[0] = omega;
118     for(deg_t d=1; d<M4RIE_CRT_LEN; d++)
119       p[d] = 0;
120 
121     while (deg_have < deg_need) {
122       p[deg_poly] = irreducible_polynomials[deg_poly][0];
123       if (deg_have + deg_poly * p[deg_poly] < deg_need) {
124         deg_have += deg_poly * p[deg_poly];
125       } else {
126         deg_t deg_diff = deg_need - deg_have;
127         p[deg_poly] = ceil(deg_diff/(double)deg_poly);
128         deg_have += deg_poly * p[deg_poly];
129       }
130       deg_poly ++;
131     }
132 
133     deg_t deg_diff = deg_have - deg_need;
134     if (deg_diff && p[deg_diff] > 0) {
135       p[deg_diff]--;
136       deg_have -= deg_diff;
137     }
138 
139     int c = costs[p[0]];
140     for(deg_t d=1; d<M4RIE_CRT_LEN; d++)
141       c += costs[d] * p[d];
142 
143     if (c < c_best) {
144       for(deg_t d=0; d<M4RIE_CRT_LEN; d++)
145         p_best[d] = p[d];
146       c_best = c;
147     }
148   }
149   m4ri_mm_free(p);
150   return p_best;
151 }
152 
_small_multiplication_map(const deg_t degree)153 mzd_t *_small_multiplication_map(const deg_t degree) {
154   mzd_t *A;
155   switch(degree) {
156   case 1:
157     A = mzd_init(1, 1);
158     A->rows[0][0] = 0x1;
159     return A;
160   case 2:
161     A = mzd_init(3, 2);
162     A->rows[0][0] = 0x1; A->rows[1][0] = 0x3; A->rows[2][0] = 0x2;
163     return A;
164   case 3:
165     A = mzd_init(6, 3);
166     A->rows[0][0] = 0x1; A->rows[1][0] = 0x2; A->rows[2][0] = 0x4;
167     A->rows[3][0] = 0x3; A->rows[4][0] = 0x5; A->rows[5][0] = 0x6;
168     return A;
169   case 4:
170     A = mzd_init(9, 4);
171     A->rows[0][0] = 0x1; A->rows[1][0] = 0x2; A->rows[2][0] = 0x4; A->rows[3][0] = 0x8;
172     A->rows[4][0] = 0xf; A->rows[5][0] = 0x3; A->rows[6][0] = 0x5; A->rows[7][0] = 0xa; A->rows[8][0] = 0xc;
173     return A;
174   case 5:
175     A = mzd_init(13, 5);
176     A->rows[ 0][0] = 0x01; A->rows[ 1][0] = 0x02; A->rows[ 2][0] = 0x08; A->rows[ 3][0] = 0x10;
177     A->rows[ 4][0] = 0x11; A->rows[ 5][0] = 0x03; A->rows[ 6][0] = 0x18; A->rows[ 7][0] = 0x16;
178     A->rows[ 8][0] = 0x0d; A->rows[ 9][0] = 0x1b; A->rows[10][0] = 0x17; A->rows[11][0] = 0x1d;
179     A->rows[12][0] = 0x1f;
180     return A;
181   case 6:
182     A = mzd_init(17,  6);
183     A->rows[ 0][0] = 0x01;     A->rows[ 1][0] = 0x02;     A->rows[ 2][0] = 0x10;      A->rows[ 3][0] = 0x20;
184     A->rows[ 4][0] = 0x30;     A->rows[ 5][0] = 0x03;     A->rows[ 6][0] = 0x18;      A->rows[ 7][0] = 0x06;
185     A->rows[ 8][0] = 0x12;     A->rows[ 9][0] = 0x0c;     A->rows[10][0] = 0x38;      A->rows[11][0] = 0x07;
186     A->rows[12][0] = 0x29;     A->rows[13][0] = 0x25;     A->rows[14][0] = 0x2d;      A->rows[15][0] = 0x1b;
187     A->rows[16][0] = 0x3f;
188     return A;
189   case 7:
190     A = mzd_init(22,  7);
191     A->rows[ 0][0] = 0x7f;     A->rows[ 1][0] = 0x6e;     A->rows[ 2][0] = 0x3b;     A->rows[ 3][0] = 0x5d;
192     A->rows[ 4][0] = 0x6d;     A->rows[ 5][0] = 0x5b;     A->rows[ 6][0] = 0x36;     A->rows[ 7][0] = 0x03;
193     A->rows[ 8][0] = 0x05;     A->rows[ 9][0] = 0x11;     A->rows[10][0] = 0x0a;     A->rows[11][0] = 0x44;
194     A->rows[12][0] = 0x28;     A->rows[13][0] = 0x50;     A->rows[14][0] = 0x60;     A->rows[15][0] = 0x01;
195     A->rows[16][0] = 0x02;     A->rows[17][0] = 0x04;     A->rows[18][0] = 0x08;     A->rows[19][0] = 0x10;
196     A->rows[20][0] = 0x20;     A->rows[21][0] = 0x40;
197     return A;
198   case 8:
199     A = mzd_init(27,  8);
200     A->rows[ 0][0] = 0x01;     A->rows[ 1][0] = 0x02;     A->rows[ 2][0] = 0x04;     A->rows[ 3][0] = 0x08;
201     A->rows[ 4][0] = 0x10;     A->rows[ 5][0] = 0x20;     A->rows[ 6][0] = 0x40;     A->rows[ 7][0] = 0x80;
202     A->rows[ 8][0] = 0x05;     A->rows[ 9][0] = 0x0c;     A->rows[10][0] = 0x44;     A->rows[11][0] = 0x30;
203     A->rows[12][0] = 0xc0;     A->rows[13][0] = 0x0f;     A->rows[14][0] = 0xa0;     A->rows[15][0] = 0x11;
204     A->rows[16][0] = 0x0a;     A->rows[17][0] = 0xaa;     A->rows[18][0] = 0x03;     A->rows[19][0] = 0x50;
205     A->rows[20][0] = 0x22;     A->rows[21][0] = 0xff;     A->rows[22][0] = 0x55;     A->rows[23][0] = 0x33;
206     A->rows[24][0] = 0xcc;     A->rows[25][0] = 0x88;     A->rows[26][0] = 0xf0;
207     return A;
208   case 9:
209     A = mzd_init(31,  9);
210     A->rows[ 0][0] = 0x100;     A->rows[ 1][0] = 0x001;     A->rows[ 2][0] = 0x002;     A->rows[ 3][0] = 0x003;
211     A->rows[ 4][0] = 0x155;     A->rows[ 5][0] = 0x0aa;     A->rows[ 6][0] = 0x1ff;     A->rows[ 7][0] = 0x16d;
212     A->rows[ 8][0] = 0x1b6;     A->rows[ 9][0] = 0x0db;     A->rows[10][0] = 0x0e9;     A->rows[11][0] = 0x13a;
213     A->rows[12][0] = 0x074;     A->rows[13][0] = 0x1d3;     A->rows[14][0] = 0x09d;     A->rows[15][0] = 0x14e;
214     A->rows[16][0] = 0x0b9;     A->rows[17][0] = 0x172;     A->rows[18][0] = 0x05c;     A->rows[19][0] = 0x1cb;
215     A->rows[20][0] = 0x0e5;     A->rows[21][0] = 0x12e;     A->rows[22][0] = 0x191;     A->rows[23][0] = 0x0b2;
216     A->rows[24][0] = 0x164;     A->rows[25][0] = 0x0c8;     A->rows[26][0] = 0x08f;     A->rows[27][0] = 0x123;
217     A->rows[28][0] = 0x0f5;     A->rows[29][0] = 0x07a;     A->rows[30][0] = 0x1ac;
218     return A;
219   case 10:
220     A = mzd_init(36, 10);
221     A->rows[ 0][0] = 0x200;     A->rows[ 1][0] = 0x001;     A->rows[ 2][0] = 0x3ff;     A->rows[ 3][0] = 0x36d;
222     A->rows[ 4][0] = 0x1b6;     A->rows[ 5][0] = 0x2db;     A->rows[ 6][0] = 0x0e9;     A->rows[ 7][0] = 0x13a;
223     A->rows[ 8][0] = 0x274;     A->rows[ 9][0] = 0x1d3;     A->rows[10][0] = 0x29d;     A->rows[11][0] = 0x34e;
224     A->rows[12][0] = 0x0b9;     A->rows[13][0] = 0x172;     A->rows[14][0] = 0x25c;     A->rows[15][0] = 0x1cb;
225     A->rows[16][0] = 0x2e5;     A->rows[17][0] = 0x32e;     A->rows[18][0] = 0x191;     A->rows[19][0] = 0x2b2;
226     A->rows[20][0] = 0x164;     A->rows[21][0] = 0x2c8;     A->rows[22][0] = 0x08f;     A->rows[23][0] = 0x323;
227     A->rows[24][0] = 0x0f5;     A->rows[25][0] = 0x07a;     A->rows[26][0] = 0x3ac;     A->rows[27][0] = 0x2f1;
228     A->rows[28][0] = 0x1e2;     A->rows[29][0] = 0x3c4;     A->rows[30][0] = 0x178;     A->rows[31][0] = 0x1af;
229     A->rows[32][0] = 0x313;     A->rows[33][0] = 0x135;     A->rows[34][0] = 0x09a;     A->rows[35][0] = 0x2bc;
230     return A;
231   case 11:
232     A = mzd_init(40, 11);
233     A->rows[ 0][0] = 0x001;     A->rows[ 1][0] = 0x36d;     A->rows[ 2][0] = 0x5b6;     A->rows[ 3][0] = 0x6db;
234     A->rows[ 4][0] = 0x555;     A->rows[ 5][0] = 0x2aa;     A->rows[ 6][0] = 0x7ff;     A->rows[ 7][0] = 0x400;
235     A->rows[ 8][0] = 0x200;     A->rows[ 9][0] = 0x600;     A->rows[10][0] = 0x4e9;     A->rows[11][0] = 0x53a;
236     A->rows[12][0] = 0x274;     A->rows[13][0] = 0x1d3;     A->rows[14][0] = 0x69d;     A->rows[15][0] = 0x74e;
237     A->rows[16][0] = 0x4b9;     A->rows[17][0] = 0x172;     A->rows[18][0] = 0x65c;     A->rows[19][0] = 0x5cb;
238     A->rows[20][0] = 0x2e5;     A->rows[21][0] = 0x72e;     A->rows[22][0] = 0x591;     A->rows[23][0] = 0x6b2;
239     A->rows[24][0] = 0x564;     A->rows[25][0] = 0x2c8;     A->rows[26][0] = 0x48f;     A->rows[27][0] = 0x323;
240     A->rows[28][0] = 0x0f5;     A->rows[29][0] = 0x47a;     A->rows[30][0] = 0x7ac;     A->rows[31][0] = 0x2f1;
241     A->rows[32][0] = 0x5e2;     A->rows[33][0] = 0x3c4;     A->rows[34][0] = 0x578;     A->rows[35][0] = 0x1af;
242     A->rows[36][0] = 0x713;     A->rows[37][0] = 0x135;     A->rows[38][0] = 0x09a;     A->rows[39][0] = 0x6bc;
243     return A;
244   case 12:
245     A = mzd_init(45, 12);
246     A->rows[ 0][0] = 0xb6d;     A->rows[ 1][0] = 0xdb6;     A->rows[ 2][0] = 0x6db;     A->rows[ 3][0] = 0x001;
247     A->rows[ 4][0] = 0x002;     A->rows[ 5][0] = 0x003;     A->rows[ 6][0] = 0x555;     A->rows[ 7][0] = 0xaaa;
248     A->rows[ 8][0] = 0xfff;     A->rows[ 9][0] = 0x800;     A->rows[10][0] = 0x400;     A->rows[11][0] = 0xc00;
249     A->rows[12][0] = 0x4e9;     A->rows[13][0] = 0xd3a;     A->rows[14][0] = 0xa74;     A->rows[15][0] = 0x9d3;
250     A->rows[16][0] = 0xe9d;     A->rows[17][0] = 0x74e;     A->rows[18][0] = 0x591;     A->rows[19][0] = 0xeb2;
251     A->rows[20][0] = 0xd64;     A->rows[21][0] = 0xac8;     A->rows[22][0] = 0xc8f;     A->rows[23][0] = 0xb23;
252     A->rows[24][0] = 0x8f5;     A->rows[25][0] = 0x47a;     A->rows[26][0] = 0x7ac;     A->rows[27][0] = 0xaf1;
253     A->rows[28][0] = 0x5e2;     A->rows[29][0] = 0xbc4;     A->rows[30][0] = 0xd78;     A->rows[31][0] = 0x9af;
254     A->rows[32][0] = 0xf13;     A->rows[33][0] = 0x135;     A->rows[34][0] = 0x89a;     A->rows[35][0] = 0x6bc;
255     A->rows[36][0] = 0x631;     A->rows[37][0] = 0xa52;     A->rows[38][0] = 0x294;     A->rows[39][0] = 0x318;
256     A->rows[40][0] = 0xdef;     A->rows[41][0] = 0xc63;     A->rows[42][0] = 0x4a5;     A->rows[43][0] = 0x94a;
257     A->rows[44][0] = 0x18c;
258     return A;
259   case 13:
260     A = mzd_init(49, 13);
261     A->rows[ 0][0] = 0x0001;     A->rows[ 1][0] = 0x1b6d;     A->rows[ 2][0] = 0x0db6;     A->rows[ 3][0] = 0x16db;
262     A->rows[ 4][0] = 0x1555;     A->rows[ 5][0] = 0x0aaa;     A->rows[ 6][0] = 0x1fff;     A->rows[ 7][0] = 0x1000;
263     A->rows[ 8][0] = 0x0800;     A->rows[ 9][0] = 0x1800;     A->rows[10][0] = 0x14e9;     A->rows[11][0] = 0x1d3a;
264     A->rows[12][0] = 0x1a74;     A->rows[13][0] = 0x09d3;     A->rows[14][0] = 0x0e9d;     A->rows[15][0] = 0x074e;
265     A->rows[16][0] = 0x1cb9;     A->rows[17][0] = 0x1972;     A->rows[18][0] = 0x0e5c;     A->rows[19][0] = 0x05cb;
266     A->rows[20][0] = 0x12e5;     A->rows[21][0] = 0x172e;     A->rows[22][0] = 0x1591;     A->rows[23][0] = 0x1eb2;
267     A->rows[24][0] = 0x1d64;     A->rows[25][0] = 0x1ac8;     A->rows[26][0] = 0x0c8f;     A->rows[27][0] = 0x0b23;
268     A->rows[28][0] = 0x08f5;     A->rows[29][0] = 0x047a;     A->rows[30][0] = 0x07ac;     A->rows[31][0] = 0x1af1;
269     A->rows[32][0] = 0x15e2;     A->rows[33][0] = 0x0bc4;     A->rows[34][0] = 0x0d78;     A->rows[35][0] = 0x09af;
270     A->rows[36][0] = 0x0f13;     A->rows[37][0] = 0x1135;     A->rows[38][0] = 0x189a;     A->rows[39][0] = 0x06bc;
271     A->rows[40][0] = 0x0631;     A->rows[41][0] = 0x0a52;     A->rows[42][0] = 0x1294;     A->rows[43][0] = 0x0318;
272     A->rows[44][0] = 0x1def;     A->rows[45][0] = 0x0c63;     A->rows[46][0] = 0x14a5;     A->rows[47][0] = 0x094a;
273     A->rows[48][0] = 0x118c;
274     return A;
275   case 14:
276     A = mzd_init(55, 14);
277     A->rows[ 0][0] = 0x0001;     A->rows[ 1][0] = 0x1b6d;     A->rows[ 2][0] = 0x2db6;     A->rows[ 3][0] = 0x36db;
278     A->rows[ 4][0] = 0x1555;     A->rows[ 5][0] = 0x2aaa;     A->rows[ 6][0] = 0x3fff;     A->rows[ 7][0] = 0x34e9;
279     A->rows[ 8][0] = 0x1d3a;     A->rows[ 9][0] = 0x3a74;     A->rows[10][0] = 0x29d3;     A->rows[11][0] = 0x0e9d;
280     A->rows[12][0] = 0x274e;     A->rows[13][0] = 0x1cb9;     A->rows[14][0] = 0x3972;     A->rows[15][0] = 0x2e5c;
281     A->rows[16][0] = 0x25cb;     A->rows[17][0] = 0x32e5;     A->rows[18][0] = 0x172e;     A->rows[19][0] = 0x3591;
282     A->rows[20][0] = 0x1eb2;     A->rows[21][0] = 0x3d64;     A->rows[22][0] = 0x3ac8;     A->rows[23][0] = 0x2c8f;
283     A->rows[24][0] = 0x2b23;     A->rows[25][0] = 0x08f5;     A->rows[26][0] = 0x247a;     A->rows[27][0] = 0x07ac;
284     A->rows[28][0] = 0x1af1;     A->rows[29][0] = 0x35e2;     A->rows[30][0] = 0x2bc4;     A->rows[31][0] = 0x0d78;
285     A->rows[32][0] = 0x09af;     A->rows[33][0] = 0x2f13;     A->rows[34][0] = 0x3135;     A->rows[35][0] = 0x389a;
286     A->rows[36][0] = 0x26bc;     A->rows[37][0] = 0x0631;     A->rows[38][0] = 0x0a52;     A->rows[39][0] = 0x1294;
287     A->rows[40][0] = 0x2318;     A->rows[41][0] = 0x3def;     A->rows[42][0] = 0x0c63;     A->rows[43][0] = 0x14a5;
288     A->rows[44][0] = 0x294a;     A->rows[45][0] = 0x318c;     A->rows[46][0] = 0x2000;     A->rows[47][0] = 0x1000;
289     A->rows[48][0] = 0x0800;     A->rows[49][0] = 0x0400;     A->rows[50][0] = 0x3c00;     A->rows[51][0] = 0x3000;
290     A->rows[52][0] = 0x2800;     A->rows[53][0] = 0x1400;     A->rows[54][0] = 0x0c00;
291     return A;
292   case 15:
293     A = mzd_init(60, 15);
294     A->rows[ 0][0] = 0x0001;     A->rows[ 1][0] = 0x7fff;     A->rows[ 2][0] = 0x5b6d;     A->rows[ 3][0] = 0x6db6;
295     A->rows[ 4][0] = 0x36db;     A->rows[ 5][0] = 0x4000;     A->rows[ 6][0] = 0x2000;     A->rows[ 7][0] = 0x6000;
296     A->rows[ 8][0] = 0x74e9;     A->rows[ 9][0] = 0x1d3a;     A->rows[10][0] = 0x3a74;     A->rows[11][0] = 0x69d3;
297     A->rows[12][0] = 0x4e9d;     A->rows[13][0] = 0x274e;     A->rows[14][0] = 0x5cb9;     A->rows[15][0] = 0x3972;
298     A->rows[16][0] = 0x2e5c;     A->rows[17][0] = 0x65cb;     A->rows[18][0] = 0x72e5;     A->rows[19][0] = 0x172e;
299     A->rows[20][0] = 0x7591;     A->rows[21][0] = 0x1eb2;     A->rows[22][0] = 0x3d64;     A->rows[23][0] = 0x7ac8;
300     A->rows[24][0] = 0x2c8f;     A->rows[25][0] = 0x6b23;     A->rows[26][0] = 0x48f5;     A->rows[27][0] = 0x647a;
301     A->rows[28][0] = 0x47ac;     A->rows[29][0] = 0x1af1;     A->rows[30][0] = 0x35e2;     A->rows[31][0] = 0x6bc4;
302     A->rows[32][0] = 0x4d78;     A->rows[33][0] = 0x09af;     A->rows[34][0] = 0x2f13;     A->rows[35][0] = 0x7135;
303     A->rows[36][0] = 0x789a;     A->rows[37][0] = 0x26bc;     A->rows[38][0] = 0x4631;     A->rows[39][0] = 0x4a52;
304     A->rows[40][0] = 0x5294;     A->rows[41][0] = 0x6318;     A->rows[42][0] = 0x3def;     A->rows[43][0] = 0x0c63;
305     A->rows[44][0] = 0x14a5;     A->rows[45][0] = 0x294a;     A->rows[46][0] = 0x318c;     A->rows[47][0] = 0x4d21;
306     A->rows[48][0] = 0x1a42;     A->rows[49][0] = 0x7348;     A->rows[50][0] = 0x6690;     A->rows[51][0] = 0x2bb1;
307     A->rows[52][0] = 0x5763;     A->rows[53][0] = 0x15d8;     A->rows[54][0] = 0x0576;     A->rows[55][0] = 0x47cd;
308     A->rows[56][0] = 0x42bb;     A->rows[57][0] = 0x4857;     A->rows[58][0] = 0x215d;     A->rows[59][0] = 0x3b1f;
309     return A;
310   case 16:
311     A = mzd_init(64, 16);
312     A->rows[ 0][0] = 0xdb6d;     A->rows[ 1][0] = 0x6db6;     A->rows[ 2][0] = 0xb6db;     A->rows[ 3][0] = 0x0001;
313     A->rows[ 4][0] = 0x0002;     A->rows[ 5][0] = 0x0003;     A->rows[ 6][0] = 0x5555;     A->rows[ 7][0] = 0xaaaa;
314     A->rows[ 8][0] = 0xffff;     A->rows[ 9][0] = 0x8000;     A->rows[10][0] = 0x4000;     A->rows[11][0] = 0xc000;
315     A->rows[12][0] = 0x74e9;     A->rows[13][0] = 0x9d3a;     A->rows[14][0] = 0x3a74;     A->rows[15][0] = 0xe9d3;
316     A->rows[16][0] = 0x4e9d;     A->rows[17][0] = 0xa74e;     A->rows[18][0] = 0x5cb9;     A->rows[19][0] = 0xb972;
317     A->rows[20][0] = 0x2e5c;     A->rows[21][0] = 0xe5cb;     A->rows[22][0] = 0x72e5;     A->rows[23][0] = 0x972e;
318     A->rows[24][0] = 0xf591;     A->rows[25][0] = 0x1eb2;     A->rows[26][0] = 0x3d64;     A->rows[27][0] = 0x7ac8;
319     A->rows[28][0] = 0xac8f;     A->rows[29][0] = 0xeb23;     A->rows[30][0] = 0xc8f5;     A->rows[31][0] = 0x647a;
320     A->rows[32][0] = 0x47ac;     A->rows[33][0] = 0x9af1;     A->rows[34][0] = 0x35e2;     A->rows[35][0] = 0x6bc4;
321     A->rows[36][0] = 0x4d78;     A->rows[37][0] = 0x89af;     A->rows[38][0] = 0xaf13;     A->rows[39][0] = 0xf135;
322     A->rows[40][0] = 0x789a;     A->rows[41][0] = 0x26bc;     A->rows[42][0] = 0xc631;     A->rows[43][0] = 0x4a52;
323     A->rows[44][0] = 0x5294;     A->rows[45][0] = 0x6318;     A->rows[46][0] = 0xbdef;     A->rows[47][0] = 0x8c63;
324     A->rows[48][0] = 0x94a5;     A->rows[49][0] = 0x294a;     A->rows[50][0] = 0x318c;     A->rows[51][0] = 0xcd21;
325     A->rows[52][0] = 0x9a42;     A->rows[53][0] = 0xf348;     A->rows[54][0] = 0xe690;     A->rows[55][0] = 0x2bb1;
326     A->rows[56][0] = 0x5763;     A->rows[57][0] = 0x15d8;     A->rows[58][0] = 0x8576;     A->rows[59][0] = 0xc7cd;
327     A->rows[60][0] = 0x42bb;     A->rows[61][0] = 0x4857;     A->rows[62][0] = 0x215d;     A->rows[63][0] = 0xbb1f;
328     return A;
329 
330   default:
331     m4ri_die("only degrees up to 16 are implemented but got degree %d\n", degree);
332   }
333   return NULL;
334 }
335 
336 /**
337  * \param length The length of the polynomial we want to reduce
338  * \param poly A polynomial
339  * \param d The degree of poly
340  */
341 
_crt_modred_mat(const deg_t length,const word poly,const deg_t d)342 mzd_t *_crt_modred_mat(const deg_t length, const word poly, const deg_t d) {
343   mzd_t *A = mzd_init(d, length);
344 
345   /* (x-infinity)^d */
346   if (poly == 0) {
347     for(deg_t i=0; i<d; i++)
348       mzd_write_bit(A, i, length-i-1, 1);
349     return A;
350   }
351 
352   mzd_t *f = mzd_init(1, length);
353   mzd_t *t = mzd_init(1, length);
354 
355   for(deg_t i=0; i<length; i++) {
356     mzd_set_ui(f, 0);
357     f->rows[0][i/m4ri_radix] = __M4RI_TWOPOW(i%m4ri_radix);
358     word ii = i;
359     while(ii >= d) {
360       /* f ^= gf2x_mul((1ULL<<(ii-d)), poly, length); */
361       mzd_set_ui(t, 0);
362       mzd_xor_bits(t, 0, ii-d, d+1, poly);
363 
364       mzd_add(f, f, t);
365 
366       /* ii = gf2x_deg(f); */
367       ii = 0;
368       for(wi_t j=f->width-1; j>=0; j--) {
369         if (f->rows[0][j]) {
370           ii = gf2x_deg(f->rows[0][j]) + m4ri_radix*j;
371           break;
372         }
373       }
374     }
375     for(deg_t j=0; j<= ii; j++)
376       mzd_write_bit(A, j, i, (f->rows[0][j/m4ri_radix]>>(j%m4ri_radix)) & 0x1);
377   }
378   return A;
379 }
380 
_blm_finish_polymult(const gf2e * ff,blm_t * f)381 blm_t *_blm_finish_polymult(const gf2e *ff, blm_t *f) {
382   assert( (f != NULL) & (f->F != NULL) & (f->G != NULL) );
383 
384   const rci_t m = f->F->nrows;
385   const rci_t c_nrows = f->F->ncols + f->G->ncols - 1;
386 
387   mzd_t *H = mzd_init(c_nrows, m);
388 
389   mzd_t *F_T = mzd_transpose(NULL, f->F);
390   mzd_t *G_T = mzd_transpose(NULL, f->G);
391 
392   mzd_t *C = mzd_init(m, m);
393 
394   /* we find a full rank matrix */
395 
396   word v = 0;
397   word w = 0;
398   rci_t r = 0;
399   rci_t rank = 0;
400 
401   mzd_t *pivots = mzd_init(m, m4ri_radix*2);
402 
403   mzp_t *P = mzp_init(m);
404   mzp_t *Q = mzp_init(m);
405 
406   while(rank < m) {
407     /* x^v = (0, 0, ... , 1, ..., 0, 0) * F_T -> select row v */
408     for(wi_t j=0; j< C->width; j++)
409       C->rows[r][j] = F_T->rows[v][j] & G_T->rows[w][j];
410 
411     pivots->rows[r][0] = v;
412     pivots->rows[r][1] = w;
413 
414     w++;
415     if (w == (word)f->G->ncols) {
416       v++;
417       if (v == (word)f->F->ncols)
418         v = 0;
419       w = v;
420     }
421     r++;
422     if (r == C->nrows) {
423       mzd_t *D = mzd_copy(NULL, C);
424       rank = mzd_ple(D, P, Q, 0);
425       mzd_apply_p_left(pivots, P);
426       mzd_apply_p_left(C, P);
427       mzd_free(D);
428 
429       if (rank < m)
430         r = rank;
431     }
432   }
433   mzp_free(P);
434   mzp_free(Q);
435 
436   for(r=0; r<m; r++) {
437     /* x^v = (0, 0, ... , 1, ..., 0, 0) * F_T -> select row v */
438     v = pivots->rows[r][0];
439     w = pivots->rows[r][1];
440     for(wi_t j=0; j< C->width; j++)
441       C->rows[r][j] = F_T->rows[v][j] & G_T->rows[w][j];
442   }
443   mzd_free(F_T);
444   mzd_free(G_T);
445 
446   // This should be replaced by TRSM calls
447   mzd_t *D = mzd_inv_m4ri(NULL, C, 0);
448   mzd_free(C);
449   mzd_t *DT = mzd_transpose(NULL, D);
450   mzd_free(D);
451 
452   mzd_t *a = mzd_init(1, m);
453   mzd_t *b = mzd_init(1, H->ncols);
454 
455   for(rci_t i=0; i<H->nrows; i++) {
456     mzd_set_ui(a, 0);
457     for(rci_t j=0; j<m; j++) {
458       v = pivots->rows[j][0];
459       w = pivots->rows[j][1];
460       if ((v+w) == (word)i)
461         mzd_write_bit(a, 0, j, 1);
462     }
463     mzd_mul(b, a, DT, 0);
464 
465     /* copy result to H */
466     for(rci_t j=0; j<H->ncols; j++)
467       mzd_write_bit(H, i, j, mzd_read_bit(b, 0, j ));
468   }
469   mzd_free(a);
470   mzd_free(b);
471   mzd_free(pivots);
472 
473   if (ff == NULL) {
474     f->H = H;
475   } else {
476     mzd_t *N = _crt_modred_mat(H->nrows, ff->minpoly, ff->degree);
477     f->H = mzd_mul(NULL, N, H, 0);
478     mzd_free(N);
479     mzd_free(H);
480   }
481   return f;
482 }
483 
484 const int costs[17] = {0, 1, 3, 6, 9, 13, 17, 22, 27, 31, 36, 40, 45, 49, 55, 60, 64};
485 //const int costs[17] = {0, 1, 3, 6, 9, 13, 15, 22, 24, 30, 33, 39, 42, 48, 51, 54, 60}; /* best possible */
486 
blm_init_crt(const gf2e * ff,const deg_t f_ncols,const deg_t g_ncols,const int * p,int djb)487 blm_t *blm_init_crt(const gf2e *ff, const deg_t f_ncols, const deg_t g_ncols, const int *p, int djb) {
488   blm_t *f = m4ri_mm_malloc(sizeof(blm_t));
489 
490   // iterator over irreducible polynomials
491   int *p_it = (int*)m4ri_mm_calloc(sizeof(int), M4RIE_CRT_LEN);
492 
493   mzd_t *M, *T;
494 
495   word poly = 0;
496 
497   rci_t m = costs[p[0]];
498   for(int d=1; d<M4RIE_CRT_LEN; d++)
499     m += costs[d] * p[d];
500 
501   f->F = mzd_init(m, f_ncols);
502   f->f = NULL;
503   f->G = mzd_init(m, g_ncols);
504   f->g = NULL;
505 
506   rci_t r = 0;
507 
508 
509   /**
510    * 1) We construct maps F,G which combine modular reduction to degree d and the linear map
511    *    required for multiplying modulo a polynomial of degree d.
512    */
513 
514   /**
515    * 1.1) We deal with (x+infinity)^omega first
516    */
517 
518   if(p[0] != 0) {
519     deg_t d = p[0];
520     mzd_t *N  = _small_multiplication_map(d);
521     M = _crt_modred_mat(f_ncols, poly, d);
522     T = mzd_init_window(f->F, r, 0, r + costs[d], f_ncols);
523     mzd_mul(T, N, M, 0);
524     mzd_free(T);
525     mzd_free(M);
526 
527     M = _crt_modred_mat(g_ncols, poly, d);
528     T = mzd_init_window(f->G, r, 0, r + costs[d], g_ncols);
529     mzd_mul(T, N, M, 0);
530     mzd_free(T);
531     mzd_free(M);
532 
533     mzd_free(N);
534 
535     r += costs[d];
536   }
537 
538   /**
539    * 1.2) We deal with regular polynomial which are co-prime
540    */
541 
542   for(deg_t d=1; d<M4RIE_CRT_LEN; d++) {
543     if (p[d] == 0)
544       continue;
545 
546     mzd_t *N  = _small_multiplication_map(d);
547 
548     for(int i=0; i<p[d]; i++) {
549       if (p_it[d] < irreducible_polynomials[d][0]) {
550         poly = irreducible_polynomials[d][ 1 + p_it[d] ];
551         p_it[d]++;
552       } else if (d/2 && p_it[d/2] < irreducible_polynomials[d/2][0]) {
553         /** the minimal polynomial is a square */
554         poly = irreducible_polynomials[d/2][ 1 + p_it[d/2]];
555         p_it[d/2]++;
556         poly = gf2x_mul(poly, poly, d/2+1);
557       } else if (d/4 && p_it[d/4] < irreducible_polynomials[d/4][0]) {
558         /** the minimal polynomial is a fourth power */
559         poly = irreducible_polynomials[d/4][1 + p_it[d/4]];
560         p_it[d/4]++;
561         poly = gf2x_mul(poly, poly, d/4+1);
562         poly = gf2x_mul(poly, poly, d/2+1);
563       } else if (d/8 && p_it[d/8] < irreducible_polynomials[d/8][0]) {
564         /** the minimal polynomial is an eigth power */
565         poly = irreducible_polynomials[d/8][p_it[d/8]+ 1];
566         p_it[d/8]++;
567         poly = gf2x_mul(poly, poly, d/8+1);
568         poly = gf2x_mul(poly, poly, d/4+1);
569         poly = gf2x_mul(poly, poly, d/2+1);
570       } else {
571         m4ri_die("Degree %d is not implemented\n", d);
572       }
573       M = _crt_modred_mat(f_ncols, poly, d);
574       T = mzd_init_window(f->F, r, 0, r + costs[d], f_ncols);
575       mzd_mul(T, N, M, 0);
576       mzd_free(T);
577       mzd_free(M);
578 
579       M = _crt_modred_mat(g_ncols, poly, d);
580       T = mzd_init_window(f->G, r, 0, r + costs[d], g_ncols);
581       mzd_mul(T, N, M, 0);
582       mzd_free(T);
583       mzd_free(M);
584 
585       r += costs[d];
586     }
587 
588     mzd_free(N);
589   }
590 
591   m4ri_mm_free(p_it);
592 
593   /**
594    * 2) We solve for H as we know poly(c) and (F*vec(a) x G*vec(b)). We pick points poly(a) = x^v,
595    * poly(b) = x^w (hence: poly(c) = x^(v+w)).
596    */
597   _blm_finish_polymult(ff, f);
598   f->h = NULL;
599 
600   /**
601    * 3) We compile DJB maps if asked for.
602    */
603 
604   if (djb)
605     _blm_djb_compile(f);
606 
607   return f;
608 }
609 
610 
blm_free(blm_t * f)611 void blm_free(blm_t *f) {
612   mzd_free(f->F);
613   mzd_free(f->G);
614   mzd_free(f->H);
615   if (f->f != f->g)
616     djb_free(f->g);
617   djb_free(f->f);
618   djb_free(f->h);
619 
620   m4ri_mm_free(f);
621 }
622