1 #include <m4ri/djb.h>
2 #include <m4rie/blm.h>
3 #include <m4rie/mzd_ptr.h>
4
5
_mzd_ptr_apply_blm_mzd(mzd_t ** X,const mzd_t ** A,const mzd_t ** B,const blm_t * f)6 void _mzd_ptr_apply_blm_mzd(mzd_t **X, const mzd_t **A, const mzd_t **B, const blm_t *f) {
7 assert((f->H!=NULL) & (f->F!=NULL) & (f->G!=NULL) &
8 (f->H->ncols == f->F->nrows) &
9 (f->F->nrows == f->G->nrows));
10
11 mzd_t *t0 = mzd_init(A[0]->nrows, B[0]->ncols);
12 mzd_t *t1 = mzd_init(A[0]->nrows, A[0]->ncols);
13 mzd_t *t2 = mzd_init(B[0]->nrows, B[0]->ncols);
14
15 for(rci_t i=0; i < f->F->nrows; i++) {
16 mzd_set_ui(t1, 0);
17 for(rci_t j=0; j < f->F->ncols; j++) {
18 if(mzd_read_bit(f->F, i, j)) {
19 mzd_add(t1, t1, A[j]);
20 }
21 }
22
23 mzd_set_ui(t2, 0);
24 for(rci_t j=0; j < f->G->ncols; j++) {
25 if(mzd_read_bit(f->G, i, j)) {
26 mzd_add(t2, t2, B[j]);
27 }
28 }
29
30
31 mzd_mul(t0, t1, t2, 0);
32
33 for(rci_t j=0; j < f->H->nrows; j++)
34 if(mzd_read_bit(f->H, j, i))
35 _mzd_ptr_add_modred(NULL, t0, X, j);
36 }
37
38 mzd_free(t0);
39 mzd_free(t1);
40 mzd_free(t2);
41 }
42
_blm_djb_compile(blm_t * f)43 blm_t *_blm_djb_compile(blm_t *f) {
44 assert((f->f == NULL) && (f->g == NULL) && (f->h == NULL));
45 assert((f->F != NULL) && (f->G != NULL) && (f->H != NULL));
46
47
48 mzd_t *F = mzd_copy(NULL, f->F);
49 f->f = djb_compile(F);
50 mzd_free(F);
51 if(mzd_equal(f->F, f->G))
52 f->g = f->f;
53 else {
54 mzd_t *G = mzd_copy(NULL, f->G);
55 f->g = djb_compile(G);
56 mzd_free(G);
57 }
58 mzd_t *H = mzd_copy(NULL, f->H);
59 f->h = djb_compile(H);
60 mzd_free(H);
61
62 return f;
63 }
64
_mzd_ptr_apply_blm_djb(mzd_t ** X,const mzd_t ** A,const mzd_t ** B,const blm_t * f)65 void _mzd_ptr_apply_blm_djb(mzd_t **X, const mzd_t **A, const mzd_t **B, const blm_t *f) {
66 assert((f->H!=NULL) & (f->F!=NULL) & (f->G!=NULL) & \
67 (f->H->ncols == f->F->nrows) & \
68 (f->F->nrows == f->G->nrows));
69
70
71 mzd_t **t0 = (mzd_t**)m4ri_mm_malloc(sizeof(mzd_t*)*f->F->nrows);
72 mzd_t **t1 = (mzd_t**)m4ri_mm_malloc(sizeof(mzd_t*)*f->F->nrows);
73 mzd_t **t2 = (mzd_t**)m4ri_mm_malloc(sizeof(mzd_t*)*f->F->nrows);
74
75 for(rci_t i=0; i<f->F->nrows; i++) {
76 t1[i] = mzd_init(A[0]->nrows, A[0]->ncols);
77 t2[i] = mzd_init(B[0]->nrows, B[0]->ncols);
78 }
79
80 djb_apply_mzd_ptr(f->f, t1, A);
81 djb_apply_mzd_ptr(f->g, t2, B);
82
83 for(rci_t i=0; i<f->F->nrows; i++) {
84 t0[i] = mzd_init(A[0]->nrows, B[0]->ncols);
85 mzd_mul(t0[i], t1[i], t2[i], 0);
86 mzd_free(t1[i]);
87 mzd_free(t2[i]);
88 }
89
90
91
92 djb_apply_mzd_ptr(f->h, X, (const mzd_t**)t0);
93
94 for(rci_t i=0; i<f->F->nrows; i++) {
95 mzd_free(t0[i]);
96 }
97
98 m4ri_mm_free(t0);
99 m4ri_mm_free(t1);
100 m4ri_mm_free(t2);
101 }
102
103
104
crt_init(const deg_t f_len,const deg_t g_len)105 int *crt_init(const deg_t f_len, const deg_t g_len) {
106 int *p_best = (int*)m4ri_mm_calloc(M4RIE_CRT_LEN, sizeof(int));
107 int c_best = f_len * g_len;
108
109 int *p = (int*)m4ri_mm_calloc(M4RIE_CRT_LEN, sizeof(int));
110
111 for(deg_t omega=0; omega<8; omega++) {
112
113 deg_t deg_need = f_len+g_len-1-omega;
114 deg_t deg_have = 0;
115 deg_t deg_poly = 1;
116
117 p[0] = omega;
118 for(deg_t d=1; d<M4RIE_CRT_LEN; d++)
119 p[d] = 0;
120
121 while (deg_have < deg_need) {
122 p[deg_poly] = irreducible_polynomials[deg_poly][0];
123 if (deg_have + deg_poly * p[deg_poly] < deg_need) {
124 deg_have += deg_poly * p[deg_poly];
125 } else {
126 deg_t deg_diff = deg_need - deg_have;
127 p[deg_poly] = ceil(deg_diff/(double)deg_poly);
128 deg_have += deg_poly * p[deg_poly];
129 }
130 deg_poly ++;
131 }
132
133 deg_t deg_diff = deg_have - deg_need;
134 if (deg_diff && p[deg_diff] > 0) {
135 p[deg_diff]--;
136 deg_have -= deg_diff;
137 }
138
139 int c = costs[p[0]];
140 for(deg_t d=1; d<M4RIE_CRT_LEN; d++)
141 c += costs[d] * p[d];
142
143 if (c < c_best) {
144 for(deg_t d=0; d<M4RIE_CRT_LEN; d++)
145 p_best[d] = p[d];
146 c_best = c;
147 }
148 }
149 m4ri_mm_free(p);
150 return p_best;
151 }
152
_small_multiplication_map(const deg_t degree)153 mzd_t *_small_multiplication_map(const deg_t degree) {
154 mzd_t *A;
155 switch(degree) {
156 case 1:
157 A = mzd_init(1, 1);
158 A->rows[0][0] = 0x1;
159 return A;
160 case 2:
161 A = mzd_init(3, 2);
162 A->rows[0][0] = 0x1; A->rows[1][0] = 0x3; A->rows[2][0] = 0x2;
163 return A;
164 case 3:
165 A = mzd_init(6, 3);
166 A->rows[0][0] = 0x1; A->rows[1][0] = 0x2; A->rows[2][0] = 0x4;
167 A->rows[3][0] = 0x3; A->rows[4][0] = 0x5; A->rows[5][0] = 0x6;
168 return A;
169 case 4:
170 A = mzd_init(9, 4);
171 A->rows[0][0] = 0x1; A->rows[1][0] = 0x2; A->rows[2][0] = 0x4; A->rows[3][0] = 0x8;
172 A->rows[4][0] = 0xf; A->rows[5][0] = 0x3; A->rows[6][0] = 0x5; A->rows[7][0] = 0xa; A->rows[8][0] = 0xc;
173 return A;
174 case 5:
175 A = mzd_init(13, 5);
176 A->rows[ 0][0] = 0x01; A->rows[ 1][0] = 0x02; A->rows[ 2][0] = 0x08; A->rows[ 3][0] = 0x10;
177 A->rows[ 4][0] = 0x11; A->rows[ 5][0] = 0x03; A->rows[ 6][0] = 0x18; A->rows[ 7][0] = 0x16;
178 A->rows[ 8][0] = 0x0d; A->rows[ 9][0] = 0x1b; A->rows[10][0] = 0x17; A->rows[11][0] = 0x1d;
179 A->rows[12][0] = 0x1f;
180 return A;
181 case 6:
182 A = mzd_init(17, 6);
183 A->rows[ 0][0] = 0x01; A->rows[ 1][0] = 0x02; A->rows[ 2][0] = 0x10; A->rows[ 3][0] = 0x20;
184 A->rows[ 4][0] = 0x30; A->rows[ 5][0] = 0x03; A->rows[ 6][0] = 0x18; A->rows[ 7][0] = 0x06;
185 A->rows[ 8][0] = 0x12; A->rows[ 9][0] = 0x0c; A->rows[10][0] = 0x38; A->rows[11][0] = 0x07;
186 A->rows[12][0] = 0x29; A->rows[13][0] = 0x25; A->rows[14][0] = 0x2d; A->rows[15][0] = 0x1b;
187 A->rows[16][0] = 0x3f;
188 return A;
189 case 7:
190 A = mzd_init(22, 7);
191 A->rows[ 0][0] = 0x7f; A->rows[ 1][0] = 0x6e; A->rows[ 2][0] = 0x3b; A->rows[ 3][0] = 0x5d;
192 A->rows[ 4][0] = 0x6d; A->rows[ 5][0] = 0x5b; A->rows[ 6][0] = 0x36; A->rows[ 7][0] = 0x03;
193 A->rows[ 8][0] = 0x05; A->rows[ 9][0] = 0x11; A->rows[10][0] = 0x0a; A->rows[11][0] = 0x44;
194 A->rows[12][0] = 0x28; A->rows[13][0] = 0x50; A->rows[14][0] = 0x60; A->rows[15][0] = 0x01;
195 A->rows[16][0] = 0x02; A->rows[17][0] = 0x04; A->rows[18][0] = 0x08; A->rows[19][0] = 0x10;
196 A->rows[20][0] = 0x20; A->rows[21][0] = 0x40;
197 return A;
198 case 8:
199 A = mzd_init(27, 8);
200 A->rows[ 0][0] = 0x01; A->rows[ 1][0] = 0x02; A->rows[ 2][0] = 0x04; A->rows[ 3][0] = 0x08;
201 A->rows[ 4][0] = 0x10; A->rows[ 5][0] = 0x20; A->rows[ 6][0] = 0x40; A->rows[ 7][0] = 0x80;
202 A->rows[ 8][0] = 0x05; A->rows[ 9][0] = 0x0c; A->rows[10][0] = 0x44; A->rows[11][0] = 0x30;
203 A->rows[12][0] = 0xc0; A->rows[13][0] = 0x0f; A->rows[14][0] = 0xa0; A->rows[15][0] = 0x11;
204 A->rows[16][0] = 0x0a; A->rows[17][0] = 0xaa; A->rows[18][0] = 0x03; A->rows[19][0] = 0x50;
205 A->rows[20][0] = 0x22; A->rows[21][0] = 0xff; A->rows[22][0] = 0x55; A->rows[23][0] = 0x33;
206 A->rows[24][0] = 0xcc; A->rows[25][0] = 0x88; A->rows[26][0] = 0xf0;
207 return A;
208 case 9:
209 A = mzd_init(31, 9);
210 A->rows[ 0][0] = 0x100; A->rows[ 1][0] = 0x001; A->rows[ 2][0] = 0x002; A->rows[ 3][0] = 0x003;
211 A->rows[ 4][0] = 0x155; A->rows[ 5][0] = 0x0aa; A->rows[ 6][0] = 0x1ff; A->rows[ 7][0] = 0x16d;
212 A->rows[ 8][0] = 0x1b6; A->rows[ 9][0] = 0x0db; A->rows[10][0] = 0x0e9; A->rows[11][0] = 0x13a;
213 A->rows[12][0] = 0x074; A->rows[13][0] = 0x1d3; A->rows[14][0] = 0x09d; A->rows[15][0] = 0x14e;
214 A->rows[16][0] = 0x0b9; A->rows[17][0] = 0x172; A->rows[18][0] = 0x05c; A->rows[19][0] = 0x1cb;
215 A->rows[20][0] = 0x0e5; A->rows[21][0] = 0x12e; A->rows[22][0] = 0x191; A->rows[23][0] = 0x0b2;
216 A->rows[24][0] = 0x164; A->rows[25][0] = 0x0c8; A->rows[26][0] = 0x08f; A->rows[27][0] = 0x123;
217 A->rows[28][0] = 0x0f5; A->rows[29][0] = 0x07a; A->rows[30][0] = 0x1ac;
218 return A;
219 case 10:
220 A = mzd_init(36, 10);
221 A->rows[ 0][0] = 0x200; A->rows[ 1][0] = 0x001; A->rows[ 2][0] = 0x3ff; A->rows[ 3][0] = 0x36d;
222 A->rows[ 4][0] = 0x1b6; A->rows[ 5][0] = 0x2db; A->rows[ 6][0] = 0x0e9; A->rows[ 7][0] = 0x13a;
223 A->rows[ 8][0] = 0x274; A->rows[ 9][0] = 0x1d3; A->rows[10][0] = 0x29d; A->rows[11][0] = 0x34e;
224 A->rows[12][0] = 0x0b9; A->rows[13][0] = 0x172; A->rows[14][0] = 0x25c; A->rows[15][0] = 0x1cb;
225 A->rows[16][0] = 0x2e5; A->rows[17][0] = 0x32e; A->rows[18][0] = 0x191; A->rows[19][0] = 0x2b2;
226 A->rows[20][0] = 0x164; A->rows[21][0] = 0x2c8; A->rows[22][0] = 0x08f; A->rows[23][0] = 0x323;
227 A->rows[24][0] = 0x0f5; A->rows[25][0] = 0x07a; A->rows[26][0] = 0x3ac; A->rows[27][0] = 0x2f1;
228 A->rows[28][0] = 0x1e2; A->rows[29][0] = 0x3c4; A->rows[30][0] = 0x178; A->rows[31][0] = 0x1af;
229 A->rows[32][0] = 0x313; A->rows[33][0] = 0x135; A->rows[34][0] = 0x09a; A->rows[35][0] = 0x2bc;
230 return A;
231 case 11:
232 A = mzd_init(40, 11);
233 A->rows[ 0][0] = 0x001; A->rows[ 1][0] = 0x36d; A->rows[ 2][0] = 0x5b6; A->rows[ 3][0] = 0x6db;
234 A->rows[ 4][0] = 0x555; A->rows[ 5][0] = 0x2aa; A->rows[ 6][0] = 0x7ff; A->rows[ 7][0] = 0x400;
235 A->rows[ 8][0] = 0x200; A->rows[ 9][0] = 0x600; A->rows[10][0] = 0x4e9; A->rows[11][0] = 0x53a;
236 A->rows[12][0] = 0x274; A->rows[13][0] = 0x1d3; A->rows[14][0] = 0x69d; A->rows[15][0] = 0x74e;
237 A->rows[16][0] = 0x4b9; A->rows[17][0] = 0x172; A->rows[18][0] = 0x65c; A->rows[19][0] = 0x5cb;
238 A->rows[20][0] = 0x2e5; A->rows[21][0] = 0x72e; A->rows[22][0] = 0x591; A->rows[23][0] = 0x6b2;
239 A->rows[24][0] = 0x564; A->rows[25][0] = 0x2c8; A->rows[26][0] = 0x48f; A->rows[27][0] = 0x323;
240 A->rows[28][0] = 0x0f5; A->rows[29][0] = 0x47a; A->rows[30][0] = 0x7ac; A->rows[31][0] = 0x2f1;
241 A->rows[32][0] = 0x5e2; A->rows[33][0] = 0x3c4; A->rows[34][0] = 0x578; A->rows[35][0] = 0x1af;
242 A->rows[36][0] = 0x713; A->rows[37][0] = 0x135; A->rows[38][0] = 0x09a; A->rows[39][0] = 0x6bc;
243 return A;
244 case 12:
245 A = mzd_init(45, 12);
246 A->rows[ 0][0] = 0xb6d; A->rows[ 1][0] = 0xdb6; A->rows[ 2][0] = 0x6db; A->rows[ 3][0] = 0x001;
247 A->rows[ 4][0] = 0x002; A->rows[ 5][0] = 0x003; A->rows[ 6][0] = 0x555; A->rows[ 7][0] = 0xaaa;
248 A->rows[ 8][0] = 0xfff; A->rows[ 9][0] = 0x800; A->rows[10][0] = 0x400; A->rows[11][0] = 0xc00;
249 A->rows[12][0] = 0x4e9; A->rows[13][0] = 0xd3a; A->rows[14][0] = 0xa74; A->rows[15][0] = 0x9d3;
250 A->rows[16][0] = 0xe9d; A->rows[17][0] = 0x74e; A->rows[18][0] = 0x591; A->rows[19][0] = 0xeb2;
251 A->rows[20][0] = 0xd64; A->rows[21][0] = 0xac8; A->rows[22][0] = 0xc8f; A->rows[23][0] = 0xb23;
252 A->rows[24][0] = 0x8f5; A->rows[25][0] = 0x47a; A->rows[26][0] = 0x7ac; A->rows[27][0] = 0xaf1;
253 A->rows[28][0] = 0x5e2; A->rows[29][0] = 0xbc4; A->rows[30][0] = 0xd78; A->rows[31][0] = 0x9af;
254 A->rows[32][0] = 0xf13; A->rows[33][0] = 0x135; A->rows[34][0] = 0x89a; A->rows[35][0] = 0x6bc;
255 A->rows[36][0] = 0x631; A->rows[37][0] = 0xa52; A->rows[38][0] = 0x294; A->rows[39][0] = 0x318;
256 A->rows[40][0] = 0xdef; A->rows[41][0] = 0xc63; A->rows[42][0] = 0x4a5; A->rows[43][0] = 0x94a;
257 A->rows[44][0] = 0x18c;
258 return A;
259 case 13:
260 A = mzd_init(49, 13);
261 A->rows[ 0][0] = 0x0001; A->rows[ 1][0] = 0x1b6d; A->rows[ 2][0] = 0x0db6; A->rows[ 3][0] = 0x16db;
262 A->rows[ 4][0] = 0x1555; A->rows[ 5][0] = 0x0aaa; A->rows[ 6][0] = 0x1fff; A->rows[ 7][0] = 0x1000;
263 A->rows[ 8][0] = 0x0800; A->rows[ 9][0] = 0x1800; A->rows[10][0] = 0x14e9; A->rows[11][0] = 0x1d3a;
264 A->rows[12][0] = 0x1a74; A->rows[13][0] = 0x09d3; A->rows[14][0] = 0x0e9d; A->rows[15][0] = 0x074e;
265 A->rows[16][0] = 0x1cb9; A->rows[17][0] = 0x1972; A->rows[18][0] = 0x0e5c; A->rows[19][0] = 0x05cb;
266 A->rows[20][0] = 0x12e5; A->rows[21][0] = 0x172e; A->rows[22][0] = 0x1591; A->rows[23][0] = 0x1eb2;
267 A->rows[24][0] = 0x1d64; A->rows[25][0] = 0x1ac8; A->rows[26][0] = 0x0c8f; A->rows[27][0] = 0x0b23;
268 A->rows[28][0] = 0x08f5; A->rows[29][0] = 0x047a; A->rows[30][0] = 0x07ac; A->rows[31][0] = 0x1af1;
269 A->rows[32][0] = 0x15e2; A->rows[33][0] = 0x0bc4; A->rows[34][0] = 0x0d78; A->rows[35][0] = 0x09af;
270 A->rows[36][0] = 0x0f13; A->rows[37][0] = 0x1135; A->rows[38][0] = 0x189a; A->rows[39][0] = 0x06bc;
271 A->rows[40][0] = 0x0631; A->rows[41][0] = 0x0a52; A->rows[42][0] = 0x1294; A->rows[43][0] = 0x0318;
272 A->rows[44][0] = 0x1def; A->rows[45][0] = 0x0c63; A->rows[46][0] = 0x14a5; A->rows[47][0] = 0x094a;
273 A->rows[48][0] = 0x118c;
274 return A;
275 case 14:
276 A = mzd_init(55, 14);
277 A->rows[ 0][0] = 0x0001; A->rows[ 1][0] = 0x1b6d; A->rows[ 2][0] = 0x2db6; A->rows[ 3][0] = 0x36db;
278 A->rows[ 4][0] = 0x1555; A->rows[ 5][0] = 0x2aaa; A->rows[ 6][0] = 0x3fff; A->rows[ 7][0] = 0x34e9;
279 A->rows[ 8][0] = 0x1d3a; A->rows[ 9][0] = 0x3a74; A->rows[10][0] = 0x29d3; A->rows[11][0] = 0x0e9d;
280 A->rows[12][0] = 0x274e; A->rows[13][0] = 0x1cb9; A->rows[14][0] = 0x3972; A->rows[15][0] = 0x2e5c;
281 A->rows[16][0] = 0x25cb; A->rows[17][0] = 0x32e5; A->rows[18][0] = 0x172e; A->rows[19][0] = 0x3591;
282 A->rows[20][0] = 0x1eb2; A->rows[21][0] = 0x3d64; A->rows[22][0] = 0x3ac8; A->rows[23][0] = 0x2c8f;
283 A->rows[24][0] = 0x2b23; A->rows[25][0] = 0x08f5; A->rows[26][0] = 0x247a; A->rows[27][0] = 0x07ac;
284 A->rows[28][0] = 0x1af1; A->rows[29][0] = 0x35e2; A->rows[30][0] = 0x2bc4; A->rows[31][0] = 0x0d78;
285 A->rows[32][0] = 0x09af; A->rows[33][0] = 0x2f13; A->rows[34][0] = 0x3135; A->rows[35][0] = 0x389a;
286 A->rows[36][0] = 0x26bc; A->rows[37][0] = 0x0631; A->rows[38][0] = 0x0a52; A->rows[39][0] = 0x1294;
287 A->rows[40][0] = 0x2318; A->rows[41][0] = 0x3def; A->rows[42][0] = 0x0c63; A->rows[43][0] = 0x14a5;
288 A->rows[44][0] = 0x294a; A->rows[45][0] = 0x318c; A->rows[46][0] = 0x2000; A->rows[47][0] = 0x1000;
289 A->rows[48][0] = 0x0800; A->rows[49][0] = 0x0400; A->rows[50][0] = 0x3c00; A->rows[51][0] = 0x3000;
290 A->rows[52][0] = 0x2800; A->rows[53][0] = 0x1400; A->rows[54][0] = 0x0c00;
291 return A;
292 case 15:
293 A = mzd_init(60, 15);
294 A->rows[ 0][0] = 0x0001; A->rows[ 1][0] = 0x7fff; A->rows[ 2][0] = 0x5b6d; A->rows[ 3][0] = 0x6db6;
295 A->rows[ 4][0] = 0x36db; A->rows[ 5][0] = 0x4000; A->rows[ 6][0] = 0x2000; A->rows[ 7][0] = 0x6000;
296 A->rows[ 8][0] = 0x74e9; A->rows[ 9][0] = 0x1d3a; A->rows[10][0] = 0x3a74; A->rows[11][0] = 0x69d3;
297 A->rows[12][0] = 0x4e9d; A->rows[13][0] = 0x274e; A->rows[14][0] = 0x5cb9; A->rows[15][0] = 0x3972;
298 A->rows[16][0] = 0x2e5c; A->rows[17][0] = 0x65cb; A->rows[18][0] = 0x72e5; A->rows[19][0] = 0x172e;
299 A->rows[20][0] = 0x7591; A->rows[21][0] = 0x1eb2; A->rows[22][0] = 0x3d64; A->rows[23][0] = 0x7ac8;
300 A->rows[24][0] = 0x2c8f; A->rows[25][0] = 0x6b23; A->rows[26][0] = 0x48f5; A->rows[27][0] = 0x647a;
301 A->rows[28][0] = 0x47ac; A->rows[29][0] = 0x1af1; A->rows[30][0] = 0x35e2; A->rows[31][0] = 0x6bc4;
302 A->rows[32][0] = 0x4d78; A->rows[33][0] = 0x09af; A->rows[34][0] = 0x2f13; A->rows[35][0] = 0x7135;
303 A->rows[36][0] = 0x789a; A->rows[37][0] = 0x26bc; A->rows[38][0] = 0x4631; A->rows[39][0] = 0x4a52;
304 A->rows[40][0] = 0x5294; A->rows[41][0] = 0x6318; A->rows[42][0] = 0x3def; A->rows[43][0] = 0x0c63;
305 A->rows[44][0] = 0x14a5; A->rows[45][0] = 0x294a; A->rows[46][0] = 0x318c; A->rows[47][0] = 0x4d21;
306 A->rows[48][0] = 0x1a42; A->rows[49][0] = 0x7348; A->rows[50][0] = 0x6690; A->rows[51][0] = 0x2bb1;
307 A->rows[52][0] = 0x5763; A->rows[53][0] = 0x15d8; A->rows[54][0] = 0x0576; A->rows[55][0] = 0x47cd;
308 A->rows[56][0] = 0x42bb; A->rows[57][0] = 0x4857; A->rows[58][0] = 0x215d; A->rows[59][0] = 0x3b1f;
309 return A;
310 case 16:
311 A = mzd_init(64, 16);
312 A->rows[ 0][0] = 0xdb6d; A->rows[ 1][0] = 0x6db6; A->rows[ 2][0] = 0xb6db; A->rows[ 3][0] = 0x0001;
313 A->rows[ 4][0] = 0x0002; A->rows[ 5][0] = 0x0003; A->rows[ 6][0] = 0x5555; A->rows[ 7][0] = 0xaaaa;
314 A->rows[ 8][0] = 0xffff; A->rows[ 9][0] = 0x8000; A->rows[10][0] = 0x4000; A->rows[11][0] = 0xc000;
315 A->rows[12][0] = 0x74e9; A->rows[13][0] = 0x9d3a; A->rows[14][0] = 0x3a74; A->rows[15][0] = 0xe9d3;
316 A->rows[16][0] = 0x4e9d; A->rows[17][0] = 0xa74e; A->rows[18][0] = 0x5cb9; A->rows[19][0] = 0xb972;
317 A->rows[20][0] = 0x2e5c; A->rows[21][0] = 0xe5cb; A->rows[22][0] = 0x72e5; A->rows[23][0] = 0x972e;
318 A->rows[24][0] = 0xf591; A->rows[25][0] = 0x1eb2; A->rows[26][0] = 0x3d64; A->rows[27][0] = 0x7ac8;
319 A->rows[28][0] = 0xac8f; A->rows[29][0] = 0xeb23; A->rows[30][0] = 0xc8f5; A->rows[31][0] = 0x647a;
320 A->rows[32][0] = 0x47ac; A->rows[33][0] = 0x9af1; A->rows[34][0] = 0x35e2; A->rows[35][0] = 0x6bc4;
321 A->rows[36][0] = 0x4d78; A->rows[37][0] = 0x89af; A->rows[38][0] = 0xaf13; A->rows[39][0] = 0xf135;
322 A->rows[40][0] = 0x789a; A->rows[41][0] = 0x26bc; A->rows[42][0] = 0xc631; A->rows[43][0] = 0x4a52;
323 A->rows[44][0] = 0x5294; A->rows[45][0] = 0x6318; A->rows[46][0] = 0xbdef; A->rows[47][0] = 0x8c63;
324 A->rows[48][0] = 0x94a5; A->rows[49][0] = 0x294a; A->rows[50][0] = 0x318c; A->rows[51][0] = 0xcd21;
325 A->rows[52][0] = 0x9a42; A->rows[53][0] = 0xf348; A->rows[54][0] = 0xe690; A->rows[55][0] = 0x2bb1;
326 A->rows[56][0] = 0x5763; A->rows[57][0] = 0x15d8; A->rows[58][0] = 0x8576; A->rows[59][0] = 0xc7cd;
327 A->rows[60][0] = 0x42bb; A->rows[61][0] = 0x4857; A->rows[62][0] = 0x215d; A->rows[63][0] = 0xbb1f;
328 return A;
329
330 default:
331 m4ri_die("only degrees up to 16 are implemented but got degree %d\n", degree);
332 }
333 return NULL;
334 }
335
336 /**
337 * \param length The length of the polynomial we want to reduce
338 * \param poly A polynomial
339 * \param d The degree of poly
340 */
341
_crt_modred_mat(const deg_t length,const word poly,const deg_t d)342 mzd_t *_crt_modred_mat(const deg_t length, const word poly, const deg_t d) {
343 mzd_t *A = mzd_init(d, length);
344
345 /* (x-infinity)^d */
346 if (poly == 0) {
347 for(deg_t i=0; i<d; i++)
348 mzd_write_bit(A, i, length-i-1, 1);
349 return A;
350 }
351
352 mzd_t *f = mzd_init(1, length);
353 mzd_t *t = mzd_init(1, length);
354
355 for(deg_t i=0; i<length; i++) {
356 mzd_set_ui(f, 0);
357 f->rows[0][i/m4ri_radix] = __M4RI_TWOPOW(i%m4ri_radix);
358 word ii = i;
359 while(ii >= d) {
360 /* f ^= gf2x_mul((1ULL<<(ii-d)), poly, length); */
361 mzd_set_ui(t, 0);
362 mzd_xor_bits(t, 0, ii-d, d+1, poly);
363
364 mzd_add(f, f, t);
365
366 /* ii = gf2x_deg(f); */
367 ii = 0;
368 for(wi_t j=f->width-1; j>=0; j--) {
369 if (f->rows[0][j]) {
370 ii = gf2x_deg(f->rows[0][j]) + m4ri_radix*j;
371 break;
372 }
373 }
374 }
375 for(deg_t j=0; j<= ii; j++)
376 mzd_write_bit(A, j, i, (f->rows[0][j/m4ri_radix]>>(j%m4ri_radix)) & 0x1);
377 }
378 return A;
379 }
380
_blm_finish_polymult(const gf2e * ff,blm_t * f)381 blm_t *_blm_finish_polymult(const gf2e *ff, blm_t *f) {
382 assert( (f != NULL) & (f->F != NULL) & (f->G != NULL) );
383
384 const rci_t m = f->F->nrows;
385 const rci_t c_nrows = f->F->ncols + f->G->ncols - 1;
386
387 mzd_t *H = mzd_init(c_nrows, m);
388
389 mzd_t *F_T = mzd_transpose(NULL, f->F);
390 mzd_t *G_T = mzd_transpose(NULL, f->G);
391
392 mzd_t *C = mzd_init(m, m);
393
394 /* we find a full rank matrix */
395
396 word v = 0;
397 word w = 0;
398 rci_t r = 0;
399 rci_t rank = 0;
400
401 mzd_t *pivots = mzd_init(m, m4ri_radix*2);
402
403 mzp_t *P = mzp_init(m);
404 mzp_t *Q = mzp_init(m);
405
406 while(rank < m) {
407 /* x^v = (0, 0, ... , 1, ..., 0, 0) * F_T -> select row v */
408 for(wi_t j=0; j< C->width; j++)
409 C->rows[r][j] = F_T->rows[v][j] & G_T->rows[w][j];
410
411 pivots->rows[r][0] = v;
412 pivots->rows[r][1] = w;
413
414 w++;
415 if (w == (word)f->G->ncols) {
416 v++;
417 if (v == (word)f->F->ncols)
418 v = 0;
419 w = v;
420 }
421 r++;
422 if (r == C->nrows) {
423 mzd_t *D = mzd_copy(NULL, C);
424 rank = mzd_ple(D, P, Q, 0);
425 mzd_apply_p_left(pivots, P);
426 mzd_apply_p_left(C, P);
427 mzd_free(D);
428
429 if (rank < m)
430 r = rank;
431 }
432 }
433 mzp_free(P);
434 mzp_free(Q);
435
436 for(r=0; r<m; r++) {
437 /* x^v = (0, 0, ... , 1, ..., 0, 0) * F_T -> select row v */
438 v = pivots->rows[r][0];
439 w = pivots->rows[r][1];
440 for(wi_t j=0; j< C->width; j++)
441 C->rows[r][j] = F_T->rows[v][j] & G_T->rows[w][j];
442 }
443 mzd_free(F_T);
444 mzd_free(G_T);
445
446 // This should be replaced by TRSM calls
447 mzd_t *D = mzd_inv_m4ri(NULL, C, 0);
448 mzd_free(C);
449 mzd_t *DT = mzd_transpose(NULL, D);
450 mzd_free(D);
451
452 mzd_t *a = mzd_init(1, m);
453 mzd_t *b = mzd_init(1, H->ncols);
454
455 for(rci_t i=0; i<H->nrows; i++) {
456 mzd_set_ui(a, 0);
457 for(rci_t j=0; j<m; j++) {
458 v = pivots->rows[j][0];
459 w = pivots->rows[j][1];
460 if ((v+w) == (word)i)
461 mzd_write_bit(a, 0, j, 1);
462 }
463 mzd_mul(b, a, DT, 0);
464
465 /* copy result to H */
466 for(rci_t j=0; j<H->ncols; j++)
467 mzd_write_bit(H, i, j, mzd_read_bit(b, 0, j ));
468 }
469 mzd_free(a);
470 mzd_free(b);
471 mzd_free(pivots);
472
473 if (ff == NULL) {
474 f->H = H;
475 } else {
476 mzd_t *N = _crt_modred_mat(H->nrows, ff->minpoly, ff->degree);
477 f->H = mzd_mul(NULL, N, H, 0);
478 mzd_free(N);
479 mzd_free(H);
480 }
481 return f;
482 }
483
484 const int costs[17] = {0, 1, 3, 6, 9, 13, 17, 22, 27, 31, 36, 40, 45, 49, 55, 60, 64};
485 //const int costs[17] = {0, 1, 3, 6, 9, 13, 15, 22, 24, 30, 33, 39, 42, 48, 51, 54, 60}; /* best possible */
486
blm_init_crt(const gf2e * ff,const deg_t f_ncols,const deg_t g_ncols,const int * p,int djb)487 blm_t *blm_init_crt(const gf2e *ff, const deg_t f_ncols, const deg_t g_ncols, const int *p, int djb) {
488 blm_t *f = m4ri_mm_malloc(sizeof(blm_t));
489
490 // iterator over irreducible polynomials
491 int *p_it = (int*)m4ri_mm_calloc(sizeof(int), M4RIE_CRT_LEN);
492
493 mzd_t *M, *T;
494
495 word poly = 0;
496
497 rci_t m = costs[p[0]];
498 for(int d=1; d<M4RIE_CRT_LEN; d++)
499 m += costs[d] * p[d];
500
501 f->F = mzd_init(m, f_ncols);
502 f->f = NULL;
503 f->G = mzd_init(m, g_ncols);
504 f->g = NULL;
505
506 rci_t r = 0;
507
508
509 /**
510 * 1) We construct maps F,G which combine modular reduction to degree d and the linear map
511 * required for multiplying modulo a polynomial of degree d.
512 */
513
514 /**
515 * 1.1) We deal with (x+infinity)^omega first
516 */
517
518 if(p[0] != 0) {
519 deg_t d = p[0];
520 mzd_t *N = _small_multiplication_map(d);
521 M = _crt_modred_mat(f_ncols, poly, d);
522 T = mzd_init_window(f->F, r, 0, r + costs[d], f_ncols);
523 mzd_mul(T, N, M, 0);
524 mzd_free(T);
525 mzd_free(M);
526
527 M = _crt_modred_mat(g_ncols, poly, d);
528 T = mzd_init_window(f->G, r, 0, r + costs[d], g_ncols);
529 mzd_mul(T, N, M, 0);
530 mzd_free(T);
531 mzd_free(M);
532
533 mzd_free(N);
534
535 r += costs[d];
536 }
537
538 /**
539 * 1.2) We deal with regular polynomial which are co-prime
540 */
541
542 for(deg_t d=1; d<M4RIE_CRT_LEN; d++) {
543 if (p[d] == 0)
544 continue;
545
546 mzd_t *N = _small_multiplication_map(d);
547
548 for(int i=0; i<p[d]; i++) {
549 if (p_it[d] < irreducible_polynomials[d][0]) {
550 poly = irreducible_polynomials[d][ 1 + p_it[d] ];
551 p_it[d]++;
552 } else if (d/2 && p_it[d/2] < irreducible_polynomials[d/2][0]) {
553 /** the minimal polynomial is a square */
554 poly = irreducible_polynomials[d/2][ 1 + p_it[d/2]];
555 p_it[d/2]++;
556 poly = gf2x_mul(poly, poly, d/2+1);
557 } else if (d/4 && p_it[d/4] < irreducible_polynomials[d/4][0]) {
558 /** the minimal polynomial is a fourth power */
559 poly = irreducible_polynomials[d/4][1 + p_it[d/4]];
560 p_it[d/4]++;
561 poly = gf2x_mul(poly, poly, d/4+1);
562 poly = gf2x_mul(poly, poly, d/2+1);
563 } else if (d/8 && p_it[d/8] < irreducible_polynomials[d/8][0]) {
564 /** the minimal polynomial is an eigth power */
565 poly = irreducible_polynomials[d/8][p_it[d/8]+ 1];
566 p_it[d/8]++;
567 poly = gf2x_mul(poly, poly, d/8+1);
568 poly = gf2x_mul(poly, poly, d/4+1);
569 poly = gf2x_mul(poly, poly, d/2+1);
570 } else {
571 m4ri_die("Degree %d is not implemented\n", d);
572 }
573 M = _crt_modred_mat(f_ncols, poly, d);
574 T = mzd_init_window(f->F, r, 0, r + costs[d], f_ncols);
575 mzd_mul(T, N, M, 0);
576 mzd_free(T);
577 mzd_free(M);
578
579 M = _crt_modred_mat(g_ncols, poly, d);
580 T = mzd_init_window(f->G, r, 0, r + costs[d], g_ncols);
581 mzd_mul(T, N, M, 0);
582 mzd_free(T);
583 mzd_free(M);
584
585 r += costs[d];
586 }
587
588 mzd_free(N);
589 }
590
591 m4ri_mm_free(p_it);
592
593 /**
594 * 2) We solve for H as we know poly(c) and (F*vec(a) x G*vec(b)). We pick points poly(a) = x^v,
595 * poly(b) = x^w (hence: poly(c) = x^(v+w)).
596 */
597 _blm_finish_polymult(ff, f);
598 f->h = NULL;
599
600 /**
601 * 3) We compile DJB maps if asked for.
602 */
603
604 if (djb)
605 _blm_djb_compile(f);
606
607 return f;
608 }
609
610
blm_free(blm_t * f)611 void blm_free(blm_t *f) {
612 mzd_free(f->F);
613 mzd_free(f->G);
614 mzd_free(f->H);
615 if (f->f != f->g)
616 djb_free(f->g);
617 djb_free(f->f);
618 djb_free(f->h);
619
620 m4ri_mm_free(f);
621 }
622