1 #include "cado.h" // IWYU pragma: keep
2 // IWYU pragma: no_include <ext/alloc_traits.h>
3 #include <fstream> // IWYU pragma: keep
4 #ifdef SELECT_MPFQ_LAYER_u64k1
5 #include <limits.h>                   // for UINT_MAX
6 #include <stdlib.h>                   // for abort
7 #else
8 #include <gmp.h>               // for mp_limb_t, __gmpn_cmp, __gmpn_copyi
9 #endif
10 #include <unistd.h>     // pread // IWYU pragma: keep
11 #include "lingen_io_matpoly.hpp"
12 #include "lingen_matpoly_select.hpp"
13 #include "macros.h"            // for ASSERT_ALWAYS
14 #include "params.h"            // for cxx_param_list, param_list_decl_usage
15 #include "omp_proxy.h" // IWYU pragma: keep
16 
17 
18 // constexpr const unsigned int simd = matpoly::over_gf2 ? ULONG_BITS : 1;
19 constexpr const unsigned int splitwidth = matpoly::over_gf2 ? 64 : 1;
20 
21 /* {{{ I/O helpers */
22 
23 /* This is an indication of the number of bytes we read at a time for A
24  * (input) and F (output) */
25 unsigned int io_matpoly_block_size = 1 << 20;
26 
lingen_io_matpoly_decl_usage(cxx_param_list & pl)27 void lingen_io_matpoly_decl_usage(cxx_param_list & pl)
28 {
29     param_list_decl_usage(pl, "io-block-size",
30             "chunk size for reading the input or writing the output");
31 }
32 
lingen_io_matpoly_lookup_parameters(cxx_param_list & pl)33 void lingen_io_matpoly_lookup_parameters(cxx_param_list & pl)
34 {
35     param_list_lookup_string(pl, "io-block-size");
36 }
37 
lingen_io_matpoly_interpret_parameters(cxx_param_list & pl)38 void lingen_io_matpoly_interpret_parameters(cxx_param_list & pl)
39 {
40     param_list_parse_uint(pl, "io-block-size", &(io_matpoly_block_size));
41 }
42 
43 /* {{{ matpoly_write
44  * writes some of the matpoly data to f, either in ascii or binary
45  * format. This can be used to write only part of the data (degrees
46  * [k0..k1[). Returns the number of coefficients (i.e., matrices, so at
47  * most k1-k0) successfully written, or
48  * -1 on error (e.g. when some matrix was only partially written).
49  */
50 
51 #ifndef SELECT_MPFQ_LAYER_u64k1
matpoly_write(abdst_field ab,std::ostream & os,matpoly const & M,unsigned int k0,unsigned int k1,int ascii,int transpose)52 int matpoly_write(abdst_field ab, std::ostream& os, matpoly const & M, unsigned int k0, unsigned int k1, int ascii, int transpose)
53 {
54     unsigned int m = transpose ? M.n : M.m;
55     unsigned int n = transpose ? M.m : M.n;
56     ASSERT_ALWAYS(k0 == k1 || (k0 < M.get_size() && k1 <= M.get_size()));
57     for(unsigned int k = k0 ; k < k1 ; k++) {
58         int err = 0;
59         int matnb = 0;
60         for(unsigned int i = 0 ; !err && i < m ; i++) {
61             for(unsigned int j = 0 ; !err && j < n ; j++) {
62                 absrc_elt x;
63                 x = transpose ? M.coeff(j, i, k) : M.coeff(i, j, k);
64                 if (ascii) {
65                     if (j) err = !(os << " ");
66                     if (!err) err = !(abcxx_out(ab, os, x));
67                 } else {
68                     err = !(os.write((const char *) x, (size_t) abvec_elt_stride(ab, 1)));
69                 }
70                 if (!err) matnb++;
71             }
72             if (!err && ascii) err = !(os << "\n");
73         }
74         if (ascii) err = err || !(os << "\n");
75         if (err) {
76             return (matnb == 0) ? (int) (k - k0) : -1;
77         }
78     }
79     return k1 - k0;
80 }
81 #else
matpoly_write(abdst_field,std::ostream & os,matpoly const & M,unsigned int k0,unsigned int k1,int ascii,int transpose)82 int matpoly_write(abdst_field, std::ostream& os, matpoly const & M, unsigned int k0, unsigned int k1, int ascii, int transpose)
83 {
84     unsigned int m = M.m;
85     unsigned int n = M.n;
86     ASSERT_ALWAYS(k0 == k1 || (k0 < M.get_size() && k1 <= M.get_size()));
87     unsigned int mc = iceildiv(M.m, ULONG_BITS);
88     unsigned int nc = iceildiv(M.n, ULONG_BITS);
89     size_t ulongs_per_mat = transpose ? (M.n * mc) : (M.m * nc);
90     std::vector<unsigned long> buf(ulongs_per_mat);
91     for(unsigned int k = k0 ; k < k1 ; k++) {
92         buf.assign(ulongs_per_mat, 0);
93         bool err = false;
94         size_t kq = k / ULONG_BITS;
95         size_t kr = k % ULONG_BITS;
96         unsigned long km = 1UL << kr;
97         if (!transpose) {
98             for(unsigned int i = 0 ; i < m ; i++) {
99                 unsigned long * v = &(buf[i * nc]);
100                 for(unsigned int j = 0 ; j < n ; j++) {
101                     unsigned int jq = j / ULONG_BITS;
102                     unsigned int jr = j % ULONG_BITS;
103                     unsigned long bit = (M.part(i, j)[kq] & km) != 0;
104                     v[jq] |= bit << jr;
105                 }
106             }
107         } else {
108             for(unsigned int j = 0 ; j < n ; j++) {
109                 unsigned long * v = &(buf[j * mc]);
110                 for(unsigned int i = 0 ; i < m ; i++) {
111                     unsigned int iq = i / ULONG_BITS;
112                     unsigned int ir = i % ULONG_BITS;
113                     unsigned long bit = (M.part(i, j)[kq] & km) != 0;
114                     v[iq] |= bit << ir;
115                 }
116             }
117         }
118         if (ascii) {
119             /* do we have an endian-robust wordsize-robust convention for
120              * printing bitstrings in hex ?
121              *
122              * it's not even clear that we should care -- after all, as
123              * long as mksol follows a consistent convention too, we
124              * should be fine.
125              */
126             abort();
127         } else {
128             err = !(os.write((const char*) buf.data(), ulongs_per_mat * sizeof(unsigned long)));
129         }
130         if (err) return -1;
131     }
132     return k1 - k0;
133 }
134 #endif
135 /* }}} */
136 
137 #define VOID_POINTER_ADD(x, k) (((char*)(x))+(k))
138 
139 /* fw must be an array of ofstreams of exactly the same size as the
140  * matrix to be written.
141  */
matpoly_write_split(abdst_field ab MAYBE_UNUSED,std::vector<std::ofstream> & fw,matpoly const & M,unsigned int k0,unsigned int k1,int ascii)142 int matpoly_write_split(abdst_field ab MAYBE_UNUSED, std::vector<std::ofstream> & fw, matpoly const & M, unsigned int k0, unsigned int k1, int ascii)
143 {
144     ASSERT_ALWAYS(k0 == k1 || (k0 < M.get_size() && k1 <= M.get_size()));
145 #ifdef SELECT_MPFQ_LAYER_u64k1
146     size_t ulongs_per_mat = splitwidth * splitwidth / ULONG_BITS;
147     std::vector<unsigned long> buf(ulongs_per_mat);
148 #endif
149     for(unsigned int k = k0 ; k < k1 ; k++) {
150         int err = 0;
151         int matnb = 0;
152         for(unsigned int i = 0 ; !err && i < M.m ; i += splitwidth) {
153             for(unsigned int j = 0 ; !err && j < M.n ; j += splitwidth) {
154                 std::ostream& os = fw[i/splitwidth*M.n/splitwidth+j/splitwidth];
155 #ifndef SELECT_MPFQ_LAYER_u64k1
156                 absrc_elt x = M.coeff(i, j, k);
157                 if (ascii) {
158                     err = !(abcxx_out(ab, os, x));
159                     if (!err) err = !(os << "\n");
160                 } else {
161                     err = !(os.write((const char *) x, (size_t) abvec_elt_stride(ab, 1)));
162                 }
163 #else
164                 if (ascii)
165                     abort();
166                 buf.assign(ulongs_per_mat, 0);
167                 size_t kq = k / ULONG_BITS;
168                 size_t kr = k % ULONG_BITS;
169                 for(unsigned int di = 0 ; di < splitwidth ; di++) {
170                     unsigned int ii = i + di;
171                     unsigned int ulongs_per_row = splitwidth / ULONG_BITS;
172                     for(unsigned int dj0 = 0 ; dj0 < ulongs_per_row ; dj0++) {
173                         for(unsigned int dj = 0 ; dj < ULONG_BITS ; dj++) {
174                             unsigned int jj = j + dj0 * ULONG_BITS + dj;
175                             const unsigned long * mij = M.part(ii, jj);
176                             unsigned long bit = (mij[kq] >> kr) & 1;
177                             buf[di * ulongs_per_row + dj0] ^= bit << dj;
178                         }
179                     }
180                 }
181                 err = !(os.write((const char *) buf.data(), ulongs_per_mat * sizeof(unsigned long)));
182 #endif
183                 if (!err) matnb++;
184             }
185         }
186         if (err) {
187             return (matnb == 0) ? (int) (k - k0) : -1;
188         }
189     }
190     return k1 - k0;
191 }
192 /* }}} */
193 
194 /* {{{ matpoly_read
195  * reads some of the matpoly data from f, either in ascii or binary
196  * format. This can be used to parse only part of the data (degrees
197  * [k0..k1[, k1 being an upper bound). Returns the number of coefficients
198  * (i.e., matrices, so at most k1-k0) successfully read, or
199  * -1 on error (e.g. when some matrix was only partially read).
200  *
201  * Note that the matrix must *not* be in pre-init state. It must have
202  * been already allocated.
203  */
204 
205 #ifndef SELECT_MPFQ_LAYER_u64k1
matpoly_read(abdst_field ab,FILE * f,matpoly & M,unsigned int k0,unsigned int k1,int ascii,int transpose)206 int matpoly_read(abdst_field ab, FILE * f, matpoly & M, unsigned int k0, unsigned int k1, int ascii, int transpose)
207 {
208     ASSERT_ALWAYS(!M.check_pre_init());
209     unsigned int m = transpose ? M.n : M.m;
210     unsigned int n = transpose ? M.m : M.n;
211     ASSERT_ALWAYS(k0 == k1 || (k0 < M.get_size() && k1 <= M.get_size()));
212     mpz_srcptr pz = abfield_characteristic_srcptr(ab);
213 #if GMP_VERSION_ATLEAST(6, 0, 0)
214     mp_size_t pzsize = mpz_size(pz);
215     const mp_limb_t * pzlimbs = mpz_limbs_read(pz);
216 #else
217     mp_size_t pzsize = SIZ(pz);
218     const mp_limb_t * pzlimbs = PTR(pz);
219 #endif
220     std::vector<mp_limb_t> vbuf(pzsize + 1);
221     mp_limb_t * buf = vbuf.data();
222     for(unsigned int k = k0 ; k < k1 ; k++) {
223         int err = 0;
224         int matnb = 0;
225         for(unsigned int i = 0 ; !err && i < m ; i++) {
226             for(unsigned int j = 0 ; !err && j < n ; j++) {
227                 abdst_elt x;
228                 x = transpose ? M.coeff(j, i, k)
229                               : M.coeff(i, j, k);
230                 if (ascii) {
231                     err = abfscan(ab, f, x) == 0;
232                 } else {
233                     err = fread(x, abvec_elt_stride(ab, 1), 1, f) < 1;
234                     if (mpn_cmp(x, pzlimbs, pzsize >= 0)) {
235                         mpn_tdiv_qr(buf + pzsize, buf, 0, x, pzsize, pzlimbs, pzsize);
236                         mpn_copyi(x, buf, pzsize);
237                     }
238                 }
239                 if (!err) matnb++;
240             }
241         }
242         if (err) {
243             if (matnb == 0)
244                 return (int) (k - k0);
245             fprintf(stderr, "I/O error after %d,%d coefficients (matrix size %u*%u)\n",
246                     (int) (k-k0), matnb, m, n);
247             return -1;
248         }
249     }
250     return k1 - k0;
251 }
252 #else
matpoly_read_inner(abdst_field,FILE * f,matpoly & M,unsigned int k0,unsigned int k1,int ascii,int transpose,off_t base,unsigned int batch=1)253 int matpoly_read_inner(abdst_field, FILE * f, matpoly & M, unsigned int k0, unsigned int k1, int ascii, int transpose, off_t base, unsigned int batch = 1)
254 {
255     /* Internally, the dimension of the matrix that are most packed
256      * (i.e., rows when in row-major order) are padded to multiples of
257      * ULONG_BITS
258      */
259     unsigned int m = M.m;
260     unsigned int n = M.n;
261     ASSERT_ALWAYS(k0 == k1 || (k0 < M.get_size() && k1 <= M.get_size()));
262     unsigned int mc = iceildiv(M.m, ULONG_BITS);
263     unsigned int nc = iceildiv(M.n, ULONG_BITS);
264     size_t ulongs_per_mat = transpose ? (M.n * mc) : (M.m * nc);
265     batch = MIN(batch, k1 - k0);
266     std::vector<unsigned long> buf(ulongs_per_mat * batch);
267     for(unsigned int k = k0 ; k < k1 ; k+=batch) {
268         if (k + batch > k1) batch = k1 - k;
269         unsigned int good;
270         if (ascii) {
271             /* do we have an endian-robust wordsize-robust convention for
272              * printing bitstrings in hex ?
273              *
274              * it's not even clear that we should care -- after all, as long as
275              * mksol follows a consistent convention too, we should be fine.
276              */
277             abort();
278         } else {
279             int rc;
280             if (base < 0) {
281                 rc = fread(buf.data(), sizeof(unsigned long), ulongs_per_mat * batch, f);
282                 if (rc != (int) (ulongs_per_mat * batch) && ferror(f))
283                     return k - k0;
284             } else {
285                 /* use pread -- good for multithreading */
286                 size_t one = ulongs_per_mat * sizeof(unsigned long);
287                 off_t off = base + (k - k0) * one;
288                 ssize_t r = pread(fileno(f), buf.data(), one * batch, off);
289                 if (r < 0) rc = 0;
290                 else rc = r / sizeof(unsigned long);
291             }
292             good = rc / ulongs_per_mat;
293         }
294         for(unsigned int b = 0 ; b < good ; b++) {
295             size_t kq = (k + b) / ULONG_BITS;
296             size_t kr = (k + b) % ULONG_BITS;
297             if (!transpose) {
298                 for(unsigned int i = 0 ; i < m ; i++) {
299                     unsigned long * v = &(buf[b * ulongs_per_mat + i * nc]);
300                     for(unsigned int j = 0 ; j < n ; j++) {
301                         unsigned int jq = j / ULONG_BITS;
302                         unsigned int jr = j % ULONG_BITS;
303                         unsigned long bit = (v[jq] >> jr) & 1;
304                         M.part(i, j)[kq] &= ~(1UL << kr);
305                         M.part(i, j)[kq] |= bit << kr;
306                     }
307                 }
308             } else {
309                 for(unsigned int j = 0 ; j < n ; j++) {
310                     unsigned long * v = &(buf[b * ulongs_per_mat + j * mc]);
311                     for(unsigned int i = 0 ; i < m ; i++) {
312                         unsigned int iq = i / ULONG_BITS;
313                         unsigned int ir = i % ULONG_BITS;
314                         unsigned long bit = (v[iq] >> ir) & 1;
315                         M.part(i, j)[kq] &= ~(1UL << kr);
316                         M.part(i, j)[kq] |= bit << kr;
317                     }
318                 }
319             }
320         }
321         if (good < batch) {
322             return k - k0 + good;
323         }
324     }
325     return k1 - k0;
326 }
matpoly_read(abdst_field ab,FILE * f,matpoly & M,unsigned int k0,unsigned int k1,int ascii,int transpose)327 int matpoly_read(abdst_field ab, FILE * f, matpoly & M, unsigned int k0, unsigned int k1, int ascii, int transpose)
328 {
329     int rc = 0;
330     if (k0 % ULONG_BITS) {
331         unsigned int fk0 = MIN(k1, k0 + ULONG_BITS - (k0 % ULONG_BITS));
332         rc += matpoly_read_inner(ab, f, M, k0, fk0, ascii, transpose, -1);
333         if (rc < (int) (fk0 - k0) || fk0 == k1)
334             return rc;
335         return rc + matpoly_read(ab, f, M, fk0, k1, ascii, transpose);
336     }
337     if (k1 % ULONG_BITS) {
338         unsigned int fk1 = MAX(k0, k1 - (k1 % ULONG_BITS));
339         if (k0 < fk1) {
340             /* recurse and to the bulk of the processing on aligned
341              * values */
342             rc += matpoly_read(ab, f, M, k0, fk1, ascii, transpose);
343             if (rc < (int) (fk1 - k0))
344                 return rc;
345         }
346         return rc + matpoly_read_inner(ab, f, M, fk1, k1, ascii, transpose, -1);
347     }
348 
349     ASSERT_ALWAYS(!(k0 % ULONG_BITS));
350     ASSERT_ALWAYS(!(k1 % ULONG_BITS));
351 
352     off_t pos0 = ftell(f);
353     unsigned int mc = iceildiv(M.m, ULONG_BITS);
354     unsigned int nc = iceildiv(M.n, ULONG_BITS);
355     size_t ulongs_per_mat = transpose ? (M.n * mc) : (M.m * nc);
356     size_t one = ulongs_per_mat * sizeof(unsigned long);
357 
358     unsigned int nth = 1;
359 #ifdef HAVE_OPENMP
360     nth = omp_get_max_threads();
361 #endif
362     unsigned int dk = ((k1 - k0)/ULONG_BITS) / nth;
363     unsigned int mk = ((k1 - k0)/ULONG_BITS) % nth;
364 #ifdef HAVE_OPENMP
365 #pragma omp parallel num_threads(nth)
366 #endif
367     {
368         unsigned int idx = 0;
369 #ifdef HAVE_OPENMP
370         idx = omp_get_thread_num();
371 #endif
372         unsigned int lk0 = k0 / ULONG_BITS + idx * dk + MIN(idx, mk);
373         unsigned int lk1 = lk0 + dk + (idx < mk);
374         lk0 *= ULONG_BITS;
375         lk1 *= ULONG_BITS;
376 
377         off_t base = pos0 + (lk0 - k0) * one;
378         int my_rc = matpoly_read_inner(ab, f, M, lk0, lk1, ascii, transpose, base, UINT_MAX);
379 
380 #ifdef HAVE_OPENMP
381 #pragma omp critical
382 #endif
383         rc += my_rc;
384     }
385 
386     fseek(f, pos0 + rc * one, SEEK_SET);
387     return rc;
388 }
389 
390 #endif
391 /* }}} */
392 
393