1 #ifndef LINGEN_MATPOLY_BINARY_HPP_
2 #define LINGEN_MATPOLY_BINARY_HPP_
3 
4 /* The outer interface of the matpoly_binary type is exactly the same as
5  * for the matpoly type. This is enforced by the test program. In
6  * particular, we copy even the "abdst_field" argument that is passed
7  * everywhere, with the slight catch that the code *here* does not use it
8  * at all.
9  *
10  * We have two options:
11  *
12  * For convenience, consider this binary matpoly type as
13  * linked to the u64k1 type
14  *
15  * Replace all the "ab" stuff by empty proxies.
16  *
17  * I haven't made my mind yet as to which is best.
18  */
19 
20 #include <cstdlib>
21 #include <gmp.h>
22 #include "cado_config.h"
23 #include "macros.h"
24 #include "lingen_memory_pool.hpp"
25 #include "submatrix_range.hpp"
26 #include "mpfq_fake.hpp"
27 #include "tree_stats.hpp"
28 #include "lingen_call_companion.hpp"
29 
30 class matpoly {
31     friend class bigmatpoly;
32 
33     typedef abdst_vec ptr;
34     typedef absrc_vec srcptr;
35 
36     typedef memory_pool_wrapper<ptr, true> memory_pool_type;
37     static memory_pool_type memory;
38 public:
39     struct memory_guard : private memory_pool_type::guard_base {
memory_guardmatpoly::memory_guard40         memory_guard(size_t s) : memory_pool_type::guard_base(memory, s) {}
~memory_guardmatpoly::memory_guard41         ~memory_guard() { memory_pool_type::guard_base::pre_dtor(memory); }
42     };
43 
44     static constexpr bool over_gf2 = true;
45     // static void add_to_main_memory_pool(size_t s);
46     abdst_field ab = NULL;
47     unsigned int m = 0;
48     unsigned int n = 0;
49     /* alloc_words is the number of unsigned longs used to store each
50      * coefficient */
51 private:
52     size_t size = 0;    /* in bits */
53     size_t alloc_words = 0;
54     unsigned long * x = NULL;
55 #define BITS_TO_WORDS(B,W)      iceildiv((B),(W))
b2w_x(size_t n)56     static inline size_t b2w_x(size_t n) {
57         /* We always use an even number of words. It seems stupid, but
58          * some of the routines that play an important role in
59          * block-based lingen basecase really want 64*64 matrices of
60          * 64-bit word-based polynomials. In a way it's a shortcoming,
61          * yes.
62          */
63         static_assert(64 % ULONG_BITS == 0, "ULONG_BITS must divide 64");
64         return BITS_TO_WORDS(n, 64) * (64 / ULONG_BITS);
65     }
b2w(size_t n)66     static inline size_t b2w(size_t n) {
67         return BITS_TO_WORDS(n, ULONG_BITS);
68     }/*{{{*/
69     // inline size_t colstride() const { return nrows() * stride(); }/*}}}*/
70 public:
capacity() const71     inline size_t capacity() const { return alloc_words * ULONG_BITS; }
data_area() const72     const void * data_area() const { return x; }
is_tight() const73     bool is_tight() const { return alloc_words == b2w(size); }
data_entry_size_in_bytes() const74     size_t data_entry_size_in_bytes() const {
75         return b2w(size) * sizeof(unsigned long);
76     }
data_entry_size_in_words() const77     size_t data_entry_size_in_words() const {
78         return b2w(size);
79     }
data_size_in_bytes() const80     size_t data_size_in_bytes() const {
81         return m * n * data_entry_size_in_bytes();
82     }
data_entry_alloc_size_in_words() const83     size_t data_entry_alloc_size_in_words() const {
84         return alloc_words;
85     }
data_alloc_size_in_words() const86     size_t data_alloc_size_in_words() const {
87         return m * n * data_entry_alloc_size_in_words();
88     }
data_entry_alloc_size_in_bytes() const89     size_t data_entry_alloc_size_in_bytes() const {
90         return alloc_words * sizeof(unsigned long);
91     }
data_alloc_size_in_bytes() const92     size_t data_alloc_size_in_bytes() const {
93         return m * n * data_entry_alloc_size_in_bytes();
94     }
nrows() const95     inline unsigned int nrows() const { return m; }
ncols() const96     inline unsigned int ncols() const { return n; }
get_size() const97     inline size_t get_size() const { return size; }
set_size(size_t s)98     void set_size(size_t s) { size = s; }
99     size_t get_true_nonzero_size() const;
100     unsigned int valuation() const;
101 
matpoly()102     matpoly() { m=n=0; size=0; alloc_words=0; ab=NULL; x=NULL; }
103     matpoly(abdst_field ab, unsigned int m, unsigned int n, int len);
104     matpoly(matpoly const&) = delete;
105     matpoly& operator=(matpoly const&) = delete;
106     matpoly& set(matpoly const&);
107     matpoly(matpoly &&);
108     matpoly& operator=(matpoly &&);
109     ~matpoly();
similar_shell() const110     matpoly similar_shell() const { return matpoly(ab, m, n, 0); }
check_pre_init() const111     bool check_pre_init() const ATTRIBUTE_WARN_UNUSED_RESULT {
112         return x == NULL;
113     }
114     void realloc(size_t new_number_of_coeffs);
shrink_to_fit()115     inline void shrink_to_fit() { realloc(size); }
116     void zero();
clear()117     void clear() { *this = matpoly(); }
118 
119     /* {{{ access interface for matpoly */
120     /* part_head does not _promise_ to point to coefficient k exactly. In
121      * the simd case (lingen_matpoly_binary.hpp) it points to the word
122      * where coefficient k can be found */
part(unsigned int i,unsigned int j)123     inline abdst_vec part(unsigned int i, unsigned int j) {
124         return x + (i*n+j)*alloc_words;
125     }
part_head(unsigned int i,unsigned int j,unsigned int k)126     inline abdst_vec part_head(unsigned int i, unsigned int j, unsigned int k) {
127         return part(i, j) + k / ULONG_BITS;
128     }
coeff(unsigned int i,unsigned int j)129     inline abdst_elt coeff(unsigned int i, unsigned int j) {
130         return part(i, j);
131     }
part(unsigned int i,unsigned int j) const132     inline absrc_vec part(unsigned int i, unsigned int j) const {
133         return x + (i*n+j)*alloc_words;
134     }
part_head(unsigned int i,unsigned int j,unsigned int k) const135     inline absrc_vec part_head(unsigned int i, unsigned int j, unsigned int k) const {
136         return part(i, j) + k / ULONG_BITS;
137     }
138     /* This one is a bit special, as it makes it possible to read one
139      * coefficient exactly. It's R/O access, though. */
coeff(unsigned int i,unsigned int j,unsigned int k) const140     inline absrc_elt coeff(unsigned int i, unsigned int j, unsigned int k) const {
141         unsigned int kr = k % ULONG_BITS;
142         unsigned long km = 1UL << kr;
143         static constexpr abelt coeffbits[2] = { {0}, {1} };
144         return coeffbits[((*part_head(i, j, k) & km) != 0)];
145     }
146     struct coeff_accessor_proxy {
147         unsigned long * p;
148         unsigned int kr;
coeff_accessor_proxymatpoly::coeff_accessor_proxy149         coeff_accessor_proxy(matpoly& F, unsigned int i,
150                 unsigned int j, unsigned int k)
151         {
152             p = F.coeff(i, j) + (k / ULONG_BITS);
153             kr = k % ULONG_BITS;
154         }
operator +=matpoly::coeff_accessor_proxy155         coeff_accessor_proxy& operator+=(absrc_elt x) {
156             *p ^= *x << kr;
157             return *this;
158         }
159     };
coeff_accessor(unsigned int i,unsigned int j,unsigned int k=0)160     inline coeff_accessor_proxy coeff_accessor(unsigned int i, unsigned int j, unsigned int k = 0) {
161         return coeff_accessor_proxy(*this, i, j, k);
162     }
163     /* }}} */
164 
165     /* The interfaces below used to exist for the old binary "polmat"
166      * type, and we wish to do away with them.
167      */
168     void addpoly(unsigned int i, unsigned int j, matpoly const& y, unsigned int iy, unsigned int jy) __attribute__((deprecated));
169     void xmul_poly(unsigned int i, unsigned int j, unsigned long s) __attribute__((deprecated));
poly(unsigned int i,unsigned int j)170     unsigned long * poly(unsigned int i, unsigned int j) __attribute__((deprecated)) { return part(i, j); }
poly(unsigned int i,unsigned int j) const171     const unsigned long * poly(unsigned int i, unsigned int j) const __attribute__((deprecated)) { return part(i, j); }
172 
173     void set_constant_ui(unsigned long e);
set_constant(absrc_elt e)174     void set_constant(absrc_elt e) { set_constant_ui(*e); }
175 
176     /* Note that this does not affect the size field */
177     void fill_random(unsigned int k0, unsigned int k1, gmp_randstate_t rstate);
178 
clear_and_set_random(unsigned int len,gmp_randstate_t rstate)179     void clear_and_set_random(unsigned int len, gmp_randstate_t rstate)
180     {
181         if (len > capacity())
182             zero_pad(len);
183         zero_pad(capacity());
184         fill_random(0, len, rstate);
185         set_size(len);
186     }
187     int cmp(matpoly const & b) const;
188     void multiply_column_by_x(unsigned int j, unsigned int size);
189     void divide_column_by_x(unsigned int j, unsigned int size);
190     void truncate(matpoly const & src, unsigned int size);
truncate(unsigned int size)191     void truncate(unsigned int size) { truncate(*this, size); }
192     /* This checks that coefficients of degree k to size-1 are zero.
193      */
194     int tail_is_zero(unsigned int k) const;
195 public:
196     /* not to be confused with the former. a priori this is an
197      * implementation detail. At times, we want to assert that.
198      */
199     bool high_word_is_clear() const;
200 private:
201     void clear_high_word_common(unsigned int length);
202 public:
clear_high_word()203     inline void clear_high_word() { clear_high_word_common(size); }
204 public:
205     /* This changes size to nsize, and fills [size..nsize[ with zeroes */
206     void zero_pad(unsigned int nsize);
207 
zero_with_size(size_t size)208     void zero_with_size(size_t size) { set_size(0); zero_pad(size); }
209 
210     void extract_column(
211         unsigned int jdst, unsigned int kdst,
212         matpoly const & src, unsigned int jsrc, unsigned int ksrc);
213     void zero_column(unsigned int jdst, unsigned int kdst);
214     void rshift(matpoly const &, unsigned int k);
215     void rshift(unsigned int k);
216 
217     void add(matpoly const & a, matpoly const & b);
218     void sub(matpoly const & a, matpoly const & b);
add(matpoly const & a)219     void add(matpoly const & a) { add(*this, a); }
sub(matpoly const & a)220     void sub(matpoly const & a) { sub(*this, a); }
221 
222     static matpoly mul(matpoly const & a, matpoly const & b);
223     static matpoly mp(matpoly const & a, matpoly const & c);
224     void addmul(matpoly const & a, matpoly const & b);
225     void addmp(matpoly const & a, matpoly const & c);
226 
227     /* We have nothing terrific to identify as substeps here, and there's
228      * no schedule information that we intend to exploit */
mp(tree_stats & stats,matpoly const & a,matpoly const & b,lingen_call_companion::mul_or_mp_times * M)229     static matpoly mp(tree_stats & stats, matpoly const & a, matpoly const & b, lingen_call_companion::mul_or_mp_times * M)
230     {
231         if (!M) return mp(a, b);
232         tree_stats::smallstep_sentinel dummy(stats, M->step_name());
233         return mp(a, b);
234     }
235 
mul(tree_stats & stats,matpoly const & a,matpoly const & b,lingen_call_companion::mul_or_mp_times * M)236     static matpoly mul(tree_stats & stats, matpoly const & a, matpoly const & b, lingen_call_companion::mul_or_mp_times * M) {
237         if (!M) return mul(a, b);
238         tree_stats::smallstep_sentinel dummy(stats, M->step_name());
239         return mul(a, b);
240     }
241 
242 
243     // void set_polymat(polymat const & src);
244     int coeff_is_zero(unsigned int k) const;
245     void coeff_set_zero(unsigned int k);
246 
247     struct view_t : public submatrix_range {
248         matpoly & M;
view_tmatpoly::view_t249         view_t(matpoly & M, submatrix_range S) : submatrix_range(S), M(M) {}
view_tmatpoly::view_t250         view_t(matpoly & M) : submatrix_range(M), M(M) {}
partmatpoly::view_t251         inline abdst_vec part(unsigned int i, unsigned int j) {
252             return M.part(i0+i, j0+j);
253         }
partmatpoly::view_t254         inline absrc_vec part(unsigned int i, unsigned int j) const {
255             return M.part(i0+i, j0+j);
256         }
257         void zero();
258     };
259 
260     struct const_view_t : public submatrix_range {
261         matpoly const & M;
const_view_tmatpoly::const_view_t262         const_view_t(matpoly const & M, submatrix_range S) : submatrix_range(S), M(M) {}
const_view_tmatpoly::const_view_t263         const_view_t(matpoly const & M) : submatrix_range(M), M(M) {}
const_view_tmatpoly::const_view_t264         const_view_t(view_t const & V) : submatrix_range(V), M(V.M) {}
partmatpoly::const_view_t265         inline absrc_vec part(unsigned int i, unsigned int j) const {
266             return M.part(i0+i, j0+j);
267         }
268     };
view(submatrix_range S)269     view_t view(submatrix_range S) { ASSERT_ALWAYS(S.valid(*this)); return view_t(*this, S); }
view(submatrix_range S) const270     const_view_t view(submatrix_range S) const { ASSERT_ALWAYS(S.valid(*this)); return const_view_t(*this, S); }
view()271     view_t view() { return view_t(*this); }
view() const272     const_view_t view() const { return const_view_t(*this); }
273 
274     static void copy(view_t t, const_view_t a);
275     static void addmul(view_t t, const_view_t t0, const_view_t t1);
276     static void addmp(view_t t, const_view_t t0, const_view_t t1);
277 
278     matpoly truncate_and_rshift(unsigned int truncated_size, unsigned int rshift);
279 };
280 
281 #endif	/* LINGEN_MATPOLY_BINARY_HPP_ */
282