1 /*
2     Copyright (C) 2019 Daniel Schultz
3 
4     This file is part of FLINT.
5 
6     FLINT is free software: you can redistribute it and/or modify it under
7     the terms of the GNU Lesser General Public License (LGPL) as published
8     by the Free Software Foundation; either version 2.1 of the License, or
9     (at your option) any later version.  See <https://www.gnu.org/licenses/>.
10 */
11 
12 #include "fmpz_mod_poly.h"
13 
14 /*
15 typedef struct {
16     slong npoints;
17     fmpz_mod_poly_t R0, R1;
18     fmpz_mod_poly_t V0, V1;
19     fmpz_mod_poly_t qt, rt; temporaries
20     fmpz_mod_poly_t points;
21 } fmpz_mod_berlekamp_massey_struct;
22 typedef fmpz_mod_berlekamp_massey_struct nmod_berlekamp_massey_t[1];
23 
24     n = B->npoints is the number of points a_1, ..., a_n that have been added
25     to the sequence. The polynomials A and S are then defined as
26 
27         A = x^n
28         S = a_1*x^(n-1) + a_2*x^(n-2) + ... + a_n
29 
30     We maintain polynomials U0, V0, U1, V1 such that
31 
32         U0*A + V0*S = R0   deg(R0) >= n/2
33         U1*A + V1*S = R1   deg(R1) < n/2
34 
35     where R0 and R1 are consecutive euclidean remainders and U0, V0, U1, V1 are
36     the corresponding Bezout coefficients. Note that
37         deg(U1) < deg(V1) = deg(A) - deg(R0) <= n/2
38 
39     The U0 and U1 are not stored explicitly. The points a_1, ..., a_n are stored
40     in B->points, which is used merely as a resizable array.
41 
42     The main usage of this function is the rational reconstruction of a series
43 
44      a1    a2    a3         -U1
45     --- + --- + --- + ... = ---- maybe
46      x    x^2   x^3          V1
47 
48     It can be seen that
49 
50      a1    a2        an   -U1     R1
51     --- + --- + ... --- = --- + -------
52      x    x^2       x^n    V1   V1*x^n
53 
54     Thus the error is O(1/x^(n+1)) iff deg(R1) < deg(V1).
55 */
fmpz_mod_berlekamp_massey_init(fmpz_mod_berlekamp_massey_t B,const fmpz_mod_ctx_t ctx)56 void fmpz_mod_berlekamp_massey_init(
57     fmpz_mod_berlekamp_massey_t B,
58     const fmpz_mod_ctx_t ctx)
59 {
60     fmpz_mod_poly_init(B->V0, ctx);
61     fmpz_mod_poly_init(B->R0, ctx);
62     fmpz_mod_poly_set_ui(B->R0, 1, ctx);
63     fmpz_mod_poly_init(B->V1, ctx);
64     fmpz_mod_poly_set_ui(B->V1, 1, ctx);
65     fmpz_mod_poly_init(B->R1, ctx);
66     fmpz_mod_poly_init(B->rt, ctx);
67     fmpz_mod_poly_init(B->qt, ctx);
68     fmpz_mod_poly_init(B->points, ctx);
69     B->npoints = 0;
70     B->points->length = 0;
71 }
72 
fmpz_mod_berlekamp_massey_start_over(fmpz_mod_berlekamp_massey_t B,const fmpz_mod_ctx_t ctx)73 void fmpz_mod_berlekamp_massey_start_over(
74     fmpz_mod_berlekamp_massey_t B,
75     const fmpz_mod_ctx_t ctx)
76 {
77     B->npoints = 0;
78     B->points->length = 0;
79     fmpz_mod_poly_zero(B->V0, ctx);
80     fmpz_mod_poly_set_ui(B->R0, 1, ctx);
81     fmpz_mod_poly_set_ui(B->V1, 1, ctx);
82     fmpz_mod_poly_zero(B->R1, ctx);
83 }
84 
fmpz_mod_berlekamp_massey_clear(fmpz_mod_berlekamp_massey_t B,const fmpz_mod_ctx_t ctx)85 void fmpz_mod_berlekamp_massey_clear(
86     fmpz_mod_berlekamp_massey_t B,
87     const fmpz_mod_ctx_t ctx)
88 {
89     fmpz_mod_poly_clear(B->R0, ctx);
90     fmpz_mod_poly_clear(B->R1, ctx);
91     fmpz_mod_poly_clear(B->V0, ctx);
92     fmpz_mod_poly_clear(B->V1, ctx);
93     fmpz_mod_poly_clear(B->rt, ctx);
94     fmpz_mod_poly_clear(B->qt, ctx);
95     fmpz_mod_poly_clear(B->points, ctx);
96 }
97 
fmpz_mod_berlekamp_massey_print(const fmpz_mod_berlekamp_massey_t B,const fmpz_mod_ctx_t ctx)98 void fmpz_mod_berlekamp_massey_print(
99     const fmpz_mod_berlekamp_massey_t B,
100     const fmpz_mod_ctx_t ctx)
101 {
102     slong i;
103     fmpz_mod_poly_print_pretty(B->V1, "#", ctx);
104     flint_printf(",");
105     for (i = 0; i < B->points->length; i++)
106     {
107         flint_printf(" ");
108         fmpz_print(B->points->coeffs + i);
109     }
110 }
111 
fmpz_mod_berlekamp_massey_add_points(fmpz_mod_berlekamp_massey_t B,const fmpz * a,slong count,const fmpz_mod_ctx_t ctx)112 void fmpz_mod_berlekamp_massey_add_points(
113     fmpz_mod_berlekamp_massey_t B,
114     const fmpz * a,
115     slong count,
116     const fmpz_mod_ctx_t ctx)
117 {
118     slong i;
119     slong old_length = B->points->length;
120     fmpz_mod_poly_fit_length(B->points, old_length + count, ctx);
121     for (i = 0; i < count; i++)
122     {
123         FLINT_ASSERT(fmpz_mod_is_canonical(a + i, ctx));
124         fmpz_set(B->points->coeffs + old_length + i, a + i);
125     }
126     B->points->length = old_length + count;
127 }
128 
fmpz_mod_berlekamp_massey_add_zeros(fmpz_mod_berlekamp_massey_t B,slong count,const fmpz_mod_ctx_t ctx)129 void fmpz_mod_berlekamp_massey_add_zeros(
130     fmpz_mod_berlekamp_massey_t B,
131     slong count,
132     const fmpz_mod_ctx_t ctx)
133 {
134     slong i;
135     slong old_length = B->points->length;
136     fmpz_mod_poly_fit_length(B->points, old_length + count, ctx);
137     for (i = 0; i < count; i++)
138     {
139         fmpz_zero(B->points->coeffs + old_length + i);
140     }
141     B->points->length = old_length + count;
142 }
143 
fmpz_mod_berlekamp_massey_add_point(fmpz_mod_berlekamp_massey_t B,const fmpz_t a,const fmpz_mod_ctx_t ctx)144 void fmpz_mod_berlekamp_massey_add_point(
145     fmpz_mod_berlekamp_massey_t B,
146     const fmpz_t a,
147     const fmpz_mod_ctx_t ctx)
148 {
149     slong old_length = B->points->length;
150     fmpz_mod_poly_fit_length(B->points, old_length + 1, ctx);
151     FLINT_ASSERT(fmpz_mod_is_canonical(a, ctx));
152     fmpz_set(B->points->coeffs + old_length, a);
153     B->points->length = old_length + 1;
154 }
155 
fmpz_mod_berlekamp_massey_add_point_ui(fmpz_mod_berlekamp_massey_t B,ulong a,const fmpz_mod_ctx_t ctx)156 void fmpz_mod_berlekamp_massey_add_point_ui(
157     fmpz_mod_berlekamp_massey_t B,
158     ulong a,
159     const fmpz_mod_ctx_t ctx)
160 {
161     slong old_length = B->points->length;
162     fmpz_mod_poly_fit_length(B->points, old_length + 1, ctx);
163     FLINT_ASSERT(fmpz_cmp_ui(fmpz_mod_ctx_modulus(ctx), a) > 0);
164     fmpz_set_ui(B->points->coeffs + old_length, a);
165     B->points->length = old_length + 1;
166 }
167 
168 /* return 1 if reduction changed the master poly, 0 otherwise */
fmpz_mod_berlekamp_massey_reduce(fmpz_mod_berlekamp_massey_t B,const fmpz_mod_ctx_t ctx)169 int fmpz_mod_berlekamp_massey_reduce(
170     fmpz_mod_berlekamp_massey_t B,
171     const fmpz_mod_ctx_t ctx)
172 {
173     slong i, l, k, queue_len, queue_lo, queue_hi;
174 
175     /*
176         the points in B->points->coeffs[j] for queue_lo <= j < queue_hi need
177         to be added to the internal polynomials.
178         These are first reversed into rt. deg(rt) < queue_len.
179     */
180     queue_lo = B->npoints;
181     queue_hi = B->points->length;
182     queue_len = queue_hi - queue_lo;
183     FLINT_ASSERT(queue_len >= 0);
184     fmpz_mod_poly_zero(B->rt, ctx);
185     for (i = 0; i < queue_len; i++)
186     {
187         fmpz_mod_poly_set_coeff_fmpz(B->rt, queue_len - i - 1,
188                                         B->points->coeffs + queue_lo + i, ctx);
189     }
190     B->npoints = queue_hi;
191 
192     /* Ri = Ri * x^queue_len + Vi*rt */
193     fmpz_mod_poly_mul(B->qt, B->V0, B->rt, ctx);
194     fmpz_mod_poly_shift_left(B->R0, B->R0, queue_len, ctx);
195     fmpz_mod_poly_add(B->R0, B->R0, B->qt, ctx);
196     fmpz_mod_poly_mul(B->qt, B->V1, B->rt, ctx);
197     fmpz_mod_poly_shift_left(B->R1, B->R1, queue_len, ctx);
198     fmpz_mod_poly_add(B->R1, B->R1, B->qt, ctx);
199 
200     /* now start reducing R0, R1 */
201     if (2*fmpz_mod_poly_degree(B->R1, ctx) < B->npoints)
202     {
203         /* already have deg(R1) < B->npoints/2 */
204         return 0;
205     }
206 
207     /* one iteration of euclid to get deg(R0) >= B->npoints/2 */
208     fmpz_mod_poly_divrem(B->qt, B->rt, B->R0, B->R1, ctx);
209     fmpz_mod_poly_swap(B->R0, B->R1, ctx);
210     fmpz_mod_poly_swap(B->R1, B->rt, ctx);
211     fmpz_mod_poly_mul(B->rt, B->qt, B->V1, ctx);
212     fmpz_mod_poly_sub(B->qt, B->V0, B->rt, ctx);
213     fmpz_mod_poly_swap(B->V0, B->V1, ctx);
214     fmpz_mod_poly_swap(B->V1, B->qt, ctx);
215 
216     l = fmpz_mod_poly_degree(B->R0, ctx);
217     FLINT_ASSERT(B->npoints <= 2*l && l < B->npoints);
218 
219     k = B->npoints - l;
220     FLINT_ASSERT(0 <= k && k <= l);
221 
222     /*
223         (l - k)/2 is the expected number of required euclidean iterations.
224         Either branch is OK anytime. TODO: find cutoff
225     */
226     if (l - k < 10)
227     {
228         while (B->npoints <= 2*fmpz_mod_poly_degree(B->R1, ctx))
229         {
230             fmpz_mod_poly_divrem(B->qt, B->rt, B->R0, B->R1, ctx);
231             fmpz_mod_poly_swap(B->R0, B->R1, ctx);
232             fmpz_mod_poly_swap(B->R1, B->rt, ctx);
233             fmpz_mod_poly_mul(B->rt, B->qt, B->V1, ctx);
234             fmpz_mod_poly_sub(B->qt, B->V0, B->rt, ctx);
235             fmpz_mod_poly_swap(B->V0, B->V1, ctx);
236             fmpz_mod_poly_swap(B->V1, B->qt, ctx);
237         }
238     }
239     else
240     {
241         /* TODO: get hgcd working in this branch */
242         while (B->npoints <= 2*fmpz_mod_poly_degree(B->R1, ctx))
243         {
244             fmpz_mod_poly_divrem(B->qt, B->rt, B->R0, B->R1, ctx);
245             fmpz_mod_poly_swap(B->R0, B->R1, ctx);
246             fmpz_mod_poly_swap(B->R1, B->rt, ctx);
247             fmpz_mod_poly_mul(B->rt, B->qt, B->V1, ctx);
248             fmpz_mod_poly_sub(B->qt, B->V0, B->rt, ctx);
249             fmpz_mod_poly_swap(B->V0, B->V1, ctx);
250             fmpz_mod_poly_swap(B->V1, B->qt, ctx);
251         }
252     }
253 
254     FLINT_ASSERT(fmpz_mod_poly_degree(B->V1, ctx) >= 0);
255     FLINT_ASSERT(2*fmpz_mod_poly_degree(B->V1, ctx) <= B->npoints);
256     FLINT_ASSERT(2*fmpz_mod_poly_degree(B->R0, ctx) >= B->npoints);
257     FLINT_ASSERT(2*fmpz_mod_poly_degree(B->R1, ctx) <  B->npoints);
258 
259     return 1;
260 }
261