1 /*
2     Copyright (C) 2008, 2009 William Hart
3     Copyright (C) 2010 Sebastian Pancratz
4 
5     This file is part of FLINT.
6 
7     FLINT is free software: you can redistribute it and/or modify it under
8     the terms of the GNU Lesser General Public License (LGPL) as published
9     by the Free Software Foundation; either version 2.1 of the License, or
10     (at your option) any later version.  See <http://www.gnu.org/licenses/>.
11 */
12 
13 #include <stdlib.h>
14 #include "fmpz_poly.h"
15 
16 static void
__fmpz_poly_pseudo_divrem_divconquer(fmpz * Q,fmpz * R,ulong * d,const fmpz * A,slong lenA,const fmpz * B,slong lenB,const fmpz_preinvn_t inv)17 __fmpz_poly_pseudo_divrem_divconquer(fmpz * Q, fmpz * R,
18                   ulong * d, const fmpz * A, slong lenA,
19                           const fmpz * B, slong lenB, const fmpz_preinvn_t inv)
20 {
21     if (lenB <= 16 || (lenA > 2 * lenB - 1 && lenA < 128))
22     {
23         _fmpz_poly_pseudo_divrem_basecase(Q, R, d, A, lenA, B, lenB, inv);
24     }
25     else
26     {
27         const slong n2 = lenB / 2;
28         const slong n1 = lenB - n2;
29 
30         const fmpz * d1 = B + n2;
31         const fmpz * d2 = B;
32         const fmpz * d3 = B + n1;
33         const fmpz * d4 = B;
34 
35         if (lenA <= lenB + n2 - 1)
36         {
37             fmpz *p1, *r1, *d2q1;
38             fmpz *f;
39 
40             /*
41                Shift A right by n1, zero the bottom n2 - 1 coeffs; call this p1
42              */
43 
44             p1 = (fmpz *) flint_malloc((lenA - n1) * sizeof(fmpz));
45             {
46                 slong i;
47                 flint_mpn_zero((mp_ptr) p1, n2 - 1);
48                 for (i = n2 - 1; i < lenA - n1; i++)
49                     p1[i] = (A + n1)[i];
50             }
51 
52             /*
53                Compute p1 div d3, at most a 2 n2 - 1 by n2 division, leaving
54                lenA - lenB + 1 <= n2 terms in the quotient
55              */
56 
57             r1 = R + n1;
58             _fmpz_poly_pseudo_divrem_divconquer(Q, r1, d, p1, lenA - n1, d3, n2, inv);
59 
60             flint_free(p1);
61 
62             /*
63                Push the relevant {n2 - 1} terms of the remainder to the
64                top of {R, lenA}
65              */
66 
67             {
68                 slong i;
69                 for (i = n2 - 2; i >= 0; i--)
70                     fmpz_swap(R + lenA - (n2 - 1) + i, r1 + i);
71                 r1 = R + lenA - (n2 - 1);
72             }
73 
74             /*
75                Compute d2q1 = Q d4 of length lenA - n2, which is
76                at most n1 + n2 - 1 terms
77              */
78 
79             d2q1 = R;
80             _fmpz_poly_mul(d2q1, d4, n1, Q, lenA - lenB + 1);
81 
82             /*
83                Compute R = L^d R', where R' is the terms of A we have not dealt,
84                of which there are at most n1 + n2 - 1; that is,
85 
86                Set R to {A, n1 + n2 - 1} * f + r1 x^n1 - d2q1
87              */
88 
89             _fmpz_vec_neg(R, R, lenA - n2);
90             _fmpz_vec_add(R + n1, R + n1, R + lenA - n2 + 1, lenA - lenB);
91             _fmpz_vec_swap(R + lenA - n2, R + 2 * lenA - lenB + 1 - n2, n2 - (lenA - lenB + 1));
92 
93             f = R + lenB - 1;
94             fmpz_pow_ui(f, B + (lenB - 1), *d);
95             _fmpz_vec_scalar_addmul_fmpz(R, A, n1 + n2 - 1, f);
96         }
97         else if (lenA > 2 * lenB - 1)
98         {
99             /*
100                XXX:  In this case, we expect A to be modifiable
101              */
102 
103             ulong s1, s2;
104             const slong shift = lenA - 2 * lenB + 1;
105 
106             fmpz * q1 = Q + shift;
107             fmpz * q2 = Q;
108             fmpz * r1 = R;
109 
110             fmpz *p1, *t;
111             fmpz_t f;
112 
113             fmpz_init(f);
114 
115             /*
116                Shift A right until it is of length 2 lenB - 1, call this p1;
117                zero the bottom lenB - 1 coeffs
118              */
119 
120             p1 = (fmpz *) flint_malloc((2 * lenB - 1) * sizeof(fmpz));
121             {
122                 slong i;
123                 flint_mpn_zero((mp_ptr) p1, lenB - 1);
124                 for (i = lenB - 1; i < 2*lenB - 1; i++)
125                     p1[i] = (A + shift)[i];
126             }
127 
128             /*
129                Set q1 to p1 div B, a 2 lenB - 1 by lenB division, so q1 ends up
130                being at most length lenB; r1 is of length at most lenB - 1
131              */
132 
133             _fmpz_poly_pseudo_divrem_divconquer(q1, r1, &s1, p1, 2 * lenB - 1, B, lenB, inv);
134 
135             flint_free(p1);
136 
137             /*
138                Compute t = L^s1 a2 + r1 x^shift, of length at most lenA - lenB
139                since r1 is of length at most lenB - 1.  Here a2 is what remains
140                of A after the first lenR coefficients are removed
141              */
142 
143             t = (fmpz *) A;
144 
145             fmpz_pow_ui(f, B + (lenB - 1), s1);
146 
147             _fmpz_vec_scalar_mul_fmpz(t, A, lenA - lenB, f);
148             _fmpz_vec_add(t + shift, t + shift, r1, lenB - 1);
149 
150             /*
151                Compute q2 = t div B; it is a smaller division than the original
152                since len(t) <= lenA - lenB, and r2 has length at most lenB - 1
153              */
154 
155             _fmpz_poly_pseudo_divrem_divconquer(q2, R, &s2, t, lenA - lenB, B, lenB, inv);
156 
157             /*
158                Write out Q = L^s2 q1 x^shift + q2, of length at most
159                lenB + shift.  Note q2 has length at most shift since it is at
160                most an lenA - lenB by lenB division; q1 cannot have length zero
161                since we are doing pseudo division
162              */
163 
164             fmpz_pow_ui(f, B + (lenB - 1), s2);
165 
166             _fmpz_vec_scalar_mul_fmpz(q1, q1, lenB, f);
167 
168             *d = s1 + s2;
169 
170             fmpz_clear(f);
171         }
172         else  /* n1 + 2 n2 - 1 < lenA <= 2 lenB - 1 */
173         {
174             fmpz * q1   = Q + n2;
175             fmpz * q2   = Q;
176             fmpz * r1   = R;
177             fmpz * d2q1 = R + (n1 - 1);
178             fmpz *p1, *t;
179             fmpz_t f;
180             ulong s1, s2;
181 
182             fmpz_init(f);
183 
184             /*
185                Set p1 to the top lenA - 2 n2 coeffs of A, clearing the bottom
186                n1 - 1 coeffs
187              */
188 
189             p1 = (fmpz *) flint_malloc((lenA - 2 * n2) * sizeof(fmpz));
190             {
191                 slong i;
192                 flint_mpn_zero((mp_ptr) p1, n1 - 1);
193                 for (i = n1 - 1; i < lenA - 2 * n2; i++)
194                     p1[i] = (A + 2 * n2)[i];
195             }
196 
197             /*
198                Set q1 to p1 div d1, at most a 2 n1 - 1 by n1 division, so q1 ends
199                up being of length at most n1; r1 is of length n1 - 1
200              */
201 
202             _fmpz_poly_pseudo_divrem_divconquer(q1, r1, &s1, p1, lenA - 2 * n2, d1, n1, inv);
203 
204             flint_free(p1);
205 
206             /*
207                Compute d2q1 = d2q1, of length lenA - lenB
208 
209                Note lenA - lenB <= lenB - 1 <= 2 n2 and lenA - (n1 - 1) > 2 n2,
210                so we can store d2q1 in the top 2 n2 coeffs of R
211              */
212 
213             if (n2 >= lenA - n1 - 2 * n2 + 1)
214                 _fmpz_poly_mul(d2q1, d2, n2, q1, lenA - (n1 + 2 * n2 - 1));
215             else
216                 _fmpz_poly_mul(d2q1, q1, lenA - (n1 + 2 * n2 - 1), d2, n2);
217 
218             /*
219                Compute
220                    t = L^s1 * (a2 x^{n1 + n2 - 1} + a3)
221                        + r1 x^{2 n2} - d2q1 x^n2
222                of length at most lenB + n2 - 1, since r1 is of length at most
223                n1 - 1 and d2q1 is of length at most n1 + n2 - 1
224              */
225 
226             t = _fmpz_vec_init(n1 + 2 * n2 - 1);
227 
228             fmpz_pow_ui(f, B + (lenB - 1), s1);
229 
230             _fmpz_vec_scalar_mul_fmpz(t, A, n1 + 2 * n2 - 1, f);
231             _fmpz_vec_add(t + 2 * n2, t + 2 * n2, r1, n1 - 1);
232             _fmpz_vec_sub(t + n2, t + n2, d2q1, lenA - lenB);
233 
234             /*
235                Compute q2 = t div B and set R to the remainder, at most a
236                lenB + n2 - 1 by lenB division, so q2 is of length at most n2
237              */
238 
239             _fmpz_poly_pseudo_divrem_divconquer(q2, R, &s2, t, lenB + n2 - 1, B, lenB, inv);
240 
241             _fmpz_vec_clear(t, n1 + 2 * n2 - 1);
242 
243             /*
244                Write Q = L^s2 q1 x^n2 + q2; note len(q1) is non-zero since
245                we are performing pseudo division
246              */
247 
248             fmpz_pow_ui(f, B + (lenB - 1), s2);
249 
250             _fmpz_vec_scalar_mul_fmpz(q1, q1, lenA - lenB + 1 - n2, f);
251 
252             *d = s1 + s2;
253 
254             fmpz_clear(f);
255         }
256     }
257 }
258 
259 void
_fmpz_poly_pseudo_divrem_divconquer(fmpz * Q,fmpz * R,ulong * d,const fmpz * A,slong lenA,const fmpz * B,slong lenB,const fmpz_preinvn_t inv)260 _fmpz_poly_pseudo_divrem_divconquer(fmpz * Q, fmpz * R,
261                      ulong * d, const fmpz * A, slong lenA,
262                           const fmpz * B, slong lenB, const fmpz_preinvn_t inv)
263 {
264     if (lenA <= 2 * lenB - 1)
265     {
266         __fmpz_poly_pseudo_divrem_divconquer(Q, R, d, A, lenA, B, lenB, inv);
267     }
268     else  /* lenA > 2 * lenB - 1 */
269     {
270         fmpz *S = _fmpz_vec_init(lenA);
271 
272         _fmpz_vec_set(S, A, lenA);
273 
274         __fmpz_poly_pseudo_divrem_divconquer(Q, R, d, S, lenA, B, lenB, inv);
275 
276         _fmpz_vec_clear(S, lenA);
277     }
278 }
279 
280 void
fmpz_poly_pseudo_divrem_divconquer(fmpz_poly_t Q,fmpz_poly_t R,ulong * d,const fmpz_poly_t A,const fmpz_poly_t B)281 fmpz_poly_pseudo_divrem_divconquer(fmpz_poly_t Q, fmpz_poly_t R,
282                                    ulong * d, const fmpz_poly_t A,
283                                    const fmpz_poly_t B)
284 {
285     slong lenq, lenr;
286     fmpz *q, *r;
287 
288     if (B->length == 0)
289     {
290         flint_printf("Exception (fmpz_poly_pseudo_divrem_divconquer). Division by zero.\n");
291         flint_abort();
292     }
293     if (Q == R)
294     {
295         flint_printf("Exception (fmpz_poly_pseudo_divrem_divconquer). \n"
296                "Output arguments Q and R may not be aliased.\n");
297         flint_abort();
298     }
299     if (A->length < B->length)
300     {
301         fmpz_poly_zero(Q);
302         fmpz_poly_set(R, A);
303         *d = 0;
304         return;
305     }
306 
307     lenq = A->length - B->length + 1;
308     lenr = A->length;
309     if (Q == A || Q == B)
310         q = _fmpz_vec_init(lenq);
311     else
312     {
313         fmpz_poly_fit_length(Q, lenq);
314         q = Q->coeffs;
315     }
316     if (R == A || R == B)
317         r = _fmpz_vec_init(lenr);
318     else
319     {
320         fmpz_poly_fit_length(R, lenr);
321         r = R->coeffs;
322     }
323 
324     _fmpz_poly_pseudo_divrem_divconquer(q, r, d, A->coeffs, A->length,
325                                                  B->coeffs, B->length, NULL);
326 
327     lenr = B->length - 1;
328     FMPZ_VEC_NORM(r, lenr);
329 
330     if (Q == A || Q == B)
331     {
332         _fmpz_vec_clear(Q->coeffs, Q->alloc);
333         Q->coeffs = q;
334         Q->alloc = lenq;
335         Q->length = lenq;
336     }
337     else
338         _fmpz_poly_set_length(Q, lenq);
339     if (R == A || R == B)
340     {
341         _fmpz_vec_clear(R->coeffs, R->alloc);
342         R->coeffs = r;
343         R->alloc = A->length;
344         R->length = lenr;
345     }
346     else
347         _fmpz_poly_set_length(R, lenr);
348 }
349 
350