1 /*
2 Copyright (C) 2008, 2009 William Hart
3 Copyright (C) 2010 Sebastian Pancratz
4
5 This file is part of FLINT.
6
7 FLINT is free software: you can redistribute it and/or modify it under
8 the terms of the GNU Lesser General Public License (LGPL) as published
9 by the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version. See <http://www.gnu.org/licenses/>.
11 */
12
13 #include <stdlib.h>
14 #include "fmpz_poly.h"
15
16 static void
__fmpz_poly_pseudo_divrem_divconquer(fmpz * Q,fmpz * R,ulong * d,const fmpz * A,slong lenA,const fmpz * B,slong lenB,const fmpz_preinvn_t inv)17 __fmpz_poly_pseudo_divrem_divconquer(fmpz * Q, fmpz * R,
18 ulong * d, const fmpz * A, slong lenA,
19 const fmpz * B, slong lenB, const fmpz_preinvn_t inv)
20 {
21 if (lenB <= 16 || (lenA > 2 * lenB - 1 && lenA < 128))
22 {
23 _fmpz_poly_pseudo_divrem_basecase(Q, R, d, A, lenA, B, lenB, inv);
24 }
25 else
26 {
27 const slong n2 = lenB / 2;
28 const slong n1 = lenB - n2;
29
30 const fmpz * d1 = B + n2;
31 const fmpz * d2 = B;
32 const fmpz * d3 = B + n1;
33 const fmpz * d4 = B;
34
35 if (lenA <= lenB + n2 - 1)
36 {
37 fmpz *p1, *r1, *d2q1;
38 fmpz *f;
39
40 /*
41 Shift A right by n1, zero the bottom n2 - 1 coeffs; call this p1
42 */
43
44 p1 = (fmpz *) flint_malloc((lenA - n1) * sizeof(fmpz));
45 {
46 slong i;
47 flint_mpn_zero((mp_ptr) p1, n2 - 1);
48 for (i = n2 - 1; i < lenA - n1; i++)
49 p1[i] = (A + n1)[i];
50 }
51
52 /*
53 Compute p1 div d3, at most a 2 n2 - 1 by n2 division, leaving
54 lenA - lenB + 1 <= n2 terms in the quotient
55 */
56
57 r1 = R + n1;
58 _fmpz_poly_pseudo_divrem_divconquer(Q, r1, d, p1, lenA - n1, d3, n2, inv);
59
60 flint_free(p1);
61
62 /*
63 Push the relevant {n2 - 1} terms of the remainder to the
64 top of {R, lenA}
65 */
66
67 {
68 slong i;
69 for (i = n2 - 2; i >= 0; i--)
70 fmpz_swap(R + lenA - (n2 - 1) + i, r1 + i);
71 r1 = R + lenA - (n2 - 1);
72 }
73
74 /*
75 Compute d2q1 = Q d4 of length lenA - n2, which is
76 at most n1 + n2 - 1 terms
77 */
78
79 d2q1 = R;
80 _fmpz_poly_mul(d2q1, d4, n1, Q, lenA - lenB + 1);
81
82 /*
83 Compute R = L^d R', where R' is the terms of A we have not dealt,
84 of which there are at most n1 + n2 - 1; that is,
85
86 Set R to {A, n1 + n2 - 1} * f + r1 x^n1 - d2q1
87 */
88
89 _fmpz_vec_neg(R, R, lenA - n2);
90 _fmpz_vec_add(R + n1, R + n1, R + lenA - n2 + 1, lenA - lenB);
91 _fmpz_vec_swap(R + lenA - n2, R + 2 * lenA - lenB + 1 - n2, n2 - (lenA - lenB + 1));
92
93 f = R + lenB - 1;
94 fmpz_pow_ui(f, B + (lenB - 1), *d);
95 _fmpz_vec_scalar_addmul_fmpz(R, A, n1 + n2 - 1, f);
96 }
97 else if (lenA > 2 * lenB - 1)
98 {
99 /*
100 XXX: In this case, we expect A to be modifiable
101 */
102
103 ulong s1, s2;
104 const slong shift = lenA - 2 * lenB + 1;
105
106 fmpz * q1 = Q + shift;
107 fmpz * q2 = Q;
108 fmpz * r1 = R;
109
110 fmpz *p1, *t;
111 fmpz_t f;
112
113 fmpz_init(f);
114
115 /*
116 Shift A right until it is of length 2 lenB - 1, call this p1;
117 zero the bottom lenB - 1 coeffs
118 */
119
120 p1 = (fmpz *) flint_malloc((2 * lenB - 1) * sizeof(fmpz));
121 {
122 slong i;
123 flint_mpn_zero((mp_ptr) p1, lenB - 1);
124 for (i = lenB - 1; i < 2*lenB - 1; i++)
125 p1[i] = (A + shift)[i];
126 }
127
128 /*
129 Set q1 to p1 div B, a 2 lenB - 1 by lenB division, so q1 ends up
130 being at most length lenB; r1 is of length at most lenB - 1
131 */
132
133 _fmpz_poly_pseudo_divrem_divconquer(q1, r1, &s1, p1, 2 * lenB - 1, B, lenB, inv);
134
135 flint_free(p1);
136
137 /*
138 Compute t = L^s1 a2 + r1 x^shift, of length at most lenA - lenB
139 since r1 is of length at most lenB - 1. Here a2 is what remains
140 of A after the first lenR coefficients are removed
141 */
142
143 t = (fmpz *) A;
144
145 fmpz_pow_ui(f, B + (lenB - 1), s1);
146
147 _fmpz_vec_scalar_mul_fmpz(t, A, lenA - lenB, f);
148 _fmpz_vec_add(t + shift, t + shift, r1, lenB - 1);
149
150 /*
151 Compute q2 = t div B; it is a smaller division than the original
152 since len(t) <= lenA - lenB, and r2 has length at most lenB - 1
153 */
154
155 _fmpz_poly_pseudo_divrem_divconquer(q2, R, &s2, t, lenA - lenB, B, lenB, inv);
156
157 /*
158 Write out Q = L^s2 q1 x^shift + q2, of length at most
159 lenB + shift. Note q2 has length at most shift since it is at
160 most an lenA - lenB by lenB division; q1 cannot have length zero
161 since we are doing pseudo division
162 */
163
164 fmpz_pow_ui(f, B + (lenB - 1), s2);
165
166 _fmpz_vec_scalar_mul_fmpz(q1, q1, lenB, f);
167
168 *d = s1 + s2;
169
170 fmpz_clear(f);
171 }
172 else /* n1 + 2 n2 - 1 < lenA <= 2 lenB - 1 */
173 {
174 fmpz * q1 = Q + n2;
175 fmpz * q2 = Q;
176 fmpz * r1 = R;
177 fmpz * d2q1 = R + (n1 - 1);
178 fmpz *p1, *t;
179 fmpz_t f;
180 ulong s1, s2;
181
182 fmpz_init(f);
183
184 /*
185 Set p1 to the top lenA - 2 n2 coeffs of A, clearing the bottom
186 n1 - 1 coeffs
187 */
188
189 p1 = (fmpz *) flint_malloc((lenA - 2 * n2) * sizeof(fmpz));
190 {
191 slong i;
192 flint_mpn_zero((mp_ptr) p1, n1 - 1);
193 for (i = n1 - 1; i < lenA - 2 * n2; i++)
194 p1[i] = (A + 2 * n2)[i];
195 }
196
197 /*
198 Set q1 to p1 div d1, at most a 2 n1 - 1 by n1 division, so q1 ends
199 up being of length at most n1; r1 is of length n1 - 1
200 */
201
202 _fmpz_poly_pseudo_divrem_divconquer(q1, r1, &s1, p1, lenA - 2 * n2, d1, n1, inv);
203
204 flint_free(p1);
205
206 /*
207 Compute d2q1 = d2q1, of length lenA - lenB
208
209 Note lenA - lenB <= lenB - 1 <= 2 n2 and lenA - (n1 - 1) > 2 n2,
210 so we can store d2q1 in the top 2 n2 coeffs of R
211 */
212
213 if (n2 >= lenA - n1 - 2 * n2 + 1)
214 _fmpz_poly_mul(d2q1, d2, n2, q1, lenA - (n1 + 2 * n2 - 1));
215 else
216 _fmpz_poly_mul(d2q1, q1, lenA - (n1 + 2 * n2 - 1), d2, n2);
217
218 /*
219 Compute
220 t = L^s1 * (a2 x^{n1 + n2 - 1} + a3)
221 + r1 x^{2 n2} - d2q1 x^n2
222 of length at most lenB + n2 - 1, since r1 is of length at most
223 n1 - 1 and d2q1 is of length at most n1 + n2 - 1
224 */
225
226 t = _fmpz_vec_init(n1 + 2 * n2 - 1);
227
228 fmpz_pow_ui(f, B + (lenB - 1), s1);
229
230 _fmpz_vec_scalar_mul_fmpz(t, A, n1 + 2 * n2 - 1, f);
231 _fmpz_vec_add(t + 2 * n2, t + 2 * n2, r1, n1 - 1);
232 _fmpz_vec_sub(t + n2, t + n2, d2q1, lenA - lenB);
233
234 /*
235 Compute q2 = t div B and set R to the remainder, at most a
236 lenB + n2 - 1 by lenB division, so q2 is of length at most n2
237 */
238
239 _fmpz_poly_pseudo_divrem_divconquer(q2, R, &s2, t, lenB + n2 - 1, B, lenB, inv);
240
241 _fmpz_vec_clear(t, n1 + 2 * n2 - 1);
242
243 /*
244 Write Q = L^s2 q1 x^n2 + q2; note len(q1) is non-zero since
245 we are performing pseudo division
246 */
247
248 fmpz_pow_ui(f, B + (lenB - 1), s2);
249
250 _fmpz_vec_scalar_mul_fmpz(q1, q1, lenA - lenB + 1 - n2, f);
251
252 *d = s1 + s2;
253
254 fmpz_clear(f);
255 }
256 }
257 }
258
259 void
_fmpz_poly_pseudo_divrem_divconquer(fmpz * Q,fmpz * R,ulong * d,const fmpz * A,slong lenA,const fmpz * B,slong lenB,const fmpz_preinvn_t inv)260 _fmpz_poly_pseudo_divrem_divconquer(fmpz * Q, fmpz * R,
261 ulong * d, const fmpz * A, slong lenA,
262 const fmpz * B, slong lenB, const fmpz_preinvn_t inv)
263 {
264 if (lenA <= 2 * lenB - 1)
265 {
266 __fmpz_poly_pseudo_divrem_divconquer(Q, R, d, A, lenA, B, lenB, inv);
267 }
268 else /* lenA > 2 * lenB - 1 */
269 {
270 fmpz *S = _fmpz_vec_init(lenA);
271
272 _fmpz_vec_set(S, A, lenA);
273
274 __fmpz_poly_pseudo_divrem_divconquer(Q, R, d, S, lenA, B, lenB, inv);
275
276 _fmpz_vec_clear(S, lenA);
277 }
278 }
279
280 void
fmpz_poly_pseudo_divrem_divconquer(fmpz_poly_t Q,fmpz_poly_t R,ulong * d,const fmpz_poly_t A,const fmpz_poly_t B)281 fmpz_poly_pseudo_divrem_divconquer(fmpz_poly_t Q, fmpz_poly_t R,
282 ulong * d, const fmpz_poly_t A,
283 const fmpz_poly_t B)
284 {
285 slong lenq, lenr;
286 fmpz *q, *r;
287
288 if (B->length == 0)
289 {
290 flint_printf("Exception (fmpz_poly_pseudo_divrem_divconquer). Division by zero.\n");
291 flint_abort();
292 }
293 if (Q == R)
294 {
295 flint_printf("Exception (fmpz_poly_pseudo_divrem_divconquer). \n"
296 "Output arguments Q and R may not be aliased.\n");
297 flint_abort();
298 }
299 if (A->length < B->length)
300 {
301 fmpz_poly_zero(Q);
302 fmpz_poly_set(R, A);
303 *d = 0;
304 return;
305 }
306
307 lenq = A->length - B->length + 1;
308 lenr = A->length;
309 if (Q == A || Q == B)
310 q = _fmpz_vec_init(lenq);
311 else
312 {
313 fmpz_poly_fit_length(Q, lenq);
314 q = Q->coeffs;
315 }
316 if (R == A || R == B)
317 r = _fmpz_vec_init(lenr);
318 else
319 {
320 fmpz_poly_fit_length(R, lenr);
321 r = R->coeffs;
322 }
323
324 _fmpz_poly_pseudo_divrem_divconquer(q, r, d, A->coeffs, A->length,
325 B->coeffs, B->length, NULL);
326
327 lenr = B->length - 1;
328 FMPZ_VEC_NORM(r, lenr);
329
330 if (Q == A || Q == B)
331 {
332 _fmpz_vec_clear(Q->coeffs, Q->alloc);
333 Q->coeffs = q;
334 Q->alloc = lenq;
335 Q->length = lenq;
336 }
337 else
338 _fmpz_poly_set_length(Q, lenq);
339 if (R == A || R == B)
340 {
341 _fmpz_vec_clear(R->coeffs, R->alloc);
342 R->coeffs = r;
343 R->alloc = A->length;
344 R->length = lenr;
345 }
346 else
347 _fmpz_poly_set_length(R, lenr);
348 }
349
350