1 /*
2 Copyright (C) 2011 William Hart
3 Copyright (C) 2011 Sebastian Pancratz
4 Copyright (C) 2014 William Hart
5
6 This file is part of FLINT.
7
8 FLINT is free software: you can redistribute it and/or modify it under
9 the terms of the GNU Lesser General Public License (LGPL) as published
10 by the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version. See <https://www.gnu.org/licenses/>.
12 */
13
14 #include <stdlib.h>
15 #include <gmp.h>
16 #include "flint.h"
17 #include "fmpz_vec.h"
18 #include "fmpz_mod_poly.h"
19 #include "mpn_extras.h"
20
21 /*
22 We define a whole bunch of macros here which essentially provide
23 the nmod_poly functionality as far as the setting of coefficient
24 data and lengths is concerned, but which do not do any separate
25 memory allocation. None of these macros support aliasing.
26 */
27
28 #define __set(B, lenB, A, lenA) \
29 do { \
30 _fmpz_vec_set((B), (A), (lenA)); \
31 (lenB) = (lenA); \
32 } while (0)
33
34 #define __add(C, lenC, A, lenA, B, lenB) \
35 do { \
36 _fmpz_mod_poly_add((C), (A), (lenA), (B), (lenB), mod); \
37 (lenC) = FLINT_MAX((lenA), (lenB)); \
38 FMPZ_VEC_NORM((C), (lenC)); \
39 } while (0)
40
41 #define __sub(C, lenC, A, lenA, B, lenB) \
42 do { \
43 _fmpz_mod_poly_sub((C), (A), (lenA), (B), (lenB), mod); \
44 (lenC) = FLINT_MAX((lenA), (lenB)); \
45 FMPZ_VEC_NORM((C), (lenC)); \
46 } while (0)
47
48 #define __mul(C, lenC, A, lenA, B, lenB) \
49 do { \
50 if ((lenA) != 0 && (lenB) != 0) \
51 { \
52 if ((lenA) >= (lenB)) \
53 _fmpz_mod_poly_mul((C), (A), (lenA), (B), (lenB), mod); \
54 else \
55 _fmpz_mod_poly_mul((C), (B), (lenB), (A), (lenA), mod); \
56 (lenC) = (lenA) + (lenB) - 1; \
57 } \
58 else \
59 { \
60 (lenC) = 0; \
61 } \
62 } while (0)
63
64 #define __divrem(Q, lenQ, R, lenR, A, lenA, B, lenB) \
65 do { \
66 if ((lenA) >= (lenB)) \
67 { \
68 fmpz_invmod(invB, B + lenB - 1, mod); \
69 _fmpz_mod_poly_divrem((Q), (R), (A), (lenA), (B), (lenB), invB, mod); \
70 (lenQ) = (lenA) - (lenB) + 1; \
71 (lenR) = (lenB) - 1; \
72 FMPZ_VEC_NORM((R), (lenR)); \
73 } \
74 else \
75 { \
76 _fmpz_vec_set((R), (A), (lenA)); \
77 (lenQ) = 0; \
78 (lenR) = (lenA); \
79 } \
80 } while (0)
81
82 #define __div(Q, lenQ, A, lenA, B, lenB) \
83 do { \
84 if ((lenA) >= (lenB)) \
85 { \
86 fmpz * __t = _fmpz_vec_init(lenB - 1); \
87 fmpz_invmod(invB, B + lenB - 1, mod); \
88 _fmpz_mod_poly_divrem((Q), __t, (A), (lenA), (B), (lenB), invB, mod); \
89 _fmpz_vec_clear(__t, lenB - 1); \
90 (lenQ) = (lenA) - (lenB) + 1; \
91 } \
92 else \
93 { \
94 (lenQ) = 0; \
95 } \
96 } while (0)
97
_fmpz_mod_poly_xgcd_hgcd(fmpz * G,fmpz * S,fmpz * T,const fmpz * A,slong lenA,const fmpz * B,slong lenB,const fmpz_t mod)98 slong _fmpz_mod_poly_xgcd_hgcd(fmpz *G, fmpz *S, fmpz *T,
99 const fmpz *A, slong lenA, const fmpz *B, slong lenB,
100 const fmpz_t mod)
101 {
102 slong lenG, lenS, lenT;
103
104 if (lenB == 1)
105 {
106 fmpz_set(G + 0, B + 0);
107 fmpz_set_ui(T, 1);
108 lenG = 1;
109 lenS = 0;
110 lenT = 1;
111 }
112 else
113 {
114 slong lenq, lenr, len1 = lenA + lenB;
115
116 fmpz *q = _fmpz_vec_init(len1);
117 fmpz *r = q + lenA;
118 fmpz_t invB;
119
120 fmpz_init(invB);
121
122 __divrem(q, lenq, r, lenr, A, lenA, B, lenB);
123
124 if (lenr == 0)
125 {
126 __set(G, lenG, B, lenB);
127 fmpz_set_ui(T + 0, 1);
128 lenS = 0;
129 lenT = 1;
130 }
131 else
132 {
133 fmpz *h, *j, *v, *w, *R[4], *X;
134 slong lenh, lenj, lenv, lenw, lenR[4], len2;
135 int sgnR;
136
137 lenh = lenj = lenB;
138 lenv = lenw = lenA + lenB - 2;
139 lenR[0] = lenR[1] = lenR[2] = lenR[3] = (lenB + 1) / 2;
140
141 len2 = 2 * lenh + 2 * lenv + 4 * lenR[0];
142 X = _fmpz_vec_init(len2);
143 h = X;
144 j = h + lenh;
145 v = j + lenj;
146 w = v + lenv;
147 R[0] = w + lenw;
148 R[1] = R[0] + lenR[0];
149 R[2] = R[1] + lenR[1];
150 R[3] = R[2] + lenR[2];
151
152 sgnR = _fmpz_mod_poly_hgcd(R, lenR, h, &lenh, j, &lenj, B, lenB, r, lenr, mod);
153
154 if (sgnR > 0)
155 {
156 _fmpz_mod_poly_neg(S, R[1], lenR[1], mod);
157 _fmpz_vec_set(T, R[0], lenR[0]);
158 }
159 else
160 {
161 _fmpz_vec_set(S, R[1], lenR[1]);
162 _fmpz_mod_poly_neg(T, R[0], lenR[0], mod);
163 }
164 lenS = lenR[1];
165 lenT = lenR[0];
166
167 while (lenj != 0)
168 {
169 __divrem(q, lenq, r, lenr, h, lenh, j, lenj);
170
171 __mul(v, lenv, q, lenq, T, lenT);
172 {
173 slong l;
174 _fmpz_vec_swap(S, T, FLINT_MAX(lenS, lenT));
175 l = lenS; lenS = lenT; lenT = l;
176 }
177 if (lenr != 0) /* prevent overflow of T on last iteration */
178 __sub(T, lenT, T, lenT, v, lenv);
179 else /* lenr == 0 */
180 {
181 __set(G, lenG, j, lenj);
182
183 goto cofactor;
184 }
185 if (lenj < FMPZ_MOD_POLY_GCD_CUTOFF)
186 {
187 fmpz *u0 = R[0], *u1 = R[1];
188 slong lenu0 = lenr - 1, lenu1 = lenj - 1;
189
190 fmpz_invmod(invB, r + lenr - 1, mod);
191 lenG = _fmpz_mod_poly_xgcd_euclidean(G, u0, u1, j, lenj, r, lenr, invB, mod);
192 FMPZ_VEC_NORM(u0, lenu0);
193 FMPZ_VEC_NORM(u1, lenu1);
194
195 __mul(v, lenv, S, lenS, u0, lenu0);
196 __mul(w, lenw, T, lenT, u1, lenu1);
197 __add(S, lenS, v, lenv, w, lenw);
198
199 goto cofactor;
200 }
201
202 sgnR = _fmpz_mod_poly_hgcd(R, lenR, h, &lenh, j, &lenj, j,lenj, r, lenr, mod);
203
204 __mul(v, lenv, R[1], lenR[1], T, lenT);
205 __mul(w, lenw, R[2], lenR[2], S, lenS);
206
207 __mul(q, lenq, S, lenS, R[3], lenR[3]);
208 if (sgnR > 0)
209 __sub(S, lenS, q, lenq, v, lenv);
210 else
211 __sub(S, lenS, v, lenv, q, lenq);
212
213 __mul(q, lenq, T, lenT, R[0], lenR[0]);
214 if (sgnR > WORD(0))
215 __sub(T, lenT, q, lenq, w, lenw);
216 else
217 __sub(T, lenT, w, lenw, q, lenq);
218 }
219 __set(G, lenG, h, lenh);
220
221 cofactor:
222
223 __mul(v, lenv, S, lenS, A, lenA);
224 __sub(w, lenw, G, lenG, v, lenv);
225 __div(T, lenT, w, lenw, B, lenB);
226
227 _fmpz_vec_clear(X, len2);
228 }
229
230 _fmpz_vec_clear(q, len1);
231
232 fmpz_clear(invB);
233 }
234
235 _fmpz_vec_zero(S + lenS, lenB - 1 - lenS);
236 _fmpz_vec_zero(T + lenT, lenA - 1 - lenT);
237
238 return lenG;
239 }
240
fmpz_mod_poly_xgcd_hgcd(fmpz_mod_poly_t G,fmpz_mod_poly_t S,fmpz_mod_poly_t T,const fmpz_mod_poly_t A,const fmpz_mod_poly_t B,const fmpz_mod_ctx_t ctx)241 void fmpz_mod_poly_xgcd_hgcd(fmpz_mod_poly_t G, fmpz_mod_poly_t S,
242 fmpz_mod_poly_t T, const fmpz_mod_poly_t A,
243 const fmpz_mod_poly_t B, const fmpz_mod_ctx_t ctx)
244 {
245 if (A->length < B->length)
246 {
247 fmpz_mod_poly_xgcd_hgcd(G, T, S, B, A, ctx);
248 }
249 else /* lenA >= lenB >= 0 */
250 {
251 const fmpz * p = fmpz_mod_ctx_modulus(ctx);
252 const slong lenA = A->length, lenB = B->length;
253 slong lenS, lenT;
254 fmpz_t inv;
255
256 fmpz_init(inv);
257
258 if (lenA == 0) /* lenA = lenB = 0 */
259 {
260 fmpz_mod_poly_zero(G, ctx);
261 fmpz_mod_poly_zero(S, ctx);
262 fmpz_mod_poly_zero(T, ctx);
263 }
264 else if (lenB == 0) /* lenA > lenB = 0 */
265 {
266 fmpz_invmod(inv, A->coeffs + lenA - 1, p);
267 fmpz_mod_poly_scalar_mul_fmpz(G, A, inv, ctx);
268 fmpz_mod_poly_zero(T, ctx);
269 fmpz_mod_poly_set_coeff_fmpz(S, 0, inv, ctx);
270 _fmpz_mod_poly_set_length(S, 1);
271 }
272 else if (lenB == 1) /* lenA >= lenB = 1 */
273 {
274 fmpz_mod_poly_fit_length(T, 1, ctx);
275 _fmpz_mod_poly_set_length(T, 1);
276 fmpz_invmod(inv, B->coeffs + 0, p);
277 fmpz_set(T->coeffs + 0, inv);
278 fmpz_mod_poly_set_coeff_ui(G, 0, 1, ctx);
279 _fmpz_mod_poly_set_length(G, 1);
280 fmpz_mod_poly_zero(S, ctx);
281 }
282 else /* lenA >= lenB >= 2 */
283 {
284 fmpz *g, *s, *t;
285 slong lenG;
286
287 if (G == A || G == B)
288 {
289 g = _fmpz_vec_init(FLINT_MIN(lenA, lenB));
290 }
291 else
292 {
293 fmpz_mod_poly_fit_length(G, FLINT_MIN(lenA, lenB), ctx);
294 g = G->coeffs;
295 }
296 if (S == A || S == B)
297 {
298 s = _fmpz_vec_init(FLINT_MAX(lenB - 1, 2));
299 }
300 else
301 {
302 fmpz_mod_poly_fit_length(S, FLINT_MAX(lenB - 1, 2), ctx);
303 s = S->coeffs;
304 }
305 if (T == A || T == B)
306 {
307 t = _fmpz_vec_init(FLINT_MAX(lenA - 1, 2));
308 }
309 else
310 {
311 fmpz_mod_poly_fit_length(T, FLINT_MAX(lenA - 1, 2), ctx);
312 t = T->coeffs;
313 }
314
315 if (lenA >= lenB)
316 lenG = _fmpz_mod_poly_xgcd_hgcd(g, s, t, A->coeffs, lenA,
317 B->coeffs, lenB, p);
318 else
319 lenG = _fmpz_mod_poly_xgcd_hgcd(g, t, s, B->coeffs, lenB,
320 A->coeffs, lenA, p);
321
322 if (G == A || G == B)
323 {
324 _fmpz_vec_clear(G->coeffs, FLINT_MIN(lenA, lenB));
325 G->coeffs = g;
326 G->alloc = FLINT_MIN(lenA, lenB);
327 }
328 if (S == A || S == B)
329 {
330 _fmpz_vec_clear(S->coeffs, FLINT_MAX(lenB - 1, 2));
331 S->coeffs = s;
332 S->alloc = FLINT_MAX(lenB - 1, 2);
333 }
334 if (T == A || T == B)
335 {
336 _fmpz_vec_clear(T->coeffs, FLINT_MAX(lenA - 1, 2));
337 T->coeffs = t;
338 T->alloc = FLINT_MAX(lenA - 1, 2);
339 }
340
341 _fmpz_mod_poly_set_length(G, lenG);
342 lenS = FLINT_MAX(lenB - lenG, 1);
343 lenT = FLINT_MAX(lenA - lenG, 1);
344 FMPZ_VEC_NORM(S->coeffs, lenS);
345 FMPZ_VEC_NORM(T->coeffs, lenT);
346
347 _fmpz_mod_poly_set_length(S, lenS);
348 _fmpz_mod_poly_set_length(T, lenT);
349
350 if (!fmpz_is_one(G->coeffs + lenG - 1))
351 {
352 fmpz_invmod(inv, G->coeffs + lenG - 1, p);
353 fmpz_mod_poly_scalar_mul_fmpz(G, G, inv, ctx);
354 fmpz_mod_poly_scalar_mul_fmpz(S, S, inv, ctx);
355 fmpz_mod_poly_scalar_mul_fmpz(T, T, inv, ctx);
356 }
357 }
358
359 fmpz_clear(inv);
360 }
361 }
362
363 #undef __set
364 #undef __add
365 #undef __sub
366 #undef __mul
367 #undef __divrem
368 #undef __div
369
370