1 /*
2     Copyright (C) 2020 Daniel Schultz
3 
4     This file is part of FLINT.
5 
6     FLINT is free software: you can redistribute it and/or modify it under
7     the terms of the GNU Lesser General Public License (LGPL) as published
8     by the Free Software Foundation; either version 2.1 of the License, or
9     (at your option) any later version.  See <https://www.gnu.org/licenses/>.
10 */
11 
12 #include "n_poly.h"
13 
14 /* hold positive and negative powers of b */
n_fq_pow_cache_start_n_fq(const mp_limb_t * b,n_poly_t pos,n_poly_t bin,n_poly_t neg,const fq_nmod_ctx_t ctx)15 void n_fq_pow_cache_start_n_fq(
16     const mp_limb_t * b,
17     n_poly_t pos,       /* b^0, b^1, b^2, ..., b^50 */
18     n_poly_t bin,       /* b^1, b^2, b^3,  b^4, b^8, b^12, ... */
19     n_poly_t neg,       /* b^-0, b^-1, b^-2, ..., b^-50 */
20     const fq_nmod_ctx_t ctx)
21 {
22     slong d = fq_nmod_ctx_degree(ctx);
23     n_poly_fit_length(pos, d*2);
24     pos->length = 2;
25     _n_fq_one(pos->coeffs + d*0, d);
26     _n_fq_set(pos->coeffs + d*1, b, d);
27     bin->length = 0;
28     neg->length = 0;
29 }
30 
n_fq_pow_cache_start_fq_nmod(const fq_nmod_t b,n_poly_t pos,n_poly_t bin,n_poly_t neg,const fq_nmod_ctx_t ctx)31 void n_fq_pow_cache_start_fq_nmod(
32     const fq_nmod_t b,
33     n_poly_t pos,
34     n_poly_t bin,
35     n_poly_t neg,
36     const fq_nmod_ctx_t ctx)
37 {
38     slong d = fq_nmod_ctx_degree(ctx);
39     n_poly_fit_length(pos, d*2);
40     pos->length = 2;
41     _n_fq_one(pos->coeffs + d*0, d);
42     n_fq_set_fq_nmod(pos->coeffs + d*1, b, ctx);
43     bin->length = 0;
44     neg->length = 0;
45 }
46 
47 /* r = a*b^e */
n_fq_pow_cache_mulpow_ui_array_bin(mp_limb_t * r,const mp_limb_t * a,mp_limb_t * elimbs,slong elen,n_poly_t bin,const mp_limb_t * b,const fq_nmod_ctx_t ctx,mp_limb_t * tmp)48 static void n_fq_pow_cache_mulpow_ui_array_bin(
49     mp_limb_t * r,
50     const mp_limb_t * a,
51     mp_limb_t * elimbs, slong elen,
52     n_poly_t bin,
53     const mp_limb_t * b,
54     const fq_nmod_ctx_t ctx,
55     mp_limb_t * tmp)    /* size d*N_FQ_MUL_ITCH */
56 {
57     slong d = fq_nmod_ctx_degree(ctx);
58     const mp_limb_t * s = a; /* source */
59     slong ei = 0, i = 0;
60     mp_limb_t e = (ei < elen) ? elimbs[ei] : 0;
61     int bits_left = FLINT_BITS;
62 
63     /* complicated code needed if an odd number of bits per limb */
64     FLINT_ASSERT((FLINT_BITS % 2) == 0);
65 
66     if (bin->length < 3)
67     {
68         n_poly_fit_length(bin, 3*d);
69         bin->length = 3;
70         _n_fq_set(bin->coeffs + d*0, b, d);
71         _n_fq_mul(bin->coeffs + d*1, bin->coeffs + d*0,
72                                      bin->coeffs + d*0, ctx, tmp);
73         _n_fq_mul(bin->coeffs + d*2, bin->coeffs + d*1,
74                                      bin->coeffs + d*0, ctx, tmp);
75     }
76 
77     while (ei < elen)
78     {
79         FLINT_ASSERT(i <= bin->length);
80 
81         if (i + 3 > bin->length)
82         {
83             FLINT_ASSERT(i >= 3);
84 
85             n_poly_fit_length(bin, d*(bin->length + 3));
86             bin->length += 3;
87 
88             _n_fq_mul(bin->coeffs + d*(i + 0), bin->coeffs + d*(i - 2),
89                                             bin->coeffs + d*(i - 2), ctx, tmp);
90 
91             _n_fq_mul(bin->coeffs + d*(i + 1), bin->coeffs + d*(i + 0),
92                                             bin->coeffs + d*(i + 0), ctx, tmp);
93 
94             _n_fq_mul(bin->coeffs + d*(i + 2), bin->coeffs + d*(i + 1),
95                                             bin->coeffs + d*(i + 0), ctx, tmp);
96         }
97 
98         if ((e%4) != 0)
99         {
100             _n_fq_mul(r, s, bin->coeffs + d*(i + (e%4) - 1), ctx, tmp);
101             s = r;
102         }
103 
104         i += 3;
105 
106         e = e/4;
107 
108         if (ei + 1 < elen)
109         {
110             bits_left -= 2;
111             if (bits_left <= 0)
112             {
113                 ei++;
114                 e = elimbs[ei];
115                 bits_left = FLINT_BITS;
116             }
117         }
118         else
119         {
120             if (e == 0)
121                 break;
122         }
123     }
124 
125     if (s != r)
126         _n_fq_set(r, s, d);
127 }
128 
129 /* r =  a*b^e */
n_fq_pow_cache_mulpow_ui(mp_limb_t * r,const mp_limb_t * a,ulong e,n_poly_t pos,n_poly_t bin,n_poly_t neg,const fq_nmod_ctx_t ctx)130 void n_fq_pow_cache_mulpow_ui(
131     mp_limb_t * r,
132     const mp_limb_t * a,
133     ulong e,
134     n_poly_t pos,
135     n_poly_t bin,
136     n_poly_t neg,
137     const fq_nmod_ctx_t ctx)
138 {
139     slong d = fq_nmod_ctx_degree(ctx);
140     slong i = pos->length;
141     int a_in_fp = _n_fq_is_ui(a, d);
142 
143     FLINT_ASSERT(i >= 2);
144 
145     if (a[0] == 0 && a_in_fp)
146     {
147         _n_fq_zero(r, d);
148         return;
149     }
150 
151     if (e < 50)
152     {
153         n_poly_fit_length(pos, d*(FLINT_MAX(e + 1, i) + N_FQ_MUL_ITCH));
154         while (i <= e)
155         {
156             FLINT_ASSERT(d*(i + 1 + N_FQ_MUL_ITCH) <= pos->alloc);
157             _n_fq_mul(pos->coeffs + d*i, pos->coeffs + d*1,
158                          pos->coeffs + d*(i - 1), ctx, pos->coeffs + d*(i + 1));
159             i++;
160             pos->length = i;
161         }
162 
163         if (a_in_fp)
164             _nmod_vec_scalar_mul_nmod(r, pos->coeffs + d*e, d, a[0], ctx->mod);
165         else
166             _n_fq_mul(r, a, pos->coeffs + d*e, ctx, pos->coeffs + d*i);
167         return;
168     }
169 
170     if (_n_fq_is_zero(pos->coeffs + d*1, d))
171     {
172         _n_fq_zero(r, d);
173         return;
174     }
175 
176     n_poly_fit_length(pos, d*(i + N_FQ_MUL_ITCH));
177     n_fq_pow_cache_mulpow_ui_array_bin(r, a, &e, 1, bin, pos->coeffs + d*1,
178                                                        ctx, pos->coeffs + d*i);
179 }
180 
181 /* r =  a*b^-e */
n_fq_pow_cache_mulpow_neg_ui(mp_limb_t * r,const mp_limb_t * a,ulong e,n_poly_t pos,n_poly_t bin,n_poly_t neg,const fq_nmod_ctx_t ctx)182 void n_fq_pow_cache_mulpow_neg_ui(
183     mp_limb_t * r,
184     const mp_limb_t * a,
185     ulong e,
186     n_poly_t pos,
187     n_poly_t bin,
188     n_poly_t neg,
189     const fq_nmod_ctx_t ctx)
190 {
191     slong i, d = fq_nmod_ctx_degree(ctx);
192     mp_limb_t * tmp;
193     fmpz_t f;
194 
195     FLINT_ASSERT(pos->length >= 2);
196 
197     if (_n_fq_is_zero(pos->coeffs + d*1, d))
198     {
199         if (e > 0)
200             _n_fq_zero(r, d);
201         else
202             _n_fq_set(r, a, d);
203         return;
204     }
205 
206     if (e < 50)
207     {
208         n_poly_fit_length(pos, d*(pos->length
209                                        + FLINT_MAX(N_FQ_MUL_ITCH, N_FQ_INV_ITCH)));
210         tmp = pos->coeffs + d*pos->length;
211 
212         if (neg->length < 2)
213         {
214             n_poly_fit_length(neg, 2*d);
215             neg->length = 2;
216             _n_fq_one(neg->coeffs + d*0, d);
217             _n_fq_inv(neg->coeffs + d*1, pos->coeffs + d*1, ctx, tmp);
218         }
219 
220         i = neg->length;
221 
222         n_poly_fit_length(neg, d*(e + 1));
223         while (i <= e)
224         {
225             _n_fq_mul(neg->coeffs + d*i, neg->coeffs + d*1,
226                                      neg->coeffs + d*(i - 1), ctx, tmp);
227             i++;
228             neg->length = i;
229         }
230 
231         _n_fq_mul(r, a, neg->coeffs + d*e, ctx, tmp);
232         return;
233     }
234 
235     fmpz_init(f);
236     fmpz_neg_ui(f, e);
237     n_fq_pow_cache_mulpow_fmpz(r, a, f, pos, bin, neg, ctx);
238     fmpz_clear(f);
239 }
240 
241 /* r = a*b^-e */
n_fq_pow_cache_mulpow_fmpz(mp_limb_t * r,const mp_limb_t * a,const fmpz_t e,n_poly_t pos,n_poly_t bin,n_poly_t neg,const fq_nmod_ctx_t ctx)242 void n_fq_pow_cache_mulpow_fmpz(
243     mp_limb_t * r,
244     const mp_limb_t * a,
245     const fmpz_t e,
246     n_poly_t pos,
247     n_poly_t bin,
248     n_poly_t neg,
249     const fq_nmod_ctx_t ctx)
250 {
251     slong d = fq_nmod_ctx_degree(ctx);
252     fmpz_t t;
253 
254     FLINT_ASSERT(pos->length >= 2);
255 
256     if (!COEFF_IS_MPZ(*e) && *e >= 0)
257     {
258         n_fq_pow_cache_mulpow_ui(r, a, *e, pos, bin, neg, ctx);
259         return;
260     }
261 
262     if (_n_fq_is_zero(pos->coeffs + d*1, d))
263     {
264         if (!fmpz_is_zero(e))
265             _n_fq_zero(r, d);
266         else
267             _n_fq_set(r, a, d);
268         return;
269     }
270 
271     fmpz_init(t);
272     fq_nmod_ctx_order(t, ctx);
273     fmpz_sub_ui(t, t, 1);
274     fmpz_mod(t, e, t);
275 
276     n_poly_fit_length(pos, d*(pos->length + N_FQ_MUL_ITCH));
277 
278     if (COEFF_IS_MPZ(*t))
279         n_fq_pow_cache_mulpow_ui_array_bin(r, a,
280                      COEFF_TO_PTR(*t)->_mp_d, COEFF_TO_PTR(*t)->_mp_size, bin,
281                           pos->coeffs + d*1, ctx, pos->coeffs + d*pos->length);
282     else
283         n_fq_pow_cache_mulpow_ui(r, a, *t, pos, bin, neg, ctx);
284 
285     fmpz_clear(t);
286 }
287