1 /*
2     Copyright (C) 2019 Daniel Schultz
3 
4     This file is part of FLINT.
5 
6     FLINT is free software: you can redistribute it and/or modify it under
7     the terms of the GNU Lesser General Public License (LGPL) as published
8     by the Free Software Foundation; either version 2.1 of the License, or
9     (at your option) any later version.  See <http://www.gnu.org/licenses/>.
10 */
11 
12 #include "fq_nmod_mpoly.h"
13 
_fq_nmod_mpoly_divrem_monagan_pearce(slong * lenr,fq_nmod_struct ** polyq,ulong ** expq,slong * allocq,fq_nmod_struct ** polyr,ulong ** expr,slong * allocr,const fq_nmod_struct * coeff2,const ulong * exp2,slong len2,const fq_nmod_struct * coeff3,const ulong * exp3,slong len3,slong bits,slong N,const ulong * cmpmask,const fq_nmod_ctx_t fqctx)14 slong _fq_nmod_mpoly_divrem_monagan_pearce(slong * lenr,
15                   fq_nmod_struct ** polyq,      ulong ** expq, slong * allocq,
16                   fq_nmod_struct ** polyr,      ulong ** expr, slong * allocr,
17             const fq_nmod_struct * coeff2, const ulong * exp2, slong len2,
18             const fq_nmod_struct * coeff3, const ulong * exp3, slong len3,
19          slong bits, slong N, const ulong * cmpmask, const fq_nmod_ctx_t fqctx)
20 {
21     slong i, j, q_len, r_len, s;
22     slong next_loc;
23     slong heap_len = 2; /* heap zero index unused */
24     mpoly_heap_s * heap;
25     mpoly_heap_t * chain;
26     slong * store, * store_base;
27     mpoly_heap_t * x;
28     fq_nmod_struct * q_coeff = *polyq;
29     fq_nmod_struct * r_coeff = *polyr;
30     ulong * q_exp = *expq;
31     ulong * r_exp = *expr;
32     ulong * exp, * exps;
33     ulong ** exp_list;
34     slong exp_next;
35     ulong mask;
36     slong * hind;
37     int lt_divides;
38     fq_nmod_t lc_minus_inv, pp;
39     TMP_INIT;
40 
41     TMP_START;
42     fq_nmod_init(pp, fqctx);
43     fq_nmod_init(lc_minus_inv, fqctx);
44 
45     /* alloc array of heap nodes which can be chained together */
46     next_loc = len3 + 4;   /* something bigger than heap can ever be */
47     heap = (mpoly_heap_s *) TMP_ALLOC((len3 + 1)*sizeof(mpoly_heap_s));
48     chain = (mpoly_heap_t *) TMP_ALLOC(len3*sizeof(mpoly_heap_t));
49     store = store_base = (slong *) TMP_ALLOC(2*len3*sizeof(mpoly_heap_t *));
50 
51     /* array of exponent vectors, each of "N" words */
52     exps = (ulong *) TMP_ALLOC(len3*N*sizeof(ulong));
53     /* list of pointers to available exponent vectors */
54     exp_list = (ulong **) TMP_ALLOC(len3*sizeof(ulong *));
55     /* space to save copy of current exponent vector */
56     exp = (ulong *) TMP_ALLOC(N*sizeof(ulong));
57     /* set up list of available exponent vectors */
58     exp_next = 0;
59     for (i = 0; i < len3; i++)
60         exp_list[i] = exps + i*N;
61 
62     /* space for flagged heap indicies */
63     hind = (slong *) TMP_ALLOC(len3*sizeof(slong));
64     for (i = 0; i < len3; i++)
65         hind[i] = 1;
66 
67     /* mask with high bit set in each word of each field of exponent vector */
68     mask = 0;
69     for (i = 0; i < FLINT_BITS/bits; i++)
70         mask = (mask << bits) + (UWORD(1) << (bits - 1));
71 
72     q_len = WORD(0);
73     r_len = WORD(0);
74 
75     /* s is the number of terms * (latest quotient) we should put into heap */
76     s = len3;
77 
78     /* insert (-1, 0, exp2[0]) into heap */
79     x = chain + 0;
80     x->i = -WORD(1);
81     x->j = 0;
82     x->next = NULL;
83     heap[1].next = x;
84     heap[1].exp = exp_list[exp_next++];
85     mpoly_monomial_set(heap[1].exp, exp2, N);
86 
87     /* precompute leading cofficient info */
88     fq_nmod_inv(lc_minus_inv, coeff3 + 0, fqctx);
89     fq_nmod_neg(lc_minus_inv, lc_minus_inv, fqctx);
90 
91     while (heap_len > 1)
92     {
93         mpoly_monomial_set(exp, heap[1].exp, N);
94 
95         if (bits <= FLINT_BITS)
96         {
97             if (mpoly_monomial_overflows(exp, N, mask))
98                 goto exp_overflow2;
99         }
100         else
101         {
102             if (mpoly_monomial_overflows_mp(exp, N, bits))
103                 goto exp_overflow2;
104         }
105 
106         _fq_nmod_mpoly_fit_length(&q_coeff, &q_exp, allocq, q_len + 1, N, fqctx);
107 
108         if (bits <= FLINT_BITS)
109             lt_divides = mpoly_monomial_divides(q_exp + q_len*N, exp, exp3, N, mask);
110         else
111             lt_divides = mpoly_monomial_divides_mp(q_exp + q_len*N, exp, exp3, N, bits);
112 
113         fq_nmod_zero(q_coeff + q_len, fqctx);
114         do
115         {
116             exp_list[--exp_next] = heap[1].exp;
117             x = _mpoly_heap_pop(heap, &heap_len, N, cmpmask);
118             do
119             {
120                 *store++ = x->i;
121                 *store++ = x->j;
122                 if (x->i != -WORD(1))
123                     hind[x->i] |= WORD(1);
124 
125                 if (x->i == -WORD(1))
126                 {
127                     fq_nmod_sub(q_coeff + q_len, q_coeff + q_len, coeff2 + x->j, fqctx);
128                 }
129                 else
130                 {
131                     fq_nmod_mul(pp, coeff3 + x->i, q_coeff + x->j, fqctx);
132                     fq_nmod_add(q_coeff + q_len, q_coeff + q_len, pp, fqctx);
133                 }
134             } while ((x = x->next) != NULL);
135         } while (heap_len > 1 && mpoly_monomial_equal(heap[1].exp, exp, N));
136 
137         /* process nodes taken from the heap */
138         while (store > store_base)
139         {
140             j = *--store;
141             i = *--store;
142 
143             if (i == -WORD(1))
144             {
145                 /* take next dividend term */
146                 if (j + 1 < len2)
147                 {
148                     x = chain + 0;
149                     x->i = i;
150                     x->j = j + 1;
151                     x->next = NULL;
152                     mpoly_monomial_set(exp_list[exp_next], exp2 + x->j*N, N);
153                     exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x,
154                                              &next_loc, &heap_len, N, cmpmask);
155                 }
156             }
157             else
158             {
159                 /* should we go right? */
160                 if (  (i + 1 < len3)
161                    && (hind[i + 1] == 2*j + 1)
162                    )
163                 {
164                     x = chain + i + 1;
165                     x->i = i + 1;
166                     x->j = j;
167                     x->next = NULL;
168                     hind[x->i] = 2*(x->j + 1) + 0;
169                     mpoly_monomial_add_mp(exp_list[exp_next], exp3 + x->i*N,
170                                                              q_exp + x->j*N, N);
171                     exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x,
172                                              &next_loc, &heap_len, N, cmpmask);
173                 }
174                 /* should we go up? */
175                 if (j + 1 == q_len)
176                 {
177                     s++;
178                 }
179                 else if (  ((hind[i] & 1) == 1)
180                           && ((i == 1) || (hind[i - 1] >= 2*(j + 2) + 1))  )
181                 {
182                     x = chain + i;
183                     x->i = i;
184                     x->j = j + 1;
185                     x->next = NULL;
186                     hind[x->i] = 2*(x->j + 1) + 0;
187                     mpoly_monomial_add_mp(exp_list[exp_next], exp3 + x->i*N,
188                                                              q_exp + x->j*N, N);
189                     exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x,
190                                              &next_loc, &heap_len, N, cmpmask);
191                 }
192             }
193         }
194 
195         /* try to divide accumulated term by leading term */
196         if (fq_nmod_is_zero(q_coeff + q_len, fqctx))
197         {
198             continue;
199         }
200         if (!lt_divides)
201         {
202             _fq_nmod_mpoly_fit_length(&r_coeff, &r_exp, allocr, r_len + 1, N, fqctx);
203             fq_nmod_neg(r_coeff + r_len, q_coeff + q_len, fqctx);
204             mpoly_monomial_set(r_exp + r_len*N, exp, N);
205             r_len++;
206             continue;
207         }
208 
209         fq_nmod_mul(q_coeff + q_len, q_coeff + q_len, lc_minus_inv, fqctx);
210 
211         /* put newly generated quotient term back into the heap if neccesary */
212         if (s > 1)
213         {
214             i = 1;
215             x = chain + i;
216             x->i = i;
217             x->j = q_len;
218             x->next = NULL;
219             hind[x->i] = 2*(x->j + 1) + 0;
220             mpoly_monomial_add_mp(exp_list[exp_next], exp3 + x->i*N,
221                                                      q_exp + x->j*N, N);
222             exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x,
223                                              &next_loc, &heap_len, N, cmpmask);
224         }
225         s = 1;
226         q_len++;
227     }
228 
229 
230 cleanup2:
231 
232     (*polyq) = q_coeff;
233     (*expq) = q_exp;
234     (*polyr) = r_coeff;
235     (*expr) = r_exp;
236 
237     /* set remainder poly length */
238     (*lenr) = r_len;
239 
240     TMP_END;
241     fq_nmod_clear(pp, fqctx);
242     fq_nmod_clear(lc_minus_inv, fqctx);
243 
244     /* return quotient poly length */
245     return q_len;
246 
247 exp_overflow2:
248     q_len = 0;
249     r_len = 0;
250     goto cleanup2;
251 }
252 
fq_nmod_mpoly_divrem_monagan_pearce(fq_nmod_mpoly_t q,fq_nmod_mpoly_t r,const fq_nmod_mpoly_t poly2,const fq_nmod_mpoly_t poly3,const fq_nmod_mpoly_ctx_t ctx)253 void fq_nmod_mpoly_divrem_monagan_pearce(fq_nmod_mpoly_t q, fq_nmod_mpoly_t r,
254                       const fq_nmod_mpoly_t poly2, const fq_nmod_mpoly_t poly3,
255                                                  const fq_nmod_mpoly_ctx_t ctx)
256 {
257     slong exp_bits, N, lenq = 0, lenr = 0;
258     ulong * exp2 = poly2->exps, * exp3 = poly3->exps;
259     ulong * cmpmask;
260     int free2 = 0, free3 = 0;
261     fq_nmod_mpoly_t temp1, temp2;
262     fq_nmod_mpoly_struct * tq, * tr;
263     TMP_INIT;
264 
265     if (poly3->length == 0)
266     {
267         flint_throw(FLINT_DIVZERO, "Divide by zero in nmod_mpoly_divrem_monagan_pearce");
268     }
269 
270     if (poly2->length == 0)
271     {
272         fq_nmod_mpoly_zero(q, ctx);
273         fq_nmod_mpoly_zero(r, ctx);
274         return;
275     }
276 
277     TMP_START;
278 
279     exp_bits = FLINT_MAX(poly2->bits, poly3->bits);
280     exp_bits = mpoly_fix_bits(exp_bits, ctx->minfo);
281 
282     N = mpoly_words_per_exp(exp_bits, ctx->minfo);
283     cmpmask = (ulong*) TMP_ALLOC(N*sizeof(ulong));
284     mpoly_get_cmpmask(cmpmask, N, exp_bits, ctx->minfo);
285 
286     /* ensure input exponents packed to same size as output exponents */
287     if (exp_bits > poly2->bits)
288     {
289         free2 = 1;
290         exp2 = (ulong *) flint_malloc(N*poly2->length*sizeof(ulong));
291         mpoly_repack_monomials(exp2, exp_bits, poly2->exps, poly2->bits,
292                                                     poly2->length, ctx->minfo);
293     }
294 
295     if (exp_bits > poly3->bits)
296     {
297         free3 = 1;
298         exp3 = (ulong *) flint_malloc(N*poly3->length*sizeof(ulong));
299         mpoly_repack_monomials(exp3, exp_bits, poly3->exps, poly3->bits,
300                                                     poly3->length, ctx->minfo);
301     }
302 
303     /* check divisor leading monomial is at most that of the dividend */
304     if (mpoly_monomial_lt(exp2, exp3, N, cmpmask))
305     {
306         fq_nmod_mpoly_set(r, poly2, ctx);
307         fq_nmod_mpoly_zero(q, ctx);
308         goto cleanup3;
309     }
310 
311     /* take care of aliasing */
312     if (q == poly2 || q == poly3)
313     {
314         fq_nmod_mpoly_init2(temp1, poly2->length/poly3->length + 1,                                                                          ctx);
315         fq_nmod_mpoly_fit_bits(temp1, exp_bits, ctx);
316         temp1->bits = exp_bits;
317         tq = temp1;
318     }
319     else
320     {
321         fq_nmod_mpoly_fit_length(q, poly2->length/poly3->length + 1, ctx);
322         fq_nmod_mpoly_fit_bits(q, exp_bits, ctx);
323         q->bits = exp_bits;
324         tq = q;
325     }
326 
327     if (r == poly2 || r == poly3)
328     {
329         fq_nmod_mpoly_init2(temp2, poly3->length, ctx);
330         fq_nmod_mpoly_fit_bits(temp2, exp_bits, ctx);
331         temp2->bits = exp_bits;
332         tr = temp2;
333     }
334     else
335     {
336         fq_nmod_mpoly_fit_length(r, poly3->length, ctx);
337         fq_nmod_mpoly_fit_bits(r, exp_bits, ctx);
338         r->bits = exp_bits;
339         tr = r;
340     }
341 
342     /* do division with remainder */
343     while ((lenq = _fq_nmod_mpoly_divrem_monagan_pearce(&lenr, &tq->coeffs, &tq->exps,
344                       &tq->alloc, &tr->coeffs, &tr->exps, &tr->alloc, poly2->coeffs, exp2,
345                        poly2->length, poly3->coeffs, exp3, poly3->length, exp_bits,
346                                          N, cmpmask, ctx->fqctx)) == 0
347           && lenr == 0)
348    {
349         ulong * old_exp2 = exp2, * old_exp3 = exp3;
350         slong old_exp_bits = exp_bits;
351 
352         exp_bits = mpoly_fix_bits(exp_bits + 1, ctx->minfo);
353 
354         N = mpoly_words_per_exp(exp_bits, ctx->minfo);
355         cmpmask = (ulong*) TMP_ALLOC(N*sizeof(ulong));
356         mpoly_get_cmpmask(cmpmask, N, exp_bits, ctx->minfo);
357 
358         exp2 = (ulong *) flint_malloc(N*poly2->length*sizeof(ulong));
359         mpoly_repack_monomials(exp2, exp_bits, old_exp2, old_exp_bits,
360                                                     poly2->length, ctx->minfo);
361 
362         exp3 = (ulong *) flint_malloc(N*poly3->length*sizeof(ulong));
363         mpoly_repack_monomials(exp3, exp_bits, old_exp3, old_exp_bits,
364                                                     poly3->length, ctx->minfo);
365 
366         if (free2)
367             flint_free(old_exp2);
368 
369         if (free3)
370             flint_free(old_exp3);
371 
372         free2 = free3 = 1;
373 
374         fq_nmod_mpoly_fit_bits(tq, exp_bits, ctx);
375         tq->bits = exp_bits;
376 
377         fq_nmod_mpoly_fit_bits(tr, exp_bits, ctx);
378         tr->bits = exp_bits;
379     }
380 
381     /* deal with aliasing */
382     if (q == poly2 || q == poly3)
383     {
384         fq_nmod_mpoly_swap(temp1, q, ctx);
385         fq_nmod_mpoly_clear(temp1, ctx);
386     }
387 
388     if (r == poly2 || r == poly3)
389     {
390         fq_nmod_mpoly_swap(temp2, r, ctx);
391         fq_nmod_mpoly_clear(temp2, ctx);
392     }
393 
394     _fq_nmod_mpoly_set_length(q, lenq, ctx);
395     _fq_nmod_mpoly_set_length(r, lenr, ctx);
396 
397 cleanup3:
398 
399     if (free2)
400         flint_free(exp2);
401 
402     if (free3)
403         flint_free(exp3);
404 
405     TMP_END;
406 }
407