1 /*
2     Copyright (C) 2008, 2009, William Hart
3     Copyright (C) 2010 Fredrik Johansson
4     Copyright (C) 2021 Daniel Schultz
5 
6     This file is part of FLINT.
7 
8     FLINT is free software: you can redistribute it and/or modify it under
9     the terms of the GNU Lesser General Public License (LGPL) as published
10     by the Free Software Foundation; either version 2.1 of the License, or
11     (at your option) any later version.  See <https://www.gnu.org/licenses/>.
12 */
13 
14 #include <gmp.h>
15 #include "flint.h"
16 #include "ulong_extras.h"
17 #include "mpn_extras.h"
18 #include "fmpz.h"
19 #include "nmod_vec.h"
20 
21 #define MAC(h, l, a, b)                 \
22 do {                                    \
23     mp_limb_t p1, p0;                   \
24     umul_ppmm(p1, p0, a, b);            \
25     add_ssaaaa(h, l, h, l, p1, p0);     \
26 } while (0)
27 
28 
fmpz_multi_CRT_ui(fmpz_t b,mp_srcptr in,const fmpz_comb_t C,fmpz_comb_temp_t CT,int sign)29 void fmpz_multi_CRT_ui(
30     fmpz_t b,
31     mp_srcptr in,
32     const fmpz_comb_t C,
33     fmpz_comb_temp_t CT,
34     int sign)
35 {
36     slong i, j, k, l, s;
37     slong klen = C->crt_klen;
38     slong * step = C->step;
39     crt_lut_entry * lu = C->crt_lu;
40     fmpz * T = CT->T;
41     fmpz * A = CT->A;
42     slong * offsets = C->crt_offsets;
43     const mp_limb_t * md = C->packed_multipliers;
44     mpz_ptr az;
45     mp_limb_t * ad;
46     mp_limb_t hi, lo, t;
47 
48     for (k = 0, i = 0, l = 0; k < klen; k++)
49     {
50         s = step[k];
51         j = offsets[k];
52         az = _fmpz_promote(A + k);
53 
54         if (s < 0)
55         {
56             /*
57                 every low level combination in this chunk has 1 prime
58                 and md already has lu[i].i0 pre-multiplied in.
59             */
60             s = -s - 1;
61 
62             ad = FLINT_MPZ_REALLOC(az, s + 2);
63 
64             flint_mpn_zero(ad, s + 2);
65             hi = lo = 0;
66 
67             for ( ; i < j; md += s, l++, i++)
68             {
69                 FLINT_ASSERT(lu[i].i0 != 0);
70                 FLINT_ASSERT(lu[i].i1 == 0);
71                 FLINT_ASSERT(lu[i].i2 == 0);
72 
73                 t = mpn_addmul_1(ad, md, s, in[l*1]);
74                 add_ssaaaa(hi, lo, hi, lo, UWORD(0), t);
75             }
76 
77             ad[s] = lo;
78             ad[s + 1] = hi;
79         }
80         else
81         {
82             ad = FLINT_MPZ_REALLOC(az, s + 2);
83 
84             flint_mpn_zero(ad, s + 2);
85 
86             for ( ; i < j; md += s, i++)
87             {
88                 /* low level combination: 1, 2, or 3 small primes */
89                 FLINT_ASSERT(l + 1 <= C->num_primes);
90                 umul_ppmm(hi, lo, in[l*1], lu[i].i0); l++;
91 
92                 if (lu[i].i2 != 0)
93                 {
94                     FLINT_ASSERT(l + 2 <= C->num_primes);
95                     MAC(hi, lo, in[l*1], lu[i].i1); l++;
96                     MAC(hi, lo, in[l*1], lu[i].i2); l++;
97                 /*
98                     We have lu[i].mod.n = p0*p1*p2, and each lu[i].i{0|1|2} is
99                     strictly less than p1*p2*p3, and the inputs are reduced mod pi.
100                     Therefore, the sum is at most (p0*p1*p2-1)*(p0-1+p1-1+p2-1).
101                     Since p0*p1*p2 fits into a word, the sum fits into two words
102                     and the hi word is less than p0*p1*p2.
103                 */
104                 }
105                 else if (lu[i].i1 != 0)
106                 {
107                     FLINT_ASSERT(l + 1 <= C->num_primes);
108                     MAC(hi, lo, in[l*1], lu[i].i1); l++;
109                     /* Ditto for two */
110                 }
111 
112                 FLINT_ASSERT(hi < lu[i].mod.n);
113                 NMOD_RED2(t, hi, lo, lu[i].mod);
114 
115                 /* mid level combination: depends on FMPZ_CRT_UI_CUTOFF */
116                 hi = mpn_addmul_1(ad, md, s, t);
117                 add_ssaaaa(ad[s + 1], ad[s], ad[s + 1], ad[s], UWORD(0), hi);
118             }
119         }
120 
121         s += 2;
122 
123         MPN_NORM(ad, s);
124         az->_mp_size = s;
125         _fmpz_demote_val(A + k);
126 
127         _fmpz_smod(A + k, A + k, C->crt_P->moduli + k, sign, T + 0);
128     }
129 
130     FLINT_ASSERT(l == C->num_primes);
131 
132     /* high level combination */
133     fmpz_swap(T + 0, b);
134     _fmpz_multi_CRT_precomp(T, C->crt_P, A, sign);
135     fmpz_swap(T + 0, b);
136 }
137 
138