1 /*
2 Copyright (C) 2008, 2009, William Hart
3 Copyright (C) 2010 Fredrik Johansson
4 Copyright (C) 2021 Daniel Schultz
5
6 This file is part of FLINT.
7
8 FLINT is free software: you can redistribute it and/or modify it under
9 the terms of the GNU Lesser General Public License (LGPL) as published
10 by the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version. See <https://www.gnu.org/licenses/>.
12 */
13
14 #include <gmp.h>
15 #include "flint.h"
16 #include "ulong_extras.h"
17 #include "mpn_extras.h"
18 #include "fmpz.h"
19 #include "nmod_vec.h"
20
21 #define MAC(h, l, a, b) \
22 do { \
23 mp_limb_t p1, p0; \
24 umul_ppmm(p1, p0, a, b); \
25 add_ssaaaa(h, l, h, l, p1, p0); \
26 } while (0)
27
28
fmpz_multi_CRT_ui(fmpz_t b,mp_srcptr in,const fmpz_comb_t C,fmpz_comb_temp_t CT,int sign)29 void fmpz_multi_CRT_ui(
30 fmpz_t b,
31 mp_srcptr in,
32 const fmpz_comb_t C,
33 fmpz_comb_temp_t CT,
34 int sign)
35 {
36 slong i, j, k, l, s;
37 slong klen = C->crt_klen;
38 slong * step = C->step;
39 crt_lut_entry * lu = C->crt_lu;
40 fmpz * T = CT->T;
41 fmpz * A = CT->A;
42 slong * offsets = C->crt_offsets;
43 const mp_limb_t * md = C->packed_multipliers;
44 mpz_ptr az;
45 mp_limb_t * ad;
46 mp_limb_t hi, lo, t;
47
48 for (k = 0, i = 0, l = 0; k < klen; k++)
49 {
50 s = step[k];
51 j = offsets[k];
52 az = _fmpz_promote(A + k);
53
54 if (s < 0)
55 {
56 /*
57 every low level combination in this chunk has 1 prime
58 and md already has lu[i].i0 pre-multiplied in.
59 */
60 s = -s - 1;
61
62 ad = FLINT_MPZ_REALLOC(az, s + 2);
63
64 flint_mpn_zero(ad, s + 2);
65 hi = lo = 0;
66
67 for ( ; i < j; md += s, l++, i++)
68 {
69 FLINT_ASSERT(lu[i].i0 != 0);
70 FLINT_ASSERT(lu[i].i1 == 0);
71 FLINT_ASSERT(lu[i].i2 == 0);
72
73 t = mpn_addmul_1(ad, md, s, in[l*1]);
74 add_ssaaaa(hi, lo, hi, lo, UWORD(0), t);
75 }
76
77 ad[s] = lo;
78 ad[s + 1] = hi;
79 }
80 else
81 {
82 ad = FLINT_MPZ_REALLOC(az, s + 2);
83
84 flint_mpn_zero(ad, s + 2);
85
86 for ( ; i < j; md += s, i++)
87 {
88 /* low level combination: 1, 2, or 3 small primes */
89 FLINT_ASSERT(l + 1 <= C->num_primes);
90 umul_ppmm(hi, lo, in[l*1], lu[i].i0); l++;
91
92 if (lu[i].i2 != 0)
93 {
94 FLINT_ASSERT(l + 2 <= C->num_primes);
95 MAC(hi, lo, in[l*1], lu[i].i1); l++;
96 MAC(hi, lo, in[l*1], lu[i].i2); l++;
97 /*
98 We have lu[i].mod.n = p0*p1*p2, and each lu[i].i{0|1|2} is
99 strictly less than p1*p2*p3, and the inputs are reduced mod pi.
100 Therefore, the sum is at most (p0*p1*p2-1)*(p0-1+p1-1+p2-1).
101 Since p0*p1*p2 fits into a word, the sum fits into two words
102 and the hi word is less than p0*p1*p2.
103 */
104 }
105 else if (lu[i].i1 != 0)
106 {
107 FLINT_ASSERT(l + 1 <= C->num_primes);
108 MAC(hi, lo, in[l*1], lu[i].i1); l++;
109 /* Ditto for two */
110 }
111
112 FLINT_ASSERT(hi < lu[i].mod.n);
113 NMOD_RED2(t, hi, lo, lu[i].mod);
114
115 /* mid level combination: depends on FMPZ_CRT_UI_CUTOFF */
116 hi = mpn_addmul_1(ad, md, s, t);
117 add_ssaaaa(ad[s + 1], ad[s], ad[s + 1], ad[s], UWORD(0), hi);
118 }
119 }
120
121 s += 2;
122
123 MPN_NORM(ad, s);
124 az->_mp_size = s;
125 _fmpz_demote_val(A + k);
126
127 _fmpz_smod(A + k, A + k, C->crt_P->moduli + k, sign, T + 0);
128 }
129
130 FLINT_ASSERT(l == C->num_primes);
131
132 /* high level combination */
133 fmpz_swap(T + 0, b);
134 _fmpz_multi_CRT_precomp(T, C->crt_P, A, sign);
135 fmpz_swap(T + 0, b);
136 }
137
138