1 /* mpn_mod_1(dividend_ptr, dividend_size, divisor_limb) --
2 Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
3 Return the single-limb remainder.
4 There are no constraints on the value of the divisor.
5
6 Copyright 1991, 1993, 1994, 1999, 2000, 2002, 2007, 2008, 2009 Free
7 Software Foundation, Inc.
8
9 This file is part of the GNU MP Library.
10
11 The GNU MP Library is free software; you can redistribute it and/or modify
12 it under the terms of the GNU Lesser General Public License as published by
13 the Free Software Foundation; either version 3 of the License, or (at your
14 option) any later version.
15
16 The GNU MP Library is distributed in the hope that it will be useful, but
17 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
19 License for more details.
20
21 You should have received a copy of the GNU Lesser General Public License
22 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
23
24 #include "gmp.h"
25 #include "gmp-impl.h"
26 #include "longlong.h"
27
28
29 /* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
30 meaning the quotient size where that should happen, the quotient size
31 being how many udiv divisions will be done.
32
33 The default is to use preinv always, CPUs where this doesn't suit have
34 tuned thresholds. Note in particular that preinv should certainly be
35 used if that's the only division available (USE_PREINV_ALWAYS). */
36
37 #ifndef MOD_1_NORM_THRESHOLD
38 #define MOD_1_NORM_THRESHOLD 0
39 #endif
40
41 #ifndef MOD_1_UNNORM_THRESHOLD
42 #define MOD_1_UNNORM_THRESHOLD 0
43 #endif
44
45 #ifndef MOD_1U_TO_MOD_1_1_THRESHOLD
46 #define MOD_1U_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX /* default is not to use mpn_mod_1s */
47 #endif
48
49 #ifndef MOD_1N_TO_MOD_1_1_THRESHOLD
50 #define MOD_1N_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX /* default is not to use mpn_mod_1s */
51 #endif
52
53 #ifndef MOD_1_1_TO_MOD_1_2_THRESHOLD
54 #define MOD_1_1_TO_MOD_1_2_THRESHOLD 10
55 #endif
56
57 #ifndef MOD_1_2_TO_MOD_1_4_THRESHOLD
58 #define MOD_1_2_TO_MOD_1_4_THRESHOLD 20
59 #endif
60
61
62 /* The comments in mpn/generic/divrem_1.c apply here too.
63
64 As noted in the algorithms section of the manual, the shifts in the loop
65 for the unnorm case can be avoided by calculating r = a%(d*2^n), followed
66 by a final (r*2^n)%(d*2^n). In fact if it happens that a%(d*2^n) can
67 skip a division where (a*2^n)%(d*2^n) can't then there's the same number
68 of divide steps, though how often that happens depends on the assumed
69 distributions of dividend and divisor. In any case this idea is left to
70 CPU specific implementations to consider. */
71
/* Remainder of (UP,UN) divided by an unnormalized divisor D (i.e. one whose
   most significant numb bit is clear; the caller dispatches normalized
   divisors to mpn_mod_1_norm instead).  The divisor is shifted left until
   normalized, the dividend limbs are shifted correspondingly on the fly,
   and the final remainder is shifted back down.  Quotient limbs are
   computed but discarded.  */
static mp_limb_t
mpn_mod_1_unnorm (mp_srcptr up, mp_size_t un, mp_limb_t d)
{
  mp_size_t i;
  mp_limb_t n1, n0, r;
  mp_limb_t dummy;   /* receives the unwanted quotient limb */
  int cnt;           /* normalization shift count for d */

  ASSERT (un > 0);
  ASSERT (d != 0);

  /* Move the divisor's significant bits to the top of the limb, above the
     nail bits; dividend limbs get the same treatment below.  */
  d <<= GMP_NAIL_BITS;

  /* Skip a division if high < divisor.  Having the test here before
     normalizing will still skip as often as possible.  */
  r = up[un - 1] << GMP_NAIL_BITS;
  if (r < d)
    {
      r >>= GMP_NAIL_BITS;
      un--;
      if (un == 0)
	return r;
    }
  else
    r = 0;

  /* If udiv_qrnnd doesn't need a normalized divisor, can use the simple
     code above.  */
  if (! UDIV_NEEDS_NORMALIZATION
      && BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD))
    {
      for (i = un - 1; i >= 0; i--)
	{
	  n0 = up[i] << GMP_NAIL_BITS;
	  udiv_qrnnd (dummy, r, r, n0, d);
	  r >>= GMP_NAIL_BITS;
	}
      return r;
    }

  /* Normalize the divisor.  Since d's high numb bit is clear on entry,
     cnt >= 1 here, so the (GMP_LIMB_BITS - cnt) shifts below are well
     defined.  */
  count_leading_zeros (cnt, d);
  d <<= cnt;

  /* Fold the top limb into the running remainder, pre-shifted by cnt.  */
  n1 = up[un - 1] << GMP_NAIL_BITS;
  r = (r << cnt) | (n1 >> (GMP_LIMB_BITS - cnt));

  if (UDIV_NEEDS_NORMALIZATION
      && BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD))
    {
      /* Plain udiv_qrnnd loop: each step divides the current remainder and
	 the next cnt-shifted dividend limb (bits borrowed from the limb
	 below) by the normalized divisor.  */
      for (i = un - 2; i >= 0; i--)
	{
	  n0 = up[i] << GMP_NAIL_BITS;
	  udiv_qrnnd (dummy, r, r,
		      (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt)),
		      d);
	  r >>= GMP_NAIL_BITS;
	  n1 = n0;
	}
      /* Last fractional step: only the leftover low bits of n1 remain.  */
      udiv_qrnnd (dummy, r, r, n1 << cnt, d);
      r >>= GMP_NAIL_BITS;
      /* Undo the normalization shift to get the true remainder.  */
      return r >> cnt;
    }
  else
    {
      /* Same loop structure, but using a precomputed inverse of d so each
	 division step is multiply-based (udiv_qrnnd_preinv).  */
      mp_limb_t inv;
      invert_limb (inv, d);

      for (i = un - 2; i >= 0; i--)
	{
	  n0 = up[i] << GMP_NAIL_BITS;
	  udiv_qrnnd_preinv (dummy, r, r,
			     (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt)),
			     d, inv);
	  r >>= GMP_NAIL_BITS;
	  n1 = n0;
	}
      udiv_qrnnd_preinv (dummy, r, r, n1 << cnt, d, inv);
      r >>= GMP_NAIL_BITS;
      /* Undo the normalization shift to get the true remainder.  */
      return r >> cnt;
    }
}
153
154 static mp_limb_t
mpn_mod_1_norm(mp_srcptr up,mp_size_t un,mp_limb_t d)155 mpn_mod_1_norm (mp_srcptr up, mp_size_t un, mp_limb_t d)
156 {
157 mp_size_t i;
158 mp_limb_t n0, r;
159 mp_limb_t dummy;
160
161 ASSERT (un > 0);
162
163 d <<= GMP_NAIL_BITS;
164
165 ASSERT (d & GMP_LIMB_HIGHBIT);
166
167 /* High limb is initial remainder, possibly with one subtract of
168 d to get r<d. */
169 r = up[un - 1] << GMP_NAIL_BITS;
170 if (r >= d)
171 r -= d;
172 r >>= GMP_NAIL_BITS;
173 un--;
174 if (un == 0)
175 return r;
176
177 if (BELOW_THRESHOLD (un, MOD_1_NORM_THRESHOLD))
178 {
179 for (i = un - 1; i >= 0; i--)
180 {
181 n0 = up[i] << GMP_NAIL_BITS;
182 udiv_qrnnd (dummy, r, r, n0, d);
183 r >>= GMP_NAIL_BITS;
184 }
185 return r;
186 }
187 else
188 {
189 mp_limb_t inv;
190 invert_limb (inv, d);
191 for (i = un - 1; i >= 0; i--)
192 {
193 n0 = up[i] << GMP_NAIL_BITS;
194 udiv_qrnnd_preinv (dummy, r, r, n0, d, inv);
195 r >>= GMP_NAIL_BITS;
196 }
197 return r;
198 }
199 }
200
201 mp_limb_t
mpn_mod_1(mp_srcptr ap,mp_size_t n,mp_limb_t b)202 mpn_mod_1 (mp_srcptr ap, mp_size_t n, mp_limb_t b)
203 {
204 ASSERT (n >= 0);
205 ASSERT (b != 0);
206
207 /* Should this be handled at all? Rely on callers? Note un==0 is currently
208 required by mpz/fdiv_r_ui.c and possibly other places. */
209 if (n == 0)
210 return 0;
211
212 if (UNLIKELY ((b & GMP_NUMB_HIGHBIT) != 0))
213 {
214 if (BELOW_THRESHOLD (n, MOD_1N_TO_MOD_1_1_THRESHOLD))
215 {
216 return mpn_mod_1_norm (ap, n, b);
217 }
218 else
219 {
220 mp_limb_t pre[4];
221 mpn_mod_1_1p_cps (pre, b);
222 return mpn_mod_1_1p (ap, n, b, pre);
223 }
224 }
225 else
226 {
227 if (BELOW_THRESHOLD (n, MOD_1U_TO_MOD_1_1_THRESHOLD))
228 {
229 return mpn_mod_1_unnorm (ap, n, b);
230 }
231 else if (BELOW_THRESHOLD (n, MOD_1_1_TO_MOD_1_2_THRESHOLD))
232 {
233 mp_limb_t pre[4];
234 mpn_mod_1_1p_cps (pre, b);
235 return mpn_mod_1_1p (ap, n, b << pre[1], pre);
236 }
237 else if (BELOW_THRESHOLD (n, MOD_1_2_TO_MOD_1_4_THRESHOLD) || UNLIKELY (b > GMP_NUMB_MASK / 4))
238 {
239 mp_limb_t pre[5];
240 mpn_mod_1s_2p_cps (pre, b);
241 return mpn_mod_1s_2p (ap, n, b << pre[1], pre);
242 }
243 else
244 {
245 mp_limb_t pre[7];
246 mpn_mod_1s_4p_cps (pre, b);
247 return mpn_mod_1s_4p (ap, n, b << pre[1], pre);
248 }
249 }
250 }
251