1 /* mpn_mod_1(dividend_ptr, dividend_size, divisor_limb) --
2    Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
3    Return the single-limb remainder.
4    There are no constraints on the value of the divisor.
5 
6 Copyright 1991, 1993, 1994, 1999, 2000, 2002, 2007-2009, 2012 Free Software
7 Foundation, Inc.
8 
9 This file is part of the GNU MP Library.
10 
11 The GNU MP Library is free software; you can redistribute it and/or modify
12 it under the terms of either:
13 
14   * the GNU Lesser General Public License as published by the Free
15     Software Foundation; either version 3 of the License, or (at your
16     option) any later version.
17 
18 or
19 
20   * the GNU General Public License as published by the Free Software
21     Foundation; either version 2 of the License, or (at your option) any
22     later version.
23 
24 or both in parallel, as here.
25 
26 The GNU MP Library is distributed in the hope that it will be useful, but
27 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
28 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
29 for more details.
30 
31 You should have received copies of the GNU General Public License and the
32 GNU Lesser General Public License along with the GNU MP Library.  If not,
33 see https://www.gnu.org/licenses/.  */
34 
35 #include "gmp.h"
36 #include "gmp-impl.h"
37 #include "longlong.h"
38 
39 
40 /* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
41    meaning the quotient size where that should happen, the quotient size
42    being how many udiv divisions will be done.
43 
44    The default is to use preinv always, CPUs where this doesn't suit have
45    tuned thresholds.  Note in particular that preinv should certainly be
46    used if that's the only division available (USE_PREINV_ALWAYS).  */
47 
48 #ifndef MOD_1_NORM_THRESHOLD
49 #define MOD_1_NORM_THRESHOLD  0
50 #endif
51 
52 #ifndef MOD_1_UNNORM_THRESHOLD
53 #define MOD_1_UNNORM_THRESHOLD  0
54 #endif
55 
56 #ifndef MOD_1U_TO_MOD_1_1_THRESHOLD
57 #define MOD_1U_TO_MOD_1_1_THRESHOLD  MP_SIZE_T_MAX /* default is not to use mpn_mod_1s */
58 #endif
59 
60 #ifndef MOD_1N_TO_MOD_1_1_THRESHOLD
61 #define MOD_1N_TO_MOD_1_1_THRESHOLD  MP_SIZE_T_MAX /* default is not to use mpn_mod_1s */
62 #endif
63 
64 #ifndef MOD_1_1_TO_MOD_1_2_THRESHOLD
65 #define MOD_1_1_TO_MOD_1_2_THRESHOLD  10
66 #endif
67 
68 #ifndef MOD_1_2_TO_MOD_1_4_THRESHOLD
69 #define MOD_1_2_TO_MOD_1_4_THRESHOLD  20
70 #endif
71 
72 #if TUNE_PROGRAM_BUILD && !HAVE_NATIVE_mpn_mod_1_1p
73 /* Duplicates declarations in tune/speed.h */
74 mp_limb_t mpn_mod_1_1p_1 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]);
75 mp_limb_t mpn_mod_1_1p_2 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]);
76 
77 void mpn_mod_1_1p_cps_1 (mp_limb_t [4], mp_limb_t);
78 void mpn_mod_1_1p_cps_2 (mp_limb_t [4], mp_limb_t);
79 
80 #undef mpn_mod_1_1p
81 #define mpn_mod_1_1p(ap, n, b, pre)			     \
82   (mod_1_1p_method == 1 ? mpn_mod_1_1p_1 (ap, n, b, pre)     \
83    : (mod_1_1p_method == 2 ? mpn_mod_1_1p_2 (ap, n, b, pre)  \
84       : __gmpn_mod_1_1p (ap, n, b, pre)))
85 
86 #undef mpn_mod_1_1p_cps
87 #define mpn_mod_1_1p_cps(pre, b)				\
88   (mod_1_1p_method == 1 ? mpn_mod_1_1p_cps_1 (pre, b)		\
89    : (mod_1_1p_method == 2 ? mpn_mod_1_1p_cps_2 (pre, b)	\
90       : __gmpn_mod_1_1p_cps (pre, b)))
91 #endif /* TUNE_PROGRAM_BUILD && !HAVE_NATIVE_mpn_mod_1_1p */
92 
93 
94 /* The comments in mpn/generic/divrem_1.c apply here too.
95 
96    As noted in the algorithms section of the manual, the shifts in the loop
97    for the unnorm case can be avoided by calculating r = a%(d*2^n), followed
98    by a final (r*2^n)%(d*2^n).  In fact if it happens that a%(d*2^n) can
99    skip a division where (a*2^n)%(d*2^n) can't then there's the same number
100    of divide steps, though how often that happens depends on the assumed
101    distributions of dividend and divisor.  In any case this idea is left to
102    CPU specific implementations to consider.  */
103 
104 static mp_limb_t
mpn_mod_1_unnorm(mp_srcptr up,mp_size_t un,mp_limb_t d)105 mpn_mod_1_unnorm (mp_srcptr up, mp_size_t un, mp_limb_t d)
106 {
107   mp_size_t  i;
108   mp_limb_t  n1, n0, r;
109   mp_limb_t  dummy;
110   int cnt;
111 
112   ASSERT (un > 0);
113   ASSERT (d != 0);
114 
115   d <<= GMP_NAIL_BITS;
116 
117   /* Skip a division if high < divisor.  Having the test here before
118      normalizing will still skip as often as possible.  */
119   r = up[un - 1] << GMP_NAIL_BITS;
120   if (r < d)
121     {
122       r >>= GMP_NAIL_BITS;
123       un--;
124       if (un == 0)
125 	return r;
126     }
127   else
128     r = 0;
129 
130   /* If udiv_qrnnd doesn't need a normalized divisor, can use the simple
131      code above. */
132   if (! UDIV_NEEDS_NORMALIZATION
133       && BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD))
134     {
135       for (i = un - 1; i >= 0; i--)
136 	{
137 	  n0 = up[i] << GMP_NAIL_BITS;
138 	  udiv_qrnnd (dummy, r, r, n0, d);
139 	  r >>= GMP_NAIL_BITS;
140 	}
141       return r;
142     }
143 
144   count_leading_zeros (cnt, d);
145   d <<= cnt;
146 
147   n1 = up[un - 1] << GMP_NAIL_BITS;
148   r = (r << cnt) | (n1 >> (GMP_LIMB_BITS - cnt));
149 
150   if (UDIV_NEEDS_NORMALIZATION
151       && BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD))
152     {
153       mp_limb_t nshift;
154       for (i = un - 2; i >= 0; i--)
155 	{
156 	  n0 = up[i] << GMP_NAIL_BITS;
157 	  nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
158 	  udiv_qrnnd (dummy, r, r, nshift, d);
159 	  r >>= GMP_NAIL_BITS;
160 	  n1 = n0;
161 	}
162       udiv_qrnnd (dummy, r, r, n1 << cnt, d);
163       r >>= GMP_NAIL_BITS;
164       return r >> cnt;
165     }
166   else
167     {
168       mp_limb_t inv, nshift;
169       invert_limb (inv, d);
170 
171       for (i = un - 2; i >= 0; i--)
172 	{
173 	  n0 = up[i] << GMP_NAIL_BITS;
174 	  nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
175 	  udiv_rnnd_preinv (r, r, nshift, d, inv);
176 	  r >>= GMP_NAIL_BITS;
177 	  n1 = n0;
178 	}
179       udiv_rnnd_preinv (r, r, n1 << cnt, d, inv);
180       r >>= GMP_NAIL_BITS;
181       return r >> cnt;
182     }
183 }
184 
185 static mp_limb_t
mpn_mod_1_norm(mp_srcptr up,mp_size_t un,mp_limb_t d)186 mpn_mod_1_norm (mp_srcptr up, mp_size_t un, mp_limb_t d)
187 {
188   mp_size_t  i;
189   mp_limb_t  n0, r;
190   mp_limb_t  dummy;
191 
192   ASSERT (un > 0);
193 
194   d <<= GMP_NAIL_BITS;
195 
196   ASSERT (d & GMP_LIMB_HIGHBIT);
197 
198   /* High limb is initial remainder, possibly with one subtract of
199      d to get r<d.  */
200   r = up[un - 1] << GMP_NAIL_BITS;
201   if (r >= d)
202     r -= d;
203   r >>= GMP_NAIL_BITS;
204   un--;
205   if (un == 0)
206     return r;
207 
208   if (BELOW_THRESHOLD (un, MOD_1_NORM_THRESHOLD))
209     {
210       for (i = un - 1; i >= 0; i--)
211 	{
212 	  n0 = up[i] << GMP_NAIL_BITS;
213 	  udiv_qrnnd (dummy, r, r, n0, d);
214 	  r >>= GMP_NAIL_BITS;
215 	}
216       return r;
217     }
218   else
219     {
220       mp_limb_t  inv;
221       invert_limb (inv, d);
222       for (i = un - 1; i >= 0; i--)
223 	{
224 	  n0 = up[i] << GMP_NAIL_BITS;
225 	  udiv_rnnd_preinv (r, r, n0, d, inv);
226 	  r >>= GMP_NAIL_BITS;
227 	}
228       return r;
229     }
230 }
231 
232 mp_limb_t
mpn_mod_1(mp_srcptr ap,mp_size_t n,mp_limb_t b)233 mpn_mod_1 (mp_srcptr ap, mp_size_t n, mp_limb_t b)
234 {
235   ASSERT (n >= 0);
236   ASSERT (b != 0);
237 
238   /* Should this be handled at all?  Rely on callers?  Note un==0 is currently
239      required by mpz/fdiv_r_ui.c and possibly other places.  */
240   if (n == 0)
241     return 0;
242 
243   if (UNLIKELY ((b & GMP_NUMB_HIGHBIT) != 0))
244     {
245       if (BELOW_THRESHOLD (n, MOD_1N_TO_MOD_1_1_THRESHOLD))
246 	{
247 	  return mpn_mod_1_norm (ap, n, b);
248 	}
249       else
250 	{
251 	  mp_limb_t pre[4];
252 	  mpn_mod_1_1p_cps (pre, b);
253 	  return mpn_mod_1_1p (ap, n, b, pre);
254 	}
255     }
256   else
257     {
258       if (BELOW_THRESHOLD (n, MOD_1U_TO_MOD_1_1_THRESHOLD))
259 	{
260 	  return mpn_mod_1_unnorm (ap, n, b);
261 	}
262       else if (BELOW_THRESHOLD (n, MOD_1_1_TO_MOD_1_2_THRESHOLD))
263 	{
264 	  mp_limb_t pre[4];
265 	  mpn_mod_1_1p_cps (pre, b);
266 	  return mpn_mod_1_1p (ap, n, b << pre[1], pre);
267 	}
268       else if (BELOW_THRESHOLD (n, MOD_1_2_TO_MOD_1_4_THRESHOLD) || UNLIKELY (b > GMP_NUMB_MASK / 4))
269 	{
270 	  mp_limb_t pre[5];
271 	  mpn_mod_1s_2p_cps (pre, b);
272 	  return mpn_mod_1s_2p (ap, n, b << pre[1], pre);
273 	}
274       else
275 	{
276 	  mp_limb_t pre[7];
277 	  mpn_mod_1s_4p_cps (pre, b);
278 	  return mpn_mod_1s_4p (ap, n, b << pre[1], pre);
279 	}
280     }
281 }
282