/* mpn_divrem_1 -- mpn by limb division.

Copyright 1991, 1993, 1994, 1996, 1998-2000, 2002, 2003 Free Software
Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp-impl.h"
#include "longlong.h"


/* The quotient size at or above which udiv_qrnnd_preinv should be used
   rather than udiv_qrnnd, the quotient size being how many udiv divisions
   will be done.

   The default is to use preinv always; CPUs where this doesn't suit have
   tuned thresholds.  Note in particular that preinv should certainly be
   used if it's the only division available (USE_PREINV_ALWAYS).  */

#ifndef DIVREM_1_NORM_THRESHOLD
#define DIVREM_1_NORM_THRESHOLD  0
#endif
#ifndef DIVREM_1_UNNORM_THRESHOLD
#define DIVREM_1_UNNORM_THRESHOLD  0
#endif
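
/* For illustration: a cpu-specific gmp-mparam.h, or values measured by the
   tune/tuneup program, may override the defaults above with something like

       #define DIVREM_1_NORM_THRESHOLD    3
       #define DIVREM_1_UNNORM_THRESHOLD  5

   (hypothetical figures), meaning plain udiv_qrnnd is used when fewer than
   that many divisions will be done, and udiv_qrnnd_preinv at or above.  */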



/* If the cpu only has multiply-by-inverse division (e.g. alpha), then NORM
   and UNNORM thresholds are 0 and only the inversion code is included.

   If multiply-by-inverse is never viable, then NORM and UNNORM thresholds
   will be MP_SIZE_T_MAX and only the plain division code is included.

   Otherwise mul-by-inverse is better than plain division above some
   threshold, and best results are obtained by having code for both present.

   The main reason for separating the norm and unnorm cases is that not all
   CPUs give zero for "n0 >> GMP_LIMB_BITS", which would arise in the unnorm
   code used on an already normalized divisor.

   If UDIV_NEEDS_NORMALIZATION is false then plain division uses the same
   non-shifting code for both the norm and unnorm cases, though with
   different criteria for skipping a division, and with different thresholds
   of course.  And in fact if inversion is never viable, then that simple
   non-shifting division would be all that's left.

   The NORM and UNNORM thresholds might not differ much, but if there's
   going to be separate code for norm and unnorm then it makes sense to have
   separate thresholds.  One thing that's possible is that the
   mul-by-inverse might be better only for normalized divisors, due to that
   case not needing variable bit shifts.

   Notice that the thresholds are tested after the decision to possibly skip
   one divide step, so they're based on the actual number of divisions done.

   For the unnorm case, it would be possible to call mpn_lshift to adjust
   the dividend all in one go (into the quotient space say), rather than
   limb-by-limb in the loop.  This might help if mpn_lshift is a lot faster
   than what the compiler can generate for EXTRACT.  But this is left to
   CPU-specific implementations to consider, especially since EXTRACT isn't
   on the dependent chain.  */
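
/* Example: with qxn == 0,

       r = mpn_divrem_1 (qp, 0, up, un, d);

   writes the un-limb quotient of {up,un} divided by d to {qp,un} and
   returns the remainder r, with 0 <= r < d.  A nonzero qxn asks for qxn
   extra low "fraction" quotient limbs, as if qxn zero limbs were appended
   below the dividend, so un+qxn quotient limbs are stored in all.  */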

mp_limb_t
mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
	      mp_srcptr up, mp_size_t un, mp_limb_t d)
{
  mp_size_t  n;
  mp_size_t  i;
  mp_limb_t  n1, n0;
  mp_limb_t  r = 0;

  ASSERT (qxn >= 0);
  ASSERT (un >= 0);
  ASSERT (d != 0);
  /* FIXME: What's the correct overlap rule when qxn!=0? */
  ASSERT (MPN_SAME_OR_SEPARATE_P (qp+qxn, up, un));

  n = un + qxn;
  if (n == 0)
    return 0;

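  /* In nail builds, shift the divisor so its GMP_NUMB_BITS of data sit at
     the top of the limb; in non-nail builds GMP_NAIL_BITS is 0 and the
     nail shifts throughout are no-ops.  */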
  d <<= GMP_NAIL_BITS;

  qp += (n - 1);   /* Make qp point at most significant quotient limb */

  if ((d & GMP_LIMB_HIGHBIT) != 0)
    {
      if (un != 0)
	{
	  /* High quotient limb is 0 or 1, skip a divide step. */
	  mp_limb_t q;
	  r = up[un - 1] << GMP_NAIL_BITS;
	  q = (r >= d);
	  *qp-- = q;
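	  /* Branch-free conditional subtract: -q is an all-ones mask when
	     q == 1 and zero when q == 0, so d is subtracted only when the
	     high limb was >= d.  */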
	  r -= (d & -q);
	  r >>= GMP_NAIL_BITS;
	  n--;
	  un--;
	}

      if (BELOW_THRESHOLD (n, DIVREM_1_NORM_THRESHOLD))
	{
	plain:
	  for (i = un - 1; i >= 0; i--)
	    {
	      n0 = up[i] << GMP_NAIL_BITS;
	      udiv_qrnnd (*qp, r, r, n0, d);
	      r >>= GMP_NAIL_BITS;
	      qp--;
	    }
	  for (i = qxn - 1; i >= 0; i--)
	    {
	      udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d);
	      r >>= GMP_NAIL_BITS;
	      qp--;
	    }
	  return r;
	}
      else
	{
	  /* Multiply-by-inverse, divisor already normalized. */
	  mp_limb_t dinv;
	  invert_limb (dinv, d);
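	  /* dinv is the usual single-limb reciprocal of the normalized d
	     (essentially floor ((B^2 - 1) / d) - B with B = 2^GMP_LIMB_BITS);
	     udiv_qrnnd_preinv uses it to replace each hardware divide with a
	     multiply and some adjustment steps.  */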

	  for (i = un - 1; i >= 0; i--)
	    {
	      n0 = up[i] << GMP_NAIL_BITS;
	      udiv_qrnnd_preinv (*qp, r, r, n0, d, dinv);
	      r >>= GMP_NAIL_BITS;
	      qp--;
	    }
	  for (i = qxn - 1; i >= 0; i--)
	    {
	      udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);
	      r >>= GMP_NAIL_BITS;
	      qp--;
	    }
	  return r;
	}
    }
  else
    {
      /* Most significant bit of divisor == 0.  */
      int cnt;

      /* Skip a division if high < divisor (high quotient 0).  Testing here
	 before normalizing will still skip as often as possible.  */
      if (un != 0)
	{
	  n1 = up[un - 1] << GMP_NAIL_BITS;
	  if (n1 < d)
	    {
	      r = n1 >> GMP_NAIL_BITS;
	      *qp-- = 0;
	      n--;
	      if (n == 0)
		return r;
	      un--;
	    }
	}

      if (! UDIV_NEEDS_NORMALIZATION
	  && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
	goto plain;

      count_leading_zeros (cnt, d);
      d <<= cnt;
      r <<= cnt;
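      /* d now has its high bit set, and r is the partial remainder scaled
	 by the same 2^cnt factor.  */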

      if (UDIV_NEEDS_NORMALIZATION
	  && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
	{
	  mp_limb_t nshift;
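	  /* nshift is the next dividend limb shifted left by cnt, formed on
	     the fly from adjacent source limbs rather than by a separate
	     mpn_lshift pass (see the remarks above the function).  */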
	  if (un != 0)
	    {
	      n1 = up[un - 1] << GMP_NAIL_BITS;
	      r |= (n1 >> (GMP_LIMB_BITS - cnt));
	      for (i = un - 2; i >= 0; i--)
		{
		  n0 = up[i] << GMP_NAIL_BITS;
		  nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
		  udiv_qrnnd (*qp, r, r, nshift, d);
		  r >>= GMP_NAIL_BITS;
		  qp--;
		  n1 = n0;
		}
	      udiv_qrnnd (*qp, r, r, n1 << cnt, d);
	      r >>= GMP_NAIL_BITS;
	      qp--;
	    }
	  for (i = qxn - 1; i >= 0; i--)
	    {
	      udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d);
	      r >>= GMP_NAIL_BITS;
	      qp--;
	    }
	  return r >> cnt;
	}
      else
	{
	  mp_limb_t  dinv, nshift;
	  invert_limb (dinv, d);
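	  /* Same on-the-fly shifting of the dividend as above, but with the
	     multiply-by-inverse division step.  */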
	  if (un != 0)
	    {
	      n1 = up[un - 1] << GMP_NAIL_BITS;
	      r |= (n1 >> (GMP_LIMB_BITS - cnt));
	      for (i = un - 2; i >= 0; i--)
		{
		  n0 = up[i] << GMP_NAIL_BITS;
		  nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
		  udiv_qrnnd_preinv (*qp, r, r, nshift, d, dinv);
		  r >>= GMP_NAIL_BITS;
		  qp--;
		  n1 = n0;
		}
	      udiv_qrnnd_preinv (*qp, r, r, n1 << cnt, d, dinv);
	      r >>= GMP_NAIL_BITS;
	      qp--;
	    }
	  for (i = qxn - 1; i >= 0; i--)
	    {
	      udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);
	      r >>= GMP_NAIL_BITS;
	      qp--;
	    }
	  return r >> cnt;
	}
    }
}