xref: /dragonfly/contrib/gmp/mpn/generic/divrem_1.c (revision 37de577a)
1 /* mpn_divrem_1 -- mpn by limb division.
2 
3 Copyright 1991, 1993, 1994, 1996, 1998, 1999, 2000, 2002, 2003 Free Software
4 Foundation, Inc.
5 
6 This file is part of the GNU MP Library.
7 
8 The GNU MP Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 The GNU MP Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16 License for more details.
17 
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
20 
21 #include "gmp.h"
22 #include "gmp-impl.h"
23 #include "longlong.h"
24 
25 
/* Quotient sizes at which udiv_qrnnd_preinv takes over from udiv_qrnnd, the
   quotient size being the number of udiv divisions that will be performed.
   One threshold applies to divisors that are already normalized, the other
   to divisors that are not.

   Both default to 0, meaning preinv is used always; CPUs where that doesn't
   suit have tuned thresholds.  In particular preinv should certainly be used
   when it's the only division available (USE_PREINV_ALWAYS).  */

#if ! defined (DIVREM_1_NORM_THRESHOLD)
#define DIVREM_1_NORM_THRESHOLD  0
#endif
#if ! defined (DIVREM_1_UNNORM_THRESHOLD)
#define DIVREM_1_UNNORM_THRESHOLD  0
#endif
40 
41 
42 
43 /* If the cpu only has multiply-by-inverse division (eg. alpha), then NORM
44    and UNNORM thresholds are 0 and only the inversion code is included.
45 
46    If multiply-by-inverse is never viable, then NORM and UNNORM thresholds
47    will be MP_SIZE_T_MAX and only the plain division code is included.
48 
49    Otherwise mul-by-inverse is better than plain division above some
50    threshold, and best results are obtained by having code for both present.
51 
52    The main reason for separating the norm and unnorm cases is that not all
53    CPUs give zero for "n0 >> GMP_LIMB_BITS" which would arise in the unnorm
54    code used on an already normalized divisor.
55 
56    If UDIV_NEEDS_NORMALIZATION is false then plain division uses the same
57    non-shifting code for both the norm and unnorm cases, though with
58    different criteria for skipping a division, and with different thresholds
59    of course.  And in fact if inversion is never viable, then that simple
60    non-shifting division would be all that's left.
61 
62    The NORM and UNNORM thresholds might not differ much, but if there's
63    going to be separate code for norm and unnorm then it makes sense to have
64    separate thresholds.  One thing that's possible is that the
65    mul-by-inverse might be better only for normalized divisors, due to that
66    case not needing variable bit shifts.
67 
68    Notice that the thresholds are tested after the decision to possibly skip
69    one divide step, so they're based on the actual number of divisions done.
70 
71    For the unnorm case, it would be possible to call mpn_lshift to adjust
72    the dividend all in one go (into the quotient space say), rather than
73    limb-by-limb in the loop.  This might help if mpn_lshift is a lot faster
74    than what the compiler can generate for EXTRACT.  But this is left to CPU
75    specific implementations to consider, especially since EXTRACT isn't on
76    the dependent chain.  */
77 
78 mp_limb_t
79 mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
80 	      mp_srcptr up, mp_size_t un, mp_limb_t d)
81 {
82   mp_size_t  n;
83   mp_size_t  i;
84   mp_limb_t  n1, n0;
85   mp_limb_t  r = 0;
86 
87   ASSERT (qxn >= 0);
88   ASSERT (un >= 0);
89   ASSERT (d != 0);
90   /* FIXME: What's the correct overlap rule when qxn!=0? */
91   ASSERT (MPN_SAME_OR_SEPARATE_P (qp+qxn, up, un));
92 
93   n = un + qxn;
94   if (n == 0)
95     return 0;
96 
97   d <<= GMP_NAIL_BITS;
98 
99   qp += (n - 1);   /* Make qp point at most significant quotient limb */
100 
101   if ((d & GMP_LIMB_HIGHBIT) != 0)
102     {
103       if (un != 0)
104 	{
105 	  /* High quotient limb is 0 or 1, skip a divide step. */
106 	  mp_limb_t q;
107 	  r = up[un - 1] << GMP_NAIL_BITS;
108 	  q = (r >= d);
109 	  *qp-- = q;
110 	  r -= (d & -q);
111 	  r >>= GMP_NAIL_BITS;
112 	  n--;
113 	  un--;
114 	}
115 
116       if (BELOW_THRESHOLD (n, DIVREM_1_NORM_THRESHOLD))
117 	{
118 	plain:
119 	  for (i = un - 1; i >= 0; i--)
120 	    {
121 	      n0 = up[i] << GMP_NAIL_BITS;
122 	      udiv_qrnnd (*qp, r, r, n0, d);
123 	      r >>= GMP_NAIL_BITS;
124 	      qp--;
125 	    }
126 	  for (i = qxn - 1; i >= 0; i--)
127 	    {
128 	      udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d);
129 	      r >>= GMP_NAIL_BITS;
130 	      qp--;
131 	    }
132 	  return r;
133 	}
134       else
135 	{
136 	  /* Multiply-by-inverse, divisor already normalized. */
137 	  mp_limb_t dinv;
138 	  invert_limb (dinv, d);
139 
140 	  for (i = un - 1; i >= 0; i--)
141 	    {
142 	      n0 = up[i] << GMP_NAIL_BITS;
143 	      udiv_qrnnd_preinv (*qp, r, r, n0, d, dinv);
144 	      r >>= GMP_NAIL_BITS;
145 	      qp--;
146 	    }
147 	  for (i = qxn - 1; i >= 0; i--)
148 	    {
149 	      udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);
150 	      r >>= GMP_NAIL_BITS;
151 	      qp--;
152 	    }
153 	  return r;
154 	}
155     }
156   else
157     {
158       /* Most significant bit of divisor == 0.  */
159       int norm;
160 
161       /* Skip a division if high < divisor (high quotient 0).  Testing here
162 	 before normalizing will still skip as often as possible.  */
163       if (un != 0)
164 	{
165 	  n1 = up[un - 1] << GMP_NAIL_BITS;
166 	  if (n1 < d)
167 	    {
168 	      r = n1 >> GMP_NAIL_BITS;
169 	      *qp-- = 0;
170 	      n--;
171 	      if (n == 0)
172 		return r;
173 	      un--;
174 	    }
175 	}
176 
177       if (! UDIV_NEEDS_NORMALIZATION
178 	  && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
179 	goto plain;
180 
181       count_leading_zeros (norm, d);
182       d <<= norm;
183       r <<= norm;
184 
185       if (UDIV_NEEDS_NORMALIZATION
186 	  && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
187 	{
188 	  if (un != 0)
189 	    {
190 	      n1 = up[un - 1] << GMP_NAIL_BITS;
191 	      r |= (n1 >> (GMP_LIMB_BITS - norm));
192 	      for (i = un - 2; i >= 0; i--)
193 		{
194 		  n0 = up[i] << GMP_NAIL_BITS;
195 		  udiv_qrnnd (*qp, r, r,
196 			      (n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm)),
197 			      d);
198 		  r >>= GMP_NAIL_BITS;
199 		  qp--;
200 		  n1 = n0;
201 		}
202 	      udiv_qrnnd (*qp, r, r, n1 << norm, d);
203 	      r >>= GMP_NAIL_BITS;
204 	      qp--;
205 	    }
206 	  for (i = qxn - 1; i >= 0; i--)
207 	    {
208 	      udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d);
209 	      r >>= GMP_NAIL_BITS;
210 	      qp--;
211 	    }
212 	  return r >> norm;
213 	}
214       else
215 	{
216 	  mp_limb_t  dinv;
217 	  invert_limb (dinv, d);
218 	  if (un != 0)
219 	    {
220 	      n1 = up[un - 1] << GMP_NAIL_BITS;
221 	      r |= (n1 >> (GMP_LIMB_BITS - norm));
222 	      for (i = un - 2; i >= 0; i--)
223 		{
224 		  n0 = up[i] << GMP_NAIL_BITS;
225 		  udiv_qrnnd_preinv (*qp, r, r,
226 				     ((n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm))),
227 				     d, dinv);
228 		  r >>= GMP_NAIL_BITS;
229 		  qp--;
230 		  n1 = n0;
231 		}
232 	      udiv_qrnnd_preinv (*qp, r, r, n1 << norm, d, dinv);
233 	      r >>= GMP_NAIL_BITS;
234 	      qp--;
235 	    }
236 	  for (i = qxn - 1; i >= 0; i--)
237 	    {
238 	      udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);
239 	      r >>= GMP_NAIL_BITS;
240 	      qp--;
241 	    }
242 	  return r >> norm;
243 	}
244     }
245 }
246