/* mpn_divrem_1 -- mpn by limb division.

Copyright 1991, 1993, 1994, 1996, 1998-2000, 2002, 2003 Free Software
Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp-impl.h"
#include "longlong.h"

/* The quotient size above which udiv_qrnnd_preinv should be used rather
   than udiv_qrnnd, the quotient size being the number of udiv divisions
   that will be done.

   The default is to use preinv always; CPUs where this doesn't suit have
   tuned thresholds.  Note in particular that preinv should certainly be
   used if that's the only division available (USE_PREINV_ALWAYS).  */

#ifndef DIVREM_1_NORM_THRESHOLD
#define DIVREM_1_NORM_THRESHOLD  0
#endif
#ifndef DIVREM_1_UNNORM_THRESHOLD
#define DIVREM_1_UNNORM_THRESHOLD  0
#endif
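
/* Tuned values, when a CPU has them, normally come from that CPU's
   gmp-mparam.h, which defines the above macros before this point is
   reached, so the defaults here are skipped.  */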


/* If the CPU has only multiply-by-inverse division (eg. alpha), then the
   NORM and UNNORM thresholds are 0 and only the inversion code is
   included.

   If multiply-by-inverse is never viable, then the NORM and UNNORM
   thresholds will be MP_SIZE_T_MAX and only the plain division code is
   included.

   Otherwise mul-by-inverse is better than plain division above some
   threshold, and best results are obtained by having code for both
   present.

   The main reason for separating the norm and unnorm cases is that not
   all CPUs give zero for "n1 >> GMP_LIMB_BITS", which would arise in the
   unnorm code if it were used on an already normalized divisor.

   If UDIV_NEEDS_NORMALIZATION is false then plain division uses the same
   non-shifting code for both the norm and unnorm cases, though with
   different criteria for skipping a division, and with different
   thresholds of course.  And in fact if inversion is never viable, then
   that simple non-shifting division would be all that's left.

   The NORM and UNNORM thresholds might not differ much, but if there's
   going to be separate code for norm and unnorm then it makes sense to
   have separate thresholds.  It's possible that mul-by-inverse is better
   only for normalized divisors, due to that case not needing variable
   bit shifts.

   Notice that the thresholds are tested after the decision to possibly
   skip one divide step, so they're based on the actual number of
   divisions done.

   For the unnorm case, it would be possible to call mpn_lshift to adjust
   the dividend all in one go (into the quotient space say), rather than
   limb-by-limb in the loop.  This might help if mpn_lshift is a lot
   faster than what the compiler can generate for EXTRACT.  But this is
   left to CPU specific implementations to consider, especially since
   EXTRACT isn't on the dependent chain.  */
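
/* In outline, each loop below performs one schoolbook division step: the
   two-limb value r*B + n0 (B = 2^GMP_NUMB_BITS) is divided by d, giving
   one quotient limb and a new remainder r < d.  A small worked example of
   the same scheme with B = 10 and d = 7: dividing 94 by 7 goes 9 = 1*7 + 2
   then 24 = 3*7 + 3, producing quotient digits 1,3 and final remainder 3,
   just as udiv_qrnnd produces them limb by limb.  */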

mp_limb_t
mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
              mp_srcptr up, mp_size_t un, mp_limb_t d)
{
  mp_size_t n;
  mp_size_t i;
  mp_limb_t n1, n0;
  mp_limb_t r = 0;

  ASSERT (qxn >= 0);
  ASSERT (un >= 0);
  ASSERT (d != 0);
  /* FIXME: What's the correct overlap rule when qxn!=0? */
  ASSERT (MPN_SAME_OR_SEPARATE_P (qp+qxn, up, un));

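  /* The quotient has n limbs in all: un "integer" limbs from {up,un} plus
     qxn low "fraction" limbs, as if dividing {up,un} * B^qxn by d.  */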
  n = un + qxn;
  if (n == 0)
    return 0;

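  /* With nails in use (GMP_NAIL_BITS != 0), each limb carries
     GMP_NUMB_BITS significant bits.  Shifting d up by GMP_NAIL_BITS lets
     the full-limb udiv primitives below be used as-is; each remainder is
     shifted back down by the same amount.  */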
  d <<= GMP_NAIL_BITS;

  qp += (n - 1);  /* Make qp point at most significant quotient limb */

  if ((d & GMP_LIMB_HIGHBIT) != 0)
    {
      if (un != 0)
        {
          /* High quotient limb is 0 or 1, skip a divide step. */
          mp_limb_t q;
          r = up[un - 1] << GMP_NAIL_BITS;
          q = (r >= d);
          *qp-- = q;
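          /* Branch-free conditional subtract: -q is an all-ones mask when
             q is 1 and zero when q is 0, so d is subtracted exactly when
             r >= d.  */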
          r -= (d & -q);
          r >>= GMP_NAIL_BITS;
          n--;
          un--;
        }

      if (BELOW_THRESHOLD (n, DIVREM_1_NORM_THRESHOLD))
        {
        plain:
          for (i = un - 1; i >= 0; i--)
            {
              n0 = up[i] << GMP_NAIL_BITS;
              udiv_qrnnd (*qp, r, r, n0, d);
              r >>= GMP_NAIL_BITS;
              qp--;
            }
          for (i = qxn - 1; i >= 0; i--)
            {
              udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d);
              r >>= GMP_NAIL_BITS;
              qp--;
            }
          return r;
        }
      else
        {
          /* Multiply-by-inverse, divisor already normalized. */
          mp_limb_t dinv;
          invert_limb (dinv, d);
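          /* dinv = floor ((B^2 - 1) / d) - B for the normalized d (high
             bit set); udiv_qrnnd_preinv then replaces each hardware divide
             with multiplies by this fixed reciprocal.  */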

          for (i = un - 1; i >= 0; i--)
            {
              n0 = up[i] << GMP_NAIL_BITS;
              udiv_qrnnd_preinv (*qp, r, r, n0, d, dinv);
              r >>= GMP_NAIL_BITS;
              qp--;
            }
          for (i = qxn - 1; i >= 0; i--)
            {
              udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);
              r >>= GMP_NAIL_BITS;
              qp--;
            }
          return r;
        }
    }
  else
    {
      /* Most significant bit of divisor == 0.  */
      int cnt;

      /* Skip a division if high < divisor (high quotient 0).  Testing here
         before normalizing will still skip as often as possible.  */
      if (un != 0)
        {
          n1 = up[un - 1] << GMP_NAIL_BITS;
          if (n1 < d)
            {
              r = n1 >> GMP_NAIL_BITS;
              *qp-- = 0;
              n--;
              if (n == 0)
                return r;
              un--;
            }
        }

      if (! UDIV_NEEDS_NORMALIZATION
          && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
        goto plain;

      count_leading_zeros (cnt, d);
      d <<= cnt;
      r <<= cnt;
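      /* Since the high bit of d was clear, cnt >= 1 here, so the right
         shifts by GMP_LIMB_BITS - cnt and GMP_NUMB_BITS - cnt below are by
         less than a limb and hence well defined (cf. the norm/unnorm note
         at the top of the file).  The remainder is kept shifted by cnt and
         unshifted on return.  */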

      if (UDIV_NEEDS_NORMALIZATION
          && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
        {
          mp_limb_t nshift;
          if (un != 0)
            {
              n1 = up[un - 1] << GMP_NAIL_BITS;
              r |= (n1 >> (GMP_LIMB_BITS - cnt));
              for (i = un - 2; i >= 0; i--)
                {
                  n0 = up[i] << GMP_NAIL_BITS;
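                  /* EXTRACT: splice the next normalized dividend limb out
                     of the adjacent pair n1,n0.  This is the shift the
                     head comment suggests mpn_lshift could instead do in
                     one pass.  */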
                  nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
                  udiv_qrnnd (*qp, r, r, nshift, d);
                  r >>= GMP_NAIL_BITS;
                  qp--;
                  n1 = n0;
                }
              udiv_qrnnd (*qp, r, r, n1 << cnt, d);
              r >>= GMP_NAIL_BITS;
              qp--;
            }
          for (i = qxn - 1; i >= 0; i--)
            {
              udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d);
              r >>= GMP_NAIL_BITS;
              qp--;
            }
          return r >> cnt;
        }
      else
        {
          mp_limb_t dinv, nshift;
          invert_limb (dinv, d);
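          /* As in the normalized case, dinv is the reciprocal of the now
             shifted d; the dividend limbs are shifted to match and the
             remainder is unshifted on return.  */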
          if (un != 0)
            {
              n1 = up[un - 1] << GMP_NAIL_BITS;
              r |= (n1 >> (GMP_LIMB_BITS - cnt));
              for (i = un - 2; i >= 0; i--)
                {
                  n0 = up[i] << GMP_NAIL_BITS;
                  nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
                  udiv_qrnnd_preinv (*qp, r, r, nshift, d, dinv);
                  r >>= GMP_NAIL_BITS;
                  qp--;
                  n1 = n0;
                }
              udiv_qrnnd_preinv (*qp, r, r, n1 << cnt, d, dinv);
              r >>= GMP_NAIL_BITS;
              qp--;
            }
          for (i = qxn - 1; i >= 0; i--)
            {
              udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);
              r >>= GMP_NAIL_BITS;
              qp--;
            }
          return r >> cnt;
        }
    }
}