/* mpn_tdiv_q -- division for arbitrary size operands.

   Contributed to the GNU project by Torbjorn Granlund.

   Copyright 2009, 2010 Free Software Foundation, Inc.

   Copyright 2010 William Hart (modified to work with MPIR functions).

   This file is part of the GNU MP Library.

   The GNU MP Library is free software; you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as published by
   the Free Software Foundation; either version 3 of the License, or (at your
   option) any later version.

   The GNU MP Library is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
   License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */

#include "mpir.h"
#include "gmp-impl.h"
#include "longlong.h"

/* The DIV_QR_THRESHOLDs should be replaced with DIV_Q_THRESHOLDs */

/* Compute Q = N/D with truncation.
     N = {np,nn}
     D = {dp,dn}
     Q = {qp,nn-dn+1}
     T = {scratch,nn+1} is scratch space; unlike the GMP original, this
     version allocates it internally rather than taking it as an argument.
   N and D are both untouched by the computation.

   Operand requirements:
     N >= D > 0
     dp[dn-1] != 0
     No overlap between the N, D, and Q areas.

   This division function does not clobber its input operands, since it is
   intended to support average-O(qn) division, and for that to be effective,
   it cannot require callers to copy an O(nn) operand.

   (In the GMP original, a caller that did not care about {np,nn+1} after
   the call could pass np also for the scratch argument, saving some
   allocation and copying.  FIXME: Is this a good design?  We only really
   save any copying for already-normalised divisors, which should be rare.
   It also prevents us from reasonably asking for all the scratch space we
   need.)

   We write nn-dn+1 limbs for the quotient, but return void.  Why not return
   the most significant quotient limb?  Look at the 4 main code blocks below
   (consisting of an outer if-else where each arm contains an if-else).  It
   is tricky for the first code block, since the mpn_*_div_q calls will
   typically generate all nn-dn+1 limbs and return 0 or 1.  I don't see how
   to fix that unless we generate the most significant quotient limb here,
   before calling mpn_*_div_q, or put the quotient in a temporary area.
   Since this is a critical division case (the SB sub-case in particular),
   copying is not a good idea.

   It might make sense to split the if-else parts of the (qn + FUDGE
   >= dn) blocks into separate functions, since we could promise quite
   different things to callers in these two cases.  The 'then' case
   benefits from np=scratch, and it could perhaps even tolerate qp=np,
   saving some headache for many callers.

   FIXME: Scratch allocation leaves a lot to be desired.  E.g., for the MU
   size operands, we do not reuse the huge scratch for adjustments.  This
   can be a serious waste of memory for the largest operands.
*/
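
/* Example usage (an illustrative sketch, not part of this file): divide a
   4-limb dividend by a 2-limb divisor, writing nn-dn+1 = 3 quotient limbs.
   The operand values are arbitrary; limbs are stored least significant
   first, with B = 2^GMP_NUMB_BITS.

     mp_limb_t np[4] = { 7, 0, 0, 5 };   N = 5*B^3 + 7
     mp_limb_t dp[2] = { 1, 2 };         D = 2*B + 1
     mp_limb_t qp[3];
     mpn_tdiv_q (qp, np, 4, dp, 2);      qp now holds trunc(N/D)
*/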

/* FUDGE determines when to try getting an approximate quotient from the upper
   parts of the dividend and divisor, then adjust.  N.B. FUDGE must be >= 2
   for the code to be correct.  */
#define FUDGE 5			/* FIXME: tune this */
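
/* For example, with nn = 100 and dn = 60 we get qn = nn - dn + 1 = 41;
   since qn + FUDGE = 46 < dn, the code below divides only the high parts
   of the operands and adjusts, instead of doing a full division.  */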

void
mpn_tdiv_q (mp_ptr qp,
            mp_srcptr np, mp_size_t nn,
            mp_srcptr dp, mp_size_t dn)
{
  mp_ptr new_dp, new_np, tp, rp, scratch;
  mp_limb_t cy, dh, qh;
  mp_size_t new_nn, qn;
  mp_limb_t dinv;
  int cnt;
  TMP_DECL;
  TMP_MARK;

  ASSERT (nn >= dn);
  ASSERT (dn > 0);
  ASSERT (dp[dn - 1] != 0);
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, np, nn));
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, dp, dn));

  ASSERT_ALWAYS (FUDGE >= 2);

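  /* One-limb divisor: a single mpn_divrem_1 call computes the quotient.  */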
  if (dn == 1)
    {
      mpn_divrem_1 (qp, 0L, np, nn, dp[dn - 1]);
      return;
    }

  scratch = TMP_ALLOC_LIMBS (nn + 1);

  qn = nn - dn + 1;		/* Quotient size, high limb might be zero */

  if (qn + FUDGE >= dn)
    {
      /* |________________________|
                        |_______|  */
      new_np = scratch;

      dh = dp[dn - 1];
      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
        {
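          /* D's high bit is clear: shift both N and D left by cnt bits so
             that the divisor becomes normalised, as the quotient routines
             below require.  */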
          count_leading_zeros (cnt, dh);

          cy = mpn_lshift (new_np, np, nn, cnt);
          new_np[nn] = cy;
          new_nn = nn + (cy != 0);

          new_dp = TMP_ALLOC_LIMBS (dn);
          mpn_lshift (new_dp, dp, dn, cnt);

          if (dn == 2)
            {
              qh = mpn_divrem_2 (qp, 0L, new_np, new_nn, new_dp);
            }
          else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
                   BELOW_THRESHOLD (new_nn - dn, DC_DIV_Q_THRESHOLD))
            {
              mpir_invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
              qh = mpn_sb_div_q (qp, new_np, new_nn, new_dp, dn, dinv);
            }
          else if (BELOW_THRESHOLD (dn, INV_DIV_Q_THRESHOLD) ||
                   BELOW_THRESHOLD (nn, 2 * INV_DIV_Q_THRESHOLD))
            {
              mpir_invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
              qh = mpn_dc_div_q (qp, new_np, new_nn, new_dp, dn, dinv);
            }
          else
            {
              mp_ptr inv = TMP_ALLOC_LIMBS (dn);
              mpn_invert (inv, new_dp, dn);
              qh = mpn_inv_div_q (qp, new_np, new_nn, new_dp, dn, inv);
            }
          if (cy == 0)
            qp[qn - 1] = qh;
          else if (UNLIKELY (qh != 0))
            {
              /* This happens only when the quotient is close to B^n and
                 the mpn_*_div_q call returned B^n.  */
              mp_size_t i, n;
              n = new_nn - dn;
              for (i = 0; i < n; i++)
                qp[i] = GMP_NUMB_MAX;
              qh = 0;		/* currently ignored */
            }
        }
      else /* divisor is already normalised */
        {
          MPN_COPY (new_np, np, nn);

          if (dn == 2)
            {
              qh = mpn_divrem_2 (qp, 0L, new_np, nn, dp);
            }
          else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
                   BELOW_THRESHOLD (nn - dn, DC_DIV_Q_THRESHOLD))
            {
              mpir_invert_pi1 (dinv, dh, dp[dn - 2]);
              qh = mpn_sb_div_q (qp, new_np, nn, dp, dn, dinv);
            }
          else if (BELOW_THRESHOLD (dn, INV_DIV_Q_THRESHOLD) ||
                   BELOW_THRESHOLD (nn, 2 * INV_DIV_Q_THRESHOLD))
            {
              mpir_invert_pi1 (dinv, dh, dp[dn - 2]);
              qh = mpn_dc_div_q (qp, new_np, nn, dp, dn, dinv);
            }
          else
            {
              mp_ptr inv = TMP_ALLOC_LIMBS (dn);
              mpn_invert (inv, dp, dn);
              qh = mpn_inv_div_q (qp, new_np, nn, dp, dn, inv);
            }
          qp[nn - dn] = qh;
        }
    }
  else
    {
      /* |________________________|
               |_________________|  */
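      /* The quotient needs only qn limbs, so divide just the high 2*qn+1
         limbs of N by the high qn+1 limbs of D.  This yields an approximate
         quotient that can only be slightly too large; the adjustment at the
         end of this block corrects it.  */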
      tp = TMP_ALLOC_LIMBS (qn + 1);

      new_np = scratch;
      new_nn = 2 * qn + 1;

      dh = dp[dn - 1];
      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
        {
          count_leading_zeros (cnt, dh);

          cy = mpn_lshift (new_np, np + nn - new_nn, new_nn, cnt);
          new_np[new_nn] = cy;

          new_nn += (cy != 0);

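          /* Use only the high qn+1 limbs of D, shifted into normalised
             position; the OR pulls in the bits shifted out of the next
             lower limb.  */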
          new_dp = TMP_ALLOC_LIMBS (qn + 1);
          mpn_lshift (new_dp, dp + dn - (qn + 1), qn + 1, cnt);
          new_dp[0] |= dp[dn - (qn + 1) - 1] >> (GMP_NUMB_BITS - cnt);

          if (qn + 1 == 2)
            {
              qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
            }
          else if (BELOW_THRESHOLD (qn - 1, DC_DIVAPPR_Q_THRESHOLD))
            {
              mpir_invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
              qh = mpn_sb_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv);
            }
          else if (BELOW_THRESHOLD (qn - 1, INV_DIVAPPR_Q_THRESHOLD))
            {
              mpir_invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
              qh = mpn_dc_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv);
            }
          else
            {
              mp_ptr inv = TMP_ALLOC_LIMBS (qn + 1);
              mpn_invert (inv, new_dp, qn + 1);
              qh = mpn_inv_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, inv);
            }
          if (cy == 0)
            tp[qn] = qh;
          else if (UNLIKELY (qh != 0))
            {
              /* This happens only when the quotient is close to B^n and
                 mpn_*_divappr_q returned B^n.  */
              mp_size_t i, n;
              n = new_nn - (qn + 1);
              for (i = 0; i < n; i++)
                tp[i] = GMP_NUMB_MAX;
              qh = 0;		/* currently ignored */
            }
        }
      else /* divisor is already normalised */
        {
          MPN_COPY (new_np, np + nn - new_nn, new_nn);

          new_dp = (mp_ptr) dp + dn - (qn + 1);

          if (qn + 1 == 2)
            {
              qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
            }
          else if (BELOW_THRESHOLD (qn - 1, DC_DIVAPPR_Q_THRESHOLD))
            {
              mpir_invert_pi1 (dinv, dh, new_dp[qn - 1]);
              qh = mpn_sb_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv);
            }
          else if (BELOW_THRESHOLD (qn - 1, INV_DIVAPPR_Q_THRESHOLD))
            {
              mpir_invert_pi1 (dinv, dh, new_dp[qn - 1]);
              qh = mpn_dc_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv);
            }
          else
            {
              mp_ptr inv = TMP_ALLOC_LIMBS (qn + 1);
              mpn_invert (inv, new_dp, qn + 1);
              qh = mpn_inv_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, inv);
            }
          tp[qn] = qh;
        }

      MPN_COPY (qp, tp + 1, qn);
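      /* {tp+1,qn} is either the true quotient or one too large.  If the
         discarded low limb tp[0] is above a small bound, the approximation
         error cannot have propagated into the kept limbs, so no check is
         needed; otherwise compare N against D times the candidate quotient
         and decrement on overshoot.  */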
      if (UNLIKELY (tp[0] <= 4))
        {
          mp_size_t rn;

          rp = TMP_ALLOC_LIMBS (dn + qn);
          mpn_mul (rp, dp, dn, tp + 1, qn);
          rn = dn + qn;
          rn -= rp[rn - 1] == 0;

          if (rn > nn || mpn_cmp (np, rp, nn) < 0)
            mpn_decr_u (qp, 1);
        }
    }

  TMP_FREE;
}