1 /* mpn_dcpi1_divappr_q -- divide-and-conquer division, returning approximate
2 quotient. The quotient returned is either correct, or one too large.
3
4 Contributed to the GNU project by Torbjorn Granlund.
5
6 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
7 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
8 GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
9
10 Copyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
11
12 This file is part of the GNU MP Library.
13
14 The GNU MP Library is free software; you can redistribute it and/or modify
15 it under the terms of the GNU Lesser General Public License as published by
16 the Free Software Foundation; either version 3 of the License, or (at your
17 option) any later version.
18
19 The GNU MP Library is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
21 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
22 License for more details.
23
24 You should have received a copy of the GNU Lesser General Public License
25 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
26
27 #include "gmp.h"
28 #include "gmp-impl.h"
29 #include "longlong.h"
30
31
32 mp_limb_t
mpn_dcpi1_divappr_q_n(mp_ptr qp,mp_ptr np,mp_srcptr dp,mp_size_t n,gmp_pi1_t * dinv,mp_ptr tp)33 mpn_dcpi1_divappr_q_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
34 gmp_pi1_t *dinv, mp_ptr tp)
35 {
36 mp_size_t lo, hi;
37 mp_limb_t cy, qh, ql;
38
39 lo = n >> 1; /* floor(n/2) */
40 hi = n - lo; /* ceil(n/2) */
41
42 if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD))
43 qh = mpn_sbpi1_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dinv->inv32);
44 else
45 qh = mpn_dcpi1_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dinv, tp);
46
47 mpn_mul (tp, qp + lo, hi, dp, lo);
48
49 cy = mpn_sub_n (np + lo, np + lo, tp, n);
50 if (qh != 0)
51 cy += mpn_sub_n (np + n, np + n, dp, lo);
52
53 while (cy != 0)
54 {
55 qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1);
56 cy -= mpn_add_n (np + lo, np + lo, dp, n);
57 }
58
59 if (BELOW_THRESHOLD (lo, DC_DIVAPPR_Q_THRESHOLD))
60 ql = mpn_sbpi1_divappr_q (qp, np + hi, 2 * lo, dp + hi, lo, dinv->inv32);
61 else
62 ql = mpn_dcpi1_divappr_q_n (qp, np + hi, dp + hi, lo, dinv, tp);
63
64 if (UNLIKELY (ql != 0))
65 {
66 mp_size_t i;
67 for (i = 0; i < lo; i++)
68 qp[i] = GMP_NUMB_MASK;
69 }
70
71 return qh;
72 }
73
74 mp_limb_t
mpn_dcpi1_divappr_q(mp_ptr qp,mp_ptr np,mp_size_t nn,mp_srcptr dp,mp_size_t dn,gmp_pi1_t * dinv)75 mpn_dcpi1_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn,
76 mp_srcptr dp, mp_size_t dn, gmp_pi1_t *dinv)
77 {
78 mp_size_t qn;
79 mp_limb_t qh, cy, qsave;
80 mp_ptr tp;
81 TMP_DECL;
82
83 TMP_MARK;
84
85 ASSERT (dn >= 6);
86 ASSERT (nn > dn);
87 ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
88
89 qn = nn - dn;
90 qp += qn;
91 np += nn;
92 dp += dn;
93
94 if (qn >= dn)
95 {
96 qn++; /* pretend we'll need an extra limb */
97 /* Reduce qn mod dn without division, optimizing small operations. */
98 do
99 qn -= dn;
100 while (qn > dn);
101
102 qp -= qn; /* point at low limb of next quotient block */
103 np -= qn; /* point in the middle of partial remainder */
104
105 tp = TMP_SALLOC_LIMBS (dn);
106
107 /* Perform the typically smaller block first. */
108 if (qn == 1)
109 {
110 mp_limb_t q, n2, n1, n0, d1, d0;
111
112 /* Handle qh up front, for simplicity. */
113 qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0;
114 if (qh)
115 ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn));
116
117 /* A single iteration of schoolbook: One 3/2 division,
118 followed by the bignum update and adjustment. */
119 n2 = np[0];
120 n1 = np[-1];
121 n0 = np[-2];
122 d1 = dp[-1];
123 d0 = dp[-2];
124
125 ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0));
126
127 if (UNLIKELY (n2 == d1) && n1 == d0)
128 {
129 q = GMP_NUMB_MASK;
130 cy = mpn_submul_1 (np - dn, dp - dn, dn, q);
131 ASSERT (cy == n2);
132 }
133 else
134 {
135 udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv->inv32);
136
137 if (dn > 2)
138 {
139 mp_limb_t cy, cy1;
140 cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q);
141
142 cy1 = n0 < cy;
143 n0 = (n0 - cy) & GMP_NUMB_MASK;
144 cy = n1 < cy1;
145 n1 = (n1 - cy1) & GMP_NUMB_MASK;
146 np[-2] = n0;
147
148 if (UNLIKELY (cy != 0))
149 {
150 n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1);
151 qh -= (q == 0);
152 q = (q - 1) & GMP_NUMB_MASK;
153 }
154 }
155 else
156 np[-2] = n0;
157
158 np[-1] = n1;
159 }
160 qp[0] = q;
161 }
162 else
163 {
164 if (qn == 2)
165 qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2);
166 else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
167 qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
168 else
169 qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
170
171 if (qn != dn)
172 {
173 if (qn > dn - qn)
174 mpn_mul (tp, qp, qn, dp - dn, dn - qn);
175 else
176 mpn_mul (tp, dp - dn, dn - qn, qp, qn);
177
178 cy = mpn_sub_n (np - dn, np - dn, tp, dn);
179 if (qh != 0)
180 cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
181
182 while (cy != 0)
183 {
184 qh -= mpn_sub_1 (qp, qp, qn, 1);
185 cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
186 }
187 }
188 }
189 qn = nn - dn - qn + 1;
190 while (qn > dn)
191 {
192 qp -= dn;
193 np -= dn;
194 mpn_dcpi1_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp);
195 qn -= dn;
196 }
197
198 /* Since we pretended we'd need an extra quotient limb before, we now
199 have made sure the code above left just dn-1=qn quotient limbs to
200 develop. Develop that plus a guard limb. */
201 qn--;
202 qp -= qn;
203 np -= dn;
204 qsave = qp[qn];
205 mpn_dcpi1_divappr_q_n (qp, np - dn, dp - dn, dn, dinv, tp);
206 MPN_COPY_INCR (qp, qp + 1, qn);
207 qp[qn] = qsave;
208 }
209 else /* (qn < dn) */
210 {
211 mp_ptr q2p;
212 #if 0 /* not possible since we demand nn > dn */
213 if (qn == 0)
214 {
215 qh = mpn_cmp (np - dn, dp - dn, dn) >= 0;
216 if (qh)
217 mpn_sub_n (np - dn, np - dn, dp - dn, dn);
218 TMP_FREE;
219 return qh;
220 }
221 #endif
222
223 qp -= qn; /* point at low limb of next quotient block */
224 np -= qn; /* point in the middle of partial remainder */
225
226 q2p = TMP_SALLOC_LIMBS (qn + 1);
227 /* Should we at all check DC_DIVAPPR_Q_THRESHOLD here, or reply on
228 callers not to be silly? */
229 if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD))
230 {
231 qh = mpn_sbpi1_divappr_q (q2p, np - qn - 2, 2 * (qn + 1),
232 dp - (qn + 1), qn + 1, dinv->inv32);
233 }
234 else
235 {
236 /* It is tempting to use qp for recursive scratch and put quotient in
237 tp, but the recursive scratch needs one limb too many. */
238 tp = TMP_SALLOC_LIMBS (qn + 1);
239 qh = mpn_dcpi1_divappr_q_n (q2p, np - qn - 2, dp - (qn + 1), qn + 1, dinv, tp);
240 }
241 MPN_COPY (qp, q2p + 1, qn);
242 }
243
244 TMP_FREE;
245 return qh;
246 }
247