1 /* mpn_dcpi1_div_qr_n -- recursive divide-and-conquer division for arbitrary
2 size operands.
3
4 Contributed to the GNU project by Torbjorn Granlund.
5
6 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
7 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
8 GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
9
10 Copyright 2006, 2007, 2009 Free Software Foundation, Inc.
11
12 This file is part of the GNU MP Library.
13
14 The GNU MP Library is free software; you can redistribute it and/or modify
15 it under the terms of the GNU Lesser General Public License as published by
16 the Free Software Foundation; either version 3 of the License, or (at your
17 option) any later version.
18
19 The GNU MP Library is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
21 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
22 License for more details.
23
24 You should have received a copy of the GNU Lesser General Public License
25 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
26
27 #include "gmp.h"
28 #include "gmp-impl.h"
29 #include "longlong.h"
30
31
32 mp_limb_t
mpn_dcpi1_div_qr_n(mp_ptr qp,mp_ptr np,mp_srcptr dp,mp_size_t n,gmp_pi1_t * dinv,mp_ptr tp)33 mpn_dcpi1_div_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
34 gmp_pi1_t *dinv, mp_ptr tp)
35 {
36 mp_size_t lo, hi;
37 mp_limb_t cy, qh, ql;
38
39 lo = n >> 1; /* floor(n/2) */
40 hi = n - lo; /* ceil(n/2) */
41
42 if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD))
43 qh = mpn_sbpi1_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dinv->inv32);
44 else
45 qh = mpn_dcpi1_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dinv, tp);
46
47 mpn_mul (tp, qp + lo, hi, dp, lo);
48
49 cy = mpn_sub_n (np + lo, np + lo, tp, n);
50 if (qh != 0)
51 cy += mpn_sub_n (np + n, np + n, dp, lo);
52
53 while (cy != 0)
54 {
55 qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1);
56 cy -= mpn_add_n (np + lo, np + lo, dp, n);
57 }
58
59 if (BELOW_THRESHOLD (lo, DC_DIV_QR_THRESHOLD))
60 ql = mpn_sbpi1_div_qr (qp, np + hi, 2 * lo, dp + hi, lo, dinv->inv32);
61 else
62 ql = mpn_dcpi1_div_qr_n (qp, np + hi, dp + hi, lo, dinv, tp);
63
64 mpn_mul (tp, dp, hi, qp, lo);
65
66 cy = mpn_sub_n (np, np, tp, n);
67 if (ql != 0)
68 cy += mpn_sub_n (np + lo, np + lo, dp, hi);
69
70 while (cy != 0)
71 {
72 mpn_sub_1 (qp, qp, lo, 1);
73 cy -= mpn_add_n (np, np, dp, n);
74 }
75
76 return qh;
77 }
78
79 mp_limb_t
mpn_dcpi1_div_qr(mp_ptr qp,mp_ptr np,mp_size_t nn,mp_srcptr dp,mp_size_t dn,gmp_pi1_t * dinv)80 mpn_dcpi1_div_qr (mp_ptr qp,
81 mp_ptr np, mp_size_t nn,
82 mp_srcptr dp, mp_size_t dn,
83 gmp_pi1_t *dinv)
84 {
85 mp_size_t qn;
86 mp_limb_t qh, cy;
87 mp_ptr tp;
88 TMP_DECL;
89
90 TMP_MARK;
91
92 ASSERT (dn >= 6); /* to adhere to mpn_sbpi1_div_qr's limits */
93 ASSERT (nn - dn >= 3); /* to adhere to mpn_sbpi1_div_qr's limits */
94 ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
95
96 tp = TMP_SALLOC_LIMBS (dn);
97
98 qn = nn - dn;
99 qp += qn;
100 np += nn;
101 dp += dn;
102
103 if (qn > dn)
104 {
105 /* Reduce qn mod dn without division, optimizing small operations. */
106 do
107 qn -= dn;
108 while (qn > dn);
109
110 qp -= qn; /* point at low limb of next quotient block */
111 np -= qn; /* point in the middle of partial remainder */
112
113 /* Perform the typically smaller block first. */
114 if (qn == 1)
115 {
116 mp_limb_t q, n2, n1, n0, d1, d0;
117
118 /* Handle qh up front, for simplicity. */
119 qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0;
120 if (qh)
121 ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn));
122
123 /* A single iteration of schoolbook: One 3/2 division,
124 followed by the bignum update and adjustment. */
125 n2 = np[0];
126 n1 = np[-1];
127 n0 = np[-2];
128 d1 = dp[-1];
129 d0 = dp[-2];
130
131 ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0));
132
133 if (UNLIKELY (n2 == d1) && n1 == d0)
134 {
135 q = GMP_NUMB_MASK;
136 cy = mpn_submul_1 (np - dn, dp - dn, dn, q);
137 ASSERT (cy == n2);
138 }
139 else
140 {
141 udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv->inv32);
142
143 if (dn > 2)
144 {
145 mp_limb_t cy, cy1;
146 cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q);
147
148 cy1 = n0 < cy;
149 n0 = (n0 - cy) & GMP_NUMB_MASK;
150 cy = n1 < cy1;
151 n1 = (n1 - cy1) & GMP_NUMB_MASK;
152 np[-2] = n0;
153
154 if (UNLIKELY (cy != 0))
155 {
156 n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1);
157 qh -= (q == 0);
158 q = (q - 1) & GMP_NUMB_MASK;
159 }
160 }
161 else
162 np[-2] = n0;
163
164 np[-1] = n1;
165 }
166 qp[0] = q;
167 }
168 else
169 {
170 /* Do a 2qn / qn division */
171 if (qn == 2)
172 qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2); /* FIXME: obsolete function. Use 5/3 division? */
173 else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
174 qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
175 else
176 qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
177
178 if (qn != dn)
179 {
180 if (qn > dn - qn)
181 mpn_mul (tp, qp, qn, dp - dn, dn - qn);
182 else
183 mpn_mul (tp, dp - dn, dn - qn, qp, qn);
184
185 cy = mpn_sub_n (np - dn, np - dn, tp, dn);
186 if (qh != 0)
187 cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
188
189 while (cy != 0)
190 {
191 qh -= mpn_sub_1 (qp, qp, qn, 1);
192 cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
193 }
194 }
195 }
196
197 qn = nn - dn - qn;
198 do
199 {
200 qp -= dn;
201 np -= dn;
202 mpn_dcpi1_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp);
203 qn -= dn;
204 }
205 while (qn > 0);
206 }
207 else
208 {
209 qp -= qn; /* point at low limb of next quotient block */
210 np -= qn; /* point in the middle of partial remainder */
211
212 if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
213 qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
214 else
215 qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
216
217 if (qn != dn)
218 {
219 if (qn > dn - qn)
220 mpn_mul (tp, qp, qn, dp - dn, dn - qn);
221 else
222 mpn_mul (tp, dp - dn, dn - qn, qp, qn);
223
224 cy = mpn_sub_n (np - dn, np - dn, tp, dn);
225 if (qh != 0)
226 cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
227
228 while (cy != 0)
229 {
230 qh -= mpn_sub_1 (qp, qp, qn, 1);
231 cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
232 }
233 }
234 }
235
236 TMP_FREE;
237 return qh;
238 }
239