1 /* mpn_dcpi1_div_qr_n -- recursive divide-and-conquer division for arbitrary
2 size operands.
3
4 Contributed to the GNU project by Torbjorn Granlund.
5
6 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
7 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
8 GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
9
10 Copyright 2006, 2007, 2009 Free Software Foundation, Inc.
11
12 This file is part of the GNU MP Library.
13
14 The GNU MP Library is free software; you can redistribute it and/or modify
15 it under the terms of either:
16
17 * the GNU Lesser General Public License as published by the Free
18 Software Foundation; either version 3 of the License, or (at your
19 option) any later version.
20
21 or
22
23 * the GNU General Public License as published by the Free Software
24 Foundation; either version 2 of the License, or (at your option) any
25 later version.
26
27 or both in parallel, as here.
28
29 The GNU MP Library is distributed in the hope that it will be useful, but
30 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
31 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
32 for more details.
33
34 You should have received copies of the GNU General Public License and the
35 GNU Lesser General Public License along with the GNU MP Library. If not,
36 see https://www.gnu.org/licenses/. */
37
38 #include "gmp-impl.h"
39 #include "longlong.h"
40
41
42 mp_limb_t
mpn_dcpi1_div_qr_n(mp_ptr qp,mp_ptr np,mp_srcptr dp,mp_size_t n,gmp_pi1_t * dinv,mp_ptr tp)43 mpn_dcpi1_div_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
44 gmp_pi1_t *dinv, mp_ptr tp)
45 {
46 mp_size_t lo, hi;
47 mp_limb_t cy, qh, ql;
48
49 lo = n >> 1; /* floor(n/2) */
50 hi = n - lo; /* ceil(n/2) */
51
52 if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD))
53 qh = mpn_sbpi1_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dinv->inv32);
54 else
55 qh = mpn_dcpi1_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dinv, tp);
56
57 mpn_mul (tp, qp + lo, hi, dp, lo);
58
59 cy = mpn_sub_n (np + lo, np + lo, tp, n);
60 if (qh != 0)
61 cy += mpn_sub_n (np + n, np + n, dp, lo);
62
63 while (cy != 0)
64 {
65 qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1);
66 cy -= mpn_add_n (np + lo, np + lo, dp, n);
67 }
68
69 if (BELOW_THRESHOLD (lo, DC_DIV_QR_THRESHOLD))
70 ql = mpn_sbpi1_div_qr (qp, np + hi, 2 * lo, dp + hi, lo, dinv->inv32);
71 else
72 ql = mpn_dcpi1_div_qr_n (qp, np + hi, dp + hi, lo, dinv, tp);
73
74 mpn_mul (tp, dp, hi, qp, lo);
75
76 cy = mpn_sub_n (np, np, tp, n);
77 if (ql != 0)
78 cy += mpn_sub_n (np + lo, np + lo, dp, hi);
79
80 while (cy != 0)
81 {
82 mpn_sub_1 (qp, qp, lo, 1);
83 cy -= mpn_add_n (np, np, dp, n);
84 }
85
86 return qh;
87 }
88
89 mp_limb_t
mpn_dcpi1_div_qr(mp_ptr qp,mp_ptr np,mp_size_t nn,mp_srcptr dp,mp_size_t dn,gmp_pi1_t * dinv)90 mpn_dcpi1_div_qr (mp_ptr qp,
91 mp_ptr np, mp_size_t nn,
92 mp_srcptr dp, mp_size_t dn,
93 gmp_pi1_t *dinv)
94 {
95 mp_size_t qn;
96 mp_limb_t qh, cy;
97 mp_ptr tp;
98 TMP_DECL;
99
100 TMP_MARK;
101
102 ASSERT (dn >= 6); /* to adhere to mpn_sbpi1_div_qr's limits */
103 ASSERT (nn - dn >= 3); /* to adhere to mpn_sbpi1_div_qr's limits */
104 ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
105
106 tp = TMP_ALLOC_LIMBS (dn);
107
108 qn = nn - dn;
109 qp += qn;
110 np += nn;
111 dp += dn;
112
113 if (qn > dn)
114 {
115 /* Reduce qn mod dn without division, optimizing small operations. */
116 do
117 qn -= dn;
118 while (qn > dn);
119
120 qp -= qn; /* point at low limb of next quotient block */
121 np -= qn; /* point in the middle of partial remainder */
122
123 /* Perform the typically smaller block first. */
124 if (qn == 1)
125 {
126 mp_limb_t q, n2, n1, n0, d1, d0;
127
128 /* Handle qh up front, for simplicity. */
129 qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0;
130 if (qh)
131 ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn));
132
133 /* A single iteration of schoolbook: One 3/2 division,
134 followed by the bignum update and adjustment. */
135 n2 = np[0];
136 n1 = np[-1];
137 n0 = np[-2];
138 d1 = dp[-1];
139 d0 = dp[-2];
140
141 ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0));
142
143 if (UNLIKELY (n2 == d1) && n1 == d0)
144 {
145 q = GMP_NUMB_MASK;
146 cy = mpn_submul_1 (np - dn, dp - dn, dn, q);
147 ASSERT (cy == n2);
148 }
149 else
150 {
151 udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv->inv32);
152
153 if (dn > 2)
154 {
155 mp_limb_t cy, cy1;
156 cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q);
157
158 cy1 = n0 < cy;
159 n0 = (n0 - cy) & GMP_NUMB_MASK;
160 cy = n1 < cy1;
161 n1 = (n1 - cy1) & GMP_NUMB_MASK;
162 np[-2] = n0;
163
164 if (UNLIKELY (cy != 0))
165 {
166 n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1);
167 qh -= (q == 0);
168 q = (q - 1) & GMP_NUMB_MASK;
169 }
170 }
171 else
172 np[-2] = n0;
173
174 np[-1] = n1;
175 }
176 qp[0] = q;
177 }
178 else
179 {
180 /* Do a 2qn / qn division */
181 if (qn == 2)
182 qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2); /* FIXME: obsolete function. Use 5/3 division? */
183 else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
184 qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
185 else
186 qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
187
188 if (qn != dn)
189 {
190 if (qn > dn - qn)
191 mpn_mul (tp, qp, qn, dp - dn, dn - qn);
192 else
193 mpn_mul (tp, dp - dn, dn - qn, qp, qn);
194
195 cy = mpn_sub_n (np - dn, np - dn, tp, dn);
196 if (qh != 0)
197 cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
198
199 while (cy != 0)
200 {
201 qh -= mpn_sub_1 (qp, qp, qn, 1);
202 cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
203 }
204 }
205 }
206
207 qn = nn - dn - qn;
208 do
209 {
210 qp -= dn;
211 np -= dn;
212 mpn_dcpi1_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp);
213 qn -= dn;
214 }
215 while (qn > 0);
216 }
217 else
218 {
219 qp -= qn; /* point at low limb of next quotient block */
220 np -= qn; /* point in the middle of partial remainder */
221
222 if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
223 qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
224 else
225 qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
226
227 if (qn != dn)
228 {
229 if (qn > dn - qn)
230 mpn_mul (tp, qp, qn, dp - dn, dn - qn);
231 else
232 mpn_mul (tp, dp - dn, dn - qn, qp, qn);
233
234 cy = mpn_sub_n (np - dn, np - dn, tp, dn);
235 if (qh != 0)
236 cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
237
238 while (cy != 0)
239 {
240 qh -= mpn_sub_1 (qp, qp, qn, 1);
241 cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
242 }
243 }
244 }
245
246 TMP_FREE;
247 return qh;
248 }
249