1 /* mpn_dcpi1_div_qr_n -- recursive divide-and-conquer division for arbitrary 2 size operands. 3 4 Contributed to the GNU project by Torbjorn Granlund. 5 6 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY 7 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 8 GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE. 9 10 Copyright 2006, 2007, 2009 Free Software Foundation, Inc. 11 12 This file is part of the GNU MP Library. 13 14 The GNU MP Library is free software; you can redistribute it and/or modify 15 it under the terms of the GNU Lesser General Public License as published by 16 the Free Software Foundation; either version 3 of the License, or (at your 17 option) any later version. 18 19 The GNU MP Library is distributed in the hope that it will be useful, but 20 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 21 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 22 License for more details. 23 24 You should have received a copy of the GNU Lesser General Public License 25 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 26 27 #include "gmp.h" 28 #include "gmp-impl.h" 29 #include "longlong.h" 30 31 32 mp_limb_t 33 mpn_dcpi1_div_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n, 34 gmp_pi1_t *dinv, mp_ptr tp) 35 { 36 mp_size_t lo, hi; 37 mp_limb_t cy, qh, ql; 38 39 lo = n >> 1; /* floor(n/2) */ 40 hi = n - lo; /* ceil(n/2) */ 41 42 if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD)) 43 qh = mpn_sbpi1_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dinv->inv32); 44 else 45 qh = mpn_dcpi1_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dinv, tp); 46 47 mpn_mul (tp, qp + lo, hi, dp, lo); 48 49 cy = mpn_sub_n (np + lo, np + lo, tp, n); 50 if (qh != 0) 51 cy += mpn_sub_n (np + n, np + n, dp, lo); 52 53 while (cy != 0) 54 { 55 qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1); 56 cy -= mpn_add_n (np + lo, np + lo, dp, n); 57 } 58 59 if (BELOW_THRESHOLD (lo, DC_DIV_QR_THRESHOLD)) 60 ql = mpn_sbpi1_div_qr (qp, np + hi, 2 * lo, dp + hi, lo, dinv->inv32); 61 else 62 ql = mpn_dcpi1_div_qr_n (qp, np + hi, dp + hi, lo, dinv, tp); 63 64 mpn_mul (tp, dp, hi, qp, lo); 65 66 cy = mpn_sub_n (np, np, tp, n); 67 if (ql != 0) 68 cy += mpn_sub_n (np + lo, np + lo, dp, hi); 69 70 while (cy != 0) 71 { 72 mpn_sub_1 (qp, qp, lo, 1); 73 cy -= mpn_add_n (np, np, dp, n); 74 } 75 76 return qh; 77 } 78 79 mp_limb_t 80 mpn_dcpi1_div_qr (mp_ptr qp, 81 mp_ptr np, mp_size_t nn, 82 mp_srcptr dp, mp_size_t dn, 83 gmp_pi1_t *dinv) 84 { 85 mp_size_t qn; 86 mp_limb_t qh, cy; 87 mp_ptr tp; 88 TMP_DECL; 89 90 TMP_MARK; 91 92 ASSERT (dn >= 6); /* to adhere to mpn_sbpi1_div_qr's limits */ 93 ASSERT (nn - dn >= 3); /* to adhere to mpn_sbpi1_div_qr's limits */ 94 ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT); 95 96 tp = TMP_SALLOC_LIMBS (dn); 97 98 qn = nn - dn; 99 qp += qn; 100 np += nn; 101 dp += dn; 102 103 if (qn > dn) 104 { 105 /* Reduce qn mod dn without division, optimizing small operations. */ 106 do 107 qn -= dn; 108 while (qn > dn); 109 110 qp -= qn; /* point at low limb of next quotient block */ 111 np -= qn; /* point in the middle of partial remainder */ 112 113 /* Perform the typically smaller block first. */ 114 if (qn == 1) 115 { 116 mp_limb_t q, n2, n1, n0, d1, d0; 117 118 /* Handle qh up front, for simplicity. */ 119 qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0; 120 if (qh) 121 ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn)); 122 123 /* A single iteration of schoolbook: One 3/2 division, 124 followed by the bignum update and adjustment. */ 125 n2 = np[0]; 126 n1 = np[-1]; 127 n0 = np[-2]; 128 d1 = dp[-1]; 129 d0 = dp[-2]; 130 131 ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0)); 132 133 if (UNLIKELY (n2 == d1) && n1 == d0) 134 { 135 q = GMP_NUMB_MASK; 136 cy = mpn_submul_1 (np - dn, dp - dn, dn, q); 137 ASSERT (cy == n2); 138 } 139 else 140 { 141 udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv->inv32); 142 143 if (dn > 2) 144 { 145 mp_limb_t cy, cy1; 146 cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q); 147 148 cy1 = n0 < cy; 149 n0 = (n0 - cy) & GMP_NUMB_MASK; 150 cy = n1 < cy1; 151 n1 = (n1 - cy1) & GMP_NUMB_MASK; 152 np[-2] = n0; 153 154 if (UNLIKELY (cy != 0)) 155 { 156 n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1); 157 qh -= (q == 0); 158 q = (q - 1) & GMP_NUMB_MASK; 159 } 160 } 161 else 162 np[-2] = n0; 163 164 np[-1] = n1; 165 } 166 qp[0] = q; 167 } 168 else 169 { 170 /* Do a 2qn / qn division */ 171 if (qn == 2) 172 qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2); /* FIXME: obsolete function. Use 5/3 division? */ 173 else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD)) 174 qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32); 175 else 176 qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp); 177 178 if (qn != dn) 179 { 180 if (qn > dn - qn) 181 mpn_mul (tp, qp, qn, dp - dn, dn - qn); 182 else 183 mpn_mul (tp, dp - dn, dn - qn, qp, qn); 184 185 cy = mpn_sub_n (np - dn, np - dn, tp, dn); 186 if (qh != 0) 187 cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn); 188 189 while (cy != 0) 190 { 191 qh -= mpn_sub_1 (qp, qp, qn, 1); 192 cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn); 193 } 194 } 195 } 196 197 qn = nn - dn - qn; 198 do 199 { 200 qp -= dn; 201 np -= dn; 202 mpn_dcpi1_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp); 203 qn -= dn; 204 } 205 while (qn > 0); 206 } 207 else 208 { 209 qp -= qn; /* point at low limb of next quotient block */ 210 np -= qn; /* point in the middle of partial remainder */ 211 212 if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD)) 213 qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32); 214 else 215 qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp); 216 217 if (qn != dn) 218 { 219 if (qn > dn - qn) 220 mpn_mul (tp, qp, qn, dp - dn, dn - qn); 221 else 222 mpn_mul (tp, dp - dn, dn - qn, qp, qn); 223 224 cy = mpn_sub_n (np - dn, np - dn, tp, dn); 225 if (qh != 0) 226 cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn); 227 228 while (cy != 0) 229 { 230 qh -= mpn_sub_1 (qp, qp, qn, 1); 231 cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn); 232 } 233 } 234 } 235 236 TMP_FREE; 237 return qh; 238 } 239