1 /* mpn_divrem_1 -- mpn by limb division. 2 3 Copyright 1991, 1993, 1994, 1996, 1998, 1999, 2000, 2002, 2003 Free Software 4 Foundation, Inc. 5 6 This file is part of the GNU MP Library. 7 8 The GNU MP Library is free software; you can redistribute it and/or modify 9 it under the terms of the GNU Lesser General Public License as published by 10 the Free Software Foundation; either version 3 of the License, or (at your 11 option) any later version. 12 13 The GNU MP Library is distributed in the hope that it will be useful, but 14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 16 License for more details. 17 18 You should have received a copy of the GNU Lesser General Public License 19 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 20 21 #include "gmp.h" 22 #include "gmp-impl.h" 23 #include "longlong.h" 24 25 26 /* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd, 27 meaning the quotient size where that should happen, the quotient size 28 being how many udiv divisions will be done. 29 30 The default is to use preinv always, CPUs where this doesn't suit have 31 tuned thresholds. Note in particular that preinv should certainly be 32 used if that's the only division available (USE_PREINV_ALWAYS). */ 33 34 #ifndef DIVREM_1_NORM_THRESHOLD 35 #define DIVREM_1_NORM_THRESHOLD 0 36 #endif 37 #ifndef DIVREM_1_UNNORM_THRESHOLD 38 #define DIVREM_1_UNNORM_THRESHOLD 0 39 #endif 40 41 42 43 /* If the cpu only has multiply-by-inverse division (eg. alpha), then NORM 44 and UNNORM thresholds are 0 and only the inversion code is included. 45 46 If multiply-by-inverse is never viable, then NORM and UNNORM thresholds 47 will be MP_SIZE_T_MAX and only the plain division code is included. 48 49 Otherwise mul-by-inverse is better than plain division above some 50 threshold, and best results are obtained by having code for both present. 51 52 The main reason for separating the norm and unnorm cases is that not all 53 CPUs give zero for "n0 >> GMP_LIMB_BITS" which would arise in the unnorm 54 code used on an already normalized divisor. 55 56 If UDIV_NEEDS_NORMALIZATION is false then plain division uses the same 57 non-shifting code for both the norm and unnorm cases, though with 58 different criteria for skipping a division, and with different thresholds 59 of course. And in fact if inversion is never viable, then that simple 60 non-shifting division would be all that's left. 61 62 The NORM and UNNORM thresholds might not differ much, but if there's 63 going to be separate code for norm and unnorm then it makes sense to have 64 separate thresholds. One thing that's possible is that the 65 mul-by-inverse might be better only for normalized divisors, due to that 66 case not needing variable bit shifts. 67 68 Notice that the thresholds are tested after the decision to possibly skip 69 one divide step, so they're based on the actual number of divisions done. 70 71 For the unnorm case, it would be possible to call mpn_lshift to adjust 72 the dividend all in one go (into the quotient space say), rather than 73 limb-by-limb in the loop. This might help if mpn_lshift is a lot faster 74 than what the compiler can generate for EXTRACT. But this is left to CPU 75 specific implementations to consider, especially since EXTRACT isn't on 76 the dependent chain. */ 77 78 mp_limb_t 79 mpn_divrem_1 (mp_ptr qp, mp_size_t qxn, 80 mp_srcptr up, mp_size_t un, mp_limb_t d) 81 { 82 mp_size_t n; 83 mp_size_t i; 84 mp_limb_t n1, n0; 85 mp_limb_t r = 0; 86 87 ASSERT (qxn >= 0); 88 ASSERT (un >= 0); 89 ASSERT (d != 0); 90 /* FIXME: What's the correct overlap rule when qxn!=0? */ 91 ASSERT (MPN_SAME_OR_SEPARATE_P (qp+qxn, up, un)); 92 93 n = un + qxn; 94 if (n == 0) 95 return 0; 96 97 d <<= GMP_NAIL_BITS; 98 99 qp += (n - 1); /* Make qp point at most significant quotient limb */ 100 101 if ((d & GMP_LIMB_HIGHBIT) != 0) 102 { 103 if (un != 0) 104 { 105 /* High quotient limb is 0 or 1, skip a divide step. */ 106 mp_limb_t q; 107 r = up[un - 1] << GMP_NAIL_BITS; 108 q = (r >= d); 109 *qp-- = q; 110 r -= (d & -q); 111 r >>= GMP_NAIL_BITS; 112 n--; 113 un--; 114 } 115 116 if (BELOW_THRESHOLD (n, DIVREM_1_NORM_THRESHOLD)) 117 { 118 plain: 119 for (i = un - 1; i >= 0; i--) 120 { 121 n0 = up[i] << GMP_NAIL_BITS; 122 udiv_qrnnd (*qp, r, r, n0, d); 123 r >>= GMP_NAIL_BITS; 124 qp--; 125 } 126 for (i = qxn - 1; i >= 0; i--) 127 { 128 udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d); 129 r >>= GMP_NAIL_BITS; 130 qp--; 131 } 132 return r; 133 } 134 else 135 { 136 /* Multiply-by-inverse, divisor already normalized. */ 137 mp_limb_t dinv; 138 invert_limb (dinv, d); 139 140 for (i = un - 1; i >= 0; i--) 141 { 142 n0 = up[i] << GMP_NAIL_BITS; 143 udiv_qrnnd_preinv (*qp, r, r, n0, d, dinv); 144 r >>= GMP_NAIL_BITS; 145 qp--; 146 } 147 for (i = qxn - 1; i >= 0; i--) 148 { 149 udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv); 150 r >>= GMP_NAIL_BITS; 151 qp--; 152 } 153 return r; 154 } 155 } 156 else 157 { 158 /* Most significant bit of divisor == 0. */ 159 int norm; 160 161 /* Skip a division if high < divisor (high quotient 0). Testing here 162 before normalizing will still skip as often as possible. */ 163 if (un != 0) 164 { 165 n1 = up[un - 1] << GMP_NAIL_BITS; 166 if (n1 < d) 167 { 168 r = n1 >> GMP_NAIL_BITS; 169 *qp-- = 0; 170 n--; 171 if (n == 0) 172 return r; 173 un--; 174 } 175 } 176 177 if (! UDIV_NEEDS_NORMALIZATION 178 && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD)) 179 goto plain; 180 181 count_leading_zeros (norm, d); 182 d <<= norm; 183 r <<= norm; 184 185 if (UDIV_NEEDS_NORMALIZATION 186 && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD)) 187 { 188 if (un != 0) 189 { 190 n1 = up[un - 1] << GMP_NAIL_BITS; 191 r |= (n1 >> (GMP_LIMB_BITS - norm)); 192 for (i = un - 2; i >= 0; i--) 193 { 194 n0 = up[i] << GMP_NAIL_BITS; 195 udiv_qrnnd (*qp, r, r, 196 (n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm)), 197 d); 198 r >>= GMP_NAIL_BITS; 199 qp--; 200 n1 = n0; 201 } 202 udiv_qrnnd (*qp, r, r, n1 << norm, d); 203 r >>= GMP_NAIL_BITS; 204 qp--; 205 } 206 for (i = qxn - 1; i >= 0; i--) 207 { 208 udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d); 209 r >>= GMP_NAIL_BITS; 210 qp--; 211 } 212 return r >> norm; 213 } 214 else 215 { 216 mp_limb_t dinv; 217 invert_limb (dinv, d); 218 if (un != 0) 219 { 220 n1 = up[un - 1] << GMP_NAIL_BITS; 221 r |= (n1 >> (GMP_LIMB_BITS - norm)); 222 for (i = un - 2; i >= 0; i--) 223 { 224 n0 = up[i] << GMP_NAIL_BITS; 225 udiv_qrnnd_preinv (*qp, r, r, 226 ((n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm))), 227 d, dinv); 228 r >>= GMP_NAIL_BITS; 229 qp--; 230 n1 = n0; 231 } 232 udiv_qrnnd_preinv (*qp, r, r, n1 << norm, d, dinv); 233 r >>= GMP_NAIL_BITS; 234 qp--; 235 } 236 for (i = qxn - 1; i >= 0; i--) 237 { 238 udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv); 239 r >>= GMP_NAIL_BITS; 240 qp--; 241 } 242 return r >> norm; 243 } 244 } 245 } 246