1 /*- 2 * This file is dual licensed under the MIT and the University of Illinois 3 * Open Source Licenses. 4 * 5 *- 6 * University of Illinois/NCSA 7 * Open Source License 8 * 9 * Copyright (c) 2009-2014 by the contributors listed below 10 * 11 * All rights reserved. 12 * 13 * Developed by: 14 * 15 * LLVM Team 16 * 17 * University of Illinois at Urbana-Champaign 18 * 19 * http://llvm.org 20 * 21 * Permission is hereby granted, free of charge, to any person obtaining a 22 * copy of this software and associated documentation files (the "Software"), 23 * to deal with the Software without restriction, including without limitation 24 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 25 * and/or sell copies of the Software, and to permit persons to whom the 26 * Software is furnished to do so, subject to the following conditions: 27 * 28 * * Redistributions of source code must retain the above copyright notice, 29 * this list of conditions and the following disclaimers. 30 * 31 * * Redistributions in binary form must reproduce the above copyright notice, 32 * this list of conditions and the following disclaimers in the 33 * documentation and/or other materials provided with the distribution. 34 * 35 * * Neither the names of the LLVM Team, University of Illinois at 36 * Urbana-Champaign, nor the names of its contributors may be used to 37 * endorse or promote products derived from this Software without specific 38 * prior written permission. 39 * 40 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 41 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 42 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 43 * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 44 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 45 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 46 * WITH THE SOFTWARE. 47 * 48 *- 49 * Copyright (c) 2009-2014 by the contributors listed below 50 * 51 * Permission is hereby granted, free of charge, to any person obtaining a copy 52 * of this software and associated documentation files (the "Software"), to 53 * deal in the Software without restriction, including without limitation the 54 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 55 * sell copies of the Software, and to permit persons to whom the Software is 56 * furnished to do so, subject to the following conditions: 57 * 58 * The above copyright notice and this permission notice shall be included in 59 * all copies or substantial portions of the Software. 60 * 61 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 62 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 63 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 64 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 65 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 66 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 67 * DEALINGS IN THE SOFTWARE. 68 * 69 * - 70 * List of contributors: 71 * 72 * Craig van Vliet <cvanvliet@auroraux.org> 73 * Edward O'Callaghan <eocallaghan@auroraux.org> 74 * Howard Hinnant <hhinnant@apple.com> 75 * Matt Thomas <matt@NetBSD.org> 76 * Joerg Sonnenberger <joerg@NetBSD.org> 77 */ 78 79 #include <machine/limits.h> 80 #include <machine/stdint.h> 81 82 typedef union 83 { 84 _uint128_t all; 85 struct { 86 unsigned long low; 87 unsigned long high; 88 } s; 89 } utwords; 90 91 /* Effects: if rem != 0, *rem = a % b 92 * Returns: a / b 93 */ 94 95 /* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */ 96 97 static _uint128_t 98 __udivmodti4(_uint128_t a, _uint128_t b, _uint128_t *rem) 99 { 100 const unsigned n_udword_bits = sizeof(unsigned long) * CHAR_BIT; 101 const unsigned n_utword_bits = sizeof(_uint128_t) * CHAR_BIT; 102 utwords n, d, q, r; 103 unsigned sr; 104 105 n.all = a; 106 d.all = b; 107 108 /* special cases, X is unknown, K != 0 */ 109 if (n.s.high == 0) { 110 if (d.s.high == 0) { 111 /* 0 X 112 * --- 113 * 0 X 114 */ 115 if (rem) 116 *rem = n.s.low % d.s.low; 117 return n.s.low / d.s.low; 118 } 119 /* 0 X 120 * --- 121 * K X 122 */ 123 if (rem) 124 *rem = n.s.low; 125 return 0; 126 } 127 /* n.s.high != 0 */ 128 if (d.s.low == 0) { 129 if (d.s.high == 0) { 130 /* K X 131 * --- 132 * 0 0 133 */ 134 if (rem) 135 *rem = n.s.high % d.s.low; 136 return n.s.high / d.s.low; 137 } 138 /* d.s.high != 0 */ 139 if (n.s.low == 0) { 140 /* K 0 141 * --- 142 * K 0 143 */ 144 if (rem) { 145 r.s.high = n.s.high % d.s.high; 146 r.s.low = 0; 147 *rem = r.all; 148 } 149 return n.s.high / d.s.high; 150 } 151 /* K K 152 * --- 153 * K 0 154 */ 155 if ((d.s.high & (d.s.high - 1)) == 0) { /* if d is a power of 2 */ 156 if (rem) { 157 r.s.low = n.s.low; 158 r.s.high = n.s.high & (d.s.high - 1); 159 *rem = r.all; 160 } 161 return n.s.high >> __builtin_ctzl(d.s.high); 162 } 163 /* K K 164 * --- 165 * K 0 166 */ 167 sr = __builtin_clzl(d.s.high) - __builtin_clzl(n.s.high); 168 /* 0 <= sr <= n_udword_bits - 2 or sr large */ 169 if (sr > n_udword_bits - 2) { 170 if (rem) 171 *rem = n.all; 172 return 0; 173 } 174 ++sr; 175 /* 1 <= sr <= n_udword_bits - 1 */ 176 /* q.all = n.all << (n_utword_bits - sr); */ 177 q.s.low = 0; 178 q.s.high = n.s.low << (n_udword_bits - sr); 179 /* r.all = n.all >> sr; */ 180 r.s.high = n.s.high >> sr; 181 r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr); 182 } else { /* d.s.low != 0 */ 183 if (d.s.high == 0) { 184 /* K X 185 * --- 186 * 0 K 187 */ 188 if ((d.s.low & (d.s.low - 1)) == 0) { /* if d is a power of 2 */ 189 if (rem) 190 *rem = n.s.low & (d.s.low - 1); 191 if (d.s.low == 1) 192 return n.all; 193 sr = __builtin_ctzl(d.s.low); 194 q.s.high = n.s.high >> sr; 195 q.s.low = (n.s.high << (n_udword_bits - sr)) | 196 (n.s.low >> sr); 197 return q.all; 198 } 199 /* K X 200 * --- 201 * 0 K 202 */ 203 sr = 1 + n_udword_bits + __builtin_clzl(d.s.low) - 204 __builtin_clzl(n.s.high); 205 /* 2 <= sr <= n_utword_bits - 1 206 * q.all = n.all << (n_utword_bits - sr); 207 * r.all = n.all >> sr; 208 * if (sr == n_udword_bits) 209 * { 210 * q.s.low = 0; 211 * q.s.high = n.s.low; 212 * r.s.high = 0; 213 * r.s.low = n.s.high; 214 * } 215 * else if (sr < n_udword_bits) // 2 <= sr <= n_udword_bits - 1 216 * { 217 * q.s.low = 0; 218 * q.s.high = n.s.low << (n_udword_bits - sr); 219 * r.s.high = n.s.high >> sr; 220 * r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr); 221 * } 222 * else // n_udword_bits + 1 <= sr <= n_utword_bits - 1 223 * { 224 * q.s.low = n.s.low << (n_utword_bits - sr); 225 * q.s.high = (n.s.high << (n_utword_bits - sr)) | 226 * (n.s.low >> (sr - n_udword_bits)); 227 * r.s.high = 0; 228 * r.s.low = n.s.high >> (sr - n_udword_bits); 229 * } 230 */ 231 q.s.low = (n.s.low << (n_utword_bits - sr)) & 232 ((long)(int)(n_udword_bits - sr) >> (n_udword_bits-1)); 233 q.s.high = ((n.s.low << ( n_udword_bits - sr)) & 234 ((long)(int)(sr - n_udword_bits - 1) >> (n_udword_bits-1))) | 235 (((n.s.high << (n_utword_bits - sr)) | 236 (n.s.low >> (sr - n_udword_bits))) & 237 ((long)(int)(n_udword_bits - sr) >> (n_udword_bits-1))); 238 r.s.high = (n.s.high >> sr) & 239 ((long)(int)(sr - n_udword_bits) >> (n_udword_bits-1)); 240 r.s.low = ((n.s.high >> (sr - n_udword_bits)) & 241 ((long)(int)(n_udword_bits - sr - 1) >> (n_udword_bits-1))) | 242 (((n.s.high << (n_udword_bits - sr)) | 243 (n.s.low >> sr)) & 244 ((long)(int)(sr - n_udword_bits) >> (n_udword_bits-1))); 245 } else { 246 /* K X 247 * --- 248 * K K 249 */ 250 sr = __builtin_clzl(d.s.high) - __builtin_clzl(n.s.high); 251 /*0 <= sr <= n_udword_bits - 1 or sr large */ 252 if (sr > n_udword_bits - 1) { 253 if (rem) 254 *rem = n.all; 255 return 0; 256 } 257 ++sr; 258 /* 1 <= sr <= n_udword_bits */ 259 /* q.all = n.all << (n_utword_bits - sr); */ 260 q.s.low = 0; 261 q.s.high = n.s.low << (n_udword_bits - sr); 262 /* r.all = n.all >> sr; 263 * if (sr < n_udword_bits) 264 * { 265 * r.s.high = n.s.high >> sr; 266 * r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr); 267 * } 268 * else 269 * { 270 * r.s.high = 0; 271 * r.s.low = n.s.high; 272 * } 273 */ 274 r.s.high = (n.s.high >> sr) & 275 ((long)(int)(sr - n_udword_bits) >> (n_udword_bits-1)); 276 r.s.low = (n.s.high << (n_udword_bits - sr)) | 277 ((n.s.low >> sr) & 278 ((long)(int)(sr - n_udword_bits) >> (n_udword_bits-1))); 279 } 280 } 281 /* Not a special case 282 * q and r are initialized with: 283 * q.all = n.all << (n_utword_bits - sr); 284 * r.all = n.all >> sr; 285 * 1 <= sr <= n_utword_bits - 1 286 */ 287 unsigned carry = 0; 288 for (; sr > 0; --sr) { 289 /* r:q = ((r:q) << 1) | carry */ 290 r.s.high = (r.s.high << 1) | (r.s.low >> (n_udword_bits - 1)); 291 r.s.low = (r.s.low << 1) | (q.s.high >> (n_udword_bits - 1)); 292 q.s.high = (q.s.high << 1) | (q.s.low >> (n_udword_bits - 1)); 293 q.s.low = (q.s.low << 1) | carry; 294 /* carry = 0; 295 * if (r.all >= d.all) 296 * { 297 * r.all -= d.all; 298 * carry = 1; 299 * } 300 */ 301 const _int128_t s = 302 (_int128_t)(d.all - r.all - 1) >> (n_utword_bits - 1); 303 carry = s & 1; 304 r.all -= d.all & s; 305 } 306 q.all = (q.all << 1) | carry; 307 if (rem) 308 *rem = r.all; 309 return q.all; 310 } 311 312 _uint128_t __udivti3(_uint128_t a, _uint128_t b); 313 314 _uint128_t 315 __udivti3(_uint128_t a, _uint128_t b) 316 { 317 return __udivmodti4(a, b, 0); 318 } 319