1 /* Division and remainder routines for Tile. 2 Copyright (C) 2011-2017 Free Software Foundation, Inc. 3 Contributed by Walter Lee (walt@tilera.com) 4 5 This file is free software; you can redistribute it and/or modify it 6 under the terms of the GNU General Public License as published by the 7 Free Software Foundation; either version 3, or (at your option) any 8 later version. 9 10 This file is distributed in the hope that it will be useful, but 11 WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 General Public License for more details. 14 15 Under Section 7 of GPL version 3, you are granted additional 16 permissions described in the GCC Runtime Library Exception, version 17 3.1, as published by the Free Software Foundation. 18 19 You should have received a copy of the GNU General Public License and 20 a copy of the GCC Runtime Library Exception along with this program; 21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 22 <http://www.gnu.org/licenses/>. */ 23 24 typedef int int32_t; 25 typedef unsigned uint32_t; 26 typedef long long int64_t; 27 typedef unsigned long long uint64_t; 28 29 /* Raise signal 8 (SIGFPE) with code 1 (FPE_INTDIV). */ 30 static inline void 31 raise_intdiv (void) 32 { 33 asm ("{ raise; moveli zero, 8 + (1 << 6) }"); 34 } 35 36 37 #ifndef __tilegx__ 38 /*__udivsi3 - 32 bit integer unsigned divide */ 39 static inline uint32_t __attribute__ ((always_inline)) 40 __udivsi3_inline (uint32_t dividend, uint32_t divisor) 41 { 42 /* Divide out any power of two factor from dividend and divisor. 43 Note that when dividing by zero the divisor will remain zero, 44 which is all we need to detect that case below. */ 45 const int power_of_two_factor = __insn_ctz (divisor); 46 divisor >>= power_of_two_factor; 47 dividend >>= power_of_two_factor; 48 49 /* Checks for division by power of two or division by zero. */ 50 if (divisor <= 1) 51 { 52 if (divisor == 0) 53 { 54 raise_intdiv (); 55 return 0; 56 } 57 return dividend; 58 } 59 60 /* Compute (a / b) by repeatedly finding the largest N 61 such that (b << N) <= a. For each such N, set bit N in the 62 quotient, subtract (b << N) from a, and keep going. Think of this as 63 the reverse of the "shift-and-add" that a multiply does. The values 64 of N are precisely those shift counts. 65 66 Finding N is easy. First, use clz(b) - clz(a) to find the N 67 that lines up the high bit of (b << N) with the high bit of a. 68 Any larger value of N would definitely make (b << N) > a, 69 which is too big. 70 71 Then, if (b << N) > a (because it has larger low bits), decrement 72 N by one. This adjustment will definitely make (b << N) less 73 than a, because a's high bit is now one higher than b's. */ 74 75 /* Precomputing the max_ values allows us to avoid a subtract 76 in the inner loop and just right shift by clz(remainder). */ 77 const int divisor_clz = __insn_clz (divisor); 78 const uint32_t max_divisor = divisor << divisor_clz; 79 const uint32_t max_qbit = 1 << divisor_clz; 80 81 uint32_t quotient = 0; 82 uint32_t remainder = dividend; 83 84 while (remainder >= divisor) 85 { 86 int shift = __insn_clz (remainder); 87 uint32_t scaled_divisor = max_divisor >> shift; 88 uint32_t quotient_bit = max_qbit >> shift; 89 90 int too_big = (scaled_divisor > remainder); 91 scaled_divisor >>= too_big; 92 quotient_bit >>= too_big; 93 remainder -= scaled_divisor; 94 quotient |= quotient_bit; 95 } 96 return quotient; 97 } 98 #endif /* !__tilegx__ */ 99 100 101 /* __udivdi3 - 64 bit integer unsigned divide */ 102 static inline uint64_t __attribute__ ((always_inline)) 103 __udivdi3_inline (uint64_t dividend, uint64_t divisor) 104 { 105 /* Divide out any power of two factor from dividend and divisor. 106 Note that when dividing by zero the divisor will remain zero, 107 which is all we need to detect that case below. */ 108 const int power_of_two_factor = __builtin_ctzll (divisor); 109 divisor >>= power_of_two_factor; 110 dividend >>= power_of_two_factor; 111 112 /* Checks for division by power of two or division by zero. */ 113 if (divisor <= 1) 114 { 115 if (divisor == 0) 116 { 117 raise_intdiv (); 118 return 0; 119 } 120 return dividend; 121 } 122 123 #ifndef __tilegx__ 124 if (((uint32_t) (dividend >> 32) | ((uint32_t) (divisor >> 32))) == 0) 125 { 126 /* Operands both fit in 32 bits, so use faster 32 bit algorithm. */ 127 return __udivsi3_inline ((uint32_t) dividend, (uint32_t) divisor); 128 } 129 #endif /* !__tilegx__ */ 130 131 /* See algorithm description in __udivsi3 */ 132 133 const int divisor_clz = __builtin_clzll (divisor); 134 const uint64_t max_divisor = divisor << divisor_clz; 135 const uint64_t max_qbit = 1ULL << divisor_clz; 136 137 uint64_t quotient = 0; 138 uint64_t remainder = dividend; 139 140 while (remainder >= divisor) 141 { 142 int shift = __builtin_clzll (remainder); 143 uint64_t scaled_divisor = max_divisor >> shift; 144 uint64_t quotient_bit = max_qbit >> shift; 145 146 int too_big = (scaled_divisor > remainder); 147 scaled_divisor >>= too_big; 148 quotient_bit >>= too_big; 149 remainder -= scaled_divisor; 150 quotient |= quotient_bit; 151 } 152 return quotient; 153 } 154 155 156 #ifndef __tilegx__ 157 /* __umodsi3 - 32 bit integer unsigned modulo */ 158 static inline uint32_t __attribute__ ((always_inline)) 159 __umodsi3_inline (uint32_t dividend, uint32_t divisor) 160 { 161 /* Shortcircuit mod by a power of two (and catch mod by zero). */ 162 const uint32_t mask = divisor - 1; 163 if ((divisor & mask) == 0) 164 { 165 if (divisor == 0) 166 { 167 raise_intdiv (); 168 return 0; 169 } 170 return dividend & mask; 171 } 172 173 /* We compute the remainder (a % b) by repeatedly subtracting off 174 multiples of b from a until a < b. The key is that subtracting 175 off a multiple of b does not affect the result mod b. 176 177 To make the algorithm run efficiently, we need to subtract 178 off a large multiple of b at each step. We subtract the largest 179 (b << N) that is <= a. 180 181 Finding N is easy. First, use clz(b) - clz(a) to find the N 182 that lines up the high bit of (b << N) with the high bit of a. 183 Any larger value of N would definitely make (b << N) > a, 184 which is too big. 185 186 Then, if (b << N) > a (because it has larger low bits), decrement 187 N by one. This adjustment will definitely make (b << N) less 188 than a, because a's high bit is now one higher than b's. */ 189 const uint32_t max_divisor = divisor << __insn_clz (divisor); 190 191 uint32_t remainder = dividend; 192 while (remainder >= divisor) 193 { 194 const int shift = __insn_clz (remainder); 195 uint32_t scaled_divisor = max_divisor >> shift; 196 scaled_divisor >>= (scaled_divisor > remainder); 197 remainder -= scaled_divisor; 198 } 199 200 return remainder; 201 } 202 #endif /* !__tilegx__ */ 203 204 205 /* __umoddi3 - 64 bit integer unsigned modulo */ 206 static inline uint64_t __attribute__ ((always_inline)) 207 __umoddi3_inline (uint64_t dividend, uint64_t divisor) 208 { 209 #ifndef __tilegx__ 210 if (((uint32_t) (dividend >> 32) | ((uint32_t) (divisor >> 32))) == 0) 211 { 212 /* Operands both fit in 32 bits, so use faster 32 bit algorithm. */ 213 return __umodsi3_inline ((uint32_t) dividend, (uint32_t) divisor); 214 } 215 #endif /* !__tilegx__ */ 216 217 /* Shortcircuit mod by a power of two (and catch mod by zero). */ 218 const uint64_t mask = divisor - 1; 219 if ((divisor & mask) == 0) 220 { 221 if (divisor == 0) 222 { 223 raise_intdiv (); 224 return 0; 225 } 226 return dividend & mask; 227 } 228 229 /* See algorithm description in __umodsi3 */ 230 const uint64_t max_divisor = divisor << __builtin_clzll (divisor); 231 232 uint64_t remainder = dividend; 233 while (remainder >= divisor) 234 { 235 const int shift = __builtin_clzll (remainder); 236 uint64_t scaled_divisor = max_divisor >> shift; 237 scaled_divisor >>= (scaled_divisor > remainder); 238 remainder -= scaled_divisor; 239 } 240 241 return remainder; 242 } 243 244 245 uint32_t __udivsi3 (uint32_t dividend, uint32_t divisor); 246 #ifdef L_tile_udivsi3 247 uint32_t 248 __udivsi3 (uint32_t dividend, uint32_t divisor) 249 { 250 #ifndef __tilegx__ 251 return __udivsi3_inline (dividend, divisor); 252 #else /* !__tilegx__ */ 253 uint64_t n = __udivdi3_inline (((uint64_t) dividend), ((uint64_t) divisor)); 254 return (uint32_t) n; 255 #endif /* !__tilegx__ */ 256 } 257 #endif 258 259 #define ABS(x) ((x) >= 0 ? (x) : -(x)) 260 261 int32_t __divsi3 (int32_t dividend, int32_t divisor); 262 #ifdef L_tile_divsi3 263 /* __divsi3 - 32 bit integer signed divide */ 264 int32_t 265 __divsi3 (int32_t dividend, int32_t divisor) 266 { 267 #ifndef __tilegx__ 268 uint32_t n = __udivsi3_inline (ABS (dividend), ABS (divisor)); 269 #else /* !__tilegx__ */ 270 uint64_t n = 271 __udivdi3_inline (ABS ((int64_t) dividend), ABS ((int64_t) divisor)); 272 #endif /* !__tilegx__ */ 273 if ((dividend ^ divisor) < 0) 274 n = -n; 275 return (int32_t) n; 276 } 277 #endif 278 279 280 uint64_t __udivdi3 (uint64_t dividend, uint64_t divisor); 281 #ifdef L_tile_udivdi3 282 uint64_t 283 __udivdi3 (uint64_t dividend, uint64_t divisor) 284 { 285 return __udivdi3_inline (dividend, divisor); 286 } 287 #endif 288 289 /*__divdi3 - 64 bit integer signed divide */ 290 int64_t __divdi3 (int64_t dividend, int64_t divisor); 291 #ifdef L_tile_divdi3 292 int64_t 293 __divdi3 (int64_t dividend, int64_t divisor) 294 { 295 uint64_t n = __udivdi3_inline (ABS (dividend), ABS (divisor)); 296 if ((dividend ^ divisor) < 0) 297 n = -n; 298 return (int64_t) n; 299 } 300 #endif 301 302 303 uint32_t __umodsi3 (uint32_t dividend, uint32_t divisor); 304 #ifdef L_tile_umodsi3 305 uint32_t 306 __umodsi3 (uint32_t dividend, uint32_t divisor) 307 { 308 #ifndef __tilegx__ 309 return __umodsi3_inline (dividend, divisor); 310 #else /* !__tilegx__ */ 311 return __umoddi3_inline ((uint64_t) dividend, (uint64_t) divisor); 312 #endif /* !__tilegx__ */ 313 } 314 #endif 315 316 317 /* __modsi3 - 32 bit integer signed modulo */ 318 int32_t __modsi3 (int32_t dividend, int32_t divisor); 319 #ifdef L_tile_modsi3 320 int32_t 321 __modsi3 (int32_t dividend, int32_t divisor) 322 { 323 #ifndef __tilegx__ 324 uint32_t remainder = __umodsi3_inline (ABS (dividend), ABS (divisor)); 325 #else /* !__tilegx__ */ 326 uint64_t remainder = 327 __umoddi3_inline (ABS ((int64_t) dividend), ABS ((int64_t) divisor)); 328 #endif /* !__tilegx__ */ 329 return (int32_t) ((dividend >= 0) ? remainder : -remainder); 330 } 331 #endif 332 333 334 uint64_t __umoddi3 (uint64_t dividend, uint64_t divisor); 335 #ifdef L_tile_umoddi3 336 uint64_t 337 __umoddi3 (uint64_t dividend, uint64_t divisor) 338 { 339 return __umoddi3_inline (dividend, divisor); 340 } 341 #endif 342 343 344 /* __moddi3 - 64 bit integer signed modulo */ 345 int64_t __moddi3 (int64_t dividend, int64_t divisor); 346 #ifdef L_tile_moddi3 347 int64_t 348 __moddi3 (int64_t dividend, int64_t divisor) 349 { 350 uint64_t remainder = __umoddi3_inline (ABS (dividend), ABS (divisor)); 351 return (int64_t) ((dividend >= 0) ? remainder : -remainder); 352 } 353 #endif 354