/* Copyright (C) 2008-2019 Free Software Foundation, Inc.
   Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
		on behalf of Synopsys Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

/*
   - calculate 15..18 bit inverse using a table of approximating polynomials.
     precision is higher for polynomials used to evaluate input with larger
     value.
   - do one newton-raphson iteration step to double the precision,
     then multiply this with the divisor
	-> more time to decide if dividend is subnormal
     - the worst error propagation is on the side of the value range
       with the least initial defect, thus giving us about 30 bits precision.
*/

#include "arc-ieee-754.h"

#if 0 /* DEBUG */
/* Self-checking wrapper, normally compiled out.  It calls __divsf3_c
   and then __divsf3_asm with the same r0/r1, compares the two results
   and branches to abort if they differ.  Neither __divsf3_c nor abort
   is defined in this file.  The #define at the end renames the real
   implementation below to __divsf3_asm.  */
	.global __divsf3
	FUNC(__divsf3)
	.balign 4
__divsf3:
	push_s blink
	push_s r1
	bl.d __divsf3_c
	push_s r0		/* delay slot: save the first operand */
	ld_s r1,[sp,4]		/* reload the second operand */
	st_s r0,[sp,4]		/* park the __divsf3_c result in that slot */
	bl.d __divsf3_asm
	pop_s r0		/* delay slot: restore the first operand */
	pop_s r1		/* r1 = result of __divsf3_c */
	pop_s blink
	cmp r0,r1
#if 1
	bne abort
	jeq_s [blink]
	b abort
#else
	bne abort
	j_s [blink]
#endif
	ENDFUNC(__divsf3)
#define __divsf3 __divsf3_asm
#endif /* DEBUG */

/* __divsf3: single-precision floating-point division.

   In:	r0 = dividend, r1 = divisor, both as raw 32-bit patterns
	(sign in bit 31, exponent field selected by 0x7f800000).
   Out:	r0 = quotient pattern; every exit is a jump through blink.
   Scratch: r2-r9, r11, r12 and the condition flags.  The stack is not
	touched.

   Layout: the exponent mask, the reciprocal table and the special-case
   handlers are emitted in front of the entry point.  The entry code
   reaches the mask and the table through pcl with literal offsets
   (see the trailing comments on those two instructions), so changing
   the size of anything between .L7f800000 and the entry sequence
   means recomputing both literals.

   FUNC, ENDFUNC and MPYHU are not defined in this file; presumably
   they come from arc-ieee-754.h.  */
	FUNC(__divsf3)
	.balign 4
.L7f800000:
	.long 0x7f800000
/* Reciprocal approximation table, 64 words.  The index is a 6-bit
   field of the divisor mantissa (bits 22..17 of r1 on the normal
   path).  Each word is used twice: as a factor for the left-aligned
   mantissa fraction, and, shifted left by 13, as the value that
   product is subtracted from.  */
.Ldivtab:
	.long 0xfc0ffff0
	.long 0xf46ffefd
	.long 0xed1ffd2a
	.long 0xe627fa8e
	.long 0xdf7ff73b
	.long 0xd917f33b
	.long 0xd2f7eea3
	.long 0xcd1fe986
	.long 0xc77fe3e7
	.long 0xc21fdddb
	.long 0xbcefd760
	.long 0xb7f7d08c
	.long 0xb32fc960
	.long 0xae97c1ea
	.long 0xaa27ba26
	.long 0xa5e7b22e
	.long 0xa1cfa9fe
	.long 0x9ddfa1a0
	.long 0x9a0f990c
	.long 0x9667905d
	.long 0x92df878a
	.long 0x8f6f7e84
	.long 0x8c27757e
	.long 0x88f76c54
	.long 0x85df630c
	.long 0x82e759c5
	.long 0x8007506d
	.long 0x7d3f470a
	.long 0x7a8f3da2
	.long 0x77ef341e
	.long 0x756f2abe
	.long 0x72f7212d
	.long 0x709717ad
	.long 0x6e4f0e44
	.long 0x6c1704d6
	.long 0x69e6fb44
	.long 0x67cef1d7
	.long 0x65c6e872
	.long 0x63cedf18
	.long 0x61e6d5cd
	.long 0x6006cc6d
	.long 0x5e36c323
	.long 0x5c76b9f3
	.long 0x5abeb0b7
	.long 0x5916a79b
	.long 0x57769e77
	.long 0x55de954d
	.long 0x54568c4e
	.long 0x52d6834d
	.long 0x51667a7f
	.long 0x4ffe71b5
	.long 0x4e9e68f1
	.long 0x4d466035
	.long 0x4bf65784
	.long 0x4aae4ede
	.long 0x496e4646
	.long 0x48363dbd
	.long 0x47063547
	.long 0x45de2ce5
	.long 0x44be2498
	.long 0x43a61c64
	.long 0x4296144a
	.long 0x41860c0e
	.long 0x407e03ee
__divsf3_support: /* This label makes debugger output saner.  */
/* Divisor has a zero exponent field: it is zero or denormal.
   Entered from the entry sequence with r3 = table base,
   r6 = (r1 << 8) with bit 31 set, r9 = 0x7f800000.  Normalizes the
   divisor mantissa, redoes the table lookup with it and adds the
   normalization shift to the dividend exponent.  */
.Ldenorm_fp1:
	bclr r6,r6,31		/* drop the implicit one set at entry */
	norm.f r12,r6 ; flag for x/0 -> Inf check
	add r6,r6,r6
	rsub r5,r12,16
	ror r5,r1,r5
	asl r6,r6,r12
	bmsk r5,r5,5		/* 6-bit table index */
	ld.as r5,[r3,r5]
	add r4,r6,r6
	; load latency
	MPYHU r7,r5,r4
	/* Z is still set here only if norm.f saw a zero mantissa.  Otherwise
	   the bic sets Z when bits 30 and 29 of the dividend are both set.  */
	bic.ne.f 0, \
		0x60000000,r0 ; large number / denorm -> Inf
	beq_s .Linf_NaN
	asl r5,r5,13
	; wb stall
	; slow track
	sub r7,r5,r7		/* first reciprocal estimate */
	MPYHU r8,r7,r6
	asl_s r12,r12,23	/* normalization shift, in exponent position */
	and.f r2,r0,r9		/* r2 = dividend exponent field */
	add r2,r2,r12
	asl r12,r0,8
	; wb stall
	/* The delay slot of this branch is the MPYHU under .Ldenorm_fp0,
	   so that multiply runs whether or not the branch is taken.  */
	bne.d .Lpast_denorm_fp1
/* Dividend has a zero exponent field: it is zero or denormal.
   Entered with r12 = r0 << 8.  Normalizes the dividend mantissa and
   adds the normalization shift to the divisor exponent in r11.  */
.Ldenorm_fp0:
	MPYHU r8,r8,r7
	bclr r12,r12,31
	norm.f r3,r12 ; flag for 0/x -> 0 check
	bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0
	beq_s .Lret0
	asl_s r12,r12,r3
	asl_s r3,r3,23
	add_s r12,r12,r12
	add r11,r11,r3
	b.d .Lpast_denorm_fp0
	mov_s r3,r12		/* delay slot: r3 = normalized dividend mantissa */
	.balign 4
/* Result is Inf or NaN: the divisor is zero, or the divisor is denormal
   and the dividend is large.  Expects r9 = 0x7f800000.  */
.Linf_NaN:
	bclr.f 0,r0,31 ; 0/0 -> NaN
	xor_s r0,r0,r1
	bmsk r1,r0,30
	bic_s r0,r0,r1		/* r0 = sign of the quotient only */
	sub.eq r0,r0,1		/* 0/0: make the mantissa bits nonzero */
	j_s.d [blink]
	or r0,r0,r9		/* delay slot: exponent field all ones */
/* Return zero carrying the sign of r0 xor r1.  */
.Lret0:
	xor_s r0,r0,r1
	bmsk r1,r0,30
	j_s.d [blink]
	bic_s r0,r0,r1
/* Divisor exponent field is all ones: the divisor is Inf or NaN.
   Entered through breq.d, whose delay slot has already set
   r2 = dividend exponent field.  */
.Linf_nan_fp1:
	lsr_s r0,r0,31
	bmsk.f 0,r1,22		/* Z set if the divisor mantissa is zero (Inf) */
	asl_s r0,r0,31		/* r0 = sign of the dividend only */
	bne_s 0f ; inf/inf -> nan
	brne r2,r9,.Lsigned0 ; x/inf -> 0, but x/nan -> nan
0:	j_s.d [blink]
	mov r0,-1
/* Shared exit: flip the sign of r0 if the divisor is negative.
   .Lsigned0 arrives with r0 holding only the dividend sign, giving a
   signed zero.  .Linf_nan_fp0 arrives from the main path with the
   dividend, whose exponent field is all ones, still in r0.  */
.Lsigned0:
.Linf_nan_fp0:
	tst_s r1,r1
	j_s.d [blink]
	bxor.mi r0,r0,31
	.balign 4
	.global __divsf3
/* N.B. the spacing between divtab and the sub3 to get its address must
   be a multiple of 8.  */
__divsf3:
	lsr r2,r1,17
	sub3 r3,pcl,55;(.-.Ldivtab) >> 3
	bmsk_s r2,r2,5		/* r2 = bits 22..17 of the divisor */
	ld.as r5,[r3,r2]	/* r5 = table entry */
	asl r4,r1,9		/* divisor mantissa fraction, left-aligned */
	ld.as r9,[pcl,-114]; [pcl,(-((.-.L7f800000) >> 2))] ; 0x7f800000
	MPYHU r7,r5,r4
	asl r6,r1,8
	and.f r11,r1,r9		/* r11 = divisor exponent field */
	bset r6,r6,31		/* r6 = divisor mantissa, implicit one in bit 31 */
	asl r5,r5,13
	; wb stall
	beq .Ldenorm_fp1	/* divisor exponent field is zero */
	sub r7,r5,r7		/* first reciprocal estimate */
	MPYHU r8,r7,r6
	breq.d r11,r9,.Linf_nan_fp1
	and.f r2,r0,r9		/* delay slot: r2 = dividend exponent field */
	beq.d .Ldenorm_fp0	/* dividend exponent field is zero */
	asl r12,r0,8		/* delay slot */
	; wb stall
	breq r2,r9,.Linf_nan_fp0
	MPYHU r8,r8,r7
.Lpast_denorm_fp1:
	bset r3,r12,31		/* r3 = dividend mantissa, implicit one in bit 31 */
.Lpast_denorm_fp0:
	cmp_s r3,r6
	lsr.cc r3,r3,1		/* halve r3 when r3 >= r6 (unsigned) */
	add_s r2,r2, /* wait for immediate */ \
		/* wb stall */ \
		0x3f000000
	sub r7,r7,r8 ; u1.31 inverse, about 30 bit
	MPYHU r3,r3,r7		/* dividend mantissa times the inverse */
	sbc r2,r2,r11		/* minus divisor exponent, minus the carry from cmp_s */
	xor.f 0,r0,r1		/* N set if the operand signs differ */
	and r0,r2,r9
	bxor.mi r0,r0,31
	brhs r2, /* wb stall / wait for immediate */ \
		0x7f000000,.Linf_denorm
/* Rounding.  Also the re-entry point from .Ldenorm, which arrives with
   r3 already shifted right and r9 holding that shift count.  */
.Lpast_denorm:
	add_s r3,r3,0x22 ; round to nearest or higher
	tst r3,0x3c ; check if rounding was unsafe
	lsr r3,r3,6
	jne.d [blink] ; return if rounding was safe.
	add_s r0,r0,r3		/* delay slot: runs on both paths */
	/* work out exact rounding if we fall through here.  */
	/* We know that the exact result cannot be represented in single
	   precision.  Find the mid-point between the two nearest
	   representable values, multiply with the divisor, and check if
	   the result is larger than the dividend.  */
	add_s r3,r3,r3
	sub_s r3,r3,1
	mpyu r3,r3,r6
	/* NOTE(review): sub.f below rewrites the flags that asr.f sets and
	   nothing in between appears to read them -- confirm the purpose
	   of this instruction.  */
	asr.f 0,r0,1 ; for round-to-even in case this is a denorm
	rsub r2,r9,25		/* r2 = 25 - r9 */
	asl_s r12,r12,r2
	; wb stall
	; slow track
	sub.f 0,r12,r3
	j_s.d [blink]
	sub.mi r0,r0,1		/* delay slot: step down by one if r12 - r3 < 0 */
/* For denormal results, it is possible that an exact result needs
   rounding, and thus the round-to-even rule has to come into play.  */
/* Reached when r2 >= 0x7f000000 (unsigned).  Values below 0xc0000000
   are returned as Inf; the rest are handled as a negative exponent.  */
.Linf_denorm:
	brlo r2,0xc0000000,.Linf
.Ldenorm:
	asr_s r2,r2,23
	bic r0,r0,r9		/* keep the sign, clear the exponent field */
	neg r9,r2		/* r9 = right-shift count for the mantissa */
	brlo.d r9,25,.Lpast_denorm
	lsr r3,r3,r9		/* delay slot */
	/* Fall through: return +- 0 */
	j_s [blink]
.Linf:
	j_s.d [blink]
	or r0,r0,r9		/* delay slot: sign | 0x7f800000 */
	ENDFUNC(__divsf3)