/* Copyright (C) 2008-2018 Free Software Foundation, Inc.
   Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
		on behalf of Synopsys Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

/*
   - calculate 15..18 bit inverse using a table of approximating polynomials.
     precision is higher for polynomials used to evaluate input with larger
     value.
   - do one newton-raphson iteration step to double the precision,
     then multiply this with the divisor
	-> more time to decide if dividend is subnormal
     - the worst error propagation is on the side of the value range
       with the least initial defect, thus giving us about 30 bits precision.
*/
#include "../arc-ieee-754.h"

/* mlo / mhi name the accumulator registers (acc2 / acc1) from which the
   code below reads back the results of the multiply sequences.  */
#define mlo acc2
#define mhi acc1

/* Two-instruction multiply sequences (signed and unsigned variant).
   The backquote separates the two instructions that one macro
   invocation expands to; the result is read through mlo / mhi.  */
#define mul64(b,c) mullw 0,b,c` machlw 0,b,c
#define mulu64(b,c) mululw 0,b,c` machulw 0,b,c

/* Disabled self-check wrapper.  When enabled it calls __divsf3_c and
   __divsf3_asm with the same r0 / r1 arguments and branches to abort
   if the two results differ; the real routine below is then assembled
   under the name __divsf3_asm.
   NOTE(review): __divsf3_c is presumably a C reference implementation
   supplied by the test setup -- it is not defined in this file.  */
#if 0 /* DEBUG */
	.global __divsf3
	FUNC(__divsf3)
	.balign 4
__divsf3:
	push_s blink
	push_s r1
	bl.d __divsf3_c
	push_s r0		/* delay slot: keep the dividend for the 2nd call */
	ld_s r1,[sp,4]		/* reload the divisor ...  */
	st_s r0,[sp,4]		/* ... and park the reference result in its slot */
	bl.d __divsf3_asm
	pop_s r0		/* delay slot: restore the dividend */
	pop_s r1		/* r1 = reference result */
	pop_s blink
	cmp r0,r1
#if 1
	bne abort
	jeq_s [blink]
	b abort
#else
	bne abort
	j_s [blink]
#endif
	ENDFUNC(__divsf3)
#define __divsf3 __divsf3_asm
#endif /* DEBUG */

	FUNC(__divsf3)
	.balign 4
/* 64-entry lookup table, one 32-bit word per entry.  __divsf3 indexes
   it with bits 22..17 of the divisor to obtain the initial reciprocal
   approximation.
   The table and the .L7f800000 constant that follows it are addressed
   pcl-relative with hard-coded offsets from __divsf3, so the number of
   bytes between here and the __divsf3 label must not change unless
   those offsets are updated as well.  */
.Ldivtab:
	.long 0xfc0ffff0
	.long 0xf46ffefd
	.long 0xed1ffd2a
	.long 0xe627fa8e
	.long 0xdf7ff73b
	.long 0xd917f33b
	.long 0xd2f7eea3
	.long 0xcd1fe986
	.long 0xc77fe3e7
	.long 0xc21fdddb
	.long 0xbcefd760
	.long 0xb7f7d08c
	.long 0xb32fc960
	.long 0xae97c1ea
	.long 0xaa27ba26
	.long 0xa5e7b22e
	.long 0xa1cfa9fe
	.long 0x9ddfa1a0
	.long 0x9a0f990c
	.long 0x9667905d
	.long 0x92df878a
	.long 0x8f6f7e84
	.long 0x8c27757e
	.long 0x88f76c54
	.long 0x85df630c
	.long 0x82e759c5
	.long 0x8007506d
	.long 0x7d3f470a
	.long 0x7a8f3da2
	.long 0x77ef341e
	.long 0x756f2abe
	.long 0x72f7212d
	.long 0x709717ad
	.long 0x6e4f0e44
	.long 0x6c1704d6
	.long 0x69e6fb44
	.long 0x67cef1d7
	.long 0x65c6e872
	.long 0x63cedf18
	.long 0x61e6d5cd
	.long 0x6006cc6d
	.long 0x5e36c323
	.long 0x5c76b9f3
	.long 0x5abeb0b7
	.long 0x5916a79b
	.long 0x57769e77
	.long 0x55de954d
	.long 0x54568c4e
	.long 0x52d6834d
	.long 0x51667a7f
	.long 0x4ffe71b5
	.long 0x4e9e68f1
	.long 0x4d466035
	.long 0x4bf65784
	.long 0x4aae4ede
	.long 0x496e4646
	.long 0x48363dbd
	.long 0x47063547
	.long 0x45de2ce5
	.long 0x44be2498
	.long 0x43a61c64
	.long 0x4296144a
	.long 0x41860c0e
	.long 0x407e03ee
/* Exponent-field mask of a single-precision value; also the bit
   pattern of +Inf.  Loaded into r9 at the start of __divsf3.  */
.L7f800000:
	.long 0x7f800000
/* __divsf3 -- single-precision floating-point division.
   In:	r0 = dividend, r1 = divisor (raw IEEE-754 bit patterns).
   Out:	r0 = quotient; returns through blink.
   Writes r0-r9, r11, r12 and the mhi / mlo accumulators.
   NOTE(review): presumably all of these are call-clobbered under the
   convention used for this libgcc helper -- confirm.

   A .d suffix marks a branch with a delay slot: the instruction that
   follows it is executed as part of the branch.  Several flag results
   are consumed many instructions after they are produced; none of the
   instructions in between carries a .f suffix.  */
	.balign 4
	.global __divsf3_support
__divsf3_support:
/* Reached from .Ldenorm_fp1 when the divisor is zero, or when it is a
   denormal and the dividend has bits 30 and 29 both set.  Returns Inf
   with the sign of r0 ^ r1, or a NaN when the dividend is +-0.
   NOTE(review): a NaN dividend also has bits 30 and 29 set, so NaN/0
   and NaN/denormal come back as Inf from here -- confirm whether that
   is acceptable.  The size of this code is pinned by the pcl-relative
   offsets in __divsf3.  */
.Linf_NaN:
	bclr.f 0,r0,31 ; 0/0 -> NaN
	xor_s r0,r0,r1
	bmsk r1,r0,30
	bic_s r0,r0,r1		/* r0 = sign bit of r0 ^ r1, all else clear */
	sub.eq r0,r0,1		/* 0/0 only: sets every bit below the sign */
	j_s.d [blink]
	or r0,r0,r9		/* delay slot: merge in exponent 0x7f800000 */
/* Return zero carrying the sign of r0 ^ r1.  */
.Lret0:
	xor_s r0,r0,r1
	bmsk r1,r0,30
	j_s.d [blink]
	bic_s r0,r0,r1		/* delay slot: keep only the sign bit */
/* N.B. the spacing between divtab and the sub3 to get its address must
   be a multiple of 8.  */
__divsf3:
	ld.as r9,[pcl,-9]; [pcl,(-((.-.L7f800000) >> 2))] ; 0x7f800000
	sub3 r3,pcl,37;(.-.Ldivtab) >> 3
	lsr r2,r1,17
	and.f r11,r1,r9		/* r11 = divisor exponent field; Z if it is 0 */
	bmsk r5,r2,5		/* r5 = divisor bits 22..17 = table index */
	beq.d .Ldenorm_fp1	/* divisor is zero or denormal */
	asl r6,r1,8		/* delay slot */
	and.f r2,r0,r9		/* r2 = dividend exponent field; Z if it is 0 */
	ld.as r5,[r3,r5]	/* r5 = table entry selected by the divisor */
	asl r4,r1,9		/* r4 = divisor fraction, left-aligned */
	bset r6,r6,31		/* r6 = divisor significand, leading 1 in bit 31 */
	breq.d r11,r9,.Linf_nan_fp1 /* divisor is Inf or NaN */
.Lpast_denorm_fp1:
	/* The mululw below is also the delay slot of the breq.d above.
	   NOTE(review): r7 appears to be a first-order estimate of
	   1/divisor -- (table entry << 13) minus the product of the
	   table entry and the divisor fraction; confirm the fixed-point
	   layout of the table words.  */
	mululw 0,r5,r4
	machulw r8,r5,r4
	breq.d r2,r9,.Linf_nan_fp0 /* dividend is Inf or NaN */
	asl r5,r5,13		/* delay slot */
	sub r7,r5,r8
	mululw 0,r7,r6		/* r8 = estimate * divisor significand ...  */
	machulw r8,r7,r6
	/* Z here is still the one set by "and.f r2,r0,r9".  */
	beq.d .Ldenorm_fp0	/* dividend is zero or denormal */
	asl r12,r0,8		/* delay slot: r12 = dividend << 8 */
	mulu64 (r8,r7)		/* ... Newton-Raphson correction term in mhi */
	bset r3,r12,31		/* r3 = dividend significand, leading 1 in bit 31 */
.Lpast_denorm_fp0:
	/* C is set when the dividend significand is below the divisor
	   significand.  Otherwise the dividend is halved, and the sbc
	   further down, which consumes this same carry, leaves the
	   exponent one higher in compensation.  */
	cmp_s r3,r6
	lsr.cc r3,r3,1
	add_s r2,r2, /* wait for immediate */ \
	0x3f000000
	sub r7,r7,mhi ; u1.31 inverse, about 30 bit
	mulu64 (r3,r7)		/* mhi = dividend significand * inverse */
	sbc r2,r2,r11		/* r2 = biased result exponent, still unmasked */
	xor.f 0,r0,r1		/* N = sign of the result */
	and r0,r2,r9		/* r0 = result exponent field */
	bclr r3,r9,23 ; 0x7f000000
	brhs.d r2,r3,.Linf_denorm /* exponent outside the normal range */
	bxor.mi r0,r0,31	/* delay slot: apply the result sign */
	add r3,mhi,0x22 ; round to nearest or higher
/* Common rounding code.  r3 = quotient plus rounding bias with six
   guard bits at the bottom.  The denormal path enters here with the
   quotient already shifted right by the exponent deficit.  */
.Lpast_denorm:
	tst r3,0x3c ; check if rounding was unsafe
	lsr r3,r3,6
	jne.d [blink] ; return if rounding was safe.
	add_s r0,r0,r3		/* delay slot: add significand to sign/exponent */
	/* work out exact rounding if we fall through here.  */
	/* We know that the exact result cannot be represented in single
	   precision.  Find the mid-point between the two nearest
	   representable values, multiply with the divisor, and check if
	   the result is larger than the dividend.  */
	add_s r3,r3,r3
	sub_s r3,r3,1
	mulu64 (r3,r6)
	/* NOTE(review): the flags written by the asr.f below are
	   overwritten by the sub.f before anything reads them --
	   confirm the intended round-to-even handling.  */
	asr.f 0,r0,1 ; for round-to-even in case this is a denorm
	/* r9 is 0x7f800000 on the normal path and the denormal shift
	   count on the denormal path.  */
	rsub r2,r9,25
	asl_s r12,r12,r2
	sub.f 0,r12,mlo
	j_s.d [blink]
	sub.mi r0,r0,1		/* delay slot: mid-point too large, round down */
/* Divisor has an all-ones exponent field (Inf or NaN).  */
.Linf_nan_fp1:
	lsr_s r0,r0,31
	bmsk.f 0,r1,22		/* Z if the divisor fraction is 0, i.e. Inf */
	asl_s r0,r0,31		/* r0 = sign of the dividend only */
	bne_s 0f ; divisor is NaN -> NaN
	brne r2,r9,.Lsigned0 ; finite/inf -> 0; inf/inf, nan/inf fall through -> NaN
0:	j_s.d [blink]
	mov r0,-1		/* delay slot: all-ones bit pattern is a NaN */
/* .Lsigned0:     r0 holds only the dividend sign; returns a signed zero.
   .Linf_nan_fp0: r0 is still the Inf / NaN dividend; returns it.
   In both cases the sign is flipped when the divisor is negative.  */
.Lsigned0:
.Linf_nan_fp0:
	tst_s r1,r1
	j_s.d [blink]
	bxor.mi r0,r0,31
	.balign 4
	.global __divsf3
/* For denormal results, it is possible that an exact result needs
   rounding, and thus the round-to-even rule has to come into play.  */
.Linf_denorm:
	/* r2 in [0x7f000000, 0xc0000000) is treated as overflow; larger
	   unsigned values are negative exponents, i.e. underflow.  */
	brlo r2,0xc0000000,.Linf
.Ldenorm:
	asr_s r2,r2,23		/* r2 = signed result exponent, <= -1 here */
	bic r0,r0,r9		/* clear the exponent field, keep the sign */
	neg r9,r2		/* r9 = right-shift count for the significand */
	/* Shift the quotient by the exponent deficit and apply the
	   rounding bias here, then join the common rounding code.  */
	lsr r3,mhi,r9
	brlo.d r9,25,.Lpast_denorm
	add r3,r3,0x22 ; round to nearest or higher
	/* Fall through: return +- 0 */
	j_s [blink]
.Linf:
	j_s.d [blink]
	or r0,r0,r9		/* delay slot: exponent all ones, sign kept */
	.balign 4
/* Divisor has a zero exponent field: it is zero or denormal.
   Normalize it, redo the table lookup with the normalized fraction and
   compensate in the dividend exponent r2.  */
.Ldenorm_fp1:
	norm.f r12,r6 ; flag for x/0 -> Inf check
	add r6,r6,r6
	rsub r5,r12,16
	ror r5,r1,r5
	bmsk r5,r5,5		/* r5 = table index from the normalized fraction */
	bic.ne.f 0, \
	0x60000000,r0 ; large number / denorm -> Inf
	ld.as r5,[r3,r5]
	asl r6,r6,r12		/* r6 = normalized divisor significand */
	beq.d .Linf_NaN
	and.f r2,r0,r9		/* delay slot: r2 = dividend exponent field */
	add r4,r6,r6		/* r4 = normalized fraction, leading 1 shifted out */
	asl_s r12,r12,23
	bne.d .Lpast_denorm_fp1	/* dividend exponent non-zero: rejoin */
	add_s r2,r2,r12		/* delay slot: exponent += normalization shift */
/* Dividend has a zero exponent field: it is zero or denormal.
   NOTE(review): when reached by falling through from .Ldenorm_fp1
   (both operands have a zero exponent field), r7, r8 and r12 have not
   been set up by the .Lpast_denorm_fp1 code -- confirm this path.  */
.Ldenorm_fp0:
	mulu64 (r8,r7)
	bclr r12,r12,31
	norm.f r3,r12 ; flag for 0/x -> 0 check
	bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0
	beq_s .Lret0
	asl_s r12,r12,r3
	asl_s r3,r3,23
	add_s r12,r12,r12	/* r12 = normalized dividend significand */
	add r11,r11,r3		/* compensate in the divisor exponent */
	b.d .Lpast_denorm_fp0
	mov_s r3,r12		/* delay slot */
	ENDFUNC(__divsf3)