/* Copyright (C) 2008-2021 Free Software Foundation, Inc.
   Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
		on behalf of Synopsys Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

#include "arc-ieee-754.h"
#if 0 /* DEBUG */
/* Debug harness (disabled).  Each wrapper calls the *_c variant and the
   *_asm variant of the routine with the same two inputs, compares the
   two 32-bit results and calls abort if they differ.  The #defines at
   the end of this section rename the real implementation below to
   *_asm so that the wrappers take over the public names.  */
	.global __addsf3
	FUNC(__addsf3)
	.balign 4
__addsf3:
	push_s	blink
	push_s	r1
	bl.d	__addsf3_c
	push_s	r0		; (delay slot) stack now holds r0, r1, blink
	ld_s	r1,[sp,4]	; reload the original second operand
	st_s	r0,[sp,4]	; keep the result of the _c variant in its slot
	bl.d	__addsf3_asm
	pop_s	r0		; (delay slot) restore the original first operand
	pop_s	r1		; r1 = result of the _c variant
	pop_s	blink
	cmp	r0,r1
	jeq_s	[blink]		; results agree: return
	bl	abort
	ENDFUNC(__addsf3)
	.global __subsf3
	FUNC(__subsf3)
	.balign 4
__subsf3:
	push_s	blink
	push_s	r1
	bl.d	__subsf3_c
	push_s	r0		; (delay slot) stack now holds r0, r1, blink
	ld_s	r1,[sp,4]	; reload the original second operand
	st_s	r0,[sp,4]	; keep the result of the _c variant in its slot
	bl.d	__subsf3_asm
	pop_s	r0		; (delay slot) restore the original first operand
	pop_s	r1		; r1 = result of the _c variant
	pop_s	blink
	cmp	r0,r1
	jeq_s	[blink]		; results agree: return
	bl	abort
	ENDFUNC(__subsf3)
#define __addsf3 __addsf3_asm
#define __subsf3 __subsf3_asm
#endif /* DEBUG */
/* N.B. This is optimized for ARC700.
  ARC600 has very different scheduling / instruction selection criteria.  */

/* Single-precision addition (__addsf3) and subtraction (__subsf3).

   inputs: r0, r1
   output: r0
   clobber: r1-r10, r12, flags

   In the comments below DBL0 is the first operand (r0) and DBL1 is the
   second operand (r1).  __subsf3 flips the sign bit of DBL1 and falls
   through into __addsf3.

   Register roles established at entry and relied upon by the paths below:
     r4:  DBL0 & 0x7fffffff
     r6:  DBL1 & 0x7f800000
     r9:  0x7f800000
     r10: DBL0 ^ DBL1 (bit 31 set iff the signs differ)
     r12: exponent difference, used as shift count  */

	.balign 4
	.global __addsf3
	.global __subsf3
	FUNC(__addsf3)
	FUNC(__subsf3)
	.long	0x7f800000	; exponent mask
__subsf3:
	bxor_l	r1,r1,31	; negate DBL1, then fall through into the add
__addsf3:
	ld	r9,[pcl,-8]	; r9 = exponent mask stored ahead of __subsf3
	bmsk	r4,r0,30	; r4 = DBL0 & 0x7fffffff
	xor	r10,r0,r1	; r10 = sign difference in bit 31
	and	r6,r1,r9	; r6 = DBL1 & 0x7f800000
	sub.f	r12,r4,r6
	asr_s	r12,r12,23	; r12 = shift count
	blo	.Ldbl1_gt	; r4 < r6: DBL1 has the larger exponent
	brhs	r4,r9,.Linf_nan	; DBL0 is inf or NaN
	brne	r12,0,.Lsmall_shift
	brge	r10,0,.Ladd_same_exp ; r12 == 0
/* After subtracting, we need to normalize; when shifting to place the
  leading 1 into position for the implicit 1 and adding that to DBL0,
  we increment the exponent.  Thus, we have to subtract one more than
  the shift count from the exponent beforehand.  Iff the exponent drops thus
  below zero (before adding in the fraction with the leading one), we have
  generated a denormal number.  Denormal handling is basically reducing the
  shift count so that we produce a zero exponent instead; FWIW, this way
  the shift count can become zero (if we started out with exponent 1).
  On the plus side, we don't need to check for denorm input, the result
  of subtracting these looks just the same as denormals generated during
  subtraction.  */
	bmsk	r7,r1,30	; r7 = DBL1 & 0x7fffffff
	breq	r4,r7,.Lret0	; equal magnitudes, opposite signs: return 0
	sub.f	r5,r4,r7	; carry set iff DBL1 has the larger magnitude
	lsr	r12,r4,23	; r12 = exponent field of DBL0
	neg.cs	r5,r5		; r5 = absolute difference
	norm	r3,r5
	bmsk	r2,r0,22	; r2 = fraction of DBL0
	sub_s	r3,r3,6
	min	r12,r12,r3	; limit the shift so the exponent stops at zero
	bic	r1,r0,r2	; r1 = sign and exponent of DBL0
	sub_s	r3,r12,1
	asl_s	r12,r12,23
	asl	r2,r5,r3
	sub_s	r1,r1,r12
	add_s	r0,r1,r2
	j_s.d	[blink]
	bxor.cs	r0,r0,31	; (delay slot) flip the sign if the sub.f above borrowed
	.balign 4
.Linf_nan:
	; If both inputs are inf, but with different signs, the result is NaN.
	asr	r12,r10,31	; r12 = all ones iff the signs differ
	or_s	r1,r1,r12
	j_s.d	[blink]
	or.eq	r0,r0,r1	; (delay slot) eq is still the result of the entry sub.f
	.balign 4
.Ladd_same_exp:
	/* This is a special case because we can't test for need to shift
	   down by checking if bit 23 of DBL0 changes.  OTOH, here we know
	   that we always need to shift down.  */
	; adding the two floating point numbers together makes the sign
	; cancel out and appear as carry; the exponent is doubled, and the
	; fraction also in need of shifting left by one. The two implicit
	; ones of the sources make an implicit 1 of the result, again
	; non-existent in a place shifted by one.
	add.f	r0,r0,r1
	btst_s	r0,1
	breq	r6,0,.Ldenorm_add
	add.ne	r0,r0,1 ; round to even.
	rrc	r0,r0
	bmsk	r1,r9,23
	add	r0,r0,r1 ; increment exponent
	bic.f	0,r9,r0; check for overflow -> infinity.
	jne_l	[blink]
	mov_s	r0,r9
	j_s.d	[blink]
	bset.cs	r0,r0,31

.Ldenorm_add:
	; Reached with r6 == 0 and equal exponent fields: plain integer
	; addition of abs(DBL0) and DBL1 gives the result.
	j_s.d	[blink]
	add	r0,r4,r1

.Lret_dbl0:
	; Return DBL0 unchanged.
	j_s	[blink]

	.balign 4
.Lsmall_shift:
	brhi	r12,25,.Lret_dbl0	; shift count above 25: return DBL0
	breq.d	r6,0,.Ldenorm_small_shift ; DBL1 has a zero exponent field
	bmsk_s	r1,r1,22	; (delay slot) r1 = fraction of DBL1
	bset_s	r1,r1,23	; make the implicit leading 1 explicit
.Lfixed_denorm_small_shift:
	neg	r8,r12
	asl	r5,r1,r8	; r5 = guard bits
	brge.d	r10,0,.Ladd	; same signs: add
	lsr_l	r1,r1,r12	; (delay slot) shift the DBL1 fraction into place
/* subtract, abs(DBL0) > abs(DBL1) */
/* DBL0: original values
   DBL1: fraction with explicit leading 1, shifted into place
   r4:  orig. DBL0 & 0x7fffffff
   r6:  orig. DBL1 & 0x7f800000
   r9:  0x7f800000
   r10: orig. DBL0H ^ DBL1H
   r5 : guard bits */
	.balign 4
.Lsub:
	neg.f	r12,r5
	bmsk	r3,r0,22	; r3 = fraction of DBL0
	bset	r5,r3,23	; r5 = fraction of DBL0 with explicit leading 1
	sbc.f	r4,r5,r1	; r4:r12 = unnormalized result fraction
	beq.d	.Large_cancel_sub
	bic	r7,r0,r3	; (delay slot) r7 = sign and exponent of DBL0
	norm	r3,r4
	bmsk	r6,r7,30	; r6 = exponent field of the larger operand
.Lsub_done:
	sub_s	r3,r3,6
	breq	r3,1,.Lsub_done_noshift	; leading 1 is already at bit 23
	asl	r5,r3,23	; r5 = amount to take off the exponent field
	sub_l	r3,r3,1		; r3 = left shift for the fraction
	brlo	r6,r5,.Ldenorm_sub	; exponent would underflow
	sub	r0,r7,r5
	neg_s	r1,r3
	lsr.f	r2,r12,r1	; r2 = guard bits that move into the fraction
	asl_s	r12,r12,r3
	btst_s	r2,0
	bmsk.eq.f r12,r12,30
	asl	r5,r4,r3
	add_s	r0,r0,r2
	adc.ne	r0,r0,0
	j_s.d	[blink]
	add_l	r0,r0,r5

.Lret0:
	; Return +0.
	j_s.d	[blink]
	mov_l	r0,0

	.balign 4
.Ldenorm_small_shift:
	; DBL1 has a zero exponent field: no implicit 1 was set, and the
	; shift count is reduced by one.
	brne.d	r12,1,.Lfixed_denorm_small_shift
	sub_s	r12,r12,1	; (delay slot)
	brlt.d	r10,0,.Lsub
	mov_s	r5,r12 ; zero r5, and align following code
.Ladd: ; Both bit 23 of DBL1 and bit 0 of r5 are clear.
	bmsk	r2,r0,22
	add_s	r2,r2,r1
	bbit0.d	r2,23,.Lno_shiftdown
	add_s	r0,r0,r1
	bic.f	0,r9,r0; check for overflow -> infinity; eq : infinity
	bmsk	r1,r2,22
	lsr.ne.f r2,r2,2; cc: even ; hi: might round down
	lsr.ne	r1,r1,1
	rcmp.hi	r5,1; hi : round down
	bclr.hi	r0,r0,0
	j_l.d	[blink]
	sub_s	r0,r0,r1

/* r4: DBL0H & 0x7fffffff
   r6: DBL1H & 0x7f800000
   r9: 0x7f800000
   r10: sign difference
   r12: shift count (negative) */
	.balign 4
.Ldbl1_gt:
	brhs	r6,r9,.Lret_dbl1 ; inf or NaN
	neg	r8,r12		; r8 = positive shift count
	brhi	r8,25,.Lret_dbl1	; shift count above 25: return DBL1
.Lsmall_shift_dbl0:
	/* DBL0 is the smaller operand on this path, so DBL0 is the one that
	   may be a denormal.  r6 (the exponent field of DBL1) cannot be
	   used for that test: this path is only entered when r4 < r6, so
	   r6 is never zero here.  Instead compare the isolated fraction
	   with r4 (DBL0 & 0x7fffffff); the two are equal exactly when the
	   exponent field of DBL0 is zero.  The code size is unchanged, and
	   neither flags nor additional registers are touched.  */
	bmsk_s	r0,r0,22	; r0 = fraction of DBL0
	breq	r0,r4,.Ldenorm_small_shift_dbl0 ; DBL0 has a zero exponent field
	bset_s	r0,r0,23	; make the implicit leading 1 explicit
.Lfixed_denorm_small_shift_dbl0:
	asl	r5,r0,r12	; r5 = guard bits
	brge.d	r10,0,.Ladd_dbl1_gt	; same signs: add
	lsr	r0,r0,r8	; (delay slot) shift the DBL0 fraction into place
/* subtract, abs(DBL0) < abs(DBL1) */
/* DBL0: fraction with explicit leading 1, shifted into place
   DBL1: original value
   r6:  orig. DBL1 & 0x7f800000
   r9:  0x7f800000
   r5:  guard bits */
	.balign 4
.Lrsub:
	neg.f	r12,r5
	bmsk	r5,r1,22	; r5 = fraction of DBL1
	bic	r7,r1,r5	; r7 = sign and exponent of DBL1
	bset	r5,r5,23	; r5 = fraction of DBL1 with explicit leading 1
	sbc.f	r4,r5,r0	; r4:r12 = unnormalized result fraction
	bne.d	.Lsub_done ; note: r6 is already set up.
	norm	r3,r4
	/* Fall through */

/* r4:r12 : unnormalized result fraction
   r7: result sign and exponent */
/* When seeing large cancellation, only the topmost guard bit might be set.  */
	.balign 4
.Large_cancel_sub:
	breq_s	r12,0,.Lret0	; nothing left at all: return 0
	sub	r0,r7,24<<23
	xor.f	0,r0,r7 ; test if exponent is negative
	tst.pl	r9,r0  ; test if exponent is zero
	jpnz	[blink] ; return if non-denormal result
	bmsk	r6,r7,30
	lsr	r3,r6,23
	xor	r0,r6,r7	; r0 = sign bit only
	sub_s	r3,r3,24-22
	j_s.d	[blink]
	bset	r0,r0,r3	; (delay slot) place the single remaining bit

	; If a denorm is produced, we have an exact result -
	; no need for rounding.
	.balign 4
.Ldenorm_sub:
	sub	r3,r6,1
	lsr.f	r3,r3,23
	xor	r0,r6,r7	; r0 = sign bit only
	neg_s	r1,r3
	asl.ne	r4,r4,r3
	lsr_s	r12,r12,r1
	add_s	r0,r0,r4
	j_s.d	[blink]
	add.ne	r0,r0,r12

	.balign 4
.Lsub_done_noshift:
	; No normalization shift is needed; round using the guard bits in
	; r12 and drop the explicit leading 1 before adding the fraction
	; to the sign and exponent in r7.
	add.f	0,r12,r12
	btst.eq	r4,0
	bclr	r4,r4,23
	add	r0,r7,r4
	j_s.d	[blink]
	adc.ne	r0,r0,0

	.balign 4
.Lno_shiftdown:
	; Round to nearest, ties to even: carry is the top guard bit, and
	; it is cancelled again when the remaining guard bits are zero
	; and the result is already even.
	add.f	0,r5,r5
	btst.eq	r0,0
	cmp.eq	r5,r5
	j_s.d	[blink]
	add.cs	r0,r0,1

.Lret_dbl1:
	; Return DBL1.
	j_s.d	[blink]
	mov_l	r0,r1
	.balign 4
.Ldenorm_small_shift_dbl0:
	; DBL0 has a zero exponent field: no implicit 1 was set, and the
	; shift count is reduced by one (r8 positive, r12 negative).
	sub.f	r8,r8,1
	bne.d	.Lfixed_denorm_small_shift_dbl0
	add_s	r12,r12,1	; (delay slot)
	brlt.d	r10,0,.Lrsub
	mov	r5,0		; (delay slot) no guard bits
.Ladd_dbl1_gt: ; both bit 23 of DBL0 and bit 0 of r5 are clear.
	bmsk	r2,r1,22
	add_s	r2,r2,r0
	bbit0.d	r2,23,.Lno_shiftdown_dbl1_gt
	add_s	r0,r1,r0
	bic.f	0,r9,r0; check for overflow -> infinity; eq : infinity
	bmsk	r1,r2,22
	lsr.ne.f r2,r2,2; cc: even ; hi: might round down
	lsr.ne	r1,r1,1
	rcmp.hi	r5,1; hi : round down
	bclr.hi	r0,r0,0
	j_l.d	[blink]
	sub_s	r0,r0,r1

	.balign 4
.Lno_shiftdown_dbl1_gt:
	; Same rounding as .Lno_shiftdown.
	add.f	0,r5,r5
	btst.eq	r0,0
	cmp.eq	r5,r5
	j_s.d	[blink]
	add.cs	r0,r0,1
	ENDFUNC(__addsf3)
	ENDFUNC(__subsf3)