/* Copyright (C) 2008-2014 Free Software Foundation, Inc.
   Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
		on behalf of Synopsys Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

/* __muldf3: double precision multiply.

   The operand words are referred to through the DBL0L / DBL0H / DBL1L /
   DBL1H macros.  They are not defined in this file; presumably they come
   from arc-ieee-754.h and name the low / high word of the first / second
   operand -- confirm there.  Every exit leaves the result in
   DBL0H:DBL0L and returns through blink.

   Besides the operand registers, the code below writes r4-r12 and uses
   the mlo / mhi results of mulu64.

   Reading aid: an instruction with a ".d" suffix has a delay slot, i.e.
   the instruction that follows it is executed as well before control
   is transferred (ARC ISA).  Such instructions are tagged "delay slot"
   below where it matters.  */

#include "../arc-ieee-754.h"

/* Self-check harness, compiled out by the "#if 0".  When enabled it
   defines __muldf3 as a wrapper that calls __muldf3_c and __muldf3_asm
   with the same arguments (the latter is the implementation below,
   renamed by the #define at the end of this section), compares the two
   results and branches to abort on a mismatch.  __muldf3_c is not
   defined in this file; presumably a C reference implementation.  */
#if 0 /* DEBUG */
	.global __muldf3
	.balign 4
__muldf3:
	push_s blink
	push_s r2
	push_s r3
	push_s r0
	bl.d __muldf3_c
	push_s r1		/* delay slot: all of blink, r2, r3, r0, r1 are now saved */
	ld_s r2,[sp,12]		/* reload the saved r2 / r3 ...  */
	ld_s r3,[sp,8]
	st_s r0,[sp,12]		/* ... and park the __muldf3_c result in their slots */
	st_s r1,[sp,8]
	pop_s r1
	bl.d __muldf3_asm
	pop_s r0		/* delay slot: r0 / r1 hold the saved values again */
	pop_s r3		/* r3 / r2 = the parked __muldf3_c result words */
	pop_s r2
	pop_s blink
	cmp r0,r2
	cmp.eq r1,r3
	jeq_s [blink]		/* both result words identical: done */
	and r12,DBL0H,DBL1H
	bic.f 0,0x7ff80000,r12 ; both NaN -> OK
	jeq_s [blink]
	b abort
#define __muldf3 __muldf3_asm
#endif /* DEBUG */

__muldf3_support: /* This label makes debugger output saner.  */
	.balign 4
	FUNC(__muldf3)

/* Reached from .Ldenorm_dbl1 when the high word of the denormal operand
   (sign already stripped) is zero, so only DBL1L can hold fraction
   bits.  A zero low word means the operand is zero.  Otherwise the
   fraction is shifted up using a count derived from norm, the other
   operand's exponent field (bit 20 upwards of DBL0H) is lowered by the
   same count, and control falls through into __muldf3.
   NOTE(review): the lsr / ror below are given the negated count in r11;
   this presumably relies on the shifter only using the low bits of the
   count -- confirm against the ISA manual.  */
.Ldenorm_2:
	breq.d DBL1L,0,.Lret0_2 ; 0 input -> 0 output
	norm.f r12,DBL1L	/* delay slot */
	mov.mi r12,21		/* mi case: count is 21 */
	add.pl r12,r12,22	/* pl case: count is norm result + 22 */
	neg r11,r12
	asl_s r12,r12,20	/* count moved to the exponent field position */
	lsr.f DBL1H,DBL1L,r11
	ror DBL1L,DBL1L,r11
	sub_s DBL0H,DBL0H,r12	/* compensate in the other operand's exponent */
	mov.eq DBL1H,DBL1L	/* uses Z as left by lsr.f (assumes the two
				   instructions in between leave it alone) */
	sub_l DBL1L,DBL1L,DBL1H
	/* Fall through.  */
	.global __muldf3
	.balign 4
__muldf3:
	/* The 64-bit fraction product is built from four mulu64 partial
	   products, each read back from mlo / mhi.  Operand classification
	   (denormal, infinity / NaN) is interleaved with the multiplies.  */
	mulu64 DBL0L,DBL1L	/* partial product: low word * low word */
	/* r9 = 0x7ff00000, the word stored at .L7ff00000 at the end of this
	   file.  NOTE(review): 0x68 is a hand-computed offset (see the
	   expression in the trailing comment); it has to be recomputed if
	   the amount of code between here and .L7ff00000 changes.  */
	ld.as r9,[pcl,0x68] ; ((.L7ff00000-.+2)/4)]
	bmsk r6,DBL0H,19	/* r6 = bits 19..0 of DBL0H ...  */
	bset r6,r6,20		/* ... with bit 20 set */
	and r11,DBL0H,r9	/* r11 = DBL0H masked with 0x7ff00000 (exponent field) */
	breq.d r11,0,.Ldenorm_dbl0
	and r12,DBL1H,r9	/* delay slot: r12 = exponent field of DBL1H */
	breq.d r12,0,.Ldenorm_dbl1
	mov r8,mlo		/* delay slot: r8 = least significant product word */
	mov r4,mhi
	mulu64 r6,DBL1L		/* partial product: r6 * DBL1L */
	breq.d r11,r9,.Linf_nan	/* exponent field all ones */
	bmsk r10,DBL1H,19	/* delay slot: r10 = bits 19..0 of DBL1H ...  */
	breq.d r12,r9,.Linf_nan
	bset r10,r10,20		/* delay slot: ... with bit 20 set */
	add.f r4,r4,mlo
	adc r5,mhi,0
	mulu64 r10,DBL0L	/* partial product: r10 * DBL0L */
	add_s r12,r12,r11 ; add exponents
	add.f r4,r4,mlo
	adc r5,r5,mhi
	mulu64 r6,r10		/* partial product: r6 * r10 */
	tst r8,r8
	bclr r8,r9,30 ; 0x3ff00000
	bset.ne r4,r4,0 ; put least significant word into sticky bit
	bclr r6,r9,20 ; 0x7fe00000
	add.f r5,r5,mlo
	adc r7,mhi,0 ; fraction product in r7:r5:r4
	lsr.f r10,r7,9		/* r10 = r7 >> 9; Z set when that is zero */
	rsub.eq r8,r8,r9 ; 0x40000000
	sub r12,r12,r8 ; subtract bias + implicit 1
	brhs.d r12,r6,.Linf_denorm /* exponent outside the normal range */
	rsub r10,r10,12		/* delay slot: r10 = 12 - r10, the shift count
				   used at .Lshift_frac */

/* Shift the fraction product r7:r5:r4 left by r10, round, and combine
   it with the exponent in r12 and the sign.  Also entered from
   .Linf_denorm / .Lshift32_frac with r12 = 0 for denormal results.
   NOTE(review): the right shifts use r8 = -r10 as their count, which
   presumably relies on only the low bits of the count being used --
   confirm against the ISA manual.  */
.Lshift_frac:
	neg r8,r10
	asl r6,r4,r10		/* r6 = the bits of r4 that drop below the result */
	lsr DBL0L,r4,r8
	add.f 0,r6,r6		/* C = top dropped bit, Z = no dropped bits below it */
	btst.eq DBL0L,0		/* exact tie: Z = result LSB is zero */
	cmp.eq r4,r4 ; round to nearest / round to even
	asl r4,r5,r10
	lsr r5,r5,r8
	adc.f DBL0L,DBL0L,r4	/* low result word, including the rounding carry */
	xor.f 0,DBL0H,DBL1H	/* N = operand signs differ; the carry of the adc.f
				   above is still consumed by the adc below */
	asl r7,r7,r10
	add_s r12,r12,r5
	adc DBL0H,r12,r7
	j_s.d [blink]
	bset.mi DBL0H,DBL0H,31	/* delay slot: set the sign bit if signs differed */

/* N.B. This is optimized for ARC700.
  ARC600 has very different scheduling / instruction selection criteria.  */

/* If one number is denormal, subtract some from the exponent of the other
   one (if the other exponent is too small, return 0), and normalize the
   denormal.  Then re-run the computation.  */

/* Return zero carrying only bit 31 of DBL0H.  At the branch from
   .Ldenorm_2 that bit already holds the combined sign (see
   .Ldenorm_dbl1).  */
.Lret0_2:
	lsr_s DBL0H,DBL0H,31
	asl_s DBL0H,DBL0H,31
	j_s.d [blink]
	mov_s DBL0L,0		/* delay slot */
	.balign 4
/* Operand 0 has a zero exponent field: swap the two operands so that the
   code below always finds that operand in DBL1, then reload r11 with the
   exponent field of the new DBL0H.  */
.Ldenorm_dbl0:
	mov_s r12,DBL0L
	mov_s DBL0L,DBL1L
	mov_s DBL1L,r12
	mov_s r12,DBL0H
	mov_s DBL0H,DBL1H
	mov_s DBL1H,r12
	and r11,DBL0H,r9
/* DBL1 has a zero exponent field; r11 = exponent field of DBL0H.  */
.Ldenorm_dbl1:
	brhs r11,r9,.Linf_nan	/* the other operand is infinity / NaN */
	brhs 0x3ca00001,r11,.Lret0 /* the other exponent is too small: return 0 */
	/* The next three instructions subtract only the sign bit of DBL1H
	   from DBL0H (x - (s + m) + m), which leaves bit 31 of DBL0H equal to
	   the XOR of the two sign bits, and strip the sign from DBL1H.  */
	sub_s DBL0H,DBL0H,DBL1H
	bmsk.f DBL1H,DBL1H,30	/* Z set when no fraction bits are in the high word */
	add_s DBL0H,DBL0H,DBL1H
	beq.d .Ldenorm_2
	norm r12,DBL1H		/* delay slot */
	sub_s r12,r12,10	/* r12 = left shift count for the denormal fraction */
	asl r5,r12,20		/* same count at the exponent field position */
	asl_s DBL1H,DBL1H,r12
	sub DBL0H,DBL0H,r5	/* compensate in the other operand's exponent */
	neg r5,r12
	lsr r6,DBL1L,r5		/* bits moving from the low word into the high word */
	asl_s DBL1L,DBL1L,r12
	b.d __muldf3		/* re-run with the adjusted operands */
	add_s DBL1H,DBL1H,r6	/* delay slot */

/* Return zero with the sign bit set to the XOR of the operand sign bits:
   the three instructions below leave DBL0H = (DBL0H ^ DBL1H) & 0x80000000.  */
.Lret0:	xor_s DBL0H,DBL0H,DBL1H
	bclr DBL1H,DBL0H,31
	xor_s DBL0H,DBL0H,DBL1H
	j_s.d [blink]
	mov_s DBL0L,0		/* delay slot */

	.balign 4
/* At least one operand has an all-ones exponent field.  */
.Linf_nan:
	bclr r12,DBL1H,31	/* r12 = DBL1H without its sign bit */
	xor_s DBL1H,DBL1H,DBL0H	/* bit 31 of DBL1H = XOR of the two sign bits */
	bclr_s DBL0H,DBL0H,31	/* DBL0H without its sign bit */
	max r8,DBL0H,r12 ; either NaN -> NaN ; otherwise inf
	or.f 0,DBL0H,DBL0L	/* Z set when operand 0 is zero (sign ignored) */
	mov_s DBL0L,0
	or.ne.f DBL1L,DBL1L,r12	/* otherwise: Z set when operand 1 is zero */
	not_s DBL0H,DBL0L ; inf * 0 -> NaN
	mov.ne DBL0H,r8		/* neither operand is zero: take the max value */
	tst_s DBL1H,DBL1H	/* N = combined sign computed above */
	j_s.d [blink]
	bset.mi DBL0H,DBL0H,31	/* delay slot */

/* We have checked for infinity / NaN input before, and transformed
   denormalized inputs into normalized inputs.  Thus, the worst case
   exponent overflows are:
       1 +     1 - 0x400 == 0xc02 : maximum underflow
   0x7fe + 0x7fe - 0x3ff == 0xbfd ; maximum overflow
   N.B. 0x7e and 0x7f are also values for overflow.

   If (r12 <= -54), we have an underflow to zero.  */
	.balign 4
/* Entered from the main path with r12 = out-of-range exponent (still at
   the exponent field position) and r10 = the normal shift count.  */
.Linf_denorm:
	lsr r6,r12,28
	brlo.d r6,0xc,.Linf	/* top nibble below 0xc: overflow */
	asr r6,r12,20		/* delay slot: r6 = exponent as a signed integer */
	add.f r10,r10,r6	/* reduce the shift count by the exponent deficit */
	brgt.d r10,0,.Lshift_frac
	mov_s r12,0		/* delay slot: result exponent field is zero */
	beq.d .Lround_frac	/* count exactly zero: no shifting, only rounding */
	add r10,r10,32		/* delay slot */
/* Move the fraction down by one whole word (r7 -> r5 -> r4), folding a
   non-zero discarded r4 into bit 1 of the new r4 so that it still takes
   part in rounding.  Repeats while the adjusted count stays above 21;
   otherwise the result underflows to zero.  */
.Lshift32_frac:
	tst r4,r4
	mov r4,r5
	bset.ne r4,r4,1
	mov r5,r7
	brge.d r10,1,.Lshift_frac
	mov r7,0		/* delay slot */
	breq.d r10,0,.Lround_frac
	add r10,r10,32		/* delay slot */
	brgt r10,21,.Lshift32_frac
	b_s .Lret0

/* Shift count is zero: the result is r7:r5 rounded according to r4.
   NOTE(review): the two adc instructions are conditional on "eq", so the
   increment is only applied when r4 is an exact tie (or zero) and bit 0
   of r5 is clear.  That looks like the opposite of the rounding done at
   .Lshift_frac -- confirm.
   NOTE(review): unlike every other exit, no sign bit is applied to DBL0H
   on this path -- confirm whether that is intended.  */
.Lround_frac:
	add.f 0,r4,r4		/* C = bit 31 of r4, Z = bits 30..0 of r4 are zero */
	btst.eq r5,0
	mov_s DBL0L,r5
	mov_s DBL0H,r7
	adc.eq.f DBL0L,DBL0L,0
	j_s.d [blink]
	adc.eq DBL0H,DBL0H,0	/* delay slot */

/* Overflow: return r9 (0x7ff00000) : 0 with the sign bit set to the XOR
   of the operand sign bits.  */
.Linf:	mov_s DBL0L,0
	xor.f DBL1H,DBL1H,DBL0H	/* N = operand signs differ */
	mov_s DBL0H,r9
	j_s.d [blink]
	bset.mi DBL0H,DBL0H,31	/* delay slot */
	ENDFUNC(__muldf3)

	.balign 4
/* Exponent field mask, loaded into r9 by the ld.as at __muldf3.  */
.L7ff00000:
	.long 0x7ff00000