/* Copyright (C) 2008-2018 Free Software Foundation, Inc.
   Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
		on behalf of Synopsys Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

/* XMAC schedule: directly back-to-back multiplies stall; the third
   instruction after a multiply stalls unless it is also a multiply.  */
#include "arc-ieee-754.h"

/* __muldf3 -- multiply two IEEE-754 double-precision values in software.

   Inputs:   first operand in DBL0H:DBL0L, second operand in DBL1H:DBL1L
	     (high and low 32-bit words).  These register names, and the
	     FUNC / ENDFUNC / MPYHU macros, are not defined in this file;
	     presumably they come from arc-ieee-754.h.
   Output:   product in DBL0H:DBL0L; every exit is a jump through blink.
   Scratch:  r4-r12 are overwritten, and so are DBL1H and DBL1L.

   File layout: the out-of-line handlers for denormal, infinity / NaN and
   zero results are placed BEFORE the global entry point so that
   .Ldenorm_2 can fall through into it.  The overflow / underflow
   handlers follow the main path, and the constant loaded into r9 sits at
   the very end.  The instruction order is hand-scheduled (see the XMAC
   note above) and several instructions live in branch delay slots, so
   do not reorder instructions without re-checking both.  */

#if 0 /* DEBUG */
/* Disabled self-check wrapper.  It calls __muldf3_c and then
   __muldf3_asm with the same r0-r3 values and branches to abort unless
   both calls leave the same r0 / r1 pair.  Neither __muldf3_c nor abort
   is defined in this file.  */
	.global __muldf3
	.balign 4
__muldf3:
	push_s blink		; keep the return address across both calls
	push_s r2
	push_s r3
	push_s r0
	bl.d __muldf3_c
	push_s r1		; delay slot: stack now holds r1, r0, r3, r2
	ld_s r2,[sp,12]		; reload the saved r2 / r3 argument words
	ld_s r3,[sp,8]
	st_s r0,[sp,12]		; park the reference result in those slots
	st_s r1,[sp,8]
	pop_s r1		; reload the saved r0 / r1 argument words
	bl.d __muldf3_asm
	pop_s r0
	pop_s r3		; r3 = reference r1, r2 = reference r0
	pop_s r2
	pop_s blink
	cmp r0,r2
	cmp.eq r1,r3
	jeq_s [blink]		; both words equal: return normally
	b abort
/* From here on the real implementation is assembled as __muldf3_asm.  */
#define __muldf3 __muldf3_asm
#endif /* DEBUG */
/* N.B. This is optimized for ARC700.
  ARC600 has very different scheduling / instruction selection criteria.  */
/* For the standard multiplier, instead of mpyu rx,DBL0L,DBL1L; tst rx,rx  ,
   we can do:
   sub rx,DBL0L,1; bic rx,DBL0L,rx; lsr rx,rx; norm rx,rx; asl.f 0,DBL1L,rx */

__muldf3_support: /* This label makes debugger output saner.  */
/* If one number is denormal, subtract some from the exponent of the other
   one (if the other exponent is too small, return 0), and normalize the
   denormal.  Then re-run the computation.  */
	.balign 4
	FUNC(__muldf3)
/* Reached from the main path when the exponent field of operand 0 is
   zero.  Swap the two operands so that the zero-exponent operand is in
   DBL1H:DBL1L, recompute r11 for the new operand 0, and share the code
   below.  r9 still holds 0x7ff00000 from the entry sequence.  */
.Ldenorm_dbl0:
	mov_s r12,DBL0L
	mov_s DBL0L,DBL1L
	mov_s DBL1L,r12
	mov_s r12,DBL0H
	mov_s DBL0H,DBL1H
	mov_s DBL1H,r12
	and r11,DBL0H,r9
/* Here the exponent field of DBL1 is zero and r11 holds the exponent
   field of DBL0.  */
.Ldenorm_dbl1:
	brhs r11,r9,.Linf_nan		; other operand is infinity or NaN
	brhs 0x3ca00001,r11,.Lret0	; other exponent <= 0x3ca00000: return 0
	; The next three instructions subtract the sign bit of DBL1H from
	; DBL0H (which flips bit 31 of DBL0H) and clear bit 31 of DBL1H.
	sub_s DBL0H,DBL0H,DBL1H
	bmsk_s DBL1H,DBL1H,30
	add_s DBL0H,DBL0H,DBL1H
	breq_s DBL1H,0,.Ldenorm_2	; no fraction bits in the high word
	norm r12,DBL1H

	sub_s r12,r12,10	; r12 = left-shift count for DBL1H:DBL1L
	asl r5,r12,20		; same count, positioned at the exponent field
	asl_s DBL1H,DBL1H,r12
	sub DBL0H,DBL0H,r5	; take the shift out of the exponent of DBL0
	neg r5,r12
	lsr r6,DBL1L,r5		; bits that move from the low into the high word
	asl_s DBL1L,DBL1L,r12
	b.d __muldf3		; re-run the multiply with adjusted operands
	add_s DBL1H,DBL1H,r6	; delay slot

/* At least one operand has an all-ones exponent field (infinity or NaN).
   The result low word is always 0.  The result high word is all ones if
   either operand is zero (inf * 0 -> NaN); otherwise it is the larger of
   the two sign-stripped high words.  Bit 31 is then set when the XOR of
   the two input high words is negative.  */
	.balign 4
.Linf_nan:
	bclr r12,DBL1H,31
	xor_s DBL1H,DBL1H,DBL0H
	bclr_s DBL0H,DBL0H,31
	max r8,DBL0H,r12 ; either NaN -> NaN ; otherwise inf
	or.f 0,DBL0H,DBL0L
	mov_s DBL0L,0
	or.ne.f DBL1L,DBL1L,r12
	not_s DBL0H,DBL0L ; inf * 0 -> NaN
	mov.ne DBL0H,r8
	tst_s DBL1H,DBL1H
	j_s.d [blink]
	bset.mi DBL0H,DBL0H,31

/* Return zero.  The xor / bclr / xor sequence leaves only bit 31 of
   (DBL0H xor DBL1H) in DBL0H; the low word is cleared in the delay
   slot.  */
.Lret0:	xor_s DBL0H,DBL0H,DBL1H
	bclr DBL1H,DBL0H,31
	xor_s DBL0H,DBL0H,DBL1H
	j_s.d [blink]
	mov_l DBL0L,0

/* DBL1 has a zero exponent field and a zero high fraction word, so any
   set bits are in DBL1L only.  r12 becomes the shift count (21 when bit
   31 of DBL1L is set, otherwise the norm result plus 22); the same count
   is removed from the exponent of DBL0 before falling into the main
   path.  */
	.balign 4
.Ldenorm_2:
	breq_s DBL1L,0,.Lret0 ; 0 input -> 0 output
	norm.f r12,DBL1L

	mov.mi r12,21
	add.pl r12,r12,22
	neg r11,r12
	asl_s r12,r12,20
	lsr.f DBL1H,DBL1L,r11
	ror DBL1L,DBL1L,r11
	sub_s DBL0H,DBL0H,r12
	mov.eq DBL1H,DBL1L
	sub_s DBL1L,DBL1L,DBL1H
	/* Fall through.  */
/* Main path.  Register roles established below:
     r9        0x7ff00000, the exponent-field mask
     r6        bits 19..0 of DBL0H with bit 20 set
     r10       bits 19..0 of DBL1H with bit 20 set; later the shift count
     r11, r12  DBL0H & r9 and DBL1H & r9 (exponent fields, bits 30..20)
     r7:r5:r4  the accumulated fraction product
   The multiplies are interleaved with the exponent checks; see the XMAC
   scheduling note at the top of the file.

   r9 is loaded pc-relative from .L7ff00000 at the end of the file.  The
   offset 0x4b is a literal; the trailing comment records the expression
   it stands for.  Presumably it has to be recomputed whenever code
   between this instruction and .L7ff00000 changes size.  */
	.global __muldf3
	.balign 4
__muldf3:
	ld.as r9,[pcl,0x4b] ; ((.L7ff00000-.+2)/4)]
	MPYHU r4,DBL0L,DBL1L
	bmsk r6,DBL0H,19
	bset r6,r6,20
	mpyu r7,r6,DBL1L
	and r11,DBL0H,r9
	breq r11,0,.Ldenorm_dbl0	; exponent field of operand 0 is zero
	MPYHU r8,r6,DBL1L
	bmsk r10,DBL1H,19
	bset r10,r10,20
	MPYHU r5,r10,DBL0L
	add.f r4,r4,r7
	and r12,DBL1H,r9
	MPYHU r7,r6,r10
	breq r12,0,.Ldenorm_dbl1	; exponent field of operand 1 is zero
	adc.f r5,r5,r8
	mpyu r8,r10,DBL0L
	breq r11,r9,.Linf_nan		; operand 0 is infinity or NaN
	breq r12,r9,.Linf_nan		; operand 1 is infinity or NaN
	mpyu r6,r6,r10
	add.cs r7,r7,1
	add.f r4,r4,r8
	mpyu r10,DBL1L,DBL0L
	bclr r8,r9,30 ; 0x3ff00000
	adc.f r5,r5,r6
	; XMAC write-back stall / std. mult stall is one cycle later
	bclr r6,r9,20 ; 0x7fe00000
	add.cs r7,r7,1 ; fraction product in r7:r5:r4
	tst r10,r10
	bset.ne r4,r4,0 ; put least significant word into sticky bit
	lsr.f r10,r7,9		; r10 = r7 >> 9; Z is set when that is zero
	add_l r12,r12,r11 ; add exponents
	rsub.eq r8,r8,r9 ; 0x40000000
	sub r12,r12,r8 ; subtract bias + implicit 1
	brhs.d r12,r6,.Linf_denorm	; unsigned: exponent outside normal range
	rsub r10,r10,12		; delay slot: r10 = 12 - r10, the left-shift count
/* Shift the fraction product r7:r5:r4 left by r10, round, and combine it
   with the exponent in r12.  r8 = -r10 is used as the complementary
   right-shift count.  r6 receives the r4 bits below the result; the
   add.f / btst.eq / cmp.eq sequence turns them into the rounding carry
   consumed by adc.f.  The result sign is set in the delay slot from the
   XOR of the two input high words.  */
.Lshift_frac:
	neg r8,r10
	asl r6,r4,r10
	lsr DBL0L,r4,r8
	add.f 0,r6,r6
	btst.eq DBL0L,0
	cmp.eq r4,r4 ; round to nearest / round to even
	asl r4,r5,r10
	lsr r5,r5,r8
	adc.f DBL0L,DBL0L,r4
	xor.f 0,DBL0H,DBL1H
	asl r7,r7,r10
	add_s r12,r12,r5
	adc DBL0H,r12,r7
	j_s.d [blink]
	bset.mi DBL0H,DBL0H,31

/* We have checked for infinity / NaN input before, and transformed
   denormalized inputs into normalized inputs.  Thus, the worst case
   exponent overflows are:
       1 +     1 - 0x400 == 0xc02 : maximum underflow
   0x7fe + 0x7fe - 0x3ff == 0xbfd ; maximum overflow
   N.B. 0x7e and 0x7f are also values for overflow.

   If (r12 <= -54), we have an underflow to zero.  */
/* Entered with r12 = out-of-range exponent value and r10 = left-shift
   count.  Values below 0xc0000000 (unsigned) are treated as overflow.
   Otherwise r12 >> 20 (arithmetic) is added to the shift count and the
   exponent is cleared, so that the fraction is shifted less (or shifted
   down by whole words) before rounding.  */
	.balign 4
.Linf_denorm:
	brlo r12,0xc0000000,.Linf
	asr r6,r12,20
	mov_s r12,0
	add.f r10,r10,r6
	brgt r10,0,.Lshift_frac		; still a positive left shift
	beq_s .Lround_frac		; shift count is exactly zero
	add.f r10,r10,32
/* Move the fraction down by one whole word: r7:r5:r4 becomes 0:r7:r5.
   If the discarded r4 was nonzero, bit 1 of the new r4 is set,
   presumably as a sticky bit for rounding.  Repeat while the adjusted
   count stays above 21; otherwise the result is zero.  */
.Lshift32_frac:
	tst r4,r4
	mov r4,r5
	bset.ne r4,r4,1
	mov r5,r7
	mov r7,0
	brge r10,1,.Lshift_frac
	breq r10,0,.Lround_frac
	add.f r10,r10,32
	brgt r10,21,.Lshift32_frac
	b_s .Lret0

/* Shift count is exactly zero: the result words are r7:r5 and r4 holds
   the bits below the result.  add.f 0,r4,r4 moves bit 31 of r4 into the
   carry and sets Z when the remaining 31 bits are zero; btst then tests
   the result LSB only in that case.
   NOTE(review): both adc instructions are predicated on .eq, unlike the
   unconditional adc.f in .Lshift_frac.  Please confirm that the case
   "bit 31 of r4 set, lower bits nonzero" (carry set, Z clear) is really
   meant to skip the rounding increment.  */
.Lround_frac:
	add.f 0,r4,r4
	btst.eq r5,0
	mov_s DBL0L,r5
	mov_s DBL0H,r7
	adc.eq.f DBL0L,DBL0L,0
	j_s.d [blink]

	adc.eq DBL0H,DBL0H,0	; delay slot of the jump above

/* Overflow: return r9 (0x7ff00000) as the high word and 0 as the low
   word, with bit 31 set when the XOR of the input high words is
   negative.  */
.Linf:	xor.f DBL1H,DBL1H,DBL0H
	mov_s DBL0L,0
	mov_s DBL0H,r9
	j_s.d [blink]
	bset.mi DBL0H,DBL0H,31
	ENDFUNC(__muldf3)

/* Constant loaded into r9 by the first instruction of __muldf3.  */
	.balign 4
.L7ff00000:
	.long 0x7ff00000