/* Copyright (c) 2002  Michael Stumpf  <mistumpf@de.pepperl-fuchs.com>
   Copyright (c) 2006  Dmitry Xmelkov
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:

   * Redistributions of source code must retain the above copyright
     notice, this list of conditions and the following disclaimer.

   * Redistributions in binary form must reproduce the above copyright
     notice, this list of conditions and the following disclaimer in
     the documentation and/or other materials provided with the
     distribution.

   * Neither the name of the copyright holders nor the names of
     contributors may be used to endorse or promote products derived
     from this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE. */

/* $Id: mulsf3x.S 2473 2015-04-09 08:10:22Z pitchumani $ */

#if !defined(__AVR_TINY__)

#include "fp32def.h"
#include "asmdef.h"

FUNCTION __mulsf3x

#if defined(__AVR_ENHANCED__) && __AVR_ENHANCED__

0:      XCALL   _U(__fp_pscA)
        brcs    1f
        XCALL   _U(__fp_pscB)
        brcs    1f
        and     rA3, rB3                ; at least one arg is Inf (0xff)
        breq    1f
        XJMP    _U(__fp_inf)            ; nonzero * Inf --> Inf
1:      XJMP    _U(__fp_nan)            ; 0 * Inf --> NaN
2:      clr     r1                      ; r1 was clobbered by 'mul rA3,rB3'
        XJMP    _U(__fp_szero)

ENTRY __mulsf3x
        XCALL   _U(__fp_split3)
        brcs    0b

ENTRY __mulsf3_pse                      ; post-split entry
  ; check for zero: the product is 0 iff one of the exponents is 0
        mul     rA3, rB3                ; clobbers r1 (cleared at 2: or below)
        breq    2b
  ; rB3.rA3 := rA3 + rB3 (9-bit exponent sum)
        add     rA3, rB3
        ldi     rB3, 0
        adc     rB3, rB3

  ; multiplication:  rA2.rA1.rA0 * rB2.rB1.rB0 --> rA2.rA1.rA0.rAE.ZH.ZL

  ; ZH.ZL = rA0 * rB0
        mul     rA0, rB0
        movw    ZL, r0
  ; rAE.ZH += rA1 * rB0
        mul     rA1, rB0
        clr     rAE
        add     ZH, r0
        adc     rAE, r1
  ; rBE.rAE.ZH = rAE.ZH + rA0 * rB1
        mul     rA0, rB1
        clr     rBE
        add     ZH, r0
        adc     rAE, r1
        adc     rBE, rBE
  ; rA0.rBE.rAE = rBE.rAE + rA0 * rB2
        mul     rA0, rB2
        clr     rA0
        add     rAE, r0
        adc     rBE, r1
        adc     rA0, rA0
  ; rA0.rBE.rAE += rA2 * rB0
        mul     rA2, rB0
        clr     rB0
        add     rAE, r0
        adc     rBE, r1
        adc     rA0, rB0
  ; rA0.rBE.rAE += rA1 * rB1
        mul     rA1, rB1
        add     rAE, r0
        adc     rBE, r1
        adc     rA0, rB0                ; rB0 == 0
  ; rB0.rA0.rBE = rA0.rBE + rA2 * rB1
        mul     rA2, rB1
        add     rBE, r0
        adc     rA0, r1
        adc     rB0, rB0                ; rB0 was 0
  ; rB0.rA0.rBE += rA1 * rB2
        mul     rA1, rB2
        clr     rB1
        add     rBE, r0
        adc     rA0, r1
        adc     rB0, rB1
  ; rB0.rA0 += rA2 * rB2
        mul     rA2, rB2
        add     rA0, r0
        adc     rB0, r1
  ; move result:  rA2.rA1.rA0.rAE.ZH.ZL = rB0.rA0.rBE.rAE.ZH.ZL
        mov     rA2, rB0
        mov     rA1, rA0
        mov     rA0, rBE
  ; restore __zero_reg__
        clr     r1
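/* For reference -- a minimal C sketch (not part of the build; the name
   `mul24x24` is illustrative only) of the 24x24 -> 48-bit schoolbook
   product that the nine `mul`/`add`/`adc` groups above compute,
   assuming the three mantissa bytes arrive zero-extended in a uint32_t:

       #include <stdint.h>

       static uint64_t mul24x24 (uint32_t a, uint32_t b)
       {
           uint64_t acc = 0;
           for (int i = 0; i < 3; i++)
               for (int j = 0; j < 3; j++)
                   // one byte-by-byte partial product, shifted into place
                   acc += (uint64_t) (((a >> (8 * i)) & 0xff)
                                      * ((b >> (8 * j)) & 0xff))
                          << (8 * (i + j));
           return acc;                  // at most 48 significant bits
       }

   On enhanced cores each partial product is a single two-cycle `mul`,
   which is why this unrolled sequence can beat the shift-and-add loop
   used in the `#else` branch below.  */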
#else   /* ! __AVR_ENHANCED__ */

0:      XCALL   _U(__fp_pscA)
        brcs    1f
        XCALL   _U(__fp_pscB)
        brcs    1f
        and     rA3, rB3                ; at least one arg is Inf (0xff)
        breq    1f
        XJMP    _U(__fp_inf)            ; nonzero * Inf --> Inf
1:      XJMP    _U(__fp_nan)            ; 0 * Inf --> NaN
2:      XJMP    _U(__fp_szero)

ENTRY __mulsf3x
        XCALL   _U(__fp_split3)
        brcs    0b

ENTRY __mulsf3_pse                      ; post-split entry
  ; check for zero (an exponent byte of 0 means a zero arg)
        tst     rA3
        breq    2b
        tst     rB3
        breq    2b
  ; rB3.rA3 := rA3 + rB3 (9-bit exponent sum)
        add     rA3, rB3
        ldi     rB3, 0
        adc     rB3, rB3

  ; multiplication:  rA2.rA1.rA0 * rB2.rB1.rB0 --> rA2.rA1.rA0.rAE.ZH.ZL

        clr     rBE                     ; 4th byte of rB*
        clr     ZL
        clr     ZH
        clr     rAE
  ; r0.rAE.ZH.ZL += rA0 * rB2.rB1.rB0
        clr     r0
        sec                             ; C=1: sentinel bit to count the 8 passes
        ror     rA0
1:      brcc    2f
        add     ZL, rB0
        adc     ZH, rB1
        adc     rAE, rB2
        adc     r0, rBE
2:      lsl     rB0
        rol     rB1
        rol     rB2
        rol     rBE
        lsr     rA0
        brne    1b
  ; rA0.r1.r0.rAE.ZH += rA1 * rBE.rB2.rB1
        ror     rA1                     ; C was 1 (the sentinel shifted out)
3:      brcc    4f
        add     ZH, rB1
        adc     rAE, rB2
        adc     r0, rBE
        adc     r1, rB0
        brcc    4f
        inc     rA0
4:      lsl     rB1
        rol     rB2
        rol     rBE
        rol     rB0
        lsr     rA1
        brne    3b
  ; rA0.r1.r0.rAE += rA2 * rB0.rBE.rB2
        ror     rA2                     ; C was 1
5:      brcc    6f
        add     rAE, rB2
        adc     r0, rBE
        adc     r1, rB0
        adc     rA0, rB1
6:      lsl     rB2
        rol     rBE
        rol     rB0
        rol     rB1
        lsr     rA2
        brne    5b
  ; move result:  rA2.rA1.rA0.rAE.ZH.ZL := rA0.r1.r0.rAE.ZH.ZL
        mov     rA2, rA0
        mov     rA1, r1
        mov     rA0, r0
  ; restore __zero_reg__
        clr     r1

#endif  /* ! __AVR_ENHANCED__ */

  ; exponent -= 127   (why not 126? for convenience of comparison)
        subi    rA3, lo8(127)
        sbci    rB3, hi8(127)
        brmi    13f                     ; denormalization is needed
        breq    15f                     ; normalization is impossible
  ; result exponent > min  ==>  normalization is possible
10:     tst     rA2
        brmi    11f                     ; mantissa is normal
  ; mantissa <<= 1
        lsl     ZL
        rol     ZH
        rol     rAE
        rol     rA0
        rol     rA1
        rol     rA2
  ; exponent -= 1
        subi    rA3, lo8(1)
        sbci    rB3, hi8(1)
        brne    10b
  ; check for overflow
11:     cpi     rA3, 254
        cpc     rB3, r1
        brlo    15f
        XJMP    _U(__fp_inf)
  ; check the lowest exponent value to avoid a long shift loop
12:     XJMP    _U(__fp_szero)
13:     cpi     rB3, hi8(-24)           ; here rB3 < 0
        brlt    12b
        cpi     rA3, lo8(-24)
        brlt    12b
  ; mantissa >>= -rA3
14:     lsr     rA2
        ror     rA1
        ror     rA0
        ror     rAE
        ror     ZH
        ror     ZL
        subi    rA3, -1
        brne    14b
  ; collect sticky bits for rounding
15:     or      ZH, ZL
  ; pack
        lsl     rA2
        adc     rA3, r1                 ; restore exponent for normal values
        lsr     rA3
        ror     rA2
        bld     rA3, 7                  ; sign
        ret
ENDFUNC

#endif /* !defined(__AVR_TINY__) */
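/* For reference -- a hedged C model (illustrative only, not part of the
   build; the name `expo_fixup` and its return conventions are made up
   for this sketch) of the exponent fix-up that follows the multiply.
   eA and eB are the biased input exponents, p the 48-bit mantissa
   product with its top bit at position 47 or 46:

       #include <stdint.h>

       static int expo_fixup (int eA, int eB, uint64_t *p)
       {
           int e = eA + eB - 127;        // rB3:rA3 after subi/sbci
           if (e < 0) {                  // result is subnormal
               if (e < -24)
                   return -1;            // underflows to signed zero
               do *p >>= 1; while (++e); // mantissa >>= -e
           } else if (e > 0) {
               while (!(*p >> 47)) {     // normalize: top bit up to 47
                   *p <<= 1;
                   if (--e == 0)
                       break;            // exponent hit the minimum
               }
               if (e >= 254)
                   return 255;           // overflows to Inf
           }
           return e;                     // pack + __fp_round finish up
       }

   The bias is removed as 127 rather than 126 because the final `lsl
   rA2 / adc rA3, r1` packing step carries the mantissa's leading bit
   into the exponent, absorbing the off-by-one for normal results.  */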