/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin.           */
/* All rights reserved.                                              */
/*                                                                   */
/* Redistribution and use in source and binary forms, with or        */
/* without modification, are permitted provided that the following   */
/* conditions are met:                                               */
/*                                                                   */
/*   1. Redistributions of source code must retain the above         */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer.                                                  */
/*                                                                   */
/*   2. Redistributions in binary form must reproduce the above      */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer in the documentation and/or other materials       */
/*      provided with the distribution.                              */
/*                                                                   */
/*    THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT        */
/*    AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,        */
/*    INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF       */
/*    MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE       */
/*    DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT       */
/*    AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,     */
/*    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES       */
/*    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE      */
/*    GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR           */
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF     */
/*    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT      */
/*    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT     */
/*    OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE            */
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
/*                                                                   */
/* The views and conclusions contained in the software and           */
/* documentation are those of the authors and should not be          */
/* interpreted as representing official policies, either expressed   */
/* or implied, of The University of Texas at Austin.                 */
/*********************************************************************/

/*
 * Square root of a sum of squares over a strided vector.
 *
 * For each of N stride steps the routine loads two values (at X and at
 * X + SIZE), squares both and accumulates them; the final result is the
 * square root of the total, left in f1.
 * NOTE(review): the two-values-per-step layout plus ZBASE_SHIFT suggests
 * this is a complex 2-norm (xNRM2-style) kernel -- confirm against the
 * build rules, the routine name is supplied by PROLOGUE and not visible
 * here.
 *
 * Inputs (register assignment below):
 *   N    - element count; the routine returns 0.0 when N <= 0.
 *   X    - address of the first element.
 *   INCX - stride in elements; the routine returns 0.0 when the scaled
 *          stride is <= 0.
 *   With F_INTERFACE defined, N and INCX arrive as pointers and are
 *   dereferenced with LDINT.
 *
 * LFDUX, LFDX, LDINT, LL, SIZE, ZBASE_SHIFT, SP, PROLOGUE, PROFCODE and
 * EPILOGUE come from common.h.  The comments below assume LFDUX is an
 * update-form indexed load (X advances by INCX) and LFDX a plain
 * indexed load -- TODO confirm in common.h.
 *
 * No scaling is applied to the squares, so the sum can overflow or
 * underflow for extreme inputs.
 */

#define ASSEMBLER
#include "common.h"

/* Argument registers. */
#define N	r3
#define X	r4
#define INCX	r5

/* PRE: prefetch distance in bytes; INC1: byte offset of the second value. */
#define PRE	r8
#define INC1	r9

/* Stack slots for single-precision constants, placed above the 18 saved FPRs. */
#define FZERO	144(SP)
#define FONE	148(SP)
#define C1	152(SP)
#define C2	156(SP)

#define STACKSIZE 160

	PROLOGUE
	PROFCODE

	addi	SP, SP, -STACKSIZE

	/* High halves of IEEE-754 single-precision bit patterns. */
	li	r10,   0		/* 0x00000000 = 0.0f */
	lis	r11,   0x3f80		/* 0x3f800000 = 1.0f */
	lis	r6,    0x3f00		/* 0x3f000000 = 0.5f */
	lis	r7,    0x4040		/* 0x40400000 = 3.0f */

	/* Save f14-f31: all of them are overwritten below. */
	stfd	f14,     0(SP)
	stfd	f15,     8(SP)
	stfd	f16,    16(SP)
	stfd	f17,    24(SP)

	stfd	f18,    32(SP)
	stfd	f19,    40(SP)
	stfd	f20,    48(SP)
	stfd	f21,    56(SP)

	stfd	f22,    64(SP)
	stfd	f23,    72(SP)
	stfd	f24,    80(SP)
	stfd	f25,    88(SP)

	stfd	f26,    96(SP)
	stfd	f27,   104(SP)
	stfd	f28,   112(SP)
	stfd	f29,   120(SP)

	stfd	f30,   128(SP)
	stfd	f31,   136(SP)

	/* Spill the constants so they can be loaded into FPRs with lfs. */
	/* FONE is written here but never read back in this file.        */
	stw	r10,   FZERO
	stw	r11,   FONE
	stw	r6,    C1
	stw	r7,    C2

	/* f1 = 0.0: this is the value returned by the early exits below. */
	lfs	f1, FZERO

#ifdef F_INTERFACE
	/* Fortran-style interface: arguments are passed by reference. */
	LDINT	N,    0(N)
	LDINT	INCX, 0(INCX)
#endif

	/* Scale the element stride by 2^ZBASE_SHIFT (presumably to bytes). */
	slwi	INCX, INCX, ZBASE_SHIFT
	li	INC1, SIZE
	li	PRE, 3 * 16 * SIZE

	/* Nothing to do for a non-positive count or stride: return f1 = 0. */
	cmpwi	cr0, N, 0
	ble-	LL(999)
	cmpwi	cr0, INCX, 0
	ble-	LL(999)

	/* Clear the sixteen partial-sum accumulators f0-f15 (f1 is already 0). */
	fmr	f0,  f1
	/* Pre-bias X by one stride so the first LFDUX lands on element 0. */
	sub	X, X, INCX
	fmr	f2,  f1
	fmr	f3,  f1
	fmr	f4,  f1
	fmr	f5,  f1
	fmr	f6,  f1
	fmr	f7,  f1
	fmr	f8,  f1
	fmr	f9,  f1
	fmr	f10, f1
	fmr	f11, f1
	fmr	f12, f1
	fmr	f13, f1
	fmr	f14, f1
	fmr	f15, f1

	/* CTR = N / 8 batches of eight stride steps; skip if there are none. */
	srawi.	r0, N, 3
	mtspr	CTR,  r0
	beq-	cr0, LL(1150)

	/* Pipeline prologue: preload the first batch (16 values) into f16-f31. */
	LFDUX	f16, X, INCX
	LFDX	f17, X, INC1
	LFDUX	f18, X, INCX
	LFDX	f19, X, INC1
	LFDUX	f20, X, INCX
	LFDX	f21, X, INC1
	LFDUX	f22, X, INCX
	LFDX	f23, X, INC1

	LFDUX	f24, X, INCX
	LFDX	f25, X, INC1
	LFDUX	f26, X, INCX
	LFDX	f27, X, INC1
	LFDUX	f28, X, INCX
	LFDX	f29, X, INC1
	LFDUX	f30, X, INCX
	LFDX	f31, X, INC1
	/* Only one batch: go straight to the drain code. */
	bdz	LL(1120)
	.align 4

/* Steady state: accumulate the batch already in f16-f31 while loading */
/* the next batch into the same registers.                             */
LL(1110):
	fmadd	f0,  f16, f16, f0
	LFDUX	f16, X, INCX
	fmadd	f1,  f17, f17, f1
	LFDX	f17, X, INC1
	fmadd	f2,  f18, f18, f2
	LFDUX	f18, X, INCX
	fmadd	f3,  f19, f19, f3
	LFDX	f19, X, INC1

#ifdef PPCG4
	/* Prefetch PRE bytes ahead of the current position. */
	dcbt	X, PRE
#endif

	fmadd	f4,  f20, f20, f4
	LFDUX	f20, X, INCX
	fmadd	f5,  f21, f21, f5
	LFDX	f21, X, INC1
	fmadd	f6,  f22, f22, f6
	LFDUX	f22, X, INCX
	fmadd	f7,  f23, f23, f7
	LFDX	f23, X, INC1

	fmadd	f8,  f24, f24, f8
	LFDUX	f24, X, INCX
	fmadd	f9,  f25, f25, f9
	LFDX	f25, X, INC1
	fmadd	f10, f26, f26, f10
	LFDUX	f26, X, INCX
	fmadd	f11, f27, f27, f11
	LFDX	f27, X, INC1

#ifdef PPCG4
	dcbt	X, PRE
#endif

	fmadd	f12, f28, f28, f12
	LFDUX	f28, X, INCX
	fmadd	f13, f29, f29, f13
	LFDX	f29, X, INC1
	fmadd	f14, f30, f30, f14
	LFDUX	f30, X, INCX
	fmadd	f15, f31, f31, f15
	LFDX	f31, X, INC1
	bdnz	LL(1110)
	.align 4

/* Pipeline drain: accumulate the last loaded batch, no further loads. */
LL(1120):
	fmadd	f0,  f16, f16, f0
	fmadd	f1,  f17, f17, f1
	fmadd	f2,  f18, f18, f2
	fmadd	f3,  f19, f19, f3

	fmadd	f4,  f20, f20, f4
	fmadd	f5,  f21, f21, f5
	fmadd	f6,  f22, f22, f6
	fmadd	f7,  f23, f23, f7

	fmadd	f8,  f24, f24, f8
	fmadd	f9,  f25, f25, f9
	fmadd	f10, f26, f26, f10
	fmadd	f11, f27, f27, f11

	fmadd	f12, f28, f28, f12
	fmadd	f13, f29, f29, f13
	fmadd	f14, f30, f30, f14
	fmadd	f15, f31, f31, f15
	.align 4

/* Remainder: the N & 7 stride steps not covered by the batched loop. */
LL(1150):
	andi.	r0,  N, 7
	mtspr	CTR, r0
	beq-	cr0, LL(1170)
	.align 4

LL(1160):
	LFDUX	f16, X, INCX
	LFDX	f17, X, INC1
	fmadd	f0,  f16, f16, f0
	fmadd	f1,  f17, f17, f1
	bdnz	LL(1160)
	.align 4

/* Pairwise reduction of the sixteen partial sums into f1. */
LL(1170):
	fadd	f0,  f0,  f1
	fadd	f2,  f2,  f3
	fadd	f4,  f4,  f5
	fadd	f6,  f6,  f7

	fadd	f8,  f8,  f9
	fadd	f10, f10, f11
	fadd	f12, f12, f13
	fadd	f14, f14, f15

	fadd	f0,  f0,  f2
	fadd	f4,  f4,  f6
	fadd	f8,  f8,  f10
	fadd	f12, f12, f14

	fadd	f0,  f0,  f4
	fadd	f8,  f8,  f12

	fadd	f1,  f0,  f8		/* f1 = s, the total sum of squares */
	lfs	f4, FZERO

	/* s == 0: return it unchanged and avoid 1/sqrt(0). */
	fcmpu	cr0, f1, f4
	beq	cr0, LL(999)

	/* sqrt(s) from the reciprocal-square-root estimate plus refinement. */
	frsqrte	f0, f1			/* y0 ~ 1/sqrt(s)                  */
	lfs	f8, C1			/* f8 = 0.5                        */
	lfs	f9, C2			/* f9 = 3.0                        */

	fmul	f2,  f1,  f0		/* s * y0                          */
	fadd	f7,  f8,  f8		/* f7 = 0.5 + 0.5 = 1.0            */
	fmul	f3,  f0,  f8		/* 0.5 * y0                        */
	fnmsub	f4,  f2,  f0, f9	/* 3 - s * y0^2                    */
	fmul	f0,  f3,  f4		/* y1 = 0.5 * y0 * (3 - s * y0^2)  */

	fmul	f5,  f1,  f0		/* r = s * y1 ~ sqrt(s)            */
	fmul	f2,  f5,  f8		/* 0.5 * r                         */
	fnmsub	f3,  f5,  f0, f7	/* 1 - r * y1                      */
	fmadd	f1,  f2,  f3, f5	/* f1 = r + 0.5 * r * (1 - r * y1) */
	.align 4

/* Common exit: restore f14-f31, release the frame, result is in f1. */
LL(999):
	lfd	f14,     0(SP)
	lfd	f15,     8(SP)

	lfd	f16,    16(SP)
	lfd	f17,    24(SP)

	lfd	f18,    32(SP)
	lfd	f19,    40(SP)
	lfd	f20,    48(SP)
	lfd	f21,    56(SP)

	lfd	f22,    64(SP)
	lfd	f23,    72(SP)
	lfd	f24,    80(SP)
	lfd	f25,    88(SP)

	lfd	f26,    96(SP)
	lfd	f27,   104(SP)
	lfd	f28,   112(SP)
	lfd	f29,   120(SP)

	lfd	f30,   128(SP)
	lfd	f31,   136(SP)
	addi	SP, SP, STACKSIZE
	blr

	EPILOGUE