/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin.           */
/* All rights reserved.                                              */
/*                                                                   */
/* Redistribution and use in source and binary forms, with or        */
/* without modification, are permitted provided that the following   */
/* conditions are met:                                               */
/*                                                                   */
/*   1. Redistributions of source code must retain the above         */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer.                                                  */
/*                                                                   */
/*   2. Redistributions in binary form must reproduce the above      */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer in the documentation and/or other materials       */
/*      provided with the distribution.                              */
/*                                                                   */
/*    THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT        */
/*    AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,        */
/*    INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF       */
/*    MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE       */
/*    DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT       */
/*    AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,     */
/*    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES       */
/*    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE      */
/*    GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR           */
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF     */
/*    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT      */
/*    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT     */
/*    OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE            */
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
/*                                                                   */
/* The views and conclusions contained in the software and           */
/* documentation are those of the authors and should not be          */
/* interpreted as representing official policies, either expressed   */
/* or implied, of The University of Texas at Austin.                 */
/*********************************************************************/

/*
 * Sum of absolute values of a strided vector.
 *
 * Inputs (register assignments below):
 *   N    - element count
 *   X    - address of the first element
 *   INCX - stride between elements, in elements
 * With F_INTERFACE defined, N and INCX arrive as pointers and are
 * dereferenced first.
 *
 * Result: the sum of fabs(x[i]) over the N elements, left in f1.
 * Zero is returned when N <= 0 or the scaled INCX <= 0.
 *
 * Layout of the routine:
 *   - the main loop consumes 16 elements per iteration and is software
 *     pipelined: the loads for the next half-block are interleaved with
 *     the fabs/FADD work on the previous one;
 *   - eight independent accumulators f0..f7 are used and reduced at the
 *     end;
 *   - a scalar loop handles the remaining N & 15 elements.
 *
 * LFDUX, FADD, LDINT, LL, SP, CTR, SIZE and BASE_SHIFT come from common.h.
 * NOTE(review): LFDUX/FADD presumably select the single or double
 * precision instruction depending on DOUBLE - confirm in common.h.
 *
 * The instruction order inside the loops is deliberate; keep it when
 * editing.
 */

#define ASSEMBLER
#include "common.h"

/* Integer argument registers. */
#define N	r3
#define X	r4
#define INCX	r5

/* Byte distance ahead of X used by the prefetch slots. */
#define PREX	r6

/* Not referenced anywhere in this file. */
#define ATTR	r7

/* f0 is loaded with 0.0 and then doubles as the first accumulator. */
#define FZERO	f0

/* Frame: 18 saved FPRs at 0..136(SP), one scratch word at 144(SP). */
#define STACKSIZE 160

/* First prefetch slot of each half-block: cache touch at X + PREX on   */
/* PPCG4 builds, a nop otherwise so the instruction count is unchanged. */
#ifdef PPCG4
#define PREFETCH_X		dcbt	X, PREX
#else
#define PREFETCH_X		nop
#endif

/* Second prefetch slot: active only for PPCG4 builds with DOUBLE set.  */
#if defined(PPCG4) && defined(DOUBLE)
#define PREFETCH_X_DOUBLE	dcbt	X, PREX
#else
#define PREFETCH_X_DOUBLE	nop
#endif

	PROLOGUE
	PROFCODE

	/* Open the frame; r0 = 0 is stored below to build a 0.0 constant. */
	addi	SP, SP, -STACKSIZE
	li	r0, 0

	/* Save f14..f31, which this routine overwrites. */
	stfd	f14,   0(SP)
	stfd	f15,   8(SP)
	stfd	f16,  16(SP)
	stfd	f17,  24(SP)

	stfd	f18,  32(SP)
	stfd	f19,  40(SP)
	stfd	f20,  48(SP)
	stfd	f21,  56(SP)

	stfd	f22,  64(SP)
	stfd	f23,  72(SP)
	stfd	f24,  80(SP)
	stfd	f25,  88(SP)

	stfd	f26,  96(SP)
	stfd	f27, 104(SP)
	stfd	f28, 112(SP)
	stfd	f29, 120(SP)

	stfd	f30, 128(SP)
	stfd	f31, 136(SP)

	/* FZERO = 0.0: write an all-zero word and read it back as a float. */
	stw	r0, 144(SP)
	lfs	FZERO,144(SP)

#ifdef F_INTERFACE
	/* Arguments are passed by reference: fetch the actual values. */
	LDINT	N, 0(N)
	LDINT	INCX, 0(INCX)
#endif

	/*
	 * Setup, interleaved with clearing accumulators f1..f7:
	 *   INCX <<= BASE_SHIFT   (NOTE(review): presumably element stride
	 *                          to byte stride - confirm BASE_SHIFT)
	 *   PREX  = 3 * 16 * SIZE (prefetch distance)
	 *   X    -= INCX          (pre-bias so the first LFDUX, which adds
	 *                          INCX before loading, reads element 0)
	 */
	slwi	INCX, INCX, BASE_SHIFT
	fmr	f1, FZERO
	li	PREX, 3 * 16 * SIZE
	fmr	f2, FZERO
	sub	X, X, INCX
	fmr	f3, FZERO
	fmr	f4, FZERO
	fmr	f5, FZERO
	fmr	f6, FZERO
	cmpwi	cr0, N, 0
	fmr	f7, FZERO
	ble-	LL(999)			/* N <= 0: result is 0 */

	cmpwi	cr0, INCX, 0
	ble-	LL(999)			/* scaled INCX <= 0: result is 0 */

	/* CTR = N / 16 full blocks; none means go straight to the tail. */
	srawi.	r0, N, 4
	mtspr	CTR, r0
	beq-	LL(150)

	/*
	 * Pipeline prologue: load the first 16 elements into f8..f15 and
	 * f24..f31, and start taking absolute values of f8..f15 into
	 * f16..f23 while the second group of loads is issued.
	 */
	LFDUX	f8,  X, INCX
	LFDUX	f9,  X, INCX
	LFDUX	f10, X, INCX
	LFDUX	f11, X, INCX
	LFDUX	f12, X, INCX
	LFDUX	f13, X, INCX
	LFDUX	f14, X, INCX
	LFDUX	f15, X, INCX
	fabs	f16, f8

	LFDUX	f24, X, INCX
	fabs	f17, f9
	LFDUX	f25, X, INCX
	fabs	f18, f10
	LFDUX	f26, X, INCX
	fabs	f19, f11
	LFDUX	f27, X, INCX
	fabs	f20, f12
	LFDUX	f28, X, INCX
	fabs	f21, f13
	LFDUX	f29, X, INCX
	fabs	f22, f14
	LFDUX	f30, X, INCX
	fabs	f23, f15
	LFDUX	f31, X, INCX
	bdz	LL(120)			/* only one block: drain the pipeline */
	.align 4

	/*
	 * Steady state, 16 elements per trip.  Each group of four
	 * instructions does: load a new element, add a ready |x| from
	 * f16..f23 into an accumulator, a prefetch-or-nop slot, then fabs
	 * of an element loaded earlier into the slot just consumed.
	 * NOTE(review): the plain nops look like scheduling padding for
	 * the target core; do not remove them without measuring.
	 */
LL(110):
	/* First half: reload f8..f15, accumulate, fabs of f24..f31. */
	LFDUX	f8,  X, INCX
	FADD	f0, f0, f16
	PREFETCH_X
	fabs	f16, f24

	LFDUX	f9,  X, INCX
	FADD	f1, f1, f17
	nop
	fabs	f17, f25

	LFDUX	f10, X, INCX
	FADD	f2, f2, f18
	nop
	fabs	f18, f26
	LFDUX	f11, X, INCX
	FADD	f3, f3, f19
	nop
	fabs	f19, f27

	LFDUX	f12, X, INCX
	FADD	f4, f4, f20
	PREFETCH_X_DOUBLE
	fabs	f20, f28

	LFDUX	f13, X, INCX
	FADD	f5, f5, f21
	nop
	fabs	f21, f29

	LFDUX	f14, X, INCX
	FADD	f6, f6, f22
	nop
	fabs	f22, f30
	LFDUX	f15, X, INCX
	FADD	f7, f7, f23
	nop
	fabs	f23, f31

	/* Second half: reload f24..f31, accumulate, fabs of f8..f15. */
	LFDUX	f24, X, INCX
	FADD	f0, f0, f16
	PREFETCH_X
	fabs	f16, f8
	LFDUX	f25, X, INCX
	FADD	f1, f1, f17
	nop
	fabs	f17, f9

	LFDUX	f26, X, INCX
	FADD	f2, f2, f18
	nop
	fabs	f18, f10
	LFDUX	f27, X, INCX
	FADD	f3, f3, f19
	nop
	fabs	f19, f11

	LFDUX	f28, X, INCX
	FADD	f4, f4, f20
	PREFETCH_X_DOUBLE
	fabs	f20, f12

	LFDUX	f29, X, INCX
	FADD	f5, f5, f21
	nop
	fabs	f21, f13

	LFDUX	f30, X, INCX
	FADD	f6, f6, f22
	nop
	fabs	f22, f14

	LFDUX	f31, X, INCX
	FADD	f7, f7, f23
	fabs	f23, f15
	bdnz	LL(110)
	.align 4

	/*
	 * Pipeline epilogue: f16..f23 hold |f8..f15| and f24..f31 hold the
	 * last eight raw elements.  Fold both groups into the accumulators.
	 */
LL(120):
	FADD	f0, f0, f16
	fabs	f16, f24
	FADD	f1, f1, f17
	fabs	f17, f25

	FADD	f2, f2, f18
	fabs	f18, f26
	FADD	f3, f3, f19
	fabs	f19, f27

	FADD	f4, f4, f20
	fabs	f20, f28
	FADD	f5, f5, f21
	fabs	f21, f29

	FADD	f6, f6, f22
	fabs	f22, f30
	FADD	f7, f7, f23
	fabs	f23, f31

	FADD	f0, f0, f16
	FADD	f1, f1, f17
	FADD	f2, f2, f18
	FADD	f3, f3, f19

	FADD	f4, f4, f20
	FADD	f5, f5, f21
	FADD	f6, f6, f22
	FADD	f7, f7, f23
	.align 4

	/* Tail: CTR = N & 15 leftover elements, one per iteration. */
LL(150):
	andi.	r0, N, 15
	mtspr	CTR, r0
	beq	LL(999)
	.align 4

LL(160):
	LFDUX	f8, X, INCX
	fabs	f8, f8
	FADD	f0, f0, f8
	bdnz	LL(160)
	.align 4

	/* Reduce the eight partial sums pairwise; the total ends up in f1. */
LL(999):
	FADD	f0, f0, f1
	FADD	f2, f2, f3
	FADD	f4, f4, f5
	FADD	f6, f6, f7

	FADD	f0, f0, f2
	FADD	f4, f4, f6
	FADD	f1, f0, f4

	/* Restore f14..f31, release the frame and return. */
	lfd	f14,   0(SP)
	lfd	f15,   8(SP)
	lfd	f16,  16(SP)
	lfd	f17,  24(SP)

	lfd	f18,  32(SP)
	lfd	f19,  40(SP)
	lfd	f20,  48(SP)
	lfd	f21,  56(SP)

	lfd	f22,  64(SP)
	lfd	f23,  72(SP)
	lfd	f24,  80(SP)
	lfd	f25,  88(SP)

	lfd	f26,  96(SP)
	lfd	f27, 104(SP)
	lfd	f28, 112(SP)
	lfd	f29, 120(SP)

	lfd	f30, 128(SP)
	lfd	f31, 136(SP)

	addi	SP, SP, STACKSIZE
	blr

	EPILOGUE