/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin.           */
/* All rights reserved.                                              */
/*                                                                   */
/* Redistribution and use in source and binary forms, with or        */
/* without modification, are permitted provided that the following   */
/* conditions are met:                                               */
/*                                                                   */
/*   1. Redistributions of source code must retain the above         */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer.                                                  */
/*                                                                   */
/*   2. Redistributions in binary form must reproduce the above      */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer in the documentation and/or other materials       */
/*      provided with the distribution.                              */
/*                                                                   */
/*    THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT        */
/*    AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,        */
/*    INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF       */
/*    MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE       */
/*    DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT       */
/*    AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,     */
/*    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES       */
/*    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE      */
/*    GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR           */
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF     */
/*    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT      */
/*    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT     */
/*    OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE            */
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
/*                                                                   */
/* The views and conclusions contained in the software and           */
/* documentation are those of the authors and should not be          */
/* interpreted as representing official policies, either expressed   */
/* or implied, of The University of Texas at Austin.                 */
/*********************************************************************/

/*
 * Smallest value of |re| + |im| over a strided vector of (re, im) pairs.
 *
 * In:   N    (r3)  number of pairs   (address of it when F_INTERFACE)
 *       X    (r4)  address of the first pair
 *       INCX (r5)  stride in pairs   (address of it when F_INTERFACE)
 * Out:  C1   (f1)  the minimum; 0 when N <= 0 or INCX <= 0
 *
 * Uses the paired floating-point instructions (fpabs/fpadd/fpsub/fpsel,
 * lfpdux/stfpdux, fsmfp/fsmtp): each FP register carries a primary and a
 * secondary value, so one fp* instruction processes two pairs at once.
 * NOTE(review): assumes SIZE == (1 << BASE_SHIFT) as supplied by
 * common.h -- confirm there.
 *
 * Stack: f14-f27 are spilled as 16-byte register pairs (14 * 16 bytes),
 * followed by a 16-byte block of zeros that is only used to load 0.0
 * into both halves of C1.  Everything is released again at LL(999).
 */

#define ASSEMBLER
#include "common.h"

/* Arguments */
#define N	r3
#define X	r4
#define INCX	r5

/* Derived stride, see the pointer set-up below */
#define INCX2	r6

/* Four independent running minima (two lanes each) */
#define C1	f1
#define C2	f0
#define C3	f2
#define C4	f3

/* Freshly loaded input values */
#define A1	f4
#define A2	f5
#define A3	f6
#define A4	f7
#define A5	f8
#define A6	f9
#define A7	f10
#define A8	f11

/* candidate - current minimum; its sign drives the select */
#define F1	f12
#define F2	f13
#define F3	f14
#define F4	f15

/* |re| + |im| candidates */
#define T1	f16
#define T2	f17
#define T3	f18
#define T4	f19

/* Absolute values of the inputs */
#define B1	f20
#define B2	f21
#define B3	f22
#define B4	f23
#define B5	f24
#define B6	f25
#define B7	f26
#define B8	f27


	PROLOGUE
	PROFCODE

/* ---- Spill f14-f27 (both halves), 16 bytes per register ---- */
	li	r10, -16

	stfpdux	f14, SP, r10
	stfpdux	f15, SP, r10

	stfpdux	f16, SP, r10
	stfpdux	f17, SP, r10
	stfpdux	f18, SP, r10
	stfpdux	f19, SP, r10

	stfpdux	f20, SP, r10
	stfpdux	f21, SP, r10
	stfpdux	f22, SP, r10
	stfpdux	f23, SP, r10

	stfpdux	f24, SP, r10
	stfpdux	f25, SP, r10
	stfpdux	f26, SP, r10
	stfpdux	f27, SP, r10

/* ---- Push four zero words: a 16-byte all-zero block at SP ---- */
	li	r10,   0
	stwu	r10,  -4(SP)
	stwu	r10,  -4(SP)
	stwu	r10,  -4(SP)
	stwu	r10,  -4(SP)

#ifdef F_INTERFACE
	LDINT	N,    0(N)
	LDINT	INCX, 0(INCX)
#endif

	lfpdx	C1, SP, r10		/* r10 == 0: C1 = (0.0, 0.0) */

/* INCX2 = distance between consecutive (re, im) pairs */
	slwi	INCX,  INCX, BASE_SHIFT
	add	INCX2, INCX, INCX

/* Nothing to scan: leave with C1 still zero */
	cmpwi	cr0, N, 0
	ble	LL(999)
	cmpwi	cr0, INCX, 0
	ble	LL(999)

/* ---- The first pair seeds the minimum ---- */
	LFD	A1, 0 * SIZE(X)
	LFD	A2, 1 * SIZE(X)
	add	X, X, INCX2

	fabs	A1, A1
	fabs	A2, A2

	subi	N, N, 1
	cmpwi	cr0, N, 0
	fadd	C1, A1, A2
	ble	LL(999)			/* it was the only pair */

/*
 * Switch to update-form addressing.  From here on
 *   INCX2 = pair stride - SIZE   (previous im -> next re)
 *   INCX  = SIZE                 (re -> im of the same pair)
 * and X is pulled back so that the first "X += INCX2" lands on the
 * re part of the second pair.
 */
	subi	INCX2, INCX2, SIZE
	fsmfp	C1, C1			/* copy the seed into the secondary half */
	li	INCX, SIZE
	fpmr	C2, C1
	sub	X, X, INCX2
	fpmr	C3, C1
	srawi.	r0, N, 3		/* r0 = number of 8-pair groups */
	fpmr	C4, C1
	mtctr	r0
	beq-	LL(105)

/*
 * Preload one group of eight pairs.  A1/A2 hold re/im of one pair in the
 * primary half and re/im of another pair in the secondary half; likewise
 * A3/A4, A5/A6 and A7/A8.
 */
	LFDUX	A1,    X, INCX2
	LFDUX	A2,    X, INCX
	LFDUX	A3,    X, INCX2
	LFDUX	A4,    X, INCX

	LFSDUX	A1,    X, INCX2
	LFSDUX	A2,    X, INCX
	LFSDUX	A3,    X, INCX2
	LFSDUX	A4,    X, INCX

	LFDUX	A5,    X, INCX2
	LFDUX	A6,    X, INCX
	LFDUX	A7,    X, INCX2
	LFDUX	A8,    X, INCX

	LFSDUX	A5,    X, INCX2
	LFSDUX	A6,    X, INCX
	LFSDUX	A7,    X, INCX2
	LFSDUX	A8,    X, INCX
	bdz	LL(103)			/* only one group: no further loads */
	.align 4

/*
 * Steady state: reduce the group already in A1-A8 while the loads for
 * the next group are issued in between.  The instruction order is the
 * original hand-interleaved schedule.
 */
LL(102):
	fpabs	B1, A1
	LFDUX	A1,    X, INCX2
	fpabs	B2, A2
	LFDUX	A2,    X, INCX
	fpabs	B3, A3
	LFDUX	A3,    X, INCX2
	fpabs	B4, A4
	LFDUX	A4,    X, INCX

	fpabs	B5, A5
	LFSDUX	A1,    X, INCX2
	fpabs	B6, A6
	LFSDUX	A2,    X, INCX
	fpabs	B7, A7
	LFSDUX	A3,    X, INCX2
	fpabs	B8, A8
	LFSDUX	A4,    X, INCX

	/* Tk = |re| + |im| for two pairs each */
	fpadd	T1, B1, B2
	LFDUX	A5,    X, INCX2
	fpadd	T2, B3, B4
	LFDUX	A6,    X, INCX
	fpadd	T3, B5, B6
	LFDUX	A7,    X, INCX2
	fpadd	T4, B7, B8
	LFDUX	A8,    X, INCX

	/* Fk = Tk - Ck */
	fpsub	F1, T1, C1
	LFSDUX	A5,    X, INCX2
	fpsub	F2, T2, C2
	LFSDUX	A6,    X, INCX
	fpsub	F3, T3, C3
	LFSDUX	A7,    X, INCX2
	fpsub	F4, T4, C4
	LFSDUX	A8,    X, INCX

	/* Per half: Fk >= 0 keeps Ck, otherwise Tk becomes the new minimum */
	fpsel	C1, F1, C1, T1
	fpsel	C2, F2, C2, T2
	fpsel	C3, F3, C3, T3
	fpsel	C4, F4, C4, T4
	bdnz	LL(102)
	.align 4

/* Drain: reduce the last preloaded group, no loads left to issue */
LL(103):
	fpabs	B1, A1
	fpabs	B2, A2
	fpabs	B3, A3
	fpabs	B4, A4

	fpabs	B5, A5
	fpabs	B6, A6
	fpabs	B7, A7
	fpabs	B8, A8

	fpadd	T1, B1, B2
	fpadd	T2, B3, B4
	fpadd	T3, B5, B6
	fpadd	T4, B7, B8

	fpsub	F1, T1, C1
	fpsub	F2, T2, C2
	fpsub	F3, T3, C3
	fpsub	F4, T4, C4

	fpsel	C1, F1, C1, T1
	fpsel	C2, F2, C2, T2
	fpsel	C3, F3, C3, T3
	fpsel	C4, F4, C4, T4
	.align 4

/* ---- Remainder: up to seven pairs, handled as 4 + 2 + 1 ---- */
LL(105):
	andi.	r0,  N, 7
	beq	LL(998)

	andi.	r0,  N, 4
	beq	LL(106)

	/* four pairs -> folded into C1 and C2 */
	LFDUX	A1,    X, INCX2
	LFDUX	A2,    X, INCX
	LFDUX	A3,    X, INCX2
	LFDUX	A4,    X, INCX

	LFSDUX	A1,    X, INCX2
	LFSDUX	A2,    X, INCX
	LFSDUX	A3,    X, INCX2
	LFSDUX	A4,    X, INCX

	fpabs	A1, A1
	fpabs	A2, A2
	fpabs	A3, A3
	fpabs	A4, A4

	fpadd	A1, A1, A2
	fpadd	A3, A3, A4

	fpsub	F1, A1, C1
	fpsub	F2, A3, C2

	fpsel	C1, F1, C1, A1
	fpsel	C2, F2, C2, A3
	.align 4

LL(106):
	andi.	r0,  N, 2
	beq	LL(107)

	/* two pairs -> folded into C1 */
	LFDUX	A1,    X, INCX2
	LFDUX	A2,    X, INCX
	LFSDUX	A1,    X, INCX2
	LFSDUX	A2,    X, INCX

	fpabs	A1, A1
	fpabs	A2, A2

	fpadd	A1, A1, A2

	fpsub	F1, A1, C1
	fpsel	C1, F1, C1, A1
	.align 4

LL(107):
	andi.	r0,  N, 1
	beq	LL(998)

	/* last pair: scalar instructions only (fabs/fadd/fsub/fsel) */
	LFDUX	A1,    X, INCX2
	LFDUX	A2,    X, INCX

	fabs	A1, A1
	fabs	A2, A2

	fadd	A1, A1, A2

	fsub	F1, A1, C1
	fsel	C1, F1, C1, A1
	.align 4

/* ---- Collapse the four two-lane minima into a single value ---- */
LL(998):
	fpsub	F1,  C2,  C1
	fpsub	F2,  C4,  C3

	fpsel	C1,  F1,  C1,  C2	/* C1 = min(C1, C2) per half */
	fpsel	C3,  F2,  C3,  C4	/* C3 = min(C3, C4) per half */

	fpsub	F1,  C3,  C1
	fpsel	C1,  F1,  C1,  C3	/* C1 = min(C1, C3) per half */

	fsmtp	C2, C1			/* secondary half of C1 -> primary of C2 */

	fsub	F1,  C2,  C1
	fsel	C1,  F1,  C1,  C2	/* min of the two halves */
	.align 4

/* ---- Common exit: restore f27..f14 and release the stack ---- */
LL(999):
	li	r10, 16			/* the first update also skips the zero block */

	lfpdux	f27, SP, r10
	lfpdux	f26, SP, r10
	lfpdux	f25, SP, r10
	lfpdux	f24, SP, r10

	lfpdux	f23, SP, r10
	lfpdux	f22, SP, r10
	lfpdux	f21, SP, r10
	lfpdux	f20, SP, r10

	lfpdux	f19, SP, r10
	lfpdux	f18, SP, r10
	lfpdux	f17, SP, r10
	lfpdux	f16, SP, r10

	lfpdux	f15, SP, r10
	lfpdux	f14, SP, r10
	addi	SP, SP,  16		/* step over the f14 slot: SP as on entry */
	blr

	EPILOGUE