/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin.           */
/* All rights reserved.                                              */
/*                                                                   */
/* Redistribution and use in source and binary forms, with or        */
/* without modification, are permitted provided that the following   */
/* conditions are met:                                               */
/*                                                                   */
/*   1. Redistributions of source code must retain the above         */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer.                                                  */
/*                                                                   */
/*   2. Redistributions in binary form must reproduce the above      */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer in the documentation and/or other materials       */
/*      provided with the distribution.                              */
/*                                                                   */
/*    THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT        */
/*    AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,        */
/*    INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF       */
/*    MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE       */
/*    DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT       */
/*    AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,     */
/*    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES       */
/*    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE      */
/*    GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR           */
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF     */
/*    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT      */
/*    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT     */
/*    OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE            */
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
/*                                                                   */
/* The views and conclusions contained in the software and           */
/* documentation are those of the authors and should not be          */
/* interpreted as representing official policies, either expressed   */
/* or implied, of The University of Texas at Austin.                 */
/*********************************************************************/

#define ASSEMBLER
#include "common.h"

/*
 * In-place vector scaling kernel for the PowerPC paired FPU:
 *
 *     x[i * incx] = ALPHA * x[i * incx]      for i = 0 .. N-1
 *
 * Register arguments on entry (see the defines below):
 *     r3 = N      element count; the kernel does nothing when N <= 0
 *     f1 = ALPHA  scale factor
 *     r6 = X      address of the first element
 *     r7 = INCX   stride in elements (turned into a byte stride on entry)
 * r4 and r5 are overwritten as scratch (INCX2 / X2) without being read.
 *
 * Four code paths are selected from the byte stride and from ALPHA:
 *     unit stride, ALPHA == 0 : paired zero stores,        16 elements/pass
 *     unit stride, ALPHA != 0 : paired load/fpmul/store,   16 elements/pass
 *     other stride, ALPHA == 0: scalar zero stores,         8 elements/pass
 *     other stride, ALPHA != 0: scalar load/fmul/store,     8 elements/pass
 * The ALPHA == 0 paths never load x, so whatever x held (including
 * NaN/Inf) is replaced by zero.  A NaN ALPHA compares "not equal" and
 * therefore takes the multiply path.
 *
 * Stack use: 64 bytes below the incoming SP (f14, f15, f16 as 16-byte
 * pairs plus a 16-byte block of zeros); SP is restored before return.
 *
 * NOTE(review): PROLOGUE, PROFCODE, EPILOGUE, LL(), SP, SIZE, BASE_SHIFT
 * and the upper-case load/store mnemonics (LFPDUX, STFDUX, ...) are not
 * defined in this file; presumably they come from common.h.
 */

/* Arguments */
#define N	r3
#define X	r6		/* read pointer (and write pointer when zeroing) */
#define INCX	r7		/* byte stride after the shift at entry */

/* Scratch integer registers */
#define INCX2	r4		/* 2 * INCX: byte step for one element pair */
#define X2	r5		/* write pointer on the multiply paths */

#define ALPHA	f1

/* Loaded input values; A1 doubles as the zero constant */
#define A1	f0
#define A2	f16
#define A3	f2
#define A4	f3
#define A5	f4
#define A6	f5
#define A7	f6
#define A8	f7

/* Scaled results waiting to be stored */
#define B1	f8
#define B2	f9
#define B3	f10
#define B4	f11
#define B5	f12
#define B6	f13
#define B7	f14
#define B8	f15

	PROLOGUE
	PROFCODE

	/* Save f14, f15, f16 (used as B7, B8, A2) as 16-byte pairs. */
	li	r10, -16

	stfpdux	f14, SP, r10
	stfpdux	f15, SP, r10
	stfpdux	f16, SP, r10

	/* Push 16 bytes of zeros; SP now points at them. */
	li	r10,   0
	stwu	r10,   -4(SP)
	stwu	r10,   -4(SP)
	stwu	r10,   -4(SP)
	stwu	r10,   -4(SP)

	lfpdx	A1, SP, r10		/* A1 = zero in both halves */
	fsmfp	ALPHA, ALPHA		/* replicate ALPHA into the secondary half so fpmul scales both elements of a pair */

	slwi	INCX,  INCX, BASE_SHIFT	/* element stride -> byte stride */
	add	INCX2, INCX, INCX

	cmpwi	cr0, N, 0
	ble	LL(999)			/* N <= 0: nothing to do */

	cmpwi	cr0, INCX, SIZE
	bne	LL(100)			/* not unit stride: scalar paths */

	fcmpu	cr7, ALPHA, A1
	bne	cr7, LL(50)		/* ALPHA != 0: multiply path */

/* ---- unit stride, ALPHA == 0: fill x with zeros ---- */

	/* Bias X by one step: the update-form stores add INCX2 first. */
	sub	X,  X, INCX2

	andi.	r0, X, 2 * SIZE - 1
	beq	LL(11)			/* already aligned for paired stores */

	/* Peel one element so the paired stores start on a 2*SIZE boundary. */
	STFDX	A1, X, INCX2
	addi	X, X, 1 * SIZE
	addi	N, N, -1
	cmpwi	cr0, N, 0
	ble	LL(999)
	.align 4

LL(11):
	srawi.	r0, N, 4		/* CTR = N / 16 */
	mtspr	CTR,  r0
	beq-	LL(15)
	.align 4

LL(12):
	/* 8 paired stores = 16 elements per pass */
	STFPDUX	A1, X, INCX2
	STFPDUX	A1, X, INCX2
	STFPDUX	A1, X, INCX2
	STFPDUX	A1, X, INCX2
	STFPDUX	A1, X, INCX2
	STFPDUX	A1, X, INCX2
	STFPDUX	A1, X, INCX2
	STFPDUX	A1, X, INCX2
	bdnz	LL(12)
	.align 4

LL(15):
	/* Remainder N % 16, handled in chunks of 8, 4, 2, 1 elements. */
	andi.	r0,  N, 15
	beq	LL(999)
	andi.	r0,  N, 8
	beq	LL(16)

	STFPDUX	A1, X, INCX2
	STFPDUX	A1, X, INCX2
	STFPDUX	A1, X, INCX2
	STFPDUX	A1, X, INCX2
	.align 4

LL(16):
	andi.	r0,  N, 4
	beq	LL(17)

	STFPDUX	A1, X, INCX2
	STFPDUX	A1, X, INCX2
	.align 4

LL(17):
	andi.	r0,  N, 2
	beq	LL(18)

	STFPDUX	A1, X, INCX2
	.align 4

LL(18):
	andi.	r0,  N, 1
	beq	LL(999)
	STFDUX	A1, X, INCX2		/* last single element */
	b	LL(999)
	.align 4

/* ---- unit stride, ALPHA != 0: x = ALPHA * x, two elements at a time ---- */

LL(50):
	/* X reads, X2 writes; both biased for update-form addressing. */
	sub	X2, X, INCX2
	sub	X,  X, INCX2

	andi.	r0, X, 2 * SIZE - 1
	beq	LL(51)			/* already aligned for paired access */

	/* Peel one element so paired accesses start on a 2*SIZE boundary. */
	LFDX	A1, X, INCX2
	addi	X, X, 1 * SIZE

	fmul	B1, ALPHA, A1
	addi	N, N, -1
	cmpwi	cr0, N, 0

	STFDX	B1, X2, INCX2
	addi	X2, X2, 1 * SIZE
	ble	LL(999)
	.align 4

LL(51):
	srawi.	r0, N, 4		/* CTR = N / 16 */
	mtspr	CTR,  r0
	beq-	LL(55)

	/* Preload the first 16 elements. */
	LFPDUX	A1, X, INCX2
	LFPDUX	A2, X, INCX2
	LFPDUX	A3, X, INCX2
	LFPDUX	A4, X, INCX2
	LFPDUX	A5, X, INCX2
	LFPDUX	A6, X, INCX2
	LFPDUX	A7, X, INCX2
	LFPDUX	A8, X, INCX2
	bdz	LL(53)			/* only one block: go straight to the drain */
	.align 4

LL(52):
	/* Scale the block loaded earlier while loading the next one. */
	fpmul	B1, ALPHA, A1
	LFPDUX	A1, X, INCX2
	fpmul	B2, ALPHA, A2
	LFPDUX	A2, X, INCX2
	fpmul	B3, ALPHA, A3
	LFPDUX	A3, X, INCX2
	fpmul	B4, ALPHA, A4
	LFPDUX	A4, X, INCX2
	fpmul	B5, ALPHA, A5
	LFPDUX	A5, X, INCX2
	fpmul	B6, ALPHA, A6
	LFPDUX	A6, X, INCX2
	fpmul	B7, ALPHA, A7
	LFPDUX	A7, X, INCX2
	fpmul	B8, ALPHA, A8
	LFPDUX	A8, X, INCX2

	STFPDUX	B1, X2, INCX2
	STFPDUX	B2, X2, INCX2
	STFPDUX	B3, X2, INCX2
	STFPDUX	B4, X2, INCX2
	STFPDUX	B5, X2, INCX2
	STFPDUX	B6, X2, INCX2
	STFPDUX	B7, X2, INCX2
	STFPDUX	B8, X2, INCX2
	bdnz	LL(52)
	.align 4

LL(53):
	/* Drain: scale and store the last preloaded block. */
	fpmul	B1, ALPHA, A1
	fpmul	B2, ALPHA, A2
	fpmul	B3, ALPHA, A3
	fpmul	B4, ALPHA, A4
	fpmul	B5, ALPHA, A5
	fpmul	B6, ALPHA, A6
	STFPDUX	B1, X2, INCX2
	fpmul	B7, ALPHA, A7
	STFPDUX	B2, X2, INCX2
	fpmul	B8, ALPHA, A8
	STFPDUX	B3, X2, INCX2

	STFPDUX	B4, X2, INCX2
	STFPDUX	B5, X2, INCX2
	STFPDUX	B6, X2, INCX2
	STFPDUX	B7, X2, INCX2
	STFPDUX	B8, X2, INCX2
	.align 4

LL(55):
	/* Remainder N % 16, handled in chunks of 8, 4, 2, 1 elements. */
	andi.	r0,  N, 15
	beq	LL(999)
	andi.	r0,  N, 8
	beq	LL(56)

	LFPDUX	A1, X, INCX2
	LFPDUX	A2, X, INCX2
	LFPDUX	A3, X, INCX2
	LFPDUX	A4, X, INCX2

	fpmul	B1, ALPHA, A1
	fpmul	B2, ALPHA, A2
	fpmul	B3, ALPHA, A3
	fpmul	B4, ALPHA, A4

	STFPDUX	B1, X2, INCX2
	STFPDUX	B2, X2, INCX2
	STFPDUX	B3, X2, INCX2
	STFPDUX	B4, X2, INCX2
	.align 4

LL(56):
	andi.	r0,  N, 4
	beq	LL(57)

	LFPDUX	A1, X, INCX2
	LFPDUX	A2, X, INCX2
	fpmul	B1, ALPHA, A1
	fpmul	B2, ALPHA, A2
	STFPDUX	B1, X2, INCX2
	STFPDUX	B2, X2, INCX2
	.align 4

LL(57):
	andi.	r0,  N, 2
	beq	LL(58)

	LFPDUX	A1, X, INCX2
	fpmul	B1, ALPHA, A1
	STFPDUX	B1, X2, INCX2
	.align 4

LL(58):
	andi.	r0,  N, 1
	beq	LL(999)

	/* Last single element: scalar load, multiply, store. */
	LFDX	A1, X, INCX2
	fmul	B1, ALPHA, A1
	STFDX	B1, X2, INCX2
	b	LL(999)
	.align 4


/* ---- non-unit stride ---- */

LL(100):
	fcmpu	cr7, ALPHA, A1
	bne	cr7, LL(200)		/* ALPHA != 0: multiply path */

/* ---- non-unit stride, ALPHA == 0: fill x with zeros ---- */

	/* Bias X by one step: the update-form stores add INCX first. */
	sub	X,  X, INCX

	srawi.	r0, N, 3		/* CTR = N / 8 */
	mtspr	CTR,  r0
	beq-	LL(115)
	.align 4

LL(112):
	/* 8 scalar stores per pass */
	STFDUX	A1, X, INCX
	STFDUX	A1, X, INCX
	STFDUX	A1, X, INCX
	STFDUX	A1, X, INCX
	STFDUX	A1, X, INCX
	STFDUX	A1, X, INCX
	STFDUX	A1, X, INCX
	STFDUX	A1, X, INCX
	bdnz	LL(112)
	.align 4

LL(115):
	/* Remainder N % 8, handled in chunks of 4, 2, 1 elements. */
	andi.	r0,  N, 7
	beq	LL(999)
	andi.	r0,  N, 4
	beq	LL(117)

	STFDUX	A1, X, INCX
	STFDUX	A1, X, INCX
	STFDUX	A1, X, INCX
	STFDUX	A1, X, INCX
	.align 4

LL(117):
	andi.	r0,  N, 2
	beq	LL(118)

	STFDUX	A1, X, INCX
	STFDUX	A1, X, INCX
	.align 4

LL(118):
	andi.	r0,  N, 1
	beq	LL(999)
	STFDUX	A1, X, INCX
	b	LL(999)
	.align 4

/* ---- non-unit stride, ALPHA != 0: x = ALPHA * x, one element at a time ---- */

LL(200):
	/* X reads, X2 writes; both biased for update-form addressing. */
	sub	X2, X, INCX
	sub	X,  X, INCX

	srawi.	r0, N, 3		/* CTR = N / 8 */
	mtspr	CTR,  r0
	beq-	LL(215)

	/* Preload the first 8 elements. */
	LFDUX	A1, X, INCX
	LFDUX	A2, X, INCX
	LFDUX	A3, X, INCX
	LFDUX	A4, X, INCX
	LFDUX	A5, X, INCX
	LFDUX	A6, X, INCX
	LFDUX	A7, X, INCX
	LFDUX	A8, X, INCX
	bdz	LL(213)			/* only one block: go straight to the drain */
	.align 4

LL(212):
	/* Scale the block loaded earlier while loading the next one. */
	fmul	B1, ALPHA, A1
	LFDUX	A1, X, INCX
	fmul	B2, ALPHA, A2
	LFDUX	A2, X, INCX

	fmul	B3, ALPHA, A3
	LFDUX	A3, X, INCX
	fmul	B4, ALPHA, A4
	LFDUX	A4, X, INCX

	fmul	B5, ALPHA, A5
	LFDUX	A5, X, INCX
	fmul	B6, ALPHA, A6
	LFDUX	A6, X, INCX

	fmul	B7, ALPHA, A7
	LFDUX	A7, X, INCX
	fmul	B8, ALPHA, A8
	LFDUX	A8, X, INCX

	STFDUX	B1, X2, INCX
	STFDUX	B2, X2, INCX
	STFDUX	B3, X2, INCX
	STFDUX	B4, X2, INCX
	STFDUX	B5, X2, INCX
	STFDUX	B6, X2, INCX
	STFDUX	B7, X2, INCX
	STFDUX	B8, X2, INCX
	bdnz	LL(212)
	.align 4

LL(213):
	/* Drain: scale and store the last preloaded block. */
	fmul	B1, ALPHA, A1
	fmul	B2, ALPHA, A2
	fmul	B3, ALPHA, A3
	fmul	B4, ALPHA, A4
	fmul	B5, ALPHA, A5

	fmul	B6, ALPHA, A6
	STFDUX	B1, X2, INCX
	fmul	B7, ALPHA, A7
	STFDUX	B2, X2, INCX
	fmul	B8, ALPHA, A8
	STFDUX	B3, X2, INCX
	STFDUX	B4, X2, INCX
	STFDUX	B5, X2, INCX
	STFDUX	B6, X2, INCX
	STFDUX	B7, X2, INCX
	STFDUX	B8, X2, INCX
	.align 4

LL(215):
	/* Remainder N % 8, handled in chunks of 4, 2, 1 elements. */
	andi.	r0,  N, 7
	beq	LL(999)
	andi.	r0,  N, 4
	beq	LL(217)

	LFDUX	A1, X, INCX
	LFDUX	A2, X, INCX
	LFDUX	A3, X, INCX
	LFDUX	A4, X, INCX

	fmul	B1, ALPHA, A1
	fmul	B2, ALPHA, A2
	fmul	B3, ALPHA, A3
	fmul	B4, ALPHA, A4

	STFDUX	B1, X2, INCX
	STFDUX	B2, X2, INCX
	STFDUX	B3, X2, INCX
	STFDUX	B4, X2, INCX
	.align 4

LL(217):
	andi.	r0,  N, 2
	beq	LL(218)

	LFDUX	A1, X, INCX
	LFDUX	A2, X, INCX

	fmul	B1, ALPHA, A1
	fmul	B2, ALPHA, A2

	STFDUX	B1, X2, INCX
	STFDUX	B2, X2, INCX
	.align 4

LL(218):
	andi.	r0,  N, 1
	beq	LL(999)

	LFDUX	A1, X, INCX
	fmul	B1, ALPHA, A1
	STFDUX	B1, X2, INCX
	.align 4

/* ---- common exit: restore saved registers and SP ---- */

LL(999):
	/* SP points at the zero block; each update-form load steps up 16 bytes. */
	li	r10, 16

	lfpdux	f16, SP, r10
	lfpdux	f15, SP, r10
	lfpdux	f14, SP, r10

	addi	SP, SP,  16		/* back to the SP value at entry */
	blr

	EPILOGUE