/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin.           */
/* All rights reserved.                                              */
/*                                                                   */
/* Redistribution and use in source and binary forms, with or        */
/* without modification, are permitted provided that the following   */
/* conditions are met:                                               */
/*                                                                   */
/*   1. Redistributions of source code must retain the above         */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer.                                                  */
/*                                                                   */
/*   2. Redistributions in binary form must reproduce the above      */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer in the documentation and/or other materials       */
/*      provided with the distribution.                              */
/*                                                                   */
/*    THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT        */
/*    AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,        */
/*    INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF       */
/*    MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE       */
/*    DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT       */
/*    AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,     */
/*    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES       */
/*    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE      */
/*    GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR           */
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF     */
/*    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT      */
/*    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT     */
/*    OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE            */
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
/*                                                                   */
/* The views and conclusions contained in the software and           */
/* documentation are those of the authors and should not be          */
/* interpreted as representing official policies, either expressed   */
/* or implied, of The University of Texas at Austin.                 */
/*********************************************************************/

/*
 * Minimum-element reduction over a strided floating-point vector
 * (PowerPC assembly, run through the C preprocessor).
 *
 * Inputs (see the register #defines below):
 *   N    (r3)  element count
 *   X    (r4)  address of the first element
 *   INCX (r5)  stride; scaled by BASE_SHIFT before use
 *   When F_INTERFACE is defined, N and INCX hold addresses and the
 *   values are fetched with LDINT.
 *
 * Result: left in f1.  If N <= 0 or the scaled INCX <= 0 the routine
 * skips all loads and f1 holds the 0.0 placed in FZERO.
 *
 * Selection idiom used throughout:
 *     fsub  d, acc, x
 *     fsel  acc, d, x, acc      -> acc = (d >= 0) ? x : acc
 * i.e. the accumulator is replaced only when x is not larger than it.
 * fsel takes its "else" operand when d is a NaN, so in that case the
 * accumulator is kept.
 *
 * PROLOGUE, PROFCODE, EPILOGUE, LL(), LFDUX, LDINT, SP and BASE_SHIFT
 * are not defined in this file; presumably they come from common.h.
 */

#define ASSEMBLER
#include "common.h"

#define N	r3
#define X	r4
#define INCX	r5

/* PREA is defined but not referenced in the code below. */
#define PREA	r8

/* FZERO aliases f1, the register that carries the result. */
#define FZERO	f1

/* 18 saved FPRs (f14-f31) * 8 bytes = 144, plus scratch at 144(SP). */
#define STACKSIZE 160

	PROLOGUE
	PROFCODE

	addi	SP, SP, -STACKSIZE
	li	r0, 0

	/* Save f14-f31; they are used as scratch and restored at LL(9999). */
	stfd	f14, 0(SP)
	stfd	f15, 8(SP)
	stfd	f16, 16(SP)
	stfd	f17, 24(SP)

	stfd	f18, 32(SP)
	stfd	f19, 40(SP)
	stfd	f20, 48(SP)
	stfd	f21, 56(SP)

	stfd	f22, 64(SP)
	stfd	f23, 72(SP)
	stfd	f24, 80(SP)
	stfd	f25, 88(SP)

	stfd	f26, 96(SP)
	stfd	f27, 104(SP)
	stfd	f28, 112(SP)
	stfd	f29, 120(SP)

	stfd	f30, 128(SP)
	stfd	f31, 136(SP)

	/* Build 0.0 in FZERO by storing an integer zero and reloading it. */
	stw	r0, 144(SP)
	lfs	FZERO,144(SP)

#ifdef F_INTERFACE
	LDINT	N, 0(N)
	LDINT	INCX, 0(INCX)
#endif

	/* Scale the stride by BASE_SHIFT (presumably log2 of the element size). */
	slwi	INCX, INCX, BASE_SHIFT

	/* Bias X back by one stride: every LFDUX below advances X by INCX
	   before loading (update form), so the first load reads the original X. */
	sub	X, X, INCX

	/* Nothing to do for N <= 0 or a non-positive stride: return FZERO. */
	cmpwi	cr0, N, 0
	ble-	LL(9999)
	cmpwi	cr0, INCX, 0
	ble-	LL(9999)

	/* Seed all eight accumulators f0-f7 with the first element. */
	LFDUX	f1, X, INCX

	fmr	f0, f1
	subi	N, N, 1			/* N = elements still to be examined */
	fmr	f2, f1
	fmr	f3, f1
	fmr	f4, f1
	fmr	f5, f1
	srawi.	r0, N, 4		/* r0 = number of 16-element groups; sets cr0 */
	fmr	f6, f1
	mtspr	CTR, r0
	fmr	f7, f1
	beq-	LL(150)			/* fewer than 16 left: go to the scalar tail */

	/* Pipeline fill: load the first 16 elements into f16-f31 and start
	   the differences for the first eight. */
	LFDUX	f16, X, INCX
	LFDUX	f17, X, INCX
	LFDUX	f18, X, INCX
	LFDUX	f19, X, INCX
	LFDUX	f20, X, INCX
	LFDUX	f21, X, INCX
	LFDUX	f22, X, INCX
	LFDUX	f23, X, INCX

	LFDUX	f24, X, INCX
	fsub	f8, f0, f16
	LFDUX	f25, X, INCX
	fsub	f9, f1, f17
	LFDUX	f26, X, INCX
	fsub	f10, f2, f18
	LFDUX	f27, X, INCX
	fsub	f11, f3, f19
	LFDUX	f28, X, INCX
	fsub	f12, f4, f20
	LFDUX	f29, X, INCX
	fsub	f13, f5, f21
	LFDUX	f30, X, INCX
	fsub	f14, f6, f22
	LFDUX	f31, X, INCX
	fsub	f15, f7, f23
	bdz	LL(120)			/* only one group: skip straight to the drain */
	.align 4

/* Steady state: each iteration folds the 16 elements already held in
   f16-f31 into f0-f7 while loading the next 16 into the same registers. */
LL(110):
	fsel	f0, f8, f16, f0
	LFDUX	f16, X, INCX
	fsub	f8, f0, f24
	fsel	f1, f9, f17, f1
	LFDUX	f17, X, INCX
	fsub	f9, f1, f25
	fsel	f2, f10, f18, f2
	LFDUX	f18, X, INCX
	fsub	f10, f2, f26
	fsel	f3, f11, f19, f3
	LFDUX	f19, X, INCX
	fsub	f11, f3, f27

	fsel	f4, f12, f20, f4
	LFDUX	f20, X, INCX
	fsub	f12, f4, f28
	fsel	f5, f13, f21, f5
	LFDUX	f21, X, INCX
	fsub	f13, f5, f29
	fsel	f6, f14, f22, f6
	LFDUX	f22, X, INCX
	fsub	f14, f6, f30
	fsel	f7, f15, f23, f7
	LFDUX	f23, X, INCX
	fsub	f15, f7, f31

	fsel	f0, f8, f24, f0
	LFDUX	f24, X, INCX
	fsub	f8, f0, f16
	fsel	f1, f9, f25, f1
	LFDUX	f25, X, INCX
	fsub	f9, f1, f17
	fsel	f2, f10, f26, f2
	LFDUX	f26, X, INCX
	fsub	f10, f2, f18
	fsel	f3, f11, f27, f3
	LFDUX	f27, X, INCX
	fsub	f11, f3, f19

	fsel	f4, f12, f28, f4
	LFDUX	f28, X, INCX
	fsub	f12, f4, f20
	fsel	f5, f13, f29, f5
	LFDUX	f29, X, INCX
	fsub	f13, f5, f21
	fsel	f6, f14, f30, f6
	LFDUX	f30, X, INCX
	fsub	f14, f6, f22
	fsel	f7, f15, f31, f7
	LFDUX	f31, X, INCX
	fsub	f15, f7, f23
	bdnz	LL(110)
	.align 4

/* Pipeline drain: fold the last loaded group (f16-f31) with no further loads. */
LL(120):
	fsel	f0, f8, f16, f0
	fsub	f8, f0, f24
	fsel	f1, f9, f17, f1
	fsub	f9, f1, f25
	fsel	f2, f10, f18, f2
	fsub	f10, f2, f26
	fsel	f3, f11, f19, f3
	fsub	f11, f3, f27

	fsel	f4, f12, f20, f4
	fsub	f12, f4, f28
	fsel	f5, f13, f21, f5
	fsub	f13, f5, f29
	fsel	f6, f14, f22, f6
	fsub	f14, f6, f30
	fsel	f7, f15, f23, f7
	fsub	f15, f7, f31

	fsel	f0, f8, f24, f0
	fsel	f1, f9, f25, f1
	fsel	f2, f10, f26, f2
	fsel	f3, f11, f27, f3
	fsel	f4, f12, f28, f4
	fsel	f5, f13, f29, f5
	fsel	f6, f14, f30, f6
	fsel	f7, f15, f31, f7
	.align 4

/* Scalar tail: the remaining (N & 15) elements, one per iteration, into f1. */
LL(150):
	andi.	r0, N, 15
	mtspr	CTR, r0
	beq	LL(999)
	.align 4

LL(160):
	LFDUX	f8, X, INCX
	fsub	f16, f1, f8
	fsel	f1, f16, f8, f1
	bdnz	LL(160)
	.align 4

/* Combine the eight partial results pairwise: 8 -> 4 -> 2 -> 1, ending in f1. */
LL(999):
	fsub	f8, f0, f1
	fsub	f9, f2, f3
	fsub	f10, f4, f5
	fsub	f11, f6, f7

	fsel	f0, f8, f1, f0
	fsel	f2, f9, f3, f2
	fsel	f4, f10, f5, f4
	fsel	f6, f11, f7, f6

	fsub	f8, f0, f2
	fsub	f9, f4, f6
	fsel	f0, f8, f2, f0
	fsel	f4, f9, f6, f4

	fsub	f8, f0, f4
	fsel	f1, f8, f4, f0
	.align 4

/* Common exit: restore f14-f31, release the frame and return. */
LL(9999):
	lfd	f14, 0(SP)
	lfd	f15, 8(SP)
	lfd	f16, 16(SP)
	lfd	f17, 24(SP)

	lfd	f18, 32(SP)
	lfd	f19, 40(SP)
	lfd	f20, 48(SP)
	lfd	f21, 56(SP)

	lfd	f22, 64(SP)
	lfd	f23, 72(SP)
	lfd	f24, 80(SP)
	lfd	f25, 88(SP)

	lfd	f26, 96(SP)
	lfd	f27, 104(SP)
	lfd	f28, 112(SP)
	lfd	f29, 120(SP)

	lfd	f30, 128(SP)
	lfd	f31, 136(SP)

	addi	SP, SP, STACKSIZE
	blr

	EPILOGUE