/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin.           */
/* All rights reserved.                                              */
/*                                                                   */
/* Redistribution and use in source and binary forms, with or        */
/* without modification, are permitted provided that the following   */
/* conditions are met:                                               */
/*                                                                   */
/*   1. Redistributions of source code must retain the above         */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer.                                                  */
/*                                                                   */
/*   2. Redistributions in binary form must reproduce the above      */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer in the documentation and/or other materials       */
/*      provided with the distribution.                              */
/*                                                                   */
/*    THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT        */
/*    AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,        */
/*    INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF       */
/*    MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE       */
/*    DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT       */
/*    AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,     */
/*    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES       */
/*    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE      */
/*    GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR           */
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF     */
/*    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT      */
/*    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT     */
/*    OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE            */
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
/*                                                                   */
/* The views and conclusions contained in the software and           */
/* documentation are those of the authors and should not be          */
/* interpreted as representing official policies, either expressed   */
/* or implied, of The University of Texas at Austin.                 */
/*********************************************************************/

#define ASSEMBLER
#include "common.h"

/*
 * Adds the FABS of every element loaded from X into the accumulators c1/c2
 * and leaves the combined total in c1 (%f0).
 * NOTE(review): this looks like a BLAS "asum"-style kernel (sum of |x[i]|);
 * the exported symbol name is produced by PROLOGUE in common.h, so confirm
 * it there.
 *
 * PROLOGUE, SAVESP, EPILOGUE, FCLR, FMOV, FADD, FABS, LDF, SIZE and
 * BASE_SHIFT all come from common.h and are not visible in this file; the
 * names suggest precision-dependent clear / move / add / absolute value /
 * load macros and the element size -- TODO confirm.
 *
 * Register roles, as named by the #defines below:
 *   N    (%i0)  element count
 *   X    (%i1)  address of the current element
 *   INCX (%i2)  stride; shifted left by BASE_SHIFT on entry (presumably
 *               turning an element stride into a byte stride)
 *   I    (%i3)  loop counter
 *   c1, c2      two running sums, merged just before returning
 *   t1..t4      FABS results waiting to be added on a later step
 *   a1..a8      the eight elements currently in flight
 *
 * Flow:
 *   INCX <= 0     -> nothing is loaded, the cleared accumulators are returned
 *   INCX == SIZE  -> contiguous path (.LL11 / .LL12 / .LL15 / .LL16 / .LL19)
 *   otherwise     -> strided path    (.LL50 .. .LL59)
 * Each path runs an eight-element main loop N >> 3 times and then a
 * one-element loop for the remaining N & 7 elements.
 *
 * The instruction order is deliberate (loads are interleaved with arithmetic
 * and several branches carry useful work in their delay slot); do not
 * reorder instructions without re-checking every delay slot.
 */

#define N	%i0
#define X	%i1
#define INCX	%i2
#define I	%i3

/* Floating-point register aliases; DOUBLE uses even-numbered %f registers. */
#ifdef DOUBLE
#define c1	%f0
#define c2	%f2
#define t1	%f8
#define t2	%f10
#define t3	%f12
#define t4	%f14

#define a1	%f16
#define a2	%f18
#define a3	%f20
#define a4	%f22
#define a5	%f24
#define a6	%f26
#define a7	%f28
#define a8	%f30
#else
#define c1	%f0
#define c2	%f1
#define t1	%f4
#define t2	%f5
#define t3	%f6
#define t4	%f7

#define a1	%f8
#define a2	%f9
#define a3	%f10
#define a4	%f11
#define a5	%f12
#define a6	%f13
#define a7	%f14
#define a8	%f15
#endif

/* Prefetch distance used by the contiguous main loop (.LL11); the prefetch
   address is PREFETCHSIZE * SIZE past X. */
#define PREFETCHSIZE 128

	PROLOGUE
	SAVESP

	/* Clear c1 (%f0), then copy it into the second sum and all pending
	   slots so the first FADDs of either path add zeros. */
	FCLR(0)

	sll	INCX, BASE_SHIFT, INCX

	FMOV	c1, c2
	FMOV	c1, t1
	FMOV	c1, t2
	FMOV	c1, t3
	FMOV	c1, t4

	cmp	INCX, 0
	ble	.LL19
	/* Delay slot of the ble above: this compare runs whether or not the
	   branch is taken.  .LL19 never reads the condition codes. */
	cmp	INCX, SIZE
	bne	.LL50
	/* Delay slot of the bne above: also executed on the way to .LL50,
	   which recomputes the same N >> 3. */
	sra	N, 3, I

	/* ---- contiguous path (INCX == SIZE) ---- */
	cmp	I, 0
	ble,pn	%icc, .LL15
	nop

	/* Preload the first eight elements and pre-decrement the counter. */
	LDF	[X +  0 * SIZE], a1
	add	I, -1, I
	LDF	[X +  1 * SIZE], a2
	cmp	I, 0
	LDF	[X +  2 * SIZE], a3
	LDF	[X +  3 * SIZE], a4
	LDF	[X +  4 * SIZE], a5
	LDF	[X +  5 * SIZE], a6
	LDF	[X +  6 * SIZE], a7
	LDF	[X +  7 * SIZE], a8

	/* Only one group of eight: skip the steady-state loop. */
	ble,pt	%icc, .LL12
	add	X, 8 * SIZE, X		/* delay slot: step past the preloaded group */

	/* Steady state: add the pending FABS results, take FABS of the group
	   already in a1..a8, and reload a1..a8 with the next group. */
.LL11:
	FADD	c1, t1, c1
	prefetch [X + PREFETCHSIZE * SIZE], 0
	FABS	a1, t1
	LDF	[X +  0 * SIZE], a1

	FADD	c2, t2, c2
	add	I, -1, I
	FABS	a2, t2
	LDF	[X +  1 * SIZE], a2

	FADD	c1, t3, c1
	cmp	I, 0			/* sets the codes tested by the bg below */
	FABS	a3, t3
	LDF	[X +  2 * SIZE], a3

	/* NOTE(review): the nops in the next three groups look like issue
	   padding; they have no architectural effect. */
	FADD	c2, t4, c2
	nop
	FABS	a4, t4
	LDF	[X +  3 * SIZE], a4

	FADD	c1, t1, c1
	nop
	FABS	a5, t1
	LDF	[X +  4 * SIZE], a5

	FADD	c2, t2, c2
	nop
	FABS	a6, t2
	LDF	[X +  5 * SIZE], a6

	FADD	c1, t3, c1
	FABS	a7, t3
	LDF	[X +  6 * SIZE], a7
	add	X, 8 * SIZE, X

	FADD	c2, t4, c2
	FABS	a8, t4
	bg,pt	%icc, .LL11
	/* Delay slot: X was already advanced, hence the -1 offset for the
	   eighth element of the group. */
	LDF	[X -  1 * SIZE], a8

	/* Drain: same arithmetic as .LL11 for the last loaded group, without
	   further loads.  t1..t4 are left pending for .LL16 / .LL19. */
.LL12:
	FADD	c1, t1, c1
	FABS	a1, t1
	FADD	c2, t2, c2
	FABS	a2, t2

	FADD	c1, t3, c1
	FABS	a3, t3
	FADD	c2, t4, c2
	FABS	a4, t4

	FADD	c1, t1, c1
	FABS	a5, t1
	FADD	c2, t2, c2
	FABS	a6, t2

	FADD	c1, t3, c1
	FABS	a7, t3
	FADD	c2, t4, c2
	FABS	a8, t4

	/* Remainder: N & 7 elements, one per iteration. */
.LL15:
	and	N, 7, I
	cmp	I, 0
	ble,a,pn %icc, .LL19
	nop

.LL16:
	LDF	[X +  0 * SIZE], a1
	add	I, -1, I
	cmp	I, 0
	FADD	c1, t1, c1		/* add the previously pending t1 ... */
	FABS	a1, t1			/* ... then queue the new element */
	bg,pt	%icc, .LL16
	add	X, 1 * SIZE, X		/* delay slot: next element */

	/* Fold the four pending values into the sums and merge c2 into c1. */
.LL19:
	FADD	c1, t1, c1
	FADD	c2, t2, c2
	FADD	c1, t3, c1
	FADD	c2, t4, c2

	FADD	c1, c2, c1
	return	%i7 + 8
	/* Delay slot: the target is %g0, so this only fills the slot.
	   NOTE(review): the strided exit at .LL59 uses %o0 here instead. */
	clr	%g0

	/* ---- strided path (INCX != SIZE, INCX > 0) ---- */
.LL50:
	sra	N, 3, I
	cmp	I, 0
	ble,pn	%icc, .LL55
	nop

	/* Preload eight elements, stepping X by INCX after each load. */
	LDF	[X +  0 * SIZE], a1
	add	X, INCX, X
	LDF	[X +  0 * SIZE], a2
	add	X, INCX, X
	LDF	[X +  0 * SIZE], a3
	add	X, INCX, X
	LDF	[X +  0 * SIZE], a4
	add	X, INCX, X
	LDF	[X +  0 * SIZE], a5
	add	X, INCX, X
	LDF	[X +  0 * SIZE], a6
	add	X, INCX, X
	add	I, -1, I
	LDF	[X +  0 * SIZE], a7
	cmp	I, 0
	add	X, INCX, X
	LDF	[X +  0 * SIZE], a8

	/* Only one group of eight: skip the steady-state loop. */
	ble,pt	%icc, .LL52
	add	X, INCX, X		/* delay slot: step past the eighth element */

	/* Steady state, same pipeline as .LL11 but with a per-element stride.
	   The plain adds do not touch the condition codes, so the cmp in the
	   second group is still what the closing bg tests. */
.LL51:
	FADD	c1, t1, c1
	add	I, -1, I
	FABS	a1, t1
	LDF	[X +  0 * SIZE], a1
	add	X, INCX, X

	FADD	c2, t2, c2
	cmp	I, 0
	FABS	a2, t2
	LDF	[X +  0 * SIZE], a2
	add	X, INCX, X

	FADD	c1, t3, c1
	FABS	a3, t3
	LDF	[X +  0 * SIZE], a3
	add	X, INCX, X

	FADD	c2, t4, c2
	FABS	a4, t4
	LDF	[X +  0 * SIZE], a4
	add	X, INCX, X

	FADD	c1, t1, c1
	FABS	a5, t1
	LDF	[X +  0 * SIZE], a5
	add	X, INCX, X

	FADD	c2, t2, c2
	FABS	a6, t2
	LDF	[X +  0 * SIZE], a6
	add	X, INCX, X

	FADD	c1, t3, c1
	FABS	a7, t3
	LDF	[X +  0 * SIZE], a7
	add	X, INCX, X

	FADD	c2, t4, c2
	FABS	a8, t4
	LDF	[X +  0 * SIZE], a8

	bg,pt	%icc, .LL51
	add	X, INCX, X		/* delay slot: step past the eighth element */

	/* Drain the last loaded group; t1..t4 stay pending for .LL56 / .LL59. */
.LL52:
	FADD	c1, t1, c1
	FABS	a1, t1
	FADD	c2, t2, c2
	FABS	a2, t2

	FADD	c1, t3, c1
	FABS	a3, t3
	FADD	c2, t4, c2
	FABS	a4, t4

	FADD	c1, t1, c1
	FABS	a5, t1
	FADD	c2, t2, c2
	FABS	a6, t2

	FADD	c1, t3, c1
	FABS	a7, t3
	FADD	c2, t4, c2
	FABS	a8, t4

	/* Remainder: N & 7 elements, one per iteration. */
.LL55:
	and	N, 7, I
	cmp	I, 0
	ble,a,pn %icc, .LL59
	nop

.LL56:
	LDF	[X +  0 * SIZE], a1
	FADD	c1, t1, c1		/* add the previously pending t1 ... */
	add	I, -1, I
	FABS	a1, t1			/* ... then queue the new element */
	cmp	I, 0
	bg,pt	%icc, .LL56
	add	X, INCX, X		/* delay slot: next element */

	/* Fold the four pending values into the sums and merge c2 into c1. */
.LL59:
	FADD	c1, t1, c1
	FADD	c2, t2, c2
	FADD	c1, t3, c1
	FADD	c2, t4, c2

	FADD	c1, c2, c1
	return	%i7 + 8
	/* Delay slot: clears integer register %o0; the sum in c1 is untouched. */
	clr	%o0

	EPILOGUE