1/*********************************************************************/ 2/* Copyright 2009, 2010 The University of Texas at Austin. */ 3/* All rights reserved. */ 4/* */ 5/* Redistribution and use in source and binary forms, with or */ 6/* without modification, are permitted provided that the following */ 7/* conditions are met: */ 8/* */ 9/* 1. Redistributions of source code must retain the above */ 10/* copyright notice, this list of conditions and the following */ 11/* disclaimer. */ 12/* */ 13/* 2. Redistributions in binary form must reproduce the above */ 14/* copyright notice, this list of conditions and the following */ 15/* disclaimer in the documentation and/or other materials */ 16/* provided with the distribution. */ 17/* */ 18/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ 19/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ 20/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ 21/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ 22/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ 23/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ 24/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ 25/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ 26/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ 27/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ 28/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ 29/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ 30/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ 31/* POSSIBILITY OF SUCH DAMAGE. */ 32/* */ 33/* The views and conclusions contained in the software and */ 34/* documentation are those of the authors and should not be */ 35/* interpreted as representing official policies, either expressed */ 36/* or implied, of The University of Texas at Austin. 
*/
/*********************************************************************/

/*
 * Sum of absolute values over a strided vector whose elements are pairs of
 * scalars (loaded from [X + 0 * SIZE] and [X + 1 * SIZE]).
 * NOTE(review): this looks like the complex ASUM kernel (sum of |re| + |im|);
 * the routine name comes from PROLOGUE in common.h and is not visible here.
 *
 * Inputs (register aliases below):
 *   N     element count
 *   X     address of the first element
 *   INCX  stride in elements; scaled by ZBASE_SHIFT on entry
 *
 * Result: the sum is left in c1 (%f0).  N <= 0 or INCX <= 0 yields zero.
 */

#define ASSEMBLER
#include "common.h"

/* Integer registers: arguments plus the loop counter. */
#define N	%i0
#define X	%i1
#define INCX	%i2
#define I	%i3

/*
 * Floating-point registers:
 *   c1, c2  two partial sums, combined just before returning
 *   t1..t4  absolute values computed but not yet added to a partial sum
 *   a1..a8  values loaded from X (four elements, two scalars each)
 */
#ifdef DOUBLE
#define c1	%f0
#define c2	%f2
#define t1	%f8
#define t2	%f10
#define t3	%f12
#define t4	%f14

#define a1	%f16
#define a2	%f18
#define a3	%f20
#define a4	%f22
#define a5	%f24
#define a6	%f26
#define a7	%f28
#define a8	%f30
#else
#define c1	%f0
#define c2	%f1
#define t1	%f4
#define t2	%f5
#define t3	%f6
#define t4	%f7

#define a1	%f8
#define a2	%f9
#define a3	%f10
#define a4	%f11
#define a5	%f12
#define a6	%f13
#define a7	%f14
#define a8	%f15
#endif

	PROLOGUE
	SAVESP

	/* Presumably zeroes c1 (%f0); FCLR is defined in common.h. */
	FCLR(0)

	/* Scale the stride (ZBASE_SHIFT comes from common.h). */
	sll	INCX, ZBASE_SHIFT, INCX

	/* Propagate the cleared value to the other sum and to the pending terms. */
	FMOV	c1, c2
	FMOV	c1, t1
	FMOV	c1, t2
	FMOV	c1, t3
	FMOV	c1, t4

	/*
	 * Nothing to sum when N <= 0.  Without this guard a negative N whose
	 * low two bits are set skips the unrolled loop (N >> 2 is negative)
	 * but still enters the remainder loop (N & 3 is positive) and reads
	 * memory through X.
	 */
	cmp	N, 0
	ble	.LL19
	nop

	/* A non-positive stride also returns zero. */
	cmp	INCX, 0
	ble	.LL19
	nop

	/* Any stride other than 2 * SIZE takes the general path at .LL50. */
	cmp	INCX, 2 * SIZE
	bne	.LL50
	nop

	/* ---- Contiguous path: I = N / 4 groups of four elements ---- */
	sra	N, 2, I
	cmp	I, 0
	ble,pn	%icc, .LL15
	nop

	/* Preload the first group; the counter update is interleaved. */
	LDF	[X +  0 * SIZE], a1
	add	I, -1, I
	LDF	[X +  1 * SIZE], a2
	cmp	I, 0
	LDF	[X +  2 * SIZE], a3
	LDF	[X +  3 * SIZE], a4
	LDF	[X +  4 * SIZE], a5
	LDF	[X +  5 * SIZE], a6
	LDF	[X +  6 * SIZE], a7
	LDF	[X +  7 * SIZE], a8

	/* Only one group: go straight to the drain code. */
	ble,pt	%icc, .LL12
	add	X, 8 * SIZE, X		/* delay slot: step past the preloaded group */

#define PREFETCHSIZE 32

	/*
	 * Software-pipelined loop.  Each FADD adds a pending absolute value,
	 * the following FABS replaces it with the absolute value of a loaded
	 * scalar, and the LDF refills that register for the next iteration.
	 */
.LL11:
	FADD	c1, t1, c1
	prefetch [X  + PREFETCHSIZE * SIZE], 0
	FABS	a1, t1
	LDF	[X +  0 * SIZE], a1

	FADD	c2, t2, c2
	add	I, -1, I
	FABS	a2, t2
	LDF	[X +  1 * SIZE], a2

	FADD	c1, t3, c1
	cmp	I, 0			/* condition codes consumed by the bg below */
	FABS	a3, t3
	LDF	[X +  2 * SIZE], a3

	FADD	c2, t4, c2
	nop
	FABS	a4, t4
	LDF	[X +  3 * SIZE], a4

	FADD	c1, t1, c1
	nop
	FABS	a5, t1
	LDF	[X +  4 * SIZE], a5

	FADD	c2, t2, c2
	nop
	FABS	a6, t2
	LDF	[X +  5 * SIZE], a6

	FADD	c1, t3, c1
	FABS	a7, t3
	LDF	[X +  6 * SIZE], a7
	add	X, 8 * SIZE, X

	FADD	c2, t4, c2
	FABS	a8, t4
	bg,pt	%icc, .LL11
	LDF	[X - 1 * SIZE], a8	/* delay slot: X was already advanced */

	/* Drain: consume the last loaded group; t1..t4 stay pending. */
.LL12:
	FADD	c1, t1, c1
	FABS	a1, t1
	FADD	c2, t2, c2
	FABS	a2, t2

	FADD	c1, t3, c1
	FABS	a3, t3
	FADD	c2, t4, c2
	FABS	a4, t4

	FADD	c1, t1, c1
	FABS	a5, t1
	FADD	c2, t2, c2
	FABS	a6, t2

	FADD	c1, t3, c1
	FABS	a7, t3
	FADD	c2, t4, c2
	FABS	a8, t4

	/* Remainder: I = N & 3 elements, one per iteration. */
.LL15:
	and	N, 3, I
	cmp	I, 0
	ble,a,pn %icc, .LL19
	nop

.LL16:
	LDF	[X +  0 * SIZE], a1
	LDF	[X +  1 * SIZE], a2
	add	I, -1, I
	cmp	I, 0
	FADD	c1, t1, c1
	FADD	c2, t2, c2
	FABS	a1, t1
	FABS	a2, t2
	bg,pt	%icc, .LL16
	add	X, 2 * SIZE, X		/* delay slot: next element */

	/* Add the pending terms, merge both partial sums into c1 and return. */
.LL19:
	FADD	c1, t1, c1
	FADD	c2, t2, c2
	FADD	c1, t3, c1
	FADD	c2, t4, c2

	FADD	c1, c2, c1
	return	%i7 + 8
	clr	%g0			/* delay slot filler (target is %g0) */

	/* ---- General-stride path: same scheme, X advanced by INCX ---- */
.LL50:
	sra	N, 2, I
	cmp	I, 0
	ble,pn	%icc, .LL55
	nop

	/* Preload four elements, stepping X by INCX after each one. */
	LDF	[X +  0 * SIZE], a1
	LDF	[X +  1 * SIZE], a2
	add	X, INCX, X
	LDF	[X +  0 * SIZE], a3
	LDF	[X +  1 * SIZE], a4
	add	X, INCX, X
	LDF	[X +  0 * SIZE], a5
	LDF	[X +  1 * SIZE], a6
	add	X, INCX, X
	add	I, -1, I
	LDF	[X +  0 * SIZE], a7
	cmp	I, 0
	LDF	[X +  1 * SIZE], a8

	ble,pt	%icc, .LL52
	add	X, INCX, X		/* delay slot: step past the fourth element */

.LL51:
	FADD	c1, t1, c1
	add	I, -1, I
	FABS	a1, t1
	LDF	[X +  0 * SIZE], a1

	FADD	c2, t2, c2
	cmp	I, 0			/* condition codes consumed by the bg below */
	FABS	a2, t2
	LDF	[X +  1 * SIZE], a2
	add	X, INCX, X

	FADD	c1, t3, c1
	FABS	a3, t3
	LDF	[X +  0 * SIZE], a3

	FADD	c2, t4, c2
	FABS	a4, t4
	LDF	[X +  1 * SIZE], a4
	add	X, INCX, X

	FADD	c1, t1, c1
	FABS	a5, t1
	LDF	[X +  0 * SIZE], a5

	FADD	c2, t2, c2
	FABS	a6, t2
	LDF	[X +  1 * SIZE], a6
	add	X, INCX, X

	FADD	c1, t3, c1
	FABS	a7, t3
	LDF	[X +  0 * SIZE], a7

	FADD	c2, t4, c2
	FABS	a8, t4
	LDF	[X +  1 * SIZE], a8

	bg,pt	%icc, .LL51
	add	X, INCX, X		/* delay slot: next element */

	/* Drain: consume the last loaded group; t1..t4 stay pending. */
.LL52:
	FADD	c1, t1, c1
	FABS	a1, t1
	FADD	c2, t2, c2
	FABS	a2, t2

	FADD	c1, t3, c1
	FABS	a3, t3
	FADD	c2, t4, c2
	FABS	a4, t4

	FADD	c1, t1, c1
	FABS	a5, t1
	FADD	c2, t2, c2
	FABS	a6, t2

	FADD	c1, t3, c1
	FABS	a7, t3
	FADD	c2, t4, c2
	FABS	a8, t4

	/* Remainder: I = N & 3 elements, one per iteration. */
.LL55:
	and	N, 3, I
	cmp	I, 0
	ble,a,pn %icc, .LL59
	nop

.LL56:
	LDF	[X +  0 * SIZE], a1
	LDF	[X +  1 * SIZE], a2
	FADD	c1, t1, c1
	FADD	c2, t2, c2
	add	I, -1, I
	FABS	a1, t1
	FABS	a2, t2
	cmp	I, 0
	bg,pt	%icc, .LL56
	add	X, INCX, X		/* delay slot: next element */

	/* Add the pending terms, merge both partial sums into c1 and return. */
.LL59:
	FADD	c1, t1, c1
	FADD	c2, t2, c2
	FADD	c1, t3, c1
	FADD	c2, t4, c2

	FADD	c1, c2, c1

	/*
	 * NOTE(review): this exit clears %o0 in the delay slot while the
	 * .LL19 exit uses %g0; the sum is left in c1 on both paths.
	 */
	return	%i7 + 8
	clr	%o0

	EPILOGUE