/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin.           */
/* All rights reserved.                                              */
/*                                                                   */
/* Redistribution and use in source and binary forms, with or        */
/* without modification, are permitted provided that the following   */
/* conditions are met:                                               */
/*                                                                   */
/*   1. Redistributions of source code must retain the above         */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer.                                                  */
/*                                                                   */
/*   2. Redistributions in binary form must reproduce the above      */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer in the documentation and/or other materials       */
/*      provided with the distribution.                              */
/*                                                                   */
/*    THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT        */
/*    AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,        */
/*    INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF       */
/*    MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE       */
/*    DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT       */
/*    AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,     */
/*    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES       */
/*    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE      */
/*    GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR           */
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF     */
/*    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT      */
/*    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT     */
/*    OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE            */
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
/*                                                                   */
/* The views and conclusions contained in the software and           */
/* documentation are those of the authors and should not be          */
/* interpreted as representing official policies, either expressed   */
/* or implied, of The University of Texas at Austin.                 */
/*********************************************************************/

#define ASSEMBLER
#include "common.h"

/*
 * Integer register roles (incoming %i registers of the current window):
 *   N     number of pairs still to be examined
 *   X     address of the pair currently being read
 *   INCX  stride; shifted left by ZBASE_SHIFT on entry and from then on
 *         added directly to X, i.e. used as a byte offset between pairs
 *   I     loop counter
 */
#define N	%i0
#define X	%i1
#define INCX	%i2
#define I	%i3

/*
 * Floating-point register roles:
 *   c1..c4  four independent running results (c1 is %f0)
 *   t1..t8  absolute values / per-pair sums of the group being processed
 *   a1..a8  raw values loaded from X (four pairs)
 * The DOUBLE build uses even-numbered registers only; the other build
 * packs the same roles into consecutive registers.
 */
#ifdef DOUBLE
#define c1	%f0
#define c2	%f2
#define c3	%f4
#define c4	%f6
#define t1	%f8
#define t2	%f10
#define t3	%f12
#define t4	%f14
#define t5	%f16
#define t6	%f18
#define t7	%f20
#define t8	%f22

#define a1	%f24
#define a2	%f26
#define a3	%f28
#define a4	%f30
#define a5	%f32
#define a6	%f34
#define a7	%f36
#define a8	%f38
#else
#define c1	%f0
#define c2	%f1
#define c3	%f2
#define c4	%f3
#define t1	%f4
#define t2	%f5
#define t3	%f6
#define t4	%f7
#define t5	%f8
#define t6	%f9
#define t7	%f10
#define t8	%f11

#define a1	%f12
#define a2	%f13
#define a3	%f14
#define a4	%f15
#define a5	%f16
#define a6	%f17
#define a7	%f18
#define a8	%f19
#endif

/*
 * FCMOV is the conditional move that follows every "FCMP candidate, best".
 * Default build: FMOVG (candidate replaces best when it compared greater).
 * USE_MIN build: FMOVL (candidate replaces best when it compared less).
 * FMOVG / FMOVL / FCMP themselves come from common.h.
 */
#ifndef USE_MIN
#define FCMOV	FMOVG
#else
#define FCMOV	FMOVL
#endif

/* Look-ahead of the prefetch in the contiguous loop, in units of SIZE. */
#define PREFETCHSIZE 40

/*
 * Routine body (entry symbol is produced by PROLOGUE from common.h).
 *
 * Walks N pairs of values starting at X and leaves in c1 (%f0) the
 * extreme value of |first| + |second| over all pairs: the largest by
 * default, the smallest when built with USE_MIN.
 *
 * In:   N, X, INCX as described above.
 * Out:  c1 (%f0).  When N <= 0 or INCX <= 0 nothing is loaded and %f0 is
 *       whatever FCLR(0) left there (presumably zero - macro is defined
 *       in common.h, confirm there).
 *
 * Layout: the first pair seeds c1..c4; the remaining N-1 pairs are
 * consumed four at a time (one pair per accumulator) and then one at a
 * time, after which c1..c4 are folded into c1.  There is one copy of
 * that scheme for INCX == 2 * SIZE (contiguous pairs) and one for any
 * other stride.
 *
 * NOTE(review): every branch has a delay slot; the instruction following
 * a branch is part of the branch and must not be moved.  Counter tests
 * use %icc.
 */
	PROLOGUE
	SAVESP

	FCLR(0)

	/* Nothing to do for a non-positive count. */
	cmp	N, 0
	ble	.LLexit
	nop

	/* Nothing to do for a non-positive stride. */
	cmp	INCX, 0
	ble	.LLexit
	sll	INCX, ZBASE_SHIFT, INCX		/* delay slot: scale the stride */

	/* Seed: c1 = |first| + |second| of pair 0, then step past it. */
	LDF	[X + 0 * SIZE], c1
	LDF	[X + 1 * SIZE], c2
	add	N, -1, N
	FABS	c1, c1
	add	X, INCX, X
	FABS	c2, c2
	cmp	N, 0
	ble	.LLexit				/* only one pair: c1 is the answer */
	FADD	c1, c2, c1			/* delay slot: runs on both paths  */

	/* All four running results start from the seed value. */
	FMOV	c1, c2
	FMOV	c1, c3
	FMOV	c1, c4

	/* Pairs not back to back in memory -> generic stride code. */
	cmp	INCX, 2 * SIZE
	bne	.LLstrided
	nop

/* ------------------------------------------------------------------ */
/* Contiguous pairs: X advances by 8 * SIZE per group of four pairs.   */
/* ------------------------------------------------------------------ */

	sra	N, 2, I				/* I = number of 4-pair groups */
	cmp	I, 0
	ble,pn	%icc, .LLunit_tail
	nop

	/* Preload the first group; the loop body always works on values */
	/* fetched one step earlier.                                      */
	LDF	[X +  0 * SIZE], a1
	LDF	[X +  1 * SIZE], a2
	LDF	[X +  2 * SIZE], a3
	LDF	[X +  3 * SIZE], a4

	LDF	[X +  4 * SIZE], a5
	add	I, -1, I
	LDF	[X +  5 * SIZE], a6
	cmp	I, 0
	LDF	[X +  6 * SIZE], a7
	LDF	[X +  7 * SIZE], a8

	ble,pt	%icc, .LLunit_drain		/* exactly one group: just drain */
	add	X, 8 * SIZE, X			/* delay slot */

.LLunit_loop:
	prefetch [X + PREFETCHSIZE * SIZE], 0

	/* Take |.| of the group already in a1..a8 while reloading a1..a8 */
	/* with the next group.                                            */
	FABS	a1, t1
	LDF	[X +  0 * SIZE], a1
	FABS	a2, t2
	LDF	[X +  1 * SIZE], a2
	FABS	a3, t3
	LDF	[X +  2 * SIZE], a3
	FABS	a4, t4
	LDF	[X +  3 * SIZE], a4

	FABS	a5, t5
	LDF	[X +  4 * SIZE], a5
	FABS	a6, t6
	LDF	[X +  5 * SIZE], a6
	FABS	a7, t7
	LDF	[X +  6 * SIZE], a7
	FABS	a8, t8
	LDF	[X +  7 * SIZE], a8

	/* One sum per pair. */
	FADD	t1, t2, t1
	FADD	t3, t4, t3
	FADD	t5, t6, t5
	FADD	t7, t8, t7

	/* Each pair is checked against its own running result. */
	FCMP	%fcc0, t1, c1
	FCMP	%fcc1, t3, c2
	FCMP	%fcc2, t5, c3
	FCMP	%fcc3, t7, c4

	FCMOV	%fcc0, t1, c1
	add	I, -1, I
	FCMOV	%fcc1, t3, c2
	cmp	I, 0
	FCMOV	%fcc2, t5, c3
	FCMOV	%fcc3, t7, c4

	bg,pt	%icc, .LLunit_loop
	add	X, 8 * SIZE, X			/* delay slot */

.LLunit_drain:
	/* Process the last preloaded group; no further loads. */
	FABS	a1, t1
	FABS	a2, t2
	FABS	a3, t3
	FABS	a4, t4

	FABS	a5, t5
	FABS	a6, t6
	FABS	a7, t7
	FABS	a8, t8

	FADD	t1, t2, t1
	FADD	t3, t4, t3
	FADD	t5, t6, t5
	FADD	t7, t8, t7

	FCMP	%fcc0, t1, c1
	FCMP	%fcc1, t3, c2
	FCMP	%fcc2, t5, c3
	FCMP	%fcc3, t7, c4

	FCMOV	%fcc0, t1, c1
	FCMOV	%fcc1, t3, c2
	FCMOV	%fcc2, t5, c3
	FCMOV	%fcc3, t7, c4

.LLunit_tail:
	and	N, 3, I				/* I = pairs left over (0..3) */
	cmp	I, 0
	ble,a,pn %icc, .LLunit_reduce
	nop

.LLunit_tail_loop:
	/* One pair per pass, folded into c1 only. */
	LDF	[X +  0 * SIZE], a1
	LDF	[X +  1 * SIZE], a2

	FABS	a1, t1
	FABS	a2, t2
	FADD	t1, t2, t1
	FCMP	%fcc0, t1, c1
	FCMOV	%fcc0, t1, c1
	add	I, -1, I
	cmp	I, 0
	bg,pt	%icc, .LLunit_tail_loop
	add	X, 2 * SIZE, X			/* delay slot */

.LLunit_reduce:
	/* Fold c2 into c1 and c4 into c3, then c3 into c1. */
	FCMP	%fcc0, c2, c1
	FCMP	%fcc1, c4, c3
	FCMOV	%fcc0, c2, c1
	FCMOV	%fcc1, c4, c3
	FCMP	%fcc0, c3, c1
	FCMOV	%fcc0, c3, c1

.LLexit:
	/* Shared exit for the early-outs and the contiguous path. */
	return	%i7 + 8
	clr	%g0				/* delay slot: %g0 ignores writes, so a no-op */

/* ------------------------------------------------------------------ */
/* Arbitrary stride: X advances by INCX after every pair.              */
/* ------------------------------------------------------------------ */

.LLstrided:
	sra	N, 2, I				/* I = number of 4-pair groups */
	cmp	I, 0
	ble,pn	%icc, .LLstrided_tail
	nop

	/* Preload the first group of four pairs. */
	LDF	[X +  0 * SIZE], a1
	LDF	[X +  1 * SIZE], a2
	add	X, INCX, X
	LDF	[X +  0 * SIZE], a3
	LDF	[X +  1 * SIZE], a4
	add	X, INCX, X
	LDF	[X +  0 * SIZE], a5
	LDF	[X +  1 * SIZE], a6
	add	X, INCX, X
	add	I, -1, I
	LDF	[X +  0 * SIZE], a7
	cmp	I, 0
	LDF	[X +  1 * SIZE], a8
	ble,pt	%icc, .LLstrided_drain		/* exactly one group: just drain */
	add	X, INCX, X			/* delay slot */

.LLstrided_loop:
	/* Same scheme as the contiguous loop, stepping X between pairs. */
	FABS	a1, t1
	LDF	[X +  0 * SIZE], a1
	FABS	a2, t2
	LDF	[X +  1 * SIZE], a2
	add	X, INCX, X
	FABS	a3, t3
	LDF	[X +  0 * SIZE], a3
	FABS	a4, t4
	LDF	[X +  1 * SIZE], a4
	add	X, INCX, X

	FABS	a5, t5
	LDF	[X +  0 * SIZE], a5
	FABS	a6, t6
	LDF	[X +  1 * SIZE], a6
	add	X, INCX, X
	FABS	a7, t7
	LDF	[X +  0 * SIZE], a7
	FABS	a8, t8
	LDF	[X +  1 * SIZE], a8

	FADD	t1, t2, t1
	FADD	t3, t4, t3
	FADD	t5, t6, t5
	FADD	t7, t8, t7

	FCMP	%fcc0, t1, c1
	FCMP	%fcc1, t3, c2
	FCMP	%fcc2, t5, c3
	FCMP	%fcc3, t7, c4

	FCMOV	%fcc0, t1, c1
	add	I, -1, I
	FCMOV	%fcc1, t3, c2
	cmp	I, 0
	FCMOV	%fcc2, t5, c3
	FCMOV	%fcc3, t7, c4

	bg,pt	%icc, .LLstrided_loop
	add	X, INCX, X			/* delay slot: step past the 4th pair */

.LLstrided_drain:
	/* Process the last preloaded group; no further loads. */
	FABS	a1, t1
	FABS	a2, t2
	FABS	a3, t3
	FABS	a4, t4

	FABS	a5, t5
	FABS	a6, t6
	FABS	a7, t7
	FABS	a8, t8

	FADD	t1, t2, t1
	FADD	t3, t4, t3
	FADD	t5, t6, t5
	FADD	t7, t8, t7

	FCMP	%fcc0, t1, c1
	FCMP	%fcc1, t3, c2
	FCMP	%fcc2, t5, c3
	FCMP	%fcc3, t7, c4

	FCMOV	%fcc0, t1, c1
	FCMOV	%fcc1, t3, c2
	FCMOV	%fcc2, t5, c3
	FCMOV	%fcc3, t7, c4

.LLstrided_tail:
	and	N, 3, I				/* I = pairs left over (0..3) */
	cmp	I, 0
	ble,a,pn %icc, .LLstrided_reduce
	nop

.LLstrided_tail_loop:
	/* One pair per pass, folded into c1 only. */
	LDF	[X +  0 * SIZE], a1
	LDF	[X +  1 * SIZE], a2

	FABS	a1, t1
	add	I, -1, I
	FABS	a2, t2
	cmp	I, 0
	FADD	t1, t2, t1
	FCMP	%fcc0, t1, c1
	FCMOV	%fcc0, t1, c1

	bg,pt	%icc, .LLstrided_tail_loop
	add	X, INCX, X			/* delay slot */

.LLstrided_reduce:
	/* Fold c2 into c1 and c4 into c3, then c3 into c1. */
	FCMP	%fcc0, c2, c1
	FCMP	%fcc1, c4, c3
	FCMOV	%fcc0, c2, c1
	FCMOV	%fcc1, c4, c3
	FCMP	%fcc0, c3, c1
	FCMOV	%fcc0, c3, c1

	return	%i7 + 8
	clr	%o0				/* delay slot */

	EPILOGUE