/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin.           */
/* All rights reserved.                                              */
/*                                                                   */
/* Redistribution and use in source and binary forms, with or        */
/* without modification, are permitted provided that the following   */
/* conditions are met:                                               */
/*                                                                   */
/*   1. Redistributions of source code must retain the above         */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer.                                                  */
/*                                                                   */
/*   2. Redistributions in binary form must reproduce the above      */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer in the documentation and/or other materials       */
/*      provided with the distribution.                              */
/*                                                                   */
/*    THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT        */
/*    AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,        */
/*    INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF       */
/*    MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE       */
/*    DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT       */
/*    AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,     */
/*    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES       */
/*    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE      */
/*    GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR           */
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF     */
/*    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT      */
/*    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT     */
/*    OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE            */
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
/*                                                                   */
/* The views and conclusions contained in the software and           */
/* documentation are those of the authors and should not be          */
/* interpreted as representing official policies, either expressed   */
/* or implied, of The University of Texas at Austin.                 */
/*********************************************************************/

/*
 * Vector maximum reduction for SPARC (minimum when USE_MIN is defined),
 * assembled through the C preprocessor.
 *
 * Inputs, read from the incoming register window:
 *   N    (%i0)  number of elements
 *   X    (%i1)  address of the first element
 *   INCX (%i2)  stride between consecutive elements, in elements
 *
 * Result: left in c1 (%f0).  The raw values are compared; no absolute
 * value is taken.  When N <= 0 or INCX <= 0 nothing is loaded and c1
 * keeps whatever FCLR(0) put there (presumably zero -- FCLR comes from
 * common.h, which is not visible here; confirm).
 *
 * Method: the first element seeds four independent running extrema
 * c1..c4.  The remaining N-1 elements are consumed eight at a time, two
 * per accumulator, with the loads for the next group interleaved
 * between the conditional moves of the current group.  Left-over
 * elements (count mod 8) are folded into c1 one by one and finally
 * c1..c4 are merged into c1.  A unit-stride path (.LL11 - .LL19) and a
 * general-stride path (.LL50 - .LL59) implement the same scheme.
 *
 * Reading notes:
 *   - The instruction following a branch is its delay slot.  For the
 *     plain branches below it executes whether or not the branch is
 *     taken; several delay slots carry useful work (pointer bumps, the
 *     INCX scaling).
 *   - FCMP %fccN, a, c followed by FCMOV %fccN, a, c replaces c with a
 *     only when the compare reports the selected relation, i.e.
 *     c = extremum(c, a).
 *   - LDF, FMOV, FCMP, FMOVG, FMOVL, SIZE, BASE_SHIFT, SAVESP, PROLOGUE
 *     and EPILOGUE are provided by common.h.
 */

#define ASSEMBLER
#include "common.h"

/* Integer registers. */
#define N       %i0     /* elements still to process */
#define X       %i1     /* current element address */
#define INCX    %i2     /* stride; turned into a byte stride below */
#define I       %i3     /* loop counter */

/*
 * Floating-point registers.  The DOUBLE build uses even-numbered
 * registers only; the other build uses consecutive registers.
 *   c1..c4  running extrema (c1 also carries the result)
 *   t1..t4  not referenced by the code in this file
 *   a1..a8  elements loaded from X
 */
#ifdef DOUBLE
#define c1      %f0
#define c2      %f2
#define c3      %f4
#define c4      %f6
#define t1      %f8
#define t2      %f10
#define t3      %f12
#define t4      %f14

#define a1      %f16
#define a2      %f18
#define a3      %f20
#define a4      %f22
#define a5      %f24
#define a6      %f26
#define a7      %f28
#define a8      %f30
#else
#define c1      %f0
#define c2      %f1
#define c3      %f2
#define c4      %f3
#define t1      %f4
#define t2      %f5
#define t3      %f6
#define t4      %f7

#define a1      %f8
#define a2      %f9
#define a3      %f10
#define a4      %f11
#define a5      %f12
#define a6      %f13
#define a7      %f14
#define a8      %f15
#endif

/* Conditional move that selects the reduction: greater -> max, less -> min. */
#ifndef USE_MIN
#define FCMOV   FMOVG
#else
#define FCMOV   FMOVL
#endif

        PROLOGUE
        SAVESP

        /* Default result for the early exits below. */
        FCLR(0)

        /* Nothing to do for an empty vector. */
        cmp     N, 0
        ble     .LL20
        nop

        /* Non-positive strides are rejected.  The shift sits in the delay
           slot, so INCX is scaled on both paths; presumably BASE_SHIFT is
           log2(SIZE), making INCX a byte stride from here on. */
        cmp     INCX, 0
        ble     .LL20
        sll     INCX, BASE_SHIFT, INCX

        /* Seed c1 with the first element; N now counts the remaining ones. */
        add     N, -1, N
        LDF     [X], c1
        add     X, INCX, X
        cmp     N, 0
        ble     .LL20
        nop

        /* All four accumulators start from the first element. */
        FMOV    c1, c2
        FMOV    c1, c3
        FMOV    c1, c4

        /* A byte stride other than SIZE takes the general-stride path. */
        cmp     INCX, SIZE
        bne     .LL50
        nop

        /* ---- unit stride ---- */

        /* I = number of complete groups of eight. */
        sra     N, 3, I
        cmp     I, 0
        ble,pn  %icc, .LL15
        nop

        /* Preload the first group. */
        LDF     [X +  0 * SIZE], a1
        LDF     [X +  1 * SIZE], a2
        LDF     [X +  2 * SIZE], a3
        LDF     [X +  3 * SIZE], a4

        LDF     [X +  4 * SIZE], a5
        LDF     [X +  5 * SIZE], a6
        LDF     [X +  6 * SIZE], a7
        LDF     [X +  7 * SIZE], a8
        add     X, 8 * SIZE, X

        /* With a single group, skip the loop and only drain it at .LL12. */
        add     I, -1, I
        cmp     I, 0
        ble,pt  %icc, .LL12
        nop

/* Not referenced by the code visible in this file. */
#define PREFETCHSIZE 40

/* Steady state: fold the loaded group while loading the next one. */
.LL11:
        FCMP    %fcc0, a1, c1
        FCMP    %fcc1, a2, c2
        FCMP    %fcc2, a3, c3
        FCMP    %fcc3, a4, c4

        FCMOV   %fcc0, a1, c1
        LDF     [X +  0 * SIZE], a1
        FCMOV   %fcc1, a2, c2
        LDF     [X +  1 * SIZE], a2
        FCMOV   %fcc2, a3, c3
        LDF     [X +  2 * SIZE], a3
        FCMOV   %fcc3, a4, c4
        LDF     [X +  3 * SIZE], a4

        FCMP    %fcc0, a5, c1
        FCMP    %fcc1, a6, c2
        FCMP    %fcc2, a7, c3
        FCMP    %fcc3, a8, c4

        FCMOV   %fcc0, a5, c1
        LDF     [X +  4 * SIZE], a5
        add     I, -1, I
        FCMOV   %fcc1, a6, c2
        LDF     [X +  5 * SIZE], a6
        cmp     I, 0
        FCMOV   %fcc2, a7, c3
        LDF     [X +  6 * SIZE], a7
        FCMOV   %fcc3, a8, c4
        LDF     [X +  7 * SIZE], a8

        /* Delay slot: step past the group that was just loaded. */
        bg,pt   %icc, .LL11
        add     X, 8 * SIZE, X

/* Drain: fold the last loaded group; no further loads. */
.LL12:
        FCMP    %fcc0, a1, c1
        FCMP    %fcc1, a2, c2
        FCMP    %fcc2, a3, c3
        FCMP    %fcc3, a4, c4

        FCMOV   %fcc0, a1, c1
        FCMOV   %fcc1, a2, c2
        FCMOV   %fcc2, a3, c3
        FCMOV   %fcc3, a4, c4

        FCMP    %fcc0, a5, c1
        FCMP    %fcc1, a6, c2
        FCMP    %fcc2, a7, c3
        FCMP    %fcc3, a8, c4

        FCMOV   %fcc0, a5, c1
        FCMOV   %fcc1, a6, c2
        FCMOV   %fcc2, a7, c3
        FCMOV   %fcc3, a8, c4

/* Remainder: the count mod 8 elements that did not fill a group. */
.LL15:
        and     N, 7, I
        cmp     I, 0
        ble,a,pn %icc, .LL19
        nop

/* One element per iteration, folded into c1. */
.LL16:
        LDF     [X +  0 * SIZE], a1
        FCMP    %fcc0, a1, c1
        FCMOV   %fcc0, a1, c1
        add     I, -1, I
        cmp     I, 0
        bg,pt   %icc, .LL16
        add     X, 1 * SIZE, X

/* Merge the four accumulators: (c1,c2) -> c1, (c3,c4) -> c3, then c3 -> c1. */
.LL19:
        FCMP    %fcc0, c2, c1
        FCMP    %fcc1, c4, c3
        FCMOV   %fcc0, c2, c1
        FCMOV   %fcc1, c4, c3
        FCMP    %fcc0, c3, c1
        FCMOV   %fcc0, c3, c1

/* Common exit for the early outs and the unit-stride path; result in c1.
   The delay slot writes %g0, which always reads as zero on SPARC.
   NOTE(review): the other exit at the end of the file fills its delay slot
   with "clr %o0" instead; left as found. */
.LL20:
        return  %i7 + 8
        clr     %g0

        /* ---- general stride: same scheme, X advances by INCX per element ---- */

.LL50:
        /* I = number of complete groups of eight. */
        sra     N, 3, I
        cmp     I, 0
        ble,pn  %icc, .LL55
        nop

        /* Preload the first group; the counter update is interleaved. */
        LDF     [X +  0 * SIZE], a1
        add     X, INCX, X
        LDF     [X +  0 * SIZE], a2
        add     X, INCX, X
        LDF     [X +  0 * SIZE], a3
        add     X, INCX, X
        LDF     [X +  0 * SIZE], a4
        add     X, INCX, X
        LDF     [X +  0 * SIZE], a5
        add     X, INCX, X
        LDF     [X +  0 * SIZE], a6
        add     X, INCX, X
        add     I, -1, I
        LDF     [X +  0 * SIZE], a7
        cmp     I, 0
        add     X, INCX, X
        LDF     [X +  0 * SIZE], a8

        /* With a single group, go straight to the drain; the delay slot
           steps past a8 either way. */
        ble,pt  %icc, .LL52
        add     X, INCX, X

/* Steady state: fold the loaded group while loading the next one. */
.LL51:
        FCMP    %fcc0, a1, c1
        FCMP    %fcc1, a2, c2
        FCMP    %fcc2, a3, c3
        FCMP    %fcc3, a4, c4

        FCMOV   %fcc0, a1, c1
        LDF     [X +  0 * SIZE], a1
        add     X, INCX, X
        FCMOV   %fcc1, a2, c2
        LDF     [X +  0 * SIZE], a2
        add     X, INCX, X
        FCMOV   %fcc2, a3, c3
        LDF     [X +  0 * SIZE], a3
        add     X, INCX, X
        FCMOV   %fcc3, a4, c4
        LDF     [X +  0 * SIZE], a4
        add     X, INCX, X

        FCMP    %fcc0, a5, c1
        add     I, -1, I
        FCMP    %fcc1, a6, c2
        cmp     I, 0
        FCMP    %fcc2, a7, c3
        FCMP    %fcc3, a8, c4

        FCMOV   %fcc0, a5, c1
        LDF     [X +  0 * SIZE], a5
        add     X, INCX, X
        FCMOV   %fcc1, a6, c2
        LDF     [X +  0 * SIZE], a6
        add     X, INCX, X
        FCMOV   %fcc2, a7, c3
        LDF     [X +  0 * SIZE], a7
        add     X, INCX, X
        FCMOV   %fcc3, a8, c4
        LDF     [X +  0 * SIZE], a8

        /* Delay slot: step past a8. */
        bg,pt   %icc, .LL51
        add     X, INCX, X

/* Drain: fold the last loaded group; no further loads. */
.LL52:
        FCMP    %fcc0, a1, c1
        FCMP    %fcc1, a2, c2
        FCMP    %fcc2, a3, c3
        FCMP    %fcc3, a4, c4

        FCMOV   %fcc0, a1, c1
        FCMOV   %fcc1, a2, c2
        FCMOV   %fcc2, a3, c3
        FCMOV   %fcc3, a4, c4

        FCMP    %fcc0, a5, c1
        FCMP    %fcc1, a6, c2
        FCMP    %fcc2, a7, c3
        FCMP    %fcc3, a8, c4

        FCMOV   %fcc0, a5, c1
        FCMOV   %fcc1, a6, c2
        FCMOV   %fcc2, a7, c3
        FCMOV   %fcc3, a8, c4

/* Remainder: the count mod 8 elements that did not fill a group. */
.LL55:
        and     N, 7, I
        cmp     I, 0
        ble,a,pn %icc, .LL59
        nop

/* One element per iteration, folded into c1. */
.LL56:
        LDF     [X +  0 * SIZE], a1
        FCMP    %fcc0, a1, c1
        FCMOV   %fcc0, a1, c1
        add     I, -1, I
        cmp     I, 0
        bg,pt   %icc, .LL56
        add     X, INCX, X

/* Merge the four accumulators into c1 and return. */
.LL59:
        FCMP    %fcc0, c2, c1
        FCMP    %fcc1, c4, c3
        FCMOV   %fcc0, c2, c1
        FCMOV   %fcc1, c4, c3
        FCMP    %fcc0, c3, c1
        FCMOV   %fcc0, c3, c1

        /* Result is in c1; the delay slot clears %o0. */
        return  %i7 + 8
        clr     %o0

        EPILOGUE