/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin.           */
/* All rights reserved.                                              */
/*                                                                   */
/* Redistribution and use in source and binary forms, with or        */
/* without modification, are permitted provided that the following   */
/* conditions are met:                                               */
/*                                                                   */
/*   1. Redistributions of source code must retain the above         */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer.                                                  */
/*                                                                   */
/*   2. Redistributions in binary form must reproduce the above      */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer in the documentation and/or other materials       */
/*      provided with the distribution.                              */
/*                                                                   */
/*    THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT        */
/*    AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,        */
/*    INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF       */
/*    MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE       */
/*    DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT       */
/*    AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,     */
/*    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES       */
/*    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE      */
/*    GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR           */
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF     */
/*    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT      */
/*    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT     */
/*    OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE            */
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
/*                                                                   */
/* The views and conclusions contained in the software and           */
/* documentation are those of the authors and should not be          */
/* interpreted as representing official policies, either expressed   */
/* or implied, of The University of Texas at Austin.                 */
/*********************************************************************/

#define ASSEMBLER
#include "common.h"

/*
 * Vector copy kernel (SPARC): Y[i * INCY] = X[i * INCX] for i = 0 .. N-1.
 *
 * Register roles (the %i registers are read after SAVESP, which is
 * presumably a register-window `save` defined in common.h -- confirm):
 *   N     %i0  element count
 *   X     %i1  source pointer, advanced as elements are read
 *   INCX  %i2  source stride; shifted left by BASE_SHIFT on entry
 *              (assumed to convert an element stride to bytes -- confirm)
 *   Y     %i3  destination pointer, advanced as elements are written
 *   INCY  %i4  destination stride; scaled the same way as INCX
 *   I     %i5  loop counter (scratch)
 *
 * a1..a16 name floating-point scratch registers; the DOUBLE build uses
 * even-numbered registers.  Only a1..a8 are used by this routine.
 *
 * Two paths:
 *   - both scaled strides equal SIZE : contiguous path with prefetch
 *   - otherwise                      : strided path (.LL50)
 * Each path copies 8 elements per iteration and then finishes the
 * remaining (N & 7) elements one at a time.
 *
 * Returns 0 in the caller's %o0.  N <= 0 copies nothing.
 *
 * All loop tests use %icc, i.e. the low 32 bits of the counter.
 */

#define N	%i0
#define X	%i1
#define INCX	%i2
#define Y	%i3
#define INCY	%i4
#define I	%i5

#ifdef DOUBLE
#define a1	%f0
#define a2	%f2
#define a3	%f4
#define a4	%f6
#define a5	%f8
#define a6	%f10
#define a7	%f12
#define a8	%f14
#define a9	%f16
#define a10	%f18
#define a11	%f20
#define a12	%f22
#define a13	%f24
#define a14	%f26
#define a15	%f28
#define a16	%f30
#else
#define a1	%f0
#define a2	%f1
#define a3	%f2
#define a4	%f3
#define a5	%f4
#define a6	%f5
#define a7	%f6
#define a8	%f7
#define a9	%f8
#define a10	%f9
#define a11	%f10
#define a12	%f11
#define a13	%f12
#define a14	%f13
#define a15	%f14
#define a16	%f15
#endif

	PROLOGUE
	SAVESP

	/*
	 * Guard against N <= 0.  Without it a negative N skips the
	 * unrolled loop but (N & 7) may still be positive, so the tail
	 * loop would copy up to 7 elements.
	 */
	cmp	N, 0
	ble,pn	%icc, .LL59
	nop

	/* Scale both strides by BASE_SHIFT. */
	sll	INCX, BASE_SHIFT, INCX
	sll	INCY, BASE_SHIFT, INCY

	/* Take the strided path unless both scaled strides equal SIZE. */
	cmp	INCX, SIZE
	bne	.LL50
	nop
	cmp	INCY, SIZE
	bne	.LL50
	nop

	/* Contiguous path: I = N / 8 unrolled iterations. */
	sra	N, 3, I
	cmp	I, 0
	ble,pn	%icc, .LL15
	nop

/* Prefetch distance, in elements, ahead of the current X / Y position. */
#define PREFETCHSIZE 32

.LL11:
	/* Load 8 consecutive elements of X. */
	LDF	[X +  0 * SIZE], a1
	prefetch [X + PREFETCHSIZE * SIZE], 0
	LDF	[X +  1 * SIZE], a2
	LDF	[X +  2 * SIZE], a3
	LDF	[X +  3 * SIZE], a4
	LDF	[X +  4 * SIZE], a5
	LDF	[X +  5 * SIZE], a6
	LDF	[X +  6 * SIZE], a7
	LDF	[X +  7 * SIZE], a8

	/* Store them to 8 consecutive elements of Y. */
	STF	a1, [Y +  0 * SIZE]
	prefetch [Y + PREFETCHSIZE * SIZE], 0
	STF	a2, [Y +  1 * SIZE]
	STF	a3, [Y +  2 * SIZE]
	STF	a4, [Y +  3 * SIZE]
	STF	a5, [Y +  4 * SIZE]
	STF	a6, [Y +  5 * SIZE]
	STF	a7, [Y +  6 * SIZE]
	STF	a8, [Y +  7 * SIZE]

	add	I, -1, I
	cmp	I, 0			/* sets %icc for the bg below; add does not touch it */
	add	Y, 8 * SIZE, Y
	add	X, 8 * SIZE, X

	bg,pt	%icc, .LL11
	nop


.LL15:
	/* Contiguous tail: I = N & 7 leftover elements. */
	and	N, 7, I
	cmp	I, 0
	ble,a,pn %icc, .LL19
	nop

.LL16:
	LDF	[X +  0 * SIZE], a1
	add	I, -1, I
	cmp	I, 0
	add	X, 1 * SIZE, X
	STF	a1, [Y +  0 * SIZE]
	bg,pt	%icc, .LL16
	add	Y, 1 * SIZE, Y		/* delay slot: advance Y */

.LL19:
	/*
	 * Return 0.  The delay slot of `return` executes after the
	 * register window has been restored, so %o0 here is the caller's
	 * return-value register.  (Writing %g0 instead would be a no-op
	 * and leave the return value undefined.)
	 */
	return	%i7 + 8
	clr	%o0

.LL50:
	/* Strided path: I = N / 8 unrolled iterations. */
	sra	N, 3, I
	cmp	I, 0
	ble,pn	%icc, .LL55
	nop

.LL51:
	/* Gather 8 elements from X, stepping by INCX after each load. */
	LDF	[X +  0 * SIZE], a1
	add	X, INCX, X
	LDF	[X +  0 * SIZE], a2
	add	X, INCX, X
	LDF	[X +  0 * SIZE], a3
	add	X, INCX, X
	LDF	[X +  0 * SIZE], a4
	add	X, INCX, X
	LDF	[X +  0 * SIZE], a5
	add	X, INCX, X
	LDF	[X +  0 * SIZE], a6
	add	X, INCX, X
	LDF	[X +  0 * SIZE], a7
	add	X, INCX, X
	LDF	[X +  0 * SIZE], a8
	add	X, INCX, X

	/*
	 * Scatter them to Y, stepping by INCY after each store.  The
	 * counter decrement and compare are interleaved with the stores;
	 * nothing after the cmp modifies %icc before the branch.
	 */
	STF	a1, [Y +  0 * SIZE]
	add	Y, INCY, Y
	add	I, -1, I
	STF	a2, [Y +  0 * SIZE]
	add	Y, INCY, Y
	cmp	I, 0
	STF	a3, [Y +  0 * SIZE]
	add	Y, INCY, Y
	STF	a4, [Y +  0 * SIZE]
	add	Y, INCY, Y
	STF	a5, [Y +  0 * SIZE]
	add	Y, INCY, Y
	STF	a6, [Y +  0 * SIZE]
	add	Y, INCY, Y
	STF	a7, [Y +  0 * SIZE]
	add	Y, INCY, Y
	STF	a8, [Y +  0 * SIZE]

	bg,pt	%icc, .LL51
	add	Y, INCY, Y		/* delay slot: step past the 8th store */

.LL55:
	/* Strided tail: I = N & 7 leftover elements. */
	and	N, 7, I
	cmp	I, 0
	ble,a,pn %icc, .LL59
	nop

.LL56:
	LDF	[X +  0 * SIZE], a1
	add	I, -1, I
	cmp	I, 0
	add	X, INCX, X
	STF	a1, [Y +  0 * SIZE]
	bg,pt	%icc, .LL56
	add	Y, INCY, Y		/* delay slot: advance Y */

.LL59:
	/* Return 0 in the caller's %o0 (delay slot runs in the caller's window). */
	return	%i7 + 8
	clr	%o0

	EPILOGUE