/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin.           */
/* All rights reserved.                                              */
/*                                                                   */
/* Redistribution and use in source and binary forms, with or        */
/* without modification, are permitted provided that the following   */
/* conditions are met:                                               */
/*                                                                   */
/*   1. Redistributions of source code must retain the above         */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer.                                                  */
/*                                                                   */
/*   2. Redistributions in binary form must reproduce the above      */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer in the documentation and/or other materials       */
/*      provided with the distribution.                              */
/*                                                                   */
/*    THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT        */
/*    AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,        */
/*    INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF       */
/*    MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE       */
/*    DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT       */
/*    AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,     */
/*    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES       */
/*    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE      */
/*    GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR           */
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF     */
/*    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT      */
/*    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT     */
/*    OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE            */
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
/*                                                                   */
/* The views and conclusions contained in the software and           */
/* documentation are those of the authors and should not be          */
/* interpreted as representing official policies, either expressed   */
/* or implied, of The University of Texas at Austin.                 */
/*********************************************************************/

#define ASSEMBLER
#include "common.h"

/*
 * Pack routine: copies M x N elements from A into the contiguous
 * buffer B.
 *
 * A is walked as N vectors of M consecutive elements; successive
 * vectors start (LDA << BASE_SHIFT) bytes apart.  (Presumably these are
 * the columns of a column-major matrix and BASE_SHIFT is log2(SIZE);
 * both come from outside this file -- confirm against common.h and
 * the caller.)
 *
 * Vectors are consumed in groups, and within a group the output is
 * interleaved by element index:
 *
 *   - groups of 4 (N >> 2 of them): B gets v0[i], v1[i], v2[i], v3[i]
 *     for i = 0 .. M-1;
 *   - then one group of 2 if (N & 2):  v0[i], v1[i];
 *   - then one single vector if (N & 1): plain sequential copy.
 *
 * Every group processes M in steps of 4 elements (M >> 2 iterations)
 * followed by (M & 3) single-element iterations.
 *
 * Counts are derived with sra (32-bit arithmetic shift) and tested
 * on %icc, so M and N are treated as 32-bit signed values.
 *
 * The routine returns 0 in the caller-visible %o0.
 *
 * PROLOGUE, SAVESP, EPILOGUE, LDF, STF, SIZE and BASE_SHIFT are
 * supplied by common.h.
 */

/* Look-ahead distances for the 4x4 loop, in elements (they are
   multiplied by SIZE where used): reads of A and writes of B. */
#define PREFETCHSIZE	36
#define WPREFETCHSIZE	20

/* Arguments, as seen in the %i registers. */
#define M	%i0		/* elements per vector            */
#define N	%i1		/* number of vectors              */
#define A	%i2		/* source; advanced group by group */
#define LDA	%i3		/* vector stride; scaled on entry  */
#define B	%i4		/* destination; advanced as written */

/* Read cursors, one per vector of the current group. */
#define A1	%l0
#define A2	%l1
#define A3	%l2
#define A4	%l3

/* Loop counters. */
#define I	%l4		/* inner: iterations left along M */
#define J	%l5		/* outer: groups left / remainder test */

/*
 * Sixteen data registers for the 4x4 tile.  c01-c04 hold four
 * consecutive elements of vector 1, c05-c08 of vector 2, c09-c12 of
 * vector 3 and c13-c16 of vector 4.  The DOUBLE build uses the even
 * registers %f0..%f30; otherwise %f0..%f15 are used.
 */
#ifdef DOUBLE
#define c01	%f0
#define c02	%f2
#define c03	%f4
#define c04	%f6
#define c05	%f8
#define c06	%f10
#define c07	%f12
#define c08	%f14
#define c09	%f16
#define c10	%f18
#define c11	%f20
#define c12	%f22
#define c13	%f24
#define c14	%f26
#define c15	%f28
#define c16	%f30
#else
#define c01	%f0
#define c02	%f1
#define c03	%f2
#define c04	%f3
#define c05	%f4
#define c06	%f5
#define c07	%f6
#define c08	%f7
#define c09	%f8
#define c10	%f9
#define c11	%f10
#define c12	%f11
#define c13	%f12
#define c14	%f13
#define c15	%f14
#define c16	%f15
#endif

	PROLOGUE
	SAVESP

	/* J = number of complete 4-vector groups. */
	sra	N, 2, J
	cmp	J, 0
	ble,pn	%icc, .LL_n2_check
	/* Delay slot, executed on both paths: scale the stride once so
	   that every later "add ..., LDA, ..." steps one whole vector. */
	sll	LDA, BASE_SHIFT, LDA

/* ------------------------------------------------------------------ */
/* Groups of four vectors                                              */
/* ------------------------------------------------------------------ */
.LL_n4_group:
	/* Set up the four read cursors and I = M >> 2. */
	add	A,  LDA, A2
	mov	A,  A1
	add	A2, LDA, A3
	sra	M, 2, I
	add	A3, LDA, A4
	cmp	I, 0

	ble,pn	%icc, .LL_n4_mtail
	/* Delay slot: A now points at the first vector of the next group. */
	add	A4, LDA, A

	/* 4 x 4 tile: load 4 elements from each of the 4 vectors, store
	   them to B interleaved by element index. */
.LL_n4_m4:
	prefetch [A1 + (PREFETCHSIZE + 0) * SIZE], 0
	LDF	[A1 +  0 * SIZE], c01
	LDF	[A2 +  0 * SIZE], c05
	LDF	[A3 +  0 * SIZE], c09
	LDF	[A4 +  0 * SIZE], c13

	prefetch [A2 + (PREFETCHSIZE + 0) * SIZE], 0
	LDF	[A1 +  1 * SIZE], c02
	LDF	[A2 +  1 * SIZE], c06
	LDF	[A3 +  1 * SIZE], c10
	LDF	[A4 +  1 * SIZE], c14

	prefetch [A3 + (PREFETCHSIZE + 0) * SIZE], 0
	LDF	[A1 +  2 * SIZE], c03
	LDF	[A2 +  2 * SIZE], c07
	LDF	[A3 +  2 * SIZE], c11
	LDF	[A4 +  2 * SIZE], c15

	prefetch [A4 + (PREFETCHSIZE + 0) * SIZE], 0
	LDF	[A1 +  3 * SIZE], c04
	LDF	[A2 +  3 * SIZE], c08
	LDF	[A3 +  3 * SIZE], c12
	LDF	[A4 +  3 * SIZE], c16

	/* Stores are interleaved with the cursor/counter updates.  The
	   cmp below sets the flags consumed by the bg at the end of the
	   tile; the STF/prefetch instructions in between do not write
	   the integer condition codes. */
	prefetch [B + (WPREFETCHSIZE + 0) * SIZE], 2
	STF	c01, [B +  0 * SIZE]
	add	A1,  4 * SIZE, A1
	STF	c05, [B +  1 * SIZE]
	add	A2,  4 * SIZE, A2
	STF	c09, [B +  2 * SIZE]
	add	A3,  4 * SIZE, A3
	STF	c13, [B +  3 * SIZE]
	add	A4,  4 * SIZE, A4
	STF	c02, [B +  4 * SIZE]
	add	I, -1, I
	STF	c06, [B +  5 * SIZE]
	cmp	I, 0
	STF	c10, [B +  6 * SIZE]
	STF	c14, [B +  7 * SIZE]
#ifdef DOUBLE
	/* Second write prefetch, 8 elements further on; DOUBLE build
	   only (presumably because the 16-element tile then covers more
	   than one cache line -- not stated in this file). */
	prefetch [B + (WPREFETCHSIZE + 8) * SIZE], 2
#endif
	STF	c03, [B +  8 * SIZE]
	STF	c07, [B +  9 * SIZE]
	STF	c11, [B + 10 * SIZE]
	STF	c15, [B + 11 * SIZE]
	STF	c04, [B + 12 * SIZE]
	STF	c08, [B + 13 * SIZE]
	STF	c12, [B + 14 * SIZE]
	STF	c16, [B + 15 * SIZE]
	bg,pt	%icc, .LL_n4_m4
	add	B, 16 * SIZE, B		/* delay slot: past the tile just written */

	/* Remaining M & 3 elements of this group, one per iteration. */
.LL_n4_mtail:
	and	M, 3, I
	cmp	I, 0
	ble,pn	%icc, .LL_n4_next
	nop

.LL_n4_m1:
	LDF	[A1 +  0 * SIZE], c01
	add	A1,  1 * SIZE, A1
	LDF	[A2 +  0 * SIZE], c05
	add	A2,  1 * SIZE, A2
	LDF	[A3 +  0 * SIZE], c09
	add	A3,  1 * SIZE, A3
	LDF	[A4 +  0 * SIZE], c13
	add	A4,  1 * SIZE, A4

	STF	c01, [B +  0 * SIZE]
	add	I, -1, I
	STF	c05, [B +  1 * SIZE]
	cmp	I, 0
	STF	c09, [B +  2 * SIZE]
	STF	c13, [B +  3 * SIZE]
	bg,pt	%icc, .LL_n4_m1
	add	B, 4 * SIZE, B		/* delay slot */

.LL_n4_next:
	add	J, -1, J
	cmp	J, 0
	bg,pt	%icc, .LL_n4_group
	nop

/* ------------------------------------------------------------------ */
/* One group of two vectors, if bit 1 of N is set                      */
/* ------------------------------------------------------------------ */
.LL_n2_check:
	and	N, 2, J
	cmp	J, 0
	ble,pn	%icc, .LL_n1_check
	nop

.LL_n2_setup:
	sra	M, 2, I
	add	A, LDA, A2
	cmp	I, 0
	mov	A, A1

	ble,pn	%icc, .LL_n2_mtail
	add	A2, LDA, A		/* delay slot: A skips both vectors */

	/* 4 elements from each of the 2 vectors per iteration. */
.LL_n2_m4:
	LDF	[A1 +  0 * SIZE], c01
	LDF	[A2 +  0 * SIZE], c05
	LDF	[A1 +  1 * SIZE], c02
	LDF	[A2 +  1 * SIZE], c06

	LDF	[A1 +  2 * SIZE], c03
	LDF	[A2 +  2 * SIZE], c07
	LDF	[A1 +  3 * SIZE], c04
	LDF	[A2 +  3 * SIZE], c08

	STF	c01, [B +  0 * SIZE]
	add	A1,  4 * SIZE, A1
	STF	c05, [B +  1 * SIZE]
	add	A2,  4 * SIZE, A2
	STF	c02, [B +  2 * SIZE]
	add	I, -1, I
	STF	c06, [B +  3 * SIZE]
	cmp	I, 0
	STF	c03, [B +  4 * SIZE]
	STF	c07, [B +  5 * SIZE]
	STF	c04, [B +  6 * SIZE]
	STF	c08, [B +  7 * SIZE]

	bg,pt	%icc, .LL_n2_m4
	add	B, 8 * SIZE, B		/* delay slot */

	/* Remaining M & 3 elements, one per iteration. */
.LL_n2_mtail:
	and	M, 3, I
	cmp	I, 0
	ble,pn	%icc, .LL_n1_check
	nop

.LL_n2_m1:
	LDF	[A1 +  0 * SIZE], c01
	add	A1,  1 * SIZE, A1
	add	I, -1, I
	LDF	[A2 +  0 * SIZE], c05
	add	A2,  1 * SIZE, A2
	cmp	I, 0

	STF	c01, [B +  0 * SIZE]
	STF	c05, [B +  1 * SIZE]
	bg,pt	%icc, .LL_n2_m1
	add	B, 2 * SIZE, B		/* delay slot */

/* ------------------------------------------------------------------ */
/* One last single vector, if bit 0 of N is set                        */
/* ------------------------------------------------------------------ */
.LL_n1_check:
	and	N, 1, J
	cmp	J, 0
	ble,pn	%icc, .LL_done
	nop

.LL_n1_setup:
	sra	M, 2, I
	cmp	I, 0
	ble,pn	%icc, .LL_n1_mtail
	mov	A, A1			/* delay slot: cursor is set on both paths */

	/* Straight copy, 4 elements per iteration. */
.LL_n1_m4:
	LDF	[A1 +  0 * SIZE], c01
	LDF	[A1 +  1 * SIZE], c02
	LDF	[A1 +  2 * SIZE], c03
	LDF	[A1 +  3 * SIZE], c04

	STF	c01, [B +  0 * SIZE]
	add	I, -1, I
	STF	c02, [B +  1 * SIZE]
	cmp	I, 0
	STF	c03, [B +  2 * SIZE]
	add	A1, 4 * SIZE, A1
	STF	c04, [B +  3 * SIZE]

	bg,pt	%icc, .LL_n1_m4
	add	B, 4 * SIZE, B		/* delay slot */

	/* Remaining M & 3 elements, one per iteration. */
.LL_n1_mtail:
	and	M, 3, I
	cmp	I, 0
	ble,pn	%icc, .LL_done
	nop

.LL_n1_m1:
	LDF	[A1 +  0 * SIZE], c01
	add	A1, 1 * SIZE, A1
	add	I, -1, I
	cmp	I, 0

	STF	c01, [B +  0 * SIZE]
	bg,pt	%icc, .LL_n1_m1
	add	B, 1 * SIZE, B		/* delay slot */

.LL_done:
	/* The delay slot of "return" runs after the register window has
	   been restored, so this clears the caller-visible %o0: the
	   routine returns 0. */
	return	%i7 + 8
	clr	%o0

	EPILOGUE