/*********************************************************************/
/* Copyright 2005-2010 The University of Texas at Austin.            */
/* All rights reserved.                                              */
/*                                                                   */
/* Redistribution and use in source and binary forms, with or        */
/* without modification, are permitted provided that the following   */
/* conditions are met:                                               */
/*                                                                   */
/*   1. Redistributions of source code must retain the above         */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer.                                                  */
/*                                                                   */
/*   2. Redistributions in binary form must reproduce the above      */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer in the documentation and/or other materials       */
/*      provided with the distribution.                              */
/*                                                                   */
/*    THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT        */
/*    AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,        */
/*    INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF       */
/*    MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE       */
/*    DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT       */
/*    AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,     */
/*    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES       */
/*    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE      */
/*    GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR           */
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF     */
/*    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT      */
/*    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT     */
/*    OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE            */
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
/*                                                                   */
/* The views and conclusions contained in the software and           */
/* documentation are those of the authors and should not be          */
/* interpreted as representing official policies, either expressed   */
/* or implied, of The University of Texas at Austin.
*/
/*********************************************************************/

/*
 * Two-vector interleaving copy kernel (SPARC, preprocessed assembly).
 *
 * Register arguments, as named by the #defines below:
 *   M   (%i0)  number of elements taken from each source vector
 *   N   (%i1)  number of source vectors
 *   A   (%i2)  source pointer
 *   LDA (%i3)  distance between consecutive source vectors; it is
 *              shifted left by BASE_SHIFT before use (presumably an
 *              element count turned into a byte stride -- BASE_SHIFT
 *              comes from common.h, confirm there)
 *   B   (%i4)  destination pointer, written strictly sequentially
 *
 * Effect for M, N >= 0, in C-like pseudo code (the source vectors are
 * presumably adjacent columns of a column-major matrix):
 *
 *   for (j = 0; j < (N >> 1); j++) {
 *       a1 = A;  a2 = A + LDA;  A = a2 + LDA;
 *       for (i = 0; i < M; i++) { *B++ = a1[i]; *B++ = a2[i]; }
 *   }
 *   if (N & 1)
 *       for (i = 0; i < M; i++) *B++ = A[i];
 *   return 0;
 *
 * PROLOGUE, SAVESP, EPILOGUE, LDF, STF, SIZE and BASE_SHIFT are all
 * supplied by common.h.  LDF/STF are presumably the floating-point
 * load/store for the selected precision and SIZE the element size in
 * bytes -- verify against common.h.
 *
 * SPARC branches have a delay slot.  None of the branches below is
 * annulled, so the instruction that follows each branch executes
 * whether or not the branch is taken.  The instruction order is
 * deliberate; do not reorder it casually.
 */

#define ASSEMBLER
#include "common.h"

/* Prefetch distances, in elements, ahead of the read and write pointers. */
#define PREFETCHSIZE	72
#define WPREFETCHSIZE	20

/* Incoming arguments. */
#define M	%i0
#define N	%i1
#define A	%i2
#define LDA	%i3
#define B	%i4

/* Read pointers.  Only A1 and A2 are used in this file; A3 and A4 are
   defined but never referenced. */
#define A1	%l0
#define A2	%l1
#define A3	%l2
#define A4	%l3

/* Loop counters: I counts inner-loop iterations, J counts vector pairs. */
#define I	%l4
#define J	%l5

/* Sixteen floating-point transfer registers: even-numbered registers
   when DOUBLE is defined, consecutive registers otherwise. */
#ifdef DOUBLE
#define t01	%f0
#define t02	%f2
#define t03	%f4
#define t04	%f6
#define t05	%f8
#define t06	%f10
#define t07	%f12
#define t08	%f14
#define t09	%f16
#define t10	%f18
#define t11	%f20
#define t12	%f22
#define t13	%f24
#define t14	%f26
#define t15	%f28
#define t16	%f30
#else
#define t01	%f0
#define t02	%f1
#define t03	%f2
#define t04	%f3
#define t05	%f4
#define t06	%f5
#define t07	%f6
#define t08	%f7
#define t09	%f8
#define t10	%f9
#define t11	%f10
#define t12	%f11
#define t13	%f12
#define t14	%f13
#define t15	%f14
#define t16	%f15
#endif

	PROLOGUE
	SAVESP

	sra	N, 1, J				/* J = N >> 1: number of vector pairs */
	cmp	J, 0
	ble,pn	%icc, .LL_single		/* no pair at all */
	sll	LDA, BASE_SHIFT, LDA		/* delay slot: scale LDA on both paths */

/* ---- one pair of source vectors per pass ------------------------- */
.LL_pair_begin:
	add	A, LDA, A2			/* A2 = second vector of the pair */
	mov	A, A1				/* A1 = first vector of the pair */
	sra	M, 3, I				/* I = M >> 3: 8-element chunks */
	cmp	I, 0

	ble,pn	%icc, .LL_pair_rem
	add	A2, LDA, A			/* delay slot: A -> next pair, on both paths */

/* Unrolled by 8: load 8 elements from each vector, store 16 interleaved. */
.LL_pair_x8:
	prefetch [A1 + PREFETCHSIZE * SIZE], 0	/* fcn 0: read prefetch (SPARC V9) */
	LDF	[A1 + 0 * SIZE], t01
	LDF	[A2 + 0 * SIZE], t02
	LDF	[A1 + 1 * SIZE], t03
	LDF	[A2 + 1 * SIZE], t04
	LDF	[A1 + 2 * SIZE], t05
	LDF	[A2 + 2 * SIZE], t06
	LDF	[A1 + 3 * SIZE], t07
	LDF	[A2 + 3 * SIZE], t08

	prefetch [A2 + PREFETCHSIZE * SIZE], 0
	LDF	[A1 + 4 * SIZE], t09
	LDF	[A2 + 4 * SIZE], t10
	LDF	[A1 + 5 * SIZE], t11
	LDF	[A2 + 5 * SIZE], t12
	LDF	[A1 + 6 * SIZE], t13
	LDF	[A2 + 6 * SIZE], t14
	LDF	[A1 + 7 * SIZE], t15
	LDF	[A2 + 7 * SIZE], t16

	add	A1, 8 * SIZE, A1
	add	I, -1, I
	add	A2, 8 * SIZE, A2
	cmp	I, 0				/* sets %icc for the bg at the end of the loop */

	prefetch [B + WPREFETCHSIZE * SIZE], 2	/* fcn 2: write prefetch (SPARC V9) */
	STF	t01, [B +  0 * SIZE]
	STF	t02, [B +  1 * SIZE]
	STF	t03, [B +  2 * SIZE]
	STF	t04, [B +  3 * SIZE]
	STF	t05, [B +  4 * SIZE]
	STF	t06, [B +  5 * SIZE]
	STF	t07, [B +  6 * SIZE]
	STF	t08, [B +  7 * SIZE]

	prefetch [B + (WPREFETCHSIZE + 8) * SIZE], 2
	STF	t09, [B +  8 * SIZE]
	STF	t10, [B +  9 * SIZE]
	STF	t11, [B + 10 * SIZE]
	STF	t12, [B + 11 * SIZE]
	STF	t13, [B + 12 * SIZE]
	STF	t14, [B + 13 * SIZE]
	STF	t15, [B + 14 * SIZE]
	STF	t16, [B + 15 * SIZE]

	bg,pt	%icc, .LL_pair_x8
	add	B, 16 * SIZE, B			/* delay slot: advance the write pointer */

/* Remaining M & 7 elements of the pair, one element of each per pass. */
.LL_pair_rem:
	and	M, 7, I
	cmp	I, 0
	ble,pn	%icc, .LL_pair_next
	nop

.LL_pair_x1:
	LDF	[A1 + 0 * SIZE], t01
	add	A1, 1 * SIZE, A1
	LDF	[A2 + 0 * SIZE], t02
	add	A2, 1 * SIZE, A2

	STF	t01, [B + 0 * SIZE]
	add	I, -1, I
	STF	t02, [B + 1 * SIZE]
	cmp	I, 0
	bg,pt	%icc, .LL_pair_x1
	add	B, 2 * SIZE, B			/* delay slot */

.LL_pair_next:
	add	J, -1, J
	cmp	J, 0
	bg,pt	%icc, .LL_pair_begin
	nop

/* ---- odd N: one source vector is left, copied without interleave -- */
.LL_single:
	and	N, 1, J
	cmp	J, 0
	ble,pn	%icc, .LL_done
	nop

.LL_single_begin:
	sra	M, 2, I				/* I = M >> 2: 4-element chunks */
	cmp	I, 0
	ble,pn	%icc, .LL_single_rem
	mov	A, A1				/* delay slot: A1 is needed on both paths */

.LL_single_x4:
	LDF	[A1 + 0 * SIZE], t01
	LDF	[A1 + 1 * SIZE], t02
	LDF	[A1 + 2 * SIZE], t03
	LDF	[A1 + 3 * SIZE], t04

	STF	t01, [B + 0 * SIZE]
	add	I, -1, I
	STF	t02, [B + 1 * SIZE]
	cmp	I, 0
	STF	t03, [B + 2 * SIZE]
	add	A1, 4 * SIZE, A1
	STF	t04, [B + 3 * SIZE]

	bg,pt	%icc, .LL_single_x4
	add	B, 4 * SIZE, B			/* delay slot */

/* Remaining M & 3 elements of the last vector. */
.LL_single_rem:
	and	M, 3, I
	cmp	I, 0
	ble,pn	%icc, .LL_done
	nop

.LL_single_x1:
	LDF	[A1 + 0 * SIZE], t01
	add	A1, 1 * SIZE, A1
	add	I, -1, I
	cmp	I, 0

	STF	t01, [B + 0 * SIZE]
	bg,pt	%icc, .LL_single_x1
	add	B, 1 * SIZE, B			/* delay slot */

.LL_done:
	return	%i7 + 8
	clr	%o0				/* delay slot of return: result register = 0 */

	EPILOGUE