/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin.           */
/* All rights reserved.                                              */
/*                                                                   */
/* Redistribution and use in source and binary forms, with or        */
/* without modification, are permitted provided that the following   */
/* conditions are met:                                               */
/*                                                                   */
/*   1. Redistributions of source code must retain the above         */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer.                                                  */
/*                                                                   */
/*   2. Redistributions in binary form must reproduce the above      */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer in the documentation and/or other materials       */
/*      provided with the distribution.                              */
/*                                                                   */
/*    THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT        */
/*    AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,        */
/*    INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF       */
/*    MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE       */
/*    DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT       */
/*    AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,     */
/*    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES       */
/*    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE      */
/*    GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR           */
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF     */
/*    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT      */
/*    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT     */
/*    OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE            */
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
/*                                                                   */
/* The views and conclusions contained in the software and           */
/* documentation are those of the authors and should not be          */
/* interpreted as representing official policies, either expressed   */
/* or implied, of The University of Texas at Austin.                 */
/*********************************************************************/

/*
 * In-place vector scaling kernel:  x[i] = ALPHA * x[i]  for N strided
 * elements.
 *
 * Two code paths are selected by comparing ALPHA with 0.0:
 *   - ALPHA == 0 : every element is overwritten with 0.0 (16 stores per
 *                  unrolled iteration, then an N & 15 tail loop).  x is
 *                  never loaded on this path, so whatever it held before
 *                  is simply replaced.
 *   - otherwise  : load / multiply / store, software-pipelined in groups
 *                  of 8 elements, then an N & 7 tail loop.
 *
 * Returns immediately when N <= 0.
 *
 * NOTE(review): PROLOGUE, PROFCODE, EPILOGUE, LL(), SP, SIZE, BASE_SHIFT,
 * LFDUX, STFDUX and FMUL are macros from common.h and are not visible
 * here.  The comments below assume LFDUX / STFDUX are the update-indexed
 * floating-point load / store for the configured precision and that
 * BASE_SHIFT is log2 of the element size in bytes -- confirm in common.h.
 */

#define ASSEMBLER
#include "common.h"

/* Integer registers used by the kernel. */
#define N	r3	/* element count                                   */
#define XX	r4	/* store cursor on the multiply path               */
#define PRE	r5	/* byte offset handed to dcbtst (prefetch hint)    */

/*
 * X and INCX live in different registers depending on OS / ABI and
 * precision (presumably following where the caller passes the vector
 * pointer and its stride -- confirm against the calling interface).
 *
 * `linux` is a legacy, non-reserved macro that GCC leaves undefined under
 * strict -std= modes; `__linux__` is always defined on Linux targets, so
 * both spellings are accepted.
 */
#if defined(linux) || defined(__linux__) || defined(__FreeBSD__)
#ifndef __64BIT__
#define X	r6
#define INCX	r7
#else
#define X	r7
#define INCX	r8
#endif
#endif

#if defined(_AIX) || defined(__APPLE__)
#if !defined(__64BIT__) && defined(DOUBLE)
#define X	r8
#define INCX	r9
#else
#define X	r7
#define INCX	r8
#endif
#endif

/* Fail early and readably instead of with an obscure assembler error. */
#if !defined(X) || !defined(INCX)
#error "scal kernel: no X/INCX register mapping for this platform/ABI"
#endif

/* Floating-point registers. */
#define FZERO	f0	/* constant 0.0, built below                       */
#define ALPHA	f1	/* scale factor                                    */

	PROLOGUE
	PROFCODE

	/* Build 0.0 in FZERO: write an integer zero word to a temporary
	   8-byte stack slot and reload it as a single-precision float. */
	addi	SP, SP, -8
	li	r0,   0

	stw	r0,      0(SP)
	lfs	FZERO, 0(SP)

	addi	SP, SP,  8

	/* Scale the stride by 2^BASE_SHIFT (elements -> bytes, see note at
	   the top) and set the prefetch distance to 3 * 16 * SIZE. */
	slwi	INCX, INCX, BASE_SHIFT
	li	PRE, 3 * 16 * SIZE

	/* Nothing to do for N <= 0. */
	cmpwi	cr0, N, 0
	blelr-	cr0

	/* Pre-bias the pointer by one stride: the update-form accesses below
	   add INCX to the base register before touching memory. */
	sub	X, X, INCX

	/* Take the multiply path unless ALPHA compares equal to 0.0 (an
	   unordered compare result also takes the multiply path). */
	fcmpu	cr0, FZERO, ALPHA
	bne-	cr0, LL(A1I1)

	/* ---------------- ALPHA == 0 : fill with zeros ---------------- */

	/* CTR = N / 16 unrolled iterations; skip straight to the tail loop
	   when there is no full group of 16. */
	srawi.	r0, N, 4
	mtspr	CTR, r0
	beq-	cr0, LL(A0I1_Remain)
	.align 4

LL(A0I1_kernel):
	/* 16 zero stores per iteration, with store-prefetch hints on G4. */
#ifdef PPCG4
	dcbtst	X, PRE
#endif

	STFDUX	FZERO, X, INCX
	STFDUX	FZERO, X, INCX
	STFDUX	FZERO, X, INCX
	STFDUX	FZERO, X, INCX

#if defined(PPCG4) && defined(DOUBLE)
	dcbtst	X, PRE
#endif

	STFDUX	FZERO, X, INCX
	STFDUX	FZERO, X, INCX
	STFDUX	FZERO, X, INCX
	STFDUX	FZERO, X, INCX

#ifdef PPCG4
	dcbtst	X, PRE
#endif

	STFDUX	FZERO, X, INCX
	STFDUX	FZERO, X, INCX
	STFDUX	FZERO, X, INCX
	STFDUX	FZERO, X, INCX

#if defined(PPCG4) && defined(DOUBLE)
	dcbtst	X, PRE
#endif

	STFDUX	FZERO, X, INCX
	STFDUX	FZERO, X, INCX
	STFDUX	FZERO, X, INCX
	STFDUX	FZERO, X, INCX
	bdnz	LL(A0I1_kernel)
	.align 4

LL(A0I1_Remain):
	/* Tail: N mod 16 single stores; return if there is none. */
	andi.	r0, N, 15
	mtspr	CTR, r0
	beqlr+
	.align 4

LL(A0I1_RemainKernel):
	STFDUX	FZERO, X, INCX
	bdnz	LL(A0I1_RemainKernel)
	blr
	.align 4

	/* ---------------- ALPHA != 0 : x = ALPHA * x ------------------ */

LL(A1I1):
	/* XX trails X: loads advance X, stores advance XX, both over the
	   same (pre-biased) vector. */
	mr	XX, X

	/* CTR = N / 8 groups; go to the tail loop if there is no full group. */
	srawi.	r0, N, 3
	mtspr	CTR, r0
	beq+	LL(A1I1_Remain)

	/* Pipeline prologue: fetch the first half (4 elements) of group 0.
	   With exactly one group, skip the steady-state loop entirely. */
	LFDUX	f2, X, INCX
	LFDUX	f3, X, INCX
	LFDUX	f4, X, INCX
	LFDUX	f5, X, INCX
	bdz	LL(12)
	.align 4

LL(11):
	/* Steady state: load the second half of the current group while
	   scaling the first half ... */
	LFDUX	f6, X, INCX
	FMUL	f2, ALPHA, f2
	LFDUX	f7, X, INCX
	FMUL	f3, ALPHA, f3
	LFDUX	f8, X, INCX
	FMUL	f4, ALPHA, f4
	LFDUX	f9, X, INCX
	FMUL	f5, ALPHA, f5

#ifdef PPCG4
	dcbtst	X, PRE
#endif
	STFDUX	f2, XX, INCX
	STFDUX	f3, XX, INCX
	STFDUX	f4, XX, INCX
	STFDUX	f5, XX, INCX

	/* ... then load the first half of the next group while scaling the
	   second half of the current one. */
	LFDUX	f2, X, INCX
	FMUL	f6, ALPHA, f6
	LFDUX	f3, X, INCX
	FMUL	f7, ALPHA, f7
	LFDUX	f4, X, INCX
	FMUL	f8, ALPHA, f8
	LFDUX	f5, X, INCX
	FMUL	f9, ALPHA, f9

	STFDUX	f6, XX, INCX
	STFDUX	f7, XX, INCX
	STFDUX	f8, XX, INCX
	STFDUX	f9, XX, INCX

#if defined(PPCG4) && defined(DOUBLE)
	dcbtst	X, PRE
#endif

	bdnz	LL(11)
	.align 4

LL(12):
	/* Pipeline epilogue: f2-f5 already hold the first half of the last
	   group; load its second half, scale all eight and store them. */
	LFDUX	f6, X, INCX
	FMUL	f2, ALPHA, f2
	LFDUX	f7, X, INCX
	FMUL	f3, ALPHA, f3
	LFDUX	f8, X, INCX
	FMUL	f4, ALPHA, f4
	LFDUX	f9, X, INCX
	FMUL	f5, ALPHA, f5

	STFDUX	f2, XX, INCX
	FMUL	f6, ALPHA, f6
	STFDUX	f3, XX, INCX
	FMUL	f7, ALPHA, f7
	STFDUX	f4, XX, INCX
	FMUL	f8, ALPHA, f8
	STFDUX	f5, XX, INCX
	FMUL	f9, ALPHA, f9

	STFDUX	f6, XX, INCX
	STFDUX	f7, XX, INCX
	STFDUX	f8, XX, INCX
	STFDUX	f9, XX, INCX
	.align 4

LL(A1I1_Remain):
	/* Tail: N mod 8 elements, one at a time; return if there is none. */
	andi.	r0, N, 7
	mtspr	CTR, r0
	beqlr+
	.align 4

LL(A1I1_RemainKernel):
	LFDUX	f2, X, INCX
	FMUL	f2, ALPHA, f2
	STFDUX	f2, XX, INCX
	bdnz	LL(A1I1_RemainKernel)
	blr
	.align 4

	EPILOGUE