/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin.           */
/* All rights reserved.                                              */
/*                                                                   */
/* Redistribution and use in source and binary forms, with or        */
/* without modification, are permitted provided that the following   */
/* conditions are met:                                               */
/*                                                                   */
/*   1. Redistributions of source code must retain the above         */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer.                                                  */
/*                                                                   */
/*   2. Redistributions in binary form must reproduce the above      */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer in the documentation and/or other materials       */
/*      provided with the distribution.                              */
/*                                                                   */
/*    THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT        */
/*    AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,        */
/*    INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF       */
/*    MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE       */
/*    DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT       */
/*    AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,     */
/*    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES       */
/*    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE      */
/*    GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR           */
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF     */
/*    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT      */
/*    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT     */
/*    OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE            */
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
/*                                                                   */
/* The views and conclusions contained in the software and           */
/* documentation are those of the authors and should not be          */
/* interpreted as representing official policies, either expressed   */
/* or implied, of The University of Texas at Austin.                 */
/*********************************************************************/

#define ASSEMBLER
#include "common.h"

/*
 * Register roles.  ARG1/ARG4/ARG5 are defined by common.h (not visible
 * here); presumably they map to the 1st/4th/5th integer argument
 * registers of the target ABI -- confirm against common.h.
 *
 *   N     number of (re, im) pairs to process
 *   X     pointer to the current pair; advanced as the loops run
 *   INCX  stride between pairs; shifted left by ZBASE_SHIFT on entry
 *   I     loop counter (%rax)
 */
#define N	ARG1
#define X	ARG4
#define INCX	ARG5

#define I	%rax

#include "l1param.h"

/*
 * In-place complex scaling on the x87 FPU.
 *
 * With a = value at 8(%rsp) and b = value at 24(%rsp) (the two parts
 * of the scalar alpha, read from the caller's stack), each pair
 * (x0, x1) at 0 * SIZE(X) and 1 * SIZE(X) becomes
 *
 *     x0' = x0 * a - x1 * b
 *     x1' = x0 * b + x1 * a
 *
 * i.e. x := alpha * x with alpha = a + i*b.
 *
 * Special cases:
 *   - N <= 0: nothing is written.
 *   - a == 0 and b == 0: every pair is overwritten with zero bytes via
 *     MMX stores; no multiplication is performed.
 *   - alpha contains a NaN: the multiply loop is used so that the NaN
 *     propagates into x (see the note at the compare below).
 *
 * NOTE(review): FLD/FST, SIZE, ZBASE_SHIFT, PROLOGUE, PROFCODE, EMMS,
 * ALIGN_n and EPILOGUE come from common.h.  The zero-fill paths store
 * 32 bytes per pair and advance by 2 * SIZE, which is only consistent
 * when SIZE is 16 -- presumably this kernel is built for the x87
 * extended-precision type only; confirm.
 */
	PROLOGUE
	PROFCODE

	/* Scale the stride by 2^ZBASE_SHIFT (presumably pairs -> bytes). */
	salq	$ZBASE_SHIFT, INCX

	/* Push alpha: afterwards st(0) = b (24(%rsp)), st(1) = a (8(%rsp)). */
	FLD	 8(%rsp)
	FLD	24(%rsp)

	/* Empty vector: .L999 pops the two alpha values and returns. */
	testq	N, N
	jle	.L999

	/* Build |a| + |b| on top of the stack; it is zero iff a == b == 0. */
	fld	%st(1)			/* st(0) = a */
	fabs
	fld	%st(1)			/* st(1) is b here, so st(0) = b */
	fabs
	faddp	%st, %st(1)		/* st(0) = |a| + |b| */

	/* Compare 0 against the sum, then drop both temporaries so that   */
	/* only b, a remain on the stack.  The branches below rely on the  */
	/* EFLAGS produced by fcomip still being intact after ffreep.      */
	fldz
	fcomip	%st(1), %st
	ffreep	%st

	/* An unordered compare (alpha contains a NaN) sets ZF together    */
	/* with PF, so "jne" alone would fall through to the zero-fill     */
	/* path and replace x by zeros.  Route NaN alpha to the multiply   */
	/* loop instead so that it propagates.                             */
	jp	.L30
	jne	.L30

	/* ---- alpha == 0: fill x with zero bytes using MMX stores ---- */

	/* EMMS marks the x87 registers empty, discarding b and a, before  */
	/* %mm0 is used.                                                   */
	EMMS

	pxor	%mm0, %mm0

	/* Contiguous case is taken when the byte stride equals 2 * SIZE.  */
	cmpq	$2 * SIZE, INCX
	jne	.L20

	/* Contiguous: N / 4 iterations of four pairs each. */
	movq	N, I
	sarq	$2,   I
	jle	.L15
	ALIGN_4

.L12:
#ifdef PREFETCHW
	PREFETCHW (PREFETCHSIZE +  0) - PREOFFSET(X)
#endif

	/* 16 stores of 8 bytes = 128 bytes, then advance by 8 * SIZE. */
	movq	%mm0,    0(X)
	movq	%mm0,    8(X)
	movq	%mm0,   16(X)
	movq	%mm0,   24(X)
	movq	%mm0,   32(X)
	movq	%mm0,   40(X)
	movq	%mm0,   48(X)
	movq	%mm0,   56(X)
	movq	%mm0,   64(X)
	movq	%mm0,   72(X)
	movq	%mm0,   80(X)
	movq	%mm0,   88(X)
	movq	%mm0,   96(X)
	movq	%mm0,  104(X)
	movq	%mm0,  112(X)
	movq	%mm0,  120(X)
	addq	$8 * SIZE, X
	decq	I
	jg	.L12
	ALIGN_3

.L15:
	/* Contiguous remainder: N mod 4 pairs, one per iteration. */
	movq	N,  I
	andq	$3, I
	jle	.L18
	ALIGN_2

.L16:
	movq	%mm0,    0(X)
	movq	%mm0,    8(X)
	movq	%mm0,   16(X)
	movq	%mm0,   24(X)

	addq	$2 * SIZE, X
	decq	I
	jg	.L16

.L18:
	/* Leave MMX state so the x87 registers are marked empty on return. */
	EMMS

	ret
	ALIGN_2

.L20:
	/* Strided zero fill: N / 4 iterations, four pairs per iteration,  */
	/* stepping X by INCX after each pair.                             */
	movq	N, I
	sarq	$2,   I
	jle	.L25
	ALIGN_3

.L22:
	movq	%mm0,    0(X)
	movq	%mm0,    8(X)
	movq	%mm0,   16(X)
	movq	%mm0,   24(X)
	addq	INCX, X

	movq	%mm0,    0(X)
	movq	%mm0,    8(X)
	movq	%mm0,   16(X)
	movq	%mm0,   24(X)
	addq	INCX, X

	movq	%mm0,    0(X)
	movq	%mm0,    8(X)
	movq	%mm0,   16(X)
	movq	%mm0,   24(X)
	addq	INCX, X

	movq	%mm0,    0(X)
	movq	%mm0,    8(X)
	movq	%mm0,   16(X)
	movq	%mm0,   24(X)
	addq	INCX, X

	decq	I
	jg	.L22
	ALIGN_3

.L25:
	/* Strided remainder: N mod 4 pairs. */
	movq	N,  I
	andq	$3, I
	jle	.L28
	ALIGN_3

.L26:
	movq	%mm0,    0(X)
	movq	%mm0,    8(X)
	movq	%mm0,   16(X)
	movq	%mm0,   24(X)
	addq	INCX, X

	decq	I
	jg	.L26

.L28:
	EMMS

	ret
	ALIGN_3

	/* ---- general case: one pair per iteration on the x87 stack ---- */
	/* Invariant at the top of .L32: st(0) = b, st(1) = a.              */

.L30:
	movq	N, I
	ALIGN_2

.L32:
#ifdef PREFETCHW
	PREFETCHW (PREFETCHSIZE +  0) - PREOFFSET(X)
#endif

	/* st(0) = x0 * b + x1 * a  (new second component) */
	FLD	0 * SIZE(X)
	fmul	%st(1),%st		/* x0 * b */
	FLD	1 * SIZE(X)
	fmul	%st(3),%st		/* x1 * a */
	faddp	%st,%st(1)

	/* st(0) = x0 * a - x1 * b  (new first component) */
	FLD	0 * SIZE(X)
	fmul	%st(3),%st		/* x0 * a */
	FLD	1 * SIZE(X)
	fmul	%st(3),%st		/* x1 * b */
	/* NOTE(review): GNU as in AT&T mode swaps the fsub/fsubr          */
	/* mnemonics for the %st,%st(i) forms; this is expected to leave   */
	/* st(1) - st(0) = x0 * a - x1 * b on top after the pop -- verify  */
	/* with a disassembly if the assembler or syntax mode changes.     */
	fsubrp	%st,%st(1)

	/* Store both results.  Presumably FST expands to a store-and-pop  */
	/* form: the loop only stays balanced with the two pops at .L999   */
	/* if each FST removes one value from the stack -- confirm in      */
	/* common.h.                                                       */
	FST	0 * SIZE(X)
	FST	1 * SIZE(X)
	addq	INCX, X
	decq	I
	jg	.L32
	ALIGN_2

.L999:
	/* Pop b and a so the x87 stack is empty on return. */
	ffreep	%st
	ffreep	%st

	ret

	EPILOGUE