/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin.           */
/* All rights reserved.                                              */
/*                                                                   */
/* Redistribution and use in source and binary forms, with or        */
/* without modification, are permitted provided that the following   */
/* conditions are met:                                               */
/*                                                                   */
/*   1. Redistributions of source code must retain the above         */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer.                                                  */
/*                                                                   */
/*   2. Redistributions in binary form must reproduce the above      */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer in the documentation and/or other materials       */
/*      provided with the distribution.                              */
/*                                                                   */
/*    THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT        */
/*    AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,        */
/*    INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF       */
/*    MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE       */
/*    DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT       */
/*    AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,     */
/*    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES       */
/*    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE      */
/*    GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR           */
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF     */
/*    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT      */
/*    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT     */
/*    OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE            */
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
/*                                                                   */
/* The views and conclusions contained in the software and           */
/* documentation are those of the authors and should not be          */
/* interpreted as representing official policies, either expressed   */
/* or implied, of The University of Texas at Austin.                 */
/*********************************************************************/

#define ASSEMBLER
#include "common.h"

/*
 * Strided vector copy for 32-bit x86 using the x87 FPU (AT&T syntax,
 * run through the C preprocessor).  In C terms the routine performs
 *
 *     for (i = 0; i < m; i++)  y[i * incy] = x[i * incx];
 *
 * and returns 0 in %eax.  The exported symbol name is supplied by the
 * PROLOGUE macro and the element type by the FLD / FST / SIZE /
 * BASE_SHIFT macros, all of which come from common.h and are not
 * visible in this file.
 *
 * Arguments are read from the stack in this order:
 *     M     element count (nothing is copied when m <= 0)
 *     X     source pointer
 *     INCX  source stride, in elements
 *     Y     destination pointer
 *     INCY  destination stride, in elements
 *
 * Register roles after the argument loads:
 *     %ebx  m
 *     %ecx  source cursor        %esi  incx scaled to bytes
 *     %edx  destination cursor   %edi  incy scaled to bytes
 *     %eax  loop counter, then the return value (0)
 *
 * %edi, %esi and %ebx are saved on entry and restored before each ret.
 *
 * NOTE(review): the loops below only make sense if FLD pushes onto the
 * x87 register stack and FST stores *and pops* (eight FLDs are always
 * paired with eight FSTs) -- confirm against common.h.
 */

/*
 * STACK is the 12 bytes occupied by the three pushl instructions that
 * precede the argument loads; the leading 4 presumably skips the
 * return address.  ARGS is 0, so "ARGS(%esp)" expands to "0(%esp)".
 */
#define STACK	12
#define ARGS	 0

#define M	 4 + STACK + ARGS(%esp)
#define X	 8 + STACK + ARGS(%esp)
#define INCX	12 + STACK + ARGS(%esp)
#define Y	16 + STACK + ARGS(%esp)
#define INCY	20 + STACK + ARGS(%esp)

	PROLOGUE

	pushl	%edi
	pushl	%esi
	pushl	%ebx

	PROFCODE

#if defined(F_INTERFACE_GFORT) || defined(F_INTERFACE_G95)
	EMMS
#endif

	movl	M,    %ebx
	movl	X,    %ecx
	movl	INCX, %esi
	movl	Y,    %edx
	movl	INCY, %edi

	testl	%ebx, %ebx		/* if m <= 0 there is nothing to copy */
	jle	.L999

	/* Convert both strides from elements to bytes.  An address scale
	   factor cannot exceed 8, so larger elements use a shift instead
	   (presumably BASE_SHIFT == log2(SIZE) -- confirm in common.h). */
#if SIZE > 8
	sall	$BASE_SHIFT, %esi
	sall	$BASE_SHIFT, %edi
#else
	leal	(, %esi, SIZE), %esi
	leal	(, %edi, SIZE), %edi
#endif

	/* A byte stride equal to SIZE means unit stride.  Anything else on
	   either vector takes the general strided path at .L100. */
	cmpl	$SIZE, %esi		/* if incx != 1 */
	jne	.L100
	cmpl	$SIZE, %edi		/* if incy != 1 */
	jne	.L100

/* ---- Unit-stride path ------------------------------------------- */

	movl	%ebx, %eax		/* i = m / 8 */
	sarl	$3,   %eax
	/* Test ZF only: OF is architecturally undefined after a shift by
	   more than one bit, so a signed condition such as jle must not
	   be used here.  m > 0 at this point, hence the quotient is never
	   negative and "== 0" is the complete test. */
	je	.L20
	ALIGN_2

.L11:
	/* Load x[7] down to x[0] so that x[0] ends up on top of the x87
	   stack; the eight popping stores then write y[0] up to y[7]. */
	FLD	7 * SIZE(%ecx)
	FLD	6 * SIZE(%ecx)
	FLD	5 * SIZE(%ecx)
	FLD	4 * SIZE(%ecx)
	FLD	3 * SIZE(%ecx)
	FLD	2 * SIZE(%ecx)
	FLD	1 * SIZE(%ecx)
	FLD	0 * SIZE(%ecx)

	FST	0 * SIZE(%edx)
	FST	1 * SIZE(%edx)
	FST	2 * SIZE(%edx)
	FST	3 * SIZE(%edx)
	FST	4 * SIZE(%edx)
	FST	5 * SIZE(%edx)
	FST	6 * SIZE(%edx)
	FST	7 * SIZE(%edx)

	addl	$8 * SIZE, %ecx
	addl	$8 * SIZE, %edx
	decl	%eax
	jg	.L11
	ALIGN_2

.L20:
	movl	%ebx, %eax		/* i = m % 8 (leftover elements) */
	andl	$7,   %eax
	jle	.L99			/* AND defines every flag jle reads */
	ALIGN_2

.L21:
	FLD	(%ecx)
	FST	(%edx)
	addl	$SIZE, %ecx
	addl	$SIZE, %edx
	decl	%eax
	jg	.L21

.L99:
	xorl	%eax,%eax		/* return 0 */
	popl	%ebx
	popl	%esi
	popl	%edi
	ret
	ALIGN_3

/* ---- General strided path ---------------------------------------- */

.L100:
	movl	%ebx, %eax		/* i = m / 8 */
	sarl	$3,   %eax
	/* ZF only, for the same reason as on the unit-stride path: OF is
	   undefined after a multi-bit shift and m > 0 here. */
	je	.L120
	ALIGN_2

.L111:
	/* Load eight source elements in forward order.  Afterwards
	   st(0) holds the last one loaded and st(7) the first. */
	FLD	(%ecx)
	addl	%esi, %ecx
	FLD	(%ecx)
	addl	%esi, %ecx
	FLD	(%ecx)
	addl	%esi, %ecx
	FLD	(%ecx)
	addl	%esi, %ecx
	FLD	(%ecx)
	addl	%esi, %ecx
	FLD	(%ecx)
	addl	%esi, %ecx
	FLD	(%ecx)
	addl	%esi, %ecx
	FLD	(%ecx)
	addl	%esi, %ecx

	/* Each exchange brings the oldest element still on the stack to
	   the top before it is stored and popped.  After the fourth
	   exchange the remaining four values are already in load order,
	   so they are stored without further swaps. */
	fxch	%st(7)
	FST	(%edx)
	addl	%edi, %edx

	fxch	%st(5)
	FST	(%edx)
	addl	%edi, %edx

	fxch	%st(3)
	FST	(%edx)
	addl	%edi, %edx

	fxch	%st(1)
	FST	(%edx)
	addl	%edi, %edx

	FST	(%edx)
	addl	%edi, %edx

	FST	(%edx)
	addl	%edi, %edx

	FST	(%edx)
	addl	%edi, %edx

	FST	(%edx)
	addl	%edi, %edx

	decl	%eax
	jg	.L111

.L120:
	movl	%ebx, %eax		/* i = m % 8 (leftover elements) */
	andl	$7,   %eax
	jle	.L999			/* AND defines every flag jle reads */
	ALIGN_2

.L121:
	FLD	(%ecx)
	FST	(%edx)
	addl	%esi, %ecx
	addl	%edi, %edx
	decl	%eax
	jg	.L121

.L999:
	xorl	%eax,%eax		/* return 0 */
	popl	%ebx
	popl	%esi
	popl	%edi
	ret

	EPILOGUE