1 /*********************************************************************/
2 /* Copyright 2009, 2010 The University of Texas at Austin. */
3 /* All rights reserved. */
4 /* */
5 /* Redistribution and use in source and binary forms, with or */
6 /* without modification, are permitted provided that the following */
7 /* conditions are met: */
8 /* */
9 /* 1. Redistributions of source code must retain the above */
10 /* copyright notice, this list of conditions and the following */
11 /* disclaimer. */
12 /* */
13 /* 2. Redistributions in binary form must reproduce the above */
14 /* copyright notice, this list of conditions and the following */
15 /* disclaimer in the documentation and/or other materials */
16 /* provided with the distribution. */
17 /* */
18 /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
19 /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
20 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
21 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
22 /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
23 /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
24 /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
25 /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
26 /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
27 /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
28 /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
29 /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
30 /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
31 /* POSSIBILITY OF SUCH DAMAGE. */
32 /* */
33 /* The views and conclusions contained in the software and */
34 /* documentation are those of the authors and should not be */
35 /* interpreted as representing official policies, either expressed */
36 /* or implied, of The University of Texas at Austin. */
37 /*********************************************************************/
38
39 #include <stdio.h>
40 #include <ctype.h>
41 #include "common.h"
42
43 static FLOAT dp1 = 1.;
44
CNAME(BLASLONG m,FLOAT * a,BLASLONG lda,FLOAT * b,BLASLONG incb,FLOAT * buffer)45 int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *buffer){
46
47 BLASLONG i, is, min_i;
48 #if (TRANSA == 2) || (TRANSA == 4)
49 FLOAT _Complex temp;
50 #endif
51 #ifndef UNIT
52 FLOAT atemp1, atemp2, btemp1, btemp2;
53 #endif
54 FLOAT *gemvbuffer = (FLOAT *)buffer;
55 FLOAT *B = b;
56
57 if (incb != 1) {
58 B = buffer;
59 gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
60 COPY_K(m, b, incb, buffer, 1);
61 }
62
63 for (is =0; is < m; is += DTB_ENTRIES){
64
65 min_i = MIN(m - is, DTB_ENTRIES);
66
67 #if (TRANSA) == 1 || (TRANSA == 3)
68 if (is > 0){
69 #if TRANSA == 1
70 GEMV_N(is, min_i, 0, dp1, ZERO,
71 a + is * lda * 2, lda,
72 B + is * 2, 1,
73 B, 1, gemvbuffer);
74 #else
75 GEMV_R(is, min_i, 0, dp1, ZERO,
76 a + is * lda * 2, lda,
77 B + is * 2, 1,
78 B, 1, gemvbuffer);
79 #endif
80 }
81 #endif
82
83 for (i = 0; i < min_i; i++) {
84 FLOAT *AA = a + (is + (i + is) * lda) * 2;
85 FLOAT *BB = B + is * 2;
86
87 #if (TRANSA == 1) || (TRANSA == 3)
88 #if TRANSA == 1
89 if (i > 0) AXPYU_K (i, 0, 0, BB[i * 2 + 0], BB[i * 2 + 1],
90 AA, 1, BB, 1, NULL, 0);
91 #else
92 if (i > 0) AXPYC_K(i, 0, 0, BB[i * 2 + 0], BB[i * 2 + 1],
93 AA, 1, BB, 1, NULL, 0);
94 #endif
95 #endif
96
97 #ifndef UNIT
98 atemp1 = AA[i * 2 + 0];
99 atemp2 = AA[i * 2 + 1];
100
101 btemp1 = BB[i * 2 + 0];
102 btemp2 = BB[i * 2 + 1];
103
104 #if (TRANSA == 1) || (TRANSA == 2)
105 BB[i * 2 + 0] = atemp1 * btemp1 - atemp2 * btemp2;
106 BB[i * 2 + 1] = atemp1 * btemp2 + atemp2 * btemp1;
107 #else
108 BB[i * 2 + 0] = atemp1 * btemp1 + atemp2 * btemp2;
109 BB[i * 2 + 1] = atemp1 * btemp2 - atemp2 * btemp1;
110 #endif
111 #endif
112
113 #if (TRANSA == 2) || (TRANSA == 4)
114 if (i < min_i - 1) {
115 #if TRANSA == 2
116 temp = DOTU_K(min_i - i - 1,
117 AA + (i + 1) * 2, 1,
118 BB + (i + 1) * 2, 1);
119 #else
120 temp = DOTC_K(min_i - i - 1,
121 AA + (i + 1) * 2, 1,
122 BB + (i + 1) * 2, 1);
123 #endif
124
125 BB[i * 2 + 0] += CREAL(temp);
126 BB[i * 2 + 1] += CIMAG(temp);
127 }
128 #endif
129
130 }
131
132 #if (TRANSA) == 2 || (TRANSA == 4)
133 if (m - is > min_i){
134 #if TRANSA == 2
135 GEMV_T(m - is - min_i, min_i, 0, dp1, ZERO,
136 a + (is + min_i + is * lda) * 2, lda,
137 B + (is + min_i) * 2, 1,
138 B + is * 2, 1, gemvbuffer);
139 #else
140 GEMV_C(m - is - min_i, min_i, 0, dp1, ZERO,
141 a + (is + min_i + is * lda) * 2, lda,
142 B + (is + min_i) * 2, 1,
143 B + is * 2, 1, gemvbuffer);
144 #endif
145 }
146 #endif
147 }
148
149 if (incb != 1) {
150 COPY_K(m, buffer, 1, b, incb);
151 }
152
153 return 0;
154 }
155
156