1 /*********************************************************************/
2 /* */
3 /* Optimized BLAS libraries */
4 /* By Kazushige Goto <kgoto@tacc.utexas.edu> */
5 /* */
6 /* Copyright (c) The University of Texas, 2009. All rights reserved. */
7 /* UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING */
8 /* THIS SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF */
9 /* MERCHANTABILITY, FITNESS FOR ANY PARTICULAR PURPOSE, */
10 /* NON-INFRINGEMENT AND WARRANTIES OF PERFORMANCE, AND ANY WARRANTY */
11 /* THAT MIGHT OTHERWISE ARISE FROM COURSE OF DEALING OR USAGE OF */
12 /* TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH RESPECT TO */
13 /* THE USE OF THE SOFTWARE OR DOCUMENTATION. */
14 /* Under no circumstances shall University be liable for incidental, */
15 /* special, indirect, direct or consequential damages or loss of */
16 /* profits, interruption of business, or related expenses which may */
17 /* arise from use of Software or Documentation, including but not */
18 /* limited to those resulting from defects in Software and/or */
19 /* Documentation, or loss or inaccuracy of data of any kind. */
20 /*********************************************************************/
21
22 #include <stdio.h>
23 #include <ctype.h>
24 #include "common.h"
25 #ifdef FUNCTION_PROFILE
26 #include "functable.h"
27 #endif
28
29 #ifdef XDOUBLE
30 #define ERROR_NAME "QTBMV "
31 #elif defined(DOUBLE)
32 #define ERROR_NAME "DTBMV "
33 #else
34 #define ERROR_NAME "STBMV "
35 #endif
36
37 static int (*tbmv[])(BLASLONG, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, void *) = {
38 #ifdef XDOUBLE
39 qtbmv_NUU, qtbmv_NUN, qtbmv_NLU, qtbmv_NLN,
40 qtbmv_TUU, qtbmv_TUN, qtbmv_TLU, qtbmv_TLN,
41 #elif defined(DOUBLE)
42 dtbmv_NUU, dtbmv_NUN, dtbmv_NLU, dtbmv_NLN,
43 dtbmv_TUU, dtbmv_TUN, dtbmv_TLU, dtbmv_TLN,
44 #else
45 stbmv_NUU, stbmv_NUN, stbmv_NLU, stbmv_NLN,
46 stbmv_TUU, stbmv_TUN, stbmv_TLU, stbmv_TLN,
47 #endif
48 };
49
#ifdef SMP
/*
 * Multi-threaded TBMV kernels, only built when SMP is defined.
 *
 * Indexed exactly like the single-threaded table:
 *     (trans << 2) | (uplo << 1) | unit
 * so the entry order must stay NUU, NUN, NLU, NLN, TUU, TUN, TLU, TLN.
 * Compared with the single-threaded kernels, these take a FLOAT * buffer
 * and a trailing int, which the driver fills with the thread count.
 *
 * The table is never written, hence the const-qualified pointer array.
 */
static int (* const tbmv_thread[])(BLASLONG, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
#ifdef XDOUBLE
  qtbmv_thread_NUU, qtbmv_thread_NUN, qtbmv_thread_NLU, qtbmv_thread_NLN,
  qtbmv_thread_TUU, qtbmv_thread_TUN, qtbmv_thread_TLU, qtbmv_thread_TLN,
#elif defined(DOUBLE)
  dtbmv_thread_NUU, dtbmv_thread_NUN, dtbmv_thread_NLU, dtbmv_thread_NLN,
  dtbmv_thread_TUU, dtbmv_thread_TUN, dtbmv_thread_TLU, dtbmv_thread_TLN,
#else
  stbmv_thread_NUU, stbmv_thread_NUN, stbmv_thread_NLU, stbmv_thread_NLN,
  stbmv_thread_TUU, stbmv_thread_TUN, stbmv_thread_TLU, stbmv_thread_TLN,
#endif
};
#endif
64
65 #ifndef CBLAS
66
NAME(char * UPLO,char * TRANS,char * DIAG,blasint * N,blasint * K,FLOAT * a,blasint * LDA,FLOAT * x,blasint * INCX)67 void NAME(char *UPLO, char *TRANS, char *DIAG,
68 blasint *N, blasint *K,
69 FLOAT *a, blasint *LDA, FLOAT *x, blasint *INCX){
70
71 char uplo_arg = *UPLO;
72 char trans_arg = *TRANS;
73 char diag_arg = *DIAG;
74
75 blasint n = *N;
76 blasint k = *K;
77 blasint lda = *LDA;
78 blasint incx = *INCX;
79
80 blasint info;
81 int uplo;
82 int unit;
83 int trans;
84 FLOAT *buffer;
85 #ifdef SMP
86 int nthreads;
87 #endif
88
89 PRINT_DEBUG_NAME;
90
91 TOUPPER(uplo_arg);
92 TOUPPER(trans_arg);
93 TOUPPER(diag_arg);
94
95 trans = -1;
96 unit = -1;
97 uplo = -1;
98
99 if (trans_arg == 'N') trans = 0;
100 if (trans_arg == 'T') trans = 1;
101 if (trans_arg == 'R') trans = 0;
102 if (trans_arg == 'C') trans = 1;
103
104 if (diag_arg == 'U') unit = 0;
105 if (diag_arg == 'N') unit = 1;
106
107 if (uplo_arg == 'U') uplo = 0;
108 if (uplo_arg == 'L') uplo = 1;
109
110 info = 0;
111
112 if (incx == 0) info = 9;
113 if (lda < k + 1) info = 7;
114 if (k < 0) info = 5;
115 if (n < 0) info = 4;
116 if (unit < 0) info = 3;
117 if (trans < 0) info = 2;
118 if (uplo < 0) info = 1;
119
120 if (info != 0) {
121 BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
122 return;
123 }
124
125 #else
126
127 void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
128 enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
129 blasint n, blasint k, FLOAT *a, blasint lda, FLOAT *x, blasint incx) {
130
131 int trans, uplo, unit;
132 blasint info;
133 FLOAT *buffer;
134 #ifdef SMP
135 int nthreads;
136 #endif
137
138 PRINT_DEBUG_CNAME;
139
140 unit = -1;
141 uplo = -1;
142 trans = -1;
143 info = 0;
144
145 if (order == CblasColMajor) {
146 if (Uplo == CblasUpper) uplo = 0;
147 if (Uplo == CblasLower) uplo = 1;
148
149 if (TransA == CblasNoTrans) trans = 0;
150 if (TransA == CblasTrans) trans = 1;
151 if (TransA == CblasConjNoTrans) trans = 0;
152 if (TransA == CblasConjTrans) trans = 1;
153
154 if (Diag == CblasUnit) unit = 0;
155 if (Diag == CblasNonUnit) unit = 1;
156
157 info = -1;
158
159 if (incx == 0) info = 9;
160 if (lda < k + 1) info = 7;
161 if (k < 0) info = 5;
162 if (n < 0) info = 4;
163 if (unit < 0) info = 3;
164 if (trans < 0) info = 2;
165 if (uplo < 0) info = 1;
166 }
167
168 if (order == CblasRowMajor) {
169 if (Uplo == CblasUpper) uplo = 1;
170 if (Uplo == CblasLower) uplo = 0;
171
172 if (TransA == CblasNoTrans) trans = 1;
173 if (TransA == CblasTrans) trans = 0;
174 if (TransA == CblasConjNoTrans) trans = 1;
175 if (TransA == CblasConjTrans) trans = 0;
176
177 if (Diag == CblasUnit) unit = 0;
178 if (Diag == CblasNonUnit) unit = 1;
179
180 info = -1;
181
182 if (incx == 0) info = 9;
183 if (lda < k + 1) info = 7;
184 if (k < 0) info = 5;
185 if (n < 0) info = 4;
186 if (unit < 0) info = 3;
187 if (trans < 0) info = 2;
188 if (uplo < 0) info = 1;
189 }
190
191 if (info >= 0) {
192 BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
193 return;
194 }
195
196 #endif
197
198 if (n == 0) return;
199
200 IDEBUG_START;
201
202 FUNCTION_PROFILE_START();
203
204 if (incx < 0 ) x -= (n - 1) * incx;
205
206 buffer = (FLOAT *)blas_memory_alloc(1);
207
208 #ifdef SMP
209 nthreads = num_cpu_avail(2);
210
211 if (nthreads == 1) {
212 #endif
213
214 (tbmv[(trans<<2) | (uplo<<1) | unit])(n, k, a, lda, x, incx, buffer);
215
216 #ifdef SMP
217 } else {
218
219 (tbmv_thread[(trans<<2) | (uplo<<1) | unit])(n, k, a, lda, x, incx, buffer, nthreads);
220
221 }
222 #endif
223
224 blas_memory_free(buffer);
225
226 FUNCTION_PROFILE_END(1, n * k / 2 + n, n * k);
227
228 IDEBUG_END;
229
230 return;
231 }
232