1 /*********************************************************************/
2 /* Copyright 2009, 2010 The University of Texas at Austin. */
3 /* All rights reserved. */
4 /* */
5 /* Redistribution and use in source and binary forms, with or */
6 /* without modification, are permitted provided that the following */
7 /* conditions are met: */
8 /* */
9 /* 1. Redistributions of source code must retain the above */
10 /* copyright notice, this list of conditions and the following */
11 /* disclaimer. */
12 /* */
13 /* 2. Redistributions in binary form must reproduce the above */
14 /* copyright notice, this list of conditions and the following */
15 /* disclaimer in the documentation and/or other materials */
16 /* provided with the distribution. */
17 /* */
18 /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
19 /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
20 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
21 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
22 /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
23 /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
24 /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
25 /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
26 /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
27 /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
28 /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
29 /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
30 /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
31 /* POSSIBILITY OF SUCH DAMAGE. */
32 /* */
33 /* The views and conclusions contained in the software and */
34 /* documentation are those of the authors and should not be */
35 /* interpreted as representing official policies, either expressed */
36 /* or implied, of The University of Texas at Austin. */
37 /*********************************************************************/
38
39 #include <stdio.h>
40 #include <ctype.h>
41 #include "common.h"
42 #ifdef FUNCTION_PROFILE
43 #include "functable.h"
44 #endif
45
46 #ifndef COMPLEX
47 #ifdef XDOUBLE
48 #define ERROR_NAME "QSYRK "
49 #elif defined(DOUBLE)
50 #define ERROR_NAME "DSYRK "
51 #else
52 #define ERROR_NAME "SSYRK "
53 #endif
54 #else
55 #ifndef HEMM
56 #ifdef XDOUBLE
57 #define ERROR_NAME "XSYRK "
58 #elif defined(DOUBLE)
59 #define ERROR_NAME "ZSYRK "
60 #else
61 #define ERROR_NAME "CSYRK "
62 #endif
63 #else
64 #ifdef XDOUBLE
65 #define ERROR_NAME "XHERK "
66 #elif defined(DOUBLE)
67 #define ERROR_NAME "ZHERK "
68 #else
69 #define ERROR_NAME "CHERK "
70 #endif
71 #endif
72 #endif
73
74 static int (*syrk[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = {
75 #ifndef HEMM
76 SYRK_UN, SYRK_UC, SYRK_LN, SYRK_LC,
77 #if defined(SMP) && !defined(USE_SIMPLE_THREADED_LEVEL3)
78 SYRK_THREAD_UN, SYRK_THREAD_UC, SYRK_THREAD_LN, SYRK_THREAD_LC,
79 #endif
80 #else
81 HERK_UN, HERK_UC, HERK_LN, HERK_LC,
82 #if defined(SMP) && !defined(USE_SIMPLE_THREADED_LEVEL3)
83 HERK_THREAD_UN, HERK_THREAD_UC, HERK_THREAD_LN, HERK_THREAD_LC,
84 #endif
85 #endif
86 };
87
88 #ifndef CBLAS
89
NAME(char * UPLO,char * TRANS,blasint * N,blasint * K,FLOAT * alpha,FLOAT * a,blasint * ldA,FLOAT * beta,FLOAT * c,blasint * ldC)90 void NAME(char *UPLO, char *TRANS,
91 blasint *N, blasint *K,
92 FLOAT *alpha, FLOAT *a, blasint *ldA,
93 FLOAT *beta, FLOAT *c, blasint *ldC){
94
95 char uplo_arg = *UPLO;
96 char trans_arg = *TRANS;
97
98 blas_arg_t args;
99
100 FLOAT *buffer;
101 FLOAT *sa, *sb;
102
103 #ifdef SMP
104 #ifndef COMPLEX
105 #ifdef XDOUBLE
106 int mode = BLAS_XDOUBLE | BLAS_REAL;
107 #elif defined(DOUBLE)
108 int mode = BLAS_DOUBLE | BLAS_REAL;
109 #else
110 int mode = BLAS_SINGLE | BLAS_REAL;
111 #endif
112 #else
113 #ifdef XDOUBLE
114 int mode = BLAS_XDOUBLE | BLAS_COMPLEX;
115 #elif defined(DOUBLE)
116 int mode = BLAS_DOUBLE | BLAS_COMPLEX;
117 #else
118 int mode = BLAS_SINGLE | BLAS_COMPLEX;
119 #endif
120 #endif
121 #endif
122
123 blasint info;
124 int uplo;
125 int trans;
126 int nrowa;
127
128 PRINT_DEBUG_NAME;
129
130 args.n = *N;
131 args.k = *K;
132
133 args.a = (void *)a;
134 args.c = (void *)c;
135
136 args.lda = *ldA;
137 args.ldc = *ldC;
138
139 args.alpha = (void *)alpha;
140 args.beta = (void *)beta;
141
142 TOUPPER(uplo_arg);
143 TOUPPER(trans_arg);
144
145 uplo = -1;
146 trans = -1;
147
148 if (uplo_arg == 'U') uplo = 0;
149 if (uplo_arg == 'L') uplo = 1;
150
151 if (trans_arg == 'N') trans = 0;
152 if (trans_arg == 'T') trans = 1;
153 if (trans_arg == 'R') trans = 0;
154 if (trans_arg == 'C') trans = 1;
155
156 nrowa = args.n;
157 if (trans & 1) nrowa = args.k;
158
159 info = 0;
160
161 if (args.ldc < MAX(1,args.n)) info = 10;
162 if (args.lda < MAX(1,nrowa)) info = 7;
163 if (args.k < 0) info = 4;
164 if (args.n < 0) info = 3;
165 if (trans < 0) info = 2;
166 if (uplo < 0) info = 1;
167
168 if (info != 0) {
169 BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
170 return;
171 }
172
173 #else
174
175 void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
176 blasint n, blasint k,
177 #if !defined(COMPLEX) || defined(HEMM)
178 FLOAT alpha,
179 #else
180 FLOAT *alpha,
181 #endif
182 FLOAT *a, blasint lda,
183 #if !defined(COMPLEX) || defined(HEMM)
184 FLOAT beta,
185 #else
186 FLOAT *beta,
187 #endif
188 FLOAT *c, blasint ldc) {
189
190 blas_arg_t args;
191 int uplo, trans;
192 blasint info, nrowa;
193
194 FLOAT *buffer;
195 FLOAT *sa, *sb;
196
197 #ifdef SMP
198 #ifndef COMPLEX
199 #ifdef XDOUBLE
200 int mode = BLAS_XDOUBLE | BLAS_REAL;
201 #elif defined(DOUBLE)
202 int mode = BLAS_DOUBLE | BLAS_REAL;
203 #else
204 int mode = BLAS_SINGLE | BLAS_REAL;
205 #endif
206 #else
207 #ifdef XDOUBLE
208 int mode = BLAS_XDOUBLE | BLAS_COMPLEX;
209 #elif defined(DOUBLE)
210 int mode = BLAS_DOUBLE | BLAS_COMPLEX;
211 #else
212 int mode = BLAS_SINGLE | BLAS_COMPLEX;
213 #endif
214 #endif
215 #endif
216
217 PRINT_DEBUG_CNAME;
218
219 args.n = n;
220 args.k = k;
221
222 args.a = (void *)a;
223 args.c = (void *)c;
224
225 args.lda = lda;
226 args.ldc = ldc;
227
228 #if !defined(COMPLEX) || defined(HEMM)
229 args.alpha = (void *)α
230 args.beta = (void *)β
231 #else
232 args.alpha = (void *)alpha;
233 args.beta = (void *)beta;
234 #endif
235
236 trans = -1;
237 uplo = -1;
238 info = 0;
239
240 if (order == CblasColMajor) {
241 if (Uplo == CblasUpper) uplo = 0;
242 if (Uplo == CblasLower) uplo = 1;
243
244 if (Trans == CblasNoTrans) trans = 0;
245 #ifndef COMPLEX
246 if (Trans == CblasTrans) trans = 1;
247 if (Trans == CblasConjNoTrans) trans = 0;
248 if (Trans == CblasConjTrans) trans = 1;
249 #elif !defined(HEMM)
250 if (Trans == CblasTrans) trans = 1;
251 #else
252 if (Trans == CblasConjTrans) trans = 1;
253 #endif
254
255 info = -1;
256
257 nrowa = args.n;
258 if (trans & 1) nrowa = args.k;
259
260 if (args.ldc < MAX(1,args.n)) info = 10;
261 if (args.lda < MAX(1,nrowa)) info = 7;
262 if (args.k < 0) info = 4;
263 if (args.n < 0) info = 3;
264 if (trans < 0) info = 2;
265 if (uplo < 0) info = 1;
266 }
267
268 if (order == CblasRowMajor) {
269 if (Uplo == CblasUpper) uplo = 1;
270 if (Uplo == CblasLower) uplo = 0;
271
272 if (Trans == CblasNoTrans) trans = 1;
273 #ifndef COMPLEX
274 if (Trans == CblasTrans) trans = 0;
275 if (Trans == CblasConjNoTrans) trans = 1;
276 if (Trans == CblasConjTrans) trans = 0;
277 #elif !defined(HEMM)
278 if (Trans == CblasTrans) trans = 0;
279 #else
280 if (Trans == CblasConjTrans) trans = 0;
281 #endif
282
283 info = -1;
284
285 nrowa = args.n;
286 if (trans & 1) nrowa = args.k;
287
288 if (args.ldc < MAX(1,args.n)) info = 10;
289 if (args.lda < MAX(1,nrowa)) info = 7;
290 if (args.k < 0) info = 4;
291 if (args.n < 0) info = 3;
292 if (trans < 0) info = 2;
293 if (uplo < 0) info = 1;
294 }
295
296 if (info >= 0) {
297 BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
298 return;
299 }
300
301 #endif
302
303 if (args.n == 0) return;
304
305 IDEBUG_START;
306
307 FUNCTION_PROFILE_START();
308
309 buffer = (FLOAT *)blas_memory_alloc(0);
310
311 sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A);
312 sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
313
314 #ifdef SMP
315 if (!trans){
316 mode |= (BLAS_TRANSA_N | BLAS_TRANSB_T);
317 } else {
318 mode |= (BLAS_TRANSA_T | BLAS_TRANSB_N);
319 }
320
321 mode |= (uplo << BLAS_UPLO_SHIFT);
322
323 args.common = NULL;
324 args.nthreads = num_cpu_avail(3);
325
326 if (args.nthreads == 1) {
327 #endif
328
329 (syrk[(uplo << 1) | trans ])(&args, NULL, NULL, sa, sb, 0);
330
331 #ifdef SMP
332
333 } else {
334
335 #ifndef USE_SIMPLE_THREADED_LEVEL3
336
337 (syrk[4 | (uplo << 1) | trans ])(&args, NULL, NULL, sa, sb, 0);
338
339 #else
340
341 syrk_thread(mode, &args, NULL, NULL, syrk[(uplo << 1) | trans ], sa, sb, args.nthreads);
342
343 #endif
344
345 }
346 #endif
347
348 blas_memory_free(buffer);
349
350 FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, args.n * args.k + args.n * args.n / 2, args.n * args.n * args.k);
351
352 IDEBUG_END;
353
354 return;
355 }
356