1 /*********************************************************************/
2 /* Copyright 2009, 2010 The University of Texas at Austin.           */
3 /* All rights reserved.                                              */
4 /*                                                                   */
5 /* Redistribution and use in source and binary forms, with or        */
6 /* without modification, are permitted provided that the following   */
7 /* conditions are met:                                               */
8 /*                                                                   */
9 /*   1. Redistributions of source code must retain the above         */
10 /*      copyright notice, this list of conditions and the following  */
11 /*      disclaimer.                                                  */
12 /*                                                                   */
13 /*   2. Redistributions in binary form must reproduce the above      */
14 /*      copyright notice, this list of conditions and the following  */
15 /*      disclaimer in the documentation and/or other materials       */
16 /*      provided with the distribution.                              */
17 /*                                                                   */
18 /*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
19 /*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
20 /*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
21 /*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
22 /*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
23 /*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
24 /*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
25 /*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
26 /*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
27 /*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
28 /*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
29 /*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
30 /*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
31 /*    POSSIBILITY OF SUCH DAMAGE.                                    */
32 /*                                                                   */
33 /* The views and conclusions contained in the software and           */
34 /* documentation are those of the authors and should not be          */
35 /* interpreted as representing official policies, either expressed   */
36 /* or implied, of The University of Texas at Austin.                 */
37 /*********************************************************************/
38 
39 #include <stdio.h>
40 #include <ctype.h>
41 #include "common.h"
42 #ifdef FUNCTION_PROFILE
43 #include "functable.h"
44 #endif
45 
46 #ifndef COMPLEX
47 #ifdef XDOUBLE
48 #define ERROR_NAME "QSYRK "
49 #elif defined(DOUBLE)
50 #define ERROR_NAME "DSYRK "
51 #else
52 #define ERROR_NAME "SSYRK "
53 #endif
54 #else
55 #ifndef HEMM
56 #ifdef XDOUBLE
57 #define ERROR_NAME "XSYRK "
58 #elif defined(DOUBLE)
59 #define ERROR_NAME "ZSYRK "
60 #else
61 #define ERROR_NAME "CSYRK "
62 #endif
63 #else
64 #ifdef XDOUBLE
65 #define ERROR_NAME "XHERK "
66 #elif defined(DOUBLE)
67 #define ERROR_NAME "ZHERK "
68 #else
69 #define ERROR_NAME "CHERK "
70 #endif
71 #endif
72 #endif
73 
74 static int (*syrk[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = {
75 #ifndef HEMM
76   SYRK_UN, SYRK_UC, SYRK_LN, SYRK_LC,
77 #if defined(SMP) && !defined(USE_SIMPLE_THREADED_LEVEL3)
78   SYRK_THREAD_UN, SYRK_THREAD_UC, SYRK_THREAD_LN, SYRK_THREAD_LC,
79 #endif
80 #else
81   HERK_UN, HERK_UC, HERK_LN, HERK_LC,
82 #if defined(SMP) && !defined(USE_SIMPLE_THREADED_LEVEL3)
83   HERK_THREAD_UN, HERK_THREAD_UC, HERK_THREAD_LN, HERK_THREAD_LC,
84 #endif
85 #endif
86 };
87 
88 #ifndef CBLAS
89 
NAME(char * UPLO,char * TRANS,blasint * N,blasint * K,FLOAT * alpha,FLOAT * a,blasint * ldA,FLOAT * beta,FLOAT * c,blasint * ldC)90 void NAME(char *UPLO, char *TRANS,
91          blasint *N, blasint *K,
92          FLOAT *alpha, FLOAT *a, blasint *ldA,
93          FLOAT *beta,  FLOAT *c, blasint *ldC){
94 
95   char uplo_arg  = *UPLO;
96   char trans_arg = *TRANS;
97 
98   blas_arg_t args;
99 
100   FLOAT *buffer;
101   FLOAT *sa, *sb;
102 
103 #ifdef SMP
104 #ifndef COMPLEX
105 #ifdef XDOUBLE
106   int mode  =  BLAS_XDOUBLE | BLAS_REAL;
107 #elif defined(DOUBLE)
108   int mode  =  BLAS_DOUBLE  | BLAS_REAL;
109 #else
110   int mode  =  BLAS_SINGLE  | BLAS_REAL;
111 #endif
112 #else
113 #ifdef XDOUBLE
114   int mode  =  BLAS_XDOUBLE | BLAS_COMPLEX;
115 #elif defined(DOUBLE)
116   int mode  =  BLAS_DOUBLE  | BLAS_COMPLEX;
117 #else
118   int mode  =  BLAS_SINGLE  | BLAS_COMPLEX;
119 #endif
120 #endif
121 #endif
122 
123   blasint info;
124   int uplo;
125   int trans;
126   int nrowa;
127 
128   PRINT_DEBUG_NAME;
129 
130   args.n = *N;
131   args.k = *K;
132 
133   args.a = (void *)a;
134   args.c = (void *)c;
135 
136   args.lda = *ldA;
137   args.ldc = *ldC;
138 
139   args.alpha = (void *)alpha;
140   args.beta  = (void *)beta;
141 
142   TOUPPER(uplo_arg);
143   TOUPPER(trans_arg);
144 
145   uplo  = -1;
146   trans = -1;
147 
148   if (uplo_arg  == 'U') uplo  = 0;
149   if (uplo_arg  == 'L') uplo  = 1;
150 
151   if (trans_arg == 'N') trans = 0;
152   if (trans_arg == 'T') trans = 1;
153   if (trans_arg == 'R') trans = 0;
154   if (trans_arg == 'C') trans = 1;
155 
156   nrowa = args.n;
157   if (trans & 1) nrowa = args.k;
158 
159   info = 0;
160 
161   if (args.ldc < MAX(1,args.n)) info = 10;
162   if (args.lda < MAX(1,nrowa))  info =  7;
163   if (args.k < 0)               info =  4;
164   if (args.n < 0)               info =  3;
165   if (trans < 0)                info =  2;
166   if (uplo  < 0)                info =  1;
167 
168   if (info != 0) {
169     BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
170     return;
171   }
172 
173 #else
174 
175 void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
176 	   blasint n, blasint k,
177 #if !defined(COMPLEX) || defined(HEMM)
178 	   FLOAT alpha,
179 #else
180 	   FLOAT *alpha,
181 #endif
182 	   FLOAT *a, blasint lda,
183 #if !defined(COMPLEX) || defined(HEMM)
184 	   FLOAT beta,
185 #else
186 	   FLOAT *beta,
187 #endif
188 	   FLOAT *c, blasint ldc) {
189 
190   blas_arg_t args;
191   int uplo, trans;
192   blasint info, nrowa;
193 
194   FLOAT *buffer;
195   FLOAT *sa, *sb;
196 
197 #ifdef SMP
198 #ifndef COMPLEX
199 #ifdef XDOUBLE
200   int mode  =  BLAS_XDOUBLE | BLAS_REAL;
201 #elif defined(DOUBLE)
202   int mode  =  BLAS_DOUBLE  | BLAS_REAL;
203 #else
204   int mode  =  BLAS_SINGLE  | BLAS_REAL;
205 #endif
206 #else
207 #ifdef XDOUBLE
208   int mode  =  BLAS_XDOUBLE | BLAS_COMPLEX;
209 #elif defined(DOUBLE)
210   int mode  =  BLAS_DOUBLE  | BLAS_COMPLEX;
211 #else
212   int mode  =  BLAS_SINGLE  | BLAS_COMPLEX;
213 #endif
214 #endif
215 #endif
216 
217   PRINT_DEBUG_CNAME;
218 
219   args.n = n;
220   args.k = k;
221 
222   args.a = (void *)a;
223   args.c = (void *)c;
224 
225   args.lda = lda;
226   args.ldc = ldc;
227 
228 #if !defined(COMPLEX) || defined(HEMM)
229   args.alpha = (void *)&alpha;
230   args.beta  = (void *)&beta;
231 #else
232   args.alpha = (void *)alpha;
233   args.beta  = (void *)beta;
234 #endif
235 
236   trans = -1;
237   uplo  = -1;
238   info  =  0;
239 
240   if (order == CblasColMajor) {
241     if (Uplo == CblasUpper) uplo  = 0;
242     if (Uplo == CblasLower) uplo  = 1;
243 
244     if (Trans == CblasNoTrans)     trans = 0;
245 #ifndef COMPLEX
246     if (Trans == CblasTrans)       trans = 1;
247     if (Trans == CblasConjNoTrans) trans = 0;
248     if (Trans == CblasConjTrans)   trans = 1;
249 #elif !defined(HEMM)
250     if (Trans == CblasTrans)       trans = 1;
251 #else
252     if (Trans == CblasConjTrans)   trans = 1;
253 #endif
254 
255     info = -1;
256 
257     nrowa = args.n;
258     if (trans & 1) nrowa = args.k;
259 
260     if (args.ldc < MAX(1,args.n)) info = 10;
261     if (args.lda < MAX(1,nrowa))  info =  7;
262     if (args.k < 0)               info =  4;
263     if (args.n < 0)               info =  3;
264     if (trans < 0)                info =  2;
265     if (uplo  < 0)                info =  1;
266   }
267 
268   if (order == CblasRowMajor) {
269     if (Uplo == CblasUpper) uplo  = 1;
270     if (Uplo == CblasLower) uplo  = 0;
271 
272     if (Trans == CblasNoTrans)     trans = 1;
273 #ifndef COMPLEX
274     if (Trans == CblasTrans)       trans = 0;
275     if (Trans == CblasConjNoTrans) trans = 1;
276     if (Trans == CblasConjTrans)   trans = 0;
277 #elif !defined(HEMM)
278     if (Trans == CblasTrans)       trans = 0;
279 #else
280     if (Trans == CblasConjTrans)   trans = 0;
281 #endif
282 
283     info = -1;
284 
285     nrowa = args.n;
286     if (trans & 1) nrowa = args.k;
287 
288     if (args.ldc < MAX(1,args.n)) info = 10;
289     if (args.lda < MAX(1,nrowa))  info =  7;
290     if (args.k < 0)               info =  4;
291     if (args.n < 0)               info =  3;
292     if (trans < 0)                info =  2;
293     if (uplo  < 0)                info =  1;
294   }
295 
296   if (info >= 0) {
297     BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
298     return;
299   }
300 
301 #endif
302 
303   if (args.n == 0) return;
304 
305   IDEBUG_START;
306 
307   FUNCTION_PROFILE_START();
308 
309   buffer = (FLOAT *)blas_memory_alloc(0);
310 
311   sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A);
312   sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
313 
314 #ifdef SMP
315   if (!trans){
316     mode |= (BLAS_TRANSA_N | BLAS_TRANSB_T);
317   } else {
318     mode |= (BLAS_TRANSA_T | BLAS_TRANSB_N);
319   }
320 
321   mode |= (uplo  << BLAS_UPLO_SHIFT);
322 
323   args.common = NULL;
324   args.nthreads = num_cpu_avail(3);
325 
326   if (args.nthreads == 1) {
327 #endif
328 
329     (syrk[(uplo << 1) | trans ])(&args, NULL, NULL, sa, sb, 0);
330 
331 #ifdef SMP
332 
333   } else {
334 
335 #ifndef USE_SIMPLE_THREADED_LEVEL3
336 
337     (syrk[4 | (uplo << 1) | trans ])(&args, NULL, NULL, sa, sb, 0);
338 
339 #else
340 
341     syrk_thread(mode, &args, NULL, NULL, syrk[(uplo << 1) | trans ], sa, sb, args.nthreads);
342 
343 #endif
344 
345   }
346 #endif
347 
348  blas_memory_free(buffer);
349 
350   FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, args.n * args.k + args.n * args.n / 2, args.n * args.n * args.k);
351 
352   IDEBUG_END;
353 
354   return;
355 }
356