1 /* ************************************************************************
2  * Copyright 2013 Advanced Micro Devices, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  * ************************************************************************/
16 
17 
18 #ifndef COMMON_H_
19 #define COMMON_H_
20 
/*
 * __template_static: specifier placed in front of the explicit template
 * specializations declared further down in this header.
 * It expands to `static` when _MSC_VER is defined and to nothing otherwise.
 */
#ifdef _MSC_VER
#  define __template_static static
#else
#  define __template_static
#endif  /* _MSC_VER */
26 
/*
 * MAX(a, b): yields the larger of the two arguments (b when they compare
 * equal or the comparison is false).
 * Each use of an argument is parenthesized so operands that contain
 * low-precedence operators (?:, &, |, ...) are compared and returned as a
 * whole.
 * Caveat: the selected argument is evaluated twice, so do not pass
 * expressions with side effects.
 */
#define MAX(a, b)  (((a) > (b)) ? (a) : (b))
28 
29 #include <clBLAS.h>
30 #include <cmdline.h>
31 #include <math.h>
32 
33 #ifdef __cplusplus
34 extern "C" {
35 #endif
36 
/*
 * Identifiers of the BLAS routines, one enumerator per routine family.
 * Values are assigned implicitly starting from 0, so the order of the
 * enumerators is significant and must not be changed.
 */
typedef enum BlasRoutineID {
    CLBLAS_GEMV,
    CLBLAS_SYMV,
    CLBLAS_GEMM,
    CLBLAS_GEMM2,
    CLBLAS_GEMM_TAIL,
    CLBLAS_TRMM,
    CLBLAS_TRSM,
    CLBLAS_SYRK,
    CLBLAS_SYR2K,
    CLBLAS_TRMV,
    CLBLAS_TPMV,
    CLBLAS_TRSV,
    /* Kludge: needed because the current "gemv" does not support complex types. */
    CLBLAS_TRSV_GEMV,
    CLBLAS_SYMM,
    CLBLAS_GER,
    CLBLAS_SYR,
    CLBLAS_HER,
    CLBLAS_HER2,
    CLBLAS_HEMM,
    CLBLAS_HERK,
    CLBLAS_SWAP,
    CLBLAS_COPY,
    CLBLAS_DOT,
    CLBLAS_SCAL,
    CLBLAS_AXPY,
    CLBLAS_ROTG,
    CLBLAS_ROTM,
    CLBLAS_ROT,
    CLBLAS_ROTMG,
    CLBLAS_NRM2,
    CLBLAS_ASUM,
    CLBLAS_iAMAX,

    /* Number of routine identifiers above; must stay the last enumerator. */
    BLAS_FUNCTIONS_NUMBER
} BlasRoutineID;
74 
/*
 * Identifiers of the individual BLAS functions; the leading letter(s) after
 * FN_ mirror the type prefix of the function name (S, D, C, Z, ...).
 * Values are assigned implicitly starting from 0, so the order of the
 * enumerators is significant. Each line below holds one function family.
 */
typedef enum BlasFunction {
    /* GEMV */
    FN_SGEMV, FN_DGEMV, FN_CGEMV, FN_ZGEMV,
    /* SYMV */
    FN_SSYMV, FN_DSYMV,
    /* SPMV */
    FN_SSPMV, FN_DSPMV,
    /* GEMM */
    FN_SGEMM, FN_DGEMM, FN_CGEMM, FN_ZGEMM,
    /* GEMM_2 */
    FN_SGEMM_2, FN_DGEMM_2, FN_CGEMM_2, FN_ZGEMM_2,
    /* TRMM */
    FN_STRMM, FN_DTRMM, FN_CTRMM, FN_ZTRMM,
    /* TRSM */
    FN_STRSM, FN_DTRSM, FN_CTRSM, FN_ZTRSM,
    /* SYR2K */
    FN_SSYR2K, FN_DSYR2K, FN_CSYR2K, FN_ZSYR2K,
    /* SYRK */
    FN_SSYRK, FN_DSYRK, FN_CSYRK, FN_ZSYRK,
    /* TRMV */
    FN_STRMV, FN_DTRMV, FN_CTRMV, FN_ZTRMV,
    /* TPMV */
    FN_STPMV, FN_DTPMV, FN_CTPMV, FN_ZTPMV,
    /* TRSV */
    FN_STRSV, FN_DTRSV, FN_CTRSV, FN_ZTRSV,
    /* TPSV */
    FN_STPSV, FN_DTPSV, FN_CTPSV, FN_ZTPSV,
    /* SYMM */
    FN_SSYMM, FN_DSYMM, FN_CSYMM, FN_ZSYMM,
    /* SYR */
    FN_SSYR, FN_DSYR,
    /* SPR */
    FN_SSPR, FN_DSPR,
    /* GER, GERU, GERC */
    FN_SGER, FN_DGER, FN_CGERU, FN_ZGERU, FN_CGERC, FN_ZGERC,
    /* HER, HER2 */
    FN_CHER, FN_ZHER, FN_CHER2, FN_ZHER2,
    /* HPR, HPR2 */
    FN_CHPR, FN_ZHPR, FN_CHPR2, FN_ZHPR2,
    /* SYR2 */
    FN_SSYR2, FN_DSYR2,
    /* SPR2 */
    FN_SSPR2, FN_DSPR2,
    /* HEMV */
    FN_CHEMV, FN_ZHEMV,
    /* HPMV */
    FN_CHPMV, FN_ZHPMV,
    /* HEMM */
    FN_CHEMM, FN_ZHEMM,
    /* HERK */
    FN_CHERK, FN_ZHERK,
    /* GBMV */
    FN_SGBMV, FN_DGBMV, FN_CGBMV, FN_ZGBMV,
    /* TBMV */
    FN_STBMV, FN_DTBMV, FN_CTBMV, FN_ZTBMV,
    /* SBMV */
    FN_SSBMV, FN_DSBMV,
    /* HBMV */
    FN_CHBMV, FN_ZHBMV,
    /* TBSV */
    FN_STBSV, FN_DTBSV, FN_CTBSV, FN_ZTBSV,
    /* HER2K */
    FN_CHER2K, FN_ZHER2K,
    /* COPY */
    FN_SCOPY, FN_DCOPY, FN_CCOPY, FN_ZCOPY,
    /* SWAP */
    FN_SSWAP, FN_DSWAP, FN_CSWAP, FN_ZSWAP,
    /* DOT, DOTU, DOTC */
    FN_SDOT, FN_DDOT, FN_CDOTU, FN_ZDOTU, FN_CDOTC, FN_ZDOTC,
    /* SCAL */
    FN_SSCAL, FN_DSCAL, FN_CSCAL, FN_ZSCAL, FN_CSSCAL, FN_ZDSCAL,
    /* AXPY */
    FN_SAXPY, FN_DAXPY, FN_CAXPY, FN_ZAXPY,
    /* ROTG */
    FN_SROTG, FN_DROTG, FN_CROTG, FN_ZROTG,
    /* ROTM */
    FN_SROTM, FN_DROTM,
    /* ROT */
    FN_SROT, FN_DROT, FN_CSROT, FN_ZDROT,
    /* ROTMG */
    FN_SROTMG, FN_DROTMG,
    /* NRM2 */
    FN_SNRM2, FN_DNRM2, FN_SCNRM2, FN_DZNRM2,
    /* ASUM */
    FN_SASUM, FN_DASUM, FN_SCASUM, FN_DZASUM,
    /* iAMAX */
    FN_iSAMAX, FN_iDAMAX, FN_iCAMAX, FN_iZAMAX,

    /* Count of the function identifiers above; keep it as the last entry. */
    BLAS_FUNCTION_END
} BlasFunctionID;
269 
270 cl_context
271 getQueueContext(cl_command_queue commandQueue, cl_int *error);
272 
273 cl_int
274 waitForSuccessfulFinish(
275     cl_uint numCommandQueues,
276     cl_command_queue *commandQueues,
277     cl_event *events);
278 
279 cl_int
280 flushAll(
281     cl_uint numCommandQueues,
282     cl_command_queue *commandQueues);
283 
284 const char* orderStr(clblasOrder order);
285 const char* sideStr(clblasSide side);
286 const char* uploStr(clblasUplo uplo);
287 const char* transStr(clblasTranspose trans);
288 const char* diagStr(clblasDiag diag);
289 
290 char encodeTranspose(clblasTranspose value);
291 char encodeUplo(clblasUplo value);
292 char encodeDiag(clblasDiag value);
293 char encodeSide(clblasSide value);
294 
295 int functionBlasLevel(BlasFunctionID funct);
296 
/*
 * trsmBlockSize: takes no arguments and returns a size_t.
 * Declaration only; the definition is not in this header.
 * NOTE(review): presumably a block size used by the TRSM tests -- confirm.
 */
size_t trsmBlockSize(void);
298 
299 #ifdef __cplusplus
300 }       // extern "C"
301 #endif
302 
303 #ifdef __cplusplus
304 
305 template <typename T>
306 static T
convertMultiplier(ComplexLong arg)307 convertMultiplier(ComplexLong arg)
308 {
309     return static_cast<T>(arg.re);
310 }
311 
312 template<>
313 __template_static FloatComplex
convertMultiplier(ComplexLong arg)314 convertMultiplier(ComplexLong arg)
315 {
316     return floatComplex(
317         static_cast<float>(arg.re), static_cast<float>(arg.imag));
318 }
319 
320 template<>
321 __template_static DoubleComplex
convertMultiplier(ComplexLong arg)322 convertMultiplier(ComplexLong arg)
323 {
324     return doubleComplex(arg.re, arg.imag);
325 }
326 
327 template <typename T>
returnMax(T arg)328 static cl_double returnMax(T arg)
329 {
330     return static_cast<cl_double>(fabs(arg));
331 }
332 
333  template<>
334 __template_static cl_double returnMax<FloatComplex> (FloatComplex arg)
335 {
336     return static_cast<cl_double>( MAX( fabs(CREAL(arg)), fabs(CIMAG(arg)) ) );
337 }
338 
339  template<>
340 __template_static cl_double returnMax<DoubleComplex> (DoubleComplex arg)
341 {
342     return static_cast<cl_double>( MAX( fabs(CREAL(arg)), fabs(CIMAG(arg)) ) );
343 }
344 
345 // xGEMM
346 void
347 printTestParams(
348     clblasOrder order,
349     clblasTranspose transA,
350     clblasTranspose transB,
351     size_t M,
352     size_t N,
353     size_t K,
354     bool useAlpha,
355     ComplexLong alpha,
356     size_t offA,
357     size_t lda,
358     size_t offB,
359     size_t ldb,
360     bool useBeta,
361     ComplexLong beta,
362     size_t offC,
363     size_t ldc);
364 
365 // xTRMM, xTRSM
366 void
367 printTestParams(
368     clblasOrder order,
369     clblasSide side,
370     clblasUplo uplo,
371     clblasTranspose transA,
372     clblasDiag diag,
373     size_t M,
374     size_t N,
375     bool useAlpha,
376     ComplexLong alpha,
377     size_t offA,
378     size_t lda,
379     size_t offB,
380     size_t ldb);
381 
382 //xTRMV, xTRSV
383 void
384 printTestParams(
385     clblasOrder order,
386     clblasUplo uplo,
387     clblasTranspose transA,
388     clblasDiag diag,
389     size_t N,
390     size_t lda,
391     int incx,
392     size_t offa,
393     size_t offx);
394 
395 //xTPMV
396 void
397 printTestParams(
398     clblasOrder order,
399     clblasUplo uplo,
400     clblasTranspose transA,
401     clblasDiag diag,
402     size_t N,
403     int incx,
404     size_t offa,
405     size_t offx);
406 
407 //xSYR xHER
408 void
409 printTestParams(
410     clblasOrder order,
411     clblasUplo uplo,
412     size_t N,
413     double alpha,
414     size_t offx,
415     int incx,
416     size_t offa,
417     size_t lda);
418 
419 
420 //xHER2
421 void
422 printTestParams(
423         clblasOrder order,
424         clblasUplo  uplo,
425         size_t N,
426         bool useAlpha,
427         cl_float2 alpha,
428         size_t offx,
429         int incx,
430         size_t offy,
431         int incy,
432         size_t offa,
433         size_t lda);
434 
/*
 * printTestParams -- overload shared by xCOPY and xSWAP.
 * Declaration only; the definition is outside this header.
 * NOTE(review): presumably prints these arguments for a test run -- confirm.
 */
void printTestParams(
    size_t N,
    size_t offx,
    int    incx,
    size_t offy,
    int    incy);
443 
444 //xSyr2
445 void
446 printTestParams(
447 	clblasOrder order,
448 	clblasUplo  uplo,
449 	size_t N,
450 	double alpha,
451 	size_t offx,
452 	int incx,
453 	size_t offy,
454 	int incy,
455 	size_t offa,
456 	size_t lda);
457 
458 //HEMV
459 void
460 printTestParams(
461     clblasOrder order,
462     clblasUplo  uplo,
463     size_t N,
464     ComplexLong alpha,
465     size_t offa,
466     size_t lda,
467     size_t offx,
468     int incx,
469     ComplexLong beta,
470     size_t offy,
471     int incy);
472 
473 //xSymm,
474 void
475 printTestParams(
476     clblasOrder order,
477     clblasSide side,
478     clblasUplo uplo,
479     size_t M,
480     size_t N,
481     bool useAlpha,
482     ComplexLong alpha,
483     bool useBeta,
484     ComplexLong beta,
485     size_t lda,
486     size_t ldb,
487     size_t ldc,
488     size_t offa,
489     size_t offb,
490     size_t offc );
491 
492 //xHEMM
493 void
494 printTestParams(
495     clblasOrder order,
496     clblasSide side,
497     clblasUplo uplo,
498     size_t M,
499     size_t N,
500     bool useAlpha,
501     cl_float2 alpha,
502     bool useBeta,
503     cl_float2 beta,
504     size_t lda,
505     size_t ldb,
506     size_t ldc,
507     size_t offa,
508     size_t offb,
509     size_t offc );
510 
511 
512 //xGER , xGERC
513 
514 void
515 printTestParams(
516     clblasOrder order,
517     size_t M,
518     size_t N,
519     bool useAlpha,
520     ComplexLong alpha,
521     size_t lda,
522     int incx,
523     int incy,
524     size_t offa,
525     size_t offx,
526     size_t offy );
527 
528 // xGEMV
529 void
530 printTestParams(
531     clblasOrder order,
532     clblasTranspose transA,
533     size_t M,
534     size_t N,
535     bool useAlpha,
536     ComplexLong alpha,
537     size_t offA,
538     size_t lda,
539     int incx,
540     bool useBeta,
541     ComplexLong beta,
542     int incy);
543 
544 // xGBMV
545 void
546 printTestParams(
547     clblasOrder order,
548     clblasTranspose transA,
549     size_t M,
550     size_t N,
551     size_t KL,
552     size_t KU,
553     ComplexLong alpha,
554     size_t offA,
555     size_t lda,
556     size_t offx,
557     int incx,
558     ComplexLong beta,
559     size_t offy,
560     int incy);
561 
562 //xHBMV/xSBMV
563 
564 void
565 printTestParams(
566     clblasOrder order,
567     clblasUplo uplo,
568     size_t N,
569     size_t K,
570     ComplexLong alpha,
571     size_t offA,
572     size_t lda,
573     size_t offx,
574     int incx,
575     ComplexLong beta,
576     size_t offy,
577     int incy);
578 
579 
580 // xTBMV
581 void
582 printTestParams(
583     clblasOrder order,
584     clblasUplo uplo,
585     clblasTranspose transA,
586     clblasDiag diag,
587     size_t N,
588     size_t KLU,
589     size_t offA,
590     size_t lda,
591     size_t offx,
592     int incx,
593     size_t offy,
594     int incy);
595 
596 // xSYMV
597 void
598 printTestParams(
599     clblasOrder order,
600     clblasUplo uplo,
601     size_t N,
602     bool useAlpha,
603     ComplexLong alpha,
604     size_t offA,
605     size_t lda,
606     int incx,
607     bool useBeta,
608     ComplexLong beta,
609     int incy);
610 
611 // xSYR2K
612 void
613 printTestParams(
614     clblasOrder order,
615     clblasUplo uplo,
616     clblasTranspose transA,
617     size_t N,
618     size_t K,
619     bool useAlpha,
620     ComplexLong alpha,
621     size_t offA,
622     size_t lda,
623     size_t offB,
624     size_t ldb,
625     bool useBeta,
626     ComplexLong beta,
627     size_t offC,
628     size_t ldc);
629 
630 // xSYRK
631 void
632 printTestParams(
633     clblasOrder order,
634     clblasUplo uplo,
635     clblasTranspose transA,
636     size_t N,
637     size_t K,
638     bool useAlpha,
639     ComplexLong alpha,
640     size_t offA,
641     size_t lda,
642     bool useBeta,
643     ComplexLong beta,
644     size_t offC,
645     size_t ldc);
646 
647 // xSCAL
648 void
649 printTestParams(
650     size_t N,
651     ComplexLong alpha,
652     size_t offx,
653     int incx);
654 
655 // xAXPY
656 void
657 printTestParams(
658     size_t N,
659     ComplexLong alpha,
660     size_t offx,
661     int incx,
662     size_t offy,
663     int incy);
664 
665 // For ROT
666 void
667 printTestParams(
668     size_t N,
669     size_t offx,
670     int incx,
671 	size_t offy,
672 	int incy,
673 	ComplexLong alpha,
674 	ComplexLong beta);
675 
/*
 * printTestParams -- xROTG overload: four size_t offsets.
 * TODO (carried over from the original note): check whether the other ROT
 * routines can use this overload too.
 * Declaration only; the definition is outside this header.
 * NOTE(review): presumably prints these arguments for a test run -- confirm.
 */
void printTestParams(
    size_t offSA,
    size_t offSB,
    size_t offC,
    size_t offS);
679 
680 // xROTM
681 void
682 printTestParams(size_t N, size_t offx, int incx, size_t offy, int incy, size_t offParam, ComplexLong sflagParam);
683 
684 //xROTMG
685 void
686 printTestParams(int offX, int offY, int offD1, int offD2, int offParam, ComplexLong sflagParam);
687 
/*
 * printTestParams -- overload shared by xNRM2, AMAX and ASUM.
 * Declaration only; the definition is outside this header.
 * NOTE(review): presumably prints these arguments for a test run -- confirm.
 */
void printTestParams(
    size_t N,
    size_t offx,
    int    incx);
694 
695 #endif  // __cplusplus
696 
697 #endif  /* COMMON_H_ */
698