/* ************************************************************************
 * Copyright 2013 Advanced Micro Devices, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * ************************************************************************/


#ifndef COMMON_H_
#define COMMON_H_

/*
 * __template_static: storage-class prefix placed on the explicit template
 * specializations in this header. It expands to `static` when building
 * with MSVC (_MSC_VER defined) and to nothing on every other compiler.
 * NOTE(review): presumably needed because compilers disagree on whether a
 * storage-class specifier is allowed on an explicit specialization - confirm.
 * NOTE(review): identifiers beginning with `__` are reserved for the
 * implementation; the name is kept because the code below depends on it.
 */
#if defined(_MSC_VER)
#define __template_static static
#else /* !_MSC_VER */
#define __template_static
#endif /* _MSC_VER */

/*
 * MAX(a, b): yields a when a > b, otherwise b.
 * Both arguments are parenthesized inside the expansion so that operands
 * containing lower-precedence operators (e.g. `x & y`, `c ? p : q`) are
 * compared and returned as whole expressions.
 * The selected argument is evaluated twice, so do not pass expressions
 * with side effects.
 */
#define MAX(a, b) (((a) > (b)) ? (a) : (b))

#include <clBLAS.h>
#include <cmdline.h>
#include <math.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Identifiers for BLAS routine families. Values are assigned implicitly,
 * starting at 0 in declaration order, so the order below must not change.
 */
typedef enum BlasRoutineID {
    CLBLAS_GEMV,
    CLBLAS_SYMV,
    CLBLAS_GEMM,
    CLBLAS_GEMM2,
    CLBLAS_GEMM_TAIL,
    CLBLAS_TRMM,
    CLBLAS_TRSM,
    CLBLAS_SYRK,
    CLBLAS_SYR2K,
    CLBLAS_TRMV,
    CLBLAS_TPMV,
    CLBLAS_TRSV,
    CLBLAS_TRSV_GEMV,   /* kludge: the current "gemv" does not support complex types */
    CLBLAS_SYMM,
    CLBLAS_GER,
    CLBLAS_SYR,
    CLBLAS_HER,
    CLBLAS_HER2,
    CLBLAS_HEMM,
    CLBLAS_HERK,
    CLBLAS_SWAP,
    CLBLAS_COPY,
    CLBLAS_DOT,
    CLBLAS_SCAL,
    CLBLAS_AXPY,
    CLBLAS_ROTG,
    CLBLAS_ROTM,
    CLBLAS_ROT,
    CLBLAS_ROTMG,
    CLBLAS_NRM2,
    CLBLAS_ASUM,
    CLBLAS_iAMAX,

    /* Must stay last: its value equals the number of IDs listed above. */
    BLAS_FUNCTIONS_NUMBER
} BlasRoutineID;

/*
 * Identifiers for individual BLAS functions, one enumerator per
 * function/type variant (the leading letters presumably follow the usual
 * BLAS S/D/C/Z precision naming - confirm against the users of this enum).
 * Values are assigned implicitly from 0 in declaration order, so entries
 * must not be reordered. Note the enum tag is BlasFunction while the
 * typedef name is BlasFunctionID.
 */
typedef enum BlasFunction {
    /* GEMV */
    FN_SGEMV, FN_DGEMV, FN_CGEMV, FN_ZGEMV,

    /* SYMV */
    FN_SSYMV, FN_DSYMV,

    /* SPMV */
    FN_SSPMV, FN_DSPMV,

    /* GEMM */
    FN_SGEMM, FN_DGEMM, FN_CGEMM, FN_ZGEMM,

    /* GEMM_2 */
    FN_SGEMM_2, FN_DGEMM_2, FN_CGEMM_2, FN_ZGEMM_2,

    /* TRMM */
    FN_STRMM, FN_DTRMM, FN_CTRMM, FN_ZTRMM,

    /* TRSM */
    FN_STRSM, FN_DTRSM, FN_CTRSM, FN_ZTRSM,

    /* SYR2K */
    FN_SSYR2K, FN_DSYR2K, FN_CSYR2K, FN_ZSYR2K,

    /* SYRK */
    FN_SSYRK, FN_DSYRK, FN_CSYRK, FN_ZSYRK,

    /* TRMV */
    FN_STRMV, FN_DTRMV, FN_CTRMV, FN_ZTRMV,

    /* TPMV */
    FN_STPMV, FN_DTPMV, FN_CTPMV, FN_ZTPMV,

    /* TRSV */
    FN_STRSV, FN_DTRSV, FN_CTRSV, FN_ZTRSV,

    /* TPSV */
    FN_STPSV, FN_DTPSV, FN_CTPSV, FN_ZTPSV,

    /* SYMM */
    FN_SSYMM, FN_DSYMM, FN_CSYMM, FN_ZSYMM,

    /* SYR */
    FN_SSYR, FN_DSYR,

    /* SPR */
    FN_SSPR, FN_DSPR,

    /* GER / GERU / GERC */
    FN_SGER, FN_DGER, FN_CGERU, FN_ZGERU, FN_CGERC, FN_ZGERC,

    /* HER / HER2 */
    FN_CHER, FN_ZHER, FN_CHER2, FN_ZHER2,

    /* HPR / HPR2 */
    FN_CHPR, FN_ZHPR, FN_CHPR2, FN_ZHPR2,

    /* SYR2 */
    FN_SSYR2, FN_DSYR2,

    /* SPR2 */
    FN_SSPR2, FN_DSPR2,

    /* HEMV */
    FN_CHEMV, FN_ZHEMV,

    /* HPMV */
    FN_CHPMV, FN_ZHPMV,

    /* HEMM */
    FN_CHEMM, FN_ZHEMM,

    /* HERK */
    FN_CHERK, FN_ZHERK,

    /* GBMV */
    FN_SGBMV, FN_DGBMV, FN_CGBMV, FN_ZGBMV,

    /* TBMV */
    FN_STBMV, FN_DTBMV, FN_CTBMV, FN_ZTBMV,

    /* SBMV */
    FN_SSBMV, FN_DSBMV,

    /* HBMV */
    FN_CHBMV, FN_ZHBMV,

    /* TBSV */
    FN_STBSV, FN_DTBSV, FN_CTBSV, FN_ZTBSV,

    /* HER2K */
    FN_CHER2K, FN_ZHER2K,

    /* COPY */
    FN_SCOPY, FN_DCOPY, FN_CCOPY, FN_ZCOPY,

    /* SWAP */
    FN_SSWAP, FN_DSWAP, FN_CSWAP, FN_ZSWAP,

    /* DOT / DOTU / DOTC */
    FN_SDOT, FN_DDOT, FN_CDOTU, FN_ZDOTU, FN_CDOTC, FN_ZDOTC,

    /* SCAL */
    FN_SSCAL, FN_DSCAL, FN_CSCAL, FN_ZSCAL, FN_CSSCAL, FN_ZDSCAL,

    /* AXPY */
    FN_SAXPY, FN_DAXPY, FN_CAXPY, FN_ZAXPY,

    /* ROTG */
    FN_SROTG, FN_DROTG, FN_CROTG, FN_ZROTG,

    /* ROTM */
    FN_SROTM, FN_DROTM,

    /* ROT */
    FN_SROT, FN_DROT, FN_CSROT, FN_ZDROT,

    /* ROTMG */
    FN_SROTMG, FN_DROTMG,

    /* NRM2 */
    FN_SNRM2, FN_DNRM2, FN_SCNRM2, FN_DZNRM2,

    /* ASUM */
    FN_SASUM, FN_DASUM, FN_SCASUM, FN_DZASUM,

    /* iAMAX */
    FN_iSAMAX, FN_iDAMAX, FN_iCAMAX, FN_iZAMAX,

    /* Last enumerator: its value equals the number of FN_* entries above. */
    BLAS_FUNCTION_END
} BlasFunctionID;

270 cl_context
271 getQueueContext(cl_command_queue commandQueue, cl_int *error);
272
273 cl_int
274 waitForSuccessfulFinish(
275 cl_uint numCommandQueues,
276 cl_command_queue *commandQueues,
277 cl_event *events);
278
279 cl_int
280 flushAll(
281 cl_uint numCommandQueues,
282 cl_command_queue *commandQueues);
283
284 const char* orderStr(clblasOrder order);
285 const char* sideStr(clblasSide side);
286 const char* uploStr(clblasUplo uplo);
287 const char* transStr(clblasTranspose trans);
288 const char* diagStr(clblasDiag diag);
289
290 char encodeTranspose(clblasTranspose value);
291 char encodeUplo(clblasUplo value);
292 char encodeDiag(clblasDiag value);
293 char encodeSide(clblasSide value);
294
295 int functionBlasLevel(BlasFunctionID funct);
296
/*
 * Returns a size_t and takes no arguments (by its name, the block size
 * used for TRSM - units and origin are defined by the implementation).
 */
size_t trsmBlockSize(void);

#ifdef __cplusplus
} // extern "C"
#endif

#ifdef __cplusplus

305 template <typename T>
306 static T
convertMultiplier(ComplexLong arg)307 convertMultiplier(ComplexLong arg)
308 {
309 return static_cast<T>(arg.re);
310 }
311
312 template<>
313 __template_static FloatComplex
convertMultiplier(ComplexLong arg)314 convertMultiplier(ComplexLong arg)
315 {
316 return floatComplex(
317 static_cast<float>(arg.re), static_cast<float>(arg.imag));
318 }
319
320 template<>
321 __template_static DoubleComplex
convertMultiplier(ComplexLong arg)322 convertMultiplier(ComplexLong arg)
323 {
324 return doubleComplex(arg.re, arg.imag);
325 }
326
327 template <typename T>
returnMax(T arg)328 static cl_double returnMax(T arg)
329 {
330 return static_cast<cl_double>(fabs(arg));
331 }
332
333 template<>
334 __template_static cl_double returnMax<FloatComplex> (FloatComplex arg)
335 {
336 return static_cast<cl_double>( MAX( fabs(CREAL(arg)), fabs(CIMAG(arg)) ) );
337 }
338
339 template<>
340 __template_static cl_double returnMax<DoubleComplex> (DoubleComplex arg)
341 {
342 return static_cast<cl_double>( MAX( fabs(CREAL(arg)), fabs(CIMAG(arg)) ) );
343 }
344
345 // xGEMM
346 void
347 printTestParams(
348 clblasOrder order,
349 clblasTranspose transA,
350 clblasTranspose transB,
351 size_t M,
352 size_t N,
353 size_t K,
354 bool useAlpha,
355 ComplexLong alpha,
356 size_t offA,
357 size_t lda,
358 size_t offB,
359 size_t ldb,
360 bool useBeta,
361 ComplexLong beta,
362 size_t offC,
363 size_t ldc);
364
365 // xTRMM, xTRSM
366 void
367 printTestParams(
368 clblasOrder order,
369 clblasSide side,
370 clblasUplo uplo,
371 clblasTranspose transA,
372 clblasDiag diag,
373 size_t M,
374 size_t N,
375 bool useAlpha,
376 ComplexLong alpha,
377 size_t offA,
378 size_t lda,
379 size_t offB,
380 size_t ldb);
381
382 //xTRMV, xTRSV
383 void
384 printTestParams(
385 clblasOrder order,
386 clblasUplo uplo,
387 clblasTranspose transA,
388 clblasDiag diag,
389 size_t N,
390 size_t lda,
391 int incx,
392 size_t offa,
393 size_t offx);
394
395 //xTPMV
396 void
397 printTestParams(
398 clblasOrder order,
399 clblasUplo uplo,
400 clblasTranspose transA,
401 clblasDiag diag,
402 size_t N,
403 int incx,
404 size_t offa,
405 size_t offx);
406
407 //xSYR xHER
408 void
409 printTestParams(
410 clblasOrder order,
411 clblasUplo uplo,
412 size_t N,
413 double alpha,
414 size_t offx,
415 int incx,
416 size_t offa,
417 size_t lda);
418
419
420 //xHER2
421 void
422 printTestParams(
423 clblasOrder order,
424 clblasUplo uplo,
425 size_t N,
426 bool useAlpha,
427 cl_float2 alpha,
428 size_t offx,
429 int incx,
430 size_t offy,
431 int incy,
432 size_t offa,
433 size_t lda);
434
// xCOPY, xSWAP
// printTestParams overload for the xCOPY / xSWAP parameter set: N plus an
// offset and increment for each of x and y.
// Prototype only - no definition is visible in this header.
void
printTestParams(
    size_t N,
    size_t offx,
    int incx,
    size_t offy,
    int incy);

444 //xSyr2
445 void
446 printTestParams(
447 clblasOrder order,
448 clblasUplo uplo,
449 size_t N,
450 double alpha,
451 size_t offx,
452 int incx,
453 size_t offy,
454 int incy,
455 size_t offa,
456 size_t lda);
457
458 //HEMV
459 void
460 printTestParams(
461 clblasOrder order,
462 clblasUplo uplo,
463 size_t N,
464 ComplexLong alpha,
465 size_t offa,
466 size_t lda,
467 size_t offx,
468 int incx,
469 ComplexLong beta,
470 size_t offy,
471 int incy);
472
473 //xSymm,
474 void
475 printTestParams(
476 clblasOrder order,
477 clblasSide side,
478 clblasUplo uplo,
479 size_t M,
480 size_t N,
481 bool useAlpha,
482 ComplexLong alpha,
483 bool useBeta,
484 ComplexLong beta,
485 size_t lda,
486 size_t ldb,
487 size_t ldc,
488 size_t offa,
489 size_t offb,
490 size_t offc );
491
492 //xHEMM
493 void
494 printTestParams(
495 clblasOrder order,
496 clblasSide side,
497 clblasUplo uplo,
498 size_t M,
499 size_t N,
500 bool useAlpha,
501 cl_float2 alpha,
502 bool useBeta,
503 cl_float2 beta,
504 size_t lda,
505 size_t ldb,
506 size_t ldc,
507 size_t offa,
508 size_t offb,
509 size_t offc );
510
511
512 //xGER , xGERC
513
514 void
515 printTestParams(
516 clblasOrder order,
517 size_t M,
518 size_t N,
519 bool useAlpha,
520 ComplexLong alpha,
521 size_t lda,
522 int incx,
523 int incy,
524 size_t offa,
525 size_t offx,
526 size_t offy );
527
528 // xGEMV
529 void
530 printTestParams(
531 clblasOrder order,
532 clblasTranspose transA,
533 size_t M,
534 size_t N,
535 bool useAlpha,
536 ComplexLong alpha,
537 size_t offA,
538 size_t lda,
539 int incx,
540 bool useBeta,
541 ComplexLong beta,
542 int incy);
543
544 // xGBMV
545 void
546 printTestParams(
547 clblasOrder order,
548 clblasTranspose transA,
549 size_t M,
550 size_t N,
551 size_t KL,
552 size_t KU,
553 ComplexLong alpha,
554 size_t offA,
555 size_t lda,
556 size_t offx,
557 int incx,
558 ComplexLong beta,
559 size_t offy,
560 int incy);
561
562 //xHBMV/xSBMV
563
564 void
565 printTestParams(
566 clblasOrder order,
567 clblasUplo uplo,
568 size_t N,
569 size_t K,
570 ComplexLong alpha,
571 size_t offA,
572 size_t lda,
573 size_t offx,
574 int incx,
575 ComplexLong beta,
576 size_t offy,
577 int incy);
578
579
580 // xTBMV
581 void
582 printTestParams(
583 clblasOrder order,
584 clblasUplo uplo,
585 clblasTranspose transA,
586 clblasDiag diag,
587 size_t N,
588 size_t KLU,
589 size_t offA,
590 size_t lda,
591 size_t offx,
592 int incx,
593 size_t offy,
594 int incy);
595
596 // xSYMV
597 void
598 printTestParams(
599 clblasOrder order,
600 clblasUplo uplo,
601 size_t N,
602 bool useAlpha,
603 ComplexLong alpha,
604 size_t offA,
605 size_t lda,
606 int incx,
607 bool useBeta,
608 ComplexLong beta,
609 int incy);
610
611 // xSYR2K
612 void
613 printTestParams(
614 clblasOrder order,
615 clblasUplo uplo,
616 clblasTranspose transA,
617 size_t N,
618 size_t K,
619 bool useAlpha,
620 ComplexLong alpha,
621 size_t offA,
622 size_t lda,
623 size_t offB,
624 size_t ldb,
625 bool useBeta,
626 ComplexLong beta,
627 size_t offC,
628 size_t ldc);
629
630 // xSYRK
631 void
632 printTestParams(
633 clblasOrder order,
634 clblasUplo uplo,
635 clblasTranspose transA,
636 size_t N,
637 size_t K,
638 bool useAlpha,
639 ComplexLong alpha,
640 size_t offA,
641 size_t lda,
642 bool useBeta,
643 ComplexLong beta,
644 size_t offC,
645 size_t ldc);
646
647 // xSCAL
648 void
649 printTestParams(
650 size_t N,
651 ComplexLong alpha,
652 size_t offx,
653 int incx);
654
655 // xAXPY
656 void
657 printTestParams(
658 size_t N,
659 ComplexLong alpha,
660 size_t offx,
661 int incx,
662 size_t offy,
663 int incy);
664
665 // For ROT
666 void
667 printTestParams(
668 size_t N,
669 size_t offx,
670 int incx,
671 size_t offy,
672 int incy,
673 ComplexLong alpha,
674 ComplexLong beta);
675
// xROTG
// printTestParams overload taking four size_t offsets (offSA, offSB, offC,
// offS).
// TODO (from the original note): check whether the other ROT variants can
// use this overload too.
// Prototype only - no definition is visible in this header.
void
printTestParams(size_t offSA, size_t offSB, size_t offC, size_t offS);

680 // xROTM
681 void
682 printTestParams(size_t N, size_t offx, int incx, size_t offy, int incy, size_t offParam, ComplexLong sflagParam);
683
684 //xROTMG
685 void
686 printTestParams(int offX, int offY, int offD1, int offD2, int offParam, ComplexLong sflagParam);
687
// xNRM2, AMAX and ASUM
// printTestParams overload for the xNRM2 / AMAX / ASUM parameter set:
// N, offx and incx.
// Prototype only - no definition is visible in this header.
void
printTestParams(
    size_t N,
    size_t offx,
    int incx);

#endif // __cplusplus

#endif /* COMMON_H_ */
