1 /* ************************************************************************
2  * Copyright 2013 Advanced Micro Devices, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  * ************************************************************************/
16 
17 
18 #include <stdio.h>
19 #include <string.h>
20 #include <stdlib.h>
21 
22 #include <cltypes.h>
23 #include <clblas-internal.h>
24 
25 #include "solution_seq.h"
26 
27 #ifdef DUMP_CLBLAS_KERNELS
28 
/* Size, in bytes, of the heap buffer that receives a kernel's source text. */
enum { SRC_BUFSIZE = 512244 };
32 
33 static void
getFuncName(char * name,BlasFunctionID funcID,DataType dtype)34 getFuncName(char *name, BlasFunctionID funcID, DataType dtype)
35 {
36     switch (funcID) {
37     case CLBLAS_GEMV:
38         strcpy(name + 1, "GEMV");
39         break;
40     case CLBLAS_SYMV:
41         strcpy(name + 1, "SYMV");
42         break;
43     case CLBLAS_GEMM:
44         strcpy(name + 1, "GEMM");
45         break;
46     case CLBLAS_TRMM:
47         strcpy(name + 1, "TRMM");
48         break;
49     case CLBLAS_TRSM:
50         strcpy(name + 1, "TRSM");
51         break;
52     case CLBLAS_SYRK:
53         strcpy(name + 1, "SYRK");
54         break;
55     case CLBLAS_SYR2K:
56         strcpy(name + 1, "SYR2K");
57         break;
58     default:
59         break;
60     }
61 
62     if (dtype == TYPE_FLOAT) {
63         name[0] = 's';
64     }
65     else {
66         name[0] = dtypeToPrefix(dtype);
67     }
68 }
69 
70 static void
addTranspSuffix(char * buf,clblasTranspose flag)71 addTranspSuffix(char *buf, clblasTranspose flag)
72 {
73     const char *s;
74 
75     if (flag == clblasNoTrans) {
76         return;
77     }
78 
79     s = (clblasTrans) ? "t" : "tc";
80     strcat(buf, s);
81 }
82 
83 static void
fileNameFromSolution(char * name,BlasFunctionID funcID,const SolutionStep * step)84 fileNameFromSolution(
85     char *name,
86     BlasFunctionID funcID,
87     const SolutionStep *step)
88 {
89     const char *s;
90     const CLBlasKargs *kargs = (const CLBlasKargs*)&step->args;
91     bool isTriangFn;
92 
93     isTriangFn = (funcID == CLBLAS_TRMM || funcID == CLBLAS_TRSM);
94     strcpy(name, "./");
95     name += strlen(name);
96     getFuncName(name, funcID, kargs->dtype);
97     s = (kargs->order == clblasRowMajor) ? "_row_" : "_col_";
98     strcat(name, s);
99     addTranspSuffix(name, kargs->transA);
100     if (isTriangFn) {
101         s = (kargs->uplo == clblasUpper) ? "_upper" : "_lower";
102         strcat(name, s);
103         s = (kargs->side == clblasRight) ? "_right" : "_left";
104         strcat(name, s);
105     }
106     else {
107         addTranspSuffix(name, kargs->transB);
108     }
109 
110     name += strlen(name);
111     sprintf(name, "_%lu_%lu", kargs->M, kargs->N);
112     if (!isTriangFn) {
113         name += strlen(name);
114         sprintf(name, "_%lu", kargs->K);
115     }
116     strcat(name, ".kdump");
117 }
118 
119 void
dumpKernel(const SolutionStep * step,CLBlasKernelType ktype)120 dumpKernel(
121     const SolutionStep *step,
122     CLBlasKernelType ktype)
123 {
124     FILE *file;
125     char tmp[1024];
126     MemoryPattern *pattern;
127     const char *s;
128     const CLBlasKargs *kargs = (const CLBlasKargs*)&step->args;
129     char *srcBuf;
130     unsigned int i;
131 
132     fileNameFromSolution(tmp, step->funcID, step);
133     file = fopen((const char*)tmp, "a+");
134     pattern = &clblasSolvers[step->funcID].memPatterns[step->patternID];
135 
136     // now, dump the info
137     sprintf(tmp, "offset M = %lu, offset N = %lu, offset A = %lu,"
138                  "offset BX = %lu, offset CY = %lu\n",
139             kargs->offsetM, kargs->offsetN, kargs->offA, kargs->offBX,
140             kargs->offCY);
141     fputs(tmp, file);
142 
143     sprintf(tmp, "Memory pattern = %s\n", pattern->name);
144     fputs(tmp, file);
145 
146     s = kernelTypeString(ktype);
147     sprintf(tmp, "Kernel type = %s\n", s);
148     fputs(tmp, file);
149 
150     // data parallelism granularity
151     if (step->pgran.wgDim == 1) {
152         sprintf(tmp, "work group size = %u\n", step->pgran.wgSize[0]);
153     }
154     else {
155         sprintf(tmp, "work group size = %u x %u\n", step->pgran.wgSize[0],
156                 step->pgran.wgSize[1]);
157     }
158     fputs(tmp, file);
159 
160     fputs("Problem granulation\n", file);
161     for (i = 0; i < pattern->nrLevels; i++) {
162         sprintf(tmp, "[%u]: ", i);
163         fputs(tmp, file);
164         sprintfGranulation(tmp, step->subdims, i);
165         fputs(tmp, file);
166         fputs("\n", file);
167     }
168 
169     srcBuf = malloc(SRC_BUFSIZE);
170     if (srcBuf != NULL) {
171         clGetProgramInfo(step->kernels[ktype]->program,
172                          CL_PROGRAM_SOURCE, SRC_BUFSIZE, srcBuf, NULL);
173         fputs("Kernel source:\n\n", file);
174         fputs(srcBuf, file);
175     }
176     else {
177         fputs("Kernel source: not available\n", file);
178     }
179     free(srcBuf);
180 
181     fputs("--------------------------------------------------------------"
182           "------------------------------------------------------------\n",
183           file);
184 
185     fclose(file);
186 }
187 
188 #endif      /* DUMP_CLBLAS_KERNELS */
189