1 /* ************************************************************************
2 * Copyright 2013 Advanced Micro Devices, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 * ************************************************************************/
16
17
18 #include <stdio.h>
19 #include <string.h>
20 #include <stdlib.h>
21
22 #include <cltypes.h>
23 #include <clblas-internal.h>
24
25 #include "solution_seq.h"
26
27 #ifdef DUMP_CLBLAS_KERNELS
28
/*
 * Capacity, in bytes, of the heap buffer that dumpKernel() allocates to
 * receive the kernel source text queried from the OpenCL program.
 */
enum { SRC_BUFSIZE = 512244 };
32
33 static void
getFuncName(char * name,BlasFunctionID funcID,DataType dtype)34 getFuncName(char *name, BlasFunctionID funcID, DataType dtype)
35 {
36 switch (funcID) {
37 case CLBLAS_GEMV:
38 strcpy(name + 1, "GEMV");
39 break;
40 case CLBLAS_SYMV:
41 strcpy(name + 1, "SYMV");
42 break;
43 case CLBLAS_GEMM:
44 strcpy(name + 1, "GEMM");
45 break;
46 case CLBLAS_TRMM:
47 strcpy(name + 1, "TRMM");
48 break;
49 case CLBLAS_TRSM:
50 strcpy(name + 1, "TRSM");
51 break;
52 case CLBLAS_SYRK:
53 strcpy(name + 1, "SYRK");
54 break;
55 case CLBLAS_SYR2K:
56 strcpy(name + 1, "SYR2K");
57 break;
58 default:
59 break;
60 }
61
62 if (dtype == TYPE_FLOAT) {
63 name[0] = 's';
64 }
65 else {
66 name[0] = dtypeToPrefix(dtype);
67 }
68 }
69
70 static void
addTranspSuffix(char * buf,clblasTranspose flag)71 addTranspSuffix(char *buf, clblasTranspose flag)
72 {
73 const char *s;
74
75 if (flag == clblasNoTrans) {
76 return;
77 }
78
79 s = (clblasTrans) ? "t" : "tc";
80 strcat(buf, s);
81 }
82
83 static void
fileNameFromSolution(char * name,BlasFunctionID funcID,const SolutionStep * step)84 fileNameFromSolution(
85 char *name,
86 BlasFunctionID funcID,
87 const SolutionStep *step)
88 {
89 const char *s;
90 const CLBlasKargs *kargs = (const CLBlasKargs*)&step->args;
91 bool isTriangFn;
92
93 isTriangFn = (funcID == CLBLAS_TRMM || funcID == CLBLAS_TRSM);
94 strcpy(name, "./");
95 name += strlen(name);
96 getFuncName(name, funcID, kargs->dtype);
97 s = (kargs->order == clblasRowMajor) ? "_row_" : "_col_";
98 strcat(name, s);
99 addTranspSuffix(name, kargs->transA);
100 if (isTriangFn) {
101 s = (kargs->uplo == clblasUpper) ? "_upper" : "_lower";
102 strcat(name, s);
103 s = (kargs->side == clblasRight) ? "_right" : "_left";
104 strcat(name, s);
105 }
106 else {
107 addTranspSuffix(name, kargs->transB);
108 }
109
110 name += strlen(name);
111 sprintf(name, "_%lu_%lu", kargs->M, kargs->N);
112 if (!isTriangFn) {
113 name += strlen(name);
114 sprintf(name, "_%lu", kargs->K);
115 }
116 strcat(name, ".kdump");
117 }
118
119 void
dumpKernel(const SolutionStep * step,CLBlasKernelType ktype)120 dumpKernel(
121 const SolutionStep *step,
122 CLBlasKernelType ktype)
123 {
124 FILE *file;
125 char tmp[1024];
126 MemoryPattern *pattern;
127 const char *s;
128 const CLBlasKargs *kargs = (const CLBlasKargs*)&step->args;
129 char *srcBuf;
130 unsigned int i;
131
132 fileNameFromSolution(tmp, step->funcID, step);
133 file = fopen((const char*)tmp, "a+");
134 pattern = &clblasSolvers[step->funcID].memPatterns[step->patternID];
135
136 // now, dump the info
137 sprintf(tmp, "offset M = %lu, offset N = %lu, offset A = %lu,"
138 "offset BX = %lu, offset CY = %lu\n",
139 kargs->offsetM, kargs->offsetN, kargs->offA, kargs->offBX,
140 kargs->offCY);
141 fputs(tmp, file);
142
143 sprintf(tmp, "Memory pattern = %s\n", pattern->name);
144 fputs(tmp, file);
145
146 s = kernelTypeString(ktype);
147 sprintf(tmp, "Kernel type = %s\n", s);
148 fputs(tmp, file);
149
150 // data parallelism granularity
151 if (step->pgran.wgDim == 1) {
152 sprintf(tmp, "work group size = %u\n", step->pgran.wgSize[0]);
153 }
154 else {
155 sprintf(tmp, "work group size = %u x %u\n", step->pgran.wgSize[0],
156 step->pgran.wgSize[1]);
157 }
158 fputs(tmp, file);
159
160 fputs("Problem granulation\n", file);
161 for (i = 0; i < pattern->nrLevels; i++) {
162 sprintf(tmp, "[%u]: ", i);
163 fputs(tmp, file);
164 sprintfGranulation(tmp, step->subdims, i);
165 fputs(tmp, file);
166 fputs("\n", file);
167 }
168
169 srcBuf = malloc(SRC_BUFSIZE);
170 if (srcBuf != NULL) {
171 clGetProgramInfo(step->kernels[ktype]->program,
172 CL_PROGRAM_SOURCE, SRC_BUFSIZE, srcBuf, NULL);
173 fputs("Kernel source:\n\n", file);
174 fputs(srcBuf, file);
175 }
176 else {
177 fputs("Kernel source: not available\n", file);
178 }
179 free(srcBuf);
180
181 fputs("--------------------------------------------------------------"
182 "------------------------------------------------------------\n",
183 file);
184
185 fclose(file);
186 }
187
188 #endif /* DUMP_CLBLAS_KERNELS */
189