1 /* ************************************************************************
2 * Copyright 2013 Advanced Micro Devices, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 * ************************************************************************/
16
17
18 #include <clBLAS.h>
19 #include <toolslib.h>
20 #include <kern_cache.h>
21 #include <clBLAS.version.h>
22 #include <trace_malloc.h>
23
24 #include "clblas-internal.h"
25 #include <events.h>
26 #include <stdlib.h>
27 #include <stdio.h>
28 #ifdef BUILDING_CLBLAS
29 #include "AutoGemmTeardown.h"
30 #include "UserGemmClKernels.h"
31 #endif
32
33 clblasStatus
clblasGetVersion(cl_uint * major,cl_uint * minor,cl_uint * patch)34 clblasGetVersion(cl_uint* major, cl_uint* minor, cl_uint* patch)
35 {
36 *major = clblasVersionMajor;
37 *minor = clblasVersionMinor;
38 *patch = clblasVersionPatch;
39
40 return clblasSuccess;
41 }
42
43 clblasStatus
clblasSetup(void)44 clblasSetup(void)
45 {
46 solver_id_t sidsNum;
47 char* tmp = NULL;
48
49 // Made the cache unlimited by default
50 size_t kCacheLimit = 0;
51
52 if (clblasInitialized) {
53 return clblasSuccess;
54 }
55
56 // printf("\n%s, line %d\n", __func__, __LINE__);
57 initMallocTrace();
58
59
60 clblasInitBinaryCache();
61
62 clblasSolvers[CLBLAS_GEMM].nrPatterns =
63 initGemmMemPatterns(clblasSolvers[CLBLAS_GEMM].memPatterns);
64 clblasSolvers[CLBLAS_GEMM].defaultPattern = -1;
65
66 clblasSolvers[CLBLAS_TRMM].nrPatterns =
67 initTrmmMemPatterns(clblasSolvers[CLBLAS_TRMM].memPatterns);
68 clblasSolvers[CLBLAS_TRMM].defaultPattern = -1;
69
70 clblasSolvers[CLBLAS_TRSM].nrPatterns =
71 initTrsmMemPatterns(clblasSolvers[CLBLAS_TRSM].memPatterns);
72 clblasSolvers[CLBLAS_TRSM].defaultPattern = -1;
73
74 clblasSolvers[CLBLAS_GEMV].nrPatterns =
75 initGemvMemPatterns(clblasSolvers[CLBLAS_GEMV].memPatterns);
76 clblasSolvers[CLBLAS_GEMV].defaultPattern = -1;
77
78 clblasSolvers[CLBLAS_SYMV].nrPatterns =
79 initSymvMemPatterns(clblasSolvers[CLBLAS_SYMV].memPatterns);
80 clblasSolvers[CLBLAS_SYMV].defaultPattern = -1;
81
82 clblasSolvers[CLBLAS_SYR2K].nrPatterns =
83 initSyr2kMemPatterns(clblasSolvers[CLBLAS_SYR2K].memPatterns);
84 clblasSolvers[CLBLAS_SYR2K].defaultPattern = -1;
85
86 clblasSolvers[CLBLAS_SYRK].nrPatterns =
87 initSyrkMemPatterns(clblasSolvers[CLBLAS_SYRK].memPatterns);
88 clblasSolvers[CLBLAS_SYRK].defaultPattern = -1;
89
90 clblasSolvers[CLBLAS_TRMV].nrPatterns =
91 initTrmvMemPatterns(clblasSolvers[CLBLAS_TRMV].memPatterns);
92 clblasSolvers[CLBLAS_TRMV].defaultPattern = -1;
93
94 // HEMV uses the same memory pattern as TRMV.
95 clblasSolvers[CLBLAS_HEMV].nrPatterns =
96 initTrmvMemPatterns(clblasSolvers[CLBLAS_HEMV].memPatterns);
97 clblasSolvers[CLBLAS_HEMV].defaultPattern = -1;
98
99 clblasSolvers[CLBLAS_TRSV].nrPatterns =
100 initTrsvMemPatterns(clblasSolvers[CLBLAS_TRSV].memPatterns);
101 clblasSolvers[CLBLAS_TRSV].defaultPattern = -1;
102
103 clblasSolvers[CLBLAS_TRSV_GEMV].nrPatterns =
104 initTrsvGemvMemPatterns(clblasSolvers[CLBLAS_TRSV_GEMV].memPatterns);
105 clblasSolvers[CLBLAS_TRSV_GEMV].defaultPattern = -1;
106
107 clblasSolvers[CLBLAS_SYMM].nrPatterns =
108 initSymmMemPatterns(clblasSolvers[CLBLAS_SYMM].memPatterns);
109 clblasSolvers[CLBLAS_SYMM].defaultPattern = -1;
110
111 clblasSolvers[CLBLAS_GEMM2].nrPatterns =
112 initGemmV2MemPatterns(clblasSolvers[CLBLAS_GEMM2].memPatterns);
113 clblasSolvers[CLBLAS_GEMM2].defaultPattern = -1;
114
115 clblasSolvers[CLBLAS_GEMM_TAIL].nrPatterns =
116 initGemmV2TailMemPatterns(clblasSolvers[CLBLAS_GEMM_TAIL].memPatterns);
117 clblasSolvers[CLBLAS_GEMM_TAIL].defaultPattern = -1;
118
119 clblasSolvers[CLBLAS_SYR].nrPatterns =
120 initSyrMemPatterns(clblasSolvers[CLBLAS_SYR].memPatterns);
121 clblasSolvers[CLBLAS_SYR].defaultPattern = -1;
122
123 clblasSolvers[CLBLAS_SYR2].nrPatterns =
124 initSyr2MemPatterns(clblasSolvers[CLBLAS_SYR2].memPatterns);
125 clblasSolvers[CLBLAS_SYR2].defaultPattern = -1;
126
127 clblasSolvers[CLBLAS_GER].nrPatterns =
128 initGerMemPatterns(clblasSolvers[CLBLAS_GER].memPatterns);
129 clblasSolvers[CLBLAS_GER].defaultPattern = -1;
130
131 clblasSolvers[CLBLAS_HER].nrPatterns =
132 initHerMemPatterns(clblasSolvers[CLBLAS_HER].memPatterns);
133 clblasSolvers[CLBLAS_HER].defaultPattern = -1;
134
135 clblasSolvers[CLBLAS_HER2].nrPatterns =
136 initHer2MemPatterns(clblasSolvers[CLBLAS_HER2].memPatterns);
137 clblasSolvers[CLBLAS_HER2].defaultPattern = -1;
138
139 clblasSolvers[CLBLAS_GBMV].nrPatterns =
140 initGbmvMemPatterns(clblasSolvers[CLBLAS_GBMV].memPatterns);
141 clblasSolvers[CLBLAS_GBMV].defaultPattern = -1;
142
143 clblasSolvers[CLBLAS_SWAP].nrPatterns =
144 initSwapMemPatterns(clblasSolvers[CLBLAS_SWAP].memPatterns);
145 clblasSolvers[CLBLAS_SWAP].defaultPattern = -1;
146
147 clblasSolvers[CLBLAS_SCAL].nrPatterns =
148 initScalMemPatterns(clblasSolvers[CLBLAS_SCAL].memPatterns);
149 clblasSolvers[CLBLAS_SCAL].defaultPattern = -1;
150
151 clblasSolvers[CLBLAS_COPY].nrPatterns =
152 initCopyMemPatterns(clblasSolvers[CLBLAS_COPY].memPatterns);
153 clblasSolvers[CLBLAS_COPY].defaultPattern = -1;
154
155 clblasSolvers[CLBLAS_AXPY].nrPatterns =
156 initAxpyMemPatterns(clblasSolvers[CLBLAS_AXPY].memPatterns);
157 clblasSolvers[CLBLAS_AXPY].defaultPattern = -1;
158
159 clblasSolvers[CLBLAS_DOT].nrPatterns =
160 initDotMemPatterns(clblasSolvers[CLBLAS_DOT].memPatterns);
161 clblasSolvers[CLBLAS_DOT].defaultPattern = -1;
162
163 clblasSolvers[CLBLAS_REDUCTION_EPILOGUE].nrPatterns =
164 initReductionMemPatterns(clblasSolvers[CLBLAS_REDUCTION_EPILOGUE].memPatterns);
165 clblasSolvers[CLBLAS_REDUCTION_EPILOGUE].defaultPattern = -1;
166
167 clblasSolvers[CLBLAS_ROTG].nrPatterns =
168 initRotgMemPatterns(clblasSolvers[CLBLAS_ROTG].memPatterns);
169 clblasSolvers[CLBLAS_ROTG].defaultPattern = -1;
170
171 clblasSolvers[CLBLAS_ROTMG].nrPatterns =
172 initRotmgMemPatterns(clblasSolvers[CLBLAS_ROTMG].memPatterns);
173 clblasSolvers[CLBLAS_ROTMG].defaultPattern = -1;
174
175 clblasSolvers[CLBLAS_ROTM].nrPatterns =
176 initRotmMemPatterns(clblasSolvers[CLBLAS_ROTM].memPatterns);
177 clblasSolvers[CLBLAS_ROTM].defaultPattern = -1;
178
179 clblasSolvers[CLBLAS_iAMAX].nrPatterns =
180 initiAmaxMemPatterns(clblasSolvers[CLBLAS_iAMAX].memPatterns);
181 clblasSolvers[CLBLAS_iAMAX].defaultPattern = -1;
182
183 clblasSolvers[CLBLAS_NRM2].nrPatterns =
184 initNrm2MemPatterns(clblasSolvers[CLBLAS_NRM2].memPatterns);
185 clblasSolvers[CLBLAS_NRM2].defaultPattern = -1;
186
187 clblasSolvers[CLBLAS_ASUM].nrPatterns =
188 initAsumMemPatterns(clblasSolvers[CLBLAS_ASUM].memPatterns);
189 clblasSolvers[CLBLAS_ASUM].defaultPattern = -1;
190
191 sidsNum = makeSolverID(BLAS_FUNCTIONS_NUMBER, 0);
192
193 // Read environmental variable to limit or disable ( 0 ) the size of the kernel cache in memory
194 tmp = getenv( "AMD_CLBLAS_KCACHE_LIMIT_MB" );
195 if( tmp != NULL )
196 {
197 kCacheLimit = atol( tmp );
198 #if defined( _WIN32 )
199 printf( "Kernel Cache limit: %Iu MB\n", kCacheLimit );
200 #else
201 printf( "Kernel Cache limit: %zu MB\n", kCacheLimit );
202 #endif
203 kCacheLimit *= (1024 * 1024);
204 }
205
206 if (kCacheLimit || (tmp == NULL)) {
207 clblasKernelCache = createKernelCache(sidsNum, kCacheLimit);
208 if (clblasKernelCache == NULL) {
209 return clblasOutOfHostMemory;
210 }
211 }
212 if (initSCImages()) {
213 destroyKernelCache(clblasKernelCache);
214 return clblasOutOfHostMemory;
215 }
216
217 decomposeEventsSetup();
218
219 initStorageCache();
220
221 clblasInitialized = 1;
222 return clblasSuccess;
223 }
224
// TO BE FIXED: this is a really ugly hack.
// The tune tool and some tests are linked with
// only a subset of clBLAS that does not contain
// the functor-related code.
//
//void (* _cleanFunctorCachesHook)(void) = 0 ;
231
232 void
clblasTeardown(void)233 clblasTeardown(void)
234 {
235 if (!clblasInitialized) {
236 return;
237 }
238
239 printMallocStatistics();
240
241 if (clblasKernelCache != NULL) {
242 printKernelCacheSize(clblasKernelCache);
243 destroyKernelCache(clblasKernelCache);
244 clblasKernelCache = NULL;
245 }
246 releaseSCImages();
247 decomposeEventsTeardown();
248
249 // win32 - crashes
250 destroyStorageCache();
251
252 cleanFunctorCaches() ;
253
254 printMemLeaksInfo();
255 releaseMallocTrace();
256
257 #ifdef BUILDING_CLBLAS
258 initUserGemmClKernels();
259 initAutoGemmClKernels();
260 #endif
261
262 clblasInitialized = 0;
263 }
264