1 #include <stdio.h>
2 #include <string.h>
3 #include <clBLAS.h>
4
5 #include <devinfo.h>
6 #include "clblas-internal.h"
7 #include "solution_seq.h"
8
9 #include <functor.h>
10 #include <binary_lookup.h>
11 #include <iostream>
12
13 #include <functor_xgemm.h>
14 #include <tahiti.h>
15 #include <hawaii.h>
16 #include <gcn_sgemm.h>
17
18 #include "BinaryBuild.h"
19
20 //for the moment only managing source code and cl binary
21
22 #if BUILD_KERNEL_FROM_STRING
23 #include "sgemm_gcn.clT"
24 #else
25
26 #include "sgemm_gcn.clHawaii_64.bin.clT"
27 #include "sgemm_gcn.clBonaire_64.bin.clT"
28
29 #include "sgemm_gcn.clTahiti_64.bin.clT"
30 #endif
31
32
33 //
34 // The name of the 'const char *' providing the kernel OpenCL source
35 //
36 // dgemm_TATB_DIVN_DIVM_DIVK_BS0xBS1_NV0xNV1
37 //
38 // For instance, DGEMM_SRC_NAME(N,T,32,64,8,8,8,4,8) is dgemm_NT_32_64_8_8x8_4x8
39 //
40 #define SGEMM_SRC_NAME(TA,TB,DIVN,DIVM,DIVK,BS0,BS1,NV0,NV1,MULT) sgemm_##TA##TB##_##DIVN##_##DIVM##_##DIVK##_##BS0##x##BS1##_##NV0##x##NV1##MULT
41 #define SGEMM_SRC_NAME_TAHITI(TA,TB,DIVN,DIVM,DIVK,BS0,BS1,NV0,NV1,BITS,MULT) sgemm_##TA##TB##_##DIVN##_##DIVM##_##DIVK##_##BS0##x##BS1##_##NV0##x##NV1##MULT##_##BITS##_bin_Tahiti
42 #define SGEMM_SRC_NAME_HAWAII(TA,TB,DIVN,DIVM,DIVK,BS0,BS1,NV0,NV1,BITS,MULT) sgemm_##TA##TB##_##DIVN##_##DIVM##_##DIVK##_##BS0##x##BS1##_##NV0##x##NV1##MULT##_##BITS##_bin_Hawaii
43 #define SGEMM_SRC_NAME_BONAIRE(TA,TB,DIVN,DIVM,DIVK,BS0,BS1,NV0,NV1,BITS,MULT) sgemm_##TA##TB##_##DIVN##_##DIVM##_##DIVK##_##BS0##x##BS1##_##NV0##x##NV1##MULT##_##BITS##_bin_Bonaire
44
45 //
46 // The name of the 'const char []' global variable that contain the SPIR data.
47 // That name is similar to the one produced by DGEMM_SRC_NAME but suffixed by _spir
48 //
49 #define SGEMM_SPIR_NAME(TA,TB,DIVN,DIVM,DIVK,BS0,BS1,NV0,NV1,MULT) sgemm_##TA##TB##_##DIVN##_##DIVM##_##DIVK##_##BS0##x##BS1##_##NV0##x##NV1_spir
50
51 //
52 // The name of the 'const char []' global variable that contain the CL binaries data.
53 // That name is similar to the one produced by DGEMM_SRC_NAME but suffixed by _bin
54 //
55
56
57 // The name of the kernel itself.
58 // This is basically the name returned by DGEMM_SRC_NAME but as string
59 //
60 #define SGEMM_KERNEL_NAME(TA,TB,DIVN,DIVM,DIVK,BS0,BS1,NV0,NV1,MULT) "sgemm_" #TA #TB "_" #DIVN "_" #DIVM "_" #DIVK "_" #BS0 "x" #BS1 "_" #NV0 "x" #NV1 #MULT
61
62 //
63 // Helpers to transform N and T in proper clblas values for the macros above
64 //
65 #define trans_N clblasNoTrans
66 #define trans_T clblasTrans
67
68
69 // Fill a variant descriptor using OpenCL source
70 #define SGEMM_VARIANT_SRC(TA,TB,DIVN,DIVM,DIVK,BS0,BS1,NV0,NV1,MULT) { \
71 SGEMM_KERNEL_NAME(TA,TB,DIVN,DIVM,DIVK,BS0,BS1,NV0,NV1,MULT) , \
72 SGEMM_SRC_NAME(TA,TB,DIVN,DIVM,DIVK,BS0,BS1,NV0,NV1,MULT) , \
73 NULL, NULL, 0, \
74 trans_##TA, trans_##TB, \
75 DIVN,DIVM,DIVK, \
76 { BS0, BS1 } , \
77 { NV0, NV1 } \
78 }
79
80 // Fill a variant descriptor using SPIR
81 #define SGEMM_VARIANT_SPIR(TA,TB,DIVN,DIVM,DIVK,BS0,BS1,NV0,NV1,MULT) { \
82 SGEMM_KERNEL_NAME(TA,TB,DIVN,DIVM,DIVK,BS0,BS1,NV0,NV1) , \
83 NULL , "-x spir -spir-std=1.2" \
84 SGEMM_SPIR_NAME(TA,TB,DIVN,DIVM,DIVK,BS0,BS1,NV0,NV1), \
85 sizeof(SGEMM_SPIR_NAME(TA,TB,DIVN,DIVM,DIVK,BS0,BS1,NV0,NV1)), \
86 trans_##TA,trans_##TB, \
87 DIVN,DIVM,DIVK, \
88 { BS0, BS1 } , \
89 { NV0, NV1 } \
90 }
91
92 // Fill a variant descriptor using CL Binaries
93 #define SGEMM_VARIANT_BIN_CL1(TA,TB,DIVN,DIVM,DIVK,BS0,BS1,NV0,NV1,BITS,DEVICE,MULT) { \
94 SGEMM_KERNEL_NAME(TA,TB,DIVN,DIVM,DIVK,BS0,BS1,NV0,NV1,MULT) , \
95 NULL , NULL, \
96 SGEMM_SRC_NAME##_##DEVICE(TA,TB,DIVN,DIVM,DIVK,BS0,BS1,NV0,NV1,BITS,MULT), \
97 sizeof(SGEMM_SRC_NAME##_##DEVICE(TA,TB,DIVN,DIVM,DIVK,BS0,BS1,NV0,NV1,BITS,MULT)), \
98 trans_##TA,trans_##TB, \
99 DIVN,DIVM,DIVK, \
100 { BS0, BS1 } , \
101 { NV0, NV1 } \
102 }
103
104
105 #define SGEMM_VARIANT_BIN_CL2(TA,TB,DIVN,DIVM,DIVK,BS0,BS1,NV0,NV1,BITS,DEVICE,MULT) { \
106 SGEMM_KERNEL_NAME(TA,TB,DIVN,DIVM,DIVK,BS0,BS1,NV0,NV1,MULT) , \
107 NULL , "-cl-std=CL2.0", \
108 SGEMM_SRC_NAME##_##DEVICE(TA,TB,DIVN,DIVM,DIVK,BS0,BS1,NV0,NV1,BITS,MULT), \
109 sizeof(SGEMM_SRC_NAME##_##DEVICE(TA,TB,DIVN,DIVM,DIVK,BS0,BS1,NV0,NV1,BITS,MULT)), \
110 trans_##TA,trans_##TB, \
111 DIVN,DIVM,DIVK, \
112 { BS0, BS1 } , \
113 { NV0, NV1 } \
114 }
115
116 // Make it 1 to enable additional debug 'print'
117 #define VERB 0
118
119 // Just because the full name is too long
120 typedef clblasSgemmFunctorGCN::Variant Variant ;
121
122 //
123 // The static cache used to store all instances of clblasSgemmFunctorGCN
124 //
125 typedef clblasFunctorCache<clblasSgemmFunctorGCN,const Variant *> Cache ;
126 static Cache cache ;
127
128
129 // return true iff a kernel variant is applicable to the specified args
applicable(const Variant & var,clblasSgemmFunctor::Args & args)130 static bool applicable( const Variant & var, clblasSgemmFunctor::Args & args )
131 {
132 #if 0
133 // Transpose values are tested in select_variant
134 if ( args.transA != var.transA ) return false ;
135 if ( args.transB != var.transB ) return false ;
136 #endif
137 if ( args.N % var.divN != 0 ) return false ;
138 if ( args.M % var.divM != 0 ) return false ;
139 if ( args.K % var.divK != 0 ) return false ;
140 if ( args.beta==0 && var.mult.compare("__ALPHA")!=0)
141 return false ;
142
143 return true ;
144 }
145
146 //
147 // The goal of this function is to return the Variant to be used
148 // for the DGEMM specified by 'args'.
149 //
150 // The variants are typically tested sequentially from the more
151 // specific to the more generic. Additional conditions can be
152 // placed into the surrounding 'if' (typically that would be
153 // to perform additional tests on M, N and K).
154 //
155 //
156
select_variant(clblasSgemmFunctor::Args & args,const char * DevName,cl_uint _64BitsUse)157 static const Variant * select_variant( clblasSgemmFunctor::Args & args, const char* DevName, cl_uint _64BitsUse )
158 {
159 //
160
161 if(_64BitsUse!=64)
162 {
163 std::cout<<"we don't support clblas on 32 bits"<< std::endl;
164 assert(1);
165 return NULL;
166 }
167
168 if ( args.transA == clblasNoTrans )
169 {
170 if ( args.transB == clblasNoTrans )
171 {
172 if (true)
173 {
174
175 //we only manage the binary version here
176 if(!strcmp(DevName, "Tahiti"))
177 {
178 #ifndef CLBLAS_TAHITI_DYNAMIC_KERNEL
179 static const Variant variant = SGEMM_VARIANT_BIN_CL1(N,N,96,96,16,16,16,6,6,64,TAHITI, __ALPHABETA) ;
180 if ( applicable(variant,args) )
181 return &variant ;
182
183 static const Variant variantA = SGEMM_VARIANT_BIN_CL1(N,N,96,96,16,16,16,6,6,64,TAHITI, __ALPHA) ;
184 if ( applicable(variantA,args) )
185 return &variantA ;
186 #endif //#ifndef CLBLAS_TAHITI_DYNAMIC_KERNEL
187 }
188 //For GCN2 devices we will use the splitsgemm functor
189 }
190
191 if (true)
192 {
193
194 //we only manage the binary version here
195 if(!strcmp(DevName, "Tahiti"))
196 {
197 #ifndef CLBLAS_TAHITI_DYNAMIC_KERNEL
198 static const Variant variant = SGEMM_VARIANT_BIN_CL1(N,N,64,64,16,16,16,4,4,64,TAHITI, __ALPHABETA) ;
199 if ( applicable(variant,args) )
200 return &variant ;
201
202 static const Variant variantA = SGEMM_VARIANT_BIN_CL1(N,N,64,64,16,16,16,4,4,64,TAHITI, __ALPHA) ;
203 if ( applicable(variantA,args) )
204 return &variantA ;
205 #endif //#ifndef CLBLAS_TAHITI_DYNAMIC_KERNEL
206 }
207 else if(!strcmp(DevName, "Hawaii"))
208 {
209 #ifndef CLBLAS_HAWAII_DYNAMIC_KERNEL
210 static const Variant variant = SGEMM_VARIANT_BIN_CL2(N,N,64,64,16,16,16,4,4,64,HAWAII, __ALPHABETA) ;
211 if ( applicable(variant,args) )
212 return &variant ;
213 static const Variant variantA = SGEMM_VARIANT_BIN_CL2(N,N,64,64,16,16,16,4,4,64,HAWAII, __ALPHA) ;
214 if ( applicable(variantA,args) )
215 return &variantA ;
216 #endif //#ifndef CLBLAS_HAWAII_DYNAMIC_KERNEL
217 }
218 else if(!strcmp(DevName, "Bonaire"))
219 {
220 #ifndef CLBLAS_BONAIRE_DYNAMIC_KERNEL
221 static const Variant variant = SGEMM_VARIANT_BIN_CL2(N,N,64,64,16,16,16,4,4,64,BONAIRE, __ALPHABETA) ;
222 if ( applicable(variant,args) )
223 return &variant ;
224 static const Variant variantA = SGEMM_VARIANT_BIN_CL2(N,N,64,64,16,16,16,4,4,64,BONAIRE, __ALPHA) ;
225 if ( applicable(variantA,args) )
226 return &variantA ;
227 #endif //#ifndef CLBLAS_BONAIRE_DYNAMIC_KERNEL
228 }
229
230 }
231 }
232 else
233 {
234 // ===== sgemm NT ======
235
236 if (true)
237 {
238
239 //we only manage the binary version here
240 if(!strcmp(DevName, "Tahiti"))
241 {
242 #ifndef CLBLAS_TAHITI_DYNAMIC_KERNEL
243 static const Variant variant = SGEMM_VARIANT_BIN_CL1(N,T,96,96,16,16,16,6,6,64,TAHITI, __ALPHABETA) ;
244 if ( applicable(variant,args) )
245 return &variant ;
246
247 static const Variant variantA = SGEMM_VARIANT_BIN_CL1(N,T,96,96,16,16,16,6,6,64,TAHITI, __ALPHA) ;
248 if ( applicable(variantA,args) )
249 return &variantA ;
250 #endif //#ifndef CLBLAS_TAHITI_DYNAMIC_KERNEL
251 }
252 //For GCN2 devices we will use the splitsgemm functor
253 //else if(!strcmp(DevName, "Hawaii"))
254 //{
255 // static const Variant variant = SGEMM_VARIANT_BIN_CL2(N,T,96,96,16,16,16,6,6,64,HAWAII, __ALPHABETA) ;
256 // if ( applicable(variant,args) )
257 // return &variant ;
258 // static const Variant variantA = SGEMM_VARIANT_BIN_CL2(N,T,96,96,16,16,16,6,6,64,HAWAII, __ALPHA) ;
259 // if ( applicable(variantA,args) )
260 // return &variantA ;
261
262 //}
263 //else if(!strcmp(DevName, "Bonaire"))
264 //{
265 // static const Variant variant = SGEMM_VARIANT_BIN_CL2(N,T,96,96,16,16,16,6,6,64,BONAIRE, __ALPHABETA) ;
266 // if ( applicable(variant,args) )
267 // return &variant ;
268 // static const Variant variantA = SGEMM_VARIANT_BIN_CL2(N,T,96,96,16,16,16,6,6,64,BONAIRE, __ALPHA) ;
269 // if ( applicable(variantA,args) )
270 // return &variantA ;
271
272 //}
273 }
274
275 if (true)
276 {
277
278 //we only manage the binary version here
279 if(!strcmp(DevName, "Tahiti"))
280 {
281 #ifndef CLBLAS_TAHITI_DYNAMIC_KERNEL
282 static const Variant variant = SGEMM_VARIANT_BIN_CL1(N,T,64,64,16,16,16,4,4,64,TAHITI, __ALPHABETA) ;
283 if ( applicable(variant,args) )
284 return &variant ;
285
286 static const Variant variantA = SGEMM_VARIANT_BIN_CL1(N,T,64,64,16,16,16,4,4,64,TAHITI, __ALPHA) ;
287 if ( applicable(variantA,args) )
288 return &variantA ;
289 #endif //#ifndef CLBLAS_TAHITI_DYNAMIC_KERNEL
290 }
291 else if(!strcmp(DevName, "Hawaii"))
292 {
293 #ifndef CLBLAS_HAWAII_DYNAMIC_KERNEL
294 static const Variant variant = SGEMM_VARIANT_BIN_CL2(N,T,64,64,16,16,16,4,4,64,HAWAII, __ALPHABETA) ;
295 if ( applicable(variant,args) )
296 return &variant ;
297 static const Variant variantA = SGEMM_VARIANT_BIN_CL2(N,T,64,64,16,16,16,4,4,64,HAWAII, __ALPHA) ;
298 if ( applicable(variantA,args) )
299 return &variantA ;
300 #endif //#ifndef CLBLAS_HAWAII_DYNAMIC_KERNEL
301 }
302
303 else if(!strcmp(DevName, "Bonaire"))
304 {
305 #ifndef CLBLAS_BONAIRE_DYNAMIC_KERNEL
306 static const Variant variant = SGEMM_VARIANT_BIN_CL2(N,T,64,64,16,16,16,4,4,64,BONAIRE, __ALPHABETA) ;
307 if ( applicable(variant,args) )
308 return &variant ;
309 static const Variant variantA = SGEMM_VARIANT_BIN_CL2(N,T,64,64,16,16,16,4,4,64,BONAIRE, __ALPHA) ;
310 if ( applicable(variantA,args) )
311 return &variantA ;
312 #endif //#ifndef CLBLAS_BONAIRE_DYNAMIC_KERNEL
313 }
314 }
315 }
316 }
317 else
318 {
319 if ( args.transB == clblasNoTrans )
320 {
321 // ===== sgemm TN ======
322 if (true)
323 {
324
325 //we only manage the binary version here
326 if(!strcmp(DevName, "Tahiti"))
327 {
328 #ifndef CLBLAS_TAHITI_DYNAMIC_KERNEL
329 static const Variant variant = SGEMM_VARIANT_BIN_CL1(T,N,96,96,16,16,16,6,6,64,TAHITI, __ALPHABETA) ;
330 if ( applicable(variant,args) )
331 return &variant ;
332
333 static const Variant variantA = SGEMM_VARIANT_BIN_CL1(T,N,96,96,16,16,16,6,6,64,TAHITI, __ALPHA) ;
334 if ( applicable(variantA,args) )
335 return &variantA ;
336 #endif //#ifndef CLBLAS_TAHITI_DYNAMIC_KERNEL
337 }
338 //For GCN2 devices we will use the splitsgemm functor
339 }
340
341 if (true)
342 {
343
344 //we only manage the binary version here
345 if(!strcmp(DevName, "Tahiti"))
346 {
347 #ifndef CLBLAS_TAHITI_DYNAMIC_KERNEL
348 static const Variant variant = SGEMM_VARIANT_BIN_CL1(T,N,64,64,16,16,16,4,4,64,TAHITI, __ALPHABETA) ;
349 if ( applicable(variant,args) )
350 return &variant ;
351
352 static const Variant variantA = SGEMM_VARIANT_BIN_CL1(T,N,64,64,16,16,16,4,4,64,TAHITI, __ALPHA) ;
353 if ( applicable(variantA,args) )
354 return &variantA ;
355 #endif //#ifndef CLBLAS_TAHITI_DYNAMIC_KERNEL
356 }
357 else if(!strcmp(DevName, "Hawaii"))
358 {
359 #ifndef CLBLAS_HAWAII_DYNAMIC_KERNEL
360 static const Variant variant = SGEMM_VARIANT_BIN_CL2(T,N,64,64,16,16,16,4,4,64,HAWAII, __ALPHABETA) ;
361 if ( applicable(variant,args) )
362 return &variant ;
363 static const Variant variantA = SGEMM_VARIANT_BIN_CL2(T,N,64,64,16,16,16,4,4,64,HAWAII, __ALPHA) ;
364 if ( applicable(variantA,args) )
365 return &variantA ;
366 #endif //#ifndef CLBLAS_HAWAII_DYNAMIC_KERNEL
367 }
368 else if(!strcmp(DevName, "Bonaire"))
369 {
370 #ifndef CLBLAS_BONAIRE_DYNAMIC_KERNEL
371 static const Variant variant = SGEMM_VARIANT_BIN_CL2(T,N,64,64,16,16,16,4,4,64,BONAIRE, __ALPHABETA) ;
372 if ( applicable(variant,args) )
373 return &variant ;
374 static const Variant variantA = SGEMM_VARIANT_BIN_CL2(T,N,64,64,16,16,16,4,4,64,BONAIRE, __ALPHA) ;
375 if ( applicable(variantA,args) )
376 return &variantA ;
377 #endif //#ifndef CLBLAS_BONAIRE_DYNAMIC_KERNEL
378 }
379
380 }
381 }
382 }
383
384
385
386 return NULL ; // No suitable variant ... will use the fallback
387
388 }
389
clblasSgemmFunctorGCN(Args & args,const Variant * variant,cl_int & err)390 clblasSgemmFunctorGCN::clblasSgemmFunctorGCN(Args & args, const Variant * variant, cl_int & err) :
391 m_program(0) , m_variant(variant)
392 {
393
394 cl_device_id device;
395 cl_context context;
396
397 cl_command_queue queue = args.queue;
398 err = getDeviceAndContext(queue, device, context);
399 if( err != CL_SUCCESS )
400 {
401 return;
402 }
403
404 if (VERB) printf(" ===> GET KERNEL %s\n", this->m_variant->kernel_name) ;
405
406 //Ben do I use the correct "kernel_name"?
407 BinaryLookup bl(context, device, "clblasSgemmFunctorGCN");
408 //clGetDeviceInfo(device, CL_DEVICE_NAME);
409
410 bl.variantRaw( this->m_variant->kernel_name, strlen(this->m_variant->kernel_name)+1 ) ;
411
412 if ( !bl.found() ) // may create empty file or may wait until file is ready
413 {
414 if ( this->m_variant->bin != 0 )
415 {
416 // build from a pre-compiled version of the kernel (SPIR or cl binaries)
417 err = bl.buildFromBinary(this->m_variant->bin, this->m_variant->bin_size, this->m_variant->build_options);
418 }
419 else
420 {
421 // directly build from a char*
422 err = bl.buildFromSource(this->m_variant->source);
423 }
424
425 if ( err != CL_SUCCESS )
426 {
427 if (VERB) printf(" ===> BUILD PROBLEM\n") ;
428
429 return;
430 }
431 }
432
433 this->m_program = bl.getProgram();
434 }
435
execute(Args & args)436 clblasStatus clblasSgemmFunctorGCN::execute(Args &args)
437 {
438 cl_int err;
439 cl_command_queue queue = args.queue;
440
441 if (VERB) printf(" ===> EXECUTE KERNEL %s\n", this->m_variant->kernel_name) ;
442
443 cl_kernel kernel = clCreateKernel( this->m_program, this->m_variant->kernel_name, &err);
444 if (err != CL_SUCCESS) return clblasStatus(err) ;
445
446 if (VERB) printf(" ===> FOUND %s\n", this->m_variant->kernel_name) ;
447
448 int M = args.M, N = args.N, K = args.K;
449 int lda = args.lda, ldb = args.ldb, ldc = args.ldc;
450
451 int offsetA = args.offA;
452 int offsetB = args.offB;
453 int offsetC = args.offC;
454
455 int arg=0 ;
456
457 // All dgemm kernels shall have the same arguments: (A,B,C,M,N,K,alpha,beta,lda,ldb,ldc,offa,offb,offc)
458
459 setKernelArg<cl_mem>(kernel, arg++, args.A);
460 setKernelArg<cl_mem>(kernel, arg++, args.B);
461 setKernelArg<cl_mem>(kernel, arg++, args.C);
462
463 setKernelArg<int>(kernel, arg++, M);
464 setKernelArg<int>(kernel, arg++, N);
465 setKernelArg<int>(kernel, arg++, K);
466
467 setKernelArg<cl_float>(kernel, arg++, args.alpha);
468 if (args.beta!=0 && this->m_variant->mult.compare("__ALPHA")!=0)
469 setKernelArg<cl_float>(kernel, arg++, args.beta);
470
471 setKernelArg<int>(kernel, arg++, lda);
472 setKernelArg<int>(kernel, arg++, ldb);
473 setKernelArg<int>(kernel, arg++, ldc);
474
475 setKernelArg<int>(kernel, arg++, offsetA);
476 setKernelArg<int>(kernel, arg++, offsetB);
477 setKernelArg<int>(kernel, arg++, offsetC);
478
479 const size_t * ls = this->m_variant->ls ; // Each work group is made of ls[0] x ls[1] PE
480 const size_t * bwi = this->m_variant->bwi ; // Each PE updates bwi[0] x bwi[1] values
481
482 size_t globalThreads[2];
483
484 unsigned int thx, thy;
485
486 thx = M/bwi[0] + ((M%bwi[0] != 0) ? 1 : 0);
487 thx = thx/ls[0] + ((thx%ls[0] != 0) ? 1 : 0);
488 thx = ls[0] * thx;
489
490 thy = N/bwi[1] + ((N%bwi[1] != 0) ? 1 : 0);
491 thy = thy/ls[1] + ((thy%ls[1] != 0) ? 1 : 0);
492 thy = ls[1] * thy;
493
494 globalThreads[0] = thx;
495 globalThreads[1] = thy;
496
497 err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL,
498 globalThreads, ls ,
499 args.numEventsInWaitList,
500 args.eventWaitList,
501 args.events);
502
503 clReleaseKernel(kernel) ;
504
505 if (VERB) printf(" ===> ERR=%d \n",(int)err) ;
506
507 return clblasStatus(err) ;
508 }
509
510
511 clblasSgemmFunctorGCN *
provide(clblasSgemmFunctor::Args & args,const char * DevName)512 clblasSgemmFunctorGCN::provide(clblasSgemmFunctor::Args & args, const char* DevName)
513 {
514
515 if ( args.order == clblasRowMajor )
516 return NULL ; // The RowMajor case shall never occur.
517
518 cl_device_id dev;
519 cl_context ctxt;
520
521 cl_int err = getDeviceAndContext(args.queue, dev, ctxt);
522 if (err != CL_SUCCESS)
523 {
524 return NULL;
525 }
526
527
528 cl_uint bitness = getAddressBits(dev);
529
530 const Variant * variant = select_variant( args, DevName, bitness ) ;
531 if ( variant == NULL )
532 return NULL ;
533
534
535
536 Cache::Lookup lookup(cache, ctxt, dev, variant) ;
537
538 if ( lookup.ok() )
539 {
540 clblasSgemmFunctorGCN * functor = lookup.get();
541 functor->retain(); // increment the reference counter to avoid deletion while it is still beeing used
542 return functor;
543 }
544
545 clblasSgemmFunctorGCN * functor = new clblasSgemmFunctorGCN(args, variant, err);
546 if (err != CL_SUCCESS)
547 {
548 return NULL;
549 }
550
551 lookup.set(functor) ;
552
553 return functor;
554
555 }
556
557