1 /*=========================================================================
2 *
3 * Copyright Insight Software Consortium
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0.txt
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *=========================================================================*/
18
19 #include "itkGPUKernelManager.h"
20
21 namespace itk
22 {
GPUKernelManager()23 GPUKernelManager::GPUKernelManager()
24 {
25 m_Program = nullptr;
26 m_Manager = GPUContextManager::GetInstance();
27
28 if(m_Manager->GetNumberOfCommandQueues() > 0) m_CommandQueueId = 0; // default
29 // command
30 // queue
31 }
32
~GPUKernelManager()33 GPUKernelManager::~GPUKernelManager()
34 {
35 cl_int errid;
36
37 while(m_KernelContainer.size() > 0)
38 {
39 errid = clReleaseKernel(m_KernelContainer.back());
40 OpenCLCheckError(errid, __FILE__, __LINE__, ITK_LOCATION);
41 m_KernelContainer.pop_back();
42 }
43
44 if(m_Program != nullptr)
45 {
46 errid = clReleaseProgram(m_Program);
47 OpenCLCheckError(errid, __FILE__, __LINE__, ITK_LOCATION);
48 }
49 }
50
LoadProgramFromFile(const char * filename,const char * cPreamble)51 bool GPUKernelManager::LoadProgramFromFile(const char* filename, const char* cPreamble)
52 {
53 // locals
54 FILE* pFileStream = nullptr;
55 size_t szSourceLength;
56 size_t szFinalLength;
57
58 // open the OpenCL source code file
59 #ifdef _WIN32 // Windows version
60 if(fopen_s(&pFileStream, filename, "rb") != 0)
61 {
62 itkWarningMacro("Cannot open OpenCL source file");
63 return false;
64 }
65 #else // Linux version
66 // printout OpenCL source Path
67 std::cout << "Loading source file: " << filename << std::endl;
68 pFileStream = fopen(filename, "rb");
69 if(pFileStream == nullptr)
70 {
71 itkWarningMacro("Cannot open OpenCL source file");
72 return false;
73 }
74 #endif
75
76 size_t szPreambleLength = strlen(cPreamble);
77
78 // get the length of the source code
79 fseek(pFileStream, 0, SEEK_END);
80 szSourceLength = ftell(pFileStream);
81 fseek(pFileStream, 0, SEEK_SET);
82
83 // allocate a buffer for the source code string and read it in
84 char* cSourceString = (char *)malloc(szSourceLength + szPreambleLength + 1000);
85 if(szPreambleLength > 0) memcpy(cSourceString, cPreamble, szPreambleLength);
86 if (fread( (cSourceString) + szPreambleLength, szSourceLength, 1, pFileStream) != 1)
87 {
88 fclose(pFileStream);
89 free(cSourceString);
90 return false;
91 }
92
93 // close the file and return the total length of the combined (preamble +
94 // source) string
95 fclose(pFileStream);
96
97 szFinalLength = szSourceLength + szPreambleLength;
98
99 cSourceString[szSourceLength + szPreambleLength] = '\0';
100
101 //
102 // Create OpenCL program from source strings
103 //
104 cl_int errid;
105 m_Program = clCreateProgramWithSource(
106 m_Manager->GetCurrentContext(), 1, const_cast<const char **>(&cSourceString), &szFinalLength, &errid);
107 OpenCLCheckError(errid, __FILE__, __LINE__, ITK_LOCATION);
108 free(cSourceString);
109
110 if(errid != CL_SUCCESS)
111 {
112 itkWarningMacro("Cannot create GPU program");
113 return false;
114 }
115
116 // build program
117 errid = clBuildProgram(m_Program, 0, nullptr, nullptr, nullptr, nullptr);
118 if(errid != CL_SUCCESS)
119 {
120 //itkWarningMacro("OpenCL program build error");
121
122 // print out build error
123 size_t paramValueSize = 0;
124
125 // get error message size
126 clGetProgramBuildInfo(m_Program, m_Manager->GetDeviceId(0), CL_PROGRAM_BUILD_LOG, 0, nullptr, ¶mValueSize);
127
128 char *paramValue;
129 paramValue = (char*)malloc(paramValueSize);
130
131 // get error message
132 clGetProgramBuildInfo(m_Program, m_Manager->GetDeviceId(0), CL_PROGRAM_BUILD_LOG, paramValueSize, paramValue, nullptr);
133
134 /*
135 std::ostringstream itkmsg;
136 itkmsg << "ERROR: In " __FILE__ ", line " << __LINE__ << "\n"
137 << this->GetNameOfClass() << " (" << this << "): "
138 << "OpenCL program build error:" << paramValue
139 << "\n\n";
140 ::itk::OutputWindowDisplayErrorText( itkmsg.str().c_str() );
141 */
142
143 std::cerr << paramValue << std::endl;
144
145 free( paramValue );
146
147 OpenCLCheckError(errid, __FILE__, __LINE__, ITK_LOCATION);
148
149 return false;
150 }
151
152 return true;
153 }
154
LoadProgramFromString(const char * cSource,const char * cPreamble)155 bool GPUKernelManager::LoadProgramFromString(const char* cSource, const char* cPreamble)
156 {
157 size_t szSourceLength;
158 size_t szPreambleLength;
159 size_t szFinalLength;
160
161 szSourceLength = strlen(cSource);
162 szPreambleLength = strlen(cPreamble);
163 szFinalLength = szSourceLength + szPreambleLength;
164
165 // allocate a buffer for the source code string and read it in
166 char* cSourceString = (char *)malloc(szFinalLength + 1);
167 if(szPreambleLength > 0) memcpy(cSourceString, cPreamble, szPreambleLength);
168
169 memcpy(cSourceString + szPreambleLength, cSource, szSourceLength);
170
171
172 cSourceString[szFinalLength] = '\0';
173
174 //
175 // Create OpenCL program from source strings
176 //
177 cl_int errid;
178 m_Program = clCreateProgramWithSource(
179 m_Manager->GetCurrentContext(), 1, const_cast<const char **>(&cSourceString), &szFinalLength, &errid);
180 OpenCLCheckError(errid, __FILE__, __LINE__, ITK_LOCATION);
181 free(cSourceString);
182
183 if(errid != CL_SUCCESS)
184 {
185 itkWarningMacro("Cannot create GPU program");
186 return false;
187 }
188
189 // build program
190 errid = clBuildProgram(m_Program, 0, nullptr, nullptr, nullptr, nullptr);
191 if(errid != CL_SUCCESS)
192 {
193 //itkWarningMacro("OpenCL program build error");
194
195 // print out build error
196 size_t paramValueSize = 0;
197
198 // get error message size
199 clGetProgramBuildInfo(m_Program, m_Manager->GetDeviceId(0), CL_PROGRAM_BUILD_LOG, 0, nullptr, ¶mValueSize);
200
201 char *paramValue;
202 paramValue = (char*)malloc(paramValueSize);
203
204 // get error message
205 clGetProgramBuildInfo(m_Program, m_Manager->GetDeviceId(0), CL_PROGRAM_BUILD_LOG, paramValueSize, paramValue, nullptr);
206
207 /*
208 std::ostringstream itkmsg;
209 itkmsg << "ERROR: In " __FILE__ ", line " << __LINE__ << "\n"
210 << this->GetNameOfClass() << " (" << this << "): "
211 << "OpenCL program build error:" << paramValue
212 << "\n\n";
213 ::itk::OutputWindowDisplayErrorText( itkmsg.str().c_str() );
214 */
215
216 std::cerr << paramValue << std::endl;
217
218 free( paramValue );
219
220 OpenCLCheckError(errid, __FILE__, __LINE__, ITK_LOCATION);
221
222 return false;
223 }
224
225 return true;
226 }
227
CreateKernel(const char * kernelName)228 int GPUKernelManager::CreateKernel(const char* kernelName)
229 {
230 cl_int errid;
231
232 // create kernel
233 cl_kernel newKernel = clCreateKernel(m_Program, kernelName, &errid);
234
235 OpenCLCheckError(errid, __FILE__, __LINE__, ITK_LOCATION);
236
237 if(errid != CL_SUCCESS) // failed
238 {
239 itkWarningMacro("Fail to create GPU kernel");
240 return -1;
241 }
242
243 m_KernelContainer.push_back( newKernel );
244
245 // argument list
246 m_KernelArgumentReady.push_back( std::vector< KernelArgumentList >() );
247 cl_uint nArg;
248 errid = clGetKernelInfo( newKernel, CL_KERNEL_NUM_ARGS, sizeof(cl_uint), &nArg, nullptr);
249 (m_KernelArgumentReady.back() ).resize( nArg );
250
251 ResetArguments( (int)m_KernelContainer.size()-1 );
252
253 return (int)m_KernelContainer.size()-1;
254 }
255
GetKernelWorkGroupInfo(int kernelIdx,cl_kernel_work_group_info paramName,void * value)256 cl_int GPUKernelManager::GetKernelWorkGroupInfo(int kernelIdx,
257 cl_kernel_work_group_info paramName, void *value)
258 {
259 size_t valueSize, valueSizeRet;
260
261 switch (paramName)
262 {
263 case CL_KERNEL_WORK_GROUP_SIZE:
264 valueSize = sizeof(size_t);
265 break;
266 case CL_KERNEL_COMPILE_WORK_GROUP_SIZE:
267 valueSize = 3 * sizeof(size_t);
268 break;
269 case CL_KERNEL_LOCAL_MEM_SIZE:
270 valueSize = sizeof(cl_ulong);
271 break;
272 default:
273 itkGenericExceptionMacro (<< "Unknown type of work goup information");
274 }
275
276 cl_int errid = clGetKernelWorkGroupInfo(m_KernelContainer[kernelIdx], m_Manager->GetDeviceId(0),
277 paramName, valueSize, value, &valueSizeRet);
278
279 OpenCLCheckError(errid, __FILE__, __LINE__, ITK_LOCATION);
280
281 return errid;
282 }
283
GetDeviceInfo(cl_kernel_work_group_info paramName,size_t argSize,void * argValue)284 cl_int GPUKernelManager::GetDeviceInfo(
285 cl_kernel_work_group_info paramName,
286 size_t argSize, void *argValue)
287 {
288 cl_int errid;
289
290 switch (paramName)
291 {
292 case CL_DEVICE_MAX_WORK_ITEM_SIZES:
293 errid = clGetDeviceInfo(m_Manager->GetDeviceId(0),
294 CL_DEVICE_MAX_WORK_ITEM_SIZES, argSize, argValue, nullptr);
295 break;
296 default:
297 itkGenericExceptionMacro (<< "Unknown type of device info");
298 }
299 OpenCLCheckError(errid, __FILE__, __LINE__, ITK_LOCATION);
300
301 return errid;
302 }
303
SetKernelArg(int kernelIdx,cl_uint argIdx,size_t argSize,const void * argVal)304 bool GPUKernelManager::SetKernelArg(int kernelIdx, cl_uint argIdx, size_t argSize, const void* argVal)
305 {
306 if(kernelIdx < 0 || kernelIdx >= (int)m_KernelContainer.size() ) return false;
307
308 cl_int errid;
309
310 errid = clSetKernelArg(m_KernelContainer[kernelIdx], argIdx, argSize, argVal);
311 OpenCLCheckError(errid, __FILE__, __LINE__, ITK_LOCATION);
312
313 m_KernelArgumentReady[kernelIdx][argIdx].m_IsReady = true;
314 m_KernelArgumentReady[kernelIdx][argIdx].m_GPUDataManager = (GPUDataManager::Pointer)nullptr;
315
316 return true;
317 }
318
SetKernelArgWithImage(int kernelIdx,cl_uint argIdx,GPUDataManager::Pointer manager)319 bool GPUKernelManager::SetKernelArgWithImage(int kernelIdx, cl_uint argIdx, GPUDataManager::Pointer manager)
320 {
321 if(kernelIdx < 0 || kernelIdx >= (int)m_KernelContainer.size() ) return false;
322
323 cl_int errid;
324
325 errid = clSetKernelArg(m_KernelContainer[kernelIdx], argIdx, sizeof(cl_mem), manager->GetGPUBufferPointer() );
326 OpenCLCheckError(errid, __FILE__, __LINE__, ITK_LOCATION);
327
328 m_KernelArgumentReady[kernelIdx][argIdx].m_IsReady = true;
329 m_KernelArgumentReady[kernelIdx][argIdx].m_GPUDataManager = manager;
330
331 return true;
332 }
333
334 // this function must be called right before GPU kernel is launched
CheckArgumentReady(int kernelIdx)335 bool GPUKernelManager::CheckArgumentReady(int kernelIdx)
336 {
337 int nArg = m_KernelArgumentReady[kernelIdx].size();
338
339 for(int i=0; i<nArg; i++)
340 {
341 if(!(m_KernelArgumentReady[kernelIdx][i].m_IsReady) ) return false;
342
343 // automatic synchronization before kernel launch
344 if(m_KernelArgumentReady[kernelIdx][i].m_GPUDataManager != (GPUDataManager::Pointer)nullptr)
345 {
346 m_KernelArgumentReady[kernelIdx][i].m_GPUDataManager->SetCPUBufferDirty();
347 }
348 }
349 return true;
350 }
351
ResetArguments(int kernelIdx)352 void GPUKernelManager::ResetArguments(int kernelIdx)
353 {
354 int nArg = m_KernelArgumentReady[kernelIdx].size();
355
356 for(int i=0; i<nArg; i++)
357 {
358 m_KernelArgumentReady[kernelIdx][i].m_IsReady = false;
359 m_KernelArgumentReady[kernelIdx][i].m_GPUDataManager = (GPUDataManager::Pointer)nullptr;
360 }
361 }
362
LaunchKernel1D(int kernelIdx,size_t globalWorkSize,size_t itkNotUsed (localWorkSize))363 bool GPUKernelManager::LaunchKernel1D(int kernelIdx, size_t globalWorkSize, size_t itkNotUsed(localWorkSize))
364 {
365 if(kernelIdx < 0 || kernelIdx >= (int)m_KernelContainer.size() ) return false;
366
367 if(!CheckArgumentReady(kernelIdx) )
368 {
369 itkWarningMacro("GPU kernel arguments are not completely assigned");
370 return false;
371 }
372
373 cl_int errid;
374 // TODO should we allow the user to determine localWorkSize?
375 // errid = clEnqueueNDRangeKernel(m_Manager->GetCommandQueue(
376 // m_CommandQueueId), m_KernelContainer[kernelIdx], 1, nullptr, &globalWorkSize,
377 // &localWorkSize, 0, nullptr, nullptr);
378 errid = clEnqueueNDRangeKernel(m_Manager->GetCommandQueue(
379 m_CommandQueueId), m_KernelContainer[kernelIdx], 1, nullptr, &globalWorkSize,
380 nullptr, 0, nullptr, nullptr);
381 OpenCLCheckError(errid, __FILE__, __LINE__, ITK_LOCATION);
382
383 if(errid != CL_SUCCESS)
384 {
385 itkWarningMacro("GPU kernel launch failed");
386 return false;
387 }
388
389 return true;
390 }
391
LaunchKernel2D(int kernelIdx,size_t globalWorkSizeX,size_t globalWorkSizeY,size_t itkNotUsed (localWorkSizeX),size_t itkNotUsed (localWorkSizeY))392 bool GPUKernelManager::LaunchKernel2D(int kernelIdx,
393 size_t globalWorkSizeX, size_t globalWorkSizeY,
394 size_t itkNotUsed(localWorkSizeX), size_t itkNotUsed(localWorkSizeY) )
395 {
396 if(kernelIdx < 0 || kernelIdx >= (int)m_KernelContainer.size() ) return false;
397
398 if(!CheckArgumentReady(kernelIdx) )
399 {
400 itkWarningMacro("GPU kernel arguments are not completely assigned");
401 return false;
402 }
403
404 size_t gws[2];
405 gws[0] = globalWorkSizeX;
406 gws[1] = globalWorkSizeY;
407
408 // size_t lws[2];
409 // lws[0] = localWorkSizeX;
410 // lws[1] = localWorkSizeY;
411
412 cl_int errid;
413 // TODO should we allow the user to determine localWorkSize?
414 // errid = clEnqueueNDRangeKernel(m_Manager->GetCommandQueue(
415 // m_CommandQueueId), m_KernelContainer[kernelIdx], 2, nullptr, gws,
416 // lws, 0, nullptr, nullptr);
417 errid = clEnqueueNDRangeKernel(m_Manager->GetCommandQueue(
418 m_CommandQueueId), m_KernelContainer[kernelIdx], 2, nullptr, gws, nullptr, 0, nullptr, nullptr);
419 OpenCLCheckError(errid, __FILE__, __LINE__, ITK_LOCATION);
420
421 if(errid != CL_SUCCESS)
422 {
423 itkWarningMacro("GPU kernel launch failed");
424 return false;
425 }
426
427 return true;
428 }
429
LaunchKernel3D(int kernelIdx,size_t globalWorkSizeX,size_t globalWorkSizeY,size_t globalWorkSizeZ,size_t itkNotUsed (localWorkSizeX),size_t itkNotUsed (localWorkSizeY),size_t itkNotUsed (localWorkSizeZ))430 bool GPUKernelManager::LaunchKernel3D(int kernelIdx,
431 size_t globalWorkSizeX, size_t globalWorkSizeY, size_t globalWorkSizeZ,
432 size_t itkNotUsed(localWorkSizeX), size_t itkNotUsed(localWorkSizeY), size_t itkNotUsed(localWorkSizeZ) )
433 {
434 if(kernelIdx < 0 || kernelIdx >= (int)m_KernelContainer.size() ) return false;
435
436 if(!CheckArgumentReady(kernelIdx) )
437 {
438 itkWarningMacro("GPU kernel arguments are not completely assigned");
439 return false;
440 }
441
442 size_t gws[3];
443 gws[0] = globalWorkSizeX;
444 gws[1] = globalWorkSizeY;
445 gws[2] = globalWorkSizeZ;
446
447 // size_t lws[3];
448 // lws[0] = localWorkSizeX;
449 // lws[1] = localWorkSizeY;
450 // lws[2] = localWorkSizeZ;
451
452 cl_int errid;
453 // TODO should we allow the user to determine localWorkSize?
454 // errid = clEnqueueNDRangeKernel(m_Manager->GetCommandQueue(
455 // m_CommandQueueId), m_KernelContainer[kernelIdx], 3, nullptr, gws, lws, 0, nullptr, nullptr);
456 errid = clEnqueueNDRangeKernel(m_Manager->GetCommandQueue(
457 m_CommandQueueId), m_KernelContainer[kernelIdx], 3, nullptr, gws, nullptr, 0, nullptr, nullptr);
458 OpenCLCheckError(errid, __FILE__, __LINE__, ITK_LOCATION);
459
460 if(errid != CL_SUCCESS)
461 {
462 itkWarningMacro("GPU kernel launch failed");
463 return false;
464 }
465
466 return true;
467 }
468
LaunchKernel(int kernelIdx,int dim,size_t * globalWorkSize,size_t * localWorkSize)469 bool GPUKernelManager::LaunchKernel(int kernelIdx, int dim, size_t *globalWorkSize, size_t *localWorkSize)
470 {
471 if(kernelIdx < 0 || kernelIdx >= (int)m_KernelContainer.size() ) return false;
472
473 if(!CheckArgumentReady(kernelIdx) )
474 {
475 itkWarningMacro("GPU kernel arguments are not completely assigned");
476 return false;
477 }
478
479 // debug
480 //std::cout << "Dim : " << dim << std::endl;
481
482 // debug - if devicetype is CPU
483 //localWorkSize[0] = localWorkSize[1] = localWorkSize[2] = 1;
484 //
485
486 cl_int errid;
487 errid = clEnqueueNDRangeKernel(m_Manager->GetCommandQueue(
488 m_CommandQueueId), m_KernelContainer[kernelIdx], (cl_uint)dim, nullptr, globalWorkSize,
489 localWorkSize, 0, nullptr, nullptr);
490 OpenCLCheckError(errid, __FILE__, __LINE__, ITK_LOCATION);
491
492 /*
493 std::cout << "Check point 1" << std::endl;
494
495 // debug -- synchronize
496 errid = clFlush(m_Manager->GetCommandQueue(m_CommandQueueId));
497 OpenCLCheckError(errid, __FILE__, __LINE__, ITK_LOCATION);
498
499 std::cout << "Check point 2" << std::endl;
500 */
501 errid = clFinish(m_Manager->GetCommandQueue(m_CommandQueueId));
502 OpenCLCheckError(errid, __FILE__, __LINE__, ITK_LOCATION);
503 /*
504 std::cout << "Wait for kernel execution ends" << std::endl;
505 */
506
507 if(errid != CL_SUCCESS)
508 {
509 itkWarningMacro("GPU kernel launch failed");
510 return false;
511 }
512
513 return true;
514 }
515
SetCurrentCommandQueue(int queueid)516 void GPUKernelManager::SetCurrentCommandQueue( int queueid )
517 {
518 if( queueid >= 0 && queueid < (int)m_Manager->GetNumberOfCommandQueues() )
519 {
520 // Assumption: different command queue is assigned to different device
521 m_CommandQueueId = queueid;
522 }
523 else
524 {
525 itkWarningMacro("Not a valid command queue id");
526 }
527 }
528
GetCurrentCommandQueueID()529 int GPUKernelManager::GetCurrentCommandQueueID()
530 {
531 return m_CommandQueueId;
532 }
533
534 }
535