1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
8 //
9 //
10 // License Agreement
11 // For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14 // Third party copyrights are property of their respective owners.
15 //
16 // Redistribution and use in source and binary forms, with or without modification,
17 // are permitted provided that the following conditions are met:
18 //
19 // * Redistribution's of source code must retain the above copyright notice,
20 // this list of conditions and the following disclaimer.
21 //
22 // * Redistribution's in binary form must reproduce the above copyright notice,
23 // this list of conditions and the following disclaimer in the documentation
24 // and/or other materials provided with the distribution.
25 //
26 // * The name of the copyright holders may not be used to endorse or promote products
27 // derived from this software without specific prior written permission.
28 //
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the OpenCV Foundation or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
39 //
40 //M*/
41
42 #include "precomp.hpp"
43
44 #ifndef HAVE_OPENCL
45 #include "ocl_disabled.impl.hpp"
46 #else // HAVE_OPENCL
47
48 #include <list>
49 #include <map>
50 #include <deque>
51 #include <set>
52 #include <string>
53 #include <sstream>
54 #include <iostream> // std::cerr
55 #include <fstream>
56 #if !(defined _MSC_VER) || (defined _MSC_VER && _MSC_VER > 1700)
57 #include <inttypes.h>
58 #endif
59
60 #include <opencv2/core/utils/configuration.private.hpp>
61
62 #include <opencv2/core/utils/logger.defines.hpp>
63 #undef CV_LOG_STRIP_LEVEL
64 #define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_DEBUG + 1
65 #include <opencv2/core/utils/logger.hpp>
66
67 #include "opencv2/core/ocl_genbase.hpp"
68 #include "opencl_kernels_core.hpp"
69
70 #include "opencv2/core/utils/lock.private.hpp"
71 #include "opencv2/core/utils/filesystem.hpp"
72 #include "opencv2/core/utils/filesystem.private.hpp"
73
74 #define CV__ALLOCATOR_STATS_LOG(...) CV_LOG_VERBOSE(NULL, 0, "OpenCL allocator: " << __VA_ARGS__)
75 #include "opencv2/core/utils/allocator_stats.impl.hpp"
76 #undef CV__ALLOCATOR_STATS_LOG
77
78 #define CV_OPENCL_ALWAYS_SHOW_BUILD_LOG 0
79
80 #define CV_OPENCL_SHOW_RUN_KERNELS 0
81 #define CV_OPENCL_TRACE_CHECK 0
82
83 #define CV_OPENCL_VALIDATE_BINARY_PROGRAMS 1
84
85 #define CV_OPENCL_SHOW_SVM_ERROR_LOG 1
86 #define CV_OPENCL_SHOW_SVM_LOG 0
87
88 #include "opencv2/core/bufferpool.hpp"
89 #ifndef LOG_BUFFER_POOL
90 # if 0
91 # define LOG_BUFFER_POOL printf
92 # else
93 # define LOG_BUFFER_POOL(...)
94 # endif
95 #endif
96
97 #if CV_OPENCL_SHOW_SVM_LOG
98 // TODO add timestamp logging
99 #define CV_OPENCL_SVM_TRACE_P printf("line %d (ocl.cpp): ", __LINE__); printf
100 #else
101 #define CV_OPENCL_SVM_TRACE_P(...)
102 #endif
103
104 #if CV_OPENCL_SHOW_SVM_ERROR_LOG
105 // TODO add timestamp logging
106 #define CV_OPENCL_SVM_TRACE_ERROR_P printf("Error on line %d (ocl.cpp): ", __LINE__); printf
107 #else
108 #define CV_OPENCL_SVM_TRACE_ERROR_P(...)
109 #endif
110
111 #include "opencv2/core/opencl/runtime/opencl_clblas.hpp"
112 #include "opencv2/core/opencl/runtime/opencl_clfft.hpp"
113
114 #include "opencv2/core/opencl/runtime/opencl_core.hpp"
115
116 #ifdef HAVE_OPENCL_SVM
117 #include "opencv2/core/opencl/runtime/opencl_svm_20.hpp"
118 #include "opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp"
119 #include "opencv2/core/opencl/opencl_svm.hpp"
120 #endif
121
122 #include "umatrix.hpp"
123
124 namespace cv { namespace ocl {
125
// Injects intrusive reference counting into the class body where the macro is expanded:
//  - addref() adds 1 to `refcount` through CV_XADD;
//  - release() adds -1 and deletes the object when CV_XADD yields 1 (presumably the
//    value before the decrement, i.e. the last reference is going away - confirm
//    against the CV_XADD definition), unless cv::__termination is set, in which case
//    the object is deliberately not deleted;
//  - `refcount` is declared without an initializer and without a trailing ';', so the
//    host class must set its starting value and terminate the macro invocation itself.
#define IMPLEMENT_REFCOUNTABLE() \
    void addref() { CV_XADD(&refcount, 1); } \
    void release() { if( CV_XADD(&refcount, -1) == 1 && !cv::__termination) delete this; } \
    int refcount
130
131 static cv::utils::AllocatorStatistics opencl_allocator_stats;
132
133 CV_EXPORTS cv::utils::AllocatorStatisticsInterface& getOpenCLAllocatorStatistics();
getOpenCLAllocatorStatistics()134 cv::utils::AllocatorStatisticsInterface& getOpenCLAllocatorStatistics()
135 {
136 return opencl_allocator_stats;
137 }
138
139 #ifndef _DEBUG
isRaiseError()140 static bool isRaiseError()
141 {
142 static bool initialized = false;
143 static bool value = false;
144 if (!initialized)
145 {
146 value = cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_RAISE_ERROR", false);
147 initialized = true;
148 }
149 return value;
150 }
151 #endif
152
#if CV_OPENCL_TRACE_CHECK
// Debug aid, compiled only when CV_OPENCL_TRACE_CHECK is non-zero: prints the status
// code and the accompanying message of a checked OpenCL call to std::cout.
static inline
void traceOpenCLCheck(cl_int status, const char* message)
{
    std::cout << "OpenCV(OpenCL:" << status << "): " << message << std::endl << std::flush;
}
// Hook used by the CV_OCL_*CHECK* macros; forwards to traceOpenCLCheck().
#define CV_OCL_TRACE_CHECK_RESULT(status, message) traceOpenCLCheck(status, message)
#else
// Tracing is switched off: the hook expands to nothing.
#define CV_OCL_TRACE_CHECK_RESULT(status, message) /* nothing */
#endif
163
// Formats the text reported for a failed OpenCL API call: the string returned by
// getOpenCLErrorString(), the numeric code and `msg` (the text of the call).
#define CV_OCL_API_ERROR_MSG(check_result, msg) \
    cv::format("OpenCL error %s (%d) during call: %s", getOpenCLErrorString(check_result), check_result, msg)

// Passes the result to the trace hook and, when `check_result` differs from CL_SUCCESS,
// raises Error::OpenCLApiCallError through CV_Error.
// `msg` must be convertible to `const char*` (enforced by the static_assert).
// NOTE(review): `check_result` is expanded several times - pass a variable rather than
// an expression with side effects.
#define CV_OCL_CHECK_RESULT(check_result, msg) \
    do { \
        CV_OCL_TRACE_CHECK_RESULT(check_result, msg); \
        if (check_result != CL_SUCCESS) \
        { \
            static_assert(std::is_convertible<decltype(msg), const char*>::value, "msg of CV_OCL_CHECK_RESULT must be const char*"); \
            cv::String error_msg = CV_OCL_API_ERROR_MSG(check_result, msg); \
            CV_Error(Error::OpenCLApiCallError, error_msg); \
        } \
    } while (0)

// Evaluates `expr` (which is expected to store its status into `check_result`) and then
// checks `check_result`; the stringified expression is used as the message.
#define CV_OCL_CHECK_(expr, check_result) do { expr; CV_OCL_CHECK_RESULT(check_result, #expr); } while (0)

// Evaluates `expr` once, keeps its cl_int value in a local and checks it.
#define CV_OCL_CHECK(expr) do { cl_int __cl_result = (expr); CV_OCL_CHECK_RESULT(__cl_result, #expr); } while (0)

// "Debug" flavours of the checks above.
// With _DEBUG defined they are plain aliases of the strict macros.
// Otherwise a failure raises an error only when isRaiseError() returns true; if it
// returns false the failure is traced (when tracing is compiled in) and ignored.
#ifdef _DEBUG
#define CV_OCL_DBG_CHECK_RESULT(check_result, msg) CV_OCL_CHECK_RESULT(check_result, msg)
#define CV_OCL_DBG_CHECK(expr) CV_OCL_CHECK(expr)
#define CV_OCL_DBG_CHECK_(expr, check_result) CV_OCL_CHECK_(expr, check_result)
#else
#define CV_OCL_DBG_CHECK_RESULT(check_result, msg) \
    do { \
        CV_OCL_TRACE_CHECK_RESULT(check_result, msg); \
        if (check_result != CL_SUCCESS && isRaiseError()) \
        { \
            static_assert(std::is_convertible<decltype(msg), const char*>::value, "msg of CV_OCL_DBG_CHECK_RESULT must be const char*"); \
            cv::String error_msg = CV_OCL_API_ERROR_MSG(check_result, msg); \
            CV_Error(Error::OpenCLApiCallError, error_msg); \
        } \
    } while (0)
#define CV_OCL_DBG_CHECK_(expr, check_result) do { expr; CV_OCL_DBG_CHECK_RESULT(check_result, #expr); } while (0)
#define CV_OCL_DBG_CHECK(expr) do { cl_int __cl_result = (expr); CV_OCL_DBG_CHECK_RESULT(__cl_result, #expr); } while (0)
#endif
200
201
// Run-time configuration, read once during static initialization of this file.

// OPENCV_OPENCL_CACHE_ENABLE (default: true): when false, the binary cache configurator
// stops right at the start of its constructor and no cache directory is set up.
static const bool CV_OPENCL_CACHE_ENABLE = utils::getConfigurationParameterBool("OPENCV_OPENCL_CACHE_ENABLE", true);
// OPENCV_OPENCL_CACHE_WRITE (default: true): consulted by the cache configurator for its
// "lock disabled" diagnostics and as a precondition of the obsolete-directory cleanup.
static const bool CV_OPENCL_CACHE_WRITE = utils::getConfigurationParameterBool("OPENCV_OPENCL_CACHE_WRITE", true);
// OPENCV_OPENCL_CACHE_LOCK_ENABLE (default: true): create and probe the ".lock" file used
// for interprocess synchronization of the cache.
static const bool CV_OPENCL_CACHE_LOCK_ENABLE = utils::getConfigurationParameterBool("OPENCV_OPENCL_CACHE_LOCK_ENABLE", true);
// OPENCV_OPENCL_CACHE_CLEANUP (default: true): allow removal of cache directories that
// share the cleanup prefix but belong to another context prefix.
static const bool CV_OPENCL_CACHE_CLEANUP = utils::getConfigurationParameterBool("OPENCV_OPENCL_CACHE_CLEANUP", true);

#if CV_OPENCL_VALIDATE_BINARY_PROGRAMS
// OPENCV_OPENCL_VALIDATE_BINARY_PROGRAMS (default: false).
// NOTE(review): the consumer of this flag is outside this part of the file; presumably it
// enables validation of cached program binaries - confirm at the point of use.
static const bool CV_OPENCL_VALIDATE_BINARY_PROGRAMS_VALUE = utils::getConfigurationParameterBool("OPENCV_OPENCL_VALIDATE_BINARY_PROGRAMS", false);
#endif

// Option to disable calls clEnqueueReadBufferRect / clEnqueueWriteBufferRect / clEnqueueCopyBufferRect
// (OPENCV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS; the default is true on Apple platforms and false elsewhere)
static const bool CV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS = utils::getConfigurationParameterBool("OPENCV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS",
#ifdef __APPLE__
        true
#else
        false
#endif
);
219
getBuildExtraOptions()220 static const String getBuildExtraOptions()
221 {
222 static String param_buildExtraOptions;
223 static bool initialized = false;
224 if (!initialized)
225 {
226 param_buildExtraOptions = utils::getConfigurationParameterString("OPENCV_OPENCL_BUILD_EXTRA_OPTIONS", "");
227 initialized = true;
228 if (!param_buildExtraOptions.empty())
229 CV_LOG_WARNING(NULL, "OpenCL: using extra build options: '" << param_buildExtraOptions << "'");
230 }
231 return param_buildExtraOptions;
232 }
233
// OPENCV_OPENCL_ENABLE_MEM_USE_HOST_PTR (default: true) and
// OPENCV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR (default: 4).
// NOTE(review): both are consumed outside this part of the file; judging by the names they
// control usage of CL_MEM_USE_HOST_PTR buffers and the host pointer alignment required
// for it - confirm at the point of use.
static const bool CV_OPENCL_ENABLE_MEM_USE_HOST_PTR = utils::getConfigurationParameterBool("OPENCV_OPENCL_ENABLE_MEM_USE_HOST_PTR", true);
static const size_t CV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR", 4);
236
237
238 struct UMat2D
239 {
UMat2Dcv::ocl::UMat2D240 UMat2D(const UMat& m)
241 {
242 offset = (int)m.offset;
243 step = (int)m.step;
244 rows = m.rows;
245 cols = m.cols;
246 }
247 int offset;
248 int step;
249 int rows;
250 int cols;
251 };
252
253 struct UMat3D
254 {
UMat3Dcv::ocl::UMat3D255 UMat3D(const UMat& m)
256 {
257 offset = (int)m.offset;
258 step = (int)m.step.p[1];
259 slicestep = (int)m.step.p[0];
260 slices = (int)m.size.p[0];
261 rows = m.size.p[1];
262 cols = m.size.p[2];
263 }
264 int offset;
265 int slicestep;
266 int step;
267 int slices;
268 int rows;
269 int cols;
270 };
271
272 // Computes 64-bit "cyclic redundancy check" sum, as specified in ECMA-182
crc64(const uchar * data,size_t size,uint64 crc0=0)273 static uint64 crc64( const uchar* data, size_t size, uint64 crc0=0 )
274 {
275 static uint64 table[256];
276 static bool initialized = false;
277
278 if( !initialized )
279 {
280 for( int i = 0; i < 256; i++ )
281 {
282 uint64 c = i;
283 for( int j = 0; j < 8; j++ )
284 c = ((c & 1) ? CV_BIG_UINT(0xc96c5795d7870f42) : 0) ^ (c >> 1);
285 table[i] = c;
286 }
287 initialized = true;
288 }
289
290 uint64 crc = ~crc0;
291 for( size_t idx = 0; idx < size; idx++ )
292 crc = table[(uchar)crc ^ data[idx]] ^ (crc >> 8);
293
294 return ~crc;
295 }
296
297 #if OPENCV_HAVE_FILESYSTEM_SUPPORT
298 struct OpenCLBinaryCacheConfigurator
299 {
300 cv::String cache_path_;
301 cv::String cache_lock_filename_;
302 cv::Ptr<utils::fs::FileLock> cache_lock_;
303
304 typedef std::map<std::string, std::string> ContextCacheType;
305 ContextCacheType prepared_contexts_;
306 Mutex mutex_prepared_contexts_;
307
OpenCLBinaryCacheConfiguratorcv::ocl::OpenCLBinaryCacheConfigurator308 OpenCLBinaryCacheConfigurator()
309 {
310 CV_LOG_DEBUG(NULL, "Initializing OpenCL cache configuration...");
311 if (!CV_OPENCL_CACHE_ENABLE)
312 {
313 CV_LOG_INFO(NULL, "OpenCL cache is disabled");
314 return;
315 }
316 cache_path_ = utils::fs::getCacheDirectory("opencl_cache", "OPENCV_OPENCL_CACHE_DIR");
317 if (cache_path_.empty())
318 {
319 CV_LOG_INFO(NULL, "Specify OPENCV_OPENCL_CACHE_DIR configuration parameter to enable OpenCL cache");
320 }
321 do
322 {
323 try
324 {
325 if (cache_path_.empty())
326 break;
327 if (cache_path_ == "disabled")
328 break;
329 if (!utils::fs::createDirectories(cache_path_))
330 {
331 CV_LOG_DEBUG(NULL, "Can't use OpenCL cache directory: " << cache_path_);
332 clear();
333 break;
334 }
335
336 if (CV_OPENCL_CACHE_LOCK_ENABLE)
337 {
338 cache_lock_filename_ = cache_path_ + ".lock";
339 if (!utils::fs::exists(cache_lock_filename_))
340 {
341 CV_LOG_DEBUG(NULL, "Creating lock file... (" << cache_lock_filename_ << ")");
342 std::ofstream lock_filename(cache_lock_filename_.c_str(), std::ios::out);
343 if (!lock_filename.is_open())
344 {
345 CV_LOG_WARNING(NULL, "Can't create lock file for OpenCL program cache: " << cache_lock_filename_);
346 break;
347 }
348 }
349
350 try
351 {
352 cache_lock_ = makePtr<utils::fs::FileLock>(cache_lock_filename_.c_str());
353 CV_LOG_VERBOSE(NULL, 0, "Checking cache lock... (" << cache_lock_filename_ << ")");
354 {
355 utils::shared_lock_guard<utils::fs::FileLock> lock(*cache_lock_);
356 }
357 CV_LOG_VERBOSE(NULL, 0, "Checking cache lock... Done!");
358 }
359 catch (const cv::Exception& e)
360 {
361 CV_LOG_WARNING(NULL, "Can't create OpenCL program cache lock: " << cache_lock_filename_ << std::endl << e.what());
362 }
363 catch (...)
364 {
365 CV_LOG_WARNING(NULL, "Can't create OpenCL program cache lock: " << cache_lock_filename_);
366 }
367 }
368 else
369 {
370 if (CV_OPENCL_CACHE_WRITE)
371 {
372 CV_LOG_WARNING(NULL, "OpenCL cache lock is disabled while cache write is allowed "
373 "(not safe for multiprocess environment)");
374 }
375 else
376 {
377 CV_LOG_INFO(NULL, "OpenCL cache lock is disabled");
378 }
379 }
380 }
381 catch (const cv::Exception& e)
382 {
383 CV_LOG_WARNING(NULL, "Can't prepare OpenCL program cache: " << cache_path_ << std::endl << e.what());
384 clear();
385 }
386 } while (0);
387 if (!cache_path_.empty())
388 {
389 if (cache_lock_.empty() && CV_OPENCL_CACHE_LOCK_ENABLE)
390 {
391 CV_LOG_WARNING(NULL, "Initialized OpenCL cache directory, but interprocess synchronization lock is not available. "
392 "Consider to disable OpenCL cache: OPENCV_OPENCL_CACHE_DIR=disabled");
393 }
394 else
395 {
396 CV_LOG_INFO(NULL, "Successfully initialized OpenCL cache directory: " << cache_path_);
397 }
398 }
399 }
400
clearcv::ocl::OpenCLBinaryCacheConfigurator401 void clear()
402 {
403 cache_path_.clear();
404 cache_lock_filename_.clear();
405 cache_lock_.release();
406 }
407
prepareCacheDirectoryForContextcv::ocl::OpenCLBinaryCacheConfigurator408 std::string prepareCacheDirectoryForContext(const std::string& ctx_prefix,
409 const std::string& cleanup_prefix)
410 {
411 if (cache_path_.empty())
412 return std::string();
413
414 AutoLock lock(mutex_prepared_contexts_);
415
416 ContextCacheType::iterator found_it = prepared_contexts_.find(ctx_prefix);
417 if (found_it != prepared_contexts_.end())
418 return found_it->second;
419
420 CV_LOG_INFO(NULL, "Preparing OpenCL cache configuration for context: " << ctx_prefix);
421
422 std::string target_directory = cache_path_ + ctx_prefix + "/";
423 bool result = utils::fs::isDirectory(target_directory);
424 if (!result)
425 {
426 try
427 {
428 CV_LOG_VERBOSE(NULL, 0, "Creating directory: " << target_directory);
429 if (utils::fs::createDirectories(target_directory))
430 {
431 result = true;
432 }
433 else
434 {
435 CV_LOG_WARNING(NULL, "Can't create directory: " << target_directory);
436 }
437 }
438 catch (const cv::Exception& e)
439 {
440 CV_LOG_ERROR(NULL, "Can't create OpenCL program cache directory for context: " << target_directory << std::endl << e.what());
441 }
442 }
443 target_directory = result ? target_directory : std::string();
444 prepared_contexts_.insert(std::pair<std::string, std::string>(ctx_prefix, target_directory));
445
446 if (result && CV_OPENCL_CACHE_CLEANUP && CV_OPENCL_CACHE_WRITE && !cleanup_prefix.empty())
447 {
448 try
449 {
450 std::vector<String> entries;
451 utils::fs::glob_relative(cache_path_, cleanup_prefix + "*", entries, false, true);
452 std::vector<String> remove_entries;
453 for (size_t i = 0; i < entries.size(); i++)
454 {
455 const String& name = entries[i];
456 if (0 == name.find(cleanup_prefix))
457 {
458 if (0 == name.find(ctx_prefix))
459 continue; // skip current
460 remove_entries.push_back(name);
461 }
462 }
463 if (!remove_entries.empty())
464 {
465 CV_LOG_WARNING(NULL, (remove_entries.size() == 1
466 ? "Detected OpenCL cache directory for other version of OpenCL device."
467 : "Detected OpenCL cache directories for other versions of OpenCL device.")
468 << " We assume that these directories are obsolete after OpenCL runtime/drivers upgrade.");
469 CV_LOG_WARNING(NULL, "Trying to remove these directories...");
470 for (size_t i = 0; i < remove_entries.size(); i++)
471 {
472 CV_LOG_WARNING(NULL, "- " << remove_entries[i]);
473 }
474 CV_LOG_WARNING(NULL, "Note: You can disable this behavior via this option: OPENCV_OPENCL_CACHE_CLEANUP=0");
475
476 for (size_t i = 0; i < remove_entries.size(); i++)
477 {
478 const String& name = remove_entries[i];
479 cv::String path = utils::fs::join(cache_path_, name);
480 try
481 {
482 utils::fs::remove_all(path);
483 CV_LOG_WARNING(NULL, "Removed: " << path);
484 }
485 catch (const cv::Exception& e)
486 {
487 CV_LOG_ERROR(NULL, "Exception during removal of obsolete OpenCL cache directory: " << path << std::endl << e.what());
488 }
489 }
490 }
491 }
492 catch (...)
493 {
494 CV_LOG_WARNING(NULL, "Can't check for obsolete OpenCL cache directories");
495 }
496 }
497
498 CV_LOG_VERBOSE(NULL, 1, " Result: " << (target_directory.empty() ? std::string("Failed") : target_directory));
499 return target_directory;
500 }
501
getSingletonInstancecv::ocl::OpenCLBinaryCacheConfigurator502 static OpenCLBinaryCacheConfigurator& getSingletonInstance()
503 {
504 CV_SINGLETON_LAZY_INIT_REF(OpenCLBinaryCacheConfigurator, new OpenCLBinaryCacheConfigurator());
505 }
506 };
507 class BinaryProgramFile
508 {
509 enum { MAX_ENTRIES = 64 };
510
511 typedef unsigned int uint32_t;
512
513 struct CV_DECL_ALIGNED(4) FileHeader
514 {
515 uint32_t sourceSignatureSize;
516 //char sourceSignature[];
517 };
518
519 struct CV_DECL_ALIGNED(4) FileTable
520 {
521 uint32_t numberOfEntries;
522 //uint32_t firstEntryOffset[];
523 };
524
525 struct CV_DECL_ALIGNED(4) FileEntry
526 {
527 uint32_t nextEntryFileOffset; // 0 for the last entry in chain
528 uint32_t keySize;
529 uint32_t dataSize;
530 //char key[];
531 //char data[];
532 };
533
534 const std::string fileName_;
535 const char* const sourceSignature_;
536 const size_t sourceSignatureSize_;
537
538 std::fstream f;
539
540 uint32_t entryOffsets[MAX_ENTRIES];
541
getHash(const std::string & options)542 uint32_t getHash(const std::string& options)
543 {
544 uint64 hash = crc64((const uchar*)options.c_str(), options.size(), 0);
545 return hash & (MAX_ENTRIES - 1);
546 }
547
getFileSize()548 inline size_t getFileSize()
549 {
550 size_t pos = (size_t)f.tellg();
551 f.seekg(0, std::fstream::end);
552 size_t fileSize = (size_t)f.tellg();
553 f.seekg(pos, std::fstream::beg);
554 return fileSize;
555 }
readUInt32()556 inline uint32_t readUInt32()
557 {
558 uint32_t res = 0;
559 f.read((char*)&res, sizeof(uint32_t));
560 CV_Assert(!f.fail());
561 return res;
562 }
writeUInt32(const uint32_t value)563 inline void writeUInt32(const uint32_t value)
564 {
565 uint32_t v = value;
566 f.write((char*)&v, sizeof(uint32_t));
567 CV_Assert(!f.fail());
568 }
569
seekReadAbsolute(size_t pos)570 inline void seekReadAbsolute(size_t pos)
571 {
572 f.seekg(pos, std::fstream::beg);
573 CV_Assert(!f.fail());
574 }
seekReadRelative(size_t pos)575 inline void seekReadRelative(size_t pos)
576 {
577 f.seekg(pos, std::fstream::cur);
578 CV_Assert(!f.fail());
579 }
580
seekWriteAbsolute(size_t pos)581 inline void seekWriteAbsolute(size_t pos)
582 {
583 f.seekp(pos, std::fstream::beg);
584 CV_Assert(!f.fail());
585 }
586
clearFile()587 void clearFile()
588 {
589 f.close();
590 if (0 != remove(fileName_.c_str()))
591 CV_LOG_ERROR(NULL, "Can't remove: " << fileName_);
592 return;
593 }
594
595 public:
BinaryProgramFile(const std::string & fileName,const char * sourceSignature)596 BinaryProgramFile(const std::string& fileName, const char* sourceSignature)
597 : fileName_(fileName), sourceSignature_(sourceSignature), sourceSignatureSize_(sourceSignature_ ? strlen(sourceSignature_) : 0)
598 {
599 CV_StaticAssert(sizeof(uint32_t) == 4, "");
600 CV_Assert(sourceSignature_ != NULL);
601 CV_Assert(sourceSignatureSize_ > 0);
602 memset(entryOffsets, 0, sizeof(entryOffsets));
603
604 f.rdbuf()->pubsetbuf(0, 0); // disable buffering
605 f.open(fileName_.c_str(), std::ios::in|std::ios::out|std::ios::binary);
606 if(f.is_open() && getFileSize() > 0)
607 {
608 bool isValid = false;
609 try
610 {
611 uint32_t fileSourceSignatureSize = readUInt32();
612 if (fileSourceSignatureSize == sourceSignatureSize_)
613 {
614 cv::AutoBuffer<char> fileSourceSignature(fileSourceSignatureSize + 1);
615 f.read(fileSourceSignature.data(), fileSourceSignatureSize);
616 if (f.eof())
617 {
618 CV_LOG_ERROR(NULL, "Unexpected EOF");
619 }
620 else if (memcmp(sourceSignature, fileSourceSignature.data(), fileSourceSignatureSize) == 0)
621 {
622 isValid = true;
623 }
624 }
625 if (!isValid)
626 {
627 CV_LOG_ERROR(NULL, "Source code signature/hash mismatch (program source code has been changed/updated)");
628 }
629 }
630 catch (const cv::Exception& e)
631 {
632 CV_LOG_ERROR(NULL, "Can't open binary program file: " << fileName << " : " << e.what());
633 }
634 catch (...)
635 {
636 CV_LOG_ERROR(NULL, "Can't open binary program file: " << fileName << " : Unknown error");
637 }
638 if (!isValid)
639 {
640 clearFile();
641 }
642 else
643 {
644 seekReadAbsolute(0);
645 }
646 }
647 }
648
read(const std::string & key,std::vector<char> & buf)649 bool read(const std::string& key, std::vector<char>& buf)
650 {
651 if (!f.is_open())
652 return false;
653
654 size_t fileSize = getFileSize();
655 if (fileSize == 0)
656 {
657 CV_LOG_ERROR(NULL, "Invalid file (empty): " << fileName_);
658 clearFile();
659 return false;
660 }
661 seekReadAbsolute(0);
662
663 // bypass FileHeader
664 uint32_t fileSourceSignatureSize = readUInt32();
665 CV_Assert(fileSourceSignatureSize > 0);
666 seekReadRelative(fileSourceSignatureSize);
667
668 uint32_t numberOfEntries = readUInt32();
669 CV_Assert(numberOfEntries > 0);
670 if (numberOfEntries != MAX_ENTRIES)
671 {
672 CV_LOG_ERROR(NULL, "Invalid file: " << fileName_);
673 clearFile();
674 return false;
675 }
676 f.read((char*)&entryOffsets[0], sizeof(entryOffsets));
677 CV_Assert(!f.fail());
678
679 uint32_t entryNum = getHash(key);
680
681 uint32_t entryOffset = entryOffsets[entryNum];
682 FileEntry entry;
683 while (entryOffset > 0)
684 {
685 seekReadAbsolute(entryOffset);
686 //CV_StaticAssert(sizeof(entry) == sizeof(uint32_t) * 3, "");
687 f.read((char*)&entry, sizeof(entry));
688 CV_Assert(!f.fail());
689 cv::AutoBuffer<char> fileKey(entry.keySize + 1);
690 if (key.size() == entry.keySize)
691 {
692 if (entry.keySize > 0)
693 {
694 f.read(fileKey.data(), entry.keySize);
695 CV_Assert(!f.fail());
696 }
697 if (memcmp(fileKey.data(), key.c_str(), entry.keySize) == 0)
698 {
699 buf.resize(entry.dataSize);
700 f.read(&buf[0], entry.dataSize);
701 CV_Assert(!f.fail());
702 seekReadAbsolute(0);
703 CV_LOG_VERBOSE(NULL, 0, "Read...");
704 return true;
705 }
706 }
707 if (entry.nextEntryFileOffset == 0)
708 break;
709 entryOffset = entry.nextEntryFileOffset;
710 }
711 return false;
712 }
713
write(const std::string & key,std::vector<char> & buf)714 bool write(const std::string& key, std::vector<char>& buf)
715 {
716 if (!f.is_open())
717 {
718 f.open(fileName_.c_str(), std::ios::in|std::ios::out|std::ios::binary);
719 if (!f.is_open())
720 {
721 f.open(fileName_.c_str(), std::ios::out|std::ios::binary);
722 if (!f.is_open())
723 {
724 CV_LOG_ERROR(NULL, "Can't create file: " << fileName_);
725 return false;
726 }
727 }
728 }
729
730 size_t fileSize = getFileSize();
731 if (fileSize == 0)
732 {
733 // Write header
734 seekWriteAbsolute(0);
735 writeUInt32((uint32_t)sourceSignatureSize_);
736 f.write(sourceSignature_, sourceSignatureSize_);
737 CV_Assert(!f.fail());
738
739 writeUInt32(MAX_ENTRIES);
740 memset(entryOffsets, 0, sizeof(entryOffsets));
741 f.write((char*)entryOffsets, sizeof(entryOffsets));
742 CV_Assert(!f.fail());
743 f.flush();
744 CV_Assert(!f.fail());
745 f.close();
746 f.open(fileName_.c_str(), std::ios::in|std::ios::out|std::ios::binary);
747 CV_Assert(f.is_open());
748 fileSize = getFileSize();
749 }
750 seekReadAbsolute(0);
751
752 // bypass FileHeader
753 uint32_t fileSourceSignatureSize = readUInt32();
754 CV_Assert(fileSourceSignatureSize == sourceSignatureSize_);
755 seekReadRelative(fileSourceSignatureSize);
756
757 uint32_t numberOfEntries = readUInt32();
758 CV_Assert(numberOfEntries > 0);
759 if (numberOfEntries != MAX_ENTRIES)
760 {
761 CV_LOG_ERROR(NULL, "Invalid file: " << fileName_);
762 clearFile();
763 return false;
764 }
765 size_t tableEntriesOffset = (size_t)f.tellg();
766 f.read((char*)&entryOffsets[0], sizeof(entryOffsets));
767 CV_Assert(!f.fail());
768
769 uint32_t entryNum = getHash(key);
770
771 uint32_t entryOffset = entryOffsets[entryNum];
772 FileEntry entry;
773 while (entryOffset > 0)
774 {
775 seekReadAbsolute(entryOffset);
776 //CV_StaticAssert(sizeof(entry) == sizeof(uint32_t) * 3, "");
777 f.read((char*)&entry, sizeof(entry));
778 CV_Assert(!f.fail());
779 cv::AutoBuffer<char> fileKey(entry.keySize + 1);
780 if (key.size() == entry.keySize)
781 {
782 if (entry.keySize > 0)
783 {
784 f.read(fileKey.data(), entry.keySize);
785 CV_Assert(!f.fail());
786 }
787 if (0 == memcmp(fileKey.data(), key.c_str(), entry.keySize))
788 {
789 // duplicate
790 CV_LOG_VERBOSE(NULL, 0, "Duplicate key ignored: " << fileName_);
791 return false;
792 }
793 }
794 if (entry.nextEntryFileOffset == 0)
795 break;
796 entryOffset = entry.nextEntryFileOffset;
797 }
798 seekReadAbsolute(0);
799 if (entryOffset > 0)
800 {
801 seekWriteAbsolute(entryOffset);
802 entry.nextEntryFileOffset = (uint32_t)fileSize;
803 f.write((char*)&entry, sizeof(entry));
804 CV_Assert(!f.fail());
805 }
806 else
807 {
808 entryOffsets[entryNum] = (uint32_t)fileSize;
809 seekWriteAbsolute(tableEntriesOffset);
810 f.write((char*)entryOffsets, sizeof(entryOffsets));
811 CV_Assert(!f.fail());
812 }
813 seekWriteAbsolute(fileSize);
814 entry.nextEntryFileOffset = 0;
815 entry.dataSize = (uint32_t)buf.size();
816 entry.keySize = (uint32_t)key.size();
817 f.write((char*)&entry, sizeof(entry));
818 CV_Assert(!f.fail());
819 f.write(key.c_str(), entry.keySize);
820 CV_Assert(!f.fail());
821 f.write(&buf[0], entry.dataSize);
822 CV_Assert(!f.fail());
823 f.flush();
824 CV_Assert(!f.fail());
825 CV_LOG_VERBOSE(NULL, 0, "Write... (" << buf.size() << " bytes)");
826 return true;
827 }
828 };
829 #endif // OPENCV_HAVE_FILESYSTEM_SUPPORT
830
831
832
// State behind OpenCLExecutionContext: an OpenCL context, the index of the selected
// device inside that context, the command queue and a cached "use OpenCL" flag.
struct OpenCLExecutionContext::Impl
{
    ocl::Context context_;
    int device_;  // device index in context
    ocl::Queue queue_;
    int useOpenCL_;  // < 0: not evaluated yet, 0: OpenCL is not used, > 0: OpenCL is used

protected:
    Impl() = delete;

    // Finds the device with the raw handle `deviceID` among the devices of context_ and
    // stores its index in device_. Fails (CV_Assert) when the handle is null, the context
    // has no devices, or the device does not belong to the context.
    void _init_device(cl_device_id deviceID)
    {
        CV_Assert(deviceID);
        int ndevices = (int)context_.ndevices();
        CV_Assert(ndevices > 0);
        bool found = false;
        for (int i = 0; i < ndevices; i++)
        {
            ocl::Device d = context_.device(i);
            cl_device_id dhandle = (cl_device_id)d.ptr();
            if (dhandle == deviceID)
            {
                device_ = i;
                found = true;
                break;
            }
        }
        CV_Assert(found && "OpenCL device can't work with passed OpenCL context");
    }

    // Same lookup for an ocl::Device wrapper; devices are matched by comparing the
    // results of getImpl().
    void _init_device(const ocl::Device& device)
    {
        CV_Assert(device.ptr());
        int ndevices = (int)context_.ndevices();
        CV_Assert(ndevices > 0);
        bool found = false;
        for (int i = 0; i < ndevices; i++)
        {
            ocl::Device d = context_.device(i);
            if (d.getImpl() == device.getImpl())
            {
                device_ = i;
                found = true;
                break;
            }
        }
        CV_Assert(found && "OpenCL device can't work with passed OpenCL context");
    }

public:
    // Wraps raw OpenCL handles: the context is wrapped through Context::fromHandle(), the
    // device is located in it and a new queue is created for that device.
    // `platformID` is not used.
    Impl(cl_platform_id platformID, cl_context context, cl_device_id deviceID)
        : device_(0), useOpenCL_(-1)
    {
        CV_UNUSED(platformID);
        CV_Assert(context);
        CV_Assert(deviceID);

        context_ = Context::fromHandle(context);
        _init_device(deviceID);
        queue_ = Queue(context_, context_.device(device_));
    }

    // Uses the given context, device (must belong to the context) and queue as is.
    Impl(const ocl::Context& context, const ocl::Device& device, const ocl::Queue& queue)
        : device_(0), useOpenCL_(-1)
    {
        CV_Assert(context.ptr());
        CV_Assert(device.ptr());

        context_ = context;
        _init_device(device);
        queue_ = queue;
    }

    // Uses the given context and device (must belong to the context); creates a new queue.
    Impl(const ocl::Context& context, const ocl::Device& device)
        : device_(0), useOpenCL_(-1)
    {
        CV_Assert(context.ptr());
        CV_Assert(device.ptr());

        context_ = context;
        _init_device(device);
        queue_ = Queue(context_, context_.device(device_));
    }

    // Stores the arguments without any validation; `device` is an index into the
    // devices of `context`.
    Impl(const ocl::Context& context, const int device, const ocl::Queue& queue)
        : context_(context)
        , device_(device)
        , queue_(queue)
        , useOpenCL_(-1)
    {
        // nothing
    }
    // Copies context, device index and queue; the "use OpenCL" flag is not copied but
    // reset to "not evaluated yet".
    Impl(const Impl& other)
        : context_(other.context_)
        , device_(other.device_)
        , queue_(other.queue_)
        , useOpenCL_(-1)
    {
        // nothing
    }

    // const overload: forwards to the non-const one, which may update the cached flag
    inline bool useOpenCL() const { return const_cast<Impl*>(this)->useOpenCL(); }
    // Returns whether OpenCL can be used with this execution context. On the first call
    // (flag < 0) the flag is computed: it is the availability of the selected device when
    // the context is not empty and has devices, otherwise 0. A cv::Exception raised while
    // querying leaves the flag at 0.
    bool useOpenCL()
    {
        if (useOpenCL_ < 0)
        {
            try
            {
                useOpenCL_ = 0;
                if (!context_.empty() && context_.ndevices() > 0)
                {
                    const Device& d = context_.device(device_);
                    useOpenCL_ = d.available();
                }
            }
            catch (const cv::Exception&)
            {
                // nothing
            }
            if (!useOpenCL_)
                CV_LOG_INFO(NULL, "OpenCL: can't use OpenCL execution context");
        }
        return useOpenCL_ > 0;
    }

    // false: force the flag to "not used"; true: reset it to "not evaluated yet", so that
    // the next useOpenCL() call re-checks the device.
    void setUseOpenCL(bool flag)
    {
        if (!flag)
            useOpenCL_ = 0;
        else
            useOpenCL_ = -1;
    }

    // Returns the shared primary execution context, creating it on the first call:
    // a context from Context::create(std::string()), its device #0 and a new queue.
    // The returned pointer is empty when the context has no devices, the device is not
    // available, the queue can't be created or an exception was raised; the attempt is
    // not repeated by later calls.
    // NOTE(review): `initialized` is a plain bool which is read before the initialization
    // mutex is taken - confirm that this unsynchronized first check is acceptable here.
    static const std::shared_ptr<Impl>& getInitializedExecutionContext()
    {
        CV_TRACE_FUNCTION();

        CV_LOG_INFO(NULL, "OpenCL: initializing thread execution context");

        static bool initialized = false;
        static std::shared_ptr<Impl> g_primaryExecutionContext;

        if (!initialized)
        {
            cv::AutoLock lock(getInitializationMutex());
            if (!initialized)
            {
                CV_LOG_INFO(NULL, "OpenCL: creating new execution context...");
                try
                {
                    Context c = ocl::Context::create(std::string());
                    if (c.ndevices())
                    {
                        int deviceId = 0;
                        auto& d = c.device(deviceId);
                        if (d.available())
                        {
                            auto q = ocl::Queue(c, d);
                            if (!q.ptr())
                            {
                                CV_LOG_ERROR(NULL, "OpenCL: Can't create default OpenCL queue");
                            }
                            else
                            {
                                g_primaryExecutionContext = std::make_shared<Impl>(c, deviceId, q);
                                CV_LOG_INFO(NULL, "OpenCL: device=" << d.name());
                            }
                        }
                        else
                        {
                            CV_LOG_ERROR(NULL, "OpenCL: OpenCL device is not available (CL_DEVICE_AVAILABLE returns false)");
                        }
                    }
                    else
                    {
                        CV_LOG_INFO(NULL, "OpenCL: context is not available/disabled");
                    }
                }
                catch (const std::exception& e)
                {
                    CV_LOG_INFO(NULL, "OpenCL: Can't initialize OpenCL context/device/queue: " << e.what());
                }
                catch (...)
                {
                    CV_LOG_WARNING(NULL, "OpenCL: Can't initialize OpenCL context/device/queue: unknown C++ exception");
                }
                // set even after a failure, so that initialization is attempted only once
                initialized = true;
            }
        }
        return g_primaryExecutionContext;
    }
};
1025
getContext() const1026 Context& OpenCLExecutionContext::getContext() const
1027 {
1028 CV_Assert(p);
1029 return p->context_;
1030 }
getDevice() const1031 Device& OpenCLExecutionContext::getDevice() const
1032 {
1033 CV_Assert(p);
1034 return p->context_.device(p->device_);
1035 }
getQueue() const1036 Queue& OpenCLExecutionContext::getQueue() const
1037 {
1038 CV_Assert(p);
1039 return p->queue_;
1040 }
1041
useOpenCL() const1042 bool OpenCLExecutionContext::useOpenCL() const
1043 {
1044 if (p)
1045 return p->useOpenCL();
1046 return false;
1047 }
setUseOpenCL(bool flag)1048 void OpenCLExecutionContext::setUseOpenCL(bool flag)
1049 {
1050 CV_Assert(p);
1051 p->setUseOpenCL(flag);
1052 }
1053
1054 /* static */
getCurrent()1055 OpenCLExecutionContext& OpenCLExecutionContext::getCurrent()
1056 {
1057 CV_TRACE_FUNCTION();
1058 CoreTLSData& data = getCoreTlsData();
1059 OpenCLExecutionContext& c = data.oclExecutionContext;
1060 if (!data.oclExecutionContextInitialized)
1061 {
1062 data.oclExecutionContextInitialized = true;
1063 if (c.empty() && haveOpenCL())
1064 c.p = Impl::getInitializedExecutionContext();
1065 }
1066 return c;
1067 }
1068
1069 /* static */
getCurrentRef()1070 OpenCLExecutionContext& OpenCLExecutionContext::getCurrentRef()
1071 {
1072 CV_TRACE_FUNCTION();
1073 CoreTLSData& data = getCoreTlsData();
1074 OpenCLExecutionContext& c = data.oclExecutionContext;
1075 return c;
1076 }
1077
bind() const1078 void OpenCLExecutionContext::bind() const
1079 {
1080 CV_TRACE_FUNCTION();
1081 CV_Assert(p);
1082 CoreTLSData& data = getCoreTlsData();
1083 data.oclExecutionContext = *this;
1084 data.oclExecutionContextInitialized = true;
1085 data.useOpenCL = p->useOpenCL_; // propagate "-1", avoid call useOpenCL()
1086 }
1087
1088
cloneWithNewQueue() const1089 OpenCLExecutionContext OpenCLExecutionContext::cloneWithNewQueue() const
1090 {
1091 CV_TRACE_FUNCTION();
1092 CV_Assert(p);
1093 const Queue q(getContext(), getDevice());
1094 return cloneWithNewQueue(q);
1095 }
1096
cloneWithNewQueue(const ocl::Queue & q) const1097 OpenCLExecutionContext OpenCLExecutionContext::cloneWithNewQueue(const ocl::Queue& q) const
1098 {
1099 CV_TRACE_FUNCTION();
1100 CV_Assert(p);
1101 CV_Assert(q.ptr() != NULL);
1102 OpenCLExecutionContext c;
1103 c.p = std::make_shared<Impl>(p->context_, p->device_, q);
1104 return c;
1105 }
1106
1107 /* static */
create(const Context & context,const Device & device,const ocl::Queue & queue)1108 OpenCLExecutionContext OpenCLExecutionContext::create(const Context& context, const Device& device, const ocl::Queue& queue)
1109 {
1110 CV_TRACE_FUNCTION();
1111 if (!haveOpenCL())
1112 CV_Error(cv::Error::OpenCLApiCallError, "OpenCL runtime is not available!");
1113
1114 CV_Assert(!context.empty());
1115 CV_Assert(context.ptr());
1116 CV_Assert(!device.empty());
1117 CV_Assert(device.ptr());
1118 OpenCLExecutionContext ctx;
1119 ctx.p = std::make_shared<OpenCLExecutionContext::Impl>(context, device, queue);
1120 return ctx;
1121
1122 }
1123
1124 /* static */
create(const Context & context,const Device & device)1125 OpenCLExecutionContext OpenCLExecutionContext::create(const Context& context, const Device& device)
1126 {
1127 CV_TRACE_FUNCTION();
1128 if (!haveOpenCL())
1129 CV_Error(cv::Error::OpenCLApiCallError, "OpenCL runtime is not available!");
1130
1131 CV_Assert(!context.empty());
1132 CV_Assert(context.ptr());
1133 CV_Assert(!device.empty());
1134 CV_Assert(device.ptr());
1135 OpenCLExecutionContext ctx;
1136 ctx.p = std::make_shared<OpenCLExecutionContext::Impl>(context, device);
1137 return ctx;
1138
1139 }
1140
release()1141 void OpenCLExecutionContext::release()
1142 {
1143 CV_TRACE_FUNCTION();
1144 p.reset();
1145 }
1146
1147
1148
1149 // true if we have initialized OpenCL subsystem with available platforms
1150 static bool g_isOpenCLInitialized = false;
1151 static bool g_isOpenCLAvailable = false;
1152
haveOpenCL()1153 bool haveOpenCL()
1154 {
1155 CV_TRACE_FUNCTION();
1156
1157 if (!g_isOpenCLInitialized)
1158 {
1159 CV_TRACE_REGION("Init_OpenCL_Runtime");
1160 const char* envPath = getenv("OPENCV_OPENCL_RUNTIME");
1161 if (envPath)
1162 {
1163 if (cv::String(envPath) == "disabled")
1164 {
1165 g_isOpenCLAvailable = false;
1166 g_isOpenCLInitialized = true;
1167 return false;
1168 }
1169 }
1170
1171 cv::AutoLock lock(getInitializationMutex());
1172 CV_LOG_INFO(NULL, "Initialize OpenCL runtime...");
1173 try
1174 {
1175 cl_uint n = 0;
1176 g_isOpenCLAvailable = ::clGetPlatformIDs(0, NULL, &n) == CL_SUCCESS;
1177 g_isOpenCLAvailable &= n > 0;
1178 CV_LOG_INFO(NULL, "OpenCL: found " << n << " platforms");
1179 }
1180 catch (...)
1181 {
1182 g_isOpenCLAvailable = false;
1183 }
1184 g_isOpenCLInitialized = true;
1185 }
1186 return g_isOpenCLAvailable;
1187 }
1188
useOpenCL()1189 bool useOpenCL()
1190 {
1191 CoreTLSData& data = getCoreTlsData();
1192 if (data.useOpenCL < 0)
1193 {
1194 try
1195 {
1196 data.useOpenCL = 0;
1197 if (haveOpenCL())
1198 {
1199 auto c = OpenCLExecutionContext::getCurrent();
1200 data.useOpenCL = c.useOpenCL();
1201 }
1202 }
1203 catch (...)
1204 {
1205 CV_LOG_INFO(NULL, "OpenCL: can't initialize thread OpenCL execution context");
1206 }
1207 }
1208 return data.useOpenCL > 0;
1209 }
1210
isOpenCLActivated()1211 bool isOpenCLActivated()
1212 {
1213 if (!g_isOpenCLAvailable)
1214 return false; // prevent unnecessary OpenCL activation via useOpenCL()->haveOpenCL() calls
1215 return useOpenCL();
1216 }
1217
setUseOpenCL(bool flag)1218 void setUseOpenCL(bool flag)
1219 {
1220 CV_TRACE_FUNCTION();
1221
1222 CoreTLSData& data = getCoreTlsData();
1223 auto& c = OpenCLExecutionContext::getCurrentRef();
1224 if (!c.empty())
1225 {
1226 c.setUseOpenCL(flag);
1227 data.useOpenCL = c.useOpenCL();
1228 }
1229 else
1230 {
1231 if (!flag)
1232 data.useOpenCL = 0;
1233 else
1234 data.useOpenCL = -1; // enabled by default (if context is not initialized)
1235 }
1236 }
1237
1238
1239
1240 #ifdef HAVE_CLAMDBLAS
1241
1242 class AmdBlasHelper
1243 {
1244 public:
getInstance()1245 static AmdBlasHelper & getInstance()
1246 {
1247 CV_SINGLETON_LAZY_INIT_REF(AmdBlasHelper, new AmdBlasHelper())
1248 }
1249
isAvailable() const1250 bool isAvailable() const
1251 {
1252 return g_isAmdBlasAvailable;
1253 }
1254
~AmdBlasHelper()1255 ~AmdBlasHelper()
1256 {
1257 // Do not tear down clBLAS.
1258 // The user application may still use clBLAS even after OpenCV is unloaded.
1259 /*try
1260 {
1261 clblasTeardown();
1262 }
1263 catch (...) { }*/
1264 }
1265
1266 protected:
AmdBlasHelper()1267 AmdBlasHelper()
1268 {
1269 if (!g_isAmdBlasInitialized)
1270 {
1271 AutoLock lock(getInitializationMutex());
1272
1273 if (!g_isAmdBlasInitialized)
1274 {
1275 if (haveOpenCL())
1276 {
1277 try
1278 {
1279 g_isAmdBlasAvailable = clblasSetup() == clblasSuccess;
1280 }
1281 catch (...)
1282 {
1283 g_isAmdBlasAvailable = false;
1284 }
1285 }
1286 else
1287 g_isAmdBlasAvailable = false;
1288
1289 g_isAmdBlasInitialized = true;
1290 }
1291 }
1292 }
1293
1294 private:
1295 static bool g_isAmdBlasInitialized;
1296 static bool g_isAmdBlasAvailable;
1297 };
1298
1299 bool AmdBlasHelper::g_isAmdBlasAvailable = false;
1300 bool AmdBlasHelper::g_isAmdBlasInitialized = false;
1301
haveAmdBlas()1302 bool haveAmdBlas()
1303 {
1304 return AmdBlasHelper::getInstance().isAvailable();
1305 }
1306
1307 #else
1308
// Fallback used when HAVE_CLAMDBLAS is not defined: always reports false.
bool haveAmdBlas()
{
    const bool amdBlasCompiledIn = false;
    return amdBlasCompiledIn;
}
1313
1314 #endif
1315
1316 #ifdef HAVE_CLAMDFFT
1317
1318 class AmdFftHelper
1319 {
1320 public:
getInstance()1321 static AmdFftHelper & getInstance()
1322 {
1323 CV_SINGLETON_LAZY_INIT_REF(AmdFftHelper, new AmdFftHelper())
1324 }
1325
isAvailable() const1326 bool isAvailable() const
1327 {
1328 return g_isAmdFftAvailable;
1329 }
1330
~AmdFftHelper()1331 ~AmdFftHelper()
1332 {
1333 // Do not tear down clFFT.
1334 // The user application may still use clFFT even after OpenCV is unloaded.
1335 /*try
1336 {
1337 clfftTeardown();
1338 }
1339 catch (...) { }*/
1340 }
1341
1342 protected:
AmdFftHelper()1343 AmdFftHelper()
1344 {
1345 if (!g_isAmdFftInitialized)
1346 {
1347 AutoLock lock(getInitializationMutex());
1348
1349 if (!g_isAmdFftInitialized)
1350 {
1351 if (haveOpenCL())
1352 {
1353 try
1354 {
1355 cl_uint major, minor, patch;
1356 CV_Assert(clfftInitSetupData(&setupData) == CLFFT_SUCCESS);
1357
1358 // it throws exception in case AmdFft binaries are not found
1359 CV_Assert(clfftGetVersion(&major, &minor, &patch) == CLFFT_SUCCESS);
1360 g_isAmdFftAvailable = true;
1361 }
1362 catch (const Exception &)
1363 {
1364 g_isAmdFftAvailable = false;
1365 }
1366 }
1367 else
1368 g_isAmdFftAvailable = false;
1369
1370 g_isAmdFftInitialized = true;
1371 }
1372 }
1373 }
1374
1375 private:
1376 static clfftSetupData setupData;
1377 static bool g_isAmdFftInitialized;
1378 static bool g_isAmdFftAvailable;
1379 };
1380
1381 clfftSetupData AmdFftHelper::setupData;
1382 bool AmdFftHelper::g_isAmdFftAvailable = false;
1383 bool AmdFftHelper::g_isAmdFftInitialized = false;
1384
haveAmdFft()1385 bool haveAmdFft()
1386 {
1387 return AmdFftHelper::getInstance().isAvailable();
1388 }
1389
1390 #else
1391
// Fallback used when HAVE_CLAMDFFT is not defined: always reports false.
bool haveAmdFft()
{
    const bool amdFftCompiledIn = false;
    return amdFftCompiledIn;
}
1396
1397 #endif
1398
// Compile-time switch: true only when the library was built with HAVE_OPENCL_SVM.
bool haveSVM()
{
#ifdef HAVE_OPENCL_SVM
    const bool svmCompiledIn = true;
#else
    const bool svmCompiledIn = false;
#endif
    return svmCompiledIn;
}
1407
finish()1408 void finish()
1409 {
1410 Queue::getDefault().finish();
1411 }
1412
1413 /////////////////////////////////////////// Platform /////////////////////////////////////////////
1414
1415 struct Platform::Impl
1416 {
Implcv::ocl::Platform::Impl1417 Impl()
1418 {
1419 refcount = 1;
1420 handle = 0;
1421 initialized = false;
1422 }
1423
~Implcv::ocl::Platform::Impl1424 ~Impl() {}
1425
initcv::ocl::Platform::Impl1426 void init()
1427 {
1428 if( !initialized )
1429 {
1430 //cl_uint num_entries
1431 cl_uint n = 0;
1432 if( clGetPlatformIDs(1, &handle, &n) != CL_SUCCESS || n == 0 )
1433 handle = 0;
1434 if( handle != 0 )
1435 {
1436 char buf[1000];
1437 size_t len = 0;
1438 CV_OCL_DBG_CHECK(clGetPlatformInfo(handle, CL_PLATFORM_VENDOR, sizeof(buf), buf, &len));
1439 buf[len] = '\0';
1440 vendor = String(buf);
1441 }
1442
1443 initialized = true;
1444 }
1445 }
1446
1447 IMPLEMENT_REFCOUNTABLE();
1448
1449 cl_platform_id handle;
1450 String vendor;
1451 bool initialized;
1452 };
1453
Platform()1454 Platform::Platform() CV_NOEXCEPT
1455 {
1456 p = 0;
1457 }
1458
~Platform()1459 Platform::~Platform()
1460 {
1461 if(p)
1462 p->release();
1463 }
1464
// Copy constructor: shares the implementation of `pl` and adds a reference to it.
Platform::Platform(const Platform& pl)
    : p((Impl*)pl.p)
{
    if (p)
        p->addref();
}
1471
// Copy assignment. The incoming implementation is referenced before the current
// one is released, so self-assignment is safe.
Platform& Platform::operator = (const Platform& pl)
{
    Impl* incoming = (Impl*)pl.p;
    if (incoming)
        incoming->addref();
    if (p)
        p->release();
    p = incoming;
    return *this;
}
1482
Platform(Platform && pl)1483 Platform::Platform(Platform&& pl) CV_NOEXCEPT
1484 {
1485 p = pl.p;
1486 pl.p = nullptr;
1487 }
1488
operator =(Platform && pl)1489 Platform& Platform::operator = (Platform&& pl) CV_NOEXCEPT
1490 {
1491 if (this != &pl) {
1492 if(p)
1493 p->release();
1494 p = pl.p;
1495 pl.p = nullptr;
1496 }
1497 return *this;
1498 }
1499
ptr() const1500 void* Platform::ptr() const
1501 {
1502 return p ? p->handle : 0;
1503 }
1504
getDefault()1505 Platform& Platform::getDefault()
1506 {
1507 CV_LOG_ONCE_WARNING(NULL, "OpenCL: Platform::getDefault() is deprecated and will be removed. Use cv::ocl::getPlatfomsInfo() for enumeration of available platforms");
1508 static Platform p;
1509 if( !p.p )
1510 {
1511 p.p = new Impl;
1512 p.p->init();
1513 }
1514 return p;
1515 }
1516
1517 /////////////////////////////////////// Device ////////////////////////////////////////////
1518
1519 // Version has format:
1520 // OpenCL<space><major_version.minor_version><space><vendor-specific information>
1521 // by specification
1522 // http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clGetDeviceInfo.html
1523 // http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clGetDeviceInfo.html
1524 // https://www.khronos.org/registry/OpenCL/sdk/1.1/docs/man/xhtml/clGetPlatformInfo.html
1525 // https://www.khronos.org/registry/OpenCL/sdk/1.2/docs/man/xhtml/clGetPlatformInfo.html
parseOpenCLVersion(const String & version,int & major,int & minor)1526 static void parseOpenCLVersion(const String &version, int &major, int &minor)
1527 {
1528 major = minor = 0;
1529 if (10 >= version.length())
1530 return;
1531 const char *pstr = version.c_str();
1532 if (0 != strncmp(pstr, "OpenCL ", 7))
1533 return;
1534 size_t ppos = version.find('.', 7);
1535 if (String::npos == ppos)
1536 return;
1537 String temp = version.substr(7, ppos - 7);
1538 major = atoi(temp.c_str());
1539 temp = version.substr(ppos + 1);
1540 minor = atoi(temp.c_str());
1541 }
1542
1543 struct Device::Impl
1544 {
Implcv::ocl::Device::Impl1545 Impl(void* d)
1546 : refcount(1)
1547 , handle(0)
1548 {
1549 try
1550 {
1551 cl_device_id device = (cl_device_id)d;
1552 _init(device);
1553 CV_OCL_CHECK(clRetainDevice(device)); // increment reference counter on success only
1554 }
1555 catch (...)
1556 {
1557 throw;
1558 }
1559 }
1560
_initcv::ocl::Device::Impl1561 void _init(cl_device_id d)
1562 {
1563 handle = (cl_device_id)d;
1564
1565 name_ = getStrProp(CL_DEVICE_NAME);
1566 version_ = getStrProp(CL_DEVICE_VERSION);
1567 extensions_ = getStrProp(CL_DEVICE_EXTENSIONS);
1568 doubleFPConfig_ = getProp<cl_device_fp_config, int>(CL_DEVICE_DOUBLE_FP_CONFIG);
1569 halfFPConfig_ = getProp<cl_device_fp_config, int>(CL_DEVICE_HALF_FP_CONFIG);
1570 hostUnifiedMemory_ = getBoolProp(CL_DEVICE_HOST_UNIFIED_MEMORY);
1571 maxComputeUnits_ = getProp<cl_uint, int>(CL_DEVICE_MAX_COMPUTE_UNITS);
1572 maxWorkGroupSize_ = getProp<size_t, size_t>(CL_DEVICE_MAX_WORK_GROUP_SIZE);
1573 type_ = getProp<cl_device_type, int>(CL_DEVICE_TYPE);
1574 driverVersion_ = getStrProp(CL_DRIVER_VERSION);
1575 addressBits_ = getProp<cl_uint, int>(CL_DEVICE_ADDRESS_BITS);
1576
1577 String deviceVersion_ = getStrProp(CL_DEVICE_VERSION);
1578 parseOpenCLVersion(deviceVersion_, deviceVersionMajor_, deviceVersionMinor_);
1579
1580 size_t pos = 0;
1581 while (pos < extensions_.size())
1582 {
1583 size_t pos2 = extensions_.find(' ', pos);
1584 if (pos2 == String::npos)
1585 pos2 = extensions_.size();
1586 if (pos2 > pos)
1587 {
1588 std::string extensionName = extensions_.substr(pos, pos2 - pos);
1589 extensions_set_.insert(extensionName);
1590 }
1591 pos = pos2 + 1;
1592 }
1593
1594 intelSubgroupsSupport_ = isExtensionSupported("cl_intel_subgroups");
1595
1596 vendorName_ = getStrProp(CL_DEVICE_VENDOR);
1597 if (vendorName_ == "Advanced Micro Devices, Inc." ||
1598 vendorName_ == "AMD")
1599 vendorID_ = VENDOR_AMD;
1600 else if (vendorName_ == "Intel(R) Corporation" || vendorName_ == "Intel" || strstr(name_.c_str(), "Iris") != 0)
1601 vendorID_ = VENDOR_INTEL;
1602 else if (vendorName_ == "NVIDIA Corporation")
1603 vendorID_ = VENDOR_NVIDIA;
1604 else
1605 vendorID_ = UNKNOWN_VENDOR;
1606
1607 const size_t CV_OPENCL_DEVICE_MAX_WORK_GROUP_SIZE = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_DEVICE_MAX_WORK_GROUP_SIZE", 0);
1608 if (CV_OPENCL_DEVICE_MAX_WORK_GROUP_SIZE > 0)
1609 {
1610 const size_t new_maxWorkGroupSize = std::min(maxWorkGroupSize_, CV_OPENCL_DEVICE_MAX_WORK_GROUP_SIZE);
1611 if (new_maxWorkGroupSize != maxWorkGroupSize_)
1612 CV_LOG_WARNING(NULL, "OpenCL: using workgroup size: " << new_maxWorkGroupSize << " (was " << maxWorkGroupSize_ << ")");
1613 maxWorkGroupSize_ = new_maxWorkGroupSize;
1614 }
1615 #if 0
1616 if (isExtensionSupported("cl_khr_spir"))
1617 {
1618 #ifndef CL_DEVICE_SPIR_VERSIONS
1619 #define CL_DEVICE_SPIR_VERSIONS 0x40E0
1620 #endif
1621 cv::String spir_versions = getStrProp(CL_DEVICE_SPIR_VERSIONS);
1622 std::cout << spir_versions << std::endl;
1623 }
1624 #endif
1625 }
1626
~Implcv::ocl::Device::Impl1627 ~Impl()
1628 {
1629 #ifdef _WIN32
1630 if (!cv::__termination)
1631 #endif
1632 {
1633 if (handle)
1634 {
1635 CV_OCL_CHECK(clReleaseDevice(handle));
1636 handle = 0;
1637 }
1638 }
1639 }
1640
1641 template<typename _TpCL, typename _TpOut>
getPropcv::ocl::Device::Impl1642 _TpOut getProp(cl_device_info prop) const
1643 {
1644 _TpCL temp=_TpCL();
1645 size_t sz = 0;
1646
1647 return clGetDeviceInfo(handle, prop, sizeof(temp), &temp, &sz) == CL_SUCCESS &&
1648 sz == sizeof(temp) ? _TpOut(temp) : _TpOut();
1649 }
1650
getBoolPropcv::ocl::Device::Impl1651 bool getBoolProp(cl_device_info prop) const
1652 {
1653 cl_bool temp = CL_FALSE;
1654 size_t sz = 0;
1655
1656 return clGetDeviceInfo(handle, prop, sizeof(temp), &temp, &sz) == CL_SUCCESS &&
1657 sz == sizeof(temp) ? temp != 0 : false;
1658 }
1659
getStrPropcv::ocl::Device::Impl1660 String getStrProp(cl_device_info prop) const
1661 {
1662 char buf[4096];
1663 size_t sz=0;
1664 return clGetDeviceInfo(handle, prop, sizeof(buf)-16, buf, &sz) == CL_SUCCESS &&
1665 sz < sizeof(buf) ? String(buf) : String();
1666 }
1667
isExtensionSupportedcv::ocl::Device::Impl1668 bool isExtensionSupported(const std::string& extensionName) const
1669 {
1670 return extensions_set_.count(extensionName) > 0;
1671 }
1672
1673
1674 IMPLEMENT_REFCOUNTABLE();
1675
1676 cl_device_id handle;
1677
1678 String name_;
1679 String version_;
1680 std::string extensions_;
1681 int doubleFPConfig_;
1682 int halfFPConfig_;
1683 bool hostUnifiedMemory_;
1684 int maxComputeUnits_;
1685 size_t maxWorkGroupSize_;
1686 int type_;
1687 int addressBits_;
1688 int deviceVersionMajor_;
1689 int deviceVersionMinor_;
1690 String driverVersion_;
1691 String vendorName_;
1692 int vendorID_;
1693 bool intelSubgroupsSupport_;
1694
1695 std::set<std::string> extensions_set_;
1696 };
1697
1698
Device()1699 Device::Device() CV_NOEXCEPT
1700 {
1701 p = 0;
1702 }
1703
Device(void * d)1704 Device::Device(void* d)
1705 {
1706 p = 0;
1707 set(d);
1708 }
1709
// Copy constructor: shares the implementation of `d` and adds a reference to it.
Device::Device(const Device& d)
    : p(d.p)
{
    if (p)
        p->addref();
}
1716
// Copy assignment. The incoming implementation is referenced before the current
// one is released, so self-assignment is safe.
Device& Device::operator = (const Device& d)
{
    Impl* incoming = (Impl*)d.p;
    if (incoming)
        incoming->addref();
    if (p)
        p->release();
    p = incoming;
    return *this;
}
1727
Device(Device && d)1728 Device::Device(Device&& d) CV_NOEXCEPT
1729 {
1730 p = d.p;
1731 d.p = nullptr;
1732 }
1733
operator =(Device && d)1734 Device& Device::operator = (Device&& d) CV_NOEXCEPT
1735 {
1736 if (this != &d) {
1737 if(p)
1738 p->release();
1739 p = d.p;
1740 d.p = nullptr;
1741 }
1742 return *this;
1743 }
1744
~Device()1745 Device::~Device()
1746 {
1747 if(p)
1748 p->release();
1749 }
1750
set(void * d)1751 void Device::set(void* d)
1752 {
1753 if(p)
1754 p->release();
1755 p = new Impl(d);
1756 if (p->handle)
1757 {
1758 CV_OCL_CHECK(clReleaseDevice((cl_device_id)d));
1759 }
1760 }
1761
fromHandle(void * d)1762 Device Device::fromHandle(void* d)
1763 {
1764 Device device(d);
1765 return device;
1766 }
1767
ptr() const1768 void* Device::ptr() const
1769 {
1770 return p ? p->handle : 0;
1771 }
1772
name() const1773 String Device::name() const
1774 { return p ? p->name_ : String(); }
1775
extensions() const1776 String Device::extensions() const
1777 { return p ? String(p->extensions_) : String(); }
1778
isExtensionSupported(const String & extensionName) const1779 bool Device::isExtensionSupported(const String& extensionName) const
1780 { return p ? p->isExtensionSupported(extensionName) : false; }
1781
version() const1782 String Device::version() const
1783 { return p ? p->version_ : String(); }
1784
vendorName() const1785 String Device::vendorName() const
1786 { return p ? p->vendorName_ : String(); }
1787
vendorID() const1788 int Device::vendorID() const
1789 { return p ? p->vendorID_ : 0; }
1790
OpenCL_C_Version() const1791 String Device::OpenCL_C_Version() const
1792 { return p ? p->getStrProp(CL_DEVICE_OPENCL_C_VERSION) : String(); }
1793
OpenCLVersion() const1794 String Device::OpenCLVersion() const
1795 { return p ? p->getStrProp(CL_DEVICE_VERSION) : String(); }
1796
deviceVersionMajor() const1797 int Device::deviceVersionMajor() const
1798 { return p ? p->deviceVersionMajor_ : 0; }
1799
deviceVersionMinor() const1800 int Device::deviceVersionMinor() const
1801 { return p ? p->deviceVersionMinor_ : 0; }
1802
driverVersion() const1803 String Device::driverVersion() const
1804 { return p ? p->driverVersion_ : String(); }
1805
type() const1806 int Device::type() const
1807 { return p ? p->type_ : 0; }
1808
addressBits() const1809 int Device::addressBits() const
1810 { return p ? p->addressBits_ : 0; }
1811
available() const1812 bool Device::available() const
1813 { return p ? p->getBoolProp(CL_DEVICE_AVAILABLE) : false; }
1814
compilerAvailable() const1815 bool Device::compilerAvailable() const
1816 { return p ? p->getBoolProp(CL_DEVICE_COMPILER_AVAILABLE) : false; }
1817
linkerAvailable() const1818 bool Device::linkerAvailable() const
1819 #ifdef CL_VERSION_1_2
1820 { return p ? p->getBoolProp(CL_DEVICE_LINKER_AVAILABLE) : false; }
1821 #else
1822 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1823 #endif
1824
doubleFPConfig() const1825 int Device::doubleFPConfig() const
1826 { return p ? p->doubleFPConfig_ : 0; }
1827
singleFPConfig() const1828 int Device::singleFPConfig() const
1829 { return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_SINGLE_FP_CONFIG) : 0; }
1830
halfFPConfig() const1831 int Device::halfFPConfig() const
1832 { return p ? p->halfFPConfig_ : 0; }
1833
endianLittle() const1834 bool Device::endianLittle() const
1835 { return p ? p->getBoolProp(CL_DEVICE_ENDIAN_LITTLE) : false; }
1836
errorCorrectionSupport() const1837 bool Device::errorCorrectionSupport() const
1838 { return p ? p->getBoolProp(CL_DEVICE_ERROR_CORRECTION_SUPPORT) : false; }
1839
executionCapabilities() const1840 int Device::executionCapabilities() const
1841 { return p ? p->getProp<cl_device_exec_capabilities, int>(CL_DEVICE_EXECUTION_CAPABILITIES) : 0; }
1842
globalMemCacheSize() const1843 size_t Device::globalMemCacheSize() const
1844 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_GLOBAL_MEM_CACHE_SIZE) : 0; }
1845
globalMemCacheType() const1846 int Device::globalMemCacheType() const
1847 { return p ? p->getProp<cl_device_mem_cache_type, int>(CL_DEVICE_GLOBAL_MEM_CACHE_TYPE) : 0; }
1848
globalMemCacheLineSize() const1849 int Device::globalMemCacheLineSize() const
1850 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE) : 0; }
1851
globalMemSize() const1852 size_t Device::globalMemSize() const
1853 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_GLOBAL_MEM_SIZE) : 0; }
1854
localMemSize() const1855 size_t Device::localMemSize() const
1856 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_LOCAL_MEM_SIZE) : 0; }
1857
localMemType() const1858 int Device::localMemType() const
1859 { return p ? p->getProp<cl_device_local_mem_type, int>(CL_DEVICE_LOCAL_MEM_TYPE) : 0; }
1860
hostUnifiedMemory() const1861 bool Device::hostUnifiedMemory() const
1862 { return p ? p->hostUnifiedMemory_ : false; }
1863
imageSupport() const1864 bool Device::imageSupport() const
1865 { return p ? p->getBoolProp(CL_DEVICE_IMAGE_SUPPORT) : false; }
1866
imageFromBufferSupport() const1867 bool Device::imageFromBufferSupport() const
1868 {
1869 return p ? p->isExtensionSupported("cl_khr_image2d_from_buffer") : false;
1870 }
1871
imagePitchAlignment() const1872 uint Device::imagePitchAlignment() const
1873 {
1874 #ifdef CL_DEVICE_IMAGE_PITCH_ALIGNMENT
1875 return p ? p->getProp<cl_uint, uint>(CL_DEVICE_IMAGE_PITCH_ALIGNMENT) : 0;
1876 #else
1877 return 0;
1878 #endif
1879 }
1880
imageBaseAddressAlignment() const1881 uint Device::imageBaseAddressAlignment() const
1882 {
1883 #ifdef CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT
1884 return p ? p->getProp<cl_uint, uint>(CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT) : 0;
1885 #else
1886 return 0;
1887 #endif
1888 }
1889
image2DMaxWidth() const1890 size_t Device::image2DMaxWidth() const
1891 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE2D_MAX_WIDTH) : 0; }
1892
image2DMaxHeight() const1893 size_t Device::image2DMaxHeight() const
1894 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE2D_MAX_HEIGHT) : 0; }
1895
image3DMaxWidth() const1896 size_t Device::image3DMaxWidth() const
1897 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_WIDTH) : 0; }
1898
image3DMaxHeight() const1899 size_t Device::image3DMaxHeight() const
1900 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_HEIGHT) : 0; }
1901
image3DMaxDepth() const1902 size_t Device::image3DMaxDepth() const
1903 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_DEPTH) : 0; }
1904
imageMaxBufferSize() const1905 size_t Device::imageMaxBufferSize() const
1906 #ifdef CL_VERSION_1_2
1907 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE_MAX_BUFFER_SIZE) : 0; }
1908 #else
1909 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1910 #endif
1911
imageMaxArraySize() const1912 size_t Device::imageMaxArraySize() const
1913 #ifdef CL_VERSION_1_2
1914 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE_MAX_ARRAY_SIZE) : 0; }
1915 #else
1916 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1917 #endif
1918
intelSubgroupsSupport() const1919 bool Device::intelSubgroupsSupport() const
1920 { return p ? p->intelSubgroupsSupport_ : false; }
1921
maxClockFrequency() const1922 int Device::maxClockFrequency() const
1923 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_CLOCK_FREQUENCY) : 0; }
1924
maxComputeUnits() const1925 int Device::maxComputeUnits() const
1926 { return p ? p->maxComputeUnits_ : 0; }
1927
maxConstantArgs() const1928 int Device::maxConstantArgs() const
1929 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_CONSTANT_ARGS) : 0; }
1930
maxConstantBufferSize() const1931 size_t Device::maxConstantBufferSize() const
1932 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE) : 0; }
1933
maxMemAllocSize() const1934 size_t Device::maxMemAllocSize() const
1935 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_MEM_ALLOC_SIZE) : 0; }
1936
maxParameterSize() const1937 size_t Device::maxParameterSize() const
1938 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_PARAMETER_SIZE) : 0; }
1939
maxReadImageArgs() const1940 int Device::maxReadImageArgs() const
1941 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_READ_IMAGE_ARGS) : 0; }
1942
maxWriteImageArgs() const1943 int Device::maxWriteImageArgs() const
1944 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_WRITE_IMAGE_ARGS) : 0; }
1945
maxSamplers() const1946 int Device::maxSamplers() const
1947 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_SAMPLERS) : 0; }
1948
maxWorkGroupSize() const1949 size_t Device::maxWorkGroupSize() const
1950 { return p ? p->maxWorkGroupSize_ : 0; }
1951
maxWorkItemDims() const1952 int Device::maxWorkItemDims() const
1953 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS) : 0; }
1954
maxWorkItemSizes(size_t * sizes) const1955 void Device::maxWorkItemSizes(size_t* sizes) const
1956 {
1957 if(p)
1958 {
1959 const int MAX_DIMS = 32;
1960 size_t retsz = 0;
1961 CV_OCL_DBG_CHECK(clGetDeviceInfo(p->handle, CL_DEVICE_MAX_WORK_ITEM_SIZES,
1962 MAX_DIMS*sizeof(sizes[0]), &sizes[0], &retsz));
1963 }
1964 }
1965
memBaseAddrAlign() const1966 int Device::memBaseAddrAlign() const
1967 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MEM_BASE_ADDR_ALIGN) : 0; }
1968
nativeVectorWidthChar() const1969 int Device::nativeVectorWidthChar() const
1970 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR) : 0; }
1971
nativeVectorWidthShort() const1972 int Device::nativeVectorWidthShort() const
1973 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT) : 0; }
1974
nativeVectorWidthInt() const1975 int Device::nativeVectorWidthInt() const
1976 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_INT) : 0; }
1977
nativeVectorWidthLong() const1978 int Device::nativeVectorWidthLong() const
1979 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG) : 0; }
1980
nativeVectorWidthFloat() const1981 int Device::nativeVectorWidthFloat() const
1982 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT) : 0; }
1983
nativeVectorWidthDouble() const1984 int Device::nativeVectorWidthDouble() const
1985 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE) : 0; }
1986
nativeVectorWidthHalf() const1987 int Device::nativeVectorWidthHalf() const
1988 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF) : 0; }
1989
preferredVectorWidthChar() const1990 int Device::preferredVectorWidthChar() const
1991 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR) : 0; }
1992
preferredVectorWidthShort() const1993 int Device::preferredVectorWidthShort() const
1994 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT) : 0; }
1995
preferredVectorWidthInt() const1996 int Device::preferredVectorWidthInt() const
1997 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT) : 0; }
1998
preferredVectorWidthLong() const1999 int Device::preferredVectorWidthLong() const
2000 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG) : 0; }
2001
preferredVectorWidthFloat() const2002 int Device::preferredVectorWidthFloat() const
2003 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT) : 0; }
2004
preferredVectorWidthDouble() const2005 int Device::preferredVectorWidthDouble() const
2006 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE) : 0; }
2007
preferredVectorWidthHalf() const2008 int Device::preferredVectorWidthHalf() const
2009 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF) : 0; }
2010
printfBufferSize() const2011 size_t Device::printfBufferSize() const
2012 #ifdef CL_VERSION_1_2
2013 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_PRINTF_BUFFER_SIZE) : 0; }
2014 #else
2015 { CV_REQUIRE_OPENCL_1_2_ERROR; }
2016 #endif
2017
2018
profilingTimerResolution() const2019 size_t Device::profilingTimerResolution() const
2020 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_PROFILING_TIMER_RESOLUTION) : 0; }
2021
getDefault()2022 const Device& Device::getDefault()
2023 {
2024 auto& c = OpenCLExecutionContext::getCurrent();
2025 if (!c.empty())
2026 {
2027 return c.getDevice();
2028 }
2029
2030 static Device dummy;
2031 return dummy;
2032 }
2033
2034 ////////////////////////////////////// Context ///////////////////////////////////////////////////
2035
2036 template <typename Functor, typename ObjectType>
getStringInfo(Functor f,ObjectType obj,cl_uint name,std::string & param)2037 inline cl_int getStringInfo(Functor f, ObjectType obj, cl_uint name, std::string& param)
2038 {
2039 ::size_t required;
2040 cl_int err = f(obj, name, 0, NULL, &required);
2041 if (err != CL_SUCCESS)
2042 return err;
2043
2044 param.clear();
2045 if (required > 0)
2046 {
2047 AutoBuffer<char> buf(required + 1);
2048 char* ptr = buf.data(); // cleanup is not needed
2049 err = f(obj, name, required, ptr, NULL);
2050 if (err != CL_SUCCESS)
2051 return err;
2052 param = ptr;
2053 }
2054
2055 return CL_SUCCESS;
2056 }
2057
// Splits `s` on every occurrence of `delim` and stores the pieces in `elems`
// (previous contents are discarded). Empty pieces are kept, including a trailing
// one when `s` ends with the delimiter; an empty `s` yields no pieces at all.
static void split(const std::string &s, char delim, std::vector<std::string> &elems)
{
    elems.clear();
    if (s.empty())
        return;

    std::string::size_type tokenStart = 0;
    for (;;)
    {
        const std::string::size_type delimPos = s.find(delim, tokenStart);
        if (delimPos == std::string::npos)
        {
            // Last piece: everything after the final delimiter (possibly empty).
            elems.push_back(s.substr(tokenStart));
            break;
        }
        elems.push_back(s.substr(tokenStart, delimPos - tokenStart));
        tokenStart = delimPos + 1;
    }
}
2071
2072 // Layout: <Platform>:<CPU|GPU|ACCELERATOR|nothing=GPU/CPU>:<deviceName>
2073 // Sample: AMD:GPU:
2074 // Sample: AMD:GPU:Tahiti
2075 // Sample: :GPU|CPU: = '' = ':' = '::'
parseOpenCLDeviceConfiguration(const std::string & configurationStr,std::string & platform,std::vector<std::string> & deviceTypes,std::string & deviceNameOrID)2076 static bool parseOpenCLDeviceConfiguration(const std::string& configurationStr,
2077 std::string& platform, std::vector<std::string>& deviceTypes, std::string& deviceNameOrID)
2078 {
2079 std::vector<std::string> parts;
2080 split(configurationStr, ':', parts);
2081 if (parts.size() > 3)
2082 {
2083 CV_LOG_ERROR(NULL, "OpenCL: Invalid configuration string for OpenCL device: " << configurationStr);
2084 return false;
2085 }
2086 if (parts.size() > 2)
2087 deviceNameOrID = parts[2];
2088 if (parts.size() > 1)
2089 {
2090 split(parts[1], '|', deviceTypes);
2091 }
2092 if (parts.size() > 0)
2093 {
2094 platform = parts[0];
2095 }
2096 return true;
2097 }
2098
2099 #if defined WINRT || defined _WIN32_WCE
selectOpenCLDevice(const char * configuration=NULL)2100 static cl_device_id selectOpenCLDevice(const char* configuration = NULL)
2101 {
2102 CV_UNUSED(configuration)
2103 return NULL;
2104 }
2105 #else
selectOpenCLDevice(const char * configuration=NULL)2106 static cl_device_id selectOpenCLDevice(const char* configuration = NULL)
2107 {
2108 std::string platform, deviceName;
2109 std::vector<std::string> deviceTypes;
2110
2111 if (!configuration)
2112 configuration = getenv("OPENCV_OPENCL_DEVICE");
2113
2114 if (configuration &&
2115 (strcmp(configuration, "disabled") == 0 ||
2116 !parseOpenCLDeviceConfiguration(std::string(configuration), platform, deviceTypes, deviceName)
2117 ))
2118 return NULL;
2119
2120 bool isID = false;
2121 int deviceID = -1;
2122 if (deviceName.length() == 1)
2123 // We limit ID range to 0..9, because we want to write:
2124 // - '2500' to mean i5-2500
2125 // - '8350' to mean AMD FX-8350
2126 // - '650' to mean GeForce 650
2127 // To extend ID range change condition to '> 0'
2128 {
2129 isID = true;
2130 for (size_t i = 0; i < deviceName.length(); i++)
2131 {
2132 if (!isdigit(deviceName[i]))
2133 {
2134 isID = false;
2135 break;
2136 }
2137 }
2138 if (isID)
2139 {
2140 deviceID = atoi(deviceName.c_str());
2141 if (deviceID < 0)
2142 return NULL;
2143 }
2144 }
2145
2146 std::vector<cl_platform_id> platforms;
2147 {
2148 cl_uint numPlatforms = 0;
2149 CV_OCL_DBG_CHECK(clGetPlatformIDs(0, NULL, &numPlatforms));
2150
2151 if (numPlatforms == 0)
2152 return NULL;
2153 platforms.resize((size_t)numPlatforms);
2154 CV_OCL_DBG_CHECK(clGetPlatformIDs(numPlatforms, &platforms[0], &numPlatforms));
2155 platforms.resize(numPlatforms);
2156 }
2157
2158 int selectedPlatform = -1;
2159 if (platform.length() > 0)
2160 {
2161 for (size_t i = 0; i < platforms.size(); i++)
2162 {
2163 std::string name;
2164 CV_OCL_DBG_CHECK(getStringInfo(clGetPlatformInfo, platforms[i], CL_PLATFORM_NAME, name));
2165 if (name.find(platform) != std::string::npos)
2166 {
2167 selectedPlatform = (int)i;
2168 break;
2169 }
2170 }
2171 if (selectedPlatform == -1)
2172 {
2173 CV_LOG_ERROR(NULL, "OpenCL: Can't find OpenCL platform by name: " << platform);
2174 goto not_found;
2175 }
2176 }
2177 if (deviceTypes.size() == 0)
2178 {
2179 if (!isID)
2180 {
2181 deviceTypes.push_back("GPU");
2182 if (configuration)
2183 deviceTypes.push_back("CPU");
2184 }
2185 else
2186 deviceTypes.push_back("ALL");
2187 }
2188 for (size_t t = 0; t < deviceTypes.size(); t++)
2189 {
2190 int deviceType = 0;
2191 std::string tempStrDeviceType = deviceTypes[t];
2192 std::transform(tempStrDeviceType.begin(), tempStrDeviceType.end(), tempStrDeviceType.begin(), details::char_tolower);
2193
2194 if (tempStrDeviceType == "gpu" || tempStrDeviceType == "dgpu" || tempStrDeviceType == "igpu")
2195 deviceType = Device::TYPE_GPU;
2196 else if (tempStrDeviceType == "cpu")
2197 deviceType = Device::TYPE_CPU;
2198 else if (tempStrDeviceType == "accelerator")
2199 deviceType = Device::TYPE_ACCELERATOR;
2200 else if (tempStrDeviceType == "all")
2201 deviceType = Device::TYPE_ALL;
2202 else
2203 {
2204 CV_LOG_ERROR(NULL, "OpenCL: Unsupported device type for OpenCL device (GPU, CPU, ACCELERATOR): " << deviceTypes[t]);
2205 goto not_found;
2206 }
2207
2208 std::vector<cl_device_id> devices; // TODO Use clReleaseDevice to cleanup
2209 for (int i = selectedPlatform >= 0 ? selectedPlatform : 0;
2210 (selectedPlatform >= 0 ? i == selectedPlatform : true) && (i < (int)platforms.size());
2211 i++)
2212 {
2213 cl_uint count = 0;
2214 cl_int status = clGetDeviceIDs(platforms[i], deviceType, 0, NULL, &count);
2215 if (!(status == CL_SUCCESS || status == CL_DEVICE_NOT_FOUND))
2216 {
2217 CV_OCL_DBG_CHECK_RESULT(status, "clGetDeviceIDs get count");
2218 }
2219 if (count == 0)
2220 continue;
2221 size_t base = devices.size();
2222 devices.resize(base + count);
2223 status = clGetDeviceIDs(platforms[i], deviceType, count, &devices[base], &count);
2224 if (!(status == CL_SUCCESS || status == CL_DEVICE_NOT_FOUND))
2225 {
2226 CV_OCL_DBG_CHECK_RESULT(status, "clGetDeviceIDs get IDs");
2227 }
2228 }
2229
2230 for (size_t i = (isID ? deviceID : 0);
2231 (isID ? (i == (size_t)deviceID) : true) && (i < devices.size());
2232 i++)
2233 {
2234 std::string name;
2235 CV_OCL_DBG_CHECK(getStringInfo(clGetDeviceInfo, devices[i], CL_DEVICE_NAME, name));
2236 cl_bool useGPU = true;
2237 if(tempStrDeviceType == "dgpu" || tempStrDeviceType == "igpu")
2238 {
2239 cl_bool isIGPU = CL_FALSE;
2240 CV_OCL_DBG_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(isIGPU), &isIGPU, NULL));
2241 useGPU = tempStrDeviceType == "dgpu" ? !isIGPU : isIGPU;
2242 }
2243 if ( (isID || name.find(deviceName) != std::string::npos) && useGPU)
2244 {
2245 // TODO check for OpenCL 1.1
2246 return devices[i];
2247 }
2248 }
2249 }
2250
2251 not_found:
2252 if (!configuration)
2253 return NULL; // suppress messages on stderr
2254
2255 std::ostringstream msg;
2256 msg << "ERROR: Requested OpenCL device not found, check configuration: '" << configuration << "'" << std::endl
2257 << " Platform: " << (platform.length() == 0 ? "any" : platform) << std::endl
2258 << " Device types:";
2259 for (size_t t = 0; t < deviceTypes.size(); t++)
2260 msg << ' ' << deviceTypes[t];
2261
2262 msg << std::endl << " Device name: " << (deviceName.length() == 0 ? "any" : deviceName);
2263
2264 CV_LOG_ERROR(NULL, msg.str());
2265 return NULL;
2266 }
2267 #endif
2268
2269 #ifdef HAVE_OPENCL_SVM
2270 namespace svm {
2271
2272 enum AllocatorFlags { // don't use first 16 bits
2273 OPENCL_SVM_COARSE_GRAIN_BUFFER = 1 << 16, // clSVMAlloc + SVM map/unmap
2274 OPENCL_SVM_FINE_GRAIN_BUFFER = 2 << 16, // clSVMAlloc
2275 OPENCL_SVM_FINE_GRAIN_SYSTEM = 3 << 16, // direct access
2276 OPENCL_SVM_BUFFER_MASK = 3 << 16,
2277 OPENCL_SVM_BUFFER_MAP = 4 << 16
2278 };
2279
checkForceSVMUmatUsage()2280 static bool checkForceSVMUmatUsage()
2281 {
2282 static bool initialized = false;
2283 static bool force = false;
2284 if (!initialized)
2285 {
2286 force = utils::getConfigurationParameterBool("OPENCV_OPENCL_SVM_FORCE_UMAT_USAGE", false);
2287 initialized = true;
2288 }
2289 return force;
2290 }
checkDisableSVMUMatUsage()2291 static bool checkDisableSVMUMatUsage()
2292 {
2293 static bool initialized = false;
2294 static bool force = false;
2295 if (!initialized)
2296 {
2297 force = utils::getConfigurationParameterBool("OPENCV_OPENCL_SVM_DISABLE_UMAT_USAGE", false);
2298 initialized = true;
2299 }
2300 return force;
2301 }
checkDisableSVM()2302 static bool checkDisableSVM()
2303 {
2304 static bool initialized = false;
2305 static bool force = false;
2306 if (!initialized)
2307 {
2308 force = utils::getConfigurationParameterBool("OPENCV_OPENCL_SVM_DISABLE", false);
2309 initialized = true;
2310 }
2311 return force;
2312 }
2313 // see SVMCapabilities
getSVMCapabilitiesMask()2314 static unsigned int getSVMCapabilitiesMask()
2315 {
2316 static bool initialized = false;
2317 static unsigned int mask = 0;
2318 if (!initialized)
2319 {
2320 const char* envValue = getenv("OPENCV_OPENCL_SVM_CAPABILITIES_MASK");
2321 if (envValue == NULL)
2322 {
2323 return ~0U; // all bits 1
2324 }
2325 mask = atoi(envValue);
2326 initialized = true;
2327 }
2328 return mask;
2329 }
2330 } // namespace
2331 #endif
2332
getProgramCountLimit()2333 static size_t getProgramCountLimit()
2334 {
2335 static bool initialized = false;
2336 static size_t count = 0;
2337 if (!initialized)
2338 {
2339 count = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_PROGRAM_CACHE", 0);
2340 initialized = true;
2341 }
2342 return count;
2343 }
2344
// Counter that supplies Context::Impl::contextId values; each Impl constructor
// advances it through CV_XADD.
static int g_contextId = 0;

// Forward declarations of the buffer pool classes referenced by Context::Impl.
class OpenCLBufferPoolImpl;
class OpenCLSVMBufferPoolImpl;
2349
2350 struct Context::Impl
2351 {
getcv::ocl::Context::Impl2352 static Context::Impl* get(Context& context) { return context.p; }
2353
2354 typedef std::deque<Context::Impl*> container_t;
getGlobalContainercv::ocl::Context::Impl2355 static container_t& getGlobalContainer()
2356 {
2357 // never delete this container (Impl lifetime is greater due to TLS storage)
2358 static container_t* g_contexts = new container_t();
2359 return *g_contexts;
2360 }
2361
2362 protected:
Implcv::ocl::Context::Impl2363 Impl(const std::string& configuration_)
2364 : refcount(1)
2365 , contextId(CV_XADD(&g_contextId, 1))
2366 , configuration(configuration_)
2367 , handle(0)
2368 #ifdef HAVE_OPENCL_SVM
2369 , svmInitialized(false)
2370 #endif
2371 {
2372 if (!haveOpenCL())
2373 CV_Error(cv::Error::OpenCLApiCallError, "OpenCL runtime is not available!");
2374
2375 cv::AutoLock lock(cv::getInitializationMutex());
2376 auto& container = getGlobalContainer();
2377 container.resize(std::max(container.size(), (size_t)contextId + 1));
2378 container[contextId] = this;
2379 }
2380
~Implcv::ocl::Context::Impl2381 ~Impl()
2382 {
2383 #ifdef _WIN32
2384 if (!cv::__termination)
2385 #endif
2386 {
2387 if (handle)
2388 {
2389 CV_OCL_DBG_CHECK(clReleaseContext(handle));
2390 handle = NULL;
2391 }
2392 devices.clear();
2393 }
2394
2395 userContextStorage.clear();
2396
2397 {
2398 cv::AutoLock lock(cv::getInitializationMutex());
2399 auto& container = getGlobalContainer();
2400 CV_CheckLT((size_t)contextId, container.size(), "");
2401 container[contextId] = NULL;
2402 }
2403 }
2404
init_device_listcv::ocl::Context::Impl2405 void init_device_list()
2406 {
2407 CV_Assert(handle);
2408
2409 cl_uint ndevices = 0;
2410 CV_OCL_CHECK(clGetContextInfo(handle, CL_CONTEXT_NUM_DEVICES, sizeof(ndevices), &ndevices, NULL));
2411 CV_Assert(ndevices > 0);
2412
2413 cv::AutoBuffer<cl_device_id> cl_devices(ndevices);
2414 size_t devices_ret_size = 0;
2415 CV_OCL_CHECK(clGetContextInfo(handle, CL_CONTEXT_DEVICES, cl_devices.size() * sizeof(cl_device_id), &cl_devices[0], &devices_ret_size));
2416 CV_CheckEQ(devices_ret_size, cl_devices.size() * sizeof(cl_device_id), "");
2417
2418 devices.clear();
2419 for (unsigned i = 0; i < ndevices; i++)
2420 {
2421 devices.emplace_back(Device::fromHandle(cl_devices[i]));
2422 }
2423 }
2424
2425 void __init_buffer_pools(); // w/o synchronization
_init_buffer_poolscv::ocl::Context::Impl2426 void _init_buffer_pools() const
2427 {
2428 if (!bufferPool_)
2429 {
2430 cv::AutoLock lock(cv::getInitializationMutex());
2431 if (!bufferPool_)
2432 {
2433 const_cast<Impl*>(this)->__init_buffer_pools();
2434 }
2435 }
2436 }
2437 public:
findContextcv::ocl::Context::Impl2438 static Impl* findContext(const std::string& configuration)
2439 {
2440 CV_TRACE_FUNCTION();
2441 cv::AutoLock lock(cv::getInitializationMutex());
2442 auto& container = getGlobalContainer();
2443 if (configuration.empty() && !container.empty())
2444 return container[0];
2445 for (auto it = container.begin(); it != container.end(); ++it)
2446 {
2447 Impl* i = *it;
2448 if (i && i->configuration == configuration)
2449 {
2450 return i;
2451 }
2452 }
2453 return NULL;
2454 }
2455
findOrCreateContextcv::ocl::Context::Impl2456 static Impl* findOrCreateContext(const std::string& configuration_)
2457 {
2458 CV_TRACE_FUNCTION();
2459 std::string configuration = configuration_;
2460 if (configuration_.empty())
2461 {
2462 const char* c = getenv("OPENCV_OPENCL_DEVICE");
2463 if (c)
2464 configuration = c;
2465 }
2466 Impl* impl = findContext(configuration);
2467 if (impl)
2468 {
2469 CV_LOG_INFO(NULL, "OpenCL: reuse context@" << impl->contextId << " for configuration: " << configuration)
2470 impl->addref();
2471 return impl;
2472 }
2473
2474 cl_device_id d = selectOpenCLDevice(configuration.empty() ? NULL : configuration.c_str());
2475 if (d == NULL)
2476 return NULL;
2477
2478 impl = new Impl(configuration);
2479 try
2480 {
2481 impl->createFromDevice(d);
2482 if (impl->handle)
2483 return impl;
2484 delete impl;
2485 return NULL;
2486 }
2487 catch (...)
2488 {
2489 delete impl;
2490 throw;
2491 }
2492 }
2493
findOrCreateContextcv::ocl::Context::Impl2494 static Impl* findOrCreateContext(cl_context h)
2495 {
2496 CV_TRACE_FUNCTION();
2497
2498 CV_Assert(h);
2499
2500 std::string configuration = cv::format("@ctx-%p", (void*)h);
2501 Impl* impl = findContext(configuration);
2502 if (impl)
2503 {
2504 CV_LOG_INFO(NULL, "OpenCL: reuse context@" << impl->contextId << " for configuration: " << configuration)
2505 impl->addref();
2506 return impl;
2507 }
2508
2509 impl = new Impl(configuration);
2510 try
2511 {
2512 CV_OCL_CHECK(clRetainContext(h));
2513 impl->handle = h;
2514 impl->init_device_list();
2515 return impl;
2516 }
2517 catch (...)
2518 {
2519 delete impl;
2520 throw;
2521 }
2522 }
2523
findOrCreateContextcv::ocl::Context::Impl2524 static Impl* findOrCreateContext(const ocl::Device& device)
2525 {
2526 CV_TRACE_FUNCTION();
2527
2528 CV_Assert(!device.empty());
2529 cl_device_id d = (cl_device_id)device.ptr();
2530 CV_Assert(d);
2531
2532 std::string configuration = cv::format("@dev-%p", (void*)d);
2533 Impl* impl = findContext(configuration);
2534 if (impl)
2535 {
2536 CV_LOG_INFO(NULL, "OpenCL: reuse context@" << impl->contextId << " for configuration: " << configuration)
2537 impl->addref();
2538 return impl;
2539 }
2540
2541 impl = new Impl(configuration);
2542 try
2543 {
2544 impl->createFromDevice(d);
2545 CV_Assert(impl->handle);
2546 return impl;
2547 }
2548 catch (...)
2549 {
2550 delete impl;
2551 throw;
2552 }
2553 }
2554
setDefaultcv::ocl::Context::Impl2555 void setDefault()
2556 {
2557 CV_TRACE_FUNCTION();
2558 cl_device_id d = selectOpenCLDevice();
2559
2560 if (d == NULL)
2561 return;
2562
2563 createFromDevice(d);
2564 }
2565
createFromDevicecv::ocl::Context::Impl2566 void createFromDevice(cl_device_id d)
2567 {
2568 CV_TRACE_FUNCTION();
2569 CV_Assert(handle == NULL);
2570
2571 cl_platform_id pl = NULL;
2572 CV_OCL_DBG_CHECK(clGetDeviceInfo(d, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &pl, NULL));
2573
2574 cl_context_properties prop[] =
2575 {
2576 CL_CONTEXT_PLATFORM, (cl_context_properties)pl,
2577 0
2578 };
2579
2580 // !!! in the current implementation force the number of devices to 1 !!!
2581 cl_uint nd = 1;
2582 cl_int status;
2583
2584 handle = clCreateContext(prop, nd, &d, 0, 0, &status);
2585 CV_OCL_DBG_CHECK_RESULT(status, "clCreateContext");
2586
2587 bool ok = handle != 0 && status == CL_SUCCESS;
2588 if( ok )
2589 {
2590 devices.resize(nd);
2591 devices[0].set(d);
2592 }
2593 else
2594 handle = NULL;
2595 }
2596
2597 Program getProg(const ProgramSource& src, const String& buildflags, String& errmsg);
2598
unloadProgcv::ocl::Context::Impl2599 void unloadProg(Program& prog)
2600 {
2601 cv::AutoLock lock(program_cache_mutex);
2602 for (CacheList::iterator i = cacheList.begin(); i != cacheList.end(); ++i)
2603 {
2604 phash_t::iterator it = phash.find(*i);
2605 if (it != phash.end())
2606 {
2607 if (it->second.ptr() == prog.ptr())
2608 {
2609 phash.erase(*i);
2610 cacheList.erase(i);
2611 return;
2612 }
2613 }
2614 }
2615 }
2616
getPrefixStringcv::ocl::Context::Impl2617 std::string& getPrefixString()
2618 {
2619 if (prefix.empty())
2620 {
2621 cv::AutoLock lock(program_cache_mutex);
2622 if (prefix.empty())
2623 {
2624 CV_Assert(!devices.empty());
2625 const Device& d = devices[0];
2626 int bits = d.addressBits();
2627 if (bits > 0 && bits != 64)
2628 prefix = cv::format("%d-bit--", bits);
2629 prefix += d.vendorName() + "--" + d.name() + "--" + d.driverVersion();
2630 // sanitize chars
2631 for (size_t i = 0; i < prefix.size(); i++)
2632 {
2633 char c = prefix[i];
2634 if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c == '-'))
2635 {
2636 prefix[i] = '_';
2637 }
2638 }
2639 }
2640 }
2641 return prefix;
2642 }
2643
getPrefixBasecv::ocl::Context::Impl2644 std::string& getPrefixBase()
2645 {
2646 if (prefix_base.empty())
2647 {
2648 cv::AutoLock lock(program_cache_mutex);
2649 if (prefix_base.empty())
2650 {
2651 const Device& d = devices[0];
2652 int bits = d.addressBits();
2653 if (bits > 0 && bits != 64)
2654 prefix_base = cv::format("%d-bit--", bits);
2655 prefix_base += d.vendorName() + "--" + d.name() + "--";
2656 // sanitize chars
2657 for (size_t i = 0; i < prefix_base.size(); i++)
2658 {
2659 char c = prefix_base[i];
2660 if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c == '-'))
2661 {
2662 prefix_base[i] = '_';
2663 }
2664 }
2665 }
2666 }
2667 return prefix_base;
2668 }
2669
2670 IMPLEMENT_REFCOUNTABLE();
2671
2672 const int contextId; // global unique ID
2673 const std::string configuration;
2674
2675 cl_context handle;
2676 std::vector<Device> devices;
2677
2678 std::string prefix;
2679 std::string prefix_base;
2680
2681 cv::Mutex program_cache_mutex;
2682 typedef std::map<std::string, Program> phash_t;
2683 phash_t phash;
2684 typedef std::list<cv::String> CacheList;
2685 CacheList cacheList;
2686
2687 std::shared_ptr<OpenCLBufferPoolImpl> bufferPool_;
2688 std::shared_ptr<OpenCLBufferPoolImpl> bufferPoolHostPtr_;
getBufferPoolcv::ocl::Context::Impl2689 OpenCLBufferPoolImpl& getBufferPool() const
2690 {
2691 _init_buffer_pools();
2692 CV_DbgAssert(bufferPool_);
2693 return *bufferPool_.get();
2694 }
getBufferPoolHostPtrcv::ocl::Context::Impl2695 OpenCLBufferPoolImpl& getBufferPoolHostPtr() const
2696 {
2697 _init_buffer_pools();
2698 CV_DbgAssert(bufferPoolHostPtr_);
2699 return *bufferPoolHostPtr_.get();
2700 }
2701
2702 std::map<std::type_index, std::shared_ptr<UserContext>> userContextStorage;
2703 cv::Mutex userContextMutex;
setUserContextcv::ocl::Context::Impl2704 void setUserContext(std::type_index typeId, const std::shared_ptr<UserContext>& userContext) {
2705 cv::AutoLock lock(userContextMutex);
2706 userContextStorage[typeId] = userContext;
2707 }
getUserContextcv::ocl::Context::Impl2708 std::shared_ptr<UserContext> getUserContext(std::type_index typeId) {
2709 cv::AutoLock lock(userContextMutex);
2710 auto it = userContextStorage.find(typeId);
2711 if (it != userContextStorage.end())
2712 return it->second;
2713 else
2714 return nullptr;
2715 }
2716
2717 #ifdef HAVE_OPENCL_SVM
2718 bool svmInitialized;
2719 bool svmAvailable;
2720 bool svmEnabled;
2721 svm::SVMCapabilities svmCapabilities;
2722 svm::SVMFunctions svmFunctions;
2723
svmInitcv::ocl::Context::Impl2724 void svmInit()
2725 {
2726 CV_Assert(handle != NULL);
2727 const Device& device = devices[0];
2728 cl_device_svm_capabilities deviceCaps = 0;
2729 CV_Assert(((void)0, CL_DEVICE_SVM_CAPABILITIES == CL_DEVICE_SVM_CAPABILITIES_AMD)); // Check assumption
2730 cl_int status = clGetDeviceInfo((cl_device_id)device.ptr(), CL_DEVICE_SVM_CAPABILITIES, sizeof(deviceCaps), &deviceCaps, NULL);
2731 if (status != CL_SUCCESS)
2732 {
2733 CV_OPENCL_SVM_TRACE_ERROR_P("CL_DEVICE_SVM_CAPABILITIES via clGetDeviceInfo failed: %d\n", status);
2734 goto noSVM;
2735 }
2736 CV_OPENCL_SVM_TRACE_P("CL_DEVICE_SVM_CAPABILITIES returned: 0x%x\n", (int)deviceCaps);
2737 CV_Assert(((void)0, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER == CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_AMD)); // Check assumption
2738 svmCapabilities.value_ =
2739 ((deviceCaps & CL_DEVICE_SVM_COARSE_GRAIN_BUFFER) ? svm::SVMCapabilities::SVM_COARSE_GRAIN_BUFFER : 0) |
2740 ((deviceCaps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) ? svm::SVMCapabilities::SVM_FINE_GRAIN_BUFFER : 0) |
2741 ((deviceCaps & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM) ? svm::SVMCapabilities::SVM_FINE_GRAIN_SYSTEM : 0) |
2742 ((deviceCaps & CL_DEVICE_SVM_ATOMICS) ? svm::SVMCapabilities::SVM_ATOMICS : 0);
2743 svmCapabilities.value_ &= svm::getSVMCapabilitiesMask();
2744 if (svmCapabilities.value_ == 0)
2745 {
2746 CV_OPENCL_SVM_TRACE_ERROR_P("svmCapabilities is empty\n");
2747 goto noSVM;
2748 }
2749 try
2750 {
2751 // Try OpenCL 2.0
2752 CV_OPENCL_SVM_TRACE_P("Try SVM from OpenCL 2.0 ...\n");
2753 void* ptr = clSVMAlloc(handle, CL_MEM_READ_WRITE, 100, 0);
2754 if (!ptr)
2755 {
2756 CV_OPENCL_SVM_TRACE_ERROR_P("clSVMAlloc returned NULL...\n");
2757 CV_Error(Error::StsBadArg, "clSVMAlloc returned NULL");
2758 }
2759 try
2760 {
2761 bool error = false;
2762 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
2763 if (CL_SUCCESS != clEnqueueSVMMap(q, CL_TRUE, CL_MAP_WRITE, ptr, 100, 0, NULL, NULL))
2764 {
2765 CV_OPENCL_SVM_TRACE_ERROR_P("clEnqueueSVMMap failed...\n");
2766 CV_Error(Error::StsBadArg, "clEnqueueSVMMap FAILED");
2767 }
2768 clFinish(q);
2769 try
2770 {
2771 ((int*)ptr)[0] = 100;
2772 }
2773 catch (...)
2774 {
2775 CV_OPENCL_SVM_TRACE_ERROR_P("SVM buffer access test FAILED\n");
2776 error = true;
2777 }
2778 if (CL_SUCCESS != clEnqueueSVMUnmap(q, ptr, 0, NULL, NULL))
2779 {
2780 CV_OPENCL_SVM_TRACE_ERROR_P("clEnqueueSVMUnmap failed...\n");
2781 CV_Error(Error::StsBadArg, "clEnqueueSVMUnmap FAILED");
2782 }
2783 clFinish(q);
2784 if (error)
2785 {
2786 CV_Error(Error::StsBadArg, "OpenCL SVM buffer access test was FAILED");
2787 }
2788 }
2789 catch (...)
2790 {
2791 CV_OPENCL_SVM_TRACE_ERROR_P("OpenCL SVM buffer access test was FAILED\n");
2792 clSVMFree(handle, ptr);
2793 throw;
2794 }
2795 clSVMFree(handle, ptr);
2796 svmFunctions.fn_clSVMAlloc = clSVMAlloc;
2797 svmFunctions.fn_clSVMFree = clSVMFree;
2798 svmFunctions.fn_clSetKernelArgSVMPointer = clSetKernelArgSVMPointer;
2799 //svmFunctions.fn_clSetKernelExecInfo = clSetKernelExecInfo;
2800 //svmFunctions.fn_clEnqueueSVMFree = clEnqueueSVMFree;
2801 svmFunctions.fn_clEnqueueSVMMemcpy = clEnqueueSVMMemcpy;
2802 svmFunctions.fn_clEnqueueSVMMemFill = clEnqueueSVMMemFill;
2803 svmFunctions.fn_clEnqueueSVMMap = clEnqueueSVMMap;
2804 svmFunctions.fn_clEnqueueSVMUnmap = clEnqueueSVMUnmap;
2805 }
2806 catch (...)
2807 {
2808 CV_OPENCL_SVM_TRACE_P("clSVMAlloc failed, trying HSA extension...\n");
2809 try
2810 {
2811 // Try HSA extension
2812 String extensions = device.extensions();
2813 if (extensions.find("cl_amd_svm") == String::npos)
2814 {
2815 CV_OPENCL_SVM_TRACE_P("Device extension doesn't have cl_amd_svm: %s\n", extensions.c_str());
2816 goto noSVM;
2817 }
2818 cl_platform_id p = NULL;
2819 CV_OCL_CHECK(status = clGetDeviceInfo((cl_device_id)device.ptr(), CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &p, NULL));
2820 svmFunctions.fn_clSVMAlloc = (clSVMAllocAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSVMAllocAMD");
2821 svmFunctions.fn_clSVMFree = (clSVMFreeAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSVMFreeAMD");
2822 svmFunctions.fn_clSetKernelArgSVMPointer = (clSetKernelArgSVMPointerAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSetKernelArgSVMPointerAMD");
2823 //svmFunctions.fn_clSetKernelExecInfo = (clSetKernelExecInfoAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSetKernelExecInfoAMD");
2824 //svmFunctions.fn_clEnqueueSVMFree = (clEnqueueSVMFreeAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMFreeAMD");
2825 svmFunctions.fn_clEnqueueSVMMemcpy = (clEnqueueSVMMemcpyAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMemcpyAMD");
2826 svmFunctions.fn_clEnqueueSVMMemFill = (clEnqueueSVMMemFillAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMemFillAMD");
2827 svmFunctions.fn_clEnqueueSVMMap = (clEnqueueSVMMapAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMapAMD");
2828 svmFunctions.fn_clEnqueueSVMUnmap = (clEnqueueSVMUnmapAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMUnmapAMD");
2829 CV_Assert(svmFunctions.isValid());
2830 }
2831 catch (...)
2832 {
2833 CV_OPENCL_SVM_TRACE_P("Something is totally wrong\n");
2834 goto noSVM;
2835 }
2836 }
2837
2838 svmAvailable = true;
2839 svmEnabled = !svm::checkDisableSVM();
2840 svmInitialized = true;
2841 CV_OPENCL_SVM_TRACE_P("OpenCV OpenCL SVM support initialized\n");
2842 return;
2843 noSVM:
2844 CV_OPENCL_SVM_TRACE_P("OpenCL SVM is not detected\n");
2845 svmAvailable = false;
2846 svmEnabled = false;
2847 svmCapabilities.value_ = 0;
2848 svmInitialized = true;
2849 svmFunctions.fn_clSVMAlloc = NULL;
2850 return;
2851 }
2852
2853 std::shared_ptr<OpenCLSVMBufferPoolImpl> bufferPoolSVM_;
2854
getBufferPoolSVMcv::ocl::Context::Impl2855 OpenCLSVMBufferPoolImpl& getBufferPoolSVM() const
2856 {
2857 _init_buffer_pools();
2858 CV_DbgAssert(bufferPoolSVM_);
2859 return *bufferPoolSVM_.get();
2860 }
2861 #endif
2862
2863 friend class Program;
2864 };
2865
2866
Context()2867 Context::Context() CV_NOEXCEPT
2868 {
2869 p = 0;
2870 }
2871
// Drops this object's reference to the implementation, if any.
Context::~Context()
{
    release();
}
2876
2877 // deprecated
Context(int dtype)2878 Context::Context(int dtype)
2879 {
2880 p = 0;
2881 create(dtype);
2882 }
2883
release()2884 void Context::release()
2885 {
2886 if (p)
2887 {
2888 p->release();
2889 p = NULL;
2890 }
2891 }
2892
create()2893 bool Context::create()
2894 {
2895 release();
2896 if (!haveOpenCL())
2897 return false;
2898 p = Impl::findOrCreateContext(std::string());
2899 if (p && p->handle)
2900 return true;
2901 release();
2902 return false;
2903 }
2904
2905 // deprecated
create(int dtype)2906 bool Context::create(int dtype)
2907 {
2908 if( !haveOpenCL() )
2909 return false;
2910 release();
2911 if (dtype == CL_DEVICE_TYPE_DEFAULT || (unsigned)dtype == (unsigned)CL_DEVICE_TYPE_ALL)
2912 {
2913 p = Impl::findOrCreateContext("");
2914 }
2915 else if (dtype == CL_DEVICE_TYPE_GPU)
2916 {
2917 p = Impl::findOrCreateContext(":GPU:");
2918 }
2919 else if (dtype == CL_DEVICE_TYPE_CPU)
2920 {
2921 p = Impl::findOrCreateContext(":CPU:");
2922 }
2923 else
2924 {
2925 CV_LOG_ERROR(NULL, "OpenCL: Can't recognize OpenCV device type=" << dtype);
2926 }
2927 if (p && !p->handle)
2928 {
2929 release();
2930 }
2931 return p != 0;
2932 }
2933
// Copy: shares the implementation of c and takes an additional reference on it.
Context::Context(const Context& c)
    : p((Impl*)c.p)
{
    if (p)
        p->addref();
}
2940
// Copy assignment: shares the implementation of c. The incoming reference is taken
// before the current one is released, which keeps self-assignment safe.
Context& Context::operator = (const Context& c)
{
    Impl* const incoming = (Impl*)c.p;
    if (incoming)
    {
        incoming->addref();
    }
    if (p)
    {
        p->release();
    }
    p = incoming;
    return *this;
}
2951
Context(Context && c)2952 Context::Context(Context&& c) CV_NOEXCEPT
2953 {
2954 p = c.p;
2955 c.p = nullptr;
2956 }
2957
operator =(Context && c)2958 Context& Context::operator = (Context&& c) CV_NOEXCEPT
2959 {
2960 if (this != &c) {
2961 if(p)
2962 p->release();
2963 p = c.p;
2964 c.p = nullptr;
2965 }
2966 return *this;
2967 }
2968
ptr() const2969 void* Context::ptr() const
2970 {
2971 return p == NULL ? NULL : p->handle;
2972 }
2973
ndevices() const2974 size_t Context::ndevices() const
2975 {
2976 return p ? p->devices.size() : 0;
2977 }
2978
device(size_t idx) const2979 Device& Context::device(size_t idx) const
2980 {
2981 static Device dummy;
2982 return !p || idx >= p->devices.size() ? dummy : p->devices[idx];
2983 }
2984
getDefault(bool initialize)2985 Context& Context::getDefault(bool initialize)
2986 {
2987 auto& c = OpenCLExecutionContext::getCurrent();
2988 if (!c.empty())
2989 {
2990 auto& ctx = c.getContext();
2991 return ctx;
2992 }
2993
2994 CV_UNUSED(initialize);
2995 static Context dummy;
2996 return dummy;
2997 }
2998
getProg(const ProgramSource & prog,const String & buildopts,String & errmsg)2999 Program Context::getProg(const ProgramSource& prog,
3000 const String& buildopts, String& errmsg)
3001 {
3002 return p ? p->getProg(prog, buildopts, errmsg) : Program();
3003 }
3004
unloadProg(Program & prog)3005 void Context::unloadProg(Program& prog)
3006 {
3007 if (p)
3008 p->unloadProg(prog);
3009 }
3010
3011 /* static */
fromHandle(void * context)3012 Context Context::fromHandle(void* context)
3013 {
3014 Context ctx;
3015 ctx.p = Impl::findOrCreateContext((cl_context)context);
3016 return ctx;
3017 }
3018
3019 /* static */
fromDevice(const ocl::Device & device)3020 Context Context::fromDevice(const ocl::Device& device)
3021 {
3022 Context ctx;
3023 ctx.p = Impl::findOrCreateContext(device);
3024 return ctx;
3025 }
3026
3027 /* static */
create(const std::string & configuration)3028 Context Context::create(const std::string& configuration)
3029 {
3030 Context ctx;
3031 ctx.p = Impl::findOrCreateContext(configuration);
3032 return ctx;
3033 }
3034
getOpenCLContextProperty(int propertyId) const3035 void* Context::getOpenCLContextProperty(int propertyId) const
3036 {
3037 if (p == NULL)
3038 return nullptr;
3039 ::size_t size = 0;
3040 CV_OCL_CHECK(clGetContextInfo(p->handle, CL_CONTEXT_PROPERTIES, 0, NULL, &size));
3041 std::vector<cl_context_properties> prop(size / sizeof(cl_context_properties), (cl_context_properties)0);
3042 CV_OCL_CHECK(clGetContextInfo(p->handle, CL_CONTEXT_PROPERTIES, size, prop.data(), NULL));
3043 for (size_t i = 0; i < prop.size(); i += 2)
3044 {
3045 if (prop[i] == (cl_context_properties)propertyId)
3046 {
3047 CV_LOG_DEBUG(NULL, "OpenCL: found context property=" << propertyId << ") => " << (void*)prop[i + 1]);
3048 return (void*)prop[i + 1];
3049 }
3050 }
3051 return nullptr;
3052 }
3053
3054 #ifdef HAVE_OPENCL_SVM
useSVM() const3055 bool Context::useSVM() const
3056 {
3057 Context::Impl* i = p;
3058 CV_Assert(i);
3059 if (!i->svmInitialized)
3060 i->svmInit();
3061 return i->svmEnabled;
3062 }
setUseSVM(bool enabled)3063 void Context::setUseSVM(bool enabled)
3064 {
3065 Context::Impl* i = p;
3066 CV_Assert(i);
3067 if (!i->svmInitialized)
3068 i->svmInit();
3069 if (enabled && !i->svmAvailable)
3070 {
3071 CV_Error(Error::StsError, "OpenCL Shared Virtual Memory (SVM) is not supported by OpenCL device");
3072 }
3073 i->svmEnabled = enabled;
3074 }
3075 #else
// Variant compiled when HAVE_OPENCL_SVM is not defined: SVM is never in use.
bool Context::useSVM() const { return false; }
// Variant compiled when HAVE_OPENCL_SVM is not defined: only "disable" is accepted,
// a request to enable SVM fails the assertion.
void Context::setUseSVM(bool enabled) { CV_Assert(!enabled); }
3078 #endif
3079
3080 #ifdef HAVE_OPENCL_SVM
3081 namespace svm {
3082
getSVMCapabilitites(const ocl::Context & context)3083 const SVMCapabilities getSVMCapabilitites(const ocl::Context& context)
3084 {
3085 Context::Impl* i = context.p;
3086 CV_Assert(i);
3087 if (!i->svmInitialized)
3088 i->svmInit();
3089 return i->svmCapabilities;
3090 }
3091
getSVMFunctions(const ocl::Context & context)3092 CV_EXPORTS const SVMFunctions* getSVMFunctions(const ocl::Context& context)
3093 {
3094 Context::Impl* i = context.p;
3095 CV_Assert(i);
3096 CV_Assert(i->svmInitialized); // getSVMCapabilitites() must be called first
3097 CV_Assert(i->svmFunctions.fn_clSVMAlloc != NULL);
3098 return &i->svmFunctions;
3099 }
3100
useSVM(UMatUsageFlags usageFlags)3101 CV_EXPORTS bool useSVM(UMatUsageFlags usageFlags)
3102 {
3103 if (checkForceSVMUmatUsage())
3104 return true;
3105 if (checkDisableSVMUMatUsage())
3106 return false;
3107 if ((usageFlags & USAGE_ALLOCATE_SHARED_MEMORY) != 0)
3108 return true;
3109 return false; // don't use SVM by default
3110 }
3111
3112 } // namespace cv::ocl::svm
3113 #endif // HAVE_OPENCL_SVM
3114
~UserContext()3115 Context::UserContext::~UserContext()
3116 {
3117 }
3118
// Stores (or replaces) the user object registered under typeId in this context.
// Requires a non-empty context (asserted).
void Context::setUserContext(std::type_index typeId, const std::shared_ptr<Context::UserContext>& userContext)
{
    CV_Assert(p);
    p->setUserContext(typeId, userContext);
}
3124
getUserContext(std::type_index typeId)3125 std::shared_ptr<Context::UserContext> Context::getUserContext(std::type_index typeId)
3126 {
3127 CV_Assert(p);
3128 return p->getUserContext(typeId);
3129 }
3130
// Reads CL_PLATFORM_NAME of the given platform into `name`.
// Both clGetPlatformInfo() calls go through CV_OCL_CHECK.
static void get_platform_name(cl_platform_id id, String& name)
{
    // get platform name string length
    size_t sz = 0;
    CV_OCL_CHECK(clGetPlatformInfo(id, CL_PLATFORM_NAME, 0, 0, &sz));

    // get platform name string
    // (one extra byte is reserved for the terminator written below)
    AutoBuffer<char> buf(sz + 1);
    CV_OCL_CHECK(clGetPlatformInfo(id, CL_PLATFORM_NAME, sz, buf.data(), 0));

    // just in case, ensure trailing zero for ASCIIZ string
    buf[sz] = 0;

    name = buf.data();
}
3146
3147 /*
3148 // Attaches OpenCL context to OpenCV
3149 */
attachContext(const String & platformName,void * platformID,void * context,void * deviceID)3150 void attachContext(const String& platformName, void* platformID, void* context, void* deviceID)
3151 {
3152 auto ctx = OpenCLExecutionContext::create(platformName, platformID, context, deviceID);
3153 ctx.bind();
3154 }
3155
3156 /* static */
create(const std::string & platformName,void * platformID,void * context,void * deviceID)3157 OpenCLExecutionContext OpenCLExecutionContext::create(
3158 const std::string& platformName, void* platformID, void* context, void* deviceID
3159 )
3160 {
3161 if (!haveOpenCL())
3162 CV_Error(cv::Error::OpenCLApiCallError, "OpenCL runtime is not available!");
3163
3164 cl_uint cnt = 0;
3165 CV_OCL_CHECK(clGetPlatformIDs(0, 0, &cnt));
3166
3167 if (cnt == 0)
3168 CV_Error(cv::Error::OpenCLApiCallError, "No OpenCL platform available!");
3169
3170 std::vector<cl_platform_id> platforms(cnt);
3171
3172 CV_OCL_CHECK(clGetPlatformIDs(cnt, &platforms[0], 0));
3173
3174 bool platformAvailable = false;
3175
3176 // check if external platformName contained in list of available platforms in OpenCV
3177 for (unsigned int i = 0; i < cnt; i++)
3178 {
3179 String availablePlatformName;
3180 get_platform_name(platforms[i], availablePlatformName);
3181 // external platform is found in the list of available platforms
3182 if (platformName == availablePlatformName)
3183 {
3184 platformAvailable = true;
3185 break;
3186 }
3187 }
3188
3189 if (!platformAvailable)
3190 CV_Error(cv::Error::OpenCLApiCallError, "No matched platforms available!");
3191
3192 // check if platformID corresponds to platformName
3193 String actualPlatformName;
3194 get_platform_name((cl_platform_id)platformID, actualPlatformName);
3195 if (platformName != actualPlatformName)
3196 CV_Error(cv::Error::OpenCLApiCallError, "No matched platforms available!");
3197
3198 OpenCLExecutionContext ctx;
3199 ctx.p = std::make_shared<OpenCLExecutionContext::Impl>((cl_platform_id)platformID, (cl_context)context, (cl_device_id)deviceID);
3200 CV_OCL_CHECK(clReleaseContext((cl_context)context));
3201 CV_OCL_CHECK(clReleaseDevice((cl_device_id)deviceID));
3202 return ctx;
3203 }
3204
initializeContextFromHandle(Context & ctx,void * _platform,void * _context,void * _device)3205 void initializeContextFromHandle(Context& ctx, void* _platform, void* _context, void* _device)
3206 {
3207 // internal call, less checks
3208 cl_platform_id platformID = (cl_platform_id)_platform;
3209 cl_context context = (cl_context)_context;
3210 cl_device_id deviceID = (cl_device_id)_device;
3211
3212 std::string platformName = PlatformInfo(&platformID).name();
3213
3214 auto clExecCtx = OpenCLExecutionContext::create(platformName, platformID, context, deviceID);
3215 CV_Assert(!clExecCtx.empty());
3216 ctx = clExecCtx.getContext();
3217 }
3218
3219 /////////////////////////////////////////// Queue /////////////////////////////////////////////
3220
3221 struct Queue::Impl
3222 {
__initcv::ocl::Queue::Impl3223 inline void __init()
3224 {
3225 refcount = 1;
3226 handle = 0;
3227 isProfilingQueue_ = false;
3228 }
3229
Implcv::ocl::Queue::Impl3230 Impl(cl_command_queue q)
3231 {
3232 __init();
3233 handle = q;
3234
3235 cl_command_queue_properties props = 0;
3236 CV_OCL_CHECK(clGetCommandQueueInfo(handle, CL_QUEUE_PROPERTIES, sizeof(cl_command_queue_properties), &props, NULL));
3237 isProfilingQueue_ = !!(props & CL_QUEUE_PROFILING_ENABLE);
3238 }
3239
Implcv::ocl::Queue::Impl3240 Impl(cl_command_queue q, bool isProfilingQueue)
3241 {
3242 __init();
3243 handle = q;
3244 isProfilingQueue_ = isProfilingQueue;
3245 }
3246
Implcv::ocl::Queue::Impl3247 Impl(const Context& c, const Device& d, bool withProfiling = false)
3248 {
3249 __init();
3250
3251 const Context* pc = &c;
3252 cl_context ch = (cl_context)pc->ptr();
3253 if( !ch )
3254 {
3255 pc = &Context::getDefault();
3256 ch = (cl_context)pc->ptr();
3257 }
3258 cl_device_id dh = (cl_device_id)d.ptr();
3259 if( !dh )
3260 dh = (cl_device_id)pc->device(0).ptr();
3261 cl_int retval = 0;
3262 cl_command_queue_properties props = withProfiling ? CL_QUEUE_PROFILING_ENABLE : 0;
3263 CV_OCL_DBG_CHECK_(handle = clCreateCommandQueue(ch, dh, props, &retval), retval);
3264 isProfilingQueue_ = withProfiling;
3265 }
3266
~Implcv::ocl::Queue::Impl3267 ~Impl()
3268 {
3269 #ifdef _WIN32
3270 if (!cv::__termination)
3271 #endif
3272 {
3273 if(handle)
3274 {
3275 CV_OCL_DBG_CHECK(clFinish(handle));
3276 CV_OCL_DBG_CHECK(clReleaseCommandQueue(handle));
3277 handle = NULL;
3278 }
3279 }
3280 }
3281
getProfilingQueuecv::ocl::Queue::Impl3282 const cv::ocl::Queue& getProfilingQueue(const cv::ocl::Queue& self)
3283 {
3284 if (isProfilingQueue_)
3285 return self;
3286
3287 if (profiling_queue_.ptr())
3288 return profiling_queue_;
3289
3290 cl_context ctx = 0;
3291 CV_OCL_CHECK(clGetCommandQueueInfo(handle, CL_QUEUE_CONTEXT, sizeof(cl_context), &ctx, NULL));
3292
3293 cl_device_id device = 0;
3294 CV_OCL_CHECK(clGetCommandQueueInfo(handle, CL_QUEUE_DEVICE, sizeof(cl_device_id), &device, NULL));
3295
3296 cl_int result = CL_SUCCESS;
3297 cl_command_queue_properties props = CL_QUEUE_PROFILING_ENABLE;
3298 cl_command_queue q = clCreateCommandQueue(ctx, device, props, &result);
3299 CV_OCL_DBG_CHECK_RESULT(result, "clCreateCommandQueue(with CL_QUEUE_PROFILING_ENABLE)");
3300
3301 Queue queue;
3302 queue.p = new Impl(q, true);
3303 profiling_queue_ = queue;
3304
3305 return profiling_queue_;
3306 }
3307
3308 IMPLEMENT_REFCOUNTABLE();
3309
3310 cl_command_queue handle;
3311 bool isProfilingQueue_;
3312 cv::ocl::Queue profiling_queue_;
3313 };
3314
Queue()3315 Queue::Queue() CV_NOEXCEPT
3316 {
3317 p = 0;
3318 }
3319
Queue(const Context & c,const Device & d)3320 Queue::Queue(const Context& c, const Device& d)
3321 {
3322 p = 0;
3323 create(c, d);
3324 }
3325
/// Copy constructor: shares the implementation of @p q and adds a reference to it.
Queue::Queue(const Queue& q)
    : p(q.p)
{
    if (p)
    {
        p->addref();
    }
}
3332
/// Copy assignment: shares the implementation of @p q.
/// The new implementation is referenced before the old one is released.
Queue& Queue::operator = (const Queue& q)
{
    Impl* const incoming = (Impl*)q.p;
    if (incoming)
    {
        incoming->addref();
    }
    if (p)
    {
        p->release();
    }
    p = incoming;
    return *this;
}
3343
Queue(Queue && q)3344 Queue::Queue(Queue&& q) CV_NOEXCEPT
3345 {
3346 p = q.p;
3347 q.p = nullptr;
3348 }
3349
operator =(Queue && q)3350 Queue& Queue::operator = (Queue&& q) CV_NOEXCEPT
3351 {
3352 if (this != &q) {
3353 if(p)
3354 p->release();
3355 p = q.p;
3356 q.p = nullptr;
3357 }
3358 return *this;
3359 }
3360
~Queue()3361 Queue::~Queue()
3362 {
3363 if(p)
3364 p->release();
3365 }
3366
create(const Context & c,const Device & d)3367 bool Queue::create(const Context& c, const Device& d)
3368 {
3369 if(p)
3370 p->release();
3371 p = new Impl(c, d);
3372 return p->handle != 0;
3373 }
3374
finish()3375 void Queue::finish()
3376 {
3377 if(p && p->handle)
3378 {
3379 CV_OCL_DBG_CHECK(clFinish(p->handle));
3380 }
3381 }
3382
getProfilingQueue() const3383 const Queue& Queue::getProfilingQueue() const
3384 {
3385 CV_Assert(p);
3386 return p->getProfilingQueue(*this);
3387 }
3388
ptr() const3389 void* Queue::ptr() const
3390 {
3391 return p ? p->handle : 0;
3392 }
3393
getDefault()3394 Queue& Queue::getDefault()
3395 {
3396 auto& c = OpenCLExecutionContext::getCurrent();
3397 if (!c.empty())
3398 {
3399 auto& q = c.getQueue();
3400 return q;
3401 }
3402 static Queue dummy;
3403 return dummy;
3404 }
3405
getQueue(const Queue & q)3406 static cl_command_queue getQueue(const Queue& q)
3407 {
3408 cl_command_queue qq = (cl_command_queue)q.ptr();
3409 if(!qq)
3410 qq = (cl_command_queue)Queue::getDefault().ptr();
3411 return qq;
3412 }
3413
3414 /////////////////////////////////////////// KernelArg /////////////////////////////////////////////
3415
KernelArg()3416 KernelArg::KernelArg() CV_NOEXCEPT
3417 : flags(0), m(0), obj(0), sz(0), wscale(1), iwscale(1)
3418 {
3419 }
3420
KernelArg(int _flags,UMat * _m,int _wscale,int _iwscale,const void * _obj,size_t _sz)3421 KernelArg::KernelArg(int _flags, UMat* _m, int _wscale, int _iwscale, const void* _obj, size_t _sz)
3422 : flags(_flags), m(_m), obj(_obj), sz(_sz), wscale(_wscale), iwscale(_iwscale)
3423 {
3424 CV_Assert(_flags == LOCAL || _flags == CONSTANT || _m != NULL);
3425 }
3426
Constant(const Mat & m)3427 KernelArg KernelArg::Constant(const Mat& m)
3428 {
3429 CV_Assert(m.isContinuous());
3430 return KernelArg(CONSTANT, 0, 0, 0, m.ptr(), m.total()*m.elemSize());
3431 }
3432
3433 /////////////////////////////////////////// Kernel /////////////////////////////////////////////
3434
3435 struct Kernel::Impl
3436 {
Implcv::ocl::Kernel::Impl3437 Impl(const char* kname, const Program& prog) :
3438 refcount(1), handle(NULL), isInProgress(false), isAsyncRun(false), nu(0)
3439 {
3440 cl_program ph = (cl_program)prog.ptr();
3441 cl_int retval = 0;
3442 name = kname;
3443 if (ph)
3444 {
3445 handle = clCreateKernel(ph, kname, &retval);
3446 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateKernel('%s')", kname).c_str());
3447 }
3448 for( int i = 0; i < MAX_ARRS; i++ )
3449 u[i] = 0;
3450 haveTempDstUMats = false;
3451 haveTempSrcUMats = false;
3452 }
3453
cleanupUMatscv::ocl::Kernel::Impl3454 void cleanupUMats()
3455 {
3456 for( int i = 0; i < MAX_ARRS; i++ )
3457 if( u[i] )
3458 {
3459 if( CV_XADD(&u[i]->urefcount, -1) == 1 )
3460 {
3461 u[i]->flags |= UMatData::ASYNC_CLEANUP;
3462 u[i]->currAllocator->deallocate(u[i]);
3463 }
3464 u[i] = 0;
3465 }
3466 nu = 0;
3467 haveTempDstUMats = false;
3468 haveTempSrcUMats = false;
3469 }
3470
addUMatcv::ocl::Kernel::Impl3471 void addUMat(const UMat& m, bool dst)
3472 {
3473 CV_Assert(nu < MAX_ARRS && m.u && m.u->urefcount > 0);
3474 u[nu] = m.u;
3475 CV_XADD(&m.u->urefcount, 1);
3476 nu++;
3477 if(dst && m.u->tempUMat())
3478 haveTempDstUMats = true;
3479 if(m.u->originalUMatData == NULL && m.u->tempUMat())
3480 haveTempSrcUMats = true; // UMat is created on RAW memory (without proper lifetime management, even from Mat)
3481 }
3482
3483 /// Preserve image lifetime (while it is specified as Kernel argument)
registerImageArgumentcv::ocl::Kernel::Impl3484 void registerImageArgument(int arg, const Image2D& image)
3485 {
3486 CV_CheckGE(arg, 0, "");
3487 if (arg < (int)shadow_images.size() && shadow_images[arg].ptr() != image.ptr()) // TODO future: replace ptr => impl (more strong check)
3488 {
3489 CV_Check(arg, !isInProgress, "ocl::Kernel: clearing of pending Image2D arguments is not allowed");
3490 }
3491 shadow_images.reserve(MAX_ARRS);
3492 shadow_images.resize(std::max(shadow_images.size(), (size_t)arg + 1));
3493 shadow_images[arg] = image;
3494 }
3495
finitcv::ocl::Kernel::Impl3496 void finit(cl_event e)
3497 {
3498 CV_UNUSED(e);
3499 cleanupUMats();
3500 isInProgress = false;
3501 release();
3502 }
3503
3504 bool run(int dims, size_t _globalsize[], size_t _localsize[],
3505 bool sync, int64* timeNS, const Queue& q);
3506
~Implcv::ocl::Kernel::Impl3507 ~Impl()
3508 {
3509 if(handle)
3510 {
3511 CV_OCL_DBG_CHECK(clReleaseKernel(handle));
3512 }
3513 }
3514
3515 IMPLEMENT_REFCOUNTABLE();
3516
3517 cv::String name;
3518 cl_kernel handle;
3519 enum { MAX_ARRS = 16 };
3520 UMatData* u[MAX_ARRS];
3521 bool isInProgress;
3522 bool isAsyncRun; // true if kernel was scheduled in async mode
3523 int nu;
3524 std::vector<Image2D> shadow_images;
3525 bool haveTempDstUMats;
3526 bool haveTempSrcUMats;
3527 };
3528
3529 }} // namespace cv::ocl
3530
3531 extern "C" {
3532
oclCleanupCallback(cl_event e,cl_int,void * p)3533 static void CL_CALLBACK oclCleanupCallback(cl_event e, cl_int, void *p)
3534 {
3535 try
3536 {
3537 ((cv::ocl::Kernel::Impl*)p)->finit(e);
3538 }
3539 catch (const cv::Exception& exc)
3540 {
3541 CV_LOG_ERROR(NULL, "OCL: Unexpected OpenCV exception in OpenCL callback: " << exc.what());
3542 }
3543 catch (const std::exception& exc)
3544 {
3545 CV_LOG_ERROR(NULL, "OCL: Unexpected C++ exception in OpenCL callback: " << exc.what());
3546 }
3547 catch (...)
3548 {
3549 CV_LOG_ERROR(NULL, "OCL: Unexpected unknown C++ exception in OpenCL callback");
3550 }
3551 }
3552
3553 }
3554
3555 namespace cv { namespace ocl {
3556
Kernel()3557 Kernel::Kernel() CV_NOEXCEPT
3558 {
3559 p = 0;
3560 }
3561
Kernel(const char * kname,const Program & prog)3562 Kernel::Kernel(const char* kname, const Program& prog)
3563 {
3564 p = 0;
3565 create(kname, prog);
3566 }
3567
Kernel(const char * kname,const ProgramSource & src,const String & buildopts,String * errmsg)3568 Kernel::Kernel(const char* kname, const ProgramSource& src,
3569 const String& buildopts, String* errmsg)
3570 {
3571 p = 0;
3572 create(kname, src, buildopts, errmsg);
3573 }
3574
/// Copy constructor: shares the implementation of @p k and adds a reference to it.
Kernel::Kernel(const Kernel& k)
    : p(k.p)
{
    if (p)
    {
        p->addref();
    }
}
3581
/// Copy assignment: shares the implementation of @p k.
/// The new implementation is referenced before the old one is released.
Kernel& Kernel::operator = (const Kernel& k)
{
    Impl* const incoming = (Impl*)k.p;
    if (incoming)
    {
        incoming->addref();
    }
    if (p)
    {
        p->release();
    }
    p = incoming;
    return *this;
}
3592
Kernel(Kernel && k)3593 Kernel::Kernel(Kernel&& k) CV_NOEXCEPT
3594 {
3595 p = k.p;
3596 k.p = nullptr;
3597 }
3598
operator =(Kernel && k)3599 Kernel& Kernel::operator = (Kernel&& k) CV_NOEXCEPT
3600 {
3601 if (this != &k) {
3602 if(p)
3603 p->release();
3604 p = k.p;
3605 k.p = nullptr;
3606 }
3607 return *this;
3608 }
3609
~Kernel()3610 Kernel::~Kernel()
3611 {
3612 if(p)
3613 p->release();
3614 }
3615
create(const char * kname,const Program & prog)3616 bool Kernel::create(const char* kname, const Program& prog)
3617 {
3618 if(p)
3619 p->release();
3620 p = new Impl(kname, prog);
3621 if(p->handle == 0)
3622 {
3623 p->release();
3624 p = 0;
3625 }
3626 #ifdef CV_OPENCL_RUN_ASSERT // check kernel compilation fails
3627 CV_Assert(p);
3628 #endif
3629 return p != 0;
3630 }
3631
create(const char * kname,const ProgramSource & src,const String & buildopts,String * errmsg)3632 bool Kernel::create(const char* kname, const ProgramSource& src,
3633 const String& buildopts, String* errmsg)
3634 {
3635 if(p)
3636 {
3637 p->release();
3638 p = 0;
3639 }
3640 String tempmsg;
3641 if( !errmsg ) errmsg = &tempmsg;
3642 const Program prog = Context::getDefault().getProg(src, buildopts, *errmsg);
3643 return create(kname, prog);
3644 }
3645
ptr() const3646 void* Kernel::ptr() const
3647 {
3648 return p ? p->handle : 0;
3649 }
3650
empty() const3651 bool Kernel::empty() const
3652 {
3653 return ptr() == 0;
3654 }
3655
dumpValue(size_t sz,const void * p)3656 static cv::String dumpValue(size_t sz, const void* p)
3657 {
3658 if (sz == 4)
3659 return cv::format("%d / %uu / 0x%08x / %g", *(int*)p, *(int*)p, *(int*)p, *(float*)p);
3660 if (sz == 8)
3661 return cv::format("%lld / %lluu / 0x%16llx / %g", *(long long*)p, *(long long*)p, *(long long*)p, *(double*)p);
3662 return cv::format("%p", p);
3663 }
3664
set(int i,const void * value,size_t sz)3665 int Kernel::set(int i, const void* value, size_t sz)
3666 {
3667 if (!p || !p->handle)
3668 return -1;
3669 if (i < 0)
3670 return i;
3671 if( i == 0 )
3672 p->cleanupUMats();
3673
3674 cl_int retval = clSetKernelArg(p->handle, (cl_uint)i, sz, value);
3675 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clSetKernelArg('%s', arg_index=%d, size=%d, value=%s)", p->name.c_str(), (int)i, (int)sz, dumpValue(sz, value).c_str()).c_str());
3676 if (retval != CL_SUCCESS)
3677 return -1;
3678 return i+1;
3679 }
3680
set(int i,const Image2D & image2D)3681 int Kernel::set(int i, const Image2D& image2D)
3682 {
3683 cl_mem h = (cl_mem)image2D.ptr();
3684 int res = set(i, &h, sizeof(h));
3685 if (res >= 0)
3686 p->registerImageArgument(i, image2D);
3687 return res;
3688 }
3689
set(int i,const UMat & m)3690 int Kernel::set(int i, const UMat& m)
3691 {
3692 return set(i, KernelArg(KernelArg::READ_WRITE, (UMat*)&m));
3693 }
3694
/// Sets one logical kernel argument described by @p arg, starting at index @p i.
///
/// A UMat-backed argument (arg.m != NULL) expands into several OpenCL arguments:
///  - PTR_ONLY:        buffer
///  - dims <= 2:       buffer, step, offset [, rows, cols]
///  - otherwise (3D):  buffer, slicestep, step, offset [, slices, rows, cols]
/// The bracketed size arguments are skipped when NO_SIZE is set. The "cols" value
/// is scaled as cols*wscale/iwscale.
/// An argument without UMat is passed as arg.sz raw bytes from arg.obj.
///
/// @return index of the next free argument slot; -1 when the kernel is empty or no
///         memory handle could be obtained for the UMat (the kernel is then released
///         and this object becomes empty); a negative @p i is logged and returned unchanged.
/// @note The status codes of the individual clSetKernelArg calls are handed to
///       CV_OCL_DBG_CHECK_RESULT; the value returned by this function does not depend on them.
int Kernel::set(int i, const KernelArg& arg)
{
    if( !p || !p->handle )
        return -1;
    if (i < 0)
    {
        CV_LOG_ERROR(NULL, cv::format("OpenCL: Kernel(%s)::set(arg_index=%d): negative arg_index",
                p->name.c_str(), (int)i));
        return i;
    }
    // argument 0 starts a new argument set: drop UMat references of the previous one
    if( i == 0 )
        p->cleanupUMats();
    cl_int status = 0;
    if( arg.m )
    {
        // translate KernelArg access bits into UMat access flags
        AccessFlag accessFlags = ((arg.flags & KernelArg::READ_ONLY) ? ACCESS_READ : static_cast<AccessFlag>(0)) |
                                 ((arg.flags & KernelArg::WRITE_ONLY) ? ACCESS_WRITE : static_cast<AccessFlag>(0));
        bool ptronly = (arg.flags & KernelArg::PTR_ONLY) != 0;
        // an empty UMat passed as pointer-only becomes a NULL cl_mem argument
        if (ptronly && arg.m->empty())
        {
            cl_mem h_null = (cl_mem)NULL;
            status = clSetKernelArg(p->handle, (cl_uint)i, sizeof(h_null), &h_null);
            CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, cl_mem=NULL)", p->name.c_str(), (int)i).c_str());
            return i + 1;
        }
        cl_mem h = (cl_mem)arg.m->handle(accessFlags);

        if (!h)
        {
            CV_LOG_ERROR(NULL, cv::format("OpenCL: Kernel(%s)::set(arg_index=%d, flags=%d): can't create cl_mem handle for passed UMat buffer (addr=%p)",
                    p->name.c_str(), (int)i, (int)arg.flags, arg.m));
            // the kernel becomes unusable: release the implementation and leave this object empty
            p->release();
            p = 0;
            return -1;
        }

#ifdef HAVE_OPENCL_SVM
        // SVM-allocated buffers are passed through the SVM pointer setter instead of clSetKernelArg
        if ((arg.m->u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
        {
            const Context& ctx = Context::getDefault();
            const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
            uchar*& svmDataPtr = (uchar*&)arg.m->u->handle;
            CV_OPENCL_SVM_TRACE_P("clSetKernelArgSVMPointer: %p\n", svmDataPtr);
#if 1 // TODO
            status = svmFns->fn_clSetKernelArgSVMPointer(p->handle, (cl_uint)i, svmDataPtr);
#else
            status = svmFns->fn_clSetKernelArgSVMPointer(p->handle, (cl_uint)i, &svmDataPtr);
#endif
            CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArgSVMPointer('%s', arg_index=%d, ptr=%p)", p->name.c_str(), (int)i, (void*)svmDataPtr).c_str());
        }
        else
#endif
        {
            status = clSetKernelArg(p->handle, (cl_uint)i, sizeof(h), &h);
            CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, cl_mem=%p)", p->name.c_str(), (int)i, (void*)h).c_str());
        }

        if (ptronly)
        {
            // only the buffer itself is passed
            i++;
        }
        else if( arg.m->dims <= 2 )
        {
            // 2D layout: step and offset follow the buffer
            UMat2D u2d(*arg.m);
            status = clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u2d.step), &u2d.step);
            CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, step_value=%d)", p->name.c_str(), (int)(i+1), (int)u2d.step).c_str());
            status = clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u2d.offset), &u2d.offset);
            CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, offset_value=%d)", p->name.c_str(), (int)(i+2), (int)u2d.offset).c_str());
            i += 3;

            if( !(arg.flags & KernelArg::NO_SIZE) )
            {
                // rows and (scaled) cols follow unless NO_SIZE is requested
                int cols = u2d.cols*arg.wscale/arg.iwscale;
                status = clSetKernelArg(p->handle, (cl_uint)i, sizeof(u2d.rows), &u2d.rows);
                CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, rows_value=%d)", p->name.c_str(), (int)i, (int)u2d.rows).c_str());
                status = clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(cols), &cols);
                CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, cols_value=%d)", p->name.c_str(), (int)(i+1), (int)cols).c_str());
                i += 2;
            }
        }
        else
        {
            // 3D layout: slicestep, step and offset follow the buffer
            UMat3D u3d(*arg.m);
            status = clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.slicestep), &u3d.slicestep);
            CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, slicestep_value=%d)", p->name.c_str(), (int)(i+1), (int)u3d.slicestep).c_str());
            status = clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.step), &u3d.step);
            CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, step_value=%d)", p->name.c_str(), (int)(i+2), (int)u3d.step).c_str());
            status = clSetKernelArg(p->handle, (cl_uint)(i+3), sizeof(u3d.offset), &u3d.offset);
            CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, offset_value=%d)", p->name.c_str(), (int)(i+3), (int)u3d.offset).c_str());
            i += 4;
            if( !(arg.flags & KernelArg::NO_SIZE) )
            {
                // slices, rows and (scaled) cols follow unless NO_SIZE is requested
                int cols = u3d.cols*arg.wscale/arg.iwscale;
                status = clSetKernelArg(p->handle, (cl_uint)i, sizeof(u3d.slices), &u3d.slices);
                CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, slices_value=%d)", p->name.c_str(), (int)i, (int)u3d.slices).c_str());
                status = clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.rows), &u3d.rows);
                CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, rows_value=%d)", p->name.c_str(), (int)(i+1), (int)u3d.rows).c_str());
                status = clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.cols), &cols);
                CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, cols_value=%d)", p->name.c_str(), (int)(i+2), (int)cols).c_str());
                i += 3;
            }
        }
        // remember the UMat data so the kernel holds a reference to it until cleanupUMats()
        p->addUMat(*arg.m, !!(accessFlags & ACCESS_WRITE));
        return i;
    }
    // no UMat attached: pass the raw object bytes
    status = clSetKernelArg(p->handle, (cl_uint)i, arg.sz, arg.obj);
    CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, size=%d, obj=%p)", p->name.c_str(), (int)i, (int)arg.sz, (void*)arg.obj).c_str());
    return i+1;
}
3804
run(int dims,size_t _globalsize[],size_t _localsize[],bool sync,const Queue & q)3805 bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
3806 bool sync, const Queue& q)
3807 {
3808 if (!p)
3809 return false;
3810
3811 size_t globalsize[CV_MAX_DIM] = {1,1,1};
3812 size_t total = 1;
3813 CV_Assert(_globalsize != NULL);
3814 for (int i = 0; i < dims; i++)
3815 {
3816 size_t val = _localsize ? _localsize[i] :
3817 dims == 1 ? 64 : dims == 2 ? (i == 0 ? 256 : 8) : dims == 3 ? (8>>(int)(i>0)) : 1;
3818 CV_Assert( val > 0 );
3819 total *= _globalsize[i];
3820 if (_globalsize[i] == 1 && !_localsize)
3821 val = 1;
3822 globalsize[i] = divUp(_globalsize[i], (unsigned int)val) * val;
3823 }
3824 CV_Assert(total > 0);
3825
3826 return p->run(dims, globalsize, _localsize, sync, NULL, q);
3827 }
3828
3829
isRaiseErrorOnReuseAsyncKernel()3830 static bool isRaiseErrorOnReuseAsyncKernel()
3831 {
3832 static bool initialized = false;
3833 static bool value = false;
3834 if (!initialized)
3835 {
3836 value = cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_RAISE_ERROR_REUSE_ASYNC_KERNEL", false);
3837 initialized = true;
3838 }
3839 return value;
3840 }
3841
/// Enqueues the kernel on queue @p q (or the default queue when @p q has no handle).
///
/// @param dims       number of dimensions of the range
/// @param globalsize global work size per dimension
/// @param localsize  local work size per dimension, or NULL
/// @param sync       wait for completion before returning; forced to true when
///                   temporary UMats are involved or when @p timeNS is given
/// @param timeNS     optional output: execution time taken from the event
///                   profiling info (END - START), or -1 when enqueueing failed
/// @param q          queue to run on
/// @return true when clEnqueueNDRangeKernel reported CL_SUCCESS; false also when
///         the kernel has no handle, was already scheduled in async mode, or a
///         previous launch is still in progress.
bool Kernel::Impl::run(int dims, size_t globalsize[], size_t localsize[],
        bool sync, int64* timeNS, const Queue& q)
{
    CV_INSTRUMENT_REGION_OPENCL_RUN(name.c_str());

    if (!handle)
    {
        CV_LOG_ERROR(NULL, "OpenCL kernel has zero handle: " << name);
        return false;
    }

    if (isAsyncRun)
    {
        CV_LOG_ERROR(NULL, "OpenCL kernel can't be reused in async mode: " << name);
        if (isRaiseErrorOnReuseAsyncKernel())
            CV_Assert(0);
        return false;  // OpenCV 5.0: raise error
    }
    // records the caller's request; 'sync' may still be forced to true below
    isAsyncRun = !sync;

    if (isInProgress)
    {
        CV_LOG_ERROR(NULL, "Previous OpenCL kernel launch is not finished: " << name);
        if (isRaiseErrorOnReuseAsyncKernel())
            CV_Assert(0);
        return false;  // OpenCV 5.0: raise error
    }

    cl_command_queue qq = getQueue(q);
    // cases that always run synchronously
    if (haveTempDstUMats)
        sync = true;
    if (haveTempSrcUMats)
        sync = true;
    if (timeNS)
        sync = true;
    cl_event asyncEvent = 0;
    // an event is requested for async runs and for timing measurements
    cl_int retval = clEnqueueNDRangeKernel(qq, handle, (cl_uint)dims,
                                           NULL, globalsize, localsize, 0, 0,
                                           (sync && !timeNS) ? 0 : &asyncEvent);
    // the message below is produced on failure only, unless CV_OPENCL_SHOW_RUN_KERNELS is enabled
#if !CV_OPENCL_SHOW_RUN_KERNELS
    if (retval != CL_SUCCESS)
#endif
    {
        cv::String msg = cv::format("clEnqueueNDRangeKernel('%s', dims=%d, globalsize=%zux%zux%zu, localsize=%s) sync=%s", name.c_str(), (int)dims,
                        globalsize[0], (dims > 1 ? globalsize[1] : 1), (dims > 2 ? globalsize[2] : 1),
                        (localsize ? cv::format("%zux%zux%zu", localsize[0], (dims > 1 ? localsize[1] : 1), (dims > 2 ? localsize[2] : 1)) : cv::String("NULL")).c_str(),
                        sync ? "true" : "false"
                        );
        if (retval != CL_SUCCESS)
        {
            msg = CV_OCL_API_ERROR_MSG(retval, msg.c_str());
        }
#if CV_OPENCL_TRACE_CHECK
        CV_OCL_TRACE_CHECK_RESULT(retval, msg.c_str());
#else
        printf("%s\n", msg.c_str());
        fflush(stdout);
#endif
    }
    if (sync || retval != CL_SUCCESS)
    {
        // synchronous path (also taken after a failed enqueue): wait, then drop UMat references
        CV_OCL_DBG_CHECK(clFinish(qq));
        if (timeNS)
        {
            if (retval == CL_SUCCESS)
            {
                CV_OCL_DBG_CHECK(clWaitForEvents(1, &asyncEvent));
                cl_ulong startTime, stopTime;
                CV_OCL_CHECK(clGetEventProfilingInfo(asyncEvent, CL_PROFILING_COMMAND_START, sizeof(startTime), &startTime, NULL));
                CV_OCL_CHECK(clGetEventProfilingInfo(asyncEvent, CL_PROFILING_COMMAND_END, sizeof(stopTime), &stopTime, NULL));
                *timeNS = (int64)(stopTime - startTime);
            }
            else
            {
                *timeNS = -1;
            }
        }
        cleanupUMats();
    }
    else
    {
        // asynchronous path: hold a reference to this object until the completion
        // callback runs (finit() releases it and clears isInProgress)
        addref();
        isInProgress = true;
        CV_OCL_CHECK(clSetEventCallback(asyncEvent, CL_COMPLETE, oclCleanupCallback, this));
    }
    if (asyncEvent)
        CV_OCL_DBG_CHECK(clReleaseEvent(asyncEvent));
    return retval == CL_SUCCESS;
}
3931
runTask(bool sync,const Queue & q)3932 bool Kernel::runTask(bool sync, const Queue& q)
3933 {
3934 if(!p || !p->handle || p->isInProgress)
3935 return false;
3936
3937 cl_command_queue qq = getQueue(q);
3938 cl_event asyncEvent = 0;
3939 cl_int retval = clEnqueueTask(qq, p->handle, 0, 0, sync ? 0 : &asyncEvent);
3940 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clEnqueueTask('%s') sync=%s", p->name.c_str(), sync ? "true" : "false").c_str());
3941 if (sync || retval != CL_SUCCESS)
3942 {
3943 CV_OCL_DBG_CHECK(clFinish(qq));
3944 p->cleanupUMats();
3945 }
3946 else
3947 {
3948 p->addref();
3949 p->isInProgress = true;
3950 CV_OCL_CHECK(clSetEventCallback(asyncEvent, CL_COMPLETE, oclCleanupCallback, p));
3951 }
3952 if (asyncEvent)
3953 CV_OCL_DBG_CHECK(clReleaseEvent(asyncEvent));
3954 return retval == CL_SUCCESS;
3955 }
3956
runProfiling(int dims,size_t globalsize[],size_t localsize[],const Queue & q_)3957 int64 Kernel::runProfiling(int dims, size_t globalsize[], size_t localsize[], const Queue& q_)
3958 {
3959 CV_Assert(p && p->handle && !p->isInProgress);
3960 Queue q = q_.ptr() ? q_ : Queue::getDefault();
3961 CV_Assert(q.ptr());
3962 q.finish(); // call clFinish() on base queue
3963 Queue profilingQueue = q.getProfilingQueue();
3964 int64 timeNs = -1;
3965 bool res = p->run(dims, globalsize, localsize, true, &timeNs, profilingQueue);
3966 return res ? timeNs : -1;
3967 }
3968
workGroupSize() const3969 size_t Kernel::workGroupSize() const
3970 {
3971 if(!p || !p->handle)
3972 return 0;
3973 size_t val = 0, retsz = 0;
3974 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3975 cl_int status = clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_WORK_GROUP_SIZE, sizeof(val), &val, &retsz);
3976 CV_OCL_CHECK_RESULT(status, "clGetKernelWorkGroupInfo(CL_KERNEL_WORK_GROUP_SIZE)");
3977 return status == CL_SUCCESS ? val : 0;
3978 }
3979
preferedWorkGroupSizeMultiple() const3980 size_t Kernel::preferedWorkGroupSizeMultiple() const
3981 {
3982 if(!p || !p->handle)
3983 return 0;
3984 size_t val = 0, retsz = 0;
3985 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3986 cl_int status = clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(val), &val, &retsz);
3987 CV_OCL_CHECK_RESULT(status, "clGetKernelWorkGroupInfo(CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE)");
3988 return status == CL_SUCCESS ? val : 0;
3989 }
3990
compileWorkGroupSize(size_t wsz[]) const3991 bool Kernel::compileWorkGroupSize(size_t wsz[]) const
3992 {
3993 if(!p || !p->handle || !wsz)
3994 return 0;
3995 size_t retsz = 0;
3996 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3997 cl_int status = clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, sizeof(wsz[0])*3, wsz, &retsz);
3998 CV_OCL_CHECK_RESULT(status, "clGetKernelWorkGroupInfo(CL_KERNEL_COMPILE_WORK_GROUP_SIZE)");
3999 return status == CL_SUCCESS;
4000 }
4001
localMemSize() const4002 size_t Kernel::localMemSize() const
4003 {
4004 if(!p || !p->handle)
4005 return 0;
4006 size_t retsz = 0;
4007 cl_ulong val = 0;
4008 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
4009 cl_int status = clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(val), &val, &retsz);
4010 CV_OCL_CHECK_RESULT(status, "clGetKernelWorkGroupInfo(CL_KERNEL_LOCAL_MEM_SIZE)");
4011 return status == CL_SUCCESS ? (size_t)val : 0;
4012 }
4013
4014
4015
4016 ///////////////////////////////////////// ProgramSource ///////////////////////////////////////////////
4017
4018 struct ProgramSource::Impl
4019 {
4020 IMPLEMENT_REFCOUNTABLE();
4021
4022 enum KIND {
4023 PROGRAM_SOURCE_CODE = 0,
4024 PROGRAM_BINARIES,
4025 PROGRAM_SPIR,
4026 PROGRAM_SPIRV
4027 } kind_;
4028
Implcv::ocl::ProgramSource::Impl4029 Impl(const String& src)
4030 {
4031 init(PROGRAM_SOURCE_CODE, cv::String(), cv::String());
4032 initFromSource(src, cv::String());
4033 }
Implcv::ocl::ProgramSource::Impl4034 Impl(const String& module, const String& name, const String& codeStr, const String& codeHash)
4035 {
4036 init(PROGRAM_SOURCE_CODE, module, name);
4037 initFromSource(codeStr, codeHash);
4038 }
4039
4040 /// reset fields
initcv::ocl::ProgramSource::Impl4041 void init(enum KIND kind, const String& module, const String& name)
4042 {
4043 refcount = 1;
4044 kind_ = kind;
4045 module_ = module;
4046 name_ = name;
4047
4048 sourceAddr_ = NULL;
4049 sourceSize_ = 0;
4050 isHashUpdated = false;
4051 }
4052
initFromSourcecv::ocl::ProgramSource::Impl4053 void initFromSource(const String& codeStr, const String& codeHash)
4054 {
4055 codeStr_ = codeStr;
4056 sourceHash_ = codeHash;
4057 if (sourceHash_.empty())
4058 {
4059 updateHash();
4060 }
4061 else
4062 {
4063 isHashUpdated = true;
4064 }
4065 }
4066
updateHashcv::ocl::ProgramSource::Impl4067 void updateHash(const char* hashStr = NULL)
4068 {
4069 if (hashStr)
4070 {
4071 sourceHash_ = cv::String(hashStr);
4072 isHashUpdated = true;
4073 return;
4074 }
4075 uint64 hash = 0;
4076 switch (kind_)
4077 {
4078 case PROGRAM_SOURCE_CODE:
4079 if (sourceAddr_)
4080 {
4081 CV_Assert(codeStr_.empty());
4082 hash = crc64(sourceAddr_, sourceSize_); // static storage
4083 }
4084 else
4085 {
4086 CV_Assert(!codeStr_.empty());
4087 hash = crc64((uchar*)codeStr_.c_str(), codeStr_.size());
4088 }
4089 break;
4090 case PROGRAM_BINARIES:
4091 case PROGRAM_SPIR:
4092 case PROGRAM_SPIRV:
4093 hash = crc64(sourceAddr_, sourceSize_);
4094 break;
4095 default:
4096 CV_Error(Error::StsInternal, "Internal error");
4097 }
4098 sourceHash_ = cv::format("%08jx", (uintmax_t)hash);
4099 isHashUpdated = true;
4100 }
4101
Implcv::ocl::ProgramSource::Impl4102 Impl(enum KIND kind,
4103 const String& module, const String& name,
4104 const unsigned char* binary, const size_t size,
4105 const cv::String& buildOptions = cv::String())
4106 {
4107 init(kind, module, name);
4108
4109 sourceAddr_ = binary;
4110 sourceSize_ = size;
4111
4112 buildOptions_ = buildOptions;
4113 }
4114
fromSourceWithStaticLifetimecv::ocl::ProgramSource::Impl4115 static ProgramSource fromSourceWithStaticLifetime(const String& module, const String& name,
4116 const char* sourceCodeStaticStr, const char* hashStaticStr,
4117 const cv::String& buildOptions)
4118 {
4119 ProgramSource result;
4120 result.p = new Impl(PROGRAM_SOURCE_CODE, module, name,
4121 (const unsigned char*)sourceCodeStaticStr, strlen(sourceCodeStaticStr), buildOptions);
4122 result.p->updateHash(hashStaticStr);
4123 return result;
4124 }
4125
fromBinarycv::ocl::ProgramSource::Impl4126 static ProgramSource fromBinary(const String& module, const String& name,
4127 const unsigned char* binary, const size_t size,
4128 const cv::String& buildOptions)
4129 {
4130 ProgramSource result;
4131 result.p = new Impl(PROGRAM_BINARIES, module, name, binary, size, buildOptions);
4132 return result;
4133 }
4134
fromSPIRcv::ocl::ProgramSource::Impl4135 static ProgramSource fromSPIR(const String& module, const String& name,
4136 const unsigned char* binary, const size_t size,
4137 const cv::String& buildOptions)
4138 {
4139 ProgramSource result;
4140 result.p = new Impl(PROGRAM_SPIR, module, name, binary, size, buildOptions);
4141 return result;
4142 }
4143
4144 String module_;
4145 String name_;
4146
4147 // TODO std::vector<ProgramSource> includes_;
4148 String codeStr_; // PROGRAM_SOURCE_CODE only
4149
4150 const unsigned char* sourceAddr_;
4151 size_t sourceSize_;
4152
4153 cv::String buildOptions_;
4154
4155 String sourceHash_;
4156 bool isHashUpdated;
4157
4158 friend struct Program::Impl;
4159 friend struct internal::ProgramEntry;
4160 friend struct Context::Impl;
4161 };
4162
4163
ProgramSource()4164 ProgramSource::ProgramSource() CV_NOEXCEPT
4165 {
4166 p = 0;
4167 }
4168
ProgramSource(const String & module,const String & name,const String & codeStr,const String & codeHash)4169 ProgramSource::ProgramSource(const String& module, const String& name, const String& codeStr, const String& codeHash)
4170 {
4171 p = new Impl(module, name, codeStr, codeHash);
4172 }
4173
ProgramSource(const char * prog)4174 ProgramSource::ProgramSource(const char* prog)
4175 {
4176 p = new Impl(prog);
4177 }
4178
ProgramSource(const String & prog)4179 ProgramSource::ProgramSource(const String& prog)
4180 {
4181 p = new Impl(prog);
4182 }
4183
~ProgramSource()4184 ProgramSource::~ProgramSource()
4185 {
4186 if(p)
4187 p->release();
4188 }
4189
/** Copy constructor: share the implementation of prog and add a reference to it. */
ProgramSource::ProgramSource(const ProgramSource& prog)
    : p(prog.p)
{
    if (p != NULL)
    {
        p->addref();
    }
}
4196
/** Copy assignment: share the implementation of prog.
 *
 * The incoming implementation is referenced before the current one is released,
 * so assigning an object to itself never drops the reference count to zero in between.
 */
ProgramSource& ProgramSource::operator = (const ProgramSource& prog)
{
    Impl* const incoming = (Impl*)prog.p;
    if (incoming != NULL)
    {
        incoming->addref();
    }
    if (p != NULL)
    {
        p->release();
    }
    p = incoming;
    return *this;
}
4207
ProgramSource(ProgramSource && prog)4208 ProgramSource::ProgramSource(ProgramSource&& prog) CV_NOEXCEPT
4209 {
4210 p = prog.p;
4211 prog.p = nullptr;
4212 }
4213
operator =(ProgramSource && prog)4214 ProgramSource& ProgramSource::operator = (ProgramSource&& prog) CV_NOEXCEPT
4215 {
4216 if (this != &prog) {
4217 if(p)
4218 p->release();
4219 p = prog.p;
4220 prog.p = nullptr;
4221 }
4222 return *this;
4223 }
4224
/** Return the program text stored in codeStr_.
 *
 * Asserts that this object is non-empty, is of kind PROGRAM_SOURCE_CODE and does not
 * refer to an external buffer (sourceAddr_): a reference is returned, so the text
 * must already exist as a String member.
 */
const String& ProgramSource::source() const
{
    CV_Assert(p);
    CV_Assert(p->kind_ == Impl::PROGRAM_SOURCE_CODE);
    CV_Assert(p->sourceAddr_ == NULL); // method returns reference - can't construct temporary object
    return p->codeStr_;
}
4232
/** Removed method kept for API compatibility: always raises Error::StsNotImplemented. */
ProgramSource::hash_t ProgramSource::hash() const
{
    CV_Error(Error::StsNotImplemented, "Removed method: ProgramSource::hash()");
}
4237
/** Create a program source from a device binary.
 *
 * @param module       module name
 * @param name         program name
 * @param binary       start of the binary; must be non-NULL (asserted)
 * @param size         binary size in bytes; must be > 0 (asserted)
 * @param buildOptions build options stored with the source
 * @return result of Impl::fromBinary() for the same arguments
 */
ProgramSource ProgramSource::fromBinary(const String& module, const String& name,
        const unsigned char* binary, const size_t size,
        const cv::String& buildOptions)
{
    CV_Assert(binary);
    CV_Assert(size > 0);
    return Impl::fromBinary(module, name, binary, size, buildOptions);
}
4246
fromSPIR(const String & module,const String & name,const unsigned char * binary,const size_t size,const cv::String & buildOptions)4247 ProgramSource ProgramSource::fromSPIR(const String& module, const String& name,
4248 const unsigned char* binary, const size_t size,
4249 const cv::String& buildOptions)
4250 {
4251 CV_Assert(binary);
4252 CV_Assert(size > 0);
4253 return Impl::fromBinary(module, name, binary, size, buildOptions);
4254 }
4255
4256
/** Return the ProgramSource for this entry, creating it on first use.
 *
 * The ProgramSource is built from the entry's module, name, programCode and programHash
 * via fromSourceWithStaticLifetime() (so programCode is referenced, not copied) and is
 * stored in pProgramSource. Nothing in this function deletes the stored object.
 */
internal::ProgramEntry::operator ProgramSource&() const
{
    // Unlocked fast path; the pointer is re-checked below while holding the mutex.
    // NOTE(review): this first read happens outside the lock - whether that is safe
    // depends on how pProgramSource is declared; confirm.
    if (this->pProgramSource == NULL)
    {
        cv::AutoLock lock(cv::getInitializationMutex());
        if (this->pProgramSource == NULL)
        {
            ProgramSource ps = ProgramSource::Impl::fromSourceWithStaticLifetime(this->module, this->name, this->programCode, this->programHash, cv::String());
            ProgramSource* ptr = new ProgramSource(ps);
            // the operator is const, so the cache pointer is written through const_cast
            const_cast<ProgramEntry*>(this)->pProgramSource = ptr;
        }
    }
    return *this->pProgramSource;
}
4271
4272
4273
4274 /////////////////////////////////////////// Program /////////////////////////////////////////////
4275
4276 static
joinBuildOptions(const cv::String & a,const cv::String & b)4277 cv::String joinBuildOptions(const cv::String& a, const cv::String& b)
4278 {
4279 if (b.empty())
4280 return a;
4281 if (a.empty())
4282 return b;
4283 if (b[0] == ' ')
4284 return a + b;
4285 return a + (cv::String(" ") + b);
4286 }
4287
4288 struct Program::Impl
4289 {
4290 IMPLEMENT_REFCOUNTABLE();
4291
Implcv::ocl::Program::Impl4292 Impl(const ProgramSource& src,
4293 const String& _buildflags, String& errmsg) :
4294 refcount(1),
4295 handle(NULL),
4296 buildflags(_buildflags)
4297 {
4298 const ProgramSource::Impl* src_ = src.getImpl();
4299 CV_Assert(src_);
4300 sourceModule_ = src_->module_;
4301 sourceName_ = src_->name_;
4302 const Context ctx = Context::getDefault();
4303 Device device = ctx.device(0);
4304 if (ctx.ptr() == NULL || device.ptr() == NULL)
4305 return;
4306 buildflags = joinBuildOptions(buildflags, src_->buildOptions_);
4307 if (src.getImpl()->kind_ == ProgramSource::Impl::PROGRAM_SOURCE_CODE)
4308 {
4309 if (device.isAMD())
4310 buildflags = joinBuildOptions(buildflags, " -D AMD_DEVICE");
4311 else if (device.isIntel())
4312 buildflags = joinBuildOptions(buildflags, " -D INTEL_DEVICE");
4313 const String param_buildExtraOptions = getBuildExtraOptions();
4314 if (!param_buildExtraOptions.empty())
4315 buildflags = joinBuildOptions(buildflags, param_buildExtraOptions);
4316 }
4317 compile(ctx, src_, errmsg);
4318 }
4319
compilecv::ocl::Program::Impl4320 bool compile(const Context& ctx, const ProgramSource::Impl* src_, String& errmsg)
4321 {
4322 CV_Assert(ctx.getImpl());
4323 CV_Assert(src_);
4324
4325 // We don't cache OpenCL binaries
4326 if (src_->kind_ == ProgramSource::Impl::PROGRAM_BINARIES)
4327 {
4328 CV_LOG_VERBOSE(NULL, 0, "Load program binary... " << src_->module_.c_str() << "/" << src_->name_.c_str());
4329 bool isLoaded = createFromBinary(ctx, src_->sourceAddr_, src_->sourceSize_, errmsg);
4330 return isLoaded;
4331 }
4332 return compileWithCache(ctx, src_, errmsg);
4333 }
4334
/** Build a program from a non-PROGRAM_BINARIES source, using the on-disk binary cache if possible.
 *
 * Steps (the cache steps exist only with OPENCV_HAVE_FILESYSTEM_SUPPORT):
 *  1. derive a cache file name from module, name and source hash;
 *  2. try to load a cached binary for the current buildflags;
 *  3. otherwise build from sources (PROGRAM_SOURCE_CODE) or load the SPIR binary (PROGRAM_SPIR);
 *  4. try to store the resulting binary in the cache;
 *  5. optionally (CV_OPENCL_VALIDATE_BINARY_PROGRAMS) re-create the program from its own binary.
 *
 * Cache read/write failures are logged and otherwise ignored.
 *
 * @param ctx    context to build for; must have an implementation (asserted)
 * @param src_   source to build; must not be PROGRAM_BINARIES (asserted)
 * @param errmsg receives the build log on failure (filled by the helpers called here)
 * @return true if handle is set on exit
 * Raises Error::StsNotImplemented for PROGRAM_SPIRV and Error::StsInternal for unknown kinds.
 */
bool compileWithCache(const Context& ctx, const ProgramSource::Impl* src_, String& errmsg)
{
    CV_Assert(ctx.getImpl());
    CV_Assert(src_);
    CV_Assert(src_->kind_ != ProgramSource::Impl::PROGRAM_BINARIES);

#if OPENCV_HAVE_FILESYSTEM_SUPPORT
    OpenCLBinaryCacheConfigurator& config = OpenCLBinaryCacheConfigurator::getSingletonInstance();
    const std::string base_dir = config.prepareCacheDirectoryForContext(
            ctx.getImpl()->getPrefixString(),
            ctx.getImpl()->getPrefixBase()
    );
    const String& hash_str = src_->sourceHash_;
    // fname stays empty (cache disabled for this program) unless the cache directory,
    // module and name are all available
    cv::String fname;
    if (!base_dir.empty() && !src_->module_.empty() && !src_->name_.empty())
    {
        CV_Assert(!hash_str.empty());
        fname = src_->module_ + "--" + src_->name_ + "_" + hash_str + ".bin";
        fname = utils::fs::join(base_dir, fname);
    }
    const cv::Ptr<utils::fs::FileLock> fileLock = config.cache_lock_; // can be empty
    if (!fname.empty() && CV_OPENCL_CACHE_ENABLE)
    {
        try
        {
            std::vector<char> binaryBuf;
            bool res = false;
            {
                // shared lock is held only while the cache file is being read
                cv::utils::optional_shared_lock_guard<cv::utils::fs::FileLock> lock_fs(fileLock.get());
                BinaryProgramFile file(fname, hash_str.c_str());
                res = file.read(buildflags, binaryBuf);
            }
            if (res)
            {
                CV_Assert(!binaryBuf.empty());
                CV_LOG_VERBOSE(NULL, 0, "Load program binary from cache: " << src_->module_.c_str() << "/" << src_->name_.c_str());
                bool isLoaded = createFromBinary(ctx, binaryBuf, errmsg);
                if (isLoaded)
                    return true;
            }
        }
        catch (const cv::Exception& e)
        {
            CV_UNUSED(e);
            CV_LOG_VERBOSE(NULL, 0, "Can't load OpenCL binary: " + fname << std::endl << e.what());
        }
        catch (...)
        {
            CV_LOG_VERBOSE(NULL, 0, "Can't load OpenCL binary: " + fname);
        }
    }
#endif // OPENCV_HAVE_FILESYSTEM_SUPPORT
    // no usable cached binary: a failed load attempt above is expected to have left handle NULL
    CV_Assert(handle == NULL);
    if (src_->kind_ == ProgramSource::Impl::PROGRAM_SOURCE_CODE)
    {
        if (!buildFromSources(ctx, src_, errmsg))
        {
            return false;
        }
    }
    else if (src_->kind_ == ProgramSource::Impl::PROGRAM_SPIR)
    {
        // NOTE(review): buildflags is extended here, after the cache lookup above used the
        // shorter value, and the extended value is what gets written to the cache below;
        // presumably a later lookup with the shorter flags will not match - confirm against
        // BinaryProgramFile::read().
        buildflags = joinBuildOptions(buildflags, " -x spir");
        if ((cv::String(" ") + buildflags).find(" -spir-std=") == cv::String::npos)
        {
            buildflags = joinBuildOptions(buildflags, " -spir-std=1.2");
        }
        CV_LOG_VERBOSE(NULL, 0, "Load program SPIR binary... " << src_->module_.c_str() << "/" << src_->name_.c_str());
        bool isLoaded = createFromBinary(ctx, src_->sourceAddr_, src_->sourceSize_, errmsg);
        if (!isLoaded)
            return false;
    }
    else if (src_->kind_ == ProgramSource::Impl::PROGRAM_SPIRV)
    {
        CV_Error(Error::StsNotImplemented, "OpenCL: SPIR-V is not supported");
    }
    else
    {
        CV_Error(Error::StsInternal, "Internal error");
    }
    CV_Assert(handle != NULL);
#if OPENCV_HAVE_FILESYSTEM_SUPPORT
    if (!fname.empty() && CV_OPENCL_CACHE_WRITE)
    {
        try
        {
            std::vector<char> binaryBuf;
            getProgramBinary(binaryBuf);
            {
                // exclusive lock is held only while the cache file is being written
                cv::utils::optional_lock_guard<cv::utils::fs::FileLock> lock_fs(fileLock.get());
                BinaryProgramFile file(fname, hash_str.c_str());
                file.write(buildflags, binaryBuf);
            }
        }
        catch (const cv::Exception& e)
        {
            CV_LOG_WARNING(NULL, "Can't save OpenCL binary into cache: " + fname << std::endl << e.what());
        }
        catch (...)
        {
            CV_LOG_WARNING(NULL, "Can't save OpenCL binary into cache: " + fname);
        }
    }
#endif // OPENCV_HAVE_FILESYSTEM_SUPPORT
#if CV_OPENCL_VALIDATE_BINARY_PROGRAMS
    if (CV_OPENCL_VALIDATE_BINARY_PROGRAMS_VALUE)
    {
        // validation mode: drop the freshly built program and re-create it from its own binary
        std::vector<char> binaryBuf;
        getProgramBinary(binaryBuf);
        if (!binaryBuf.empty())
        {
            CV_OCL_DBG_CHECK(clReleaseProgram(handle));
            handle = NULL;
            createFromBinary(ctx, binaryBuf, errmsg);
        }
    }
#endif
    return handle != NULL;
}
4454
dumpBuildLog_cv::ocl::Program::Impl4455 void dumpBuildLog_(cl_int result, const cl_device_id* deviceList, String& errmsg)
4456 {
4457 AutoBuffer<char, 4096> buffer; buffer[0] = 0;
4458
4459 size_t retsz = 0;
4460 cl_int log_retval = clGetProgramBuildInfo(handle, deviceList[0],
4461 CL_PROGRAM_BUILD_LOG, 0, 0, &retsz);
4462 if (log_retval == CL_SUCCESS && retsz > 1)
4463 {
4464 buffer.resize(retsz + 16);
4465 log_retval = clGetProgramBuildInfo(handle, deviceList[0],
4466 CL_PROGRAM_BUILD_LOG, retsz+1, buffer.data(), &retsz);
4467 if (log_retval == CL_SUCCESS)
4468 {
4469 if (retsz < buffer.size())
4470 buffer[retsz] = 0;
4471 else
4472 buffer[buffer.size() - 1] = 0;
4473 }
4474 else
4475 {
4476 buffer[0] = 0;
4477 }
4478 }
4479
4480 errmsg = String(buffer.data());
4481 printf("OpenCL program build log: %s/%s\nStatus %d: %s\n%s\n%s\n",
4482 sourceModule_.c_str(), sourceName_.c_str(),
4483 result, getOpenCLErrorString(result),
4484 buildflags.c_str(), errmsg.c_str());
4485 fflush(stdout);
4486 }
4487
/** Create and build the program from OpenCL C source text.
 *
 * @param ctx    context whose devices the program is built for
 * @param src_   source of kind PROGRAM_SOURCE_CODE (asserted); the text is taken from the
 *               external buffer when sourceAddr_ is set, otherwise from codeStr_
 * @param errmsg receives the build log when it is dumped
 * @return true if handle is set on exit
 *
 * Requires handle == NULL on entry (asserted). On a failed build the build log is dumped
 * and the program object is released.
 */
bool buildFromSources(const Context& ctx, const ProgramSource::Impl* src_, String& errmsg)
{
    CV_Assert(src_);
    CV_Assert(src_->kind_ == ProgramSource::Impl::PROGRAM_SOURCE_CODE);
    CV_Assert(handle == NULL);
    CV_INSTRUMENT_REGION_OPENCL_COMPILE(cv::format("Build OpenCL program: %s/%s %s options: %s",
            sourceModule_.c_str(), sourceName_.c_str(),
            src_->sourceHash_.c_str(), buildflags.c_str()).c_str());

    CV_LOG_VERBOSE(NULL, 0, "Compile... " << sourceModule_.c_str() << "/" << sourceName_.c_str());

    const char* srcptr = src_->sourceAddr_ ? ((const char*)src_->sourceAddr_) : src_->codeStr_.c_str();
    size_t srclen = src_->sourceAddr_ ? src_->sourceSize_ : src_->codeStr_.size();
    CV_Assert(srcptr != NULL);
    CV_Assert(srclen > 0);

    cl_int retval = 0;

    handle = clCreateProgramWithSource((cl_context)ctx.ptr(), 1, &srcptr, &srclen, &retval);
    CV_OCL_DBG_CHECK_RESULT(retval, "clCreateProgramWithSource");
    CV_Assert(handle || retval != CL_SUCCESS);
    if (handle && retval == CL_SUCCESS)
    {
        // collect the raw device ids of the context
        size_t n = ctx.ndevices();
        AutoBuffer<cl_device_id, 4> deviceListBuf(n + 1);
        cl_device_id* deviceList = deviceListBuf.data();
        for (size_t i = 0; i < n; i++)
        {
            deviceList[i] = (cl_device_id)(ctx.device(i).ptr());
        }

        retval = clBuildProgram(handle, (cl_uint)n, deviceList, buildflags.c_str(), 0, 0);
        // CL_SUCCESS is passed on purpose instead of retval: build failures are handled below
        CV_OCL_TRACE_CHECK_RESULT(/*don't throw: retval*/CL_SUCCESS, cv::format("clBuildProgram(source: %s)", buildflags.c_str()).c_str());
        // With CV_OPENCL_ALWAYS_SHOW_BUILD_LOG the "if" line is compiled out and the
        // following block runs unconditionally.
#if !CV_OPENCL_ALWAYS_SHOW_BUILD_LOG
        if (retval != CL_SUCCESS)
#endif
        {
            dumpBuildLog_(retval, deviceList, errmsg);

            // don't remove "retval != CL_SUCCESS" condition here:
            // it would break CV_OPENCL_ALWAYS_SHOW_BUILD_LOG mode
            if (retval != CL_SUCCESS && handle)
            {
                CV_OCL_DBG_CHECK(clReleaseProgram(handle));
                handle = NULL;
            }
        }
#if CV_OPENCL_VALIDATE_BINARY_PROGRAMS
        if (handle && CV_OPENCL_VALIDATE_BINARY_PROGRAMS_VALUE)
        {
            // diagnostic only: log the kernel names of the built program
            CV_LOG_INFO(NULL, "OpenCL: query kernel names (build from sources)...");
            size_t retsz = 0;
            char kernels_buffer[4096] = {0};
            cl_int result = clGetProgramInfo(handle, CL_PROGRAM_KERNEL_NAMES, sizeof(kernels_buffer), &kernels_buffer[0], &retsz);
            if (retsz < sizeof(kernels_buffer))
                kernels_buffer[retsz] = 0;
            else
                kernels_buffer[0] = 0;
            CV_LOG_INFO(NULL, result << ": Kernels='" << kernels_buffer << "'");
        }
#endif

    }
    return handle != NULL;
}
4553
getProgramBinarycv::ocl::Program::Impl4554 void getProgramBinary(std::vector<char>& buf)
4555 {
4556 CV_Assert(handle);
4557 size_t sz = 0;
4558 CV_OCL_CHECK(clGetProgramInfo(handle, CL_PROGRAM_BINARY_SIZES, sizeof(sz), &sz, NULL));
4559 buf.resize(sz);
4560 uchar* ptr = (uchar*)&buf[0];
4561 CV_OCL_CHECK(clGetProgramInfo(handle, CL_PROGRAM_BINARIES, sizeof(ptr), &ptr, NULL));
4562 }
4563
createFromBinarycv::ocl::Program::Impl4564 bool createFromBinary(const Context& ctx, const std::vector<char>& buf, String& errmsg)
4565 {
4566 return createFromBinary(ctx, (const unsigned char*)&buf[0], buf.size(), errmsg);
4567 }
4568
createFromBinarycv::ocl::Program::Impl4569 bool createFromBinary(const Context& ctx, const unsigned char* binaryAddr, const size_t binarySize, String& errmsg)
4570 {
4571 CV_Assert(handle == NULL);
4572 CV_INSTRUMENT_REGION_OPENCL_COMPILE("Load OpenCL program");
4573 CV_LOG_VERBOSE(NULL, 0, "Load from binary... (" << binarySize << " bytes)");
4574
4575 CV_Assert(binarySize > 0);
4576
4577 size_t ndevices = (int)ctx.ndevices();
4578 AutoBuffer<cl_device_id> devices_(ndevices);
4579 AutoBuffer<const uchar*> binaryPtrs_(ndevices);
4580 AutoBuffer<size_t> binarySizes_(ndevices);
4581
4582 cl_device_id* devices = devices_.data();
4583 const uchar** binaryPtrs = binaryPtrs_.data();
4584 size_t* binarySizes = binarySizes_.data();
4585 for (size_t i = 0; i < ndevices; i++)
4586 {
4587 devices[i] = (cl_device_id)ctx.device(i).ptr();
4588 binaryPtrs[i] = binaryAddr;
4589 binarySizes[i] = binarySize;
4590 }
4591
4592 cl_int result = 0;
4593 handle = clCreateProgramWithBinary((cl_context)ctx.ptr(), (cl_uint)ndevices, devices_.data(),
4594 binarySizes, binaryPtrs, NULL, &result);
4595 if (result != CL_SUCCESS)
4596 {
4597 CV_LOG_ERROR(NULL, CV_OCL_API_ERROR_MSG(result, "clCreateProgramWithBinary"));
4598 if (handle)
4599 {
4600 CV_OCL_DBG_CHECK(clReleaseProgram(handle));
4601 handle = NULL;
4602 }
4603 }
4604 if (!handle)
4605 {
4606 return false;
4607 }
4608 // call clBuildProgram()
4609 {
4610 result = clBuildProgram(handle, (cl_uint)ndevices, devices_.data(), buildflags.c_str(), 0, 0);
4611 CV_OCL_DBG_CHECK_RESULT(result, cv::format("clBuildProgram(binary: %s/%s)", sourceModule_.c_str(), sourceName_.c_str()).c_str());
4612 if (result != CL_SUCCESS)
4613 {
4614 dumpBuildLog_(result, devices, errmsg);
4615 if (handle)
4616 {
4617 CV_OCL_DBG_CHECK(clReleaseProgram(handle));
4618 handle = NULL;
4619 }
4620 return false;
4621 }
4622 }
4623 // check build status
4624 {
4625 cl_build_status build_status = CL_BUILD_NONE;
4626 size_t retsz = 0;
4627 CV_OCL_DBG_CHECK(result = clGetProgramBuildInfo(handle, devices[0], CL_PROGRAM_BUILD_STATUS,
4628 sizeof(build_status), &build_status, &retsz));
4629 if (result == CL_SUCCESS)
4630 {
4631 if (build_status == CL_BUILD_SUCCESS)
4632 {
4633 return true;
4634 }
4635 else
4636 {
4637 CV_LOG_WARNING(NULL, "clGetProgramBuildInfo() returns " << build_status);
4638 return false;
4639 }
4640 }
4641 else
4642 {
4643 CV_LOG_ERROR(NULL, CV_OCL_API_ERROR_MSG(result, "clGetProgramBuildInfo()"));
4644 if (handle)
4645 {
4646 CV_OCL_DBG_CHECK(clReleaseProgram(handle));
4647 handle = NULL;
4648 }
4649 }
4650 }
4651 #if CV_OPENCL_VALIDATE_BINARY_PROGRAMS
4652 if (handle && CV_OPENCL_VALIDATE_BINARY_PROGRAMS_VALUE)
4653 {
4654 CV_LOG_INFO(NULL, "OpenCL: query kernel names (binary)...");
4655 size_t retsz = 0;
4656 char kernels_buffer[4096] = {0};
4657 result = clGetProgramInfo(handle, CL_PROGRAM_KERNEL_NAMES, sizeof(kernels_buffer), &kernels_buffer[0], &retsz);
4658 if (retsz < sizeof(kernels_buffer))
4659 kernels_buffer[retsz] = 0;
4660 else
4661 kernels_buffer[0] = 0;
4662 CV_LOG_INFO(NULL, result << ": Kernels='" << kernels_buffer << "'");
4663 }
4664 #endif
4665 return handle != NULL;
4666 }
4667
~Implcv::ocl::Program::Impl4668 ~Impl()
4669 {
4670 if( handle )
4671 {
4672 #ifdef _WIN32
4673 if (!cv::__termination)
4674 #endif
4675 {
4676 clReleaseProgram(handle);
4677 }
4678 handle = NULL;
4679 }
4680 }
4681
4682 cl_program handle;
4683
4684 String buildflags;
4685 String sourceModule_;
4686 String sourceName_;
4687 };
4688
4689
Program()4690 Program::Program() CV_NOEXCEPT
4691 {
4692 p = 0;
4693 }
4694
Program(const ProgramSource & src,const String & buildflags,String & errmsg)4695 Program::Program(const ProgramSource& src,
4696 const String& buildflags, String& errmsg)
4697 {
4698 p = 0;
4699 create(src, buildflags, errmsg);
4700 }
4701
/** Copy constructor: share the implementation of prog and add a reference to it. */
Program::Program(const Program& prog)
    : p(prog.p)
{
    if (p != NULL)
    {
        p->addref();
    }
}
4708
/** Copy assignment: share the implementation of prog.
 *
 * The incoming implementation is referenced before the current one is released,
 * so assigning an object to itself never drops the reference count to zero in between.
 */
Program& Program::operator = (const Program& prog)
{
    Impl* const incoming = (Impl*)prog.p;
    if (incoming != NULL)
    {
        incoming->addref();
    }
    if (p != NULL)
    {
        p->release();
    }
    p = incoming;
    return *this;
}
4719
Program(Program && prog)4720 Program::Program(Program&& prog) CV_NOEXCEPT
4721 {
4722 p = prog.p;
4723 prog.p = nullptr;
4724 }
4725
operator =(Program && prog)4726 Program& Program::operator = (Program&& prog) CV_NOEXCEPT
4727 {
4728 if (this != &prog) {
4729 if(p)
4730 p->release();
4731 p = prog.p;
4732 prog.p = nullptr;
4733 }
4734 return *this;
4735 }
4736
~Program()4737 Program::~Program()
4738 {
4739 if(p)
4740 p->release();
4741 }
4742
create(const ProgramSource & src,const String & buildflags,String & errmsg)4743 bool Program::create(const ProgramSource& src,
4744 const String& buildflags, String& errmsg)
4745 {
4746 if(p)
4747 {
4748 p->release();
4749 p = NULL;
4750 }
4751 p = new Impl(src, buildflags, errmsg);
4752 if(!p->handle)
4753 {
4754 p->release();
4755 p = 0;
4756 }
4757 return p != 0;
4758 }
4759
ptr() const4760 void* Program::ptr() const
4761 {
4762 return p ? p->handle : 0;
4763 }
4764
4765 #ifndef OPENCV_REMOVE_DEPRECATED_API
/** Removed API: always raises Error::StsNotImplemented. */
const ProgramSource& Program::source() const
{
    CV_Error(Error::StsNotImplemented, "Removed API");
}
4770
/** Removed API: ignores its arguments and always raises Error::StsNotImplemented. */
bool Program::read(const String& bin, const String& buildflags)
{
    CV_UNUSED(bin); CV_UNUSED(buildflags);
    CV_Error(Error::StsNotImplemented, "Removed API");
}
4776
/** Removed API: ignores its argument and always raises Error::StsNotImplemented. */
bool Program::write(String& bin) const
{
    CV_UNUSED(bin);
    CV_Error(Error::StsNotImplemented, "Removed API");
}
4782
getPrefix() const4783 String Program::getPrefix() const
4784 {
4785 if(!p)
4786 return String();
4787 Context::Impl* ctx_ = Context::getDefault().getImpl();
4788 CV_Assert(ctx_);
4789 return cv::format("opencl=%s\nbuildflags=%s", ctx_->getPrefixString().c_str(), p->buildflags.c_str());
4790 }
4791
/** Describe the default context and the given build flags.
 *
 * @param buildflags build flags to embed in the description
 * @return two lines of the form "opencl=<context prefix>" and "buildflags=<flags>".
 * The default context must have an implementation (asserted).
 */
String Program::getPrefix(const String& buildflags)
{
    Context::Impl* ctx_ = Context::getDefault().getImpl();
    CV_Assert(ctx_);
    const char* const flagsText = buildflags.c_str();
    return cv::format("opencl=%s\nbuildflags=%s", ctx_->getPrefixString().c_str(), flagsText);
}
4798 #endif // OPENCV_REMOVE_DEPRECATED_API
4799
/** Retrieve the binary of this program into the given vector.
 *
 * @param binary receives the program binary (filled by Impl::getProgramBinary())
 * The program must not be empty (asserted).
 */
void Program::getBinary(std::vector<char>& binary) const
{
    CV_Assert(p && "Empty program");
    p->getProgramBinary(binary);
}
4805
/** Return the program for (src, buildflags), building it on a cache miss.
 *
 * Programs are kept in an in-memory map (phash) keyed by module, name, source hash,
 * context prefix and build flags; cacheList keeps the keys in most-recently-used-first
 * order. When the map has reached the configured limit, one entry is evicted from the
 * back of cacheList before the new program is built.
 *
 * @param src        program source; must have an implementation (asserted)
 * @param buildflags caller build flags (part of the cache key)
 * @param errmsg     receives the build log on a cache miss; not touched on a cache hit
 * @return the cached or newly built program; it can be empty if the build failed
 */
Program Context::Impl::getProg(const ProgramSource& src,
        const String& buildflags, String& errmsg)
{
    size_t limit = getProgramCountLimit();
    const ProgramSource::Impl* src_ = src.getImpl();
    CV_Assert(src_);
    String key = cv::format("module=%s name=%s codehash=%s\nopencl=%s\nbuildflags=%s",
            src_->module_.c_str(), src_->name_.c_str(), src_->sourceHash_.c_str(),
            getPrefixString().c_str(),
            buildflags.c_str());
    {
        cv::AutoLock lock(program_cache_mutex);
        phash_t::iterator it = phash.find(key);
        if (it != phash.end())
        {
            // TODO LRU cache
            // cache hit: move the key to the front unless it is already there
            CacheList::iterator i = std::find(cacheList.begin(), cacheList.end(), key);
            if (i != cacheList.end() && i != cacheList.begin())
            {
                cacheList.erase(i);
                cacheList.push_front(key);
            }
            return it->second;
        }
        { // cleanup program cache
            size_t sz = phash.size();
            if (limit > 0 && sz >= limit)
            {
                // the warning is printed only once per process
                static bool warningFlag = false;
                if (!warningFlag)
                {
                    printf("\nWARNING: OpenCV-OpenCL:\n"
                        "    In-memory cache for OpenCL programs is full, older programs will be unloaded.\n"
                        "    You can change cache size via OPENCV_OPENCL_PROGRAM_CACHE environment variable\n\n");
                    warningFlag = true;
                }
                // pop keys from the back until one of them actually removes a map entry
                // (a popped key may have no matching entry, in which case erase() returns 0)
                while (!cacheList.empty())
                {
                    size_t c = phash.erase(cacheList.back());
                    cacheList.pop_back();
                    if (c != 0)
                        break;
                }
            }
        }
    }
    // the build itself runs without holding program_cache_mutex
    Program prog(src, buildflags, errmsg);
    // Cache result of build failures too (to prevent unnecessary compiler invocations)
    {
        cv::AutoLock lock(program_cache_mutex);
        phash.insert(std::pair<std::string, Program>(key, prog));
        cacheList.push_front(key);
    }
    return prog;
}
4861
4862
4863 //////////////////////////////////////////// OpenCLAllocator //////////////////////////////////////////////////
4864
/** Minimal allocate/release interface of a pool handing out buffers of handle type T. */
template<typename T>
class OpenCLBufferPool
{
public:
    /** Obtain a buffer able to hold at least the requested number of bytes. */
    virtual T allocate(size_t size) = 0;
    /** Give a buffer previously obtained from allocate() back to the pool. */
    virtual void release(T buffer) = 0;

protected:
    // protected and non-virtual: code outside the hierarchy cannot destroy an
    // object through this interface type
    ~OpenCLBufferPool() { }
};
4874
4875 template <typename Derived, typename BufferEntry, typename T>
4876 class OpenCLBufferPoolBaseImpl : public BufferPoolController, public OpenCLBufferPool<T>
4877 {
4878 private:
derived()4879 inline Derived& derived() { return *static_cast<Derived*>(this); }
4880 protected:
4881 Mutex mutex_;
4882
4883 size_t currentReservedSize;
4884 size_t maxReservedSize;
4885
4886 std::list<BufferEntry> allocatedEntries_; // Allocated and used entries
4887 std::list<BufferEntry> reservedEntries_; // LRU order. Allocated, but not used entries
4888
4889 // synchronized
_findAndRemoveEntryFromAllocatedList(CV_OUT BufferEntry & entry,T buffer)4890 bool _findAndRemoveEntryFromAllocatedList(CV_OUT BufferEntry& entry, T buffer)
4891 {
4892 typename std::list<BufferEntry>::iterator i = allocatedEntries_.begin();
4893 for (; i != allocatedEntries_.end(); ++i)
4894 {
4895 BufferEntry& e = *i;
4896 if (e.clBuffer_ == buffer)
4897 {
4898 entry = e;
4899 allocatedEntries_.erase(i);
4900 return true;
4901 }
4902 }
4903 return false;
4904 }
4905
4906 // synchronized
_findAndRemoveEntryFromReservedList(CV_OUT BufferEntry & entry,const size_t size)4907 bool _findAndRemoveEntryFromReservedList(CV_OUT BufferEntry& entry, const size_t size)
4908 {
4909 if (reservedEntries_.empty())
4910 return false;
4911 typename std::list<BufferEntry>::iterator i = reservedEntries_.begin();
4912 typename std::list<BufferEntry>::iterator result_pos = reservedEntries_.end();
4913 BufferEntry result;
4914 size_t minDiff = (size_t)(-1);
4915 for (; i != reservedEntries_.end(); ++i)
4916 {
4917 BufferEntry& e = *i;
4918 if (e.capacity_ >= size)
4919 {
4920 size_t diff = e.capacity_ - size;
4921 if (diff < std::max((size_t)4096, size / 8) && (result_pos == reservedEntries_.end() || diff < minDiff))
4922 {
4923 minDiff = diff;
4924 result_pos = i;
4925 result = e;
4926 if (diff == 0)
4927 break;
4928 }
4929 }
4930 }
4931 if (result_pos != reservedEntries_.end())
4932 {
4933 //CV_DbgAssert(result == *result_pos);
4934 reservedEntries_.erase(result_pos);
4935 entry = result;
4936 currentReservedSize -= entry.capacity_;
4937 allocatedEntries_.push_back(entry);
4938 return true;
4939 }
4940 return false;
4941 }
4942
// synchronized
/** Release reserved buffers from the back of reservedEntries_ until the total reserved
 *  size no longer exceeds maxReservedSize.
 *
 *  New entries are pushed to the front of the list by release(), so the back holds the
 *  entries that have been reserved the longest.
 */
void _checkSizeOfReservedEntries()
{
    while (currentReservedSize > maxReservedSize)
    {
        CV_DbgAssert(!reservedEntries_.empty());
        const BufferEntry& entry = reservedEntries_.back();
        CV_DbgAssert(currentReservedSize >= entry.capacity_);
        currentReservedSize -= entry.capacity_;
        // entry refers to the list element, so it must be released before pop_back()
        derived()._releaseBufferEntry(entry);
        reservedEntries_.pop_back();
    }
}
4956
/** Choose the allocation granularity (in bytes) for a buffer of the given size.
 *
 * @param size requested size in bytes
 * @return 4 KiB below 1 MiB, 64 KiB below 16 MiB, 1 MiB otherwise
 */
inline size_t _allocationGranularity(size_t size)
{
    const size_t KiB = 1024;
    const size_t MiB = 1024 * KiB;

    // heuristic values
    if (size >= 16 * MiB)
        return MiB;
    if (size >= MiB)
        return 64 * KiB;
    return 4 * KiB; // don't work with buffers smaller than 4Kb (hidden allocation overhead issue)
}
4967
4968 public:
OpenCLBufferPoolBaseImpl()4969 OpenCLBufferPoolBaseImpl()
4970 : currentReservedSize(0),
4971 maxReservedSize(0)
4972 {
4973 // nothing
4974 }
/** Release all reserved (idle) buffers; buffers still listed as allocated are not touched here. */
virtual ~OpenCLBufferPoolBaseImpl()
{
    freeAllReservedBuffers();
    CV_Assert(reservedEntries_.empty());
}
4980 public:
allocate(size_t size)4981 virtual T allocate(size_t size) CV_OVERRIDE
4982 {
4983 AutoLock locker(mutex_);
4984 BufferEntry entry;
4985 if (maxReservedSize > 0 && _findAndRemoveEntryFromReservedList(entry, size))
4986 {
4987 CV_DbgAssert(size <= entry.capacity_);
4988 LOG_BUFFER_POOL("Reuse reserved buffer: %p\n", entry.clBuffer_);
4989 }
4990 else
4991 {
4992 derived()._allocateBufferEntry(entry, size);
4993 }
4994 return entry.clBuffer_;
4995 }
release(T buffer)4996 virtual void release(T buffer) CV_OVERRIDE
4997 {
4998 AutoLock locker(mutex_);
4999 BufferEntry entry;
5000 CV_Assert(_findAndRemoveEntryFromAllocatedList(entry, buffer));
5001 if (maxReservedSize == 0 || entry.capacity_ > maxReservedSize / 8)
5002 {
5003 derived()._releaseBufferEntry(entry);
5004 }
5005 else
5006 {
5007 reservedEntries_.push_front(entry);
5008 currentReservedSize += entry.capacity_;
5009 _checkSizeOfReservedEntries();
5010 }
5011 }
5012
getReservedSize() const5013 virtual size_t getReservedSize() const CV_OVERRIDE { return currentReservedSize; }
getMaxReservedSize() const5014 virtual size_t getMaxReservedSize() const CV_OVERRIDE { return maxReservedSize; }
setMaxReservedSize(size_t size)5015 virtual void setMaxReservedSize(size_t size) CV_OVERRIDE
5016 {
5017 AutoLock locker(mutex_);
5018 size_t oldMaxReservedSize = maxReservedSize;
5019 maxReservedSize = size;
5020 if (maxReservedSize < oldMaxReservedSize)
5021 {
5022 typename std::list<BufferEntry>::iterator i = reservedEntries_.begin();
5023 for (; i != reservedEntries_.end();)
5024 {
5025 const BufferEntry& entry = *i;
5026 if (entry.capacity_ > maxReservedSize / 8)
5027 {
5028 CV_DbgAssert(currentReservedSize >= entry.capacity_);
5029 currentReservedSize -= entry.capacity_;
5030 derived()._releaseBufferEntry(entry);
5031 i = reservedEntries_.erase(i);
5032 continue;
5033 }
5034 ++i;
5035 }
5036 _checkSizeOfReservedEntries();
5037 }
5038 }
freeAllReservedBuffers()5039 virtual void freeAllReservedBuffers() CV_OVERRIDE
5040 {
5041 AutoLock locker(mutex_);
5042 typename std::list<BufferEntry>::const_iterator i = reservedEntries_.begin();
5043 for (; i != reservedEntries_.end(); ++i)
5044 {
5045 const BufferEntry& entry = *i;
5046 derived()._releaseBufferEntry(entry);
5047 }
5048 reservedEntries_.clear();
5049 currentReservedSize = 0;
5050 }
5051 };
5052
5053 struct CLBufferEntry
5054 {
5055 cl_mem clBuffer_;
5056 size_t capacity_;
CLBufferEntrycv::ocl::CLBufferEntry5057 CLBufferEntry() : clBuffer_((cl_mem)NULL), capacity_(0) { }
5058 };
5059
5060 class OpenCLBufferPoolImpl CV_FINAL : public OpenCLBufferPoolBaseImpl<OpenCLBufferPoolImpl, CLBufferEntry, cl_mem>
5061 {
5062 public:
5063 typedef struct CLBufferEntry BufferEntry;
5064 protected:
5065 int createFlags_;
5066 public:
OpenCLBufferPoolImpl(int createFlags=0)5067 OpenCLBufferPoolImpl(int createFlags = 0)
5068 : createFlags_(createFlags)
5069 {
5070 }
5071
_allocateBufferEntry(BufferEntry & entry,size_t size)5072 void _allocateBufferEntry(BufferEntry& entry, size_t size)
5073 {
5074 CV_DbgAssert(entry.clBuffer_ == NULL);
5075 entry.capacity_ = alignSize(size, (int)_allocationGranularity(size));
5076 Context& ctx = Context::getDefault();
5077 cl_int retval = CL_SUCCESS;
5078 entry.clBuffer_ = clCreateBuffer((cl_context)ctx.ptr(), CL_MEM_READ_WRITE|createFlags_, entry.capacity_, 0, &retval);
5079 CV_OCL_CHECK_RESULT(retval, cv::format("clCreateBuffer(capacity=%lld) => %p", (long long int)entry.capacity_, (void*)entry.clBuffer_).c_str());
5080 CV_Assert(entry.clBuffer_ != NULL);
5081 if(retval == CL_SUCCESS)
5082 {
5083 CV_IMPL_ADD(CV_IMPL_OCL);
5084 }
5085 LOG_BUFFER_POOL("OpenCL allocate %lld (0x%llx) bytes: %p\n",
5086 (long long)entry.capacity_, (long long)entry.capacity_, entry.clBuffer_);
5087 allocatedEntries_.push_back(entry);
5088 }
5089
_releaseBufferEntry(const BufferEntry & entry)5090 void _releaseBufferEntry(const BufferEntry& entry)
5091 {
5092 CV_Assert(entry.capacity_ != 0);
5093 CV_Assert(entry.clBuffer_ != NULL);
5094 LOG_BUFFER_POOL("OpenCL release buffer: %p, %lld (0x%llx) bytes\n",
5095 entry.clBuffer_, (long long)entry.capacity_, (long long)entry.capacity_);
5096 CV_OCL_DBG_CHECK(clReleaseMemObject(entry.clBuffer_));
5097 }
5098 };
5099
5100 #ifdef HAVE_OPENCL_SVM
/** Bookkeeping record of the SVM buffer pool: an SVM pointer plus its capacity. */
struct CLSVMBufferEntry
{
    void* clBuffer_;   // SVM allocation, NULL while unset
    size_t capacity_;  // size the allocation was made with, in bytes

    CLSVMBufferEntry()
        : clBuffer_(NULL), capacity_(0)
    {
    }
};
5107 class OpenCLSVMBufferPoolImpl CV_FINAL : public OpenCLBufferPoolBaseImpl<OpenCLSVMBufferPoolImpl, CLSVMBufferEntry, void*>
5108 {
5109 public:
5110 typedef struct CLSVMBufferEntry BufferEntry;
5111 public:
OpenCLSVMBufferPoolImpl()5112 OpenCLSVMBufferPoolImpl()
5113 {
5114 }
5115
_allocateBufferEntry(BufferEntry & entry,size_t size)5116 void _allocateBufferEntry(BufferEntry& entry, size_t size)
5117 {
5118 CV_DbgAssert(entry.clBuffer_ == NULL);
5119 entry.capacity_ = alignSize(size, (int)_allocationGranularity(size));
5120
5121 Context& ctx = Context::getDefault();
5122 const svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx);
5123 bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer();
5124 cl_svm_mem_flags memFlags = CL_MEM_READ_WRITE |
5125 (isFineGrainBuffer ? CL_MEM_SVM_FINE_GRAIN_BUFFER : 0);
5126
5127 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5128 CV_DbgAssert(svmFns->isValid());
5129
5130 CV_OPENCL_SVM_TRACE_P("clSVMAlloc: %d\n", (int)entry.capacity_);
5131 void *buf = svmFns->fn_clSVMAlloc((cl_context)ctx.ptr(), memFlags, entry.capacity_, 0);
5132 CV_Assert(buf);
5133
5134 entry.clBuffer_ = buf;
5135 {
5136 CV_IMPL_ADD(CV_IMPL_OCL);
5137 }
5138 LOG_BUFFER_POOL("OpenCL SVM allocate %lld (0x%llx) bytes: %p\n",
5139 (long long)entry.capacity_, (long long)entry.capacity_, entry.clBuffer_);
5140 allocatedEntries_.push_back(entry);
5141 }
5142
_releaseBufferEntry(const BufferEntry & entry)5143 void _releaseBufferEntry(const BufferEntry& entry)
5144 {
5145 CV_Assert(entry.capacity_ != 0);
5146 CV_Assert(entry.clBuffer_ != NULL);
5147 LOG_BUFFER_POOL("OpenCL release SVM buffer: %p, %lld (0x%llx) bytes\n",
5148 entry.clBuffer_, (long long)entry.capacity_, (long long)entry.capacity_);
5149 Context& ctx = Context::getDefault();
5150 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5151 CV_DbgAssert(svmFns->isValid());
5152 CV_OPENCL_SVM_TRACE_P("clSVMFree: %p\n", entry.clBuffer_);
5153 svmFns->fn_clSVMFree((cl_context)ctx.ptr(), entry.clBuffer_);
5154 }
5155 };
5156 #endif
5157
5158
5159
5160 template <bool readAccess, bool writeAccess>
5161 class AlignedDataPtr
5162 {
5163 protected:
5164 const size_t size_;
5165 uchar* const originPtr_;
5166 const size_t alignment_;
5167 uchar* ptr_;
5168 uchar* allocatedPtr_;
5169
5170 public:
AlignedDataPtr(uchar * ptr,size_t size,size_t alignment)5171 AlignedDataPtr(uchar* ptr, size_t size, size_t alignment)
5172 : size_(size), originPtr_(ptr), alignment_(alignment), ptr_(ptr), allocatedPtr_(NULL)
5173 {
5174 CV_DbgAssert((alignment & (alignment - 1)) == 0); // check for 2^n
5175 CV_DbgAssert(!readAccess || ptr);
5176 if (((size_t)ptr_ & (alignment - 1)) != 0)
5177 {
5178 allocatedPtr_ = new uchar[size_ + alignment - 1];
5179 ptr_ = (uchar*)(((uintptr_t)allocatedPtr_ + (alignment - 1)) & ~(alignment - 1));
5180 if (readAccess)
5181 {
5182 memcpy(ptr_, originPtr_, size_);
5183 }
5184 }
5185 }
5186
getAlignedPtr() const5187 uchar* getAlignedPtr() const
5188 {
5189 CV_DbgAssert(((size_t)ptr_ & (alignment_ - 1)) == 0);
5190 return ptr_;
5191 }
5192
~AlignedDataPtr()5193 ~AlignedDataPtr()
5194 {
5195 if (allocatedPtr_)
5196 {
5197 if (writeAccess)
5198 {
5199 memcpy(originPtr_, ptr_, size_);
5200 }
5201 delete[] allocatedPtr_;
5202 allocatedPtr_ = NULL;
5203 }
5204 ptr_ = NULL;
5205 }
5206 private:
5207 AlignedDataPtr(const AlignedDataPtr&); // disabled
5208 AlignedDataPtr& operator=(const AlignedDataPtr&); // disabled
5209 };
5210
5211 template <bool readAccess, bool writeAccess>
5212 class AlignedDataPtr2D
5213 {
5214 protected:
5215 const size_t size_;
5216 uchar* const originPtr_;
5217 const size_t alignment_;
5218 uchar* ptr_;
5219 uchar* allocatedPtr_;
5220 size_t rows_;
5221 size_t cols_;
5222 size_t step_;
5223
5224 public:
AlignedDataPtr2D(uchar * ptr,size_t rows,size_t cols,size_t step,size_t alignment,size_t extrabytes=0)5225 AlignedDataPtr2D(uchar* ptr, size_t rows, size_t cols, size_t step, size_t alignment, size_t extrabytes=0)
5226 : size_(rows*step), originPtr_(ptr), alignment_(alignment), ptr_(ptr), allocatedPtr_(NULL), rows_(rows), cols_(cols), step_(step)
5227 {
5228 CV_DbgAssert((alignment & (alignment - 1)) == 0); // check for 2^n
5229 CV_DbgAssert(!readAccess || ptr != NULL);
5230 if (ptr == 0 || ((size_t)ptr_ & (alignment - 1)) != 0)
5231 {
5232 allocatedPtr_ = new uchar[size_ + extrabytes + alignment - 1];
5233 ptr_ = (uchar*)(((uintptr_t)allocatedPtr_ + (alignment - 1)) & ~(alignment - 1));
5234 if (readAccess)
5235 {
5236 for (size_t i = 0; i < rows_; i++)
5237 memcpy(ptr_ + i*step_, originPtr_ + i*step_, cols_);
5238 }
5239 }
5240 }
5241
getAlignedPtr() const5242 uchar* getAlignedPtr() const
5243 {
5244 CV_DbgAssert(((size_t)ptr_ & (alignment_ - 1)) == 0);
5245 return ptr_;
5246 }
5247
~AlignedDataPtr2D()5248 ~AlignedDataPtr2D()
5249 {
5250 if (allocatedPtr_)
5251 {
5252 if (writeAccess)
5253 {
5254 for (size_t i = 0; i < rows_; i++)
5255 memcpy(originPtr_ + i*step_, ptr_ + i*step_, cols_);
5256 }
5257 delete[] allocatedPtr_;
5258 allocatedPtr_ = NULL;
5259 }
5260 ptr_ = NULL;
5261 }
5262 private:
5263 AlignedDataPtr2D(const AlignedDataPtr2D&); // disabled
5264 AlignedDataPtr2D& operator=(const AlignedDataPtr2D&); // disabled
5265 };
5266
5267 #ifndef CV_OPENCL_DATA_PTR_ALIGNMENT
5268 #define CV_OPENCL_DATA_PTR_ALIGNMENT 16
5269 #endif
5270
5271
__init_buffer_pools()5272 void Context::Impl::__init_buffer_pools()
5273 {
5274 bufferPool_ = std::make_shared<OpenCLBufferPoolImpl>(0);
5275 OpenCLBufferPoolImpl& bufferPool = *bufferPool_.get();
5276 bufferPoolHostPtr_ = std::make_shared<OpenCLBufferPoolImpl>(CL_MEM_ALLOC_HOST_PTR);
5277 OpenCLBufferPoolImpl& bufferPoolHostPtr = *bufferPoolHostPtr_.get();
5278
5279 size_t defaultPoolSize = ocl::Device::getDefault().isIntel() ? 1 << 27 : 0;
5280 size_t poolSize = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_BUFFERPOOL_LIMIT", defaultPoolSize);
5281 bufferPool.setMaxReservedSize(poolSize);
5282 size_t poolSizeHostPtr = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_HOST_PTR_BUFFERPOOL_LIMIT", defaultPoolSize);
5283 bufferPoolHostPtr.setMaxReservedSize(poolSizeHostPtr);
5284
5285 #ifdef HAVE_OPENCL_SVM
5286 bufferPoolSVM_ = std::make_shared<OpenCLSVMBufferPoolImpl>();
5287 OpenCLSVMBufferPoolImpl& bufferPoolSVM = *bufferPoolSVM_.get();
5288 size_t poolSizeSVM = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_SVM_BUFFERPOOL_LIMIT", defaultPoolSize);
5289 bufferPoolSVM.setMaxReservedSize(poolSizeSVM);
5290 #endif
5291
5292 CV_LOG_INFO(NULL, "OpenCL: Initializing buffer pool for context@" << contextId << " with max capacity: poolSize=" << poolSize << " poolSizeHostPtr=" << poolSizeHostPtr);
5293 }
5294
5295 class OpenCLAllocator CV_FINAL : public MatAllocator
5296 {
5297 public:
// Bit flags stored in UMatData::allocatorFlags_ that record where a device handle
// came from, so that it can be returned to the right place on deallocation.
enum AllocatorFlags
{
    // handle was taken from the regular buffer pool
    ALLOCATOR_FLAGS_BUFFER_POOL_USED = (1 << 0),
    // handle was taken from the CL_MEM_ALLOC_HOST_PTR buffer pool
    ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED = (1 << 1),
#ifdef HAVE_OPENCL_SVM
    // handle was taken from the SVM buffer pool
    ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED = (1 << 2),
#endif
    // externally supplied buffer: convertFromBuffer()
    ALLOCATOR_FLAGS_EXTERNAL_BUFFER = (1 << 3)
};
5307
OpenCLAllocator()5308 OpenCLAllocator()
5309 {
5310 matStdAllocator = Mat::getDefaultAllocator();
5311 }
// Processes whatever is still waiting in the cleanup queue before the allocator goes away.
~OpenCLAllocator()
{
    this->flushCleanupQueue();
}
5316
defaultAllocate(int dims,const int * sizes,int type,void * data,size_t * step,AccessFlag flags,UMatUsageFlags usageFlags) const5317 UMatData* defaultAllocate(int dims, const int* sizes, int type, void* data, size_t* step,
5318 AccessFlag flags, UMatUsageFlags usageFlags) const
5319 {
5320 UMatData* u = matStdAllocator->allocate(dims, sizes, type, data, step, flags, usageFlags);
5321 return u;
5322 }
5323
isOpenCLMapForced()5324 static bool isOpenCLMapForced() // force clEnqueueMapBuffer / clEnqueueUnmapMemObject OpenCL API
5325 {
5326 static bool value = cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_BUFFER_FORCE_MAPPING", false);
5327 return value;
5328 }
isOpenCLCopyingForced()5329 static bool isOpenCLCopyingForced() // force clEnqueueReadBuffer[Rect] / clEnqueueWriteBuffer[Rect] OpenCL API
5330 {
5331 static bool value = cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_BUFFER_FORCE_COPYING", false);
5332 return value;
5333 }
5334
getBestFlags(const Context & ctx,AccessFlag,UMatUsageFlags usageFlags,int & createFlags,UMatData::MemoryFlag & flags0) const5335 void getBestFlags(const Context& ctx, AccessFlag /*flags*/, UMatUsageFlags usageFlags, int& createFlags, UMatData::MemoryFlag& flags0) const
5336 {
5337 const Device& dev = ctx.device(0);
5338 createFlags = 0;
5339 if ((usageFlags & USAGE_ALLOCATE_HOST_MEMORY) != 0)
5340 createFlags |= CL_MEM_ALLOC_HOST_PTR;
5341
5342 if (!isOpenCLCopyingForced() &&
5343 (isOpenCLMapForced() ||
5344 (dev.hostUnifiedMemory()
5345 #ifndef __APPLE__
5346 || dev.isIntel()
5347 #endif
5348 )
5349 )
5350 )
5351 flags0 = static_cast<UMatData::MemoryFlag>(0);
5352 else
5353 flags0 = UMatData::COPY_ON_MAP;
5354 }
5355
allocate(int dims,const int * sizes,int type,void * data,size_t * step,AccessFlag flags,UMatUsageFlags usageFlags) const5356 UMatData* allocate(int dims, const int* sizes, int type,
5357 void* data, size_t* step, AccessFlag flags, UMatUsageFlags usageFlags) const CV_OVERRIDE
5358 {
5359 if(!useOpenCL())
5360 return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
5361
5362 flushCleanupQueue();
5363
5364 CV_Assert(data == 0);
5365 size_t total = CV_ELEM_SIZE(type);
5366 for( int i = dims-1; i >= 0; i-- )
5367 {
5368 if( step )
5369 step[i] = total;
5370 total *= sizes[i];
5371 }
5372
5373 Context& ctx = Context::getDefault();
5374 if (!ctx.getImpl())
5375 return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
5376 Context::Impl& ctxImpl = *ctx.getImpl();
5377
5378 int createFlags = 0;
5379 UMatData::MemoryFlag flags0 = static_cast<UMatData::MemoryFlag>(0);
5380 getBestFlags(ctx, flags, usageFlags, createFlags, flags0);
5381
5382 void* handle = NULL;
5383 int allocatorFlags = 0;
5384
5385 #ifdef HAVE_OPENCL_SVM
5386 const svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx);
5387 if (ctx.useSVM() && svm::useSVM(usageFlags) && !svmCaps.isNoSVMSupport())
5388 {
5389 allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED;
5390 handle = ctxImpl.getBufferPoolSVM().allocate(total);
5391
5392 // this property is constant, so single buffer pool can be used here
5393 bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer();
5394 allocatorFlags |= isFineGrainBuffer ? svm::OPENCL_SVM_FINE_GRAIN_BUFFER : svm::OPENCL_SVM_COARSE_GRAIN_BUFFER;
5395 }
5396 else
5397 #endif
5398 if (createFlags == 0)
5399 {
5400 allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_USED;
5401 handle = ctxImpl.getBufferPool().allocate(total);
5402 }
5403 else if (createFlags == CL_MEM_ALLOC_HOST_PTR)
5404 {
5405 allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED;
5406 handle = ctxImpl.getBufferPoolHostPtr().allocate(total);
5407 }
5408 else
5409 {
5410 CV_Assert(handle != NULL); // Unsupported, throw
5411 }
5412
5413 if (!handle)
5414 return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
5415
5416 UMatData* u = new UMatData(this);
5417 u->data = 0;
5418 u->size = total;
5419 u->handle = handle;
5420 u->flags = flags0;
5421 u->allocatorFlags_ = allocatorFlags;
5422 u->allocatorContext = std::static_pointer_cast<void>(std::make_shared<ocl::Context>(ctx));
5423 CV_DbgAssert(!u->tempUMat()); // for bufferPool.release() consistency in deallocate()
5424 u->markHostCopyObsolete(true);
5425 opencl_allocator_stats.onAllocate(u->size);
5426 return u;
5427 }
5428
allocate(UMatData * u,AccessFlag accessFlags,UMatUsageFlags usageFlags) const5429 bool allocate(UMatData* u, AccessFlag accessFlags, UMatUsageFlags usageFlags) const CV_OVERRIDE
5430 {
5431 if(!u)
5432 return false;
5433
5434 flushCleanupQueue();
5435
5436 UMatDataAutoLock lock(u);
5437
5438 if(u->handle == 0)
5439 {
5440 CV_Assert(u->origdata != 0);
5441 Context& ctx = Context::getDefault();
5442 int createFlags = 0;
5443 UMatData::MemoryFlag flags0 = static_cast<UMatData::MemoryFlag>(0);
5444 getBestFlags(ctx, accessFlags, usageFlags, createFlags, flags0);
5445
5446 bool copyOnMap = (flags0 & UMatData::COPY_ON_MAP) != 0;
5447
5448 cl_context ctx_handle = (cl_context)ctx.ptr();
5449 int allocatorFlags = 0;
5450 UMatData::MemoryFlag tempUMatFlags = static_cast<UMatData::MemoryFlag>(0);
5451 void* handle = NULL;
5452 cl_int retval = CL_SUCCESS;
5453
5454 #ifdef HAVE_OPENCL_SVM
5455 svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx);
5456 bool useSVM = ctx.useSVM() && svm::useSVM(usageFlags);
5457 if (useSVM && svmCaps.isSupportFineGrainSystem())
5458 {
5459 allocatorFlags = svm::OPENCL_SVM_FINE_GRAIN_SYSTEM;
5460 tempUMatFlags = UMatData::TEMP_UMAT;
5461 handle = u->origdata;
5462 CV_OPENCL_SVM_TRACE_P("Use fine grain system: %d (%p)\n", (int)u->size, handle);
5463 }
5464 else if (useSVM && (svmCaps.isSupportFineGrainBuffer() || svmCaps.isSupportCoarseGrainBuffer()))
5465 {
5466 if (!(accessFlags & ACCESS_FAST)) // memcpy used
5467 {
5468 bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer();
5469
5470 cl_svm_mem_flags memFlags = createFlags |
5471 (isFineGrainBuffer ? CL_MEM_SVM_FINE_GRAIN_BUFFER : 0);
5472
5473 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5474 CV_DbgAssert(svmFns->isValid());
5475
5476 CV_OPENCL_SVM_TRACE_P("clSVMAlloc + copy: %d\n", (int)u->size);
5477 handle = svmFns->fn_clSVMAlloc((cl_context)ctx.ptr(), memFlags, u->size, 0);
5478 CV_Assert(handle);
5479
5480 cl_command_queue q = NULL;
5481 if (!isFineGrainBuffer)
5482 {
5483 q = (cl_command_queue)Queue::getDefault().ptr();
5484 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", handle, (int)u->size);
5485 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_TRUE, CL_MAP_WRITE,
5486 handle, u->size,
5487 0, NULL, NULL);
5488 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
5489
5490 }
5491 memcpy(handle, u->origdata, u->size);
5492 if (!isFineGrainBuffer)
5493 {
5494 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", handle);
5495 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, handle, 0, NULL, NULL);
5496 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
5497 }
5498
5499 tempUMatFlags = UMatData::TEMP_UMAT | UMatData::TEMP_COPIED_UMAT;
5500 allocatorFlags |= isFineGrainBuffer ? svm::OPENCL_SVM_FINE_GRAIN_BUFFER
5501 : svm::OPENCL_SVM_COARSE_GRAIN_BUFFER;
5502 }
5503 }
5504 else
5505 #endif
5506 {
5507 if( copyOnMap )
5508 accessFlags &= ~ACCESS_FAST;
5509
5510 tempUMatFlags = UMatData::TEMP_UMAT;
5511 if (
5512 #ifdef __APPLE__
5513 !copyOnMap &&
5514 #endif
5515 CV_OPENCL_ENABLE_MEM_USE_HOST_PTR
5516 // There are OpenCL runtime issues for less aligned data
5517 && (CV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR != 0
5518 && u->origdata == cv::alignPtr(u->origdata, (int)CV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR))
5519 // Avoid sharing of host memory between OpenCL buffers
5520 && !(u->originalUMatData && u->originalUMatData->handle)
5521 )
5522 {
5523 // Change the host-side origdata[size] to "pinned memory" that enables fast
5524 // DMA-transfers over PCIe to the device. Often used with clEnqueueMapBuffer/clEnqueueUnmapMemObject
5525 handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|(createFlags & ~CL_MEM_ALLOC_HOST_PTR),
5526 u->size, u->origdata, &retval);
5527 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer(CL_MEM_USE_HOST_PTR|(createFlags & ~CL_MEM_ALLOC_HOST_PTR), sz=%lld, origdata=%p) => %p",
5528 (long long int)u->size, u->origdata, (void*)handle).c_str());
5529 }
5530 if((!handle || retval < 0) && !(accessFlags & ACCESS_FAST))
5531 {
5532 // Allocate device-side memory and immediately copy data from the host-side pointer origdata[size].
5533 // If createFlags=CL_MEM_ALLOC_HOST_PTR (aka cv::USAGE_ALLOCATE_HOST_MEMORY), then
5534 // additionally allocate a host-side "pinned" duplicate of the origdata that is
5535 // managed by OpenCL. This is potentially faster in unaligned/unmanaged scenarios.
5536 handle = clCreateBuffer(ctx_handle, CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags,
5537 u->size, u->origdata, &retval);
5538 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer(CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags, sz=%lld, origdata=%p) => %p",
5539 (long long int)u->size, u->origdata, (void*)handle).c_str());
5540 tempUMatFlags |= UMatData::TEMP_COPIED_UMAT;
5541 }
5542 }
5543 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer() => %p", (void*)handle).c_str());
5544 if(!handle || retval != CL_SUCCESS)
5545 return false;
5546 u->handle = handle;
5547 u->prevAllocator = u->currAllocator;
5548 u->currAllocator = this;
5549 u->flags |= tempUMatFlags | flags0;
5550 u->allocatorFlags_ = allocatorFlags;
5551 }
5552 if (!!(accessFlags & ACCESS_WRITE))
5553 u->markHostCopyObsolete(true);
5554 opencl_allocator_stats.onAllocate(u->size);
5555 return true;
5556 }
5557
5558 /*void sync(UMatData* u) const
5559 {
5560 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5561 UMatDataAutoLock lock(u);
5562
5563 if( u->hostCopyObsolete() && u->handle && u->refcount > 0 && u->origdata)
5564 {
5565 if( u->tempCopiedUMat() )
5566 {
5567 clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
5568 u->size, u->origdata, 0, 0, 0);
5569 }
5570 else
5571 {
5572 cl_int retval = 0;
5573 void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
5574 (CL_MAP_READ | CL_MAP_WRITE),
5575 0, u->size, 0, 0, 0, &retval);
5576 clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0);
5577 clFinish(q);
5578 }
5579 u->markHostCopyObsolete(false);
5580 }
5581 else if( u->copyOnMap() && u->deviceCopyObsolete() && u->data )
5582 {
5583 clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
5584 u->size, u->data, 0, 0, 0);
5585 }
5586 }*/
5587
deallocate(UMatData * u) const5588 void deallocate(UMatData* u) const CV_OVERRIDE
5589 {
5590 if(!u)
5591 return;
5592
5593 CV_Assert(u->urefcount == 0);
5594 CV_Assert(u->refcount == 0 && "UMat deallocation error: some derived Mat is still alive");
5595
5596 CV_Assert(u->handle != 0);
5597 CV_Assert(u->mapcount == 0);
5598
5599 if (!!(u->flags & UMatData::ASYNC_CLEANUP))
5600 addToCleanupQueue(u);
5601 else
5602 deallocate_(u);
5603 }
5604
deallocate_(UMatData * u) const5605 void deallocate_(UMatData* u) const
5606 {
5607 CV_Assert(u);
5608 CV_Assert(u->handle);
5609 if ((u->allocatorFlags_ & ALLOCATOR_FLAGS_EXTERNAL_BUFFER) == 0)
5610 {
5611 opencl_allocator_stats.onFree(u->size);
5612 }
5613
5614 #ifdef _WIN32
5615 if (cv::__termination) // process is not in consistent state (after ExitProcess call) and terminating
5616 return; // avoid any OpenCL calls
5617 #endif
5618 if(u->tempUMat())
5619 {
5620 CV_Assert(u->origdata);
5621 // UMatDataAutoLock lock(u);
5622
5623 if (u->hostCopyObsolete())
5624 {
5625 #ifdef HAVE_OPENCL_SVM
5626 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5627 {
5628 Context& ctx = Context::getDefault();
5629 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5630 CV_DbgAssert(svmFns->isValid());
5631
5632 if( u->tempCopiedUMat() )
5633 {
5634 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
5635 (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER);
5636 bool isFineGrainBuffer = (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER;
5637 cl_command_queue q = NULL;
5638 if (!isFineGrainBuffer)
5639 {
5640 CV_DbgAssert(((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0));
5641 q = (cl_command_queue)Queue::getDefault().ptr();
5642 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
5643 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ,
5644 u->handle, u->size,
5645 0, NULL, NULL);
5646 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
5647 }
5648 clFinish(q);
5649 memcpy(u->origdata, u->handle, u->size);
5650 if (!isFineGrainBuffer)
5651 {
5652 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
5653 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle, 0, NULL, NULL);
5654 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
5655 }
5656 }
5657 else
5658 {
5659 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM);
5660 // nothing
5661 }
5662 }
5663 else
5664 #endif
5665 {
5666 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5667 if( u->tempCopiedUMat() )
5668 {
5669 AlignedDataPtr<false, true> alignedPtr(u->origdata, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
5670 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
5671 u->size, alignedPtr.getAlignedPtr(), 0, 0, 0));
5672 }
5673 else
5674 {
5675 cl_int retval = 0;
5676 if (u->tempUMat())
5677 {
5678 CV_Assert(u->mapcount == 0);
5679 flushCleanupQueue(); // workaround for CL_OUT_OF_RESOURCES problem (#9960)
5680 void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
5681 (CL_MAP_READ | CL_MAP_WRITE),
5682 0, u->size, 0, 0, 0, &retval);
5683 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueMapBuffer(handle=%p, sz=%lld) => %p", (void*)u->handle, (long long int)u->size, data).c_str());
5684 CV_Assert(u->origdata == data && "Details: https://github.com/opencv/opencv/issues/6293");
5685 if (u->originalUMatData)
5686 {
5687 CV_Assert(u->originalUMatData->data == data);
5688 }
5689 retval = clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0);
5690 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueUnmapMemObject(handle=%p, data=%p, [sz=%lld])", (void*)u->handle, data, (long long int)u->size).c_str());
5691 CV_OCL_DBG_CHECK(clFinish(q));
5692 }
5693 }
5694 }
5695 u->markHostCopyObsolete(false);
5696 }
5697 else
5698 {
5699 // nothing
5700 }
5701 #ifdef HAVE_OPENCL_SVM
5702 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5703 {
5704 if( u->tempCopiedUMat() )
5705 {
5706 Context& ctx = Context::getDefault();
5707 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5708 CV_DbgAssert(svmFns->isValid());
5709
5710 CV_OPENCL_SVM_TRACE_P("clSVMFree: %p\n", u->handle);
5711 svmFns->fn_clSVMFree((cl_context)ctx.ptr(), u->handle);
5712 }
5713 }
5714 else
5715 #endif
5716 {
5717 cl_int retval = clReleaseMemObject((cl_mem)u->handle);
5718 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clReleaseMemObject(ptr=%p)", (void*)u->handle).c_str());
5719 }
5720 u->handle = 0;
5721 u->markDeviceCopyObsolete(true);
5722 u->currAllocator = u->prevAllocator;
5723 u->prevAllocator = NULL;
5724 if(u->data && u->copyOnMap() && u->data != u->origdata)
5725 fastFree(u->data);
5726 u->data = u->origdata;
5727 u->currAllocator->deallocate(u);
5728 u = NULL;
5729 }
5730 else
5731 {
5732 CV_Assert(u->origdata == NULL);
5733 if(u->data && u->copyOnMap() && u->data != u->origdata)
5734 {
5735 fastFree(u->data);
5736 u->data = 0;
5737 u->markHostCopyObsolete(true);
5738 }
5739 if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_USED)
5740 {
5741 std::shared_ptr<ocl::Context> pCtx = std::static_pointer_cast<ocl::Context>(u->allocatorContext);
5742 CV_Assert(pCtx);
5743 ocl::Context& ctx = *pCtx.get();
5744 CV_Assert(ctx.getImpl());
5745 ctx.getImpl()->getBufferPool().release((cl_mem)u->handle);
5746 }
5747 else if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED)
5748 {
5749 std::shared_ptr<ocl::Context> pCtx = std::static_pointer_cast<ocl::Context>(u->allocatorContext);
5750 CV_Assert(pCtx);
5751 ocl::Context& ctx = *pCtx.get();
5752 CV_Assert(ctx.getImpl());
5753 ctx.getImpl()->getBufferPoolHostPtr().release((cl_mem)u->handle);
5754 }
5755 #ifdef HAVE_OPENCL_SVM
5756 else if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED)
5757 {
5758 std::shared_ptr<ocl::Context> pCtx = std::static_pointer_cast<ocl::Context>(u->allocatorContext);
5759 CV_Assert(pCtx);
5760 ocl::Context& ctx = *pCtx.get();
5761 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
5762 {
5763 //nothing
5764 }
5765 else if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
5766 (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5767 {
5768 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5769 CV_DbgAssert(svmFns->isValid());
5770 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5771
5772 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) != 0)
5773 {
5774 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
5775 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle, 0, NULL, NULL);
5776 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
5777 }
5778 }
5779 CV_Assert(ctx.getImpl());
5780 ctx.getImpl()->getBufferPoolSVM().release((void*)u->handle);
5781 }
5782 #endif
5783 else
5784 {
5785 CV_OCL_DBG_CHECK(clReleaseMemObject((cl_mem)u->handle));
5786 }
5787 u->handle = 0;
5788 u->markDeviceCopyObsolete(true);
5789 delete u;
5790 u = NULL;
5791 }
5792 CV_Assert(u == NULL);
5793 }
5794
5795 // synchronized call (external UMatDataAutoLock, see UMat::getMat)
map(UMatData * u,AccessFlag accessFlags) const5796 void map(UMatData* u, AccessFlag accessFlags) const CV_OVERRIDE
5797 {
5798 CV_Assert(u && u->handle);
5799
5800 if (!!(accessFlags & ACCESS_WRITE))
5801 u->markDeviceCopyObsolete(true);
5802
5803 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5804
5805 {
5806 if( !u->copyOnMap() )
5807 {
5808 // TODO
5809 // because there can be other map requests for the same UMat with different access flags,
5810 // we use the universal (read-write) access mode.
5811 #ifdef HAVE_OPENCL_SVM
5812 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5813 {
5814 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5815 {
5816 Context& ctx = Context::getDefault();
5817 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5818 CV_DbgAssert(svmFns->isValid());
5819
5820 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0)
5821 {
5822 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
5823 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ | CL_MAP_WRITE,
5824 u->handle, u->size,
5825 0, NULL, NULL);
5826 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
5827 u->allocatorFlags_ |= svm::OPENCL_SVM_BUFFER_MAP;
5828 }
5829 }
5830 clFinish(q);
5831 u->data = (uchar*)u->handle;
5832 u->markHostCopyObsolete(false);
5833 u->markDeviceMemMapped(true);
5834 return;
5835 }
5836 #endif
5837
5838 cl_int retval = CL_SUCCESS;
5839 if (!u->deviceMemMapped())
5840 {
5841 CV_Assert(u->refcount == 1);
5842 CV_Assert(u->mapcount++ == 0);
5843 u->data = (uchar*)clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
5844 (CL_MAP_READ | CL_MAP_WRITE),
5845 0, u->size, 0, 0, 0, &retval);
5846 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clEnqueueMapBuffer(handle=%p, sz=%lld) => %p", (void*)u->handle, (long long int)u->size, u->data).c_str());
5847 }
5848 if (u->data && retval == CL_SUCCESS)
5849 {
5850 u->markHostCopyObsolete(false);
5851 u->markDeviceMemMapped(true);
5852 return;
5853 }
5854
5855 // TODO Is it really a good idea and was it tested well?
5856 // if map failed, switch to copy-on-map mode for the particular buffer
5857 u->flags |= UMatData::COPY_ON_MAP;
5858 }
5859
5860 if(!u->data)
5861 {
5862 u->data = (uchar*)fastMalloc(u->size);
5863 u->markHostCopyObsolete(true);
5864 }
5865 }
5866
5867 if (!!(accessFlags & ACCESS_READ) && u->hostCopyObsolete())
5868 {
5869 AlignedDataPtr<false, true> alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
5870 #ifdef HAVE_OPENCL_SVM
5871 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == 0);
5872 #endif
5873 cl_int retval = clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
5874 0, u->size, alignedPtr.getAlignedPtr(), 0, 0, 0);
5875 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueReadBuffer(q, handle=%p, CL_TRUE, 0, sz=%lld, data=%p, 0, 0, 0)",
5876 (void*)u->handle, (long long int)u->size, alignedPtr.getAlignedPtr()).c_str());
5877 u->markHostCopyObsolete(false);
5878 }
5879 }
5880
unmap(UMatData * u) const5881 void unmap(UMatData* u) const CV_OVERRIDE
5882 {
5883 if(!u)
5884 return;
5885
5886
5887 CV_Assert(u->handle != 0);
5888
5889 UMatDataAutoLock autolock(u);
5890
5891 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5892 cl_int retval = 0;
5893 if( !u->copyOnMap() && u->deviceMemMapped() )
5894 {
5895 CV_Assert(u->data != NULL);
5896 #ifdef HAVE_OPENCL_SVM
5897 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5898 {
5899 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5900 {
5901 Context& ctx = Context::getDefault();
5902 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5903 CV_DbgAssert(svmFns->isValid());
5904
5905 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) != 0);
5906 {
5907 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
5908 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle,
5909 0, NULL, NULL);
5910 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
5911 clFinish(q);
5912 u->allocatorFlags_ &= ~svm::OPENCL_SVM_BUFFER_MAP;
5913 }
5914 }
5915 if (u->refcount == 0)
5916 u->data = 0;
5917 u->markDeviceCopyObsolete(false);
5918 u->markHostCopyObsolete(true);
5919 return;
5920 }
5921 #endif
5922 if (u->refcount == 0)
5923 {
5924 CV_Assert(u->mapcount-- == 1);
5925 retval = clEnqueueUnmapMemObject(q, (cl_mem)u->handle, u->data, 0, 0, 0);
5926 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueUnmapMemObject(handle=%p, data=%p, [sz=%lld])", (void*)u->handle, u->data, (long long int)u->size).c_str());
5927 if (Device::getDefault().isAMD())
5928 {
5929 // required for multithreaded applications (see stitching test)
5930 CV_OCL_DBG_CHECK(clFinish(q));
5931 }
5932 u->markDeviceMemMapped(false);
5933 u->data = 0;
5934 u->markDeviceCopyObsolete(false);
5935 u->markHostCopyObsolete(true);
5936 }
5937 }
5938 else if( u->copyOnMap() && u->deviceCopyObsolete() )
5939 {
5940 AlignedDataPtr<true, false> alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
5941 #ifdef HAVE_OPENCL_SVM
5942 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == 0);
5943 #endif
5944 retval = clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
5945 0, u->size, alignedPtr.getAlignedPtr(), 0, 0, 0);
5946 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueWriteBuffer(q, handle=%p, CL_TRUE, 0, sz=%lld, data=%p, 0, 0, 0)",
5947 (void*)u->handle, (long long int)u->size, alignedPtr.getAlignedPtr()).c_str());
5948 u->markDeviceCopyObsolete(false);
5949 u->markHostCopyObsolete(true);
5950 }
5951 }
5952
checkContinuous(int dims,const size_t sz[],const size_t srcofs[],const size_t srcstep[],const size_t dstofs[],const size_t dststep[],size_t & total,size_t new_sz[],size_t & srcrawofs,size_t new_srcofs[],size_t new_srcstep[],size_t & dstrawofs,size_t new_dstofs[],size_t new_dststep[]) const5953 bool checkContinuous(int dims, const size_t sz[],
5954 const size_t srcofs[], const size_t srcstep[],
5955 const size_t dstofs[], const size_t dststep[],
5956 size_t& total, size_t new_sz[],
5957 size_t& srcrawofs, size_t new_srcofs[], size_t new_srcstep[],
5958 size_t& dstrawofs, size_t new_dstofs[], size_t new_dststep[]) const
5959 {
5960 bool iscontinuous = true;
5961 srcrawofs = srcofs ? srcofs[dims-1] : 0;
5962 dstrawofs = dstofs ? dstofs[dims-1] : 0;
5963 total = sz[dims-1];
5964 for( int i = dims-2; i >= 0; i-- )
5965 {
5966 if( i >= 0 && (total != srcstep[i] || total != dststep[i]) )
5967 iscontinuous = false;
5968 total *= sz[i];
5969 if( srcofs )
5970 srcrawofs += srcofs[i]*srcstep[i];
5971 if( dstofs )
5972 dstrawofs += dstofs[i]*dststep[i];
5973 }
5974
5975 if( !iscontinuous )
5976 {
5977 // OpenCL uses {x, y, z} order while OpenCV uses {z, y, x} order.
5978 if( dims == 2 )
5979 {
5980 new_sz[0] = sz[1]; new_sz[1] = sz[0]; new_sz[2] = 1;
5981 // we assume that new_... arrays are initialized by caller
5982 // with 0's, so there is no else branch
5983 if( srcofs )
5984 {
5985 new_srcofs[0] = srcofs[1];
5986 new_srcofs[1] = srcofs[0];
5987 new_srcofs[2] = 0;
5988 }
5989
5990 if( dstofs )
5991 {
5992 new_dstofs[0] = dstofs[1];
5993 new_dstofs[1] = dstofs[0];
5994 new_dstofs[2] = 0;
5995 }
5996
5997 new_srcstep[0] = srcstep[0]; new_srcstep[1] = 0;
5998 new_dststep[0] = dststep[0]; new_dststep[1] = 0;
5999 }
6000 else
6001 {
6002 // we could check for dims == 3 here,
6003 // but from user perspective this one is more informative
6004 CV_Assert(dims <= 3);
6005 new_sz[0] = sz[2]; new_sz[1] = sz[1]; new_sz[2] = sz[0];
6006 if( srcofs )
6007 {
6008 new_srcofs[0] = srcofs[2];
6009 new_srcofs[1] = srcofs[1];
6010 new_srcofs[2] = srcofs[0];
6011 }
6012
6013 if( dstofs )
6014 {
6015 new_dstofs[0] = dstofs[2];
6016 new_dstofs[1] = dstofs[1];
6017 new_dstofs[2] = dstofs[0];
6018 }
6019
6020 new_srcstep[0] = srcstep[1]; new_srcstep[1] = srcstep[0];
6021 new_dststep[0] = dststep[1]; new_dststep[1] = dststep[0];
6022 }
6023 }
6024 return iscontinuous;
6025 }
6026
download(UMatData * u,void * dstptr,int dims,const size_t sz[],const size_t srcofs[],const size_t srcstep[],const size_t dststep[]) const6027 void download(UMatData* u, void* dstptr, int dims, const size_t sz[],
6028 const size_t srcofs[], const size_t srcstep[],
6029 const size_t dststep[]) const CV_OVERRIDE
6030 {
6031 if(!u)
6032 return;
6033 UMatDataAutoLock autolock(u);
6034
6035 if( u->data && !u->hostCopyObsolete() )
6036 {
6037 Mat::getDefaultAllocator()->download(u, dstptr, dims, sz, srcofs, srcstep, dststep);
6038 return;
6039 }
6040 CV_Assert( u->handle != 0 );
6041
6042 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
6043
6044 size_t total = 0, new_sz[] = {0, 0, 0};
6045 size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
6046 size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
6047
6048 bool iscontinuous = checkContinuous(dims, sz, srcofs, srcstep, 0, dststep,
6049 total, new_sz,
6050 srcrawofs, new_srcofs, new_srcstep,
6051 dstrawofs, new_dstofs, new_dststep);
6052
6053 #ifdef HAVE_OPENCL_SVM
6054 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
6055 {
6056 CV_DbgAssert(u->data == NULL || u->data == u->handle);
6057 Context& ctx = Context::getDefault();
6058 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
6059 CV_DbgAssert(svmFns->isValid());
6060
6061 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0);
6062 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
6063 {
6064 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
6065 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ,
6066 u->handle, u->size,
6067 0, NULL, NULL);
6068 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
6069 }
6070 clFinish(q);
6071 if( iscontinuous )
6072 {
6073 memcpy(dstptr, (uchar*)u->handle + srcrawofs, total);
6074 }
6075 else
6076 {
6077 // This code is from MatAllocator::download()
6078 int isz[CV_MAX_DIM];
6079 uchar* srcptr = (uchar*)u->handle;
6080 for( int i = 0; i < dims; i++ )
6081 {
6082 CV_Assert( sz[i] <= (size_t)INT_MAX );
6083 if( sz[i] == 0 )
6084 return;
6085 if( srcofs )
6086 srcptr += srcofs[i]*(i <= dims-2 ? srcstep[i] : 1);
6087 isz[i] = (int)sz[i];
6088 }
6089
6090 Mat src(dims, isz, CV_8U, srcptr, srcstep);
6091 Mat dst(dims, isz, CV_8U, dstptr, dststep);
6092
6093 const Mat* arrays[] = { &src, &dst };
6094 uchar* ptrs[2];
6095 NAryMatIterator it(arrays, ptrs, 2);
6096 size_t j, planesz = it.size;
6097
6098 for( j = 0; j < it.nplanes; j++, ++it )
6099 memcpy(ptrs[1], ptrs[0], planesz);
6100 }
6101 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
6102 {
6103 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
6104 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle,
6105 0, NULL, NULL);
6106 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
6107 clFinish(q);
6108 }
6109 }
6110 else
6111 #endif
6112 {
6113 if( iscontinuous )
6114 {
6115 AlignedDataPtr<false, true> alignedPtr((uchar*)dstptr, total, CV_OPENCL_DATA_PTR_ALIGNMENT);
6116 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
6117 srcrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0));
6118 }
6119 else if (CV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS)
6120 {
6121 const size_t padding = CV_OPENCL_DATA_PTR_ALIGNMENT;
6122 size_t new_srcrawofs = srcrawofs & ~(padding-1);
6123 size_t membuf_ofs = srcrawofs - new_srcrawofs;
6124 AlignedDataPtr2D<false, false> alignedPtr(0, new_sz[1], new_srcstep[0], new_srcstep[0],
6125 CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
6126 uchar* ptr = alignedPtr.getAlignedPtr();
6127
6128 CV_Assert(new_srcstep[0] >= new_sz[0]);
6129 total = alignSize(new_srcstep[0]*new_sz[1] + membuf_ofs, padding);
6130 total = std::min(total, u->size - new_srcrawofs);
6131 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
6132 new_srcrawofs, total, ptr, 0, 0, 0));
6133 for( size_t i = 0; i < new_sz[1]; i++ )
6134 memcpy( (uchar*)dstptr + i*new_dststep[0], ptr + i*new_srcstep[0] + membuf_ofs, new_sz[0]);
6135 }
6136 else
6137 {
6138 AlignedDataPtr2D<false, true> alignedPtr((uchar*)dstptr, new_sz[1], new_sz[0], new_dststep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
6139 uchar* ptr = alignedPtr.getAlignedPtr();
6140
6141 CV_OCL_CHECK(clEnqueueReadBufferRect(q, (cl_mem)u->handle, CL_TRUE,
6142 new_srcofs, new_dstofs, new_sz,
6143 new_srcstep[0], 0,
6144 new_dststep[0], 0,
6145 ptr, 0, 0, 0));
6146 }
6147 }
6148 }
6149
upload(UMatData * u,const void * srcptr,int dims,const size_t sz[],const size_t dstofs[],const size_t dststep[],const size_t srcstep[]) const6150 void upload(UMatData* u, const void* srcptr, int dims, const size_t sz[],
6151 const size_t dstofs[], const size_t dststep[],
6152 const size_t srcstep[]) const CV_OVERRIDE
6153 {
6154 if(!u)
6155 return;
6156
6157 // there should be no user-visible CPU copies of the UMat which we are going to copy to
6158 CV_Assert(u->refcount == 0 || u->tempUMat());
6159
6160 size_t total = 0, new_sz[] = {0, 0, 0};
6161 size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
6162 size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
6163
6164 bool iscontinuous = checkContinuous(dims, sz, 0, srcstep, dstofs, dststep,
6165 total, new_sz,
6166 srcrawofs, new_srcofs, new_srcstep,
6167 dstrawofs, new_dstofs, new_dststep);
6168
6169 UMatDataAutoLock autolock(u);
6170
6171 // if there is cached CPU copy of the GPU matrix,
6172 // we could use it as a destination.
6173 // we can do it in 2 cases:
6174 // 1. we overwrite the whole content
6175 // 2. we overwrite part of the matrix, but the GPU copy is out-of-date
6176 if( u->data && (u->hostCopyObsolete() < u->deviceCopyObsolete() || total == u->size))
6177 {
6178 Mat::getDefaultAllocator()->upload(u, srcptr, dims, sz, dstofs, dststep, srcstep);
6179 u->markHostCopyObsolete(false);
6180 u->markDeviceCopyObsolete(true);
6181 return;
6182 }
6183
6184 CV_Assert( u->handle != 0 );
6185 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
6186
6187 #ifdef HAVE_OPENCL_SVM
6188 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
6189 {
6190 CV_DbgAssert(u->data == NULL || u->data == u->handle);
6191 Context& ctx = Context::getDefault();
6192 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
6193 CV_DbgAssert(svmFns->isValid());
6194
6195 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0);
6196 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
6197 {
6198 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
6199 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_WRITE,
6200 u->handle, u->size,
6201 0, NULL, NULL);
6202 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
6203 }
6204 clFinish(q);
6205 if( iscontinuous )
6206 {
6207 memcpy((uchar*)u->handle + dstrawofs, srcptr, total);
6208 }
6209 else
6210 {
6211 // This code is from MatAllocator::upload()
6212 int isz[CV_MAX_DIM];
6213 uchar* dstptr = (uchar*)u->handle;
6214 for( int i = 0; i < dims; i++ )
6215 {
6216 CV_Assert( sz[i] <= (size_t)INT_MAX );
6217 if( sz[i] == 0 )
6218 return;
6219 if( dstofs )
6220 dstptr += dstofs[i]*(i <= dims-2 ? dststep[i] : 1);
6221 isz[i] = (int)sz[i];
6222 }
6223
6224 Mat src(dims, isz, CV_8U, (void*)srcptr, srcstep);
6225 Mat dst(dims, isz, CV_8U, dstptr, dststep);
6226
6227 const Mat* arrays[] = { &src, &dst };
6228 uchar* ptrs[2];
6229 NAryMatIterator it(arrays, ptrs, 2);
6230 size_t j, planesz = it.size;
6231
6232 for( j = 0; j < it.nplanes; j++, ++it )
6233 memcpy(ptrs[1], ptrs[0], planesz);
6234 }
6235 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
6236 {
6237 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
6238 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle,
6239 0, NULL, NULL);
6240 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
6241 clFinish(q);
6242 }
6243 }
6244 else
6245 #endif
6246 {
6247 if( iscontinuous )
6248 {
6249 AlignedDataPtr<true, false> alignedPtr((uchar*)srcptr, total, CV_OPENCL_DATA_PTR_ALIGNMENT);
6250 cl_int retval = clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
6251 dstrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0);
6252 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueWriteBuffer(q, handle=%p, CL_TRUE, offset=%lld, sz=%lld, data=%p, 0, 0, 0)",
6253 (void*)u->handle, (long long int)dstrawofs, (long long int)u->size, alignedPtr.getAlignedPtr()).c_str());
6254 }
6255 else if (CV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS)
6256 {
6257 const size_t padding = CV_OPENCL_DATA_PTR_ALIGNMENT;
6258 size_t new_dstrawofs = dstrawofs & ~(padding-1);
6259 size_t membuf_ofs = dstrawofs - new_dstrawofs;
6260 AlignedDataPtr2D<false, false> alignedPtr(0, new_sz[1], new_dststep[0], new_dststep[0],
6261 CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
6262 uchar* ptr = alignedPtr.getAlignedPtr();
6263
6264 CV_Assert(new_dststep[0] >= new_sz[0] && new_srcstep[0] >= new_sz[0]);
6265 total = alignSize(new_dststep[0]*new_sz[1] + membuf_ofs, padding);
6266 total = std::min(total, u->size - new_dstrawofs);
6267 /*printf("new_sz0=%d, new_sz1=%d, membuf_ofs=%d, total=%d (%08x), new_dstrawofs=%d (%08x)\n",
6268 (int)new_sz[0], (int)new_sz[1], (int)membuf_ofs,
6269 (int)total, (int)total, (int)new_dstrawofs, (int)new_dstrawofs);*/
6270 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
6271 new_dstrawofs, total, ptr, 0, 0, 0));
6272 for( size_t i = 0; i < new_sz[1]; i++ )
6273 memcpy( ptr + i*new_dststep[0] + membuf_ofs, (uchar*)srcptr + i*new_srcstep[0], new_sz[0]);
6274 CV_OCL_CHECK(clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
6275 new_dstrawofs, total, ptr, 0, 0, 0));
6276 }
6277 else
6278 {
6279 AlignedDataPtr2D<true, false> alignedPtr((uchar*)srcptr, new_sz[1], new_sz[0], new_srcstep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
6280 uchar* ptr = alignedPtr.getAlignedPtr();
6281
6282 CV_OCL_CHECK(clEnqueueWriteBufferRect(q, (cl_mem)u->handle, CL_TRUE,
6283 new_dstofs, new_srcofs, new_sz,
6284 new_dststep[0], 0,
6285 new_srcstep[0], 0,
6286 ptr, 0, 0, 0));
6287 }
6288 }
6289 u->markHostCopyObsolete(true);
6290 #ifdef HAVE_OPENCL_SVM
6291 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
6292 (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
6293 {
6294 // nothing
6295 }
6296 else
6297 #endif
6298 {
6299 u->markHostCopyObsolete(true);
6300 }
6301 u->markDeviceCopyObsolete(false);
6302 }
6303
copy(UMatData * src,UMatData * dst,int dims,const size_t sz[],const size_t srcofs[],const size_t srcstep[],const size_t dstofs[],const size_t dststep[],bool _sync) const6304 void copy(UMatData* src, UMatData* dst, int dims, const size_t sz[],
6305 const size_t srcofs[], const size_t srcstep[],
6306 const size_t dstofs[], const size_t dststep[], bool _sync) const CV_OVERRIDE
6307 {
6308 if(!src || !dst)
6309 return;
6310
6311 size_t total = 0, new_sz[] = {0, 0, 0};
6312 size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
6313 size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
6314
6315 bool iscontinuous = checkContinuous(dims, sz, srcofs, srcstep, dstofs, dststep,
6316 total, new_sz,
6317 srcrawofs, new_srcofs, new_srcstep,
6318 dstrawofs, new_dstofs, new_dststep);
6319
6320 UMatDataAutoLock src_autolock(src, dst);
6321
6322 if( !src->handle || (src->data && src->hostCopyObsolete() < src->deviceCopyObsolete()) )
6323 {
6324 upload(dst, src->data + srcrawofs, dims, sz, dstofs, dststep, srcstep);
6325 return;
6326 }
6327 if( !dst->handle || (dst->data && dst->hostCopyObsolete() < dst->deviceCopyObsolete()) )
6328 {
6329 download(src, dst->data + dstrawofs, dims, sz, srcofs, srcstep, dststep);
6330 dst->markHostCopyObsolete(false);
6331 #ifdef HAVE_OPENCL_SVM
6332 if ((dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
6333 (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
6334 {
6335 // nothing
6336 }
6337 else
6338 #endif
6339 {
6340 dst->markDeviceCopyObsolete(true);
6341 }
6342 return;
6343 }
6344
6345 // there should be no user-visible CPU copies of the UMat which we are going to copy to
6346 CV_Assert(dst->refcount == 0);
6347 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
6348
6349 cl_int retval = CL_SUCCESS;
6350 #ifdef HAVE_OPENCL_SVM
6351 if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0 ||
6352 (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
6353 {
6354 if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0 &&
6355 (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
6356 {
6357 Context& ctx = Context::getDefault();
6358 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
6359 CV_DbgAssert(svmFns->isValid());
6360
6361 if( iscontinuous )
6362 {
6363 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMemcpy: %p <-- %p (%d)\n",
6364 (uchar*)dst->handle + dstrawofs, (uchar*)src->handle + srcrawofs, (int)total);
6365 cl_int status = svmFns->fn_clEnqueueSVMMemcpy(q, CL_TRUE,
6366 (uchar*)dst->handle + dstrawofs, (uchar*)src->handle + srcrawofs,
6367 total, 0, NULL, NULL);
6368 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMemcpy()");
6369 }
6370 else
6371 {
6372 clFinish(q);
6373 // This code is from MatAllocator::download()/upload()
6374 int isz[CV_MAX_DIM];
6375 uchar* srcptr = (uchar*)src->handle;
6376 for( int i = 0; i < dims; i++ )
6377 {
6378 CV_Assert( sz[i] <= (size_t)INT_MAX );
6379 if( sz[i] == 0 )
6380 return;
6381 if( srcofs )
6382 srcptr += srcofs[i]*(i <= dims-2 ? srcstep[i] : 1);
6383 isz[i] = (int)sz[i];
6384 }
6385 Mat m_src(dims, isz, CV_8U, srcptr, srcstep);
6386
6387 uchar* dstptr = (uchar*)dst->handle;
6388 for( int i = 0; i < dims; i++ )
6389 {
6390 if( dstofs )
6391 dstptr += dstofs[i]*(i <= dims-2 ? dststep[i] : 1);
6392 }
6393 Mat m_dst(dims, isz, CV_8U, dstptr, dststep);
6394
6395 const Mat* arrays[] = { &m_src, &m_dst };
6396 uchar* ptrs[2];
6397 NAryMatIterator it(arrays, ptrs, 2);
6398 size_t j, planesz = it.size;
6399
6400 for( j = 0; j < it.nplanes; j++, ++it )
6401 memcpy(ptrs[1], ptrs[0], planesz);
6402 }
6403 }
6404 else
6405 {
6406 if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
6407 {
6408 map(src, ACCESS_READ);
6409 upload(dst, src->data + srcrawofs, dims, sz, dstofs, dststep, srcstep);
6410 unmap(src);
6411 }
6412 else
6413 {
6414 map(dst, ACCESS_WRITE);
6415 download(src, dst->data + dstrawofs, dims, sz, srcofs, srcstep, dststep);
6416 unmap(dst);
6417 }
6418 }
6419 }
6420 else
6421 #endif
6422 {
6423 if( iscontinuous )
6424 {
6425 retval = clEnqueueCopyBuffer(q, (cl_mem)src->handle, (cl_mem)dst->handle,
6426 srcrawofs, dstrawofs, total, 0, 0, 0);
6427 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueCopyBuffer(q, src=%p, dst=%p, src_offset=%lld, dst_offset=%lld, sz=%lld, 0, 0, 0)",
6428 (void*)src->handle, (void*)dst->handle, (long long int)srcrawofs, (long long int)dstrawofs, (long long int)total).c_str());
6429 }
6430 else if (CV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS)
6431 {
6432 const size_t padding = CV_OPENCL_DATA_PTR_ALIGNMENT;
6433 size_t new_srcrawofs = srcrawofs & ~(padding-1);
6434 size_t srcmembuf_ofs = srcrawofs - new_srcrawofs;
6435 size_t new_dstrawofs = dstrawofs & ~(padding-1);
6436 size_t dstmembuf_ofs = dstrawofs - new_dstrawofs;
6437
6438 AlignedDataPtr2D<false, false> srcBuf(0, new_sz[1], new_srcstep[0], new_srcstep[0],
6439 CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
6440 AlignedDataPtr2D<false, false> dstBuf(0, new_sz[1], new_dststep[0], new_dststep[0],
6441 CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
6442 uchar* srcptr = srcBuf.getAlignedPtr();
6443 uchar* dstptr = dstBuf.getAlignedPtr();
6444
6445 CV_Assert(new_dststep[0] >= new_sz[0] && new_srcstep[0] >= new_sz[0]);
6446
6447 size_t src_total = alignSize(new_srcstep[0]*new_sz[1] + srcmembuf_ofs, padding);
6448 src_total = std::min(src_total, src->size - new_srcrawofs);
6449 size_t dst_total = alignSize(new_dststep[0]*new_sz[1] + dstmembuf_ofs, padding);
6450 dst_total = std::min(dst_total, dst->size - new_dstrawofs);
6451
6452 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)src->handle, CL_TRUE,
6453 new_srcrawofs, src_total, srcptr, 0, 0, 0));
6454 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)dst->handle, CL_TRUE,
6455 new_dstrawofs, dst_total, dstptr, 0, 0, 0));
6456
6457 for( size_t i = 0; i < new_sz[1]; i++ )
6458 memcpy( dstptr + dstmembuf_ofs + i*new_dststep[0],
6459 srcptr + srcmembuf_ofs + i*new_srcstep[0], new_sz[0]);
6460 CV_OCL_CHECK(clEnqueueWriteBuffer(q, (cl_mem)dst->handle, CL_TRUE,
6461 new_dstrawofs, dst_total, dstptr, 0, 0, 0));
6462 }
6463 else
6464 {
6465 CV_OCL_CHECK(retval = clEnqueueCopyBufferRect(q, (cl_mem)src->handle, (cl_mem)dst->handle,
6466 new_srcofs, new_dstofs, new_sz,
6467 new_srcstep[0], 0,
6468 new_dststep[0], 0,
6469 0, 0, 0));
6470 }
6471 }
6472 if (retval == CL_SUCCESS)
6473 {
6474 CV_IMPL_ADD(CV_IMPL_OCL)
6475 }
6476
6477 #ifdef HAVE_OPENCL_SVM
6478 if ((dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
6479 (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
6480 {
6481 // nothing
6482 }
6483 else
6484 #endif
6485 {
6486 dst->markHostCopyObsolete(true);
6487 }
6488 dst->markDeviceCopyObsolete(false);
6489
6490 if( _sync )
6491 {
6492 CV_OCL_DBG_CHECK(clFinish(q));
6493 }
6494 }
6495
getBufferPoolController(const char * id) const6496 BufferPoolController* getBufferPoolController(const char* id) const CV_OVERRIDE
6497 {
6498 ocl::Context ctx = Context::getDefault();
6499 if (ctx.empty())
6500 return NULL;
6501 #ifdef HAVE_OPENCL_SVM
6502 if ((svm::checkForceSVMUmatUsage() && (id == NULL || strcmp(id, "OCL") == 0)) || (id != NULL && strcmp(id, "SVM") == 0))
6503 {
6504 return &ctx.getImpl()->getBufferPoolSVM();
6505 }
6506 #endif
6507 if (id != NULL && strcmp(id, "HOST_ALLOC") == 0)
6508 {
6509 return &ctx.getImpl()->getBufferPoolHostPtr();
6510 }
6511 if (id != NULL && strcmp(id, "OCL") != 0)
6512 {
6513 CV_Error(cv::Error::StsBadArg, "getBufferPoolController(): unknown BufferPool ID\n");
6514 }
6515 return &ctx.getImpl()->getBufferPool();
6516 }
6517
6518 MatAllocator* matStdAllocator;
6519
6520 mutable cv::Mutex cleanupQueueMutex;
6521 mutable std::deque<UMatData*> cleanupQueue;
6522
flushCleanupQueue() const6523 void flushCleanupQueue() const
6524 {
6525 if (!cleanupQueue.empty())
6526 {
6527 std::deque<UMatData*> q;
6528 {
6529 cv::AutoLock lock(cleanupQueueMutex);
6530 q.swap(cleanupQueue);
6531 }
6532 for (std::deque<UMatData*>::const_iterator i = q.begin(); i != q.end(); ++i)
6533 {
6534 deallocate_(*i);
6535 }
6536 }
6537 }
addToCleanupQueue(UMatData * u) const6538 void addToCleanupQueue(UMatData* u) const
6539 {
6540 //TODO: Validation check: CV_Assert(!u->tempUMat());
6541 {
6542 cv::AutoLock lock(cleanupQueueMutex);
6543 cleanupQueue.push_back(u);
6544 }
6545 }
6546 };
6547
getOpenCLAllocator_()6548 static OpenCLAllocator* getOpenCLAllocator_() // call once guarantee
6549 {
6550 static OpenCLAllocator* g_allocator = new OpenCLAllocator(); // avoid destructor call (using of this object is too wide)
6551 return g_allocator;
6552 }
getOpenCLAllocator()6553 MatAllocator* getOpenCLAllocator()
6554 {
6555 CV_SINGLETON_LAZY_INIT(MatAllocator, getOpenCLAllocator_())
6556 }
6557
6558 }} // namespace cv::ocl
6559
6560
6561 namespace cv {
6562
// the two functions below are implemented in umatrix.cpp
6564 void setSize( UMat& m, int _dims, const int* _sz, const size_t* _steps,
6565 bool autoSteps = false );
6566 void finalizeHdr(UMat& m);
6567
6568 } // namespace cv
6569
6570
6571 namespace cv { namespace ocl {
6572
6573 /*
6574 // Convert OpenCL buffer memory to UMat
6575 */
convertFromBuffer(void * cl_mem_buffer,size_t step,int rows,int cols,int type,UMat & dst)6576 void convertFromBuffer(void* cl_mem_buffer, size_t step, int rows, int cols, int type, UMat& dst)
6577 {
6578 int d = 2;
6579 int sizes[] = { rows, cols };
6580
6581 CV_Assert(0 <= d && d <= CV_MAX_DIM);
6582
6583 dst.release();
6584
6585 dst.flags = (type & Mat::TYPE_MASK) | Mat::MAGIC_VAL;
6586 dst.usageFlags = USAGE_DEFAULT;
6587
6588 setSize(dst, d, sizes, 0, true);
6589 dst.offset = 0;
6590
6591 cl_mem memobj = (cl_mem)cl_mem_buffer;
6592 cl_mem_object_type mem_type = 0;
6593
6594 CV_OCL_CHECK(clGetMemObjectInfo(memobj, CL_MEM_TYPE, sizeof(cl_mem_object_type), &mem_type, 0));
6595
6596 CV_Assert(CL_MEM_OBJECT_BUFFER == mem_type);
6597
6598 size_t total = 0;
6599 CV_OCL_CHECK(clGetMemObjectInfo(memobj, CL_MEM_SIZE, sizeof(size_t), &total, 0));
6600
6601 CV_OCL_CHECK(clRetainMemObject(memobj));
6602
6603 CV_Assert((int)step >= cols * CV_ELEM_SIZE(type));
6604 CV_Assert(total >= rows * step);
6605
6606 // attach clBuffer to UMatData
6607 dst.u = new UMatData(getOpenCLAllocator());
6608 dst.u->data = 0;
6609 dst.u->allocatorFlags_ = OpenCLAllocator::ALLOCATOR_FLAGS_EXTERNAL_BUFFER; // not allocated from any OpenCV buffer pool
6610 dst.u->flags = static_cast<UMatData::MemoryFlag>(0);
6611 dst.u->handle = cl_mem_buffer;
6612 dst.u->origdata = 0;
6613 dst.u->prevAllocator = 0;
6614 dst.u->size = total;
6615
6616 finalizeHdr(dst);
6617 dst.addref();
6618
6619 return;
6620 } // convertFromBuffer()
6621
6622
6623 /*
6624 // Convert OpenCL image2d_t memory to UMat
6625 */
convertFromImage(void * cl_mem_image,UMat & dst)6626 void convertFromImage(void* cl_mem_image, UMat& dst)
6627 {
6628 cl_mem clImage = (cl_mem)cl_mem_image;
6629 cl_mem_object_type mem_type = 0;
6630
6631 CV_OCL_CHECK(clGetMemObjectInfo(clImage, CL_MEM_TYPE, sizeof(cl_mem_object_type), &mem_type, 0));
6632
6633 CV_Assert(CL_MEM_OBJECT_IMAGE2D == mem_type);
6634
6635 cl_image_format fmt = { 0, 0 };
6636 CV_OCL_CHECK(clGetImageInfo(clImage, CL_IMAGE_FORMAT, sizeof(cl_image_format), &fmt, 0));
6637
6638 int depth = CV_8U;
6639 switch (fmt.image_channel_data_type)
6640 {
6641 case CL_UNORM_INT8:
6642 case CL_UNSIGNED_INT8:
6643 depth = CV_8U;
6644 break;
6645
6646 case CL_SNORM_INT8:
6647 case CL_SIGNED_INT8:
6648 depth = CV_8S;
6649 break;
6650
6651 case CL_UNORM_INT16:
6652 case CL_UNSIGNED_INT16:
6653 depth = CV_16U;
6654 break;
6655
6656 case CL_SNORM_INT16:
6657 case CL_SIGNED_INT16:
6658 depth = CV_16S;
6659 break;
6660
6661 case CL_SIGNED_INT32:
6662 depth = CV_32S;
6663 break;
6664
6665 case CL_FLOAT:
6666 depth = CV_32F;
6667 break;
6668
6669 case CL_HALF_FLOAT:
6670 depth = CV_16F;
6671 break;
6672
6673 default:
6674 CV_Error(cv::Error::OpenCLApiCallError, "Not supported image_channel_data_type");
6675 }
6676
6677 int type = CV_8UC1;
6678 switch (fmt.image_channel_order)
6679 {
6680 case CL_R:
6681 case CL_A:
6682 case CL_INTENSITY:
6683 case CL_LUMINANCE:
6684 type = CV_MAKE_TYPE(depth, 1);
6685 break;
6686
6687 case CL_RG:
6688 case CL_RA:
6689 type = CV_MAKE_TYPE(depth, 2);
6690 break;
6691
6692 // CL_RGB has no mappings to OpenCV types because CL_RGB can only be used with
6693 // CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, or CL_UNORM_INT_101010.
6694 /*case CL_RGB:
6695 type = CV_MAKE_TYPE(depth, 3);
6696 break;*/
6697
6698 case CL_RGBA:
6699 case CL_BGRA:
6700 case CL_ARGB:
6701 type = CV_MAKE_TYPE(depth, 4);
6702 break;
6703
6704 default:
6705 CV_Error(cv::Error::OpenCLApiCallError, "Not supported image_channel_order");
6706 break;
6707 }
6708
6709 size_t step = 0;
6710 CV_OCL_CHECK(clGetImageInfo(clImage, CL_IMAGE_ROW_PITCH, sizeof(size_t), &step, 0));
6711
6712 size_t w = 0;
6713 CV_OCL_CHECK(clGetImageInfo(clImage, CL_IMAGE_WIDTH, sizeof(size_t), &w, 0));
6714
6715 size_t h = 0;
6716 CV_OCL_CHECK(clGetImageInfo(clImage, CL_IMAGE_HEIGHT, sizeof(size_t), &h, 0));
6717
6718 dst.create((int)h, (int)w, type);
6719
6720 cl_mem clBuffer = (cl_mem)dst.handle(ACCESS_READ);
6721
6722 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
6723
6724 size_t offset = 0;
6725 size_t src_origin[3] = { 0, 0, 0 };
6726 size_t region[3] = { w, h, 1 };
6727 CV_OCL_CHECK(clEnqueueCopyImageToBuffer(q, clImage, clBuffer, src_origin, region, offset, 0, NULL, NULL));
6728
6729 CV_OCL_CHECK(clFinish(q));
6730
6731 return;
6732 } // convertFromImage()
6733
6734
6735 ///////////////////////////////////////////// Utility functions /////////////////////////////////////////////////
6736
getDevices(std::vector<cl_device_id> & devices,cl_platform_id platform)6737 static void getDevices(std::vector<cl_device_id>& devices, cl_platform_id platform)
6738 {
6739 cl_uint numDevices = 0;
6740 cl_int status = clGetDeviceIDs(platform, (cl_device_type)Device::TYPE_ALL, 0, NULL, &numDevices);
6741 if (status != CL_DEVICE_NOT_FOUND) // Not an error if platform has no devices
6742 {
6743 CV_OCL_DBG_CHECK_RESULT(status,
6744 cv::format("clGetDeviceIDs(platform, Device::TYPE_ALL, num_entries=0, devices=NULL, numDevices=%p)", &numDevices).c_str());
6745 }
6746
6747 if (numDevices == 0)
6748 {
6749 devices.clear();
6750 return;
6751 }
6752
6753 devices.resize((size_t)numDevices);
6754 CV_OCL_DBG_CHECK(clGetDeviceIDs(platform, (cl_device_type)Device::TYPE_ALL, numDevices, &devices[0], &numDevices));
6755 }
6756
6757 struct PlatformInfo::Impl
6758 {
Implcv::ocl::PlatformInfo::Impl6759 Impl(void* id)
6760 {
6761 refcount = 1;
6762 handle = *(cl_platform_id*)id;
6763 getDevices(devices, handle);
6764
6765 version_ = getStrProp(CL_PLATFORM_VERSION);
6766 parseOpenCLVersion(version_, versionMajor_, versionMinor_);
6767 }
6768
getStrPropcv::ocl::PlatformInfo::Impl6769 String getStrProp(cl_platform_info prop) const
6770 {
6771 char buf[1024];
6772 size_t sz=0;
6773 return clGetPlatformInfo(handle, prop, sizeof(buf)-16, buf, &sz) == CL_SUCCESS &&
6774 sz < sizeof(buf) ? String(buf) : String();
6775 }
6776
6777 IMPLEMENT_REFCOUNTABLE();
6778 std::vector<cl_device_id> devices;
6779 cl_platform_id handle;
6780
6781 String version_;
6782 int versionMajor_;
6783 int versionMinor_;
6784 };
6785
PlatformInfo()6786 PlatformInfo::PlatformInfo() CV_NOEXCEPT
6787 {
6788 p = 0;
6789 }
6790
PlatformInfo(void * platform_id)6791 PlatformInfo::PlatformInfo(void* platform_id)
6792 {
6793 p = new Impl(platform_id);
6794 }
6795
~PlatformInfo()6796 PlatformInfo::~PlatformInfo()
6797 {
6798 if(p)
6799 p->release();
6800 }
6801
// Copy constructor: shares the implementation of `i` and adds a reference to it.
PlatformInfo::PlatformInfo(const PlatformInfo& i)
    : p(i.p)
{
    if (p)
        p->addref();
}
6808
// Copy assignment: switches to the implementation of `i`. The new
// implementation is referenced before the old one is released.
PlatformInfo& PlatformInfo::operator =(const PlatformInfo& i)
{
    if (i.p == p)
        return *this;   // already sharing the same implementation (or both empty)

    if (i.p)
        i.p->addref();
    if (p)
        p->release();
    p = i.p;
    return *this;
}
6821
PlatformInfo(PlatformInfo && i)6822 PlatformInfo::PlatformInfo(PlatformInfo&& i) CV_NOEXCEPT
6823 {
6824 p = i.p;
6825 i.p = nullptr;
6826 }
6827
operator =(PlatformInfo && i)6828 PlatformInfo& PlatformInfo::operator = (PlatformInfo&& i) CV_NOEXCEPT
6829 {
6830 if (this != &i) {
6831 if(p)
6832 p->release();
6833 p = i.p;
6834 i.p = nullptr;
6835 }
6836 return *this;
6837 }
6838
deviceNumber() const6839 int PlatformInfo::deviceNumber() const
6840 {
6841 return p ? (int)p->devices.size() : 0;
6842 }
6843
getDevice(Device & device,int d) const6844 void PlatformInfo::getDevice(Device& device, int d) const
6845 {
6846 CV_Assert(p && d < (int)p->devices.size() );
6847 if(p)
6848 device.set(p->devices[d]);
6849 }
6850
name() const6851 String PlatformInfo::name() const
6852 {
6853 return p ? p->getStrProp(CL_PLATFORM_NAME) : String();
6854 }
6855
vendor() const6856 String PlatformInfo::vendor() const
6857 {
6858 return p ? p->getStrProp(CL_PLATFORM_VENDOR) : String();
6859 }
6860
version() const6861 String PlatformInfo::version() const
6862 {
6863 return p ? p->version_ : String();
6864 }
6865
versionMajor() const6866 int PlatformInfo::versionMajor() const
6867 {
6868 CV_Assert(p);
6869 return p->versionMajor_;
6870 }
6871
versionMinor() const6872 int PlatformInfo::versionMinor() const
6873 {
6874 CV_Assert(p);
6875 return p->versionMinor_;
6876 }
6877
getPlatforms(std::vector<cl_platform_id> & platforms)6878 static void getPlatforms(std::vector<cl_platform_id>& platforms)
6879 {
6880 cl_uint numPlatforms = 0;
6881 CV_OCL_DBG_CHECK(clGetPlatformIDs(0, NULL, &numPlatforms));
6882
6883 if (numPlatforms == 0)
6884 {
6885 platforms.clear();
6886 return;
6887 }
6888
6889 platforms.resize((size_t)numPlatforms);
6890 CV_OCL_DBG_CHECK(clGetPlatformIDs(numPlatforms, &platforms[0], &numPlatforms));
6891 }
6892
getPlatfomsInfo(std::vector<PlatformInfo> & platformsInfo)6893 void getPlatfomsInfo(std::vector<PlatformInfo>& platformsInfo)
6894 {
6895 std::vector<cl_platform_id> platforms;
6896 getPlatforms(platforms);
6897
6898 for (size_t i = 0; i < platforms.size(); i++)
6899 platformsInfo.push_back( PlatformInfo((void*)&platforms[i]) );
6900 }
6901
typeToStr(int type)6902 const char* typeToStr(int type)
6903 {
6904 static const char* tab[]=
6905 {
6906 "uchar", "uchar2", "uchar3", "uchar4", 0, 0, 0, "uchar8", 0, 0, 0, 0, 0, 0, 0, "uchar16",
6907 "char", "char2", "char3", "char4", 0, 0, 0, "char8", 0, 0, 0, 0, 0, 0, 0, "char16",
6908 "ushort", "ushort2", "ushort3", "ushort4",0, 0, 0, "ushort8", 0, 0, 0, 0, 0, 0, 0, "ushort16",
6909 "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
6910 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6911 "float", "float2", "float3", "float4", 0, 0, 0, "float8", 0, 0, 0, 0, 0, 0, 0, "float16",
6912 "double", "double2", "double3", "double4", 0, 0, 0, "double8", 0, 0, 0, 0, 0, 0, 0, "double16",
6913 "half", "half2", "half3", "half4", 0, 0, 0, "half8", 0, 0, 0, 0, 0, 0, 0, "half16",
6914 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
6915 };
6916 int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
6917 const char* result = cn > 16 ? 0 : tab[depth*16 + cn-1];
6918 CV_Assert(result);
6919 return result;
6920 }
6921
memopTypeToStr(int type)6922 const char* memopTypeToStr(int type)
6923 {
6924 static const char* tab[] =
6925 {
6926 "uchar", "uchar2", "uchar3", "uchar4", 0, 0, 0, "uchar8", 0, 0, 0, 0, 0, 0, 0, "uchar16",
6927 "char", "char2", "char3", "char4", 0, 0, 0, "char8", 0, 0, 0, 0, 0, 0, 0, "char16",
6928 "ushort", "ushort2", "ushort3", "ushort4",0, 0, 0, "ushort8", 0, 0, 0, 0, 0, 0, 0, "ushort16",
6929 "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
6930 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6931 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6932 "ulong", "ulong2", "ulong3", "ulong4", 0, 0, 0, "ulong8", 0, 0, 0, 0, 0, 0, 0, "ulong16",
6933 "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
6934 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
6935 };
6936 int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
6937 const char* result = cn > 16 ? 0 : tab[depth*16 + cn-1];
6938 CV_Assert(result);
6939 return result;
6940 }
6941
vecopTypeToStr(int type)6942 const char* vecopTypeToStr(int type)
6943 {
6944 static const char* tab[] =
6945 {
6946 "uchar", "short", "uchar3", "int", 0, 0, 0, "int2", 0, 0, 0, 0, 0, 0, 0, "int4",
6947 "char", "short", "char3", "int", 0, 0, 0, "int2", 0, 0, 0, 0, 0, 0, 0, "int4",
6948 "ushort", "int", "ushort3", "int2",0, 0, 0, "int4", 0, 0, 0, 0, 0, 0, 0, "int8",
6949 "short", "int", "short3", "int2", 0, 0, 0, "int4", 0, 0, 0, 0, 0, 0, 0, "int8",
6950 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6951 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6952 "ulong", "ulong2", "ulong3", "ulong4", 0, 0, 0, "ulong8", 0, 0, 0, 0, 0, 0, 0, "ulong16",
6953 "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
6954 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
6955 };
6956 int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
6957 const char* result = cn > 16 ? 0 : tab[depth*16 + cn-1];
6958 CV_Assert(result);
6959 return result;
6960 }
6961
convertTypeStr(int sdepth,int ddepth,int cn,char * buf)6962 const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf)
6963 {
6964 if( sdepth == ddepth )
6965 return "noconvert";
6966 const char *typestr = typeToStr(CV_MAKETYPE(ddepth, cn));
6967 if( ddepth >= CV_32F ||
6968 (ddepth == CV_32S && sdepth < CV_32S) ||
6969 (ddepth == CV_16S && sdepth <= CV_8S) ||
6970 (ddepth == CV_16U && sdepth == CV_8U))
6971 {
6972 sprintf(buf, "convert_%s", typestr);
6973 }
6974 else if( sdepth >= CV_32F )
6975 sprintf(buf, "convert_%s%s_rte", typestr, (ddepth < CV_32S ? "_sat" : ""));
6976 else
6977 sprintf(buf, "convert_%s_sat", typestr);
6978
6979 return buf;
6980 }
6981
getOpenCLErrorString(int errorCode)6982 const char* getOpenCLErrorString(int errorCode)
6983 {
6984 #define CV_OCL_CODE(id) case id: return #id
6985 #define CV_OCL_CODE_(id, name) case id: return #name
6986 switch (errorCode)
6987 {
6988 CV_OCL_CODE(CL_SUCCESS);
6989 CV_OCL_CODE(CL_DEVICE_NOT_FOUND);
6990 CV_OCL_CODE(CL_DEVICE_NOT_AVAILABLE);
6991 CV_OCL_CODE(CL_COMPILER_NOT_AVAILABLE);
6992 CV_OCL_CODE(CL_MEM_OBJECT_ALLOCATION_FAILURE);
6993 CV_OCL_CODE(CL_OUT_OF_RESOURCES);
6994 CV_OCL_CODE(CL_OUT_OF_HOST_MEMORY);
6995 CV_OCL_CODE(CL_PROFILING_INFO_NOT_AVAILABLE);
6996 CV_OCL_CODE(CL_MEM_COPY_OVERLAP);
6997 CV_OCL_CODE(CL_IMAGE_FORMAT_MISMATCH);
6998 CV_OCL_CODE(CL_IMAGE_FORMAT_NOT_SUPPORTED);
6999 CV_OCL_CODE(CL_BUILD_PROGRAM_FAILURE);
7000 CV_OCL_CODE(CL_MAP_FAILURE);
7001 CV_OCL_CODE(CL_MISALIGNED_SUB_BUFFER_OFFSET);
7002 CV_OCL_CODE(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST);
7003 CV_OCL_CODE(CL_COMPILE_PROGRAM_FAILURE);
7004 CV_OCL_CODE(CL_LINKER_NOT_AVAILABLE);
7005 CV_OCL_CODE(CL_LINK_PROGRAM_FAILURE);
7006 CV_OCL_CODE(CL_DEVICE_PARTITION_FAILED);
7007 CV_OCL_CODE(CL_KERNEL_ARG_INFO_NOT_AVAILABLE);
7008 CV_OCL_CODE(CL_INVALID_VALUE);
7009 CV_OCL_CODE(CL_INVALID_DEVICE_TYPE);
7010 CV_OCL_CODE(CL_INVALID_PLATFORM);
7011 CV_OCL_CODE(CL_INVALID_DEVICE);
7012 CV_OCL_CODE(CL_INVALID_CONTEXT);
7013 CV_OCL_CODE(CL_INVALID_QUEUE_PROPERTIES);
7014 CV_OCL_CODE(CL_INVALID_COMMAND_QUEUE);
7015 CV_OCL_CODE(CL_INVALID_HOST_PTR);
7016 CV_OCL_CODE(CL_INVALID_MEM_OBJECT);
7017 CV_OCL_CODE(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR);
7018 CV_OCL_CODE(CL_INVALID_IMAGE_SIZE);
7019 CV_OCL_CODE(CL_INVALID_SAMPLER);
7020 CV_OCL_CODE(CL_INVALID_BINARY);
7021 CV_OCL_CODE(CL_INVALID_BUILD_OPTIONS);
7022 CV_OCL_CODE(CL_INVALID_PROGRAM);
7023 CV_OCL_CODE(CL_INVALID_PROGRAM_EXECUTABLE);
7024 CV_OCL_CODE(CL_INVALID_KERNEL_NAME);
7025 CV_OCL_CODE(CL_INVALID_KERNEL_DEFINITION);
7026 CV_OCL_CODE(CL_INVALID_KERNEL);
7027 CV_OCL_CODE(CL_INVALID_ARG_INDEX);
7028 CV_OCL_CODE(CL_INVALID_ARG_VALUE);
7029 CV_OCL_CODE(CL_INVALID_ARG_SIZE);
7030 CV_OCL_CODE(CL_INVALID_KERNEL_ARGS);
7031 CV_OCL_CODE(CL_INVALID_WORK_DIMENSION);
7032 CV_OCL_CODE(CL_INVALID_WORK_GROUP_SIZE);
7033 CV_OCL_CODE(CL_INVALID_WORK_ITEM_SIZE);
7034 CV_OCL_CODE(CL_INVALID_GLOBAL_OFFSET);
7035 CV_OCL_CODE(CL_INVALID_EVENT_WAIT_LIST);
7036 CV_OCL_CODE(CL_INVALID_EVENT);
7037 CV_OCL_CODE(CL_INVALID_OPERATION);
7038 CV_OCL_CODE(CL_INVALID_GL_OBJECT);
7039 CV_OCL_CODE(CL_INVALID_BUFFER_SIZE);
7040 CV_OCL_CODE(CL_INVALID_MIP_LEVEL);
7041 CV_OCL_CODE(CL_INVALID_GLOBAL_WORK_SIZE);
7042 // OpenCL 1.1
7043 CV_OCL_CODE(CL_INVALID_PROPERTY);
7044 // OpenCL 1.2
7045 CV_OCL_CODE(CL_INVALID_IMAGE_DESCRIPTOR);
7046 CV_OCL_CODE(CL_INVALID_COMPILER_OPTIONS);
7047 CV_OCL_CODE(CL_INVALID_LINKER_OPTIONS);
7048 CV_OCL_CODE(CL_INVALID_DEVICE_PARTITION_COUNT);
7049 // OpenCL 2.0
7050 CV_OCL_CODE_(-69, CL_INVALID_PIPE_SIZE);
7051 CV_OCL_CODE_(-70, CL_INVALID_DEVICE_QUEUE);
7052 // Extensions
7053 CV_OCL_CODE_(-1000, CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR);
7054 CV_OCL_CODE_(-1001, CL_PLATFORM_NOT_FOUND_KHR);
7055 CV_OCL_CODE_(-1002, CL_INVALID_D3D10_DEVICE_KHR);
7056 CV_OCL_CODE_(-1003, CL_INVALID_D3D10_RESOURCE_KHR);
7057 CV_OCL_CODE_(-1004, CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR);
7058 CV_OCL_CODE_(-1005, CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR);
7059 default: return "Unknown OpenCL error";
7060 }
7061 #undef CV_OCL_CODE
7062 #undef CV_OCL_CODE_
7063 }
7064
7065 template <typename T>
kerToStr(const Mat & k)7066 static std::string kerToStr(const Mat & k)
7067 {
7068 int width = k.cols - 1, depth = k.depth();
7069 const T * const data = k.ptr<T>();
7070
7071 std::ostringstream stream;
7072 stream.precision(10);
7073
7074 if (depth <= CV_8S)
7075 {
7076 for (int i = 0; i < width; ++i)
7077 stream << "DIG(" << (int)data[i] << ")";
7078 stream << "DIG(" << (int)data[width] << ")";
7079 }
7080 else if (depth == CV_32F)
7081 {
7082 stream.setf(std::ios_base::showpoint);
7083 for (int i = 0; i < width; ++i)
7084 stream << "DIG(" << data[i] << "f)";
7085 stream << "DIG(" << data[width] << "f)";
7086 }
7087 else if (depth == CV_16F)
7088 {
7089 stream.setf(std::ios_base::showpoint);
7090 for (int i = 0; i < width; ++i)
7091 stream << "DIG(" << (float)data[i] << "h)";
7092 stream << "DIG(" << (float)data[width] << "h)";
7093 }
7094 else
7095 {
7096 for (int i = 0; i < width; ++i)
7097 stream << "DIG(" << data[i] << ")";
7098 stream << "DIG(" << data[width] << ")";
7099 }
7100
7101 return stream.str();
7102 }
7103
kernelToStr(InputArray _kernel,int ddepth,const char * name)7104 String kernelToStr(InputArray _kernel, int ddepth, const char * name)
7105 {
7106 Mat kernel = _kernel.getMat().reshape(1, 1);
7107
7108 int depth = kernel.depth();
7109 if (ddepth < 0)
7110 ddepth = depth;
7111
7112 if (ddepth != depth)
7113 kernel.convertTo(kernel, ddepth);
7114
7115 typedef std::string (* func_t)(const Mat &);
7116 static const func_t funcs[] = { kerToStr<uchar>, kerToStr<char>, kerToStr<ushort>, kerToStr<short>,
7117 kerToStr<int>, kerToStr<float>, kerToStr<double>, kerToStr<float16_t> };
7118 const func_t func = funcs[ddepth];
7119 CV_Assert(func != 0);
7120
7121 return cv::format(" -D %s=%s", name ? name : "COEFF", func(kernel).c_str());
7122 }
7123
// Helper for checkOptimalVectorWidth(): inspects one input array and records
// its layout in the vectors of the enclosing function.
//
// The macro is not self-contained; it expects these names in the scope where
// it is expanded: vectorWidths, strat, ref_type, and the vectors cols,
// offsets, steps, dividers, kercns.
//
// For a non-empty `src` it:
//   - asserts that src is a Mat or a UMat;
//   - takes the candidate vector width from vectorWidths[depth of src];
//   - makes the ENCLOSING FUNCTION return 1 when the row width in scalar
//     elements (channels * width) is smaller than that candidate, when the
//     candidate is not positive, or when strat is OCL_VECTOR_OWN and the type
//     of src differs from ref_type;
//   - otherwise appends the row width, byte offset, step, the divider
//     (candidate width * size of one scalar element) and the candidate width
//     to the respective vectors.
// Empty arrays are skipped entirely.
//
// The trailing `while ((void)0, 0)` is the usual do/while(0) wrapper, written
// with a comma expression (presumably to avoid constant-condition warnings).
#define PROCESS_SRC(src) \
    do \
    { \
        if (!src.empty()) \
        { \
            CV_Assert(src.isMat() || src.isUMat()); \
            Size csize = src.size(); \
            int ctype = src.type(), ccn = CV_MAT_CN(ctype), cdepth = CV_MAT_DEPTH(ctype), \
                ckercn = vectorWidths[cdepth], cwidth = ccn * csize.width; \
            if (cwidth < ckercn || ckercn <= 0) \
                return 1; \
            cols.push_back(cwidth); \
            if (strat == OCL_VECTOR_OWN && ctype != ref_type) \
                return 1; \
            offsets.push_back(src.offset()); \
            steps.push_back(src.step()); \
            dividers.push_back(ckercn * CV_ELEM_SIZE1(ctype)); \
            kercns.push_back(ckercn); \
        } \
    } \
    while ((void)0, 0)
7145
predictOptimalVectorWidth(InputArray src1,InputArray src2,InputArray src3,InputArray src4,InputArray src5,InputArray src6,InputArray src7,InputArray src8,InputArray src9,OclVectorStrategy strat)7146 int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
7147 InputArray src4, InputArray src5, InputArray src6,
7148 InputArray src7, InputArray src8, InputArray src9,
7149 OclVectorStrategy strat)
7150 {
7151 const ocl::Device & d = ocl::Device::getDefault();
7152
7153 int vectorWidths[] = { d.preferredVectorWidthChar(), d.preferredVectorWidthChar(),
7154 d.preferredVectorWidthShort(), d.preferredVectorWidthShort(),
7155 d.preferredVectorWidthInt(), d.preferredVectorWidthFloat(),
7156 d.preferredVectorWidthDouble(), d.preferredVectorWidthHalf() };
7157
7158 // if the device says don't use vectors
7159 if (vectorWidths[0] == 1)
7160 {
7161 // it's heuristic
7162 vectorWidths[CV_8U] = vectorWidths[CV_8S] = 4;
7163 vectorWidths[CV_16U] = vectorWidths[CV_16S] = vectorWidths[CV_16F] = 2;
7164 vectorWidths[CV_32S] = vectorWidths[CV_32F] = vectorWidths[CV_64F] = 1;
7165 }
7166
7167 return checkOptimalVectorWidth(vectorWidths, src1, src2, src3, src4, src5, src6, src7, src8, src9, strat);
7168 }
7169
checkOptimalVectorWidth(const int * vectorWidths,InputArray src1,InputArray src2,InputArray src3,InputArray src4,InputArray src5,InputArray src6,InputArray src7,InputArray src8,InputArray src9,OclVectorStrategy strat)7170 int checkOptimalVectorWidth(const int *vectorWidths,
7171 InputArray src1, InputArray src2, InputArray src3,
7172 InputArray src4, InputArray src5, InputArray src6,
7173 InputArray src7, InputArray src8, InputArray src9,
7174 OclVectorStrategy strat)
7175 {
7176 CV_Assert(vectorWidths);
7177
7178 int ref_type = src1.type();
7179
7180 std::vector<size_t> offsets, steps, cols;
7181 std::vector<int> dividers, kercns;
7182 PROCESS_SRC(src1);
7183 PROCESS_SRC(src2);
7184 PROCESS_SRC(src3);
7185 PROCESS_SRC(src4);
7186 PROCESS_SRC(src5);
7187 PROCESS_SRC(src6);
7188 PROCESS_SRC(src7);
7189 PROCESS_SRC(src8);
7190 PROCESS_SRC(src9);
7191
7192 size_t size = offsets.size();
7193
7194 for (size_t i = 0; i < size; ++i)
7195 while (offsets[i] % dividers[i] != 0 || steps[i] % dividers[i] != 0 || cols[i] % kercns[i] != 0)
7196 dividers[i] >>= 1, kercns[i] >>= 1;
7197
7198 // default strategy
7199 int kercn = *std::min_element(kercns.begin(), kercns.end());
7200
7201 return kercn;
7202 }
7203
predictOptimalVectorWidthMax(InputArray src1,InputArray src2,InputArray src3,InputArray src4,InputArray src5,InputArray src6,InputArray src7,InputArray src8,InputArray src9)7204 int predictOptimalVectorWidthMax(InputArray src1, InputArray src2, InputArray src3,
7205 InputArray src4, InputArray src5, InputArray src6,
7206 InputArray src7, InputArray src8, InputArray src9)
7207 {
7208 return predictOptimalVectorWidth(src1, src2, src3, src4, src5, src6, src7, src8, src9, OCL_VECTOR_MAX);
7209 }
7210
7211 #undef PROCESS_SRC
7212
7213
// TODO: Make this a method of an OpenCL "BuildOptions" class
buildOptionsAddMatrixDescription(String & buildOptions,const String & name,InputArray _m)7215 void buildOptionsAddMatrixDescription(String& buildOptions, const String& name, InputArray _m)
7216 {
7217 if (!buildOptions.empty())
7218 buildOptions += " ";
7219 int type = _m.type(), depth = CV_MAT_DEPTH(type);
7220 buildOptions += format(
7221 "-D %s_T=%s -D %s_T1=%s -D %s_CN=%d -D %s_TSIZE=%d -D %s_T1SIZE=%d -D %s_DEPTH=%d",
7222 name.c_str(), ocl::typeToStr(type),
7223 name.c_str(), ocl::typeToStr(CV_MAKE_TYPE(depth, 1)),
7224 name.c_str(), (int)CV_MAT_CN(type),
7225 name.c_str(), (int)CV_ELEM_SIZE(type),
7226 name.c_str(), (int)CV_ELEM_SIZE1(type),
7227 name.c_str(), (int)depth
7228 );
7229 }
7230
7231
7232 struct Image2D::Impl
7233 {
Implcv::ocl::Image2D::Impl7234 Impl(const UMat &src, bool norm, bool alias)
7235 {
7236 handle = 0;
7237 refcount = 1;
7238 init(src, norm, alias);
7239 }
7240
~Implcv::ocl::Image2D::Impl7241 ~Impl()
7242 {
7243 if (handle)
7244 clReleaseMemObject(handle);
7245 }
7246
getImageFormatcv::ocl::Image2D::Impl7247 static cl_image_format getImageFormat(int depth, int cn, bool norm)
7248 {
7249 cl_image_format format;
7250 static const int channelTypes[] = { CL_UNSIGNED_INT8, CL_SIGNED_INT8, CL_UNSIGNED_INT16,
7251 CL_SIGNED_INT16, CL_SIGNED_INT32, CL_FLOAT, -1, CL_HALF_FLOAT };
7252 static const int channelTypesNorm[] = { CL_UNORM_INT8, CL_SNORM_INT8, CL_UNORM_INT16,
7253 CL_SNORM_INT16, -1, -1, -1, -1 };
7254 // CL_RGB has no mappings to OpenCV types because CL_RGB can only be used with
7255 // CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, or CL_UNORM_INT_101010.
7256 static const int channelOrders[] = { -1, CL_R, CL_RG, /*CL_RGB*/ -1, CL_RGBA };
7257
7258 int channelType = norm ? channelTypesNorm[depth] : channelTypes[depth];
7259 int channelOrder = channelOrders[cn];
7260 format.image_channel_data_type = (cl_channel_type)channelType;
7261 format.image_channel_order = (cl_channel_order)channelOrder;
7262 return format;
7263 }
7264
isFormatSupportedcv::ocl::Image2D::Impl7265 static bool isFormatSupported(cl_image_format format)
7266 {
7267 if (!haveOpenCL())
7268 CV_Error(Error::OpenCLApiCallError, "OpenCL runtime not found!");
7269
7270 cl_context context = (cl_context)Context::getDefault().ptr();
7271 if (!context)
7272 return false;
7273
7274 // Figure out how many formats are supported by this context.
7275 cl_uint numFormats = 0;
7276 cl_int err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE,
7277 CL_MEM_OBJECT_IMAGE2D, numFormats,
7278 NULL, &numFormats);
7279 CV_OCL_DBG_CHECK_RESULT(err, "clGetSupportedImageFormats(CL_MEM_OBJECT_IMAGE2D, NULL)");
7280 if (numFormats > 0)
7281 {
7282 AutoBuffer<cl_image_format> formats(numFormats);
7283 err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE,
7284 CL_MEM_OBJECT_IMAGE2D, numFormats,
7285 formats.data(), NULL);
7286 CV_OCL_DBG_CHECK_RESULT(err, "clGetSupportedImageFormats(CL_MEM_OBJECT_IMAGE2D, formats)");
7287 for (cl_uint i = 0; i < numFormats; ++i)
7288 {
7289 if (!memcmp(&formats[i], &format, sizeof(format)))
7290 {
7291 return true;
7292 }
7293 }
7294 }
7295 return false;
7296 }
7297
initcv::ocl::Image2D::Impl7298 void init(const UMat &src, bool norm, bool alias)
7299 {
7300 if (!haveOpenCL())
7301 CV_Error(Error::OpenCLApiCallError, "OpenCL runtime not found!");
7302
7303 CV_Assert(!src.empty());
7304 CV_Assert(ocl::Device::getDefault().imageSupport());
7305
7306 int err, depth = src.depth(), cn = src.channels();
7307 CV_Assert(cn <= 4);
7308 cl_image_format format = getImageFormat(depth, cn, norm);
7309
7310 if (!isFormatSupported(format))
7311 CV_Error(Error::OpenCLApiCallError, "Image format is not supported");
7312
7313 if (alias && !src.handle(ACCESS_RW))
7314 CV_Error(Error::OpenCLApiCallError, "Incorrect UMat, handle is null");
7315
7316 cl_context context = (cl_context)Context::getDefault().ptr();
7317 cl_command_queue queue = (cl_command_queue)Queue::getDefault().ptr();
7318
7319 #ifdef CL_VERSION_1_2
7320 // this enables backwards portability to
7321 // run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
7322 const Device & d = ocl::Device::getDefault();
7323 int minor = d.deviceVersionMinor(), major = d.deviceVersionMajor();
7324 CV_Assert(!alias || canCreateAlias(src));
7325 if (1 < major || (1 == major && 2 <= minor))
7326 {
7327 cl_image_desc desc;
7328 desc.image_type = CL_MEM_OBJECT_IMAGE2D;
7329 desc.image_width = src.cols;
7330 desc.image_height = src.rows;
7331 desc.image_depth = 0;
7332 desc.image_array_size = 1;
7333 desc.image_row_pitch = alias ? src.step[0] : 0;
7334 desc.image_slice_pitch = 0;
7335 desc.buffer = alias ? (cl_mem)src.handle(ACCESS_RW) : 0;
7336 desc.num_mip_levels = 0;
7337 desc.num_samples = 0;
7338 handle = clCreateImage(context, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
7339 }
7340 else
7341 #endif
7342 {
7343 CV_SUPPRESS_DEPRECATED_START
7344 CV_Assert(!alias); // This is an OpenCL 1.2 extension
7345 handle = clCreateImage2D(context, CL_MEM_READ_WRITE, &format, src.cols, src.rows, 0, NULL, &err);
7346 CV_SUPPRESS_DEPRECATED_END
7347 }
7348 CV_OCL_DBG_CHECK_RESULT(err, "clCreateImage()");
7349
7350 size_t origin[] = { 0, 0, 0 };
7351 size_t region[] = { static_cast<size_t>(src.cols), static_cast<size_t>(src.rows), 1 };
7352
7353 cl_mem devData;
7354 if (!alias && !src.isContinuous())
7355 {
7356 devData = clCreateBuffer(context, CL_MEM_READ_ONLY, src.cols * src.rows * src.elemSize(), NULL, &err);
7357 CV_OCL_CHECK_RESULT(err, cv::format("clCreateBuffer(CL_MEM_READ_ONLY, sz=%lld) => %p",
7358 (long long int)(src.cols * src.rows * src.elemSize()), (void*)devData
7359 ).c_str());
7360
7361 const size_t roi[3] = {static_cast<size_t>(src.cols) * src.elemSize(), static_cast<size_t>(src.rows), 1};
7362 CV_OCL_CHECK(clEnqueueCopyBufferRect(queue, (cl_mem)src.handle(ACCESS_READ), devData, origin, origin,
7363 roi, src.step, 0, src.cols * src.elemSize(), 0, 0, NULL, NULL));
7364 CV_OCL_DBG_CHECK(clFlush(queue));
7365 }
7366 else
7367 {
7368 devData = (cl_mem)src.handle(ACCESS_READ);
7369 }
7370 CV_Assert(devData != NULL);
7371
7372 if (!alias)
7373 {
7374 CV_OCL_CHECK(clEnqueueCopyBufferToImage(queue, devData, handle, 0, origin, region, 0, NULL, 0));
7375 if (!src.isContinuous())
7376 {
7377 CV_OCL_DBG_CHECK(clFlush(queue));
7378 CV_OCL_DBG_CHECK(clReleaseMemObject(devData));
7379 }
7380 }
7381 }
7382
7383 IMPLEMENT_REFCOUNTABLE();
7384
7385 cl_mem handle;
7386 };
7387
Image2D()7388 Image2D::Image2D() CV_NOEXCEPT
7389 {
7390 p = NULL;
7391 }
7392
Image2D(const UMat & src,bool norm,bool alias)7393 Image2D::Image2D(const UMat &src, bool norm, bool alias)
7394 {
7395 p = new Impl(src, norm, alias);
7396 }
7397
canCreateAlias(const UMat & m)7398 bool Image2D::canCreateAlias(const UMat &m)
7399 {
7400 bool ret = false;
7401 const Device & d = ocl::Device::getDefault();
7402 if (d.imageFromBufferSupport() && !m.empty())
7403 {
7404 // This is the required pitch alignment in pixels
7405 uint pitchAlign = d.imagePitchAlignment();
7406 if (pitchAlign && !(m.step % (pitchAlign * m.elemSize())))
7407 {
7408 // We don't currently handle the case where the buffer was created
7409 // with CL_MEM_USE_HOST_PTR
7410 if (!m.u->tempUMat())
7411 {
7412 ret = true;
7413 }
7414 }
7415 }
7416 return ret;
7417 }
7418
isFormatSupported(int depth,int cn,bool norm)7419 bool Image2D::isFormatSupported(int depth, int cn, bool norm)
7420 {
7421 cl_image_format format = Impl::getImageFormat(depth, cn, norm);
7422
7423 return Impl::isFormatSupported(format);
7424 }
7425
/** Copy constructor: shares the implementation of @p i and adds a reference to it. */
Image2D::Image2D(const Image2D & i)
    : p(i.p)
{
    if (p != NULL)
        p->addref();
}
7432
/**
 * Copy assignment: shares the implementation of @p i.
 * Nothing happens when both objects already share the same implementation.
 */
Image2D & Image2D::operator = (const Image2D & i)
{
    if (i.p == p)
        return *this;

    // Take the new reference before dropping the old one.
    if (i.p)
        i.p->addref();
    if (p)
        p->release();
    p = i.p;

    return *this;
}
7445
Image2D(Image2D && i)7446 Image2D::Image2D(Image2D&& i) CV_NOEXCEPT
7447 {
7448 p = i.p;
7449 i.p = nullptr;
7450 }
7451
operator =(Image2D && i)7452 Image2D& Image2D::operator = (Image2D&& i) CV_NOEXCEPT
7453 {
7454 if (this != &i) {
7455 if (p)
7456 p->release();
7457 p = i.p;
7458 i.p = nullptr;
7459 }
7460 return *this;
7461 }
7462
~Image2D()7463 Image2D::~Image2D()
7464 {
7465 if (p)
7466 p->release();
7467 }
7468
ptr() const7469 void* Image2D::ptr() const
7470 {
7471 return p ? p->handle : 0;
7472 }
7473
isOpenCLForced()7474 bool internal::isOpenCLForced()
7475 {
7476 static bool initialized = false;
7477 static bool value = false;
7478 if (!initialized)
7479 {
7480 value = utils::getConfigurationParameterBool("OPENCV_OPENCL_FORCE", false);
7481 initialized = true;
7482 }
7483 return value;
7484 }
7485
isPerformanceCheckBypassed()7486 bool internal::isPerformanceCheckBypassed()
7487 {
7488 static bool initialized = false;
7489 static bool value = false;
7490 if (!initialized)
7491 {
7492 value = utils::getConfigurationParameterBool("OPENCV_OPENCL_PERF_CHECK_BYPASS", false);
7493 initialized = true;
7494 }
7495 return value;
7496 }
7497
isCLBuffer(UMat & u)7498 bool internal::isCLBuffer(UMat& u)
7499 {
7500 void* h = u.handle(ACCESS_RW);
7501 if (!h)
7502 return true;
7503 CV_DbgAssert(u.u->currAllocator == getOpenCLAllocator());
7504 #if 1
7505 if ((u.u->allocatorFlags_ & 0xffff0000) != 0) // OpenCL SVM flags are stored here
7506 return false;
7507 #else
7508 cl_mem_object_type type = 0;
7509 cl_int ret = clGetMemObjectInfo((cl_mem)h, CL_MEM_TYPE, sizeof(type), &type, NULL);
7510 if (ret != CL_SUCCESS || type != CL_MEM_OBJECT_BUFFER)
7511 return false;
7512 #endif
7513 return true;
7514 }
7515
7516 struct Timer::Impl
7517 {
7518 const Queue queue;
7519
Implcv::ocl::Timer::Impl7520 Impl(const Queue& q)
7521 : queue(q)
7522 {
7523 }
7524
~Implcv::ocl::Timer::Impl7525 ~Impl(){}
7526
startcv::ocl::Timer::Impl7527 void start()
7528 {
7529 CV_OCL_DBG_CHECK(clFinish((cl_command_queue)queue.ptr()));
7530 timer.start();
7531 }
7532
stopcv::ocl::Timer::Impl7533 void stop()
7534 {
7535 CV_OCL_DBG_CHECK(clFinish((cl_command_queue)queue.ptr()));
7536 timer.stop();
7537 }
7538
durationNScv::ocl::Timer::Impl7539 uint64 durationNS() const
7540 {
7541 return (uint64)(timer.getTimeSec() * 1e9);
7542 }
7543
7544 TickMeter timer;
7545 };
7546
Timer(const Queue & q)7547 Timer::Timer(const Queue& q) : p(new Impl(q)) { }
~Timer()7548 Timer::~Timer() { delete p; }
7549
start()7550 void Timer::start()
7551 {
7552 CV_Assert(p);
7553 p->start();
7554 }
7555
stop()7556 void Timer::stop()
7557 {
7558 CV_Assert(p);
7559 p->stop();
7560 }
7561
durationNS() const7562 uint64 Timer::durationNS() const
7563 {
7564 CV_Assert(p);
7565 return p->durationNS();
7566 }
7567
7568 }} // namespace
7569
7570 #endif // HAVE_OPENCL
7571