1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                           License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14 // Third party copyrights are property of their respective owners.
15 //
16 // Redistribution and use in source and binary forms, with or without modification,
17 // are permitted provided that the following conditions are met:
18 //
19 //   * Redistribution's of source code must retain the above copyright notice,
20 //     this list of conditions and the following disclaimer.
21 //
22 //   * Redistribution's in binary form must reproduce the above copyright notice,
23 //     this list of conditions and the following disclaimer in the documentation
24 //     and/or other materials provided with the distribution.
25 //
26 //   * The name of the copyright holders may not be used to endorse or promote products
27 //     derived from this software without specific prior written permission.
28 //
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the OpenCV Foundation or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
39 //
40 //M*/
41 
42 #include "precomp.hpp"
43 
44 #ifndef HAVE_OPENCL
45 #include "ocl_disabled.impl.hpp"
46 #else // HAVE_OPENCL
47 
48 #include <list>
49 #include <map>
50 #include <deque>
51 #include <set>
52 #include <string>
53 #include <sstream>
54 #include <iostream> // std::cerr
55 #include <fstream>
56 #if !(defined _MSC_VER) || (defined _MSC_VER && _MSC_VER > 1700)
57 #include <inttypes.h>
58 #endif
59 
60 #include <opencv2/core/utils/configuration.private.hpp>
61 
62 #include <opencv2/core/utils/logger.defines.hpp>
63 #undef CV_LOG_STRIP_LEVEL
64 #define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_DEBUG + 1
65 #include <opencv2/core/utils/logger.hpp>
66 
67 #include "opencv2/core/ocl_genbase.hpp"
68 #include "opencl_kernels_core.hpp"
69 
70 #include "opencv2/core/utils/lock.private.hpp"
71 #include "opencv2/core/utils/filesystem.hpp"
72 #include "opencv2/core/utils/filesystem.private.hpp"
73 
74 #define CV__ALLOCATOR_STATS_LOG(...) CV_LOG_VERBOSE(NULL, 0, "OpenCL allocator: " << __VA_ARGS__)
75 #include "opencv2/core/utils/allocator_stats.impl.hpp"
76 #undef CV__ALLOCATOR_STATS_LOG
77 
78 #define CV_OPENCL_ALWAYS_SHOW_BUILD_LOG          0
79 
80 #define CV_OPENCL_SHOW_RUN_KERNELS               0
81 #define CV_OPENCL_TRACE_CHECK                    0
82 
83 #define CV_OPENCL_VALIDATE_BINARY_PROGRAMS       1
84 
85 #define CV_OPENCL_SHOW_SVM_ERROR_LOG             1
86 #define CV_OPENCL_SHOW_SVM_LOG                   0
87 
88 #include "opencv2/core/bufferpool.hpp"
89 #ifndef LOG_BUFFER_POOL
90 # if 0
91 #   define LOG_BUFFER_POOL printf
92 # else
93 #   define LOG_BUFFER_POOL(...)
94 # endif
95 #endif
96 
// SVM trace helpers.
// When enabled, each macro prints a "line N (ocl.cpp)" prefix and then forwards the
// call-site arguments to printf(). The prefix call and the trailing `printf` are joined
// with the comma operator so that the whole expansion is ONE expression statement:
//     if (cond) CV_OPENCL_SVM_TRACE_P("...");
// With a ';' between the two calls only the prefix would be guarded by the `if` and the
// message itself would be printed unconditionally.
// When disabled, the macros swallow their arguments and expand to nothing.
#if CV_OPENCL_SHOW_SVM_LOG
// TODO add timestamp logging
#define CV_OPENCL_SVM_TRACE_P printf("line %d (ocl.cpp): ", __LINE__), printf
#else
#define CV_OPENCL_SVM_TRACE_P(...)
#endif

#if CV_OPENCL_SHOW_SVM_ERROR_LOG
// TODO add timestamp logging
#define CV_OPENCL_SVM_TRACE_ERROR_P printf("Error on line %d (ocl.cpp): ", __LINE__), printf
#else
#define CV_OPENCL_SVM_TRACE_ERROR_P(...)
#endif
110 
111 #include "opencv2/core/opencl/runtime/opencl_clblas.hpp"
112 #include "opencv2/core/opencl/runtime/opencl_clfft.hpp"
113 
114 #include "opencv2/core/opencl/runtime/opencl_core.hpp"
115 
116 #ifdef HAVE_OPENCL_SVM
117 #include "opencv2/core/opencl/runtime/opencl_svm_20.hpp"
118 #include "opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp"
119 #include "opencv2/core/opencl/opencl_svm.hpp"
120 #endif
121 
122 #include "umatrix.hpp"
123 
124 namespace cv { namespace ocl {
125 
// Injects intrusive reference counting into the class body where it is expanded:
//  - addref() adds 1 to `refcount` through CV_XADD;
//  - release() subtracts 1 through CV_XADD and deletes `this` when that call returns 1
//    (presumably the value before the decrement, i.e. the last reference is going away),
//    unless cv::__termination is set, in which case the object is not deleted;
//  - the `int refcount` member itself is declared by the macro.
// The expansion ends without ';' - the use site supplies it.
#define IMPLEMENT_REFCOUNTABLE() \
    void addref() { CV_XADD(&refcount, 1); } \
    void release() { if( CV_XADD(&refcount, -1) == 1 && !cv::__termination) delete this; } \
    int refcount
130 
131 static cv::utils::AllocatorStatistics opencl_allocator_stats;
132 
133 CV_EXPORTS cv::utils::AllocatorStatisticsInterface& getOpenCLAllocatorStatistics();
getOpenCLAllocatorStatistics()134 cv::utils::AllocatorStatisticsInterface& getOpenCLAllocatorStatistics()
135 {
136     return opencl_allocator_stats;
137 }
138 
139 #ifndef _DEBUG
isRaiseError()140 static bool isRaiseError()
141 {
142     static bool initialized = false;
143     static bool value = false;
144     if (!initialized)
145     {
146         value = cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_RAISE_ERROR", false);
147         initialized = true;
148     }
149     return value;
150 }
151 #endif
152 
153 #if CV_OPENCL_TRACE_CHECK
154 static inline
traceOpenCLCheck(cl_int status,const char * message)155 void traceOpenCLCheck(cl_int status, const char* message)
156 {
157     std::cout << "OpenCV(OpenCL:" << status << "): " << message << std::endl << std::flush;
158 }
159 #define CV_OCL_TRACE_CHECK_RESULT(status, message) traceOpenCLCheck(status, message)
160 #else
161 #define CV_OCL_TRACE_CHECK_RESULT(status, message) /* nothing */
162 #endif
163 
// Formats the text used for a failed OpenCL call: the string returned by
// getOpenCLErrorString(check_result), the numeric code and the caller-supplied description.
#define CV_OCL_API_ERROR_MSG(check_result, msg) \
    cv::format("OpenCL error %s (%d) during call: %s", getOpenCLErrorString(check_result), check_result, msg)

// Traces the status (a no-op unless CV_OPENCL_TRACE_CHECK is enabled) and, when check_result
// is not CL_SUCCESS, raises Error::OpenCLApiCallError through CV_Error.
// msg must be convertible to const char* (enforced by the static_assert).
// Note: check_result is expanded more than once, so pass a variable, not a call expression.
#define CV_OCL_CHECK_RESULT(check_result, msg) \
    do { \
        CV_OCL_TRACE_CHECK_RESULT(check_result, msg); \
        if (check_result != CL_SUCCESS) \
        { \
            static_assert(std::is_convertible<decltype(msg), const char*>::value, "msg of CV_OCL_CHECK_RESULT must be const char*"); \
            cv::String error_msg = CV_OCL_API_ERROR_MSG(check_result, msg); \
            CV_Error(Error::OpenCLApiCallError, error_msg); \
        } \
    } while (0)

// Runs expr as a statement, then checks check_result (which expr is expected to have set);
// the stringified expr is used as the message.
#define CV_OCL_CHECK_(expr, check_result) do { expr; CV_OCL_CHECK_RESULT(check_result, #expr); } while (0)

// Evaluates expr once, keeps its cl_int result in a local and checks it;
// the stringified expr is used as the message.
#define CV_OCL_CHECK(expr) do { cl_int __cl_result = (expr); CV_OCL_CHECK_RESULT(__cl_result, #expr); } while (0)
181 
// "Debug" flavours of the OpenCL status checks.
//  - With _DEBUG defined they are plain aliases of CV_OCL_CHECK_RESULT / CV_OCL_CHECK / CV_OCL_CHECK_.
//  - Otherwise a failed status raises Error::OpenCLApiCallError only when isRaiseError()
//    returns true (OPENCV_OPENCL_RAISE_ERROR configuration parameter); if it returns false
//    the failure is ignored after the optional trace.
#ifdef _DEBUG
#define CV_OCL_DBG_CHECK_RESULT(check_result, msg) CV_OCL_CHECK_RESULT(check_result, msg)
#define CV_OCL_DBG_CHECK(expr) CV_OCL_CHECK(expr)
#define CV_OCL_DBG_CHECK_(expr, check_result) CV_OCL_CHECK_(expr, check_result)
#else
// Same shape as CV_OCL_CHECK_RESULT, with the additional isRaiseError() condition.
#define CV_OCL_DBG_CHECK_RESULT(check_result, msg) \
    do { \
        CV_OCL_TRACE_CHECK_RESULT(check_result, msg); \
        if (check_result != CL_SUCCESS && isRaiseError()) \
        { \
            static_assert(std::is_convertible<decltype(msg), const char*>::value, "msg of CV_OCL_DBG_CHECK_RESULT must be const char*"); \
            cv::String error_msg = CV_OCL_API_ERROR_MSG(check_result, msg); \
            CV_Error(Error::OpenCLApiCallError, error_msg); \
        } \
    } while (0)
// Runs expr as a statement, then checks check_result; the stringified expr is the message.
#define CV_OCL_DBG_CHECK_(expr, check_result) do { expr; CV_OCL_DBG_CHECK_RESULT(check_result, #expr); } while (0)
// Evaluates expr once into a local cl_int and checks it; the stringified expr is the message.
#define CV_OCL_DBG_CHECK(expr) do { cl_int __cl_result = (expr); CV_OCL_DBG_CHECK_RESULT(__cl_result, #expr); } while (0)
#endif
200 
201 
// Switches for the on-disk OpenCL program cache. Each one is read from the configuration
// parameter of the same name (with the OPENCV_ prefix) when this file's statics are
// initialized; all four default to true.
// CACHE_ENABLE: master switch, checked by the OpenCLBinaryCacheConfigurator constructor.
static const bool CV_OPENCL_CACHE_ENABLE = utils::getConfigurationParameterBool("OPENCV_OPENCL_CACHE_ENABLE", true);
// CACHE_WRITE: required (together with CACHE_CLEANUP) for removal of obsolete cache directories.
static const bool CV_OPENCL_CACHE_WRITE = utils::getConfigurationParameterBool("OPENCV_OPENCL_CACHE_WRITE", true);
// CACHE_LOCK_ENABLE: whether the configurator sets up the interprocess lock file.
static const bool CV_OPENCL_CACHE_LOCK_ENABLE = utils::getConfigurationParameterBool("OPENCV_OPENCL_CACHE_LOCK_ENABLE", true);
// CACHE_CLEANUP: allows removal of cache directories that match the cleanup prefix but not the current context.
static const bool CV_OPENCL_CACHE_CLEANUP = utils::getConfigurationParameterBool("OPENCV_OPENCL_CACHE_CLEANUP", true);

#if CV_OPENCL_VALIDATE_BINARY_PROGRAMS
// Runtime value of OPENCV_OPENCL_VALIDATE_BINARY_PROGRAMS (default: false); only compiled in
// when the CV_OPENCL_VALIDATE_BINARY_PROGRAMS build flag above is non-zero.
static const bool CV_OPENCL_VALIDATE_BINARY_PROGRAMS_VALUE = utils::getConfigurationParameterBool("OPENCV_OPENCL_VALIDATE_BINARY_PROGRAMS", false);
#endif

// Option to disable calls to clEnqueueReadBufferRect / clEnqueueWriteBufferRect / clEnqueueCopyBufferRect.
// The default is true on __APPLE__ builds and false everywhere else.
static const bool CV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS = utils::getConfigurationParameterBool("OPENCV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS",
#ifdef __APPLE__
        true
#else
        false
#endif
);
219 
getBuildExtraOptions()220 static const String getBuildExtraOptions()
221 {
222     static String param_buildExtraOptions;
223     static bool initialized = false;
224     if (!initialized)
225     {
226         param_buildExtraOptions = utils::getConfigurationParameterString("OPENCV_OPENCL_BUILD_EXTRA_OPTIONS", "");
227         initialized = true;
228         if (!param_buildExtraOptions.empty())
229             CV_LOG_WARNING(NULL, "OpenCL: using extra build options: '" << param_buildExtraOptions << "'");
230     }
231     return param_buildExtraOptions;
232 }
233 
// Values of the OPENCV_OPENCL_ENABLE_MEM_USE_HOST_PTR (default: true) and
// OPENCV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR (default: 4) configuration parameters.
// NOTE(review): presumably these gate the use of CL_MEM_USE_HOST_PTR buffers and the host
// pointer alignment required for it - the consumers are not visible in this part of the file; confirm.
static const bool CV_OPENCL_ENABLE_MEM_USE_HOST_PTR = utils::getConfigurationParameterBool("OPENCV_OPENCL_ENABLE_MEM_USE_HOST_PTR", true);
static const size_t CV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR", 4);
236 
237 
238 struct UMat2D
239 {
UMat2Dcv::ocl::UMat2D240     UMat2D(const UMat& m)
241     {
242         offset = (int)m.offset;
243         step = (int)m.step;
244         rows = m.rows;
245         cols = m.cols;
246     }
247     int offset;
248     int step;
249     int rows;
250     int cols;
251 };
252 
253 struct UMat3D
254 {
UMat3Dcv::ocl::UMat3D255     UMat3D(const UMat& m)
256     {
257         offset = (int)m.offset;
258         step = (int)m.step.p[1];
259         slicestep = (int)m.step.p[0];
260         slices = (int)m.size.p[0];
261         rows = m.size.p[1];
262         cols = m.size.p[2];
263     }
264     int offset;
265     int slicestep;
266     int step;
267     int slices;
268     int rows;
269     int cols;
270 };
271 
272 // Computes 64-bit "cyclic redundancy check" sum, as specified in ECMA-182
crc64(const uchar * data,size_t size,uint64 crc0=0)273 static uint64 crc64( const uchar* data, size_t size, uint64 crc0=0 )
274 {
275     static uint64 table[256];
276     static bool initialized = false;
277 
278     if( !initialized )
279     {
280         for( int i = 0; i < 256; i++ )
281         {
282             uint64 c = i;
283             for( int j = 0; j < 8; j++ )
284                 c = ((c & 1) ? CV_BIG_UINT(0xc96c5795d7870f42) : 0) ^ (c >> 1);
285             table[i] = c;
286         }
287         initialized = true;
288     }
289 
290     uint64 crc = ~crc0;
291     for( size_t idx = 0; idx < size; idx++ )
292         crc = table[(uchar)crc ^ data[idx]] ^ (crc >> 8);
293 
294     return ~crc;
295 }
296 
297 #if OPENCV_HAVE_FILESYSTEM_SUPPORT
298 struct OpenCLBinaryCacheConfigurator
299 {
300     cv::String cache_path_;
301     cv::String cache_lock_filename_;
302     cv::Ptr<utils::fs::FileLock> cache_lock_;
303 
304     typedef std::map<std::string, std::string> ContextCacheType;
305     ContextCacheType prepared_contexts_;
306     Mutex mutex_prepared_contexts_;
307 
OpenCLBinaryCacheConfiguratorcv::ocl::OpenCLBinaryCacheConfigurator308     OpenCLBinaryCacheConfigurator()
309     {
310         CV_LOG_DEBUG(NULL, "Initializing OpenCL cache configuration...");
311         if (!CV_OPENCL_CACHE_ENABLE)
312         {
313             CV_LOG_INFO(NULL, "OpenCL cache is disabled");
314             return;
315         }
316         cache_path_ = utils::fs::getCacheDirectory("opencl_cache", "OPENCV_OPENCL_CACHE_DIR");
317         if (cache_path_.empty())
318         {
319             CV_LOG_INFO(NULL, "Specify OPENCV_OPENCL_CACHE_DIR configuration parameter to enable OpenCL cache");
320         }
321         do
322         {
323             try
324             {
325                 if (cache_path_.empty())
326                     break;
327                 if (cache_path_ == "disabled")
328                     break;
329                 if (!utils::fs::createDirectories(cache_path_))
330                 {
331                     CV_LOG_DEBUG(NULL, "Can't use OpenCL cache directory: " << cache_path_);
332                     clear();
333                     break;
334                 }
335 
336                 if (CV_OPENCL_CACHE_LOCK_ENABLE)
337                 {
338                     cache_lock_filename_ = cache_path_ + ".lock";
339                     if (!utils::fs::exists(cache_lock_filename_))
340                     {
341                         CV_LOG_DEBUG(NULL, "Creating lock file... (" << cache_lock_filename_ << ")");
342                         std::ofstream lock_filename(cache_lock_filename_.c_str(), std::ios::out);
343                         if (!lock_filename.is_open())
344                         {
345                             CV_LOG_WARNING(NULL, "Can't create lock file for OpenCL program cache: " << cache_lock_filename_);
346                             break;
347                         }
348                     }
349 
350                     try
351                     {
352                         cache_lock_ = makePtr<utils::fs::FileLock>(cache_lock_filename_.c_str());
353                         CV_LOG_VERBOSE(NULL, 0, "Checking cache lock... (" << cache_lock_filename_ << ")");
354                         {
355                             utils::shared_lock_guard<utils::fs::FileLock> lock(*cache_lock_);
356                         }
357                         CV_LOG_VERBOSE(NULL, 0, "Checking cache lock... Done!");
358                     }
359                     catch (const cv::Exception& e)
360                     {
361                         CV_LOG_WARNING(NULL, "Can't create OpenCL program cache lock: " << cache_lock_filename_ << std::endl << e.what());
362                     }
363                     catch (...)
364                     {
365                         CV_LOG_WARNING(NULL, "Can't create OpenCL program cache lock: " << cache_lock_filename_);
366                     }
367                 }
368                 else
369                 {
370                     if (CV_OPENCL_CACHE_WRITE)
371                     {
372                         CV_LOG_WARNING(NULL, "OpenCL cache lock is disabled while cache write is allowed "
373                                 "(not safe for multiprocess environment)");
374                     }
375                     else
376                     {
377                         CV_LOG_INFO(NULL, "OpenCL cache lock is disabled");
378                     }
379                 }
380             }
381             catch (const cv::Exception& e)
382             {
383                 CV_LOG_WARNING(NULL, "Can't prepare OpenCL program cache: " << cache_path_ << std::endl << e.what());
384                 clear();
385             }
386         } while (0);
387         if (!cache_path_.empty())
388         {
389             if (cache_lock_.empty() && CV_OPENCL_CACHE_LOCK_ENABLE)
390             {
391                 CV_LOG_WARNING(NULL, "Initialized OpenCL cache directory, but interprocess synchronization lock is not available. "
392                         "Consider to disable OpenCL cache: OPENCV_OPENCL_CACHE_DIR=disabled");
393             }
394             else
395             {
396                 CV_LOG_INFO(NULL, "Successfully initialized OpenCL cache directory: " << cache_path_);
397             }
398         }
399     }
400 
clearcv::ocl::OpenCLBinaryCacheConfigurator401     void clear()
402     {
403         cache_path_.clear();
404         cache_lock_filename_.clear();
405         cache_lock_.release();
406     }
407 
prepareCacheDirectoryForContextcv::ocl::OpenCLBinaryCacheConfigurator408     std::string prepareCacheDirectoryForContext(const std::string& ctx_prefix,
409             const std::string& cleanup_prefix)
410     {
411         if (cache_path_.empty())
412             return std::string();
413 
414         AutoLock lock(mutex_prepared_contexts_);
415 
416         ContextCacheType::iterator found_it = prepared_contexts_.find(ctx_prefix);
417         if (found_it != prepared_contexts_.end())
418             return found_it->second;
419 
420         CV_LOG_INFO(NULL, "Preparing OpenCL cache configuration for context: " << ctx_prefix);
421 
422         std::string target_directory = cache_path_ + ctx_prefix + "/";
423         bool result = utils::fs::isDirectory(target_directory);
424         if (!result)
425         {
426             try
427             {
428                 CV_LOG_VERBOSE(NULL, 0, "Creating directory: " << target_directory);
429                 if (utils::fs::createDirectories(target_directory))
430                 {
431                     result = true;
432                 }
433                 else
434                 {
435                     CV_LOG_WARNING(NULL, "Can't create directory: " << target_directory);
436                 }
437             }
438             catch (const cv::Exception& e)
439             {
440                 CV_LOG_ERROR(NULL, "Can't create OpenCL program cache directory for context: " << target_directory << std::endl << e.what());
441             }
442         }
443         target_directory = result ? target_directory : std::string();
444         prepared_contexts_.insert(std::pair<std::string, std::string>(ctx_prefix, target_directory));
445 
446         if (result && CV_OPENCL_CACHE_CLEANUP && CV_OPENCL_CACHE_WRITE && !cleanup_prefix.empty())
447         {
448             try
449             {
450                 std::vector<String> entries;
451                 utils::fs::glob_relative(cache_path_, cleanup_prefix + "*", entries, false, true);
452                 std::vector<String> remove_entries;
453                 for (size_t i = 0; i < entries.size(); i++)
454                 {
455                     const String& name = entries[i];
456                     if (0 == name.find(cleanup_prefix))
457                     {
458                         if (0 == name.find(ctx_prefix))
459                             continue; // skip current
460                         remove_entries.push_back(name);
461                     }
462                 }
463                 if (!remove_entries.empty())
464                 {
465                     CV_LOG_WARNING(NULL, (remove_entries.size() == 1
466                             ? "Detected OpenCL cache directory for other version of OpenCL device."
467                             : "Detected OpenCL cache directories for other versions of OpenCL device.")
468                             << " We assume that these directories are obsolete after OpenCL runtime/drivers upgrade.");
469                     CV_LOG_WARNING(NULL, "Trying to remove these directories...");
470                     for (size_t i = 0; i < remove_entries.size(); i++)
471                     {
472                         CV_LOG_WARNING(NULL, "- " << remove_entries[i]);
473                     }
474                     CV_LOG_WARNING(NULL, "Note: You can disable this behavior via this option: OPENCV_OPENCL_CACHE_CLEANUP=0");
475 
476                     for (size_t i = 0; i < remove_entries.size(); i++)
477                     {
478                         const String& name = remove_entries[i];
479                         cv::String path = utils::fs::join(cache_path_, name);
480                         try
481                         {
482                             utils::fs::remove_all(path);
483                             CV_LOG_WARNING(NULL, "Removed: " << path);
484                         }
485                         catch (const cv::Exception& e)
486                         {
487                             CV_LOG_ERROR(NULL, "Exception during removal of obsolete OpenCL cache directory: " << path << std::endl << e.what());
488                         }
489                     }
490                 }
491             }
492             catch (...)
493             {
494                 CV_LOG_WARNING(NULL, "Can't check for obsolete OpenCL cache directories");
495             }
496         }
497 
498         CV_LOG_VERBOSE(NULL, 1, "  Result: " << (target_directory.empty() ? std::string("Failed") : target_directory));
499         return target_directory;
500     }
501 
getSingletonInstancecv::ocl::OpenCLBinaryCacheConfigurator502     static OpenCLBinaryCacheConfigurator& getSingletonInstance()
503     {
504         CV_SINGLETON_LAZY_INIT_REF(OpenCLBinaryCacheConfigurator, new OpenCLBinaryCacheConfigurator());
505     }
506 };
507 class BinaryProgramFile
508 {
509     enum { MAX_ENTRIES = 64 };
510 
511     typedef unsigned int uint32_t;
512 
513     struct CV_DECL_ALIGNED(4) FileHeader
514     {
515         uint32_t sourceSignatureSize;
516         //char sourceSignature[];
517     };
518 
519     struct CV_DECL_ALIGNED(4) FileTable
520     {
521         uint32_t numberOfEntries;
522         //uint32_t firstEntryOffset[];
523     };
524 
525     struct CV_DECL_ALIGNED(4) FileEntry
526     {
527         uint32_t nextEntryFileOffset; // 0 for the last entry in chain
528         uint32_t keySize;
529         uint32_t dataSize;
530         //char key[];
531         //char data[];
532     };
533 
534     const std::string fileName_;
535     const char* const sourceSignature_;
536     const size_t sourceSignatureSize_;
537 
538     std::fstream f;
539 
540     uint32_t entryOffsets[MAX_ENTRIES];
541 
getHash(const std::string & options)542     uint32_t getHash(const std::string& options)
543     {
544         uint64 hash = crc64((const uchar*)options.c_str(), options.size(), 0);
545         return hash & (MAX_ENTRIES - 1);
546     }
547 
getFileSize()548     inline size_t getFileSize()
549     {
550         size_t pos = (size_t)f.tellg();
551         f.seekg(0, std::fstream::end);
552         size_t fileSize = (size_t)f.tellg();
553         f.seekg(pos, std::fstream::beg);
554         return fileSize;
555     }
readUInt32()556     inline uint32_t readUInt32()
557     {
558         uint32_t res = 0;
559         f.read((char*)&res, sizeof(uint32_t));
560         CV_Assert(!f.fail());
561         return res;
562     }
writeUInt32(const uint32_t value)563     inline void writeUInt32(const uint32_t value)
564     {
565         uint32_t v = value;
566         f.write((char*)&v, sizeof(uint32_t));
567         CV_Assert(!f.fail());
568     }
569 
seekReadAbsolute(size_t pos)570     inline void seekReadAbsolute(size_t pos)
571     {
572         f.seekg(pos, std::fstream::beg);
573         CV_Assert(!f.fail());
574     }
seekReadRelative(size_t pos)575     inline void seekReadRelative(size_t pos)
576     {
577         f.seekg(pos, std::fstream::cur);
578         CV_Assert(!f.fail());
579     }
580 
seekWriteAbsolute(size_t pos)581     inline void seekWriteAbsolute(size_t pos)
582     {
583         f.seekp(pos, std::fstream::beg);
584         CV_Assert(!f.fail());
585     }
586 
clearFile()587     void clearFile()
588     {
589         f.close();
590         if (0 != remove(fileName_.c_str()))
591             CV_LOG_ERROR(NULL, "Can't remove: " << fileName_);
592         return;
593     }
594 
595 public:
BinaryProgramFile(const std::string & fileName,const char * sourceSignature)596     BinaryProgramFile(const std::string& fileName, const char* sourceSignature)
597         : fileName_(fileName), sourceSignature_(sourceSignature), sourceSignatureSize_(sourceSignature_ ? strlen(sourceSignature_) : 0)
598     {
599         CV_StaticAssert(sizeof(uint32_t) == 4, "");
600         CV_Assert(sourceSignature_ != NULL);
601         CV_Assert(sourceSignatureSize_ > 0);
602         memset(entryOffsets, 0, sizeof(entryOffsets));
603 
604         f.rdbuf()->pubsetbuf(0, 0); // disable buffering
605         f.open(fileName_.c_str(), std::ios::in|std::ios::out|std::ios::binary);
606         if(f.is_open() && getFileSize() > 0)
607         {
608             bool isValid = false;
609             try
610             {
611                 uint32_t fileSourceSignatureSize = readUInt32();
612                 if (fileSourceSignatureSize == sourceSignatureSize_)
613                 {
614                     cv::AutoBuffer<char> fileSourceSignature(fileSourceSignatureSize + 1);
615                     f.read(fileSourceSignature.data(), fileSourceSignatureSize);
616                     if (f.eof())
617                     {
618                         CV_LOG_ERROR(NULL, "Unexpected EOF");
619                     }
620                     else if (memcmp(sourceSignature, fileSourceSignature.data(), fileSourceSignatureSize) == 0)
621                     {
622                         isValid = true;
623                     }
624                 }
625                 if (!isValid)
626                 {
627                     CV_LOG_ERROR(NULL, "Source code signature/hash mismatch (program source code has been changed/updated)");
628                 }
629             }
630             catch (const cv::Exception& e)
631             {
632                 CV_LOG_ERROR(NULL, "Can't open binary program file: " << fileName << " : " << e.what());
633             }
634             catch (...)
635             {
636                 CV_LOG_ERROR(NULL, "Can't open binary program file: " << fileName << " : Unknown error");
637             }
638             if (!isValid)
639             {
640                 clearFile();
641             }
642             else
643             {
644                 seekReadAbsolute(0);
645             }
646         }
647     }
648 
read(const std::string & key,std::vector<char> & buf)649     bool read(const std::string& key, std::vector<char>& buf)
650     {
651         if (!f.is_open())
652             return false;
653 
654         size_t fileSize = getFileSize();
655         if (fileSize == 0)
656         {
657             CV_LOG_ERROR(NULL, "Invalid file (empty): " << fileName_);
658             clearFile();
659             return false;
660         }
661         seekReadAbsolute(0);
662 
663         // bypass FileHeader
664         uint32_t fileSourceSignatureSize = readUInt32();
665         CV_Assert(fileSourceSignatureSize > 0);
666         seekReadRelative(fileSourceSignatureSize);
667 
668         uint32_t numberOfEntries = readUInt32();
669         CV_Assert(numberOfEntries > 0);
670         if (numberOfEntries != MAX_ENTRIES)
671         {
672             CV_LOG_ERROR(NULL, "Invalid file: " << fileName_);
673             clearFile();
674             return false;
675         }
676         f.read((char*)&entryOffsets[0], sizeof(entryOffsets));
677         CV_Assert(!f.fail());
678 
679         uint32_t entryNum = getHash(key);
680 
681         uint32_t entryOffset = entryOffsets[entryNum];
682         FileEntry entry;
683         while (entryOffset > 0)
684         {
685             seekReadAbsolute(entryOffset);
686             //CV_StaticAssert(sizeof(entry) == sizeof(uint32_t) * 3, "");
687             f.read((char*)&entry, sizeof(entry));
688             CV_Assert(!f.fail());
689             cv::AutoBuffer<char> fileKey(entry.keySize + 1);
690             if (key.size() == entry.keySize)
691             {
692                 if (entry.keySize > 0)
693                 {
694                     f.read(fileKey.data(), entry.keySize);
695                     CV_Assert(!f.fail());
696                 }
697                 if (memcmp(fileKey.data(), key.c_str(), entry.keySize) == 0)
698                 {
699                     buf.resize(entry.dataSize);
700                     f.read(&buf[0], entry.dataSize);
701                     CV_Assert(!f.fail());
702                     seekReadAbsolute(0);
703                     CV_LOG_VERBOSE(NULL, 0, "Read...");
704                     return true;
705                 }
706             }
707             if (entry.nextEntryFileOffset == 0)
708                 break;
709             entryOffset = entry.nextEntryFileOffset;
710         }
711         return false;
712     }
713 
write(const std::string & key,std::vector<char> & buf)714     bool write(const std::string& key, std::vector<char>& buf)
715     {
716         if (!f.is_open())
717         {
718             f.open(fileName_.c_str(), std::ios::in|std::ios::out|std::ios::binary);
719             if (!f.is_open())
720             {
721                 f.open(fileName_.c_str(), std::ios::out|std::ios::binary);
722                 if (!f.is_open())
723                 {
724                     CV_LOG_ERROR(NULL, "Can't create file: " << fileName_);
725                     return false;
726                 }
727             }
728         }
729 
730         size_t fileSize = getFileSize();
731         if (fileSize == 0)
732         {
733             // Write header
734             seekWriteAbsolute(0);
735             writeUInt32((uint32_t)sourceSignatureSize_);
736             f.write(sourceSignature_, sourceSignatureSize_);
737             CV_Assert(!f.fail());
738 
739             writeUInt32(MAX_ENTRIES);
740             memset(entryOffsets, 0, sizeof(entryOffsets));
741             f.write((char*)entryOffsets, sizeof(entryOffsets));
742             CV_Assert(!f.fail());
743             f.flush();
744             CV_Assert(!f.fail());
745             f.close();
746             f.open(fileName_.c_str(), std::ios::in|std::ios::out|std::ios::binary);
747             CV_Assert(f.is_open());
748             fileSize = getFileSize();
749         }
750         seekReadAbsolute(0);
751 
752         // bypass FileHeader
753         uint32_t fileSourceSignatureSize = readUInt32();
754         CV_Assert(fileSourceSignatureSize == sourceSignatureSize_);
755         seekReadRelative(fileSourceSignatureSize);
756 
757         uint32_t numberOfEntries = readUInt32();
758         CV_Assert(numberOfEntries > 0);
759         if (numberOfEntries != MAX_ENTRIES)
760         {
761             CV_LOG_ERROR(NULL, "Invalid file: " << fileName_);
762             clearFile();
763             return false;
764         }
765         size_t tableEntriesOffset = (size_t)f.tellg();
766         f.read((char*)&entryOffsets[0], sizeof(entryOffsets));
767         CV_Assert(!f.fail());
768 
769         uint32_t entryNum = getHash(key);
770 
771         uint32_t entryOffset = entryOffsets[entryNum];
772         FileEntry entry;
773         while (entryOffset > 0)
774         {
775             seekReadAbsolute(entryOffset);
776             //CV_StaticAssert(sizeof(entry) == sizeof(uint32_t) * 3, "");
777             f.read((char*)&entry, sizeof(entry));
778             CV_Assert(!f.fail());
779             cv::AutoBuffer<char> fileKey(entry.keySize + 1);
780             if (key.size() == entry.keySize)
781             {
782                 if (entry.keySize > 0)
783                 {
784                     f.read(fileKey.data(), entry.keySize);
785                     CV_Assert(!f.fail());
786                 }
787                 if (0 == memcmp(fileKey.data(), key.c_str(), entry.keySize))
788                 {
789                     // duplicate
790                     CV_LOG_VERBOSE(NULL, 0, "Duplicate key ignored: " << fileName_);
791                     return false;
792                 }
793             }
794             if (entry.nextEntryFileOffset == 0)
795                 break;
796             entryOffset = entry.nextEntryFileOffset;
797         }
798         seekReadAbsolute(0);
799         if (entryOffset > 0)
800         {
801             seekWriteAbsolute(entryOffset);
802             entry.nextEntryFileOffset = (uint32_t)fileSize;
803             f.write((char*)&entry, sizeof(entry));
804             CV_Assert(!f.fail());
805         }
806         else
807         {
808             entryOffsets[entryNum] = (uint32_t)fileSize;
809             seekWriteAbsolute(tableEntriesOffset);
810             f.write((char*)entryOffsets, sizeof(entryOffsets));
811             CV_Assert(!f.fail());
812         }
813         seekWriteAbsolute(fileSize);
814         entry.nextEntryFileOffset = 0;
815         entry.dataSize = (uint32_t)buf.size();
816         entry.keySize = (uint32_t)key.size();
817         f.write((char*)&entry, sizeof(entry));
818         CV_Assert(!f.fail());
819         f.write(key.c_str(), entry.keySize);
820         CV_Assert(!f.fail());
821         f.write(&buf[0], entry.dataSize);
822         CV_Assert(!f.fail());
823         f.flush();
824         CV_Assert(!f.fail());
825         CV_LOG_VERBOSE(NULL, 0, "Write... (" << buf.size() << " bytes)");
826         return true;
827     }
828 };
829 #endif // OPENCV_HAVE_FILESYSTEM_SUPPORT
830 
831 
832 
833 struct OpenCLExecutionContext::Impl
834 {
835     ocl::Context context_;
836     int device_;  // device index in context
837     ocl::Queue queue_;
838     int useOpenCL_;
839 
840 protected:
841     Impl() = delete;
842 
_init_devicecv::ocl::OpenCLExecutionContext::Impl843     void _init_device(cl_device_id deviceID)
844     {
845         CV_Assert(deviceID);
846         int ndevices = (int)context_.ndevices();
847         CV_Assert(ndevices > 0);
848         bool found = false;
849         for (int i = 0; i < ndevices; i++)
850         {
851             ocl::Device d = context_.device(i);
852             cl_device_id dhandle = (cl_device_id)d.ptr();
853             if (dhandle == deviceID)
854             {
855                 device_ = i;
856                 found = true;
857                 break;
858             }
859         }
860         CV_Assert(found && "OpenCL device can't work with passed OpenCL context");
861     }
862 
_init_devicecv::ocl::OpenCLExecutionContext::Impl863     void _init_device(const ocl::Device& device)
864     {
865         CV_Assert(device.ptr());
866         int ndevices = (int)context_.ndevices();
867         CV_Assert(ndevices > 0);
868         bool found = false;
869         for (int i = 0; i < ndevices; i++)
870         {
871             ocl::Device d = context_.device(i);
872             if (d.getImpl() == device.getImpl())
873             {
874                 device_ = i;
875                 found = true;
876                 break;
877             }
878         }
879         CV_Assert(found && "OpenCL device can't work with passed OpenCL context");
880     }
881 
882 public:
Implcv::ocl::OpenCLExecutionContext::Impl883     Impl(cl_platform_id platformID, cl_context context, cl_device_id deviceID)
884         : device_(0), useOpenCL_(-1)
885     {
886         CV_UNUSED(platformID);
887         CV_Assert(context);
888         CV_Assert(deviceID);
889 
890         context_ = Context::fromHandle(context);
891         _init_device(deviceID);
892         queue_ = Queue(context_, context_.device(device_));
893     }
894 
Implcv::ocl::OpenCLExecutionContext::Impl895     Impl(const ocl::Context& context, const ocl::Device& device, const ocl::Queue& queue)
896         : device_(0), useOpenCL_(-1)
897     {
898         CV_Assert(context.ptr());
899         CV_Assert(device.ptr());
900 
901         context_ = context;
902         _init_device(device);
903         queue_ = queue;
904     }
905 
Implcv::ocl::OpenCLExecutionContext::Impl906     Impl(const ocl::Context& context, const ocl::Device& device)
907         : device_(0), useOpenCL_(-1)
908     {
909         CV_Assert(context.ptr());
910         CV_Assert(device.ptr());
911 
912         context_ = context;
913         _init_device(device);
914         queue_ = Queue(context_, context_.device(device_));
915     }
916 
Implcv::ocl::OpenCLExecutionContext::Impl917     Impl(const ocl::Context& context, const int device, const ocl::Queue& queue)
918         : context_(context)
919         , device_(device)
920         , queue_(queue)
921         , useOpenCL_(-1)
922     {
923         // nothing
924     }
Implcv::ocl::OpenCLExecutionContext::Impl925     Impl(const Impl& other)
926         : context_(other.context_)
927         , device_(other.device_)
928         , queue_(other.queue_)
929         , useOpenCL_(-1)
930     {
931         // nothing
932     }
933 
useOpenCLcv::ocl::OpenCLExecutionContext::Impl934     inline bool useOpenCL() const { return const_cast<Impl*>(this)->useOpenCL(); }
useOpenCLcv::ocl::OpenCLExecutionContext::Impl935     bool useOpenCL()
936     {
937         if (useOpenCL_ < 0)
938         {
939             try
940             {
941                 useOpenCL_ = 0;
942                 if (!context_.empty() && context_.ndevices() > 0)
943                 {
944                     const Device& d = context_.device(device_);
945                     useOpenCL_ = d.available();
946                 }
947             }
948             catch (const cv::Exception&)
949             {
950                 // nothing
951             }
952             if (!useOpenCL_)
953                 CV_LOG_INFO(NULL, "OpenCL: can't use OpenCL execution context");
954         }
955         return useOpenCL_ > 0;
956     }
957 
setUseOpenCLcv::ocl::OpenCLExecutionContext::Impl958     void setUseOpenCL(bool flag)
959     {
960         if (!flag)
961             useOpenCL_ = 0;
962         else
963             useOpenCL_ = -1;
964     }
965 
getInitializedExecutionContextcv::ocl::OpenCLExecutionContext::Impl966     static const std::shared_ptr<Impl>& getInitializedExecutionContext()
967     {
968         CV_TRACE_FUNCTION();
969 
970         CV_LOG_INFO(NULL, "OpenCL: initializing thread execution context");
971 
972         static bool initialized = false;
973         static std::shared_ptr<Impl> g_primaryExecutionContext;
974 
975         if (!initialized)
976         {
977             cv::AutoLock lock(getInitializationMutex());
978             if (!initialized)
979             {
980                 CV_LOG_INFO(NULL, "OpenCL: creating new execution context...");
981                 try
982                 {
983                     Context c = ocl::Context::create(std::string());
984                     if (c.ndevices())
985                     {
986                         int deviceId = 0;
987                         auto& d = c.device(deviceId);
988                         if (d.available())
989                         {
990                             auto q = ocl::Queue(c, d);
991                             if (!q.ptr())
992                             {
993                                 CV_LOG_ERROR(NULL, "OpenCL: Can't create default OpenCL queue");
994                             }
995                             else
996                             {
997                                 g_primaryExecutionContext = std::make_shared<Impl>(c, deviceId, q);
998                                 CV_LOG_INFO(NULL, "OpenCL: device=" << d.name());
999                             }
1000                         }
1001                         else
1002                         {
1003                             CV_LOG_ERROR(NULL, "OpenCL: OpenCL device is not available (CL_DEVICE_AVAILABLE returns false)");
1004                         }
1005                     }
1006                     else
1007                     {
1008                         CV_LOG_INFO(NULL, "OpenCL: context is not available/disabled");
1009                     }
1010                 }
1011                 catch (const std::exception& e)
1012                 {
1013                     CV_LOG_INFO(NULL, "OpenCL: Can't initialize OpenCL context/device/queue: " << e.what());
1014                 }
1015                 catch (...)
1016                 {
1017                     CV_LOG_WARNING(NULL, "OpenCL: Can't initialize OpenCL context/device/queue: unknown C++ exception");
1018                 }
1019                 initialized = true;
1020             }
1021         }
1022         return g_primaryExecutionContext;
1023     }
1024 };
1025 
getContext() const1026 Context& OpenCLExecutionContext::getContext() const
1027 {
1028     CV_Assert(p);
1029     return p->context_;
1030 }
getDevice() const1031 Device& OpenCLExecutionContext::getDevice() const
1032 {
1033     CV_Assert(p);
1034     return p->context_.device(p->device_);
1035 }
getQueue() const1036 Queue& OpenCLExecutionContext::getQueue() const
1037 {
1038     CV_Assert(p);
1039     return p->queue_;
1040 }
1041 
useOpenCL() const1042 bool OpenCLExecutionContext::useOpenCL() const
1043 {
1044     if (p)
1045         return p->useOpenCL();
1046     return false;
1047 }
setUseOpenCL(bool flag)1048 void OpenCLExecutionContext::setUseOpenCL(bool flag)
1049 {
1050     CV_Assert(p);
1051     p->setUseOpenCL(flag);
1052 }
1053 
1054 /* static */
getCurrent()1055 OpenCLExecutionContext& OpenCLExecutionContext::getCurrent()
1056 {
1057     CV_TRACE_FUNCTION();
1058     CoreTLSData& data = getCoreTlsData();
1059     OpenCLExecutionContext& c = data.oclExecutionContext;
1060     if (!data.oclExecutionContextInitialized)
1061     {
1062         data.oclExecutionContextInitialized = true;
1063         if (c.empty() && haveOpenCL())
1064             c.p = Impl::getInitializedExecutionContext();
1065     }
1066     return c;
1067 }
1068 
1069 /* static */
getCurrentRef()1070 OpenCLExecutionContext& OpenCLExecutionContext::getCurrentRef()
1071 {
1072     CV_TRACE_FUNCTION();
1073     CoreTLSData& data = getCoreTlsData();
1074     OpenCLExecutionContext& c = data.oclExecutionContext;
1075     return c;
1076 }
1077 
bind() const1078 void OpenCLExecutionContext::bind() const
1079 {
1080     CV_TRACE_FUNCTION();
1081     CV_Assert(p);
1082     CoreTLSData& data = getCoreTlsData();
1083     data.oclExecutionContext = *this;
1084     data.oclExecutionContextInitialized = true;
1085     data.useOpenCL = p->useOpenCL_;  // propagate "-1", avoid call useOpenCL()
1086 }
1087 
1088 
cloneWithNewQueue() const1089 OpenCLExecutionContext OpenCLExecutionContext::cloneWithNewQueue() const
1090 {
1091     CV_TRACE_FUNCTION();
1092     CV_Assert(p);
1093     const Queue q(getContext(), getDevice());
1094     return cloneWithNewQueue(q);
1095 }
1096 
cloneWithNewQueue(const ocl::Queue & q) const1097 OpenCLExecutionContext OpenCLExecutionContext::cloneWithNewQueue(const ocl::Queue& q) const
1098 {
1099     CV_TRACE_FUNCTION();
1100     CV_Assert(p);
1101     CV_Assert(q.ptr() != NULL);
1102     OpenCLExecutionContext c;
1103     c.p = std::make_shared<Impl>(p->context_, p->device_, q);
1104     return c;
1105 }
1106 
1107 /* static */
create(const Context & context,const Device & device,const ocl::Queue & queue)1108 OpenCLExecutionContext OpenCLExecutionContext::create(const Context& context, const Device& device, const ocl::Queue& queue)
1109 {
1110     CV_TRACE_FUNCTION();
1111     if (!haveOpenCL())
1112         CV_Error(cv::Error::OpenCLApiCallError, "OpenCL runtime is not available!");
1113 
1114     CV_Assert(!context.empty());
1115     CV_Assert(context.ptr());
1116     CV_Assert(!device.empty());
1117     CV_Assert(device.ptr());
1118     OpenCLExecutionContext ctx;
1119     ctx.p = std::make_shared<OpenCLExecutionContext::Impl>(context, device, queue);
1120     return ctx;
1121 
1122 }
1123 
1124 /* static */
create(const Context & context,const Device & device)1125 OpenCLExecutionContext OpenCLExecutionContext::create(const Context& context, const Device& device)
1126 {
1127     CV_TRACE_FUNCTION();
1128     if (!haveOpenCL())
1129         CV_Error(cv::Error::OpenCLApiCallError, "OpenCL runtime is not available!");
1130 
1131     CV_Assert(!context.empty());
1132     CV_Assert(context.ptr());
1133     CV_Assert(!device.empty());
1134     CV_Assert(device.ptr());
1135     OpenCLExecutionContext ctx;
1136     ctx.p = std::make_shared<OpenCLExecutionContext::Impl>(context, device);
1137     return ctx;
1138 
1139 }
1140 
release()1141 void OpenCLExecutionContext::release()
1142 {
1143     CV_TRACE_FUNCTION();
1144     p.reset();
1145 }
1146 
1147 
1148 
// Set to true once haveOpenCL() has finished its one-time probe of the
// OpenCL runtime (or found it disabled via OPENCV_OPENCL_RUNTIME).
static bool g_isOpenCLInitialized = false;
// Result of that probe: true only if clGetPlatformIDs() succeeded and
// reported at least one platform.
static bool g_isOpenCLAvailable = false;
1152 
haveOpenCL()1153 bool haveOpenCL()
1154 {
1155     CV_TRACE_FUNCTION();
1156 
1157     if (!g_isOpenCLInitialized)
1158     {
1159         CV_TRACE_REGION("Init_OpenCL_Runtime");
1160         const char* envPath = getenv("OPENCV_OPENCL_RUNTIME");
1161         if (envPath)
1162         {
1163             if (cv::String(envPath) == "disabled")
1164             {
1165                 g_isOpenCLAvailable = false;
1166                 g_isOpenCLInitialized = true;
1167                 return false;
1168             }
1169         }
1170 
1171         cv::AutoLock lock(getInitializationMutex());
1172         CV_LOG_INFO(NULL, "Initialize OpenCL runtime...");
1173         try
1174         {
1175             cl_uint n = 0;
1176             g_isOpenCLAvailable = ::clGetPlatformIDs(0, NULL, &n) == CL_SUCCESS;
1177             g_isOpenCLAvailable &= n > 0;
1178             CV_LOG_INFO(NULL, "OpenCL: found " << n << " platforms");
1179         }
1180         catch (...)
1181         {
1182             g_isOpenCLAvailable = false;
1183         }
1184         g_isOpenCLInitialized = true;
1185     }
1186     return g_isOpenCLAvailable;
1187 }
1188 
useOpenCL()1189 bool useOpenCL()
1190 {
1191     CoreTLSData& data = getCoreTlsData();
1192     if (data.useOpenCL < 0)
1193     {
1194         try
1195         {
1196             data.useOpenCL = 0;
1197             if (haveOpenCL())
1198             {
1199                 auto c = OpenCLExecutionContext::getCurrent();
1200                 data.useOpenCL = c.useOpenCL();
1201             }
1202         }
1203         catch (...)
1204         {
1205             CV_LOG_INFO(NULL, "OpenCL: can't initialize thread OpenCL execution context");
1206         }
1207     }
1208     return data.useOpenCL > 0;
1209 }
1210 
isOpenCLActivated()1211 bool isOpenCLActivated()
1212 {
1213     if (!g_isOpenCLAvailable)
1214         return false; // prevent unnecessary OpenCL activation via useOpenCL()->haveOpenCL() calls
1215     return useOpenCL();
1216 }
1217 
setUseOpenCL(bool flag)1218 void setUseOpenCL(bool flag)
1219 {
1220     CV_TRACE_FUNCTION();
1221 
1222     CoreTLSData& data = getCoreTlsData();
1223     auto& c = OpenCLExecutionContext::getCurrentRef();
1224     if (!c.empty())
1225     {
1226         c.setUseOpenCL(flag);
1227         data.useOpenCL = c.useOpenCL();
1228     }
1229     else
1230     {
1231         if (!flag)
1232             data.useOpenCL = 0;
1233         else
1234             data.useOpenCL = -1; // enabled by default (if context is not initialized)
1235     }
1236 }
1237 
1238 
1239 
1240 #ifdef HAVE_CLAMDBLAS
1241 
1242 class AmdBlasHelper
1243 {
1244 public:
getInstance()1245     static AmdBlasHelper & getInstance()
1246     {
1247         CV_SINGLETON_LAZY_INIT_REF(AmdBlasHelper, new AmdBlasHelper())
1248     }
1249 
isAvailable() const1250     bool isAvailable() const
1251     {
1252         return g_isAmdBlasAvailable;
1253     }
1254 
~AmdBlasHelper()1255     ~AmdBlasHelper()
1256     {
1257         // Do not tear down clBLAS.
1258         // The user application may still use clBLAS even after OpenCV is unloaded.
1259         /*try
1260         {
1261             clblasTeardown();
1262         }
1263         catch (...) { }*/
1264     }
1265 
1266 protected:
AmdBlasHelper()1267     AmdBlasHelper()
1268     {
1269         if (!g_isAmdBlasInitialized)
1270         {
1271             AutoLock lock(getInitializationMutex());
1272 
1273             if (!g_isAmdBlasInitialized)
1274             {
1275                 if (haveOpenCL())
1276                 {
1277                     try
1278                     {
1279                         g_isAmdBlasAvailable = clblasSetup() == clblasSuccess;
1280                     }
1281                     catch (...)
1282                     {
1283                         g_isAmdBlasAvailable = false;
1284                     }
1285                 }
1286                 else
1287                     g_isAmdBlasAvailable = false;
1288 
1289                 g_isAmdBlasInitialized = true;
1290             }
1291         }
1292     }
1293 
1294 private:
1295     static bool g_isAmdBlasInitialized;
1296     static bool g_isAmdBlasAvailable;
1297 };
1298 
// Result of the one-time clBLAS setup; stays false until the helper's
// constructor records a successful clblasSetup().
bool AmdBlasHelper::g_isAmdBlasAvailable = false;
// Set to true by the helper's constructor once the setup attempt is done.
bool AmdBlasHelper::g_isAmdBlasInitialized = false;
1301 
haveAmdBlas()1302 bool haveAmdBlas()
1303 {
1304     return AmdBlasHelper::getInstance().isAvailable();
1305 }
1306 
1307 #else
1308 
haveAmdBlas()1309 bool haveAmdBlas()
1310 {
1311     return false;
1312 }
1313 
1314 #endif
1315 
1316 #ifdef HAVE_CLAMDFFT
1317 
1318 class AmdFftHelper
1319 {
1320 public:
getInstance()1321     static AmdFftHelper & getInstance()
1322     {
1323         CV_SINGLETON_LAZY_INIT_REF(AmdFftHelper, new AmdFftHelper())
1324     }
1325 
isAvailable() const1326     bool isAvailable() const
1327     {
1328         return g_isAmdFftAvailable;
1329     }
1330 
~AmdFftHelper()1331     ~AmdFftHelper()
1332     {
1333         // Do not tear down clFFT.
1334         // The user application may still use clFFT even after OpenCV is unloaded.
1335         /*try
1336         {
1337             clfftTeardown();
1338         }
1339         catch (...) { }*/
1340     }
1341 
1342 protected:
AmdFftHelper()1343     AmdFftHelper()
1344     {
1345         if (!g_isAmdFftInitialized)
1346         {
1347             AutoLock lock(getInitializationMutex());
1348 
1349             if (!g_isAmdFftInitialized)
1350             {
1351                 if (haveOpenCL())
1352                 {
1353                     try
1354                     {
1355                         cl_uint major, minor, patch;
1356                         CV_Assert(clfftInitSetupData(&setupData) == CLFFT_SUCCESS);
1357 
1358                         // it throws exception in case AmdFft binaries are not found
1359                         CV_Assert(clfftGetVersion(&major, &minor, &patch) == CLFFT_SUCCESS);
1360                         g_isAmdFftAvailable = true;
1361                     }
1362                     catch (const Exception &)
1363                     {
1364                         g_isAmdFftAvailable = false;
1365                     }
1366                 }
1367                 else
1368                     g_isAmdFftAvailable = false;
1369 
1370                 g_isAmdFftInitialized = true;
1371             }
1372         }
1373     }
1374 
1375 private:
1376     static clfftSetupData setupData;
1377     static bool g_isAmdFftInitialized;
1378     static bool g_isAmdFftAvailable;
1379 };
1380 
// Setup structure filled by clfftInitSetupData() in the helper's constructor.
clfftSetupData AmdFftHelper::setupData;
// Result of the one-time clFFT probe; stays false until the probe succeeds.
bool AmdFftHelper::g_isAmdFftAvailable = false;
// Set to true by the helper's constructor once the probe attempt is done.
bool AmdFftHelper::g_isAmdFftInitialized = false;
1384 
haveAmdFft()1385 bool haveAmdFft()
1386 {
1387     return AmdFftHelper::getInstance().isAvailable();
1388 }
1389 
1390 #else
1391 
haveAmdFft()1392 bool haveAmdFft()
1393 {
1394     return false;
1395 }
1396 
1397 #endif
1398 
haveSVM()1399 bool haveSVM()
1400 {
1401 #ifdef HAVE_OPENCL_SVM
1402     return true;
1403 #else
1404     return false;
1405 #endif
1406 }
1407 
finish()1408 void finish()
1409 {
1410     Queue::getDefault().finish();
1411 }
1412 
1413 /////////////////////////////////////////// Platform /////////////////////////////////////////////
1414 
1415 struct Platform::Impl
1416 {
Implcv::ocl::Platform::Impl1417     Impl()
1418     {
1419         refcount = 1;
1420         handle = 0;
1421         initialized = false;
1422     }
1423 
~Implcv::ocl::Platform::Impl1424     ~Impl() {}
1425 
initcv::ocl::Platform::Impl1426     void init()
1427     {
1428         if( !initialized )
1429         {
1430             //cl_uint num_entries
1431             cl_uint n = 0;
1432             if( clGetPlatformIDs(1, &handle, &n) != CL_SUCCESS || n == 0 )
1433                 handle = 0;
1434             if( handle != 0 )
1435             {
1436                 char buf[1000];
1437                 size_t len = 0;
1438                 CV_OCL_DBG_CHECK(clGetPlatformInfo(handle, CL_PLATFORM_VENDOR, sizeof(buf), buf, &len));
1439                 buf[len] = '\0';
1440                 vendor = String(buf);
1441             }
1442 
1443             initialized = true;
1444         }
1445     }
1446 
1447     IMPLEMENT_REFCOUNTABLE();
1448 
1449     cl_platform_id handle;
1450     String vendor;
1451     bool initialized;
1452 };
1453 
Platform()1454 Platform::Platform() CV_NOEXCEPT
1455 {
1456     p = 0;
1457 }
1458 
~Platform()1459 Platform::~Platform()
1460 {
1461     if(p)
1462         p->release();
1463 }
1464 
Platform(const Platform & pl)1465 Platform::Platform(const Platform& pl)
1466 {
1467     p = (Impl*)pl.p;
1468     if(p)
1469         p->addref();
1470 }
1471 
operator =(const Platform & pl)1472 Platform& Platform::operator = (const Platform& pl)
1473 {
1474     Impl* newp = (Impl*)pl.p;
1475     if(newp)
1476         newp->addref();
1477     if(p)
1478         p->release();
1479     p = newp;
1480     return *this;
1481 }
1482 
Platform(Platform && pl)1483 Platform::Platform(Platform&& pl) CV_NOEXCEPT
1484 {
1485     p = pl.p;
1486     pl.p = nullptr;
1487 }
1488 
operator =(Platform && pl)1489 Platform& Platform::operator = (Platform&& pl) CV_NOEXCEPT
1490 {
1491     if (this != &pl) {
1492         if(p)
1493             p->release();
1494         p = pl.p;
1495         pl.p = nullptr;
1496     }
1497     return *this;
1498 }
1499 
ptr() const1500 void* Platform::ptr() const
1501 {
1502     return p ? p->handle : 0;
1503 }
1504 
getDefault()1505 Platform& Platform::getDefault()
1506 {
1507     CV_LOG_ONCE_WARNING(NULL, "OpenCL: Platform::getDefault() is deprecated and will be removed. Use cv::ocl::getPlatfomsInfo() for enumeration of available platforms");
1508     static Platform p;
1509     if( !p.p )
1510     {
1511         p.p = new Impl;
1512         p.p->init();
1513     }
1514     return p;
1515 }
1516 
1517 /////////////////////////////////////// Device ////////////////////////////////////////////
1518 
1519 // Version has format:
1520 //   OpenCL<space><major_version.minor_version><space><vendor-specific information>
1521 // by specification
1522 //   http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clGetDeviceInfo.html
1523 //   http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clGetDeviceInfo.html
1524 //   https://www.khronos.org/registry/OpenCL/sdk/1.1/docs/man/xhtml/clGetPlatformInfo.html
1525 //   https://www.khronos.org/registry/OpenCL/sdk/1.2/docs/man/xhtml/clGetPlatformInfo.html
parseOpenCLVersion(const String & version,int & major,int & minor)1526 static void parseOpenCLVersion(const String &version, int &major, int &minor)
1527 {
1528     major = minor = 0;
1529     if (10 >= version.length())
1530         return;
1531     const char *pstr = version.c_str();
1532     if (0 != strncmp(pstr, "OpenCL ", 7))
1533         return;
1534     size_t ppos = version.find('.', 7);
1535     if (String::npos == ppos)
1536         return;
1537     String temp = version.substr(7, ppos - 7);
1538     major = atoi(temp.c_str());
1539     temp = version.substr(ppos + 1);
1540     minor = atoi(temp.c_str());
1541 }
1542 
1543 struct Device::Impl
1544 {
Implcv::ocl::Device::Impl1545     Impl(void* d)
1546         : refcount(1)
1547         , handle(0)
1548     {
1549         try
1550         {
1551             cl_device_id device = (cl_device_id)d;
1552             _init(device);
1553             CV_OCL_CHECK(clRetainDevice(device));  // increment reference counter on success only
1554         }
1555         catch (...)
1556         {
1557             throw;
1558         }
1559     }
1560 
_initcv::ocl::Device::Impl1561     void _init(cl_device_id d)
1562     {
1563         handle = (cl_device_id)d;
1564 
1565         name_ = getStrProp(CL_DEVICE_NAME);
1566         version_ = getStrProp(CL_DEVICE_VERSION);
1567         extensions_ = getStrProp(CL_DEVICE_EXTENSIONS);
1568         doubleFPConfig_ = getProp<cl_device_fp_config, int>(CL_DEVICE_DOUBLE_FP_CONFIG);
1569         halfFPConfig_ = getProp<cl_device_fp_config, int>(CL_DEVICE_HALF_FP_CONFIG);
1570         hostUnifiedMemory_ = getBoolProp(CL_DEVICE_HOST_UNIFIED_MEMORY);
1571         maxComputeUnits_ = getProp<cl_uint, int>(CL_DEVICE_MAX_COMPUTE_UNITS);
1572         maxWorkGroupSize_ = getProp<size_t, size_t>(CL_DEVICE_MAX_WORK_GROUP_SIZE);
1573         type_ = getProp<cl_device_type, int>(CL_DEVICE_TYPE);
1574         driverVersion_ = getStrProp(CL_DRIVER_VERSION);
1575         addressBits_ = getProp<cl_uint, int>(CL_DEVICE_ADDRESS_BITS);
1576 
1577         String deviceVersion_ = getStrProp(CL_DEVICE_VERSION);
1578         parseOpenCLVersion(deviceVersion_, deviceVersionMajor_, deviceVersionMinor_);
1579 
1580         size_t pos = 0;
1581         while (pos < extensions_.size())
1582         {
1583             size_t pos2 = extensions_.find(' ', pos);
1584             if (pos2 == String::npos)
1585                 pos2 = extensions_.size();
1586             if (pos2 > pos)
1587             {
1588                 std::string extensionName = extensions_.substr(pos, pos2 - pos);
1589                 extensions_set_.insert(extensionName);
1590             }
1591             pos = pos2 + 1;
1592         }
1593 
1594         intelSubgroupsSupport_ = isExtensionSupported("cl_intel_subgroups");
1595 
1596         vendorName_ = getStrProp(CL_DEVICE_VENDOR);
1597         if (vendorName_ == "Advanced Micro Devices, Inc." ||
1598             vendorName_ == "AMD")
1599             vendorID_ = VENDOR_AMD;
1600         else if (vendorName_ == "Intel(R) Corporation" || vendorName_ == "Intel" || strstr(name_.c_str(), "Iris") != 0)
1601             vendorID_ = VENDOR_INTEL;
1602         else if (vendorName_ == "NVIDIA Corporation")
1603             vendorID_ = VENDOR_NVIDIA;
1604         else
1605             vendorID_ = UNKNOWN_VENDOR;
1606 
1607         const size_t CV_OPENCL_DEVICE_MAX_WORK_GROUP_SIZE = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_DEVICE_MAX_WORK_GROUP_SIZE", 0);
1608         if (CV_OPENCL_DEVICE_MAX_WORK_GROUP_SIZE > 0)
1609         {
1610             const size_t new_maxWorkGroupSize = std::min(maxWorkGroupSize_, CV_OPENCL_DEVICE_MAX_WORK_GROUP_SIZE);
1611             if (new_maxWorkGroupSize != maxWorkGroupSize_)
1612                 CV_LOG_WARNING(NULL, "OpenCL: using workgroup size: " << new_maxWorkGroupSize << " (was " << maxWorkGroupSize_ << ")");
1613             maxWorkGroupSize_ = new_maxWorkGroupSize;
1614         }
1615 #if 0
1616         if (isExtensionSupported("cl_khr_spir"))
1617         {
1618 #ifndef CL_DEVICE_SPIR_VERSIONS
1619 #define CL_DEVICE_SPIR_VERSIONS                     0x40E0
1620 #endif
1621             cv::String spir_versions = getStrProp(CL_DEVICE_SPIR_VERSIONS);
1622             std::cout << spir_versions << std::endl;
1623         }
1624 #endif
1625     }
1626 
~Implcv::ocl::Device::Impl1627     ~Impl()
1628     {
1629 #ifdef _WIN32
1630         if (!cv::__termination)
1631 #endif
1632         {
1633             if (handle)
1634             {
1635                 CV_OCL_CHECK(clReleaseDevice(handle));
1636                 handle = 0;
1637             }
1638         }
1639     }
1640 
1641     template<typename _TpCL, typename _TpOut>
getPropcv::ocl::Device::Impl1642     _TpOut getProp(cl_device_info prop) const
1643     {
1644         _TpCL temp=_TpCL();
1645         size_t sz = 0;
1646 
1647         return clGetDeviceInfo(handle, prop, sizeof(temp), &temp, &sz) == CL_SUCCESS &&
1648             sz == sizeof(temp) ? _TpOut(temp) : _TpOut();
1649     }
1650 
getBoolPropcv::ocl::Device::Impl1651     bool getBoolProp(cl_device_info prop) const
1652     {
1653         cl_bool temp = CL_FALSE;
1654         size_t sz = 0;
1655 
1656         return clGetDeviceInfo(handle, prop, sizeof(temp), &temp, &sz) == CL_SUCCESS &&
1657             sz == sizeof(temp) ? temp != 0 : false;
1658     }
1659 
getStrPropcv::ocl::Device::Impl1660     String getStrProp(cl_device_info prop) const
1661     {
1662         char buf[4096];
1663         size_t sz=0;
1664         return clGetDeviceInfo(handle, prop, sizeof(buf)-16, buf, &sz) == CL_SUCCESS &&
1665             sz < sizeof(buf) ? String(buf) : String();
1666     }
1667 
isExtensionSupportedcv::ocl::Device::Impl1668     bool isExtensionSupported(const std::string& extensionName) const
1669     {
1670         return extensions_set_.count(extensionName) > 0;
1671     }
1672 
1673 
1674     IMPLEMENT_REFCOUNTABLE();
1675 
1676     cl_device_id handle;
1677 
1678     String name_;
1679     String version_;
1680     std::string extensions_;
1681     int doubleFPConfig_;
1682     int halfFPConfig_;
1683     bool hostUnifiedMemory_;
1684     int maxComputeUnits_;
1685     size_t maxWorkGroupSize_;
1686     int type_;
1687     int addressBits_;
1688     int deviceVersionMajor_;
1689     int deviceVersionMinor_;
1690     String driverVersion_;
1691     String vendorName_;
1692     int vendorID_;
1693     bool intelSubgroupsSupport_;
1694 
1695     std::set<std::string> extensions_set_;
1696 };
1697 
1698 
Device()1699 Device::Device() CV_NOEXCEPT
1700 {
1701     p = 0;
1702 }
1703 
Device(void * d)1704 Device::Device(void* d)
1705 {
1706     p = 0;
1707     set(d);
1708 }
1709 
Device(const Device & d)1710 Device::Device(const Device& d)
1711 {
1712     p = d.p;
1713     if(p)
1714         p->addref();
1715 }
1716 
operator =(const Device & d)1717 Device& Device::operator = (const Device& d)
1718 {
1719     Impl* newp = (Impl*)d.p;
1720     if(newp)
1721         newp->addref();
1722     if(p)
1723         p->release();
1724     p = newp;
1725     return *this;
1726 }
1727 
Device(Device && d)1728 Device::Device(Device&& d) CV_NOEXCEPT
1729 {
1730     p = d.p;
1731     d.p = nullptr;
1732 }
1733 
operator =(Device && d)1734 Device& Device::operator = (Device&& d) CV_NOEXCEPT
1735 {
1736     if (this != &d) {
1737         if(p)
1738             p->release();
1739         p = d.p;
1740         d.p = nullptr;
1741     }
1742     return *this;
1743 }
1744 
~Device()1745 Device::~Device()
1746 {
1747     if(p)
1748         p->release();
1749 }
1750 
set(void * d)1751 void Device::set(void* d)
1752 {
1753     if(p)
1754         p->release();
1755     p = new Impl(d);
1756     if (p->handle)
1757     {
1758         CV_OCL_CHECK(clReleaseDevice((cl_device_id)d));
1759     }
1760 }
1761 
fromHandle(void * d)1762 Device Device::fromHandle(void* d)
1763 {
1764     Device device(d);
1765     return device;
1766 }
1767 
ptr() const1768 void* Device::ptr() const
1769 {
1770     return p ? p->handle : 0;
1771 }
1772 
name() const1773 String Device::name() const
1774 { return p ? p->name_ : String(); }
1775 
extensions() const1776 String Device::extensions() const
1777 { return p ? String(p->extensions_) : String(); }
1778 
isExtensionSupported(const String & extensionName) const1779 bool Device::isExtensionSupported(const String& extensionName) const
1780 { return p ? p->isExtensionSupported(extensionName) : false; }
1781 
version() const1782 String Device::version() const
1783 { return p ? p->version_ : String(); }
1784 
vendorName() const1785 String Device::vendorName() const
1786 { return p ? p->vendorName_ : String(); }
1787 
vendorID() const1788 int Device::vendorID() const
1789 { return p ? p->vendorID_ : 0; }
1790 
OpenCL_C_Version() const1791 String Device::OpenCL_C_Version() const
1792 { return p ? p->getStrProp(CL_DEVICE_OPENCL_C_VERSION) : String(); }
1793 
OpenCLVersion() const1794 String Device::OpenCLVersion() const
1795 { return p ? p->getStrProp(CL_DEVICE_VERSION) : String(); }
1796 
deviceVersionMajor() const1797 int Device::deviceVersionMajor() const
1798 { return p ? p->deviceVersionMajor_ : 0; }
1799 
deviceVersionMinor() const1800 int Device::deviceVersionMinor() const
1801 { return p ? p->deviceVersionMinor_ : 0; }
1802 
driverVersion() const1803 String Device::driverVersion() const
1804 { return p ? p->driverVersion_ : String(); }
1805 
type() const1806 int Device::type() const
1807 { return p ? p->type_ : 0; }
1808 
addressBits() const1809 int Device::addressBits() const
1810 { return p ? p->addressBits_ : 0; }
1811 
available() const1812 bool Device::available() const
1813 { return p ? p->getBoolProp(CL_DEVICE_AVAILABLE) : false; }
1814 
compilerAvailable() const1815 bool Device::compilerAvailable() const
1816 { return p ? p->getBoolProp(CL_DEVICE_COMPILER_AVAILABLE) : false; }
1817 
linkerAvailable() const1818 bool Device::linkerAvailable() const
1819 #ifdef CL_VERSION_1_2
1820 { return p ? p->getBoolProp(CL_DEVICE_LINKER_AVAILABLE) : false; }
1821 #else
1822 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1823 #endif
1824 
doubleFPConfig() const1825 int Device::doubleFPConfig() const
1826 { return p ? p->doubleFPConfig_ : 0; }
1827 
singleFPConfig() const1828 int Device::singleFPConfig() const
1829 { return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_SINGLE_FP_CONFIG) : 0; }
1830 
halfFPConfig() const1831 int Device::halfFPConfig() const
1832 { return p ? p->halfFPConfig_ : 0; }
1833 
endianLittle() const1834 bool Device::endianLittle() const
1835 { return p ? p->getBoolProp(CL_DEVICE_ENDIAN_LITTLE) : false; }
1836 
errorCorrectionSupport() const1837 bool Device::errorCorrectionSupport() const
1838 { return p ? p->getBoolProp(CL_DEVICE_ERROR_CORRECTION_SUPPORT) : false; }
1839 
executionCapabilities() const1840 int Device::executionCapabilities() const
1841 { return p ? p->getProp<cl_device_exec_capabilities, int>(CL_DEVICE_EXECUTION_CAPABILITIES) : 0; }
1842 
globalMemCacheSize() const1843 size_t Device::globalMemCacheSize() const
1844 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_GLOBAL_MEM_CACHE_SIZE) : 0; }
1845 
globalMemCacheType() const1846 int Device::globalMemCacheType() const
1847 { return p ? p->getProp<cl_device_mem_cache_type, int>(CL_DEVICE_GLOBAL_MEM_CACHE_TYPE) : 0; }
1848 
globalMemCacheLineSize() const1849 int Device::globalMemCacheLineSize() const
1850 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE) : 0; }
1851 
globalMemSize() const1852 size_t Device::globalMemSize() const
1853 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_GLOBAL_MEM_SIZE) : 0; }
1854 
localMemSize() const1855 size_t Device::localMemSize() const
1856 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_LOCAL_MEM_SIZE) : 0; }
1857 
localMemType() const1858 int Device::localMemType() const
1859 { return p ? p->getProp<cl_device_local_mem_type, int>(CL_DEVICE_LOCAL_MEM_TYPE) : 0; }
1860 
hostUnifiedMemory() const1861 bool Device::hostUnifiedMemory() const
1862 { return p ? p->hostUnifiedMemory_ : false; }
1863 
imageSupport() const1864 bool Device::imageSupport() const
1865 { return p ? p->getBoolProp(CL_DEVICE_IMAGE_SUPPORT) : false; }
1866 
imageFromBufferSupport() const1867 bool Device::imageFromBufferSupport() const
1868 {
1869     return p ? p->isExtensionSupported("cl_khr_image2d_from_buffer") : false;
1870 }
1871 
imagePitchAlignment() const1872 uint Device::imagePitchAlignment() const
1873 {
1874 #ifdef CL_DEVICE_IMAGE_PITCH_ALIGNMENT
1875     return p ? p->getProp<cl_uint, uint>(CL_DEVICE_IMAGE_PITCH_ALIGNMENT) : 0;
1876 #else
1877     return 0;
1878 #endif
1879 }
1880 
imageBaseAddressAlignment() const1881 uint Device::imageBaseAddressAlignment() const
1882 {
1883 #ifdef CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT
1884     return p ? p->getProp<cl_uint, uint>(CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT) : 0;
1885 #else
1886     return 0;
1887 #endif
1888 }
1889 
image2DMaxWidth() const1890 size_t Device::image2DMaxWidth() const
1891 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE2D_MAX_WIDTH) : 0; }
1892 
image2DMaxHeight() const1893 size_t Device::image2DMaxHeight() const
1894 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE2D_MAX_HEIGHT) : 0; }
1895 
image3DMaxWidth() const1896 size_t Device::image3DMaxWidth() const
1897 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_WIDTH) : 0; }
1898 
image3DMaxHeight() const1899 size_t Device::image3DMaxHeight() const
1900 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_HEIGHT) : 0; }
1901 
image3DMaxDepth() const1902 size_t Device::image3DMaxDepth() const
1903 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_DEPTH) : 0; }
1904 
imageMaxBufferSize() const1905 size_t Device::imageMaxBufferSize() const
1906 #ifdef CL_VERSION_1_2
1907 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE_MAX_BUFFER_SIZE) : 0; }
1908 #else
1909 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1910 #endif
1911 
imageMaxArraySize() const1912 size_t Device::imageMaxArraySize() const
1913 #ifdef CL_VERSION_1_2
1914 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE_MAX_ARRAY_SIZE) : 0; }
1915 #else
1916 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1917 #endif
1918 
intelSubgroupsSupport() const1919 bool Device::intelSubgroupsSupport() const
1920 { return p ? p->intelSubgroupsSupport_ : false; }
1921 
maxClockFrequency() const1922 int Device::maxClockFrequency() const
1923 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_CLOCK_FREQUENCY) : 0; }
1924 
maxComputeUnits() const1925 int Device::maxComputeUnits() const
1926 { return p ? p->maxComputeUnits_ : 0; }
1927 
maxConstantArgs() const1928 int Device::maxConstantArgs() const
1929 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_CONSTANT_ARGS) : 0; }
1930 
maxConstantBufferSize() const1931 size_t Device::maxConstantBufferSize() const
1932 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE) : 0; }
1933 
maxMemAllocSize() const1934 size_t Device::maxMemAllocSize() const
1935 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_MEM_ALLOC_SIZE) : 0; }
1936 
maxParameterSize() const1937 size_t Device::maxParameterSize() const
1938 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_PARAMETER_SIZE) : 0; }
1939 
maxReadImageArgs() const1940 int Device::maxReadImageArgs() const
1941 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_READ_IMAGE_ARGS) : 0; }
1942 
maxWriteImageArgs() const1943 int Device::maxWriteImageArgs() const
1944 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_WRITE_IMAGE_ARGS) : 0; }
1945 
maxSamplers() const1946 int Device::maxSamplers() const
1947 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_SAMPLERS) : 0; }
1948 
maxWorkGroupSize() const1949 size_t Device::maxWorkGroupSize() const
1950 { return p ? p->maxWorkGroupSize_ : 0; }
1951 
maxWorkItemDims() const1952 int Device::maxWorkItemDims() const
1953 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS) : 0; }
1954 
maxWorkItemSizes(size_t * sizes) const1955 void Device::maxWorkItemSizes(size_t* sizes) const
1956 {
1957     if(p)
1958     {
1959         const int MAX_DIMS = 32;
1960         size_t retsz = 0;
1961         CV_OCL_DBG_CHECK(clGetDeviceInfo(p->handle, CL_DEVICE_MAX_WORK_ITEM_SIZES,
1962                 MAX_DIMS*sizeof(sizes[0]), &sizes[0], &retsz));
1963     }
1964 }
1965 
memBaseAddrAlign() const1966 int Device::memBaseAddrAlign() const
1967 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MEM_BASE_ADDR_ALIGN) : 0; }
1968 
nativeVectorWidthChar() const1969 int Device::nativeVectorWidthChar() const
1970 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR) : 0; }
1971 
nativeVectorWidthShort() const1972 int Device::nativeVectorWidthShort() const
1973 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT) : 0; }
1974 
nativeVectorWidthInt() const1975 int Device::nativeVectorWidthInt() const
1976 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_INT) : 0; }
1977 
nativeVectorWidthLong() const1978 int Device::nativeVectorWidthLong() const
1979 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG) : 0; }
1980 
nativeVectorWidthFloat() const1981 int Device::nativeVectorWidthFloat() const
1982 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT) : 0; }
1983 
nativeVectorWidthDouble() const1984 int Device::nativeVectorWidthDouble() const
1985 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE) : 0; }
1986 
nativeVectorWidthHalf() const1987 int Device::nativeVectorWidthHalf() const
1988 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF) : 0; }
1989 
preferredVectorWidthChar() const1990 int Device::preferredVectorWidthChar() const
1991 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR) : 0; }
1992 
preferredVectorWidthShort() const1993 int Device::preferredVectorWidthShort() const
1994 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT) : 0; }
1995 
preferredVectorWidthInt() const1996 int Device::preferredVectorWidthInt() const
1997 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT) : 0; }
1998 
preferredVectorWidthLong() const1999 int Device::preferredVectorWidthLong() const
2000 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG) : 0; }
2001 
preferredVectorWidthFloat() const2002 int Device::preferredVectorWidthFloat() const
2003 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT) : 0; }
2004 
preferredVectorWidthDouble() const2005 int Device::preferredVectorWidthDouble() const
2006 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE) : 0; }
2007 
preferredVectorWidthHalf() const2008 int Device::preferredVectorWidthHalf() const
2009 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF) : 0; }
2010 
printfBufferSize() const2011 size_t Device::printfBufferSize() const
2012 #ifdef CL_VERSION_1_2
2013 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_PRINTF_BUFFER_SIZE) : 0; }
2014 #else
2015 { CV_REQUIRE_OPENCL_1_2_ERROR; }
2016 #endif
2017 
2018 
profilingTimerResolution() const2019 size_t Device::profilingTimerResolution() const
2020 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_PROFILING_TIMER_RESOLUTION) : 0; }
2021 
getDefault()2022 const Device& Device::getDefault()
2023 {
2024     auto& c = OpenCLExecutionContext::getCurrent();
2025     if (!c.empty())
2026     {
2027         return c.getDevice();
2028     }
2029 
2030     static Device dummy;
2031     return dummy;
2032 }
2033 
2034 ////////////////////////////////////// Context ///////////////////////////////////////////////////
2035 
2036 template <typename Functor, typename ObjectType>
getStringInfo(Functor f,ObjectType obj,cl_uint name,std::string & param)2037 inline cl_int getStringInfo(Functor f, ObjectType obj, cl_uint name, std::string& param)
2038 {
2039     ::size_t required;
2040     cl_int err = f(obj, name, 0, NULL, &required);
2041     if (err != CL_SUCCESS)
2042         return err;
2043 
2044     param.clear();
2045     if (required > 0)
2046     {
2047         AutoBuffer<char> buf(required + 1);
2048         char* ptr = buf.data(); // cleanup is not needed
2049         err = f(obj, name, required, ptr, NULL);
2050         if (err != CL_SUCCESS)
2051             return err;
2052         param = ptr;
2053     }
2054 
2055     return CL_SUCCESS;
2056 }
2057 
// Splits `s` on `delim` into `elems` (previous contents are discarded).
// An empty input yields no elements. Empty fields are preserved, including
// a trailing one: "a:" produces {"a", ""} and ":" produces {"", ""}.
static void split(const std::string &s, char delim, std::vector<std::string> &elems)
{
    elems.clear();
    if (s.empty())
        return;

    std::string::size_type fieldBegin = 0;
    for (;;)
    {
        const std::string::size_type fieldEnd = s.find(delim, fieldBegin);
        if (fieldEnd == std::string::npos)
        {
            // last field: everything after the final delimiter (possibly empty)
            elems.push_back(s.substr(fieldBegin));
            break;
        }
        elems.push_back(s.substr(fieldBegin, fieldEnd - fieldBegin));
        fieldBegin = fieldEnd + 1;
    }
}
2071 
2072 // Layout: <Platform>:<CPU|GPU|ACCELERATOR|nothing=GPU/CPU>:<deviceName>
2073 // Sample: AMD:GPU:
2074 // Sample: AMD:GPU:Tahiti
2075 // Sample: :GPU|CPU: = '' = ':' = '::'
parseOpenCLDeviceConfiguration(const std::string & configurationStr,std::string & platform,std::vector<std::string> & deviceTypes,std::string & deviceNameOrID)2076 static bool parseOpenCLDeviceConfiguration(const std::string& configurationStr,
2077         std::string& platform, std::vector<std::string>& deviceTypes, std::string& deviceNameOrID)
2078 {
2079     std::vector<std::string> parts;
2080     split(configurationStr, ':', parts);
2081     if (parts.size() > 3)
2082     {
2083         CV_LOG_ERROR(NULL, "OpenCL: Invalid configuration string for OpenCL device: " << configurationStr);
2084         return false;
2085     }
2086     if (parts.size() > 2)
2087         deviceNameOrID = parts[2];
2088     if (parts.size() > 1)
2089     {
2090         split(parts[1], '|', deviceTypes);
2091     }
2092     if (parts.size() > 0)
2093     {
2094         platform = parts[0];
2095     }
2096     return true;
2097 }
2098 
2099 #if defined WINRT || defined _WIN32_WCE
selectOpenCLDevice(const char * configuration=NULL)2100 static cl_device_id selectOpenCLDevice(const char* configuration = NULL)
2101 {
2102     CV_UNUSED(configuration)
2103     return NULL;
2104 }
2105 #else
selectOpenCLDevice(const char * configuration=NULL)2106 static cl_device_id selectOpenCLDevice(const char* configuration = NULL)
2107 {
2108     std::string platform, deviceName;
2109     std::vector<std::string> deviceTypes;
2110 
2111     if (!configuration)
2112         configuration = getenv("OPENCV_OPENCL_DEVICE");
2113 
2114     if (configuration &&
2115             (strcmp(configuration, "disabled") == 0 ||
2116              !parseOpenCLDeviceConfiguration(std::string(configuration), platform, deviceTypes, deviceName)
2117             ))
2118         return NULL;
2119 
2120     bool isID = false;
2121     int deviceID = -1;
2122     if (deviceName.length() == 1)
2123     // We limit ID range to 0..9, because we want to write:
2124     // - '2500' to mean i5-2500
2125     // - '8350' to mean AMD FX-8350
2126     // - '650' to mean GeForce 650
2127     // To extend ID range change condition to '> 0'
2128     {
2129         isID = true;
2130         for (size_t i = 0; i < deviceName.length(); i++)
2131         {
2132             if (!isdigit(deviceName[i]))
2133             {
2134                 isID = false;
2135                 break;
2136             }
2137         }
2138         if (isID)
2139         {
2140             deviceID = atoi(deviceName.c_str());
2141             if (deviceID < 0)
2142                 return NULL;
2143         }
2144     }
2145 
2146     std::vector<cl_platform_id> platforms;
2147     {
2148         cl_uint numPlatforms = 0;
2149         CV_OCL_DBG_CHECK(clGetPlatformIDs(0, NULL, &numPlatforms));
2150 
2151         if (numPlatforms == 0)
2152             return NULL;
2153         platforms.resize((size_t)numPlatforms);
2154         CV_OCL_DBG_CHECK(clGetPlatformIDs(numPlatforms, &platforms[0], &numPlatforms));
2155         platforms.resize(numPlatforms);
2156     }
2157 
2158     int selectedPlatform = -1;
2159     if (platform.length() > 0)
2160     {
2161         for (size_t i = 0; i < platforms.size(); i++)
2162         {
2163             std::string name;
2164             CV_OCL_DBG_CHECK(getStringInfo(clGetPlatformInfo, platforms[i], CL_PLATFORM_NAME, name));
2165             if (name.find(platform) != std::string::npos)
2166             {
2167                 selectedPlatform = (int)i;
2168                 break;
2169             }
2170         }
2171         if (selectedPlatform == -1)
2172         {
2173             CV_LOG_ERROR(NULL, "OpenCL: Can't find OpenCL platform by name: " << platform);
2174             goto not_found;
2175         }
2176     }
2177     if (deviceTypes.size() == 0)
2178     {
2179         if (!isID)
2180         {
2181             deviceTypes.push_back("GPU");
2182             if (configuration)
2183                 deviceTypes.push_back("CPU");
2184         }
2185         else
2186             deviceTypes.push_back("ALL");
2187     }
2188     for (size_t t = 0; t < deviceTypes.size(); t++)
2189     {
2190         int deviceType = 0;
2191         std::string tempStrDeviceType = deviceTypes[t];
2192         std::transform(tempStrDeviceType.begin(), tempStrDeviceType.end(), tempStrDeviceType.begin(), details::char_tolower);
2193 
2194         if (tempStrDeviceType == "gpu" || tempStrDeviceType == "dgpu" || tempStrDeviceType == "igpu")
2195             deviceType = Device::TYPE_GPU;
2196         else if (tempStrDeviceType == "cpu")
2197             deviceType = Device::TYPE_CPU;
2198         else if (tempStrDeviceType == "accelerator")
2199             deviceType = Device::TYPE_ACCELERATOR;
2200         else if (tempStrDeviceType == "all")
2201             deviceType = Device::TYPE_ALL;
2202         else
2203         {
2204             CV_LOG_ERROR(NULL, "OpenCL: Unsupported device type for OpenCL device (GPU, CPU, ACCELERATOR): " << deviceTypes[t]);
2205             goto not_found;
2206         }
2207 
2208         std::vector<cl_device_id> devices; // TODO Use clReleaseDevice to cleanup
2209         for (int i = selectedPlatform >= 0 ? selectedPlatform : 0;
2210                 (selectedPlatform >= 0 ? i == selectedPlatform : true) && (i < (int)platforms.size());
2211                 i++)
2212         {
2213             cl_uint count = 0;
2214             cl_int status = clGetDeviceIDs(platforms[i], deviceType, 0, NULL, &count);
2215             if (!(status == CL_SUCCESS || status == CL_DEVICE_NOT_FOUND))
2216             {
2217                 CV_OCL_DBG_CHECK_RESULT(status, "clGetDeviceIDs get count");
2218             }
2219             if (count == 0)
2220                 continue;
2221             size_t base = devices.size();
2222             devices.resize(base + count);
2223             status = clGetDeviceIDs(platforms[i], deviceType, count, &devices[base], &count);
2224             if (!(status == CL_SUCCESS || status == CL_DEVICE_NOT_FOUND))
2225             {
2226                 CV_OCL_DBG_CHECK_RESULT(status, "clGetDeviceIDs get IDs");
2227             }
2228         }
2229 
2230         for (size_t i = (isID ? deviceID : 0);
2231              (isID ? (i == (size_t)deviceID) : true) && (i < devices.size());
2232              i++)
2233         {
2234             std::string name;
2235             CV_OCL_DBG_CHECK(getStringInfo(clGetDeviceInfo, devices[i], CL_DEVICE_NAME, name));
2236             cl_bool useGPU = true;
2237             if(tempStrDeviceType == "dgpu" || tempStrDeviceType == "igpu")
2238             {
2239                 cl_bool isIGPU = CL_FALSE;
2240                 CV_OCL_DBG_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(isIGPU), &isIGPU, NULL));
2241                 useGPU = tempStrDeviceType == "dgpu" ? !isIGPU : isIGPU;
2242             }
2243             if ( (isID || name.find(deviceName) != std::string::npos) && useGPU)
2244             {
2245                 // TODO check for OpenCL 1.1
2246                 return devices[i];
2247             }
2248         }
2249     }
2250 
2251 not_found:
2252     if (!configuration)
2253         return NULL; // suppress messages on stderr
2254 
2255     std::ostringstream msg;
2256     msg << "ERROR: Requested OpenCL device not found, check configuration: '" << configuration << "'" << std::endl
2257         << "    Platform: " << (platform.length() == 0 ? "any" : platform) << std::endl
2258         << "    Device types:";
2259     for (size_t t = 0; t < deviceTypes.size(); t++)
2260         msg << ' ' << deviceTypes[t];
2261 
2262     msg << std::endl << "    Device name: " << (deviceName.length() == 0 ? "any" : deviceName);
2263 
2264     CV_LOG_ERROR(NULL, msg.str());
2265     return NULL;
2266 }
2267 #endif
2268 
2269 #ifdef HAVE_OPENCL_SVM
2270 namespace svm {
2271 
2272 enum AllocatorFlags { // don't use first 16 bits
2273         OPENCL_SVM_COARSE_GRAIN_BUFFER = 1 << 16, // clSVMAlloc + SVM map/unmap
2274         OPENCL_SVM_FINE_GRAIN_BUFFER = 2 << 16, // clSVMAlloc
2275         OPENCL_SVM_FINE_GRAIN_SYSTEM = 3 << 16, // direct access
2276         OPENCL_SVM_BUFFER_MASK = 3 << 16,
2277         OPENCL_SVM_BUFFER_MAP = 4 << 16
2278 };
2279 
checkForceSVMUmatUsage()2280 static bool checkForceSVMUmatUsage()
2281 {
2282     static bool initialized = false;
2283     static bool force = false;
2284     if (!initialized)
2285     {
2286         force = utils::getConfigurationParameterBool("OPENCV_OPENCL_SVM_FORCE_UMAT_USAGE", false);
2287         initialized = true;
2288     }
2289     return force;
2290 }
checkDisableSVMUMatUsage()2291 static bool checkDisableSVMUMatUsage()
2292 {
2293     static bool initialized = false;
2294     static bool force = false;
2295     if (!initialized)
2296     {
2297         force = utils::getConfigurationParameterBool("OPENCV_OPENCL_SVM_DISABLE_UMAT_USAGE", false);
2298         initialized = true;
2299     }
2300     return force;
2301 }
checkDisableSVM()2302 static bool checkDisableSVM()
2303 {
2304     static bool initialized = false;
2305     static bool force = false;
2306     if (!initialized)
2307     {
2308         force = utils::getConfigurationParameterBool("OPENCV_OPENCL_SVM_DISABLE", false);
2309         initialized = true;
2310     }
2311     return force;
2312 }
2313 // see SVMCapabilities
getSVMCapabilitiesMask()2314 static unsigned int getSVMCapabilitiesMask()
2315 {
2316     static bool initialized = false;
2317     static unsigned int mask = 0;
2318     if (!initialized)
2319     {
2320         const char* envValue = getenv("OPENCV_OPENCL_SVM_CAPABILITIES_MASK");
2321         if (envValue == NULL)
2322         {
2323             return ~0U; // all bits 1
2324         }
2325         mask = atoi(envValue);
2326         initialized = true;
2327     }
2328     return mask;
2329 }
2330 } // namespace
2331 #endif
2332 
getProgramCountLimit()2333 static size_t getProgramCountLimit()
2334 {
2335     static bool initialized = false;
2336     static size_t count = 0;
2337     if (!initialized)
2338     {
2339         count = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_PROGRAM_CACHE", 0);
2340         initialized = true;
2341     }
2342     return count;
2343 }
2344 
// Buffer pool implementations, defined elsewhere in this file.
class OpenCLBufferPoolImpl;
class OpenCLSVMBufferPoolImpl;

// Source of Context::Impl::contextId values: each new Impl takes the current
// value and increments it (via CV_XADD in the Impl constructor).
static int g_contextId = 0;
2349 
2350 struct Context::Impl
2351 {
getcv::ocl::Context::Impl2352     static Context::Impl* get(Context& context) { return context.p; }
2353 
2354     typedef std::deque<Context::Impl*> container_t;
getGlobalContainercv::ocl::Context::Impl2355     static container_t& getGlobalContainer()
2356     {
2357         // never delete this container (Impl lifetime is greater due to TLS storage)
2358         static container_t* g_contexts = new container_t();
2359         return *g_contexts;
2360     }
2361 
2362 protected:
Implcv::ocl::Context::Impl2363     Impl(const std::string& configuration_)
2364         : refcount(1)
2365         , contextId(CV_XADD(&g_contextId, 1))
2366         , configuration(configuration_)
2367         , handle(0)
2368 #ifdef HAVE_OPENCL_SVM
2369         , svmInitialized(false)
2370 #endif
2371     {
2372         if (!haveOpenCL())
2373             CV_Error(cv::Error::OpenCLApiCallError, "OpenCL runtime is not available!");
2374 
2375         cv::AutoLock lock(cv::getInitializationMutex());
2376         auto& container = getGlobalContainer();
2377         container.resize(std::max(container.size(), (size_t)contextId + 1));
2378         container[contextId] = this;
2379     }
2380 
~Implcv::ocl::Context::Impl2381     ~Impl()
2382     {
2383 #ifdef _WIN32
2384         if (!cv::__termination)
2385 #endif
2386         {
2387             if (handle)
2388             {
2389                 CV_OCL_DBG_CHECK(clReleaseContext(handle));
2390                 handle = NULL;
2391             }
2392             devices.clear();
2393         }
2394 
2395         userContextStorage.clear();
2396 
2397         {
2398             cv::AutoLock lock(cv::getInitializationMutex());
2399             auto& container = getGlobalContainer();
2400             CV_CheckLT((size_t)contextId, container.size(), "");
2401             container[contextId] = NULL;
2402         }
2403     }
2404 
init_device_listcv::ocl::Context::Impl2405     void init_device_list()
2406     {
2407         CV_Assert(handle);
2408 
2409         cl_uint ndevices = 0;
2410         CV_OCL_CHECK(clGetContextInfo(handle, CL_CONTEXT_NUM_DEVICES, sizeof(ndevices), &ndevices, NULL));
2411         CV_Assert(ndevices > 0);
2412 
2413         cv::AutoBuffer<cl_device_id> cl_devices(ndevices);
2414         size_t devices_ret_size = 0;
2415         CV_OCL_CHECK(clGetContextInfo(handle, CL_CONTEXT_DEVICES, cl_devices.size() * sizeof(cl_device_id), &cl_devices[0], &devices_ret_size));
2416         CV_CheckEQ(devices_ret_size, cl_devices.size() * sizeof(cl_device_id), "");
2417 
2418         devices.clear();
2419         for (unsigned i = 0; i < ndevices; i++)
2420         {
2421             devices.emplace_back(Device::fromHandle(cl_devices[i]));
2422         }
2423     }
2424 
2425     void __init_buffer_pools();  // w/o synchronization
_init_buffer_poolscv::ocl::Context::Impl2426     void _init_buffer_pools() const
2427     {
2428         if (!bufferPool_)
2429         {
2430             cv::AutoLock lock(cv::getInitializationMutex());
2431             if (!bufferPool_)
2432             {
2433                 const_cast<Impl*>(this)->__init_buffer_pools();
2434             }
2435         }
2436     }
2437 public:
findContextcv::ocl::Context::Impl2438     static Impl* findContext(const std::string& configuration)
2439     {
2440         CV_TRACE_FUNCTION();
2441         cv::AutoLock lock(cv::getInitializationMutex());
2442         auto& container = getGlobalContainer();
2443         if (configuration.empty() && !container.empty())
2444             return container[0];
2445         for (auto it = container.begin(); it != container.end(); ++it)
2446         {
2447             Impl* i = *it;
2448             if (i && i->configuration == configuration)
2449             {
2450                 return i;
2451             }
2452         }
2453         return NULL;
2454     }
2455 
findOrCreateContextcv::ocl::Context::Impl2456     static Impl* findOrCreateContext(const std::string& configuration_)
2457     {
2458         CV_TRACE_FUNCTION();
2459         std::string configuration = configuration_;
2460         if (configuration_.empty())
2461         {
2462             const char* c = getenv("OPENCV_OPENCL_DEVICE");
2463             if (c)
2464                 configuration = c;
2465         }
2466         Impl* impl = findContext(configuration);
2467         if (impl)
2468         {
2469             CV_LOG_INFO(NULL, "OpenCL: reuse context@" << impl->contextId << " for configuration: " << configuration)
2470             impl->addref();
2471             return impl;
2472         }
2473 
2474         cl_device_id d = selectOpenCLDevice(configuration.empty() ? NULL : configuration.c_str());
2475         if (d == NULL)
2476             return NULL;
2477 
2478         impl = new Impl(configuration);
2479         try
2480         {
2481             impl->createFromDevice(d);
2482             if (impl->handle)
2483                 return impl;
2484             delete impl;
2485             return NULL;
2486         }
2487         catch (...)
2488         {
2489             delete impl;
2490             throw;
2491         }
2492     }
2493 
findOrCreateContextcv::ocl::Context::Impl2494     static Impl* findOrCreateContext(cl_context h)
2495     {
2496         CV_TRACE_FUNCTION();
2497 
2498         CV_Assert(h);
2499 
2500         std::string configuration = cv::format("@ctx-%p", (void*)h);
2501         Impl* impl = findContext(configuration);
2502         if (impl)
2503         {
2504             CV_LOG_INFO(NULL, "OpenCL: reuse context@" << impl->contextId << " for configuration: " << configuration)
2505             impl->addref();
2506             return impl;
2507         }
2508 
2509         impl = new Impl(configuration);
2510         try
2511         {
2512             CV_OCL_CHECK(clRetainContext(h));
2513             impl->handle = h;
2514             impl->init_device_list();
2515             return impl;
2516         }
2517         catch (...)
2518         {
2519             delete impl;
2520             throw;
2521         }
2522     }
2523 
findOrCreateContextcv::ocl::Context::Impl2524     static Impl* findOrCreateContext(const ocl::Device& device)
2525     {
2526         CV_TRACE_FUNCTION();
2527 
2528         CV_Assert(!device.empty());
2529         cl_device_id d = (cl_device_id)device.ptr();
2530         CV_Assert(d);
2531 
2532         std::string configuration = cv::format("@dev-%p", (void*)d);
2533         Impl* impl = findContext(configuration);
2534         if (impl)
2535         {
2536             CV_LOG_INFO(NULL, "OpenCL: reuse context@" << impl->contextId << " for configuration: " << configuration)
2537             impl->addref();
2538             return impl;
2539         }
2540 
2541         impl = new Impl(configuration);
2542         try
2543         {
2544             impl->createFromDevice(d);
2545             CV_Assert(impl->handle);
2546             return impl;
2547         }
2548         catch (...)
2549         {
2550             delete impl;
2551             throw;
2552         }
2553     }
2554 
setDefaultcv::ocl::Context::Impl2555     void setDefault()
2556     {
2557         CV_TRACE_FUNCTION();
2558         cl_device_id d = selectOpenCLDevice();
2559 
2560         if (d == NULL)
2561             return;
2562 
2563         createFromDevice(d);
2564     }
2565 
createFromDevicecv::ocl::Context::Impl2566     void createFromDevice(cl_device_id d)
2567     {
2568         CV_TRACE_FUNCTION();
2569         CV_Assert(handle == NULL);
2570 
2571         cl_platform_id pl = NULL;
2572         CV_OCL_DBG_CHECK(clGetDeviceInfo(d, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &pl, NULL));
2573 
2574         cl_context_properties prop[] =
2575         {
2576             CL_CONTEXT_PLATFORM, (cl_context_properties)pl,
2577             0
2578         };
2579 
2580         // !!! in the current implementation force the number of devices to 1 !!!
2581         cl_uint nd = 1;
2582         cl_int status;
2583 
2584         handle = clCreateContext(prop, nd, &d, 0, 0, &status);
2585         CV_OCL_DBG_CHECK_RESULT(status, "clCreateContext");
2586 
2587         bool ok = handle != 0 && status == CL_SUCCESS;
2588         if( ok )
2589         {
2590             devices.resize(nd);
2591             devices[0].set(d);
2592         }
2593         else
2594             handle = NULL;
2595     }
2596 
2597     Program getProg(const ProgramSource& src, const String& buildflags, String& errmsg);
2598 
unloadProgcv::ocl::Context::Impl2599     void unloadProg(Program& prog)
2600     {
2601         cv::AutoLock lock(program_cache_mutex);
2602         for (CacheList::iterator i = cacheList.begin(); i != cacheList.end(); ++i)
2603         {
2604               phash_t::iterator it = phash.find(*i);
2605               if (it != phash.end())
2606               {
2607                   if (it->second.ptr() == prog.ptr())
2608                   {
2609                       phash.erase(*i);
2610                       cacheList.erase(i);
2611                       return;
2612                   }
2613               }
2614         }
2615     }
2616 
getPrefixStringcv::ocl::Context::Impl2617     std::string& getPrefixString()
2618     {
2619         if (prefix.empty())
2620         {
2621             cv::AutoLock lock(program_cache_mutex);
2622             if (prefix.empty())
2623             {
2624                 CV_Assert(!devices.empty());
2625                 const Device& d = devices[0];
2626                 int bits = d.addressBits();
2627                 if (bits > 0 && bits != 64)
2628                     prefix = cv::format("%d-bit--", bits);
2629                 prefix += d.vendorName() + "--" + d.name() + "--" + d.driverVersion();
2630                 // sanitize chars
2631                 for (size_t i = 0; i < prefix.size(); i++)
2632                 {
2633                     char c = prefix[i];
2634                     if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c == '-'))
2635                     {
2636                         prefix[i] = '_';
2637                     }
2638                 }
2639             }
2640         }
2641         return prefix;
2642     }
2643 
getPrefixBasecv::ocl::Context::Impl2644     std::string& getPrefixBase()
2645     {
2646         if (prefix_base.empty())
2647         {
2648             cv::AutoLock lock(program_cache_mutex);
2649             if (prefix_base.empty())
2650             {
2651                 const Device& d = devices[0];
2652                 int bits = d.addressBits();
2653                 if (bits > 0 && bits != 64)
2654                     prefix_base = cv::format("%d-bit--", bits);
2655                 prefix_base += d.vendorName() + "--" + d.name() + "--";
2656                 // sanitize chars
2657                 for (size_t i = 0; i < prefix_base.size(); i++)
2658                 {
2659                     char c = prefix_base[i];
2660                     if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c == '-'))
2661                     {
2662                         prefix_base[i] = '_';
2663                     }
2664                 }
2665             }
2666         }
2667         return prefix_base;
2668     }
2669 
    // Reference counting members (macro defined elsewhere in the file);
    // Context calls addref()/release() on this object.
    IMPLEMENT_REFCOUNTABLE();

    const int contextId;  // global unique ID
    const std::string configuration;

    // OpenCL context handle; NULL until a context has been created
    // (createFromDevice() resets it to NULL when creation fails).
    cl_context handle;
    // Devices of this context; createFromDevice() stores exactly one entry.
    std::vector<Device> devices;

    std::string prefix;       // cache for getPrefixString()
    std::string prefix_base;  // cache for getPrefixBase()

    // Locked by unloadProg() and while the prefix strings are being built.
    cv::Mutex program_cache_mutex;
    typedef std::map<std::string, Program> phash_t;
    phash_t phash;  // cached programs by string key
    typedef std::list<cv::String> CacheList;
    CacheList cacheList;  // keys that unloadProg() looks up in phash

    // Pools returned by getBufferPool() / getBufferPoolHostPtr().
    std::shared_ptr<OpenCLBufferPoolImpl> bufferPool_;
    std::shared_ptr<OpenCLBufferPoolImpl> bufferPoolHostPtr_;
getBufferPoolcv::ocl::Context::Impl2689     OpenCLBufferPoolImpl& getBufferPool() const
2690     {
2691         _init_buffer_pools();
2692         CV_DbgAssert(bufferPool_);
2693         return *bufferPool_.get();
2694     }
getBufferPoolHostPtrcv::ocl::Context::Impl2695     OpenCLBufferPoolImpl& getBufferPoolHostPtr() const
2696     {
2697         _init_buffer_pools();
2698         CV_DbgAssert(bufferPoolHostPtr_);
2699         return *bufferPoolHostPtr_.get();
2700     }
2701 
    // User-supplied context objects, one slot per type id.
    std::map<std::type_index, std::shared_ptr<UserContext>> userContextStorage;
    // Held by setUserContext() / getUserContext() while accessing the map.
    cv::Mutex userContextMutex;
setUserContextcv::ocl::Context::Impl2704     void setUserContext(std::type_index typeId, const std::shared_ptr<UserContext>& userContext) {
2705         cv::AutoLock lock(userContextMutex);
2706         userContextStorage[typeId] = userContext;
2707     }
getUserContextcv::ocl::Context::Impl2708     std::shared_ptr<UserContext> getUserContext(std::type_index typeId) {
2709         cv::AutoLock lock(userContextMutex);
2710         auto it = userContextStorage.find(typeId);
2711         if (it != userContextStorage.end())
2712             return it->second;
2713         else
2714             return nullptr;
2715     }
2716 
2717 #ifdef HAVE_OPENCL_SVM
    bool svmInitialized;  // set to true by svmInit() on every exit path
    bool svmAvailable;    // result of the probe done by svmInit()
    bool svmEnabled;      // set by svmInit(); Context::setUseSVM() can change it
    svm::SVMCapabilities svmCapabilities;  // capability bits filled by svmInit()
    svm::SVMFunctions svmFunctions;        // SVM entry points filled by svmInit()
2723 
    // Probes the first device of this context for Shared Virtual Memory (SVM)
    // support and fills svmCapabilities / svmFunctions.
    //
    // Requires an already created context (handle != NULL). svmInitialized is
    // set to true on both exits. On success svmAvailable becomes true and
    // svmEnabled becomes !svm::checkDisableSVM(). Every detection failure jumps
    // to the noSVM label, which clears the flags, the capability mask and
    // svmFunctions.fn_clSVMAlloc.
    void svmInit()
    {
        CV_Assert(handle != NULL);
        const Device& device = devices[0];
        cl_device_svm_capabilities deviceCaps = 0;
        CV_Assert(((void)0, CL_DEVICE_SVM_CAPABILITIES == CL_DEVICE_SVM_CAPABILITIES_AMD)); // Check assumption
        cl_int status = clGetDeviceInfo((cl_device_id)device.ptr(), CL_DEVICE_SVM_CAPABILITIES, sizeof(deviceCaps), &deviceCaps, NULL);
        if (status != CL_SUCCESS)
        {
            CV_OPENCL_SVM_TRACE_ERROR_P("CL_DEVICE_SVM_CAPABILITIES via clGetDeviceInfo failed: %d\n", status);
            goto noSVM;
        }
        CV_OPENCL_SVM_TRACE_P("CL_DEVICE_SVM_CAPABILITIES returned: 0x%x\n", (int)deviceCaps);
        CV_Assert(((void)0, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER == CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_AMD)); // Check assumption
        // Translate the OpenCL capability bits into the svm::SVMCapabilities flags,
        // then keep only those allowed by svm::getSVMCapabilitiesMask().
        svmCapabilities.value_ =
                ((deviceCaps & CL_DEVICE_SVM_COARSE_GRAIN_BUFFER) ? svm::SVMCapabilities::SVM_COARSE_GRAIN_BUFFER : 0) |
                ((deviceCaps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) ? svm::SVMCapabilities::SVM_FINE_GRAIN_BUFFER : 0) |
                ((deviceCaps & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM) ? svm::SVMCapabilities::SVM_FINE_GRAIN_SYSTEM : 0) |
                ((deviceCaps & CL_DEVICE_SVM_ATOMICS) ? svm::SVMCapabilities::SVM_ATOMICS : 0);
        svmCapabilities.value_ &= svm::getSVMCapabilitiesMask();
        if (svmCapabilities.value_ == 0)
        {
            CV_OPENCL_SVM_TRACE_ERROR_P("svmCapabilities is empty\n");
            goto noSVM;
        }
        try
        {
            // Try OpenCL 2.0
            CV_OPENCL_SVM_TRACE_P("Try SVM from OpenCL 2.0 ...\n");
            // Smoke test: allocate a 100-byte SVM buffer, map it, write to it, unmap it.
            void* ptr = clSVMAlloc(handle, CL_MEM_READ_WRITE, 100, 0);
            if (!ptr)
            {
                CV_OPENCL_SVM_TRACE_ERROR_P("clSVMAlloc returned NULL...\n");
                CV_Error(Error::StsBadArg, "clSVMAlloc returned NULL");
            }
            try
            {
                bool error = false;
                cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
                if (CL_SUCCESS != clEnqueueSVMMap(q, CL_TRUE, CL_MAP_WRITE, ptr, 100, 0, NULL, NULL))
                {
                    CV_OPENCL_SVM_TRACE_ERROR_P("clEnqueueSVMMap failed...\n");
                    CV_Error(Error::StsBadArg, "clEnqueueSVMMap FAILED");
                }
                clFinish(q);
                try
                {
                    ((int*)ptr)[0] = 100;
                }
                catch (...)
                {
                    CV_OPENCL_SVM_TRACE_ERROR_P("SVM buffer access test FAILED\n");
                    error = true;
                }
                // The unmap is attempted even after a failed write; the error is raised afterwards.
                if (CL_SUCCESS != clEnqueueSVMUnmap(q, ptr, 0, NULL, NULL))
                {
                    CV_OPENCL_SVM_TRACE_ERROR_P("clEnqueueSVMUnmap failed...\n");
                    CV_Error(Error::StsBadArg, "clEnqueueSVMUnmap FAILED");
                }
                clFinish(q);
                if (error)
                {
                    CV_Error(Error::StsBadArg, "OpenCL SVM buffer access test was FAILED");
                }
            }
            catch (...)
            {
                // Free the test buffer, then let the outer handler try the extension path.
                CV_OPENCL_SVM_TRACE_ERROR_P("OpenCL SVM buffer access test was FAILED\n");
                clSVMFree(handle, ptr);
                throw;
            }
            clSVMFree(handle, ptr);
            // The test passed: use the core OpenCL 2.0 entry points.
            svmFunctions.fn_clSVMAlloc = clSVMAlloc;
            svmFunctions.fn_clSVMFree = clSVMFree;
            svmFunctions.fn_clSetKernelArgSVMPointer = clSetKernelArgSVMPointer;
            //svmFunctions.fn_clSetKernelExecInfo = clSetKernelExecInfo;
            //svmFunctions.fn_clEnqueueSVMFree = clEnqueueSVMFree;
            svmFunctions.fn_clEnqueueSVMMemcpy = clEnqueueSVMMemcpy;
            svmFunctions.fn_clEnqueueSVMMemFill = clEnqueueSVMMemFill;
            svmFunctions.fn_clEnqueueSVMMap = clEnqueueSVMMap;
            svmFunctions.fn_clEnqueueSVMUnmap = clEnqueueSVMUnmap;
        }
        catch (...)
        {
            // Reached for any exception raised by the OpenCL 2.0 attempt above.
            CV_OPENCL_SVM_TRACE_P("clSVMAlloc failed, trying HSA extension...\n");
            try
            {
                // Try HSA extension
                String extensions = device.extensions();
                if (extensions.find("cl_amd_svm") == String::npos)
                {
                    CV_OPENCL_SVM_TRACE_P("Device extension doesn't have cl_amd_svm: %s\n", extensions.c_str());
                    goto noSVM;
                }
                cl_platform_id p = NULL;
                CV_OCL_CHECK(status = clGetDeviceInfo((cl_device_id)device.ptr(), CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &p, NULL));
                // Resolve the AMD-suffixed entry points through the platform's extension lookup.
                svmFunctions.fn_clSVMAlloc = (clSVMAllocAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSVMAllocAMD");
                svmFunctions.fn_clSVMFree = (clSVMFreeAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSVMFreeAMD");
                svmFunctions.fn_clSetKernelArgSVMPointer = (clSetKernelArgSVMPointerAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSetKernelArgSVMPointerAMD");
                //svmFunctions.fn_clSetKernelExecInfo = (clSetKernelExecInfoAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSetKernelExecInfoAMD");
                //svmFunctions.fn_clEnqueueSVMFree = (clEnqueueSVMFreeAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMFreeAMD");
                svmFunctions.fn_clEnqueueSVMMemcpy = (clEnqueueSVMMemcpyAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMemcpyAMD");
                svmFunctions.fn_clEnqueueSVMMemFill = (clEnqueueSVMMemFillAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMemFillAMD");
                svmFunctions.fn_clEnqueueSVMMap = (clEnqueueSVMMapAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMapAMD");
                svmFunctions.fn_clEnqueueSVMUnmap = (clEnqueueSVMUnmapAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMUnmapAMD");
                CV_Assert(svmFunctions.isValid());
            }
            catch (...)
            {
                CV_OPENCL_SVM_TRACE_P("Something is totally wrong\n");
                goto noSVM;
            }
        }

        // Success exit.
        svmAvailable = true;
        svmEnabled = !svm::checkDisableSVM();
        svmInitialized = true;
        CV_OPENCL_SVM_TRACE_P("OpenCV OpenCL SVM support initialized\n");
        return;
    noSVM:
        // Failure exit: mark the probe as done with SVM unavailable.
        // Only fn_clSVMAlloc is reset here; the other function pointers are left untouched.
        CV_OPENCL_SVM_TRACE_P("OpenCL SVM is not detected\n");
        svmAvailable = false;
        svmEnabled = false;
        svmCapabilities.value_ = 0;
        svmInitialized = true;
        svmFunctions.fn_clSVMAlloc = NULL;
        return;
    }
2852 
    // Pool returned by getBufferPoolSVM().
    std::shared_ptr<OpenCLSVMBufferPoolImpl> bufferPoolSVM_;
2854 
getBufferPoolSVMcv::ocl::Context::Impl2855     OpenCLSVMBufferPoolImpl& getBufferPoolSVM() const
2856     {
2857         _init_buffer_pools();
2858         CV_DbgAssert(bufferPoolSVM_);
2859         return *bufferPoolSVM_.get();
2860     }
2861 #endif
2862 
2863     friend class Program;
2864 };
2865 
2866 
Context()2867 Context::Context() CV_NOEXCEPT
2868 {
2869     p = 0;
2870 }
2871 
~Context()2872 Context::~Context()
2873 {
2874     release();
2875 }
2876 
2877 // deprecated
Context(int dtype)2878 Context::Context(int dtype)
2879 {
2880     p = 0;
2881     create(dtype);
2882 }
2883 
release()2884 void Context::release()
2885 {
2886     if (p)
2887     {
2888         p->release();
2889         p = NULL;
2890     }
2891 }
2892 
create()2893 bool Context::create()
2894 {
2895     release();
2896     if (!haveOpenCL())
2897         return false;
2898     p = Impl::findOrCreateContext(std::string());
2899     if (p && p->handle)
2900         return true;
2901     release();
2902     return false;
2903 }
2904 
2905 // deprecated
create(int dtype)2906 bool Context::create(int dtype)
2907 {
2908     if( !haveOpenCL() )
2909         return false;
2910     release();
2911     if (dtype == CL_DEVICE_TYPE_DEFAULT || (unsigned)dtype == (unsigned)CL_DEVICE_TYPE_ALL)
2912     {
2913         p = Impl::findOrCreateContext("");
2914     }
2915     else if (dtype == CL_DEVICE_TYPE_GPU)
2916     {
2917         p = Impl::findOrCreateContext(":GPU:");
2918     }
2919     else if (dtype == CL_DEVICE_TYPE_CPU)
2920     {
2921         p = Impl::findOrCreateContext(":CPU:");
2922     }
2923     else
2924     {
2925         CV_LOG_ERROR(NULL, "OpenCL: Can't recognize OpenCV device type=" << dtype);
2926     }
2927     if (p && !p->handle)
2928     {
2929         release();
2930     }
2931     return p != 0;
2932 }
2933 
// Copy constructor: shares the implementation of `c` and adds a reference to it.
Context::Context(const Context& c)
    : p((Impl*)c.p)
{
    if (p)
    {
        p->addref();
    }
}
2940 
// Copy assignment. The new implementation is referenced before the old one is
// released, so assigning an object to itself is safe.
Context& Context::operator = (const Context& c)
{
    Impl* const incoming = (Impl*)c.p;
    if (incoming)
    {
        incoming->addref();
    }
    if (p)
    {
        p->release();
    }
    p = incoming;
    return *this;
}
2951 
Context(Context && c)2952 Context::Context(Context&& c) CV_NOEXCEPT
2953 {
2954     p = c.p;
2955     c.p = nullptr;
2956 }
2957 
operator =(Context && c)2958 Context& Context::operator = (Context&& c) CV_NOEXCEPT
2959 {
2960     if (this != &c) {
2961         if(p)
2962             p->release();
2963         p = c.p;
2964         c.p = nullptr;
2965     }
2966     return *this;
2967 }
2968 
ptr() const2969 void* Context::ptr() const
2970 {
2971     return p == NULL ? NULL : p->handle;
2972 }
2973 
ndevices() const2974 size_t Context::ndevices() const
2975 {
2976     return p ? p->devices.size() : 0;
2977 }
2978 
device(size_t idx) const2979 Device& Context::device(size_t idx) const
2980 {
2981     static Device dummy;
2982     return !p || idx >= p->devices.size() ? dummy : p->devices[idx];
2983 }
2984 
getDefault(bool initialize)2985 Context& Context::getDefault(bool initialize)
2986 {
2987     auto& c = OpenCLExecutionContext::getCurrent();
2988     if (!c.empty())
2989     {
2990         auto& ctx = c.getContext();
2991         return ctx;
2992     }
2993 
2994     CV_UNUSED(initialize);
2995     static Context dummy;
2996     return dummy;
2997 }
2998 
getProg(const ProgramSource & prog,const String & buildopts,String & errmsg)2999 Program Context::getProg(const ProgramSource& prog,
3000                          const String& buildopts, String& errmsg)
3001 {
3002     return p ? p->getProg(prog, buildopts, errmsg) : Program();
3003 }
3004 
unloadProg(Program & prog)3005 void Context::unloadProg(Program& prog)
3006 {
3007     if (p)
3008         p->unloadProg(prog);
3009 }
3010 
3011 /* static */
fromHandle(void * context)3012 Context Context::fromHandle(void* context)
3013 {
3014     Context ctx;
3015     ctx.p = Impl::findOrCreateContext((cl_context)context);
3016     return ctx;
3017 }
3018 
3019 /* static */
fromDevice(const ocl::Device & device)3020 Context Context::fromDevice(const ocl::Device& device)
3021 {
3022     Context ctx;
3023     ctx.p = Impl::findOrCreateContext(device);
3024     return ctx;
3025 }
3026 
3027 /* static */
create(const std::string & configuration)3028 Context Context::create(const std::string& configuration)
3029 {
3030     Context ctx;
3031     ctx.p = Impl::findOrCreateContext(configuration);
3032     return ctx;
3033 }
3034 
getOpenCLContextProperty(int propertyId) const3035 void* Context::getOpenCLContextProperty(int propertyId) const
3036 {
3037     if (p == NULL)
3038         return nullptr;
3039     ::size_t size = 0;
3040     CV_OCL_CHECK(clGetContextInfo(p->handle, CL_CONTEXT_PROPERTIES, 0, NULL, &size));
3041     std::vector<cl_context_properties> prop(size / sizeof(cl_context_properties), (cl_context_properties)0);
3042     CV_OCL_CHECK(clGetContextInfo(p->handle, CL_CONTEXT_PROPERTIES, size, prop.data(), NULL));
3043     for (size_t i = 0; i < prop.size(); i += 2)
3044     {
3045         if (prop[i] == (cl_context_properties)propertyId)
3046         {
3047             CV_LOG_DEBUG(NULL, "OpenCL: found context property=" << propertyId << ") => " << (void*)prop[i + 1]);
3048             return (void*)prop[i + 1];
3049         }
3050     }
3051     return nullptr;
3052 }
3053 
3054 #ifdef HAVE_OPENCL_SVM
useSVM() const3055 bool Context::useSVM() const
3056 {
3057     Context::Impl* i = p;
3058     CV_Assert(i);
3059     if (!i->svmInitialized)
3060         i->svmInit();
3061     return i->svmEnabled;
3062 }
setUseSVM(bool enabled)3063 void Context::setUseSVM(bool enabled)
3064 {
3065     Context::Impl* i = p;
3066     CV_Assert(i);
3067     if (!i->svmInitialized)
3068         i->svmInit();
3069     if (enabled && !i->svmAvailable)
3070     {
3071         CV_Error(Error::StsError, "OpenCL Shared Virtual Memory (SVM) is not supported by OpenCL device");
3072     }
3073     i->svmEnabled = enabled;
3074 }
3075 #else
useSVM() const3076 bool Context::useSVM() const { return false; }
setUseSVM(bool enabled)3077 void Context::setUseSVM(bool enabled) { CV_Assert(!enabled); }
3078 #endif
3079 
3080 #ifdef HAVE_OPENCL_SVM
3081 namespace svm {
3082 
getSVMCapabilitites(const ocl::Context & context)3083 const SVMCapabilities getSVMCapabilitites(const ocl::Context& context)
3084 {
3085     Context::Impl* i = context.p;
3086     CV_Assert(i);
3087     if (!i->svmInitialized)
3088         i->svmInit();
3089     return i->svmCapabilities;
3090 }
3091 
getSVMFunctions(const ocl::Context & context)3092 CV_EXPORTS const SVMFunctions* getSVMFunctions(const ocl::Context& context)
3093 {
3094     Context::Impl* i = context.p;
3095     CV_Assert(i);
3096     CV_Assert(i->svmInitialized); // getSVMCapabilitites() must be called first
3097     CV_Assert(i->svmFunctions.fn_clSVMAlloc != NULL);
3098     return &i->svmFunctions;
3099 }
3100 
useSVM(UMatUsageFlags usageFlags)3101 CV_EXPORTS bool useSVM(UMatUsageFlags usageFlags)
3102 {
3103     if (checkForceSVMUmatUsage())
3104         return true;
3105     if (checkDisableSVMUMatUsage())
3106         return false;
3107     if ((usageFlags & USAGE_ALLOCATE_SHARED_MEMORY) != 0)
3108         return true;
3109     return false; // don't use SVM by default
3110 }
3111 
3112 } // namespace cv::ocl::svm
3113 #endif // HAVE_OPENCL_SVM
3114 
~UserContext()3115 Context::UserContext::~UserContext()
3116 {
3117 }
3118 
setUserContext(std::type_index typeId,const std::shared_ptr<Context::UserContext> & userContext)3119 void Context::setUserContext(std::type_index typeId, const std::shared_ptr<Context::UserContext>& userContext)
3120 {
3121     CV_Assert(p);
3122     p->setUserContext(typeId, userContext);
3123 }
3124 
getUserContext(std::type_index typeId)3125 std::shared_ptr<Context::UserContext> Context::getUserContext(std::type_index typeId)
3126 {
3127     CV_Assert(p);
3128     return p->getUserContext(typeId);
3129 }
3130 
get_platform_name(cl_platform_id id,String & name)3131 static void get_platform_name(cl_platform_id id, String& name)
3132 {
3133     // get platform name string length
3134     size_t sz = 0;
3135     CV_OCL_CHECK(clGetPlatformInfo(id, CL_PLATFORM_NAME, 0, 0, &sz));
3136 
3137     // get platform name string
3138     AutoBuffer<char> buf(sz + 1);
3139     CV_OCL_CHECK(clGetPlatformInfo(id, CL_PLATFORM_NAME, sz, buf.data(), 0));
3140 
3141     // just in case, ensure trailing zero for ASCIIZ string
3142     buf[sz] = 0;
3143 
3144     name = buf.data();
3145 }
3146 
3147 /*
3148 // Attaches OpenCL context to OpenCV
3149 */
attachContext(const String & platformName,void * platformID,void * context,void * deviceID)3150 void attachContext(const String& platformName, void* platformID, void* context, void* deviceID)
3151 {
3152     auto ctx = OpenCLExecutionContext::create(platformName, platformID, context, deviceID);
3153     ctx.bind();
3154 }
3155 
3156 /* static */
create(const std::string & platformName,void * platformID,void * context,void * deviceID)3157 OpenCLExecutionContext OpenCLExecutionContext::create(
3158         const std::string& platformName, void* platformID, void* context, void* deviceID
3159 )
3160 {
3161     if (!haveOpenCL())
3162         CV_Error(cv::Error::OpenCLApiCallError, "OpenCL runtime is not available!");
3163 
3164     cl_uint cnt = 0;
3165     CV_OCL_CHECK(clGetPlatformIDs(0, 0, &cnt));
3166 
3167     if (cnt == 0)
3168         CV_Error(cv::Error::OpenCLApiCallError, "No OpenCL platform available!");
3169 
3170     std::vector<cl_platform_id> platforms(cnt);
3171 
3172     CV_OCL_CHECK(clGetPlatformIDs(cnt, &platforms[0], 0));
3173 
3174     bool platformAvailable = false;
3175 
3176     // check if external platformName contained in list of available platforms in OpenCV
3177     for (unsigned int i = 0; i < cnt; i++)
3178     {
3179         String availablePlatformName;
3180         get_platform_name(platforms[i], availablePlatformName);
3181         // external platform is found in the list of available platforms
3182         if (platformName == availablePlatformName)
3183         {
3184             platformAvailable = true;
3185             break;
3186         }
3187     }
3188 
3189     if (!platformAvailable)
3190         CV_Error(cv::Error::OpenCLApiCallError, "No matched platforms available!");
3191 
3192     // check if platformID corresponds to platformName
3193     String actualPlatformName;
3194     get_platform_name((cl_platform_id)platformID, actualPlatformName);
3195     if (platformName != actualPlatformName)
3196         CV_Error(cv::Error::OpenCLApiCallError, "No matched platforms available!");
3197 
3198     OpenCLExecutionContext ctx;
3199     ctx.p = std::make_shared<OpenCLExecutionContext::Impl>((cl_platform_id)platformID, (cl_context)context, (cl_device_id)deviceID);
3200     CV_OCL_CHECK(clReleaseContext((cl_context)context));
3201     CV_OCL_CHECK(clReleaseDevice((cl_device_id)deviceID));
3202     return ctx;
3203 }
3204 
initializeContextFromHandle(Context & ctx,void * _platform,void * _context,void * _device)3205 void initializeContextFromHandle(Context& ctx, void* _platform, void* _context, void* _device)
3206 {
3207     // internal call, less checks
3208     cl_platform_id platformID = (cl_platform_id)_platform;
3209     cl_context context = (cl_context)_context;
3210     cl_device_id deviceID = (cl_device_id)_device;
3211 
3212     std::string platformName = PlatformInfo(&platformID).name();
3213 
3214     auto clExecCtx = OpenCLExecutionContext::create(platformName, platformID, context, deviceID);
3215     CV_Assert(!clExecCtx.empty());
3216     ctx = clExecCtx.getContext();
3217 }
3218 
3219 /////////////////////////////////////////// Queue /////////////////////////////////////////////
3220 
3221 struct Queue::Impl
3222 {
__initcv::ocl::Queue::Impl3223     inline void __init()
3224     {
3225         refcount = 1;
3226         handle = 0;
3227         isProfilingQueue_ = false;
3228     }
3229 
Implcv::ocl::Queue::Impl3230     Impl(cl_command_queue q)
3231     {
3232         __init();
3233         handle = q;
3234 
3235         cl_command_queue_properties props = 0;
3236         CV_OCL_CHECK(clGetCommandQueueInfo(handle, CL_QUEUE_PROPERTIES, sizeof(cl_command_queue_properties), &props, NULL));
3237         isProfilingQueue_ = !!(props & CL_QUEUE_PROFILING_ENABLE);
3238     }
3239 
Implcv::ocl::Queue::Impl3240     Impl(cl_command_queue q, bool isProfilingQueue)
3241     {
3242         __init();
3243         handle = q;
3244         isProfilingQueue_ = isProfilingQueue;
3245     }
3246 
Implcv::ocl::Queue::Impl3247     Impl(const Context& c, const Device& d, bool withProfiling = false)
3248     {
3249         __init();
3250 
3251         const Context* pc = &c;
3252         cl_context ch = (cl_context)pc->ptr();
3253         if( !ch )
3254         {
3255             pc = &Context::getDefault();
3256             ch = (cl_context)pc->ptr();
3257         }
3258         cl_device_id dh = (cl_device_id)d.ptr();
3259         if( !dh )
3260             dh = (cl_device_id)pc->device(0).ptr();
3261         cl_int retval = 0;
3262         cl_command_queue_properties props = withProfiling ? CL_QUEUE_PROFILING_ENABLE : 0;
3263         CV_OCL_DBG_CHECK_(handle = clCreateCommandQueue(ch, dh, props, &retval), retval);
3264         isProfilingQueue_ = withProfiling;
3265     }
3266 
~Implcv::ocl::Queue::Impl3267     ~Impl()
3268     {
3269 #ifdef _WIN32
3270         if (!cv::__termination)
3271 #endif
3272         {
3273             if(handle)
3274             {
3275                 CV_OCL_DBG_CHECK(clFinish(handle));
3276                 CV_OCL_DBG_CHECK(clReleaseCommandQueue(handle));
3277                 handle = NULL;
3278             }
3279         }
3280     }
3281 
getProfilingQueuecv::ocl::Queue::Impl3282     const cv::ocl::Queue& getProfilingQueue(const cv::ocl::Queue& self)
3283     {
3284         if (isProfilingQueue_)
3285             return self;
3286 
3287         if (profiling_queue_.ptr())
3288             return profiling_queue_;
3289 
3290         cl_context ctx = 0;
3291         CV_OCL_CHECK(clGetCommandQueueInfo(handle, CL_QUEUE_CONTEXT, sizeof(cl_context), &ctx, NULL));
3292 
3293         cl_device_id device = 0;
3294         CV_OCL_CHECK(clGetCommandQueueInfo(handle, CL_QUEUE_DEVICE, sizeof(cl_device_id), &device, NULL));
3295 
3296         cl_int result = CL_SUCCESS;
3297         cl_command_queue_properties props = CL_QUEUE_PROFILING_ENABLE;
3298         cl_command_queue q = clCreateCommandQueue(ctx, device, props, &result);
3299         CV_OCL_DBG_CHECK_RESULT(result, "clCreateCommandQueue(with CL_QUEUE_PROFILING_ENABLE)");
3300 
3301         Queue queue;
3302         queue.p = new Impl(q, true);
3303         profiling_queue_ = queue;
3304 
3305         return profiling_queue_;
3306     }
3307 
3308     IMPLEMENT_REFCOUNTABLE();
3309 
3310     cl_command_queue handle;
3311     bool isProfilingQueue_;
3312     cv::ocl::Queue profiling_queue_;
3313 };
3314 
Queue()3315 Queue::Queue() CV_NOEXCEPT
3316 {
3317     p = 0;
3318 }
3319 
Queue(const Context & c,const Device & d)3320 Queue::Queue(const Context& c, const Device& d)
3321 {
3322     p = 0;
3323     create(c, d);
3324 }
3325 
// Copy constructor: shares the implementation of `q` and adds a reference to it.
Queue::Queue(const Queue& q)
    : p(q.p)
{
    if (p)
    {
        p->addref();
    }
}
3332 
// Copy assignment. The new implementation is referenced before the old one is
// released, so assigning an object to itself is safe.
Queue& Queue::operator = (const Queue& q)
{
    Impl* const incoming = (Impl*)q.p;
    if (incoming)
    {
        incoming->addref();
    }
    if (p)
    {
        p->release();
    }
    p = incoming;
    return *this;
}
3343 
Queue(Queue && q)3344 Queue::Queue(Queue&& q) CV_NOEXCEPT
3345 {
3346     p = q.p;
3347     q.p = nullptr;
3348 }
3349 
operator =(Queue && q)3350 Queue& Queue::operator = (Queue&& q) CV_NOEXCEPT
3351 {
3352     if (this != &q) {
3353         if(p)
3354             p->release();
3355         p = q.p;
3356         q.p = nullptr;
3357     }
3358     return *this;
3359 }
3360 
~Queue()3361 Queue::~Queue()
3362 {
3363     if(p)
3364         p->release();
3365 }
3366 
create(const Context & c,const Device & d)3367 bool Queue::create(const Context& c, const Device& d)
3368 {
3369     if(p)
3370         p->release();
3371     p = new Impl(c, d);
3372     return p->handle != 0;
3373 }
3374 
finish()3375 void Queue::finish()
3376 {
3377     if(p && p->handle)
3378     {
3379         CV_OCL_DBG_CHECK(clFinish(p->handle));
3380     }
3381 }
3382 
getProfilingQueue() const3383 const Queue& Queue::getProfilingQueue() const
3384 {
3385     CV_Assert(p);
3386     return p->getProfilingQueue(*this);
3387 }
3388 
ptr() const3389 void* Queue::ptr() const
3390 {
3391     return p ? p->handle : 0;
3392 }
3393 
getDefault()3394 Queue& Queue::getDefault()
3395 {
3396     auto& c = OpenCLExecutionContext::getCurrent();
3397     if (!c.empty())
3398     {
3399         auto& q = c.getQueue();
3400         return q;
3401     }
3402     static Queue dummy;
3403     return dummy;
3404 }
3405 
getQueue(const Queue & q)3406 static cl_command_queue getQueue(const Queue& q)
3407 {
3408     cl_command_queue qq = (cl_command_queue)q.ptr();
3409     if(!qq)
3410         qq = (cl_command_queue)Queue::getDefault().ptr();
3411     return qq;
3412 }
3413 
3414 /////////////////////////////////////////// KernelArg /////////////////////////////////////////////
3415 
KernelArg()3416 KernelArg::KernelArg() CV_NOEXCEPT
3417     : flags(0), m(0), obj(0), sz(0), wscale(1), iwscale(1)
3418 {
3419 }
3420 
KernelArg(int _flags,UMat * _m,int _wscale,int _iwscale,const void * _obj,size_t _sz)3421 KernelArg::KernelArg(int _flags, UMat* _m, int _wscale, int _iwscale, const void* _obj, size_t _sz)
3422     : flags(_flags), m(_m), obj(_obj), sz(_sz), wscale(_wscale), iwscale(_iwscale)
3423 {
3424     CV_Assert(_flags == LOCAL || _flags == CONSTANT || _m != NULL);
3425 }
3426 
Constant(const Mat & m)3427 KernelArg KernelArg::Constant(const Mat& m)
3428 {
3429     CV_Assert(m.isContinuous());
3430     return KernelArg(CONSTANT, 0, 0, 0, m.ptr(), m.total()*m.elemSize());
3431 }
3432 
3433 /////////////////////////////////////////// Kernel /////////////////////////////////////////////
3434 
3435 struct Kernel::Impl
3436 {
Implcv::ocl::Kernel::Impl3437     Impl(const char* kname, const Program& prog) :
3438         refcount(1), handle(NULL), isInProgress(false), isAsyncRun(false), nu(0)
3439     {
3440         cl_program ph = (cl_program)prog.ptr();
3441         cl_int retval = 0;
3442         name = kname;
3443         if (ph)
3444         {
3445             handle = clCreateKernel(ph, kname, &retval);
3446             CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateKernel('%s')", kname).c_str());
3447         }
3448         for( int i = 0; i < MAX_ARRS; i++ )
3449             u[i] = 0;
3450         haveTempDstUMats = false;
3451         haveTempSrcUMats = false;
3452     }
3453 
cleanupUMatscv::ocl::Kernel::Impl3454     void cleanupUMats()
3455     {
3456         for( int i = 0; i < MAX_ARRS; i++ )
3457             if( u[i] )
3458             {
3459                 if( CV_XADD(&u[i]->urefcount, -1) == 1 )
3460                 {
3461                     u[i]->flags |= UMatData::ASYNC_CLEANUP;
3462                     u[i]->currAllocator->deallocate(u[i]);
3463                 }
3464                 u[i] = 0;
3465             }
3466         nu = 0;
3467         haveTempDstUMats = false;
3468         haveTempSrcUMats = false;
3469     }
3470 
addUMatcv::ocl::Kernel::Impl3471     void addUMat(const UMat& m, bool dst)
3472     {
3473         CV_Assert(nu < MAX_ARRS && m.u && m.u->urefcount > 0);
3474         u[nu] = m.u;
3475         CV_XADD(&m.u->urefcount, 1);
3476         nu++;
3477         if(dst && m.u->tempUMat())
3478             haveTempDstUMats = true;
3479         if(m.u->originalUMatData == NULL && m.u->tempUMat())
3480             haveTempSrcUMats = true;  // UMat is created on RAW memory (without proper lifetime management, even from Mat)
3481     }
3482 
3483     /// Preserve image lifetime (while it is specified as Kernel argument)
registerImageArgumentcv::ocl::Kernel::Impl3484     void registerImageArgument(int arg, const Image2D& image)
3485     {
3486         CV_CheckGE(arg, 0, "");
3487         if (arg < (int)shadow_images.size() && shadow_images[arg].ptr() != image.ptr())  // TODO future: replace ptr => impl (more strong check)
3488         {
3489             CV_Check(arg, !isInProgress, "ocl::Kernel: clearing of pending Image2D arguments is not allowed");
3490         }
3491         shadow_images.reserve(MAX_ARRS);
3492         shadow_images.resize(std::max(shadow_images.size(), (size_t)arg + 1));
3493         shadow_images[arg] = image;
3494     }
3495 
finitcv::ocl::Kernel::Impl3496     void finit(cl_event e)
3497     {
3498         CV_UNUSED(e);
3499         cleanupUMats();
3500         isInProgress = false;
3501         release();
3502     }
3503 
3504     bool run(int dims, size_t _globalsize[], size_t _localsize[],
3505             bool sync, int64* timeNS, const Queue& q);
3506 
~Implcv::ocl::Kernel::Impl3507     ~Impl()
3508     {
3509         if(handle)
3510         {
3511             CV_OCL_DBG_CHECK(clReleaseKernel(handle));
3512         }
3513     }
3514 
3515     IMPLEMENT_REFCOUNTABLE();
3516 
3517     cv::String name;
3518     cl_kernel handle;
3519     enum { MAX_ARRS = 16 };
3520     UMatData* u[MAX_ARRS];
3521     bool isInProgress;
3522     bool isAsyncRun;  // true if kernel was scheduled in async mode
3523     int nu;
3524     std::vector<Image2D> shadow_images;
3525     bool haveTempDstUMats;
3526     bool haveTempSrcUMats;
3527 };
3528 
3529 }} // namespace cv::ocl
3530 
3531 extern "C" {
3532 
oclCleanupCallback(cl_event e,cl_int,void * p)3533 static void CL_CALLBACK oclCleanupCallback(cl_event e, cl_int, void *p)
3534 {
3535     try
3536     {
3537         ((cv::ocl::Kernel::Impl*)p)->finit(e);
3538     }
3539     catch (const cv::Exception& exc)
3540     {
3541         CV_LOG_ERROR(NULL, "OCL: Unexpected OpenCV exception in OpenCL callback: " << exc.what());
3542     }
3543     catch (const std::exception& exc)
3544     {
3545         CV_LOG_ERROR(NULL, "OCL: Unexpected C++ exception in OpenCL callback: " << exc.what());
3546     }
3547     catch (...)
3548     {
3549         CV_LOG_ERROR(NULL, "OCL: Unexpected unknown C++ exception in OpenCL callback");
3550     }
3551 }
3552 
3553 }
3554 
3555 namespace cv { namespace ocl {
3556 
Kernel()3557 Kernel::Kernel() CV_NOEXCEPT
3558 {
3559     p = 0;
3560 }
3561 
Kernel(const char * kname,const Program & prog)3562 Kernel::Kernel(const char* kname, const Program& prog)
3563 {
3564     p = 0;
3565     create(kname, prog);
3566 }
3567 
Kernel(const char * kname,const ProgramSource & src,const String & buildopts,String * errmsg)3568 Kernel::Kernel(const char* kname, const ProgramSource& src,
3569                const String& buildopts, String* errmsg)
3570 {
3571     p = 0;
3572     create(kname, src, buildopts, errmsg);
3573 }
3574 
// Copy constructor: shares the implementation and adds a reference to it.
Kernel::Kernel(const Kernel& k)
    : p(k.p)
{
    if (p)
        p->addref();
}
3581 
// Copy assignment: share k's implementation.
// The new implementation is referenced before the old one is released,
// which keeps self-assignment safe.
Kernel& Kernel::operator = (const Kernel& k)
{
    Impl* const incoming = (Impl*)k.p;
    if (incoming)
        incoming->addref();
    if (p)
        p->release();
    p = incoming;
    return *this;
}
3592 
Kernel(Kernel && k)3593 Kernel::Kernel(Kernel&& k) CV_NOEXCEPT
3594 {
3595     p = k.p;
3596     k.p = nullptr;
3597 }
3598 
operator =(Kernel && k)3599 Kernel& Kernel::operator = (Kernel&& k) CV_NOEXCEPT
3600 {
3601     if (this != &k) {
3602         if(p)
3603             p->release();
3604         p = k.p;
3605         k.p = nullptr;
3606     }
3607     return *this;
3608 }
3609 
~Kernel()3610 Kernel::~Kernel()
3611 {
3612     if(p)
3613         p->release();
3614 }
3615 
create(const char * kname,const Program & prog)3616 bool Kernel::create(const char* kname, const Program& prog)
3617 {
3618     if(p)
3619         p->release();
3620     p = new Impl(kname, prog);
3621     if(p->handle == 0)
3622     {
3623         p->release();
3624         p = 0;
3625     }
3626 #ifdef CV_OPENCL_RUN_ASSERT // check kernel compilation fails
3627     CV_Assert(p);
3628 #endif
3629     return p != 0;
3630 }
3631 
create(const char * kname,const ProgramSource & src,const String & buildopts,String * errmsg)3632 bool Kernel::create(const char* kname, const ProgramSource& src,
3633                     const String& buildopts, String* errmsg)
3634 {
3635     if(p)
3636     {
3637         p->release();
3638         p = 0;
3639     }
3640     String tempmsg;
3641     if( !errmsg ) errmsg = &tempmsg;
3642     const Program prog = Context::getDefault().getProg(src, buildopts, *errmsg);
3643     return create(kname, prog);
3644 }
3645 
ptr() const3646 void* Kernel::ptr() const
3647 {
3648     return p ? p->handle : 0;
3649 }
3650 
empty() const3651 bool Kernel::empty() const
3652 {
3653     return ptr() == 0;
3654 }
3655 
dumpValue(size_t sz,const void * p)3656 static cv::String dumpValue(size_t sz, const void* p)
3657 {
3658     if (sz == 4)
3659         return cv::format("%d / %uu / 0x%08x / %g", *(int*)p, *(int*)p, *(int*)p, *(float*)p);
3660     if (sz == 8)
3661         return cv::format("%lld / %lluu / 0x%16llx / %g", *(long long*)p, *(long long*)p, *(long long*)p, *(double*)p);
3662     return cv::format("%p", p);
3663 }
3664 
set(int i,const void * value,size_t sz)3665 int Kernel::set(int i, const void* value, size_t sz)
3666 {
3667     if (!p || !p->handle)
3668         return -1;
3669     if (i < 0)
3670         return i;
3671     if( i == 0 )
3672         p->cleanupUMats();
3673 
3674     cl_int retval = clSetKernelArg(p->handle, (cl_uint)i, sz, value);
3675     CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clSetKernelArg('%s', arg_index=%d, size=%d, value=%s)", p->name.c_str(), (int)i, (int)sz, dumpValue(sz, value).c_str()).c_str());
3676     if (retval != CL_SUCCESS)
3677         return -1;
3678     return i+1;
3679 }
3680 
set(int i,const Image2D & image2D)3681 int Kernel::set(int i, const Image2D& image2D)
3682 {
3683     cl_mem h = (cl_mem)image2D.ptr();
3684     int res = set(i, &h, sizeof(h));
3685     if (res >= 0)
3686         p->registerImageArgument(i, image2D);
3687     return res;
3688 }
3689 
set(int i,const UMat & m)3690 int Kernel::set(int i, const UMat& m)
3691 {
3692     return set(i, KernelArg(KernelArg::READ_WRITE, (UMat*)&m));
3693 }
3694 
set(int i,const KernelArg & arg)3695 int Kernel::set(int i, const KernelArg& arg)
3696 {
3697     if( !p || !p->handle )
3698         return -1;
3699     if (i < 0)
3700     {
3701         CV_LOG_ERROR(NULL, cv::format("OpenCL: Kernel(%s)::set(arg_index=%d): negative arg_index",
3702                 p->name.c_str(), (int)i));
3703         return i;
3704     }
3705     if( i == 0 )
3706         p->cleanupUMats();
3707     cl_int status = 0;
3708     if( arg.m )
3709     {
3710         AccessFlag accessFlags = ((arg.flags & KernelArg::READ_ONLY) ? ACCESS_READ : static_cast<AccessFlag>(0)) |
3711                                  ((arg.flags & KernelArg::WRITE_ONLY) ? ACCESS_WRITE : static_cast<AccessFlag>(0));
3712         bool ptronly = (arg.flags & KernelArg::PTR_ONLY) != 0;
3713         if (ptronly && arg.m->empty())
3714         {
3715             cl_mem h_null = (cl_mem)NULL;
3716             status = clSetKernelArg(p->handle, (cl_uint)i, sizeof(h_null), &h_null);
3717             CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, cl_mem=NULL)", p->name.c_str(), (int)i).c_str());
3718             return i + 1;
3719         }
3720         cl_mem h = (cl_mem)arg.m->handle(accessFlags);
3721 
3722         if (!h)
3723         {
3724             CV_LOG_ERROR(NULL, cv::format("OpenCL: Kernel(%s)::set(arg_index=%d, flags=%d): can't create cl_mem handle for passed UMat buffer (addr=%p)",
3725                     p->name.c_str(), (int)i, (int)arg.flags, arg.m));
3726             p->release();
3727             p = 0;
3728             return -1;
3729         }
3730 
3731 #ifdef HAVE_OPENCL_SVM
3732         if ((arg.m->u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
3733         {
3734             const Context& ctx = Context::getDefault();
3735             const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
3736             uchar*& svmDataPtr = (uchar*&)arg.m->u->handle;
3737             CV_OPENCL_SVM_TRACE_P("clSetKernelArgSVMPointer: %p\n", svmDataPtr);
3738 #if 1 // TODO
3739             status = svmFns->fn_clSetKernelArgSVMPointer(p->handle, (cl_uint)i, svmDataPtr);
3740 #else
3741             status = svmFns->fn_clSetKernelArgSVMPointer(p->handle, (cl_uint)i, &svmDataPtr);
3742 #endif
3743             CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArgSVMPointer('%s', arg_index=%d, ptr=%p)", p->name.c_str(), (int)i, (void*)svmDataPtr).c_str());
3744         }
3745         else
3746 #endif
3747         {
3748             status = clSetKernelArg(p->handle, (cl_uint)i, sizeof(h), &h);
3749             CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, cl_mem=%p)", p->name.c_str(), (int)i, (void*)h).c_str());
3750         }
3751 
3752         if (ptronly)
3753         {
3754             i++;
3755         }
3756         else if( arg.m->dims <= 2 )
3757         {
3758             UMat2D u2d(*arg.m);
3759             status = clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u2d.step), &u2d.step);
3760             CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, step_value=%d)", p->name.c_str(), (int)(i+1), (int)u2d.step).c_str());
3761             status = clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u2d.offset), &u2d.offset);
3762             CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, offset_value=%d)", p->name.c_str(), (int)(i+2), (int)u2d.offset).c_str());
3763             i += 3;
3764 
3765             if( !(arg.flags & KernelArg::NO_SIZE) )
3766             {
3767                 int cols = u2d.cols*arg.wscale/arg.iwscale;
3768                 status = clSetKernelArg(p->handle, (cl_uint)i, sizeof(u2d.rows), &u2d.rows);
3769                 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, rows_value=%d)", p->name.c_str(), (int)i, (int)u2d.rows).c_str());
3770                 status = clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(cols), &cols);
3771                 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, cols_value=%d)", p->name.c_str(), (int)(i+1), (int)cols).c_str());
3772                 i += 2;
3773             }
3774         }
3775         else
3776         {
3777             UMat3D u3d(*arg.m);
3778             status = clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.slicestep), &u3d.slicestep);
3779             CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, slicestep_value=%d)", p->name.c_str(), (int)(i+1), (int)u3d.slicestep).c_str());
3780             status = clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.step), &u3d.step);
3781             CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, step_value=%d)", p->name.c_str(), (int)(i+2), (int)u3d.step).c_str());
3782             status = clSetKernelArg(p->handle, (cl_uint)(i+3), sizeof(u3d.offset), &u3d.offset);
3783             CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, offset_value=%d)", p->name.c_str(), (int)(i+3), (int)u3d.offset).c_str());
3784             i += 4;
3785             if( !(arg.flags & KernelArg::NO_SIZE) )
3786             {
3787                 int cols = u3d.cols*arg.wscale/arg.iwscale;
3788                 status = clSetKernelArg(p->handle, (cl_uint)i, sizeof(u3d.slices), &u3d.slices);
3789                 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, slices_value=%d)", p->name.c_str(), (int)i, (int)u3d.slices).c_str());
3790                 status = clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.rows), &u3d.rows);
3791                 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, rows_value=%d)", p->name.c_str(), (int)(i+1), (int)u3d.rows).c_str());
3792                 status = clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.cols), &cols);
3793                 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, cols_value=%d)", p->name.c_str(), (int)(i+2), (int)cols).c_str());
3794                 i += 3;
3795             }
3796         }
3797         p->addUMat(*arg.m, !!(accessFlags & ACCESS_WRITE));
3798         return i;
3799     }
3800     status = clSetKernelArg(p->handle, (cl_uint)i, arg.sz, arg.obj);
3801     CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, size=%d, obj=%p)", p->name.c_str(), (int)i, (int)arg.sz, (void*)arg.obj).c_str());
3802     return i+1;
3803 }
3804 
run(int dims,size_t _globalsize[],size_t _localsize[],bool sync,const Queue & q)3805 bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
3806                  bool sync, const Queue& q)
3807 {
3808     if (!p)
3809         return false;
3810 
3811     size_t globalsize[CV_MAX_DIM] = {1,1,1};
3812     size_t total = 1;
3813     CV_Assert(_globalsize != NULL);
3814     for (int i = 0; i < dims; i++)
3815     {
3816         size_t val = _localsize ? _localsize[i] :
3817             dims == 1 ? 64 : dims == 2 ? (i == 0 ? 256 : 8) : dims == 3 ? (8>>(int)(i>0)) : 1;
3818         CV_Assert( val > 0 );
3819         total *= _globalsize[i];
3820         if (_globalsize[i] == 1 && !_localsize)
3821             val = 1;
3822         globalsize[i] = divUp(_globalsize[i], (unsigned int)val) * val;
3823     }
3824     CV_Assert(total > 0);
3825 
3826     return p->run(dims, globalsize, _localsize, sync, NULL, q);
3827 }
3828 
3829 
isRaiseErrorOnReuseAsyncKernel()3830 static bool isRaiseErrorOnReuseAsyncKernel()
3831 {
3832     static bool initialized = false;
3833     static bool value = false;
3834     if (!initialized)
3835     {
3836         value = cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_RAISE_ERROR_REUSE_ASYNC_KERNEL", false);
3837         initialized = true;
3838     }
3839     return value;
3840 }
3841 
run(int dims,size_t globalsize[],size_t localsize[],bool sync,int64 * timeNS,const Queue & q)3842 bool Kernel::Impl::run(int dims, size_t globalsize[], size_t localsize[],
3843         bool sync, int64* timeNS, const Queue& q)
3844 {
3845     CV_INSTRUMENT_REGION_OPENCL_RUN(name.c_str());
3846 
3847     if (!handle)
3848     {
3849         CV_LOG_ERROR(NULL, "OpenCL kernel has zero handle: " << name);
3850         return false;
3851     }
3852 
3853     if (isAsyncRun)
3854     {
3855         CV_LOG_ERROR(NULL, "OpenCL kernel can't be reused in async mode: " << name);
3856         if (isRaiseErrorOnReuseAsyncKernel())
3857             CV_Assert(0);
3858         return false;  // OpenCV 5.0: raise error
3859     }
3860     isAsyncRun = !sync;
3861 
3862     if (isInProgress)
3863     {
3864         CV_LOG_ERROR(NULL, "Previous OpenCL kernel launch is not finished: " << name);
3865         if (isRaiseErrorOnReuseAsyncKernel())
3866             CV_Assert(0);
3867         return false;  // OpenCV 5.0: raise error
3868     }
3869 
3870     cl_command_queue qq = getQueue(q);
3871     if (haveTempDstUMats)
3872         sync = true;
3873     if (haveTempSrcUMats)
3874         sync = true;
3875     if (timeNS)
3876         sync = true;
3877     cl_event asyncEvent = 0;
3878     cl_int retval = clEnqueueNDRangeKernel(qq, handle, (cl_uint)dims,
3879                                            NULL, globalsize, localsize, 0, 0,
3880                                            (sync && !timeNS) ? 0 : &asyncEvent);
3881 #if !CV_OPENCL_SHOW_RUN_KERNELS
3882     if (retval != CL_SUCCESS)
3883 #endif
3884     {
3885         cv::String msg = cv::format("clEnqueueNDRangeKernel('%s', dims=%d, globalsize=%zux%zux%zu, localsize=%s) sync=%s", name.c_str(), (int)dims,
3886                         globalsize[0], (dims > 1 ? globalsize[1] : 1), (dims > 2 ? globalsize[2] : 1),
3887                         (localsize ? cv::format("%zux%zux%zu", localsize[0], (dims > 1 ? localsize[1] : 1), (dims > 2 ? localsize[2] : 1)) : cv::String("NULL")).c_str(),
3888                         sync ? "true" : "false"
3889                         );
3890         if (retval != CL_SUCCESS)
3891         {
3892             msg = CV_OCL_API_ERROR_MSG(retval, msg.c_str());
3893         }
3894 #if CV_OPENCL_TRACE_CHECK
3895         CV_OCL_TRACE_CHECK_RESULT(retval, msg.c_str());
3896 #else
3897         printf("%s\n", msg.c_str());
3898         fflush(stdout);
3899 #endif
3900     }
3901     if (sync || retval != CL_SUCCESS)
3902     {
3903         CV_OCL_DBG_CHECK(clFinish(qq));
3904         if (timeNS)
3905         {
3906             if (retval == CL_SUCCESS)
3907             {
3908                 CV_OCL_DBG_CHECK(clWaitForEvents(1, &asyncEvent));
3909                 cl_ulong startTime, stopTime;
3910                 CV_OCL_CHECK(clGetEventProfilingInfo(asyncEvent, CL_PROFILING_COMMAND_START, sizeof(startTime), &startTime, NULL));
3911                 CV_OCL_CHECK(clGetEventProfilingInfo(asyncEvent, CL_PROFILING_COMMAND_END, sizeof(stopTime), &stopTime, NULL));
3912                 *timeNS = (int64)(stopTime - startTime);
3913             }
3914             else
3915             {
3916                 *timeNS = -1;
3917             }
3918         }
3919         cleanupUMats();
3920     }
3921     else
3922     {
3923         addref();
3924         isInProgress = true;
3925         CV_OCL_CHECK(clSetEventCallback(asyncEvent, CL_COMPLETE, oclCleanupCallback, this));
3926     }
3927     if (asyncEvent)
3928         CV_OCL_DBG_CHECK(clReleaseEvent(asyncEvent));
3929     return retval == CL_SUCCESS;
3930 }
3931 
runTask(bool sync,const Queue & q)3932 bool Kernel::runTask(bool sync, const Queue& q)
3933 {
3934     if(!p || !p->handle || p->isInProgress)
3935         return false;
3936 
3937     cl_command_queue qq = getQueue(q);
3938     cl_event asyncEvent = 0;
3939     cl_int retval = clEnqueueTask(qq, p->handle, 0, 0, sync ? 0 : &asyncEvent);
3940     CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clEnqueueTask('%s') sync=%s", p->name.c_str(), sync ? "true" : "false").c_str());
3941     if (sync || retval != CL_SUCCESS)
3942     {
3943         CV_OCL_DBG_CHECK(clFinish(qq));
3944         p->cleanupUMats();
3945     }
3946     else
3947     {
3948         p->addref();
3949         p->isInProgress = true;
3950         CV_OCL_CHECK(clSetEventCallback(asyncEvent, CL_COMPLETE, oclCleanupCallback, p));
3951     }
3952     if (asyncEvent)
3953         CV_OCL_DBG_CHECK(clReleaseEvent(asyncEvent));
3954     return retval == CL_SUCCESS;
3955 }
3956 
runProfiling(int dims,size_t globalsize[],size_t localsize[],const Queue & q_)3957 int64 Kernel::runProfiling(int dims, size_t globalsize[], size_t localsize[], const Queue& q_)
3958 {
3959     CV_Assert(p && p->handle && !p->isInProgress);
3960     Queue q = q_.ptr() ? q_ : Queue::getDefault();
3961     CV_Assert(q.ptr());
3962     q.finish(); // call clFinish() on base queue
3963     Queue profilingQueue = q.getProfilingQueue();
3964     int64 timeNs = -1;
3965     bool res = p->run(dims, globalsize, localsize, true, &timeNs, profilingQueue);
3966     return res ? timeNs : -1;
3967 }
3968 
workGroupSize() const3969 size_t Kernel::workGroupSize() const
3970 {
3971     if(!p || !p->handle)
3972         return 0;
3973     size_t val = 0, retsz = 0;
3974     cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3975     cl_int status = clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_WORK_GROUP_SIZE, sizeof(val), &val, &retsz);
3976     CV_OCL_CHECK_RESULT(status, "clGetKernelWorkGroupInfo(CL_KERNEL_WORK_GROUP_SIZE)");
3977     return status == CL_SUCCESS ? val : 0;
3978 }
3979 
preferedWorkGroupSizeMultiple() const3980 size_t Kernel::preferedWorkGroupSizeMultiple() const
3981 {
3982     if(!p || !p->handle)
3983         return 0;
3984     size_t val = 0, retsz = 0;
3985     cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3986     cl_int status = clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(val), &val, &retsz);
3987     CV_OCL_CHECK_RESULT(status, "clGetKernelWorkGroupInfo(CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE)");
3988     return status == CL_SUCCESS ? val : 0;
3989 }
3990 
compileWorkGroupSize(size_t wsz[]) const3991 bool Kernel::compileWorkGroupSize(size_t wsz[]) const
3992 {
3993     if(!p || !p->handle || !wsz)
3994         return 0;
3995     size_t retsz = 0;
3996     cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3997     cl_int status = clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, sizeof(wsz[0])*3, wsz, &retsz);
3998     CV_OCL_CHECK_RESULT(status, "clGetKernelWorkGroupInfo(CL_KERNEL_COMPILE_WORK_GROUP_SIZE)");
3999     return status == CL_SUCCESS;
4000 }
4001 
localMemSize() const4002 size_t Kernel::localMemSize() const
4003 {
4004     if(!p || !p->handle)
4005         return 0;
4006     size_t retsz = 0;
4007     cl_ulong val = 0;
4008     cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
4009     cl_int status = clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(val), &val, &retsz);
4010     CV_OCL_CHECK_RESULT(status, "clGetKernelWorkGroupInfo(CL_KERNEL_LOCAL_MEM_SIZE)");
4011     return status == CL_SUCCESS ? (size_t)val : 0;
4012 }
4013 
4014 
4015 
4016 ///////////////////////////////////////// ProgramSource ///////////////////////////////////////////////
4017 
4018 struct ProgramSource::Impl
4019 {
4020     IMPLEMENT_REFCOUNTABLE();
4021 
4022     enum KIND {
4023         PROGRAM_SOURCE_CODE = 0,
4024         PROGRAM_BINARIES,
4025         PROGRAM_SPIR,
4026         PROGRAM_SPIRV
4027     } kind_;
4028 
Implcv::ocl::ProgramSource::Impl4029     Impl(const String& src)
4030     {
4031         init(PROGRAM_SOURCE_CODE, cv::String(), cv::String());
4032         initFromSource(src, cv::String());
4033     }
Implcv::ocl::ProgramSource::Impl4034     Impl(const String& module, const String& name, const String& codeStr, const String& codeHash)
4035     {
4036         init(PROGRAM_SOURCE_CODE, module, name);
4037         initFromSource(codeStr, codeHash);
4038     }
4039 
4040     /// reset fields
initcv::ocl::ProgramSource::Impl4041     void init(enum KIND kind, const String& module, const String& name)
4042     {
4043         refcount = 1;
4044         kind_ = kind;
4045         module_ = module;
4046         name_ = name;
4047 
4048         sourceAddr_ = NULL;
4049         sourceSize_ = 0;
4050         isHashUpdated = false;
4051     }
4052 
initFromSourcecv::ocl::ProgramSource::Impl4053     void initFromSource(const String& codeStr, const String& codeHash)
4054     {
4055         codeStr_ = codeStr;
4056         sourceHash_ = codeHash;
4057         if (sourceHash_.empty())
4058         {
4059             updateHash();
4060         }
4061         else
4062         {
4063             isHashUpdated = true;
4064         }
4065     }
4066 
updateHashcv::ocl::ProgramSource::Impl4067     void updateHash(const char* hashStr = NULL)
4068     {
4069         if (hashStr)
4070         {
4071             sourceHash_ = cv::String(hashStr);
4072             isHashUpdated = true;
4073             return;
4074         }
4075         uint64 hash = 0;
4076         switch (kind_)
4077         {
4078         case PROGRAM_SOURCE_CODE:
4079             if (sourceAddr_)
4080             {
4081                 CV_Assert(codeStr_.empty());
4082                 hash = crc64(sourceAddr_, sourceSize_); // static storage
4083             }
4084             else
4085             {
4086                 CV_Assert(!codeStr_.empty());
4087                 hash = crc64((uchar*)codeStr_.c_str(), codeStr_.size());
4088             }
4089             break;
4090         case PROGRAM_BINARIES:
4091         case PROGRAM_SPIR:
4092         case PROGRAM_SPIRV:
4093             hash = crc64(sourceAddr_, sourceSize_);
4094             break;
4095         default:
4096             CV_Error(Error::StsInternal, "Internal error");
4097         }
4098         sourceHash_ = cv::format("%08jx", (uintmax_t)hash);
4099         isHashUpdated = true;
4100     }
4101 
Implcv::ocl::ProgramSource::Impl4102     Impl(enum KIND kind,
4103             const String& module, const String& name,
4104             const unsigned char* binary, const size_t size,
4105             const cv::String& buildOptions = cv::String())
4106     {
4107         init(kind, module, name);
4108 
4109         sourceAddr_ = binary;
4110         sourceSize_ = size;
4111 
4112         buildOptions_ = buildOptions;
4113     }
4114 
fromSourceWithStaticLifetimecv::ocl::ProgramSource::Impl4115     static ProgramSource fromSourceWithStaticLifetime(const String& module, const String& name,
4116             const char* sourceCodeStaticStr, const char* hashStaticStr,
4117             const cv::String& buildOptions)
4118     {
4119         ProgramSource result;
4120         result.p = new Impl(PROGRAM_SOURCE_CODE, module, name,
4121                 (const unsigned char*)sourceCodeStaticStr, strlen(sourceCodeStaticStr), buildOptions);
4122         result.p->updateHash(hashStaticStr);
4123         return result;
4124     }
4125 
fromBinarycv::ocl::ProgramSource::Impl4126     static ProgramSource fromBinary(const String& module, const String& name,
4127             const unsigned char* binary, const size_t size,
4128             const cv::String& buildOptions)
4129     {
4130         ProgramSource result;
4131         result.p = new Impl(PROGRAM_BINARIES, module, name, binary, size, buildOptions);
4132         return result;
4133     }
4134 
fromSPIRcv::ocl::ProgramSource::Impl4135     static ProgramSource fromSPIR(const String& module, const String& name,
4136             const unsigned char* binary, const size_t size,
4137             const cv::String& buildOptions)
4138     {
4139         ProgramSource result;
4140         result.p = new Impl(PROGRAM_SPIR, module, name, binary, size, buildOptions);
4141         return result;
4142     }
4143 
4144     String module_;
4145     String name_;
4146 
4147     // TODO std::vector<ProgramSource> includes_;
4148     String codeStr_; // PROGRAM_SOURCE_CODE only
4149 
4150     const unsigned char* sourceAddr_;
4151     size_t sourceSize_;
4152 
4153     cv::String buildOptions_;
4154 
4155     String sourceHash_;
4156     bool isHashUpdated;
4157 
4158     friend struct Program::Impl;
4159     friend struct internal::ProgramEntry;
4160     friend struct Context::Impl;
4161 };
4162 
4163 
ProgramSource()4164 ProgramSource::ProgramSource() CV_NOEXCEPT
4165 {
4166     p = 0;
4167 }
4168 
ProgramSource(const String & module,const String & name,const String & codeStr,const String & codeHash)4169 ProgramSource::ProgramSource(const String& module, const String& name, const String& codeStr, const String& codeHash)
4170 {
4171     p = new Impl(module, name, codeStr, codeHash);
4172 }
4173 
ProgramSource(const char * prog)4174 ProgramSource::ProgramSource(const char* prog)
4175 {
4176     p = new Impl(prog);
4177 }
4178 
ProgramSource(const String & prog)4179 ProgramSource::ProgramSource(const String& prog)
4180 {
4181     p = new Impl(prog);
4182 }
4183 
~ProgramSource()4184 ProgramSource::~ProgramSource()
4185 {
4186     if(p)
4187         p->release();
4188 }
4189 
// Copy constructor: shares the implementation and adds a reference to it.
ProgramSource::ProgramSource(const ProgramSource& prog)
    : p(prog.p)
{
    if (p)
        p->addref();
}
4196 
// Copy assignment: share prog's implementation.
// The new implementation is referenced before the old one is released,
// which keeps self-assignment safe.
ProgramSource& ProgramSource::operator = (const ProgramSource& prog)
{
    Impl* const incoming = (Impl*)prog.p;
    if (incoming)
        incoming->addref();
    if (p)
        p->release();
    p = incoming;
    return *this;
}
4207 
ProgramSource(ProgramSource && prog)4208 ProgramSource::ProgramSource(ProgramSource&& prog) CV_NOEXCEPT
4209 {
4210     p = prog.p;
4211     prog.p = nullptr;
4212 }
4213 
operator =(ProgramSource && prog)4214 ProgramSource& ProgramSource::operator = (ProgramSource&& prog) CV_NOEXCEPT
4215 {
4216     if (this != &prog) {
4217         if(p)
4218             p->release();
4219         p = prog.p;
4220         prog.p = nullptr;
4221     }
4222     return *this;
4223 }
4224 
source() const4225 const String& ProgramSource::source() const
4226 {
4227     CV_Assert(p);
4228     CV_Assert(p->kind_ == Impl::PROGRAM_SOURCE_CODE);
4229     CV_Assert(p->sourceAddr_ == NULL); // method returns reference - can't construct temporary object
4230     return p->codeStr_;
4231 }
4232 
hash() const4233 ProgramSource::hash_t ProgramSource::hash() const
4234 {
4235     CV_Error(Error::StsNotImplemented, "Removed method: ProgramSource::hash()");
4236 }
4237 
fromBinary(const String & module,const String & name,const unsigned char * binary,const size_t size,const cv::String & buildOptions)4238 ProgramSource ProgramSource::fromBinary(const String& module, const String& name,
4239         const unsigned char* binary, const size_t size,
4240         const cv::String& buildOptions)
4241 {
4242     CV_Assert(binary);
4243     CV_Assert(size > 0);
4244     return Impl::fromBinary(module, name, binary, size, buildOptions);
4245 }
4246 
fromSPIR(const String & module,const String & name,const unsigned char * binary,const size_t size,const cv::String & buildOptions)4247 ProgramSource ProgramSource::fromSPIR(const String& module, const String& name,
4248         const unsigned char* binary, const size_t size,
4249         const cv::String& buildOptions)
4250 {
4251     CV_Assert(binary);
4252     CV_Assert(size > 0);
4253     return Impl::fromBinary(module, name, binary, size, buildOptions);
4254 }
4255 
4256 
operator ProgramSource&() const4257 internal::ProgramEntry::operator ProgramSource&() const
4258 {
4259     if (this->pProgramSource == NULL)
4260     {
4261         cv::AutoLock lock(cv::getInitializationMutex());
4262         if (this->pProgramSource == NULL)
4263         {
4264             ProgramSource ps = ProgramSource::Impl::fromSourceWithStaticLifetime(this->module, this->name, this->programCode, this->programHash, cv::String());
4265             ProgramSource* ptr = new ProgramSource(ps);
4266             const_cast<ProgramEntry*>(this)->pProgramSource = ptr;
4267         }
4268     }
4269     return *this->pProgramSource;
4270 }
4271 
4272 
4273 
4274 /////////////////////////////////////////// Program /////////////////////////////////////////////
4275 
4276 static
joinBuildOptions(const cv::String & a,const cv::String & b)4277 cv::String joinBuildOptions(const cv::String& a, const cv::String& b)
4278 {
4279     if (b.empty())
4280         return a;
4281     if (a.empty())
4282         return b;
4283     if (b[0] == ' ')
4284         return a + b;
4285     return a + (cv::String(" ") + b);
4286 }
4287 
4288 struct Program::Impl
4289 {
4290     IMPLEMENT_REFCOUNTABLE();
4291 
Implcv::ocl::Program::Impl4292     Impl(const ProgramSource& src,
4293          const String& _buildflags, String& errmsg) :
4294          refcount(1),
4295          handle(NULL),
4296          buildflags(_buildflags)
4297     {
4298         const ProgramSource::Impl* src_ = src.getImpl();
4299         CV_Assert(src_);
4300         sourceModule_ = src_->module_;
4301         sourceName_ = src_->name_;
4302         const Context ctx = Context::getDefault();
4303         Device device = ctx.device(0);
4304         if (ctx.ptr() == NULL || device.ptr() == NULL)
4305             return;
4306         buildflags = joinBuildOptions(buildflags, src_->buildOptions_);
4307         if (src.getImpl()->kind_ == ProgramSource::Impl::PROGRAM_SOURCE_CODE)
4308         {
4309             if (device.isAMD())
4310                 buildflags = joinBuildOptions(buildflags, " -D AMD_DEVICE");
4311             else if (device.isIntel())
4312                 buildflags = joinBuildOptions(buildflags, " -D INTEL_DEVICE");
4313             const String param_buildExtraOptions = getBuildExtraOptions();
4314             if (!param_buildExtraOptions.empty())
4315                 buildflags = joinBuildOptions(buildflags, param_buildExtraOptions);
4316         }
4317         compile(ctx, src_, errmsg);
4318     }
4319 
compilecv::ocl::Program::Impl4320     bool compile(const Context& ctx, const ProgramSource::Impl* src_, String& errmsg)
4321     {
4322         CV_Assert(ctx.getImpl());
4323         CV_Assert(src_);
4324 
4325         // We don't cache OpenCL binaries
4326         if (src_->kind_ == ProgramSource::Impl::PROGRAM_BINARIES)
4327         {
4328             CV_LOG_VERBOSE(NULL, 0, "Load program binary... " << src_->module_.c_str() << "/" << src_->name_.c_str());
4329             bool isLoaded = createFromBinary(ctx, src_->sourceAddr_, src_->sourceSize_, errmsg);
4330             return isLoaded;
4331         }
4332         return compileWithCache(ctx, src_, errmsg);
4333     }
4334 
compileWithCachecv::ocl::Program::Impl4335     bool compileWithCache(const Context& ctx, const ProgramSource::Impl* src_, String& errmsg)
4336     {
4337         CV_Assert(ctx.getImpl());
4338         CV_Assert(src_);
4339         CV_Assert(src_->kind_ != ProgramSource::Impl::PROGRAM_BINARIES);
4340 
4341 #if OPENCV_HAVE_FILESYSTEM_SUPPORT
4342         OpenCLBinaryCacheConfigurator& config = OpenCLBinaryCacheConfigurator::getSingletonInstance();
4343         const std::string base_dir = config.prepareCacheDirectoryForContext(
4344                 ctx.getImpl()->getPrefixString(),
4345                 ctx.getImpl()->getPrefixBase()
4346         );
4347         const String& hash_str = src_->sourceHash_;
4348         cv::String fname;
4349         if (!base_dir.empty() && !src_->module_.empty() && !src_->name_.empty())
4350         {
4351             CV_Assert(!hash_str.empty());
4352             fname = src_->module_ + "--" + src_->name_ + "_" + hash_str + ".bin";
4353             fname = utils::fs::join(base_dir, fname);
4354         }
4355         const cv::Ptr<utils::fs::FileLock> fileLock = config.cache_lock_; // can be empty
4356         if (!fname.empty() && CV_OPENCL_CACHE_ENABLE)
4357         {
4358             try
4359             {
4360                 std::vector<char> binaryBuf;
4361                 bool res = false;
4362                 {
4363                     cv::utils::optional_shared_lock_guard<cv::utils::fs::FileLock> lock_fs(fileLock.get());
4364                     BinaryProgramFile file(fname, hash_str.c_str());
4365                     res = file.read(buildflags, binaryBuf);
4366                 }
4367                 if (res)
4368                 {
4369                     CV_Assert(!binaryBuf.empty());
4370                     CV_LOG_VERBOSE(NULL, 0, "Load program binary from cache: " << src_->module_.c_str() << "/" << src_->name_.c_str());
4371                     bool isLoaded = createFromBinary(ctx, binaryBuf, errmsg);
4372                     if (isLoaded)
4373                         return true;
4374                 }
4375             }
4376             catch (const cv::Exception& e)
4377             {
4378                 CV_UNUSED(e);
4379                 CV_LOG_VERBOSE(NULL, 0, "Can't load OpenCL binary: " + fname << std::endl << e.what());
4380             }
4381             catch (...)
4382             {
4383                 CV_LOG_VERBOSE(NULL, 0, "Can't load OpenCL binary: " + fname);
4384             }
4385         }
4386 #endif // OPENCV_HAVE_FILESYSTEM_SUPPORT
4387         CV_Assert(handle == NULL);
4388         if (src_->kind_ == ProgramSource::Impl::PROGRAM_SOURCE_CODE)
4389         {
4390             if (!buildFromSources(ctx, src_, errmsg))
4391             {
4392                 return false;
4393             }
4394         }
4395         else if (src_->kind_ == ProgramSource::Impl::PROGRAM_SPIR)
4396         {
4397             buildflags = joinBuildOptions(buildflags, " -x spir");
4398             if ((cv::String(" ") + buildflags).find(" -spir-std=") == cv::String::npos)
4399             {
4400                 buildflags = joinBuildOptions(buildflags, " -spir-std=1.2");
4401             }
4402             CV_LOG_VERBOSE(NULL, 0, "Load program SPIR binary... " << src_->module_.c_str() << "/" << src_->name_.c_str());
4403             bool isLoaded = createFromBinary(ctx, src_->sourceAddr_, src_->sourceSize_, errmsg);
4404             if (!isLoaded)
4405                 return false;
4406         }
4407         else if (src_->kind_ == ProgramSource::Impl::PROGRAM_SPIRV)
4408         {
4409             CV_Error(Error::StsNotImplemented, "OpenCL: SPIR-V is not supported");
4410         }
4411         else
4412         {
4413             CV_Error(Error::StsInternal, "Internal error");
4414         }
4415         CV_Assert(handle != NULL);
4416 #if OPENCV_HAVE_FILESYSTEM_SUPPORT
4417         if (!fname.empty() && CV_OPENCL_CACHE_WRITE)
4418         {
4419             try
4420             {
4421                 std::vector<char> binaryBuf;
4422                 getProgramBinary(binaryBuf);
4423                 {
4424                     cv::utils::optional_lock_guard<cv::utils::fs::FileLock> lock_fs(fileLock.get());
4425                     BinaryProgramFile file(fname, hash_str.c_str());
4426                     file.write(buildflags, binaryBuf);
4427                 }
4428             }
4429             catch (const cv::Exception& e)
4430             {
4431                 CV_LOG_WARNING(NULL, "Can't save OpenCL binary into cache: " + fname << std::endl << e.what());
4432             }
4433             catch (...)
4434             {
4435                 CV_LOG_WARNING(NULL, "Can't save OpenCL binary into cache: " + fname);
4436             }
4437         }
4438 #endif // OPENCV_HAVE_FILESYSTEM_SUPPORT
4439 #if CV_OPENCL_VALIDATE_BINARY_PROGRAMS
4440         if (CV_OPENCL_VALIDATE_BINARY_PROGRAMS_VALUE)
4441         {
4442             std::vector<char> binaryBuf;
4443             getProgramBinary(binaryBuf);
4444             if (!binaryBuf.empty())
4445             {
4446                 CV_OCL_DBG_CHECK(clReleaseProgram(handle));
4447                 handle = NULL;
4448                 createFromBinary(ctx, binaryBuf, errmsg);
4449             }
4450         }
4451 #endif
4452         return handle != NULL;
4453     }
4454 
dumpBuildLog_cv::ocl::Program::Impl4455     void dumpBuildLog_(cl_int result, const cl_device_id* deviceList, String& errmsg)
4456     {
4457         AutoBuffer<char, 4096> buffer; buffer[0] = 0;
4458 
4459         size_t retsz = 0;
4460         cl_int log_retval = clGetProgramBuildInfo(handle, deviceList[0],
4461                                                   CL_PROGRAM_BUILD_LOG, 0, 0, &retsz);
4462         if (log_retval == CL_SUCCESS && retsz > 1)
4463         {
4464             buffer.resize(retsz + 16);
4465             log_retval = clGetProgramBuildInfo(handle, deviceList[0],
4466                                                CL_PROGRAM_BUILD_LOG, retsz+1, buffer.data(), &retsz);
4467             if (log_retval == CL_SUCCESS)
4468             {
4469                 if (retsz < buffer.size())
4470                     buffer[retsz] = 0;
4471                 else
4472                     buffer[buffer.size() - 1] = 0;
4473             }
4474             else
4475             {
4476                 buffer[0] = 0;
4477             }
4478         }
4479 
4480         errmsg = String(buffer.data());
4481         printf("OpenCL program build log: %s/%s\nStatus %d: %s\n%s\n%s\n",
4482                 sourceModule_.c_str(), sourceName_.c_str(),
4483                 result, getOpenCLErrorString(result),
4484                 buildflags.c_str(), errmsg.c_str());
4485         fflush(stdout);
4486     }
4487 
buildFromSourcescv::ocl::Program::Impl4488     bool buildFromSources(const Context& ctx, const ProgramSource::Impl* src_, String& errmsg)
4489     {
4490         CV_Assert(src_);
4491         CV_Assert(src_->kind_ == ProgramSource::Impl::PROGRAM_SOURCE_CODE);
4492         CV_Assert(handle == NULL);
4493         CV_INSTRUMENT_REGION_OPENCL_COMPILE(cv::format("Build OpenCL program: %s/%s %s options: %s",
4494                 sourceModule_.c_str(), sourceName_.c_str(),
4495                 src_->sourceHash_.c_str(), buildflags.c_str()).c_str());
4496 
4497         CV_LOG_VERBOSE(NULL, 0, "Compile... " << sourceModule_.c_str() << "/" << sourceName_.c_str());
4498 
4499         const char* srcptr = src_->sourceAddr_ ? ((const char*)src_->sourceAddr_) : src_->codeStr_.c_str();
4500         size_t srclen = src_->sourceAddr_ ? src_->sourceSize_ : src_->codeStr_.size();
4501         CV_Assert(srcptr != NULL);
4502         CV_Assert(srclen > 0);
4503 
4504         cl_int retval = 0;
4505 
4506         handle = clCreateProgramWithSource((cl_context)ctx.ptr(), 1, &srcptr, &srclen, &retval);
4507         CV_OCL_DBG_CHECK_RESULT(retval, "clCreateProgramWithSource");
4508         CV_Assert(handle || retval != CL_SUCCESS);
4509         if (handle && retval == CL_SUCCESS)
4510         {
4511             size_t n = ctx.ndevices();
4512             AutoBuffer<cl_device_id, 4> deviceListBuf(n + 1);
4513             cl_device_id* deviceList = deviceListBuf.data();
4514             for (size_t i = 0; i < n; i++)
4515             {
4516                 deviceList[i] = (cl_device_id)(ctx.device(i).ptr());
4517             }
4518 
4519             retval = clBuildProgram(handle, (cl_uint)n, deviceList, buildflags.c_str(), 0, 0);
4520             CV_OCL_TRACE_CHECK_RESULT(/*don't throw: retval*/CL_SUCCESS, cv::format("clBuildProgram(source: %s)", buildflags.c_str()).c_str());
4521 #if !CV_OPENCL_ALWAYS_SHOW_BUILD_LOG
4522             if (retval != CL_SUCCESS)
4523 #endif
4524             {
4525                 dumpBuildLog_(retval, deviceList, errmsg);
4526 
4527                 // don't remove "retval != CL_SUCCESS" condition here:
4528                 // it would break CV_OPENCL_ALWAYS_SHOW_BUILD_LOG mode
4529                 if (retval != CL_SUCCESS && handle)
4530                 {
4531                     CV_OCL_DBG_CHECK(clReleaseProgram(handle));
4532                     handle = NULL;
4533                 }
4534             }
4535 #if CV_OPENCL_VALIDATE_BINARY_PROGRAMS
4536             if (handle && CV_OPENCL_VALIDATE_BINARY_PROGRAMS_VALUE)
4537             {
4538                 CV_LOG_INFO(NULL, "OpenCL: query kernel names (build from sources)...");
4539                 size_t retsz = 0;
4540                 char kernels_buffer[4096] = {0};
4541                 cl_int result = clGetProgramInfo(handle, CL_PROGRAM_KERNEL_NAMES, sizeof(kernels_buffer), &kernels_buffer[0], &retsz);
4542                 if (retsz < sizeof(kernels_buffer))
4543                     kernels_buffer[retsz] = 0;
4544                 else
4545                     kernels_buffer[0] = 0;
4546                 CV_LOG_INFO(NULL, result << ": Kernels='" << kernels_buffer << "'");
4547             }
4548 #endif
4549 
4550         }
4551         return handle != NULL;
4552     }
4553 
getProgramBinarycv::ocl::Program::Impl4554     void getProgramBinary(std::vector<char>& buf)
4555     {
4556         CV_Assert(handle);
4557         size_t sz = 0;
4558         CV_OCL_CHECK(clGetProgramInfo(handle, CL_PROGRAM_BINARY_SIZES, sizeof(sz), &sz, NULL));
4559         buf.resize(sz);
4560         uchar* ptr = (uchar*)&buf[0];
4561         CV_OCL_CHECK(clGetProgramInfo(handle, CL_PROGRAM_BINARIES, sizeof(ptr), &ptr, NULL));
4562     }
4563 
createFromBinarycv::ocl::Program::Impl4564     bool createFromBinary(const Context& ctx, const std::vector<char>& buf, String& errmsg)
4565     {
4566         return createFromBinary(ctx, (const unsigned char*)&buf[0], buf.size(), errmsg);
4567     }
4568 
createFromBinarycv::ocl::Program::Impl4569     bool createFromBinary(const Context& ctx, const unsigned char* binaryAddr, const size_t binarySize, String& errmsg)
4570     {
4571         CV_Assert(handle == NULL);
4572         CV_INSTRUMENT_REGION_OPENCL_COMPILE("Load OpenCL program");
4573         CV_LOG_VERBOSE(NULL, 0, "Load from binary... (" << binarySize << " bytes)");
4574 
4575         CV_Assert(binarySize > 0);
4576 
4577         size_t ndevices = (int)ctx.ndevices();
4578         AutoBuffer<cl_device_id> devices_(ndevices);
4579         AutoBuffer<const uchar*> binaryPtrs_(ndevices);
4580         AutoBuffer<size_t> binarySizes_(ndevices);
4581 
4582         cl_device_id* devices = devices_.data();
4583         const uchar** binaryPtrs = binaryPtrs_.data();
4584         size_t* binarySizes = binarySizes_.data();
4585         for (size_t i = 0; i < ndevices; i++)
4586         {
4587             devices[i] = (cl_device_id)ctx.device(i).ptr();
4588             binaryPtrs[i] = binaryAddr;
4589             binarySizes[i] = binarySize;
4590         }
4591 
4592         cl_int result = 0;
4593         handle = clCreateProgramWithBinary((cl_context)ctx.ptr(), (cl_uint)ndevices, devices_.data(),
4594                                            binarySizes, binaryPtrs, NULL, &result);
4595         if (result != CL_SUCCESS)
4596         {
4597             CV_LOG_ERROR(NULL, CV_OCL_API_ERROR_MSG(result, "clCreateProgramWithBinary"));
4598             if (handle)
4599             {
4600                 CV_OCL_DBG_CHECK(clReleaseProgram(handle));
4601                 handle = NULL;
4602             }
4603         }
4604         if (!handle)
4605         {
4606             return false;
4607         }
4608         // call clBuildProgram()
4609         {
4610             result = clBuildProgram(handle, (cl_uint)ndevices, devices_.data(), buildflags.c_str(), 0, 0);
4611             CV_OCL_DBG_CHECK_RESULT(result, cv::format("clBuildProgram(binary: %s/%s)", sourceModule_.c_str(), sourceName_.c_str()).c_str());
4612             if (result != CL_SUCCESS)
4613             {
4614                 dumpBuildLog_(result, devices, errmsg);
4615                 if (handle)
4616                 {
4617                     CV_OCL_DBG_CHECK(clReleaseProgram(handle));
4618                     handle = NULL;
4619                 }
4620                 return false;
4621             }
4622         }
4623         // check build status
4624         {
4625             cl_build_status build_status = CL_BUILD_NONE;
4626             size_t retsz = 0;
4627             CV_OCL_DBG_CHECK(result = clGetProgramBuildInfo(handle, devices[0], CL_PROGRAM_BUILD_STATUS,
4628                     sizeof(build_status), &build_status, &retsz));
4629             if (result == CL_SUCCESS)
4630             {
4631                 if (build_status == CL_BUILD_SUCCESS)
4632                 {
4633                     return true;
4634                 }
4635                 else
4636                 {
4637                     CV_LOG_WARNING(NULL, "clGetProgramBuildInfo() returns " << build_status);
4638                     return false;
4639                 }
4640             }
4641             else
4642             {
4643                 CV_LOG_ERROR(NULL, CV_OCL_API_ERROR_MSG(result, "clGetProgramBuildInfo()"));
4644                 if (handle)
4645                 {
4646                     CV_OCL_DBG_CHECK(clReleaseProgram(handle));
4647                     handle = NULL;
4648                 }
4649             }
4650         }
4651 #if CV_OPENCL_VALIDATE_BINARY_PROGRAMS
4652         if (handle && CV_OPENCL_VALIDATE_BINARY_PROGRAMS_VALUE)
4653         {
4654             CV_LOG_INFO(NULL, "OpenCL: query kernel names (binary)...");
4655             size_t retsz = 0;
4656             char kernels_buffer[4096] = {0};
4657             result = clGetProgramInfo(handle, CL_PROGRAM_KERNEL_NAMES, sizeof(kernels_buffer), &kernels_buffer[0], &retsz);
4658             if (retsz < sizeof(kernels_buffer))
4659                 kernels_buffer[retsz] = 0;
4660             else
4661                 kernels_buffer[0] = 0;
4662             CV_LOG_INFO(NULL, result << ": Kernels='" << kernels_buffer << "'");
4663         }
4664 #endif
4665         return handle != NULL;
4666     }
4667 
~Implcv::ocl::Program::Impl4668     ~Impl()
4669     {
4670         if( handle )
4671         {
4672 #ifdef _WIN32
4673             if (!cv::__termination)
4674 #endif
4675             {
4676                 clReleaseProgram(handle);
4677             }
4678             handle = NULL;
4679         }
4680     }
4681 
4682     cl_program handle;
4683 
4684     String buildflags;
4685     String sourceModule_;
4686     String sourceName_;
4687 };
4688 
4689 
Program()4690 Program::Program() CV_NOEXCEPT
4691 {
4692     p = 0;
4693 }
4694 
Program(const ProgramSource & src,const String & buildflags,String & errmsg)4695 Program::Program(const ProgramSource& src,
4696         const String& buildflags, String& errmsg)
4697 {
4698     p = 0;
4699     create(src, buildflags, errmsg);
4700 }
4701 
/** Copy constructor: shares the implementation of @p prog and adds a reference to it. */
Program::Program(const Program& prog)
    : p(prog.p)
{
    if (p != NULL)
    {
        p->addref();
    }
}
4708 
/**
 * Copy assignment: shares the implementation of @p prog.
 * The incoming implementation is referenced before the current one is
 * released, so assigning an object to itself is harmless.
 */
Program& Program::operator = (const Program& prog)
{
    Impl* const incoming = (Impl*)prog.p;
    if (incoming != NULL)
    {
        incoming->addref();
    }
    if (p != NULL)
    {
        p->release();
    }
    p = incoming;
    return *this;
}
4719 
Program(Program && prog)4720 Program::Program(Program&& prog) CV_NOEXCEPT
4721 {
4722     p = prog.p;
4723     prog.p = nullptr;
4724 }
4725 
operator =(Program && prog)4726 Program& Program::operator = (Program&& prog) CV_NOEXCEPT
4727 {
4728     if (this != &prog) {
4729         if(p)
4730             p->release();
4731         p = prog.p;
4732         prog.p = nullptr;
4733     }
4734     return *this;
4735 }
4736 
~Program()4737 Program::~Program()
4738 {
4739     if(p)
4740         p->release();
4741 }
4742 
create(const ProgramSource & src,const String & buildflags,String & errmsg)4743 bool Program::create(const ProgramSource& src,
4744             const String& buildflags, String& errmsg)
4745 {
4746     if(p)
4747     {
4748         p->release();
4749         p = NULL;
4750     }
4751     p = new Impl(src, buildflags, errmsg);
4752     if(!p->handle)
4753     {
4754         p->release();
4755         p = 0;
4756     }
4757     return p != 0;
4758 }
4759 
ptr() const4760 void* Program::ptr() const
4761 {
4762     return p ? p->handle : 0;
4763 }
4764 
4765 #ifndef OPENCV_REMOVE_DEPRECATED_API
source() const4766 const ProgramSource& Program::source() const
4767 {
4768     CV_Error(Error::StsNotImplemented, "Removed API");
4769 }
4770 
read(const String & bin,const String & buildflags)4771 bool Program::read(const String& bin, const String& buildflags)
4772 {
4773     CV_UNUSED(bin); CV_UNUSED(buildflags);
4774     CV_Error(Error::StsNotImplemented, "Removed API");
4775 }
4776 
write(String & bin) const4777 bool Program::write(String& bin) const
4778 {
4779     CV_UNUSED(bin);
4780     CV_Error(Error::StsNotImplemented, "Removed API");
4781 }
4782 
getPrefix() const4783 String Program::getPrefix() const
4784 {
4785     if(!p)
4786         return String();
4787     Context::Impl* ctx_ = Context::getDefault().getImpl();
4788     CV_Assert(ctx_);
4789     return cv::format("opencl=%s\nbuildflags=%s", ctx_->getPrefixString().c_str(), p->buildflags.c_str());
4790 }
4791 
/**
 * Returns a two-line text made of the default context's prefix string and
 * the supplied @p buildflags.
 */
String Program::getPrefix(const String& buildflags)
{
    Context::Impl* ctx_ = Context::getDefault().getImpl();
    CV_Assert(ctx_);
    return cv::format("opencl=%s\nbuildflags=%s",
                      ctx_->getPrefixString().c_str(),
                      buildflags.c_str());
}
4798 #endif // OPENCV_REMOVE_DEPRECATED_API
4799 
getBinary(std::vector<char> & binary) const4800 void Program::getBinary(std::vector<char>& binary) const
4801 {
4802     CV_Assert(p && "Empty program");
4803     p->getProgramBinary(binary);
4804 }
4805 
getProg(const ProgramSource & src,const String & buildflags,String & errmsg)4806 Program Context::Impl::getProg(const ProgramSource& src,
4807                                const String& buildflags, String& errmsg)
4808 {
4809     size_t limit = getProgramCountLimit();
4810     const ProgramSource::Impl* src_ = src.getImpl();
4811     CV_Assert(src_);
4812     String key = cv::format("module=%s name=%s codehash=%s\nopencl=%s\nbuildflags=%s",
4813             src_->module_.c_str(), src_->name_.c_str(), src_->sourceHash_.c_str(),
4814             getPrefixString().c_str(),
4815             buildflags.c_str());
4816     {
4817         cv::AutoLock lock(program_cache_mutex);
4818         phash_t::iterator it = phash.find(key);
4819         if (it != phash.end())
4820         {
4821             // TODO LRU cache
4822             CacheList::iterator i = std::find(cacheList.begin(), cacheList.end(), key);
4823             if (i != cacheList.end() && i != cacheList.begin())
4824             {
4825                 cacheList.erase(i);
4826                 cacheList.push_front(key);
4827             }
4828             return it->second;
4829         }
4830         { // cleanup program cache
4831             size_t sz = phash.size();
4832             if (limit > 0 && sz >= limit)
4833             {
4834                 static bool warningFlag = false;
4835                 if (!warningFlag)
4836                 {
4837                     printf("\nWARNING: OpenCV-OpenCL:\n"
4838                         "    In-memory cache for OpenCL programs is full, older programs will be unloaded.\n"
4839                         "    You can change cache size via OPENCV_OPENCL_PROGRAM_CACHE environment variable\n\n");
4840                     warningFlag = true;
4841                 }
4842                 while (!cacheList.empty())
4843                 {
4844                     size_t c = phash.erase(cacheList.back());
4845                     cacheList.pop_back();
4846                     if (c != 0)
4847                         break;
4848                 }
4849             }
4850         }
4851     }
4852     Program prog(src, buildflags, errmsg);
4853     // Cache result of build failures too (to prevent unnecessary compiler invocations)
4854     {
4855         cv::AutoLock lock(program_cache_mutex);
4856         phash.insert(std::pair<std::string, Program>(key, prog));
4857         cacheList.push_front(key);
4858     }
4859     return prog;
4860 }
4861 
4862 
4863 //////////////////////////////////////////// OpenCLAllocator //////////////////////////////////////////////////
4864 
/**
 * Minimal allocate/release interface of a buffer pool handing out buffers of type T.
 *
 * The destructor is protected and non-virtual, so objects cannot be destroyed
 * through a pointer to this interface.
 */
template<typename T>
class OpenCLBufferPool
{
public:
    /** Returns a buffer for a request of @p size (units are defined by the implementation). */
    virtual T allocate(size_t size) = 0;

    /** Gives @p buffer back to the pool. */
    virtual void release(T buffer) = 0;

protected:
    ~OpenCLBufferPool() { }
};
4874 
4875 template <typename Derived, typename BufferEntry, typename T>
4876 class OpenCLBufferPoolBaseImpl : public BufferPoolController, public OpenCLBufferPool<T>
4877 {
4878 private:
derived()4879     inline Derived& derived() { return *static_cast<Derived*>(this); }
4880 protected:
4881     Mutex mutex_;
4882 
4883     size_t currentReservedSize;
4884     size_t maxReservedSize;
4885 
4886     std::list<BufferEntry> allocatedEntries_; // Allocated and used entries
4887     std::list<BufferEntry> reservedEntries_; // LRU order. Allocated, but not used entries
4888 
4889     // synchronized
_findAndRemoveEntryFromAllocatedList(CV_OUT BufferEntry & entry,T buffer)4890     bool _findAndRemoveEntryFromAllocatedList(CV_OUT BufferEntry& entry, T buffer)
4891     {
4892         typename std::list<BufferEntry>::iterator i = allocatedEntries_.begin();
4893         for (; i != allocatedEntries_.end(); ++i)
4894         {
4895             BufferEntry& e = *i;
4896             if (e.clBuffer_ == buffer)
4897             {
4898                 entry = e;
4899                 allocatedEntries_.erase(i);
4900                 return true;
4901             }
4902         }
4903         return false;
4904     }
4905 
4906     // synchronized
_findAndRemoveEntryFromReservedList(CV_OUT BufferEntry & entry,const size_t size)4907     bool _findAndRemoveEntryFromReservedList(CV_OUT BufferEntry& entry, const size_t size)
4908     {
4909         if (reservedEntries_.empty())
4910             return false;
4911         typename std::list<BufferEntry>::iterator i = reservedEntries_.begin();
4912         typename std::list<BufferEntry>::iterator result_pos = reservedEntries_.end();
4913         BufferEntry result;
4914         size_t minDiff = (size_t)(-1);
4915         for (; i != reservedEntries_.end(); ++i)
4916         {
4917             BufferEntry& e = *i;
4918             if (e.capacity_ >= size)
4919             {
4920                 size_t diff = e.capacity_ - size;
4921                 if (diff < std::max((size_t)4096, size / 8) && (result_pos == reservedEntries_.end() || diff < minDiff))
4922                 {
4923                     minDiff = diff;
4924                     result_pos = i;
4925                     result = e;
4926                     if (diff == 0)
4927                         break;
4928                 }
4929             }
4930         }
4931         if (result_pos != reservedEntries_.end())
4932         {
4933             //CV_DbgAssert(result == *result_pos);
4934             reservedEntries_.erase(result_pos);
4935             entry = result;
4936             currentReservedSize -= entry.capacity_;
4937             allocatedEntries_.push_back(entry);
4938             return true;
4939         }
4940         return false;
4941     }
4942 
4943     // synchronized
_checkSizeOfReservedEntries()4944     void _checkSizeOfReservedEntries()
4945     {
4946         while (currentReservedSize > maxReservedSize)
4947         {
4948             CV_DbgAssert(!reservedEntries_.empty());
4949             const BufferEntry& entry = reservedEntries_.back();
4950             CV_DbgAssert(currentReservedSize >= entry.capacity_);
4951             currentReservedSize -= entry.capacity_;
4952             derived()._releaseBufferEntry(entry);
4953             reservedEntries_.pop_back();
4954         }
4955     }
4956 
_allocationGranularity(size_t size)4957     inline size_t _allocationGranularity(size_t size)
4958     {
4959         // heuristic values
4960         if (size < 1024*1024)
4961             return 4096;  // don't work with buffers smaller than 4Kb (hidden allocation overhead issue)
4962         else if (size < 16*1024*1024)
4963             return 64*1024;
4964         else
4965             return 1024*1024;
4966     }
4967 
4968 public:
OpenCLBufferPoolBaseImpl()4969     OpenCLBufferPoolBaseImpl()
4970         : currentReservedSize(0),
4971           maxReservedSize(0)
4972     {
4973         // nothing
4974     }
~OpenCLBufferPoolBaseImpl()4975     virtual ~OpenCLBufferPoolBaseImpl()
4976     {
4977         freeAllReservedBuffers();
4978         CV_Assert(reservedEntries_.empty());
4979     }
4980 public:
allocate(size_t size)4981     virtual T allocate(size_t size) CV_OVERRIDE
4982     {
4983         AutoLock locker(mutex_);
4984         BufferEntry entry;
4985         if (maxReservedSize > 0 && _findAndRemoveEntryFromReservedList(entry, size))
4986         {
4987             CV_DbgAssert(size <= entry.capacity_);
4988             LOG_BUFFER_POOL("Reuse reserved buffer: %p\n", entry.clBuffer_);
4989         }
4990         else
4991         {
4992             derived()._allocateBufferEntry(entry, size);
4993         }
4994         return entry.clBuffer_;
4995     }
release(T buffer)4996     virtual void release(T buffer) CV_OVERRIDE
4997     {
4998         AutoLock locker(mutex_);
4999         BufferEntry entry;
5000         CV_Assert(_findAndRemoveEntryFromAllocatedList(entry, buffer));
5001         if (maxReservedSize == 0 || entry.capacity_ > maxReservedSize / 8)
5002         {
5003             derived()._releaseBufferEntry(entry);
5004         }
5005         else
5006         {
5007             reservedEntries_.push_front(entry);
5008             currentReservedSize += entry.capacity_;
5009             _checkSizeOfReservedEntries();
5010         }
5011     }
5012 
getReservedSize() const5013     virtual size_t getReservedSize() const CV_OVERRIDE { return currentReservedSize; }
getMaxReservedSize() const5014     virtual size_t getMaxReservedSize() const CV_OVERRIDE { return maxReservedSize; }
setMaxReservedSize(size_t size)5015     virtual void setMaxReservedSize(size_t size) CV_OVERRIDE
5016     {
5017         AutoLock locker(mutex_);
5018         size_t oldMaxReservedSize = maxReservedSize;
5019         maxReservedSize = size;
5020         if (maxReservedSize < oldMaxReservedSize)
5021         {
5022             typename std::list<BufferEntry>::iterator i = reservedEntries_.begin();
5023             for (; i != reservedEntries_.end();)
5024             {
5025                 const BufferEntry& entry = *i;
5026                 if (entry.capacity_ > maxReservedSize / 8)
5027                 {
5028                     CV_DbgAssert(currentReservedSize >= entry.capacity_);
5029                     currentReservedSize -= entry.capacity_;
5030                     derived()._releaseBufferEntry(entry);
5031                     i = reservedEntries_.erase(i);
5032                     continue;
5033                 }
5034                 ++i;
5035             }
5036             _checkSizeOfReservedEntries();
5037         }
5038     }
freeAllReservedBuffers()5039     virtual void freeAllReservedBuffers() CV_OVERRIDE
5040     {
5041         AutoLock locker(mutex_);
5042         typename std::list<BufferEntry>::const_iterator i = reservedEntries_.begin();
5043         for (; i != reservedEntries_.end(); ++i)
5044         {
5045             const BufferEntry& entry = *i;
5046             derived()._releaseBufferEntry(entry);
5047         }
5048         reservedEntries_.clear();
5049         currentReservedSize = 0;
5050     }
5051 };
5052 
5053 struct CLBufferEntry
5054 {
5055     cl_mem clBuffer_;
5056     size_t capacity_;
CLBufferEntrycv::ocl::CLBufferEntry5057     CLBufferEntry() : clBuffer_((cl_mem)NULL), capacity_(0) { }
5058 };
5059 
5060 class OpenCLBufferPoolImpl CV_FINAL : public OpenCLBufferPoolBaseImpl<OpenCLBufferPoolImpl, CLBufferEntry, cl_mem>
5061 {
5062 public:
5063     typedef struct CLBufferEntry BufferEntry;
5064 protected:
5065     int createFlags_;
5066 public:
OpenCLBufferPoolImpl(int createFlags=0)5067     OpenCLBufferPoolImpl(int createFlags = 0)
5068         : createFlags_(createFlags)
5069     {
5070     }
5071 
_allocateBufferEntry(BufferEntry & entry,size_t size)5072     void _allocateBufferEntry(BufferEntry& entry, size_t size)
5073     {
5074         CV_DbgAssert(entry.clBuffer_ == NULL);
5075         entry.capacity_ = alignSize(size, (int)_allocationGranularity(size));
5076         Context& ctx = Context::getDefault();
5077         cl_int retval = CL_SUCCESS;
5078         entry.clBuffer_ = clCreateBuffer((cl_context)ctx.ptr(), CL_MEM_READ_WRITE|createFlags_, entry.capacity_, 0, &retval);
5079         CV_OCL_CHECK_RESULT(retval, cv::format("clCreateBuffer(capacity=%lld) => %p", (long long int)entry.capacity_, (void*)entry.clBuffer_).c_str());
5080         CV_Assert(entry.clBuffer_ != NULL);
5081         if(retval == CL_SUCCESS)
5082         {
5083             CV_IMPL_ADD(CV_IMPL_OCL);
5084         }
5085         LOG_BUFFER_POOL("OpenCL allocate %lld (0x%llx) bytes: %p\n",
5086                 (long long)entry.capacity_, (long long)entry.capacity_, entry.clBuffer_);
5087         allocatedEntries_.push_back(entry);
5088     }
5089 
_releaseBufferEntry(const BufferEntry & entry)5090     void _releaseBufferEntry(const BufferEntry& entry)
5091     {
5092         CV_Assert(entry.capacity_ != 0);
5093         CV_Assert(entry.clBuffer_ != NULL);
5094         LOG_BUFFER_POOL("OpenCL release buffer: %p, %lld (0x%llx) bytes\n",
5095                 entry.clBuffer_, (long long)entry.capacity_, (long long)entry.capacity_);
5096         CV_OCL_DBG_CHECK(clReleaseMemObject(entry.clBuffer_));
5097     }
5098 };
5099 
5100 #ifdef HAVE_OPENCL_SVM
/** Pool bookkeeping record for an SVM buffer: the pointer and its capacity. */
struct CLSVMBufferEntry
{
    void* clBuffer_;   // SVM pointer, NULL for a default-constructed entry
    size_t capacity_;  // capacity recorded for the buffer, 0 for a default-constructed entry

    CLSVMBufferEntry()
        : clBuffer_(NULL)
        , capacity_(0)
    {
    }
};
5107 class OpenCLSVMBufferPoolImpl CV_FINAL : public OpenCLBufferPoolBaseImpl<OpenCLSVMBufferPoolImpl, CLSVMBufferEntry, void*>
5108 {
5109 public:
5110     typedef struct CLSVMBufferEntry BufferEntry;
5111 public:
OpenCLSVMBufferPoolImpl()5112     OpenCLSVMBufferPoolImpl()
5113     {
5114     }
5115 
_allocateBufferEntry(BufferEntry & entry,size_t size)5116     void _allocateBufferEntry(BufferEntry& entry, size_t size)
5117     {
5118         CV_DbgAssert(entry.clBuffer_ == NULL);
5119         entry.capacity_ = alignSize(size, (int)_allocationGranularity(size));
5120 
5121         Context& ctx = Context::getDefault();
5122         const svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx);
5123         bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer();
5124         cl_svm_mem_flags memFlags = CL_MEM_READ_WRITE |
5125                 (isFineGrainBuffer ? CL_MEM_SVM_FINE_GRAIN_BUFFER : 0);
5126 
5127         const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5128         CV_DbgAssert(svmFns->isValid());
5129 
5130         CV_OPENCL_SVM_TRACE_P("clSVMAlloc: %d\n", (int)entry.capacity_);
5131         void *buf = svmFns->fn_clSVMAlloc((cl_context)ctx.ptr(), memFlags, entry.capacity_, 0);
5132         CV_Assert(buf);
5133 
5134         entry.clBuffer_ = buf;
5135         {
5136             CV_IMPL_ADD(CV_IMPL_OCL);
5137         }
5138         LOG_BUFFER_POOL("OpenCL SVM allocate %lld (0x%llx) bytes: %p\n",
5139                 (long long)entry.capacity_, (long long)entry.capacity_, entry.clBuffer_);
5140         allocatedEntries_.push_back(entry);
5141     }
5142 
_releaseBufferEntry(const BufferEntry & entry)5143     void _releaseBufferEntry(const BufferEntry& entry)
5144     {
5145         CV_Assert(entry.capacity_ != 0);
5146         CV_Assert(entry.clBuffer_ != NULL);
5147         LOG_BUFFER_POOL("OpenCL release SVM buffer: %p, %lld (0x%llx) bytes\n",
5148                 entry.clBuffer_, (long long)entry.capacity_, (long long)entry.capacity_);
5149         Context& ctx = Context::getDefault();
5150         const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5151         CV_DbgAssert(svmFns->isValid());
5152         CV_OPENCL_SVM_TRACE_P("clSVMFree: %p\n",  entry.clBuffer_);
5153         svmFns->fn_clSVMFree((cl_context)ctx.ptr(), entry.clBuffer_);
5154     }
5155 };
5156 #endif
5157 
5158 
5159 
5160 template <bool readAccess, bool writeAccess>
5161 class AlignedDataPtr
5162 {
5163 protected:
5164     const size_t size_;
5165     uchar* const originPtr_;
5166     const size_t alignment_;
5167     uchar* ptr_;
5168     uchar* allocatedPtr_;
5169 
5170 public:
AlignedDataPtr(uchar * ptr,size_t size,size_t alignment)5171     AlignedDataPtr(uchar* ptr, size_t size, size_t alignment)
5172         : size_(size), originPtr_(ptr), alignment_(alignment), ptr_(ptr), allocatedPtr_(NULL)
5173     {
5174         CV_DbgAssert((alignment & (alignment - 1)) == 0); // check for 2^n
5175         CV_DbgAssert(!readAccess || ptr);
5176         if (((size_t)ptr_ & (alignment - 1)) != 0)
5177         {
5178             allocatedPtr_ = new uchar[size_ + alignment - 1];
5179             ptr_ = (uchar*)(((uintptr_t)allocatedPtr_ + (alignment - 1)) & ~(alignment - 1));
5180             if (readAccess)
5181             {
5182                 memcpy(ptr_, originPtr_, size_);
5183             }
5184         }
5185     }
5186 
getAlignedPtr() const5187     uchar* getAlignedPtr() const
5188     {
5189         CV_DbgAssert(((size_t)ptr_ & (alignment_ - 1)) == 0);
5190         return ptr_;
5191     }
5192 
~AlignedDataPtr()5193     ~AlignedDataPtr()
5194     {
5195         if (allocatedPtr_)
5196         {
5197             if (writeAccess)
5198             {
5199                 memcpy(originPtr_, ptr_, size_);
5200             }
5201             delete[] allocatedPtr_;
5202             allocatedPtr_ = NULL;
5203         }
5204         ptr_ = NULL;
5205     }
5206 private:
5207     AlignedDataPtr(const AlignedDataPtr&); // disabled
5208     AlignedDataPtr& operator=(const AlignedDataPtr&); // disabled
5209 };
5210 
5211 template <bool readAccess, bool writeAccess>
5212 class AlignedDataPtr2D
5213 {
5214 protected:
5215     const size_t size_;
5216     uchar* const originPtr_;
5217     const size_t alignment_;
5218     uchar* ptr_;
5219     uchar* allocatedPtr_;
5220     size_t rows_;
5221     size_t cols_;
5222     size_t step_;
5223 
5224 public:
AlignedDataPtr2D(uchar * ptr,size_t rows,size_t cols,size_t step,size_t alignment,size_t extrabytes=0)5225     AlignedDataPtr2D(uchar* ptr, size_t rows, size_t cols, size_t step, size_t alignment, size_t extrabytes=0)
5226         : size_(rows*step), originPtr_(ptr), alignment_(alignment), ptr_(ptr), allocatedPtr_(NULL), rows_(rows), cols_(cols), step_(step)
5227     {
5228         CV_DbgAssert((alignment & (alignment - 1)) == 0); // check for 2^n
5229         CV_DbgAssert(!readAccess || ptr != NULL);
5230         if (ptr == 0 || ((size_t)ptr_ & (alignment - 1)) != 0)
5231         {
5232             allocatedPtr_ = new uchar[size_ + extrabytes + alignment - 1];
5233             ptr_ = (uchar*)(((uintptr_t)allocatedPtr_ + (alignment - 1)) & ~(alignment - 1));
5234             if (readAccess)
5235             {
5236                 for (size_t i = 0; i < rows_; i++)
5237                     memcpy(ptr_ + i*step_, originPtr_ + i*step_, cols_);
5238             }
5239         }
5240     }
5241 
getAlignedPtr() const5242     uchar* getAlignedPtr() const
5243     {
5244         CV_DbgAssert(((size_t)ptr_ & (alignment_ - 1)) == 0);
5245         return ptr_;
5246     }
5247 
~AlignedDataPtr2D()5248     ~AlignedDataPtr2D()
5249     {
5250         if (allocatedPtr_)
5251         {
5252             if (writeAccess)
5253             {
5254                 for (size_t i = 0; i < rows_; i++)
5255                     memcpy(originPtr_ + i*step_, ptr_ + i*step_, cols_);
5256             }
5257             delete[] allocatedPtr_;
5258             allocatedPtr_ = NULL;
5259         }
5260         ptr_ = NULL;
5261     }
5262 private:
5263     AlignedDataPtr2D(const AlignedDataPtr2D&); // disabled
5264     AlignedDataPtr2D& operator=(const AlignedDataPtr2D&); // disabled
5265 };
5266 
// Alignment passed to AlignedDataPtr for host pointers that are handed to OpenCL
// transfer calls (see deallocate_()). Can be overridden by defining the macro at build time.
#ifndef CV_OPENCL_DATA_PTR_ALIGNMENT
#define CV_OPENCL_DATA_PTR_ALIGNMENT 16
#endif
5270 
5271 
__init_buffer_pools()5272 void Context::Impl::__init_buffer_pools()
5273 {
5274     bufferPool_ = std::make_shared<OpenCLBufferPoolImpl>(0);
5275     OpenCLBufferPoolImpl& bufferPool = *bufferPool_.get();
5276     bufferPoolHostPtr_ = std::make_shared<OpenCLBufferPoolImpl>(CL_MEM_ALLOC_HOST_PTR);
5277     OpenCLBufferPoolImpl& bufferPoolHostPtr = *bufferPoolHostPtr_.get();
5278 
5279     size_t defaultPoolSize = ocl::Device::getDefault().isIntel() ? 1 << 27 : 0;
5280     size_t poolSize = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_BUFFERPOOL_LIMIT", defaultPoolSize);
5281     bufferPool.setMaxReservedSize(poolSize);
5282     size_t poolSizeHostPtr = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_HOST_PTR_BUFFERPOOL_LIMIT", defaultPoolSize);
5283     bufferPoolHostPtr.setMaxReservedSize(poolSizeHostPtr);
5284 
5285 #ifdef HAVE_OPENCL_SVM
5286     bufferPoolSVM_ = std::make_shared<OpenCLSVMBufferPoolImpl>();
5287     OpenCLSVMBufferPoolImpl& bufferPoolSVM = *bufferPoolSVM_.get();
5288     size_t poolSizeSVM = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_SVM_BUFFERPOOL_LIMIT", defaultPoolSize);
5289     bufferPoolSVM.setMaxReservedSize(poolSizeSVM);
5290 #endif
5291 
5292     CV_LOG_INFO(NULL, "OpenCL: Initializing buffer pool for context@" << contextId << " with max capacity: poolSize=" << poolSize << " poolSizeHostPtr=" << poolSizeHostPtr);
5293 }
5294 
5295 class OpenCLAllocator CV_FINAL : public MatAllocator
5296 {
5297 public:
    // Bit flags stored in UMatData::allocatorFlags_; deallocate_() uses them to decide
    // how the OpenCL handle has to be given back.
    enum AllocatorFlags
    {
        ALLOCATOR_FLAGS_BUFFER_POOL_USED = 1 << 0,           // handle comes from getBufferPool()
        ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED = 1 << 1,  // handle comes from getBufferPoolHostPtr()
#ifdef HAVE_OPENCL_SVM
        ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED = 1 << 2,       // handle comes from getBufferPoolSVM()
#endif
        ALLOCATOR_FLAGS_EXTERNAL_BUFFER = 1 << 3  // convertFromBuffer()
    };
5307 
    // Remembers the default Mat allocator; defaultAllocate() forwards to it.
    OpenCLAllocator()
    {
        matStdAllocator = Mat::getDefaultAllocator();
    }
    // Flushes the cleanup queue (entries are added by deallocate() for UMatData
    // flagged ASYNC_CLEANUP) before the allocator goes away.
    ~OpenCLAllocator()
    {
        flushCleanupQueue();
    }
5316 
defaultAllocate(int dims,const int * sizes,int type,void * data,size_t * step,AccessFlag flags,UMatUsageFlags usageFlags) const5317     UMatData* defaultAllocate(int dims, const int* sizes, int type, void* data, size_t* step,
5318             AccessFlag flags, UMatUsageFlags usageFlags) const
5319     {
5320         UMatData* u = matStdAllocator->allocate(dims, sizes, type, data, step, flags, usageFlags);
5321         return u;
5322     }
5323 
isOpenCLMapForced()5324     static bool isOpenCLMapForced()  // force clEnqueueMapBuffer / clEnqueueUnmapMemObject OpenCL API
5325     {
5326         static bool value = cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_BUFFER_FORCE_MAPPING", false);
5327         return value;
5328     }
isOpenCLCopyingForced()5329     static bool isOpenCLCopyingForced()  // force clEnqueueReadBuffer[Rect] / clEnqueueWriteBuffer[Rect] OpenCL API
5330     {
5331         static bool value = cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_BUFFER_FORCE_COPYING", false);
5332         return value;
5333     }
5334 
getBestFlags(const Context & ctx,AccessFlag,UMatUsageFlags usageFlags,int & createFlags,UMatData::MemoryFlag & flags0) const5335     void getBestFlags(const Context& ctx, AccessFlag /*flags*/, UMatUsageFlags usageFlags, int& createFlags, UMatData::MemoryFlag& flags0) const
5336     {
5337         const Device& dev = ctx.device(0);
5338         createFlags = 0;
5339         if ((usageFlags & USAGE_ALLOCATE_HOST_MEMORY) != 0)
5340             createFlags |= CL_MEM_ALLOC_HOST_PTR;
5341 
5342         if (!isOpenCLCopyingForced() &&
5343             (isOpenCLMapForced() ||
5344                 (dev.hostUnifiedMemory()
5345 #ifndef __APPLE__
5346                 || dev.isIntel()
5347 #endif
5348                 )
5349             )
5350         )
5351             flags0 = static_cast<UMatData::MemoryFlag>(0);
5352         else
5353             flags0 = UMatData::COPY_ON_MAP;
5354     }
5355 
allocate(int dims,const int * sizes,int type,void * data,size_t * step,AccessFlag flags,UMatUsageFlags usageFlags) const5356     UMatData* allocate(int dims, const int* sizes, int type,
5357                        void* data, size_t* step, AccessFlag flags, UMatUsageFlags usageFlags) const CV_OVERRIDE
5358     {
5359         if(!useOpenCL())
5360             return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
5361 
5362         flushCleanupQueue();
5363 
5364         CV_Assert(data == 0);
5365         size_t total = CV_ELEM_SIZE(type);
5366         for( int i = dims-1; i >= 0; i-- )
5367         {
5368             if( step )
5369                 step[i] = total;
5370             total *= sizes[i];
5371         }
5372 
5373         Context& ctx = Context::getDefault();
5374         if (!ctx.getImpl())
5375             return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
5376         Context::Impl& ctxImpl = *ctx.getImpl();
5377 
5378         int createFlags = 0;
5379         UMatData::MemoryFlag flags0 = static_cast<UMatData::MemoryFlag>(0);
5380         getBestFlags(ctx, flags, usageFlags, createFlags, flags0);
5381 
5382         void* handle = NULL;
5383         int allocatorFlags = 0;
5384 
5385 #ifdef HAVE_OPENCL_SVM
5386         const svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx);
5387         if (ctx.useSVM() && svm::useSVM(usageFlags) && !svmCaps.isNoSVMSupport())
5388         {
5389             allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED;
5390             handle = ctxImpl.getBufferPoolSVM().allocate(total);
5391 
5392             // this property is constant, so single buffer pool can be used here
5393             bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer();
5394             allocatorFlags |= isFineGrainBuffer ? svm::OPENCL_SVM_FINE_GRAIN_BUFFER : svm::OPENCL_SVM_COARSE_GRAIN_BUFFER;
5395         }
5396         else
5397 #endif
5398         if (createFlags == 0)
5399         {
5400             allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_USED;
5401             handle = ctxImpl.getBufferPool().allocate(total);
5402         }
5403         else if (createFlags == CL_MEM_ALLOC_HOST_PTR)
5404         {
5405             allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED;
5406             handle = ctxImpl.getBufferPoolHostPtr().allocate(total);
5407         }
5408         else
5409         {
5410             CV_Assert(handle != NULL); // Unsupported, throw
5411         }
5412 
5413         if (!handle)
5414             return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
5415 
5416         UMatData* u = new UMatData(this);
5417         u->data = 0;
5418         u->size = total;
5419         u->handle = handle;
5420         u->flags = flags0;
5421         u->allocatorFlags_ = allocatorFlags;
5422         u->allocatorContext = std::static_pointer_cast<void>(std::make_shared<ocl::Context>(ctx));
5423         CV_DbgAssert(!u->tempUMat()); // for bufferPool.release() consistency in deallocate()
5424         u->markHostCopyObsolete(true);
5425         opencl_allocator_stats.onAllocate(u->size);
5426         return u;
5427     }
5428 
    /**
     * Attaches an OpenCL handle to an existing UMatData that wraps host memory (u->origdata).
     *
     * If @p u already has a handle, no buffer is created. Otherwise one of the following is tried:
     *  - SVM fine-grain system: the host pointer itself is used as the handle;
     *  - SVM buffer: a new SVM allocation is filled with a copy of the host data
     *    (skipped when ACCESS_FAST is requested);
     *  - regular buffer: CL_MEM_USE_HOST_PTR when the conditions below allow it, otherwise
     *    CL_MEM_COPY_HOST_PTR (the copy is skipped when ACCESS_FAST is requested).
     *
     * @return false when @p u is NULL or no handle could be obtained, true otherwise.
     */
    bool allocate(UMatData* u, AccessFlag accessFlags, UMatUsageFlags usageFlags) const CV_OVERRIDE
    {
        if(!u)
            return false;

        flushCleanupQueue();

        UMatDataAutoLock lock(u);

        if(u->handle == 0)
        {
            CV_Assert(u->origdata != 0);
            Context& ctx = Context::getDefault();
            int createFlags = 0;
            UMatData::MemoryFlag flags0 = static_cast<UMatData::MemoryFlag>(0);
            getBestFlags(ctx, accessFlags, usageFlags, createFlags, flags0);

            bool copyOnMap = (flags0 & UMatData::COPY_ON_MAP) != 0;

            cl_context ctx_handle = (cl_context)ctx.ptr();
            int allocatorFlags = 0;
            UMatData::MemoryFlag tempUMatFlags = static_cast<UMatData::MemoryFlag>(0);
            void* handle = NULL;
            cl_int retval = CL_SUCCESS;

#ifdef HAVE_OPENCL_SVM
            svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx);
            bool useSVM = ctx.useSVM() && svm::useSVM(usageFlags);
            if (useSVM && svmCaps.isSupportFineGrainSystem())
            {
                // Fine-grain system SVM: the host pointer is used as the handle, nothing is allocated.
                allocatorFlags = svm::OPENCL_SVM_FINE_GRAIN_SYSTEM;
                tempUMatFlags = UMatData::TEMP_UMAT;
                handle = u->origdata;
                CV_OPENCL_SVM_TRACE_P("Use fine grain system: %d (%p)\n", (int)u->size, handle);
            }
            else if (useSVM && (svmCaps.isSupportFineGrainBuffer() || svmCaps.isSupportCoarseGrainBuffer()))
            {
                // With ACCESS_FAST no copy is made here, so handle stays NULL and the call returns false below.
                if (!(accessFlags & ACCESS_FAST)) // memcpy used
                {
                    bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer();

                    cl_svm_mem_flags memFlags = createFlags |
                            (isFineGrainBuffer ? CL_MEM_SVM_FINE_GRAIN_BUFFER : 0);

                    const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
                    CV_DbgAssert(svmFns->isValid());

                    CV_OPENCL_SVM_TRACE_P("clSVMAlloc + copy: %d\n", (int)u->size);
                    handle = svmFns->fn_clSVMAlloc((cl_context)ctx.ptr(), memFlags, u->size, 0);
                    CV_Assert(handle);

                    // Coarse-grain buffers are mapped (blocking) around the host-side memcpy.
                    cl_command_queue q = NULL;
                    if (!isFineGrainBuffer)
                    {
                        q = (cl_command_queue)Queue::getDefault().ptr();
                        CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", handle, (int)u->size);
                        cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_TRUE, CL_MAP_WRITE,
                                handle, u->size,
                                0, NULL, NULL);
                        CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");

                    }
                    memcpy(handle, u->origdata, u->size);
                    if (!isFineGrainBuffer)
                    {
                        CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", handle);
                        cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, handle, 0, NULL, NULL);
                        CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
                    }

                    tempUMatFlags = UMatData::TEMP_UMAT | UMatData::TEMP_COPIED_UMAT;
                    allocatorFlags |= isFineGrainBuffer ? svm::OPENCL_SVM_FINE_GRAIN_BUFFER
                                                : svm::OPENCL_SVM_COARSE_GRAIN_BUFFER;
                }
            }
            else
#endif
            {
                // With copy-on-map the fast path is dropped, which enables the copying fallback below.
                if( copyOnMap )
                    accessFlags &= ~ACCESS_FAST;

                tempUMatFlags = UMatData::TEMP_UMAT;
                if (
                #ifdef __APPLE__
                    !copyOnMap &&
                #endif
                    CV_OPENCL_ENABLE_MEM_USE_HOST_PTR
                    // There are OpenCL runtime issues for less aligned data
                    && (CV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR != 0
                        && u->origdata == cv::alignPtr(u->origdata, (int)CV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR))
                    // Avoid sharing of host memory between OpenCL buffers
                    && !(u->originalUMatData && u->originalUMatData->handle)
                )
                {
                    // Change the host-side origdata[size] to "pinned memory" that enables fast
                    // DMA-transfers over PCIe to the device. Often used with clEnqueueMapBuffer/clEnqueueUnmapMemObject
                    handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|(createFlags & ~CL_MEM_ALLOC_HOST_PTR),
                                            u->size, u->origdata, &retval);
                    CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer(CL_MEM_USE_HOST_PTR|(createFlags & ~CL_MEM_ALLOC_HOST_PTR), sz=%lld, origdata=%p) => %p",
                            (long long int)u->size, u->origdata, (void*)handle).c_str());
                }
                // Fallback: taken when the attempt above was skipped or failed, unless ACCESS_FAST is still set.
                if((!handle || retval < 0) && !(accessFlags & ACCESS_FAST))
                {
                    // Allocate device-side memory and immediately copy data from the host-side pointer origdata[size].
                    // If createFlags=CL_MEM_ALLOC_HOST_PTR (aka cv::USAGE_ALLOCATE_HOST_MEMORY), then
                    // additionally allocate a host-side "pinned" duplicate of the origdata that is
                    // managed by OpenCL. This is potentially faster in unaligned/unmanaged scenarios.
                    handle = clCreateBuffer(ctx_handle, CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags,
                                               u->size, u->origdata, &retval);
                    CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer(CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags, sz=%lld, origdata=%p) => %p",
                            (long long int)u->size, u->origdata, (void*)handle).c_str());
                    tempUMatFlags |= UMatData::TEMP_COPIED_UMAT;
                }
            }
            CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer() => %p", (void*)handle).c_str());
            if(!handle || retval != CL_SUCCESS)
                return false;
            // Take over the UMatData: remember the previous allocator so deallocate_() can restore it.
            u->handle = handle;
            u->prevAllocator = u->currAllocator;
            u->currAllocator = this;
            u->flags |= tempUMatFlags | flags0;
            u->allocatorFlags_ = allocatorFlags;
        }
        if (!!(accessFlags & ACCESS_WRITE))
            u->markHostCopyObsolete(true);
        // NOTE(review): this is also reached when u->handle was already set on entry - confirm
        // that counting the size again in that case is intended.
        opencl_allocator_stats.onAllocate(u->size);
        return true;
    }
5557 
5558     /*void sync(UMatData* u) const
5559     {
5560         cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5561         UMatDataAutoLock lock(u);
5562 
5563         if( u->hostCopyObsolete() && u->handle && u->refcount > 0 && u->origdata)
5564         {
5565             if( u->tempCopiedUMat() )
5566             {
5567                 clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
5568                                     u->size, u->origdata, 0, 0, 0);
5569             }
5570             else
5571             {
5572                 cl_int retval = 0;
5573                 void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
5574                                                 (CL_MAP_READ | CL_MAP_WRITE),
5575                                                 0, u->size, 0, 0, 0, &retval);
5576                 clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0);
5577                 clFinish(q);
5578             }
5579             u->markHostCopyObsolete(false);
5580         }
5581         else if( u->copyOnMap() && u->deviceCopyObsolete() && u->data )
5582         {
5583             clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
5584                                  u->size, u->data, 0, 0, 0);
5585         }
5586     }*/
5587 
deallocate(UMatData * u) const5588     void deallocate(UMatData* u) const CV_OVERRIDE
5589     {
5590         if(!u)
5591             return;
5592 
5593         CV_Assert(u->urefcount == 0);
5594         CV_Assert(u->refcount == 0 && "UMat deallocation error: some derived Mat is still alive");
5595 
5596         CV_Assert(u->handle != 0);
5597         CV_Assert(u->mapcount == 0);
5598 
5599         if (!!(u->flags & UMatData::ASYNC_CLEANUP))
5600             addToCleanupQueue(u);
5601         else
5602             deallocate_(u);
5603     }
5604 
deallocate_(UMatData * u) const5605     void deallocate_(UMatData* u) const
5606     {
5607         CV_Assert(u);
5608         CV_Assert(u->handle);
5609         if ((u->allocatorFlags_ & ALLOCATOR_FLAGS_EXTERNAL_BUFFER) == 0)
5610         {
5611             opencl_allocator_stats.onFree(u->size);
5612         }
5613 
5614 #ifdef _WIN32
5615         if (cv::__termination)  // process is not in consistent state (after ExitProcess call) and terminating
5616             return;             // avoid any OpenCL calls
5617 #endif
5618         if(u->tempUMat())
5619         {
5620             CV_Assert(u->origdata);
5621 //            UMatDataAutoLock lock(u);
5622 
5623             if (u->hostCopyObsolete())
5624             {
5625 #ifdef HAVE_OPENCL_SVM
5626                 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5627                 {
5628                     Context& ctx = Context::getDefault();
5629                     const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5630                     CV_DbgAssert(svmFns->isValid());
5631 
5632                     if( u->tempCopiedUMat() )
5633                     {
5634                         CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
5635                                 (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER);
5636                         bool isFineGrainBuffer = (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER;
5637                         cl_command_queue q = NULL;
5638                         if (!isFineGrainBuffer)
5639                         {
5640                             CV_DbgAssert(((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0));
5641                             q = (cl_command_queue)Queue::getDefault().ptr();
5642                             CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
5643                             cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ,
5644                                     u->handle, u->size,
5645                                     0, NULL, NULL);
5646                             CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
5647                         }
5648                         clFinish(q);
5649                         memcpy(u->origdata, u->handle, u->size);
5650                         if (!isFineGrainBuffer)
5651                         {
5652                             CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
5653                             cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle, 0, NULL, NULL);
5654                             CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
5655                         }
5656                     }
5657                     else
5658                     {
5659                         CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM);
5660                         // nothing
5661                     }
5662                 }
5663                 else
5664 #endif
5665                 {
5666                     cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5667                     if( u->tempCopiedUMat() )
5668                     {
5669                         AlignedDataPtr<false, true> alignedPtr(u->origdata, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
5670                         CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
5671                                             u->size, alignedPtr.getAlignedPtr(), 0, 0, 0));
5672                     }
5673                     else
5674                     {
5675                         cl_int retval = 0;
5676                         if (u->tempUMat())
5677                         {
5678                             CV_Assert(u->mapcount == 0);
5679                             flushCleanupQueue(); // workaround for CL_OUT_OF_RESOURCES problem (#9960)
5680                             void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
5681                                 (CL_MAP_READ | CL_MAP_WRITE),
5682                                 0, u->size, 0, 0, 0, &retval);
5683                             CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueMapBuffer(handle=%p, sz=%lld) => %p", (void*)u->handle, (long long int)u->size, data).c_str());
5684                             CV_Assert(u->origdata == data && "Details: https://github.com/opencv/opencv/issues/6293");
5685                             if (u->originalUMatData)
5686                             {
5687                                 CV_Assert(u->originalUMatData->data == data);
5688                             }
5689                             retval = clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0);
5690                             CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueUnmapMemObject(handle=%p, data=%p, [sz=%lld])", (void*)u->handle, data, (long long int)u->size).c_str());
5691                             CV_OCL_DBG_CHECK(clFinish(q));
5692                         }
5693                     }
5694                 }
5695                 u->markHostCopyObsolete(false);
5696             }
5697             else
5698             {
5699                 // nothing
5700             }
5701 #ifdef HAVE_OPENCL_SVM
5702             if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5703             {
5704                 if( u->tempCopiedUMat() )
5705                 {
5706                     Context& ctx = Context::getDefault();
5707                     const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5708                     CV_DbgAssert(svmFns->isValid());
5709 
5710                     CV_OPENCL_SVM_TRACE_P("clSVMFree: %p\n", u->handle);
5711                     svmFns->fn_clSVMFree((cl_context)ctx.ptr(), u->handle);
5712                 }
5713             }
5714             else
5715 #endif
5716             {
5717                 cl_int retval = clReleaseMemObject((cl_mem)u->handle);
5718                 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clReleaseMemObject(ptr=%p)", (void*)u->handle).c_str());
5719             }
5720             u->handle = 0;
5721             u->markDeviceCopyObsolete(true);
5722             u->currAllocator = u->prevAllocator;
5723             u->prevAllocator = NULL;
5724             if(u->data && u->copyOnMap() && u->data != u->origdata)
5725                 fastFree(u->data);
5726             u->data = u->origdata;
5727             u->currAllocator->deallocate(u);
5728             u = NULL;
5729         }
5730         else
5731         {
5732             CV_Assert(u->origdata == NULL);
5733             if(u->data && u->copyOnMap() && u->data != u->origdata)
5734             {
5735                 fastFree(u->data);
5736                 u->data = 0;
5737                 u->markHostCopyObsolete(true);
5738             }
5739             if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_USED)
5740             {
5741                 std::shared_ptr<ocl::Context> pCtx = std::static_pointer_cast<ocl::Context>(u->allocatorContext);
5742                 CV_Assert(pCtx);
5743                 ocl::Context& ctx = *pCtx.get();
5744                 CV_Assert(ctx.getImpl());
5745                 ctx.getImpl()->getBufferPool().release((cl_mem)u->handle);
5746             }
5747             else if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED)
5748             {
5749                 std::shared_ptr<ocl::Context> pCtx = std::static_pointer_cast<ocl::Context>(u->allocatorContext);
5750                 CV_Assert(pCtx);
5751                 ocl::Context& ctx = *pCtx.get();
5752                 CV_Assert(ctx.getImpl());
5753                 ctx.getImpl()->getBufferPoolHostPtr().release((cl_mem)u->handle);
5754             }
5755 #ifdef HAVE_OPENCL_SVM
5756             else if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED)
5757             {
5758                 std::shared_ptr<ocl::Context> pCtx = std::static_pointer_cast<ocl::Context>(u->allocatorContext);
5759                 CV_Assert(pCtx);
5760                 ocl::Context& ctx = *pCtx.get();
5761                 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
5762                 {
5763                     //nothing
5764                 }
5765                 else if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
5766                         (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5767                 {
5768                     const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5769                     CV_DbgAssert(svmFns->isValid());
5770                     cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5771 
5772                     if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) != 0)
5773                     {
5774                         CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
5775                         cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle, 0, NULL, NULL);
5776                         CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
5777                     }
5778                 }
5779                 CV_Assert(ctx.getImpl());
5780                 ctx.getImpl()->getBufferPoolSVM().release((void*)u->handle);
5781             }
5782 #endif
5783             else
5784             {
5785                 CV_OCL_DBG_CHECK(clReleaseMemObject((cl_mem)u->handle));
5786             }
5787             u->handle = 0;
5788             u->markDeviceCopyObsolete(true);
5789             delete u;
5790             u = NULL;
5791         }
5792         CV_Assert(u == NULL);
5793     }
5794 
5795     // synchronized call (external UMatDataAutoLock, see UMat::getMat)
map(UMatData * u,AccessFlag accessFlags) const5796     void map(UMatData* u, AccessFlag accessFlags) const CV_OVERRIDE
5797     {
5798         CV_Assert(u && u->handle);
5799 
5800         if (!!(accessFlags & ACCESS_WRITE))
5801             u->markDeviceCopyObsolete(true);
5802 
5803         cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5804 
5805         {
5806             if( !u->copyOnMap() )
5807             {
5808                 // TODO
5809                 // because there can be other map requests for the same UMat with different access flags,
5810                 // we use the universal (read-write) access mode.
5811 #ifdef HAVE_OPENCL_SVM
5812                 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5813                 {
5814                     if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5815                     {
5816                         Context& ctx = Context::getDefault();
5817                         const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5818                         CV_DbgAssert(svmFns->isValid());
5819 
5820                         if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0)
5821                         {
5822                             CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
5823                             cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ | CL_MAP_WRITE,
5824                                     u->handle, u->size,
5825                                     0, NULL, NULL);
5826                             CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
5827                             u->allocatorFlags_ |= svm::OPENCL_SVM_BUFFER_MAP;
5828                         }
5829                     }
5830                     clFinish(q);
5831                     u->data = (uchar*)u->handle;
5832                     u->markHostCopyObsolete(false);
5833                     u->markDeviceMemMapped(true);
5834                     return;
5835                 }
5836 #endif
5837 
5838                 cl_int retval = CL_SUCCESS;
5839                 if (!u->deviceMemMapped())
5840                 {
5841                     CV_Assert(u->refcount == 1);
5842                     CV_Assert(u->mapcount++ == 0);
5843                     u->data = (uchar*)clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
5844                                                          (CL_MAP_READ | CL_MAP_WRITE),
5845                                                          0, u->size, 0, 0, 0, &retval);
5846                     CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clEnqueueMapBuffer(handle=%p, sz=%lld) => %p", (void*)u->handle, (long long int)u->size, u->data).c_str());
5847                 }
5848                 if (u->data && retval == CL_SUCCESS)
5849                 {
5850                     u->markHostCopyObsolete(false);
5851                     u->markDeviceMemMapped(true);
5852                     return;
5853                 }
5854 
5855                 // TODO Is it really a good idea and was it tested well?
5856                 // if map failed, switch to copy-on-map mode for the particular buffer
5857                 u->flags |= UMatData::COPY_ON_MAP;
5858             }
5859 
5860             if(!u->data)
5861             {
5862                 u->data = (uchar*)fastMalloc(u->size);
5863                 u->markHostCopyObsolete(true);
5864             }
5865         }
5866 
5867         if (!!(accessFlags & ACCESS_READ) && u->hostCopyObsolete())
5868         {
5869             AlignedDataPtr<false, true> alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
5870 #ifdef HAVE_OPENCL_SVM
5871             CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == 0);
5872 #endif
5873             cl_int retval = clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
5874                     0, u->size, alignedPtr.getAlignedPtr(), 0, 0, 0);
5875             CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueReadBuffer(q, handle=%p, CL_TRUE, 0, sz=%lld, data=%p, 0, 0, 0)",
5876                     (void*)u->handle, (long long int)u->size, alignedPtr.getAlignedPtr()).c_str());
5877             u->markHostCopyObsolete(false);
5878         }
5879     }
5880 
unmap(UMatData * u) const5881     void unmap(UMatData* u) const CV_OVERRIDE
5882     {
5883         if(!u)
5884             return;
5885 
5886 
5887         CV_Assert(u->handle != 0);
5888 
5889         UMatDataAutoLock autolock(u);
5890 
5891         cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5892         cl_int retval = 0;
5893         if( !u->copyOnMap() && u->deviceMemMapped() )
5894         {
5895             CV_Assert(u->data != NULL);
5896 #ifdef HAVE_OPENCL_SVM
5897             if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5898             {
5899                 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5900                 {
5901                     Context& ctx = Context::getDefault();
5902                     const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5903                     CV_DbgAssert(svmFns->isValid());
5904 
5905                     CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) != 0);
5906                     {
5907                         CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
5908                         cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle,
5909                                 0, NULL, NULL);
5910                         CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
5911                         clFinish(q);
5912                         u->allocatorFlags_ &= ~svm::OPENCL_SVM_BUFFER_MAP;
5913                     }
5914                 }
5915                 if (u->refcount == 0)
5916                     u->data = 0;
5917                 u->markDeviceCopyObsolete(false);
5918                 u->markHostCopyObsolete(true);
5919                 return;
5920             }
5921 #endif
5922             if (u->refcount == 0)
5923             {
5924                 CV_Assert(u->mapcount-- == 1);
5925                 retval = clEnqueueUnmapMemObject(q, (cl_mem)u->handle, u->data, 0, 0, 0);
5926                 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueUnmapMemObject(handle=%p, data=%p, [sz=%lld])", (void*)u->handle, u->data, (long long int)u->size).c_str());
5927                 if (Device::getDefault().isAMD())
5928                 {
5929                     // required for multithreaded applications (see stitching test)
5930                     CV_OCL_DBG_CHECK(clFinish(q));
5931                 }
5932                 u->markDeviceMemMapped(false);
5933                 u->data = 0;
5934                 u->markDeviceCopyObsolete(false);
5935                 u->markHostCopyObsolete(true);
5936             }
5937         }
5938         else if( u->copyOnMap() && u->deviceCopyObsolete() )
5939         {
5940             AlignedDataPtr<true, false> alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
5941 #ifdef HAVE_OPENCL_SVM
5942             CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == 0);
5943 #endif
5944             retval = clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
5945                                 0, u->size, alignedPtr.getAlignedPtr(), 0, 0, 0);
5946             CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueWriteBuffer(q, handle=%p, CL_TRUE, 0, sz=%lld, data=%p, 0, 0, 0)",
5947                     (void*)u->handle, (long long int)u->size, alignedPtr.getAlignedPtr()).c_str());
5948             u->markDeviceCopyObsolete(false);
5949             u->markHostCopyObsolete(true);
5950         }
5951     }
5952 
checkContinuous(int dims,const size_t sz[],const size_t srcofs[],const size_t srcstep[],const size_t dstofs[],const size_t dststep[],size_t & total,size_t new_sz[],size_t & srcrawofs,size_t new_srcofs[],size_t new_srcstep[],size_t & dstrawofs,size_t new_dstofs[],size_t new_dststep[]) const5953     bool checkContinuous(int dims, const size_t sz[],
5954                          const size_t srcofs[], const size_t srcstep[],
5955                          const size_t dstofs[], const size_t dststep[],
5956                          size_t& total, size_t new_sz[],
5957                          size_t& srcrawofs, size_t new_srcofs[], size_t new_srcstep[],
5958                          size_t& dstrawofs, size_t new_dstofs[], size_t new_dststep[]) const
5959     {
5960         bool iscontinuous = true;
5961         srcrawofs = srcofs ? srcofs[dims-1] : 0;
5962         dstrawofs = dstofs ? dstofs[dims-1] : 0;
5963         total = sz[dims-1];
5964         for( int i = dims-2; i >= 0; i-- )
5965         {
5966             if( i >= 0 && (total != srcstep[i] || total != dststep[i]) )
5967                 iscontinuous = false;
5968             total *= sz[i];
5969             if( srcofs )
5970                 srcrawofs += srcofs[i]*srcstep[i];
5971             if( dstofs )
5972                 dstrawofs += dstofs[i]*dststep[i];
5973         }
5974 
5975         if( !iscontinuous )
5976         {
5977             // OpenCL uses {x, y, z} order while OpenCV uses {z, y, x} order.
5978             if( dims == 2 )
5979             {
5980                 new_sz[0] = sz[1]; new_sz[1] = sz[0]; new_sz[2] = 1;
5981                 // we assume that new_... arrays are initialized by caller
5982                 // with 0's, so there is no else branch
5983                 if( srcofs )
5984                 {
5985                     new_srcofs[0] = srcofs[1];
5986                     new_srcofs[1] = srcofs[0];
5987                     new_srcofs[2] = 0;
5988                 }
5989 
5990                 if( dstofs )
5991                 {
5992                     new_dstofs[0] = dstofs[1];
5993                     new_dstofs[1] = dstofs[0];
5994                     new_dstofs[2] = 0;
5995                 }
5996 
5997                 new_srcstep[0] = srcstep[0]; new_srcstep[1] = 0;
5998                 new_dststep[0] = dststep[0]; new_dststep[1] = 0;
5999             }
6000             else
6001             {
6002                 // we could check for dims == 3 here,
6003                 // but from user perspective this one is more informative
6004                 CV_Assert(dims <= 3);
6005                 new_sz[0] = sz[2]; new_sz[1] = sz[1]; new_sz[2] = sz[0];
6006                 if( srcofs )
6007                 {
6008                     new_srcofs[0] = srcofs[2];
6009                     new_srcofs[1] = srcofs[1];
6010                     new_srcofs[2] = srcofs[0];
6011                 }
6012 
6013                 if( dstofs )
6014                 {
6015                     new_dstofs[0] = dstofs[2];
6016                     new_dstofs[1] = dstofs[1];
6017                     new_dstofs[2] = dstofs[0];
6018                 }
6019 
6020                 new_srcstep[0] = srcstep[1]; new_srcstep[1] = srcstep[0];
6021                 new_dststep[0] = dststep[1]; new_dststep[1] = dststep[0];
6022             }
6023         }
6024         return iscontinuous;
6025     }
6026 
download(UMatData * u,void * dstptr,int dims,const size_t sz[],const size_t srcofs[],const size_t srcstep[],const size_t dststep[]) const6027     void download(UMatData* u, void* dstptr, int dims, const size_t sz[],
6028                   const size_t srcofs[], const size_t srcstep[],
6029                   const size_t dststep[]) const CV_OVERRIDE
6030     {
6031         if(!u)
6032             return;
6033         UMatDataAutoLock autolock(u);
6034 
6035         if( u->data && !u->hostCopyObsolete() )
6036         {
6037             Mat::getDefaultAllocator()->download(u, dstptr, dims, sz, srcofs, srcstep, dststep);
6038             return;
6039         }
6040         CV_Assert( u->handle != 0 );
6041 
6042         cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
6043 
6044         size_t total = 0, new_sz[] = {0, 0, 0};
6045         size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
6046         size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
6047 
6048         bool iscontinuous = checkContinuous(dims, sz, srcofs, srcstep, 0, dststep,
6049                                             total, new_sz,
6050                                             srcrawofs, new_srcofs, new_srcstep,
6051                                             dstrawofs, new_dstofs, new_dststep);
6052 
6053 #ifdef HAVE_OPENCL_SVM
6054         if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
6055         {
6056             CV_DbgAssert(u->data == NULL || u->data == u->handle);
6057             Context& ctx = Context::getDefault();
6058             const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
6059             CV_DbgAssert(svmFns->isValid());
6060 
6061             CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0);
6062             if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
6063             {
6064                 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
6065                 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ,
6066                         u->handle, u->size,
6067                         0, NULL, NULL);
6068                 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
6069             }
6070             clFinish(q);
6071             if( iscontinuous )
6072             {
6073                 memcpy(dstptr, (uchar*)u->handle + srcrawofs, total);
6074             }
6075             else
6076             {
6077                 // This code is from MatAllocator::download()
6078                 int isz[CV_MAX_DIM];
6079                 uchar* srcptr = (uchar*)u->handle;
6080                 for( int i = 0; i < dims; i++ )
6081                 {
6082                     CV_Assert( sz[i] <= (size_t)INT_MAX );
6083                     if( sz[i] == 0 )
6084                     return;
6085                     if( srcofs )
6086                     srcptr += srcofs[i]*(i <= dims-2 ? srcstep[i] : 1);
6087                     isz[i] = (int)sz[i];
6088                 }
6089 
6090                 Mat src(dims, isz, CV_8U, srcptr, srcstep);
6091                 Mat dst(dims, isz, CV_8U, dstptr, dststep);
6092 
6093                 const Mat* arrays[] = { &src, &dst };
6094                 uchar* ptrs[2];
6095                 NAryMatIterator it(arrays, ptrs, 2);
6096                 size_t j, planesz = it.size;
6097 
6098                 for( j = 0; j < it.nplanes; j++, ++it )
6099                     memcpy(ptrs[1], ptrs[0], planesz);
6100             }
6101             if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
6102             {
6103                 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
6104                 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle,
6105                         0, NULL, NULL);
6106                 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
6107                 clFinish(q);
6108             }
6109         }
6110         else
6111 #endif
6112         {
6113             if( iscontinuous )
6114             {
6115                 AlignedDataPtr<false, true> alignedPtr((uchar*)dstptr, total, CV_OPENCL_DATA_PTR_ALIGNMENT);
6116                 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
6117                     srcrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0));
6118             }
6119             else if (CV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS)
6120             {
6121                 const size_t padding = CV_OPENCL_DATA_PTR_ALIGNMENT;
6122                 size_t new_srcrawofs = srcrawofs & ~(padding-1);
6123                 size_t membuf_ofs = srcrawofs - new_srcrawofs;
6124                 AlignedDataPtr2D<false, false> alignedPtr(0, new_sz[1], new_srcstep[0], new_srcstep[0],
6125                                                           CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
6126                 uchar* ptr = alignedPtr.getAlignedPtr();
6127 
6128                 CV_Assert(new_srcstep[0] >= new_sz[0]);
6129                 total = alignSize(new_srcstep[0]*new_sz[1] + membuf_ofs, padding);
6130                 total = std::min(total, u->size - new_srcrawofs);
6131                 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
6132                                                  new_srcrawofs, total, ptr, 0, 0, 0));
6133                 for( size_t i = 0; i < new_sz[1]; i++ )
6134                     memcpy( (uchar*)dstptr + i*new_dststep[0], ptr + i*new_srcstep[0] + membuf_ofs, new_sz[0]);
6135             }
6136             else
6137             {
6138                 AlignedDataPtr2D<false, true> alignedPtr((uchar*)dstptr, new_sz[1], new_sz[0], new_dststep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
6139                 uchar* ptr = alignedPtr.getAlignedPtr();
6140 
6141                 CV_OCL_CHECK(clEnqueueReadBufferRect(q, (cl_mem)u->handle, CL_TRUE,
6142                     new_srcofs, new_dstofs, new_sz,
6143                     new_srcstep[0], 0,
6144                     new_dststep[0], 0,
6145                     ptr, 0, 0, 0));
6146             }
6147         }
6148     }
6149 
upload(UMatData * u,const void * srcptr,int dims,const size_t sz[],const size_t dstofs[],const size_t dststep[],const size_t srcstep[]) const6150     void upload(UMatData* u, const void* srcptr, int dims, const size_t sz[],
6151                 const size_t dstofs[], const size_t dststep[],
6152                 const size_t srcstep[]) const CV_OVERRIDE
6153     {
6154         if(!u)
6155             return;
6156 
6157         // there should be no user-visible CPU copies of the UMat which we are going to copy to
6158         CV_Assert(u->refcount == 0 || u->tempUMat());
6159 
6160         size_t total = 0, new_sz[] = {0, 0, 0};
6161         size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
6162         size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
6163 
6164         bool iscontinuous = checkContinuous(dims, sz, 0, srcstep, dstofs, dststep,
6165                                             total, new_sz,
6166                                             srcrawofs, new_srcofs, new_srcstep,
6167                                             dstrawofs, new_dstofs, new_dststep);
6168 
6169         UMatDataAutoLock autolock(u);
6170 
6171         // if there is cached CPU copy of the GPU matrix,
6172         // we could use it as a destination.
6173         // we can do it in 2 cases:
6174         //    1. we overwrite the whole content
6175         //    2. we overwrite part of the matrix, but the GPU copy is out-of-date
6176         if( u->data && (u->hostCopyObsolete() < u->deviceCopyObsolete() || total == u->size))
6177         {
6178             Mat::getDefaultAllocator()->upload(u, srcptr, dims, sz, dstofs, dststep, srcstep);
6179             u->markHostCopyObsolete(false);
6180             u->markDeviceCopyObsolete(true);
6181             return;
6182         }
6183 
6184         CV_Assert( u->handle != 0 );
6185         cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
6186 
6187 #ifdef HAVE_OPENCL_SVM
6188         if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
6189         {
6190             CV_DbgAssert(u->data == NULL || u->data == u->handle);
6191             Context& ctx = Context::getDefault();
6192             const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
6193             CV_DbgAssert(svmFns->isValid());
6194 
6195             CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0);
6196             if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
6197             {
6198                 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
6199                 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_WRITE,
6200                         u->handle, u->size,
6201                         0, NULL, NULL);
6202                 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
6203             }
6204             clFinish(q);
6205             if( iscontinuous )
6206             {
6207                 memcpy((uchar*)u->handle + dstrawofs, srcptr, total);
6208             }
6209             else
6210             {
6211                 // This code is from MatAllocator::upload()
6212                 int isz[CV_MAX_DIM];
6213                 uchar* dstptr = (uchar*)u->handle;
6214                 for( int i = 0; i < dims; i++ )
6215                 {
6216                     CV_Assert( sz[i] <= (size_t)INT_MAX );
6217                     if( sz[i] == 0 )
6218                     return;
6219                     if( dstofs )
6220                     dstptr += dstofs[i]*(i <= dims-2 ? dststep[i] : 1);
6221                     isz[i] = (int)sz[i];
6222                 }
6223 
6224                 Mat src(dims, isz, CV_8U, (void*)srcptr, srcstep);
6225                 Mat dst(dims, isz, CV_8U, dstptr, dststep);
6226 
6227                 const Mat* arrays[] = { &src, &dst };
6228                 uchar* ptrs[2];
6229                 NAryMatIterator it(arrays, ptrs, 2);
6230                 size_t j, planesz = it.size;
6231 
6232                 for( j = 0; j < it.nplanes; j++, ++it )
6233                     memcpy(ptrs[1], ptrs[0], planesz);
6234             }
6235             if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
6236             {
6237                 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
6238                 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle,
6239                         0, NULL, NULL);
6240                 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
6241                 clFinish(q);
6242             }
6243         }
6244         else
6245 #endif
6246         {
6247             if( iscontinuous )
6248             {
6249                 AlignedDataPtr<true, false> alignedPtr((uchar*)srcptr, total, CV_OPENCL_DATA_PTR_ALIGNMENT);
6250                 cl_int retval = clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
6251                     dstrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0);
6252                 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueWriteBuffer(q, handle=%p, CL_TRUE, offset=%lld, sz=%lld, data=%p, 0, 0, 0)",
6253                         (void*)u->handle, (long long int)dstrawofs, (long long int)u->size, alignedPtr.getAlignedPtr()).c_str());
6254             }
6255             else if (CV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS)
6256             {
6257                 const size_t padding = CV_OPENCL_DATA_PTR_ALIGNMENT;
6258                 size_t new_dstrawofs = dstrawofs & ~(padding-1);
6259                 size_t membuf_ofs = dstrawofs - new_dstrawofs;
6260                 AlignedDataPtr2D<false, false> alignedPtr(0, new_sz[1], new_dststep[0], new_dststep[0],
6261                                                           CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
6262                 uchar* ptr = alignedPtr.getAlignedPtr();
6263 
6264                 CV_Assert(new_dststep[0] >= new_sz[0] && new_srcstep[0] >= new_sz[0]);
6265                 total = alignSize(new_dststep[0]*new_sz[1] + membuf_ofs, padding);
6266                 total = std::min(total, u->size - new_dstrawofs);
6267                 /*printf("new_sz0=%d, new_sz1=%d, membuf_ofs=%d, total=%d (%08x), new_dstrawofs=%d (%08x)\n",
6268                        (int)new_sz[0], (int)new_sz[1], (int)membuf_ofs,
6269                        (int)total, (int)total, (int)new_dstrawofs, (int)new_dstrawofs);*/
6270                 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
6271                                                  new_dstrawofs, total, ptr, 0, 0, 0));
6272                 for( size_t i = 0; i < new_sz[1]; i++ )
6273                     memcpy( ptr + i*new_dststep[0] + membuf_ofs, (uchar*)srcptr + i*new_srcstep[0], new_sz[0]);
6274                 CV_OCL_CHECK(clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
6275                                                  new_dstrawofs, total, ptr, 0, 0, 0));
6276             }
6277             else
6278             {
6279                 AlignedDataPtr2D<true, false> alignedPtr((uchar*)srcptr, new_sz[1], new_sz[0], new_srcstep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
6280                 uchar* ptr = alignedPtr.getAlignedPtr();
6281 
6282                 CV_OCL_CHECK(clEnqueueWriteBufferRect(q, (cl_mem)u->handle, CL_TRUE,
6283                     new_dstofs, new_srcofs, new_sz,
6284                     new_dststep[0], 0,
6285                     new_srcstep[0], 0,
6286                     ptr, 0, 0, 0));
6287             }
6288         }
6289         u->markHostCopyObsolete(true);
6290 #ifdef HAVE_OPENCL_SVM
6291         if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
6292                 (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
6293         {
6294             // nothing
6295         }
6296         else
6297 #endif
6298         {
6299             u->markHostCopyObsolete(true);
6300         }
6301         u->markDeviceCopyObsolete(false);
6302     }
6303 
copy(UMatData * src,UMatData * dst,int dims,const size_t sz[],const size_t srcofs[],const size_t srcstep[],const size_t dstofs[],const size_t dststep[],bool _sync) const6304     void copy(UMatData* src, UMatData* dst, int dims, const size_t sz[],
6305               const size_t srcofs[], const size_t srcstep[],
6306               const size_t dstofs[], const size_t dststep[], bool _sync) const CV_OVERRIDE
6307     {
6308         if(!src || !dst)
6309             return;
6310 
6311         size_t total = 0, new_sz[] = {0, 0, 0};
6312         size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
6313         size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
6314 
6315         bool iscontinuous = checkContinuous(dims, sz, srcofs, srcstep, dstofs, dststep,
6316                                             total, new_sz,
6317                                             srcrawofs, new_srcofs, new_srcstep,
6318                                             dstrawofs, new_dstofs, new_dststep);
6319 
6320         UMatDataAutoLock src_autolock(src, dst);
6321 
6322         if( !src->handle || (src->data && src->hostCopyObsolete() < src->deviceCopyObsolete()) )
6323         {
6324             upload(dst, src->data + srcrawofs, dims, sz, dstofs, dststep, srcstep);
6325             return;
6326         }
6327         if( !dst->handle || (dst->data && dst->hostCopyObsolete() < dst->deviceCopyObsolete()) )
6328         {
6329             download(src, dst->data + dstrawofs, dims, sz, srcofs, srcstep, dststep);
6330             dst->markHostCopyObsolete(false);
6331 #ifdef HAVE_OPENCL_SVM
6332             if ((dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
6333                     (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
6334             {
6335                 // nothing
6336             }
6337             else
6338 #endif
6339             {
6340                 dst->markDeviceCopyObsolete(true);
6341             }
6342             return;
6343         }
6344 
6345         // there should be no user-visible CPU copies of the UMat which we are going to copy to
6346         CV_Assert(dst->refcount == 0);
6347         cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
6348 
6349         cl_int retval = CL_SUCCESS;
6350 #ifdef HAVE_OPENCL_SVM
6351         if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0 ||
6352                 (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
6353         {
6354             if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0 &&
6355                             (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
6356             {
6357                 Context& ctx = Context::getDefault();
6358                 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
6359                 CV_DbgAssert(svmFns->isValid());
6360 
6361                 if( iscontinuous )
6362                 {
6363                     CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMemcpy: %p <-- %p (%d)\n",
6364                             (uchar*)dst->handle + dstrawofs, (uchar*)src->handle + srcrawofs, (int)total);
6365                     cl_int status = svmFns->fn_clEnqueueSVMMemcpy(q, CL_TRUE,
6366                             (uchar*)dst->handle + dstrawofs, (uchar*)src->handle + srcrawofs,
6367                             total, 0, NULL, NULL);
6368                     CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMemcpy()");
6369                 }
6370                 else
6371                 {
6372                     clFinish(q);
6373                     // This code is from MatAllocator::download()/upload()
6374                     int isz[CV_MAX_DIM];
6375                     uchar* srcptr = (uchar*)src->handle;
6376                     for( int i = 0; i < dims; i++ )
6377                     {
6378                         CV_Assert( sz[i] <= (size_t)INT_MAX );
6379                         if( sz[i] == 0 )
6380                         return;
6381                         if( srcofs )
6382                         srcptr += srcofs[i]*(i <= dims-2 ? srcstep[i] : 1);
6383                         isz[i] = (int)sz[i];
6384                     }
6385                     Mat m_src(dims, isz, CV_8U, srcptr, srcstep);
6386 
6387                     uchar* dstptr = (uchar*)dst->handle;
6388                     for( int i = 0; i < dims; i++ )
6389                     {
6390                         if( dstofs )
6391                         dstptr += dstofs[i]*(i <= dims-2 ? dststep[i] : 1);
6392                     }
6393                     Mat m_dst(dims, isz, CV_8U, dstptr, dststep);
6394 
6395                     const Mat* arrays[] = { &m_src, &m_dst };
6396                     uchar* ptrs[2];
6397                     NAryMatIterator it(arrays, ptrs, 2);
6398                     size_t j, planesz = it.size;
6399 
6400                     for( j = 0; j < it.nplanes; j++, ++it )
6401                         memcpy(ptrs[1], ptrs[0], planesz);
6402                 }
6403             }
6404             else
6405             {
6406                 if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
6407                 {
6408                     map(src, ACCESS_READ);
6409                     upload(dst, src->data + srcrawofs, dims, sz, dstofs, dststep, srcstep);
6410                     unmap(src);
6411                 }
6412                 else
6413                 {
6414                     map(dst, ACCESS_WRITE);
6415                     download(src, dst->data + dstrawofs, dims, sz, srcofs, srcstep, dststep);
6416                     unmap(dst);
6417                 }
6418             }
6419         }
6420         else
6421 #endif
6422         {
6423             if( iscontinuous )
6424             {
6425                 retval = clEnqueueCopyBuffer(q, (cl_mem)src->handle, (cl_mem)dst->handle,
6426                                                srcrawofs, dstrawofs, total, 0, 0, 0);
6427                 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueCopyBuffer(q, src=%p, dst=%p, src_offset=%lld, dst_offset=%lld, sz=%lld, 0, 0, 0)",
6428                         (void*)src->handle, (void*)dst->handle, (long long int)srcrawofs, (long long int)dstrawofs, (long long int)total).c_str());
6429             }
6430             else if (CV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS)
6431             {
6432                 const size_t padding = CV_OPENCL_DATA_PTR_ALIGNMENT;
6433                 size_t new_srcrawofs = srcrawofs & ~(padding-1);
6434                 size_t srcmembuf_ofs = srcrawofs - new_srcrawofs;
6435                 size_t new_dstrawofs = dstrawofs & ~(padding-1);
6436                 size_t dstmembuf_ofs = dstrawofs - new_dstrawofs;
6437 
6438                 AlignedDataPtr2D<false, false> srcBuf(0, new_sz[1], new_srcstep[0], new_srcstep[0],
6439                                                       CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
6440                 AlignedDataPtr2D<false, false> dstBuf(0, new_sz[1], new_dststep[0], new_dststep[0],
6441                                                       CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
6442                 uchar* srcptr = srcBuf.getAlignedPtr();
6443                 uchar* dstptr = dstBuf.getAlignedPtr();
6444 
6445                 CV_Assert(new_dststep[0] >= new_sz[0] && new_srcstep[0] >= new_sz[0]);
6446 
6447                 size_t src_total = alignSize(new_srcstep[0]*new_sz[1] + srcmembuf_ofs, padding);
6448                 src_total = std::min(src_total, src->size - new_srcrawofs);
6449                 size_t dst_total = alignSize(new_dststep[0]*new_sz[1] + dstmembuf_ofs, padding);
6450                 dst_total = std::min(dst_total, dst->size - new_dstrawofs);
6451 
6452                 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)src->handle, CL_TRUE,
6453                                                  new_srcrawofs, src_total, srcptr, 0, 0, 0));
6454                 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)dst->handle, CL_TRUE,
6455                                                  new_dstrawofs, dst_total, dstptr, 0, 0, 0));
6456 
6457                 for( size_t i = 0; i < new_sz[1]; i++ )
6458                     memcpy( dstptr + dstmembuf_ofs + i*new_dststep[0],
6459                             srcptr + srcmembuf_ofs + i*new_srcstep[0], new_sz[0]);
6460                 CV_OCL_CHECK(clEnqueueWriteBuffer(q, (cl_mem)dst->handle, CL_TRUE,
6461                                                   new_dstrawofs, dst_total, dstptr, 0, 0, 0));
6462             }
6463             else
6464             {
6465                 CV_OCL_CHECK(retval = clEnqueueCopyBufferRect(q, (cl_mem)src->handle, (cl_mem)dst->handle,
6466                                                    new_srcofs, new_dstofs, new_sz,
6467                                                    new_srcstep[0], 0,
6468                                                    new_dststep[0], 0,
6469                                                    0, 0, 0));
6470             }
6471         }
6472         if (retval == CL_SUCCESS)
6473         {
6474             CV_IMPL_ADD(CV_IMPL_OCL)
6475         }
6476 
6477 #ifdef HAVE_OPENCL_SVM
6478         if ((dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
6479             (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
6480         {
6481             // nothing
6482         }
6483         else
6484 #endif
6485         {
6486             dst->markHostCopyObsolete(true);
6487         }
6488         dst->markDeviceCopyObsolete(false);
6489 
6490         if( _sync )
6491         {
6492             CV_OCL_DBG_CHECK(clFinish(q));
6493         }
6494     }
6495 
getBufferPoolController(const char * id) const6496     BufferPoolController* getBufferPoolController(const char* id) const CV_OVERRIDE
6497     {
6498         ocl::Context ctx = Context::getDefault();
6499         if (ctx.empty())
6500             return NULL;
6501 #ifdef HAVE_OPENCL_SVM
6502         if ((svm::checkForceSVMUmatUsage() && (id == NULL || strcmp(id, "OCL") == 0)) || (id != NULL && strcmp(id, "SVM") == 0))
6503         {
6504             return &ctx.getImpl()->getBufferPoolSVM();
6505         }
6506 #endif
6507         if (id != NULL && strcmp(id, "HOST_ALLOC") == 0)
6508         {
6509             return &ctx.getImpl()->getBufferPoolHostPtr();
6510         }
6511         if (id != NULL && strcmp(id, "OCL") != 0)
6512         {
6513             CV_Error(cv::Error::StsBadArg, "getBufferPoolController(): unknown BufferPool ID\n");
6514         }
6515         return &ctx.getImpl()->getBufferPool();
6516     }
6517 
    // NOTE(review): presumably the regular host-side allocator this OpenCL allocator
    // falls back to - confirm at its points of use (not visible in this section).
    MatAllocator* matStdAllocator;

    // Guards cleanupQueue: held while the queue is appended to or swapped out.
    mutable cv::Mutex cleanupQueueMutex;
    // UMatData objects waiting for a deallocate_() call; filled by addToCleanupQueue()
    // and drained by flushCleanupQueue().
    mutable std::deque<UMatData*> cleanupQueue;
6522 
flushCleanupQueue() const6523     void flushCleanupQueue() const
6524     {
6525         if (!cleanupQueue.empty())
6526         {
6527             std::deque<UMatData*> q;
6528             {
6529                 cv::AutoLock lock(cleanupQueueMutex);
6530                 q.swap(cleanupQueue);
6531             }
6532             for (std::deque<UMatData*>::const_iterator i = q.begin(); i != q.end(); ++i)
6533             {
6534                 deallocate_(*i);
6535             }
6536         }
6537     }
addToCleanupQueue(UMatData * u) const6538     void addToCleanupQueue(UMatData* u) const
6539     {
6540         //TODO: Validation check: CV_Assert(!u->tempUMat());
6541         {
6542             cv::AutoLock lock(cleanupQueueMutex);
6543             cleanupQueue.push_back(u);
6544         }
6545     }
6546 };
6547 
getOpenCLAllocator_()6548 static OpenCLAllocator* getOpenCLAllocator_() // call once guarantee
6549 {
6550     static OpenCLAllocator* g_allocator = new OpenCLAllocator(); // avoid destructor call (using of this object is too wide)
6551     return g_allocator;
6552 }
// Returns the OpenCL allocator through the generic MatAllocator interface.
// The instance comes from getOpenCLAllocator_(); the return statement is supplied
// by the CV_SINGLETON_LAZY_INIT macro (defined outside this section).
MatAllocator* getOpenCLAllocator()
{
    CV_SINGLETON_LAZY_INIT(MatAllocator, getOpenCLAllocator_())
}
6557 
6558 }} // namespace cv::ocl
6559 
6560 
6561 namespace cv {
6562 
// the two functions declared below are implemented in umatrix.cpp
6564 void setSize( UMat& m, int _dims, const int* _sz, const size_t* _steps,
6565               bool autoSteps = false );
6566 void finalizeHdr(UMat& m);
6567 
6568 } // namespace cv
6569 
6570 
6571 namespace cv { namespace ocl {
6572 
6573 /*
6574 // Convert OpenCL buffer memory to UMat
6575 */
convertFromBuffer(void * cl_mem_buffer,size_t step,int rows,int cols,int type,UMat & dst)6576 void convertFromBuffer(void* cl_mem_buffer, size_t step, int rows, int cols, int type, UMat& dst)
6577 {
6578     int d = 2;
6579     int sizes[] = { rows, cols };
6580 
6581     CV_Assert(0 <= d && d <= CV_MAX_DIM);
6582 
6583     dst.release();
6584 
6585     dst.flags      = (type & Mat::TYPE_MASK) | Mat::MAGIC_VAL;
6586     dst.usageFlags = USAGE_DEFAULT;
6587 
6588     setSize(dst, d, sizes, 0, true);
6589     dst.offset = 0;
6590 
6591     cl_mem             memobj = (cl_mem)cl_mem_buffer;
6592     cl_mem_object_type mem_type = 0;
6593 
6594     CV_OCL_CHECK(clGetMemObjectInfo(memobj, CL_MEM_TYPE, sizeof(cl_mem_object_type), &mem_type, 0));
6595 
6596     CV_Assert(CL_MEM_OBJECT_BUFFER == mem_type);
6597 
6598     size_t total = 0;
6599     CV_OCL_CHECK(clGetMemObjectInfo(memobj, CL_MEM_SIZE, sizeof(size_t), &total, 0));
6600 
6601     CV_OCL_CHECK(clRetainMemObject(memobj));
6602 
6603     CV_Assert((int)step >= cols * CV_ELEM_SIZE(type));
6604     CV_Assert(total >= rows * step);
6605 
6606     // attach clBuffer to UMatData
6607     dst.u = new UMatData(getOpenCLAllocator());
6608     dst.u->data            = 0;
6609     dst.u->allocatorFlags_ = OpenCLAllocator::ALLOCATOR_FLAGS_EXTERNAL_BUFFER;  // not allocated from any OpenCV buffer pool
6610     dst.u->flags           = static_cast<UMatData::MemoryFlag>(0);
6611     dst.u->handle          = cl_mem_buffer;
6612     dst.u->origdata        = 0;
6613     dst.u->prevAllocator   = 0;
6614     dst.u->size            = total;
6615 
6616     finalizeHdr(dst);
6617     dst.addref();
6618 
6619     return;
6620 } // convertFromBuffer()
6621 
6622 
6623 /*
6624 // Convert OpenCL image2d_t memory to UMat
6625 */
convertFromImage(void * cl_mem_image,UMat & dst)6626 void convertFromImage(void* cl_mem_image, UMat& dst)
6627 {
6628     cl_mem             clImage = (cl_mem)cl_mem_image;
6629     cl_mem_object_type mem_type = 0;
6630 
6631     CV_OCL_CHECK(clGetMemObjectInfo(clImage, CL_MEM_TYPE, sizeof(cl_mem_object_type), &mem_type, 0));
6632 
6633     CV_Assert(CL_MEM_OBJECT_IMAGE2D == mem_type);
6634 
6635     cl_image_format fmt = { 0, 0 };
6636     CV_OCL_CHECK(clGetImageInfo(clImage, CL_IMAGE_FORMAT, sizeof(cl_image_format), &fmt, 0));
6637 
6638     int depth = CV_8U;
6639     switch (fmt.image_channel_data_type)
6640     {
6641     case CL_UNORM_INT8:
6642     case CL_UNSIGNED_INT8:
6643         depth = CV_8U;
6644         break;
6645 
6646     case CL_SNORM_INT8:
6647     case CL_SIGNED_INT8:
6648         depth = CV_8S;
6649         break;
6650 
6651     case CL_UNORM_INT16:
6652     case CL_UNSIGNED_INT16:
6653         depth = CV_16U;
6654         break;
6655 
6656     case CL_SNORM_INT16:
6657     case CL_SIGNED_INT16:
6658         depth = CV_16S;
6659         break;
6660 
6661     case CL_SIGNED_INT32:
6662         depth = CV_32S;
6663         break;
6664 
6665     case CL_FLOAT:
6666         depth = CV_32F;
6667         break;
6668 
6669     case CL_HALF_FLOAT:
6670         depth = CV_16F;
6671         break;
6672 
6673     default:
6674         CV_Error(cv::Error::OpenCLApiCallError, "Not supported image_channel_data_type");
6675     }
6676 
6677     int type = CV_8UC1;
6678     switch (fmt.image_channel_order)
6679     {
6680     case CL_R:
6681     case CL_A:
6682     case CL_INTENSITY:
6683     case CL_LUMINANCE:
6684         type = CV_MAKE_TYPE(depth, 1);
6685         break;
6686 
6687     case CL_RG:
6688     case CL_RA:
6689         type = CV_MAKE_TYPE(depth, 2);
6690         break;
6691 
6692     // CL_RGB has no mappings to OpenCV types because CL_RGB can only be used with
6693     // CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, or CL_UNORM_INT_101010.
6694     /*case CL_RGB:
6695         type = CV_MAKE_TYPE(depth, 3);
6696         break;*/
6697 
6698     case CL_RGBA:
6699     case CL_BGRA:
6700     case CL_ARGB:
6701         type = CV_MAKE_TYPE(depth, 4);
6702         break;
6703 
6704     default:
6705         CV_Error(cv::Error::OpenCLApiCallError, "Not supported image_channel_order");
6706         break;
6707     }
6708 
6709     size_t step = 0;
6710     CV_OCL_CHECK(clGetImageInfo(clImage, CL_IMAGE_ROW_PITCH, sizeof(size_t), &step, 0));
6711 
6712     size_t w = 0;
6713     CV_OCL_CHECK(clGetImageInfo(clImage, CL_IMAGE_WIDTH, sizeof(size_t), &w, 0));
6714 
6715     size_t h = 0;
6716     CV_OCL_CHECK(clGetImageInfo(clImage, CL_IMAGE_HEIGHT, sizeof(size_t), &h, 0));
6717 
6718     dst.create((int)h, (int)w, type);
6719 
6720     cl_mem clBuffer = (cl_mem)dst.handle(ACCESS_READ);
6721 
6722     cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
6723 
6724     size_t offset = 0;
6725     size_t src_origin[3] = { 0, 0, 0 };
6726     size_t region[3] = { w, h, 1 };
6727     CV_OCL_CHECK(clEnqueueCopyImageToBuffer(q, clImage, clBuffer, src_origin, region, offset, 0, NULL, NULL));
6728 
6729     CV_OCL_CHECK(clFinish(q));
6730 
6731     return;
6732 } // convertFromImage()
6733 
6734 
6735 ///////////////////////////////////////////// Utility functions /////////////////////////////////////////////////
6736 
getDevices(std::vector<cl_device_id> & devices,cl_platform_id platform)6737 static void getDevices(std::vector<cl_device_id>& devices, cl_platform_id platform)
6738 {
6739     cl_uint numDevices = 0;
6740     cl_int status = clGetDeviceIDs(platform, (cl_device_type)Device::TYPE_ALL, 0, NULL, &numDevices);
6741     if (status != CL_DEVICE_NOT_FOUND) // Not an error if platform has no devices
6742     {
6743         CV_OCL_DBG_CHECK_RESULT(status,
6744             cv::format("clGetDeviceIDs(platform, Device::TYPE_ALL, num_entries=0, devices=NULL, numDevices=%p)", &numDevices).c_str());
6745     }
6746 
6747     if (numDevices == 0)
6748     {
6749         devices.clear();
6750         return;
6751     }
6752 
6753     devices.resize((size_t)numDevices);
6754     CV_OCL_DBG_CHECK(clGetDeviceIDs(platform, (cl_device_type)Device::TYPE_ALL, numDevices, &devices[0], &numDevices));
6755 }
6756 
6757 struct PlatformInfo::Impl
6758 {
Implcv::ocl::PlatformInfo::Impl6759     Impl(void* id)
6760     {
6761         refcount = 1;
6762         handle = *(cl_platform_id*)id;
6763         getDevices(devices, handle);
6764 
6765         version_ = getStrProp(CL_PLATFORM_VERSION);
6766         parseOpenCLVersion(version_, versionMajor_, versionMinor_);
6767     }
6768 
getStrPropcv::ocl::PlatformInfo::Impl6769     String getStrProp(cl_platform_info prop) const
6770     {
6771         char buf[1024];
6772         size_t sz=0;
6773         return clGetPlatformInfo(handle, prop, sizeof(buf)-16, buf, &sz) == CL_SUCCESS &&
6774             sz < sizeof(buf) ? String(buf) : String();
6775     }
6776 
6777     IMPLEMENT_REFCOUNTABLE();
6778     std::vector<cl_device_id> devices;
6779     cl_platform_id handle;
6780 
6781     String version_;
6782     int versionMajor_;
6783     int versionMinor_;
6784 };
6785 
PlatformInfo()6786 PlatformInfo::PlatformInfo() CV_NOEXCEPT
6787 {
6788     p = 0;
6789 }
6790 
PlatformInfo(void * platform_id)6791 PlatformInfo::PlatformInfo(void* platform_id)
6792 {
6793     p = new Impl(platform_id);
6794 }
6795 
~PlatformInfo()6796 PlatformInfo::~PlatformInfo()
6797 {
6798     if(p)
6799         p->release();
6800 }
6801 
// Copy constructor: shares the implementation of `i` and adds a reference to it.
PlatformInfo::PlatformInfo(const PlatformInfo& i)
{
    p = i.p;
    if (p)
        p->addref();
}
6808 
// Copy assignment: shares the implementation of `i`. Nothing happens when both objects
// already refer to the same implementation.
PlatformInfo& PlatformInfo::operator =(const PlatformInfo& i)
{
    if (i.p == p)
        return *this;

    // take the new reference before dropping the old one
    if (i.p)
        i.p->addref();
    if (p)
        p->release();
    p = i.p;
    return *this;
}
6821 
PlatformInfo(PlatformInfo && i)6822 PlatformInfo::PlatformInfo(PlatformInfo&& i) CV_NOEXCEPT
6823 {
6824     p = i.p;
6825     i.p = nullptr;
6826 }
6827 
operator =(PlatformInfo && i)6828 PlatformInfo& PlatformInfo::operator = (PlatformInfo&& i) CV_NOEXCEPT
6829 {
6830     if (this != &i) {
6831         if(p)
6832             p->release();
6833         p = i.p;
6834         i.p = nullptr;
6835     }
6836     return *this;
6837 }
6838 
deviceNumber() const6839 int PlatformInfo::deviceNumber() const
6840 {
6841     return p ? (int)p->devices.size() : 0;
6842 }
6843 
getDevice(Device & device,int d) const6844 void PlatformInfo::getDevice(Device& device, int d) const
6845 {
6846     CV_Assert(p && d < (int)p->devices.size() );
6847     if(p)
6848         device.set(p->devices[d]);
6849 }
6850 
name() const6851 String PlatformInfo::name() const
6852 {
6853     return p ? p->getStrProp(CL_PLATFORM_NAME) : String();
6854 }
6855 
vendor() const6856 String PlatformInfo::vendor() const
6857 {
6858     return p ? p->getStrProp(CL_PLATFORM_VENDOR) : String();
6859 }
6860 
version() const6861 String PlatformInfo::version() const
6862 {
6863     return p ? p->version_ : String();
6864 }
6865 
versionMajor() const6866 int PlatformInfo::versionMajor() const
6867 {
6868     CV_Assert(p);
6869     return p->versionMajor_;
6870 }
6871 
versionMinor() const6872 int PlatformInfo::versionMinor() const
6873 {
6874     CV_Assert(p);
6875     return p->versionMinor_;
6876 }
6877 
getPlatforms(std::vector<cl_platform_id> & platforms)6878 static void getPlatforms(std::vector<cl_platform_id>& platforms)
6879 {
6880     cl_uint numPlatforms = 0;
6881     CV_OCL_DBG_CHECK(clGetPlatformIDs(0, NULL, &numPlatforms));
6882 
6883     if (numPlatforms == 0)
6884     {
6885         platforms.clear();
6886         return;
6887     }
6888 
6889     platforms.resize((size_t)numPlatforms);
6890     CV_OCL_DBG_CHECK(clGetPlatformIDs(numPlatforms, &platforms[0], &numPlatforms));
6891 }
6892 
getPlatfomsInfo(std::vector<PlatformInfo> & platformsInfo)6893 void getPlatfomsInfo(std::vector<PlatformInfo>& platformsInfo)
6894 {
6895     std::vector<cl_platform_id> platforms;
6896     getPlatforms(platforms);
6897 
6898     for (size_t i = 0; i < platforms.size(); i++)
6899         platformsInfo.push_back( PlatformInfo((void*)&platforms[i]) );
6900 }
6901 
typeToStr(int type)6902 const char* typeToStr(int type)
6903 {
6904     static const char* tab[]=
6905     {
6906         "uchar", "uchar2", "uchar3", "uchar4", 0, 0, 0, "uchar8", 0, 0, 0, 0, 0, 0, 0, "uchar16",
6907         "char", "char2", "char3", "char4", 0, 0, 0, "char8", 0, 0, 0, 0, 0, 0, 0, "char16",
6908         "ushort", "ushort2", "ushort3", "ushort4",0, 0, 0, "ushort8", 0, 0, 0, 0, 0, 0, 0, "ushort16",
6909         "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
6910         "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6911         "float", "float2", "float3", "float4", 0, 0, 0, "float8", 0, 0, 0, 0, 0, 0, 0, "float16",
6912         "double", "double2", "double3", "double4", 0, 0, 0, "double8", 0, 0, 0, 0, 0, 0, 0, "double16",
6913         "half", "half2", "half3", "half4", 0, 0, 0, "half8", 0, 0, 0, 0, 0, 0, 0, "half16",
6914         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
6915     };
6916     int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
6917     const char* result = cn > 16 ? 0 : tab[depth*16 + cn-1];
6918     CV_Assert(result);
6919     return result;
6920 }
6921 
memopTypeToStr(int type)6922 const char* memopTypeToStr(int type)
6923 {
6924     static const char* tab[] =
6925     {
6926         "uchar", "uchar2", "uchar3", "uchar4", 0, 0, 0, "uchar8", 0, 0, 0, 0, 0, 0, 0, "uchar16",
6927         "char", "char2", "char3", "char4", 0, 0, 0, "char8", 0, 0, 0, 0, 0, 0, 0, "char16",
6928         "ushort", "ushort2", "ushort3", "ushort4",0, 0, 0, "ushort8", 0, 0, 0, 0, 0, 0, 0, "ushort16",
6929         "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
6930         "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6931         "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6932         "ulong", "ulong2", "ulong3", "ulong4", 0, 0, 0, "ulong8", 0, 0, 0, 0, 0, 0, 0, "ulong16",
6933         "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
6934         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
6935     };
6936     int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
6937     const char* result = cn > 16 ? 0 : tab[depth*16 + cn-1];
6938     CV_Assert(result);
6939     return result;
6940 }
6941 
vecopTypeToStr(int type)6942 const char* vecopTypeToStr(int type)
6943 {
6944     static const char* tab[] =
6945     {
6946         "uchar", "short", "uchar3", "int", 0, 0, 0, "int2", 0, 0, 0, 0, 0, 0, 0, "int4",
6947         "char", "short", "char3", "int", 0, 0, 0, "int2", 0, 0, 0, 0, 0, 0, 0, "int4",
6948         "ushort", "int", "ushort3", "int2",0, 0, 0, "int4", 0, 0, 0, 0, 0, 0, 0, "int8",
6949         "short", "int", "short3", "int2", 0, 0, 0, "int4", 0, 0, 0, 0, 0, 0, 0, "int8",
6950         "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6951         "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6952         "ulong", "ulong2", "ulong3", "ulong4", 0, 0, 0, "ulong8", 0, 0, 0, 0, 0, 0, 0, "ulong16",
6953         "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
6954         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
6955     };
6956     int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
6957     const char* result = cn > 16 ? 0 : tab[depth*16 + cn-1];
6958     CV_Assert(result);
6959     return result;
6960 }
6961 
convertTypeStr(int sdepth,int ddepth,int cn,char * buf)6962 const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf)
6963 {
6964     if( sdepth == ddepth )
6965         return "noconvert";
6966     const char *typestr = typeToStr(CV_MAKETYPE(ddepth, cn));
6967     if( ddepth >= CV_32F ||
6968         (ddepth == CV_32S && sdepth < CV_32S) ||
6969         (ddepth == CV_16S && sdepth <= CV_8S) ||
6970         (ddepth == CV_16U && sdepth == CV_8U))
6971     {
6972         sprintf(buf, "convert_%s", typestr);
6973     }
6974     else if( sdepth >= CV_32F )
6975         sprintf(buf, "convert_%s%s_rte", typestr, (ddepth < CV_32S ? "_sat" : ""));
6976     else
6977         sprintf(buf, "convert_%s_sat", typestr);
6978 
6979     return buf;
6980 }
6981 
// Returns the symbolic name of an OpenCL status code as a string literal
// (for example "CL_INVALID_VALUE"), or "Unknown OpenCL error" for a code that is not
// listed below.
const char* getOpenCLErrorString(int errorCode)
{
// CV_OCL_CODE(id): `id` is a constant that is in scope; its own spelling is returned.
#define CV_OCL_CODE(id) case id: return #id
// CV_OCL_CODE_(id, name): the numeric value is written out and the name is given
// separately, so the case does not rely on `name` being a defined constant.
#define CV_OCL_CODE_(id, name) case id: return #name
    switch (errorCode)
    {
    CV_OCL_CODE(CL_SUCCESS);
    CV_OCL_CODE(CL_DEVICE_NOT_FOUND);
    CV_OCL_CODE(CL_DEVICE_NOT_AVAILABLE);
    CV_OCL_CODE(CL_COMPILER_NOT_AVAILABLE);
    CV_OCL_CODE(CL_MEM_OBJECT_ALLOCATION_FAILURE);
    CV_OCL_CODE(CL_OUT_OF_RESOURCES);
    CV_OCL_CODE(CL_OUT_OF_HOST_MEMORY);
    CV_OCL_CODE(CL_PROFILING_INFO_NOT_AVAILABLE);
    CV_OCL_CODE(CL_MEM_COPY_OVERLAP);
    CV_OCL_CODE(CL_IMAGE_FORMAT_MISMATCH);
    CV_OCL_CODE(CL_IMAGE_FORMAT_NOT_SUPPORTED);
    CV_OCL_CODE(CL_BUILD_PROGRAM_FAILURE);
    CV_OCL_CODE(CL_MAP_FAILURE);
    CV_OCL_CODE(CL_MISALIGNED_SUB_BUFFER_OFFSET);
    CV_OCL_CODE(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST);
    CV_OCL_CODE(CL_COMPILE_PROGRAM_FAILURE);
    CV_OCL_CODE(CL_LINKER_NOT_AVAILABLE);
    CV_OCL_CODE(CL_LINK_PROGRAM_FAILURE);
    CV_OCL_CODE(CL_DEVICE_PARTITION_FAILED);
    CV_OCL_CODE(CL_KERNEL_ARG_INFO_NOT_AVAILABLE);
    CV_OCL_CODE(CL_INVALID_VALUE);
    CV_OCL_CODE(CL_INVALID_DEVICE_TYPE);
    CV_OCL_CODE(CL_INVALID_PLATFORM);
    CV_OCL_CODE(CL_INVALID_DEVICE);
    CV_OCL_CODE(CL_INVALID_CONTEXT);
    CV_OCL_CODE(CL_INVALID_QUEUE_PROPERTIES);
    CV_OCL_CODE(CL_INVALID_COMMAND_QUEUE);
    CV_OCL_CODE(CL_INVALID_HOST_PTR);
    CV_OCL_CODE(CL_INVALID_MEM_OBJECT);
    CV_OCL_CODE(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR);
    CV_OCL_CODE(CL_INVALID_IMAGE_SIZE);
    CV_OCL_CODE(CL_INVALID_SAMPLER);
    CV_OCL_CODE(CL_INVALID_BINARY);
    CV_OCL_CODE(CL_INVALID_BUILD_OPTIONS);
    CV_OCL_CODE(CL_INVALID_PROGRAM);
    CV_OCL_CODE(CL_INVALID_PROGRAM_EXECUTABLE);
    CV_OCL_CODE(CL_INVALID_KERNEL_NAME);
    CV_OCL_CODE(CL_INVALID_KERNEL_DEFINITION);
    CV_OCL_CODE(CL_INVALID_KERNEL);
    CV_OCL_CODE(CL_INVALID_ARG_INDEX);
    CV_OCL_CODE(CL_INVALID_ARG_VALUE);
    CV_OCL_CODE(CL_INVALID_ARG_SIZE);
    CV_OCL_CODE(CL_INVALID_KERNEL_ARGS);
    CV_OCL_CODE(CL_INVALID_WORK_DIMENSION);
    CV_OCL_CODE(CL_INVALID_WORK_GROUP_SIZE);
    CV_OCL_CODE(CL_INVALID_WORK_ITEM_SIZE);
    CV_OCL_CODE(CL_INVALID_GLOBAL_OFFSET);
    CV_OCL_CODE(CL_INVALID_EVENT_WAIT_LIST);
    CV_OCL_CODE(CL_INVALID_EVENT);
    CV_OCL_CODE(CL_INVALID_OPERATION);
    CV_OCL_CODE(CL_INVALID_GL_OBJECT);
    CV_OCL_CODE(CL_INVALID_BUFFER_SIZE);
    CV_OCL_CODE(CL_INVALID_MIP_LEVEL);
    CV_OCL_CODE(CL_INVALID_GLOBAL_WORK_SIZE);
    // OpenCL 1.1
    CV_OCL_CODE(CL_INVALID_PROPERTY);
    // OpenCL 1.2
    CV_OCL_CODE(CL_INVALID_IMAGE_DESCRIPTOR);
    CV_OCL_CODE(CL_INVALID_COMPILER_OPTIONS);
    CV_OCL_CODE(CL_INVALID_LINKER_OPTIONS);
    CV_OCL_CODE(CL_INVALID_DEVICE_PARTITION_COUNT);
    // OpenCL 2.0
    CV_OCL_CODE_(-69, CL_INVALID_PIPE_SIZE);
    CV_OCL_CODE_(-70, CL_INVALID_DEVICE_QUEUE);
    // Extensions
    CV_OCL_CODE_(-1000, CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR);
    CV_OCL_CODE_(-1001, CL_PLATFORM_NOT_FOUND_KHR);
    CV_OCL_CODE_(-1002, CL_INVALID_D3D10_DEVICE_KHR);
    CV_OCL_CODE_(-1003, CL_INVALID_D3D10_RESOURCE_KHR);
    CV_OCL_CODE_(-1004, CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR);
    CV_OCL_CODE_(-1005, CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR);
    default: return "Unknown OpenCL error";
    }
// both helper macros are local to this function
#undef CV_OCL_CODE
#undef CV_OCL_CODE_
}
7064 
7065 template <typename T>
kerToStr(const Mat & k)7066 static std::string kerToStr(const Mat & k)
7067 {
7068     int width = k.cols - 1, depth = k.depth();
7069     const T * const data = k.ptr<T>();
7070 
7071     std::ostringstream stream;
7072     stream.precision(10);
7073 
7074     if (depth <= CV_8S)
7075     {
7076         for (int i = 0; i < width; ++i)
7077             stream << "DIG(" << (int)data[i] << ")";
7078         stream << "DIG(" << (int)data[width] << ")";
7079     }
7080     else if (depth == CV_32F)
7081     {
7082         stream.setf(std::ios_base::showpoint);
7083         for (int i = 0; i < width; ++i)
7084             stream << "DIG(" << data[i] << "f)";
7085         stream << "DIG(" << data[width] << "f)";
7086     }
7087     else if (depth == CV_16F)
7088     {
7089         stream.setf(std::ios_base::showpoint);
7090         for (int i = 0; i < width; ++i)
7091             stream << "DIG(" << (float)data[i] << "h)";
7092         stream << "DIG(" << (float)data[width] << "h)";
7093     }
7094     else
7095     {
7096         for (int i = 0; i < width; ++i)
7097             stream << "DIG(" << data[i] << ")";
7098         stream << "DIG(" << data[width] << ")";
7099     }
7100 
7101     return stream.str();
7102 }
7103 
kernelToStr(InputArray _kernel,int ddepth,const char * name)7104 String kernelToStr(InputArray _kernel, int ddepth, const char * name)
7105 {
7106     Mat kernel = _kernel.getMat().reshape(1, 1);
7107 
7108     int depth = kernel.depth();
7109     if (ddepth < 0)
7110         ddepth = depth;
7111 
7112     if (ddepth != depth)
7113         kernel.convertTo(kernel, ddepth);
7114 
7115     typedef std::string (* func_t)(const Mat &);
7116     static const func_t funcs[] = { kerToStr<uchar>, kerToStr<char>, kerToStr<ushort>, kerToStr<short>,
7117                                     kerToStr<int>, kerToStr<float>, kerToStr<double>, kerToStr<float16_t> };
7118     const func_t func = funcs[ddepth];
7119     CV_Assert(func != 0);
7120 
7121     return cv::format(" -D %s=%s", name ? name : "COEFF", func(kernel).c_str());
7122 }
7123 
// Helper for checkOptimalVectorWidth(): records the layout of one input array.
// Expects these names in the enclosing scope: vectorWidths, strat, ref_type and the
// vectors cols, offsets, steps, dividers, kercns.
// An empty `src` is skipped. Otherwise `src` must be a Mat or UMat, and:
//   - ckercn is the vector width listed for the array's depth, cwidth is channels * width;
//   - the enclosing function returns 1 when cwidth < ckercn or ckercn <= 0, and also
//     when strat == OCL_VECTOR_OWN and the array type differs from ref_type;
//   - otherwise cwidth, the offset, the step, ckercn * CV_ELEM_SIZE1(type) and ckercn
//     are appended to cols, offsets, steps, dividers and kercns.
#define PROCESS_SRC(src) \
    do \
    { \
        if (!src.empty()) \
        { \
            CV_Assert(src.isMat() || src.isUMat()); \
            Size csize = src.size(); \
            int ctype = src.type(), ccn = CV_MAT_CN(ctype), cdepth = CV_MAT_DEPTH(ctype), \
                ckercn = vectorWidths[cdepth], cwidth = ccn * csize.width; \
            if (cwidth < ckercn || ckercn <= 0) \
                return 1; \
            cols.push_back(cwidth); \
            if (strat == OCL_VECTOR_OWN && ctype != ref_type) \
                return 1; \
            offsets.push_back(src.offset()); \
            steps.push_back(src.step()); \
            dividers.push_back(ckercn * CV_ELEM_SIZE1(ctype)); \
            kercns.push_back(ckercn); \
        } \
    } \
    while ((void)0, 0)
7145 
predictOptimalVectorWidth(InputArray src1,InputArray src2,InputArray src3,InputArray src4,InputArray src5,InputArray src6,InputArray src7,InputArray src8,InputArray src9,OclVectorStrategy strat)7146 int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
7147                               InputArray src4, InputArray src5, InputArray src6,
7148                               InputArray src7, InputArray src8, InputArray src9,
7149                               OclVectorStrategy strat)
7150 {
7151     const ocl::Device & d = ocl::Device::getDefault();
7152 
7153     int vectorWidths[] = { d.preferredVectorWidthChar(), d.preferredVectorWidthChar(),
7154         d.preferredVectorWidthShort(), d.preferredVectorWidthShort(),
7155         d.preferredVectorWidthInt(), d.preferredVectorWidthFloat(),
7156         d.preferredVectorWidthDouble(), d.preferredVectorWidthHalf() };
7157 
7158     // if the device says don't use vectors
7159     if (vectorWidths[0] == 1)
7160     {
7161         // it's heuristic
7162         vectorWidths[CV_8U] = vectorWidths[CV_8S] = 4;
7163         vectorWidths[CV_16U] = vectorWidths[CV_16S] = vectorWidths[CV_16F] = 2;
7164         vectorWidths[CV_32S] = vectorWidths[CV_32F] = vectorWidths[CV_64F] = 1;
7165     }
7166 
7167     return checkOptimalVectorWidth(vectorWidths, src1, src2, src3, src4, src5, src6, src7, src8, src9, strat);
7168 }
7169 
checkOptimalVectorWidth(const int * vectorWidths,InputArray src1,InputArray src2,InputArray src3,InputArray src4,InputArray src5,InputArray src6,InputArray src7,InputArray src8,InputArray src9,OclVectorStrategy strat)7170 int checkOptimalVectorWidth(const int *vectorWidths,
7171                             InputArray src1, InputArray src2, InputArray src3,
7172                             InputArray src4, InputArray src5, InputArray src6,
7173                             InputArray src7, InputArray src8, InputArray src9,
7174                             OclVectorStrategy strat)
7175 {
7176     CV_Assert(vectorWidths);
7177 
7178     int ref_type = src1.type();
7179 
7180     std::vector<size_t> offsets, steps, cols;
7181     std::vector<int> dividers, kercns;
7182     PROCESS_SRC(src1);
7183     PROCESS_SRC(src2);
7184     PROCESS_SRC(src3);
7185     PROCESS_SRC(src4);
7186     PROCESS_SRC(src5);
7187     PROCESS_SRC(src6);
7188     PROCESS_SRC(src7);
7189     PROCESS_SRC(src8);
7190     PROCESS_SRC(src9);
7191 
7192     size_t size = offsets.size();
7193 
7194     for (size_t i = 0; i < size; ++i)
7195         while (offsets[i] % dividers[i] != 0 || steps[i] % dividers[i] != 0 || cols[i] % kercns[i] != 0)
7196             dividers[i] >>= 1, kercns[i] >>= 1;
7197 
7198     // default strategy
7199     int kercn = *std::min_element(kercns.begin(), kercns.end());
7200 
7201     return kercn;
7202 }
7203 
predictOptimalVectorWidthMax(InputArray src1,InputArray src2,InputArray src3,InputArray src4,InputArray src5,InputArray src6,InputArray src7,InputArray src8,InputArray src9)7204 int predictOptimalVectorWidthMax(InputArray src1, InputArray src2, InputArray src3,
7205                                  InputArray src4, InputArray src5, InputArray src6,
7206                                  InputArray src7, InputArray src8, InputArray src9)
7207 {
7208     return predictOptimalVectorWidth(src1, src2, src3, src4, src5, src6, src7, src8, src9, OCL_VECTOR_MAX);
7209 }
7210 
7211 #undef PROCESS_SRC
7212 
7213 
7214 // TODO Make this as a method of OpenCL "BuildOptions" class
buildOptionsAddMatrixDescription(String & buildOptions,const String & name,InputArray _m)7215 void buildOptionsAddMatrixDescription(String& buildOptions, const String& name, InputArray _m)
7216 {
7217     if (!buildOptions.empty())
7218         buildOptions += " ";
7219     int type = _m.type(), depth = CV_MAT_DEPTH(type);
7220     buildOptions += format(
7221             "-D %s_T=%s -D %s_T1=%s -D %s_CN=%d -D %s_TSIZE=%d -D %s_T1SIZE=%d -D %s_DEPTH=%d",
7222             name.c_str(), ocl::typeToStr(type),
7223             name.c_str(), ocl::typeToStr(CV_MAKE_TYPE(depth, 1)),
7224             name.c_str(), (int)CV_MAT_CN(type),
7225             name.c_str(), (int)CV_ELEM_SIZE(type),
7226             name.c_str(), (int)CV_ELEM_SIZE1(type),
7227             name.c_str(), (int)depth
7228             );
7229 }
7230 
7231 
7232 struct Image2D::Impl
7233 {
Implcv::ocl::Image2D::Impl7234     Impl(const UMat &src, bool norm, bool alias)
7235     {
7236         handle = 0;
7237         refcount = 1;
7238         init(src, norm, alias);
7239     }
7240 
~Implcv::ocl::Image2D::Impl7241     ~Impl()
7242     {
7243         if (handle)
7244             clReleaseMemObject(handle);
7245     }
7246 
getImageFormatcv::ocl::Image2D::Impl7247     static cl_image_format getImageFormat(int depth, int cn, bool norm)
7248     {
7249         cl_image_format format;
7250         static const int channelTypes[] = { CL_UNSIGNED_INT8, CL_SIGNED_INT8, CL_UNSIGNED_INT16,
7251                                        CL_SIGNED_INT16, CL_SIGNED_INT32, CL_FLOAT, -1, CL_HALF_FLOAT };
7252         static const int channelTypesNorm[] = { CL_UNORM_INT8, CL_SNORM_INT8, CL_UNORM_INT16,
7253                                                 CL_SNORM_INT16, -1, -1, -1, -1 };
7254         // CL_RGB has no mappings to OpenCV types because CL_RGB can only be used with
7255         // CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, or CL_UNORM_INT_101010.
7256         static const int channelOrders[] = { -1, CL_R, CL_RG, /*CL_RGB*/ -1, CL_RGBA };
7257 
7258         int channelType = norm ? channelTypesNorm[depth] : channelTypes[depth];
7259         int channelOrder = channelOrders[cn];
7260         format.image_channel_data_type = (cl_channel_type)channelType;
7261         format.image_channel_order = (cl_channel_order)channelOrder;
7262         return format;
7263     }
7264 
isFormatSupportedcv::ocl::Image2D::Impl7265     static bool isFormatSupported(cl_image_format format)
7266     {
7267         if (!haveOpenCL())
7268             CV_Error(Error::OpenCLApiCallError, "OpenCL runtime not found!");
7269 
7270         cl_context context = (cl_context)Context::getDefault().ptr();
7271         if (!context)
7272             return false;
7273 
7274         // Figure out how many formats are supported by this context.
7275         cl_uint numFormats = 0;
7276         cl_int err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE,
7277                                                 CL_MEM_OBJECT_IMAGE2D, numFormats,
7278                                                 NULL, &numFormats);
7279         CV_OCL_DBG_CHECK_RESULT(err, "clGetSupportedImageFormats(CL_MEM_OBJECT_IMAGE2D, NULL)");
7280         if (numFormats > 0)
7281         {
7282             AutoBuffer<cl_image_format> formats(numFormats);
7283             err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE,
7284                                              CL_MEM_OBJECT_IMAGE2D, numFormats,
7285                                              formats.data(), NULL);
7286             CV_OCL_DBG_CHECK_RESULT(err, "clGetSupportedImageFormats(CL_MEM_OBJECT_IMAGE2D, formats)");
7287             for (cl_uint i = 0; i < numFormats; ++i)
7288             {
7289                 if (!memcmp(&formats[i], &format, sizeof(format)))
7290                 {
7291                     return true;
7292                 }
7293             }
7294         }
7295         return false;
7296     }
7297 
initcv::ocl::Image2D::Impl7298     void init(const UMat &src, bool norm, bool alias)
7299     {
7300         if (!haveOpenCL())
7301             CV_Error(Error::OpenCLApiCallError, "OpenCL runtime not found!");
7302 
7303         CV_Assert(!src.empty());
7304         CV_Assert(ocl::Device::getDefault().imageSupport());
7305 
7306         int err, depth = src.depth(), cn = src.channels();
7307         CV_Assert(cn <= 4);
7308         cl_image_format format = getImageFormat(depth, cn, norm);
7309 
7310         if (!isFormatSupported(format))
7311             CV_Error(Error::OpenCLApiCallError, "Image format is not supported");
7312 
7313         if (alias && !src.handle(ACCESS_RW))
7314             CV_Error(Error::OpenCLApiCallError, "Incorrect UMat, handle is null");
7315 
7316         cl_context context = (cl_context)Context::getDefault().ptr();
7317         cl_command_queue queue = (cl_command_queue)Queue::getDefault().ptr();
7318 
7319 #ifdef CL_VERSION_1_2
7320         // this enables backwards portability to
7321         // run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
7322         const Device & d = ocl::Device::getDefault();
7323         int minor = d.deviceVersionMinor(), major = d.deviceVersionMajor();
7324         CV_Assert(!alias || canCreateAlias(src));
7325         if (1 < major || (1 == major && 2 <= minor))
7326         {
7327             cl_image_desc desc;
7328             desc.image_type       = CL_MEM_OBJECT_IMAGE2D;
7329             desc.image_width      = src.cols;
7330             desc.image_height     = src.rows;
7331             desc.image_depth      = 0;
7332             desc.image_array_size = 1;
7333             desc.image_row_pitch  = alias ? src.step[0] : 0;
7334             desc.image_slice_pitch = 0;
7335             desc.buffer           = alias ? (cl_mem)src.handle(ACCESS_RW) : 0;
7336             desc.num_mip_levels   = 0;
7337             desc.num_samples      = 0;
7338             handle = clCreateImage(context, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
7339         }
7340         else
7341 #endif
7342         {
7343             CV_SUPPRESS_DEPRECATED_START
7344             CV_Assert(!alias);  // This is an OpenCL 1.2 extension
7345             handle = clCreateImage2D(context, CL_MEM_READ_WRITE, &format, src.cols, src.rows, 0, NULL, &err);
7346             CV_SUPPRESS_DEPRECATED_END
7347         }
7348         CV_OCL_DBG_CHECK_RESULT(err, "clCreateImage()");
7349 
7350         size_t origin[] = { 0, 0, 0 };
7351         size_t region[] = { static_cast<size_t>(src.cols), static_cast<size_t>(src.rows), 1 };
7352 
7353         cl_mem devData;
7354         if (!alias && !src.isContinuous())
7355         {
7356             devData = clCreateBuffer(context, CL_MEM_READ_ONLY, src.cols * src.rows * src.elemSize(), NULL, &err);
7357             CV_OCL_CHECK_RESULT(err, cv::format("clCreateBuffer(CL_MEM_READ_ONLY, sz=%lld) => %p",
7358                     (long long int)(src.cols * src.rows * src.elemSize()), (void*)devData
7359                 ).c_str());
7360 
7361             const size_t roi[3] = {static_cast<size_t>(src.cols) * src.elemSize(), static_cast<size_t>(src.rows), 1};
7362             CV_OCL_CHECK(clEnqueueCopyBufferRect(queue, (cl_mem)src.handle(ACCESS_READ), devData, origin, origin,
7363                 roi, src.step, 0, src.cols * src.elemSize(), 0, 0, NULL, NULL));
7364             CV_OCL_DBG_CHECK(clFlush(queue));
7365         }
7366         else
7367         {
7368             devData = (cl_mem)src.handle(ACCESS_READ);
7369         }
7370         CV_Assert(devData != NULL);
7371 
7372         if (!alias)
7373         {
7374             CV_OCL_CHECK(clEnqueueCopyBufferToImage(queue, devData, handle, 0, origin, region, 0, NULL, 0));
7375             if (!src.isContinuous())
7376             {
7377                 CV_OCL_DBG_CHECK(clFlush(queue));
7378                 CV_OCL_DBG_CHECK(clReleaseMemObject(devData));
7379             }
7380         }
7381     }
7382 
7383     IMPLEMENT_REFCOUNTABLE();
7384 
7385     cl_mem handle;
7386 };
7387 
Image2D()7388 Image2D::Image2D() CV_NOEXCEPT
7389 {
7390     p = NULL;
7391 }
7392 
Image2D(const UMat & src,bool norm,bool alias)7393 Image2D::Image2D(const UMat &src, bool norm, bool alias)
7394 {
7395     p = new Impl(src, norm, alias);
7396 }
7397 
canCreateAlias(const UMat & m)7398 bool Image2D::canCreateAlias(const UMat &m)
7399 {
7400     bool ret = false;
7401     const Device & d = ocl::Device::getDefault();
7402     if (d.imageFromBufferSupport() && !m.empty())
7403     {
7404         // This is the required pitch alignment in pixels
7405         uint pitchAlign = d.imagePitchAlignment();
7406         if (pitchAlign && !(m.step % (pitchAlign * m.elemSize())))
7407         {
7408             // We don't currently handle the case where the buffer was created
7409             // with CL_MEM_USE_HOST_PTR
7410             if (!m.u->tempUMat())
7411             {
7412                 ret = true;
7413             }
7414         }
7415     }
7416     return ret;
7417 }
7418 
isFormatSupported(int depth,int cn,bool norm)7419 bool Image2D::isFormatSupported(int depth, int cn, bool norm)
7420 {
7421     cl_image_format format = Impl::getImageFormat(depth, cn, norm);
7422 
7423     return Impl::isFormatSupported(format);
7424 }
7425 
// Copy constructor: shares the implementation object of `i` and, when there
// is one, adds a reference to it.
Image2D::Image2D(const Image2D & i)
    : p(i.p)
{
    if (p != NULL)
        p->addref();
}
7432 
// Copy assignment: switches to the implementation object of `i`.
// Nothing happens when both sides already share the same implementation.
Image2D & Image2D::operator = (const Image2D & i)
{
    Impl* const incoming = i.p;
    if (incoming == p)
        return *this;

    // Take the new reference before dropping the old one.
    if (incoming)
        incoming->addref();
    if (p)
        p->release();
    p = incoming;
    return *this;
}
7445 
Image2D(Image2D && i)7446 Image2D::Image2D(Image2D&& i) CV_NOEXCEPT
7447 {
7448     p = i.p;
7449     i.p = nullptr;
7450 }
7451 
operator =(Image2D && i)7452 Image2D& Image2D::operator = (Image2D&& i) CV_NOEXCEPT
7453 {
7454     if (this != &i) {
7455         if (p)
7456             p->release();
7457         p = i.p;
7458         i.p = nullptr;
7459     }
7460     return *this;
7461 }
7462 
~Image2D()7463 Image2D::~Image2D()
7464 {
7465     if (p)
7466         p->release();
7467 }
7468 
ptr() const7469 void* Image2D::ptr() const
7470 {
7471     return p ? p->handle : 0;
7472 }
7473 
isOpenCLForced()7474 bool internal::isOpenCLForced()
7475 {
7476     static bool initialized = false;
7477     static bool value = false;
7478     if (!initialized)
7479     {
7480         value = utils::getConfigurationParameterBool("OPENCV_OPENCL_FORCE", false);
7481         initialized = true;
7482     }
7483     return value;
7484 }
7485 
isPerformanceCheckBypassed()7486 bool internal::isPerformanceCheckBypassed()
7487 {
7488     static bool initialized = false;
7489     static bool value = false;
7490     if (!initialized)
7491     {
7492         value = utils::getConfigurationParameterBool("OPENCV_OPENCL_PERF_CHECK_BYPASS", false);
7493         initialized = true;
7494     }
7495     return value;
7496 }
7497 
isCLBuffer(UMat & u)7498 bool internal::isCLBuffer(UMat& u)
7499 {
7500     void* h = u.handle(ACCESS_RW);
7501     if (!h)
7502         return true;
7503     CV_DbgAssert(u.u->currAllocator == getOpenCLAllocator());
7504 #if 1
7505     if ((u.u->allocatorFlags_ & 0xffff0000) != 0) // OpenCL SVM flags are stored here
7506         return false;
7507 #else
7508     cl_mem_object_type type = 0;
7509     cl_int ret = clGetMemObjectInfo((cl_mem)h, CL_MEM_TYPE, sizeof(type), &type, NULL);
7510     if (ret != CL_SUCCESS || type != CL_MEM_OBJECT_BUFFER)
7511         return false;
7512 #endif
7513     return true;
7514 }
7515 
7516 struct Timer::Impl
7517 {
7518     const Queue queue;
7519 
Implcv::ocl::Timer::Impl7520     Impl(const Queue& q)
7521         : queue(q)
7522     {
7523     }
7524 
~Implcv::ocl::Timer::Impl7525     ~Impl(){}
7526 
startcv::ocl::Timer::Impl7527     void start()
7528     {
7529         CV_OCL_DBG_CHECK(clFinish((cl_command_queue)queue.ptr()));
7530         timer.start();
7531     }
7532 
stopcv::ocl::Timer::Impl7533     void stop()
7534     {
7535         CV_OCL_DBG_CHECK(clFinish((cl_command_queue)queue.ptr()));
7536         timer.stop();
7537     }
7538 
durationNScv::ocl::Timer::Impl7539     uint64 durationNS() const
7540     {
7541         return (uint64)(timer.getTimeSec() * 1e9);
7542     }
7543 
7544     TickMeter timer;
7545 };
7546 
Timer(const Queue & q)7547 Timer::Timer(const Queue& q) : p(new Impl(q)) { }
~Timer()7548 Timer::~Timer() { delete p; }
7549 
start()7550 void Timer::start()
7551 {
7552     CV_Assert(p);
7553     p->start();
7554 }
7555 
stop()7556 void Timer::stop()
7557 {
7558     CV_Assert(p);
7559     p->stop();
7560 }
7561 
durationNS() const7562 uint64 Timer::durationNS() const
7563 {
7564     CV_Assert(p);
7565     return p->durationNS();
7566 }
7567 
7568 }} // namespace
7569 
7570 #endif // HAVE_OPENCL
7571