1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                           License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 //   * Redistribution's of source code must retain the above copyright notice,
21 //     this list of conditions and the following disclaimer.
22 //
23 //   * Redistribution's in binary form must reproduce the above copyright notice,
24 //     this list of conditions and the following disclaimer in the documentation
25 //     and/or other materials provided with the distribution.
26 //
27 //   * The name of the copyright holders may not be used to endorse or promote products
28 //     derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42 
43 #include "precomp.hpp"
44 
45 using namespace cv;
46 using namespace cv::cuda;
47 
getCudaEnabledDeviceCount()48 int cv::cuda::getCudaEnabledDeviceCount()
49 {
50 #ifndef HAVE_CUDA
51     return 0;
52 #else
53     int count;
54     cudaError_t error = cudaGetDeviceCount(&count);
55 
56     if (error == cudaErrorInsufficientDriver)
57         return -1;
58 
59     if (error == cudaErrorNoDevice)
60         return 0;
61 
62     cudaSafeCall( error );
63     return count;
64 #endif
65 }
66 
setDevice(int device)67 void cv::cuda::setDevice(int device)
68 {
69 #ifndef HAVE_CUDA
70     CV_UNUSED(device);
71     throw_no_cuda();
72 #else
73     cudaSafeCall( cudaSetDevice(device) );
74     cudaSafeCall( cudaFree(0) );
75 #endif
76 }
77 
getDevice()78 int cv::cuda::getDevice()
79 {
80 #ifndef HAVE_CUDA
81     throw_no_cuda();
82 #else
83     int device;
84     cudaSafeCall( cudaGetDevice(&device) );
85     return device;
86 #endif
87 }
88 
resetDevice()89 void cv::cuda::resetDevice()
90 {
91 #ifndef HAVE_CUDA
92     throw_no_cuda();
93 #else
94     cudaSafeCall( cudaDeviceReset() );
95 #endif
96 }
97 
deviceSupports(FeatureSet feature_set)98 bool cv::cuda::deviceSupports(FeatureSet feature_set)
99 {
100 #ifndef HAVE_CUDA
101     CV_UNUSED(feature_set);
102     throw_no_cuda();
103 #else
104     static int versions[] =
105     {
106         -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
107     };
108     static const int cache_size = static_cast<int>(sizeof(versions) / sizeof(versions[0]));
109 
110     const int devId = getDevice();
111 
112     int version;
113 
114     if (devId < cache_size && versions[devId] >= 0)
115     {
116         version = versions[devId];
117     }
118     else
119     {
120         DeviceInfo dev(devId);
121         version = dev.majorVersion() * 10 + dev.minorVersion();
122         if (devId < cache_size)
123             versions[devId] = version;
124     }
125 
126     return TargetArchs::builtWith(feature_set) && (version >= feature_set);
127 #endif
128 }
129 
130 ////////////////////////////////////////////////////////////////////////
131 // TargetArchs
132 
133 #ifdef HAVE_CUDA
134 
135 namespace
136 {
137     class CudaArch
138     {
139     public:
140         CudaArch();
141 
142         bool builtWith(FeatureSet feature_set) const;
143         bool hasPtx(int major, int minor) const;
144         bool hasBin(int major, int minor) const;
145         bool hasEqualOrLessPtx(int major, int minor) const;
146         bool hasEqualOrGreaterPtx(int major, int minor) const;
147         bool hasEqualOrGreaterBin(int major, int minor) const;
148 
149     private:
150         static void fromStr(const char* set_as_str, std::vector<int>& arr);
151 
152         std::vector<int> bin;
153         std::vector<int> ptx;
154         std::vector<int> features;
155     };
156 
157     const CudaArch cudaArch;
158 
CudaArch()159     CudaArch::CudaArch()
160     {
161         fromStr(CUDA_ARCH_BIN, bin);
162         fromStr(CUDA_ARCH_PTX, ptx);
163         fromStr(CUDA_ARCH_FEATURES, features);
164     }
165 
builtWith(FeatureSet feature_set) const166     bool CudaArch::builtWith(FeatureSet feature_set) const
167     {
168         return !features.empty() && (features.back() >= feature_set);
169     }
170 
hasPtx(int major,int minor) const171     bool CudaArch::hasPtx(int major, int minor) const
172     {
173         return std::find(ptx.begin(), ptx.end(), major * 10 + minor) != ptx.end();
174     }
175 
hasBin(int major,int minor) const176     bool CudaArch::hasBin(int major, int minor) const
177     {
178         return std::find(bin.begin(), bin.end(), major * 10 + minor) != bin.end();
179     }
180 
hasEqualOrLessPtx(int major,int minor) const181     bool CudaArch::hasEqualOrLessPtx(int major, int minor) const
182     {
183         return !ptx.empty() && (ptx.front() <= major * 10 + minor);
184     }
185 
hasEqualOrGreaterPtx(int major,int minor) const186     bool CudaArch::hasEqualOrGreaterPtx(int major, int minor) const
187     {
188         return !ptx.empty() && (ptx.back() >= major * 10 + minor);
189     }
190 
hasEqualOrGreaterBin(int major,int minor) const191     bool CudaArch::hasEqualOrGreaterBin(int major, int minor) const
192     {
193         return !bin.empty() && (bin.back() >= major * 10 + minor);
194     }
195 
fromStr(const char * set_as_str,std::vector<int> & arr)196     void CudaArch::fromStr(const char* set_as_str, std::vector<int>& arr)
197     {
198         arr.clear();
199 
200         const size_t len = strlen(set_as_str);
201 
202         size_t pos = 0;
203         while (pos < len)
204         {
205             if (isspace(set_as_str[pos]))
206             {
207                 ++pos;
208             }
209             else
210             {
211                 int cur_value;
212                 int chars_read;
213                 int args_read = sscanf(set_as_str + pos, "%d%n", &cur_value, &chars_read);
214                 CV_Assert( args_read == 1 );
215 
216                 arr.push_back(cur_value);
217                 pos += chars_read;
218             }
219         }
220 
221         std::sort(arr.begin(), arr.end());
222     }
223 }
224 
225 #endif
226 
builtWith(cv::cuda::FeatureSet feature_set)227 bool cv::cuda::TargetArchs::builtWith(cv::cuda::FeatureSet feature_set)
228 {
229 #ifndef HAVE_CUDA
230     CV_UNUSED(feature_set);
231     throw_no_cuda();
232 #else
233     return cudaArch.builtWith(feature_set);
234 #endif
235 }
236 
hasPtx(int major,int minor)237 bool cv::cuda::TargetArchs::hasPtx(int major, int minor)
238 {
239 #ifndef HAVE_CUDA
240     CV_UNUSED(major);
241     CV_UNUSED(minor);
242     throw_no_cuda();
243 #else
244     return cudaArch.hasPtx(major, minor);
245 #endif
246 }
247 
hasBin(int major,int minor)248 bool cv::cuda::TargetArchs::hasBin(int major, int minor)
249 {
250 #ifndef HAVE_CUDA
251     CV_UNUSED(major);
252     CV_UNUSED(minor);
253     throw_no_cuda();
254 #else
255     return cudaArch.hasBin(major, minor);
256 #endif
257 }
258 
hasEqualOrLessPtx(int major,int minor)259 bool cv::cuda::TargetArchs::hasEqualOrLessPtx(int major, int minor)
260 {
261 #ifndef HAVE_CUDA
262     CV_UNUSED(major);
263     CV_UNUSED(minor);
264     throw_no_cuda();
265 #else
266     return cudaArch.hasEqualOrLessPtx(major, minor);
267 #endif
268 }
269 
hasEqualOrGreaterPtx(int major,int minor)270 bool cv::cuda::TargetArchs::hasEqualOrGreaterPtx(int major, int minor)
271 {
272 #ifndef HAVE_CUDA
273     CV_UNUSED(major);
274     CV_UNUSED(minor);
275     throw_no_cuda();
276 #else
277     return cudaArch.hasEqualOrGreaterPtx(major, minor);
278 #endif
279 }
280 
hasEqualOrGreaterBin(int major,int minor)281 bool cv::cuda::TargetArchs::hasEqualOrGreaterBin(int major, int minor)
282 {
283 #ifndef HAVE_CUDA
284     CV_UNUSED(major);
285     CV_UNUSED(minor);
286     throw_no_cuda();
287 #else
288     return cudaArch.hasEqualOrGreaterBin(major, minor);
289 #endif
290 }
291 
292 ////////////////////////////////////////////////////////////////////////
293 // DeviceInfo
294 
295 #ifdef HAVE_CUDA
296 
297 namespace
298 {
299     class DeviceProps
300     {
301     public:
302         DeviceProps();
303 
304         const cudaDeviceProp* get(int devID) const;
305 
306     private:
307         std::vector<cudaDeviceProp> props_;
308     };
309 
DeviceProps()310     DeviceProps::DeviceProps()
311     {
312         int count = getCudaEnabledDeviceCount();
313 
314         if (count > 0)
315         {
316             props_.resize(count);
317 
318             for (int devID = 0; devID < count; ++devID)
319             {
320                 cudaSafeCall( cudaGetDeviceProperties(&props_[devID], devID) );
321             }
322         }
323     }
324 
get(int devID) const325     const cudaDeviceProp* DeviceProps::get(int devID) const
326     {
327         CV_Assert( static_cast<size_t>(devID) < props_.size() );
328 
329         return &props_[devID];
330     }
331 
deviceProps()332     DeviceProps& deviceProps()
333     {
334         static DeviceProps props;
335         return props;
336     }
337 }
338 
339 #endif
340 
name() const341 const char* cv::cuda::DeviceInfo::name() const
342 {
343 #ifndef HAVE_CUDA
344     throw_no_cuda();
345 #else
346     return deviceProps().get(device_id_)->name;
347 #endif
348 }
349 
totalGlobalMem() const350 size_t cv::cuda::DeviceInfo::totalGlobalMem() const
351 {
352 #ifndef HAVE_CUDA
353     throw_no_cuda();
354 #else
355     return deviceProps().get(device_id_)->totalGlobalMem;
356 #endif
357 }
358 
sharedMemPerBlock() const359 size_t cv::cuda::DeviceInfo::sharedMemPerBlock() const
360 {
361 #ifndef HAVE_CUDA
362     throw_no_cuda();
363 #else
364     return deviceProps().get(device_id_)->sharedMemPerBlock;
365 #endif
366 }
367 
regsPerBlock() const368 int cv::cuda::DeviceInfo::regsPerBlock() const
369 {
370 #ifndef HAVE_CUDA
371     throw_no_cuda();
372 #else
373     return deviceProps().get(device_id_)->regsPerBlock;
374 #endif
375 }
376 
warpSize() const377 int cv::cuda::DeviceInfo::warpSize() const
378 {
379 #ifndef HAVE_CUDA
380     throw_no_cuda();
381 #else
382     return deviceProps().get(device_id_)->warpSize;
383 #endif
384 }
385 
memPitch() const386 size_t cv::cuda::DeviceInfo::memPitch() const
387 {
388 #ifndef HAVE_CUDA
389     throw_no_cuda();
390 #else
391     return deviceProps().get(device_id_)->memPitch;
392 #endif
393 }
394 
maxThreadsPerBlock() const395 int cv::cuda::DeviceInfo::maxThreadsPerBlock() const
396 {
397 #ifndef HAVE_CUDA
398     throw_no_cuda();
399 #else
400     return deviceProps().get(device_id_)->maxThreadsPerBlock;
401 #endif
402 }
403 
maxThreadsDim() const404 Vec3i cv::cuda::DeviceInfo::maxThreadsDim() const
405 {
406 #ifndef HAVE_CUDA
407     throw_no_cuda();
408 #else
409     return Vec3i(deviceProps().get(device_id_)->maxThreadsDim);
410 #endif
411 }
412 
maxGridSize() const413 Vec3i cv::cuda::DeviceInfo::maxGridSize() const
414 {
415 #ifndef HAVE_CUDA
416     throw_no_cuda();
417 #else
418     return Vec3i(deviceProps().get(device_id_)->maxGridSize);
419 #endif
420 }
421 
clockRate() const422 int cv::cuda::DeviceInfo::clockRate() const
423 {
424 #ifndef HAVE_CUDA
425     throw_no_cuda();
426 #else
427     return deviceProps().get(device_id_)->clockRate;
428 #endif
429 }
430 
totalConstMem() const431 size_t cv::cuda::DeviceInfo::totalConstMem() const
432 {
433 #ifndef HAVE_CUDA
434     throw_no_cuda();
435 #else
436     return deviceProps().get(device_id_)->totalConstMem;
437 #endif
438 }
439 
majorVersion() const440 int cv::cuda::DeviceInfo::majorVersion() const
441 {
442 #ifndef HAVE_CUDA
443     throw_no_cuda();
444 #else
445     return deviceProps().get(device_id_)->major;
446 #endif
447 }
448 
minorVersion() const449 int cv::cuda::DeviceInfo::minorVersion() const
450 {
451 #ifndef HAVE_CUDA
452     throw_no_cuda();
453 #else
454     return deviceProps().get(device_id_)->minor;
455 #endif
456 }
457 
textureAlignment() const458 size_t cv::cuda::DeviceInfo::textureAlignment() const
459 {
460 #ifndef HAVE_CUDA
461     throw_no_cuda();
462 #else
463     return deviceProps().get(device_id_)->textureAlignment;
464 #endif
465 }
466 
texturePitchAlignment() const467 size_t cv::cuda::DeviceInfo::texturePitchAlignment() const
468 {
469 #ifndef HAVE_CUDA
470     throw_no_cuda();
471 #else
472     return deviceProps().get(device_id_)->texturePitchAlignment;
473 #endif
474 }
475 
multiProcessorCount() const476 int cv::cuda::DeviceInfo::multiProcessorCount() const
477 {
478 #ifndef HAVE_CUDA
479     throw_no_cuda();
480 #else
481     return deviceProps().get(device_id_)->multiProcessorCount;
482 #endif
483 }
484 
kernelExecTimeoutEnabled() const485 bool cv::cuda::DeviceInfo::kernelExecTimeoutEnabled() const
486 {
487 #ifndef HAVE_CUDA
488     throw_no_cuda();
489 #else
490     return deviceProps().get(device_id_)->kernelExecTimeoutEnabled != 0;
491 #endif
492 }
493 
integrated() const494 bool cv::cuda::DeviceInfo::integrated() const
495 {
496 #ifndef HAVE_CUDA
497     throw_no_cuda();
498 #else
499     return deviceProps().get(device_id_)->integrated != 0;
500 #endif
501 }
502 
canMapHostMemory() const503 bool cv::cuda::DeviceInfo::canMapHostMemory() const
504 {
505 #ifndef HAVE_CUDA
506     throw_no_cuda();
507 #else
508     return deviceProps().get(device_id_)->canMapHostMemory != 0;
509 #endif
510 }
511 
computeMode() const512 DeviceInfo::ComputeMode cv::cuda::DeviceInfo::computeMode() const
513 {
514 #ifndef HAVE_CUDA
515     throw_no_cuda();
516 #else
517     static const ComputeMode tbl[] =
518     {
519         ComputeModeDefault,
520         ComputeModeExclusive,
521         ComputeModeProhibited,
522         ComputeModeExclusiveProcess
523     };
524 
525     return tbl[deviceProps().get(device_id_)->computeMode];
526 #endif
527 }
528 
maxTexture1D() const529 int cv::cuda::DeviceInfo::maxTexture1D() const
530 {
531 #ifndef HAVE_CUDA
532     throw_no_cuda();
533 #else
534     return deviceProps().get(device_id_)->maxTexture1D;
535 #endif
536 }
537 
maxTexture1DMipmap() const538 int cv::cuda::DeviceInfo::maxTexture1DMipmap() const
539 {
540 #ifndef HAVE_CUDA
541     throw_no_cuda();
542 #else
543     #if CUDA_VERSION >= 5000
544         return deviceProps().get(device_id_)->maxTexture1DMipmap;
545     #else
546         CV_Error(Error::StsNotImplemented, "This function requires CUDA 5.0");
547         return 0;
548     #endif
549 #endif
550 }
551 
maxTexture1DLinear() const552 int cv::cuda::DeviceInfo::maxTexture1DLinear() const
553 {
554 #ifndef HAVE_CUDA
555     throw_no_cuda();
556 #else
557     return deviceProps().get(device_id_)->maxTexture1DLinear;
558 #endif
559 }
560 
maxTexture2D() const561 Vec2i cv::cuda::DeviceInfo::maxTexture2D() const
562 {
563 #ifndef HAVE_CUDA
564     throw_no_cuda();
565 #else
566     return Vec2i(deviceProps().get(device_id_)->maxTexture2D);
567 #endif
568 }
569 
maxTexture2DMipmap() const570 Vec2i cv::cuda::DeviceInfo::maxTexture2DMipmap() const
571 {
572 #ifndef HAVE_CUDA
573     throw_no_cuda();
574 #else
575     #if CUDA_VERSION >= 5000
576         return Vec2i(deviceProps().get(device_id_)->maxTexture2DMipmap);
577     #else
578         CV_Error(Error::StsNotImplemented, "This function requires CUDA 5.0");
579         return Vec2i();
580     #endif
581 #endif
582 }
583 
maxTexture2DLinear() const584 Vec3i cv::cuda::DeviceInfo::maxTexture2DLinear() const
585 {
586 #ifndef HAVE_CUDA
587     throw_no_cuda();
588 #else
589     return Vec3i(deviceProps().get(device_id_)->maxTexture2DLinear);
590 #endif
591 }
592 
maxTexture2DGather() const593 Vec2i cv::cuda::DeviceInfo::maxTexture2DGather() const
594 {
595 #ifndef HAVE_CUDA
596     throw_no_cuda();
597 #else
598     return Vec2i(deviceProps().get(device_id_)->maxTexture2DGather);
599 #endif
600 }
601 
maxTexture3D() const602 Vec3i cv::cuda::DeviceInfo::maxTexture3D() const
603 {
604 #ifndef HAVE_CUDA
605     throw_no_cuda();
606 #else
607     return Vec3i(deviceProps().get(device_id_)->maxTexture3D);
608 #endif
609 }
610 
maxTextureCubemap() const611 int cv::cuda::DeviceInfo::maxTextureCubemap() const
612 {
613 #ifndef HAVE_CUDA
614     throw_no_cuda();
615 #else
616     return deviceProps().get(device_id_)->maxTextureCubemap;
617 #endif
618 }
619 
maxTexture1DLayered() const620 Vec2i cv::cuda::DeviceInfo::maxTexture1DLayered() const
621 {
622 #ifndef HAVE_CUDA
623     throw_no_cuda();
624 #else
625     return Vec2i(deviceProps().get(device_id_)->maxTexture1DLayered);
626 #endif
627 }
628 
maxTexture2DLayered() const629 Vec3i cv::cuda::DeviceInfo::maxTexture2DLayered() const
630 {
631 #ifndef HAVE_CUDA
632     throw_no_cuda();
633 #else
634     return Vec3i(deviceProps().get(device_id_)->maxTexture2DLayered);
635 #endif
636 }
637 
maxTextureCubemapLayered() const638 Vec2i cv::cuda::DeviceInfo::maxTextureCubemapLayered() const
639 {
640 #ifndef HAVE_CUDA
641     throw_no_cuda();
642 #else
643     return Vec2i(deviceProps().get(device_id_)->maxTextureCubemapLayered);
644 #endif
645 }
646 
maxSurface1D() const647 int cv::cuda::DeviceInfo::maxSurface1D() const
648 {
649 #ifndef HAVE_CUDA
650     throw_no_cuda();
651 #else
652     return deviceProps().get(device_id_)->maxSurface1D;
653 #endif
654 }
655 
maxSurface2D() const656 Vec2i cv::cuda::DeviceInfo::maxSurface2D() const
657 {
658 #ifndef HAVE_CUDA
659     throw_no_cuda();
660 #else
661     return Vec2i(deviceProps().get(device_id_)->maxSurface2D);
662 #endif
663 }
664 
maxSurface3D() const665 Vec3i cv::cuda::DeviceInfo::maxSurface3D() const
666 {
667 #ifndef HAVE_CUDA
668     throw_no_cuda();
669 #else
670     return Vec3i(deviceProps().get(device_id_)->maxSurface3D);
671 #endif
672 }
673 
maxSurface1DLayered() const674 Vec2i cv::cuda::DeviceInfo::maxSurface1DLayered() const
675 {
676 #ifndef HAVE_CUDA
677     throw_no_cuda();
678 #else
679     return Vec2i(deviceProps().get(device_id_)->maxSurface1DLayered);
680 #endif
681 }
682 
maxSurface2DLayered() const683 Vec3i cv::cuda::DeviceInfo::maxSurface2DLayered() const
684 {
685 #ifndef HAVE_CUDA
686     throw_no_cuda();
687 #else
688     return Vec3i(deviceProps().get(device_id_)->maxSurface2DLayered);
689 #endif
690 }
691 
maxSurfaceCubemap() const692 int cv::cuda::DeviceInfo::maxSurfaceCubemap() const
693 {
694 #ifndef HAVE_CUDA
695     throw_no_cuda();
696 #else
697     return deviceProps().get(device_id_)->maxSurfaceCubemap;
698 #endif
699 }
700 
maxSurfaceCubemapLayered() const701 Vec2i cv::cuda::DeviceInfo::maxSurfaceCubemapLayered() const
702 {
703 #ifndef HAVE_CUDA
704     throw_no_cuda();
705 #else
706     return Vec2i(deviceProps().get(device_id_)->maxSurfaceCubemapLayered);
707 #endif
708 }
709 
surfaceAlignment() const710 size_t cv::cuda::DeviceInfo::surfaceAlignment() const
711 {
712 #ifndef HAVE_CUDA
713     throw_no_cuda();
714 #else
715     return deviceProps().get(device_id_)->surfaceAlignment;
716 #endif
717 }
718 
concurrentKernels() const719 bool cv::cuda::DeviceInfo::concurrentKernels() const
720 {
721 #ifndef HAVE_CUDA
722     throw_no_cuda();
723 #else
724     return deviceProps().get(device_id_)->concurrentKernels != 0;
725 #endif
726 }
727 
ECCEnabled() const728 bool cv::cuda::DeviceInfo::ECCEnabled() const
729 {
730 #ifndef HAVE_CUDA
731     throw_no_cuda();
732 #else
733     return deviceProps().get(device_id_)->ECCEnabled != 0;
734 #endif
735 }
736 
pciBusID() const737 int cv::cuda::DeviceInfo::pciBusID() const
738 {
739 #ifndef HAVE_CUDA
740     throw_no_cuda();
741 #else
742     return deviceProps().get(device_id_)->pciBusID;
743 #endif
744 }
745 
pciDeviceID() const746 int cv::cuda::DeviceInfo::pciDeviceID() const
747 {
748 #ifndef HAVE_CUDA
749     throw_no_cuda();
750 #else
751     return deviceProps().get(device_id_)->pciDeviceID;
752 #endif
753 }
754 
pciDomainID() const755 int cv::cuda::DeviceInfo::pciDomainID() const
756 {
757 #ifndef HAVE_CUDA
758     throw_no_cuda();
759 #else
760     return deviceProps().get(device_id_)->pciDomainID;
761 #endif
762 }
763 
tccDriver() const764 bool cv::cuda::DeviceInfo::tccDriver() const
765 {
766 #ifndef HAVE_CUDA
767     throw_no_cuda();
768 #else
769     return deviceProps().get(device_id_)->tccDriver != 0;
770 #endif
771 }
772 
asyncEngineCount() const773 int cv::cuda::DeviceInfo::asyncEngineCount() const
774 {
775 #ifndef HAVE_CUDA
776     throw_no_cuda();
777 #else
778     return deviceProps().get(device_id_)->asyncEngineCount;
779 #endif
780 }
781 
unifiedAddressing() const782 bool cv::cuda::DeviceInfo::unifiedAddressing() const
783 {
784 #ifndef HAVE_CUDA
785     throw_no_cuda();
786 #else
787     return deviceProps().get(device_id_)->unifiedAddressing != 0;
788 #endif
789 }
790 
memoryClockRate() const791 int cv::cuda::DeviceInfo::memoryClockRate() const
792 {
793 #ifndef HAVE_CUDA
794     throw_no_cuda();
795 #else
796     return deviceProps().get(device_id_)->memoryClockRate;
797 #endif
798 }
799 
memoryBusWidth() const800 int cv::cuda::DeviceInfo::memoryBusWidth() const
801 {
802 #ifndef HAVE_CUDA
803     throw_no_cuda();
804 #else
805     return deviceProps().get(device_id_)->memoryBusWidth;
806 #endif
807 }
808 
l2CacheSize() const809 int cv::cuda::DeviceInfo::l2CacheSize() const
810 {
811 #ifndef HAVE_CUDA
812     throw_no_cuda();
813 #else
814     return deviceProps().get(device_id_)->l2CacheSize;
815 #endif
816 }
817 
maxThreadsPerMultiProcessor() const818 int cv::cuda::DeviceInfo::maxThreadsPerMultiProcessor() const
819 {
820 #ifndef HAVE_CUDA
821     throw_no_cuda();
822 #else
823     return deviceProps().get(device_id_)->maxThreadsPerMultiProcessor;
824 #endif
825 }
826 
queryMemory(size_t & _totalMemory,size_t & _freeMemory) const827 void cv::cuda::DeviceInfo::queryMemory(size_t& _totalMemory, size_t& _freeMemory) const
828 {
829 #ifndef HAVE_CUDA
830     CV_UNUSED(_totalMemory);
831     CV_UNUSED(_freeMemory);
832     throw_no_cuda();
833 #else
834     int prevDeviceID = getDevice();
835     if (prevDeviceID != device_id_)
836         setDevice(device_id_);
837 
838     cudaSafeCall( cudaMemGetInfo(&_freeMemory, &_totalMemory) );
839 
840     if (prevDeviceID != device_id_)
841         setDevice(prevDeviceID);
842 #endif
843 }
844 
isCompatible() const845 bool cv::cuda::DeviceInfo::isCompatible() const
846 {
847 #ifndef HAVE_CUDA
848     throw_no_cuda();
849 #else
850     // Check PTX compatibility
851     if (TargetArchs::hasEqualOrLessPtx(majorVersion(), minorVersion()))
852         return true;
853 
854     // Check BIN compatibility
855     for (int i = minorVersion(); i >= 0; --i)
856         if (TargetArchs::hasBin(majorVersion(), i))
857             return true;
858 
859     return false;
860 #endif
861 }
862 
863 ////////////////////////////////////////////////////////////////////////
864 // print info
865 
866 #ifdef HAVE_CUDA
867 
namespace
{
    // Maps a compute capability (major, minor) to the number of CUDA cores per
    // multiprocessor. Returns -1 when the architecture is not in the table, so
    // callers can skip core-count output for unknown devices.
    int convertSMVer2Cores(int major, int minor)
    {
        // SM is the compute capability in 0xMm form (M = major, m = minor);
        // Cores is the number of CUDA cores per multiprocessor.
        struct SMtoCores
        {
            int SM;
            int Cores;
        };

        static const SMtoCores gpuArchCoresPerSM[] =
        {
            { 0x10,   8 }, { 0x11,   8 }, { 0x12,   8 }, { 0x13,   8 }, // Tesla
            { 0x20,  32 }, { 0x21,  48 },                               // Fermi
            { 0x30, 192 }, { 0x32, 192 }, { 0x35, 192 }, { 0x37, 192 }, // Kepler
            { 0x50, 128 }, { 0x52, 128 }, { 0x53, 128 },                // Maxwell
            { 0x60,  64 }, { 0x61, 128 }, { 0x62, 128 },                // Pascal
            { 0x70,  64 }, { 0x72,  64 },                               // Volta
            { 0x75,  64 },                                              // Turing
            { 0x80,  64 }, { 0x86, 128 }, { 0x87, 128 },                // Ampere
            { 0x89, 128 },                                              // Ada
            { 0x90, 128 }                                               // Hopper
        };
        static const size_t tableSize = sizeof(gpuArchCoresPerSM) / sizeof(gpuArchCoresPerSM[0]);

        const int smVersion = (major << 4) + minor;

        for (size_t i = 0; i < tableSize; ++i)
        {
            if (gpuArchCoresPerSM[i].SM == smVersion)
                return gpuArchCoresPerSM[i].Cores;
        }

        return -1;
    }
}
891 
892 #endif
893 
printCudaDeviceInfo(int device)894 void cv::cuda::printCudaDeviceInfo(int device)
895 {
896 #ifndef HAVE_CUDA
897     CV_UNUSED(device);
898     throw_no_cuda();
899 #else
900     int count = getCudaEnabledDeviceCount();
901     bool valid = (device >= 0) && (device < count);
902 
903     int beg = valid ? device   : 0;
904     int end = valid ? device+1 : count;
905 
906     printf("*** CUDA Device Query (Runtime API) version (CUDART static linking) *** \n\n");
907     printf("Device count: %d\n", count);
908 
909     int driverVersion = 0, runtimeVersion = 0;
910     cudaSafeCall( cudaDriverGetVersion(&driverVersion) );
911     cudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );
912 
913     const char *computeMode[] = {
914         "Default (multiple host threads can use ::cudaSetDevice() with device simultaneously)",
915         "Exclusive (only one host thread in one process is able to use ::cudaSetDevice() with this device)",
916         "Prohibited (no host thread can use ::cudaSetDevice() with this device)",
917         "Exclusive Process (many threads in one process is able to use ::cudaSetDevice() with this device)",
918         "Unknown",
919         NULL
920     };
921 
922     for(int dev = beg; dev < end; ++dev)
923     {
924         cudaDeviceProp prop;
925         cudaSafeCall( cudaGetDeviceProperties(&prop, dev) );
926 
927         printf("\nDevice %d: \"%s\"\n", dev, prop.name);
928         printf("  CUDA Driver Version / Runtime Version          %d.%d / %d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100);
929         printf("  CUDA Capability Major/Minor version number:    %d.%d\n", prop.major, prop.minor);
930         printf("  Total amount of global memory:                 %.0f MBytes (%llu bytes)\n", (float)prop.totalGlobalMem/1048576.0f, (unsigned long long) prop.totalGlobalMem);
931 
932         int cores = convertSMVer2Cores(prop.major, prop.minor);
933         if (cores > 0)
934             printf("  (%2d) Multiprocessors x (%2d) CUDA Cores/MP:     %d CUDA Cores\n", prop.multiProcessorCount, cores, cores * prop.multiProcessorCount);
935 
936         printf("  GPU Clock Speed:                               %.2f GHz\n", prop.clockRate * 1e-6f);
937 
938         printf("  Max Texture Dimension Size (x,y,z)             1D=(%d), 2D=(%d,%d), 3D=(%d,%d,%d)\n",
939             prop.maxTexture1D, prop.maxTexture2D[0], prop.maxTexture2D[1],
940             prop.maxTexture3D[0], prop.maxTexture3D[1], prop.maxTexture3D[2]);
941         printf("  Max Layered Texture Size (dim) x layers        1D=(%d) x %d, 2D=(%d,%d) x %d\n",
942             prop.maxTexture1DLayered[0], prop.maxTexture1DLayered[1],
943             prop.maxTexture2DLayered[0], prop.maxTexture2DLayered[1], prop.maxTexture2DLayered[2]);
944 
945         printf("  Total amount of constant memory:               %u bytes\n", (int)prop.totalConstMem);
946         printf("  Total amount of shared memory per block:       %u bytes\n", (int)prop.sharedMemPerBlock);
947         printf("  Total number of registers available per block: %d\n", prop.regsPerBlock);
948         printf("  Warp size:                                     %d\n", prop.warpSize);
949         printf("  Maximum number of threads per block:           %d\n", prop.maxThreadsPerBlock);
950         printf("  Maximum sizes of each dimension of a block:    %d x %d x %d\n", prop.maxThreadsDim[0], prop.maxThreadsDim[1], prop.maxThreadsDim[2]);
951         printf("  Maximum sizes of each dimension of a grid:     %d x %d x %d\n", prop.maxGridSize[0], prop.maxGridSize[1],  prop.maxGridSize[2]);
952         printf("  Maximum memory pitch:                          %u bytes\n", (int)prop.memPitch);
953         printf("  Texture alignment:                             %u bytes\n", (int)prop.textureAlignment);
954 
955         printf("  Concurrent copy and execution:                 %s with %d copy engine(s)\n", (prop.deviceOverlap ? "Yes" : "No"), prop.asyncEngineCount);
956         printf("  Run time limit on kernels:                     %s\n", prop.kernelExecTimeoutEnabled ? "Yes" : "No");
957         printf("  Integrated GPU sharing Host Memory:            %s\n", prop.integrated ? "Yes" : "No");
958         printf("  Support host page-locked memory mapping:       %s\n", prop.canMapHostMemory ? "Yes" : "No");
959 
960         printf("  Concurrent kernel execution:                   %s\n", prop.concurrentKernels ? "Yes" : "No");
961         printf("  Alignment requirement for Surfaces:            %s\n", prop.surfaceAlignment ? "Yes" : "No");
962         printf("  Device has ECC support enabled:                %s\n", prop.ECCEnabled ? "Yes" : "No");
963         printf("  Device is using TCC driver mode:               %s\n", prop.tccDriver ? "Yes" : "No");
964         printf("  Device supports Unified Addressing (UVA):      %s\n", prop.unifiedAddressing ? "Yes" : "No");
965         printf("  Device PCI Bus ID / PCI location ID:           %d / %d\n", prop.pciBusID, prop.pciDeviceID );
966         printf("  Compute Mode:\n");
967         printf("      %s \n", computeMode[prop.computeMode]);
968     }
969 
970     printf("\n");
971     printf("deviceQuery, CUDA Driver = CUDART");
972     printf(", CUDA Driver Version  = %d.%d", driverVersion / 1000, driverVersion % 100);
973     printf(", CUDA Runtime Version = %d.%d", runtimeVersion/1000, runtimeVersion%100);
974     printf(", NumDevs = %d\n\n", count);
975 
976     fflush(stdout);
977 #endif
978 }
979 
printShortCudaDeviceInfo(int device)980 void cv::cuda::printShortCudaDeviceInfo(int device)
981 {
982 #ifndef HAVE_CUDA
983     CV_UNUSED(device);
984     throw_no_cuda();
985 #else
986     int count = getCudaEnabledDeviceCount();
987     bool valid = (device >= 0) && (device < count);
988 
989     int beg = valid ? device   : 0;
990     int end = valid ? device+1 : count;
991 
992     int driverVersion = 0, runtimeVersion = 0;
993     cudaSafeCall( cudaDriverGetVersion(&driverVersion) );
994     cudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );
995 
996     for(int dev = beg; dev < end; ++dev)
997     {
998         cudaDeviceProp prop;
999         cudaSafeCall( cudaGetDeviceProperties(&prop, dev) );
1000 
1001         const char *arch_str = prop.major < 2 ? " (not Fermi)" : "";
1002         printf("Device %d:  \"%s\"  %.0fMb", dev, prop.name, (float)prop.totalGlobalMem/1048576.0f);
1003         printf(", sm_%d%d%s", prop.major, prop.minor, arch_str);
1004 
1005         int cores = convertSMVer2Cores(prop.major, prop.minor);
1006         if (cores > 0)
1007             printf(", %d cores", cores * prop.multiProcessorCount);
1008 
1009         printf(", Driver/Runtime ver.%d.%d/%d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100);
1010     }
1011 
1012     fflush(stdout);
1013 #endif
1014 }
1015 
1016 ////////////////////////////////////////////////////////////////////////
1017 // Error handling
1018 
1019 #ifdef HAVE_CUDA
1020 
1021 namespace
1022 {
1023     #define error_entry(entry)  { entry, #entry }
1024 
1025     struct ErrorEntry
1026     {
1027         int code;
1028         const char* str;
1029     };
1030 
1031     struct ErrorEntryComparer
1032     {
1033         int code;
ErrorEntryComparer__anon4542c0f30511::ErrorEntryComparer1034         ErrorEntryComparer(int code_) : code(code_) {}
operator ()__anon4542c0f30511::ErrorEntryComparer1035         bool operator()(const ErrorEntry& e) const { return e.code == code; }
1036     };
1037 
1038     const ErrorEntry npp_errors [] =
1039     {
1040     #if defined (_MSC_VER)
1041         error_entry( NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY ),
1042     #endif
1043 
1044     #if NPP_VERSION < 5500
1045         error_entry( NPP_BAD_ARG_ERROR ),
1046         error_entry( NPP_COEFF_ERROR ),
1047         error_entry( NPP_RECT_ERROR ),
1048         error_entry( NPP_QUAD_ERROR ),
1049         error_entry( NPP_MEMFREE_ERR ),
1050         error_entry( NPP_MEMSET_ERR ),
1051         error_entry( NPP_MEM_ALLOC_ERR ),
1052         error_entry( NPP_HISTO_NUMBER_OF_LEVELS_ERROR ),
1053         error_entry( NPP_MIRROR_FLIP_ERR ),
1054         error_entry( NPP_INVALID_INPUT ),
1055         error_entry( NPP_POINTER_ERROR ),
1056         error_entry( NPP_WARNING ),
1057         error_entry( NPP_ODD_ROI_WARNING ),
1058     #else
1059         error_entry( NPP_INVALID_HOST_POINTER_ERROR ),
1060         error_entry( NPP_INVALID_DEVICE_POINTER_ERROR ),
1061         error_entry( NPP_LUT_PALETTE_BITSIZE_ERROR ),
1062         error_entry( NPP_ZC_MODE_NOT_SUPPORTED_ERROR ),
1063         error_entry( NPP_MEMFREE_ERROR ),
1064         error_entry( NPP_MEMSET_ERROR ),
1065         error_entry( NPP_QUALITY_INDEX_ERROR ),
1066         error_entry( NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR ),
1067         error_entry( NPP_CHANNEL_ORDER_ERROR ),
1068         error_entry( NPP_ZERO_MASK_VALUE_ERROR ),
1069         error_entry( NPP_QUADRANGLE_ERROR ),
1070         error_entry( NPP_RECTANGLE_ERROR ),
1071         error_entry( NPP_COEFFICIENT_ERROR ),
1072         error_entry( NPP_NUMBER_OF_CHANNELS_ERROR ),
1073         error_entry( NPP_COI_ERROR ),
1074         error_entry( NPP_DIVISOR_ERROR ),
1075         error_entry( NPP_CHANNEL_ERROR ),
1076         error_entry( NPP_STRIDE_ERROR ),
1077         error_entry( NPP_ANCHOR_ERROR ),
1078         error_entry( NPP_MASK_SIZE_ERROR ),
1079         error_entry( NPP_MIRROR_FLIP_ERROR ),
1080         error_entry( NPP_MOMENT_00_ZERO_ERROR ),
1081         error_entry( NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR ),
1082         error_entry( NPP_THRESHOLD_ERROR ),
1083         error_entry( NPP_CONTEXT_MATCH_ERROR ),
1084         error_entry( NPP_FFT_FLAG_ERROR ),
1085         error_entry( NPP_FFT_ORDER_ERROR ),
1086         error_entry( NPP_SCALE_RANGE_ERROR ),
1087         error_entry( NPP_DATA_TYPE_ERROR ),
1088         error_entry( NPP_OUT_OFF_RANGE_ERROR ),
1089         error_entry( NPP_DIVIDE_BY_ZERO_ERROR ),
1090         error_entry( NPP_MEMORY_ALLOCATION_ERR ),
1091         error_entry( NPP_RANGE_ERROR ),
1092         error_entry( NPP_BAD_ARGUMENT_ERROR ),
1093         error_entry( NPP_NO_MEMORY_ERROR ),
1094         error_entry( NPP_ERROR_RESERVED ),
1095         error_entry( NPP_NO_OPERATION_WARNING ),
1096         error_entry( NPP_DIVIDE_BY_ZERO_WARNING ),
1097         error_entry( NPP_WRONG_INTERSECTION_ROI_WARNING ),
1098     #endif
1099 
1100         error_entry( NPP_NOT_SUPPORTED_MODE_ERROR ),
1101         error_entry( NPP_ROUND_MODE_NOT_SUPPORTED_ERROR ),
1102         error_entry( NPP_RESIZE_NO_OPERATION_ERROR ),
1103         error_entry( NPP_LUT_NUMBER_OF_LEVELS_ERROR ),
1104         error_entry( NPP_TEXTURE_BIND_ERROR ),
1105         error_entry( NPP_WRONG_INTERSECTION_ROI_ERROR ),
1106         error_entry( NPP_NOT_EVEN_STEP_ERROR ),
1107         error_entry( NPP_INTERPOLATION_ERROR ),
1108         error_entry( NPP_RESIZE_FACTOR_ERROR ),
1109         error_entry( NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR ),
1110         error_entry( NPP_MEMCPY_ERROR ),
1111         error_entry( NPP_ALIGNMENT_ERROR ),
1112         error_entry( NPP_STEP_ERROR ),
1113         error_entry( NPP_SIZE_ERROR ),
1114         error_entry( NPP_NULL_POINTER_ERROR ),
1115         error_entry( NPP_CUDA_KERNEL_EXECUTION_ERROR ),
1116         error_entry( NPP_NOT_IMPLEMENTED_ERROR ),
1117         error_entry( NPP_ERROR ),
1118         error_entry( NPP_NO_ERROR ),
1119         error_entry( NPP_SUCCESS ),
1120         error_entry( NPP_WRONG_INTERSECTION_QUAD_WARNING ),
1121         error_entry( NPP_MISALIGNED_DST_ROI_WARNING ),
1122         error_entry( NPP_AFFINE_QUAD_INCORRECT_WARNING ),
1123         error_entry( NPP_DOUBLE_SIZE_WARNING )
1124     };
1125 
1126     const size_t npp_error_num = sizeof(npp_errors) / sizeof(npp_errors[0]);
1127 
1128     const ErrorEntry cu_errors [] =
1129     {
1130         error_entry( CUDA_SUCCESS                              ),
1131         error_entry( CUDA_ERROR_INVALID_VALUE                  ),
1132         error_entry( CUDA_ERROR_OUT_OF_MEMORY                  ),
1133         error_entry( CUDA_ERROR_NOT_INITIALIZED                ),
1134         error_entry( CUDA_ERROR_DEINITIALIZED                  ),
1135         error_entry( CUDA_ERROR_PROFILER_DISABLED              ),
1136         error_entry( CUDA_ERROR_PROFILER_NOT_INITIALIZED       ),
1137         error_entry( CUDA_ERROR_PROFILER_ALREADY_STARTED       ),
1138         error_entry( CUDA_ERROR_PROFILER_ALREADY_STOPPED       ),
1139         error_entry( CUDA_ERROR_NO_DEVICE                      ),
1140         error_entry( CUDA_ERROR_INVALID_DEVICE                 ),
1141         error_entry( CUDA_ERROR_INVALID_IMAGE                  ),
1142         error_entry( CUDA_ERROR_INVALID_CONTEXT                ),
1143         error_entry( CUDA_ERROR_CONTEXT_ALREADY_CURRENT        ),
1144         error_entry( CUDA_ERROR_MAP_FAILED                     ),
1145         error_entry( CUDA_ERROR_UNMAP_FAILED                   ),
1146         error_entry( CUDA_ERROR_ARRAY_IS_MAPPED                ),
1147         error_entry( CUDA_ERROR_ALREADY_MAPPED                 ),
1148         error_entry( CUDA_ERROR_NO_BINARY_FOR_GPU              ),
1149         error_entry( CUDA_ERROR_ALREADY_ACQUIRED               ),
1150         error_entry( CUDA_ERROR_NOT_MAPPED                     ),
1151         error_entry( CUDA_ERROR_NOT_MAPPED_AS_ARRAY            ),
1152         error_entry( CUDA_ERROR_NOT_MAPPED_AS_POINTER          ),
1153         error_entry( CUDA_ERROR_ECC_UNCORRECTABLE              ),
1154         error_entry( CUDA_ERROR_UNSUPPORTED_LIMIT              ),
1155         error_entry( CUDA_ERROR_CONTEXT_ALREADY_IN_USE         ),
1156         error_entry( CUDA_ERROR_INVALID_SOURCE                 ),
1157         error_entry( CUDA_ERROR_FILE_NOT_FOUND                 ),
1158         error_entry( CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND ),
1159         error_entry( CUDA_ERROR_SHARED_OBJECT_INIT_FAILED      ),
1160         error_entry( CUDA_ERROR_OPERATING_SYSTEM               ),
1161         error_entry( CUDA_ERROR_INVALID_HANDLE                 ),
1162         error_entry( CUDA_ERROR_NOT_FOUND                      ),
1163         error_entry( CUDA_ERROR_NOT_READY                      ),
1164         error_entry( CUDA_ERROR_LAUNCH_FAILED                  ),
1165         error_entry( CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES        ),
1166         error_entry( CUDA_ERROR_LAUNCH_TIMEOUT                 ),
1167         error_entry( CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING  ),
1168         error_entry( CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED    ),
1169         error_entry( CUDA_ERROR_PEER_ACCESS_NOT_ENABLED        ),
1170         error_entry( CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE         ),
1171         error_entry( CUDA_ERROR_CONTEXT_IS_DESTROYED           ),
1172         error_entry( CUDA_ERROR_ASSERT                         ),
1173         error_entry( CUDA_ERROR_TOO_MANY_PEERS                 ),
1174         error_entry( CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED ),
1175         error_entry( CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED     ),
1176         error_entry( CUDA_ERROR_UNKNOWN                        )
1177     };
1178 
1179     const size_t cu_errors_num = sizeof(cu_errors) / sizeof(cu_errors[0]);
1180 
getErrorString(int code,const ErrorEntry * errors,size_t n)1181     cv::String getErrorString(int code, const ErrorEntry* errors, size_t n)
1182     {
1183         size_t idx = std::find_if(errors, errors + n, ErrorEntryComparer(code)) - errors;
1184 
1185         const char* msg = (idx != n) ? errors[idx].str : "Unknown error code";
1186         cv::String str = cv::format("%s [Code = %d]", msg, code);
1187 
1188         return str;
1189     }
1190 }
1191 
1192 #endif
1193 
getNppErrorMessage(int code)1194 String cv::cuda::getNppErrorMessage(int code)
1195 {
1196 #ifndef HAVE_CUDA
1197     CV_UNUSED(code);
1198     return String();
1199 #else
1200     return getErrorString(code, npp_errors, npp_error_num);
1201 #endif
1202 }
1203 
getCudaDriverApiErrorMessage(int code)1204 String cv::cuda::getCudaDriverApiErrorMessage(int code)
1205 {
1206 #ifndef HAVE_CUDA
1207     CV_UNUSED(code);
1208     return String();
1209 #else
1210     return getErrorString(code, cu_errors, cu_errors_num);
1211 #endif
1212 }
1213