1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
8 //
9 //
10 // License Agreement
11 // For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
22 //
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
26 //
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42
43 #include "precomp.hpp"
44
45 using namespace cv;
46 using namespace cv::cuda;
47
getCudaEnabledDeviceCount()48 int cv::cuda::getCudaEnabledDeviceCount()
49 {
50 #ifndef HAVE_CUDA
51 return 0;
52 #else
53 int count;
54 cudaError_t error = cudaGetDeviceCount(&count);
55
56 if (error == cudaErrorInsufficientDriver)
57 return -1;
58
59 if (error == cudaErrorNoDevice)
60 return 0;
61
62 cudaSafeCall( error );
63 return count;
64 #endif
65 }
66
setDevice(int device)67 void cv::cuda::setDevice(int device)
68 {
69 #ifndef HAVE_CUDA
70 CV_UNUSED(device);
71 throw_no_cuda();
72 #else
73 cudaSafeCall( cudaSetDevice(device) );
74 cudaSafeCall( cudaFree(0) );
75 #endif
76 }
77
getDevice()78 int cv::cuda::getDevice()
79 {
80 #ifndef HAVE_CUDA
81 throw_no_cuda();
82 #else
83 int device;
84 cudaSafeCall( cudaGetDevice(&device) );
85 return device;
86 #endif
87 }
88
resetDevice()89 void cv::cuda::resetDevice()
90 {
91 #ifndef HAVE_CUDA
92 throw_no_cuda();
93 #else
94 cudaSafeCall( cudaDeviceReset() );
95 #endif
96 }
97
deviceSupports(FeatureSet feature_set)98 bool cv::cuda::deviceSupports(FeatureSet feature_set)
99 {
100 #ifndef HAVE_CUDA
101 CV_UNUSED(feature_set);
102 throw_no_cuda();
103 #else
104 static int versions[] =
105 {
106 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
107 };
108 static const int cache_size = static_cast<int>(sizeof(versions) / sizeof(versions[0]));
109
110 const int devId = getDevice();
111
112 int version;
113
114 if (devId < cache_size && versions[devId] >= 0)
115 {
116 version = versions[devId];
117 }
118 else
119 {
120 DeviceInfo dev(devId);
121 version = dev.majorVersion() * 10 + dev.minorVersion();
122 if (devId < cache_size)
123 versions[devId] = version;
124 }
125
126 return TargetArchs::builtWith(feature_set) && (version >= feature_set);
127 #endif
128 }
129
130 ////////////////////////////////////////////////////////////////////////
131 // TargetArchs
132
133 #ifdef HAVE_CUDA
134
135 namespace
136 {
137 class CudaArch
138 {
139 public:
140 CudaArch();
141
142 bool builtWith(FeatureSet feature_set) const;
143 bool hasPtx(int major, int minor) const;
144 bool hasBin(int major, int minor) const;
145 bool hasEqualOrLessPtx(int major, int minor) const;
146 bool hasEqualOrGreaterPtx(int major, int minor) const;
147 bool hasEqualOrGreaterBin(int major, int minor) const;
148
149 private:
150 static void fromStr(const char* set_as_str, std::vector<int>& arr);
151
152 std::vector<int> bin;
153 std::vector<int> ptx;
154 std::vector<int> features;
155 };
156
157 const CudaArch cudaArch;
158
CudaArch()159 CudaArch::CudaArch()
160 {
161 fromStr(CUDA_ARCH_BIN, bin);
162 fromStr(CUDA_ARCH_PTX, ptx);
163 fromStr(CUDA_ARCH_FEATURES, features);
164 }
165
builtWith(FeatureSet feature_set) const166 bool CudaArch::builtWith(FeatureSet feature_set) const
167 {
168 return !features.empty() && (features.back() >= feature_set);
169 }
170
hasPtx(int major,int minor) const171 bool CudaArch::hasPtx(int major, int minor) const
172 {
173 return std::find(ptx.begin(), ptx.end(), major * 10 + minor) != ptx.end();
174 }
175
hasBin(int major,int minor) const176 bool CudaArch::hasBin(int major, int minor) const
177 {
178 return std::find(bin.begin(), bin.end(), major * 10 + minor) != bin.end();
179 }
180
hasEqualOrLessPtx(int major,int minor) const181 bool CudaArch::hasEqualOrLessPtx(int major, int minor) const
182 {
183 return !ptx.empty() && (ptx.front() <= major * 10 + minor);
184 }
185
hasEqualOrGreaterPtx(int major,int minor) const186 bool CudaArch::hasEqualOrGreaterPtx(int major, int minor) const
187 {
188 return !ptx.empty() && (ptx.back() >= major * 10 + minor);
189 }
190
hasEqualOrGreaterBin(int major,int minor) const191 bool CudaArch::hasEqualOrGreaterBin(int major, int minor) const
192 {
193 return !bin.empty() && (bin.back() >= major * 10 + minor);
194 }
195
fromStr(const char * set_as_str,std::vector<int> & arr)196 void CudaArch::fromStr(const char* set_as_str, std::vector<int>& arr)
197 {
198 arr.clear();
199
200 const size_t len = strlen(set_as_str);
201
202 size_t pos = 0;
203 while (pos < len)
204 {
205 if (isspace(set_as_str[pos]))
206 {
207 ++pos;
208 }
209 else
210 {
211 int cur_value;
212 int chars_read;
213 int args_read = sscanf(set_as_str + pos, "%d%n", &cur_value, &chars_read);
214 CV_Assert( args_read == 1 );
215
216 arr.push_back(cur_value);
217 pos += chars_read;
218 }
219 }
220
221 std::sort(arr.begin(), arr.end());
222 }
223 }
224
225 #endif
226
builtWith(cv::cuda::FeatureSet feature_set)227 bool cv::cuda::TargetArchs::builtWith(cv::cuda::FeatureSet feature_set)
228 {
229 #ifndef HAVE_CUDA
230 CV_UNUSED(feature_set);
231 throw_no_cuda();
232 #else
233 return cudaArch.builtWith(feature_set);
234 #endif
235 }
236
hasPtx(int major,int minor)237 bool cv::cuda::TargetArchs::hasPtx(int major, int minor)
238 {
239 #ifndef HAVE_CUDA
240 CV_UNUSED(major);
241 CV_UNUSED(minor);
242 throw_no_cuda();
243 #else
244 return cudaArch.hasPtx(major, minor);
245 #endif
246 }
247
hasBin(int major,int minor)248 bool cv::cuda::TargetArchs::hasBin(int major, int minor)
249 {
250 #ifndef HAVE_CUDA
251 CV_UNUSED(major);
252 CV_UNUSED(minor);
253 throw_no_cuda();
254 #else
255 return cudaArch.hasBin(major, minor);
256 #endif
257 }
258
hasEqualOrLessPtx(int major,int minor)259 bool cv::cuda::TargetArchs::hasEqualOrLessPtx(int major, int minor)
260 {
261 #ifndef HAVE_CUDA
262 CV_UNUSED(major);
263 CV_UNUSED(minor);
264 throw_no_cuda();
265 #else
266 return cudaArch.hasEqualOrLessPtx(major, minor);
267 #endif
268 }
269
hasEqualOrGreaterPtx(int major,int minor)270 bool cv::cuda::TargetArchs::hasEqualOrGreaterPtx(int major, int minor)
271 {
272 #ifndef HAVE_CUDA
273 CV_UNUSED(major);
274 CV_UNUSED(minor);
275 throw_no_cuda();
276 #else
277 return cudaArch.hasEqualOrGreaterPtx(major, minor);
278 #endif
279 }
280
hasEqualOrGreaterBin(int major,int minor)281 bool cv::cuda::TargetArchs::hasEqualOrGreaterBin(int major, int minor)
282 {
283 #ifndef HAVE_CUDA
284 CV_UNUSED(major);
285 CV_UNUSED(minor);
286 throw_no_cuda();
287 #else
288 return cudaArch.hasEqualOrGreaterBin(major, minor);
289 #endif
290 }
291
292 ////////////////////////////////////////////////////////////////////////
293 // DeviceInfo
294
295 #ifdef HAVE_CUDA
296
297 namespace
298 {
299 class DeviceProps
300 {
301 public:
302 DeviceProps();
303
304 const cudaDeviceProp* get(int devID) const;
305
306 private:
307 std::vector<cudaDeviceProp> props_;
308 };
309
DeviceProps()310 DeviceProps::DeviceProps()
311 {
312 int count = getCudaEnabledDeviceCount();
313
314 if (count > 0)
315 {
316 props_.resize(count);
317
318 for (int devID = 0; devID < count; ++devID)
319 {
320 cudaSafeCall( cudaGetDeviceProperties(&props_[devID], devID) );
321 }
322 }
323 }
324
get(int devID) const325 const cudaDeviceProp* DeviceProps::get(int devID) const
326 {
327 CV_Assert( static_cast<size_t>(devID) < props_.size() );
328
329 return &props_[devID];
330 }
331
deviceProps()332 DeviceProps& deviceProps()
333 {
334 static DeviceProps props;
335 return props;
336 }
337 }
338
339 #endif
340
name() const341 const char* cv::cuda::DeviceInfo::name() const
342 {
343 #ifndef HAVE_CUDA
344 throw_no_cuda();
345 #else
346 return deviceProps().get(device_id_)->name;
347 #endif
348 }
349
totalGlobalMem() const350 size_t cv::cuda::DeviceInfo::totalGlobalMem() const
351 {
352 #ifndef HAVE_CUDA
353 throw_no_cuda();
354 #else
355 return deviceProps().get(device_id_)->totalGlobalMem;
356 #endif
357 }
358
sharedMemPerBlock() const359 size_t cv::cuda::DeviceInfo::sharedMemPerBlock() const
360 {
361 #ifndef HAVE_CUDA
362 throw_no_cuda();
363 #else
364 return deviceProps().get(device_id_)->sharedMemPerBlock;
365 #endif
366 }
367
regsPerBlock() const368 int cv::cuda::DeviceInfo::regsPerBlock() const
369 {
370 #ifndef HAVE_CUDA
371 throw_no_cuda();
372 #else
373 return deviceProps().get(device_id_)->regsPerBlock;
374 #endif
375 }
376
warpSize() const377 int cv::cuda::DeviceInfo::warpSize() const
378 {
379 #ifndef HAVE_CUDA
380 throw_no_cuda();
381 #else
382 return deviceProps().get(device_id_)->warpSize;
383 #endif
384 }
385
memPitch() const386 size_t cv::cuda::DeviceInfo::memPitch() const
387 {
388 #ifndef HAVE_CUDA
389 throw_no_cuda();
390 #else
391 return deviceProps().get(device_id_)->memPitch;
392 #endif
393 }
394
maxThreadsPerBlock() const395 int cv::cuda::DeviceInfo::maxThreadsPerBlock() const
396 {
397 #ifndef HAVE_CUDA
398 throw_no_cuda();
399 #else
400 return deviceProps().get(device_id_)->maxThreadsPerBlock;
401 #endif
402 }
403
maxThreadsDim() const404 Vec3i cv::cuda::DeviceInfo::maxThreadsDim() const
405 {
406 #ifndef HAVE_CUDA
407 throw_no_cuda();
408 #else
409 return Vec3i(deviceProps().get(device_id_)->maxThreadsDim);
410 #endif
411 }
412
maxGridSize() const413 Vec3i cv::cuda::DeviceInfo::maxGridSize() const
414 {
415 #ifndef HAVE_CUDA
416 throw_no_cuda();
417 #else
418 return Vec3i(deviceProps().get(device_id_)->maxGridSize);
419 #endif
420 }
421
clockRate() const422 int cv::cuda::DeviceInfo::clockRate() const
423 {
424 #ifndef HAVE_CUDA
425 throw_no_cuda();
426 #else
427 return deviceProps().get(device_id_)->clockRate;
428 #endif
429 }
430
totalConstMem() const431 size_t cv::cuda::DeviceInfo::totalConstMem() const
432 {
433 #ifndef HAVE_CUDA
434 throw_no_cuda();
435 #else
436 return deviceProps().get(device_id_)->totalConstMem;
437 #endif
438 }
439
majorVersion() const440 int cv::cuda::DeviceInfo::majorVersion() const
441 {
442 #ifndef HAVE_CUDA
443 throw_no_cuda();
444 #else
445 return deviceProps().get(device_id_)->major;
446 #endif
447 }
448
minorVersion() const449 int cv::cuda::DeviceInfo::minorVersion() const
450 {
451 #ifndef HAVE_CUDA
452 throw_no_cuda();
453 #else
454 return deviceProps().get(device_id_)->minor;
455 #endif
456 }
457
textureAlignment() const458 size_t cv::cuda::DeviceInfo::textureAlignment() const
459 {
460 #ifndef HAVE_CUDA
461 throw_no_cuda();
462 #else
463 return deviceProps().get(device_id_)->textureAlignment;
464 #endif
465 }
466
texturePitchAlignment() const467 size_t cv::cuda::DeviceInfo::texturePitchAlignment() const
468 {
469 #ifndef HAVE_CUDA
470 throw_no_cuda();
471 #else
472 return deviceProps().get(device_id_)->texturePitchAlignment;
473 #endif
474 }
475
multiProcessorCount() const476 int cv::cuda::DeviceInfo::multiProcessorCount() const
477 {
478 #ifndef HAVE_CUDA
479 throw_no_cuda();
480 #else
481 return deviceProps().get(device_id_)->multiProcessorCount;
482 #endif
483 }
484
kernelExecTimeoutEnabled() const485 bool cv::cuda::DeviceInfo::kernelExecTimeoutEnabled() const
486 {
487 #ifndef HAVE_CUDA
488 throw_no_cuda();
489 #else
490 return deviceProps().get(device_id_)->kernelExecTimeoutEnabled != 0;
491 #endif
492 }
493
integrated() const494 bool cv::cuda::DeviceInfo::integrated() const
495 {
496 #ifndef HAVE_CUDA
497 throw_no_cuda();
498 #else
499 return deviceProps().get(device_id_)->integrated != 0;
500 #endif
501 }
502
canMapHostMemory() const503 bool cv::cuda::DeviceInfo::canMapHostMemory() const
504 {
505 #ifndef HAVE_CUDA
506 throw_no_cuda();
507 #else
508 return deviceProps().get(device_id_)->canMapHostMemory != 0;
509 #endif
510 }
511
computeMode() const512 DeviceInfo::ComputeMode cv::cuda::DeviceInfo::computeMode() const
513 {
514 #ifndef HAVE_CUDA
515 throw_no_cuda();
516 #else
517 static const ComputeMode tbl[] =
518 {
519 ComputeModeDefault,
520 ComputeModeExclusive,
521 ComputeModeProhibited,
522 ComputeModeExclusiveProcess
523 };
524
525 return tbl[deviceProps().get(device_id_)->computeMode];
526 #endif
527 }
528
maxTexture1D() const529 int cv::cuda::DeviceInfo::maxTexture1D() const
530 {
531 #ifndef HAVE_CUDA
532 throw_no_cuda();
533 #else
534 return deviceProps().get(device_id_)->maxTexture1D;
535 #endif
536 }
537
maxTexture1DMipmap() const538 int cv::cuda::DeviceInfo::maxTexture1DMipmap() const
539 {
540 #ifndef HAVE_CUDA
541 throw_no_cuda();
542 #else
543 #if CUDA_VERSION >= 5000
544 return deviceProps().get(device_id_)->maxTexture1DMipmap;
545 #else
546 CV_Error(Error::StsNotImplemented, "This function requires CUDA 5.0");
547 return 0;
548 #endif
549 #endif
550 }
551
maxTexture1DLinear() const552 int cv::cuda::DeviceInfo::maxTexture1DLinear() const
553 {
554 #ifndef HAVE_CUDA
555 throw_no_cuda();
556 #else
557 return deviceProps().get(device_id_)->maxTexture1DLinear;
558 #endif
559 }
560
maxTexture2D() const561 Vec2i cv::cuda::DeviceInfo::maxTexture2D() const
562 {
563 #ifndef HAVE_CUDA
564 throw_no_cuda();
565 #else
566 return Vec2i(deviceProps().get(device_id_)->maxTexture2D);
567 #endif
568 }
569
maxTexture2DMipmap() const570 Vec2i cv::cuda::DeviceInfo::maxTexture2DMipmap() const
571 {
572 #ifndef HAVE_CUDA
573 throw_no_cuda();
574 #else
575 #if CUDA_VERSION >= 5000
576 return Vec2i(deviceProps().get(device_id_)->maxTexture2DMipmap);
577 #else
578 CV_Error(Error::StsNotImplemented, "This function requires CUDA 5.0");
579 return Vec2i();
580 #endif
581 #endif
582 }
583
maxTexture2DLinear() const584 Vec3i cv::cuda::DeviceInfo::maxTexture2DLinear() const
585 {
586 #ifndef HAVE_CUDA
587 throw_no_cuda();
588 #else
589 return Vec3i(deviceProps().get(device_id_)->maxTexture2DLinear);
590 #endif
591 }
592
maxTexture2DGather() const593 Vec2i cv::cuda::DeviceInfo::maxTexture2DGather() const
594 {
595 #ifndef HAVE_CUDA
596 throw_no_cuda();
597 #else
598 return Vec2i(deviceProps().get(device_id_)->maxTexture2DGather);
599 #endif
600 }
601
maxTexture3D() const602 Vec3i cv::cuda::DeviceInfo::maxTexture3D() const
603 {
604 #ifndef HAVE_CUDA
605 throw_no_cuda();
606 #else
607 return Vec3i(deviceProps().get(device_id_)->maxTexture3D);
608 #endif
609 }
610
maxTextureCubemap() const611 int cv::cuda::DeviceInfo::maxTextureCubemap() const
612 {
613 #ifndef HAVE_CUDA
614 throw_no_cuda();
615 #else
616 return deviceProps().get(device_id_)->maxTextureCubemap;
617 #endif
618 }
619
maxTexture1DLayered() const620 Vec2i cv::cuda::DeviceInfo::maxTexture1DLayered() const
621 {
622 #ifndef HAVE_CUDA
623 throw_no_cuda();
624 #else
625 return Vec2i(deviceProps().get(device_id_)->maxTexture1DLayered);
626 #endif
627 }
628
maxTexture2DLayered() const629 Vec3i cv::cuda::DeviceInfo::maxTexture2DLayered() const
630 {
631 #ifndef HAVE_CUDA
632 throw_no_cuda();
633 #else
634 return Vec3i(deviceProps().get(device_id_)->maxTexture2DLayered);
635 #endif
636 }
637
maxTextureCubemapLayered() const638 Vec2i cv::cuda::DeviceInfo::maxTextureCubemapLayered() const
639 {
640 #ifndef HAVE_CUDA
641 throw_no_cuda();
642 #else
643 return Vec2i(deviceProps().get(device_id_)->maxTextureCubemapLayered);
644 #endif
645 }
646
maxSurface1D() const647 int cv::cuda::DeviceInfo::maxSurface1D() const
648 {
649 #ifndef HAVE_CUDA
650 throw_no_cuda();
651 #else
652 return deviceProps().get(device_id_)->maxSurface1D;
653 #endif
654 }
655
maxSurface2D() const656 Vec2i cv::cuda::DeviceInfo::maxSurface2D() const
657 {
658 #ifndef HAVE_CUDA
659 throw_no_cuda();
660 #else
661 return Vec2i(deviceProps().get(device_id_)->maxSurface2D);
662 #endif
663 }
664
maxSurface3D() const665 Vec3i cv::cuda::DeviceInfo::maxSurface3D() const
666 {
667 #ifndef HAVE_CUDA
668 throw_no_cuda();
669 #else
670 return Vec3i(deviceProps().get(device_id_)->maxSurface3D);
671 #endif
672 }
673
maxSurface1DLayered() const674 Vec2i cv::cuda::DeviceInfo::maxSurface1DLayered() const
675 {
676 #ifndef HAVE_CUDA
677 throw_no_cuda();
678 #else
679 return Vec2i(deviceProps().get(device_id_)->maxSurface1DLayered);
680 #endif
681 }
682
maxSurface2DLayered() const683 Vec3i cv::cuda::DeviceInfo::maxSurface2DLayered() const
684 {
685 #ifndef HAVE_CUDA
686 throw_no_cuda();
687 #else
688 return Vec3i(deviceProps().get(device_id_)->maxSurface2DLayered);
689 #endif
690 }
691
maxSurfaceCubemap() const692 int cv::cuda::DeviceInfo::maxSurfaceCubemap() const
693 {
694 #ifndef HAVE_CUDA
695 throw_no_cuda();
696 #else
697 return deviceProps().get(device_id_)->maxSurfaceCubemap;
698 #endif
699 }
700
maxSurfaceCubemapLayered() const701 Vec2i cv::cuda::DeviceInfo::maxSurfaceCubemapLayered() const
702 {
703 #ifndef HAVE_CUDA
704 throw_no_cuda();
705 #else
706 return Vec2i(deviceProps().get(device_id_)->maxSurfaceCubemapLayered);
707 #endif
708 }
709
surfaceAlignment() const710 size_t cv::cuda::DeviceInfo::surfaceAlignment() const
711 {
712 #ifndef HAVE_CUDA
713 throw_no_cuda();
714 #else
715 return deviceProps().get(device_id_)->surfaceAlignment;
716 #endif
717 }
718
concurrentKernels() const719 bool cv::cuda::DeviceInfo::concurrentKernels() const
720 {
721 #ifndef HAVE_CUDA
722 throw_no_cuda();
723 #else
724 return deviceProps().get(device_id_)->concurrentKernels != 0;
725 #endif
726 }
727
ECCEnabled() const728 bool cv::cuda::DeviceInfo::ECCEnabled() const
729 {
730 #ifndef HAVE_CUDA
731 throw_no_cuda();
732 #else
733 return deviceProps().get(device_id_)->ECCEnabled != 0;
734 #endif
735 }
736
pciBusID() const737 int cv::cuda::DeviceInfo::pciBusID() const
738 {
739 #ifndef HAVE_CUDA
740 throw_no_cuda();
741 #else
742 return deviceProps().get(device_id_)->pciBusID;
743 #endif
744 }
745
pciDeviceID() const746 int cv::cuda::DeviceInfo::pciDeviceID() const
747 {
748 #ifndef HAVE_CUDA
749 throw_no_cuda();
750 #else
751 return deviceProps().get(device_id_)->pciDeviceID;
752 #endif
753 }
754
pciDomainID() const755 int cv::cuda::DeviceInfo::pciDomainID() const
756 {
757 #ifndef HAVE_CUDA
758 throw_no_cuda();
759 #else
760 return deviceProps().get(device_id_)->pciDomainID;
761 #endif
762 }
763
tccDriver() const764 bool cv::cuda::DeviceInfo::tccDriver() const
765 {
766 #ifndef HAVE_CUDA
767 throw_no_cuda();
768 #else
769 return deviceProps().get(device_id_)->tccDriver != 0;
770 #endif
771 }
772
asyncEngineCount() const773 int cv::cuda::DeviceInfo::asyncEngineCount() const
774 {
775 #ifndef HAVE_CUDA
776 throw_no_cuda();
777 #else
778 return deviceProps().get(device_id_)->asyncEngineCount;
779 #endif
780 }
781
unifiedAddressing() const782 bool cv::cuda::DeviceInfo::unifiedAddressing() const
783 {
784 #ifndef HAVE_CUDA
785 throw_no_cuda();
786 #else
787 return deviceProps().get(device_id_)->unifiedAddressing != 0;
788 #endif
789 }
790
memoryClockRate() const791 int cv::cuda::DeviceInfo::memoryClockRate() const
792 {
793 #ifndef HAVE_CUDA
794 throw_no_cuda();
795 #else
796 return deviceProps().get(device_id_)->memoryClockRate;
797 #endif
798 }
799
memoryBusWidth() const800 int cv::cuda::DeviceInfo::memoryBusWidth() const
801 {
802 #ifndef HAVE_CUDA
803 throw_no_cuda();
804 #else
805 return deviceProps().get(device_id_)->memoryBusWidth;
806 #endif
807 }
808
l2CacheSize() const809 int cv::cuda::DeviceInfo::l2CacheSize() const
810 {
811 #ifndef HAVE_CUDA
812 throw_no_cuda();
813 #else
814 return deviceProps().get(device_id_)->l2CacheSize;
815 #endif
816 }
817
maxThreadsPerMultiProcessor() const818 int cv::cuda::DeviceInfo::maxThreadsPerMultiProcessor() const
819 {
820 #ifndef HAVE_CUDA
821 throw_no_cuda();
822 #else
823 return deviceProps().get(device_id_)->maxThreadsPerMultiProcessor;
824 #endif
825 }
826
queryMemory(size_t & _totalMemory,size_t & _freeMemory) const827 void cv::cuda::DeviceInfo::queryMemory(size_t& _totalMemory, size_t& _freeMemory) const
828 {
829 #ifndef HAVE_CUDA
830 CV_UNUSED(_totalMemory);
831 CV_UNUSED(_freeMemory);
832 throw_no_cuda();
833 #else
834 int prevDeviceID = getDevice();
835 if (prevDeviceID != device_id_)
836 setDevice(device_id_);
837
838 cudaSafeCall( cudaMemGetInfo(&_freeMemory, &_totalMemory) );
839
840 if (prevDeviceID != device_id_)
841 setDevice(prevDeviceID);
842 #endif
843 }
844
isCompatible() const845 bool cv::cuda::DeviceInfo::isCompatible() const
846 {
847 #ifndef HAVE_CUDA
848 throw_no_cuda();
849 #else
850 // Check PTX compatibility
851 if (TargetArchs::hasEqualOrLessPtx(majorVersion(), minorVersion()))
852 return true;
853
854 // Check BIN compatibility
855 for (int i = minorVersion(); i >= 0; --i)
856 if (TargetArchs::hasBin(majorVersion(), i))
857 return true;
858
859 return false;
860 #endif
861 }
862
863 ////////////////////////////////////////////////////////////////////////
864 // print info
865
866 #ifdef HAVE_CUDA
867
868 namespace
869 {
// Maps a compute capability (major.minor) to the number of CUDA cores per
// multiprocessor.
//   major, minor - compute capability of the device
// Returns the core count, or -1 when the architecture is not in the table
// (callers treat non-positive results as "unknown").
int convertSMVer2Cores(int major, int minor)
{
    // SM is encoded as 0xMm (hexadecimal): M = SM major version, m = SM minor version.
    struct SMtoCores
    {
        int SM;
        int Cores;
    };

    static const SMtoCores gpuArchCoresPerSM[] =
    {
        { 0x10,   8 }, { 0x11,   8 }, { 0x12,   8 }, { 0x13,   8 },
        { 0x20,  32 }, { 0x21,  48 },
        { 0x30, 192 }, { 0x32, 192 }, { 0x35, 192 }, { 0x37, 192 },
        { 0x50, 128 }, { 0x52, 128 }, { 0x53, 128 },
        { 0x60,  64 }, { 0x61, 128 }, { 0x62, 128 },
        { 0x70,  64 }, { 0x72,  64 }, { 0x75,  64 },
        { 0x80,  64 }, { 0x86, 128 }, { 0x87, 128 }, { 0x89, 128 },
        { 0x90, 128 }
    };
    static const int tableSize = static_cast<int>(sizeof(gpuArchCoresPerSM) / sizeof(gpuArchCoresPerSM[0]));

    const int smVersion = (major << 4) + minor;

    for (int index = 0; index < tableSize; ++index)
    {
        if (gpuArchCoresPerSM[index].SM == smVersion)
            return gpuArchCoresPerSM[index].Cores;
    }

    return -1;
}
890 }
891
892 #endif
893
printCudaDeviceInfo(int device)894 void cv::cuda::printCudaDeviceInfo(int device)
895 {
896 #ifndef HAVE_CUDA
897 CV_UNUSED(device);
898 throw_no_cuda();
899 #else
900 int count = getCudaEnabledDeviceCount();
901 bool valid = (device >= 0) && (device < count);
902
903 int beg = valid ? device : 0;
904 int end = valid ? device+1 : count;
905
906 printf("*** CUDA Device Query (Runtime API) version (CUDART static linking) *** \n\n");
907 printf("Device count: %d\n", count);
908
909 int driverVersion = 0, runtimeVersion = 0;
910 cudaSafeCall( cudaDriverGetVersion(&driverVersion) );
911 cudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );
912
913 const char *computeMode[] = {
914 "Default (multiple host threads can use ::cudaSetDevice() with device simultaneously)",
915 "Exclusive (only one host thread in one process is able to use ::cudaSetDevice() with this device)",
916 "Prohibited (no host thread can use ::cudaSetDevice() with this device)",
917 "Exclusive Process (many threads in one process is able to use ::cudaSetDevice() with this device)",
918 "Unknown",
919 NULL
920 };
921
922 for(int dev = beg; dev < end; ++dev)
923 {
924 cudaDeviceProp prop;
925 cudaSafeCall( cudaGetDeviceProperties(&prop, dev) );
926
927 printf("\nDevice %d: \"%s\"\n", dev, prop.name);
928 printf(" CUDA Driver Version / Runtime Version %d.%d / %d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100);
929 printf(" CUDA Capability Major/Minor version number: %d.%d\n", prop.major, prop.minor);
930 printf(" Total amount of global memory: %.0f MBytes (%llu bytes)\n", (float)prop.totalGlobalMem/1048576.0f, (unsigned long long) prop.totalGlobalMem);
931
932 int cores = convertSMVer2Cores(prop.major, prop.minor);
933 if (cores > 0)
934 printf(" (%2d) Multiprocessors x (%2d) CUDA Cores/MP: %d CUDA Cores\n", prop.multiProcessorCount, cores, cores * prop.multiProcessorCount);
935
936 printf(" GPU Clock Speed: %.2f GHz\n", prop.clockRate * 1e-6f);
937
938 printf(" Max Texture Dimension Size (x,y,z) 1D=(%d), 2D=(%d,%d), 3D=(%d,%d,%d)\n",
939 prop.maxTexture1D, prop.maxTexture2D[0], prop.maxTexture2D[1],
940 prop.maxTexture3D[0], prop.maxTexture3D[1], prop.maxTexture3D[2]);
941 printf(" Max Layered Texture Size (dim) x layers 1D=(%d) x %d, 2D=(%d,%d) x %d\n",
942 prop.maxTexture1DLayered[0], prop.maxTexture1DLayered[1],
943 prop.maxTexture2DLayered[0], prop.maxTexture2DLayered[1], prop.maxTexture2DLayered[2]);
944
945 printf(" Total amount of constant memory: %u bytes\n", (int)prop.totalConstMem);
946 printf(" Total amount of shared memory per block: %u bytes\n", (int)prop.sharedMemPerBlock);
947 printf(" Total number of registers available per block: %d\n", prop.regsPerBlock);
948 printf(" Warp size: %d\n", prop.warpSize);
949 printf(" Maximum number of threads per block: %d\n", prop.maxThreadsPerBlock);
950 printf(" Maximum sizes of each dimension of a block: %d x %d x %d\n", prop.maxThreadsDim[0], prop.maxThreadsDim[1], prop.maxThreadsDim[2]);
951 printf(" Maximum sizes of each dimension of a grid: %d x %d x %d\n", prop.maxGridSize[0], prop.maxGridSize[1], prop.maxGridSize[2]);
952 printf(" Maximum memory pitch: %u bytes\n", (int)prop.memPitch);
953 printf(" Texture alignment: %u bytes\n", (int)prop.textureAlignment);
954
955 printf(" Concurrent copy and execution: %s with %d copy engine(s)\n", (prop.deviceOverlap ? "Yes" : "No"), prop.asyncEngineCount);
956 printf(" Run time limit on kernels: %s\n", prop.kernelExecTimeoutEnabled ? "Yes" : "No");
957 printf(" Integrated GPU sharing Host Memory: %s\n", prop.integrated ? "Yes" : "No");
958 printf(" Support host page-locked memory mapping: %s\n", prop.canMapHostMemory ? "Yes" : "No");
959
960 printf(" Concurrent kernel execution: %s\n", prop.concurrentKernels ? "Yes" : "No");
961 printf(" Alignment requirement for Surfaces: %s\n", prop.surfaceAlignment ? "Yes" : "No");
962 printf(" Device has ECC support enabled: %s\n", prop.ECCEnabled ? "Yes" : "No");
963 printf(" Device is using TCC driver mode: %s\n", prop.tccDriver ? "Yes" : "No");
964 printf(" Device supports Unified Addressing (UVA): %s\n", prop.unifiedAddressing ? "Yes" : "No");
965 printf(" Device PCI Bus ID / PCI location ID: %d / %d\n", prop.pciBusID, prop.pciDeviceID );
966 printf(" Compute Mode:\n");
967 printf(" %s \n", computeMode[prop.computeMode]);
968 }
969
970 printf("\n");
971 printf("deviceQuery, CUDA Driver = CUDART");
972 printf(", CUDA Driver Version = %d.%d", driverVersion / 1000, driverVersion % 100);
973 printf(", CUDA Runtime Version = %d.%d", runtimeVersion/1000, runtimeVersion%100);
974 printf(", NumDevs = %d\n\n", count);
975
976 fflush(stdout);
977 #endif
978 }
979
printShortCudaDeviceInfo(int device)980 void cv::cuda::printShortCudaDeviceInfo(int device)
981 {
982 #ifndef HAVE_CUDA
983 CV_UNUSED(device);
984 throw_no_cuda();
985 #else
986 int count = getCudaEnabledDeviceCount();
987 bool valid = (device >= 0) && (device < count);
988
989 int beg = valid ? device : 0;
990 int end = valid ? device+1 : count;
991
992 int driverVersion = 0, runtimeVersion = 0;
993 cudaSafeCall( cudaDriverGetVersion(&driverVersion) );
994 cudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );
995
996 for(int dev = beg; dev < end; ++dev)
997 {
998 cudaDeviceProp prop;
999 cudaSafeCall( cudaGetDeviceProperties(&prop, dev) );
1000
1001 const char *arch_str = prop.major < 2 ? " (not Fermi)" : "";
1002 printf("Device %d: \"%s\" %.0fMb", dev, prop.name, (float)prop.totalGlobalMem/1048576.0f);
1003 printf(", sm_%d%d%s", prop.major, prop.minor, arch_str);
1004
1005 int cores = convertSMVer2Cores(prop.major, prop.minor);
1006 if (cores > 0)
1007 printf(", %d cores", cores * prop.multiProcessorCount);
1008
1009 printf(", Driver/Runtime ver.%d.%d/%d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100);
1010 }
1011
1012 fflush(stdout);
1013 #endif
1014 }
1015
1016 ////////////////////////////////////////////////////////////////////////
1017 // Error handling
1018
1019 #ifdef HAVE_CUDA
1020
1021 namespace
1022 {
// Builds an ErrorEntry initializer from an identifier: the value itself plus its
// spelling as a string literal.
#define error_entry(entry) { entry, #entry }

// One row of an error-code table: a numeric code and its textual name.
struct ErrorEntry
{
    int code;
    const char* str;
};

// Unary predicate matching the ErrorEntry whose code equals the one given at
// construction.
struct ErrorEntryComparer
{
    int code;

    ErrorEntryComparer(int wanted) : code(wanted) {}

    bool operator()(const ErrorEntry& candidate) const
    {
        return candidate.code == code;
    }
};
1037
// Table pairing NPP status codes with their identifier spelling (built with the
// error_entry macro). The set of entries depends on the compiler and on
// NPP_VERSION, see the conditional sections below.
const ErrorEntry npp_errors [] =
{
#if defined (_MSC_VER)
    // Only listed when compiling with MSVC.
    error_entry( NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY ),
#endif

#if NPP_VERSION < 5500
    // Identifiers used when NPP_VERSION is below 5500.
    error_entry( NPP_BAD_ARG_ERROR ),
    error_entry( NPP_COEFF_ERROR ),
    error_entry( NPP_RECT_ERROR ),
    error_entry( NPP_QUAD_ERROR ),
    error_entry( NPP_MEMFREE_ERR ),
    error_entry( NPP_MEMSET_ERR ),
    error_entry( NPP_MEM_ALLOC_ERR ),
    error_entry( NPP_HISTO_NUMBER_OF_LEVELS_ERROR ),
    error_entry( NPP_MIRROR_FLIP_ERR ),
    error_entry( NPP_INVALID_INPUT ),
    error_entry( NPP_POINTER_ERROR ),
    error_entry( NPP_WARNING ),
    error_entry( NPP_ODD_ROI_WARNING ),
#else
    // Identifiers used when NPP_VERSION is 5500 or above.
    error_entry( NPP_INVALID_HOST_POINTER_ERROR ),
    error_entry( NPP_INVALID_DEVICE_POINTER_ERROR ),
    error_entry( NPP_LUT_PALETTE_BITSIZE_ERROR ),
    error_entry( NPP_ZC_MODE_NOT_SUPPORTED_ERROR ),
    error_entry( NPP_MEMFREE_ERROR ),
    error_entry( NPP_MEMSET_ERROR ),
    error_entry( NPP_QUALITY_INDEX_ERROR ),
    error_entry( NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR ),
    error_entry( NPP_CHANNEL_ORDER_ERROR ),
    error_entry( NPP_ZERO_MASK_VALUE_ERROR ),
    error_entry( NPP_QUADRANGLE_ERROR ),
    error_entry( NPP_RECTANGLE_ERROR ),
    error_entry( NPP_COEFFICIENT_ERROR ),
    error_entry( NPP_NUMBER_OF_CHANNELS_ERROR ),
    error_entry( NPP_COI_ERROR ),
    error_entry( NPP_DIVISOR_ERROR ),
    error_entry( NPP_CHANNEL_ERROR ),
    error_entry( NPP_STRIDE_ERROR ),
    error_entry( NPP_ANCHOR_ERROR ),
    error_entry( NPP_MASK_SIZE_ERROR ),
    error_entry( NPP_MIRROR_FLIP_ERROR ),
    error_entry( NPP_MOMENT_00_ZERO_ERROR ),
    error_entry( NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR ),
    error_entry( NPP_THRESHOLD_ERROR ),
    error_entry( NPP_CONTEXT_MATCH_ERROR ),
    error_entry( NPP_FFT_FLAG_ERROR ),
    error_entry( NPP_FFT_ORDER_ERROR ),
    error_entry( NPP_SCALE_RANGE_ERROR ),
    error_entry( NPP_DATA_TYPE_ERROR ),
    error_entry( NPP_OUT_OFF_RANGE_ERROR ),
    error_entry( NPP_DIVIDE_BY_ZERO_ERROR ),
    error_entry( NPP_MEMORY_ALLOCATION_ERR ),
    error_entry( NPP_RANGE_ERROR ),
    error_entry( NPP_BAD_ARGUMENT_ERROR ),
    error_entry( NPP_NO_MEMORY_ERROR ),
    error_entry( NPP_ERROR_RESERVED ),
    error_entry( NPP_NO_OPERATION_WARNING ),
    error_entry( NPP_DIVIDE_BY_ZERO_WARNING ),
    error_entry( NPP_WRONG_INTERSECTION_ROI_WARNING ),
#endif

    // Entries listed regardless of NPP_VERSION.
    error_entry( NPP_NOT_SUPPORTED_MODE_ERROR ),
    error_entry( NPP_ROUND_MODE_NOT_SUPPORTED_ERROR ),
    error_entry( NPP_RESIZE_NO_OPERATION_ERROR ),
    error_entry( NPP_LUT_NUMBER_OF_LEVELS_ERROR ),
    error_entry( NPP_TEXTURE_BIND_ERROR ),
    error_entry( NPP_WRONG_INTERSECTION_ROI_ERROR ),
    error_entry( NPP_NOT_EVEN_STEP_ERROR ),
    error_entry( NPP_INTERPOLATION_ERROR ),
    error_entry( NPP_RESIZE_FACTOR_ERROR ),
    error_entry( NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR ),
    error_entry( NPP_MEMCPY_ERROR ),
    error_entry( NPP_ALIGNMENT_ERROR ),
    error_entry( NPP_STEP_ERROR ),
    error_entry( NPP_SIZE_ERROR ),
    error_entry( NPP_NULL_POINTER_ERROR ),
    error_entry( NPP_CUDA_KERNEL_EXECUTION_ERROR ),
    error_entry( NPP_NOT_IMPLEMENTED_ERROR ),
    error_entry( NPP_ERROR ),
    error_entry( NPP_NO_ERROR ),
    error_entry( NPP_SUCCESS ),
    error_entry( NPP_WRONG_INTERSECTION_QUAD_WARNING ),
    error_entry( NPP_MISALIGNED_DST_ROI_WARNING ),
    error_entry( NPP_AFFINE_QUAD_INCORRECT_WARNING ),
    error_entry( NPP_DOUBLE_SIZE_WARNING )
};

// Number of rows in npp_errors.
const size_t npp_error_num = sizeof(npp_errors) / sizeof(npp_errors[0]);
1127
1128 const ErrorEntry cu_errors [] =
1129 {
1130 error_entry( CUDA_SUCCESS ),
1131 error_entry( CUDA_ERROR_INVALID_VALUE ),
1132 error_entry( CUDA_ERROR_OUT_OF_MEMORY ),
1133 error_entry( CUDA_ERROR_NOT_INITIALIZED ),
1134 error_entry( CUDA_ERROR_DEINITIALIZED ),
1135 error_entry( CUDA_ERROR_PROFILER_DISABLED ),
1136 error_entry( CUDA_ERROR_PROFILER_NOT_INITIALIZED ),
1137 error_entry( CUDA_ERROR_PROFILER_ALREADY_STARTED ),
1138 error_entry( CUDA_ERROR_PROFILER_ALREADY_STOPPED ),
1139 error_entry( CUDA_ERROR_NO_DEVICE ),
1140 error_entry( CUDA_ERROR_INVALID_DEVICE ),
1141 error_entry( CUDA_ERROR_INVALID_IMAGE ),
1142 error_entry( CUDA_ERROR_INVALID_CONTEXT ),
1143 error_entry( CUDA_ERROR_CONTEXT_ALREADY_CURRENT ),
1144 error_entry( CUDA_ERROR_MAP_FAILED ),
1145 error_entry( CUDA_ERROR_UNMAP_FAILED ),
1146 error_entry( CUDA_ERROR_ARRAY_IS_MAPPED ),
1147 error_entry( CUDA_ERROR_ALREADY_MAPPED ),
1148 error_entry( CUDA_ERROR_NO_BINARY_FOR_GPU ),
1149 error_entry( CUDA_ERROR_ALREADY_ACQUIRED ),
1150 error_entry( CUDA_ERROR_NOT_MAPPED ),
1151 error_entry( CUDA_ERROR_NOT_MAPPED_AS_ARRAY ),
1152 error_entry( CUDA_ERROR_NOT_MAPPED_AS_POINTER ),
1153 error_entry( CUDA_ERROR_ECC_UNCORRECTABLE ),
1154 error_entry( CUDA_ERROR_UNSUPPORTED_LIMIT ),
1155 error_entry( CUDA_ERROR_CONTEXT_ALREADY_IN_USE ),
1156 error_entry( CUDA_ERROR_INVALID_SOURCE ),
1157 error_entry( CUDA_ERROR_FILE_NOT_FOUND ),
1158 error_entry( CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND ),
1159 error_entry( CUDA_ERROR_SHARED_OBJECT_INIT_FAILED ),
1160 error_entry( CUDA_ERROR_OPERATING_SYSTEM ),
1161 error_entry( CUDA_ERROR_INVALID_HANDLE ),
1162 error_entry( CUDA_ERROR_NOT_FOUND ),
1163 error_entry( CUDA_ERROR_NOT_READY ),
1164 error_entry( CUDA_ERROR_LAUNCH_FAILED ),
1165 error_entry( CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES ),
1166 error_entry( CUDA_ERROR_LAUNCH_TIMEOUT ),
1167 error_entry( CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING ),
1168 error_entry( CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED ),
1169 error_entry( CUDA_ERROR_PEER_ACCESS_NOT_ENABLED ),
1170 error_entry( CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE ),
1171 error_entry( CUDA_ERROR_CONTEXT_IS_DESTROYED ),
1172 error_entry( CUDA_ERROR_ASSERT ),
1173 error_entry( CUDA_ERROR_TOO_MANY_PEERS ),
1174 error_entry( CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED ),
1175 error_entry( CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED ),
1176 error_entry( CUDA_ERROR_UNKNOWN )
1177 };
1178
1179 const size_t cu_errors_num = sizeof(cu_errors) / sizeof(cu_errors[0]);
1180
getErrorString(int code,const ErrorEntry * errors,size_t n)1181 cv::String getErrorString(int code, const ErrorEntry* errors, size_t n)
1182 {
1183 size_t idx = std::find_if(errors, errors + n, ErrorEntryComparer(code)) - errors;
1184
1185 const char* msg = (idx != n) ? errors[idx].str : "Unknown error code";
1186 cv::String str = cv::format("%s [Code = %d]", msg, code);
1187
1188 return str;
1189 }
1190 }
1191
1192 #endif
1193
getNppErrorMessage(int code)1194 String cv::cuda::getNppErrorMessage(int code)
1195 {
1196 #ifndef HAVE_CUDA
1197 CV_UNUSED(code);
1198 return String();
1199 #else
1200 return getErrorString(code, npp_errors, npp_error_num);
1201 #endif
1202 }
1203
getCudaDriverApiErrorMessage(int code)1204 String cv::cuda::getCudaDriverApiErrorMessage(int code)
1205 {
1206 #ifndef HAVE_CUDA
1207 CV_UNUSED(code);
1208 return String();
1209 #else
1210 return getErrorString(code, cu_errors, cu_errors_num);
1211 #endif
1212 }
1213