/* This sample demonstrates how to run independent tasks
   on different GPUs */
3 
// Disable some warnings caused by the CUDA headers
5 #if defined(_MSC_VER)
6 #pragma warning(disable: 4201 4408 4100)
7 #endif
8 
9 #include <iostream>
10 #include "opencv2/core.hpp"
11 #include "opencv2/cudaarithm.hpp"
12 
13 #if !defined(HAVE_CUDA)
14 
// Fallback entry point used when HAVE_CUDA is not defined: it only reports
// that CUDA support is missing and exits with status 0.
int main()
{
    const char* const notice =
        "CUDA support is required (OpenCV CMake parameter 'WITH_CUDA' must be true).";
    std::cout << notice << std::endl;
    return 0;
}
20 
21 #else
22 
23 using namespace std;
24 using namespace cv;
25 using namespace cv::cuda;
26 
27 struct Worker : public cv::ParallelLoopBody
28 {
operator ()Worker29     void operator()(const Range& r) const CV_OVERRIDE
30     {
31         for (int i = r.start; i < r.end; ++i) { this->operator()(i); }
32     }
33     void operator()(int device_id) const;
34 };
35 
main()36 int main()
37 {
38     int num_devices = getCudaEnabledDeviceCount();
39     if (num_devices < 2)
40     {
41         std::cout << "Two or more GPUs are required\n";
42         return -1;
43     }
44     for (int i = 0; i < num_devices; ++i)
45     {
46         cv::cuda::printShortCudaDeviceInfo(i);
47 
48         DeviceInfo dev_info(i);
49         if (!dev_info.isCompatible())
50         {
51             std::cout << "CUDA module isn't built for GPU #" << i << " ("
52                  << dev_info.name() << ", CC " << dev_info.majorVersion()
53                  << dev_info.minorVersion() << "\n";
54             return -1;
55         }
56     }
57 
58     // Execute calculation in two threads using two GPUs
59     cv::Range devices(0, 2);
60     cv::parallel_for_(devices, Worker(), devices.size());
61 
62     return 0;
63 }
64 
65 
operator ()(int device_id) const66 void Worker::operator()(int device_id) const
67 {
68     setDevice(device_id);
69 
70     Mat src(1000, 1000, CV_32F);
71     Mat dst;
72 
73     RNG rng(0);
74     rng.fill(src, RNG::UNIFORM, 0, 1);
75 
76     // CPU works
77     cv::transpose(src, dst);
78 
79     // GPU works
80     GpuMat d_src(src);
81     GpuMat d_dst;
82     cuda::transpose(d_src, d_dst);
83 
84     // Check results
85     bool passed = cv::norm(dst - Mat(d_dst), NORM_INF) < 1e-3;
86     std::cout << "GPU #" << device_id << " (" << DeviceInfo().name() << "): "
87         << (passed ? "passed" : "FAILED") << endl;
88 
89     // Deallocate data here, otherwise deallocation will be performed
90     // after context is extracted from the stack
91     d_src.release();
92     d_dst.release();
93 }
94 
95 #endif
96