/* This sample demonstrates how to perform independent tasks
   on different GPUs */
3
// Disable some warnings that are caused by CUDA headers
5 #if defined(_MSC_VER)
6 #pragma warning(disable: 4201 4408 4100)
7 #endif
8
9 #include <iostream>
10 #include "opencv2/core.hpp"
11 #include "opencv2/cudaarithm.hpp"
12
13 #if !defined(HAVE_CUDA)
14
// Fallback entry point compiled when HAVE_CUDA is not defined: prints a notice
// that CUDA support is required and exits with status 0.
int main()
{
    static const char* const kCudaRequiredNotice =
        "CUDA support is required (OpenCV CMake parameter 'WITH_CUDA' must be true).";

    std::cout << kCudaRequiredNotice << std::endl;
    return 0;
}
20
21 #else
22
23 using namespace std;
24 using namespace cv;
25 using namespace cv::cuda;
26
27 struct Worker : public cv::ParallelLoopBody
28 {
operator ()Worker29 void operator()(const Range& r) const CV_OVERRIDE
30 {
31 for (int i = r.start; i < r.end; ++i) { this->operator()(i); }
32 }
33 void operator()(int device_id) const;
34 };
35
main()36 int main()
37 {
38 int num_devices = getCudaEnabledDeviceCount();
39 if (num_devices < 2)
40 {
41 std::cout << "Two or more GPUs are required\n";
42 return -1;
43 }
44 for (int i = 0; i < num_devices; ++i)
45 {
46 cv::cuda::printShortCudaDeviceInfo(i);
47
48 DeviceInfo dev_info(i);
49 if (!dev_info.isCompatible())
50 {
51 std::cout << "CUDA module isn't built for GPU #" << i << " ("
52 << dev_info.name() << ", CC " << dev_info.majorVersion()
53 << dev_info.minorVersion() << "\n";
54 return -1;
55 }
56 }
57
58 // Execute calculation in two threads using two GPUs
59 cv::Range devices(0, 2);
60 cv::parallel_for_(devices, Worker(), devices.size());
61
62 return 0;
63 }
64
65
operator ()(int device_id) const66 void Worker::operator()(int device_id) const
67 {
68 setDevice(device_id);
69
70 Mat src(1000, 1000, CV_32F);
71 Mat dst;
72
73 RNG rng(0);
74 rng.fill(src, RNG::UNIFORM, 0, 1);
75
76 // CPU works
77 cv::transpose(src, dst);
78
79 // GPU works
80 GpuMat d_src(src);
81 GpuMat d_dst;
82 cuda::transpose(d_src, d_dst);
83
84 // Check results
85 bool passed = cv::norm(dst - Mat(d_dst), NORM_INF) < 1e-3;
86 std::cout << "GPU #" << device_id << " (" << DeviceInfo().name() << "): "
87 << (passed ? "passed" : "FAILED") << endl;
88
89 // Deallocate data here, otherwise deallocation will be performed
90 // after context is extracted from the stack
91 d_src.release();
92 d_dst.release();
93 }
94
95 #endif
96