// This program demonstrates how to find an element in a vector
// using the CUDA standard algorithms in Taskflow, and compares the
// result and the elapsed time against std::find_if on the CPU.
3 
4 #include <taskflow/cudaflow.hpp>
5 
// Benchmarks tf::cuda_find_if against std::find_if on identical random data.
//
// Command line:
//   argv[1] (N)          - number of elements in the vector; must be positive
//   argv[2] (iterations) - number of find rounds to run; must not be negative
//
// Each round refills both buffers with the same rand() values, searches for
// the value 100 on the GPU and on the CPU, accumulates the elapsed
// microseconds of each search, and cross-checks the two resulting indices.
// Throws std::runtime_error if the two indices differ.
int main(int argc, char* argv[]) {

  if(argc != 3) {
    std::cerr << "usage: ./cuda_find N iterations\n";
    std::exit(EXIT_FAILURE);
  }

  // Parse as signed first so that a negative argument is rejected instead of
  // silently wrapping around to a huge unsigned size or round count.
  const int n_arg = std::atoi(argv[1]);
  const int m_arg = std::atoi(argv[2]);

  if(n_arg <= 0 || m_arg < 0) {
    std::cerr << "usage: ./cuda_find N iterations\n";
    std::exit(EXIT_FAILURE);
  }

  const unsigned N = static_cast<unsigned>(n_arg);
  const unsigned M = static_cast<unsigned>(m_arg);

  // gpu data: the host fills gdata directly and reads the result from gfind
  auto gdata = tf::cuda_malloc_shared<int>(N);
  auto gfind = tf::cuda_malloc_shared<unsigned>(1);

  // Single place that returns both buffers, shared by the normal exit and
  // the verification-failure exit.
  auto release = [&]() {
    tf::cuda_free(gdata);
    tf::cuda_free(gfind);
  };

  // cpu data
  auto hdata = std::vector<int>(N);

  // accumulated search time in microseconds
  size_t tgpu{0}, tcpu{0};

  // run exactly M rounds, as promised by the usage message
  for(unsigned round = 0; round < M; ++round) {

    // initialize the data: both copies receive the same random sequence
    for(unsigned i=0; i<N; i++) {
      auto k = rand();
      gdata[i] = k;
      hdata[i] = k;
    }

    // --------------------------------------------------------------------------
    // GPU find
    // --------------------------------------------------------------------------
    // Note: the execution policy is constructed inside the timed region, so
    // its setup cost is part of the measured GPU time.
    auto beg = std::chrono::steady_clock::now();
    auto p = tf::cudaDefaultExecutionPolicy{};
    tf::cuda_find_if(
      p, gdata, gdata+N, gfind, []__device__(int v) { return v == 100; }
    );
    // wait for the GPU work before stopping the clock and reading *gfind
    p.synchronize();
    auto end = std::chrono::steady_clock::now();
    tgpu += std::chrono::duration_cast<std::chrono::microseconds>(end-beg).count();

    // --------------------------------------------------------------------------
    // CPU find
    // --------------------------------------------------------------------------
    beg = std::chrono::steady_clock::now();
    auto hiter = std::find_if(
      hdata.begin(), hdata.end(), [=](int v) { return v == 100; }
    );
    end = std::chrono::steady_clock::now();
    tcpu += std::chrono::duration_cast<std::chrono::microseconds>(end-beg).count();

    // --------------------------------------------------------------------------
    // verify the result
    // --------------------------------------------------------------------------
    // hfind equals N when the CPU search found nothing (hiter == end()).
    const unsigned hfind = static_cast<unsigned>(
      std::distance(hdata.begin(), hiter)
    );
    const unsigned gidx = *gfind;

    if(gidx != hfind) {
      // Only dereference indices that are inside the buffers; a "not found"
      // index (or a corrupt GPU index) would otherwise read out of bounds.
      if(gidx < N && hfind < N) {
        printf("gdata[%u]=%d, hdata[%u]=%d\n",
          gidx, gdata[gidx], hfind, hdata[hfind]
        );
      }
      else {
        printf("gfind=%u, hfind=%u (N=%u)\n", gidx, hfind, N);
      }
      release();
      throw std::runtime_error("incorrect result");
    }
  }

  // output the time
  std::cout << "GPU time: " << tgpu << '\n'
            << "CPU time: " << tcpu << std::endl;

  // delete the memory
  release();

  return 0;
}
77