1 // This program demonstrates how to find an element in a vector
2 // using the CUDA standard algorithms in Taskflow.
3
#include <algorithm>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <iterator>
#include <stdexcept>
#include <vector>

#include <taskflow/cudaflow.hpp>
5
// Benchmarks tf::cuda_find_if against std::find_if on identical random data.
//
// Usage: ./cuda_find N iterations
//   N          - number of integers in the searched array (must be positive)
//   iterations - number of find rounds to run and time
//
// Each round refills both arrays with the same rand() values, searches for
// the value 100 on the GPU and on the CPU, adds the elapsed microseconds of
// each search to a running total, and checks that both searches report the
// same index. Throws std::runtime_error if the two indices disagree.
int main(int argc, char* argv[]) {

  if(argc != 3) {
    std::cerr << "usage: ./cuda_find N iterations\n";
    std::exit(EXIT_FAILURE);
  }

  // std::atoi yields a signed value; reject a non-positive size and a
  // negative round count here, before the conversion to unsigned would
  // silently wrap them into huge numbers.
  const int n_arg = std::atoi(argv[1]);
  const int m_arg = std::atoi(argv[2]);

  if(n_arg <= 0 || m_arg < 0) {
    std::cerr << "usage: ./cuda_find N iterations\n";
    std::exit(EXIT_FAILURE);
  }

  const unsigned N = static_cast<unsigned>(n_arg);
  const unsigned M = static_cast<unsigned>(m_arg);

  // gpu data (allocated through tf::cuda_malloc_shared and indexed directly
  // from host code below)
  auto gdata = tf::cuda_malloc_shared<int>(N);
  auto gfind = tf::cuda_malloc_shared<unsigned>(1);

  // cpu data
  auto hdata = std::vector<int>(N);

  // accumulated search time in microseconds
  size_t tgpu{0}, tcpu{0};

  // run exactly M rounds, as promised by the usage message
  for(unsigned round = 0; round < M; ++round) {

    // initialize the data: both arrays receive the same values
    for(unsigned i=0; i<N; i++) {
      auto k = rand();
      gdata[i] = k;
      hdata[i] = k;
    }

    // --------------------------------------------------------------------------
    // GPU find
    // --------------------------------------------------------------------------
    // the timed region covers policy construction, the find call, and the
    // synchronize that waits for the result
    auto beg = std::chrono::steady_clock::now();
    auto p = tf::cudaDefaultExecutionPolicy{};
    tf::cuda_find_if(
      p, gdata, gdata+N, gfind, []__device__(int v) { return v == 100; }
    );
    p.synchronize();
    auto end = std::chrono::steady_clock::now();
    tgpu += std::chrono::duration_cast<std::chrono::microseconds>(end-beg).count();

    // --------------------------------------------------------------------------
    // CPU find
    // --------------------------------------------------------------------------
    beg = std::chrono::steady_clock::now();
    auto hiter = std::find_if(
      hdata.begin(), hdata.end(), [=](int v) { return v == 100; }
    );
    end = std::chrono::steady_clock::now();
    tcpu += std::chrono::duration_cast<std::chrono::microseconds>(end-beg).count();

    // --------------------------------------------------------------------------
    // verify the result
    // --------------------------------------------------------------------------
    const unsigned hfind = static_cast<unsigned>(
      std::distance(hdata.begin(), hiter)
    );
    const unsigned gidx = *gfind;

    if(gidx != hfind) {
      // An index >= N does not name a valid element (hfind == N when the CPU
      // search found nothing), so only dereference indices that are in range.
      if(gidx < N) {
        printf("gdata[%u]=%d, ", gidx, gdata[gidx]);
      }
      else {
        printf("gdata[%u]=<out of range>, ", gidx);
      }

      if(hfind < N) {
        printf("hdata[%u]=%d\n", hfind, hdata[hfind]);
      }
      else {
        printf("hdata[%u]=<out of range>\n", hfind);
      }

      // release the shared allocations before reporting the failure
      tf::cuda_free(gdata);
      tf::cuda_free(gfind);

      throw std::runtime_error("incorrect result");
    }
  }

  // output the time
  std::cout << "GPU time: " << tgpu << '\n'
            << "CPU time: " << tcpu << std::endl;

  // delete the memory
  tf::cuda_free(gdata);
  tf::cuda_free(gfind);

  return 0;
}
77