1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 //                        Kokkos v. 3.0
6 //       Copyright (2020) National Technology & Engineering
7 //               Solutions of Sandia, LLC (NTESS).
8 //
9 // Under the terms of Contract DE-NA0003525 with NTESS,
10 // the U.S. Government retains certain rights in this software.
11 //
12 // Redistribution and use in source and binary forms, with or without
13 // modification, are permitted provided that the following conditions are
14 // met:
15 //
16 // 1. Redistributions of source code must retain the above copyright
17 // notice, this list of conditions and the following disclaimer.
18 //
19 // 2. Redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution.
22 //
23 // 3. Neither the name of the Corporation nor the names of the
24 // contributors may be used to endorse or promote products derived from
25 // this software without specific prior written permission.
26 //
27 // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
28 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
31 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
34 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 //
39 // Questions? Contact Christian R. Trott (crtrott@sandia.gov)
40 //
41 // ************************************************************************
42 //@HEADER
43 */
44 
#include <Kokkos_Core.hpp>
#include <Kokkos_Random.hpp>
#include <Kokkos_DualView.hpp>
#include <impl/Kokkos_Timer.hpp>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <exception>
#include <string>
50 
51 using DefaultHostType = Kokkos::HostSpace::execution_space;
52 
// Kokkos provides two different random number generators, one with a 64-bit
// and one with a 1024-bit state. These generators are based on Vigna,
// Sebastiano (2014), "An experimental exploration of Marsaglia's xorshift
// generators, scrambled". See: http://arxiv.org/abs/1402.6246
// The generators can be used fully independently on each thread and have been
// tested to produce good statistics for both inter- and intra-thread numbers.
// Note that within a kernel NO random number operations are (team) collective
// operations. Everything can be called within branches. This differs from the
// curand library, where certain operations are required to be called by all
// threads in a block.
62 //
// In Kokkos you are required to create a pool of generator states, so that
// threads can grab their own. On CPU architectures the pool size is equal to
// the number of threads; on CUDA about 128k states are generated (enough to
// give every potentially simultaneously running thread its own state). Within
// a kernel a thread is required to acquire a state from the pool and later
// return it. On CPUs the random number generator is deterministic if the same
// number of threads is used. On GPUs (i.e. using the CUDA backend) it is not
// deterministic because threads acquire states via atomics.
71 
72 // A Functor for generating uint64_t random numbers templated on the
73 // GeneratorPool type
74 template <class GeneratorPool>
75 struct generate_random {
76   // Output View for the random numbers
77   Kokkos::View<uint64_t*> vals;
78 
79   // The GeneratorPool
80   GeneratorPool rand_pool;
81 
82   int samples;
83 
84   // Initialize all members
generate_randomgenerate_random85   generate_random(Kokkos::View<uint64_t*> vals_, GeneratorPool rand_pool_,
86                   int samples_)
87       : vals(vals_), rand_pool(rand_pool_), samples(samples_) {}
88 
89   KOKKOS_INLINE_FUNCTION
operator ()generate_random90   void operator()(int i) const {
91     // Get a random number state from the pool for the active thread
92     typename GeneratorPool::generator_type rand_gen = rand_pool.get_state();
93 
94     // Draw samples numbers from the pool as urand64 between 0 and
95     // rand_pool.MAX_URAND64 Note there are function calls to get other type of
96     // scalars, and also to specify Ranges or get a normal distributed float.
97     for (int k = 0; k < samples; k++)
98       vals(i * samples + k) = rand_gen.urand64();
99 
100     // Give the state back, which will allow another thread to acquire it
101     rand_pool.free_state(rand_gen);
102   }
103 };
104 
main(int argc,char * args[])105 int main(int argc, char* args[]) {
106   if (argc != 3) {
107     printf("Please pass two integers on the command line\n");
108   } else {
109     // Initialize Kokkos
110     Kokkos::initialize(argc, args);
111     int size    = std::stoi(args[1]);
112     int samples = std::stoi(args[2]);
113 
114     // Create two random number generator pools one for 64bit states and one for
115     // 1024 bit states Both take an 64 bit unsigned integer seed to initialize a
116     // Random_XorShift64 generator which is used to fill the generators of the
117     // pool.
118     Kokkos::Random_XorShift64_Pool<> rand_pool64(5374857);
119     Kokkos::Random_XorShift1024_Pool<> rand_pool1024(5374857);
120     Kokkos::DualView<uint64_t*> vals("Vals", size * samples);
121 
122     // Run some performance comparisons
123     Kokkos::Timer timer;
124     Kokkos::parallel_for(size,
125                          generate_random<Kokkos::Random_XorShift64_Pool<> >(
126                              vals.d_view, rand_pool64, samples));
127     Kokkos::fence();
128 
129     timer.reset();
130     Kokkos::parallel_for(size,
131                          generate_random<Kokkos::Random_XorShift64_Pool<> >(
132                              vals.d_view, rand_pool64, samples));
133     Kokkos::fence();
134     double time_64 = timer.seconds();
135 
136     Kokkos::parallel_for(size,
137                          generate_random<Kokkos::Random_XorShift1024_Pool<> >(
138                              vals.d_view, rand_pool1024, samples));
139     Kokkos::fence();
140 
141     timer.reset();
142     Kokkos::parallel_for(size,
143                          generate_random<Kokkos::Random_XorShift1024_Pool<> >(
144                              vals.d_view, rand_pool1024, samples));
145     Kokkos::fence();
146     double time_1024 = timer.seconds();
147 
148     printf("#Time XorShift64*:   %e %e\n", time_64,
149            1.0e-9 * samples * size / time_64);
150     printf("#Time XorShift1024*: %e %e\n", time_1024,
151            1.0e-9 * samples * size / time_1024);
152 
153     Kokkos::deep_copy(vals.h_view, vals.d_view);
154 
155     Kokkos::finalize();
156   }
157   return 0;
158 }
159