1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 // Kokkos v. 3.0
6 // Copyright (2020) National Technology & Engineering
7 // Solutions of Sandia, LLC (NTESS).
8 //
9 // Under the terms of Contract DE-NA0003525 with NTESS,
10 // the U.S. Government retains certain rights in this software.
11 //
12 // Redistribution and use in source and binary forms, with or without
13 // modification, are permitted provided that the following conditions are
14 // met:
15 //
16 // 1. Redistributions of source code must retain the above copyright
17 // notice, this list of conditions and the following disclaimer.
18 //
19 // 2. Redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution.
22 //
23 // 3. Neither the name of the Corporation nor the names of the
24 // contributors may be used to endorse or promote products derived from
25 // this software without specific prior written permission.
26 //
27 // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
28 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
31 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
34 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 //
39 // Questions? Contact Christian R. Trott (crtrott@sandia.gov)
40 //
41 // ************************************************************************
42 //@HEADER
43 */
44
#include <Kokkos_Core.hpp>
#include <Kokkos_Random.hpp>
#include <Kokkos_DualView.hpp>
#include <impl/Kokkos_Timer.hpp>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <string>
50
51 using DefaultHostType = Kokkos::HostSpace::execution_space;
52
53 // Kokkos provides two different random number generators with a 64 bit and a
54 // 1024 bit state. These generators are based on Vigna, Sebastiano (2014). "An
55 // experimental exploration of Marsaglia's xorshift generators, scrambled" See:
56 // http://arxiv.org/abs/1402.6246 The generators can be used fully independently
57 // on each thread and have been tested to produce good statistics for both inter
58 // and intra thread numbers. Note that within a kernel NO random number
59 // operations are (team) collective operations. Everything can be called within
// branches. This differs from the cuRAND library, where certain operations
// are required to be called by all threads in a block.
62 //
// In Kokkos you are required to create a pool of generator states, so that
// threads can grab their own. On CPU architectures the pool size is equal to
// the thread number, on CUDA about 128k states are generated (enough to give
// every potentially simultaneously running thread its own state). Within a
// kernel a thread is required to acquire a state from the pool and later return
// it. On CPUs the random number generator is deterministic if using the same
// number of threads. On GPUs (i.e. using the CUDA backend) it is not
// deterministic because threads acquire states via atomics.
71
72 // A Functor for generating uint64_t random numbers templated on the
73 // GeneratorPool type
74 template <class GeneratorPool>
75 struct generate_random {
76 // Output View for the random numbers
77 Kokkos::View<uint64_t*> vals;
78
79 // The GeneratorPool
80 GeneratorPool rand_pool;
81
82 int samples;
83
84 // Initialize all members
generate_randomgenerate_random85 generate_random(Kokkos::View<uint64_t*> vals_, GeneratorPool rand_pool_,
86 int samples_)
87 : vals(vals_), rand_pool(rand_pool_), samples(samples_) {}
88
89 KOKKOS_INLINE_FUNCTION
operator ()generate_random90 void operator()(int i) const {
91 // Get a random number state from the pool for the active thread
92 typename GeneratorPool::generator_type rand_gen = rand_pool.get_state();
93
94 // Draw samples numbers from the pool as urand64 between 0 and
95 // rand_pool.MAX_URAND64 Note there are function calls to get other type of
96 // scalars, and also to specify Ranges or get a normal distributed float.
97 for (int k = 0; k < samples; k++)
98 vals(i * samples + k) = rand_gen.urand64();
99
100 // Give the state back, which will allow another thread to acquire it
101 rand_pool.free_state(rand_gen);
102 }
103 };
104
main(int argc,char * args[])105 int main(int argc, char* args[]) {
106 if (argc != 3) {
107 printf("Please pass two integers on the command line\n");
108 } else {
109 // Initialize Kokkos
110 Kokkos::initialize(argc, args);
111 int size = std::stoi(args[1]);
112 int samples = std::stoi(args[2]);
113
114 // Create two random number generator pools one for 64bit states and one for
115 // 1024 bit states Both take an 64 bit unsigned integer seed to initialize a
116 // Random_XorShift64 generator which is used to fill the generators of the
117 // pool.
118 Kokkos::Random_XorShift64_Pool<> rand_pool64(5374857);
119 Kokkos::Random_XorShift1024_Pool<> rand_pool1024(5374857);
120 Kokkos::DualView<uint64_t*> vals("Vals", size * samples);
121
122 // Run some performance comparisons
123 Kokkos::Timer timer;
124 Kokkos::parallel_for(size,
125 generate_random<Kokkos::Random_XorShift64_Pool<> >(
126 vals.d_view, rand_pool64, samples));
127 Kokkos::fence();
128
129 timer.reset();
130 Kokkos::parallel_for(size,
131 generate_random<Kokkos::Random_XorShift64_Pool<> >(
132 vals.d_view, rand_pool64, samples));
133 Kokkos::fence();
134 double time_64 = timer.seconds();
135
136 Kokkos::parallel_for(size,
137 generate_random<Kokkos::Random_XorShift1024_Pool<> >(
138 vals.d_view, rand_pool1024, samples));
139 Kokkos::fence();
140
141 timer.reset();
142 Kokkos::parallel_for(size,
143 generate_random<Kokkos::Random_XorShift1024_Pool<> >(
144 vals.d_view, rand_pool1024, samples));
145 Kokkos::fence();
146 double time_1024 = timer.seconds();
147
148 printf("#Time XorShift64*: %e %e\n", time_64,
149 1.0e-9 * samples * size / time_64);
150 printf("#Time XorShift1024*: %e %e\n", time_1024,
151 1.0e-9 * samples * size / time_1024);
152
153 Kokkos::deep_copy(vals.h_view, vals.d_view);
154
155 Kokkos::finalize();
156 }
157 return 0;
158 }
159