1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H
11 #define EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H
12 
13 namespace Eigen {
14 namespace internal {
15 
16 namespace {
17 
get_random_seed()18 EIGEN_DEVICE_FUNC uint64_t get_random_seed() {
19 #ifdef __CUDA_ARCH__
20   // We don't support 3d kernels since we currently only use 1 and
21   // 2d kernels.
22   assert(threadIdx.z == 0);
23   return clock64() +
24       blockIdx.x * blockDim.x + threadIdx.x +
25       gridDim.x * blockDim.x * (blockIdx.y * blockDim.y + threadIdx.y);
26 
27 #elif defined _WIN32
28   // Use the current time as a baseline.
29   SYSTEMTIME st;
30   GetSystemTime(&st);
31   int time = st.wSecond + 1000 * st.wMilliseconds;
32   // Mix in a random number to make sure that we get different seeds if
33   // we try to generate seeds faster than the clock resolution.
34   // We need 2 random values since the generator only generate 16 bits at
35   // a time (https://msdn.microsoft.com/en-us/library/398ax69y.aspx)
36   int rnd1 = ::rand();
37   int rnd2 = ::rand();
38   uint64_t rnd = (rnd1 | rnd2 << 16) ^ time;
39   return rnd;
40 
41 #elif defined __APPLE__
42   // Same approach as for win32, except that the random number generator
43   // is better (// https://developer.apple.com/legacy/library/documentation/Darwin/Reference/ManPages/man3/random.3.html#//apple_ref/doc/man/3/random).
44   uint64_t rnd = ::random() ^ mach_absolute_time();
45   return rnd;
46 
47 #else
48   // Augment the current time with pseudo random number generation
49   // to ensure that we get different seeds if we try to generate seeds
50   // faster than the clock resolution.
51   timespec ts;
52   clock_gettime(CLOCK_REALTIME, &ts);
53   uint64_t rnd = ::random() ^ ts.tv_nsec;
54   return rnd;
55 #endif
56 }
57 
PCG_XSH_RS_generator(uint64_t * state)58 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE unsigned PCG_XSH_RS_generator(uint64_t* state) {
59   // TODO: Unify with the implementation in the non blocking thread pool.
60   uint64_t current = *state;
61   // Update the internal state
62   *state = current * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL;
63   // Generate the random output (using the PCG-XSH-RS scheme)
64   return static_cast<unsigned>((current ^ (current >> 22)) >> (22 + (current >> 61)));
65 }
66 
PCG_XSH_RS_state(uint64_t seed)67 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE uint64_t PCG_XSH_RS_state(uint64_t seed) {
68   seed = seed ? seed : get_random_seed();
69   return seed * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL;
70 }
71 
72 }  // namespace
73 
74 
75 template <typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
RandomToTypeUniform(uint64_t * state)76 T RandomToTypeUniform(uint64_t* state) {
77   unsigned rnd = PCG_XSH_RS_generator(state);
78   return static_cast<T>(rnd);
79 }
80 
81 
82 template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
83 Eigen::half RandomToTypeUniform<Eigen::half>(uint64_t* state) {
84   Eigen::half result;
85   // Generate 10 random bits for the mantissa
86   unsigned rnd = PCG_XSH_RS_generator(state);
87   result.x = static_cast<uint16_t>(rnd & 0x3ffu);
88   // Set the exponent
89   result.x |= (static_cast<uint16_t>(15) << 10);
90   // Return the final result
91   return result - Eigen::half(1.0f);
92 }
93 
94 
95 template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
96 float RandomToTypeUniform<float>(uint64_t* state) {
97   typedef union {
98     uint32_t raw;
99     float fp;
100   } internal;
101   internal result;
102   // Generate 23 random bits for the mantissa mantissa
103   const unsigned rnd = PCG_XSH_RS_generator(state);
104   result.raw = rnd & 0x7fffffu;
105   // Set the exponent
106   result.raw |= (static_cast<uint32_t>(127) << 23);
107   // Return the final result
108   return result.fp - 1.0f;
109 }
110 
111 template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
112 double RandomToTypeUniform<double>(uint64_t* state) {
113   typedef union {
114     uint64_t raw;
115     double dp;
116   } internal;
117   internal result;
118   result.raw = 0;
119   // Generate 52 random bits for the mantissa
120   // First generate the upper 20 bits
121   unsigned rnd1 = PCG_XSH_RS_generator(state) & 0xfffffu;
122   // The generate the lower 32 bits
123   unsigned rnd2 = PCG_XSH_RS_generator(state);
124   result.raw = (static_cast<uint64_t>(rnd1) << 32) | rnd2;
125   // Set the exponent
126   result.raw |= (static_cast<uint64_t>(1023) << 52);
127   // Return the final result
128   return result.dp - 1.0;
129 }
130 
131 template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
132 std::complex<float> RandomToTypeUniform<std::complex<float> >(uint64_t* state) {
133   return std::complex<float>(RandomToTypeUniform<float>(state),
134                              RandomToTypeUniform<float>(state));
135 }
136 template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
137 std::complex<double> RandomToTypeUniform<std::complex<double> >(uint64_t* state) {
138   return std::complex<double>(RandomToTypeUniform<double>(state),
139                               RandomToTypeUniform<double>(state));
140 }
141 
142 template <typename T> class UniformRandomGenerator {
143  public:
144   static const bool PacketAccess = true;
145 
146   // Uses the given "seed" if non-zero, otherwise uses a random seed.
147   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator(
148       uint64_t seed = 0) {
149     m_state = PCG_XSH_RS_state(seed);
150   }
UniformRandomGenerator(const UniformRandomGenerator & other)151   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator(
152       const UniformRandomGenerator& other) {
153     m_state = other.m_state;
154   }
155 
156   template<typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
operator()157   T operator()(Index i) const {
158     uint64_t local_state = m_state + i;
159     T result = RandomToTypeUniform<T>(&local_state);
160     m_state = local_state;
161     return result;
162   }
163 
164   template<typename Packet, typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
packetOp(Index i)165   Packet packetOp(Index i) const {
166     const int packetSize = internal::unpacket_traits<Packet>::size;
167     EIGEN_ALIGN_MAX T values[packetSize];
168     uint64_t local_state = m_state + i;
169     for (int j = 0; j < packetSize; ++j) {
170       values[j] = RandomToTypeUniform<T>(&local_state);
171     }
172     m_state = local_state;
173     return internal::pload<Packet>(values);
174   }
175 
176  private:
177   mutable uint64_t m_state;
178 };
179 
180 template <typename Scalar>
181 struct functor_traits<UniformRandomGenerator<Scalar> > {
182   enum {
183     // Rough estimate for floating point, multiplied by ceil(sizeof(T) / sizeof(float)).
184     Cost = 12 * NumTraits<Scalar>::AddCost *
185            ((sizeof(Scalar) + sizeof(float) - 1) / sizeof(float)),
186     PacketAccess = UniformRandomGenerator<Scalar>::PacketAccess
187   };
188 };
189 
190 
191 
192 template <typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
193 T RandomToTypeNormal(uint64_t* state) {
194   // Use the ratio of uniform method to generate numbers following a normal
195   // distribution. See for example Numerical Recipes chapter 7.3.9 for the
196   // details.
197   T u, v, q;
198   do {
199     u = RandomToTypeUniform<T>(state);
200     v = T(1.7156) * (RandomToTypeUniform<T>(state) - T(0.5));
201     const T x = u - T(0.449871);
202     const T y = numext::abs(v) + T(0.386595);
203     q = x*x + y * (T(0.196)*y - T(0.25472)*x);
204   } while (q > T(0.27597) &&
205            (q > T(0.27846) || v*v > T(-4) * numext::log(u) * u*u));
206 
207   return v/u;
208 }
209 
210 template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
211 std::complex<float> RandomToTypeNormal<std::complex<float> >(uint64_t* state) {
212   return std::complex<float>(RandomToTypeNormal<float>(state),
213                              RandomToTypeNormal<float>(state));
214 }
215 template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
216 std::complex<double> RandomToTypeNormal<std::complex<double> >(uint64_t* state) {
217   return std::complex<double>(RandomToTypeNormal<double>(state),
218                               RandomToTypeNormal<double>(state));
219 }
220 
221 
222 template <typename T> class NormalRandomGenerator {
223  public:
224   static const bool PacketAccess = true;
225 
226   // Uses the given "seed" if non-zero, otherwise uses a random seed.
227   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator(uint64_t seed = 0) {
228     m_state = PCG_XSH_RS_state(seed);
229   }
230   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator(
231       const NormalRandomGenerator& other) {
232     m_state = other.m_state;
233   }
234 
235  template<typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
236   T operator()(Index i) const {
237     uint64_t local_state = m_state + i;
238     T result = RandomToTypeNormal<T>(&local_state);
239     m_state = local_state;
240     return result;
241   }
242 
243   template<typename Packet, typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
244   Packet packetOp(Index i) const {
245     const int packetSize = internal::unpacket_traits<Packet>::size;
246     EIGEN_ALIGN_MAX T values[packetSize];
247     uint64_t local_state = m_state + i;
248     for (int j = 0; j < packetSize; ++j) {
249       values[j] = RandomToTypeNormal<T>(&local_state);
250     }
251     m_state = local_state;
252     return internal::pload<Packet>(values);
253   }
254 
255  private:
256   mutable uint64_t m_state;
257 };
258 
259 
260 template <typename Scalar>
261 struct functor_traits<NormalRandomGenerator<Scalar> > {
262   enum {
263     // On average, we need to generate about 3 random numbers
264     // 15 mul, 8 add, 1.5 logs
265     Cost = 3 * functor_traits<UniformRandomGenerator<Scalar> >::Cost +
266            15 * NumTraits<Scalar>::AddCost + 8 * NumTraits<Scalar>::AddCost +
267            3 * functor_traits<scalar_log_op<Scalar> >::Cost / 2,
268     PacketAccess = NormalRandomGenerator<Scalar>::PacketAccess
269   };
270 };
271 
272 
273 } // end namespace internal
274 } // end namespace Eigen
275 
276 #endif // EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H
277