1 /* NUMrandom.cpp
2  *
3  * Copyright (C) 1992-2006,2008,2011,2012,2014-2018,2020 Paul Boersma
4  *
5  * This code is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or (at
8  * your option) any later version.
9  *
10  * This code is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
13  * See the GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this work. If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 /*
20    A C-program for MT19937-64 (2014/2/23 version).
21    Coded by Takuji Nishimura and Makoto Matsumoto.
22 
23    This is a 64-bit version of Mersenne Twister pseudorandom number
24    generator.
25 
26    Before using, initialize the state by using init_genrand64(seed)
27    or init_by_array64(init_key, key_length).
28 
29    Copyright (C) 2004, 2014, Makoto Matsumoto and Takuji Nishimura,
30    All rights reserved.
31 
32    Redistribution and use in source and binary forms, with or without
33    modification, are permitted provided that the following conditions
34    are met:
35 
36      1. Redistributions of source code must retain the above copyright
37         notice, this list of conditions and the following disclaimer.
38 
39      2. Redistributions in binary form must reproduce the above copyright
40         notice, this list of conditions and the following disclaimer in the
41         documentation and/or other materials provided with the distribution.
42 
43      3. The names of its contributors may not be used to endorse or promote
44         products derived from this software without specific prior written
45         permission.
46 
47    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
48    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
49    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
50    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
51    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
52    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
53    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
54    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
55    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
56    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
57    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
58 
59    References:
60    T. Nishimura, ``Tables of 64-bit Mersenne Twisters''
61      ACM Transactions on Modeling and
62      Computer Simulation 10. (2000) 348--357.
63    M. Matsumoto and T. Nishimura,
64      ``Mersenne Twister: a 623-dimensionally equidistributed
65        uniform pseudorandom number generator''
66      ACM Transactions on Modeling and
67      Computer Simulation 8. (Jan. 1998) 3--30.
68 
69    Any feedback is very welcome.
70    http://www.math.hiroshima-u.ac.jp/~m-mat/MT/emt.html
71    email: m-mat @ math.sci.hiroshima-u.ac.jp (remove spaces)
72 */
73 
74 #if defined (__MINGW32__) || defined (linux)
75 	#define UINT64_C(n)  n ## ULL
76 #endif
77 #include <unistd.h>
78 #include "melder.h"
79 #include <chrono>
80 
81 #define NN  312
82 #define MM  156
83 #define MATRIX_A  UINT64_C (0xB5026F5AA96619E9)
84 #define UM  UINT64_C (0xFFFFFFFF80000000) /* Most significant 33 bits */
85 #define LM  UINT64_C (0x7FFFFFFF) /* Least significant 31 bits */
86 
87 class NUMrandom_State { public:
88 
89 	/** The state vector.
90 	 */
91 	uint64 array [NN];
92 
93 	/** The pointer into the state vector.
94 		Equals NN + 1 iff the array has not been initialized.
95 	 */
96 	int index;
NUMrandom_State()97 	NUMrandom_State () : index (NN + 1) {}
98 		// this initialization will lead to an immediate crash
99 		// when NUMrandomFraction() is called without NUMrandom_initXXX() having been called before;
100 		// without this initialization, it would be detected only after 312 calls to NUMrandomFraction()
101 
102 	bool secondAvailable;
103 	double y;
104 
105 	/**
106 		Initialize the whole array with one seed.
107 		This can be used for testing whether our implementation is correct (i.e. predicts the correct published sequence)
108 		and perhaps for generating reproducible sequences.
109 	 */
init_genrand64(uint64 seed)110 	uint64 init_genrand64 (uint64 seed) {
111 		array [0] = seed;
112 		for (index = 1; index < NN; index ++) {
113 			array [index] =
114 				(UINT64_C (6364136223846793005) * (array [index - 1] ^ (array [index - 1] >> 62))
115 				+ (uint64) index);
116 		}
117 		return array [NN - 1];
118 	}
119 
120 	/* initialize by an array with array-length */
121 	/* init_key is the array for initializing keys */
122 	/* key_length is its length */
123 	void init_by_array64 (uint64 init_key[], unsigned int key_length);
124 
125 } states [17];
126 
127 /* initialize the array with a number of seeds */
init_by_array64(uint64 init_key[],unsigned int key_length)128 void NUMrandom_State :: init_by_array64 (uint64 init_key [], unsigned int key_length)
129 {
130 	init_genrand64 (UINT64_C (19650218));   // warm it up
131 
132 	unsigned int i = 1, j = 0;
133 	unsigned int k = ( NN > key_length ? NN : key_length );
134 	for (; k; k --) {
135 		array [i] = (array [i] ^ ((array [i - 1] ^ (array [i - 1] >> 62)) * UINT64_C (3935559000370003845)))
136 		  + init_key [j] + (uint64) j;   // non-linear
137 		i ++;
138 		j ++;
139 		if (i >= NN) { array [0] = array [NN - 1]; i = 1; }
140 		if (j >= key_length) j = 0;
141 	}
142 	for (k = NN - 1; k; k --) {
143 		array [i] = (array [i] ^ ((array [i - 1] ^ (array [i - 1] >> 62)) * UINT64_C (2862933555777941757)))
144 		  - (uint64) i;   // non-linear
145 		i ++;
146 		if (i >= NN) { array [0] = array [NN - 1]; i = 1; }
147 	}
148 
149 	array [0] = UINT64_C (1) << 63;   // MSB is 1; assuring non-zero initial array
150 }
151 
getTicksSince1969()152 static uint64 getTicksSince1969 () {
153 	using namespace std::chrono;
154 	const auto timePoint = system_clock::now ();
155 	const auto duration = timePoint. time_since_epoch ();
156 	return uint64 (duration. count ());
157 	//Melder_casual (U"ticks since 1969: ", ticksSince1969);
158 }
159 
getTicksSinceBoot()160 static uint64 getTicksSinceBoot () {
161 	using namespace std::chrono;
162 	const auto timePoint = high_resolution_clock::now ();
163 	const auto duration = timePoint. time_since_epoch ();
164 	return uint64 (duration. count ());
165 	//Melder_casual (U"ticks since boot: ", ticksSinceBoot);
166 }
167 
168 static bool theInited = false;
NUMrandom_initializeSafelyAndUnpredictably()169 void NUMrandom_initializeSafelyAndUnpredictably () {
170 	const uint64 ticksSince1969 = getTicksSince1969 ();   // possibly microseconds
171 	const uint64 ticksSinceBoot = getTicksSinceBoot ();   // possibly nanoseconds
172 	for (int threadNumber = 0; threadNumber <= 16; threadNumber ++) {
173 		constexpr integer numberOfKeys = 7;
174 		uint64 keys [numberOfKeys];
175 		keys [0] = ticksSince1969;   // unique between boots of the same computer
176 		keys [1] = UINT64_C (7320321686725470078) + uint64 (threadNumber);   // unique between threads in the same process
177 		switch (threadNumber) {
178 			case  0: keys [2] = UINT64_C  (4492812493098689432); keys [3] = UINT64_C  (8902321878452586268); break;
179 			case  1: keys [2] = UINT64_C  (1875086582568685862); keys [3] = UINT64_C (12243257483652989599); break;
180 			case  2: keys [2] = UINT64_C  (9040925727554857487); keys [3] = UINT64_C  (8037578605604605534); break;
181 			case  3: keys [2] = UINT64_C (11168476768576857685); keys [3] = UINT64_C  (7862359785763816517); break;
182 			case  4: keys [2] = UINT64_C  (3878901748368466876); keys [3] = UINT64_C  (3563078257726526076); break;
183 			case  5: keys [2] = UINT64_C  (2185735817578415800); keys [3] = UINT64_C   (198502654671560756); break;
184 			case  6: keys [2] = UINT64_C (12248047509814562486); keys [3] = UINT64_C  (9836250167165762757); break;
185 			case  7: keys [2] = UINT64_C    (28362088588870143); keys [3] = UINT64_C  (8756376201767075602); break;
186 			case  8: keys [2] = UINT64_C  (5758130586486546775); keys [3] = UINT64_C  (4213784157469743413); break;
187 			case  9: keys [2] = UINT64_C  (8508416536565170756); keys [3] = UINT64_C  (2856175717654375656); break;
188 			case 10: keys [2] = UINT64_C  (2802356275260644756); keys [3] = UINT64_C  (2309872134087235167); break;
189 			case 11: keys [2] = UINT64_C   (230875784065064545); keys [3] = UINT64_C  (1209802371478023476); break;
190 			case 12: keys [2] = UINT64_C  (6520185868568714577); keys [3] = UINT64_C  (2173615001556504015); break;
191 			case 13: keys [2] = UINT64_C  (9082605608605765650); keys [3] = UINT64_C  (1204167447560475647); break;
192 			case 14: keys [2] = UINT64_C  (1238716515545475765); keys [3] = UINT64_C  (8435674023875847388); break;
193 			case 15: keys [2] = UINT64_C  (6127715675014756456); keys [3] = UINT64_C  (2435788450287508457); break;
194 			case 16: keys [2] = UINT64_C  (1081237546238975884); keys [3] = UINT64_C  (2939783238574293882); break;
195 			default: Melder_fatal (U"Thread number too high.");
196 		}
197 		keys [4] = (uint64) (int64) getpid ();   // unique between processes that run simultaneously on the same computer
198 		keys [5] = ticksSinceBoot;   // some extra randomness
199 		static uint64 callInstance = 0;
200 		keys [6] = UINT64_C (3642334578453) + (++ callInstance);
201 		states [threadNumber]. init_by_array64 (keys, numberOfKeys);
202 	}
203 	theInited = true;
204 }
NUMrandom_initializeWithSeedUnsafelyButPredictably(uint64 seed)205 void NUMrandom_initializeWithSeedUnsafelyButPredictably (uint64 seed) {
206 	for (int threadNumber = 0; threadNumber <= 16; threadNumber ++)
207 		seed = states [threadNumber]. init_genrand64 (seed);
208 	theInited = true;
209 }
210 
211 /* Throughout the years, several versions for "zero or magic" have been proposed. Choose the fastest. */
212 
213 #define ZERO_OR_MAGIC_VERSION  3
214 
215 #if ZERO_OR_MAGIC_VERSION == 1   // M&N 1999
216 	#define ZERO_OR_MAGIC  ( (x & UINT64_C (1)) ? MATRIX_A : UINT64_C (0) )
217 #elif ZERO_OR_MAGIC_VERSION == 2
218 	#define ZERO_OR_MAGIC  ( (x & UINT64_C (1)) * MATRIX_A )
219 #else   // M&N 2014
220 	constexpr uint64 mag01 [2] { UINT64_C (0), MATRIX_A };
221 	#define ZERO_OR_MAGIC  mag01 [(int) (x & UINT64_C (1))]
222 #endif
223 
NUMrandomFraction()224 double NUMrandomFraction () {
225 	NUMrandom_State *me = & states [0];
226 	uint64 x;
227 
228 	if (my index >= NN) {   // generate NN words at a time
229 
230 		Melder_assert (theInited);   // if NUMrandom_initXXX() hasn't been called, we'll detect that here, probably in the first call
231 
232 		int i;
233 		for (i = 0; i < NN - MM; i ++) {
234 			x = (my array [i] & UM) | (my array [i + 1] & LM);
235 			my array [i] = my array [i + MM] ^ (x >> 1) ^ ZERO_OR_MAGIC;
236 		}
237 		for (; i < NN - 1; i ++) {
238 			x = (my array [i] & UM) | (my array [i + 1] & LM);
239 			my array [i] = my array [i + (MM - NN)] ^ (x >> 1) ^ ZERO_OR_MAGIC;
240 		}
241 		x = (my array [NN - 1] & UM) | (my array [0] & LM);
242 		my array [NN - 1] = my array [MM - 1] ^ (x >> 1) ^ ZERO_OR_MAGIC;
243 
244 		my index = 0;
245 	}
246 
247 	x = my array [my index ++];
248 
249 	x ^= (x >> 29) & UINT64_C (0x5555555555555555);
250 	x ^= (x << 17) & UINT64_C (0x71D67FFFEDA60000);
251 	x ^= (x << 37) & UINT64_C (0xFFF7EEE000000000);
252 	x ^= (x >> 43);
253 
254 	return (x >> 11) * (1.0/9007199254740992.0);
255 }
256 
NUMrandomFraction_mt(int threadNumber)257 double NUMrandomFraction_mt (int threadNumber) {
258 	NUMrandom_State *me = & states [threadNumber];
259 	uint64 x;
260 
261 	if (my index >= NN) {   // generate NN words at a time
262 
263 		Melder_assert (theInited);
264 
265 		int i;
266 		for (i = 0; i < NN - MM; i ++) {
267 			x = (my array [i] & UM) | (my array [i + 1] & LM);
268 			my array [i] = my array [i + MM] ^ (x >> 1) ^ ZERO_OR_MAGIC;
269 		}
270 		for (; i < NN - 1; i ++) {
271 			x = (my array [i] & UM) | (my array [i + 1] & LM);
272 			my array [i] = my array [i + (MM - NN)] ^ (x >> 1) ^ ZERO_OR_MAGIC;
273 		}
274 		x = (my array [NN - 1] & UM) | (my array [0] & LM);
275 		my array [NN - 1] = my array [MM - 1] ^ (x >> 1) ^ ZERO_OR_MAGIC;
276 
277 		my index = 0;
278 	}
279 
280 	x = my array [my index ++];
281 
282 	x ^= (x >> 29) & UINT64_C (0x5555555555555555);
283 	x ^= (x << 17) & UINT64_C (0x71D67FFFEDA60000);
284 	x ^= (x << 37) & UINT64_C (0xFFF7EEE000000000);
285 	x ^= (x >> 43);
286 
287 	return (x >> 11) * (1.0/9007199254740992.0);
288 }
289 
NUMrandomUniform(double lowest,double highest)290 double NUMrandomUniform (double lowest, double highest) {
291 	return lowest + (highest - lowest) * NUMrandomFraction ();
292 }
293 
NUMrandomInteger(integer lowest,integer highest)294 integer NUMrandomInteger (integer lowest, integer highest) {
295 	return lowest + (integer) ((highest - lowest + 1) * NUMrandomFraction ());   // round down by truncation, because positive
296 }
297 
NUMrandomBernoulli(double probability)298 bool NUMrandomBernoulli (double probability) {
299 	return NUMrandomFraction() < probability;
300 }
301 
NUMrandomBernoulli_real(double probability)302 double NUMrandomBernoulli_real (double probability) {
303 	return (double) (NUMrandomFraction() < probability);
304 }
305 
306 #define repeat  do
307 #define until(cond)  while (! (cond))
NUMrandomGauss(double mean,double standardDeviation)308 double NUMrandomGauss (double mean, double standardDeviation) {
309 	NUMrandom_State *me = & states [0];
310 	/*
311 		Knuth, p. 122.
312 	*/
313 	if (my secondAvailable) {
314 		my secondAvailable = false;
315 		return mean + standardDeviation * my y;
316 	} else {
317 		double s, x;
318 		repeat {
319 			x = 2.0 * NUMrandomFraction () - 1.0;   // inside the square [-1; 1] x [-1; 1]
320 			my y = 2.0 * NUMrandomFraction () - 1.0;
321 			s = x * x + my y * my y;
322 		} until (s < 1.0);   // inside the unit circle
323 		if (s == 0.0) {
324 			x = my y = 0.0;
325 		} else {
326 			double factor = sqrt (-2.0 * log (s) / s);
327 			x *= factor;
328 			my y *= factor;
329 		}
330 		my secondAvailable = true;
331 		return mean + standardDeviation * x;
332 	}
333 }
334 
NUMrandomGauss_mt(int threadNumber,double mean,double standardDeviation)335 double NUMrandomGauss_mt (int threadNumber, double mean, double standardDeviation) {
336 	NUMrandom_State *me = & states [threadNumber];
337 	/*
338 		Knuth, p. 122.
339 	*/
340 	if (my secondAvailable) {
341 		my secondAvailable = false;
342 		return mean + standardDeviation * my y;
343 	} else {
344 		double s, x;
345 		repeat {
346 			x = 2.0 * NUMrandomFraction_mt (threadNumber) - 1.0;   // inside the square [-1; 1] x [-1; 1]
347 			my y = 2.0 * NUMrandomFraction_mt (threadNumber) - 1.0;
348 			s = x * x + my y * my y;
349 		} until (s < 1.0);   // inside the unit circle
350 		if (s == 0.0) {
351 			x = my y = 0.0;
352 		} else {
353 			double factor = sqrt (-2.0 * log (s) / s);
354 			x *= factor;
355 			my y *= factor;
356 		}
357 		my secondAvailable = true;
358 		return mean + standardDeviation * x;
359 	}
360 }
361 
NUMrandomPoisson(double mean)362 double NUMrandomPoisson (double mean) {
363 	/*
364 		The Poisson distribution is
365 
366 			P(k) = mean^k * exp (- mean) / k!
367 
368 		We have to find a function, with known primitive,
369 		that is always (a bit) greater than P (k).
370 		This function is based on the Lorentzian distribution,
371 		with a maximum of P(mean)/0.9 at k=mean:
372 
373 			f (k) = mean^mean * exp (- mean) / mean! / (0.9 * (1 + (k - mean)^2 / (2 * mean)))
374 
375 		The tangent is computed as the deviate
376 
377 			tangent = tan (pi * unif (0, 1))
378 
379 		This must equal the square root of (k - mean)^2 / (2 * mean),
380 		so that a trial value for k is given by
381 
382 			k = floor (mean + tangent * sqrt (2 * mean))
383 
384 		The probability that this is a good value is proportionate to the ratio of the Poisson
385 		distribution and the encapsulating function:
386 
387 			probability = P (k) / f (k) = 0.9 * (1 + tangent^2) * mean ^ (k - mean) * mean! / k!
388 
389 		The last two factors can be calculated as
390 
391 			exp ((k - mean) * ln (mean) + lnGamma (mean + 1) - lnGamma (k + 1))
392 	*/
393 	static double previousMean = -1.0;   // this routine may well be called repeatedly with the same mean; optimize
394 	if (mean < 8.0) {
395 		static double expMean;
396 		double product = 1.0;
397 		integer result = -1;
398 		if (mean != previousMean) {
399 			previousMean = mean;
400 			expMean = exp (- mean);
401 		}
402 		repeat {
403 			product *= NUMrandomFraction ();
404 			result ++;
405 		} until (product <= expMean);
406 		return result;
407 	} else {
408 		static double sqrtTwoMean, lnMean, lnMeanFactorial;
409 		double result, probability, tangent;
410 		if (mean != previousMean) {
411 			previousMean = mean;
412 			sqrtTwoMean = sqrt (2.0 * mean);
413 			lnMean = log (mean);
414 			lnMeanFactorial = NUMlnGamma (mean + 1.0);
415 		}
416 		repeat {
417 			repeat {
418 				tangent = tan (NUMpi * NUMrandomFraction ());
419 				result = mean + tangent * sqrtTwoMean;
420 			} until (result >= 0.0);
421 			result = floor (result);
422 			probability = 0.9 * (1.0 + tangent * tangent) * exp ((result - mean) * lnMean + lnMeanFactorial - NUMlnGamma (result + 1.0));
423 		} until (NUMrandomFraction () <= probability);
424 		return result;
425 	}
426 }
427 
NUMhashString(conststring32 string)428 uint32 NUMhashString (conststring32 string) {
429 	/*
430 	 * Jenkins' one-at-a-time hash.
431 	 */
432 	uint32 hash = 0;
433 	for (char32 kar = *string; kar != U'\0'; kar = * (++ string)) {
434 		hash += (kar >> 16) & 0xFF;   // highest five bits (a char32 actually has only 21 significant bits)
435 		hash += (hash << 10);
436 		hash ^= (hash >> 6);
437 		hash += (kar >> 8) & 0xFF;   // middle 8 bits
438 		hash += (hash << 10);
439 		hash ^= (hash >> 6);
440 		hash += kar & 0xFF;   // lowest 8 bits
441 		hash += (hash << 10);
442 		hash ^= (hash >> 6);
443 	}
444 	hash += (hash << 3);
445 	hash ^= (hash >> 11);
446 	hash += (hash << 15);
447 	return hash;
448 }
449 
450 /* End of file NUMrandom.cpp */
451