1 #ifndef fast_gauss_noise_h
2 #define fast_gauss_noise_h
3
4 #include <cstdint>
5 #include <cstddef>
6 #include <list>
7 #include <iostream>
8 #include <cstdlib>
9 #include <ctime>
10 #include <cmath>
11 #include <climits>
12 #include <cstring>
13 #include <tuple>
14 #include <typeinfo>
15 #include <gmp.h>
16 #include <mpfr.h>
17 #include "fastrandombytes.h"
18
19 #ifdef BOOST_RAPHSON
20 #include <boost/math/tools/roots.hpp>
21 #endif
22
23
24 //#define OUTPUT_BARRIERS
25 //#define OUTPUT_LUT_FLAGS
26 // Use it to test rare values do not happen too often
27 //#define UNITTEST_ONEMILLION
28
29 namespace nfl {
30
31 template<typename T0, typename T1>
tstbit(T0 x,T1 n)32 constexpr inline auto tstbit(T0 x, T1 n) -> decltype((x << (63 - n)) >> 63 ) { return ((x << (63 - n)) >> 63 ); }
33
34 template<class in_class, class out_class>
35 struct output {
36 out_class val;
37 bool flag;
38 std::list<in_class*> l_b_ptr;
39 };
40
41 template<class in_class, class out_class, unsigned _lu_depth>
42 class FastGaussianNoise {
43 typedef output<in_class, out_class> output_t;
44 private:
45 unsigned int _bit_precision;
46 unsigned int _word_precision;
47 unsigned int _number_of_barriers;
48 unsigned int _lu_size;
49 unsigned int _flag_ctr1;
50 unsigned int _flag_ctr2;
51 double _sigma;
52 mpfr_t _const_sigma;
53 mpfr_t _center;
54 int rounded_center;
55 unsigned int _security;
56 unsigned int _samples;
57 double _tail_bound;
58 bool _verbose;
59
60 in_class **barriers;
61 output_t *lu_table;
62 output_t **lu_table2;
63
64 void check_template_params();
65 void init();
66 void precomputeBarrierValues();
67 void buildLookupTables();
68 void nn_gaussian_law(mpfr_t rop, const mpfr_t x_fr);
69 int cmp(in_class *op1, in_class *op2);
70
71 public:
72 static const unsigned int default_k;
73 FastGaussianNoise(double sigma, unsigned int security, unsigned int samples, double center_d = 0, bool verbose = false);
74 FastGaussianNoise(double sigma, unsigned int security, unsigned int samples, mpfr_t center, bool verbose = false);
75 ~FastGaussianNoise();
76 void getNoise(out_class * const rand_data2out, uint64_t rlen);
77 };
78
79
80 /* NOTATIONS (from paper Sampling from discrete Gaussians
81 * for lattice-based cryptography on a constrainded device
82 * Nagarjun C. Dwarakanath, Steven Galbraith
83 * Journal : AAECC 2014 (25)
84 *
85 * m : lattice dimension, we set it to 1 in here (better results for same security) !!!!
86 *
87 * security : output distribution should be within 2^{-security}
88 * statistical distance from the Gaussian distribution
89 *
90 * sigma : parameter of the Gaussian distribution (close
91 * to the standard deviation but not equal)
92 * SUM = \sum_{k=-\inf}^{\inf} exp(-(k-c)^2/(2*sigma^2))
93 *
94 * D_{sigma} : distribution such that for x\in \mathcal{Z}
95 * Pr(x) = ro_{sigma}(x) = 1/SUM exp(-(x-c)^2/(2*sigma^2))
96 *
97 * Warning : can be replaced in some works by s = sqrt(2*pi)*sigma
98 * if Pr(x) = 1/SUM exp(-pi*(x-c)^2/s^2)
99 *
100 * tail_bound : tail bound we only output points with
101 * norm(point-c) < tail_bound*sigma
102 *
103 * bit_precision : bit precision when computing the probabilities
104 *
105 * delta_tailbound : statistical distance introduced
106 * by the tail bound
107 *
108 * delta_epsi : statistical distance introduced by
109 * probability approximation
110 */
111
112
113
114 // HELPER FUNCTIONS
115
116 /* /!\ Warning only for x64 */
rdtsc(void)117 extern __inline__ uint64_t rdtsc(void)
118 {
119 uint64_t a, d;
120 __asm__ volatile ("rdtsc" : "=a" (a), "=d" (d));
121 return (d<<32) | a;
122 }
123
124 // Function to be used in a Newton-Raphson solver
125 struct funct
126 {
functnfl::funct127 funct(double const& target) : k(target){}
operator ()nfl::funct128 std::tuple<double, double> operator()(double const& x)
129 {
130 return std::make_tuple(x*x - 2*log(x) - 1 - 2*k*log(2), 2*x-2/x);
131 }
132 private:
133 double k;
134 };
135
136 inline
newton_raphson(double k,double max_guess,int digits)137 double newton_raphson(double k, double max_guess, int digits)
138 {
139 unsigned max_counter = 1U<<15;
140 std::tuple<double, double> values;
141 double delta;
142 double guess = max_guess;
143 for (unsigned counter = 0 ; counter < max_counter; counter++)
144 {
145 values = funct(k)(guess);
146 delta = std::get<0>(values)/std::get<1>(values);
147 guess -= delta;
148 if ( fabs(delta)/fabs(guess) < pow(10.0,-digits) ) break;
149 }
150 // In case there is a flat zone in the function
151 while(0.95*guess*0.95*guess - 2*log(0.95*guess) - 1 - 2*k*log(2)>=0) guess*=0.95;
152 // Test result
153 if(guess*guess - 2*log(guess) - 1 - 2*k*log(2)<0)
154 {
155 std::cout << "FastGaussianNoise: WARNING Newton-Raphson failed the generator is NOT secure" << std::endl;
156 }
157 return guess;
158 }
159
160
161 // Constructors
162
163
164 template<class in_class, class out_class, unsigned _lu_depth>
FastGaussianNoise(double sigma,unsigned int security,unsigned int samples,double center_d,bool verbose)165 FastGaussianNoise<in_class, out_class, _lu_depth>::FastGaussianNoise( double sigma,
166 unsigned int security, unsigned int samples, double center_d /*=0*/, bool verbose /*=true*/):
167 _sigma(sigma),
168 _security(security),
169 _samples(samples),
170 _verbose(verbose)
171 {
172 // Check template parameters
173 check_template_params();
174
175 //Center cannot be initialized before the constructor
176 mpfr_init_set_d(_center, center_d, MPFR_RNDN);
177 rounded_center = round(center_d);
178
179 //Initialization functions
180 init();
181 precomputeBarrierValues();
182 buildLookupTables();
183 }
184
185 template<class in_class, class out_class, unsigned _lu_depth>
FastGaussianNoise(double sigma,unsigned int security,unsigned int samples,mpfr_t center,bool verbose)186 FastGaussianNoise<in_class, out_class, _lu_depth>::FastGaussianNoise( double sigma,
187 unsigned int security, unsigned int samples, mpfr_t center /*=0*/, bool verbose /*=true*/):
188 _sigma(sigma),
189 _security(security),
190 _samples(samples),
191 _verbose(verbose)
192 {
193 // Check template parameters
194 check_template_params();
195
196 //Center cannot be initialized before the constructor
197 mpfr_init_set(_center, center, MPFR_RNDN);
198 rounded_center = mpfr_get_d(_center, MPFR_RNDN);
199
200 //Initialization functions
201 init();
202 precomputeBarrierValues();
203 buildLookupTables();
204 }
205
206
207 // Check template parameters
208 template<class in_class, class out_class, unsigned _lu_depth>
check_template_params()209 void FastGaussianNoise<in_class, out_class, _lu_depth>::check_template_params()
210 {
211 // Lookup tables can only have depth 1 or 2
212 if (_lu_depth != 1 && _lu_depth != 2)
213 {
214 std::cout << "FastGaussianNoise: CRITICAL _lu_depth must be 1 or 2" << std::endl;
215 exit(1);
216 }
217
218 // Lookup tables can only have uint8_t or uint16_t indexes
219 if (typeid(in_class) == typeid(uint8_t))
220 _lu_size = 1<<8;
221 else if(typeid(in_class) == typeid(uint16_t))
222 _lu_size = 1<<16;
223 else
224 {
225 std::cout << "FastGaussianNoise: CRITICAL in_class must be uint8_t or uint16_t" << std::endl;
226 exit(2);
227 }
228 }
229
230
231 // Compute some values (precision, number of barriers and outputs)
232 template<class in_class, class out_class, unsigned _lu_depth>
init()233 void FastGaussianNoise<in_class, out_class, _lu_depth>::init()
234 {
235 double epsi, k;
236
237 /* Lemma 1:
238 * tail_bound >= sqrt(1 + 2*log(tail_bound) + 2*k*log(2))
239 * IMPLIES delta_tailbound < 2**(-k)
240 *
241 * -epsi <= -k - log2(2 * tail_bound * sigma))
242 * IMPLIES 2 * tail_bound * sigma * 2**(-epsi) <= 2**(-k)
243 *
244 * Lemma 2 (and Lemma 1):
245 * 2 * tail_bound * sigma * 2**(-epsi) <= 2**(-k)
246 * IMPLIES delta_epsilon < 2**(-k)
247 */
248
249 // THUS setting
250 k = _security + 1 + ceil(log(_samples)/log(2));
251 // IMPLIES (delta_tailbound + delta_epsilon)*_samples < 2**(-_security)
252 // We can thus generate vectors of _samples samples securely
253
254 // To compute the tail bound we use Newton-Raphson with three digits of precision
255 // WE HAVE tail_bound >= sqrt(1 + 2*log(tail_bound) + 2*k*log(2))
256 // IS EQUIV TO tail_bound**2 -2*log(tail_bound) - 1- 2*k*log(2) >= 0
257 int digits = 3;
258 #ifdef BOOST_RAPHSON
259 double max_guess = 1+2*k*log(2);
260 double min_guess = sqrt(1 + 2*k*log(2)), guess = min_guess;
261 _tail_bound = boost::math::tools::newton_raphson_iterate(funct(k),guess, min_guess, max_guess, digits);
262 #else
263 double min_guess = sqrt(1 + 2*k*log(2));
264 _tail_bound = newton_raphson(k, min_guess, digits);
265 #endif
266 // We now can compute the precision needed
267 epsi = k + log2(2 * _tail_bound * _sigma);
268 _bit_precision = ceil(epsi);
269 _word_precision = ceil(_bit_precision/(8.0*sizeof(in_class)));
270 // For the same cost we get a simpler situation and more precision
271 _bit_precision = _word_precision*8*sizeof(in_class);
272 // And the number of probabilities that must be computed
273 // Only half of them are computed (others are symmetric)
274 // but all values are stored to speedup noise generation
275 _number_of_barriers = 1+2*ceil(_tail_bound*_sigma);
276 if ( ((uint64_t)_number_of_barriers >> (sizeof(out_class) * 8 - 1)) != 0)
277 std::cout << "FastGaussianNoise: WARNING out_class too small to contain some of the (signed) results" << std::endl;
278 if ( ((uint64_t)_number_of_barriers >> (sizeof(int) * 8 - 1)) != 0)
279 std::cout << "FastGaussianNoise: WARNING outputs are above 2**31, unexpected results" << std::endl;
280
281 // Finally we precompute 1/(2*sigma**2) to accelerate things
282 mpfr_inits2(_bit_precision, _const_sigma, nullptr);
283 mpfr_set_d(_const_sigma, _sigma, MPFR_RNDN);
284 mpfr_sqr(_const_sigma, _const_sigma, MPFR_RNDN);
285 mpfr_mul_ui(_const_sigma, _const_sigma, 2, MPFR_RNDN);
286 mpfr_ui_div(_const_sigma, 1, _const_sigma, MPFR_RNDN);
287
288 // Give some feedback
289 if (_verbose) std::cout << "FastGaussianNoise: " << _number_of_barriers <<
290 " barriers with " << _bit_precision <<
291 " bits of precision will be computed" << std::endl;
292 }
293
294
295 template<class in_class, class out_class, unsigned _lu_depth>
precomputeBarrierValues()296 void FastGaussianNoise<in_class, out_class, _lu_depth>::precomputeBarrierValues()
297 {
298 // Declare and init mpfr vars
299 mpfr_t sum, tmp, tmp2;
300 mpfr_t *mp_barriers;
301 mpfr_inits2(_bit_precision, sum, tmp, tmp2, nullptr);
302
303 // This var is used to export mpfr values
304 mpz_t int_value;
305 mpz_init2(int_value, _bit_precision);
306
307 // Init SUM = \sum_{k=-ceil(tail_bound*sigma)}^{ceil(tail_bound*sigma)} exp(-(k+round(c)-c)^2/(2*sigma^2))
308 // and compute on the loop with the barriers
309 mpfr_set_ui(sum, 0, MPFR_RNDN);
310
311 // Allocate memory for the barrier pointers
312 barriers = (in_class **) malloc(_number_of_barriers*sizeof(in_class *));
313 mp_barriers = (mpfr_t *) malloc(_number_of_barriers*sizeof(mpfr_t));
314
315 // Now loop over the barriers
316 for (int i = 0; i < (int)_number_of_barriers; i++)
317 {
318 // Init mpfr var
319 mpfr_init2(mp_barriers[i], _bit_precision);
320
321 // Compute the barrier value (without normalization)
322 mpfr_set_si(tmp2, rounded_center+i-((int)_number_of_barriers-1)/2, MPFR_RNDN);
323 nn_gaussian_law(tmp, tmp2);
324 //mpfr_out_str(stdout, 10, 0, tmp2, MPFR_RNDN);
325 //std::cout << std::endl;
326 if (i==0) mpfr_set(mp_barriers[0], tmp, MPFR_RNDN);
327 else
328 {
329 mpfr_add(mp_barriers[i], mp_barriers[i-1], tmp, MPFR_RNDN);
330 }
331
332 // Add the probability to the sum
333 mpfr_add(sum, sum, tmp, MPFR_RNDN);
334 }
335
336 // Invert the sum and scale it
337 mpfr_ui_div(sum, 1, sum, MPFR_RNDN);
338 mpfr_set_ui(tmp, 2, MPFR_RNDN);
339 mpfr_pow_ui(tmp, tmp, _bit_precision, MPFR_RNDN);
340 mpfr_sub_ui(tmp, tmp, 1, MPFR_RNDN);
341 mpfr_mul(sum, sum, tmp, MPFR_RNDN);
342
343 // Now that we got the inverted sum normalize and export
344 for (unsigned i = 0; i < _number_of_barriers; i++)
345 {
346 // Allocate space
347 barriers[i] = (in_class *) calloc(_word_precision,sizeof(in_class));
348
349 mpfr_mul(mp_barriers[i], mp_barriers[i], sum, MPFR_RNDN);
350 mpfr_get_z(int_value, mp_barriers[i], MPFR_RNDN);
351 mpz_export((void *) (barriers[i] + ((int)_word_precision -
352 (int)ceil( (float)mpz_sizeinbase(int_value, 256)/sizeof(in_class) ))), nullptr, 1, sizeof(in_class), 0, 0, int_value);
353 #ifdef OUTPUT_BARRIERS
354 mpz_out_str(stdout, 10, int_value);
355 std::cout << " = Barriers[" << i << "] = " << std::endl;
356 if (sizeof(in_class) == 1) for (unsigned j = 0 ; j < _word_precision; j++)
357 printf("%.2x", barriers[i][j]);
358 if (sizeof(in_class) == 2) for (unsigned j = 0 ; j < _word_precision; j++)
359 printf("%.4x", barriers[i][j]);
360 std::cout << std::endl;
361 #endif
362 mpfr_clear(mp_barriers[i]);
363 }
364 mpfr_clears(sum, tmp, tmp2, nullptr);
365 mpz_clear(int_value);
366 mpfr_free_cache();
367 free(mp_barriers);
368 }
369
370
371
372 //Build lookup tables used during noise generation
373 template<class in_class, class out_class, unsigned _lu_depth>
buildLookupTables()374 void FastGaussianNoise<in_class, out_class, _lu_depth>::buildLookupTables()
375 {
376 unsigned lu_index1 = 0, lu_index2 = 0;
377 _flag_ctr1 = _flag_ctr2 = 0;
378
379 // Allocate space for the lookup tables
380 lu_table = new output_t[_lu_size]();
381 if(_lu_depth == 2)
382 lu_table2 = (output_t **) calloc(_lu_size,sizeof(output_t *));
383
384 // We start building the first dimension of the lookup table
385 // corresponding to the first in_class word of the barriers
386 for (int64_t val = -((int)_number_of_barriers-1)/2 + rounded_center, b_index = 0; val <= ((int)_number_of_barriers-1)/2 + rounded_center && lu_index1 < _lu_size;)
387 {
388
389 while (lu_index1 < barriers[b_index][0] && lu_index1 < _lu_size)
390 {
391 lu_table[lu_index1].val = val;
392 lu_index1++;
393 }
394
395 // Flag the entry
396 lu_table[lu_index1].val = val;
397 lu_table[lu_index1].flag = true;
398 _flag_ctr1++;
399 // If _lu_depth == 1 we have to list the barriers here
400 if (_lu_depth == 1)
401 {
402 #ifdef OUTPUT_LUT_FLAGS
403 std::cout << "FastGaussianNoise: flagged lu_table["
404 << lu_index1 << "] for barriers " << val;
405 #endif
406
407 // Prepare the first element of the chained list of barrier
408 lu_table[lu_index1].l_b_ptr.push_back(barriers[b_index++]);
409 val++;
410 // If more that one barrier is present, we add them to the chained list
411 while ( (b_index<_number_of_barriers) && (lu_index1 == barriers[b_index][0]))
412 {
413 lu_table[lu_index1].l_b_ptr.push_back(barriers[b_index]);
414 #ifdef OUTPUT_LUT_FLAGS
415 std::cout << "FastGaussianNoise: flagged lu_table[" << lu_index1 << "] for barriers " << val;
416 #endif
417 b_index++;
418 val++;
419 } // while
420 } // if
421
422 if (_lu_depth == 2)
423 {
424 // When we meet a barrier in an entry of the lu_table,
425 // we build another lu_table inside that entry
426 // corresponding to the next in_class word of the barriers
427 lu_index2 = 0;
428 lu_table2[lu_index1] = new output_t[_lu_size]();
429 while (lu_index2 < _lu_size)
430 {
431 if(lu_index1 < barriers[b_index][0] || lu_index2 < barriers[b_index][1])
432 {
433 lu_table2[lu_index1][lu_index2].val = val;
434 }
435 else
436 {
437 // If we are on a barrier
438 if (lu_index1 == barriers[b_index][0] && lu_index2 == barriers[b_index][1])
439 {
440 // Flag the entry
441 lu_table2[lu_index1][lu_index2].val = val;
442 lu_table2[lu_index1][lu_index2].flag = true;
443 #ifdef OUTPUT_LUT_FLAGS
444 std::cout << "FastGaussianNoise: flagged lu_table2[" << lu_index1 << "][" << lu_index2 << "] for barriers " << val;
445 #endif
446 _flag_ctr2++;
447 // And prepare the first element of the chained list of barrier
448 lu_table2[lu_index1][lu_index2].l_b_ptr.push_back(barriers[b_index++]);
449 val++;
450 // If more that one barrier is present, we add them to the chained list
451 while ( (b_index<_number_of_barriers) &&
452 (lu_index1 == barriers[b_index][0]) &&
453 (lu_index2 == barriers[b_index][1]) )
454 {
455 lu_table2[lu_index1][lu_index2].l_b_ptr.push_back(barriers[b_index]);
456 #ifdef OUTPUT_LUT_FLAGS
457 std::cout << " " << val;
458 #endif
459 b_index++;
460 val++;
461 } // while
462 #ifdef OUTPUT_LUT_FLAGS
463 std::cout << std::endl;
464 #endif
465 } // if
466 } // else
467 lu_index2++;
468 } // while
469 } // if
470
471 lu_index1++;
472 }
473 // Give some feedback
474 if (_verbose) std::cout << "FastGaussianNoise: Lookup tables built" << std::endl;
475 }
476
477 template<class in_class, class out_class, unsigned _lu_depth>
getNoise(out_class * const rand_outdata,uint64_t rlen)478 void FastGaussianNoise<in_class, out_class, _lu_depth>::getNoise(out_class* const rand_outdata, uint64_t rlen)
479 {
480 uint64_t computed_outputs, innoise_bytesize, innoise_words, used_words;
481 int64_t output;
482 bool flagged;
483 in_class *noise, *noise_init_ptr, input1, input2;
484 float innoise_multiplier;
485
486 // Expected number of input bytes per output byte. Lowering this
487 // (e.g. by a factor .5) works but could lead to segfaults.
488 if (_lu_depth == 1)
489 {
490 innoise_multiplier = 1.05 * ((float)(_lu_size - _flag_ctr1)/(float)_lu_size) + _word_precision * ((float)_flag_ctr1/_lu_size);
491 }
492 else // _lu_depth == 2
493 {
494 innoise_multiplier = 1.05 * ((float)(_lu_size - _flag_ctr1)/(float)_lu_size) + 2.0 * ((float)_flag_ctr1/(float)_lu_size) + _word_precision * ((float)_flag_ctr2/((float)_lu_size*_lu_size));
495 }
496 innoise_words = rlen * innoise_multiplier;
497 innoise_bytesize = sizeof(in_class) * innoise_words;
498 noise = noise_init_ptr = new in_class[innoise_words];
499 used_words = 0;
500 if (_verbose) std::cout << "FastGaussianNoise: Using " << " " <<innoise_multiplier*sizeof(in_class)*8/std::min(log2(_number_of_barriers),(double)sizeof(out_class)*8) << " input bits per output bit" << std::endl;
501
502 // Count time for uniform noise generation
503 uint64_t start = rdtsc();
504 fastrandombytes((uint8_t*)noise, innoise_bytesize);
505 uint64_t stop = rdtsc();
506
507 // Give some feedback
508 if (_verbose) printf("FastGaussianNoise: Uniform noise cycles = %.2e bits = %.2e cycles/bit = %.2e\n", (float) stop - start, (float) innoise_bytesize * 8, (float)(stop-start)/(innoise_bytesize*8));
509
510 // Loop until all the outputs have been generated
511 computed_outputs = 0;
512 while (computed_outputs < rlen )
513 {
514 input1 = *noise;
515 flagged = lu_table[input1].flag;
516
517 // If flagged we have to look at the next in_class word
518 if (flagged)
519 {
520 if (_lu_depth == 1)
521 {
522 output = lu_table[input1].val;
523 for(in_class* b_ptr : lu_table[input1].l_b_ptr)
524 {
525 // If the barrier value is greater than the noise
526 if(cmp(b_ptr, noise)==1) break;
527 output++;
528 }
529 // We shift the noise pointer of word_precision minus 1
530 // As there another byte shift later
531 noise += _word_precision - 1;
532 used_words += _word_precision - 1;
533
534 }
535 else // _lu_depth == 2
536 {
537 input2 = *(noise+1);
538 flagged = lu_table2[input1][input2].flag;
539 // If flagged again we compare using full precision the random value
540 // with all barriers in the linked list of the lookup table
541 if (flagged)
542 {
543 output = lu_table2[input1][input2].val;
544 for(in_class* b_ptr : lu_table2[input1][input2].l_b_ptr)
545 {
546 // If the barrier value is greater than the noise
547 if(cmp(b_ptr, noise)==1) break;
548 output++;
549 }
550 // We shift the noise pointer of word_precision minus 2
551 // As there are two other one byte shifts later
552 noise += _word_precision - 2;
553 used_words += _word_precision - 2;
554 } // if
555 else
556 {
557 output = lu_table2[input1][input2].val;
558 }
559 noise++;
560 used_words++;
561 } // else
562 }
563 else
564 {
565 output = lu_table[input1].val;
566 }
567 noise++;
568 used_words++;
569 // Add the obtained result to the list of outputs
570 rand_outdata[computed_outputs++] = (out_class) output;
571
572 #ifdef UNITTEST_ONEMILLION
573 if ( (output > _sigma * 6) || (output < -_sigma*6) )
574 {
575 std::cout << output << "FastGaussianNoise: Unit test failed, this should happen once in a million. Uniform input leading to this is ";
576 for (unsigned i = 0; i < _word_precision ; i++)
577 printf("%.2x", *(noise-_word_precision+i));
578 std::cout << std::endl;
579 }
580 #endif
581
582 #if 1
583 // If too much noise has been used regenerate it
584 if ((used_words+_word_precision) >= innoise_words)
585 {
586 noise = noise_init_ptr;
587 used_words = 0;
588 if (_verbose) std::cout << "FastGaussianNoise: All the input bits have been used, regenerating them ..." << std::endl;
589
590 fastrandombytes((uint8_t*)noise, innoise_bytesize);
591 }
592 #endif
593 }
594 delete[] noise_init_ptr;
595 }
596
597 /* Compare two arrays word by word.
598 * return 1 if op1 > op2, 0 if equals and -1 if op1 < op2 */
599 template<class in_class, class out_class, unsigned _lu_depth>
cmp(in_class * op1,in_class * op2)600 inline int FastGaussianNoise<in_class, out_class, _lu_depth>::cmp(in_class *op1, in_class *op2)
601 {
602
603 for (int i = 0; i < (int)_word_precision; i++)
604 {
605
606 if (op1[i] > op2[i]) return 1;
607
608 else if (op1[i] < op2[i]) return -1;
609 }
610 return 0;
611 }
612
613
614 // Compute exp(-(x-center)^2/(2*sigma^2)) this is not normalized ! (hence the nn)
615 template<class in_class, class out_class, unsigned _lu_depth>
nn_gaussian_law(mpfr_t rop,const mpfr_t x)616 void inline FastGaussianNoise<in_class, out_class, _lu_depth>::nn_gaussian_law(mpfr_t rop, const mpfr_t x)
617 {
618 mpfr_sub(rop, x, _center, MPFR_RNDN);
619 mpfr_sqr(rop, rop, MPFR_RNDN);
620 mpfr_neg(rop, rop, MPFR_RNDN);
621 mpfr_mul(rop, rop, _const_sigma, MPFR_RNDN);
622 mpfr_exp(rop, rop, MPFR_RNDN);
623 }
624
625
626 template<class in_class, class out_class, unsigned _lu_depth>
~FastGaussianNoise()627 FastGaussianNoise<in_class, out_class, _lu_depth>::~FastGaussianNoise()
628 {
629 // Freed allocated memory for the barriers
630 for (unsigned ctr = 0; ctr < _number_of_barriers; ctr++)
631 {
632 if (barriers[ctr] != nullptr) free(barriers[ctr]);
633 barriers[ctr] = nullptr;
634 }
635 if (barriers != nullptr) free(barriers);
636 barriers=nullptr;
637
638 // Free other variables
639 mpfr_clear(_const_sigma);
640 mpfr_clear(_center);
641 delete[](lu_table);
642 if(_lu_depth == 2)
643 {
644 for (unsigned ctr = 0 ; ctr < _lu_size; ctr++)
645 {
646 if (lu_table2[ctr]!=nullptr) delete[](lu_table2[ctr]);
647 }
648 free(lu_table2);
649 }
650 }
651
652 } // namespace nfl
653
654 #endif
655