1 #ifndef fast_gauss_noise_h
2 #define fast_gauss_noise_h
3 
4 #include <cstdint>
5 #include <cstddef>
6 #include <list>
7 #include <iostream>
8 #include <cstdlib>
9 #include <ctime>
10 #include <cmath>
11 #include <climits>
12 #include <cstring>
13 #include <tuple>
14 #include <typeinfo>
15 #include <gmp.h>
16 #include <mpfr.h>
17 #include "fastrandombytes.h"
18 
19 #ifdef BOOST_RAPHSON
20 #include <boost/math/tools/roots.hpp>
21 #endif
22 
23 
24 //#define OUTPUT_BARRIERS
25 //#define OUTPUT_LUT_FLAGS
26 // Use it to test rare values do not happen too often
27 //#define UNITTEST_ONEMILLION
28 
29 namespace nfl {
30 
31 template<typename T0, typename T1>
tstbit(T0 x,T1 n)32 constexpr inline auto tstbit(T0 x, T1 n) -> decltype((x << (63 - n)) >> 63 ) { return ((x << (63 - n)) >> 63 ); }
33 
34 template<class in_class, class out_class>
35 struct output {
36   out_class val;
37   bool flag;
38   std::list<in_class*> l_b_ptr;
39 };
40 
41 template<class in_class, class out_class, unsigned _lu_depth>
42 class FastGaussianNoise {
43     typedef output<in_class, out_class> output_t;
44   private:
45     unsigned int _bit_precision;
46     unsigned int _word_precision;
47     unsigned int _number_of_barriers;
48     unsigned int _lu_size;
49     unsigned int _flag_ctr1;
50     unsigned int _flag_ctr2;
51     double _sigma;
52     mpfr_t _const_sigma;
53     mpfr_t _center;
54     int rounded_center;
55     unsigned int _security;
56     unsigned int _samples;
57     double _tail_bound;
58     bool _verbose;
59 
60     in_class **barriers;
61     output_t *lu_table;
62     output_t **lu_table2;
63 
64     void check_template_params();
65     void init();
66     void precomputeBarrierValues();
67     void buildLookupTables();
68     void nn_gaussian_law(mpfr_t rop, const mpfr_t x_fr);
69     int cmp(in_class *op1, in_class *op2);
70 
71   public:
72     static const unsigned int default_k;
73     FastGaussianNoise(double sigma, unsigned int security, unsigned int samples, double center_d = 0, bool verbose = false);
74     FastGaussianNoise(double sigma, unsigned int security, unsigned int samples, mpfr_t center, bool verbose = false);
75     ~FastGaussianNoise();
76     void getNoise(out_class * const rand_data2out, uint64_t rlen);
77 };
78 
79 
80 /* NOTATIONS (from paper Sampling from discrete Gaussians
81  * for lattice-based cryptography on a constrainded device
82  * Nagarjun C. Dwarakanath, Steven Galbraith
83  * Journal : AAECC 2014 (25)
84  *
85  * m : lattice dimension, we set it to 1 in here (better results for same security) !!!!
86  *
87  * security : output distribution should be within 2^{-security}
88  * statistical distance from the Gaussian distribution
89  *
90  * sigma : parameter of the Gaussian distribution (close
91  * to the standard deviation but not equal)
92  * SUM = \sum_{k=-\inf}^{\inf} exp(-(k-c)^2/(2*sigma^2))
93  *
94  * D_{sigma} : distribution such that for x\in \mathcal{Z}
95  * Pr(x) = ro_{sigma}(x) = 1/SUM exp(-(x-c)^2/(2*sigma^2))
96  *
97  * Warning : can be replaced in some works by s = sqrt(2*pi)*sigma
98  * if Pr(x) = 1/SUM exp(-pi*(x-c)^2/s^2)
99  *
100  * tail_bound : tail bound we only output points with
101  * norm(point-c) < tail_bound*sigma
102  *
103  * bit_precision : bit precision when computing the probabilities
104  *
105  * delta_tailbound : statistical distance introduced
106  * by the tail bound
107  *
108  * delta_epsi : statistical distance introduced by
109  * probability approximation
110  */
111 
112 
113 
114 // HELPER FUNCTIONS
115 
116 /* /!\ Warning only for x64 */
rdtsc(void)117 extern __inline__ uint64_t rdtsc(void)
118 {
119   uint64_t a, d;
120   __asm__ volatile ("rdtsc" : "=a" (a), "=d" (d));
121   return (d<<32) | a;
122 }
123 
124 // Function to be used in a Newton-Raphson solver
125 struct funct
126 {
functnfl::funct127 	funct(double const& target) : k(target){}
operator ()nfl::funct128 	std::tuple<double, double> operator()(double const& x)
129 	{
130 		return std::make_tuple(x*x - 2*log(x) - 1 - 2*k*log(2), 2*x-2/x);
131 	}
132 	private:
133 	double k;
134 };
135 
136 inline
newton_raphson(double k,double max_guess,int digits)137 double newton_raphson(double k, double max_guess, int digits)
138 {
139   unsigned max_counter = 1U<<15;
140   std::tuple<double, double> values;
141   double delta;
142   double guess = max_guess;
143   for (unsigned counter = 0 ; counter < max_counter; counter++)
144   {
145     values = funct(k)(guess);
146     delta = std::get<0>(values)/std::get<1>(values);
147     guess -= delta;
148     if ( fabs(delta)/fabs(guess) < pow(10.0,-digits) ) break;
149   }
150   // In case there is a flat zone in the function
151   while(0.95*guess*0.95*guess - 2*log(0.95*guess) - 1 - 2*k*log(2)>=0) guess*=0.95;
152   // Test result
153   if(guess*guess - 2*log(guess) - 1 - 2*k*log(2)<0)
154   {
155     std::cout << "FastGaussianNoise: WARNING Newton-Raphson failed the generator is NOT secure" << std::endl;
156   }
157   return guess;
158 }
159 
160 
161 // Constructors
162 
163 
164 template<class in_class, class out_class, unsigned _lu_depth>
FastGaussianNoise(double sigma,unsigned int security,unsigned int samples,double center_d,bool verbose)165 FastGaussianNoise<in_class, out_class, _lu_depth>::FastGaussianNoise( double sigma,
166 		unsigned int security, unsigned int samples, double center_d /*=0*/, bool verbose /*=true*/):
167 	_sigma(sigma),
168 	_security(security),
169   _samples(samples),
170   _verbose(verbose)
171 {
172   // Check template parameters
173   check_template_params();
174 
175   //Center cannot be initialized before the constructor
176   mpfr_init_set_d(_center, center_d, MPFR_RNDN);
177   rounded_center = round(center_d);
178 
179   //Initialization functions
180 	init();
181 	precomputeBarrierValues();
182 	buildLookupTables();
183 }
184 
185 template<class in_class, class out_class, unsigned _lu_depth>
FastGaussianNoise(double sigma,unsigned int security,unsigned int samples,mpfr_t center,bool verbose)186 FastGaussianNoise<in_class, out_class, _lu_depth>::FastGaussianNoise( double sigma,
187 		unsigned int security, unsigned int samples, mpfr_t center /*=0*/, bool verbose /*=true*/):
188 	_sigma(sigma),
189 	_security(security),
190   _samples(samples),
191   _verbose(verbose)
192 {
193   // Check template parameters
194   check_template_params();
195 
196   //Center cannot be initialized before the constructor
197   mpfr_init_set(_center, center, MPFR_RNDN);
198   rounded_center = mpfr_get_d(_center, MPFR_RNDN);
199 
200   //Initialization functions
201 	init();
202 	precomputeBarrierValues();
203 	buildLookupTables();
204 }
205 
206 
207 // Check template parameters
208 template<class in_class, class out_class, unsigned _lu_depth>
check_template_params()209 void FastGaussianNoise<in_class, out_class, _lu_depth>::check_template_params()
210 {
211   // Lookup tables can only have depth 1 or 2
212   if (_lu_depth != 1 && _lu_depth != 2)
213   {
214     std::cout << "FastGaussianNoise: CRITICAL _lu_depth must be 1 or 2" << std::endl;
215     exit(1);
216   }
217 
218   // Lookup tables can only have uint8_t or uint16_t indexes
219   if (typeid(in_class) == typeid(uint8_t))
220     _lu_size = 1<<8;
221   else if(typeid(in_class) == typeid(uint16_t))
222     _lu_size = 1<<16;
223   else
224   {
225     std::cout << "FastGaussianNoise: CRITICAL in_class must be uint8_t or uint16_t" << std::endl;
226     exit(2);
227   }
228 }
229 
230 
231 // Compute some values (precision, number of barriers and outputs)
232 template<class in_class, class out_class, unsigned _lu_depth>
init()233 void FastGaussianNoise<in_class, out_class, _lu_depth>::init()
234 {
235 	double epsi, k;
236 
237 	/* Lemma 1:
238 	 * tail_bound >= sqrt(1 + 2*log(tail_bound) + 2*k*log(2))
239 	 * IMPLIES delta_tailbound < 2**(-k)
240    *
241    * -epsi <= -k - log2(2 * tail_bound * sigma))
242 	 * IMPLIES 2 * tail_bound * sigma * 2**(-epsi) <= 2**(-k)
243 	 *
244    * Lemma 2 (and Lemma 1):
245 	 * 2 * tail_bound * sigma * 2**(-epsi) <= 2**(-k)
246 	 * IMPLIES  delta_epsilon < 2**(-k)
247    */
248 
249 	// THUS setting
250 	k = _security + 1 + ceil(log(_samples)/log(2));
251 	// IMPLIES (delta_tailbound + delta_epsilon)*_samples < 2**(-_security)
252 	// We can thus generate vectors of _samples samples securely
253 
254   // To compute the tail bound we use Newton-Raphson with three digits of precision
255 	// WE HAVE tail_bound >= sqrt(1 + 2*log(tail_bound) + 2*k*log(2))
256   // IS EQUIV TO tail_bound**2 -2*log(tail_bound) - 1- 2*k*log(2) >= 0
257 	int digits = 3;
258 #ifdef BOOST_RAPHSON
259   double max_guess = 1+2*k*log(2);
260   double min_guess = sqrt(1 + 2*k*log(2)), guess = min_guess;
261 	_tail_bound = boost::math::tools::newton_raphson_iterate(funct(k),guess, min_guess, max_guess, digits);
262 #else
263   double min_guess = sqrt(1 + 2*k*log(2));
264   _tail_bound = newton_raphson(k, min_guess, digits);
265 #endif
266 	// We now can compute the precision needed
267 	epsi = k + log2(2 * _tail_bound * _sigma);
268 	_bit_precision = ceil(epsi);
269 	_word_precision = ceil(_bit_precision/(8.0*sizeof(in_class)));
270   // For the same cost we get a simpler situation and more precision
271 	_bit_precision = _word_precision*8*sizeof(in_class);
272   // And the number of probabilities that must be computed
273   // Only half of them are computed (others are symmetric)
274   // but all values are stored to speedup noise generation
275   _number_of_barriers = 1+2*ceil(_tail_bound*_sigma);
276   if ( ((uint64_t)_number_of_barriers >> (sizeof(out_class) * 8 - 1)) != 0)
277     std::cout << "FastGaussianNoise: WARNING out_class too small to contain some of the (signed) results" << std::endl;
278   if ( ((uint64_t)_number_of_barriers >> (sizeof(int) * 8 - 1)) != 0)
279     std::cout << "FastGaussianNoise: WARNING outputs are above 2**31, unexpected results" << std::endl;
280 
281   // Finally we precompute 1/(2*sigma**2) to accelerate things
282 	mpfr_inits2(_bit_precision, _const_sigma, nullptr);
283   mpfr_set_d(_const_sigma, _sigma, MPFR_RNDN);
284 	mpfr_sqr(_const_sigma, _const_sigma, MPFR_RNDN);
285 	mpfr_mul_ui(_const_sigma, _const_sigma, 2, MPFR_RNDN);
286 	mpfr_ui_div(_const_sigma, 1, _const_sigma, MPFR_RNDN);
287 
288   // Give some feedback
289   if (_verbose) std::cout << "FastGaussianNoise: " << _number_of_barriers <<
290     " barriers with " << _bit_precision <<
291     " bits of precision will be computed" << std::endl;
292 }
293 
294 
295 template<class in_class, class out_class, unsigned _lu_depth>
precomputeBarrierValues()296 void FastGaussianNoise<in_class, out_class, _lu_depth>::precomputeBarrierValues()
297 {
298   // Declare and init mpfr vars
299   mpfr_t sum, tmp, tmp2;
300   mpfr_t *mp_barriers;
301   mpfr_inits2(_bit_precision, sum, tmp, tmp2, nullptr);
302 
303   // This var is used to export mpfr values
304   mpz_t int_value;
305   mpz_init2(int_value, _bit_precision);
306 
307   // Init SUM = \sum_{k=-ceil(tail_bound*sigma)}^{ceil(tail_bound*sigma)} exp(-(k+round(c)-c)^2/(2*sigma^2))
308   // and compute on the loop with the barriers
309   mpfr_set_ui(sum, 0, MPFR_RNDN);
310 
311   // Allocate memory for the barrier pointers
312   barriers = (in_class **) malloc(_number_of_barriers*sizeof(in_class *));
313   mp_barriers = (mpfr_t *) malloc(_number_of_barriers*sizeof(mpfr_t));
314 
315   // Now loop over the barriers
316   for (int i = 0; i < (int)_number_of_barriers; i++)
317   {
318     // Init mpfr var
319     mpfr_init2(mp_barriers[i], _bit_precision);
320 
321     // Compute the barrier value (without normalization)
322     mpfr_set_si(tmp2, rounded_center+i-((int)_number_of_barriers-1)/2, MPFR_RNDN);
323     nn_gaussian_law(tmp, tmp2);
324     //mpfr_out_str(stdout, 10, 0, tmp2, MPFR_RNDN);
325     //std::cout << std::endl;
326     if (i==0) mpfr_set(mp_barriers[0], tmp, MPFR_RNDN);
327     else
328     {
329       mpfr_add(mp_barriers[i], mp_barriers[i-1], tmp, MPFR_RNDN);
330     }
331 
332     // Add the probability to the sum
333     mpfr_add(sum, sum, tmp, MPFR_RNDN);
334   }
335 
336   // Invert the sum and scale it
337   mpfr_ui_div(sum, 1, sum, MPFR_RNDN);
338   mpfr_set_ui(tmp, 2, MPFR_RNDN);
339   mpfr_pow_ui(tmp, tmp, _bit_precision, MPFR_RNDN);
340   mpfr_sub_ui(tmp, tmp, 1, MPFR_RNDN);
341   mpfr_mul(sum, sum, tmp, MPFR_RNDN);
342 
343   // Now that we got the inverted sum normalize and export
344   for (unsigned i = 0; i < _number_of_barriers; i++)
345   {
346     // Allocate space
347     barriers[i] = (in_class *) calloc(_word_precision,sizeof(in_class));
348 
349     mpfr_mul(mp_barriers[i], mp_barriers[i], sum, MPFR_RNDN);
350     mpfr_get_z(int_value, mp_barriers[i], MPFR_RNDN);
351     mpz_export((void *) (barriers[i] + ((int)_word_precision -
352            (int)ceil( (float)mpz_sizeinbase(int_value, 256)/sizeof(in_class) ))), nullptr, 1, sizeof(in_class), 0, 0, int_value);
353 #ifdef OUTPUT_BARRIERS
354     mpz_out_str(stdout, 10, int_value);
355     std::cout << " = Barriers[" << i << "] = " << std::endl;
356     if (sizeof(in_class) == 1) for (unsigned j = 0 ; j < _word_precision; j++)
357       printf("%.2x", barriers[i][j]);
358     if (sizeof(in_class) == 2) for (unsigned j = 0 ; j < _word_precision; j++)
359       printf("%.4x", barriers[i][j]);
360     std::cout <<  std::endl;
361 #endif
362     mpfr_clear(mp_barriers[i]);
363   }
364 	mpfr_clears(sum, tmp, tmp2, nullptr);
365   mpz_clear(int_value);
366 	mpfr_free_cache();
367   free(mp_barriers);
368 }
369 
370 
371 
372 //Build lookup tables used during noise generation
373 template<class in_class, class out_class, unsigned _lu_depth>
buildLookupTables()374 void FastGaussianNoise<in_class, out_class, _lu_depth>::buildLookupTables()
375 {
376 	unsigned lu_index1 = 0, lu_index2 = 0;
377   _flag_ctr1 = _flag_ctr2 = 0;
378 
379   // Allocate space for the lookup tables
380   lu_table = new output_t[_lu_size]();
381   if(_lu_depth == 2)
382     lu_table2 = (output_t **) calloc(_lu_size,sizeof(output_t *));
383 
384 	// We start building the first dimension of the lookup table
385   // corresponding to the first in_class word of the barriers
386 	for (int64_t val = -((int)_number_of_barriers-1)/2 + rounded_center, b_index = 0; val <= ((int)_number_of_barriers-1)/2 + rounded_center && lu_index1 < _lu_size;)
387   {
388 
389 		while (lu_index1 < barriers[b_index][0] && lu_index1 < _lu_size)
390     {
391       lu_table[lu_index1].val = val;
392 			lu_index1++;
393 		}
394 
395 		// Flag the entry
396 		lu_table[lu_index1].val = val;
397 		lu_table[lu_index1].flag = true;
398     _flag_ctr1++;
399 		// If _lu_depth == 1 we have to list the barriers here
400     if (_lu_depth == 1)
401     {
402 #ifdef OUTPUT_LUT_FLAGS
403       std::cout << "FastGaussianNoise: flagged lu_table["
404        << lu_index1 << "] for barriers " << val;
405 #endif
406 
407       // Prepare the first element of the chained list of barrier
408 			lu_table[lu_index1].l_b_ptr.push_back(barriers[b_index++]);
409       val++;
410 			// If more that one barrier is present, we add them to the chained list
411 			while ( (b_index<_number_of_barriers) && (lu_index1 == barriers[b_index][0]))
412       {
413 			  lu_table[lu_index1].l_b_ptr.push_back(barriers[b_index]);
414 #ifdef OUTPUT_LUT_FLAGS
415       std::cout << "FastGaussianNoise: flagged lu_table[" << lu_index1 << "] for barriers " << val;
416 #endif
417 				b_index++;
418 				val++;
419 			} // while
420     } // if
421 
422     if (_lu_depth == 2)
423     {
424       // When we meet a barrier in an entry of the lu_table,
425       // we build another lu_table inside that entry
426 		  // corresponding to the next in_class word of the barriers
427 		  lu_index2 = 0;
428       lu_table2[lu_index1] = new output_t[_lu_size]();
429 		  while (lu_index2 < _lu_size)
430       {
431 			  if(lu_index1 < barriers[b_index][0] || lu_index2 < barriers[b_index][1])
432         {
433           lu_table2[lu_index1][lu_index2].val = val;
434         }
435         else
436         {
437 		      // If we are on a barrier
438 		      if (lu_index1 == barriers[b_index][0] && lu_index2 == barriers[b_index][1])
439           {
440 			      // Flag the entry
441 			      lu_table2[lu_index1][lu_index2].val = val;
442 			      lu_table2[lu_index1][lu_index2].flag = true;
443 #ifdef OUTPUT_LUT_FLAGS
444             std::cout << "FastGaussianNoise: flagged lu_table2[" << lu_index1 << "][" << lu_index2 << "] for barriers " << val;
445 #endif
446             _flag_ctr2++;
447 			      // And prepare the first element of the chained list of barrier
448 			      lu_table2[lu_index1][lu_index2].l_b_ptr.push_back(barriers[b_index++]);
449             val++;
450 			      // If more that one barrier is present, we add them to the chained list
451 			      while ( (b_index<_number_of_barriers) &&
452                 (lu_index1 == barriers[b_index][0]) &&
453                 (lu_index2 == barriers[b_index][1]) )
454             {
455 				      lu_table2[lu_index1][lu_index2].l_b_ptr.push_back(barriers[b_index]);
456 #ifdef OUTPUT_LUT_FLAGS
457             std::cout << " " << val;
458 #endif
459 				      b_index++;
460 				      val++;
461 			      } // while
462 #ifdef OUTPUT_LUT_FLAGS
463             std::cout << std::endl;
464 #endif
465           } // if
466         } // else
467         lu_index2++;
468       } // while
469 		} // if
470 
471 		lu_index1++;
472 	}
473   // Give some feedback
474   if (_verbose) std::cout << "FastGaussianNoise: Lookup tables built" << std::endl;
475 }
476 
477 template<class in_class, class out_class, unsigned _lu_depth>
getNoise(out_class * const rand_outdata,uint64_t rlen)478 void FastGaussianNoise<in_class, out_class, _lu_depth>::getNoise(out_class* const rand_outdata, uint64_t rlen)
479 {
480 	uint64_t computed_outputs, innoise_bytesize, innoise_words, used_words;
481 	int64_t output;
482   bool flagged;
483 	in_class *noise, *noise_init_ptr, input1, input2;
484   float innoise_multiplier;
485 
486   // Expected number of input bytes per output byte. Lowering this
487   // (e.g. by a factor .5) works but could lead to segfaults.
488   if (_lu_depth == 1)
489   {
490     innoise_multiplier = 1.05 * ((float)(_lu_size - _flag_ctr1)/(float)_lu_size) + _word_precision * ((float)_flag_ctr1/_lu_size);
491   }
492   else // _lu_depth == 2
493   {
494     innoise_multiplier = 1.05 * ((float)(_lu_size - _flag_ctr1)/(float)_lu_size) + 2.0 * ((float)_flag_ctr1/(float)_lu_size) + _word_precision * ((float)_flag_ctr2/((float)_lu_size*_lu_size));
495   }
496   innoise_words = rlen * innoise_multiplier;
497   innoise_bytesize = sizeof(in_class) * innoise_words;
498 	noise = noise_init_ptr = new in_class[innoise_words];
499   used_words = 0;
500   if (_verbose) std::cout << "FastGaussianNoise: Using " << " " <<innoise_multiplier*sizeof(in_class)*8/std::min(log2(_number_of_barriers),(double)sizeof(out_class)*8) << " input bits per output bit" << std::endl;
501 
502   // Count time for uniform noise generation
503 	uint64_t start = rdtsc();
504 	fastrandombytes((uint8_t*)noise, innoise_bytesize);
505 	uint64_t stop = rdtsc();
506 
507   // Give some feedback
508 	if (_verbose) printf("FastGaussianNoise: Uniform noise  cycles = %.2e bits = %.2e cycles/bit = %.2e\n", (float) stop - start, (float) innoise_bytesize * 8, (float)(stop-start)/(innoise_bytesize*8));
509 
510   // Loop until all the outputs have been generated
511   computed_outputs = 0;
512 	while (computed_outputs < rlen )
513   {
514 		input1 = *noise;
515 		flagged = lu_table[input1].flag;
516 
517     // If flagged we have to look at the next in_class word
518     if (flagged)
519     {
520 		  if (_lu_depth == 1)
521       {
522 				output = lu_table[input1].val;
523 				for(in_class* b_ptr : lu_table[input1].l_b_ptr)
524         {
525 					// If the barrier value is greater than the noise
526 					if(cmp(b_ptr, noise)==1) break;
527 					output++;
528 				}
529         // We shift the noise pointer of word_precision minus 1
530         // As there another byte shift later
531 				noise += _word_precision - 1;
532 				used_words += _word_precision - 1;
533 
534       }
535       else // _lu_depth == 2
536       {
537 			  input2 = *(noise+1);
538 			  flagged = lu_table2[input1][input2].flag;
539         // If flagged again we compare using full precision the random value
540         // with all barriers in the linked list of the lookup table
541 			  if (flagged)
542         {
543 			    output = lu_table2[input1][input2].val;
544 				  for(in_class* b_ptr : lu_table2[input1][input2].l_b_ptr)
545           {
546 					  // If the barrier value is greater than the noise
547 					  if(cmp(b_ptr, noise)==1) break;
548 					  output++;
549 				  }
550           // We shift the noise pointer of word_precision minus 2
551           // As there are two other one byte shifts later
552 				  noise += _word_precision - 2;
553 				  used_words += _word_precision - 2;
554 			  } // if
555         else
556         {
557 			    output = lu_table2[input1][input2].val;
558         }
559         noise++;
560         used_words++;
561       } // else
562     }
563     else
564     {
565 		  output = lu_table[input1].val;
566     }
567 		noise++;
568     used_words++;
569 		// Add the obtained result to the list of outputs
570 		rand_outdata[computed_outputs++] = (out_class) output;
571 
572 #ifdef UNITTEST_ONEMILLION
573     if ( (output > _sigma * 6) || (output < -_sigma*6) )
574     {
575       std::cout << output << "FastGaussianNoise: Unit test failed, this should happen once in a million. Uniform input leading to this is  ";
576       for (unsigned i = 0; i < _word_precision ; i++)
577         printf("%.2x", *(noise-_word_precision+i));
578       std::cout << std::endl;
579     }
580 #endif
581 
582 #if 1
583     // If too much noise has been used regenerate it
584     if ((used_words+_word_precision) >= innoise_words)
585     {
586       noise = noise_init_ptr;
587       used_words = 0;
588       if (_verbose) std::cout << "FastGaussianNoise: All the input bits have been used, regenerating them ..." << std::endl;
589 
590 	    fastrandombytes((uint8_t*)noise, innoise_bytesize);
591     }
592 #endif
593 	}
594 	delete[] noise_init_ptr;
595 }
596 
597 /* Compare two arrays word by word.
598  * return 1 if op1 > op2, 0 if equals and -1 if op1 < op2 */
599 template<class in_class, class out_class, unsigned _lu_depth>
cmp(in_class * op1,in_class * op2)600 inline int FastGaussianNoise<in_class, out_class, _lu_depth>::cmp(in_class *op1, in_class *op2)
601 {
602 
603 	for (int i = 0; i < (int)_word_precision; i++)
604   {
605 
606 		if (op1[i] > op2[i]) return 1;
607 
608 		else if (op1[i] < op2[i]) return -1;
609 	}
610 	return 0;
611 }
612 
613 
614 // Compute exp(-(x-center)^2/(2*sigma^2)) this is not normalized ! (hence the nn)
615 template<class in_class, class out_class, unsigned _lu_depth>
nn_gaussian_law(mpfr_t rop,const mpfr_t x)616 void  inline FastGaussianNoise<in_class, out_class, _lu_depth>::nn_gaussian_law(mpfr_t rop, const mpfr_t x)
617 {
618 	mpfr_sub(rop, x, _center, MPFR_RNDN);
619 	mpfr_sqr(rop, rop, MPFR_RNDN);
620 	mpfr_neg(rop, rop, MPFR_RNDN);
621 	mpfr_mul(rop, rop, _const_sigma, MPFR_RNDN);
622 	mpfr_exp(rop, rop, MPFR_RNDN);
623 }
624 
625 
626 template<class in_class, class out_class, unsigned _lu_depth>
~FastGaussianNoise()627 FastGaussianNoise<in_class, out_class, _lu_depth>::~FastGaussianNoise()
628 {
629   // Freed allocated memory for the barriers
630   for (unsigned ctr = 0; ctr < _number_of_barriers; ctr++)
631   {
632     if (barriers[ctr] != nullptr) free(barriers[ctr]);
633     barriers[ctr] = nullptr;
634   }
635   if (barriers != nullptr) free(barriers);
636   barriers=nullptr;
637 
638   // Free other variables
639   mpfr_clear(_const_sigma);
640   mpfr_clear(_center);
641   delete[](lu_table);
642   if(_lu_depth == 2)
643   {
644     for (unsigned ctr = 0 ; ctr < _lu_size; ctr++)
645     {
646       if (lu_table2[ctr]!=nullptr) delete[](lu_table2[ctr]);
647     }
648     free(lu_table2);
649   }
650 }
651 
652 }  // namespace nfl
653 
654 #endif
655