1 #include <mpi.h>
2 
3 #include <string>
4 #include <iomanip>
5 #include <cstdio>
6 #include <cstdlib>
7 #include <iostream>
8 #include <vector>
9 #include <omp.h>
10 #include <sstream>
11 
12 
13 #ifdef _PROFILE_SORT
14 #include "sort_profiler.h"
15 #endif
16 
17 #include <binUtils.h>
18 #include <ompUtils.h>
19 #include <parUtils.h>
20 
21 #ifdef KWICK
22 	#if KWAY > 2
23 		#define SORT_FUNCTION par::HyperQuickSort_kway
24 	#else
25 		#ifdef SWAPRANKS
26       #define SORT_FUNCTION par::RankSwapSort
27     #else
28 	 	  #define SORT_FUNCTION par::HyperQuickSort
29     #endif
30 	#endif
31 #else
32     #define SORT_FUNCTION par::sampleSort
33 #endif
34 
35 // #define __VERIFY__
36 
37 
38 enum DistribType{
39 	UNIF_DISTRIB,
40 	GAUSS_DISTRIB,
41 };
42 
43 void printResults(int num_threads, MPI_Comm comm);
44 
getStats(double val,double * meanV,double * minV,double * maxV,MPI_Comm comm)45 void getStats(double val, double *meanV, double *minV, double *maxV, MPI_Comm comm)
46 {
47 	int p;
48 	double d, din;
49 	din = val;
50   MPI_Comm_size(comm, &p);
51 	MPI_Reduce(&din, &d, 1, MPI_DOUBLE, MPI_SUM, 0, comm); *meanV = d/p;
52 	MPI_Reduce(&din, &d, 1, MPI_DOUBLE, MPI_MIN, 0, comm); *minV = d;
53 	MPI_Reduce(&din, &d, 1, MPI_DOUBLE, MPI_MAX, 0, comm); *maxV = d;
54 }
55 
getDistType(char * code)56 DistribType getDistType(char* code) {
57 	if(!strcmp(code,"GAUSS\0")){
58 		return GAUSS_DISTRIB;
59 	}else{
60 		return UNIF_DISTRIB;
61 	}
62 }
63 
getNumElements(char * code)64 long getNumElements(char* code) {
65   unsigned int slen = strlen(code);
66   char dtype = code[0];
67   char tmp[128];
68   strncpy(tmp, code+1, slen-3); tmp[slen-3] = '\0';
69 
70   long numBytes = atol(tmp);
71   switch(code[slen-2]) {
72     case 'g':
73     case 'G':
74       numBytes *= 1024*1024*1024;
75       break;
76     case 'k':
77     case 'K':
78       numBytes *= 1024;
79       break;
80     case 'm':
81     case 'M':
82       numBytes *= 1024*1024;
83       break;
84     default:
85       std::cout << "unknown code " << code[slen-2] << std::endl;
86       return 0;
87   };
88 
89   switch (dtype) {
90     case 'd': // double array
91       return numBytes/sizeof(double);
92       break;
93     case 'f': // float array
94       return numBytes/sizeof(float);
95       break;
96     case 'i': // int array
97       return numBytes/sizeof(int);
98       break;
99     case 'l': // long array
100       return numBytes/sizeof(long);
101       break;
102     default:
103 			std::cout << "unknown dtype: " << dtype << std::endl;
104       return 0;
105   };
106 
107 }
108 
109 template <class T>
verify(std::vector<T> & in_,std::vector<T> & out_,MPI_Comm comm)110 bool verify (std::vector<T>& in_, std::vector<T> &out_, MPI_Comm comm){
111 
112   // Find out my identity in the default communicator
113   int myrank, p;
114   MPI_Comm_rank(comm, &myrank);
115   MPI_Comm_size(comm,&p);
116 
117   if (!myrank) std::cout << "Verifying sort" << std::endl;
118 
119   std::vector<T> in;
120   {
121     int N_local=in_.size()*sizeof(T);
122     std::vector<int> r_size(p, 0);
123     std::vector<int> r_disp(p, 0);
124     MPI_Gather(&N_local  , 1, MPI_INT,
125         &r_size[0], 1, MPI_INT, 0, comm);
126     omp_par::scan(&r_size[0], &r_disp[0], p);
127 
128     if(!myrank) in.resize((r_size[p-1]+r_disp[p-1])/sizeof(T));
129     MPI_Gatherv((char*)&in_[0],    N_local,             MPI_BYTE,
130         (char*)&in [0], &r_size[0], &r_disp[0], MPI_BYTE, 0, comm);
131   }
132 
133   std::vector<T> out;
134   {
135     int N_local=out_.size()*sizeof(T);
136     std::vector<int> r_size(p, 0);
137     std::vector<int> r_disp(p, 0);
138     MPI_Gather(&N_local  , 1, MPI_INT,
139         &r_size[0], 1, MPI_INT, 0, comm);
140     omp_par::scan(&r_size[0], &r_disp[0], p);
141 
142     if(!myrank) out.resize((r_size[p-1]+r_disp[p-1])/sizeof(T));
143     MPI_Gatherv((char*)&out_[0],    N_local,             MPI_BYTE,
144         (char*)&out [0], &r_size[0], &r_disp[0], MPI_BYTE, 0, comm);
145   }
146 
147   if(in.size()!=out.size()){
148     std::cout<<"Wrong size: in="<<in.size()<<" out="<<out.size()<<'\n';
149     return false;
150   }
151   std::sort(&in[0], &in[in.size()]);
152 
153   for(long j=0;j<in.size();j++)
154     if(in[j]!=out[j]){
155       std::cout<<"Failed at:"<<j<<'\n';
156 //      std::cout<<"Failed at:"<<j<<"; in="<<in[j]<<" out="<<out[j]<<'\n';
157       return false;
158     }
159 
160   return true;
161 }
162 
163 template <class T>
time_sort(size_t N,MPI_Comm comm,DistribType dist_type)164 double time_sort(size_t N, MPI_Comm comm, DistribType dist_type){
165   int myrank, p;
166 
167   MPI_Comm_rank(comm, &myrank);
168   MPI_Comm_size(comm,&p);
169   int omp_p=omp_get_max_threads();
170 
171   std::vector<IndexHolder<T>> in1(N);
172 
173   // Generate random data
174   std::vector<T> in(N);
175 	if(dist_type==UNIF_DISTRIB){
176     #pragma omp parallel for
177     for(int j=0;j<omp_p;j++){
178       unsigned int seed=j*p+myrank;
179       size_t start=(j*N)/omp_p;
180       size_t end=((j+1)*N)/omp_p;
181       for(unsigned int i=start;i<end;i++){
182         in[i]=rand_r(&seed);
183       }
184     }
185 	} else if(dist_type==GAUSS_DISTRIB){
186     double e=2.7182818284590452;
187     double log_e=log(e);
188 
189     unsigned int seed1=p+myrank;
190     long mn = rand_r(&seed1);
191 
192     #pragma omp parallel for
193     for(int j=0;j<omp_p;j++){
194       unsigned int seed=j*p+myrank;
195       size_t start=(j*N)/omp_p;
196       size_t end=((j+1)*N)/omp_p;
197       for(unsigned int i=start;i<end;i++){
198         in[i]= mn + sqrt(-2*log(rand_r(&seed)*1.0/RAND_MAX)/log_e)
199               * cos(rand_r(&seed)*2*M_PI/RAND_MAX)*RAND_MAX*0.1;
200       }
201     }
202 	}
203 
204  std::vector<T> in_cpy=in;
205   std::vector<T> out;
206 
207   // in=in_cpy;
208   SORT_FUNCTION<T>(in_cpy, comm);
209 #ifdef __VERIFY__
210   verify(in,in_cpy,comm);
211 #endif
212 
213 #ifdef _PROFILE_SORT
214 	total_sort.clear();
215 
216 	seq_sort.clear();
217 	sort_partitionw.clear();
218 
219 	sample_get_splitters.clear();
220 	sample_sort_splitters.clear();
221 	sample_prepare_scatter.clear();
222 	sample_do_all2all.clear();
223 
224 	hyper_compute_splitters.clear();
225 	hyper_communicate.clear();
226 	hyper_merge.clear();
227 #endif
228 
229 
230   //Sort
231   MPI_Barrier(comm);
232   double wtime=-omp_get_wtime();
233   // SORT_FUNCTION<T>(in, out, comm);
234   SORT_FUNCTION<T>(in, comm);
235   MPI_Barrier(comm);
236   wtime+=omp_get_wtime();
237 
238   return wtime;
239 }
240 
241 
242 /*
243 template <class T>
244 double time_sort(size_t N, MPI_Comm comm, DistribType dist_type){
245     int myrank, p;
246 
247     MPI_Comm_rank(comm, &myrank);
248     MPI_Comm_size(comm,&p);
249     int omp_p=omp_get_max_threads();
250 
251     // Generate random data
252     std::vector<T> in(N);
253     if(dist_type==UNIF_DISTRIB){
254 #pragma omp parallel for
255         for(int j=0;j<omp_p;j++){
256             unsigned int seed=j*p+myrank;
257             size_t start=(j*N)/omp_p;
258             size_t end=((j+1)*N)/omp_p;
259             for(unsigned int i=start;i<end;i++){
260                 in[i]=rand_r(&seed);
261             }
262         }
263     } else if(dist_type==GAUSS_DISTRIB){
264         double e=2.7182818284590452;
265         double log_e=log(e);
266 
267         unsigned int seed1=p+myrank;
268         long mn = rand_r(&seed1);
269 
270 #pragma omp parallel for
271         for(int j=0;j<omp_p;j++){
272             unsigned int seed=j*p+myrank;
273             size_t start=(j*N)/omp_p;
274             size_t end=((j+1)*N)/omp_p;
275             for(unsigned int i=start;i<end;i++){
276                 in[i]= mn + sqrt(-2*log(rand_r(&seed)*1.0/RAND_MAX)/log_e)
277                 * cos(rand_r(&seed)*2*M_PI/RAND_MAX)*RAND_MAX*0.1;
278             }
279         }
280     }
281 
282     std::vector<T> in_cpy=in;
283     std::vector<T> out;
284 
285     // in=in_cpy;
286     SORT_FUNCTION<T>(in_cpy, comm);
287 #ifdef __VERIFY__
288     verify(in,in_cpy,comm);
289 #endif
290 
291 #ifdef _PROFILE_SORT
292     total_sort.clear();
293 
294     seq_sort.clear();
295     sort_partitionw.clear();
296 
297     sample_get_splitters.clear();
298     sample_sort_splitters.clear();
299     sample_prepare_scatter.clear();
300     sample_do_all2all.clear();
301 
302     hyper_compute_splitters.clear();
303     hyper_communicate.clear();
304     hyper_merge.clear();
305 #endif
306 
307 
308     //Sort
309     MPI_Barrier(comm);
310     double wtime=-omp_get_wtime();
311     // SORT_FUNCTION<T>(in, out, comm);
312     SORT_FUNCTION<T>(in, comm);
313     MPI_Barrier(comm);
314     wtime+=omp_get_wtime();
315 
316     return wtime;
317 }*/
318 
main(int argc,char ** argv)319 int main(int argc, char **argv){
320 
321   if (argc < 4) {
322     std::cerr << "Usage: " << argv[0] << " numThreads typeSize typeDistrib" << std::endl;
323     std::cerr << "\t\t typeSize is a character for type of data follwed by data size per node." << std::endl;
324 		std::cerr << "\t\t typeSize can be d-double, f-float, i-int, l-long." << std::endl;
325     std::cerr << "\t\t Examples:" << std::endl;
326     std::cerr << "\t\t i1GB : integer  array of size 1GB" << std::endl;
327     std::cerr << "\t\t l1GB : long     array of size 1GB" << std::endl;
328 		std::cerr << "\t\t typeDistrib can be UNIF, GAUSS" << std::endl;
329     return 1;
330   }
331 
332   std::cout<<setiosflags(std::ios::fixed)<<std::setprecision(4)<<std::setiosflags(std::ios::right);
333 
334   //Set number of OpenMP threads to use.
335   int num_threads = atoi(argv[1]);
336 	omp_set_num_threads(num_threads);
337 
338   // Initialize MPI
339   MPI_Init(&argc, &argv);
340 
341   // Find out my identity in the default communicator
342   int myrank;
343   MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
344 
345   // Find out number of processes
346   int p;
347   MPI_Comm_size(MPI_COMM_WORLD, &p);
348 
349 
350   //!----------------------------------
351   /*
352   // 1. read in file corresponding to the node % 100;
353   // 2. select splitters ... 5 - 20 - print ...
354   // 3. bucket and write ... with num_spliters = 20;
355 
356 
357 
358   if (!myrank) std::cout << myrank << ": reading file" << std::endl;
359   char fname[256];
360   // sprintf(fname, "/scratch/01903/hsundar/from_karl/input/part%d", myrank%100);
361   sprintf(fname, "part%d", myrank);
362   FILE* fp = fopen(fname, "rb");
363 
364   long lSize;
365   size_t result;
366   fseek (fp , 0 , SEEK_END);
367   lSize = ftell (fp);
368   rewind (fp);
369 
370   // test buckets ...
371   int nl = lSize/100;
372   std::vector<sortRecord> qq(nl), out;
373 
374   // printf("%d: read size is %ld\n", myrank, lSize);
375   / *
376   srand(myrank*1713);
377   for (int i=0; i<nl; i++) {
378     qq[i] = rand();
379   }
380   * /
381   result = fread ( &(*qq.begin()), 1, lSize, fp);
382   if (result != lSize) { std::cerr << myrank << ": Reading error" << std::endl; exit (3);}
383   fclose (fp);
384   if (!myrank) std::cout << myrank << ": finished reading file" << std::endl;
385 
386   omp_par::merge_sort(&qq[0], &qq[qq.size()]);
387 
388   MPI_Barrier(MPI_COMM_WORLD);
389   if (!myrank) std::cout << myrank << ": starting approx Select" << std::endl;
390 
391   std::vector<std::pair<sortRecord, DendroIntL> > sortBins = par::Sorted_approx_Select_skewed(qq, 9, MPI_COMM_WORLD);
392 
393   if (!myrank) std::cout << myrank << ": finished approx Select" << std::endl;
394   / *
395   if (!myrank) {
396     std::cout << "splitters = [" << std::endl;
397     for (int i=0; i<9; ++i) std::cout << sortBins[i] << " " << std::endl;;
398     std::cout << "]" << std::endl;
399   }
400   * /
401   / *
402   double t0 = omp_get_wtime();
403   par::bucketDataAndWriteSkewed(qq, sortBins, "/tmp/foo", MPI_COMM_WORLD);
404   double t1 = omp_get_wtime();
405 
406   * /
407   double t0 = omp_get_wtime();
408   // par::HyperQuickSort(qq, out, MPI_COMM_WORLD);
409   par::sampleSort(qq, MPI_COMM_WORLD);
410   double t1 = omp_get_wtime();
411 
412   std::cout << myrank << ": all done in " << t1-t0 << std::endl;
413 
414 
415   MPI_Finalize();
416   return 0;
417   //!----------------------------------
418   */
419   int proc_group=0;
420   int min_np=1;
421   MPI_Comm comm;
422 
423   std::vector<double> tt(10000,0);
424 
425   int k = 0; // in case size based runs are needed
426   char dtype = argv[2][0];
427   long N = getNumElements(argv[2]);
428 	DistribType dist_type=getDistType(argv[3]);
429   if (!N) {
430     std::cerr << "illegal typeSize code provided: " << argv[2] << std::endl;
431     return 2;
432   }
433 
434   std::string num;
435   std::stringstream mystream;
436   mystream << N*p;
437   num = mystream.str();
438   int insertPosition = num.length() - 3;
439   while (insertPosition > 0) {
440     num.insert(insertPosition, ",");
441     insertPosition-=3;
442   }
443   if (!myrank)
444     std::cout << "sorting array of size " << num << " keys of type " << dtype << std::endl;
445 
446   // check if arguments are ok ...
447 
448   { // -- full size run
449     double ttt;
450 
451     switch(dtype) {
452 			case 'd':
453 				ttt = time_sort<double>(N, MPI_COMM_WORLD,dist_type);
454 				break;
455 			case 'f':
456 				ttt = time_sort<float>(N, MPI_COMM_WORLD,dist_type);
457 				break;
458 			case 'i':
459 				ttt = time_sort<int>(N, MPI_COMM_WORLD,dist_type);
460 				break;
461       case 'l':
462         ttt = time_sort<long>(N, MPI_COMM_WORLD,dist_type);
463         break;
464     };
465 #ifdef _PROFILE_SORT
466 		if (!myrank) {
467 			std::cout << "---------------------------------------------------------------------------" << std::endl;
468 		#ifndef KWICK
469 			std::cout << "\tSample Sort with " << KWAY << "-way all2all" << "\t\tMean\tMin\tMax" << std::endl;
470 		#else
471 		  #ifdef SWAPRANKS
472 			  std::cout << "\t" << KWAY << "-way SwapRankSort " << "\t\tMean\tMin\tMax" << std::endl;
473       #else
474         std::cout << "\t" << KWAY << "-way HyperQuickSort" << "\t\tMean\tMin\tMax" << std::endl;
475 		  #endif
476 		#endif
477 			std::cout << "---------------------------------------------------------------------------" << std::endl;
478 		}
479 		printResults(num_threads, MPI_COMM_WORLD);
480 #endif
481 
482     if(!myrank){
483       tt[100*k+0]=ttt;
484     }
485   }
486 
487 	// MPI_Finalize();
488   // return 0;
489 
490   for(int i=p; myrank<i && i>=min_np; i=i>>1) proc_group++;
491   MPI_Comm_split(MPI_COMM_WORLD, proc_group, myrank, &comm);
492 
493   { // smaller /2^k runs
494     int myrank_;
495     MPI_Comm_rank(comm, &myrank_);
496     double ttt;
497 
498     switch(dtype) {
499 			case 'd':
500 				ttt = time_sort<double>(N, comm,dist_type);
501 				break;
502 			case 'f':
503 				ttt = time_sort<float>(N, comm,dist_type);
504 				break;
505 			case 'i':
506         ttt = time_sort<int>(N, comm,dist_type);
507         break;
508       case 'l':
509         ttt = time_sort<long>(N, comm,dist_type);
510         break;
511     };
512 
513     if(!myrank_){
514       tt[100*k+proc_group]=ttt;
515     }
516   }
517 
518 	int num_groups=0;
519 	if (!myrank) num_groups = proc_group;
520 	MPI_Bcast(&num_groups, 1, MPI_INT, 0, MPI_COMM_WORLD);
521 
522 	for(size_t i = 0; i < num_groups; ++i)
523 	{
524 		MPI_Barrier(MPI_COMM_WORLD);
525 		if (proc_group == i) {
526 			MPI_Barrier(comm);
527 			printResults(num_threads, comm);
528 			MPI_Barrier(comm);
529 		}
530 	}
531 
532   MPI_Barrier(MPI_COMM_WORLD);
533 
534   std::vector<double> tt_glb(10000);
535   MPI_Reduce(&tt[0], &tt_glb[0], 10000, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
536   if(!myrank){
537     std::cout<<"\nSort - Weak Scaling:\n";
538     for(int i=0;i<proc_group;i++){
539       int np=p;
540       if(i>0) np=(p>>(i-1))-(p>>i);
541       std::cout<<"\t\t\tP = "<<np<<"\t\t";
542       // for(int k=0;k<=log_N;k++)
543         std::cout<<tt_glb[100*k+i]<<' ';
544       std::cout<<'\n';
545     }
546   }
547 
548   // Shut down MPI
549   MPI_Finalize();
550    return 0;
551  }
552 
printResults(int num_threads,MPI_Comm comm)553 void printResults(int num_threads, MPI_Comm comm) {
554 	int myrank, p, simd_width=0;
555 	MPI_Comm_size(comm, &p);
556 	MPI_Comm_rank(comm, &myrank);
557 #ifdef SIMD_MERGE
558 	simd_width=SIMD_MERGE;
559 #endif
560 		// reduce results
561 		double t, meanV, minV, maxV;
562 
563 #ifdef _PROFILE_SORT
564 		if (!myrank) {
565 			// std::cout << std::endl;
566 			// std::cout << "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" << std::endl;
567 			std::cout <<  p << " tasks : " << num_threads << " threads " << simd_width << " SIMD_WIDTH " << std::endl;
568 			// std::cout << "===========================================================================" << std::endl;
569 		}
570 		t = total_sort.seconds; 			getStats(t, &meanV, &minV, &maxV, comm);
571 		if (!myrank) {
572 			std::cout << "Total sort time   \t\t\t" << meanV << "\t" << minV << "\t" << maxV <<  std::endl;
573 			// std::cout << "----------------------------------------------------------------------" << std::endl;
574 		}
575 		t = seq_sort.seconds; 				getStats(t, &meanV, &minV, &maxV, comm);
576 		if (!myrank) {
577 			std::cout << "Sequential Sort   \t\t\t" << meanV << "\t" << minV << "\t" << maxV <<  std::endl;
578 		}
579 		t = sort_partitionw.seconds; 	getStats(t, &meanV, &minV, &maxV, comm);
580 		if (!myrank) {
581 			std::cout << "partitionW        \t\t\t" << meanV << "\t" << minV << "\t" << maxV <<  std::endl;
582 			// std::cout << "----------------------------------------------------------------------" << std::endl;
583 		}
584 #ifndef KWICK
585 		t = sample_sort_splitters.seconds; 				getStats(t, &meanV, &minV, &maxV, comm);
586 		if (!myrank) {
587 			std::cout << "sort splitters    \t\t\t" << meanV << "\t" << minV << "\t" << maxV <<  std::endl;
588 		}
589 		t = sample_prepare_scatter.seconds; 				getStats(t, &meanV, &minV, &maxV, comm);
590 		if (!myrank) {
591 			std::cout << "prepare scatter   \t\t\t" << meanV << "\t" << minV << "\t" << maxV <<  std::endl;
592 		}
593 		t = sample_do_all2all.seconds; 				getStats(t, &meanV, &minV, &maxV, comm);
594 		if (!myrank) {
595 			std::cout << "all2all           \t\t\t" << meanV << "\t" << minV << "\t" << maxV <<  std::endl;
596 		}
597 #else
598 		t = hyper_compute_splitters.seconds; 				getStats(t, &meanV, &minV, &maxV, comm);
599 		if (!myrank) {
600 			std::cout << "compute splitters \t\t\t" << meanV << "\t" << minV << "\t" << maxV <<  std::endl;
601 		}
602 		t = hyper_communicate.seconds; 				getStats(t, &meanV, &minV, &maxV, comm);
603 		if (!myrank) {
604 			std::cout << "exchange data     \t\t\t" << meanV << "\t" << minV << "\t" << maxV <<  std::endl;
605 		}
606 		t = hyper_merge.seconds; 				getStats(t, &meanV, &minV, &maxV, comm);
607 		if (!myrank) {
608 			std::cout << "merge arrays      \t\t\t" << meanV << "\t" << minV << "\t" << maxV <<  std::endl;
609 		}
610 		t = hyper_comm_split.seconds; 				getStats(t, &meanV, &minV, &maxV, comm);
611 		if (!myrank) {
612 			std::cout << "comm split        \t\t\t" << meanV << "\t" << minV << "\t" << maxV <<  std::endl;
613 		}
614     if (!myrank) {
615       std::string num;
616       std::stringstream mystream;
617       mystream << total_bytes;
618       num = mystream.str();
619       int insertPosition = num.length() - 3;
620       while (insertPosition > 0) {
621         num.insert(insertPosition, ",");
622         insertPosition-=3;
623       }
624       std::cout << "total comm        \t\t\t" << num << " bytes" <<  std::endl;
625     }
626 
627 #endif
628 #endif
629 		if (!myrank) {
630 			// std::cout << "---------------------------------------------------------------------------" << std::endl;
631 			std::cout << "" << std::endl;
632 		}
633 }
634 
635 #define  FALSE          0       // Boolean false
636 #define  TRUE           1       // Boolean true
637 //===========================================================================
638 //=  Function to generate Zipf (power law) distributed random variables     =
639 //=    - Input: alpha and N                                                 =
640 //=    - Output: Returns with Zipf distributed random variable              =
641 //===========================================================================
zipf(double alpha,int n,unsigned int * seedp)642 int zipf(double alpha, int n, unsigned int *seedp)
643 {
644   static int first = TRUE;      // Static first time flag
645   static double c = 0;          // Normalization constant
646   double z;                     // Uniform random number (0 < z < 1)
647   double sum_prob;              // Sum of probabilities
648   double zipf_value;            // Computed exponential value to be returned
649   int    i;                     // Loop counter
650 
651   // Compute normalization constant on first call only
652   if (first == TRUE)
653   {
654     for (i=1; i<=n; i++)
655       c = c + (1.0 / pow((double) i, alpha));
656     c = 1.0 / c;
657     first = FALSE;
658   }
659 
660   // Pull a uniform random number (0 < z < 1)
661   do
662   {
663     z = rand_r(seedp)*(1.0/RAND_MAX);
664   }
665   while ((z == 0) || (z == 1));
666 
667 	static std::vector<double> oopia;
668 	#pragma omp critical
669 	if(oopia.size()!=n){
670 		oopia.resize(n);
671 		for(int i=0;i<n;i++)
672 			oopia[i]=1.0/pow((double) i, alpha);
673 	}
674 
675   // Map z to the value
676   sum_prob = 0;
677   for (i=1; i<=n; i++)
678   {
679     sum_prob = sum_prob + c*oopia[i-1];
680     if (sum_prob >= z)
681     {
682       zipf_value = i;
683       break;
684     }
685   }
686 
687   // Assert that zipf_value is between 1 and N
688   assert((zipf_value >=1) && (zipf_value <= n));
689 
690   return(zipf_value);
691 }
692