1 #include <string.h>
2 #include <limits.h>
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <stdarg.h>
6 #include <iostream>
7 #include <fstream>
8 
9 #include "moab/TupleList.hpp"
10 
11 namespace moab {
12 
fail(const char * fmt,...)13 void fail(const char *fmt, ...)
14 {
15   va_list ap;
16   va_start(ap, fmt);
17   vfprintf(stderr, fmt, ap);
18   va_end(ap);
19   exit(1);
20 }
21 
buffer(size_t sz)22 TupleList::buffer::buffer(size_t sz)
23 {
24   ptr = NULL;
25   buffSize = 0;
26   this->buffer_init_(sz, __FILE__);
27 }
28 
buffer()29 TupleList::buffer::buffer()
30 {
31   buffSize = 0;
32   ptr = NULL;
33 }
34 
buffer_init_(size_t sizeIn,const char * file)35 void TupleList::buffer::buffer_init_(size_t sizeIn, const char *file)
36 {
37   this->buffSize = sizeIn;
38   void *res = malloc(this->buffSize);
39   if (!res && buffSize > 0)
40     fail("%s: allocation of %d bytes failed\n", file, (int) buffSize);
41   ptr = (char*) res;
42 }
43 
buffer_reserve_(size_t min,const char * file)44 void TupleList::buffer::buffer_reserve_(size_t min, const char *file)
45 {
46   if (this->buffSize < min)
47   {
48     size_t newSize = this->buffSize;
49     newSize += newSize / 2 + 1;
50     if (newSize < min)
51       newSize = min;
52     void *res = realloc(ptr, newSize);
53     if (!res && newSize > 0)
54       fail("%s: reallocation of %d bytes failed\n", file, newSize);
55     ptr = (char*) res;
56     this->buffSize = newSize;
57   }
58 }
59 
reset()60 void TupleList::buffer::reset()
61 {
62   free(ptr);
63   ptr = NULL;
64   buffSize = 0;
65 }
66 
TupleList(uint p_mi,uint p_ml,uint p_mul,uint p_mr,uint p_max)67 TupleList::TupleList(uint p_mi, uint p_ml, uint p_mul, uint p_mr, uint p_max)
68 : vi(NULL), vl(NULL), vul(NULL), vr(NULL),
69   last_sorted(-1)
70 {
71   initialize(p_mi, p_ml, p_mul, p_mr, p_max);
72 }
73 
TupleList()74 TupleList::TupleList()
75 : vi_rd(NULL), vl_rd(NULL), vul_rd(NULL), vr_rd(NULL),
76   mi(0), ml(0), mul(0), mr(0),
77   n(0), max(0),
78   vi(NULL), vl(NULL), vul(NULL), vr(NULL),
79   last_sorted(-1)
80 {
81   disableWriteAccess();
82 }
83 
84 // Allocates space for the tuple list in memory according to parameters
initialize(uint p_mi,uint p_ml,uint p_mul,uint p_mr,uint p_max)85 void TupleList::initialize(uint p_mi, uint p_ml, uint p_mul, uint p_mr,
86     uint p_max)
87 {
88   this->n = 0;
89   this->max = p_max;
90   this->mi = p_mi;
91   this->ml = p_ml;
92   this->mul = p_mul;
93   this->mr = p_mr;
94   size_t sz;
95 
96   if (max * mi > 0)
97   {
98     sz = max * mi * sizeof(sint);
99     void *resi = malloc(sz);
100     if (!resi && max * mi > 0)
101       fail("%s: allocation of %d bytes failed\n", __FILE__, (int) sz);
102     vi = (sint*) resi;
103   }
104   else
105     vi = NULL;
106   if (max * ml > 0)
107   {
108     sz = max * ml * sizeof(slong);
109     void *resl = malloc(sz);
110     if (!resl && max * ml > 0)
111       fail("%s: allocation of %d bytes failed\n", __FILE__, (int) sz);
112     vl = (slong*) resl;
113   }
114   else
115     vl = NULL;
116   if (max * mul > 0)
117   {
118     sz = max * mul * sizeof(Ulong);
119     void *resu = malloc(sz);
120     if (!resu && max * mul > 0)
121       fail("%s: allocation of %d bytes failed\n", __FILE__, (int) sz);
122     vul = (Ulong*) resu;
123   }
124   else
125     vul = NULL;
126   if (max * mr > 0)
127   {
128     sz = max * mr * sizeof(realType);
129     void *resr = malloc(sz);
130     if (!resr && max * ml > 0)
131       fail("%s: allocation of %d bytes failed\n", __FILE__, (int) sz);
132     vr = (realType*) resr;
133   }
134   else
135     vr = NULL;
136 
137   //Begin with write access disabled
138   this->disableWriteAccess();
139 
140   //Set read variables
141   vi_rd = vi;
142   vl_rd = vl;
143   vul_rd = vul;
144   vr_rd = vr;
145 }
146 
147 // Resizes a tuplelist to the given uint max
resize(uint maxIn)148 ErrorCode TupleList::resize(uint maxIn)
149 {
150   this->max = maxIn;
151   size_t sz;
152 
153   if (vi || (max * mi > 0))
154   {
155     sz = max * mi * sizeof(sint);
156     void *resi = realloc(vi, sz);
157     if (!resi && max * mi > 0)
158     {
159       fail("%s: allocation of %d bytes failed\n", __FILE__, (int) sz);
160       return moab::MB_MEMORY_ALLOCATION_FAILED;
161     }
162     vi = (sint*) resi;
163   }
164   if (vl || (max * ml > 0))
165   {
166     sz = max * ml * sizeof(slong);
167     void *resl = realloc(vl, sz);
168     if (!resl && max * ml > 0)
169     {
170       fail("%s: allocation of %d bytes failed\n", __FILE__, (int) sz);
171       return moab::MB_MEMORY_ALLOCATION_FAILED;
172     }
173     vl = (slong*) resl;
174   }
175   if (vul || (max * mul > 0))
176   {
177     sz = max * mul * sizeof(Ulong);
178     void *resu = realloc(vul, sz);
179     if (!resu && max * mul > 0)
180     {
181       fail("%s: allocation of %d bytes failed\n", __FILE__, (int) sz);
182       return moab::MB_MEMORY_ALLOCATION_FAILED;
183     }
184     vul = (Ulong*) resu;
185   }
186   if (vr || (max * mr > 0))
187   {
188     sz = max * mr * sizeof(realType);
189     void *resr = realloc(vr, sz);
190     if (!resr && max * mr > 0)
191     {
192       fail("%s: allocation of %d bytes failed\n", __FILE__, (int) sz);
193       return moab::MB_MEMORY_ALLOCATION_FAILED;
194     }
195     vr = (realType*) resr;
196   }
197 
198   //Set read variables
199   vi_rd = vi;
200   vl_rd = vl;
201   vul_rd = vul;
202   vr_rd = vr;
203 
204   //Set the write variables if necessary
205   if (writeEnabled)
206   {
207     vi_wr = vi;
208     vl_wr = vl;
209     vul_wr = vul;
210     vr_wr = vr;
211   }
212   return moab::MB_SUCCESS;
213 }
214 
215 // Frees the memory used by the tuplelist
reset()216 void TupleList::reset()
217 {
218   //free up the pointers
219   free(vi);
220   free(vl);
221   free(vul);
222   free(vr);
223   //Set them all to null
224   vr = NULL;
225   vi = NULL;
226   vul = NULL;
227   vl = NULL;
228   //Set the read and write pointers to null
229   disableWriteAccess();
230   vi_rd = NULL;
231   vl_rd = NULL;
232   vul_rd = NULL;
233   vr_rd = NULL;
234 }
235 
236 // Increments n; if n>max, increase the size of the tuplelist
reserve()237 void TupleList::reserve()
238 {
239   n++;
240   while (n > max)
241     resize((max ? max + max / 2 + 1 : 2));
242   last_sorted = -1;
243 }
244 
245 // Given the value and the position in the field, finds the index of the tuple
246 // to which the value belongs
find(unsigned int key_num,sint value)247 int TupleList::find(unsigned int key_num, sint value)
248 {
249   // we are passing an int, no issue, leave it at long
250   long uvalue = (long) value;
251   if (!(key_num > mi))
252   {
253     // Binary search: only if the tuple_list is sorted
254     if (last_sorted == (int) key_num)
255     {
256       int lb = 0, ub = n, index; // lb=lower bound, ub=upper bound, index=mid
257       for (; lb <= ub;)
258       {
259         index = (lb + ub) / 2;
260         if (vi[index * mi + key_num] == uvalue)
261           return index;
262         else if (vi[index * mi + key_num] > uvalue)
263           ub = index - 1;
264         else if (vi[index * mi + key_num] < uvalue)
265           lb = index + 1;
266       }
267     }
268     else
269     {
270       // Sequential search: if tuple_list is not sorted
271       for (uint index = 0; index < n; index++)
272       {
273         if (vi[index * mi + key_num] == uvalue)
274           return index;
275       }
276     }
277   }
278   return -1; // If the value wasn't present or an invalid key was given
279 }
280 
find(unsigned int key_num,slong value)281 int TupleList::find(unsigned int key_num, slong value)
282 {
283   long uvalue = (long) value;
284   if (!(key_num > ml))
285   {
286     if (last_sorted - mi == key_num)
287     {
288       int lb = 0, ub = n, index; // lb=lower bound, ub=upper bound, index=mid
289       for (; lb <= ub;)
290       {
291         index = (lb + ub) / 2;
292         if (vl[index * ml + key_num] ==  uvalue)
293           return index;
294         else if (vl[index * ml + key_num] > uvalue)
295           ub = index - 1;
296         else if (vl[index * ml + key_num] < uvalue)
297           lb = index + 1;
298       }
299     }
300     else
301     {
302       // Sequential search: if tuple_list is not sorted
303       for (uint index = 0; index < n; index++)
304       {
305         if (vl[index * ml + key_num] == uvalue)
306           return index;
307       }
308     }
309   }
310   return -1; // If the value wasn't present or an invalid key was given
311 }
312 
find(unsigned int key_num,Ulong value)313 int TupleList::find(unsigned int key_num, Ulong value)
314 {
315   if (!(key_num > mul))
316   {
317     if (last_sorted - mi - ml == key_num)
318     {
319       int lb = 0, ub = n - 1, index; // lb=lower bound, ub=upper bound, index=mid
320       for (; lb <= ub;)
321       {
322         index = (lb + ub) / 2;
323         if (vul[index * mul + key_num] == value)
324           return index;
325         else if (vul[index * mul + key_num] > value)
326           ub = index - 1;
327         else if (vul[index * mul + key_num] < value)
328           lb = index + 1;
329       }
330     }
331     else
332     {
333       // Sequential search: if tuple_list is not sorted
334       for (uint index = 0; index < n; index++)
335       {
336         if (vul[index * mul + key_num] == value)
337           return index;
338       }
339     }
340   }
341   return -1; // If the value wasn't present or an invalid key was given
342 }
343 
find(unsigned int key_num,realType value)344 int TupleList::find(unsigned int key_num, realType value)
345 {
346   if (!(key_num > mr))
347   {
348     // Sequential search: TupleList cannot be sorted by reals
349     for (uint index = 0; index < n; index++)
350     {
351       if (vr[index * mr + key_num] == value)
352         return index;
353     }
354   }
355   return -1; // If the value wasn't present or an invalid key was given
356 }
357 
get_sint(unsigned int index,unsigned int m)358 sint TupleList::get_sint(unsigned int index, unsigned int m)
359 {
360   if (mi > m && n > index)
361     return vi[index * mi + m];
362   return 0;
363 }
364 
get_int(unsigned int index,unsigned int m)365 slong TupleList::get_int(unsigned int index, unsigned int m)
366 {
367   if (ml > m && n > index)
368     return vl[index * ml + m];
369   return 0;
370 }
371 
get_ulong(unsigned int index,unsigned int m)372 Ulong TupleList::get_ulong(unsigned int index, unsigned int m)
373 {
374   if (mul > m && n > index)
375     return vul[index * mul + m];
376   return 0;
377 }
378 
get_double(unsigned int index,unsigned int m)379 realType TupleList::get_double(unsigned int index, unsigned int m)
380 {
381   if (mr > m && n > index)
382     return vr[index * mr + m];
383   return 0;
384 }
385 
get(unsigned int index,const sint * & sp,const slong * & ip,const Ulong * & lp,const realType * & dp)386 ErrorCode TupleList::get(unsigned int index, const sint *&sp, const slong *&ip,
387     const Ulong *&lp, const realType *&dp)
388 {
389   if (index <= n)
390   {
391     if (mi)
392       *&sp = &vi[index * mi];
393     else
394       *&sp = NULL;
395     if (ml)
396       *&ip = &vl[index * ml];
397     else
398       *&ip = NULL;
399     if (mul)
400       *&lp = &vul[index * mul];
401     else
402       *&lp = NULL;
403     if (mr)
404       *&dp = &vr[index * mr];
405     else
406       *&dp = NULL;
407 
408     return MB_SUCCESS;
409   }
410   return MB_FAILURE;
411 }
412 
push_back(sint * sp,slong * ip,Ulong * lp,realType * dp)413 unsigned int TupleList::push_back(sint *sp, slong *ip, Ulong *lp, realType *dp)
414 {
415   reserve();
416   if (mi)
417     memcpy(&vi[mi * (n - 1)], sp, mi * sizeof(sint));
418   if (ml)
419     memcpy(&vl[ml * (n - 1)], ip, ml * sizeof(long));
420   if (mul)
421     memcpy(&vul[mul * (n - 1)], lp, mul * sizeof(Ulong));
422   if (mr)
423     memcpy(&vr[mr * (n - 1)], dp, mr * sizeof(realType));
424 
425   last_sorted = -1;
426   return n - 1;
427 }
428 
enableWriteAccess()429 void TupleList::enableWriteAccess()
430 {
431   writeEnabled = true;
432   last_sorted = -1;
433   vi_wr = vi;
434   vl_wr = vl;
435   vul_wr = vul;
436   vr_wr = vr;
437 }
438 
disableWriteAccess()439 void TupleList::disableWriteAccess()
440 {
441   writeEnabled = false;
442   vi_wr = NULL;
443   vl_wr = NULL;
444   vul_wr = NULL;
445   vr_wr = NULL;
446 }
447 
getTupleSize(uint & mi_out,uint & ml_out,uint & mul_out,uint & mr_out) const448 void TupleList::getTupleSize(uint &mi_out, uint &ml_out, uint &mul_out,
449     uint &mr_out) const
450 {
451   mi_out = mi;
452   ml_out = ml;
453   mul_out = mul;
454   mr_out = mr;
455 }
456 
inc_n()457 uint TupleList::inc_n()
458 {
459   //Check for direct write access
460   if (!writeEnabled)
461   {
462     enableWriteAccess();
463   }
464   n++;
465   return n;
466 }
467 
set_n(uint n_in)468 void TupleList::set_n(uint n_in)
469 {
470   //Check for direct write access;
471   if (!writeEnabled)
472   {
473     enableWriteAccess();
474   }
475   n = n_in;
476 }
477 
print(const char * name) const478 void TupleList::print(const char *name) const
479 {
480   std::cout << "Printing Tuple " << name << "===================" << std::endl;
481   unsigned long i = 0, l = 0, ul = 0, r = 0;
482   for (uint k = 0; k < n; k++)
483   {
484     for (uint j = 0; j < mi; j++)
485     {
486       std::cout << vi[i++] << " | ";
487     }
488     for (uint j = 0; j < ml; j++)
489     {
490       std::cout << vl[l++] << " | ";
491     }
492     for (uint j = 0; j < mul; j++)
493     {
494       std::cout << vul[ul++] << " | ";
495     }
496     for (uint j = 0; j < mr; j++)
497     {
498       std::cout << vr[r++] << " | ";
499     }
500     std::cout << std::endl;
501   }
502   std::cout << "=======================================" << std::endl
503       << std::endl;
504 }
print_to_file(const char * filename) const505 void TupleList::print_to_file(const char * filename) const
506 {
507   std::ofstream ofs;
508   ofs.open (filename, std::ofstream::out | std::ofstream::app);
509 
510   ofs <<  "Printing Tuple " << filename << "===================" << std::endl;
511   unsigned long i = 0, l = 0, ul = 0, r = 0;
512   for (uint k = 0; k < n; k++)
513   {
514     for (uint j = 0; j < mi; j++)
515     {
516       ofs << vi[i++] << " | ";
517     }
518     for (uint j = 0; j < ml; j++)
519     {
520       ofs << vl[l++] << " | ";
521     }
522     for (uint j = 0; j < mul; j++)
523     {
524       ofs << vul[ul++] << " | ";
525     }
526     for (uint j = 0; j < mr; j++)
527     {
528       ofs << vr[r++] << " | ";
529     }
530     ofs << std::endl;
531   }
532   ofs << "=======================================" << std::endl
533       << std::endl;
534 
535   ofs.close();
536 }
permute(uint * perm,void * work)537 void TupleList::permute(uint *perm, void *work)
538 {
539   const unsigned int_size = mi * sizeof(sint), long_size = ml * sizeof(slong),
540       Ulong_size = mul * sizeof(Ulong), real_size = mr * sizeof(realType);
541   if (mi)
542   {
543     uint *p = perm, *pe = p + n;
544     char *sorted = (char *) work;
545     while (p != pe)
546       memcpy((void *) sorted, &vi[mi * (*p++)], int_size), sorted += int_size;
547     memcpy(vi, work, int_size * n);
548   }
549   if (ml)
550   {
551     uint *p = perm, *pe = p + n;
552     char *sorted = (char *) work;
553     while (p != pe)
554       memcpy((void *) sorted, &vl[ml * (*p++)], long_size), sorted += long_size;
555     memcpy(vl, work, long_size * n);
556   }
557   if (mul)
558   {
559     uint *p = perm, *pe = p + n;
560     char *sorted = (char *) work;
561     while (p != pe)
562       memcpy((void *) sorted, &vul[mul * (*p++)], Ulong_size), sorted +=
563           Ulong_size;
564     memcpy(vul, work, Ulong_size * n);
565   }
566   if (mr)
567   {
568     uint *p = perm, *pe = p + n;
569     char *sorted = (char *) work;
570     while (p != pe)
571       memcpy((void *) sorted, &vr[mr * (*p++)], real_size), sorted += real_size;
572     memcpy(vr, work, real_size * n);
573   }
574 }
575 
576 # define umax_2(a, b) (((a)>(b)) ? (a):(b))
577 
sort(uint key,TupleList::buffer * buf)578 ErrorCode TupleList::sort(uint key, TupleList::buffer *buf)
579 {
580   const unsigned int_size = mi * sizeof(sint);
581   const unsigned long_size = ml * sizeof(slong);
582   const unsigned Ulong_size = mul * sizeof(Ulong);
583   const unsigned real_size = mr * sizeof(realType);
584   const unsigned width = umax_2(umax_2(int_size,long_size),
585       umax_2(Ulong_size,real_size));
586   unsigned data_size =
587       key >= mi ? sizeof(SortData<long> ) : sizeof(SortData<uint> );
588 #if defined(WIN32) || defined(_WIN32) || defined(__WIN32)
589   if (key>=mi+ml)
590     data_size = sizeof(SortData<Ulong> );
591 #endif
592 
593   uint work_min = n * umax_2(2*data_size,sizeof(sint)+width);
594   uint *work;
595   buf->buffer_reserve(work_min);
596   work = (uint *) buf->ptr;
597   if (key < mi)
598     index_sort((uint *) &vi[key], n, mi, work, (SortData<uint>*) work);
599   else if (key < mi + ml)
600     index_sort((long*) &vl[key - mi], n, ml, work, (SortData<long>*) work);
601   else if (key < mi + ml + mul)
602     index_sort((Ulong*) &vul[key - mi - ml], n, mul, work,
603         (SortData<Ulong>*) work);
604   else
605     return MB_NOT_IMPLEMENTED;
606 
607   permute(work, work + n);
608 
609   if (!writeEnabled)
610     last_sorted = key;
611   return MB_SUCCESS;
612 }
613 
614 #undef umax_2
615 
616 #define DIGIT_BITS   8
617 #define DIGIT_VALUES (1<<DIGIT_BITS)
618 #define DIGIT_MASK   ((Value)(DIGIT_VALUES-1))
619 #define CEILDIV(a,b) (((a)+(b)-1)/(b))
620 #define DIGITS       CEILDIV(CHAR_BIT*sizeof(Value),DIGIT_BITS)
621 #define VALUE_BITS   (DIGIT_BITS*DIGITS)
622 #define COUNT_SIZE   (DIGITS*DIGIT_VALUES)
623 
624 /* used to unroll a tiny loop: */
625 #define COUNT_DIGIT_01(n,i) \
626     if(n>i) count[i][val&DIGIT_MASK]++, val>>=DIGIT_BITS
627 #define COUNT_DIGIT_02(n,i) COUNT_DIGIT_01(n,i); COUNT_DIGIT_01(n,i+ 1)
628 #define COUNT_DIGIT_04(n,i) COUNT_DIGIT_02(n,i); COUNT_DIGIT_02(n,i+ 2)
629 #define COUNT_DIGIT_08(n,i) COUNT_DIGIT_04(n,i); COUNT_DIGIT_04(n,i+ 4)
630 #define COUNT_DIGIT_16(n,i) COUNT_DIGIT_08(n,i); COUNT_DIGIT_08(n,i+ 8)
631 #define COUNT_DIGIT_32(n,i) COUNT_DIGIT_16(n,i); COUNT_DIGIT_16(n,i+16)
632 #define COUNT_DIGIT_64(n,i) COUNT_DIGIT_32(n,i); COUNT_DIGIT_32(n,i+32)
633 
634 template<class Value>
radix_count(const Value * A,const Value * end,Index stride,Index count[DIGITS][DIGIT_VALUES])635 Value TupleList::radix_count(const Value *A, const Value *end, Index stride,
636     Index count[DIGITS][DIGIT_VALUES])
637 {
638   Value bitorkey = 0;
639   memset(count, 0, COUNT_SIZE * sizeof(Index));
640   do
641   {
642     Value val = *A;
643     bitorkey |= val;
644     COUNT_DIGIT_64(DIGITS, 0);
645     // above macro expands to:
646     //if(DIGITS> 0) count[ 0][val&DIGIT_MASK]++, val>>=DIGIT_BITS;
647     //if(DIGITS> 1) count[ 1][val&DIGIT_MASK]++, val>>=DIGIT_BITS;
648     //  ...
649     //if(DIGITS>63) count[63][val&DIGIT_MASK]++, val>>=DIGIT_BITS;
650 
651   } while (A += stride, A != end);
652   return bitorkey;
653 }
654 
655 #undef COUNT_DIGIT_01
656 #undef COUNT_DIGIT_02
657 #undef COUNT_DIGIT_04
658 #undef COUNT_DIGIT_08
659 #undef COUNT_DIGIT_16
660 #undef COUNT_DIGIT_32
661 #undef COUNT_DIGIT_64
662 
radix_offsets(Index * c)663 void TupleList::radix_offsets(Index *c)
664 {
665   Index sum = 0, t, *ce = c + DIGIT_VALUES;
666   do
667     t = *c, *c++ = sum, sum += t;
668   while (c != ce);
669 }
670 
671 template<class Value>
radix_zeros(Value bitorkey,Index count[DIGITS][DIGIT_VALUES],unsigned * shift,Index ** offsets)672 unsigned TupleList::radix_zeros(Value bitorkey,
673     Index count[DIGITS][DIGIT_VALUES], unsigned *shift, Index **offsets)
674 {
675   unsigned digits = 0, sh = 0;
676   Index *c = &count[0][0];
677   do
678   {
679     if (bitorkey & DIGIT_MASK)
680       *shift++ = sh, *offsets++ = c, ++digits, radix_offsets(c);
681   } while (bitorkey >>= DIGIT_BITS,sh += DIGIT_BITS,c += DIGIT_VALUES,sh
682       != VALUE_BITS);
683   return digits;
684 }
685 
686 template<class Value>
radix_index_pass_b(const Value * A,Index n,Index stride,unsigned sh,Index * off,SortData<Value> * out)687 void TupleList::radix_index_pass_b(const Value *A, Index n, Index stride,
688     unsigned sh, Index *off, SortData<Value> *out)
689 {
690   Index i = 0;
691   do
692   {
693     Value v = *A;
694     SortData<Value> *d = &out[off[(v >> sh) & DIGIT_MASK]++];
695     d->v = v, d->i = i++;
696   } while (A += stride, i != n);
697 }
698 
699 template<class Value>
radix_index_pass_m(const SortData<Value> * src,const SortData<Value> * end,unsigned sh,Index * off,SortData<Value> * out)700 void TupleList::radix_index_pass_m(const SortData<Value> *src,
701     const SortData<Value> *end, unsigned sh, Index *off, SortData<Value> *out)
702 {
703   do
704   {
705     SortData<Value> *d = &out[off[(src->v >> sh) & DIGIT_MASK]++];
706     d->v = src->v, d->i = src->i;
707   } while (++src != end);
708 }
709 
710 template<class Value>
radix_index_pass_e(const SortData<Value> * src,const SortData<Value> * end,unsigned sh,Index * off,Index * out)711 void TupleList::radix_index_pass_e(const SortData<Value> *src,
712     const SortData<Value> *end, unsigned sh, Index *off, Index *out)
713 {
714   do
715     out[off[(src->v >> sh) & DIGIT_MASK]++] = src->i;
716   while (++src != end);
717 }
718 
719 template<class Value>
radix_index_pass_be(const Value * A,Index n,Index stride,unsigned sh,Index * off,Index * out)720 void TupleList::radix_index_pass_be(const Value *A, Index n, Index stride,
721     unsigned sh, Index *off, Index *out)
722 {
723   Index i = 0;
724   do
725     out[off[(*A >> sh) & DIGIT_MASK]++] = i++;
726   while (A += stride, i != n);
727 }
728 
729 template<class Value>
radix_index_sort(const Value * A,Index n,Index stride,Index * idx,SortData<Value> * work)730 void TupleList::radix_index_sort(const Value *A, Index n, Index stride,
731     Index *idx, SortData<Value> *work)
732 {
733   Index count[DIGITS][DIGIT_VALUES];
734   Value bitorkey = radix_count(A, A + n * stride, stride, count);
735   unsigned shift[DIGITS];
736   Index *offsets[DIGITS];
737   unsigned digits = radix_zeros(bitorkey, count, shift, offsets);
738   if (digits == 0)
739   {
740     Index i = 0;
741     do
742       *idx++ = i++;
743     while (i != n);
744   }
745   else if (digits == 1)
746   {
747     radix_index_pass_be(A, n, stride, shift[0], offsets[0], idx);
748   }
749   else
750   {
751     SortData<Value> *src, *dst;
752     unsigned d;
753     if ((digits & 1) == 0)
754       dst = work, src = dst + n;
755     else
756       src = work, dst = src + n;
757     radix_index_pass_b(A, n, stride, shift[0], offsets[0], src);
758     for (d = 1; d != digits - 1; ++d)
759     {
760       SortData<Value> *t;
761       radix_index_pass_m(src, src + n, shift[d], offsets[d], dst);
762       t = src, src = dst, dst = t;
763     }
764     radix_index_pass_e(src, src + n, shift[d], offsets[d], idx);
765   }
766 }
767 
768 template<class Value>
merge_index_sort(const Value * A,const Index An,Index stride,Index * idx,SortData<Value> * work)769 void TupleList::merge_index_sort(const Value *A, const Index An, Index stride,
770     Index *idx, SortData<Value> *work)
771 {
772   SortData<Value> * const buf[2] = { work + An, work };
773   Index n = An, base = -n, odd = 0, c = 0, b = 1;
774   Index i = 0;
775   for (;;)
776   {
777     SortData<Value> *p;
778     if ((c & 1) == 0)
779     {
780       base += n, n += (odd & 1), c |= 1, b ^= 1;
781       while (n > 3)
782         odd <<= 1, odd |= (n & 1), n >>= 1, c <<= 1, b ^= 1;
783     }
784     else
785       base -= n - (odd & 1), n <<= 1, n -= (odd & 1), odd >>= 1, c >>= 1;
786     if (c == 0)
787       break;
788     p = buf[b] + base;
789     if (n == 2)
790     {
791       Value v[2];
792       v[0] = *A, A += stride, v[1] = *A, A += stride;
793       if (v[1] < v[0])
794         p[0].v = v[1], p[0].i = i + 1, p[1].v = v[0], p[1].i = i;
795       else
796         p[0].v = v[0], p[0].i = i, p[1].v = v[1], p[1].i = i + 1;
797       i += 2;
798     }
799     else if (n == 3)
800     {
801       Value v[3];
802       v[0] = *A, A += stride, v[1] = *A, A += stride, v[2] = *A, A += stride;
803       if (v[1] < v[0])
804       {
805         if (v[2] < v[1])
806           p[0].v = v[2], p[1].v = v[1], p[2].v = v[0], p[0].i = i + 2, p[1].i =
807               i + 1, p[2].i = i;
808         else
809         {
810           if (v[2] < v[0])
811             p[0].v = v[1], p[1].v = v[2], p[2].v = v[0], p[0].i = i + 1, p[1].i =
812                 i + 2, p[2].i = i;
813           else
814             p[0].v = v[1], p[1].v = v[0], p[2].v = v[2], p[0].i = i + 1, p[1].i =
815                 i, p[2].i = i + 2;
816         }
817       }
818       else
819       {
820         if (v[2] < v[0])
821           p[0].v = v[2], p[1].v = v[0], p[2].v = v[1], p[0].i = i + 2, p[1].i =
822               i, p[2].i = i + 1;
823         else
824         {
825           if (v[2] < v[1])
826             p[0].v = v[0], p[1].v = v[2], p[2].v = v[1], p[0].i = i, p[1].i = i
827                 + 2, p[2].i = i + 1;
828           else
829             p[0].v = v[0], p[1].v = v[1], p[2].v = v[2], p[0].i = i, p[1].i = i
830                 + 1, p[2].i = i + 2;
831         }
832       }
833       i += 3;
834     }
835     else
836     {
837       const Index na = n >> 1, nb = (n + 1) >> 1;
838       const SortData<Value> *ap = buf[b ^ 1] + base, *ae = ap + na;
839       SortData<Value> *bp = p + na, *be = bp + nb;
840       for (;;)
841       {
842         if (bp->v < ap->v)
843         {
844           *p++ = *bp++;
845           if (bp != be)
846             continue;
847           do
848             *p++ = *ap++;
849           while (ap != ae);
850           break;
851         }
852         else
853         {
854           *p++ = *ap++;
855           if (ap == ae)
856             break;
857         }
858       }
859     }
860   }
861   {
862     const SortData<Value> *p = buf[0], *pe = p + An;
863     do
864       *idx++ = (p++)->i;
865     while (p != pe);
866   }
867 }
868 
869 template<class Value>
index_sort(const Value * A,Index n,Index stride,Index * idx,SortData<Value> * work)870 void TupleList::index_sort(const Value *A, Index n, Index stride, Index *idx,
871     SortData<Value> *work)
872 {
873   if (n < DIGIT_VALUES)
874   {
875     if (n == 0)
876       return;
877     if (n == 1)
878       *idx = 0;
879     else
880       merge_index_sort(A, n, stride, idx, work);
881   }
882   else
883     radix_index_sort(A, n, stride, idx, work);
884 }
885 
886 #undef DIGIT_BITS
887 #undef DIGIT_VALUES
888 #undef DIGIT_MASK
889 #undef CEILDIV
890 #undef DIGITS
891 #undef VALUE_BITS
892 #undef COUNT_SIZE
893 #undef sort_data_long
894 
895 } //namespace
896 
897