1 #include <string.h>
2 #include <limits.h>
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <stdarg.h>
6 #include <iostream>
7 #include <fstream>
8
9 #include "moab/TupleList.hpp"
10
11 namespace moab {
12
fail(const char * fmt,...)13 void fail(const char *fmt, ...)
14 {
15 va_list ap;
16 va_start(ap, fmt);
17 vfprintf(stderr, fmt, ap);
18 va_end(ap);
19 exit(1);
20 }
21
buffer(size_t sz)22 TupleList::buffer::buffer(size_t sz)
23 {
24 ptr = NULL;
25 buffSize = 0;
26 this->buffer_init_(sz, __FILE__);
27 }
28
buffer()29 TupleList::buffer::buffer()
30 {
31 buffSize = 0;
32 ptr = NULL;
33 }
34
buffer_init_(size_t sizeIn,const char * file)35 void TupleList::buffer::buffer_init_(size_t sizeIn, const char *file)
36 {
37 this->buffSize = sizeIn;
38 void *res = malloc(this->buffSize);
39 if (!res && buffSize > 0)
40 fail("%s: allocation of %d bytes failed\n", file, (int) buffSize);
41 ptr = (char*) res;
42 }
43
buffer_reserve_(size_t min,const char * file)44 void TupleList::buffer::buffer_reserve_(size_t min, const char *file)
45 {
46 if (this->buffSize < min)
47 {
48 size_t newSize = this->buffSize;
49 newSize += newSize / 2 + 1;
50 if (newSize < min)
51 newSize = min;
52 void *res = realloc(ptr, newSize);
53 if (!res && newSize > 0)
54 fail("%s: reallocation of %d bytes failed\n", file, newSize);
55 ptr = (char*) res;
56 this->buffSize = newSize;
57 }
58 }
59
reset()60 void TupleList::buffer::reset()
61 {
62 free(ptr);
63 ptr = NULL;
64 buffSize = 0;
65 }
66
TupleList(uint p_mi,uint p_ml,uint p_mul,uint p_mr,uint p_max)67 TupleList::TupleList(uint p_mi, uint p_ml, uint p_mul, uint p_mr, uint p_max)
68 : vi(NULL), vl(NULL), vul(NULL), vr(NULL),
69 last_sorted(-1)
70 {
71 initialize(p_mi, p_ml, p_mul, p_mr, p_max);
72 }
73
TupleList()74 TupleList::TupleList()
75 : vi_rd(NULL), vl_rd(NULL), vul_rd(NULL), vr_rd(NULL),
76 mi(0), ml(0), mul(0), mr(0),
77 n(0), max(0),
78 vi(NULL), vl(NULL), vul(NULL), vr(NULL),
79 last_sorted(-1)
80 {
81 disableWriteAccess();
82 }
83
84 // Allocates space for the tuple list in memory according to parameters
initialize(uint p_mi,uint p_ml,uint p_mul,uint p_mr,uint p_max)85 void TupleList::initialize(uint p_mi, uint p_ml, uint p_mul, uint p_mr,
86 uint p_max)
87 {
88 this->n = 0;
89 this->max = p_max;
90 this->mi = p_mi;
91 this->ml = p_ml;
92 this->mul = p_mul;
93 this->mr = p_mr;
94 size_t sz;
95
96 if (max * mi > 0)
97 {
98 sz = max * mi * sizeof(sint);
99 void *resi = malloc(sz);
100 if (!resi && max * mi > 0)
101 fail("%s: allocation of %d bytes failed\n", __FILE__, (int) sz);
102 vi = (sint*) resi;
103 }
104 else
105 vi = NULL;
106 if (max * ml > 0)
107 {
108 sz = max * ml * sizeof(slong);
109 void *resl = malloc(sz);
110 if (!resl && max * ml > 0)
111 fail("%s: allocation of %d bytes failed\n", __FILE__, (int) sz);
112 vl = (slong*) resl;
113 }
114 else
115 vl = NULL;
116 if (max * mul > 0)
117 {
118 sz = max * mul * sizeof(Ulong);
119 void *resu = malloc(sz);
120 if (!resu && max * mul > 0)
121 fail("%s: allocation of %d bytes failed\n", __FILE__, (int) sz);
122 vul = (Ulong*) resu;
123 }
124 else
125 vul = NULL;
126 if (max * mr > 0)
127 {
128 sz = max * mr * sizeof(realType);
129 void *resr = malloc(sz);
130 if (!resr && max * ml > 0)
131 fail("%s: allocation of %d bytes failed\n", __FILE__, (int) sz);
132 vr = (realType*) resr;
133 }
134 else
135 vr = NULL;
136
137 //Begin with write access disabled
138 this->disableWriteAccess();
139
140 //Set read variables
141 vi_rd = vi;
142 vl_rd = vl;
143 vul_rd = vul;
144 vr_rd = vr;
145 }
146
147 // Resizes a tuplelist to the given uint max
resize(uint maxIn)148 ErrorCode TupleList::resize(uint maxIn)
149 {
150 this->max = maxIn;
151 size_t sz;
152
153 if (vi || (max * mi > 0))
154 {
155 sz = max * mi * sizeof(sint);
156 void *resi = realloc(vi, sz);
157 if (!resi && max * mi > 0)
158 {
159 fail("%s: allocation of %d bytes failed\n", __FILE__, (int) sz);
160 return moab::MB_MEMORY_ALLOCATION_FAILED;
161 }
162 vi = (sint*) resi;
163 }
164 if (vl || (max * ml > 0))
165 {
166 sz = max * ml * sizeof(slong);
167 void *resl = realloc(vl, sz);
168 if (!resl && max * ml > 0)
169 {
170 fail("%s: allocation of %d bytes failed\n", __FILE__, (int) sz);
171 return moab::MB_MEMORY_ALLOCATION_FAILED;
172 }
173 vl = (slong*) resl;
174 }
175 if (vul || (max * mul > 0))
176 {
177 sz = max * mul * sizeof(Ulong);
178 void *resu = realloc(vul, sz);
179 if (!resu && max * mul > 0)
180 {
181 fail("%s: allocation of %d bytes failed\n", __FILE__, (int) sz);
182 return moab::MB_MEMORY_ALLOCATION_FAILED;
183 }
184 vul = (Ulong*) resu;
185 }
186 if (vr || (max * mr > 0))
187 {
188 sz = max * mr * sizeof(realType);
189 void *resr = realloc(vr, sz);
190 if (!resr && max * mr > 0)
191 {
192 fail("%s: allocation of %d bytes failed\n", __FILE__, (int) sz);
193 return moab::MB_MEMORY_ALLOCATION_FAILED;
194 }
195 vr = (realType*) resr;
196 }
197
198 //Set read variables
199 vi_rd = vi;
200 vl_rd = vl;
201 vul_rd = vul;
202 vr_rd = vr;
203
204 //Set the write variables if necessary
205 if (writeEnabled)
206 {
207 vi_wr = vi;
208 vl_wr = vl;
209 vul_wr = vul;
210 vr_wr = vr;
211 }
212 return moab::MB_SUCCESS;
213 }
214
215 // Frees the memory used by the tuplelist
reset()216 void TupleList::reset()
217 {
218 //free up the pointers
219 free(vi);
220 free(vl);
221 free(vul);
222 free(vr);
223 //Set them all to null
224 vr = NULL;
225 vi = NULL;
226 vul = NULL;
227 vl = NULL;
228 //Set the read and write pointers to null
229 disableWriteAccess();
230 vi_rd = NULL;
231 vl_rd = NULL;
232 vul_rd = NULL;
233 vr_rd = NULL;
234 }
235
236 // Increments n; if n>max, increase the size of the tuplelist
reserve()237 void TupleList::reserve()
238 {
239 n++;
240 while (n > max)
241 resize((max ? max + max / 2 + 1 : 2));
242 last_sorted = -1;
243 }
244
245 // Given the value and the position in the field, finds the index of the tuple
246 // to which the value belongs
find(unsigned int key_num,sint value)247 int TupleList::find(unsigned int key_num, sint value)
248 {
249 // we are passing an int, no issue, leave it at long
250 long uvalue = (long) value;
251 if (!(key_num > mi))
252 {
253 // Binary search: only if the tuple_list is sorted
254 if (last_sorted == (int) key_num)
255 {
256 int lb = 0, ub = n, index; // lb=lower bound, ub=upper bound, index=mid
257 for (; lb <= ub;)
258 {
259 index = (lb + ub) / 2;
260 if (vi[index * mi + key_num] == uvalue)
261 return index;
262 else if (vi[index * mi + key_num] > uvalue)
263 ub = index - 1;
264 else if (vi[index * mi + key_num] < uvalue)
265 lb = index + 1;
266 }
267 }
268 else
269 {
270 // Sequential search: if tuple_list is not sorted
271 for (uint index = 0; index < n; index++)
272 {
273 if (vi[index * mi + key_num] == uvalue)
274 return index;
275 }
276 }
277 }
278 return -1; // If the value wasn't present or an invalid key was given
279 }
280
find(unsigned int key_num,slong value)281 int TupleList::find(unsigned int key_num, slong value)
282 {
283 long uvalue = (long) value;
284 if (!(key_num > ml))
285 {
286 if (last_sorted - mi == key_num)
287 {
288 int lb = 0, ub = n, index; // lb=lower bound, ub=upper bound, index=mid
289 for (; lb <= ub;)
290 {
291 index = (lb + ub) / 2;
292 if (vl[index * ml + key_num] == uvalue)
293 return index;
294 else if (vl[index * ml + key_num] > uvalue)
295 ub = index - 1;
296 else if (vl[index * ml + key_num] < uvalue)
297 lb = index + 1;
298 }
299 }
300 else
301 {
302 // Sequential search: if tuple_list is not sorted
303 for (uint index = 0; index < n; index++)
304 {
305 if (vl[index * ml + key_num] == uvalue)
306 return index;
307 }
308 }
309 }
310 return -1; // If the value wasn't present or an invalid key was given
311 }
312
find(unsigned int key_num,Ulong value)313 int TupleList::find(unsigned int key_num, Ulong value)
314 {
315 if (!(key_num > mul))
316 {
317 if (last_sorted - mi - ml == key_num)
318 {
319 int lb = 0, ub = n - 1, index; // lb=lower bound, ub=upper bound, index=mid
320 for (; lb <= ub;)
321 {
322 index = (lb + ub) / 2;
323 if (vul[index * mul + key_num] == value)
324 return index;
325 else if (vul[index * mul + key_num] > value)
326 ub = index - 1;
327 else if (vul[index * mul + key_num] < value)
328 lb = index + 1;
329 }
330 }
331 else
332 {
333 // Sequential search: if tuple_list is not sorted
334 for (uint index = 0; index < n; index++)
335 {
336 if (vul[index * mul + key_num] == value)
337 return index;
338 }
339 }
340 }
341 return -1; // If the value wasn't present or an invalid key was given
342 }
343
find(unsigned int key_num,realType value)344 int TupleList::find(unsigned int key_num, realType value)
345 {
346 if (!(key_num > mr))
347 {
348 // Sequential search: TupleList cannot be sorted by reals
349 for (uint index = 0; index < n; index++)
350 {
351 if (vr[index * mr + key_num] == value)
352 return index;
353 }
354 }
355 return -1; // If the value wasn't present or an invalid key was given
356 }
357
get_sint(unsigned int index,unsigned int m)358 sint TupleList::get_sint(unsigned int index, unsigned int m)
359 {
360 if (mi > m && n > index)
361 return vi[index * mi + m];
362 return 0;
363 }
364
get_int(unsigned int index,unsigned int m)365 slong TupleList::get_int(unsigned int index, unsigned int m)
366 {
367 if (ml > m && n > index)
368 return vl[index * ml + m];
369 return 0;
370 }
371
get_ulong(unsigned int index,unsigned int m)372 Ulong TupleList::get_ulong(unsigned int index, unsigned int m)
373 {
374 if (mul > m && n > index)
375 return vul[index * mul + m];
376 return 0;
377 }
378
get_double(unsigned int index,unsigned int m)379 realType TupleList::get_double(unsigned int index, unsigned int m)
380 {
381 if (mr > m && n > index)
382 return vr[index * mr + m];
383 return 0;
384 }
385
get(unsigned int index,const sint * & sp,const slong * & ip,const Ulong * & lp,const realType * & dp)386 ErrorCode TupleList::get(unsigned int index, const sint *&sp, const slong *&ip,
387 const Ulong *&lp, const realType *&dp)
388 {
389 if (index <= n)
390 {
391 if (mi)
392 *&sp = &vi[index * mi];
393 else
394 *&sp = NULL;
395 if (ml)
396 *&ip = &vl[index * ml];
397 else
398 *&ip = NULL;
399 if (mul)
400 *&lp = &vul[index * mul];
401 else
402 *&lp = NULL;
403 if (mr)
404 *&dp = &vr[index * mr];
405 else
406 *&dp = NULL;
407
408 return MB_SUCCESS;
409 }
410 return MB_FAILURE;
411 }
412
push_back(sint * sp,slong * ip,Ulong * lp,realType * dp)413 unsigned int TupleList::push_back(sint *sp, slong *ip, Ulong *lp, realType *dp)
414 {
415 reserve();
416 if (mi)
417 memcpy(&vi[mi * (n - 1)], sp, mi * sizeof(sint));
418 if (ml)
419 memcpy(&vl[ml * (n - 1)], ip, ml * sizeof(long));
420 if (mul)
421 memcpy(&vul[mul * (n - 1)], lp, mul * sizeof(Ulong));
422 if (mr)
423 memcpy(&vr[mr * (n - 1)], dp, mr * sizeof(realType));
424
425 last_sorted = -1;
426 return n - 1;
427 }
428
enableWriteAccess()429 void TupleList::enableWriteAccess()
430 {
431 writeEnabled = true;
432 last_sorted = -1;
433 vi_wr = vi;
434 vl_wr = vl;
435 vul_wr = vul;
436 vr_wr = vr;
437 }
438
disableWriteAccess()439 void TupleList::disableWriteAccess()
440 {
441 writeEnabled = false;
442 vi_wr = NULL;
443 vl_wr = NULL;
444 vul_wr = NULL;
445 vr_wr = NULL;
446 }
447
getTupleSize(uint & mi_out,uint & ml_out,uint & mul_out,uint & mr_out) const448 void TupleList::getTupleSize(uint &mi_out, uint &ml_out, uint &mul_out,
449 uint &mr_out) const
450 {
451 mi_out = mi;
452 ml_out = ml;
453 mul_out = mul;
454 mr_out = mr;
455 }
456
inc_n()457 uint TupleList::inc_n()
458 {
459 //Check for direct write access
460 if (!writeEnabled)
461 {
462 enableWriteAccess();
463 }
464 n++;
465 return n;
466 }
467
set_n(uint n_in)468 void TupleList::set_n(uint n_in)
469 {
470 //Check for direct write access;
471 if (!writeEnabled)
472 {
473 enableWriteAccess();
474 }
475 n = n_in;
476 }
477
print(const char * name) const478 void TupleList::print(const char *name) const
479 {
480 std::cout << "Printing Tuple " << name << "===================" << std::endl;
481 unsigned long i = 0, l = 0, ul = 0, r = 0;
482 for (uint k = 0; k < n; k++)
483 {
484 for (uint j = 0; j < mi; j++)
485 {
486 std::cout << vi[i++] << " | ";
487 }
488 for (uint j = 0; j < ml; j++)
489 {
490 std::cout << vl[l++] << " | ";
491 }
492 for (uint j = 0; j < mul; j++)
493 {
494 std::cout << vul[ul++] << " | ";
495 }
496 for (uint j = 0; j < mr; j++)
497 {
498 std::cout << vr[r++] << " | ";
499 }
500 std::cout << std::endl;
501 }
502 std::cout << "=======================================" << std::endl
503 << std::endl;
504 }
print_to_file(const char * filename) const505 void TupleList::print_to_file(const char * filename) const
506 {
507 std::ofstream ofs;
508 ofs.open (filename, std::ofstream::out | std::ofstream::app);
509
510 ofs << "Printing Tuple " << filename << "===================" << std::endl;
511 unsigned long i = 0, l = 0, ul = 0, r = 0;
512 for (uint k = 0; k < n; k++)
513 {
514 for (uint j = 0; j < mi; j++)
515 {
516 ofs << vi[i++] << " | ";
517 }
518 for (uint j = 0; j < ml; j++)
519 {
520 ofs << vl[l++] << " | ";
521 }
522 for (uint j = 0; j < mul; j++)
523 {
524 ofs << vul[ul++] << " | ";
525 }
526 for (uint j = 0; j < mr; j++)
527 {
528 ofs << vr[r++] << " | ";
529 }
530 ofs << std::endl;
531 }
532 ofs << "=======================================" << std::endl
533 << std::endl;
534
535 ofs.close();
536 }
permute(uint * perm,void * work)537 void TupleList::permute(uint *perm, void *work)
538 {
539 const unsigned int_size = mi * sizeof(sint), long_size = ml * sizeof(slong),
540 Ulong_size = mul * sizeof(Ulong), real_size = mr * sizeof(realType);
541 if (mi)
542 {
543 uint *p = perm, *pe = p + n;
544 char *sorted = (char *) work;
545 while (p != pe)
546 memcpy((void *) sorted, &vi[mi * (*p++)], int_size), sorted += int_size;
547 memcpy(vi, work, int_size * n);
548 }
549 if (ml)
550 {
551 uint *p = perm, *pe = p + n;
552 char *sorted = (char *) work;
553 while (p != pe)
554 memcpy((void *) sorted, &vl[ml * (*p++)], long_size), sorted += long_size;
555 memcpy(vl, work, long_size * n);
556 }
557 if (mul)
558 {
559 uint *p = perm, *pe = p + n;
560 char *sorted = (char *) work;
561 while (p != pe)
562 memcpy((void *) sorted, &vul[mul * (*p++)], Ulong_size), sorted +=
563 Ulong_size;
564 memcpy(vul, work, Ulong_size * n);
565 }
566 if (mr)
567 {
568 uint *p = perm, *pe = p + n;
569 char *sorted = (char *) work;
570 while (p != pe)
571 memcpy((void *) sorted, &vr[mr * (*p++)], real_size), sorted += real_size;
572 memcpy(vr, work, real_size * n);
573 }
574 }
575
576 # define umax_2(a, b) (((a)>(b)) ? (a):(b))
577
sort(uint key,TupleList::buffer * buf)578 ErrorCode TupleList::sort(uint key, TupleList::buffer *buf)
579 {
580 const unsigned int_size = mi * sizeof(sint);
581 const unsigned long_size = ml * sizeof(slong);
582 const unsigned Ulong_size = mul * sizeof(Ulong);
583 const unsigned real_size = mr * sizeof(realType);
584 const unsigned width = umax_2(umax_2(int_size,long_size),
585 umax_2(Ulong_size,real_size));
586 unsigned data_size =
587 key >= mi ? sizeof(SortData<long> ) : sizeof(SortData<uint> );
588 #if defined(WIN32) || defined(_WIN32) || defined(__WIN32)
589 if (key>=mi+ml)
590 data_size = sizeof(SortData<Ulong> );
591 #endif
592
593 uint work_min = n * umax_2(2*data_size,sizeof(sint)+width);
594 uint *work;
595 buf->buffer_reserve(work_min);
596 work = (uint *) buf->ptr;
597 if (key < mi)
598 index_sort((uint *) &vi[key], n, mi, work, (SortData<uint>*) work);
599 else if (key < mi + ml)
600 index_sort((long*) &vl[key - mi], n, ml, work, (SortData<long>*) work);
601 else if (key < mi + ml + mul)
602 index_sort((Ulong*) &vul[key - mi - ml], n, mul, work,
603 (SortData<Ulong>*) work);
604 else
605 return MB_NOT_IMPLEMENTED;
606
607 permute(work, work + n);
608
609 if (!writeEnabled)
610 last_sorted = key;
611 return MB_SUCCESS;
612 }
613
614 #undef umax_2
615
616 #define DIGIT_BITS 8
617 #define DIGIT_VALUES (1<<DIGIT_BITS)
618 #define DIGIT_MASK ((Value)(DIGIT_VALUES-1))
619 #define CEILDIV(a,b) (((a)+(b)-1)/(b))
620 #define DIGITS CEILDIV(CHAR_BIT*sizeof(Value),DIGIT_BITS)
621 #define VALUE_BITS (DIGIT_BITS*DIGITS)
622 #define COUNT_SIZE (DIGITS*DIGIT_VALUES)
623
624 /* used to unroll a tiny loop: */
625 #define COUNT_DIGIT_01(n,i) \
626 if(n>i) count[i][val&DIGIT_MASK]++, val>>=DIGIT_BITS
627 #define COUNT_DIGIT_02(n,i) COUNT_DIGIT_01(n,i); COUNT_DIGIT_01(n,i+ 1)
628 #define COUNT_DIGIT_04(n,i) COUNT_DIGIT_02(n,i); COUNT_DIGIT_02(n,i+ 2)
629 #define COUNT_DIGIT_08(n,i) COUNT_DIGIT_04(n,i); COUNT_DIGIT_04(n,i+ 4)
630 #define COUNT_DIGIT_16(n,i) COUNT_DIGIT_08(n,i); COUNT_DIGIT_08(n,i+ 8)
631 #define COUNT_DIGIT_32(n,i) COUNT_DIGIT_16(n,i); COUNT_DIGIT_16(n,i+16)
632 #define COUNT_DIGIT_64(n,i) COUNT_DIGIT_32(n,i); COUNT_DIGIT_32(n,i+32)
633
634 template<class Value>
radix_count(const Value * A,const Value * end,Index stride,Index count[DIGITS][DIGIT_VALUES])635 Value TupleList::radix_count(const Value *A, const Value *end, Index stride,
636 Index count[DIGITS][DIGIT_VALUES])
637 {
638 Value bitorkey = 0;
639 memset(count, 0, COUNT_SIZE * sizeof(Index));
640 do
641 {
642 Value val = *A;
643 bitorkey |= val;
644 COUNT_DIGIT_64(DIGITS, 0);
645 // above macro expands to:
646 //if(DIGITS> 0) count[ 0][val&DIGIT_MASK]++, val>>=DIGIT_BITS;
647 //if(DIGITS> 1) count[ 1][val&DIGIT_MASK]++, val>>=DIGIT_BITS;
648 // ...
649 //if(DIGITS>63) count[63][val&DIGIT_MASK]++, val>>=DIGIT_BITS;
650
651 } while (A += stride, A != end);
652 return bitorkey;
653 }
654
655 #undef COUNT_DIGIT_01
656 #undef COUNT_DIGIT_02
657 #undef COUNT_DIGIT_04
658 #undef COUNT_DIGIT_08
659 #undef COUNT_DIGIT_16
660 #undef COUNT_DIGIT_32
661 #undef COUNT_DIGIT_64
662
radix_offsets(Index * c)663 void TupleList::radix_offsets(Index *c)
664 {
665 Index sum = 0, t, *ce = c + DIGIT_VALUES;
666 do
667 t = *c, *c++ = sum, sum += t;
668 while (c != ce);
669 }
670
671 template<class Value>
radix_zeros(Value bitorkey,Index count[DIGITS][DIGIT_VALUES],unsigned * shift,Index ** offsets)672 unsigned TupleList::radix_zeros(Value bitorkey,
673 Index count[DIGITS][DIGIT_VALUES], unsigned *shift, Index **offsets)
674 {
675 unsigned digits = 0, sh = 0;
676 Index *c = &count[0][0];
677 do
678 {
679 if (bitorkey & DIGIT_MASK)
680 *shift++ = sh, *offsets++ = c, ++digits, radix_offsets(c);
681 } while (bitorkey >>= DIGIT_BITS,sh += DIGIT_BITS,c += DIGIT_VALUES,sh
682 != VALUE_BITS);
683 return digits;
684 }
685
686 template<class Value>
radix_index_pass_b(const Value * A,Index n,Index stride,unsigned sh,Index * off,SortData<Value> * out)687 void TupleList::radix_index_pass_b(const Value *A, Index n, Index stride,
688 unsigned sh, Index *off, SortData<Value> *out)
689 {
690 Index i = 0;
691 do
692 {
693 Value v = *A;
694 SortData<Value> *d = &out[off[(v >> sh) & DIGIT_MASK]++];
695 d->v = v, d->i = i++;
696 } while (A += stride, i != n);
697 }
698
699 template<class Value>
radix_index_pass_m(const SortData<Value> * src,const SortData<Value> * end,unsigned sh,Index * off,SortData<Value> * out)700 void TupleList::radix_index_pass_m(const SortData<Value> *src,
701 const SortData<Value> *end, unsigned sh, Index *off, SortData<Value> *out)
702 {
703 do
704 {
705 SortData<Value> *d = &out[off[(src->v >> sh) & DIGIT_MASK]++];
706 d->v = src->v, d->i = src->i;
707 } while (++src != end);
708 }
709
710 template<class Value>
radix_index_pass_e(const SortData<Value> * src,const SortData<Value> * end,unsigned sh,Index * off,Index * out)711 void TupleList::radix_index_pass_e(const SortData<Value> *src,
712 const SortData<Value> *end, unsigned sh, Index *off, Index *out)
713 {
714 do
715 out[off[(src->v >> sh) & DIGIT_MASK]++] = src->i;
716 while (++src != end);
717 }
718
719 template<class Value>
radix_index_pass_be(const Value * A,Index n,Index stride,unsigned sh,Index * off,Index * out)720 void TupleList::radix_index_pass_be(const Value *A, Index n, Index stride,
721 unsigned sh, Index *off, Index *out)
722 {
723 Index i = 0;
724 do
725 out[off[(*A >> sh) & DIGIT_MASK]++] = i++;
726 while (A += stride, i != n);
727 }
728
729 template<class Value>
radix_index_sort(const Value * A,Index n,Index stride,Index * idx,SortData<Value> * work)730 void TupleList::radix_index_sort(const Value *A, Index n, Index stride,
731 Index *idx, SortData<Value> *work)
732 {
733 Index count[DIGITS][DIGIT_VALUES];
734 Value bitorkey = radix_count(A, A + n * stride, stride, count);
735 unsigned shift[DIGITS];
736 Index *offsets[DIGITS];
737 unsigned digits = radix_zeros(bitorkey, count, shift, offsets);
738 if (digits == 0)
739 {
740 Index i = 0;
741 do
742 *idx++ = i++;
743 while (i != n);
744 }
745 else if (digits == 1)
746 {
747 radix_index_pass_be(A, n, stride, shift[0], offsets[0], idx);
748 }
749 else
750 {
751 SortData<Value> *src, *dst;
752 unsigned d;
753 if ((digits & 1) == 0)
754 dst = work, src = dst + n;
755 else
756 src = work, dst = src + n;
757 radix_index_pass_b(A, n, stride, shift[0], offsets[0], src);
758 for (d = 1; d != digits - 1; ++d)
759 {
760 SortData<Value> *t;
761 radix_index_pass_m(src, src + n, shift[d], offsets[d], dst);
762 t = src, src = dst, dst = t;
763 }
764 radix_index_pass_e(src, src + n, shift[d], offsets[d], idx);
765 }
766 }
767
768 template<class Value>
merge_index_sort(const Value * A,const Index An,Index stride,Index * idx,SortData<Value> * work)769 void TupleList::merge_index_sort(const Value *A, const Index An, Index stride,
770 Index *idx, SortData<Value> *work)
771 {
772 SortData<Value> * const buf[2] = { work + An, work };
773 Index n = An, base = -n, odd = 0, c = 0, b = 1;
774 Index i = 0;
775 for (;;)
776 {
777 SortData<Value> *p;
778 if ((c & 1) == 0)
779 {
780 base += n, n += (odd & 1), c |= 1, b ^= 1;
781 while (n > 3)
782 odd <<= 1, odd |= (n & 1), n >>= 1, c <<= 1, b ^= 1;
783 }
784 else
785 base -= n - (odd & 1), n <<= 1, n -= (odd & 1), odd >>= 1, c >>= 1;
786 if (c == 0)
787 break;
788 p = buf[b] + base;
789 if (n == 2)
790 {
791 Value v[2];
792 v[0] = *A, A += stride, v[1] = *A, A += stride;
793 if (v[1] < v[0])
794 p[0].v = v[1], p[0].i = i + 1, p[1].v = v[0], p[1].i = i;
795 else
796 p[0].v = v[0], p[0].i = i, p[1].v = v[1], p[1].i = i + 1;
797 i += 2;
798 }
799 else if (n == 3)
800 {
801 Value v[3];
802 v[0] = *A, A += stride, v[1] = *A, A += stride, v[2] = *A, A += stride;
803 if (v[1] < v[0])
804 {
805 if (v[2] < v[1])
806 p[0].v = v[2], p[1].v = v[1], p[2].v = v[0], p[0].i = i + 2, p[1].i =
807 i + 1, p[2].i = i;
808 else
809 {
810 if (v[2] < v[0])
811 p[0].v = v[1], p[1].v = v[2], p[2].v = v[0], p[0].i = i + 1, p[1].i =
812 i + 2, p[2].i = i;
813 else
814 p[0].v = v[1], p[1].v = v[0], p[2].v = v[2], p[0].i = i + 1, p[1].i =
815 i, p[2].i = i + 2;
816 }
817 }
818 else
819 {
820 if (v[2] < v[0])
821 p[0].v = v[2], p[1].v = v[0], p[2].v = v[1], p[0].i = i + 2, p[1].i =
822 i, p[2].i = i + 1;
823 else
824 {
825 if (v[2] < v[1])
826 p[0].v = v[0], p[1].v = v[2], p[2].v = v[1], p[0].i = i, p[1].i = i
827 + 2, p[2].i = i + 1;
828 else
829 p[0].v = v[0], p[1].v = v[1], p[2].v = v[2], p[0].i = i, p[1].i = i
830 + 1, p[2].i = i + 2;
831 }
832 }
833 i += 3;
834 }
835 else
836 {
837 const Index na = n >> 1, nb = (n + 1) >> 1;
838 const SortData<Value> *ap = buf[b ^ 1] + base, *ae = ap + na;
839 SortData<Value> *bp = p + na, *be = bp + nb;
840 for (;;)
841 {
842 if (bp->v < ap->v)
843 {
844 *p++ = *bp++;
845 if (bp != be)
846 continue;
847 do
848 *p++ = *ap++;
849 while (ap != ae);
850 break;
851 }
852 else
853 {
854 *p++ = *ap++;
855 if (ap == ae)
856 break;
857 }
858 }
859 }
860 }
861 {
862 const SortData<Value> *p = buf[0], *pe = p + An;
863 do
864 *idx++ = (p++)->i;
865 while (p != pe);
866 }
867 }
868
869 template<class Value>
index_sort(const Value * A,Index n,Index stride,Index * idx,SortData<Value> * work)870 void TupleList::index_sort(const Value *A, Index n, Index stride, Index *idx,
871 SortData<Value> *work)
872 {
873 if (n < DIGIT_VALUES)
874 {
875 if (n == 0)
876 return;
877 if (n == 1)
878 *idx = 0;
879 else
880 merge_index_sort(A, n, stride, idx, work);
881 }
882 else
883 radix_index_sort(A, n, stride, idx, work);
884 }
885
886 #undef DIGIT_BITS
887 #undef DIGIT_VALUES
888 #undef DIGIT_MASK
889 #undef CEILDIV
890 #undef DIGITS
891 #undef VALUE_BITS
892 #undef COUNT_SIZE
893 #undef sort_data_long
894
895 } //namespace
896
897