1 /* BwtSort.c -- BWT block sorting
2 2021-04-01 : Igor Pavlov : Public domain */
3 
4 #include "Precomp.h"
5 
6 #include "BwtSort.h"
7 #include "Sort.h"
8 
9 /* #define BLOCK_SORT_USE_HEAP_SORT */
10 
11 #define NO_INLINE MY_FAST_CALL
12 
13 /* Don't change it !!! */
14 #define kNumHashBytes 2
15 #define kNumHashValues (1 << (kNumHashBytes * 8))
16 
17 /* kNumRefBitsMax must be < (kNumHashBytes * 8) = 16 */
18 #define kNumRefBitsMax 12
19 
20 #define BS_TEMP_SIZE kNumHashValues
21 
22 #ifdef BLOCK_SORT_EXTERNAL_FLAGS
23 
24 /* 32 Flags in UInt32 word */
25 #define kNumFlagsBits 5
26 #define kNumFlagsInWord (1 << kNumFlagsBits)
27 #define kFlagsMask (kNumFlagsInWord - 1)
28 #define kAllFlags 0xFFFFFFFF
29 
30 #else
31 
32 #define kNumBitsMax 20
33 #define kIndexMask ((1 << kNumBitsMax) - 1)
34 #define kNumExtraBits (32 - kNumBitsMax)
35 #define kNumExtra0Bits (kNumExtraBits - 2)
36 #define kNumExtra0Mask ((1 << kNumExtra0Bits) - 1)
37 
38 #define SetFinishedGroupSize(p, size) \
39   {  *(p) |= ((((size) - 1) & kNumExtra0Mask) << kNumBitsMax); \
40     if ((size) > (1 << kNumExtra0Bits)) { \
41     *(p) |= 0x40000000;  *((p) + 1) |= ((((size) - 1)>> kNumExtra0Bits) << kNumBitsMax); } } \
42 
SetGroupSize(UInt32 * p,UInt32 size)43 static void SetGroupSize(UInt32 *p, UInt32 size)
44 {
45   if (--size == 0)
46     return;
47   *p |= 0x80000000 | ((size & kNumExtra0Mask) << kNumBitsMax);
48   if (size >= (1 << kNumExtra0Bits))
49   {
50     *p |= 0x40000000;
51     p[1] |= ((size >> kNumExtra0Bits) << kNumBitsMax);
52   }
53 }
54 
55 #endif
56 
57 /*
58 SortGroup - is recursive Range-Sort function with HeapSort optimization for small blocks
59   "range" is not real range. It's only for optimization.
60 returns: 1 - if there are groups, 0 - no more groups
61 */
62 
SortGroup(UInt32 BlockSize,UInt32 NumSortedBytes,UInt32 groupOffset,UInt32 groupSize,int NumRefBits,UInt32 * Indices,UInt32 left,UInt32 range)63 static UInt32 NO_INLINE SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 groupSize, int NumRefBits, UInt32 *Indices
64   #ifndef BLOCK_SORT_USE_HEAP_SORT
65   , UInt32 left, UInt32 range
66   #endif
67   )
68 {
69   UInt32 *ind2 = Indices + groupOffset;
70   UInt32 *Groups;
71   if (groupSize <= 1)
72   {
73     /*
74     #ifndef BLOCK_SORT_EXTERNAL_FLAGS
75     SetFinishedGroupSize(ind2, 1);
76     #endif
77     */
78     return 0;
79   }
80   Groups = Indices + BlockSize + BS_TEMP_SIZE;
81   if (groupSize <= ((UInt32)1 << NumRefBits)
82       #ifndef BLOCK_SORT_USE_HEAP_SORT
83       && groupSize <= range
84       #endif
85       )
86   {
87     UInt32 *temp = Indices + BlockSize;
88     UInt32 j;
89     UInt32 mask, thereAreGroups, group, cg;
90     {
91       UInt32 gPrev;
92       UInt32 gRes = 0;
93       {
94         UInt32 sp = ind2[0] + NumSortedBytes;
95         if (sp >= BlockSize) sp -= BlockSize;
96         gPrev = Groups[sp];
97         temp[0] = (gPrev << NumRefBits);
98       }
99 
100       for (j = 1; j < groupSize; j++)
101       {
102         UInt32 sp = ind2[j] + NumSortedBytes;
103         UInt32 g;
104         if (sp >= BlockSize) sp -= BlockSize;
105         g = Groups[sp];
106         temp[j] = (g << NumRefBits) | j;
107         gRes |= (gPrev ^ g);
108       }
109       if (gRes == 0)
110       {
111         #ifndef BLOCK_SORT_EXTERNAL_FLAGS
112         SetGroupSize(ind2, groupSize);
113         #endif
114         return 1;
115       }
116     }
117 
118     HeapSort(temp, groupSize);
119     mask = (((UInt32)1 << NumRefBits) - 1);
120     thereAreGroups = 0;
121 
122     group = groupOffset;
123     cg = (temp[0] >> NumRefBits);
124     temp[0] = ind2[temp[0] & mask];
125 
126     {
127     #ifdef BLOCK_SORT_EXTERNAL_FLAGS
128     UInt32 *Flags = Groups + BlockSize;
129     #else
130     UInt32 prevGroupStart = 0;
131     #endif
132 
133     for (j = 1; j < groupSize; j++)
134     {
135       UInt32 val = temp[j];
136       UInt32 cgCur = (val >> NumRefBits);
137 
138       if (cgCur != cg)
139       {
140         cg = cgCur;
141         group = groupOffset + j;
142 
143         #ifdef BLOCK_SORT_EXTERNAL_FLAGS
144         {
145         UInt32 t = group - 1;
146         Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
147         }
148         #else
149         SetGroupSize(temp + prevGroupStart, j - prevGroupStart);
150         prevGroupStart = j;
151         #endif
152       }
153       else
154         thereAreGroups = 1;
155       {
156       UInt32 ind = ind2[val & mask];
157       temp[j] = ind;
158       Groups[ind] = group;
159       }
160     }
161 
162     #ifndef BLOCK_SORT_EXTERNAL_FLAGS
163     SetGroupSize(temp + prevGroupStart, j - prevGroupStart);
164     #endif
165     }
166 
167     for (j = 0; j < groupSize; j++)
168       ind2[j] = temp[j];
169     return thereAreGroups;
170   }
171 
172   /* Check that all strings are in one group (cannot sort) */
173   {
174     UInt32 group, j;
175     UInt32 sp = ind2[0] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
176     group = Groups[sp];
177     for (j = 1; j < groupSize; j++)
178     {
179       sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
180       if (Groups[sp] != group)
181         break;
182     }
183     if (j == groupSize)
184     {
185       #ifndef BLOCK_SORT_EXTERNAL_FLAGS
186       SetGroupSize(ind2, groupSize);
187       #endif
188       return 1;
189     }
190   }
191 
192   #ifndef BLOCK_SORT_USE_HEAP_SORT
193   {
194   /* ---------- Range Sort ---------- */
195   UInt32 i;
196   UInt32 mid;
197   for (;;)
198   {
199     UInt32 j;
200     if (range <= 1)
201     {
202       #ifndef BLOCK_SORT_EXTERNAL_FLAGS
203       SetGroupSize(ind2, groupSize);
204       #endif
205       return 1;
206     }
207     mid = left + ((range + 1) >> 1);
208     j = groupSize;
209     i = 0;
210     do
211     {
212       UInt32 sp = ind2[i] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
213       if (Groups[sp] >= mid)
214       {
215         for (j--; j > i; j--)
216         {
217           sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
218           if (Groups[sp] < mid)
219           {
220             UInt32 temp = ind2[i]; ind2[i] = ind2[j]; ind2[j] = temp;
221             break;
222           }
223         }
224         if (i >= j)
225           break;
226       }
227     }
228     while (++i < j);
229     if (i == 0)
230     {
231       range = range - (mid - left);
232       left = mid;
233     }
234     else if (i == groupSize)
235       range = (mid - left);
236     else
237       break;
238   }
239 
240   #ifdef BLOCK_SORT_EXTERNAL_FLAGS
241   {
242     UInt32 t = (groupOffset + i - 1);
243     UInt32 *Flags = Groups + BlockSize;
244     Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
245   }
246   #endif
247 
248   {
249     UInt32 j;
250     for (j = i; j < groupSize; j++)
251       Groups[ind2[j]] = groupOffset + i;
252   }
253 
254   {
255   UInt32 res = SortGroup(BlockSize, NumSortedBytes, groupOffset, i, NumRefBits, Indices, left, mid - left);
256   return res | SortGroup(BlockSize, NumSortedBytes, groupOffset + i, groupSize - i, NumRefBits, Indices, mid, range - (mid - left));
257   }
258 
259   }
260 
261   #else
262 
263   /* ---------- Heap Sort ---------- */
264 
265   {
266     UInt32 j;
267     for (j = 0; j < groupSize; j++)
268     {
269       UInt32 sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
270       ind2[j] = sp;
271     }
272 
273     HeapSortRef(ind2, Groups, groupSize);
274 
275     /* Write Flags */
276     {
277     UInt32 sp = ind2[0];
278     UInt32 group = Groups[sp];
279 
280     #ifdef BLOCK_SORT_EXTERNAL_FLAGS
281     UInt32 *Flags = Groups + BlockSize;
282     #else
283     UInt32 prevGroupStart = 0;
284     #endif
285 
286     for (j = 1; j < groupSize; j++)
287     {
288       sp = ind2[j];
289       if (Groups[sp] != group)
290       {
291         group = Groups[sp];
292         #ifdef BLOCK_SORT_EXTERNAL_FLAGS
293         {
294         UInt32 t = groupOffset + j - 1;
295         Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
296         }
297         #else
298         SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart);
299         prevGroupStart = j;
300         #endif
301       }
302     }
303 
304     #ifndef BLOCK_SORT_EXTERNAL_FLAGS
305     SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart);
306     #endif
307     }
308     {
309     /* Write new Groups values and Check that there are groups */
310     UInt32 thereAreGroups = 0;
311     for (j = 0; j < groupSize; j++)
312     {
313       UInt32 group = groupOffset + j;
314       #ifndef BLOCK_SORT_EXTERNAL_FLAGS
315       UInt32 subGroupSize = ((ind2[j] & ~0xC0000000) >> kNumBitsMax);
316       if ((ind2[j] & 0x40000000) != 0)
317         subGroupSize += ((ind2[(size_t)j + 1] >> kNumBitsMax) << kNumExtra0Bits);
318       subGroupSize++;
319       for (;;)
320       {
321         UInt32 original = ind2[j];
322         UInt32 sp = original & kIndexMask;
323         if (sp < NumSortedBytes) sp += BlockSize; sp -= NumSortedBytes;
324         ind2[j] = sp | (original & ~kIndexMask);
325         Groups[sp] = group;
326         if (--subGroupSize == 0)
327           break;
328         j++;
329         thereAreGroups = 1;
330       }
331       #else
332       UInt32 *Flags = Groups + BlockSize;
333       for (;;)
334       {
335         UInt32 sp = ind2[j]; if (sp < NumSortedBytes) sp += BlockSize; sp -= NumSortedBytes;
336         ind2[j] = sp;
337         Groups[sp] = group;
338         if ((Flags[(groupOffset + j) >> kNumFlagsBits] & (1 << ((groupOffset + j) & kFlagsMask))) == 0)
339           break;
340         j++;
341         thereAreGroups = 1;
342       }
343       #endif
344     }
345     return thereAreGroups;
346     }
347   }
348   #endif
349 }
350 
351 /* conditions: blockSize > 0 */
BlockSort(UInt32 * Indices,const Byte * data,UInt32 blockSize)352 UInt32 BlockSort(UInt32 *Indices, const Byte *data, UInt32 blockSize)
353 {
354   UInt32 *counters = Indices + blockSize;
355   UInt32 i;
356   UInt32 *Groups;
357   #ifdef BLOCK_SORT_EXTERNAL_FLAGS
358   UInt32 *Flags;
359   #endif
360 
361   /* Radix-Sort for 2 bytes */
362   for (i = 0; i < kNumHashValues; i++)
363     counters[i] = 0;
364   for (i = 0; i < blockSize - 1; i++)
365     counters[((UInt32)data[i] << 8) | data[(size_t)i + 1]]++;
366   counters[((UInt32)data[i] << 8) | data[0]]++;
367 
368   Groups = counters + BS_TEMP_SIZE;
369   #ifdef BLOCK_SORT_EXTERNAL_FLAGS
370   Flags = Groups + blockSize;
371     {
372       UInt32 numWords = (blockSize + kFlagsMask) >> kNumFlagsBits;
373       for (i = 0; i < numWords; i++)
374         Flags[i] = kAllFlags;
375     }
376   #endif
377 
378   {
379     UInt32 sum = 0;
380     for (i = 0; i < kNumHashValues; i++)
381     {
382       UInt32 groupSize = counters[i];
383       if (groupSize > 0)
384       {
385         #ifdef BLOCK_SORT_EXTERNAL_FLAGS
386         UInt32 t = sum + groupSize - 1;
387         Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
388         #endif
389         sum += groupSize;
390       }
391       counters[i] = sum - groupSize;
392     }
393 
394     for (i = 0; i < blockSize - 1; i++)
395       Groups[i] = counters[((UInt32)data[i] << 8) | data[(size_t)i + 1]];
396     Groups[i] = counters[((UInt32)data[i] << 8) | data[0]];
397 
398     for (i = 0; i < blockSize - 1; i++)
399       Indices[counters[((UInt32)data[i] << 8) | data[(size_t)i + 1]]++] = i;
400     Indices[counters[((UInt32)data[i] << 8) | data[0]]++] = i;
401 
402     #ifndef BLOCK_SORT_EXTERNAL_FLAGS
403     {
404     UInt32 prev = 0;
405     for (i = 0; i < kNumHashValues; i++)
406     {
407       UInt32 prevGroupSize = counters[i] - prev;
408       if (prevGroupSize == 0)
409         continue;
410       SetGroupSize(Indices + prev, prevGroupSize);
411       prev = counters[i];
412     }
413     }
414     #endif
415   }
416 
417   {
418   int NumRefBits;
419   UInt32 NumSortedBytes;
420   for (NumRefBits = 0; ((blockSize - 1) >> NumRefBits) != 0; NumRefBits++);
421   NumRefBits = 32 - NumRefBits;
422   if (NumRefBits > kNumRefBitsMax)
423     NumRefBits = kNumRefBitsMax;
424 
425   for (NumSortedBytes = kNumHashBytes; ; NumSortedBytes <<= 1)
426   {
427     #ifndef BLOCK_SORT_EXTERNAL_FLAGS
428     UInt32 finishedGroupSize = 0;
429     #endif
430     UInt32 newLimit = 0;
431     for (i = 0; i < blockSize;)
432     {
433       UInt32 groupSize;
434       #ifdef BLOCK_SORT_EXTERNAL_FLAGS
435 
436       if ((Flags[i >> kNumFlagsBits] & (1 << (i & kFlagsMask))) == 0)
437       {
438         i++;
439         continue;
440       }
441       for (groupSize = 1;
442         (Flags[(i + groupSize) >> kNumFlagsBits] & (1 << ((i + groupSize) & kFlagsMask))) != 0;
443         groupSize++);
444 
445       groupSize++;
446 
447       #else
448 
449       groupSize = ((Indices[i] & ~0xC0000000) >> kNumBitsMax);
450       {
451       BoolInt finishedGroup = ((Indices[i] & 0x80000000) == 0);
452       if ((Indices[i] & 0x40000000) != 0)
453       {
454         groupSize += ((Indices[(size_t)i + 1] >> kNumBitsMax) << kNumExtra0Bits);
455         Indices[(size_t)i + 1] &= kIndexMask;
456       }
457       Indices[i] &= kIndexMask;
458       groupSize++;
459       if (finishedGroup || groupSize == 1)
460       {
461         Indices[i - finishedGroupSize] &= kIndexMask;
462         if (finishedGroupSize > 1)
463           Indices[(size_t)(i - finishedGroupSize) + 1] &= kIndexMask;
464         {
465         UInt32 newGroupSize = groupSize + finishedGroupSize;
466         SetFinishedGroupSize(Indices + i - finishedGroupSize, newGroupSize);
467         finishedGroupSize = newGroupSize;
468         }
469         i += groupSize;
470         continue;
471       }
472       finishedGroupSize = 0;
473       }
474 
475       #endif
476 
477       if (NumSortedBytes >= blockSize)
478       {
479         UInt32 j;
480         for (j = 0; j < groupSize; j++)
481         {
482           UInt32 t = (i + j);
483           /* Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); */
484           Groups[Indices[t]] = t;
485         }
486       }
487       else
488         if (SortGroup(blockSize, NumSortedBytes, i, groupSize, NumRefBits, Indices
489           #ifndef BLOCK_SORT_USE_HEAP_SORT
490           , 0, blockSize
491           #endif
492           ) != 0)
493           newLimit = i + groupSize;
494       i += groupSize;
495     }
496     if (newLimit == 0)
497       break;
498   }
499   }
500   #ifndef BLOCK_SORT_EXTERNAL_FLAGS
501   for (i = 0; i < blockSize;)
502   {
503     UInt32 groupSize = ((Indices[i] & ~0xC0000000) >> kNumBitsMax);
504     if ((Indices[i] & 0x40000000) != 0)
505     {
506       groupSize += ((Indices[(size_t)i + 1] >> kNumBitsMax) << kNumExtra0Bits);
507       Indices[(size_t)i + 1] &= kIndexMask;
508     }
509     Indices[i] &= kIndexMask;
510     groupSize++;
511     i += groupSize;
512   }
513   #endif
514   return Groups[0];
515 }
516