10c16b537SWarner Losh /*
20c16b537SWarner Losh  * divsufsort.c for libdivsufsort-lite
30c16b537SWarner Losh  * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
40c16b537SWarner Losh  *
50c16b537SWarner Losh  * Permission is hereby granted, free of charge, to any person
60c16b537SWarner Losh  * obtaining a copy of this software and associated documentation
70c16b537SWarner Losh  * files (the "Software"), to deal in the Software without
80c16b537SWarner Losh  * restriction, including without limitation the rights to use,
90c16b537SWarner Losh  * copy, modify, merge, publish, distribute, sublicense, and/or sell
100c16b537SWarner Losh  * copies of the Software, and to permit persons to whom the
110c16b537SWarner Losh  * Software is furnished to do so, subject to the following
120c16b537SWarner Losh  * conditions:
130c16b537SWarner Losh  *
140c16b537SWarner Losh  * The above copyright notice and this permission notice shall be
150c16b537SWarner Losh  * included in all copies or substantial portions of the Software.
160c16b537SWarner Losh  *
170c16b537SWarner Losh  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
180c16b537SWarner Losh  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
190c16b537SWarner Losh  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
200c16b537SWarner Losh  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
210c16b537SWarner Losh  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
220c16b537SWarner Losh  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
230c16b537SWarner Losh  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
240c16b537SWarner Losh  * OTHER DEALINGS IN THE SOFTWARE.
250c16b537SWarner Losh  */
260c16b537SWarner Losh 
270c16b537SWarner Losh /*- Compiler specifics -*/
280c16b537SWarner Losh #ifdef __clang__
290c16b537SWarner Losh #pragma clang diagnostic ignored "-Wshorten-64-to-32"
300c16b537SWarner Losh #endif
310c16b537SWarner Losh 
320c16b537SWarner Losh #if defined(_MSC_VER)
330c16b537SWarner Losh #  pragma warning(disable : 4244)
340c16b537SWarner Losh #  pragma warning(disable : 4127)    /* C4127 : Condition expression is constant */
350c16b537SWarner Losh #endif
360c16b537SWarner Losh 
370c16b537SWarner Losh 
380c16b537SWarner Losh /*- Dependencies -*/
390c16b537SWarner Losh #include <assert.h>
400c16b537SWarner Losh #include <stdio.h>
410c16b537SWarner Losh #include <stdlib.h>
420c16b537SWarner Losh 
430c16b537SWarner Losh #include "divsufsort.h"
440c16b537SWarner Losh 
450c16b537SWarner Losh /*- Constants -*/
460c16b537SWarner Losh #if defined(INLINE)
470c16b537SWarner Losh # undef INLINE
480c16b537SWarner Losh #endif
490c16b537SWarner Losh #if !defined(INLINE)
500c16b537SWarner Losh # define INLINE __inline
510c16b537SWarner Losh #endif
520c16b537SWarner Losh #if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1)
530c16b537SWarner Losh # undef ALPHABET_SIZE
540c16b537SWarner Losh #endif
550c16b537SWarner Losh #if !defined(ALPHABET_SIZE)
560c16b537SWarner Losh # define ALPHABET_SIZE (256)
570c16b537SWarner Losh #endif
580c16b537SWarner Losh #define BUCKET_A_SIZE (ALPHABET_SIZE)
590c16b537SWarner Losh #define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE)
600c16b537SWarner Losh #if defined(SS_INSERTIONSORT_THRESHOLD)
610c16b537SWarner Losh # if SS_INSERTIONSORT_THRESHOLD < 1
620c16b537SWarner Losh #  undef SS_INSERTIONSORT_THRESHOLD
630c16b537SWarner Losh #  define SS_INSERTIONSORT_THRESHOLD (1)
640c16b537SWarner Losh # endif
650c16b537SWarner Losh #else
660c16b537SWarner Losh # define SS_INSERTIONSORT_THRESHOLD (8)
670c16b537SWarner Losh #endif
680c16b537SWarner Losh #if defined(SS_BLOCKSIZE)
690c16b537SWarner Losh # if SS_BLOCKSIZE < 0
700c16b537SWarner Losh #  undef SS_BLOCKSIZE
710c16b537SWarner Losh #  define SS_BLOCKSIZE (0)
720c16b537SWarner Losh # elif 32768 <= SS_BLOCKSIZE
730c16b537SWarner Losh #  undef SS_BLOCKSIZE
740c16b537SWarner Losh #  define SS_BLOCKSIZE (32767)
750c16b537SWarner Losh # endif
760c16b537SWarner Losh #else
770c16b537SWarner Losh # define SS_BLOCKSIZE (1024)
780c16b537SWarner Losh #endif
790c16b537SWarner Losh /* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */
800c16b537SWarner Losh #if SS_BLOCKSIZE == 0
810c16b537SWarner Losh # define SS_MISORT_STACKSIZE (96)
820c16b537SWarner Losh #elif SS_BLOCKSIZE <= 4096
830c16b537SWarner Losh # define SS_MISORT_STACKSIZE (16)
840c16b537SWarner Losh #else
850c16b537SWarner Losh # define SS_MISORT_STACKSIZE (24)
860c16b537SWarner Losh #endif
870c16b537SWarner Losh #define SS_SMERGE_STACKSIZE (32)
880c16b537SWarner Losh #define TR_INSERTIONSORT_THRESHOLD (8)
890c16b537SWarner Losh #define TR_STACKSIZE (64)
900c16b537SWarner Losh 
910c16b537SWarner Losh 
920c16b537SWarner Losh /*- Macros -*/
930c16b537SWarner Losh #ifndef SWAP
940c16b537SWarner Losh # define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0)
950c16b537SWarner Losh #endif /* SWAP */
960c16b537SWarner Losh #ifndef MIN
970c16b537SWarner Losh # define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b))
980c16b537SWarner Losh #endif /* MIN */
990c16b537SWarner Losh #ifndef MAX
1000c16b537SWarner Losh # define MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b))
1010c16b537SWarner Losh #endif /* MAX */
1020c16b537SWarner Losh #define STACK_PUSH(_a, _b, _c, _d)\
1030c16b537SWarner Losh   do {\
1040c16b537SWarner Losh     assert(ssize < STACK_SIZE);\
1050c16b537SWarner Losh     stack[ssize].a = (_a), stack[ssize].b = (_b),\
1060c16b537SWarner Losh     stack[ssize].c = (_c), stack[ssize++].d = (_d);\
1070c16b537SWarner Losh   } while(0)
1080c16b537SWarner Losh #define STACK_PUSH5(_a, _b, _c, _d, _e)\
1090c16b537SWarner Losh   do {\
1100c16b537SWarner Losh     assert(ssize < STACK_SIZE);\
1110c16b537SWarner Losh     stack[ssize].a = (_a), stack[ssize].b = (_b),\
1120c16b537SWarner Losh     stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\
1130c16b537SWarner Losh   } while(0)
1140c16b537SWarner Losh #define STACK_POP(_a, _b, _c, _d)\
1150c16b537SWarner Losh   do {\
1160c16b537SWarner Losh     assert(0 <= ssize);\
1170c16b537SWarner Losh     if(ssize == 0) { return; }\
1180c16b537SWarner Losh     (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
1190c16b537SWarner Losh     (_c) = stack[ssize].c, (_d) = stack[ssize].d;\
1200c16b537SWarner Losh   } while(0)
1210c16b537SWarner Losh #define STACK_POP5(_a, _b, _c, _d, _e)\
1220c16b537SWarner Losh   do {\
1230c16b537SWarner Losh     assert(0 <= ssize);\
1240c16b537SWarner Losh     if(ssize == 0) { return; }\
1250c16b537SWarner Losh     (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
1260c16b537SWarner Losh     (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\
1270c16b537SWarner Losh   } while(0)
1280c16b537SWarner Losh #define BUCKET_A(_c0) bucket_A[(_c0)]
1290c16b537SWarner Losh #if ALPHABET_SIZE == 256
1300c16b537SWarner Losh #define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)])
1310c16b537SWarner Losh #define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)])
1320c16b537SWarner Losh #else
1330c16b537SWarner Losh #define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)])
1340c16b537SWarner Losh #define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)])
1350c16b537SWarner Losh #endif
1360c16b537SWarner Losh 
1370c16b537SWarner Losh 
1380c16b537SWarner Losh /*- Private Functions -*/
1390c16b537SWarner Losh 
1400c16b537SWarner Losh static const int lg_table[256]= {
1410c16b537SWarner Losh  -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
1420c16b537SWarner Losh   5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
1430c16b537SWarner Losh   6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
1440c16b537SWarner Losh   6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
1450c16b537SWarner Losh   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
1460c16b537SWarner Losh   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
1470c16b537SWarner Losh   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
1480c16b537SWarner Losh   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
1490c16b537SWarner Losh };
1500c16b537SWarner Losh 
1510c16b537SWarner Losh #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
1520c16b537SWarner Losh 
1530c16b537SWarner Losh static INLINE
1540c16b537SWarner Losh int
ss_ilg(int n)1550c16b537SWarner Losh ss_ilg(int n) {
1560c16b537SWarner Losh #if SS_BLOCKSIZE == 0
1570c16b537SWarner Losh   return (n & 0xffff0000) ?
1580c16b537SWarner Losh           ((n & 0xff000000) ?
1590c16b537SWarner Losh             24 + lg_table[(n >> 24) & 0xff] :
1600c16b537SWarner Losh             16 + lg_table[(n >> 16) & 0xff]) :
1610c16b537SWarner Losh           ((n & 0x0000ff00) ?
1620c16b537SWarner Losh              8 + lg_table[(n >>  8) & 0xff] :
1630c16b537SWarner Losh              0 + lg_table[(n >>  0) & 0xff]);
1640c16b537SWarner Losh #elif SS_BLOCKSIZE < 256
1650c16b537SWarner Losh   return lg_table[n];
1660c16b537SWarner Losh #else
1670c16b537SWarner Losh   return (n & 0xff00) ?
1680c16b537SWarner Losh           8 + lg_table[(n >> 8) & 0xff] :
1690c16b537SWarner Losh           0 + lg_table[(n >> 0) & 0xff];
1700c16b537SWarner Losh #endif
1710c16b537SWarner Losh }
1720c16b537SWarner Losh 
1730c16b537SWarner Losh #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
1740c16b537SWarner Losh 
1750c16b537SWarner Losh #if SS_BLOCKSIZE != 0
1760c16b537SWarner Losh 
1770c16b537SWarner Losh static const int sqq_table[256] = {
1780c16b537SWarner Losh   0,  16,  22,  27,  32,  35,  39,  42,  45,  48,  50,  53,  55,  57,  59,  61,
1790c16b537SWarner Losh  64,  65,  67,  69,  71,  73,  75,  76,  78,  80,  81,  83,  84,  86,  87,  89,
1800c16b537SWarner Losh  90,  91,  93,  94,  96,  97,  98,  99, 101, 102, 103, 104, 106, 107, 108, 109,
1810c16b537SWarner Losh 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
1820c16b537SWarner Losh 128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
1830c16b537SWarner Losh 143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155,
1840c16b537SWarner Losh 156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168,
1850c16b537SWarner Losh 169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180,
1860c16b537SWarner Losh 181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191,
1870c16b537SWarner Losh 192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201,
1880c16b537SWarner Losh 202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211,
1890c16b537SWarner Losh 212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221,
1900c16b537SWarner Losh 221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230,
1910c16b537SWarner Losh 230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238,
1920c16b537SWarner Losh 239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247,
1930c16b537SWarner Losh 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255
1940c16b537SWarner Losh };
1950c16b537SWarner Losh 
1960c16b537SWarner Losh static INLINE
1970c16b537SWarner Losh int
ss_isqrt(int x)1980c16b537SWarner Losh ss_isqrt(int x) {
1990c16b537SWarner Losh   int y, e;
2000c16b537SWarner Losh 
2010c16b537SWarner Losh   if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; }
2020c16b537SWarner Losh   e = (x & 0xffff0000) ?
2030c16b537SWarner Losh         ((x & 0xff000000) ?
2040c16b537SWarner Losh           24 + lg_table[(x >> 24) & 0xff] :
2050c16b537SWarner Losh           16 + lg_table[(x >> 16) & 0xff]) :
2060c16b537SWarner Losh         ((x & 0x0000ff00) ?
2070c16b537SWarner Losh            8 + lg_table[(x >>  8) & 0xff] :
2080c16b537SWarner Losh            0 + lg_table[(x >>  0) & 0xff]);
2090c16b537SWarner Losh 
2100c16b537SWarner Losh   if(e >= 16) {
2110c16b537SWarner Losh     y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7);
2120c16b537SWarner Losh     if(e >= 24) { y = (y + 1 + x / y) >> 1; }
2130c16b537SWarner Losh     y = (y + 1 + x / y) >> 1;
2140c16b537SWarner Losh   } else if(e >= 8) {
2150c16b537SWarner Losh     y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1;
2160c16b537SWarner Losh   } else {
2170c16b537SWarner Losh     return sqq_table[x] >> 4;
2180c16b537SWarner Losh   }
2190c16b537SWarner Losh 
2200c16b537SWarner Losh   return (x < (y * y)) ? y - 1 : y;
2210c16b537SWarner Losh }
2220c16b537SWarner Losh 
2230c16b537SWarner Losh #endif /* SS_BLOCKSIZE != 0 */
2240c16b537SWarner Losh 
2250c16b537SWarner Losh 
2260c16b537SWarner Losh /*---------------------------------------------------------------------------*/
2270c16b537SWarner Losh 
2280c16b537SWarner Losh /* Compares two suffixes. */
2290c16b537SWarner Losh static INLINE
2300c16b537SWarner Losh int
ss_compare(const unsigned char * T,const int * p1,const int * p2,int depth)2310c16b537SWarner Losh ss_compare(const unsigned char *T,
2320c16b537SWarner Losh            const int *p1, const int *p2,
2330c16b537SWarner Losh            int depth) {
2340c16b537SWarner Losh   const unsigned char *U1, *U2, *U1n, *U2n;
2350c16b537SWarner Losh 
2360c16b537SWarner Losh   for(U1 = T + depth + *p1,
2370c16b537SWarner Losh       U2 = T + depth + *p2,
2380c16b537SWarner Losh       U1n = T + *(p1 + 1) + 2,
2390c16b537SWarner Losh       U2n = T + *(p2 + 1) + 2;
2400c16b537SWarner Losh       (U1 < U1n) && (U2 < U2n) && (*U1 == *U2);
2410c16b537SWarner Losh       ++U1, ++U2) {
2420c16b537SWarner Losh   }
2430c16b537SWarner Losh 
2440c16b537SWarner Losh   return U1 < U1n ?
2450c16b537SWarner Losh         (U2 < U2n ? *U1 - *U2 : 1) :
2460c16b537SWarner Losh         (U2 < U2n ? -1 : 0);
2470c16b537SWarner Losh }
2480c16b537SWarner Losh 
2490c16b537SWarner Losh 
2500c16b537SWarner Losh /*---------------------------------------------------------------------------*/
2510c16b537SWarner Losh 
2520c16b537SWarner Losh #if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1)
2530c16b537SWarner Losh 
2540c16b537SWarner Losh /* Insertionsort for small size groups */
2550c16b537SWarner Losh static
2560c16b537SWarner Losh void
ss_insertionsort(const unsigned char * T,const int * PA,int * first,int * last,int depth)2570c16b537SWarner Losh ss_insertionsort(const unsigned char *T, const int *PA,
2580c16b537SWarner Losh                  int *first, int *last, int depth) {
2590c16b537SWarner Losh   int *i, *j;
2600c16b537SWarner Losh   int t;
2610c16b537SWarner Losh   int r;
2620c16b537SWarner Losh 
2630c16b537SWarner Losh   for(i = last - 2; first <= i; --i) {
2640c16b537SWarner Losh     for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) {
2650c16b537SWarner Losh       do { *(j - 1) = *j; } while((++j < last) && (*j < 0));
2660c16b537SWarner Losh       if(last <= j) { break; }
2670c16b537SWarner Losh     }
2680c16b537SWarner Losh     if(r == 0) { *j = ~*j; }
2690c16b537SWarner Losh     *(j - 1) = t;
2700c16b537SWarner Losh   }
2710c16b537SWarner Losh }
2720c16b537SWarner Losh 
2730c16b537SWarner Losh #endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */
2740c16b537SWarner Losh 
2750c16b537SWarner Losh 
2760c16b537SWarner Losh /*---------------------------------------------------------------------------*/
2770c16b537SWarner Losh 
2780c16b537SWarner Losh #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
2790c16b537SWarner Losh 
2800c16b537SWarner Losh static INLINE
2810c16b537SWarner Losh void
ss_fixdown(const unsigned char * Td,const int * PA,int * SA,int i,int size)2820c16b537SWarner Losh ss_fixdown(const unsigned char *Td, const int *PA,
2830c16b537SWarner Losh            int *SA, int i, int size) {
2840c16b537SWarner Losh   int j, k;
2850c16b537SWarner Losh   int v;
2860c16b537SWarner Losh   int c, d, e;
2870c16b537SWarner Losh 
2880c16b537SWarner Losh   for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
2890c16b537SWarner Losh     d = Td[PA[SA[k = j++]]];
2900c16b537SWarner Losh     if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; }
2910c16b537SWarner Losh     if(d <= c) { break; }
2920c16b537SWarner Losh   }
2930c16b537SWarner Losh   SA[i] = v;
2940c16b537SWarner Losh }
2950c16b537SWarner Losh 
2960c16b537SWarner Losh /* Simple top-down heapsort. */
2970c16b537SWarner Losh static
2980c16b537SWarner Losh void
ss_heapsort(const unsigned char * Td,const int * PA,int * SA,int size)2990c16b537SWarner Losh ss_heapsort(const unsigned char *Td, const int *PA, int *SA, int size) {
3000c16b537SWarner Losh   int i, m;
3010c16b537SWarner Losh   int t;
3020c16b537SWarner Losh 
3030c16b537SWarner Losh   m = size;
3040c16b537SWarner Losh   if((size % 2) == 0) {
3050c16b537SWarner Losh     m--;
3060c16b537SWarner Losh     if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); }
3070c16b537SWarner Losh   }
3080c16b537SWarner Losh 
3090c16b537SWarner Losh   for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); }
3100c16b537SWarner Losh   if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); }
3110c16b537SWarner Losh   for(i = m - 1; 0 < i; --i) {
3120c16b537SWarner Losh     t = SA[0], SA[0] = SA[i];
3130c16b537SWarner Losh     ss_fixdown(Td, PA, SA, 0, i);
3140c16b537SWarner Losh     SA[i] = t;
3150c16b537SWarner Losh   }
3160c16b537SWarner Losh }
3170c16b537SWarner Losh 
3180c16b537SWarner Losh 
3190c16b537SWarner Losh /*---------------------------------------------------------------------------*/
3200c16b537SWarner Losh 
3210c16b537SWarner Losh /* Returns the median of three elements. */
3220c16b537SWarner Losh static INLINE
3230c16b537SWarner Losh int *
ss_median3(const unsigned char * Td,const int * PA,int * v1,int * v2,int * v3)3240c16b537SWarner Losh ss_median3(const unsigned char *Td, const int *PA,
3250c16b537SWarner Losh            int *v1, int *v2, int *v3) {
3260c16b537SWarner Losh   int *t;
3270c16b537SWarner Losh   if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); }
3280c16b537SWarner Losh   if(Td[PA[*v2]] > Td[PA[*v3]]) {
3290c16b537SWarner Losh     if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; }
3300c16b537SWarner Losh     else { return v3; }
3310c16b537SWarner Losh   }
3320c16b537SWarner Losh   return v2;
3330c16b537SWarner Losh }
3340c16b537SWarner Losh 
3350c16b537SWarner Losh /* Returns the median of five elements. */
3360c16b537SWarner Losh static INLINE
3370c16b537SWarner Losh int *
ss_median5(const unsigned char * Td,const int * PA,int * v1,int * v2,int * v3,int * v4,int * v5)3380c16b537SWarner Losh ss_median5(const unsigned char *Td, const int *PA,
3390c16b537SWarner Losh            int *v1, int *v2, int *v3, int *v4, int *v5) {
3400c16b537SWarner Losh   int *t;
3410c16b537SWarner Losh   if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); }
3420c16b537SWarner Losh   if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); }
3430c16b537SWarner Losh   if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); }
3440c16b537SWarner Losh   if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); }
3450c16b537SWarner Losh   if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); }
3460c16b537SWarner Losh   if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; }
3470c16b537SWarner Losh   return v3;
3480c16b537SWarner Losh }
3490c16b537SWarner Losh 
3500c16b537SWarner Losh /* Returns the pivot element. */
3510c16b537SWarner Losh static INLINE
3520c16b537SWarner Losh int *
ss_pivot(const unsigned char * Td,const int * PA,int * first,int * last)3530c16b537SWarner Losh ss_pivot(const unsigned char *Td, const int *PA, int *first, int *last) {
3540c16b537SWarner Losh   int *middle;
3550c16b537SWarner Losh   int t;
3560c16b537SWarner Losh 
3570c16b537SWarner Losh   t = last - first;
3580c16b537SWarner Losh   middle = first + t / 2;
3590c16b537SWarner Losh 
3600c16b537SWarner Losh   if(t <= 512) {
3610c16b537SWarner Losh     if(t <= 32) {
3620c16b537SWarner Losh       return ss_median3(Td, PA, first, middle, last - 1);
3630c16b537SWarner Losh     } else {
3640c16b537SWarner Losh       t >>= 2;
3650c16b537SWarner Losh       return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1);
3660c16b537SWarner Losh     }
3670c16b537SWarner Losh   }
3680c16b537SWarner Losh   t >>= 3;
3690c16b537SWarner Losh   first  = ss_median3(Td, PA, first, first + t, first + (t << 1));
3700c16b537SWarner Losh   middle = ss_median3(Td, PA, middle - t, middle, middle + t);
3710c16b537SWarner Losh   last   = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1);
3720c16b537SWarner Losh   return ss_median3(Td, PA, first, middle, last);
3730c16b537SWarner Losh }
3740c16b537SWarner Losh 
3750c16b537SWarner Losh 
3760c16b537SWarner Losh /*---------------------------------------------------------------------------*/
3770c16b537SWarner Losh 
3780c16b537SWarner Losh /* Binary partition for substrings. */
3790c16b537SWarner Losh static INLINE
3800c16b537SWarner Losh int *
ss_partition(const int * PA,int * first,int * last,int depth)3810c16b537SWarner Losh ss_partition(const int *PA,
3820c16b537SWarner Losh                     int *first, int *last, int depth) {
3830c16b537SWarner Losh   int *a, *b;
3840c16b537SWarner Losh   int t;
3850c16b537SWarner Losh   for(a = first - 1, b = last;;) {
3860c16b537SWarner Losh     for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; }
3870c16b537SWarner Losh     for(; (a < --b) && ((PA[*b] + depth) <  (PA[*b + 1] + 1));) { }
3880c16b537SWarner Losh     if(b <= a) { break; }
3890c16b537SWarner Losh     t = ~*b;
3900c16b537SWarner Losh     *b = *a;
3910c16b537SWarner Losh     *a = t;
3920c16b537SWarner Losh   }
3930c16b537SWarner Losh   if(first < a) { *first = ~*first; }
3940c16b537SWarner Losh   return a;
3950c16b537SWarner Losh }
3960c16b537SWarner Losh 
3970c16b537SWarner Losh /* Multikey introsort for medium size groups. */
3980c16b537SWarner Losh static
3990c16b537SWarner Losh void
ss_mintrosort(const unsigned char * T,const int * PA,int * first,int * last,int depth)4000c16b537SWarner Losh ss_mintrosort(const unsigned char *T, const int *PA,
4010c16b537SWarner Losh               int *first, int *last,
4020c16b537SWarner Losh               int depth) {
4030c16b537SWarner Losh #define STACK_SIZE SS_MISORT_STACKSIZE
4040c16b537SWarner Losh   struct { int *a, *b, c; int d; } stack[STACK_SIZE];
4050c16b537SWarner Losh   const unsigned char *Td;
4060c16b537SWarner Losh   int *a, *b, *c, *d, *e, *f;
4070c16b537SWarner Losh   int s, t;
4080c16b537SWarner Losh   int ssize;
4090c16b537SWarner Losh   int limit;
4100c16b537SWarner Losh   int v, x = 0;
4110c16b537SWarner Losh 
4120c16b537SWarner Losh   for(ssize = 0, limit = ss_ilg(last - first);;) {
4130c16b537SWarner Losh 
4140c16b537SWarner Losh     if((last - first) <= SS_INSERTIONSORT_THRESHOLD) {
4150c16b537SWarner Losh #if 1 < SS_INSERTIONSORT_THRESHOLD
4160c16b537SWarner Losh       if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); }
4170c16b537SWarner Losh #endif
4180c16b537SWarner Losh       STACK_POP(first, last, depth, limit);
4190c16b537SWarner Losh       continue;
4200c16b537SWarner Losh     }
4210c16b537SWarner Losh 
4220c16b537SWarner Losh     Td = T + depth;
4230c16b537SWarner Losh     if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); }
4240c16b537SWarner Losh     if(limit < 0) {
4250c16b537SWarner Losh       for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) {
4260c16b537SWarner Losh         if((x = Td[PA[*a]]) != v) {
4270c16b537SWarner Losh           if(1 < (a - first)) { break; }
4280c16b537SWarner Losh           v = x;
4290c16b537SWarner Losh           first = a;
4300c16b537SWarner Losh         }
4310c16b537SWarner Losh       }
4320c16b537SWarner Losh       if(Td[PA[*first] - 1] < v) {
4330c16b537SWarner Losh         first = ss_partition(PA, first, a, depth);
4340c16b537SWarner Losh       }
4350c16b537SWarner Losh       if((a - first) <= (last - a)) {
4360c16b537SWarner Losh         if(1 < (a - first)) {
4370c16b537SWarner Losh           STACK_PUSH(a, last, depth, -1);
4380c16b537SWarner Losh           last = a, depth += 1, limit = ss_ilg(a - first);
4390c16b537SWarner Losh         } else {
4400c16b537SWarner Losh           first = a, limit = -1;
4410c16b537SWarner Losh         }
4420c16b537SWarner Losh       } else {
4430c16b537SWarner Losh         if(1 < (last - a)) {
4440c16b537SWarner Losh           STACK_PUSH(first, a, depth + 1, ss_ilg(a - first));
4450c16b537SWarner Losh           first = a, limit = -1;
4460c16b537SWarner Losh         } else {
4470c16b537SWarner Losh           last = a, depth += 1, limit = ss_ilg(a - first);
4480c16b537SWarner Losh         }
4490c16b537SWarner Losh       }
4500c16b537SWarner Losh       continue;
4510c16b537SWarner Losh     }
4520c16b537SWarner Losh 
4530c16b537SWarner Losh     /* choose pivot */
4540c16b537SWarner Losh     a = ss_pivot(Td, PA, first, last);
4550c16b537SWarner Losh     v = Td[PA[*a]];
4560c16b537SWarner Losh     SWAP(*first, *a);
4570c16b537SWarner Losh 
4580c16b537SWarner Losh     /* partition */
4590c16b537SWarner Losh     for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { }
4600c16b537SWarner Losh     if(((a = b) < last) && (x < v)) {
4610c16b537SWarner Losh       for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) {
4620c16b537SWarner Losh         if(x == v) { SWAP(*b, *a); ++a; }
4630c16b537SWarner Losh       }
4640c16b537SWarner Losh     }
4650c16b537SWarner Losh     for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { }
4660c16b537SWarner Losh     if((b < (d = c)) && (x > v)) {
4670c16b537SWarner Losh       for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
4680c16b537SWarner Losh         if(x == v) { SWAP(*c, *d); --d; }
4690c16b537SWarner Losh       }
4700c16b537SWarner Losh     }
4710c16b537SWarner Losh     for(; b < c;) {
4720c16b537SWarner Losh       SWAP(*b, *c);
4730c16b537SWarner Losh       for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) {
4740c16b537SWarner Losh         if(x == v) { SWAP(*b, *a); ++a; }
4750c16b537SWarner Losh       }
4760c16b537SWarner Losh       for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
4770c16b537SWarner Losh         if(x == v) { SWAP(*c, *d); --d; }
4780c16b537SWarner Losh       }
4790c16b537SWarner Losh     }
4800c16b537SWarner Losh 
4810c16b537SWarner Losh     if(a <= d) {
4820c16b537SWarner Losh       c = b - 1;
4830c16b537SWarner Losh 
4840c16b537SWarner Losh       if((s = a - first) > (t = b - a)) { s = t; }
4850c16b537SWarner Losh       for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
4860c16b537SWarner Losh       if((s = d - c) > (t = last - d - 1)) { s = t; }
4870c16b537SWarner Losh       for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
4880c16b537SWarner Losh 
4890c16b537SWarner Losh       a = first + (b - a), c = last - (d - c);
4900c16b537SWarner Losh       b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth);
4910c16b537SWarner Losh 
4920c16b537SWarner Losh       if((a - first) <= (last - c)) {
4930c16b537SWarner Losh         if((last - c) <= (c - b)) {
4940c16b537SWarner Losh           STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
4950c16b537SWarner Losh           STACK_PUSH(c, last, depth, limit);
4960c16b537SWarner Losh           last = a;
4970c16b537SWarner Losh         } else if((a - first) <= (c - b)) {
4980c16b537SWarner Losh           STACK_PUSH(c, last, depth, limit);
4990c16b537SWarner Losh           STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
5000c16b537SWarner Losh           last = a;
5010c16b537SWarner Losh         } else {
5020c16b537SWarner Losh           STACK_PUSH(c, last, depth, limit);
5030c16b537SWarner Losh           STACK_PUSH(first, a, depth, limit);
5040c16b537SWarner Losh           first = b, last = c, depth += 1, limit = ss_ilg(c - b);
5050c16b537SWarner Losh         }
5060c16b537SWarner Losh       } else {
5070c16b537SWarner Losh         if((a - first) <= (c - b)) {
5080c16b537SWarner Losh           STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
5090c16b537SWarner Losh           STACK_PUSH(first, a, depth, limit);
5100c16b537SWarner Losh           first = c;
5110c16b537SWarner Losh         } else if((last - c) <= (c - b)) {
5120c16b537SWarner Losh           STACK_PUSH(first, a, depth, limit);
5130c16b537SWarner Losh           STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
5140c16b537SWarner Losh           first = c;
5150c16b537SWarner Losh         } else {
5160c16b537SWarner Losh           STACK_PUSH(first, a, depth, limit);
5170c16b537SWarner Losh           STACK_PUSH(c, last, depth, limit);
5180c16b537SWarner Losh           first = b, last = c, depth += 1, limit = ss_ilg(c - b);
5190c16b537SWarner Losh         }
5200c16b537SWarner Losh       }
5210c16b537SWarner Losh     } else {
5220c16b537SWarner Losh       limit += 1;
5230c16b537SWarner Losh       if(Td[PA[*first] - 1] < v) {
5240c16b537SWarner Losh         first = ss_partition(PA, first, last, depth);
5250c16b537SWarner Losh         limit = ss_ilg(last - first);
5260c16b537SWarner Losh       }
5270c16b537SWarner Losh       depth += 1;
5280c16b537SWarner Losh     }
5290c16b537SWarner Losh   }
5300c16b537SWarner Losh #undef STACK_SIZE
5310c16b537SWarner Losh }
5320c16b537SWarner Losh 
5330c16b537SWarner Losh #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
5340c16b537SWarner Losh 
5350c16b537SWarner Losh 
5360c16b537SWarner Losh /*---------------------------------------------------------------------------*/
5370c16b537SWarner Losh 
5380c16b537SWarner Losh #if SS_BLOCKSIZE != 0
5390c16b537SWarner Losh 
5400c16b537SWarner Losh static INLINE
5410c16b537SWarner Losh void
ss_blockswap(int * a,int * b,int n)5420c16b537SWarner Losh ss_blockswap(int *a, int *b, int n) {
5430c16b537SWarner Losh   int t;
5440c16b537SWarner Losh   for(; 0 < n; --n, ++a, ++b) {
5450c16b537SWarner Losh     t = *a, *a = *b, *b = t;
5460c16b537SWarner Losh   }
5470c16b537SWarner Losh }
5480c16b537SWarner Losh 
5490c16b537SWarner Losh static INLINE
5500c16b537SWarner Losh void
ss_rotate(int * first,int * middle,int * last)5510c16b537SWarner Losh ss_rotate(int *first, int *middle, int *last) {
5520c16b537SWarner Losh   int *a, *b, t;
5530c16b537SWarner Losh   int l, r;
5540c16b537SWarner Losh   l = middle - first, r = last - middle;
5550c16b537SWarner Losh   for(; (0 < l) && (0 < r);) {
5560c16b537SWarner Losh     if(l == r) { ss_blockswap(first, middle, l); break; }
5570c16b537SWarner Losh     if(l < r) {
5580c16b537SWarner Losh       a = last - 1, b = middle - 1;
5590c16b537SWarner Losh       t = *a;
5600c16b537SWarner Losh       do {
5610c16b537SWarner Losh         *a-- = *b, *b-- = *a;
5620c16b537SWarner Losh         if(b < first) {
5630c16b537SWarner Losh           *a = t;
5640c16b537SWarner Losh           last = a;
5650c16b537SWarner Losh           if((r -= l + 1) <= l) { break; }
5660c16b537SWarner Losh           a -= 1, b = middle - 1;
5670c16b537SWarner Losh           t = *a;
5680c16b537SWarner Losh         }
5690c16b537SWarner Losh       } while(1);
5700c16b537SWarner Losh     } else {
5710c16b537SWarner Losh       a = first, b = middle;
5720c16b537SWarner Losh       t = *a;
5730c16b537SWarner Losh       do {
5740c16b537SWarner Losh         *a++ = *b, *b++ = *a;
5750c16b537SWarner Losh         if(last <= b) {
5760c16b537SWarner Losh           *a = t;
5770c16b537SWarner Losh           first = a + 1;
5780c16b537SWarner Losh           if((l -= r + 1) <= r) { break; }
5790c16b537SWarner Losh           a += 1, b = middle;
5800c16b537SWarner Losh           t = *a;
5810c16b537SWarner Losh         }
5820c16b537SWarner Losh       } while(1);
5830c16b537SWarner Losh     }
5840c16b537SWarner Losh   }
5850c16b537SWarner Losh }
5860c16b537SWarner Losh 
5870c16b537SWarner Losh 
5880c16b537SWarner Losh /*---------------------------------------------------------------------------*/
5890c16b537SWarner Losh 
5900c16b537SWarner Losh static
5910c16b537SWarner Losh void
ss_inplacemerge(const unsigned char * T,const int * PA,int * first,int * middle,int * last,int depth)5920c16b537SWarner Losh ss_inplacemerge(const unsigned char *T, const int *PA,
5930c16b537SWarner Losh                 int *first, int *middle, int *last,
5940c16b537SWarner Losh                 int depth) {
5950c16b537SWarner Losh   const int *p;
5960c16b537SWarner Losh   int *a, *b;
5970c16b537SWarner Losh   int len, half;
5980c16b537SWarner Losh   int q, r;
5990c16b537SWarner Losh   int x;
6000c16b537SWarner Losh 
6010c16b537SWarner Losh   for(;;) {
6020c16b537SWarner Losh     if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); }
6030c16b537SWarner Losh     else                { x = 0; p = PA +  *(last - 1); }
6040c16b537SWarner Losh     for(a = first, len = middle - first, half = len >> 1, r = -1;
6050c16b537SWarner Losh         0 < len;
6060c16b537SWarner Losh         len = half, half >>= 1) {
6070c16b537SWarner Losh       b = a + half;
6080c16b537SWarner Losh       q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth);
6090c16b537SWarner Losh       if(q < 0) {
6100c16b537SWarner Losh         a = b + 1;
6110c16b537SWarner Losh         half -= (len & 1) ^ 1;
6120c16b537SWarner Losh       } else {
6130c16b537SWarner Losh         r = q;
6140c16b537SWarner Losh       }
6150c16b537SWarner Losh     }
6160c16b537SWarner Losh     if(a < middle) {
6170c16b537SWarner Losh       if(r == 0) { *a = ~*a; }
6180c16b537SWarner Losh       ss_rotate(a, middle, last);
6190c16b537SWarner Losh       last -= middle - a;
6200c16b537SWarner Losh       middle = a;
6210c16b537SWarner Losh       if(first == middle) { break; }
6220c16b537SWarner Losh     }
6230c16b537SWarner Losh     --last;
6240c16b537SWarner Losh     if(x != 0) { while(*--last < 0) { } }
6250c16b537SWarner Losh     if(middle == last) { break; }
6260c16b537SWarner Losh   }
6270c16b537SWarner Losh }
6280c16b537SWarner Losh 
6290c16b537SWarner Losh 
6300c16b537SWarner Losh /*---------------------------------------------------------------------------*/
6310c16b537SWarner Losh 
6320c16b537SWarner Losh /* Merge-forward with internal buffer. */
6330c16b537SWarner Losh static
6340c16b537SWarner Losh void
ss_mergeforward(const unsigned char * T,const int * PA,int * first,int * middle,int * last,int * buf,int depth)6350c16b537SWarner Losh ss_mergeforward(const unsigned char *T, const int *PA,
6360c16b537SWarner Losh                 int *first, int *middle, int *last,
6370c16b537SWarner Losh                 int *buf, int depth) {
6380c16b537SWarner Losh   int *a, *b, *c, *bufend;
6390c16b537SWarner Losh   int t;
6400c16b537SWarner Losh   int r;
6410c16b537SWarner Losh 
6420c16b537SWarner Losh   bufend = buf + (middle - first) - 1;
6430c16b537SWarner Losh   ss_blockswap(buf, first, middle - first);
6440c16b537SWarner Losh 
6450c16b537SWarner Losh   for(t = *(a = first), b = buf, c = middle;;) {
6460c16b537SWarner Losh     r = ss_compare(T, PA + *b, PA + *c, depth);
6470c16b537SWarner Losh     if(r < 0) {
6480c16b537SWarner Losh       do {
6490c16b537SWarner Losh         *a++ = *b;
6500c16b537SWarner Losh         if(bufend <= b) { *bufend = t; return; }
6510c16b537SWarner Losh         *b++ = *a;
6520c16b537SWarner Losh       } while(*b < 0);
6530c16b537SWarner Losh     } else if(r > 0) {
6540c16b537SWarner Losh       do {
6550c16b537SWarner Losh         *a++ = *c, *c++ = *a;
6560c16b537SWarner Losh         if(last <= c) {
6570c16b537SWarner Losh           while(b < bufend) { *a++ = *b, *b++ = *a; }
6580c16b537SWarner Losh           *a = *b, *b = t;
6590c16b537SWarner Losh           return;
6600c16b537SWarner Losh         }
6610c16b537SWarner Losh       } while(*c < 0);
6620c16b537SWarner Losh     } else {
6630c16b537SWarner Losh       *c = ~*c;
6640c16b537SWarner Losh       do {
6650c16b537SWarner Losh         *a++ = *b;
6660c16b537SWarner Losh         if(bufend <= b) { *bufend = t; return; }
6670c16b537SWarner Losh         *b++ = *a;
6680c16b537SWarner Losh       } while(*b < 0);
6690c16b537SWarner Losh 
6700c16b537SWarner Losh       do {
6710c16b537SWarner Losh         *a++ = *c, *c++ = *a;
6720c16b537SWarner Losh         if(last <= c) {
6730c16b537SWarner Losh           while(b < bufend) { *a++ = *b, *b++ = *a; }
6740c16b537SWarner Losh           *a = *b, *b = t;
6750c16b537SWarner Losh           return;
6760c16b537SWarner Losh         }
6770c16b537SWarner Losh       } while(*c < 0);
6780c16b537SWarner Losh     }
6790c16b537SWarner Losh   }
6800c16b537SWarner Losh }
6810c16b537SWarner Losh 
6820c16b537SWarner Losh /* Merge-backward with internal buffer. */
6830c16b537SWarner Losh static
6840c16b537SWarner Losh void
ss_mergebackward(const unsigned char * T,const int * PA,int * first,int * middle,int * last,int * buf,int depth)6850c16b537SWarner Losh ss_mergebackward(const unsigned char *T, const int *PA,
6860c16b537SWarner Losh                  int *first, int *middle, int *last,
6870c16b537SWarner Losh                  int *buf, int depth) {
6880c16b537SWarner Losh   const int *p1, *p2;
6890c16b537SWarner Losh   int *a, *b, *c, *bufend;
6900c16b537SWarner Losh   int t;
6910c16b537SWarner Losh   int r;
6920c16b537SWarner Losh   int x;
6930c16b537SWarner Losh 
6940c16b537SWarner Losh   bufend = buf + (last - middle) - 1;
6950c16b537SWarner Losh   ss_blockswap(buf, middle, last - middle);
6960c16b537SWarner Losh 
6970c16b537SWarner Losh   x = 0;
6980c16b537SWarner Losh   if(*bufend < 0)       { p1 = PA + ~*bufend; x |= 1; }
6990c16b537SWarner Losh   else                  { p1 = PA +  *bufend; }
7000c16b537SWarner Losh   if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; }
7010c16b537SWarner Losh   else                  { p2 = PA +  *(middle - 1); }
7020c16b537SWarner Losh   for(t = *(a = last - 1), b = bufend, c = middle - 1;;) {
7030c16b537SWarner Losh     r = ss_compare(T, p1, p2, depth);
7040c16b537SWarner Losh     if(0 < r) {
7050c16b537SWarner Losh       if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
7060c16b537SWarner Losh       *a-- = *b;
7070c16b537SWarner Losh       if(b <= buf) { *buf = t; break; }
7080c16b537SWarner Losh       *b-- = *a;
7090c16b537SWarner Losh       if(*b < 0) { p1 = PA + ~*b; x |= 1; }
7100c16b537SWarner Losh       else       { p1 = PA +  *b; }
7110c16b537SWarner Losh     } else if(r < 0) {
7120c16b537SWarner Losh       if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
7130c16b537SWarner Losh       *a-- = *c, *c-- = *a;
7140c16b537SWarner Losh       if(c < first) {
7150c16b537SWarner Losh         while(buf < b) { *a-- = *b, *b-- = *a; }
7160c16b537SWarner Losh         *a = *b, *b = t;
7170c16b537SWarner Losh         break;
7180c16b537SWarner Losh       }
7190c16b537SWarner Losh       if(*c < 0) { p2 = PA + ~*c; x |= 2; }
7200c16b537SWarner Losh       else       { p2 = PA +  *c; }
7210c16b537SWarner Losh     } else {
7220c16b537SWarner Losh       if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
7230c16b537SWarner Losh       *a-- = ~*b;
7240c16b537SWarner Losh       if(b <= buf) { *buf = t; break; }
7250c16b537SWarner Losh       *b-- = *a;
7260c16b537SWarner Losh       if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
7270c16b537SWarner Losh       *a-- = *c, *c-- = *a;
7280c16b537SWarner Losh       if(c < first) {
7290c16b537SWarner Losh         while(buf < b) { *a-- = *b, *b-- = *a; }
7300c16b537SWarner Losh         *a = *b, *b = t;
7310c16b537SWarner Losh         break;
7320c16b537SWarner Losh       }
7330c16b537SWarner Losh       if(*b < 0) { p1 = PA + ~*b; x |= 1; }
7340c16b537SWarner Losh       else       { p1 = PA +  *b; }
7350c16b537SWarner Losh       if(*c < 0) { p2 = PA + ~*c; x |= 2; }
7360c16b537SWarner Losh       else       { p2 = PA +  *c; }
7370c16b537SWarner Losh     }
7380c16b537SWarner Losh   }
7390c16b537SWarner Losh }
7400c16b537SWarner Losh 
7410c16b537SWarner Losh /* D&C based merge. */
7420c16b537SWarner Losh static
7430c16b537SWarner Losh void
ss_swapmerge(const unsigned char * T,const int * PA,int * first,int * middle,int * last,int * buf,int bufsize,int depth)7440c16b537SWarner Losh ss_swapmerge(const unsigned char *T, const int *PA,
7450c16b537SWarner Losh              int *first, int *middle, int *last,
7460c16b537SWarner Losh              int *buf, int bufsize, int depth) {
7470c16b537SWarner Losh #define STACK_SIZE SS_SMERGE_STACKSIZE
7480c16b537SWarner Losh #define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
7490c16b537SWarner Losh #define MERGE_CHECK(a, b, c)\
7500c16b537SWarner Losh   do {\
7510c16b537SWarner Losh     if(((c) & 1) ||\
7520c16b537SWarner Losh        (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\
7530c16b537SWarner Losh       *(a) = ~*(a);\
7540c16b537SWarner Losh     }\
7550c16b537SWarner Losh     if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\
7560c16b537SWarner Losh       *(b) = ~*(b);\
7570c16b537SWarner Losh     }\
7580c16b537SWarner Losh   } while(0)
7590c16b537SWarner Losh   struct { int *a, *b, *c; int d; } stack[STACK_SIZE];
7600c16b537SWarner Losh   int *l, *r, *lm, *rm;
7610c16b537SWarner Losh   int m, len, half;
7620c16b537SWarner Losh   int ssize;
7630c16b537SWarner Losh   int check, next;
7640c16b537SWarner Losh 
7650c16b537SWarner Losh   for(check = 0, ssize = 0;;) {
7660c16b537SWarner Losh     if((last - middle) <= bufsize) {
7670c16b537SWarner Losh       if((first < middle) && (middle < last)) {
7680c16b537SWarner Losh         ss_mergebackward(T, PA, first, middle, last, buf, depth);
7690c16b537SWarner Losh       }
7700c16b537SWarner Losh       MERGE_CHECK(first, last, check);
7710c16b537SWarner Losh       STACK_POP(first, middle, last, check);
7720c16b537SWarner Losh       continue;
7730c16b537SWarner Losh     }
7740c16b537SWarner Losh 
7750c16b537SWarner Losh     if((middle - first) <= bufsize) {
7760c16b537SWarner Losh       if(first < middle) {
7770c16b537SWarner Losh         ss_mergeforward(T, PA, first, middle, last, buf, depth);
7780c16b537SWarner Losh       }
7790c16b537SWarner Losh       MERGE_CHECK(first, last, check);
7800c16b537SWarner Losh       STACK_POP(first, middle, last, check);
7810c16b537SWarner Losh       continue;
7820c16b537SWarner Losh     }
7830c16b537SWarner Losh 
7840c16b537SWarner Losh     for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1;
7850c16b537SWarner Losh         0 < len;
7860c16b537SWarner Losh         len = half, half >>= 1) {
7870c16b537SWarner Losh       if(ss_compare(T, PA + GETIDX(*(middle + m + half)),
7880c16b537SWarner Losh                        PA + GETIDX(*(middle - m - half - 1)), depth) < 0) {
7890c16b537SWarner Losh         m += half + 1;
7900c16b537SWarner Losh         half -= (len & 1) ^ 1;
7910c16b537SWarner Losh       }
7920c16b537SWarner Losh     }
7930c16b537SWarner Losh 
7940c16b537SWarner Losh     if(0 < m) {
7950c16b537SWarner Losh       lm = middle - m, rm = middle + m;
7960c16b537SWarner Losh       ss_blockswap(lm, middle, m);
7970c16b537SWarner Losh       l = r = middle, next = 0;
7980c16b537SWarner Losh       if(rm < last) {
7990c16b537SWarner Losh         if(*rm < 0) {
8000c16b537SWarner Losh           *rm = ~*rm;
8010c16b537SWarner Losh           if(first < lm) { for(; *--l < 0;) { } next |= 4; }
8020c16b537SWarner Losh           next |= 1;
8030c16b537SWarner Losh         } else if(first < lm) {
8040c16b537SWarner Losh           for(; *r < 0; ++r) { }
8050c16b537SWarner Losh           next |= 2;
8060c16b537SWarner Losh         }
8070c16b537SWarner Losh       }
8080c16b537SWarner Losh 
8090c16b537SWarner Losh       if((l - first) <= (last - r)) {
8100c16b537SWarner Losh         STACK_PUSH(r, rm, last, (next & 3) | (check & 4));
8110c16b537SWarner Losh         middle = lm, last = l, check = (check & 3) | (next & 4);
8120c16b537SWarner Losh       } else {
8130c16b537SWarner Losh         if((next & 2) && (r == middle)) { next ^= 6; }
8140c16b537SWarner Losh         STACK_PUSH(first, lm, l, (check & 3) | (next & 4));
8150c16b537SWarner Losh         first = r, middle = rm, check = (next & 3) | (check & 4);
8160c16b537SWarner Losh       }
8170c16b537SWarner Losh     } else {
8180c16b537SWarner Losh       if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) {
8190c16b537SWarner Losh         *middle = ~*middle;
8200c16b537SWarner Losh       }
8210c16b537SWarner Losh       MERGE_CHECK(first, last, check);
8220c16b537SWarner Losh       STACK_POP(first, middle, last, check);
8230c16b537SWarner Losh     }
8240c16b537SWarner Losh   }
8250c16b537SWarner Losh #undef STACK_SIZE
8260c16b537SWarner Losh }
8270c16b537SWarner Losh 
8280c16b537SWarner Losh #endif /* SS_BLOCKSIZE != 0 */
8290c16b537SWarner Losh 
8300c16b537SWarner Losh 
8310c16b537SWarner Losh /*---------------------------------------------------------------------------*/
8320c16b537SWarner Losh 
8330c16b537SWarner Losh /* Substring sort */
8340c16b537SWarner Losh static
8350c16b537SWarner Losh void
sssort(const unsigned char * T,const int * PA,int * first,int * last,int * buf,int bufsize,int depth,int n,int lastsuffix)8360c16b537SWarner Losh sssort(const unsigned char *T, const int *PA,
8370c16b537SWarner Losh        int *first, int *last,
8380c16b537SWarner Losh        int *buf, int bufsize,
8390c16b537SWarner Losh        int depth, int n, int lastsuffix) {
8400c16b537SWarner Losh   int *a;
8410c16b537SWarner Losh #if SS_BLOCKSIZE != 0
8420c16b537SWarner Losh   int *b, *middle, *curbuf;
8430c16b537SWarner Losh   int j, k, curbufsize, limit;
8440c16b537SWarner Losh #endif
8450c16b537SWarner Losh   int i;
8460c16b537SWarner Losh 
8470c16b537SWarner Losh   if(lastsuffix != 0) { ++first; }
8480c16b537SWarner Losh 
8490c16b537SWarner Losh #if SS_BLOCKSIZE == 0
8500c16b537SWarner Losh   ss_mintrosort(T, PA, first, last, depth);
8510c16b537SWarner Losh #else
8520c16b537SWarner Losh   if((bufsize < SS_BLOCKSIZE) &&
8530c16b537SWarner Losh       (bufsize < (last - first)) &&
8540c16b537SWarner Losh       (bufsize < (limit = ss_isqrt(last - first)))) {
8550c16b537SWarner Losh     if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; }
8560c16b537SWarner Losh     buf = middle = last - limit, bufsize = limit;
8570c16b537SWarner Losh   } else {
8580c16b537SWarner Losh     middle = last, limit = 0;
8590c16b537SWarner Losh   }
8600c16b537SWarner Losh   for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) {
8610c16b537SWarner Losh #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
8620c16b537SWarner Losh     ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth);
8630c16b537SWarner Losh #elif 1 < SS_BLOCKSIZE
8640c16b537SWarner Losh     ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth);
8650c16b537SWarner Losh #endif
8660c16b537SWarner Losh     curbufsize = last - (a + SS_BLOCKSIZE);
8670c16b537SWarner Losh     curbuf = a + SS_BLOCKSIZE;
8680c16b537SWarner Losh     if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
8690c16b537SWarner Losh     for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {
8700c16b537SWarner Losh       ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
8710c16b537SWarner Losh     }
8720c16b537SWarner Losh   }
8730c16b537SWarner Losh #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
8740c16b537SWarner Losh   ss_mintrosort(T, PA, a, middle, depth);
8750c16b537SWarner Losh #elif 1 < SS_BLOCKSIZE
8760c16b537SWarner Losh   ss_insertionsort(T, PA, a, middle, depth);
8770c16b537SWarner Losh #endif
8780c16b537SWarner Losh   for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) {
8790c16b537SWarner Losh     if(i & 1) {
8800c16b537SWarner Losh       ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth);
8810c16b537SWarner Losh       a -= k;
8820c16b537SWarner Losh     }
8830c16b537SWarner Losh   }
8840c16b537SWarner Losh   if(limit != 0) {
8850c16b537SWarner Losh #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
8860c16b537SWarner Losh     ss_mintrosort(T, PA, middle, last, depth);
8870c16b537SWarner Losh #elif 1 < SS_BLOCKSIZE
8880c16b537SWarner Losh     ss_insertionsort(T, PA, middle, last, depth);
8890c16b537SWarner Losh #endif
8900c16b537SWarner Losh     ss_inplacemerge(T, PA, first, middle, last, depth);
8910c16b537SWarner Losh   }
8920c16b537SWarner Losh #endif
8930c16b537SWarner Losh 
8940c16b537SWarner Losh   if(lastsuffix != 0) {
8950c16b537SWarner Losh     /* Insert last type B* suffix. */
8960c16b537SWarner Losh     int PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2;
8970c16b537SWarner Losh     for(a = first, i = *(first - 1);
8980c16b537SWarner Losh         (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth)));
8990c16b537SWarner Losh         ++a) {
9000c16b537SWarner Losh       *(a - 1) = *a;
9010c16b537SWarner Losh     }
9020c16b537SWarner Losh     *(a - 1) = i;
9030c16b537SWarner Losh   }
9040c16b537SWarner Losh }
9050c16b537SWarner Losh 
9060c16b537SWarner Losh 
9070c16b537SWarner Losh /*---------------------------------------------------------------------------*/
9080c16b537SWarner Losh 
9090c16b537SWarner Losh static INLINE
9100c16b537SWarner Losh int
tr_ilg(int n)9110c16b537SWarner Losh tr_ilg(int n) {
9120c16b537SWarner Losh   return (n & 0xffff0000) ?
9130c16b537SWarner Losh           ((n & 0xff000000) ?
9140c16b537SWarner Losh             24 + lg_table[(n >> 24) & 0xff] :
9150c16b537SWarner Losh             16 + lg_table[(n >> 16) & 0xff]) :
9160c16b537SWarner Losh           ((n & 0x0000ff00) ?
9170c16b537SWarner Losh              8 + lg_table[(n >>  8) & 0xff] :
9180c16b537SWarner Losh              0 + lg_table[(n >>  0) & 0xff]);
9190c16b537SWarner Losh }
9200c16b537SWarner Losh 
9210c16b537SWarner Losh 
9220c16b537SWarner Losh /*---------------------------------------------------------------------------*/
9230c16b537SWarner Losh 
9240c16b537SWarner Losh /* Simple insertionsort for small size groups. */
9250c16b537SWarner Losh static
9260c16b537SWarner Losh void
tr_insertionsort(const int * ISAd,int * first,int * last)9270c16b537SWarner Losh tr_insertionsort(const int *ISAd, int *first, int *last) {
9280c16b537SWarner Losh   int *a, *b;
9290c16b537SWarner Losh   int t, r;
9300c16b537SWarner Losh 
9310c16b537SWarner Losh   for(a = first + 1; a < last; ++a) {
9320c16b537SWarner Losh     for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) {
9330c16b537SWarner Losh       do { *(b + 1) = *b; } while((first <= --b) && (*b < 0));
9340c16b537SWarner Losh       if(b < first) { break; }
9350c16b537SWarner Losh     }
9360c16b537SWarner Losh     if(r == 0) { *b = ~*b; }
9370c16b537SWarner Losh     *(b + 1) = t;
9380c16b537SWarner Losh   }
9390c16b537SWarner Losh }
9400c16b537SWarner Losh 
9410c16b537SWarner Losh 
9420c16b537SWarner Losh /*---------------------------------------------------------------------------*/
9430c16b537SWarner Losh 
9440c16b537SWarner Losh static INLINE
9450c16b537SWarner Losh void
tr_fixdown(const int * ISAd,int * SA,int i,int size)9460c16b537SWarner Losh tr_fixdown(const int *ISAd, int *SA, int i, int size) {
9470c16b537SWarner Losh   int j, k;
9480c16b537SWarner Losh   int v;
9490c16b537SWarner Losh   int c, d, e;
9500c16b537SWarner Losh 
9510c16b537SWarner Losh   for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
9520c16b537SWarner Losh     d = ISAd[SA[k = j++]];
9530c16b537SWarner Losh     if(d < (e = ISAd[SA[j]])) { k = j; d = e; }
9540c16b537SWarner Losh     if(d <= c) { break; }
9550c16b537SWarner Losh   }
9560c16b537SWarner Losh   SA[i] = v;
9570c16b537SWarner Losh }
9580c16b537SWarner Losh 
9590c16b537SWarner Losh /* Simple top-down heapsort. */
9600c16b537SWarner Losh static
9610c16b537SWarner Losh void
tr_heapsort(const int * ISAd,int * SA,int size)9620c16b537SWarner Losh tr_heapsort(const int *ISAd, int *SA, int size) {
9630c16b537SWarner Losh   int i, m;
9640c16b537SWarner Losh   int t;
9650c16b537SWarner Losh 
9660c16b537SWarner Losh   m = size;
9670c16b537SWarner Losh   if((size % 2) == 0) {
9680c16b537SWarner Losh     m--;
9690c16b537SWarner Losh     if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); }
9700c16b537SWarner Losh   }
9710c16b537SWarner Losh 
9720c16b537SWarner Losh   for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); }
9730c16b537SWarner Losh   if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); }
9740c16b537SWarner Losh   for(i = m - 1; 0 < i; --i) {
9750c16b537SWarner Losh     t = SA[0], SA[0] = SA[i];
9760c16b537SWarner Losh     tr_fixdown(ISAd, SA, 0, i);
9770c16b537SWarner Losh     SA[i] = t;
9780c16b537SWarner Losh   }
9790c16b537SWarner Losh }
9800c16b537SWarner Losh 
9810c16b537SWarner Losh 
9820c16b537SWarner Losh /*---------------------------------------------------------------------------*/
9830c16b537SWarner Losh 
9840c16b537SWarner Losh /* Returns the median of three elements. */
9850c16b537SWarner Losh static INLINE
9860c16b537SWarner Losh int *
tr_median3(const int * ISAd,int * v1,int * v2,int * v3)9870c16b537SWarner Losh tr_median3(const int *ISAd, int *v1, int *v2, int *v3) {
9880c16b537SWarner Losh   int *t;
9890c16b537SWarner Losh   if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); }
9900c16b537SWarner Losh   if(ISAd[*v2] > ISAd[*v3]) {
9910c16b537SWarner Losh     if(ISAd[*v1] > ISAd[*v3]) { return v1; }
9920c16b537SWarner Losh     else { return v3; }
9930c16b537SWarner Losh   }
9940c16b537SWarner Losh   return v2;
9950c16b537SWarner Losh }
9960c16b537SWarner Losh 
9970c16b537SWarner Losh /* Returns the median of five elements. */
9980c16b537SWarner Losh static INLINE
9990c16b537SWarner Losh int *
tr_median5(const int * ISAd,int * v1,int * v2,int * v3,int * v4,int * v5)10000c16b537SWarner Losh tr_median5(const int *ISAd,
10010c16b537SWarner Losh            int *v1, int *v2, int *v3, int *v4, int *v5) {
10020c16b537SWarner Losh   int *t;
10030c16b537SWarner Losh   if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); }
10040c16b537SWarner Losh   if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); }
10050c16b537SWarner Losh   if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); }
10060c16b537SWarner Losh   if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); }
10070c16b537SWarner Losh   if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); }
10080c16b537SWarner Losh   if(ISAd[*v3] > ISAd[*v4]) { return v4; }
10090c16b537SWarner Losh   return v3;
10100c16b537SWarner Losh }
10110c16b537SWarner Losh 
10120c16b537SWarner Losh /* Returns the pivot element. */
10130c16b537SWarner Losh static INLINE
10140c16b537SWarner Losh int *
tr_pivot(const int * ISAd,int * first,int * last)10150c16b537SWarner Losh tr_pivot(const int *ISAd, int *first, int *last) {
10160c16b537SWarner Losh   int *middle;
10170c16b537SWarner Losh   int t;
10180c16b537SWarner Losh 
10190c16b537SWarner Losh   t = last - first;
10200c16b537SWarner Losh   middle = first + t / 2;
10210c16b537SWarner Losh 
10220c16b537SWarner Losh   if(t <= 512) {
10230c16b537SWarner Losh     if(t <= 32) {
10240c16b537SWarner Losh       return tr_median3(ISAd, first, middle, last - 1);
10250c16b537SWarner Losh     } else {
10260c16b537SWarner Losh       t >>= 2;
10270c16b537SWarner Losh       return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1);
10280c16b537SWarner Losh     }
10290c16b537SWarner Losh   }
10300c16b537SWarner Losh   t >>= 3;
10310c16b537SWarner Losh   first  = tr_median3(ISAd, first, first + t, first + (t << 1));
10320c16b537SWarner Losh   middle = tr_median3(ISAd, middle - t, middle, middle + t);
10330c16b537SWarner Losh   last   = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1);
10340c16b537SWarner Losh   return tr_median3(ISAd, first, middle, last);
10350c16b537SWarner Losh }
10360c16b537SWarner Losh 
10370c16b537SWarner Losh 
10380c16b537SWarner Losh /*---------------------------------------------------------------------------*/
10390c16b537SWarner Losh 
10400c16b537SWarner Losh typedef struct _trbudget_t trbudget_t;
10410c16b537SWarner Losh struct _trbudget_t {
10420c16b537SWarner Losh   int chance;
10430c16b537SWarner Losh   int remain;
10440c16b537SWarner Losh   int incval;
10450c16b537SWarner Losh   int count;
10460c16b537SWarner Losh };
10470c16b537SWarner Losh 
10480c16b537SWarner Losh static INLINE
10490c16b537SWarner Losh void
trbudget_init(trbudget_t * budget,int chance,int incval)10500c16b537SWarner Losh trbudget_init(trbudget_t *budget, int chance, int incval) {
10510c16b537SWarner Losh   budget->chance = chance;
10520c16b537SWarner Losh   budget->remain = budget->incval = incval;
10530c16b537SWarner Losh }
10540c16b537SWarner Losh 
10550c16b537SWarner Losh static INLINE
10560c16b537SWarner Losh int
trbudget_check(trbudget_t * budget,int size)10570c16b537SWarner Losh trbudget_check(trbudget_t *budget, int size) {
10580c16b537SWarner Losh   if(size <= budget->remain) { budget->remain -= size; return 1; }
10590c16b537SWarner Losh   if(budget->chance == 0) { budget->count += size; return 0; }
10600c16b537SWarner Losh   budget->remain += budget->incval - size;
10610c16b537SWarner Losh   budget->chance -= 1;
10620c16b537SWarner Losh   return 1;
10630c16b537SWarner Losh }
10640c16b537SWarner Losh 
10650c16b537SWarner Losh 
10660c16b537SWarner Losh /*---------------------------------------------------------------------------*/
10670c16b537SWarner Losh 
10680c16b537SWarner Losh static INLINE
10690c16b537SWarner Losh void
tr_partition(const int * ISAd,int * first,int * middle,int * last,int ** pa,int ** pb,int v)10700c16b537SWarner Losh tr_partition(const int *ISAd,
10710c16b537SWarner Losh              int *first, int *middle, int *last,
10720c16b537SWarner Losh              int **pa, int **pb, int v) {
10730c16b537SWarner Losh   int *a, *b, *c, *d, *e, *f;
10740c16b537SWarner Losh   int t, s;
10750c16b537SWarner Losh   int x = 0;
10760c16b537SWarner Losh 
10770c16b537SWarner Losh   for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { }
10780c16b537SWarner Losh   if(((a = b) < last) && (x < v)) {
10790c16b537SWarner Losh     for(; (++b < last) && ((x = ISAd[*b]) <= v);) {
10800c16b537SWarner Losh       if(x == v) { SWAP(*b, *a); ++a; }
10810c16b537SWarner Losh     }
10820c16b537SWarner Losh   }
10830c16b537SWarner Losh   for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { }
10840c16b537SWarner Losh   if((b < (d = c)) && (x > v)) {
10850c16b537SWarner Losh     for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
10860c16b537SWarner Losh       if(x == v) { SWAP(*c, *d); --d; }
10870c16b537SWarner Losh     }
10880c16b537SWarner Losh   }
10890c16b537SWarner Losh   for(; b < c;) {
10900c16b537SWarner Losh     SWAP(*b, *c);
10910c16b537SWarner Losh     for(; (++b < c) && ((x = ISAd[*b]) <= v);) {
10920c16b537SWarner Losh       if(x == v) { SWAP(*b, *a); ++a; }
10930c16b537SWarner Losh     }
10940c16b537SWarner Losh     for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
10950c16b537SWarner Losh       if(x == v) { SWAP(*c, *d); --d; }
10960c16b537SWarner Losh     }
10970c16b537SWarner Losh   }
10980c16b537SWarner Losh 
10990c16b537SWarner Losh   if(a <= d) {
11000c16b537SWarner Losh     c = b - 1;
11010c16b537SWarner Losh     if((s = a - first) > (t = b - a)) { s = t; }
11020c16b537SWarner Losh     for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
11030c16b537SWarner Losh     if((s = d - c) > (t = last - d - 1)) { s = t; }
11040c16b537SWarner Losh     for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
11050c16b537SWarner Losh     first += (b - a), last -= (d - c);
11060c16b537SWarner Losh   }
11070c16b537SWarner Losh   *pa = first, *pb = last;
11080c16b537SWarner Losh }
11090c16b537SWarner Losh 
11100c16b537SWarner Losh static
11110c16b537SWarner Losh void
tr_copy(int * ISA,const int * SA,int * first,int * a,int * b,int * last,int depth)11120c16b537SWarner Losh tr_copy(int *ISA, const int *SA,
11130c16b537SWarner Losh         int *first, int *a, int *b, int *last,
11140c16b537SWarner Losh         int depth) {
11150c16b537SWarner Losh   /* sort suffixes of middle partition
11160c16b537SWarner Losh      by using sorted order of suffixes of left and right partition. */
11170c16b537SWarner Losh   int *c, *d, *e;
11180c16b537SWarner Losh   int s, v;
11190c16b537SWarner Losh 
11200c16b537SWarner Losh   v = b - SA - 1;
11210c16b537SWarner Losh   for(c = first, d = a - 1; c <= d; ++c) {
11220c16b537SWarner Losh     if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
11230c16b537SWarner Losh       *++d = s;
11240c16b537SWarner Losh       ISA[s] = d - SA;
11250c16b537SWarner Losh     }
11260c16b537SWarner Losh   }
11270c16b537SWarner Losh   for(c = last - 1, e = d + 1, d = b; e < d; --c) {
11280c16b537SWarner Losh     if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
11290c16b537SWarner Losh       *--d = s;
11300c16b537SWarner Losh       ISA[s] = d - SA;
11310c16b537SWarner Losh     }
11320c16b537SWarner Losh   }
11330c16b537SWarner Losh }
11340c16b537SWarner Losh 
11350c16b537SWarner Losh static
11360c16b537SWarner Losh void
tr_partialcopy(int * ISA,const int * SA,int * first,int * a,int * b,int * last,int depth)11370c16b537SWarner Losh tr_partialcopy(int *ISA, const int *SA,
11380c16b537SWarner Losh                int *first, int *a, int *b, int *last,
11390c16b537SWarner Losh                int depth) {
11400c16b537SWarner Losh   int *c, *d, *e;
11410c16b537SWarner Losh   int s, v;
11420c16b537SWarner Losh   int rank, lastrank, newrank = -1;
11430c16b537SWarner Losh 
11440c16b537SWarner Losh   v = b - SA - 1;
11450c16b537SWarner Losh   lastrank = -1;
11460c16b537SWarner Losh   for(c = first, d = a - 1; c <= d; ++c) {
11470c16b537SWarner Losh     if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
11480c16b537SWarner Losh       *++d = s;
11490c16b537SWarner Losh       rank = ISA[s + depth];
11500c16b537SWarner Losh       if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
11510c16b537SWarner Losh       ISA[s] = newrank;
11520c16b537SWarner Losh     }
11530c16b537SWarner Losh   }
11540c16b537SWarner Losh 
11550c16b537SWarner Losh   lastrank = -1;
11560c16b537SWarner Losh   for(e = d; first <= e; --e) {
11570c16b537SWarner Losh     rank = ISA[*e];
11580c16b537SWarner Losh     if(lastrank != rank) { lastrank = rank; newrank = e - SA; }
11590c16b537SWarner Losh     if(newrank != rank) { ISA[*e] = newrank; }
11600c16b537SWarner Losh   }
11610c16b537SWarner Losh 
11620c16b537SWarner Losh   lastrank = -1;
11630c16b537SWarner Losh   for(c = last - 1, e = d + 1, d = b; e < d; --c) {
11640c16b537SWarner Losh     if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
11650c16b537SWarner Losh       *--d = s;
11660c16b537SWarner Losh       rank = ISA[s + depth];
11670c16b537SWarner Losh       if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
11680c16b537SWarner Losh       ISA[s] = newrank;
11690c16b537SWarner Losh     }
11700c16b537SWarner Losh   }
11710c16b537SWarner Losh }
11720c16b537SWarner Losh 
11730c16b537SWarner Losh static
11740c16b537SWarner Losh void
tr_introsort(int * ISA,const int * ISAd,int * SA,int * first,int * last,trbudget_t * budget)11750c16b537SWarner Losh tr_introsort(int *ISA, const int *ISAd,
11760c16b537SWarner Losh              int *SA, int *first, int *last,
11770c16b537SWarner Losh              trbudget_t *budget) {
11780c16b537SWarner Losh #define STACK_SIZE TR_STACKSIZE
11790c16b537SWarner Losh   struct { const int *a; int *b, *c; int d, e; }stack[STACK_SIZE];
11800c16b537SWarner Losh   int *a, *b, *c;
11810c16b537SWarner Losh   int t;
11820c16b537SWarner Losh   int v, x = 0;
11830c16b537SWarner Losh   int incr = ISAd - ISA;
11840c16b537SWarner Losh   int limit, next;
11850c16b537SWarner Losh   int ssize, trlink = -1;
11860c16b537SWarner Losh 
11870c16b537SWarner Losh   for(ssize = 0, limit = tr_ilg(last - first);;) {
11880c16b537SWarner Losh 
11890c16b537SWarner Losh     if(limit < 0) {
11900c16b537SWarner Losh       if(limit == -1) {
11910c16b537SWarner Losh         /* tandem repeat partition */
11920c16b537SWarner Losh         tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1);
11930c16b537SWarner Losh 
11940c16b537SWarner Losh         /* update ranks */
11950c16b537SWarner Losh         if(a < last) {
11960c16b537SWarner Losh           for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
11970c16b537SWarner Losh         }
11980c16b537SWarner Losh         if(b < last) {
11990c16b537SWarner Losh           for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; }
12000c16b537SWarner Losh         }
12010c16b537SWarner Losh 
12020c16b537SWarner Losh         /* push */
12030c16b537SWarner Losh         if(1 < (b - a)) {
12040c16b537SWarner Losh           STACK_PUSH5(NULL, a, b, 0, 0);
12050c16b537SWarner Losh           STACK_PUSH5(ISAd - incr, first, last, -2, trlink);
12060c16b537SWarner Losh           trlink = ssize - 2;
12070c16b537SWarner Losh         }
12080c16b537SWarner Losh         if((a - first) <= (last - b)) {
12090c16b537SWarner Losh           if(1 < (a - first)) {
12100c16b537SWarner Losh             STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink);
12110c16b537SWarner Losh             last = a, limit = tr_ilg(a - first);
12120c16b537SWarner Losh           } else if(1 < (last - b)) {
12130c16b537SWarner Losh             first = b, limit = tr_ilg(last - b);
12140c16b537SWarner Losh           } else {
12150c16b537SWarner Losh             STACK_POP5(ISAd, first, last, limit, trlink);
12160c16b537SWarner Losh           }
12170c16b537SWarner Losh         } else {
12180c16b537SWarner Losh           if(1 < (last - b)) {
12190c16b537SWarner Losh             STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink);
12200c16b537SWarner Losh             first = b, limit = tr_ilg(last - b);
12210c16b537SWarner Losh           } else if(1 < (a - first)) {
12220c16b537SWarner Losh             last = a, limit = tr_ilg(a - first);
12230c16b537SWarner Losh           } else {
12240c16b537SWarner Losh             STACK_POP5(ISAd, first, last, limit, trlink);
12250c16b537SWarner Losh           }
12260c16b537SWarner Losh         }
12270c16b537SWarner Losh       } else if(limit == -2) {
12280c16b537SWarner Losh         /* tandem repeat copy */
12290c16b537SWarner Losh         a = stack[--ssize].b, b = stack[ssize].c;
12300c16b537SWarner Losh         if(stack[ssize].d == 0) {
12310c16b537SWarner Losh           tr_copy(ISA, SA, first, a, b, last, ISAd - ISA);
12320c16b537SWarner Losh         } else {
12330c16b537SWarner Losh           if(0 <= trlink) { stack[trlink].d = -1; }
12340c16b537SWarner Losh           tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA);
12350c16b537SWarner Losh         }
12360c16b537SWarner Losh         STACK_POP5(ISAd, first, last, limit, trlink);
12370c16b537SWarner Losh       } else {
12380c16b537SWarner Losh         /* sorted partition */
12390c16b537SWarner Losh         if(0 <= *first) {
12400c16b537SWarner Losh           a = first;
12410c16b537SWarner Losh           do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a));
12420c16b537SWarner Losh           first = a;
12430c16b537SWarner Losh         }
12440c16b537SWarner Losh         if(first < last) {
12450c16b537SWarner Losh           a = first; do { *a = ~*a; } while(*++a < 0);
12460c16b537SWarner Losh           next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1;
12470c16b537SWarner Losh           if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } }
12480c16b537SWarner Losh 
12490c16b537SWarner Losh           /* push */
12500c16b537SWarner Losh           if(trbudget_check(budget, a - first)) {
12510c16b537SWarner Losh             if((a - first) <= (last - a)) {
12520c16b537SWarner Losh               STACK_PUSH5(ISAd, a, last, -3, trlink);
12530c16b537SWarner Losh               ISAd += incr, last = a, limit = next;
12540c16b537SWarner Losh             } else {
12550c16b537SWarner Losh               if(1 < (last - a)) {
12560c16b537SWarner Losh                 STACK_PUSH5(ISAd + incr, first, a, next, trlink);
12570c16b537SWarner Losh                 first = a, limit = -3;
12580c16b537SWarner Losh               } else {
12590c16b537SWarner Losh                 ISAd += incr, last = a, limit = next;
12600c16b537SWarner Losh               }
12610c16b537SWarner Losh             }
12620c16b537SWarner Losh           } else {
12630c16b537SWarner Losh             if(0 <= trlink) { stack[trlink].d = -1; }
12640c16b537SWarner Losh             if(1 < (last - a)) {
12650c16b537SWarner Losh               first = a, limit = -3;
12660c16b537SWarner Losh             } else {
12670c16b537SWarner Losh               STACK_POP5(ISAd, first, last, limit, trlink);
12680c16b537SWarner Losh             }
12690c16b537SWarner Losh           }
12700c16b537SWarner Losh         } else {
12710c16b537SWarner Losh           STACK_POP5(ISAd, first, last, limit, trlink);
12720c16b537SWarner Losh         }
12730c16b537SWarner Losh       }
12740c16b537SWarner Losh       continue;
12750c16b537SWarner Losh     }
12760c16b537SWarner Losh 
12770c16b537SWarner Losh     if((last - first) <= TR_INSERTIONSORT_THRESHOLD) {
12780c16b537SWarner Losh       tr_insertionsort(ISAd, first, last);
12790c16b537SWarner Losh       limit = -3;
12800c16b537SWarner Losh       continue;
12810c16b537SWarner Losh     }
12820c16b537SWarner Losh 
12830c16b537SWarner Losh     if(limit-- == 0) {
12840c16b537SWarner Losh       tr_heapsort(ISAd, first, last - first);
12850c16b537SWarner Losh       for(a = last - 1; first < a; a = b) {
12860c16b537SWarner Losh         for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; }
12870c16b537SWarner Losh       }
12880c16b537SWarner Losh       limit = -3;
12890c16b537SWarner Losh       continue;
12900c16b537SWarner Losh     }
12910c16b537SWarner Losh 
12920c16b537SWarner Losh     /* choose pivot */
12930c16b537SWarner Losh     a = tr_pivot(ISAd, first, last);
12940c16b537SWarner Losh     SWAP(*first, *a);
12950c16b537SWarner Losh     v = ISAd[*first];
12960c16b537SWarner Losh 
12970c16b537SWarner Losh     /* partition */
12980c16b537SWarner Losh     tr_partition(ISAd, first, first + 1, last, &a, &b, v);
12990c16b537SWarner Losh     if((last - first) != (b - a)) {
13000c16b537SWarner Losh       next = (ISA[*a] != v) ? tr_ilg(b - a) : -1;
13010c16b537SWarner Losh 
13020c16b537SWarner Losh       /* update ranks */
13030c16b537SWarner Losh       for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
13040c16b537SWarner Losh       if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } }
13050c16b537SWarner Losh 
13060c16b537SWarner Losh       /* push */
13070c16b537SWarner Losh       if((1 < (b - a)) && (trbudget_check(budget, b - a))) {
13080c16b537SWarner Losh         if((a - first) <= (last - b)) {
13090c16b537SWarner Losh           if((last - b) <= (b - a)) {
13100c16b537SWarner Losh             if(1 < (a - first)) {
13110c16b537SWarner Losh               STACK_PUSH5(ISAd + incr, a, b, next, trlink);
13120c16b537SWarner Losh               STACK_PUSH5(ISAd, b, last, limit, trlink);
13130c16b537SWarner Losh               last = a;
13140c16b537SWarner Losh             } else if(1 < (last - b)) {
13150c16b537SWarner Losh               STACK_PUSH5(ISAd + incr, a, b, next, trlink);
13160c16b537SWarner Losh               first = b;
13170c16b537SWarner Losh             } else {
13180c16b537SWarner Losh               ISAd += incr, first = a, last = b, limit = next;
13190c16b537SWarner Losh             }
13200c16b537SWarner Losh           } else if((a - first) <= (b - a)) {
13210c16b537SWarner Losh             if(1 < (a - first)) {
13220c16b537SWarner Losh               STACK_PUSH5(ISAd, b, last, limit, trlink);
13230c16b537SWarner Losh               STACK_PUSH5(ISAd + incr, a, b, next, trlink);
13240c16b537SWarner Losh               last = a;
13250c16b537SWarner Losh             } else {
13260c16b537SWarner Losh               STACK_PUSH5(ISAd, b, last, limit, trlink);
13270c16b537SWarner Losh               ISAd += incr, first = a, last = b, limit = next;
13280c16b537SWarner Losh             }
13290c16b537SWarner Losh           } else {
13300c16b537SWarner Losh             STACK_PUSH5(ISAd, b, last, limit, trlink);
13310c16b537SWarner Losh             STACK_PUSH5(ISAd, first, a, limit, trlink);
13320c16b537SWarner Losh             ISAd += incr, first = a, last = b, limit = next;
13330c16b537SWarner Losh           }
13340c16b537SWarner Losh         } else {
13350c16b537SWarner Losh           if((a - first) <= (b - a)) {
13360c16b537SWarner Losh             if(1 < (last - b)) {
13370c16b537SWarner Losh               STACK_PUSH5(ISAd + incr, a, b, next, trlink);
13380c16b537SWarner Losh               STACK_PUSH5(ISAd, first, a, limit, trlink);
13390c16b537SWarner Losh               first = b;
13400c16b537SWarner Losh             } else if(1 < (a - first)) {
13410c16b537SWarner Losh               STACK_PUSH5(ISAd + incr, a, b, next, trlink);
13420c16b537SWarner Losh               last = a;
13430c16b537SWarner Losh             } else {
13440c16b537SWarner Losh               ISAd += incr, first = a, last = b, limit = next;
13450c16b537SWarner Losh             }
13460c16b537SWarner Losh           } else if((last - b) <= (b - a)) {
13470c16b537SWarner Losh             if(1 < (last - b)) {
13480c16b537SWarner Losh               STACK_PUSH5(ISAd, first, a, limit, trlink);
13490c16b537SWarner Losh               STACK_PUSH5(ISAd + incr, a, b, next, trlink);
13500c16b537SWarner Losh               first = b;
13510c16b537SWarner Losh             } else {
13520c16b537SWarner Losh               STACK_PUSH5(ISAd, first, a, limit, trlink);
13530c16b537SWarner Losh               ISAd += incr, first = a, last = b, limit = next;
13540c16b537SWarner Losh             }
13550c16b537SWarner Losh           } else {
13560c16b537SWarner Losh             STACK_PUSH5(ISAd, first, a, limit, trlink);
13570c16b537SWarner Losh             STACK_PUSH5(ISAd, b, last, limit, trlink);
13580c16b537SWarner Losh             ISAd += incr, first = a, last = b, limit = next;
13590c16b537SWarner Losh           }
13600c16b537SWarner Losh         }
13610c16b537SWarner Losh       } else {
13620c16b537SWarner Losh         if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; }
13630c16b537SWarner Losh         if((a - first) <= (last - b)) {
13640c16b537SWarner Losh           if(1 < (a - first)) {
13650c16b537SWarner Losh             STACK_PUSH5(ISAd, b, last, limit, trlink);
13660c16b537SWarner Losh             last = a;
13670c16b537SWarner Losh           } else if(1 < (last - b)) {
13680c16b537SWarner Losh             first = b;
13690c16b537SWarner Losh           } else {
13700c16b537SWarner Losh             STACK_POP5(ISAd, first, last, limit, trlink);
13710c16b537SWarner Losh           }
13720c16b537SWarner Losh         } else {
13730c16b537SWarner Losh           if(1 < (last - b)) {
13740c16b537SWarner Losh             STACK_PUSH5(ISAd, first, a, limit, trlink);
13750c16b537SWarner Losh             first = b;
13760c16b537SWarner Losh           } else if(1 < (a - first)) {
13770c16b537SWarner Losh             last = a;
13780c16b537SWarner Losh           } else {
13790c16b537SWarner Losh             STACK_POP5(ISAd, first, last, limit, trlink);
13800c16b537SWarner Losh           }
13810c16b537SWarner Losh         }
13820c16b537SWarner Losh       }
13830c16b537SWarner Losh     } else {
13840c16b537SWarner Losh       if(trbudget_check(budget, last - first)) {
13850c16b537SWarner Losh         limit = tr_ilg(last - first), ISAd += incr;
13860c16b537SWarner Losh       } else {
13870c16b537SWarner Losh         if(0 <= trlink) { stack[trlink].d = -1; }
13880c16b537SWarner Losh         STACK_POP5(ISAd, first, last, limit, trlink);
13890c16b537SWarner Losh       }
13900c16b537SWarner Losh     }
13910c16b537SWarner Losh   }
13920c16b537SWarner Losh #undef STACK_SIZE
13930c16b537SWarner Losh }
13940c16b537SWarner Losh 
13950c16b537SWarner Losh 
13960c16b537SWarner Losh 
13970c16b537SWarner Losh /*---------------------------------------------------------------------------*/
13980c16b537SWarner Losh 
13990c16b537SWarner Losh /* Tandem repeat sort */
14000c16b537SWarner Losh static
14010c16b537SWarner Losh void
trsort(int * ISA,int * SA,int n,int depth)14020c16b537SWarner Losh trsort(int *ISA, int *SA, int n, int depth) {
14030c16b537SWarner Losh   int *ISAd;
14040c16b537SWarner Losh   int *first, *last;
14050c16b537SWarner Losh   trbudget_t budget;
14060c16b537SWarner Losh   int t, skip, unsorted;
14070c16b537SWarner Losh 
14080c16b537SWarner Losh   trbudget_init(&budget, tr_ilg(n) * 2 / 3, n);
14090c16b537SWarner Losh /*  trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */
14100c16b537SWarner Losh   for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) {
14110c16b537SWarner Losh     first = SA;
14120c16b537SWarner Losh     skip = 0;
14130c16b537SWarner Losh     unsorted = 0;
14140c16b537SWarner Losh     do {
14150c16b537SWarner Losh       if((t = *first) < 0) { first -= t; skip += t; }
14160c16b537SWarner Losh       else {
14170c16b537SWarner Losh         if(skip != 0) { *(first + skip) = skip; skip = 0; }
14180c16b537SWarner Losh         last = SA + ISA[t] + 1;
14190c16b537SWarner Losh         if(1 < (last - first)) {
14200c16b537SWarner Losh           budget.count = 0;
14210c16b537SWarner Losh           tr_introsort(ISA, ISAd, SA, first, last, &budget);
14220c16b537SWarner Losh           if(budget.count != 0) { unsorted += budget.count; }
14230c16b537SWarner Losh           else { skip = first - last; }
14240c16b537SWarner Losh         } else if((last - first) == 1) {
14250c16b537SWarner Losh           skip = -1;
14260c16b537SWarner Losh         }
14270c16b537SWarner Losh         first = last;
14280c16b537SWarner Losh       }
14290c16b537SWarner Losh     } while(first < (SA + n));
14300c16b537SWarner Losh     if(skip != 0) { *(first + skip) = skip; }
14310c16b537SWarner Losh     if(unsorted == 0) { break; }
14320c16b537SWarner Losh   }
14330c16b537SWarner Losh }
14340c16b537SWarner Losh 
14350c16b537SWarner Losh 
14360c16b537SWarner Losh /*---------------------------------------------------------------------------*/
14370c16b537SWarner Losh 
14380c16b537SWarner Losh /* Sorts suffixes of type B*. */
14390c16b537SWarner Losh static
14400c16b537SWarner Losh int
sort_typeBstar(const unsigned char * T,int * SA,int * bucket_A,int * bucket_B,int n,int openMP)14410c16b537SWarner Losh sort_typeBstar(const unsigned char *T, int *SA,
14420c16b537SWarner Losh                int *bucket_A, int *bucket_B,
14430c16b537SWarner Losh                int n, int openMP) {
14440c16b537SWarner Losh   int *PAb, *ISAb, *buf;
14450c16b537SWarner Losh #ifdef LIBBSC_OPENMP
14460c16b537SWarner Losh   int *curbuf;
14470c16b537SWarner Losh   int l;
14480c16b537SWarner Losh #endif
14490c16b537SWarner Losh   int i, j, k, t, m, bufsize;
14500c16b537SWarner Losh   int c0, c1;
14510c16b537SWarner Losh #ifdef LIBBSC_OPENMP
14520c16b537SWarner Losh   int d0, d1;
14530c16b537SWarner Losh #endif
14540c16b537SWarner Losh   (void)openMP;
14550c16b537SWarner Losh 
14560c16b537SWarner Losh   /* Initialize bucket arrays. */
14570c16b537SWarner Losh   for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; }
14580c16b537SWarner Losh   for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; }
14590c16b537SWarner Losh 
14600c16b537SWarner Losh   /* Count the number of occurrences of the first one or two characters of each
14610c16b537SWarner Losh      type A, B and B* suffix. Moreover, store the beginning position of all
14620c16b537SWarner Losh      type B* suffixes into the array SA. */
14630c16b537SWarner Losh   for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) {
14640c16b537SWarner Losh     /* type A suffix. */
14650c16b537SWarner Losh     do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1));
14660c16b537SWarner Losh     if(0 <= i) {
14670c16b537SWarner Losh       /* type B* suffix. */
14680c16b537SWarner Losh       ++BUCKET_BSTAR(c0, c1);
14690c16b537SWarner Losh       SA[--m] = i;
14700c16b537SWarner Losh       /* type B suffix. */
14710c16b537SWarner Losh       for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) {
14720c16b537SWarner Losh         ++BUCKET_B(c0, c1);
14730c16b537SWarner Losh       }
14740c16b537SWarner Losh     }
14750c16b537SWarner Losh   }
14760c16b537SWarner Losh   m = n - m;
14770c16b537SWarner Losh /*
14780c16b537SWarner Losh note:
14790c16b537SWarner Losh   A type B* suffix is lexicographically smaller than a type B suffix that
14800c16b537SWarner Losh   begins with the same first two characters.
14810c16b537SWarner Losh */
14820c16b537SWarner Losh 
14830c16b537SWarner Losh   /* Calculate the index of start/end point of each bucket. */
14840c16b537SWarner Losh   for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) {
14850c16b537SWarner Losh     t = i + BUCKET_A(c0);
14860c16b537SWarner Losh     BUCKET_A(c0) = i + j; /* start point */
14870c16b537SWarner Losh     i = t + BUCKET_B(c0, c0);
14880c16b537SWarner Losh     for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) {
14890c16b537SWarner Losh       j += BUCKET_BSTAR(c0, c1);
14900c16b537SWarner Losh       BUCKET_BSTAR(c0, c1) = j; /* end point */
14910c16b537SWarner Losh       i += BUCKET_B(c0, c1);
14920c16b537SWarner Losh     }
14930c16b537SWarner Losh   }
14940c16b537SWarner Losh 
14950c16b537SWarner Losh   if(0 < m) {
14960c16b537SWarner Losh     /* Sort the type B* suffixes by their first two characters. */
14970c16b537SWarner Losh     PAb = SA + n - m; ISAb = SA + m;
14980c16b537SWarner Losh     for(i = m - 2; 0 <= i; --i) {
14990c16b537SWarner Losh       t = PAb[i], c0 = T[t], c1 = T[t + 1];
15000c16b537SWarner Losh       SA[--BUCKET_BSTAR(c0, c1)] = i;
15010c16b537SWarner Losh     }
15020c16b537SWarner Losh     t = PAb[m - 1], c0 = T[t], c1 = T[t + 1];
15030c16b537SWarner Losh     SA[--BUCKET_BSTAR(c0, c1)] = m - 1;
15040c16b537SWarner Losh 
15050c16b537SWarner Losh     /* Sort the type B* substrings using sssort. */
15060c16b537SWarner Losh #ifdef LIBBSC_OPENMP
15070c16b537SWarner Losh     if (openMP)
15080c16b537SWarner Losh     {
15090c16b537SWarner Losh         buf = SA + m;
15100c16b537SWarner Losh         c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m;
15110c16b537SWarner Losh #pragma omp parallel default(shared) private(bufsize, curbuf, k, l, d0, d1)
15120c16b537SWarner Losh         {
15130c16b537SWarner Losh           bufsize = (n - (2 * m)) / omp_get_num_threads();
15140c16b537SWarner Losh           curbuf = buf + omp_get_thread_num() * bufsize;
15150c16b537SWarner Losh           k = 0;
15160c16b537SWarner Losh           for(;;) {
15170c16b537SWarner Losh             #pragma omp critical(sssort_lock)
15180c16b537SWarner Losh             {
15190c16b537SWarner Losh               if(0 < (l = j)) {
15200c16b537SWarner Losh                 d0 = c0, d1 = c1;
15210c16b537SWarner Losh                 do {
15220c16b537SWarner Losh                   k = BUCKET_BSTAR(d0, d1);
15230c16b537SWarner Losh                   if(--d1 <= d0) {
15240c16b537SWarner Losh                     d1 = ALPHABET_SIZE - 1;
15250c16b537SWarner Losh                     if(--d0 < 0) { break; }
15260c16b537SWarner Losh                   }
15270c16b537SWarner Losh                 } while(((l - k) <= 1) && (0 < (l = k)));
15280c16b537SWarner Losh                 c0 = d0, c1 = d1, j = k;
15290c16b537SWarner Losh               }
15300c16b537SWarner Losh             }
15310c16b537SWarner Losh             if(l == 0) { break; }
15320c16b537SWarner Losh             sssort(T, PAb, SA + k, SA + l,
15330c16b537SWarner Losh                    curbuf, bufsize, 2, n, *(SA + k) == (m - 1));
15340c16b537SWarner Losh           }
15350c16b537SWarner Losh         }
15360c16b537SWarner Losh     }
15370c16b537SWarner Losh     else
15380c16b537SWarner Losh     {
15390c16b537SWarner Losh         buf = SA + m, bufsize = n - (2 * m);
15400c16b537SWarner Losh         for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
15410c16b537SWarner Losh           for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
15420c16b537SWarner Losh             i = BUCKET_BSTAR(c0, c1);
15430c16b537SWarner Losh             if(1 < (j - i)) {
15440c16b537SWarner Losh               sssort(T, PAb, SA + i, SA + j,
15450c16b537SWarner Losh                      buf, bufsize, 2, n, *(SA + i) == (m - 1));
15460c16b537SWarner Losh             }
15470c16b537SWarner Losh           }
15480c16b537SWarner Losh         }
15490c16b537SWarner Losh     }
15500c16b537SWarner Losh #else
15510c16b537SWarner Losh     buf = SA + m, bufsize = n - (2 * m);
15520c16b537SWarner Losh     for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
15530c16b537SWarner Losh       for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
15540c16b537SWarner Losh         i = BUCKET_BSTAR(c0, c1);
15550c16b537SWarner Losh         if(1 < (j - i)) {
15560c16b537SWarner Losh           sssort(T, PAb, SA + i, SA + j,
15570c16b537SWarner Losh                  buf, bufsize, 2, n, *(SA + i) == (m - 1));
15580c16b537SWarner Losh         }
15590c16b537SWarner Losh       }
15600c16b537SWarner Losh     }
15610c16b537SWarner Losh #endif
15620c16b537SWarner Losh 
15630c16b537SWarner Losh     /* Compute ranks of type B* substrings. */
15640c16b537SWarner Losh     for(i = m - 1; 0 <= i; --i) {
15650c16b537SWarner Losh       if(0 <= SA[i]) {
15660c16b537SWarner Losh         j = i;
15670c16b537SWarner Losh         do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i]));
15680c16b537SWarner Losh         SA[i + 1] = i - j;
15690c16b537SWarner Losh         if(i <= 0) { break; }
15700c16b537SWarner Losh       }
15710c16b537SWarner Losh       j = i;
15720c16b537SWarner Losh       do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0);
15730c16b537SWarner Losh       ISAb[SA[i]] = j;
15740c16b537SWarner Losh     }
15750c16b537SWarner Losh 
15760c16b537SWarner Losh     /* Construct the inverse suffix array of type B* suffixes using trsort. */
15770c16b537SWarner Losh     trsort(ISAb, SA, m, 1);
15780c16b537SWarner Losh 
15795ff13fbcSAllan Jude     /* Set the sorted order of type B* suffixes. */
15800c16b537SWarner Losh     for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
15810c16b537SWarner Losh       for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
15820c16b537SWarner Losh       if(0 <= i) {
15830c16b537SWarner Losh         t = i;
15840c16b537SWarner Losh         for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { }
15850c16b537SWarner Losh         SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t;
15860c16b537SWarner Losh       }
15870c16b537SWarner Losh     }
15880c16b537SWarner Losh 
15890c16b537SWarner Losh     /* Calculate the index of start/end point of each bucket. */
15900c16b537SWarner Losh     BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */
15910c16b537SWarner Losh     for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) {
15920c16b537SWarner Losh       i = BUCKET_A(c0 + 1) - 1;
15930c16b537SWarner Losh       for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) {
15940c16b537SWarner Losh         t = i - BUCKET_B(c0, c1);
15950c16b537SWarner Losh         BUCKET_B(c0, c1) = i; /* end point */
15960c16b537SWarner Losh 
15970c16b537SWarner Losh         /* Move all type B* suffixes to the correct position. */
15980c16b537SWarner Losh         for(i = t, j = BUCKET_BSTAR(c0, c1);
15990c16b537SWarner Losh             j <= k;
16000c16b537SWarner Losh             --i, --k) { SA[i] = SA[k]; }
16010c16b537SWarner Losh       }
16020c16b537SWarner Losh       BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */
16030c16b537SWarner Losh       BUCKET_B(c0, c0) = i; /* end point */
16040c16b537SWarner Losh     }
16050c16b537SWarner Losh   }
16060c16b537SWarner Losh 
16070c16b537SWarner Losh   return m;
16080c16b537SWarner Losh }
16090c16b537SWarner Losh 
16100c16b537SWarner Losh /* Constructs the suffix array by using the sorted order of type B* suffixes. */
16110c16b537SWarner Losh static
16120c16b537SWarner Losh void
construct_SA(const unsigned char * T,int * SA,int * bucket_A,int * bucket_B,int n,int m)16130c16b537SWarner Losh construct_SA(const unsigned char *T, int *SA,
16140c16b537SWarner Losh              int *bucket_A, int *bucket_B,
16150c16b537SWarner Losh              int n, int m) {
16160c16b537SWarner Losh   int *i, *j, *k;
16170c16b537SWarner Losh   int s;
16180c16b537SWarner Losh   int c0, c1, c2;
16190c16b537SWarner Losh 
16200c16b537SWarner Losh   if(0 < m) {
16210c16b537SWarner Losh     /* Construct the sorted order of type B suffixes by using
16220c16b537SWarner Losh        the sorted order of type B* suffixes. */
16230c16b537SWarner Losh     for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
16240c16b537SWarner Losh       /* Scan the suffix array from right to left. */
16250c16b537SWarner Losh       for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
16260c16b537SWarner Losh           j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
16270c16b537SWarner Losh           i <= j;
16280c16b537SWarner Losh           --j) {
16290c16b537SWarner Losh         if(0 < (s = *j)) {
16300c16b537SWarner Losh           assert(T[s] == c1);
16310c16b537SWarner Losh           assert(((s + 1) < n) && (T[s] <= T[s + 1]));
16320c16b537SWarner Losh           assert(T[s - 1] <= T[s]);
16330c16b537SWarner Losh           *j = ~s;
16340c16b537SWarner Losh           c0 = T[--s];
16350c16b537SWarner Losh           if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
16360c16b537SWarner Losh           if(c0 != c2) {
16370c16b537SWarner Losh             if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
16380c16b537SWarner Losh             k = SA + BUCKET_B(c2 = c0, c1);
16390c16b537SWarner Losh           }
16400f743729SConrad Meyer           assert(k < j); assert(k != NULL);
16410c16b537SWarner Losh           *k-- = s;
16420c16b537SWarner Losh         } else {
16430c16b537SWarner Losh           assert(((s == 0) && (T[s] == c1)) || (s < 0));
16440c16b537SWarner Losh           *j = ~s;
16450c16b537SWarner Losh         }
16460c16b537SWarner Losh       }
16470c16b537SWarner Losh     }
16480c16b537SWarner Losh   }
16490c16b537SWarner Losh 
16500c16b537SWarner Losh   /* Construct the suffix array by using
16510c16b537SWarner Losh      the sorted order of type B suffixes. */
16520c16b537SWarner Losh   k = SA + BUCKET_A(c2 = T[n - 1]);
16530c16b537SWarner Losh   *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1);
16540c16b537SWarner Losh   /* Scan the suffix array from left to right. */
16550c16b537SWarner Losh   for(i = SA, j = SA + n; i < j; ++i) {
16560c16b537SWarner Losh     if(0 < (s = *i)) {
16570c16b537SWarner Losh       assert(T[s - 1] >= T[s]);
16580c16b537SWarner Losh       c0 = T[--s];
16590c16b537SWarner Losh       if((s == 0) || (T[s - 1] < c0)) { s = ~s; }
16600c16b537SWarner Losh       if(c0 != c2) {
16610c16b537SWarner Losh         BUCKET_A(c2) = k - SA;
16620c16b537SWarner Losh         k = SA + BUCKET_A(c2 = c0);
16630c16b537SWarner Losh       }
16640c16b537SWarner Losh       assert(i < k);
16650c16b537SWarner Losh       *k++ = s;
16660c16b537SWarner Losh     } else {
16670c16b537SWarner Losh       assert(s < 0);
16680c16b537SWarner Losh       *i = ~s;
16690c16b537SWarner Losh     }
16700c16b537SWarner Losh   }
16710c16b537SWarner Losh }
16720c16b537SWarner Losh 
16730c16b537SWarner Losh /* Constructs the burrows-wheeler transformed string directly
16740c16b537SWarner Losh    by using the sorted order of type B* suffixes. */
16750c16b537SWarner Losh static
16760c16b537SWarner Losh int
construct_BWT(const unsigned char * T,int * SA,int * bucket_A,int * bucket_B,int n,int m)16770c16b537SWarner Losh construct_BWT(const unsigned char *T, int *SA,
16780c16b537SWarner Losh               int *bucket_A, int *bucket_B,
16790c16b537SWarner Losh               int n, int m) {
16800c16b537SWarner Losh   int *i, *j, *k, *orig;
16810c16b537SWarner Losh   int s;
16820c16b537SWarner Losh   int c0, c1, c2;
16830c16b537SWarner Losh 
16840c16b537SWarner Losh   if(0 < m) {
16850c16b537SWarner Losh     /* Construct the sorted order of type B suffixes by using
16860c16b537SWarner Losh        the sorted order of type B* suffixes. */
16870c16b537SWarner Losh     for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
16880c16b537SWarner Losh       /* Scan the suffix array from right to left. */
16890c16b537SWarner Losh       for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
16900c16b537SWarner Losh           j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
16910c16b537SWarner Losh           i <= j;
16920c16b537SWarner Losh           --j) {
16930c16b537SWarner Losh         if(0 < (s = *j)) {
16940c16b537SWarner Losh           assert(T[s] == c1);
16950c16b537SWarner Losh           assert(((s + 1) < n) && (T[s] <= T[s + 1]));
16960c16b537SWarner Losh           assert(T[s - 1] <= T[s]);
16970c16b537SWarner Losh           c0 = T[--s];
16980c16b537SWarner Losh           *j = ~((int)c0);
16990c16b537SWarner Losh           if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
17000c16b537SWarner Losh           if(c0 != c2) {
17010c16b537SWarner Losh             if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
17020c16b537SWarner Losh             k = SA + BUCKET_B(c2 = c0, c1);
17030c16b537SWarner Losh           }
17040f743729SConrad Meyer           assert(k < j); assert(k != NULL);
17050c16b537SWarner Losh           *k-- = s;
17060c16b537SWarner Losh         } else if(s != 0) {
17070c16b537SWarner Losh           *j = ~s;
17080c16b537SWarner Losh #ifndef NDEBUG
17090c16b537SWarner Losh         } else {
17100c16b537SWarner Losh           assert(T[s] == c1);
17110c16b537SWarner Losh #endif
17120c16b537SWarner Losh         }
17130c16b537SWarner Losh       }
17140c16b537SWarner Losh     }
17150c16b537SWarner Losh   }
17160c16b537SWarner Losh 
17170c16b537SWarner Losh   /* Construct the BWTed string by using
17180c16b537SWarner Losh      the sorted order of type B suffixes. */
17190c16b537SWarner Losh   k = SA + BUCKET_A(c2 = T[n - 1]);
17200c16b537SWarner Losh   *k++ = (T[n - 2] < c2) ? ~((int)T[n - 2]) : (n - 1);
17210c16b537SWarner Losh   /* Scan the suffix array from left to right. */
17220c16b537SWarner Losh   for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
17230c16b537SWarner Losh     if(0 < (s = *i)) {
17240c16b537SWarner Losh       assert(T[s - 1] >= T[s]);
17250c16b537SWarner Losh       c0 = T[--s];
17260c16b537SWarner Losh       *i = c0;
17270c16b537SWarner Losh       if((0 < s) && (T[s - 1] < c0)) { s = ~((int)T[s - 1]); }
17280c16b537SWarner Losh       if(c0 != c2) {
17290c16b537SWarner Losh         BUCKET_A(c2) = k - SA;
17300c16b537SWarner Losh         k = SA + BUCKET_A(c2 = c0);
17310c16b537SWarner Losh       }
17320c16b537SWarner Losh       assert(i < k);
17330c16b537SWarner Losh       *k++ = s;
17340c16b537SWarner Losh     } else if(s != 0) {
17350c16b537SWarner Losh       *i = ~s;
17360c16b537SWarner Losh     } else {
17370c16b537SWarner Losh       orig = i;
17380c16b537SWarner Losh     }
17390c16b537SWarner Losh   }
17400c16b537SWarner Losh 
17410c16b537SWarner Losh   return orig - SA;
17420c16b537SWarner Losh }
17430c16b537SWarner Losh 
17440c16b537SWarner Losh /* Constructs the burrows-wheeler transformed string directly
17450c16b537SWarner Losh    by using the sorted order of type B* suffixes. */
17460c16b537SWarner Losh static
17470c16b537SWarner Losh int
construct_BWT_indexes(const unsigned char * T,int * SA,int * bucket_A,int * bucket_B,int n,int m,unsigned char * num_indexes,int * indexes)17480c16b537SWarner Losh construct_BWT_indexes(const unsigned char *T, int *SA,
17490c16b537SWarner Losh                       int *bucket_A, int *bucket_B,
17500c16b537SWarner Losh                       int n, int m,
17510c16b537SWarner Losh                       unsigned char * num_indexes, int * indexes) {
17520c16b537SWarner Losh   int *i, *j, *k, *orig;
17530c16b537SWarner Losh   int s;
17540c16b537SWarner Losh   int c0, c1, c2;
17550c16b537SWarner Losh 
17560c16b537SWarner Losh   int mod = n / 8;
17570c16b537SWarner Losh   {
17580c16b537SWarner Losh       mod |= mod >> 1;  mod |= mod >> 2;
17590c16b537SWarner Losh       mod |= mod >> 4;  mod |= mod >> 8;
17600c16b537SWarner Losh       mod |= mod >> 16; mod >>= 1;
17610c16b537SWarner Losh 
17620c16b537SWarner Losh       *num_indexes = (unsigned char)((n - 1) / (mod + 1));
17630c16b537SWarner Losh   }
17640c16b537SWarner Losh 
17650c16b537SWarner Losh   if(0 < m) {
17660c16b537SWarner Losh     /* Construct the sorted order of type B suffixes by using
17670c16b537SWarner Losh        the sorted order of type B* suffixes. */
17680c16b537SWarner Losh     for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
17690c16b537SWarner Losh       /* Scan the suffix array from right to left. */
17700c16b537SWarner Losh       for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
17710c16b537SWarner Losh           j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
17720c16b537SWarner Losh           i <= j;
17730c16b537SWarner Losh           --j) {
17740c16b537SWarner Losh         if(0 < (s = *j)) {
17750c16b537SWarner Losh           assert(T[s] == c1);
17760c16b537SWarner Losh           assert(((s + 1) < n) && (T[s] <= T[s + 1]));
17770c16b537SWarner Losh           assert(T[s - 1] <= T[s]);
17780c16b537SWarner Losh 
17790c16b537SWarner Losh           if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = j - SA;
17800c16b537SWarner Losh 
17810c16b537SWarner Losh           c0 = T[--s];
17820c16b537SWarner Losh           *j = ~((int)c0);
17830c16b537SWarner Losh           if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
17840c16b537SWarner Losh           if(c0 != c2) {
17850c16b537SWarner Losh             if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
17860c16b537SWarner Losh             k = SA + BUCKET_B(c2 = c0, c1);
17870c16b537SWarner Losh           }
17880f743729SConrad Meyer           assert(k < j); assert(k != NULL);
17890c16b537SWarner Losh           *k-- = s;
17900c16b537SWarner Losh         } else if(s != 0) {
17910c16b537SWarner Losh           *j = ~s;
17920c16b537SWarner Losh #ifndef NDEBUG
17930c16b537SWarner Losh         } else {
17940c16b537SWarner Losh           assert(T[s] == c1);
17950c16b537SWarner Losh #endif
17960c16b537SWarner Losh         }
17970c16b537SWarner Losh       }
17980c16b537SWarner Losh     }
17990c16b537SWarner Losh   }
18000c16b537SWarner Losh 
18010c16b537SWarner Losh   /* Construct the BWTed string by using
18020c16b537SWarner Losh      the sorted order of type B suffixes. */
18030c16b537SWarner Losh   k = SA + BUCKET_A(c2 = T[n - 1]);
18040c16b537SWarner Losh   if (T[n - 2] < c2) {
18050c16b537SWarner Losh     if (((n - 1) & mod) == 0) indexes[(n - 1) / (mod + 1) - 1] = k - SA;
18060c16b537SWarner Losh     *k++ = ~((int)T[n - 2]);
18070c16b537SWarner Losh   }
18080c16b537SWarner Losh   else {
18090c16b537SWarner Losh     *k++ = n - 1;
18100c16b537SWarner Losh   }
18110c16b537SWarner Losh 
18120c16b537SWarner Losh   /* Scan the suffix array from left to right. */
18130c16b537SWarner Losh   for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
18140c16b537SWarner Losh     if(0 < (s = *i)) {
18150c16b537SWarner Losh       assert(T[s - 1] >= T[s]);
18160c16b537SWarner Losh 
18170c16b537SWarner Losh       if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = i - SA;
18180c16b537SWarner Losh 
18190c16b537SWarner Losh       c0 = T[--s];
18200c16b537SWarner Losh       *i = c0;
18210c16b537SWarner Losh       if(c0 != c2) {
18220c16b537SWarner Losh         BUCKET_A(c2) = k - SA;
18230c16b537SWarner Losh         k = SA + BUCKET_A(c2 = c0);
18240c16b537SWarner Losh       }
18250c16b537SWarner Losh       assert(i < k);
18260c16b537SWarner Losh       if((0 < s) && (T[s - 1] < c0)) {
18270c16b537SWarner Losh           if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = k - SA;
18280c16b537SWarner Losh           *k++ = ~((int)T[s - 1]);
18290c16b537SWarner Losh       } else
18300c16b537SWarner Losh         *k++ = s;
18310c16b537SWarner Losh     } else if(s != 0) {
18320c16b537SWarner Losh       *i = ~s;
18330c16b537SWarner Losh     } else {
18340c16b537SWarner Losh       orig = i;
18350c16b537SWarner Losh     }
18360c16b537SWarner Losh   }
18370c16b537SWarner Losh 
18380c16b537SWarner Losh   return orig - SA;
18390c16b537SWarner Losh }
18400c16b537SWarner Losh 
18410c16b537SWarner Losh 
18420c16b537SWarner Losh /*---------------------------------------------------------------------------*/
18430c16b537SWarner Losh 
18440c16b537SWarner Losh /*- Function -*/
18450c16b537SWarner Losh 
18460c16b537SWarner Losh int
divsufsort(const unsigned char * T,int * SA,int n,int openMP)18470c16b537SWarner Losh divsufsort(const unsigned char *T, int *SA, int n, int openMP) {
18480c16b537SWarner Losh   int *bucket_A, *bucket_B;
18490c16b537SWarner Losh   int m;
18500c16b537SWarner Losh   int err = 0;
18510c16b537SWarner Losh 
18520c16b537SWarner Losh   /* Check arguments. */
18530c16b537SWarner Losh   if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; }
18540c16b537SWarner Losh   else if(n == 0) { return 0; }
18550c16b537SWarner Losh   else if(n == 1) { SA[0] = 0; return 0; }
18560c16b537SWarner Losh   else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; }
18570c16b537SWarner Losh 
18580c16b537SWarner Losh   bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int));
18590c16b537SWarner Losh   bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int));
18600c16b537SWarner Losh 
18610c16b537SWarner Losh   /* Suffixsort. */
18620c16b537SWarner Losh   if((bucket_A != NULL) && (bucket_B != NULL)) {
18630c16b537SWarner Losh     m = sort_typeBstar(T, SA, bucket_A, bucket_B, n, openMP);
18640c16b537SWarner Losh     construct_SA(T, SA, bucket_A, bucket_B, n, m);
18650c16b537SWarner Losh   } else {
18660c16b537SWarner Losh     err = -2;
18670c16b537SWarner Losh   }
18680c16b537SWarner Losh 
18690c16b537SWarner Losh   free(bucket_B);
18700c16b537SWarner Losh   free(bucket_A);
18710c16b537SWarner Losh 
18720c16b537SWarner Losh   return err;
18730c16b537SWarner Losh }
18740c16b537SWarner Losh 
18750c16b537SWarner Losh int
divbwt(const unsigned char * T,unsigned char * U,int * A,int n,unsigned char * num_indexes,int * indexes,int openMP)18760c16b537SWarner Losh divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP) {
18770c16b537SWarner Losh   int *B;
18780c16b537SWarner Losh   int *bucket_A, *bucket_B;
18790c16b537SWarner Losh   int m, pidx, i;
18800c16b537SWarner Losh 
18810c16b537SWarner Losh   /* Check arguments. */
18820c16b537SWarner Losh   if((T == NULL) || (U == NULL) || (n < 0)) { return -1; }
18830c16b537SWarner Losh   else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; }
18840c16b537SWarner Losh 
18850c16b537SWarner Losh   if((B = A) == NULL) { B = (int *)malloc((size_t)(n + 1) * sizeof(int)); }
18860c16b537SWarner Losh   bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int));
18870c16b537SWarner Losh   bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int));
18880c16b537SWarner Losh 
18890c16b537SWarner Losh   /* Burrows-Wheeler Transform. */
18900c16b537SWarner Losh   if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) {
18910c16b537SWarner Losh     m = sort_typeBstar(T, B, bucket_A, bucket_B, n, openMP);
18920c16b537SWarner Losh 
18930c16b537SWarner Losh     if (num_indexes == NULL || indexes == NULL) {
18940c16b537SWarner Losh         pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m);
18950c16b537SWarner Losh     } else {
18960c16b537SWarner Losh         pidx = construct_BWT_indexes(T, B, bucket_A, bucket_B, n, m, num_indexes, indexes);
18970c16b537SWarner Losh     }
18980c16b537SWarner Losh 
18990c16b537SWarner Losh     /* Copy to output string. */
19000c16b537SWarner Losh     U[0] = T[n - 1];
19010c16b537SWarner Losh     for(i = 0; i < pidx; ++i) { U[i + 1] = (unsigned char)B[i]; }
19020c16b537SWarner Losh     for(i += 1; i < n; ++i) { U[i] = (unsigned char)B[i]; }
19030c16b537SWarner Losh     pidx += 1;
19040c16b537SWarner Losh   } else {
19050c16b537SWarner Losh     pidx = -2;
19060c16b537SWarner Losh   }
19070c16b537SWarner Losh 
19080c16b537SWarner Losh   free(bucket_B);
19090c16b537SWarner Losh   free(bucket_A);
19100c16b537SWarner Losh   if(A == NULL) { free(B); }
19110c16b537SWarner Losh 
19120c16b537SWarner Losh   return pidx;
19130c16b537SWarner Losh }
1914