xref: /freebsd/contrib/libdivsufsort/lib/trsort.c (revision c08cbc64)
1*c08cbc64SXin LI /*
2*c08cbc64SXin LI  * trsort.c for libdivsufsort
3*c08cbc64SXin LI  * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
4*c08cbc64SXin LI  *
5*c08cbc64SXin LI  * Permission is hereby granted, free of charge, to any person
6*c08cbc64SXin LI  * obtaining a copy of this software and associated documentation
7*c08cbc64SXin LI  * files (the "Software"), to deal in the Software without
8*c08cbc64SXin LI  * restriction, including without limitation the rights to use,
9*c08cbc64SXin LI  * copy, modify, merge, publish, distribute, sublicense, and/or sell
10*c08cbc64SXin LI  * copies of the Software, and to permit persons to whom the
11*c08cbc64SXin LI  * Software is furnished to do so, subject to the following
12*c08cbc64SXin LI  * conditions:
13*c08cbc64SXin LI  *
14*c08cbc64SXin LI  * The above copyright notice and this permission notice shall be
15*c08cbc64SXin LI  * included in all copies or substantial portions of the Software.
16*c08cbc64SXin LI  *
17*c08cbc64SXin LI  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18*c08cbc64SXin LI  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19*c08cbc64SXin LI  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20*c08cbc64SXin LI  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21*c08cbc64SXin LI  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22*c08cbc64SXin LI  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23*c08cbc64SXin LI  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24*c08cbc64SXin LI  * OTHER DEALINGS IN THE SOFTWARE.
25*c08cbc64SXin LI  */
26*c08cbc64SXin LI 
27*c08cbc64SXin LI #include "divsufsort_private.h"
28*c08cbc64SXin LI 
29*c08cbc64SXin LI 
30*c08cbc64SXin LI /*- Private Functions -*/
31*c08cbc64SXin LI 
32*c08cbc64SXin LI static const saint_t lg_table[256]= {
33*c08cbc64SXin LI  -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
34*c08cbc64SXin LI   5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
35*c08cbc64SXin LI   6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
36*c08cbc64SXin LI   6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
37*c08cbc64SXin LI   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
38*c08cbc64SXin LI   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
39*c08cbc64SXin LI   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
40*c08cbc64SXin LI   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
41*c08cbc64SXin LI };
42*c08cbc64SXin LI 
43*c08cbc64SXin LI static INLINE
44*c08cbc64SXin LI saint_t
tr_ilg(saidx_t n)45*c08cbc64SXin LI tr_ilg(saidx_t n) {
46*c08cbc64SXin LI #if defined(BUILD_DIVSUFSORT64)
47*c08cbc64SXin LI   return (n >> 32) ?
48*c08cbc64SXin LI           ((n >> 48) ?
49*c08cbc64SXin LI             ((n >> 56) ?
50*c08cbc64SXin LI               56 + lg_table[(n >> 56) & 0xff] :
51*c08cbc64SXin LI               48 + lg_table[(n >> 48) & 0xff]) :
52*c08cbc64SXin LI             ((n >> 40) ?
53*c08cbc64SXin LI               40 + lg_table[(n >> 40) & 0xff] :
54*c08cbc64SXin LI               32 + lg_table[(n >> 32) & 0xff])) :
55*c08cbc64SXin LI           ((n & 0xffff0000) ?
56*c08cbc64SXin LI             ((n & 0xff000000) ?
57*c08cbc64SXin LI               24 + lg_table[(n >> 24) & 0xff] :
58*c08cbc64SXin LI               16 + lg_table[(n >> 16) & 0xff]) :
59*c08cbc64SXin LI             ((n & 0x0000ff00) ?
60*c08cbc64SXin LI                8 + lg_table[(n >>  8) & 0xff] :
61*c08cbc64SXin LI                0 + lg_table[(n >>  0) & 0xff]));
62*c08cbc64SXin LI #else
63*c08cbc64SXin LI   return (n & 0xffff0000) ?
64*c08cbc64SXin LI           ((n & 0xff000000) ?
65*c08cbc64SXin LI             24 + lg_table[(n >> 24) & 0xff] :
66*c08cbc64SXin LI             16 + lg_table[(n >> 16) & 0xff]) :
67*c08cbc64SXin LI           ((n & 0x0000ff00) ?
68*c08cbc64SXin LI              8 + lg_table[(n >>  8) & 0xff] :
69*c08cbc64SXin LI              0 + lg_table[(n >>  0) & 0xff]);
70*c08cbc64SXin LI #endif
71*c08cbc64SXin LI }
72*c08cbc64SXin LI 
73*c08cbc64SXin LI 
74*c08cbc64SXin LI /*---------------------------------------------------------------------------*/
75*c08cbc64SXin LI 
76*c08cbc64SXin LI /* Simple insertionsort for small size groups. */
77*c08cbc64SXin LI static
78*c08cbc64SXin LI void
tr_insertionsort(const saidx_t * ISAd,saidx_t * first,saidx_t * last)79*c08cbc64SXin LI tr_insertionsort(const saidx_t *ISAd, saidx_t *first, saidx_t *last) {
80*c08cbc64SXin LI   saidx_t *a, *b;
81*c08cbc64SXin LI   saidx_t t, r;
82*c08cbc64SXin LI 
83*c08cbc64SXin LI   for(a = first + 1; a < last; ++a) {
84*c08cbc64SXin LI     for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) {
85*c08cbc64SXin LI       do { *(b + 1) = *b; } while((first <= --b) && (*b < 0));
86*c08cbc64SXin LI       if(b < first) { break; }
87*c08cbc64SXin LI     }
88*c08cbc64SXin LI     if(r == 0) { *b = ~*b; }
89*c08cbc64SXin LI     *(b + 1) = t;
90*c08cbc64SXin LI   }
91*c08cbc64SXin LI }
92*c08cbc64SXin LI 
93*c08cbc64SXin LI 
94*c08cbc64SXin LI /*---------------------------------------------------------------------------*/
95*c08cbc64SXin LI 
96*c08cbc64SXin LI static INLINE
97*c08cbc64SXin LI void
tr_fixdown(const saidx_t * ISAd,saidx_t * SA,saidx_t i,saidx_t size)98*c08cbc64SXin LI tr_fixdown(const saidx_t *ISAd, saidx_t *SA, saidx_t i, saidx_t size) {
99*c08cbc64SXin LI   saidx_t j, k;
100*c08cbc64SXin LI   saidx_t v;
101*c08cbc64SXin LI   saidx_t c, d, e;
102*c08cbc64SXin LI 
103*c08cbc64SXin LI   for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
104*c08cbc64SXin LI     d = ISAd[SA[k = j++]];
105*c08cbc64SXin LI     if(d < (e = ISAd[SA[j]])) { k = j; d = e; }
106*c08cbc64SXin LI     if(d <= c) { break; }
107*c08cbc64SXin LI   }
108*c08cbc64SXin LI   SA[i] = v;
109*c08cbc64SXin LI }
110*c08cbc64SXin LI 
111*c08cbc64SXin LI /* Simple top-down heapsort. */
112*c08cbc64SXin LI static
113*c08cbc64SXin LI void
tr_heapsort(const saidx_t * ISAd,saidx_t * SA,saidx_t size)114*c08cbc64SXin LI tr_heapsort(const saidx_t *ISAd, saidx_t *SA, saidx_t size) {
115*c08cbc64SXin LI   saidx_t i, m;
116*c08cbc64SXin LI   saidx_t t;
117*c08cbc64SXin LI 
118*c08cbc64SXin LI   m = size;
119*c08cbc64SXin LI   if((size % 2) == 0) {
120*c08cbc64SXin LI     m--;
121*c08cbc64SXin LI     if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); }
122*c08cbc64SXin LI   }
123*c08cbc64SXin LI 
124*c08cbc64SXin LI   for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); }
125*c08cbc64SXin LI   if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); }
126*c08cbc64SXin LI   for(i = m - 1; 0 < i; --i) {
127*c08cbc64SXin LI     t = SA[0], SA[0] = SA[i];
128*c08cbc64SXin LI     tr_fixdown(ISAd, SA, 0, i);
129*c08cbc64SXin LI     SA[i] = t;
130*c08cbc64SXin LI   }
131*c08cbc64SXin LI }
132*c08cbc64SXin LI 
133*c08cbc64SXin LI 
134*c08cbc64SXin LI /*---------------------------------------------------------------------------*/
135*c08cbc64SXin LI 
136*c08cbc64SXin LI /* Returns the median of three elements. */
137*c08cbc64SXin LI static INLINE
138*c08cbc64SXin LI saidx_t *
tr_median3(const saidx_t * ISAd,saidx_t * v1,saidx_t * v2,saidx_t * v3)139*c08cbc64SXin LI tr_median3(const saidx_t *ISAd, saidx_t *v1, saidx_t *v2, saidx_t *v3) {
140*c08cbc64SXin LI   saidx_t *t;
141*c08cbc64SXin LI   if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); }
142*c08cbc64SXin LI   if(ISAd[*v2] > ISAd[*v3]) {
143*c08cbc64SXin LI     if(ISAd[*v1] > ISAd[*v3]) { return v1; }
144*c08cbc64SXin LI     else { return v3; }
145*c08cbc64SXin LI   }
146*c08cbc64SXin LI   return v2;
147*c08cbc64SXin LI }
148*c08cbc64SXin LI 
149*c08cbc64SXin LI /* Returns the median of five elements. */
150*c08cbc64SXin LI static INLINE
151*c08cbc64SXin LI saidx_t *
tr_median5(const saidx_t * ISAd,saidx_t * v1,saidx_t * v2,saidx_t * v3,saidx_t * v4,saidx_t * v5)152*c08cbc64SXin LI tr_median5(const saidx_t *ISAd,
153*c08cbc64SXin LI            saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5) {
154*c08cbc64SXin LI   saidx_t *t;
155*c08cbc64SXin LI   if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); }
156*c08cbc64SXin LI   if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); }
157*c08cbc64SXin LI   if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); }
158*c08cbc64SXin LI   if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); }
159*c08cbc64SXin LI   if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); }
160*c08cbc64SXin LI   if(ISAd[*v3] > ISAd[*v4]) { return v4; }
161*c08cbc64SXin LI   return v3;
162*c08cbc64SXin LI }
163*c08cbc64SXin LI 
164*c08cbc64SXin LI /* Returns the pivot element. */
165*c08cbc64SXin LI static INLINE
166*c08cbc64SXin LI saidx_t *
tr_pivot(const saidx_t * ISAd,saidx_t * first,saidx_t * last)167*c08cbc64SXin LI tr_pivot(const saidx_t *ISAd, saidx_t *first, saidx_t *last) {
168*c08cbc64SXin LI   saidx_t *middle;
169*c08cbc64SXin LI   saidx_t t;
170*c08cbc64SXin LI 
171*c08cbc64SXin LI   t = last - first;
172*c08cbc64SXin LI   middle = first + t / 2;
173*c08cbc64SXin LI 
174*c08cbc64SXin LI   if(t <= 512) {
175*c08cbc64SXin LI     if(t <= 32) {
176*c08cbc64SXin LI       return tr_median3(ISAd, first, middle, last - 1);
177*c08cbc64SXin LI     } else {
178*c08cbc64SXin LI       t >>= 2;
179*c08cbc64SXin LI       return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1);
180*c08cbc64SXin LI     }
181*c08cbc64SXin LI   }
182*c08cbc64SXin LI   t >>= 3;
183*c08cbc64SXin LI   first  = tr_median3(ISAd, first, first + t, first + (t << 1));
184*c08cbc64SXin LI   middle = tr_median3(ISAd, middle - t, middle, middle + t);
185*c08cbc64SXin LI   last   = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1);
186*c08cbc64SXin LI   return tr_median3(ISAd, first, middle, last);
187*c08cbc64SXin LI }
188*c08cbc64SXin LI 
189*c08cbc64SXin LI 
190*c08cbc64SXin LI /*---------------------------------------------------------------------------*/
191*c08cbc64SXin LI 
192*c08cbc64SXin LI typedef struct _trbudget_t trbudget_t;
193*c08cbc64SXin LI struct _trbudget_t {
194*c08cbc64SXin LI   saidx_t chance;
195*c08cbc64SXin LI   saidx_t remain;
196*c08cbc64SXin LI   saidx_t incval;
197*c08cbc64SXin LI   saidx_t count;
198*c08cbc64SXin LI };
199*c08cbc64SXin LI 
200*c08cbc64SXin LI static INLINE
201*c08cbc64SXin LI void
trbudget_init(trbudget_t * budget,saidx_t chance,saidx_t incval)202*c08cbc64SXin LI trbudget_init(trbudget_t *budget, saidx_t chance, saidx_t incval) {
203*c08cbc64SXin LI   budget->chance = chance;
204*c08cbc64SXin LI   budget->remain = budget->incval = incval;
205*c08cbc64SXin LI }
206*c08cbc64SXin LI 
207*c08cbc64SXin LI static INLINE
208*c08cbc64SXin LI saint_t
trbudget_check(trbudget_t * budget,saidx_t size)209*c08cbc64SXin LI trbudget_check(trbudget_t *budget, saidx_t size) {
210*c08cbc64SXin LI   if(size <= budget->remain) { budget->remain -= size; return 1; }
211*c08cbc64SXin LI   if(budget->chance == 0) { budget->count += size; return 0; }
212*c08cbc64SXin LI   budget->remain += budget->incval - size;
213*c08cbc64SXin LI   budget->chance -= 1;
214*c08cbc64SXin LI   return 1;
215*c08cbc64SXin LI }
216*c08cbc64SXin LI 
217*c08cbc64SXin LI 
218*c08cbc64SXin LI /*---------------------------------------------------------------------------*/
219*c08cbc64SXin LI 
220*c08cbc64SXin LI static INLINE
221*c08cbc64SXin LI void
tr_partition(const saidx_t * ISAd,saidx_t * first,saidx_t * middle,saidx_t * last,saidx_t ** pa,saidx_t ** pb,saidx_t v)222*c08cbc64SXin LI tr_partition(const saidx_t *ISAd,
223*c08cbc64SXin LI              saidx_t *first, saidx_t *middle, saidx_t *last,
224*c08cbc64SXin LI              saidx_t **pa, saidx_t **pb, saidx_t v) {
225*c08cbc64SXin LI   saidx_t *a, *b, *c, *d, *e, *f;
226*c08cbc64SXin LI   saidx_t t, s;
227*c08cbc64SXin LI   saidx_t x = 0;
228*c08cbc64SXin LI 
229*c08cbc64SXin LI   for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { }
230*c08cbc64SXin LI   if(((a = b) < last) && (x < v)) {
231*c08cbc64SXin LI     for(; (++b < last) && ((x = ISAd[*b]) <= v);) {
232*c08cbc64SXin LI       if(x == v) { SWAP(*b, *a); ++a; }
233*c08cbc64SXin LI     }
234*c08cbc64SXin LI   }
235*c08cbc64SXin LI   for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { }
236*c08cbc64SXin LI   if((b < (d = c)) && (x > v)) {
237*c08cbc64SXin LI     for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
238*c08cbc64SXin LI       if(x == v) { SWAP(*c, *d); --d; }
239*c08cbc64SXin LI     }
240*c08cbc64SXin LI   }
241*c08cbc64SXin LI   for(; b < c;) {
242*c08cbc64SXin LI     SWAP(*b, *c);
243*c08cbc64SXin LI     for(; (++b < c) && ((x = ISAd[*b]) <= v);) {
244*c08cbc64SXin LI       if(x == v) { SWAP(*b, *a); ++a; }
245*c08cbc64SXin LI     }
246*c08cbc64SXin LI     for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
247*c08cbc64SXin LI       if(x == v) { SWAP(*c, *d); --d; }
248*c08cbc64SXin LI     }
249*c08cbc64SXin LI   }
250*c08cbc64SXin LI 
251*c08cbc64SXin LI   if(a <= d) {
252*c08cbc64SXin LI     c = b - 1;
253*c08cbc64SXin LI     if((s = a - first) > (t = b - a)) { s = t; }
254*c08cbc64SXin LI     for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
255*c08cbc64SXin LI     if((s = d - c) > (t = last - d - 1)) { s = t; }
256*c08cbc64SXin LI     for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
257*c08cbc64SXin LI     first += (b - a), last -= (d - c);
258*c08cbc64SXin LI   }
259*c08cbc64SXin LI   *pa = first, *pb = last;
260*c08cbc64SXin LI }
261*c08cbc64SXin LI 
262*c08cbc64SXin LI static
263*c08cbc64SXin LI void
tr_copy(saidx_t * ISA,const saidx_t * SA,saidx_t * first,saidx_t * a,saidx_t * b,saidx_t * last,saidx_t depth)264*c08cbc64SXin LI tr_copy(saidx_t *ISA, const saidx_t *SA,
265*c08cbc64SXin LI         saidx_t *first, saidx_t *a, saidx_t *b, saidx_t *last,
266*c08cbc64SXin LI         saidx_t depth) {
267*c08cbc64SXin LI   /* sort suffixes of middle partition
268*c08cbc64SXin LI      by using sorted order of suffixes of left and right partition. */
269*c08cbc64SXin LI   saidx_t *c, *d, *e;
270*c08cbc64SXin LI   saidx_t s, v;
271*c08cbc64SXin LI 
272*c08cbc64SXin LI   v = b - SA - 1;
273*c08cbc64SXin LI   for(c = first, d = a - 1; c <= d; ++c) {
274*c08cbc64SXin LI     if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
275*c08cbc64SXin LI       *++d = s;
276*c08cbc64SXin LI       ISA[s] = d - SA;
277*c08cbc64SXin LI     }
278*c08cbc64SXin LI   }
279*c08cbc64SXin LI   for(c = last - 1, e = d + 1, d = b; e < d; --c) {
280*c08cbc64SXin LI     if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
281*c08cbc64SXin LI       *--d = s;
282*c08cbc64SXin LI       ISA[s] = d - SA;
283*c08cbc64SXin LI     }
284*c08cbc64SXin LI   }
285*c08cbc64SXin LI }
286*c08cbc64SXin LI 
287*c08cbc64SXin LI static
288*c08cbc64SXin LI void
tr_partialcopy(saidx_t * ISA,const saidx_t * SA,saidx_t * first,saidx_t * a,saidx_t * b,saidx_t * last,saidx_t depth)289*c08cbc64SXin LI tr_partialcopy(saidx_t *ISA, const saidx_t *SA,
290*c08cbc64SXin LI                saidx_t *first, saidx_t *a, saidx_t *b, saidx_t *last,
291*c08cbc64SXin LI                saidx_t depth) {
292*c08cbc64SXin LI   saidx_t *c, *d, *e;
293*c08cbc64SXin LI   saidx_t s, v;
294*c08cbc64SXin LI   saidx_t rank, lastrank, newrank = -1;
295*c08cbc64SXin LI 
296*c08cbc64SXin LI   v = b - SA - 1;
297*c08cbc64SXin LI   lastrank = -1;
298*c08cbc64SXin LI   for(c = first, d = a - 1; c <= d; ++c) {
299*c08cbc64SXin LI     if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
300*c08cbc64SXin LI       *++d = s;
301*c08cbc64SXin LI       rank = ISA[s + depth];
302*c08cbc64SXin LI       if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
303*c08cbc64SXin LI       ISA[s] = newrank;
304*c08cbc64SXin LI     }
305*c08cbc64SXin LI   }
306*c08cbc64SXin LI 
307*c08cbc64SXin LI   lastrank = -1;
308*c08cbc64SXin LI   for(e = d; first <= e; --e) {
309*c08cbc64SXin LI     rank = ISA[*e];
310*c08cbc64SXin LI     if(lastrank != rank) { lastrank = rank; newrank = e - SA; }
311*c08cbc64SXin LI     if(newrank != rank) { ISA[*e] = newrank; }
312*c08cbc64SXin LI   }
313*c08cbc64SXin LI 
314*c08cbc64SXin LI   lastrank = -1;
315*c08cbc64SXin LI   for(c = last - 1, e = d + 1, d = b; e < d; --c) {
316*c08cbc64SXin LI     if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
317*c08cbc64SXin LI       *--d = s;
318*c08cbc64SXin LI       rank = ISA[s + depth];
319*c08cbc64SXin LI       if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
320*c08cbc64SXin LI       ISA[s] = newrank;
321*c08cbc64SXin LI     }
322*c08cbc64SXin LI   }
323*c08cbc64SXin LI }
324*c08cbc64SXin LI 
325*c08cbc64SXin LI static
326*c08cbc64SXin LI void
tr_introsort(saidx_t * ISA,const saidx_t * ISAd,saidx_t * SA,saidx_t * first,saidx_t * last,trbudget_t * budget)327*c08cbc64SXin LI tr_introsort(saidx_t *ISA, const saidx_t *ISAd,
328*c08cbc64SXin LI              saidx_t *SA, saidx_t *first, saidx_t *last,
329*c08cbc64SXin LI              trbudget_t *budget) {
330*c08cbc64SXin LI #define STACK_SIZE TR_STACKSIZE
331*c08cbc64SXin LI   struct { const saidx_t *a; saidx_t *b, *c; saint_t d, e; }stack[STACK_SIZE];
332*c08cbc64SXin LI   saidx_t *a, *b, *c;
333*c08cbc64SXin LI   saidx_t t;
334*c08cbc64SXin LI   saidx_t v, x = 0;
335*c08cbc64SXin LI   saidx_t incr = ISAd - ISA;
336*c08cbc64SXin LI   saint_t limit, next;
337*c08cbc64SXin LI   saint_t ssize, trlink = -1;
338*c08cbc64SXin LI 
339*c08cbc64SXin LI   for(ssize = 0, limit = tr_ilg(last - first);;) {
340*c08cbc64SXin LI 
341*c08cbc64SXin LI     if(limit < 0) {
342*c08cbc64SXin LI       if(limit == -1) {
343*c08cbc64SXin LI         /* tandem repeat partition */
344*c08cbc64SXin LI         tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1);
345*c08cbc64SXin LI 
346*c08cbc64SXin LI         /* update ranks */
347*c08cbc64SXin LI         if(a < last) {
348*c08cbc64SXin LI           for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
349*c08cbc64SXin LI         }
350*c08cbc64SXin LI         if(b < last) {
351*c08cbc64SXin LI           for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; }
352*c08cbc64SXin LI         }
353*c08cbc64SXin LI 
354*c08cbc64SXin LI         /* push */
355*c08cbc64SXin LI         if(1 < (b - a)) {
356*c08cbc64SXin LI           STACK_PUSH5(NULL, a, b, 0, 0);
357*c08cbc64SXin LI           STACK_PUSH5(ISAd - incr, first, last, -2, trlink);
358*c08cbc64SXin LI           trlink = ssize - 2;
359*c08cbc64SXin LI         }
360*c08cbc64SXin LI         if((a - first) <= (last - b)) {
361*c08cbc64SXin LI           if(1 < (a - first)) {
362*c08cbc64SXin LI             STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink);
363*c08cbc64SXin LI             last = a, limit = tr_ilg(a - first);
364*c08cbc64SXin LI           } else if(1 < (last - b)) {
365*c08cbc64SXin LI             first = b, limit = tr_ilg(last - b);
366*c08cbc64SXin LI           } else {
367*c08cbc64SXin LI             STACK_POP5(ISAd, first, last, limit, trlink);
368*c08cbc64SXin LI           }
369*c08cbc64SXin LI         } else {
370*c08cbc64SXin LI           if(1 < (last - b)) {
371*c08cbc64SXin LI             STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink);
372*c08cbc64SXin LI             first = b, limit = tr_ilg(last - b);
373*c08cbc64SXin LI           } else if(1 < (a - first)) {
374*c08cbc64SXin LI             last = a, limit = tr_ilg(a - first);
375*c08cbc64SXin LI           } else {
376*c08cbc64SXin LI             STACK_POP5(ISAd, first, last, limit, trlink);
377*c08cbc64SXin LI           }
378*c08cbc64SXin LI         }
379*c08cbc64SXin LI       } else if(limit == -2) {
380*c08cbc64SXin LI         /* tandem repeat copy */
381*c08cbc64SXin LI         a = stack[--ssize].b, b = stack[ssize].c;
382*c08cbc64SXin LI         if(stack[ssize].d == 0) {
383*c08cbc64SXin LI           tr_copy(ISA, SA, first, a, b, last, ISAd - ISA);
384*c08cbc64SXin LI         } else {
385*c08cbc64SXin LI           if(0 <= trlink) { stack[trlink].d = -1; }
386*c08cbc64SXin LI           tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA);
387*c08cbc64SXin LI         }
388*c08cbc64SXin LI         STACK_POP5(ISAd, first, last, limit, trlink);
389*c08cbc64SXin LI       } else {
390*c08cbc64SXin LI         /* sorted partition */
391*c08cbc64SXin LI         if(0 <= *first) {
392*c08cbc64SXin LI           a = first;
393*c08cbc64SXin LI           do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a));
394*c08cbc64SXin LI           first = a;
395*c08cbc64SXin LI         }
396*c08cbc64SXin LI         if(first < last) {
397*c08cbc64SXin LI           a = first; do { *a = ~*a; } while(*++a < 0);
398*c08cbc64SXin LI           next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1;
399*c08cbc64SXin LI           if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } }
400*c08cbc64SXin LI 
401*c08cbc64SXin LI           /* push */
402*c08cbc64SXin LI           if(trbudget_check(budget, a - first)) {
403*c08cbc64SXin LI             if((a - first) <= (last - a)) {
404*c08cbc64SXin LI               STACK_PUSH5(ISAd, a, last, -3, trlink);
405*c08cbc64SXin LI               ISAd += incr, last = a, limit = next;
406*c08cbc64SXin LI             } else {
407*c08cbc64SXin LI               if(1 < (last - a)) {
408*c08cbc64SXin LI                 STACK_PUSH5(ISAd + incr, first, a, next, trlink);
409*c08cbc64SXin LI                 first = a, limit = -3;
410*c08cbc64SXin LI               } else {
411*c08cbc64SXin LI                 ISAd += incr, last = a, limit = next;
412*c08cbc64SXin LI               }
413*c08cbc64SXin LI             }
414*c08cbc64SXin LI           } else {
415*c08cbc64SXin LI             if(0 <= trlink) { stack[trlink].d = -1; }
416*c08cbc64SXin LI             if(1 < (last - a)) {
417*c08cbc64SXin LI               first = a, limit = -3;
418*c08cbc64SXin LI             } else {
419*c08cbc64SXin LI               STACK_POP5(ISAd, first, last, limit, trlink);
420*c08cbc64SXin LI             }
421*c08cbc64SXin LI           }
422*c08cbc64SXin LI         } else {
423*c08cbc64SXin LI           STACK_POP5(ISAd, first, last, limit, trlink);
424*c08cbc64SXin LI         }
425*c08cbc64SXin LI       }
426*c08cbc64SXin LI       continue;
427*c08cbc64SXin LI     }
428*c08cbc64SXin LI 
429*c08cbc64SXin LI     if((last - first) <= TR_INSERTIONSORT_THRESHOLD) {
430*c08cbc64SXin LI       tr_insertionsort(ISAd, first, last);
431*c08cbc64SXin LI       limit = -3;
432*c08cbc64SXin LI       continue;
433*c08cbc64SXin LI     }
434*c08cbc64SXin LI 
435*c08cbc64SXin LI     if(limit-- == 0) {
436*c08cbc64SXin LI       tr_heapsort(ISAd, first, last - first);
437*c08cbc64SXin LI       for(a = last - 1; first < a; a = b) {
438*c08cbc64SXin LI         for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; }
439*c08cbc64SXin LI       }
440*c08cbc64SXin LI       limit = -3;
441*c08cbc64SXin LI       continue;
442*c08cbc64SXin LI     }
443*c08cbc64SXin LI 
444*c08cbc64SXin LI     /* choose pivot */
445*c08cbc64SXin LI     a = tr_pivot(ISAd, first, last);
446*c08cbc64SXin LI     SWAP(*first, *a);
447*c08cbc64SXin LI     v = ISAd[*first];
448*c08cbc64SXin LI 
449*c08cbc64SXin LI     /* partition */
450*c08cbc64SXin LI     tr_partition(ISAd, first, first + 1, last, &a, &b, v);
451*c08cbc64SXin LI     if((last - first) != (b - a)) {
452*c08cbc64SXin LI       next = (ISA[*a] != v) ? tr_ilg(b - a) : -1;
453*c08cbc64SXin LI 
454*c08cbc64SXin LI       /* update ranks */
455*c08cbc64SXin LI       for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
456*c08cbc64SXin LI       if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } }
457*c08cbc64SXin LI 
458*c08cbc64SXin LI       /* push */
459*c08cbc64SXin LI       if((1 < (b - a)) && (trbudget_check(budget, b - a))) {
460*c08cbc64SXin LI         if((a - first) <= (last - b)) {
461*c08cbc64SXin LI           if((last - b) <= (b - a)) {
462*c08cbc64SXin LI             if(1 < (a - first)) {
463*c08cbc64SXin LI               STACK_PUSH5(ISAd + incr, a, b, next, trlink);
464*c08cbc64SXin LI               STACK_PUSH5(ISAd, b, last, limit, trlink);
465*c08cbc64SXin LI               last = a;
466*c08cbc64SXin LI             } else if(1 < (last - b)) {
467*c08cbc64SXin LI               STACK_PUSH5(ISAd + incr, a, b, next, trlink);
468*c08cbc64SXin LI               first = b;
469*c08cbc64SXin LI             } else {
470*c08cbc64SXin LI               ISAd += incr, first = a, last = b, limit = next;
471*c08cbc64SXin LI             }
472*c08cbc64SXin LI           } else if((a - first) <= (b - a)) {
473*c08cbc64SXin LI             if(1 < (a - first)) {
474*c08cbc64SXin LI               STACK_PUSH5(ISAd, b, last, limit, trlink);
475*c08cbc64SXin LI               STACK_PUSH5(ISAd + incr, a, b, next, trlink);
476*c08cbc64SXin LI               last = a;
477*c08cbc64SXin LI             } else {
478*c08cbc64SXin LI               STACK_PUSH5(ISAd, b, last, limit, trlink);
479*c08cbc64SXin LI               ISAd += incr, first = a, last = b, limit = next;
480*c08cbc64SXin LI             }
481*c08cbc64SXin LI           } else {
482*c08cbc64SXin LI             STACK_PUSH5(ISAd, b, last, limit, trlink);
483*c08cbc64SXin LI             STACK_PUSH5(ISAd, first, a, limit, trlink);
484*c08cbc64SXin LI             ISAd += incr, first = a, last = b, limit = next;
485*c08cbc64SXin LI           }
486*c08cbc64SXin LI         } else {
487*c08cbc64SXin LI           if((a - first) <= (b - a)) {
488*c08cbc64SXin LI             if(1 < (last - b)) {
489*c08cbc64SXin LI               STACK_PUSH5(ISAd + incr, a, b, next, trlink);
490*c08cbc64SXin LI               STACK_PUSH5(ISAd, first, a, limit, trlink);
491*c08cbc64SXin LI               first = b;
492*c08cbc64SXin LI             } else if(1 < (a - first)) {
493*c08cbc64SXin LI               STACK_PUSH5(ISAd + incr, a, b, next, trlink);
494*c08cbc64SXin LI               last = a;
495*c08cbc64SXin LI             } else {
496*c08cbc64SXin LI               ISAd += incr, first = a, last = b, limit = next;
497*c08cbc64SXin LI             }
498*c08cbc64SXin LI           } else if((last - b) <= (b - a)) {
499*c08cbc64SXin LI             if(1 < (last - b)) {
500*c08cbc64SXin LI               STACK_PUSH5(ISAd, first, a, limit, trlink);
501*c08cbc64SXin LI               STACK_PUSH5(ISAd + incr, a, b, next, trlink);
502*c08cbc64SXin LI               first = b;
503*c08cbc64SXin LI             } else {
504*c08cbc64SXin LI               STACK_PUSH5(ISAd, first, a, limit, trlink);
505*c08cbc64SXin LI               ISAd += incr, first = a, last = b, limit = next;
506*c08cbc64SXin LI             }
507*c08cbc64SXin LI           } else {
508*c08cbc64SXin LI             STACK_PUSH5(ISAd, first, a, limit, trlink);
509*c08cbc64SXin LI             STACK_PUSH5(ISAd, b, last, limit, trlink);
510*c08cbc64SXin LI             ISAd += incr, first = a, last = b, limit = next;
511*c08cbc64SXin LI           }
512*c08cbc64SXin LI         }
513*c08cbc64SXin LI       } else {
514*c08cbc64SXin LI         if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; }
515*c08cbc64SXin LI         if((a - first) <= (last - b)) {
516*c08cbc64SXin LI           if(1 < (a - first)) {
517*c08cbc64SXin LI             STACK_PUSH5(ISAd, b, last, limit, trlink);
518*c08cbc64SXin LI             last = a;
519*c08cbc64SXin LI           } else if(1 < (last - b)) {
520*c08cbc64SXin LI             first = b;
521*c08cbc64SXin LI           } else {
522*c08cbc64SXin LI             STACK_POP5(ISAd, first, last, limit, trlink);
523*c08cbc64SXin LI           }
524*c08cbc64SXin LI         } else {
525*c08cbc64SXin LI           if(1 < (last - b)) {
526*c08cbc64SXin LI             STACK_PUSH5(ISAd, first, a, limit, trlink);
527*c08cbc64SXin LI             first = b;
528*c08cbc64SXin LI           } else if(1 < (a - first)) {
529*c08cbc64SXin LI             last = a;
530*c08cbc64SXin LI           } else {
531*c08cbc64SXin LI             STACK_POP5(ISAd, first, last, limit, trlink);
532*c08cbc64SXin LI           }
533*c08cbc64SXin LI         }
534*c08cbc64SXin LI       }
535*c08cbc64SXin LI     } else {
536*c08cbc64SXin LI       if(trbudget_check(budget, last - first)) {
537*c08cbc64SXin LI         limit = tr_ilg(last - first), ISAd += incr;
538*c08cbc64SXin LI       } else {
539*c08cbc64SXin LI         if(0 <= trlink) { stack[trlink].d = -1; }
540*c08cbc64SXin LI         STACK_POP5(ISAd, first, last, limit, trlink);
541*c08cbc64SXin LI       }
542*c08cbc64SXin LI     }
543*c08cbc64SXin LI   }
544*c08cbc64SXin LI #undef STACK_SIZE
545*c08cbc64SXin LI }
546*c08cbc64SXin LI 
547*c08cbc64SXin LI 
548*c08cbc64SXin LI 
549*c08cbc64SXin LI /*---------------------------------------------------------------------------*/
550*c08cbc64SXin LI 
551*c08cbc64SXin LI /*- Function -*/
552*c08cbc64SXin LI 
553*c08cbc64SXin LI /* Tandem repeat sort */
554*c08cbc64SXin LI void
trsort(saidx_t * ISA,saidx_t * SA,saidx_t n,saidx_t depth)555*c08cbc64SXin LI trsort(saidx_t *ISA, saidx_t *SA, saidx_t n, saidx_t depth) {
556*c08cbc64SXin LI   saidx_t *ISAd;
557*c08cbc64SXin LI   saidx_t *first, *last;
558*c08cbc64SXin LI   trbudget_t budget;
559*c08cbc64SXin LI   saidx_t t, skip, unsorted;
560*c08cbc64SXin LI 
561*c08cbc64SXin LI   trbudget_init(&budget, tr_ilg(n) * 2 / 3, n);
562*c08cbc64SXin LI /*  trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */
563*c08cbc64SXin LI   for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) {
564*c08cbc64SXin LI     first = SA;
565*c08cbc64SXin LI     skip = 0;
566*c08cbc64SXin LI     unsorted = 0;
567*c08cbc64SXin LI     do {
568*c08cbc64SXin LI       if((t = *first) < 0) { first -= t; skip += t; }
569*c08cbc64SXin LI       else {
570*c08cbc64SXin LI         if(skip != 0) { *(first + skip) = skip; skip = 0; }
571*c08cbc64SXin LI         last = SA + ISA[t] + 1;
572*c08cbc64SXin LI         if(1 < (last - first)) {
573*c08cbc64SXin LI           budget.count = 0;
574*c08cbc64SXin LI           tr_introsort(ISA, ISAd, SA, first, last, &budget);
575*c08cbc64SXin LI           if(budget.count != 0) { unsorted += budget.count; }
576*c08cbc64SXin LI           else { skip = first - last; }
577*c08cbc64SXin LI         } else if((last - first) == 1) {
578*c08cbc64SXin LI           skip = -1;
579*c08cbc64SXin LI         }
580*c08cbc64SXin LI         first = last;
581*c08cbc64SXin LI       }
582*c08cbc64SXin LI     } while(first < (SA + n));
583*c08cbc64SXin LI     if(skip != 0) { *(first + skip) = skip; }
584*c08cbc64SXin LI     if(unsorted == 0) { break; }
585*c08cbc64SXin LI   }
586*c08cbc64SXin LI }
587