/*
 * divsufsort.c for libdivsufsort
 * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
26*c08cbc64SXin LI 
27*c08cbc64SXin LI #include "divsufsort_private.h"
28*c08cbc64SXin LI #ifdef _OPENMP
29*c08cbc64SXin LI # include <omp.h>
30*c08cbc64SXin LI #endif
31*c08cbc64SXin LI 
32*c08cbc64SXin LI 
/*- Private Functions -*/
34*c08cbc64SXin LI 
35*c08cbc64SXin LI /* Sorts suffixes of type B*. */
36*c08cbc64SXin LI static
37*c08cbc64SXin LI saidx_t
sort_typeBstar(const sauchar_t * T,saidx_t * SA,saidx_t * bucket_A,saidx_t * bucket_B,saidx_t n)38*c08cbc64SXin LI sort_typeBstar(const sauchar_t *T, saidx_t *SA,
39*c08cbc64SXin LI                saidx_t *bucket_A, saidx_t *bucket_B,
40*c08cbc64SXin LI                saidx_t n) {
41*c08cbc64SXin LI   saidx_t *PAb, *ISAb, *buf;
42*c08cbc64SXin LI #ifdef _OPENMP
43*c08cbc64SXin LI   saidx_t *curbuf;
44*c08cbc64SXin LI   saidx_t l;
45*c08cbc64SXin LI #endif
46*c08cbc64SXin LI   saidx_t i, j, k, t, m, bufsize;
47*c08cbc64SXin LI   saint_t c0, c1;
48*c08cbc64SXin LI #ifdef _OPENMP
49*c08cbc64SXin LI   saint_t d0, d1;
50*c08cbc64SXin LI   int tmp;
51*c08cbc64SXin LI #endif
52*c08cbc64SXin LI 
53*c08cbc64SXin LI   /* Initialize bucket arrays. */
54*c08cbc64SXin LI   for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; }
55*c08cbc64SXin LI   for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; }
56*c08cbc64SXin LI 
57*c08cbc64SXin LI   /* Count the number of occurrences of the first one or two characters of each
58*c08cbc64SXin LI      type A, B and B* suffix. Moreover, store the beginning position of all
59*c08cbc64SXin LI      type B* suffixes into the array SA. */
60*c08cbc64SXin LI   for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) {
61*c08cbc64SXin LI     /* type A suffix. */
62*c08cbc64SXin LI     do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1));
63*c08cbc64SXin LI     if(0 <= i) {
64*c08cbc64SXin LI       /* type B* suffix. */
65*c08cbc64SXin LI       ++BUCKET_BSTAR(c0, c1);
66*c08cbc64SXin LI       SA[--m] = i;
67*c08cbc64SXin LI       /* type B suffix. */
68*c08cbc64SXin LI       for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) {
69*c08cbc64SXin LI         ++BUCKET_B(c0, c1);
70*c08cbc64SXin LI       }
71*c08cbc64SXin LI     }
72*c08cbc64SXin LI   }
73*c08cbc64SXin LI   m = n - m;
74*c08cbc64SXin LI /*
75*c08cbc64SXin LI note:
76*c08cbc64SXin LI   A type B* suffix is lexicographically smaller than a type B suffix that
77*c08cbc64SXin LI   begins with the same first two characters.
78*c08cbc64SXin LI */
79*c08cbc64SXin LI 
80*c08cbc64SXin LI   /* Calculate the index of start/end point of each bucket. */
81*c08cbc64SXin LI   for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) {
82*c08cbc64SXin LI     t = i + BUCKET_A(c0);
83*c08cbc64SXin LI     BUCKET_A(c0) = i + j; /* start point */
84*c08cbc64SXin LI     i = t + BUCKET_B(c0, c0);
85*c08cbc64SXin LI     for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) {
86*c08cbc64SXin LI       j += BUCKET_BSTAR(c0, c1);
87*c08cbc64SXin LI       BUCKET_BSTAR(c0, c1) = j; /* end point */
88*c08cbc64SXin LI       i += BUCKET_B(c0, c1);
89*c08cbc64SXin LI     }
90*c08cbc64SXin LI   }
91*c08cbc64SXin LI 
92*c08cbc64SXin LI   if(0 < m) {
93*c08cbc64SXin LI     /* Sort the type B* suffixes by their first two characters. */
94*c08cbc64SXin LI     PAb = SA + n - m; ISAb = SA + m;
95*c08cbc64SXin LI     for(i = m - 2; 0 <= i; --i) {
96*c08cbc64SXin LI       t = PAb[i], c0 = T[t], c1 = T[t + 1];
97*c08cbc64SXin LI       SA[--BUCKET_BSTAR(c0, c1)] = i;
98*c08cbc64SXin LI     }
99*c08cbc64SXin LI     t = PAb[m - 1], c0 = T[t], c1 = T[t + 1];
100*c08cbc64SXin LI     SA[--BUCKET_BSTAR(c0, c1)] = m - 1;
101*c08cbc64SXin LI 
102*c08cbc64SXin LI     /* Sort the type B* substrings using sssort. */
103*c08cbc64SXin LI #ifdef _OPENMP
104*c08cbc64SXin LI     tmp = omp_get_max_threads();
105*c08cbc64SXin LI     buf = SA + m, bufsize = (n - (2 * m)) / tmp;
106*c08cbc64SXin LI     c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m;
107*c08cbc64SXin LI #pragma omp parallel default(shared) private(curbuf, k, l, d0, d1, tmp)
108*c08cbc64SXin LI     {
109*c08cbc64SXin LI       tmp = omp_get_thread_num();
110*c08cbc64SXin LI       curbuf = buf + tmp * bufsize;
111*c08cbc64SXin LI       k = 0;
112*c08cbc64SXin LI       for(;;) {
113*c08cbc64SXin LI         #pragma omp critical(sssort_lock)
114*c08cbc64SXin LI         {
115*c08cbc64SXin LI           if(0 < (l = j)) {
116*c08cbc64SXin LI             d0 = c0, d1 = c1;
117*c08cbc64SXin LI             do {
118*c08cbc64SXin LI               k = BUCKET_BSTAR(d0, d1);
119*c08cbc64SXin LI               if(--d1 <= d0) {
120*c08cbc64SXin LI                 d1 = ALPHABET_SIZE - 1;
121*c08cbc64SXin LI                 if(--d0 < 0) { break; }
122*c08cbc64SXin LI               }
123*c08cbc64SXin LI             } while(((l - k) <= 1) && (0 < (l = k)));
124*c08cbc64SXin LI             c0 = d0, c1 = d1, j = k;
125*c08cbc64SXin LI           }
126*c08cbc64SXin LI         }
127*c08cbc64SXin LI         if(l == 0) { break; }
128*c08cbc64SXin LI         sssort(T, PAb, SA + k, SA + l,
129*c08cbc64SXin LI                curbuf, bufsize, 2, n, *(SA + k) == (m - 1));
130*c08cbc64SXin LI       }
131*c08cbc64SXin LI     }
132*c08cbc64SXin LI #else
133*c08cbc64SXin LI     buf = SA + m, bufsize = n - (2 * m);
134*c08cbc64SXin LI     for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
135*c08cbc64SXin LI       for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
136*c08cbc64SXin LI         i = BUCKET_BSTAR(c0, c1);
137*c08cbc64SXin LI         if(1 < (j - i)) {
138*c08cbc64SXin LI           sssort(T, PAb, SA + i, SA + j,
139*c08cbc64SXin LI                  buf, bufsize, 2, n, *(SA + i) == (m - 1));
140*c08cbc64SXin LI         }
141*c08cbc64SXin LI       }
142*c08cbc64SXin LI     }
143*c08cbc64SXin LI #endif
144*c08cbc64SXin LI 
145*c08cbc64SXin LI     /* Compute ranks of type B* substrings. */
146*c08cbc64SXin LI     for(i = m - 1; 0 <= i; --i) {
147*c08cbc64SXin LI       if(0 <= SA[i]) {
148*c08cbc64SXin LI         j = i;
149*c08cbc64SXin LI         do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i]));
150*c08cbc64SXin LI         SA[i + 1] = i - j;
151*c08cbc64SXin LI         if(i <= 0) { break; }
152*c08cbc64SXin LI       }
153*c08cbc64SXin LI       j = i;
154*c08cbc64SXin LI       do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0);
155*c08cbc64SXin LI       ISAb[SA[i]] = j;
156*c08cbc64SXin LI     }
157*c08cbc64SXin LI 
158*c08cbc64SXin LI     /* Construct the inverse suffix array of type B* suffixes using trsort. */
159*c08cbc64SXin LI     trsort(ISAb, SA, m, 1);
160*c08cbc64SXin LI 
161*c08cbc64SXin LI     /* Set the sorted order of tyoe B* suffixes. */
162*c08cbc64SXin LI     for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
163*c08cbc64SXin LI       for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
164*c08cbc64SXin LI       if(0 <= i) {
165*c08cbc64SXin LI         t = i;
166*c08cbc64SXin LI         for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { }
167*c08cbc64SXin LI         SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t;
168*c08cbc64SXin LI       }
169*c08cbc64SXin LI     }
170*c08cbc64SXin LI 
171*c08cbc64SXin LI     /* Calculate the index of start/end point of each bucket. */
172*c08cbc64SXin LI     BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */
173*c08cbc64SXin LI     for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) {
174*c08cbc64SXin LI       i = BUCKET_A(c0 + 1) - 1;
175*c08cbc64SXin LI       for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) {
176*c08cbc64SXin LI         t = i - BUCKET_B(c0, c1);
177*c08cbc64SXin LI         BUCKET_B(c0, c1) = i; /* end point */
178*c08cbc64SXin LI 
179*c08cbc64SXin LI         /* Move all type B* suffixes to the correct position. */
180*c08cbc64SXin LI         for(i = t, j = BUCKET_BSTAR(c0, c1);
181*c08cbc64SXin LI             j <= k;
182*c08cbc64SXin LI             --i, --k) { SA[i] = SA[k]; }
183*c08cbc64SXin LI       }
184*c08cbc64SXin LI       BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */
185*c08cbc64SXin LI       BUCKET_B(c0, c0) = i; /* end point */
186*c08cbc64SXin LI     }
187*c08cbc64SXin LI   }
188*c08cbc64SXin LI 
189*c08cbc64SXin LI   return m;
190*c08cbc64SXin LI }
191*c08cbc64SXin LI 
192*c08cbc64SXin LI /* Constructs the suffix array by using the sorted order of type B* suffixes. */
193*c08cbc64SXin LI static
194*c08cbc64SXin LI void
construct_SA(const sauchar_t * T,saidx_t * SA,saidx_t * bucket_A,saidx_t * bucket_B,saidx_t n,saidx_t m)195*c08cbc64SXin LI construct_SA(const sauchar_t *T, saidx_t *SA,
196*c08cbc64SXin LI              saidx_t *bucket_A, saidx_t *bucket_B,
197*c08cbc64SXin LI              saidx_t n, saidx_t m) {
198*c08cbc64SXin LI   saidx_t *i, *j, *k;
199*c08cbc64SXin LI   saidx_t s;
200*c08cbc64SXin LI   saint_t c0, c1, c2;
201*c08cbc64SXin LI 
202*c08cbc64SXin LI   if(0 < m) {
203*c08cbc64SXin LI     /* Construct the sorted order of type B suffixes by using
204*c08cbc64SXin LI        the sorted order of type B* suffixes. */
205*c08cbc64SXin LI     for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
206*c08cbc64SXin LI       /* Scan the suffix array from right to left. */
207*c08cbc64SXin LI       for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
208*c08cbc64SXin LI           j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
209*c08cbc64SXin LI           i <= j;
210*c08cbc64SXin LI           --j) {
211*c08cbc64SXin LI         if(0 < (s = *j)) {
212*c08cbc64SXin LI           assert(T[s] == c1);
213*c08cbc64SXin LI           assert(((s + 1) < n) && (T[s] <= T[s + 1]));
214*c08cbc64SXin LI           assert(T[s - 1] <= T[s]);
215*c08cbc64SXin LI           *j = ~s;
216*c08cbc64SXin LI           c0 = T[--s];
217*c08cbc64SXin LI           if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
218*c08cbc64SXin LI           if(c0 != c2) {
219*c08cbc64SXin LI             if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
220*c08cbc64SXin LI             k = SA + BUCKET_B(c2 = c0, c1);
221*c08cbc64SXin LI           }
222*c08cbc64SXin LI           assert(k < j);
223*c08cbc64SXin LI           *k-- = s;
224*c08cbc64SXin LI         } else {
225*c08cbc64SXin LI           assert(((s == 0) && (T[s] == c1)) || (s < 0));
226*c08cbc64SXin LI           *j = ~s;
227*c08cbc64SXin LI         }
228*c08cbc64SXin LI       }
229*c08cbc64SXin LI     }
230*c08cbc64SXin LI   }
231*c08cbc64SXin LI 
232*c08cbc64SXin LI   /* Construct the suffix array by using
233*c08cbc64SXin LI      the sorted order of type B suffixes. */
234*c08cbc64SXin LI   k = SA + BUCKET_A(c2 = T[n - 1]);
235*c08cbc64SXin LI   *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1);
236*c08cbc64SXin LI   /* Scan the suffix array from left to right. */
237*c08cbc64SXin LI   for(i = SA, j = SA + n; i < j; ++i) {
238*c08cbc64SXin LI     if(0 < (s = *i)) {
239*c08cbc64SXin LI       assert(T[s - 1] >= T[s]);
240*c08cbc64SXin LI       c0 = T[--s];
241*c08cbc64SXin LI       if((s == 0) || (T[s - 1] < c0)) { s = ~s; }
242*c08cbc64SXin LI       if(c0 != c2) {
243*c08cbc64SXin LI         BUCKET_A(c2) = k - SA;
244*c08cbc64SXin LI         k = SA + BUCKET_A(c2 = c0);
245*c08cbc64SXin LI       }
246*c08cbc64SXin LI       assert(i < k);
247*c08cbc64SXin LI       *k++ = s;
248*c08cbc64SXin LI     } else {
249*c08cbc64SXin LI       assert(s < 0);
250*c08cbc64SXin LI       *i = ~s;
251*c08cbc64SXin LI     }
252*c08cbc64SXin LI   }
253*c08cbc64SXin LI }
254*c08cbc64SXin LI 
255*c08cbc64SXin LI /* Constructs the burrows-wheeler transformed string directly
256*c08cbc64SXin LI    by using the sorted order of type B* suffixes. */
257*c08cbc64SXin LI static
258*c08cbc64SXin LI saidx_t
construct_BWT(const sauchar_t * T,saidx_t * SA,saidx_t * bucket_A,saidx_t * bucket_B,saidx_t n,saidx_t m)259*c08cbc64SXin LI construct_BWT(const sauchar_t *T, saidx_t *SA,
260*c08cbc64SXin LI               saidx_t *bucket_A, saidx_t *bucket_B,
261*c08cbc64SXin LI               saidx_t n, saidx_t m) {
262*c08cbc64SXin LI   saidx_t *i, *j, *k, *orig;
263*c08cbc64SXin LI   saidx_t s;
264*c08cbc64SXin LI   saint_t c0, c1, c2;
265*c08cbc64SXin LI 
266*c08cbc64SXin LI   if(0 < m) {
267*c08cbc64SXin LI     /* Construct the sorted order of type B suffixes by using
268*c08cbc64SXin LI        the sorted order of type B* suffixes. */
269*c08cbc64SXin LI     for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
270*c08cbc64SXin LI       /* Scan the suffix array from right to left. */
271*c08cbc64SXin LI       for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
272*c08cbc64SXin LI           j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
273*c08cbc64SXin LI           i <= j;
274*c08cbc64SXin LI           --j) {
275*c08cbc64SXin LI         if(0 < (s = *j)) {
276*c08cbc64SXin LI           assert(T[s] == c1);
277*c08cbc64SXin LI           assert(((s + 1) < n) && (T[s] <= T[s + 1]));
278*c08cbc64SXin LI           assert(T[s - 1] <= T[s]);
279*c08cbc64SXin LI           c0 = T[--s];
280*c08cbc64SXin LI           *j = ~((saidx_t)c0);
281*c08cbc64SXin LI           if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
282*c08cbc64SXin LI           if(c0 != c2) {
283*c08cbc64SXin LI             if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
284*c08cbc64SXin LI             k = SA + BUCKET_B(c2 = c0, c1);
285*c08cbc64SXin LI           }
286*c08cbc64SXin LI           assert(k < j);
287*c08cbc64SXin LI           *k-- = s;
288*c08cbc64SXin LI         } else if(s != 0) {
289*c08cbc64SXin LI           *j = ~s;
290*c08cbc64SXin LI #ifndef NDEBUG
291*c08cbc64SXin LI         } else {
292*c08cbc64SXin LI           assert(T[s] == c1);
293*c08cbc64SXin LI #endif
294*c08cbc64SXin LI         }
295*c08cbc64SXin LI       }
296*c08cbc64SXin LI     }
297*c08cbc64SXin LI   }
298*c08cbc64SXin LI 
299*c08cbc64SXin LI   /* Construct the BWTed string by using
300*c08cbc64SXin LI      the sorted order of type B suffixes. */
301*c08cbc64SXin LI   k = SA + BUCKET_A(c2 = T[n - 1]);
302*c08cbc64SXin LI   *k++ = (T[n - 2] < c2) ? ~((saidx_t)T[n - 2]) : (n - 1);
303*c08cbc64SXin LI   /* Scan the suffix array from left to right. */
304*c08cbc64SXin LI   for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
305*c08cbc64SXin LI     if(0 < (s = *i)) {
306*c08cbc64SXin LI       assert(T[s - 1] >= T[s]);
307*c08cbc64SXin LI       c0 = T[--s];
308*c08cbc64SXin LI       *i = c0;
309*c08cbc64SXin LI       if((0 < s) && (T[s - 1] < c0)) { s = ~((saidx_t)T[s - 1]); }
310*c08cbc64SXin LI       if(c0 != c2) {
311*c08cbc64SXin LI         BUCKET_A(c2) = k - SA;
312*c08cbc64SXin LI         k = SA + BUCKET_A(c2 = c0);
313*c08cbc64SXin LI       }
314*c08cbc64SXin LI       assert(i < k);
315*c08cbc64SXin LI       *k++ = s;
316*c08cbc64SXin LI     } else if(s != 0) {
317*c08cbc64SXin LI       *i = ~s;
318*c08cbc64SXin LI     } else {
319*c08cbc64SXin LI       orig = i;
320*c08cbc64SXin LI     }
321*c08cbc64SXin LI   }
322*c08cbc64SXin LI 
323*c08cbc64SXin LI   return orig - SA;
324*c08cbc64SXin LI }
325*c08cbc64SXin LI 
326*c08cbc64SXin LI 
/*---------------------------------------------------------------------------*/

/*- Function -*/
330*c08cbc64SXin LI 
331*c08cbc64SXin LI saint_t
divsufsort(const sauchar_t * T,saidx_t * SA,saidx_t n)332*c08cbc64SXin LI divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n) {
333*c08cbc64SXin LI   saidx_t *bucket_A, *bucket_B;
334*c08cbc64SXin LI   saidx_t m;
335*c08cbc64SXin LI   saint_t err = 0;
336*c08cbc64SXin LI 
337*c08cbc64SXin LI   /* Check arguments. */
338*c08cbc64SXin LI   if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; }
339*c08cbc64SXin LI   else if(n == 0) { return 0; }
340*c08cbc64SXin LI   else if(n == 1) { SA[0] = 0; return 0; }
341*c08cbc64SXin LI   else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; }
342*c08cbc64SXin LI 
343*c08cbc64SXin LI   bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t));
344*c08cbc64SXin LI   bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t));
345*c08cbc64SXin LI 
346*c08cbc64SXin LI   /* Suffixsort. */
347*c08cbc64SXin LI   if((bucket_A != NULL) && (bucket_B != NULL)) {
348*c08cbc64SXin LI     m = sort_typeBstar(T, SA, bucket_A, bucket_B, n);
349*c08cbc64SXin LI     construct_SA(T, SA, bucket_A, bucket_B, n, m);
350*c08cbc64SXin LI   } else {
351*c08cbc64SXin LI     err = -2;
352*c08cbc64SXin LI   }
353*c08cbc64SXin LI 
354*c08cbc64SXin LI   free(bucket_B);
355*c08cbc64SXin LI   free(bucket_A);
356*c08cbc64SXin LI 
357*c08cbc64SXin LI   return err;
358*c08cbc64SXin LI }
359*c08cbc64SXin LI 
360*c08cbc64SXin LI saidx_t
divbwt(const sauchar_t * T,sauchar_t * U,saidx_t * A,saidx_t n)361*c08cbc64SXin LI divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n) {
362*c08cbc64SXin LI   saidx_t *B;
363*c08cbc64SXin LI   saidx_t *bucket_A, *bucket_B;
364*c08cbc64SXin LI   saidx_t m, pidx, i;
365*c08cbc64SXin LI 
366*c08cbc64SXin LI   /* Check arguments. */
367*c08cbc64SXin LI   if((T == NULL) || (U == NULL) || (n < 0)) { return -1; }
368*c08cbc64SXin LI   else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; }
369*c08cbc64SXin LI 
370*c08cbc64SXin LI   if((B = A) == NULL) { B = (saidx_t *)malloc((size_t)(n + 1) * sizeof(saidx_t)); }
371*c08cbc64SXin LI   bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t));
372*c08cbc64SXin LI   bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t));
373*c08cbc64SXin LI 
374*c08cbc64SXin LI   /* Burrows-Wheeler Transform. */
375*c08cbc64SXin LI   if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) {
376*c08cbc64SXin LI     m = sort_typeBstar(T, B, bucket_A, bucket_B, n);
377*c08cbc64SXin LI     pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m);
378*c08cbc64SXin LI 
379*c08cbc64SXin LI     /* Copy to output string. */
380*c08cbc64SXin LI     U[0] = T[n - 1];
381*c08cbc64SXin LI     for(i = 0; i < pidx; ++i) { U[i + 1] = (sauchar_t)B[i]; }
382*c08cbc64SXin LI     for(i += 1; i < n; ++i) { U[i] = (sauchar_t)B[i]; }
383*c08cbc64SXin LI     pidx += 1;
384*c08cbc64SXin LI   } else {
385*c08cbc64SXin LI     pidx = -2;
386*c08cbc64SXin LI   }
387*c08cbc64SXin LI 
388*c08cbc64SXin LI   free(bucket_B);
389*c08cbc64SXin LI   free(bucket_A);
390*c08cbc64SXin LI   if(A == NULL) { free(B); }
391*c08cbc64SXin LI 
392*c08cbc64SXin LI   return pidx;
393*c08cbc64SXin LI }
394*c08cbc64SXin LI 
395*c08cbc64SXin LI const char *
divsufsort_version(void)396*c08cbc64SXin LI divsufsort_version(void) {
397*c08cbc64SXin LI   return PROJECT_VERSION_FULL;
398*c08cbc64SXin LI }
399