1 /*
2  * sais.hxx for sais
3  * Copyright (c) 2008-2010 Yuta Mori All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person
6  * obtaining a copy of this software and associated documentation
7  * files (the "Software"), to deal in the Software without
8  * restriction, including without limitation the rights to use,
9  * copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following
12  * conditions:
13  *
14  * The above copyright notice and this permission notice shall be
15  * included in all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24  * OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 #ifndef _SAIS_HXX
28 #define _SAIS_HXX 1
29 #ifdef __cplusplus
30 
31 #include <cassert>
32 #include <iterator>
33 #include <limits>
34 
35 #ifdef __INTEL_COMPILER
36 #pragma warning(disable : 383 981 1418)
37 #endif
38 
39 #ifdef _MSC_VER
40 #pragma warning(push)
41 #pragma warning(disable : 4365)
42 #endif
43 
44 namespace saisxx_private {
45 
46 /* find the start or end of each bucket */
47 template<typename string_type, typename bucket_type, typename index_type>
48 void
getCounts(const string_type T,bucket_type C,index_type n,index_type k)49 getCounts(const string_type T, bucket_type C, index_type n, index_type k) {
50   index_type i;
51   for(i = 0; i < k; ++i) { C[i] = 0; }
52   for(i = 0; i < n; ++i) { ++C[T[i]]; }
53 }
54 template<typename bucketC_type, typename bucketB_type, typename index_type>
55 void
getBuckets(const bucketC_type C,bucketB_type B,index_type k,bool end)56 getBuckets(const bucketC_type C, bucketB_type B, index_type k, bool end) {
57   index_type i, sum = 0;
58   if(end != false) { for(i = 0; i < k; ++i) { sum += C[i]; B[i] = sum; } }
59   else { for(i = 0; i < k; ++i) { sum += C[i]; B[i] = sum - C[i]; } }
60 }
61 
62 template<typename string_type, typename sarray_type,
63          typename bucketC_type, typename bucketB_type, typename index_type>
64 void
LMSsort1(string_type T,sarray_type SA,bucketC_type C,bucketB_type B,index_type n,index_type k,bool recount)65 LMSsort1(string_type T, sarray_type SA,
66          bucketC_type C, bucketB_type B,
67          index_type n, index_type k, bool recount) {
68 typedef typename std::iterator_traits<string_type>::value_type char_type;
69   sarray_type b;
70   index_type i, j;
71   char_type c0, c1;
72 
73   /* compute SAl */
74   if(recount != false) { getCounts(T, C, n, k); }
75   getBuckets(C, B, k, false); /* find starts of buckets */
76   j = n - 1;
77   b = SA + B[c1 = T[j]];
78   --j;
79   *b++ = (T[j] < c1) ? ~j : j;
80   for(i = 0; i < n; ++i) {
81     if(0 < (j = SA[i])) {
82       assert(T[j] >= T[j + 1]);
83       if((c0 = T[j]) != c1) { B[c1] = b - SA; b = SA + B[c1 = c0]; }
84       assert(i < (b - SA));
85       --j;
86       *b++ = (T[j] < c1) ? ~j : j;
87       SA[i] = 0;
88     } else if(j < 0) {
89       SA[i] = ~j;
90     }
91   }
92   /* compute SAs */
93   if(recount != false) { getCounts(T, C, n, k); }
94   getBuckets(C, B, k, true); /* find ends of buckets */
95   for(i = n - 1, b = SA + B[c1 = 0]; 0 <= i; --i) {
96     if(0 < (j = SA[i])) {
97       assert(T[j] <= T[j + 1]);
98       if((c0 = T[j]) != c1) { B[c1] = b - SA; b = SA + B[c1 = c0]; }
99       assert((b - SA) <= i);
100       --j;
101       *--b = (T[j] > c1) ? ~(j + 1) : j;
102       SA[i] = 0;
103     }
104   }
105 }
106 template<typename string_type, typename sarray_type, typename index_type>
107 index_type
LMSpostproc1(string_type T,sarray_type SA,index_type n,index_type m)108 LMSpostproc1(string_type T, sarray_type SA, index_type n, index_type m) {
109 typedef typename std::iterator_traits<string_type>::value_type char_type;
110   index_type i, j, p, q, plen, qlen, name;
111   char_type c0, c1;
112   bool diff;
113 
114   /* compact all the sorted substrings into the first m items of SA
115       2*m must be not larger than n (proveable) */
116   assert(0 < n);
117   for(i = 0; (p = SA[i]) < 0; ++i) { SA[i] = ~p; assert((i + 1) < n); }
118   if(i < m) {
119     for(j = i, ++i;; ++i) {
120       assert(i < n);
121       if((p = SA[i]) < 0) {
122         SA[j++] = ~p; SA[i] = 0;
123         if(j == m) { break; }
124       }
125     }
126   }
127 
128   /* store the length of all substrings */
129   i = n - 1; j = n - 1; c0 = T[n - 1];
130   do { c1 = c0; } while((0 <= --i) && ((c0 = T[i]) >= c1));
131   for(; 0 <= i;) {
132     do { c1 = c0; } while((0 <= --i) && ((c0 = T[i]) <= c1));
133     if(0 <= i) {
134       SA[m + ((i + 1) >> 1)] = j - i; j = i + 1;
135       do { c1 = c0; } while((0 <= --i) && ((c0 = T[i]) >= c1));
136     }
137   }
138 
139   /* find the lexicographic names of all substrings */
140   for(i = 0, name = 0, q = n, qlen = 0; i < m; ++i) {
141     p = SA[i], plen = SA[m + (p >> 1)], diff = true;
142     if((plen == qlen) && ((q + plen) < n)) {
143       for(j = 0; (j < plen) && (T[p + j] == T[q + j]); ++j) { }
144       if(j == plen) { diff = false; }
145     }
146     if(diff != false) { ++name, q = p, qlen = plen; }
147     SA[m + (p >> 1)] = name;
148   }
149 
150   return name;
151 }
152 template<typename string_type, typename sarray_type,
153          typename bucketC_type, typename bucketB_type, typename bucketD_type,
154          typename index_type>
155 void
LMSsort2(string_type T,sarray_type SA,bucketC_type C,bucketB_type B,bucketD_type D,index_type n,index_type k)156 LMSsort2(string_type T, sarray_type SA,
157          bucketC_type C, bucketB_type B, bucketD_type D,
158          index_type n, index_type k) {
159 typedef typename std::iterator_traits<string_type>::value_type char_type;
160   sarray_type b;
161   index_type i, j, t, d;
162   char_type c0, c1;
163 
164   /* compute SAl */
165   getBuckets(C, B, k, false); /* find starts of buckets */
166   j = n - 1;
167   b = SA + B[c1 = T[j]];
168   --j;
169   t = (T[j] < c1);
170   j += n;
171   *b++ = (t & 1) ? ~j : j;
172   for(i = 0, d = 0; i < n; ++i) {
173     if(0 < (j = SA[i])) {
174       if(n <= j) { d += 1; j -= n; }
175       assert(T[j] >= T[j + 1]);
176       if((c0 = T[j]) != c1) { B[c1] = b - SA; b = SA + B[c1 = c0]; }
177       assert(i < (b - SA));
178       --j;
179       t = c0; t = (t << 1) | (T[j] < c1);
180       if(D[t] != d) { j += n; D[t] = d; }
181       *b++ = (t & 1) ? ~j : j;
182       SA[i] = 0;
183     } else if(j < 0) {
184       SA[i] = ~j;
185     }
186   }
187   for(i = n - 1; 0 <= i; --i) {
188     if(0 < SA[i]) {
189       if(SA[i] < n) {
190         SA[i] += n;
191         for(j = i - 1; SA[j] < n; --j) { }
192         SA[j] -= n;
193         i = j;
194       }
195     }
196   }
197 
198   /* compute SAs */
199   getBuckets(C, B, k, true); /* find ends of buckets */
200   for(i = n - 1, d += 1, b = SA + B[c1 = 0]; 0 <= i; --i) {
201     if(0 < (j = SA[i])) {
202       if(n <= j) { d += 1; j -= n; }
203       assert(T[j] <= T[j + 1]);
204       if((c0 = T[j]) != c1) { B[c1] = b - SA; b = SA + B[c1 = c0]; }
205       assert((b - SA) <= i);
206       --j;
207       t = c0; t = (t << 1) | (T[j] > c1);
208       if(D[t] != d) { j += n; D[t] = d; }
209       *--b = (t & 1) ? ~(j + 1) : j;
210       SA[i] = 0;
211     }
212   }
213 }
214 template<typename sarray_type, typename index_type>
215 index_type
LMSpostproc2(sarray_type SA,index_type n,index_type m)216 LMSpostproc2(sarray_type SA, index_type n, index_type m) {
217   index_type i, j, d, name;
218 
219   /* compact all the sorted LMS substrings into the first m items of SA */
220   assert(0 < n);
221   for(i = 0, name = 0; (j = SA[i]) < 0; ++i) {
222     j = ~j;
223     if(n <= j) { name += 1; }
224     SA[i] = j;
225     assert((i + 1) < n);
226   }
227   if(i < m) {
228     for(d = i, ++i;; ++i) {
229       assert(i < n);
230       if((j = SA[i]) < 0) {
231         j = ~j;
232         if(n <= j) { name += 1; }
233         SA[d++] = j; SA[i] = 0;
234         if(d == m) { break; }
235       }
236     }
237   }
238   if(name < m) {
239     /* store the lexicographic names */
240     for(i = m - 1, d = name + 1; 0 <= i; --i) {
241       if(n <= (j = SA[i])) { j -= n; --d; }
242       SA[m + (j >> 1)] = d;
243     }
244   } else {
245     /* unset flags */
246     for(i = 0; i < m; ++i) {
247       if(n <= (j = SA[i])) { j -= n; SA[i] = j; }
248     }
249   }
250 
251   return name;
252 }
253 
254 /* compute SA and BWT */
255 template<typename string_type, typename sarray_type,
256          typename bucketC_type, typename bucketB_type, typename index_type>
257 void
induceSA(string_type T,sarray_type SA,bucketC_type C,bucketB_type B,index_type n,index_type k,bool recount)258 induceSA(string_type T, sarray_type SA, bucketC_type C, bucketB_type B,
259          index_type n, index_type k, bool recount) {
260 typedef typename std::iterator_traits<string_type>::value_type char_type;
261   sarray_type b;
262   index_type i, j;
263   char_type c0, c1;
264   /* compute SAl */
265   if(recount != false) { getCounts(T, C, n, k); }
266   getBuckets(C, B, k, false); /* find starts of buckets */
267   b = SA + B[c1 = T[j = n - 1]];
268   *b++ = ((0 < j) && (T[j - 1] < c1)) ? ~j : j;
269   for(i = 0; i < n; ++i) {
270     j = SA[i], SA[i] = ~j;
271     if(0 < j) {
272       if((c0 = T[--j]) != c1) { B[c1] = b - SA; b = SA + B[c1 = c0]; }
273       *b++ = ((0 < j) && (T[j - 1] < c1)) ? ~j : j;
274     }
275   }
276   /* compute SAs */
277   if(recount != false) { getCounts(T, C, n, k); }
278   getBuckets(C, B, k, true); /* find ends of buckets */
279   for(i = n - 1, b = SA + B[c1 = 0]; 0 <= i; --i) {
280     if(0 < (j = SA[i])) {
281       if((c0 = T[--j]) != c1) { B[c1] = b - SA; b = SA + B[c1 = c0]; }
282       *--b = ((j == 0) || (T[j - 1] > c1)) ? ~j : j;
283     } else {
284       SA[i] = ~j;
285     }
286   }
287 }
288 template<typename string_type, typename sarray_type,
289          typename bucketC_type, typename bucketB_type, typename index_type>
290 int
computeBWT(string_type T,sarray_type SA,bucketC_type C,bucketB_type B,index_type n,index_type k,bool recount)291 computeBWT(string_type T, sarray_type SA, bucketC_type C, bucketB_type B,
292            index_type n, index_type k, bool recount) {
293 typedef typename std::iterator_traits<string_type>::value_type char_type;
294   sarray_type b;
295   index_type i, j, pidx = -1;
296   char_type c0, c1;
297   /* compute SAl */
298   if(recount != false) { getCounts(T, C, n, k); }
299   getBuckets(C, B, k, false); /* find starts of buckets */
300   b = SA + B[c1 = T[j = n - 1]];
301   *b++ = ((0 < j) && (T[j - 1] < c1)) ? ~j : j;
302   for(i = 0; i < n; ++i) {
303     if(0 < (j = SA[i])) {
304       SA[i] = ~((index_type)(c0 = T[--j]));
305       if(c0 != c1) { B[c1] = b - SA; b = SA + B[c1 = c0]; }
306       *b++ = ((0 < j) && (T[j - 1] < c1)) ? ~j : j;
307     } else if(j != 0) {
308       SA[i] = ~j;
309     }
310   }
311   /* compute SAs */
312   if(recount != false) { getCounts(T, C, n, k); }
313   getBuckets(C, B, k, true); /* find ends of buckets */
314   for(i = n - 1, b = SA + B[c1 = 0]; 0 <= i; --i) {
315     if(0 < (j = SA[i])) {
316       SA[i] = (c0 = T[--j]);
317       if(c0 != c1) { B[c1] = b - SA; b = SA + B[c1 = c0]; }
318       *--b = ((0 < j) && (T[j - 1] > c1)) ? ~((index_type)T[j - 1]) : j;
319     } else if(j != 0) {
320       SA[i] = ~j;
321     } else {
322       pidx = i;
323     }
324   }
325   return pidx;
326 }
327 
328 template<typename string_type, typename sarray_type,
329          typename bucketC_type, typename bucketB_type,
330          typename index_type>
331 std::pair<index_type, index_type>
stage1sort(string_type T,sarray_type SA,bucketC_type C,bucketB_type B,index_type n,index_type k,unsigned flags)332 stage1sort(string_type T, sarray_type SA,
333            bucketC_type C, bucketB_type B,
334            index_type n, index_type k, unsigned flags) {
335 typedef typename std::iterator_traits<string_type>::value_type char_type;
336   sarray_type b;
337   index_type i, j, name, m;
338   char_type c0, c1;
339   getCounts(T, C, n, k); getBuckets(C, B, k, true); /* find ends of buckets */
340   for(i = 0; i < n; ++i) { SA[i] = 0; }
341   b = SA + n - 1; i = n - 1; j = n; m = 0; c0 = T[n - 1];
342   do { c1 = c0; } while((0 <= --i) && ((c0 = T[i]) >= c1));
343   for(; 0 <= i;) {
344     do { c1 = c0; } while((0 <= --i) && ((c0 = T[i]) <= c1));
345     if(0 <= i) {
346       *b = j; b = SA + --B[c1]; j = i; ++m; assert(B[c1] != (n - 1));
347       do { c1 = c0; } while((0 <= --i) && ((c0 = T[i]) >= c1));
348     }
349   }
350   SA[n - 1] = 0;
351 
352   if(1 < m) {
353     if(flags & (16 | 32)) {
354       assert((j + 1) < n);
355       ++B[T[j + 1]];
356       if(flags & 16) {
357         index_type *D;
358         try { D = new index_type[k * 2]; } catch(...) { D = 0; }
359         if(D == 0) { return std::make_pair(-2, -2); }
360         for(i = 0, j = 0; i < k; ++i) {
361           j += C[i];
362           if(B[i] != j) { assert(SA[B[i]] != 0); SA[B[i]] += n; }
363           D[i] = D[i + k] = 0;
364         }
365         LMSsort2(T, SA, C, B, D, n, k);
366         delete[] D;
367       } else {
368         bucketB_type D = B - k * 2;
369         for(i = 0, j = 0; i < k; ++i) {
370           j += C[i];
371           if(B[i] != j) { assert(SA[B[i]] != 0); SA[B[i]] += n; }
372           D[i] = D[i + k] = 0;
373         }
374         LMSsort2(T, SA, C, B, D, n, k);
375       }
376       name = LMSpostproc2(SA, n, m);
377     } else {
378       LMSsort1(T, SA, C, B, n, k, (flags & (4 | 64)) != 0);
379       name = LMSpostproc1(T, SA, n, m);
380     }
381   } else if(m == 1) {
382     *b = j + 1;
383     name = 1;
384   } else {
385     name = 0;
386   }
387   return std::make_pair(m, name);
388 }
389 template<typename string_type, typename sarray_type,
390          typename bucketC_type, typename bucketB_type, typename index_type>
391 index_type
stage3sort(string_type T,sarray_type SA,bucketC_type C,bucketB_type B,index_type n,index_type m,index_type k,unsigned flags,bool isbwt)392 stage3sort(string_type T, sarray_type SA, bucketC_type C, bucketB_type B,
393            index_type n, index_type m, index_type k,
394            unsigned flags, bool isbwt) {
395 typedef typename std::iterator_traits<string_type>::value_type char_type;
396   index_type i, j, p, q, pidx = 0;
397   char_type c0, c1;
398   if((flags & 8) != 0) { getCounts(T, C, n, k); }
399   /* put all left-most S characters into their buckets */
400   if(1 < m) {
401     getBuckets(C, B, k, 1); /* find ends of buckets */
402     i = m - 1, j = n, p = SA[m - 1], c1 = T[p];
403     do {
404       q = B[c0 = c1];
405       while(q < j) { SA[--j] = 0; }
406       do {
407         SA[--j] = p;
408         if(--i < 0) { break; }
409         p = SA[i];
410       } while((c1 = T[p]) == c0);
411     } while(0 <= i);
412     while(0 < j) { SA[--j] = 0; }
413   }
414   if(isbwt == false) { induceSA(T, SA, C, B, n, k, (flags & (4 | 64)) != 0); }
415   else { pidx = computeBWT(T, SA, C, B, n, k, (flags & (4 | 64)) != 0); }
416   return pidx;
417 }
418 
419 /* find the suffix array SA of T[0..n-1] in {0..k}^n
420    use a working space (excluding s and SA) of at most 2n+O(1) for a constant alphabet */
421 template<typename string_type, typename sarray_type, typename index_type>
422 int
suffixsort(string_type T,sarray_type SA,index_type fs,index_type n,index_type k,bool isbwt)423 suffixsort(string_type T, sarray_type SA,
424            index_type fs, index_type n, index_type k,
425            bool isbwt) {
426 typedef typename std::iterator_traits<string_type>::value_type char_type;
427   sarray_type RA, C, B;
428   index_type *Cp, *Bp;
429   index_type i, j, m, name, pidx, newfs;
430   unsigned flags = 0;
431   char_type c0, c1;
432 
433   /* stage 1: reduce the problem by at least 1/2
434      sort all the S-substrings */
435   C = B = SA; /* for warnings */
436   Cp = 0, Bp = 0;
437   if(k <= 256) {
438     try { Cp = new index_type[k]; } catch(...) { Cp = 0; }
439     if(Cp == 0) { return -2; }
440     if(k <= fs) {
441       B = SA + (n + fs - k);
442       flags = 1;
443     } else {
444       try { Bp = new index_type[k]; } catch(...) { Bp = 0; }
445       if(Bp == 0) { return -2; }
446       flags = 3;
447     }
448   } else if(k <= fs) {
449     C = SA + (n + fs - k);
450     if(k <= (fs - k)) {
451       B = C - k;
452       flags = 0;
453     } else if(k <= 1024) {
454       try { Bp = new index_type[k]; } catch(...) { Bp = 0; }
455       if(Bp == 0) { return -2; }
456       flags = 2;
457     } else {
458       B = C;
459       flags = 64 | 8;
460     }
461   } else {
462     try { Cp = new index_type[k]; } catch(...) { Cp = 0; }
463     if(Cp == 0) { return -2; }
464     Bp = Cp;
465     flags = 4 | 8;
466   }
467   if((n <= ((std::numeric_limits<index_type>::max)() / 2)) && (2 <= (n / k))) {
468     if(flags & 1) { flags |= ((k * 2) <= (fs - k)) ? 32 : 16; }
469     else if((flags == 0) && ((k * 2) <= (fs - k * 2))) { flags |= 32; }
470   }
471   {
472     std::pair<index_type, index_type> r;
473     if(Cp != 0) {
474       if(Bp != 0) { r = stage1sort(T, SA, Cp, Bp, n, k, flags); }
475       else { r = stage1sort(T, SA, Cp, B, n, k, flags); }
476     } else {
477       if(Bp != 0) { r = stage1sort(T, SA, C, Bp, n, k, flags); }
478       else { r = stage1sort(T, SA, C, B, n, k, flags); }
479     }
480     m = r.first, name = r.second;
481   }
482   if(m < 0) {
483     if(flags & (1 | 4)) { delete[] Cp; }
484     if(flags & 2) { delete[] Bp; }
485     return -2;
486   }
487 
488   /* stage 2: solve the reduced problem
489      recurse if names are not yet unique */
490   if(name < m) {
491     if(flags & 4) { delete[] Cp; }
492     if(flags & 2) { delete[] Bp; }
493     newfs = (n + fs) - (m * 2);
494     if((flags & (1 | 4 | 64)) == 0) {
495       if((k + name) <= newfs) { newfs -= k; }
496       else { flags |= 8; }
497     }
498     assert((n >> 1) <= (newfs + m));
499     RA = SA + m + newfs;
500     for(i = m + (n >> 1) - 1, j = m - 1; m <= i; --i) {
501       if(SA[i] != 0) { RA[j--] = SA[i] - 1; }
502     }
503     if(suffixsort(RA, SA, newfs, m, name, false) != 0) { if(flags & 1) { delete[] Cp; } return -2; }
504     i = n - 1; j = m - 1; c0 = T[n - 1];
505     do { c1 = c0; } while((0 <= --i) && ((c0 = T[i]) >= c1));
506     for(; 0 <= i;) {
507       do { c1 = c0; } while((0 <= --i) && ((c0 = T[i]) <= c1));
508       if(0 <= i) {
509         RA[j--] = i + 1;
510         do { c1 = c0; } while((0 <= --i) && ((c0 = T[i]) >= c1));
511       }
512     }
513     for(i = 0; i < m; ++i) { SA[i] = RA[SA[i]]; }
514     if(flags & 4) {
515       try { Cp = new index_type[k]; } catch(...) { Cp = 0; }
516       if(Cp == 0) { return -2; }
517       Bp = Cp;
518     }
519     if(flags & 2) {
520       try { Bp = new index_type[k]; } catch(...) { Bp = 0; }
521       if(Bp == 0) { if(flags & 1) { delete[] Cp; } return -2; }
522     }
523   }
524 
525   /* stage 3: induce the result for the original problem */
526   if(Cp != 0) {
527     if(Bp != 0) { pidx = stage3sort(T, SA, Cp, Bp, n, m, k, flags, isbwt); }
528     else { pidx = stage3sort(T, SA, Cp, B, n, m, k, flags, isbwt); }
529   } else {
530     if(Bp != 0) { pidx = stage3sort(T, SA, C, Bp, n, m, k, flags, isbwt); }
531     else { pidx = stage3sort(T, SA, C, B, n, m, k, flags, isbwt); }
532   }
533   if(flags & (1 | 4)) { delete[] Cp; }
534   if(flags & 2) { delete[] Bp; }
535 
536   return pidx;
537 }
538 
539 } /* namespace saisxx_private */
540 
541 
542 /**
543  * @brief Constructs the suffix array of a given string in linear time.
544  * @param T[0..n-1] The input string. (random access iterator)
545  * @param SA[0..n-1] The output array of suffixes. (random access iterator)
546  * @param n The length of the given string.
547  * @param k The alphabet size.
548  * @return 0 if no error occurred, -1 or -2 otherwise.
549  */
550 template<typename string_type, typename sarray_type, typename index_type>
551 int
saisxx(string_type T,sarray_type SA,index_type n,index_type k=256)552 saisxx(string_type T, sarray_type SA, index_type n, index_type k = 256) {
553 typedef typename std::iterator_traits<sarray_type>::value_type savalue_type;
554   savalue_type unused;
555   (void)unused;
556   assert((std::numeric_limits<index_type>::min)() < 0);
557   assert((std::numeric_limits<savalue_type>::min)() < 0);
558   assert((std::numeric_limits<savalue_type>::max)() == (std::numeric_limits<index_type>::max)());
559   assert((std::numeric_limits<savalue_type>::min)() == (std::numeric_limits<index_type>::min)());
560   if((n < 0) || (k <= 0)) { return -1; }
561   if(n <= 1) { if(n == 1) { SA[0] = 0; } return 0; }
562   return saisxx_private::suffixsort(T, SA, (index_type)0, n, k, false);
563 }
564 
565 /**
566  * @brief Constructs the burrows-wheeler transformed string of a given string in linear time.
567  * @param T[0..n-1] The input string. (random access iterator)
568  * @param U[0..n-1] The output string. (random access iterator)
569  * @param A[0..n-1] The temporary array. (random access iterator)
570  * @param n The length of the given string.
571  * @param k The alphabet size.
572  * @return The primary index if no error occurred, -1 or -2 otherwise.
573  */
574 template<typename string_type, typename sarray_type, typename index_type>
575 index_type
saisxx_bwt(string_type T,string_type U,sarray_type A,index_type n,index_type k=256)576 saisxx_bwt(string_type T, string_type U, sarray_type A, index_type n, index_type k = 256) {
577 typedef typename std::iterator_traits<sarray_type>::value_type savalue_type;
578 typedef typename std::iterator_traits<string_type>::value_type char_type;
579   savalue_type unused;
580   (void)unused;
581   index_type i, pidx;
582   assert((std::numeric_limits<index_type>::min)() < 0);
583   assert((std::numeric_limits<savalue_type>::min)() < 0);
584   assert((std::numeric_limits<savalue_type>::max)() == (std::numeric_limits<index_type>::max)());
585   assert((std::numeric_limits<savalue_type>::min)() == (std::numeric_limits<index_type>::min)());
586   if((n < 0) || (k <= 0)) { return -1; }
587   if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; }
588   pidx = saisxx_private::suffixsort(T, A, (index_type)0, n, k, true);
589   if(0 <= pidx) {
590     U[0] = T[n - 1];
591     for(i = 0; i < pidx; ++i) { U[i + 1] = (char_type)A[i]; }
592     for(i += 1; i < n; ++i) { U[i] = (char_type)A[i]; }
593     pidx += 1;
594   }
595   return pidx;
596 }
597 
598 
599 #ifdef _MSC_VER
600 #pragma warning(pop)
601 #endif
602 
603 #endif /* __cplusplus */
604 #endif /* _SAIS_HXX */
605