1 /*
2 * sais.hxx for sais
3 * Copyright (c) 2008-2010 Yuta Mori All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person
6 * obtaining a copy of this software and associated documentation
7 * files (the "Software"), to deal in the Software without
8 * restriction, including without limitation the rights to use,
9 * copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following
12 * conditions:
13 *
14 * The above copyright notice and this permission notice shall be
15 * included in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24 * OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27 #ifndef _SAIS_HXX
28 #define _SAIS_HXX 1
29 #ifdef __cplusplus
30
31 #include <cassert>
32 #include <iterator>
33 #include <limits>
34
35 #ifdef __INTEL_COMPILER
36 #pragma warning(disable : 383 981 1418)
37 #endif
38
39 #ifdef _MSC_VER
40 #pragma warning(push)
41 #pragma warning(disable : 4365)
42 #endif
43
44 namespace saisxx_private {
45
46 /* find the start or end of each bucket */
47 template<typename string_type, typename bucket_type, typename index_type>
48 void
getCounts(const string_type T,bucket_type C,index_type n,index_type k)49 getCounts(const string_type T, bucket_type C, index_type n, index_type k) {
50 index_type i;
51 for(i = 0; i < k; ++i) { C[i] = 0; }
52 for(i = 0; i < n; ++i) { ++C[T[i]]; }
53 }
54 template<typename bucketC_type, typename bucketB_type, typename index_type>
55 void
getBuckets(const bucketC_type C,bucketB_type B,index_type k,bool end)56 getBuckets(const bucketC_type C, bucketB_type B, index_type k, bool end) {
57 index_type i, sum = 0;
58 if(end != false) { for(i = 0; i < k; ++i) { sum += C[i]; B[i] = sum; } }
59 else { for(i = 0; i < k; ++i) { sum += C[i]; B[i] = sum - C[i]; } }
60 }
61
62 template<typename string_type, typename sarray_type,
63 typename bucketC_type, typename bucketB_type, typename index_type>
64 void
LMSsort1(string_type T,sarray_type SA,bucketC_type C,bucketB_type B,index_type n,index_type k,bool recount)65 LMSsort1(string_type T, sarray_type SA,
66 bucketC_type C, bucketB_type B,
67 index_type n, index_type k, bool recount) {
68 typedef typename std::iterator_traits<string_type>::value_type char_type;
69 sarray_type b;
70 index_type i, j;
71 char_type c0, c1;
72
73 /* compute SAl */
74 if(recount != false) { getCounts(T, C, n, k); }
75 getBuckets(C, B, k, false); /* find starts of buckets */
76 j = n - 1;
77 b = SA + B[c1 = T[j]];
78 --j;
79 *b++ = (T[j] < c1) ? ~j : j;
80 for(i = 0; i < n; ++i) {
81 if(0 < (j = SA[i])) {
82 assert(T[j] >= T[j + 1]);
83 if((c0 = T[j]) != c1) { B[c1] = b - SA; b = SA + B[c1 = c0]; }
84 assert(i < (b - SA));
85 --j;
86 *b++ = (T[j] < c1) ? ~j : j;
87 SA[i] = 0;
88 } else if(j < 0) {
89 SA[i] = ~j;
90 }
91 }
92 /* compute SAs */
93 if(recount != false) { getCounts(T, C, n, k); }
94 getBuckets(C, B, k, true); /* find ends of buckets */
95 for(i = n - 1, b = SA + B[c1 = 0]; 0 <= i; --i) {
96 if(0 < (j = SA[i])) {
97 assert(T[j] <= T[j + 1]);
98 if((c0 = T[j]) != c1) { B[c1] = b - SA; b = SA + B[c1 = c0]; }
99 assert((b - SA) <= i);
100 --j;
101 *--b = (T[j] > c1) ? ~(j + 1) : j;
102 SA[i] = 0;
103 }
104 }
105 }
106 template<typename string_type, typename sarray_type, typename index_type>
107 index_type
LMSpostproc1(string_type T,sarray_type SA,index_type n,index_type m)108 LMSpostproc1(string_type T, sarray_type SA, index_type n, index_type m) {
109 typedef typename std::iterator_traits<string_type>::value_type char_type;
110 index_type i, j, p, q, plen, qlen, name;
111 char_type c0, c1;
112 bool diff;
113
114 /* compact all the sorted substrings into the first m items of SA
115 2*m must be not larger than n (proveable) */
116 assert(0 < n);
117 for(i = 0; (p = SA[i]) < 0; ++i) { SA[i] = ~p; assert((i + 1) < n); }
118 if(i < m) {
119 for(j = i, ++i;; ++i) {
120 assert(i < n);
121 if((p = SA[i]) < 0) {
122 SA[j++] = ~p; SA[i] = 0;
123 if(j == m) { break; }
124 }
125 }
126 }
127
128 /* store the length of all substrings */
129 i = n - 1; j = n - 1; c0 = T[n - 1];
130 do { c1 = c0; } while((0 <= --i) && ((c0 = T[i]) >= c1));
131 for(; 0 <= i;) {
132 do { c1 = c0; } while((0 <= --i) && ((c0 = T[i]) <= c1));
133 if(0 <= i) {
134 SA[m + ((i + 1) >> 1)] = j - i; j = i + 1;
135 do { c1 = c0; } while((0 <= --i) && ((c0 = T[i]) >= c1));
136 }
137 }
138
139 /* find the lexicographic names of all substrings */
140 for(i = 0, name = 0, q = n, qlen = 0; i < m; ++i) {
141 p = SA[i], plen = SA[m + (p >> 1)], diff = true;
142 if((plen == qlen) && ((q + plen) < n)) {
143 for(j = 0; (j < plen) && (T[p + j] == T[q + j]); ++j) { }
144 if(j == plen) { diff = false; }
145 }
146 if(diff != false) { ++name, q = p, qlen = plen; }
147 SA[m + (p >> 1)] = name;
148 }
149
150 return name;
151 }
152 template<typename string_type, typename sarray_type,
153 typename bucketC_type, typename bucketB_type, typename bucketD_type,
154 typename index_type>
155 void
LMSsort2(string_type T,sarray_type SA,bucketC_type C,bucketB_type B,bucketD_type D,index_type n,index_type k)156 LMSsort2(string_type T, sarray_type SA,
157 bucketC_type C, bucketB_type B, bucketD_type D,
158 index_type n, index_type k) {
159 typedef typename std::iterator_traits<string_type>::value_type char_type;
160 sarray_type b;
161 index_type i, j, t, d;
162 char_type c0, c1;
163
164 /* compute SAl */
165 getBuckets(C, B, k, false); /* find starts of buckets */
166 j = n - 1;
167 b = SA + B[c1 = T[j]];
168 --j;
169 t = (T[j] < c1);
170 j += n;
171 *b++ = (t & 1) ? ~j : j;
172 for(i = 0, d = 0; i < n; ++i) {
173 if(0 < (j = SA[i])) {
174 if(n <= j) { d += 1; j -= n; }
175 assert(T[j] >= T[j + 1]);
176 if((c0 = T[j]) != c1) { B[c1] = b - SA; b = SA + B[c1 = c0]; }
177 assert(i < (b - SA));
178 --j;
179 t = c0; t = (t << 1) | (T[j] < c1);
180 if(D[t] != d) { j += n; D[t] = d; }
181 *b++ = (t & 1) ? ~j : j;
182 SA[i] = 0;
183 } else if(j < 0) {
184 SA[i] = ~j;
185 }
186 }
187 for(i = n - 1; 0 <= i; --i) {
188 if(0 < SA[i]) {
189 if(SA[i] < n) {
190 SA[i] += n;
191 for(j = i - 1; SA[j] < n; --j) { }
192 SA[j] -= n;
193 i = j;
194 }
195 }
196 }
197
198 /* compute SAs */
199 getBuckets(C, B, k, true); /* find ends of buckets */
200 for(i = n - 1, d += 1, b = SA + B[c1 = 0]; 0 <= i; --i) {
201 if(0 < (j = SA[i])) {
202 if(n <= j) { d += 1; j -= n; }
203 assert(T[j] <= T[j + 1]);
204 if((c0 = T[j]) != c1) { B[c1] = b - SA; b = SA + B[c1 = c0]; }
205 assert((b - SA) <= i);
206 --j;
207 t = c0; t = (t << 1) | (T[j] > c1);
208 if(D[t] != d) { j += n; D[t] = d; }
209 *--b = (t & 1) ? ~(j + 1) : j;
210 SA[i] = 0;
211 }
212 }
213 }
214 template<typename sarray_type, typename index_type>
215 index_type
LMSpostproc2(sarray_type SA,index_type n,index_type m)216 LMSpostproc2(sarray_type SA, index_type n, index_type m) {
217 index_type i, j, d, name;
218
219 /* compact all the sorted LMS substrings into the first m items of SA */
220 assert(0 < n);
221 for(i = 0, name = 0; (j = SA[i]) < 0; ++i) {
222 j = ~j;
223 if(n <= j) { name += 1; }
224 SA[i] = j;
225 assert((i + 1) < n);
226 }
227 if(i < m) {
228 for(d = i, ++i;; ++i) {
229 assert(i < n);
230 if((j = SA[i]) < 0) {
231 j = ~j;
232 if(n <= j) { name += 1; }
233 SA[d++] = j; SA[i] = 0;
234 if(d == m) { break; }
235 }
236 }
237 }
238 if(name < m) {
239 /* store the lexicographic names */
240 for(i = m - 1, d = name + 1; 0 <= i; --i) {
241 if(n <= (j = SA[i])) { j -= n; --d; }
242 SA[m + (j >> 1)] = d;
243 }
244 } else {
245 /* unset flags */
246 for(i = 0; i < m; ++i) {
247 if(n <= (j = SA[i])) { j -= n; SA[i] = j; }
248 }
249 }
250
251 return name;
252 }
253
254 /* compute SA and BWT */
255 template<typename string_type, typename sarray_type,
256 typename bucketC_type, typename bucketB_type, typename index_type>
257 void
induceSA(string_type T,sarray_type SA,bucketC_type C,bucketB_type B,index_type n,index_type k,bool recount)258 induceSA(string_type T, sarray_type SA, bucketC_type C, bucketB_type B,
259 index_type n, index_type k, bool recount) {
260 typedef typename std::iterator_traits<string_type>::value_type char_type;
261 sarray_type b;
262 index_type i, j;
263 char_type c0, c1;
264 /* compute SAl */
265 if(recount != false) { getCounts(T, C, n, k); }
266 getBuckets(C, B, k, false); /* find starts of buckets */
267 b = SA + B[c1 = T[j = n - 1]];
268 *b++ = ((0 < j) && (T[j - 1] < c1)) ? ~j : j;
269 for(i = 0; i < n; ++i) {
270 j = SA[i], SA[i] = ~j;
271 if(0 < j) {
272 if((c0 = T[--j]) != c1) { B[c1] = b - SA; b = SA + B[c1 = c0]; }
273 *b++ = ((0 < j) && (T[j - 1] < c1)) ? ~j : j;
274 }
275 }
276 /* compute SAs */
277 if(recount != false) { getCounts(T, C, n, k); }
278 getBuckets(C, B, k, true); /* find ends of buckets */
279 for(i = n - 1, b = SA + B[c1 = 0]; 0 <= i; --i) {
280 if(0 < (j = SA[i])) {
281 if((c0 = T[--j]) != c1) { B[c1] = b - SA; b = SA + B[c1 = c0]; }
282 *--b = ((j == 0) || (T[j - 1] > c1)) ? ~j : j;
283 } else {
284 SA[i] = ~j;
285 }
286 }
287 }
288 template<typename string_type, typename sarray_type,
289 typename bucketC_type, typename bucketB_type, typename index_type>
290 int
computeBWT(string_type T,sarray_type SA,bucketC_type C,bucketB_type B,index_type n,index_type k,bool recount)291 computeBWT(string_type T, sarray_type SA, bucketC_type C, bucketB_type B,
292 index_type n, index_type k, bool recount) {
293 typedef typename std::iterator_traits<string_type>::value_type char_type;
294 sarray_type b;
295 index_type i, j, pidx = -1;
296 char_type c0, c1;
297 /* compute SAl */
298 if(recount != false) { getCounts(T, C, n, k); }
299 getBuckets(C, B, k, false); /* find starts of buckets */
300 b = SA + B[c1 = T[j = n - 1]];
301 *b++ = ((0 < j) && (T[j - 1] < c1)) ? ~j : j;
302 for(i = 0; i < n; ++i) {
303 if(0 < (j = SA[i])) {
304 SA[i] = ~((index_type)(c0 = T[--j]));
305 if(c0 != c1) { B[c1] = b - SA; b = SA + B[c1 = c0]; }
306 *b++ = ((0 < j) && (T[j - 1] < c1)) ? ~j : j;
307 } else if(j != 0) {
308 SA[i] = ~j;
309 }
310 }
311 /* compute SAs */
312 if(recount != false) { getCounts(T, C, n, k); }
313 getBuckets(C, B, k, true); /* find ends of buckets */
314 for(i = n - 1, b = SA + B[c1 = 0]; 0 <= i; --i) {
315 if(0 < (j = SA[i])) {
316 SA[i] = (c0 = T[--j]);
317 if(c0 != c1) { B[c1] = b - SA; b = SA + B[c1 = c0]; }
318 *--b = ((0 < j) && (T[j - 1] > c1)) ? ~((index_type)T[j - 1]) : j;
319 } else if(j != 0) {
320 SA[i] = ~j;
321 } else {
322 pidx = i;
323 }
324 }
325 return pidx;
326 }
327
328 template<typename string_type, typename sarray_type,
329 typename bucketC_type, typename bucketB_type,
330 typename index_type>
331 std::pair<index_type, index_type>
stage1sort(string_type T,sarray_type SA,bucketC_type C,bucketB_type B,index_type n,index_type k,unsigned flags)332 stage1sort(string_type T, sarray_type SA,
333 bucketC_type C, bucketB_type B,
334 index_type n, index_type k, unsigned flags) {
335 typedef typename std::iterator_traits<string_type>::value_type char_type;
336 sarray_type b;
337 index_type i, j, name, m;
338 char_type c0, c1;
339 getCounts(T, C, n, k); getBuckets(C, B, k, true); /* find ends of buckets */
340 for(i = 0; i < n; ++i) { SA[i] = 0; }
341 b = SA + n - 1; i = n - 1; j = n; m = 0; c0 = T[n - 1];
342 do { c1 = c0; } while((0 <= --i) && ((c0 = T[i]) >= c1));
343 for(; 0 <= i;) {
344 do { c1 = c0; } while((0 <= --i) && ((c0 = T[i]) <= c1));
345 if(0 <= i) {
346 *b = j; b = SA + --B[c1]; j = i; ++m; assert(B[c1] != (n - 1));
347 do { c1 = c0; } while((0 <= --i) && ((c0 = T[i]) >= c1));
348 }
349 }
350 SA[n - 1] = 0;
351
352 if(1 < m) {
353 if(flags & (16 | 32)) {
354 assert((j + 1) < n);
355 ++B[T[j + 1]];
356 if(flags & 16) {
357 index_type *D;
358 try { D = new index_type[k * 2]; } catch(...) { D = 0; }
359 if(D == 0) { return std::make_pair(-2, -2); }
360 for(i = 0, j = 0; i < k; ++i) {
361 j += C[i];
362 if(B[i] != j) { assert(SA[B[i]] != 0); SA[B[i]] += n; }
363 D[i] = D[i + k] = 0;
364 }
365 LMSsort2(T, SA, C, B, D, n, k);
366 delete[] D;
367 } else {
368 bucketB_type D = B - k * 2;
369 for(i = 0, j = 0; i < k; ++i) {
370 j += C[i];
371 if(B[i] != j) { assert(SA[B[i]] != 0); SA[B[i]] += n; }
372 D[i] = D[i + k] = 0;
373 }
374 LMSsort2(T, SA, C, B, D, n, k);
375 }
376 name = LMSpostproc2(SA, n, m);
377 } else {
378 LMSsort1(T, SA, C, B, n, k, (flags & (4 | 64)) != 0);
379 name = LMSpostproc1(T, SA, n, m);
380 }
381 } else if(m == 1) {
382 *b = j + 1;
383 name = 1;
384 } else {
385 name = 0;
386 }
387 return std::make_pair(m, name);
388 }
389 template<typename string_type, typename sarray_type,
390 typename bucketC_type, typename bucketB_type, typename index_type>
391 index_type
stage3sort(string_type T,sarray_type SA,bucketC_type C,bucketB_type B,index_type n,index_type m,index_type k,unsigned flags,bool isbwt)392 stage3sort(string_type T, sarray_type SA, bucketC_type C, bucketB_type B,
393 index_type n, index_type m, index_type k,
394 unsigned flags, bool isbwt) {
395 typedef typename std::iterator_traits<string_type>::value_type char_type;
396 index_type i, j, p, q, pidx = 0;
397 char_type c0, c1;
398 if((flags & 8) != 0) { getCounts(T, C, n, k); }
399 /* put all left-most S characters into their buckets */
400 if(1 < m) {
401 getBuckets(C, B, k, 1); /* find ends of buckets */
402 i = m - 1, j = n, p = SA[m - 1], c1 = T[p];
403 do {
404 q = B[c0 = c1];
405 while(q < j) { SA[--j] = 0; }
406 do {
407 SA[--j] = p;
408 if(--i < 0) { break; }
409 p = SA[i];
410 } while((c1 = T[p]) == c0);
411 } while(0 <= i);
412 while(0 < j) { SA[--j] = 0; }
413 }
414 if(isbwt == false) { induceSA(T, SA, C, B, n, k, (flags & (4 | 64)) != 0); }
415 else { pidx = computeBWT(T, SA, C, B, n, k, (flags & (4 | 64)) != 0); }
416 return pidx;
417 }
418
419 /* find the suffix array SA of T[0..n-1] in {0..k}^n
420 use a working space (excluding s and SA) of at most 2n+O(1) for a constant alphabet */
421 template<typename string_type, typename sarray_type, typename index_type>
422 int
suffixsort(string_type T,sarray_type SA,index_type fs,index_type n,index_type k,bool isbwt)423 suffixsort(string_type T, sarray_type SA,
424 index_type fs, index_type n, index_type k,
425 bool isbwt) {
426 typedef typename std::iterator_traits<string_type>::value_type char_type;
427 sarray_type RA, C, B;
428 index_type *Cp, *Bp;
429 index_type i, j, m, name, pidx, newfs;
430 unsigned flags = 0;
431 char_type c0, c1;
432
433 /* stage 1: reduce the problem by at least 1/2
434 sort all the S-substrings */
435 C = B = SA; /* for warnings */
436 Cp = 0, Bp = 0;
437 if(k <= 256) {
438 try { Cp = new index_type[k]; } catch(...) { Cp = 0; }
439 if(Cp == 0) { return -2; }
440 if(k <= fs) {
441 B = SA + (n + fs - k);
442 flags = 1;
443 } else {
444 try { Bp = new index_type[k]; } catch(...) { Bp = 0; }
445 if(Bp == 0) { return -2; }
446 flags = 3;
447 }
448 } else if(k <= fs) {
449 C = SA + (n + fs - k);
450 if(k <= (fs - k)) {
451 B = C - k;
452 flags = 0;
453 } else if(k <= 1024) {
454 try { Bp = new index_type[k]; } catch(...) { Bp = 0; }
455 if(Bp == 0) { return -2; }
456 flags = 2;
457 } else {
458 B = C;
459 flags = 64 | 8;
460 }
461 } else {
462 try { Cp = new index_type[k]; } catch(...) { Cp = 0; }
463 if(Cp == 0) { return -2; }
464 Bp = Cp;
465 flags = 4 | 8;
466 }
467 if((n <= ((std::numeric_limits<index_type>::max)() / 2)) && (2 <= (n / k))) {
468 if(flags & 1) { flags |= ((k * 2) <= (fs - k)) ? 32 : 16; }
469 else if((flags == 0) && ((k * 2) <= (fs - k * 2))) { flags |= 32; }
470 }
471 {
472 std::pair<index_type, index_type> r;
473 if(Cp != 0) {
474 if(Bp != 0) { r = stage1sort(T, SA, Cp, Bp, n, k, flags); }
475 else { r = stage1sort(T, SA, Cp, B, n, k, flags); }
476 } else {
477 if(Bp != 0) { r = stage1sort(T, SA, C, Bp, n, k, flags); }
478 else { r = stage1sort(T, SA, C, B, n, k, flags); }
479 }
480 m = r.first, name = r.second;
481 }
482 if(m < 0) {
483 if(flags & (1 | 4)) { delete[] Cp; }
484 if(flags & 2) { delete[] Bp; }
485 return -2;
486 }
487
488 /* stage 2: solve the reduced problem
489 recurse if names are not yet unique */
490 if(name < m) {
491 if(flags & 4) { delete[] Cp; }
492 if(flags & 2) { delete[] Bp; }
493 newfs = (n + fs) - (m * 2);
494 if((flags & (1 | 4 | 64)) == 0) {
495 if((k + name) <= newfs) { newfs -= k; }
496 else { flags |= 8; }
497 }
498 assert((n >> 1) <= (newfs + m));
499 RA = SA + m + newfs;
500 for(i = m + (n >> 1) - 1, j = m - 1; m <= i; --i) {
501 if(SA[i] != 0) { RA[j--] = SA[i] - 1; }
502 }
503 if(suffixsort(RA, SA, newfs, m, name, false) != 0) { if(flags & 1) { delete[] Cp; } return -2; }
504 i = n - 1; j = m - 1; c0 = T[n - 1];
505 do { c1 = c0; } while((0 <= --i) && ((c0 = T[i]) >= c1));
506 for(; 0 <= i;) {
507 do { c1 = c0; } while((0 <= --i) && ((c0 = T[i]) <= c1));
508 if(0 <= i) {
509 RA[j--] = i + 1;
510 do { c1 = c0; } while((0 <= --i) && ((c0 = T[i]) >= c1));
511 }
512 }
513 for(i = 0; i < m; ++i) { SA[i] = RA[SA[i]]; }
514 if(flags & 4) {
515 try { Cp = new index_type[k]; } catch(...) { Cp = 0; }
516 if(Cp == 0) { return -2; }
517 Bp = Cp;
518 }
519 if(flags & 2) {
520 try { Bp = new index_type[k]; } catch(...) { Bp = 0; }
521 if(Bp == 0) { if(flags & 1) { delete[] Cp; } return -2; }
522 }
523 }
524
525 /* stage 3: induce the result for the original problem */
526 if(Cp != 0) {
527 if(Bp != 0) { pidx = stage3sort(T, SA, Cp, Bp, n, m, k, flags, isbwt); }
528 else { pidx = stage3sort(T, SA, Cp, B, n, m, k, flags, isbwt); }
529 } else {
530 if(Bp != 0) { pidx = stage3sort(T, SA, C, Bp, n, m, k, flags, isbwt); }
531 else { pidx = stage3sort(T, SA, C, B, n, m, k, flags, isbwt); }
532 }
533 if(flags & (1 | 4)) { delete[] Cp; }
534 if(flags & 2) { delete[] Bp; }
535
536 return pidx;
537 }
538
539 } /* namespace saisxx_private */
540
541
542 /**
543 * @brief Constructs the suffix array of a given string in linear time.
544 * @param T[0..n-1] The input string. (random access iterator)
545 * @param SA[0..n-1] The output array of suffixes. (random access iterator)
546 * @param n The length of the given string.
547 * @param k The alphabet size.
548 * @return 0 if no error occurred, -1 or -2 otherwise.
549 */
550 template<typename string_type, typename sarray_type, typename index_type>
551 int
saisxx(string_type T,sarray_type SA,index_type n,index_type k=256)552 saisxx(string_type T, sarray_type SA, index_type n, index_type k = 256) {
553 typedef typename std::iterator_traits<sarray_type>::value_type savalue_type;
554 savalue_type unused;
555 (void)unused;
556 assert((std::numeric_limits<index_type>::min)() < 0);
557 assert((std::numeric_limits<savalue_type>::min)() < 0);
558 assert((std::numeric_limits<savalue_type>::max)() == (std::numeric_limits<index_type>::max)());
559 assert((std::numeric_limits<savalue_type>::min)() == (std::numeric_limits<index_type>::min)());
560 if((n < 0) || (k <= 0)) { return -1; }
561 if(n <= 1) { if(n == 1) { SA[0] = 0; } return 0; }
562 return saisxx_private::suffixsort(T, SA, (index_type)0, n, k, false);
563 }
564
565 /**
566 * @brief Constructs the burrows-wheeler transformed string of a given string in linear time.
567 * @param T[0..n-1] The input string. (random access iterator)
568 * @param U[0..n-1] The output string. (random access iterator)
569 * @param A[0..n-1] The temporary array. (random access iterator)
570 * @param n The length of the given string.
571 * @param k The alphabet size.
572 * @return The primary index if no error occurred, -1 or -2 otherwise.
573 */
574 template<typename string_type, typename sarray_type, typename index_type>
575 index_type
saisxx_bwt(string_type T,string_type U,sarray_type A,index_type n,index_type k=256)576 saisxx_bwt(string_type T, string_type U, sarray_type A, index_type n, index_type k = 256) {
577 typedef typename std::iterator_traits<sarray_type>::value_type savalue_type;
578 typedef typename std::iterator_traits<string_type>::value_type char_type;
579 savalue_type unused;
580 (void)unused;
581 index_type i, pidx;
582 assert((std::numeric_limits<index_type>::min)() < 0);
583 assert((std::numeric_limits<savalue_type>::min)() < 0);
584 assert((std::numeric_limits<savalue_type>::max)() == (std::numeric_limits<index_type>::max)());
585 assert((std::numeric_limits<savalue_type>::min)() == (std::numeric_limits<index_type>::min)());
586 if((n < 0) || (k <= 0)) { return -1; }
587 if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; }
588 pidx = saisxx_private::suffixsort(T, A, (index_type)0, n, k, true);
589 if(0 <= pidx) {
590 U[0] = T[n - 1];
591 for(i = 0; i < pidx; ++i) { U[i + 1] = (char_type)A[i]; }
592 for(i += 1; i < n; ++i) { U[i] = (char_type)A[i]; }
593 pidx += 1;
594 }
595 return pidx;
596 }
597
598
599 #ifdef _MSC_VER
600 #pragma warning(pop)
601 #endif
602
603 #endif /* __cplusplus */
604 #endif /* _SAIS_HXX */
605