1// Copyright 2015, Joe Tsai. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE.md file. 4 5// Code generated by sais_gen.go. DO NOT EDIT. 6 7// ==================================================== 8// Copyright (c) 2008-2010 Yuta Mori All Rights Reserved. 9// 10// Permission is hereby granted, free of charge, to any person 11// obtaining a copy of this software and associated documentation 12// files (the "Software"), to deal in the Software without 13// restriction, including without limitation the rights to use, 14// copy, modify, merge, publish, distribute, sublicense, and/or sell 15// copies of the Software, and to permit persons to whom the 16// Software is furnished to do so, subject to the following 17// conditions: 18// 19// The above copyright notice and this permission notice shall be 20// included in all copies or substantial portions of the Software. 21// 22// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 23// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 24// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 25// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 26// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 27// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 28// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 29// OTHER DEALINGS IN THE SOFTWARE. 30// ==================================================== 31 32package sais 33 34func getCounts_byte(T []byte, C []int, n, k int) { 35 var i int 36 for i = 0; i < k; i++ { 37 C[i] = 0 38 } 39 for i = 0; i < n; i++ { 40 C[T[i]]++ 41 } 42} 43 44func getBuckets_byte(C, B []int, k int, end bool) { 45 var i, sum int 46 if end { 47 for i = 0; i < k; i++ { 48 sum += C[i] 49 B[i] = sum 50 } 51 } else { 52 for i = 0; i < k; i++ { 53 sum += C[i] 54 B[i] = sum - C[i] 55 } 56 } 57} 58 59func sortLMS1_byte(T []byte, SA, C, B []int, n, k int) { 60 var b, i, j int 61 var c0, c1 int 62 63 // Compute SAl. 64 if &C[0] == &B[0] { 65 getCounts_byte(T, C, n, k) 66 } 67 getBuckets_byte(C, B, k, false) // Find starts of buckets 68 j = n - 1 69 c1 = int(T[j]) 70 b = B[c1] 71 j-- 72 if int(T[j]) < c1 { 73 SA[b] = ^j 74 } else { 75 SA[b] = j 76 } 77 b++ 78 for i = 0; i < n; i++ { 79 if j = SA[i]; j > 0 { 80 if c0 = int(T[j]); c0 != c1 { 81 B[c1] = b 82 c1 = c0 83 b = B[c1] 84 } 85 j-- 86 if int(T[j]) < c1 { 87 SA[b] = ^j 88 } else { 89 SA[b] = j 90 } 91 b++ 92 SA[i] = 0 93 } else if j < 0 { 94 SA[i] = ^j 95 } 96 } 97 98 // Compute SAs. 99 if &C[0] == &B[0] { 100 getCounts_byte(T, C, n, k) 101 } 102 getBuckets_byte(C, B, k, true) // Find ends of buckets 103 c1 = 0 104 b = B[c1] 105 for i = n - 1; i >= 0; i-- { 106 if j = SA[i]; j > 0 { 107 if c0 = int(T[j]); c0 != c1 { 108 B[c1] = b 109 c1 = c0 110 b = B[c1] 111 } 112 j-- 113 b-- 114 if int(T[j]) > c1 { 115 SA[b] = ^(j + 1) 116 } else { 117 SA[b] = j 118 } 119 SA[i] = 0 120 } 121 } 122} 123 124func postProcLMS1_byte(T []byte, SA []int, n, m int) int { 125 var i, j, p, q, plen, qlen, name int 126 var c0, c1 int 127 var diff bool 128 129 // Compact all the sorted substrings into the first m items of SA. 130 // 2*m must be not larger than n (provable). 131 for i = 0; SA[i] < 0; i++ { 132 SA[i] = ^SA[i] 133 } 134 if i < m { 135 for j, i = i, i+1; ; i++ { 136 if p = SA[i]; p < 0 { 137 SA[j] = ^p 138 j++ 139 SA[i] = 0 140 if j == m { 141 break 142 } 143 } 144 } 145 } 146 147 // Store the length of all substrings. 148 i = n - 1 149 j = n - 1 150 c0 = int(T[n-1]) 151 for { 152 c1 = c0 153 if i--; i < 0 { 154 break 155 } 156 if c0 = int(T[i]); c0 < c1 { 157 break 158 } 159 } 160 for i >= 0 { 161 for { 162 c1 = c0 163 if i--; i < 0 { 164 break 165 } 166 if c0 = int(T[i]); c0 > c1 { 167 break 168 } 169 } 170 if i >= 0 { 171 SA[m+((i+1)>>1)] = j - i 172 j = i + 1 173 for { 174 c1 = c0 175 if i--; i < 0 { 176 break 177 } 178 if c0 = int(T[i]); c0 < c1 { 179 break 180 } 181 } 182 } 183 } 184 185 // Find the lexicographic names of all substrings. 186 name = 0 187 qlen = 0 188 for i, q = 0, n; i < m; i++ { 189 p = SA[i] 190 plen = SA[m+(p>>1)] 191 diff = true 192 if (plen == qlen) && ((q + plen) < n) { 193 for j = 0; (j < plen) && (T[p+j] == T[q+j]); j++ { 194 } 195 if j == plen { 196 diff = false 197 } 198 } 199 if diff { 200 name++ 201 q = p 202 qlen = plen 203 } 204 SA[m+(p>>1)] = name 205 } 206 return name 207} 208 209func sortLMS2_byte(T []byte, SA, C, B, D []int, n, k int) { 210 var b, i, j, t, d int 211 var c0, c1 int 212 213 // Compute SAl. 214 getBuckets_byte(C, B, k, false) // Find starts of buckets 215 j = n - 1 216 c1 = int(T[j]) 217 b = B[c1] 218 j-- 219 if int(T[j]) < c1 { 220 t = 1 221 } else { 222 t = 0 223 } 224 j += n 225 if t&1 > 0 { 226 SA[b] = ^j 227 } else { 228 SA[b] = j 229 } 230 b++ 231 for i, d = 0, 0; i < n; i++ { 232 if j = SA[i]; j > 0 { 233 if n <= j { 234 d += 1 235 j -= n 236 } 237 if c0 = int(T[j]); c0 != c1 { 238 B[c1] = b 239 c1 = c0 240 b = B[c1] 241 } 242 j-- 243 t = int(c0) << 1 244 if int(T[j]) < c1 { 245 t |= 1 246 } 247 if D[t] != d { 248 j += n 249 D[t] = d 250 } 251 if t&1 > 0 { 252 SA[b] = ^j 253 } else { 254 SA[b] = j 255 } 256 b++ 257 SA[i] = 0 258 } else if j < 0 { 259 SA[i] = ^j 260 } 261 } 262 for i = n - 1; 0 <= i; i-- { 263 if SA[i] > 0 { 264 if SA[i] < n { 265 SA[i] += n 266 for j = i - 1; SA[j] < n; j-- { 267 } 268 SA[j] -= n 269 i = j 270 } 271 } 272 } 273 274 // Compute SAs. 275 getBuckets_byte(C, B, k, true) // Find ends of buckets 276 c1 = 0 277 b = B[c1] 278 for i, d = n-1, d+1; i >= 0; i-- { 279 if j = SA[i]; j > 0 { 280 if n <= j { 281 d += 1 282 j -= n 283 } 284 if c0 = int(T[j]); c0 != c1 { 285 B[c1] = b 286 c1 = c0 287 b = B[c1] 288 } 289 j-- 290 t = int(c0) << 1 291 if int(T[j]) > c1 { 292 t |= 1 293 } 294 if D[t] != d { 295 j += n 296 D[t] = d 297 } 298 b-- 299 if t&1 > 0 { 300 SA[b] = ^(j + 1) 301 } else { 302 SA[b] = j 303 } 304 SA[i] = 0 305 } 306 } 307} 308 309func postProcLMS2_byte(SA []int, n, m int) int { 310 var i, j, d, name int 311 312 // Compact all the sorted LMS substrings into the first m items of SA. 313 name = 0 314 for i = 0; SA[i] < 0; i++ { 315 j = ^SA[i] 316 if n <= j { 317 name += 1 318 } 319 SA[i] = j 320 } 321 if i < m { 322 for d, i = i, i+1; ; i++ { 323 if j = SA[i]; j < 0 { 324 j = ^j 325 if n <= j { 326 name += 1 327 } 328 SA[d] = j 329 d++ 330 SA[i] = 0 331 if d == m { 332 break 333 } 334 } 335 } 336 } 337 if name < m { 338 // Store the lexicographic names. 339 for i, d = m-1, name+1; 0 <= i; i-- { 340 if j = SA[i]; n <= j { 341 j -= n 342 d-- 343 } 344 SA[m+(j>>1)] = d 345 } 346 } else { 347 // Unset flags. 348 for i = 0; i < m; i++ { 349 if j = SA[i]; n <= j { 350 j -= n 351 SA[i] = j 352 } 353 } 354 } 355 return name 356} 357 358func induceSA_byte(T []byte, SA, C, B []int, n, k int) { 359 var b, i, j int 360 var c0, c1 int 361 362 // Compute SAl. 363 if &C[0] == &B[0] { 364 getCounts_byte(T, C, n, k) 365 } 366 getBuckets_byte(C, B, k, false) // Find starts of buckets 367 j = n - 1 368 c1 = int(T[j]) 369 b = B[c1] 370 if j > 0 && int(T[j-1]) < c1 { 371 SA[b] = ^j 372 } else { 373 SA[b] = j 374 } 375 b++ 376 for i = 0; i < n; i++ { 377 j = SA[i] 378 SA[i] = ^j 379 if j > 0 { 380 j-- 381 if c0 = int(T[j]); c0 != c1 { 382 B[c1] = b 383 c1 = c0 384 b = B[c1] 385 } 386 if j > 0 && int(T[j-1]) < c1 { 387 SA[b] = ^j 388 } else { 389 SA[b] = j 390 } 391 b++ 392 } 393 } 394 395 // Compute SAs. 396 if &C[0] == &B[0] { 397 getCounts_byte(T, C, n, k) 398 } 399 getBuckets_byte(C, B, k, true) // Find ends of buckets 400 c1 = 0 401 b = B[c1] 402 for i = n - 1; i >= 0; i-- { 403 if j = SA[i]; j > 0 { 404 j-- 405 if c0 = int(T[j]); c0 != c1 { 406 B[c1] = b 407 c1 = c0 408 b = B[c1] 409 } 410 b-- 411 if (j == 0) || (int(T[j-1]) > c1) { 412 SA[b] = ^j 413 } else { 414 SA[b] = j 415 } 416 } else { 417 SA[i] = ^j 418 } 419 } 420} 421 422func computeSA_byte(T []byte, SA []int, fs, n, k int) { 423 const ( 424 minBucketSize = 512 425 sortLMS2Limit = 0x3fffffff 426 ) 427 428 var C, B, D, RA []int 429 var bo int // Offset of B relative to SA 430 var b, i, j, m, p, q, name, newfs int 431 var c0, c1 int 432 var flags uint 433 434 if k <= minBucketSize { 435 C = make([]int, k) 436 if k <= fs { 437 bo = n + fs - k 438 B = SA[bo:] 439 flags = 1 440 } else { 441 B = make([]int, k) 442 flags = 3 443 } 444 } else if k <= fs { 445 C = SA[n+fs-k:] 446 if k <= fs-k { 447 bo = n + fs - 2*k 448 B = SA[bo:] 449 flags = 0 450 } else if k <= 4*minBucketSize { 451 B = make([]int, k) 452 flags = 2 453 } else { 454 B = C 455 flags = 8 456 } 457 } else { 458 C = make([]int, k) 459 B = C 460 flags = 4 | 8 461 } 462 if n <= sortLMS2Limit && 2 <= (n/k) { 463 if flags&1 > 0 { 464 if 2*k <= fs-k { 465 flags |= 32 466 } else { 467 flags |= 16 468 } 469 } else if flags == 0 && 2*k <= (fs-2*k) { 470 flags |= 32 471 } 472 } 473 474 // Stage 1: Reduce the problem by at least 1/2. 475 // Sort all the LMS-substrings. 476 getCounts_byte(T, C, n, k) 477 getBuckets_byte(C, B, k, true) // Find ends of buckets 478 for i = 0; i < n; i++ { 479 SA[i] = 0 480 } 481 b = -1 482 i = n - 1 483 j = n 484 m = 0 485 c0 = int(T[n-1]) 486 for { 487 c1 = c0 488 if i--; i < 0 { 489 break 490 } 491 if c0 = int(T[i]); c0 < c1 { 492 break 493 } 494 } 495 for i >= 0 { 496 for { 497 c1 = c0 498 if i--; i < 0 { 499 break 500 } 501 if c0 = int(T[i]); c0 > c1 { 502 break 503 } 504 } 505 if i >= 0 { 506 if b >= 0 { 507 SA[b] = j 508 } 509 B[c1]-- 510 b = B[c1] 511 j = i 512 m++ 513 for { 514 c1 = c0 515 if i--; i < 0 { 516 break 517 } 518 if c0 = int(T[i]); c0 < c1 { 519 break 520 } 521 } 522 } 523 } 524 525 if m > 1 { 526 if flags&(16|32) > 0 { 527 if flags&16 > 0 { 528 D = make([]int, 2*k) 529 } else { 530 D = SA[bo-2*k:] 531 } 532 B[T[j+1]]++ 533 for i, j = 0, 0; i < k; i++ { 534 j += C[i] 535 if B[i] != j { 536 SA[B[i]] += n 537 } 538 D[i] = 0 539 D[i+k] = 0 540 } 541 sortLMS2_byte(T, SA, C, B, D, n, k) 542 name = postProcLMS2_byte(SA, n, m) 543 } else { 544 sortLMS1_byte(T, SA, C, B, n, k) 545 name = postProcLMS1_byte(T, SA, n, m) 546 } 547 } else if m == 1 { 548 SA[b] = j + 1 549 name = 1 550 } else { 551 name = 0 552 } 553 554 // Stage 2: Solve the reduced problem. 555 // Recurse if names are not yet unique. 556 if name < m { 557 newfs = n + fs - 2*m 558 if flags&(1|4|8) == 0 { 559 if k+name <= newfs { 560 newfs -= k 561 } else { 562 flags |= 8 563 } 564 } 565 RA = SA[m+newfs:] 566 for i, j = m+(n>>1)-1, m-1; m <= i; i-- { 567 if SA[i] != 0 { 568 RA[j] = SA[i] - 1 569 j-- 570 } 571 } 572 computeSA_int(RA, SA, newfs, m, name) 573 574 i = n - 1 575 j = m - 1 576 c0 = int(T[n-1]) 577 for { 578 c1 = c0 579 if i--; i < 0 { 580 break 581 } 582 if c0 = int(T[i]); c0 < c1 { 583 break 584 } 585 } 586 for i >= 0 { 587 for { 588 c1 = c0 589 if i--; i < 0 { 590 break 591 } 592 if c0 = int(T[i]); c0 > c1 { 593 break 594 } 595 } 596 if i >= 0 { 597 RA[j] = i + 1 598 j-- 599 for { 600 c1 = c0 601 if i--; i < 0 { 602 break 603 } 604 if c0 = int(T[i]); c0 < c1 { 605 break 606 } 607 } 608 } 609 } 610 for i = 0; i < m; i++ { 611 SA[i] = RA[SA[i]] 612 } 613 if flags&4 > 0 { 614 B = make([]int, k) 615 C = B 616 } 617 if flags&2 > 0 { 618 B = make([]int, k) 619 } 620 } 621 622 // Stage 3: Induce the result for the original problem. 623 if flags&8 > 0 { 624 getCounts_byte(T, C, n, k) 625 } 626 // Put all left-most S characters into their buckets. 627 if m > 1 { 628 getBuckets_byte(C, B, k, true) // Find ends of buckets 629 i = m - 1 630 j = n 631 p = SA[m-1] 632 c1 = int(T[p]) 633 for { 634 c0 = c1 635 q = B[c0] 636 for q < j { 637 j-- 638 SA[j] = 0 639 } 640 for { 641 j-- 642 SA[j] = p 643 if i--; i < 0 { 644 break 645 } 646 p = SA[i] 647 if c1 = int(T[p]); c1 != c0 { 648 break 649 } 650 } 651 if i < 0 { 652 break 653 } 654 } 655 for j > 0 { 656 j-- 657 SA[j] = 0 658 } 659 } 660 induceSA_byte(T, SA, C, B, n, k) 661} 662