1// Copyright 2015, Joe Tsai. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE.md file.
4
5// Code generated by sais_gen.go. DO NOT EDIT.
6
7// ====================================================
8// Copyright (c) 2008-2010 Yuta Mori All Rights Reserved.
9//
10// Permission is hereby granted, free of charge, to any person
11// obtaining a copy of this software and associated documentation
12// files (the "Software"), to deal in the Software without
13// restriction, including without limitation the rights to use,
14// copy, modify, merge, publish, distribute, sublicense, and/or sell
15// copies of the Software, and to permit persons to whom the
16// Software is furnished to do so, subject to the following
17// conditions:
18//
19// The above copyright notice and this permission notice shall be
20// included in all copies or substantial portions of the Software.
21//
22// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
24// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
25// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
26// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
27// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
28// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
29// OTHER DEALINGS IN THE SOFTWARE.
30// ====================================================
31
32package sais
33
34func getCounts_byte(T []byte, C []int, n, k int) {
35	var i int
36	for i = 0; i < k; i++ {
37		C[i] = 0
38	}
39	for i = 0; i < n; i++ {
40		C[T[i]]++
41	}
42}
43
44func getBuckets_byte(C, B []int, k int, end bool) {
45	var i, sum int
46	if end {
47		for i = 0; i < k; i++ {
48			sum += C[i]
49			B[i] = sum
50		}
51	} else {
52		for i = 0; i < k; i++ {
53			sum += C[i]
54			B[i] = sum - C[i]
55		}
56	}
57}
58
59func sortLMS1_byte(T []byte, SA, C, B []int, n, k int) {
60	var b, i, j int
61	var c0, c1 int
62
63	// Compute SAl.
64	if &C[0] == &B[0] {
65		getCounts_byte(T, C, n, k)
66	}
67	getBuckets_byte(C, B, k, false) // Find starts of buckets
68	j = n - 1
69	c1 = int(T[j])
70	b = B[c1]
71	j--
72	if int(T[j]) < c1 {
73		SA[b] = ^j
74	} else {
75		SA[b] = j
76	}
77	b++
78	for i = 0; i < n; i++ {
79		if j = SA[i]; j > 0 {
80			if c0 = int(T[j]); c0 != c1 {
81				B[c1] = b
82				c1 = c0
83				b = B[c1]
84			}
85			j--
86			if int(T[j]) < c1 {
87				SA[b] = ^j
88			} else {
89				SA[b] = j
90			}
91			b++
92			SA[i] = 0
93		} else if j < 0 {
94			SA[i] = ^j
95		}
96	}
97
98	// Compute SAs.
99	if &C[0] == &B[0] {
100		getCounts_byte(T, C, n, k)
101	}
102	getBuckets_byte(C, B, k, true) // Find ends of buckets
103	c1 = 0
104	b = B[c1]
105	for i = n - 1; i >= 0; i-- {
106		if j = SA[i]; j > 0 {
107			if c0 = int(T[j]); c0 != c1 {
108				B[c1] = b
109				c1 = c0
110				b = B[c1]
111			}
112			j--
113			b--
114			if int(T[j]) > c1 {
115				SA[b] = ^(j + 1)
116			} else {
117				SA[b] = j
118			}
119			SA[i] = 0
120		}
121	}
122}
123
124func postProcLMS1_byte(T []byte, SA []int, n, m int) int {
125	var i, j, p, q, plen, qlen, name int
126	var c0, c1 int
127	var diff bool
128
129	// Compact all the sorted substrings into the first m items of SA.
130	// 2*m must be not larger than n (provable).
131	for i = 0; SA[i] < 0; i++ {
132		SA[i] = ^SA[i]
133	}
134	if i < m {
135		for j, i = i, i+1; ; i++ {
136			if p = SA[i]; p < 0 {
137				SA[j] = ^p
138				j++
139				SA[i] = 0
140				if j == m {
141					break
142				}
143			}
144		}
145	}
146
147	// Store the length of all substrings.
148	i = n - 1
149	j = n - 1
150	c0 = int(T[n-1])
151	for {
152		c1 = c0
153		if i--; i < 0 {
154			break
155		}
156		if c0 = int(T[i]); c0 < c1 {
157			break
158		}
159	}
160	for i >= 0 {
161		for {
162			c1 = c0
163			if i--; i < 0 {
164				break
165			}
166			if c0 = int(T[i]); c0 > c1 {
167				break
168			}
169		}
170		if i >= 0 {
171			SA[m+((i+1)>>1)] = j - i
172			j = i + 1
173			for {
174				c1 = c0
175				if i--; i < 0 {
176					break
177				}
178				if c0 = int(T[i]); c0 < c1 {
179					break
180				}
181			}
182		}
183	}
184
185	// Find the lexicographic names of all substrings.
186	name = 0
187	qlen = 0
188	for i, q = 0, n; i < m; i++ {
189		p = SA[i]
190		plen = SA[m+(p>>1)]
191		diff = true
192		if (plen == qlen) && ((q + plen) < n) {
193			for j = 0; (j < plen) && (T[p+j] == T[q+j]); j++ {
194			}
195			if j == plen {
196				diff = false
197			}
198		}
199		if diff {
200			name++
201			q = p
202			qlen = plen
203		}
204		SA[m+(p>>1)] = name
205	}
206	return name
207}
208
209func sortLMS2_byte(T []byte, SA, C, B, D []int, n, k int) {
210	var b, i, j, t, d int
211	var c0, c1 int
212
213	// Compute SAl.
214	getBuckets_byte(C, B, k, false) // Find starts of buckets
215	j = n - 1
216	c1 = int(T[j])
217	b = B[c1]
218	j--
219	if int(T[j]) < c1 {
220		t = 1
221	} else {
222		t = 0
223	}
224	j += n
225	if t&1 > 0 {
226		SA[b] = ^j
227	} else {
228		SA[b] = j
229	}
230	b++
231	for i, d = 0, 0; i < n; i++ {
232		if j = SA[i]; j > 0 {
233			if n <= j {
234				d += 1
235				j -= n
236			}
237			if c0 = int(T[j]); c0 != c1 {
238				B[c1] = b
239				c1 = c0
240				b = B[c1]
241			}
242			j--
243			t = int(c0) << 1
244			if int(T[j]) < c1 {
245				t |= 1
246			}
247			if D[t] != d {
248				j += n
249				D[t] = d
250			}
251			if t&1 > 0 {
252				SA[b] = ^j
253			} else {
254				SA[b] = j
255			}
256			b++
257			SA[i] = 0
258		} else if j < 0 {
259			SA[i] = ^j
260		}
261	}
262	for i = n - 1; 0 <= i; i-- {
263		if SA[i] > 0 {
264			if SA[i] < n {
265				SA[i] += n
266				for j = i - 1; SA[j] < n; j-- {
267				}
268				SA[j] -= n
269				i = j
270			}
271		}
272	}
273
274	// Compute SAs.
275	getBuckets_byte(C, B, k, true) // Find ends of buckets
276	c1 = 0
277	b = B[c1]
278	for i, d = n-1, d+1; i >= 0; i-- {
279		if j = SA[i]; j > 0 {
280			if n <= j {
281				d += 1
282				j -= n
283			}
284			if c0 = int(T[j]); c0 != c1 {
285				B[c1] = b
286				c1 = c0
287				b = B[c1]
288			}
289			j--
290			t = int(c0) << 1
291			if int(T[j]) > c1 {
292				t |= 1
293			}
294			if D[t] != d {
295				j += n
296				D[t] = d
297			}
298			b--
299			if t&1 > 0 {
300				SA[b] = ^(j + 1)
301			} else {
302				SA[b] = j
303			}
304			SA[i] = 0
305		}
306	}
307}
308
309func postProcLMS2_byte(SA []int, n, m int) int {
310	var i, j, d, name int
311
312	// Compact all the sorted LMS substrings into the first m items of SA.
313	name = 0
314	for i = 0; SA[i] < 0; i++ {
315		j = ^SA[i]
316		if n <= j {
317			name += 1
318		}
319		SA[i] = j
320	}
321	if i < m {
322		for d, i = i, i+1; ; i++ {
323			if j = SA[i]; j < 0 {
324				j = ^j
325				if n <= j {
326					name += 1
327				}
328				SA[d] = j
329				d++
330				SA[i] = 0
331				if d == m {
332					break
333				}
334			}
335		}
336	}
337	if name < m {
338		// Store the lexicographic names.
339		for i, d = m-1, name+1; 0 <= i; i-- {
340			if j = SA[i]; n <= j {
341				j -= n
342				d--
343			}
344			SA[m+(j>>1)] = d
345		}
346	} else {
347		// Unset flags.
348		for i = 0; i < m; i++ {
349			if j = SA[i]; n <= j {
350				j -= n
351				SA[i] = j
352			}
353		}
354	}
355	return name
356}
357
358func induceSA_byte(T []byte, SA, C, B []int, n, k int) {
359	var b, i, j int
360	var c0, c1 int
361
362	// Compute SAl.
363	if &C[0] == &B[0] {
364		getCounts_byte(T, C, n, k)
365	}
366	getBuckets_byte(C, B, k, false) // Find starts of buckets
367	j = n - 1
368	c1 = int(T[j])
369	b = B[c1]
370	if j > 0 && int(T[j-1]) < c1 {
371		SA[b] = ^j
372	} else {
373		SA[b] = j
374	}
375	b++
376	for i = 0; i < n; i++ {
377		j = SA[i]
378		SA[i] = ^j
379		if j > 0 {
380			j--
381			if c0 = int(T[j]); c0 != c1 {
382				B[c1] = b
383				c1 = c0
384				b = B[c1]
385			}
386			if j > 0 && int(T[j-1]) < c1 {
387				SA[b] = ^j
388			} else {
389				SA[b] = j
390			}
391			b++
392		}
393	}
394
395	// Compute SAs.
396	if &C[0] == &B[0] {
397		getCounts_byte(T, C, n, k)
398	}
399	getBuckets_byte(C, B, k, true) // Find ends of buckets
400	c1 = 0
401	b = B[c1]
402	for i = n - 1; i >= 0; i-- {
403		if j = SA[i]; j > 0 {
404			j--
405			if c0 = int(T[j]); c0 != c1 {
406				B[c1] = b
407				c1 = c0
408				b = B[c1]
409			}
410			b--
411			if (j == 0) || (int(T[j-1]) > c1) {
412				SA[b] = ^j
413			} else {
414				SA[b] = j
415			}
416		} else {
417			SA[i] = ^j
418		}
419	}
420}
421
422func computeSA_byte(T []byte, SA []int, fs, n, k int) {
423	const (
424		minBucketSize = 512
425		sortLMS2Limit = 0x3fffffff
426	)
427
428	var C, B, D, RA []int
429	var bo int // Offset of B relative to SA
430	var b, i, j, m, p, q, name, newfs int
431	var c0, c1 int
432	var flags uint
433
434	if k <= minBucketSize {
435		C = make([]int, k)
436		if k <= fs {
437			bo = n + fs - k
438			B = SA[bo:]
439			flags = 1
440		} else {
441			B = make([]int, k)
442			flags = 3
443		}
444	} else if k <= fs {
445		C = SA[n+fs-k:]
446		if k <= fs-k {
447			bo = n + fs - 2*k
448			B = SA[bo:]
449			flags = 0
450		} else if k <= 4*minBucketSize {
451			B = make([]int, k)
452			flags = 2
453		} else {
454			B = C
455			flags = 8
456		}
457	} else {
458		C = make([]int, k)
459		B = C
460		flags = 4 | 8
461	}
462	if n <= sortLMS2Limit && 2 <= (n/k) {
463		if flags&1 > 0 {
464			if 2*k <= fs-k {
465				flags |= 32
466			} else {
467				flags |= 16
468			}
469		} else if flags == 0 && 2*k <= (fs-2*k) {
470			flags |= 32
471		}
472	}
473
474	// Stage 1: Reduce the problem by at least 1/2.
475	// Sort all the LMS-substrings.
476	getCounts_byte(T, C, n, k)
477	getBuckets_byte(C, B, k, true) // Find ends of buckets
478	for i = 0; i < n; i++ {
479		SA[i] = 0
480	}
481	b = -1
482	i = n - 1
483	j = n
484	m = 0
485	c0 = int(T[n-1])
486	for {
487		c1 = c0
488		if i--; i < 0 {
489			break
490		}
491		if c0 = int(T[i]); c0 < c1 {
492			break
493		}
494	}
495	for i >= 0 {
496		for {
497			c1 = c0
498			if i--; i < 0 {
499				break
500			}
501			if c0 = int(T[i]); c0 > c1 {
502				break
503			}
504		}
505		if i >= 0 {
506			if b >= 0 {
507				SA[b] = j
508			}
509			B[c1]--
510			b = B[c1]
511			j = i
512			m++
513			for {
514				c1 = c0
515				if i--; i < 0 {
516					break
517				}
518				if c0 = int(T[i]); c0 < c1 {
519					break
520				}
521			}
522		}
523	}
524
525	if m > 1 {
526		if flags&(16|32) > 0 {
527			if flags&16 > 0 {
528				D = make([]int, 2*k)
529			} else {
530				D = SA[bo-2*k:]
531			}
532			B[T[j+1]]++
533			for i, j = 0, 0; i < k; i++ {
534				j += C[i]
535				if B[i] != j {
536					SA[B[i]] += n
537				}
538				D[i] = 0
539				D[i+k] = 0
540			}
541			sortLMS2_byte(T, SA, C, B, D, n, k)
542			name = postProcLMS2_byte(SA, n, m)
543		} else {
544			sortLMS1_byte(T, SA, C, B, n, k)
545			name = postProcLMS1_byte(T, SA, n, m)
546		}
547	} else if m == 1 {
548		SA[b] = j + 1
549		name = 1
550	} else {
551		name = 0
552	}
553
554	// Stage 2: Solve the reduced problem.
555	// Recurse if names are not yet unique.
556	if name < m {
557		newfs = n + fs - 2*m
558		if flags&(1|4|8) == 0 {
559			if k+name <= newfs {
560				newfs -= k
561			} else {
562				flags |= 8
563			}
564		}
565		RA = SA[m+newfs:]
566		for i, j = m+(n>>1)-1, m-1; m <= i; i-- {
567			if SA[i] != 0 {
568				RA[j] = SA[i] - 1
569				j--
570			}
571		}
572		computeSA_int(RA, SA, newfs, m, name)
573
574		i = n - 1
575		j = m - 1
576		c0 = int(T[n-1])
577		for {
578			c1 = c0
579			if i--; i < 0 {
580				break
581			}
582			if c0 = int(T[i]); c0 < c1 {
583				break
584			}
585		}
586		for i >= 0 {
587			for {
588				c1 = c0
589				if i--; i < 0 {
590					break
591				}
592				if c0 = int(T[i]); c0 > c1 {
593					break
594				}
595			}
596			if i >= 0 {
597				RA[j] = i + 1
598				j--
599				for {
600					c1 = c0
601					if i--; i < 0 {
602						break
603					}
604					if c0 = int(T[i]); c0 < c1 {
605						break
606					}
607				}
608			}
609		}
610		for i = 0; i < m; i++ {
611			SA[i] = RA[SA[i]]
612		}
613		if flags&4 > 0 {
614			B = make([]int, k)
615			C = B
616		}
617		if flags&2 > 0 {
618			B = make([]int, k)
619		}
620	}
621
622	// Stage 3: Induce the result for the original problem.
623	if flags&8 > 0 {
624		getCounts_byte(T, C, n, k)
625	}
626	// Put all left-most S characters into their buckets.
627	if m > 1 {
628		getBuckets_byte(C, B, k, true) // Find ends of buckets
629		i = m - 1
630		j = n
631		p = SA[m-1]
632		c1 = int(T[p])
633		for {
634			c0 = c1
635			q = B[c0]
636			for q < j {
637				j--
638				SA[j] = 0
639			}
640			for {
641				j--
642				SA[j] = p
643				if i--; i < 0 {
644					break
645				}
646				p = SA[i]
647				if c1 = int(T[p]); c1 != c0 {
648					break
649				}
650			}
651			if i < 0 {
652				break
653			}
654		}
655		for j > 0 {
656			j--
657			SA[j] = 0
658		}
659	}
660	induceSA_byte(T, SA, C, B, n, k)
661}
662