1// Copyright 2019 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Suffix array construction by induced sorting (SAIS).
6// See Ge Nong, Sen Zhang, and Wai Hong Chen,
7// "Two Efficient Algorithms for Linear Time Suffix Array Construction",
8// especially section 3 (https://ieeexplore.ieee.org/document/5582081).
9// See also http://zork.net/~st/jottings/sais.html.
10//
11// With optimizations inspired by Yuta Mori's sais-lite
12// (https://sites.google.com/site/yuta256/sais).
13//
14// And with other new optimizations.
15
16// Many of these functions are parameterized by the sizes of
17// the types they operate on. The generator gen.go makes
18// copies of these functions for use with other sizes.
19// Specifically:
20//
21// - A function with a name ending in _8_32 takes []byte and []int32 arguments
22//   and is duplicated into _32_32, _8_64, and _64_64 forms.
23//   The _32_32 and _64_64_ suffixes are shortened to plain _32 and _64.
24//   Any lines in the function body that contain the text "byte-only" or "256"
25//   are stripped when creating _32_32 and _64_64 forms.
26//   (Those lines are typically 8-bit-specific optimizations.)
27//
28// - A function with a name ending only in _32 operates on []int32
29//   and is duplicated into a _64 form. (Note that it may still take a []byte,
30//   but there is no need for a version of the function in which the []byte
31//   is widened to a full integer array.)
32
33// The overall runtime of this code is linear in the input size:
34// it runs a sequence of linear passes to reduce the problem to
35// a subproblem at most half as big, invokes itself recursively,
36// and then runs a sequence of linear passes to turn the answer
37// for the subproblem into the answer for the original problem.
38// This gives T(N) = O(N) + T(N/2) = O(N) + O(N/2) + O(N/4) + ... = O(N).
39//
40// The outline of the code, with the forward and backward scans
41// through O(N)-sized arrays called out, is:
42//
43// sais_I_N
44//	placeLMS_I_B
45//		bucketMax_I_B
46//			freq_I_B
47//				<scan +text> (1)
48//			<scan +freq> (2)
49//		<scan -text, random bucket> (3)
50//	induceSubL_I_B
51//		bucketMin_I_B
52//			freq_I_B
53//				<scan +text, often optimized away> (4)
54//			<scan +freq> (5)
55//		<scan +sa, random text, random bucket> (6)
56//	induceSubS_I_B
57//		bucketMax_I_B
58//			freq_I_B
59//				<scan +text, often optimized away> (7)
60//			<scan +freq> (8)
61//		<scan -sa, random text, random bucket> (9)
62//	assignID_I_B
63//		<scan +sa, random text substrings> (10)
64//	map_B
65//		<scan -sa> (11)
66//	recurse_B
67//		(recursive call to sais_B_B for a subproblem of size at most 1/2 input, often much smaller)
68//	unmap_I_B
69//		<scan -text> (12)
70//		<scan +sa> (13)
71//	expand_I_B
72//		bucketMax_I_B
73//			freq_I_B
74//				<scan +text, often optimized away> (14)
75//			<scan +freq> (15)
76//		<scan -sa, random text, random bucket> (16)
77//	induceL_I_B
78//		bucketMin_I_B
79//			freq_I_B
80//				<scan +text, often optimized away> (17)
81//			<scan +freq> (18)
82//		<scan +sa, random text, random bucket> (19)
83//	induceS_I_B
84//		bucketMax_I_B
85//			freq_I_B
86//				<scan +text, often optimized away> (20)
87//			<scan +freq> (21)
88//		<scan -sa, random text, random bucket> (22)
89//
90// Here, _B indicates the suffix array size (_32 or _64) and _I the input size (_8 or _B).
91//
92// The outline shows there are in general 22 scans through
93// O(N)-sized arrays for a given level of the recursion.
94// In the top level, operating on 8-bit input text,
95// the six freq scans are fixed size (256) instead of potentially
96// input-sized. Also, the frequency is counted once and cached
97// whenever there is room to do so (there is nearly always room in general,
98// and always room at the top level), which eliminates all but
99// the first freq_I_B text scans (that is, 5 of the 6).
100// So the top level of the recursion only does 22 - 6 - 5 = 11
101// input-sized scans and a typical level does 16 scans.
102//
103// The linear scans do not cost anywhere near as much as
104// the random accesses to the text made during a few of
105// the scans (specifically #6, #9, #16, #19, #22 marked above).
106// In real texts, there is not much but some locality to
107// the accesses, due to the repetitive structure of the text
108// (the same reason Burrows-Wheeler compression is so effective).
109// For random inputs, there is no locality, which makes those
110// accesses even more expensive, especially once the text
111// no longer fits in cache.
112// For example, running on 50 MB of Go source code, induceSubL_8_32
113// (which runs only once, at the top level of the recursion)
114// takes 0.44s, while on 50 MB of random input, it takes 2.55s.
115// Nearly all the relative slowdown is explained by the text access:
116//
117//		c0, c1 := text[k-1], text[k]
118//
119// That line runs for 0.23s on the Go text and 2.02s on random text.
120
121//go:generate go run gen.go
122
123package suffixarray
124
125// text_32 returns the suffix array for the input text.
126// It requires that len(text) fit in an int32
127// and that the caller zero sa.
128func text_32(text []byte, sa []int32) {
129	if int(int32(len(text))) != len(text) || len(text) != len(sa) {
130		panic("suffixarray: misuse of text_32")
131	}
132	sais_8_32(text, 256, sa, make([]int32, 2*256))
133}
134
135// sais_8_32 computes the suffix array of text.
136// The text must contain only values in [0, textMax).
137// The suffix array is stored in sa, which the caller
138// must ensure is already zeroed.
139// The caller must also provide temporary space tmp
140// with len(tmp) ≥ textMax. If len(tmp) ≥ 2*textMax
141// then the algorithm runs a little faster.
142// If sais_8_32 modifies tmp, it sets tmp[0] = -1 on return.
143func sais_8_32(text []byte, textMax int, sa, tmp []int32) {
144	if len(sa) != len(text) || len(tmp) < int(textMax) {
145		panic("suffixarray: misuse of sais_8_32")
146	}
147
148	// Trivial base cases. Sorting 0 or 1 things is easy.
149	if len(text) == 0 {
150		return
151	}
152	if len(text) == 1 {
153		sa[0] = 0
154		return
155	}
156
157	// Establish slices indexed by text character
158	// holding character frequency and bucket-sort offsets.
159	// If there's only enough tmp for one slice,
160	// we make it the bucket offsets and recompute
161	// the character frequency each time we need it.
162	var freq, bucket []int32
163	if len(tmp) >= 2*textMax {
164		freq, bucket = tmp[:textMax], tmp[textMax:2*textMax]
165		freq[0] = -1 // mark as uninitialized
166	} else {
167		freq, bucket = nil, tmp[:textMax]
168	}
169
170	// The SAIS algorithm.
171	// Each of these calls makes one scan through sa.
172	// See the individual functions for documentation
173	// about each's role in the algorithm.
174	numLMS := placeLMS_8_32(text, sa, freq, bucket)
175	if numLMS <= 1 {
176		// 0 or 1 items are already sorted. Do nothing.
177	} else {
178		induceSubL_8_32(text, sa, freq, bucket)
179		induceSubS_8_32(text, sa, freq, bucket)
180		length_8_32(text, sa, numLMS)
181		maxID := assignID_8_32(text, sa, numLMS)
182		if maxID < numLMS {
183			map_32(sa, numLMS)
184			recurse_32(sa, tmp, numLMS, maxID)
185			unmap_8_32(text, sa, numLMS)
186		} else {
187			// If maxID == numLMS, then each LMS-substring
188			// is unique, so the relative ordering of two LMS-suffixes
189			// is determined by just the leading LMS-substring.
190			// That is, the LMS-suffix sort order matches the
191			// (simpler) LMS-substring sort order.
192			// Copy the original LMS-substring order into the
193			// suffix array destination.
194			copy(sa, sa[len(sa)-numLMS:])
195		}
196		expand_8_32(text, freq, bucket, sa, numLMS)
197	}
198	induceL_8_32(text, sa, freq, bucket)
199	induceS_8_32(text, sa, freq, bucket)
200
201	// Mark for caller that we overwrote tmp.
202	tmp[0] = -1
203}
204
205// freq_8_32 returns the character frequencies
206// for text, as a slice indexed by character value.
207// If freq is nil, freq_8_32 uses and returns bucket.
208// If freq is non-nil, freq_8_32 assumes that freq[0] >= 0
209// means the frequencies are already computed.
210// If the frequency data is overwritten or uninitialized,
211// the caller must set freq[0] = -1 to force recomputation
212// the next time it is needed.
213func freq_8_32(text []byte, freq, bucket []int32) []int32 {
214	if freq != nil && freq[0] >= 0 {
215		return freq // already computed
216	}
217	if freq == nil {
218		freq = bucket
219	}
220
221	freq = freq[:256] // eliminate bounds check for freq[c] below
222	for i := range freq {
223		freq[i] = 0
224	}
225	for _, c := range text {
226		freq[c]++
227	}
228	return freq
229}
230
231// bucketMin_8_32 stores into bucket[c] the minimum index
232// in the bucket for character c in a bucket-sort of text.
233func bucketMin_8_32(text []byte, freq, bucket []int32) {
234	freq = freq_8_32(text, freq, bucket)
235	freq = freq[:256]     // establish len(freq) = 256, so 0 ≤ i < 256 below
236	bucket = bucket[:256] // eliminate bounds check for bucket[i] below
237	total := int32(0)
238	for i, n := range freq {
239		bucket[i] = total
240		total += n
241	}
242}
243
244// bucketMax_8_32 stores into bucket[c] the maximum index
245// in the bucket for character c in a bucket-sort of text.
246// The bucket indexes for c are [min, max).
247// That is, max is one past the final index in that bucket.
248func bucketMax_8_32(text []byte, freq, bucket []int32) {
249	freq = freq_8_32(text, freq, bucket)
250	freq = freq[:256]     // establish len(freq) = 256, so 0 ≤ i < 256 below
251	bucket = bucket[:256] // eliminate bounds check for bucket[i] below
252	total := int32(0)
253	for i, n := range freq {
254		total += n
255		bucket[i] = total
256	}
257}
258
259// The SAIS algorithm proceeds in a sequence of scans through sa.
260// Each of the following functions implements one scan,
261// and the functions appear here in the order they execute in the algorithm.
262
263// placeLMS_8_32 places into sa the indexes of the
264// final characters of the LMS substrings of text,
265// sorted into the rightmost ends of their correct buckets
266// in the suffix array.
267//
268// The imaginary sentinel character at the end of the text
269// is the final character of the final LMS substring, but there
270// is no bucket for the imaginary sentinel character,
271// which has a smaller value than any real character.
272// The caller must therefore pretend that sa[-1] == len(text).
273//
274// The text indexes of LMS-substring characters are always ≥ 1
275// (the first LMS-substring must be preceded by one or more L-type
276// characters that are not part of any LMS-substring),
277// so using 0 as a “not present” suffix array entry is safe,
278// both in this function and in most later functions
279// (until induceL_8_32 below).
280func placeLMS_8_32(text []byte, sa, freq, bucket []int32) int {
281	bucketMax_8_32(text, freq, bucket)
282
283	numLMS := 0
284	lastB := int32(-1)
285	bucket = bucket[:256] // eliminate bounds check for bucket[c1] below
286
287	// The next stanza of code (until the blank line) loop backward
288	// over text, stopping to execute a code body at each position i
289	// such that text[i] is an L-character and text[i+1] is an S-character.
290	// That is, i+1 is the position of the start of an LMS-substring.
291	// These could be hoisted out into a function with a callback,
292	// but at a significant speed cost. Instead, we just write these
293	// seven lines a few times in this source file. The copies below
294	// refer back to the pattern established by this original as the
295	// "LMS-substring iterator".
296	//
297	// In every scan through the text, c0, c1 are successive characters of text.
298	// In this backward scan, c0 == text[i] and c1 == text[i+1].
299	// By scanning backward, we can keep track of whether the current
300	// position is type-S or type-L according to the usual definition:
301	//
302	//	- position len(text) is type S with text[len(text)] == -1 (the sentinel)
303	//	- position i is type S if text[i] < text[i+1], or if text[i] == text[i+1] && i+1 is type S.
304	//	- position i is type L if text[i] > text[i+1], or if text[i] == text[i+1] && i+1 is type L.
305	//
306	// The backward scan lets us maintain the current type,
307	// update it when we see c0 != c1, and otherwise leave it alone.
308	// We want to identify all S positions with a preceding L.
309	// Position len(text) is one such position by definition, but we have
310	// nowhere to write it down, so we eliminate it by untruthfully
311	// setting isTypeS = false at the start of the loop.
312	c0, c1, isTypeS := byte(0), byte(0), false
313	for i := len(text) - 1; i >= 0; i-- {
314		c0, c1 = text[i], c0
315		if c0 < c1 {
316			isTypeS = true
317		} else if c0 > c1 && isTypeS {
318			isTypeS = false
319
320			// Bucket the index i+1 for the start of an LMS-substring.
321			b := bucket[c1] - 1
322			bucket[c1] = b
323			sa[b] = int32(i + 1)
324			lastB = b
325			numLMS++
326		}
327	}
328
329	// We recorded the LMS-substring starts but really want the ends.
330	// Luckily, with two differences, the start indexes and the end indexes are the same.
331	// The first difference is that the rightmost LMS-substring's end index is len(text),
332	// so the caller must pretend that sa[-1] == len(text), as noted above.
333	// The second difference is that the first leftmost LMS-substring start index
334	// does not end an earlier LMS-substring, so as an optimization we can omit
335	// that leftmost LMS-substring start index (the last one we wrote).
336	//
337	// Exception: if numLMS <= 1, the caller is not going to bother with
338	// the recursion at all and will treat the result as containing LMS-substring starts.
339	// In that case, we don't remove the final entry.
340	if numLMS > 1 {
341		sa[lastB] = 0
342	}
343	return numLMS
344}
345
346// induceSubL_8_32 inserts the L-type text indexes of LMS-substrings
347// into sa, assuming that the final characters of the LMS-substrings
348// are already inserted into sa, sorted by final character, and at the
349// right (not left) end of the corresponding character bucket.
350// Each LMS-substring has the form (as a regexp) /S+L+S/:
351// one or more S-type, one or more L-type, final S-type.
352// induceSubL_8_32 leaves behind only the leftmost L-type text
353// index for each LMS-substring. That is, it removes the final S-type
354// indexes that are present on entry, and it inserts but then removes
355// the interior L-type indexes too.
356// (Only the leftmost L-type index is needed by induceSubS_8_32.)
357func induceSubL_8_32(text []byte, sa, freq, bucket []int32) {
358	// Initialize positions for left side of character buckets.
359	bucketMin_8_32(text, freq, bucket)
360	bucket = bucket[:256] // eliminate bounds check for bucket[cB] below
361
362	// As we scan the array left-to-right, each sa[i] = j > 0 is a correctly
363	// sorted suffix array entry (for text[j:]) for which we know that j-1 is type L.
364	// Because j-1 is type L, inserting it into sa now will sort it correctly.
365	// But we want to distinguish a j-1 with j-2 of type L from type S.
366	// We can process the former but want to leave the latter for the caller.
367	// We record the difference by negating j-1 if it is preceded by type S.
368	// Either way, the insertion (into the text[j-1] bucket) is guaranteed to
369	// happen at sa[i´] for some i´ > i, that is, in the portion of sa we have
370	// yet to scan. A single pass therefore sees indexes j, j-1, j-2, j-3,
371	// and so on, in sorted but not necessarily adjacent order, until it finds
372	// one preceded by an index of type S, at which point it must stop.
373	//
374	// As we scan through the array, we clear the worked entries (sa[i] > 0) to zero,
375	// and we flip sa[i] < 0 to -sa[i], so that the loop finishes with sa containing
376	// only the indexes of the leftmost L-type indexes for each LMS-substring.
377	//
378	// The suffix array sa therefore serves simultaneously as input, output,
379	// and a miraculously well-tailored work queue.
380
381	// placeLMS_8_32 left out the implicit entry sa[-1] == len(text),
382	// corresponding to the identified type-L index len(text)-1.
383	// Process it before the left-to-right scan of sa proper.
384	// See body in loop for commentary.
385	k := len(text) - 1
386	c0, c1 := text[k-1], text[k]
387	if c0 < c1 {
388		k = -k
389	}
390
391	// Cache recently used bucket index:
392	// we're processing suffixes in sorted order
393	// and accessing buckets indexed by the
394	// byte before the sorted order, which still
395	// has very good locality.
396	// Invariant: b is cached, possibly dirty copy of bucket[cB].
397	cB := c1
398	b := bucket[cB]
399	sa[b] = int32(k)
400	b++
401
402	for i := 0; i < len(sa); i++ {
403		j := int(sa[i])
404		if j == 0 {
405			// Skip empty entry.
406			continue
407		}
408		if j < 0 {
409			// Leave discovered type-S index for caller.
410			sa[i] = int32(-j)
411			continue
412		}
413		sa[i] = 0
414
415		// Index j was on work queue, meaning k := j-1 is L-type,
416		// so we can now place k correctly into sa.
417		// If k-1 is L-type, queue k for processing later in this loop.
418		// If k-1 is S-type (text[k-1] < text[k]), queue -k to save for the caller.
419		k := j - 1
420		c0, c1 := text[k-1], text[k]
421		if c0 < c1 {
422			k = -k
423		}
424
425		if cB != c1 {
426			bucket[cB] = b
427			cB = c1
428			b = bucket[cB]
429		}
430		sa[b] = int32(k)
431		b++
432	}
433}
434
435// induceSubS_8_32 inserts the S-type text indexes of LMS-substrings
436// into sa, assuming that the leftmost L-type text indexes are already
437// inserted into sa, sorted by LMS-substring suffix, and at the
438// left end of the corresponding character bucket.
439// Each LMS-substring has the form (as a regexp) /S+L+S/:
440// one or more S-type, one or more L-type, final S-type.
441// induceSubS_8_32 leaves behind only the leftmost S-type text
442// index for each LMS-substring, in sorted order, at the right end of sa.
443// That is, it removes the L-type indexes that are present on entry,
444// and it inserts but then removes the interior S-type indexes too,
445// leaving the LMS-substring start indexes packed into sa[len(sa)-numLMS:].
446// (Only the LMS-substring start indexes are processed by the recursion.)
447func induceSubS_8_32(text []byte, sa, freq, bucket []int32) {
448	// Initialize positions for right side of character buckets.
449	bucketMax_8_32(text, freq, bucket)
450	bucket = bucket[:256] // eliminate bounds check for bucket[cB] below
451
452	// Analogous to induceSubL_8_32 above,
453	// as we scan the array right-to-left, each sa[i] = j > 0 is a correctly
454	// sorted suffix array entry (for text[j:]) for which we know that j-1 is type S.
455	// Because j-1 is type S, inserting it into sa now will sort it correctly.
456	// But we want to distinguish a j-1 with j-2 of type S from type L.
457	// We can process the former but want to leave the latter for the caller.
458	// We record the difference by negating j-1 if it is preceded by type L.
459	// Either way, the insertion (into the text[j-1] bucket) is guaranteed to
460	// happen at sa[i´] for some i´ < i, that is, in the portion of sa we have
461	// yet to scan. A single pass therefore sees indexes j, j-1, j-2, j-3,
462	// and so on, in sorted but not necessarily adjacent order, until it finds
463	// one preceded by an index of type L, at which point it must stop.
464	// That index (preceded by one of type L) is an LMS-substring start.
465	//
466	// As we scan through the array, we clear the worked entries (sa[i] > 0) to zero,
467	// and we flip sa[i] < 0 to -sa[i] and compact into the top of sa,
468	// so that the loop finishes with the top of sa containing exactly
469	// the LMS-substring start indexes, sorted by LMS-substring.
470
471	// Cache recently used bucket index:
472	cB := byte(0)
473	b := bucket[cB]
474
475	top := len(sa)
476	for i := len(sa) - 1; i >= 0; i-- {
477		j := int(sa[i])
478		if j == 0 {
479			// Skip empty entry.
480			continue
481		}
482		sa[i] = 0
483		if j < 0 {
484			// Leave discovered LMS-substring start index for caller.
485			top--
486			sa[top] = int32(-j)
487			continue
488		}
489
490		// Index j was on work queue, meaning k := j-1 is S-type,
491		// so we can now place k correctly into sa.
492		// If k-1 is S-type, queue k for processing later in this loop.
493		// If k-1 is L-type (text[k-1] > text[k]), queue -k to save for the caller.
494		k := j - 1
495		c1 := text[k]
496		c0 := text[k-1]
497		if c0 > c1 {
498			k = -k
499		}
500
501		if cB != c1 {
502			bucket[cB] = b
503			cB = c1
504			b = bucket[cB]
505		}
506		b--
507		sa[b] = int32(k)
508	}
509}
510
511// length_8_32 computes and records the length of each LMS-substring in text.
512// The length of the LMS-substring at index j is stored at sa[j/2],
513// avoiding the LMS-substring indexes already stored in the top half of sa.
514// (If index j is an LMS-substring start, then index j-1 is type L and cannot be.)
515// There are two exceptions, made for optimizations in name_8_32 below.
516//
517// First, the final LMS-substring is recorded as having length 0, which is otherwise
518// impossible, instead of giving it a length that includes the implicit sentinel.
519// This ensures the final LMS-substring has length unequal to all others
520// and therefore can be detected as different without text comparison
521// (it is unequal because it is the only one that ends in the implicit sentinel,
522// and the text comparison would be problematic since the implicit sentinel
523// is not actually present at text[len(text)]).
524//
525// Second, to avoid text comparison entirely, if an LMS-substring is very short,
526// sa[j/2] records its actual text instead of its length, so that if two such
527// substrings have matching “length,” the text need not be read at all.
528// The definition of “very short” is that the text bytes must pack into an uint32,
529// and the unsigned encoding e must be ≥ len(text), so that it can be
530// distinguished from a valid length.
531func length_8_32(text []byte, sa []int32, numLMS int) {
532	end := 0 // index of current LMS-substring end (0 indicates final LMS-substring)
533
534	// The encoding of N text bytes into a “length” word
535	// adds 1 to each byte, packs them into the bottom
536	// N*8 bits of a word, and then bitwise inverts the result.
537	// That is, the text sequence A B C (hex 41 42 43)
538	// encodes as ^uint32(0x42_43_44).
539	// LMS-substrings can never start or end with 0xFF.
540	// Adding 1 ensures the encoded byte sequence never
541	// starts or ends with 0x00, so that present bytes can be
542	// distinguished from zero-padding in the top bits,
543	// so the length need not be separately encoded.
544	// Inverting the bytes increases the chance that a
545	// 4-byte encoding will still be ≥ len(text).
546	// In particular, if the first byte is ASCII (<= 0x7E, so +1 <= 0x7F)
547	// then the high bit of the inversion will be set,
548	// making it clearly not a valid length (it would be a negative one).
549	//
550	// cx holds the pre-inverted encoding (the packed incremented bytes).
551	cx := uint32(0) // byte-only
552
553	// This stanza (until the blank line) is the "LMS-substring iterator",
554	// described in placeLMS_8_32 above, with one line added to maintain cx.
555	c0, c1, isTypeS := byte(0), byte(0), false
556	for i := len(text) - 1; i >= 0; i-- {
557		c0, c1 = text[i], c0
558		cx = cx<<8 | uint32(c1+1) // byte-only
559		if c0 < c1 {
560			isTypeS = true
561		} else if c0 > c1 && isTypeS {
562			isTypeS = false
563
564			// Index j = i+1 is the start of an LMS-substring.
565			// Compute length or encoded text to store in sa[j/2].
566			j := i + 1
567			var code int32
568			if end == 0 {
569				code = 0
570			} else {
571				code = int32(end - j)
572				if code <= 32/8 && ^cx >= uint32(len(text)) { // byte-only
573					code = int32(^cx) // byte-only
574				} // byte-only
575			}
576			sa[j>>1] = code
577			end = j + 1
578			cx = uint32(c1 + 1) // byte-only
579		}
580	}
581}
582
583// assignID_8_32 assigns a dense ID numbering to the
584// set of LMS-substrings respecting string ordering and equality,
585// returning the maximum assigned ID.
586// For example given the input "ababab", the LMS-substrings
587// are "aba", "aba", and "ab", renumbered as 2 2 1.
588// sa[len(sa)-numLMS:] holds the LMS-substring indexes
589// sorted in string order, so to assign numbers we can
590// consider each in turn, removing adjacent duplicates.
591// The new ID for the LMS-substring at index j is written to sa[j/2],
592// overwriting the length previously stored there (by length_8_32 above).
593func assignID_8_32(text []byte, sa []int32, numLMS int) int {
594	id := 0
595	lastLen := int32(-1) // impossible
596	lastPos := int32(0)
597	for _, j := range sa[len(sa)-numLMS:] {
598		// Is the LMS-substring at index j new, or is it the same as the last one we saw?
599		n := sa[j/2]
600		if n != lastLen {
601			goto New
602		}
603		if uint32(n) >= uint32(len(text)) {
604			// “Length” is really encoded full text, and they match.
605			goto Same
606		}
607		{
608			// Compare actual texts.
609			n := int(n)
610			this := text[j:][:n]
611			last := text[lastPos:][:n]
612			for i := 0; i < n; i++ {
613				if this[i] != last[i] {
614					goto New
615				}
616			}
617			goto Same
618		}
619	New:
620		id++
621		lastPos = j
622		lastLen = n
623	Same:
624		sa[j/2] = int32(id)
625	}
626	return id
627}
628
629// map_32 maps the LMS-substrings in text to their new IDs,
630// producing the subproblem for the recursion.
631// The mapping itself was mostly applied by assignID_8_32:
632// sa[i] is either 0, the ID for the LMS-substring at index 2*i,
633// or the ID for the LMS-substring at index 2*i+1.
634// To produce the subproblem we need only remove the zeros
635// and change ID into ID-1 (our IDs start at 1, but text chars start at 0).
636//
637// map_32 packs the result, which is the input to the recursion,
638// into the top of sa, so that the recursion result can be stored
639// in the bottom of sa, which sets up for expand_8_32 well.
640func map_32(sa []int32, numLMS int) {
641	w := len(sa)
642	for i := len(sa) / 2; i >= 0; i-- {
643		j := sa[i]
644		if j > 0 {
645			w--
646			sa[w] = j - 1
647		}
648	}
649}
650
651// recurse_32 calls sais_32 recursively to solve the subproblem we've built.
652// The subproblem is at the right end of sa, the suffix array result will be
653// written at the left end of sa, and the middle of sa is available for use as
654// temporary frequency and bucket storage.
655func recurse_32(sa, oldTmp []int32, numLMS, maxID int) {
656	dst, saTmp, text := sa[:numLMS], sa[numLMS:len(sa)-numLMS], sa[len(sa)-numLMS:]
657
658	// Set up temporary space for recursive call.
659	// We must pass sais_32 a tmp buffer wiith at least maxID entries.
660	//
661	// The subproblem is guaranteed to have length at most len(sa)/2,
662	// so that sa can hold both the subproblem and its suffix array.
663	// Nearly all the time, however, the subproblem has length < len(sa)/3,
664	// in which case there is a subproblem-sized middle of sa that
665	// we can reuse for temporary space (saTmp).
666	// When recurse_32 is called from sais_8_32, oldTmp is length 512
667	// (from text_32), and saTmp will typically be much larger, so we'll use saTmp.
668	// When deeper recursions come back to recurse_32, now oldTmp is
669	// the saTmp from the top-most recursion, it is typically larger than
670	// the current saTmp (because the current sa gets smaller and smaller
671	// as the recursion gets deeper), and we keep reusing that top-most
672	// large saTmp instead of the offered smaller ones.
673	//
674	// Why is the subproblem length so often just under len(sa)/3?
675	// See Nong, Zhang, and Chen, section 3.6 for a plausible explanation.
676	// In brief, the len(sa)/2 case would correspond to an SLSLSLSLSLSL pattern
677	// in the input, perfect alternation of larger and smaller input bytes.
678	// Real text doesn't do that. If each L-type index is randomly followed
679	// by either an L-type or S-type index, then half the substrings will
680	// be of the form SLS, but the other half will be longer. Of that half,
681	// half (a quarter overall) will be SLLS; an eighth will be SLLLS, and so on.
682	// Not counting the final S in each (which overlaps the first S in the next),
683	// This works out to an average length 2×½ + 3×¼ + 4×⅛ + ... = 3.
684	// The space we need is further reduced by the fact that many of the
685	// short patterns like SLS will often be the same character sequences
686	// repeated throughout the text, reducing maxID relative to numLMS.
687	//
688	// For short inputs, the averages may not run in our favor, but then we
689	// can often fall back to using the length-512 tmp available in the
690	// top-most call. (Also a short allocation would not be a big deal.)
691	//
692	// For pathological inputs, we fall back to allocating a new tmp of length
693	// max(maxID, numLMS/2). This level of the recursion needs maxID,
694	// and all deeper levels of the recursion will need no more than numLMS/2,
695	// so this one allocation is guaranteed to suffice for the entire stack
696	// of recursive calls.
697	tmp := oldTmp
698	if len(tmp) < len(saTmp) {
699		tmp = saTmp
700	}
701	if len(tmp) < numLMS {
702		// TestSAIS/forcealloc reaches this code.
703		n := maxID
704		if n < numLMS/2 {
705			n = numLMS / 2
706		}
707		tmp = make([]int32, n)
708	}
709
710	// sais_32 requires that the caller arrange to clear dst,
711	// because in general the caller may know dst is
712	// freshly-allocated and already cleared. But this one is not.
713	for i := range dst {
714		dst[i] = 0
715	}
716	sais_32(text, maxID, dst, tmp)
717}
718
719// unmap_8_32 unmaps the subproblem back to the original.
720// sa[:numLMS] is the LMS-substring numbers, which don't matter much anymore.
721// sa[len(sa)-numLMS:] is the sorted list of those LMS-substring numbers.
722// The key part is that if the list says K that means the K'th substring.
723// We can replace sa[:numLMS] with the indexes of the LMS-substrings.
724// Then if the list says K it really means sa[K].
725// Having mapped the list back to LMS-substring indexes,
726// we can place those into the right buckets.
727func unmap_8_32(text []byte, sa []int32, numLMS int) {
728	unmap := sa[len(sa)-numLMS:]
729	j := len(unmap)
730
731	// "LMS-substring iterator" (see placeLMS_8_32 above).
732	c0, c1, isTypeS := byte(0), byte(0), false
733	for i := len(text) - 1; i >= 0; i-- {
734		c0, c1 = text[i], c0
735		if c0 < c1 {
736			isTypeS = true
737		} else if c0 > c1 && isTypeS {
738			isTypeS = false
739
740			// Populate inverse map.
741			j--
742			unmap[j] = int32(i + 1)
743		}
744	}
745
746	// Apply inverse map to subproblem suffix array.
747	sa = sa[:numLMS]
748	for i := 0; i < len(sa); i++ {
749		sa[i] = unmap[sa[i]]
750	}
751}
752
753// expand_8_32 distributes the compacted, sorted LMS-suffix indexes
754// from sa[:numLMS] into the tops of the appropriate buckets in sa,
755// preserving the sorted order and making room for the L-type indexes
756// to be slotted into the sorted sequence by induceL_8_32.
757func expand_8_32(text []byte, freq, bucket, sa []int32, numLMS int) {
758	bucketMax_8_32(text, freq, bucket)
759	bucket = bucket[:256] // eliminate bound check for bucket[c] below
760
761	// Loop backward through sa, always tracking
762	// the next index to populate from sa[:numLMS].
763	// When we get to one, populate it.
764	// Zero the rest of the slots; they have dead values in them.
765	x := numLMS - 1
766	saX := sa[x]
767	c := text[saX]
768	b := bucket[c] - 1
769	bucket[c] = b
770
771	for i := len(sa) - 1; i >= 0; i-- {
772		if i != int(b) {
773			sa[i] = 0
774			continue
775		}
776		sa[i] = saX
777
778		// Load next entry to put down (if any).
779		if x > 0 {
780			x--
781			saX = sa[x] // TODO bounds check
782			c = text[saX]
783			b = bucket[c] - 1
784			bucket[c] = b
785		}
786	}
787}
788
789// induceL_8_32 inserts L-type text indexes into sa,
790// assuming that the leftmost S-type indexes are inserted
791// into sa, in sorted order, in the right bucket halves.
792// It leaves all the L-type indexes in sa, but the
793// leftmost L-type indexes are negated, to mark them
794// for processing by induceS_8_32.
795func induceL_8_32(text []byte, sa, freq, bucket []int32) {
796	// Initialize positions for left side of character buckets.
797	bucketMin_8_32(text, freq, bucket)
798	bucket = bucket[:256] // eliminate bounds check for bucket[cB] below
799
800	// This scan is similar to the one in induceSubL_8_32 above.
801	// That one arranges to clear all but the leftmost L-type indexes.
802	// This scan leaves all the L-type indexes and the original S-type
803	// indexes, but it negates the positive leftmost L-type indexes
804	// (the ones that induceS_8_32 needs to process).
805
806	// expand_8_32 left out the implicit entry sa[-1] == len(text),
807	// corresponding to the identified type-L index len(text)-1.
808	// Process it before the left-to-right scan of sa proper.
809	// See body in loop for commentary.
810	k := len(text) - 1
811	c0, c1 := text[k-1], text[k]
812	if c0 < c1 {
813		k = -k
814	}
815
816	// Cache recently used bucket index.
817	cB := c1
818	b := bucket[cB]
819	sa[b] = int32(k)
820	b++
821
822	for i := 0; i < len(sa); i++ {
823		j := int(sa[i])
824		if j <= 0 {
825			// Skip empty or negated entry (including negated zero).
826			continue
827		}
828
829		// Index j was on work queue, meaning k := j-1 is L-type,
830		// so we can now place k correctly into sa.
831		// If k-1 is L-type, queue k for processing later in this loop.
832		// If k-1 is S-type (text[k-1] < text[k]), queue -k to save for the caller.
833		// If k is zero, k-1 doesn't exist, so we only need to leave it
834		// for the caller. The caller can't tell the difference between
835		// an empty slot and a non-empty zero, but there's no need
836		// to distinguish them anyway: the final suffix array will end up
837		// with one zero somewhere, and that will be a real zero.
838		k := j - 1
839		c1 := text[k]
840		if k > 0 {
841			if c0 := text[k-1]; c0 < c1 {
842				k = -k
843			}
844		}
845
846		if cB != c1 {
847			bucket[cB] = b
848			cB = c1
849			b = bucket[cB]
850		}
851		sa[b] = int32(k)
852		b++
853	}
854}
855
856func induceS_8_32(text []byte, sa, freq, bucket []int32) {
857	// Initialize positions for right side of character buckets.
858	bucketMax_8_32(text, freq, bucket)
859	bucket = bucket[:256] // eliminate bounds check for bucket[cB] below
860
861	cB := byte(0)
862	b := bucket[cB]
863
864	for i := len(sa) - 1; i >= 0; i-- {
865		j := int(sa[i])
866		if j >= 0 {
867			// Skip non-flagged entry.
868			// (This loop can't see an empty entry; 0 means the real zero index.)
869			continue
870		}
871
872		// Negative j is a work queue entry; rewrite to positive j for final suffix array.
873		j = -j
874		sa[i] = int32(j)
875
876		// Index j was on work queue (encoded as -j but now decoded),
877		// meaning k := j-1 is L-type,
878		// so we can now place k correctly into sa.
879		// If k-1 is S-type, queue -k for processing later in this loop.
880		// If k-1 is L-type (text[k-1] > text[k]), queue k to save for the caller.
881		// If k is zero, k-1 doesn't exist, so we only need to leave it
882		// for the caller.
883		k := j - 1
884		c1 := text[k]
885		if k > 0 {
886			if c0 := text[k-1]; c0 <= c1 {
887				k = -k
888			}
889		}
890
891		if cB != c1 {
892			bucket[cB] = b
893			cB = c1
894			b = bucket[cB]
895		}
896		b--
897		sa[b] = int32(k)
898	}
899}
900