1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package bytes implements functions for the manipulation of byte slices.
6// It is analogous to the facilities of the strings package.
7package bytes
8
9import (
10	"internal/bytealg"
11	"unicode"
12	"unicode/utf8"
13)
14
15// Equal returns a boolean reporting whether a and b
16// are the same length and contain the same bytes.
17// A nil argument is equivalent to an empty slice.
18func Equal(a, b []byte) bool {
19	return bytealg.Equal(a, b)
20}
21
22func equalPortable(a, b []byte) bool {
23	if len(a) != len(b) {
24		return false
25	}
26	for i, c := range a {
27		if c != b[i] {
28			return false
29		}
30	}
31	return true
32}
33
34// Compare returns an integer comparing two byte slices lexicographically.
35// The result will be 0 if a==b, -1 if a < b, and +1 if a > b.
36// A nil argument is equivalent to an empty slice.
37func Compare(a, b []byte) int {
38	return bytealg.Compare(a, b)
39}
40
41// explode splits s into a slice of UTF-8 sequences, one per Unicode code point (still slices of bytes),
42// up to a maximum of n byte slices. Invalid UTF-8 sequences are chopped into individual bytes.
43func explode(s []byte, n int) [][]byte {
44	if n <= 0 {
45		n = len(s)
46	}
47	a := make([][]byte, n)
48	var size int
49	na := 0
50	for len(s) > 0 {
51		if na+1 >= n {
52			a[na] = s
53			na++
54			break
55		}
56		_, size = utf8.DecodeRune(s)
57		a[na] = s[0:size:size]
58		s = s[size:]
59		na++
60	}
61	return a[0:na]
62}
63
64// Count counts the number of non-overlapping instances of sep in s.
65// If sep is an empty slice, Count returns 1 + the number of UTF-8-encoded code points in s.
66func Count(s, sep []byte) int {
67	// special case
68	if len(sep) == 0 {
69		return utf8.RuneCount(s) + 1
70	}
71	if len(sep) == 1 {
72		return bytealg.Count(s, sep[0])
73	}
74	n := 0
75	for {
76		i := Index(s, sep)
77		if i == -1 {
78			return n
79		}
80		n++
81		s = s[i+len(sep):]
82	}
83}
84
85// Contains reports whether subslice is within b.
86func Contains(b, subslice []byte) bool {
87	return Index(b, subslice) != -1
88}
89
90// ContainsAny reports whether any of the UTF-8-encoded code points in chars are within b.
91func ContainsAny(b []byte, chars string) bool {
92	return IndexAny(b, chars) >= 0
93}
94
95// ContainsRune reports whether the rune is contained in the UTF-8-encoded byte slice b.
96func ContainsRune(b []byte, r rune) bool {
97	return IndexRune(b, r) >= 0
98}
99
100// IndexByte returns the index of the first instance of c in b, or -1 if c is not present in b.
101func IndexByte(b []byte, c byte) int {
102	return bytealg.IndexByte(b, c)
103}
104
105func indexBytePortable(s []byte, c byte) int {
106	for i, b := range s {
107		if b == c {
108			return i
109		}
110	}
111	return -1
112}
113
114// LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s.
115func LastIndex(s, sep []byte) int {
116	n := len(sep)
117	if n == 0 {
118		return len(s)
119	}
120	c := sep[0]
121	for i := len(s) - n; i >= 0; i-- {
122		if s[i] == c && (n == 1 || Equal(s[i:i+n], sep)) {
123			return i
124		}
125	}
126	return -1
127}
128
129// LastIndexByte returns the index of the last instance of c in s, or -1 if c is not present in s.
130func LastIndexByte(s []byte, c byte) int {
131	for i := len(s) - 1; i >= 0; i-- {
132		if s[i] == c {
133			return i
134		}
135	}
136	return -1
137}
138
139// IndexRune interprets s as a sequence of UTF-8-encoded code points.
140// It returns the byte index of the first occurrence in s of the given rune.
141// It returns -1 if rune is not present in s.
142// If r is utf8.RuneError, it returns the first instance of any
143// invalid UTF-8 byte sequence.
144func IndexRune(s []byte, r rune) int {
145	switch {
146	case 0 <= r && r < utf8.RuneSelf:
147		return IndexByte(s, byte(r))
148	case r == utf8.RuneError:
149		for i := 0; i < len(s); {
150			r1, n := utf8.DecodeRune(s[i:])
151			if r1 == utf8.RuneError {
152				return i
153			}
154			i += n
155		}
156		return -1
157	case !utf8.ValidRune(r):
158		return -1
159	default:
160		var b [utf8.UTFMax]byte
161		n := utf8.EncodeRune(b[:], r)
162		return Index(s, b[:n])
163	}
164}
165
166// IndexAny interprets s as a sequence of UTF-8-encoded Unicode code points.
167// It returns the byte index of the first occurrence in s of any of the Unicode
168// code points in chars. It returns -1 if chars is empty or if there is no code
169// point in common.
170func IndexAny(s []byte, chars string) int {
171	if chars == "" {
172		// Avoid scanning all of s.
173		return -1
174	}
175	if len(s) > 8 {
176		if as, isASCII := makeASCIISet(chars); isASCII {
177			for i, c := range s {
178				if as.contains(c) {
179					return i
180				}
181			}
182			return -1
183		}
184	}
185	var width int
186	for i := 0; i < len(s); i += width {
187		r := rune(s[i])
188		if r < utf8.RuneSelf {
189			width = 1
190		} else {
191			r, width = utf8.DecodeRune(s[i:])
192		}
193		for _, ch := range chars {
194			if r == ch {
195				return i
196			}
197		}
198	}
199	return -1
200}
201
202// LastIndexAny interprets s as a sequence of UTF-8-encoded Unicode code
203// points. It returns the byte index of the last occurrence in s of any of
204// the Unicode code points in chars. It returns -1 if chars is empty or if
205// there is no code point in common.
206func LastIndexAny(s []byte, chars string) int {
207	if chars == "" {
208		// Avoid scanning all of s.
209		return -1
210	}
211	if len(s) > 8 {
212		if as, isASCII := makeASCIISet(chars); isASCII {
213			for i := len(s) - 1; i >= 0; i-- {
214				if as.contains(s[i]) {
215					return i
216				}
217			}
218			return -1
219		}
220	}
221	for i := len(s); i > 0; {
222		r, size := utf8.DecodeLastRune(s[:i])
223		i -= size
224		for _, c := range chars {
225			if r == c {
226				return i
227			}
228		}
229	}
230	return -1
231}
232
233// Generic split: splits after each instance of sep,
234// including sepSave bytes of sep in the subslices.
235func genSplit(s, sep []byte, sepSave, n int) [][]byte {
236	if n == 0 {
237		return nil
238	}
239	if len(sep) == 0 {
240		return explode(s, n)
241	}
242	if n < 0 {
243		n = Count(s, sep) + 1
244	}
245
246	a := make([][]byte, n)
247	n--
248	i := 0
249	for i < n {
250		m := Index(s, sep)
251		if m < 0 {
252			break
253		}
254		a[i] = s[: m+sepSave : m+sepSave]
255		s = s[m+len(sep):]
256		i++
257	}
258	a[i] = s
259	return a[:i+1]
260}
261
262// SplitN slices s into subslices separated by sep and returns a slice of
263// the subslices between those separators.
264// If sep is empty, SplitN splits after each UTF-8 sequence.
265// The count determines the number of subslices to return:
266//   n > 0: at most n subslices; the last subslice will be the unsplit remainder.
267//   n == 0: the result is nil (zero subslices)
268//   n < 0: all subslices
269func SplitN(s, sep []byte, n int) [][]byte { return genSplit(s, sep, 0, n) }
270
271// SplitAfterN slices s into subslices after each instance of sep and
272// returns a slice of those subslices.
273// If sep is empty, SplitAfterN splits after each UTF-8 sequence.
274// The count determines the number of subslices to return:
275//   n > 0: at most n subslices; the last subslice will be the unsplit remainder.
276//   n == 0: the result is nil (zero subslices)
277//   n < 0: all subslices
278func SplitAfterN(s, sep []byte, n int) [][]byte {
279	return genSplit(s, sep, len(sep), n)
280}
281
282// Split slices s into all subslices separated by sep and returns a slice of
283// the subslices between those separators.
284// If sep is empty, Split splits after each UTF-8 sequence.
285// It is equivalent to SplitN with a count of -1.
286func Split(s, sep []byte) [][]byte { return genSplit(s, sep, 0, -1) }
287
288// SplitAfter slices s into all subslices after each instance of sep and
289// returns a slice of those subslices.
290// If sep is empty, SplitAfter splits after each UTF-8 sequence.
291// It is equivalent to SplitAfterN with a count of -1.
292func SplitAfter(s, sep []byte) [][]byte {
293	return genSplit(s, sep, len(sep), -1)
294}
295
296var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1}
297
298// Fields interprets s as a sequence of UTF-8-encoded code points.
299// It splits the slice s around each instance of one or more consecutive white space
300// characters, as defined by unicode.IsSpace, returning a slice of subslices of s or an
301// empty slice if s contains only white space.
302func Fields(s []byte) [][]byte {
303	// First count the fields.
304	// This is an exact count if s is ASCII, otherwise it is an approximation.
305	n := 0
306	wasSpace := 1
307	// setBits is used to track which bits are set in the bytes of s.
308	setBits := uint8(0)
309	for i := 0; i < len(s); i++ {
310		r := s[i]
311		setBits |= r
312		isSpace := int(asciiSpace[r])
313		n += wasSpace & ^isSpace
314		wasSpace = isSpace
315	}
316
317	if setBits >= utf8.RuneSelf {
318		// Some runes in the input slice are not ASCII.
319		return FieldsFunc(s, unicode.IsSpace)
320	}
321
322	// ASCII fast path
323	a := make([][]byte, n)
324	na := 0
325	fieldStart := 0
326	i := 0
327	// Skip spaces in the front of the input.
328	for i < len(s) && asciiSpace[s[i]] != 0 {
329		i++
330	}
331	fieldStart = i
332	for i < len(s) {
333		if asciiSpace[s[i]] == 0 {
334			i++
335			continue
336		}
337		a[na] = s[fieldStart:i:i]
338		na++
339		i++
340		// Skip spaces in between fields.
341		for i < len(s) && asciiSpace[s[i]] != 0 {
342			i++
343		}
344		fieldStart = i
345	}
346	if fieldStart < len(s) { // Last field might end at EOF.
347		a[na] = s[fieldStart:len(s):len(s)]
348	}
349	return a
350}
351
352// FieldsFunc interprets s as a sequence of UTF-8-encoded code points.
353// It splits the slice s at each run of code points c satisfying f(c) and
354// returns a slice of subslices of s. If all code points in s satisfy f(c), or
355// len(s) == 0, an empty slice is returned.
356// FieldsFunc makes no guarantees about the order in which it calls f(c).
357// If f does not return consistent results for a given c, FieldsFunc may crash.
358func FieldsFunc(s []byte, f func(rune) bool) [][]byte {
359	// A span is used to record a slice of s of the form s[start:end].
360	// The start index is inclusive and the end index is exclusive.
361	type span struct {
362		start int
363		end   int
364	}
365	spans := make([]span, 0, 32)
366
367	// Find the field start and end indices.
368	wasField := false
369	fromIndex := 0
370	for i := 0; i < len(s); {
371		size := 1
372		r := rune(s[i])
373		if r >= utf8.RuneSelf {
374			r, size = utf8.DecodeRune(s[i:])
375		}
376		if f(r) {
377			if wasField {
378				spans = append(spans, span{start: fromIndex, end: i})
379				wasField = false
380			}
381		} else {
382			if !wasField {
383				fromIndex = i
384				wasField = true
385			}
386		}
387		i += size
388	}
389
390	// Last field might end at EOF.
391	if wasField {
392		spans = append(spans, span{fromIndex, len(s)})
393	}
394
395	// Create subslices from recorded field indices.
396	a := make([][]byte, len(spans))
397	for i, span := range spans {
398		a[i] = s[span.start:span.end:span.end]
399	}
400
401	return a
402}
403
404// Join concatenates the elements of s to create a new byte slice. The separator
405// sep is placed between elements in the resulting slice.
406func Join(s [][]byte, sep []byte) []byte {
407	if len(s) == 0 {
408		return []byte{}
409	}
410	if len(s) == 1 {
411		// Just return a copy.
412		return append([]byte(nil), s[0]...)
413	}
414	n := len(sep) * (len(s) - 1)
415	for _, v := range s {
416		n += len(v)
417	}
418
419	b := make([]byte, n)
420	bp := copy(b, s[0])
421	for _, v := range s[1:] {
422		bp += copy(b[bp:], sep)
423		bp += copy(b[bp:], v)
424	}
425	return b
426}
427
428// HasPrefix tests whether the byte slice s begins with prefix.
429func HasPrefix(s, prefix []byte) bool {
430	return len(s) >= len(prefix) && Equal(s[0:len(prefix)], prefix)
431}
432
433// HasSuffix tests whether the byte slice s ends with suffix.
434func HasSuffix(s, suffix []byte) bool {
435	return len(s) >= len(suffix) && Equal(s[len(s)-len(suffix):], suffix)
436}
437
438// Map returns a copy of the byte slice s with all its characters modified
439// according to the mapping function. If mapping returns a negative value, the character is
440// dropped from the byte slice with no replacement. The characters in s and the
441// output are interpreted as UTF-8-encoded code points.
442func Map(mapping func(r rune) rune, s []byte) []byte {
443	// In the worst case, the slice can grow when mapped, making
444	// things unpleasant. But it's so rare we barge in assuming it's
445	// fine. It could also shrink but that falls out naturally.
446	maxbytes := len(s) // length of b
447	nbytes := 0        // number of bytes encoded in b
448	b := make([]byte, maxbytes)
449	for i := 0; i < len(s); {
450		wid := 1
451		r := rune(s[i])
452		if r >= utf8.RuneSelf {
453			r, wid = utf8.DecodeRune(s[i:])
454		}
455		r = mapping(r)
456		if r >= 0 {
457			rl := utf8.RuneLen(r)
458			if rl < 0 {
459				rl = len(string(utf8.RuneError))
460			}
461			if nbytes+rl > maxbytes {
462				// Grow the buffer.
463				maxbytes = maxbytes*2 + utf8.UTFMax
464				nb := make([]byte, maxbytes)
465				copy(nb, b[0:nbytes])
466				b = nb
467			}
468			nbytes += utf8.EncodeRune(b[nbytes:maxbytes], r)
469		}
470		i += wid
471	}
472	return b[0:nbytes]
473}
474
475// Repeat returns a new byte slice consisting of count copies of b.
476//
477// It panics if count is negative or if
478// the result of (len(b) * count) overflows.
479func Repeat(b []byte, count int) []byte {
480	// Since we cannot return an error on overflow,
481	// we should panic if the repeat will generate
482	// an overflow.
483	// See Issue golang.org/issue/16237.
484	if count < 0 {
485		panic("bytes: negative Repeat count")
486	} else if count > 0 && len(b)*count/count != len(b) {
487		panic("bytes: Repeat count causes overflow")
488	}
489
490	nb := make([]byte, len(b)*count)
491	bp := copy(nb, b)
492	for bp < len(nb) {
493		copy(nb[bp:], nb[:bp])
494		bp *= 2
495	}
496	return nb
497}
498
499// ToUpper treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters within it mapped to their upper case.
500func ToUpper(s []byte) []byte { return Map(unicode.ToUpper, s) }
501
502// ToLower treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their lower case.
503func ToLower(s []byte) []byte { return Map(unicode.ToLower, s) }
504
505// ToTitle treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their title case.
506func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) }
507
508// ToUpperSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their
509// upper case, giving priority to the special casing rules.
510func ToUpperSpecial(c unicode.SpecialCase, s []byte) []byte {
511	return Map(c.ToUpper, s)
512}
513
514// ToLowerSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their
515// lower case, giving priority to the special casing rules.
516func ToLowerSpecial(c unicode.SpecialCase, s []byte) []byte {
517	return Map(c.ToLower, s)
518}
519
520// ToTitleSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their
521// title case, giving priority to the special casing rules.
522func ToTitleSpecial(c unicode.SpecialCase, s []byte) []byte {
523	return Map(c.ToTitle, s)
524}
525
526// isSeparator reports whether the rune could mark a word boundary.
527// TODO: update when package unicode captures more of the properties.
528func isSeparator(r rune) bool {
529	// ASCII alphanumerics and underscore are not separators
530	if r <= 0x7F {
531		switch {
532		case '0' <= r && r <= '9':
533			return false
534		case 'a' <= r && r <= 'z':
535			return false
536		case 'A' <= r && r <= 'Z':
537			return false
538		case r == '_':
539			return false
540		}
541		return true
542	}
543	// Letters and digits are not separators
544	if unicode.IsLetter(r) || unicode.IsDigit(r) {
545		return false
546	}
547	// Otherwise, all we can do for now is treat spaces as separators.
548	return unicode.IsSpace(r)
549}
550
551// Title treats s as UTF-8-encoded bytes and returns a copy with all Unicode letters that begin
552// words mapped to their title case.
553//
554// BUG(rsc): The rule Title uses for word boundaries does not handle Unicode punctuation properly.
555func Title(s []byte) []byte {
556	// Use a closure here to remember state.
557	// Hackish but effective. Depends on Map scanning in order and calling
558	// the closure once per rune.
559	prev := ' '
560	return Map(
561		func(r rune) rune {
562			if isSeparator(prev) {
563				prev = r
564				return unicode.ToTitle(r)
565			}
566			prev = r
567			return r
568		},
569		s)
570}
571
572// TrimLeftFunc treats s as UTF-8-encoded bytes and returns a subslice of s by slicing off
573// all leading UTF-8-encoded code points c that satisfy f(c).
574func TrimLeftFunc(s []byte, f func(r rune) bool) []byte {
575	i := indexFunc(s, f, false)
576	if i == -1 {
577		return nil
578	}
579	return s[i:]
580}
581
582// TrimRightFunc returns a subslice of s by slicing off all trailing
583// UTF-8-encoded code points c that satisfy f(c).
584func TrimRightFunc(s []byte, f func(r rune) bool) []byte {
585	i := lastIndexFunc(s, f, false)
586	if i >= 0 && s[i] >= utf8.RuneSelf {
587		_, wid := utf8.DecodeRune(s[i:])
588		i += wid
589	} else {
590		i++
591	}
592	return s[0:i]
593}
594
595// TrimFunc returns a subslice of s by slicing off all leading and trailing
596// UTF-8-encoded code points c that satisfy f(c).
597func TrimFunc(s []byte, f func(r rune) bool) []byte {
598	return TrimRightFunc(TrimLeftFunc(s, f), f)
599}
600
601// TrimPrefix returns s without the provided leading prefix string.
602// If s doesn't start with prefix, s is returned unchanged.
603func TrimPrefix(s, prefix []byte) []byte {
604	if HasPrefix(s, prefix) {
605		return s[len(prefix):]
606	}
607	return s
608}
609
610// TrimSuffix returns s without the provided trailing suffix string.
611// If s doesn't end with suffix, s is returned unchanged.
612func TrimSuffix(s, suffix []byte) []byte {
613	if HasSuffix(s, suffix) {
614		return s[:len(s)-len(suffix)]
615	}
616	return s
617}
618
619// IndexFunc interprets s as a sequence of UTF-8-encoded code points.
620// It returns the byte index in s of the first Unicode
621// code point satisfying f(c), or -1 if none do.
622func IndexFunc(s []byte, f func(r rune) bool) int {
623	return indexFunc(s, f, true)
624}
625
626// LastIndexFunc interprets s as a sequence of UTF-8-encoded code points.
627// It returns the byte index in s of the last Unicode
628// code point satisfying f(c), or -1 if none do.
629func LastIndexFunc(s []byte, f func(r rune) bool) int {
630	return lastIndexFunc(s, f, true)
631}
632
633// indexFunc is the same as IndexFunc except that if
634// truth==false, the sense of the predicate function is
635// inverted.
636func indexFunc(s []byte, f func(r rune) bool, truth bool) int {
637	start := 0
638	for start < len(s) {
639		wid := 1
640		r := rune(s[start])
641		if r >= utf8.RuneSelf {
642			r, wid = utf8.DecodeRune(s[start:])
643		}
644		if f(r) == truth {
645			return start
646		}
647		start += wid
648	}
649	return -1
650}
651
652// lastIndexFunc is the same as LastIndexFunc except that if
653// truth==false, the sense of the predicate function is
654// inverted.
655func lastIndexFunc(s []byte, f func(r rune) bool, truth bool) int {
656	for i := len(s); i > 0; {
657		r, size := rune(s[i-1]), 1
658		if r >= utf8.RuneSelf {
659			r, size = utf8.DecodeLastRune(s[0:i])
660		}
661		i -= size
662		if f(r) == truth {
663			return i
664		}
665	}
666	return -1
667}
668
669// asciiSet is a 32-byte value, where each bit represents the presence of a
670// given ASCII character in the set. The 128-bits of the lower 16 bytes,
671// starting with the least-significant bit of the lowest word to the
672// most-significant bit of the highest word, map to the full range of all
673// 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed,
674// ensuring that any non-ASCII character will be reported as not in the set.
675type asciiSet [8]uint32
676
677// makeASCIISet creates a set of ASCII characters and reports whether all
678// characters in chars are ASCII.
679func makeASCIISet(chars string) (as asciiSet, ok bool) {
680	for i := 0; i < len(chars); i++ {
681		c := chars[i]
682		if c >= utf8.RuneSelf {
683			return as, false
684		}
685		as[c>>5] |= 1 << uint(c&31)
686	}
687	return as, true
688}
689
690// contains reports whether c is inside the set.
691func (as *asciiSet) contains(c byte) bool {
692	return (as[c>>5] & (1 << uint(c&31))) != 0
693}
694
695func makeCutsetFunc(cutset string) func(r rune) bool {
696	if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
697		return func(r rune) bool {
698			return r == rune(cutset[0])
699		}
700	}
701	if as, isASCII := makeASCIISet(cutset); isASCII {
702		return func(r rune) bool {
703			return r < utf8.RuneSelf && as.contains(byte(r))
704		}
705	}
706	return func(r rune) bool {
707		for _, c := range cutset {
708			if c == r {
709				return true
710			}
711		}
712		return false
713	}
714}
715
716// Trim returns a subslice of s by slicing off all leading and
717// trailing UTF-8-encoded code points contained in cutset.
718func Trim(s []byte, cutset string) []byte {
719	return TrimFunc(s, makeCutsetFunc(cutset))
720}
721
722// TrimLeft returns a subslice of s by slicing off all leading
723// UTF-8-encoded code points contained in cutset.
724func TrimLeft(s []byte, cutset string) []byte {
725	return TrimLeftFunc(s, makeCutsetFunc(cutset))
726}
727
728// TrimRight returns a subslice of s by slicing off all trailing
729// UTF-8-encoded code points that are contained in cutset.
730func TrimRight(s []byte, cutset string) []byte {
731	return TrimRightFunc(s, makeCutsetFunc(cutset))
732}
733
734// TrimSpace returns a subslice of s by slicing off all leading and
735// trailing white space, as defined by Unicode.
736func TrimSpace(s []byte) []byte {
737	return TrimFunc(s, unicode.IsSpace)
738}
739
740// Runes interprets s as a sequence of UTF-8-encoded code points.
741// It returns a slice of runes (Unicode code points) equivalent to s.
742func Runes(s []byte) []rune {
743	t := make([]rune, utf8.RuneCount(s))
744	i := 0
745	for len(s) > 0 {
746		r, l := utf8.DecodeRune(s)
747		t[i] = r
748		i++
749		s = s[l:]
750	}
751	return t
752}
753
754// Replace returns a copy of the slice s with the first n
755// non-overlapping instances of old replaced by new.
756// If old is empty, it matches at the beginning of the slice
757// and after each UTF-8 sequence, yielding up to k+1 replacements
758// for a k-rune slice.
759// If n < 0, there is no limit on the number of replacements.
760func Replace(s, old, new []byte, n int) []byte {
761	m := 0
762	if n != 0 {
763		// Compute number of replacements.
764		m = Count(s, old)
765	}
766	if m == 0 {
767		// Just return a copy.
768		return append([]byte(nil), s...)
769	}
770	if n < 0 || m < n {
771		n = m
772	}
773
774	// Apply replacements to buffer.
775	t := make([]byte, len(s)+n*(len(new)-len(old)))
776	w := 0
777	start := 0
778	for i := 0; i < n; i++ {
779		j := start
780		if len(old) == 0 {
781			if i > 0 {
782				_, wid := utf8.DecodeRune(s[start:])
783				j += wid
784			}
785		} else {
786			j += Index(s[start:], old)
787		}
788		w += copy(t[w:], s[start:j])
789		w += copy(t[w:], new)
790		start = j + len(old)
791	}
792	w += copy(t[w:], s[start:])
793	return t[0:w]
794}
795
796// ReplaceAll returns a copy of the slice s with all
797// non-overlapping instances of old replaced by new.
798// If old is empty, it matches at the beginning of the slice
799// and after each UTF-8 sequence, yielding up to k+1 replacements
800// for a k-rune slice.
801func ReplaceAll(s, old, new []byte) []byte {
802	return Replace(s, old, new, -1)
803}
804
805// EqualFold reports whether s and t, interpreted as UTF-8 strings,
806// are equal under Unicode case-folding.
807func EqualFold(s, t []byte) bool {
808	for len(s) != 0 && len(t) != 0 {
809		// Extract first rune from each.
810		var sr, tr rune
811		if s[0] < utf8.RuneSelf {
812			sr, s = rune(s[0]), s[1:]
813		} else {
814			r, size := utf8.DecodeRune(s)
815			sr, s = r, s[size:]
816		}
817		if t[0] < utf8.RuneSelf {
818			tr, t = rune(t[0]), t[1:]
819		} else {
820			r, size := utf8.DecodeRune(t)
821			tr, t = r, t[size:]
822		}
823
824		// If they match, keep going; if not, return false.
825
826		// Easy case.
827		if tr == sr {
828			continue
829		}
830
831		// Make sr < tr to simplify what follows.
832		if tr < sr {
833			tr, sr = sr, tr
834		}
835		// Fast check for ASCII.
836		if tr < utf8.RuneSelf {
837			// ASCII only, sr/tr must be upper/lower case
838			if 'A' <= sr && sr <= 'Z' && tr == sr+'a'-'A' {
839				continue
840			}
841			return false
842		}
843
844		// General case. SimpleFold(x) returns the next equivalent rune > x
845		// or wraps around to smaller values.
846		r := unicode.SimpleFold(sr)
847		for r != sr && r < tr {
848			r = unicode.SimpleFold(r)
849		}
850		if r == tr {
851			continue
852		}
853		return false
854	}
855
856	// One string is empty. Are both?
857	return len(s) == len(t)
858}
859
860// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
861func Index(s, sep []byte) int {
862	n := len(sep)
863	switch {
864	case n == 0:
865		return 0
866	case n == 1:
867		return IndexByte(s, sep[0])
868	case n == len(s):
869		if Equal(sep, s) {
870			return 0
871		}
872		return -1
873	case n > len(s):
874		return -1
875	case n <= bytealg.MaxLen:
876		// Use brute force when s and sep both are small
877		if len(s) <= bytealg.MaxBruteForce {
878			return bytealg.Index(s, sep)
879		}
880		c0 := sep[0]
881		c1 := sep[1]
882		i := 0
883		t := len(s) - n + 1
884		fails := 0
885		for i < t {
886			if s[i] != c0 {
887				// IndexByte is faster than bytealg.Index, so use it as long as
888				// we're not getting lots of false positives.
889				o := IndexByte(s[i:t], c0)
890				if o < 0 {
891					return -1
892				}
893				i += o
894			}
895			if s[i+1] == c1 && Equal(s[i:i+n], sep) {
896				return i
897			}
898			fails++
899			i++
900			// Switch to bytealg.Index when IndexByte produces too many false positives.
901			if fails > bytealg.Cutover(i) {
902				r := bytealg.Index(s[i:], sep)
903				if r >= 0 {
904					return r + i
905				}
906				return -1
907			}
908		}
909		return -1
910	}
911	c0 := sep[0]
912	c1 := sep[1]
913	i := 0
914	fails := 0
915	t := len(s) - n + 1
916	for i < t {
917		if s[i] != c0 {
918			o := IndexByte(s[i:t], c0)
919			if o < 0 {
920				break
921			}
922			i += o
923		}
924		if s[i+1] == c1 && Equal(s[i:i+n], sep) {
925			return i
926		}
927		i++
928		fails++
929		if fails >= 4+i>>4 && i < t {
930			// Give up on IndexByte, it isn't skipping ahead
931			// far enough to be better than Rabin-Karp.
932			// Experiments (using IndexPeriodic) suggest
933			// the cutover is about 16 byte skips.
934			// TODO: if large prefixes of sep are matching
935			// we should cutover at even larger average skips,
936			// because Equal becomes that much more expensive.
937			// This code does not take that effect into account.
938			j := indexRabinKarp(s[i:], sep)
939			if j < 0 {
940				return -1
941			}
942			return i + j
943		}
944	}
945	return -1
946}
947
948func indexRabinKarp(s, sep []byte) int {
949	// Rabin-Karp search
950	hashsep, pow := hashStr(sep)
951	n := len(sep)
952	var h uint32
953	for i := 0; i < n; i++ {
954		h = h*primeRK + uint32(s[i])
955	}
956	if h == hashsep && Equal(s[:n], sep) {
957		return 0
958	}
959	for i := n; i < len(s); {
960		h *= primeRK
961		h += uint32(s[i])
962		h -= pow * uint32(s[i-n])
963		i++
964		if h == hashsep && Equal(s[i-n:i], sep) {
965			return i - n
966		}
967	}
968	return -1
969}
970
971// primeRK is the prime base used in Rabin-Karp algorithm.
972const primeRK = 16777619
973
974// hashStr returns the hash and the appropriate multiplicative
975// factor for use in Rabin-Karp algorithm.
976func hashStr(sep []byte) (uint32, uint32) {
977	hash := uint32(0)
978	for i := 0; i < len(sep); i++ {
979		hash = hash*primeRK + uint32(sep[i])
980	}
981	var pow, sq uint32 = 1, primeRK
982	for i := len(sep); i > 0; i >>= 1 {
983		if i&1 != 0 {
984			pow *= sq
985		}
986		sq *= sq
987	}
988	return hash, pow
989}
990