1// Copyright 2015 Huan Du. All rights reserved.
2// Licensed under the MIT license that can be found in the LICENSE file.
3
4package xstrings
5
6import (
7	"strings"
8	"unicode/utf8"
9)
10
// Reverse returns str with the order of its runes reversed.
//
// The input is decoded as UTF-8, so the bytes of a multi-byte rune stay
// together. A byte that does not start a valid sequence is moved on its own.
func Reverse(str string) string {
	out := make([]byte, len(str))

	// Read runes from the front of str and write them from the back of out.
	for r, w := 0, len(str); r < len(str); {
		_, n := utf8.DecodeRuneInString(str[r:])
		w -= n
		copy(out[w:], str[r:r+n])
		r += n
	}

	return string(out)
}
28
// Slice returns the part of str between rune index start (inclusive) and
// rune index end (exclusive). Indices count runes instead of bytes; a byte
// that is not part of a valid UTF-8 sequence counts as one rune.
//
// Start must satisfy 0 <= start <= rune length.
//
// End can be positive, zero or negative.
// If end >= 0, start and end must satisfy start <= end <= rune length.
// If end < 0, it means slice to the end of string.
//
// Otherwise, Slice panics with "out of range". This also applies to a start
// beyond the rune length when end is negative.
func Slice(str string, start, end int) string {
	// The byte length is an upper bound of the rune length, so requests that
	// are obviously invalid are rejected before any decoding happens.
	if start < 0 || end > len(str) || (end >= 0 && start > end) {
		panic("out of range")
	}

	origin := str
	startPos := 0

	// Skip start runes to find the byte offset where the result begins.
	skip := start
	for ; skip > 0 && len(str) > 0; skip-- {
		_, size := utf8.DecodeRuneInString(str)
		startPos += size
		str = str[size:]
	}

	// The string ran out before start runes were skipped. Checking here
	// makes the end < 0 form panic just like the end >= 0 form does.
	if skip > 0 {
		panic("out of range")
	}

	if end < 0 {
		return origin[startPos:]
	}

	// Consume end-start further runes to find the byte offset of the end.
	endPos := startPos
	take := end - start
	for ; take > 0 && len(str) > 0; take-- {
		_, size := utf8.DecodeRuneInString(str)
		endPos += size
		str = str[size:]
	}

	// Fewer than end runes exist in the string.
	if take > 0 {
		panic("out of range")
	}

	return origin[startPos:endPos]
}
77
// Partition splits str around the first occurrence of sep.
// It returns three strings: the text before the separator (head), the
// separator as found in str (match) and the text after it (tail).
//
// If str contains sep, for example "hello" and "l", Partition returns
//     "he", "l", "lo"
//
// If str doesn't contain sep, for example "hello" and "x", Partition returns
//     "hello", "", ""
func Partition(str, sep string) (head, match, tail string) {
	at := strings.Index(str, sep)
	if at < 0 {
		// No separator found: the whole input is the head.
		return str, "", ""
	}

	after := at + len(sep)
	return str[:at], str[at:after], str[after:]
}
99
// LastPartition splits str around the last occurrence of sep.
// It returns three strings: the text before the separator (head), the
// separator as found in str (match) and the text after it (tail).
//
// If str contains sep, for example "hello" and "l", LastPartition returns
//     "hel", "l", "o"
//
// If str doesn't contain sep, for example "hello" and "x", LastPartition returns
//     "", "", "hello"
func LastPartition(str, sep string) (head, match, tail string) {
	at := strings.LastIndex(str, sep)
	if at < 0 {
		// No separator found: the whole input is the tail.
		return "", "", str
	}

	after := at + len(sep)
	return str[:at], str[at:after], str[after:]
}
121
122// Insert src into dst at given rune index.
123// Index is counted by runes instead of bytes.
124//
125// If index is out of range of dst, panic with out of range.
126func Insert(dst, src string, index int) string {
127	return Slice(dst, 0, index) + src + Slice(dst, index, -1)
128}
129
130// Scrub scrubs invalid utf8 bytes with repl string.
131// Adjacent invalid bytes are replaced only once.
132func Scrub(str, repl string) string {
133	var buf *stringBuilder
134	var r rune
135	var size, pos int
136	var hasError bool
137
138	origin := str
139
140	for len(str) > 0 {
141		r, size = utf8.DecodeRuneInString(str)
142
143		if r == utf8.RuneError {
144			if !hasError {
145				if buf == nil {
146					buf = &stringBuilder{}
147				}
148
149				buf.WriteString(origin[:pos])
150				hasError = true
151			}
152		} else if hasError {
153			hasError = false
154			buf.WriteString(repl)
155
156			origin = origin[pos:]
157			pos = 0
158		}
159
160		pos += size
161		str = str[size:]
162	}
163
164	if buf != nil {
165		buf.WriteString(origin)
166		return buf.String()
167	}
168
169	// No invalid byte.
170	return origin
171}
172
173// WordSplit splits a string into words. Returns a slice of words.
174// If there is no word in a string, return nil.
175//
176// Word is defined as a locale dependent string containing alphabetic characters,
177// which may also contain but not start with `'` and `-` characters.
178func WordSplit(str string) []string {
179	var word string
180	var words []string
181	var r rune
182	var size, pos int
183
184	inWord := false
185
186	for len(str) > 0 {
187		r, size = utf8.DecodeRuneInString(str)
188
189		switch {
190		case isAlphabet(r):
191			if !inWord {
192				inWord = true
193				word = str
194				pos = 0
195			}
196
197		case inWord && (r == '\'' || r == '-'):
198			// Still in word.
199
200		default:
201			if inWord {
202				inWord = false
203				words = append(words, word[:pos])
204			}
205		}
206
207		pos += size
208		str = str[size:]
209	}
210
211	if inWord {
212		words = append(words, word[:pos])
213	}
214
215	return words
216}
217