1// Copyright 2015 Huan Du. All rights reserved. 2// Licensed under the MIT license that can be found in the LICENSE file. 3 4package xstrings 5 6import ( 7 "strings" 8 "unicode/utf8" 9) 10 11// Reverse a utf8 encoded string. 12func Reverse(str string) string { 13 var size int 14 15 tail := len(str) 16 buf := make([]byte, tail) 17 s := buf 18 19 for len(str) > 0 { 20 _, size = utf8.DecodeRuneInString(str) 21 tail -= size 22 s = append(s[:tail], []byte(str[:size])...) 23 str = str[size:] 24 } 25 26 return string(buf) 27} 28 29// Slice a string by rune. 30// 31// Start must satisfy 0 <= start <= rune length. 32// 33// End can be positive, zero or negative. 34// If end >= 0, start and end must satisfy start <= end <= rune length. 35// If end < 0, it means slice to the end of string. 36// 37// Otherwise, Slice will panic as out of range. 38func Slice(str string, start, end int) string { 39 var size, startPos, endPos int 40 41 origin := str 42 43 if start < 0 || end > len(str) || (end >= 0 && start > end) { 44 panic("out of range") 45 } 46 47 if end >= 0 { 48 end -= start 49 } 50 51 for start > 0 && len(str) > 0 { 52 _, size = utf8.DecodeRuneInString(str) 53 start-- 54 startPos += size 55 str = str[size:] 56 } 57 58 if end < 0 { 59 return origin[startPos:] 60 } 61 62 endPos = startPos 63 64 for end > 0 && len(str) > 0 { 65 _, size = utf8.DecodeRuneInString(str) 66 end-- 67 endPos += size 68 str = str[size:] 69 } 70 71 if len(str) == 0 && (start > 0 || end > 0) { 72 panic("out of range") 73 } 74 75 return origin[startPos:endPos] 76} 77 78// Partition splits a string by sep into three parts. 79// The return value is a slice of strings with head, match and tail. 80// 81// If str contains sep, for example "hello" and "l", Partition returns 82// "he", "l", "lo" 83// 84// If str doesn't contain sep, for example "hello" and "x", Partition returns 85// "hello", "", "" 86func Partition(str, sep string) (head, match, tail string) { 87 index := strings.Index(str, sep) 88 89 if index == -1 { 90 head = str 91 return 92 } 93 94 head = str[:index] 95 match = str[index : index+len(sep)] 96 tail = str[index+len(sep):] 97 return 98} 99 100// LastPartition splits a string by last instance of sep into three parts. 101// The return value is a slice of strings with head, match and tail. 102// 103// If str contains sep, for example "hello" and "l", LastPartition returns 104// "hel", "l", "o" 105// 106// If str doesn't contain sep, for example "hello" and "x", LastPartition returns 107// "", "", "hello" 108func LastPartition(str, sep string) (head, match, tail string) { 109 index := strings.LastIndex(str, sep) 110 111 if index == -1 { 112 tail = str 113 return 114 } 115 116 head = str[:index] 117 match = str[index : index+len(sep)] 118 tail = str[index+len(sep):] 119 return 120} 121 122// Insert src into dst at given rune index. 123// Index is counted by runes instead of bytes. 124// 125// If index is out of range of dst, panic with out of range. 126func Insert(dst, src string, index int) string { 127 return Slice(dst, 0, index) + src + Slice(dst, index, -1) 128} 129 130// Scrub scrubs invalid utf8 bytes with repl string. 131// Adjacent invalid bytes are replaced only once. 132func Scrub(str, repl string) string { 133 var buf *stringBuilder 134 var r rune 135 var size, pos int 136 var hasError bool 137 138 origin := str 139 140 for len(str) > 0 { 141 r, size = utf8.DecodeRuneInString(str) 142 143 if r == utf8.RuneError { 144 if !hasError { 145 if buf == nil { 146 buf = &stringBuilder{} 147 } 148 149 buf.WriteString(origin[:pos]) 150 hasError = true 151 } 152 } else if hasError { 153 hasError = false 154 buf.WriteString(repl) 155 156 origin = origin[pos:] 157 pos = 0 158 } 159 160 pos += size 161 str = str[size:] 162 } 163 164 if buf != nil { 165 buf.WriteString(origin) 166 return buf.String() 167 } 168 169 // No invalid byte. 170 return origin 171} 172 173// WordSplit splits a string into words. Returns a slice of words. 174// If there is no word in a string, return nil. 175// 176// Word is defined as a locale dependent string containing alphabetic characters, 177// which may also contain but not start with `'` and `-` characters. 178func WordSplit(str string) []string { 179 var word string 180 var words []string 181 var r rune 182 var size, pos int 183 184 inWord := false 185 186 for len(str) > 0 { 187 r, size = utf8.DecodeRuneInString(str) 188 189 switch { 190 case isAlphabet(r): 191 if !inWord { 192 inWord = true 193 word = str 194 pos = 0 195 } 196 197 case inWord && (r == '\'' || r == '-'): 198 // Still in word. 199 200 default: 201 if inWord { 202 inWord = false 203 words = append(words, word[:pos]) 204 } 205 } 206 207 pos += size 208 str = str[size:] 209 } 210 211 if inWord { 212 words = append(words, word[:pos]) 213 } 214 215 return words 216} 217