1// Copyright 2009 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5// Package strings implements simple functions to manipulate UTF-8 encoded strings. 6// 7// For information about UTF-8 strings in Go, see https://blog.golang.org/strings. 8package strings 9 10import ( 11 "internal/bytealg" 12 "unicode" 13 "unicode/utf8" 14) 15 16// explode splits s into a slice of UTF-8 strings, 17// one string per Unicode character up to a maximum of n (n < 0 means no limit). 18// Invalid UTF-8 sequences become correct encodings of U+FFFD. 19func explode(s string, n int) []string { 20 l := utf8.RuneCountInString(s) 21 if n < 0 || n > l { 22 n = l 23 } 24 a := make([]string, n) 25 for i := 0; i < n-1; i++ { 26 ch, size := utf8.DecodeRuneInString(s) 27 a[i] = s[:size] 28 s = s[size:] 29 if ch == utf8.RuneError { 30 a[i] = string(utf8.RuneError) 31 } 32 } 33 if n > 0 { 34 a[n-1] = s 35 } 36 return a 37} 38 39// primeRK is the prime base used in Rabin-Karp algorithm. 40const primeRK = 16777619 41 42// hashStr returns the hash and the appropriate multiplicative 43// factor for use in Rabin-Karp algorithm. 44func hashStr(sep string) (uint32, uint32) { 45 hash := uint32(0) 46 for i := 0; i < len(sep); i++ { 47 hash = hash*primeRK + uint32(sep[i]) 48 } 49 var pow, sq uint32 = 1, primeRK 50 for i := len(sep); i > 0; i >>= 1 { 51 if i&1 != 0 { 52 pow *= sq 53 } 54 sq *= sq 55 } 56 return hash, pow 57} 58 59// hashStrRev returns the hash of the reverse of sep and the 60// appropriate multiplicative factor for use in Rabin-Karp algorithm. 61func hashStrRev(sep string) (uint32, uint32) { 62 hash := uint32(0) 63 for i := len(sep) - 1; i >= 0; i-- { 64 hash = hash*primeRK + uint32(sep[i]) 65 } 66 var pow, sq uint32 = 1, primeRK 67 for i := len(sep); i > 0; i >>= 1 { 68 if i&1 != 0 { 69 pow *= sq 70 } 71 sq *= sq 72 } 73 return hash, pow 74} 75 76// Count counts the number of non-overlapping instances of substr in s. 77// If substr is an empty string, Count returns 1 + the number of Unicode code points in s. 78func Count(s, substr string) int { 79 // special case 80 if len(substr) == 0 { 81 return utf8.RuneCountInString(s) + 1 82 } 83 if len(substr) == 1 { 84 return bytealg.CountString(s, substr[0]) 85 } 86 n := 0 87 for { 88 i := Index(s, substr) 89 if i == -1 { 90 return n 91 } 92 n++ 93 s = s[i+len(substr):] 94 } 95} 96 97// Contains reports whether substr is within s. 98func Contains(s, substr string) bool { 99 return Index(s, substr) >= 0 100} 101 102// ContainsAny reports whether any Unicode code points in chars are within s. 103func ContainsAny(s, chars string) bool { 104 return IndexAny(s, chars) >= 0 105} 106 107// ContainsRune reports whether the Unicode code point r is within s. 108func ContainsRune(s string, r rune) bool { 109 return IndexRune(s, r) >= 0 110} 111 112// LastIndex returns the index of the last instance of substr in s, or -1 if substr is not present in s. 113func LastIndex(s, substr string) int { 114 n := len(substr) 115 switch { 116 case n == 0: 117 return len(s) 118 case n == 1: 119 return LastIndexByte(s, substr[0]) 120 case n == len(s): 121 if substr == s { 122 return 0 123 } 124 return -1 125 case n > len(s): 126 return -1 127 } 128 // Rabin-Karp search from the end of the string 129 hashss, pow := hashStrRev(substr) 130 last := len(s) - n 131 var h uint32 132 for i := len(s) - 1; i >= last; i-- { 133 h = h*primeRK + uint32(s[i]) 134 } 135 if h == hashss && s[last:] == substr { 136 return last 137 } 138 for i := last - 1; i >= 0; i-- { 139 h *= primeRK 140 h += uint32(s[i]) 141 h -= pow * uint32(s[i+n]) 142 if h == hashss && s[i:i+n] == substr { 143 return i 144 } 145 } 146 return -1 147} 148 149// IndexByte returns the index of the first instance of c in s, or -1 if c is not present in s. 150func IndexByte(s string, c byte) int { 151 return bytealg.IndexByteString(s, c) 152} 153 154// IndexRune returns the index of the first instance of the Unicode code point 155// r, or -1 if rune is not present in s. 156// If r is utf8.RuneError, it returns the first instance of any 157// invalid UTF-8 byte sequence. 158func IndexRune(s string, r rune) int { 159 switch { 160 case 0 <= r && r < utf8.RuneSelf: 161 return IndexByte(s, byte(r)) 162 case r == utf8.RuneError: 163 for i, r := range s { 164 if r == utf8.RuneError { 165 return i 166 } 167 } 168 return -1 169 case !utf8.ValidRune(r): 170 return -1 171 default: 172 return Index(s, string(r)) 173 } 174} 175 176// IndexAny returns the index of the first instance of any Unicode code point 177// from chars in s, or -1 if no Unicode code point from chars is present in s. 178func IndexAny(s, chars string) int { 179 if chars == "" { 180 // Avoid scanning all of s. 181 return -1 182 } 183 if len(s) > 8 { 184 if as, isASCII := makeASCIISet(chars); isASCII { 185 for i := 0; i < len(s); i++ { 186 if as.contains(s[i]) { 187 return i 188 } 189 } 190 return -1 191 } 192 } 193 for i, c := range s { 194 for _, m := range chars { 195 if c == m { 196 return i 197 } 198 } 199 } 200 return -1 201} 202 203// LastIndexAny returns the index of the last instance of any Unicode code 204// point from chars in s, or -1 if no Unicode code point from chars is 205// present in s. 206func LastIndexAny(s, chars string) int { 207 if chars == "" { 208 // Avoid scanning all of s. 209 return -1 210 } 211 if len(s) > 8 { 212 if as, isASCII := makeASCIISet(chars); isASCII { 213 for i := len(s) - 1; i >= 0; i-- { 214 if as.contains(s[i]) { 215 return i 216 } 217 } 218 return -1 219 } 220 } 221 for i := len(s); i > 0; { 222 r, size := utf8.DecodeLastRuneInString(s[:i]) 223 i -= size 224 for _, c := range chars { 225 if r == c { 226 return i 227 } 228 } 229 } 230 return -1 231} 232 233// LastIndexByte returns the index of the last instance of c in s, or -1 if c is not present in s. 234func LastIndexByte(s string, c byte) int { 235 for i := len(s) - 1; i >= 0; i-- { 236 if s[i] == c { 237 return i 238 } 239 } 240 return -1 241} 242 243// Generic split: splits after each instance of sep, 244// including sepSave bytes of sep in the subarrays. 245func genSplit(s, sep string, sepSave, n int) []string { 246 if n == 0 { 247 return nil 248 } 249 if sep == "" { 250 return explode(s, n) 251 } 252 if n < 0 { 253 n = Count(s, sep) + 1 254 } 255 256 a := make([]string, n) 257 n-- 258 i := 0 259 for i < n { 260 m := Index(s, sep) 261 if m < 0 { 262 break 263 } 264 a[i] = s[:m+sepSave] 265 s = s[m+len(sep):] 266 i++ 267 } 268 a[i] = s 269 return a[:i+1] 270} 271 272// SplitN slices s into substrings separated by sep and returns a slice of 273// the substrings between those separators. 274// 275// The count determines the number of substrings to return: 276// n > 0: at most n substrings; the last substring will be the unsplit remainder. 277// n == 0: the result is nil (zero substrings) 278// n < 0: all substrings 279// 280// Edge cases for s and sep (for example, empty strings) are handled 281// as described in the documentation for Split. 282func SplitN(s, sep string, n int) []string { return genSplit(s, sep, 0, n) } 283 284// SplitAfterN slices s into substrings after each instance of sep and 285// returns a slice of those substrings. 286// 287// The count determines the number of substrings to return: 288// n > 0: at most n substrings; the last substring will be the unsplit remainder. 289// n == 0: the result is nil (zero substrings) 290// n < 0: all substrings 291// 292// Edge cases for s and sep (for example, empty strings) are handled 293// as described in the documentation for SplitAfter. 294func SplitAfterN(s, sep string, n int) []string { 295 return genSplit(s, sep, len(sep), n) 296} 297 298// Split slices s into all substrings separated by sep and returns a slice of 299// the substrings between those separators. 300// 301// If s does not contain sep and sep is not empty, Split returns a 302// slice of length 1 whose only element is s. 303// 304// If sep is empty, Split splits after each UTF-8 sequence. If both s 305// and sep are empty, Split returns an empty slice. 306// 307// It is equivalent to SplitN with a count of -1. 308func Split(s, sep string) []string { return genSplit(s, sep, 0, -1) } 309 310// SplitAfter slices s into all substrings after each instance of sep and 311// returns a slice of those substrings. 312// 313// If s does not contain sep and sep is not empty, SplitAfter returns 314// a slice of length 1 whose only element is s. 315// 316// If sep is empty, SplitAfter splits after each UTF-8 sequence. If 317// both s and sep are empty, SplitAfter returns an empty slice. 318// 319// It is equivalent to SplitAfterN with a count of -1. 320func SplitAfter(s, sep string) []string { 321 return genSplit(s, sep, len(sep), -1) 322} 323 324var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1} 325 326// Fields splits the string s around each instance of one or more consecutive white space 327// characters, as defined by unicode.IsSpace, returning a slice of substrings of s or an 328// empty slice if s contains only white space. 329func Fields(s string) []string { 330 // First count the fields. 331 // This is an exact count if s is ASCII, otherwise it is an approximation. 332 n := 0 333 wasSpace := 1 334 // setBits is used to track which bits are set in the bytes of s. 335 setBits := uint8(0) 336 for i := 0; i < len(s); i++ { 337 r := s[i] 338 setBits |= r 339 isSpace := int(asciiSpace[r]) 340 n += wasSpace & ^isSpace 341 wasSpace = isSpace 342 } 343 344 if setBits < utf8.RuneSelf { // ASCII fast path 345 a := make([]string, n) 346 na := 0 347 fieldStart := 0 348 i := 0 349 // Skip spaces in the front of the input. 350 for i < len(s) && asciiSpace[s[i]] != 0 { 351 i++ 352 } 353 fieldStart = i 354 for i < len(s) { 355 if asciiSpace[s[i]] == 0 { 356 i++ 357 continue 358 } 359 a[na] = s[fieldStart:i] 360 na++ 361 i++ 362 // Skip spaces in between fields. 363 for i < len(s) && asciiSpace[s[i]] != 0 { 364 i++ 365 } 366 fieldStart = i 367 } 368 if fieldStart < len(s) { // Last field might end at EOF. 369 a[na] = s[fieldStart:] 370 } 371 return a 372 } 373 374 // Some runes in the input string are not ASCII. 375 return FieldsFunc(s, unicode.IsSpace) 376} 377 378// FieldsFunc splits the string s at each run of Unicode code points c satisfying f(c) 379// and returns an array of slices of s. If all code points in s satisfy f(c) or the 380// string is empty, an empty slice is returned. 381// FieldsFunc makes no guarantees about the order in which it calls f(c). 382// If f does not return consistent results for a given c, FieldsFunc may crash. 383func FieldsFunc(s string, f func(rune) bool) []string { 384 // A span is used to record a slice of s of the form s[start:end]. 385 // The start index is inclusive and the end index is exclusive. 386 type span struct { 387 start int 388 end int 389 } 390 spans := make([]span, 0, 32) 391 392 // Find the field start and end indices. 393 wasField := false 394 fromIndex := 0 395 for i, rune := range s { 396 if f(rune) { 397 if wasField { 398 spans = append(spans, span{start: fromIndex, end: i}) 399 wasField = false 400 } 401 } else { 402 if !wasField { 403 fromIndex = i 404 wasField = true 405 } 406 } 407 } 408 409 // Last field might end at EOF. 410 if wasField { 411 spans = append(spans, span{fromIndex, len(s)}) 412 } 413 414 // Create strings from recorded field indices. 415 a := make([]string, len(spans)) 416 for i, span := range spans { 417 a[i] = s[span.start:span.end] 418 } 419 420 return a 421} 422 423// Join concatenates the elements of a to create a single string. The separator string 424// sep is placed between elements in the resulting string. 425func Join(a []string, sep string) string { 426 switch len(a) { 427 case 0: 428 return "" 429 case 1: 430 return a[0] 431 } 432 n := len(sep) * (len(a) - 1) 433 for i := 0; i < len(a); i++ { 434 n += len(a[i]) 435 } 436 437 var b Builder 438 b.Grow(n) 439 b.WriteString(a[0]) 440 for _, s := range a[1:] { 441 b.WriteString(sep) 442 b.WriteString(s) 443 } 444 return b.String() 445} 446 447// HasPrefix tests whether the string s begins with prefix. 448func HasPrefix(s, prefix string) bool { 449 return len(s) >= len(prefix) && s[0:len(prefix)] == prefix 450} 451 452// HasSuffix tests whether the string s ends with suffix. 453func HasSuffix(s, suffix string) bool { 454 return len(s) >= len(suffix) && s[len(s)-len(suffix):] == suffix 455} 456 457// Map returns a copy of the string s with all its characters modified 458// according to the mapping function. If mapping returns a negative value, the character is 459// dropped from the string with no replacement. 460func Map(mapping func(rune) rune, s string) string { 461 // In the worst case, the string can grow when mapped, making 462 // things unpleasant. But it's so rare we barge in assuming it's 463 // fine. It could also shrink but that falls out naturally. 464 465 // The output buffer b is initialized on demand, the first 466 // time a character differs. 467 var b Builder 468 469 for i, c := range s { 470 r := mapping(c) 471 if r == c && c != utf8.RuneError { 472 continue 473 } 474 475 var width int 476 if c == utf8.RuneError { 477 c, width = utf8.DecodeRuneInString(s[i:]) 478 if width != 1 && r == c { 479 continue 480 } 481 } else { 482 width = utf8.RuneLen(c) 483 } 484 485 b.Grow(len(s) + utf8.UTFMax) 486 b.WriteString(s[:i]) 487 if r >= 0 { 488 b.WriteRune(r) 489 } 490 491 s = s[i+width:] 492 break 493 } 494 495 // Fast path for unchanged input 496 if b.Cap() == 0 { // didn't call b.Grow above 497 return s 498 } 499 500 for _, c := range s { 501 r := mapping(c) 502 503 if r >= 0 { 504 // common case 505 // Due to inlining, it is more performant to determine if WriteByte should be 506 // invoked rather than always call WriteRune 507 if r < utf8.RuneSelf { 508 b.WriteByte(byte(r)) 509 } else { 510 // r is not a ASCII rune. 511 b.WriteRune(r) 512 } 513 } 514 } 515 516 return b.String() 517} 518 519// Repeat returns a new string consisting of count copies of the string s. 520// 521// It panics if count is negative or if 522// the result of (len(s) * count) overflows. 523func Repeat(s string, count int) string { 524 if count == 0 { 525 return "" 526 } 527 528 // Since we cannot return an error on overflow, 529 // we should panic if the repeat will generate 530 // an overflow. 531 // See Issue golang.org/issue/16237 532 if count < 0 { 533 panic("strings: negative Repeat count") 534 } else if len(s)*count/count != len(s) { 535 panic("strings: Repeat count causes overflow") 536 } 537 538 n := len(s) * count 539 var b Builder 540 b.Grow(n) 541 b.WriteString(s) 542 for b.Len() < n { 543 if b.Len() <= n/2 { 544 b.WriteString(b.String()) 545 } else { 546 b.WriteString(b.String()[:n-b.Len()]) 547 break 548 } 549 } 550 return b.String() 551} 552 553// ToUpper returns a copy of the string s with all Unicode letters mapped to their upper case. 554func ToUpper(s string) string { 555 isASCII, hasLower := true, false 556 for i := 0; i < len(s); i++ { 557 c := s[i] 558 if c >= utf8.RuneSelf { 559 isASCII = false 560 break 561 } 562 hasLower = hasLower || (c >= 'a' && c <= 'z') 563 } 564 565 if isASCII { // optimize for ASCII-only strings. 566 if !hasLower { 567 return s 568 } 569 var b Builder 570 b.Grow(len(s)) 571 for i := 0; i < len(s); i++ { 572 c := s[i] 573 if c >= 'a' && c <= 'z' { 574 c -= 'a' - 'A' 575 } 576 b.WriteByte(c) 577 } 578 return b.String() 579 } 580 return Map(unicode.ToUpper, s) 581} 582 583// ToLower returns a copy of the string s with all Unicode letters mapped to their lower case. 584func ToLower(s string) string { 585 isASCII, hasUpper := true, false 586 for i := 0; i < len(s); i++ { 587 c := s[i] 588 if c >= utf8.RuneSelf { 589 isASCII = false 590 break 591 } 592 hasUpper = hasUpper || (c >= 'A' && c <= 'Z') 593 } 594 595 if isASCII { // optimize for ASCII-only strings. 596 if !hasUpper { 597 return s 598 } 599 var b Builder 600 b.Grow(len(s)) 601 for i := 0; i < len(s); i++ { 602 c := s[i] 603 if c >= 'A' && c <= 'Z' { 604 c += 'a' - 'A' 605 } 606 b.WriteByte(c) 607 } 608 return b.String() 609 } 610 return Map(unicode.ToLower, s) 611} 612 613// ToTitle returns a copy of the string s with all Unicode letters mapped to their title case. 614func ToTitle(s string) string { return Map(unicode.ToTitle, s) } 615 616// ToUpperSpecial returns a copy of the string s with all Unicode letters mapped to their 617// upper case using the case mapping specified by c. 618func ToUpperSpecial(c unicode.SpecialCase, s string) string { 619 return Map(c.ToUpper, s) 620} 621 622// ToLowerSpecial returns a copy of the string s with all Unicode letters mapped to their 623// lower case using the case mapping specified by c. 624func ToLowerSpecial(c unicode.SpecialCase, s string) string { 625 return Map(c.ToLower, s) 626} 627 628// ToTitleSpecial returns a copy of the string s with all Unicode letters mapped to their 629// title case, giving priority to the special casing rules. 630func ToTitleSpecial(c unicode.SpecialCase, s string) string { 631 return Map(c.ToTitle, s) 632} 633 634// isSeparator reports whether the rune could mark a word boundary. 635// TODO: update when package unicode captures more of the properties. 636func isSeparator(r rune) bool { 637 // ASCII alphanumerics and underscore are not separators 638 if r <= 0x7F { 639 switch { 640 case '0' <= r && r <= '9': 641 return false 642 case 'a' <= r && r <= 'z': 643 return false 644 case 'A' <= r && r <= 'Z': 645 return false 646 case r == '_': 647 return false 648 } 649 return true 650 } 651 // Letters and digits are not separators 652 if unicode.IsLetter(r) || unicode.IsDigit(r) { 653 return false 654 } 655 // Otherwise, all we can do for now is treat spaces as separators. 656 return unicode.IsSpace(r) 657} 658 659// Title returns a copy of the string s with all Unicode letters that begin words 660// mapped to their title case. 661// 662// BUG(rsc): The rule Title uses for word boundaries does not handle Unicode punctuation properly. 663func Title(s string) string { 664 // Use a closure here to remember state. 665 // Hackish but effective. Depends on Map scanning in order and calling 666 // the closure once per rune. 667 prev := ' ' 668 return Map( 669 func(r rune) rune { 670 if isSeparator(prev) { 671 prev = r 672 return unicode.ToTitle(r) 673 } 674 prev = r 675 return r 676 }, 677 s) 678} 679 680// TrimLeftFunc returns a slice of the string s with all leading 681// Unicode code points c satisfying f(c) removed. 682func TrimLeftFunc(s string, f func(rune) bool) string { 683 i := indexFunc(s, f, false) 684 if i == -1 { 685 return "" 686 } 687 return s[i:] 688} 689 690// TrimRightFunc returns a slice of the string s with all trailing 691// Unicode code points c satisfying f(c) removed. 692func TrimRightFunc(s string, f func(rune) bool) string { 693 i := lastIndexFunc(s, f, false) 694 if i >= 0 && s[i] >= utf8.RuneSelf { 695 _, wid := utf8.DecodeRuneInString(s[i:]) 696 i += wid 697 } else { 698 i++ 699 } 700 return s[0:i] 701} 702 703// TrimFunc returns a slice of the string s with all leading 704// and trailing Unicode code points c satisfying f(c) removed. 705func TrimFunc(s string, f func(rune) bool) string { 706 return TrimRightFunc(TrimLeftFunc(s, f), f) 707} 708 709// IndexFunc returns the index into s of the first Unicode 710// code point satisfying f(c), or -1 if none do. 711func IndexFunc(s string, f func(rune) bool) int { 712 return indexFunc(s, f, true) 713} 714 715// LastIndexFunc returns the index into s of the last 716// Unicode code point satisfying f(c), or -1 if none do. 717func LastIndexFunc(s string, f func(rune) bool) int { 718 return lastIndexFunc(s, f, true) 719} 720 721// indexFunc is the same as IndexFunc except that if 722// truth==false, the sense of the predicate function is 723// inverted. 724func indexFunc(s string, f func(rune) bool, truth bool) int { 725 for i, r := range s { 726 if f(r) == truth { 727 return i 728 } 729 } 730 return -1 731} 732 733// lastIndexFunc is the same as LastIndexFunc except that if 734// truth==false, the sense of the predicate function is 735// inverted. 736func lastIndexFunc(s string, f func(rune) bool, truth bool) int { 737 for i := len(s); i > 0; { 738 r, size := utf8.DecodeLastRuneInString(s[0:i]) 739 i -= size 740 if f(r) == truth { 741 return i 742 } 743 } 744 return -1 745} 746 747// asciiSet is a 32-byte value, where each bit represents the presence of a 748// given ASCII character in the set. The 128-bits of the lower 16 bytes, 749// starting with the least-significant bit of the lowest word to the 750// most-significant bit of the highest word, map to the full range of all 751// 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed, 752// ensuring that any non-ASCII character will be reported as not in the set. 753type asciiSet [8]uint32 754 755// makeASCIISet creates a set of ASCII characters and reports whether all 756// characters in chars are ASCII. 757func makeASCIISet(chars string) (as asciiSet, ok bool) { 758 for i := 0; i < len(chars); i++ { 759 c := chars[i] 760 if c >= utf8.RuneSelf { 761 return as, false 762 } 763 as[c>>5] |= 1 << uint(c&31) 764 } 765 return as, true 766} 767 768// contains reports whether c is inside the set. 769func (as *asciiSet) contains(c byte) bool { 770 return (as[c>>5] & (1 << uint(c&31))) != 0 771} 772 773func makeCutsetFunc(cutset string) func(rune) bool { 774 if len(cutset) == 1 && cutset[0] < utf8.RuneSelf { 775 return func(r rune) bool { 776 return r == rune(cutset[0]) 777 } 778 } 779 if as, isASCII := makeASCIISet(cutset); isASCII { 780 return func(r rune) bool { 781 return r < utf8.RuneSelf && as.contains(byte(r)) 782 } 783 } 784 return func(r rune) bool { return IndexRune(cutset, r) >= 0 } 785} 786 787// Trim returns a slice of the string s with all leading and 788// trailing Unicode code points contained in cutset removed. 789func Trim(s string, cutset string) string { 790 if s == "" || cutset == "" { 791 return s 792 } 793 return TrimFunc(s, makeCutsetFunc(cutset)) 794} 795 796// TrimLeft returns a slice of the string s with all leading 797// Unicode code points contained in cutset removed. 798// 799// To remove a prefix, use TrimPrefix instead. 800func TrimLeft(s string, cutset string) string { 801 if s == "" || cutset == "" { 802 return s 803 } 804 return TrimLeftFunc(s, makeCutsetFunc(cutset)) 805} 806 807// TrimRight returns a slice of the string s, with all trailing 808// Unicode code points contained in cutset removed. 809// 810// To remove a suffix, use TrimSuffix instead. 811func TrimRight(s string, cutset string) string { 812 if s == "" || cutset == "" { 813 return s 814 } 815 return TrimRightFunc(s, makeCutsetFunc(cutset)) 816} 817 818// TrimSpace returns a slice of the string s, with all leading 819// and trailing white space removed, as defined by Unicode. 820func TrimSpace(s string) string { 821 return TrimFunc(s, unicode.IsSpace) 822} 823 824// TrimPrefix returns s without the provided leading prefix string. 825// If s doesn't start with prefix, s is returned unchanged. 826func TrimPrefix(s, prefix string) string { 827 if HasPrefix(s, prefix) { 828 return s[len(prefix):] 829 } 830 return s 831} 832 833// TrimSuffix returns s without the provided trailing suffix string. 834// If s doesn't end with suffix, s is returned unchanged. 835func TrimSuffix(s, suffix string) string { 836 if HasSuffix(s, suffix) { 837 return s[:len(s)-len(suffix)] 838 } 839 return s 840} 841 842// Replace returns a copy of the string s with the first n 843// non-overlapping instances of old replaced by new. 844// If old is empty, it matches at the beginning of the string 845// and after each UTF-8 sequence, yielding up to k+1 replacements 846// for a k-rune string. 847// If n < 0, there is no limit on the number of replacements. 848func Replace(s, old, new string, n int) string { 849 if old == new || n == 0 { 850 return s // avoid allocation 851 } 852 853 // Compute number of replacements. 854 if m := Count(s, old); m == 0 { 855 return s // avoid allocation 856 } else if n < 0 || m < n { 857 n = m 858 } 859 860 // Apply replacements to buffer. 861 t := make([]byte, len(s)+n*(len(new)-len(old))) 862 w := 0 863 start := 0 864 for i := 0; i < n; i++ { 865 j := start 866 if len(old) == 0 { 867 if i > 0 { 868 _, wid := utf8.DecodeRuneInString(s[start:]) 869 j += wid 870 } 871 } else { 872 j += Index(s[start:], old) 873 } 874 w += copy(t[w:], s[start:j]) 875 w += copy(t[w:], new) 876 start = j + len(old) 877 } 878 w += copy(t[w:], s[start:]) 879 return string(t[0:w]) 880} 881 882// ReplaceAll returns a copy of the string s with all 883// non-overlapping instances of old replaced by new. 884// If old is empty, it matches at the beginning of the string 885// and after each UTF-8 sequence, yielding up to k+1 replacements 886// for a k-rune string. 887func ReplaceAll(s, old, new string) string { 888 return Replace(s, old, new, -1) 889} 890 891// EqualFold reports whether s and t, interpreted as UTF-8 strings, 892// are equal under Unicode case-folding. 893func EqualFold(s, t string) bool { 894 for s != "" && t != "" { 895 // Extract first rune from each string. 896 var sr, tr rune 897 if s[0] < utf8.RuneSelf { 898 sr, s = rune(s[0]), s[1:] 899 } else { 900 r, size := utf8.DecodeRuneInString(s) 901 sr, s = r, s[size:] 902 } 903 if t[0] < utf8.RuneSelf { 904 tr, t = rune(t[0]), t[1:] 905 } else { 906 r, size := utf8.DecodeRuneInString(t) 907 tr, t = r, t[size:] 908 } 909 910 // If they match, keep going; if not, return false. 911 912 // Easy case. 913 if tr == sr { 914 continue 915 } 916 917 // Make sr < tr to simplify what follows. 918 if tr < sr { 919 tr, sr = sr, tr 920 } 921 // Fast check for ASCII. 922 if tr < utf8.RuneSelf { 923 // ASCII only, sr/tr must be upper/lower case 924 if 'A' <= sr && sr <= 'Z' && tr == sr+'a'-'A' { 925 continue 926 } 927 return false 928 } 929 930 // General case. SimpleFold(x) returns the next equivalent rune > x 931 // or wraps around to smaller values. 932 r := unicode.SimpleFold(sr) 933 for r != sr && r < tr { 934 r = unicode.SimpleFold(r) 935 } 936 if r == tr { 937 continue 938 } 939 return false 940 } 941 942 // One string is empty. Are both? 943 return s == t 944} 945 946// Index returns the index of the first instance of substr in s, or -1 if substr is not present in s. 947func Index(s, substr string) int { 948 n := len(substr) 949 switch { 950 case n == 0: 951 return 0 952 case n == 1: 953 return IndexByte(s, substr[0]) 954 case n == len(s): 955 if substr == s { 956 return 0 957 } 958 return -1 959 case n > len(s): 960 return -1 961 case n <= bytealg.MaxLen: 962 // Use brute force when s and substr both are small 963 if len(s) <= bytealg.MaxBruteForce { 964 return bytealg.IndexString(s, substr) 965 } 966 c0 := substr[0] 967 c1 := substr[1] 968 i := 0 969 t := len(s) - n + 1 970 fails := 0 971 for i < t { 972 if s[i] != c0 { 973 // IndexByte is faster than bytealg.IndexString, so use it as long as 974 // we're not getting lots of false positives. 975 o := IndexByte(s[i:t], c0) 976 if o < 0 { 977 return -1 978 } 979 i += o 980 } 981 if s[i+1] == c1 && s[i:i+n] == substr { 982 return i 983 } 984 fails++ 985 i++ 986 // Switch to bytealg.IndexString when IndexByte produces too many false positives. 987 if fails > bytealg.Cutover(i) { 988 r := bytealg.IndexString(s[i:], substr) 989 if r >= 0 { 990 return r + i 991 } 992 return -1 993 } 994 } 995 return -1 996 } 997 c0 := substr[0] 998 c1 := substr[1] 999 i := 0 1000 t := len(s) - n + 1 1001 fails := 0 1002 for i < t { 1003 if s[i] != c0 { 1004 o := IndexByte(s[i:t], c0) 1005 if o < 0 { 1006 return -1 1007 } 1008 i += o 1009 } 1010 if s[i+1] == c1 && s[i:i+n] == substr { 1011 return i 1012 } 1013 i++ 1014 fails++ 1015 if fails >= 4+i>>4 && i < t { 1016 // See comment in ../bytes/bytes_generic.go. 1017 j := indexRabinKarp(s[i:], substr) 1018 if j < 0 { 1019 return -1 1020 } 1021 return i + j 1022 } 1023 } 1024 return -1 1025} 1026 1027func indexRabinKarp(s, substr string) int { 1028 // Rabin-Karp search 1029 hashss, pow := hashStr(substr) 1030 n := len(substr) 1031 var h uint32 1032 for i := 0; i < n; i++ { 1033 h = h*primeRK + uint32(s[i]) 1034 } 1035 if h == hashss && s[:n] == substr { 1036 return 0 1037 } 1038 for i := n; i < len(s); { 1039 h *= primeRK 1040 h += uint32(s[i]) 1041 h -= pow * uint32(s[i-n]) 1042 i++ 1043 if h == hashss && s[i-n:i] == substr { 1044 return i - n 1045 } 1046 } 1047 return -1 1048} 1049