1// Copyright 2009 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5// Package strings implements simple functions to manipulate UTF-8 encoded strings. 6// 7// For information about UTF-8 strings in Go, see https://blog.golang.org/strings. 8package strings 9 10import ( 11 "internal/bytealg" 12 "unicode" 13 "unicode/utf8" 14) 15 16// explode splits s into a slice of UTF-8 strings, 17// one string per Unicode character up to a maximum of n (n < 0 means no limit). 18// Invalid UTF-8 sequences become correct encodings of U+FFFD. 19func explode(s string, n int) []string { 20 l := utf8.RuneCountInString(s) 21 if n < 0 || n > l { 22 n = l 23 } 24 a := make([]string, n) 25 for i := 0; i < n-1; i++ { 26 ch, size := utf8.DecodeRuneInString(s) 27 a[i] = s[:size] 28 s = s[size:] 29 if ch == utf8.RuneError { 30 a[i] = string(utf8.RuneError) 31 } 32 } 33 if n > 0 { 34 a[n-1] = s 35 } 36 return a 37} 38 39// Count counts the number of non-overlapping instances of substr in s. 40// If substr is an empty string, Count returns 1 + the number of Unicode code points in s. 41func Count(s, substr string) int { 42 // special case 43 if len(substr) == 0 { 44 return utf8.RuneCountInString(s) + 1 45 } 46 if len(substr) == 1 { 47 return bytealg.CountString(s, substr[0]) 48 } 49 n := 0 50 for { 51 i := Index(s, substr) 52 if i == -1 { 53 return n 54 } 55 n++ 56 s = s[i+len(substr):] 57 } 58} 59 60// Contains reports whether substr is within s. 61func Contains(s, substr string) bool { 62 return Index(s, substr) >= 0 63} 64 65// ContainsAny reports whether any Unicode code points in chars are within s. 66func ContainsAny(s, chars string) bool { 67 return IndexAny(s, chars) >= 0 68} 69 70// ContainsRune reports whether the Unicode code point r is within s. 71func ContainsRune(s string, r rune) bool { 72 return IndexRune(s, r) >= 0 73} 74 75// LastIndex returns the index of the last instance of substr in s, or -1 if substr is not present in s. 76func LastIndex(s, substr string) int { 77 n := len(substr) 78 switch { 79 case n == 0: 80 return len(s) 81 case n == 1: 82 return LastIndexByte(s, substr[0]) 83 case n == len(s): 84 if substr == s { 85 return 0 86 } 87 return -1 88 case n > len(s): 89 return -1 90 } 91 // Rabin-Karp search from the end of the string 92 hashss, pow := bytealg.HashStrRev(substr) 93 last := len(s) - n 94 var h uint32 95 for i := len(s) - 1; i >= last; i-- { 96 h = h*bytealg.PrimeRK + uint32(s[i]) 97 } 98 if h == hashss && s[last:] == substr { 99 return last 100 } 101 for i := last - 1; i >= 0; i-- { 102 h *= bytealg.PrimeRK 103 h += uint32(s[i]) 104 h -= pow * uint32(s[i+n]) 105 if h == hashss && s[i:i+n] == substr { 106 return i 107 } 108 } 109 return -1 110} 111 112// IndexByte returns the index of the first instance of c in s, or -1 if c is not present in s. 113func IndexByte(s string, c byte) int { 114 return bytealg.IndexByteString(s, c) 115} 116 117// IndexRune returns the index of the first instance of the Unicode code point 118// r, or -1 if rune is not present in s. 119// If r is utf8.RuneError, it returns the first instance of any 120// invalid UTF-8 byte sequence. 121func IndexRune(s string, r rune) int { 122 switch { 123 case 0 <= r && r < utf8.RuneSelf: 124 return IndexByte(s, byte(r)) 125 case r == utf8.RuneError: 126 for i, r := range s { 127 if r == utf8.RuneError { 128 return i 129 } 130 } 131 return -1 132 case !utf8.ValidRune(r): 133 return -1 134 default: 135 return Index(s, string(r)) 136 } 137} 138 139// IndexAny returns the index of the first instance of any Unicode code point 140// from chars in s, or -1 if no Unicode code point from chars is present in s. 141func IndexAny(s, chars string) int { 142 if chars == "" { 143 // Avoid scanning all of s. 144 return -1 145 } 146 if len(chars) == 1 { 147 // Avoid scanning all of s. 148 r := rune(chars[0]) 149 if r >= utf8.RuneSelf { 150 r = utf8.RuneError 151 } 152 return IndexRune(s, r) 153 } 154 if len(s) > 8 { 155 if as, isASCII := makeASCIISet(chars); isASCII { 156 for i := 0; i < len(s); i++ { 157 if as.contains(s[i]) { 158 return i 159 } 160 } 161 return -1 162 } 163 } 164 for i, c := range s { 165 if IndexRune(chars, c) >= 0 { 166 return i 167 } 168 } 169 return -1 170} 171 172// LastIndexAny returns the index of the last instance of any Unicode code 173// point from chars in s, or -1 if no Unicode code point from chars is 174// present in s. 175func LastIndexAny(s, chars string) int { 176 if chars == "" { 177 // Avoid scanning all of s. 178 return -1 179 } 180 if len(s) == 1 { 181 rc := rune(s[0]) 182 if rc >= utf8.RuneSelf { 183 rc = utf8.RuneError 184 } 185 if IndexRune(chars, rc) >= 0 { 186 return 0 187 } 188 return -1 189 } 190 if len(s) > 8 { 191 if as, isASCII := makeASCIISet(chars); isASCII { 192 for i := len(s) - 1; i >= 0; i-- { 193 if as.contains(s[i]) { 194 return i 195 } 196 } 197 return -1 198 } 199 } 200 if len(chars) == 1 { 201 rc := rune(chars[0]) 202 if rc >= utf8.RuneSelf { 203 rc = utf8.RuneError 204 } 205 for i := len(s); i > 0; { 206 r, size := utf8.DecodeLastRuneInString(s[:i]) 207 i -= size 208 if rc == r { 209 return i 210 } 211 } 212 return -1 213 } 214 for i := len(s); i > 0; { 215 r, size := utf8.DecodeLastRuneInString(s[:i]) 216 i -= size 217 if IndexRune(chars, r) >= 0 { 218 return i 219 } 220 } 221 return -1 222} 223 224// LastIndexByte returns the index of the last instance of c in s, or -1 if c is not present in s. 225func LastIndexByte(s string, c byte) int { 226 for i := len(s) - 1; i >= 0; i-- { 227 if s[i] == c { 228 return i 229 } 230 } 231 return -1 232} 233 234// Generic split: splits after each instance of sep, 235// including sepSave bytes of sep in the subarrays. 236func genSplit(s, sep string, sepSave, n int) []string { 237 if n == 0 { 238 return nil 239 } 240 if sep == "" { 241 return explode(s, n) 242 } 243 if n < 0 { 244 n = Count(s, sep) + 1 245 } 246 247 a := make([]string, n) 248 n-- 249 i := 0 250 for i < n { 251 m := Index(s, sep) 252 if m < 0 { 253 break 254 } 255 a[i] = s[:m+sepSave] 256 s = s[m+len(sep):] 257 i++ 258 } 259 a[i] = s 260 return a[:i+1] 261} 262 263// SplitN slices s into substrings separated by sep and returns a slice of 264// the substrings between those separators. 265// 266// The count determines the number of substrings to return: 267// n > 0: at most n substrings; the last substring will be the unsplit remainder. 268// n == 0: the result is nil (zero substrings) 269// n < 0: all substrings 270// 271// Edge cases for s and sep (for example, empty strings) are handled 272// as described in the documentation for Split. 273func SplitN(s, sep string, n int) []string { return genSplit(s, sep, 0, n) } 274 275// SplitAfterN slices s into substrings after each instance of sep and 276// returns a slice of those substrings. 277// 278// The count determines the number of substrings to return: 279// n > 0: at most n substrings; the last substring will be the unsplit remainder. 280// n == 0: the result is nil (zero substrings) 281// n < 0: all substrings 282// 283// Edge cases for s and sep (for example, empty strings) are handled 284// as described in the documentation for SplitAfter. 285func SplitAfterN(s, sep string, n int) []string { 286 return genSplit(s, sep, len(sep), n) 287} 288 289// Split slices s into all substrings separated by sep and returns a slice of 290// the substrings between those separators. 291// 292// If s does not contain sep and sep is not empty, Split returns a 293// slice of length 1 whose only element is s. 294// 295// If sep is empty, Split splits after each UTF-8 sequence. If both s 296// and sep are empty, Split returns an empty slice. 297// 298// It is equivalent to SplitN with a count of -1. 299func Split(s, sep string) []string { return genSplit(s, sep, 0, -1) } 300 301// SplitAfter slices s into all substrings after each instance of sep and 302// returns a slice of those substrings. 303// 304// If s does not contain sep and sep is not empty, SplitAfter returns 305// a slice of length 1 whose only element is s. 306// 307// If sep is empty, SplitAfter splits after each UTF-8 sequence. If 308// both s and sep are empty, SplitAfter returns an empty slice. 309// 310// It is equivalent to SplitAfterN with a count of -1. 311func SplitAfter(s, sep string) []string { 312 return genSplit(s, sep, len(sep), -1) 313} 314 315var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1} 316 317// Fields splits the string s around each instance of one or more consecutive white space 318// characters, as defined by unicode.IsSpace, returning a slice of substrings of s or an 319// empty slice if s contains only white space. 320func Fields(s string) []string { 321 // First count the fields. 322 // This is an exact count if s is ASCII, otherwise it is an approximation. 323 n := 0 324 wasSpace := 1 325 // setBits is used to track which bits are set in the bytes of s. 326 setBits := uint8(0) 327 for i := 0; i < len(s); i++ { 328 r := s[i] 329 setBits |= r 330 isSpace := int(asciiSpace[r]) 331 n += wasSpace & ^isSpace 332 wasSpace = isSpace 333 } 334 335 if setBits >= utf8.RuneSelf { 336 // Some runes in the input string are not ASCII. 337 return FieldsFunc(s, unicode.IsSpace) 338 } 339 // ASCII fast path 340 a := make([]string, n) 341 na := 0 342 fieldStart := 0 343 i := 0 344 // Skip spaces in the front of the input. 345 for i < len(s) && asciiSpace[s[i]] != 0 { 346 i++ 347 } 348 fieldStart = i 349 for i < len(s) { 350 if asciiSpace[s[i]] == 0 { 351 i++ 352 continue 353 } 354 a[na] = s[fieldStart:i] 355 na++ 356 i++ 357 // Skip spaces in between fields. 358 for i < len(s) && asciiSpace[s[i]] != 0 { 359 i++ 360 } 361 fieldStart = i 362 } 363 if fieldStart < len(s) { // Last field might end at EOF. 364 a[na] = s[fieldStart:] 365 } 366 return a 367} 368 369// FieldsFunc splits the string s at each run of Unicode code points c satisfying f(c) 370// and returns an array of slices of s. If all code points in s satisfy f(c) or the 371// string is empty, an empty slice is returned. 372// 373// FieldsFunc makes no guarantees about the order in which it calls f(c) 374// and assumes that f always returns the same value for a given c. 375func FieldsFunc(s string, f func(rune) bool) []string { 376 // A span is used to record a slice of s of the form s[start:end]. 377 // The start index is inclusive and the end index is exclusive. 378 type span struct { 379 start int 380 end int 381 } 382 spans := make([]span, 0, 32) 383 384 // Find the field start and end indices. 385 // Doing this in a separate pass (rather than slicing the string s 386 // and collecting the result substrings right away) is significantly 387 // more efficient, possibly due to cache effects. 388 start := -1 // valid span start if >= 0 389 for end, rune := range s { 390 if f(rune) { 391 if start >= 0 { 392 spans = append(spans, span{start, end}) 393 // Set start to a negative value. 394 // Note: using -1 here consistently and reproducibly 395 // slows down this code by a several percent on amd64. 396 start = ^start 397 } 398 } else { 399 if start < 0 { 400 start = end 401 } 402 } 403 } 404 405 // Last field might end at EOF. 406 if start >= 0 { 407 spans = append(spans, span{start, len(s)}) 408 } 409 410 // Create strings from recorded field indices. 411 a := make([]string, len(spans)) 412 for i, span := range spans { 413 a[i] = s[span.start:span.end] 414 } 415 416 return a 417} 418 419// Join concatenates the elements of its first argument to create a single string. The separator 420// string sep is placed between elements in the resulting string. 421func Join(elems []string, sep string) string { 422 switch len(elems) { 423 case 0: 424 return "" 425 case 1: 426 return elems[0] 427 } 428 n := len(sep) * (len(elems) - 1) 429 for i := 0; i < len(elems); i++ { 430 n += len(elems[i]) 431 } 432 433 var b Builder 434 b.Grow(n) 435 b.WriteString(elems[0]) 436 for _, s := range elems[1:] { 437 b.WriteString(sep) 438 b.WriteString(s) 439 } 440 return b.String() 441} 442 443// HasPrefix tests whether the string s begins with prefix. 444func HasPrefix(s, prefix string) bool { 445 return len(s) >= len(prefix) && s[0:len(prefix)] == prefix 446} 447 448// HasSuffix tests whether the string s ends with suffix. 449func HasSuffix(s, suffix string) bool { 450 return len(s) >= len(suffix) && s[len(s)-len(suffix):] == suffix 451} 452 453// Map returns a copy of the string s with all its characters modified 454// according to the mapping function. If mapping returns a negative value, the character is 455// dropped from the string with no replacement. 456func Map(mapping func(rune) rune, s string) string { 457 // In the worst case, the string can grow when mapped, making 458 // things unpleasant. But it's so rare we barge in assuming it's 459 // fine. It could also shrink but that falls out naturally. 460 461 // The output buffer b is initialized on demand, the first 462 // time a character differs. 463 var b Builder 464 465 for i, c := range s { 466 r := mapping(c) 467 if r == c && c != utf8.RuneError { 468 continue 469 } 470 471 var width int 472 if c == utf8.RuneError { 473 c, width = utf8.DecodeRuneInString(s[i:]) 474 if width != 1 && r == c { 475 continue 476 } 477 } else { 478 width = utf8.RuneLen(c) 479 } 480 481 b.Grow(len(s) + utf8.UTFMax) 482 b.WriteString(s[:i]) 483 if r >= 0 { 484 b.WriteRune(r) 485 } 486 487 s = s[i+width:] 488 break 489 } 490 491 // Fast path for unchanged input 492 if b.Cap() == 0 { // didn't call b.Grow above 493 return s 494 } 495 496 for _, c := range s { 497 r := mapping(c) 498 499 if r >= 0 { 500 // common case 501 // Due to inlining, it is more performant to determine if WriteByte should be 502 // invoked rather than always call WriteRune 503 if r < utf8.RuneSelf { 504 b.WriteByte(byte(r)) 505 } else { 506 // r is not a ASCII rune. 507 b.WriteRune(r) 508 } 509 } 510 } 511 512 return b.String() 513} 514 515// Repeat returns a new string consisting of count copies of the string s. 516// 517// It panics if count is negative or if 518// the result of (len(s) * count) overflows. 519func Repeat(s string, count int) string { 520 if count == 0 { 521 return "" 522 } 523 524 // Since we cannot return an error on overflow, 525 // we should panic if the repeat will generate 526 // an overflow. 527 // See Issue golang.org/issue/16237 528 if count < 0 { 529 panic("strings: negative Repeat count") 530 } else if len(s)*count/count != len(s) { 531 panic("strings: Repeat count causes overflow") 532 } 533 534 n := len(s) * count 535 var b Builder 536 b.Grow(n) 537 b.WriteString(s) 538 for b.Len() < n { 539 if b.Len() <= n/2 { 540 b.WriteString(b.String()) 541 } else { 542 b.WriteString(b.String()[:n-b.Len()]) 543 break 544 } 545 } 546 return b.String() 547} 548 549// ToUpper returns s with all Unicode letters mapped to their upper case. 550func ToUpper(s string) string { 551 isASCII, hasLower := true, false 552 for i := 0; i < len(s); i++ { 553 c := s[i] 554 if c >= utf8.RuneSelf { 555 isASCII = false 556 break 557 } 558 hasLower = hasLower || ('a' <= c && c <= 'z') 559 } 560 561 if isASCII { // optimize for ASCII-only strings. 562 if !hasLower { 563 return s 564 } 565 var b Builder 566 b.Grow(len(s)) 567 for i := 0; i < len(s); i++ { 568 c := s[i] 569 if 'a' <= c && c <= 'z' { 570 c -= 'a' - 'A' 571 } 572 b.WriteByte(c) 573 } 574 return b.String() 575 } 576 return Map(unicode.ToUpper, s) 577} 578 579// ToLower returns s with all Unicode letters mapped to their lower case. 580func ToLower(s string) string { 581 isASCII, hasUpper := true, false 582 for i := 0; i < len(s); i++ { 583 c := s[i] 584 if c >= utf8.RuneSelf { 585 isASCII = false 586 break 587 } 588 hasUpper = hasUpper || ('A' <= c && c <= 'Z') 589 } 590 591 if isASCII { // optimize for ASCII-only strings. 592 if !hasUpper { 593 return s 594 } 595 var b Builder 596 b.Grow(len(s)) 597 for i := 0; i < len(s); i++ { 598 c := s[i] 599 if 'A' <= c && c <= 'Z' { 600 c += 'a' - 'A' 601 } 602 b.WriteByte(c) 603 } 604 return b.String() 605 } 606 return Map(unicode.ToLower, s) 607} 608 609// ToTitle returns a copy of the string s with all Unicode letters mapped to 610// their Unicode title case. 611func ToTitle(s string) string { return Map(unicode.ToTitle, s) } 612 613// ToUpperSpecial returns a copy of the string s with all Unicode letters mapped to their 614// upper case using the case mapping specified by c. 615func ToUpperSpecial(c unicode.SpecialCase, s string) string { 616 return Map(c.ToUpper, s) 617} 618 619// ToLowerSpecial returns a copy of the string s with all Unicode letters mapped to their 620// lower case using the case mapping specified by c. 621func ToLowerSpecial(c unicode.SpecialCase, s string) string { 622 return Map(c.ToLower, s) 623} 624 625// ToTitleSpecial returns a copy of the string s with all Unicode letters mapped to their 626// Unicode title case, giving priority to the special casing rules. 627func ToTitleSpecial(c unicode.SpecialCase, s string) string { 628 return Map(c.ToTitle, s) 629} 630 631// ToValidUTF8 returns a copy of the string s with each run of invalid UTF-8 byte sequences 632// replaced by the replacement string, which may be empty. 633func ToValidUTF8(s, replacement string) string { 634 var b Builder 635 636 for i, c := range s { 637 if c != utf8.RuneError { 638 continue 639 } 640 641 _, wid := utf8.DecodeRuneInString(s[i:]) 642 if wid == 1 { 643 b.Grow(len(s) + len(replacement)) 644 b.WriteString(s[:i]) 645 s = s[i:] 646 break 647 } 648 } 649 650 // Fast path for unchanged input 651 if b.Cap() == 0 { // didn't call b.Grow above 652 return s 653 } 654 655 invalid := false // previous byte was from an invalid UTF-8 sequence 656 for i := 0; i < len(s); { 657 c := s[i] 658 if c < utf8.RuneSelf { 659 i++ 660 invalid = false 661 b.WriteByte(c) 662 continue 663 } 664 _, wid := utf8.DecodeRuneInString(s[i:]) 665 if wid == 1 { 666 i++ 667 if !invalid { 668 invalid = true 669 b.WriteString(replacement) 670 } 671 continue 672 } 673 invalid = false 674 b.WriteString(s[i : i+wid]) 675 i += wid 676 } 677 678 return b.String() 679} 680 681// isSeparator reports whether the rune could mark a word boundary. 682// TODO: update when package unicode captures more of the properties. 683func isSeparator(r rune) bool { 684 // ASCII alphanumerics and underscore are not separators 685 if r <= 0x7F { 686 switch { 687 case '0' <= r && r <= '9': 688 return false 689 case 'a' <= r && r <= 'z': 690 return false 691 case 'A' <= r && r <= 'Z': 692 return false 693 case r == '_': 694 return false 695 } 696 return true 697 } 698 // Letters and digits are not separators 699 if unicode.IsLetter(r) || unicode.IsDigit(r) { 700 return false 701 } 702 // Otherwise, all we can do for now is treat spaces as separators. 703 return unicode.IsSpace(r) 704} 705 706// Title returns a copy of the string s with all Unicode letters that begin words 707// mapped to their Unicode title case. 708// 709// Deprecated: The rule Title uses for word boundaries does not handle Unicode 710// punctuation properly. Use golang.org/x/text/cases instead. 711func Title(s string) string { 712 // Use a closure here to remember state. 713 // Hackish but effective. Depends on Map scanning in order and calling 714 // the closure once per rune. 715 prev := ' ' 716 return Map( 717 func(r rune) rune { 718 if isSeparator(prev) { 719 prev = r 720 return unicode.ToTitle(r) 721 } 722 prev = r 723 return r 724 }, 725 s) 726} 727 728// TrimLeftFunc returns a slice of the string s with all leading 729// Unicode code points c satisfying f(c) removed. 730func TrimLeftFunc(s string, f func(rune) bool) string { 731 i := indexFunc(s, f, false) 732 if i == -1 { 733 return "" 734 } 735 return s[i:] 736} 737 738// TrimRightFunc returns a slice of the string s with all trailing 739// Unicode code points c satisfying f(c) removed. 740func TrimRightFunc(s string, f func(rune) bool) string { 741 i := lastIndexFunc(s, f, false) 742 if i >= 0 && s[i] >= utf8.RuneSelf { 743 _, wid := utf8.DecodeRuneInString(s[i:]) 744 i += wid 745 } else { 746 i++ 747 } 748 return s[0:i] 749} 750 751// TrimFunc returns a slice of the string s with all leading 752// and trailing Unicode code points c satisfying f(c) removed. 753func TrimFunc(s string, f func(rune) bool) string { 754 return TrimRightFunc(TrimLeftFunc(s, f), f) 755} 756 757// IndexFunc returns the index into s of the first Unicode 758// code point satisfying f(c), or -1 if none do. 759func IndexFunc(s string, f func(rune) bool) int { 760 return indexFunc(s, f, true) 761} 762 763// LastIndexFunc returns the index into s of the last 764// Unicode code point satisfying f(c), or -1 if none do. 765func LastIndexFunc(s string, f func(rune) bool) int { 766 return lastIndexFunc(s, f, true) 767} 768 769// indexFunc is the same as IndexFunc except that if 770// truth==false, the sense of the predicate function is 771// inverted. 772func indexFunc(s string, f func(rune) bool, truth bool) int { 773 for i, r := range s { 774 if f(r) == truth { 775 return i 776 } 777 } 778 return -1 779} 780 781// lastIndexFunc is the same as LastIndexFunc except that if 782// truth==false, the sense of the predicate function is 783// inverted. 784func lastIndexFunc(s string, f func(rune) bool, truth bool) int { 785 for i := len(s); i > 0; { 786 r, size := utf8.DecodeLastRuneInString(s[0:i]) 787 i -= size 788 if f(r) == truth { 789 return i 790 } 791 } 792 return -1 793} 794 795// asciiSet is a 32-byte value, where each bit represents the presence of a 796// given ASCII character in the set. The 128-bits of the lower 16 bytes, 797// starting with the least-significant bit of the lowest word to the 798// most-significant bit of the highest word, map to the full range of all 799// 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed, 800// ensuring that any non-ASCII character will be reported as not in the set. 801// This allocates a total of 32 bytes even though the upper half 802// is unused to avoid bounds checks in asciiSet.contains. 803type asciiSet [8]uint32 804 805// makeASCIISet creates a set of ASCII characters and reports whether all 806// characters in chars are ASCII. 807func makeASCIISet(chars string) (as asciiSet, ok bool) { 808 for i := 0; i < len(chars); i++ { 809 c := chars[i] 810 if c >= utf8.RuneSelf { 811 return as, false 812 } 813 as[c/32] |= 1 << (c % 32) 814 } 815 return as, true 816} 817 818// contains reports whether c is inside the set. 819func (as *asciiSet) contains(c byte) bool { 820 return (as[c/32] & (1 << (c % 32))) != 0 821} 822 823// Trim returns a slice of the string s with all leading and 824// trailing Unicode code points contained in cutset removed. 825func Trim(s, cutset string) string { 826 if s == "" || cutset == "" { 827 return s 828 } 829 if len(cutset) == 1 && cutset[0] < utf8.RuneSelf { 830 return trimLeftByte(trimRightByte(s, cutset[0]), cutset[0]) 831 } 832 if as, ok := makeASCIISet(cutset); ok { 833 return trimLeftASCII(trimRightASCII(s, &as), &as) 834 } 835 return trimLeftUnicode(trimRightUnicode(s, cutset), cutset) 836} 837 838// TrimLeft returns a slice of the string s with all leading 839// Unicode code points contained in cutset removed. 840// 841// To remove a prefix, use TrimPrefix instead. 842func TrimLeft(s, cutset string) string { 843 if s == "" || cutset == "" { 844 return s 845 } 846 if len(cutset) == 1 && cutset[0] < utf8.RuneSelf { 847 return trimLeftByte(s, cutset[0]) 848 } 849 if as, ok := makeASCIISet(cutset); ok { 850 return trimLeftASCII(s, &as) 851 } 852 return trimLeftUnicode(s, cutset) 853} 854 855func trimLeftByte(s string, c byte) string { 856 for len(s) > 0 && s[0] == c { 857 s = s[1:] 858 } 859 return s 860} 861 862func trimLeftASCII(s string, as *asciiSet) string { 863 for len(s) > 0 { 864 if !as.contains(s[0]) { 865 break 866 } 867 s = s[1:] 868 } 869 return s 870} 871 872func trimLeftUnicode(s, cutset string) string { 873 for len(s) > 0 { 874 r, n := rune(s[0]), 1 875 if r >= utf8.RuneSelf { 876 r, n = utf8.DecodeRuneInString(s) 877 } 878 if !ContainsRune(cutset, r) { 879 break 880 } 881 s = s[n:] 882 } 883 return s 884} 885 886// TrimRight returns a slice of the string s, with all trailing 887// Unicode code points contained in cutset removed. 888// 889// To remove a suffix, use TrimSuffix instead. 890func TrimRight(s, cutset string) string { 891 if s == "" || cutset == "" { 892 return s 893 } 894 if len(cutset) == 1 && cutset[0] < utf8.RuneSelf { 895 return trimRightByte(s, cutset[0]) 896 } 897 if as, ok := makeASCIISet(cutset); ok { 898 return trimRightASCII(s, &as) 899 } 900 return trimRightUnicode(s, cutset) 901} 902 903func trimRightByte(s string, c byte) string { 904 for len(s) > 0 && s[len(s)-1] == c { 905 s = s[:len(s)-1] 906 } 907 return s 908} 909 910func trimRightASCII(s string, as *asciiSet) string { 911 for len(s) > 0 { 912 if !as.contains(s[len(s)-1]) { 913 break 914 } 915 s = s[:len(s)-1] 916 } 917 return s 918} 919 920func trimRightUnicode(s, cutset string) string { 921 for len(s) > 0 { 922 r, n := rune(s[len(s)-1]), 1 923 if r >= utf8.RuneSelf { 924 r, n = utf8.DecodeLastRuneInString(s) 925 } 926 if !ContainsRune(cutset, r) { 927 break 928 } 929 s = s[:len(s)-n] 930 } 931 return s 932} 933 934// TrimSpace returns a slice of the string s, with all leading 935// and trailing white space removed, as defined by Unicode. 936func TrimSpace(s string) string { 937 // Fast path for ASCII: look for the first ASCII non-space byte 938 start := 0 939 for ; start < len(s); start++ { 940 c := s[start] 941 if c >= utf8.RuneSelf { 942 // If we run into a non-ASCII byte, fall back to the 943 // slower unicode-aware method on the remaining bytes 944 return TrimFunc(s[start:], unicode.IsSpace) 945 } 946 if asciiSpace[c] == 0 { 947 break 948 } 949 } 950 951 // Now look for the first ASCII non-space byte from the end 952 stop := len(s) 953 for ; stop > start; stop-- { 954 c := s[stop-1] 955 if c >= utf8.RuneSelf { 956 return TrimFunc(s[start:stop], unicode.IsSpace) 957 } 958 if asciiSpace[c] == 0 { 959 break 960 } 961 } 962 963 // At this point s[start:stop] starts and ends with an ASCII 964 // non-space bytes, so we're done. Non-ASCII cases have already 965 // been handled above. 966 return s[start:stop] 967} 968 969// TrimPrefix returns s without the provided leading prefix string. 970// If s doesn't start with prefix, s is returned unchanged. 971func TrimPrefix(s, prefix string) string { 972 if HasPrefix(s, prefix) { 973 return s[len(prefix):] 974 } 975 return s 976} 977 978// TrimSuffix returns s without the provided trailing suffix string. 979// If s doesn't end with suffix, s is returned unchanged. 980func TrimSuffix(s, suffix string) string { 981 if HasSuffix(s, suffix) { 982 return s[:len(s)-len(suffix)] 983 } 984 return s 985} 986 987// Replace returns a copy of the string s with the first n 988// non-overlapping instances of old replaced by new. 989// If old is empty, it matches at the beginning of the string 990// and after each UTF-8 sequence, yielding up to k+1 replacements 991// for a k-rune string. 992// If n < 0, there is no limit on the number of replacements. 993func Replace(s, old, new string, n int) string { 994 if old == new || n == 0 { 995 return s // avoid allocation 996 } 997 998 // Compute number of replacements. 999 if m := Count(s, old); m == 0 { 1000 return s // avoid allocation 1001 } else if n < 0 || m < n { 1002 n = m 1003 } 1004 1005 // Apply replacements to buffer. 1006 var b Builder 1007 b.Grow(len(s) + n*(len(new)-len(old))) 1008 start := 0 1009 for i := 0; i < n; i++ { 1010 j := start 1011 if len(old) == 0 { 1012 if i > 0 { 1013 _, wid := utf8.DecodeRuneInString(s[start:]) 1014 j += wid 1015 } 1016 } else { 1017 j += Index(s[start:], old) 1018 } 1019 b.WriteString(s[start:j]) 1020 b.WriteString(new) 1021 start = j + len(old) 1022 } 1023 b.WriteString(s[start:]) 1024 return b.String() 1025} 1026 1027// ReplaceAll returns a copy of the string s with all 1028// non-overlapping instances of old replaced by new. 1029// If old is empty, it matches at the beginning of the string 1030// and after each UTF-8 sequence, yielding up to k+1 replacements 1031// for a k-rune string. 1032func ReplaceAll(s, old, new string) string { 1033 return Replace(s, old, new, -1) 1034} 1035 1036// EqualFold reports whether s and t, interpreted as UTF-8 strings, 1037// are equal under Unicode case-folding, which is a more general 1038// form of case-insensitivity. 1039func EqualFold(s, t string) bool { 1040 for s != "" && t != "" { 1041 // Extract first rune from each string. 1042 var sr, tr rune 1043 if s[0] < utf8.RuneSelf { 1044 sr, s = rune(s[0]), s[1:] 1045 } else { 1046 r, size := utf8.DecodeRuneInString(s) 1047 sr, s = r, s[size:] 1048 } 1049 if t[0] < utf8.RuneSelf { 1050 tr, t = rune(t[0]), t[1:] 1051 } else { 1052 r, size := utf8.DecodeRuneInString(t) 1053 tr, t = r, t[size:] 1054 } 1055 1056 // If they match, keep going; if not, return false. 1057 1058 // Easy case. 1059 if tr == sr { 1060 continue 1061 } 1062 1063 // Make sr < tr to simplify what follows. 1064 if tr < sr { 1065 tr, sr = sr, tr 1066 } 1067 // Fast check for ASCII. 1068 if tr < utf8.RuneSelf { 1069 // ASCII only, sr/tr must be upper/lower case 1070 if 'A' <= sr && sr <= 'Z' && tr == sr+'a'-'A' { 1071 continue 1072 } 1073 return false 1074 } 1075 1076 // General case. SimpleFold(x) returns the next equivalent rune > x 1077 // or wraps around to smaller values. 1078 r := unicode.SimpleFold(sr) 1079 for r != sr && r < tr { 1080 r = unicode.SimpleFold(r) 1081 } 1082 if r == tr { 1083 continue 1084 } 1085 return false 1086 } 1087 1088 // One string is empty. Are both? 1089 return s == t 1090} 1091 1092// Index returns the index of the first instance of substr in s, or -1 if substr is not present in s. 1093func Index(s, substr string) int { 1094 n := len(substr) 1095 switch { 1096 case n == 0: 1097 return 0 1098 case n == 1: 1099 return IndexByte(s, substr[0]) 1100 case n == len(s): 1101 if substr == s { 1102 return 0 1103 } 1104 return -1 1105 case n > len(s): 1106 return -1 1107 case n <= bytealg.MaxLen: 1108 // Use brute force when s and substr both are small 1109 if len(s) <= bytealg.MaxBruteForce { 1110 return bytealg.IndexString(s, substr) 1111 } 1112 c0 := substr[0] 1113 c1 := substr[1] 1114 i := 0 1115 t := len(s) - n + 1 1116 fails := 0 1117 for i < t { 1118 if s[i] != c0 { 1119 // IndexByte is faster than bytealg.IndexString, so use it as long as 1120 // we're not getting lots of false positives. 1121 o := IndexByte(s[i+1:t], c0) 1122 if o < 0 { 1123 return -1 1124 } 1125 i += o + 1 1126 } 1127 if s[i+1] == c1 && s[i:i+n] == substr { 1128 return i 1129 } 1130 fails++ 1131 i++ 1132 // Switch to bytealg.IndexString when IndexByte produces too many false positives. 1133 if fails > bytealg.Cutover(i) { 1134 r := bytealg.IndexString(s[i:], substr) 1135 if r >= 0 { 1136 return r + i 1137 } 1138 return -1 1139 } 1140 } 1141 return -1 1142 } 1143 c0 := substr[0] 1144 c1 := substr[1] 1145 i := 0 1146 t := len(s) - n + 1 1147 fails := 0 1148 for i < t { 1149 if s[i] != c0 { 1150 o := IndexByte(s[i+1:t], c0) 1151 if o < 0 { 1152 return -1 1153 } 1154 i += o + 1 1155 } 1156 if s[i+1] == c1 && s[i:i+n] == substr { 1157 return i 1158 } 1159 i++ 1160 fails++ 1161 if fails >= 4+i>>4 && i < t { 1162 // See comment in ../bytes/bytes.go. 1163 j := bytealg.IndexRabinKarp(s[i:], substr) 1164 if j < 0 { 1165 return -1 1166 } 1167 return i + j 1168 } 1169 } 1170 return -1 1171} 1172 1173// Cut slices s around the first instance of sep, 1174// returning the text before and after sep. 1175// The found result reports whether sep appears in s. 1176// If sep does not appear in s, cut returns s, "", false. 1177func Cut(s, sep string) (before, after string, found bool) { 1178 if i := Index(s, sep); i >= 0 { 1179 return s[:i], s[i+len(sep):], true 1180 } 1181 return s, "", false 1182} 1183