1// Copyright 2009 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5// Package strings implements simple functions to manipulate UTF-8 encoded strings. 6// 7// For information about UTF-8 strings in Go, see https://blog.golang.org/strings. 8package strings 9 10import ( 11 "internal/bytealg" 12 "unicode" 13 "unicode/utf8" 14) 15 16// explode splits s into a slice of UTF-8 strings, 17// one string per Unicode character up to a maximum of n (n < 0 means no limit). 18// Invalid UTF-8 sequences become correct encodings of U+FFFD. 19func explode(s string, n int) []string { 20 l := utf8.RuneCountInString(s) 21 if n < 0 || n > l { 22 n = l 23 } 24 a := make([]string, n) 25 for i := 0; i < n-1; i++ { 26 ch, size := utf8.DecodeRuneInString(s) 27 a[i] = s[:size] 28 s = s[size:] 29 if ch == utf8.RuneError { 30 a[i] = string(utf8.RuneError) 31 } 32 } 33 if n > 0 { 34 a[n-1] = s 35 } 36 return a 37} 38 39// Count counts the number of non-overlapping instances of substr in s. 40// If substr is an empty string, Count returns 1 + the number of Unicode code points in s. 41func Count(s, substr string) int { 42 // special case 43 if len(substr) == 0 { 44 return utf8.RuneCountInString(s) + 1 45 } 46 if len(substr) == 1 { 47 return bytealg.CountString(s, substr[0]) 48 } 49 n := 0 50 for { 51 i := Index(s, substr) 52 if i == -1 { 53 return n 54 } 55 n++ 56 s = s[i+len(substr):] 57 } 58} 59 60// Contains reports whether substr is within s. 61func Contains(s, substr string) bool { 62 return Index(s, substr) >= 0 63} 64 65// ContainsAny reports whether any Unicode code points in chars are within s. 66func ContainsAny(s, chars string) bool { 67 return IndexAny(s, chars) >= 0 68} 69 70// ContainsRune reports whether the Unicode code point r is within s. 71func ContainsRune(s string, r rune) bool { 72 return IndexRune(s, r) >= 0 73} 74 75// LastIndex returns the index of the last instance of substr in s, or -1 if substr is not present in s. 76func LastIndex(s, substr string) int { 77 n := len(substr) 78 switch { 79 case n == 0: 80 return len(s) 81 case n == 1: 82 return LastIndexByte(s, substr[0]) 83 case n == len(s): 84 if substr == s { 85 return 0 86 } 87 return -1 88 case n > len(s): 89 return -1 90 } 91 // Rabin-Karp search from the end of the string 92 hashss, pow := bytealg.HashStrRev(substr) 93 last := len(s) - n 94 var h uint32 95 for i := len(s) - 1; i >= last; i-- { 96 h = h*bytealg.PrimeRK + uint32(s[i]) 97 } 98 if h == hashss && s[last:] == substr { 99 return last 100 } 101 for i := last - 1; i >= 0; i-- { 102 h *= bytealg.PrimeRK 103 h += uint32(s[i]) 104 h -= pow * uint32(s[i+n]) 105 if h == hashss && s[i:i+n] == substr { 106 return i 107 } 108 } 109 return -1 110} 111 112// IndexByte returns the index of the first instance of c in s, or -1 if c is not present in s. 113func IndexByte(s string, c byte) int { 114 return bytealg.IndexByteString(s, c) 115} 116 117// IndexRune returns the index of the first instance of the Unicode code point 118// r, or -1 if rune is not present in s. 119// If r is utf8.RuneError, it returns the first instance of any 120// invalid UTF-8 byte sequence. 121func IndexRune(s string, r rune) int { 122 switch { 123 case 0 <= r && r < utf8.RuneSelf: 124 return IndexByte(s, byte(r)) 125 case r == utf8.RuneError: 126 for i, r := range s { 127 if r == utf8.RuneError { 128 return i 129 } 130 } 131 return -1 132 case !utf8.ValidRune(r): 133 return -1 134 default: 135 return Index(s, string(r)) 136 } 137} 138 139// IndexAny returns the index of the first instance of any Unicode code point 140// from chars in s, or -1 if no Unicode code point from chars is present in s. 141func IndexAny(s, chars string) int { 142 if chars == "" { 143 // Avoid scanning all of s. 144 return -1 145 } 146 if len(chars) == 1 { 147 // Avoid scanning all of s. 148 r := rune(chars[0]) 149 if r >= utf8.RuneSelf { 150 r = utf8.RuneError 151 } 152 return IndexRune(s, r) 153 } 154 if len(s) > 8 { 155 if as, isASCII := makeASCIISet(chars); isASCII { 156 for i := 0; i < len(s); i++ { 157 if as.contains(s[i]) { 158 return i 159 } 160 } 161 return -1 162 } 163 } 164 for i, c := range s { 165 if IndexRune(chars, c) >= 0 { 166 return i 167 } 168 } 169 return -1 170} 171 172// LastIndexAny returns the index of the last instance of any Unicode code 173// point from chars in s, or -1 if no Unicode code point from chars is 174// present in s. 175func LastIndexAny(s, chars string) int { 176 if chars == "" { 177 // Avoid scanning all of s. 178 return -1 179 } 180 if len(s) == 1 { 181 rc := rune(s[0]) 182 if rc >= utf8.RuneSelf { 183 rc = utf8.RuneError 184 } 185 if IndexRune(chars, rc) >= 0 { 186 return 0 187 } 188 return -1 189 } 190 if len(s) > 8 { 191 if as, isASCII := makeASCIISet(chars); isASCII { 192 for i := len(s) - 1; i >= 0; i-- { 193 if as.contains(s[i]) { 194 return i 195 } 196 } 197 return -1 198 } 199 } 200 if len(chars) == 1 { 201 rc := rune(chars[0]) 202 if rc >= utf8.RuneSelf { 203 rc = utf8.RuneError 204 } 205 for i := len(s); i > 0; { 206 r, size := utf8.DecodeLastRuneInString(s[:i]) 207 i -= size 208 if rc == r { 209 return i 210 } 211 } 212 return -1 213 } 214 for i := len(s); i > 0; { 215 r, size := utf8.DecodeLastRuneInString(s[:i]) 216 i -= size 217 if IndexRune(chars, r) >= 0 { 218 return i 219 } 220 } 221 return -1 222} 223 224// LastIndexByte returns the index of the last instance of c in s, or -1 if c is not present in s. 225func LastIndexByte(s string, c byte) int { 226 for i := len(s) - 1; i >= 0; i-- { 227 if s[i] == c { 228 return i 229 } 230 } 231 return -1 232} 233 234// Generic split: splits after each instance of sep, 235// including sepSave bytes of sep in the subarrays. 236func genSplit(s, sep string, sepSave, n int) []string { 237 if n == 0 { 238 return nil 239 } 240 if sep == "" { 241 return explode(s, n) 242 } 243 if n < 0 { 244 n = Count(s, sep) + 1 245 } 246 247 a := make([]string, n) 248 n-- 249 i := 0 250 for i < n { 251 m := Index(s, sep) 252 if m < 0 { 253 break 254 } 255 a[i] = s[:m+sepSave] 256 s = s[m+len(sep):] 257 i++ 258 } 259 a[i] = s 260 return a[:i+1] 261} 262 263// SplitN slices s into substrings separated by sep and returns a slice of 264// the substrings between those separators. 265// 266// The count determines the number of substrings to return: 267// n > 0: at most n substrings; the last substring will be the unsplit remainder. 268// n == 0: the result is nil (zero substrings) 269// n < 0: all substrings 270// 271// Edge cases for s and sep (for example, empty strings) are handled 272// as described in the documentation for Split. 273func SplitN(s, sep string, n int) []string { return genSplit(s, sep, 0, n) } 274 275// SplitAfterN slices s into substrings after each instance of sep and 276// returns a slice of those substrings. 277// 278// The count determines the number of substrings to return: 279// n > 0: at most n substrings; the last substring will be the unsplit remainder. 280// n == 0: the result is nil (zero substrings) 281// n < 0: all substrings 282// 283// Edge cases for s and sep (for example, empty strings) are handled 284// as described in the documentation for SplitAfter. 285func SplitAfterN(s, sep string, n int) []string { 286 return genSplit(s, sep, len(sep), n) 287} 288 289// Split slices s into all substrings separated by sep and returns a slice of 290// the substrings between those separators. 291// 292// If s does not contain sep and sep is not empty, Split returns a 293// slice of length 1 whose only element is s. 294// 295// If sep is empty, Split splits after each UTF-8 sequence. If both s 296// and sep are empty, Split returns an empty slice. 297// 298// It is equivalent to SplitN with a count of -1. 299func Split(s, sep string) []string { return genSplit(s, sep, 0, -1) } 300 301// SplitAfter slices s into all substrings after each instance of sep and 302// returns a slice of those substrings. 303// 304// If s does not contain sep and sep is not empty, SplitAfter returns 305// a slice of length 1 whose only element is s. 306// 307// If sep is empty, SplitAfter splits after each UTF-8 sequence. If 308// both s and sep are empty, SplitAfter returns an empty slice. 309// 310// It is equivalent to SplitAfterN with a count of -1. 311func SplitAfter(s, sep string) []string { 312 return genSplit(s, sep, len(sep), -1) 313} 314 315var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1} 316 317// Fields splits the string s around each instance of one or more consecutive white space 318// characters, as defined by unicode.IsSpace, returning a slice of substrings of s or an 319// empty slice if s contains only white space. 320func Fields(s string) []string { 321 // First count the fields. 322 // This is an exact count if s is ASCII, otherwise it is an approximation. 323 n := 0 324 wasSpace := 1 325 // setBits is used to track which bits are set in the bytes of s. 326 setBits := uint8(0) 327 for i := 0; i < len(s); i++ { 328 r := s[i] 329 setBits |= r 330 isSpace := int(asciiSpace[r]) 331 n += wasSpace & ^isSpace 332 wasSpace = isSpace 333 } 334 335 if setBits >= utf8.RuneSelf { 336 // Some runes in the input string are not ASCII. 337 return FieldsFunc(s, unicode.IsSpace) 338 } 339 // ASCII fast path 340 a := make([]string, n) 341 na := 0 342 fieldStart := 0 343 i := 0 344 // Skip spaces in the front of the input. 345 for i < len(s) && asciiSpace[s[i]] != 0 { 346 i++ 347 } 348 fieldStart = i 349 for i < len(s) { 350 if asciiSpace[s[i]] == 0 { 351 i++ 352 continue 353 } 354 a[na] = s[fieldStart:i] 355 na++ 356 i++ 357 // Skip spaces in between fields. 358 for i < len(s) && asciiSpace[s[i]] != 0 { 359 i++ 360 } 361 fieldStart = i 362 } 363 if fieldStart < len(s) { // Last field might end at EOF. 364 a[na] = s[fieldStart:] 365 } 366 return a 367} 368 369// FieldsFunc splits the string s at each run of Unicode code points c satisfying f(c) 370// and returns an array of slices of s. If all code points in s satisfy f(c) or the 371// string is empty, an empty slice is returned. 372// 373// FieldsFunc makes no guarantees about the order in which it calls f(c) 374// and assumes that f always returns the same value for a given c. 375func FieldsFunc(s string, f func(rune) bool) []string { 376 // A span is used to record a slice of s of the form s[start:end]. 377 // The start index is inclusive and the end index is exclusive. 378 type span struct { 379 start int 380 end int 381 } 382 spans := make([]span, 0, 32) 383 384 // Find the field start and end indices. 385 // Doing this in a separate pass (rather than slicing the string s 386 // and collecting the result substrings right away) is significantly 387 // more efficient, possibly due to cache effects. 388 start := -1 // valid span start if >= 0 389 for end, rune := range s { 390 if f(rune) { 391 if start >= 0 { 392 spans = append(spans, span{start, end}) 393 // Set start to a negative value. 394 // Note: using -1 here consistently and reproducibly 395 // slows down this code by a several percent on amd64. 396 start = ^start 397 } 398 } else { 399 if start < 0 { 400 start = end 401 } 402 } 403 } 404 405 // Last field might end at EOF. 406 if start >= 0 { 407 spans = append(spans, span{start, len(s)}) 408 } 409 410 // Create strings from recorded field indices. 411 a := make([]string, len(spans)) 412 for i, span := range spans { 413 a[i] = s[span.start:span.end] 414 } 415 416 return a 417} 418 419// Join concatenates the elements of its first argument to create a single string. The separator 420// string sep is placed between elements in the resulting string. 421func Join(elems []string, sep string) string { 422 switch len(elems) { 423 case 0: 424 return "" 425 case 1: 426 return elems[0] 427 } 428 n := len(sep) * (len(elems) - 1) 429 for i := 0; i < len(elems); i++ { 430 n += len(elems[i]) 431 } 432 433 var b Builder 434 b.Grow(n) 435 b.WriteString(elems[0]) 436 for _, s := range elems[1:] { 437 b.WriteString(sep) 438 b.WriteString(s) 439 } 440 return b.String() 441} 442 443// HasPrefix tests whether the string s begins with prefix. 444func HasPrefix(s, prefix string) bool { 445 return len(s) >= len(prefix) && s[0:len(prefix)] == prefix 446} 447 448// HasSuffix tests whether the string s ends with suffix. 449func HasSuffix(s, suffix string) bool { 450 return len(s) >= len(suffix) && s[len(s)-len(suffix):] == suffix 451} 452 453// Map returns a copy of the string s with all its characters modified 454// according to the mapping function. If mapping returns a negative value, the character is 455// dropped from the string with no replacement. 456func Map(mapping func(rune) rune, s string) string { 457 // In the worst case, the string can grow when mapped, making 458 // things unpleasant. But it's so rare we barge in assuming it's 459 // fine. It could also shrink but that falls out naturally. 460 461 // The output buffer b is initialized on demand, the first 462 // time a character differs. 463 var b Builder 464 465 for i, c := range s { 466 r := mapping(c) 467 if r == c && c != utf8.RuneError { 468 continue 469 } 470 471 var width int 472 if c == utf8.RuneError { 473 c, width = utf8.DecodeRuneInString(s[i:]) 474 if width != 1 && r == c { 475 continue 476 } 477 } else { 478 width = utf8.RuneLen(c) 479 } 480 481 b.Grow(len(s) + utf8.UTFMax) 482 b.WriteString(s[:i]) 483 if r >= 0 { 484 b.WriteRune(r) 485 } 486 487 s = s[i+width:] 488 break 489 } 490 491 // Fast path for unchanged input 492 if b.Cap() == 0 { // didn't call b.Grow above 493 return s 494 } 495 496 for _, c := range s { 497 r := mapping(c) 498 499 if r >= 0 { 500 // common case 501 // Due to inlining, it is more performant to determine if WriteByte should be 502 // invoked rather than always call WriteRune 503 if r < utf8.RuneSelf { 504 b.WriteByte(byte(r)) 505 } else { 506 // r is not a ASCII rune. 507 b.WriteRune(r) 508 } 509 } 510 } 511 512 return b.String() 513} 514 515// Repeat returns a new string consisting of count copies of the string s. 516// 517// It panics if count is negative or if 518// the result of (len(s) * count) overflows. 519func Repeat(s string, count int) string { 520 if count == 0 { 521 return "" 522 } 523 524 // Since we cannot return an error on overflow, 525 // we should panic if the repeat will generate 526 // an overflow. 527 // See Issue golang.org/issue/16237 528 if count < 0 { 529 panic("strings: negative Repeat count") 530 } else if len(s)*count/count != len(s) { 531 panic("strings: Repeat count causes overflow") 532 } 533 534 n := len(s) * count 535 var b Builder 536 b.Grow(n) 537 b.WriteString(s) 538 for b.Len() < n { 539 if b.Len() <= n/2 { 540 b.WriteString(b.String()) 541 } else { 542 b.WriteString(b.String()[:n-b.Len()]) 543 break 544 } 545 } 546 return b.String() 547} 548 549// ToUpper returns s with all Unicode letters mapped to their upper case. 550func ToUpper(s string) string { 551 isASCII, hasLower := true, false 552 for i := 0; i < len(s); i++ { 553 c := s[i] 554 if c >= utf8.RuneSelf { 555 isASCII = false 556 break 557 } 558 hasLower = hasLower || ('a' <= c && c <= 'z') 559 } 560 561 if isASCII { // optimize for ASCII-only strings. 562 if !hasLower { 563 return s 564 } 565 var b Builder 566 b.Grow(len(s)) 567 for i := 0; i < len(s); i++ { 568 c := s[i] 569 if 'a' <= c && c <= 'z' { 570 c -= 'a' - 'A' 571 } 572 b.WriteByte(c) 573 } 574 return b.String() 575 } 576 return Map(unicode.ToUpper, s) 577} 578 579// ToLower returns s with all Unicode letters mapped to their lower case. 580func ToLower(s string) string { 581 isASCII, hasUpper := true, false 582 for i := 0; i < len(s); i++ { 583 c := s[i] 584 if c >= utf8.RuneSelf { 585 isASCII = false 586 break 587 } 588 hasUpper = hasUpper || ('A' <= c && c <= 'Z') 589 } 590 591 if isASCII { // optimize for ASCII-only strings. 592 if !hasUpper { 593 return s 594 } 595 var b Builder 596 b.Grow(len(s)) 597 for i := 0; i < len(s); i++ { 598 c := s[i] 599 if 'A' <= c && c <= 'Z' { 600 c += 'a' - 'A' 601 } 602 b.WriteByte(c) 603 } 604 return b.String() 605 } 606 return Map(unicode.ToLower, s) 607} 608 609// ToTitle returns a copy of the string s with all Unicode letters mapped to 610// their Unicode title case. 611func ToTitle(s string) string { return Map(unicode.ToTitle, s) } 612 613// ToUpperSpecial returns a copy of the string s with all Unicode letters mapped to their 614// upper case using the case mapping specified by c. 615func ToUpperSpecial(c unicode.SpecialCase, s string) string { 616 return Map(c.ToUpper, s) 617} 618 619// ToLowerSpecial returns a copy of the string s with all Unicode letters mapped to their 620// lower case using the case mapping specified by c. 621func ToLowerSpecial(c unicode.SpecialCase, s string) string { 622 return Map(c.ToLower, s) 623} 624 625// ToTitleSpecial returns a copy of the string s with all Unicode letters mapped to their 626// Unicode title case, giving priority to the special casing rules. 627func ToTitleSpecial(c unicode.SpecialCase, s string) string { 628 return Map(c.ToTitle, s) 629} 630 631// ToValidUTF8 returns a copy of the string s with each run of invalid UTF-8 byte sequences 632// replaced by the replacement string, which may be empty. 633func ToValidUTF8(s, replacement string) string { 634 var b Builder 635 636 for i, c := range s { 637 if c != utf8.RuneError { 638 continue 639 } 640 641 _, wid := utf8.DecodeRuneInString(s[i:]) 642 if wid == 1 { 643 b.Grow(len(s) + len(replacement)) 644 b.WriteString(s[:i]) 645 s = s[i:] 646 break 647 } 648 } 649 650 // Fast path for unchanged input 651 if b.Cap() == 0 { // didn't call b.Grow above 652 return s 653 } 654 655 invalid := false // previous byte was from an invalid UTF-8 sequence 656 for i := 0; i < len(s); { 657 c := s[i] 658 if c < utf8.RuneSelf { 659 i++ 660 invalid = false 661 b.WriteByte(c) 662 continue 663 } 664 _, wid := utf8.DecodeRuneInString(s[i:]) 665 if wid == 1 { 666 i++ 667 if !invalid { 668 invalid = true 669 b.WriteString(replacement) 670 } 671 continue 672 } 673 invalid = false 674 b.WriteString(s[i : i+wid]) 675 i += wid 676 } 677 678 return b.String() 679} 680 681// isSeparator reports whether the rune could mark a word boundary. 682// TODO: update when package unicode captures more of the properties. 683func isSeparator(r rune) bool { 684 // ASCII alphanumerics and underscore are not separators 685 if r <= 0x7F { 686 switch { 687 case '0' <= r && r <= '9': 688 return false 689 case 'a' <= r && r <= 'z': 690 return false 691 case 'A' <= r && r <= 'Z': 692 return false 693 case r == '_': 694 return false 695 } 696 return true 697 } 698 // Letters and digits are not separators 699 if unicode.IsLetter(r) || unicode.IsDigit(r) { 700 return false 701 } 702 // Otherwise, all we can do for now is treat spaces as separators. 703 return unicode.IsSpace(r) 704} 705 706// Title returns a copy of the string s with all Unicode letters that begin words 707// mapped to their Unicode title case. 708// 709// BUG(rsc): The rule Title uses for word boundaries does not handle Unicode punctuation properly. 710func Title(s string) string { 711 // Use a closure here to remember state. 712 // Hackish but effective. Depends on Map scanning in order and calling 713 // the closure once per rune. 714 prev := ' ' 715 return Map( 716 func(r rune) rune { 717 if isSeparator(prev) { 718 prev = r 719 return unicode.ToTitle(r) 720 } 721 prev = r 722 return r 723 }, 724 s) 725} 726 727// TrimLeftFunc returns a slice of the string s with all leading 728// Unicode code points c satisfying f(c) removed. 729func TrimLeftFunc(s string, f func(rune) bool) string { 730 i := indexFunc(s, f, false) 731 if i == -1 { 732 return "" 733 } 734 return s[i:] 735} 736 737// TrimRightFunc returns a slice of the string s with all trailing 738// Unicode code points c satisfying f(c) removed. 739func TrimRightFunc(s string, f func(rune) bool) string { 740 i := lastIndexFunc(s, f, false) 741 if i >= 0 && s[i] >= utf8.RuneSelf { 742 _, wid := utf8.DecodeRuneInString(s[i:]) 743 i += wid 744 } else { 745 i++ 746 } 747 return s[0:i] 748} 749 750// TrimFunc returns a slice of the string s with all leading 751// and trailing Unicode code points c satisfying f(c) removed. 752func TrimFunc(s string, f func(rune) bool) string { 753 return TrimRightFunc(TrimLeftFunc(s, f), f) 754} 755 756// IndexFunc returns the index into s of the first Unicode 757// code point satisfying f(c), or -1 if none do. 758func IndexFunc(s string, f func(rune) bool) int { 759 return indexFunc(s, f, true) 760} 761 762// LastIndexFunc returns the index into s of the last 763// Unicode code point satisfying f(c), or -1 if none do. 764func LastIndexFunc(s string, f func(rune) bool) int { 765 return lastIndexFunc(s, f, true) 766} 767 768// indexFunc is the same as IndexFunc except that if 769// truth==false, the sense of the predicate function is 770// inverted. 771func indexFunc(s string, f func(rune) bool, truth bool) int { 772 for i, r := range s { 773 if f(r) == truth { 774 return i 775 } 776 } 777 return -1 778} 779 780// lastIndexFunc is the same as LastIndexFunc except that if 781// truth==false, the sense of the predicate function is 782// inverted. 783func lastIndexFunc(s string, f func(rune) bool, truth bool) int { 784 for i := len(s); i > 0; { 785 r, size := utf8.DecodeLastRuneInString(s[0:i]) 786 i -= size 787 if f(r) == truth { 788 return i 789 } 790 } 791 return -1 792} 793 794// asciiSet is a 32-byte value, where each bit represents the presence of a 795// given ASCII character in the set. The 128-bits of the lower 16 bytes, 796// starting with the least-significant bit of the lowest word to the 797// most-significant bit of the highest word, map to the full range of all 798// 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed, 799// ensuring that any non-ASCII character will be reported as not in the set. 800type asciiSet [8]uint32 801 802// makeASCIISet creates a set of ASCII characters and reports whether all 803// characters in chars are ASCII. 804func makeASCIISet(chars string) (as asciiSet, ok bool) { 805 for i := 0; i < len(chars); i++ { 806 c := chars[i] 807 if c >= utf8.RuneSelf { 808 return as, false 809 } 810 as[c>>5] |= 1 << uint(c&31) 811 } 812 return as, true 813} 814 815// contains reports whether c is inside the set. 816func (as *asciiSet) contains(c byte) bool { 817 return (as[c>>5] & (1 << uint(c&31))) != 0 818} 819 820func makeCutsetFunc(cutset string) func(rune) bool { 821 if len(cutset) == 1 && cutset[0] < utf8.RuneSelf { 822 return func(r rune) bool { 823 return r == rune(cutset[0]) 824 } 825 } 826 if as, isASCII := makeASCIISet(cutset); isASCII { 827 return func(r rune) bool { 828 return r < utf8.RuneSelf && as.contains(byte(r)) 829 } 830 } 831 return func(r rune) bool { return IndexRune(cutset, r) >= 0 } 832} 833 834// Trim returns a slice of the string s with all leading and 835// trailing Unicode code points contained in cutset removed. 836func Trim(s, cutset string) string { 837 if s == "" || cutset == "" { 838 return s 839 } 840 return TrimFunc(s, makeCutsetFunc(cutset)) 841} 842 843// TrimLeft returns a slice of the string s with all leading 844// Unicode code points contained in cutset removed. 845// 846// To remove a prefix, use TrimPrefix instead. 847func TrimLeft(s, cutset string) string { 848 if s == "" || cutset == "" { 849 return s 850 } 851 return TrimLeftFunc(s, makeCutsetFunc(cutset)) 852} 853 854// TrimRight returns a slice of the string s, with all trailing 855// Unicode code points contained in cutset removed. 856// 857// To remove a suffix, use TrimSuffix instead. 858func TrimRight(s, cutset string) string { 859 if s == "" || cutset == "" { 860 return s 861 } 862 return TrimRightFunc(s, makeCutsetFunc(cutset)) 863} 864 865// TrimSpace returns a slice of the string s, with all leading 866// and trailing white space removed, as defined by Unicode. 867func TrimSpace(s string) string { 868 // Fast path for ASCII: look for the first ASCII non-space byte 869 start := 0 870 for ; start < len(s); start++ { 871 c := s[start] 872 if c >= utf8.RuneSelf { 873 // If we run into a non-ASCII byte, fall back to the 874 // slower unicode-aware method on the remaining bytes 875 return TrimFunc(s[start:], unicode.IsSpace) 876 } 877 if asciiSpace[c] == 0 { 878 break 879 } 880 } 881 882 // Now look for the first ASCII non-space byte from the end 883 stop := len(s) 884 for ; stop > start; stop-- { 885 c := s[stop-1] 886 if c >= utf8.RuneSelf { 887 return TrimFunc(s[start:stop], unicode.IsSpace) 888 } 889 if asciiSpace[c] == 0 { 890 break 891 } 892 } 893 894 // At this point s[start:stop] starts and ends with an ASCII 895 // non-space bytes, so we're done. Non-ASCII cases have already 896 // been handled above. 897 return s[start:stop] 898} 899 900// TrimPrefix returns s without the provided leading prefix string. 901// If s doesn't start with prefix, s is returned unchanged. 902func TrimPrefix(s, prefix string) string { 903 if HasPrefix(s, prefix) { 904 return s[len(prefix):] 905 } 906 return s 907} 908 909// TrimSuffix returns s without the provided trailing suffix string. 910// If s doesn't end with suffix, s is returned unchanged. 911func TrimSuffix(s, suffix string) string { 912 if HasSuffix(s, suffix) { 913 return s[:len(s)-len(suffix)] 914 } 915 return s 916} 917 918// Replace returns a copy of the string s with the first n 919// non-overlapping instances of old replaced by new. 920// If old is empty, it matches at the beginning of the string 921// and after each UTF-8 sequence, yielding up to k+1 replacements 922// for a k-rune string. 923// If n < 0, there is no limit on the number of replacements. 924func Replace(s, old, new string, n int) string { 925 if old == new || n == 0 { 926 return s // avoid allocation 927 } 928 929 // Compute number of replacements. 930 if m := Count(s, old); m == 0 { 931 return s // avoid allocation 932 } else if n < 0 || m < n { 933 n = m 934 } 935 936 // Apply replacements to buffer. 937 var b Builder 938 b.Grow(len(s) + n*(len(new)-len(old))) 939 start := 0 940 for i := 0; i < n; i++ { 941 j := start 942 if len(old) == 0 { 943 if i > 0 { 944 _, wid := utf8.DecodeRuneInString(s[start:]) 945 j += wid 946 } 947 } else { 948 j += Index(s[start:], old) 949 } 950 b.WriteString(s[start:j]) 951 b.WriteString(new) 952 start = j + len(old) 953 } 954 b.WriteString(s[start:]) 955 return b.String() 956} 957 958// ReplaceAll returns a copy of the string s with all 959// non-overlapping instances of old replaced by new. 960// If old is empty, it matches at the beginning of the string 961// and after each UTF-8 sequence, yielding up to k+1 replacements 962// for a k-rune string. 963func ReplaceAll(s, old, new string) string { 964 return Replace(s, old, new, -1) 965} 966 967// EqualFold reports whether s and t, interpreted as UTF-8 strings, 968// are equal under Unicode case-folding, which is a more general 969// form of case-insensitivity. 970func EqualFold(s, t string) bool { 971 for s != "" && t != "" { 972 // Extract first rune from each string. 973 var sr, tr rune 974 if s[0] < utf8.RuneSelf { 975 sr, s = rune(s[0]), s[1:] 976 } else { 977 r, size := utf8.DecodeRuneInString(s) 978 sr, s = r, s[size:] 979 } 980 if t[0] < utf8.RuneSelf { 981 tr, t = rune(t[0]), t[1:] 982 } else { 983 r, size := utf8.DecodeRuneInString(t) 984 tr, t = r, t[size:] 985 } 986 987 // If they match, keep going; if not, return false. 988 989 // Easy case. 990 if tr == sr { 991 continue 992 } 993 994 // Make sr < tr to simplify what follows. 995 if tr < sr { 996 tr, sr = sr, tr 997 } 998 // Fast check for ASCII. 999 if tr < utf8.RuneSelf { 1000 // ASCII only, sr/tr must be upper/lower case 1001 if 'A' <= sr && sr <= 'Z' && tr == sr+'a'-'A' { 1002 continue 1003 } 1004 return false 1005 } 1006 1007 // General case. SimpleFold(x) returns the next equivalent rune > x 1008 // or wraps around to smaller values. 1009 r := unicode.SimpleFold(sr) 1010 for r != sr && r < tr { 1011 r = unicode.SimpleFold(r) 1012 } 1013 if r == tr { 1014 continue 1015 } 1016 return false 1017 } 1018 1019 // One string is empty. Are both? 1020 return s == t 1021} 1022 1023// Index returns the index of the first instance of substr in s, or -1 if substr is not present in s. 1024func Index(s, substr string) int { 1025 n := len(substr) 1026 switch { 1027 case n == 0: 1028 return 0 1029 case n == 1: 1030 return IndexByte(s, substr[0]) 1031 case n == len(s): 1032 if substr == s { 1033 return 0 1034 } 1035 return -1 1036 case n > len(s): 1037 return -1 1038 case n <= bytealg.MaxLen: 1039 // Use brute force when s and substr both are small 1040 if len(s) <= bytealg.MaxBruteForce { 1041 return bytealg.IndexString(s, substr) 1042 } 1043 c0 := substr[0] 1044 c1 := substr[1] 1045 i := 0 1046 t := len(s) - n + 1 1047 fails := 0 1048 for i < t { 1049 if s[i] != c0 { 1050 // IndexByte is faster than bytealg.IndexString, so use it as long as 1051 // we're not getting lots of false positives. 1052 o := IndexByte(s[i+1:t], c0) 1053 if o < 0 { 1054 return -1 1055 } 1056 i += o + 1 1057 } 1058 if s[i+1] == c1 && s[i:i+n] == substr { 1059 return i 1060 } 1061 fails++ 1062 i++ 1063 // Switch to bytealg.IndexString when IndexByte produces too many false positives. 1064 if fails > bytealg.Cutover(i) { 1065 r := bytealg.IndexString(s[i:], substr) 1066 if r >= 0 { 1067 return r + i 1068 } 1069 return -1 1070 } 1071 } 1072 return -1 1073 } 1074 c0 := substr[0] 1075 c1 := substr[1] 1076 i := 0 1077 t := len(s) - n + 1 1078 fails := 0 1079 for i < t { 1080 if s[i] != c0 { 1081 o := IndexByte(s[i+1:t], c0) 1082 if o < 0 { 1083 return -1 1084 } 1085 i += o + 1 1086 } 1087 if s[i+1] == c1 && s[i:i+n] == substr { 1088 return i 1089 } 1090 i++ 1091 fails++ 1092 if fails >= 4+i>>4 && i < t { 1093 // See comment in ../bytes/bytes.go. 1094 j := bytealg.IndexRabinKarp(s[i:], substr) 1095 if j < 0 { 1096 return -1 1097 } 1098 return i + j 1099 } 1100 } 1101 return -1 1102} 1103