1package highlight 2 3import ( 4 "unicode" 5 "unicode/utf8" 6) 7 8var minMark = rune(unicode.Mark.R16[0].Lo) 9 10func isMark(r rune) bool { 11 // Fast path 12 if r < minMark { 13 return false 14 } 15 return unicode.In(r, unicode.Mark) 16} 17 18// DecodeCharacter returns the next character from an array of bytes 19// A character is a rune along with any accompanying combining runes 20func DecodeCharacter(b []byte) (rune, []rune, int) { 21 r, size := utf8.DecodeRune(b) 22 b = b[size:] 23 c, s := utf8.DecodeRune(b) 24 25 var combc []rune 26 for isMark(c) { 27 combc = append(combc, c) 28 size += s 29 30 b = b[s:] 31 c, s = utf8.DecodeRune(b) 32 } 33 34 return r, combc, size 35} 36 37// DecodeCharacterInString returns the next character from a string 38// A character is a rune along with any accompanying combining runes 39func DecodeCharacterInString(str string) (rune, []rune, int) { 40 r, size := utf8.DecodeRuneInString(str) 41 str = str[size:] 42 c, s := utf8.DecodeRuneInString(str) 43 44 var combc []rune 45 for isMark(c) { 46 combc = append(combc, c) 47 size += s 48 49 str = str[s:] 50 c, s = utf8.DecodeRuneInString(str) 51 } 52 53 return r, combc, size 54} 55 56// CharacterCount returns the number of characters in a byte array 57// Similar to utf8.RuneCount but for unicode characters 58func CharacterCount(b []byte) int { 59 s := 0 60 61 for len(b) > 0 { 62 r, size := utf8.DecodeRune(b) 63 if !isMark(r) { 64 s++ 65 } 66 67 b = b[size:] 68 } 69 70 return s 71} 72 73// CharacterCount returns the number of characters in a string 74// Similar to utf8.RuneCountInString but for unicode characters 75func CharacterCountInString(str string) int { 76 s := 0 77 78 for _, r := range str { 79 if !isMark(r) { 80 s++ 81 } 82 } 83 84 return s 85} 86