1package highlight
2
3import (
4	"unicode"
5	"unicode/utf8"
6)
7
8var minMark = rune(unicode.Mark.R16[0].Lo)
9
10func isMark(r rune) bool {
11	// Fast path
12	if r < minMark {
13		return false
14	}
15	return unicode.In(r, unicode.Mark)
16}
17
18// DecodeCharacter returns the next character from an array of bytes
19// A character is a rune along with any accompanying combining runes
20func DecodeCharacter(b []byte) (rune, []rune, int) {
21	r, size := utf8.DecodeRune(b)
22	b = b[size:]
23	c, s := utf8.DecodeRune(b)
24
25	var combc []rune
26	for isMark(c) {
27		combc = append(combc, c)
28		size += s
29
30		b = b[s:]
31		c, s = utf8.DecodeRune(b)
32	}
33
34	return r, combc, size
35}
36
37// DecodeCharacterInString returns the next character from a string
38// A character is a rune along with any accompanying combining runes
39func DecodeCharacterInString(str string) (rune, []rune, int) {
40	r, size := utf8.DecodeRuneInString(str)
41	str = str[size:]
42	c, s := utf8.DecodeRuneInString(str)
43
44	var combc []rune
45	for isMark(c) {
46		combc = append(combc, c)
47		size += s
48
49		str = str[s:]
50		c, s = utf8.DecodeRuneInString(str)
51	}
52
53	return r, combc, size
54}
55
56// CharacterCount returns the number of characters in a byte array
57// Similar to utf8.RuneCount but for unicode characters
58func CharacterCount(b []byte) int {
59	s := 0
60
61	for len(b) > 0 {
62		r, size := utf8.DecodeRune(b)
63		if !isMark(r) {
64			s++
65		}
66
67		b = b[size:]
68	}
69
70	return s
71}
72
73// CharacterCount returns the number of characters in a string
74// Similar to utf8.RuneCountInString but for unicode characters
75func CharacterCountInString(str string) int {
76	s := 0
77
78	for _, r := range str {
79		if !isMark(r) {
80			s++
81		}
82	}
83
84	return s
85}
86