1// Copyright 2011 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package unicode
6
// Property bits recorded for each code point under U+0100, so that runes in
// that range can be classified with a single table lookup.
const (
	pC  = 1 << iota // control character
	pP              // punctuation character
	pN              // numeral
	pS              // symbolic character
	pZ              // spacing character
	pLu             // upper-case letter
	pLl             // lower-case letter
	pp              // printable character, per Go's definition
)

// Composite masks assembled from the single-bit properties.
const (
	pg     = pp | pZ   // graphical character, per the Unicode definition
	pLo    = pLl | pLu // letter that is neither upper nor lower case
	pLmask = pLo       // mask covering both letter bits
)
21
22// GraphicRanges defines the set of graphic characters according to Unicode.
23var GraphicRanges = []*RangeTable{
24	L, M, N, P, S, Zs,
25}
26
27// PrintRanges defines the set of printable characters according to Go.
28// ASCII space, U+0020, is handled separately.
29var PrintRanges = []*RangeTable{
30	L, M, N, P, S,
31}
32
33// IsGraphic reports whether the rune is defined as a Graphic by Unicode.
34// Such characters include letters, marks, numbers, punctuation, symbols, and
35// spaces, from categories L, M, N, P, S, Zs.
36func IsGraphic(r rune) bool {
37	// We convert to uint32 to avoid the extra test for negative,
38	// and in the index we convert to uint8 to avoid the range check.
39	if uint32(r) <= MaxLatin1 {
40		return properties[uint8(r)]&pg != 0
41	}
42	return In(r, GraphicRanges...)
43}
44
45// IsPrint reports whether the rune is defined as printable by Go. Such
46// characters include letters, marks, numbers, punctuation, symbols, and the
47// ASCII space character, from categories L, M, N, P, S and the ASCII space
48// character. This categorization is the same as IsGraphic except that the
49// only spacing character is ASCII space, U+0020.
50func IsPrint(r rune) bool {
51	if uint32(r) <= MaxLatin1 {
52		return properties[uint8(r)]&pp != 0
53	}
54	return In(r, PrintRanges...)
55}
56
57// IsOneOf reports whether the rune is a member of one of the ranges.
58// The function "In" provides a nicer signature and should be used in preference to IsOneOf.
59func IsOneOf(ranges []*RangeTable, r rune) bool {
60	for _, inside := range ranges {
61		if Is(inside, r) {
62			return true
63		}
64	}
65	return false
66}
67
68// In reports whether the rune is a member of one of the ranges.
69func In(r rune, ranges ...*RangeTable) bool {
70	for _, inside := range ranges {
71		if Is(inside, r) {
72			return true
73		}
74	}
75	return false
76}
77
78// IsControl reports whether the rune is a control character.
79// The C (Other) Unicode category includes more code points
80// such as surrogates; use Is(C, r) to test for them.
81func IsControl(r rune) bool {
82	if uint32(r) <= MaxLatin1 {
83		return properties[uint8(r)]&pC != 0
84	}
85	// All control characters are < MaxLatin1.
86	return false
87}
88
89// IsLetter reports whether the rune is a letter (category L).
90func IsLetter(r rune) bool {
91	if uint32(r) <= MaxLatin1 {
92		return properties[uint8(r)]&(pLmask) != 0
93	}
94	return isExcludingLatin(Letter, r)
95}
96
97// IsMark reports whether the rune is a mark character (category M).
98func IsMark(r rune) bool {
99	// There are no mark characters in Latin-1.
100	return isExcludingLatin(Mark, r)
101}
102
103// IsNumber reports whether the rune is a number (category N).
104func IsNumber(r rune) bool {
105	if uint32(r) <= MaxLatin1 {
106		return properties[uint8(r)]&pN != 0
107	}
108	return isExcludingLatin(Number, r)
109}
110
111// IsPunct reports whether the rune is a Unicode punctuation character
112// (category P).
113func IsPunct(r rune) bool {
114	if uint32(r) <= MaxLatin1 {
115		return properties[uint8(r)]&pP != 0
116	}
117	return Is(Punct, r)
118}
119
120// IsSpace reports whether the rune is a space character as defined
121// by Unicode's White Space property; in the Latin-1 space
122// this is
123//	'\t', '\n', '\v', '\f', '\r', ' ', U+0085 (NEL), U+00A0 (NBSP).
124// Other definitions of spacing characters are set by category
125// Z and property Pattern_White_Space.
126func IsSpace(r rune) bool {
127	// This property isn't the same as Z; special-case it.
128	if uint32(r) <= MaxLatin1 {
129		switch r {
130		case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xA0:
131			return true
132		}
133		return false
134	}
135	return isExcludingLatin(White_Space, r)
136}
137
138// IsSymbol reports whether the rune is a symbolic character.
139func IsSymbol(r rune) bool {
140	if uint32(r) <= MaxLatin1 {
141		return properties[uint8(r)]&pS != 0
142	}
143	return isExcludingLatin(Symbol, r)
144}
145