1// Copyright 2011 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package unicode 6 7// Bit masks for each code point under U+0100, for fast lookup. 8const ( 9 pC = 1 << iota // a control character. 10 pP // a punctuation character. 11 pN // a numeral. 12 pS // a symbolic character. 13 pZ // a spacing character. 14 pLu // an upper-case letter. 15 pLl // a lower-case letter. 16 pp // a printable character according to Go's definition. 17 pg = pp | pZ // a graphical character according to the Unicode definition. 18 pLo = pLl | pLu // a letter that is neither upper nor lower case. 19 pLmask = pLo 20) 21 22// GraphicRanges defines the set of graphic characters according to Unicode. 23var GraphicRanges = []*RangeTable{ 24 L, M, N, P, S, Zs, 25} 26 27// PrintRanges defines the set of printable characters according to Go. 28// ASCII space, U+0020, is handled separately. 29var PrintRanges = []*RangeTable{ 30 L, M, N, P, S, 31} 32 33// IsGraphic reports whether the rune is defined as a Graphic by Unicode. 34// Such characters include letters, marks, numbers, punctuation, symbols, and 35// spaces, from categories L, M, N, P, S, Zs. 36func IsGraphic(r rune) bool { 37 // We convert to uint32 to avoid the extra test for negative, 38 // and in the index we convert to uint8 to avoid the range check. 39 if uint32(r) <= MaxLatin1 { 40 return properties[uint8(r)]&pg != 0 41 } 42 return IsOneOf(GraphicRanges, r) 43} 44 45// IsPrint reports whether the rune is defined as printable by Go. Such 46// characters include letters, marks, numbers, punctuation, symbols, and the 47// ASCII space character, from categories L, M, N, P, S and the ASCII space 48// character. This categorization is the same as IsGraphic except that the 49// only spacing character is ASCII space, U+0020. 50func IsPrint(r rune) bool { 51 if uint32(r) <= MaxLatin1 { 52 return properties[uint8(r)]&pp != 0 53 } 54 return IsOneOf(PrintRanges, r) 55} 56 57// IsOneOf reports whether the rune is a member of one of the ranges. 58func IsOneOf(set []*RangeTable, r rune) bool { 59 for _, inside := range set { 60 if Is(inside, r) { 61 return true 62 } 63 } 64 return false 65} 66 67// IsControl reports whether the rune is a control character. 68// The C (Other) Unicode category includes more code points 69// such as surrogates; use Is(C, r) to test for them. 70func IsControl(r rune) bool { 71 if uint32(r) <= MaxLatin1 { 72 return properties[uint8(r)]&pC != 0 73 } 74 // All control characters are < Latin1Max. 75 return false 76} 77 78// IsLetter reports whether the rune is a letter (category L). 79func IsLetter(r rune) bool { 80 if uint32(r) <= MaxLatin1 { 81 return properties[uint8(r)]&(pLmask) != 0 82 } 83 return isExcludingLatin(Letter, r) 84} 85 86// IsMark reports whether the rune is a mark character (category M). 87func IsMark(r rune) bool { 88 // There are no mark characters in Latin-1. 89 return isExcludingLatin(Mark, r) 90} 91 92// IsNumber reports whether the rune is a number (category N). 93func IsNumber(r rune) bool { 94 if uint32(r) <= MaxLatin1 { 95 return properties[uint8(r)]&pN != 0 96 } 97 return isExcludingLatin(Number, r) 98} 99 100// IsPunct reports whether the rune is a Unicode punctuation character 101// (category P). 102func IsPunct(r rune) bool { 103 if uint32(r) <= MaxLatin1 { 104 return properties[uint8(r)]&pP != 0 105 } 106 return Is(Punct, r) 107} 108 109// IsSpace reports whether the rune is a space character as defined 110// by Unicode's White Space property; in the Latin-1 space 111// this is 112// '\t', '\n', '\v', '\f', '\r', ' ', U+0085 (NEL), U+00A0 (NBSP). 113// Other definitions of spacing characters are set by category 114// Z and property Pattern_White_Space. 115func IsSpace(r rune) bool { 116 // This property isn't the same as Z; special-case it. 117 if uint32(r) <= MaxLatin1 { 118 switch r { 119 case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xA0: 120 return true 121 } 122 return false 123 } 124 return isExcludingLatin(White_Space, r) 125} 126 127// IsSymbol reports whether the rune is a symbolic character. 128func IsSymbol(r rune) bool { 129 if uint32(r) <= MaxLatin1 { 130 return properties[uint8(r)]&pS != 0 131 } 132 return isExcludingLatin(Symbol, r) 133} 134