1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "src/strings/char-predicates.h"
6 #include "src/strings/char-predicates-inl.h"
7 #include "src/strings/unicode.h"
8 #include "testing/gtest/include/gtest/gtest.h"
9
10 namespace v8 {
11 namespace internal {
12
// Verifies IsWhiteSpace against a fixed set of code points that must be
// accepted, plus U+180E, which must be rejected.
TEST(CharPredicatesTest, WhiteSpace) {
  // Code points for which IsWhiteSpace is expected to return true.
  static constexpr int kAccepted[] = {
      0x0009, 0x000B, 0x000C, ' ',    0x00A0, 0x1680,
      0x2000, 0x2007, 0x202F, 0x205F, 0x3000, 0xFEFF,
  };
  for (const int code_point : kAccepted) {
    // The message identifies the failing entry, since every iteration shares
    // the same source line.
    EXPECT_TRUE(IsWhiteSpace(code_point))
        << "code point 0x" << std::hex << code_point;
  }

  // The single negative case covered by this test.
  EXPECT_FALSE(IsWhiteSpace(0x180E));
}
28
// Verifies IsWhiteSpaceOrLineTerminator: it must accept the same white-space
// code points that the WhiteSpace test checks for IsWhiteSpace, additionally
// accept the four line terminators, and reject U+180E.
TEST(CharPredicatesTest, WhiteSpaceOrLineTerminator) {
  // White space
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x0009));
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x000B));
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x000C));
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(' '));
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x00A0));
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x1680));
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x2000));
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x2007));
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x202F));
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x205F));
  // U+3000 is checked in the WhiteSpace test; cover it here as well so both
  // tests exercise the same white-space list.
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x3000));
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0xFEFF));

  // Line terminators
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x000A));
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x000D));
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x2028));
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x2029));

  // Neither white space nor a line terminator.
  EXPECT_FALSE(IsWhiteSpaceOrLineTerminator(0x180E));
}
48
// Verifies IsIdentifierStart on '$', '_' and '\\', on the Other_ID_Start code
// points, on one Pattern_Syntax code point that must be rejected, and (only
// when V8_INTL_SUPPORT is defined) on sample ID_Start code points introduced
// in Unicode 8.0 and 9.0.
TEST(CharPredicatesTest, IdentifierStart) {
  static constexpr int kAsciiStarts[] = {'$', '_', '\\'};
  for (const int code_point : kAsciiStarts) {
    EXPECT_TRUE(IsIdentifierStart(code_point))
        << "code point 0x" << std::hex << code_point;
  }

  // Other_ID_Start, see http://www.unicode.org/reports/tr31/
  //   curl http://www.unicode.org/Public/UCD/latest/ucd/PropList.txt |
  //   grep 'Other_ID_Start'
  static constexpr int kOtherIdStart[] = {
      0x1885, 0x1886, 0x2118, 0x212E, 0x309B, 0x309C,
  };
  for (const int code_point : kOtherIdStart) {
    EXPECT_TRUE(IsIdentifierStart(code_point))
        << "code point 0x" << std::hex << code_point;
  }

  // Issue 2892:
  // \u2E2F has the Pattern_Syntax property, excluding it from ID_Start.
  EXPECT_FALSE(IsIdentifierStart(0x2E2F));

#ifdef V8_INTL_SUPPORT
  // New in Unicode 8.0 (6,847 code points)
  // [:ID_Start:] & [[:Age=8.0:] - [:Age=7.0:]]
  static constexpr int kUnicode8IdStart[] = {
      0x08B3, 0x0AF9, 0x13F8, 0x9FCD, 0xAB60, 0x10CC0, 0x108E0, 0x2B820,
  };
  for (const int code_point : kUnicode8IdStart) {
    EXPECT_TRUE(IsIdentifierStart(code_point))
        << "code point 0x" << std::hex << code_point;
  }

  // New in Unicode 9.0 (7,177 code points)
  // [:ID_Start:] & [[:Age=9.0:] - [:Age=8.0:]]
  static constexpr int kUnicode9IdStart[] = {0x1C80, 0x104DB, 0x1E922};
  for (const int code_point : kUnicode9IdStart) {
    EXPECT_TRUE(IsIdentifierStart(code_point))
        << "code point 0x" << std::hex << code_point;
  }
#endif
}
89
// Verifies IsIdentifierPart on '$', '_', '\\', U+200C and U+200D, on the
// Other_ID_Start and Other_ID_Continue code points, on one Pattern_Syntax code
// point that must be rejected, and (only when V8_INTL_SUPPORT is defined) on
// sample ID_Start / ID_Continue code points introduced in Unicode 8.0 and 9.0.
TEST(CharPredicatesTest, IdentifierPart) {
  static constexpr int kBasicParts[] = {'$', '_', '\\', 0x200C, 0x200D};
  for (const int code_point : kBasicParts) {
    EXPECT_TRUE(IsIdentifierPart(code_point))
        << "code point 0x" << std::hex << code_point;
  }

#ifdef V8_INTL_SUPPORT
  // New in Unicode 8.0 (6,847 code points)
  // [:ID_Start:] & [[:Age=8.0:] - [:Age=7.0:]]
  static constexpr int kUnicode8IdStart[] = {
      0x08B3, 0x0AF9, 0x13F8, 0x9FCD, 0xAB60, 0x10CC0, 0x108E0, 0x2B820,
  };
  for (const int code_point : kUnicode8IdStart) {
    EXPECT_TRUE(IsIdentifierPart(code_point))
        << "code point 0x" << std::hex << code_point;
  }

  // [[:ID_Continue:]-[:ID_Start:]] & [[:Age=8.0:]-[:Age=7.0:]]
  // 162 code points
  static constexpr int kUnicode8ContinueOnly[] = {0x08E3, 0xA69E, 0x11730};
  for (const int code_point : kUnicode8ContinueOnly) {
    EXPECT_TRUE(IsIdentifierPart(code_point))
        << "code point 0x" << std::hex << code_point;
  }

  // New in Unicode 9.0 (7,177 code points)
  // [:ID_Start:] & [[:Age=9.0:] - [:Age=8.0:]]
  static constexpr int kUnicode9IdStart[] = {0x1C80, 0x104DB, 0x1E922};
  for (const int code_point : kUnicode9IdStart) {
    EXPECT_TRUE(IsIdentifierPart(code_point))
        << "code point 0x" << std::hex << code_point;
  }

  // [[:ID_Continue:]-[:ID_Start:]] & [[:Age=9.0:]-[:Age=8.0:]]
  // 162 code points
  // NOTE(review): this count is identical to the Unicode 8.0 figure above and
  // may have been carried over by copy-paste -- confirm.
  static constexpr int kUnicode9ContinueOnly[] = {
      0x08D4, 0x1DFB, 0xA8C5, 0x11450,
  };
  for (const int code_point : kUnicode9ContinueOnly) {
    EXPECT_TRUE(IsIdentifierPart(code_point))
        << "code point 0x" << std::hex << code_point;
  }
#endif

  // http://www.unicode.org/reports/tr31/
  //   curl http://www.unicode.org/Public/UCD/latest/ucd/PropList.txt |
  //   grep 'Other_ID_(Continue|Start)'

  // Other_ID_Start
  static constexpr int kOtherIdStart[] = {
      0x1885, 0x1886, 0x2118, 0x212E, 0x309B, 0x309C,
  };
  for (const int code_point : kOtherIdStart) {
    EXPECT_TRUE(IsIdentifierPart(code_point))
        << "code point 0x" << std::hex << code_point;
  }

  // Other_ID_Continue
  static constexpr int kOtherIdContinue[] = {
      0x00B7, 0x0387, 0x1369, 0x1370, 0x1371, 0x19DA,
  };
  for (const int code_point : kOtherIdContinue) {
    EXPECT_TRUE(IsIdentifierPart(code_point))
        << "code point 0x" << std::hex << code_point;
  }

  // Issue 2892:
  // \u2E2F has the Pattern_Syntax property, so it must not be accepted as an
  // identifier part either.
  EXPECT_FALSE(IsIdentifierPart(0x2E2F));
}
153
154 #ifdef V8_INTL_SUPPORT
// Verifies IsIdentifierStart and IsIdentifierPart on code points above U+FFFF.
// Each code point is checked against both predicates; the trailing comments
// give the general category recorded for that code point.
TEST(CharPredicatesTest, SupplementaryPlaneIdentifiers) {
  // Both ID_Start and ID_Continue.
  EXPECT_TRUE(IsIdentifierStart(0x10403));  // Category Lu
  EXPECT_TRUE(IsIdentifierPart(0x10403));
  EXPECT_TRUE(IsIdentifierStart(0x1043C));  // Category Ll
  EXPECT_TRUE(IsIdentifierPart(0x1043C));
  EXPECT_TRUE(IsIdentifierStart(0x16F9C));  // Category Lm
  EXPECT_TRUE(IsIdentifierPart(0x16F9C));
  EXPECT_TRUE(IsIdentifierStart(0x10048));  // Category Lo
  EXPECT_TRUE(IsIdentifierPart(0x10048));
  EXPECT_TRUE(IsIdentifierStart(0x1014D));  // Category Nl
  EXPECT_TRUE(IsIdentifierPart(0x1014D));

  // New in Unicode 8.0
  // [ [:ID_Start=Yes:] & [:Age=8.0:]] - [:Age=7.0:]
  // These are checked with both predicates, like every other code point in
  // this test: an identifier-start code point is also a valid identifier part.
  EXPECT_TRUE(IsIdentifierStart(0x108E0));
  EXPECT_TRUE(IsIdentifierPart(0x108E0));
  EXPECT_TRUE(IsIdentifierStart(0x10C80));
  EXPECT_TRUE(IsIdentifierPart(0x10C80));

  // Only ID_Continue.
  EXPECT_FALSE(IsIdentifierStart(0x101FD));  // Category Mn
  EXPECT_TRUE(IsIdentifierPart(0x101FD));
  EXPECT_FALSE(IsIdentifierStart(0x11002));  // Category Mc
  EXPECT_TRUE(IsIdentifierPart(0x11002));
  EXPECT_FALSE(IsIdentifierStart(0x104A9));  // Category Nd
  EXPECT_TRUE(IsIdentifierPart(0x104A9));

  // Neither.
  EXPECT_FALSE(IsIdentifierStart(0x10111));  // Category No
  EXPECT_FALSE(IsIdentifierPart(0x10111));
  EXPECT_FALSE(IsIdentifierStart(0x1F4A9));  // Category So
  EXPECT_FALSE(IsIdentifierPart(0x1F4A9));
}
187 #endif // V8_INTL_SUPPORT
188
189 } // namespace internal
190 } // namespace v8
191