1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/strings/char-predicates.h"
6 #include "src/strings/char-predicates-inl.h"
7 #include "src/strings/unicode.h"
8 #include "testing/gtest/include/gtest/gtest.h"
9 
10 namespace v8 {
11 namespace internal {
12 
// Checks IsWhiteSpace() against a table of code points it must accept, and
// against U+180E, which it must reject.
TEST(CharPredicatesTest, WhiteSpace) {
  constexpr int kWhiteSpace[] = {
      0x0009, 0x000B, 0x000C, ' ',    0x00A0, 0x1680,
      0x2000, 0x2007, 0x202F, 0x205F, 0x3000, 0xFEFF,
  };
  for (const int code_point : kWhiteSpace) {
    // The streamed value identifies the failing table entry (in decimal).
    EXPECT_TRUE(IsWhiteSpace(code_point)) << "code point: " << code_point;
  }

  EXPECT_FALSE(IsWhiteSpace(0x180E));
}
28 
// Verifies that IsWhiteSpaceOrLineTerminator() accepts the white-space code
// points that the WhiteSpace test checks, accepts the line terminators listed
// below, and rejects U+180E.
TEST(CharPredicatesTest, WhiteSpaceOrLineTerminator) {
  // White space
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x0009));
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x000B));
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x000C));
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(' '));
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x00A0));
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x1680));
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x2000));
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x2007));
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x202F));
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x205F));
  // U+3000 is covered by the WhiteSpace test; it is checked here too so that
  // both white-space lists stay in agreement.
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x3000));
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0xFEFF));

  // Line terminators
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x000A));
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x000D));
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x2028));
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x2029));

  // Neither white space nor a line terminator.
  EXPECT_FALSE(IsWhiteSpaceOrLineTerminator(0x180E));
}
48 
// Exercises IsIdentifierStart() on three ASCII characters, on the
// Other_ID_Start code points, on U+2E2F (which must be rejected) and, only
// when V8_INTL_SUPPORT is defined, on samples added in Unicode 8.0 and 9.0.
TEST(CharPredicatesTest, IdentifierStart) {
  constexpr char kAsciiStarts[] = {'$', '_', '\\'};
  for (const char ascii : kAsciiStarts) {
    EXPECT_TRUE(IsIdentifierStart(ascii)) << "character: " << ascii;
  }

  // Other_ID_Start; see http://www.unicode.org/reports/tr31/ and
  //   curl http://www.unicode.org/Public/UCD/latest/ucd/PropList.txt |
  //   grep 'Other_ID_Start'
  constexpr int kOtherIdStart[] = {
      0x1885, 0x1886, 0x2118, 0x212E, 0x309B, 0x309C,
  };
  for (const int code_point : kOtherIdStart) {
    EXPECT_TRUE(IsIdentifierStart(code_point))
        << "code point: " << code_point;
  }

  // Issue 2892: U+2E2F has the Pattern_Syntax property, which excludes it
  // from ID_Start.
  EXPECT_FALSE(IsIdentifierStart(0x2E2F));

#ifdef V8_INTL_SUPPORT
  // New in Unicode 8.0 (6,847 code points):
  //   [:ID_Start:] & [[:Age=8.0:] - [:Age=7.0:]]
  constexpr int kUnicode8Starts[] = {
      0x08B3, 0x0AF9, 0x13F8, 0x9FCD, 0xAB60, 0x10CC0, 0x108E0, 0x2B820,
  };
  for (const int code_point : kUnicode8Starts) {
    EXPECT_TRUE(IsIdentifierStart(code_point))
        << "code point: " << code_point;
  }

  // New in Unicode 9.0 (7,177 code points):
  //   [:ID_Start:] & [[:Age=9.0:] - [:Age=8.0:]]
  constexpr int kUnicode9Starts[] = {0x1C80, 0x104DB, 0x1E922};
  for (const int code_point : kUnicode9Starts) {
    EXPECT_TRUE(IsIdentifierStart(code_point))
        << "code point: " << code_point;
  }
#endif
}
89 
TEST(CharPredicatesTest,IdentifierPart)90 TEST(CharPredicatesTest, IdentifierPart) {
91   EXPECT_TRUE(IsIdentifierPart('$'));
92   EXPECT_TRUE(IsIdentifierPart('_'));
93   EXPECT_TRUE(IsIdentifierPart('\\'));
94   EXPECT_TRUE(IsIdentifierPart(0x200C));
95   EXPECT_TRUE(IsIdentifierPart(0x200D));
96 
97 #ifdef V8_INTL_SUPPORT
98   // New in Unicode 8.0 (6,847 code points)
99   // [:ID_Start:] & [[:Age=8.0:] - [:Age=7.0:]]
100   EXPECT_TRUE(IsIdentifierPart(0x08B3));
101   EXPECT_TRUE(IsIdentifierPart(0x0AF9));
102   EXPECT_TRUE(IsIdentifierPart(0x13F8));
103   EXPECT_TRUE(IsIdentifierPart(0x9FCD));
104   EXPECT_TRUE(IsIdentifierPart(0xAB60));
105   EXPECT_TRUE(IsIdentifierPart(0x10CC0));
106   EXPECT_TRUE(IsIdentifierPart(0x108E0));
107   EXPECT_TRUE(IsIdentifierPart(0x2B820));
108 
109   // [[:ID_Continue:]-[:ID_Start:]] &  [[:Age=8.0:]-[:Age=7.0:]]
110   // 162 code points
111   EXPECT_TRUE(IsIdentifierPart(0x08E3));
112   EXPECT_TRUE(IsIdentifierPart(0xA69E));
113   EXPECT_TRUE(IsIdentifierPart(0x11730));
114 
115   // New in Unicode 9.0 (7,177 code points)
116   // [:ID_Start:] & [[:Age=9.0:] - [:Age=8.0:]]
117   EXPECT_TRUE(IsIdentifierPart(0x1C80));
118   EXPECT_TRUE(IsIdentifierPart(0x104DB));
119   EXPECT_TRUE(IsIdentifierPart(0x1E922));
120 
121   // [[:ID_Continue:]-[:ID_Start:]] &  [[:Age=9.0:]-[:Age=8.0:]]
122   // 162 code points
123   EXPECT_TRUE(IsIdentifierPart(0x08D4));
124   EXPECT_TRUE(IsIdentifierPart(0x1DFB));
125   EXPECT_TRUE(IsIdentifierPart(0xA8C5));
126   EXPECT_TRUE(IsIdentifierPart(0x11450));
127 #endif
128 
129   // http://www.unicode.org/reports/tr31/
130   // curl http://www.unicode.org/Public/UCD/latest/ucd/PropList.txt |
131   // grep 'Other_ID_(Continue|Start)'
132 
133   // Other_ID_Start
134   EXPECT_TRUE(IsIdentifierPart(0x1885));
135   EXPECT_TRUE(IsIdentifierPart(0x1886));
136   EXPECT_TRUE(IsIdentifierPart(0x2118));
137   EXPECT_TRUE(IsIdentifierPart(0x212E));
138   EXPECT_TRUE(IsIdentifierPart(0x309B));
139   EXPECT_TRUE(IsIdentifierPart(0x309C));
140 
141   // Other_ID_Continue
142   EXPECT_TRUE(IsIdentifierPart(0x00B7));
143   EXPECT_TRUE(IsIdentifierPart(0x0387));
144   EXPECT_TRUE(IsIdentifierPart(0x1369));
145   EXPECT_TRUE(IsIdentifierPart(0x1370));
146   EXPECT_TRUE(IsIdentifierPart(0x1371));
147   EXPECT_TRUE(IsIdentifierPart(0x19DA));
148 
149   // Issue 2892:
150   // \u2E2F has the Pattern_Syntax property, excluding it from ID_Start.
151   EXPECT_FALSE(IsIdentifierPart(0x2E2F));
152 }
153 
154 #ifdef V8_INTL_SUPPORT
// Samples code points above U+FFFF and checks how IsIdentifierStart() and
// IsIdentifierPart() classify them, grouped by the expected outcome.
TEST(CharPredicatesTest, SupplementaryPlaneIdentifiers) {
  // Both ID_Start and ID_Continue.
  constexpr int kStartAndPart[] = {
      0x10403,  // Category Lu
      0x1043C,  // Category Ll
      0x16F9C,  // Category Lm
      0x10048,  // Category Lo
      0x1014D,  // Category Nl
  };
  for (const int code_point : kStartAndPart) {
    EXPECT_TRUE(IsIdentifierStart(code_point))
        << "code point: " << code_point;
    EXPECT_TRUE(IsIdentifierPart(code_point)) << "code point: " << code_point;
  }

  // New in Unicode 8.0:
  //   [ [:ID_Start=Yes:] & [:Age=8.0:]] - [:Age=7.0:]
  constexpr int kUnicode8Starts[] = {0x108E0, 0x10C80};
  for (const int code_point : kUnicode8Starts) {
    EXPECT_TRUE(IsIdentifierStart(code_point))
        << "code point: " << code_point;
  }

  // Only ID_Continue.
  constexpr int kPartOnly[] = {
      0x101FD,  // Category Mn
      0x11002,  // Category Mc
      0x104A9,  // Category Nd
  };
  for (const int code_point : kPartOnly) {
    EXPECT_FALSE(IsIdentifierStart(code_point))
        << "code point: " << code_point;
    EXPECT_TRUE(IsIdentifierPart(code_point)) << "code point: " << code_point;
  }

  // Neither.
  constexpr int kNeither[] = {
      0x10111,  // Category No
      0x1F4A9,  // Category So
  };
  for (const int code_point : kNeither) {
    EXPECT_FALSE(IsIdentifierStart(code_point))
        << "code point: " << code_point;
    EXPECT_FALSE(IsIdentifierPart(code_point))
        << "code point: " << code_point;
  }
}
187 #endif  // V8_INTL_SUPPORT
188 
189 }  // namespace internal
190 }  // namespace v8
191