1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_STRINGS_CHAR_PREDICATES_INL_H_
6 #define V8_STRINGS_CHAR_PREDICATES_INL_H_
7 
8 #include "src/base/bounds.h"
9 #include "src/strings/char-predicates.h"
10 #include "src/utils/utils.h"
11 
12 namespace v8 {
13 namespace internal {
14 
15 // If c is in 'A'-'Z' or 'a'-'z', return its lower-case.
16 // Else, return something outside of 'A'-'Z' and 'a'-'z'.
17 // Note: it ignores LOCALE.
AsciiAlphaToLower(uc32 c)18 inline constexpr int AsciiAlphaToLower(uc32 c) { return c | 0x20; }
19 
IsCarriageReturn(uc32 c)20 inline constexpr bool IsCarriageReturn(uc32 c) { return c == 0x000D; }
21 
IsLineFeed(uc32 c)22 inline constexpr bool IsLineFeed(uc32 c) { return c == 0x000A; }
23 
IsAsciiIdentifier(uc32 c)24 inline constexpr bool IsAsciiIdentifier(uc32 c) {
25   return IsAlphaNumeric(c) || c == '$' || c == '_';
26 }
27 
IsAlphaNumeric(uc32 c)28 inline constexpr bool IsAlphaNumeric(uc32 c) {
29   return base::IsInRange(AsciiAlphaToLower(c), 'a', 'z') || IsDecimalDigit(c);
30 }
31 
IsDecimalDigit(uc32 c)32 inline constexpr bool IsDecimalDigit(uc32 c) {
33   // ECMA-262, 3rd, 7.8.3 (p 16)
34   return base::IsInRange(c, '0', '9');
35 }
36 
IsHexDigit(uc32 c)37 inline constexpr bool IsHexDigit(uc32 c) {
38   // ECMA-262, 3rd, 7.6 (p 15)
39   return IsDecimalDigit(c) || base::IsInRange(AsciiAlphaToLower(c), 'a', 'f');
40 }
41 
IsOctalDigit(uc32 c)42 inline constexpr bool IsOctalDigit(uc32 c) {
43   // ECMA-262, 6th, 7.8.3
44   return base::IsInRange(c, '0', '7');
45 }
46 
IsNonOctalDecimalDigit(uc32 c)47 inline constexpr bool IsNonOctalDecimalDigit(uc32 c) {
48   return base::IsInRange(c, '8', '9');
49 }
50 
IsBinaryDigit(uc32 c)51 inline constexpr bool IsBinaryDigit(uc32 c) {
52   // ECMA-262, 6th, 7.8.3
53   return c == '0' || c == '1';
54 }
55 
IsAsciiLower(uc32 c)56 inline constexpr bool IsAsciiLower(uc32 c) {
57   return base::IsInRange(c, 'a', 'z');
58 }
59 
IsAsciiUpper(uc32 c)60 inline constexpr bool IsAsciiUpper(uc32 c) {
61   return base::IsInRange(c, 'A', 'Z');
62 }
63 
ToAsciiUpper(uc32 c)64 inline constexpr uc32 ToAsciiUpper(uc32 c) {
65   return c & ~(IsAsciiLower(c) << 5);
66 }
67 
ToAsciiLower(uc32 c)68 inline constexpr uc32 ToAsciiLower(uc32 c) {
69   return c | (IsAsciiUpper(c) << 5);
70 }
71 
IsRegExpWord(uc32 c)72 inline constexpr bool IsRegExpWord(uc32 c) {
73   return IsAlphaNumeric(c) || c == '_';
74 }
75 
// Bit flags stored per one-byte character in the constexpr cache table
// (kOneByteCharFlags). Each enumerator is a distinct single bit so that
// several of them can be OR-ed together into one uint8_t table entry.
enum OneByteCharFlags {
  kIsIdentifierStart = 0x01,
  kIsIdentifierPart = 0x02,
  kIsWhiteSpace = 0x04,
  kIsWhiteSpaceOrLineTerminator = 0x08,
  // Set for '\r', '\n' and for the one-byte values equal to the low byte of
  // 0x2028 / 0x2029.
  kMaybeLineEnd = 0x10
};
84 
85 // See http://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
86 // ID_Start. Additionally includes '_' and '$'.
IsOneByteIDStart(uc32 c)87 constexpr bool IsOneByteIDStart(uc32 c) {
88   return c == 0x0024 || (c >= 0x0041 && c <= 0x005A) || c == 0x005F ||
89          (c >= 0x0061 && c <= 0x007A) || c == 0x00AA || c == 0x00B5 ||
90          c == 0x00BA || (c >= 0x00C0 && c <= 0x00D6) ||
91          (c >= 0x00D8 && c <= 0x00F6) || (c >= 0x00F8 && c <= 0x00FF);
92 }
93 
94 // See http://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
95 // ID_Continue. Additionally includes '_' and '$'.
IsOneByteIDContinue(uc32 c)96 constexpr bool IsOneByteIDContinue(uc32 c) {
97   return c == 0x0024 || (c >= 0x0030 && c <= 0x0039) || c == 0x005F ||
98          (c >= 0x0041 && c <= 0x005A) || (c >= 0x0061 && c <= 0x007A) ||
99          c == 0x00AA || c == 0x00B5 || c == 0x00B7 || c == 0x00BA ||
100          (c >= 0x00C0 && c <= 0x00D6) || (c >= 0x00D8 && c <= 0x00F6) ||
101          (c >= 0x00F8 && c <= 0x00FF);
102 }
103 
IsOneByteWhitespace(uc32 c)104 constexpr bool IsOneByteWhitespace(uc32 c) {
105   return c == '\t' || c == '\v' || c == '\f' || c == ' ' || c == u'\xa0';
106 }
107 
BuildOneByteCharFlags(uc32 c)108 constexpr uint8_t BuildOneByteCharFlags(uc32 c) {
109   uint8_t result = 0;
110   if (IsOneByteIDStart(c) || c == '\\') result |= kIsIdentifierStart;
111   if (IsOneByteIDContinue(c) || c == '\\') result |= kIsIdentifierPart;
112   if (IsOneByteWhitespace(c)) {
113     result |= kIsWhiteSpace | kIsWhiteSpaceOrLineTerminator;
114   }
115   if (c == '\r' || c == '\n') {
116     result |= kIsWhiteSpaceOrLineTerminator | kMaybeLineEnd;
117   }
118   // Add markers to identify 0x2028 and 0x2029.
119   if (c == static_cast<uint8_t>(0x2028) || c == static_cast<uint8_t>(0x2029)) {
120     result |= kMaybeLineEnd;
121   }
122   return result;
123 }
124 const constexpr uint8_t kOneByteCharFlags[256] = {
125 #define BUILD_CHAR_FLAGS(N) BuildOneByteCharFlags(N),
126     INT_0_TO_127_LIST(BUILD_CHAR_FLAGS)
127 #undef BUILD_CHAR_FLAGS
128 #define BUILD_CHAR_FLAGS(N) BuildOneByteCharFlags(N + 128),
129         INT_0_TO_127_LIST(BUILD_CHAR_FLAGS)
130 #undef BUILD_CHAR_FLAGS
131 };
132 
IsIdentifierStart(uc32 c)133 bool IsIdentifierStart(uc32 c) {
134   if (!base::IsInRange(c, 0, 255)) return IsIdentifierStartSlow(c);
135   DCHECK_EQ(IsIdentifierStartSlow(c),
136             static_cast<bool>(kOneByteCharFlags[c] & kIsIdentifierStart));
137   return kOneByteCharFlags[c] & kIsIdentifierStart;
138 }
139 
IsIdentifierPart(uc32 c)140 bool IsIdentifierPart(uc32 c) {
141   if (!base::IsInRange(c, 0, 255)) return IsIdentifierPartSlow(c);
142   DCHECK_EQ(IsIdentifierPartSlow(c),
143             static_cast<bool>(kOneByteCharFlags[c] & kIsIdentifierPart));
144   return kOneByteCharFlags[c] & kIsIdentifierPart;
145 }
146 
IsWhiteSpace(uc32 c)147 bool IsWhiteSpace(uc32 c) {
148   if (!base::IsInRange(c, 0, 255)) return IsWhiteSpaceSlow(c);
149   DCHECK_EQ(IsWhiteSpaceSlow(c),
150             static_cast<bool>(kOneByteCharFlags[c] & kIsWhiteSpace));
151   return kOneByteCharFlags[c] & kIsWhiteSpace;
152 }
153 
IsWhiteSpaceOrLineTerminator(uc32 c)154 bool IsWhiteSpaceOrLineTerminator(uc32 c) {
155   if (!base::IsInRange(c, 0, 255)) return IsWhiteSpaceOrLineTerminatorSlow(c);
156   DCHECK_EQ(
157       IsWhiteSpaceOrLineTerminatorSlow(c),
158       static_cast<bool>(kOneByteCharFlags[c] & kIsWhiteSpaceOrLineTerminator));
159   return kOneByteCharFlags[c] & kIsWhiteSpaceOrLineTerminator;
160 }
161 
IsLineTerminatorSequence(uc32 c,uc32 next)162 bool IsLineTerminatorSequence(uc32 c, uc32 next) {
163   if (kOneByteCharFlags[static_cast<uint8_t>(c)] & kMaybeLineEnd) {
164     if (c == '\n') return true;
165     if (c == '\r') return next != '\n';
166     return base::IsInRange(static_cast<unsigned int>(c), 0x2028u, 0x2029u);
167   }
168   return false;
169 }
170 
171 }  // namespace internal
172 
173 }  // namespace v8
174 
175 #endif  // V8_STRINGS_CHAR_PREDICATES_INL_H_
176