1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * vim: set ts=8 sts=2 et sw=2 tw=80:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 #ifndef vm_StaticStrings_h
8 #define vm_StaticStrings_h
9
10 #include "mozilla/Assertions.h" // MOZ_ASSERT
11 #include "mozilla/Attributes.h" // MOZ_ALWAYS_INLINE
12 #include "mozilla/TextUtils.h" // mozilla::{IsAsciiDigit, IsAsciiLowercaseAlpha, IsAsciiUppercaseAlpha}
13
14 #include <stddef.h> // size_t
15 #include <stdint.h> // int32_t, uint32_t
16 #include <type_traits> // std::is_same_v
17
18 #include "jstypes.h" // JS_PUBLIC_API, js::Bit, js::BitMask
19
20 #include "js/TypeDecls.h" // JS::Latin1Char
21
22 struct JS_PUBLIC_API JSContext;
23
24 class JSAtom;
25 class JSLinearString;
26 class JSString;
27
28 namespace js {
29
30 namespace frontend {
31 class ParserAtomsTable;
32 class TaggedParserAtomIndex;
33 class WellKnownParserAtoms;
34 struct CompilationAtomCache;
35 } // namespace frontend
36
37 class StaticStrings {
38 // NOTE: The WellKnownParserAtoms rely on these tables and may need to be
39 // update if these tables are changed.
40 friend class js::frontend::ParserAtomsTable;
41 friend class js::frontend::TaggedParserAtomIndex;
42 friend class js::frontend::WellKnownParserAtoms;
43 friend struct js::frontend::CompilationAtomCache;
44
45 private:
46 // Strings matches `[A-Za-z0-9$_]{2}` pattern.
47 // Store each character in 6 bits.
48 // See fromSmallChar/toSmallChar for the mapping.
49 static constexpr size_t SMALL_CHAR_BITS = 6;
50 static constexpr size_t SMALL_CHAR_MASK = js::BitMask(SMALL_CHAR_BITS);
51
52 // To optimize ASCII -> small char, allocate a table.
53 static constexpr size_t SMALL_CHAR_TABLE_SIZE = 128U;
54 static constexpr size_t NUM_SMALL_CHARS = js::Bit(SMALL_CHAR_BITS);
55 static constexpr size_t NUM_LENGTH2_ENTRIES =
56 NUM_SMALL_CHARS * NUM_SMALL_CHARS;
57
58 JSAtom* length2StaticTable[NUM_LENGTH2_ENTRIES] = {}; // zeroes
59
60 public:
61 /* We keep these public for the JITs. */
62 static const size_t UNIT_STATIC_LIMIT = 256U;
63 JSAtom* unitStaticTable[UNIT_STATIC_LIMIT] = {}; // zeroes
64
65 static const size_t INT_STATIC_LIMIT = 256U;
66 JSAtom* intStaticTable[INT_STATIC_LIMIT] = {}; // zeroes
67
68 StaticStrings() = default;
69
70 bool init(JSContext* cx);
71
hasUint(uint32_t u)72 static bool hasUint(uint32_t u) { return u < INT_STATIC_LIMIT; }
73
getUint(uint32_t u)74 JSAtom* getUint(uint32_t u) {
75 MOZ_ASSERT(hasUint(u));
76 return intStaticTable[u];
77 }
78
hasInt(int32_t i)79 static bool hasInt(int32_t i) { return uint32_t(i) < INT_STATIC_LIMIT; }
80
getInt(int32_t i)81 JSAtom* getInt(int32_t i) {
82 MOZ_ASSERT(hasInt(i));
83 return getUint(uint32_t(i));
84 }
85
hasUnit(char16_t c)86 static bool hasUnit(char16_t c) { return c < UNIT_STATIC_LIMIT; }
87
getUnit(char16_t c)88 JSAtom* getUnit(char16_t c) {
89 MOZ_ASSERT(hasUnit(c));
90 return unitStaticTable[c];
91 }
92
93 /* May not return atom, returns null on (reported) failure. */
94 inline JSLinearString* getUnitStringForElement(JSContext* cx, JSString* str,
95 size_t index);
96
97 template <typename CharT>
98 static bool isStatic(const CharT* chars, size_t len);
99
100 /* Return null if no static atom exists for the given (chars, length). */
101 template <typename CharT>
lookup(const CharT * chars,size_t length)102 MOZ_ALWAYS_INLINE JSAtom* lookup(const CharT* chars, size_t length) {
103 static_assert(std::is_same_v<CharT, JS::Latin1Char> ||
104 std::is_same_v<CharT, char16_t>,
105 "for understandability, |chars| must be one of a few "
106 "identified types");
107
108 switch (length) {
109 case 1: {
110 char16_t c = chars[0];
111 if (c < UNIT_STATIC_LIMIT) {
112 return getUnit(c);
113 }
114 return nullptr;
115 }
116 case 2:
117 if (fitsInSmallChar(chars[0]) && fitsInSmallChar(chars[1])) {
118 return getLength2(chars[0], chars[1]);
119 }
120 return nullptr;
121 case 3:
122 /*
123 * Here we know that JSString::intStringTable covers only 256 (or at
124 * least not 1000 or more) chars. We rely on order here to resolve the
125 * unit vs. int string/length-2 string atom identity issue by giving
126 * priority to unit strings for "0" through "9" and length-2 strings for
127 * "10" through "99".
128 */
129 int i;
130 if (fitsInLength3Static(chars[0], chars[1], chars[2], &i)) {
131 return getInt(i);
132 }
133 return nullptr;
134 }
135
136 return nullptr;
137 }
138
lookup(const char * chars,size_t length)139 MOZ_ALWAYS_INLINE JSAtom* lookup(const char* chars, size_t length) {
140 // Collapse calls for |const char*| into |const Latin1Char char*| to avoid
141 // excess instantiations.
142 return lookup(reinterpret_cast<const JS::Latin1Char*>(chars), length);
143 }
144
145 private:
146 using SmallChar = uint8_t;
147
148 struct SmallCharTable {
149 SmallChar storage[SMALL_CHAR_TABLE_SIZE];
150
151 constexpr SmallChar& operator[](size_t idx) { return storage[idx]; }
152 constexpr const SmallChar& operator[](size_t idx) const {
153 return storage[idx];
154 }
155 };
156
157 static const SmallChar INVALID_SMALL_CHAR = -1;
158
fitsInSmallChar(char16_t c)159 static bool fitsInSmallChar(char16_t c) {
160 return c < SMALL_CHAR_TABLE_SIZE &&
161 toSmallCharTable[c] != INVALID_SMALL_CHAR;
162 }
163
164 template <typename CharT>
fitsInLength3Static(CharT c1,CharT c2,CharT c3,int * i)165 static bool fitsInLength3Static(CharT c1, CharT c2, CharT c3, int* i) {
166 static_assert(INT_STATIC_LIMIT <= 299,
167 "static int strings assumed below to be at most "
168 "three digits where the first digit is either 1 or 2");
169 if ('1' <= c1 && c1 < '3' && '0' <= c2 && c2 <= '9' && '0' <= c3 &&
170 c3 <= '9') {
171 *i = (c1 - '0') * 100 + (c2 - '0') * 10 + (c3 - '0');
172
173 if (unsigned(*i) < INT_STATIC_LIMIT) {
174 return true;
175 }
176 }
177 return false;
178 }
179
180 static constexpr JS::Latin1Char fromSmallChar(SmallChar c);
181
182 static constexpr SmallChar toSmallChar(uint32_t c);
183
184 static constexpr SmallCharTable createSmallCharTable();
185
186 static const SmallCharTable toSmallCharTable;
187
firstCharOfLength2(size_t s)188 static constexpr JS::Latin1Char firstCharOfLength2(size_t s) {
189 return fromSmallChar(s >> SMALL_CHAR_BITS);
190 }
secondCharOfLength2(size_t s)191 static constexpr JS::Latin1Char secondCharOfLength2(size_t s) {
192 return fromSmallChar(s & SMALL_CHAR_MASK);
193 }
194
firstCharOfLength3(uint32_t i)195 static constexpr JS::Latin1Char firstCharOfLength3(uint32_t i) {
196 return '0' + (i / 100);
197 }
secondCharOfLength3(uint32_t i)198 static constexpr JS::Latin1Char secondCharOfLength3(uint32_t i) {
199 return '0' + ((i / 10) % 10);
200 }
thirdCharOfLength3(uint32_t i)201 static constexpr JS::Latin1Char thirdCharOfLength3(uint32_t i) {
202 return '0' + (i % 10);
203 }
204
getLength2Index(char16_t c1,char16_t c2)205 static MOZ_ALWAYS_INLINE size_t getLength2Index(char16_t c1, char16_t c2) {
206 MOZ_ASSERT(fitsInSmallChar(c1));
207 MOZ_ASSERT(fitsInSmallChar(c2));
208 return (size_t(toSmallCharTable[c1]) << SMALL_CHAR_BITS) +
209 toSmallCharTable[c2];
210 }
211
212 // Same as getLength2Index, but withtout runtime assertion,
213 // this should be used only for known static string.
getLength2IndexStatic(char c1,char c2)214 static constexpr size_t getLength2IndexStatic(char c1, char c2) {
215 return (size_t(toSmallChar(c1)) << SMALL_CHAR_BITS) + toSmallChar(c2);
216 }
217
getLength2FromIndex(size_t index)218 MOZ_ALWAYS_INLINE JSAtom* getLength2FromIndex(size_t index) {
219 return length2StaticTable[index];
220 }
221
getLength2(char16_t c1,char16_t c2)222 MOZ_ALWAYS_INLINE JSAtom* getLength2(char16_t c1, char16_t c2) {
223 return getLength2FromIndex(getLength2Index(c1, c2));
224 }
225 };
226
/*
 * Define the small-char mapping used for length-2 strings. We only store
 * strings where both characters match `[A-Za-z0-9$_]`. The lower 10 small
 * chars are the numerals, the next 26 are the lowercase letters, the next 26
 * are the uppercase letters, and the final two (62 and 63) are '$' and '_'.
 */
232
fromSmallChar(SmallChar c)233 constexpr JS::Latin1Char StaticStrings::fromSmallChar(SmallChar c) {
234 if (c < 10) {
235 return c + '0';
236 }
237 if (c < 36) {
238 return c + 'a' - 10;
239 }
240 if (c < 62) {
241 return c + 'A' - 36;
242 }
243 if (c == 62) {
244 return '$';
245 }
246 return '_';
247 }
248
toSmallChar(uint32_t c)249 constexpr StaticStrings::SmallChar StaticStrings::toSmallChar(uint32_t c) {
250 if (mozilla::IsAsciiDigit(c)) {
251 return c - '0';
252 }
253 if (mozilla::IsAsciiLowercaseAlpha(c)) {
254 return c - 'a' + 10;
255 }
256 if (mozilla::IsAsciiUppercaseAlpha(c)) {
257 return c - 'A' + 36;
258 }
259 if (c == '$') {
260 return 62;
261 }
262 if (c == '_') {
263 return 63;
264 }
265 return StaticStrings::INVALID_SMALL_CHAR;
266 }
267
268 } // namespace js
269
270 #endif /* vm_StaticStrings_h */
271