1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2  * vim: set ts=8 sts=2 et sw=2 tw=80:
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #ifndef vm_StaticStrings_h
8 #define vm_StaticStrings_h
9 
10 #include "mozilla/Assertions.h"  // MOZ_ASSERT
11 #include "mozilla/Attributes.h"  // MOZ_ALWAYS_INLINE
12 #include "mozilla/TextUtils.h"  // mozilla::{IsAsciiDigit, IsAsciiLowercaseAlpha, IsAsciiUppercaseAlpha}
13 
14 #include <stddef.h>     // size_t
15 #include <stdint.h>     // int32_t, uint32_t
16 #include <type_traits>  // std::is_same_v
17 
18 #include "jstypes.h"  // JS_PUBLIC_API, js::Bit, js::BitMask
19 
20 #include "js/TypeDecls.h"  // JS::Latin1Char
21 
22 struct JS_PUBLIC_API JSContext;
23 
24 class JSAtom;
25 class JSLinearString;
26 class JSString;
27 
28 namespace js {
29 
30 namespace frontend {
31 class ParserAtomsTable;
32 class TaggedParserAtomIndex;
33 class WellKnownParserAtoms;
34 struct CompilationAtomCache;
35 }  // namespace frontend
36 
37 class StaticStrings {
38   // NOTE: The WellKnownParserAtoms rely on these tables and may need to be
39   //       update if these tables are changed.
40   friend class js::frontend::ParserAtomsTable;
41   friend class js::frontend::TaggedParserAtomIndex;
42   friend class js::frontend::WellKnownParserAtoms;
43   friend struct js::frontend::CompilationAtomCache;
44 
45  private:
46   // Strings matches `[A-Za-z0-9$_]{2}` pattern.
47   // Store each character in 6 bits.
48   // See fromSmallChar/toSmallChar for the mapping.
49   static constexpr size_t SMALL_CHAR_BITS = 6;
50   static constexpr size_t SMALL_CHAR_MASK = js::BitMask(SMALL_CHAR_BITS);
51 
52   // To optimize ASCII -> small char, allocate a table.
53   static constexpr size_t SMALL_CHAR_TABLE_SIZE = 128U;
54   static constexpr size_t NUM_SMALL_CHARS = js::Bit(SMALL_CHAR_BITS);
55   static constexpr size_t NUM_LENGTH2_ENTRIES =
56       NUM_SMALL_CHARS * NUM_SMALL_CHARS;
57 
58   JSAtom* length2StaticTable[NUM_LENGTH2_ENTRIES] = {};  // zeroes
59 
60  public:
61   /* We keep these public for the JITs. */
62   static const size_t UNIT_STATIC_LIMIT = 256U;
63   JSAtom* unitStaticTable[UNIT_STATIC_LIMIT] = {};  // zeroes
64 
65   static const size_t INT_STATIC_LIMIT = 256U;
66   JSAtom* intStaticTable[INT_STATIC_LIMIT] = {};  // zeroes
67 
68   StaticStrings() = default;
69 
70   bool init(JSContext* cx);
71 
hasUint(uint32_t u)72   static bool hasUint(uint32_t u) { return u < INT_STATIC_LIMIT; }
73 
getUint(uint32_t u)74   JSAtom* getUint(uint32_t u) {
75     MOZ_ASSERT(hasUint(u));
76     return intStaticTable[u];
77   }
78 
hasInt(int32_t i)79   static bool hasInt(int32_t i) { return uint32_t(i) < INT_STATIC_LIMIT; }
80 
getInt(int32_t i)81   JSAtom* getInt(int32_t i) {
82     MOZ_ASSERT(hasInt(i));
83     return getUint(uint32_t(i));
84   }
85 
hasUnit(char16_t c)86   static bool hasUnit(char16_t c) { return c < UNIT_STATIC_LIMIT; }
87 
getUnit(char16_t c)88   JSAtom* getUnit(char16_t c) {
89     MOZ_ASSERT(hasUnit(c));
90     return unitStaticTable[c];
91   }
92 
93   /* May not return atom, returns null on (reported) failure. */
94   inline JSLinearString* getUnitStringForElement(JSContext* cx, JSString* str,
95                                                  size_t index);
96 
97   template <typename CharT>
98   static bool isStatic(const CharT* chars, size_t len);
99 
100   /* Return null if no static atom exists for the given (chars, length). */
101   template <typename CharT>
lookup(const CharT * chars,size_t length)102   MOZ_ALWAYS_INLINE JSAtom* lookup(const CharT* chars, size_t length) {
103     static_assert(std::is_same_v<CharT, JS::Latin1Char> ||
104                       std::is_same_v<CharT, char16_t>,
105                   "for understandability, |chars| must be one of a few "
106                   "identified types");
107 
108     switch (length) {
109       case 1: {
110         char16_t c = chars[0];
111         if (c < UNIT_STATIC_LIMIT) {
112           return getUnit(c);
113         }
114         return nullptr;
115       }
116       case 2:
117         if (fitsInSmallChar(chars[0]) && fitsInSmallChar(chars[1])) {
118           return getLength2(chars[0], chars[1]);
119         }
120         return nullptr;
121       case 3:
122         /*
123          * Here we know that JSString::intStringTable covers only 256 (or at
124          * least not 1000 or more) chars. We rely on order here to resolve the
125          * unit vs. int string/length-2 string atom identity issue by giving
126          * priority to unit strings for "0" through "9" and length-2 strings for
127          * "10" through "99".
128          */
129         int i;
130         if (fitsInLength3Static(chars[0], chars[1], chars[2], &i)) {
131           return getInt(i);
132         }
133         return nullptr;
134     }
135 
136     return nullptr;
137   }
138 
lookup(const char * chars,size_t length)139   MOZ_ALWAYS_INLINE JSAtom* lookup(const char* chars, size_t length) {
140     // Collapse calls for |const char*| into |const Latin1Char char*| to avoid
141     // excess instantiations.
142     return lookup(reinterpret_cast<const JS::Latin1Char*>(chars), length);
143   }
144 
145  private:
146   using SmallChar = uint8_t;
147 
148   struct SmallCharTable {
149     SmallChar storage[SMALL_CHAR_TABLE_SIZE];
150 
151     constexpr SmallChar& operator[](size_t idx) { return storage[idx]; }
152     constexpr const SmallChar& operator[](size_t idx) const {
153       return storage[idx];
154     }
155   };
156 
157   static const SmallChar INVALID_SMALL_CHAR = -1;
158 
fitsInSmallChar(char16_t c)159   static bool fitsInSmallChar(char16_t c) {
160     return c < SMALL_CHAR_TABLE_SIZE &&
161            toSmallCharTable[c] != INVALID_SMALL_CHAR;
162   }
163 
164   template <typename CharT>
fitsInLength3Static(CharT c1,CharT c2,CharT c3,int * i)165   static bool fitsInLength3Static(CharT c1, CharT c2, CharT c3, int* i) {
166     static_assert(INT_STATIC_LIMIT <= 299,
167                   "static int strings assumed below to be at most "
168                   "three digits where the first digit is either 1 or 2");
169     if ('1' <= c1 && c1 < '3' && '0' <= c2 && c2 <= '9' && '0' <= c3 &&
170         c3 <= '9') {
171       *i = (c1 - '0') * 100 + (c2 - '0') * 10 + (c3 - '0');
172 
173       if (unsigned(*i) < INT_STATIC_LIMIT) {
174         return true;
175       }
176     }
177     return false;
178   }
179 
180   static constexpr JS::Latin1Char fromSmallChar(SmallChar c);
181 
182   static constexpr SmallChar toSmallChar(uint32_t c);
183 
184   static constexpr SmallCharTable createSmallCharTable();
185 
186   static const SmallCharTable toSmallCharTable;
187 
firstCharOfLength2(size_t s)188   static constexpr JS::Latin1Char firstCharOfLength2(size_t s) {
189     return fromSmallChar(s >> SMALL_CHAR_BITS);
190   }
secondCharOfLength2(size_t s)191   static constexpr JS::Latin1Char secondCharOfLength2(size_t s) {
192     return fromSmallChar(s & SMALL_CHAR_MASK);
193   }
194 
firstCharOfLength3(uint32_t i)195   static constexpr JS::Latin1Char firstCharOfLength3(uint32_t i) {
196     return '0' + (i / 100);
197   }
secondCharOfLength3(uint32_t i)198   static constexpr JS::Latin1Char secondCharOfLength3(uint32_t i) {
199     return '0' + ((i / 10) % 10);
200   }
thirdCharOfLength3(uint32_t i)201   static constexpr JS::Latin1Char thirdCharOfLength3(uint32_t i) {
202     return '0' + (i % 10);
203   }
204 
getLength2Index(char16_t c1,char16_t c2)205   static MOZ_ALWAYS_INLINE size_t getLength2Index(char16_t c1, char16_t c2) {
206     MOZ_ASSERT(fitsInSmallChar(c1));
207     MOZ_ASSERT(fitsInSmallChar(c2));
208     return (size_t(toSmallCharTable[c1]) << SMALL_CHAR_BITS) +
209            toSmallCharTable[c2];
210   }
211 
212   // Same as getLength2Index, but withtout runtime assertion,
213   // this should be used only for known static string.
getLength2IndexStatic(char c1,char c2)214   static constexpr size_t getLength2IndexStatic(char c1, char c2) {
215     return (size_t(toSmallChar(c1)) << SMALL_CHAR_BITS) + toSmallChar(c2);
216   }
217 
getLength2FromIndex(size_t index)218   MOZ_ALWAYS_INLINE JSAtom* getLength2FromIndex(size_t index) {
219     return length2StaticTable[index];
220   }
221 
getLength2(char16_t c1,char16_t c2)222   MOZ_ALWAYS_INLINE JSAtom* getLength2(char16_t c1, char16_t c2) {
223     return getLength2FromIndex(getLength2Index(c1, c2));
224   }
225 };
226 
/*
 * Declare length-2 strings. We only store strings where both characters match
 * `[A-Za-z0-9$_]`. The lower 10 small chars are the numerals, the next 26 are
 * the lowercase letters, the next 26 are the uppercase letters, and the last
 * two are '$' (62) and '_' (63).
 */
232 
fromSmallChar(SmallChar c)233 constexpr JS::Latin1Char StaticStrings::fromSmallChar(SmallChar c) {
234   if (c < 10) {
235     return c + '0';
236   }
237   if (c < 36) {
238     return c + 'a' - 10;
239   }
240   if (c < 62) {
241     return c + 'A' - 36;
242   }
243   if (c == 62) {
244     return '$';
245   }
246   return '_';
247 }
248 
toSmallChar(uint32_t c)249 constexpr StaticStrings::SmallChar StaticStrings::toSmallChar(uint32_t c) {
250   if (mozilla::IsAsciiDigit(c)) {
251     return c - '0';
252   }
253   if (mozilla::IsAsciiLowercaseAlpha(c)) {
254     return c - 'a' + 10;
255   }
256   if (mozilla::IsAsciiUppercaseAlpha(c)) {
257     return c - 'A' + 36;
258   }
259   if (c == '$') {
260     return 62;
261   }
262   if (c == '_') {
263     return 63;
264   }
265   return StaticStrings::INVALID_SMALL_CHAR;
266 }
267 
268 }  // namespace js
269 
270 #endif /* vm_StaticStrings_h */
271