1 // Copyright 2017 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef V8_ASMJS_ASM_SCANNER_H_ 6 #define V8_ASMJS_ASM_SCANNER_H_ 7 8 #include <memory> 9 #include <string> 10 #include <unordered_map> 11 12 #include "src/asmjs/asm-names.h" 13 #include "src/base/logging.h" 14 #include "src/globals.h" 15 16 namespace v8 { 17 namespace internal { 18 19 class Utf16CharacterStream; 20 21 // A custom scanner to extract the token stream needed to parse valid 22 // asm.js: http://asmjs.org/spec/latest/ 23 // This scanner intentionally avoids the portion of JavaScript lexing 24 // that are not required to determine if code is valid asm.js code. 25 // * Strings are disallowed except for 'use asm'. 26 // * Only the subset of keywords needed to check asm.js invariants are 27 // included. 28 // * Identifiers are accumulated into local + global string tables 29 // (for performance). 30 class V8_EXPORT_PRIVATE AsmJsScanner { 31 public: 32 typedef int32_t token_t; 33 34 explicit AsmJsScanner(Utf16CharacterStream* stream); 35 36 // Get current token. Token()37 token_t Token() const { return token_; } 38 // Get position of current token. Position()39 size_t Position() const { return position_; } 40 // Advance to the next token. 41 void Next(); 42 // Back up by one token. 43 void Rewind(); 44 45 // Get raw string for current identifier. Note that the returned string will 46 // become invalid when the scanner advances, create a copy to preserve it. GetIdentifierString()47 const std::string& GetIdentifierString() const { 48 // Identifier strings don't work after a rewind. 49 DCHECK(!rewind_); 50 return identifier_string_; 51 } 52 53 // Check if we just passed a newline. IsPrecededByNewline()54 bool IsPrecededByNewline() const { 55 // Newline tracking doesn't work if you back up. 56 DCHECK(!rewind_); 57 return preceded_by_newline_; 58 } 59 60 #if DEBUG 61 // Debug only method to go from a token back to its name. 62 // Slow, only use for debugging. 63 std::string Name(token_t token) const; 64 #endif 65 66 // Restores old position (token after that position). Note that it is not 67 // allowed to rewind right after a seek, because previous tokens are unknown. 68 void Seek(size_t pos); 69 70 // Select whether identifiers are resolved in global or local scope, 71 // and which scope new identifiers are added to. EnterLocalScope()72 void EnterLocalScope() { in_local_scope_ = true; } EnterGlobalScope()73 void EnterGlobalScope() { in_local_scope_ = false; } 74 // Drop all current local identifiers. 75 void ResetLocals(); 76 77 // Methods to check if a token is an identifier and which scope. IsLocal()78 bool IsLocal() const { return IsLocal(Token()); } IsGlobal()79 bool IsGlobal() const { return IsGlobal(Token()); } IsLocal(token_t token)80 static bool IsLocal(token_t token) { return token <= kLocalsStart; } IsGlobal(token_t token)81 static bool IsGlobal(token_t token) { return token >= kGlobalsStart; } 82 // Methods to find the index position of an identifier (count starting from 83 // 0 for each scope separately). LocalIndex(token_t token)84 static size_t LocalIndex(token_t token) { 85 DCHECK(IsLocal(token)); 86 return -(token - kLocalsStart); 87 } GlobalIndex(token_t token)88 static size_t GlobalIndex(token_t token) { 89 DCHECK(IsGlobal(token)); 90 return token - kGlobalsStart; 91 } 92 93 // Methods to check if the current token is a numeric literal considered an 94 // asm.js "double" (contains a dot) or an "unsigned" (without a dot). Note 95 // that numbers without a dot outside the [0 .. 2^32) range are errors. IsUnsigned()96 bool IsUnsigned() const { return Token() == kUnsigned; } AsUnsigned()97 uint32_t AsUnsigned() const { 98 DCHECK(IsUnsigned()); 99 return unsigned_value_; 100 } IsDouble()101 bool IsDouble() const { return Token() == kDouble; } AsDouble()102 double AsDouble() const { 103 DCHECK(IsDouble()); 104 return double_value_; 105 } 106 107 // clang-format off 108 enum { 109 // [-10000-kMaxIdentifierCount, -10000) :: Local identifiers (counting 110 // backwards) 111 // [-10000 .. -1) :: Builtin tokens like keywords 112 // (also includes some special 113 // ones like end of input) 114 // 0 .. 255 :: Single char tokens 115 // 256 .. 256+kMaxIdentifierCount :: Global identifiers 116 kLocalsStart = -10000, 117 #define V(name, _junk1, _junk2, _junk3) kToken_##name, 118 STDLIB_MATH_FUNCTION_LIST(V) 119 STDLIB_ARRAY_TYPE_LIST(V) 120 #undef V 121 #define V(name, _junk1) kToken_##name, 122 STDLIB_MATH_VALUE_LIST(V) 123 #undef V 124 #define V(name) kToken_##name, 125 STDLIB_OTHER_LIST(V) 126 KEYWORD_NAME_LIST(V) 127 #undef V 128 #define V(rawname, name) kToken_##name, 129 LONG_SYMBOL_NAME_LIST(V) 130 #undef V 131 #define V(name, value, string_name) name = value, 132 SPECIAL_TOKEN_LIST(V) 133 #undef V 134 kGlobalsStart = 256, 135 }; 136 // clang-format on 137 138 private: 139 Utf16CharacterStream* stream_; 140 token_t token_; 141 token_t preceding_token_; 142 token_t next_token_; // Only set when in {rewind} state. 143 size_t position_; // Corresponds to {token} position. 144 size_t preceding_position_; // Corresponds to {preceding_token} position. 145 size_t next_position_; // Only set when in {rewind} state. 146 bool rewind_; 147 std::string identifier_string_; 148 bool in_local_scope_; 149 std::unordered_map<std::string, token_t> local_names_; 150 std::unordered_map<std::string, token_t> global_names_; 151 std::unordered_map<std::string, token_t> property_names_; 152 int global_count_; 153 double double_value_; 154 uint32_t unsigned_value_; 155 bool preceded_by_newline_; 156 157 // Consume multiple characters. 158 void ConsumeIdentifier(uc32 ch); 159 void ConsumeNumber(uc32 ch); 160 bool ConsumeCComment(); 161 void ConsumeCPPComment(); 162 void ConsumeString(uc32 quote); 163 void ConsumeCompareOrShift(uc32 ch); 164 165 // Classify character categories. 166 bool IsIdentifierStart(uc32 ch); 167 bool IsIdentifierPart(uc32 ch); 168 bool IsNumberStart(uc32 ch); 169 }; 170 171 } // namespace internal 172 } // namespace v8 173 174 #endif // V8_ASMJS_ASM_SCANNER_H_ 175