1 //-------------------------------------------------------------------------- 2 // Copyright (C) 2021-2021 Cisco and/or its affiliates. All rights reserved. 3 // 4 // This program is free software; you can redistribute it and/or modify it 5 // under the terms of the GNU General Public License Version 2 as published 6 // by the Free Software Foundation. You may not use, modify or distribute 7 // this program under any other version of the GNU General Public License. 8 // 9 // This program is distributed in the hope that it will be useful, but 10 // WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 // General Public License for more details. 13 // 14 // You should have received a copy of the GNU General Public License along 15 // with this program; if not, write to the Free Software Foundation, Inc., 16 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 //-------------------------------------------------------------------------- 18 // js_tokenizer.h author Oleksandr Serhiienko <oserhiie@cisco.com> 19 20 #ifndef JS_TOKENIZER_H 21 #define JS_TOKENIZER_H 22 23 #include <sstream> 24 #include <stack> 25 #include <vector> 26 27 #include "log/messages.h" 28 #include "main/snort_debug.h" 29 #include "service_inspectors/http_inspect/http_enum.h" 30 31 extern THREAD_LOCAL const snort::Trace* http_trace; 32 33 // The longest pattern has 9 characters " < / s c r i p t > ", 34 // 8 of them can reside in 1st chunk 35 // Each character in the identifier forms its own group (pattern matching case), 36 // i.e. in the current implementation IDENTIFIER has " . " rule. 37 #define JSTOKENIZER_MAX_STATES 8 38 39 // To hold potentially long identifiers 40 #define JSTOKENIZER_BUF_MAX_SIZE 256 41 42 enum JSProgramScopeType : unsigned int; 43 44 class JSIdentifierCtxBase; 45 46 class JSTokenizer : public yyFlexLexer 47 { 48 private: 49 enum JSToken 50 { 51 UNDEFINED = 0, 52 IDENTIFIER, 53 KEYWORD, 54 PUNCTUATOR, 55 OPERATOR, 56 LITERAL, 57 DIRECTIVE, 58 DOT, 59 COLON, 60 CLOSING_BRACKET, 61 KEYWORD_VAR_DECL, // var, let, const 62 KEYWORD_FUNCTION, 63 KEYWORD_BLOCK, // for all block-definition keywords e.g. if, else, for, etc. 64 KEYWORD_CLASS, 65 OPERATOR_ASSIGNMENT, 66 OPERATOR_COMPLEX_ASSIGNMENT, 67 OPERATOR_COMPARISON, 68 OPERATOR_LOGICAL, 69 OPERATOR_SHIFT 70 }; 71 72 enum ScopeType 73 { 74 GLOBAL = 0, // not in the brackets (the initial one) 75 BRACES, // {} 76 PARENTHESES, // () 77 BRACKETS // [] 78 }; 79 enum ScopeMetaType 80 { 81 NOT_SET = 0, 82 FUNCTION, // function, arrow function 83 BLOCK, // if, else, for, while, do, with, switch, try, catch, finally, block of code 84 OBJECT, // object definition, class definition 85 SCOPE_META_TYPE_MAX 86 }; 87 struct Scope 88 { ScopeScope89 Scope(ScopeType t) : 90 type(t), meta_type(ScopeMetaType::NOT_SET), ident_norm(true), func_call(false), 91 block_param(false), do_loop(false) 92 {} 93 94 ScopeType type; 95 ScopeMetaType meta_type; 96 bool ident_norm; 97 bool func_call; 98 bool block_param; 99 bool do_loop; 100 }; 101 102 enum ASIGroup 103 { 104 ASI_OTHER = 0, 105 ASI_GROUP_1, // { 106 ASI_GROUP_2, // } 107 ASI_GROUP_3, // [ ( 108 ASI_GROUP_4, // ] 109 ASI_GROUP_5, // ) 110 ASI_GROUP_6, // + - 111 ASI_GROUP_7, // this true false null identifier literal 112 //IDENTIFIER + LITERAL + KEYWORD_LITERAL 113 ASI_GROUP_8, // ++ -- 114 ASI_GROUP_9, // continue break return debugger // same as KEYWORD_BA 115 ASI_GROUP_10, // var function new delete void typeof if do while for with 116 // switch throw try ~ + 117 ASI_GROUP_MAX 118 }; 119 120 enum AliasState 121 { 122 ALIAS_NONE = 0, 123 ALIAS_DEFINITION, // var a 124 ALIAS_PREFIX, // var a +%possible PDU split% 125 // to handle ambiguity between a++, a+=, and a + b 126 ALIAS_EQUALS, // var a = 127 ALIAS_VALUE // var a = eval 128 }; 129 130 public: 131 enum JSRet 132 { 133 EOS = 0, 134 SCRIPT_ENDED, 135 SCRIPT_CONTINUE, 136 OPENING_TAG, 137 CLOSING_TAG, 138 BAD_TOKEN, 139 IDENTIFIER_OVERFLOW, 140 TEMPLATE_NESTING_OVERFLOW, 141 BRACKET_NESTING_OVERFLOW, 142 SCOPE_NESTING_OVERFLOW, 143 WRONG_CLOSING_SYMBOL, 144 ENDED_IN_INNER_SCOPE, 145 MAX 146 }; 147 148 JSTokenizer() = delete; 149 explicit JSTokenizer(std::istream& in, std::ostream& out, JSIdentifierCtxBase& ident_ctx, 150 uint8_t max_template_nesting, uint32_t max_bracket_depth, char*& buf, size_t& buf_size, 151 int cap_size = JSTOKENIZER_BUF_MAX_SIZE); 152 ~JSTokenizer() override; 153 154 JSRet process(size_t& bytes_in); 155 156 protected: LexerError(const char * msg)157 [[noreturn]] void LexerError(const char* msg) override 158 { snort::FatalError("%s", msg); } 159 160 private: 161 int yylex() override; 162 163 void switch_to_initial(); 164 void switch_to_temporal(const std::string& data); 165 JSRet eval_eof(); 166 JSRet do_spacing(JSToken cur_token); 167 JSRet do_operator_spacing(); 168 JSRet do_semicolon_insertion(ASIGroup current); 169 JSRet do_identifier_substitution(const char* lexeme, bool id_part); 170 JSRet push_identifier(const char* ident); 171 bool unescape(const char* lexeme); 172 void process_punctuator(JSToken tok = PUNCTUATOR); 173 void process_closing_brace(); 174 JSRet process_subst_open(); 175 176 bool states_process(); 177 void states_correct(int); 178 void states_reset(); 179 void states_over(); 180 void states_adjust(); 181 182 // scope stack servicing 183 JSRet scope_push(ScopeType); 184 JSRet scope_pop(ScopeType); 185 Scope& scope_cur(); 186 187 // program scope stack servicing 188 JSRet p_scope_push(ScopeMetaType); 189 JSRet p_scope_pop(ScopeMetaType); 190 191 // interactions with the current scope 192 bool global_scope(); 193 void set_meta_type(ScopeMetaType); 194 ScopeMetaType meta_type(); 195 void set_ident_norm(bool); 196 bool ident_norm(); 197 void set_func_call(bool); 198 bool func_call(); 199 void set_block_param(bool); 200 bool block_param(); 201 void set_do_loop(bool); 202 bool do_loop(); 203 204 static JSProgramScopeType m2p(ScopeMetaType); 205 static const char* m2str(ScopeMetaType); 206 static bool is_operator(JSToken); 207 208 void dealias_clear_mutated(bool id_part); 209 void dealias_increment(); 210 void dealias_identifier(bool id_part, bool assignment_start); 211 void dealias_reset(); 212 void dealias_prefix_reset(); 213 void dealias_equals(bool complex); 214 void dealias_append(); 215 void dealias_finalize(); 216 217 static const char* p_scope_codes[]; 218 219 void* cur_buffer; 220 void* tmp_buffer = nullptr; 221 std::stringstream tmp; 222 223 std::stringstream aliased; 224 std::string alias; 225 std::string last_dealiased; 226 AliasState alias_state = ALIAS_NONE; 227 bool prefix_increment = false; 228 bool dealias_stored = false; 229 230 uint8_t max_template_nesting; 231 std::stack<uint16_t, std::vector<uint16_t>> brace_depth; 232 JSToken token = UNDEFINED; 233 ASIGroup previous_group = ASI_OTHER; 234 JSIdentifierCtxBase& ident_ctx; 235 size_t bytes_read; 236 size_t tmp_bytes_read; 237 238 struct 239 { 240 JSToken token = UNDEFINED; // the token before 241 int orig_len = 0; // current token original length 242 int norm_len = 0; // normalized length of previous tokens 243 int sc = 0; // current Starting Condition (0 means NOT_SET) 244 } states[JSTOKENIZER_MAX_STATES]; 245 int sp = 0; // points to the top of states 246 int eof_sp = 0; // points to the last state before the EOF 247 JSToken eof_token = UNDEFINED; // the last token before the EOF 248 int eof_sc = 0; // the last Starting Condition before the EOF 249 int bytes_skip = 0; // num of bytes to skip of processing in the next chunk 250 251 char*& tmp_buf; 252 size_t& tmp_buf_size; 253 const int tmp_cap_size; 254 255 bool newline_found = false; 256 constexpr static bool insert_semicolon[ASI_GROUP_MAX][ASI_GROUP_MAX] 257 { 258 {false, false, false, false, false, false, false, false, false, false, false,}, 259 {false, false, false, false, false, false, false, false, false, false, false,}, 260 {false, false, false, false, false, false, false, false, false, false, false,}, 261 {false, false, false, false, false, false, false, false, false, false, false,}, 262 {false, true, false, false, false, false, false, true, true, true, true, }, 263 {false, false, false, false, false, false, false, true, true, true, true, }, 264 {false, false, false, false, false, false, false, false, false, false, false,}, 265 {false, true, false, false, false, false, false, true, true, true, true, }, 266 {false, true, false, true, false, false, false, true, true, true, true, }, 267 {false, true, false, true, false, false, true, true, true, true, true, }, 268 {false, false, false, false, false, false, false, false, false, false, false,} 269 }; 270 271 const uint32_t max_bracket_depth; 272 std::stack<Scope> scope_stack; 273 }; 274 275 #endif // JS_TOKENIZER_H 276