1 //--------------------------------------------------------------------------
2 // Copyright (C) 2021-2021 Cisco and/or its affiliates. All rights reserved.
3 //
4 // This program is free software; you can redistribute it and/or modify it
5 // under the terms of the GNU General Public License Version 2 as published
6 // by the Free Software Foundation.  You may not use, modify or distribute
7 // this program under any other version of the GNU General Public License.
8 //
9 // This program is distributed in the hope that it will be useful, but
10 // WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 // General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License along
15 // with this program; if not, write to the Free Software Foundation, Inc.,
16 // 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
17 //--------------------------------------------------------------------------
18 // js_tokenizer.h author Oleksandr Serhiienko <oserhiie@cisco.com>
19 
20 #ifndef JS_TOKENIZER_H
21 #define JS_TOKENIZER_H
22 
23 #include <sstream>
24 #include <stack>
25 #include <vector>
26 
27 #include "log/messages.h"
28 #include "main/snort_debug.h"
29 #include "service_inspectors/http_inspect/http_enum.h"
30 
// Trace handle: only declared here (extern), its definition is in another translation unit.
// THREAD_LOCAL is a project macro -- presumably thread-local storage, confirm at its definition.
31 extern THREAD_LOCAL const snort::Trace* http_trace;
32 
33 // The longest pattern has 9 characters " < / s c r i p t > ",
34 // 8 of them can reside in 1st chunk
35 // Each character in the identifier forms its own group (pattern matching case),
36 // i.e. in the current implementation IDENTIFIER has " . " rule.
// This value is the number of entries in the JSTokenizer::states array.
37 #define JSTOKENIZER_MAX_STATES 8
38 
39 // To hold potentially long identifiers
// Used as the default value of the cap_size parameter of the JSTokenizer constructor.
40 #define JSTOKENIZER_BUF_MAX_SIZE 256
41 
// Opaque declaration of an enum with a fixed underlying type (unsigned int); the enumerators
// are defined elsewhere. In this header it only appears as the return type of JSTokenizer::m2p().
42 enum JSProgramScopeType : unsigned int;
43 
// Forward declaration: this header only uses the type by reference
// (constructor parameter and the ident_ctx member), so the full definition is not needed here.
44 class JSIdentifierCtxBase;
45 
// JavaScript tokenizer built on the Flex C++ scanner base class yyFlexLexer.
// It overrides yylex() and LexerError(); process() is the public entry point and reports
// its outcome as a JSRet code. Apart from LexerError(), member functions are only declared
// in this header -- their definitions are outside of it.
46 class JSTokenizer : public yyFlexLexer
47 {
48 private:
       // Token categories. The `token` member, `eof_token` and every states[] entry
       // store one of these values.
49     enum JSToken
50     {
51         UNDEFINED = 0,
52         IDENTIFIER,
53         KEYWORD,
54         PUNCTUATOR,
55         OPERATOR,
56         LITERAL,
57         DIRECTIVE,
58         DOT,
59         COLON,
60         CLOSING_BRACKET,
61         KEYWORD_VAR_DECL,             // var, let, const
62         KEYWORD_FUNCTION,
63         KEYWORD_BLOCK,                // for all block-definition keywords e.g. if, else, for, etc.
64         KEYWORD_CLASS,
65         OPERATOR_ASSIGNMENT,
66         OPERATOR_COMPLEX_ASSIGNMENT,
67         OPERATOR_COMPARISON,
68         OPERATOR_LOGICAL,
69         OPERATOR_SHIFT
70     };
71 
       // Bracket kind of a Scope entry (argument type of scope_push() / scope_pop()).
72     enum ScopeType
73     {
74         GLOBAL = 0,  // not in the brackets (the initial one)
75         BRACES,      // {}
76         PARENTHESES, // ()
77         BRACKETS     // []
78     };
       // Meaning attached to a scope (see set_meta_type() / meta_type() and p_scope_push() /
       // p_scope_pop()). SCOPE_META_TYPE_MAX is the last enumerator, i.e. one past the
       // last real value.
79     enum ScopeMetaType
80     {
81         NOT_SET = 0,
82         FUNCTION,   // function, arrow function
83         BLOCK,      // if, else, for, while, do, with, switch, try, catch, finally, block of code
84         OBJECT,     // object definition, class definition
85         SCOPE_META_TYPE_MAX
86     };
       // Element type of scope_stack: a bracket type plus per-scope flags.
       // The flags are read and written through the set_*/getter pairs declared below
       // (presumably always on the current scope -- confirm in the implementation).
87     struct Scope
88     {
           // A new scope starts with meta_type NOT_SET, ident_norm set to true
           // and func_call / block_param / do_loop set to false.
ScopeScope89         Scope(ScopeType t) :
90             type(t), meta_type(ScopeMetaType::NOT_SET), ident_norm(true), func_call(false),
91             block_param(false), do_loop(false)
92         {}
93 
94         ScopeType type;
95         ScopeMetaType meta_type;
96         bool ident_norm;
97         bool func_call;
98         bool block_param;
99         bool do_loop;
100     };
101 
        // Token groups used as indices of the insert_semicolon table; ASI_GROUP_MAX is the
        // size of both table dimensions. "ASI" presumably stands for automatic semicolon
        // insertion (cf. do_semicolon_insertion()) -- confirm.
102     enum ASIGroup
103     {
104         ASI_OTHER = 0,
105         ASI_GROUP_1,    // {
106         ASI_GROUP_2,    // }
107         ASI_GROUP_3,    // [ (
108         ASI_GROUP_4,    // ]
109         ASI_GROUP_5,    // )
110         ASI_GROUP_6,    // + -
111         ASI_GROUP_7,    // this true false null identifier literal
112                         //IDENTIFIER + LITERAL + KEYWORD_LITERAL
113         ASI_GROUP_8,    // ++ --
114         ASI_GROUP_9,    // continue break return debugger // same as KEYWORD_BA
115         ASI_GROUP_10,   // var function new delete void typeof if do while for with
116                         // switch throw try ~ +
117         ASI_GROUP_MAX
118     };
119 
        // Stage of alias tracking, stored in the alias_state member.
        // The per-value comments show the input seen so far for each stage.
120     enum AliasState
121     {
122         ALIAS_NONE = 0,
123         ALIAS_DEFINITION, // var a
124         ALIAS_PREFIX,     // var a +%possible PDU split%
125                           // to handle ambiguity between a++, a+=, and a + b
126         ALIAS_EQUALS,     // var a =
127         ALIAS_VALUE       // var a = eval
128     };
129 
130 public:
        // Result codes: returned by process() and by most of the private helpers below.
        // MAX is the last enumerator (one past the last real code).
131     enum JSRet
132     {
133         EOS = 0,
134         SCRIPT_ENDED,
135         SCRIPT_CONTINUE,
136         OPENING_TAG,
137         CLOSING_TAG,
138         BAD_TOKEN,
139         IDENTIFIER_OVERFLOW,
140         TEMPLATE_NESTING_OVERFLOW,
141         BRACKET_NESTING_OVERFLOW,
142         SCOPE_NESTING_OVERFLOW,
143         WRONG_CLOSING_SYMBOL,
144         ENDED_IN_INNER_SCOPE,
145         MAX
146     };
147 
        // No default construction: streams, identifier context, limits and the caller's
        // buffer must always be supplied.
148     JSTokenizer() = delete;
        // in, out              - input and output streams
        // ident_ctx            - identifier context, taken by reference
        // max_template_nesting - limit, same name as the max_template_nesting member
        // max_bracket_depth    - limit, same name as the const max_bracket_depth member
        // buf, buf_size        - caller-owned buffer pointer and size, both taken by non-const
        //                        reference (presumably bound to tmp_buf / tmp_buf_size -- confirm)
        // cap_size             - defaults to JSTOKENIZER_BUF_MAX_SIZE
        //                        (presumably stored in tmp_cap_size -- confirm)
149     explicit JSTokenizer(std::istream& in, std::ostream& out, JSIdentifierCtxBase& ident_ctx,
150         uint8_t max_template_nesting, uint32_t max_bracket_depth, char*& buf, size_t& buf_size,
151         int cap_size = JSTOKENIZER_BUF_MAX_SIZE);
152     ~JSTokenizer() override;
153 
        // Public entry point; returns a JSRet code. bytes_in is a non-const reference,
        // so the call can update it -- its exact meaning is defined by the implementation.
154     JSRet process(size_t& bytes_in);
155 
156 protected:
        // Override of the lexer's error hook: passes msg to snort::FatalError using a "%s"
        // format. Declared [[noreturn]], i.e. it is not expected to return to the caller.
LexerError(const char * msg)157     [[noreturn]] void LexerError(const char* msg) override
158     { snort::FatalError("%s", msg); }
159 
160 private:
        // Scanner function overridden from the base lexer; private, so outside code is
        // expected to go through process().
161     int yylex() override;
162 
        // Lexer helpers; they are only declared here, see the implementation for behavior.
163     void switch_to_initial();
164     void switch_to_temporal(const std::string& data);
165     JSRet eval_eof();
166     JSRet do_spacing(JSToken cur_token);
167     JSRet do_operator_spacing();
168     JSRet do_semicolon_insertion(ASIGroup current);
169     JSRet do_identifier_substitution(const char* lexeme, bool id_part);
170     JSRet push_identifier(const char* ident);
171     bool unescape(const char* lexeme);
172     void process_punctuator(JSToken tok = PUNCTUATOR);
173     void process_closing_brace();
174     JSRet process_subst_open();
175 
        // Maintenance of the states[] array and its sp index (declared further below).
176     bool states_process();
177     void states_correct(int);
178     void states_reset();
179     void states_over();
180     void states_adjust();
181 
182     // scope stack servicing
183     JSRet scope_push(ScopeType);
184     JSRet scope_pop(ScopeType);
185     Scope& scope_cur();
186 
187     // program scope stack servicing
188     JSRet p_scope_push(ScopeMetaType);
189     JSRet p_scope_pop(ScopeMetaType);
190 
191     // interactions with the current scope
192     bool global_scope();
193     void set_meta_type(ScopeMetaType);
194     ScopeMetaType meta_type();
195     void set_ident_norm(bool);
196     bool ident_norm();
197     void set_func_call(bool);
198     bool func_call();
199     void set_block_param(bool);
200     bool block_param();
201     void set_do_loop(bool);
202     bool do_loop();
203 
        // Static helpers: m2p() maps a ScopeMetaType to a JSProgramScopeType and m2str()
        // to a C string; is_operator() classifies a JSToken.
204     static JSProgramScopeType m2p(ScopeMetaType);
205     static const char* m2str(ScopeMetaType);
206     static bool is_operator(JSToken);
207 
        // Alias handling ("dealias_*"); works with the aliased / alias / last_dealiased /
        // alias_state members below. Details live in the implementation.
208     void dealias_clear_mutated(bool id_part);
209     void dealias_increment();
210     void dealias_identifier(bool id_part, bool assignment_start);
211     void dealias_reset();
212     void dealias_prefix_reset();
213     void dealias_equals(bool complex);
214     void dealias_append();
215     void dealias_finalize();
216 
        // Array of unknown bound declared here; its definition (and size) is elsewhere.
217     static const char* p_scope_codes[];
218 
        // Untyped buffer handles. cur_buffer has no in-class initializer, so it must be
        // set by the constructor; tmp_buffer starts as nullptr.
219     void* cur_buffer;
220     void* tmp_buffer = nullptr;
221     std::stringstream tmp;
222 
        // Alias-tracking state used by the dealias_* helpers.
223     std::stringstream aliased;
224     std::string alias;
225     std::string last_dealiased;
226     AliasState alias_state = ALIAS_NONE;
227     bool prefix_increment = false;
228     bool dealias_stored = false;
229 
230     uint8_t max_template_nesting;
231     std::stack<uint16_t, std::vector<uint16_t>> brace_depth;
232     JSToken token = UNDEFINED;
233     ASIGroup previous_group = ASI_OTHER;
234     JSIdentifierCtxBase& ident_ctx;
        // Byte counters without in-class initializers; they must be set by the constructor.
235     size_t bytes_read;
236     size_t tmp_bytes_read;
237 
        // Fixed-size array of JSTOKENIZER_MAX_STATES per-token records; sp indexes its top.
238     struct
239     {
240         JSToken token = UNDEFINED;          // the token before
241         int orig_len = 0;                   // current token original length
242         int norm_len = 0;                   // normalized length of previous tokens
243         int sc = 0;                        // current Starting Condition (0 means NOT_SET)
244     } states[JSTOKENIZER_MAX_STATES];
245     int sp = 0;                             // points to the top of states
246     int eof_sp = 0;                         // points to the last state before the EOF
247     JSToken eof_token = UNDEFINED;          // the last token before the EOF
248     int eof_sc = 0;                         // the last Starting Condition before the EOF
249     int bytes_skip = 0;                     // num of bytes to skip of processing in the next chunk
250 
        // References to a buffer pointer and size owned outside this object, plus a constant
        // capacity (presumably bound to the constructor's buf / buf_size / cap_size -- confirm).
251     char*& tmp_buf;
252     size_t& tmp_buf_size;
253     const int tmp_cap_size;
254 
255     bool newline_found = false;
        // ASI_GROUP_MAX x ASI_GROUP_MAX lookup table (11 x 11: ASI_OTHER plus ASI_GROUP_1..10),
        // with rows and columns in ASIGroup enumerator order. Only rows 4, 5, 7, 8 and 9
        // contain true entries.
        // NOTE(review): presumably indexed as [previous_group][current group] by
        // do_semicolon_insertion() -- confirm in the implementation.
256     constexpr static bool insert_semicolon[ASI_GROUP_MAX][ASI_GROUP_MAX]
257     {
258         {false, false, false, false, false, false, false, false, false, false, false,},
259         {false, false, false, false, false, false, false, false, false, false, false,},
260         {false, false, false, false, false, false, false, false, false, false, false,},
261         {false, false, false, false, false, false, false, false, false, false, false,},
262         {false, true,  false, false, false, false, false, true,  true,  true,  true, },
263         {false, false, false, false, false, false, false, true,  true,  true,  true, },
264         {false, false, false, false, false, false, false, false, false, false, false,},
265         {false, true,  false, false, false, false, false, true,  true,  true,  true, },
266         {false, true,  false, true,  false, false, false, true,  true,  true,  true, },
267         {false, true,  false, true,  false, false, true,  true,  true,  true,  true, },
268         {false, false, false, false, false, false, false, false, false, false, false,}
269     };
270 
        // Constant limit (same name as the constructor parameter) and the stack of Scope
        // entries serviced by scope_push() / scope_pop() / scope_cur().
271     const uint32_t max_bracket_depth;
272     std::stack<Scope> scope_stack;
273 };
274 
275 #endif // JS_TOKENIZER_H
276