1 //--------------------------------------------------------------------------
2 // Copyright (C) 2016-2021 Cisco and/or its affiliates. All rights reserved.
3 //
4 // This program is free software; you can redistribute it and/or modify it
5 // under the terms of the GNU General Public License Version 2 as published
6 // by the Free Software Foundation.  You may not use, modify or distribute
7 // this program under any other version of the GNU General Public License.
8 //
9 // This program is distributed in the hope that it will be useful, but
10 // WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 // General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License along
15 // with this program; if not, write to the Free Software Foundation, Inc.,
16 // 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
17 //--------------------------------------------------------------------------
18 // http_js_norm.cc author Tom Peters <thopeter@cisco.com>
19 
20 #ifdef HAVE_CONFIG_H
21 #include "config.h"
22 #endif
23 
24 #include "http_js_norm.h"
25 
26 #include "main/snort_debug.h"
27 #include "utils/js_normalizer.h"
28 #include "utils/safec.h"
29 #include "utils/util_jsnorm.h"
30 
31 #include "http_common.h"
32 #include "http_enum.h"
33 
34 using namespace HttpEnums;
35 using namespace snort;
36 
// Printable descriptions of the tokenizer return codes. ret2str() indexes
// this table with the numeric JSTokenizer::JSRet value.
// NOTE(review): the order presumably mirrors the JSRet enumerators, with
// "unknown" sitting in the JSRet::MAX slot -- confirm against the enum.
static const char* jsret_codes[] =
{
    // stream / script state
    "end of stream", "script ended", "script continues",

    // HTML tags seen inside the script
    "opening tag", "closing tag",

    // tokenizer failures and limits
    "bad token",
    "identifier overflow",
    "template nesting overflow", "bracket nesting overflow", "scope nesting overflow",
    "wrong closing symbol", "ended in inner scope",

    // fallback entry
    "unknown"
};
53 
ret2str(JSTokenizer::JSRet ret)54 static const char* ret2str(JSTokenizer::JSRet ret)
55 {
56     assert(ret < JSTokenizer::JSRet::MAX);
57     ret = ret < JSTokenizer::JSRet::MAX ? ret : JSTokenizer::JSRet::MAX;
58     return jsret_codes[ret];
59 }
60 
js_normalize(JSNormalizer & ctx,const char * const end,const char * & ptr)61 static inline JSTokenizer::JSRet js_normalize(JSNormalizer& ctx, const char* const end,
62     const char*& ptr)
63 {
64     trace_logf(3, http_trace, TRACE_JS_DUMP, nullptr,
65         "original[%zu]: %.*s\n", end - ptr, static_cast<int>(end - ptr), ptr);
66 
67     auto ret = ctx.normalize(ptr, end - ptr);
68     auto src_next = ctx.get_src_next();
69 
70     trace_logf(3, http_trace, TRACE_JS_PROC, nullptr,
71         "normalizer returned with %d '%s'\n", ret, ret2str(ret));
72 
73     if (src_next > ptr)
74         HttpModule::increment_peg_counts(PEG_JS_BYTES, src_next - ptr);
75     else
76         src_next = end; // Normalizer has failed, thus aborting the remaining input
77 
78     ptr = src_next;
79 
80     return ret;
81 }
82 
HttpJsNorm(const HttpParaList::UriParam & uri_param_,int64_t normalization_depth_,int32_t identifier_depth_,uint8_t max_template_nesting_,uint32_t max_bracket_depth_,uint32_t max_scope_depth_,const std::unordered_set<std::string> & ignored_ids_)83 HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_, int64_t normalization_depth_,
84     int32_t identifier_depth_, uint8_t max_template_nesting_, uint32_t max_bracket_depth_,
85     uint32_t max_scope_depth_, const std::unordered_set<std::string>& ignored_ids_) :
86     uri_param(uri_param_),
87     detection_depth(UINT64_MAX),
88     normalization_depth(normalization_depth_),
89     identifier_depth(identifier_depth_),
90     max_template_nesting(max_template_nesting_),
91     max_bracket_depth(max_bracket_depth_),
92     max_scope_depth(max_scope_depth_),
93     ignored_ids(ignored_ids_),
94     mpse_otag(nullptr),
95     mpse_attr(nullptr),
96     mpse_type(nullptr)
97 {}
98 
~HttpJsNorm()99 HttpJsNorm::~HttpJsNorm()
100 {
101     delete mpse_otag;
102     delete mpse_attr;
103     delete mpse_type;
104 }
105 
configure()106 void HttpJsNorm::configure()
107 {
108     if (configure_once)
109         return;
110 
111     mpse_otag = new SearchTool;
112     mpse_attr = new SearchTool;
113     mpse_type = new SearchTool;
114 
115     static constexpr const char* otag_start = "<SCRIPT";
116     static constexpr const char* attr_slash = "/";
117     static constexpr const char* attr_gt = ">";
118     static constexpr const char* attr_src = "SRC";
119     static constexpr const char* attr_js1 = "JAVASCRIPT";
120     static constexpr const char* attr_js2 = "ECMASCRIPT";
121     static constexpr const char* attr_vb = "VBSCRIPT";
122 
123     mpse_otag->add(otag_start, strlen(otag_start), 0);
124     mpse_attr->add(attr_slash, strlen(attr_slash), AID_SLASH);
125     mpse_attr->add(attr_gt, strlen(attr_gt), AID_GT);
126     mpse_attr->add(attr_src, strlen(attr_src), AID_SRC);
127     mpse_attr->add(attr_js1, strlen(attr_js1), AID_JS);
128     mpse_attr->add(attr_js2, strlen(attr_js2), AID_ECMA);
129     mpse_attr->add(attr_vb, strlen(attr_vb), AID_VB);
130     mpse_type->add(attr_js1, strlen(attr_js1), AID_JS);
131     mpse_type->add(attr_js2, strlen(attr_js2), AID_ECMA);
132     mpse_type->add(attr_vb, strlen(attr_vb), AID_VB);
133 
134     mpse_otag->prep();
135     mpse_attr->prep();
136     mpse_type->prep();
137 
138     configure_once = true;
139 }
140 
do_external(const Field & input,Field & output,HttpInfractions * infractions,HttpFlowData * ssn,bool final_portion) const141 void HttpJsNorm::do_external(const Field& input, Field& output,
142     HttpInfractions* infractions, HttpFlowData* ssn, bool final_portion) const
143 {
144     if (ssn->js_built_in_event)
145         return;
146 
147     const char* ptr = (const char*)input.start();
148     const char* const end = ptr + input.length();
149 
150     HttpEventGen* events = ssn->events[HttpCommon::SRC_SERVER];
151 
152     if (!alive_ctx(ssn))
153     {
154         HttpModule::increment_peg_counts(PEG_JS_EXTERNAL);
155         trace_logf(2, http_trace, TRACE_JS_PROC, nullptr,
156             "script starts\n");
157     }
158     else
159         trace_logf(2, http_trace, TRACE_JS_PROC, nullptr,
160             "script continues\n");
161 
162     auto& js_ctx = ssn->acquire_js_ctx(identifier_depth, normalization_depth, max_template_nesting,
163         max_bracket_depth, max_scope_depth, ignored_ids);
164 
165     while (ptr < end)
166     {
167         trace_logf(1, http_trace, TRACE_JS_PROC, nullptr,
168             "external script at %zd offset\n", ptr - (const char*)input.start());
169 
170         auto ret = js_normalize(js_ctx, end, ptr);
171 
172         switch (ret)
173         {
174         case JSTokenizer::EOS:
175         case JSTokenizer::SCRIPT_CONTINUE:
176             break;
177         case JSTokenizer::SCRIPT_ENDED:
178         case JSTokenizer::CLOSING_TAG:
179             *infractions += INF_JS_CLOSING_TAG;
180             events->create_event(EVENT_JS_CLOSING_TAG);
181             ssn->js_built_in_event = true;
182             break;
183         case JSTokenizer::OPENING_TAG:
184             *infractions += INF_JS_OPENING_TAG;
185             events->create_event(EVENT_JS_OPENING_TAG);
186             ssn->js_built_in_event = true;
187             break;
188         case JSTokenizer::BAD_TOKEN:
189         case JSTokenizer::WRONG_CLOSING_SYMBOL:
190         case JSTokenizer::ENDED_IN_INNER_SCOPE:
191             *infractions += INF_JS_BAD_TOKEN;
192             events->create_event(EVENT_JS_BAD_TOKEN);
193             ssn->js_built_in_event = true;
194             break;
195         case JSTokenizer::IDENTIFIER_OVERFLOW:
196             HttpModule::increment_peg_counts(PEG_JS_IDENTIFIER_OVERFLOW);
197             *infractions += INF_JS_IDENTIFIER_OVERFLOW;
198             events->create_event(EVENT_JS_IDENTIFIER_OVERFLOW);
199             ssn->js_built_in_event = true;
200             break;
201         case JSTokenizer::TEMPLATE_NESTING_OVERFLOW:
202         case JSTokenizer::BRACKET_NESTING_OVERFLOW:
203             *infractions += INF_JS_BRACKET_NEST_OVERFLOW;
204             events->create_event(EVENT_JS_BRACKET_NEST_OVERFLOW);
205             ssn->js_built_in_event = true;
206             break;
207         case JSTokenizer::SCOPE_NESTING_OVERFLOW:
208             *infractions += INF_JS_SCOPE_NEST_OVERFLOW;
209             events->create_event(EVENT_JS_SCOPE_NEST_OVERFLOW);
210             ssn->js_built_in_event = true;
211             break;
212         default:
213             assert(false);
214             break;
215         }
216 
217         if (ssn->js_built_in_event)
218             break;
219     }
220 
221     debug_logf(4, http_trace, TRACE_JS_PROC, nullptr,
222         "input data was %s\n", final_portion ? "last one in PDU" : "a part of PDU");
223 
224     uint32_t data_len = std::min(detection_depth, js_ctx.script_size());
225 
226     if (data_len)
227     {
228         const char* data = final_portion ? js_ctx.take_script() : js_ctx.get_script();
229 
230         if (data)
231         {
232             trace_logf(1, http_trace, TRACE_JS_DUMP, nullptr,
233                        "js_data[%u]: %.*s\n", data_len, data_len, data);
234 
235             output.set(data_len, (const uint8_t*)data, final_portion);
236         }
237     }
238 }
239 
do_inline(const Field & input,Field & output,HttpInfractions * infractions,HttpFlowData * ssn,bool final_portion) const240 void HttpJsNorm::do_inline(const Field& input, Field& output,
241     HttpInfractions* infractions, HttpFlowData* ssn, bool final_portion) const
242 {
243     const char* ptr = (const char*)input.start();
244     const char* const end = ptr + input.length();
245 
246     HttpEventGen* events = ssn->events[HttpCommon::SRC_SERVER];
247 
248     bool script_continue = ssn->js_continue;
249     bool script_external = false;
250 
251     while (ptr < end)
252     {
253         if (!script_continue)
254         {
255             if (!mpse_otag->find(ptr, end - ptr, match_otag, false, &ptr))
256                 break;
257             if (ptr >= end)
258                 break;
259 
260             MatchContext sctx = {ptr, true, false, false};
261 
262             if (ptr[0] == '>')
263                 ptr++;
264             else
265             {
266                 if (!mpse_attr->find(ptr, end - ptr, match_attr, false, &sctx))
267                     break; // the opening tag never ends
268                 ptr = sctx.next;
269             }
270 
271             trace_logf(1, http_trace, TRACE_JS_PROC, nullptr,
272                 "opening tag at %zd offset\n", ptr - (const char*)input.start());
273 
274             trace_logf(2, http_trace, TRACE_JS_PROC, nullptr,
275                 "script attributes [%s, %s, %s]\n",
276                 sctx.is_shortened ? "shortened form" : "full form",
277                 sctx.is_javascript ? "JavaScript type" : "unknown type",
278                 sctx.is_external ? "external source" : "inline");
279 
280             if (sctx.is_shortened)
281             {
282                 *infractions += INF_JS_SHORTENED_TAG;
283                 events->create_event(EVENT_JS_SHORTENED_TAG);
284                 continue;
285             }
286 
287             if (!sctx.is_javascript)
288                 continue;
289 
290             script_external = sctx.is_external;
291 
292             // script found
293             if (!script_external)
294                 HttpModule::increment_peg_counts(PEG_JS_INLINE);
295         }
296 
297         auto& js_ctx = ssn->acquire_js_ctx(identifier_depth, normalization_depth,
298             max_template_nesting, max_bracket_depth, max_scope_depth, ignored_ids);
299         auto output_size_before = js_ctx.script_size();
300 
301         auto ret = js_normalize(js_ctx, end, ptr);
302 
303         switch (ret)
304         {
305         case JSTokenizer::EOS:
306             js_ctx.reset_depth();
307             break;
308         case JSTokenizer::SCRIPT_ENDED:
309             break;
310         case JSTokenizer::SCRIPT_CONTINUE:
311             break;
312         case JSTokenizer::OPENING_TAG:
313             *infractions += INF_JS_OPENING_TAG;
314             events->create_event(EVENT_JS_OPENING_TAG);
315             break;
316         case JSTokenizer::CLOSING_TAG:
317             *infractions += INF_JS_CLOSING_TAG;
318             events->create_event(EVENT_JS_CLOSING_TAG);
319             break;
320         case JSTokenizer::BAD_TOKEN:
321         case JSTokenizer::WRONG_CLOSING_SYMBOL:
322         case JSTokenizer::ENDED_IN_INNER_SCOPE:
323             *infractions += INF_JS_BAD_TOKEN;
324             events->create_event(EVENT_JS_BAD_TOKEN);
325             break;
326         case JSTokenizer::IDENTIFIER_OVERFLOW:
327             HttpModule::increment_peg_counts(PEG_JS_IDENTIFIER_OVERFLOW);
328             *infractions += INF_JS_IDENTIFIER_OVERFLOW;
329             events->create_event(EVENT_JS_IDENTIFIER_OVERFLOW);
330             break;
331         case JSTokenizer::TEMPLATE_NESTING_OVERFLOW:
332         case JSTokenizer::BRACKET_NESTING_OVERFLOW:
333             *infractions += INF_JS_BRACKET_NEST_OVERFLOW;
334             events->create_event(EVENT_JS_BRACKET_NEST_OVERFLOW);
335             break;
336         case JSTokenizer::SCOPE_NESTING_OVERFLOW:
337             *infractions += INF_JS_SCOPE_NEST_OVERFLOW;
338             events->create_event(EVENT_JS_SCOPE_NEST_OVERFLOW);
339             break;
340         default:
341             assert(false);
342             break;
343         }
344 
345         if (script_external && output_size_before != js_ctx.script_size())
346         {
347             *infractions += INF_JS_CODE_IN_EXTERNAL;
348             events->create_event(EVENT_JS_CODE_IN_EXTERNAL);
349         }
350 
351         script_continue = ret == JSTokenizer::SCRIPT_CONTINUE;
352     }
353 
354     ssn->js_continue = script_continue;
355 
356     if (!alive_ctx(ssn))
357         return;
358 
359     debug_logf(4, http_trace, TRACE_JS_PROC, nullptr,
360         "input data was %s\n", final_portion ? "last one in PDU" : "a part of PDU");
361 
362     auto js_ctx = ssn->js_normalizer;
363     uint32_t data_len = std::min(detection_depth, js_ctx->script_size());
364 
365     if (data_len)
366     {
367         const char* data = final_portion ? js_ctx->take_script() : js_ctx->get_script();
368 
369         if (data)
370         {
371             trace_logf(1, http_trace, TRACE_JS_DUMP, nullptr,
372                        "js_data[%u]: %.*s\n", data_len, data_len, data);
373 
374             output.set(data_len, (const uint8_t*)data, final_portion);
375         }
376     }
377 
378     if (!script_continue && final_portion)
379         ssn->release_js_ctx();
380 }
381 
do_legacy(const Field & input,Field & output,HttpInfractions * infractions,HttpEventGen * events,int max_javascript_whitespaces) const382 void HttpJsNorm::do_legacy(const Field& input, Field& output, HttpInfractions* infractions,
383     HttpEventGen* events, int max_javascript_whitespaces) const
384 {
385     bool js_present = false;
386     int index = 0;
387     const char* ptr = (const char*)input.start();
388     const char* const end = ptr + input.length();
389 
390     JSState js;
391     js.allowed_spaces = max_javascript_whitespaces;
392     js.allowed_levels = MAX_ALLOWED_OBFUSCATION;
393     js.alerts = 0;
394 
395     uint8_t* buffer = new uint8_t[input.length()];
396 
397     while (ptr < end)
398     {
399         int bytes_copied = 0;
400         int mindex;
401 
402         // Search for beginning of a javascript
403         if (mpse_otag->find(ptr, end-ptr, search_js_found, false, &mindex) > 0)
404         {
405             const char* js_start = ptr + mindex;
406             const char* const angle_bracket =
407                 (const char*)SnortStrnStr(js_start, end - js_start, ">");
408             if (angle_bracket == nullptr || (end - angle_bracket) == 0)
409                 break;
410 
411             bool type_js = false;
412             if (angle_bracket > js_start)
413             {
414                 int mid;
415                 const int script_found = mpse_type->find(
416                     js_start, (angle_bracket-js_start), search_html_found, false, &mid);
417 
418                 js_start = angle_bracket + 1;
419                 if (script_found > 0)
420                 {
421                     switch (mid)
422                     {
423                     case AID_JS:
424                         js_present = true;
425                         type_js = true;
426                         break;
427                     default:
428                         type_js = false;
429                         break;
430                     }
431                 }
432                 else
433                 {
434                     // if no type or language is found we assume it is a javascript
435                     js_present = true;
436                     type_js = true;
437                 }
438             }
439             // Save before the <script> begins
440             if (js_start > ptr)
441             {
442                 if ((js_start - ptr) > (input.length() - index))
443                     break;
444                 memmove_s(buffer + index, input.length() - index, ptr, js_start - ptr);
445                 index += js_start - ptr;
446             }
447 
448             ptr = js_start;
449             if (!type_js)
450                 continue;
451 
452             JSNormalizeDecode(js_start, (uint16_t)(end-js_start), (char*)buffer+index,
453                 (uint16_t)(input.length() - index), &ptr, &bytes_copied, &js,
454                 uri_param.iis_unicode ? uri_param.unicode_map : nullptr);
455 
456             index += bytes_copied;
457         }
458         else
459             break;
460     }
461 
462     if (js_present)
463     {
464         if ((ptr < end) && ((input.length() - index) >= (end - ptr)))
465         {
466             memmove_s(buffer + index, input.length() - index, ptr, end - ptr);
467             index += end - ptr;
468         }
469         if (js.alerts)
470         {
471             if (js.alerts & ALERT_LEVELS_EXCEEDED)
472             {
473                 *infractions += INF_JS_OBFUSCATION_EXCD;
474                 events->create_event(EVENT_JS_OBFUSCATION_EXCD);
475             }
476             if (js.alerts & ALERT_SPACES_EXCEEDED)
477             {
478                 *infractions += INF_JS_EXCESS_WS;
479                 events->create_event(EVENT_JS_EXCESS_WS);
480             }
481             if (js.alerts & ALERT_MIXED_ENCODINGS)
482             {
483                 *infractions += INF_MIXED_ENCODINGS;
484                 events->create_event(EVENT_MIXED_ENCODINGS);
485             }
486         }
487         output.set(index, buffer, true);
488     }
489     else
490     {
491         delete[] buffer;
492         output.set(input);
493     }
494 }
495 
search_js_found(void *,void *,int index,void * index_ptr,void *)496 int HttpJsNorm::search_js_found(void*, void*, int index, void* index_ptr, void*)
497 {
498     static constexpr int script_start_length = sizeof("<SCRIPT") - 1;
499     *((int*) index_ptr) = index - script_start_length;
500     return 1;
501 }
502 
search_html_found(void * id,void *,int,void * id_ptr,void *)503 int HttpJsNorm::search_html_found(void* id, void*, int, void* id_ptr, void*)
504 {
505     *((int*) id_ptr)  = (int)(uintptr_t)id;
506     return 1;
507 }
508 
match_otag(void *,void *,int index,void * ptr,void *)509 int HttpJsNorm::match_otag(void*, void*, int index, void* ptr, void*)
510 {
511     *(char**)ptr += index;
512     return 1;
513 }
514 
match_attr(void * pid,void *,int index,void * sctx,void *)515 int HttpJsNorm::match_attr(void* pid, void*, int index, void* sctx, void*)
516 {
517     MatchContext* ctx = (MatchContext*)sctx;
518     AttrId id = (AttrId)(uintptr_t)pid;
519     const char* c;
520 
521     switch (id)
522     {
523     case AID_SLASH:
524         if (*(ctx->next + index) == '>')
525         {
526             ctx->is_shortened = true;
527             ctx->next += index;
528             return 1;
529         }
530         else
531         {
532             ctx->is_shortened = false;
533             return 0;
534         }
535 
536     case AID_GT:
537         ctx->next += index;
538         return 1;
539 
540     case AID_SRC:
541         c = ctx->next + index;
542         while (*c == ' ') c++;
543         ctx->is_external = ctx->is_external || *c == '=';
544         return 0;
545 
546     case AID_JS:
547         ctx->is_javascript = true;
548         return 0;
549 
550     case AID_ECMA:
551         ctx->is_javascript = true;
552         return 0;
553 
554     case AID_VB:
555         ctx->is_javascript = false;
556         return 0;
557 
558     default:
559         ctx->is_external = false;
560         ctx->is_javascript = false;
561         ctx->next += index;
562         return 1;
563     }
564 }
565