1 //--------------------------------------------------------------------------
2 // Copyright (C) 2016-2021 Cisco and/or its affiliates. All rights reserved.
3 //
4 // This program is free software; you can redistribute it and/or modify it
5 // under the terms of the GNU General Public License Version 2 as published
6 // by the Free Software Foundation. You may not use, modify or distribute
7 // this program under any other version of the GNU General Public License.
8 //
9 // This program is distributed in the hope that it will be useful, but
10 // WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 // General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License along
15 // with this program; if not, write to the Free Software Foundation, Inc.,
16 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 //--------------------------------------------------------------------------
18 // http_js_norm.cc author Tom Peters <thopeter@cisco.com>
19
20 #ifdef HAVE_CONFIG_H
21 #include "config.h"
22 #endif
23
24 #include "http_js_norm.h"
25
26 #include "main/snort_debug.h"
27 #include "utils/js_normalizer.h"
28 #include "utils/safec.h"
29 #include "utils/util_jsnorm.h"
30
31 #include "http_common.h"
32 #include "http_enum.h"
33
34 using namespace HttpEnums;
35 using namespace snort;
36
// Human-readable descriptions of the normalizer return codes, used only for
// trace output. ret2str() indexes this table with a JSTokenizer::JSRet value;
// the trailing "unknown" entry is the slot selected for JSTokenizer::JSRet::MAX.
static const char* const jsret_codes[] =
{
    "end of stream",
    "script ended",
    "script continues",
    "opening tag",
    "closing tag",
    "bad token",
    "identifier overflow",
    "template nesting overflow",
    "bracket nesting overflow",
    "scope nesting overflow",
    "wrong closing symbol",
    "ended in inner scope",
    "unknown"
};
53
ret2str(JSTokenizer::JSRet ret)54 static const char* ret2str(JSTokenizer::JSRet ret)
55 {
56 assert(ret < JSTokenizer::JSRet::MAX);
57 ret = ret < JSTokenizer::JSRet::MAX ? ret : JSTokenizer::JSRet::MAX;
58 return jsret_codes[ret];
59 }
60
js_normalize(JSNormalizer & ctx,const char * const end,const char * & ptr)61 static inline JSTokenizer::JSRet js_normalize(JSNormalizer& ctx, const char* const end,
62 const char*& ptr)
63 {
64 trace_logf(3, http_trace, TRACE_JS_DUMP, nullptr,
65 "original[%zu]: %.*s\n", end - ptr, static_cast<int>(end - ptr), ptr);
66
67 auto ret = ctx.normalize(ptr, end - ptr);
68 auto src_next = ctx.get_src_next();
69
70 trace_logf(3, http_trace, TRACE_JS_PROC, nullptr,
71 "normalizer returned with %d '%s'\n", ret, ret2str(ret));
72
73 if (src_next > ptr)
74 HttpModule::increment_peg_counts(PEG_JS_BYTES, src_next - ptr);
75 else
76 src_next = end; // Normalizer has failed, thus aborting the remaining input
77
78 ptr = src_next;
79
80 return ret;
81 }
82
HttpJsNorm(const HttpParaList::UriParam & uri_param_,int64_t normalization_depth_,int32_t identifier_depth_,uint8_t max_template_nesting_,uint32_t max_bracket_depth_,uint32_t max_scope_depth_,const std::unordered_set<std::string> & ignored_ids_)83 HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_, int64_t normalization_depth_,
84 int32_t identifier_depth_, uint8_t max_template_nesting_, uint32_t max_bracket_depth_,
85 uint32_t max_scope_depth_, const std::unordered_set<std::string>& ignored_ids_) :
86 uri_param(uri_param_),
87 detection_depth(UINT64_MAX),
88 normalization_depth(normalization_depth_),
89 identifier_depth(identifier_depth_),
90 max_template_nesting(max_template_nesting_),
91 max_bracket_depth(max_bracket_depth_),
92 max_scope_depth(max_scope_depth_),
93 ignored_ids(ignored_ids_),
94 mpse_otag(nullptr),
95 mpse_attr(nullptr),
96 mpse_type(nullptr)
97 {}
98
~HttpJsNorm()99 HttpJsNorm::~HttpJsNorm()
100 {
101 delete mpse_otag;
102 delete mpse_attr;
103 delete mpse_type;
104 }
105
configure()106 void HttpJsNorm::configure()
107 {
108 if (configure_once)
109 return;
110
111 mpse_otag = new SearchTool;
112 mpse_attr = new SearchTool;
113 mpse_type = new SearchTool;
114
115 static constexpr const char* otag_start = "<SCRIPT";
116 static constexpr const char* attr_slash = "/";
117 static constexpr const char* attr_gt = ">";
118 static constexpr const char* attr_src = "SRC";
119 static constexpr const char* attr_js1 = "JAVASCRIPT";
120 static constexpr const char* attr_js2 = "ECMASCRIPT";
121 static constexpr const char* attr_vb = "VBSCRIPT";
122
123 mpse_otag->add(otag_start, strlen(otag_start), 0);
124 mpse_attr->add(attr_slash, strlen(attr_slash), AID_SLASH);
125 mpse_attr->add(attr_gt, strlen(attr_gt), AID_GT);
126 mpse_attr->add(attr_src, strlen(attr_src), AID_SRC);
127 mpse_attr->add(attr_js1, strlen(attr_js1), AID_JS);
128 mpse_attr->add(attr_js2, strlen(attr_js2), AID_ECMA);
129 mpse_attr->add(attr_vb, strlen(attr_vb), AID_VB);
130 mpse_type->add(attr_js1, strlen(attr_js1), AID_JS);
131 mpse_type->add(attr_js2, strlen(attr_js2), AID_ECMA);
132 mpse_type->add(attr_vb, strlen(attr_vb), AID_VB);
133
134 mpse_otag->prep();
135 mpse_attr->prep();
136 mpse_type->prep();
137
138 configure_once = true;
139 }
140
do_external(const Field & input,Field & output,HttpInfractions * infractions,HttpFlowData * ssn,bool final_portion) const141 void HttpJsNorm::do_external(const Field& input, Field& output,
142 HttpInfractions* infractions, HttpFlowData* ssn, bool final_portion) const
143 {
144 if (ssn->js_built_in_event)
145 return;
146
147 const char* ptr = (const char*)input.start();
148 const char* const end = ptr + input.length();
149
150 HttpEventGen* events = ssn->events[HttpCommon::SRC_SERVER];
151
152 if (!alive_ctx(ssn))
153 {
154 HttpModule::increment_peg_counts(PEG_JS_EXTERNAL);
155 trace_logf(2, http_trace, TRACE_JS_PROC, nullptr,
156 "script starts\n");
157 }
158 else
159 trace_logf(2, http_trace, TRACE_JS_PROC, nullptr,
160 "script continues\n");
161
162 auto& js_ctx = ssn->acquire_js_ctx(identifier_depth, normalization_depth, max_template_nesting,
163 max_bracket_depth, max_scope_depth, ignored_ids);
164
165 while (ptr < end)
166 {
167 trace_logf(1, http_trace, TRACE_JS_PROC, nullptr,
168 "external script at %zd offset\n", ptr - (const char*)input.start());
169
170 auto ret = js_normalize(js_ctx, end, ptr);
171
172 switch (ret)
173 {
174 case JSTokenizer::EOS:
175 case JSTokenizer::SCRIPT_CONTINUE:
176 break;
177 case JSTokenizer::SCRIPT_ENDED:
178 case JSTokenizer::CLOSING_TAG:
179 *infractions += INF_JS_CLOSING_TAG;
180 events->create_event(EVENT_JS_CLOSING_TAG);
181 ssn->js_built_in_event = true;
182 break;
183 case JSTokenizer::OPENING_TAG:
184 *infractions += INF_JS_OPENING_TAG;
185 events->create_event(EVENT_JS_OPENING_TAG);
186 ssn->js_built_in_event = true;
187 break;
188 case JSTokenizer::BAD_TOKEN:
189 case JSTokenizer::WRONG_CLOSING_SYMBOL:
190 case JSTokenizer::ENDED_IN_INNER_SCOPE:
191 *infractions += INF_JS_BAD_TOKEN;
192 events->create_event(EVENT_JS_BAD_TOKEN);
193 ssn->js_built_in_event = true;
194 break;
195 case JSTokenizer::IDENTIFIER_OVERFLOW:
196 HttpModule::increment_peg_counts(PEG_JS_IDENTIFIER_OVERFLOW);
197 *infractions += INF_JS_IDENTIFIER_OVERFLOW;
198 events->create_event(EVENT_JS_IDENTIFIER_OVERFLOW);
199 ssn->js_built_in_event = true;
200 break;
201 case JSTokenizer::TEMPLATE_NESTING_OVERFLOW:
202 case JSTokenizer::BRACKET_NESTING_OVERFLOW:
203 *infractions += INF_JS_BRACKET_NEST_OVERFLOW;
204 events->create_event(EVENT_JS_BRACKET_NEST_OVERFLOW);
205 ssn->js_built_in_event = true;
206 break;
207 case JSTokenizer::SCOPE_NESTING_OVERFLOW:
208 *infractions += INF_JS_SCOPE_NEST_OVERFLOW;
209 events->create_event(EVENT_JS_SCOPE_NEST_OVERFLOW);
210 ssn->js_built_in_event = true;
211 break;
212 default:
213 assert(false);
214 break;
215 }
216
217 if (ssn->js_built_in_event)
218 break;
219 }
220
221 debug_logf(4, http_trace, TRACE_JS_PROC, nullptr,
222 "input data was %s\n", final_portion ? "last one in PDU" : "a part of PDU");
223
224 uint32_t data_len = std::min(detection_depth, js_ctx.script_size());
225
226 if (data_len)
227 {
228 const char* data = final_portion ? js_ctx.take_script() : js_ctx.get_script();
229
230 if (data)
231 {
232 trace_logf(1, http_trace, TRACE_JS_DUMP, nullptr,
233 "js_data[%u]: %.*s\n", data_len, data_len, data);
234
235 output.set(data_len, (const uint8_t*)data, final_portion);
236 }
237 }
238 }
239
do_inline(const Field & input,Field & output,HttpInfractions * infractions,HttpFlowData * ssn,bool final_portion) const240 void HttpJsNorm::do_inline(const Field& input, Field& output,
241 HttpInfractions* infractions, HttpFlowData* ssn, bool final_portion) const
242 {
243 const char* ptr = (const char*)input.start();
244 const char* const end = ptr + input.length();
245
246 HttpEventGen* events = ssn->events[HttpCommon::SRC_SERVER];
247
248 bool script_continue = ssn->js_continue;
249 bool script_external = false;
250
251 while (ptr < end)
252 {
253 if (!script_continue)
254 {
255 if (!mpse_otag->find(ptr, end - ptr, match_otag, false, &ptr))
256 break;
257 if (ptr >= end)
258 break;
259
260 MatchContext sctx = {ptr, true, false, false};
261
262 if (ptr[0] == '>')
263 ptr++;
264 else
265 {
266 if (!mpse_attr->find(ptr, end - ptr, match_attr, false, &sctx))
267 break; // the opening tag never ends
268 ptr = sctx.next;
269 }
270
271 trace_logf(1, http_trace, TRACE_JS_PROC, nullptr,
272 "opening tag at %zd offset\n", ptr - (const char*)input.start());
273
274 trace_logf(2, http_trace, TRACE_JS_PROC, nullptr,
275 "script attributes [%s, %s, %s]\n",
276 sctx.is_shortened ? "shortened form" : "full form",
277 sctx.is_javascript ? "JavaScript type" : "unknown type",
278 sctx.is_external ? "external source" : "inline");
279
280 if (sctx.is_shortened)
281 {
282 *infractions += INF_JS_SHORTENED_TAG;
283 events->create_event(EVENT_JS_SHORTENED_TAG);
284 continue;
285 }
286
287 if (!sctx.is_javascript)
288 continue;
289
290 script_external = sctx.is_external;
291
292 // script found
293 if (!script_external)
294 HttpModule::increment_peg_counts(PEG_JS_INLINE);
295 }
296
297 auto& js_ctx = ssn->acquire_js_ctx(identifier_depth, normalization_depth,
298 max_template_nesting, max_bracket_depth, max_scope_depth, ignored_ids);
299 auto output_size_before = js_ctx.script_size();
300
301 auto ret = js_normalize(js_ctx, end, ptr);
302
303 switch (ret)
304 {
305 case JSTokenizer::EOS:
306 js_ctx.reset_depth();
307 break;
308 case JSTokenizer::SCRIPT_ENDED:
309 break;
310 case JSTokenizer::SCRIPT_CONTINUE:
311 break;
312 case JSTokenizer::OPENING_TAG:
313 *infractions += INF_JS_OPENING_TAG;
314 events->create_event(EVENT_JS_OPENING_TAG);
315 break;
316 case JSTokenizer::CLOSING_TAG:
317 *infractions += INF_JS_CLOSING_TAG;
318 events->create_event(EVENT_JS_CLOSING_TAG);
319 break;
320 case JSTokenizer::BAD_TOKEN:
321 case JSTokenizer::WRONG_CLOSING_SYMBOL:
322 case JSTokenizer::ENDED_IN_INNER_SCOPE:
323 *infractions += INF_JS_BAD_TOKEN;
324 events->create_event(EVENT_JS_BAD_TOKEN);
325 break;
326 case JSTokenizer::IDENTIFIER_OVERFLOW:
327 HttpModule::increment_peg_counts(PEG_JS_IDENTIFIER_OVERFLOW);
328 *infractions += INF_JS_IDENTIFIER_OVERFLOW;
329 events->create_event(EVENT_JS_IDENTIFIER_OVERFLOW);
330 break;
331 case JSTokenizer::TEMPLATE_NESTING_OVERFLOW:
332 case JSTokenizer::BRACKET_NESTING_OVERFLOW:
333 *infractions += INF_JS_BRACKET_NEST_OVERFLOW;
334 events->create_event(EVENT_JS_BRACKET_NEST_OVERFLOW);
335 break;
336 case JSTokenizer::SCOPE_NESTING_OVERFLOW:
337 *infractions += INF_JS_SCOPE_NEST_OVERFLOW;
338 events->create_event(EVENT_JS_SCOPE_NEST_OVERFLOW);
339 break;
340 default:
341 assert(false);
342 break;
343 }
344
345 if (script_external && output_size_before != js_ctx.script_size())
346 {
347 *infractions += INF_JS_CODE_IN_EXTERNAL;
348 events->create_event(EVENT_JS_CODE_IN_EXTERNAL);
349 }
350
351 script_continue = ret == JSTokenizer::SCRIPT_CONTINUE;
352 }
353
354 ssn->js_continue = script_continue;
355
356 if (!alive_ctx(ssn))
357 return;
358
359 debug_logf(4, http_trace, TRACE_JS_PROC, nullptr,
360 "input data was %s\n", final_portion ? "last one in PDU" : "a part of PDU");
361
362 auto js_ctx = ssn->js_normalizer;
363 uint32_t data_len = std::min(detection_depth, js_ctx->script_size());
364
365 if (data_len)
366 {
367 const char* data = final_portion ? js_ctx->take_script() : js_ctx->get_script();
368
369 if (data)
370 {
371 trace_logf(1, http_trace, TRACE_JS_DUMP, nullptr,
372 "js_data[%u]: %.*s\n", data_len, data_len, data);
373
374 output.set(data_len, (const uint8_t*)data, final_portion);
375 }
376 }
377
378 if (!script_continue && final_portion)
379 ssn->release_js_ctx();
380 }
381
do_legacy(const Field & input,Field & output,HttpInfractions * infractions,HttpEventGen * events,int max_javascript_whitespaces) const382 void HttpJsNorm::do_legacy(const Field& input, Field& output, HttpInfractions* infractions,
383 HttpEventGen* events, int max_javascript_whitespaces) const
384 {
385 bool js_present = false;
386 int index = 0;
387 const char* ptr = (const char*)input.start();
388 const char* const end = ptr + input.length();
389
390 JSState js;
391 js.allowed_spaces = max_javascript_whitespaces;
392 js.allowed_levels = MAX_ALLOWED_OBFUSCATION;
393 js.alerts = 0;
394
395 uint8_t* buffer = new uint8_t[input.length()];
396
397 while (ptr < end)
398 {
399 int bytes_copied = 0;
400 int mindex;
401
402 // Search for beginning of a javascript
403 if (mpse_otag->find(ptr, end-ptr, search_js_found, false, &mindex) > 0)
404 {
405 const char* js_start = ptr + mindex;
406 const char* const angle_bracket =
407 (const char*)SnortStrnStr(js_start, end - js_start, ">");
408 if (angle_bracket == nullptr || (end - angle_bracket) == 0)
409 break;
410
411 bool type_js = false;
412 if (angle_bracket > js_start)
413 {
414 int mid;
415 const int script_found = mpse_type->find(
416 js_start, (angle_bracket-js_start), search_html_found, false, &mid);
417
418 js_start = angle_bracket + 1;
419 if (script_found > 0)
420 {
421 switch (mid)
422 {
423 case AID_JS:
424 js_present = true;
425 type_js = true;
426 break;
427 default:
428 type_js = false;
429 break;
430 }
431 }
432 else
433 {
434 // if no type or language is found we assume it is a javascript
435 js_present = true;
436 type_js = true;
437 }
438 }
439 // Save before the <script> begins
440 if (js_start > ptr)
441 {
442 if ((js_start - ptr) > (input.length() - index))
443 break;
444 memmove_s(buffer + index, input.length() - index, ptr, js_start - ptr);
445 index += js_start - ptr;
446 }
447
448 ptr = js_start;
449 if (!type_js)
450 continue;
451
452 JSNormalizeDecode(js_start, (uint16_t)(end-js_start), (char*)buffer+index,
453 (uint16_t)(input.length() - index), &ptr, &bytes_copied, &js,
454 uri_param.iis_unicode ? uri_param.unicode_map : nullptr);
455
456 index += bytes_copied;
457 }
458 else
459 break;
460 }
461
462 if (js_present)
463 {
464 if ((ptr < end) && ((input.length() - index) >= (end - ptr)))
465 {
466 memmove_s(buffer + index, input.length() - index, ptr, end - ptr);
467 index += end - ptr;
468 }
469 if (js.alerts)
470 {
471 if (js.alerts & ALERT_LEVELS_EXCEEDED)
472 {
473 *infractions += INF_JS_OBFUSCATION_EXCD;
474 events->create_event(EVENT_JS_OBFUSCATION_EXCD);
475 }
476 if (js.alerts & ALERT_SPACES_EXCEEDED)
477 {
478 *infractions += INF_JS_EXCESS_WS;
479 events->create_event(EVENT_JS_EXCESS_WS);
480 }
481 if (js.alerts & ALERT_MIXED_ENCODINGS)
482 {
483 *infractions += INF_MIXED_ENCODINGS;
484 events->create_event(EVENT_MIXED_ENCODINGS);
485 }
486 }
487 output.set(index, buffer, true);
488 }
489 else
490 {
491 delete[] buffer;
492 output.set(input);
493 }
494 }
495
search_js_found(void *,void *,int index,void * index_ptr,void *)496 int HttpJsNorm::search_js_found(void*, void*, int index, void* index_ptr, void*)
497 {
498 static constexpr int script_start_length = sizeof("<SCRIPT") - 1;
499 *((int*) index_ptr) = index - script_start_length;
500 return 1;
501 }
502
search_html_found(void * id,void *,int,void * id_ptr,void *)503 int HttpJsNorm::search_html_found(void* id, void*, int, void* id_ptr, void*)
504 {
505 *((int*) id_ptr) = (int)(uintptr_t)id;
506 return 1;
507 }
508
match_otag(void *,void *,int index,void * ptr,void *)509 int HttpJsNorm::match_otag(void*, void*, int index, void* ptr, void*)
510 {
511 *(char**)ptr += index;
512 return 1;
513 }
514
match_attr(void * pid,void *,int index,void * sctx,void *)515 int HttpJsNorm::match_attr(void* pid, void*, int index, void* sctx, void*)
516 {
517 MatchContext* ctx = (MatchContext*)sctx;
518 AttrId id = (AttrId)(uintptr_t)pid;
519 const char* c;
520
521 switch (id)
522 {
523 case AID_SLASH:
524 if (*(ctx->next + index) == '>')
525 {
526 ctx->is_shortened = true;
527 ctx->next += index;
528 return 1;
529 }
530 else
531 {
532 ctx->is_shortened = false;
533 return 0;
534 }
535
536 case AID_GT:
537 ctx->next += index;
538 return 1;
539
540 case AID_SRC:
541 c = ctx->next + index;
542 while (*c == ' ') c++;
543 ctx->is_external = ctx->is_external || *c == '=';
544 return 0;
545
546 case AID_JS:
547 ctx->is_javascript = true;
548 return 0;
549
550 case AID_ECMA:
551 ctx->is_javascript = true;
552 return 0;
553
554 case AID_VB:
555 ctx->is_javascript = false;
556 return 0;
557
558 default:
559 ctx->is_external = false;
560 ctx->is_javascript = false;
561 ctx->next += index;
562 return 1;
563 }
564 }
565