1 //--------------------------------------------------------------------------
2 // Copyright (C) 2021-2021 Cisco and/or its affiliates. All rights reserved.
3 //
4 // This program is free software; you can redistribute it and/or modify it
5 // under the terms of the GNU General Public License Version 2 as published
6 // by the Free Software Foundation. You may not use, modify or distribute
7 // this program under any other version of the GNU General Public License.
8 //
9 // This program is distributed in the hope that it will be useful, but
10 // WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 // General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License along
15 // with this program; if not, write to the Free Software Foundation, Inc.,
16 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 //--------------------------------------------------------------------------
18 // js_normalizer_test.cc author Oleksandr Serhiienko <oserhiie@cisco.com>
19
20 #ifdef HAVE_CONFIG_H
21 #include "config.h"
22 #endif
23
#include <cstring>
#include <memory>
25
26 #include "catch/catch.hpp"
27
28 #include "utils/js_identifier_ctx.h"
29 #include "utils/js_normalizer.h"
30 #include "utils/test/js_test_utils.h"
31
32 using namespace snort;
33
34 // Unit tests
35
36 #ifdef CATCH_TEST_BUILD
37
// Presumably the size of a destination buffer used by tests.
// NOTE(review): not referenced by any macro or test visible in this part of the file.
#define DST_SIZE 512
39
// Normalizes the whole array `src` with a stub identifier context. The length
// passed is sizeof(src), so for a string-literal array the terminating NUL is
// part of the input.
// Intentionally not wrapped in braces: `ret`, `ptr`, `act_len` and `dst` stay in
// the caller's scope for a following VALIDATE / VALIDATE_FAIL, which also
// delete[]s the buffer obtained from take_script().
#define NORMALIZE(src) \
JSIdentifierCtxStub ident_ctx; \
JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth); \
auto ret = norm.normalize(src, sizeof(src)); \
const char* ptr = norm.get_src_next(); \
int act_len = norm.script_size(); \
const char* dst = norm.take_script();
47
// Checks the variables left behind by NORMALIZE(src): the return code is
// SCRIPT_CONTINUE, the whole array `src` (sizeof(src) bytes) was consumed, and
// the produced script equals `expected` (a string-literal array, NUL excluded).
// Finally releases `dst`.
// CHECK does not stop the test on failure, so the content comparison is guarded
// by the length: with a mismatching `act_len` it fails without letting memcmp
// read past the end of `expected` or `dst`.
#define VALIDATE(src, expected) \
    CHECK(ret == JSTokenizer::SCRIPT_CONTINUE); \
    CHECK((ptr - src) == sizeof(src)); \
    CHECK(act_len == sizeof(expected) - 1); \
    CHECK((static_cast<size_t>(act_len) == sizeof(expected) - 1 && \
        !memcmp(dst, expected, sizeof(expected) - 1))); \
    delete[] dst;
54
// Counterpart of VALIDATE for inputs expected to stop early: checks that
// NORMALIZE(src) returned `ret_code`, that the next unread source position is
// `ptr_offset` bytes into `src`, and that the script produced so far equals
// `expected` (a string-literal array, NUL excluded). Finally releases `dst`.
// CHECK does not stop the test on failure, so the content comparison is guarded
// by the length: with a mismatching `act_len` it fails without letting memcmp
// read past the end of `expected` or `dst`.
#define VALIDATE_FAIL(src, expected, ret_code, ptr_offset) \
    CHECK(ret == ret_code); \
    CHECK((ptr - src) == ptr_offset); \
    CHECK(act_len == sizeof(expected) - 1); \
    CHECK((static_cast<size_t>(act_len) == sizeof(expected) - 1 && \
        !memcmp(dst, expected, sizeof(expected) - 1))); \
    delete[] dst;
61
62
// Runs a fresh normalizer (stub identifier context, normalization depth `depth`)
// over `src_len` bytes of `src` inside its own scope. Results are handed back
// through caller-provided lvalues: `ret` (return code), `ptr` (next unread
// source position) and `len` (script size). The script is copied into `dst`
// only after its size has been REQUIREd to equal `dst_len`.
#define NORMALIZE_L(src, src_len, dst, dst_len, depth, ret, ptr, len) \
    { \
        JSIdentifierCtxStub ident_ctx; \
        JSNormalizer norm(ident_ctx, depth, max_template_nesting, max_bracket_depth); \
        ret = norm.normalize(src, src_len); \
        ptr = norm.get_src_next(); \
        len = norm.script_size(); \
        REQUIRE(len == dst_len); \
        memcpy(dst, norm.get_script(), dst_len); \
    }
74
// Feeds one chunk (`slen` bytes of `src`) to the `norm` object of the enclosing
// scope, expects SCRIPT_CONTINUE with the whole chunk consumed, then takes the
// produced script and copies exactly `dlen` bytes of it into `dst`.
// The buffer returned by take_script() is owned here. It is held in a
// unique_ptr so that it is released even when one of the REQUIREs fails:
// REQUIRE aborts the test case (by throwing, in a default Catch build), which
// would skip a trailing delete[].
#define DO(src, slen, dst, dlen) \
    { \
        auto ret = norm.normalize(src, slen); \
        CHECK(ret == JSTokenizer::SCRIPT_CONTINUE); \
        auto nsrc = norm.get_src_next(); \
        int act_len = norm.script_size(); \
        std::unique_ptr<const char[]> ptr(norm.take_script()); \
        REQUIRE(nsrc - src == slen); \
        REQUIRE(act_len == dlen); \
        memcpy(dst, ptr.get(), dlen); \
    }
87
// Feeds `slen` bytes of `src` to the `norm` object of the enclosing scope and
// CHECKs that the return code equals `rexp`. The script accumulated in the
// normalizer must be exactly `dlen` bytes long; it is copied into `dst` while
// the normalizer keeps ownership of it (get_script(), nothing to free here).
#define TRY(src, slen, dst, dlen, rexp) \
    { \
        auto ret = norm.normalize(src, slen); \
        CHECK(ret == rexp); \
        int act_len = norm.script_size(); \
        REQUIRE(act_len == dlen); \
        memcpy(dst, norm.get_script(), dlen); \
    }
97
// Feeds the closing "</script>" tag (without its NUL) to the `norm` object of
// the enclosing scope and CHECKs that the normalizer reports SCRIPT_ENDED.
#define CLOSE() \
    { \
        const char end[] = "</script>"; \
        CHECK(norm.normalize(end, sizeof(end) - 1) == JSTokenizer::SCRIPT_ENDED); \
    }
104
// Single-chunk check using a real JSIdentifierCtx (built from norm_depth,
// max_scope_depth and s_ignored_ids): normalizes `src1`, compares the output
// with `exp1`, then closes the script with CLOSE().
// `src1` and `exp1` must be arrays, since their lengths come from sizeof.
#define NORMALIZE_S(src1, exp1) \
    { \
        char dst1[sizeof(exp1)]; \
        \
        JSIdentifierCtx ident_ctx(norm_depth, max_scope_depth, s_ignored_ids); \
        JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth); \
        \
        DO(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1); \
        CHECK(memcmp(exp1, dst1, sizeof(exp1) - 1) == 0); \
        \
        CLOSE(); \
    }
117
// Two-chunk check using a real JSIdentifierCtx (built from norm_depth,
// max_scope_depth and s_ignored_ids): `src1` and `src2` are fed to the same
// normalizer one after the other, the output taken after each chunk is
// compared with `exp1` / `exp2`, then the script is closed with CLOSE().
// All arguments must be arrays, since their lengths come from sizeof.
#define NORMALIZE_T(src1, src2, exp1, exp2) \
    { \
        char dst1[sizeof(exp1)]; \
        char dst2[sizeof(exp2)]; \
        \
        JSIdentifierCtx ident_ctx(norm_depth, max_scope_depth, s_ignored_ids); \
        JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth); \
        \
        DO(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1); \
        CHECK(memcmp(exp1, dst1, sizeof(exp1) - 1) == 0); \
        \
        DO(src2, sizeof(src2) - 1, dst2, sizeof(dst2) - 1); \
        CHECK(memcmp(exp2, dst2, sizeof(exp2) - 1) == 0); \
        \
        CLOSE(); \
    }
134
// Single-chunk check with the stub identifier context: normalizes `src1`,
// compares the output with `exp1`, then closes the script with CLOSE().
// `src1` and `exp1` must be arrays, since their lengths come from sizeof.
#define NORMALIZE_1(src1, exp1) \
    { \
        char dst1[sizeof(exp1)]; \
        \
        JSIdentifierCtxStub ident_ctx; \
        JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth); \
        \
        DO(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1); \
        CHECK(memcmp(exp1, dst1, sizeof(exp1) - 1) == 0); \
        \
        CLOSE(); \
    }
147
// Two-chunk check with the stub identifier context: `src1` and `src2` are fed
// to the same normalizer one after the other, the output taken after each
// chunk is compared with `exp1` / `exp2`, then the script is closed with CLOSE().
// All arguments must be arrays, since their lengths come from sizeof.
#define NORMALIZE_2(src1, src2, exp1, exp2) \
    { \
        char dst1[sizeof(exp1)]; \
        char dst2[sizeof(exp2)]; \
        \
        JSIdentifierCtxStub ident_ctx; \
        JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth); \
        \
        DO(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1); \
        CHECK(memcmp(exp1, dst1, sizeof(exp1) - 1) == 0); \
        \
        DO(src2, sizeof(src2) - 1, dst2, sizeof(dst2) - 1); \
        CHECK(memcmp(exp2, dst2, sizeof(exp2) - 1) == 0); \
        \
        CLOSE(); \
    }
164
// Three-chunk check with the stub identifier context: `src1`..`src3` are fed
// to the same normalizer in order, the output taken after each chunk is
// compared with the matching `exp1`..`exp3`, then the script is closed with
// CLOSE(). All arguments must be arrays, since their lengths come from sizeof.
#define NORMALIZE_3(src1, src2, src3, exp1, exp2, exp3) \
    { \
        char dst1[sizeof(exp1)]; \
        char dst2[sizeof(exp2)]; \
        char dst3[sizeof(exp3)]; \
        \
        JSIdentifierCtxStub ident_ctx; \
        JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth); \
        \
        DO(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1); \
        CHECK(memcmp(exp1, dst1, sizeof(exp1) - 1) == 0); \
        \
        DO(src2, sizeof(src2) - 1, dst2, sizeof(dst2) - 1); \
        CHECK(memcmp(exp2, dst2, sizeof(exp2) - 1) == 0); \
        \
        DO(src3, sizeof(src3) - 1, dst3, sizeof(dst3) - 1); \
        CHECK(memcmp(exp3, dst3, sizeof(exp3) - 1) == 0); \
        \
        CLOSE(); \
    }
185
// Single-chunk negative check with the stub identifier context: normalizing
// `src1` must return `code`, and the script accumulated so far must equal
// `exp1`. No closing tag is fed afterwards.
#define NORM_BAD_1(src1, exp1, code) \
    { \
        char dst1[sizeof(exp1)]; \
        \
        JSIdentifierCtxStub ident_ctx; \
        JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth); \
        \
        TRY(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1, code); \
        CHECK(memcmp(exp1, dst1, sizeof(exp1) - 1) == 0); \
    }
196
// Two-chunk negative check with the stub identifier context: `src1` must
// normalize cleanly to `exp1` (via DO), then `src2` must return `code` with
// the accumulated script equal to `exp2` (via TRY). No closing tag is fed.
#define NORM_BAD_2(src1, src2, exp1, exp2, code) \
    { \
        char dst1[sizeof(exp1)]; \
        char dst2[sizeof(exp2)]; \
        \
        JSIdentifierCtxStub ident_ctx; \
        JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth); \
        \
        DO(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1); \
        CHECK(memcmp(exp1, dst1, sizeof(exp1) - 1) == 0); \
        \
        TRY(src2, sizeof(src2) - 1, dst2, sizeof(dst2) - 1, code); \
        CHECK(memcmp(exp2, dst2, sizeof(exp2) - 1) == 0); \
    }
211
// Three-chunk negative check with the stub identifier context: `src1` and
// `src2` must normalize cleanly to `exp1` / `exp2` (via DO), then `src3` must
// return `code` with the accumulated script equal to `exp3` (via TRY).
// No closing tag is fed.
#define NORM_BAD_3(src1, src2, src3, exp1, exp2, exp3, code) \
    { \
        char dst1[sizeof(exp1)]; \
        char dst2[sizeof(exp2)]; \
        char dst3[sizeof(exp3)]; \
        \
        JSIdentifierCtxStub ident_ctx; \
        JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth); \
        \
        DO(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1); \
        CHECK(memcmp(exp1, dst1, sizeof(exp1) - 1) == 0); \
        \
        DO(src2, sizeof(src2) - 1, dst2, sizeof(dst2) - 1); \
        CHECK(memcmp(exp2, dst2, sizeof(exp2) - 1) == 0); \
        \
        TRY(src3, sizeof(src3) - 1, dst3, sizeof(dst3) - 1, code); \
        CHECK(memcmp(exp3, dst3, sizeof(exp3) - 1) == 0); \
    }
230
// Same flow as a two-chunk positive check with the stub identifier context,
// but the normalizer is constructed with the extra trailing argument `limit`.
// NOTE(review): the meaning of `limit` is defined by the JSNormalizer
// constructor, which is not visible here.
#define NORM_LIMITED(limit, src1, src2, exp1, exp2) \
    { \
        char dst1[sizeof(exp1)]; \
        char dst2[sizeof(exp2)]; \
        \
        JSIdentifierCtxStub ident_ctx; \
        JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth, limit); \
        \
        DO(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1); \
        CHECK(memcmp(exp1, dst1, sizeof(exp1) - 1) == 0); \
        \
        DO(src2, sizeof(src2) - 1, dst2, sizeof(dst2) - 1); \
        CHECK(memcmp(exp2, dst2, sizeof(exp2) - 1) == 0); \
        \
        CLOSE(); \
    }
247
// Feeds `src1`, `src2` and the closing "</script>" tag to one normalizer (stub
// identifier context) without taking the script in between, then compares the
// script accumulated over all chunks with `exp`.
// Every step is a REQUIRE, so the first unexpected return code or a size
// mismatch stops the test before the buffer is compared.
#define NORM_COMBINED_2(src1, src2, exp) \
    { \
        JSIdentifierCtxStub ident_ctx; \
        JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth); \
        \
        REQUIRE(norm.normalize(src1, sizeof(src1) - 1) == JSTokenizer::SCRIPT_CONTINUE); \
        REQUIRE(norm.normalize(src2, sizeof(src2) - 1) == JSTokenizer::SCRIPT_CONTINUE); \
        \
        const char end[] = "</script>"; \
        REQUIRE(norm.normalize(end, sizeof(end) - 1) == JSTokenizer::SCRIPT_ENDED); \
        \
        size_t act_len = norm.script_size(); \
        REQUIRE(act_len == sizeof(exp) - 1); \
        \
        CHECK(memcmp(exp, norm.get_script(), sizeof(exp) - 1) == 0); \
    }
269
// Feeds `src1`, `src2`, `src3` and the closing "</script>" tag to one
// normalizer (stub identifier context) without taking the script in between,
// then compares the script accumulated over all chunks with `exp`.
// Every step is a REQUIRE, so the first unexpected return code or a size
// mismatch stops the test before the buffer is compared.
#define NORM_COMBINED_3(src1, src2, src3, exp) \
    { \
        JSIdentifierCtxStub ident_ctx; \
        JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth); \
        \
        REQUIRE(norm.normalize(src1, sizeof(src1) - 1) == JSTokenizer::SCRIPT_CONTINUE); \
        REQUIRE(norm.normalize(src2, sizeof(src2) - 1) == JSTokenizer::SCRIPT_CONTINUE); \
        REQUIRE(norm.normalize(src3, sizeof(src3) - 1) == JSTokenizer::SCRIPT_CONTINUE); \
        \
        const char end[] = "</script>"; \
        REQUIRE(norm.normalize(end, sizeof(end) - 1) == JSTokenizer::SCRIPT_ENDED); \
        \
        size_t act_len = norm.script_size(); \
        REQUIRE(act_len == sizeof(exp) - 1); \
        \
        CHECK(memcmp(exp, norm.get_script(), sizeof(exp) - 1) == 0); \
    }
294
// Negative two-chunk variant of the combined check: `src1` must return
// SCRIPT_CONTINUE, `src2` must return `eret`, and the script accumulated over
// both chunks must equal `exp`. No closing tag is fed.
#define NORM_COMBINED_BAD_2(src1, src2, exp, eret) \
    { \
        JSIdentifierCtxStub ident_ctx; \
        JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth); \
        \
        REQUIRE(norm.normalize(src1, sizeof(src1) - 1) == JSTokenizer::SCRIPT_CONTINUE); \
        REQUIRE(norm.normalize(src2, sizeof(src2) - 1) == eret); \
        \
        size_t act_len = norm.script_size(); \
        REQUIRE(act_len == sizeof(exp) - 1); \
        \
        CHECK(memcmp(exp, norm.get_script(), sizeof(exp) - 1) == 0); \
    }
312
// Negative three-chunk variant of the combined check: `src1` and `src2` must
// return SCRIPT_CONTINUE, `src3` must return `eret`, and the script
// accumulated over all chunks must equal `exp`. No closing tag is fed.
#define NORM_COMBINED_BAD_3(src1, src2, src3, exp, eret) \
    { \
        JSIdentifierCtxStub ident_ctx; \
        JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth); \
        \
        REQUIRE(norm.normalize(src1, sizeof(src1) - 1) == JSTokenizer::SCRIPT_CONTINUE); \
        REQUIRE(norm.normalize(src2, sizeof(src2) - 1) == JSTokenizer::SCRIPT_CONTINUE); \
        REQUIRE(norm.normalize(src3, sizeof(src3) - 1) == eret); \
        \
        size_t act_len = norm.script_size(); \
        REQUIRE(act_len == sizeof(exp) - 1); \
        \
        CHECK(memcmp(exp, norm.get_script(), sizeof(exp) - 1) == 0); \
    }
333
// Two-chunk combined check with the normalizer constructed with the extra
// trailing argument `limit`: feeds `src1`, `src2` and the closing "</script>"
// tag, then compares the accumulated script with `exp`.
// NOTE(review): the meaning of `limit` is defined by the JSNormalizer
// constructor, which is not visible here.
#define NORM_COMBINED_LIMITED_2(limit, src1, src2, exp) \
    { \
        JSIdentifierCtxStub ident_ctx; \
        JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth, limit); \
        \
        REQUIRE(norm.normalize(src1, sizeof(src1) - 1) == JSTokenizer::SCRIPT_CONTINUE); \
        REQUIRE(norm.normalize(src2, sizeof(src2) - 1) == JSTokenizer::SCRIPT_CONTINUE); \
        \
        const char end[] = "</script>"; \
        REQUIRE(norm.normalize(end, sizeof(end) - 1) == JSTokenizer::SCRIPT_ENDED); \
        \
        size_t act_len = norm.script_size(); \
        REQUIRE(act_len == sizeof(exp) - 1); \
        \
        CHECK(memcmp(exp, norm.get_script(), sizeof(exp) - 1) == 0); \
    }
355
// Two-chunk combined check using a real JSIdentifierCtx (built from
// norm_depth, max_scope_depth and s_ignored_ids): feeds `src1`, `src2` and the
// closing "</script>" tag, then compares the accumulated script with `exp`.
#define NORM_COMBINED_S_2(src1, src2, exp) \
    { \
        JSIdentifierCtx ident_ctx(norm_depth, max_scope_depth, s_ignored_ids); \
        JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth); \
        \
        REQUIRE(norm.normalize(src1, sizeof(src1) - 1) == JSTokenizer::SCRIPT_CONTINUE); \
        REQUIRE(norm.normalize(src2, sizeof(src2) - 1) == JSTokenizer::SCRIPT_CONTINUE); \
        \
        const char end[] = "</script>"; \
        REQUIRE(norm.normalize(end, sizeof(end) - 1) == JSTokenizer::SCRIPT_ENDED); \
        \
        size_t act_len = norm.script_size(); \
        REQUIRE(act_len == sizeof(exp) - 1); \
        \
        CHECK(memcmp(exp, norm.get_script(), sizeof(exp) - 1) == 0); \
    }
377
// ClamAV test vectors from: https://github.com/Cisco-Talos/clamav/blob/main/unit_tests/check_jsnorm.c
//
// Each clamav_bufN is a raw script and clamav_expectedN the output the
// normalizer is expected to produce for it. All vectors are read-only inputs,
// so every array is declared const.

// case 0: declarations, numeric literals, single-line and multi-line comments
static const char clamav_buf0[] =
    "function foo(a, b) {\n"
    "var x = 1.9e2*2*a/ 4.;\n"
    "var y = 'test\\'tst';//var\n"
    "x=b[5],/* multiline\nvar z=6;\nsome*some/other**/"
    "z=x/y;/* multiline oneline */var t=z/a;\n"
    "z=[test,testi];"
    "document.writeln('something\\n');}";

static const char clamav_expected0[] =
    "function foo(a,b){var x=1.9e2*2*a/4.;var y='test\\'tst';x=b[5],z=x/y;var t=z/a;"
    "z=[test,testi];document.writeln('something\\n');}";

// case 1: the input spells the escape as text (backslash, 'u', digits);
// the expected output holds the character itself
static const char clamav_buf1[] =
    "function () { var id\\u1234tx;}";

static const char clamav_expected1[] =
    "function(){var id\u1234tx;}";

// case 2: concatenation of double- and single-quoted strings
static const char clamav_buf2[] =
    "function () { var tst=\"a\"+'bc'+ 'd'; }";

static const char clamav_expected2[] =
    "function(){var tst=\"a\"+'bc'+'d';}";

// case 3: percent-encoded payload inside a string literal is left untouched
static const char clamav_buf3[] =
    "dF('bmfsu%2639%2638x11u%2638%263%3A%264C1');";

static const char clamav_expected3[] =
    "dF('bmfsu%2639%2638x11u%2638%263%3A%264C1');";

// filler for the long string literals of cases 7 and 8
#define B64 "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"

// case 4: long percent-encoded string literals; output equals input
static const char clamav_buf4[] =
    "qbphzrag.jevgr(harfpncr('%3P%73%63%72%69%70%74%20%6P%61%6R%67%75%61%67%65%3Q%22%6N%61%76%61"
    "%73%63%72%69%70%74%22%3R%66%75%6R%63%74%69%6S%6R%20%64%46%28%73%29%7O%76%61%72%20%73%31"
    "%3Q%75%6R%65%73%63%61%70%65%28%73%2R%73%75%62%73%74%72%28%30%2P%73%2R%6P%65%6R%67%74%68"
    "%2Q%31%29%29%3O%20%76%61%72%20%74%3Q%27%27%3O%66%6S%72%28%69%3Q%30%3O%69%3P%73%31%2R%6P"
    "%65%6R%67%74%68%3O%69%2O%2O%29%74%2O%3Q%53%74%72%69%6R%67%2R%66%72%6S%6Q%43%68%61%72%43"
    "%6S%64%65%28%73%31%2R%63%68%61%72%43%6S%64%65%41%74%28%69%29%2Q%73%2R%73%75%62%73%74%72"
    "%28%73%2R%6P%65%6R%67%74%68%2Q%31%2P%31%29%29%3O%64%6S%63%75%6Q%65%6R%74%2R%77%72%69%74"
    "%65%28%75%6R%65%73%63%61%70%65%28%74%29%29%3O%7Q%3P%2S%73%63%72%69%70%74%3R'));"
    "riny(qS('tV%285%3O%285%3Nsdwjl%28585%3N7%28586Q%28585%3N7%3P%7P55l%28585%3N7%3P%28585%3N7"
    "%28586R%28585%3N8T5%285%3N%285%3P%286R3'));";

static const char clamav_expected4[] =
    "qbphzrag.jevgr(harfpncr('%3P%73%63%72%69%70%74%20%6P%61%6R%67%75%61%67%65%3Q%22%6N%61%76%61"
    "%73%63%72%69%70%74%22%3R%66%75%6R%63%74%69%6S%6R%20%64%46%28%73%29%7O%76%61%72%20%73%31"
    "%3Q%75%6R%65%73%63%61%70%65%28%73%2R%73%75%62%73%74%72%28%30%2P%73%2R%6P%65%6R%67%74%68"
    "%2Q%31%29%29%3O%20%76%61%72%20%74%3Q%27%27%3O%66%6S%72%28%69%3Q%30%3O%69%3P%73%31%2R%6P"
    "%65%6R%67%74%68%3O%69%2O%2O%29%74%2O%3Q%53%74%72%69%6R%67%2R%66%72%6S%6Q%43%68%61%72%43"
    "%6S%64%65%28%73%31%2R%63%68%61%72%43%6S%64%65%41%74%28%69%29%2Q%73%2R%73%75%62%73%74%72"
    "%28%73%2R%6P%65%6R%67%74%68%2Q%31%2P%31%29%29%3O%64%6S%63%75%6Q%65%6R%74%2R%77%72%69%74"
    "%65%28%75%6R%65%73%63%61%70%65%28%74%29%29%3O%7Q%3P%2S%73%63%72%69%70%74%3R'));"
    "riny(qS('tV%285%3O%285%3Nsdwjl%28585%3N7%28586Q%28585%3N7%3P%7P55l%28585%3N7%3P%28585%3N7"
    "%28586R%28585%3N8T5%285%3N%285%3P%286R3'));";

// case 5: only the space before the parameter list is removed
static const char clamav_buf5[] =
    "shapgvba (c,n,p,x,r,e){}('0(\\'1\\');',2,2,'nyreg|j00g'.fcyvg('|'),0,{});";

static const char clamav_expected5[] =
    "shapgvba(c,n,p,x,r,e){}('0(\\'1\\');',2,2,'nyreg|j00g'.fcyvg('|'),0,{});";

// case 6: `$` used as a function name
static const char clamav_buf6[] =
    "function $(p,a,c,k,e,d){} something(); $('0(\\'1\\');',2,2,'alert|w00t'.split('|'),0,{});";

static const char clamav_expected6[] =
    "function $(p,a,c,k,e,d){}something();$('0(\\'1\\');',2,2,'alert|w00t'.split('|'),0,{});";

// case 7: long double-quoted string literal
static const char clamav_buf7[] =
    "var z=\"tst" B64 "tst\";";

static const char clamav_expected7[] =
    "var z=\"tst" B64 "tst\";";

// case 8: long single-quoted string literal
static const char clamav_buf8[] =
    "var z=\'tst" B64 "tst\';";

static const char clamav_expected8[] =
    "var z=\'tst" B64 "tst\';";

// case 9: percent-encoded string literal; output equals input
static const char clamav_buf9[] =
    "riny(harfpncr('%61%6p%65%72%74%28%27%74%65%73%74%27%29%3o'));";

static const char clamav_expected9[] =
    "riny(harfpncr('%61%6p%65%72%74%28%27%74%65%73%74%27%29%3o'));";

// case 10: malformed function declarations with an unclosed brace
static const char clamav_buf10[] =
    "function $ $() dF(x); function (p,a,c,k,e,r){function $(){}";

static const char clamav_expected10[] =
    "function $ $()dF(x);function(p,a,c,k,e,r){function $(){}";

// case 11: whitespace between an integer literal and the semicolon
static const char clamav_buf11[] =
    "var x=123456789 ;";

static const char clamav_expected11[] =
    "var x=123456789;";

// case 12: \u0000 escape spelled out inside a string literal; output equals input
static const char clamav_buf12[] =
    "var x='test\\u0000test';";

static const char clamav_expected12[] =
    "var x='test\\u0000test';";

// case 13: invalid escape sequence in an identifier; output equals input
static const char clamav_buf13[] =
    "var x\\s12345";

static const char clamav_expected13[] =
    "var x\\s12345";

// case 14: input ends in the middle of a string literal
static const char clamav_buf14[] =
    "document.write(unescape('test%20test";

static const char clamav_expected14[] =
    "document.write(unescape('test%20test";
495
496 TEST_CASE("clamav tests", "[JSNormalizer]")
497 {
498 SECTION("test_case_0 - mixed identifiers and comments")
499 {
500 NORMALIZE(clamav_buf0);
501 VALIDATE(clamav_buf0, clamav_expected0);
502 }
503 SECTION("test_case_1 - escaped unicode in identifier")
504 {
505 NORMALIZE(clamav_buf1);
506 VALIDATE(clamav_buf1, clamav_expected1);
507 }
508 SECTION("test_case_2 - accumulated string assignment")
509 {
510 NORMALIZE(clamav_buf2);
511 VALIDATE(clamav_buf2, clamav_expected2);
512 }
513 SECTION("test_case_3 - percent-encoded string")
514 {
515 NORMALIZE(clamav_buf3);
516 VALIDATE(clamav_buf3, clamav_expected3);
517 }
518 SECTION("test_case_4 - percent-encoded string")
519 {
520 NORMALIZE(clamav_buf4);
521 VALIDATE(clamav_buf4, clamav_expected4);
522 }
523 SECTION("test_case_5 - obfuscated script")
524 {
525 NORMALIZE(clamav_buf5);
526 VALIDATE(clamav_buf5, clamav_expected5);
527 }
528 SECTION("test_case_6 - obfuscated script")
529 {
530 NORMALIZE(clamav_buf6);
531 VALIDATE(clamav_buf6, clamav_expected6);
532 }
533 SECTION("test_case_7 - single quotes string")
534 {
535 NORMALIZE(clamav_buf7);
536 VALIDATE(clamav_buf7, clamav_expected7);
537 }
538 SECTION("test_case_8 - double quotes string")
539 {
540 NORMALIZE(clamav_buf8);
541 VALIDATE(clamav_buf8, clamav_expected8);
542 }
543 SECTION("test_case_9 - obfuscated script")
544 {
545 NORMALIZE(clamav_buf9);
546 VALIDATE(clamav_buf9, clamav_expected9);
547 }
548 SECTION("test_case_10 - obfuscated script")
549 {
550 NORMALIZE(clamav_buf10);
551 VALIDATE(clamav_buf10, clamav_expected10);
552 }
553 SECTION("test_case_11 - integer literal")
554 {
555 NORMALIZE(clamav_buf11);
556 VALIDATE(clamav_buf11, clamav_expected11);
557 }
558 SECTION("test_case_12 - escaped unicode in string literal")
559 {
560 NORMALIZE(clamav_buf12);
561 VALIDATE(clamav_buf12, clamav_expected12);
562 }
563 // FIXIT-L this should be revisited
564 SECTION("test_case_13 - invalid escape sequence")
565 {
566 NORMALIZE(clamav_buf13);
567 VALIDATE(clamav_buf13, clamav_expected13);
568 }
569 SECTION("test_case_14 - EOF in the middle of string literal")
570 {
571 NORMALIZE(clamav_buf14);
572 // trailing \0 is included as a part of the string
573 // to utilize available macros we alter the read length
574 act_len -= 1;
575 VALIDATE(clamav_buf14, clamav_expected14);
576 }
577 }
578
// Test vectors for all match patterns
// Each all_patterns_bufN is the raw input of one section of the "all patterns"
// test case; all_patterns_expectedN is the output it is validated against.

// section "whitespaces and special characters": control and multi-byte
// whitespace bytes surrounding a single declaration
static const char all_patterns_buf0[] =
"var \x9\xB\xC\x20\xA0\x8\xA\xD\xEF\xBB\xBF\xE2\x80\xA8\xE2\x80\xA9\n"
" \n\t\r\v a; \0";

static const char all_patterns_expected0[] =
"var a;";

// section "comments": HTML-style, single-line and multi-line comments
static const char all_patterns_buf1[] =
"<!-- var html_comment = 'comment' ;\n"
"var a = 1;// first var\nvar b = 2; /* second var\nvar foo = 'bar'\n*/"
"\nvar c = 3; // third var";

static const char all_patterns_expected1[] =
"var a=1;var b=2;var c=3;";

// section "punctuators"
static const char all_patterns_buf2[] =
"{ a } ( a ) [ a ] a >= b a == b a != b a === b a !== b a /= b . ; , "
"a < b a > b a <= b a + b- c a * b a % b a ++; --b a << 2 a >> 3 a >>> 4 a & b a | b "
"a ^ b ! a a && b a || b ?: a = 2 a += 2 a -= 2 a *= 2 a %= 2 a <<= b a >>= b a >>>= b "
"a &= b a|= b a ^= b a/b ~ a";

static const char all_patterns_expected2[] =
"{a}(a)[a]a>=b a==b a!=b a===b a!==b a/=b.;,a<b a>b a<=b a+b-c a*b "
"a%b a++;--b a<<2 a>>3 a>>>4 a&b a|b a^b!a a&&b a||b?:a=2 a+=2 a-=2 a*=2 a%=2 a<<=b "
"a>>=b a>>>=b a&=b a|=b a^=b a/b~a";

// section "keywords": the expected output is identical to the input
static const char all_patterns_buf3[] =
"break case debugger in import protected do else function try "
"implements static instanceof new this class let a typeof var a with enum private catch "
"continue default extends public finally for if super yield return switch throw const a "
"interface void while delete export package";

static const char all_patterns_expected3[] =
"break case debugger in import protected do else function try "
"implements static instanceof new this class let a typeof var a with enum private catch "
"continue default extends public finally for if super yield return switch throw const a "
"interface void while delete export package";

// section "literals": regex, numeric, string and template literals
static const char all_patterns_buf4[] =
"/regex/g undefined null true false 2 23 2.3 2.23 .2 .02 4. +2 -2 "
"+3.3 -3.3 +23 -32 2.3E45 3.E34 -2.3E45 -3.E34 +2.3E45 +3.E34 0x1234 0XFFFF Infinity "
"\xE2\x88\x9E NaN \"\" \"double string\" \"d\" '' 'single string' 's' x=/regex/gs "
"x=2/2/1 `\ntemplate\n`";

static const char all_patterns_expected4[] =
"/regex/g undefined null true false 2 23 2.3 2.23 .2 .02 4.+2-2"
"+3.3-3.3+23-32 2.3E45 3.E34-2.3E45-3.E34+2.3E45+3.E34 0x1234 0XFFFF Infinity "
"\xE2\x88\x9E NaN \"\" \"double string\" \"d\" '' 'single string' 's' x=/regex/gs "
"x=2/2/1 `\ntemplate\n`";

// section "identifiers": the input spells \uXXXX escapes as text (backslash
// doubled in the C++ literal); the expected output holds the characters themselves
static const char all_patterns_buf5[] =
"$2abc _2abc abc $__$ 肖晗 XÆA12 \\u0041abc \\u00FBdef \\u1234ghi ab\xE2\x80\xA8ww "
"ab\xE2\x80\xA9ww ab\xEF\xBB\xBFww ab∞ww 2abc";

static const char all_patterns_expected5[] =
"$2abc _2abc abc $__$ 肖晗 XÆA12 \u0041abc \u00FBdef \u1234ghi ab;ww "
"ab;ww ab ww ab ∞ ww 2 abc";

// section "template literals": tagged template with a substitution
static const char all_patterns_buf6[] =
"tag` template\n ${ a + b } template`";

static const char all_patterns_expected6[] =
"tag ` template\n ${a+b} template`";
643
644 TEST_CASE("all patterns", "[JSNormalizer]")
645 {
646 SECTION("whitespaces and special characters")
647 {
648 NORMALIZE(all_patterns_buf0);
649 VALIDATE(all_patterns_buf0, all_patterns_expected0);
650 }
651 SECTION("comments")
652 {
653 NORMALIZE(all_patterns_buf1);
654 VALIDATE(all_patterns_buf1, all_patterns_expected1);
655 }
656 SECTION("directives")
657 {
658 const char src0[] = "'use strict'\nvar a = 1;";
659 const char src1[] = "\"use strict\"\nvar a = 1;";
660 const char src2[] = "'use strict';var a = 1;";
661 const char src3[] = "\"use strict\";var a = 1;";
662 const char src4[] = "var a = 1 'use strict';";
663
664 const char expected0[] = "'use strict';var a=1;";
665 const char expected1[] = "\"use strict\";var a=1;";
666 const char expected2[] = "var a=1 'use strict';";
667
668 char dst0[sizeof(expected0) - 1];
669 char dst1[sizeof(expected1) - 1];
670 char dst2[sizeof(expected0) - 1];
671 char dst3[sizeof(expected1) - 1];
672 char dst4[sizeof(expected2) - 1];
673
674 int ret0, ret1, ret2, ret3, ret4;
675 const char *ptr0, *ptr1, *ptr2, *ptr3, *ptr4;
676 int act_len0, act_len1, act_len2, act_len3, act_len4;
677
678 NORMALIZE_L(src0, sizeof(src0), dst0, sizeof(dst0), norm_depth, ret0, ptr0, act_len0);
679 NORMALIZE_L(src1, sizeof(src1), dst1, sizeof(dst1), norm_depth, ret1, ptr1, act_len1);
680 NORMALIZE_L(src2, sizeof(src2), dst2, sizeof(dst2), norm_depth, ret2, ptr2, act_len2);
681 NORMALIZE_L(src3, sizeof(src3), dst3, sizeof(dst3), norm_depth, ret3, ptr3, act_len3);
682 NORMALIZE_L(src4, sizeof(src4), dst4, sizeof(dst4), norm_depth, ret4, ptr4, act_len4);
683
684 CHECK(ret0 == JSTokenizer::SCRIPT_CONTINUE);
685 CHECK((ptr0 - src0) == sizeof(src0));
686 CHECK(act_len0 == sizeof(expected0) - 1);
687 CHECK(!memcmp(dst0, expected0, act_len0));
688
689 CHECK(ret1 == JSTokenizer::SCRIPT_CONTINUE);
690 CHECK((ptr1 - src1) == sizeof(src1));
691 CHECK(act_len1 == sizeof(expected1) - 1);
692 CHECK(!memcmp(dst1, expected1, act_len1));
693
694 CHECK(ret2 == JSTokenizer::SCRIPT_CONTINUE);
695 CHECK((ptr2 - src2) == sizeof(src2));
696 CHECK(act_len2 == sizeof(expected0) - 1);
697 CHECK(!memcmp(dst2, expected0, act_len2));
698
699 CHECK(ret3 == JSTokenizer::SCRIPT_CONTINUE);
700 CHECK((ptr3 - src3) == sizeof(src3));
701 CHECK(act_len3 == sizeof(expected1) - 1);
702 CHECK(!memcmp(dst3, expected1, act_len3));
703
704 CHECK(ret4 == JSTokenizer::SCRIPT_CONTINUE);
705 CHECK((ptr4 - src4) == sizeof(src4));
706 CHECK(act_len4 == sizeof(expected2) - 1);
707 CHECK(!memcmp(dst4, expected2, act_len4));
708 }
709 SECTION("punctuators")
710 {
711 NORMALIZE(all_patterns_buf2);
712 VALIDATE(all_patterns_buf2, all_patterns_expected2);
713 }
714 SECTION("keywords")
715 {
716 NORMALIZE(all_patterns_buf3);
717 VALIDATE(all_patterns_buf3, all_patterns_expected3);
718 }
719 SECTION("literals")
720 {
721 NORMALIZE(all_patterns_buf4);
722 VALIDATE(all_patterns_buf4, all_patterns_expected4);
723 }
724 SECTION("identifiers")
725 {
726 NORMALIZE(all_patterns_buf5);
727 VALIDATE(all_patterns_buf5, all_patterns_expected5);
728 }
729 SECTION("template literals")
730 {
731 NORMALIZE(all_patterns_buf6);
732 VALIDATE(all_patterns_buf6, all_patterns_expected6);
733 }
734 }
735
// Test vectors for different syntax cases
// Each syntax_cases_bufN is a raw script; syntax_cases_expectedN is the
// normalized text it is paired with.

// case 0: variable declarations with string, number, array, boolean, regex,
// function, null and object-literal initializers
static const char syntax_cases_buf0[] =
"var a;\n"
"var b = \"init this stuff\";\n"
"var c = \"Hi\" + \" \" + \"Joe\";\n"
"var d = 1 + 2 + \"3\";\n"
"var e = [ 2, 3, 5, 8 ];\n"
"var f = false;\n"
"var g = /( i'm a .* regex )/;\n"
"var h = function(){};\n"
"const PI = 3.14;\n"
"var a = 1, b = 2, c = a + b;\n"
"let z = 'zzz zz';\n"
"var g = null;\n"
"var name = { first: \"Jane\", last: \"Doe\" };\n"
"var esc = 'I don\\'t \\n know';\n";

static const char syntax_cases_expected0[] =
"var a;var b=\"init this stuff\";var c=\"Hi\"+\" \"+\"Joe\";"
"var d=1+2+\"3\";var e=[2,3,5,8];var f=false;var g=/( i'm a .* regex )/;"
"var h=function(){};const PI=3.14;var a=1,b=2,c=a+b;let z='zzz zz';var g=null;"
"var name={first:\"Jane\",last:\"Doe\"};var esc='I don\\'t \\n know';";

// case 1: arithmetic operators and increment/decrement
static const char syntax_cases_buf1[] =
"a = b + c - d;\n"
"a = b * (c / d);\n"
"x = 100 % 48;\n"
"a ++; b -- ; -- a; ++ b;\n";

static const char syntax_cases_expected1[] =
"a=b+c-d;a=b*(c/d);x=100%48;a++;b--;--a;++b;";

// case 2: comparison, logical, shift and assignment operators, typeof
static const char syntax_cases_buf2[] =
"!(a == b);\n"
"a != b;\n"
"typeof a;\n"
"x << 2; x >> 3;\n"
"a = b;\n"
"a == b;\n"
"a != b;\n"
"a === b;\n"
"a !== b;\n"
"a < b; a > b;\n"
"a <= b; a >= b;\n"
"a += b;\n"
"a && b;\n"
"a || b;\n";

static const char syntax_cases_expected2[] =
"!(a==b);a!=b;typeof a;x<<2;x>>3;a=b;a==b;a!=b;a===b;a!==b;a<b;a>b;"
"a<=b;a>=b;a+=b;a&&b;a||b;";
787
// case 3: object literal with properties and a method
static const char syntax_cases_buf3[] =
"var foo = {\n"
"firstFoo: \"FooFirst\",\n"
"secondFoo: \"FooSecond\",\n"
"thirdFoo: 10,\n"
"fourthFoo: 120,\n"
"methodFoo : function () {\n"
"\treturn this.firstFoo + \" \" + this.secondFoo;\n"
"}\n"
"};\n";

static const char syntax_cases_expected3[] =
"var foo={firstFoo:\"FooFirst\",secondFoo:\"FooSecond\","
"thirdFoo:10,fourthFoo:120,methodFoo:function(){return this.firstFoo+\" \"+"
"this.secondFoo;}};";

// case 4: array literal, Array constructor, indexing and a for loop
static const char syntax_cases_buf4[] =
"var dogs = [\"Bulldog\", \"Beagle\", \"Labrador\"];\n"
"var dogs = new Array(\"Bulldog\", \"Beagle\", \"Labrador\");\n"
"\t\t\t\n"
"alert( dogs[ 1 ] );\n"
"dogs[0] = \"Bull Terrier\";\n"
"\n"
"for (var i = 0; i < dogs.length; i++) {\n"
"console.log(dogs[i]);\n"
"}\n\r";

static const char syntax_cases_expected4[] =
"var dogs=[\"Bulldog\",\"Beagle\",\"Labrador\"];"
"var dogs=new Array(\"Bulldog\",\"Beagle\",\"Labrador\");alert(dogs[1]);"
"dogs[0]=\"Bull Terrier\";for(var i=0;i<dogs.length;i++){console.log(dogs[i]);}";

// case 5: while, do-while and for loops with break/continue; the input has no
// semicolon after the do-while, the expected output has one
static const char syntax_cases_buf5[] =
"var i = 1;\n"
"while (i < 100) {\n"
"i *= 2;\n"
"document.write(i + \", \");\n"
"}\n"
"\n"
"i = 1;\n"
"do {\n"
"i *= 2;\n"
"document.write(i + \", \");\n"
"} while (i < 100)\n"
"\n"
"for (var i = 0; i < 10; i++) {\n"
"if (i == 5) { break; }\n"
"document.write(i + \", \");\n"
"}\n"
"\n"
"for (var i = 0; i < 10; i++) {\n"
"if (i == 5) { continue; }\n"
"document.write(i + \", \");\n"
"}\n\r";

static const char syntax_cases_expected5[] =
"var i=1;while(i<100){i*=2;document.write(i+\", \");}i=1;do{i*=2;"
"document.write(i+\", \");}while(i<100);for(var i=0;i<10;i++){if(i==5){break;}"
"document.write(i+\", \");}for(var i=0;i<10;i++){if(i==5){continue;}"
"document.write(i+\", \");}";
848
// case 6: if/else and switch statements
static const char syntax_cases_buf6[] =
"var n = 1800;\n"
"var res;\n"
"if ( (n >= 1400) && (n < 1900) ) {\n"
"res = \"In range.\";\n"
"} else {\n"
"res = \"Not in range.\";\n"
"}\n"
"\n"
"var text;\n"
"switch ( new Date().getDay() ) {\n"
"case 6:\n"
"text = \"Saturday\";\n"
"break;\n"
"case 0:\n"
"text = \"Sunday\";\n"
"break;\n"
"default:\n"
"text = \"Whatever\";\n"
"}\n\r";

static const char syntax_cases_expected6[] =
"var n=1800;var res;if((n>=1400)&&(n<1900)){res=\"In range.\";}"
"else{res=\"Not in range.\";}var text;switch(new Date().getDay()){case 6:"
"text=\"Saturday\";break;case 0:text=\"Sunday\";break;default:text=\"Whatever\";}";

// case 7: try / catch / finally with throw statements
static const char syntax_cases_buf7[] =
"var x = document.getElementById(\"mynum\").value;\n"
"try { \n"
"if(x == \"\") throw \"empty\";\n"
"if(isNaN(x)) throw \"not a number\";\n"
"x = Number(x);\n"
"if(x > 10) throw \"too high\";\n"
"}\n"
"catch(err) {\n"
"document.write(\"Input is \" + err);\n"
"console.error(err);\n"
"}\n"
"finally {\n"
"document.write(\"</br />Done\");\n"
"}\n\r";

static const char syntax_cases_expected7[] =
"var x=document.getElementById(\"mynum\").value;try{if(x==\"\")"
"throw \"empty\";if(isNaN(x))throw \"not a number\";x=Number(x);if(x>10)"
"throw \"too high\";}catch(err){document.write(\"Input is \"+err);console.error(err);}"
"finally{document.write(\"</br />Done\");}";

// case 8: nested function expressions and chained method calls
static const char syntax_cases_buf8[] =
"function sum (a, b) {\n"
"return new Promise(function (resolve, reject) {\n"
"setTimeout(function () {\n"
"if (typeof a !== \"number\" || typeof b !== \"number\") {\n"
"return reject(new TypeError(\"Inputs must be numbers\"));\n"
"}\n"
"resolve(a + b);\n"
"}, 1000);\n"
"});\n"
"}\n"
"\n"
"var myPromise = sum(10, 5);\n"
"myPromise.then(function (result) {\n"
"document.write(\" 10 + 5: \", result);\n"
"return sum(null, \"foo\");\n"
"}).then(function () {\n"
"}).catch(function (err) {\n"
"console.error(err);\n"
"});\n\r";

static const char syntax_cases_expected8[] =
"function sum(a,b){return new Promise(function(resolve,reject)"
"{setTimeout(function(){if(typeof a!==\"number\"||typeof b!==\"number\"){return "
"reject(new TypeError(\"Inputs must be numbers\"));}resolve(a+b);},1000);});}"
"var myPromise=sum(10,5);myPromise.then(function(result){"
"document.write(\" 10 + 5: \",result);return sum(null,\"foo\");}).then(function(){})"
".catch(function(err){console.error(err);});";

// case 9: division next to regex literals passed as call arguments
static const char syntax_cases_buf9[] =
"var a = Math.round( (new Date).getTime()/1E3 );\n"
"var b = a.match( /^[0-9a-z-_.]{10,1200}$/i );\n"
"var c = a.match( /=\\s*{((.|\\s)*?)};/g ) ;\n\r";

static const char syntax_cases_expected9[] =
"var a=Math.round((new Date).getTime()/1E3);"
"var b=a.match(/^[0-9a-z-_.]{10,1200}$/i);"
"var c=a.match(/=\\s*{((.|\\s)*?)};/g);";
935
936 static const char syntax_cases_buf10[] =
937 "var a = 2\n/ab -cd/";
938
939 static const char syntax_cases_expected10[] =
940 "var a=2;/ab -cd/";
941
942 static const char syntax_cases_buf11[] =
943 "var d_str1 = \"\\\\ \" ; var d_str2 = \"abc\\\"def\" ;"
944 "var d_str3 = \"\\\"abc \" ;var s_str1 = '\\\\ ' ; var s_str2 = 'abc\\\'def' ; "
945 "var s_str3 = '\\\'abc ' ;var re_1 = /\\\\ / ; var re_2 = /abc\\/def/ ; "
946 "var re_3 = /\\/abc / ;";
947
948 static const char syntax_cases_expected11[] =
949 "var d_str1=\"\\\\ \";var d_str2=\"abc\\\"def\";"
950 "var d_str3=\"\\\"abc \";var s_str1='\\\\ ';var s_str2='abc\\\'def';"
951 "var s_str3='\\\'abc ';var re_1=/\\\\ /;var re_2=/abc\\/def/;var re_3=/\\/abc /;";
952
953 static const char syntax_cases_buf12[] =
954 "var str1 = \"abc\\\n def\" ;"
955 "var str2 = \"abc\\\r\n def\" ;"
956 "var str3 = 'abc\\\n def' ;"
957 "var str4 = 'abc\\\r\n def' ;";
958
959 static const char syntax_cases_expected12[] =
960 "var str1=\"abc def\";"
961 "var str2=\"abc def\";"
962 "var str3='abc def';"
963 "var str4='abc def';";
964
965 static const char syntax_cases_buf13[] =
966 "return /regex/i.test( str ) ;";
967
968 static const char syntax_cases_expected13[] =
969 "return /regex/i.test(str);";
970
971 static const char syntax_cases_buf14[] =
972 "var a = b+ ++c ;\n"
973 "var a = b++ +c ;\n"
974 "var a = b++ + ++c ;\n"
975 "var a = b- --c ;\n"
976 "var a = b-- -c ;\n"
977 "var a = b-- - --c ;\n"
978 "var a = b++ - ++c ;\n"
979 "var a = b * -c ;\n"
980 "var a = b % -c ;\n"
981 "var a = b + -c ;";
982
983 static const char syntax_cases_expected14[] =
984 "var a=b+ ++c;"
985 "var a=b++ +c;"
986 "var a=b++ + ++c;"
987 "var a=b- --c;"
988 "var a=b-- -c;"
989 "var a=b-- - --c;"
990 "var a=b++ - ++c;"
991 "var a=b* -c;"
992 "var a=b% -c;"
993 "var a=b+ -c;";
994
// Inputs with malformed literals (buf15 .. buf21). In these cases:
//  - the reading cursor ends up after the literal
//  - the malformed literal is not present in the output

// LS (U+2028) inside a string literal
static const char syntax_cases_buf15[] = "var invalid_str = 'abc\u2028 def' ;\n";

static const char syntax_cases_expected15[] = "var invalid_str='abc";

// unescaped LF inside a string literal
static const char syntax_cases_buf16[] = "var invalid_str = \"abc\n def\"";

static const char syntax_cases_expected16[] = "var invalid_str=\"abc";

// unescaped CR inside a string literal
static const char syntax_cases_buf17[] = "var invalid_str = 'abc\r def'";

static const char syntax_cases_expected17[] = "var invalid_str='abc";

// backslash followed by LF CR inside a string literal
static const char syntax_cases_buf18[] = "var invalid_str = 'abc\\\n\r def'";

static const char syntax_cases_expected18[] = "var invalid_str='abc";

// backslash followed by LF inside a regex literal
static const char syntax_cases_buf19[] = "var invalid_re = /abc\\\n def/";

static const char syntax_cases_expected19[] = "var invalid_re=/abc";

// backslash followed by CR LF inside a regex literal
static const char syntax_cases_buf20[] = "var invalid_re = /abc\\\r\n def/";

static const char syntax_cases_expected20[] = "var invalid_re=/abc";

// PS (U+2029) inside a string literal
static const char syntax_cases_buf21[] = "var invalid_str = 'abc\u2029 def' ;\n\r";

static const char syntax_cases_expected21[] = "var invalid_str='abc";

// Template literals: a tagged template with escaped and nested substitutions.
static const char syntax_cases_buf22[] =
    "tag`template\n \\\\\\${ } \\\\${ a + ` template ${ 1 + c }` }`";

static const char syntax_cases_expected22[] =
    "tag `template\n \\\\\\${ } \\\\${a+` template ${1+c}`}`";

// Five template literals nested inside each other; used by the
// "template literal overflow" test case.
static const char syntax_cases_buf23[] = "`${`${`${`${`${}`}`}`}`}`}";

static const char syntax_cases_expected23[] = "`${`${`${`${`";
1052
1053 TEST_CASE("syntax cases", "[JSNormalizer]")
1054 {
1055 SECTION("variables")
1056 {
1057 NORMALIZE(syntax_cases_buf0);
1058 VALIDATE(syntax_cases_buf0, syntax_cases_expected0);
1059 }
1060 SECTION("operators")
1061 {
1062 NORMALIZE(syntax_cases_buf1);
1063 VALIDATE(syntax_cases_buf1, syntax_cases_expected1);
1064 }
1065 SECTION("arithmetic and logical operators")
1066 {
1067 NORMALIZE(syntax_cases_buf2);
1068 VALIDATE(syntax_cases_buf2, syntax_cases_expected2);
1069 }
1070 SECTION("complex object")
1071 {
1072 NORMALIZE(syntax_cases_buf3);
1073 VALIDATE(syntax_cases_buf3, syntax_cases_expected3);
1074 }
1075 SECTION("arrays")
1076 {
1077 NORMALIZE(syntax_cases_buf4);
1078 VALIDATE(syntax_cases_buf4, syntax_cases_expected4);
1079 }
1080 SECTION("loops")
1081 {
1082 NORMALIZE(syntax_cases_buf5);
1083 VALIDATE(syntax_cases_buf5, syntax_cases_expected5);
1084 }
1085 SECTION("if-else and switch statements")
1086 {
1087 NORMALIZE(syntax_cases_buf6);
1088 VALIDATE(syntax_cases_buf6, syntax_cases_expected6);
1089 }
1090 SECTION("try-catch statements")
1091 {
1092 NORMALIZE(syntax_cases_buf7);
1093 VALIDATE(syntax_cases_buf7, syntax_cases_expected7);
1094 }
1095 SECTION("functions and promises")
1096 {
1097 NORMALIZE(syntax_cases_buf8);
1098 VALIDATE(syntax_cases_buf8, syntax_cases_expected8);
1099 }
1100 SECTION("regex-division ambiguity")
1101 {
1102 NORMALIZE(syntax_cases_buf9);
1103 VALIDATE(syntax_cases_buf9, syntax_cases_expected9);
1104 }
1105 SECTION("regex on a new line")
1106 {
1107 NORMALIZE(syntax_cases_buf10);
1108 VALIDATE(syntax_cases_buf10, syntax_cases_expected10);
1109 }
1110 SECTION("string and regex literals ambiguity with escaped sentinel chars")
1111 {
1112 NORMALIZE(syntax_cases_buf11);
1113 VALIDATE(syntax_cases_buf11, syntax_cases_expected11);
1114 }
1115 SECTION("escaped LF and CR chars in literals")
1116 {
1117 NORMALIZE(syntax_cases_buf12);
1118 VALIDATE(syntax_cases_buf12, syntax_cases_expected12);
1119 }
1120 SECTION("regex after keyword")
1121 {
1122 NORMALIZE(syntax_cases_buf13);
1123 VALIDATE(syntax_cases_buf13, syntax_cases_expected13);
1124 }
1125 SECTION("white space between '+'<-->'++' and '-'<-->'--'")
1126 {
1127 NORMALIZE(syntax_cases_buf14);
1128 VALIDATE(syntax_cases_buf14, syntax_cases_expected14);
1129 }
1130 SECTION("template literals")
1131 {
1132 NORMALIZE(syntax_cases_buf22);
1133 VALIDATE(syntax_cases_buf22, syntax_cases_expected22);
1134 }
1135 }
1136
1137 TEST_CASE("bad tokens", "[JSNormalizer]")
1138 {
1139 SECTION("LS chars within literal")
1140 {
1141 NORMALIZE(syntax_cases_buf15);
1142 VALIDATE_FAIL(syntax_cases_buf15, syntax_cases_expected15, JSTokenizer::BAD_TOKEN, 25);
1143 }
1144 SECTION("PS chars within literal")
1145 {
1146 NORMALIZE(syntax_cases_buf21);
1147 VALIDATE_FAIL(syntax_cases_buf21, syntax_cases_expected21, JSTokenizer::BAD_TOKEN, 25);
1148 }
1149 SECTION("explicit LF within literal")
1150 {
1151 NORMALIZE(syntax_cases_buf16);
1152 VALIDATE_FAIL(syntax_cases_buf16, syntax_cases_expected16, JSTokenizer::BAD_TOKEN, 23);
1153 }
1154 SECTION("explicit CR within literal")
1155 {
1156 NORMALIZE(syntax_cases_buf17);
1157 VALIDATE_FAIL(syntax_cases_buf17, syntax_cases_expected17, JSTokenizer::BAD_TOKEN, 23);
1158 }
1159 SECTION("escaped LF-CR sequence within literal")
1160 {
1161 NORMALIZE(syntax_cases_buf18);
1162 VALIDATE_FAIL(syntax_cases_buf18, syntax_cases_expected18, JSTokenizer::BAD_TOKEN, 25);
1163 }
1164 SECTION("escaped LF within regex literal")
1165 {
1166 NORMALIZE(syntax_cases_buf19);
1167 VALIDATE_FAIL(syntax_cases_buf19, syntax_cases_expected19, JSTokenizer::BAD_TOKEN, 23);
1168 }
1169 SECTION("escaped CR-LF within regex literal")
1170 {
1171 NORMALIZE(syntax_cases_buf20);
1172 VALIDATE_FAIL(syntax_cases_buf20, syntax_cases_expected20, JSTokenizer::BAD_TOKEN, 23);
1173 }
1174 }
1175
1176 TEST_CASE("template literal overflow", "[JSNormalizer]")
1177 {
1178 SECTION("exceeding template literal limit")
1179 {
1180 NORMALIZE(syntax_cases_buf23);
1181 VALIDATE_FAIL(syntax_cases_buf23, syntax_cases_expected23,
1182 JSTokenizer::TEMPLATE_NESTING_OVERFLOW, 15);
1183 }
1184 }
1185
// Data for the "automatic semicolon insertion" test case. In every pair the
// input holds two tokens separated by a line feed and the expected output has
// ';' in place of that line feed.

// first token: array[0]
static const char asi_cases_buf0[] = "array[0]\n{}";
static const char asi_cases_expected0[] = "array[0];{}";

static const char asi_cases_buf1[] = "array[0]\ntrue";
static const char asi_cases_expected1[] = "array[0];true";

static const char asi_cases_buf2[] = "array[0]\n++";
static const char asi_cases_expected2[] = "array[0];++";

static const char asi_cases_buf3[] = "array[0]\ncontinue";
static const char asi_cases_expected3[] = "array[0];continue";

static const char asi_cases_buf4[] = "array[0]\nvar b;";
static const char asi_cases_expected4[] = "array[0];var b;";

// first token: func()
static const char asi_cases_buf5[] = "func()\ntrue";
static const char asi_cases_expected5[] = "func();true";

static const char asi_cases_buf6[] = "func()\n++";
static const char asi_cases_expected6[] = "func();++";

static const char asi_cases_buf7[] = "func()\ncontinue";
static const char asi_cases_expected7[] = "func();continue";

static const char asi_cases_buf8[] = "func()\nvar b;";
static const char asi_cases_expected8[] = "func();var b;";

// first token: 1024
static const char asi_cases_buf9[] = "1024\n{}";
static const char asi_cases_expected9[] = "1024;{}";

static const char asi_cases_buf10[] = "1024\ntrue";
static const char asi_cases_expected10[] = "1024;true";

static const char asi_cases_buf11[] = "1024\n++";
static const char asi_cases_expected11[] = "1024;++";

static const char asi_cases_buf12[] = "1024\ncontinue";
static const char asi_cases_expected12[] = "1024;continue";

static const char asi_cases_buf13[] = "1024\nvar b;";
static const char asi_cases_expected13[] = "1024;var b;";

// first token: ++
static const char asi_cases_buf14[] = "++\n{}";
static const char asi_cases_expected14[] = "++;{}";

static const char asi_cases_buf15[] = "++\n[1,2,3]";
static const char asi_cases_expected15[] = "++;[1,2,3]";

static const char asi_cases_buf16[] = "++\ntrue";
static const char asi_cases_expected16[] = "++;true";

static const char asi_cases_buf17[] = "++\n++";
static const char asi_cases_expected17[] = "++;++";

static const char asi_cases_buf18[] = "++\ncontinue";
static const char asi_cases_expected18[] = "++;continue";

static const char asi_cases_buf19[] = "++\nvar b;";
static const char asi_cases_expected19[] = "++;var b;";

// first token: return
static const char asi_cases_buf20[] = "return\n{}";
static const char asi_cases_expected20[] = "return;{}";

static const char asi_cases_buf21[] = "return\n[1,2,3]";
static const char asi_cases_expected21[] = "return;[1,2,3]";

static const char asi_cases_buf22[] = "return\n+a";
static const char asi_cases_expected22[] = "return;+a";

static const char asi_cases_buf23[] = "return\ntrue";
static const char asi_cases_expected23[] = "return;true";

static const char asi_cases_buf24[] = "return\n++";
static const char asi_cases_expected24[] = "return;++";

static const char asi_cases_buf25[] = "return\ncontinue";
static const char asi_cases_expected25[] = "return;continue";

static const char asi_cases_buf26[] = "return\nvar b;";
static const char asi_cases_expected26[] = "return;var b;";
1347
1348 TEST_CASE("automatic semicolon insertion", "[JSNormalizer]")
1349 {
1350 SECTION("group_4 to group_1")
1351 {
1352 NORMALIZE(asi_cases_buf0);
1353 VALIDATE(asi_cases_buf0, asi_cases_expected0);
1354 }
1355
1356 SECTION("group_4 to group_7")
1357 {
1358 NORMALIZE(asi_cases_buf1);
1359 VALIDATE(asi_cases_buf1, asi_cases_expected1);
1360 }
1361
1362 SECTION("group_4 to group_8")
1363 {
1364 NORMALIZE(asi_cases_buf2);
1365 VALIDATE(asi_cases_buf2, asi_cases_expected2);
1366 }
1367
1368 SECTION("group_4 to group_9")
1369 {
1370 NORMALIZE(asi_cases_buf3);
1371 VALIDATE(asi_cases_buf3, asi_cases_expected3);
1372 }
1373
1374 SECTION("group_4 to group_10")
1375 {
1376 NORMALIZE(asi_cases_buf4);
1377 VALIDATE(asi_cases_buf4, asi_cases_expected4);
1378 }
1379
1380 SECTION("group_5 to group_7")
1381 {
1382 NORMALIZE(asi_cases_buf5);
1383 VALIDATE(asi_cases_buf5, asi_cases_expected5);
1384 }
1385
1386 SECTION("group_5 to group_8")
1387 {
1388 NORMALIZE(asi_cases_buf6);
1389 VALIDATE(asi_cases_buf6, asi_cases_expected6);
1390 }
1391
1392 SECTION("group_5 to group_9")
1393 {
1394 NORMALIZE(asi_cases_buf7);
1395 VALIDATE(asi_cases_buf7, asi_cases_expected7);
1396 }
1397
1398 SECTION("group_5 to group_10")
1399 {
1400 NORMALIZE(asi_cases_buf8);
1401 VALIDATE(asi_cases_buf8, asi_cases_expected8);
1402 }
1403
1404 SECTION("group_7 to group_1")
1405 {
1406 NORMALIZE(asi_cases_buf9);
1407 VALIDATE(asi_cases_buf9, asi_cases_expected9);
1408 }
1409
1410 SECTION("group_7 to group_7")
1411 {
1412 NORMALIZE(asi_cases_buf10);
1413 VALIDATE(asi_cases_buf10, asi_cases_expected10);
1414 }
1415
1416 SECTION("group_7 to group_8")
1417 {
1418 NORMALIZE(asi_cases_buf11);
1419 VALIDATE(asi_cases_buf11, asi_cases_expected11);
1420 }
1421
1422 SECTION("group_7 to group_9")
1423 {
1424 NORMALIZE(asi_cases_buf12);
1425 VALIDATE(asi_cases_buf12, asi_cases_expected12);
1426 }
1427
1428 SECTION("group_7 to group_10")
1429 {
1430 NORMALIZE(asi_cases_buf13);
1431 VALIDATE(asi_cases_buf13, asi_cases_expected13);
1432 }
1433
1434 SECTION("group_8 to group_1")
1435 {
1436 NORMALIZE(asi_cases_buf14);
1437 VALIDATE(asi_cases_buf14, asi_cases_expected14);
1438 }
1439
1440 SECTION("group_8 to group_3")
1441 {
1442 NORMALIZE(asi_cases_buf15);
1443 VALIDATE(asi_cases_buf15, asi_cases_expected15);
1444 }
1445
1446 SECTION("group_8 to group_7")
1447 {
1448 NORMALIZE(asi_cases_buf16);
1449 VALIDATE(asi_cases_buf16, asi_cases_expected16);
1450 }
1451
1452 SECTION("group_8 to group_8")
1453 {
1454 NORMALIZE(asi_cases_buf17);
1455 VALIDATE(asi_cases_buf17, asi_cases_expected17);
1456 }
1457
1458 SECTION("group_8 to group_9")
1459 {
1460 NORMALIZE(asi_cases_buf18);
1461 VALIDATE(asi_cases_buf18, asi_cases_expected18);
1462 }
1463
1464 SECTION("group_8 to group_10")
1465 {
1466 NORMALIZE(asi_cases_buf19);
1467 VALIDATE(asi_cases_buf19, asi_cases_expected19);
1468 }
1469
1470 SECTION("group_9 to group_1")
1471 {
1472 NORMALIZE(asi_cases_buf20);
1473 VALIDATE(asi_cases_buf20, asi_cases_expected20);
1474 }
1475
1476 SECTION("group_9 to group_3")
1477 {
1478 NORMALIZE(asi_cases_buf21);
1479 VALIDATE(asi_cases_buf21, asi_cases_expected21);
1480 }
1481
1482 SECTION("group_9 to group_6")
1483 {
1484 NORMALIZE(asi_cases_buf22);
1485 VALIDATE(asi_cases_buf22, asi_cases_expected22);
1486 }
1487
1488 SECTION("group_9 to group_7")
1489 {
1490 NORMALIZE(asi_cases_buf23);
1491 VALIDATE(asi_cases_buf23, asi_cases_expected23);
1492 }
1493
1494 SECTION("group_9 to group_8")
1495 {
1496 NORMALIZE(asi_cases_buf24);
1497 VALIDATE(asi_cases_buf24, asi_cases_expected24);
1498 }
1499
1500 SECTION("group_9 to group_9")
1501 {
1502 NORMALIZE(asi_cases_buf25);
1503 VALIDATE(asi_cases_buf25, asi_cases_expected25);
1504 }
1505
1506 SECTION("group_9 to group_10")
1507 {
1508 NORMALIZE(asi_cases_buf26);
1509 VALIDATE(asi_cases_buf26, asi_cases_expected26);
1510 }
1511 }
1512
1513 TEST_CASE("endings", "[JSNormalizer]")
1514 {
1515 SECTION("script closing tag is present", "[JSNormalizer]")
1516 {
1517 const char src[] =
1518 "var a = 1 ;\n" // 12 bytes
1519 "var b = 2 ;\n" // 12 bytes
1520 "</script>\n" // ptr_offset is here = 33
1521 "var c = 3 ;\n";
1522 const int ptr_offset = 33;
1523 const char expected[] = "var a=1;var b=2;";
1524 char dst[sizeof(expected) - 1];
1525 int act_len;
1526 const char* ptr;
1527 int ret;
1528
1529 NORMALIZE_L(src, sizeof(src), dst, sizeof(dst), norm_depth, ret, ptr, act_len);
1530
1531 CHECK(ret == JSTokenizer::SCRIPT_ENDED);
1532 CHECK(act_len == sizeof(expected) - 1);
1533 CHECK((ptr - src) == ptr_offset);
1534 CHECK(!memcmp(dst, expected, act_len));
1535 }
1536 SECTION("depth reached", "[JSNormalizer]")
1537 {
1538 const char src[] = "var abc = 123;\n\r";
1539 const char src2[] = "var foo = 321;\n\r";
1540 const char expected[] = "var abc";
1541 const char* ptr;
1542 int ret;
1543
1544 JSIdentifierCtxStub ident_ctx;
1545 JSNormalizer norm(ident_ctx, 7, max_template_nesting, max_bracket_depth);
1546 ret = norm.normalize(src, sizeof(src));
1547 ptr = norm.get_src_next();
1548 int act_len1 = norm.script_size();
1549 const char* dst1 = norm.take_script();
1550
1551 CHECK(ret == JSTokenizer::EOS);
1552 CHECK(ptr == src + 7);
1553 CHECK(act_len1 == sizeof(expected) - 1);
1554 CHECK(!memcmp(dst1, expected, act_len1));
1555 delete[] dst1;
1556
1557 ret = norm.normalize(src2, sizeof(src2));
1558 ptr = norm.get_src_next();
1559 int act_len2 = norm.script_size();
1560 const char* dst2 = norm.take_script();
1561
1562 CHECK(ret == JSTokenizer::EOS);
1563 CHECK(ptr == src2 + sizeof(src2));
1564 CHECK(act_len2 == 0);
1565 delete[] dst2;
1566 }
1567 }
1568
// Data for the "nested script tags" test case: script tags that show up
// inside a script, either bare or within literals and comments. Each
// expected string is the output the test compares against.

// bare opening tag
static const char unexpected_tag_buf0[] =
    "var a = 1;\n"
    "<script>\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected0[] = "var a=1;";

// opening tag with an attribute
static const char unexpected_tag_buf1[] =
    "var a = 1;\n"
    "<script type=application/javascript>\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected1[] = "var a=1;";

// opening tag inside a string literal: start / middle / end
static const char unexpected_tag_buf2[] =
    "var a = 1;\n"
    "var str = '<script> something';\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected2[] = "var a=1;var str='";

static const char unexpected_tag_buf3[] =
    "var a = 1;\n"
    "var str = 'something <script> something';\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected3[] = "var a=1;var str='something ";

static const char unexpected_tag_buf4[] =
    "var a = 1;\n"
    "var str = 'something <script>';\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected4[] = "var a=1;var str='something ";

// closing tag inside a string literal: start / middle / end
static const char unexpected_tag_buf5[] =
    "var a = 1;\n"
    "var str = '</script> something';\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected5[] = "var a=1;var str='";

static const char unexpected_tag_buf6[] =
    "var a = 1;\n"
    "var str = 'something </script> something';\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected6[] = "var a=1;var str='something ";

static const char unexpected_tag_buf7[] =
    "var a = 1;\n"
    "var str = 'something </script>';\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected7[] = "var a=1;var str='something ";

// tags with backslash-escaped delimiters inside a string literal
static const char unexpected_tag_buf8[] =
    "var a = 1;\n"
    "var str = 'something \\<script\\> something';\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected8[] = "var a=1;var str='something \\";

static const char unexpected_tag_buf9[] =
    "var a = 1;\n"
    "var str = 'something \\<\\/script\\> something';\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected9[] =
    "var a=1;var str='something \\<\\/script\\> something';var b=2;";

// opening tag inside a single-line comment: start / middle / end
static const char unexpected_tag_buf10[] =
    "var a = 1;\n"
    "//<script> something\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected10[] = "var a=1;";

static const char unexpected_tag_buf11[] =
    "var a = 1;\n"
    "//something <script> something\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected11[] = "var a=1;";

static const char unexpected_tag_buf12[] =
    "var a = 1;\n"
    "//something <script>\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected12[] = "var a=1;";

// opening tag inside a multi-line comment: start / middle / end
static const char unexpected_tag_buf13[] =
    "var a = 1;\n"
    "/*<script> something*/\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected13[] = "var a=1;";

static const char unexpected_tag_buf14[] =
    "var a = 1;\n"
    "/*something <script> something*/\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected14[] = "var a=1;";

static const char unexpected_tag_buf15[] =
    "var a = 1;\n"
    "/*something <script>*/\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected15[] = "var a=1;";

// closing tag inside a single-line comment: start / middle / end
// (the "middle" input uses the <!-- comment form)
static const char unexpected_tag_buf16[] =
    "var a = 1;\n"
    "//</script> something\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected16[] = "var a=1;";

static const char unexpected_tag_buf17[] =
    "var a = 1;\n"
    "<!--something </script> something//-->\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected17[] = "var a=1;";

static const char unexpected_tag_buf18[] =
    "var a = 1;\n"
    "//something </script>\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected18[] = "var a=1;";

// closing tag inside a multi-line comment: start / middle / end
static const char unexpected_tag_buf19[] =
    "var a = 1;\n"
    "/*</script>\n"
    "something*/\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected19[] = "var a=1;";

static const char unexpected_tag_buf20[] =
    "var a = 1;\n"
    "/*something\n"
    "</script>\n"
    "something*/\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected20[] = "var a=1;";

static const char unexpected_tag_buf21[] =
    "var a = 1;\n"
    "/*something\n"
    "</script>*/\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected21[] = "var a=1;";

// the word "script" without tag delimiters
static const char unexpected_tag_buf22[] =
    "var a = 1;\n"
    "var str = 'script somescript /script something';\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected22[] =
    "var a=1;var str='script somescript /script something';var b=2;";

// the same words followed by a real opening tag
static const char unexpected_tag_buf23[] =
    "var a = 1;\n"
    "var str = 'script somescript /script something <script>';\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected23[] =
    "var a=1;var str='script somescript /script something ";

// mixed-case tag name
static const char unexpected_tag_buf24[] =
    "var a = 1;\n"
    "var str = 'something <sCrIpT>';\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected24[] = "var a=1;var str='something ";
1772
1773 TEST_CASE("nested script tags", "[JSNormalizer]")
1774 {
1775 SECTION("explicit open tag - simple")
1776 {
1777 NORMALIZE(unexpected_tag_buf0);
1778 VALIDATE_FAIL(unexpected_tag_buf0, unexpected_tag_expected0, JSTokenizer::OPENING_TAG, 18);
1779 }
1780 SECTION("explicit open tag - complex")
1781 {
1782 NORMALIZE(unexpected_tag_buf1);
1783 VALIDATE_FAIL(unexpected_tag_buf1, unexpected_tag_expected1, JSTokenizer::OPENING_TAG, 18);
1784 }
1785 SECTION("open tag within literal - start")
1786 {
1787 NORMALIZE(unexpected_tag_buf2);
1788 VALIDATE_FAIL(unexpected_tag_buf2, unexpected_tag_expected2, JSTokenizer::OPENING_TAG, 29);
1789 }
1790 SECTION("open tag within literal - mid")
1791 {
1792 NORMALIZE(unexpected_tag_buf3);
1793 VALIDATE_FAIL(unexpected_tag_buf3, unexpected_tag_expected3, JSTokenizer::OPENING_TAG, 39);
1794 }
1795 SECTION("open tag within literal - end")
1796 {
1797 NORMALIZE(unexpected_tag_buf4);
1798 VALIDATE_FAIL(unexpected_tag_buf4, unexpected_tag_expected4, JSTokenizer::OPENING_TAG, 39);
1799 }
1800 SECTION("close tag within literal - start")
1801 {
1802 NORMALIZE(unexpected_tag_buf5);
1803 VALIDATE_FAIL(unexpected_tag_buf5, unexpected_tag_expected5, JSTokenizer::CLOSING_TAG, 31);
1804 }
1805 SECTION("close tag within literal - mid")
1806 {
1807 NORMALIZE(unexpected_tag_buf6);
1808 VALIDATE_FAIL(unexpected_tag_buf6, unexpected_tag_expected6, JSTokenizer::CLOSING_TAG, 41);
1809 }
1810 SECTION("close tag within literal - end")
1811 {
1812 NORMALIZE(unexpected_tag_buf7);
1813 VALIDATE_FAIL(unexpected_tag_buf7, unexpected_tag_expected7, JSTokenizer::CLOSING_TAG, 41);
1814 }
1815 SECTION("open tag within literal - escaped")
1816 {
1817 NORMALIZE(unexpected_tag_buf8);
1818 VALIDATE_FAIL(unexpected_tag_buf8, unexpected_tag_expected8, JSTokenizer::OPENING_TAG, 40);
1819 }
1820 SECTION("close tag within literal - escaped")
1821 {
1822 NORMALIZE(unexpected_tag_buf9);
1823 VALIDATE(unexpected_tag_buf9, unexpected_tag_expected9);
1824 }
1825 SECTION("open tag within single-line comment - start")
1826 {
1827 NORMALIZE(unexpected_tag_buf10);
1828 VALIDATE_FAIL(unexpected_tag_buf10, unexpected_tag_expected10, JSTokenizer::OPENING_TAG, 20);
1829 }
1830 SECTION("open tag within single-line comment - mid")
1831 {
1832 NORMALIZE(unexpected_tag_buf11);
1833 VALIDATE_FAIL(unexpected_tag_buf11, unexpected_tag_expected11, JSTokenizer::OPENING_TAG, 30);
1834 }
1835 SECTION("open tag within single-line comment - end")
1836 {
1837 NORMALIZE(unexpected_tag_buf12);
1838 VALIDATE_FAIL(unexpected_tag_buf12, unexpected_tag_expected12, JSTokenizer::OPENING_TAG, 30);
1839 }
1840 SECTION("open tag within multi-line comment - start")
1841 {
1842 NORMALIZE(unexpected_tag_buf13);
1843 VALIDATE_FAIL(unexpected_tag_buf13, unexpected_tag_expected13, JSTokenizer::OPENING_TAG, 20);
1844 }
1845 SECTION("open tag within multi-line comment - mid")
1846 {
1847 NORMALIZE(unexpected_tag_buf14);
1848 VALIDATE_FAIL(unexpected_tag_buf14, unexpected_tag_expected14, JSTokenizer::OPENING_TAG, 30);
1849 }
1850 SECTION("open tag within multi-line comment - end")
1851 {
1852 NORMALIZE(unexpected_tag_buf15);
1853 VALIDATE_FAIL(unexpected_tag_buf15, unexpected_tag_expected15, JSTokenizer::OPENING_TAG, 30);
1854 }
1855 SECTION("close tag within single-line comment - start")
1856 {
1857 NORMALIZE(unexpected_tag_buf16);
1858 VALIDATE_FAIL(unexpected_tag_buf16, unexpected_tag_expected16, JSTokenizer::CLOSING_TAG, 22);
1859 }
1860 SECTION("close tag within single-line comment - mid")
1861 {
1862 NORMALIZE(unexpected_tag_buf17);
1863 VALIDATE_FAIL(unexpected_tag_buf17, unexpected_tag_expected17, JSTokenizer::CLOSING_TAG, 34);
1864 }
1865 SECTION("close tag within single-line comment - end")
1866 {
1867 NORMALIZE(unexpected_tag_buf18);
1868 VALIDATE_FAIL(unexpected_tag_buf18, unexpected_tag_expected18, JSTokenizer::CLOSING_TAG, 32);
1869 }
1870 SECTION("close tag within multi-line comment - start")
1871 {
1872 NORMALIZE(unexpected_tag_buf19);
1873 VALIDATE_FAIL(unexpected_tag_buf19, unexpected_tag_expected19, JSTokenizer::CLOSING_TAG, 22);
1874 }
1875 SECTION("close tag within multi-line comment - mid")
1876 {
1877 NORMALIZE(unexpected_tag_buf20);
1878 VALIDATE_FAIL(unexpected_tag_buf20, unexpected_tag_expected20, JSTokenizer::CLOSING_TAG, 32);
1879 }
1880 SECTION("close tag within multi-line comment - end")
1881 {
1882 NORMALIZE(unexpected_tag_buf21);
1883 VALIDATE_FAIL(unexpected_tag_buf21, unexpected_tag_expected21, JSTokenizer::CLOSING_TAG, 32);
1884 }
1885 SECTION("multiple patterns - not matched")
1886 {
1887 NORMALIZE(unexpected_tag_buf22);
1888 VALIDATE(unexpected_tag_buf22, unexpected_tag_expected22);
1889 }
1890 SECTION("multiple patterns - matched")
1891 {
1892 NORMALIZE(unexpected_tag_buf23);
1893 VALIDATE_FAIL(unexpected_tag_buf23, unexpected_tag_expected23, JSTokenizer::OPENING_TAG, 65);
1894 }
1895 SECTION("mixed lower and upper case")
1896 {
1897 NORMALIZE(unexpected_tag_buf24);
1898 VALIDATE_FAIL(unexpected_tag_buf24, unexpected_tag_expected24, JSTokenizer::OPENING_TAG, 39);
1899 }
1900 }
1901
1902 TEST_CASE("split between tokens", "[JSNormalizer]")
1903 {
1904 SECTION("operator string")
1905 {
1906 const char dat1[] = "var s = ";
1907 const char dat2[] = "'string';";
1908 const char exp1[] = "var s=";
1909 const char exp2[] = "'string';";
1910 const char exp[] = "var s='string';";
1911
1912 NORMALIZE_2(dat1, dat2, exp1, exp2);
1913 NORM_COMBINED_2(dat1, dat2, exp);
1914 }
1915 SECTION("operator number")
1916 {
1917 const char dat1[] = "a = 5 +";
1918 const char dat2[] = "b + c;";
1919 const char exp1[] = "a=5+";
1920 const char exp2[] = "b+c;";
1921 const char exp[] = "a=5+b+c;";
1922
1923 NORMALIZE_2(dat1, dat2, exp1, exp2);
1924 NORM_COMBINED_2(dat1, dat2, exp);
1925 }
1926 SECTION("comment function")
1927 {
1928 const char dat1[] = "// no comments\n";
1929 const char dat2[] = "foo(bar, baz);";
1930 const char exp1[] = "";
1931 const char exp2[] = "foo(bar,baz);";
1932 const char exp[] = "foo(bar,baz);";
1933
1934 NORMALIZE_2(dat1, dat2, exp1, exp2);
1935 NORM_COMBINED_2(dat1, dat2, exp);
1936 }
1937 SECTION("operator identifier")
1938 {
1939 const char dat1[] = "var ";
1940 const char dat2[] = "a = ";
1941 const char dat3[] = "b ;";
1942 const char exp1[] = "var";
1943 const char exp2[] = " a=";
1944 const char exp3[] = "b;";
1945 const char exp[] = "var a=b;";
1946
1947 NORMALIZE_3(dat1, dat2, dat3, exp1, exp2, exp3);
1948 NORM_COMBINED_3(dat1, dat2, dat3, exp);
1949 }
1950 }
1951
1952 TEST_CASE("split in comments", "[JSNormalizer]")
1953 {
1954 SECTION("/ /")
1955 {
1956 const char dat1[] = "/";
1957 const char dat2[] = "/comment\n";
1958 const char exp1[] = "/";
1959 const char exp2[] = "";
1960 const char exp[] = "";
1961
1962 NORMALIZE_2(dat1, dat2, exp1, exp2);
1963 NORM_COMBINED_2(dat1, dat2, exp);
1964 }
1965 SECTION("/ / msg")
1966 {
1967 const char dat1[] = "//";
1968 const char dat2[] = "comment\n";
1969 const char exp1[] = "";
1970 const char exp2[] = "";
1971 const char exp[] = "";
1972
1973 NORMALIZE_2(dat1, dat2, exp1, exp2);
1974 NORM_COMBINED_2(dat1, dat2, exp);
1975 }
1976 SECTION("/ / LF")
1977 {
1978 const char dat1[] = "//comment";
1979 const char dat2[] = "\n";
1980 const char exp1[] = "";
1981 const char exp2[] = "";
1982 const char exp[] = "";
1983
1984 NORMALIZE_2(dat1, dat2, exp1, exp2);
1985 NORM_COMBINED_2(dat1, dat2, exp);
1986 }
1987
1988 SECTION("/ *")
1989 {
1990 const char dat1[] = "/";
1991 const char dat2[] = "* comment */";
1992 const char exp1[] = "/";
1993 const char exp2[] = "";
1994 const char exp[] = "";
1995
1996 NORMALIZE_2(dat1, dat2, exp1, exp2);
1997 NORM_COMBINED_2(dat1, dat2, exp);
1998 }
1999 SECTION("/ * msg")
2000 {
2001 const char dat1[] = "/* t";
2002 const char dat2[] = "ext */";
2003 const char exp1[] = "";
2004 const char exp2[] = "";
2005 const char exp[] = "";
2006
2007 NORMALIZE_2(dat1, dat2, exp1, exp2);
2008 NORM_COMBINED_2(dat1, dat2, exp);
2009 }
2010 SECTION("* /")
2011 {
2012 const char dat1[] = "/* comment *";
2013 const char dat2[] = "/";
2014 const char exp1[] = "";
2015 const char exp2[] = "";
2016 const char exp[] = "";
2017
2018 NORMALIZE_2(dat1, dat2, exp1, exp2);
2019 NORM_COMBINED_2(dat1, dat2, exp);
2020 }
2021 SECTION("/ * msg * /")
2022 {
2023 const char dat1[] = "/";
2024 const char dat2[] = "* comment *";
2025 const char dat3[] = "/";
2026 const char exp1[] = "/";
2027 const char exp2[] = "";
2028 const char exp3[] = "";
2029 const char exp[] = "";
2030
2031 NORMALIZE_3(dat1, dat2, dat3, exp1, exp2, exp3);
2032 NORM_COMBINED_3(dat1, dat2, dat3, exp);
2033 }
2034
2035 SECTION("< !--")
2036 {
2037 const char dat1[] = "<";
2038 const char dat2[] = "!-- comment\n";
2039 const char exp1[] = "<";
2040 const char exp2[] = "";
2041 const char exp[] = "";
2042
2043 NORMALIZE_2(dat1, dat2, exp1, exp2);
2044 NORM_COMBINED_2(dat1, dat2, exp);
2045 }
2046 SECTION("<! --")
2047 {
2048 const char dat1[] = "<!";
2049 const char dat2[] = "-- comment\n";
2050 const char exp1[] = "<!";
2051 const char exp2[] = "";
2052 const char exp[] = "";
2053
2054 NORMALIZE_2(dat1, dat2, exp1, exp2);
2055 NORM_COMBINED_2(dat1, dat2, exp);
2056 }
2057 SECTION("<!- -")
2058 {
2059 const char dat1[] = "<!-";
2060 const char dat2[] = "- comment\n";
2061 const char exp1[] = "<!-";
2062 const char exp2[] = "";
2063 const char exp[] = "";
2064
2065 NORMALIZE_2(dat1, dat2, exp1, exp2);
2066 NORM_COMBINED_2(dat1, dat2, exp);
2067 }
2068 SECTION("<!-- msg")
2069 {
2070 const char dat1[] = "<!--";
2071 const char dat2[] = "comment\n";
2072 const char exp1[] = "";
2073 const char exp2[] = "";
2074 const char exp[] = "";
2075
2076 NORMALIZE_2(dat1, dat2, exp1, exp2);
2077 NORM_COMBINED_2(dat1, dat2, exp);
2078 }
2079 SECTION("<! -- msg")
2080 {
2081 const char dat1[] = "<";
2082 const char dat2[] = "!-";
2083 const char dat3[] = "-comment\n";
2084 const char exp1[] = "<";
2085 const char exp2[] = "!-";
2086 const char exp3[] = "";
2087 const char exp[] = "";
2088
2089 NORMALIZE_3(dat1, dat2, dat3, exp1, exp2, exp3);
2090 NORM_COMBINED_3(dat1, dat2, dat3, exp);
2091 }
2092 }
2093
2094 TEST_CASE("split in opening tag", "[JSNormalizer]")
2095 {
2096 SECTION("< script")
2097 {
2098 const char dat1[] = "<";
2099 const char dat2[] = "script";
2100 const char exp1[] = "<";
2101 const char exp2[] = "";
2102 const char exp[] = "";
2103
2104 NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::OPENING_TAG);
2105 NORM_COMBINED_BAD_2(dat1, dat2, exp, JSTokenizer::OPENING_TAG);
2106 }
2107 SECTION("str='<s cript'")
2108 {
2109 const char dat1[] = "var str ='<s";
2110 const char dat2[] = "cript';";
2111 const char exp1[] = "var str='<s";
2112 const char exp2[] = "";
2113 const char exp[] = "var str='";
2114
2115 NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::OPENING_TAG);
2116 NORM_COMBINED_BAD_2(dat1, dat2, exp, JSTokenizer::OPENING_TAG);
2117 }
2118 SECTION("str='<scrip t'")
2119 {
2120 const char dat1[] = "var str ='<scrip";
2121 const char dat2[] = "t';";
2122 const char exp1[] = "var str='<scrip";
2123 const char exp2[] = "";
2124 const char exp[] = "var str='";
2125
2126 NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::OPENING_TAG);
2127 NORM_COMBINED_BAD_2(dat1, dat2, exp, JSTokenizer::OPENING_TAG);
2128 }
2129 SECTION("< scr ipt")
2130 {
2131 const char dat1[] = "<";
2132 const char dat2[] = "scr";
2133 const char dat3[] = "ipt";
2134 const char exp1[] = "<";
2135 const char exp2[] = "scr";
2136 const char exp3[] = "";
2137 const char exp[] = "";
2138
2139 NORM_BAD_3(dat1, dat2, dat3, exp1, exp2, exp3, JSTokenizer::OPENING_TAG);
2140 NORM_COMBINED_BAD_3(dat1, dat2, dat3, exp, JSTokenizer::OPENING_TAG);
2141 }
2142 SECTION("str='<sc rip t'")
2143 {
2144 const char dat1[] = "var str =\"<sc";
2145 const char dat2[] = "rip";
2146 const char dat3[] = "t\";";
2147 const char exp1[] = "var str=\"<sc";
2148 const char exp2[] = "rip";
2149 const char exp3[] = "";
2150 const char exp[] = "var str=\"";
2151
2152 NORM_BAD_3(dat1, dat2, dat3, exp1, exp2, exp3, JSTokenizer::OPENING_TAG);
2153 NORM_COMBINED_BAD_3(dat1, dat2, dat3, exp, JSTokenizer::OPENING_TAG);
2154 }
2155 }
2156
2157 TEST_CASE("split in closing tag", "[JSNormalizer]")
2158 {
2159 SECTION("< /script>")
2160 {
2161 const char dat1[] = "<";
2162 const char dat2[] = "/script>";
2163 const char exp1[] = "<";
2164 const char exp2[] = "";
2165 const char exp[] = "";
2166
2167 NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::SCRIPT_ENDED);
2168 NORM_COMBINED_BAD_2(dat1, dat2, exp, JSTokenizer::SCRIPT_ENDED);
2169 }
2170 SECTION("</script >")
2171 {
2172 const char dat1[] = "</script";
2173 const char dat2[] = ">";
2174 const char exp1[] = "</script";
2175 const char exp2[] = "";
2176 const char exp[] = "";
2177
2178 NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::SCRIPT_ENDED);
2179 NORM_COMBINED_BAD_2(dat1, dat2, exp, JSTokenizer::SCRIPT_ENDED);
2180 }
2181 SECTION("str='</ script>'")
2182 {
2183 const char dat1[] = "var str ='</";
2184 const char dat2[] = "script>';";
2185 const char exp1[] = "var str='</";
2186 const char exp2[] = "";
2187 const char exp[] = "var str='";
2188
2189 NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::CLOSING_TAG);
2190 NORM_COMBINED_BAD_2(dat1, dat2, exp, JSTokenizer::CLOSING_TAG);
2191 }
2192 SECTION("str='</scrip t>'")
2193 {
2194 const char dat1[] = "var str ='</scrip";
2195 const char dat2[] = "t>';";
2196 const char exp1[] = "var str='</scrip";
2197 const char exp2[] = "";
2198 const char exp[] = "var str='";
2199
2200 NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::CLOSING_TAG);
2201 NORM_COMBINED_BAD_2(dat1, dat2, exp, JSTokenizer::CLOSING_TAG);
2202 }
2203 SECTION("</ scr ipt>")
2204 {
2205 const char dat1[] = "</";
2206 const char dat2[] = "scr";
2207 const char dat3[] = "ipt>";
2208 const char exp1[] = "</";
2209 const char exp2[] = "/scr";
2210 const char exp3[] = "";
2211 const char exp[] = "";
2212
2213 NORM_BAD_3(dat1, dat2, dat3, exp1, exp2, exp3, JSTokenizer::SCRIPT_ENDED);
2214 NORM_COMBINED_BAD_3(dat1, dat2, dat3, exp, JSTokenizer::SCRIPT_ENDED);
2215 }
2216 SECTION("str='</sc rip t>'")
2217 {
2218 const char dat1[] = "var str =\"</sc";
2219 const char dat2[] = "rip";
2220 const char dat3[] = "t>\";";
2221 const char exp1[] = "var str=\"</sc";
2222 const char exp2[] = "rip";
2223 const char exp3[] = "";
2224 const char exp[] = "var str=\"";
2225
2226 NORM_BAD_3(dat1, dat2, dat3, exp1, exp2, exp3, JSTokenizer::CLOSING_TAG);
2227 NORM_COMBINED_BAD_3(dat1, dat2, dat3, exp, JSTokenizer::CLOSING_TAG);
2228 }
2229 SECTION("::::</scr ipt >")
2230 {
2231 const char dat1[] = ":::: </scr";
2232 const char dat2[] = "ipt";
2233 const char dat3[] = ">";
2234 const char exp1[] = "::::</scr";
2235 const char exp2[] = "cript";
2236 const char exp3[] = "";
2237 const char exp[] = "::::";
2238
2239 NORM_BAD_3(dat1, dat2, dat3, exp1, exp2, exp3, JSTokenizer::SCRIPT_ENDED);
2240 NORM_COMBINED_BAD_3(dat1, dat2, dat3, exp, JSTokenizer::SCRIPT_ENDED);
2241 }
2242 }
2243
2244 TEST_CASE("split in string literal", "[JSNormalizer]")
2245 {
2246 SECTION("\\ LF")
2247 {
2248 const char dat1[] = "var str =\"any\\";
2249 const char dat2[] = "\none\";";
2250 const char exp1[] = "var str=\"any\\";
2251 const char exp2[] = "one\";";
2252 const char exp[] = "var str=\"anyone\";";
2253
2254 NORMALIZE_2(dat1, dat2, exp1, exp2);
2255 NORM_COMBINED_2(dat1, dat2, exp);
2256 }
2257 SECTION("\\ CR")
2258 {
2259 const char dat1[] = "var str =\"any\\";
2260 const char dat2[] = "\rone\";";
2261 const char exp1[] = "var str=\"any\\";
2262 const char exp2[] = "one\";";
2263 const char exp[] = "var str=\"anyone\";";
2264
2265 NORMALIZE_2(dat1, dat2, exp1, exp2);
2266 NORM_COMBINED_2(dat1, dat2, exp);
2267 }
2268 SECTION("\\CR LF")
2269 {
2270 const char dat1[] = "var str =\"any\\\r";
2271 const char dat2[] = "\none\";";
2272 const char exp1[] = "var str=\"any";
2273 const char exp2[] = "one\";";
2274 const char exp[] = "var str=\"anyone\";";
2275
2276 NORMALIZE_2(dat1, dat2, exp1, exp2);
2277 NORM_COMBINED_2(dat1, dat2, exp);
2278 }
2279 SECTION("\\ CRLF")
2280 {
2281 const char dat1[] = "var str =\"any\\";
2282 const char dat2[] = "\r\none\";";
2283 const char exp1[] = "var str=\"any\\";
2284 const char exp2[] = "one\";";
2285 const char exp[] = "var str=\"anyone\";";
2286
2287 NORMALIZE_2(dat1, dat2, exp1, exp2);
2288 NORM_COMBINED_2(dat1, dat2, exp);
2289 }
2290 SECTION("\\ \"")
2291 {
2292 const char dat1[] = "var str =\"any\\";
2293 const char dat2[] = "\"one\";";
2294 const char exp1[] = "var str=\"any\\";
2295 const char exp2[] = "\\\"one\";";
2296 const char exp[] = "var str=\"any\\\"one\";";
2297
2298 NORMALIZE_2(dat1, dat2, exp1, exp2);
2299 NORM_COMBINED_2(dat1, dat2, exp);
2300 }
2301 SECTION("\\ \'")
2302 {
2303 const char dat1[] = "var str =\"any\\";
2304 const char dat2[] = "\'one\";";
2305 const char exp1[] = "var str=\"any\\";
2306 const char exp2[] = "\'one\";";
2307 const char exp[] = "var str=\"any\\\'one\";";
2308
2309 NORMALIZE_2(dat1, dat2, exp1, exp2);
2310 NORM_COMBINED_2(dat1, dat2, exp);
2311 }
2312 SECTION("\\ u1234tx")
2313 {
2314 const char dat1[] = "var str =\"any\\";
2315 const char dat2[] = "u1234tx\";";
2316 const char exp1[] = "var str=\"any\\";
2317 const char exp2[] = "u1234tx\";";
2318 const char exp[] = "var str=\"any\\u1234tx\";";
2319
2320 NORMALIZE_2(dat1, dat2, exp1, exp2);
2321 NORM_COMBINED_2(dat1, dat2, exp);
2322 }
2323 SECTION("\\u 1234tx")
2324 {
2325 const char dat1[] = "var str =\"any\\u";
2326 const char dat2[] = "1234tx\";";
2327 const char exp1[] = "var str=\"any\\u";
2328 const char exp2[] = "1234tx\";";
2329 const char exp[] = "var str=\"any\\u1234tx\";";
2330
2331 NORMALIZE_2(dat1, dat2, exp1, exp2);
2332 NORM_COMBINED_2(dat1, dat2, exp);
2333 }
2334 }
2335
2336 TEST_CASE("split in identifier", "[JSNormalizer]")
2337 {
2338 SECTION("abc def")
2339 {
2340 const char dat1[] = "var abc";
2341 const char dat2[] = "def = 5";
2342 const char exp1[] = "var abc";
2343 const char exp2[] = " abcdef=5";
2344 const char exp[] = "var abcdef=5";
2345
2346 NORMALIZE_2(dat1, dat2, exp1, exp2);
2347 NORM_COMBINED_2(dat1, dat2, exp);
2348 }
2349 SECTION("long identifier")
2350 {
2351 const char dat1[] = "var res = something + long_id_starts_here";
2352 const char dat2[] = "_long_id_ends_here;";
2353 const char exp1[] = "var res=something+long_id_starts_here";
2354 const char exp2[] = "long_id_starts_here_long_id_ends_here;";
2355 const char exp[] = "var res=something+long_id_starts_here_long_id_ends_here;";
2356
2357 NORMALIZE_2(dat1, dat2, exp1, exp2);
2358 NORM_COMBINED_2(dat1, dat2, exp);
2359 }
2360 }
2361
2362 TEST_CASE("split in keyword", "[JSNormalizer]")
2363 {
2364 SECTION("finally")
2365 {
2366 const char dat1[] = "\nfin";
2367 const char dat2[] = "ally;";
2368 const char exp1[] = "fin";
2369 const char exp2[] = "finally;";
2370 const char exp[] = "finally;";
2371
2372 NORMALIZE_2(dat1, dat2, exp1, exp2);
2373 NORM_COMBINED_2(dat1, dat2, exp);
2374 }
2375 SECTION("in")
2376 {
2377 const char dat1[] = "i";
2378 const char dat2[] = "n";
2379 const char exp1[] = "i";
2380 const char exp2[] = "in";
2381 const char exp[] = "in";
2382
2383 NORMALIZE_2(dat1, dat2, exp1, exp2);
2384 NORM_COMBINED_2(dat1, dat2, exp);
2385 }
2386 SECTION("instanceof")
2387 {
2388 const char dat1[] = "in";
2389 const char dat2[] = "stance";
2390 const char dat3[] = "of";
2391 const char exp1[] = "in";
2392 const char exp2[] = "instance";
2393 const char exp3[] = "instanceof";
2394 const char exp[] = "instanceof";
2395
2396 NORMALIZE_3(dat1, dat2, dat3, exp1, exp2, exp3);
2397 NORM_COMBINED_3(dat1, dat2, dat3, exp);
2398 }
2399 }
2400
2401 TEST_CASE("split and continuation combined", "[JSNormalizer]")
2402 {
2403 SECTION("PDU 1 [cont] PDU 2 [end end cont end]")
2404 {
2405 const char src1[] = "a b" "";
2406 const char src2[] = "c d" "</script>";
2407 const char src3[] = "" "</script>";
2408 const char src4[] = "\n" "";
2409
2410 const char exp1[] = "var_0000 var_0001";
2411 const char exp2[] = " var_0002 var_0003";
2412 const char exp3[] = " var_0002 var_0003";
2413 const char exp4[] = " var_0002 var_0003";
2414
2415 char dst1[sizeof(exp1)];
2416 char dst2[sizeof(exp2)];
2417 char dst3[sizeof(exp3)];
2418 char dst4[sizeof(exp4)];
2419
2420 JSIdentifierCtx ident_ctx(norm_depth, max_scope_depth, s_ignored_ids);
2421 JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth);
2422
2423 DO(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1);
2424 CHECK(!memcmp(exp1, dst1, sizeof(exp1) - 1));
2425
2426 TRY(src2, sizeof(src2) - 1, dst2, sizeof(dst2) - 1, JSTokenizer::SCRIPT_ENDED);
2427 CHECK(!memcmp(exp2, dst2, sizeof(exp2) - 1));
2428
2429 TRY(src3, sizeof(src3) - 1, dst3, sizeof(dst3) - 1, JSTokenizer::SCRIPT_ENDED);
2430 CHECK(!memcmp(exp3, dst3, sizeof(exp3) - 1));
2431
2432 DO(src4, sizeof(src4) - 1, dst4, sizeof(dst4) - 1);
2433 CHECK(!memcmp(exp4, dst4, sizeof(exp4) - 1));
2434
2435 CLOSE();
2436 }
2437 SECTION("PDU 1 [cont] PDU 2 [cont] PDU 3 [end]")
2438 {
2439 const char src1[] = "<";
2440 const char src2[] = "!-";
2441 const char src3[] = "-comment\n";
2442
2443 const char exp1[] = "<";
2444 const char exp2[] = "<!-";
2445 const char exp3[] = "";
2446
2447 const char tmp_buf1[] = "<";
2448 const char tmp_buf2[] = "<!-";
2449 const char tmp_buf3[] = "<!--comment\n";
2450
2451 char dst1[sizeof(exp1)];
2452 char dst2[sizeof(exp2)];
2453 char dst3[sizeof(exp3)];
2454
2455 JSIdentifierCtx ident_ctx(norm_depth, max_scope_depth, s_ignored_ids);
2456 JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth);
2457
2458 TRY(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1, JSTokenizer::SCRIPT_CONTINUE);
2459 CHECK(!memcmp(exp1, dst1, sizeof(exp1) - 1));
2460 REQUIRE(norm.get_tmp_buf_size() == sizeof(tmp_buf1) - 1);
2461 CHECK(!memcmp(norm.get_tmp_buf(), tmp_buf1, sizeof(tmp_buf1) - 1));
2462
2463 TRY(src2, sizeof(src2) - 1, dst2, sizeof(dst2) - 1, JSTokenizer::SCRIPT_CONTINUE);
2464 CHECK(!memcmp(exp2, dst2, sizeof(exp2) - 1));
2465 REQUIRE(norm.get_tmp_buf_size() == sizeof(tmp_buf2) - 1);
2466 CHECK(!memcmp(norm.get_tmp_buf(), tmp_buf2, sizeof(tmp_buf2) - 1));
2467
2468 TRY(src3, sizeof(src3) - 1, dst3, sizeof(dst3) - 1, JSTokenizer::SCRIPT_CONTINUE);
2469 CHECK(!memcmp(exp3, dst3, sizeof(exp3) - 1));
2470 REQUIRE(norm.get_tmp_buf_size() == sizeof(tmp_buf3) - 1);
2471 CHECK(!memcmp(norm.get_tmp_buf(), tmp_buf3, sizeof(tmp_buf3) - 1));
2472
2473 CLOSE();
2474 }
2475 }
2476
2477 TEST_CASE("memcap", "[JSNormalizer]")
2478 {
2479 SECTION("3 tokens")
2480 {
2481 const char dat1[] = "var abc=in";
2482 const char dat2[] = "put;";
2483 const char exp1[] = "var abc=in";
2484 const char exp2[] = "input;";
2485 const char exp[] = "var abc=input;";
2486
2487 NORM_LIMITED(6, dat1, dat2, exp1, exp2);
2488 NORM_COMBINED_LIMITED_2(6, dat1, dat2, exp);
2489 }
2490 SECTION("2 tokens and a half")
2491 {
2492 const char dat1[] = "var abc=in";
2493 const char dat2[] = "put;";
2494 const char exp1[] = "var abc=in";
2495 const char exp2[] = "input;";
2496 const char exp[] = "var abc=input;";
2497
2498 NORM_LIMITED(4, dat1, dat2, exp1, exp2);
2499 NORM_COMBINED_LIMITED_2(4, dat1, dat2, exp);
2500 }
2501 SECTION("1 token")
2502 {
2503 const char dat1[] = "var abc=in";
2504 const char dat2[] = "put;";
2505 const char exp1[] = "var abc=in";
2506 const char exp2[] = "input;";
2507 const char exp[] = "var abc=input;";
2508
2509 NORM_LIMITED(2, dat1, dat2, exp1, exp2);
2510 NORM_COMBINED_LIMITED_2(2, dat1, dat2, exp);
2511 }
2512 SECTION("a half")
2513 {
2514 const char dat1[] = "var abc=extract";
2515 const char dat2[] = "// just a comment\n";
2516 const char exp1[] = "var abc=extract";
2517 const char exp2[] = "";
2518 const char exp[] = "var abc=extract";
2519
2520 NORM_LIMITED(5, dat1, dat2, exp1, exp2);
2521 NORM_COMBINED_LIMITED_2(5, dat1, dat2, exp);
2522 }
2523 }
2524
2525 TEST_CASE("scope tracking", "[JSNormalizer]")
2526 {
2527 SECTION("parentheses")
2528 {
2529 const char dat1[] = "()";
2530 const char dat2[] = "()()()";
2531 const char dat3[] = "((()))";
2532 const char exp1[] = "()";
2533 const char exp2[] = "()()()";
2534 const char exp3[] = "((()))";
2535
2536 NORMALIZE_1(dat1, exp1);
2537 NORMALIZE_1(dat2, exp2);
2538 NORMALIZE_1(dat3, exp3);
2539 }
2540 SECTION("curly braces")
2541 {
2542 const char dat1[] = "{}";
2543 const char dat2[] = "{}{}{}";
2544 const char dat3[] = "{{{}}}";
2545 const char exp1[] = "{}";
2546 const char exp2[] = "{}{}{}";
2547 const char exp3[] = "{{{}}}";
2548
2549 NORMALIZE_1(dat1, exp1);
2550 NORMALIZE_1(dat2, exp2);
2551 NORMALIZE_1(dat3, exp3);
2552 }
2553 SECTION("square brackets")
2554 {
2555 const char dat1[] = "[]";
2556 const char dat2[] = "[][][]";
2557 const char dat3[] = "[[[]]]";
2558 const char exp1[] = "[]";
2559 const char exp2[] = "[][][]";
2560 const char exp3[] = "[[[]]]";
2561
2562 NORMALIZE_1(dat1, exp1);
2563 NORMALIZE_1(dat2, exp2);
2564 NORMALIZE_1(dat3, exp3);
2565 }
2566 SECTION("mix of brackets")
2567 {
2568 const char dat1[] = "(){}[]";
2569 const char dat2[] = "({})[]";
2570 const char dat3[] = "(){[]}";
2571 const char exp1[] = "(){}[]";
2572 const char exp2[] = "({})[]";
2573 const char exp3[] = "(){[]}";
2574
2575 NORMALIZE_1(dat1, exp1);
2576 NORMALIZE_1(dat2, exp2);
2577 NORMALIZE_1(dat3, exp3);
2578 }
2579 SECTION("parentheses - wrong closing symbol")
2580 {
2581 const char dat1[] = "({[ (} ]})";
2582 const char dat2[] = "({[ (] ]})";
2583 const char exp1[] = "({[(";
2584 const char exp2[] = "({[(";
2585
2586 NORM_BAD_1(dat1, exp1, JSTokenizer::WRONG_CLOSING_SYMBOL);
2587 NORM_BAD_1(dat2, exp2, JSTokenizer::WRONG_CLOSING_SYMBOL);
2588 }
2589 SECTION("curly braces - wrong closing symbol")
2590 {
2591 const char dat1[] = "({[ {) ]})";
2592 const char dat2[] = "({[ {] ]})";
2593 const char exp1[] = "({[{";
2594 const char exp2[] = "({[{";
2595
2596 NORM_BAD_1(dat1, exp1, JSTokenizer::WRONG_CLOSING_SYMBOL);
2597 NORM_BAD_1(dat2, exp2, JSTokenizer::WRONG_CLOSING_SYMBOL);
2598 }
2599 SECTION("square brackets - wrong closing symbol")
2600 {
2601 const char dat1[] = "([{ [) }])";
2602 const char dat2[] = "([{ [} }])";
2603 const char exp1[] = "([{[";
2604 const char exp2[] = "([{[";
2605
2606 NORM_BAD_1(dat1, exp1, JSTokenizer::WRONG_CLOSING_SYMBOL);
2607 NORM_BAD_1(dat2, exp2, JSTokenizer::WRONG_CLOSING_SYMBOL);
2608 }
2609 SECTION("parentheses - mismatch")
2610 {
2611 const char dat1[] = ")";
2612 const char dat2[] = "())";
2613 const char dat3[] = "({[ ()) ]})";
2614 const char dat4[] = "(</script>";
2615 const char dat5[] = "(()</script>";
2616 const char exp1[] = "";
2617 const char exp2[] = "()";
2618 const char exp3[] = "({[()";
2619 const char exp4[] = "(";
2620 const char exp5[] = "(()";
2621
2622 NORM_BAD_1(dat1, exp1, JSTokenizer::WRONG_CLOSING_SYMBOL);
2623 NORM_BAD_1(dat2, exp2, JSTokenizer::WRONG_CLOSING_SYMBOL);
2624 NORM_BAD_1(dat3, exp3, JSTokenizer::WRONG_CLOSING_SYMBOL);
2625 NORM_BAD_1(dat4, exp4, JSTokenizer::ENDED_IN_INNER_SCOPE);
2626 NORM_BAD_1(dat5, exp5, JSTokenizer::ENDED_IN_INNER_SCOPE);
2627 }
2628 SECTION("curly braces - mismatch")
2629 {
2630 const char dat1[] = "}";
2631 const char dat2[] = "{}}";
2632 const char dat3[] = "({[ {}} ]})";
2633 const char dat4[] = "{</script>";
2634 const char dat5[] = "{{}</script>";
2635 const char exp1[] = "";
2636 const char exp2[] = "{}";
2637 const char exp3[] = "({[{}";
2638 const char exp4[] = "{";
2639 const char exp5[] = "{{}";
2640
2641 NORM_BAD_1(dat1, exp1, JSTokenizer::WRONG_CLOSING_SYMBOL);
2642 NORM_BAD_1(dat2, exp2, JSTokenizer::WRONG_CLOSING_SYMBOL);
2643 NORM_BAD_1(dat3, exp3, JSTokenizer::WRONG_CLOSING_SYMBOL);
2644 NORM_BAD_1(dat4, exp4, JSTokenizer::ENDED_IN_INNER_SCOPE);
2645 NORM_BAD_1(dat5, exp5, JSTokenizer::ENDED_IN_INNER_SCOPE);
2646 }
2647 SECTION("square brackets - mismatch")
2648 {
2649 const char dat1[] = "]";
2650 const char dat2[] = "[]]";
2651 const char dat3[] = "([{ []] }])";
2652 const char dat4[] = "[</script>";
2653 const char dat5[] = "[[]</script>";
2654 const char exp1[] = "";
2655 const char exp2[] = "[]";
2656 const char exp3[] = "([{[]";
2657 const char exp4[] = "[";
2658 const char exp5[] = "[[]";
2659
2660 NORM_BAD_1(dat1, exp1, JSTokenizer::WRONG_CLOSING_SYMBOL);
2661 NORM_BAD_1(dat2, exp2, JSTokenizer::WRONG_CLOSING_SYMBOL);
2662 NORM_BAD_1(dat3, exp3, JSTokenizer::WRONG_CLOSING_SYMBOL);
2663 NORM_BAD_1(dat4, exp4, JSTokenizer::ENDED_IN_INNER_SCOPE);
2664 NORM_BAD_1(dat5, exp5, JSTokenizer::ENDED_IN_INNER_SCOPE);
2665 }
2666 SECTION("parentheses - continuation")
2667 {
2668 const char dat1[] = "((";
2669 const char dat2[] = "))";
2670 const char exp1[] = "((";
2671 const char exp2[] = "))";
2672 const char exp[] = "(())";
2673
2674 NORMALIZE_2(dat1, dat2, exp1, exp2);
2675 NORM_COMBINED_2(dat1, dat2, exp);
2676 }
2677 SECTION("curly braces - continuation")
2678 {
2679 const char dat1[] = "{{";
2680 const char dat2[] = "}}";
2681 const char exp1[] = "{{";
2682 const char exp2[] = "}}";
2683 const char exp[] = "{{}}";
2684
2685 NORMALIZE_2(dat1, dat2, exp1, exp2);
2686 NORM_COMBINED_2(dat1, dat2, exp);
2687 }
2688 SECTION("square brackets - continuation")
2689 {
2690 const char dat1[] = "[[";
2691 const char dat2[] = "]]";
2692 const char exp1[] = "[[";
2693 const char exp2[] = "]]";
2694 const char exp[] = "[[]]";
2695
2696 NORMALIZE_2(dat1, dat2, exp1, exp2);
2697 NORM_COMBINED_2(dat1, dat2, exp);
2698 }
2699 SECTION("parentheses - mismatch in continuation")
2700 {
2701 const char dat1[] = "(";
2702 const char dat2[] = "))";
2703 const char dat3[] = "(";
2704 const char dat4[] = " </script>";
2705 const char exp1[] = "(";
2706 const char exp2[] = ")";
2707 const char exp3[] = "(";
2708 const char exp4[] = "";
2709
2710 const char exp5[] = "()";
2711 const char exp6[] = "(";
2712
2713 NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::WRONG_CLOSING_SYMBOL);
2714 NORM_BAD_2(dat3, dat4, exp3, exp4, JSTokenizer::ENDED_IN_INNER_SCOPE);
2715
2716 NORM_COMBINED_BAD_2(dat1, dat2, exp5, JSTokenizer::WRONG_CLOSING_SYMBOL);
2717 NORM_COMBINED_BAD_2(dat3, dat4, exp6, JSTokenizer::ENDED_IN_INNER_SCOPE);
2718 }
2719 SECTION("curly braces - mismatch in continuation")
2720 {
2721 const char dat1[] = "{";
2722 const char dat2[] = "}}";
2723 const char dat3[] = "{";
2724 const char dat4[] = " </script>";
2725 const char exp1[] = "{";
2726 const char exp2[] = "}";
2727 const char exp3[] = "{";
2728 const char exp4[] = "";
2729
2730 const char exp5[] = "{}";
2731 const char exp6[] = "{";
2732
2733 NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::WRONG_CLOSING_SYMBOL);
2734 NORM_BAD_2(dat3, dat4, exp3, exp4, JSTokenizer::ENDED_IN_INNER_SCOPE);
2735
2736 NORM_COMBINED_BAD_2(dat1, dat2, exp5, JSTokenizer::WRONG_CLOSING_SYMBOL);
2737 NORM_COMBINED_BAD_2(dat3, dat4, exp6, JSTokenizer::ENDED_IN_INNER_SCOPE);
2738 }
2739 SECTION("square brackets - mismatch in continuation")
2740 {
2741 const char dat1[] = "[";
2742 const char dat2[] = "]]";
2743 const char dat3[] = "[";
2744 const char dat4[] = " </script>";
2745 const char exp1[] = "[";
2746 const char exp2[] = "]";
2747 const char exp3[] = "[";
2748 const char exp4[] = "";
2749
2750 const char exp5[] = "[]";
2751 const char exp6[] = "[";
2752
2753 NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::WRONG_CLOSING_SYMBOL);
2754 NORM_BAD_2(dat3, dat4, exp3, exp4, JSTokenizer::ENDED_IN_INNER_SCOPE);
2755
2756 NORM_COMBINED_BAD_2(dat1, dat2, exp5, JSTokenizer::WRONG_CLOSING_SYMBOL);
2757 NORM_COMBINED_BAD_2(dat3, dat4, exp6, JSTokenizer::ENDED_IN_INNER_SCOPE);
2758 }
2759 }
2760
2761 TEST_CASE("scope misc", "[JSNormalizer]")
2762 {
2763 const int stack_limit = 256;
2764 const char* open = "1+(";
2765 const char* close = "-1)";
2766
2767 SECTION("max stack")
2768 {
2769 std::string scr;
2770
2771 for (int i = 0; i < stack_limit - 1; ++i)
2772 scr += open;
2773 for (int i = 0; i < stack_limit - 1; ++i)
2774 scr += close;
2775
2776 const char* dat = scr.c_str();
2777 int dat_len = strlen(dat);
2778 const char* exp = scr.c_str();
2779 int exp_len = strlen(exp);
2780 char* act = new char[exp_len];
2781
2782 JSIdentifierCtxStub ident_ctx;
2783 JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth);
2784
2785 DO(dat, dat_len, act, exp_len);
2786 CHECK(!memcmp(exp, act, exp_len));
2787
2788 delete[] act;
2789
2790 CLOSE();
2791 }
2792
2793 SECTION("max stack")
2794 {
2795 std::string scr;
2796 std::string nsc;
2797
2798 for (int i = 0; i < stack_limit; ++i)
2799 scr += open;
2800 for (int i = 0; i < stack_limit; ++i)
2801 scr += close;
2802 for (int i = 0; i < stack_limit - 1; ++i)
2803 nsc += open;
2804 nsc += "1+";
2805
2806 const char* dat = scr.c_str();
2807 int dat_len = strlen(dat);
2808 const char* exp = nsc.c_str();
2809 int exp_len = strlen(exp);
2810 char* act = new char[exp_len];
2811
2812 JSIdentifierCtxStub ident_ctx;
2813 JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth);
2814
2815 TRY(dat, dat_len, act, exp_len, JSTokenizer::BRACKET_NESTING_OVERFLOW);
2816 CHECK(!memcmp(exp, act, exp_len));
2817
2818 delete[] act;
2819 }
2820 }
2821
2822 TEST_CASE("scope tail handling", "[JSNormalizer]")
2823 {
2824 // Padding ':' symbol has been chosen, since it:
2825 // * forms a single state for Parser
2826 // * doesn't insert white spaces
2827 // * forms a single match, i.e. there are no '::' ':::' patterns
2828 //
2829 // Thus, the tail of "::({[]})" will have JSTOKENIZER_MAX_STATES
2830 // and the same number of characters in it.
2831
2832 #if JSTOKENIZER_MAX_STATES != 8
2833 #error "scope tail handling" tests are designed for the tail of 8 bytes size
2834 #endif
2835
2836 SECTION("no scope-symbols in the tail")
2837 {
2838 const char dat1[] = "((((::::::::";
2839 const char dat2[] = "):):):):";
2840 const char dat3[] = "{}{{::::::::";
2841 const char dat4[] = "::{}}}::";
2842 const char dat5[] = "[][[::::::::";
2843 const char dat6[] = "::::]][]";
2844 const char exp1[] = "((((::::::::";
2845 const char exp2[] = "):):):):";
2846 const char exp3[] = "{}{{::::::::";
2847 const char exp4[] = "::{}}}::";
2848 const char exp5[] = "[][[::::::::";
2849 const char exp6[] = "::::]][]";
2850
2851 const char exp7[] = "((((::::::::):):):):";
2852 const char exp8[] = "{}{{::::::::::{}}}::";
2853 const char exp9[] = "[][[::::::::::::]][]";
2854
2855 NORMALIZE_2(dat1, dat2, exp1, exp2);
2856 NORMALIZE_2(dat3, dat4, exp3, exp4);
2857 NORMALIZE_2(dat5, dat6, exp5, exp6);
2858
2859 NORM_COMBINED_2(dat1, dat2, exp7);
2860 NORM_COMBINED_2(dat3, dat4, exp8);
2861 NORM_COMBINED_2(dat5, dat6, exp9);
2862 }
2863
2864 SECTION("opening scope-symbols in the tail")
2865 {
2866 const char dat1[] = "::::(:::::::";
2867 const char dat2[] = "):::::::";
2868 const char dat3[] = ":::::::::::{";
2869 const char dat4[] = ":::::::}";
2870 const char dat5[] = "::::[:::::::";
2871 const char dat6[] = "::::]:::";
2872 const char exp1[] = "::::(:::::::";
2873 const char exp2[] = "):::::::";
2874 const char exp3[] = ":::::::::::{";
2875 const char exp4[] = ":::::::}";
2876 const char exp5[] = "::::[:::::::";
2877 const char exp6[] = "::::]:::";
2878
2879 const char exp7[] = "::::(:::::::):::::::";
2880 const char exp8[] = ":::::::::::{:::::::}";
2881 const char exp9[] = "::::[:::::::::::]:::";
2882
2883 NORMALIZE_2(dat1, dat2, exp1, exp2);
2884 NORMALIZE_2(dat3, dat4, exp3, exp4);
2885 NORMALIZE_2(dat5, dat6, exp5, exp6);
2886
2887 NORM_COMBINED_2(dat1, dat2, exp7);
2888 NORM_COMBINED_2(dat3, dat4, exp8);
2889 NORM_COMBINED_2(dat5, dat6, exp9);
2890 }
2891
2892 SECTION("closing scope-symbols in the tail")
2893 {
2894 const char dat1[] = "(((()::::::)";
2895 const char dat2[] = "()::::))";
2896 const char dat3[] = "{{{{:::::::}";
2897 const char dat4[] = ":::::}}}";
2898 const char dat5[] = "[::::::::]::";
2899 const char dat6[] = "::::::::";
2900 const char exp1[] = "(((()::::::)";
2901 const char exp2[] = "()::::))";
2902 const char exp3[] = "{{{{:::::::}";
2903 const char exp4[] = ":::::}}}";
2904 const char exp5[] = "[::::::::]::";
2905 const char exp6[] = "::::::::";
2906
2907 const char exp7[] = "(((()::::::)()::::))";
2908 const char exp8[] = "{{{{:::::::}:::::}}}";
2909 const char exp9[] = "[::::::::]::::::::::";
2910
2911 NORMALIZE_2(dat1, dat2, exp1, exp2);
2912 NORMALIZE_2(dat3, dat4, exp3, exp4);
2913 NORMALIZE_2(dat5, dat6, exp5, exp6);
2914
2915 NORM_COMBINED_2(dat1, dat2, exp7);
2916 NORM_COMBINED_2(dat3, dat4, exp8);
2917 NORM_COMBINED_2(dat5, dat6, exp9);
2918 }
2919
2920 SECTION("newly opening scope-symbols in the tail")
2921 {
2922 const char dat1[] = "(:::(::::::(";
2923 const char dat2[] = "))):::::";
2924 const char dat3[] = "{:{:{:{:{:{:";
2925 const char dat4[] = "::}}}}}}";
2926 const char dat5[] = "[:[:[:::[:::";
2927 const char dat6[] = "::::]]]]";
2928 const char exp1[] = "(:::(::::::(";
2929 const char exp2[] = "))):::::";
2930 const char exp3[] = "{:{:{:{:{:{:";
2931 const char exp4[] = "::}}}}}}";
2932 const char exp5[] = "[:[:[:::[:::";
2933 const char exp6[] = "::::]]]]";
2934
2935 const char exp7[] = "(:::(::::::())):::::";
2936 const char exp8[] = "{:{:{:{:{:{:::}}}}}}";
2937 const char exp9[] = "[:[:[:::[:::::::]]]]";
2938
2939 NORMALIZE_2(dat1, dat2, exp1, exp2);
2940 NORMALIZE_2(dat3, dat4, exp3, exp4);
2941 NORMALIZE_2(dat5, dat6, exp5, exp6);
2942
2943 NORM_COMBINED_2(dat1, dat2, exp7);
2944 NORM_COMBINED_2(dat3, dat4, exp8);
2945 NORM_COMBINED_2(dat5, dat6, exp9);
2946 }
2947
2948 SECTION("fully closing scope-symbols in the tail")
2949 {
2950 const char dat1[] = "((((::::))))";
2951 const char dat2[] = "::::::::";
2952 const char dat3[] = "{{{{}:}:}:}:";
2953 const char dat4[] = "::::{}{}";
2954 const char dat5[] = "[[:::::::]:]";
2955 const char dat6[] = "[::::::]";
2956 const char exp1[] = "((((::::))))";
2957 const char exp2[] = "::::::::";
2958 const char exp3[] = "{{{{}:}:}:}:";
2959 const char exp4[] = "::::{}{}";
2960 const char exp5[] = "[[:::::::]:]";
2961 const char exp6[] = "[::::::]";
2962
2963 const char exp7[] = "((((::::))))::::::::";
2964 const char exp8[] = "{{{{}:}:}:}:::::{}{}";
2965 const char exp9[] = "[[:::::::]:][::::::]";
2966
2967 NORMALIZE_2(dat1, dat2, exp1, exp2);
2968 NORMALIZE_2(dat3, dat4, exp3, exp4);
2969 NORMALIZE_2(dat5, dat6, exp5, exp6);
2970
2971 NORM_COMBINED_2(dat1, dat2, exp7);
2972 NORM_COMBINED_2(dat3, dat4, exp8);
2973 NORM_COMBINED_2(dat5, dat6, exp9);
2974 }
2975
2976 SECTION("extra scope-symbols in the tail")
2977 {
2978 const char dat1[] = "((((((((";
2979 const char dat2[] = ")))))))))";
2980 const char dat3[] = "{{{{{{{{";
2981 const char dat4[] = "}}}}}}]}";
2982 const char dat5[] = "[[[[[[[[";
2983 const char dat6[] = "]]]]]]]</script>";
2984 const char exp1[] = "((((((((";
2985 const char exp2[] = "))))))))";
2986 const char exp3[] = "{{{{{{{{";
2987 const char exp4[] = "}}}}}}";
2988 const char exp5[] = "[[[[[[[[";
2989 const char exp6[] = "]]]]]]]";
2990
2991 const char exp7[] = "(((((((())))))))";
2992 const char exp8[] = "{{{{{{{{}}}}}}";
2993 const char exp9[] = "[[[[[[[[]]]]]]]";
2994
2995 NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::WRONG_CLOSING_SYMBOL);
2996 NORM_BAD_2(dat3, dat4, exp3, exp4, JSTokenizer::WRONG_CLOSING_SYMBOL);
2997 NORM_BAD_2(dat5, dat6, exp5, exp6, JSTokenizer::ENDED_IN_INNER_SCOPE);
2998
2999 NORM_COMBINED_BAD_2(dat1, dat2, exp7, JSTokenizer::WRONG_CLOSING_SYMBOL);
3000 NORM_COMBINED_BAD_2(dat3, dat4, exp8, JSTokenizer::WRONG_CLOSING_SYMBOL);
3001 NORM_COMBINED_BAD_2(dat5, dat6, exp9, JSTokenizer::ENDED_IN_INNER_SCOPE);
3002 }
3003
3004 SECTION("overwriting scope-symbols in the tail")
3005 {
3006 const char dat1[] = "(((((((())))";
3007 const char dat2[] = ":))))";
3008 const char dat3[] = "({[(:):]{}{}";
3009 const char dat4[] = "}[]())";
3010 const char dat5[] = "{{{{}[[]](((";
3011 const char dat6[] = ")))}}}";
3012 const char exp1[] = "(((((((())))";
3013 const char exp2[] = ":))))";
3014 const char exp3[] = "({[(:):]{}{}";
3015 const char exp4[] = "}[]())";
3016 const char exp5[] = "{{{{}[[]](((";
3017 const char exp6[] = ")))}}}";
3018
3019 const char exp7[] = "(((((((()))):))))";
3020 const char exp8[] = "({[(:):]{}{}}[]())";
3021 const char exp9[] = "{{{{}[[]]((()))}}}";
3022
3023 NORMALIZE_2(dat1, dat2, exp1, exp2);
3024 NORMALIZE_2(dat3, dat4, exp3, exp4);
3025 NORMALIZE_2(dat5, dat6, exp5, exp6);
3026
3027 NORM_COMBINED_2(dat1, dat2, exp7);
3028 NORM_COMBINED_2(dat3, dat4, exp8);
3029 NORM_COMBINED_2(dat5, dat6, exp9);
3030 }
3031 }
3032
3033 TEST_CASE("ignored identifiers", "[JSNormalizer]")
3034 {
3035 // 'console' 'eval' 'document' are in the ignore list
3036
3037 SECTION("a standalone identifier")
3038 {
3039 const char dat1[] = "alpha bravo console delta eval";
3040 const char dat2[] = "var a = 0; console = 1;";
3041 const char dat3[] = "var a = 0; var console = 1;";
3042 const char dat4[] = "foo(0); console(1); bar(2); console1(3); baz(4);";
3043 const char dat5[] = "foo(0); eval(1); bar(2); evaluate(3); baz(4);";
3044 const char exp1[] = "var_0000 var_0001 console var_0002 eval";
3045 const char exp2[] = "var var_0000=0;console=1;";
3046 const char exp3[] = "var var_0000=0;var console=1;";
3047 const char exp4[] = "var_0000(0);console(1);var_0001(2);var_0002(3);var_0003(4);";
3048 const char exp5[] = "var_0000(0);eval(1);var_0001(2);var_0002(3);var_0003(4);";
3049
3050 NORMALIZE_S(dat1, exp1);
3051 NORMALIZE_S(dat2, exp2);
3052 NORMALIZE_S(dat3, exp3);
3053 NORMALIZE_S(dat4, exp4);
3054 NORMALIZE_S(dat5, exp5);
3055 }
3056
3057 SECTION("inner objects")
3058 {
3059 const char dat1[] = "alpha.bravo.charlie.delta";
3060 const char dat2[] = "alpha.bravo.console.delta";
3061 const char dat3[] = "eval.alpha.bravo.charlie.delta";
3062 const char dat4[] = "eval.alpha.bravo.console.delta";
3063 const char exp1[] = "var_0000.var_0001.var_0002.var_0003";
3064 const char exp2[] = "var_0000.var_0001.var_0002.var_0003";
3065 const char exp3[] = "eval.alpha.bravo.charlie.delta";
3066 const char exp4[] = "eval.alpha.bravo.console.delta";
3067
3068 NORMALIZE_S(dat1, exp1);
3069 NORMALIZE_S(dat2, exp2);
3070 NORMALIZE_S(dat3, exp3);
3071 NORMALIZE_S(dat4, exp4);
3072 }
3073
3074 SECTION("function calls")
3075 {
3076 const char dat1[] = "foo.bar.baz()";
3077 const char dat2[] = "foo.bar().baz";
3078 const char dat3[] = "foo().bar.baz";
3079 const char dat4[] = "eval.bar.baz()";
3080 const char dat5[] = "eval.bar().baz";
3081 const char dat6[] = "eval().bar.baz";
3082 const char dat7[] = "foo.eval.baz()";
3083 const char dat8[] = "foo.eval().baz";
3084 const char dat9[] = "foo().eval.baz";
3085 const char dat10[] = "foo.bar.eval()";
3086 const char dat11[] = "foo.bar().eval";
3087 const char dat12[] = "var_0000().bar.eval";
3088 const char exp1[] = "var_0000.var_0001.var_0002()";
3089 const char exp2[] = "var_0000.var_0001().var_0002";
3090 const char exp3[] = "var_0000().var_0001.var_0002";
3091 const char exp4[] = "eval.bar.baz()";
3092 const char exp5[] = "eval.bar().baz";
3093 const char exp6[] = "eval().bar.baz";
3094 const char exp7[] = "var_0000.var_0001.var_0002()";
3095 const char exp8[] = "var_0000.var_0001().var_0002";
3096 const char exp9[] = "var_0000().var_0001.var_0002";
3097 const char exp10[] = "var_0000.var_0001.var_0002()";
3098 const char exp11[] = "var_0000.var_0001().var_0002";
3099 const char exp12[] = "var_0000().var_0001.var_0002";
3100
3101 NORMALIZE_S(dat1, exp1);
3102 NORMALIZE_S(dat2, exp2);
3103 NORMALIZE_S(dat3, exp3);
3104 NORMALIZE_S(dat4, exp4);
3105 NORMALIZE_S(dat5, exp5);
3106 NORMALIZE_S(dat6, exp6);
3107 NORMALIZE_S(dat7, exp7);
3108 NORMALIZE_S(dat8, exp8);
3109 NORMALIZE_S(dat9, exp9);
3110 NORMALIZE_S(dat10, exp10);
3111 NORMALIZE_S(dat11, exp11);
3112 NORMALIZE_S(dat12, exp12);
3113 }
3114 }
3115
3116 TEST_CASE("ignored identifier chain tracking", "[JSNormalizer]")
3117 {
3118 // 'console' 'eval' 'document' are in the ignore list
3119
3120 SECTION("chain terminators")
3121 {
3122 const char dat1[] = "eval.foo.bar.baz";
3123 const char dat2[] = "eval.foo bar.baz";
3124 const char dat3[] = "eval.foo;bar.baz";
3125 const char dat4[] = "eval.foo,bar.baz";
3126 const char dat5[] = "eval.foo*bar.baz";
3127 const char dat6[] = "eval.foo*=bar.baz";
3128 const char dat7[] = "eval.foo/bar.baz";
3129 const char dat8[] = "eval.foo/=bar.baz";
3130 const char dat9[] = "eval.foo%bar.baz";
3131 const char dat10[] = "eval.foo%=bar.baz";
3132 const char dat11[] = "eval.foo+bar.baz";
3133 const char dat12[] = "eval.foo+=bar.baz";
3134 const char dat13[] = "eval.foo-bar.baz";
3135 const char dat14[] = "eval.foo-=bar.baz";
3136 const char dat15[] = "eval.foo<<bar.baz";
3137 const char dat16[] = "eval.foo<<=bar.baz";
3138 const char dat17[] = "eval.foo>>bar.baz";
3139 const char dat18[] = "eval.foo>>=bar.baz";
3140 const char dat19[] = "eval.foo>>>bar.baz";
3141 const char dat20[] = "eval.foo>>>=bar.baz";
3142 const char dat21[] = "eval.foo<bar.baz";
3143 const char dat22[] = "eval.foo<=bar.baz";
3144 const char dat23[] = "eval.foo>bar.baz";
3145 const char dat24[] = "eval.foo>=bar.baz";
3146 const char dat25[] = "eval.foo instanceof bar.baz";
3147 const char dat26[] = "eval.foo==bar.baz";
3148 const char dat27[] = "eval.foo!=bar.baz";
3149 const char dat28[] = "eval.foo===bar.baz";
3150 const char dat29[] = "eval.foo!==bar.baz";
3151 const char dat30[] = "eval.foo&bar.baz";
3152 const char dat31[] = "eval.foo&=bar.baz";
3153 const char dat32[] = "eval.foo&&bar.baz";
3154 const char dat33[] = "eval.foo|bar.baz";
3155 const char dat34[] = "eval.foo|=bar.baz";
3156 const char dat35[] = "eval.foo||bar.baz";
3157 const char dat36[] = "eval.foo^bar.baz";
3158 const char dat37[] = "eval.foo^=bar.baz";
3159 const char dat38[] = "eval.foo?bar.baz";
3160 const char dat39[] = "eval.foo(bar.baz)";
3161 const char dat40[] = "eval.var.foo";
3162 const char dat41[] = "eval. break() . foo";
3163
3164 const char exp1[] = "eval.foo.bar.baz";
3165 const char exp2[] = "eval.foo var_0000.var_0001";
3166 const char exp3[] = "eval.foo;var_0000.var_0001";
3167 const char exp4[] = "eval.foo,var_0000.var_0001";
3168 const char exp5[] = "eval.foo*var_0000.var_0001";
3169 const char exp6[] = "eval.foo*=var_0000.var_0001";
3170 const char exp7[] = "eval.foo/var_0000.var_0001";
3171 const char exp8[] = "eval.foo/=var_0000.var_0001";
3172 const char exp9[] = "eval.foo%var_0000.var_0001";
3173 const char exp10[] = "eval.foo%=var_0000.var_0001";
3174 const char exp11[] = "eval.foo+var_0000.var_0001";
3175 const char exp12[] = "eval.foo+=var_0000.var_0001";
3176 const char exp13[] = "eval.foo-var_0000.var_0001";
3177 const char exp14[] = "eval.foo-=var_0000.var_0001";
3178 const char exp15[] = "eval.foo<<var_0000.var_0001";
3179 const char exp16[] = "eval.foo<<=var_0000.var_0001";
3180 const char exp17[] = "eval.foo>>var_0000.var_0001";
3181 const char exp18[] = "eval.foo>>=var_0000.var_0001";
3182 const char exp19[] = "eval.foo>>>var_0000.var_0001";
3183 const char exp20[] = "eval.foo>>>=var_0000.var_0001";
3184 const char exp21[] = "eval.foo<var_0000.var_0001";
3185 const char exp22[] = "eval.foo<=var_0000.var_0001";
3186 const char exp23[] = "eval.foo>var_0000.var_0001";
3187 const char exp24[] = "eval.foo>=var_0000.var_0001";
3188 const char exp25[] = "eval.foo instanceof var_0000.var_0001";
3189 const char exp26[] = "eval.foo==var_0000.var_0001";
3190 const char exp27[] = "eval.foo!=var_0000.var_0001";
3191 const char exp28[] = "eval.foo===var_0000.var_0001";
3192 const char exp29[] = "eval.foo!==var_0000.var_0001";
3193 const char exp30[] = "eval.foo&var_0000.var_0001";
3194 const char exp31[] = "eval.foo&=var_0000.var_0001";
3195 const char exp32[] = "eval.foo&&var_0000.var_0001";
3196 const char exp33[] = "eval.foo|var_0000.var_0001";
3197 const char exp34[] = "eval.foo|=var_0000.var_0001";
3198 const char exp35[] = "eval.foo||var_0000.var_0001";
3199 const char exp36[] = "eval.foo^var_0000.var_0001";
3200 const char exp37[] = "eval.foo^=var_0000.var_0001";
3201 const char exp38[] = "eval.foo?var_0000.var_0001";
3202 const char exp39[] = "eval.foo(var_0000.var_0001)";
3203 const char exp40[] = "eval.var.foo";
3204 const char exp41[] = "eval.break().foo";
3205
3206 NORMALIZE_S(dat1, exp1);
3207 NORMALIZE_S(dat2, exp2);
3208 NORMALIZE_S(dat3, exp3);
3209 NORMALIZE_S(dat4, exp4);
3210 NORMALIZE_S(dat5, exp5);
3211 NORMALIZE_S(dat6, exp6);
3212 NORMALIZE_S(dat7, exp7);
3213 NORMALIZE_S(dat8, exp8);
3214 NORMALIZE_S(dat9, exp9);
3215 NORMALIZE_S(dat10, exp10);
3216 NORMALIZE_S(dat11, exp11);
3217 NORMALIZE_S(dat12, exp12);
3218 NORMALIZE_S(dat13, exp13);
3219 NORMALIZE_S(dat14, exp14);
3220 NORMALIZE_S(dat15, exp15);
3221 NORMALIZE_S(dat16, exp16);
3222 NORMALIZE_S(dat17, exp17);
3223 NORMALIZE_S(dat18, exp18);
3224 NORMALIZE_S(dat19, exp19);
3225 NORMALIZE_S(dat20, exp20);
3226 NORMALIZE_S(dat21, exp21);
3227 NORMALIZE_S(dat22, exp22);
3228 NORMALIZE_S(dat23, exp23);
3229 NORMALIZE_S(dat24, exp24);
3230 NORMALIZE_S(dat25, exp25);
3231 NORMALIZE_S(dat26, exp26);
3232 NORMALIZE_S(dat27, exp27);
3233 NORMALIZE_S(dat28, exp28);
3234 NORMALIZE_S(dat29, exp29);
3235 NORMALIZE_S(dat30, exp30);
3236 NORMALIZE_S(dat31, exp31);
3237 NORMALIZE_S(dat32, exp32);
3238 NORMALIZE_S(dat33, exp33);
3239 NORMALIZE_S(dat34, exp34);
3240 NORMALIZE_S(dat35, exp35);
3241 NORMALIZE_S(dat36, exp36);
3242 NORMALIZE_S(dat37, exp37);
3243 NORMALIZE_S(dat38, exp38);
3244 NORMALIZE_S(dat39, exp39);
3245 NORMALIZE_S(dat40, exp40);
3246 NORMALIZE_S(dat41, exp41);
3247 }
3248
3249 SECTION("over inner scopes")
3250 {
3251 const char dat1[] = "eval.foo.bar.baz";
3252 const char dat2[] = "eval().foo.bar.baz";
3253 const char dat3[] = "eval.foo().bar.baz";
3254 const char dat4[] = "eval(foo.bar).baz";
3255 const char dat5[] = "eval.foo().bar[].baz";
3256 const char dat6[] = "eval.foo{bar[]}.baz";
3257 const char dat7[] = "eval(foo+bar).baz";
3258 const char dat8[] = "eval(foo bar).baz";
3259 const char exp1[] = "eval.foo.bar.baz";
3260 const char exp2[] = "eval().foo.bar.baz";
3261 const char exp3[] = "eval.foo().bar.baz";
3262 const char exp4[] = "eval(var_0000.var_0001).baz";
3263 const char exp5[] = "eval.foo().bar[].baz";
3264 const char exp6[] = "eval.foo{var_0000[]}.var_0001";
3265 const char exp7[] = "eval(var_0000+var_0001).baz";
3266 const char exp8[] = "eval(var_0000 var_0001).baz";
3267
3268 NORMALIZE_S(dat1, exp1);
3269 NORMALIZE_S(dat2, exp2);
3270 NORMALIZE_S(dat3, exp3);
3271 NORMALIZE_S(dat4, exp4);
3272 NORMALIZE_S(dat5, exp5);
3273 NORMALIZE_S(dat6, exp6);
3274 NORMALIZE_S(dat7, exp7);
3275 NORMALIZE_S(dat8, exp8);
3276 }
3277
3278 SECTION("spaces and operators")
3279 {
3280 const char dat1[] = "foo.bar.baz console.log";
3281 const char dat2[] = "console.log foo.bar.baz";
3282 const char dat3[] = "foo . bar . baz console . log";
3283 const char dat4[] = "console . log foo . bar . baz";
3284 const char dat5[] = "console . foo . bar . baz";
3285 const char dat6[] = "console = foo . bar . baz";
3286 const char dat7[] = "console . foo + bar . baz";
3287 const char dat8[] = "console . foo . bar : baz";
3288 const char dat9[] = "console.\nfoo";
3289 const char exp1[] = "var_0000.var_0001.var_0002 console.log";
3290 const char exp2[] = "console.log var_0000.var_0001.var_0002";
3291 const char exp3[] = "var_0000.var_0001.var_0002 console.log";
3292 const char exp4[] = "console.log var_0000.var_0001.var_0002";
3293 const char exp5[] = "console.foo.bar.baz";
3294 const char exp6[] = "console=var_0000.var_0001.var_0002";
3295 const char exp7[] = "console.foo+var_0000.var_0001";
3296 const char exp8[] = "console.foo.bar:var_0000";
3297 const char exp9[] = "console.foo";
3298
3299 NORMALIZE_S(dat1, exp1);
3300 NORMALIZE_S(dat2, exp2);
3301 NORMALIZE_S(dat3, exp3);
3302 NORMALIZE_S(dat4, exp4);
3303 NORMALIZE_S(dat5, exp5);
3304 NORMALIZE_S(dat6, exp6);
3305 NORMALIZE_S(dat7, exp7);
3306 NORMALIZE_S(dat8, exp8);
3307 NORMALIZE_S(dat9, exp9);
3308 }
3309
3310 SECTION("comments")
3311 {
3312 const char dat1[] = "console.<!-- HTML comment\nlog(abc).foo";
3313 const char dat2[] = "console.//single-line comment\nlog(abc).foo";
3314 const char dat3[] = "console./*multi-line comment*/log(abc).foo";
3315 const char exp[] = "console.log(var_0000).foo";
3316
3317 NORMALIZE_S(dat1, exp);
3318 NORMALIZE_S(dat2, exp);
3319 NORMALIZE_S(dat3, exp);
3320 }
3321 }
3322
3323 TEST_CASE("ignored identifier scope tracking", "[JSNormalizer]")
3324 {
3325 // 'console' 'eval' 'document' are in the ignore list
3326
3327 SECTION("basic")
3328 {
3329 const char dat1[] = "(alpha) bravo console delta eval foxtrot";
3330 const char dat2[] = "(alpha bravo) console delta eval foxtrot";
3331 const char dat3[] = "(alpha bravo console) delta eval foxtrot";
3332 const char dat4[] = "(alpha bravo console delta) eval foxtrot";
3333 const char dat5[] = "(alpha bravo console delta eval) foxtrot";
3334 const char dat6[] = "(alpha bravo console delta eval foxtrot)";
3335 const char dat7[] = "alpha bravo (console) delta (eval) foxtrot";
3336 const char exp1[] = "(var_0000)var_0001 console var_0002 eval var_0003";
3337 const char exp2[] = "(var_0000 var_0001)console var_0002 eval var_0003";
3338 const char exp3[] = "(var_0000 var_0001 console)var_0002 eval var_0003";
3339 const char exp4[] = "(var_0000 var_0001 console var_0002)eval var_0003";
3340 const char exp5[] = "(var_0000 var_0001 console var_0002 eval)var_0003";
3341 const char exp6[] = "(var_0000 var_0001 console var_0002 eval var_0003)";
3342 const char exp7[] = "var_0000 var_0001(console)var_0002(eval)var_0003";
3343
3344 NORMALIZE_S(dat1, exp1);
3345 NORMALIZE_S(dat2, exp2);
3346 NORMALIZE_S(dat3, exp3);
3347 NORMALIZE_S(dat4, exp4);
3348 NORMALIZE_S(dat5, exp5);
3349 NORMALIZE_S(dat6, exp6);
3350 NORMALIZE_S(dat7, exp7);
3351 }
3352
3353 SECTION("function calls")
3354 {
3355 const char dat1[] = "foo(bar).baz";
3356 const char dat2[] = "foo(bar(baz))";
3357 const char dat3[] = "eval(bar).baz";
3358 const char dat4[] = "eval(bar(baz))";
3359 const char dat5[] = "foo(eval).baz";
3360 const char dat6[] = "foo(eval(baz))";
3361 const char dat7[] = "foo(bar).eval";
3362 const char dat8[] = "foo(bar(eval))";
3363 const char dat9[] = "(console).log";
3364 const char dat10[] = "((console)).log";
3365 const char dat11[] = "((foo, console)).log";
3366 const char dat12[] = "((document.foo(bar), console)).log";
3367 const char dat13[] = "((document.foo(bar) console)).log";
3368 const char dat14[] = "((document.foo(bar) console))log";
3369 const char dat15[] = "((document.foo(bar) baz))log";
3370 const char dat16[] = "foo(console).log";
3371 const char dat17[] = "foo((console).log).log";
3372 const char dat18[] = "foo().baz + eval(eval['content'].bar + baz(console['content'].log, baz)).bar";
3373 const char dat19[] = "eval['foo']().bar.baz";
3374 const char dat20[] = "eval['foo']()['bar'].baz";
3375 const char dat21[] = "eval['foo']['bar'].baz()";
3376 const char exp1[] = "var_0000(var_0001).var_0002";
3377 const char exp2[] = "var_0000(var_0001(var_0002))";
3378 const char exp3[] = "eval(var_0000).baz";
3379 const char exp4[] = "eval(var_0000(var_0001))";
3380 const char exp5[] = "var_0000(eval).var_0001";
3381 const char exp6[] = "var_0000(eval(var_0001))";
3382 const char exp7[] = "var_0000(var_0001).var_0002";
3383 const char exp8[] = "var_0000(var_0001(eval))";
3384 const char exp9[] = "(console).log";
3385 const char exp10[] = "((console)).log";
3386 const char exp11[] = "((var_0000,console)).log";
3387 const char exp12[] = "((document.foo(var_0000),console)).log";
3388 const char exp13[] = "((document.foo(var_0000)console)).log";
3389 const char exp14[] = "((document.foo(var_0000)console))var_0001";
3390 const char exp15[] = "((document.foo(var_0000)var_0001))var_0002";
3391 const char exp16[] = "var_0000(console).var_0001";
3392 const char exp17[] = "var_0000((console).log).var_0001";
3393 const char exp18[] = "var_0000().var_0001+eval(eval['content'].bar+var_0001(console['content'].log,var_0001)).bar";
3394 const char exp19[] = "eval['foo']().bar.baz";
3395 const char exp20[] = "eval['foo']()['bar'].baz";
3396 const char exp21[] = "eval['foo']['bar'].baz()";
3397
3398 NORMALIZE_S(dat1, exp1);
3399 NORMALIZE_S(dat2, exp2);
3400 NORMALIZE_S(dat3, exp3);
3401 NORMALIZE_S(dat4, exp4);
3402 NORMALIZE_S(dat5, exp5);
3403 NORMALIZE_S(dat6, exp6);
3404 NORMALIZE_S(dat7, exp7);
3405 NORMALIZE_S(dat8, exp8);
3406 NORMALIZE_S(dat9, exp9);
3407 NORMALIZE_S(dat10, exp10);
3408 NORMALIZE_S(dat11, exp11);
3409 NORMALIZE_S(dat12, exp12);
3410 NORMALIZE_S(dat13, exp13);
3411 NORMALIZE_S(dat14, exp14);
3412 NORMALIZE_S(dat15, exp15);
3413 NORMALIZE_S(dat16, exp16);
3414 NORMALIZE_S(dat17, exp17);
3415 NORMALIZE_S(dat18, exp18);
3416 NORMALIZE_S(dat19, exp19);
3417 NORMALIZE_S(dat20, exp20);
3418 NORMALIZE_S(dat21, exp21);
3419 }
3420
3421 SECTION("eval cases")
3422 {
3423 const char dat1[] = "eval().bar";
3424 const char dat2[] = "eval()['bar']";
3425 const char dat3[] = "eval().bar()";
3426 const char dat4[] = "eval()['bar']()";
3427 const char dat5[] = "eval.bar";
3428 const char dat6[] = "eval.bar()";
3429 const char dat7[] = "eval['bar']";
3430 const char dat8[] = "eval['bar']()";
3431 const char dat9[] = "\\u0065\\u0076\\u0061\\u006c(\\u0062\\u0061\\u0072).\\u0062\\u0061\\u007a ;";
3432 const char dat10[] = "var foo.bar = 123 ; (\\u0065\\u0076\\u0061\\u006c).\\u0062\\u0061\\u007a ;";
3433 const char exp1[] = "eval().bar";
3434 const char exp2[] = "eval()['bar']";
3435 const char exp3[] = "eval().bar()";
3436 const char exp4[] = "eval()['bar']()";
3437 const char exp5[] = "eval.bar";
3438 const char exp6[] = "eval.bar()";
3439 const char exp7[] = "eval['bar']";
3440 const char exp8[] = "eval['bar']()";
3441 const char exp9[] = "eval(var_0000).baz;";
3442 const char exp10[] = "var var_0000.var_0001=123;(eval).baz;";
3443
3444 NORMALIZE_S(dat1, exp1);
3445 NORMALIZE_S(dat2, exp2);
3446 NORMALIZE_S(dat3, exp3);
3447 NORMALIZE_S(dat4, exp4);
3448 NORMALIZE_S(dat5, exp5);
3449 NORMALIZE_S(dat6, exp6);
3450 NORMALIZE_S(dat7, exp7);
3451 NORMALIZE_S(dat8, exp8);
3452 NORMALIZE_S(dat9, exp9);
3453 NORMALIZE_S(dat10, exp10);
3454 }
3455
3456 SECTION("console cases")
3457 {
3458 const char dat1[] = "console.log=abc";
3459 const char dat2[] = "console.log().clear()";
3460 const char dat3[] = "console.log(\"asd\").foo";
3461 const char dat4[] = "console.log[\"asd\"].foo";
3462 const char dat5[] = "console.log(`var a = ${ c + b }`).foo";
3463 const char dat6[] = "console.log(abc,def,www,document.foo(abc))";
3464 const char dat7[] = "console.log(document.getElementById(\"mem\").text).clear(abc)";
3465 const char exp1[] = "console.log=var_0000";
3466 const char exp2[] = "console.log().clear()";
3467 const char exp3[] = "console.log(\"asd\").foo";
3468 const char exp4[] = "console.log[\"asd\"].foo";
3469 const char exp5[] = "console.log(`var a = ${var_0000+var_0001}`).foo";
3470 const char exp6[] = "console.log(var_0000,var_0001,var_0002,document.foo(var_0000))";
3471 const char exp7[] = "console.log(document.getElementById(\"mem\").text).clear(var_0000)";
3472
3473 NORMALIZE_S(dat1, exp1);
3474 NORMALIZE_S(dat2, exp2);
3475 NORMALIZE_S(dat3, exp3);
3476 NORMALIZE_S(dat4, exp4);
3477 NORMALIZE_S(dat5, exp5);
3478 NORMALIZE_S(dat6, exp6);
3479 NORMALIZE_S(dat7, exp7);
3480 }
3481
3482 SECTION("corner cases")
3483 {
3484 const char dat1[] = "object = {hidden: eval}";
3485 const char dat2[] = "object = {console: \"str\"}";
3486 const char dat3[] = "object.hidden = eval";
3487 const char dat4[] = "array = [None, eval, document.getElementById]";
3488 const char dat5[] = "array[1] = eval";
3489 const char exp1[] = "var_0000={var_0001:eval}";
3490 const char exp2[] = "var_0000={console:\"str\"}";
3491 const char exp3[] = "var_0000.var_0001=eval";
3492 const char exp4[] = "var_0000=[var_0001,eval,document.getElementById]";
3493 const char exp5[] = "var_0000[1]=eval";
3494
3495 NORMALIZE_S(dat1, exp1);
3496 NORMALIZE_S(dat2, exp2);
3497 NORMALIZE_S(dat3, exp3);
3498 NORMALIZE_S(dat4, exp4);
3499 NORMALIZE_S(dat5, exp5);
3500 }
3501 }
3502
3503 TEST_CASE("ignored identifier split", "[JSNormalizer]")
3504 {
3505
3506 #if JSTOKENIZER_MAX_STATES != 8
3507 #error "ignored identifier split" tests are designed for 8 states depth
3508 #endif
3509
3510 SECTION("a standalone identifier")
3511 {
3512 const char dat1[] = "con";
3513 const char dat2[] = "sole";
3514 const char dat3[] = "e";
3515 const char dat4[] = "val";
3516 const char dat5[] = "console . ";
3517 const char dat6[] = "foo";
3518 const char dat7[] = "eval ";
3519 const char dat8[] = ". bar";
3520 const char exp1[] = "var_0000";
3521 const char exp2[] = "console";
3522 const char exp3[] = "var_0000";
3523 const char exp4[] = "eval";
3524 const char exp5[] = "console.";
3525 const char exp6[] = "foo";
3526 const char exp7[] = "eval";
3527 const char exp8[] = ".bar";
3528
3529 const char exp9[] = "console";
3530 const char exp10[] = "eval";
3531 const char exp11[] = "console.foo";
3532 const char exp12[] = "eval.bar";
3533
3534 NORMALIZE_T(dat1, dat2, exp1, exp2);
3535 NORMALIZE_T(dat3, dat4, exp3, exp4);
3536 NORMALIZE_T(dat5, dat6, exp5, exp6);
3537 NORMALIZE_T(dat7, dat8, exp7, exp8);
3538
3539 NORM_COMBINED_S_2(dat1, dat2, exp9);
3540 NORM_COMBINED_S_2(dat3, dat4, exp10);
3541 NORM_COMBINED_S_2(dat5, dat6, exp11);
3542 NORM_COMBINED_S_2(dat7, dat8, exp12);
3543 }
3544
3545 SECTION("function calls")
3546 {
3547 const char dat1[] = "console";
3548 const char dat2[] = "().foo";
3549 const char dat3[] = "console(";
3550 const char dat4[] = ").foo";
3551 const char dat5[] = "console()";
3552 const char dat6[] = ".foo";
3553 const char dat7[] = "console().";
3554 const char dat8[] = "foo";
3555 const char dat9[] = "console().re";
3556 const char dat10[] = "write";
3557 const char exp1[] = "console";
3558 const char exp2[] = "().foo";
3559 const char exp3[] = "console(";
3560 const char exp4[] = ").foo";
3561 const char exp5[] = "console()";
3562 const char exp6[] = ".foo";
3563 const char exp7[] = "console().";
3564 const char exp8[] = "foo";
3565 const char exp9[] = "console().re";
3566 const char exp10[] = "rewrite";
3567
3568 const char exp11[] = "console().foo";
3569 const char exp12[] = "console().foo";
3570 const char exp13[] = "console().foo";
3571 const char exp14[] = "console().foo";
3572 const char exp15[] = "console().rewrite";
3573
3574 NORMALIZE_T(dat1, dat2, exp1, exp2);
3575 NORMALIZE_T(dat3, dat4, exp3, exp4);
3576 NORMALIZE_T(dat5, dat6, exp5, exp6);
3577 NORMALIZE_T(dat7, dat8, exp7, exp8);
3578 NORMALIZE_T(dat9, dat10, exp9, exp10);
3579
3580 NORM_COMBINED_S_2(dat1, dat2, exp11);
3581 NORM_COMBINED_S_2(dat3, dat4, exp12);
3582 NORM_COMBINED_S_2(dat5, dat6, exp13);
3583 NORM_COMBINED_S_2(dat7, dat8, exp14);
3584 NORM_COMBINED_S_2(dat9, dat10, exp15);
3585 }
3586
3587 SECTION("terminator split")
3588 {
3589 const char dat1[] = "eval.foo ";
3590 const char dat2[] = "bar.baz";
3591 const char dat3[] = "eval.foo";
3592 const char dat4[] = " bar.baz";
3593 const char dat5[] = "eval.foo;";
3594 const char dat6[] = "bar.baz";
3595 const char dat7[] = "eval.foo";
3596 const char dat8[] = ";bar.baz";
3597 const char dat9[] = "eval.foo%";
3598 const char dat10[] = "=bar.baz";
3599 const char exp1[] = "eval.foo";
3600 const char exp2[] = " var_0000.var_0001";
3601 const char exp3[] = "eval.foo";
3602 const char exp4[] = " var_0000.var_0001";
3603 const char exp5[] = "eval.foo;";
3604 const char exp6[] = "var_0000.var_0001";
3605 const char exp7[] = "eval.foo";
3606 const char exp8[] = ";var_0000.var_0001";
3607 const char exp9[] = "eval.foo%";
3608 const char exp10[] = "%=var_0000.var_0001";
3609
3610 const char exp11[] = "eval.foo var_0000.var_0001";
3611 const char exp12[] = "eval.foo var_0000.var_0001";
3612 const char exp13[] = "eval.foo;var_0000.var_0001";
3613 const char exp14[] = "eval.foo;var_0000.var_0001";
3614 const char exp15[] = "eval.foo%=var_0000.var_0001";
3615
3616 NORMALIZE_T(dat1, dat2, exp1, exp2);
3617 NORMALIZE_T(dat3, dat4, exp3, exp4);
3618 NORMALIZE_T(dat5, dat6, exp5, exp6);
3619 NORMALIZE_T(dat7, dat8, exp7, exp8);
3620 NORMALIZE_T(dat9, dat10, exp9, exp10);
3621
3622 NORM_COMBINED_S_2(dat1, dat2, exp11);
3623 NORM_COMBINED_S_2(dat3, dat4, exp12);
3624 NORM_COMBINED_S_2(dat5, dat6, exp13);
3625 NORM_COMBINED_S_2(dat7, dat8, exp14);
3626 NORM_COMBINED_S_2(dat9, dat10, exp15);
3627 }
3628
3629 SECTION("scope split")
3630 {
3631 // "eval(foo,eval(bar,eval(baz[''].console.check+check).foo).bar).baz+check"
3632 // split here ^
3633
3634 const char dat1[] = "eval(foo,eval(bar,eval(baz['";
3635 const char dat2[] = "'].console.check+check).foo).bar).baz+check";
3636 const char exp1[] = "eval(var_0000,eval(var_0001,eval(var_0002['";
3637 const char exp2[] = "'].var_0003.var_0004+var_0004).foo).bar).baz+var_0004";
3638
3639 const char exp3[] = "eval(var_0000,eval(var_0001,eval(var_0002['"
3640 "'].var_0003.var_0004+var_0004).foo).bar).baz+var_0004";
3641
3642 NORMALIZE_T(dat1, dat2, exp1, exp2);
3643 NORM_COMBINED_S_2(dat1, dat2, exp3);
3644
3645 // "eval(foo,eval(bar,eval(baz[''].console.check+check).foo).bar).baz+check"
3646 // split here ^
3647
3648 const char dat3[] = "eval(foo,eval(bar,eval(baz[''].con";
3649 const char dat4[] = "sole.check+check).foo).bar).baz+check";
3650 const char exp4[] = "eval(var_0000,eval(var_0001,eval(var_0002[''].var_0003";
3651 const char exp5[] = "var_0004.var_0005+var_0005).foo).bar).baz+var_0005";
3652
3653 const char exp6[] = "eval(var_0000,eval(var_0001,eval(var_0002['']."
3654 "var_0004.var_0005+var_0005).foo).bar).baz+var_0005";
3655
3656 NORMALIZE_T(dat3, dat4, exp4, exp5);
3657 NORM_COMBINED_S_2(dat3, dat4, exp6);
3658
3659 // "eval(foo,eval(bar,eval(baz[''].console.check+check).foo).bar).baz+check"
3660 // split here ^
3661
3662 const char dat5[] = "eval(foo,eval(bar,eval(baz[''].console.check+check).foo";
3663 const char dat6[] = ").bar).baz+check";
3664 const char exp7[] = "eval(var_0000,eval(var_0001,eval(var_0002[''].var_0003.var_0004+var_0004).foo";
3665 const char exp8[] = ").bar).baz+var_0004";
3666
3667 const char exp9[] = "eval(var_0000,eval(var_0001,eval(var_0002[''].var_0003.var_0004+var_0004).foo"
3668 ").bar).baz+var_0004";
3669
3670 NORMALIZE_T(dat5, dat6, exp7, exp8);
3671 NORM_COMBINED_S_2(dat5, dat6, exp9);
3672 }
3673 }
3674
3675 TEST_CASE("Scope tracking - basic","[JSNormalizer]")
3676 {
3677 SECTION("Global only")
3678 test_scope("",{GLOBAL});
3679
3680 SECTION("Function scope - named function")
3681 test_scope("function f(){",{GLOBAL,FUNCTION});
3682
3683 SECTION("Function scope - anonymous function")
3684 test_scope("var f = function(){",{GLOBAL,FUNCTION});
3685
3686 SECTION("Function scope - arrow function")
3687 test_scope("var f = (a,b)=>{",{GLOBAL,FUNCTION});
3688
3689 SECTION("Function scope - arrow function without scope")
3690 test_scope("var f = (a,b)=> a",{GLOBAL,FUNCTION});
3691
3692 SECTION("Function scope - method in object initialization")
3693 test_scope("var o = { f(){",{GLOBAL,BLOCK,BLOCK});
3694
3695 SECTION("Function scope - method in object operation")
3696 test_scope("+{ f(){",{GLOBAL,BLOCK,BLOCK});
3697
3698 SECTION("Function scope - method in object as a function parameter")
3699 test_scope("call({ f(){",{GLOBAL,BLOCK,BLOCK});
3700
3701 SECTION("Function scope - keyword name method")
3702 test_scope("var o = { let(){",{GLOBAL,BLOCK,BLOCK});
3703
3704 SECTION("Function scope - 'get' name method")
3705 test_scope("var o = { get(){",{GLOBAL,BLOCK,BLOCK});
3706
3707 SECTION("Function scope - expression method")
3708 test_scope("var o = { [a + 12](){",{GLOBAL,BLOCK,BLOCK});
3709
3710 SECTION("Function scope - method as anonymous function")
3711 test_scope("var o = { f: function(){",{GLOBAL,BLOCK,FUNCTION});
3712
3713 SECTION("Function scope - keyword name method as anonymous function")
3714 test_scope("var o = { let: function(){",{GLOBAL,BLOCK,FUNCTION});
3715
3716 SECTION("Function scope - 'get' name method as anonymous function")
3717 test_scope("var o = { get: function(){",{GLOBAL,BLOCK,FUNCTION});
3718
3719 SECTION("Function scope - expression method as anonymous function")
3720 test_scope("var o = { [a + 12]: function(){",{GLOBAL,BLOCK,FUNCTION});
3721
3722 SECTION("Function scope - getter")
3723 test_scope("var o = { get f(){",{GLOBAL,BLOCK,BLOCK});
3724
3725 SECTION("Function scope - parametric getter")
3726 test_scope("var o = { get [a + 12](){",{GLOBAL,BLOCK,BLOCK});
3727
3728 SECTION("Function scope - setter")
3729 test_scope("var o = { set f(){",{GLOBAL,BLOCK,BLOCK});
3730
3731 SECTION("Function scope - parametric setter")
3732 test_scope("var o = { set [a + 12](){",{GLOBAL,BLOCK,BLOCK});
3733
3734 SECTION("Block scope - regular block")
3735 test_scope("{",{GLOBAL,BLOCK});
3736
3737 SECTION("Block scope - object initializer")
3738 test_scope("o = {",{GLOBAL,BLOCK});
3739
3740 SECTION("Block scope - class")
3741 test_scope("class C{",{GLOBAL,BLOCK});
3742
3743 SECTION("Block scope - class with extends")
3744 test_scope("class C extends A{",{GLOBAL,BLOCK});
3745
3746 SECTION("Block scope - if")
3747 test_scope("if(true){",{GLOBAL,BLOCK});
3748
3749 SECTION("Block scope - single statement if")
3750 test_scope("if(true) func()",{GLOBAL,BLOCK});
3751
3752 SECTION("Block scope - nested multiple single statement ifs")
3753 test_scope("if(a) if(b) if(c) if(d) func()",{GLOBAL,BLOCK});
3754
3755 SECTION("Block scope - nested multiple single statement ifs with newline")
3756 test_scope("if(a)\nif(b)\nif(c)\nif(d)\nfunc()",{GLOBAL,BLOCK});
3757
3758 SECTION("Block scope - else")
3759 test_scope("if(true);else{",{GLOBAL,BLOCK});
3760
3761 SECTION("Block scope - single statement else")
3762 test_scope("if(true);else func()",{GLOBAL,BLOCK});
3763
3764 SECTION("Block scope - for loop")
3765 test_scope("for(;;){",{GLOBAL,BLOCK});
3766
3767 SECTION("Block scope - for loop in range")
3768 test_scope("for(i in range()){",{GLOBAL,BLOCK});
3769
3770 SECTION("Block scope - for loop of iterable")
3771 test_scope("for(i of o){",{GLOBAL,BLOCK});
3772
3773 SECTION("Block scope - for await loop")
3774 test_scope("for await(i of o){",{GLOBAL,BLOCK});
3775
3776 SECTION("Block scope - inside for statement")
3777 test_scope("for(",{GLOBAL,BLOCK});
3778
3779 SECTION("Block scope - inside for statement, after semicolon")
3780 test_scope("for(;",{GLOBAL,BLOCK});
3781
3782 SECTION("Block scope - single statement for")
3783 test_scope("for(;;) func()",{GLOBAL,BLOCK});
3784
3785 SECTION("Block scope - for nested in single line conditional")
3786 test_scope("if(true) for(;;) a++",{GLOBAL,BLOCK});
3787
3788 SECTION("Block scope - while")
3789 test_scope("while(true){",{GLOBAL,BLOCK});
3790
3791 SECTION("Block scope - single statement while")
3792 test_scope("while(true) func()",{GLOBAL,BLOCK});
3793
3794 SECTION("Block scope - do-while")
3795 test_scope("do{",{GLOBAL,BLOCK});
3796
3797 SECTION("Block scope - single statement do-while")
3798 test_scope("do func()",{GLOBAL,BLOCK});
3799
3800 SECTION("Block scope - try")
3801 test_scope("try{",{GLOBAL,BLOCK});
3802
3803 SECTION("Block scope - catch")
3804 test_scope("try{}catch(e){",{GLOBAL,BLOCK});
3805
3806 SECTION("Block scope - catch exception declaration")
3807 test_scope("try{}catch(",{GLOBAL,BLOCK});
3808
3809 SECTION("Block scope - finally")
3810 test_scope("try{}finally{",{GLOBAL,BLOCK});
3811
3812 SECTION("Block scope - nested object - named")
3813 test_scope("var o = {s:{",{GLOBAL,BLOCK,BLOCK});
3814
3815 SECTION("Block scope - nested object - keyword named")
3816 test_scope("var o = {let:{",{GLOBAL,BLOCK,BLOCK});
3817
3818 SECTION("Block scope - nested object - 'get' named")
3819 test_scope("var o = {get:{",{GLOBAL,BLOCK,BLOCK});
3820
3821 SECTION("Block scope - nested object - parametric")
3822 test_scope("var o = {[a+12]:{",{GLOBAL,BLOCK,BLOCK});
3823 }
3824
3825 TEST_CASE("Scope tracking - closing","[JSNormalizer]")
3826 {
3827
3828 SECTION("Function scope - named function")
3829 test_scope("function f(){}",{GLOBAL});
3830
3831 SECTION("Function scope - anonymous function")
3832 test_scope("var f = function(){}",{GLOBAL});
3833
3834 SECTION("Function scope - arrow function")
3835 test_scope("var f = (a,b)=>{}",{GLOBAL});
3836
3837 SECTION("Function scope - arrow function without scope")
3838 test_scope("var f = (a,b)=>a;",{GLOBAL});
3839
3840 SECTION("Function scope - arrow function as a function parameter")
3841 test_scope("console.log(a=>c)",{GLOBAL});
3842
3843 SECTION("Function scope - method")
3844 test_scope("var o = { f(){}",{GLOBAL,BLOCK});
3845
3846 SECTION("Function scope - keyword name method")
3847 test_scope("var o = { let(){}",{GLOBAL,BLOCK});
3848
3849 SECTION("Function scope - expression method")
3850 test_scope("var o = { [a + 12](){}",{GLOBAL,BLOCK});
3851
3852 SECTION("Function scope - method as anonymous function")
3853 test_scope("var o = { f: function(){}",{GLOBAL,BLOCK});
3854
3855 SECTION("Function scope - keyword name method as anonymous function")
3856 test_scope("var o = { let: function(){}",{GLOBAL,BLOCK});
3857
3858 SECTION("Function scope - expression method as anonymous function")
3859 test_scope("var o = { [a + 12]: function(){}",{GLOBAL,BLOCK});
3860
3861 SECTION("Function scope - getter")
3862 test_scope("var o = { get f(){}",{GLOBAL,BLOCK});
3863
3864 SECTION("Function scope - parametric getter")
3865 test_scope("var o = { get [a + 12](){}",{GLOBAL,BLOCK});
3866
3867 SECTION("Function scope - setter")
3868 test_scope("var o = { set f(){}",{GLOBAL,BLOCK});
3869
3870 SECTION("Function scope - parametric setter")
3871 test_scope("var o = { set [a + 12](){}",{GLOBAL,BLOCK});
3872
3873 SECTION("Block scope - regular block")
3874 test_scope("{}",{GLOBAL});
3875
3876 SECTION("Block scope - object initializer")
3877 test_scope("o = {}",{GLOBAL});
3878
3879 SECTION("Block scope - class")
3880 test_scope("class C{}",{GLOBAL});
3881
3882 SECTION("Block scope - class with extends")
3883 test_scope("class C extends A{}",{GLOBAL});
3884
3885 SECTION("Block scope - if")
3886 test_scope("if(true){}",{GLOBAL});
3887
3888 SECTION("Block scope - single statement if")
3889 test_scope("if(true);",{GLOBAL});
3890
3891 SECTION("Block scope - single statement if, semicolon group terminated")
3892 test_scope("if(true)\na++\nreturn",{GLOBAL});
3893
3894 SECTION("Block scope - nested multiple single statement ifs")
3895 test_scope("if(a) if(b) if(c) if(d) func();",{GLOBAL});
3896
3897 SECTION("Block scope - nested multiple single statement ifs with newline")
3898 test_scope("if(a)\nif(b)\nif(c)\nif(d)\nfunc()\nfunc()",{GLOBAL});
3899
3900 SECTION("Block scope - else")
3901 test_scope("if(true);else{}",{GLOBAL});
3902
3903 SECTION("Block scope - single statement else")
3904 test_scope("if(true);else;",{GLOBAL});
3905
3906 SECTION("Block scope - for loop")
3907 test_scope("for(;;){}",{GLOBAL});
3908
3909 SECTION("Block scope - for loop in range")
3910 test_scope("for(i in range()){}",{GLOBAL});
3911
3912 SECTION("Block scope - for loop of iterable")
3913 test_scope("for(i of o){}",{GLOBAL});
3914
3915 SECTION("Block scope - for await loop")
3916 test_scope("for await(i of o){}",{GLOBAL});
3917
3918 SECTION("Block scope - single statement for")
3919 test_scope("for(;;);",{GLOBAL});
3920
3921 SECTION("Block scope - while")
3922 test_scope("while(true){}",{GLOBAL});
3923
3924 SECTION("Block scope - single statement while")
3925 test_scope("while(true);",{GLOBAL});
3926
3927 SECTION("Block scope - do-while")
3928 test_scope("do{}while(",{GLOBAL, BLOCK});
3929
3930 SECTION("Block scope - single statement do-while")
3931 test_scope("do;while(",{GLOBAL, BLOCK});
3932
3933 SECTION("Block scope - try")
3934 test_scope("try{}",{GLOBAL});
3935
3936 SECTION("Block scope - catch")
3937 test_scope("try{}catch(e){}",{GLOBAL});
3938
3939 SECTION("Block scope - finally")
3940 test_scope("try{}finally{}",{GLOBAL});
3941
3942 SECTION("Block scope - nested object - named")
3943 test_scope("var o = {s:{}",{GLOBAL,BLOCK});
3944
3945 SECTION("Block scope - nested object - keyword named")
3946 test_scope("var o = {let:{}",{GLOBAL,BLOCK});
3947
3948 SECTION("Block scope - nested object - parametric")
3949 test_scope("var o = {[a+12]:{}",{GLOBAL,BLOCK});
3950
3951 SECTION("Block scope - advanced automatic semicolon insertion")
3952 test_scope(
3953 "var\na\n=\n0\n\n" // var a=0;
3954 "for\n(\nlet\na\n=\n0\na\n<\n5\n++\na\n)\na\n+=\n2\n\n" // for (let a = 0;a<5;++a) a+=2;
3955 "do\nlet\na\n=\n0\nwhile\n(\na\n<\n5\n)\n\n" // do let a=0; while (a < 5);
3956 "++\na\n\n" // ++a;
3957 "while\n(a\n<\n5\n)\na\n+=\n2\n\n" // while (a<5) a+=2;
3958 "if\n(\ntrue\n)\nlet\na\n=\n0\n\n" // if (true) let a=0;
3959 "else\nlet\na\n=\n0\n\na;", // else let a=0;a;
3960 {GLOBAL}
3961 );
3962
3963 SECTION("Block scope - inline block in the end of outer scope")
3964 test_scope("function() { if (true)\nfor ( ; ; ) a = 2 }", {GLOBAL});
3965 }
3966
3967 TEST_CASE("Scope tracking - over multiple PDU","[JSNormalizer]")
3968 {
3969 // Every line represents a PDU. Each pdu has input buffer, expected script
3970 // and expected scope stack, written in that order
3971 SECTION("general - variable extension")
3972 test_normalization({
3973 {"long_", "var_0000", {GLOBAL}},
3974 {"variable", "var_0001", {GLOBAL}}
3975 //FIXIT-E: if variable index will be preserved across PDUs, second pdu expected
3976 // will be "var_0000"
3977 });
3978
3979 SECTION("general - variable extension: ignored identifier to a regular one")
3980 test_normalization({
3981 {"console", "console", {GLOBAL}},
3982 {"Writer", "var_0000", {GLOBAL}}
3983 });
3984
3985 SECTION("general - variable extension: a regular identifier to ignored one")
3986 test_normalization({
3987 {"con", "var_0000", {GLOBAL}},
3988 {"sole", "console", {GLOBAL}}
3989 });
3990
3991 SECTION("general - variable extension that overwrites existing variable")
3992 test_normalization({
3993 {"a, b, an", "var_0000,var_0001,var_0002", {GLOBAL}},
3994 {"other = a", "var_0000,var_0001,var_0003=var_0000", {GLOBAL}}
3995 });
3996
3997 SECTION("general - variable extension that overwrites existing variable inside inner scope")
3998 test_normalization({
3999 {"f(a, x=>{var an", "var_0000(var_0001,var_0002=>{var var_0003", {GLOBAL,FUNCTION}},
4000 {"other = a})", "var_0000(var_0001,var_0002=>{var var_0004=var_0001})", {GLOBAL}}
4001 });
4002
4003 SECTION("block scope - basic open")
4004 test_normalization({
4005 {"{", "{", {GLOBAL, BLOCK}},
4006 {"var", "{var", {GLOBAL, BLOCK}}
4007 });
4008
4009 SECTION("block scope - basic close")
4010 test_normalization({
4011 {"{", "{", {GLOBAL, BLOCK}},
4012 {"}", "{}", {GLOBAL}}
4013 });
4014
4015 SECTION("block scope - open outside cross-PDU states")
4016 test_normalization({
4017 {"{[1,2,3,4,5,6,7,8]", "{[1,2,3,4,5,6,7,8]", {GLOBAL, BLOCK}},
4018 {"}", "{[1,2,3,4,5,6,7,8]}", {GLOBAL}}
4019 });
4020
4021 SECTION("block scope - closing brace in a string")
4022 test_normalization({
4023 {"{[1,2,3,4,5,6,7,'}']", "{[1,2,3,4,5,6,7,'}']", {GLOBAL, BLOCK}},
4024 {"}", "{[1,2,3,4,5,6,7,'}']}", {GLOBAL}}
4025 });
4026
4027 SECTION("block scope - for keyword split")
4028 test_normalization({
4029 {"fin", "var_0000", {GLOBAL}},
4030 {"ally {", "finally{", {GLOBAL, BLOCK}}
4031 });
4032
4033 SECTION("block scope - between 'for' and '('")
4034 test_normalization({
4035 {"for", "for", {GLOBAL, BLOCK}},
4036 {"(", "for(", {GLOBAL, BLOCK}}
4037 });
4038
4039 SECTION("block scope - fake 'for'")
4040 test_normalization({
4041 {"for", "for", {GLOBAL, BLOCK}},
4042 {"k", "var_0000", {GLOBAL}}
4043 });
4044
4045 SECTION("block scope - inside for-loop parentheses")
4046 test_normalization({
4047 {"for(;;", "for(;;", {GLOBAL, BLOCK}},
4048 {");", "for(;;);", {GLOBAL}}
4049 });
4050
4051 SECTION("block scope - between for-loop parentheses and code block")
4052 test_normalization({
4053 {"for(;;)", "for(;;)", {GLOBAL, BLOCK}},
4054 {"{}", "for(;;){}", {GLOBAL}}
4055 });
4056
4057 SECTION("function scope: split in 'function'")
4058 test_normalization({
4059 {"func", "var_0000", {GLOBAL}},
4060 {"tion(", "function(", {GLOBAL,FUNCTION}}
4061 });
4062
4063 SECTION("function scope: fake function")
4064 test_normalization({
4065 {"function", "function", {GLOBAL}},
4066 {"al(", "var_0000(", {GLOBAL}}
4067 });
4068
4069 SECTION("function scope: split inside string literal")
4070 test_normalization({
4071 {"`$$$$$$$$function", "`$$$$$$$$function", {GLOBAL}},
4072 {"(){a = 0", "`$$$$$$$$function(){a = 0", {GLOBAL}}
4073 });
4074
4075 SECTION("function scope: inside parameters")
4076 test_normalization({
4077 {"function(", "function(", {GLOBAL, FUNCTION}},
4078 {")", "function()", {GLOBAL,FUNCTION}}
4079 });
4080
4081 SECTION("function scope: between parameters and body")
4082 test_normalization({
4083 {"function()", "function()", {GLOBAL, FUNCTION}},
4084 {"{", "function(){", {GLOBAL,FUNCTION}}
4085 });
4086
4087 SECTION("function scope: inside code")
4088 test_normalization({
4089 {"function(){", "function(){", {GLOBAL, FUNCTION}},
4090 {"}", "function(){}", {GLOBAL}}
4091 });
4092
4093 SECTION("object initializer: basic")
4094 test_normalization({
4095 {"var o = {", "var var_0000={", {GLOBAL, BLOCK}},
4096 {"}", "var var_0000={}", {GLOBAL}}
4097 });
4098
4099 SECTION("false var keyword")
4100 test_normalization({
4101 {"var var_a; function(){ var", "var var_0000;function(){var", {GLOBAL, FUNCTION}},
4102 {"_a; }", "var var_0000;function(){var_0000;}", {GLOBAL}}
4103 });
4104
4105 SECTION("false let keyword")
4106 test_normalization({
4107 {"var let_a; function(){ let", "var var_0000;function(){let", {GLOBAL, FUNCTION}},
4108 {"_a; }", "var var_0000;function(){var_0000;}", {GLOBAL}}
4109 });
4110
4111 SECTION("false const keyword")
4112 test_normalization({
4113 {"var const_a; function(){ const", "var var_0000;function(){const", {GLOBAL, FUNCTION}},
4114 {"_a; }", "var var_0000;function(){var_0000;}", {GLOBAL}}
4115 });
4116
4117 SECTION("false class keyword")
4118 test_normalization({
4119 {"var a; class", "var var_0000;class", {GLOBAL}},
4120 {"_a; { a }", "var var_0000;var_0001;{var_0000}", {GLOBAL}}
4121 });
4122 }
4123
4124 TEST_CASE("Scope tracking - error handling", "[JSNormalizer]")
4125 {
4126 SECTION("not identifier after var keyword")
4127 test_normalization_bad(
4128 "var +;",
4129 "var",
4130 JSTokenizer::BAD_TOKEN
4131 );
4132
4133 SECTION("not identifier after let keyword")
4134 test_normalization_bad(
4135 "let class;",
4136 "let",
4137 JSTokenizer::BAD_TOKEN
4138 );
4139
4140 SECTION("not identifier after const keyword")
4141 test_normalization_bad(
4142 "const 1;",
4143 "const",
4144 JSTokenizer::BAD_TOKEN
4145 );
4146
4147 SECTION("scope mismatch")
4148 test_normalization_bad(
4149 "function f() { if (true) } }",
4150 "function var_0000(){if(true)}",
4151 JSTokenizer::WRONG_CLOSING_SYMBOL
4152 );
4153
4154 SECTION("scope mismatch with code block")
4155 test_normalization_bad(
4156 "{ { function } }",
4157 "{{function",
4158 JSTokenizer::WRONG_CLOSING_SYMBOL
4159 );
4160
4161 SECTION("scope nesting overflow")
4162 {
4163 const char src[] = "function() { if (true) { } }";
4164 const char exp[] = "function(){if";
4165 uint32_t scope_depth = 2;
4166
4167 JSIdentifierCtx ident_ctx(norm_depth, scope_depth, s_ignored_ids);
4168 JSNormalizer normalizer(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth);
4169 auto ret = normalizer.normalize(src, strlen(src));
4170 std::string dst(normalizer.get_script(), normalizer.script_size());
4171
4172 CHECK(ret == JSTokenizer::SCOPE_NESTING_OVERFLOW);
4173 CHECK(dst == exp);
4174 }
4175 }
4176
4177 #endif // CATCH_TEST_BUILD
4178
4179 // Benchmark tests
4180
4181 #ifdef BENCHMARK_TEST
4182
// Tag appended to the end of every generated benchmark input.
static constexpr const char* s_closing_tag = "</script>";

// Builds a benchmark input: `begin`, then as many whole copies of `mid` as
// fit, then `end` and the closing tag, so that the result does not exceed
// `len` bytes.
//
// When `len` is too small to hold even begin + end + closing tag, or when
// `mid` is empty, no filler is inserted and just those fixed parts are
// returned (the result may then be longer than `len`).
static const std::string make_input(const char* begin, const char* mid,
    const char* end, size_t len)
{
    const size_t mid_len = strlen(mid);
    const size_t fixed_len = strlen(begin) + strlen(end) + strlen(s_closing_tag);

    // Guard the unsigned subtraction against wrap-around and the division
    // against an empty filler; keep the count in size_t so it is not narrowed.
    size_t fill = 0;
    if (mid_len != 0 && len > fixed_len)
        fill = (len - fixed_len) / mid_len;

    std::string s(begin);
    s.reserve(fixed_len + fill * mid_len);
    for (size_t i = 0; i < fill; ++i)
        s.append(mid, mid_len);
    s.append(end);
    s.append(s_closing_tag);
    return s;
}

// Builds a benchmark input made of whole repetitions of `pattern` followed by
// the closing tag, not exceeding `depth` bytes in total.
//
// When `depth` is smaller than the closing tag, or `pattern` is empty, only
// the closing tag is returned.
static const std::string make_input_repeat(const char* pattern, size_t depth)
{
    const size_t pattern_len = strlen(pattern);
    const size_t tag_len = strlen(s_closing_tag);

    // Same guards as in make_input(): no unsigned wrap-around, no division
    // by zero.
    size_t fill = 0;
    if (pattern_len != 0 && depth > tag_len)
        fill = (depth - tag_len) / pattern_len;

    std::string s;
    s.reserve(fill * pattern_len + tag_len);
    for (size_t it = 0; it < fill; ++it)
        s.append(pattern, pattern_len);

    s.append(s_closing_tag);
    return s;
}
4207
norm_ret(JSNormalizer & normalizer,const std::string & input)4208 static JSTokenizer::JSRet norm_ret(JSNormalizer& normalizer, const std::string& input)
4209 {
4210 normalizer.rewind_output();
4211 return normalizer.normalize(input.c_str(), input.size());
4212 }
4213
4214 TEST_CASE("JS Normalizer, literals by 8 K", "[JSNormalizer]")
4215 {
4216 JSIdentifierCtxStub ident_ctx;
4217 JSNormalizer normalizer(ident_ctx, unlim_depth, max_template_nesting, max_bracket_depth);
4218 char dst[DEPTH];
4219
4220 constexpr size_t size = 1 << 13;
4221
4222 auto data_pl = make_input("", ".", "", size);
4223 auto data_ws = make_input("", " ", "", size);
4224 auto data_bc = make_input("/*", " ", "*/", size);
4225 auto data_dq = make_input("\"", " ", "\"", size);
4226
4227 BENCHMARK("memcpy()")
4228 {
4229 return memcpy(dst, data_pl.c_str(), data_pl.size());
4230 };
4231
4232 REQUIRE(norm_ret(normalizer, data_ws) == JSTokenizer::SCRIPT_ENDED);
4233 BENCHMARK("whitespaces")
4234 {
4235 normalizer.rewind_output();
4236 return normalizer.normalize(data_ws.c_str(), data_ws.size());
4237 };
4238
4239 REQUIRE(norm_ret(normalizer, data_bc) == JSTokenizer::SCRIPT_ENDED);
4240 BENCHMARK("block comment")
4241 {
4242 normalizer.rewind_output();
4243 return normalizer.normalize(data_bc.c_str(), data_bc.size());
4244 };
4245
4246 REQUIRE(norm_ret(normalizer, data_dq) == JSTokenizer::SCRIPT_ENDED);
4247 BENCHMARK("double quotes string")
4248 {
4249 normalizer.rewind_output();
4250 return normalizer.normalize(data_dq.c_str(), data_dq.size());
4251 };
4252 }
4253
4254 TEST_CASE("JS Normalizer, literals by 64 K", "[JSNormalizer]")
4255 {
4256 JSIdentifierCtxStub ident_ctx;
4257 JSNormalizer normalizer(ident_ctx, unlim_depth, max_template_nesting, max_scope_depth);
4258 char dst[DEPTH];
4259
4260 constexpr size_t size = 1 << 16;
4261
4262 auto data_pl = make_input("", ".", "", size);
4263 auto data_ws = make_input("", " ", "", size);
4264 auto data_bc = make_input("/*", " ", "*/", size);
4265 auto data_dq = make_input("\"", " ", "\"", size);
4266
4267 BENCHMARK("memcpy()")
4268 {
4269 return memcpy(dst, data_pl.c_str(), data_pl.size());
4270 };
4271
4272 REQUIRE(norm_ret(normalizer, data_ws) == JSTokenizer::SCRIPT_ENDED);
4273 BENCHMARK("whitespaces")
4274 {
4275 normalizer.rewind_output();
4276 return normalizer.normalize(data_ws.c_str(), data_ws.size());
4277 };
4278
4279 REQUIRE(norm_ret(normalizer, data_bc) == JSTokenizer::SCRIPT_ENDED);
4280 BENCHMARK("block comment")
4281 {
4282 normalizer.rewind_output();
4283 return normalizer.normalize(data_bc.c_str(), data_bc.size());
4284 };
4285
4286 REQUIRE(norm_ret(normalizer, data_dq) == JSTokenizer::SCRIPT_ENDED);
4287 BENCHMARK("double quotes string")
4288 {
4289 normalizer.rewind_output();
4290 return normalizer.normalize(data_dq.c_str(), data_dq.size());
4291 };
4292 }
4293
4294 TEST_CASE("JS Normalizer, id normalization", "[JSNormalizer]")
4295 {
4296 // around 11 000 identifiers
4297 std::string input;
4298 for (int it = 0; it < DEPTH; ++it)
4299 input.append("n" + std::to_string(it) + " ");
4300
4301 input.resize(DEPTH - strlen(s_closing_tag));
4302 input.append(s_closing_tag, strlen(s_closing_tag));
4303
4304 JSIdentifierCtxStub ident_ctx_mock;
4305 JSNormalizer normalizer_wo_ident(ident_ctx_mock, unlim_depth,
4306 max_template_nesting, max_bracket_depth);
4307
4308 REQUIRE(norm_ret(normalizer_wo_ident, input) == JSTokenizer::SCRIPT_ENDED);
4309 BENCHMARK("without substitution")
4310 {
4311 normalizer_wo_ident.rewind_output();
4312 return normalizer_wo_ident.normalize(input.c_str(), input.size());
4313 };
4314
4315 const std::unordered_set<std::string> ids{};
4316 JSIdentifierCtx ident_ctx(norm_depth, max_scope_depth, ids);
4317 JSNormalizer normalizer_w_ident(ident_ctx, unlim_depth, max_template_nesting, max_bracket_depth);
4318
4319 REQUIRE(norm_ret(normalizer_w_ident, input) == JSTokenizer::SCRIPT_ENDED);
4320 BENCHMARK("with substitution")
4321 {
4322 normalizer_w_ident.rewind_output();
4323 return normalizer_w_ident.normalize(input.c_str(), input.size());
4324 };
4325
4326 const std::unordered_set<std::string> ids_n { "n" };
4327 JSIdentifierCtx ident_ctx_ids_n(norm_depth, max_scope_depth, ids_n);
4328 JSNormalizer normalizer_iids(ident_ctx_ids_n, unlim_depth,
4329 max_template_nesting, max_bracket_depth);
4330
4331 REQUIRE(norm_ret(normalizer_iids, input) == JSTokenizer::SCRIPT_ENDED);
4332 BENCHMARK("with ignored identifiers")
4333 {
4334 normalizer_iids.rewind_output();
4335 return normalizer_iids.normalize(input.c_str(), input.size());
4336 };
4337 }
4338
4339 TEST_CASE("JS Normalizer, scope tracking", "[JSNormalizer]")
4340 {
4341 constexpr uint32_t depth = 65535;
4342 JSIdentifierCtxStub ident_ctx;
4343 JSNormalizer normalizer(ident_ctx, unlim_depth, max_template_nesting, depth);
4344
4345 auto src_ws = make_input("", " ", "", depth);
4346 auto src_brace_rep = make_input_repeat("{}", depth);
4347 auto src_paren_rep = make_input_repeat("()", depth);
4348 auto src_bracket_rep = make_input_repeat("[]", depth);
4349
4350 REQUIRE(norm_ret(normalizer, src_ws) == JSTokenizer::SCRIPT_ENDED);
4351 BENCHMARK("whitespaces")
4352 {
4353 normalizer.rewind_output();
4354 return normalizer.normalize(src_ws.c_str(), src_ws.size());
4355 };
4356
4357 REQUIRE(norm_ret(normalizer, src_brace_rep) == JSTokenizer::SCRIPT_ENDED);
4358 BENCHMARK("...{}{}{}...")
4359 {
4360 normalizer.rewind_output();
4361 return normalizer.normalize(src_brace_rep.c_str(), src_brace_rep.size());
4362 };
4363
4364 REQUIRE(norm_ret(normalizer, src_paren_rep) == JSTokenizer::SCRIPT_ENDED);
4365 BENCHMARK("...()()()...")
4366 {
4367 normalizer.rewind_output();
4368 return normalizer.normalize(src_paren_rep.c_str(), src_paren_rep.size());
4369 };
4370
4371 REQUIRE(norm_ret(normalizer, src_bracket_rep) == JSTokenizer::SCRIPT_ENDED);
4372 BENCHMARK("...[][][]...")
4373 {
4374 normalizer.rewind_output();
4375 return normalizer.normalize(src_bracket_rep.c_str(), src_bracket_rep.size());
4376 };
4377 }
4378
4379 TEST_CASE("JS Normalizer, automatic semicolon", "[JSNormalizer]")
4380 {
4381 auto w_semicolons = make_input("", "a;\n", "", depth);
4382 auto wo_semicolons = make_input("", "a \n", "", depth);
4383 const char* src_w_semicolons = w_semicolons.c_str();
4384 const char* src_wo_semicolons = wo_semicolons.c_str();
4385 size_t src_len = w_semicolons.size();
4386
4387 JSIdentifierCtxStub ident_ctx_mock;
4388 JSNormalizer normalizer_wo_ident(ident_ctx_mock, unlim_depth, max_template_nesting, depth);
4389
4390 REQUIRE(norm_ret(normalizer_wo_ident, w_semicolons) == JSTokenizer::SCRIPT_ENDED);
4391 BENCHMARK("without semicolon insertion")
4392 {
4393 normalizer_wo_ident.rewind_output();
4394 return normalizer_wo_ident.normalize(src_w_semicolons, src_len);
4395 };
4396
4397 REQUIRE(norm_ret(normalizer_wo_ident, wo_semicolons) == JSTokenizer::SCRIPT_ENDED);
4398 BENCHMARK("with semicolon insertion")
4399 {
4400 normalizer_wo_ident.rewind_output();
4401 return normalizer_wo_ident.normalize(src_wo_semicolons, src_len);
4402 };
4403 }
4404 #endif // BENCHMARK_TEST
4405