1 //--------------------------------------------------------------------------
2 // Copyright (C) 2021-2021 Cisco and/or its affiliates. All rights reserved.
3 //
4 // This program is free software; you can redistribute it and/or modify it
5 // under the terms of the GNU General Public License Version 2 as published
6 // by the Free Software Foundation.  You may not use, modify or distribute
7 // this program under any other version of the GNU General Public License.
8 //
9 // This program is distributed in the hope that it will be useful, but
10 // WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 // General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License along
15 // with this program; if not, write to the Free Software Foundation, Inc.,
16 // 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
17 //--------------------------------------------------------------------------
18 // js_normalizer_test.cc author Oleksandr Serhiienko <oserhiie@cisco.com>
19 
20 #ifdef HAVE_CONFIG_H
21 #include "config.h"
22 #endif
23 
24 #include <cstring>
25 
26 #include "catch/catch.hpp"
27 
28 #include "utils/js_identifier_ctx.h"
29 #include "utils/js_normalizer.h"
30 #include "utils/test/js_test_utils.h"
31 
32 using namespace snort;
33 
34 // Unit tests
35 
36 #ifdef CATCH_TEST_BUILD
37 
// Numeric size constant (512). NOTE(review): nothing in the visible part of
// this file references it; presumably it sizes destination buffers in tests
// further below - confirm before changing.
38 #define DST_SIZE 512
39 
// NORMALIZE(src): one-shot normalization of the char array `src`.
//
// Expands to plain declarations (no enclosing braces), so it introduces
// `ident_ctx`, `norm`, `ret`, `ptr`, `act_len` and `dst` directly into the
// calling scope; VALIDATE / VALIDATE_FAIL consume those names afterwards.
// `src` must be an array: sizeof(src) is used as the input length, which for
// a string-literal-initialized array also counts the terminating NUL.
// `norm_depth`, `max_template_nesting` and `max_bracket_depth` must be
// visible at the expansion site.
// `dst` receives the result of take_script() and is released with delete[]
// by VALIDATE / VALIDATE_FAIL.
40 #define NORMALIZE(src)                                             \
41     JSIdentifierCtxStub ident_ctx;                                 \
42     JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth); \
43     auto ret = norm.normalize(src, sizeof(src));                   \
44     const char* ptr = norm.get_src_next();                         \
45     int act_len = norm.script_size();                              \
46     const char* dst = norm.take_script();
47 
// VALIDATE(src, expected): checks the variables left behind by NORMALIZE.
//
// Verifies that normalization returned SCRIPT_CONTINUE, that the whole input
// (sizeof(src) bytes) was consumed, that the output length equals the length
// of `expected` without its terminating NUL, and that the first `act_len`
// output bytes match `expected`. Finally frees `dst` with delete[].
// Uses CHECK (not REQUIRE), so later checks still run after a failure.
48 #define VALIDATE(src, expected)                 \
49     CHECK(ret == JSTokenizer::SCRIPT_CONTINUE); \
50     CHECK((ptr - src) == sizeof(src));          \
51     CHECK(act_len == sizeof(expected) - 1);     \
52     CHECK(!memcmp(dst, expected, act_len));     \
53     delete[] dst;
54 
// VALIDATE_FAIL(src, expected, ret_code, ptr_offset): variant of VALIDATE for
// inputs where normalization is expected to stop early.
//
// Compares the return value against `ret_code` and the consumed-input offset
// (ptr - src) against `ptr_offset` instead of the full input size; the output
// length/content checks and the delete[] of `dst` are the same as in VALIDATE.
55 #define VALIDATE_FAIL(src, expected, ret_code, ptr_offset) \
56     CHECK(ret == ret_code);                                \
57     CHECK((ptr - src) == ptr_offset);                      \
58     CHECK(act_len == sizeof(expected) - 1);                \
59     CHECK(!memcmp(dst, expected, act_len));                \
60     delete[] dst;
61 
62 
// NORMALIZE_L(src, src_len, dst, dst_len, depth, ret, ptr, len): normalization
// with an explicit input length and an explicit depth (passed to the
// JSNormalizer constructor in place of `norm_depth`).
//
// The normalizer lives in its own brace scope. Results are written to the
// caller-provided lvalues: `ret` (return code), `ptr` (get_src_next()) and
// `len` (script_size()). REQUIREs that `len` equals `dst_len`, then copies
// `dst_len` bytes from get_script() into `dst`.
// Unlike NORMALIZE, the output is read via get_script() and is not deleted
// here.
63 #define NORMALIZE_L(src, src_len, dst, dst_len, depth, ret, ptr, len) \
64     {                                                                 \
65         JSIdentifierCtxStub ident_ctx;                                \
66         JSNormalizer norm(ident_ctx, depth, max_template_nesting, max_bracket_depth); \
67         ret = norm.normalize(src, src_len);                           \
68         ptr = norm.get_src_next();                                    \
69         len = norm.script_size();                                     \
70         const char* dptr = norm.get_script();                         \
71         REQUIRE(len == dst_len);                                      \
72         memcpy(dst, dptr, dst_len);                                   \
73     }
74 
// DO(src, slen, dst, dlen): feed one chunk to an existing normalizer and
// capture its output.
//
// Requires a JSNormalizer named `norm` in the enclosing scope. CHECKs that
// normalize() returns SCRIPT_CONTINUE, REQUIREs that exactly `slen` input
// bytes were consumed and that the output size equals `dlen`, then copies
// `dlen` bytes into `dst`. The buffer obtained from take_script() is freed
// with delete[] before leaving the scope.
75 #define DO(src, slen, dst, dlen)                            \
76     {                                                       \
77         auto ret = norm.normalize(src, slen);               \
78         CHECK(ret == JSTokenizer::SCRIPT_CONTINUE);         \
79         auto nsrc = norm.get_src_next();                    \
80         int act_len = norm.script_size();                   \
81         const char* ptr = norm.take_script();               \
82         REQUIRE(nsrc - src == slen);                        \
83         REQUIRE(act_len == dlen);                           \
84         memcpy(dst, ptr, dlen);                             \
85         delete[] ptr;                                       \
86     }
87 
// TRY(src, slen, dst, dlen, rexp): like DO, but for a chunk whose
// normalization is expected to return `rexp` rather than SCRIPT_CONTINUE.
//
// Requires a JSNormalizer named `norm` in the enclosing scope. The amount of
// consumed input is not checked. The output is read through get_script()
// (not take_script()) and therefore is not deleted here; REQUIREs that its
// size equals `dlen` and copies `dlen` bytes into `dst`.
88 #define TRY(src, slen, dst, dlen, rexp)                     \
89     {                                                       \
90         auto ret = norm.normalize(src, slen);               \
91         CHECK(ret == rexp);                                 \
92         int act_len = norm.script_size();                   \
93         const char* ptr = norm.get_script();                \
94         REQUIRE(act_len == dlen);                           \
95         memcpy(dst, ptr, dlen);                             \
96     }
97 
// CLOSE(): terminate the script fed to `norm` (from the enclosing scope) by
// passing the closing tag "</script>" (without its terminating NUL) and
// CHECK that the normalizer reports SCRIPT_ENDED.
98 #define CLOSE()                                                         \
99     {                                                                   \
100         const char end[] = "</script>";                                 \
101         auto ret = norm.normalize(end, sizeof(end) - 1);                \
102         CHECK(ret == JSTokenizer::SCRIPT_ENDED);                        \
103     }
104 
// NORMALIZE_S(src1, exp1): single-chunk test using a real JSIdentifierCtx
// (constructed from `norm_depth`, `max_scope_depth` and `s_ignored_ids`, all
// of which must be visible at the expansion site) instead of the stub.
//
// Normalizes `src1` (without its terminating NUL) via DO, compares the output
// with `exp1`, then closes the script with CLOSE. `src1` and `exp1` must be
// arrays because their lengths are taken with sizeof.
105 #define NORMALIZE_S(src1, exp1)                                     \
106     {                                                               \
107         char dst1[sizeof(exp1)];                                    \
108                                                                     \
109         JSIdentifierCtx ident_ctx(norm_depth, max_scope_depth, s_ignored_ids); \
110         JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth); \
111                                                                     \
112         DO(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1);         \
113         CHECK(!memcmp(exp1, dst1, sizeof(exp1) - 1));               \
114                                                                     \
115         CLOSE();                                                    \
116     }
117 
// NORMALIZE_T(src1, src2, exp1, exp2): two-chunk test using a real
// JSIdentifierCtx (built from `norm_depth`, `max_scope_depth`,
// `s_ignored_ids`).
//
// Feeds `src1` then `src2` to the same normalizer via DO; the output taken
// after each chunk is compared with `exp1` / `exp2` respectively. The script
// is then closed with CLOSE. All arguments must be arrays (sizeof is used).
118 #define NORMALIZE_T(src1, src2, exp1, exp2)                         \
119     {                                                               \
120         char dst1[sizeof(exp1)];                                    \
121         char dst2[sizeof(exp2)];                                    \
122                                                                     \
123         JSIdentifierCtx ident_ctx(norm_depth, max_scope_depth, s_ignored_ids); \
124         JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth); \
125                                                                     \
126         DO(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1);         \
127         CHECK(!memcmp(exp1, dst1, sizeof(exp1) - 1));               \
128                                                                     \
129         DO(src2, sizeof(src2) - 1, dst2, sizeof(dst2) - 1);         \
130         CHECK(!memcmp(exp2, dst2, sizeof(exp2) - 1));               \
131                                                                     \
132         CLOSE();                                                    \
133     }
134 
// NORMALIZE_1(src1, exp1): single-chunk test with the stub identifier
// context (JSIdentifierCtxStub).
//
// Normalizes `src1` (without its terminating NUL) via DO, compares the output
// with `exp1`, then closes the script with CLOSE. Arguments must be arrays
// because their lengths are taken with sizeof.
135 #define NORMALIZE_1(src1, exp1)                                     \
136     {                                                               \
137         char dst1[sizeof(exp1)];                                    \
138                                                                     \
139         JSIdentifierCtxStub ident_ctx;                              \
140         JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth); \
141                                                                     \
142         DO(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1);         \
143         CHECK(!memcmp(exp1, dst1, sizeof(exp1) - 1));               \
144                                                                     \
145         CLOSE();                                                    \
146     }
147 
// NORMALIZE_2(src1, src2, exp1, exp2): two-chunk test with the stub
// identifier context.
//
// Feeds `src1` then `src2` to one normalizer via DO; the output taken after
// each chunk is compared with `exp1` / `exp2`. Ends with CLOSE.
148 #define NORMALIZE_2(src1, src2, exp1, exp2)                         \
149     {                                                               \
150         char dst1[sizeof(exp1)];                                    \
151         char dst2[sizeof(exp2)];                                    \
152                                                                     \
153         JSIdentifierCtxStub ident_ctx;                              \
154         JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth); \
155                                                                     \
156         DO(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1);         \
157         CHECK(!memcmp(exp1, dst1, sizeof(exp1) - 1));               \
158                                                                     \
159         DO(src2, sizeof(src2) - 1, dst2, sizeof(dst2) - 1);         \
160         CHECK(!memcmp(exp2, dst2, sizeof(exp2) - 1));               \
161                                                                     \
162         CLOSE();                                                    \
163     }
164 
// NORMALIZE_3(src1, src2, src3, exp1, exp2, exp3): three-chunk test with the
// stub identifier context.
//
// Feeds the three chunks in order to one normalizer via DO; the output taken
// after each chunk is compared with the matching expectation. Ends with CLOSE.
165 #define NORMALIZE_3(src1, src2, src3, exp1, exp2, exp3)             \
166     {                                                               \
167         char dst1[sizeof(exp1)];                                    \
168         char dst2[sizeof(exp2)];                                    \
169         char dst3[sizeof(exp3)];                                    \
170                                                                     \
171         JSIdentifierCtxStub ident_ctx;                              \
172         JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth); \
173                                                                     \
174         DO(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1);         \
175         CHECK(!memcmp(exp1, dst1, sizeof(exp1) - 1));               \
176                                                                     \
177         DO(src2, sizeof(src2) - 1, dst2, sizeof(dst2) - 1);         \
178         CHECK(!memcmp(exp2, dst2, sizeof(exp2) - 1));               \
179                                                                     \
180         DO(src3, sizeof(src3) - 1, dst3, sizeof(dst3) - 1);         \
181         CHECK(!memcmp(exp3, dst3, sizeof(exp3) - 1));               \
182                                                                     \
183         CLOSE();                                                    \
184     }
185 
// NORM_BAD_1(src1, exp1, code): single-chunk negative test with the stub
// identifier context.
//
// Normalizes `src1` via TRY, expecting the return value `code`, and compares
// the output produced so far with `exp1`. The script is not closed.
186 #define NORM_BAD_1(src1, exp1, code)                                \
187     {                                                               \
188         char dst1[sizeof(exp1)];                                    \
189                                                                     \
190         JSIdentifierCtxStub ident_ctx;                              \
191         JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth); \
192                                                                     \
193         TRY(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1, code);  \
194         CHECK(!memcmp(exp1, dst1, sizeof(exp1) - 1));               \
195     }
196 
// NORM_BAD_2(src1, src2, exp1, exp2, code): two-chunk negative test with the
// stub identifier context.
//
// The first chunk must normalize cleanly (DO); the second goes through TRY
// and is expected to return `code`. Each chunk's output is compared with its
// expectation. The script is not closed.
197 #define NORM_BAD_2(src1, src2, exp1, exp2, code)                    \
198     {                                                               \
199         char dst1[sizeof(exp1)];                                    \
200         char dst2[sizeof(exp2)];                                    \
201                                                                     \
202         JSIdentifierCtxStub ident_ctx;                              \
203         JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth); \
204                                                                     \
205         DO(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1);         \
206         CHECK(!memcmp(exp1, dst1, sizeof(exp1) - 1));               \
207                                                                     \
208         TRY(src2, sizeof(src2) - 1, dst2, sizeof(dst2) - 1, code);  \
209         CHECK(!memcmp(exp2, dst2, sizeof(exp2) - 1));               \
210     }
211 
// NORM_BAD_3(src1, src2, src3, exp1, exp2, exp3, code): three-chunk negative
// test with the stub identifier context.
//
// The first two chunks must normalize cleanly (DO); the third goes through
// TRY and is expected to return `code`. Each chunk's output is compared with
// its expectation. The script is not closed.
212 #define NORM_BAD_3(src1, src2, src3, exp1, exp2, exp3, code)        \
213     {                                                               \
214         char dst1[sizeof(exp1)];                                    \
215         char dst2[sizeof(exp2)];                                    \
216         char dst3[sizeof(exp3)];                                    \
217                                                                     \
218         JSIdentifierCtxStub ident_ctx;                              \
219         JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth); \
220                                                                     \
221         DO(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1);         \
222         CHECK(!memcmp(exp1, dst1, sizeof(exp1) - 1));               \
223                                                                     \
224         DO(src2, sizeof(src2) - 1, dst2, sizeof(dst2) - 1);         \
225         CHECK(!memcmp(exp2, dst2, sizeof(exp2) - 1));               \
226                                                                     \
227         TRY(src3, sizeof(src3) - 1, dst3, sizeof(dst3) - 1, code);  \
228         CHECK(!memcmp(exp3, dst3, sizeof(exp3) - 1));               \
229     }
230 
// NORM_LIMITED(limit, src1, src2, exp1, exp2): same flow as NORMALIZE_2, but
// the normalizer is constructed with `limit` as an additional (fifth)
// constructor argument.
// NOTE(review): the meaning of that argument is not visible in this file;
// confirm against the JSNormalizer constructor.
231 #define NORM_LIMITED(limit, src1, src2, exp1, exp2)                     \
232     {                                                                   \
233         char dst1[sizeof(exp1)];                                        \
234         char dst2[sizeof(exp2)];                                        \
235                                                                         \
236         JSIdentifierCtxStub ident_ctx;                                  \
237         JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth, limit); \
238                                                                         \
239         DO(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1);             \
240         CHECK(!memcmp(exp1, dst1, sizeof(exp1) - 1));                   \
241                                                                         \
242         DO(src2, sizeof(src2) - 1, dst2, sizeof(dst2) - 1);             \
243         CHECK(!memcmp(exp2, dst2, sizeof(exp2) - 1));                   \
244                                                                         \
245         CLOSE();                                                        \
246     }
247 
// NORM_COMBINED_2(src1, src2, exp): two-chunk test that inspects the output
// only once, after all input has been fed.
//
// Feeds `src1`, `src2` and the closing "</script>" tag to one normalizer
// (stub identifier context) without taking the script in between, REQUIRing
// SCRIPT_CONTINUE for the chunks and SCRIPT_ENDED for the tag. Then REQUIREs
// that script_size() equals the length of `exp` and compares get_script()
// with `exp`.
248 #define NORM_COMBINED_2(src1, src2, exp)                                \
249     {                                                                   \
250         JSIdentifierCtxStub ident_ctx;                                  \
251         JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth); \
252                                                                         \
253         auto ret = norm.normalize(src1, sizeof(src1) - 1);              \
254         REQUIRE(ret == JSTokenizer::SCRIPT_CONTINUE);                   \
255                                                                         \
256         ret = norm.normalize(src2, sizeof(src2) - 1);                   \
257         REQUIRE(ret == JSTokenizer::SCRIPT_CONTINUE);                   \
258                                                                         \
259         const char end[] = "</script>";                                 \
260         ret = norm.normalize(end, sizeof(end) - 1);                     \
261         REQUIRE(ret == JSTokenizer::SCRIPT_ENDED);                      \
262                                                                         \
263         size_t act_len = norm.script_size();                            \
264         REQUIRE(act_len == sizeof(exp) - 1);                            \
265                                                                         \
266         const char* dst = norm.get_script();                            \
267         CHECK(!memcmp(exp, dst, sizeof(exp) - 1));                      \
268     }
269 
// NORM_COMBINED_3(src1, src2, src3, exp): three-chunk counterpart of
// NORM_COMBINED_2.
//
// Feeds the three chunks and the closing "</script>" tag to one normalizer
// (stub identifier context) without taking the script in between, then
// REQUIREs the final script size and compares get_script() with `exp`.
270 #define NORM_COMBINED_3(src1, src2, src3, exp)                          \
271     {                                                                   \
272         JSIdentifierCtxStub ident_ctx;                                  \
273         JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth); \
274                                                                         \
275         auto ret = norm.normalize(src1, sizeof(src1) - 1);              \
276         REQUIRE(ret == JSTokenizer::SCRIPT_CONTINUE);                   \
277                                                                         \
278         ret = norm.normalize(src2, sizeof(src2) - 1);                   \
279         REQUIRE(ret == JSTokenizer::SCRIPT_CONTINUE);                   \
280                                                                         \
281         ret = norm.normalize(src3, sizeof(src3) - 1);                   \
282         REQUIRE(ret == JSTokenizer::SCRIPT_CONTINUE);                   \
283                                                                         \
284         const char end[] = "</script>";                                 \
285         ret = norm.normalize(end, sizeof(end) - 1);                     \
286         REQUIRE(ret == JSTokenizer::SCRIPT_ENDED);                      \
287                                                                         \
288         size_t act_len = norm.script_size();                            \
289         REQUIRE(act_len == sizeof(exp) - 1);                            \
290                                                                         \
291         const char* dst = norm.get_script();                            \
292         CHECK(!memcmp(exp, dst, sizeof(exp) - 1));                      \
293     }
294 
// NORM_COMBINED_BAD_2(src1, src2, exp, eret): two-chunk negative test that
// inspects the output once at the end.
//
// `src1` must return SCRIPT_CONTINUE; `src2` must return `eret`. No closing
// tag is fed. Afterwards script_size() must equal the length of `exp` and
// get_script() is compared with `exp`.
295 #define NORM_COMBINED_BAD_2(src1, src2, exp, eret)                      \
296     {                                                                   \
297         JSIdentifierCtxStub ident_ctx;                                  \
298         JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth); \
299                                                                         \
300         auto ret = norm.normalize(src1, sizeof(src1) - 1);              \
301         REQUIRE(ret == JSTokenizer::SCRIPT_CONTINUE);                   \
302                                                                         \
303         ret = norm.normalize(src2, sizeof(src2) - 1);                   \
304         REQUIRE(ret == eret);                                           \
305                                                                         \
306         size_t act_len = norm.script_size();                            \
307         REQUIRE(act_len == sizeof(exp) - 1);                            \
308                                                                         \
309         const char* dst = norm.get_script();                            \
310         CHECK(!memcmp(exp, dst, sizeof(exp) - 1));                      \
311     }
312 
// NORM_COMBINED_BAD_3(src1, src2, src3, exp, eret): three-chunk counterpart
// of NORM_COMBINED_BAD_2.
//
// `src1` and `src2` must return SCRIPT_CONTINUE; `src3` must return `eret`.
// No closing tag is fed. Afterwards script_size() must equal the length of
// `exp` and get_script() is compared with `exp`.
313 #define NORM_COMBINED_BAD_3(src1, src2, src3, exp, eret)                \
314     {                                                                   \
315         JSIdentifierCtxStub ident_ctx;                                  \
316         JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth); \
317                                                                         \
318         auto ret = norm.normalize(src1, sizeof(src1) - 1);              \
319         REQUIRE(ret == JSTokenizer::SCRIPT_CONTINUE);                   \
320                                                                         \
321         ret = norm.normalize(src2, sizeof(src2) - 1);                   \
322         REQUIRE(ret == JSTokenizer::SCRIPT_CONTINUE);                   \
323                                                                         \
324         ret = norm.normalize(src3, sizeof(src3) - 1);                   \
325         REQUIRE(ret == eret);                                           \
326                                                                         \
327         size_t act_len = norm.script_size();                            \
328         REQUIRE(act_len == sizeof(exp) - 1);                            \
329                                                                         \
330         const char* dst = norm.get_script();                            \
331         CHECK(!memcmp(exp, dst, sizeof(exp) - 1));                      \
332     }
333 
// NORM_COMBINED_LIMITED_2(limit, src1, src2, exp): same flow as
// NORM_COMBINED_2, but the normalizer is constructed with `limit` as an
// additional (fifth) constructor argument.
// NOTE(review): the meaning of that argument is not visible in this file;
// confirm against the JSNormalizer constructor.
334 #define NORM_COMBINED_LIMITED_2(limit, src1, src2, exp)                 \
335     {                                                                   \
336         JSIdentifierCtxStub ident_ctx;                                  \
337         JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth, limit); \
338                                                                         \
339         auto ret = norm.normalize(src1, sizeof(src1) - 1);              \
340         REQUIRE(ret == JSTokenizer::SCRIPT_CONTINUE);                   \
341                                                                         \
342         ret = norm.normalize(src2, sizeof(src2) - 1);                   \
343         REQUIRE(ret == JSTokenizer::SCRIPT_CONTINUE);                   \
344                                                                         \
345         const char end[] = "</script>";                                 \
346         ret = norm.normalize(end, sizeof(end) - 1);                     \
347         REQUIRE(ret == JSTokenizer::SCRIPT_ENDED);                      \
348                                                                         \
349         size_t act_len = norm.script_size();                            \
350         REQUIRE(act_len == sizeof(exp) - 1);                            \
351                                                                         \
352         const char* dst = norm.get_script();                            \
353         CHECK(!memcmp(exp, dst, sizeof(exp) - 1));                      \
354     }
355 
// NORM_COMBINED_S_2(src1, src2, exp): same flow as NORM_COMBINED_2, but with
// a real JSIdentifierCtx (built from `norm_depth`, `max_scope_depth` and
// `s_ignored_ids`, which must be visible at the expansion site) instead of
// the stub.
356 #define NORM_COMBINED_S_2(src1, src2, exp)                              \
357     {                                                                   \
358         JSIdentifierCtx ident_ctx(norm_depth, max_scope_depth, s_ignored_ids); \
359         JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth); \
360                                                                         \
361         auto ret = norm.normalize(src1, sizeof(src1) - 1);              \
362         REQUIRE(ret == JSTokenizer::SCRIPT_CONTINUE);                   \
363                                                                         \
364         ret = norm.normalize(src2, sizeof(src2) - 1);                   \
365         REQUIRE(ret == JSTokenizer::SCRIPT_CONTINUE);                   \
366                                                                         \
367         const char end[] = "</script>";                                 \
368         ret = norm.normalize(end, sizeof(end) - 1);                     \
369         REQUIRE(ret == JSTokenizer::SCRIPT_ENDED);                      \
370                                                                         \
371         size_t act_len = norm.script_size();                            \
372         REQUIRE(act_len == sizeof(exp) - 1);                            \
373                                                                         \
374         const char* dst = norm.get_script();                            \
375         CHECK(!memcmp(exp, dst, sizeof(exp) - 1));                      \
376     }
377 
378 // ClamAV test vectors from: https://github.com/Cisco-Talos/clamav/blob/main/unit_tests/check_jsnorm.c
// Each clamav_bufN is a raw input script and clamav_expectedN is the output
// the "clamav tests" TEST_CASE expects for it.
//
// Vector 0: a function body mixing declarations, a `//` comment, multi-line
// and single-line `/* */` comments, and string literals with escapes.
379 static const char clamav_buf0[] =
380     "function foo(a, b) {\n"
381     "var x = 1.9e2*2*a/ 4.;\n"
382     "var y = 'test\\'tst';//var\n"
383     "x=b[5],/* multiline\nvar z=6;\nsome*some/other**/"
384     "z=x/y;/* multiline oneline */var t=z/a;\n"
385     "z=[test,testi];"
386     "document.writeln('something\\n');}";
387 
// Expected: comments and insignificant whitespace removed; string literal
// contents are kept as written.
388 static const char clamav_expected0[] =
389     "function foo(a,b){var x=1.9e2*2*a/4.;var y='test\\'tst';x=b[5],z=x/y;var t=z/a;"
390     "z=[test,testi];document.writeln('something\\n');}";
391 
// Vector 1: the input holds the six-character escape sequence \u1234 inside
// an identifier (the C++ literal spells the backslash as "\\").
392 static const char clamav_buf1[] =
393     "function () { var id\\u1234tx;}";
394 
// Expected: here "\u1234" is a C++ universal-character-name, i.e. the
// expectation contains the encoded character itself rather than the escape.
395 static const char clamav_expected1[] =
396     "function(){var id\u1234tx;}";
397 
// Vector 2: string literals (double- and single-quoted) joined with `+`,
// separated by varying amounts of whitespace.
398 static const char clamav_buf2[] =
399     "function () { var tst=\"a\"+'bc'+     'd'; }";
400 
// Expected: whitespace removed; the literals are not concatenated.
401 static const char clamav_expected2[] =
402     "function(){var tst=\"a\"+'bc'+'d';}";
403 
// Vector 3: a call whose argument is a percent-encoded string literal.
404 static const char clamav_buf3[] =
405     "dF('bmfsu%2639%2638x11u%2638%263%3A%264C1');";
406 
// Expected: identical to the input (percent-encoding is left untouched).
407 static const char clamav_expected3[] =
408     "dF('bmfsu%2639%2638x11u%2638%263%3A%264C1');";
409 
// B64: a string literal of 64 'b' characters, spliced into clamav vectors 7
// and 8 via adjacent-literal concatenation to build long string contents.
410 #define B64 "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"
411 
// Vector 4: two statements with long percent-encoded string arguments
// (the identifiers appear to be ROT13-obfuscated).
// NOTE(review): declared `static char` (non-const) unlike most other
// vectors; nothing visible in this file writes to it.
412 static char clamav_buf4[] =
413     "qbphzrag.jevgr(harfpncr('%3P%73%63%72%69%70%74%20%6P%61%6R%67%75%61%67%65%3Q%22%6N%61%76%61"
414         "%73%63%72%69%70%74%22%3R%66%75%6R%63%74%69%6S%6R%20%64%46%28%73%29%7O%76%61%72%20%73%31"
415         "%3Q%75%6R%65%73%63%61%70%65%28%73%2R%73%75%62%73%74%72%28%30%2P%73%2R%6P%65%6R%67%74%68"
416         "%2Q%31%29%29%3O%20%76%61%72%20%74%3Q%27%27%3O%66%6S%72%28%69%3Q%30%3O%69%3P%73%31%2R%6P"
417         "%65%6R%67%74%68%3O%69%2O%2O%29%74%2O%3Q%53%74%72%69%6R%67%2R%66%72%6S%6Q%43%68%61%72%43"
418         "%6S%64%65%28%73%31%2R%63%68%61%72%43%6S%64%65%41%74%28%69%29%2Q%73%2R%73%75%62%73%74%72"
419         "%28%73%2R%6P%65%6R%67%74%68%2Q%31%2P%31%29%29%3O%64%6S%63%75%6Q%65%6R%74%2R%77%72%69%74"
420         "%65%28%75%6R%65%73%63%61%70%65%28%74%29%29%3O%7Q%3P%2S%73%63%72%69%70%74%3R'));"
421         "riny(qS('tV%285%3O%285%3Nsdwjl%28585%3N7%28586Q%28585%3N7%3P%7P55l%28585%3N7%3P%28585%3N7"
422         "%28586R%28585%3N8T5%285%3N%285%3P%286R3'));";
423 
// Expected: identical to the input.
424 static char clamav_expected4[] =
425     "qbphzrag.jevgr(harfpncr('%3P%73%63%72%69%70%74%20%6P%61%6R%67%75%61%67%65%3Q%22%6N%61%76%61"
426         "%73%63%72%69%70%74%22%3R%66%75%6R%63%74%69%6S%6R%20%64%46%28%73%29%7O%76%61%72%20%73%31"
427         "%3Q%75%6R%65%73%63%61%70%65%28%73%2R%73%75%62%73%74%72%28%30%2P%73%2R%6P%65%6R%67%74%68"
428         "%2Q%31%29%29%3O%20%76%61%72%20%74%3Q%27%27%3O%66%6S%72%28%69%3Q%30%3O%69%3P%73%31%2R%6P"
429         "%65%6R%67%74%68%3O%69%2O%2O%29%74%2O%3Q%53%74%72%69%6R%67%2R%66%72%6S%6Q%43%68%61%72%43"
430         "%6S%64%65%28%73%31%2R%63%68%61%72%43%6S%64%65%41%74%28%69%29%2Q%73%2R%73%75%62%73%74%72"
431         "%28%73%2R%6P%65%6R%67%74%68%2Q%31%2P%31%29%29%3O%64%6S%63%75%6Q%65%6R%74%2R%77%72%69%74"
432         "%65%28%75%6R%65%73%63%61%70%65%28%74%29%29%3O%7Q%3P%2S%73%63%72%69%70%74%3R'));"
433         "riny(qS('tV%285%3O%285%3Nsdwjl%28585%3N7%28586Q%28585%3N7%3P%7P55l%28585%3N7%3P%28585%3N7"
434         "%28586R%28585%3N8T5%285%3N%285%3P%286R3'));";
435 
// Vector 5: an obfuscated call expression (identifiers appear to be
// ROT13-encoded) with a space between the first identifier and `(`.
// NOTE(review): the input is non-const while its expectation is const;
// nothing visible in this file writes to it.
436 static char clamav_buf5[] =
437     "shapgvba (c,n,p,x,r,e){}('0(\\'1\\');',2,2,'nyreg|j00g'.fcyvg('|'),0,{});";
438 
// Expected: the input with that single space removed.
439 static const char clamav_expected5[] =
440     "shapgvba(c,n,p,x,r,e){}('0(\\'1\\');',2,2,'nyreg|j00g'.fcyvg('|'),0,{});";
441 
// Vector 6: a function named `$`, followed by two call statements.
442 static const char clamav_buf6[] =
443     "function $(p,a,c,k,e,d){} something(); $('0(\\'1\\');',2,2,'alert|w00t'.split('|'),0,{});";
444 
// Expected: spaces between statements removed; the space in "function $"
// is kept.
445 static const char clamav_expected6[] =
446     "function $(p,a,c,k,e,d){}something();$('0(\\'1\\');',2,2,'alert|w00t'.split('|'),0,{});";
447 
// Vector 7: assignment of a long double-quoted string literal
// ("tst" + 64 'b' characters + "tst").
448 static const char clamav_buf7[] =
449     "var z=\"tst" B64 "tst\";";
450 
// Expected: identical to the input.
451 static const char clamav_expected7[] =
452     "var z=\"tst" B64 "tst\";";
453 
// Vector 8: assignment of a long single-quoted string literal
// ('tst' + 64 'b' characters + 'tst').
454 static const char clamav_buf8[] =
455     "var z=\'tst" B64 "tst\';";
456 
// Expected: identical to the input.
457 static const char clamav_expected8[] =
458     "var z=\'tst" B64 "tst\';";
459 
// Vector 9: nested calls around a percent-encoded string literal
// (identifiers appear to be ROT13-obfuscated).
// NOTE(review): the input is non-const while its expectation is const;
// nothing visible in this file writes to it.
460 static char clamav_buf9[] =
461     "riny(harfpncr('%61%6p%65%72%74%28%27%74%65%73%74%27%29%3o'));";
462 
// Expected: identical to the input.
463 static const char clamav_expected9[] =
464     "riny(harfpncr('%61%6p%65%72%74%28%27%74%65%73%74%27%29%3o'));";
465 
// Vector 10: syntactically odd input ("function $ $() dF(x); ...") with an
// unclosed outer function body.
466 static const char clamav_buf10[] =
467     "function $ $() dF(x); function (p,a,c,k,e,r){function $(){}";
468 
// Expected: only whitespace is adjusted; spaces that separate adjacent
// identifiers/keywords are kept.
469 static const char clamav_expected10[] =
470     "function $ $()dF(x);function(p,a,c,k,e,r){function $(){}";
471 
// Vector 11: integer literal followed by a space before the semicolon.
472 static const char clamav_buf11[] =
473     "var x=123456789 ;";
474 
// Expected: the space before `;` is removed.
475 static const char clamav_expected11[] =
476     "var x=123456789;";
477 
// Vector 12: the escape sequence \u0000 inside a string literal (the C++
// literal spells the backslash as "\\").
478 static const char clamav_buf12[] =
479     "var x='test\\u0000test';";
480 
// Expected: identical to the input; the escape is kept as written.
481 static const char clamav_expected12[] =
482     "var x='test\\u0000test';";
483 
// Vector 13: a backslash followed by `s` directly after an identifier
// (labelled "invalid escape sequence" in the test case below).
484 static const char clamav_buf13[] =
485     "var x\\s12345";
486 
// Expected: identical to the input.
487 static const char clamav_expected13[] =
488     "var x\\s12345";
489 
// Vector 14: input that ends inside an unterminated single-quoted string.
490 static const char clamav_buf14[] =
491     "document.write(unescape('test%20test";
492 
// Expected: identical to the input. The test case adjusts the measured
// length by one because the input's trailing NUL ends up in the output.
493 static const char clamav_expected14[] =
494     "document.write(unescape('test%20test";
495 
496 TEST_CASE("clamav tests", "[JSNormalizer]")
497 {
498     SECTION("test_case_0 - mixed identifiers and comments")
499     {
500         NORMALIZE(clamav_buf0);
501         VALIDATE(clamav_buf0, clamav_expected0);
502     }
503     SECTION("test_case_1 - escaped unicode in identifier")
504     {
505         NORMALIZE(clamav_buf1);
506         VALIDATE(clamav_buf1, clamav_expected1);
507     }
508     SECTION("test_case_2 - accumulated string assignment")
509     {
510         NORMALIZE(clamav_buf2);
511         VALIDATE(clamav_buf2, clamav_expected2);
512     }
513     SECTION("test_case_3 - percent-encoded string")
514     {
515         NORMALIZE(clamav_buf3);
516         VALIDATE(clamav_buf3, clamav_expected3);
517     }
518     SECTION("test_case_4 - percent-encoded string")
519     {
520         NORMALIZE(clamav_buf4);
521         VALIDATE(clamav_buf4, clamav_expected4);
522     }
523     SECTION("test_case_5 - obfuscated script")
524     {
525         NORMALIZE(clamav_buf5);
526         VALIDATE(clamav_buf5, clamav_expected5);
527     }
528     SECTION("test_case_6 - obfuscated script")
529     {
530         NORMALIZE(clamav_buf6);
531         VALIDATE(clamav_buf6, clamav_expected6);
532     }
533     SECTION("test_case_7 - single quotes string")
534     {
535         NORMALIZE(clamav_buf7);
536         VALIDATE(clamav_buf7, clamav_expected7);
537     }
538     SECTION("test_case_8 - double quotes string")
539     {
540         NORMALIZE(clamav_buf8);
541         VALIDATE(clamav_buf8, clamav_expected8);
542     }
543     SECTION("test_case_9 - obfuscated script")
544     {
545         NORMALIZE(clamav_buf9);
546         VALIDATE(clamav_buf9, clamav_expected9);
547     }
548     SECTION("test_case_10 - obfuscated script")
549     {
550         NORMALIZE(clamav_buf10);
551         VALIDATE(clamav_buf10, clamav_expected10);
552     }
553     SECTION("test_case_11 - integer literal")
554     {
555         NORMALIZE(clamav_buf11);
556         VALIDATE(clamav_buf11, clamav_expected11);
557     }
558     SECTION("test_case_12 - escaped unicode in string literal")
559     {
560         NORMALIZE(clamav_buf12);
561         VALIDATE(clamav_buf12, clamav_expected12);
562     }
563     // FIXIT-L this should be revisited
564     SECTION("test_case_13 - invalid escape sequence")
565     {
566         NORMALIZE(clamav_buf13);
567         VALIDATE(clamav_buf13, clamav_expected13);
568     }
569     SECTION("test_case_14 - EOF in the middle of string literal")
570     {
571         NORMALIZE(clamav_buf14);
572         // trailing \0 is included as a part of the string
573         // to utilize available macros we alter the read length
574         act_len -= 1;
575         VALIDATE(clamav_buf14, clamav_expected14);
576     }
577 }
578 
579 // Test vectors for all match patterns
// Whitespace vector: the tokens var and a; are separated by a run of raw bytes
// (TAB, VT, FF, SP, 0xA0, 0x08, LF, CR) followed by the byte sequences
// EF BB BF (UTF-8 for U+FEFF), E2 80 A8 (U+2028) and E2 80 A9 (U+2029),
// then more ASCII whitespace. The literal also ends with an explicit NUL
// escape, in addition to the terminator the compiler appends.
580 static const char all_patterns_buf0[] =
581     "var  \x9\xB\xC\x20\xA0\x8\xA\xD\xEF\xBB\xBF\xE2\x80\xA8\xE2\x80\xA9\n"
582     "  \n\t\r\v  a; \0";
583
// Expected text for the vector above: the whole run shrinks to one space.
584 static const char all_patterns_expected0[] =
585     "var a;";
586 
// Comment vector: an HTML-style opening line, a single-line comment and a
// multi-line block comment (which itself contains a var statement) are mixed
// with three real var statements.
587 static const char all_patterns_buf1[] =
588     "<!-- var html_comment = 'comment' ;\n"
589     "var a = 1;// first var\nvar b = 2;  /* second var\nvar foo = 'bar'\n*/"
590     "\nvar c = 3; // third var";
591
// Expected text: only the three statements written outside of comments.
592 static const char all_patterns_expected1[] =
593     "var a=1;var b=2;var c=3;";
594 
// Punctuator vector: brackets, comparison, arithmetic, shift, bitwise, logical
// and assignment operators, each written with surrounding spaces.
595 static const char all_patterns_buf2[] =
596     "{ a } ( a ) [ a ] a >= b a == b a != b a === b a !== b a /= b . ; , "
597     "a < b a > b a <= b a + b- c a * b a % b a ++; --b a << 2 a >> 3 a >>> 4 a & b a | b "
598     "a ^ b ! a a && b a || b ?: a = 2 a += 2 a -= 2 a *= 2 a %= 2 a <<= b a >>= b a >>>= b "
599     "a &= b a|= b a ^= b a/b ~ a";
600
// Expected text: the only spaces left are those between two adjacent
// identifier/number tokens (e.g. between b and the following a).
601 static const char all_patterns_expected2[] =
602     "{a}(a)[a]a>=b a==b a!=b a===b a!==b a/=b.;,a<b a>b a<=b a+b-c a*b "
603     "a%b a++;--b a<<2 a>>3 a>>>4 a&b a|b a^b!a a&&b a||b?:a=2 a+=2 a-=2 a*=2 a%=2 a<<=b "
604     "a>>=b a>>>=b a&=b a|=b a^=b a/b~a";
605 
// Keyword vector: a space-separated list of JavaScript keywords and reserved
// words, with an identifier (a) after let, var and const.
606 static const char all_patterns_buf3[] =
607     "break case debugger in import protected do else function try "
608     "implements static instanceof new this class let a typeof var a with enum private catch "
609     "continue default extends public finally for if super yield return switch throw const a "
610     "interface void while delete export package";
611
// Expected text: character-for-character the same as the input above.
612 static const char all_patterns_expected3[] =
613     "break case debugger in import protected do else function try "
614     "implements static instanceof new this class let a typeof var a with enum private catch "
615     "continue default extends public finally for if super yield return switch throw const a "
616     "interface void while delete export package";
617 
// Literal vector: a regex, undefined/null/boolean words, decimal, signed,
// exponent and hex numbers, Infinity, the bytes E2 88 9E (UTF-8 for U+221E),
// NaN, double- and single-quoted strings, regex/division after an assignment,
// and a multi-line template literal.
618 static const char all_patterns_buf4[] =
619     "/regex/g undefined null true false 2 23 2.3 2.23 .2 .02 4. +2 -2 "
620     "+3.3 -3.3 +23 -32 2.3E45 3.E34 -2.3E45 -3.E34 +2.3E45 +3.E34 0x1234 0XFFFF Infinity "
621     "\xE2\x88\x9E NaN \"\" \"double string\" \"d\" '' 'single string' 's' x=/regex/gs "
622     "x=2/2/1 `\ntemplate\n`";
623
// Expected text: differs from the input only by the spaces that were next to
// the + and - signs of the numbers; everything else is unchanged.
624 static const char all_patterns_expected4[] =
625     "/regex/g undefined null true false 2 23 2.3 2.23 .2 .02 4.+2-2"
626     "+3.3-3.3+23-32 2.3E45 3.E34-2.3E45-3.E34+2.3E45+3.E34 0x1234 0XFFFF Infinity "
627     "\xE2\x88\x9E NaN \"\" \"double string\" \"d\" '' 'single string' 's' x=/regex/gs "
628     "x=2/2/1 `\ntemplate\n`";
629 
// Identifier vector: names with $ and _, non-ASCII letters, names that begin
// with a backslash-u escape (the backslash is doubled here, so the input holds
// the six-character escape text itself), names interrupted by the byte
// sequences for U+2028, U+2029, U+FEFF and by the infinity sign, and finally
// a name that starts with a digit.
630 static const char all_patterns_buf5[] =
631     "$2abc _2abc abc $__$ 肖晗 XÆA12 \\u0041abc \\u00FBdef \\u1234ghi ab\xE2\x80\xA8ww "
632     "ab\xE2\x80\xA9ww ab\xEF\xBB\xBFww ab∞ww 2abc";
633
// Expected text. Here the backslash-u sequences are single backslashes, i.e.
// C++ universal-character-names: the compiler stores the characters
// themselves, so the escapes of the input are expected in decoded form.
// U+2028/U+2029 show up as a semicolon, U+FEFF as a space, the infinity sign
// gets spaces on both sides, and 2abc is split into 2 and abc.
634 static const char all_patterns_expected5[] =
635     "$2abc _2abc abc $__$ 肖晗 XÆA12 \u0041abc \u00FBdef \u1234ghi ab;ww "
636     "ab;ww ab ww ab ∞ ww 2 abc";
637 
// Template literal vector: a tagged template whose text spans a line break and
// which embeds one substitution with padded whitespace.
638 static const char all_patterns_buf6[] =
639     "tag` template\n   ${ a   +   b }   template`";
640
// Expected text: a space separates the tag from the backtick, the whitespace
// inside the substitution is gone, the template text itself is untouched.
641 static const char all_patterns_expected6[] =
642     "tag ` template\n   ${a+b}   template`";
643 
// One section per token class. Sections built from NORMALIZE feed the whole
// array (sizeof, so the terminating NUL is part of the input) to a freshly
// constructed JSNormalizer and hand the results to VALIDATE together with the
// matching expected array. The "directives" section uses NORMALIZE_L with
// caller-provided buffers and checks every result explicitly.
644 TEST_CASE("all patterns", "[JSNormalizer]")
645 {
646     SECTION("whitespaces and special characters")
647     {
648         NORMALIZE(all_patterns_buf0);
649         VALIDATE(all_patterns_buf0, all_patterns_expected0);
650     }
651     SECTION("comments")
652     {
653         NORMALIZE(all_patterns_buf1);
654         VALIDATE(all_patterns_buf1, all_patterns_expected1);
655     }
656     SECTION("directives")
657     {
        // Five inputs: the directive in single or double quotes, terminated by
        // a line break (src0, src1) or a semicolon (src2, src3), plus one input
        // where the same string follows other code (src4).
658         const char src0[] = "'use strict'\nvar a = 1;";
659         const char src1[] = "\"use strict\"\nvar a = 1;";
660         const char src2[] = "'use strict';var a = 1;";
661         const char src3[] = "\"use strict\";var a = 1;";
662         const char src4[] = "var a = 1 'use strict';";
663
        // src0/src2 share expected0, src1/src3 share expected1, src4 uses expected2.
664         const char expected0[] = "'use strict';var a=1;";
665         const char expected1[] = "\"use strict\";var a=1;";
666         const char expected2[] = "var a=1 'use strict';";
667
        // Destination buffers sized to the expected text without its terminator.
668         char dst0[sizeof(expected0) - 1];
669         char dst1[sizeof(expected1) - 1];
670         char dst2[sizeof(expected0) - 1];
671         char dst3[sizeof(expected1) - 1];
672         char dst4[sizeof(expected2) - 1];
673
674         int ret0, ret1, ret2, ret3, ret4;
675         const char *ptr0, *ptr1, *ptr2, *ptr3, *ptr4;
676         int act_len0, act_len1, act_len2, act_len3, act_len4;
677
        // NOTE(review): NORMALIZE_L is defined elsewhere in this file; it is
        // presumably what fills dstN, retN, ptrN and act_lenN - confirm there.
678         NORMALIZE_L(src0, sizeof(src0), dst0, sizeof(dst0), norm_depth, ret0, ptr0, act_len0);
679         NORMALIZE_L(src1, sizeof(src1), dst1, sizeof(dst1), norm_depth, ret1, ptr1, act_len1);
680         NORMALIZE_L(src2, sizeof(src2), dst2, sizeof(dst2), norm_depth, ret2, ptr2, act_len2);
681         NORMALIZE_L(src3, sizeof(src3), dst3, sizeof(dst3), norm_depth, ret3, ptr3, act_len3);
682         NORMALIZE_L(src4, sizeof(src4), dst4, sizeof(dst4), norm_depth, ret4, ptr4, act_len4);
683
        // For each input: return code is SCRIPT_CONTINUE, the source pointer
        // advanced by the full sizeof(src), and length and bytes match.
684         CHECK(ret0 == JSTokenizer::SCRIPT_CONTINUE);
685         CHECK((ptr0 - src0) == sizeof(src0));
686         CHECK(act_len0 == sizeof(expected0) - 1);
687         CHECK(!memcmp(dst0, expected0, act_len0));
688
689         CHECK(ret1 == JSTokenizer::SCRIPT_CONTINUE);
690         CHECK((ptr1 - src1) == sizeof(src1));
691         CHECK(act_len1 == sizeof(expected1) - 1);
692         CHECK(!memcmp(dst1, expected1, act_len1));
693
694         CHECK(ret2 == JSTokenizer::SCRIPT_CONTINUE);
695         CHECK((ptr2 - src2) == sizeof(src2));
696         CHECK(act_len2 == sizeof(expected0) - 1);
697         CHECK(!memcmp(dst2, expected0, act_len2));
698
699         CHECK(ret3 == JSTokenizer::SCRIPT_CONTINUE);
700         CHECK((ptr3 - src3) == sizeof(src3));
701         CHECK(act_len3 == sizeof(expected1) - 1);
702         CHECK(!memcmp(dst3, expected1, act_len3));
703
704         CHECK(ret4 == JSTokenizer::SCRIPT_CONTINUE);
705         CHECK((ptr4 - src4) == sizeof(src4));
706         CHECK(act_len4 == sizeof(expected2) - 1);
707         CHECK(!memcmp(dst4, expected2, act_len4));
708     }
709     SECTION("punctuators")
710     {
711         NORMALIZE(all_patterns_buf2);
712         VALIDATE(all_patterns_buf2, all_patterns_expected2);
713     }
714     SECTION("keywords")
715     {
716         NORMALIZE(all_patterns_buf3);
717         VALIDATE(all_patterns_buf3, all_patterns_expected3);
718     }
719     SECTION("literals")
720     {
721         NORMALIZE(all_patterns_buf4);
722         VALIDATE(all_patterns_buf4, all_patterns_expected4);
723     }
724     SECTION("identifiers")
725     {
726         NORMALIZE(all_patterns_buf5);
727         VALIDATE(all_patterns_buf5, all_patterns_expected5);
728     }
729     SECTION("template literals")
730     {
731         NORMALIZE(all_patterns_buf6);
732         VALIDATE(all_patterns_buf6, all_patterns_expected6);
733     }
734 }
735 
736 // Test vectors for different syntax cases
// Variable declarations: var/let/const with string, number, array, boolean,
// regex, function, null and object initializers, one statement per line.
// Several string and regex literals contain runs of spaces and escapes.
737 static const char syntax_cases_buf0[] =
738     "var a;\n"
739     "var b = \"init this    stuff\";\n"
740     "var c = \"Hi\" + \" \" + \"Joe\";\n"
741     "var d = 1 + 2 + \"3\";\n"
742     "var e = [ 2, 3, 5, 8 ];\n"
743     "var f = false;\n"
744     "var g = /( i'm   a  .* regex )/;\n"
745     "var h = function(){};\n"
746     "const PI = 3.14;\n"
747     "var a = 1, b = 2, c = a + b;\n"
748     "let z = 'zzz zz';\n"
749     "var g = null;\n"
750     "var name = { first: \"Jane\", last: \"Doe\" };\n"
751     "var esc = 'I don\\'t \\n know';\n";
752
// Expected text: all statements on one line; the content of string and regex
// literals (inner spaces, escape sequences) is kept as written.
753 static const char syntax_cases_expected0[] =
754     "var a;var b=\"init this    stuff\";var c=\"Hi\"+\" \"+\"Joe\";"
755     "var d=1+2+\"3\";var e=[2,3,5,8];var f=false;var g=/( i'm   a  .* regex )/;"
756     "var h=function(){};const PI=3.14;var a=1,b=2,c=a+b;let z='zzz zz';var g=null;"
757     "var name={first:\"Jane\",last:\"Doe\"};var esc='I don\\'t \\n know';";
758 
// Arithmetic statements, including prefix/postfix ++ and -- written with
// spaces between the operator and its operand.
759 static const char syntax_cases_buf1[] =
760     "a = b + c - d;\n"
761     "a = b * (c / d);\n"
762     "x = 100 % 48;\n"
763     "a ++; b -- ; -- a; ++    b;\n";
764
// Expected text: no whitespace at all remains.
765 static const char syntax_cases_expected1[] =
766     "a=b+c-d;a=b*(c/d);x=100%48;a++;b--;--a;++b;";
767 
// Comparison, shift, assignment and logical expressions plus a typeof
// expression, one or two statements per line.
768 static const char syntax_cases_buf2[] =
769     "!(a == b);\n"
770     "a != b;\n"
771     "typeof a;\n"
772     "x << 2; x >> 3;\n"
773     "a = b;\n"
774     "a == b;\n"
775     "a != b;\n"
776     "a === b;\n"
777     "a !== b;\n"
778     "a < b; a > b;\n"
779     "a <= b;  a >= b;\n"
780     "a += b;\n"
781     "a && b;\n"
782     "a || b;\n";
783
// Expected text: the single remaining space is the one between typeof and a.
784 static const char syntax_cases_expected2[] =
785     "!(a==b);a!=b;typeof a;x<<2;x>>3;a=b;a==b;a!=b;a===b;a!==b;a<b;a>b;"
786     "a<=b;a>=b;a+=b;a&&b;a||b;";
787 
// Object literal with string and number properties and one method; the method
// body line starts with a TAB character.
788 static const char syntax_cases_buf3[] =
789     "var foo = {\n"
790         "firstFoo: \"FooFirst\",\n"
791         "secondFoo: \"FooSecond\",\n"
792         "thirdFoo: 10,\n"
793         "fourthFoo: 120,\n"
794         "methodFoo : function () {\n"
795             "\treturn this.firstFoo + \" \" + this.secondFoo;\n"
796         "}\n"
797     "};\n";
798
// Expected text: a single line; the space after return and the one-space
// string literal are the only spaces left.
799 static const char syntax_cases_expected3[] =
800     "var foo={firstFoo:\"FooFirst\",secondFoo:\"FooSecond\","
801     "thirdFoo:10,fourthFoo:120,methodFoo:function(){return this.firstFoo+\" \"+"
802     "this.secondFoo;}};";
803 
// Array usage: literal and constructor forms, indexing, element assignment and
// a for loop. Contains a line made only of TABs and ends with LF CR.
804 static const char syntax_cases_buf4[] =
805     "var dogs = [\"Bulldog\", \"Beagle\", \"Labrador\"];\n"
806     "var dogs = new Array(\"Bulldog\", \"Beagle\", \"Labrador\");\n"
807     "\t\t\t\n"
808     "alert( dogs[ 1 ] );\n"
809     "dogs[0] = \"Bull Terrier\";\n"
810     "\n"
811     "for (var i = 0; i < dogs.length; i++) {\n"
812         "console.log(dogs[i]);\n"
813     "}\n\r";
814
// Expected text: blank lines and the trailing LF CR leave no trace.
815 static const char syntax_cases_expected4[] =
816     "var dogs=[\"Bulldog\",\"Beagle\",\"Labrador\"];"
817     "var dogs=new Array(\"Bulldog\",\"Beagle\",\"Labrador\");alert(dogs[1]);"
818     "dogs[0]=\"Bull Terrier\";for(var i=0;i<dogs.length;i++){console.log(dogs[i]);}";
819 
// Loops: while, do-while and two for loops using break and continue.
// The do-while statement has no terminating semicolon in the input.
820 static const char syntax_cases_buf5[] =
821     "var i = 1;\n"
822     "while (i < 100) {\n"
823         "i *= 2;\n"
824         "document.write(i + \", \");\n"
825     "}\n"
826     "\n"
827     "i = 1;\n"
828     "do {\n"
829         "i *= 2;\n"
830         "document.write(i + \", \");\n"
831     "} while (i < 100)\n"
832     "\n"
833     "for (var i = 0; i < 10; i++) {\n"
834         "if (i == 5) { break; }\n"
835         "document.write(i + \", \");\n"
836     "}\n"
837     "\n"
838     "for (var i = 0; i < 10; i++) {\n"
839         "if (i == 5) { continue; }\n"
840         "document.write(i + \", \");\n"
841     "}\n\r";
842
// Expected text: note the semicolon that appears after while(i<100), where
// the input only had line breaks before the following for statement.
843 static const char syntax_cases_expected5[] =
844     "var i=1;while(i<100){i*=2;document.write(i+\", \");}i=1;do{i*=2;"
845     "document.write(i+\", \");}while(i<100);for(var i=0;i<10;i++){if(i==5){break;}"
846     "document.write(i+\", \");}for(var i=0;i<10;i++){if(i==5){continue;}"
847     "document.write(i+\", \");}";
848 
// Branching: an if/else pair and a switch with two cases and a default.
849 static const char syntax_cases_buf6[] =
850     "var n = 1800;\n"
851     "var res;\n"
852     "if ( (n >= 1400) && (n < 1900) ) {\n"
853         "res = \"In range.\";\n"
854     "} else {\n"
855         "res = \"Not in range.\";\n"
856     "}\n"
857     "\n"
858     "var text;\n"
859     "switch ( new Date().getDay() ) {\n"
860         "case 6:\n"
861             "text = \"Saturday\";\n"
862             "break;\n"
863         "case 0:\n"
864             "text = \"Sunday\";\n"
865             "break;\n"
866         "default:\n"
867             "text = \"Whatever\";\n"
868     "}\n\r";
869
// Expected text: spaces survive only after keywords that are followed by an
// identifier or number (var, new, case) and inside string literals.
870 static const char syntax_cases_expected6[] =
871     "var n=1800;var res;if((n>=1400)&&(n<1900)){res=\"In range.\";}"
872     "else{res=\"Not in range.\";}var text;switch(new Date().getDay()){case 6:"
873     "text=\"Saturday\";break;case 0:text=\"Sunday\";break;default:text=\"Whatever\";}";
874 
// Exception handling: try with several throw statements, catch and finally.
875 static const char syntax_cases_buf7[] =
876     "var x = document.getElementById(\"mynum\").value;\n"
877     "try { \n"
878         "if(x == \"\")  throw \"empty\";\n"
879         "if(isNaN(x)) throw \"not a number\";\n"
880         "x = Number(x);\n"
881         "if(x > 10)   throw \"too high\";\n"
882     "}\n"
883     "catch(err) {\n"
884         "document.write(\"Input is \" + err);\n"
885         "console.error(err);\n"
886     "}\n"
887     "finally {\n"
888         "document.write(\"</br />Done\");\n"
889     "}\n\r";
890
// Expected text: one space is kept between throw and the string that follows.
891 static const char syntax_cases_expected7[] =
892     "var x=document.getElementById(\"mynum\").value;try{if(x==\"\")"
893     "throw \"empty\";if(isNaN(x))throw \"not a number\";x=Number(x);if(x>10)"
894     "throw \"too high\";}catch(err){document.write(\"Input is \"+err);console.error(err);}"
895     "finally{document.write(\"</br />Done\");}";
896 
// Functions and promises: a function returning a Promise with nested anonymous
// functions, followed by a then/then/catch call chain spread over lines.
897 static const char syntax_cases_buf8[] =
898     "function sum (a, b) {\n"
899     "return new Promise(function (resolve, reject) {\n"
900         "setTimeout(function () {\n"
901         "if (typeof a !== \"number\" || typeof b !== \"number\") {\n"
902             "return reject(new TypeError(\"Inputs must be numbers\"));\n"
903         "}\n"
904         "resolve(a + b);\n"
905         "}, 1000);\n"
906     "});\n"
907     "}\n"
908     "\n"
909     "var myPromise = sum(10, 5);\n"
910     "myPromise.then(function (result) {\n"
911         "document.write(\" 10 + 5: \", result);\n"
912         "return sum(null, \"foo\");\n"
913         "}).then(function () {\n"
914         "}).catch(function (err) {\n"
915         "console.error(err);\n"
916     "});\n\r";
917
// Expected text: the call chain is joined without separators between the
// closing parenthesis and the next .then/.catch.
918 static const char syntax_cases_expected8[] =
919     "function sum(a,b){return new Promise(function(resolve,reject)"
920     "{setTimeout(function(){if(typeof a!==\"number\"||typeof b!==\"number\"){return "
921     "reject(new TypeError(\"Inputs must be numbers\"));}resolve(a+b);},1000);});}"
922     "var myPromise=sum(10,5);myPromise.then(function(result){"
923     "document.write(\" 10 + 5: \",result);return sum(null,\"foo\");}).then(function(){})"
924     ".catch(function(err){console.error(err);});";
925 
// Slash used both ways: as a division after a call result (line one) and as
// the delimiter of regex literals passed to match (lines two and three).
926 static const char syntax_cases_buf9[] =
927     "var a = Math.round( (new Date).getTime()/1E3 );\n"
928     "var b = a.match( /^[0-9a-z-_.]{10,1200}$/i );\n"
929     "var c = a.match( /=\\s*{((.|\\s)*?)};/g ) ;\n\r";
930
// Expected text: the regex bodies and flags are kept exactly as written.
931 static const char syntax_cases_expected9[] =
932     "var a=Math.round((new Date).getTime()/1E3);"
933     "var b=a.match(/^[0-9a-z-_.]{10,1200}$/i);"
934     "var c=a.match(/=\\s*{((.|\\s)*?)};/g);";
935 
// A statement without a semicolon, then a line that starts with a slash.
936 static const char syntax_cases_buf10[] =
937     "var a = 2\n/ab -cd/";
938
// Expected text: a semicolon stands where the line break was, and the
// slash-delimited part keeps its inner space.
939 static const char syntax_cases_expected10[] =
940     "var a=2;/ab -cd/";
941 
// Literals that contain their own delimiter in escaped form: double-quoted and
// single-quoted strings with escaped backslashes and quotes, and regex
// literals with escaped backslashes and slashes.
942 static const char syntax_cases_buf11[] =
943     "var d_str1 = \"\\\\ \" ; var d_str2 = \"abc\\\"def\" ;"
944     "var d_str3 = \"\\\"abc \" ;var s_str1 = '\\\\ ' ; var s_str2 = 'abc\\\'def' ; "
945     "var s_str3 = '\\\'abc ' ;var re_1 = /\\\\ / ; var re_2 = /abc\\/def/ ; "
946     "var re_3 = /\\/abc / ;";
947
// Expected text: every literal is reproduced unchanged; only the whitespace
// between tokens is removed.
948 static const char syntax_cases_expected11[] =
949     "var d_str1=\"\\\\ \";var d_str2=\"abc\\\"def\";"
950     "var d_str3=\"\\\"abc \";var s_str1='\\\\ ';var s_str2='abc\\\'def';"
951     "var s_str3='\\\'abc ';var re_1=/\\\\ /;var re_2=/abc\\/def/;var re_3=/\\/abc /;";
952 
// String literals continued across lines: a backslash directly followed by LF
// (str1, str3) or by CR LF (str2, str4), in both quote styles.
953 static const char syntax_cases_buf12[] =
954     "var str1 = \"abc\\\n def\" ;"
955     "var str2 = \"abc\\\r\n def\" ;"
956     "var str3 = 'abc\\\n def' ;"
957     "var str4 = 'abc\\\r\n def' ;";
958
// Expected text: the backslash and the line break are absent, the rest of the
// string (including the space before def) is kept.
959 static const char syntax_cases_expected12[] =
960     "var str1=\"abc def\";"
961     "var str2=\"abc def\";"
962     "var str3='abc def';"
963     "var str4='abc def';";
964 
// A regex literal that directly follows the return keyword.
965 static const char syntax_cases_buf13[] =
966     "return /regex/i.test( str ) ;";
967
// Expected text: the space between return and the regex is kept.
968 static const char syntax_cases_expected13[] =
969     "return /regex/i.test(str);";
970 
// Binary + and - placed next to ++, -- or a unary minus, where removing the
// separating space would merge two operators into a different token.
971 static const char syntax_cases_buf14[] =
972     "var a = b+ ++c ;\n"
973     "var a = b++ +c ;\n"
974     "var a = b++ + ++c ;\n"
975     "var a = b- --c ;\n"
976     "var a = b-- -c ;\n"
977     "var a = b-- - --c ;\n"
978     "var a = b++ - ++c ;\n"
979     "var a = b * -c ;\n"
980     "var a = b % -c ;\n"
981     "var a = b + -c ;";
982
// Expected text: the space between the two operators is kept in every line,
// also after * and % when a unary minus follows.
983 static const char syntax_cases_expected14[] =
984     "var a=b+ ++c;"
985     "var a=b++ +c;"
986     "var a=b++ + ++c;"
987     "var a=b- --c;"
988     "var a=b-- -c;"
989     "var a=b-- - --c;"
990     "var a=b++ - ++c;"
991     "var a=b* -c;"
992     "var a=b% -c;"
993     "var a=b+ -c;";
994 
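995 // In the following cases (the offset is the last argument of VALIDATE_FAIL):
996 //   the reading cursor appears to stop inside the literal, right after the offending character
997 //   the output is cut off inside the malformed literal, so the literal is never present as a whole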
998 
// Vectors 15-21: string and regex literals broken by a line terminator. Each
// expected text ends right after the opening part of the literal (abc).
// They are used by the "bad tokens" test case.

// Single-quoted string containing U+2028. The backslash-u sequence is a C++
// universal-character-name, so the array holds the encoded character itself.
999 static const char syntax_cases_buf15[] =
1000     "var invalid_str = 'abc\u2028 def' ;\n";
1001
1002 static const char syntax_cases_expected15[] =
1003     "var invalid_str='abc";
1004
// Double-quoted string containing a raw LF.
1005 static const char syntax_cases_buf16[] =
1006     "var invalid_str = \"abc\n def\"";
1007
1008 static const char syntax_cases_expected16[] =
1009     "var invalid_str=\"abc";
1010
// Single-quoted string containing a raw CR.
1011 static const char syntax_cases_buf17[] =
1012     "var invalid_str = 'abc\r def'";
1013
1014 static const char syntax_cases_expected17[] =
1015     "var invalid_str='abc";
1016
// Single-quoted string with a backslash followed by LF and then CR.
1017 static const char syntax_cases_buf18[] =
1018     "var invalid_str = 'abc\\\n\r def'";
1019
1020 static const char syntax_cases_expected18[] =
1021     "var invalid_str='abc";
1022
// Regex literal with a backslash followed by LF.
1023 static const char syntax_cases_buf19[] =
1024     "var invalid_re = /abc\\\n def/";
1025
1026 static const char syntax_cases_expected19[] =
1027     "var invalid_re=/abc";
1028
// Regex literal with a backslash followed by CR LF.
1029 static const char syntax_cases_buf20[] =
1030     "var invalid_re = /abc\\\r\n def/";
1031
1032 static const char syntax_cases_expected20[] =
1033     "var invalid_re=/abc";
1034
// Single-quoted string containing U+2029 (universal-character-name, see
// vector 15); the input additionally ends with LF CR.
1035 static const char syntax_cases_buf21[] =
1036     "var invalid_str = 'abc\u2029 def' ;\n\r";
1037
1038 static const char syntax_cases_expected21[] =
1039     "var invalid_str='abc";
1040 
// Tagged template with a line break, backslash runs in front of a dollar-brace
// (three backslashes, then two), and a substitution that itself contains a
// nested template literal with another substitution.
1041 static const char syntax_cases_buf22[] =
1042     "tag`template\n \\\\\\${   }   \\\\${   a  + ` template ${ 1 + c  }`  }`";
1043
// Expected text: the part after the three-backslash run keeps its spaces,
// while whitespace inside the two real substitutions is removed.
1044 static const char syntax_cases_expected22[] =
1045     "tag `template\n \\\\\\${   }   \\\\${a+` template ${1+c}`}`";
1046
// Template literals nested five levels deep through substitutions; used by the
// "template literal overflow" test case.
1047 static const char syntax_cases_buf23[] =
1048     "`${`${`${`${`${}`}`}`}`}`}";
1049
// Expected text: four complete backtick-dollar-brace openers plus the fifth
// backtick, nothing after it.
1050 static const char syntax_cases_expected23[] =
1051     "`${`${`${`${`";
1052 
// Well-formed input: every section passes one syntax_cases vector through
// NORMALIZE (whole array, sizeof) and compares against the expected array of
// the same index with VALIDATE. Vectors 15-21 and 23 are not used here; they
// belong to the "bad tokens" and "template literal overflow" test cases.
1053 TEST_CASE("syntax cases", "[JSNormalizer]")
1054 {
1055     SECTION("variables")
1056     {
1057         NORMALIZE(syntax_cases_buf0);
1058         VALIDATE(syntax_cases_buf0, syntax_cases_expected0);
1059     }
1060     SECTION("operators")
1061     {
1062         NORMALIZE(syntax_cases_buf1);
1063         VALIDATE(syntax_cases_buf1, syntax_cases_expected1);
1064     }
1065     SECTION("arithmetic and logical operators")
1066     {
1067         NORMALIZE(syntax_cases_buf2);
1068         VALIDATE(syntax_cases_buf2, syntax_cases_expected2);
1069     }
1070     SECTION("complex object")
1071     {
1072         NORMALIZE(syntax_cases_buf3);
1073         VALIDATE(syntax_cases_buf3, syntax_cases_expected3);
1074     }
1075     SECTION("arrays")
1076     {
1077         NORMALIZE(syntax_cases_buf4);
1078         VALIDATE(syntax_cases_buf4, syntax_cases_expected4);
1079     }
1080     SECTION("loops")
1081     {
1082         NORMALIZE(syntax_cases_buf5);
1083         VALIDATE(syntax_cases_buf5, syntax_cases_expected5);
1084     }
1085     SECTION("if-else and switch statements")
1086     {
1087         NORMALIZE(syntax_cases_buf6);
1088         VALIDATE(syntax_cases_buf6, syntax_cases_expected6);
1089     }
1090     SECTION("try-catch statements")
1091     {
1092         NORMALIZE(syntax_cases_buf7);
1093         VALIDATE(syntax_cases_buf7, syntax_cases_expected7);
1094     }
1095     SECTION("functions and promises")
1096     {
1097         NORMALIZE(syntax_cases_buf8);
1098         VALIDATE(syntax_cases_buf8, syntax_cases_expected8);
1099     }
1100     SECTION("regex-division ambiguity")
1101     {
1102         NORMALIZE(syntax_cases_buf9);
1103         VALIDATE(syntax_cases_buf9, syntax_cases_expected9);
1104     }
1105     SECTION("regex on a new line")
1106     {
1107         NORMALIZE(syntax_cases_buf10);
1108         VALIDATE(syntax_cases_buf10, syntax_cases_expected10);
1109     }
1110     SECTION("string and regex literals ambiguity with escaped sentinel chars")
1111     {
1112         NORMALIZE(syntax_cases_buf11);
1113         VALIDATE(syntax_cases_buf11, syntax_cases_expected11);
1114     }
1115     SECTION("escaped LF and CR chars in literals")
1116     {
1117         NORMALIZE(syntax_cases_buf12);
1118         VALIDATE(syntax_cases_buf12, syntax_cases_expected12);
1119     }
1120     SECTION("regex after keyword")
1121     {
1122         NORMALIZE(syntax_cases_buf13);
1123         VALIDATE(syntax_cases_buf13, syntax_cases_expected13);
1124     }
1125     SECTION("white space between '+'<-->'++' and '-'<-->'--'")
1126     {
1127         NORMALIZE(syntax_cases_buf14);
1128         VALIDATE(syntax_cases_buf14, syntax_cases_expected14);
1129     }
    // Uses vector 22 (nested, escaped template literal).
1130     SECTION("template literals")
1131     {
1132         NORMALIZE(syntax_cases_buf22);
1133         VALIDATE(syntax_cases_buf22, syntax_cases_expected22);
1134     }
1135 }
1136 
// Malformed literals: every section expects JSTokenizer::BAD_TOKEN.
// NOTE(review): VALIDATE_FAIL is defined elsewhere in this file; its last
// argument looks like the expected source offset of the read pointer
// (23 or 25 here, i.e. a position inside the broken literal) - confirm there.
1137 TEST_CASE("bad tokens", "[JSNormalizer]")
1138 {
1139     SECTION("LS chars within literal")
1140     {
1141         NORMALIZE(syntax_cases_buf15);
1142         VALIDATE_FAIL(syntax_cases_buf15, syntax_cases_expected15, JSTokenizer::BAD_TOKEN, 25);
1143     }
1144     SECTION("PS chars within literal")
1145     {
1146         NORMALIZE(syntax_cases_buf21);
1147         VALIDATE_FAIL(syntax_cases_buf21, syntax_cases_expected21, JSTokenizer::BAD_TOKEN, 25);
1148     }
1149     SECTION("explicit LF within literal")
1150     {
1151         NORMALIZE(syntax_cases_buf16);
1152         VALIDATE_FAIL(syntax_cases_buf16, syntax_cases_expected16, JSTokenizer::BAD_TOKEN, 23);
1153     }
1154     SECTION("explicit CR within literal")
1155     {
1156         NORMALIZE(syntax_cases_buf17);
1157         VALIDATE_FAIL(syntax_cases_buf17, syntax_cases_expected17, JSTokenizer::BAD_TOKEN, 23);
1158     }
    // In vector 18 the backslash is followed by LF and then CR.
1159     SECTION("escaped LF-CR sequence within literal")
1160     {
1161         NORMALIZE(syntax_cases_buf18);
1162         VALIDATE_FAIL(syntax_cases_buf18, syntax_cases_expected18, JSTokenizer::BAD_TOKEN, 25);
1163     }
1164     SECTION("escaped LF within regex literal")
1165     {
1166         NORMALIZE(syntax_cases_buf19);
1167         VALIDATE_FAIL(syntax_cases_buf19, syntax_cases_expected19, JSTokenizer::BAD_TOKEN, 23);
1168     }
1169     SECTION("escaped CR-LF within regex literal")
1170     {
1171         NORMALIZE(syntax_cases_buf20);
1172         VALIDATE_FAIL(syntax_cases_buf20, syntax_cases_expected20, JSTokenizer::BAD_TOKEN, 23);
1173     }
1174 }
1175 
// Nesting limit: vector 23 nests template literals five levels deep and the
// normalizer (constructed by NORMALIZE with max_template_nesting) is expected
// to report TEMPLATE_NESTING_OVERFLOW. The value 15 equals the length of five
// backtick-dollar-brace openers in the source.
1176 TEST_CASE("template literal overflow", "[JSNormalizer]")
1177 {
1178     SECTION("exceeding template literal limit")
1179     {
1180         NORMALIZE(syntax_cases_buf23);
1181         VALIDATE_FAIL(syntax_cases_buf23, syntax_cases_expected23,
1182             JSTokenizer::TEMPLATE_NESTING_OVERFLOW, 15);
1183     }
1184 }
1185 
// Semicolon insertion vectors 0-4: an indexed access (array[0]) ending with a
// closing bracket, a line break, then in turn a block, a literal word,
// an increment operator, a keyword and a var declaration.
// In every expected text a semicolon replaces the line break.
1186 static const char asi_cases_buf0[] =
1187     "array[0]\n{}";
1188
1189 static const char asi_cases_expected0[] =
1190     "array[0];{}";
1191
1192 static const char asi_cases_buf1[] =
1193     "array[0]\ntrue";
1194
1195 static const char asi_cases_expected1[] =
1196     "array[0];true";
1197
1198 static const char asi_cases_buf2[] =
1199     "array[0]\n++";
1200
1201 static const char asi_cases_expected2[] =
1202     "array[0];++";
1203
1204 static const char asi_cases_buf3[] =
1205     "array[0]\ncontinue";
1206
1207 static const char asi_cases_expected3[] =
1208     "array[0];continue";
1209
1210 static const char asi_cases_buf4[] =
1211     "array[0]\nvar b;";
1212
1213 static const char asi_cases_expected4[] =
1214     "array[0];var b;";
1215 
// Semicolon insertion vectors 5-8: a call (func()) ending with a closing
// parenthesis, a line break, then a literal word, an increment operator,
// a keyword and a var declaration. Expected: semicolon instead of the break.
1216 static const char asi_cases_buf5[] =
1217     "func()\ntrue";
1218
1219 static const char asi_cases_expected5[] =
1220     "func();true";
1221
1222 static const char asi_cases_buf6[] =
1223     "func()\n++";
1224
1225 static const char asi_cases_expected6[] =
1226     "func();++";
1227
1228 static const char asi_cases_buf7[] =
1229     "func()\ncontinue";
1230
1231 static const char asi_cases_expected7[] =
1232     "func();continue";
1233
1234 static const char asi_cases_buf8[] =
1235     "func()\nvar b;";
1236
1237 static const char asi_cases_expected8[] =
1238     "func();var b;";
1239 
// Semicolon insertion vectors 9-13: a number literal (1024), a line break,
// then a block, a literal word, an increment operator, a keyword and a var
// declaration. Expected: semicolon instead of the break.
1240 static const char asi_cases_buf9[] =
1241     "1024\n{}";
1242
1243 static const char asi_cases_expected9[] =
1244     "1024;{}";
1245
1246 static const char asi_cases_buf10[] =
1247     "1024\ntrue";
1248
1249 static const char asi_cases_expected10[] =
1250     "1024;true";
1251
1252 static const char asi_cases_buf11[] =
1253     "1024\n++";
1254
1255 static const char asi_cases_expected11[] =
1256     "1024;++";
1257
1258 static const char asi_cases_buf12[] =
1259     "1024\ncontinue";
1260
1261 static const char asi_cases_expected12[] =
1262     "1024;continue";
1263
1264 static const char asi_cases_buf13[] =
1265     "1024\nvar b;";
1266
1267 static const char asi_cases_expected13[] =
1268     "1024;var b;";
1269 
// Semicolon insertion vectors 14-19: an increment operator (++), a line break,
// then a block, an array literal, a literal word, another increment operator,
// a keyword and a var declaration. Expected: semicolon instead of the break.
1270 static const char asi_cases_buf14[] =
1271     "++\n{}";
1272
1273 static const char asi_cases_expected14[] =
1274     "++;{}";
1275
1276 static const char asi_cases_buf15[] =
1277     "++\n[1,2,3]";
1278
1279 static const char asi_cases_expected15[] =
1280     "++;[1,2,3]";
1281
1282 static const char asi_cases_buf16[] =
1283     "++\ntrue";
1284
1285 static const char asi_cases_expected16[] =
1286     "++;true";
1287
1288 static const char asi_cases_buf17[] =
1289     "++\n++";
1290
1291 static const char asi_cases_expected17[] =
1292     "++;++";
1293
1294 static const char asi_cases_buf18[] =
1295     "++\ncontinue";
1296
1297 static const char asi_cases_expected18[] =
1298     "++;continue";
1299
1300 static const char asi_cases_buf19[] =
1301     "++\nvar b;";
1302
1303 static const char asi_cases_expected19[] =
1304     "++;var b;";
1305 
// Semicolon insertion vectors 20-26: the return keyword, a line break, then
// a block, an array literal, a signed operand, a literal word, an increment
// operator, a keyword and a var declaration.
// Expected: semicolon instead of the break, i.e. return stands alone.
1306 static const char asi_cases_buf20[] =
1307     "return\n{}";
1308
1309 static const char asi_cases_expected20[] =
1310     "return;{}";
1311
1312 static const char asi_cases_buf21[] =
1313     "return\n[1,2,3]";
1314
1315 static const char asi_cases_expected21[] =
1316     "return;[1,2,3]";
1317
1318 static const char asi_cases_buf22[] =
1319     "return\n+a";
1320
1321 static const char asi_cases_expected22[] =
1322     "return;+a";
1323
1324 static const char asi_cases_buf23[] =
1325     "return\ntrue";
1326
1327 static const char asi_cases_expected23[] =
1328     "return;true";
1329
1330 static const char asi_cases_buf24[] =
1331     "return\n++";
1332
1333 static const char asi_cases_expected24[] =
1334     "return;++";
1335
1336 static const char asi_cases_buf25[] =
1337     "return\ncontinue";
1338
1339 static const char asi_cases_expected25[] =
1340     "return;continue";
1341
1342 static const char asi_cases_buf26[] =
1343     "return\nvar b;";
1344
1345 static const char asi_cases_expected26[] =
1346     "return;var b;";
1347 
// Semicolon insertion at a line break: each section normalizes one two-line
// asi_cases vector and compares it with the expected array of the same index.
// Section names have the form "group_X to group_Y": X belongs to the token
// before the line break, Y to the token after it.
// NOTE(review): the group numbers presumably mirror token groups used by the
// tokenizer for this decision - confirm against the tokenizer source.
1348 TEST_CASE("automatic semicolon insertion", "[JSNormalizer]")
1349 {
    // group_4: input starts with array[0]
1350     SECTION("group_4 to group_1")
1351     {
1352         NORMALIZE(asi_cases_buf0);
1353         VALIDATE(asi_cases_buf0, asi_cases_expected0);
1354     }
1355
1356     SECTION("group_4 to group_7")
1357     {
1358         NORMALIZE(asi_cases_buf1);
1359         VALIDATE(asi_cases_buf1, asi_cases_expected1);
1360     }
1361
1362     SECTION("group_4 to group_8")
1363     {
1364         NORMALIZE(asi_cases_buf2);
1365         VALIDATE(asi_cases_buf2, asi_cases_expected2);
1366     }
1367
1368     SECTION("group_4 to group_9")
1369     {
1370         NORMALIZE(asi_cases_buf3);
1371         VALIDATE(asi_cases_buf3, asi_cases_expected3);
1372     }
1373
1374     SECTION("group_4 to group_10")
1375     {
1376         NORMALIZE(asi_cases_buf4);
1377         VALIDATE(asi_cases_buf4, asi_cases_expected4);
1378     }
1379
    // group_5: input starts with func()
1380     SECTION("group_5 to group_7")
1381     {
1382         NORMALIZE(asi_cases_buf5);
1383         VALIDATE(asi_cases_buf5, asi_cases_expected5);
1384     }
1385
1386     SECTION("group_5 to group_8")
1387     {
1388         NORMALIZE(asi_cases_buf6);
1389         VALIDATE(asi_cases_buf6, asi_cases_expected6);
1390     }
1391
1392     SECTION("group_5 to group_9")
1393     {
1394         NORMALIZE(asi_cases_buf7);
1395         VALIDATE(asi_cases_buf7, asi_cases_expected7);
1396     }
1397
1398     SECTION("group_5 to group_10")
1399     {
1400         NORMALIZE(asi_cases_buf8);
1401         VALIDATE(asi_cases_buf8, asi_cases_expected8);
1402     }
1403
    // group_7: input starts with the number 1024
1404     SECTION("group_7 to group_1")
1405     {
1406         NORMALIZE(asi_cases_buf9);
1407         VALIDATE(asi_cases_buf9, asi_cases_expected9);
1408     }
1409
1410     SECTION("group_7 to group_7")
1411     {
1412         NORMALIZE(asi_cases_buf10);
1413         VALIDATE(asi_cases_buf10, asi_cases_expected10);
1414     }
1415
1416     SECTION("group_7 to group_8")
1417     {
1418         NORMALIZE(asi_cases_buf11);
1419         VALIDATE(asi_cases_buf11, asi_cases_expected11);
1420     }
1421
1422     SECTION("group_7 to group_9")
1423     {
1424         NORMALIZE(asi_cases_buf12);
1425         VALIDATE(asi_cases_buf12, asi_cases_expected12);
1426     }
1427
1428     SECTION("group_7 to group_10")
1429     {
1430         NORMALIZE(asi_cases_buf13);
1431         VALIDATE(asi_cases_buf13, asi_cases_expected13);
1432     }
1433
    // group_8: input starts with ++
1434     SECTION("group_8 to group_1")
1435     {
1436         NORMALIZE(asi_cases_buf14);
1437         VALIDATE(asi_cases_buf14, asi_cases_expected14);
1438     }
1439
1440     SECTION("group_8 to group_3")
1441     {
1442         NORMALIZE(asi_cases_buf15);
1443         VALIDATE(asi_cases_buf15, asi_cases_expected15);
1444     }
1445
1446     SECTION("group_8 to group_7")
1447     {
1448         NORMALIZE(asi_cases_buf16);
1449         VALIDATE(asi_cases_buf16, asi_cases_expected16);
1450     }
1451
1452     SECTION("group_8 to group_8")
1453     {
1454         NORMALIZE(asi_cases_buf17);
1455         VALIDATE(asi_cases_buf17, asi_cases_expected17);
1456     }
1457
1458     SECTION("group_8 to group_9")
1459     {
1460         NORMALIZE(asi_cases_buf18);
1461         VALIDATE(asi_cases_buf18, asi_cases_expected18);
1462     }
1463
1464     SECTION("group_8 to group_10")
1465     {
1466         NORMALIZE(asi_cases_buf19);
1467         VALIDATE(asi_cases_buf19, asi_cases_expected19);
1468     }
1469
    // group_9: input starts with the return keyword
1470     SECTION("group_9 to group_1")
1471     {
1472         NORMALIZE(asi_cases_buf20);
1473         VALIDATE(asi_cases_buf20, asi_cases_expected20);
1474     }
1475
1476     SECTION("group_9 to group_3")
1477     {
1478         NORMALIZE(asi_cases_buf21);
1479         VALIDATE(asi_cases_buf21, asi_cases_expected21);
1480     }
1481
1482     SECTION("group_9 to group_6")
1483     {
1484         NORMALIZE(asi_cases_buf22);
1485         VALIDATE(asi_cases_buf22, asi_cases_expected22);
1486     }
1487
1488     SECTION("group_9 to group_7")
1489     {
1490         NORMALIZE(asi_cases_buf23);
1491         VALIDATE(asi_cases_buf23, asi_cases_expected23);
1492     }
1493
1494     SECTION("group_9 to group_8")
1495     {
1496         NORMALIZE(asi_cases_buf24);
1497         VALIDATE(asi_cases_buf24, asi_cases_expected24);
1498     }
1499
1500     SECTION("group_9 to group_9")
1501     {
1502         NORMALIZE(asi_cases_buf25);
1503         VALIDATE(asi_cases_buf25, asi_cases_expected25);
1504     }
1505
1506     SECTION("group_9 to group_10")
1507     {
1508         NORMALIZE(asi_cases_buf26);
1509         VALIDATE(asi_cases_buf26, asi_cases_expected26);
1510     }
1511 }
1512 
1513 TEST_CASE("endings", "[JSNormalizer]")
1514 {
1515     SECTION("script closing tag is present", "[JSNormalizer]")
1516     {
1517         const char src[] =
1518             "var a = 1 ;\n" // 12 bytes
1519             "var b = 2 ;\n" // 12 bytes
1520             "</script>\n"   // ptr_offset is here = 33
1521             "var c = 3 ;\n";
1522         const int ptr_offset = 33;
1523         const char expected[] = "var a=1;var b=2;";
1524         char dst[sizeof(expected) - 1];
1525         int act_len;
1526         const char* ptr;
1527         int ret;
1528 
1529         NORMALIZE_L(src, sizeof(src), dst, sizeof(dst), norm_depth, ret, ptr, act_len);
1530 
1531         CHECK(ret == JSTokenizer::SCRIPT_ENDED);
1532         CHECK(act_len == sizeof(expected) - 1);
1533         CHECK((ptr - src) == ptr_offset);
1534         CHECK(!memcmp(dst, expected, act_len));
1535     }
1536     SECTION("depth reached", "[JSNormalizer]")
1537     {
1538         const char src[] = "var abc = 123;\n\r";
1539         const char src2[] = "var foo = 321;\n\r";
1540         const char expected[] = "var abc";
1541         const char* ptr;
1542         int ret;
1543 
1544         JSIdentifierCtxStub ident_ctx;
1545         JSNormalizer norm(ident_ctx, 7, max_template_nesting, max_bracket_depth);
1546         ret = norm.normalize(src, sizeof(src));
1547         ptr = norm.get_src_next();
1548         int act_len1 = norm.script_size();
1549         const char* dst1 = norm.take_script();
1550 
1551         CHECK(ret == JSTokenizer::EOS);
1552         CHECK(ptr == src + 7);
1553         CHECK(act_len1 == sizeof(expected) - 1);
1554         CHECK(!memcmp(dst1, expected, act_len1));
1555         delete[] dst1;
1556 
1557         ret = norm.normalize(src2, sizeof(src2));
1558         ptr = norm.get_src_next();
1559         int act_len2 = norm.script_size();
1560         const char* dst2 = norm.take_script();
1561 
1562         CHECK(ret == JSTokenizer::EOS);
1563         CHECK(ptr == src2 + sizeof(src2));
1564         CHECK(act_len2 == 0);
1565         delete[] dst2;
1566     }
1567 }
1568 
// Fixtures for TEST_CASE("nested script tags"). For every N,
// unexpected_tag_bufN is the script handed to the normalizer and
// unexpected_tag_expectedN is the output it is validated against.
// The test passes numeric offsets alongside these buffers, so the literals
// must keep their exact bytes (including the "\n" / "\r\n" line ends).
// NOTE(review): the offsets look like source positions right after the
// detected tag - confirm against VALIDATE_FAIL.

// 0: explicit open tag - simple
static const char unexpected_tag_buf0[] =
    "var a = 1;\n"
    "<script>\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected0[] =
    "var a=1;";

// 1: explicit open tag - complex (tag with an attribute)
static const char unexpected_tag_buf1[] =
    "var a = 1;\n"
    "<script type=application/javascript>\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected1[] =
    "var a=1;";

// 2: open tag within literal - start
static const char unexpected_tag_buf2[] =
    "var a = 1;\n"
    "var str = '<script> something';\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected2[] =
    "var a=1;var str='";

// 3: open tag within literal - mid
static const char unexpected_tag_buf3[] =
    "var a = 1;\n"
    "var str = 'something <script> something';\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected3[] =
    "var a=1;var str='something ";

// 4: open tag within literal - end
static const char unexpected_tag_buf4[] =
    "var a = 1;\n"
    "var str = 'something <script>';\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected4[] =
    "var a=1;var str='something ";

// 5: close tag within literal - start
static const char unexpected_tag_buf5[] =
    "var a = 1;\n"
    "var str = '</script> something';\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected5[] =
    "var a=1;var str='";

// 6: close tag within literal - mid
static const char unexpected_tag_buf6[] =
    "var a = 1;\n"
    "var str = 'something </script> something';\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected6[] =
    "var a=1;var str='something ";

// 7: close tag within literal - end
static const char unexpected_tag_buf7[] =
    "var a = 1;\n"
    "var str = 'something </script>';\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected7[] =
    "var a=1;var str='something ";

// 8: open tag within literal - escaped ('\<script\>' in the script text);
// the expected output still stops before the tag
static const char unexpected_tag_buf8[] =
    "var a = 1;\n"
    "var str = 'something \\<script\\> something';\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected8[] =
    "var a=1;var str='something \\";

// 9: close tag within literal - escaped ('\<\/script\>' in the script text);
// the expected output is the complete normalized script
static const char unexpected_tag_buf9[] =
    "var a = 1;\n"
    "var str = 'something \\<\\/script\\> something';\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected9[] =
    "var a=1;var str='something \\<\\/script\\> something';var b=2;";

// 10: open tag within single-line comment - start
static const char unexpected_tag_buf10[] =
    "var a = 1;\n"
    "//<script> something\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected10[] =
    "var a=1;";

// 11: open tag within single-line comment - mid
static const char unexpected_tag_buf11[] =
    "var a = 1;\n"
    "//something <script> something\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected11[] =
    "var a=1;";

// 12: open tag within single-line comment - end
static const char unexpected_tag_buf12[] =
    "var a = 1;\n"
    "//something <script>\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected12[] =
    "var a=1;";

// 13: open tag within multi-line comment - start
static const char unexpected_tag_buf13[] =
    "var a = 1;\n"
    "/*<script> something*/\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected13[] =
    "var a=1;";

// 14: open tag within multi-line comment - mid
static const char unexpected_tag_buf14[] =
    "var a = 1;\n"
    "/*something <script> something*/\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected14[] =
    "var a=1;";

// 15: open tag within multi-line comment - end
static const char unexpected_tag_buf15[] =
    "var a = 1;\n"
    "/*something <script>*/\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected15[] =
    "var a=1;";

// 16: close tag within single-line comment - start
static const char unexpected_tag_buf16[] =
    "var a = 1;\n"
    "//</script> something\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected16[] =
    "var a=1;";

// 17: close tag within single-line comment - mid
// (this one uses the HTML-style "<!--" comment opener)
static const char unexpected_tag_buf17[] =
    "var a = 1;\n"
    "<!--something </script> something//-->\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected17[] =
    "var a=1;";

// 18: close tag within single-line comment - end
static const char unexpected_tag_buf18[] =
    "var a = 1;\n"
    "//something </script>\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected18[] =
    "var a=1;";

// 19: close tag within multi-line comment - start
static const char unexpected_tag_buf19[] =
    "var a = 1;\n"
    "/*</script>\n"
    "something*/\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected19[] =
    "var a=1;";

// 20: close tag within multi-line comment - mid
static const char unexpected_tag_buf20[] =
    "var a = 1;\n"
    "/*something\n"
    "</script>\n"
    "something*/\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected20[] =
    "var a=1;";

// 21: close tag within multi-line comment - end
static const char unexpected_tag_buf21[] =
    "var a = 1;\n"
    "/*something\n"
    "</script>*/\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected21[] =
    "var a=1;";

// 22: multiple patterns - not matched (the word "script" without angle
// brackets); the expected output is the complete normalized script
static const char unexpected_tag_buf22[] =
    "var a = 1;\n"
    "var str = 'script somescript /script something';\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected22[] =
    "var a=1;var str='script somescript /script something';var b=2;";

// 23: multiple patterns - matched (same text followed by a real open tag)
static const char unexpected_tag_buf23[] =
    "var a = 1;\n"
    "var str = 'script somescript /script something <script>';\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected23[] =
    "var a=1;var str='script somescript /script something ";

// 24: mixed lower and upper case tag name
static const char unexpected_tag_buf24[] =
    "var a = 1;\n"
    "var str = 'something <sCrIpT>';\n"
    "var b = 2;\r\n";

static const char unexpected_tag_expected24[] =
    "var a=1;var str='something ";
1772 
// Script tags showing up inside a script: as plain code, inside string
// literals and inside comments. Each section normalizes one
// unexpected_tag_bufN fixture and validates it against
// unexpected_tag_expectedN.
//
// Sections using VALIDATE_FAIL also pass a JSTokenizer return code
// (OPENING_TAG or CLOSING_TAG) and a number. Measured against the fixtures,
// that number equals the byte offset just past "<script" (opening tag) or
// "</script>" (closing tag) in the source buffer.
// NOTE(review): VALIDATE_FAIL is defined outside this excerpt - confirm that
// its last argument is indeed the expected source offset.
TEST_CASE("nested script tags", "[JSNormalizer]")
{
    // --- opening tag as plain code ---
    SECTION("explicit open tag - simple")
    {
        NORMALIZE(unexpected_tag_buf0);
        VALIDATE_FAIL(unexpected_tag_buf0, unexpected_tag_expected0, JSTokenizer::OPENING_TAG, 18);
    }
    SECTION("explicit open tag - complex")
    {
        NORMALIZE(unexpected_tag_buf1);
        VALIDATE_FAIL(unexpected_tag_buf1, unexpected_tag_expected1, JSTokenizer::OPENING_TAG, 18);
    }
    // --- tags inside string literals ---
    SECTION("open tag within literal - start")
    {
        NORMALIZE(unexpected_tag_buf2);
        VALIDATE_FAIL(unexpected_tag_buf2, unexpected_tag_expected2, JSTokenizer::OPENING_TAG, 29);
    }
    SECTION("open tag within literal - mid")
    {
        NORMALIZE(unexpected_tag_buf3);
        VALIDATE_FAIL(unexpected_tag_buf3, unexpected_tag_expected3, JSTokenizer::OPENING_TAG, 39);
    }
    SECTION("open tag within literal - end")
    {
        NORMALIZE(unexpected_tag_buf4);
        VALIDATE_FAIL(unexpected_tag_buf4, unexpected_tag_expected4, JSTokenizer::OPENING_TAG, 39);
    }
    SECTION("close tag within literal - start")
    {
        NORMALIZE(unexpected_tag_buf5);
        VALIDATE_FAIL(unexpected_tag_buf5, unexpected_tag_expected5, JSTokenizer::CLOSING_TAG, 31);
    }
    SECTION("close tag within literal - mid")
    {
        NORMALIZE(unexpected_tag_buf6);
        VALIDATE_FAIL(unexpected_tag_buf6, unexpected_tag_expected6, JSTokenizer::CLOSING_TAG, 41);
    }
    SECTION("close tag within literal - end")
    {
        NORMALIZE(unexpected_tag_buf7);
        VALIDATE_FAIL(unexpected_tag_buf7, unexpected_tag_expected7, JSTokenizer::CLOSING_TAG, 41);
    }
    // A backslash before '<' and '>' still yields OPENING_TAG here ...
    SECTION("open tag within literal - escaped")
    {
        NORMALIZE(unexpected_tag_buf8);
        VALIDATE_FAIL(unexpected_tag_buf8, unexpected_tag_expected8, JSTokenizer::OPENING_TAG, 40);
    }
    // ... while the escaped closing tag ('\<\/script\>') is validated with
    // plain VALIDATE against the complete normalized script.
    SECTION("close tag within literal - escaped")
    {
        NORMALIZE(unexpected_tag_buf9);
        VALIDATE(unexpected_tag_buf9, unexpected_tag_expected9);
    }
    // --- opening tag inside comments ---
    SECTION("open tag within single-line comment - start")
    {
        NORMALIZE(unexpected_tag_buf10);
        VALIDATE_FAIL(unexpected_tag_buf10, unexpected_tag_expected10, JSTokenizer::OPENING_TAG, 20);
    }
    SECTION("open tag within single-line comment - mid")
    {
        NORMALIZE(unexpected_tag_buf11);
        VALIDATE_FAIL(unexpected_tag_buf11, unexpected_tag_expected11, JSTokenizer::OPENING_TAG, 30);
    }
    SECTION("open tag within single-line comment - end")
    {
        NORMALIZE(unexpected_tag_buf12);
        VALIDATE_FAIL(unexpected_tag_buf12, unexpected_tag_expected12, JSTokenizer::OPENING_TAG, 30);
    }
    SECTION("open tag within multi-line comment - start")
    {
        NORMALIZE(unexpected_tag_buf13);
        VALIDATE_FAIL(unexpected_tag_buf13, unexpected_tag_expected13, JSTokenizer::OPENING_TAG, 20);
    }
    SECTION("open tag within multi-line comment - mid")
    {
        NORMALIZE(unexpected_tag_buf14);
        VALIDATE_FAIL(unexpected_tag_buf14, unexpected_tag_expected14, JSTokenizer::OPENING_TAG, 30);
    }
    SECTION("open tag within multi-line comment - end")
    {
        NORMALIZE(unexpected_tag_buf15);
        VALIDATE_FAIL(unexpected_tag_buf15, unexpected_tag_expected15, JSTokenizer::OPENING_TAG, 30);
    }
    // --- closing tag inside comments ---
    SECTION("close tag within single-line comment - start")
    {
        NORMALIZE(unexpected_tag_buf16);
        VALIDATE_FAIL(unexpected_tag_buf16, unexpected_tag_expected16, JSTokenizer::CLOSING_TAG, 22);
    }
    // fixture 17 uses the HTML-style "<!--" comment opener rather than "//"
    SECTION("close tag within single-line comment - mid")
    {
        NORMALIZE(unexpected_tag_buf17);
        VALIDATE_FAIL(unexpected_tag_buf17, unexpected_tag_expected17, JSTokenizer::CLOSING_TAG, 34);
    }
    SECTION("close tag within single-line comment - end")
    {
        NORMALIZE(unexpected_tag_buf18);
        VALIDATE_FAIL(unexpected_tag_buf18, unexpected_tag_expected18, JSTokenizer::CLOSING_TAG, 32);
    }
    SECTION("close tag within multi-line comment - start")
    {
        NORMALIZE(unexpected_tag_buf19);
        VALIDATE_FAIL(unexpected_tag_buf19, unexpected_tag_expected19, JSTokenizer::CLOSING_TAG, 22);
    }
    SECTION("close tag within multi-line comment - mid")
    {
        NORMALIZE(unexpected_tag_buf20);
        VALIDATE_FAIL(unexpected_tag_buf20, unexpected_tag_expected20, JSTokenizer::CLOSING_TAG, 32);
    }
    SECTION("close tag within multi-line comment - end")
    {
        NORMALIZE(unexpected_tag_buf21);
        VALIDATE_FAIL(unexpected_tag_buf21, unexpected_tag_expected21, JSTokenizer::CLOSING_TAG, 32);
    }
    // --- pattern matching details ---
    // "script" / "/script" without angle brackets: validated as a normal script
    SECTION("multiple patterns - not matched")
    {
        NORMALIZE(unexpected_tag_buf22);
        VALIDATE(unexpected_tag_buf22, unexpected_tag_expected22);
    }
    SECTION("multiple patterns - matched")
    {
        NORMALIZE(unexpected_tag_buf23);
        VALIDATE_FAIL(unexpected_tag_buf23, unexpected_tag_expected23, JSTokenizer::OPENING_TAG, 65);
    }
    // "<sCrIpT>" is expected to be reported as an opening tag as well
    SECTION("mixed lower and upper case")
    {
        NORMALIZE(unexpected_tag_buf24);
        VALIDATE_FAIL(unexpected_tag_buf24, unexpected_tag_expected24, JSTokenizer::OPENING_TAG, 39);
    }
}
1901 
1902 TEST_CASE("split between tokens", "[JSNormalizer]")
1903 {
1904     SECTION("operator string")
1905     {
1906         const char dat1[] = "var s = ";
1907         const char dat2[] = "'string';";
1908         const char exp1[] = "var s=";
1909         const char exp2[] = "'string';";
1910         const char exp[] = "var s='string';";
1911 
1912         NORMALIZE_2(dat1, dat2, exp1, exp2);
1913         NORM_COMBINED_2(dat1, dat2, exp);
1914     }
1915     SECTION("operator number")
1916     {
1917         const char dat1[] = "a = 5 +";
1918         const char dat2[] = "b + c;";
1919         const char exp1[] = "a=5+";
1920         const char exp2[] = "b+c;";
1921         const char exp[] = "a=5+b+c;";
1922 
1923         NORMALIZE_2(dat1, dat2, exp1, exp2);
1924         NORM_COMBINED_2(dat1, dat2, exp);
1925     }
1926     SECTION("comment function")
1927     {
1928         const char dat1[] = "// no comments\n";
1929         const char dat2[] = "foo(bar, baz);";
1930         const char exp1[] = "";
1931         const char exp2[] = "foo(bar,baz);";
1932         const char exp[] = "foo(bar,baz);";
1933 
1934         NORMALIZE_2(dat1, dat2, exp1, exp2);
1935         NORM_COMBINED_2(dat1, dat2, exp);
1936     }
1937     SECTION("operator identifier")
1938     {
1939         const char dat1[] = "var ";
1940         const char dat2[] = "a = ";
1941         const char dat3[] = "b  ;";
1942         const char exp1[] = "var";
1943         const char exp2[] = " a=";
1944         const char exp3[] = "b;";
1945         const char exp[] = "var a=b;";
1946 
1947         NORMALIZE_3(dat1, dat2, dat3, exp1, exp2, exp3);
1948         NORM_COMBINED_3(dat1, dat2, dat3, exp);
1949     }
1950 }
1951 
1952 TEST_CASE("split in comments", "[JSNormalizer]")
1953 {
1954     SECTION("/ /")
1955     {
1956         const char dat1[] = "/";
1957         const char dat2[] = "/comment\n";
1958         const char exp1[] = "/";
1959         const char exp2[] = "";
1960         const char exp[] = "";
1961 
1962         NORMALIZE_2(dat1, dat2, exp1, exp2);
1963         NORM_COMBINED_2(dat1, dat2, exp);
1964     }
1965     SECTION("/ / msg")
1966     {
1967         const char dat1[] = "//";
1968         const char dat2[] = "comment\n";
1969         const char exp1[] = "";
1970         const char exp2[] = "";
1971         const char exp[] = "";
1972 
1973         NORMALIZE_2(dat1, dat2, exp1, exp2);
1974         NORM_COMBINED_2(dat1, dat2, exp);
1975     }
1976     SECTION("/ / LF")
1977     {
1978         const char dat1[] = "//comment";
1979         const char dat2[] = "\n";
1980         const char exp1[] = "";
1981         const char exp2[] = "";
1982         const char exp[] = "";
1983 
1984         NORMALIZE_2(dat1, dat2, exp1, exp2);
1985         NORM_COMBINED_2(dat1, dat2, exp);
1986     }
1987 
1988     SECTION("/ *")
1989     {
1990         const char dat1[] = "/";
1991         const char dat2[] = "* comment */";
1992         const char exp1[] = "/";
1993         const char exp2[] = "";
1994         const char exp[] = "";
1995 
1996         NORMALIZE_2(dat1, dat2, exp1, exp2);
1997         NORM_COMBINED_2(dat1, dat2, exp);
1998     }
1999     SECTION("/ * msg")
2000     {
2001         const char dat1[] = "/* t";
2002         const char dat2[] = "ext */";
2003         const char exp1[] = "";
2004         const char exp2[] = "";
2005         const char exp[] = "";
2006 
2007         NORMALIZE_2(dat1, dat2, exp1, exp2);
2008         NORM_COMBINED_2(dat1, dat2, exp);
2009     }
2010     SECTION("* /")
2011     {
2012         const char dat1[] = "/* comment *";
2013         const char dat2[] = "/";
2014         const char exp1[] = "";
2015         const char exp2[] = "";
2016         const char exp[] = "";
2017 
2018         NORMALIZE_2(dat1, dat2, exp1, exp2);
2019         NORM_COMBINED_2(dat1, dat2, exp);
2020     }
2021     SECTION("/ * msg * /")
2022     {
2023         const char dat1[] = "/";
2024         const char dat2[] = "* comment *";
2025         const char dat3[] = "/";
2026         const char exp1[] = "/";
2027         const char exp2[] = "";
2028         const char exp3[] = "";
2029         const char exp[] = "";
2030 
2031         NORMALIZE_3(dat1, dat2, dat3, exp1, exp2, exp3);
2032         NORM_COMBINED_3(dat1, dat2, dat3, exp);
2033     }
2034 
2035     SECTION("< !--")
2036     {
2037         const char dat1[] = "<";
2038         const char dat2[] = "!-- comment\n";
2039         const char exp1[] = "<";
2040         const char exp2[] = "";
2041         const char exp[] = "";
2042 
2043         NORMALIZE_2(dat1, dat2, exp1, exp2);
2044         NORM_COMBINED_2(dat1, dat2, exp);
2045     }
2046     SECTION("<! --")
2047     {
2048         const char dat1[] = "<!";
2049         const char dat2[] = "-- comment\n";
2050         const char exp1[] = "<!";
2051         const char exp2[] = "";
2052         const char exp[] = "";
2053 
2054         NORMALIZE_2(dat1, dat2, exp1, exp2);
2055         NORM_COMBINED_2(dat1, dat2, exp);
2056     }
2057     SECTION("<!- -")
2058     {
2059         const char dat1[] = "<!-";
2060         const char dat2[] = "- comment\n";
2061         const char exp1[] = "<!-";
2062         const char exp2[] = "";
2063         const char exp[] = "";
2064 
2065         NORMALIZE_2(dat1, dat2, exp1, exp2);
2066         NORM_COMBINED_2(dat1, dat2, exp);
2067     }
2068     SECTION("<!-- msg")
2069     {
2070         const char dat1[] = "<!--";
2071         const char dat2[] = "comment\n";
2072         const char exp1[] = "";
2073         const char exp2[] = "";
2074         const char exp[] = "";
2075 
2076         NORMALIZE_2(dat1, dat2, exp1, exp2);
2077         NORM_COMBINED_2(dat1, dat2, exp);
2078     }
2079     SECTION("<! -- msg")
2080     {
2081         const char dat1[] = "<";
2082         const char dat2[] = "!-";
2083         const char dat3[] = "-comment\n";
2084         const char exp1[] = "<";
2085         const char exp2[] = "!-";
2086         const char exp3[] = "";
2087         const char exp[] = "";
2088 
2089         NORMALIZE_3(dat1, dat2, dat3, exp1, exp2, exp3);
2090         NORM_COMBINED_3(dat1, dat2, dat3, exp);
2091     }
2092 }
2093 
2094 TEST_CASE("split in opening tag", "[JSNormalizer]")
2095 {
2096     SECTION("< script")
2097     {
2098         const char dat1[] = "<";
2099         const char dat2[] = "script";
2100         const char exp1[] = "<";
2101         const char exp2[] = "";
2102         const char exp[] = "";
2103 
2104         NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::OPENING_TAG);
2105         NORM_COMBINED_BAD_2(dat1, dat2, exp, JSTokenizer::OPENING_TAG);
2106     }
2107     SECTION("str='<s cript'")
2108     {
2109         const char dat1[] = "var str ='<s";
2110         const char dat2[] = "cript';";
2111         const char exp1[] = "var str='<s";
2112         const char exp2[] = "";
2113         const char exp[]  = "var str='";
2114 
2115         NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::OPENING_TAG);
2116         NORM_COMBINED_BAD_2(dat1, dat2, exp, JSTokenizer::OPENING_TAG);
2117     }
2118     SECTION("str='<scrip t'")
2119     {
2120         const char dat1[] = "var str ='<scrip";
2121         const char dat2[] = "t';";
2122         const char exp1[] = "var str='<scrip";
2123         const char exp2[] = "";
2124         const char exp[] = "var str='";
2125 
2126         NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::OPENING_TAG);
2127         NORM_COMBINED_BAD_2(dat1, dat2, exp, JSTokenizer::OPENING_TAG);
2128     }
2129     SECTION("< scr ipt")
2130     {
2131         const char dat1[] = "<";
2132         const char dat2[] = "scr";
2133         const char dat3[] = "ipt";
2134         const char exp1[] = "<";
2135         const char exp2[] = "scr";
2136         const char exp3[] = "";
2137         const char exp[] = "";
2138 
2139         NORM_BAD_3(dat1, dat2, dat3, exp1, exp2, exp3, JSTokenizer::OPENING_TAG);
2140         NORM_COMBINED_BAD_3(dat1, dat2, dat3, exp, JSTokenizer::OPENING_TAG);
2141     }
2142     SECTION("str='<sc rip t'")
2143     {
2144         const char dat1[] = "var str =\"<sc";
2145         const char dat2[] = "rip";
2146         const char dat3[] = "t\";";
2147         const char exp1[] = "var str=\"<sc";
2148         const char exp2[] = "rip";
2149         const char exp3[] = "";
2150         const char exp[] = "var str=\"";
2151 
2152         NORM_BAD_3(dat1, dat2, dat3, exp1, exp2, exp3, JSTokenizer::OPENING_TAG);
2153         NORM_COMBINED_BAD_3(dat1, dat2, dat3, exp, JSTokenizer::OPENING_TAG);
2154     }
2155 }
2156 
2157 TEST_CASE("split in closing tag", "[JSNormalizer]")
2158 {
2159     SECTION("< /script>")
2160     {
2161         const char dat1[] = "<";
2162         const char dat2[] = "/script>";
2163         const char exp1[] = "<";
2164         const char exp2[] = "";
2165         const char exp[] = "";
2166 
2167         NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::SCRIPT_ENDED);
2168         NORM_COMBINED_BAD_2(dat1, dat2, exp, JSTokenizer::SCRIPT_ENDED);
2169     }
2170     SECTION("</script >")
2171     {
2172         const char dat1[] = "</script";
2173         const char dat2[] = ">";
2174         const char exp1[] = "</script";
2175         const char exp2[] = "";
2176         const char exp[] = "";
2177 
2178         NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::SCRIPT_ENDED);
2179         NORM_COMBINED_BAD_2(dat1, dat2, exp, JSTokenizer::SCRIPT_ENDED);
2180     }
2181     SECTION("str='</ script>'")
2182     {
2183         const char dat1[] = "var str ='</";
2184         const char dat2[] = "script>';";
2185         const char exp1[] = "var str='</";
2186         const char exp2[] = "";
2187         const char exp[] = "var str='";
2188 
2189         NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::CLOSING_TAG);
2190         NORM_COMBINED_BAD_2(dat1, dat2, exp, JSTokenizer::CLOSING_TAG);
2191     }
2192     SECTION("str='</scrip t>'")
2193     {
2194         const char dat1[] = "var str ='</scrip";
2195         const char dat2[] = "t>';";
2196         const char exp1[] = "var str='</scrip";
2197         const char exp2[] = "";
2198         const char exp[] = "var str='";
2199 
2200         NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::CLOSING_TAG);
2201         NORM_COMBINED_BAD_2(dat1, dat2, exp, JSTokenizer::CLOSING_TAG);
2202     }
2203     SECTION("</ scr ipt>")
2204     {
2205         const char dat1[] = "</";
2206         const char dat2[] = "scr";
2207         const char dat3[] = "ipt>";
2208         const char exp1[] = "</";
2209         const char exp2[] = "/scr";
2210         const char exp3[] = "";
2211         const char exp[] = "";
2212 
2213         NORM_BAD_3(dat1, dat2, dat3, exp1, exp2, exp3, JSTokenizer::SCRIPT_ENDED);
2214         NORM_COMBINED_BAD_3(dat1, dat2, dat3, exp, JSTokenizer::SCRIPT_ENDED);
2215     }
2216     SECTION("str='</sc rip t>'")
2217     {
2218         const char dat1[] = "var str =\"</sc";
2219         const char dat2[] = "rip";
2220         const char dat3[] = "t>\";";
2221         const char exp1[] = "var str=\"</sc";
2222         const char exp2[] = "rip";
2223         const char exp3[] = "";
2224         const char exp[] = "var str=\"";
2225 
2226         NORM_BAD_3(dat1, dat2, dat3, exp1, exp2, exp3, JSTokenizer::CLOSING_TAG);
2227         NORM_COMBINED_BAD_3(dat1, dat2, dat3, exp, JSTokenizer::CLOSING_TAG);
2228     }
2229     SECTION("::::</scr ipt >")
2230     {
2231         const char dat1[] = ":::: </scr";
2232         const char dat2[] = "ipt";
2233         const char dat3[] = ">";
2234         const char exp1[] = "::::</scr";
2235         const char exp2[] = "cript";
2236         const char exp3[] = "";
2237         const char exp[] = "::::";
2238 
2239         NORM_BAD_3(dat1, dat2, dat3, exp1, exp2, exp3, JSTokenizer::SCRIPT_ENDED);
2240         NORM_COMBINED_BAD_3(dat1, dat2, dat3, exp, JSTokenizer::SCRIPT_ENDED);
2241     }
2242 }
2243 
2244 TEST_CASE("split in string literal", "[JSNormalizer]")
2245 {
2246     SECTION("\\ LF")
2247     {
2248         const char dat1[] = "var str =\"any\\";
2249         const char dat2[] = "\none\";";
2250         const char exp1[] = "var str=\"any\\";
2251         const char exp2[] = "one\";";
2252         const char exp[] = "var str=\"anyone\";";
2253 
2254         NORMALIZE_2(dat1, dat2, exp1, exp2);
2255         NORM_COMBINED_2(dat1, dat2, exp);
2256     }
2257     SECTION("\\ CR")
2258     {
2259         const char dat1[] = "var str =\"any\\";
2260         const char dat2[] = "\rone\";";
2261         const char exp1[] = "var str=\"any\\";
2262         const char exp2[] = "one\";";
2263         const char exp[] = "var str=\"anyone\";";
2264 
2265         NORMALIZE_2(dat1, dat2, exp1, exp2);
2266         NORM_COMBINED_2(dat1, dat2, exp);
2267     }
2268     SECTION("\\CR LF")
2269     {
2270         const char dat1[] = "var str =\"any\\\r";
2271         const char dat2[] = "\none\";";
2272         const char exp1[] = "var str=\"any";
2273         const char exp2[] = "one\";";
2274         const char exp[] = "var str=\"anyone\";";
2275 
2276         NORMALIZE_2(dat1, dat2, exp1, exp2);
2277         NORM_COMBINED_2(dat1, dat2, exp);
2278     }
2279     SECTION("\\ CRLF")
2280     {
2281         const char dat1[] = "var str =\"any\\";
2282         const char dat2[] = "\r\none\";";
2283         const char exp1[] = "var str=\"any\\";
2284         const char exp2[] = "one\";";
2285         const char exp[] = "var str=\"anyone\";";
2286 
2287         NORMALIZE_2(dat1, dat2, exp1, exp2);
2288         NORM_COMBINED_2(dat1, dat2, exp);
2289     }
2290     SECTION("\\ \"")
2291     {
2292         const char dat1[] = "var str =\"any\\";
2293         const char dat2[] = "\"one\";";
2294         const char exp1[] = "var str=\"any\\";
2295         const char exp2[] = "\\\"one\";";
2296         const char exp[] = "var str=\"any\\\"one\";";
2297 
2298         NORMALIZE_2(dat1, dat2, exp1, exp2);
2299         NORM_COMBINED_2(dat1, dat2, exp);
2300     }
2301     SECTION("\\ \'")
2302     {
2303         const char dat1[] = "var str =\"any\\";
2304         const char dat2[] = "\'one\";";
2305         const char exp1[] = "var str=\"any\\";
2306         const char exp2[] = "\'one\";";
2307         const char exp[] = "var str=\"any\\\'one\";";
2308 
2309         NORMALIZE_2(dat1, dat2, exp1, exp2);
2310         NORM_COMBINED_2(dat1, dat2, exp);
2311     }
2312     SECTION("\\ u1234tx")
2313     {
2314         const char dat1[] = "var str =\"any\\";
2315         const char dat2[] = "u1234tx\";";
2316         const char exp1[] = "var str=\"any\\";
2317         const char exp2[] = "u1234tx\";";
2318         const char exp[] = "var str=\"any\\u1234tx\";";
2319 
2320         NORMALIZE_2(dat1, dat2, exp1, exp2);
2321         NORM_COMBINED_2(dat1, dat2, exp);
2322     }
2323     SECTION("\\u 1234tx")
2324     {
2325         const char dat1[] = "var str =\"any\\u";
2326         const char dat2[] = "1234tx\";";
2327         const char exp1[] = "var str=\"any\\u";
2328         const char exp2[] = "1234tx\";";
2329         const char exp[] = "var str=\"any\\u1234tx\";";
2330 
2331         NORMALIZE_2(dat1, dat2, exp1, exp2);
2332         NORM_COMBINED_2(dat1, dat2, exp);
2333     }
2334 }
2335 
2336 TEST_CASE("split in identifier", "[JSNormalizer]")
2337 {
2338     SECTION("abc def")
2339     {
2340         const char dat1[] = "var abc";
2341         const char dat2[] = "def = 5";
2342         const char exp1[] = "var abc";
2343         const char exp2[] = " abcdef=5";
2344         const char exp[] = "var abcdef=5";
2345 
2346         NORMALIZE_2(dat1, dat2, exp1, exp2);
2347         NORM_COMBINED_2(dat1, dat2, exp);
2348     }
2349     SECTION("long identifier")
2350     {
2351         const char dat1[] = "var res = something + long_id_starts_here";
2352         const char dat2[] = "_long_id_ends_here;";
2353         const char exp1[] = "var res=something+long_id_starts_here";
2354         const char exp2[] = "long_id_starts_here_long_id_ends_here;";
2355         const char exp[] = "var res=something+long_id_starts_here_long_id_ends_here;";
2356 
2357         NORMALIZE_2(dat1, dat2, exp1, exp2);
2358         NORM_COMBINED_2(dat1, dat2, exp);
2359     }
2360 }
2361 
2362 TEST_CASE("split in keyword", "[JSNormalizer]")
2363 {
2364     SECTION("finally")
2365     {
2366         const char dat1[] = "\nfin";
2367         const char dat2[] = "ally;";
2368         const char exp1[] = "fin";
2369         const char exp2[] = "finally;";
2370         const char exp[] = "finally;";
2371 
2372         NORMALIZE_2(dat1, dat2, exp1, exp2);
2373         NORM_COMBINED_2(dat1, dat2, exp);
2374     }
2375     SECTION("in")
2376     {
2377         const char dat1[] = "i";
2378         const char dat2[] = "n";
2379         const char exp1[] = "i";
2380         const char exp2[] = "in";
2381         const char exp[] = "in";
2382 
2383         NORMALIZE_2(dat1, dat2, exp1, exp2);
2384         NORM_COMBINED_2(dat1, dat2, exp);
2385     }
2386     SECTION("instanceof")
2387     {
2388         const char dat1[] = "in";
2389         const char dat2[] = "stance";
2390         const char dat3[] = "of";
2391         const char exp1[] = "in";
2392         const char exp2[] = "instance";
2393         const char exp3[] = "instanceof";
2394         const char exp[] = "instanceof";
2395 
2396         NORMALIZE_3(dat1, dat2, dat3, exp1, exp2, exp3);
2397         NORM_COMBINED_3(dat1, dat2, dat3, exp);
2398     }
2399 }
2400 
// Feeds several PDUs through a single JSNormalizer instance and checks the
// output captured after every PDU.  DO, TRY and CLOSE are macros defined
// outside this part of the file; they are not handed the normalizer, so they
// presumably refer to the local named 'norm' — keep that name and the order
// of the calls intact.
TEST_CASE("split and continuation combined", "[JSNormalizer]")
{
    SECTION("PDU 1 [cont] PDU 2 [end end cont end]")
    {
        // Each input is written as two adjacent literals (script text, then
        // an optional "</script>" tail); the compiler concatenates them.
        const char src1[] = "a b"    "";
        const char src2[] = "c d"    "</script>";
        const char src3[] = ""       "</script>";
        const char src4[] = "\n"     "";

        // Expected strings after each PDU.  exp2..exp4 are identical, and the
        // alias numbering continues from exp1 (var_0002 follows var_0001).
        const char exp1[] = "var_0000 var_0001";
        const char exp2[] = " var_0002 var_0003";
        const char exp3[] = " var_0002 var_0003";
        const char exp4[] = " var_0002 var_0003";

        // Destination buffers sized from the expected strings.
        char dst1[sizeof(exp1)];
        char dst2[sizeof(exp2)];
        char dst3[sizeof(exp3)];
        char dst4[sizeof(exp4)];

        // One identifier context and one normalizer shared by all four PDUs.
        JSIdentifierCtx ident_ctx(norm_depth, max_scope_depth, s_ignored_ids);
        JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth);

        // All lengths below are sizeof - 1, i.e. without the terminating NUL.
        DO(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1);
        CHECK(!memcmp(exp1, dst1, sizeof(exp1) - 1));

        // TRY additionally takes a JSTokenizer status — presumably the return
        // code expected from the normalizer for this PDU.
        TRY(src2, sizeof(src2) - 1, dst2, sizeof(dst2) - 1, JSTokenizer::SCRIPT_ENDED);
        CHECK(!memcmp(exp2, dst2, sizeof(exp2) - 1));

        TRY(src3, sizeof(src3) - 1, dst3, sizeof(dst3) - 1, JSTokenizer::SCRIPT_ENDED);
        CHECK(!memcmp(exp3, dst3, sizeof(exp3) - 1));

        DO(src4, sizeof(src4) - 1, dst4, sizeof(dst4) - 1);
        CHECK(!memcmp(exp4, dst4, sizeof(exp4) - 1));

        CLOSE();
    }
    SECTION("PDU 1 [cont] PDU 2 [cont] PDU 3 [end]")
    {
        // The text "<!--comment\n" cut into three PDUs.
        const char src1[] = "<";
        const char src2[] = "!-";
        const char src3[] = "-comment\n";

        // Expected output after each PDU; empty once the third PDU arrives.
        const char exp1[] = "<";
        const char exp2[] = "<!-";
        const char exp3[] = "";

        // Expected content of the normalizer's temporary buffer after each
        // PDU: the input accumulated so far.
        const char tmp_buf1[] = "<";
        const char tmp_buf2[] = "<!-";
        const char tmp_buf3[] = "<!--comment\n";

        char dst1[sizeof(exp1)];
        char dst2[sizeof(exp2)];
        char dst3[sizeof(exp3)];

        JSIdentifierCtx ident_ctx(norm_depth, max_scope_depth, s_ignored_ids);
        JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth);

        // After every PDU: compare the output, then the size of the temporary
        // buffer (REQUIRE, so the following memcmp is not run on a size
        // mismatch), then its bytes.
        TRY(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1, JSTokenizer::SCRIPT_CONTINUE);
        CHECK(!memcmp(exp1, dst1, sizeof(exp1) - 1));
        REQUIRE(norm.get_tmp_buf_size() == sizeof(tmp_buf1) - 1);
        CHECK(!memcmp(norm.get_tmp_buf(), tmp_buf1, sizeof(tmp_buf1) - 1));

        TRY(src2, sizeof(src2) - 1, dst2, sizeof(dst2) - 1, JSTokenizer::SCRIPT_CONTINUE);
        CHECK(!memcmp(exp2, dst2, sizeof(exp2) - 1));
        REQUIRE(norm.get_tmp_buf_size() == sizeof(tmp_buf2) - 1);
        CHECK(!memcmp(norm.get_tmp_buf(), tmp_buf2, sizeof(tmp_buf2) - 1));

        TRY(src3, sizeof(src3) - 1, dst3, sizeof(dst3) - 1, JSTokenizer::SCRIPT_CONTINUE);
        CHECK(!memcmp(exp3, dst3, sizeof(exp3) - 1));
        REQUIRE(norm.get_tmp_buf_size() == sizeof(tmp_buf3) - 1);
        CHECK(!memcmp(norm.get_tmp_buf(), tmp_buf3, sizeof(tmp_buf3) - 1));

        CLOSE();
    }
}
2476 
2477 TEST_CASE("memcap", "[JSNormalizer]")
2478 {
2479     SECTION("3 tokens")
2480     {
2481         const char dat1[] = "var abc=in";
2482         const char dat2[] = "put;";
2483         const char exp1[] = "var abc=in";
2484         const char exp2[] = "input;";
2485         const char exp[] = "var abc=input;";
2486 
2487         NORM_LIMITED(6, dat1, dat2, exp1, exp2);
2488         NORM_COMBINED_LIMITED_2(6, dat1, dat2, exp);
2489     }
2490     SECTION("2 tokens and a half")
2491     {
2492         const char dat1[] = "var abc=in";
2493         const char dat2[] = "put;";
2494         const char exp1[] = "var abc=in";
2495         const char exp2[] = "input;";
2496         const char exp[] = "var abc=input;";
2497 
2498         NORM_LIMITED(4, dat1, dat2, exp1, exp2);
2499         NORM_COMBINED_LIMITED_2(4, dat1, dat2, exp);
2500     }
2501     SECTION("1 token")
2502     {
2503         const char dat1[] = "var abc=in";
2504         const char dat2[] = "put;";
2505         const char exp1[] = "var abc=in";
2506         const char exp2[] = "input;";
2507         const char exp[] = "var abc=input;";
2508 
2509         NORM_LIMITED(2, dat1, dat2, exp1, exp2);
2510         NORM_COMBINED_LIMITED_2(2, dat1, dat2, exp);
2511     }
2512     SECTION("a half")
2513     {
2514         const char dat1[] = "var abc=extract";
2515         const char dat2[] = "// just a comment\n";
2516         const char exp1[] = "var abc=extract";
2517         const char exp2[] = "";
2518         const char exp[] = "var abc=extract";
2519 
2520         NORM_LIMITED(5, dat1, dat2, exp1, exp2);
2521         NORM_COMBINED_LIMITED_2(5, dat1, dat2, exp);
2522     }
2523 }
2524 
2525 TEST_CASE("scope tracking", "[JSNormalizer]")
2526 {
2527     SECTION("parentheses")
2528     {
2529         const char dat1[] = "()";
2530         const char dat2[] = "()()()";
2531         const char dat3[] = "((()))";
2532         const char exp1[] = "()";
2533         const char exp2[] = "()()()";
2534         const char exp3[] = "((()))";
2535 
2536         NORMALIZE_1(dat1, exp1);
2537         NORMALIZE_1(dat2, exp2);
2538         NORMALIZE_1(dat3, exp3);
2539     }
2540     SECTION("curly braces")
2541     {
2542         const char dat1[] = "{}";
2543         const char dat2[] = "{}{}{}";
2544         const char dat3[] = "{{{}}}";
2545         const char exp1[] = "{}";
2546         const char exp2[] = "{}{}{}";
2547         const char exp3[] = "{{{}}}";
2548 
2549         NORMALIZE_1(dat1, exp1);
2550         NORMALIZE_1(dat2, exp2);
2551         NORMALIZE_1(dat3, exp3);
2552     }
2553     SECTION("square brackets")
2554     {
2555         const char dat1[] = "[]";
2556         const char dat2[] = "[][][]";
2557         const char dat3[] = "[[[]]]";
2558         const char exp1[] = "[]";
2559         const char exp2[] = "[][][]";
2560         const char exp3[] = "[[[]]]";
2561 
2562         NORMALIZE_1(dat1, exp1);
2563         NORMALIZE_1(dat2, exp2);
2564         NORMALIZE_1(dat3, exp3);
2565     }
2566     SECTION("mix of brackets")
2567     {
2568         const char dat1[] = "(){}[]";
2569         const char dat2[] = "({})[]";
2570         const char dat3[] = "(){[]}";
2571         const char exp1[] = "(){}[]";
2572         const char exp2[] = "({})[]";
2573         const char exp3[] = "(){[]}";
2574 
2575         NORMALIZE_1(dat1, exp1);
2576         NORMALIZE_1(dat2, exp2);
2577         NORMALIZE_1(dat3, exp3);
2578     }
2579     SECTION("parentheses - wrong closing symbol")
2580     {
2581         const char dat1[] = "({[ (} ]})";
2582         const char dat2[] = "({[ (] ]})";
2583         const char exp1[] = "({[(";
2584         const char exp2[] = "({[(";
2585 
2586         NORM_BAD_1(dat1, exp1, JSTokenizer::WRONG_CLOSING_SYMBOL);
2587         NORM_BAD_1(dat2, exp2, JSTokenizer::WRONG_CLOSING_SYMBOL);
2588     }
2589     SECTION("curly braces - wrong closing symbol")
2590     {
2591         const char dat1[] = "({[ {) ]})";
2592         const char dat2[] = "({[ {] ]})";
2593         const char exp1[] = "({[{";
2594         const char exp2[] = "({[{";
2595 
2596         NORM_BAD_1(dat1, exp1, JSTokenizer::WRONG_CLOSING_SYMBOL);
2597         NORM_BAD_1(dat2, exp2, JSTokenizer::WRONG_CLOSING_SYMBOL);
2598     }
2599     SECTION("square brackets - wrong closing symbol")
2600     {
2601         const char dat1[] = "([{ [) }])";
2602         const char dat2[] = "([{ [} }])";
2603         const char exp1[] = "([{[";
2604         const char exp2[] = "([{[";
2605 
2606         NORM_BAD_1(dat1, exp1, JSTokenizer::WRONG_CLOSING_SYMBOL);
2607         NORM_BAD_1(dat2, exp2, JSTokenizer::WRONG_CLOSING_SYMBOL);
2608     }
2609     SECTION("parentheses - mismatch")
2610     {
2611         const char dat1[] = ")";
2612         const char dat2[] = "())";
2613         const char dat3[] = "({[ ()) ]})";
2614         const char dat4[] = "(</script>";
2615         const char dat5[] = "(()</script>";
2616         const char exp1[] = "";
2617         const char exp2[] = "()";
2618         const char exp3[] = "({[()";
2619         const char exp4[] = "(";
2620         const char exp5[] = "(()";
2621 
2622         NORM_BAD_1(dat1, exp1, JSTokenizer::WRONG_CLOSING_SYMBOL);
2623         NORM_BAD_1(dat2, exp2, JSTokenizer::WRONG_CLOSING_SYMBOL);
2624         NORM_BAD_1(dat3, exp3, JSTokenizer::WRONG_CLOSING_SYMBOL);
2625         NORM_BAD_1(dat4, exp4, JSTokenizer::ENDED_IN_INNER_SCOPE);
2626         NORM_BAD_1(dat5, exp5, JSTokenizer::ENDED_IN_INNER_SCOPE);
2627     }
2628     SECTION("curly braces - mismatch")
2629     {
2630         const char dat1[] = "}";
2631         const char dat2[] = "{}}";
2632         const char dat3[] = "({[ {}} ]})";
2633         const char dat4[] = "{</script>";
2634         const char dat5[] = "{{}</script>";
2635         const char exp1[] = "";
2636         const char exp2[] = "{}";
2637         const char exp3[] = "({[{}";
2638         const char exp4[] = "{";
2639         const char exp5[] = "{{}";
2640 
2641         NORM_BAD_1(dat1, exp1, JSTokenizer::WRONG_CLOSING_SYMBOL);
2642         NORM_BAD_1(dat2, exp2, JSTokenizer::WRONG_CLOSING_SYMBOL);
2643         NORM_BAD_1(dat3, exp3, JSTokenizer::WRONG_CLOSING_SYMBOL);
2644         NORM_BAD_1(dat4, exp4, JSTokenizer::ENDED_IN_INNER_SCOPE);
2645         NORM_BAD_1(dat5, exp5, JSTokenizer::ENDED_IN_INNER_SCOPE);
2646     }
2647     SECTION("square brackets - mismatch")
2648     {
2649         const char dat1[] = "]";
2650         const char dat2[] = "[]]";
2651         const char dat3[] = "([{ []] }])";
2652         const char dat4[] = "[</script>";
2653         const char dat5[] = "[[]</script>";
2654         const char exp1[] = "";
2655         const char exp2[] = "[]";
2656         const char exp3[] = "([{[]";
2657         const char exp4[] = "[";
2658         const char exp5[] = "[[]";
2659 
2660         NORM_BAD_1(dat1, exp1, JSTokenizer::WRONG_CLOSING_SYMBOL);
2661         NORM_BAD_1(dat2, exp2, JSTokenizer::WRONG_CLOSING_SYMBOL);
2662         NORM_BAD_1(dat3, exp3, JSTokenizer::WRONG_CLOSING_SYMBOL);
2663         NORM_BAD_1(dat4, exp4, JSTokenizer::ENDED_IN_INNER_SCOPE);
2664         NORM_BAD_1(dat5, exp5, JSTokenizer::ENDED_IN_INNER_SCOPE);
2665     }
2666     SECTION("parentheses - continuation")
2667     {
2668         const char dat1[] = "((";
2669         const char dat2[] = "))";
2670         const char exp1[] = "((";
2671         const char exp2[] = "))";
2672         const char exp[] = "(())";
2673 
2674         NORMALIZE_2(dat1, dat2, exp1, exp2);
2675         NORM_COMBINED_2(dat1, dat2, exp);
2676     }
2677     SECTION("curly braces - continuation")
2678     {
2679         const char dat1[] = "{{";
2680         const char dat2[] = "}}";
2681         const char exp1[] = "{{";
2682         const char exp2[] = "}}";
2683         const char exp[] = "{{}}";
2684 
2685         NORMALIZE_2(dat1, dat2, exp1, exp2);
2686         NORM_COMBINED_2(dat1, dat2, exp);
2687     }
2688     SECTION("square brackets - continuation")
2689     {
2690         const char dat1[] = "[[";
2691         const char dat2[] = "]]";
2692         const char exp1[] = "[[";
2693         const char exp2[] = "]]";
2694         const char exp[] = "[[]]";
2695 
2696         NORMALIZE_2(dat1, dat2, exp1, exp2);
2697         NORM_COMBINED_2(dat1, dat2, exp);
2698     }
2699     SECTION("parentheses - mismatch in continuation")
2700     {
2701         const char dat1[] = "(";
2702         const char dat2[] = "))";
2703         const char dat3[] = "(";
2704         const char dat4[] = " </script>";
2705         const char exp1[] = "(";
2706         const char exp2[] = ")";
2707         const char exp3[] = "(";
2708         const char exp4[] = "";
2709 
2710         const char exp5[] = "()";
2711         const char exp6[] = "(";
2712 
2713         NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::WRONG_CLOSING_SYMBOL);
2714         NORM_BAD_2(dat3, dat4, exp3, exp4, JSTokenizer::ENDED_IN_INNER_SCOPE);
2715 
2716         NORM_COMBINED_BAD_2(dat1, dat2, exp5, JSTokenizer::WRONG_CLOSING_SYMBOL);
2717         NORM_COMBINED_BAD_2(dat3, dat4, exp6, JSTokenizer::ENDED_IN_INNER_SCOPE);
2718     }
2719     SECTION("curly braces - mismatch in continuation")
2720     {
2721         const char dat1[] = "{";
2722         const char dat2[] = "}}";
2723         const char dat3[] = "{";
2724         const char dat4[] = " </script>";
2725         const char exp1[] = "{";
2726         const char exp2[] = "}";
2727         const char exp3[] = "{";
2728         const char exp4[] = "";
2729 
2730         const char exp5[] = "{}";
2731         const char exp6[] = "{";
2732 
2733         NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::WRONG_CLOSING_SYMBOL);
2734         NORM_BAD_2(dat3, dat4, exp3, exp4, JSTokenizer::ENDED_IN_INNER_SCOPE);
2735 
2736         NORM_COMBINED_BAD_2(dat1, dat2, exp5, JSTokenizer::WRONG_CLOSING_SYMBOL);
2737         NORM_COMBINED_BAD_2(dat3, dat4, exp6, JSTokenizer::ENDED_IN_INNER_SCOPE);
2738     }
2739     SECTION("square brackets - mismatch in continuation")
2740     {
2741         const char dat1[] = "[";
2742         const char dat2[] = "]]";
2743         const char dat3[] = "[";
2744         const char dat4[] = " </script>";
2745         const char exp1[] = "[";
2746         const char exp2[] = "]";
2747         const char exp3[] = "[";
2748         const char exp4[] = "";
2749 
2750         const char exp5[] = "[]";
2751         const char exp6[] = "[";
2752 
2753         NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::WRONG_CLOSING_SYMBOL);
2754         NORM_BAD_2(dat3, dat4, exp3, exp4, JSTokenizer::ENDED_IN_INNER_SCOPE);
2755 
2756         NORM_COMBINED_BAD_2(dat1, dat2, exp5, JSTokenizer::WRONG_CLOSING_SYMBOL);
2757         NORM_COMBINED_BAD_2(dat3, dat4, exp6, JSTokenizer::ENDED_IN_INNER_SCOPE);
2758     }
2759 }
2760 
2761 TEST_CASE("scope misc", "[JSNormalizer]")
2762 {
2763     const int stack_limit = 256;
2764     const char* open = "1+(";
2765     const char* close = "-1)";
2766 
2767     SECTION("max stack")
2768     {
2769         std::string scr;
2770 
2771         for (int i = 0; i < stack_limit - 1; ++i)
2772             scr += open;
2773         for (int i = 0; i < stack_limit - 1; ++i)
2774             scr += close;
2775 
2776         const char* dat = scr.c_str();
2777         int dat_len = strlen(dat);
2778         const char* exp = scr.c_str();
2779         int exp_len = strlen(exp);
2780         char* act = new char[exp_len];
2781 
2782         JSIdentifierCtxStub ident_ctx;
2783         JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth);
2784 
2785         DO(dat, dat_len, act, exp_len);
2786         CHECK(!memcmp(exp, act, exp_len));
2787 
2788         delete[] act;
2789 
2790         CLOSE();
2791     }
2792 
2793     SECTION("max stack")
2794     {
2795         std::string scr;
2796         std::string nsc;
2797 
2798         for (int i = 0; i < stack_limit; ++i)
2799             scr += open;
2800         for (int i = 0; i < stack_limit; ++i)
2801             scr += close;
2802         for (int i = 0; i < stack_limit - 1; ++i)
2803             nsc += open;
2804         nsc += "1+";
2805 
2806         const char* dat = scr.c_str();
2807         int dat_len = strlen(dat);
2808         const char* exp = nsc.c_str();
2809         int exp_len = strlen(exp);
2810         char* act = new char[exp_len];
2811 
2812         JSIdentifierCtxStub ident_ctx;
2813         JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth);
2814 
2815         TRY(dat, dat_len, act, exp_len, JSTokenizer::BRACKET_NESTING_OVERFLOW);
2816         CHECK(!memcmp(exp, act, exp_len));
2817 
2818         delete[] act;
2819     }
2820 }
2821 
2822 TEST_CASE("scope tail handling", "[JSNormalizer]")
2823 {
2824     // Padding ':' symbol has been chosen, since it:
2825     //  * forms a single state for Parser
2826     //  * doesn't insert white spaces
2827     //  * forms a single match, i.e. there are no '::' ':::' patterns
2828     //
2829     // Thus, the tail of "::({[]})" will have JSTOKENIZER_MAX_STATES
2830     // and the same number of characters in it.
2831 
2832 #if JSTOKENIZER_MAX_STATES != 8
2833 #error "scope tail handling" tests are designed for the tail of 8 bytes size
2834 #endif
2835 
2836     SECTION("no scope-symbols in the tail")
2837     {
2838         const char dat1[] = "((((::::::::";
2839         const char dat2[] = "):):):):";
2840         const char dat3[] = "{}{{::::::::";
2841         const char dat4[] = "::{}}}::";
2842         const char dat5[] = "[][[::::::::";
2843         const char dat6[] = "::::]][]";
2844         const char exp1[] = "((((::::::::";
2845         const char exp2[] = "):):):):";
2846         const char exp3[] = "{}{{::::::::";
2847         const char exp4[] = "::{}}}::";
2848         const char exp5[] = "[][[::::::::";
2849         const char exp6[] = "::::]][]";
2850 
2851         const char exp7[] = "((((::::::::):):):):";
2852         const char exp8[] = "{}{{::::::::::{}}}::";
2853         const char exp9[] = "[][[::::::::::::]][]";
2854 
2855         NORMALIZE_2(dat1, dat2, exp1, exp2);
2856         NORMALIZE_2(dat3, dat4, exp3, exp4);
2857         NORMALIZE_2(dat5, dat6, exp5, exp6);
2858 
2859         NORM_COMBINED_2(dat1, dat2, exp7);
2860         NORM_COMBINED_2(dat3, dat4, exp8);
2861         NORM_COMBINED_2(dat5, dat6, exp9);
2862     }
2863 
2864     SECTION("opening scope-symbols in the tail")
2865     {
2866         const char dat1[] = "::::(:::::::";
2867         const char dat2[] = "):::::::";
2868         const char dat3[] = ":::::::::::{";
2869         const char dat4[] = ":::::::}";
2870         const char dat5[] = "::::[:::::::";
2871         const char dat6[] = "::::]:::";
2872         const char exp1[] = "::::(:::::::";
2873         const char exp2[] = "):::::::";
2874         const char exp3[] = ":::::::::::{";
2875         const char exp4[] = ":::::::}";
2876         const char exp5[] = "::::[:::::::";
2877         const char exp6[] = "::::]:::";
2878 
2879         const char exp7[] = "::::(:::::::):::::::";
2880         const char exp8[] = ":::::::::::{:::::::}";
2881         const char exp9[] = "::::[:::::::::::]:::";
2882 
2883         NORMALIZE_2(dat1, dat2, exp1, exp2);
2884         NORMALIZE_2(dat3, dat4, exp3, exp4);
2885         NORMALIZE_2(dat5, dat6, exp5, exp6);
2886 
2887         NORM_COMBINED_2(dat1, dat2, exp7);
2888         NORM_COMBINED_2(dat3, dat4, exp8);
2889         NORM_COMBINED_2(dat5, dat6, exp9);
2890     }
2891 
2892     SECTION("closing scope-symbols in the tail")
2893     {
2894         const char dat1[] = "(((()::::::)";
2895         const char dat2[] = "()::::))";
2896         const char dat3[] = "{{{{:::::::}";
2897         const char dat4[] = ":::::}}}";
2898         const char dat5[] = "[::::::::]::";
2899         const char dat6[] = "::::::::";
2900         const char exp1[] = "(((()::::::)";
2901         const char exp2[] = "()::::))";
2902         const char exp3[] = "{{{{:::::::}";
2903         const char exp4[] = ":::::}}}";
2904         const char exp5[] = "[::::::::]::";
2905         const char exp6[] = "::::::::";
2906 
2907         const char exp7[] = "(((()::::::)()::::))";
2908         const char exp8[] = "{{{{:::::::}:::::}}}";
2909         const char exp9[] = "[::::::::]::::::::::";
2910 
2911         NORMALIZE_2(dat1, dat2, exp1, exp2);
2912         NORMALIZE_2(dat3, dat4, exp3, exp4);
2913         NORMALIZE_2(dat5, dat6, exp5, exp6);
2914 
2915         NORM_COMBINED_2(dat1, dat2, exp7);
2916         NORM_COMBINED_2(dat3, dat4, exp8);
2917         NORM_COMBINED_2(dat5, dat6, exp9);
2918     }
2919 
2920     SECTION("newly opening scope-symbols in the tail")
2921     {
2922         const char dat1[] = "(:::(::::::(";
2923         const char dat2[] = "))):::::";
2924         const char dat3[] = "{:{:{:{:{:{:";
2925         const char dat4[] = "::}}}}}}";
2926         const char dat5[] = "[:[:[:::[:::";
2927         const char dat6[] = "::::]]]]";
2928         const char exp1[] = "(:::(::::::(";
2929         const char exp2[] = "))):::::";
2930         const char exp3[] = "{:{:{:{:{:{:";
2931         const char exp4[] = "::}}}}}}";
2932         const char exp5[] = "[:[:[:::[:::";
2933         const char exp6[] = "::::]]]]";
2934 
2935         const char exp7[] = "(:::(::::::())):::::";
2936         const char exp8[] = "{:{:{:{:{:{:::}}}}}}";
2937         const char exp9[] = "[:[:[:::[:::::::]]]]";
2938 
2939         NORMALIZE_2(dat1, dat2, exp1, exp2);
2940         NORMALIZE_2(dat3, dat4, exp3, exp4);
2941         NORMALIZE_2(dat5, dat6, exp5, exp6);
2942 
2943         NORM_COMBINED_2(dat1, dat2, exp7);
2944         NORM_COMBINED_2(dat3, dat4, exp8);
2945         NORM_COMBINED_2(dat5, dat6, exp9);
2946     }
2947 
2948     SECTION("fully closing scope-symbols in the tail")
2949     {
2950         const char dat1[] = "((((::::))))";
2951         const char dat2[] = "::::::::";
2952         const char dat3[] = "{{{{}:}:}:}:";
2953         const char dat4[] = "::::{}{}";
2954         const char dat5[] = "[[:::::::]:]";
2955         const char dat6[] = "[::::::]";
2956         const char exp1[] = "((((::::))))";
2957         const char exp2[] = "::::::::";
2958         const char exp3[] = "{{{{}:}:}:}:";
2959         const char exp4[] = "::::{}{}";
2960         const char exp5[] = "[[:::::::]:]";
2961         const char exp6[] = "[::::::]";
2962 
2963         const char exp7[] = "((((::::))))::::::::";
2964         const char exp8[] = "{{{{}:}:}:}:::::{}{}";
2965         const char exp9[] = "[[:::::::]:][::::::]";
2966 
2967         NORMALIZE_2(dat1, dat2, exp1, exp2);
2968         NORMALIZE_2(dat3, dat4, exp3, exp4);
2969         NORMALIZE_2(dat5, dat6, exp5, exp6);
2970 
2971         NORM_COMBINED_2(dat1, dat2, exp7);
2972         NORM_COMBINED_2(dat3, dat4, exp8);
2973         NORM_COMBINED_2(dat5, dat6, exp9);
2974     }
2975 
2976     SECTION("extra scope-symbols in the tail")
2977     {
2978         const char dat1[] = "((((((((";
2979         const char dat2[] = ")))))))))";
2980         const char dat3[] = "{{{{{{{{";
2981         const char dat4[] = "}}}}}}]}";
2982         const char dat5[] = "[[[[[[[[";
2983         const char dat6[] = "]]]]]]]</script>";
2984         const char exp1[] = "((((((((";
2985         const char exp2[] = "))))))))";
2986         const char exp3[] = "{{{{{{{{";
2987         const char exp4[] = "}}}}}}";
2988         const char exp5[] = "[[[[[[[[";
2989         const char exp6[] = "]]]]]]]";
2990 
2991         const char exp7[] = "(((((((())))))))";
2992         const char exp8[] = "{{{{{{{{}}}}}}";
2993         const char exp9[] = "[[[[[[[[]]]]]]]";
2994 
2995         NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::WRONG_CLOSING_SYMBOL);
2996         NORM_BAD_2(dat3, dat4, exp3, exp4, JSTokenizer::WRONG_CLOSING_SYMBOL);
2997         NORM_BAD_2(dat5, dat6, exp5, exp6, JSTokenizer::ENDED_IN_INNER_SCOPE);
2998 
2999         NORM_COMBINED_BAD_2(dat1, dat2, exp7, JSTokenizer::WRONG_CLOSING_SYMBOL);
3000         NORM_COMBINED_BAD_2(dat3, dat4, exp8, JSTokenizer::WRONG_CLOSING_SYMBOL);
3001         NORM_COMBINED_BAD_2(dat5, dat6, exp9, JSTokenizer::ENDED_IN_INNER_SCOPE);
3002     }
3003 
3004     SECTION("overwriting scope-symbols in the tail")
3005     {
3006         const char dat1[] = "(((((((())))";
3007         const char dat2[] = ":))))";
3008         const char dat3[] = "({[(:):]{}{}";
3009         const char dat4[] = "}[]())";
3010         const char dat5[] = "{{{{}[[]](((";
3011         const char dat6[] = ")))}}}";
3012         const char exp1[] = "(((((((())))";
3013         const char exp2[] = ":))))";
3014         const char exp3[] = "({[(:):]{}{}";
3015         const char exp4[] = "}[]())";
3016         const char exp5[] = "{{{{}[[]](((";
3017         const char exp6[] = ")))}}}";
3018 
3019         const char exp7[] = "(((((((()))):))))";
3020         const char exp8[] = "({[(:):]{}{}}[]())";
3021         const char exp9[] = "{{{{}[[]]((()))}}}";
3022 
3023         NORMALIZE_2(dat1, dat2, exp1, exp2);
3024         NORMALIZE_2(dat3, dat4, exp3, exp4);
3025         NORMALIZE_2(dat5, dat6, exp5, exp6);
3026 
3027         NORM_COMBINED_2(dat1, dat2, exp7);
3028         NORM_COMBINED_2(dat3, dat4, exp8);
3029         NORM_COMBINED_2(dat5, dat6, exp9);
3030     }
3031 }
3032 
3033 TEST_CASE("ignored identifiers", "[JSNormalizer]")
3034 {
3035     // 'console' 'eval' 'document' are in the ignore list
3036 
3037     SECTION("a standalone identifier")
3038     {
3039         const char dat1[] = "alpha bravo console delta eval";
3040         const char dat2[] = "var a = 0;     console = 1;";
3041         const char dat3[] = "var a = 0; var console = 1;";
3042         const char dat4[] = "foo(0); console(1); bar(2); console1(3); baz(4);";
3043         const char dat5[] = "foo(0);    eval(1); bar(2); evaluate(3); baz(4);";
3044         const char exp1[] = "var_0000 var_0001 console var_0002 eval";
3045         const char exp2[] = "var var_0000=0;console=1;";
3046         const char exp3[] = "var var_0000=0;var console=1;";
3047         const char exp4[] = "var_0000(0);console(1);var_0001(2);var_0002(3);var_0003(4);";
3048         const char exp5[] = "var_0000(0);eval(1);var_0001(2);var_0002(3);var_0003(4);";
3049 
3050         NORMALIZE_S(dat1, exp1);
3051         NORMALIZE_S(dat2, exp2);
3052         NORMALIZE_S(dat3, exp3);
3053         NORMALIZE_S(dat4, exp4);
3054         NORMALIZE_S(dat5, exp5);
3055     }
3056 
3057     SECTION("inner objects")
3058     {
3059         const char dat1[] = "alpha.bravo.charlie.delta";
3060         const char dat2[] = "alpha.bravo.console.delta";
3061         const char dat3[] = "eval.alpha.bravo.charlie.delta";
3062         const char dat4[] = "eval.alpha.bravo.console.delta";
3063         const char exp1[] = "var_0000.var_0001.var_0002.var_0003";
3064         const char exp2[] = "var_0000.var_0001.var_0002.var_0003";
3065         const char exp3[] = "eval.alpha.bravo.charlie.delta";
3066         const char exp4[] = "eval.alpha.bravo.console.delta";
3067 
3068         NORMALIZE_S(dat1, exp1);
3069         NORMALIZE_S(dat2, exp2);
3070         NORMALIZE_S(dat3, exp3);
3071         NORMALIZE_S(dat4, exp4);
3072     }
3073 
3074     SECTION("function calls")
3075     {
3076         const char dat1[] = "foo.bar.baz()";
3077         const char dat2[] = "foo.bar().baz";
3078         const char dat3[] = "foo().bar.baz";
3079         const char dat4[] = "eval.bar.baz()";
3080         const char dat5[] = "eval.bar().baz";
3081         const char dat6[] = "eval().bar.baz";
3082         const char dat7[] = "foo.eval.baz()";
3083         const char dat8[] = "foo.eval().baz";
3084         const char dat9[] = "foo().eval.baz";
3085         const char dat10[] = "foo.bar.eval()";
3086         const char dat11[] = "foo.bar().eval";
3087         const char dat12[] = "var_0000().bar.eval";
3088         const char exp1[] = "var_0000.var_0001.var_0002()";
3089         const char exp2[] = "var_0000.var_0001().var_0002";
3090         const char exp3[] = "var_0000().var_0001.var_0002";
3091         const char exp4[] = "eval.bar.baz()";
3092         const char exp5[] = "eval.bar().baz";
3093         const char exp6[] = "eval().bar.baz";
3094         const char exp7[] = "var_0000.var_0001.var_0002()";
3095         const char exp8[] = "var_0000.var_0001().var_0002";
3096         const char exp9[] = "var_0000().var_0001.var_0002";
3097         const char exp10[] = "var_0000.var_0001.var_0002()";
3098         const char exp11[] = "var_0000.var_0001().var_0002";
3099         const char exp12[] = "var_0000().var_0001.var_0002";
3100 
3101         NORMALIZE_S(dat1, exp1);
3102         NORMALIZE_S(dat2, exp2);
3103         NORMALIZE_S(dat3, exp3);
3104         NORMALIZE_S(dat4, exp4);
3105         NORMALIZE_S(dat5, exp5);
3106         NORMALIZE_S(dat6, exp6);
3107         NORMALIZE_S(dat7, exp7);
3108         NORMALIZE_S(dat8, exp8);
3109         NORMALIZE_S(dat9, exp9);
3110         NORMALIZE_S(dat10, exp10);
3111         NORMALIZE_S(dat11, exp11);
3112         NORMALIZE_S(dat12, exp12);
3113     }
3114 }
3115 
3116 TEST_CASE("ignored identifier chain tracking", "[JSNormalizer]")
3117 {
3118     // 'console' 'eval' 'document' are in the ignore list
3119 
3120     SECTION("chain terminators")
3121     {
3122         const char dat1[] = "eval.foo.bar.baz";
3123         const char dat2[] = "eval.foo bar.baz";
3124         const char dat3[] = "eval.foo;bar.baz";
3125         const char dat4[] = "eval.foo,bar.baz";
3126         const char dat5[] = "eval.foo*bar.baz";
3127         const char dat6[] = "eval.foo*=bar.baz";
3128         const char dat7[] = "eval.foo/bar.baz";
3129         const char dat8[] = "eval.foo/=bar.baz";
3130         const char dat9[] = "eval.foo%bar.baz";
3131         const char dat10[] = "eval.foo%=bar.baz";
3132         const char dat11[] = "eval.foo+bar.baz";
3133         const char dat12[] = "eval.foo+=bar.baz";
3134         const char dat13[] = "eval.foo-bar.baz";
3135         const char dat14[] = "eval.foo-=bar.baz";
3136         const char dat15[] = "eval.foo<<bar.baz";
3137         const char dat16[] = "eval.foo<<=bar.baz";
3138         const char dat17[] = "eval.foo>>bar.baz";
3139         const char dat18[] = "eval.foo>>=bar.baz";
3140         const char dat19[] = "eval.foo>>>bar.baz";
3141         const char dat20[] = "eval.foo>>>=bar.baz";
3142         const char dat21[] = "eval.foo<bar.baz";
3143         const char dat22[] = "eval.foo<=bar.baz";
3144         const char dat23[] = "eval.foo>bar.baz";
3145         const char dat24[] = "eval.foo>=bar.baz";
3146         const char dat25[] = "eval.foo instanceof bar.baz";
3147         const char dat26[] = "eval.foo==bar.baz";
3148         const char dat27[] = "eval.foo!=bar.baz";
3149         const char dat28[] = "eval.foo===bar.baz";
3150         const char dat29[] = "eval.foo!==bar.baz";
3151         const char dat30[] = "eval.foo&bar.baz";
3152         const char dat31[] = "eval.foo&=bar.baz";
3153         const char dat32[] = "eval.foo&&bar.baz";
3154         const char dat33[] = "eval.foo|bar.baz";
3155         const char dat34[] = "eval.foo|=bar.baz";
3156         const char dat35[] = "eval.foo||bar.baz";
3157         const char dat36[] = "eval.foo^bar.baz";
3158         const char dat37[] = "eval.foo^=bar.baz";
3159         const char dat38[] = "eval.foo?bar.baz";
3160         const char dat39[] = "eval.foo(bar.baz)";
3161         const char dat40[] = "eval.var.foo";
3162         const char dat41[] = "eval. break() . foo";
3163 
3164         const char exp1[] = "eval.foo.bar.baz";
3165         const char exp2[] = "eval.foo var_0000.var_0001";
3166         const char exp3[] = "eval.foo;var_0000.var_0001";
3167         const char exp4[] = "eval.foo,var_0000.var_0001";
3168         const char exp5[] = "eval.foo*var_0000.var_0001";
3169         const char exp6[] = "eval.foo*=var_0000.var_0001";
3170         const char exp7[] = "eval.foo/var_0000.var_0001";
3171         const char exp8[] = "eval.foo/=var_0000.var_0001";
3172         const char exp9[] = "eval.foo%var_0000.var_0001";
3173         const char exp10[] = "eval.foo%=var_0000.var_0001";
3174         const char exp11[] = "eval.foo+var_0000.var_0001";
3175         const char exp12[] = "eval.foo+=var_0000.var_0001";
3176         const char exp13[] = "eval.foo-var_0000.var_0001";
3177         const char exp14[] = "eval.foo-=var_0000.var_0001";
3178         const char exp15[] = "eval.foo<<var_0000.var_0001";
3179         const char exp16[] = "eval.foo<<=var_0000.var_0001";
3180         const char exp17[] = "eval.foo>>var_0000.var_0001";
3181         const char exp18[] = "eval.foo>>=var_0000.var_0001";
3182         const char exp19[] = "eval.foo>>>var_0000.var_0001";
3183         const char exp20[] = "eval.foo>>>=var_0000.var_0001";
3184         const char exp21[] = "eval.foo<var_0000.var_0001";
3185         const char exp22[] = "eval.foo<=var_0000.var_0001";
3186         const char exp23[] = "eval.foo>var_0000.var_0001";
3187         const char exp24[] = "eval.foo>=var_0000.var_0001";
3188         const char exp25[] = "eval.foo instanceof var_0000.var_0001";
3189         const char exp26[] = "eval.foo==var_0000.var_0001";
3190         const char exp27[] = "eval.foo!=var_0000.var_0001";
3191         const char exp28[] = "eval.foo===var_0000.var_0001";
3192         const char exp29[] = "eval.foo!==var_0000.var_0001";
3193         const char exp30[] = "eval.foo&var_0000.var_0001";
3194         const char exp31[] = "eval.foo&=var_0000.var_0001";
3195         const char exp32[] = "eval.foo&&var_0000.var_0001";
3196         const char exp33[] = "eval.foo|var_0000.var_0001";
3197         const char exp34[] = "eval.foo|=var_0000.var_0001";
3198         const char exp35[] = "eval.foo||var_0000.var_0001";
3199         const char exp36[] = "eval.foo^var_0000.var_0001";
3200         const char exp37[] = "eval.foo^=var_0000.var_0001";
3201         const char exp38[] = "eval.foo?var_0000.var_0001";
3202         const char exp39[] = "eval.foo(var_0000.var_0001)";
3203         const char exp40[] = "eval.var.foo";
3204         const char exp41[] = "eval.break().foo";
3205 
3206         NORMALIZE_S(dat1, exp1);
3207         NORMALIZE_S(dat2, exp2);
3208         NORMALIZE_S(dat3, exp3);
3209         NORMALIZE_S(dat4, exp4);
3210         NORMALIZE_S(dat5, exp5);
3211         NORMALIZE_S(dat6, exp6);
3212         NORMALIZE_S(dat7, exp7);
3213         NORMALIZE_S(dat8, exp8);
3214         NORMALIZE_S(dat9, exp9);
3215         NORMALIZE_S(dat10, exp10);
3216         NORMALIZE_S(dat11, exp11);
3217         NORMALIZE_S(dat12, exp12);
3218         NORMALIZE_S(dat13, exp13);
3219         NORMALIZE_S(dat14, exp14);
3220         NORMALIZE_S(dat15, exp15);
3221         NORMALIZE_S(dat16, exp16);
3222         NORMALIZE_S(dat17, exp17);
3223         NORMALIZE_S(dat18, exp18);
3224         NORMALIZE_S(dat19, exp19);
3225         NORMALIZE_S(dat20, exp20);
3226         NORMALIZE_S(dat21, exp21);
3227         NORMALIZE_S(dat22, exp22);
3228         NORMALIZE_S(dat23, exp23);
3229         NORMALIZE_S(dat24, exp24);
3230         NORMALIZE_S(dat25, exp25);
3231         NORMALIZE_S(dat26, exp26);
3232         NORMALIZE_S(dat27, exp27);
3233         NORMALIZE_S(dat28, exp28);
3234         NORMALIZE_S(dat29, exp29);
3235         NORMALIZE_S(dat30, exp30);
3236         NORMALIZE_S(dat31, exp31);
3237         NORMALIZE_S(dat32, exp32);
3238         NORMALIZE_S(dat33, exp33);
3239         NORMALIZE_S(dat34, exp34);
3240         NORMALIZE_S(dat35, exp35);
3241         NORMALIZE_S(dat36, exp36);
3242         NORMALIZE_S(dat37, exp37);
3243         NORMALIZE_S(dat38, exp38);
3244         NORMALIZE_S(dat39, exp39);
3245         NORMALIZE_S(dat40, exp40);
3246         NORMALIZE_S(dat41, exp41);
3247     }
3248 
3249     SECTION("over inner scopes")
3250     {
3251         const char dat1[] = "eval.foo.bar.baz";
3252         const char dat2[] = "eval().foo.bar.baz";
3253         const char dat3[] = "eval.foo().bar.baz";
3254         const char dat4[] = "eval(foo.bar).baz";
3255         const char dat5[] = "eval.foo().bar[].baz";
3256         const char dat6[] = "eval.foo{bar[]}.baz";
3257         const char dat7[] = "eval(foo+bar).baz";
3258         const char dat8[] = "eval(foo bar).baz";
3259         const char exp1[] = "eval.foo.bar.baz";
3260         const char exp2[] = "eval().foo.bar.baz";
3261         const char exp3[] = "eval.foo().bar.baz";
3262         const char exp4[] = "eval(var_0000.var_0001).baz";
3263         const char exp5[] = "eval.foo().bar[].baz";
3264         const char exp6[] = "eval.foo{var_0000[]}.var_0001";
3265         const char exp7[] = "eval(var_0000+var_0001).baz";
3266         const char exp8[] = "eval(var_0000 var_0001).baz";
3267 
3268         NORMALIZE_S(dat1, exp1);
3269         NORMALIZE_S(dat2, exp2);
3270         NORMALIZE_S(dat3, exp3);
3271         NORMALIZE_S(dat4, exp4);
3272         NORMALIZE_S(dat5, exp5);
3273         NORMALIZE_S(dat6, exp6);
3274         NORMALIZE_S(dat7, exp7);
3275         NORMALIZE_S(dat8, exp8);
3276     }
3277 
3278     SECTION("spaces and operators")
3279     {
3280         const char dat1[] = "foo.bar.baz console.log";
3281         const char dat2[] = "console.log foo.bar.baz";
3282         const char dat3[] = "foo . bar . baz console . log";
3283         const char dat4[] = "console . log foo . bar . baz";
3284         const char dat5[] = "console . foo . bar . baz";
3285         const char dat6[] = "console = foo . bar . baz";
3286         const char dat7[] = "console . foo + bar . baz";
3287         const char dat8[] = "console . foo . bar : baz";
3288         const char dat9[] = "console.\nfoo";
3289         const char exp1[] = "var_0000.var_0001.var_0002 console.log";
3290         const char exp2[] = "console.log var_0000.var_0001.var_0002";
3291         const char exp3[] = "var_0000.var_0001.var_0002 console.log";
3292         const char exp4[] = "console.log var_0000.var_0001.var_0002";
3293         const char exp5[] = "console.foo.bar.baz";
3294         const char exp6[] = "console=var_0000.var_0001.var_0002";
3295         const char exp7[] = "console.foo+var_0000.var_0001";
3296         const char exp8[] = "console.foo.bar:var_0000";
3297         const char exp9[] = "console.foo";
3298 
3299         NORMALIZE_S(dat1, exp1);
3300         NORMALIZE_S(dat2, exp2);
3301         NORMALIZE_S(dat3, exp3);
3302         NORMALIZE_S(dat4, exp4);
3303         NORMALIZE_S(dat5, exp5);
3304         NORMALIZE_S(dat6, exp6);
3305         NORMALIZE_S(dat7, exp7);
3306         NORMALIZE_S(dat8, exp8);
3307         NORMALIZE_S(dat9, exp9);
3308     }
3309 
3310     SECTION("comments")
3311     {
3312         const char dat1[] = "console.<!-- HTML comment\nlog(abc).foo";
3313         const char dat2[] = "console.//single-line comment\nlog(abc).foo";
3314         const char dat3[] = "console./*multi-line comment*/log(abc).foo";
3315         const char exp[] = "console.log(var_0000).foo";
3316 
3317         NORMALIZE_S(dat1, exp);
3318         NORMALIZE_S(dat2, exp);
3319         NORMALIZE_S(dat3, exp);
3320     }
3321 }
3322 
3323 TEST_CASE("ignored identifier scope tracking", "[JSNormalizer]")
3324 {
3325     // 'console' 'eval' 'document' are in the ignore list
3326 
3327     SECTION("basic")
3328     {
3329         const char dat1[] = "(alpha) bravo console delta eval foxtrot";
3330         const char dat2[] = "(alpha bravo) console delta eval foxtrot";
3331         const char dat3[] = "(alpha bravo console) delta eval foxtrot";
3332         const char dat4[] = "(alpha bravo console delta) eval foxtrot";
3333         const char dat5[] = "(alpha bravo console delta eval) foxtrot";
3334         const char dat6[] = "(alpha bravo console delta eval foxtrot)";
3335         const char dat7[] = "alpha bravo (console) delta (eval) foxtrot";
3336         const char exp1[] = "(var_0000)var_0001 console var_0002 eval var_0003";
3337         const char exp2[] = "(var_0000 var_0001)console var_0002 eval var_0003";
3338         const char exp3[] = "(var_0000 var_0001 console)var_0002 eval var_0003";
3339         const char exp4[] = "(var_0000 var_0001 console var_0002)eval var_0003";
3340         const char exp5[] = "(var_0000 var_0001 console var_0002 eval)var_0003";
3341         const char exp6[] = "(var_0000 var_0001 console var_0002 eval var_0003)";
3342         const char exp7[] = "var_0000 var_0001(console)var_0002(eval)var_0003";
3343 
3344         NORMALIZE_S(dat1, exp1);
3345         NORMALIZE_S(dat2, exp2);
3346         NORMALIZE_S(dat3, exp3);
3347         NORMALIZE_S(dat4, exp4);
3348         NORMALIZE_S(dat5, exp5);
3349         NORMALIZE_S(dat6, exp6);
3350         NORMALIZE_S(dat7, exp7);
3351     }
3352 
3353     SECTION("function calls")
3354     {
3355         const char dat1[] = "foo(bar).baz";
3356         const char dat2[] = "foo(bar(baz))";
3357         const char dat3[] = "eval(bar).baz";
3358         const char dat4[] = "eval(bar(baz))";
3359         const char dat5[] = "foo(eval).baz";
3360         const char dat6[] = "foo(eval(baz))";
3361         const char dat7[] = "foo(bar).eval";
3362         const char dat8[] = "foo(bar(eval))";
3363         const char dat9[] = "(console).log";
3364         const char dat10[] = "((console)).log";
3365         const char dat11[] = "((foo, console)).log";
3366         const char dat12[] = "((document.foo(bar), console)).log";
3367         const char dat13[] = "((document.foo(bar) console)).log";
3368         const char dat14[] = "((document.foo(bar) console))log";
3369         const char dat15[] = "((document.foo(bar) baz))log";
3370         const char dat16[] = "foo(console).log";
3371         const char dat17[] = "foo((console).log).log";
3372         const char dat18[] = "foo().baz + eval(eval['content'].bar + baz(console['content'].log, baz)).bar";
3373         const char dat19[] = "eval['foo']().bar.baz";
3374         const char dat20[] = "eval['foo']()['bar'].baz";
3375         const char dat21[] = "eval['foo']['bar'].baz()";
3376         const char exp1[] = "var_0000(var_0001).var_0002";
3377         const char exp2[] = "var_0000(var_0001(var_0002))";
3378         const char exp3[] = "eval(var_0000).baz";
3379         const char exp4[] = "eval(var_0000(var_0001))";
3380         const char exp5[] = "var_0000(eval).var_0001";
3381         const char exp6[] = "var_0000(eval(var_0001))";
3382         const char exp7[] = "var_0000(var_0001).var_0002";
3383         const char exp8[] = "var_0000(var_0001(eval))";
3384         const char exp9[] = "(console).log";
3385         const char exp10[] = "((console)).log";
3386         const char exp11[] = "((var_0000,console)).log";
3387         const char exp12[] = "((document.foo(var_0000),console)).log";
3388         const char exp13[] = "((document.foo(var_0000)console)).log";
3389         const char exp14[] = "((document.foo(var_0000)console))var_0001";
3390         const char exp15[] = "((document.foo(var_0000)var_0001))var_0002";
3391         const char exp16[] = "var_0000(console).var_0001";
3392         const char exp17[] = "var_0000((console).log).var_0001";
3393         const char exp18[] = "var_0000().var_0001+eval(eval['content'].bar+var_0001(console['content'].log,var_0001)).bar";
3394         const char exp19[] = "eval['foo']().bar.baz";
3395         const char exp20[] = "eval['foo']()['bar'].baz";
3396         const char exp21[] = "eval['foo']['bar'].baz()";
3397 
3398         NORMALIZE_S(dat1, exp1);
3399         NORMALIZE_S(dat2, exp2);
3400         NORMALIZE_S(dat3, exp3);
3401         NORMALIZE_S(dat4, exp4);
3402         NORMALIZE_S(dat5, exp5);
3403         NORMALIZE_S(dat6, exp6);
3404         NORMALIZE_S(dat7, exp7);
3405         NORMALIZE_S(dat8, exp8);
3406         NORMALIZE_S(dat9, exp9);
3407         NORMALIZE_S(dat10, exp10);
3408         NORMALIZE_S(dat11, exp11);
3409         NORMALIZE_S(dat12, exp12);
3410         NORMALIZE_S(dat13, exp13);
3411         NORMALIZE_S(dat14, exp14);
3412         NORMALIZE_S(dat15, exp15);
3413         NORMALIZE_S(dat16, exp16);
3414         NORMALIZE_S(dat17, exp17);
3415         NORMALIZE_S(dat18, exp18);
3416         NORMALIZE_S(dat19, exp19);
3417         NORMALIZE_S(dat20, exp20);
3418         NORMALIZE_S(dat21, exp21);
3419     }
3420 
3421     SECTION("eval cases")
3422     {
3423         const char dat1[] = "eval().bar";
3424         const char dat2[] = "eval()['bar']";
3425         const char dat3[] = "eval().bar()";
3426         const char dat4[] = "eval()['bar']()";
3427         const char dat5[] = "eval.bar";
3428         const char dat6[] = "eval.bar()";
3429         const char dat7[] = "eval['bar']";
3430         const char dat8[] = "eval['bar']()";
3431         const char dat9[] = "\\u0065\\u0076\\u0061\\u006c(\\u0062\\u0061\\u0072).\\u0062\\u0061\\u007a ;";
3432         const char dat10[] = "var foo.bar = 123 ; (\\u0065\\u0076\\u0061\\u006c).\\u0062\\u0061\\u007a ;";
3433         const char exp1[] = "eval().bar";
3434         const char exp2[] = "eval()['bar']";
3435         const char exp3[] = "eval().bar()";
3436         const char exp4[] = "eval()['bar']()";
3437         const char exp5[] = "eval.bar";
3438         const char exp6[] = "eval.bar()";
3439         const char exp7[] = "eval['bar']";
3440         const char exp8[] = "eval['bar']()";
3441         const char exp9[] = "eval(var_0000).baz;";
3442         const char exp10[] = "var var_0000.var_0001=123;(eval).baz;";
3443 
3444         NORMALIZE_S(dat1, exp1);
3445         NORMALIZE_S(dat2, exp2);
3446         NORMALIZE_S(dat3, exp3);
3447         NORMALIZE_S(dat4, exp4);
3448         NORMALIZE_S(dat5, exp5);
3449         NORMALIZE_S(dat6, exp6);
3450         NORMALIZE_S(dat7, exp7);
3451         NORMALIZE_S(dat8, exp8);
3452         NORMALIZE_S(dat9, exp9);
3453         NORMALIZE_S(dat10, exp10);
3454     }
3455 
3456     SECTION("console cases")
3457     {
3458         const char dat1[] = "console.log=abc";
3459         const char dat2[] = "console.log().clear()";
3460         const char dat3[] = "console.log(\"asd\").foo";
3461         const char dat4[] = "console.log[\"asd\"].foo";
3462         const char dat5[] = "console.log(`var a = ${ c + b }`).foo";
3463         const char dat6[] = "console.log(abc,def,www,document.foo(abc))";
3464         const char dat7[] = "console.log(document.getElementById(\"mem\").text).clear(abc)";
3465         const char exp1[] = "console.log=var_0000";
3466         const char exp2[] = "console.log().clear()";
3467         const char exp3[] = "console.log(\"asd\").foo";
3468         const char exp4[] = "console.log[\"asd\"].foo";
3469         const char exp5[] = "console.log(`var a = ${var_0000+var_0001}`).foo";
3470         const char exp6[] = "console.log(var_0000,var_0001,var_0002,document.foo(var_0000))";
3471         const char exp7[] = "console.log(document.getElementById(\"mem\").text).clear(var_0000)";
3472 
3473         NORMALIZE_S(dat1, exp1);
3474         NORMALIZE_S(dat2, exp2);
3475         NORMALIZE_S(dat3, exp3);
3476         NORMALIZE_S(dat4, exp4);
3477         NORMALIZE_S(dat5, exp5);
3478         NORMALIZE_S(dat6, exp6);
3479         NORMALIZE_S(dat7, exp7);
3480     }
3481 
3482     SECTION("corner cases")
3483     {
3484         const char dat1[] = "object = {hidden: eval}";
3485         const char dat2[] = "object = {console: \"str\"}";
3486         const char dat3[] = "object.hidden = eval";
3487         const char dat4[] = "array = [None, eval, document.getElementById]";
3488         const char dat5[] = "array[1] = eval";
3489         const char exp1[] = "var_0000={var_0001:eval}";
3490         const char exp2[] = "var_0000={console:\"str\"}";
3491         const char exp3[] = "var_0000.var_0001=eval";
3492         const char exp4[] = "var_0000=[var_0001,eval,document.getElementById]";
3493         const char exp5[] = "var_0000[1]=eval";
3494 
3495         NORMALIZE_S(dat1, exp1);
3496         NORMALIZE_S(dat2, exp2);
3497         NORMALIZE_S(dat3, exp3);
3498         NORMALIZE_S(dat4, exp4);
3499         NORMALIZE_S(dat5, exp5);
3500     }
3501 }
3502 
3503 TEST_CASE("ignored identifier split", "[JSNormalizer]")
3504 {
3505 
3506 #if JSTOKENIZER_MAX_STATES != 8
3507 #error "ignored identifier split" tests are designed for 8 states depth
3508 #endif
3509 
3510     SECTION("a standalone identifier")
3511     {
3512         const char dat1[] = "con";
3513         const char dat2[] = "sole";
3514         const char dat3[] = "e";
3515         const char dat4[] = "val";
3516         const char dat5[] = "console . ";
3517         const char dat6[] = "foo";
3518         const char dat7[] = "eval ";
3519         const char dat8[] = ". bar";
3520         const char exp1[] = "var_0000";
3521         const char exp2[] = "console";
3522         const char exp3[] = "var_0000";
3523         const char exp4[] = "eval";
3524         const char exp5[] = "console.";
3525         const char exp6[] = "foo";
3526         const char exp7[] = "eval";
3527         const char exp8[] = ".bar";
3528 
3529         const char exp9[] = "console";
3530         const char exp10[] = "eval";
3531         const char exp11[] = "console.foo";
3532         const char exp12[] = "eval.bar";
3533 
3534         NORMALIZE_T(dat1, dat2, exp1, exp2);
3535         NORMALIZE_T(dat3, dat4, exp3, exp4);
3536         NORMALIZE_T(dat5, dat6, exp5, exp6);
3537         NORMALIZE_T(dat7, dat8, exp7, exp8);
3538 
3539         NORM_COMBINED_S_2(dat1, dat2, exp9);
3540         NORM_COMBINED_S_2(dat3, dat4, exp10);
3541         NORM_COMBINED_S_2(dat5, dat6, exp11);
3542         NORM_COMBINED_S_2(dat7, dat8, exp12);
3543     }
3544 
3545     SECTION("function calls")
3546     {
3547         const char dat1[] = "console";
3548         const char dat2[] = "().foo";
3549         const char dat3[] = "console(";
3550         const char dat4[] = ").foo";
3551         const char dat5[] = "console()";
3552         const char dat6[] = ".foo";
3553         const char dat7[] = "console().";
3554         const char dat8[] = "foo";
3555         const char dat9[] = "console().re";
3556         const char dat10[] = "write";
3557         const char exp1[] = "console";
3558         const char exp2[] = "().foo";
3559         const char exp3[] = "console(";
3560         const char exp4[] = ").foo";
3561         const char exp5[] = "console()";
3562         const char exp6[] = ".foo";
3563         const char exp7[] = "console().";
3564         const char exp8[] = "foo";
3565         const char exp9[] = "console().re";
3566         const char exp10[] = "rewrite";
3567 
3568         const char exp11[] = "console().foo";
3569         const char exp12[] = "console().foo";
3570         const char exp13[] = "console().foo";
3571         const char exp14[] = "console().foo";
3572         const char exp15[] = "console().rewrite";
3573 
3574         NORMALIZE_T(dat1, dat2, exp1, exp2);
3575         NORMALIZE_T(dat3, dat4, exp3, exp4);
3576         NORMALIZE_T(dat5, dat6, exp5, exp6);
3577         NORMALIZE_T(dat7, dat8, exp7, exp8);
3578         NORMALIZE_T(dat9, dat10, exp9, exp10);
3579 
3580         NORM_COMBINED_S_2(dat1, dat2, exp11);
3581         NORM_COMBINED_S_2(dat3, dat4, exp12);
3582         NORM_COMBINED_S_2(dat5, dat6, exp13);
3583         NORM_COMBINED_S_2(dat7, dat8, exp14);
3584         NORM_COMBINED_S_2(dat9, dat10, exp15);
3585     }
3586 
3587     SECTION("terminator split")
3588     {
3589         const char dat1[] = "eval.foo ";
3590         const char dat2[] = "bar.baz";
3591         const char dat3[] = "eval.foo";
3592         const char dat4[] = " bar.baz";
3593         const char dat5[] = "eval.foo;";
3594         const char dat6[] = "bar.baz";
3595         const char dat7[] = "eval.foo";
3596         const char dat8[] = ";bar.baz";
3597         const char dat9[] = "eval.foo%";
3598         const char dat10[] = "=bar.baz";
3599         const char exp1[] = "eval.foo";
3600         const char exp2[] = " var_0000.var_0001";
3601         const char exp3[] = "eval.foo";
3602         const char exp4[] = " var_0000.var_0001";
3603         const char exp5[] = "eval.foo;";
3604         const char exp6[] = "var_0000.var_0001";
3605         const char exp7[] = "eval.foo";
3606         const char exp8[] = ";var_0000.var_0001";
3607         const char exp9[] = "eval.foo%";
3608         const char exp10[] = "%=var_0000.var_0001";
3609 
3610         const char exp11[] = "eval.foo var_0000.var_0001";
3611         const char exp12[] = "eval.foo var_0000.var_0001";
3612         const char exp13[] = "eval.foo;var_0000.var_0001";
3613         const char exp14[] = "eval.foo;var_0000.var_0001";
3614         const char exp15[] = "eval.foo%=var_0000.var_0001";
3615 
3616         NORMALIZE_T(dat1, dat2, exp1, exp2);
3617         NORMALIZE_T(dat3, dat4, exp3, exp4);
3618         NORMALIZE_T(dat5, dat6, exp5, exp6);
3619         NORMALIZE_T(dat7, dat8, exp7, exp8);
3620         NORMALIZE_T(dat9, dat10, exp9, exp10);
3621 
3622         NORM_COMBINED_S_2(dat1, dat2, exp11);
3623         NORM_COMBINED_S_2(dat3, dat4, exp12);
3624         NORM_COMBINED_S_2(dat5, dat6, exp13);
3625         NORM_COMBINED_S_2(dat7, dat8, exp14);
3626         NORM_COMBINED_S_2(dat9, dat10, exp15);
3627     }
3628 
3629     SECTION("scope split")
3630     {
3631         // "eval(foo,eval(bar,eval(baz[''].console.check+check).foo).bar).baz+check"
3632         //                   split here ^
3633 
3634         const char dat1[] = "eval(foo,eval(bar,eval(baz['";
3635         const char dat2[] = "'].console.check+check).foo).bar).baz+check";
3636         const char exp1[] = "eval(var_0000,eval(var_0001,eval(var_0002['";
3637         const char exp2[] = "'].var_0003.var_0004+var_0004).foo).bar).baz+var_0004";
3638 
3639         const char exp3[] = "eval(var_0000,eval(var_0001,eval(var_0002['"
3640             "'].var_0003.var_0004+var_0004).foo).bar).baz+var_0004";
3641 
3642         NORMALIZE_T(dat1, dat2, exp1, exp2);
3643         NORM_COMBINED_S_2(dat1, dat2, exp3);
3644 
3645         // "eval(foo,eval(bar,eval(baz[''].console.check+check).foo).bar).baz+check"
3646         //                         split here ^
3647 
3648         const char dat3[] = "eval(foo,eval(bar,eval(baz[''].con";
3649         const char dat4[] = "sole.check+check).foo).bar).baz+check";
3650         const char exp4[] = "eval(var_0000,eval(var_0001,eval(var_0002[''].var_0003";
3651         const char exp5[] = "var_0004.var_0005+var_0005).foo).bar).baz+var_0005";
3652 
3653         const char exp6[] = "eval(var_0000,eval(var_0001,eval(var_0002['']."
3654             "var_0004.var_0005+var_0005).foo).bar).baz+var_0005";
3655 
3656         NORMALIZE_T(dat3, dat4, exp4, exp5);
3657         NORM_COMBINED_S_2(dat3, dat4, exp6);
3658 
3659         // "eval(foo,eval(bar,eval(baz[''].console.check+check).foo).bar).baz+check"
3660         //                                              split here ^
3661 
3662         const char dat5[] = "eval(foo,eval(bar,eval(baz[''].console.check+check).foo";
3663         const char dat6[] = ").bar).baz+check";
3664         const char exp7[] = "eval(var_0000,eval(var_0001,eval(var_0002[''].var_0003.var_0004+var_0004).foo";
3665         const char exp8[] = ").bar).baz+var_0004";
3666 
3667         const char exp9[] = "eval(var_0000,eval(var_0001,eval(var_0002[''].var_0003.var_0004+var_0004).foo"
3668             ").bar).baz+var_0004";
3669 
3670         NORMALIZE_T(dat5, dat6, exp7, exp8);
3671         NORM_COMBINED_S_2(dat5, dat6, exp9);
3672     }
3673 }
3674 
3675 TEST_CASE("Scope tracking - basic","[JSNormalizer]")
3676 {
3677     SECTION("Global only")
3678         test_scope("",{GLOBAL});
3679 
3680     SECTION("Function scope - named function")
3681         test_scope("function f(){",{GLOBAL,FUNCTION});
3682 
3683     SECTION("Function scope - anonymous function")
3684         test_scope("var f = function(){",{GLOBAL,FUNCTION});
3685 
3686     SECTION("Function scope - arrow function")
3687         test_scope("var f = (a,b)=>{",{GLOBAL,FUNCTION});
3688 
3689     SECTION("Function scope - arrow function without scope")
3690         test_scope("var f = (a,b)=> a",{GLOBAL,FUNCTION});
3691 
3692     SECTION("Function scope - method in object initialization")
3693         test_scope("var o = { f(){",{GLOBAL,BLOCK,BLOCK});
3694 
3695     SECTION("Function scope - method in object operation")
3696         test_scope("+{ f(){",{GLOBAL,BLOCK,BLOCK});
3697 
3698     SECTION("Function scope - method in object as a function parameter")
3699         test_scope("call({ f(){",{GLOBAL,BLOCK,BLOCK});
3700 
3701     SECTION("Function scope - keyword name method")
3702         test_scope("var o = { let(){",{GLOBAL,BLOCK,BLOCK});
3703 
3704     SECTION("Function scope - 'get' name method")
3705         test_scope("var o = { get(){",{GLOBAL,BLOCK,BLOCK});
3706 
3707     SECTION("Function scope - expression method")
3708         test_scope("var o = { [a + 12](){",{GLOBAL,BLOCK,BLOCK});
3709 
3710     SECTION("Function scope - method as anonymous function")
3711         test_scope("var o = { f: function(){",{GLOBAL,BLOCK,FUNCTION});
3712 
3713     SECTION("Function scope - keyword name method as anonymous function")
3714         test_scope("var o = { let: function(){",{GLOBAL,BLOCK,FUNCTION});
3715 
3716     SECTION("Function scope - 'get' name method as anonymous function")
3717         test_scope("var o = { get: function(){",{GLOBAL,BLOCK,FUNCTION});
3718 
3719     SECTION("Function scope - expression method as anonymous function")
3720         test_scope("var o = { [a + 12]: function(){",{GLOBAL,BLOCK,FUNCTION});
3721 
3722     SECTION("Function scope - getter")
3723         test_scope("var o = { get f(){",{GLOBAL,BLOCK,BLOCK});
3724 
3725     SECTION("Function scope - parametric getter")
3726         test_scope("var o = { get [a + 12](){",{GLOBAL,BLOCK,BLOCK});
3727 
3728     SECTION("Function scope - setter")
3729         test_scope("var o = { set f(){",{GLOBAL,BLOCK,BLOCK});
3730 
3731     SECTION("Function scope - parametric setter")
3732         test_scope("var o = { set [a + 12](){",{GLOBAL,BLOCK,BLOCK});
3733 
3734     SECTION("Block scope - regular block")
3735         test_scope("{",{GLOBAL,BLOCK});
3736 
3737     SECTION("Block scope - object initializer")
3738         test_scope("o = {",{GLOBAL,BLOCK});
3739 
3740     SECTION("Block scope - class")
3741         test_scope("class C{",{GLOBAL,BLOCK});
3742 
3743     SECTION("Block scope - class with extends")
3744         test_scope("class C extends A{",{GLOBAL,BLOCK});
3745 
3746     SECTION("Block scope - if")
3747         test_scope("if(true){",{GLOBAL,BLOCK});
3748 
3749     SECTION("Block scope - single statement if")
3750         test_scope("if(true) func()",{GLOBAL,BLOCK});
3751 
3752     SECTION("Block scope - nested multiple single statement ifs")
3753         test_scope("if(a) if(b) if(c) if(d) func()",{GLOBAL,BLOCK});
3754 
3755     SECTION("Block scope - nested multiple single statement ifs with newline")
3756         test_scope("if(a)\nif(b)\nif(c)\nif(d)\nfunc()",{GLOBAL,BLOCK});
3757 
3758     SECTION("Block scope - else")
3759         test_scope("if(true);else{",{GLOBAL,BLOCK});
3760 
3761     SECTION("Block scope - single statement else")
3762         test_scope("if(true);else func()",{GLOBAL,BLOCK});
3763 
3764     SECTION("Block scope - for loop")
3765         test_scope("for(;;){",{GLOBAL,BLOCK});
3766 
3767     SECTION("Block scope - for loop in range")
3768         test_scope("for(i in range()){",{GLOBAL,BLOCK});
3769 
3770     SECTION("Block scope - for loop of iterable")
3771         test_scope("for(i of o){",{GLOBAL,BLOCK});
3772 
3773     SECTION("Block scope - for await loop")
3774         test_scope("for await(i of o){",{GLOBAL,BLOCK});
3775 
3776     SECTION("Block scope - inside for statement")
3777         test_scope("for(",{GLOBAL,BLOCK});
3778 
3779     SECTION("Block scope - inside for statement, after semicolon")
3780         test_scope("for(;",{GLOBAL,BLOCK});
3781 
3782     SECTION("Block scope - single statement for")
3783         test_scope("for(;;) func()",{GLOBAL,BLOCK});
3784 
3785     SECTION("Block scope - for nested in single line conditional")
3786         test_scope("if(true) for(;;) a++",{GLOBAL,BLOCK});
3787 
3788     SECTION("Block scope - while")
3789         test_scope("while(true){",{GLOBAL,BLOCK});
3790 
3791     SECTION("Block scope - single statement while")
3792         test_scope("while(true) func()",{GLOBAL,BLOCK});
3793 
3794     SECTION("Block scope - do-while")
3795         test_scope("do{",{GLOBAL,BLOCK});
3796 
3797     SECTION("Block scope - single statement do-while")
3798         test_scope("do func()",{GLOBAL,BLOCK});
3799 
3800     SECTION("Block scope - try")
3801         test_scope("try{",{GLOBAL,BLOCK});
3802 
3803     SECTION("Block scope - catch")
3804         test_scope("try{}catch(e){",{GLOBAL,BLOCK});
3805 
3806     SECTION("Block scope - catch exception declaration")
3807         test_scope("try{}catch(",{GLOBAL,BLOCK});
3808 
3809     SECTION("Block scope - finally")
3810         test_scope("try{}finally{",{GLOBAL,BLOCK});
3811 
3812     SECTION("Block scope - nested object - named")
3813         test_scope("var o = {s:{",{GLOBAL,BLOCK,BLOCK});
3814 
3815     SECTION("Block scope - nested object - keyword named")
3816         test_scope("var o = {let:{",{GLOBAL,BLOCK,BLOCK});
3817 
3818     SECTION("Block scope - nested object - 'get' named")
3819         test_scope("var o = {get:{",{GLOBAL,BLOCK,BLOCK});
3820 
3821     SECTION("Block scope - nested object - parametric")
3822         test_scope("var o = {[a+12]:{",{GLOBAL,BLOCK,BLOCK});
3823 }
3824 
3825 TEST_CASE("Scope tracking - closing","[JSNormalizer]")
3826 {
3827 
3828     SECTION("Function scope - named function")
3829         test_scope("function f(){}",{GLOBAL});
3830 
3831     SECTION("Function scope - anonymous function")
3832         test_scope("var f = function(){}",{GLOBAL});
3833 
3834     SECTION("Function scope - arrow function")
3835         test_scope("var f = (a,b)=>{}",{GLOBAL});
3836 
3837     SECTION("Function scope - arrow function without scope")
3838         test_scope("var f = (a,b)=>a;",{GLOBAL});
3839 
3840     SECTION("Function scope - arrow function as a function parameter")
3841         test_scope("console.log(a=>c)",{GLOBAL});
3842 
3843     SECTION("Function scope - method")
3844         test_scope("var o = { f(){}",{GLOBAL,BLOCK});
3845 
3846     SECTION("Function scope - keyword name method")
3847         test_scope("var o = { let(){}",{GLOBAL,BLOCK});
3848 
3849     SECTION("Function scope - expression method")
3850         test_scope("var o = { [a + 12](){}",{GLOBAL,BLOCK});
3851 
3852     SECTION("Function scope - method as anonymous function")
3853         test_scope("var o = { f: function(){}",{GLOBAL,BLOCK});
3854 
3855     SECTION("Function scope - keyword name method as anonymous function")
3856         test_scope("var o = { let: function(){}",{GLOBAL,BLOCK});
3857 
3858     SECTION("Function scope - expression method as anonymous function")
3859         test_scope("var o = { [a + 12]: function(){}",{GLOBAL,BLOCK});
3860 
3861     SECTION("Function scope - getter")
3862         test_scope("var o = { get f(){}",{GLOBAL,BLOCK});
3863 
3864     SECTION("Function scope - parametric getter")
3865         test_scope("var o = { get [a + 12](){}",{GLOBAL,BLOCK});
3866 
3867     SECTION("Function scope - setter")
3868         test_scope("var o = { set f(){}",{GLOBAL,BLOCK});
3869 
3870     SECTION("Function scope - parametric setter")
3871         test_scope("var o = { set [a + 12](){}",{GLOBAL,BLOCK});
3872 
3873     SECTION("Block scope - regular block")
3874         test_scope("{}",{GLOBAL});
3875 
3876     SECTION("Block scope - object initializer")
3877         test_scope("o = {}",{GLOBAL});
3878 
3879     SECTION("Block scope - class")
3880         test_scope("class C{}",{GLOBAL});
3881 
3882     SECTION("Block scope - class with extends")
3883         test_scope("class C extends A{}",{GLOBAL});
3884 
3885     SECTION("Block scope - if")
3886         test_scope("if(true){}",{GLOBAL});
3887 
3888     SECTION("Block scope - single statement if")
3889         test_scope("if(true);",{GLOBAL});
3890 
3891     SECTION("Block scope - single statement if, semicolon group terminated")
3892         test_scope("if(true)\na++\nreturn",{GLOBAL});
3893 
3894     SECTION("Block scope - nested multiple single statement ifs")
3895         test_scope("if(a) if(b) if(c) if(d) func();",{GLOBAL});
3896 
3897     SECTION("Block scope - nested multiple single statement ifs with newline")
3898         test_scope("if(a)\nif(b)\nif(c)\nif(d)\nfunc()\nfunc()",{GLOBAL});
3899 
3900     SECTION("Block scope - else")
3901         test_scope("if(true);else{}",{GLOBAL});
3902 
3903     SECTION("Block scope - single statement else")
3904         test_scope("if(true);else;",{GLOBAL});
3905 
3906     SECTION("Block scope - for loop")
3907         test_scope("for(;;){}",{GLOBAL});
3908 
3909     SECTION("Block scope - for loop in range")
3910         test_scope("for(i in range()){}",{GLOBAL});
3911 
3912     SECTION("Block scope - for loop of iterable")
3913         test_scope("for(i of o){}",{GLOBAL});
3914 
3915     SECTION("Block scope - for await loop")
3916         test_scope("for await(i of o){}",{GLOBAL});
3917 
3918     SECTION("Block scope - single statement for")
3919         test_scope("for(;;);",{GLOBAL});
3920 
3921     SECTION("Block scope - while")
3922         test_scope("while(true){}",{GLOBAL});
3923 
3924     SECTION("Block scope - single statement while")
3925         test_scope("while(true);",{GLOBAL});
3926 
3927     SECTION("Block scope - do-while")
3928         test_scope("do{}while(",{GLOBAL, BLOCK});
3929 
3930     SECTION("Block scope - single statement do-while")
3931         test_scope("do;while(",{GLOBAL, BLOCK});
3932 
3933     SECTION("Block scope - try")
3934         test_scope("try{}",{GLOBAL});
3935 
3936     SECTION("Block scope - catch")
3937         test_scope("try{}catch(e){}",{GLOBAL});
3938 
3939     SECTION("Block scope - finally")
3940         test_scope("try{}finally{}",{GLOBAL});
3941 
3942     SECTION("Block scope - nested object - named")
3943         test_scope("var o = {s:{}",{GLOBAL,BLOCK});
3944 
3945     SECTION("Block scope - nested object - keyword named")
3946         test_scope("var o = {let:{}",{GLOBAL,BLOCK});
3947 
3948     SECTION("Block scope - nested object - parametric")
3949         test_scope("var o = {[a+12]:{}",{GLOBAL,BLOCK});
3950 
3951     SECTION("Block scope - advanced automatic semicolon insertion")
3952         test_scope(
3953             "var\na\n=\n0\n\n"                                      // var a=0;
3954             "for\n(\nlet\na\n=\n0\na\n<\n5\n++\na\n)\na\n+=\n2\n\n" // for (let a = 0;a<5;++a) a+=2;
3955             "do\nlet\na\n=\n0\nwhile\n(\na\n<\n5\n)\n\n"            // do let a=0; while (a < 5);
3956             "++\na\n\n"                                             // ++a;
3957             "while\n(a\n<\n5\n)\na\n+=\n2\n\n"                      // while (a<5) a+=2;
3958             "if\n(\ntrue\n)\nlet\na\n=\n0\n\n"                      // if (true) let a=0;
3959             "else\nlet\na\n=\n0\n\na;",                             // else let a=0;a;
3960             {GLOBAL}
3961         );
3962 
3963     SECTION("Block scope - inline block in the end of outer scope")
3964         test_scope("function() { if (true)\nfor ( ; ; ) a = 2 }", {GLOBAL});
3965 }
3966 
3967 TEST_CASE("Scope tracking - over multiple PDU","[JSNormalizer]")
3968 {
3969     // Every line represents a PDU. Each pdu has input buffer, expected script
3970     // and expected scope stack, written in that order
3971     SECTION("general - variable extension")
3972         test_normalization({
3973             {"long_", "var_0000", {GLOBAL}},
3974             {"variable", "var_0001", {GLOBAL}}
3975             //FIXIT-E: if variable index will be preserved across PDUs, second pdu expected
3976             // will be "var_0000"
3977         });
3978 
3979     SECTION("general - variable extension: ignored identifier to a regular one")
3980         test_normalization({
3981             {"console", "console", {GLOBAL}},
3982             {"Writer", "var_0000", {GLOBAL}}
3983         });
3984 
3985     SECTION("general - variable extension: a regular identifier to ignored one")
3986         test_normalization({
3987             {"con", "var_0000", {GLOBAL}},
3988             {"sole", "console", {GLOBAL}}
3989         });
3990 
3991     SECTION("general - variable extension that overwrites existing variable")
3992         test_normalization({
3993             {"a, b, an", "var_0000,var_0001,var_0002", {GLOBAL}},
3994             {"other = a", "var_0000,var_0001,var_0003=var_0000", {GLOBAL}}
3995         });
3996 
3997     SECTION("general - variable extension that overwrites existing variable inside inner scope")
3998         test_normalization({
3999             {"f(a, x=>{var an", "var_0000(var_0001,var_0002=>{var var_0003", {GLOBAL,FUNCTION}},
4000             {"other = a})", "var_0000(var_0001,var_0002=>{var var_0004=var_0001})", {GLOBAL}}
4001         });
4002 
4003     SECTION("block scope - basic open")
4004         test_normalization({
4005             {"{", "{", {GLOBAL, BLOCK}},
4006             {"var", "{var", {GLOBAL, BLOCK}}
4007         });
4008 
4009     SECTION("block scope - basic close")
4010         test_normalization({
4011             {"{", "{", {GLOBAL, BLOCK}},
4012             {"}", "{}", {GLOBAL}}
4013         });
4014 
4015     SECTION("block scope - open outside cross-PDU states")
4016         test_normalization({
4017             {"{[1,2,3,4,5,6,7,8]", "{[1,2,3,4,5,6,7,8]", {GLOBAL, BLOCK}},
4018             {"}", "{[1,2,3,4,5,6,7,8]}", {GLOBAL}}
4019         });
4020 
4021     SECTION("block scope - closing brace in a string")
4022         test_normalization({
4023             {"{[1,2,3,4,5,6,7,'}']", "{[1,2,3,4,5,6,7,'}']", {GLOBAL, BLOCK}},
4024             {"}", "{[1,2,3,4,5,6,7,'}']}", {GLOBAL}}
4025         });
4026 
4027     SECTION("block scope - for keyword split")
4028         test_normalization({
4029             {"fin", "var_0000", {GLOBAL}},
4030             {"ally {", "finally{", {GLOBAL, BLOCK}}
4031         });
4032 
4033     SECTION("block scope - between 'for' and '('")
4034         test_normalization({
4035             {"for", "for", {GLOBAL, BLOCK}},
4036             {"(", "for(", {GLOBAL, BLOCK}}
4037         });
4038 
4039     SECTION("block scope - fake 'for'")
4040         test_normalization({
4041             {"for", "for", {GLOBAL, BLOCK}},
4042             {"k", "var_0000", {GLOBAL}}
4043         });
4044 
4045     SECTION("block scope - inside for-loop parentheses")
4046         test_normalization({
4047             {"for(;;", "for(;;", {GLOBAL, BLOCK}},
4048             {");", "for(;;);", {GLOBAL}}
4049         });
4050 
4051     SECTION("block scope - between for-loop parentheses and code block")
4052         test_normalization({
4053             {"for(;;)", "for(;;)", {GLOBAL, BLOCK}},
4054             {"{}", "for(;;){}", {GLOBAL}}
4055         });
4056 
4057     SECTION("function scope: split in 'function'")
4058         test_normalization({
4059             {"func", "var_0000", {GLOBAL}},
4060             {"tion(", "function(", {GLOBAL,FUNCTION}}
4061         });
4062 
4063     SECTION("function scope: fake function")
4064         test_normalization({
4065             {"function", "function", {GLOBAL}},
4066             {"al(", "var_0000(", {GLOBAL}}
4067         });
4068 
4069     SECTION("function scope: split inside string literal")
4070         test_normalization({
4071             {"`$$$$$$$$function", "`$$$$$$$$function", {GLOBAL}},
4072             {"(){a = 0", "`$$$$$$$$function(){a = 0", {GLOBAL}}
4073         });
4074 
4075     SECTION("function scope: inside parameters")
4076         test_normalization({
4077             {"function(", "function(", {GLOBAL, FUNCTION}},
4078             {")", "function()", {GLOBAL,FUNCTION}}
4079         });
4080 
4081     SECTION("function scope: between parameters and body")
4082         test_normalization({
4083             {"function()", "function()", {GLOBAL, FUNCTION}},
4084             {"{", "function(){", {GLOBAL,FUNCTION}}
4085         });
4086 
4087     SECTION("function scope: inside code")
4088         test_normalization({
4089             {"function(){", "function(){", {GLOBAL, FUNCTION}},
4090             {"}", "function(){}", {GLOBAL}}
4091         });
4092 
4093     SECTION("object initializer: basic")
4094         test_normalization({
4095             {"var o = {", "var var_0000={", {GLOBAL, BLOCK}},
4096             {"}", "var var_0000={}", {GLOBAL}}
4097         });
4098 
4099     SECTION("false var keyword")
4100         test_normalization({
4101             {"var var_a; function(){ var", "var var_0000;function(){var", {GLOBAL, FUNCTION}},
4102             {"_a; }", "var var_0000;function(){var_0000;}", {GLOBAL}}
4103         });
4104 
4105     SECTION("false let keyword")
4106         test_normalization({
4107             {"var let_a; function(){ let", "var var_0000;function(){let", {GLOBAL, FUNCTION}},
4108             {"_a; }", "var var_0000;function(){var_0000;}", {GLOBAL}}
4109         });
4110 
4111     SECTION("false const keyword")
4112         test_normalization({
4113             {"var const_a; function(){ const", "var var_0000;function(){const", {GLOBAL, FUNCTION}},
4114             {"_a; }", "var var_0000;function(){var_0000;}", {GLOBAL}}
4115         });
4116 
4117     SECTION("false class keyword")
4118         test_normalization({
4119             {"var a; class", "var var_0000;class", {GLOBAL}},
4120             {"_a; { a }", "var var_0000;var_0001;{var_0000}", {GLOBAL}}
4121         });
4122 }
4123 
4124 TEST_CASE("Scope tracking - error handling", "[JSNormalizer]")
4125 {
4126     SECTION("not identifier after var keyword")
4127         test_normalization_bad(
4128             "var +;",
4129             "var",
4130             JSTokenizer::BAD_TOKEN
4131         );
4132 
4133     SECTION("not identifier after let keyword")
4134         test_normalization_bad(
4135             "let class;",
4136             "let",
4137             JSTokenizer::BAD_TOKEN
4138         );
4139 
4140     SECTION("not identifier after const keyword")
4141         test_normalization_bad(
4142             "const 1;",
4143             "const",
4144             JSTokenizer::BAD_TOKEN
4145         );
4146 
4147     SECTION("scope mismatch")
4148         test_normalization_bad(
4149             "function f() { if (true) } }",
4150             "function var_0000(){if(true)}",
4151             JSTokenizer::WRONG_CLOSING_SYMBOL
4152         );
4153 
4154     SECTION("scope mismatch with code block")
4155         test_normalization_bad(
4156             "{ { function } }",
4157             "{{function",
4158             JSTokenizer::WRONG_CLOSING_SYMBOL
4159         );
4160 
4161     SECTION("scope nesting overflow")
4162     {
4163         const char src[] = "function() { if (true) { } }";
4164         const char exp[] = "function(){if";
4165         uint32_t scope_depth = 2;
4166 
4167         JSIdentifierCtx ident_ctx(norm_depth, scope_depth, s_ignored_ids);
4168         JSNormalizer normalizer(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth);
4169         auto ret = normalizer.normalize(src, strlen(src));
4170         std::string dst(normalizer.get_script(), normalizer.script_size());
4171 
4172         CHECK(ret == JSTokenizer::SCOPE_NESTING_OVERFLOW);
4173         CHECK(dst == exp);
4174     }
4175 }
4176 
4177 #endif // CATCH_TEST_BUILD
4178 
4179 // Benchmark tests
4180 
4181 #ifdef BENCHMARK_TEST
4182 
static constexpr const char* s_closing_tag = "</script>";

// Builds a benchmark input: begin, then as many whole copies of mid as fit,
// then end, then the closing script tag.
//
//   begin - leading part, copied once
//   mid   - filler, repeated to use up the space left by the fixed parts
//   end   - trailing part, copied once before the closing tag
//   len   - size budget for the whole string; the result is at most len
//           bytes whenever the fixed parts alone fit into len
//
// No filler is added when mid is empty or when len does not leave room
// for it, so the result is never shorter than begin + end + closing tag.
static const std::string make_input(const char* begin, const char* mid,
                             const char* end, size_t len)
{
    const size_t mid_len = strlen(mid);
    const size_t fixed_len = strlen(begin) + strlen(end) + strlen(s_closing_tag);

    // Unsigned arithmetic: check before subtracting so a small len cannot
    // wrap around, and before dividing so an empty filler cannot trap.
    size_t fill = 0;
    if (mid_len != 0 && len > fixed_len)
        fill = (len - fixed_len) / mid_len;

    std::string s(begin);
    for (size_t i = 0; i < fill; ++i)
        s.append(mid);
    s.append(end);
    s.append(s_closing_tag);
    return s;
}
4196 
make_input_repeat(const char * pattern,size_t depth)4197 static const std::string make_input_repeat(const char* pattern, size_t depth)
4198 {
4199     std::string s;
4200     size_t fill = (depth - strlen(s_closing_tag))/strlen(pattern);
4201     for (size_t it = 0; it < fill; ++it)
4202         s.append(pattern);
4203 
4204     s.append(s_closing_tag);
4205     return s;
4206 }
4207 
norm_ret(JSNormalizer & normalizer,const std::string & input)4208 static JSTokenizer::JSRet norm_ret(JSNormalizer& normalizer, const std::string& input)
4209 {
4210     normalizer.rewind_output();
4211     return normalizer.normalize(input.c_str(), input.size());
4212 }
4213 
4214 TEST_CASE("JS Normalizer, literals by 8 K", "[JSNormalizer]")
4215 {
4216     JSIdentifierCtxStub ident_ctx;
4217     JSNormalizer normalizer(ident_ctx, unlim_depth, max_template_nesting, max_bracket_depth);
4218     char dst[DEPTH];
4219 
4220     constexpr size_t size = 1 << 13;
4221 
4222     auto data_pl = make_input("", ".", "", size);
4223     auto data_ws = make_input("", " ", "", size);
4224     auto data_bc = make_input("/*", " ", "*/", size);
4225     auto data_dq = make_input("\"", " ", "\"", size);
4226 
4227     BENCHMARK("memcpy()")
4228     {
4229         return memcpy(dst, data_pl.c_str(), data_pl.size());
4230     };
4231 
4232     REQUIRE(norm_ret(normalizer, data_ws) == JSTokenizer::SCRIPT_ENDED);
4233     BENCHMARK("whitespaces")
4234     {
4235         normalizer.rewind_output();
4236         return normalizer.normalize(data_ws.c_str(), data_ws.size());
4237     };
4238 
4239     REQUIRE(norm_ret(normalizer, data_bc) == JSTokenizer::SCRIPT_ENDED);
4240     BENCHMARK("block comment")
4241     {
4242         normalizer.rewind_output();
4243         return normalizer.normalize(data_bc.c_str(), data_bc.size());
4244     };
4245 
4246     REQUIRE(norm_ret(normalizer, data_dq) == JSTokenizer::SCRIPT_ENDED);
4247     BENCHMARK("double quotes string")
4248     {
4249         normalizer.rewind_output();
4250         return normalizer.normalize(data_dq.c_str(), data_dq.size());
4251     };
4252 }
4253 
4254 TEST_CASE("JS Normalizer, literals by 64 K", "[JSNormalizer]")
4255 {
4256     JSIdentifierCtxStub ident_ctx;
4257     JSNormalizer normalizer(ident_ctx, unlim_depth, max_template_nesting, max_scope_depth);
4258     char dst[DEPTH];
4259 
4260     constexpr size_t size = 1 << 16;
4261 
4262     auto data_pl = make_input("", ".", "", size);
4263     auto data_ws = make_input("", " ", "", size);
4264     auto data_bc = make_input("/*", " ", "*/", size);
4265     auto data_dq = make_input("\"", " ", "\"", size);
4266 
4267     BENCHMARK("memcpy()")
4268     {
4269         return memcpy(dst, data_pl.c_str(), data_pl.size());
4270     };
4271 
4272     REQUIRE(norm_ret(normalizer, data_ws) == JSTokenizer::SCRIPT_ENDED);
4273     BENCHMARK("whitespaces")
4274     {
4275         normalizer.rewind_output();
4276         return normalizer.normalize(data_ws.c_str(), data_ws.size());
4277     };
4278 
4279     REQUIRE(norm_ret(normalizer, data_bc) == JSTokenizer::SCRIPT_ENDED);
4280     BENCHMARK("block comment")
4281     {
4282         normalizer.rewind_output();
4283         return normalizer.normalize(data_bc.c_str(), data_bc.size());
4284     };
4285 
4286     REQUIRE(norm_ret(normalizer, data_dq) == JSTokenizer::SCRIPT_ENDED);
4287     BENCHMARK("double quotes string")
4288     {
4289         normalizer.rewind_output();
4290         return normalizer.normalize(data_dq.c_str(), data_dq.size());
4291     };
4292 }
4293 
4294 TEST_CASE("JS Normalizer, id normalization", "[JSNormalizer]")
4295 {
4296     // around 11 000 identifiers
4297     std::string input;
4298     for (int it = 0; it < DEPTH; ++it)
4299         input.append("n" + std::to_string(it) + " ");
4300 
4301     input.resize(DEPTH - strlen(s_closing_tag));
4302     input.append(s_closing_tag, strlen(s_closing_tag));
4303 
4304     JSIdentifierCtxStub ident_ctx_mock;
4305     JSNormalizer normalizer_wo_ident(ident_ctx_mock, unlim_depth,
4306         max_template_nesting, max_bracket_depth);
4307 
4308     REQUIRE(norm_ret(normalizer_wo_ident, input) == JSTokenizer::SCRIPT_ENDED);
4309     BENCHMARK("without substitution")
4310     {
4311         normalizer_wo_ident.rewind_output();
4312         return normalizer_wo_ident.normalize(input.c_str(), input.size());
4313     };
4314 
4315     const std::unordered_set<std::string> ids{};
4316     JSIdentifierCtx ident_ctx(norm_depth, max_scope_depth, ids);
4317     JSNormalizer normalizer_w_ident(ident_ctx, unlim_depth, max_template_nesting, max_bracket_depth);
4318 
4319     REQUIRE(norm_ret(normalizer_w_ident, input) == JSTokenizer::SCRIPT_ENDED);
4320     BENCHMARK("with substitution")
4321     {
4322         normalizer_w_ident.rewind_output();
4323         return normalizer_w_ident.normalize(input.c_str(), input.size());
4324     };
4325 
4326     const std::unordered_set<std::string> ids_n { "n" };
4327     JSIdentifierCtx ident_ctx_ids_n(norm_depth, max_scope_depth, ids_n);
4328     JSNormalizer normalizer_iids(ident_ctx_ids_n, unlim_depth,
4329         max_template_nesting, max_bracket_depth);
4330 
4331     REQUIRE(norm_ret(normalizer_iids, input) == JSTokenizer::SCRIPT_ENDED);
4332     BENCHMARK("with ignored identifiers")
4333     {
4334         normalizer_iids.rewind_output();
4335         return normalizer_iids.normalize(input.c_str(), input.size());
4336     };
4337 }
4338 
4339 TEST_CASE("JS Normalizer, scope tracking", "[JSNormalizer]")
4340 {
4341     constexpr uint32_t depth = 65535;
4342     JSIdentifierCtxStub ident_ctx;
4343     JSNormalizer normalizer(ident_ctx, unlim_depth, max_template_nesting, depth);
4344 
4345     auto src_ws = make_input("", " ", "", depth);
4346     auto src_brace_rep = make_input_repeat("{}", depth);
4347     auto src_paren_rep = make_input_repeat("()", depth);
4348     auto src_bracket_rep = make_input_repeat("[]", depth);
4349 
4350     REQUIRE(norm_ret(normalizer, src_ws) == JSTokenizer::SCRIPT_ENDED);
4351     BENCHMARK("whitespaces")
4352     {
4353         normalizer.rewind_output();
4354         return normalizer.normalize(src_ws.c_str(), src_ws.size());
4355     };
4356 
4357     REQUIRE(norm_ret(normalizer, src_brace_rep) == JSTokenizer::SCRIPT_ENDED);
4358     BENCHMARK("...{}{}{}...")
4359     {
4360         normalizer.rewind_output();
4361         return normalizer.normalize(src_brace_rep.c_str(), src_brace_rep.size());
4362     };
4363 
4364     REQUIRE(norm_ret(normalizer, src_paren_rep) == JSTokenizer::SCRIPT_ENDED);
4365     BENCHMARK("...()()()...")
4366     {
4367         normalizer.rewind_output();
4368         return normalizer.normalize(src_paren_rep.c_str(), src_paren_rep.size());
4369     };
4370 
4371     REQUIRE(norm_ret(normalizer, src_bracket_rep) == JSTokenizer::SCRIPT_ENDED);
4372     BENCHMARK("...[][][]...")
4373     {
4374         normalizer.rewind_output();
4375         return normalizer.normalize(src_bracket_rep.c_str(), src_bracket_rep.size());
4376     };
4377 }
4378 
4379 TEST_CASE("JS Normalizer, automatic semicolon", "[JSNormalizer]")
4380 {
4381     auto w_semicolons = make_input("", "a;\n", "", depth);
4382     auto wo_semicolons = make_input("", "a \n", "", depth);
4383     const char* src_w_semicolons = w_semicolons.c_str();
4384     const char* src_wo_semicolons = wo_semicolons.c_str();
4385     size_t src_len = w_semicolons.size();
4386 
4387     JSIdentifierCtxStub ident_ctx_mock;
4388     JSNormalizer normalizer_wo_ident(ident_ctx_mock, unlim_depth, max_template_nesting, depth);
4389 
4390     REQUIRE(norm_ret(normalizer_wo_ident, w_semicolons) == JSTokenizer::SCRIPT_ENDED);
4391     BENCHMARK("without semicolon insertion")
4392     {
4393         normalizer_wo_ident.rewind_output();
4394         return normalizer_wo_ident.normalize(src_w_semicolons, src_len);
4395     };
4396 
4397     REQUIRE(norm_ret(normalizer_wo_ident, wo_semicolons) == JSTokenizer::SCRIPT_ENDED);
4398     BENCHMARK("with semicolon insertion")
4399     {
4400         normalizer_wo_ident.rewind_output();
4401         return normalizer_wo_ident.normalize(src_wo_semicolons, src_len);
4402     };
4403 }
4404 #endif // BENCHMARK_TEST
4405