1 #include "catch.hh"
2 #include <peglib.h>
3 #include <sstream>
4 
5 using namespace peg;
6 
7 TEST_CASE("Token boundary 1", "[token boundary]")
8 {
9     parser pg(R"(
10         ROOT        <- TOP
11         TOP         <- 'a' 'b' 'c'
12         %whitespace <- [ \t\r\n]*
13     )");
14 
15     REQUIRE(pg.parse(" a  b  c "));
16 }
17 
18 TEST_CASE("Token boundary 2", "[token boundary]")
19 {
20     parser pg(R"(
21         ROOT        <- TOP
22         TOP         <- < 'a' 'b' 'c' >
23         %whitespace <- [ \t\r\n]*
24     )");
25 
26     REQUIRE(!pg.parse(" a  b  c "));
27 }
28 
29 TEST_CASE("Token boundary 3", "[token boundary]")
30 {
31     parser pg(R"(
32         ROOT        <- TOP
33         TOP         <- < 'a' B 'c' >
34         B           <- 'b'
35         %whitespace <- [ \t\r\n]*
36     )");
37 
38     REQUIRE(!pg.parse(" a  b  c "));
39 }
40 
41 TEST_CASE("Token boundary 4", "[token boundary]")
42 {
43     parser pg(R"(
44         ROOT        <- TOP
45         TOP         <- < A 'b' 'c' >
46         A           <- 'a'
47         %whitespace <- [ \t\r\n]*
48     )");
49 
50     REQUIRE(!pg.parse(" a  b  c "));
51 }
52 
53 TEST_CASE("Token boundary 5", "[token boundary]")
54 {
55     parser pg(R"(
56         ROOT        <- TOP
57         TOP         <- A < 'b' C >
58         A           <- 'a'
59         C           <- 'c'
60         %whitespace <- [ \t\r\n]*
61     )");
62 
63     REQUIRE(!pg.parse(" a  b  c "));
64 }
65 
66 TEST_CASE("Token boundary 6", "[token boundary]")
67 {
68     parser pg(R"(
69         ROOT        <- TOP
70         TOP         <- < A > B C
71         A           <- 'a'
72         B           <- 'b'
73         C           <- 'c'
74         %whitespace <- [ \t\r\n]*
75     )");
76 
77     REQUIRE(pg.parse(" a  b  c "));
78 }
79 
80 TEST_CASE("Token boundary 7", "[token boundary]")
81 {
82     parser pg(R"(
83         ROOT        <- TOP
84         TOP         <- < A B C >
85         A           <- 'a'
86         B           <- 'b'
87         C           <- 'c'
88         %whitespace <- [ \t\r\n]*
89     )");
90 
91     REQUIRE(!pg.parse(" a  b  c "));
92 }
93 
94 TEST_CASE("Infinite loop 1", "[infinite loop]")
95 {
96     parser pg(R"(
97         ROOT  <- WH TOKEN* WH
98         TOKEN <- [a-z0-9]*
99         WH    <- [ \t]*
100     )");
101 
102   REQUIRE(!pg);
103 }
104 
105 TEST_CASE("Infinite loop 2", "[infinite loop]") {
106   parser pg(R"(
107         ROOT  <- WH TOKEN+ WH
108         TOKEN <- [a-z0-9]*
109         WH    <- [ \t]*
110     )");
111 
112   REQUIRE(!pg);
113 }
114 
115 TEST_CASE("Infinite loop 3", "[infinite loop]") {
116   parser pg(R"(
117         ROOT  <- WH TOKEN* WH
118         TOKEN <- !'word1'
119         WH    <- [ \t]*
120     )");
121 
122   REQUIRE(!pg);
123 }
124 
125 TEST_CASE("Infinite loop 4", "[infinite loop]") {
126   parser pg(R"(
127         ROOT  <- WH TOKEN* WH
128         TOKEN <- &'word1'
129         WH    <- [ \t]*
130     )");
131 
132   REQUIRE(!pg);
133 }
134 
135 TEST_CASE("Infinite loop 5", "[infinite loop]") {
136   parser pg(R"(
137         Numbers <- Number*
138         Number <- [0-9]+ / Spacing
139         Spacing <- ' ' / '\t' / '\n' / EOF # EOF is empty
140         EOF <- !.
141     )");
142 
143   REQUIRE(!pg);
144 }
145 
146 TEST_CASE("Infinite loop 6", "[infinite loop]") {
147   parser pg(R"(
148         S <- ''*
149     )");
150 
151   REQUIRE(!pg);
152 }
153 
154 TEST_CASE("Infinite loop 7", "[infinite loop]") {
155   parser pg(R"(
156         S <- A*
157         A <- ''
158     )");
159 
160   REQUIRE(!pg);
161 }
162 
163 TEST_CASE("Infinite loop 8", "[infinite loop]") {
164     parser pg(R"(
165         ROOT <- ('A' /)*
166     )");
167 
168     REQUIRE(!pg);
169 }
170 
171 TEST_CASE("Infinite loop 9", "[infinite loop]") {
172     parser pg(R"(
173         ROOT <- %recover(('A' /)*)
174     )");
175 
176     REQUIRE(!pg);
177 }
178 
179 TEST_CASE("Not infinite 1", "[infinite loop]") {
180   parser pg(R"(
181         Numbers <- Number* EOF
182         Number <- [0-9]+ / Spacing
183         Spacing <- ' ' / '\t' / '\n'
184         EOF <- !.
185     )");
186 
187   REQUIRE(!!pg); // OK
188 }
189 
190 TEST_CASE("Not infinite 2", "[infinite loop]") {
191   parser pg(R"(
192         ROOT      <-  _ ('[' TAG_NAME ']' _)*
193         # In a sequence operator, if there is at least one non-empty element, we can treat it as non-empty
194         TAG_NAME  <-  (!']' .)+
195         _         <-  [ \t]*
196     )");
197 
198   REQUIRE(!!pg); // OK
199 }
200 
201 TEST_CASE("Not infinite 3", "[infinite loop]") {
202   parser pg(R"(
203         EXPRESSION       <-  _ TERM (TERM_OPERATOR TERM)*
204         TERM             <-  FACTOR (FACTOR_OPERATOR FACTOR)*
205         FACTOR           <-  NUMBER / '(' _ EXPRESSION ')' _ # Recursive...
206         TERM_OPERATOR    <-  < [-+] > _
207         FACTOR_OPERATOR  <-  < [/*] > _
208         NUMBER           <-  < [0-9]+ > _
209         _                <-  [ \t\r\n]*
210     )");
211 
212   REQUIRE(!!pg); // OK
213 }
214 
215 TEST_CASE("Precedence climbing", "[precedence]") {
216   parser parser(R"(
217         START            <-  _ EXPRESSION
218         EXPRESSION       <-  ATOM (OPERATOR ATOM)* {
219                                precedence
220                                  L + -
221                                  L * /
222                              }
223         ATOM             <-  NUMBER / T('(') EXPRESSION T(')')
224         OPERATOR         <-  T([-+/*])
225         NUMBER           <-  T('-'? [0-9]+)
226 		~_               <-  [ \t]*
227 		T(S)             <-  < S > _
228 	)");
229 
230   REQUIRE(!!parser); // OK
231 
232   parser.enable_packrat_parsing();
233 
234   // Setup actions
__anone2d3858f0102(const SemanticValues &vs) 235   parser["EXPRESSION"] = [](const SemanticValues &vs) -> long {
236     auto result = std::any_cast<long>(vs[0]);
237     if (vs.size() > 1) {
238       auto ope = std::any_cast<char>(vs[1]);
239       auto num = std::any_cast<long>(vs[2]);
240       switch (ope) {
241       case '+': result += num; break;
242       case '-': result -= num; break;
243       case '*': result *= num; break;
244       case '/': result /= num; break;
245       }
246     }
247     return result;
248   };
__anone2d3858f0202(const SemanticValues &vs) 249   parser["OPERATOR"] = [](const SemanticValues &vs) { return *vs.sv().data(); };
__anone2d3858f0302(const SemanticValues &vs) 250   parser["NUMBER"] = [](const SemanticValues &vs) { return vs.token_to_number<long>(); };
251 
252   bool ret = parser;
253   REQUIRE(ret == true);
254 
255   {
256     auto expr = " 1 + 2 * 3 * (4 - 5 + 6) / 7 - 8 ";
257     long val = 0;
258     ret = parser.parse(expr, val);
259 
260     REQUIRE(ret == true);
261     REQUIRE(val == -3);
262   }
263 
264   {
265     auto expr = "-1+-2--3"; // -1 + -2 - -3 = 0
266     long val = 0;
267     ret = parser.parse(expr, val);
268 
269     REQUIRE(ret == true);
270     REQUIRE(val == 0);
271   }
272 }
273 
274 TEST_CASE("Precedence climbing with literal operator", "[precedence]") {
275   parser parser(R"(
276         START            <-  _ EXPRESSION
277         EXPRESSION       <-  ATOM (OPERATOR ATOM)* {
278                                precedence
279                                  L '#plus#' -     # weaker
280                                  L '#multiply#' / # stronger
281                              }
282         ATOM             <-  NUMBER / T('(') EXPRESSION T(')')
283         OPERATOR         <-  T('#plus#' / '#multiply#' / [-/])
284         NUMBER           <-  T('-'? [0-9]+)
285 		~_               <-  [ \t]*
286 		T(S)             <-  < S > _
287 	)");
288 
289   REQUIRE(!!parser); // OK
290 
291   parser.enable_packrat_parsing();
292 
293   // Setup actions
__anone2d3858f0402(const SemanticValues &vs) 294   parser["EXPRESSION"] = [](const SemanticValues &vs) -> long {
295     auto result = std::any_cast<long>(vs[0]);
296     if (vs.size() > 1) {
297       auto ope = std::any_cast<std::string>(vs[1]);
298       auto num = std::any_cast<long>(vs[2]);
299       if (ope == "#plus#") {
300         result += num;
301       } else if (ope == "-") {
302         result -= num;
303       } else if (ope == "#multiply#") {
304         result *= num;
305       } else if (ope == "/") {
306         result /= num;
307       }
308     }
309     return result;
310   };
__anone2d3858f0502(const SemanticValues &vs) 311   parser["OPERATOR"] = [](const SemanticValues &vs) { return vs.token_to_string(); };
__anone2d3858f0602(const SemanticValues &vs) 312   parser["NUMBER"] = [](const SemanticValues &vs) { return vs.token_to_number<long>(); };
313 
314   bool ret = parser;
315   REQUIRE(ret == true);
316 
317   {
318     auto expr = " 1 #plus#  2 #multiply# 3 #multiply# (4 - 5 #plus# 6) / 7 - 8 ";
319     long val = 0;
320     ret = parser.parse(expr, val);
321 
322     REQUIRE(ret == true);
323     REQUIRE(val == -3);
324   }
325 
326   {
327     auto expr = "-1#plus#-2--3"; // -1 + -2 - -3 = 0
328     long val = 0;
329     ret = parser.parse(expr, val);
330 
331     REQUIRE(ret == true);
332     REQUIRE(val == 0);
333   }
334 }
335 
336 TEST_CASE("Precedence climbing with macro", "[precedence]") {
337   // Create a PEG parser
338   parser parser(R"(
339         EXPRESSION             <-  INFIX_EXPRESSION(ATOM, OPERATOR)
340         INFIX_EXPRESSION(A, O) <-  A (O A)* {
341                                      precedence
342                                        L + -
343                                        L * /
344                                    }
345         ATOM                   <-  NUMBER / '(' EXPRESSION ')'
346         OPERATOR               <-  < [-+/*] >
347         NUMBER                 <-  < '-'? [0-9]+ >
348         %whitespace            <-  [ \t]*
349 	)");
350 
351   parser.enable_packrat_parsing();
352 
353   bool ret = parser;
354   REQUIRE(ret == true);
355 
356   // Setup actions
__anone2d3858f0702(const SemanticValues &vs) 357   parser["INFIX_EXPRESSION"] = [](const SemanticValues &vs) -> long {
358     auto result = std::any_cast<long>(vs[0]);
359     if (vs.size() > 1) {
360       auto ope = std::any_cast<char>(vs[1]);
361       auto num = std::any_cast<long>(vs[2]);
362       switch (ope) {
363       case '+': result += num; break;
364       case '-': result -= num; break;
365       case '*': result *= num; break;
366       case '/': result /= num; break;
367       }
368     }
369     return result;
370   };
__anone2d3858f0802(const SemanticValues &vs) 371   parser["OPERATOR"] = [](const SemanticValues &vs) { return *vs.sv().data(); };
__anone2d3858f0902(const SemanticValues &vs) 372   parser["NUMBER"] = [](const SemanticValues &vs) { return vs.token_to_number<long>(); };
373 
374   {
375     auto expr = " 1 + 2 * 3 * (4 - 5 + 6) / 7 - 8 ";
376     long val = 0;
377     ret = parser.parse(expr, val);
378 
379     REQUIRE(ret == true);
380     REQUIRE(val == -3);
381   }
382 
383   {
384     auto expr = "-1+-2--3"; // -1 + -2 - -3 = 0
385     long val = 0;
386     ret = parser.parse(expr, val);
387 
388     REQUIRE(ret == true);
389     REQUIRE(val == 0);
390   }
391 }
392 
393 TEST_CASE("Precedence climbing error1", "[precedence]") {
394   parser parser(R"(
395         START            <-  _ EXPRESSION
396         EXPRESSION       <-  ATOM (OPERATOR ATOM1)* {
397                                precedence
398                                  L + -
399                                  L * /
400                              }
401         ATOM             <-  NUMBER / T('(') EXPRESSION T(')')
402         ATOM1            <-  NUMBER / T('(') EXPRESSION T(')')
403         OPERATOR         <-  T([-+/*])
404         NUMBER           <-  T('-'? [0-9]+)
405 		~_               <-  [ \t]*
406 		T(S)             <-  < S > _
407 	)");
408 
409   bool ret = parser;
410   REQUIRE(ret == false);
411 }
412 
413 TEST_CASE("Precedence climbing error2", "[precedence]") {
414   parser parser(R"(
415         START            <-  _ EXPRESSION
416         EXPRESSION       <-  ATOM OPERATOR ATOM {
417                                precedence
418                                  L + -
419                                  L * /
420                              }
421         ATOM             <-  NUMBER / T('(') EXPRESSION T(')')
422         OPERATOR         <-  T([-+/*])
423         NUMBER           <-  T('-'? [0-9]+)
424 		~_               <-  [ \t]*
425 		T(S)             <-  < S > _
426 	)");
427 
428   bool ret = parser;
429   REQUIRE(ret == false);
430 }
431 
432 TEST_CASE("Precedence climbing error3", "[precedence]") {
433   parser parser(R"(
434         EXPRESSION               <-  PRECEDENCE_PARSING(ATOM, OPERATOR)
435         PRECEDENCE_PARSING(A, O) <-  A (O A)+ {
436                                        precedence
437                                          L + -
438                                          L * /
439                                      }
440         ATOM                     <-  NUMBER / '(' EXPRESSION ')'
441         OPERATOR                 <-  < [-+/*] >
442         NUMBER                   <-  < '-'? [0-9]+ >
443         %whitespace              <-  [ \t]*
444 	)");
445 
446   bool ret = parser;
447   REQUIRE(ret == false);
448 }
449 
450 TEST_CASE("Packrat parser test with %whitespace%", "[packrat]") {
451   peg::parser parser(R"(
452         ROOT         <-  'a'
453         %whitespace  <-  SPACE*
454         SPACE        <-  ' '
455     )");
456 
457   parser.enable_packrat_parsing();
458 
459   auto ret = parser.parse("a");
460   REQUIRE(ret == true);
461 }
462 
463 TEST_CASE("Packrat parser test with macro", "[packrat]") {
464   parser parser(R"(
465         EXPRESSION       <-  _ LIST(TERM, TERM_OPERATOR)
466         TERM             <-  LIST(FACTOR, FACTOR_OPERATOR)
467         FACTOR           <-  NUMBER / T('(') EXPRESSION T(')')
468         TERM_OPERATOR    <-  T([-+])
469         FACTOR_OPERATOR  <-  T([/*])
470         NUMBER           <-  T([0-9]+)
471 		~_               <-  [ \t]*
472 		LIST(I, D)       <-  I (D I)*
473 		T(S)             <-  < S > _
474 	)");
475 
476   parser.enable_packrat_parsing();
477 
478   auto ret = parser.parse(" 1 + 2 * 3 * (4 - 5 + 6) / 7 - 8 ");
479   REQUIRE(ret == true);
480 }
481 
482 TEST_CASE("Packrat parser test with precedence expression parser",
483           "[packrat]") {
484   peg::parser parser(R"(
485     Expression  <- Atom (Operator Atom)* { precedence L + - L * / }
486     Atom        <- _? Number _?
487     Number      <- [0-9]+
488     Operator    <- '+' / '-' / '*' / '/'
489     _           <- ' '+
490   )");
491 
492   bool ret = parser;
493   REQUIRE(ret == true);
494 
495   parser.enable_packrat_parsing();
496 
497   ret = parser.parse(" 1 + 2 * 3 ");
498   REQUIRE(ret == true);
499 }
500 
501 TEST_CASE("Backreference test", "[backreference]") {
502   parser parser(R"(
503         START  <- _ LQUOTE < (!RQUOTE .)* > RQUOTE _
504         LQUOTE <- 'R"' $delm< [a-zA-Z]* > '('
505         RQUOTE <- ')' $delm '"'
506         ~_     <- [ \t\r\n]*
507     )");
508 
509   std::string token;
__anone2d3858f0a02(const SemanticValues &vs) 510   parser["START"] = [&](const SemanticValues &vs) { token = vs.token(); };
511 
512   {
513     token.clear();
514     auto ret = parser.parse(R"delm(
515             R"("hello world")"
516         )delm");
517 
518     REQUIRE(ret == true);
519     REQUIRE(token == "\"hello world\"");
520   }
521 
522   {
523     token.clear();
524     auto ret = parser.parse(R"delm(
525             R"foo("(hello world)")foo"
526         )delm");
527 
528     REQUIRE(ret == true);
529     REQUIRE(token == "\"(hello world)\"");
530   }
531 
532   {
533     token.clear();
534     auto ret = parser.parse(R"delm(
535             R"foo("(hello world)foo")foo"
536         )delm");
537 
538     REQUIRE(ret == false);
539     REQUIRE(token == "\"(hello world");
540   }
541 
542   {
543     token.clear();
544     auto ret = parser.parse(R"delm(
545             R"foo("(hello world)")bar"
546         )delm");
547 
548     REQUIRE(ret == false);
549     REQUIRE(token.empty());
550   }
551 }
552 
553 TEST_CASE("Invalid backreference test", "[backreference]") {
554   parser parser(R"(
555         START  <- _ LQUOTE (!RQUOTE .)* RQUOTE _
556         LQUOTE <- 'R"' $delm< [a-zA-Z]* > '('
557         RQUOTE <- ')' $delm2 '"'
558         ~_     <- [ \t\r\n]*
559     )");
560 
561   REQUIRE_THROWS_AS(parser.parse(R"delm(
562             R"foo("(hello world)")foo"
563         )delm"),
564                     std::runtime_error);
565 }
566 
567 TEST_CASE("Nested capture test", "[backreference]") {
568   parser parser(R"(
569         ROOT      <- CONTENT
570         CONTENT   <- (ELEMENT / TEXT)*
571         ELEMENT   <- $(STAG CONTENT ETAG)
572         STAG      <- '<' $tag< TAG_NAME > '>'
573         ETAG      <- '</' $tag '>'
574         TAG_NAME  <- 'b' / 'u'
575         TEXT      <- TEXT_DATA
576         TEXT_DATA <- ![<] .
577     )");
578 
579   REQUIRE(parser.parse("This is <b>a <u>test</u> text</b>."));
580   REQUIRE(!parser.parse("This is <b>a <u>test</b> text</u>."));
581   REQUIRE(!parser.parse("This is <b>a <u>test text</b>."));
582   REQUIRE(!parser.parse("This is a <u>test</u> text</b>."));
583 }
584 
585 TEST_CASE("Backreference with Prioritized Choice test", "[backreference]") {
586   parser parser(R"(
587         TREE           <- WRONG_BRANCH / CORRECT_BRANCH
588         WRONG_BRANCH   <- BRANCH THAT IS_capture WRONG
589         CORRECT_BRANCH <- BRANCH THAT IS_backref CORRECT
590         BRANCH         <- 'branch'
591         THAT           <- 'that'
592         IS_capture     <- $ref<..>
593         IS_backref     <- $ref
594         WRONG          <- 'wrong'
595         CORRECT        <- 'correct'
596     )");
597 
598   REQUIRE_THROWS_AS(parser.parse("branchthatiscorrect"), std::runtime_error);
599 }
600 
601 TEST_CASE("Backreference with Zero or More test", "[backreference]") {
602   parser parser(R"(
603         TREE           <- WRONG_BRANCH* CORRECT_BRANCH
604         WRONG_BRANCH   <- BRANCH THAT IS_capture WRONG
605         CORRECT_BRANCH <- BRANCH THAT IS_backref CORRECT
606         BRANCH         <- 'branch'
607         THAT           <- 'that'
608         IS_capture     <- $ref<..>
609         IS_backref     <- $ref
610         WRONG          <- 'wrong'
611         CORRECT        <- 'correct'
612     )");
613 
614   REQUIRE(parser.parse("branchthatiswrongbranchthatiscorrect"));
615   REQUIRE(!parser.parse("branchthatiswrongbranchthatIscorrect"));
616   REQUIRE(
617       !parser.parse("branchthatiswrongbranchthatIswrongbranchthatiscorrect"));
618   REQUIRE(
619       parser.parse("branchthatiswrongbranchthatIswrongbranchthatIscorrect"));
620   REQUIRE_THROWS_AS(parser.parse("branchthatiscorrect"), std::runtime_error);
621   REQUIRE_THROWS_AS(parser.parse("branchthatiswron_branchthatiscorrect"),
622                     std::runtime_error);
623 }
624 
625 TEST_CASE("Backreference with One or More test", "[backreference]") {
626   parser parser(R"(
627         TREE           <- WRONG_BRANCH+ CORRECT_BRANCH
628         WRONG_BRANCH   <- BRANCH THAT IS_capture WRONG
629         CORRECT_BRANCH <- BRANCH THAT IS_backref CORRECT
630         BRANCH         <- 'branch'
631         THAT           <- 'that'
632         IS_capture     <- $ref<..>
633         IS_backref     <- $ref
634         WRONG          <- 'wrong'
635         CORRECT        <- 'correct'
636     )");
637 
638   REQUIRE(parser.parse("branchthatiswrongbranchthatiscorrect"));
639   REQUIRE(!parser.parse("branchthatiswrongbranchthatIscorrect"));
640   REQUIRE(
641       !parser.parse("branchthatiswrongbranchthatIswrongbranchthatiscorrect"));
642   REQUIRE(
643       parser.parse("branchthatiswrongbranchthatIswrongbranchthatIscorrect"));
644   REQUIRE(!parser.parse("branchthatiscorrect"));
645   REQUIRE(!parser.parse("branchthatiswron_branchthatiscorrect"));
646 }
647 
648 TEST_CASE("Backreference with Option test", "[backreference]") {
649   parser parser(R"(
650         TREE           <- WRONG_BRANCH? CORRECT_BRANCH
651         WRONG_BRANCH   <- BRANCH THAT IS_capture WRONG
652         CORRECT_BRANCH <- BRANCH THAT IS_backref CORRECT
653         BRANCH         <- 'branch'
654         THAT           <- 'that'
655         IS_capture     <- $ref<..>
656         IS_backref     <- $ref
657         WRONG          <- 'wrong'
658         CORRECT        <- 'correct'
659     )");
660 
661   REQUIRE(parser.parse("branchthatiswrongbranchthatiscorrect"));
662   REQUIRE(!parser.parse("branchthatiswrongbranchthatIscorrect"));
663   REQUIRE(
664       !parser.parse("branchthatiswrongbranchthatIswrongbranchthatiscorrect"));
665   REQUIRE(
666       !parser.parse("branchthatiswrongbranchthatIswrongbranchthatIscorrect"));
667   REQUIRE_THROWS_AS(parser.parse("branchthatiscorrect"), std::runtime_error);
668   REQUIRE_THROWS_AS(parser.parse("branchthatiswron_branchthatiscorrect"),
669                     std::runtime_error);
670 }
671 
672 TEST_CASE("Repetition {0}", "[repetition]") {
673   parser parser(R"(
674         START <- '(' DIGIT{3} ') ' DIGIT{3} '-' DIGIT{4}
675         DIGIT <- [0-9]
676     )");
677   REQUIRE(parser.parse("(123) 456-7890"));
678   REQUIRE(!parser.parse("(12a) 456-7890"));
679   REQUIRE(!parser.parse("(123) 45-7890"));
680   REQUIRE(!parser.parse("(123) 45-7a90"));
681 }
682 
683 TEST_CASE("Repetition {2,4}", "[repetition]") {
684   parser parser(R"(
685         START <- DIGIT{2,4}
686         DIGIT <- [0-9]
687     )");
688   REQUIRE(!parser.parse("1"));
689   REQUIRE(parser.parse("12"));
690   REQUIRE(parser.parse("123"));
691   REQUIRE(parser.parse("1234"));
692   REQUIRE(!parser.parse("12345"));
693 }
694 
695 TEST_CASE("Repetition {2,1}", "[repetition]") {
696   parser parser(R"(
697         START <- DIGIT{2,1} # invalid range
698         DIGIT <- [0-9]
699     )");
700   REQUIRE(!parser.parse("1"));
701   REQUIRE(parser.parse("12"));
702   REQUIRE(!parser.parse("123"));
703 }
704 
705 TEST_CASE("Repetition {2,}", "[repetition]") {
706   parser parser(R"(
707         START <- DIGIT{2,}
708         DIGIT <- [0-9]
709     )");
710   REQUIRE(!parser.parse("1"));
711   REQUIRE(parser.parse("12"));
712   REQUIRE(parser.parse("123"));
713   REQUIRE(parser.parse("1234"));
714 }
715 
716 TEST_CASE("Repetition {,2}", "[repetition]") {
717   parser parser(R"(
718         START <- DIGIT{,2}
719         DIGIT <- [0-9]
720     )");
721   REQUIRE(parser.parse("1"));
722   REQUIRE(parser.parse("12"));
723   REQUIRE(!parser.parse("123"));
724   REQUIRE(!parser.parse("1234"));
725 }
726 
727 TEST_CASE("Left recursive test", "[left recursive]") {
728   parser parser(R"(
729         A <- A 'a'
730         B <- A 'a'
731     )");
732 
733   REQUIRE(!parser);
734 }
735 
736 TEST_CASE("Left recursive with option test", "[left recursive]") {
737   parser parser(R"(
738         A  <- 'a' / 'b'? B 'c'
739         B  <- A
740     )");
741 
742   REQUIRE(!parser);
743 }
744 
745 TEST_CASE("Left recursive with zom test", "[left recursive]") {
746   parser parser(R"(
747         A <- 'a'* A*
748     )");
749 
750   REQUIRE(!parser);
751 }
752 
753 TEST_CASE("Left recursive with a ZOM content rule", "[left recursive]") {
754   parser parser(R"(
755         A <- B
756         B <- _ A
757         _ <- ' '* # Zero or more
758     )");
759 
760   REQUIRE(!parser);
761 }
762 
763 TEST_CASE("Left recursive with empty string test", "[left recursive]") {
764   parser parser(" A <- '' A");
765 
766   REQUIRE(!parser);
767 }
768 
769 TEST_CASE("User defined rule test", "[user rule]") {
770   auto g = parser(R"(
771         ROOT <- _ 'Hello' _ NAME '!' _
772     )",
773                   {{"NAME", usr([](const char *s, size_t n, SemanticValues &,
__anone2d3858f0b02(const char *s, size_t n, SemanticValues &, std::any &) 774                                    std::any &) -> size_t {
775                       static std::vector<std::string> names = {"PEG", "BNF"};
776                       for (const auto &name : names) {
777                         if (name.size() <= n &&
778                             !name.compare(0, name.size(), s, name.size())) {
779                           return name.size();
780                         }
781                       }
782                       return static_cast<size_t>(-1);
783                     })},
784                    {"~_", zom(cls(" \t\r\n"))}});
785 
786   REQUIRE(g.parse(" Hello BNF! ") == true);
787 }
788 
789 TEST_CASE("Semantic predicate test", "[predicate]") {
790   parser parser("NUMBER  <-  [0-9]+");
791 
__anone2d3858f0c02(const SemanticValues &vs) 792   parser["NUMBER"] = [](const SemanticValues &vs) {
793     auto val = vs.token_to_number<long>();
794     if (val != 100) { throw parse_error("value error!!"); }
795     return val;
796   };
797 
798   long val;
799   REQUIRE(parser.parse("100", val));
800   REQUIRE(val == 100);
801 
__anone2d3858f0d02(size_t line, size_t col, const std::string &msg) 802   parser.log = [](size_t line, size_t col, const std::string &msg) {
803     REQUIRE(line == 1);
804     REQUIRE(col == 1);
805     REQUIRE(msg == "value error!!");
806   };
807   REQUIRE(!parser.parse("200", val));
808 }
809 
810 TEST_CASE("Japanese character", "[unicode]") {
811   peg::parser parser(u8R"(
812         文 <- 修飾語? 主語 述語 '。'
813         主語 <- 名詞 助詞
814         述語 <- 動詞 助詞
815         修飾語 <- 形容詞
816         名詞 <- 'サーバー' / 'クライアント'
817         形容詞 <- '古い' / '新しい'
818         動詞 <- '落ち' / '復旧し'
819         助詞 <- 'が' / 'を' / 'た' / 'ます' / 'に'
820     )");
821 
822   bool ret = parser;
823   REQUIRE(ret == true);
824 
825   REQUIRE(parser.parse(u8R"(サーバーを復旧します。)"));
826 }
827 
828 TEST_CASE("dot with a code", "[unicode]") {
829   peg::parser parser(" S <- 'a' . 'b' ");
830   REQUIRE(parser.parse(u8R"(aあb)"));
831 }
832 
833 TEST_CASE("dot with a char", "[unicode]") {
834   peg::parser parser(" S <- 'a' . 'b' ");
835   REQUIRE(parser.parse(u8R"(aåb)"));
836 }
837 
838 TEST_CASE("character class", "[unicode]") {
839   peg::parser parser(R"(
840         S <- 'a' [い-おAさC-Eた-とは] 'b'
841     )");
842 
843   bool ret = parser;
844   REQUIRE(ret == true);
845 
846   REQUIRE(!parser.parse(u8R"(aあb)"));
847   REQUIRE(parser.parse(u8R"(aいb)"));
848   REQUIRE(parser.parse(u8R"(aうb)"));
849   REQUIRE(parser.parse(u8R"(aおb)"));
850   REQUIRE(!parser.parse(u8R"(aかb)"));
851   REQUIRE(parser.parse(u8R"(aAb)"));
852   REQUIRE(!parser.parse(u8R"(aBb)"));
853   REQUIRE(parser.parse(u8R"(aEb)"));
854   REQUIRE(!parser.parse(u8R"(aFb)"));
855   REQUIRE(!parser.parse(u8R"(aそb)"));
856   REQUIRE(parser.parse(u8R"(aたb)"));
857   REQUIRE(parser.parse(u8R"(aちb)"));
858   REQUIRE(parser.parse(u8R"(aとb)"));
859   REQUIRE(!parser.parse(u8R"(aなb)"));
860   REQUIRE(parser.parse(u8R"(aはb)"));
861   REQUIRE(!parser.parse(u8R"(a?b)"));
862 }
863 
864 #if 0 // TODO: Unicode Grapheme support
865 TEST_CASE("dot with a grapheme", "[unicode]")
866 {
867     peg::parser parser(" S <- 'a' . 'b' ");
868     REQUIRE(parser.parse(u8R"(aसिb)"));
869 }
870 #endif
871 
872 TEST_CASE("Macro simple test", "[macro]") {
873   parser parser(R"(
874 		S     <- HELLO WORLD
875 		HELLO <- T('hello')
876 		WORLD <- T('world')
877 		T(a)  <- a [ \t]*
878 	)");
879 
880   REQUIRE(parser.parse("hello \tworld "));
881 }
882 
883 TEST_CASE("Macro two parameters", "[macro]") {
884   parser parser(R"(
885 		S           <- HELLO_WORLD
886 		HELLO_WORLD <- T('hello', 'world')
887 		T(a, b)     <- a [ \t]* b [ \t]*
888 	)");
889 
890   REQUIRE(parser.parse("hello \tworld "));
891 }
892 
893 TEST_CASE("Macro syntax error", "[macro]") {
894   parser parser(R"(
895 		S     <- T('hello')
896 		T (a) <- a [ \t]*
897 	)");
898 
899   bool ret = parser;
900   REQUIRE(ret == false);
901 }
902 
903 TEST_CASE("Macro missing argument", "[macro]") {
904   parser parser(R"(
905 		S       <- T ('hello')
906 		T(a, b) <- a [ \t]* b
907 	)");
908 
909   bool ret = parser;
910   REQUIRE(ret == false);
911 }
912 
913 TEST_CASE("Macro reference syntax error", "[macro]") {
914   parser parser(R"(
915 		S    <- T ('hello')
916 		T(a) <- a [ \t]*
917 	)");
918 
919   bool ret = parser;
920   REQUIRE(ret == false);
921 }
922 
923 TEST_CASE("Macro invalid macro reference error", "[macro]") {
924   parser parser(R"(
925 		S <- T('hello')
926 		T <- 'world'
927 	)");
928 
929   bool ret = parser;
930   REQUIRE(ret == false);
931 }
932 
933 TEST_CASE("Macro calculator", "[macro]") {
934   // Create a PEG parser
935   parser parser(R"(
936         # Grammar for simple calculator...
937         EXPRESSION       <-  _ LIST(TERM, TERM_OPERATOR)
938         TERM             <-  LIST(FACTOR, FACTOR_OPERATOR)
939         FACTOR           <-  NUMBER / T('(') EXPRESSION T(')')
940         TERM_OPERATOR    <-  T([-+])
941         FACTOR_OPERATOR  <-  T([/*])
942         NUMBER           <-  T([0-9]+)
943 		~_               <-  [ \t]*
944 		LIST(I, D)       <-  I (D I)*
945 		T(S)             <-  < S > _
946 	)");
947 
948   // Setup actions
__anone2d3858f0e02(const SemanticValues &vs) 949   auto reduce = [](const SemanticValues &vs) {
950     auto result = std::any_cast<long>(vs[0]);
951     for (auto i = 1u; i < vs.size(); i += 2) {
952       auto num = std::any_cast<long>(vs[i + 1]);
953       auto ope = std::any_cast<char>(vs[i]);
954       switch (ope) {
955       case '+': result += num; break;
956       case '-': result -= num; break;
957       case '*': result *= num; break;
958       case '/': result /= num; break;
959       }
960     }
961     return result;
962   };
963 
964   parser["EXPRESSION"] = reduce;
965   parser["TERM"] = reduce;
__anone2d3858f0f02(const SemanticValues &vs) 966   parser["TERM_OPERATOR"] = [](const SemanticValues &vs) {
967     return static_cast<char>(*vs.sv().data());
968   };
__anone2d3858f1002(const SemanticValues &vs) 969   parser["FACTOR_OPERATOR"] = [](const SemanticValues &vs) {
970     return static_cast<char>(*vs.sv().data());
971   };
__anone2d3858f1102(const SemanticValues &vs) 972   parser["NUMBER"] = [](const SemanticValues &vs) { return vs.token_to_number<long>(); };
973 
974   bool ret = parser;
975   REQUIRE(ret == true);
976 
977   auto expr = " 1 + 2 * 3 * (4 - 5 + 6) / 7 - 8 ";
978   long val = 0;
979   ret = parser.parse(expr, val);
980 
981   REQUIRE(ret == true);
982   REQUIRE(val == -3);
983 }
984 
985 TEST_CASE("Macro expression arguments", "[macro]") {
986   parser parser(R"(
987 		S             <- M('hello' / 'Hello', 'world' / 'World')
988 		M(arg0, arg1) <- arg0 [ \t]+ arg1
989 	)");
990 
991   REQUIRE(parser.parse("Hello world"));
992 }
993 
994 TEST_CASE("Macro recursive", "[macro]") {
995   parser parser(R"(
996 		S    <- M('abc')
997 		M(s) <- !s / s ' ' M(s / '123') / s
998 	)");
999 
1000   REQUIRE(parser.parse(""));
1001   REQUIRE(parser.parse("abc"));
1002   REQUIRE(parser.parse("abc abc"));
1003   REQUIRE(parser.parse("abc 123 abc"));
1004 }
1005 
1006 TEST_CASE("Macro recursive2", "[macro]") {
1007   auto syntaxes = std::vector<const char *>{
1008       "S <- M('abc') M(s) <- !s / s ' ' M(s* '-' '123') / s",
1009       "S <- M('abc') M(s) <- !s / s ' ' M(s+ '-' '123') / s",
1010       "S <- M('abc') M(s) <- !s / s ' ' M(s? '-' '123') / s",
1011       "S <- M('abc') M(s) <- !s / s ' ' M(&s s+ '-' '123') / s",
1012       "S <- M('abc') M(s) <- !s / s ' ' M(s '-' !s '123') / s",
1013       "S <- M('abc') M(s) <- !s / s ' ' M(< s > '-' '123') / s",
1014       "S <- M('abc') M(s) <- !s / s ' ' M(~s '-' '123') / s",
1015   };
1016 
1017   for (const auto &syntax : syntaxes) {
1018     parser parser(syntax);
1019     REQUIRE(parser.parse("abc abc-123"));
1020   }
1021 }
1022 
1023 TEST_CASE("Macro exclusive modifiers", "[macro]") {
1024   parser parser(R"(
1025 		S                   <- Modifiers(!"") _
1026 		Modifiers(Appeared) <- (!Appeared) (
1027 								   Token('public') Modifiers(Appeared / 'public') /
1028 								   Token('static') Modifiers(Appeared / 'static') /
1029 								   Token('final') Modifiers(Appeared / 'final') /
1030 								   "")
1031 		Token(t)            <- t _
1032 		_                   <- [ \t\r\n]*
1033 	)");
1034 
1035   REQUIRE(parser.parse("public"));
1036   REQUIRE(parser.parse("static"));
1037   REQUIRE(parser.parse("final"));
1038   REQUIRE(parser.parse("public static final"));
1039   REQUIRE(!parser.parse("public public"));
1040   REQUIRE(!parser.parse("public static public"));
1041 }
1042 
1043 TEST_CASE("Macro token check test", "[macro]") {
1044   parser parser(R"(
1045         # Grammar for simple calculator...
1046         EXPRESSION       <-  _ LIST(TERM, TERM_OPERATOR)
1047         TERM             <-  LIST(FACTOR, FACTOR_OPERATOR)
1048         FACTOR           <-  NUMBER / T('(') EXPRESSION T(')')
1049         TERM_OPERATOR    <-  T([-+])
1050         FACTOR_OPERATOR  <-  T([/*])
1051         NUMBER           <-  T([0-9]+)
1052 		~_               <-  [ \t]*
1053 		LIST(I, D)       <-  I (D I)*
1054 		T(S)             <-  < S > _
1055 	)");
1056 
1057   REQUIRE(parser["EXPRESSION"].is_token() == false);
1058   REQUIRE(parser["TERM"].is_token() == false);
1059   REQUIRE(parser["FACTOR"].is_token() == false);
1060   REQUIRE(parser["FACTOR_OPERATOR"].is_token() == true);
1061   REQUIRE(parser["NUMBER"].is_token() == true);
1062   REQUIRE(parser["_"].is_token() == true);
1063   REQUIRE(parser["LIST"].is_token() == false);
1064   REQUIRE(parser["T"].is_token() == true);
1065 }
1066 
1067 TEST_CASE("Macro passes an arg to another macro", "[macro]") {
1068   parser parser(R"(
1069         A    <- B(C)
1070         B(D) <- D
1071         C    <- 'c'
1072 	)");
1073 
1074   REQUIRE(parser.parse("c"));
1075 }
1076 
1077 TEST_CASE("Unreferenced rule", "[macro]") {
1078   parser parser(R"(
1079         A    <- B(C)
1080         B(D) <- D
1081         C    <- 'c'
1082         D    <- 'd'
1083 	)");
1084 
1085   bool ret = parser;
1086   REQUIRE(ret == true); // This is OK, because it's a warning, not an erro...
1087 }
1088 
1089 TEST_CASE("Nested macro call", "[macro]") {
1090   parser parser(R"(
1091         A    <- B(T)
1092         B(X) <- C(X)
1093         C(Y) <- Y
1094         T    <- 'val'
1095 	)");
1096 
1097   REQUIRE(parser.parse("val"));
1098 }
1099 
1100 TEST_CASE("Nested macro call2", "[macro]") {
1101   parser parser(R"(
1102         START           <- A('TestVal1', 'TestVal2')+
1103         A(Aarg1, Aarg2) <- B(Aarg1) '#End'
1104         B(Barg1)        <- '#' Barg1
1105 	)");
1106 
1107   REQUIRE(parser.parse("#TestVal1#End"));
1108 }
1109 
1110 TEST_CASE("Line information test", "[line information]") {
1111   parser parser(R"(
1112         S    <- _ (WORD _)+
1113         WORD <- [A-Za-z]+
1114         ~_   <- [ \t\r\n]+
1115     )");
1116 
1117   std::vector<std::pair<size_t, size_t>> locations;
__anone2d3858f1202(const peg::SemanticValues &vs) 1118   parser["WORD"] = [&](const peg::SemanticValues &vs) {
1119     locations.push_back(vs.line_info());
1120   };
1121 
1122   bool ret = parser;
1123   REQUIRE(ret == true);
1124 
1125   ret = parser.parse(" Mon Tue Wed \nThu  Fri  Sat\nSun\n");
1126   REQUIRE(ret == true);
1127 
1128   REQUIRE(locations[0] == std::make_pair<size_t, size_t>(1, 2));
1129   REQUIRE(locations[1] == std::make_pair<size_t, size_t>(1, 6));
1130   REQUIRE(locations[2] == std::make_pair<size_t, size_t>(1, 10));
1131   REQUIRE(locations[3] == std::make_pair<size_t, size_t>(2, 1));
1132   REQUIRE(locations[4] == std::make_pair<size_t, size_t>(2, 6));
1133   REQUIRE(locations[5] == std::make_pair<size_t, size_t>(2, 11));
1134   REQUIRE(locations[6] == std::make_pair<size_t, size_t>(3, 1));
1135 }
1136 
1137 TEST_CASE("Dictionary", "[dic]") {
1138   parser parser(R"(
1139         START <- 'This month is ' MONTH '.'
1140         MONTH <- 'Jan' | 'January' | 'Feb' | 'February'
1141 	)");
1142 
1143   REQUIRE(parser.parse("This month is Jan."));
1144   REQUIRE(parser.parse("This month is January."));
1145   REQUIRE_FALSE(parser.parse("This month is Jannuary."));
1146   REQUIRE_FALSE(parser.parse("This month is ."));
1147 }
1148 
1149 TEST_CASE("Dictionary invalid", "[dic]") {
1150   parser parser(R"(
1151         START <- 'This month is ' MONTH '.'
1152         MONTH <- 'Jan' | 'January' | [a-z]+ | 'Feb' | 'February'
1153 	)");
1154 
1155   bool ret = parser;
1156   REQUIRE_FALSE(ret);
1157 }
1158 
1159 TEST_CASE("Error recovery 1", "[error]") {
1160   parser pg(R"(
1161 START      <- __? SECTION*
1162 
1163 SECTION    <- HEADER __ ENTRIES __?
1164 
1165 HEADER     <- '[' _ CATEGORY (':' _  ATTRIBUTES)? ']'^header
1166 
1167 CATEGORY   <- < [-_a-zA-Z0-9\u0080-\uFFFF ]+ > _
1168 ATTRIBUTES <- ATTRIBUTE (',' _ ATTRIBUTE)*
1169 ATTRIBUTE  <- < [-_a-zA-Z0-9\u0080-\uFFFF]+ > _
1170 
1171 ENTRIES    <- (ENTRY (__ ENTRY)*)? { no_ast_opt }
1172 
1173 ENTRY      <- ONE_WAY PHRASE ('|' _ PHRASE)* !'='
1174             / PHRASE ('|' _ PHRASE)+ !'='
1175             / %recover(entry)
1176 
1177 ONE_WAY    <- PHRASE '=' _
1178 PHRASE     <- WORD (' ' WORD)* _
1179 WORD       <- < (![ \t\r\n=|[\]#] .)+ >
1180 
1181 ~__        <- _ (comment? nl _)+
1182 ~_         <- [ \t]*
1183 
1184 comment    <- ('#' (!nl .)*)
1185 nl         <- '\r'? '\n'
1186 
1187 header <- (!__ .)* { message "invalid section header, missing ']'." }
1188 entry  <- (!(__ / HEADER) .)+ { message "invalid entry." }
1189   )");
1190 
1191   REQUIRE(!!pg); // OK
1192 
1193   std::vector<std::string> errors{
1194     R"(3:1: invalid entry.)",
1195     R"(7:1: invalid entry.)",
1196     R"(10:11: invalid section header, missing ']'.)",
1197     R"(18:1: invalid entry.)",
1198   };
1199 
1200   size_t i = 0;
__anone2d3858f1302(size_t ln, size_t col, const std::string &msg) 1201   pg.log = [&](size_t ln, size_t col, const std::string &msg) {
1202     std::stringstream ss;
1203     ss << ln << ":" << col << ": " << msg;
1204     REQUIRE(ss.str() == errors[i++]);
1205   };
1206 
1207   pg.enable_ast();
1208 
1209   std::shared_ptr<Ast> ast;
1210   REQUIRE_FALSE(pg.parse(R"([Section 1]
1211 111 = 222 | 333
1212 aaa || bbb
1213 ccc = ddd
1214 
1215 [Section 2]
1216 eee
1217 fff | ggg
1218 
1219 [Section 3
1220 hhh | iii
1221 
1222 [Section 日本語]
1223 ppp | qqq
1224 
1225 [Section 4]
1226 jjj | kkk
1227 lll = mmm | nnn = ooo
1228 
1229 [Section 5]
1230 rrr | sss
1231 
1232   )", ast));
1233 
1234   ast = pg.optimize_ast(ast);
1235 
1236   REQUIRE(ast_to_s(ast) ==
1237 R"(+ START
1238   + SECTION
1239     - HEADER/0[CATEGORY] (Section 1)
1240     + ENTRIES
1241       + ENTRY/0
1242         - ONE_WAY/0[WORD] (111)
1243         - PHRASE/0[WORD] (222)
1244         - PHRASE/0[WORD] (333)
1245       + ENTRY/2
1246       + ENTRY/0
1247         - ONE_WAY/0[WORD] (ccc)
1248         - PHRASE/0[WORD] (ddd)
1249   + SECTION
1250     - HEADER/0[CATEGORY] (Section 2)
1251     + ENTRIES
1252       + ENTRY/2
1253       + ENTRY/1
1254         - PHRASE/0[WORD] (fff)
1255         - PHRASE/0[WORD] (ggg)
1256   + SECTION
1257     - HEADER/0[CATEGORY] (Section 3)
1258     + ENTRIES
1259       + ENTRY/1
1260         - PHRASE/0[WORD] (hhh)
1261         - PHRASE/0[WORD] (iii)
1262   + SECTION
1263     - HEADER/0[CATEGORY] (Section 日本語)
1264     + ENTRIES
1265       + ENTRY/1
1266         - PHRASE/0[WORD] (ppp)
1267         - PHRASE/0[WORD] (qqq)
1268   + SECTION
1269     - HEADER/0[CATEGORY] (Section 4)
1270     + ENTRIES
1271       + ENTRY/1
1272         - PHRASE/0[WORD] (jjj)
1273         - PHRASE/0[WORD] (kkk)
1274       + ENTRY/2
1275   + SECTION
1276     - HEADER/0[CATEGORY] (Section 5)
1277     + ENTRIES
1278       + ENTRY/1
1279         - PHRASE/0[WORD] (rrr)
1280         - PHRASE/0[WORD] (sss)
1281 )");
1282 }
1283 
1284 TEST_CASE("Error recovery 2", "[error]") {
1285   parser pg(R"(
1286     START <- ENTRY ((',' ENTRY) / %recover((!(',' / Space) .)+))* (_ / %recover(.*))
1287     ENTRY <- '[' ITEM (',' ITEM)* ']'
1288     ITEM  <- WORD / NUM / %recover((!(',' / ']') .)+)
1289     NUM   <- [0-9]+ ![a-z]
1290     WORD  <- '"' [a-z]+ '"'
1291 
1292     ~_    <- Space+
1293     Space <- [ \n]
1294   )");
1295 
1296   REQUIRE(!!pg); // OK
1297 
1298   std::vector<std::string> errors{
1299     R"(1:6: syntax error, unexpected ']'.)",
1300     R"(1:18: syntax error, unexpected 'z', expecting <NUM>.)",
1301     R"(1:24: syntax error, unexpected ',', expecting <WORD>.)",
1302     R"(1:31: syntax error, unexpected 'ccc', expecting <NUM>.)",
1303     R"(1:38: syntax error, unexpected 'ddd', expecting <NUM>.)",
1304     R"(1:55: syntax error, unexpected ']', expecting <WORD>.)",
1305     R"(1:58: syntax error, unexpected '\n', expecting <NUM>.)",
1306     R"(2:3: syntax error.)",
1307   };
1308 
1309   size_t i = 0;
__anone2d3858f1402(size_t ln, size_t col, const std::string &msg) 1310   pg.log = [&](size_t ln, size_t col, const std::string &msg) {
1311     std::stringstream ss;
1312     ss << ln << ":" << col << ": " << msg;
1313     REQUIRE(ss.str() == errors[i++]);
1314   };
1315 
1316   pg.enable_ast();
1317 
1318   std::shared_ptr<Ast> ast;
1319   REQUIRE_FALSE(pg.parse(R"([000]],[111],[222z,"aaa,"bbb",ccc"],[ddd",444,555,"eee],[
1320   )", ast));
1321 
1322   ast = pg.optimize_ast(ast);
1323 
1324   REQUIRE(ast_to_s(ast) ==
1325 R"(+ START
1326   - ENTRY/0[NUM] (000)
1327   - ENTRY/0[NUM] (111)
1328   + ENTRY
1329     + ITEM/2
1330     + ITEM/2
1331     - ITEM/0[WORD] ("bbb")
1332     + ITEM/2
1333   + ENTRY
1334     + ITEM/2
1335     - ITEM/1[NUM] (444)
1336     - ITEM/1[NUM] (555)
1337     + ITEM/2
1338 )");
1339 }
1340 
1341 TEST_CASE("Error recovery 3", "[error]") {
1342   parser pg(R"~(
1343 # Grammar
1344 START      <- __? SECTION*
1345 
1346 SECTION    <- HEADER __ ENTRIES __?
1347 
1348 HEADER     <- '['^missing_bracket _ CATEGORY (':' _  ATTRIBUTES)? ']'^missing_bracket ___
1349 
1350 CATEGORY   <- < (&[-_a-zA-Z0-9\u0080-\uFFFF ] (![\u0080-\uFFFF])^vernacular_char .)+ > _
1351 ATTRIBUTES <- ATTRIBUTE (',' _ ATTRIBUTE)*
1352 ATTRIBUTE  <- < [-_a-zA-Z0-9]+ > _
1353 
1354 ENTRIES    <- (ENTRY (__ ENTRY)*)? { no_ast_opt }
1355 
1356 ENTRY      <- ONE_WAY PHRASE^expect_phrase (or _ PHRASE^expect_phrase)* ___
1357             / PHRASE (or^missing_or _ PHRASE^expect_phrase) (or _ PHRASE^expect_phrase)* ___ { no_ast_opt }
1358 
1359 ONE_WAY    <- PHRASE assign _
1360 PHRASE     <- WORD (' ' WORD)* _ { no_ast_opt }
1361 WORD       <- < (![ \t\r\n=|[\]#] (![*?] / %recover(wildcard)) .)+ >
1362 
1363 ~assign    <- '=' ____
1364 ~or        <- '|' (!'|')^duplicate_or ____
1365 
1366 ~_         <- [ \t]*
1367 ~__        <- _ (comment? nl _)+
1368 ~___       <- (!operators)^invalid_ope
1369 ~____      <- (!operators)^invalid_ope_comb
1370 
1371 operators  <- [|=]+
1372 comment    <- ('#' (!nl .)*)
1373 nl         <- '\r'? '\n'
1374 
1375 # Recovery
1376 duplicate_or     <- skip_puncs { message "Duplicate OR operator (|)" }
1377 missing_or       <- '' { message "Missing OR operator (|)" }
1378 missing_bracket  <- skip_puncs { message "Missing opening/closing square bracket" }
1379 expect_phrase    <- skip { message "Expect phrase" }
1380 invalid_ope_comb <- skip_puncs { message "Use of invalid operator combination" }
1381 invalid_ope      <- skip { message "Use of invalid operator" }
1382 wildcard         <- '' { message "Wildcard characters (%c) should not be used" }
1383 vernacular_char  <- '' { message "Section name %c must be in English" }
1384 
1385 skip             <- (!(__) .)*
1386 skip_puncs       <- [|=]* _
1387   )~");
1388 
1389   REQUIRE(!!pg); // OK
1390 
1391   std::vector<std::string> errors{
1392     R"(3:7: Wildcard characters (*) should not be used)",
1393     R"(4:6: Wildcard characters (?) should not be used)",
1394     R"(5:6: Duplicate OR operator (|))",
1395     R"(9:4: Missing OR operator (|))",
1396     R"(11:16: Expect phrase)",
1397     R"(13:11: Missing opening/closing square bracket)",
1398     R"(16:10: Section name 日 must be in English)",
1399     R"(16:11: Section name 本 must be in English)",
1400     R"(16:12: Section name 語 must be in English)",
1401     R"(16:13: Section name で must be in English)",
1402     R"(16:14: Section name す must be in English)",
1403     R"(21:17: Use of invalid operator)",
1404     R"(24:10: Use of invalid operator combination)",
1405     R"(26:10: Missing OR operator (|))",
1406   };
1407 
1408   size_t i = 0;
__anone2d3858f1502(size_t ln, size_t col, const std::string &msg) 1409   pg.log = [&](size_t ln, size_t col, const std::string &msg) {
1410     std::stringstream ss;
1411     ss << ln << ":" << col << ": " << msg;
1412     REQUIRE(ss.str() == errors[i++]);
1413   };
1414 
1415   pg.enable_ast();
1416 
1417   std::shared_ptr<Ast> ast;
1418   REQUIRE_FALSE(pg.parse(R"([Section 1]
1419 111 = 222 | 333
1420 AAA BB* | CCC
1421 AAA B?B | CCC
1422 aaa || bbb
1423 ccc = ddd
1424 
1425 [Section 2]
1426 eee
1427 fff | ggg
1428 fff | ggg 111 |
1429 
1430 [Section 3
1431 hhh | iii
1432 
1433 [Section 日本語です]
1434 ppp | qqq
1435 
1436 [Section 4]
1437 jjj | kkk
1438 lll = mmm | nnn = ooo
1439 
1440 [Section 5]
1441 ppp qqq |= rrr
1442 
1443 Section 6]
1444 sss | ttt
1445   )", ast));
1446 
1447   ast = pg.optimize_ast(ast);
1448 
1449   REQUIRE(ast_to_s(ast) ==
1450 R"(+ START
1451   + SECTION
1452     - HEADER/0[CATEGORY] (Section 1)
1453     + ENTRIES
1454       + ENTRY/0
1455         + ONE_WAY/0[PHRASE]
1456           - WORD (111)
1457         + PHRASE
1458           - WORD (222)
1459         + PHRASE
1460           - WORD (333)
1461       + ENTRY/1
1462         + PHRASE
1463           - WORD (AAA)
1464           - WORD (BB*)
1465         + PHRASE
1466           - WORD (CCC)
1467       + ENTRY/1
1468         + PHRASE
1469           - WORD (AAA)
1470           - WORD (B?B)
1471         + PHRASE
1472           - WORD (CCC)
1473       + ENTRY/1
1474         + PHRASE
1475           - WORD (aaa)
1476         + PHRASE
1477           - WORD (bbb)
1478       + ENTRY/0
1479         + ONE_WAY/0[PHRASE]
1480           - WORD (ccc)
1481         + PHRASE
1482           - WORD (ddd)
1483   + SECTION
1484     - HEADER/0[CATEGORY] (Section 2)
1485     + ENTRIES
1486       + ENTRY/1
1487         + PHRASE
1488           - WORD (eee)
1489       + ENTRY/1
1490         + PHRASE
1491           - WORD (fff)
1492         + PHRASE
1493           - WORD (ggg)
1494       + ENTRY/1
1495         + PHRASE
1496           - WORD (fff)
1497         + PHRASE
1498           - WORD (ggg)
1499           - WORD (111)
1500   + SECTION
1501     - HEADER/0[CATEGORY] (Section 3)
1502     + ENTRIES
1503       + ENTRY/1
1504         + PHRASE
1505           - WORD (hhh)
1506         + PHRASE
1507           - WORD (iii)
1508   + SECTION
1509     - HEADER/0[CATEGORY] (Section 日本語です)
1510     + ENTRIES
1511       + ENTRY/1
1512         + PHRASE
1513           - WORD (ppp)
1514         + PHRASE
1515           - WORD (qqq)
1516   + SECTION
1517     - HEADER/0[CATEGORY] (Section 4)
1518     + ENTRIES
1519       + ENTRY/1
1520         + PHRASE
1521           - WORD (jjj)
1522         + PHRASE
1523           - WORD (kkk)
1524       + ENTRY/0
1525         + ONE_WAY/0[PHRASE]
1526           - WORD (lll)
1527         + PHRASE
1528           - WORD (mmm)
1529         + PHRASE
1530           - WORD (nnn)
1531   + SECTION
1532     - HEADER/0[CATEGORY] (Section 5)
1533     + ENTRIES
1534       + ENTRY/1
1535         + PHRASE
1536           - WORD (ppp)
1537           - WORD (qqq)
1538         + PHRASE
1539           - WORD (rrr)
1540       + ENTRY/1
1541         + PHRASE
1542           - WORD (Section)
1543           - WORD (6)
1544       + ENTRY/1
1545         + PHRASE
1546           - WORD (sss)
1547         + PHRASE
1548           - WORD (ttt)
1549 )");
1550 }
1551 
1552 TEST_CASE("Error recovery Java", "[error]") {
1553   parser pg(R"(
1554 Prog       ← PUBLIC CLASS NAME LCUR PUBLIC STATIC VOID MAIN LPAR STRING LBRA RBRA NAME RPAR BlockStmt RCUR
1555 BlockStmt  ← LCUR (Stmt)* RCUR^rcblk
1556 Stmt       ← IfStmt / WhileStmt / PrintStmt / DecStmt / AssignStmt / BlockStmt
1557 IfStmt     ← IF LPAR Exp RPAR Stmt (ELSE Stmt)?
1558 WhileStmt  ← WHILE LPAR Exp RPAR Stmt
1559 DecStmt    ← INT NAME (ASSIGN Exp)? SEMI
1560 AssignStmt ← NAME ASSIGN Exp SEMI^semia
1561 PrintStmt  ← PRINTLN LPAR Exp RPAR SEMI
1562 Exp        ← RelExp (EQ RelExp)*
1563 RelExp     ← AddExp (LT AddExp)*
1564 AddExp     ← MulExp ((PLUS / MINUS) MulExp)*
1565 MulExp     ← AtomExp ((TIMES / DIV) AtomExp)*
1566 AtomExp    ← LPAR Exp RPAR / NUMBER / NAME
1567 
1568 NUMBER     ← < [0-9]+ >
1569 NAME       ← < [a-zA-Z_][a-zA-Z_0-9]* >
1570 
1571 ~LPAR       ← '('
1572 ~RPAR       ← ')'
1573 ~LCUR       ← '{'
1574 ~RCUR       ← '}'
1575 ~LBRA       ← '['
1576 ~RBRA       ← ']'
1577 ~SEMI       ← ';'
1578 
1579 ~EQ         ← '=='
1580 ~LT         ← '<'
1581 ~ASSIGN     ← '='
1582 
1583 ~IF         ← 'if'
1584 ~ELSE       ← 'else'
1585 ~WHILE      ← 'while'
1586 
1587 PLUS       ← '+'
1588 MINUS      ← '-'
1589 TIMES      ← '*'
1590 DIV        ← '/'
1591 
1592 CLASS      ← 'class'
1593 PUBLIC     ← 'public'
1594 STATIC     ← 'static'
1595 
1596 VOID       ← 'void'
1597 INT        ← 'int'
1598 
1599 MAIN       ← 'main'
1600 STRING     ← 'String'
1601 PRINTLN    ← 'System.out.println'
1602 
1603 %whitespace ← [ \t\n]*
1604 %word       ← NAME
1605 
1606 # Throw operator labels
1607 rcblk      ← SkipToRCUR { message "missing end of block." }
1608 semia      ← '' { message "missing simicolon in assignment." }
1609 
1610 # Recovery expressions
1611 SkipToRCUR ← (!RCUR (LCUR SkipToRCUR / .))* RCUR
1612   )");
1613 
1614   REQUIRE(!!pg); // OK
1615 
1616   std::vector<std::string> errors{
1617     R"(8:5: missing simicolon in assignment.)",
1618     R"(8:6: missing end of block.)",
1619   };
1620 
1621   size_t i = 0;
__anone2d3858f1602(size_t ln, size_t col, const std::string &msg) 1622   pg.log = [&](size_t ln, size_t col, const std::string &msg) {
1623     std::stringstream ss;
1624     ss << ln << ":" << col << ": " << msg;
1625     REQUIRE(ss.str() == errors[i++]);
1626   };
1627 
1628   pg.enable_ast();
1629 
1630   std::shared_ptr<Ast> ast;
1631   REQUIRE_FALSE(pg.parse(R"(public class Example {
1632   public static void main(String[] args) {
1633     int n = 5;
1634     int f = 1;
1635     while(0 < n) {
1636       f = f * n;
1637       n = n - 1
1638     };
1639     System.out.println(f);
1640   }
1641 }
1642   )", ast));
1643 
1644   ast = pg.optimize_ast(ast);
1645 
1646   REQUIRE(ast_to_s(ast) ==
1647 R"(+ Prog
1648   - PUBLIC (public)
1649   - CLASS (class)
1650   - NAME (Example)
1651   - PUBLIC (public)
1652   - STATIC (static)
1653   - VOID (void)
1654   - MAIN (main)
1655   - STRING (String)
1656   - NAME (args)
1657   + BlockStmt
1658     + Stmt/3[DecStmt]
1659       - INT (int)
1660       - NAME (n)
1661       - Exp/0[NUMBER] (5)
1662     + Stmt/3[DecStmt]
1663       - INT (int)
1664       - NAME (f)
1665       - Exp/0[NUMBER] (1)
1666     + Stmt/1[WhileStmt]
1667       + Exp/0[RelExp]
1668         - AddExp/0[NUMBER] (0)
1669         - AddExp/0[NAME] (n)
1670       + Stmt/5[BlockStmt]
1671         + Stmt/4[AssignStmt]
1672           - NAME (f)
1673           + Exp/0[MulExp]
1674             - AtomExp/2[NAME] (f)
1675             - TIMES (*)
1676             - AtomExp/2[NAME] (n)
1677         + Stmt/4[AssignStmt]
1678           - NAME (n)
1679           + Exp/0[AddExp]
1680             - MulExp/0[NAME] (n)
1681             - MINUS (-)
1682             - MulExp/0[NUMBER] (1)
1683 )");
1684 }
1685