1 #include "catch.hh" 2 #include <peglib.h> 3 #include <sstream> 4 5 using namespace peg; 6 7 TEST_CASE("Token boundary 1", "[token boundary]") 8 { 9 parser pg(R"( 10 ROOT <- TOP 11 TOP <- 'a' 'b' 'c' 12 %whitespace <- [ \t\r\n]* 13 )"); 14 15 REQUIRE(pg.parse(" a b c ")); 16 } 17 18 TEST_CASE("Token boundary 2", "[token boundary]") 19 { 20 parser pg(R"( 21 ROOT <- TOP 22 TOP <- < 'a' 'b' 'c' > 23 %whitespace <- [ \t\r\n]* 24 )"); 25 26 REQUIRE(!pg.parse(" a b c ")); 27 } 28 29 TEST_CASE("Token boundary 3", "[token boundary]") 30 { 31 parser pg(R"( 32 ROOT <- TOP 33 TOP <- < 'a' B 'c' > 34 B <- 'b' 35 %whitespace <- [ \t\r\n]* 36 )"); 37 38 REQUIRE(!pg.parse(" a b c ")); 39 } 40 41 TEST_CASE("Token boundary 4", "[token boundary]") 42 { 43 parser pg(R"( 44 ROOT <- TOP 45 TOP <- < A 'b' 'c' > 46 A <- 'a' 47 %whitespace <- [ \t\r\n]* 48 )"); 49 50 REQUIRE(!pg.parse(" a b c ")); 51 } 52 53 TEST_CASE("Token boundary 5", "[token boundary]") 54 { 55 parser pg(R"( 56 ROOT <- TOP 57 TOP <- A < 'b' C > 58 A <- 'a' 59 C <- 'c' 60 %whitespace <- [ \t\r\n]* 61 )"); 62 63 REQUIRE(!pg.parse(" a b c ")); 64 } 65 66 TEST_CASE("Token boundary 6", "[token boundary]") 67 { 68 parser pg(R"( 69 ROOT <- TOP 70 TOP <- < A > B C 71 A <- 'a' 72 B <- 'b' 73 C <- 'c' 74 %whitespace <- [ \t\r\n]* 75 )"); 76 77 REQUIRE(pg.parse(" a b c ")); 78 } 79 80 TEST_CASE("Token boundary 7", "[token boundary]") 81 { 82 parser pg(R"( 83 ROOT <- TOP 84 TOP <- < A B C > 85 A <- 'a' 86 B <- 'b' 87 C <- 'c' 88 %whitespace <- [ \t\r\n]* 89 )"); 90 91 REQUIRE(!pg.parse(" a b c ")); 92 } 93 94 TEST_CASE("Infinite loop 1", "[infinite loop]") 95 { 96 parser pg(R"( 97 ROOT <- WH TOKEN* WH 98 TOKEN <- [a-z0-9]* 99 WH <- [ \t]* 100 )"); 101 102 REQUIRE(!pg); 103 } 104 105 TEST_CASE("Infinite loop 2", "[infinite loop]") { 106 parser pg(R"( 107 ROOT <- WH TOKEN+ WH 108 TOKEN <- [a-z0-9]* 109 WH <- [ \t]* 110 )"); 111 112 REQUIRE(!pg); 113 } 114 115 TEST_CASE("Infinite loop 3", "[infinite loop]") { 116 parser pg(R"( 117 ROOT <- WH TOKEN* WH 118 TOKEN <- !'word1' 119 WH <- [ \t]* 120 )"); 121 122 REQUIRE(!pg); 123 } 124 125 TEST_CASE("Infinite loop 4", "[infinite loop]") { 126 parser pg(R"( 127 ROOT <- WH TOKEN* WH 128 TOKEN <- &'word1' 129 WH <- [ \t]* 130 )"); 131 132 REQUIRE(!pg); 133 } 134 135 TEST_CASE("Infinite loop 5", "[infinite loop]") { 136 parser pg(R"( 137 Numbers <- Number* 138 Number <- [0-9]+ / Spacing 139 Spacing <- ' ' / '\t' / '\n' / EOF # EOF is empty 140 EOF <- !. 141 )"); 142 143 REQUIRE(!pg); 144 } 145 146 TEST_CASE("Infinite loop 6", "[infinite loop]") { 147 parser pg(R"( 148 S <- ''* 149 )"); 150 151 REQUIRE(!pg); 152 } 153 154 TEST_CASE("Infinite loop 7", "[infinite loop]") { 155 parser pg(R"( 156 S <- A* 157 A <- '' 158 )"); 159 160 REQUIRE(!pg); 161 } 162 163 TEST_CASE("Infinite loop 8", "[infinite loop]") { 164 parser pg(R"( 165 ROOT <- ('A' /)* 166 )"); 167 168 REQUIRE(!pg); 169 } 170 171 TEST_CASE("Infinite loop 9", "[infinite loop]") { 172 parser pg(R"( 173 ROOT <- %recover(('A' /)*) 174 )"); 175 176 REQUIRE(!pg); 177 } 178 179 TEST_CASE("Not infinite 1", "[infinite loop]") { 180 parser pg(R"( 181 Numbers <- Number* EOF 182 Number <- [0-9]+ / Spacing 183 Spacing <- ' ' / '\t' / '\n' 184 EOF <- !. 185 )"); 186 187 REQUIRE(!!pg); // OK 188 } 189 190 TEST_CASE("Not infinite 2", "[infinite loop]") { 191 parser pg(R"( 192 ROOT <- _ ('[' TAG_NAME ']' _)* 193 # In a sequence operator, if there is at least one non-empty element, we can treat it as non-empty 194 TAG_NAME <- (!']' .)+ 195 _ <- [ \t]* 196 )"); 197 198 REQUIRE(!!pg); // OK 199 } 200 201 TEST_CASE("Not infinite 3", "[infinite loop]") { 202 parser pg(R"( 203 EXPRESSION <- _ TERM (TERM_OPERATOR TERM)* 204 TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* 205 FACTOR <- NUMBER / '(' _ EXPRESSION ')' _ # Recursive... 206 TERM_OPERATOR <- < [-+] > _ 207 FACTOR_OPERATOR <- < [/*] > _ 208 NUMBER <- < [0-9]+ > _ 209 _ <- [ \t\r\n]* 210 )"); 211 212 REQUIRE(!!pg); // OK 213 } 214 215 TEST_CASE("Precedence climbing", "[precedence]") { 216 parser parser(R"( 217 START <- _ EXPRESSION 218 EXPRESSION <- ATOM (OPERATOR ATOM)* { 219 precedence 220 L + - 221 L * / 222 } 223 ATOM <- NUMBER / T('(') EXPRESSION T(')') 224 OPERATOR <- T([-+/*]) 225 NUMBER <- T('-'? [0-9]+) 226 ~_ <- [ \t]* 227 T(S) <- < S > _ 228 )"); 229 230 REQUIRE(!!parser); // OK 231 232 parser.enable_packrat_parsing(); 233 234 // Setup actions __anone2d3858f0102(const SemanticValues &vs) 235 parser["EXPRESSION"] = [](const SemanticValues &vs) -> long { 236 auto result = std::any_cast<long>(vs[0]); 237 if (vs.size() > 1) { 238 auto ope = std::any_cast<char>(vs[1]); 239 auto num = std::any_cast<long>(vs[2]); 240 switch (ope) { 241 case '+': result += num; break; 242 case '-': result -= num; break; 243 case '*': result *= num; break; 244 case '/': result /= num; break; 245 } 246 } 247 return result; 248 }; __anone2d3858f0202(const SemanticValues &vs) 249 parser["OPERATOR"] = [](const SemanticValues &vs) { return *vs.sv().data(); }; __anone2d3858f0302(const SemanticValues &vs) 250 parser["NUMBER"] = [](const SemanticValues &vs) { return vs.token_to_number<long>(); }; 251 252 bool ret = parser; 253 REQUIRE(ret == true); 254 255 { 256 auto expr = " 1 + 2 * 3 * (4 - 5 + 6) / 7 - 8 "; 257 long val = 0; 258 ret = parser.parse(expr, val); 259 260 REQUIRE(ret == true); 261 REQUIRE(val == -3); 262 } 263 264 { 265 auto expr = "-1+-2--3"; // -1 + -2 - -3 = 0 266 long val = 0; 267 ret = parser.parse(expr, val); 268 269 REQUIRE(ret == true); 270 REQUIRE(val == 0); 271 } 272 } 273 274 TEST_CASE("Precedence climbing with literal operator", "[precedence]") { 275 parser parser(R"( 276 START <- _ EXPRESSION 277 EXPRESSION <- ATOM (OPERATOR ATOM)* { 278 precedence 279 L '#plus#' - # weaker 280 L '#multiply#' / # stronger 281 } 282 ATOM <- NUMBER / T('(') EXPRESSION T(')') 283 OPERATOR <- T('#plus#' / '#multiply#' / [-/]) 284 NUMBER <- T('-'? [0-9]+) 285 ~_ <- [ \t]* 286 T(S) <- < S > _ 287 )"); 288 289 REQUIRE(!!parser); // OK 290 291 parser.enable_packrat_parsing(); 292 293 // Setup actions __anone2d3858f0402(const SemanticValues &vs) 294 parser["EXPRESSION"] = [](const SemanticValues &vs) -> long { 295 auto result = std::any_cast<long>(vs[0]); 296 if (vs.size() > 1) { 297 auto ope = std::any_cast<std::string>(vs[1]); 298 auto num = std::any_cast<long>(vs[2]); 299 if (ope == "#plus#") { 300 result += num; 301 } else if (ope == "-") { 302 result -= num; 303 } else if (ope == "#multiply#") { 304 result *= num; 305 } else if (ope == "/") { 306 result /= num; 307 } 308 } 309 return result; 310 }; __anone2d3858f0502(const SemanticValues &vs) 311 parser["OPERATOR"] = [](const SemanticValues &vs) { return vs.token_to_string(); }; __anone2d3858f0602(const SemanticValues &vs) 312 parser["NUMBER"] = [](const SemanticValues &vs) { return vs.token_to_number<long>(); }; 313 314 bool ret = parser; 315 REQUIRE(ret == true); 316 317 { 318 auto expr = " 1 #plus# 2 #multiply# 3 #multiply# (4 - 5 #plus# 6) / 7 - 8 "; 319 long val = 0; 320 ret = parser.parse(expr, val); 321 322 REQUIRE(ret == true); 323 REQUIRE(val == -3); 324 } 325 326 { 327 auto expr = "-1#plus#-2--3"; // -1 + -2 - -3 = 0 328 long val = 0; 329 ret = parser.parse(expr, val); 330 331 REQUIRE(ret == true); 332 REQUIRE(val == 0); 333 } 334 } 335 336 TEST_CASE("Precedence climbing with macro", "[precedence]") { 337 // Create a PEG parser 338 parser parser(R"( 339 EXPRESSION <- INFIX_EXPRESSION(ATOM, OPERATOR) 340 INFIX_EXPRESSION(A, O) <- A (O A)* { 341 precedence 342 L + - 343 L * / 344 } 345 ATOM <- NUMBER / '(' EXPRESSION ')' 346 OPERATOR <- < [-+/*] > 347 NUMBER <- < '-'? [0-9]+ > 348 %whitespace <- [ \t]* 349 )"); 350 351 parser.enable_packrat_parsing(); 352 353 bool ret = parser; 354 REQUIRE(ret == true); 355 356 // Setup actions __anone2d3858f0702(const SemanticValues &vs) 357 parser["INFIX_EXPRESSION"] = [](const SemanticValues &vs) -> long { 358 auto result = std::any_cast<long>(vs[0]); 359 if (vs.size() > 1) { 360 auto ope = std::any_cast<char>(vs[1]); 361 auto num = std::any_cast<long>(vs[2]); 362 switch (ope) { 363 case '+': result += num; break; 364 case '-': result -= num; break; 365 case '*': result *= num; break; 366 case '/': result /= num; break; 367 } 368 } 369 return result; 370 }; __anone2d3858f0802(const SemanticValues &vs) 371 parser["OPERATOR"] = [](const SemanticValues &vs) { return *vs.sv().data(); }; __anone2d3858f0902(const SemanticValues &vs) 372 parser["NUMBER"] = [](const SemanticValues &vs) { return vs.token_to_number<long>(); }; 373 374 { 375 auto expr = " 1 + 2 * 3 * (4 - 5 + 6) / 7 - 8 "; 376 long val = 0; 377 ret = parser.parse(expr, val); 378 379 REQUIRE(ret == true); 380 REQUIRE(val == -3); 381 } 382 383 { 384 auto expr = "-1+-2--3"; // -1 + -2 - -3 = 0 385 long val = 0; 386 ret = parser.parse(expr, val); 387 388 REQUIRE(ret == true); 389 REQUIRE(val == 0); 390 } 391 } 392 393 TEST_CASE("Precedence climbing error1", "[precedence]") { 394 parser parser(R"( 395 START <- _ EXPRESSION 396 EXPRESSION <- ATOM (OPERATOR ATOM1)* { 397 precedence 398 L + - 399 L * / 400 } 401 ATOM <- NUMBER / T('(') EXPRESSION T(')') 402 ATOM1 <- NUMBER / T('(') EXPRESSION T(')') 403 OPERATOR <- T([-+/*]) 404 NUMBER <- T('-'? [0-9]+) 405 ~_ <- [ \t]* 406 T(S) <- < S > _ 407 )"); 408 409 bool ret = parser; 410 REQUIRE(ret == false); 411 } 412 413 TEST_CASE("Precedence climbing error2", "[precedence]") { 414 parser parser(R"( 415 START <- _ EXPRESSION 416 EXPRESSION <- ATOM OPERATOR ATOM { 417 precedence 418 L + - 419 L * / 420 } 421 ATOM <- NUMBER / T('(') EXPRESSION T(')') 422 OPERATOR <- T([-+/*]) 423 NUMBER <- T('-'? [0-9]+) 424 ~_ <- [ \t]* 425 T(S) <- < S > _ 426 )"); 427 428 bool ret = parser; 429 REQUIRE(ret == false); 430 } 431 432 TEST_CASE("Precedence climbing error3", "[precedence]") { 433 parser parser(R"( 434 EXPRESSION <- PRECEDENCE_PARSING(ATOM, OPERATOR) 435 PRECEDENCE_PARSING(A, O) <- A (O A)+ { 436 precedence 437 L + - 438 L * / 439 } 440 ATOM <- NUMBER / '(' EXPRESSION ')' 441 OPERATOR <- < [-+/*] > 442 NUMBER <- < '-'? [0-9]+ > 443 %whitespace <- [ \t]* 444 )"); 445 446 bool ret = parser; 447 REQUIRE(ret == false); 448 } 449 450 TEST_CASE("Packrat parser test with %whitespace%", "[packrat]") { 451 peg::parser parser(R"( 452 ROOT <- 'a' 453 %whitespace <- SPACE* 454 SPACE <- ' ' 455 )"); 456 457 parser.enable_packrat_parsing(); 458 459 auto ret = parser.parse("a"); 460 REQUIRE(ret == true); 461 } 462 463 TEST_CASE("Packrat parser test with macro", "[packrat]") { 464 parser parser(R"( 465 EXPRESSION <- _ LIST(TERM, TERM_OPERATOR) 466 TERM <- LIST(FACTOR, FACTOR_OPERATOR) 467 FACTOR <- NUMBER / T('(') EXPRESSION T(')') 468 TERM_OPERATOR <- T([-+]) 469 FACTOR_OPERATOR <- T([/*]) 470 NUMBER <- T([0-9]+) 471 ~_ <- [ \t]* 472 LIST(I, D) <- I (D I)* 473 T(S) <- < S > _ 474 )"); 475 476 parser.enable_packrat_parsing(); 477 478 auto ret = parser.parse(" 1 + 2 * 3 * (4 - 5 + 6) / 7 - 8 "); 479 REQUIRE(ret == true); 480 } 481 482 TEST_CASE("Packrat parser test with precedence expression parser", 483 "[packrat]") { 484 peg::parser parser(R"( 485 Expression <- Atom (Operator Atom)* { precedence L + - L * / } 486 Atom <- _? Number _? 487 Number <- [0-9]+ 488 Operator <- '+' / '-' / '*' / '/' 489 _ <- ' '+ 490 )"); 491 492 bool ret = parser; 493 REQUIRE(ret == true); 494 495 parser.enable_packrat_parsing(); 496 497 ret = parser.parse(" 1 + 2 * 3 "); 498 REQUIRE(ret == true); 499 } 500 501 TEST_CASE("Backreference test", "[backreference]") { 502 parser parser(R"( 503 START <- _ LQUOTE < (!RQUOTE .)* > RQUOTE _ 504 LQUOTE <- 'R"' $delm< [a-zA-Z]* > '(' 505 RQUOTE <- ')' $delm '"' 506 ~_ <- [ \t\r\n]* 507 )"); 508 509 std::string token; __anone2d3858f0a02(const SemanticValues &vs) 510 parser["START"] = [&](const SemanticValues &vs) { token = vs.token(); }; 511 512 { 513 token.clear(); 514 auto ret = parser.parse(R"delm( 515 R"("hello world")" 516 )delm"); 517 518 REQUIRE(ret == true); 519 REQUIRE(token == "\"hello world\""); 520 } 521 522 { 523 token.clear(); 524 auto ret = parser.parse(R"delm( 525 R"foo("(hello world)")foo" 526 )delm"); 527 528 REQUIRE(ret == true); 529 REQUIRE(token == "\"(hello world)\""); 530 } 531 532 { 533 token.clear(); 534 auto ret = parser.parse(R"delm( 535 R"foo("(hello world)foo")foo" 536 )delm"); 537 538 REQUIRE(ret == false); 539 REQUIRE(token == "\"(hello world"); 540 } 541 542 { 543 token.clear(); 544 auto ret = parser.parse(R"delm( 545 R"foo("(hello world)")bar" 546 )delm"); 547 548 REQUIRE(ret == false); 549 REQUIRE(token.empty()); 550 } 551 } 552 553 TEST_CASE("Invalid backreference test", "[backreference]") { 554 parser parser(R"( 555 START <- _ LQUOTE (!RQUOTE .)* RQUOTE _ 556 LQUOTE <- 'R"' $delm< [a-zA-Z]* > '(' 557 RQUOTE <- ')' $delm2 '"' 558 ~_ <- [ \t\r\n]* 559 )"); 560 561 REQUIRE_THROWS_AS(parser.parse(R"delm( 562 R"foo("(hello world)")foo" 563 )delm"), 564 std::runtime_error); 565 } 566 567 TEST_CASE("Nested capture test", "[backreference]") { 568 parser parser(R"( 569 ROOT <- CONTENT 570 CONTENT <- (ELEMENT / TEXT)* 571 ELEMENT <- $(STAG CONTENT ETAG) 572 STAG <- '<' $tag< TAG_NAME > '>' 573 ETAG <- '</' $tag '>' 574 TAG_NAME <- 'b' / 'u' 575 TEXT <- TEXT_DATA 576 TEXT_DATA <- ![<] . 577 )"); 578 579 REQUIRE(parser.parse("This is <b>a <u>test</u> text</b>.")); 580 REQUIRE(!parser.parse("This is <b>a <u>test</b> text</u>.")); 581 REQUIRE(!parser.parse("This is <b>a <u>test text</b>.")); 582 REQUIRE(!parser.parse("This is a <u>test</u> text</b>.")); 583 } 584 585 TEST_CASE("Backreference with Prioritized Choice test", "[backreference]") { 586 parser parser(R"( 587 TREE <- WRONG_BRANCH / CORRECT_BRANCH 588 WRONG_BRANCH <- BRANCH THAT IS_capture WRONG 589 CORRECT_BRANCH <- BRANCH THAT IS_backref CORRECT 590 BRANCH <- 'branch' 591 THAT <- 'that' 592 IS_capture <- $ref<..> 593 IS_backref <- $ref 594 WRONG <- 'wrong' 595 CORRECT <- 'correct' 596 )"); 597 598 REQUIRE_THROWS_AS(parser.parse("branchthatiscorrect"), std::runtime_error); 599 } 600 601 TEST_CASE("Backreference with Zero or More test", "[backreference]") { 602 parser parser(R"( 603 TREE <- WRONG_BRANCH* CORRECT_BRANCH 604 WRONG_BRANCH <- BRANCH THAT IS_capture WRONG 605 CORRECT_BRANCH <- BRANCH THAT IS_backref CORRECT 606 BRANCH <- 'branch' 607 THAT <- 'that' 608 IS_capture <- $ref<..> 609 IS_backref <- $ref 610 WRONG <- 'wrong' 611 CORRECT <- 'correct' 612 )"); 613 614 REQUIRE(parser.parse("branchthatiswrongbranchthatiscorrect")); 615 REQUIRE(!parser.parse("branchthatiswrongbranchthatIscorrect")); 616 REQUIRE( 617 !parser.parse("branchthatiswrongbranchthatIswrongbranchthatiscorrect")); 618 REQUIRE( 619 parser.parse("branchthatiswrongbranchthatIswrongbranchthatIscorrect")); 620 REQUIRE_THROWS_AS(parser.parse("branchthatiscorrect"), std::runtime_error); 621 REQUIRE_THROWS_AS(parser.parse("branchthatiswron_branchthatiscorrect"), 622 std::runtime_error); 623 } 624 625 TEST_CASE("Backreference with One or More test", "[backreference]") { 626 parser parser(R"( 627 TREE <- WRONG_BRANCH+ CORRECT_BRANCH 628 WRONG_BRANCH <- BRANCH THAT IS_capture WRONG 629 CORRECT_BRANCH <- BRANCH THAT IS_backref CORRECT 630 BRANCH <- 'branch' 631 THAT <- 'that' 632 IS_capture <- $ref<..> 633 IS_backref <- $ref 634 WRONG <- 'wrong' 635 CORRECT <- 'correct' 636 )"); 637 638 REQUIRE(parser.parse("branchthatiswrongbranchthatiscorrect")); 639 REQUIRE(!parser.parse("branchthatiswrongbranchthatIscorrect")); 640 REQUIRE( 641 !parser.parse("branchthatiswrongbranchthatIswrongbranchthatiscorrect")); 642 REQUIRE( 643 parser.parse("branchthatiswrongbranchthatIswrongbranchthatIscorrect")); 644 REQUIRE(!parser.parse("branchthatiscorrect")); 645 REQUIRE(!parser.parse("branchthatiswron_branchthatiscorrect")); 646 } 647 648 TEST_CASE("Backreference with Option test", "[backreference]") { 649 parser parser(R"( 650 TREE <- WRONG_BRANCH? CORRECT_BRANCH 651 WRONG_BRANCH <- BRANCH THAT IS_capture WRONG 652 CORRECT_BRANCH <- BRANCH THAT IS_backref CORRECT 653 BRANCH <- 'branch' 654 THAT <- 'that' 655 IS_capture <- $ref<..> 656 IS_backref <- $ref 657 WRONG <- 'wrong' 658 CORRECT <- 'correct' 659 )"); 660 661 REQUIRE(parser.parse("branchthatiswrongbranchthatiscorrect")); 662 REQUIRE(!parser.parse("branchthatiswrongbranchthatIscorrect")); 663 REQUIRE( 664 !parser.parse("branchthatiswrongbranchthatIswrongbranchthatiscorrect")); 665 REQUIRE( 666 !parser.parse("branchthatiswrongbranchthatIswrongbranchthatIscorrect")); 667 REQUIRE_THROWS_AS(parser.parse("branchthatiscorrect"), std::runtime_error); 668 REQUIRE_THROWS_AS(parser.parse("branchthatiswron_branchthatiscorrect"), 669 std::runtime_error); 670 } 671 672 TEST_CASE("Repetition {0}", "[repetition]") { 673 parser parser(R"( 674 START <- '(' DIGIT{3} ') ' DIGIT{3} '-' DIGIT{4} 675 DIGIT <- [0-9] 676 )"); 677 REQUIRE(parser.parse("(123) 456-7890")); 678 REQUIRE(!parser.parse("(12a) 456-7890")); 679 REQUIRE(!parser.parse("(123) 45-7890")); 680 REQUIRE(!parser.parse("(123) 45-7a90")); 681 } 682 683 TEST_CASE("Repetition {2,4}", "[repetition]") { 684 parser parser(R"( 685 START <- DIGIT{2,4} 686 DIGIT <- [0-9] 687 )"); 688 REQUIRE(!parser.parse("1")); 689 REQUIRE(parser.parse("12")); 690 REQUIRE(parser.parse("123")); 691 REQUIRE(parser.parse("1234")); 692 REQUIRE(!parser.parse("12345")); 693 } 694 695 TEST_CASE("Repetition {2,1}", "[repetition]") { 696 parser parser(R"( 697 START <- DIGIT{2,1} # invalid range 698 DIGIT <- [0-9] 699 )"); 700 REQUIRE(!parser.parse("1")); 701 REQUIRE(parser.parse("12")); 702 REQUIRE(!parser.parse("123")); 703 } 704 705 TEST_CASE("Repetition {2,}", "[repetition]") { 706 parser parser(R"( 707 START <- DIGIT{2,} 708 DIGIT <- [0-9] 709 )"); 710 REQUIRE(!parser.parse("1")); 711 REQUIRE(parser.parse("12")); 712 REQUIRE(parser.parse("123")); 713 REQUIRE(parser.parse("1234")); 714 } 715 716 TEST_CASE("Repetition {,2}", "[repetition]") { 717 parser parser(R"( 718 START <- DIGIT{,2} 719 DIGIT <- [0-9] 720 )"); 721 REQUIRE(parser.parse("1")); 722 REQUIRE(parser.parse("12")); 723 REQUIRE(!parser.parse("123")); 724 REQUIRE(!parser.parse("1234")); 725 } 726 727 TEST_CASE("Left recursive test", "[left recursive]") { 728 parser parser(R"( 729 A <- A 'a' 730 B <- A 'a' 731 )"); 732 733 REQUIRE(!parser); 734 } 735 736 TEST_CASE("Left recursive with option test", "[left recursive]") { 737 parser parser(R"( 738 A <- 'a' / 'b'? B 'c' 739 B <- A 740 )"); 741 742 REQUIRE(!parser); 743 } 744 745 TEST_CASE("Left recursive with zom test", "[left recursive]") { 746 parser parser(R"( 747 A <- 'a'* A* 748 )"); 749 750 REQUIRE(!parser); 751 } 752 753 TEST_CASE("Left recursive with a ZOM content rule", "[left recursive]") { 754 parser parser(R"( 755 A <- B 756 B <- _ A 757 _ <- ' '* # Zero or more 758 )"); 759 760 REQUIRE(!parser); 761 } 762 763 TEST_CASE("Left recursive with empty string test", "[left recursive]") { 764 parser parser(" A <- '' A"); 765 766 REQUIRE(!parser); 767 } 768 769 TEST_CASE("User defined rule test", "[user rule]") { 770 auto g = parser(R"( 771 ROOT <- _ 'Hello' _ NAME '!' _ 772 )", 773 {{"NAME", usr([](const char *s, size_t n, SemanticValues &, __anone2d3858f0b02(const char *s, size_t n, SemanticValues &, std::any &) 774 std::any &) -> size_t { 775 static std::vector<std::string> names = {"PEG", "BNF"}; 776 for (const auto &name : names) { 777 if (name.size() <= n && 778 !name.compare(0, name.size(), s, name.size())) { 779 return name.size(); 780 } 781 } 782 return static_cast<size_t>(-1); 783 })}, 784 {"~_", zom(cls(" \t\r\n"))}}); 785 786 REQUIRE(g.parse(" Hello BNF! ") == true); 787 } 788 789 TEST_CASE("Semantic predicate test", "[predicate]") { 790 parser parser("NUMBER <- [0-9]+"); 791 __anone2d3858f0c02(const SemanticValues &vs) 792 parser["NUMBER"] = [](const SemanticValues &vs) { 793 auto val = vs.token_to_number<long>(); 794 if (val != 100) { throw parse_error("value error!!"); } 795 return val; 796 }; 797 798 long val; 799 REQUIRE(parser.parse("100", val)); 800 REQUIRE(val == 100); 801 __anone2d3858f0d02(size_t line, size_t col, const std::string &msg) 802 parser.log = [](size_t line, size_t col, const std::string &msg) { 803 REQUIRE(line == 1); 804 REQUIRE(col == 1); 805 REQUIRE(msg == "value error!!"); 806 }; 807 REQUIRE(!parser.parse("200", val)); 808 } 809 810 TEST_CASE("Japanese character", "[unicode]") { 811 peg::parser parser(u8R"( 812 文 <- 修飾語? 主語 述語 '。' 813 主語 <- 名詞 助詞 814 述語 <- 動詞 助詞 815 修飾語 <- 形容詞 816 名詞 <- 'サーバー' / 'クライアント' 817 形容詞 <- '古い' / '新しい' 818 動詞 <- '落ち' / '復旧し' 819 助詞 <- 'が' / 'を' / 'た' / 'ます' / 'に' 820 )"); 821 822 bool ret = parser; 823 REQUIRE(ret == true); 824 825 REQUIRE(parser.parse(u8R"(サーバーを復旧します。)")); 826 } 827 828 TEST_CASE("dot with a code", "[unicode]") { 829 peg::parser parser(" S <- 'a' . 'b' "); 830 REQUIRE(parser.parse(u8R"(aあb)")); 831 } 832 833 TEST_CASE("dot with a char", "[unicode]") { 834 peg::parser parser(" S <- 'a' . 'b' "); 835 REQUIRE(parser.parse(u8R"(aåb)")); 836 } 837 838 TEST_CASE("character class", "[unicode]") { 839 peg::parser parser(R"( 840 S <- 'a' [い-おAさC-Eた-とは] 'b' 841 )"); 842 843 bool ret = parser; 844 REQUIRE(ret == true); 845 846 REQUIRE(!parser.parse(u8R"(aあb)")); 847 REQUIRE(parser.parse(u8R"(aいb)")); 848 REQUIRE(parser.parse(u8R"(aうb)")); 849 REQUIRE(parser.parse(u8R"(aおb)")); 850 REQUIRE(!parser.parse(u8R"(aかb)")); 851 REQUIRE(parser.parse(u8R"(aAb)")); 852 REQUIRE(!parser.parse(u8R"(aBb)")); 853 REQUIRE(parser.parse(u8R"(aEb)")); 854 REQUIRE(!parser.parse(u8R"(aFb)")); 855 REQUIRE(!parser.parse(u8R"(aそb)")); 856 REQUIRE(parser.parse(u8R"(aたb)")); 857 REQUIRE(parser.parse(u8R"(aちb)")); 858 REQUIRE(parser.parse(u8R"(aとb)")); 859 REQUIRE(!parser.parse(u8R"(aなb)")); 860 REQUIRE(parser.parse(u8R"(aはb)")); 861 REQUIRE(!parser.parse(u8R"(a?b)")); 862 } 863 864 #if 0 // TODO: Unicode Grapheme support 865 TEST_CASE("dot with a grapheme", "[unicode]") 866 { 867 peg::parser parser(" S <- 'a' . 'b' "); 868 REQUIRE(parser.parse(u8R"(aसिb)")); 869 } 870 #endif 871 872 TEST_CASE("Macro simple test", "[macro]") { 873 parser parser(R"( 874 S <- HELLO WORLD 875 HELLO <- T('hello') 876 WORLD <- T('world') 877 T(a) <- a [ \t]* 878 )"); 879 880 REQUIRE(parser.parse("hello \tworld ")); 881 } 882 883 TEST_CASE("Macro two parameters", "[macro]") { 884 parser parser(R"( 885 S <- HELLO_WORLD 886 HELLO_WORLD <- T('hello', 'world') 887 T(a, b) <- a [ \t]* b [ \t]* 888 )"); 889 890 REQUIRE(parser.parse("hello \tworld ")); 891 } 892 893 TEST_CASE("Macro syntax error", "[macro]") { 894 parser parser(R"( 895 S <- T('hello') 896 T (a) <- a [ \t]* 897 )"); 898 899 bool ret = parser; 900 REQUIRE(ret == false); 901 } 902 903 TEST_CASE("Macro missing argument", "[macro]") { 904 parser parser(R"( 905 S <- T ('hello') 906 T(a, b) <- a [ \t]* b 907 )"); 908 909 bool ret = parser; 910 REQUIRE(ret == false); 911 } 912 913 TEST_CASE("Macro reference syntax error", "[macro]") { 914 parser parser(R"( 915 S <- T ('hello') 916 T(a) <- a [ \t]* 917 )"); 918 919 bool ret = parser; 920 REQUIRE(ret == false); 921 } 922 923 TEST_CASE("Macro invalid macro reference error", "[macro]") { 924 parser parser(R"( 925 S <- T('hello') 926 T <- 'world' 927 )"); 928 929 bool ret = parser; 930 REQUIRE(ret == false); 931 } 932 933 TEST_CASE("Macro calculator", "[macro]") { 934 // Create a PEG parser 935 parser parser(R"( 936 # Grammar for simple calculator... 937 EXPRESSION <- _ LIST(TERM, TERM_OPERATOR) 938 TERM <- LIST(FACTOR, FACTOR_OPERATOR) 939 FACTOR <- NUMBER / T('(') EXPRESSION T(')') 940 TERM_OPERATOR <- T([-+]) 941 FACTOR_OPERATOR <- T([/*]) 942 NUMBER <- T([0-9]+) 943 ~_ <- [ \t]* 944 LIST(I, D) <- I (D I)* 945 T(S) <- < S > _ 946 )"); 947 948 // Setup actions __anone2d3858f0e02(const SemanticValues &vs) 949 auto reduce = [](const SemanticValues &vs) { 950 auto result = std::any_cast<long>(vs[0]); 951 for (auto i = 1u; i < vs.size(); i += 2) { 952 auto num = std::any_cast<long>(vs[i + 1]); 953 auto ope = std::any_cast<char>(vs[i]); 954 switch (ope) { 955 case '+': result += num; break; 956 case '-': result -= num; break; 957 case '*': result *= num; break; 958 case '/': result /= num; break; 959 } 960 } 961 return result; 962 }; 963 964 parser["EXPRESSION"] = reduce; 965 parser["TERM"] = reduce; __anone2d3858f0f02(const SemanticValues &vs) 966 parser["TERM_OPERATOR"] = [](const SemanticValues &vs) { 967 return static_cast<char>(*vs.sv().data()); 968 }; __anone2d3858f1002(const SemanticValues &vs) 969 parser["FACTOR_OPERATOR"] = [](const SemanticValues &vs) { 970 return static_cast<char>(*vs.sv().data()); 971 }; __anone2d3858f1102(const SemanticValues &vs) 972 parser["NUMBER"] = [](const SemanticValues &vs) { return vs.token_to_number<long>(); }; 973 974 bool ret = parser; 975 REQUIRE(ret == true); 976 977 auto expr = " 1 + 2 * 3 * (4 - 5 + 6) / 7 - 8 "; 978 long val = 0; 979 ret = parser.parse(expr, val); 980 981 REQUIRE(ret == true); 982 REQUIRE(val == -3); 983 } 984 985 TEST_CASE("Macro expression arguments", "[macro]") { 986 parser parser(R"( 987 S <- M('hello' / 'Hello', 'world' / 'World') 988 M(arg0, arg1) <- arg0 [ \t]+ arg1 989 )"); 990 991 REQUIRE(parser.parse("Hello world")); 992 } 993 994 TEST_CASE("Macro recursive", "[macro]") { 995 parser parser(R"( 996 S <- M('abc') 997 M(s) <- !s / s ' ' M(s / '123') / s 998 )"); 999 1000 REQUIRE(parser.parse("")); 1001 REQUIRE(parser.parse("abc")); 1002 REQUIRE(parser.parse("abc abc")); 1003 REQUIRE(parser.parse("abc 123 abc")); 1004 } 1005 1006 TEST_CASE("Macro recursive2", "[macro]") { 1007 auto syntaxes = std::vector<const char *>{ 1008 "S <- M('abc') M(s) <- !s / s ' ' M(s* '-' '123') / s", 1009 "S <- M('abc') M(s) <- !s / s ' ' M(s+ '-' '123') / s", 1010 "S <- M('abc') M(s) <- !s / s ' ' M(s? '-' '123') / s", 1011 "S <- M('abc') M(s) <- !s / s ' ' M(&s s+ '-' '123') / s", 1012 "S <- M('abc') M(s) <- !s / s ' ' M(s '-' !s '123') / s", 1013 "S <- M('abc') M(s) <- !s / s ' ' M(< s > '-' '123') / s", 1014 "S <- M('abc') M(s) <- !s / s ' ' M(~s '-' '123') / s", 1015 }; 1016 1017 for (const auto &syntax : syntaxes) { 1018 parser parser(syntax); 1019 REQUIRE(parser.parse("abc abc-123")); 1020 } 1021 } 1022 1023 TEST_CASE("Macro exclusive modifiers", "[macro]") { 1024 parser parser(R"( 1025 S <- Modifiers(!"") _ 1026 Modifiers(Appeared) <- (!Appeared) ( 1027 Token('public') Modifiers(Appeared / 'public') / 1028 Token('static') Modifiers(Appeared / 'static') / 1029 Token('final') Modifiers(Appeared / 'final') / 1030 "") 1031 Token(t) <- t _ 1032 _ <- [ \t\r\n]* 1033 )"); 1034 1035 REQUIRE(parser.parse("public")); 1036 REQUIRE(parser.parse("static")); 1037 REQUIRE(parser.parse("final")); 1038 REQUIRE(parser.parse("public static final")); 1039 REQUIRE(!parser.parse("public public")); 1040 REQUIRE(!parser.parse("public static public")); 1041 } 1042 1043 TEST_CASE("Macro token check test", "[macro]") { 1044 parser parser(R"( 1045 # Grammar for simple calculator... 1046 EXPRESSION <- _ LIST(TERM, TERM_OPERATOR) 1047 TERM <- LIST(FACTOR, FACTOR_OPERATOR) 1048 FACTOR <- NUMBER / T('(') EXPRESSION T(')') 1049 TERM_OPERATOR <- T([-+]) 1050 FACTOR_OPERATOR <- T([/*]) 1051 NUMBER <- T([0-9]+) 1052 ~_ <- [ \t]* 1053 LIST(I, D) <- I (D I)* 1054 T(S) <- < S > _ 1055 )"); 1056 1057 REQUIRE(parser["EXPRESSION"].is_token() == false); 1058 REQUIRE(parser["TERM"].is_token() == false); 1059 REQUIRE(parser["FACTOR"].is_token() == false); 1060 REQUIRE(parser["FACTOR_OPERATOR"].is_token() == true); 1061 REQUIRE(parser["NUMBER"].is_token() == true); 1062 REQUIRE(parser["_"].is_token() == true); 1063 REQUIRE(parser["LIST"].is_token() == false); 1064 REQUIRE(parser["T"].is_token() == true); 1065 } 1066 1067 TEST_CASE("Macro passes an arg to another macro", "[macro]") { 1068 parser parser(R"( 1069 A <- B(C) 1070 B(D) <- D 1071 C <- 'c' 1072 )"); 1073 1074 REQUIRE(parser.parse("c")); 1075 } 1076 1077 TEST_CASE("Unreferenced rule", "[macro]") { 1078 parser parser(R"( 1079 A <- B(C) 1080 B(D) <- D 1081 C <- 'c' 1082 D <- 'd' 1083 )"); 1084 1085 bool ret = parser; 1086 REQUIRE(ret == true); // This is OK, because it's a warning, not an erro... 1087 } 1088 1089 TEST_CASE("Nested macro call", "[macro]") { 1090 parser parser(R"( 1091 A <- B(T) 1092 B(X) <- C(X) 1093 C(Y) <- Y 1094 T <- 'val' 1095 )"); 1096 1097 REQUIRE(parser.parse("val")); 1098 } 1099 1100 TEST_CASE("Nested macro call2", "[macro]") { 1101 parser parser(R"( 1102 START <- A('TestVal1', 'TestVal2')+ 1103 A(Aarg1, Aarg2) <- B(Aarg1) '#End' 1104 B(Barg1) <- '#' Barg1 1105 )"); 1106 1107 REQUIRE(parser.parse("#TestVal1#End")); 1108 } 1109 1110 TEST_CASE("Line information test", "[line information]") { 1111 parser parser(R"( 1112 S <- _ (WORD _)+ 1113 WORD <- [A-Za-z]+ 1114 ~_ <- [ \t\r\n]+ 1115 )"); 1116 1117 std::vector<std::pair<size_t, size_t>> locations; __anone2d3858f1202(const peg::SemanticValues &vs) 1118 parser["WORD"] = [&](const peg::SemanticValues &vs) { 1119 locations.push_back(vs.line_info()); 1120 }; 1121 1122 bool ret = parser; 1123 REQUIRE(ret == true); 1124 1125 ret = parser.parse(" Mon Tue Wed \nThu Fri Sat\nSun\n"); 1126 REQUIRE(ret == true); 1127 1128 REQUIRE(locations[0] == std::make_pair<size_t, size_t>(1, 2)); 1129 REQUIRE(locations[1] == std::make_pair<size_t, size_t>(1, 6)); 1130 REQUIRE(locations[2] == std::make_pair<size_t, size_t>(1, 10)); 1131 REQUIRE(locations[3] == std::make_pair<size_t, size_t>(2, 1)); 1132 REQUIRE(locations[4] == std::make_pair<size_t, size_t>(2, 6)); 1133 REQUIRE(locations[5] == std::make_pair<size_t, size_t>(2, 11)); 1134 REQUIRE(locations[6] == std::make_pair<size_t, size_t>(3, 1)); 1135 } 1136 1137 TEST_CASE("Dictionary", "[dic]") { 1138 parser parser(R"( 1139 START <- 'This month is ' MONTH '.' 1140 MONTH <- 'Jan' | 'January' | 'Feb' | 'February' 1141 )"); 1142 1143 REQUIRE(parser.parse("This month is Jan.")); 1144 REQUIRE(parser.parse("This month is January.")); 1145 REQUIRE_FALSE(parser.parse("This month is Jannuary.")); 1146 REQUIRE_FALSE(parser.parse("This month is .")); 1147 } 1148 1149 TEST_CASE("Dictionary invalid", "[dic]") { 1150 parser parser(R"( 1151 START <- 'This month is ' MONTH '.' 1152 MONTH <- 'Jan' | 'January' | [a-z]+ | 'Feb' | 'February' 1153 )"); 1154 1155 bool ret = parser; 1156 REQUIRE_FALSE(ret); 1157 } 1158 1159 TEST_CASE("Error recovery 1", "[error]") { 1160 parser pg(R"( 1161 START <- __? SECTION* 1162 1163 SECTION <- HEADER __ ENTRIES __? 1164 1165 HEADER <- '[' _ CATEGORY (':' _ ATTRIBUTES)? ']'^header 1166 1167 CATEGORY <- < [-_a-zA-Z0-9\u0080-\uFFFF ]+ > _ 1168 ATTRIBUTES <- ATTRIBUTE (',' _ ATTRIBUTE)* 1169 ATTRIBUTE <- < [-_a-zA-Z0-9\u0080-\uFFFF]+ > _ 1170 1171 ENTRIES <- (ENTRY (__ ENTRY)*)? { no_ast_opt } 1172 1173 ENTRY <- ONE_WAY PHRASE ('|' _ PHRASE)* !'=' 1174 / PHRASE ('|' _ PHRASE)+ !'=' 1175 / %recover(entry) 1176 1177 ONE_WAY <- PHRASE '=' _ 1178 PHRASE <- WORD (' ' WORD)* _ 1179 WORD <- < (![ \t\r\n=|[\]#] .)+ > 1180 1181 ~__ <- _ (comment? nl _)+ 1182 ~_ <- [ \t]* 1183 1184 comment <- ('#' (!nl .)*) 1185 nl <- '\r'? '\n' 1186 1187 header <- (!__ .)* { message "invalid section header, missing ']'." } 1188 entry <- (!(__ / HEADER) .)+ { message "invalid entry." } 1189 )"); 1190 1191 REQUIRE(!!pg); // OK 1192 1193 std::vector<std::string> errors{ 1194 R"(3:1: invalid entry.)", 1195 R"(7:1: invalid entry.)", 1196 R"(10:11: invalid section header, missing ']'.)", 1197 R"(18:1: invalid entry.)", 1198 }; 1199 1200 size_t i = 0; __anone2d3858f1302(size_t ln, size_t col, const std::string &msg) 1201 pg.log = [&](size_t ln, size_t col, const std::string &msg) { 1202 std::stringstream ss; 1203 ss << ln << ":" << col << ": " << msg; 1204 REQUIRE(ss.str() == errors[i++]); 1205 }; 1206 1207 pg.enable_ast(); 1208 1209 std::shared_ptr<Ast> ast; 1210 REQUIRE_FALSE(pg.parse(R"([Section 1] 1211 111 = 222 | 333 1212 aaa || bbb 1213 ccc = ddd 1214 1215 [Section 2] 1216 eee 1217 fff | ggg 1218 1219 [Section 3 1220 hhh | iii 1221 1222 [Section 日本語] 1223 ppp | qqq 1224 1225 [Section 4] 1226 jjj | kkk 1227 lll = mmm | nnn = ooo 1228 1229 [Section 5] 1230 rrr | sss 1231 1232 )", ast)); 1233 1234 ast = pg.optimize_ast(ast); 1235 1236 REQUIRE(ast_to_s(ast) == 1237 R"(+ START 1238 + SECTION 1239 - HEADER/0[CATEGORY] (Section 1) 1240 + ENTRIES 1241 + ENTRY/0 1242 - ONE_WAY/0[WORD] (111) 1243 - PHRASE/0[WORD] (222) 1244 - PHRASE/0[WORD] (333) 1245 + ENTRY/2 1246 + ENTRY/0 1247 - ONE_WAY/0[WORD] (ccc) 1248 - PHRASE/0[WORD] (ddd) 1249 + SECTION 1250 - HEADER/0[CATEGORY] (Section 2) 1251 + ENTRIES 1252 + ENTRY/2 1253 + ENTRY/1 1254 - PHRASE/0[WORD] (fff) 1255 - PHRASE/0[WORD] (ggg) 1256 + SECTION 1257 - HEADER/0[CATEGORY] (Section 3) 1258 + ENTRIES 1259 + ENTRY/1 1260 - PHRASE/0[WORD] (hhh) 1261 - PHRASE/0[WORD] (iii) 1262 + SECTION 1263 - HEADER/0[CATEGORY] (Section 日本語) 1264 + ENTRIES 1265 + ENTRY/1 1266 - PHRASE/0[WORD] (ppp) 1267 - PHRASE/0[WORD] (qqq) 1268 + SECTION 1269 - HEADER/0[CATEGORY] (Section 4) 1270 + ENTRIES 1271 + ENTRY/1 1272 - PHRASE/0[WORD] (jjj) 1273 - PHRASE/0[WORD] (kkk) 1274 + ENTRY/2 1275 + SECTION 1276 - HEADER/0[CATEGORY] (Section 5) 1277 + ENTRIES 1278 + ENTRY/1 1279 - PHRASE/0[WORD] (rrr) 1280 - PHRASE/0[WORD] (sss) 1281 )"); 1282 } 1283 1284 TEST_CASE("Error recovery 2", "[error]") { 1285 parser pg(R"( 1286 START <- ENTRY ((',' ENTRY) / %recover((!(',' / Space) .)+))* (_ / %recover(.*)) 1287 ENTRY <- '[' ITEM (',' ITEM)* ']' 1288 ITEM <- WORD / NUM / %recover((!(',' / ']') .)+) 1289 NUM <- [0-9]+ ![a-z] 1290 WORD <- '"' [a-z]+ '"' 1291 1292 ~_ <- Space+ 1293 Space <- [ \n] 1294 )"); 1295 1296 REQUIRE(!!pg); // OK 1297 1298 std::vector<std::string> errors{ 1299 R"(1:6: syntax error, unexpected ']'.)", 1300 R"(1:18: syntax error, unexpected 'z', expecting <NUM>.)", 1301 R"(1:24: syntax error, unexpected ',', expecting <WORD>.)", 1302 R"(1:31: syntax error, unexpected 'ccc', expecting <NUM>.)", 1303 R"(1:38: syntax error, unexpected 'ddd', expecting <NUM>.)", 1304 R"(1:55: syntax error, unexpected ']', expecting <WORD>.)", 1305 R"(1:58: syntax error, unexpected '\n', expecting <NUM>.)", 1306 R"(2:3: syntax error.)", 1307 }; 1308 1309 size_t i = 0; __anone2d3858f1402(size_t ln, size_t col, const std::string &msg) 1310 pg.log = [&](size_t ln, size_t col, const std::string &msg) { 1311 std::stringstream ss; 1312 ss << ln << ":" << col << ": " << msg; 1313 REQUIRE(ss.str() == errors[i++]); 1314 }; 1315 1316 pg.enable_ast(); 1317 1318 std::shared_ptr<Ast> ast; 1319 REQUIRE_FALSE(pg.parse(R"([000]],[111],[222z,"aaa,"bbb",ccc"],[ddd",444,555,"eee],[ 1320 )", ast)); 1321 1322 ast = pg.optimize_ast(ast); 1323 1324 REQUIRE(ast_to_s(ast) == 1325 R"(+ START 1326 - ENTRY/0[NUM] (000) 1327 - ENTRY/0[NUM] (111) 1328 + ENTRY 1329 + ITEM/2 1330 + ITEM/2 1331 - ITEM/0[WORD] ("bbb") 1332 + ITEM/2 1333 + ENTRY 1334 + ITEM/2 1335 - ITEM/1[NUM] (444) 1336 - ITEM/1[NUM] (555) 1337 + ITEM/2 1338 )"); 1339 } 1340 1341 TEST_CASE("Error recovery 3", "[error]") { 1342 parser pg(R"~( 1343 # Grammar 1344 START <- __? SECTION* 1345 1346 SECTION <- HEADER __ ENTRIES __? 1347 1348 HEADER <- '['^missing_bracket _ CATEGORY (':' _ ATTRIBUTES)? ']'^missing_bracket ___ 1349 1350 CATEGORY <- < (&[-_a-zA-Z0-9\u0080-\uFFFF ] (![\u0080-\uFFFF])^vernacular_char .)+ > _ 1351 ATTRIBUTES <- ATTRIBUTE (',' _ ATTRIBUTE)* 1352 ATTRIBUTE <- < [-_a-zA-Z0-9]+ > _ 1353 1354 ENTRIES <- (ENTRY (__ ENTRY)*)? { no_ast_opt } 1355 1356 ENTRY <- ONE_WAY PHRASE^expect_phrase (or _ PHRASE^expect_phrase)* ___ 1357 / PHRASE (or^missing_or _ PHRASE^expect_phrase) (or _ PHRASE^expect_phrase)* ___ { no_ast_opt } 1358 1359 ONE_WAY <- PHRASE assign _ 1360 PHRASE <- WORD (' ' WORD)* _ { no_ast_opt } 1361 WORD <- < (![ \t\r\n=|[\]#] (![*?] / %recover(wildcard)) .)+ > 1362 1363 ~assign <- '=' ____ 1364 ~or <- '|' (!'|')^duplicate_or ____ 1365 1366 ~_ <- [ \t]* 1367 ~__ <- _ (comment? nl _)+ 1368 ~___ <- (!operators)^invalid_ope 1369 ~____ <- (!operators)^invalid_ope_comb 1370 1371 operators <- [|=]+ 1372 comment <- ('#' (!nl .)*) 1373 nl <- '\r'? '\n' 1374 1375 # Recovery 1376 duplicate_or <- skip_puncs { message "Duplicate OR operator (|)" } 1377 missing_or <- '' { message "Missing OR operator (|)" } 1378 missing_bracket <- skip_puncs { message "Missing opening/closing square bracket" } 1379 expect_phrase <- skip { message "Expect phrase" } 1380 invalid_ope_comb <- skip_puncs { message "Use of invalid operator combination" } 1381 invalid_ope <- skip { message "Use of invalid operator" } 1382 wildcard <- '' { message "Wildcard characters (%c) should not be used" } 1383 vernacular_char <- '' { message "Section name %c must be in English" } 1384 1385 skip <- (!(__) .)* 1386 skip_puncs <- [|=]* _ 1387 )~"); 1388 1389 REQUIRE(!!pg); // OK 1390 1391 std::vector<std::string> errors{ 1392 R"(3:7: Wildcard characters (*) should not be used)", 1393 R"(4:6: Wildcard characters (?) should not be used)", 1394 R"(5:6: Duplicate OR operator (|))", 1395 R"(9:4: Missing OR operator (|))", 1396 R"(11:16: Expect phrase)", 1397 R"(13:11: Missing opening/closing square bracket)", 1398 R"(16:10: Section name 日 must be in English)", 1399 R"(16:11: Section name 本 must be in English)", 1400 R"(16:12: Section name 語 must be in English)", 1401 R"(16:13: Section name で must be in English)", 1402 R"(16:14: Section name す must be in English)", 1403 R"(21:17: Use of invalid operator)", 1404 R"(24:10: Use of invalid operator combination)", 1405 R"(26:10: Missing OR operator (|))", 1406 }; 1407 1408 size_t i = 0; __anone2d3858f1502(size_t ln, size_t col, const std::string &msg) 1409 pg.log = [&](size_t ln, size_t col, const std::string &msg) { 1410 std::stringstream ss; 1411 ss << ln << ":" << col << ": " << msg; 1412 REQUIRE(ss.str() == errors[i++]); 1413 }; 1414 1415 pg.enable_ast(); 1416 1417 std::shared_ptr<Ast> ast; 1418 REQUIRE_FALSE(pg.parse(R"([Section 1] 1419 111 = 222 | 333 1420 AAA BB* | CCC 1421 AAA B?B | CCC 1422 aaa || bbb 1423 ccc = ddd 1424 1425 [Section 2] 1426 eee 1427 fff | ggg 1428 fff | ggg 111 | 1429 1430 [Section 3 1431 hhh | iii 1432 1433 [Section 日本語です] 1434 ppp | qqq 1435 1436 [Section 4] 1437 jjj | kkk 1438 lll = mmm | nnn = ooo 1439 1440 [Section 5] 1441 ppp qqq |= rrr 1442 1443 Section 6] 1444 sss | ttt 1445 )", ast)); 1446 1447 ast = pg.optimize_ast(ast); 1448 1449 REQUIRE(ast_to_s(ast) == 1450 R"(+ START 1451 + SECTION 1452 - HEADER/0[CATEGORY] (Section 1) 1453 + ENTRIES 1454 + ENTRY/0 1455 + ONE_WAY/0[PHRASE] 1456 - WORD (111) 1457 + PHRASE 1458 - WORD (222) 1459 + PHRASE 1460 - WORD (333) 1461 + ENTRY/1 1462 + PHRASE 1463 - WORD (AAA) 1464 - WORD (BB*) 1465 + PHRASE 1466 - WORD (CCC) 1467 + ENTRY/1 1468 + PHRASE 1469 - WORD (AAA) 1470 - WORD (B?B) 1471 + PHRASE 1472 - WORD (CCC) 1473 + ENTRY/1 1474 + PHRASE 1475 - WORD (aaa) 1476 + PHRASE 1477 - WORD (bbb) 1478 + ENTRY/0 1479 + ONE_WAY/0[PHRASE] 1480 - WORD (ccc) 1481 + PHRASE 1482 - WORD (ddd) 1483 + SECTION 1484 - HEADER/0[CATEGORY] (Section 2) 1485 + ENTRIES 1486 + ENTRY/1 1487 + PHRASE 1488 - WORD (eee) 1489 + ENTRY/1 1490 + PHRASE 1491 - WORD (fff) 1492 + PHRASE 1493 - WORD (ggg) 1494 + ENTRY/1 1495 + PHRASE 1496 - WORD (fff) 1497 + PHRASE 1498 - WORD (ggg) 1499 - WORD (111) 1500 + SECTION 1501 - HEADER/0[CATEGORY] (Section 3) 1502 + ENTRIES 1503 + ENTRY/1 1504 + PHRASE 1505 - WORD (hhh) 1506 + PHRASE 1507 - WORD (iii) 1508 + SECTION 1509 - HEADER/0[CATEGORY] (Section 日本語です) 1510 + ENTRIES 1511 + ENTRY/1 1512 + PHRASE 1513 - WORD (ppp) 1514 + PHRASE 1515 - WORD (qqq) 1516 + SECTION 1517 - HEADER/0[CATEGORY] (Section 4) 1518 + ENTRIES 1519 + ENTRY/1 1520 + PHRASE 1521 - WORD (jjj) 1522 + PHRASE 1523 - WORD (kkk) 1524 + ENTRY/0 1525 + ONE_WAY/0[PHRASE] 1526 - WORD (lll) 1527 + PHRASE 1528 - WORD (mmm) 1529 + PHRASE 1530 - WORD (nnn) 1531 + SECTION 1532 - HEADER/0[CATEGORY] (Section 5) 1533 + ENTRIES 1534 + ENTRY/1 1535 + PHRASE 1536 - WORD (ppp) 1537 - WORD (qqq) 1538 + PHRASE 1539 - WORD (rrr) 1540 + ENTRY/1 1541 + PHRASE 1542 - WORD (Section) 1543 - WORD (6) 1544 + ENTRY/1 1545 + PHRASE 1546 - WORD (sss) 1547 + PHRASE 1548 - WORD (ttt) 1549 )"); 1550 } 1551 1552 TEST_CASE("Error recovery Java", "[error]") { 1553 parser pg(R"( 1554 Prog ← PUBLIC CLASS NAME LCUR PUBLIC STATIC VOID MAIN LPAR STRING LBRA RBRA NAME RPAR BlockStmt RCUR 1555 BlockStmt ← LCUR (Stmt)* RCUR^rcblk 1556 Stmt ← IfStmt / WhileStmt / PrintStmt / DecStmt / AssignStmt / BlockStmt 1557 IfStmt ← IF LPAR Exp RPAR Stmt (ELSE Stmt)? 1558 WhileStmt ← WHILE LPAR Exp RPAR Stmt 1559 DecStmt ← INT NAME (ASSIGN Exp)? SEMI 1560 AssignStmt ← NAME ASSIGN Exp SEMI^semia 1561 PrintStmt ← PRINTLN LPAR Exp RPAR SEMI 1562 Exp ← RelExp (EQ RelExp)* 1563 RelExp ← AddExp (LT AddExp)* 1564 AddExp ← MulExp ((PLUS / MINUS) MulExp)* 1565 MulExp ← AtomExp ((TIMES / DIV) AtomExp)* 1566 AtomExp ← LPAR Exp RPAR / NUMBER / NAME 1567 1568 NUMBER ← < [0-9]+ > 1569 NAME ← < [a-zA-Z_][a-zA-Z_0-9]* > 1570 1571 ~LPAR ← '(' 1572 ~RPAR ← ')' 1573 ~LCUR ← '{' 1574 ~RCUR ← '}' 1575 ~LBRA ← '[' 1576 ~RBRA ← ']' 1577 ~SEMI ← ';' 1578 1579 ~EQ ← '==' 1580 ~LT ← '<' 1581 ~ASSIGN ← '=' 1582 1583 ~IF ← 'if' 1584 ~ELSE ← 'else' 1585 ~WHILE ← 'while' 1586 1587 PLUS ← '+' 1588 MINUS ← '-' 1589 TIMES ← '*' 1590 DIV ← '/' 1591 1592 CLASS ← 'class' 1593 PUBLIC ← 'public' 1594 STATIC ← 'static' 1595 1596 VOID ← 'void' 1597 INT ← 'int' 1598 1599 MAIN ← 'main' 1600 STRING ← 'String' 1601 PRINTLN ← 'System.out.println' 1602 1603 %whitespace ← [ \t\n]* 1604 %word ← NAME 1605 1606 # Throw operator labels 1607 rcblk ← SkipToRCUR { message "missing end of block." } 1608 semia ← '' { message "missing simicolon in assignment." } 1609 1610 # Recovery expressions 1611 SkipToRCUR ← (!RCUR (LCUR SkipToRCUR / .))* RCUR 1612 )"); 1613 1614 REQUIRE(!!pg); // OK 1615 1616 std::vector<std::string> errors{ 1617 R"(8:5: missing simicolon in assignment.)", 1618 R"(8:6: missing end of block.)", 1619 }; 1620 1621 size_t i = 0; __anone2d3858f1602(size_t ln, size_t col, const std::string &msg) 1622 pg.log = [&](size_t ln, size_t col, const std::string &msg) { 1623 std::stringstream ss; 1624 ss << ln << ":" << col << ": " << msg; 1625 REQUIRE(ss.str() == errors[i++]); 1626 }; 1627 1628 pg.enable_ast(); 1629 1630 std::shared_ptr<Ast> ast; 1631 REQUIRE_FALSE(pg.parse(R"(public class Example { 1632 public static void main(String[] args) { 1633 int n = 5; 1634 int f = 1; 1635 while(0 < n) { 1636 f = f * n; 1637 n = n - 1 1638 }; 1639 System.out.println(f); 1640 } 1641 } 1642 )", ast)); 1643 1644 ast = pg.optimize_ast(ast); 1645 1646 REQUIRE(ast_to_s(ast) == 1647 R"(+ Prog 1648 - PUBLIC (public) 1649 - CLASS (class) 1650 - NAME (Example) 1651 - PUBLIC (public) 1652 - STATIC (static) 1653 - VOID (void) 1654 - MAIN (main) 1655 - STRING (String) 1656 - NAME (args) 1657 + BlockStmt 1658 + Stmt/3[DecStmt] 1659 - INT (int) 1660 - NAME (n) 1661 - Exp/0[NUMBER] (5) 1662 + Stmt/3[DecStmt] 1663 - INT (int) 1664 - NAME (f) 1665 - Exp/0[NUMBER] (1) 1666 + Stmt/1[WhileStmt] 1667 + Exp/0[RelExp] 1668 - AddExp/0[NUMBER] (0) 1669 - AddExp/0[NAME] (n) 1670 + Stmt/5[BlockStmt] 1671 + Stmt/4[AssignStmt] 1672 - NAME (f) 1673 + Exp/0[MulExp] 1674 - AtomExp/2[NAME] (f) 1675 - TIMES (*) 1676 - AtomExp/2[NAME] (n) 1677 + Stmt/4[AssignStmt] 1678 - NAME (n) 1679 + Exp/0[AddExp] 1680 - MulExp/0[NAME] (n) 1681 - MINUS (-) 1682 - MulExp/0[NUMBER] (1) 1683 )"); 1684 } 1685