1 //
2 // peglib.h
3 //
4 // Copyright (c) 2020 Yuji Hirose. All rights reserved.
5 // MIT License
6 //
7
8 #pragma once
9
10 #include <algorithm>
11 #include <any>
12 #include <cassert>
13 #include <cctype>
14 #if __has_include(<charconv>)
15 #include <charconv>
16 #define CPPPEGLIB_HAS_CHARCONV true
17 #else
18 #define CPPPEGLIB_HAS_CHARCONV false
19 #endif
20 #include <cstring>
21 #include <functional>
22 #include <initializer_list>
23 #include <iostream>
24 #include <limits>
25 #include <list>
26 #include <map>
27 #include <memory>
28 #include <mutex>
29 #include <set>
30 #include <sstream>
31 #include <string>
32 #include <unordered_map>
33 #include <unordered_set>
34 #include <vector>
35
36 #if !defined(__cplusplus) || __cplusplus < 201703L
37 #error "Requires complete C++17 support"
38 #endif
39
40 namespace peg {
41
42 /*-----------------------------------------------------------------------------
43 * scope_exit
44 *---------------------------------------------------------------------------*/
45
46 // This is based on
47 // "http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2014/n4189".
48
49 template <typename EF> struct scope_exit {
scope_exitscope_exit50 explicit scope_exit(EF &&f)
51 : exit_function(std::move(f)), execute_on_destruction{true} {}
52
scope_exitscope_exit53 scope_exit(scope_exit &&rhs)
54 : exit_function(std::move(rhs.exit_function)),
55 execute_on_destruction{rhs.execute_on_destruction} {
56 rhs.release();
57 }
58
~scope_exitscope_exit59 ~scope_exit() {
60 if (execute_on_destruction) { this->exit_function(); }
61 }
62
releasescope_exit63 void release() { this->execute_on_destruction = false; }
64
65 private:
66 scope_exit(const scope_exit &) = delete;
67 void operator=(const scope_exit &) = delete;
68 scope_exit &operator=(scope_exit &&) = delete;
69
70 EF exit_function;
71 bool execute_on_destruction;
72 };
73
74 /*-----------------------------------------------------------------------------
75 * UTF8 functions
76 *---------------------------------------------------------------------------*/
77
codepoint_length(const char * s8,size_t l)78 inline size_t codepoint_length(const char *s8, size_t l) {
79 if (l) {
80 auto b = static_cast<uint8_t>(s8[0]);
81 if ((b & 0x80) == 0) {
82 return 1;
83 } else if ((b & 0xE0) == 0xC0 && l >= 2) {
84 return 2;
85 } else if ((b & 0xF0) == 0xE0 && l >= 3) {
86 return 3;
87 } else if ((b & 0xF8) == 0xF0 && l >= 4) {
88 return 4;
89 }
90 }
91 return 0;
92 }
93
codepoint_count(const char * s8,size_t l)94 inline size_t codepoint_count(const char *s8, size_t l) {
95 size_t count = 0;
96 for (size_t i = 0; i < l; i += codepoint_length(s8 + i, l - i)) {
97 count++;
98 }
99 return count;
100 }
101
encode_codepoint(char32_t cp,char * buff)102 inline size_t encode_codepoint(char32_t cp, char *buff) {
103 if (cp < 0x0080) {
104 buff[0] = static_cast<char>(cp & 0x7F);
105 return 1;
106 } else if (cp < 0x0800) {
107 buff[0] = static_cast<char>(0xC0 | ((cp >> 6) & 0x1F));
108 buff[1] = static_cast<char>(0x80 | (cp & 0x3F));
109 return 2;
110 } else if (cp < 0xD800) {
111 buff[0] = static_cast<char>(0xE0 | ((cp >> 12) & 0xF));
112 buff[1] = static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
113 buff[2] = static_cast<char>(0x80 | (cp & 0x3F));
114 return 3;
115 } else if (cp < 0xE000) {
116 // D800 - DFFF is invalid...
117 return 0;
118 } else if (cp < 0x10000) {
119 buff[0] = static_cast<char>(0xE0 | ((cp >> 12) & 0xF));
120 buff[1] = static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
121 buff[2] = static_cast<char>(0x80 | (cp & 0x3F));
122 return 3;
123 } else if (cp < 0x110000) {
124 buff[0] = static_cast<char>(0xF0 | ((cp >> 18) & 0x7));
125 buff[1] = static_cast<char>(0x80 | ((cp >> 12) & 0x3F));
126 buff[2] = static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
127 buff[3] = static_cast<char>(0x80 | (cp & 0x3F));
128 return 4;
129 }
130 return 0;
131 }
132
encode_codepoint(char32_t cp)133 inline std::string encode_codepoint(char32_t cp) {
134 char buff[4];
135 auto l = encode_codepoint(cp, buff);
136 return std::string(buff, l);
137 }
138
decode_codepoint(const char * s8,size_t l,size_t & bytes,char32_t & cp)139 inline bool decode_codepoint(const char *s8, size_t l, size_t &bytes,
140 char32_t &cp) {
141 if (l) {
142 auto b = static_cast<uint8_t>(s8[0]);
143 if ((b & 0x80) == 0) {
144 bytes = 1;
145 cp = b;
146 return true;
147 } else if ((b & 0xE0) == 0xC0) {
148 if (l >= 2) {
149 bytes = 2;
150 cp = ((static_cast<char32_t>(s8[0] & 0x1F)) << 6) |
151 (static_cast<char32_t>(s8[1] & 0x3F));
152 return true;
153 }
154 } else if ((b & 0xF0) == 0xE0) {
155 if (l >= 3) {
156 bytes = 3;
157 cp = ((static_cast<char32_t>(s8[0] & 0x0F)) << 12) |
158 ((static_cast<char32_t>(s8[1] & 0x3F)) << 6) |
159 (static_cast<char32_t>(s8[2] & 0x3F));
160 return true;
161 }
162 } else if ((b & 0xF8) == 0xF0) {
163 if (l >= 4) {
164 bytes = 4;
165 cp = ((static_cast<char32_t>(s8[0] & 0x07)) << 18) |
166 ((static_cast<char32_t>(s8[1] & 0x3F)) << 12) |
167 ((static_cast<char32_t>(s8[2] & 0x3F)) << 6) |
168 (static_cast<char32_t>(s8[3] & 0x3F));
169 return true;
170 }
171 }
172 }
173 return false;
174 }
175
decode_codepoint(const char * s8,size_t l,char32_t & cp)176 inline size_t decode_codepoint(const char *s8, size_t l, char32_t &cp) {
177 size_t bytes;
178 if (decode_codepoint(s8, l, bytes, cp)) { return bytes; }
179 return 0;
180 }
181
decode_codepoint(const char * s8,size_t l)182 inline char32_t decode_codepoint(const char *s8, size_t l) {
183 char32_t cp = 0;
184 decode_codepoint(s8, l, cp);
185 return cp;
186 }
187
decode(const char * s8,size_t l)188 inline std::u32string decode(const char *s8, size_t l) {
189 std::u32string out;
190 size_t i = 0;
191 while (i < l) {
192 auto beg = i++;
193 while (i < l && (s8[i] & 0xc0) == 0x80) {
194 i++;
195 }
196 out += decode_codepoint(&s8[beg], (i - beg));
197 }
198 return out;
199 }
200
u8(const T * s)201 template <typename T> const char *u8(const T *s) {
202 return reinterpret_cast<const char *>(s);
203 }
204
205 /*-----------------------------------------------------------------------------
206 * escape_characters
207 *---------------------------------------------------------------------------*/
208
escape_characters(const char * s,size_t n)209 inline std::string escape_characters(const char *s, size_t n) {
210 std::string str;
211 for (size_t i = 0; i < n; i++) {
212 auto c = s[i];
213 switch (c) {
214 case '\n': str += "\\n"; break;
215 case '\r': str += "\\r"; break;
216 case '\t': str += "\\t"; break;
217 default: str += c; break;
218 }
219 }
220 return str;
221 }
222
escape_characters(std::string_view sv)223 inline std::string escape_characters(std::string_view sv) {
224 return escape_characters(sv.data(), sv.size());
225 }
226
227 /*-----------------------------------------------------------------------------
228 * resolve_escape_sequence
229 *---------------------------------------------------------------------------*/
230
is_hex(char c,int & v)231 inline bool is_hex(char c, int &v) {
232 if ('0' <= c && c <= '9') {
233 v = c - '0';
234 return true;
235 } else if ('a' <= c && c <= 'f') {
236 v = c - 'a' + 10;
237 return true;
238 } else if ('A' <= c && c <= 'F') {
239 v = c - 'A' + 10;
240 return true;
241 }
242 return false;
243 }
244
is_digit(char c,int & v)245 inline bool is_digit(char c, int &v) {
246 if ('0' <= c && c <= '9') {
247 v = c - '0';
248 return true;
249 }
250 return false;
251 }
252
parse_hex_number(const char * s,size_t n,size_t i)253 inline std::pair<int, size_t> parse_hex_number(const char *s, size_t n,
254 size_t i) {
255 int ret = 0;
256 int val;
257 while (i < n && is_hex(s[i], val)) {
258 ret = static_cast<int>(ret * 16 + val);
259 i++;
260 }
261 return std::pair(ret, i);
262 }
263
parse_octal_number(const char * s,size_t n,size_t i)264 inline std::pair<int, size_t> parse_octal_number(const char *s, size_t n,
265 size_t i) {
266 int ret = 0;
267 int val;
268 while (i < n && is_digit(s[i], val)) {
269 ret = static_cast<int>(ret * 8 + val);
270 i++;
271 }
272 return std::pair(ret, i);
273 }
274
resolve_escape_sequence(const char * s,size_t n)275 inline std::string resolve_escape_sequence(const char *s, size_t n) {
276 std::string r;
277 r.reserve(n);
278
279 size_t i = 0;
280 while (i < n) {
281 auto ch = s[i];
282 if (ch == '\\') {
283 i++;
284 if (i == n) { throw std::runtime_error("Invalid escape sequence..."); }
285 switch (s[i]) {
286 case 'n':
287 r += '\n';
288 i++;
289 break;
290 case 'r':
291 r += '\r';
292 i++;
293 break;
294 case 't':
295 r += '\t';
296 i++;
297 break;
298 case '\'':
299 r += '\'';
300 i++;
301 break;
302 case '"':
303 r += '"';
304 i++;
305 break;
306 case '[':
307 r += '[';
308 i++;
309 break;
310 case ']':
311 r += ']';
312 i++;
313 break;
314 case '\\':
315 r += '\\';
316 i++;
317 break;
318 case 'x':
319 case 'u': {
320 char32_t cp;
321 std::tie(cp, i) = parse_hex_number(s, n, i + 1);
322 r += encode_codepoint(cp);
323 break;
324 }
325 default: {
326 char32_t cp;
327 std::tie(cp, i) = parse_octal_number(s, n, i);
328 r += encode_codepoint(cp);
329 break;
330 }
331 }
332 } else {
333 r += ch;
334 i++;
335 }
336 }
337 return r;
338 }
339
340 /*-----------------------------------------------------------------------------
341 * token_to_number_ - This function should be removed eventually
342 *---------------------------------------------------------------------------*/
343
token_to_number_(std::string_view sv)344 template <typename T> T token_to_number_(std::string_view sv) {
345 T n = 0;
346 if constexpr (CPPPEGLIB_HAS_CHARCONV && !std::is_floating_point<T>::value) {
347 std::from_chars(sv.data(), sv.data() + sv.size(), n);
348 } else {
349 auto s = std::string(sv);
350 std::istringstream ss(s);
351 ss >> n;
352 }
353 return n;
354 }
355
356 /*-----------------------------------------------------------------------------
357 * Trie
358 *---------------------------------------------------------------------------*/
359
360 class Trie {
361 public:
362 Trie() = default;
363 Trie(const Trie &) = default;
364
Trie(const std::vector<std::string> & items)365 Trie(const std::vector<std::string> &items) {
366 for (const auto &item : items) {
367 for (size_t len = 1; len <= item.size(); len++) {
368 auto last = len == item.size();
369 std::string_view sv(item.data(), len);
370 auto it = dic_.find(sv);
371 if (it == dic_.end()) {
372 dic_.emplace(sv, Info{last, last});
373 } else if (last) {
374 it->second.match = true;
375 } else {
376 it->second.done = false;
377 }
378 }
379 }
380 }
381
match(const char * text,size_t text_len)382 size_t match(const char *text, size_t text_len) const {
383 size_t match_len = 0;
384 auto done = false;
385 size_t len = 1;
386 while (!done && len <= text_len) {
387 std::string_view sv(text, len);
388 auto it = dic_.find(sv);
389 if (it == dic_.end()) {
390 done = true;
391 } else {
392 if (it->second.match) { match_len = len; }
393 if (it->second.done) { done = true; }
394 }
395 len += 1;
396 }
397 return match_len;
398 }
399
400 private:
401 struct Info {
402 bool done;
403 bool match;
404 };
405
406 // TODO: Use unordered_map when heterogeneous lookup is supported in C++20
407 // std::unordered_map<std::string, Info> dic_;
408 std::map<std::string, Info, std::less<>> dic_;
409 };
410
411 /*-----------------------------------------------------------------------------
412 * PEG
413 *---------------------------------------------------------------------------*/
414
415 /*
416 * Line information utility function
417 */
line_info(const char * start,const char * cur)418 inline std::pair<size_t, size_t> line_info(const char *start, const char *cur) {
419 auto p = start;
420 auto col_ptr = p;
421 auto no = 1;
422
423 while (p < cur) {
424 if (*p == '\n') {
425 no++;
426 col_ptr = p + 1;
427 }
428 p++;
429 }
430
431 auto col = codepoint_count(col_ptr, p - col_ptr) + 1;
432
433 return std::pair(no, col);
434 }
435
436 /*
437 * String tag
438 */
str2tag_core(const char * s,size_t l,unsigned int h)439 inline constexpr unsigned int str2tag_core(const char *s, size_t l,
440 unsigned int h) {
441 return (l == 0) ? h
442 : str2tag_core(s + 1, l - 1,
443 (h * 33) ^ static_cast<unsigned char>(*s));
444 }
445
str2tag(std::string_view sv)446 inline constexpr unsigned int str2tag(std::string_view sv) {
447 return str2tag_core(sv.data(), sv.size(), 0);
448 }
449
450 namespace udl {
451
452 inline constexpr unsigned int operator"" _(const char *s, size_t l) {
453 return str2tag_core(s, l, 0);
454 }
455
456 } // namespace udl
457
458 /*
459 * Semantic values
460 */
461 struct SemanticValues : protected std::vector<std::any> {
462 // Input text
463 const char *path = nullptr;
464 const char *ss = nullptr;
465 std::function<const std::vector<size_t> &()> source_line_index;
466
467 // Matched string
svSemanticValues468 std::string_view sv() const { return sv_; }
469
470 // Definition name
nameSemanticValues471 const std::string &name() const { return name_; }
472
473 std::vector<unsigned int> tags;
474
475 // Line number and column at which the matched string is
line_infoSemanticValues476 std::pair<size_t, size_t> line_info() const {
477 auto &idx = source_line_index();
478
479 auto cur = static_cast<size_t>(std::distance(ss, sv_.data()));
480 auto it = std::lower_bound(
481 idx.begin(), idx.end(), cur,
482 [](size_t element, size_t value) { return element < value; });
483
484 auto id = static_cast<size_t>(std::distance(idx.begin(), it));
485 auto off = cur - (id == 0 ? 0 : idx[id - 1] + 1);
486 return std::pair(id + 1, off + 1);
487 }
488
489 // Choice count
choice_countSemanticValues490 size_t choice_count() const { return choice_count_; }
491
492 // Choice number (0 based index)
choiceSemanticValues493 size_t choice() const { return choice_; }
494
495 // Tokens
496 std::vector<std::string_view> tokens;
497
498 std::string_view token(size_t id = 0) const {
499 if (tokens.empty()) { return sv_; }
500 assert(id < tokens.size());
501 return tokens[id];
502 }
503
504 // Token conversion
505 std::string token_to_string(size_t id = 0) const {
506 return std::string(token(id));
507 }
508
token_to_numberSemanticValues509 template <typename T> T token_to_number() const {
510 return token_to_number_<T>(token());
511 }
512
513 // Transform the semantic value vector to another vector
514 template <typename T>
515 std::vector<T> transform(size_t beg = 0,
516 size_t end = static_cast<size_t>(-1)) const {
517 std::vector<T> r;
518 end = (std::min)(end, size());
519 for (size_t i = beg; i < end; i++) {
520 r.emplace_back(std::any_cast<T>((*this)[i]));
521 }
522 return r;
523 }
524
525 using std::vector<std::any>::iterator;
526 using std::vector<std::any>::const_iterator;
527 using std::vector<std::any>::size;
528 using std::vector<std::any>::empty;
529 using std::vector<std::any>::assign;
530 using std::vector<std::any>::begin;
531 using std::vector<std::any>::end;
532 using std::vector<std::any>::rbegin;
533 using std::vector<std::any>::rend;
534 using std::vector<std::any>::operator[];
535 using std::vector<std::any>::at;
536 using std::vector<std::any>::resize;
537 using std::vector<std::any>::front;
538 using std::vector<std::any>::back;
539 using std::vector<std::any>::push_back;
540 using std::vector<std::any>::pop_back;
541 using std::vector<std::any>::insert;
542 using std::vector<std::any>::erase;
543 using std::vector<std::any>::clear;
544 using std::vector<std::any>::swap;
545 using std::vector<std::any>::emplace;
546 using std::vector<std::any>::emplace_back;
547
548 private:
549 friend class Context;
550 friend class Sequence;
551 friend class PrioritizedChoice;
552 friend class Holder;
553 friend class PrecedenceClimbing;
554
555 std::string_view sv_;
556 size_t choice_count_ = 0;
557 size_t choice_ = 0;
558 std::string name_;
559 };
560
561 /*
562 * Semantic action
563 */
call(F fn,Args &&...args)564 template <typename F, typename... Args> std::any call(F fn, Args &&... args) {
565 using R = decltype(fn(std::forward<Args>(args)...));
566 if constexpr (std::is_void<R>::value) {
567 fn(std::forward<Args>(args)...);
568 return std::any();
569 } else if constexpr (std::is_same<typename std::remove_cv<R>::type,
570 std::any>::value) {
571 return fn(std::forward<Args>(args)...);
572 } else {
573 return std::any(fn(std::forward<Args>(args)...));
574 }
575 }
576
577 template <typename T>
578 struct argument_count : argument_count<decltype(&T::operator())> {};
579 template <typename R, typename... Args>
580 struct argument_count<R (*)(Args...)>
581 : std::integral_constant<unsigned, sizeof...(Args)> {};
582 template <typename R, typename C, typename... Args>
583 struct argument_count<R (C::*)(Args...)>
584 : std::integral_constant<unsigned, sizeof...(Args)> {};
585 template <typename R, typename C, typename... Args>
586 struct argument_count<R (C::*)(Args...) const>
587 : std::integral_constant<unsigned, sizeof...(Args)> {};
588
589 class Action {
590 public:
591 Action() = default;
592 Action(Action &&rhs) = default;
593 template <typename F> Action(F fn) : fn_(make_adaptor(fn)) {}
594 template <typename F> void operator=(F fn) { fn_ = make_adaptor(fn); }
595 Action &operator=(const Action &rhs) = default;
596
597 operator bool() const { return bool(fn_); }
598
599 std::any operator()(SemanticValues &vs, std::any &dt) const {
600 return fn_(vs, dt);
601 }
602
603 private:
604 using Fty = std::function<std::any(SemanticValues &vs, std::any &dt)>;
605
606 template <typename F> Fty make_adaptor(F fn) {
607 if constexpr (argument_count<F>::value == 1) {
608 return [fn](auto &vs, auto & /*dt*/) { return call(fn, vs); };
609 } else {
610 return [fn](auto &vs, auto &dt) { return call(fn, vs, dt); };
611 }
612 }
613
614 Fty fn_;
615 };
616
617 /*
618 * Semantic predicate
619 */
620 // Note: 'parse_error' exception class should be be used in sematic action
621 // handlers to reject the rule.
622 struct parse_error {
623 parse_error() = default;
624 parse_error(const char *s) : s_(s) {}
625 const char *what() const { return s_.empty() ? nullptr : s_.data(); }
626
627 private:
628 std::string s_;
629 };
630
631 /*
632 * Parse result helper
633 */
634 inline bool success(size_t len) { return len != static_cast<size_t>(-1); }
635
636 inline bool fail(size_t len) { return len == static_cast<size_t>(-1); }
637
638 /*
639 * Log
640 */
641 using Log = std::function<void(size_t, size_t, const std::string &)>;
642
643 /*
644 * ErrorInfo
645 */
646 struct ErrorInfo {
647 const char *error_pos = nullptr;
648 std::vector<std::pair<const char *, bool>> expected_tokens;
649 const char *message_pos = nullptr;
650 std::string message;
651 mutable const char *last_output_pos = nullptr;
652
653 void clear() {
654 error_pos = nullptr;
655 expected_tokens.clear();
656 message_pos = nullptr;
657 message.clear();
658 }
659
660 void add(const char *token, bool is_literal) {
661 for (const auto &[t, l] : expected_tokens) {
662 if (t == token && l == is_literal) { return; }
663 }
664 expected_tokens.push_back(std::make_pair(token, is_literal));
665 }
666
667 void output_log(const Log &log, const char *s, size_t n) const {
668 if (message_pos) {
669 if (message_pos > last_output_pos) {
670 last_output_pos = message_pos;
671 auto line = line_info(s, message_pos);
672 std::string msg;
673 if (auto unexpected_token = heuristic_error_token(s, n, message_pos);
674 !unexpected_token.empty()) {
675 msg = replace_all(message, "%t", unexpected_token);
676
677 auto unexpected_char = unexpected_token.substr(
678 0, codepoint_length(unexpected_token.data(),
679 unexpected_token.size()));
680
681 msg = replace_all(msg, "%c", unexpected_char);
682 } else {
683 msg = message;
684 }
685 log(line.first, line.second, msg);
686 }
687 } else if (error_pos) {
688 if (error_pos > last_output_pos) {
689 last_output_pos = error_pos;
690 auto line = line_info(s, error_pos);
691
692 std::string msg;
693 if (expected_tokens.empty()) {
694 msg = "syntax error.";
695 } else {
696 msg = "syntax error";
697
698 // unexpected token
699 if (auto unexpected_token = heuristic_error_token(s, n, error_pos);
700 !unexpected_token.empty()) {
701 msg += ", unexpected '";
702 msg += unexpected_token;
703 msg += "'";
704 }
705
706 auto first_item = true;
707 size_t i = 0;
708 while (i < expected_tokens.size()) {
709 auto [token, is_literal] =
710 expected_tokens[expected_tokens.size() - i - 1];
711
712 // Skip rules start with '_'
713 if (!is_literal && token[0] != '_') {
714 msg += (first_item ? ", expecting " : ", ");
715 if (is_literal) {
716 msg += "'";
717 msg += token;
718 msg += "'";
719 } else {
720 msg += "<";
721 msg += token;
722 msg += ">";
723 }
724 first_item = false;
725 }
726
727 i++;
728 }
729 msg += ".";
730 }
731
732 log(line.first, line.second, msg);
733 }
734 }
735 }
736
737 private:
738 int cast_char(char c) const { return static_cast<unsigned char>(c); }
739
740 std::string heuristic_error_token(const char *s, size_t n,
741 const char *pos) const {
742 auto len = n - std::distance(s, pos);
743 if (len) {
744 size_t i = 0;
745 auto c = cast_char(pos[i++]);
746 if (!std::ispunct(c) && !std::isspace(c)) {
747 while (i < len && !std::ispunct(cast_char(pos[i])) &&
748 !std::isspace(cast_char(pos[i]))) {
749 i++;
750 }
751 }
752
753 size_t count = 8;
754 size_t j = 0;
755 while (count > 0 && j < i) {
756 j += codepoint_length(&pos[j], i - j);
757 count--;
758 }
759
760 return escape_characters(pos, j);
761 }
762 return std::string();
763 }
764
765 std::string replace_all(std::string str, const std::string &from,
766 const std::string &to) const {
767 size_t pos = 0;
768 while ((pos = str.find(from, pos)) != std::string::npos) {
769 str.replace(pos, from.length(), to);
770 pos += to.length();
771 }
772 return str;
773 }
774 };
775
776 /*
777 * Context
778 */
779 class Context;
780 class Ope;
781 class Definition;
782
783 using TracerEnter = std::function<void(const Ope &name, const char *s, size_t n,
784 const SemanticValues &vs,
785 const Context &c, const std::any &dt)>;
786
787 using TracerLeave = std::function<void(
788 const Ope &ope, const char *s, size_t n, const SemanticValues &vs,
789 const Context &c, const std::any &dt, size_t)>;
790
791 class Context {
792 public:
793 const char *path;
794 const char *s;
795 const size_t l;
796 std::vector<size_t> source_line_index;
797
798 ErrorInfo error_info;
799 bool recovered = false;
800
801 std::vector<std::shared_ptr<SemanticValues>> value_stack;
802 size_t value_stack_size = 0;
803
804 std::vector<Definition *> rule_stack;
805 std::vector<std::vector<std::shared_ptr<Ope>>> args_stack;
806
807 size_t in_token_boundary_count = 0;
808
809 std::shared_ptr<Ope> whitespaceOpe;
810 bool in_whitespace = false;
811
812 std::shared_ptr<Ope> wordOpe;
813
814 std::vector<std::map<std::string_view, std::string>> capture_scope_stack;
815 size_t capture_scope_stack_size = 0;
816
817 std::vector<bool> cut_stack;
818
819 const size_t def_count;
820 const bool enablePackratParsing;
821 std::vector<bool> cache_registered;
822 std::vector<bool> cache_success;
823
824 std::map<std::pair<size_t, size_t>, std::tuple<size_t, std::any>>
825 cache_values;
826
827 TracerEnter tracer_enter;
828 TracerLeave tracer_leave;
829
830 Log log;
831
832 Context(const char *path, const char *s, size_t l, size_t def_count,
833 std::shared_ptr<Ope> whitespaceOpe, std::shared_ptr<Ope> wordOpe,
834 bool enablePackratParsing, TracerEnter tracer_enter,
835 TracerLeave tracer_leave, Log log)
836 : path(path), s(s), l(l), whitespaceOpe(whitespaceOpe), wordOpe(wordOpe),
837 def_count(def_count), enablePackratParsing(enablePackratParsing),
838 cache_registered(enablePackratParsing ? def_count * (l + 1) : 0),
839 cache_success(enablePackratParsing ? def_count * (l + 1) : 0),
840 tracer_enter(tracer_enter), tracer_leave(tracer_leave), log(log) {
841
842 args_stack.resize(1);
843
844 push_capture_scope();
845 }
846
847 ~Context() { assert(!value_stack_size); }
848
849 Context(const Context &) = delete;
850 Context(Context &&) = delete;
851 Context operator=(const Context &) = delete;
852
853 template <typename T>
854 void packrat(const char *a_s, size_t def_id, size_t &len, std::any &val,
855 T fn) {
856 if (!enablePackratParsing) {
857 fn(val);
858 return;
859 }
860
861 auto col = a_s - s;
862 auto idx = def_count * static_cast<size_t>(col) + def_id;
863
864 if (cache_registered[idx]) {
865 if (cache_success[idx]) {
866 auto key = std::pair(col, def_id);
867 std::tie(len, val) = cache_values[key];
868 return;
869 } else {
870 len = static_cast<size_t>(-1);
871 return;
872 }
873 } else {
874 fn(val);
875 cache_registered[idx] = true;
876 cache_success[idx] = success(len);
877 if (success(len)) {
878 auto key = std::pair(col, def_id);
879 cache_values[key] = std::pair(len, val);
880 }
881 return;
882 }
883 }
884
885 SemanticValues &push() {
886 assert(value_stack_size <= value_stack.size());
887 if (value_stack_size == value_stack.size()) {
888 value_stack.emplace_back(std::make_shared<SemanticValues>());
889 } else {
890 auto &vs = *value_stack[value_stack_size];
891 if (!vs.empty()) {
892 vs.clear();
893 if (!vs.tags.empty()) { vs.tags.clear(); }
894 }
895 vs.sv_ = std::string_view();
896 vs.choice_count_ = 0;
897 vs.choice_ = 0;
898 if (!vs.tokens.empty()) { vs.tokens.clear(); }
899 }
900
901 auto &vs = *value_stack[value_stack_size++];
902 vs.path = path;
903 vs.ss = s;
904 vs.source_line_index = [&]() -> const std::vector<size_t> & {
905 if (source_line_index.empty()) {
906 for (size_t pos = 0; pos < l; pos++) {
907 if (s[pos] == '\n') { source_line_index.push_back(pos); }
908 }
909 source_line_index.push_back(l);
910 }
911 return source_line_index;
912 };
913
914 return vs;
915 }
916
917 void pop() { value_stack_size--; }
918
919 void push_args(std::vector<std::shared_ptr<Ope>> &&args) {
920 args_stack.emplace_back(args);
921 }
922
923 void pop_args() { args_stack.pop_back(); }
924
925 const std::vector<std::shared_ptr<Ope>> &top_args() const {
926 return args_stack[args_stack.size() - 1];
927 }
928
929 void push_capture_scope() {
930 assert(capture_scope_stack_size <= capture_scope_stack.size());
931 if (capture_scope_stack_size == capture_scope_stack.size()) {
932 capture_scope_stack.emplace_back(
933 std::map<std::string_view, std::string>());
934 } else {
935 auto &cs = capture_scope_stack[capture_scope_stack_size];
936 if (!cs.empty()) { cs.clear(); }
937 }
938 capture_scope_stack_size++;
939 }
940
941 void pop_capture_scope() { capture_scope_stack_size--; }
942
943 void shift_capture_values() {
944 assert(capture_scope_stack.size() >= 2);
945 auto curr = &capture_scope_stack[capture_scope_stack_size - 1];
946 auto prev = curr - 1;
947 for (const auto &[k, v] : *curr) {
948 (*prev)[k] = v;
949 }
950 }
951
952 void set_error_pos(const char *a_s, const char *literal = nullptr);
953
954 // void trace_enter(const char *name, const char *a_s, size_t n,
955 void trace_enter(const Ope &ope, const char *a_s, size_t n,
956 SemanticValues &vs, std::any &dt) const;
957 // void trace_leave(const char *name, const char *a_s, size_t n,
958 void trace_leave(const Ope &ope, const char *a_s, size_t n,
959 SemanticValues &vs, std::any &dt, size_t len) const;
960 bool is_traceable(const Ope &ope) const;
961
962 mutable size_t next_trace_id = 0;
963 mutable std::list<size_t> trace_ids;
964 };
965
966 /*
967 * Parser operators
968 */
969 class Ope {
970 public:
971 struct Visitor;
972
973 virtual ~Ope() = default;
974 size_t parse(const char *s, size_t n, SemanticValues &vs, Context &c,
975 std::any &dt) const;
976 virtual size_t parse_core(const char *s, size_t n, SemanticValues &vs,
977 Context &c, std::any &dt) const = 0;
978 virtual void accept(Visitor &v) = 0;
979 };
980
981 class Sequence : public Ope {
982 public:
983 template <typename... Args>
984 Sequence(const Args &... args)
985 : opes_{static_cast<std::shared_ptr<Ope>>(args)...} {}
986 Sequence(const std::vector<std::shared_ptr<Ope>> &opes) : opes_(opes) {}
987 Sequence(std::vector<std::shared_ptr<Ope>> &&opes) : opes_(opes) {}
988
989 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
990 std::any &dt) const override {
991 auto &chldsv = c.push();
992 auto pop_se = scope_exit([&]() { c.pop(); });
993 size_t i = 0;
994 for (const auto &ope : opes_) {
995 const auto &rule = *ope;
996 auto len = rule.parse(s + i, n - i, chldsv, c, dt);
997 if (fail(len)) { return len; }
998 i += len;
999 }
1000 if (!chldsv.empty()) {
1001 for (size_t j = 0; j < chldsv.size(); j++) {
1002 vs.emplace_back(std::move(chldsv[j]));
1003 }
1004 }
1005 if (!chldsv.tags.empty()) {
1006 for (size_t j = 0; j < chldsv.tags.size(); j++) {
1007 vs.tags.emplace_back(std::move(chldsv.tags[j]));
1008 }
1009 }
1010 vs.sv_ = chldsv.sv_;
1011 if (!chldsv.tokens.empty()) {
1012 for (size_t j = 0; j < chldsv.tokens.size(); j++) {
1013 vs.tokens.emplace_back(std::move(chldsv.tokens[j]));
1014 }
1015 }
1016 return i;
1017 }
1018
1019 void accept(Visitor &v) override;
1020
1021 std::vector<std::shared_ptr<Ope>> opes_;
1022 };
1023
1024 class PrioritizedChoice : public Ope {
1025 public:
1026 template <typename... Args>
1027 PrioritizedChoice(bool for_label, const Args &... args)
1028 : opes_{static_cast<std::shared_ptr<Ope>>(args)...},
1029 for_label_(for_label) {}
1030 PrioritizedChoice(const std::vector<std::shared_ptr<Ope>> &opes)
1031 : opes_(opes) {}
1032 PrioritizedChoice(std::vector<std::shared_ptr<Ope>> &&opes) : opes_(opes) {}
1033
1034 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1035 std::any &dt) const override {
1036 size_t len = static_cast<size_t>(-1);
1037
1038 if (!for_label_) { c.cut_stack.push_back(false); }
1039
1040 size_t id = 0;
1041 for (const auto &ope : opes_) {
1042 if (!c.cut_stack.empty()) { c.cut_stack.back() = false; }
1043
1044 auto &chldsv = c.push();
1045 c.push_capture_scope();
1046
1047 auto se = scope_exit([&]() {
1048 c.pop();
1049 c.pop_capture_scope();
1050 });
1051
1052 len = ope->parse(s, n, chldsv, c, dt);
1053
1054 if (success(len)) {
1055 if (!chldsv.empty()) {
1056 for (size_t i = 0; i < chldsv.size(); i++) {
1057 vs.emplace_back(std::move(chldsv[i]));
1058 }
1059 }
1060 if (!chldsv.tags.empty()) {
1061 for (size_t i = 0; i < chldsv.tags.size(); i++) {
1062 vs.tags.emplace_back(std::move(chldsv.tags[i]));
1063 }
1064 }
1065 vs.sv_ = chldsv.sv_;
1066 vs.choice_count_ = opes_.size();
1067 vs.choice_ = id;
1068 if (!chldsv.tokens.empty()) {
1069 for (size_t i = 0; i < chldsv.tokens.size(); i++) {
1070 vs.tokens.emplace_back(std::move(chldsv.tokens[i]));
1071 }
1072 }
1073 c.shift_capture_values();
1074 break;
1075 } else if (!c.cut_stack.empty() && c.cut_stack.back()) {
1076 break;
1077 }
1078
1079 id++;
1080 }
1081
1082 if (!for_label_) { c.cut_stack.pop_back(); }
1083
1084 return len;
1085 }
1086
1087 void accept(Visitor &v) override;
1088
1089 size_t size() const { return opes_.size(); }
1090
1091 std::vector<std::shared_ptr<Ope>> opes_;
1092 bool for_label_ = false;
1093 };
1094
1095 class Repetition : public Ope {
1096 public:
1097 Repetition(const std::shared_ptr<Ope> &ope, size_t min, size_t max)
1098 : ope_(ope), min_(min), max_(max) {}
1099
1100 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1101 std::any &dt) const override {
1102 size_t count = 0;
1103 size_t i = 0;
1104 while (count < min_) {
1105 c.push_capture_scope();
1106 auto se = scope_exit([&]() { c.pop_capture_scope(); });
1107 const auto &rule = *ope_;
1108 auto len = rule.parse(s + i, n - i, vs, c, dt);
1109 if (success(len)) {
1110 c.shift_capture_values();
1111 } else {
1112 return len;
1113 }
1114 i += len;
1115 count++;
1116 }
1117
1118 while (n - i > 0 && count < max_) {
1119 c.push_capture_scope();
1120 auto se = scope_exit([&]() { c.pop_capture_scope(); });
1121 auto save_sv_size = vs.size();
1122 auto save_tok_size = vs.tokens.size();
1123 const auto &rule = *ope_;
1124 auto len = rule.parse(s + i, n - i, vs, c, dt);
1125 if (success(len)) {
1126 c.shift_capture_values();
1127 } else {
1128 if (vs.size() != save_sv_size) {
1129 vs.erase(vs.begin() + static_cast<std::ptrdiff_t>(save_sv_size));
1130 vs.tags.erase(vs.tags.begin() +
1131 static_cast<std::ptrdiff_t>(save_sv_size));
1132 }
1133 if (vs.tokens.size() != save_tok_size) {
1134 vs.tokens.erase(vs.tokens.begin() +
1135 static_cast<std::ptrdiff_t>(save_tok_size));
1136 }
1137 break;
1138 }
1139 i += len;
1140 count++;
1141 }
1142 return i;
1143 }
1144
1145 void accept(Visitor &v) override;
1146
1147 bool is_zom() const {
1148 return min_ == 0 && max_ == std::numeric_limits<size_t>::max();
1149 }
1150
1151 static std::shared_ptr<Repetition> zom(const std::shared_ptr<Ope> &ope) {
1152 return std::make_shared<Repetition>(ope, 0,
1153 std::numeric_limits<size_t>::max());
1154 }
1155
1156 static std::shared_ptr<Repetition> oom(const std::shared_ptr<Ope> &ope) {
1157 return std::make_shared<Repetition>(ope, 1,
1158 std::numeric_limits<size_t>::max());
1159 }
1160
1161 static std::shared_ptr<Repetition> opt(const std::shared_ptr<Ope> &ope) {
1162 return std::make_shared<Repetition>(ope, 0, 1);
1163 }
1164
1165 std::shared_ptr<Ope> ope_;
1166 size_t min_;
1167 size_t max_;
1168 };
1169
1170 class AndPredicate : public Ope {
1171 public:
1172 AndPredicate(const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1173
1174 size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
1175 Context &c, std::any &dt) const override {
1176 auto &chldsv = c.push();
1177 c.push_capture_scope();
1178 auto se = scope_exit([&]() {
1179 c.pop();
1180 c.pop_capture_scope();
1181 });
1182 const auto &rule = *ope_;
1183 auto len = rule.parse(s, n, chldsv, c, dt);
1184 if (success(len)) {
1185 return 0;
1186 } else {
1187 return len;
1188 }
1189 }
1190
1191 void accept(Visitor &v) override;
1192
1193 std::shared_ptr<Ope> ope_;
1194 };
1195
1196 class NotPredicate : public Ope {
1197 public:
1198 NotPredicate(const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1199
1200 size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
1201 Context &c, std::any &dt) const override {
1202 auto &chldsv = c.push();
1203 c.push_capture_scope();
1204 auto se = scope_exit([&]() {
1205 c.pop();
1206 c.pop_capture_scope();
1207 });
1208 auto len = ope_->parse(s, n, chldsv, c, dt);
1209 if (success(len)) {
1210 c.set_error_pos(s);
1211 return static_cast<size_t>(-1);
1212 } else {
1213 return 0;
1214 }
1215 }
1216
1217 void accept(Visitor &v) override;
1218
1219 std::shared_ptr<Ope> ope_;
1220 };
1221
1222 class Dictionary : public Ope, public std::enable_shared_from_this<Dictionary> {
1223 public:
1224 Dictionary(const std::vector<std::string> &v) : trie_(v) {}
1225
1226 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1227 std::any &dt) const override;
1228
1229 void accept(Visitor &v) override;
1230
1231 Trie trie_;
1232 };
1233
1234 class LiteralString : public Ope,
1235 public std::enable_shared_from_this<LiteralString> {
1236 public:
1237 LiteralString(std::string &&s, bool ignore_case)
1238 : lit_(s), ignore_case_(ignore_case), is_word_(false) {}
1239
1240 LiteralString(const std::string &s, bool ignore_case)
1241 : lit_(s), ignore_case_(ignore_case), is_word_(false) {}
1242
1243 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1244 std::any &dt) const override;
1245
1246 void accept(Visitor &v) override;
1247
1248 std::string lit_;
1249 bool ignore_case_;
1250 mutable std::once_flag init_is_word_;
1251 mutable bool is_word_;
1252 };
1253
1254 class CharacterClass : public Ope,
1255 public std::enable_shared_from_this<CharacterClass> {
1256 public:
1257 CharacterClass(const std::string &s, bool negated) : negated_(negated) {
1258 auto chars = decode(s.data(), s.length());
1259 auto i = 0u;
1260 while (i < chars.size()) {
1261 if (i + 2 < chars.size() && chars[i + 1] == '-') {
1262 auto cp1 = chars[i];
1263 auto cp2 = chars[i + 2];
1264 ranges_.emplace_back(std::pair(cp1, cp2));
1265 i += 3;
1266 } else {
1267 auto cp = chars[i];
1268 ranges_.emplace_back(std::pair(cp, cp));
1269 i += 1;
1270 }
1271 }
1272 assert(!ranges_.empty());
1273 }
1274
1275 CharacterClass(const std::vector<std::pair<char32_t, char32_t>> &ranges,
1276 bool negated)
1277 : ranges_(ranges), negated_(negated) {
1278 assert(!ranges_.empty());
1279 }
1280
1281 size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
1282 Context &c, std::any & /*dt*/) const override {
1283 if (n < 1) {
1284 c.set_error_pos(s);
1285 return static_cast<size_t>(-1);
1286 }
1287
1288 char32_t cp = 0;
1289 auto len = decode_codepoint(s, n, cp);
1290
1291 for (const auto &range : ranges_) {
1292 if (range.first <= cp && cp <= range.second) {
1293 if (negated_) {
1294 c.set_error_pos(s);
1295 return static_cast<size_t>(-1);
1296 } else {
1297 return len;
1298 }
1299 }
1300 }
1301
1302 if (negated_) {
1303 return len;
1304 } else {
1305 c.set_error_pos(s);
1306 return static_cast<size_t>(-1);
1307 }
1308 }
1309
1310 void accept(Visitor &v) override;
1311
1312 std::vector<std::pair<char32_t, char32_t>> ranges_;
1313 bool negated_;
1314 };
1315
1316 class Character : public Ope, public std::enable_shared_from_this<Character> {
1317 public:
1318 Character(char ch) : ch_(ch) {}
1319
1320 size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
1321 Context &c, std::any & /*dt*/) const override {
1322 if (n < 1 || s[0] != ch_) {
1323 c.set_error_pos(s);
1324 return static_cast<size_t>(-1);
1325 }
1326 return 1;
1327 }
1328
1329 void accept(Visitor &v) override;
1330
1331 char ch_;
1332 };
1333
1334 class AnyCharacter : public Ope,
1335 public std::enable_shared_from_this<AnyCharacter> {
1336 public:
1337 size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
1338 Context &c, std::any & /*dt*/) const override {
1339 auto len = codepoint_length(s, n);
1340 if (len < 1) {
1341 c.set_error_pos(s);
1342 return static_cast<size_t>(-1);
1343 }
1344 return len;
1345 }
1346
1347 void accept(Visitor &v) override;
1348 };
1349
1350 class CaptureScope : public Ope {
1351 public:
1352 CaptureScope(const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1353
1354 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1355 std::any &dt) const override {
1356 c.push_capture_scope();
1357 auto se = scope_exit([&]() { c.pop_capture_scope(); });
1358 const auto &rule = *ope_;
1359 auto len = rule.parse(s, n, vs, c, dt);
1360 return len;
1361 }
1362
1363 void accept(Visitor &v) override;
1364
1365 std::shared_ptr<Ope> ope_;
1366 };
1367
1368 class Capture : public Ope {
1369 public:
1370 using MatchAction = std::function<void(const char *s, size_t n, Context &c)>;
1371
1372 Capture(const std::shared_ptr<Ope> &ope, MatchAction ma)
1373 : ope_(ope), match_action_(ma) {}
1374
1375 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1376 std::any &dt) const override {
1377 const auto &rule = *ope_;
1378 auto len = rule.parse(s, n, vs, c, dt);
1379 if (success(len) && match_action_) { match_action_(s, len, c); }
1380 return len;
1381 }
1382
1383 void accept(Visitor &v) override;
1384
1385 std::shared_ptr<Ope> ope_;
1386 MatchAction match_action_;
1387 };
1388
1389 class TokenBoundary : public Ope {
1390 public:
1391 TokenBoundary(const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1392
1393 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1394 std::any &dt) const override;
1395
1396 void accept(Visitor &v) override;
1397
1398 std::shared_ptr<Ope> ope_;
1399 };
1400
1401 class Ignore : public Ope {
1402 public:
1403 Ignore(const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1404
1405 size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
1406 Context &c, std::any &dt) const override {
1407 const auto &rule = *ope_;
1408 auto &chldsv = c.push();
1409 auto se = scope_exit([&]() { c.pop(); });
1410 return rule.parse(s, n, chldsv, c, dt);
1411 }
1412
1413 void accept(Visitor &v) override;
1414
1415 std::shared_ptr<Ope> ope_;
1416 };
1417
1418 using Parser = std::function<size_t(const char *s, size_t n, SemanticValues &vs,
1419 std::any &dt)>;
1420
1421 class User : public Ope {
1422 public:
1423 User(Parser fn) : fn_(fn) {}
1424 size_t parse_core(const char *s, size_t n, SemanticValues &vs,
1425 Context & /*c*/, std::any &dt) const override {
1426 assert(fn_);
1427 return fn_(s, n, vs, dt);
1428 }
1429 void accept(Visitor &v) override;
1430 std::function<size_t(const char *s, size_t n, SemanticValues &vs,
1431 std::any &dt)>
1432 fn_;
1433 };
1434
1435 class WeakHolder : public Ope {
1436 public:
1437 WeakHolder(const std::shared_ptr<Ope> &ope) : weak_(ope) {}
1438
1439 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1440 std::any &dt) const override {
1441 auto ope = weak_.lock();
1442 assert(ope);
1443 const auto &rule = *ope;
1444 return rule.parse(s, n, vs, c, dt);
1445 }
1446
1447 void accept(Visitor &v) override;
1448
1449 std::weak_ptr<Ope> weak_;
1450 };
1451
1452 class Holder : public Ope {
1453 public:
1454 Holder(Definition *outer) : outer_(outer) {}
1455
1456 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1457 std::any &dt) const override;
1458
1459 void accept(Visitor &v) override;
1460
1461 std::any reduce(SemanticValues &vs, std::any &dt) const;
1462
1463 const char *trace_name() const;
1464
1465 std::shared_ptr<Ope> ope_;
1466 Definition *outer_;
1467 mutable std::string trace_name_;
1468
1469 friend class Definition;
1470 };
1471
1472 using Grammar = std::unordered_map<std::string, Definition>;
1473
1474 class Reference : public Ope, public std::enable_shared_from_this<Reference> {
1475 public:
1476 Reference(const Grammar &grammar, const std::string &name, const char *s,
1477 bool is_macro, const std::vector<std::shared_ptr<Ope>> &args)
1478 : grammar_(grammar), name_(name), s_(s), is_macro_(is_macro), args_(args),
1479 rule_(nullptr), iarg_(0) {}
1480
1481 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1482 std::any &dt) const override;
1483
1484 void accept(Visitor &v) override;
1485
1486 std::shared_ptr<Ope> get_core_operator() const;
1487
1488 const Grammar &grammar_;
1489 const std::string name_;
1490 const char *s_;
1491
1492 const bool is_macro_;
1493 const std::vector<std::shared_ptr<Ope>> args_;
1494
1495 Definition *rule_;
1496 size_t iarg_;
1497 };
1498
1499 class Whitespace : public Ope {
1500 public:
1501 Whitespace(const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1502
1503 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1504 std::any &dt) const override {
1505 if (c.in_whitespace) { return 0; }
1506 c.in_whitespace = true;
1507 auto se = scope_exit([&]() { c.in_whitespace = false; });
1508 const auto &rule = *ope_;
1509 return rule.parse(s, n, vs, c, dt);
1510 }
1511
1512 void accept(Visitor &v) override;
1513
1514 std::shared_ptr<Ope> ope_;
1515 };
1516
1517 class BackReference : public Ope {
1518 public:
1519 BackReference(std::string &&name) : name_(name) {}
1520
1521 BackReference(const std::string &name) : name_(name) {}
1522
1523 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1524 std::any &dt) const override;
1525
1526 void accept(Visitor &v) override;
1527
1528 std::string name_;
1529 };
1530
1531 class PrecedenceClimbing : public Ope {
1532 public:
1533 using BinOpeInfo = std::map<std::string_view, std::pair<size_t, char>>;
1534
1535 PrecedenceClimbing(const std::shared_ptr<Ope> &atom,
1536 const std::shared_ptr<Ope> &binop, const BinOpeInfo &info,
1537 const Definition &rule)
1538 : atom_(atom), binop_(binop), info_(info), rule_(rule) {}
1539
1540 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1541 std::any &dt) const override {
1542 return parse_expression(s, n, vs, c, dt, 0);
1543 }
1544
1545 void accept(Visitor &v) override;
1546
1547 std::shared_ptr<Ope> atom_;
1548 std::shared_ptr<Ope> binop_;
1549 BinOpeInfo info_;
1550 const Definition &rule_;
1551
1552 private:
1553 size_t parse_expression(const char *s, size_t n, SemanticValues &vs,
1554 Context &c, std::any &dt, size_t min_prec) const;
1555
1556 Definition &get_reference_for_binop(Context &c) const;
1557 };
1558
1559 class Recovery : public Ope {
1560 public:
1561 Recovery(const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1562
1563 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1564 std::any &dt) const override;
1565
1566 void accept(Visitor &v) override;
1567
1568 std::shared_ptr<Ope> ope_;
1569 };
1570
1571 class Cut : public Ope, public std::enable_shared_from_this<Cut> {
1572 public:
1573 size_t parse_core(const char * /*s*/, size_t /*n*/, SemanticValues & /*vs*/,
1574 Context &c, std::any & /*dt*/) const override {
1575 c.cut_stack.back() = true;
1576 return 0;
1577 }
1578
1579 void accept(Visitor &v) override;
1580 };
1581
1582 /*
1583 * Factories
1584 */
1585 template <typename... Args> std::shared_ptr<Ope> seq(Args &&... args) {
1586 return std::make_shared<Sequence>(static_cast<std::shared_ptr<Ope>>(args)...);
1587 }
1588
1589 template <typename... Args> std::shared_ptr<Ope> cho(Args &&... args) {
1590 return std::make_shared<PrioritizedChoice>(
1591 false, static_cast<std::shared_ptr<Ope>>(args)...);
1592 }
1593
1594 template <typename... Args> std::shared_ptr<Ope> cho4label_(Args &&... args) {
1595 return std::make_shared<PrioritizedChoice>(
1596 true, static_cast<std::shared_ptr<Ope>>(args)...);
1597 }
1598
1599 inline std::shared_ptr<Ope> zom(const std::shared_ptr<Ope> &ope) {
1600 return Repetition::zom(ope);
1601 }
1602
1603 inline std::shared_ptr<Ope> oom(const std::shared_ptr<Ope> &ope) {
1604 return Repetition::oom(ope);
1605 }
1606
1607 inline std::shared_ptr<Ope> opt(const std::shared_ptr<Ope> &ope) {
1608 return Repetition::opt(ope);
1609 }
1610
1611 inline std::shared_ptr<Ope> rep(const std::shared_ptr<Ope> &ope, size_t min,
1612 size_t max) {
1613 return std::make_shared<Repetition>(ope, min, max);
1614 }
1615
1616 inline std::shared_ptr<Ope> apd(const std::shared_ptr<Ope> &ope) {
1617 return std::make_shared<AndPredicate>(ope);
1618 }
1619
1620 inline std::shared_ptr<Ope> npd(const std::shared_ptr<Ope> &ope) {
1621 return std::make_shared<NotPredicate>(ope);
1622 }
1623
1624 inline std::shared_ptr<Ope> dic(const std::vector<std::string> &v) {
1625 return std::make_shared<Dictionary>(v);
1626 }
1627
1628 inline std::shared_ptr<Ope> lit(std::string &&s) {
1629 return std::make_shared<LiteralString>(s, false);
1630 }
1631
1632 inline std::shared_ptr<Ope> liti(std::string &&s) {
1633 return std::make_shared<LiteralString>(s, true);
1634 }
1635
1636 inline std::shared_ptr<Ope> cls(const std::string &s) {
1637 return std::make_shared<CharacterClass>(s, false);
1638 }
1639
1640 inline std::shared_ptr<Ope>
1641 cls(const std::vector<std::pair<char32_t, char32_t>> &ranges) {
1642 return std::make_shared<CharacterClass>(ranges, false);
1643 }
1644
1645 inline std::shared_ptr<Ope> ncls(const std::string &s) {
1646 return std::make_shared<CharacterClass>(s, true);
1647 }
1648
1649 inline std::shared_ptr<Ope>
1650 ncls(const std::vector<std::pair<char32_t, char32_t>> &ranges) {
1651 return std::make_shared<CharacterClass>(ranges, true);
1652 }
1653
1654 inline std::shared_ptr<Ope> chr(char dt) {
1655 return std::make_shared<Character>(dt);
1656 }
1657
1658 inline std::shared_ptr<Ope> dot() { return std::make_shared<AnyCharacter>(); }
1659
1660 inline std::shared_ptr<Ope> csc(const std::shared_ptr<Ope> &ope) {
1661 return std::make_shared<CaptureScope>(ope);
1662 }
1663
1664 inline std::shared_ptr<Ope> cap(const std::shared_ptr<Ope> &ope,
1665 Capture::MatchAction ma) {
1666 return std::make_shared<Capture>(ope, ma);
1667 }
1668
1669 inline std::shared_ptr<Ope> tok(const std::shared_ptr<Ope> &ope) {
1670 return std::make_shared<TokenBoundary>(ope);
1671 }
1672
1673 inline std::shared_ptr<Ope> ign(const std::shared_ptr<Ope> &ope) {
1674 return std::make_shared<Ignore>(ope);
1675 }
1676
1677 inline std::shared_ptr<Ope>
1678 usr(std::function<size_t(const char *s, size_t n, SemanticValues &vs,
1679 std::any &dt)>
1680 fn) {
1681 return std::make_shared<User>(fn);
1682 }
1683
1684 inline std::shared_ptr<Ope> ref(const Grammar &grammar, const std::string &name,
1685 const char *s, bool is_macro,
1686 const std::vector<std::shared_ptr<Ope>> &args) {
1687 return std::make_shared<Reference>(grammar, name, s, is_macro, args);
1688 }
1689
1690 inline std::shared_ptr<Ope> wsp(const std::shared_ptr<Ope> &ope) {
1691 return std::make_shared<Whitespace>(std::make_shared<Ignore>(ope));
1692 }
1693
1694 inline std::shared_ptr<Ope> bkr(std::string &&name) {
1695 return std::make_shared<BackReference>(name);
1696 }
1697
1698 inline std::shared_ptr<Ope> pre(const std::shared_ptr<Ope> &atom,
1699 const std::shared_ptr<Ope> &binop,
1700 const PrecedenceClimbing::BinOpeInfo &info,
1701 const Definition &rule) {
1702 return std::make_shared<PrecedenceClimbing>(atom, binop, info, rule);
1703 }
1704
1705 inline std::shared_ptr<Ope> rec(const std::shared_ptr<Ope> &ope) {
1706 return std::make_shared<Recovery>(ope);
1707 }
1708
1709 inline std::shared_ptr<Ope> cut() { return std::make_shared<Cut>(); }
1710
1711 /*
1712 * Visitor
1713 */
1714 struct Ope::Visitor {
1715 virtual ~Visitor() {}
1716 virtual void visit(Sequence &) {}
1717 virtual void visit(PrioritizedChoice &) {}
1718 virtual void visit(Repetition &) {}
1719 virtual void visit(AndPredicate &) {}
1720 virtual void visit(NotPredicate &) {}
1721 virtual void visit(Dictionary &) {}
1722 virtual void visit(LiteralString &) {}
1723 virtual void visit(CharacterClass &) {}
1724 virtual void visit(Character &) {}
1725 virtual void visit(AnyCharacter &) {}
1726 virtual void visit(CaptureScope &) {}
1727 virtual void visit(Capture &) {}
1728 virtual void visit(TokenBoundary &) {}
1729 virtual void visit(Ignore &) {}
1730 virtual void visit(User &) {}
1731 virtual void visit(WeakHolder &) {}
1732 virtual void visit(Holder &) {}
1733 virtual void visit(Reference &) {}
1734 virtual void visit(Whitespace &) {}
1735 virtual void visit(BackReference &) {}
1736 virtual void visit(PrecedenceClimbing &) {}
1737 virtual void visit(Recovery &) {}
1738 virtual void visit(Cut &) {}
1739 };
1740
1741 struct IsReference : public Ope::Visitor {
1742 void visit(Reference &) override { is_reference_ = true; }
1743
1744 static bool check(Ope &ope) {
1745 IsReference vis;
1746 ope.accept(vis);
1747 return vis.is_reference_;
1748 }
1749
1750 private:
1751 bool is_reference_ = false;
1752 };
1753
1754 struct TraceOpeName : public Ope::Visitor {
1755 void visit(Sequence &) override { name_ = "Sequence"; }
1756 void visit(PrioritizedChoice &) override { name_ = "PrioritizedChoice"; }
1757 void visit(Repetition &) override { name_ = "Repetition"; }
1758 void visit(AndPredicate &) override { name_ = "AndPredicate"; }
1759 void visit(NotPredicate &) override { name_ = "NotPredicate"; }
1760 void visit(Dictionary &) override { name_ = "Dictionary"; }
1761 void visit(LiteralString &) override { name_ = "LiteralString"; }
1762 void visit(CharacterClass &) override { name_ = "CharacterClass"; }
1763 void visit(Character &) override { name_ = "Character"; }
1764 void visit(AnyCharacter &) override { name_ = "AnyCharacter"; }
1765 void visit(CaptureScope &) override { name_ = "CaptureScope"; }
1766 void visit(Capture &) override { name_ = "Capture"; }
1767 void visit(TokenBoundary &) override { name_ = "TokenBoundary"; }
1768 void visit(Ignore &) override { name_ = "Ignore"; }
1769 void visit(User &) override { name_ = "User"; }
1770 void visit(WeakHolder &) override { name_ = "WeakHolder"; }
1771 void visit(Holder &ope) override { name_ = ope.trace_name(); }
1772 void visit(Reference &) override { name_ = "Reference"; }
1773 void visit(Whitespace &) override { name_ = "Whitespace"; }
1774 void visit(BackReference &) override { name_ = "BackReference"; }
1775 void visit(PrecedenceClimbing &) override { name_ = "PrecedenceClimbing"; }
1776 void visit(Recovery &) override { name_ = "Recovery"; }
1777 void visit(Cut &) override { name_ = "Cut"; }
1778
1779 static std::string get(Ope &ope) {
1780 TraceOpeName vis;
1781 ope.accept(vis);
1782 return vis.name_;
1783 }
1784
1785 private:
1786 const char *name_ = nullptr;
1787 };
1788
1789 struct AssignIDToDefinition : public Ope::Visitor {
1790 void visit(Sequence &ope) override {
1791 for (auto op : ope.opes_) {
1792 op->accept(*this);
1793 }
1794 }
1795 void visit(PrioritizedChoice &ope) override {
1796 for (auto op : ope.opes_) {
1797 op->accept(*this);
1798 }
1799 }
1800 void visit(Repetition &ope) override { ope.ope_->accept(*this); }
1801 void visit(AndPredicate &ope) override { ope.ope_->accept(*this); }
1802 void visit(NotPredicate &ope) override { ope.ope_->accept(*this); }
1803 void visit(CaptureScope &ope) override { ope.ope_->accept(*this); }
1804 void visit(Capture &ope) override { ope.ope_->accept(*this); }
1805 void visit(TokenBoundary &ope) override { ope.ope_->accept(*this); }
1806 void visit(Ignore &ope) override { ope.ope_->accept(*this); }
1807 void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); }
1808 void visit(Holder &ope) override;
1809 void visit(Reference &ope) override;
1810 void visit(Whitespace &ope) override { ope.ope_->accept(*this); }
1811 void visit(PrecedenceClimbing &ope) override;
1812 void visit(Recovery &ope) override { ope.ope_->accept(*this); }
1813
1814 std::unordered_map<void *, size_t> ids;
1815 };
1816
1817 struct IsLiteralToken : public Ope::Visitor {
1818 void visit(PrioritizedChoice &ope) override {
1819 for (auto op : ope.opes_) {
1820 if (!IsLiteralToken::check(*op)) { return; }
1821 }
1822 result_ = true;
1823 }
1824
1825 void visit(Dictionary &) override { result_ = true; }
1826 void visit(LiteralString &) override { result_ = true; }
1827
1828 static bool check(Ope &ope) {
1829 IsLiteralToken vis;
1830 ope.accept(vis);
1831 return vis.result_;
1832 }
1833
1834 private:
1835 bool result_ = false;
1836 };
1837
1838 struct TokenChecker : public Ope::Visitor {
1839 void visit(Sequence &ope) override {
1840 for (auto op : ope.opes_) {
1841 op->accept(*this);
1842 }
1843 }
1844 void visit(PrioritizedChoice &ope) override {
1845 for (auto op : ope.opes_) {
1846 op->accept(*this);
1847 }
1848 }
1849 void visit(Repetition &ope) override { ope.ope_->accept(*this); }
1850 void visit(CaptureScope &ope) override { ope.ope_->accept(*this); }
1851 void visit(Capture &ope) override { ope.ope_->accept(*this); }
1852 void visit(TokenBoundary &) override { has_token_boundary_ = true; }
1853 void visit(Ignore &ope) override { ope.ope_->accept(*this); }
1854 void visit(WeakHolder &) override { has_rule_ = true; }
1855 void visit(Holder &ope) override { ope.ope_->accept(*this); }
1856 void visit(Reference &ope) override;
1857 void visit(Whitespace &ope) override { ope.ope_->accept(*this); }
1858 void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); }
1859 void visit(Recovery &ope) override { ope.ope_->accept(*this); }
1860
1861 static bool is_token(Ope &ope) {
1862 if (IsLiteralToken::check(ope)) { return true; }
1863
1864 TokenChecker vis;
1865 ope.accept(vis);
1866 return vis.has_token_boundary_ || !vis.has_rule_;
1867 }
1868
1869 private:
1870 bool has_token_boundary_ = false;
1871 bool has_rule_ = false;
1872 };
1873
1874 struct FindLiteralToken : public Ope::Visitor {
1875 void visit(LiteralString &ope) override { token_ = ope.lit_.c_str(); }
1876 void visit(TokenBoundary &ope) override { ope.ope_->accept(*this); }
1877 void visit(Ignore &ope) override { ope.ope_->accept(*this); }
1878 void visit(Reference &ope) override;
1879 void visit(Recovery &ope) override { ope.ope_->accept(*this); }
1880
1881 static const char *token(Ope &ope) {
1882 FindLiteralToken vis;
1883 ope.accept(vis);
1884 return vis.token_;
1885 }
1886
1887 private:
1888 const char *token_ = nullptr;
1889 };
1890
1891 struct DetectLeftRecursion : public Ope::Visitor {
1892 DetectLeftRecursion(const std::string &name) : name_(name) {}
1893
1894 void visit(Sequence &ope) override {
1895 for (auto op : ope.opes_) {
1896 op->accept(*this);
1897 if (done_) {
1898 break;
1899 } else if (error_s) {
1900 done_ = true;
1901 break;
1902 }
1903 }
1904 }
1905 void visit(PrioritizedChoice &ope) override {
1906 for (auto op : ope.opes_) {
1907 op->accept(*this);
1908 if (error_s) {
1909 done_ = true;
1910 break;
1911 }
1912 }
1913 }
1914 void visit(Repetition &ope) override {
1915 ope.ope_->accept(*this);
1916 done_ = ope.min_ > 0;
1917 }
1918 void visit(AndPredicate &ope) override {
1919 ope.ope_->accept(*this);
1920 done_ = false;
1921 }
1922 void visit(NotPredicate &ope) override {
1923 ope.ope_->accept(*this);
1924 done_ = false;
1925 }
1926 void visit(Dictionary &) override { done_ = true; }
1927 void visit(LiteralString &ope) override { done_ = !ope.lit_.empty(); }
1928 void visit(CharacterClass &) override { done_ = true; }
1929 void visit(Character &) override { done_ = true; }
1930 void visit(AnyCharacter &) override { done_ = true; }
1931 void visit(CaptureScope &ope) override { ope.ope_->accept(*this); }
1932 void visit(Capture &ope) override { ope.ope_->accept(*this); }
1933 void visit(TokenBoundary &ope) override { ope.ope_->accept(*this); }
1934 void visit(Ignore &ope) override { ope.ope_->accept(*this); }
1935 void visit(User &) override { done_ = true; }
1936 void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); }
1937 void visit(Holder &ope) override { ope.ope_->accept(*this); }
1938 void visit(Reference &ope) override;
1939 void visit(Whitespace &ope) override { ope.ope_->accept(*this); }
1940 void visit(BackReference &) override { done_ = true; }
1941 void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); }
1942 void visit(Recovery &ope) override { ope.ope_->accept(*this); }
1943 void visit(Cut &) override { done_ = true; }
1944
1945 const char *error_s = nullptr;
1946
1947 private:
1948 std::string name_;
1949 std::set<std::string> refs_;
1950 bool done_ = false;
1951 };
1952
1953 struct HasEmptyElement : public Ope::Visitor {
1954 HasEmptyElement(std::list<std::pair<const char *, std::string>> &refs)
1955 : refs_(refs) {}
1956
1957 void visit(Sequence &ope) override {
1958 auto save_is_empty = false;
1959 const char *save_error_s = nullptr;
1960 std::string save_error_name;
1961 for (auto op : ope.opes_) {
1962 op->accept(*this);
1963 if (!is_empty) { return; }
1964 save_is_empty = is_empty;
1965 save_error_s = error_s;
1966 save_error_name = error_name;
1967 is_empty = false;
1968 error_name.clear();
1969 }
1970 is_empty = save_is_empty;
1971 error_s = save_error_s;
1972 error_name = save_error_name;
1973 }
1974 void visit(PrioritizedChoice &ope) override {
1975 for (auto op : ope.opes_) {
1976 op->accept(*this);
1977 if (is_empty) { return; }
1978 }
1979 }
1980 void visit(Repetition &ope) override {
1981 if (ope.min_ == 0) {
1982 set_error();
1983 } else {
1984 ope.ope_->accept(*this);
1985 }
1986 }
1987 void visit(AndPredicate &) override { set_error(); }
1988 void visit(NotPredicate &) override { set_error(); }
1989 void visit(LiteralString &ope) override {
1990 if (ope.lit_.empty()) { set_error(); }
1991 }
1992 void visit(CaptureScope &ope) override { ope.ope_->accept(*this); }
1993 void visit(Capture &ope) override { ope.ope_->accept(*this); }
1994 void visit(TokenBoundary &ope) override { ope.ope_->accept(*this); }
1995 void visit(Ignore &ope) override { ope.ope_->accept(*this); }
1996 void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); }
1997 void visit(Holder &ope) override { ope.ope_->accept(*this); }
1998 void visit(Reference &ope) override;
1999 void visit(Whitespace &ope) override { ope.ope_->accept(*this); }
2000 void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); }
2001 void visit(Recovery &ope) override { ope.ope_->accept(*this); }
2002
2003 bool is_empty = false;
2004 const char *error_s = nullptr;
2005 std::string error_name;
2006
2007 private:
2008 void set_error() {
2009 is_empty = true;
2010 tie(error_s, error_name) = refs_.back();
2011 }
2012 std::list<std::pair<const char *, std::string>> &refs_;
2013 };
2014
2015 struct DetectInfiniteLoop : public Ope::Visitor {
2016 DetectInfiniteLoop(const char *s, const std::string &name) {
2017 refs_.emplace_back(s, name);
2018 }
2019
2020 void visit(Sequence &ope) override {
2021 for (auto op : ope.opes_) {
2022 op->accept(*this);
2023 if (has_error) { return; }
2024 }
2025 }
2026 void visit(PrioritizedChoice &ope) override {
2027 for (auto op : ope.opes_) {
2028 op->accept(*this);
2029 if (has_error) { return; }
2030 }
2031 }
2032 void visit(Repetition &ope) override {
2033 if (ope.max_ == std::numeric_limits<size_t>::max()) {
2034 HasEmptyElement vis(refs_);
2035 ope.ope_->accept(vis);
2036 if (vis.is_empty) {
2037 has_error = true;
2038 error_s = vis.error_s;
2039 error_name = vis.error_name;
2040 }
2041 } else {
2042 ope.ope_->accept(*this);
2043 }
2044 }
2045 void visit(AndPredicate &ope) override { ope.ope_->accept(*this); }
2046 void visit(NotPredicate &ope) override { ope.ope_->accept(*this); }
2047 void visit(CaptureScope &ope) override { ope.ope_->accept(*this); }
2048 void visit(Capture &ope) override { ope.ope_->accept(*this); }
2049 void visit(TokenBoundary &ope) override { ope.ope_->accept(*this); }
2050 void visit(Ignore &ope) override { ope.ope_->accept(*this); }
2051 void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); }
2052 void visit(Holder &ope) override { ope.ope_->accept(*this); }
2053 void visit(Reference &ope) override;
2054 void visit(Whitespace &ope) override { ope.ope_->accept(*this); }
2055 void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); }
2056 void visit(Recovery &ope) override { ope.ope_->accept(*this); }
2057
2058 bool has_error = false;
2059 const char *error_s = nullptr;
2060 std::string error_name;
2061
2062 private:
2063 std::list<std::pair<const char *, std::string>> refs_;
2064 };
2065
2066 struct ReferenceChecker : public Ope::Visitor {
2067 ReferenceChecker(const Grammar &grammar,
2068 const std::vector<std::string> ¶ms)
2069 : grammar_(grammar), params_(params) {}
2070
2071 void visit(Sequence &ope) override {
2072 for (auto op : ope.opes_) {
2073 op->accept(*this);
2074 }
2075 }
2076 void visit(PrioritizedChoice &ope) override {
2077 for (auto op : ope.opes_) {
2078 op->accept(*this);
2079 }
2080 }
2081 void visit(Repetition &ope) override { ope.ope_->accept(*this); }
2082 void visit(AndPredicate &ope) override { ope.ope_->accept(*this); }
2083 void visit(NotPredicate &ope) override { ope.ope_->accept(*this); }
2084 void visit(CaptureScope &ope) override { ope.ope_->accept(*this); }
2085 void visit(Capture &ope) override { ope.ope_->accept(*this); }
2086 void visit(TokenBoundary &ope) override { ope.ope_->accept(*this); }
2087 void visit(Ignore &ope) override { ope.ope_->accept(*this); }
2088 void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); }
2089 void visit(Holder &ope) override { ope.ope_->accept(*this); }
2090 void visit(Reference &ope) override;
2091 void visit(Whitespace &ope) override { ope.ope_->accept(*this); }
2092 void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); }
2093 void visit(Recovery &ope) override { ope.ope_->accept(*this); }
2094
2095 std::unordered_map<std::string, const char *> error_s;
2096 std::unordered_map<std::string, std::string> error_message;
2097 std::unordered_set<std::string> referenced;
2098
2099 private:
2100 const Grammar &grammar_;
2101 const std::vector<std::string> ¶ms_;
2102 };
2103
2104 struct LinkReferences : public Ope::Visitor {
2105 LinkReferences(Grammar &grammar, const std::vector<std::string> ¶ms)
2106 : grammar_(grammar), params_(params) {}
2107
2108 void visit(Sequence &ope) override {
2109 for (auto op : ope.opes_) {
2110 op->accept(*this);
2111 }
2112 }
2113 void visit(PrioritizedChoice &ope) override {
2114 for (auto op : ope.opes_) {
2115 op->accept(*this);
2116 }
2117 }
2118 void visit(Repetition &ope) override { ope.ope_->accept(*this); }
2119 void visit(AndPredicate &ope) override { ope.ope_->accept(*this); }
2120 void visit(NotPredicate &ope) override { ope.ope_->accept(*this); }
2121 void visit(CaptureScope &ope) override { ope.ope_->accept(*this); }
2122 void visit(Capture &ope) override { ope.ope_->accept(*this); }
2123 void visit(TokenBoundary &ope) override { ope.ope_->accept(*this); }
2124 void visit(Ignore &ope) override { ope.ope_->accept(*this); }
2125 void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); }
2126 void visit(Holder &ope) override { ope.ope_->accept(*this); }
2127 void visit(Reference &ope) override;
2128 void visit(Whitespace &ope) override { ope.ope_->accept(*this); }
2129 void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); }
2130 void visit(Recovery &ope) override { ope.ope_->accept(*this); }
2131
2132 private:
2133 Grammar &grammar_;
2134 const std::vector<std::string> ¶ms_;
2135 };
2136
2137 struct FindReference : public Ope::Visitor {
2138 FindReference(const std::vector<std::shared_ptr<Ope>> &args,
2139 const std::vector<std::string> ¶ms)
2140 : args_(args), params_(params) {}
2141
2142 void visit(Sequence &ope) override {
2143 std::vector<std::shared_ptr<Ope>> opes;
2144 for (auto o : ope.opes_) {
2145 o->accept(*this);
2146 opes.push_back(found_ope);
2147 }
2148 found_ope = std::make_shared<Sequence>(opes);
2149 }
2150 void visit(PrioritizedChoice &ope) override {
2151 std::vector<std::shared_ptr<Ope>> opes;
2152 for (auto o : ope.opes_) {
2153 o->accept(*this);
2154 opes.push_back(found_ope);
2155 }
2156 found_ope = std::make_shared<PrioritizedChoice>(opes);
2157 }
2158 void visit(Repetition &ope) override {
2159 ope.ope_->accept(*this);
2160 found_ope = rep(found_ope, ope.min_, ope.max_);
2161 }
2162 void visit(AndPredicate &ope) override {
2163 ope.ope_->accept(*this);
2164 found_ope = apd(found_ope);
2165 }
2166 void visit(NotPredicate &ope) override {
2167 ope.ope_->accept(*this);
2168 found_ope = npd(found_ope);
2169 }
2170 void visit(Dictionary &ope) override { found_ope = ope.shared_from_this(); }
2171 void visit(LiteralString &ope) override {
2172 found_ope = ope.shared_from_this();
2173 }
2174 void visit(CharacterClass &ope) override {
2175 found_ope = ope.shared_from_this();
2176 }
2177 void visit(Character &ope) override { found_ope = ope.shared_from_this(); }
2178 void visit(AnyCharacter &ope) override { found_ope = ope.shared_from_this(); }
2179 void visit(CaptureScope &ope) override {
2180 ope.ope_->accept(*this);
2181 found_ope = csc(found_ope);
2182 }
2183 void visit(Capture &ope) override {
2184 ope.ope_->accept(*this);
2185 found_ope = cap(found_ope, ope.match_action_);
2186 }
2187 void visit(TokenBoundary &ope) override {
2188 ope.ope_->accept(*this);
2189 found_ope = tok(found_ope);
2190 }
2191 void visit(Ignore &ope) override {
2192 ope.ope_->accept(*this);
2193 found_ope = ign(found_ope);
2194 }
2195 void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); }
2196 void visit(Holder &ope) override { ope.ope_->accept(*this); }
2197 void visit(Reference &ope) override;
2198 void visit(Whitespace &ope) override {
2199 ope.ope_->accept(*this);
2200 found_ope = wsp(found_ope);
2201 }
2202 void visit(PrecedenceClimbing &ope) override {
2203 ope.atom_->accept(*this);
2204 found_ope = csc(found_ope);
2205 }
2206 void visit(Recovery &ope) override {
2207 ope.ope_->accept(*this);
2208 found_ope = rec(found_ope);
2209 }
2210 void visit(Cut &ope) override { found_ope = ope.shared_from_this(); }
2211
2212 std::shared_ptr<Ope> found_ope;
2213
2214 private:
2215 const std::vector<std::shared_ptr<Ope>> &args_;
2216 const std::vector<std::string> ¶ms_;
2217 };
2218
2219 struct IsPrioritizedChoice : public Ope::Visitor {
2220 void visit(PrioritizedChoice &) override { result_ = true; }
2221
2222 static bool check(Ope &ope) {
2223 IsPrioritizedChoice vis;
2224 ope.accept(vis);
2225 return vis.result_;
2226 }
2227
2228 private:
2229 bool result_ = false;
2230 };
2231
2232 /*
2233 * Keywords
2234 */
2235 static const char *WHITESPACE_DEFINITION_NAME = "%whitespace";
2236 static const char *WORD_DEFINITION_NAME = "%word";
2237 static const char *RECOVER_DEFINITION_NAME = "%recover";
2238
2239 /*
2240 * Definition
2241 */
2242 class Definition {
2243 public:
2244 struct Result {
2245 bool ret;
2246 bool recovered;
2247 size_t len;
2248 ErrorInfo error_info;
2249 };
2250
2251 Definition() : holder_(std::make_shared<Holder>(this)) {}
2252
2253 Definition(const Definition &rhs) : name(rhs.name), holder_(rhs.holder_) {
2254 holder_->outer_ = this;
2255 }
2256
2257 Definition(const std::shared_ptr<Ope> &ope)
2258 : holder_(std::make_shared<Holder>(this)) {
2259 *this <= ope;
2260 }
2261
2262 operator std::shared_ptr<Ope>() {
2263 return std::make_shared<WeakHolder>(holder_);
2264 }
2265
2266 Definition &operator<=(const std::shared_ptr<Ope> &ope) {
2267 holder_->ope_ = ope;
2268 return *this;
2269 }
2270
2271 Result parse(const char *s, size_t n, const char *path = nullptr,
2272 Log log = nullptr) const {
2273 SemanticValues vs;
2274 std::any dt;
2275 return parse_core(s, n, vs, dt, path, log);
2276 }
2277
2278 Result parse(const char *s, const char *path = nullptr,
2279 Log log = nullptr) const {
2280 auto n = strlen(s);
2281 return parse(s, n, path, log);
2282 }
2283
2284 Result parse(const char *s, size_t n, std::any &dt,
2285 const char *path = nullptr, Log log = nullptr) const {
2286 SemanticValues vs;
2287 return parse_core(s, n, vs, dt, path, log);
2288 }
2289
2290 Result parse(const char *s, std::any &dt, const char *path = nullptr,
2291 Log log = nullptr) const {
2292 auto n = strlen(s);
2293 return parse(s, n, dt, path, log);
2294 }
2295
2296 template <typename T>
2297 Result parse_and_get_value(const char *s, size_t n, T &val,
2298 const char *path = nullptr,
2299 Log log = nullptr) const {
2300 SemanticValues vs;
2301 std::any dt;
2302 auto r = parse_core(s, n, vs, dt, path, log);
2303 if (r.ret && !vs.empty() && vs.front().has_value()) {
2304 val = std::any_cast<T>(vs[0]);
2305 }
2306 return r;
2307 }
2308
2309 template <typename T>
2310 Result parse_and_get_value(const char *s, T &val, const char *path = nullptr,
2311 Log log = nullptr) const {
2312 auto n = strlen(s);
2313 return parse_and_get_value(s, n, val, path, log);
2314 }
2315
2316 template <typename T>
2317 Result parse_and_get_value(const char *s, size_t n, std::any &dt, T &val,
2318 const char *path = nullptr,
2319 Log log = nullptr) const {
2320 SemanticValues vs;
2321 auto r = parse_core(s, n, vs, dt, path, log);
2322 if (r.ret && !vs.empty() && vs.front().has_value()) {
2323 val = std::any_cast<T>(vs[0]);
2324 }
2325 return r;
2326 }
2327
2328 template <typename T>
2329 Result parse_and_get_value(const char *s, std::any &dt, T &val,
2330 const char *path = nullptr,
2331 Log log = nullptr) const {
2332 auto n = strlen(s);
2333 return parse_and_get_value(s, n, dt, val, path, log);
2334 }
2335
2336 void operator=(Action a) { action = a; }
2337
2338 template <typename T> Definition &operator,(T fn) {
2339 operator=(fn);
2340 return *this;
2341 }
2342
2343 Definition &operator~() {
2344 ignoreSemanticValue = true;
2345 return *this;
2346 }
2347
2348 void accept(Ope::Visitor &v) { holder_->accept(v); }
2349
2350 std::shared_ptr<Ope> get_core_operator() const { return holder_->ope_; }
2351
2352 bool is_token() const {
2353 std::call_once(is_token_init_, [this]() {
2354 is_token_ = TokenChecker::is_token(*get_core_operator());
2355 });
2356 return is_token_;
2357 }
2358
2359 std::string name;
2360 const char *s_ = nullptr;
2361
2362 size_t id = 0;
2363 Action action;
2364 std::function<void(const char *s, size_t n, std::any &dt)> enter;
2365 std::function<void(const char *s, size_t n, size_t matchlen, std::any &value,
2366 std::any &dt)>
2367 leave;
2368 bool ignoreSemanticValue = false;
2369 std::shared_ptr<Ope> whitespaceOpe;
2370 std::shared_ptr<Ope> wordOpe;
2371 bool enablePackratParsing = false;
2372 bool is_macro = false;
2373 std::vector<std::string> params;
2374 TracerEnter tracer_enter;
2375 TracerLeave tracer_leave;
2376 bool disable_action = false;
2377
2378 std::string error_message;
2379 bool no_ast_opt = false;
2380
2381 private:
2382 friend class Reference;
2383 friend class ParserGenerator;
2384
2385 Definition &operator=(const Definition &rhs);
2386 Definition &operator=(Definition &&rhs);
2387
2388 void initialize_definition_ids() const {
2389 std::call_once(definition_ids_init_, [&]() {
2390 AssignIDToDefinition vis;
2391 holder_->accept(vis);
2392 if (whitespaceOpe) { whitespaceOpe->accept(vis); }
2393 if (wordOpe) { wordOpe->accept(vis); }
2394 definition_ids_.swap(vis.ids);
2395 });
2396 }
2397
2398 Result parse_core(const char *s, size_t n, SemanticValues &vs, std::any &dt,
2399 const char *path, Log log) const {
2400 initialize_definition_ids();
2401
2402 std::shared_ptr<Ope> ope = holder_;
2403 if (whitespaceOpe) { ope = std::make_shared<Sequence>(whitespaceOpe, ope); }
2404
2405 Context cxt(path, s, n, definition_ids_.size(), whitespaceOpe, wordOpe,
2406 enablePackratParsing, tracer_enter, tracer_leave, log);
2407
2408 auto len = ope->parse(s, n, vs, cxt, dt);
2409 return Result{success(len), cxt.recovered, len, cxt.error_info};
2410 }
2411
2412 std::shared_ptr<Holder> holder_;
2413 mutable std::once_flag is_token_init_;
2414 mutable bool is_token_ = false;
2415 mutable std::once_flag assign_id_to_definition_init_;
2416 mutable std::once_flag definition_ids_init_;
2417 mutable std::unordered_map<void *, size_t> definition_ids_;
2418 };
2419
2420 /*
2421 * Implementations
2422 */
2423
2424 inline size_t parse_literal(const char *s, size_t n, SemanticValues &vs,
2425 Context &c, std::any &dt, const std::string &lit,
2426 std::once_flag &init_is_word, bool &is_word,
2427 bool ignore_case) {
2428 size_t i = 0;
2429 for (; i < lit.size(); i++) {
2430 if (i >= n || (ignore_case ? (std::tolower(s[i]) != std::tolower(lit[i]))
2431 : (s[i] != lit[i]))) {
2432 c.set_error_pos(s, lit.c_str());
2433 return static_cast<size_t>(-1);
2434 }
2435 }
2436
2437 // Word check
2438 if (c.wordOpe) {
2439 std::call_once(init_is_word, [&]() {
2440 SemanticValues dummy_vs;
2441 Context dummy_c(nullptr, c.s, c.l, 0, nullptr, nullptr, false, nullptr,
2442 nullptr, nullptr);
2443 std::any dummy_dt;
2444
2445 auto len =
2446 c.wordOpe->parse(lit.data(), lit.size(), dummy_vs, dummy_c, dummy_dt);
2447 is_word = success(len);
2448 });
2449
2450 if (is_word) {
2451 SemanticValues dummy_vs;
2452 Context dummy_c(nullptr, c.s, c.l, 0, nullptr, nullptr, false, nullptr,
2453 nullptr, nullptr);
2454 std::any dummy_dt;
2455
2456 NotPredicate ope(c.wordOpe);
2457 auto len = ope.parse(s + i, n - i, dummy_vs, dummy_c, dummy_dt);
2458 if (fail(len)) { return len; }
2459 i += len;
2460 }
2461 }
2462
2463 // Skip whiltespace
2464 if (!c.in_token_boundary_count) {
2465 if (c.whitespaceOpe) {
2466 auto len = c.whitespaceOpe->parse(s + i, n - i, vs, c, dt);
2467 if (fail(len)) { return len; }
2468 i += len;
2469 }
2470 }
2471
2472 return i;
2473 }
2474
2475 inline void Context::set_error_pos(const char *a_s, const char *literal) {
2476 if (log) {
2477 if (error_info.error_pos <= a_s) {
2478 if (error_info.error_pos < a_s) {
2479 error_info.error_pos = a_s;
2480 error_info.expected_tokens.clear();
2481 }
2482 if (literal) {
2483 error_info.add(literal, true);
2484 } else if (!rule_stack.empty()) {
2485 auto rule = rule_stack.back();
2486 auto ope = rule->get_core_operator();
2487 if (auto token = FindLiteralToken::token(*ope);
2488 token && token[0] != '\0') {
2489 error_info.add(token, true);
2490 } else {
2491 error_info.add(rule->name.c_str(), false);
2492 }
2493 }
2494 }
2495 }
2496 }
2497
2498 inline void Context::trace_enter(const Ope &ope, const char *a_s, size_t n,
2499 SemanticValues &vs, std::any &dt) const {
2500 trace_ids.push_back(next_trace_id++);
2501 tracer_enter(ope, a_s, n, vs, *this, dt);
2502 }
2503
2504 inline void Context::trace_leave(const Ope &ope, const char *a_s, size_t n,
2505 SemanticValues &vs, std::any &dt,
2506 size_t len) const {
2507 tracer_leave(ope, a_s, n, vs, *this, dt, len);
2508 trace_ids.pop_back();
2509 }
2510
2511 inline bool Context::is_traceable(const Ope &ope) const {
2512 if (tracer_enter && tracer_leave) {
2513 return !IsReference::check(const_cast<Ope &>(ope));
2514 }
2515 return false;
2516 }
2517
2518 inline size_t Ope::parse(const char *s, size_t n, SemanticValues &vs,
2519 Context &c, std::any &dt) const {
2520 if (c.is_traceable(*this)) {
2521 c.trace_enter(*this, s, n, vs, dt);
2522 auto len = parse_core(s, n, vs, c, dt);
2523 c.trace_leave(*this, s, n, vs, dt, len);
2524 return len;
2525 }
2526 return parse_core(s, n, vs, c, dt);
2527 }
2528
2529 inline size_t Dictionary::parse_core(const char *s, size_t n,
2530 SemanticValues & /*vs*/, Context &c,
2531 std::any & /*dt*/) const {
2532 auto len = trie_.match(s, n);
2533 if (len > 0) { return len; }
2534 c.set_error_pos(s);
2535 return static_cast<size_t>(-1);
2536 }
2537
2538 inline size_t LiteralString::parse_core(const char *s, size_t n,
2539 SemanticValues &vs, Context &c,
2540 std::any &dt) const {
2541 return parse_literal(s, n, vs, c, dt, lit_, init_is_word_, is_word_,
2542 ignore_case_);
2543 }
2544
2545 inline size_t TokenBoundary::parse_core(const char *s, size_t n,
2546 SemanticValues &vs, Context &c,
2547 std::any &dt) const {
2548 size_t len;
2549 {
2550 c.in_token_boundary_count++;
2551 auto se = scope_exit([&]() { c.in_token_boundary_count--; });
2552 len = ope_->parse(s, n, vs, c, dt);
2553 }
2554
2555 if (success(len)) {
2556 vs.tokens.emplace_back(std::string_view(s, len));
2557
2558 if (!c.in_token_boundary_count) {
2559 if (c.whitespaceOpe) {
2560 auto l = c.whitespaceOpe->parse(s + len, n - len, vs, c, dt);
2561 if (fail(l)) { return l; }
2562 len += l;
2563 }
2564 }
2565 }
2566 return len;
2567 }
2568
2569 inline size_t Holder::parse_core(const char *s, size_t n, SemanticValues &vs,
2570 Context &c, std::any &dt) const {
2571 if (!ope_) {
2572 throw std::logic_error("Uninitialized definition ope was used...");
2573 }
2574
2575 // Macro reference
2576 if (outer_->is_macro) {
2577 c.rule_stack.push_back(outer_);
2578 auto len = ope_->parse(s, n, vs, c, dt);
2579 c.rule_stack.pop_back();
2580 return len;
2581 }
2582
2583 size_t len;
2584 std::any val;
2585
2586 c.packrat(s, outer_->id, len, val, [&](std::any &a_val) {
2587 if (outer_->enter) { outer_->enter(s, n, dt); }
2588
2589 auto se2 = scope_exit([&]() {
2590 c.pop();
2591 if (outer_->leave) { outer_->leave(s, n, len, a_val, dt); }
2592 });
2593
2594 auto &chldsv = c.push();
2595
2596 c.rule_stack.push_back(outer_);
2597 len = ope_->parse(s, n, chldsv, c, dt);
2598 c.rule_stack.pop_back();
2599
2600 // Invoke action
2601 if (success(len)) {
2602 chldsv.sv_ = std::string_view(s, len);
2603 chldsv.name_ = outer_->name;
2604
2605 if (!IsPrioritizedChoice::check(*ope_)) {
2606 chldsv.choice_count_ = 0;
2607 chldsv.choice_ = 0;
2608 }
2609
2610 try {
2611 a_val = reduce(chldsv, dt);
2612 } catch (const parse_error &e) {
2613 if (c.log) {
2614 if (e.what()) {
2615 if (c.error_info.message_pos < s) {
2616 c.error_info.message_pos = s;
2617 c.error_info.message = e.what();
2618 }
2619 }
2620 }
2621 len = static_cast<size_t>(-1);
2622 }
2623 }
2624 });
2625
2626 if (success(len)) {
2627 if (!outer_->ignoreSemanticValue) {
2628 vs.emplace_back(std::move(val));
2629 vs.tags.emplace_back(str2tag(outer_->name));
2630 }
2631 }
2632
2633 return len;
2634 }
2635
2636 inline std::any Holder::reduce(SemanticValues &vs, std::any &dt) const {
2637 if (outer_->action && !outer_->disable_action) {
2638 return outer_->action(vs, dt);
2639 } else if (vs.empty()) {
2640 return std::any();
2641 } else {
2642 return std::move(vs.front());
2643 }
2644 }
2645
2646 inline const char *Holder::trace_name() const {
2647 if (trace_name_.empty()) { trace_name_ = "[" + outer_->name + "]"; }
2648 return trace_name_.data();
2649 }
2650
2651 inline size_t Reference::parse_core(const char *s, size_t n, SemanticValues &vs,
2652 Context &c, std::any &dt) const {
2653 if (rule_) {
2654 // Reference rule
2655 if (rule_->is_macro) {
2656 // Macro
2657 FindReference vis(c.top_args(), c.rule_stack.back()->params);
2658
2659 // Collect arguments
2660 std::vector<std::shared_ptr<Ope>> args;
2661 for (auto arg : args_) {
2662 arg->accept(vis);
2663 args.emplace_back(std::move(vis.found_ope));
2664 }
2665
2666 c.push_args(std::move(args));
2667 auto se = scope_exit([&]() { c.pop_args(); });
2668 auto ope = get_core_operator();
2669 return ope->parse(s, n, vs, c, dt);
2670 } else {
2671 // Definition
2672 c.push_args(std::vector<std::shared_ptr<Ope>>());
2673 auto se = scope_exit([&]() { c.pop_args(); });
2674 auto ope = get_core_operator();
2675 return ope->parse(s, n, vs, c, dt);
2676 }
2677 } else {
2678 // Reference parameter in macro
2679 const auto &args = c.top_args();
2680 return args[iarg_]->parse(s, n, vs, c, dt);
2681 }
2682 }
2683
2684 inline std::shared_ptr<Ope> Reference::get_core_operator() const {
2685 return rule_->holder_;
2686 }
2687
2688 inline size_t BackReference::parse_core(const char *s, size_t n,
2689 SemanticValues &vs, Context &c,
2690 std::any &dt) const {
2691 auto size = static_cast<int>(c.capture_scope_stack_size);
2692 for (auto i = size - 1; i >= 0; i--) {
2693 auto index = static_cast<size_t>(i);
2694 const auto &cs = c.capture_scope_stack[index];
2695 if (cs.find(name_) != cs.end()) {
2696 const auto &lit = cs.at(name_);
2697 std::once_flag init_is_word;
2698 auto is_word = false;
2699 return parse_literal(s, n, vs, c, dt, lit, init_is_word, is_word, false);
2700 }
2701 }
2702 throw std::runtime_error("Invalid back reference...");
2703 }
2704
2705 inline Definition &
2706 PrecedenceClimbing::get_reference_for_binop(Context &c) const {
2707 if (rule_.is_macro) {
2708 // Reference parameter in macro
2709 const auto &args = c.top_args();
2710 auto iarg = dynamic_cast<Reference &>(*binop_).iarg_;
2711 auto arg = args[iarg];
2712 return *dynamic_cast<Reference &>(*arg).rule_;
2713 }
2714
2715 return *dynamic_cast<Reference &>(*binop_).rule_;
2716 }
2717
2718 inline size_t PrecedenceClimbing::parse_expression(const char *s, size_t n,
2719 SemanticValues &vs,
2720 Context &c, std::any &dt,
2721 size_t min_prec) const {
2722 auto len = atom_->parse(s, n, vs, c, dt);
2723 if (fail(len)) { return len; }
2724
2725 std::string tok;
2726 auto &rule = get_reference_for_binop(c);
2727 auto action = std::move(rule.action);
2728
2729 rule.action = [&](SemanticValues &vs2, std::any &dt2) {
2730 tok = vs2.token();
2731 if (action) {
2732 return action(vs2, dt2);
2733 } else if (!vs2.empty()) {
2734 return vs2[0];
2735 }
2736 return std::any();
2737 };
2738 auto action_se = scope_exit([&]() { rule.action = std::move(action); });
2739
2740 auto i = len;
2741 while (i < n) {
2742 std::vector<std::any> save_values(vs.begin(), vs.end());
2743 auto save_tokens = vs.tokens;
2744
2745 auto chv = c.push();
2746 auto chl = binop_->parse(s + i, n - i, chv, c, dt);
2747 c.pop();
2748
2749 if (fail(chl)) { break; }
2750
2751 auto it = info_.find(tok);
2752 if (it == info_.end()) { break; }
2753
2754 auto level = std::get<0>(it->second);
2755 auto assoc = std::get<1>(it->second);
2756
2757 if (level < min_prec) { break; }
2758
2759 vs.emplace_back(std::move(chv[0]));
2760 i += chl;
2761
2762 auto next_min_prec = level;
2763 if (assoc == 'L') { next_min_prec = level + 1; }
2764
2765 chv = c.push();
2766 chl = parse_expression(s + i, n - i, chv, c, dt, next_min_prec);
2767 c.pop();
2768
2769 if (fail(chl)) {
2770 vs.assign(save_values.begin(), save_values.end());
2771 vs.tokens = save_tokens;
2772 i = chl;
2773 break;
2774 }
2775
2776 vs.emplace_back(std::move(chv[0]));
2777 i += chl;
2778
2779 std::any val;
2780 if (rule_.action) {
2781 vs.sv_ = std::string_view(s, i);
2782 val = rule_.action(vs, dt);
2783 } else if (!vs.empty()) {
2784 val = vs[0];
2785 }
2786 vs.clear();
2787 vs.emplace_back(std::move(val));
2788 }
2789
2790 return i;
2791 }
2792
2793 inline size_t Recovery::parse_core(const char *s, size_t n,
2794 SemanticValues & /*vs*/, Context &c,
2795 std::any & /*dt*/) const {
2796 const auto &rule = dynamic_cast<Reference &>(*ope_);
2797
2798 // Custom error message
2799 if (c.log) {
2800 auto label = dynamic_cast<Reference *>(rule.args_[0].get());
2801 if (label) {
2802 if (!label->rule_->error_message.empty()) {
2803 c.error_info.message_pos = s;
2804 c.error_info.message = label->rule_->error_message;
2805 }
2806 }
2807 }
2808
2809 // Recovery
2810 size_t len = static_cast<size_t>(-1);
2811 {
2812 auto save_log = c.log;
2813 c.log = nullptr;
2814 auto se = scope_exit([&]() { c.log = save_log; });
2815
2816 SemanticValues dummy_vs;
2817 std::any dummy_dt;
2818
2819 len = rule.parse(s, n, dummy_vs, c, dummy_dt);
2820 }
2821
2822 if (success(len)) {
2823 c.recovered = true;
2824
2825 if (c.log) {
2826 c.error_info.output_log(c.log, c.s, c.l);
2827 c.error_info.clear();
2828 }
2829 }
2830
2831 // Cut
2832 if (!c.cut_stack.empty()) {
2833 c.cut_stack.back() = true;
2834
2835 if (c.cut_stack.size() == 1) {
2836 // TODO: Remove unneeded entries in packrat memoise table
2837 }
2838 }
2839
2840 return len;
2841 }
2842
2843 inline void Sequence::accept(Visitor &v) { v.visit(*this); }
2844 inline void PrioritizedChoice::accept(Visitor &v) { v.visit(*this); }
2845 inline void Repetition::accept(Visitor &v) { v.visit(*this); }
2846 inline void AndPredicate::accept(Visitor &v) { v.visit(*this); }
2847 inline void NotPredicate::accept(Visitor &v) { v.visit(*this); }
2848 inline void Dictionary::accept(Visitor &v) { v.visit(*this); }
2849 inline void LiteralString::accept(Visitor &v) { v.visit(*this); }
2850 inline void CharacterClass::accept(Visitor &v) { v.visit(*this); }
2851 inline void Character::accept(Visitor &v) { v.visit(*this); }
2852 inline void AnyCharacter::accept(Visitor &v) { v.visit(*this); }
2853 inline void CaptureScope::accept(Visitor &v) { v.visit(*this); }
2854 inline void Capture::accept(Visitor &v) { v.visit(*this); }
2855 inline void TokenBoundary::accept(Visitor &v) { v.visit(*this); }
2856 inline void Ignore::accept(Visitor &v) { v.visit(*this); }
2857 inline void User::accept(Visitor &v) { v.visit(*this); }
2858 inline void WeakHolder::accept(Visitor &v) { v.visit(*this); }
2859 inline void Holder::accept(Visitor &v) { v.visit(*this); }
2860 inline void Reference::accept(Visitor &v) { v.visit(*this); }
2861 inline void Whitespace::accept(Visitor &v) { v.visit(*this); }
2862 inline void BackReference::accept(Visitor &v) { v.visit(*this); }
2863 inline void PrecedenceClimbing::accept(Visitor &v) { v.visit(*this); }
2864 inline void Recovery::accept(Visitor &v) { v.visit(*this); }
2865 inline void Cut::accept(Visitor &v) { v.visit(*this); }
2866
2867 inline void AssignIDToDefinition::visit(Holder &ope) {
2868 auto p = static_cast<void *>(ope.outer_);
2869 if (ids.count(p)) { return; }
2870 auto id = ids.size();
2871 ids[p] = id;
2872 ope.outer_->id = id;
2873 ope.ope_->accept(*this);
2874 }
2875
2876 inline void AssignIDToDefinition::visit(Reference &ope) {
2877 if (ope.rule_) {
2878 for (auto arg : ope.args_) {
2879 arg->accept(*this);
2880 }
2881 ope.rule_->accept(*this);
2882 }
2883 }
2884
2885 inline void AssignIDToDefinition::visit(PrecedenceClimbing &ope) {
2886 ope.atom_->accept(*this);
2887 ope.binop_->accept(*this);
2888 }
2889
2890 inline void TokenChecker::visit(Reference &ope) {
2891 if (ope.is_macro_) {
2892 for (auto arg : ope.args_) {
2893 arg->accept(*this);
2894 }
2895 } else {
2896 has_rule_ = true;
2897 }
2898 }
2899
2900 inline void FindLiteralToken::visit(Reference &ope) {
2901 if (ope.is_macro_) {
2902 ope.rule_->accept(*this);
2903 for (auto arg : ope.args_) {
2904 arg->accept(*this);
2905 }
2906 }
2907 }
2908
2909 inline void DetectLeftRecursion::visit(Reference &ope) {
2910 if (ope.name_ == name_) {
2911 error_s = ope.s_;
2912 } else if (!refs_.count(ope.name_)) {
2913 refs_.insert(ope.name_);
2914 if (ope.rule_) {
2915 ope.rule_->accept(*this);
2916 if (done_ == false) { return; }
2917 }
2918 }
2919 done_ = true;
2920 }
2921
2922 inline void HasEmptyElement::visit(Reference &ope) {
2923 auto it = std::find_if(refs_.begin(), refs_.end(),
2924 [&](const std::pair<const char *, std::string> &ref) {
2925 return ope.name_ == ref.second;
2926 });
2927 if (it != refs_.end()) { return; }
2928
2929 if (ope.rule_) {
2930 refs_.emplace_back(ope.s_, ope.name_);
2931 ope.rule_->accept(*this);
2932 refs_.pop_back();
2933 }
2934 }
2935
2936 inline void DetectInfiniteLoop::visit(Reference &ope) {
2937 auto it = std::find_if(refs_.begin(), refs_.end(),
2938 [&](const std::pair<const char *, std::string> &ref) {
2939 return ope.name_ == ref.second;
2940 });
2941 if (it != refs_.end()) { return; }
2942
2943 if (ope.rule_) {
2944 refs_.emplace_back(ope.s_, ope.name_);
2945 ope.rule_->accept(*this);
2946 refs_.pop_back();
2947 }
2948
2949 if (ope.is_macro_) {
2950 for (auto arg : ope.args_) {
2951 arg->accept(*this);
2952 }
2953 }
2954 }
2955
2956 inline void ReferenceChecker::visit(Reference &ope) {
2957 auto it = std::find(params_.begin(), params_.end(), ope.name_);
2958 if (it != params_.end()) { return; }
2959
2960 if (!grammar_.count(ope.name_)) {
2961 error_s[ope.name_] = ope.s_;
2962 error_message[ope.name_] = "'" + ope.name_ + "' is not defined.";
2963 } else {
2964 if (!referenced.count(ope.name_)) { referenced.insert(ope.name_); }
2965 const auto &rule = grammar_.at(ope.name_);
2966 if (rule.is_macro) {
2967 if (!ope.is_macro_ || ope.args_.size() != rule.params.size()) {
2968 error_s[ope.name_] = ope.s_;
2969 error_message[ope.name_] = "incorrect number of arguments.";
2970 }
2971 } else if (ope.is_macro_) {
2972 error_s[ope.name_] = ope.s_;
2973 error_message[ope.name_] = "'" + ope.name_ + "' is not macro.";
2974 }
2975 for (auto arg : ope.args_) {
2976 arg->accept(*this);
2977 }
2978 }
2979 }
2980
2981 inline void LinkReferences::visit(Reference &ope) {
2982 // Check if the reference is a macro parameter
2983 auto found_param = false;
2984 for (size_t i = 0; i < params_.size(); i++) {
2985 const auto ¶m = params_[i];
2986 if (param == ope.name_) {
2987 ope.iarg_ = i;
2988 found_param = true;
2989 break;
2990 }
2991 }
2992
2993 // Check if the reference is a definition rule
2994 if (!found_param && grammar_.count(ope.name_)) {
2995 auto &rule = grammar_.at(ope.name_);
2996 ope.rule_ = &rule;
2997 }
2998
2999 for (auto arg : ope.args_) {
3000 arg->accept(*this);
3001 }
3002 }
3003
3004 inline void FindReference::visit(Reference &ope) {
3005 for (size_t i = 0; i < args_.size(); i++) {
3006 const auto &name = params_[i];
3007 if (name == ope.name_) {
3008 found_ope = args_[i];
3009 return;
3010 }
3011 }
3012 found_ope = ope.shared_from_this();
3013 }
3014
3015 /*-----------------------------------------------------------------------------
3016 * PEG parser generator
3017 *---------------------------------------------------------------------------*/
3018
3019 using Rules = std::unordered_map<std::string, std::shared_ptr<Ope>>;
3020
3021 class ParserGenerator {
3022 public:
3023 static std::shared_ptr<Grammar> parse(const char *s, size_t n,
3024 const Rules &rules, std::string &start,
3025 bool &enablePackratParsing, Log log) {
3026 return get_instance().perform_core(s, n, rules, start, enablePackratParsing,
3027 log);
3028 }
3029
3030 static std::shared_ptr<Grammar> parse(const char *s, size_t n,
3031 std::string &start,
3032 bool &enablePackratParsing, Log log) {
3033 Rules dummy;
3034 return parse(s, n, dummy, start, enablePackratParsing, log);
3035 }
3036
3037 // For debuging purpose
3038 static Grammar &grammar() { return get_instance().g; }
3039
3040 private:
3041 static ParserGenerator &get_instance() {
3042 static ParserGenerator instance;
3043 return instance;
3044 }
3045
3046 ParserGenerator() {
3047 make_grammar();
3048 setup_actions();
3049 }
3050
3051 struct Instruction {
3052 std::string type;
3053 std::any data;
3054 };
3055
3056 struct Data {
3057 std::shared_ptr<Grammar> grammar;
3058 std::string start;
3059 const char *start_pos = nullptr;
3060 std::vector<std::pair<std::string, const char *>> duplicates;
3061 std::map<std::string, Instruction> instructions;
3062 std::set<std::string_view> captures;
3063 bool enablePackratParsing = true;
3064
3065 Data() : grammar(std::make_shared<Grammar>()) {}
3066 };
3067
3068 void make_grammar() {
3069 // Setup PEG syntax parser
3070 g["Grammar"] <= seq(g["Spacing"], oom(g["Definition"]), g["EndOfFile"]);
3071 g["Definition"] <=
3072 cho(seq(g["Ignore"], g["IdentCont"], g["Parameters"], g["LEFTARROW"],
3073 g["Expression"], opt(g["Instruction"])),
3074 seq(g["Ignore"], g["Identifier"], g["LEFTARROW"], g["Expression"],
3075 opt(g["Instruction"])));
3076 g["Expression"] <= seq(g["Sequence"], zom(seq(g["SLASH"], g["Sequence"])));
3077 g["Sequence"] <= zom(cho(g["CUT"], g["Prefix"]));
3078 g["Prefix"] <= seq(opt(cho(g["AND"], g["NOT"])), g["SuffixWithLabel"]);
3079 g["SuffixWithLabel"] <=
3080 seq(g["Suffix"], opt(seq(g["LABEL"], g["Identifier"])));
3081 g["Suffix"] <= seq(g["Primary"], opt(g["Loop"]));
3082 g["Loop"] <= cho(g["QUESTION"], g["STAR"], g["PLUS"], g["Repetition"]);
3083 g["Primary"] <=
3084 cho(seq(g["Ignore"], g["IdentCont"], g["Arguments"],
3085 npd(g["LEFTARROW"])),
3086 seq(g["Ignore"], g["Identifier"],
3087 npd(seq(opt(g["Parameters"]), g["LEFTARROW"]))),
3088 seq(g["OPEN"], g["Expression"], g["CLOSE"]),
3089 seq(g["BeginTok"], g["Expression"], g["EndTok"]),
3090 seq(g["BeginCapScope"], g["Expression"], g["EndCapScope"]),
3091 seq(g["BeginCap"], g["Expression"], g["EndCap"]), g["BackRef"],
3092 g["LiteralI"], g["Dictionary"], g["Literal"], g["NegatedClass"],
3093 g["Class"], g["DOT"]);
3094
3095 g["Identifier"] <= seq(g["IdentCont"], g["Spacing"]);
3096 g["IdentCont"] <= seq(g["IdentStart"], zom(g["IdentRest"]));
3097
3098 const static std::vector<std::pair<char32_t, char32_t>> range = {
3099 {0x0080, 0xFFFF}};
3100 g["IdentStart"] <= seq(npd(lit(u8(u8"↑"))), npd(lit(u8(u8"⇑"))),
3101 cho(cls("a-zA-Z_%"), cls(range)));
3102
3103 g["IdentRest"] <= cho(g["IdentStart"], cls("0-9"));
3104
3105 g["Dictionary"] <= seq(g["LiteralD"], oom(seq(g["PIPE"], g["LiteralD"])));
3106
3107 auto lit_ope = cho(seq(cls("'"), tok(zom(seq(npd(cls("'")), g["Char"]))),
3108 cls("'"), g["Spacing"]),
3109 seq(cls("\""), tok(zom(seq(npd(cls("\"")), g["Char"]))),
3110 cls("\""), g["Spacing"]));
3111 g["Literal"] <= lit_ope;
3112 g["LiteralD"] <= lit_ope;
3113
3114 g["LiteralI"] <=
3115 cho(seq(cls("'"), tok(zom(seq(npd(cls("'")), g["Char"]))), lit("'i"),
3116 g["Spacing"]),
3117 seq(cls("\""), tok(zom(seq(npd(cls("\"")), g["Char"]))), lit("\"i"),
3118 g["Spacing"]));
3119
3120 // NOTE: The original Brian Ford's paper uses 'zom' instead of 'oom'.
3121 g["Class"] <= seq(chr('['), npd(chr('^')),
3122 tok(oom(seq(npd(chr(']')), g["Range"]))), chr(']'),
3123 g["Spacing"]);
3124 g["NegatedClass"] <= seq(lit("[^"),
3125 tok(oom(seq(npd(chr(']')), g["Range"]))), chr(']'),
3126 g["Spacing"]);
3127
3128 g["Range"] <= cho(seq(g["Char"], chr('-'), g["Char"]), g["Char"]);
3129 g["Char"] <=
3130 cho(seq(chr('\\'), cls("nrt'\"[]\\^")),
3131 seq(chr('\\'), cls("0-3"), cls("0-7"), cls("0-7")),
3132 seq(chr('\\'), cls("0-7"), opt(cls("0-7"))),
3133 seq(lit("\\x"), cls("0-9a-fA-F"), opt(cls("0-9a-fA-F"))),
3134 seq(lit("\\u"),
3135 cho(seq(cho(seq(chr('0'), cls("0-9a-fA-F")), lit("10")),
3136 rep(cls("0-9a-fA-F"), 4, 4)),
3137 rep(cls("0-9a-fA-F"), 4, 5))),
3138 seq(npd(chr('\\')), dot()));
3139
3140 g["Repetition"] <=
3141 seq(g["BeginBlacket"], g["RepetitionRange"], g["EndBlacket"]);
3142 g["RepetitionRange"] <= cho(seq(g["Number"], g["COMMA"], g["Number"]),
3143 seq(g["Number"], g["COMMA"]), g["Number"],
3144 seq(g["COMMA"], g["Number"]));
3145 g["Number"] <= seq(oom(cls("0-9")), g["Spacing"]);
3146
3147 g["LEFTARROW"] <= seq(cho(lit("<-"), lit(u8(u8"←"))), g["Spacing"]);
3148 ~g["SLASH"] <= seq(chr('/'), g["Spacing"]);
3149 ~g["PIPE"] <= seq(chr('|'), g["Spacing"]);
3150 g["AND"] <= seq(chr('&'), g["Spacing"]);
3151 g["NOT"] <= seq(chr('!'), g["Spacing"]);
3152 g["QUESTION"] <= seq(chr('?'), g["Spacing"]);
3153 g["STAR"] <= seq(chr('*'), g["Spacing"]);
3154 g["PLUS"] <= seq(chr('+'), g["Spacing"]);
3155 ~g["OPEN"] <= seq(chr('('), g["Spacing"]);
3156 ~g["CLOSE"] <= seq(chr(')'), g["Spacing"]);
3157 g["DOT"] <= seq(chr('.'), g["Spacing"]);
3158
3159 g["CUT"] <= seq(lit(u8(u8"↑")), g["Spacing"]);
3160 ~g["LABEL"] <= seq(cho(chr('^'), lit(u8(u8"⇑"))), g["Spacing"]);
3161
3162 ~g["Spacing"] <= zom(cho(g["Space"], g["Comment"]));
3163 g["Comment"] <=
3164 seq(chr('#'), zom(seq(npd(g["EndOfLine"]), dot())), g["EndOfLine"]);
3165 g["Space"] <= cho(chr(' '), chr('\t'), g["EndOfLine"]);
3166 g["EndOfLine"] <= cho(lit("\r\n"), chr('\n'), chr('\r'));
3167 g["EndOfFile"] <= npd(dot());
3168
3169 ~g["BeginTok"] <= seq(chr('<'), g["Spacing"]);
3170 ~g["EndTok"] <= seq(chr('>'), g["Spacing"]);
3171
3172 ~g["BeginCapScope"] <= seq(chr('$'), chr('('), g["Spacing"]);
3173 ~g["EndCapScope"] <= seq(chr(')'), g["Spacing"]);
3174
3175 g["BeginCap"] <= seq(chr('$'), tok(g["IdentCont"]), chr('<'), g["Spacing"]);
3176 ~g["EndCap"] <= seq(chr('>'), g["Spacing"]);
3177
3178 g["BackRef"] <= seq(chr('$'), tok(g["IdentCont"]), g["Spacing"]);
3179
3180 g["IGNORE"] <= chr('~');
3181
3182 g["Ignore"] <= opt(g["IGNORE"]);
3183 g["Parameters"] <= seq(g["OPEN"], g["Identifier"],
3184 zom(seq(g["COMMA"], g["Identifier"])), g["CLOSE"]);
3185 g["Arguments"] <= seq(g["OPEN"], g["Expression"],
3186 zom(seq(g["COMMA"], g["Expression"])), g["CLOSE"]);
3187 ~g["COMMA"] <= seq(chr(','), g["Spacing"]);
3188
3189 // Instruction grammars
3190 g["Instruction"] <= seq(g["BeginBlacket"],
3191 cho(cho(g["PrecedenceClimbing"]),
3192 cho(g["ErrorMessage"]), cho(g["NoAstOpt"])),
3193 g["EndBlacket"]);
3194
3195 ~g["SpacesZom"] <= zom(g["Space"]);
3196 ~g["SpacesOom"] <= oom(g["Space"]);
3197 ~g["BeginBlacket"] <= seq(chr('{'), g["Spacing"]);
3198 ~g["EndBlacket"] <= seq(chr('}'), g["Spacing"]);
3199
3200 // PrecedenceClimbing instruction
3201 g["PrecedenceClimbing"] <=
3202 seq(lit("precedence"), g["SpacesOom"], g["PrecedenceInfo"],
3203 zom(seq(g["SpacesOom"], g["PrecedenceInfo"])), g["SpacesZom"]);
3204 g["PrecedenceInfo"] <=
3205 seq(g["PrecedenceAssoc"],
3206 oom(seq(ign(g["SpacesOom"]), g["PrecedenceOpe"])));
3207 g["PrecedenceOpe"] <=
3208 cho(seq(cls("'"),
3209 tok(zom(seq(npd(cho(g["Space"], cls("'"))), g["Char"]))),
3210 cls("'")),
3211 seq(cls("\""),
3212 tok(zom(seq(npd(cho(g["Space"], cls("\""))), g["Char"]))),
3213 cls("\"")),
3214 tok(oom(seq(npd(cho(g["PrecedenceAssoc"], g["Space"], chr('}'))),
3215 dot()))));
3216 g["PrecedenceAssoc"] <= cls("LR");
3217
3218 // Error message instruction
3219 g["ErrorMessage"] <=
3220 seq(lit("message"), g["SpacesOom"], g["LiteralD"], g["SpacesZom"]);
3221
3222 // No Ast node optimazation instruction
3223 g["NoAstOpt"] <= seq(lit("no_ast_opt"), g["SpacesZom"]);
3224
3225 // Set definition names
3226 for (auto &x : g) {
3227 x.second.name = x.first;
3228 }
3229 }
3230
3231 void setup_actions() {
3232 g["Definition"] = [&](const SemanticValues &vs, std::any &dt) {
3233 auto &data = *std::any_cast<Data *>(dt);
3234
3235 auto is_macro = vs.choice() == 0;
3236 auto ignore = std::any_cast<bool>(vs[0]);
3237 auto name = std::any_cast<std::string>(vs[1]);
3238
3239 std::vector<std::string> params;
3240 std::shared_ptr<Ope> ope;
3241 if (is_macro) {
3242 params = std::any_cast<std::vector<std::string>>(vs[2]);
3243 ope = std::any_cast<std::shared_ptr<Ope>>(vs[4]);
3244 if (vs.size() == 6) {
3245 data.instructions[name] = std::any_cast<Instruction>(vs[5]);
3246 }
3247 } else {
3248 ope = std::any_cast<std::shared_ptr<Ope>>(vs[3]);
3249 if (vs.size() == 5) {
3250 data.instructions[name] = std::any_cast<Instruction>(vs[4]);
3251 }
3252 }
3253
3254 auto &grammar = *data.grammar;
3255 if (!grammar.count(name)) {
3256 auto &rule = grammar[name];
3257 rule <= ope;
3258 rule.name = name;
3259 rule.s_ = vs.sv().data();
3260 rule.ignoreSemanticValue = ignore;
3261 rule.is_macro = is_macro;
3262 rule.params = params;
3263
3264 if (data.start.empty()) {
3265 data.start = name;
3266 data.start_pos = vs.sv().data();
3267 }
3268 } else {
3269 data.duplicates.emplace_back(name, vs.sv().data());
3270 }
3271 };
3272
3273 g["Definition"].enter = [](const char * /*s*/, size_t /*n*/, std::any &dt) {
3274 auto &data = *std::any_cast<Data *>(dt);
3275 data.captures.clear();
3276 };
3277
3278 g["Expression"] = [&](const SemanticValues &vs) {
3279 if (vs.size() == 1) {
3280 return std::any_cast<std::shared_ptr<Ope>>(vs[0]);
3281 } else {
3282 std::vector<std::shared_ptr<Ope>> opes;
3283 for (auto i = 0u; i < vs.size(); i++) {
3284 opes.emplace_back(std::any_cast<std::shared_ptr<Ope>>(vs[i]));
3285 }
3286 const std::shared_ptr<Ope> ope =
3287 std::make_shared<PrioritizedChoice>(opes);
3288 return ope;
3289 }
3290 };
3291
3292 g["Sequence"] = [&](const SemanticValues &vs) {
3293 if (vs.empty()) {
3294 return npd(lit(""));
3295 } else if (vs.size() == 1) {
3296 return std::any_cast<std::shared_ptr<Ope>>(vs[0]);
3297 } else {
3298 std::vector<std::shared_ptr<Ope>> opes;
3299 for (const auto &x : vs) {
3300 opes.emplace_back(std::any_cast<std::shared_ptr<Ope>>(x));
3301 }
3302 const std::shared_ptr<Ope> ope = std::make_shared<Sequence>(opes);
3303 return ope;
3304 }
3305 };
3306
3307 g["Prefix"] = [&](const SemanticValues &vs) {
3308 std::shared_ptr<Ope> ope;
3309 if (vs.size() == 1) {
3310 ope = std::any_cast<std::shared_ptr<Ope>>(vs[0]);
3311 } else {
3312 assert(vs.size() == 2);
3313 auto tok = std::any_cast<char>(vs[0]);
3314 ope = std::any_cast<std::shared_ptr<Ope>>(vs[1]);
3315 if (tok == '&') {
3316 ope = apd(ope);
3317 } else { // '!'
3318 ope = npd(ope);
3319 }
3320 }
3321 return ope;
3322 };
3323
3324 g["SuffixWithLabel"] = [&](const SemanticValues &vs, std::any &dt) {
3325 auto ope = std::any_cast<std::shared_ptr<Ope>>(vs[0]);
3326 if (vs.size() == 1) {
3327 return ope;
3328 } else {
3329 assert(vs.size() == 2);
3330 auto &data = *std::any_cast<Data *>(dt);
3331 const auto &ident = std::any_cast<std::string>(vs[1]);
3332 auto label = ref(*data.grammar, ident, vs.sv().data(), false, {});
3333 auto recovery = rec(ref(*data.grammar, RECOVER_DEFINITION_NAME,
3334 vs.sv().data(), true, {label}));
3335 return cho4label_(ope, recovery);
3336 }
3337 };
3338
3339 struct Loop {
3340 enum class Type { opt = 0, zom, oom, rep };
3341 Type type;
3342 std::pair<size_t, size_t> range;
3343 };
3344
3345 g["Suffix"] = [&](const SemanticValues &vs) {
3346 auto ope = std::any_cast<std::shared_ptr<Ope>>(vs[0]);
3347 if (vs.size() == 1) {
3348 return ope;
3349 } else {
3350 assert(vs.size() == 2);
3351 auto loop = std::any_cast<Loop>(vs[1]);
3352 switch (loop.type) {
3353 case Loop::Type::opt: return opt(ope);
3354 case Loop::Type::zom: return zom(ope);
3355 case Loop::Type::oom: return oom(ope);
3356 default: // Regex-like repetition
3357 return rep(ope, loop.range.first, loop.range.second);
3358 }
3359 }
3360 };
3361
3362 g["Loop"] = [&](const SemanticValues &vs) {
3363 switch (vs.choice()) {
3364 case 0: // Option
3365 return Loop{Loop::Type::opt, std::pair<size_t, size_t>()};
3366 case 1: // Zero or More
3367 return Loop{Loop::Type::zom, std::pair<size_t, size_t>()};
3368 case 2: // One or More
3369 return Loop{Loop::Type::oom, std::pair<size_t, size_t>()};
3370 default: // Regex-like repetition
3371 return Loop{Loop::Type::rep,
3372 std::any_cast<std::pair<size_t, size_t>>(vs[0])};
3373 }
3374 };
3375
3376 g["RepetitionRange"] = [&](const SemanticValues &vs) {
3377 switch (vs.choice()) {
3378 case 0: { // Number COMMA Number
3379 auto min = std::any_cast<size_t>(vs[0]);
3380 auto max = std::any_cast<size_t>(vs[1]);
3381 return std::pair(min, max);
3382 }
3383 case 1: // Number COMMA
3384 return std::pair(std::any_cast<size_t>(vs[0]),
3385 std::numeric_limits<size_t>::max());
3386 case 2: { // Number
3387 auto n = std::any_cast<size_t>(vs[0]);
3388 return std::pair(n, n);
3389 }
3390 default: // COMMA Number
3391 return std::pair(std::numeric_limits<size_t>::min(),
3392 std::any_cast<size_t>(vs[0]));
3393 }
3394 };
3395 g["Number"] = [&](const SemanticValues &vs) {
3396 return vs.token_to_number<size_t>();
3397 };
3398
3399 g["Primary"] = [&](const SemanticValues &vs, std::any &dt) {
3400 auto &data = *std::any_cast<Data *>(dt);
3401
3402 switch (vs.choice()) {
3403 case 0: // Macro Reference
3404 case 1: { // Reference
3405 auto is_macro = vs.choice() == 0;
3406 auto ignore = std::any_cast<bool>(vs[0]);
3407 const auto &ident = std::any_cast<std::string>(vs[1]);
3408
3409 std::vector<std::shared_ptr<Ope>> args;
3410 if (is_macro) {
3411 args = std::any_cast<std::vector<std::shared_ptr<Ope>>>(vs[2]);
3412 }
3413
3414 auto ope = ref(*data.grammar, ident, vs.sv().data(), is_macro, args);
3415 if (ident == RECOVER_DEFINITION_NAME) { ope = rec(ope); }
3416
3417 if (ignore) {
3418 return ign(ope);
3419 } else {
3420 return ope;
3421 }
3422 }
3423 case 2: { // (Expression)
3424 return std::any_cast<std::shared_ptr<Ope>>(vs[0]);
3425 }
3426 case 3: { // TokenBoundary
3427 return tok(std::any_cast<std::shared_ptr<Ope>>(vs[0]));
3428 }
3429 case 4: { // CaptureScope
3430 return csc(std::any_cast<std::shared_ptr<Ope>>(vs[0]));
3431 }
3432 case 5: { // Capture
3433 const auto &name = std::any_cast<std::string_view>(vs[0]);
3434 auto ope = std::any_cast<std::shared_ptr<Ope>>(vs[1]);
3435
3436 data.captures.insert(name);
3437
3438 return cap(ope, [name](const char *a_s, size_t a_n, Context &c) {
3439 auto &cs = c.capture_scope_stack[c.capture_scope_stack_size - 1];
3440 cs[name] = std::string(a_s, a_n);
3441 });
3442 }
3443 default: {
3444 return std::any_cast<std::shared_ptr<Ope>>(vs[0]);
3445 }
3446 }
3447 };
3448
3449 g["IdentCont"] = [](const SemanticValues &vs) {
3450 return std::string(vs.sv().data(), vs.sv().length());
3451 };
3452
3453 g["Dictionary"] = [](const SemanticValues &vs) {
3454 auto items = vs.transform<std::string>();
3455 return dic(items);
3456 };
3457
3458 g["Literal"] = [](const SemanticValues &vs) {
3459 const auto &tok = vs.tokens.front();
3460 return lit(resolve_escape_sequence(tok.data(), tok.size()));
3461 };
3462 g["LiteralI"] = [](const SemanticValues &vs) {
3463 const auto &tok = vs.tokens.front();
3464 return liti(resolve_escape_sequence(tok.data(), tok.size()));
3465 };
3466 g["LiteralD"] = [](const SemanticValues &vs) {
3467 auto &tok = vs.tokens.front();
3468 return resolve_escape_sequence(tok.data(), tok.size());
3469 };
3470
3471 g["Class"] = [](const SemanticValues &vs) {
3472 auto ranges = vs.transform<std::pair<char32_t, char32_t>>();
3473 return cls(ranges);
3474 };
3475 g["NegatedClass"] = [](const SemanticValues &vs) {
3476 auto ranges = vs.transform<std::pair<char32_t, char32_t>>();
3477 return ncls(ranges);
3478 };
3479 g["Range"] = [](const SemanticValues &vs) {
3480 switch (vs.choice()) {
3481 case 0: {
3482 auto s1 = std::any_cast<std::string>(vs[0]);
3483 auto s2 = std::any_cast<std::string>(vs[1]);
3484 auto cp1 = decode_codepoint(s1.data(), s1.length());
3485 auto cp2 = decode_codepoint(s2.data(), s2.length());
3486 return std::pair(cp1, cp2);
3487 }
3488 case 1: {
3489 auto s = std::any_cast<std::string>(vs[0]);
3490 auto cp = decode_codepoint(s.data(), s.length());
3491 return std::pair(cp, cp);
3492 }
3493 }
3494 return std::pair<char32_t, char32_t>(0, 0);
3495 };
3496 g["Char"] = [](const SemanticValues &vs) {
3497 return resolve_escape_sequence(vs.sv().data(), vs.sv().length());
3498 };
3499
3500 g["AND"] = [](const SemanticValues &vs) { return *vs.sv().data(); };
3501 g["NOT"] = [](const SemanticValues &vs) { return *vs.sv().data(); };
3502 g["QUESTION"] = [](const SemanticValues &vs) { return *vs.sv().data(); };
3503 g["STAR"] = [](const SemanticValues &vs) { return *vs.sv().data(); };
3504 g["PLUS"] = [](const SemanticValues &vs) { return *vs.sv().data(); };
3505
3506 g["DOT"] = [](const SemanticValues & /*vs*/) { return dot(); };
3507
3508 g["CUT"] = [](const SemanticValues & /*vs*/) { return cut(); };
3509
3510 g["BeginCap"] = [](const SemanticValues &vs) { return vs.token(); };
3511
3512 g["BackRef"] = [&](const SemanticValues &vs, std::any &dt) {
3513 auto &data = *std::any_cast<Data *>(dt);
3514 if (data.captures.find(vs.token()) == data.captures.end()) {
3515 data.enablePackratParsing = false;
3516 }
3517 return bkr(vs.token_to_string());
3518 };
3519
3520 g["Ignore"] = [](const SemanticValues &vs) { return vs.size() > 0; };
3521
3522 g["Parameters"] = [](const SemanticValues &vs) {
3523 return vs.transform<std::string>();
3524 };
3525
3526 g["Arguments"] = [](const SemanticValues &vs) {
3527 return vs.transform<std::shared_ptr<Ope>>();
3528 };
3529
3530 g["PrecedenceClimbing"] = [](const SemanticValues &vs) {
3531 PrecedenceClimbing::BinOpeInfo binOpeInfo;
3532 size_t level = 1;
3533 for (auto v : vs) {
3534 auto tokens = std::any_cast<std::vector<std::string_view>>(v);
3535 auto assoc = tokens[0][0];
3536 for (size_t i = 1; i < tokens.size(); i++) {
3537 binOpeInfo[tokens[i]] = std::pair(level, assoc);
3538 }
3539 level++;
3540 }
3541 Instruction instruction;
3542 instruction.type = "precedence";
3543 instruction.data = binOpeInfo;
3544 return instruction;
3545 };
3546 g["PrecedenceInfo"] = [](const SemanticValues &vs) {
3547 return vs.transform<std::string_view>();
3548 };
3549 g["PrecedenceOpe"] = [](const SemanticValues &vs) { return vs.token(); };
3550 g["PrecedenceAssoc"] = [](const SemanticValues &vs) { return vs.token(); };
3551
3552 g["ErrorMessage"] = [](const SemanticValues &vs) {
3553 Instruction instruction;
3554 instruction.type = "message";
3555 instruction.data = std::any_cast<std::string>(vs[0]);
3556 return instruction;
3557 };
3558
3559 g["NoAstOpt"] = [](const SemanticValues & /*vs*/) {
3560 Instruction instruction;
3561 instruction.type = "no_ast_opt";
3562 return instruction;
3563 };
3564 }
3565
3566 bool apply_precedence_instruction(Definition &rule,
3567 const PrecedenceClimbing::BinOpeInfo &info,
3568 const char *s, Log log) {
3569 try {
3570 auto &seq = dynamic_cast<Sequence &>(*rule.get_core_operator());
3571 auto atom = seq.opes_[0];
3572 auto &rep = dynamic_cast<Repetition &>(*seq.opes_[1]);
3573 auto &seq1 = dynamic_cast<Sequence &>(*rep.ope_);
3574 auto binop = seq1.opes_[0];
3575 auto atom1 = seq1.opes_[1];
3576
3577 auto atom_name = dynamic_cast<Reference &>(*atom).name_;
3578 auto binop_name = dynamic_cast<Reference &>(*binop).name_;
3579 auto atom1_name = dynamic_cast<Reference &>(*atom1).name_;
3580
3581 if (!rep.is_zom() || atom_name != atom1_name || atom_name == binop_name) {
3582 if (log) {
3583 auto line = line_info(s, rule.s_);
3584 log(line.first, line.second,
3585 "'precedence' instruction cannot be applied to '" + rule.name +
3586 "'.");
3587 }
3588 return false;
3589 }
3590
3591 rule.holder_->ope_ = pre(atom, binop, info, rule);
3592 rule.disable_action = true;
3593 } catch (...) {
3594 if (log) {
3595 auto line = line_info(s, rule.s_);
3596 log(line.first, line.second,
3597 "'precedence' instruction cannot be applied to '" + rule.name +
3598 "'.");
3599 }
3600 return false;
3601 }
3602 return true;
3603 }
3604
3605 std::shared_ptr<Grammar> perform_core(const char *s, size_t n,
3606 const Rules &rules, std::string &start,
3607 bool &enablePackratParsing, Log log) {
3608 Data data;
3609 auto &grammar = *data.grammar;
3610
3611 // Built-in macros
3612 {
3613 // `%recover`
3614 {
3615 auto &rule = grammar[RECOVER_DEFINITION_NAME];
3616 rule <= ref(grammar, "x", "", false, {});
3617 rule.name = RECOVER_DEFINITION_NAME;
3618 rule.s_ = "[native]";
3619 rule.ignoreSemanticValue = true;
3620 rule.is_macro = true;
3621 rule.params = {"x"};
3622 }
3623 }
3624
3625 std::any dt = &data;
3626 auto r = g["Grammar"].parse(s, n, dt, nullptr, log);
3627
3628 if (!r.ret) {
3629 if (log) {
3630 if (r.error_info.message_pos) {
3631 auto line = line_info(s, r.error_info.message_pos);
3632 log(line.first, line.second, r.error_info.message);
3633 } else {
3634 auto line = line_info(s, r.error_info.error_pos);
3635 log(line.first, line.second, "syntax error");
3636 }
3637 }
3638 return nullptr;
3639 }
3640
3641 // User provided rules
3642 for (auto [user_name, user_rule] : rules) {
3643 auto name = user_name;
3644 auto ignore = false;
3645 if (!name.empty() && name[0] == '~') {
3646 ignore = true;
3647 name.erase(0, 1);
3648 }
3649 if (!name.empty()) {
3650 auto &rule = grammar[name];
3651 rule <= user_rule;
3652 rule.name = name;
3653 rule.ignoreSemanticValue = ignore;
3654 }
3655 }
3656
3657 // Check duplicated definitions
3658 auto ret = data.duplicates.empty();
3659
3660 for (const auto &[name, ptr] : data.duplicates) {
3661 if (log) {
3662 auto line = line_info(s, ptr);
3663 log(line.first, line.second, "'" + name + "' is already defined.");
3664 }
3665 }
3666
3667 // Set root definition
3668 auto &start_rule = grammar[data.start];
3669
3670 // Check if the start rule has ignore operator
3671 {
3672 if (start_rule.ignoreSemanticValue) {
3673 if (log) {
3674 auto line = line_info(s, start_rule.s_);
3675 log(line.first, line.second,
3676 "Ignore operator cannot be applied to '" + start_rule.name +
3677 "'.");
3678 }
3679 ret = false;
3680 }
3681 }
3682
3683 if (!ret) { return nullptr; }
3684
3685 // Check missing definitions
3686 auto referenced = std::unordered_set<std::string>{
3687 WHITESPACE_DEFINITION_NAME,
3688 WORD_DEFINITION_NAME,
3689 RECOVER_DEFINITION_NAME,
3690 start_rule.name,
3691 };
3692
3693 for (auto &[_, rule] : grammar) {
3694 ReferenceChecker vis(grammar, rule.params);
3695 rule.accept(vis);
3696 referenced.insert(vis.referenced.begin(), vis.referenced.end());
3697 for (const auto &[name, ptr] : vis.error_s) {
3698 if (log) {
3699 auto line = line_info(s, ptr);
3700 log(line.first, line.second, vis.error_message[name]);
3701 }
3702 ret = false;
3703 }
3704 }
3705
3706 for (auto &[name, rule] : grammar) {
3707 if (!referenced.count(name)) {
3708 if (log) {
3709 auto line = line_info(s, rule.s_);
3710 auto msg = "'" + name + "' is not referenced.";
3711 log(line.first, line.second, msg);
3712 }
3713 }
3714 }
3715
3716 if (!ret) { return nullptr; }
3717
3718 // Link references
3719 for (auto &x : grammar) {
3720 auto &rule = x.second;
3721 LinkReferences vis(grammar, rule.params);
3722 rule.accept(vis);
3723 }
3724
3725 // Check left recursion
3726 ret = true;
3727
3728 for (auto &[name, rule] : grammar) {
3729 DetectLeftRecursion vis(name);
3730 rule.accept(vis);
3731 if (vis.error_s) {
3732 if (log) {
3733 auto line = line_info(s, vis.error_s);
3734 log(line.first, line.second, "'" + name + "' is left recursive.");
3735 }
3736 ret = false;
3737 }
3738 }
3739
3740 if (!ret) { return nullptr; }
3741
3742 // Check infinite loop
3743 {
3744 DetectInfiniteLoop vis(data.start_pos, data.start);
3745 start_rule.accept(vis);
3746 if (vis.has_error) {
3747 if (log) {
3748 auto line = line_info(s, vis.error_s);
3749 log(line.first, line.second,
3750 "infinite loop is detected in '" + vis.error_name + "'.");
3751 }
3752 return nullptr;
3753 }
3754 }
3755
3756 // Automatic whitespace skipping
3757 if (grammar.count(WHITESPACE_DEFINITION_NAME)) {
3758 for (auto &x : grammar) {
3759 auto &rule = x.second;
3760 auto ope = rule.get_core_operator();
3761 if (IsLiteralToken::check(*ope)) { rule <= tok(ope); }
3762 }
3763
3764 start_rule.whitespaceOpe =
3765 wsp(grammar[WHITESPACE_DEFINITION_NAME].get_core_operator());
3766 }
3767
3768 // Word expression
3769 if (grammar.count(WORD_DEFINITION_NAME)) {
3770 start_rule.wordOpe = grammar[WORD_DEFINITION_NAME].get_core_operator();
3771 }
3772
3773 // Apply instructions
3774 for (const auto &[name, instruction] : data.instructions) {
3775 auto &rule = grammar[name];
3776
3777 if (instruction.type == "precedence") {
3778 const auto &info =
3779 std::any_cast<PrecedenceClimbing::BinOpeInfo>(instruction.data);
3780
3781 if (!apply_precedence_instruction(rule, info, s, log)) {
3782 return nullptr;
3783 }
3784 } else if (instruction.type == "message") {
3785 rule.error_message = std::any_cast<std::string>(instruction.data);
3786 } else if (instruction.type == "no_ast_opt") {
3787 rule.no_ast_opt = true;
3788 }
3789 }
3790
3791 // Set root definition
3792 start = data.start;
3793 enablePackratParsing = data.enablePackratParsing;
3794
3795 return data.grammar;
3796 }
3797
3798 Grammar g;
3799 };
3800
3801 /*-----------------------------------------------------------------------------
3802 * AST
3803 *---------------------------------------------------------------------------*/
3804
3805 template <typename Annotation> struct AstBase : public Annotation {
3806 AstBase(const char *path, size_t line, size_t column, const char *name,
3807 const std::vector<std::shared_ptr<AstBase>> &nodes,
3808 size_t position = 0, size_t length = 0, size_t choice_count = 0,
3809 size_t choice = 0)
3810 : path(path ? path : ""), line(line), column(column), name(name),
3811 position(position), length(length), choice_count(choice_count),
3812 choice(choice), original_name(name),
3813 original_choice_count(choice_count), original_choice(choice),
3814 tag(str2tag(name)), original_tag(tag), is_token(false), nodes(nodes) {}
3815
3816 AstBase(const char *path, size_t line, size_t column, const char *name,
3817 const std::string_view &token, size_t position = 0, size_t length = 0,
3818 size_t choice_count = 0, size_t choice = 0)
3819 : path(path ? path : ""), line(line), column(column), name(name),
3820 position(position), length(length), choice_count(choice_count),
3821 choice(choice), original_name(name),
3822 original_choice_count(choice_count), original_choice(choice),
3823 tag(str2tag(name)), original_tag(tag), is_token(true), token(token) {}
3824
3825 AstBase(const AstBase &ast, const char *original_name, size_t position = 0,
3826 size_t length = 0, size_t original_choice_count = 0,
3827 size_t original_choise = 0)
3828 : path(ast.path), line(ast.line), column(ast.column), name(ast.name),
3829 position(position), length(length), choice_count(ast.choice_count),
3830 choice(ast.choice), original_name(original_name),
3831 original_choice_count(original_choice_count),
3832 original_choice(original_choise), tag(ast.tag),
3833 original_tag(str2tag(original_name)), is_token(ast.is_token),
3834 token(ast.token), nodes(ast.nodes), parent(ast.parent) {}
3835
3836 const std::string path;
3837 const size_t line = 1;
3838 const size_t column = 1;
3839
3840 const std::string name;
3841 size_t position;
3842 size_t length;
3843 const size_t choice_count;
3844 const size_t choice;
3845 const std::string original_name;
3846 const size_t original_choice_count;
3847 const size_t original_choice;
3848 const unsigned int tag;
3849 const unsigned int original_tag;
3850
3851 const bool is_token;
3852 const std::string_view token;
3853
3854 std::vector<std::shared_ptr<AstBase<Annotation>>> nodes;
3855 std::weak_ptr<AstBase<Annotation>> parent;
3856
3857 std::string token_to_string() const {
3858 assert(is_token);
3859 return std::string(token);
3860 }
3861
3862 template <typename T> T token_to_number() const {
3863 return token_to_number_<T>(token);
3864 }
3865 };
3866
3867 template <typename T>
3868 void ast_to_s_core(const std::shared_ptr<T> &ptr, std::string &s, int level,
3869 std::function<std::string(const T &ast, int level)> fn) {
3870 const auto &ast = *ptr;
3871 for (auto i = 0; i < level; i++) {
3872 s += " ";
3873 }
3874 auto name = ast.original_name;
3875 if (ast.original_choice_count > 0) {
3876 name += "/" + std::to_string(ast.original_choice);
3877 }
3878 if (ast.name != ast.original_name) { name += "[" + ast.name + "]"; }
3879 if (ast.is_token) {
3880 s += "- " + name + " (";
3881 s += ast.token;
3882 s += ")\n";
3883 } else {
3884 s += "+ " + name + "\n";
3885 }
3886 if (fn) { s += fn(ast, level + 1); }
3887 for (auto node : ast.nodes) {
3888 ast_to_s_core(node, s, level + 1, fn);
3889 }
3890 }
3891
3892 template <typename T>
3893 std::string
3894 ast_to_s(const std::shared_ptr<T> &ptr,
3895 std::function<std::string(const T &ast, int level)> fn = nullptr) {
3896 std::string s;
3897 ast_to_s_core(ptr, s, 0, fn);
3898 return s;
3899 }
3900
3901 struct AstOptimizer {
3902 AstOptimizer(bool mode, const std::vector<std::string> &rules = {})
3903 : mode_(mode), rules_(rules) {}
3904
3905 template <typename T>
3906 std::shared_ptr<T> optimize(std::shared_ptr<T> original,
3907 std::shared_ptr<T> parent = nullptr) {
3908 auto found =
3909 std::find(rules_.begin(), rules_.end(), original->name) != rules_.end();
3910 bool opt = mode_ ? !found : found;
3911
3912 if (opt && original->nodes.size() == 1) {
3913 auto child = optimize(original->nodes[0], parent);
3914 return std::make_shared<T>(*child, original->name.data(),
3915 original->choice_count, original->position,
3916 original->length, original->choice);
3917 }
3918
3919 auto ast = std::make_shared<T>(*original);
3920 ast->parent = parent;
3921 ast->nodes.clear();
3922 for (auto node : original->nodes) {
3923 auto child = optimize(node, ast);
3924 ast->nodes.push_back(child);
3925 }
3926 return ast;
3927 }
3928
3929 private:
3930 const bool mode_;
3931 const std::vector<std::string> rules_;
3932 };
3933
3934 struct EmptyType {};
3935 using Ast = AstBase<EmptyType>;
3936
3937 template <typename T = Ast> void add_ast_action(Definition &rule) {
3938 rule.action = [&](const SemanticValues &vs) {
3939 auto line = vs.line_info();
3940
3941 if (rule.is_token()) {
3942 return std::make_shared<T>(
3943 vs.path, line.first, line.second, rule.name.data(), vs.token(),
3944 std::distance(vs.ss, vs.sv().data()), vs.sv().length(),
3945 vs.choice_count(), vs.choice());
3946 }
3947
3948 auto ast =
3949 std::make_shared<T>(vs.path, line.first, line.second, rule.name.data(),
3950 vs.transform<std::shared_ptr<T>>(),
3951 std::distance(vs.ss, vs.sv().data()),
3952 vs.sv().length(), vs.choice_count(), vs.choice());
3953
3954 for (auto node : ast->nodes) {
3955 node->parent = ast;
3956 }
3957 return ast;
3958 };
3959 }
3960
3961 #define PEG_EXPAND(...) __VA_ARGS__
3962 #define PEG_CONCAT(a, b) a##b
3963 #define PEG_CONCAT2(a, b) PEG_CONCAT(a, b)
3964
3965 #define PEG_PICK( \
3966 a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, \
3967 a17, a18, a19, a20, a21, a22, a23, a24, a25, a26, a27, a28, a29, a30, a31, \
3968 a32, a33, a34, a35, a36, a37, a38, a39, a40, a41, a42, a43, a44, a45, a46, \
3969 a47, a48, a49, a50, a51, a52, a53, a54, a55, a56, a57, a58, a59, a60, a61, \
3970 a62, a63, a64, a65, a66, a67, a68, a69, a70, a71, a72, a73, a74, a75, a76, \
3971 a77, a78, a79, a80, a81, a82, a83, a84, a85, a86, a87, a88, a89, a90, a91, \
3972 a92, a93, a94, a95, a96, a97, a98, a99, a100, ...) \
3973 a100
3974
3975 #define PEG_COUNT(...) \
3976 PEG_EXPAND(PEG_PICK( \
3977 __VA_ARGS__, 100, 99, 98, 97, 96, 95, 94, 93, 92, 91, 90, 89, 88, 87, \
3978 86, 85, 84, 83, 82, 81, 80, 79, 78, 77, 76, 75, 74, 73, 72, 71, 70, 69, \
3979 68, 67, 66, 65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, \
3980 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, \
3981 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, \
3982 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0))
3983
3984 #define PEG_DEF_1(r) \
3985 peg::Definition r; \
3986 r.name = #r; \
3987 peg::add_ast_action(r);
3988
3989 #define PEG_DEF_2(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_1(__VA_ARGS__))
3990 #define PEG_DEF_3(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_2(__VA_ARGS__))
3991 #define PEG_DEF_4(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_3(__VA_ARGS__))
3992 #define PEG_DEF_5(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_4(__VA_ARGS__))
3993 #define PEG_DEF_6(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_5(__VA_ARGS__))
3994 #define PEG_DEF_7(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_6(__VA_ARGS__))
3995 #define PEG_DEF_8(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_7(__VA_ARGS__))
3996 #define PEG_DEF_9(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_8(__VA_ARGS__))
3997 #define PEG_DEF_10(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_9(__VA_ARGS__))
3998 #define PEG_DEF_11(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_10(__VA_ARGS__))
3999 #define PEG_DEF_12(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_11(__VA_ARGS__))
4000 #define PEG_DEF_13(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_12(__VA_ARGS__))
4001 #define PEG_DEF_14(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_13(__VA_ARGS__))
4002 #define PEG_DEF_15(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_14(__VA_ARGS__))
4003 #define PEG_DEF_16(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_15(__VA_ARGS__))
4004 #define PEG_DEF_17(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_16(__VA_ARGS__))
4005 #define PEG_DEF_18(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_17(__VA_ARGS__))
4006 #define PEG_DEF_19(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_18(__VA_ARGS__))
4007 #define PEG_DEF_20(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_19(__VA_ARGS__))
4008 #define PEG_DEF_21(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_20(__VA_ARGS__))
4009 #define PEG_DEF_22(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_21(__VA_ARGS__))
4010 #define PEG_DEF_23(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_22(__VA_ARGS__))
4011 #define PEG_DEF_24(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_23(__VA_ARGS__))
4012 #define PEG_DEF_25(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_24(__VA_ARGS__))
4013 #define PEG_DEF_26(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_25(__VA_ARGS__))
4014 #define PEG_DEF_27(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_26(__VA_ARGS__))
4015 #define PEG_DEF_28(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_27(__VA_ARGS__))
4016 #define PEG_DEF_29(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_28(__VA_ARGS__))
4017 #define PEG_DEF_30(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_29(__VA_ARGS__))
4018 #define PEG_DEF_31(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_30(__VA_ARGS__))
4019 #define PEG_DEF_32(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_31(__VA_ARGS__))
4020 #define PEG_DEF_33(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_32(__VA_ARGS__))
4021 #define PEG_DEF_34(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_33(__VA_ARGS__))
4022 #define PEG_DEF_35(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_34(__VA_ARGS__))
4023 #define PEG_DEF_36(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_35(__VA_ARGS__))
4024 #define PEG_DEF_37(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_36(__VA_ARGS__))
4025 #define PEG_DEF_38(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_37(__VA_ARGS__))
4026 #define PEG_DEF_39(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_38(__VA_ARGS__))
4027 #define PEG_DEF_40(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_39(__VA_ARGS__))
4028 #define PEG_DEF_41(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_40(__VA_ARGS__))
4029 #define PEG_DEF_42(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_41(__VA_ARGS__))
4030 #define PEG_DEF_43(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_42(__VA_ARGS__))
4031 #define PEG_DEF_44(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_43(__VA_ARGS__))
4032 #define PEG_DEF_45(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_44(__VA_ARGS__))
4033 #define PEG_DEF_46(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_45(__VA_ARGS__))
4034 #define PEG_DEF_47(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_46(__VA_ARGS__))
4035 #define PEG_DEF_48(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_47(__VA_ARGS__))
4036 #define PEG_DEF_49(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_48(__VA_ARGS__))
4037 #define PEG_DEF_50(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_49(__VA_ARGS__))
4038 #define PEG_DEF_51(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_50(__VA_ARGS__))
4039 #define PEG_DEF_52(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_51(__VA_ARGS__))
4040 #define PEG_DEF_53(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_52(__VA_ARGS__))
4041 #define PEG_DEF_54(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_53(__VA_ARGS__))
4042 #define PEG_DEF_55(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_54(__VA_ARGS__))
4043 #define PEG_DEF_56(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_55(__VA_ARGS__))
4044 #define PEG_DEF_57(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_56(__VA_ARGS__))
4045 #define PEG_DEF_58(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_57(__VA_ARGS__))
4046 #define PEG_DEF_59(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_58(__VA_ARGS__))
4047 #define PEG_DEF_60(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_59(__VA_ARGS__))
4048 #define PEG_DEF_61(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_60(__VA_ARGS__))
4049 #define PEG_DEF_62(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_61(__VA_ARGS__))
4050 #define PEG_DEF_63(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_62(__VA_ARGS__))
4051 #define PEG_DEF_64(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_63(__VA_ARGS__))
4052 #define PEG_DEF_65(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_64(__VA_ARGS__))
4053 #define PEG_DEF_66(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_65(__VA_ARGS__))
4054 #define PEG_DEF_67(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_66(__VA_ARGS__))
4055 #define PEG_DEF_68(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_67(__VA_ARGS__))
4056 #define PEG_DEF_69(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_68(__VA_ARGS__))
4057 #define PEG_DEF_70(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_69(__VA_ARGS__))
4058 #define PEG_DEF_71(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_70(__VA_ARGS__))
4059 #define PEG_DEF_72(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_71(__VA_ARGS__))
4060 #define PEG_DEF_73(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_72(__VA_ARGS__))
4061 #define PEG_DEF_74(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_73(__VA_ARGS__))
4062 #define PEG_DEF_75(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_74(__VA_ARGS__))
4063 #define PEG_DEF_76(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_75(__VA_ARGS__))
4064 #define PEG_DEF_77(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_76(__VA_ARGS__))
4065 #define PEG_DEF_78(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_77(__VA_ARGS__))
4066 #define PEG_DEF_79(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_78(__VA_ARGS__))
4067 #define PEG_DEF_80(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_79(__VA_ARGS__))
4068 #define PEG_DEF_81(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_80(__VA_ARGS__))
4069 #define PEG_DEF_82(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_81(__VA_ARGS__))
4070 #define PEG_DEF_83(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_82(__VA_ARGS__))
4071 #define PEG_DEF_84(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_83(__VA_ARGS__))
4072 #define PEG_DEF_85(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_84(__VA_ARGS__))
4073 #define PEG_DEF_86(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_85(__VA_ARGS__))
4074 #define PEG_DEF_87(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_86(__VA_ARGS__))
4075 #define PEG_DEF_88(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_87(__VA_ARGS__))
4076 #define PEG_DEF_89(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_88(__VA_ARGS__))
4077 #define PEG_DEF_90(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_89(__VA_ARGS__))
4078 #define PEG_DEF_91(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_90(__VA_ARGS__))
4079 #define PEG_DEF_92(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_91(__VA_ARGS__))
4080 #define PEG_DEF_93(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_92(__VA_ARGS__))
4081 #define PEG_DEF_94(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_93(__VA_ARGS__))
4082 #define PEG_DEF_95(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_94(__VA_ARGS__))
4083 #define PEG_DEF_96(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_95(__VA_ARGS__))
4084 #define PEG_DEF_97(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_96(__VA_ARGS__))
4085 #define PEG_DEF_98(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_97(__VA_ARGS__))
4086 #define PEG_DEF_99(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_98(__VA_ARGS__))
4087 #define PEG_DEF_100(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_99(__VA_ARGS__))
4088
4089 #define AST_DEFINITIONS(...) \
4090 PEG_EXPAND(PEG_CONCAT2(PEG_DEF_, PEG_COUNT(__VA_ARGS__))(__VA_ARGS__))
4091
4092 /*-----------------------------------------------------------------------------
4093 * parser
4094 *---------------------------------------------------------------------------*/
4095
4096 class parser {
4097 public:
4098 parser() = default;
4099
4100 parser(const char *s, size_t n, const Rules &rules) {
4101 load_grammar(s, n, rules);
4102 }
4103
4104 parser(std::string_view sv, const Rules &rules)
4105 : parser(sv.data(), sv.size(), rules) {}
4106
4107 parser(const char *s, size_t n) : parser(s, n, Rules()) {}
4108
4109 parser(std::string_view sv) : parser(sv.data(), sv.size(), Rules()) {}
4110
4111 operator bool() { return grammar_ != nullptr; }
4112
4113 bool load_grammar(const char *s, size_t n, const Rules &rules) {
4114 grammar_ =
4115 ParserGenerator::parse(s, n, rules, start_, enablePackratParsing_, log);
4116 return grammar_ != nullptr;
4117 }
4118
4119 bool load_grammar(const char *s, size_t n) {
4120 return load_grammar(s, n, Rules());
4121 }
4122
4123 bool load_grammar(std::string_view sv, const Rules &rules) {
4124 return load_grammar(sv.data(), sv.size(), rules);
4125 }
4126
4127 bool load_grammar(std::string_view sv) {
4128 return load_grammar(sv.data(), sv.size());
4129 }
4130
4131 bool parse_n(const char *s, size_t n, const char *path = nullptr) const {
4132 if (grammar_ != nullptr) {
4133 const auto &rule = (*grammar_)[start_];
4134 return post_process(s, n, rule.parse(s, n, path, log));
4135 }
4136 return false;
4137 }
4138
4139 bool parse(std::string_view sv, const char *path = nullptr) const {
4140 return parse_n(sv.data(), sv.size(), path);
4141 }
4142
4143 bool parse_n(const char *s, size_t n, std::any &dt,
4144 const char *path = nullptr) const {
4145 if (grammar_ != nullptr) {
4146 const auto &rule = (*grammar_)[start_];
4147 return post_process(s, n, rule.parse(s, n, dt, path, log));
4148 }
4149 return false;
4150 }
4151
4152 bool parse(std::string_view sv, std::any &dt,
4153 const char *path = nullptr) const {
4154 return parse_n(sv.data(), sv.size(), dt, path);
4155 }
4156
4157 template <typename T>
4158 bool parse_n(const char *s, size_t n, T &val,
4159 const char *path = nullptr) const {
4160 if (grammar_ != nullptr) {
4161 const auto &rule = (*grammar_)[start_];
4162 return post_process(s, n, rule.parse_and_get_value(s, n, val, path, log));
4163 }
4164 return false;
4165 }
4166
4167 template <typename T>
4168 bool parse(std::string_view sv, T &val, const char *path = nullptr) const {
4169 return parse_n(sv.data(), sv.size(), val, path);
4170 }
4171
4172 template <typename T>
4173 bool parse_n(const char *s, size_t n, std::any &dt, T &val,
4174 const char *path = nullptr) const {
4175 if (grammar_ != nullptr) {
4176 const auto &rule = (*grammar_)[start_];
4177 return post_process(s, n,
4178 rule.parse_and_get_value(s, n, dt, val, path, log));
4179 }
4180 return false;
4181 }
4182
4183 template <typename T>
4184 bool parse(const char *s, std::any &dt, T &val,
4185 const char *path = nullptr) const {
4186 auto n = strlen(s);
4187 return parse_n(s, n, dt, val, path);
4188 }
4189
4190 Definition &operator[](const char *s) { return (*grammar_)[s]; }
4191
4192 const Definition &operator[](const char *s) const { return (*grammar_)[s]; }
4193
4194 std::vector<std::string> get_rule_names() const {
4195 std::vector<std::string> rules;
4196 for (auto &[name, _] : *grammar_) {
4197 rules.push_back(name);
4198 }
4199 return rules;
4200 }
4201
4202 void enable_packrat_parsing() {
4203 if (grammar_ != nullptr) {
4204 auto &rule = (*grammar_)[start_];
4205 rule.enablePackratParsing = enablePackratParsing_ && true;
4206 }
4207 }
4208
4209 void enable_trace(TracerEnter tracer_enter, TracerLeave tracer_leave) {
4210 if (grammar_ != nullptr) {
4211 auto &rule = (*grammar_)[start_];
4212 rule.tracer_enter = tracer_enter;
4213 rule.tracer_leave = tracer_leave;
4214 }
4215 }
4216
4217 template <typename T = Ast> parser &enable_ast() {
4218 for (auto &[_, rule] : *grammar_) {
4219 if (!rule.action) { add_ast_action<T>(rule); }
4220 }
4221 return *this;
4222 }
4223
4224 template <typename T>
4225 std::shared_ptr<T> optimize_ast(std::shared_ptr<T> ast,
4226 bool opt_mode = true) const {
4227 return AstOptimizer(opt_mode, get_no_ast_opt_rules()).optimize(ast);
4228 }
4229
4230 Log log;
4231
4232 private:
4233 bool post_process(const char *s, size_t n,
4234 const Definition::Result &r) const {
4235 auto ret = r.ret && r.len == n;
4236 if (log && !ret) { r.error_info.output_log(log, s, n); }
4237 return ret && !r.recovered;
4238 }
4239
4240 std::vector<std::string> get_no_ast_opt_rules() const {
4241 std::vector<std::string> rules;
4242 for (auto &[name, rule] : *grammar_) {
4243 if (rule.no_ast_opt) { rules.push_back(name); }
4244 }
4245 return rules;
4246 }
4247
4248 std::shared_ptr<Grammar> grammar_;
4249 std::string start_;
4250 bool enablePackratParsing_ = false;
4251 };
4252
4253 } // namespace peg
4254