1 //
2 //  peglib.h
3 //
4 //  Copyright (c) 2020 Yuji Hirose. All rights reserved.
5 //  MIT License
6 //
7 
8 #pragma once
9 
10 #include <algorithm>
11 #include <any>
12 #include <cassert>
13 #include <cctype>
14 #if __has_include(<charconv>)
15 #include <charconv>
16 #define CPPPEGLIB_HAS_CHARCONV true
17 #else
18 #define CPPPEGLIB_HAS_CHARCONV false
19 #endif
20 #include <cstring>
21 #include <functional>
22 #include <initializer_list>
23 #include <iostream>
24 #include <limits>
25 #include <list>
26 #include <map>
27 #include <memory>
28 #include <mutex>
29 #include <set>
30 #include <sstream>
31 #include <string>
32 #include <unordered_map>
33 #include <unordered_set>
34 #include <vector>
35 
36 #if !defined(__cplusplus) || __cplusplus < 201703L
37 #error "Requires complete C++17 support"
38 #endif
39 
40 namespace peg {
41 
42 /*-----------------------------------------------------------------------------
43  *  scope_exit
44  *---------------------------------------------------------------------------*/
45 
46 // This is based on
47 // "http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2014/n4189".
48 
49 template <typename EF> struct scope_exit {
scope_exitscope_exit50   explicit scope_exit(EF &&f)
51       : exit_function(std::move(f)), execute_on_destruction{true} {}
52 
scope_exitscope_exit53   scope_exit(scope_exit &&rhs)
54       : exit_function(std::move(rhs.exit_function)),
55         execute_on_destruction{rhs.execute_on_destruction} {
56     rhs.release();
57   }
58 
~scope_exitscope_exit59   ~scope_exit() {
60     if (execute_on_destruction) { this->exit_function(); }
61   }
62 
releasescope_exit63   void release() { this->execute_on_destruction = false; }
64 
65 private:
66   scope_exit(const scope_exit &) = delete;
67   void operator=(const scope_exit &) = delete;
68   scope_exit &operator=(scope_exit &&) = delete;
69 
70   EF exit_function;
71   bool execute_on_destruction;
72 };
73 
74 /*-----------------------------------------------------------------------------
75  *  UTF8 functions
76  *---------------------------------------------------------------------------*/
77 
codepoint_length(const char * s8,size_t l)78 inline size_t codepoint_length(const char *s8, size_t l) {
79   if (l) {
80     auto b = static_cast<uint8_t>(s8[0]);
81     if ((b & 0x80) == 0) {
82       return 1;
83     } else if ((b & 0xE0) == 0xC0 && l >= 2) {
84       return 2;
85     } else if ((b & 0xF0) == 0xE0 && l >= 3) {
86       return 3;
87     } else if ((b & 0xF8) == 0xF0 && l >= 4) {
88       return 4;
89     }
90   }
91   return 0;
92 }
93 
codepoint_count(const char * s8,size_t l)94 inline size_t codepoint_count(const char *s8, size_t l) {
95   size_t count = 0;
96   for (size_t i = 0; i < l; i += codepoint_length(s8 + i, l - i)) {
97     count++;
98   }
99   return count;
100 }
101 
encode_codepoint(char32_t cp,char * buff)102 inline size_t encode_codepoint(char32_t cp, char *buff) {
103   if (cp < 0x0080) {
104     buff[0] = static_cast<char>(cp & 0x7F);
105     return 1;
106   } else if (cp < 0x0800) {
107     buff[0] = static_cast<char>(0xC0 | ((cp >> 6) & 0x1F));
108     buff[1] = static_cast<char>(0x80 | (cp & 0x3F));
109     return 2;
110   } else if (cp < 0xD800) {
111     buff[0] = static_cast<char>(0xE0 | ((cp >> 12) & 0xF));
112     buff[1] = static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
113     buff[2] = static_cast<char>(0x80 | (cp & 0x3F));
114     return 3;
115   } else if (cp < 0xE000) {
116     // D800 - DFFF is invalid...
117     return 0;
118   } else if (cp < 0x10000) {
119     buff[0] = static_cast<char>(0xE0 | ((cp >> 12) & 0xF));
120     buff[1] = static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
121     buff[2] = static_cast<char>(0x80 | (cp & 0x3F));
122     return 3;
123   } else if (cp < 0x110000) {
124     buff[0] = static_cast<char>(0xF0 | ((cp >> 18) & 0x7));
125     buff[1] = static_cast<char>(0x80 | ((cp >> 12) & 0x3F));
126     buff[2] = static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
127     buff[3] = static_cast<char>(0x80 | (cp & 0x3F));
128     return 4;
129   }
130   return 0;
131 }
132 
encode_codepoint(char32_t cp)133 inline std::string encode_codepoint(char32_t cp) {
134   char buff[4];
135   auto l = encode_codepoint(cp, buff);
136   return std::string(buff, l);
137 }
138 
decode_codepoint(const char * s8,size_t l,size_t & bytes,char32_t & cp)139 inline bool decode_codepoint(const char *s8, size_t l, size_t &bytes,
140                              char32_t &cp) {
141   if (l) {
142     auto b = static_cast<uint8_t>(s8[0]);
143     if ((b & 0x80) == 0) {
144       bytes = 1;
145       cp = b;
146       return true;
147     } else if ((b & 0xE0) == 0xC0) {
148       if (l >= 2) {
149         bytes = 2;
150         cp = ((static_cast<char32_t>(s8[0] & 0x1F)) << 6) |
151              (static_cast<char32_t>(s8[1] & 0x3F));
152         return true;
153       }
154     } else if ((b & 0xF0) == 0xE0) {
155       if (l >= 3) {
156         bytes = 3;
157         cp = ((static_cast<char32_t>(s8[0] & 0x0F)) << 12) |
158              ((static_cast<char32_t>(s8[1] & 0x3F)) << 6) |
159              (static_cast<char32_t>(s8[2] & 0x3F));
160         return true;
161       }
162     } else if ((b & 0xF8) == 0xF0) {
163       if (l >= 4) {
164         bytes = 4;
165         cp = ((static_cast<char32_t>(s8[0] & 0x07)) << 18) |
166              ((static_cast<char32_t>(s8[1] & 0x3F)) << 12) |
167              ((static_cast<char32_t>(s8[2] & 0x3F)) << 6) |
168              (static_cast<char32_t>(s8[3] & 0x3F));
169         return true;
170       }
171     }
172   }
173   return false;
174 }
175 
decode_codepoint(const char * s8,size_t l,char32_t & cp)176 inline size_t decode_codepoint(const char *s8, size_t l, char32_t &cp) {
177   size_t bytes;
178   if (decode_codepoint(s8, l, bytes, cp)) { return bytes; }
179   return 0;
180 }
181 
decode_codepoint(const char * s8,size_t l)182 inline char32_t decode_codepoint(const char *s8, size_t l) {
183   char32_t cp = 0;
184   decode_codepoint(s8, l, cp);
185   return cp;
186 }
187 
decode(const char * s8,size_t l)188 inline std::u32string decode(const char *s8, size_t l) {
189   std::u32string out;
190   size_t i = 0;
191   while (i < l) {
192     auto beg = i++;
193     while (i < l && (s8[i] & 0xc0) == 0x80) {
194       i++;
195     }
196     out += decode_codepoint(&s8[beg], (i - beg));
197   }
198   return out;
199 }
200 
u8(const T * s)201 template <typename T> const char *u8(const T *s) {
202   return reinterpret_cast<const char *>(s);
203 }
204 
205 /*-----------------------------------------------------------------------------
206  *  escape_characters
207  *---------------------------------------------------------------------------*/
208 
escape_characters(const char * s,size_t n)209 inline std::string escape_characters(const char *s, size_t n) {
210   std::string str;
211   for (size_t i = 0; i < n; i++) {
212     auto c = s[i];
213     switch (c) {
214     case '\n': str += "\\n"; break;
215     case '\r': str += "\\r"; break;
216     case '\t': str += "\\t"; break;
217     default: str += c; break;
218     }
219   }
220   return str;
221 }
222 
escape_characters(std::string_view sv)223 inline std::string escape_characters(std::string_view sv) {
224   return escape_characters(sv.data(), sv.size());
225 }
226 
227 /*-----------------------------------------------------------------------------
228  *  resolve_escape_sequence
229  *---------------------------------------------------------------------------*/
230 
is_hex(char c,int & v)231 inline bool is_hex(char c, int &v) {
232   if ('0' <= c && c <= '9') {
233     v = c - '0';
234     return true;
235   } else if ('a' <= c && c <= 'f') {
236     v = c - 'a' + 10;
237     return true;
238   } else if ('A' <= c && c <= 'F') {
239     v = c - 'A' + 10;
240     return true;
241   }
242   return false;
243 }
244 
is_digit(char c,int & v)245 inline bool is_digit(char c, int &v) {
246   if ('0' <= c && c <= '9') {
247     v = c - '0';
248     return true;
249   }
250   return false;
251 }
252 
parse_hex_number(const char * s,size_t n,size_t i)253 inline std::pair<int, size_t> parse_hex_number(const char *s, size_t n,
254                                                size_t i) {
255   int ret = 0;
256   int val;
257   while (i < n && is_hex(s[i], val)) {
258     ret = static_cast<int>(ret * 16 + val);
259     i++;
260   }
261   return std::pair(ret, i);
262 }
263 
parse_octal_number(const char * s,size_t n,size_t i)264 inline std::pair<int, size_t> parse_octal_number(const char *s, size_t n,
265                                                  size_t i) {
266   int ret = 0;
267   int val;
268   while (i < n && is_digit(s[i], val)) {
269     ret = static_cast<int>(ret * 8 + val);
270     i++;
271   }
272   return std::pair(ret, i);
273 }
274 
resolve_escape_sequence(const char * s,size_t n)275 inline std::string resolve_escape_sequence(const char *s, size_t n) {
276   std::string r;
277   r.reserve(n);
278 
279   size_t i = 0;
280   while (i < n) {
281     auto ch = s[i];
282     if (ch == '\\') {
283       i++;
284       if (i == n) { throw std::runtime_error("Invalid escape sequence..."); }
285       switch (s[i]) {
286       case 'n':
287         r += '\n';
288         i++;
289         break;
290       case 'r':
291         r += '\r';
292         i++;
293         break;
294       case 't':
295         r += '\t';
296         i++;
297         break;
298       case '\'':
299         r += '\'';
300         i++;
301         break;
302       case '"':
303         r += '"';
304         i++;
305         break;
306       case '[':
307         r += '[';
308         i++;
309         break;
310       case ']':
311         r += ']';
312         i++;
313         break;
314       case '\\':
315         r += '\\';
316         i++;
317         break;
318       case 'x':
319       case 'u': {
320         char32_t cp;
321         std::tie(cp, i) = parse_hex_number(s, n, i + 1);
322         r += encode_codepoint(cp);
323         break;
324       }
325       default: {
326         char32_t cp;
327         std::tie(cp, i) = parse_octal_number(s, n, i);
328         r += encode_codepoint(cp);
329         break;
330       }
331       }
332     } else {
333       r += ch;
334       i++;
335     }
336   }
337   return r;
338 }
339 
340 /*-----------------------------------------------------------------------------
341  *  token_to_number_ - This function should be removed eventually
342  *---------------------------------------------------------------------------*/
343 
token_to_number_(std::string_view sv)344 template <typename T> T token_to_number_(std::string_view sv) {
345   T n = 0;
346   if constexpr (CPPPEGLIB_HAS_CHARCONV && !std::is_floating_point<T>::value) {
347     std::from_chars(sv.data(), sv.data() + sv.size(), n);
348   } else {
349     auto s = std::string(sv);
350     std::istringstream ss(s);
351     ss >> n;
352   }
353   return n;
354 }
355 
356 /*-----------------------------------------------------------------------------
357  *  Trie
358  *---------------------------------------------------------------------------*/
359 
360 class Trie {
361 public:
362   Trie() = default;
363   Trie(const Trie &) = default;
364 
Trie(const std::vector<std::string> & items)365   Trie(const std::vector<std::string> &items) {
366     for (const auto &item : items) {
367       for (size_t len = 1; len <= item.size(); len++) {
368         auto last = len == item.size();
369         std::string_view sv(item.data(), len);
370         auto it = dic_.find(sv);
371         if (it == dic_.end()) {
372           dic_.emplace(sv, Info{last, last});
373         } else if (last) {
374           it->second.match = true;
375         } else {
376           it->second.done = false;
377         }
378       }
379     }
380   }
381 
match(const char * text,size_t text_len)382   size_t match(const char *text, size_t text_len) const {
383     size_t match_len = 0;
384     auto done = false;
385     size_t len = 1;
386     while (!done && len <= text_len) {
387       std::string_view sv(text, len);
388       auto it = dic_.find(sv);
389       if (it == dic_.end()) {
390         done = true;
391       } else {
392         if (it->second.match) { match_len = len; }
393         if (it->second.done) { done = true; }
394       }
395       len += 1;
396     }
397     return match_len;
398   }
399 
400 private:
401   struct Info {
402     bool done;
403     bool match;
404   };
405 
406   // TODO: Use unordered_map when heterogeneous lookup is supported in C++20
407   // std::unordered_map<std::string, Info> dic_;
408   std::map<std::string, Info, std::less<>> dic_;
409 };
410 
411 /*-----------------------------------------------------------------------------
412  *  PEG
413  *---------------------------------------------------------------------------*/
414 
415 /*
416  * Line information utility function
417  */
line_info(const char * start,const char * cur)418 inline std::pair<size_t, size_t> line_info(const char *start, const char *cur) {
419   auto p = start;
420   auto col_ptr = p;
421   auto no = 1;
422 
423   while (p < cur) {
424     if (*p == '\n') {
425       no++;
426       col_ptr = p + 1;
427     }
428     p++;
429   }
430 
431   auto col = codepoint_count(col_ptr, p - col_ptr) + 1;
432 
433   return std::pair(no, col);
434 }
435 
436 /*
437  * String tag
438  */
str2tag_core(const char * s,size_t l,unsigned int h)439 inline constexpr unsigned int str2tag_core(const char *s, size_t l,
440                                            unsigned int h) {
441   return (l == 0) ? h
442                   : str2tag_core(s + 1, l - 1,
443                                  (h * 33) ^ static_cast<unsigned char>(*s));
444 }
445 
str2tag(std::string_view sv)446 inline constexpr unsigned int str2tag(std::string_view sv) {
447   return str2tag_core(sv.data(), sv.size(), 0);
448 }
449 
450 namespace udl {
451 
452 inline constexpr unsigned int operator"" _(const char *s, size_t l) {
453   return str2tag_core(s, l, 0);
454 }
455 
456 } // namespace udl
457 
458 /*
459  * Semantic values
460  */
461 struct SemanticValues : protected std::vector<std::any> {
462   // Input text
463   const char *path = nullptr;
464   const char *ss = nullptr;
465   std::function<const std::vector<size_t> &()> source_line_index;
466 
467   // Matched string
svSemanticValues468   std::string_view sv() const { return sv_; }
469 
470   // Definition name
nameSemanticValues471   const std::string &name() const { return name_; }
472 
473   std::vector<unsigned int> tags;
474 
475   // Line number and column at which the matched string is
line_infoSemanticValues476   std::pair<size_t, size_t> line_info() const {
477     auto &idx = source_line_index();
478 
479     auto cur = static_cast<size_t>(std::distance(ss, sv_.data()));
480     auto it = std::lower_bound(
481         idx.begin(), idx.end(), cur,
482         [](size_t element, size_t value) { return element < value; });
483 
484     auto id = static_cast<size_t>(std::distance(idx.begin(), it));
485     auto off = cur - (id == 0 ? 0 : idx[id - 1] + 1);
486     return std::pair(id + 1, off + 1);
487   }
488 
489   // Choice count
choice_countSemanticValues490   size_t choice_count() const { return choice_count_; }
491 
492   // Choice number (0 based index)
choiceSemanticValues493   size_t choice() const { return choice_; }
494 
495   // Tokens
496   std::vector<std::string_view> tokens;
497 
498   std::string_view token(size_t id = 0) const {
499     if (tokens.empty()) { return sv_; }
500     assert(id < tokens.size());
501     return tokens[id];
502   }
503 
504   // Token conversion
505   std::string token_to_string(size_t id = 0) const {
506     return std::string(token(id));
507   }
508 
token_to_numberSemanticValues509   template <typename T> T token_to_number() const {
510     return token_to_number_<T>(token());
511   }
512 
513   // Transform the semantic value vector to another vector
514   template <typename T>
515   std::vector<T> transform(size_t beg = 0,
516                            size_t end = static_cast<size_t>(-1)) const {
517     std::vector<T> r;
518     end = (std::min)(end, size());
519     for (size_t i = beg; i < end; i++) {
520       r.emplace_back(std::any_cast<T>((*this)[i]));
521     }
522     return r;
523   }
524 
525   using std::vector<std::any>::iterator;
526   using std::vector<std::any>::const_iterator;
527   using std::vector<std::any>::size;
528   using std::vector<std::any>::empty;
529   using std::vector<std::any>::assign;
530   using std::vector<std::any>::begin;
531   using std::vector<std::any>::end;
532   using std::vector<std::any>::rbegin;
533   using std::vector<std::any>::rend;
534   using std::vector<std::any>::operator[];
535   using std::vector<std::any>::at;
536   using std::vector<std::any>::resize;
537   using std::vector<std::any>::front;
538   using std::vector<std::any>::back;
539   using std::vector<std::any>::push_back;
540   using std::vector<std::any>::pop_back;
541   using std::vector<std::any>::insert;
542   using std::vector<std::any>::erase;
543   using std::vector<std::any>::clear;
544   using std::vector<std::any>::swap;
545   using std::vector<std::any>::emplace;
546   using std::vector<std::any>::emplace_back;
547 
548 private:
549   friend class Context;
550   friend class Sequence;
551   friend class PrioritizedChoice;
552   friend class Holder;
553   friend class PrecedenceClimbing;
554 
555   std::string_view sv_;
556   size_t choice_count_ = 0;
557   size_t choice_ = 0;
558   std::string name_;
559 };
560 
561 /*
562  * Semantic action
563  */
call(F fn,Args &&...args)564 template <typename F, typename... Args> std::any call(F fn, Args &&... args) {
565   using R = decltype(fn(std::forward<Args>(args)...));
566   if constexpr (std::is_void<R>::value) {
567     fn(std::forward<Args>(args)...);
568     return std::any();
569   } else if constexpr (std::is_same<typename std::remove_cv<R>::type,
570                                     std::any>::value) {
571     return fn(std::forward<Args>(args)...);
572   } else {
573     return std::any(fn(std::forward<Args>(args)...));
574   }
575 }
576 
577 template <typename T>
578 struct argument_count : argument_count<decltype(&T::operator())> {};
579 template <typename R, typename... Args>
580 struct argument_count<R (*)(Args...)>
581     : std::integral_constant<unsigned, sizeof...(Args)> {};
582 template <typename R, typename C, typename... Args>
583 struct argument_count<R (C::*)(Args...)>
584     : std::integral_constant<unsigned, sizeof...(Args)> {};
585 template <typename R, typename C, typename... Args>
586 struct argument_count<R (C::*)(Args...) const>
587     : std::integral_constant<unsigned, sizeof...(Args)> {};
588 
589 class Action {
590 public:
591   Action() = default;
592   Action(Action &&rhs) = default;
593   template <typename F> Action(F fn) : fn_(make_adaptor(fn)) {}
594   template <typename F> void operator=(F fn) { fn_ = make_adaptor(fn); }
595   Action &operator=(const Action &rhs) = default;
596 
597   operator bool() const { return bool(fn_); }
598 
599   std::any operator()(SemanticValues &vs, std::any &dt) const {
600     return fn_(vs, dt);
601   }
602 
603 private:
604   using Fty = std::function<std::any(SemanticValues &vs, std::any &dt)>;
605 
606   template <typename F> Fty make_adaptor(F fn) {
607     if constexpr (argument_count<F>::value == 1) {
608       return [fn](auto &vs, auto & /*dt*/) { return call(fn, vs); };
609     } else {
610       return [fn](auto &vs, auto &dt) { return call(fn, vs, dt); };
611     }
612   }
613 
614   Fty fn_;
615 };
616 
617 /*
618  * Semantic predicate
619  */
620 // Note: 'parse_error' exception class should be be used in sematic action
621 // handlers to reject the rule.
622 struct parse_error {
623   parse_error() = default;
624   parse_error(const char *s) : s_(s) {}
625   const char *what() const { return s_.empty() ? nullptr : s_.data(); }
626 
627 private:
628   std::string s_;
629 };
630 
631 /*
632  * Parse result helper
633  */
634 inline bool success(size_t len) { return len != static_cast<size_t>(-1); }
635 
636 inline bool fail(size_t len) { return len == static_cast<size_t>(-1); }
637 
638 /*
639  * Log
640  */
641 using Log = std::function<void(size_t, size_t, const std::string &)>;
642 
643 /*
644  * ErrorInfo
645  */
646 struct ErrorInfo {
647   const char *error_pos = nullptr;
648   std::vector<std::pair<const char *, bool>> expected_tokens;
649   const char *message_pos = nullptr;
650   std::string message;
651   mutable const char *last_output_pos = nullptr;
652 
653   void clear() {
654     error_pos = nullptr;
655     expected_tokens.clear();
656     message_pos = nullptr;
657     message.clear();
658   }
659 
660   void add(const char *token, bool is_literal) {
661     for (const auto &[t, l] : expected_tokens) {
662       if (t == token && l == is_literal) { return; }
663     }
664     expected_tokens.push_back(std::make_pair(token, is_literal));
665   }
666 
667   void output_log(const Log &log, const char *s, size_t n) const {
668     if (message_pos) {
669       if (message_pos > last_output_pos) {
670         last_output_pos = message_pos;
671         auto line = line_info(s, message_pos);
672         std::string msg;
673         if (auto unexpected_token = heuristic_error_token(s, n, message_pos);
674             !unexpected_token.empty()) {
675           msg = replace_all(message, "%t", unexpected_token);
676 
677           auto unexpected_char = unexpected_token.substr(
678               0, codepoint_length(unexpected_token.data(),
679                                   unexpected_token.size()));
680 
681           msg = replace_all(msg, "%c", unexpected_char);
682         } else {
683           msg = message;
684         }
685         log(line.first, line.second, msg);
686       }
687     } else if (error_pos) {
688       if (error_pos > last_output_pos) {
689         last_output_pos = error_pos;
690         auto line = line_info(s, error_pos);
691 
692         std::string msg;
693         if (expected_tokens.empty()) {
694           msg = "syntax error.";
695         } else {
696           msg = "syntax error";
697 
698           // unexpected token
699           if (auto unexpected_token = heuristic_error_token(s, n, error_pos);
700               !unexpected_token.empty()) {
701             msg += ", unexpected '";
702             msg += unexpected_token;
703             msg += "'";
704           }
705 
706           auto first_item = true;
707           size_t i = 0;
708           while (i < expected_tokens.size()) {
709             auto [token, is_literal] =
710                 expected_tokens[expected_tokens.size() - i - 1];
711 
712             // Skip rules start with '_'
713             if (!is_literal && token[0] != '_') {
714               msg += (first_item ? ", expecting " : ", ");
715               if (is_literal) {
716                 msg += "'";
717                 msg += token;
718                 msg += "'";
719               } else {
720                 msg += "<";
721                 msg += token;
722                 msg += ">";
723               }
724               first_item = false;
725             }
726 
727             i++;
728           }
729           msg += ".";
730         }
731 
732         log(line.first, line.second, msg);
733       }
734     }
735   }
736 
737 private:
738   int cast_char(char c) const { return static_cast<unsigned char>(c); }
739 
740   std::string heuristic_error_token(const char *s, size_t n,
741                                     const char *pos) const {
742     auto len = n - std::distance(s, pos);
743     if (len) {
744       size_t i = 0;
745       auto c = cast_char(pos[i++]);
746       if (!std::ispunct(c) && !std::isspace(c)) {
747         while (i < len && !std::ispunct(cast_char(pos[i])) &&
748                !std::isspace(cast_char(pos[i]))) {
749           i++;
750         }
751       }
752 
753       size_t count = 8;
754       size_t j = 0;
755       while (count > 0 && j < i) {
756         j += codepoint_length(&pos[j], i - j);
757         count--;
758       }
759 
760       return escape_characters(pos, j);
761     }
762     return std::string();
763   }
764 
765   std::string replace_all(std::string str, const std::string &from,
766                           const std::string &to) const {
767     size_t pos = 0;
768     while ((pos = str.find(from, pos)) != std::string::npos) {
769       str.replace(pos, from.length(), to);
770       pos += to.length();
771     }
772     return str;
773   }
774 };
775 
776 /*
777  * Context
778  */
779 class Context;
780 class Ope;
781 class Definition;
782 
783 using TracerEnter = std::function<void(const Ope &name, const char *s, size_t n,
784                                        const SemanticValues &vs,
785                                        const Context &c, const std::any &dt)>;
786 
787 using TracerLeave = std::function<void(
788     const Ope &ope, const char *s, size_t n, const SemanticValues &vs,
789     const Context &c, const std::any &dt, size_t)>;
790 
791 class Context {
792 public:
793   const char *path;
794   const char *s;
795   const size_t l;
796   std::vector<size_t> source_line_index;
797 
798   ErrorInfo error_info;
799   bool recovered = false;
800 
801   std::vector<std::shared_ptr<SemanticValues>> value_stack;
802   size_t value_stack_size = 0;
803 
804   std::vector<Definition *> rule_stack;
805   std::vector<std::vector<std::shared_ptr<Ope>>> args_stack;
806 
807   size_t in_token_boundary_count = 0;
808 
809   std::shared_ptr<Ope> whitespaceOpe;
810   bool in_whitespace = false;
811 
812   std::shared_ptr<Ope> wordOpe;
813 
814   std::vector<std::map<std::string_view, std::string>> capture_scope_stack;
815   size_t capture_scope_stack_size = 0;
816 
817   std::vector<bool> cut_stack;
818 
819   const size_t def_count;
820   const bool enablePackratParsing;
821   std::vector<bool> cache_registered;
822   std::vector<bool> cache_success;
823 
824   std::map<std::pair<size_t, size_t>, std::tuple<size_t, std::any>>
825       cache_values;
826 
827   TracerEnter tracer_enter;
828   TracerLeave tracer_leave;
829 
830   Log log;
831 
832   Context(const char *path, const char *s, size_t l, size_t def_count,
833           std::shared_ptr<Ope> whitespaceOpe, std::shared_ptr<Ope> wordOpe,
834           bool enablePackratParsing, TracerEnter tracer_enter,
835           TracerLeave tracer_leave, Log log)
836       : path(path), s(s), l(l), whitespaceOpe(whitespaceOpe), wordOpe(wordOpe),
837         def_count(def_count), enablePackratParsing(enablePackratParsing),
838         cache_registered(enablePackratParsing ? def_count * (l + 1) : 0),
839         cache_success(enablePackratParsing ? def_count * (l + 1) : 0),
840         tracer_enter(tracer_enter), tracer_leave(tracer_leave), log(log) {
841 
842     args_stack.resize(1);
843 
844     push_capture_scope();
845   }
846 
847   ~Context() { assert(!value_stack_size); }
848 
849   Context(const Context &) = delete;
850   Context(Context &&) = delete;
851   Context operator=(const Context &) = delete;
852 
853   template <typename T>
854   void packrat(const char *a_s, size_t def_id, size_t &len, std::any &val,
855                T fn) {
856     if (!enablePackratParsing) {
857       fn(val);
858       return;
859     }
860 
861     auto col = a_s - s;
862     auto idx = def_count * static_cast<size_t>(col) + def_id;
863 
864     if (cache_registered[idx]) {
865       if (cache_success[idx]) {
866         auto key = std::pair(col, def_id);
867         std::tie(len, val) = cache_values[key];
868         return;
869       } else {
870         len = static_cast<size_t>(-1);
871         return;
872       }
873     } else {
874       fn(val);
875       cache_registered[idx] = true;
876       cache_success[idx] = success(len);
877       if (success(len)) {
878         auto key = std::pair(col, def_id);
879         cache_values[key] = std::pair(len, val);
880       }
881       return;
882     }
883   }
884 
885   SemanticValues &push() {
886     assert(value_stack_size <= value_stack.size());
887     if (value_stack_size == value_stack.size()) {
888       value_stack.emplace_back(std::make_shared<SemanticValues>());
889     } else {
890       auto &vs = *value_stack[value_stack_size];
891       if (!vs.empty()) {
892         vs.clear();
893         if (!vs.tags.empty()) { vs.tags.clear(); }
894       }
895       vs.sv_ = std::string_view();
896       vs.choice_count_ = 0;
897       vs.choice_ = 0;
898       if (!vs.tokens.empty()) { vs.tokens.clear(); }
899     }
900 
901     auto &vs = *value_stack[value_stack_size++];
902     vs.path = path;
903     vs.ss = s;
904     vs.source_line_index = [&]() -> const std::vector<size_t> & {
905       if (source_line_index.empty()) {
906         for (size_t pos = 0; pos < l; pos++) {
907           if (s[pos] == '\n') { source_line_index.push_back(pos); }
908         }
909         source_line_index.push_back(l);
910       }
911       return source_line_index;
912     };
913 
914     return vs;
915   }
916 
917   void pop() { value_stack_size--; }
918 
919   void push_args(std::vector<std::shared_ptr<Ope>> &&args) {
920     args_stack.emplace_back(args);
921   }
922 
923   void pop_args() { args_stack.pop_back(); }
924 
925   const std::vector<std::shared_ptr<Ope>> &top_args() const {
926     return args_stack[args_stack.size() - 1];
927   }
928 
929   void push_capture_scope() {
930     assert(capture_scope_stack_size <= capture_scope_stack.size());
931     if (capture_scope_stack_size == capture_scope_stack.size()) {
932       capture_scope_stack.emplace_back(
933           std::map<std::string_view, std::string>());
934     } else {
935       auto &cs = capture_scope_stack[capture_scope_stack_size];
936       if (!cs.empty()) { cs.clear(); }
937     }
938     capture_scope_stack_size++;
939   }
940 
941   void pop_capture_scope() { capture_scope_stack_size--; }
942 
943   void shift_capture_values() {
944     assert(capture_scope_stack.size() >= 2);
945     auto curr = &capture_scope_stack[capture_scope_stack_size - 1];
946     auto prev = curr - 1;
947     for (const auto &[k, v] : *curr) {
948       (*prev)[k] = v;
949     }
950   }
951 
952   void set_error_pos(const char *a_s, const char *literal = nullptr);
953 
954   // void trace_enter(const char *name, const char *a_s, size_t n,
955   void trace_enter(const Ope &ope, const char *a_s, size_t n,
956                    SemanticValues &vs, std::any &dt) const;
957   // void trace_leave(const char *name, const char *a_s, size_t n,
958   void trace_leave(const Ope &ope, const char *a_s, size_t n,
959                    SemanticValues &vs, std::any &dt, size_t len) const;
960   bool is_traceable(const Ope &ope) const;
961 
962   mutable size_t next_trace_id = 0;
963   mutable std::list<size_t> trace_ids;
964 };
965 
966 /*
967  * Parser operators
968  */
969 class Ope {
970 public:
971   struct Visitor;
972 
973   virtual ~Ope() = default;
974   size_t parse(const char *s, size_t n, SemanticValues &vs, Context &c,
975                std::any &dt) const;
976   virtual size_t parse_core(const char *s, size_t n, SemanticValues &vs,
977                             Context &c, std::any &dt) const = 0;
978   virtual void accept(Visitor &v) = 0;
979 };
980 
981 class Sequence : public Ope {
982 public:
983   template <typename... Args>
984   Sequence(const Args &... args)
985       : opes_{static_cast<std::shared_ptr<Ope>>(args)...} {}
986   Sequence(const std::vector<std::shared_ptr<Ope>> &opes) : opes_(opes) {}
987   Sequence(std::vector<std::shared_ptr<Ope>> &&opes) : opes_(opes) {}
988 
989   size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
990                     std::any &dt) const override {
991     auto &chldsv = c.push();
992     auto pop_se = scope_exit([&]() { c.pop(); });
993     size_t i = 0;
994     for (const auto &ope : opes_) {
995       const auto &rule = *ope;
996       auto len = rule.parse(s + i, n - i, chldsv, c, dt);
997       if (fail(len)) { return len; }
998       i += len;
999     }
1000     if (!chldsv.empty()) {
1001       for (size_t j = 0; j < chldsv.size(); j++) {
1002         vs.emplace_back(std::move(chldsv[j]));
1003       }
1004     }
1005     if (!chldsv.tags.empty()) {
1006       for (size_t j = 0; j < chldsv.tags.size(); j++) {
1007         vs.tags.emplace_back(std::move(chldsv.tags[j]));
1008       }
1009     }
1010     vs.sv_ = chldsv.sv_;
1011     if (!chldsv.tokens.empty()) {
1012       for (size_t j = 0; j < chldsv.tokens.size(); j++) {
1013         vs.tokens.emplace_back(std::move(chldsv.tokens[j]));
1014       }
1015     }
1016     return i;
1017   }
1018 
1019   void accept(Visitor &v) override;
1020 
1021   std::vector<std::shared_ptr<Ope>> opes_;
1022 };
1023 
1024 class PrioritizedChoice : public Ope {
1025 public:
1026   template <typename... Args>
1027   PrioritizedChoice(bool for_label, const Args &... args)
1028       : opes_{static_cast<std::shared_ptr<Ope>>(args)...},
1029         for_label_(for_label) {}
1030   PrioritizedChoice(const std::vector<std::shared_ptr<Ope>> &opes)
1031       : opes_(opes) {}
1032   PrioritizedChoice(std::vector<std::shared_ptr<Ope>> &&opes) : opes_(opes) {}
1033 
1034   size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1035                     std::any &dt) const override {
1036     size_t len = static_cast<size_t>(-1);
1037 
1038     if (!for_label_) { c.cut_stack.push_back(false); }
1039 
1040     size_t id = 0;
1041     for (const auto &ope : opes_) {
1042       if (!c.cut_stack.empty()) { c.cut_stack.back() = false; }
1043 
1044       auto &chldsv = c.push();
1045       c.push_capture_scope();
1046 
1047       auto se = scope_exit([&]() {
1048         c.pop();
1049         c.pop_capture_scope();
1050       });
1051 
1052       len = ope->parse(s, n, chldsv, c, dt);
1053 
1054       if (success(len)) {
1055         if (!chldsv.empty()) {
1056           for (size_t i = 0; i < chldsv.size(); i++) {
1057             vs.emplace_back(std::move(chldsv[i]));
1058           }
1059         }
1060         if (!chldsv.tags.empty()) {
1061           for (size_t i = 0; i < chldsv.tags.size(); i++) {
1062             vs.tags.emplace_back(std::move(chldsv.tags[i]));
1063           }
1064         }
1065         vs.sv_ = chldsv.sv_;
1066         vs.choice_count_ = opes_.size();
1067         vs.choice_ = id;
1068         if (!chldsv.tokens.empty()) {
1069           for (size_t i = 0; i < chldsv.tokens.size(); i++) {
1070             vs.tokens.emplace_back(std::move(chldsv.tokens[i]));
1071           }
1072         }
1073         c.shift_capture_values();
1074         break;
1075       } else if (!c.cut_stack.empty() && c.cut_stack.back()) {
1076         break;
1077       }
1078 
1079       id++;
1080     }
1081 
1082     if (!for_label_) { c.cut_stack.pop_back(); }
1083 
1084     return len;
1085   }
1086 
1087   void accept(Visitor &v) override;
1088 
1089   size_t size() const { return opes_.size(); }
1090 
1091   std::vector<std::shared_ptr<Ope>> opes_;
1092   bool for_label_ = false;
1093 };
1094 
1095 class Repetition : public Ope {
1096 public:
1097   Repetition(const std::shared_ptr<Ope> &ope, size_t min, size_t max)
1098       : ope_(ope), min_(min), max_(max) {}
1099 
1100   size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1101                     std::any &dt) const override {
1102     size_t count = 0;
1103     size_t i = 0;
1104     while (count < min_) {
1105       c.push_capture_scope();
1106       auto se = scope_exit([&]() { c.pop_capture_scope(); });
1107       const auto &rule = *ope_;
1108       auto len = rule.parse(s + i, n - i, vs, c, dt);
1109       if (success(len)) {
1110         c.shift_capture_values();
1111       } else {
1112         return len;
1113       }
1114       i += len;
1115       count++;
1116     }
1117 
1118     while (n - i > 0 && count < max_) {
1119       c.push_capture_scope();
1120       auto se = scope_exit([&]() { c.pop_capture_scope(); });
1121       auto save_sv_size = vs.size();
1122       auto save_tok_size = vs.tokens.size();
1123       const auto &rule = *ope_;
1124       auto len = rule.parse(s + i, n - i, vs, c, dt);
1125       if (success(len)) {
1126         c.shift_capture_values();
1127       } else {
1128         if (vs.size() != save_sv_size) {
1129           vs.erase(vs.begin() + static_cast<std::ptrdiff_t>(save_sv_size));
1130           vs.tags.erase(vs.tags.begin() +
1131                         static_cast<std::ptrdiff_t>(save_sv_size));
1132         }
1133         if (vs.tokens.size() != save_tok_size) {
1134           vs.tokens.erase(vs.tokens.begin() +
1135                           static_cast<std::ptrdiff_t>(save_tok_size));
1136         }
1137         break;
1138       }
1139       i += len;
1140       count++;
1141     }
1142     return i;
1143   }
1144 
1145   void accept(Visitor &v) override;
1146 
1147   bool is_zom() const {
1148     return min_ == 0 && max_ == std::numeric_limits<size_t>::max();
1149   }
1150 
1151   static std::shared_ptr<Repetition> zom(const std::shared_ptr<Ope> &ope) {
1152     return std::make_shared<Repetition>(ope, 0,
1153                                         std::numeric_limits<size_t>::max());
1154   }
1155 
1156   static std::shared_ptr<Repetition> oom(const std::shared_ptr<Ope> &ope) {
1157     return std::make_shared<Repetition>(ope, 1,
1158                                         std::numeric_limits<size_t>::max());
1159   }
1160 
1161   static std::shared_ptr<Repetition> opt(const std::shared_ptr<Ope> &ope) {
1162     return std::make_shared<Repetition>(ope, 0, 1);
1163   }
1164 
1165   std::shared_ptr<Ope> ope_;
1166   size_t min_;
1167   size_t max_;
1168 };
1169 
1170 class AndPredicate : public Ope {
1171 public:
1172   AndPredicate(const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1173 
1174   size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
1175                     Context &c, std::any &dt) const override {
1176     auto &chldsv = c.push();
1177     c.push_capture_scope();
1178     auto se = scope_exit([&]() {
1179       c.pop();
1180       c.pop_capture_scope();
1181     });
1182     const auto &rule = *ope_;
1183     auto len = rule.parse(s, n, chldsv, c, dt);
1184     if (success(len)) {
1185       return 0;
1186     } else {
1187       return len;
1188     }
1189   }
1190 
1191   void accept(Visitor &v) override;
1192 
1193   std::shared_ptr<Ope> ope_;
1194 };
1195 
1196 class NotPredicate : public Ope {
1197 public:
1198   NotPredicate(const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1199 
1200   size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
1201                     Context &c, std::any &dt) const override {
1202     auto &chldsv = c.push();
1203     c.push_capture_scope();
1204     auto se = scope_exit([&]() {
1205       c.pop();
1206       c.pop_capture_scope();
1207     });
1208     auto len = ope_->parse(s, n, chldsv, c, dt);
1209     if (success(len)) {
1210       c.set_error_pos(s);
1211       return static_cast<size_t>(-1);
1212     } else {
1213       return 0;
1214     }
1215   }
1216 
1217   void accept(Visitor &v) override;
1218 
1219   std::shared_ptr<Ope> ope_;
1220 };
1221 
1222 class Dictionary : public Ope, public std::enable_shared_from_this<Dictionary> {
1223 public:
1224   Dictionary(const std::vector<std::string> &v) : trie_(v) {}
1225 
1226   size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1227                     std::any &dt) const override;
1228 
1229   void accept(Visitor &v) override;
1230 
1231   Trie trie_;
1232 };
1233 
1234 class LiteralString : public Ope,
1235                       public std::enable_shared_from_this<LiteralString> {
1236 public:
1237   LiteralString(std::string &&s, bool ignore_case)
1238       : lit_(s), ignore_case_(ignore_case), is_word_(false) {}
1239 
1240   LiteralString(const std::string &s, bool ignore_case)
1241       : lit_(s), ignore_case_(ignore_case), is_word_(false) {}
1242 
1243   size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1244                     std::any &dt) const override;
1245 
1246   void accept(Visitor &v) override;
1247 
1248   std::string lit_;
1249   bool ignore_case_;
1250   mutable std::once_flag init_is_word_;
1251   mutable bool is_word_;
1252 };
1253 
1254 class CharacterClass : public Ope,
1255                        public std::enable_shared_from_this<CharacterClass> {
1256 public:
1257   CharacterClass(const std::string &s, bool negated) : negated_(negated) {
1258     auto chars = decode(s.data(), s.length());
1259     auto i = 0u;
1260     while (i < chars.size()) {
1261       if (i + 2 < chars.size() && chars[i + 1] == '-') {
1262         auto cp1 = chars[i];
1263         auto cp2 = chars[i + 2];
1264         ranges_.emplace_back(std::pair(cp1, cp2));
1265         i += 3;
1266       } else {
1267         auto cp = chars[i];
1268         ranges_.emplace_back(std::pair(cp, cp));
1269         i += 1;
1270       }
1271     }
1272     assert(!ranges_.empty());
1273   }
1274 
1275   CharacterClass(const std::vector<std::pair<char32_t, char32_t>> &ranges,
1276                  bool negated)
1277       : ranges_(ranges), negated_(negated) {
1278     assert(!ranges_.empty());
1279   }
1280 
1281   size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
1282                     Context &c, std::any & /*dt*/) const override {
1283     if (n < 1) {
1284       c.set_error_pos(s);
1285       return static_cast<size_t>(-1);
1286     }
1287 
1288     char32_t cp = 0;
1289     auto len = decode_codepoint(s, n, cp);
1290 
1291     for (const auto &range : ranges_) {
1292       if (range.first <= cp && cp <= range.second) {
1293         if (negated_) {
1294           c.set_error_pos(s);
1295           return static_cast<size_t>(-1);
1296         } else {
1297           return len;
1298         }
1299       }
1300     }
1301 
1302     if (negated_) {
1303       return len;
1304     } else {
1305       c.set_error_pos(s);
1306       return static_cast<size_t>(-1);
1307     }
1308   }
1309 
1310   void accept(Visitor &v) override;
1311 
1312   std::vector<std::pair<char32_t, char32_t>> ranges_;
1313   bool negated_;
1314 };
1315 
1316 class Character : public Ope, public std::enable_shared_from_this<Character> {
1317 public:
1318   Character(char ch) : ch_(ch) {}
1319 
1320   size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
1321                     Context &c, std::any & /*dt*/) const override {
1322     if (n < 1 || s[0] != ch_) {
1323       c.set_error_pos(s);
1324       return static_cast<size_t>(-1);
1325     }
1326     return 1;
1327   }
1328 
1329   void accept(Visitor &v) override;
1330 
1331   char ch_;
1332 };
1333 
1334 class AnyCharacter : public Ope,
1335                      public std::enable_shared_from_this<AnyCharacter> {
1336 public:
1337   size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
1338                     Context &c, std::any & /*dt*/) const override {
1339     auto len = codepoint_length(s, n);
1340     if (len < 1) {
1341       c.set_error_pos(s);
1342       return static_cast<size_t>(-1);
1343     }
1344     return len;
1345   }
1346 
1347   void accept(Visitor &v) override;
1348 };
1349 
1350 class CaptureScope : public Ope {
1351 public:
1352   CaptureScope(const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1353 
1354   size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1355                     std::any &dt) const override {
1356     c.push_capture_scope();
1357     auto se = scope_exit([&]() { c.pop_capture_scope(); });
1358     const auto &rule = *ope_;
1359     auto len = rule.parse(s, n, vs, c, dt);
1360     return len;
1361   }
1362 
1363   void accept(Visitor &v) override;
1364 
1365   std::shared_ptr<Ope> ope_;
1366 };
1367 
1368 class Capture : public Ope {
1369 public:
1370   using MatchAction = std::function<void(const char *s, size_t n, Context &c)>;
1371 
1372   Capture(const std::shared_ptr<Ope> &ope, MatchAction ma)
1373       : ope_(ope), match_action_(ma) {}
1374 
1375   size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1376                     std::any &dt) const override {
1377     const auto &rule = *ope_;
1378     auto len = rule.parse(s, n, vs, c, dt);
1379     if (success(len) && match_action_) { match_action_(s, len, c); }
1380     return len;
1381   }
1382 
1383   void accept(Visitor &v) override;
1384 
1385   std::shared_ptr<Ope> ope_;
1386   MatchAction match_action_;
1387 };
1388 
1389 class TokenBoundary : public Ope {
1390 public:
1391   TokenBoundary(const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1392 
1393   size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1394                     std::any &dt) const override;
1395 
1396   void accept(Visitor &v) override;
1397 
1398   std::shared_ptr<Ope> ope_;
1399 };
1400 
1401 class Ignore : public Ope {
1402 public:
1403   Ignore(const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1404 
1405   size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
1406                     Context &c, std::any &dt) const override {
1407     const auto &rule = *ope_;
1408     auto &chldsv = c.push();
1409     auto se = scope_exit([&]() { c.pop(); });
1410     return rule.parse(s, n, chldsv, c, dt);
1411   }
1412 
1413   void accept(Visitor &v) override;
1414 
1415   std::shared_ptr<Ope> ope_;
1416 };
1417 
1418 using Parser = std::function<size_t(const char *s, size_t n, SemanticValues &vs,
1419                                     std::any &dt)>;
1420 
1421 class User : public Ope {
1422 public:
1423   User(Parser fn) : fn_(fn) {}
1424   size_t parse_core(const char *s, size_t n, SemanticValues &vs,
1425                     Context & /*c*/, std::any &dt) const override {
1426     assert(fn_);
1427     return fn_(s, n, vs, dt);
1428   }
1429   void accept(Visitor &v) override;
1430   std::function<size_t(const char *s, size_t n, SemanticValues &vs,
1431                        std::any &dt)>
1432       fn_;
1433 };
1434 
1435 class WeakHolder : public Ope {
1436 public:
1437   WeakHolder(const std::shared_ptr<Ope> &ope) : weak_(ope) {}
1438 
1439   size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1440                     std::any &dt) const override {
1441     auto ope = weak_.lock();
1442     assert(ope);
1443     const auto &rule = *ope;
1444     return rule.parse(s, n, vs, c, dt);
1445   }
1446 
1447   void accept(Visitor &v) override;
1448 
1449   std::weak_ptr<Ope> weak_;
1450 };
1451 
1452 class Holder : public Ope {
1453 public:
1454   Holder(Definition *outer) : outer_(outer) {}
1455 
1456   size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1457                     std::any &dt) const override;
1458 
1459   void accept(Visitor &v) override;
1460 
1461   std::any reduce(SemanticValues &vs, std::any &dt) const;
1462 
1463   const char *trace_name() const;
1464 
1465   std::shared_ptr<Ope> ope_;
1466   Definition *outer_;
1467   mutable std::string trace_name_;
1468 
1469   friend class Definition;
1470 };
1471 
1472 using Grammar = std::unordered_map<std::string, Definition>;
1473 
1474 class Reference : public Ope, public std::enable_shared_from_this<Reference> {
1475 public:
1476   Reference(const Grammar &grammar, const std::string &name, const char *s,
1477             bool is_macro, const std::vector<std::shared_ptr<Ope>> &args)
1478       : grammar_(grammar), name_(name), s_(s), is_macro_(is_macro), args_(args),
1479         rule_(nullptr), iarg_(0) {}
1480 
1481   size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1482                     std::any &dt) const override;
1483 
1484   void accept(Visitor &v) override;
1485 
1486   std::shared_ptr<Ope> get_core_operator() const;
1487 
1488   const Grammar &grammar_;
1489   const std::string name_;
1490   const char *s_;
1491 
1492   const bool is_macro_;
1493   const std::vector<std::shared_ptr<Ope>> args_;
1494 
1495   Definition *rule_;
1496   size_t iarg_;
1497 };
1498 
1499 class Whitespace : public Ope {
1500 public:
1501   Whitespace(const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1502 
1503   size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1504                     std::any &dt) const override {
1505     if (c.in_whitespace) { return 0; }
1506     c.in_whitespace = true;
1507     auto se = scope_exit([&]() { c.in_whitespace = false; });
1508     const auto &rule = *ope_;
1509     return rule.parse(s, n, vs, c, dt);
1510   }
1511 
1512   void accept(Visitor &v) override;
1513 
1514   std::shared_ptr<Ope> ope_;
1515 };
1516 
1517 class BackReference : public Ope {
1518 public:
1519   BackReference(std::string &&name) : name_(name) {}
1520 
1521   BackReference(const std::string &name) : name_(name) {}
1522 
1523   size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1524                     std::any &dt) const override;
1525 
1526   void accept(Visitor &v) override;
1527 
1528   std::string name_;
1529 };
1530 
1531 class PrecedenceClimbing : public Ope {
1532 public:
1533   using BinOpeInfo = std::map<std::string_view, std::pair<size_t, char>>;
1534 
1535   PrecedenceClimbing(const std::shared_ptr<Ope> &atom,
1536                      const std::shared_ptr<Ope> &binop, const BinOpeInfo &info,
1537                      const Definition &rule)
1538       : atom_(atom), binop_(binop), info_(info), rule_(rule) {}
1539 
1540   size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1541                     std::any &dt) const override {
1542     return parse_expression(s, n, vs, c, dt, 0);
1543   }
1544 
1545   void accept(Visitor &v) override;
1546 
1547   std::shared_ptr<Ope> atom_;
1548   std::shared_ptr<Ope> binop_;
1549   BinOpeInfo info_;
1550   const Definition &rule_;
1551 
1552 private:
1553   size_t parse_expression(const char *s, size_t n, SemanticValues &vs,
1554                           Context &c, std::any &dt, size_t min_prec) const;
1555 
1556   Definition &get_reference_for_binop(Context &c) const;
1557 };
1558 
1559 class Recovery : public Ope {
1560 public:
1561   Recovery(const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1562 
1563   size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1564                     std::any &dt) const override;
1565 
1566   void accept(Visitor &v) override;
1567 
1568   std::shared_ptr<Ope> ope_;
1569 };
1570 
1571 class Cut : public Ope, public std::enable_shared_from_this<Cut> {
1572 public:
1573   size_t parse_core(const char * /*s*/, size_t /*n*/, SemanticValues & /*vs*/,
1574                     Context &c, std::any & /*dt*/) const override {
1575     c.cut_stack.back() = true;
1576     return 0;
1577   }
1578 
1579   void accept(Visitor &v) override;
1580 };
1581 
1582 /*
1583  * Factories
1584  */
1585 template <typename... Args> std::shared_ptr<Ope> seq(Args &&... args) {
1586   return std::make_shared<Sequence>(static_cast<std::shared_ptr<Ope>>(args)...);
1587 }
1588 
1589 template <typename... Args> std::shared_ptr<Ope> cho(Args &&... args) {
1590   return std::make_shared<PrioritizedChoice>(
1591       false, static_cast<std::shared_ptr<Ope>>(args)...);
1592 }
1593 
1594 template <typename... Args> std::shared_ptr<Ope> cho4label_(Args &&... args) {
1595   return std::make_shared<PrioritizedChoice>(
1596       true, static_cast<std::shared_ptr<Ope>>(args)...);
1597 }
1598 
1599 inline std::shared_ptr<Ope> zom(const std::shared_ptr<Ope> &ope) {
1600   return Repetition::zom(ope);
1601 }
1602 
1603 inline std::shared_ptr<Ope> oom(const std::shared_ptr<Ope> &ope) {
1604   return Repetition::oom(ope);
1605 }
1606 
1607 inline std::shared_ptr<Ope> opt(const std::shared_ptr<Ope> &ope) {
1608   return Repetition::opt(ope);
1609 }
1610 
1611 inline std::shared_ptr<Ope> rep(const std::shared_ptr<Ope> &ope, size_t min,
1612                                 size_t max) {
1613   return std::make_shared<Repetition>(ope, min, max);
1614 }
1615 
1616 inline std::shared_ptr<Ope> apd(const std::shared_ptr<Ope> &ope) {
1617   return std::make_shared<AndPredicate>(ope);
1618 }
1619 
1620 inline std::shared_ptr<Ope> npd(const std::shared_ptr<Ope> &ope) {
1621   return std::make_shared<NotPredicate>(ope);
1622 }
1623 
1624 inline std::shared_ptr<Ope> dic(const std::vector<std::string> &v) {
1625   return std::make_shared<Dictionary>(v);
1626 }
1627 
1628 inline std::shared_ptr<Ope> lit(std::string &&s) {
1629   return std::make_shared<LiteralString>(s, false);
1630 }
1631 
1632 inline std::shared_ptr<Ope> liti(std::string &&s) {
1633   return std::make_shared<LiteralString>(s, true);
1634 }
1635 
1636 inline std::shared_ptr<Ope> cls(const std::string &s) {
1637   return std::make_shared<CharacterClass>(s, false);
1638 }
1639 
1640 inline std::shared_ptr<Ope>
1641 cls(const std::vector<std::pair<char32_t, char32_t>> &ranges) {
1642   return std::make_shared<CharacterClass>(ranges, false);
1643 }
1644 
1645 inline std::shared_ptr<Ope> ncls(const std::string &s) {
1646   return std::make_shared<CharacterClass>(s, true);
1647 }
1648 
1649 inline std::shared_ptr<Ope>
1650 ncls(const std::vector<std::pair<char32_t, char32_t>> &ranges) {
1651   return std::make_shared<CharacterClass>(ranges, true);
1652 }
1653 
1654 inline std::shared_ptr<Ope> chr(char dt) {
1655   return std::make_shared<Character>(dt);
1656 }
1657 
1658 inline std::shared_ptr<Ope> dot() { return std::make_shared<AnyCharacter>(); }
1659 
1660 inline std::shared_ptr<Ope> csc(const std::shared_ptr<Ope> &ope) {
1661   return std::make_shared<CaptureScope>(ope);
1662 }
1663 
1664 inline std::shared_ptr<Ope> cap(const std::shared_ptr<Ope> &ope,
1665                                 Capture::MatchAction ma) {
1666   return std::make_shared<Capture>(ope, ma);
1667 }
1668 
1669 inline std::shared_ptr<Ope> tok(const std::shared_ptr<Ope> &ope) {
1670   return std::make_shared<TokenBoundary>(ope);
1671 }
1672 
1673 inline std::shared_ptr<Ope> ign(const std::shared_ptr<Ope> &ope) {
1674   return std::make_shared<Ignore>(ope);
1675 }
1676 
1677 inline std::shared_ptr<Ope>
1678 usr(std::function<size_t(const char *s, size_t n, SemanticValues &vs,
1679                          std::any &dt)>
1680         fn) {
1681   return std::make_shared<User>(fn);
1682 }
1683 
1684 inline std::shared_ptr<Ope> ref(const Grammar &grammar, const std::string &name,
1685                                 const char *s, bool is_macro,
1686                                 const std::vector<std::shared_ptr<Ope>> &args) {
1687   return std::make_shared<Reference>(grammar, name, s, is_macro, args);
1688 }
1689 
1690 inline std::shared_ptr<Ope> wsp(const std::shared_ptr<Ope> &ope) {
1691   return std::make_shared<Whitespace>(std::make_shared<Ignore>(ope));
1692 }
1693 
1694 inline std::shared_ptr<Ope> bkr(std::string &&name) {
1695   return std::make_shared<BackReference>(name);
1696 }
1697 
1698 inline std::shared_ptr<Ope> pre(const std::shared_ptr<Ope> &atom,
1699                                 const std::shared_ptr<Ope> &binop,
1700                                 const PrecedenceClimbing::BinOpeInfo &info,
1701                                 const Definition &rule) {
1702   return std::make_shared<PrecedenceClimbing>(atom, binop, info, rule);
1703 }
1704 
1705 inline std::shared_ptr<Ope> rec(const std::shared_ptr<Ope> &ope) {
1706   return std::make_shared<Recovery>(ope);
1707 }
1708 
1709 inline std::shared_ptr<Ope> cut() { return std::make_shared<Cut>(); }
1710 
1711 /*
1712  * Visitor
1713  */
1714 struct Ope::Visitor {
1715   virtual ~Visitor() {}
1716   virtual void visit(Sequence &) {}
1717   virtual void visit(PrioritizedChoice &) {}
1718   virtual void visit(Repetition &) {}
1719   virtual void visit(AndPredicate &) {}
1720   virtual void visit(NotPredicate &) {}
1721   virtual void visit(Dictionary &) {}
1722   virtual void visit(LiteralString &) {}
1723   virtual void visit(CharacterClass &) {}
1724   virtual void visit(Character &) {}
1725   virtual void visit(AnyCharacter &) {}
1726   virtual void visit(CaptureScope &) {}
1727   virtual void visit(Capture &) {}
1728   virtual void visit(TokenBoundary &) {}
1729   virtual void visit(Ignore &) {}
1730   virtual void visit(User &) {}
1731   virtual void visit(WeakHolder &) {}
1732   virtual void visit(Holder &) {}
1733   virtual void visit(Reference &) {}
1734   virtual void visit(Whitespace &) {}
1735   virtual void visit(BackReference &) {}
1736   virtual void visit(PrecedenceClimbing &) {}
1737   virtual void visit(Recovery &) {}
1738   virtual void visit(Cut &) {}
1739 };
1740 
1741 struct IsReference : public Ope::Visitor {
1742   void visit(Reference &) override { is_reference_ = true; }
1743 
1744   static bool check(Ope &ope) {
1745     IsReference vis;
1746     ope.accept(vis);
1747     return vis.is_reference_;
1748   }
1749 
1750 private:
1751   bool is_reference_ = false;
1752 };
1753 
1754 struct TraceOpeName : public Ope::Visitor {
1755   void visit(Sequence &) override { name_ = "Sequence"; }
1756   void visit(PrioritizedChoice &) override { name_ = "PrioritizedChoice"; }
1757   void visit(Repetition &) override { name_ = "Repetition"; }
1758   void visit(AndPredicate &) override { name_ = "AndPredicate"; }
1759   void visit(NotPredicate &) override { name_ = "NotPredicate"; }
1760   void visit(Dictionary &) override { name_ = "Dictionary"; }
1761   void visit(LiteralString &) override { name_ = "LiteralString"; }
1762   void visit(CharacterClass &) override { name_ = "CharacterClass"; }
1763   void visit(Character &) override { name_ = "Character"; }
1764   void visit(AnyCharacter &) override { name_ = "AnyCharacter"; }
1765   void visit(CaptureScope &) override { name_ = "CaptureScope"; }
1766   void visit(Capture &) override { name_ = "Capture"; }
1767   void visit(TokenBoundary &) override { name_ = "TokenBoundary"; }
1768   void visit(Ignore &) override { name_ = "Ignore"; }
1769   void visit(User &) override { name_ = "User"; }
1770   void visit(WeakHolder &) override { name_ = "WeakHolder"; }
1771   void visit(Holder &ope) override { name_ = ope.trace_name(); }
1772   void visit(Reference &) override { name_ = "Reference"; }
1773   void visit(Whitespace &) override { name_ = "Whitespace"; }
1774   void visit(BackReference &) override { name_ = "BackReference"; }
1775   void visit(PrecedenceClimbing &) override { name_ = "PrecedenceClimbing"; }
1776   void visit(Recovery &) override { name_ = "Recovery"; }
1777   void visit(Cut &) override { name_ = "Cut"; }
1778 
1779   static std::string get(Ope &ope) {
1780     TraceOpeName vis;
1781     ope.accept(vis);
1782     return vis.name_;
1783   }
1784 
1785 private:
1786   const char *name_ = nullptr;
1787 };
1788 
1789 struct AssignIDToDefinition : public Ope::Visitor {
1790   void visit(Sequence &ope) override {
1791     for (auto op : ope.opes_) {
1792       op->accept(*this);
1793     }
1794   }
1795   void visit(PrioritizedChoice &ope) override {
1796     for (auto op : ope.opes_) {
1797       op->accept(*this);
1798     }
1799   }
1800   void visit(Repetition &ope) override { ope.ope_->accept(*this); }
1801   void visit(AndPredicate &ope) override { ope.ope_->accept(*this); }
1802   void visit(NotPredicate &ope) override { ope.ope_->accept(*this); }
1803   void visit(CaptureScope &ope) override { ope.ope_->accept(*this); }
1804   void visit(Capture &ope) override { ope.ope_->accept(*this); }
1805   void visit(TokenBoundary &ope) override { ope.ope_->accept(*this); }
1806   void visit(Ignore &ope) override { ope.ope_->accept(*this); }
1807   void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); }
1808   void visit(Holder &ope) override;
1809   void visit(Reference &ope) override;
1810   void visit(Whitespace &ope) override { ope.ope_->accept(*this); }
1811   void visit(PrecedenceClimbing &ope) override;
1812   void visit(Recovery &ope) override { ope.ope_->accept(*this); }
1813 
1814   std::unordered_map<void *, size_t> ids;
1815 };
1816 
1817 struct IsLiteralToken : public Ope::Visitor {
1818   void visit(PrioritizedChoice &ope) override {
1819     for (auto op : ope.opes_) {
1820       if (!IsLiteralToken::check(*op)) { return; }
1821     }
1822     result_ = true;
1823   }
1824 
1825   void visit(Dictionary &) override { result_ = true; }
1826   void visit(LiteralString &) override { result_ = true; }
1827 
1828   static bool check(Ope &ope) {
1829     IsLiteralToken vis;
1830     ope.accept(vis);
1831     return vis.result_;
1832   }
1833 
1834 private:
1835   bool result_ = false;
1836 };
1837 
1838 struct TokenChecker : public Ope::Visitor {
1839   void visit(Sequence &ope) override {
1840     for (auto op : ope.opes_) {
1841       op->accept(*this);
1842     }
1843   }
1844   void visit(PrioritizedChoice &ope) override {
1845     for (auto op : ope.opes_) {
1846       op->accept(*this);
1847     }
1848   }
1849   void visit(Repetition &ope) override { ope.ope_->accept(*this); }
1850   void visit(CaptureScope &ope) override { ope.ope_->accept(*this); }
1851   void visit(Capture &ope) override { ope.ope_->accept(*this); }
1852   void visit(TokenBoundary &) override { has_token_boundary_ = true; }
1853   void visit(Ignore &ope) override { ope.ope_->accept(*this); }
1854   void visit(WeakHolder &) override { has_rule_ = true; }
1855   void visit(Holder &ope) override { ope.ope_->accept(*this); }
1856   void visit(Reference &ope) override;
1857   void visit(Whitespace &ope) override { ope.ope_->accept(*this); }
1858   void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); }
1859   void visit(Recovery &ope) override { ope.ope_->accept(*this); }
1860 
1861   static bool is_token(Ope &ope) {
1862     if (IsLiteralToken::check(ope)) { return true; }
1863 
1864     TokenChecker vis;
1865     ope.accept(vis);
1866     return vis.has_token_boundary_ || !vis.has_rule_;
1867   }
1868 
1869 private:
1870   bool has_token_boundary_ = false;
1871   bool has_rule_ = false;
1872 };
1873 
1874 struct FindLiteralToken : public Ope::Visitor {
1875   void visit(LiteralString &ope) override { token_ = ope.lit_.c_str(); }
1876   void visit(TokenBoundary &ope) override { ope.ope_->accept(*this); }
1877   void visit(Ignore &ope) override { ope.ope_->accept(*this); }
1878   void visit(Reference &ope) override;
1879   void visit(Recovery &ope) override { ope.ope_->accept(*this); }
1880 
1881   static const char *token(Ope &ope) {
1882     FindLiteralToken vis;
1883     ope.accept(vis);
1884     return vis.token_;
1885   }
1886 
1887 private:
1888   const char *token_ = nullptr;
1889 };
1890 
1891 struct DetectLeftRecursion : public Ope::Visitor {
1892   DetectLeftRecursion(const std::string &name) : name_(name) {}
1893 
1894   void visit(Sequence &ope) override {
1895     for (auto op : ope.opes_) {
1896       op->accept(*this);
1897       if (done_) {
1898         break;
1899       } else if (error_s) {
1900         done_ = true;
1901         break;
1902       }
1903     }
1904   }
1905   void visit(PrioritizedChoice &ope) override {
1906     for (auto op : ope.opes_) {
1907       op->accept(*this);
1908       if (error_s) {
1909         done_ = true;
1910         break;
1911       }
1912     }
1913   }
1914   void visit(Repetition &ope) override {
1915     ope.ope_->accept(*this);
1916     done_ = ope.min_ > 0;
1917   }
1918   void visit(AndPredicate &ope) override {
1919     ope.ope_->accept(*this);
1920     done_ = false;
1921   }
1922   void visit(NotPredicate &ope) override {
1923     ope.ope_->accept(*this);
1924     done_ = false;
1925   }
1926   void visit(Dictionary &) override { done_ = true; }
1927   void visit(LiteralString &ope) override { done_ = !ope.lit_.empty(); }
1928   void visit(CharacterClass &) override { done_ = true; }
1929   void visit(Character &) override { done_ = true; }
1930   void visit(AnyCharacter &) override { done_ = true; }
1931   void visit(CaptureScope &ope) override { ope.ope_->accept(*this); }
1932   void visit(Capture &ope) override { ope.ope_->accept(*this); }
1933   void visit(TokenBoundary &ope) override { ope.ope_->accept(*this); }
1934   void visit(Ignore &ope) override { ope.ope_->accept(*this); }
1935   void visit(User &) override { done_ = true; }
1936   void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); }
1937   void visit(Holder &ope) override { ope.ope_->accept(*this); }
1938   void visit(Reference &ope) override;
1939   void visit(Whitespace &ope) override { ope.ope_->accept(*this); }
1940   void visit(BackReference &) override { done_ = true; }
1941   void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); }
1942   void visit(Recovery &ope) override { ope.ope_->accept(*this); }
1943   void visit(Cut &) override { done_ = true; }
1944 
1945   const char *error_s = nullptr;
1946 
1947 private:
1948   std::string name_;
1949   std::set<std::string> refs_;
1950   bool done_ = false;
1951 };
1952 
1953 struct HasEmptyElement : public Ope::Visitor {
1954   HasEmptyElement(std::list<std::pair<const char *, std::string>> &refs)
1955       : refs_(refs) {}
1956 
1957   void visit(Sequence &ope) override {
1958     auto save_is_empty = false;
1959     const char *save_error_s = nullptr;
1960     std::string save_error_name;
1961     for (auto op : ope.opes_) {
1962       op->accept(*this);
1963       if (!is_empty) { return; }
1964       save_is_empty = is_empty;
1965       save_error_s = error_s;
1966       save_error_name = error_name;
1967       is_empty = false;
1968       error_name.clear();
1969     }
1970     is_empty = save_is_empty;
1971     error_s = save_error_s;
1972     error_name = save_error_name;
1973   }
1974   void visit(PrioritizedChoice &ope) override {
1975     for (auto op : ope.opes_) {
1976       op->accept(*this);
1977       if (is_empty) { return; }
1978     }
1979   }
1980   void visit(Repetition &ope) override {
1981     if (ope.min_ == 0) {
1982       set_error();
1983     } else {
1984       ope.ope_->accept(*this);
1985     }
1986   }
1987   void visit(AndPredicate &) override { set_error(); }
1988   void visit(NotPredicate &) override { set_error(); }
1989   void visit(LiteralString &ope) override {
1990     if (ope.lit_.empty()) { set_error(); }
1991   }
1992   void visit(CaptureScope &ope) override { ope.ope_->accept(*this); }
1993   void visit(Capture &ope) override { ope.ope_->accept(*this); }
1994   void visit(TokenBoundary &ope) override { ope.ope_->accept(*this); }
1995   void visit(Ignore &ope) override { ope.ope_->accept(*this); }
1996   void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); }
1997   void visit(Holder &ope) override { ope.ope_->accept(*this); }
1998   void visit(Reference &ope) override;
1999   void visit(Whitespace &ope) override { ope.ope_->accept(*this); }
2000   void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); }
2001   void visit(Recovery &ope) override { ope.ope_->accept(*this); }
2002 
2003   bool is_empty = false;
2004   const char *error_s = nullptr;
2005   std::string error_name;
2006 
2007 private:
2008   void set_error() {
2009     is_empty = true;
2010     tie(error_s, error_name) = refs_.back();
2011   }
2012   std::list<std::pair<const char *, std::string>> &refs_;
2013 };
2014 
2015 struct DetectInfiniteLoop : public Ope::Visitor {
2016   DetectInfiniteLoop(const char *s, const std::string &name) {
2017     refs_.emplace_back(s, name);
2018   }
2019 
2020   void visit(Sequence &ope) override {
2021     for (auto op : ope.opes_) {
2022       op->accept(*this);
2023       if (has_error) { return; }
2024     }
2025   }
2026   void visit(PrioritizedChoice &ope) override {
2027     for (auto op : ope.opes_) {
2028       op->accept(*this);
2029       if (has_error) { return; }
2030     }
2031   }
2032   void visit(Repetition &ope) override {
2033     if (ope.max_ == std::numeric_limits<size_t>::max()) {
2034       HasEmptyElement vis(refs_);
2035       ope.ope_->accept(vis);
2036       if (vis.is_empty) {
2037         has_error = true;
2038         error_s = vis.error_s;
2039         error_name = vis.error_name;
2040       }
2041     } else {
2042       ope.ope_->accept(*this);
2043     }
2044   }
2045   void visit(AndPredicate &ope) override { ope.ope_->accept(*this); }
2046   void visit(NotPredicate &ope) override { ope.ope_->accept(*this); }
2047   void visit(CaptureScope &ope) override { ope.ope_->accept(*this); }
2048   void visit(Capture &ope) override { ope.ope_->accept(*this); }
2049   void visit(TokenBoundary &ope) override { ope.ope_->accept(*this); }
2050   void visit(Ignore &ope) override { ope.ope_->accept(*this); }
2051   void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); }
2052   void visit(Holder &ope) override { ope.ope_->accept(*this); }
2053   void visit(Reference &ope) override;
2054   void visit(Whitespace &ope) override { ope.ope_->accept(*this); }
2055   void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); }
2056   void visit(Recovery &ope) override { ope.ope_->accept(*this); }
2057 
2058   bool has_error = false;
2059   const char *error_s = nullptr;
2060   std::string error_name;
2061 
2062 private:
2063   std::list<std::pair<const char *, std::string>> refs_;
2064 };
2065 
2066 struct ReferenceChecker : public Ope::Visitor {
2067   ReferenceChecker(const Grammar &grammar,
2068                    const std::vector<std::string> &params)
2069       : grammar_(grammar), params_(params) {}
2070 
2071   void visit(Sequence &ope) override {
2072     for (auto op : ope.opes_) {
2073       op->accept(*this);
2074     }
2075   }
2076   void visit(PrioritizedChoice &ope) override {
2077     for (auto op : ope.opes_) {
2078       op->accept(*this);
2079     }
2080   }
2081   void visit(Repetition &ope) override { ope.ope_->accept(*this); }
2082   void visit(AndPredicate &ope) override { ope.ope_->accept(*this); }
2083   void visit(NotPredicate &ope) override { ope.ope_->accept(*this); }
2084   void visit(CaptureScope &ope) override { ope.ope_->accept(*this); }
2085   void visit(Capture &ope) override { ope.ope_->accept(*this); }
2086   void visit(TokenBoundary &ope) override { ope.ope_->accept(*this); }
2087   void visit(Ignore &ope) override { ope.ope_->accept(*this); }
2088   void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); }
2089   void visit(Holder &ope) override { ope.ope_->accept(*this); }
2090   void visit(Reference &ope) override;
2091   void visit(Whitespace &ope) override { ope.ope_->accept(*this); }
2092   void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); }
2093   void visit(Recovery &ope) override { ope.ope_->accept(*this); }
2094 
2095   std::unordered_map<std::string, const char *> error_s;
2096   std::unordered_map<std::string, std::string> error_message;
2097   std::unordered_set<std::string> referenced;
2098 
2099 private:
2100   const Grammar &grammar_;
2101   const std::vector<std::string> &params_;
2102 };
2103 
2104 struct LinkReferences : public Ope::Visitor {
2105   LinkReferences(Grammar &grammar, const std::vector<std::string> &params)
2106       : grammar_(grammar), params_(params) {}
2107 
2108   void visit(Sequence &ope) override {
2109     for (auto op : ope.opes_) {
2110       op->accept(*this);
2111     }
2112   }
2113   void visit(PrioritizedChoice &ope) override {
2114     for (auto op : ope.opes_) {
2115       op->accept(*this);
2116     }
2117   }
2118   void visit(Repetition &ope) override { ope.ope_->accept(*this); }
2119   void visit(AndPredicate &ope) override { ope.ope_->accept(*this); }
2120   void visit(NotPredicate &ope) override { ope.ope_->accept(*this); }
2121   void visit(CaptureScope &ope) override { ope.ope_->accept(*this); }
2122   void visit(Capture &ope) override { ope.ope_->accept(*this); }
2123   void visit(TokenBoundary &ope) override { ope.ope_->accept(*this); }
2124   void visit(Ignore &ope) override { ope.ope_->accept(*this); }
2125   void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); }
2126   void visit(Holder &ope) override { ope.ope_->accept(*this); }
2127   void visit(Reference &ope) override;
2128   void visit(Whitespace &ope) override { ope.ope_->accept(*this); }
2129   void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); }
2130   void visit(Recovery &ope) override { ope.ope_->accept(*this); }
2131 
2132 private:
2133   Grammar &grammar_;
2134   const std::vector<std::string> &params_;
2135 };
2136 
2137 struct FindReference : public Ope::Visitor {
2138   FindReference(const std::vector<std::shared_ptr<Ope>> &args,
2139                 const std::vector<std::string> &params)
2140       : args_(args), params_(params) {}
2141 
2142   void visit(Sequence &ope) override {
2143     std::vector<std::shared_ptr<Ope>> opes;
2144     for (auto o : ope.opes_) {
2145       o->accept(*this);
2146       opes.push_back(found_ope);
2147     }
2148     found_ope = std::make_shared<Sequence>(opes);
2149   }
2150   void visit(PrioritizedChoice &ope) override {
2151     std::vector<std::shared_ptr<Ope>> opes;
2152     for (auto o : ope.opes_) {
2153       o->accept(*this);
2154       opes.push_back(found_ope);
2155     }
2156     found_ope = std::make_shared<PrioritizedChoice>(opes);
2157   }
2158   void visit(Repetition &ope) override {
2159     ope.ope_->accept(*this);
2160     found_ope = rep(found_ope, ope.min_, ope.max_);
2161   }
2162   void visit(AndPredicate &ope) override {
2163     ope.ope_->accept(*this);
2164     found_ope = apd(found_ope);
2165   }
2166   void visit(NotPredicate &ope) override {
2167     ope.ope_->accept(*this);
2168     found_ope = npd(found_ope);
2169   }
2170   void visit(Dictionary &ope) override { found_ope = ope.shared_from_this(); }
2171   void visit(LiteralString &ope) override {
2172     found_ope = ope.shared_from_this();
2173   }
2174   void visit(CharacterClass &ope) override {
2175     found_ope = ope.shared_from_this();
2176   }
2177   void visit(Character &ope) override { found_ope = ope.shared_from_this(); }
2178   void visit(AnyCharacter &ope) override { found_ope = ope.shared_from_this(); }
2179   void visit(CaptureScope &ope) override {
2180     ope.ope_->accept(*this);
2181     found_ope = csc(found_ope);
2182   }
2183   void visit(Capture &ope) override {
2184     ope.ope_->accept(*this);
2185     found_ope = cap(found_ope, ope.match_action_);
2186   }
2187   void visit(TokenBoundary &ope) override {
2188     ope.ope_->accept(*this);
2189     found_ope = tok(found_ope);
2190   }
2191   void visit(Ignore &ope) override {
2192     ope.ope_->accept(*this);
2193     found_ope = ign(found_ope);
2194   }
2195   void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); }
2196   void visit(Holder &ope) override { ope.ope_->accept(*this); }
2197   void visit(Reference &ope) override;
2198   void visit(Whitespace &ope) override {
2199     ope.ope_->accept(*this);
2200     found_ope = wsp(found_ope);
2201   }
2202   void visit(PrecedenceClimbing &ope) override {
2203     ope.atom_->accept(*this);
2204     found_ope = csc(found_ope);
2205   }
2206   void visit(Recovery &ope) override {
2207     ope.ope_->accept(*this);
2208     found_ope = rec(found_ope);
2209   }
2210   void visit(Cut &ope) override { found_ope = ope.shared_from_this(); }
2211 
2212   std::shared_ptr<Ope> found_ope;
2213 
2214 private:
2215   const std::vector<std::shared_ptr<Ope>> &args_;
2216   const std::vector<std::string> &params_;
2217 };
2218 
2219 struct IsPrioritizedChoice : public Ope::Visitor {
2220   void visit(PrioritizedChoice &) override { result_ = true; }
2221 
2222   static bool check(Ope &ope) {
2223     IsPrioritizedChoice vis;
2224     ope.accept(vis);
2225     return vis.result_;
2226   }
2227 
2228 private:
2229   bool result_ = false;
2230 };
2231 
2232 /*
2233  * Keywords
2234  */
2235 static const char *WHITESPACE_DEFINITION_NAME = "%whitespace";
2236 static const char *WORD_DEFINITION_NAME = "%word";
2237 static const char *RECOVER_DEFINITION_NAME = "%recover";
2238 
2239 /*
2240  * Definition
2241  */
2242 class Definition {
2243 public:
2244   struct Result {
2245     bool ret;
2246     bool recovered;
2247     size_t len;
2248     ErrorInfo error_info;
2249   };
2250 
2251   Definition() : holder_(std::make_shared<Holder>(this)) {}
2252 
2253   Definition(const Definition &rhs) : name(rhs.name), holder_(rhs.holder_) {
2254     holder_->outer_ = this;
2255   }
2256 
2257   Definition(const std::shared_ptr<Ope> &ope)
2258       : holder_(std::make_shared<Holder>(this)) {
2259     *this <= ope;
2260   }
2261 
2262   operator std::shared_ptr<Ope>() {
2263     return std::make_shared<WeakHolder>(holder_);
2264   }
2265 
2266   Definition &operator<=(const std::shared_ptr<Ope> &ope) {
2267     holder_->ope_ = ope;
2268     return *this;
2269   }
2270 
2271   Result parse(const char *s, size_t n, const char *path = nullptr,
2272                Log log = nullptr) const {
2273     SemanticValues vs;
2274     std::any dt;
2275     return parse_core(s, n, vs, dt, path, log);
2276   }
2277 
2278   Result parse(const char *s, const char *path = nullptr,
2279                Log log = nullptr) const {
2280     auto n = strlen(s);
2281     return parse(s, n, path, log);
2282   }
2283 
2284   Result parse(const char *s, size_t n, std::any &dt,
2285                const char *path = nullptr, Log log = nullptr) const {
2286     SemanticValues vs;
2287     return parse_core(s, n, vs, dt, path, log);
2288   }
2289 
2290   Result parse(const char *s, std::any &dt, const char *path = nullptr,
2291                Log log = nullptr) const {
2292     auto n = strlen(s);
2293     return parse(s, n, dt, path, log);
2294   }
2295 
2296   template <typename T>
2297   Result parse_and_get_value(const char *s, size_t n, T &val,
2298                              const char *path = nullptr,
2299                              Log log = nullptr) const {
2300     SemanticValues vs;
2301     std::any dt;
2302     auto r = parse_core(s, n, vs, dt, path, log);
2303     if (r.ret && !vs.empty() && vs.front().has_value()) {
2304       val = std::any_cast<T>(vs[0]);
2305     }
2306     return r;
2307   }
2308 
2309   template <typename T>
2310   Result parse_and_get_value(const char *s, T &val, const char *path = nullptr,
2311                              Log log = nullptr) const {
2312     auto n = strlen(s);
2313     return parse_and_get_value(s, n, val, path, log);
2314   }
2315 
2316   template <typename T>
2317   Result parse_and_get_value(const char *s, size_t n, std::any &dt, T &val,
2318                              const char *path = nullptr,
2319                              Log log = nullptr) const {
2320     SemanticValues vs;
2321     auto r = parse_core(s, n, vs, dt, path, log);
2322     if (r.ret && !vs.empty() && vs.front().has_value()) {
2323       val = std::any_cast<T>(vs[0]);
2324     }
2325     return r;
2326   }
2327 
2328   template <typename T>
2329   Result parse_and_get_value(const char *s, std::any &dt, T &val,
2330                              const char *path = nullptr,
2331                              Log log = nullptr) const {
2332     auto n = strlen(s);
2333     return parse_and_get_value(s, n, dt, val, path, log);
2334   }
2335 
2336   void operator=(Action a) { action = a; }
2337 
2338   template <typename T> Definition &operator,(T fn) {
2339     operator=(fn);
2340     return *this;
2341   }
2342 
2343   Definition &operator~() {
2344     ignoreSemanticValue = true;
2345     return *this;
2346   }
2347 
2348   void accept(Ope::Visitor &v) { holder_->accept(v); }
2349 
2350   std::shared_ptr<Ope> get_core_operator() const { return holder_->ope_; }
2351 
2352   bool is_token() const {
2353     std::call_once(is_token_init_, [this]() {
2354       is_token_ = TokenChecker::is_token(*get_core_operator());
2355     });
2356     return is_token_;
2357   }
2358 
2359   std::string name;
2360   const char *s_ = nullptr;
2361 
2362   size_t id = 0;
2363   Action action;
2364   std::function<void(const char *s, size_t n, std::any &dt)> enter;
2365   std::function<void(const char *s, size_t n, size_t matchlen, std::any &value,
2366                      std::any &dt)>
2367       leave;
2368   bool ignoreSemanticValue = false;
2369   std::shared_ptr<Ope> whitespaceOpe;
2370   std::shared_ptr<Ope> wordOpe;
2371   bool enablePackratParsing = false;
2372   bool is_macro = false;
2373   std::vector<std::string> params;
2374   TracerEnter tracer_enter;
2375   TracerLeave tracer_leave;
2376   bool disable_action = false;
2377 
2378   std::string error_message;
2379   bool no_ast_opt = false;
2380 
2381 private:
2382   friend class Reference;
2383   friend class ParserGenerator;
2384 
2385   Definition &operator=(const Definition &rhs);
2386   Definition &operator=(Definition &&rhs);
2387 
2388   void initialize_definition_ids() const {
2389     std::call_once(definition_ids_init_, [&]() {
2390       AssignIDToDefinition vis;
2391       holder_->accept(vis);
2392       if (whitespaceOpe) { whitespaceOpe->accept(vis); }
2393       if (wordOpe) { wordOpe->accept(vis); }
2394       definition_ids_.swap(vis.ids);
2395     });
2396   }
2397 
2398   Result parse_core(const char *s, size_t n, SemanticValues &vs, std::any &dt,
2399                     const char *path, Log log) const {
2400     initialize_definition_ids();
2401 
2402     std::shared_ptr<Ope> ope = holder_;
2403     if (whitespaceOpe) { ope = std::make_shared<Sequence>(whitespaceOpe, ope); }
2404 
2405     Context cxt(path, s, n, definition_ids_.size(), whitespaceOpe, wordOpe,
2406                 enablePackratParsing, tracer_enter, tracer_leave, log);
2407 
2408     auto len = ope->parse(s, n, vs, cxt, dt);
2409     return Result{success(len), cxt.recovered, len, cxt.error_info};
2410   }
2411 
2412   std::shared_ptr<Holder> holder_;
2413   mutable std::once_flag is_token_init_;
2414   mutable bool is_token_ = false;
2415   mutable std::once_flag assign_id_to_definition_init_;
2416   mutable std::once_flag definition_ids_init_;
2417   mutable std::unordered_map<void *, size_t> definition_ids_;
2418 };
2419 
2420 /*
2421  * Implementations
2422  */
2423 
2424 inline size_t parse_literal(const char *s, size_t n, SemanticValues &vs,
2425                             Context &c, std::any &dt, const std::string &lit,
2426                             std::once_flag &init_is_word, bool &is_word,
2427                             bool ignore_case) {
2428   size_t i = 0;
2429   for (; i < lit.size(); i++) {
2430     if (i >= n || (ignore_case ? (std::tolower(s[i]) != std::tolower(lit[i]))
2431                                : (s[i] != lit[i]))) {
2432       c.set_error_pos(s, lit.c_str());
2433       return static_cast<size_t>(-1);
2434     }
2435   }
2436 
2437   // Word check
2438   if (c.wordOpe) {
2439     std::call_once(init_is_word, [&]() {
2440       SemanticValues dummy_vs;
2441       Context dummy_c(nullptr, c.s, c.l, 0, nullptr, nullptr, false, nullptr,
2442                       nullptr, nullptr);
2443       std::any dummy_dt;
2444 
2445       auto len =
2446           c.wordOpe->parse(lit.data(), lit.size(), dummy_vs, dummy_c, dummy_dt);
2447       is_word = success(len);
2448     });
2449 
2450     if (is_word) {
2451       SemanticValues dummy_vs;
2452       Context dummy_c(nullptr, c.s, c.l, 0, nullptr, nullptr, false, nullptr,
2453                       nullptr, nullptr);
2454       std::any dummy_dt;
2455 
2456       NotPredicate ope(c.wordOpe);
2457       auto len = ope.parse(s + i, n - i, dummy_vs, dummy_c, dummy_dt);
2458       if (fail(len)) { return len; }
2459       i += len;
2460     }
2461   }
2462 
2463   // Skip whiltespace
2464   if (!c.in_token_boundary_count) {
2465     if (c.whitespaceOpe) {
2466       auto len = c.whitespaceOpe->parse(s + i, n - i, vs, c, dt);
2467       if (fail(len)) { return len; }
2468       i += len;
2469     }
2470   }
2471 
2472   return i;
2473 }
2474 
2475 inline void Context::set_error_pos(const char *a_s, const char *literal) {
2476   if (log) {
2477     if (error_info.error_pos <= a_s) {
2478       if (error_info.error_pos < a_s) {
2479         error_info.error_pos = a_s;
2480         error_info.expected_tokens.clear();
2481       }
2482       if (literal) {
2483         error_info.add(literal, true);
2484       } else if (!rule_stack.empty()) {
2485         auto rule = rule_stack.back();
2486         auto ope = rule->get_core_operator();
2487         if (auto token = FindLiteralToken::token(*ope);
2488             token && token[0] != '\0') {
2489           error_info.add(token, true);
2490         } else {
2491           error_info.add(rule->name.c_str(), false);
2492         }
2493       }
2494     }
2495   }
2496 }
2497 
2498 inline void Context::trace_enter(const Ope &ope, const char *a_s, size_t n,
2499                                  SemanticValues &vs, std::any &dt) const {
2500   trace_ids.push_back(next_trace_id++);
2501   tracer_enter(ope, a_s, n, vs, *this, dt);
2502 }
2503 
2504 inline void Context::trace_leave(const Ope &ope, const char *a_s, size_t n,
2505                                  SemanticValues &vs, std::any &dt,
2506                                  size_t len) const {
2507   tracer_leave(ope, a_s, n, vs, *this, dt, len);
2508   trace_ids.pop_back();
2509 }
2510 
2511 inline bool Context::is_traceable(const Ope &ope) const {
2512   if (tracer_enter && tracer_leave) {
2513     return !IsReference::check(const_cast<Ope &>(ope));
2514   }
2515   return false;
2516 }
2517 
2518 inline size_t Ope::parse(const char *s, size_t n, SemanticValues &vs,
2519                          Context &c, std::any &dt) const {
2520   if (c.is_traceable(*this)) {
2521     c.trace_enter(*this, s, n, vs, dt);
2522     auto len = parse_core(s, n, vs, c, dt);
2523     c.trace_leave(*this, s, n, vs, dt, len);
2524     return len;
2525   }
2526   return parse_core(s, n, vs, c, dt);
2527 }
2528 
2529 inline size_t Dictionary::parse_core(const char *s, size_t n,
2530                                      SemanticValues & /*vs*/, Context &c,
2531                                      std::any & /*dt*/) const {
2532   auto len = trie_.match(s, n);
2533   if (len > 0) { return len; }
2534   c.set_error_pos(s);
2535   return static_cast<size_t>(-1);
2536 }
2537 
2538 inline size_t LiteralString::parse_core(const char *s, size_t n,
2539                                         SemanticValues &vs, Context &c,
2540                                         std::any &dt) const {
2541   return parse_literal(s, n, vs, c, dt, lit_, init_is_word_, is_word_,
2542                        ignore_case_);
2543 }
2544 
2545 inline size_t TokenBoundary::parse_core(const char *s, size_t n,
2546                                         SemanticValues &vs, Context &c,
2547                                         std::any &dt) const {
2548   size_t len;
2549   {
2550     c.in_token_boundary_count++;
2551     auto se = scope_exit([&]() { c.in_token_boundary_count--; });
2552     len = ope_->parse(s, n, vs, c, dt);
2553   }
2554 
2555   if (success(len)) {
2556     vs.tokens.emplace_back(std::string_view(s, len));
2557 
2558     if (!c.in_token_boundary_count) {
2559       if (c.whitespaceOpe) {
2560         auto l = c.whitespaceOpe->parse(s + len, n - len, vs, c, dt);
2561         if (fail(l)) { return l; }
2562         len += l;
2563       }
2564     }
2565   }
2566   return len;
2567 }
2568 
2569 inline size_t Holder::parse_core(const char *s, size_t n, SemanticValues &vs,
2570                                  Context &c, std::any &dt) const {
2571   if (!ope_) {
2572     throw std::logic_error("Uninitialized definition ope was used...");
2573   }
2574 
2575   // Macro reference
2576   if (outer_->is_macro) {
2577     c.rule_stack.push_back(outer_);
2578     auto len = ope_->parse(s, n, vs, c, dt);
2579     c.rule_stack.pop_back();
2580     return len;
2581   }
2582 
2583   size_t len;
2584   std::any val;
2585 
2586   c.packrat(s, outer_->id, len, val, [&](std::any &a_val) {
2587     if (outer_->enter) { outer_->enter(s, n, dt); }
2588 
2589     auto se2 = scope_exit([&]() {
2590       c.pop();
2591       if (outer_->leave) { outer_->leave(s, n, len, a_val, dt); }
2592     });
2593 
2594     auto &chldsv = c.push();
2595 
2596     c.rule_stack.push_back(outer_);
2597     len = ope_->parse(s, n, chldsv, c, dt);
2598     c.rule_stack.pop_back();
2599 
2600     // Invoke action
2601     if (success(len)) {
2602       chldsv.sv_ = std::string_view(s, len);
2603       chldsv.name_ = outer_->name;
2604 
2605       if (!IsPrioritizedChoice::check(*ope_)) {
2606         chldsv.choice_count_ = 0;
2607         chldsv.choice_ = 0;
2608       }
2609 
2610       try {
2611         a_val = reduce(chldsv, dt);
2612       } catch (const parse_error &e) {
2613         if (c.log) {
2614           if (e.what()) {
2615             if (c.error_info.message_pos < s) {
2616               c.error_info.message_pos = s;
2617               c.error_info.message = e.what();
2618             }
2619           }
2620         }
2621         len = static_cast<size_t>(-1);
2622       }
2623     }
2624   });
2625 
2626   if (success(len)) {
2627     if (!outer_->ignoreSemanticValue) {
2628       vs.emplace_back(std::move(val));
2629       vs.tags.emplace_back(str2tag(outer_->name));
2630     }
2631   }
2632 
2633   return len;
2634 }
2635 
2636 inline std::any Holder::reduce(SemanticValues &vs, std::any &dt) const {
2637   if (outer_->action && !outer_->disable_action) {
2638     return outer_->action(vs, dt);
2639   } else if (vs.empty()) {
2640     return std::any();
2641   } else {
2642     return std::move(vs.front());
2643   }
2644 }
2645 
2646 inline const char *Holder::trace_name() const {
2647   if (trace_name_.empty()) { trace_name_ = "[" + outer_->name + "]"; }
2648   return trace_name_.data();
2649 }
2650 
2651 inline size_t Reference::parse_core(const char *s, size_t n, SemanticValues &vs,
2652                                     Context &c, std::any &dt) const {
2653   if (rule_) {
2654     // Reference rule
2655     if (rule_->is_macro) {
2656       // Macro
2657       FindReference vis(c.top_args(), c.rule_stack.back()->params);
2658 
2659       // Collect arguments
2660       std::vector<std::shared_ptr<Ope>> args;
2661       for (auto arg : args_) {
2662         arg->accept(vis);
2663         args.emplace_back(std::move(vis.found_ope));
2664       }
2665 
2666       c.push_args(std::move(args));
2667       auto se = scope_exit([&]() { c.pop_args(); });
2668       auto ope = get_core_operator();
2669       return ope->parse(s, n, vs, c, dt);
2670     } else {
2671       // Definition
2672       c.push_args(std::vector<std::shared_ptr<Ope>>());
2673       auto se = scope_exit([&]() { c.pop_args(); });
2674       auto ope = get_core_operator();
2675       return ope->parse(s, n, vs, c, dt);
2676     }
2677   } else {
2678     // Reference parameter in macro
2679     const auto &args = c.top_args();
2680     return args[iarg_]->parse(s, n, vs, c, dt);
2681   }
2682 }
2683 
2684 inline std::shared_ptr<Ope> Reference::get_core_operator() const {
2685   return rule_->holder_;
2686 }
2687 
2688 inline size_t BackReference::parse_core(const char *s, size_t n,
2689                                         SemanticValues &vs, Context &c,
2690                                         std::any &dt) const {
2691   auto size = static_cast<int>(c.capture_scope_stack_size);
2692   for (auto i = size - 1; i >= 0; i--) {
2693     auto index = static_cast<size_t>(i);
2694     const auto &cs = c.capture_scope_stack[index];
2695     if (cs.find(name_) != cs.end()) {
2696       const auto &lit = cs.at(name_);
2697       std::once_flag init_is_word;
2698       auto is_word = false;
2699       return parse_literal(s, n, vs, c, dt, lit, init_is_word, is_word, false);
2700     }
2701   }
2702   throw std::runtime_error("Invalid back reference...");
2703 }
2704 
2705 inline Definition &
2706 PrecedenceClimbing::get_reference_for_binop(Context &c) const {
2707   if (rule_.is_macro) {
2708     // Reference parameter in macro
2709     const auto &args = c.top_args();
2710     auto iarg = dynamic_cast<Reference &>(*binop_).iarg_;
2711     auto arg = args[iarg];
2712     return *dynamic_cast<Reference &>(*arg).rule_;
2713   }
2714 
2715   return *dynamic_cast<Reference &>(*binop_).rule_;
2716 }
2717 
2718 inline size_t PrecedenceClimbing::parse_expression(const char *s, size_t n,
2719                                                    SemanticValues &vs,
2720                                                    Context &c, std::any &dt,
2721                                                    size_t min_prec) const {
2722   auto len = atom_->parse(s, n, vs, c, dt);
2723   if (fail(len)) { return len; }
2724 
2725   std::string tok;
2726   auto &rule = get_reference_for_binop(c);
2727   auto action = std::move(rule.action);
2728 
2729   rule.action = [&](SemanticValues &vs2, std::any &dt2) {
2730     tok = vs2.token();
2731     if (action) {
2732       return action(vs2, dt2);
2733     } else if (!vs2.empty()) {
2734       return vs2[0];
2735     }
2736     return std::any();
2737   };
2738   auto action_se = scope_exit([&]() { rule.action = std::move(action); });
2739 
2740   auto i = len;
2741   while (i < n) {
2742     std::vector<std::any> save_values(vs.begin(), vs.end());
2743     auto save_tokens = vs.tokens;
2744 
2745     auto chv = c.push();
2746     auto chl = binop_->parse(s + i, n - i, chv, c, dt);
2747     c.pop();
2748 
2749     if (fail(chl)) { break; }
2750 
2751     auto it = info_.find(tok);
2752     if (it == info_.end()) { break; }
2753 
2754     auto level = std::get<0>(it->second);
2755     auto assoc = std::get<1>(it->second);
2756 
2757     if (level < min_prec) { break; }
2758 
2759     vs.emplace_back(std::move(chv[0]));
2760     i += chl;
2761 
2762     auto next_min_prec = level;
2763     if (assoc == 'L') { next_min_prec = level + 1; }
2764 
2765     chv = c.push();
2766     chl = parse_expression(s + i, n - i, chv, c, dt, next_min_prec);
2767     c.pop();
2768 
2769     if (fail(chl)) {
2770       vs.assign(save_values.begin(), save_values.end());
2771       vs.tokens = save_tokens;
2772       i = chl;
2773       break;
2774     }
2775 
2776     vs.emplace_back(std::move(chv[0]));
2777     i += chl;
2778 
2779     std::any val;
2780     if (rule_.action) {
2781       vs.sv_ = std::string_view(s, i);
2782       val = rule_.action(vs, dt);
2783     } else if (!vs.empty()) {
2784       val = vs[0];
2785     }
2786     vs.clear();
2787     vs.emplace_back(std::move(val));
2788   }
2789 
2790   return i;
2791 }
2792 
2793 inline size_t Recovery::parse_core(const char *s, size_t n,
2794                                    SemanticValues & /*vs*/, Context &c,
2795                                    std::any & /*dt*/) const {
2796   const auto &rule = dynamic_cast<Reference &>(*ope_);
2797 
2798   // Custom error message
2799   if (c.log) {
2800     auto label = dynamic_cast<Reference *>(rule.args_[0].get());
2801     if (label) {
2802       if (!label->rule_->error_message.empty()) {
2803         c.error_info.message_pos = s;
2804         c.error_info.message = label->rule_->error_message;
2805       }
2806     }
2807   }
2808 
2809   // Recovery
2810   size_t len = static_cast<size_t>(-1);
2811   {
2812     auto save_log = c.log;
2813     c.log = nullptr;
2814     auto se = scope_exit([&]() { c.log = save_log; });
2815 
2816     SemanticValues dummy_vs;
2817     std::any dummy_dt;
2818 
2819     len = rule.parse(s, n, dummy_vs, c, dummy_dt);
2820   }
2821 
2822   if (success(len)) {
2823     c.recovered = true;
2824 
2825     if (c.log) {
2826       c.error_info.output_log(c.log, c.s, c.l);
2827       c.error_info.clear();
2828     }
2829   }
2830 
2831   // Cut
2832   if (!c.cut_stack.empty()) {
2833     c.cut_stack.back() = true;
2834 
2835     if (c.cut_stack.size() == 1) {
2836       // TODO: Remove unneeded entries in packrat memoise table
2837     }
2838   }
2839 
2840   return len;
2841 }
2842 
2843 inline void Sequence::accept(Visitor &v) { v.visit(*this); }
2844 inline void PrioritizedChoice::accept(Visitor &v) { v.visit(*this); }
2845 inline void Repetition::accept(Visitor &v) { v.visit(*this); }
2846 inline void AndPredicate::accept(Visitor &v) { v.visit(*this); }
2847 inline void NotPredicate::accept(Visitor &v) { v.visit(*this); }
2848 inline void Dictionary::accept(Visitor &v) { v.visit(*this); }
2849 inline void LiteralString::accept(Visitor &v) { v.visit(*this); }
2850 inline void CharacterClass::accept(Visitor &v) { v.visit(*this); }
2851 inline void Character::accept(Visitor &v) { v.visit(*this); }
2852 inline void AnyCharacter::accept(Visitor &v) { v.visit(*this); }
2853 inline void CaptureScope::accept(Visitor &v) { v.visit(*this); }
2854 inline void Capture::accept(Visitor &v) { v.visit(*this); }
2855 inline void TokenBoundary::accept(Visitor &v) { v.visit(*this); }
2856 inline void Ignore::accept(Visitor &v) { v.visit(*this); }
2857 inline void User::accept(Visitor &v) { v.visit(*this); }
2858 inline void WeakHolder::accept(Visitor &v) { v.visit(*this); }
2859 inline void Holder::accept(Visitor &v) { v.visit(*this); }
2860 inline void Reference::accept(Visitor &v) { v.visit(*this); }
2861 inline void Whitespace::accept(Visitor &v) { v.visit(*this); }
2862 inline void BackReference::accept(Visitor &v) { v.visit(*this); }
2863 inline void PrecedenceClimbing::accept(Visitor &v) { v.visit(*this); }
2864 inline void Recovery::accept(Visitor &v) { v.visit(*this); }
2865 inline void Cut::accept(Visitor &v) { v.visit(*this); }
2866 
2867 inline void AssignIDToDefinition::visit(Holder &ope) {
2868   auto p = static_cast<void *>(ope.outer_);
2869   if (ids.count(p)) { return; }
2870   auto id = ids.size();
2871   ids[p] = id;
2872   ope.outer_->id = id;
2873   ope.ope_->accept(*this);
2874 }
2875 
2876 inline void AssignIDToDefinition::visit(Reference &ope) {
2877   if (ope.rule_) {
2878     for (auto arg : ope.args_) {
2879       arg->accept(*this);
2880     }
2881     ope.rule_->accept(*this);
2882   }
2883 }
2884 
2885 inline void AssignIDToDefinition::visit(PrecedenceClimbing &ope) {
2886   ope.atom_->accept(*this);
2887   ope.binop_->accept(*this);
2888 }
2889 
2890 inline void TokenChecker::visit(Reference &ope) {
2891   if (ope.is_macro_) {
2892     for (auto arg : ope.args_) {
2893       arg->accept(*this);
2894     }
2895   } else {
2896     has_rule_ = true;
2897   }
2898 }
2899 
2900 inline void FindLiteralToken::visit(Reference &ope) {
2901   if (ope.is_macro_) {
2902     ope.rule_->accept(*this);
2903     for (auto arg : ope.args_) {
2904       arg->accept(*this);
2905     }
2906   }
2907 }
2908 
2909 inline void DetectLeftRecursion::visit(Reference &ope) {
2910   if (ope.name_ == name_) {
2911     error_s = ope.s_;
2912   } else if (!refs_.count(ope.name_)) {
2913     refs_.insert(ope.name_);
2914     if (ope.rule_) {
2915       ope.rule_->accept(*this);
2916       if (done_ == false) { return; }
2917     }
2918   }
2919   done_ = true;
2920 }
2921 
2922 inline void HasEmptyElement::visit(Reference &ope) {
2923   auto it = std::find_if(refs_.begin(), refs_.end(),
2924                          [&](const std::pair<const char *, std::string> &ref) {
2925                            return ope.name_ == ref.second;
2926                          });
2927   if (it != refs_.end()) { return; }
2928 
2929   if (ope.rule_) {
2930     refs_.emplace_back(ope.s_, ope.name_);
2931     ope.rule_->accept(*this);
2932     refs_.pop_back();
2933   }
2934 }
2935 
2936 inline void DetectInfiniteLoop::visit(Reference &ope) {
2937   auto it = std::find_if(refs_.begin(), refs_.end(),
2938                          [&](const std::pair<const char *, std::string> &ref) {
2939                            return ope.name_ == ref.second;
2940                          });
2941   if (it != refs_.end()) { return; }
2942 
2943   if (ope.rule_) {
2944     refs_.emplace_back(ope.s_, ope.name_);
2945     ope.rule_->accept(*this);
2946     refs_.pop_back();
2947   }
2948 
2949   if (ope.is_macro_) {
2950     for (auto arg : ope.args_) {
2951       arg->accept(*this);
2952     }
2953   }
2954 }
2955 
2956 inline void ReferenceChecker::visit(Reference &ope) {
2957   auto it = std::find(params_.begin(), params_.end(), ope.name_);
2958   if (it != params_.end()) { return; }
2959 
2960   if (!grammar_.count(ope.name_)) {
2961     error_s[ope.name_] = ope.s_;
2962     error_message[ope.name_] = "'" + ope.name_ + "' is not defined.";
2963   } else {
2964     if (!referenced.count(ope.name_)) { referenced.insert(ope.name_); }
2965     const auto &rule = grammar_.at(ope.name_);
2966     if (rule.is_macro) {
2967       if (!ope.is_macro_ || ope.args_.size() != rule.params.size()) {
2968         error_s[ope.name_] = ope.s_;
2969         error_message[ope.name_] = "incorrect number of arguments.";
2970       }
2971     } else if (ope.is_macro_) {
2972       error_s[ope.name_] = ope.s_;
2973       error_message[ope.name_] = "'" + ope.name_ + "' is not macro.";
2974     }
2975     for (auto arg : ope.args_) {
2976       arg->accept(*this);
2977     }
2978   }
2979 }
2980 
2981 inline void LinkReferences::visit(Reference &ope) {
2982   // Check if the reference is a macro parameter
2983   auto found_param = false;
2984   for (size_t i = 0; i < params_.size(); i++) {
2985     const auto &param = params_[i];
2986     if (param == ope.name_) {
2987       ope.iarg_ = i;
2988       found_param = true;
2989       break;
2990     }
2991   }
2992 
2993   // Check if the reference is a definition rule
2994   if (!found_param && grammar_.count(ope.name_)) {
2995     auto &rule = grammar_.at(ope.name_);
2996     ope.rule_ = &rule;
2997   }
2998 
2999   for (auto arg : ope.args_) {
3000     arg->accept(*this);
3001   }
3002 }
3003 
3004 inline void FindReference::visit(Reference &ope) {
3005   for (size_t i = 0; i < args_.size(); i++) {
3006     const auto &name = params_[i];
3007     if (name == ope.name_) {
3008       found_ope = args_[i];
3009       return;
3010     }
3011   }
3012   found_ope = ope.shared_from_this();
3013 }
3014 
3015 /*-----------------------------------------------------------------------------
3016  *  PEG parser generator
3017  *---------------------------------------------------------------------------*/
3018 
3019 using Rules = std::unordered_map<std::string, std::shared_ptr<Ope>>;
3020 
3021 class ParserGenerator {
3022 public:
3023   static std::shared_ptr<Grammar> parse(const char *s, size_t n,
3024                                         const Rules &rules, std::string &start,
3025                                         bool &enablePackratParsing, Log log) {
3026     return get_instance().perform_core(s, n, rules, start, enablePackratParsing,
3027                                        log);
3028   }
3029 
3030   static std::shared_ptr<Grammar> parse(const char *s, size_t n,
3031                                         std::string &start,
3032                                         bool &enablePackratParsing, Log log) {
3033     Rules dummy;
3034     return parse(s, n, dummy, start, enablePackratParsing, log);
3035   }
3036 
3037   // For debuging purpose
3038   static Grammar &grammar() { return get_instance().g; }
3039 
3040 private:
3041   static ParserGenerator &get_instance() {
3042     static ParserGenerator instance;
3043     return instance;
3044   }
3045 
3046   ParserGenerator() {
3047     make_grammar();
3048     setup_actions();
3049   }
3050 
3051   struct Instruction {
3052     std::string type;
3053     std::any data;
3054   };
3055 
3056   struct Data {
3057     std::shared_ptr<Grammar> grammar;
3058     std::string start;
3059     const char *start_pos = nullptr;
3060     std::vector<std::pair<std::string, const char *>> duplicates;
3061     std::map<std::string, Instruction> instructions;
3062     std::set<std::string_view> captures;
3063     bool enablePackratParsing = true;
3064 
3065     Data() : grammar(std::make_shared<Grammar>()) {}
3066   };
3067 
3068   void make_grammar() {
3069     // Setup PEG syntax parser
3070     g["Grammar"] <= seq(g["Spacing"], oom(g["Definition"]), g["EndOfFile"]);
3071     g["Definition"] <=
3072         cho(seq(g["Ignore"], g["IdentCont"], g["Parameters"], g["LEFTARROW"],
3073                 g["Expression"], opt(g["Instruction"])),
3074             seq(g["Ignore"], g["Identifier"], g["LEFTARROW"], g["Expression"],
3075                 opt(g["Instruction"])));
3076     g["Expression"] <= seq(g["Sequence"], zom(seq(g["SLASH"], g["Sequence"])));
3077     g["Sequence"] <= zom(cho(g["CUT"], g["Prefix"]));
3078     g["Prefix"] <= seq(opt(cho(g["AND"], g["NOT"])), g["SuffixWithLabel"]);
3079     g["SuffixWithLabel"] <=
3080         seq(g["Suffix"], opt(seq(g["LABEL"], g["Identifier"])));
3081     g["Suffix"] <= seq(g["Primary"], opt(g["Loop"]));
3082     g["Loop"] <= cho(g["QUESTION"], g["STAR"], g["PLUS"], g["Repetition"]);
3083     g["Primary"] <=
3084         cho(seq(g["Ignore"], g["IdentCont"], g["Arguments"],
3085                 npd(g["LEFTARROW"])),
3086             seq(g["Ignore"], g["Identifier"],
3087                 npd(seq(opt(g["Parameters"]), g["LEFTARROW"]))),
3088             seq(g["OPEN"], g["Expression"], g["CLOSE"]),
3089             seq(g["BeginTok"], g["Expression"], g["EndTok"]),
3090             seq(g["BeginCapScope"], g["Expression"], g["EndCapScope"]),
3091             seq(g["BeginCap"], g["Expression"], g["EndCap"]), g["BackRef"],
3092             g["LiteralI"], g["Dictionary"], g["Literal"], g["NegatedClass"],
3093             g["Class"], g["DOT"]);
3094 
3095     g["Identifier"] <= seq(g["IdentCont"], g["Spacing"]);
3096     g["IdentCont"] <= seq(g["IdentStart"], zom(g["IdentRest"]));
3097 
3098     const static std::vector<std::pair<char32_t, char32_t>> range = {
3099         {0x0080, 0xFFFF}};
3100     g["IdentStart"] <= seq(npd(lit(u8(u8"↑"))), npd(lit(u8(u8"⇑"))),
3101                            cho(cls("a-zA-Z_%"), cls(range)));
3102 
3103     g["IdentRest"] <= cho(g["IdentStart"], cls("0-9"));
3104 
3105     g["Dictionary"] <= seq(g["LiteralD"], oom(seq(g["PIPE"], g["LiteralD"])));
3106 
3107     auto lit_ope = cho(seq(cls("'"), tok(zom(seq(npd(cls("'")), g["Char"]))),
3108                            cls("'"), g["Spacing"]),
3109                        seq(cls("\""), tok(zom(seq(npd(cls("\"")), g["Char"]))),
3110                            cls("\""), g["Spacing"]));
3111     g["Literal"] <= lit_ope;
3112     g["LiteralD"] <= lit_ope;
3113 
3114     g["LiteralI"] <=
3115         cho(seq(cls("'"), tok(zom(seq(npd(cls("'")), g["Char"]))), lit("'i"),
3116                 g["Spacing"]),
3117             seq(cls("\""), tok(zom(seq(npd(cls("\"")), g["Char"]))), lit("\"i"),
3118                 g["Spacing"]));
3119 
3120     // NOTE: The original Brian Ford's paper uses 'zom' instead of 'oom'.
3121     g["Class"] <= seq(chr('['), npd(chr('^')),
3122                       tok(oom(seq(npd(chr(']')), g["Range"]))), chr(']'),
3123                       g["Spacing"]);
3124     g["NegatedClass"] <= seq(lit("[^"),
3125                              tok(oom(seq(npd(chr(']')), g["Range"]))), chr(']'),
3126                              g["Spacing"]);
3127 
3128     g["Range"] <= cho(seq(g["Char"], chr('-'), g["Char"]), g["Char"]);
3129     g["Char"] <=
3130         cho(seq(chr('\\'), cls("nrt'\"[]\\^")),
3131             seq(chr('\\'), cls("0-3"), cls("0-7"), cls("0-7")),
3132             seq(chr('\\'), cls("0-7"), opt(cls("0-7"))),
3133             seq(lit("\\x"), cls("0-9a-fA-F"), opt(cls("0-9a-fA-F"))),
3134             seq(lit("\\u"),
3135                 cho(seq(cho(seq(chr('0'), cls("0-9a-fA-F")), lit("10")),
3136                         rep(cls("0-9a-fA-F"), 4, 4)),
3137                     rep(cls("0-9a-fA-F"), 4, 5))),
3138             seq(npd(chr('\\')), dot()));
3139 
3140     g["Repetition"] <=
3141         seq(g["BeginBlacket"], g["RepetitionRange"], g["EndBlacket"]);
3142     g["RepetitionRange"] <= cho(seq(g["Number"], g["COMMA"], g["Number"]),
3143                                 seq(g["Number"], g["COMMA"]), g["Number"],
3144                                 seq(g["COMMA"], g["Number"]));
3145     g["Number"] <= seq(oom(cls("0-9")), g["Spacing"]);
3146 
3147     g["LEFTARROW"] <= seq(cho(lit("<-"), lit(u8(u8"←"))), g["Spacing"]);
3148     ~g["SLASH"] <= seq(chr('/'), g["Spacing"]);
3149     ~g["PIPE"] <= seq(chr('|'), g["Spacing"]);
3150     g["AND"] <= seq(chr('&'), g["Spacing"]);
3151     g["NOT"] <= seq(chr('!'), g["Spacing"]);
3152     g["QUESTION"] <= seq(chr('?'), g["Spacing"]);
3153     g["STAR"] <= seq(chr('*'), g["Spacing"]);
3154     g["PLUS"] <= seq(chr('+'), g["Spacing"]);
3155     ~g["OPEN"] <= seq(chr('('), g["Spacing"]);
3156     ~g["CLOSE"] <= seq(chr(')'), g["Spacing"]);
3157     g["DOT"] <= seq(chr('.'), g["Spacing"]);
3158 
3159     g["CUT"] <= seq(lit(u8(u8"↑")), g["Spacing"]);
3160     ~g["LABEL"] <= seq(cho(chr('^'), lit(u8(u8"⇑"))), g["Spacing"]);
3161 
3162     ~g["Spacing"] <= zom(cho(g["Space"], g["Comment"]));
3163     g["Comment"] <=
3164         seq(chr('#'), zom(seq(npd(g["EndOfLine"]), dot())), g["EndOfLine"]);
3165     g["Space"] <= cho(chr(' '), chr('\t'), g["EndOfLine"]);
3166     g["EndOfLine"] <= cho(lit("\r\n"), chr('\n'), chr('\r'));
3167     g["EndOfFile"] <= npd(dot());
3168 
3169     ~g["BeginTok"] <= seq(chr('<'), g["Spacing"]);
3170     ~g["EndTok"] <= seq(chr('>'), g["Spacing"]);
3171 
3172     ~g["BeginCapScope"] <= seq(chr('$'), chr('('), g["Spacing"]);
3173     ~g["EndCapScope"] <= seq(chr(')'), g["Spacing"]);
3174 
3175     g["BeginCap"] <= seq(chr('$'), tok(g["IdentCont"]), chr('<'), g["Spacing"]);
3176     ~g["EndCap"] <= seq(chr('>'), g["Spacing"]);
3177 
3178     g["BackRef"] <= seq(chr('$'), tok(g["IdentCont"]), g["Spacing"]);
3179 
3180     g["IGNORE"] <= chr('~');
3181 
3182     g["Ignore"] <= opt(g["IGNORE"]);
3183     g["Parameters"] <= seq(g["OPEN"], g["Identifier"],
3184                            zom(seq(g["COMMA"], g["Identifier"])), g["CLOSE"]);
3185     g["Arguments"] <= seq(g["OPEN"], g["Expression"],
3186                           zom(seq(g["COMMA"], g["Expression"])), g["CLOSE"]);
3187     ~g["COMMA"] <= seq(chr(','), g["Spacing"]);
3188 
3189     // Instruction grammars
3190     g["Instruction"] <= seq(g["BeginBlacket"],
3191                             cho(cho(g["PrecedenceClimbing"]),
3192                                 cho(g["ErrorMessage"]), cho(g["NoAstOpt"])),
3193                             g["EndBlacket"]);
3194 
3195     ~g["SpacesZom"] <= zom(g["Space"]);
3196     ~g["SpacesOom"] <= oom(g["Space"]);
3197     ~g["BeginBlacket"] <= seq(chr('{'), g["Spacing"]);
3198     ~g["EndBlacket"] <= seq(chr('}'), g["Spacing"]);
3199 
3200     // PrecedenceClimbing instruction
3201     g["PrecedenceClimbing"] <=
3202         seq(lit("precedence"), g["SpacesOom"], g["PrecedenceInfo"],
3203             zom(seq(g["SpacesOom"], g["PrecedenceInfo"])), g["SpacesZom"]);
3204     g["PrecedenceInfo"] <=
3205         seq(g["PrecedenceAssoc"],
3206             oom(seq(ign(g["SpacesOom"]), g["PrecedenceOpe"])));
3207     g["PrecedenceOpe"] <=
3208         cho(seq(cls("'"),
3209                 tok(zom(seq(npd(cho(g["Space"], cls("'"))), g["Char"]))),
3210                 cls("'")),
3211             seq(cls("\""),
3212                 tok(zom(seq(npd(cho(g["Space"], cls("\""))), g["Char"]))),
3213                 cls("\"")),
3214             tok(oom(seq(npd(cho(g["PrecedenceAssoc"], g["Space"], chr('}'))),
3215                         dot()))));
3216     g["PrecedenceAssoc"] <= cls("LR");
3217 
3218     // Error message instruction
3219     g["ErrorMessage"] <=
3220         seq(lit("message"), g["SpacesOom"], g["LiteralD"], g["SpacesZom"]);
3221 
3222     // No Ast node optimazation instruction
3223     g["NoAstOpt"] <= seq(lit("no_ast_opt"), g["SpacesZom"]);
3224 
3225     // Set definition names
3226     for (auto &x : g) {
3227       x.second.name = x.first;
3228     }
3229   }
3230 
3231   void setup_actions() {
3232     g["Definition"] = [&](const SemanticValues &vs, std::any &dt) {
3233       auto &data = *std::any_cast<Data *>(dt);
3234 
3235       auto is_macro = vs.choice() == 0;
3236       auto ignore = std::any_cast<bool>(vs[0]);
3237       auto name = std::any_cast<std::string>(vs[1]);
3238 
3239       std::vector<std::string> params;
3240       std::shared_ptr<Ope> ope;
3241       if (is_macro) {
3242         params = std::any_cast<std::vector<std::string>>(vs[2]);
3243         ope = std::any_cast<std::shared_ptr<Ope>>(vs[4]);
3244         if (vs.size() == 6) {
3245           data.instructions[name] = std::any_cast<Instruction>(vs[5]);
3246         }
3247       } else {
3248         ope = std::any_cast<std::shared_ptr<Ope>>(vs[3]);
3249         if (vs.size() == 5) {
3250           data.instructions[name] = std::any_cast<Instruction>(vs[4]);
3251         }
3252       }
3253 
3254       auto &grammar = *data.grammar;
3255       if (!grammar.count(name)) {
3256         auto &rule = grammar[name];
3257         rule <= ope;
3258         rule.name = name;
3259         rule.s_ = vs.sv().data();
3260         rule.ignoreSemanticValue = ignore;
3261         rule.is_macro = is_macro;
3262         rule.params = params;
3263 
3264         if (data.start.empty()) {
3265           data.start = name;
3266           data.start_pos = vs.sv().data();
3267         }
3268       } else {
3269         data.duplicates.emplace_back(name, vs.sv().data());
3270       }
3271     };
3272 
3273     g["Definition"].enter = [](const char * /*s*/, size_t /*n*/, std::any &dt) {
3274       auto &data = *std::any_cast<Data *>(dt);
3275       data.captures.clear();
3276     };
3277 
3278     g["Expression"] = [&](const SemanticValues &vs) {
3279       if (vs.size() == 1) {
3280         return std::any_cast<std::shared_ptr<Ope>>(vs[0]);
3281       } else {
3282         std::vector<std::shared_ptr<Ope>> opes;
3283         for (auto i = 0u; i < vs.size(); i++) {
3284           opes.emplace_back(std::any_cast<std::shared_ptr<Ope>>(vs[i]));
3285         }
3286         const std::shared_ptr<Ope> ope =
3287             std::make_shared<PrioritizedChoice>(opes);
3288         return ope;
3289       }
3290     };
3291 
3292     g["Sequence"] = [&](const SemanticValues &vs) {
3293       if (vs.empty()) {
3294         return npd(lit(""));
3295       } else if (vs.size() == 1) {
3296         return std::any_cast<std::shared_ptr<Ope>>(vs[0]);
3297       } else {
3298         std::vector<std::shared_ptr<Ope>> opes;
3299         for (const auto &x : vs) {
3300           opes.emplace_back(std::any_cast<std::shared_ptr<Ope>>(x));
3301         }
3302         const std::shared_ptr<Ope> ope = std::make_shared<Sequence>(opes);
3303         return ope;
3304       }
3305     };
3306 
3307     g["Prefix"] = [&](const SemanticValues &vs) {
3308       std::shared_ptr<Ope> ope;
3309       if (vs.size() == 1) {
3310         ope = std::any_cast<std::shared_ptr<Ope>>(vs[0]);
3311       } else {
3312         assert(vs.size() == 2);
3313         auto tok = std::any_cast<char>(vs[0]);
3314         ope = std::any_cast<std::shared_ptr<Ope>>(vs[1]);
3315         if (tok == '&') {
3316           ope = apd(ope);
3317         } else { // '!'
3318           ope = npd(ope);
3319         }
3320       }
3321       return ope;
3322     };
3323 
3324     g["SuffixWithLabel"] = [&](const SemanticValues &vs, std::any &dt) {
3325       auto ope = std::any_cast<std::shared_ptr<Ope>>(vs[0]);
3326       if (vs.size() == 1) {
3327         return ope;
3328       } else {
3329         assert(vs.size() == 2);
3330         auto &data = *std::any_cast<Data *>(dt);
3331         const auto &ident = std::any_cast<std::string>(vs[1]);
3332         auto label = ref(*data.grammar, ident, vs.sv().data(), false, {});
3333         auto recovery = rec(ref(*data.grammar, RECOVER_DEFINITION_NAME,
3334                                 vs.sv().data(), true, {label}));
3335         return cho4label_(ope, recovery);
3336       }
3337     };
3338 
3339     struct Loop {
3340       enum class Type { opt = 0, zom, oom, rep };
3341       Type type;
3342       std::pair<size_t, size_t> range;
3343     };
3344 
3345     g["Suffix"] = [&](const SemanticValues &vs) {
3346       auto ope = std::any_cast<std::shared_ptr<Ope>>(vs[0]);
3347       if (vs.size() == 1) {
3348         return ope;
3349       } else {
3350         assert(vs.size() == 2);
3351         auto loop = std::any_cast<Loop>(vs[1]);
3352         switch (loop.type) {
3353         case Loop::Type::opt: return opt(ope);
3354         case Loop::Type::zom: return zom(ope);
3355         case Loop::Type::oom: return oom(ope);
3356         default: // Regex-like repetition
3357           return rep(ope, loop.range.first, loop.range.second);
3358         }
3359       }
3360     };
3361 
3362     g["Loop"] = [&](const SemanticValues &vs) {
3363       switch (vs.choice()) {
3364       case 0: // Option
3365         return Loop{Loop::Type::opt, std::pair<size_t, size_t>()};
3366       case 1: // Zero or More
3367         return Loop{Loop::Type::zom, std::pair<size_t, size_t>()};
3368       case 2: // One or More
3369         return Loop{Loop::Type::oom, std::pair<size_t, size_t>()};
3370       default: // Regex-like repetition
3371         return Loop{Loop::Type::rep,
3372                     std::any_cast<std::pair<size_t, size_t>>(vs[0])};
3373       }
3374     };
3375 
3376     g["RepetitionRange"] = [&](const SemanticValues &vs) {
3377       switch (vs.choice()) {
3378       case 0: { // Number COMMA Number
3379         auto min = std::any_cast<size_t>(vs[0]);
3380         auto max = std::any_cast<size_t>(vs[1]);
3381         return std::pair(min, max);
3382       }
3383       case 1: // Number COMMA
3384         return std::pair(std::any_cast<size_t>(vs[0]),
3385                          std::numeric_limits<size_t>::max());
3386       case 2: { // Number
3387         auto n = std::any_cast<size_t>(vs[0]);
3388         return std::pair(n, n);
3389       }
3390       default: // COMMA Number
3391         return std::pair(std::numeric_limits<size_t>::min(),
3392                          std::any_cast<size_t>(vs[0]));
3393       }
3394     };
3395     g["Number"] = [&](const SemanticValues &vs) {
3396       return vs.token_to_number<size_t>();
3397     };
3398 
3399     g["Primary"] = [&](const SemanticValues &vs, std::any &dt) {
3400       auto &data = *std::any_cast<Data *>(dt);
3401 
3402       switch (vs.choice()) {
3403       case 0:   // Macro Reference
3404       case 1: { // Reference
3405         auto is_macro = vs.choice() == 0;
3406         auto ignore = std::any_cast<bool>(vs[0]);
3407         const auto &ident = std::any_cast<std::string>(vs[1]);
3408 
3409         std::vector<std::shared_ptr<Ope>> args;
3410         if (is_macro) {
3411           args = std::any_cast<std::vector<std::shared_ptr<Ope>>>(vs[2]);
3412         }
3413 
3414         auto ope = ref(*data.grammar, ident, vs.sv().data(), is_macro, args);
3415         if (ident == RECOVER_DEFINITION_NAME) { ope = rec(ope); }
3416 
3417         if (ignore) {
3418           return ign(ope);
3419         } else {
3420           return ope;
3421         }
3422       }
3423       case 2: { // (Expression)
3424         return std::any_cast<std::shared_ptr<Ope>>(vs[0]);
3425       }
3426       case 3: { // TokenBoundary
3427         return tok(std::any_cast<std::shared_ptr<Ope>>(vs[0]));
3428       }
3429       case 4: { // CaptureScope
3430         return csc(std::any_cast<std::shared_ptr<Ope>>(vs[0]));
3431       }
3432       case 5: { // Capture
3433         const auto &name = std::any_cast<std::string_view>(vs[0]);
3434         auto ope = std::any_cast<std::shared_ptr<Ope>>(vs[1]);
3435 
3436         data.captures.insert(name);
3437 
3438         return cap(ope, [name](const char *a_s, size_t a_n, Context &c) {
3439           auto &cs = c.capture_scope_stack[c.capture_scope_stack_size - 1];
3440           cs[name] = std::string(a_s, a_n);
3441         });
3442       }
3443       default: {
3444         return std::any_cast<std::shared_ptr<Ope>>(vs[0]);
3445       }
3446       }
3447     };
3448 
3449     g["IdentCont"] = [](const SemanticValues &vs) {
3450       return std::string(vs.sv().data(), vs.sv().length());
3451     };
3452 
3453     g["Dictionary"] = [](const SemanticValues &vs) {
3454       auto items = vs.transform<std::string>();
3455       return dic(items);
3456     };
3457 
3458     g["Literal"] = [](const SemanticValues &vs) {
3459       const auto &tok = vs.tokens.front();
3460       return lit(resolve_escape_sequence(tok.data(), tok.size()));
3461     };
3462     g["LiteralI"] = [](const SemanticValues &vs) {
3463       const auto &tok = vs.tokens.front();
3464       return liti(resolve_escape_sequence(tok.data(), tok.size()));
3465     };
3466     g["LiteralD"] = [](const SemanticValues &vs) {
3467       auto &tok = vs.tokens.front();
3468       return resolve_escape_sequence(tok.data(), tok.size());
3469     };
3470 
3471     g["Class"] = [](const SemanticValues &vs) {
3472       auto ranges = vs.transform<std::pair<char32_t, char32_t>>();
3473       return cls(ranges);
3474     };
3475     g["NegatedClass"] = [](const SemanticValues &vs) {
3476       auto ranges = vs.transform<std::pair<char32_t, char32_t>>();
3477       return ncls(ranges);
3478     };
3479     g["Range"] = [](const SemanticValues &vs) {
3480       switch (vs.choice()) {
3481       case 0: {
3482         auto s1 = std::any_cast<std::string>(vs[0]);
3483         auto s2 = std::any_cast<std::string>(vs[1]);
3484         auto cp1 = decode_codepoint(s1.data(), s1.length());
3485         auto cp2 = decode_codepoint(s2.data(), s2.length());
3486         return std::pair(cp1, cp2);
3487       }
3488       case 1: {
3489         auto s = std::any_cast<std::string>(vs[0]);
3490         auto cp = decode_codepoint(s.data(), s.length());
3491         return std::pair(cp, cp);
3492       }
3493       }
3494       return std::pair<char32_t, char32_t>(0, 0);
3495     };
3496     g["Char"] = [](const SemanticValues &vs) {
3497       return resolve_escape_sequence(vs.sv().data(), vs.sv().length());
3498     };
3499 
3500     g["AND"] = [](const SemanticValues &vs) { return *vs.sv().data(); };
3501     g["NOT"] = [](const SemanticValues &vs) { return *vs.sv().data(); };
3502     g["QUESTION"] = [](const SemanticValues &vs) { return *vs.sv().data(); };
3503     g["STAR"] = [](const SemanticValues &vs) { return *vs.sv().data(); };
3504     g["PLUS"] = [](const SemanticValues &vs) { return *vs.sv().data(); };
3505 
3506     g["DOT"] = [](const SemanticValues & /*vs*/) { return dot(); };
3507 
3508     g["CUT"] = [](const SemanticValues & /*vs*/) { return cut(); };
3509 
3510     g["BeginCap"] = [](const SemanticValues &vs) { return vs.token(); };
3511 
3512     g["BackRef"] = [&](const SemanticValues &vs, std::any &dt) {
3513       auto &data = *std::any_cast<Data *>(dt);
3514       if (data.captures.find(vs.token()) == data.captures.end()) {
3515         data.enablePackratParsing = false;
3516       }
3517       return bkr(vs.token_to_string());
3518     };
3519 
3520     g["Ignore"] = [](const SemanticValues &vs) { return vs.size() > 0; };
3521 
3522     g["Parameters"] = [](const SemanticValues &vs) {
3523       return vs.transform<std::string>();
3524     };
3525 
3526     g["Arguments"] = [](const SemanticValues &vs) {
3527       return vs.transform<std::shared_ptr<Ope>>();
3528     };
3529 
3530     g["PrecedenceClimbing"] = [](const SemanticValues &vs) {
3531       PrecedenceClimbing::BinOpeInfo binOpeInfo;
3532       size_t level = 1;
3533       for (auto v : vs) {
3534         auto tokens = std::any_cast<std::vector<std::string_view>>(v);
3535         auto assoc = tokens[0][0];
3536         for (size_t i = 1; i < tokens.size(); i++) {
3537           binOpeInfo[tokens[i]] = std::pair(level, assoc);
3538         }
3539         level++;
3540       }
3541       Instruction instruction;
3542       instruction.type = "precedence";
3543       instruction.data = binOpeInfo;
3544       return instruction;
3545     };
3546     g["PrecedenceInfo"] = [](const SemanticValues &vs) {
3547       return vs.transform<std::string_view>();
3548     };
3549     g["PrecedenceOpe"] = [](const SemanticValues &vs) { return vs.token(); };
3550     g["PrecedenceAssoc"] = [](const SemanticValues &vs) { return vs.token(); };
3551 
3552     g["ErrorMessage"] = [](const SemanticValues &vs) {
3553       Instruction instruction;
3554       instruction.type = "message";
3555       instruction.data = std::any_cast<std::string>(vs[0]);
3556       return instruction;
3557     };
3558 
3559     g["NoAstOpt"] = [](const SemanticValues & /*vs*/) {
3560       Instruction instruction;
3561       instruction.type = "no_ast_opt";
3562       return instruction;
3563     };
3564   }
3565 
3566   bool apply_precedence_instruction(Definition &rule,
3567                                     const PrecedenceClimbing::BinOpeInfo &info,
3568                                     const char *s, Log log) {
3569     try {
3570       auto &seq = dynamic_cast<Sequence &>(*rule.get_core_operator());
3571       auto atom = seq.opes_[0];
3572       auto &rep = dynamic_cast<Repetition &>(*seq.opes_[1]);
3573       auto &seq1 = dynamic_cast<Sequence &>(*rep.ope_);
3574       auto binop = seq1.opes_[0];
3575       auto atom1 = seq1.opes_[1];
3576 
3577       auto atom_name = dynamic_cast<Reference &>(*atom).name_;
3578       auto binop_name = dynamic_cast<Reference &>(*binop).name_;
3579       auto atom1_name = dynamic_cast<Reference &>(*atom1).name_;
3580 
3581       if (!rep.is_zom() || atom_name != atom1_name || atom_name == binop_name) {
3582         if (log) {
3583           auto line = line_info(s, rule.s_);
3584           log(line.first, line.second,
3585               "'precedence' instruction cannot be applied to '" + rule.name +
3586                   "'.");
3587         }
3588         return false;
3589       }
3590 
3591       rule.holder_->ope_ = pre(atom, binop, info, rule);
3592       rule.disable_action = true;
3593     } catch (...) {
3594       if (log) {
3595         auto line = line_info(s, rule.s_);
3596         log(line.first, line.second,
3597             "'precedence' instruction cannot be applied to '" + rule.name +
3598                 "'.");
3599       }
3600       return false;
3601     }
3602     return true;
3603   }
3604 
3605   std::shared_ptr<Grammar> perform_core(const char *s, size_t n,
3606                                         const Rules &rules, std::string &start,
3607                                         bool &enablePackratParsing, Log log) {
3608     Data data;
3609     auto &grammar = *data.grammar;
3610 
3611     // Built-in macros
3612     {
3613       // `%recover`
3614       {
3615         auto &rule = grammar[RECOVER_DEFINITION_NAME];
3616         rule <= ref(grammar, "x", "", false, {});
3617         rule.name = RECOVER_DEFINITION_NAME;
3618         rule.s_ = "[native]";
3619         rule.ignoreSemanticValue = true;
3620         rule.is_macro = true;
3621         rule.params = {"x"};
3622       }
3623     }
3624 
3625     std::any dt = &data;
3626     auto r = g["Grammar"].parse(s, n, dt, nullptr, log);
3627 
3628     if (!r.ret) {
3629       if (log) {
3630         if (r.error_info.message_pos) {
3631           auto line = line_info(s, r.error_info.message_pos);
3632           log(line.first, line.second, r.error_info.message);
3633         } else {
3634           auto line = line_info(s, r.error_info.error_pos);
3635           log(line.first, line.second, "syntax error");
3636         }
3637       }
3638       return nullptr;
3639     }
3640 
3641     // User provided rules
3642     for (auto [user_name, user_rule] : rules) {
3643       auto name = user_name;
3644       auto ignore = false;
3645       if (!name.empty() && name[0] == '~') {
3646         ignore = true;
3647         name.erase(0, 1);
3648       }
3649       if (!name.empty()) {
3650         auto &rule = grammar[name];
3651         rule <= user_rule;
3652         rule.name = name;
3653         rule.ignoreSemanticValue = ignore;
3654       }
3655     }
3656 
3657     // Check duplicated definitions
3658     auto ret = data.duplicates.empty();
3659 
3660     for (const auto &[name, ptr] : data.duplicates) {
3661       if (log) {
3662         auto line = line_info(s, ptr);
3663         log(line.first, line.second, "'" + name + "' is already defined.");
3664       }
3665     }
3666 
3667     // Set root definition
3668     auto &start_rule = grammar[data.start];
3669 
3670     // Check if the start rule has ignore operator
3671     {
3672       if (start_rule.ignoreSemanticValue) {
3673         if (log) {
3674           auto line = line_info(s, start_rule.s_);
3675           log(line.first, line.second,
3676               "Ignore operator cannot be applied to '" + start_rule.name +
3677                   "'.");
3678         }
3679         ret = false;
3680       }
3681     }
3682 
3683     if (!ret) { return nullptr; }
3684 
3685     // Check missing definitions
3686     auto referenced = std::unordered_set<std::string>{
3687         WHITESPACE_DEFINITION_NAME,
3688         WORD_DEFINITION_NAME,
3689         RECOVER_DEFINITION_NAME,
3690         start_rule.name,
3691     };
3692 
3693     for (auto &[_, rule] : grammar) {
3694       ReferenceChecker vis(grammar, rule.params);
3695       rule.accept(vis);
3696       referenced.insert(vis.referenced.begin(), vis.referenced.end());
3697       for (const auto &[name, ptr] : vis.error_s) {
3698         if (log) {
3699           auto line = line_info(s, ptr);
3700           log(line.first, line.second, vis.error_message[name]);
3701         }
3702         ret = false;
3703       }
3704     }
3705 
3706     for (auto &[name, rule] : grammar) {
3707       if (!referenced.count(name)) {
3708         if (log) {
3709           auto line = line_info(s, rule.s_);
3710           auto msg = "'" + name + "' is not referenced.";
3711           log(line.first, line.second, msg);
3712         }
3713       }
3714     }
3715 
3716     if (!ret) { return nullptr; }
3717 
3718     // Link references
3719     for (auto &x : grammar) {
3720       auto &rule = x.second;
3721       LinkReferences vis(grammar, rule.params);
3722       rule.accept(vis);
3723     }
3724 
3725     // Check left recursion
3726     ret = true;
3727 
3728     for (auto &[name, rule] : grammar) {
3729       DetectLeftRecursion vis(name);
3730       rule.accept(vis);
3731       if (vis.error_s) {
3732         if (log) {
3733           auto line = line_info(s, vis.error_s);
3734           log(line.first, line.second, "'" + name + "' is left recursive.");
3735         }
3736         ret = false;
3737       }
3738     }
3739 
3740     if (!ret) { return nullptr; }
3741 
3742     // Check infinite loop
3743     {
3744       DetectInfiniteLoop vis(data.start_pos, data.start);
3745       start_rule.accept(vis);
3746       if (vis.has_error) {
3747         if (log) {
3748           auto line = line_info(s, vis.error_s);
3749           log(line.first, line.second,
3750               "infinite loop is detected in '" + vis.error_name + "'.");
3751         }
3752         return nullptr;
3753       }
3754     }
3755 
3756     // Automatic whitespace skipping
3757     if (grammar.count(WHITESPACE_DEFINITION_NAME)) {
3758       for (auto &x : grammar) {
3759         auto &rule = x.second;
3760         auto ope = rule.get_core_operator();
3761         if (IsLiteralToken::check(*ope)) { rule <= tok(ope); }
3762       }
3763 
3764       start_rule.whitespaceOpe =
3765           wsp(grammar[WHITESPACE_DEFINITION_NAME].get_core_operator());
3766     }
3767 
3768     // Word expression
3769     if (grammar.count(WORD_DEFINITION_NAME)) {
3770       start_rule.wordOpe = grammar[WORD_DEFINITION_NAME].get_core_operator();
3771     }
3772 
3773     // Apply instructions
3774     for (const auto &[name, instruction] : data.instructions) {
3775       auto &rule = grammar[name];
3776 
3777       if (instruction.type == "precedence") {
3778         const auto &info =
3779             std::any_cast<PrecedenceClimbing::BinOpeInfo>(instruction.data);
3780 
3781         if (!apply_precedence_instruction(rule, info, s, log)) {
3782           return nullptr;
3783         }
3784       } else if (instruction.type == "message") {
3785         rule.error_message = std::any_cast<std::string>(instruction.data);
3786       } else if (instruction.type == "no_ast_opt") {
3787         rule.no_ast_opt = true;
3788       }
3789     }
3790 
3791     // Set root definition
3792     start = data.start;
3793     enablePackratParsing = data.enablePackratParsing;
3794 
3795     return data.grammar;
3796   }
3797 
3798   Grammar g;
3799 };
3800 
3801 /*-----------------------------------------------------------------------------
3802  *  AST
3803  *---------------------------------------------------------------------------*/
3804 
3805 template <typename Annotation> struct AstBase : public Annotation {
3806   AstBase(const char *path, size_t line, size_t column, const char *name,
3807           const std::vector<std::shared_ptr<AstBase>> &nodes,
3808           size_t position = 0, size_t length = 0, size_t choice_count = 0,
3809           size_t choice = 0)
3810       : path(path ? path : ""), line(line), column(column), name(name),
3811         position(position), length(length), choice_count(choice_count),
3812         choice(choice), original_name(name),
3813         original_choice_count(choice_count), original_choice(choice),
3814         tag(str2tag(name)), original_tag(tag), is_token(false), nodes(nodes) {}
3815 
3816   AstBase(const char *path, size_t line, size_t column, const char *name,
3817           const std::string_view &token, size_t position = 0, size_t length = 0,
3818           size_t choice_count = 0, size_t choice = 0)
3819       : path(path ? path : ""), line(line), column(column), name(name),
3820         position(position), length(length), choice_count(choice_count),
3821         choice(choice), original_name(name),
3822         original_choice_count(choice_count), original_choice(choice),
3823         tag(str2tag(name)), original_tag(tag), is_token(true), token(token) {}
3824 
3825   AstBase(const AstBase &ast, const char *original_name, size_t position = 0,
3826           size_t length = 0, size_t original_choice_count = 0,
3827           size_t original_choise = 0)
3828       : path(ast.path), line(ast.line), column(ast.column), name(ast.name),
3829         position(position), length(length), choice_count(ast.choice_count),
3830         choice(ast.choice), original_name(original_name),
3831         original_choice_count(original_choice_count),
3832         original_choice(original_choise), tag(ast.tag),
3833         original_tag(str2tag(original_name)), is_token(ast.is_token),
3834         token(ast.token), nodes(ast.nodes), parent(ast.parent) {}
3835 
3836   const std::string path;
3837   const size_t line = 1;
3838   const size_t column = 1;
3839 
3840   const std::string name;
3841   size_t position;
3842   size_t length;
3843   const size_t choice_count;
3844   const size_t choice;
3845   const std::string original_name;
3846   const size_t original_choice_count;
3847   const size_t original_choice;
3848   const unsigned int tag;
3849   const unsigned int original_tag;
3850 
3851   const bool is_token;
3852   const std::string_view token;
3853 
3854   std::vector<std::shared_ptr<AstBase<Annotation>>> nodes;
3855   std::weak_ptr<AstBase<Annotation>> parent;
3856 
3857   std::string token_to_string() const {
3858     assert(is_token);
3859     return std::string(token);
3860   }
3861 
3862   template <typename T> T token_to_number() const {
3863     return token_to_number_<T>(token);
3864   }
3865 };
3866 
3867 template <typename T>
3868 void ast_to_s_core(const std::shared_ptr<T> &ptr, std::string &s, int level,
3869                    std::function<std::string(const T &ast, int level)> fn) {
3870   const auto &ast = *ptr;
3871   for (auto i = 0; i < level; i++) {
3872     s += "  ";
3873   }
3874   auto name = ast.original_name;
3875   if (ast.original_choice_count > 0) {
3876     name += "/" + std::to_string(ast.original_choice);
3877   }
3878   if (ast.name != ast.original_name) { name += "[" + ast.name + "]"; }
3879   if (ast.is_token) {
3880     s += "- " + name + " (";
3881     s += ast.token;
3882     s += ")\n";
3883   } else {
3884     s += "+ " + name + "\n";
3885   }
3886   if (fn) { s += fn(ast, level + 1); }
3887   for (auto node : ast.nodes) {
3888     ast_to_s_core(node, s, level + 1, fn);
3889   }
3890 }
3891 
3892 template <typename T>
3893 std::string
3894 ast_to_s(const std::shared_ptr<T> &ptr,
3895          std::function<std::string(const T &ast, int level)> fn = nullptr) {
3896   std::string s;
3897   ast_to_s_core(ptr, s, 0, fn);
3898   return s;
3899 }
3900 
3901 struct AstOptimizer {
3902   AstOptimizer(bool mode, const std::vector<std::string> &rules = {})
3903       : mode_(mode), rules_(rules) {}
3904 
3905   template <typename T>
3906   std::shared_ptr<T> optimize(std::shared_ptr<T> original,
3907                               std::shared_ptr<T> parent = nullptr) {
3908     auto found =
3909         std::find(rules_.begin(), rules_.end(), original->name) != rules_.end();
3910     bool opt = mode_ ? !found : found;
3911 
3912     if (opt && original->nodes.size() == 1) {
3913       auto child = optimize(original->nodes[0], parent);
3914       return std::make_shared<T>(*child, original->name.data(),
3915                                  original->choice_count, original->position,
3916                                  original->length, original->choice);
3917     }
3918 
3919     auto ast = std::make_shared<T>(*original);
3920     ast->parent = parent;
3921     ast->nodes.clear();
3922     for (auto node : original->nodes) {
3923       auto child = optimize(node, ast);
3924       ast->nodes.push_back(child);
3925     }
3926     return ast;
3927   }
3928 
3929 private:
3930   const bool mode_;
3931   const std::vector<std::string> rules_;
3932 };
3933 
3934 struct EmptyType {};
3935 using Ast = AstBase<EmptyType>;
3936 
3937 template <typename T = Ast> void add_ast_action(Definition &rule) {
3938   rule.action = [&](const SemanticValues &vs) {
3939     auto line = vs.line_info();
3940 
3941     if (rule.is_token()) {
3942       return std::make_shared<T>(
3943           vs.path, line.first, line.second, rule.name.data(), vs.token(),
3944           std::distance(vs.ss, vs.sv().data()), vs.sv().length(),
3945           vs.choice_count(), vs.choice());
3946     }
3947 
3948     auto ast =
3949         std::make_shared<T>(vs.path, line.first, line.second, rule.name.data(),
3950                             vs.transform<std::shared_ptr<T>>(),
3951                             std::distance(vs.ss, vs.sv().data()),
3952                             vs.sv().length(), vs.choice_count(), vs.choice());
3953 
3954     for (auto node : ast->nodes) {
3955       node->parent = ast;
3956     }
3957     return ast;
3958   };
3959 }
3960 
3961 #define PEG_EXPAND(...) __VA_ARGS__
3962 #define PEG_CONCAT(a, b) a##b
3963 #define PEG_CONCAT2(a, b) PEG_CONCAT(a, b)
3964 
3965 #define PEG_PICK(                                                              \
3966     a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, \
3967     a17, a18, a19, a20, a21, a22, a23, a24, a25, a26, a27, a28, a29, a30, a31, \
3968     a32, a33, a34, a35, a36, a37, a38, a39, a40, a41, a42, a43, a44, a45, a46, \
3969     a47, a48, a49, a50, a51, a52, a53, a54, a55, a56, a57, a58, a59, a60, a61, \
3970     a62, a63, a64, a65, a66, a67, a68, a69, a70, a71, a72, a73, a74, a75, a76, \
3971     a77, a78, a79, a80, a81, a82, a83, a84, a85, a86, a87, a88, a89, a90, a91, \
3972     a92, a93, a94, a95, a96, a97, a98, a99, a100, ...)                         \
3973   a100
3974 
3975 #define PEG_COUNT(...)                                                         \
3976   PEG_EXPAND(PEG_PICK(                                                         \
3977       __VA_ARGS__, 100, 99, 98, 97, 96, 95, 94, 93, 92, 91, 90, 89, 88, 87,    \
3978       86, 85, 84, 83, 82, 81, 80, 79, 78, 77, 76, 75, 74, 73, 72, 71, 70, 69,  \
3979       68, 67, 66, 65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51,  \
3980       50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33,  \
3981       32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15,  \
3982       14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0))
3983 
3984 #define PEG_DEF_1(r)                                                           \
3985   peg::Definition r;                                                           \
3986   r.name = #r;                                                                 \
3987   peg::add_ast_action(r);
3988 
3989 #define PEG_DEF_2(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_1(__VA_ARGS__))
3990 #define PEG_DEF_3(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_2(__VA_ARGS__))
3991 #define PEG_DEF_4(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_3(__VA_ARGS__))
3992 #define PEG_DEF_5(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_4(__VA_ARGS__))
3993 #define PEG_DEF_6(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_5(__VA_ARGS__))
3994 #define PEG_DEF_7(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_6(__VA_ARGS__))
3995 #define PEG_DEF_8(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_7(__VA_ARGS__))
3996 #define PEG_DEF_9(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_8(__VA_ARGS__))
3997 #define PEG_DEF_10(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_9(__VA_ARGS__))
3998 #define PEG_DEF_11(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_10(__VA_ARGS__))
3999 #define PEG_DEF_12(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_11(__VA_ARGS__))
4000 #define PEG_DEF_13(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_12(__VA_ARGS__))
4001 #define PEG_DEF_14(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_13(__VA_ARGS__))
4002 #define PEG_DEF_15(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_14(__VA_ARGS__))
4003 #define PEG_DEF_16(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_15(__VA_ARGS__))
4004 #define PEG_DEF_17(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_16(__VA_ARGS__))
4005 #define PEG_DEF_18(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_17(__VA_ARGS__))
4006 #define PEG_DEF_19(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_18(__VA_ARGS__))
4007 #define PEG_DEF_20(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_19(__VA_ARGS__))
4008 #define PEG_DEF_21(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_20(__VA_ARGS__))
4009 #define PEG_DEF_22(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_21(__VA_ARGS__))
4010 #define PEG_DEF_23(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_22(__VA_ARGS__))
4011 #define PEG_DEF_24(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_23(__VA_ARGS__))
4012 #define PEG_DEF_25(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_24(__VA_ARGS__))
4013 #define PEG_DEF_26(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_25(__VA_ARGS__))
4014 #define PEG_DEF_27(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_26(__VA_ARGS__))
4015 #define PEG_DEF_28(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_27(__VA_ARGS__))
4016 #define PEG_DEF_29(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_28(__VA_ARGS__))
4017 #define PEG_DEF_30(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_29(__VA_ARGS__))
4018 #define PEG_DEF_31(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_30(__VA_ARGS__))
4019 #define PEG_DEF_32(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_31(__VA_ARGS__))
4020 #define PEG_DEF_33(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_32(__VA_ARGS__))
4021 #define PEG_DEF_34(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_33(__VA_ARGS__))
4022 #define PEG_DEF_35(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_34(__VA_ARGS__))
4023 #define PEG_DEF_36(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_35(__VA_ARGS__))
4024 #define PEG_DEF_37(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_36(__VA_ARGS__))
4025 #define PEG_DEF_38(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_37(__VA_ARGS__))
4026 #define PEG_DEF_39(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_38(__VA_ARGS__))
4027 #define PEG_DEF_40(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_39(__VA_ARGS__))
4028 #define PEG_DEF_41(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_40(__VA_ARGS__))
4029 #define PEG_DEF_42(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_41(__VA_ARGS__))
4030 #define PEG_DEF_43(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_42(__VA_ARGS__))
4031 #define PEG_DEF_44(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_43(__VA_ARGS__))
4032 #define PEG_DEF_45(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_44(__VA_ARGS__))
4033 #define PEG_DEF_46(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_45(__VA_ARGS__))
4034 #define PEG_DEF_47(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_46(__VA_ARGS__))
4035 #define PEG_DEF_48(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_47(__VA_ARGS__))
4036 #define PEG_DEF_49(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_48(__VA_ARGS__))
4037 #define PEG_DEF_50(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_49(__VA_ARGS__))
4038 #define PEG_DEF_51(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_50(__VA_ARGS__))
4039 #define PEG_DEF_52(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_51(__VA_ARGS__))
4040 #define PEG_DEF_53(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_52(__VA_ARGS__))
4041 #define PEG_DEF_54(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_53(__VA_ARGS__))
4042 #define PEG_DEF_55(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_54(__VA_ARGS__))
4043 #define PEG_DEF_56(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_55(__VA_ARGS__))
4044 #define PEG_DEF_57(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_56(__VA_ARGS__))
4045 #define PEG_DEF_58(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_57(__VA_ARGS__))
4046 #define PEG_DEF_59(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_58(__VA_ARGS__))
4047 #define PEG_DEF_60(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_59(__VA_ARGS__))
4048 #define PEG_DEF_61(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_60(__VA_ARGS__))
4049 #define PEG_DEF_62(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_61(__VA_ARGS__))
4050 #define PEG_DEF_63(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_62(__VA_ARGS__))
4051 #define PEG_DEF_64(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_63(__VA_ARGS__))
4052 #define PEG_DEF_65(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_64(__VA_ARGS__))
4053 #define PEG_DEF_66(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_65(__VA_ARGS__))
4054 #define PEG_DEF_67(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_66(__VA_ARGS__))
4055 #define PEG_DEF_68(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_67(__VA_ARGS__))
4056 #define PEG_DEF_69(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_68(__VA_ARGS__))
4057 #define PEG_DEF_70(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_69(__VA_ARGS__))
4058 #define PEG_DEF_71(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_70(__VA_ARGS__))
4059 #define PEG_DEF_72(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_71(__VA_ARGS__))
4060 #define PEG_DEF_73(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_72(__VA_ARGS__))
4061 #define PEG_DEF_74(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_73(__VA_ARGS__))
4062 #define PEG_DEF_75(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_74(__VA_ARGS__))
4063 #define PEG_DEF_76(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_75(__VA_ARGS__))
4064 #define PEG_DEF_77(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_76(__VA_ARGS__))
4065 #define PEG_DEF_78(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_77(__VA_ARGS__))
4066 #define PEG_DEF_79(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_78(__VA_ARGS__))
4067 #define PEG_DEF_80(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_79(__VA_ARGS__))
4068 #define PEG_DEF_81(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_80(__VA_ARGS__))
4069 #define PEG_DEF_82(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_81(__VA_ARGS__))
4070 #define PEG_DEF_83(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_82(__VA_ARGS__))
4071 #define PEG_DEF_84(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_83(__VA_ARGS__))
4072 #define PEG_DEF_85(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_84(__VA_ARGS__))
4073 #define PEG_DEF_86(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_85(__VA_ARGS__))
4074 #define PEG_DEF_87(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_86(__VA_ARGS__))
4075 #define PEG_DEF_88(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_87(__VA_ARGS__))
4076 #define PEG_DEF_89(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_88(__VA_ARGS__))
4077 #define PEG_DEF_90(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_89(__VA_ARGS__))
4078 #define PEG_DEF_91(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_90(__VA_ARGS__))
4079 #define PEG_DEF_92(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_91(__VA_ARGS__))
4080 #define PEG_DEF_93(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_92(__VA_ARGS__))
4081 #define PEG_DEF_94(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_93(__VA_ARGS__))
4082 #define PEG_DEF_95(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_94(__VA_ARGS__))
4083 #define PEG_DEF_96(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_95(__VA_ARGS__))
4084 #define PEG_DEF_97(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_96(__VA_ARGS__))
4085 #define PEG_DEF_98(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_97(__VA_ARGS__))
4086 #define PEG_DEF_99(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_98(__VA_ARGS__))
4087 #define PEG_DEF_100(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_99(__VA_ARGS__))
4088 
4089 #define AST_DEFINITIONS(...)                                                   \
4090   PEG_EXPAND(PEG_CONCAT2(PEG_DEF_, PEG_COUNT(__VA_ARGS__))(__VA_ARGS__))
4091 
4092 /*-----------------------------------------------------------------------------
4093  *  parser
4094  *---------------------------------------------------------------------------*/
4095 
4096 class parser {
4097 public:
4098   parser() = default;
4099 
4100   parser(const char *s, size_t n, const Rules &rules) {
4101     load_grammar(s, n, rules);
4102   }
4103 
4104   parser(std::string_view sv, const Rules &rules)
4105       : parser(sv.data(), sv.size(), rules) {}
4106 
4107   parser(const char *s, size_t n) : parser(s, n, Rules()) {}
4108 
4109   parser(std::string_view sv) : parser(sv.data(), sv.size(), Rules()) {}
4110 
4111   operator bool() { return grammar_ != nullptr; }
4112 
4113   bool load_grammar(const char *s, size_t n, const Rules &rules) {
4114     grammar_ =
4115         ParserGenerator::parse(s, n, rules, start_, enablePackratParsing_, log);
4116     return grammar_ != nullptr;
4117   }
4118 
4119   bool load_grammar(const char *s, size_t n) {
4120     return load_grammar(s, n, Rules());
4121   }
4122 
4123   bool load_grammar(std::string_view sv, const Rules &rules) {
4124     return load_grammar(sv.data(), sv.size(), rules);
4125   }
4126 
4127   bool load_grammar(std::string_view sv) {
4128     return load_grammar(sv.data(), sv.size());
4129   }
4130 
4131   bool parse_n(const char *s, size_t n, const char *path = nullptr) const {
4132     if (grammar_ != nullptr) {
4133       const auto &rule = (*grammar_)[start_];
4134       return post_process(s, n, rule.parse(s, n, path, log));
4135     }
4136     return false;
4137   }
4138 
4139   bool parse(std::string_view sv, const char *path = nullptr) const {
4140     return parse_n(sv.data(), sv.size(), path);
4141   }
4142 
4143   bool parse_n(const char *s, size_t n, std::any &dt,
4144                const char *path = nullptr) const {
4145     if (grammar_ != nullptr) {
4146       const auto &rule = (*grammar_)[start_];
4147       return post_process(s, n, rule.parse(s, n, dt, path, log));
4148     }
4149     return false;
4150   }
4151 
4152   bool parse(std::string_view sv, std::any &dt,
4153              const char *path = nullptr) const {
4154     return parse_n(sv.data(), sv.size(), dt, path);
4155   }
4156 
4157   template <typename T>
4158   bool parse_n(const char *s, size_t n, T &val,
4159                const char *path = nullptr) const {
4160     if (grammar_ != nullptr) {
4161       const auto &rule = (*grammar_)[start_];
4162       return post_process(s, n, rule.parse_and_get_value(s, n, val, path, log));
4163     }
4164     return false;
4165   }
4166 
4167   template <typename T>
4168   bool parse(std::string_view sv, T &val, const char *path = nullptr) const {
4169     return parse_n(sv.data(), sv.size(), val, path);
4170   }
4171 
4172   template <typename T>
4173   bool parse_n(const char *s, size_t n, std::any &dt, T &val,
4174                const char *path = nullptr) const {
4175     if (grammar_ != nullptr) {
4176       const auto &rule = (*grammar_)[start_];
4177       return post_process(s, n,
4178                           rule.parse_and_get_value(s, n, dt, val, path, log));
4179     }
4180     return false;
4181   }
4182 
4183   template <typename T>
4184   bool parse(const char *s, std::any &dt, T &val,
4185              const char *path = nullptr) const {
4186     auto n = strlen(s);
4187     return parse_n(s, n, dt, val, path);
4188   }
4189 
4190   Definition &operator[](const char *s) { return (*grammar_)[s]; }
4191 
4192   const Definition &operator[](const char *s) const { return (*grammar_)[s]; }
4193 
4194   std::vector<std::string> get_rule_names() const {
4195     std::vector<std::string> rules;
4196     for (auto &[name, _] : *grammar_) {
4197       rules.push_back(name);
4198     }
4199     return rules;
4200   }
4201 
4202   void enable_packrat_parsing() {
4203     if (grammar_ != nullptr) {
4204       auto &rule = (*grammar_)[start_];
4205       rule.enablePackratParsing = enablePackratParsing_ && true;
4206     }
4207   }
4208 
4209   void enable_trace(TracerEnter tracer_enter, TracerLeave tracer_leave) {
4210     if (grammar_ != nullptr) {
4211       auto &rule = (*grammar_)[start_];
4212       rule.tracer_enter = tracer_enter;
4213       rule.tracer_leave = tracer_leave;
4214     }
4215   }
4216 
4217   template <typename T = Ast> parser &enable_ast() {
4218     for (auto &[_, rule] : *grammar_) {
4219       if (!rule.action) { add_ast_action<T>(rule); }
4220     }
4221     return *this;
4222   }
4223 
4224   template <typename T>
4225   std::shared_ptr<T> optimize_ast(std::shared_ptr<T> ast,
4226                                   bool opt_mode = true) const {
4227     return AstOptimizer(opt_mode, get_no_ast_opt_rules()).optimize(ast);
4228   }
4229 
4230   Log log;
4231 
4232 private:
4233   bool post_process(const char *s, size_t n,
4234                     const Definition::Result &r) const {
4235     auto ret = r.ret && r.len == n;
4236     if (log && !ret) { r.error_info.output_log(log, s, n); }
4237     return ret && !r.recovered;
4238   }
4239 
4240   std::vector<std::string> get_no_ast_opt_rules() const {
4241     std::vector<std::string> rules;
4242     for (auto &[name, rule] : *grammar_) {
4243       if (rule.no_ast_opt) { rules.push_back(name); }
4244     }
4245     return rules;
4246   }
4247 
4248   std::shared_ptr<Grammar> grammar_;
4249   std::string start_;
4250   bool enablePackratParsing_ = false;
4251 };
4252 
4253 } // namespace peg
4254