1 //
2 //  peglint.cc
3 //
4 //  Copyright (c) 2021 Yuji Hirose. All rights reserved.
5 //  MIT License
6 //
7 
8 #include <fstream>
9 #include <peglib.h>
10 #include <sstream>
11 
12 using namespace std;
13 
// Reads the entire file at `path` into `buff` (binary mode).
//
// Returns true when the file was opened and all of its bytes were read; an
// empty file yields an empty `buff`. Returns false when the file can't be
// opened, its size can't be determined or doesn't fit in `buff`, or the read
// comes up short. When opening or sizing fails, `buff` is left untouched.
inline bool read_file(const char *path, std::vector<char> &buff) {
  std::ifstream ifs(path, std::ios::in | std::ios::binary);
  if (ifs.fail()) { return false; }

  // tellg() reports failure as -1. Converting that directly to an unsigned
  // size would request a huge buffer, so validate the offset first.
  const std::streamoff size = ifs.seekg(0, std::ios::end).tellg();
  if (size < 0) { return false; }
  if (static_cast<unsigned long long>(size) > buff.max_size()) { return false; }

  buff.resize(static_cast<std::size_t>(size));
  if (buff.empty()) { return true; }

  ifs.seekg(0, std::ios::beg)
      .read(buff.data(), static_cast<std::streamsize>(buff.size()));

  // A short read sets failbit; report it instead of returning partial data
  // as if it were the complete file.
  return !ifs.fail();
}
24 
// Splits `s` into the fields separated by `delim`.
//
// Empty fields between consecutive delimiters are kept, but a delimiter at
// the very end of `s` does not produce a trailing empty field, and an empty
// `s` produces no fields at all.
inline std::vector<std::string> split(const std::string &s, char delim) {
  std::vector<std::string> fields;
  std::string::size_type start = 0;

  // Stop as soon as the remaining input is empty: that is what suppresses the
  // trailing empty field after a final delimiter.
  while (start < s.size()) {
    const std::string::size_type stop = s.find(delim, start);
    if (stop == std::string::npos) {
      fields.push_back(s.substr(start));
      break;
    }
    fields.push_back(s.substr(start, stop - start));
    start = stop + 1;
  }

  return fields;
}
34 
main(int argc,const char ** argv)35 int main(int argc, const char **argv) {
36   auto opt_packrat = false;
37   auto opt_ast = false;
38   auto opt_optimize = false;
39   auto opt_mode = true;
40   auto opt_help = false;
41   auto opt_source = false;
42   vector<char> source;
43   auto opt_trace = false;
44   vector<const char *> path_list;
45 
46   auto argi = 1;
47   while (argi < argc) {
48     auto arg = argv[argi++];
49     if (string("--help") == arg) {
50       opt_help = true;
51     } else if (string("--packrat") == arg) {
52       opt_packrat = true;
53     } else if (string("--ast") == arg) {
54       opt_ast = true;
55     } else if (string("--opt") == arg || string("--opt-all") == arg) {
56       opt_optimize = true;
57       opt_mode = true;
58     } else if (string("--opt-only") == arg) {
59       opt_optimize = true;
60       opt_mode = false;
61     } else if (string("--source") == arg) {
62       opt_source = true;
63       if (argi < argc) {
64         std::string text = argv[argi++];
65         source.assign(text.begin(), text.end());
66       }
67     } else if (string("--trace") == arg) {
68       opt_trace = true;
69     } else {
70       path_list.push_back(arg);
71     }
72   }
73 
74   if (path_list.empty() || opt_help) {
75     cerr << R"(usage: grammar_file_path [source_file_path]
76 
77   options:
78     --source: source text
79     --packrat: enable packrat memoise
80     --ast: show AST tree
81     --opt, --opt-all: optimaze all AST nodes except nodes selected with `no_ast_opt` instruction
82     --opt-only: optimaze only AST nodes selected with `no_ast_opt` instruction
83     --trace: show trace messages
84 )";
85 
86     return 1;
87   }
88 
89   // Check PEG grammar
90   auto syntax_path = path_list[0];
91 
92   vector<char> syntax;
93   if (!read_file(syntax_path, syntax)) {
94     cerr << "can't open the grammar file." << endl;
95     return -1;
96   }
97 
98   peg::parser parser;
99 
100   parser.log = [&](size_t ln, size_t col, const string &msg) {
101     cerr << syntax_path << ":" << ln << ":" << col << ": " << msg << endl;
102   };
103 
104   if (!parser.load_grammar(syntax.data(), syntax.size())) { return -1; }
105 
106   if (path_list.size() < 2 && !opt_source) { return 0; }
107 
108   // Check source
109   std::string source_path = "[commandline]";
110   if (path_list.size() >= 2) {
111     if (!read_file(path_list[1], source)) {
112       cerr << "can't open the code file." << endl;
113       return -1;
114     }
115     source_path = path_list[1];
116   }
117 
118   parser.log = [&](size_t ln, size_t col, const string &msg) {
119     cerr << source_path << ":" << ln << ":" << col << ": " << msg << endl;
120   };
121 
122   if (opt_packrat) {
123     parser.enable_packrat_parsing();
124   }
125 
126   if (opt_trace) {
127     size_t prev_pos = 0;
128     parser.enable_trace(
129         [&](const peg::Ope &ope, const char *s, size_t /*n*/,
130             const peg::SemanticValues & /*sv*/, const peg::Context &c,
131             const std::any & /*dt*/) {
132           auto pos = static_cast<size_t>(s - c.s);
133           auto backtrack = (pos < prev_pos ? "*" : "");
134           string indent;
135           auto level = c.trace_ids.size() - 1;
136           while (level--) {
137             indent += "│";
138           }
139           std::string name;
140           {
141             name = peg::TraceOpeName::get(const_cast<peg::Ope &>(ope));
142 
143             auto lit = dynamic_cast<const peg::LiteralString *>(&ope);
144             if (lit) { name += " '" + peg::escape_characters(lit->lit_) + "'"; }
145           }
146           std::cout << "E " << pos << backtrack << "\t" << indent << "┌" << name
147                     << " #" << c.trace_ids.back() << std::endl;
148           prev_pos = static_cast<size_t>(pos);
149         },
150         [&](const peg::Ope &ope, const char *s, size_t /*n*/,
151             const peg::SemanticValues &sv, const peg::Context &c,
152             const std::any & /*dt*/, size_t len) {
153           auto pos = static_cast<size_t>(s - c.s);
154           if (len != static_cast<size_t>(-1)) { pos += len; }
155           string indent;
156           auto level = c.trace_ids.size() - 1;
157           while (level--) {
158             indent += "│";
159           }
160           auto ret = len != static_cast<size_t>(-1) ? "└o " : "└x ";
161           auto name = peg::TraceOpeName::get(const_cast<peg::Ope &>(ope));
162           std::stringstream choice;
163           if (sv.choice_count() > 0) {
164             choice << " " << sv.choice() << "/" << sv.choice_count();
165           }
166           std::string token;
167           if (!sv.tokens.empty()) {
168             token += ", token '";
169             token += sv.tokens[0];
170             token += "'";
171           }
172           std::string matched;
173           if (peg::success(len) &&
174               peg::TokenChecker::is_token(const_cast<peg::Ope &>(ope))) {
175             matched = ", match '" + peg::escape_characters(s, len) + "'";
176           }
177           std::cout << "L " << pos << "\t" << indent << ret << name << " #"
178                     << c.trace_ids.back() << choice.str() << token << matched << std::endl;
179         });
180   }
181 
182   if (opt_ast) {
183     parser.enable_ast();
184 
185     std::shared_ptr<peg::Ast> ast;
186     auto ret = parser.parse_n(source.data(), source.size(), ast);
187 
188     if (ast) {
189       if (opt_optimize) {
190         ast = parser.optimize_ast(ast, opt_mode);
191       }
192       std::cout << peg::ast_to_s(ast);
193     }
194 
195     if (!ret) { return -1; }
196   } else {
197     if (!parser.parse_n(source.data(), source.size())) { return -1; }
198   }
199 
200   return 0;
201 }
202