1 //
2 // peglint.cc
3 //
4 // Copyright (c) 2021 Yuji Hirose. All rights reserved.
5 // MIT License
6 //
7
8 #include <fstream>
9 #include <peglib.h>
10 #include <sstream>
11
12 using namespace std;
13
/// Reads the whole file at `path` into `buff` as raw bytes.
///
/// The file is opened in binary mode and `buff` is resized to the file's
/// length, so an empty file yields an empty buffer.
///
/// @param path  Path of the file to read.
/// @param buff  Receives the file contents; left untouched when the file
///              cannot be opened or its size cannot be determined.
/// @return true when the complete file was read; false when the file cannot
///         be opened, its size cannot be determined, or fewer bytes than
///         expected could be read.
inline bool read_file(const char *path, std::vector<char> &buff) {
  std::ifstream ifs(path, std::ios::in | std::ios::binary);
  if (ifs.fail()) { return false; }

  // tellg() yields -1 when the stream cannot report a position. Keep the
  // value signed so that case is detected instead of being turned into a
  // huge unsigned size, and so large files are not truncated to 32 bits.
  const auto size =
      static_cast<std::streamoff>(ifs.seekg(0, std::ios::end).tellg());
  if (size < 0) { return false; }
  if (static_cast<unsigned long long>(size) > buff.max_size()) {
    return false;
  }

  buff.resize(static_cast<std::size_t>(size));
  if (buff.empty()) { return true; }

  ifs.seekg(0, std::ios::beg)
      .read(buff.data(), static_cast<std::streamsize>(buff.size()));

  // read() sets failbit when it delivers fewer bytes than requested.
  return !ifs.fail();
}
24
/// Splits `s` into the pieces separated by `delim`.
///
/// Adjacent delimiters produce empty pieces, but a delimiter at the very end
/// of `s` does not add a trailing empty piece, and an empty `s` yields no
/// pieces at all.
///
/// @param s      Text to split.
/// @param delim  Separator character; it is not included in the pieces.
/// @return The pieces in order of appearance.
inline std::vector<std::string> split(const std::string &s, char delim) {
  std::vector<std::string> pieces;

  std::size_t begin = 0;
  while (begin < s.size()) {
    const std::size_t end = s.find(delim, begin);
    if (end == std::string::npos) {
      // No further delimiter: the remainder is the last piece.
      pieces.push_back(s.substr(begin));
      break;
    }
    pieces.push_back(s.substr(begin, end - begin));
    begin = end + 1;
  }

  return pieces;
}
34
main(int argc,const char ** argv)35 int main(int argc, const char **argv) {
36 auto opt_packrat = false;
37 auto opt_ast = false;
38 auto opt_optimize = false;
39 auto opt_mode = true;
40 auto opt_help = false;
41 auto opt_source = false;
42 vector<char> source;
43 auto opt_trace = false;
44 vector<const char *> path_list;
45
46 auto argi = 1;
47 while (argi < argc) {
48 auto arg = argv[argi++];
49 if (string("--help") == arg) {
50 opt_help = true;
51 } else if (string("--packrat") == arg) {
52 opt_packrat = true;
53 } else if (string("--ast") == arg) {
54 opt_ast = true;
55 } else if (string("--opt") == arg || string("--opt-all") == arg) {
56 opt_optimize = true;
57 opt_mode = true;
58 } else if (string("--opt-only") == arg) {
59 opt_optimize = true;
60 opt_mode = false;
61 } else if (string("--source") == arg) {
62 opt_source = true;
63 if (argi < argc) {
64 std::string text = argv[argi++];
65 source.assign(text.begin(), text.end());
66 }
67 } else if (string("--trace") == arg) {
68 opt_trace = true;
69 } else {
70 path_list.push_back(arg);
71 }
72 }
73
74 if (path_list.empty() || opt_help) {
75 cerr << R"(usage: grammar_file_path [source_file_path]
76
77 options:
78 --source: source text
79 --packrat: enable packrat memoise
80 --ast: show AST tree
81 --opt, --opt-all: optimaze all AST nodes except nodes selected with `no_ast_opt` instruction
82 --opt-only: optimaze only AST nodes selected with `no_ast_opt` instruction
83 --trace: show trace messages
84 )";
85
86 return 1;
87 }
88
89 // Check PEG grammar
90 auto syntax_path = path_list[0];
91
92 vector<char> syntax;
93 if (!read_file(syntax_path, syntax)) {
94 cerr << "can't open the grammar file." << endl;
95 return -1;
96 }
97
98 peg::parser parser;
99
100 parser.log = [&](size_t ln, size_t col, const string &msg) {
101 cerr << syntax_path << ":" << ln << ":" << col << ": " << msg << endl;
102 };
103
104 if (!parser.load_grammar(syntax.data(), syntax.size())) { return -1; }
105
106 if (path_list.size() < 2 && !opt_source) { return 0; }
107
108 // Check source
109 std::string source_path = "[commandline]";
110 if (path_list.size() >= 2) {
111 if (!read_file(path_list[1], source)) {
112 cerr << "can't open the code file." << endl;
113 return -1;
114 }
115 source_path = path_list[1];
116 }
117
118 parser.log = [&](size_t ln, size_t col, const string &msg) {
119 cerr << source_path << ":" << ln << ":" << col << ": " << msg << endl;
120 };
121
122 if (opt_packrat) {
123 parser.enable_packrat_parsing();
124 }
125
126 if (opt_trace) {
127 size_t prev_pos = 0;
128 parser.enable_trace(
129 [&](const peg::Ope &ope, const char *s, size_t /*n*/,
130 const peg::SemanticValues & /*sv*/, const peg::Context &c,
131 const std::any & /*dt*/) {
132 auto pos = static_cast<size_t>(s - c.s);
133 auto backtrack = (pos < prev_pos ? "*" : "");
134 string indent;
135 auto level = c.trace_ids.size() - 1;
136 while (level--) {
137 indent += "│";
138 }
139 std::string name;
140 {
141 name = peg::TraceOpeName::get(const_cast<peg::Ope &>(ope));
142
143 auto lit = dynamic_cast<const peg::LiteralString *>(&ope);
144 if (lit) { name += " '" + peg::escape_characters(lit->lit_) + "'"; }
145 }
146 std::cout << "E " << pos << backtrack << "\t" << indent << "┌" << name
147 << " #" << c.trace_ids.back() << std::endl;
148 prev_pos = static_cast<size_t>(pos);
149 },
150 [&](const peg::Ope &ope, const char *s, size_t /*n*/,
151 const peg::SemanticValues &sv, const peg::Context &c,
152 const std::any & /*dt*/, size_t len) {
153 auto pos = static_cast<size_t>(s - c.s);
154 if (len != static_cast<size_t>(-1)) { pos += len; }
155 string indent;
156 auto level = c.trace_ids.size() - 1;
157 while (level--) {
158 indent += "│";
159 }
160 auto ret = len != static_cast<size_t>(-1) ? "└o " : "└x ";
161 auto name = peg::TraceOpeName::get(const_cast<peg::Ope &>(ope));
162 std::stringstream choice;
163 if (sv.choice_count() > 0) {
164 choice << " " << sv.choice() << "/" << sv.choice_count();
165 }
166 std::string token;
167 if (!sv.tokens.empty()) {
168 token += ", token '";
169 token += sv.tokens[0];
170 token += "'";
171 }
172 std::string matched;
173 if (peg::success(len) &&
174 peg::TokenChecker::is_token(const_cast<peg::Ope &>(ope))) {
175 matched = ", match '" + peg::escape_characters(s, len) + "'";
176 }
177 std::cout << "L " << pos << "\t" << indent << ret << name << " #"
178 << c.trace_ids.back() << choice.str() << token << matched << std::endl;
179 });
180 }
181
182 if (opt_ast) {
183 parser.enable_ast();
184
185 std::shared_ptr<peg::Ast> ast;
186 auto ret = parser.parse_n(source.data(), source.size(), ast);
187
188 if (ast) {
189 if (opt_optimize) {
190 ast = parser.optimize_ast(ast, opt_mode);
191 }
192 std::cout << peg::ast_to_s(ast);
193 }
194
195 if (!ret) { return -1; }
196 } else {
197 if (!parser.parse_n(source.data(), source.size())) { return -1; }
198 }
199
200 return 0;
201 }
202