1 #include "Parse.h"
2 #include "ParseImpl.h"
3 
4 #include "ConditionParserImpl.h"
5 #include "EffectParser.h"
6 #include "EnumParser.h"
7 #include "ValueRefParser.h"
8 
9 #include "../universe/UnlockableItem.h"
10 #include "../util/Logger.h"
11 #include "../util/Directories.h"
12 
13 #include <boost/xpressive/xpressive.hpp>
14 #include <boost/algorithm/string/replace.hpp>
15 #include <boost/filesystem/operations.hpp>
16 #include <boost/filesystem/fstream.hpp>
17 #include <boost/spirit/include/phoenix.hpp>
18 #include <boost/algorithm/string/find_iterator.hpp>
19 
#define DEBUG_PARSERS 0

#if DEBUG_PARSERS
// No-op stream inserters, compiled only when parser debugging is enabled.
// NOTE(review): presumably these exist so that debug(...) on the rules below
// can "print" attribute types that have no text representation of their own.
namespace std {
    inline ostream& operator<<(ostream& out, const std::vector<Effect::Effect*>&)
    { return out; }

    inline ostream& operator<<(ostream& out, const GG::Clr&)
    { return out; }

    inline ostream& operator<<(ostream& out, const UnlockableItem&)
    { return out; }
}
#endif
29 
30 namespace parse {
31     using namespace boost::xpressive;
32     const sregex MACRO_KEY = +_w;   // word character (alnum | _), one or more times, greedy
33     const sregex MACRO_TEXT = -*_;  // any character, zero or more times, not greedy
34     const sregex MACRO_DEFINITION = (s1 = MACRO_KEY) >> _n >> "'''" >> (s2 = MACRO_TEXT) >> "'''" >> _n;
35     const sregex MACRO_INSERTION = "[[" >> *space >> (s1 = MACRO_KEY) >> *space >> !("(" >> (s2 = +~(set = ')', '\n')) >> ")") >> "]]";
36 
parse_and_erase_macro_definitions(std::string & text,std::map<std::string,std::string> & macros)37     void parse_and_erase_macro_definitions(std::string& text, std::map<std::string, std::string>& macros) {
38         try {
39             std::string::iterator text_it = text.begin();
40             while (true) {
41                 // find next macro definition
42                 smatch match;
43                 if (!regex_search(text_it, text.end(), match, MACRO_DEFINITION, regex_constants::match_default))
44                     break;
45 
46                 //const std::string& matched_text = match.str();  // [[MACRO_KEY]] '''macro text'''
47                 //DebugLogger() << "found macro definition:\n" << matched_text;
48 
49                 // get macro key and macro text from match
50                 const std::string& macro_key = match[1];
51                 assert(macro_key != "");
52                 const std::string& macro_text = match[2];
53                 assert(macro_text != "");
54 
55                 //DebugLogger() << "key: " << macro_key;
56                 //DebugLogger() << "text:\n" << macro_text;
57 
58                 // store macro
59                 if (!macros.count(macro_key)) {
60                     macros[macro_key] = macro_text;
61                 } else {
62                     ErrorLogger() << "Duplicate macro key foud: " << macro_key << ".  Ignoring duplicate.";
63                 }
64 
65                 // remove macro definition from text by replacing with a newline that is ignored by later parsing
66                 text.replace(text_it + match.position(), text_it + match.position() + match.length(), "\n");
67                 // subsequent scanning starts after macro defininition
68                 text_it = text.end() - match.suffix().length();
69             }
70         } catch (const std::exception& e) {
71             ErrorLogger() << "Exception caught regex parsing script file: " << e.what();
72             std::cerr << "Exception caught regex parsing script file: " << e.what() << std::endl;
73             return;
74         }
75     }
76 
macros_directly_referenced_in_text(const std::string & text)77     std::set<std::string> macros_directly_referenced_in_text(const std::string& text) {
78         std::set<std::string> retval;
79         try {
80             std::size_t position = 0; // position in the text, past the already processed part
81             smatch match;
82             while (regex_search(text.begin() + position, text.end(), match, MACRO_INSERTION, regex_constants::match_default)) {
83                 position += match.position() + match.length();
84                 retval.insert(match[1]);
85             }
86         } catch (const std::exception& e) {
87             ErrorLogger() << "Exception caught regex parsing script file: " << e.what();
88             std::cerr << "Exception caught regex parsing script file: " << e.what() << std::endl;
89             return retval;
90         }
91         return retval;
92     }
93 
macro_deep_referenced_in_text(const std::string & macro_to_find,const std::string & text,const std::map<std::string,std::string> & macros)94     bool macro_deep_referenced_in_text(const std::string& macro_to_find, const std::string& text,
95                                        const std::map<std::string, std::string>& macros)
96     {
97         TraceLogger() << "Checking if " << macro_to_find << " deep referenced in text: " << text;
98         // check of text directly references macro_to_find
99         std::set<std::string> macros_directly_referenced_in_input_text = macros_directly_referenced_in_text(text);
100         if (macros_directly_referenced_in_input_text.empty())
101             return false;
102         if (macros_directly_referenced_in_input_text.count(macro_to_find))
103             return true;
104         // check if macros referenced in text reference macro_to_find
105         for (const std::string& direct_referenced_macro_key : macros_directly_referenced_in_input_text) {
106             // get text of directly referenced macro
107             auto macro_it = macros.find(direct_referenced_macro_key);
108             if (macro_it == macros.end()) {
109                 ErrorLogger() << "macro_deep_referenced_in_text couldn't find referenced macro: " << direct_referenced_macro_key;
110                 continue;
111             }
112             const std::string& macro_text = macro_it->second;
113             // check of text of directly referenced macro has any reference to the macro_to_find
114             if (macro_deep_referenced_in_text(macro_to_find, macro_text, macros))
115                 return true;
116         }
117         // didn't locate macro_to_find in any of the macros referenced in this text
118         return false;
119     }
120 
check_for_cyclic_macro_references(const std::map<std::string,std::string> & macros)121     void check_for_cyclic_macro_references(const std::map<std::string, std::string>& macros) {
122         for (const auto& macro : macros) {
123             if (macro_deep_referenced_in_text(macro.first, macro.second, macros))
124                 ErrorLogger() << "Cyclic macro found: " << macro.first << " references itself (eventually)";
125         }
126     }
127 
replace_macro_references(std::string & text,const std::map<std::string,std::string> & macros)128     void replace_macro_references(std::string& text,
129                                   const std::map<std::string, std::string>& macros)
130     {
131         try {
132             std::size_t position = 0; // position in the text, past the already processed part
133             smatch match;
134             while (regex_search(text.begin() + position, text.end(), match, MACRO_INSERTION, regex_constants::match_default)) {
135                 position += match.position();
136                 const std::string& matched_text = match.str();  // [[MACRO_KEY]] or [[MACRO_KEY(foo,bar,...)]]
137                 const std::string& macro_key = match[1];        // just MACRO_KEY
138                 // look up macro key to insert
139                 auto macro_lookup_it = macros.find(macro_key);
140                 if (macro_lookup_it != macros.end()) {
141                     // verify that macro is safe: check for cyclic reference of macro to itself
142                     if (macro_deep_referenced_in_text(macro_key, macro_lookup_it->second, macros)) {
143                         ErrorLogger() << "Skipping cyclic macro reference: " << macro_key;
144                         position += match.length();
145                     } else {
146                         // insert macro text in place of reference
147                         std::string replacement = macro_lookup_it->second;
148                         std::string macro_params = match[2]; // arg1,arg2,arg3,etc.
149                         if (!macro_params.empty()) { // found macro parameters
150                             int replace_number = 1;
151                             for (boost::split_iterator<std::string::iterator> it =
152                                     boost::make_split_iterator(macro_params, boost::first_finder(",", boost::is_iequal()));
153                                 it != boost::split_iterator<std::string::iterator>();
154                                 ++it, ++replace_number)
155                             {
156                                 // not using %1% (and boost::fmt) because the replaced text may itself have %s inside it that will get eaten
157                                 boost::replace_all(replacement, "@" + std::to_string(replace_number) + "@", boost::copy_range<std::string>(*it));
158                             }
159                         }
160                         text.replace(position, matched_text.length(), replacement);
161                         // recursive replacement allowed, so don't skip past
162                         // start of replacement text, so that inserted text can
163                         // be matched on the next pass
164                     }
165                 } else {
166                     ErrorLogger() << "Unresolved macro reference: " << macro_key;
167                     position += match.length();
168                 }
169             }
170         } catch (const std::exception& e) {
171             ErrorLogger() << "Exception caught regex parsing script file: " << e.what();
172             std::cerr << "Exception caught regex parsing script file: " << e.what() << std::endl;
173             return;
174         }
175     }
176 
macro_substitution(std::string & text)177     void macro_substitution(std::string& text) {
178         //DebugLogger() << "macro_substitution for text:" << text;
179         std::map<std::string, std::string> macros;
180 
181         parse_and_erase_macro_definitions(text, macros);
182         check_for_cyclic_macro_references(macros);
183 
184         //DebugLogger() << "after macro pasring text:" << text;
185 
186         // recursively expand macro keys: replace [[MACRO_KEY]] in other macro
187         // text with the macro text corresponding to MACRO_KEY.
188         for (auto& macro : macros)
189         { replace_macro_references(macro.second, macros); }
190 
191         // substitute macro keys - replace [[MACRO_KEY]] in the input text with
192         // the macro text corresponding to MACRO_KEY
193         replace_macro_references(text, macros);
194 
195         //DebugLogger() << "after macro substitution text: " << text;
196     }
197 
read_file(const boost::filesystem::path & path,std::string & file_contents)198     bool read_file(const boost::filesystem::path& path, std::string& file_contents) {
199         boost::filesystem::ifstream ifs(path);
200         if (!ifs)
201             return false;
202 
203         // skip byte order mark (BOM)
204         for (int BOM : {0xEF, 0xBB, 0xBF}) {
205             if (BOM != ifs.get()) {
206                 // no header set stream back to start of file
207                 ifs.seekg(0, std::ios::beg);
208                 // and continue
209                 break;
210             }
211         }
212 
213         std::getline(ifs, file_contents, '\0');
214 
215         // no problems?
216         return true;
217     }
218 
219     const sregex FILENAME_TEXT = -+_;   // any character, one or more times, not greedy
220     const sregex FILENAME_INSERTION = bol >> "#include" >> *space >> "\"" >> (s1 = FILENAME_TEXT) >> "\"" >> *space >> _n;
221 
222     std::set<std::string> missing_include_files;
223 
224     /** \brief Resolve script directives
225      *
226      * @param[in,out] text contents to search through
227      * @param[in] file_search_path base path of content
228      */
file_substitution(std::string & text,const boost::filesystem::path & file_search_path)229     void file_substitution(std::string& text, const boost::filesystem::path& file_search_path) {
230         if (!boost::filesystem::is_directory(file_search_path)) {
231             ErrorLogger() << "File parsing include substitution given search path that is not a directory: "
232                           << file_search_path.string();
233             return;
234         }
235         try {
236             std::set<boost::filesystem::path> files_included;
237             process_include_substitutions(text, file_search_path, files_included);
238         } catch (const std::exception& e) {
239             ErrorLogger() << "Exception caught regex parsing script file: " << e.what();
240             std::cerr << "Exception caught regex parsing script file: " << e.what() << std::endl;
241             return;
242         }
243     }
244 
245     /** \brief Replace all include statements with contents of file
246      *
247      * Search for any include statements in *text* and replace them with the contents
248      * of the file given.  File lookup is relative to *file_search_path* and will not
249      * be included if found in *files_included*.
250      * Each included file is added to *files_included*.
251      * This is a recursive function, processing the contents of any included files.
252      *
253      * @param[in,out] text content to search through
254      * @param[in] file_search_path base path of content
255      * @param[in,out] files_included canonical path of any files previously included
256      * */
process_include_substitutions(std::string & text,const boost::filesystem::path & file_search_path,std::set<boost::filesystem::path> & files_included)257     void process_include_substitutions(std::string& text, const boost::filesystem::path& file_search_path,
258                                        std::set<boost::filesystem::path>& files_included)
259     {
260         smatch match;
261         while (regex_search(text.begin(), text.end(), match, FILENAME_INSERTION, regex_constants::match_default)) {
262             const std::string& fn_match = match[1];
263             if (fn_match.empty()) {
264                 continue;
265             }
266             const sregex INCL_ONCE_SEARCH = bol >> "#include" >> *space >> "\"" >> fn_match >> "\"" >> *space >> _n;
267             boost::filesystem::path base_path;
268             boost::filesystem::path match_path;
269             // check for base path
270             if (fn_match.substr(0, 1) == "/") {
271                 base_path = GetResourceDir();
272                 match_path = base_path / fn_match.substr(1);
273             } else {
274                 base_path = file_search_path;
275                 match_path = base_path / fn_match;
276             }
277             std::string fn_str = boost::filesystem::path(fn_match).filename().string();
278             if (fn_str.substr(0, 1) == "*") {
279                 if (match_path.parent_path().empty()) {
280                     DebugLogger() << "Parse: " << match_path.parent_path().string() << " is empty, skipping.";
281                     continue;
282                 }
283                 fn_str = fn_str.substr(1, fn_str.size() - 1);
284                 std::set<boost::filesystem::path> match_list;
285                 // filter results
286                 for (const boost::filesystem::path& file : ListDir(match_path.parent_path())) {
287                     std::string it_str = file.filename().string();
288                     std::size_t it_len = it_str.length();
289                     std::size_t match_len = fn_str.length();
290                     if (it_len > match_len) {
291                         if (it_str.substr(it_len - match_len, match_len) == fn_str) {
292                             match_list.insert(file);
293                         }
294                     }
295                 }
296                 // read in results
297                 std::string dir_text;
298                 for (const boost::filesystem::path& file : match_list) {
299                     if (files_included.insert(boost::filesystem::canonical(file)).second) {
300                         std::string new_text;
301                         if (read_file(file, new_text)) {
302                             new_text.append("\n");
303                             dir_text.append(new_text);
304                         } else {
305                             ErrorLogger() << "Parse: Unable to read file " << file.string();
306                         }
307                     }
308                 }
309                 text = regex_replace(text, INCL_ONCE_SEARCH, dir_text, regex_constants::format_first_only);
310             } else if (files_included.insert(boost::filesystem::canonical(match_path)).second) {
311                 std::string file_content;
312                 if (read_file(match_path, file_content)) {
313                     file_content.append("\n");
314                     process_include_substitutions(file_content, match_path.parent_path(), files_included);
315                     text = regex_replace(text, INCL_ONCE_SEARCH, file_content, regex_constants::format_first_only);
316                 } else if (missing_include_files.insert(PathToString(match_path)).second) {
317                     ErrorLogger() << "Parse: " << PathToString(match_path) << " was not found for inclusion (Path:"
318                                   << PathToString(base_path) << ") (File:" << fn_str << ")";
319                 }
320             }
321             // remove any remaining includes of this file
322             text = regex_replace(text, INCL_ONCE_SEARCH, "\n", regex_constants::match_default);
323             // TraceLogger() << "Parse: contents after scrub of " << fn_match << ":\n" << text;
324         }
325     }
326 
327     namespace detail {
double_grammar(const parse::lexer & tok)328     double_grammar::double_grammar(const parse::lexer& tok) :
329         double_grammar::base_type(double_, "double_grammar")
330     {
331         namespace phoenix = boost::phoenix;
332         namespace qi = boost::spirit::qi;
333 
334         using phoenix::static_cast_;
335 
336         qi::_1_type _1;
337         qi::_val_type _val;
338 
339         double_
340             =    '-' >> tok.int_ [ _val = -static_cast_<double>(_1) ]
341             |    tok.int_ [ _val = static_cast_<double>(_1) ]
342             |    '-' >> tok.double_ [ _val = -_1 ]
343             |    tok.double_ [ _val = _1 ]
344             ;
345 
346         double_.name("real number");
347 
348 #if DEBUG_PARSERS
349         debug(double_);
350 #endif
351 
352     }
353 
int_grammar(const parse::lexer & tok)354     int_grammar::int_grammar(const parse::lexer& tok) :
355         int_grammar::base_type(int_, "int_grammar")
356     {
357 
358         namespace phoenix = boost::phoenix;
359         namespace qi = boost::spirit::qi;
360 
361         using phoenix::static_cast_;
362 
363         qi::_1_type _1;
364         qi::_val_type _val;
365 
366         int_
367             =    '-' >> tok.int_ [ _val = -_1 ]
368             |    tok.int_ [ _val = _1 ]
369             ;
370 
371         int_.name("integer");
372 
373 #if DEBUG_PARSERS
374         debug(detail::int_);
375         debug(detail::double_);
376 #endif
377     }
378 
379 #define PARSING_LABELS_OPTIONAL false
operator ()(const parse::lexer::string_token_def & token)380     label_rule& Labeller::operator()(const parse::lexer::string_token_def& token) {
381         auto it = m_rules.find(&token);
382         if (it != m_rules.end())
383             return it->second;
384 
385         label_rule& retval = m_rules[&token];
386         if (PARSING_LABELS_OPTIONAL) {
387             retval = -(token >> '=');
388         } else {
389             retval =  (token >> '=');
390         }
391         return retval;
392     }
393 
tags_grammar(const parse::lexer & tok,Labeller & label)394     tags_grammar::tags_grammar(const parse::lexer& tok,
395                                Labeller& label) :
396         tags_grammar::base_type(start, "tags_grammar"),
397         one_or_more_string_tokens(tok)
398     {
399         namespace phoenix = boost::phoenix;
400         namespace qi = boost::spirit::qi;
401 
402         using phoenix::insert;
403 
404         start %=
405             -(
406                 label(tok.Tags_)
407                 >>  one_or_more_string_tokens
408             )
409             ;
410 
411         start.name("Tags");
412 
413 #if DEBUG_PARSERS
414         debug(start);
415 #endif
416     }
417 
color_parser_grammar(const parse::lexer & tok)418     color_parser_grammar::color_parser_grammar(const parse::lexer& tok) :
419         color_parser_grammar::base_type(start, "color_parser_grammar")
420     {
421         namespace phoenix = boost::phoenix;
422         namespace qi = boost::spirit::qi;
423 
424         using phoenix::construct;
425         using phoenix::if_;
426 
427         qi::_1_type _1;
428         qi::_2_type _2;
429         qi::_3_type _3;
430         qi::_4_type _4;
431         qi::_pass_type _pass;
432         qi::_val_type _val;
433         qi::eps_type eps;
434 
435         channel = tok.int_ [ _val = _1, _pass = 0 <= _1 && _1 <= 255 ];
436         alpha   = (',' > channel) [ _val = _1 ] | eps [ _val = 255 ];
437         start
438             =  ( ('(' >> channel )
439             >    (',' >> channel )
440             >    (',' >> channel )
441             >    alpha
442             >    ')'
443                ) [ _val  = construct<GG::Clr>(_1, _2, _3, _4)]
444             ;
445 
446         channel.name("colour channel (0 to 255)");
447         alpha.name("alpha channel (0 to 255) defaults to 255");
448         start.name("Colour");
449 
450 #if DEBUG_PARSERS
451         debug(channel);
452         debug(start);
453 #endif
454     }
455 
unlockable_item_grammar(const parse::lexer & tok,Labeller & label)456     unlockable_item_grammar::unlockable_item_grammar(const parse::lexer& tok, Labeller& label) :
457         unlockable_item_grammar::base_type(start, "unlockable_item_grammar"),
458         unlockable_item_type_enum(tok)
459     {
460         namespace phoenix = boost::phoenix;
461         namespace qi = boost::spirit::qi;
462 
463         using phoenix::construct;
464 
465         qi::_1_type _1;
466         qi::_2_type _2;
467         qi::_val_type _val;
468         qi::omit_type omit_;
469 
470         start
471             =  ( omit_[tok.Item_]
472             >    label(tok.Type_) > unlockable_item_type_enum
473             >    label(tok.Name_) > tok.string
474                ) [ _val = construct<UnlockableItem>(_1, _2) ]
475             ;
476 
477         start.name("UnlockableItem");
478 
479 #if DEBUG_PARSERS
480         debug(start);
481 #endif
482     }
483 
484 
485     /** \brief Load and parse script file(s) from given path
486         *
487         * @param[in] path absolute path to a regular file
488         * @param[in] l lexer instance to use
489         * @param[out] filename filename of the given path
490         * @param[out] file_contents parsed contents of file(s)
491         * @param[out] first content iterator
492         * @param[out] it lexer iterator
493         */
parse_file_common(const boost::filesystem::path & path,const parse::lexer & lexer,std::string & filename,std::string & file_contents,parse::text_iterator & first,parse::text_iterator & last,parse::token_iterator & it)494     void parse_file_common(const boost::filesystem::path& path, const parse::lexer& lexer,
495                             std::string& filename, std::string& file_contents,
496                             parse::text_iterator& first, parse::text_iterator& last, parse::token_iterator& it)
497     {
498         filename = path.string();
499 
500         bool read_success = read_file(path, file_contents);
501         if (!read_success) {
502             ErrorLogger() << "Unable to open data file " << filename;
503             return;
504         }
505 
506         // add newline at end to avoid errors when one is left out, but is expected by parsers
507         file_contents += "\n";
508 
509         file_substitution(file_contents, path.parent_path());
510         macro_substitution(file_contents);
511 
512         first = file_contents.begin();
513         last = file_contents.end();
514 
515         it = lexer.begin(first, last);
516     }
517 
518 
parse_file_end_of_file_warnings(const boost::filesystem::path & path,bool parser_success,std::string & file_contents,text_iterator & first,text_iterator & last)519     bool parse_file_end_of_file_warnings(const boost::filesystem::path& path,
520                                             bool parser_success,
521                                             std::string& file_contents,
522                                             text_iterator& first,
523                                             text_iterator& last)
524     {
525         if (!parser_success)
526             WarnLogger() << "A parser failed while parsing " << path;
527 
528         auto length_of_unparsed_file = std::distance(first, last);
529         bool parse_length_good = ((length_of_unparsed_file == 0)
530                                     || (length_of_unparsed_file == 1 && *first == '\n'));
531 
532         if (!parse_length_good
533             && length_of_unparsed_file > 0
534             && static_cast<std::string::size_type>(length_of_unparsed_file) <= file_contents.size())
535         {
536             auto unparsed_section = file_contents.substr(file_contents.size() - std::abs(length_of_unparsed_file));
537             std::copy(first, last, std::back_inserter(unparsed_section));
538             WarnLogger() << "File \"" << path << "\" was incompletely parsed. " << std::endl
539                             << "Unparsed section of file, " << length_of_unparsed_file <<" characters:" << std::endl
540                             << unparsed_section;
541         }
542 
543         return parser_success && parse_length_good;
544     }
545 }}
546