1 
2 /*++
3 Copyright (c) 2015 Microsoft Corporation
4 
5 --*/
6 
7 
8 #include "muz/fp/datalog_parser.h"
9 #include "util/string_buffer.h"
10 #include "util/str_hashtable.h"
11 #include "ast/ast_pp.h"
12 #include "ast/arith_decl_plugin.h"
13 #include "util/region.h"
14 #include "util/warning.h"
15 #include<iostream>
16 #include<sstream>
17 #include<cstdio>
18 
19 using namespace datalog;
20 
21 enum dtoken {
22     TK_LP,
23     TK_RP,
24     TK_STRING,
25     TK_ID,
26     TK_NUM,
27     TK_PERIOD,
28     TK_INCLUDE,
29     TK_COMMA,
30     TK_COLON,
31     TK_WILD,
32     TK_LEFT_ARROW,
33     TK_EOS,
34     TK_NEWLINE,
35     TK_ERROR,
36     TK_NEQ,
37     TK_LT,
38     TK_GT,
39     TK_EQ,
40     TK_NEG
41 };
42 
43 static char const* dtoken_strings[] = { "(", ")", "<string>", "<id>", "<num>", ".", ".include", ",", ":", "_", ":-", "<eos>", "\\n", "<error>", "!=", "<", ">", "=", "!" };
44 
45 class line_reader {
46 
47     static const char s_delimiter = '\n';
48     static const unsigned s_expansion_step = 1024;
49 
50     FILE * m_file;
51     svector<char> m_data;
52     bool m_eof;
53     bool m_eof_behind_buffer;
54     unsigned m_next_index;
55     bool m_ok;
56 
57     //actually by one larger than the actual size of m_data,
58     //to fit in the terminating delimiter
59     unsigned m_data_size;
60 
resize_data(unsigned sz)61     void resize_data(unsigned sz) {
62         m_data_size = sz;
63         m_data.resize(m_data_size+1);
64         m_data[m_data_size] = s_delimiter;
65     }
66 
67 #if 0
68     void refill_buffer(unsigned start) {
69         unsigned should_read = m_data_size-start;
70         m_stm.read(m_data.begin()+start, should_read);
71         unsigned actually_read = static_cast<unsigned>(m_stm.gcount());
72         SASSERT(should_read==actually_read || m_stm.eof());
73         if(m_stm.eof()) {
74             m_eof_behind_buffer = true;
75             resize_data(start+actually_read);
76         }
77     }
78 #else
refill_buffer(unsigned start)79     void refill_buffer(unsigned start) {
80         unsigned should_read = m_data_size-start;
81         size_t actually_read = fread(m_data.begin()+start, 1, should_read, m_file);
82         if(actually_read==should_read) {
83             return;
84         }
85         SASSERT(actually_read < should_read);
86         SASSERT(feof(m_file));
87         m_eof_behind_buffer = true;
88         resize_data(start+static_cast<unsigned>(actually_read));
89     }
90 #endif
91 
92 public:
93 
line_reader(const char * fname)94     line_reader(const char * fname)
95         :m_eof(false),
96          m_eof_behind_buffer(false),
97          m_next_index(0),
98          m_ok(true),
99          m_data_size(0) {
100         m_data.resize(2*s_expansion_step);
101         resize_data(0);
102 #if _WINDOWS
103         errno_t err = fopen_s(&m_file, fname, "rb");
104         m_ok = (m_file != nullptr) && (err == 0);
105 #else
106         m_file = fopen(fname, "rb");
107         m_ok = (m_file != nullptr);
108 #endif
109     }
~line_reader()110     ~line_reader() {
111         if (m_file != nullptr){
112             fclose(m_file);
113         }
114     }
115 
operator ()()116     bool operator()() { return m_ok; }
117 
118     /**
119        \brief Retrieve next line from the stream.
120 
121        This operation invalidates the line previously retrieved.
122 
123        This operation can be called only if we are not at the end of file.
124 
125        User is free to modify the content of the returned array until the terminating NULL character.
126      */
get_line()127     char * get_line() {
128         SASSERT(!m_eof);
129         unsigned start = m_next_index;
130         unsigned curr = start;
131         for(;;) {
132             SASSERT(curr<=m_data_size);
133             SASSERT(m_data[m_data_size]==s_delimiter);
134             {
135                 const char * data_ptr = m_data.begin();
136                 const char * ptr = data_ptr+curr;
137                 while(*ptr!=s_delimiter) {
138                     ptr++;
139                 }
140                 curr = static_cast<unsigned>(ptr-data_ptr);
141             }
142             SASSERT(m_data[curr]==s_delimiter);
143             if(curr<m_data_size || m_eof_behind_buffer) {
144                 if(curr==m_data_size) {
145                     SASSERT(m_eof_behind_buffer);
146                     m_eof = true;
147                 }
148                 m_data[curr] = 0; //put in string terminator
149                 m_next_index = curr+1;
150                 return m_data.begin()+start;
151             }
152             if(start!=0) {
153                 unsigned len = curr-start;
154                 if(len) {
155                     memmove(m_data.begin(), m_data.begin()+start, len);
156                 }
157                 start = 0;
158                 curr = len;
159             }
160             if(m_data_size-curr<s_expansion_step) {
161                 resize_data(m_data_size+s_expansion_step);
162             }
163             refill_buffer(curr);
164         }
165     }
166 
eof() const167     bool eof() const { return m_eof; }
168 };
169 
170 class char_reader {
171     line_reader m_line_reader;
172     char const* m_line;
173 public:
char_reader(char const * file)174     char_reader(char const* file):
175         m_line_reader(file),
176         m_line(nullptr)
177     {}
178 
operator ()()179     bool operator()() { return m_line_reader(); }
180 
get()181     char get() {
182         if (!m_line) {
183             if (m_line_reader.eof()) {
184                 return EOF;
185             }
186             m_line = m_line_reader.get_line();
187         }
188         if (!(m_line[0])) {
189             m_line = nullptr;
190             return '\n';
191         }
192         char result = m_line[0];
193         ++m_line;
194         return result;
195     }
196 
eof()197     bool eof() {
198         return m_line_reader.eof();
199     }
200 };
201 
202 class reserved_symbols {
203     typedef map<char const *, dtoken, str_hash_proc, str_eq_proc> str2token;
204     str2token m_str2token;
205 
206 public:
reserved_symbols()207     reserved_symbols() {
208         m_str2token.insert(":-", TK_LEFT_ARROW);
209         m_str2token.insert("_",  TK_WILD);
210         m_str2token.insert(".",  TK_PERIOD);
211         m_str2token.insert("!=", TK_NEQ);
212         m_str2token.insert("=", TK_EQ);
213         m_str2token.insert("<",  TK_LT);
214         m_str2token.insert(">",  TK_GT);
215         m_str2token.insert(":",  TK_COLON);
216         m_str2token.insert(".include", TK_INCLUDE);
217         m_str2token.insert("!", TK_NEG);
218     }
219 
string2dtoken(char const * str)220     dtoken string2dtoken(char const * str) {
221         str2token::entry * e = m_str2token.find_core(str);
222         if (e)
223             return e->get_data().m_value;
224         else
225             return TK_ID;
226     }
227 };
228 
229 
230 class dlexer {
231     std::istream*   m_input;
232     char_reader*    m_reader;
233     char            m_prev_char;
234     char            m_curr_char;
235     int             m_line;
236     int             m_pos;
237     int             m_tok_pos;
238     string_buffer<> m_buffer;
239     reserved_symbols m_reserved_symbols;
240 
241 public:
242     //when parsing domains, we want '.' character to be allowed in IDs, but elsewhere
243     //we don't (because of the "y." in rules like "P(x,y):-x=y.")
244     bool m_parsing_domains;
245 
eos() const246     bool eos() const {
247         return m_curr_char == EOF;
248     }
249 
next()250     void next() {
251         m_prev_char = m_curr_char;
252         if (m_reader) {
253             m_curr_char = m_reader->get();
254         }
255         else {
256             m_curr_char = m_input->get();
257         }
258         m_pos++;
259     }
260 
save_char(char c)261     void save_char(char c) {
262         m_buffer << c;
263     }
264 
save_and_next()265     void save_and_next() {
266         m_buffer << m_curr_char;
267         next();
268     }
269 
dlexer()270     dlexer():
271         m_input(nullptr),
272         m_reader(nullptr),
273         m_prev_char(0),
274         m_curr_char(0),
275         m_line(1),
276         m_pos(0),
277         m_tok_pos(0),
278         m_parsing_domains(false) {
279     }
280 
set_stream(std::istream * s,char_reader * r)281     void set_stream(std::istream* s, char_reader* r) {
282         m_input = s;
283         m_reader = r;
284         next();
285     }
286 
287 
read_num()288     dtoken read_num() {
289         while(isdigit(m_curr_char)) {
290             save_and_next();
291         }
292         return TK_NUM;
293     }
294 
read_id()295     dtoken read_id() {
296         while (!eos() && m_curr_char != '(' && m_curr_char != ')' &&
297                m_curr_char != '#' && m_curr_char != ',' && (m_parsing_domains || m_curr_char != '.') &&
298                m_curr_char != ':' && m_curr_char != '=' && !iswspace(m_curr_char) ) {
299             save_and_next();
300         }
301         return m_reserved_symbols.string2dtoken(m_buffer.c_str());
302     }
303 
304     // read an id of the form '|'.*'|'
read_bid()305     dtoken read_bid() {
306         while (!eos() && m_curr_char != '|') {
307             save_and_next();
308         }
309         if (m_curr_char == '|') {
310             next();
311         }
312         return m_reserved_symbols.string2dtoken(m_buffer.c_str());
313     }
314 
read_string()315     dtoken read_string() {
316         m_tok_pos = m_pos;
317         next();
318         while (m_curr_char != '"') {
319             if (m_input && m_input->eof()) {
320                 return TK_ERROR;
321             }
322             if (m_reader && m_reader->eof()) {
323                 return TK_ERROR;
324             }
325             if (m_curr_char == '\n') {
326                 return TK_ERROR;
327             }
328             save_and_next();
329         }
330         next();
331         return TK_STRING;
332     }
333 
read_comment()334     void read_comment() {
335         bool line_comment = m_prev_char=='\n' || m_prev_char == 0;
336         while (m_curr_char != '\n' && !eos()) {
337             next();
338         }
339         if (line_comment && m_curr_char == '\n') {
340             m_line++;
341             next();
342         }
343     }
344 
lookahead_newline()345     bool lookahead_newline() {
346         while (m_curr_char == ' ') {
347             save_and_next();
348         }
349         if (m_curr_char == '\n') {
350             next();
351             m_line++;
352             m_buffer.reset();
353             return true;
354         }
355         if (m_curr_char == '#') {
356             m_buffer.reset();
357             m_prev_char = 0;
358             read_comment();
359             return true;
360         }
361         return false;
362     }
363 
next_token()364     dtoken next_token() {
365         for(;;) {
366             if (eos()) {
367                 return TK_EOS;
368             }
369 
370             m_buffer.reset();
371             switch (m_curr_char) {
372             case '#': // comment
373                 read_comment();
374                 break;
375             case '\n':
376                 next();
377                 m_line++;
378                 return TK_NEWLINE;
379             case '\\':
380                 // here we ignore a newline if it is preceded by a backslash.
381                 // We need to take care, since anywhere else backshlash is used
382                 // as a regular character
383                 next();
384                 save_char('\\');
385                 if (lookahead_newline()) {
386                     break;
387                 }
388                 return read_id();
389             case '(':
390                 m_tok_pos = m_pos;
391                 next();
392                 return TK_LP;
393             case ')':
394                 m_tok_pos = m_pos;
395                 next();
396                 return TK_RP;
397             case ',':
398                 m_tok_pos = m_pos;
399                 next();
400                 return TK_COMMA;
401             case '=':
402                 m_tok_pos = m_pos;
403                 next();
404                 return TK_EQ;
405             case '!':
406                 m_tok_pos = m_pos;
407                 next();
408                 if(m_curr_char == '=') {
409                     next();
410                     return TK_NEQ;
411                 }
412                 return TK_NEG;
413             case ':':
414                 m_tok_pos = m_pos;
415                 next();
416                 if (m_curr_char == '-') {
417                     next();
418                     return TK_LEFT_ARROW;
419                 }
420                 return TK_COLON;
421             case '\"':
422                 return read_string();
423             case '|':
424                 next();
425                 return read_bid();
426             default:
427                 if (iswspace(m_curr_char)) {
428                     next();
429                     break;
430                 }
431                 else if (iswdigit(m_curr_char)) {
432                     m_tok_pos = m_pos;
433                     save_and_next();
434                     return read_num();
435                 }
436                 else {
437                     char old = m_curr_char;
438                     m_tok_pos = m_pos;
439                     save_and_next();
440                     if (old == '-' && iswdigit(m_curr_char)) {
441                         return read_num();
442                     }
443                     else {
444                         return read_id();
445                     }
446                 }
447             }
448         }
449     }
450 
get_token_data() const451     const char * get_token_data() const {
452         return m_buffer.c_str();
453     }
454 
get_token_pos() const455     unsigned get_token_pos() const {
456         return m_tok_pos;
457     }
458 
get_line() const459     unsigned get_line() const { return m_line; }
460 
461 
462 
463 };
464 
465 class dparser : public parser {
466 protected:
467     typedef map<std::string, expr*,      std_string_hash_proc, default_eq<std::string> > str2var;
468     typedef map<std::string, sort*,      std_string_hash_proc, default_eq<std::string> > str2sort;
469 
470     context&          m_context;
471     ast_manager&      m;
472 
473     dlexer*           m_lexer;
474     region            m_region;
475     dl_decl_util &    m_decl_util;
476     arith_util        m_arith;
477 
478     unsigned          m_num_vars;
479     str2var           m_vars;
480     unsigned          m_sym_idx;
481     std::string       m_path;
482     str2sort          m_sort_dict;
483 
484 
485     // true if an error occurred during the current call to the parse_stream
486     // function
487     bool              m_error;
488 public:
dparser(context & ctx,ast_manager & m)489     dparser(context& ctx, ast_manager& m):
490         m_context(ctx),
491         m(m),
492         m_decl_util(ctx.get_decl_util()),
493         m_arith(m),
494         m_num_vars(0),
495         m_sym_idx(0)
496     {
497     }
498 
parse_file(char const * filename)499     bool parse_file(char const * filename) override {
500         reset();
501         if (filename != nullptr) {
502             set_path(filename);
503             char_reader reader(filename);
504             if (!reader()) {
505                 get_err() << "ERROR: could not open file '" << filename << "'.\n";
506                 return false;
507             }
508             return parse_stream(nullptr, &reader);
509         }
510         else {
511             return parse_stream(&std::cin, nullptr);
512         }
513     }
514 
parse_string(char const * string)515     bool parse_string(char const * string) override {
516         reset();
517         std::string s(string);
518         std::istringstream is(s);
519         return parse_stream(&is, nullptr);
520     }
521 
522 protected:
523 
reset()524     void reset() {
525         m_num_vars = 0;
526         m_sym_idx = 0;
527         m_vars.reset();
528         m_region.reset();
529         m_path.clear();
530         m_sort_dict.reset();
531     }
532 
set_path(char const * filename)533     void set_path(char const* filename) {
534         char const* div = strrchr(filename, '/');
535         if (!div) {
536             div = strrchr(filename,'\\');
537         }
538         if (div) {
539             m_path.assign(filename, div - filename + 1);
540         }
541     }
542 
get_err()543     std::ostream& get_err() {
544         return std::cerr;
545     }
546 
parse_stream(std::istream * is,char_reader * r)547     bool parse_stream(std::istream* is, char_reader* r) {
548         bool result = false;
549         try {
550             m_error=false;
551             dlexer lexer;
552             m_lexer = &lexer;
553             m_lexer->set_stream(is, r);
554             dtoken tok = m_lexer->next_token();
555             tok = parse_domains(tok);
556             tok = parse_decls(tok);
557             result = tok == TK_EOS && m_error == false;
558         }
559         catch (z3_exception& ex) {
560             std::cerr << ex.msg() << std::endl;
561             result = false;
562         }
563         return result;
564     }
565 
parse_domains(dtoken tok)566     dtoken parse_domains(dtoken tok) {
567         flet<bool> flet_parsing_domains(m_lexer->m_parsing_domains, true);
568         while (tok != TK_EOS && tok != TK_ERROR) {
569             switch(tok) {
570             case TK_ID:
571                 tok = parse_domain();
572                 break;
573             case TK_NEWLINE:
574                 return m_lexer->next_token();
575             case TK_INCLUDE:
576                 tok = m_lexer->next_token();
577                 if (tok != TK_STRING) {
578                     tok = unexpected(tok, "a string");
579                     break;
580                 }
581                 tok = parse_include(m_lexer->get_token_data(), true);
582                 if(tok!=TK_NEWLINE) {
583                     tok = unexpected(tok, "newline expected after include statement");
584                 }
585                 else {
586                     tok = m_lexer->next_token();
587                 }
588                 break;
589             default:
590                 tok = unexpected(tok, "identifier, newline or include");
591                 break;
592             }
593         }
594         return tok;
595     }
596 
extract_domain_name(const char * s0,std::string & result)597     bool extract_domain_name(const char* s0, std::string & result) {
598         std::string str(s0);
599         size_t last_non_digit = str.find_last_not_of("0123456789");
600         if(last_non_digit==std::string::npos) {
601             //the domain name consists only of digits, which should not happen
602             result=str;
603             return false;
604         }
605         str.erase(last_non_digit+1);
606         result=str;
607         return true;
608     }
609 
parse_domain()610     dtoken parse_domain() {
611         std::string domain_name;
612         if(!extract_domain_name(m_lexer->get_token_data(), domain_name)) {
613             return unexpected(TK_ID, "domain name");
614         }
615         dtoken tok = m_lexer->next_token();
616         if (tok == TK_ID && strcmp(m_lexer->get_token_data(), "int")==0) {
617             register_int_sort(symbol(domain_name.c_str()));
618 
619             tok = m_lexer->next_token();
620             if(tok != TK_NEWLINE) {
621                 return unexpected(tok, "end of line");
622             }
623             return tok;
624         }
625         if (tok != TK_NUM) {
626             return unexpected(tok, "numeral or 'int'");
627         }
628 
629         unsigned num = atoi(m_lexer->get_token_data());
630         sort * s = register_finite_sort(symbol(domain_name.c_str()), num, context::SK_SYMBOL);
631 
632         tok = m_lexer->next_token();
633         if (tok == TK_ID) {
634             tok = parse_mapfile(tok, s, m_lexer->get_token_data());
635         }
636         if (tok == TK_NEWLINE) {
637             tok = m_lexer->next_token();
638         }
639         return tok;
640     }
641 
642 
parse_decls(dtoken tok)643     dtoken parse_decls(dtoken tok) {
644         while (tok != TK_EOS && tok != TK_ERROR) {
645             switch(tok) {
646             case TK_ID:
647                 tok = parse_rule(tok);
648                 break;
649             case TK_NEWLINE:
650                 tok = m_lexer->next_token();
651                 break;
652             case TK_INCLUDE:
653                 tok = m_lexer->next_token();
654                 if (tok != TK_STRING) {
655                     tok = unexpected(tok, "a string");
656                     break;
657                 }
658                 tok = parse_include(m_lexer->get_token_data(), false);
659                 break;
660             default:
661                 tok = unexpected(tok, "identifier");
662                 break;
663             }
664         }
665         return tok;
666     }
667 
unexpected(dtoken tok,char const * msg)668     dtoken unexpected(dtoken tok, char const* msg) {
669 #if 1
670         throw default_exception(default_exception::fmt(), "%s at line %u '%s' found '%s'\n", msg,
671             m_lexer->get_line(), m_lexer->get_token_data(), dtoken_strings[tok]);
672 
673         SASSERT(false);
674         return tok;
675 #else
676         m_error = true;
677 
678         get_err() << msg << " expected at line " << m_lexer->get_line() << "\n";
679         get_err() << "'" << m_lexer->get_token_data() << "' found\n";
680         get_err() << "'" << dtoken_strings[tok] << "'\n";
681         if (tok == TK_ERROR || tok == TK_EOS) {
682             return tok;
683         }
684         return m_lexer->next_token();
685 #endif
686     }
687 
parse_rule(dtoken tok)688     dtoken parse_rule(dtoken tok) {
689         m_num_vars = 0;
690         m_vars.reset();
691 
692         switch(tok) {
693         case TK_EOS:
694             return tok;
695         case TK_ID: {
696             app_ref pred(m);
697             symbol s(m_lexer->get_token_data());
698             tok = m_lexer->next_token();
699             bool is_predicate_declaration;
700             tok = parse_pred(tok, s, pred, is_predicate_declaration);
701             switch (tok) {
702             case TK_PERIOD:
703                 if(is_predicate_declaration) {
704                     return unexpected(tok, "predicate declaration should not end with '.'");
705                 }
706                 add_rule(pred, 0, nullptr, nullptr);
707                 return m_lexer->next_token();
708             case TK_LEFT_ARROW:
709                 return parse_body(pred);
710             case TK_NEWLINE:
711             case TK_EOS:
712                 if(!is_predicate_declaration) {
713                     return unexpected(tok, "'.' expected at the end of rule");
714                 }
715                 return tok;
716             default:
717                 return unexpected(tok, "unexpected token");
718             }
719         }
720         default:
721             return unexpected(tok, "rule expected");
722         }
723     }
724 
parse_body(app * head)725     dtoken parse_body(app* head) {
726         app_ref_vector body(m);
727         bool_vector polarity_vect;
728         dtoken tok = m_lexer->next_token();
729         while (tok != TK_ERROR && tok != TK_EOS) {
730             if (tok == TK_PERIOD) {
731                 SASSERT(body.size()==polarity_vect.size());
732                 add_rule(head, body.size(), body.data(), polarity_vect.data());
733                 return m_lexer->next_token();
734             }
735             char const* td = m_lexer->get_token_data();
736             app_ref pred(m);
737             bool is_neg = false;
738             if (tok == TK_NEG) {
739                 tok = m_lexer->next_token();
740                 is_neg = true;
741             }
742 
743             if (tok == TK_STRING || tok == TK_NUM || (tok == TK_ID && m_vars.contains(td))) {
744                 tok = parse_infix(tok, td, pred);
745             }
746             else if (tok == TK_ID) {
747                 symbol s(td);
748                 tok = m_lexer->next_token();
749                 bool is_declaration;
750                 tok = parse_pred(tok, s, pred, is_declaration);
751                 SASSERT(!is_declaration);
752             }
753             else {
754                 tok = unexpected(tok, "expected predicate or relation");
755                 return tok;
756             }
757             body.push_back(pred);
758             polarity_vect.push_back(is_neg);
759 
760             if (tok == TK_COMMA) {
761                 tok = m_lexer->next_token();
762             }
763             else if (tok == TK_PERIOD) {
764                 continue;
765             }
766             else {
767                 tok = unexpected(tok, "expected comma or period");
768                 return tok;
769             }
770         }
771         return tok;
772     }
773 
774     //
775     // infix:
776     // Sym REL Sym
777     // Sym ::= String | NUM | Var
778     //
parse_infix(dtoken tok1,char const * td,app_ref & pred)779     dtoken parse_infix(dtoken tok1, char const* td, app_ref& pred) {
780         symbol td1(td);
781         expr_ref v1(m), v2(m);
782         sort* s = nullptr;
783         dtoken tok2 = m_lexer->next_token();
784         if (tok2 != TK_NEQ && tok2 != TK_GT && tok2 != TK_LT && tok2 != TK_EQ) {
785             return unexpected(tok2, "built-in infix operator");
786         }
787         dtoken tok3 = m_lexer->next_token();
788         td = m_lexer->get_token_data();
789         if (tok3 != TK_STRING && tok3 != TK_NUM && !(tok3 == TK_ID && m_vars.contains(td))) {
790             return unexpected(tok3, "identifier");
791         }
792         symbol td2(td);
793 
794         if (tok1 == TK_ID) {
795             expr* _v1 = nullptr;
796             m_vars.find(td1.bare_str(), _v1);
797             v1 = _v1;
798         }
799         if (tok3 == TK_ID) {
800             expr* _v2 = nullptr;
801             m_vars.find(td2.bare_str(), _v2);
802             v2 = _v2;
803         }
804         if (!v1 && !v2) {
805             return unexpected(tok3, "at least one argument should be a variable");
806         }
807         if (v1) {
808             s = v1->get_sort();
809         }
810         else {
811             s = v2->get_sort();
812         }
813         if (!v1) {
814             v1 = mk_const(td1, s);
815         }
816         if (!v2) {
817             v2 = mk_const(td2, s);
818         }
819 
820         switch(tok2) {
821         case TK_EQ:
822             pred = m.mk_eq(v1,v2);
823             break;
824         case TK_NEQ:
825             pred = m.mk_not(m.mk_eq(v1,v2));
826             break;
827         case TK_LT:
828             pred = m_decl_util.mk_lt(v1, v2);
829             break;
830         case TK_GT:
831             pred = m_decl_util.mk_lt(v2, v1);
832             break;
833         default:
834             UNREACHABLE();
835         }
836 
837         return m_lexer->next_token();
838     }
839 
840 
parse_pred(dtoken tok,symbol const & s,app_ref & pred,bool & is_predicate_declaration)841     dtoken parse_pred(dtoken tok, symbol const& s, app_ref& pred, bool & is_predicate_declaration) {
842 
843         expr_ref_vector args(m);
844         svector<symbol> arg_names;
845         func_decl* f = m_context.try_get_predicate_decl(s);
846         tok = parse_args(tok, f, args, arg_names);
847         is_predicate_declaration = f==nullptr;
848         if (f==nullptr) {
849             //we're in a declaration
850             unsigned arity = args.size();
851             ptr_vector<sort> domain;
852             for (unsigned i = 0; i < arity; ++i) {
853                 domain.push_back(args[i]->get_sort());
854             }
855             f = m.mk_func_decl(s, domain.size(), domain.data(), m.mk_bool_sort());
856 
857             m_context.register_predicate(f, true);
858 
859             while (tok == TK_ID) {
860                 char const* pred_pragma = m_lexer->get_token_data();
861                 if(strcmp(pred_pragma, "printtuples")==0 || strcmp(pred_pragma, "outputtuples")==0) {
862                     m_context.set_output_predicate(f);
863                 }
864                 tok = m_lexer->next_token();
865             }
866             m_context.set_argument_names(f, arg_names);
867         }
868         if(args.size() < f->get_arity()) {
869             return unexpected(tok, "too few arguments passed to predicate");
870         }
871         SASSERT(args.size()==f->get_arity());
872         //TODO: we do not need to do the mk_app if we're in a declaration
873         pred = m.mk_app(f, args.size(), args.data());
874         return tok;
875     }
876 
877     /**
878        \brief Parse predicate arguments. If \c f==0, they are arguments of a predicate declaration.
879        If parsing a declaration, argument names are pushed to the \c arg_names vector.
880     */
parse_args(dtoken tok,func_decl * f,expr_ref_vector & args,svector<symbol> & arg_names)881     dtoken parse_args(dtoken tok, func_decl* f, expr_ref_vector& args, svector<symbol> & arg_names) {
882         if (tok != TK_LP) {
883             return tok;
884         }
885         unsigned arg_idx = 0;
886         tok = m_lexer->next_token();
887         while (tok != TK_EOS && tok != TK_ERROR) {
888             symbol alias;
889             sort* s = nullptr;
890 
891             if(!f) {
892                 //we're in a predicate declaration
893                 if(tok != TK_ID) {
894                     tok = unexpected(tok, "Expecting variable in declaration");
895                     return tok;
896                 }
897                 symbol var_symbol(m_lexer->get_token_data());
898                 tok = m_lexer->next_token();
899                 if (tok != TK_COLON) {
900                     tok = unexpected(tok,
901                         "Expecting colon in declaration (first occurrence of a predicate must be a declaration)");
902                     return tok;
903                 }
904                 tok = m_lexer->next_token();
905 
906                 if (tok != TK_ID) {
907                     tok = unexpected(tok, "Expecting sort after colon in declaration");
908                     return tok;
909                 }
910                 std::string sort_name;
911                 if(!extract_domain_name(m_lexer->get_token_data(), sort_name)) {
912                     return unexpected(TK_ID, "sort name");
913                 }
914                 sort* s = get_sort(sort_name.c_str());
915                 args.push_back(m.mk_var(m_num_vars, s));
916                 arg_names.push_back(var_symbol);
917                 tok = m_lexer->next_token();
918             }
919             else {
920                 if(arg_idx>=f->get_arity()) {
921                     return unexpected(tok, "too many arguments passed to predicate");
922                 }
923                 s = f->get_domain(arg_idx);
924 
925                 symbol var_symbol;
926                 tok = parse_arg(tok, s, args);
927             }
928 
929 
930             ++arg_idx;
931 
932             if (tok == TK_RP) {
933                 return m_lexer->next_token();
934             }
935             if (tok == TK_COMMA) {
936                 tok = m_lexer->next_token();
937             }
938         }
939         return tok;
940     }
941 
942     /**
943        \remark \c var_symbol argument is assigned name of the variable. If the argument is not
944        a variable, is remains unchanged.
945     */
parse_arg(dtoken tok,sort * s,expr_ref_vector & args)946     dtoken parse_arg(dtoken tok, sort* s, expr_ref_vector& args) {
947         switch(tok) {
948         case TK_WILD: {
949             args.push_back(m.mk_var(m_num_vars++, s));
950             break;
951         }
952         case TK_ID: {
953             symbol data (m_lexer->get_token_data());
954             if (is_var(data.bare_str())) {
955                 unsigned idx = 0;
956                 expr* v = nullptr;
957                 if (!m_vars.find(data.bare_str(), v)) {
958                     idx = m_num_vars++;
959                     v = m.mk_var(idx, s);
960                     m_vars.insert(data.bare_str(), v);
961                 }
962                 else if (s != v->get_sort()) {
963                     throw default_exception(default_exception::fmt(), "sort: %s expected, but got: %s\n",
964                         s->get_name().bare_str(), v->get_sort()->get_name().bare_str());
965                 }
966                 args.push_back(v);
967             }
968             else {
969                 args.push_back(mk_const(data, s));
970             }
971             break;
972         }
973         case TK_STRING: {
974             char const* data = m_lexer->get_token_data();
975             args.push_back(mk_const(symbol(data), s));
976             break;
977         }
978         case TK_NUM: {
979             char const* data = m_lexer->get_token_data();
980             rational num(data);
981             if (!num.is_uint64()) {
982                 return unexpected(tok, "integer expected");
983             }
984             uint64_t int_num = num.get_uint64();
985 
986             app * numeral = mk_symbol_const(int_num, s);
987             args.push_back(numeral);
988             break;
989         }
990         default:
991             break;
992         }
993         return m_lexer->next_token();
994     }
995 
996     // all IDs are variables.
is_var(char const * data)997     bool is_var(char const* data) {
998         return true;
999     }
1000 
parse_decl(dtoken tok)1001     dtoken parse_decl(dtoken tok) {
1002 
1003         return tok;
1004     }
1005 
parse_include(char const * filename,bool parsing_domain)1006     dtoken parse_include(char const* filename, bool parsing_domain) {
1007         IF_VERBOSE(2, verbose_stream() << "include: " << filename << "\n";);
1008         std::string path(m_path);
1009         path += filename;
1010         char_reader stream(path.c_str());
1011         if (!stream()) {
1012             get_err() << "ERROR: could not open file '" << path << "'.\n";
1013             return TK_ERROR;
1014         }
1015         dtoken tok;
1016         dlexer lexer;
1017         {
1018             flet<dlexer*> lexer_let(m_lexer, &lexer);
1019             m_lexer->set_stream(nullptr, &stream);
1020             tok = m_lexer->next_token();
1021             if(parsing_domain) {
1022                 tok = parse_domains(tok);
1023             }
1024             tok = parse_decls(tok);
1025         }
1026         if (tok == TK_EOS) {
1027             tok = m_lexer->next_token();
1028         }
1029         return tok;
1030     }
1031 
parse_mapfile(dtoken tok,sort * s,char const * filename)1032     dtoken parse_mapfile(dtoken tok, sort * s, char const* filename) {
1033         std::string path(m_path);
1034         path += filename;
1035         line_reader reader(path.c_str());
1036 
1037         if (!reader()) {
1038             get_err() << "Warning: could not open file '" << path << "'.\n";
1039             return m_lexer->next_token();
1040         }
1041 
1042         std::string line;
1043         while(!reader.eof()) {
1044             symbol sym=symbol(reader.get_line());
1045             m_context.get_constant_number(s, sym);
1046         }
1047         return m_lexer->next_token();
1048     }
1049 
read_line(std::istream & strm,std::string & line)1050     bool read_line(std::istream& strm, std::string& line) {
1051         line.clear();
1052         char ch = strm.get();
1053         while (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r') {
1054             ch = strm.get();
1055         }
1056         while (ch != '\n' && ch != '\r' && ch != EOF) {
1057             line.push_back(ch);
1058             ch = strm.get();
1059         }
1060         return !line.empty();
1061     }
1062 
add_rule(app * head,unsigned sz,app * const * body,const bool * is_neg)1063     void add_rule(app* head, unsigned sz, app* const* body, const bool * is_neg) {
1064         rule_manager& m = m_context.get_rule_manager();
1065 
1066         if(sz==0 && m.is_fact(head)) {
1067             m_context.add_fact(head);
1068         }
1069         else {
1070             rule * r = m.mk(head, sz, body, is_neg);
1071             rule_ref rule(r, m);
1072             m_context.add_rule(rule);
1073 
1074         }
1075     }
1076 
register_finite_sort(symbol name,uint64_t domain_size,context::sort_kind k)1077     sort * register_finite_sort(symbol name, uint64_t domain_size, context::sort_kind k) {
1078         if(m_sort_dict.contains(name.bare_str())) {
1079             throw default_exception(default_exception::fmt(), "sort %s already declared", name.bare_str());
1080         }
1081         sort * s = m_decl_util.mk_sort(name, domain_size);
1082         m_context.register_finite_sort(s, k);
1083         m_sort_dict.insert(name.bare_str(), s);
1084         return s;
1085     }
1086 
register_int_sort(symbol name)1087     sort * register_int_sort(symbol name) {
1088         if(m_sort_dict.contains(name.bare_str())) {
1089             throw default_exception(default_exception::fmt(), "sort %s already declared", name.bare_str());
1090         }
1091         sort * s = m_arith.mk_int();
1092         m_sort_dict.insert(name.bare_str(), s);
1093         return s;
1094     }
1095 
get_sort(const char * str)1096     sort * get_sort(const char* str) {
1097         sort * res;
1098         if(!m_sort_dict.find(str, res)) {
1099             throw default_exception(default_exception::fmt(), "unknown sort \"%s\"", str);
1100         }
1101         return res;
1102     }
1103 
mk_const(symbol const & name,sort * s)1104     app* mk_const(symbol const& name, sort* s) {
1105         app * res;
1106         if(m_arith.is_int(s)) {
1107             uint64_t val;
1108             if (!string_to_uint64(name.bare_str(), val)) {
1109                 throw default_exception(default_exception::fmt(), "Invalid integer: \"%s\"", name.bare_str());
1110             }
1111             res = m_arith.mk_numeral(rational(val, rational::ui64()), s);
1112         }
1113         else {
1114             unsigned idx = m_context.get_constant_number(s, name);
1115             res = m_decl_util.mk_numeral(idx, s);
1116         }
1117         return res;
1118     }
1119     /**
1120        \brief Make a constant for DK_SYMBOL sort out of an integer
1121      */
mk_symbol_const(uint64_t el,sort * s)1122     app* mk_symbol_const(uint64_t el, sort* s) {
1123         uint64_t sz = 0;
1124         if (m_arith.is_int(s))
1125             return m_arith.mk_numeral(rational(el, rational::ui64()), s);
1126         else if (m_decl_util.try_get_size(s, sz)) {
1127             if (el >= sz)
1128                 throw default_exception("numeric value out of bounds of domain");
1129             return m_decl_util.mk_numeral(el, s);
1130         }
1131         else {
1132             unsigned idx = m_context.get_constant_number(s, el);
1133             return m_decl_util.mk_numeral(idx, s);
1134         }
1135     }
mk_const(uint64_t el,sort * s)1136     app* mk_const(uint64_t el, sort* s) {
1137         unsigned idx = m_context.get_constant_number(s, el);
1138         app * res = m_decl_util.mk_numeral(idx, s);
1139         return res;
1140     }
1141 
mk_table_const(symbol const & name,sort * s)1142     table_element mk_table_const(symbol const& name, sort* s) {
1143         return m_context.get_constant_number(s, name);
1144     }
mk_table_const(uint64_t el,sort * s)1145     table_element mk_table_const(uint64_t el, sort* s) {
1146         return m_context.get_constant_number(s, el);
1147     }
1148 };
1149 
1150 /*
1151 
1152   Program     ::== Sort* (Rule | Include | Decl)*
1153   Comment     ::== '#...'
1154   Rule        ::== Fact | InfRule
1155   Fact        ::== Identifier(Element*).
1156   InfRule     ::== Identifier(Element*) :- (Identifier(Element*))+.
1157   Element     ::== '_' | 'string' | integer | Identifier
1158 
1159   Sort        ::== Identifier (Number [map-file]| 'int')
1160   Decl        ::== Identifier(SortDecl) [Pragma] \n
1161   SortDecl    ::== Identifier ':' Identifier
1162 
1163   Pragma      ::== 'input' | 'printtuples' |
1164 
1165 
1166   If sort name ends with a sequence of digits, they are ignored (so V and V1234 stand for the same sort)
1167   This is how BDDBDDB behaves.
1168 */
1169 
1170 // -----------------------------------
1171 //
1172 // wpa_parser
1173 //
1174 // -----------------------------------
1175 
1176 class wpa_parser_impl : public wpa_parser, dparser {
1177     typedef svector<uint64_t> uint64_vector;
1178     typedef hashtable<uint64_t, uint64_hash, default_eq<uint64_t> > uint64_set;
1179     typedef map<uint64_t, symbol, uint64_hash, default_eq<uint64_t> > num2sym;
1180     typedef map<symbol, uint64_set*, symbol_hash_proc, symbol_eq_proc> sym2nums;
1181 
1182     num2sym m_number_names;
1183     sym2nums m_sort_contents;
1184 
1185     sort_ref m_bool_sort;
1186     sort_ref m_short_sort;
1187 
1188     std::string m_current_file;
1189     unsigned m_current_line;
1190 
1191     bool m_use_map_names;
1192 
ensure_sort_content(symbol sort_name)1193     uint64_set& ensure_sort_content(symbol sort_name) {
1194         auto& value = m_sort_contents.insert_if_not_there(sort_name, nullptr);
1195         if (!value) {
1196             value = alloc(uint64_set);
1197         }
1198         return *value;
1199     }
1200 
1201 public:
wpa_parser_impl(context & ctx)1202     wpa_parser_impl(context & ctx)
1203         : dparser(ctx, ctx.get_manager()),
1204           m_bool_sort(ctx.get_manager()),
1205           m_short_sort(ctx.get_manager()),
1206           m_use_map_names(ctx.use_map_names()) {
1207     }
~wpa_parser_impl()1208     ~wpa_parser_impl() override {
1209         reset_dealloc_values(m_sort_contents);
1210     }
reset()1211     void reset() {
1212     }
1213 
parse_directory(char const * path)1214     bool parse_directory(char const * path) override {
1215         bool result = false;
1216         try {
1217             result = parse_directory_core(path);
1218         }
1219         catch (z3_exception& ex) {
1220             std::cerr << ex.msg() << std::endl;
1221             return false;
1222         }
1223         return result;
1224     }
1225 
1226 private:
1227 
parse_directory_core(char const * path)1228     bool parse_directory_core(char const* path) {
1229 
1230         IF_VERBOSE(10, verbose_stream() << "Start parsing directory " << path << "\n";);
1231         reset();
1232         string_vector map_files;
1233         get_file_names(path, "map", true, map_files);
1234         string_vector::iterator mit = map_files.begin();
1235         string_vector::iterator mend = map_files.end();
1236         for(; mit!=mend; ++mit) {
1237             std::string map_file_name = *mit;
1238             parse_map_file(map_file_name);
1239         }
1240 
1241         finish_map_files();
1242 
1243         string_vector rule_files;
1244         get_file_names(path, "rules", true, rule_files);
1245         string_vector::iterator rlit = rule_files.begin();
1246         string_vector::iterator rlend = rule_files.end();
1247         for(; rlit!=rlend; ++rlit) {
1248             parse_rules_file(*rlit);
1249         }
1250 
1251         string_vector rel_files;
1252         get_file_names(path, "rel", true, rel_files);
1253         string_vector::iterator rit = rel_files.begin();
1254         string_vector::iterator rend = rel_files.end();
1255         for(; rit!=rend; ++rit) {
1256             std::string rel_file_name = *rit;
1257             //skip relations which we do not support yet
1258             if(rel_file_name.find("DirectCall")!=std::string::npos ||
1259                   rel_file_name.find("FunctionFormals")!=std::string::npos ||
1260                   rel_file_name.find("IndirectCall")!=std::string::npos) {
1261                 continue;
1262             }
1263             parse_rel_file(rel_file_name);
1264         }
1265         IF_VERBOSE(10, verbose_stream() << "Done parsing directory " << path << "\n";);
1266         return true;
1267     }
1268 
inp_num_to_element(sort * s,uint64_t num,table_element & res)1269     bool inp_num_to_element(sort * s, uint64_t num, table_element & res) {
1270         if(s==m_bool_sort.get() || s==m_short_sort.get()) {
1271             res = mk_table_const(num, s);
1272             return true;
1273         }
1274 
1275         if(num==0) {
1276             if(!m_use_map_names) {
1277                 res = mk_table_const(0, s);
1278             }
1279             else {
1280                 res = mk_table_const(symbol("<zero element>"), s);
1281             }
1282             return true;
1283         }
1284 
1285         sym2nums::entry * e =  m_sort_contents.find_core(s->get_name());
1286         SASSERT(e);
1287         SASSERT(e->get_data().m_value);
1288         uint64_set & sort_content = *e->get_data().m_value;
1289         if(!sort_content.contains(num)) {
1290             warning_msg("symbol number %I64u on line %d in file %s does not belong to sort %s",
1291                 num, m_current_line, m_current_file.c_str(), s->get_name().bare_str());
1292             return false;
1293         }
1294         if(!m_use_map_names) {
1295             res = mk_table_const(num, s);
1296             return true;
1297         }
1298         else {
1299             symbol const_name;
1300             if(num==0) {
1301                 const_name = symbol("<zero element>");
1302             } else if(!m_number_names.find(num, const_name)) {
1303                 throw default_exception(default_exception::fmt(), "unknown symbol number %I64u on line %d in file %s",
1304                     num, m_current_line, m_current_file.c_str());
1305             }
1306             res =  mk_table_const(const_name, s);
1307             return true;
1308         }
1309     }
1310 
parse_rules_file(const std::string & fname)1311     void parse_rules_file(const std::string & fname) {
1312         SASSERT(file_exists(fname));
1313         flet<std::string> flet_cur_file(m_current_file, fname);
1314 
1315         std::ifstream stm(fname.c_str());
1316         SASSERT(!stm.fail());
1317 
1318         dlexer lexer;
1319         m_lexer = &lexer;
1320         m_lexer->set_stream(&stm, nullptr);
1321         dtoken tok = m_lexer->next_token();
1322         tok = parse_decls(tok);
1323         m_lexer = nullptr;
1324     }
1325 
parse_rel_line(char * full_line,uint64_vector & args)1326     bool parse_rel_line(char * full_line, uint64_vector & args) {
1327         SASSERT(args.empty());
1328         cut_off_comment(full_line);
1329         if(full_line[0]==0) {
1330             return false;
1331         }
1332         const char * ptr = full_line;
1333 
1334         bool last = false;
1335         do {
1336             while(*ptr==' ') { ptr++; }
1337             if(*ptr==0) {
1338                 break;
1339             }
1340             uint64_t num;
1341             if(!read_uint64(ptr, num)) {
1342                 throw default_exception(default_exception::fmt(), "number expected on line %d in file %s",
1343                     m_current_line, m_current_file.c_str());
1344             }
1345             if(*ptr!=' ' && *ptr!=0) {
1346                 throw default_exception(default_exception::fmt(),
1347                                         "' ' expected to separate numbers on line %d in file %s, got '%s'",
1348                                         m_current_line, m_current_file.c_str(), ptr);
1349             }
1350             args.push_back(num);
1351         } while(!last);
1352         return true;
1353     }
1354 
parse_rel_file(const std::string & fname)1355     void parse_rel_file(const std::string & fname) {
1356         SASSERT(file_exists(fname));
1357 
1358         IF_VERBOSE(10, verbose_stream() << "Parsing relation file " << fname << "\n";);
1359 
1360         flet<std::string> flet_cur_file(m_current_file, fname);
1361         flet<unsigned> flet_cur_line(m_current_line, 0);
1362 
1363         std::string predicate_name_str = get_file_name_without_extension(fname);
1364         symbol predicate_name(predicate_name_str.c_str());
1365 
1366         func_decl * pred = m_context.try_get_predicate_decl(predicate_name);
1367         if(!pred) {
1368             throw default_exception(default_exception::fmt(), "tuple file %s for undeclared predicate %s",
1369                 m_current_file.c_str(), predicate_name.bare_str());
1370         }
1371         unsigned pred_arity = pred->get_arity();
1372         sort * const * arg_sorts = pred->get_domain();
1373 
1374         uint64_vector args;
1375         table_fact fact;
1376 
1377         //std::ifstream stm(fname.c_str(), std::ios_base::binary);
1378         //SASSERT(!stm.fail());
1379         //line_reader rdr(stm);
1380         line_reader rdr(fname.c_str());
1381         while(!rdr.eof()) {
1382             m_current_line++;
1383             char * full_line = rdr.get_line();
1384 
1385             args.reset();
1386             if(!parse_rel_line(full_line, args)) {
1387                 continue;
1388             }
1389             if(args.size()!=pred_arity) {
1390                 throw default_exception(default_exception::fmt(), "invalid number of arguments on line %d in file %s",
1391                     m_current_line, m_current_file.c_str());
1392             }
1393 
1394             bool fact_fail = false;
1395             fact.reset();
1396             for(unsigned i=0;i<pred_arity; i++) {
1397                 uint64_t const_num = args[i];
1398                 table_element c;
1399                 if(!inp_num_to_element(arg_sorts[i], const_num, c)) {
1400                     fact_fail = true;
1401                     break;
1402                 }
1403                 fact.push_back(c);
1404             }
1405             if(fact_fail) {
1406                 continue;
1407             }
1408             m_context.add_table_fact(pred, fact);
1409         }
1410     }
1411 
finish_map_files()1412     void finish_map_files() {
1413 
1414         m_bool_sort = register_finite_sort(symbol("BOOL"), 2, context::SK_UINT64);
1415         m_short_sort = register_finite_sort(symbol("SHORT"), 65536, context::SK_UINT64);
1416 
1417         sym2nums::iterator sit = m_sort_contents.begin();
1418         sym2nums::iterator send = m_sort_contents.end();
1419         for(; sit!=send; ++sit) {
1420             symbol sort_name = sit->m_key;
1421             uint64_set & sort_content = *sit->m_value;
1422             //the +1 is for a zero element which happens to appear in the problem files
1423             uint64_t domain_size = sort_content.size()+1;
1424             // sort * s;
1425             if(!m_use_map_names) {
1426                 /* s = */ register_finite_sort(sort_name, domain_size, context::SK_UINT64);
1427             }
1428             else {
1429                 /* s = */ register_finite_sort(sort_name, domain_size, context::SK_SYMBOL);
1430             }
1431 
1432             /*
1433             uint64_set::iterator cit = sort_content.begin();
1434             uint64_set::iterator cend = sort_content.end();
1435             for(; cit!=cend; ++cit) {
1436                 uint64_t const_num = *cit;
1437                 inp_num_to_element(s, const_num);
1438             }
1439             */
1440         }
1441     }
1442 
cut_off_comment(char * line)1443     void cut_off_comment(char * line) {
1444         char * ptr = line;
1445         while(*ptr && *ptr!='#' && *ptr!='\n' && *ptr!='\r') {
1446             ptr++;
1447         }
1448         *ptr=0;
1449     }
1450 
parse_map_line(char * full_line,uint64_t & num,symbol & name)1451     bool parse_map_line(char * full_line, uint64_t & num, symbol & name) {
1452         cut_off_comment(full_line);
1453         if(full_line[0]==0) {
1454             return false;
1455         }
1456 
1457         const char * ptr = full_line;
1458         if(!read_uint64(ptr, num)) {
1459             throw default_exception(default_exception::fmt(),
1460                                     "number expected at line %d in file %s", m_current_line, m_current_file.c_str());
1461         }
1462         if(*ptr!=' ') {
1463             throw default_exception(default_exception::fmt(),
1464                                     "' ' expected after the number at line %d in file %s", m_current_line, m_current_file.c_str());
1465         }
1466         ptr++;
1467 
1468         if(!m_use_map_names) {
1469             static symbol no_name("<names ignored>");
1470             name=no_name;
1471         }
1472         else {
1473             std::string rest_of_line(ptr);
1474 
1475             const char * cut_off_word = " SC_EXTERN ";
1476             size_t cut_off_pos = rest_of_line.find(cut_off_word);
1477             if(cut_off_pos!=std::string::npos) {
1478                 rest_of_line = rest_of_line.substr(0, cut_off_pos);
1479             }
1480 
1481             cut_off_word = " _ZONE_";
1482             cut_off_pos = rest_of_line.find(cut_off_word);
1483             if(cut_off_pos!=std::string::npos) {
1484                 rest_of_line = rest_of_line.substr(0, cut_off_pos);
1485             }
1486 
1487             const char * const ignored_suffix = "Constant ";
1488             const size_t ignored_suffix_len = 9;
1489 
1490             if(rest_of_line.size()>ignored_suffix_len &&
1491                     rest_of_line.substr(rest_of_line.size()-ignored_suffix_len)==ignored_suffix) {
1492                 rest_of_line = rest_of_line.substr(0, rest_of_line.size()-ignored_suffix_len);
1493             }
1494 
1495             if(rest_of_line[rest_of_line.size()-1]==' ') {
1496                 rest_of_line = rest_of_line.substr(0, rest_of_line.size()-1);
1497             }
1498 
1499             name = symbol(rest_of_line.c_str());
1500         }
1501         return true;
1502     }
1503 
parse_map_file(const std::string & fname)1504     void parse_map_file(const std::string & fname) {
1505         SASSERT(file_exists(fname));
1506 
1507         IF_VERBOSE(10, verbose_stream() << "Parsing map file " << fname << "\n";);
1508         flet<std::string> flet_cur_file(m_current_file, fname);
1509         flet<unsigned> flet_cur_line(m_current_line, 0);
1510 
1511         std::string sort_name_str = get_file_name_without_extension(fname);
1512         symbol sort_name(sort_name_str.c_str());
1513         uint64_set & sort_elements = ensure_sort_content(sort_name);
1514 
1515         //std::ifstream stm(fname.c_str(), std::ios_base::binary);
1516         //SASSERT(!stm.fail());
1517         //line_reader rdr(stm);
1518         line_reader rdr(fname.c_str());
1519         while(!rdr.eof()) {
1520             m_current_line++;
1521             char * full_line = rdr.get_line();
1522 
1523             uint64_t num;
1524             symbol el_name;
1525 
1526             if(!parse_map_line(full_line, num, el_name)) {
1527                 continue;
1528             }
1529 
1530             sort_elements.insert(num);
1531 
1532             if(m_use_map_names) {
1533                 auto const & value = m_number_names.insert_if_not_there(num, el_name);
1534                 if (value!=el_name) {
1535                     warning_msg("mismatch of number names on line %d in file %s. old: \"%s\" new: \"%s\"",
1536                         m_current_line, fname.c_str(), value.bare_str(), el_name.bare_str());
1537                 }
1538             }
1539         }
1540     }
1541 };
1542 
create(context & ctx,ast_manager & m)1543 parser* parser::create(context& ctx, ast_manager& m) {
1544     return alloc(dparser, ctx, m);
1545 }
1546 
create(context & ctx,ast_manager & ast_manager)1547 wpa_parser * wpa_parser::create(context& ctx, ast_manager & ast_manager) {
1548     return alloc(wpa_parser_impl, ctx);
1549 }
1550 
1551