1 /*
2 * This file is part of the Yices SMT Solver.
3 * Copyright (C) 2017 SRI International.
4 *
5 * Yices is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * Yices is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with Yices. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19 /*
20 * Lexer for Yices language
21 * - separators are ( ) : spaces, EOF, ; and "
22 *
23 * - strings are delimited by " with escaped char \n, \t, etc. allowed
24 *
25 * - two kinds of numeric literals are recognized
26 * TK_NUM_RATIONAL: <optional_sign><digits>/<digits>
27 * or <optional_sign><digits>
28 * TK_NUM_FLOAT:
29 * <optional_sign> <digits> . <digits>
30 * <optional_sign> <digits> <exp> <optional_sign> <digits>
31 * <optional_sign> <digits> . <digits> <exp> <optional_sign> <digits>
32 *
33 * (the two formats recognized by string-to-rational conversions in rational.c)
34 *
35 * - bit-vector literals are written 0b<binary digits> (cf. bv_constants.c)
36 * (added 5/10/07) can also be written 0x<hexa digits>
37 *
38 * - comments start with ; and extend to the end of the line
39 */
40
41 #include <ctype.h>
42 #include <assert.h>
43
44 /*
45 * yices_hash_keywords.h is generated by gperf
46 * from input file yices_keywords.txt
47 */
48 #include "frontend/yices/yices_hash_keywords.h"
49 #include "frontend/yices/yices_lexer.h"
50
51
52 /*
53 * All keywords
54 */
55 static keyword_t yices_keywords[] = {
56 // type keywords
57 { "bool", TK_BOOL },
58 { "int", TK_INT },
59 { "real", TK_REAL },
60 { "bitvector", TK_BITVECTOR },
61 { "scalar", TK_SCALAR },
62 { "tuple", TK_TUPLE },
63 { "->", TK_ARROW },
64
65 // term keywords
66 { "true", TK_TRUE },
67 { "false", TK_FALSE },
68 { "if", TK_IF },
69 { "ite", TK_ITE },
70 { "=", TK_EQ },
71 { "/=", TK_DISEQ },
72 { "distinct", TK_DISTINCT },
73
74 { "or", TK_OR },
75 { "and", TK_AND },
76 { "not", TK_NOT },
77 { "xor", TK_XOR },
78 { "<=>", TK_IFF },
79 { "=>", TK_IMPLIES },
80 { "mk-tuple", TK_MK_TUPLE },
81 { "select", TK_SELECT },
82 { "tuple-update", TK_UPDATE_TUPLE },
83 { "update", TK_UPDATE },
84 { "forall", TK_FORALL },
85 { "exists", TK_EXISTS },
86 { "lambda", TK_LAMBDA },
87
88 // arithmetic keywords
89 { "+", TK_ADD },
90 { "-", TK_SUB },
91 { "*", TK_MUL },
92 { "/", TK_DIV },
93 { "^", TK_POW },
94 { "<", TK_LT },
95 { "<=", TK_LE },
96 { ">", TK_GT },
97 { ">=", TK_GE },
98
99 // bitvector keywords
100 { "mk-bv", TK_MK_BV },
101 { "bv-add", TK_BV_ADD },
102 { "bv-sub", TK_BV_SUB },
103 { "bv-mul", TK_BV_MUL },
104 { "bv-neg", TK_BV_NEG },
105 { "bv-pow", TK_BV_POW },
106 { "bv-not", TK_BV_NOT },
107 { "bv-and", TK_BV_AND },
108 { "bv-or", TK_BV_OR },
109 { "bv-xor", TK_BV_XOR },
110 { "bv-nand", TK_BV_NAND },
111 { "bv-nor", TK_BV_NOR },
112 { "bv-xnor", TK_BV_XNOR },
113 { "bv-shift-left0", TK_BV_SHIFT_LEFT0 },
114 { "bv-shift-left1", TK_BV_SHIFT_LEFT1 },
115 { "bv-shift-right0", TK_BV_SHIFT_RIGHT0 },
116 { "bv-shift-right1", TK_BV_SHIFT_RIGHT1 },
117 { "bv-ashift-right", TK_BV_ASHIFT_RIGHT },
118 { "bv-rotate-left", TK_BV_ROTATE_LEFT },
119 { "bv-rotate-right", TK_BV_ROTATE_RIGHT },
120 { "bv-extract", TK_BV_EXTRACT },
121 { "bv-concat", TK_BV_CONCAT },
122 { "bv-repeat", TK_BV_REPEAT },
123 { "bv-sign-extend", TK_BV_SIGN_EXTEND },
124 { "bv-zero-extend", TK_BV_ZERO_EXTEND },
125 { "bv-ge", TK_BV_GE },
126 { "bv-gt", TK_BV_GT },
127 { "bv-le", TK_BV_LE },
128 { "bv-lt", TK_BV_LT },
129 { "bv-sge", TK_BV_SGE },
130 { "bv-sgt", TK_BV_SGT },
131 { "bv-sle", TK_BV_SLE },
132 { "bv-slt", TK_BV_SLT },
133
134 // more bitvector keywords
135 { "bv-shl", TK_BV_SHL },
136 { "bv-lshr", TK_BV_LSHR },
137 { "bv-ashr", TK_BV_ASHR },
138 { "bv-div", TK_BV_DIV },
139 { "bv-rem", TK_BV_REM },
140 { "bv-sdiv", TK_BV_SDIV },
141 { "bv-srem", TK_BV_SREM },
142 { "bv-smod", TK_BV_SMOD },
143 { "bv-redor", TK_BV_REDOR },
144 { "bv-redand", TK_BV_REDAND },
145 { "bv-comp", TK_BV_COMP },
146
147 // conversions between bitvectors and Booleans
148 { "bool-to-bv", TK_BOOL_TO_BV },
149 { "bit", TK_BIT},
150
151 // more arithmetic functions
152 { "floor", TK_FLOOR },
153 { "ceil", TK_CEIL },
154 { "abs", TK_ABS },
155 { "div", TK_IDIV },
156 { "mod", TK_MOD },
157 { "divides", TK_DIVIDES },
158 { "is-int", TK_IS_INT },
159
160 // other keywords
161 { "let", TK_LET },
162 { "define-type", TK_DEFINE_TYPE },
163 { "define", TK_DEFINE },
164 { "assert", TK_ASSERT },
165 { "check", TK_CHECK },
166 { "check-assuming", TK_CHECK_ASSUMING },
167 { "push", TK_PUSH },
168 { "pop", TK_POP },
169 { "reset", TK_RESET },
170 { "dump-context", TK_DUMP_CONTEXT },
171 { "exit", TK_EXIT },
172 { "echo", TK_ECHO },
173 { "include", TK_INCLUDE },
174 { "show-model", TK_SHOW_MODEL },
175 { "eval", TK_EVAL },
176 { "set-param", TK_SET_PARAM },
177 { "show-param", TK_SHOW_PARAM },
178 { "show-params", TK_SHOW_PARAMS },
179 { "show-stats", TK_SHOW_STATS },
180 { "reset-stats", TK_RESET_STATS },
181 { "set-timeout", TK_SET_TIMEOUT },
182 { "show-timeout", TK_SHOW_TIMEOUT },
183 { "help", TK_HELP },
184 { "ef-solve", TK_EF_SOLVE },
185 { "export-to-dimacs", TK_EXPORT_TO_DIMACS },
186 { "show-implicant", TK_SHOW_IMPLICANT },
187 { "show-unsat-core", TK_SHOW_UNSAT_CORE },
188 { "show-unsat-assumptions", TK_SHOW_UNSAT_ASSUMPTIONS },
189 { "show-reduced-model", TK_SHOW_REDUCED_MODEL },
190
191 // end-marker
192 { NULL, 0 },
193 };
194
195 /*
196 * name for each token
197 */
198 static char *token_string[NUM_YICES_TOKENS];
199
200
201 /*
202 * Initialize token2string table
203 */
init_token2string(void)204 static void init_token2string(void) {
205 keyword_t *kw;
206
207 // keywords
208 kw = yices_keywords;
209 while (kw->word != NULL) {
210 token_string[kw->tk] = kw->word;
211 kw ++;
212 }
213
214 // other tokens
215 token_string[TK_LP] = "(";
216 token_string[TK_RP] = ")";
217 token_string[TK_COLON_COLON] = "::";
218 token_string[TK_EOS] = "<end-of-stream>";
219 token_string[TK_STRING] = "<string>";
220 token_string[TK_NUM_RATIONAL] = "<rational>";
221 token_string[TK_NUM_FLOAT] = "<float>";
222 token_string[TK_BV_CONSTANT] = "<bv-constant>";
223 token_string[TK_HEX_CONSTANT] = "<hex-constant>";
224 token_string[TK_SYMBOL] = "<symbol>";
225
226 token_string[TK_OPEN_STRING] = "<bad-string>";
227 token_string[TK_EMPTY_BVCONST] = "<bad-bvconst>";
228 token_string[TK_EMPTY_HEXCONST] = "<bad-hexconst>";
229 token_string[TK_INVALID_NUM] = "<bad-float>";
230 token_string[TK_ZERO_DIVISOR] = "<zero-divisor-in-rational>";
231 token_string[TK_ERROR] = "<error>";
232 }
233
234
235 /*
236 * Lexer initialization
237 */
init_yices_file_lexer(lexer_t * lex,char * filename)238 int32_t init_yices_file_lexer(lexer_t *lex, char *filename) {
239 init_token2string();
240 return init_file_lexer(lex, filename);
241 }
242
init_yices_stream_lexer(lexer_t * lex,FILE * f,char * name)243 void init_yices_stream_lexer(lexer_t *lex, FILE *f, char *name) {
244 init_token2string();
245 init_stream_lexer(lex, f, name);
246 }
247
init_yices_string_lexer(lexer_t * lex,char * data,char * name)248 void init_yices_string_lexer(lexer_t *lex, char *data, char *name) {
249 init_token2string();
250 init_string_lexer(lex, data, name);
251 }
252
253
254
255 /*
256 * Get token_string
257 */
yices_token_to_string(yices_token_t tk)258 char *yices_token_to_string(yices_token_t tk) {
259 assert(0 <= tk && tk < NUM_YICES_TOKENS);
260 return token_string[tk];
261 }
262
263 /*
264 * Check whether c is a separator character (cannot be part of a symbol)
265 */
is_yices_sep(int c)266 static bool is_yices_sep(int c) {
267 return isspace(c) || c == EOF || c == '(' || c == ')' ||
268 c == ':' || c == ';' || c == '"';
269 }
270
271
272 /*
273 * Read a string literal:
274 * - lex->current_char == '"' and lex->buffer is empty
275 */
read_string(lexer_t * lex)276 static yices_token_t read_string(lexer_t *lex) {
277 yices_token_t tk;
278 int c, x;
279 reader_t *rd;
280 string_buffer_t *buffer;
281
282 rd = &lex->reader;
283 buffer = lex->buffer;
284 assert(reader_current_char(rd) == '"');
285
286 c = reader_next_char(rd);
287
288 for (;;) {
289 if (c == '"') { // end of string
290 // consume the closing quote
291 reader_next_char(rd);
292 tk = TK_STRING;
293 break;
294 }
295 if (c == '\n' || c == EOF) { // missing quotes
296 tk = TK_OPEN_STRING;
297 break;
298 }
299 if (c == '\\') {
300 // escape sequence
301 c = reader_next_char(rd);
302 switch (c) {
303 case 'n': c = '\n'; break;
304 case 't': c = '\t'; break;
305 default:
306 if ('0' <= c && c <= '7') {
307 // read at most 2 more octal digits
308 x = c - '0';
309 c = reader_next_char(rd);
310 if ('0' <= c && c <= '7') {
311 x = 8 * x + (c - '0');
312 c = reader_next_char(rd);
313 if ('0' <= c && c <= '7') {
314 x = 8 * x + (c - '0');
315 c = reader_next_char(rd);
316 }
317 }
318 // x = character built from the octal digits
319 // c = character after octal digit
320 string_buffer_append_char(buffer, x);
321 continue;
322 } // else skip '\': copy c in the buffer
323 break;
324 }
325 }
326 string_buffer_append_char(buffer, c);
327 c = reader_next_char(rd);
328 }
329
330 string_buffer_close(buffer);
331 return tk;
332 }
333
334 /*
335 * Read a bitvector constant:
336 * lex->current_char = 'b' and lex->buffer contains "0"
337 */
read_bv_constant(lexer_t * lex)338 static yices_token_t read_bv_constant(lexer_t *lex) {
339 reader_t *rd;
340 string_buffer_t *buffer;
341 int c;
342
343 rd = &lex->reader;
344 c = reader_current_char(rd);
345 buffer = lex->buffer;
346
347 do {
348 string_buffer_append_char(buffer, c);
349 c = reader_next_char(rd);
350 } while (c == '0' || c == '1');
351 string_buffer_close(buffer);
352
353 if (string_buffer_length(buffer) <= 2) {
354 return TK_EMPTY_BVCONST; // empty constant
355 } else {
356 return TK_BV_CONSTANT;
357 }
358 }
359
360 /*
361 * Read a hexadecimal constant:
362 * lex->current_char = 'x' and lex->buffer contains "0"
363 */
read_hex_constant(lexer_t * lex)364 static yices_token_t read_hex_constant(lexer_t *lex) {
365 reader_t *rd;
366 string_buffer_t *buffer;
367 int c;
368
369 rd = &lex->reader;
370 c = reader_current_char(rd);
371 buffer = lex->buffer;
372
373 do {
374 string_buffer_append_char(buffer, c);
375 c = reader_next_char(rd);
376 } while (isxdigit(c));
377 string_buffer_close(buffer);
378
379 if (string_buffer_length(buffer) <= 2) {
380 return TK_EMPTY_HEXCONST; // empty constant
381 } else {
382 return TK_HEX_CONSTANT;
383 }
384 }
385
386 /*
387 * Read a symbol or keyword
388 * lex->buffer contains one char (not a separator or digit)
389 * char = next character after that.
390 */
read_symbol(lexer_t * lex)391 static yices_token_t read_symbol(lexer_t *lex) {
392 reader_t *rd;
393 string_buffer_t *buffer;
394 int c;
395 token_t tk;
396 const keyword_t *kw;
397
398 rd = &lex->reader;
399 c = reader_current_char(rd);
400 buffer = lex->buffer;
401
402 while (! is_yices_sep(c)) {
403 string_buffer_append_char(buffer, c);
404 c = reader_next_char(rd);
405 }
406
407 string_buffer_close(buffer);
408
409 tk = TK_SYMBOL;
410 kw = in_yices_kw(buffer->data, buffer->index);
411 if (kw != NULL) {
412 tk = kw->tk;
413 }
414
415 return tk;
416 }
417
418 /*
419 * Read a number
420 * lex->buffer contains <optional_sign> and a single digit
421 * current_char = what's after the digit in buffer.
422 */
read_number(lexer_t * lex)423 static yices_token_t read_number(lexer_t *lex) {
424 reader_t *rd;
425 string_buffer_t *buffer;
426 int c, all_zeros;
427 yices_token_t tk;
428
429 rd = &lex->reader;
430 c = reader_current_char(rd);
431 buffer = lex->buffer;
432 tk = TK_NUM_RATIONAL; // default
433
434 while (isdigit(c)) {
435 string_buffer_append_char(buffer, c);
436 c = reader_next_char(rd);
437 }
438
439 if (c == '/') {
440 // denominator
441 string_buffer_append_char(buffer, c);
442 c = reader_next_char(rd);
443 if (! isdigit(c)) {
444 tk = TK_INVALID_NUM;
445 goto done;
446 }
447 all_zeros = true;
448 do {
449 if (c != '0') all_zeros = false;
450 string_buffer_append_char(buffer, c);
451 c = reader_next_char(rd);
452 } while (isdigit(c));
453
454 if (all_zeros) tk = TK_ZERO_DIVISOR;
455 // else tk = TK_NUM_RATIONAL
456 goto done;
457 }
458
459 if (c == '.') {
460 tk = TK_NUM_FLOAT;
461 // fractional part
462 string_buffer_append_char(buffer, c);
463 c = reader_next_char(rd);
464 if (! isdigit(c)) {
465 tk = TK_INVALID_NUM;
466 goto done;
467 }
468 do {
469 string_buffer_append_char(buffer, c);
470 c = reader_next_char(rd);
471 } while (isdigit(c));
472 }
473
474 if (c == 'e' || c == 'E') {
475 tk = TK_NUM_FLOAT;
476 // exponent
477 string_buffer_append_char(buffer, c);
478 c = reader_next_char(rd);
479 if (c == '+' || c == '-') {
480 string_buffer_append_char(buffer, c);
481 c = reader_next_char(rd);
482 }
483 if (! isdigit(c)) {
484 tk = TK_INVALID_NUM;
485 goto done;
486 }
487 do {
488 string_buffer_append_char(buffer, c);
489 c = reader_next_char(rd);
490 } while (isdigit(c));
491 }
492
493 done:
494 string_buffer_close(buffer);
495 return tk;
496 }
497
498 /*
499 * Read next token and return its type tk
500 * - set lex->token to tk
501 * - set lex->tk_pos, etc.
502 * - if token is TK_STRING, TK_NUM_RATIONAL, TK_NUM_FLOAT, TK_BV_CONSTANT, TK_SYMBOL, TK_ERROR,
503 * the token value is stored in lex->buffer (as a string).
504 */
next_yices_token(lexer_t * lex)505 yices_token_t next_yices_token(lexer_t *lex) {
506 yices_token_t tk;
507 reader_t *rd;
508 string_buffer_t *buffer;
509 int c;
510
511 rd = &lex->reader;
512 c = reader_current_char(rd);
513 buffer = lex->buffer;
514 string_buffer_reset(buffer);
515
516 // skip spaces and comments
517 for (;;) {
518 while (isspace(c)) c = reader_next_char(rd);
519 if (c != ';') break;
520 do { // read to end-of-line or eof
521 c = reader_next_char(rd);
522 } while (c != '\n' && c != EOF);
523 }
524
525 // record token position (start of token)
526 lex->tk_pos = rd->pos;
527 lex->tk_line = rd->line;
528 lex->tk_column = rd->column;
529
530 switch (c) {
531 case '(':
532 tk = TK_LP;
533 goto next_then_return;
534 case ')':
535 tk = TK_RP;
536 goto next_then_return;
537 case EOF:
538 tk = TK_EOS;
539 goto done;
540 case ':':
541 c = reader_next_char(rd);
542 if (c == ':') {
543 tk = TK_COLON_COLON;
544 goto next_then_return;
545 } else {
546 // store ':' in the buffer since that may be used for reporting errors
547 string_buffer_append_char(buffer, ':');
548 string_buffer_close(buffer);
549 tk = TK_ERROR;
550 goto done;
551 }
552 case '"':
553 tk = read_string(lex);
554 goto done;
555 case '+':
556 case '-':
557 string_buffer_append_char(buffer, c);
558 c = reader_next_char(rd);
559 if (isdigit(c)) {
560 string_buffer_append_char(buffer, c);
561 reader_next_char(rd);
562 tk = read_number(lex);
563 } else {
564 tk = read_symbol(lex);
565 }
566 goto done;
567
568 case '0':
569 string_buffer_append_char(buffer, c);
570 c = reader_next_char(rd);
571 if (c == 'b') {
572 tk = read_bv_constant(lex);
573 } else if (c == 'x') {
574 tk = read_hex_constant(lex);
575 } else {
576 tk = read_number(lex);
577 }
578 goto done;
579
580 case '1':
581 case '2':
582 case '3':
583 case '4':
584 case '5':
585 case '6':
586 case '7':
587 case '8':
588 case '9':
589 string_buffer_append_char(buffer, c);
590 reader_next_char(rd);
591 tk = read_number(lex);
592 goto done;
593
594 default: // symbol or keyword
595 string_buffer_append_char(buffer, c);
596 reader_next_char(rd);
597 tk = read_symbol(lex);
598 goto done;
599 }
600
601 /*
602 * read next character and exit
603 */
604 next_then_return:
605 reader_next_char(rd);
606
607
608 done:
609 lex->token = tk;
610 return tk;
611 }
612
613