1 /*
2  * This file is part of the Yices SMT Solver.
3  * Copyright (C) 2017 SRI International.
4  *
5  * Yices is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * Yices is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with Yices.  If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 /*
20  * Generic lexer operations.
21  *
22  * The same data structure is used for both SMTLIB and the Yices language.
23  * To support nested (include "file"), lexers can be organized into a stack
24  * (implemented as a list of lexer_t objects).
25  */
26 
27 #include <assert.h>
28 
29 #include "parser_utils/lexer.h"
30 #include "utils/memalloc.h"
31 
32 
33 /*
34  * Allocate and initialize buffer
35  * set default values for token, tk_pos etc.
36  */
init_lexer(lexer_t * lex)37 static void init_lexer(lexer_t *lex) {
38   lex->token = -1;
39   lex->tk_pos = 0;
40   lex->tk_line = 0;
41   lex->tk_column = 0;
42   lex->next = NULL;
43 
44   lex->buffer = (string_buffer_t *) safe_malloc(sizeof(string_buffer_t));
45   init_string_buffer(lex->buffer, 128);
46 }
47 
48 /*
49  * Initialize a lexer for the given filename
50  *
51  * Return -1 if the file can't be opened, 0 otherwise.
52  * (lex cannot be used if the result is -1)
53  *
54  * If result = 0,
55  * - string buffer is allocated,
56  * - the reader is initialized
57  * - token is set to -1
58  */
init_file_lexer(lexer_t * lex,const char * filename)59 int32_t init_file_lexer(lexer_t *lex, const char *filename) {
60   int32_t code;
61 
62   code = init_file_reader(&lex->reader, filename);
63   if (code >= 0) {
64     init_lexer(lex);
65   }
66   return code;
67 }
68 
69 
70 /*
71  * Same thing, starting from an already open stream f
72  */
init_stream_lexer(lexer_t * lex,FILE * f,const char * name)73 void init_stream_lexer(lexer_t *lex, FILE *f, const char *name) {
74   init_stream_reader(&lex->reader, f, name);
75   init_lexer(lex);
76 }
77 
78 
79 #if 0
80 /*
81  * HACK/EXPERIMENT: use UTF-8 encoded input
82  */
83 int32_t init_wide_file_lexer(lexer_t *lex, const char *filename) {
84   int32_t code;
85 
86   code = init_wide_file_reader(&lex->reader, filename);
87   if (code >= 0) {
88     init_lexer(lex);
89   }
90   return code;
91 }
92 
93 void init_wide_stream_lexer(lexer_t *lex, FILE *f, const char *name) {
94   init_wide_stream_reader(&lex->reader, f, name);
95   init_lexer(lex);
96 }
97 
98 #endif
99 
100 
101 /*
102  * Initialize lexer for a string data
103  */
init_string_lexer(lexer_t * lex,const char * data,const char * name)104 void init_string_lexer(lexer_t *lex, const char *data, const char *name) {
105   init_string_reader(&lex->reader, data, name);
106   init_lexer(lex);
107 }
108 
109 
110 /*
111  * Change the input string for lex to data
112  */
reset_string_lexer(lexer_t * lex,const char * data)113 void reset_string_lexer(lexer_t *lex, const char *data) {
114   reset_string_reader(&lex->reader, data);
115   // reset token and location
116   lex->token = -1;
117   lex->tk_pos = 0;
118   lex->tk_line = 0;
119   lex->tk_column = 0;
120   string_buffer_reset(lex->buffer);
121 }
122 
123 
124 
125 /*
126  * Nested lexer: get buffer from parent
127  * the reader is initialized for filename.
128  * TODO: report an error if there's a circular nesting
129  * (i.e., same file is already open in an enclosing lexer)
130  */
init_nested_lexer(lexer_t * lex,const char * filename,lexer_t * parent)131 int32_t init_nested_lexer(lexer_t *lex, const char *filename, lexer_t *parent) {
132   int32_t code;
133 
134   lex->token = -1;
135   lex->tk_pos = 0;
136   lex->tk_line = 0;
137   lex->tk_column = 0;
138 
139   code = init_file_reader(&lex->reader, filename);
140   if (code < 0) {
141     lex->buffer = NULL;
142     lex->next = NULL;
143     return code;
144   }
145 
146   string_buffer_reset(parent->buffer);
147   lex->buffer = parent->buffer;
148   lex->next = parent;
149   return code;
150 }
151 
152 
153 /*
154  * Nested lexer using a string data
155  */
init_nested_string_lexer(lexer_t * lex,const char * data,const char * name,lexer_t * parent)156 void init_nested_string_lexer(lexer_t *lex, const char *data, const char *name, lexer_t *parent) {
157   lex->token = -1;
158   lex->tk_pos = 0;
159   lex->tk_line = 0;
160   lex->tk_column = 0;
161 
162   init_string_reader(&lex->reader, data, name);
163   lex->buffer = parent->buffer;
164   lex->next = parent;
165 }
166 
167 /*
168  * Close lexer. If lex has no parent, delete the allocated
169  * string buffer.
170  */
close_lexer(lexer_t * lex)171 int close_lexer(lexer_t *lex) {
172   int code;
173 
174   code = close_reader(&lex->reader);
175   if (lex->next == NULL) {
176     if (lex->buffer != NULL) {
177       delete_string_buffer(lex->buffer);
178       safe_free(lex->buffer);
179     }
180   }
181   return code;
182 }
183 
184 
185 /*
186  * Variant: close lex but not the file/stream attached if any.
187  * - this allows us to attach a lexer to stdin, then close it
188  *   without closing stdin.
189  * - if lex->next is NULL (toplevel lexer), delete the internal buffer
190  */
close_lexer_only(lexer_t * lex)191 void close_lexer_only(lexer_t *lex) {
192   if (lex->next == NULL) {
193     if (lex->buffer != NULL) {
194       delete_string_buffer(lex->buffer);
195       safe_free(lex->buffer);
196     }
197   }
198 }
199 
200 
201 
202 /*
203  * Flush: read until the end of the line or EOF
204  */
flush_lexer(lexer_t * lex)205 void flush_lexer(lexer_t *lex) {
206   int c;
207   c = reader_current_char(&lex->reader);
208   while (c != '\n' && c != EOF) {
209     c = reader_next_char(&lex->reader);
210   }
211   lex->token = -1;
212   string_buffer_reset(lex->buffer);
213 }
214