xref: /freebsd/contrib/bc/src/dc_parse.c (revision 53b70c86)
1 /*
2  * *****************************************************************************
3  *
4  * SPDX-License-Identifier: BSD-2-Clause
5  *
6  * Copyright (c) 2018-2021 Gavin D. Howard and contributors.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions are met:
10  *
11  * * Redistributions of source code must retain the above copyright notice, this
12  *   list of conditions and the following disclaimer.
13  *
14  * * Redistributions in binary form must reproduce the above copyright notice,
15  *   this list of conditions and the following disclaimer in the documentation
16  *   and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGE.
29  *
30  * *****************************************************************************
31  *
32  * The parser for dc.
33  *
34  */
35 
36 #if DC_ENABLED
37 
38 #include <assert.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <setjmp.h>
42 
43 #include <dc.h>
44 #include <program.h>
45 #include <vm.h>
46 
47 /**
48  * Parses a register. The lexer should have already lexed the true name of the
49  * register, per extended registers and such.
50  * @param p    The parser.
51  * @param var  True if the parser is for a variable, false otherwise.
52  */
53 static void dc_parse_register(BcParse *p, bool var) {
54 
55 	bc_lex_next(&p->l);
56 	if (p->l.t != BC_LEX_NAME) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
57 
58 	bc_parse_pushName(p, p->l.str.v, var);
59 }
60 
61 /**
62  * Parses a dc string.
63  * @param p  The parser.
64  */
65 static inline void dc_parse_string(BcParse *p) {
66 	bc_parse_addString(p);
67 	bc_lex_next(&p->l);
68 }
69 
70 /**
71  * Parses a token that requires a memory operation, like load or store.
72  * @param p      The parser.
73  * @param inst   The instruction to push for the memory operation.
74  * @param name   Whether the load or store is to a variable or array, and not to
75  *               a global.
76  * @param store  True if the operation is a store, false otherwise.
77  */
78 static void dc_parse_mem(BcParse *p, uchar inst, bool name, bool store) {
79 
80 	// Push the instruction.
81 	bc_parse_push(p, inst);
82 
83 	// Parse the register if necessary.
84 	if (name) dc_parse_register(p, inst != BC_INST_ARRAY_ELEM);
85 
86 	// Stores use the bc assign infrastructure, but they need to do a swap
87 	// first.
88 	if (store) {
89 		bc_parse_push(p, BC_INST_SWAP);
90 		bc_parse_push(p, BC_INST_ASSIGN_NO_VAL);
91 	}
92 
93 	bc_lex_next(&p->l);
94 }
95 
96 /**
97  * Parses a conditional execution instruction.
98  * @param p     The parser.
99  * @param inst  The instruction for the condition.
100  */
101 static void dc_parse_cond(BcParse *p, uchar inst) {
102 
103 	// Push the instruction for the condition and the conditional execution.
104 	bc_parse_push(p, inst);
105 	bc_parse_push(p, BC_INST_EXEC_COND);
106 
107 	// Parse the register.
108 	dc_parse_register(p, true);
109 
110 	bc_lex_next(&p->l);
111 
112 	// If the next token is an else, parse the else.
113 	if (p->l.t == BC_LEX_KW_ELSE) {
114 		dc_parse_register(p, true);
115 		bc_lex_next(&p->l);
116 	}
117 	// Otherwise, push a marker for no else.
118 	else bc_parse_pushIndex(p, SIZE_MAX);
119 }
120 
121 /**
122  * Parses a token for dc.
123  * @param p      The parser.
124  * @param t      The token to parse.
125  * @param flags  The flags that say what is allowed or not.
126  */
127 static void dc_parse_token(BcParse *p, BcLexType t, uint8_t flags) {
128 
129 	uchar inst;
130 	bool assign, get_token = false;
131 
132 	switch (t) {
133 
134 		case BC_LEX_OP_REL_EQ:
135 		case BC_LEX_OP_REL_LE:
136 		case BC_LEX_OP_REL_GE:
137 		case BC_LEX_OP_REL_NE:
138 		case BC_LEX_OP_REL_LT:
139 		case BC_LEX_OP_REL_GT:
140 		{
141 			inst = (uchar) (t - BC_LEX_OP_REL_EQ + BC_INST_REL_EQ);
142 			dc_parse_cond(p, inst);
143 			break;
144 		}
145 
146 		case BC_LEX_SCOLON:
147 		case BC_LEX_COLON:
148 		{
149 			dc_parse_mem(p, BC_INST_ARRAY_ELEM, true, t == BC_LEX_COLON);
150 			break;
151 		}
152 
153 		case BC_LEX_STR:
154 		{
155 			dc_parse_string(p);
156 			break;
157 		}
158 
159 		case BC_LEX_NEG:
160 		{
161 			// This tells us whether or not the neg is for a command or at the
162 			// beginning of a number. If it's a command, push it. Otherwise,
163 			// fallthrough and parse the number.
164 			if (dc_lex_negCommand(&p->l)) {
165 				bc_parse_push(p, BC_INST_NEG);
166 				get_token = true;
167 				break;
168 			}
169 
170 			bc_lex_next(&p->l);
171 		}
172 		// Fallthrough.
173 		BC_FALLTHROUGH
174 
175 		case BC_LEX_NUMBER:
176 		{
177 			bc_parse_number(p);
178 
179 			// Push the negative instruction if we fell through from above.
180 			if (t == BC_LEX_NEG) bc_parse_push(p, BC_INST_NEG);
181 			get_token = true;
182 
183 			break;
184 		}
185 
186 		case BC_LEX_KW_READ:
187 		{
188 			// Make sure the read is not recursive.
189 			if (BC_ERR(flags & BC_PARSE_NOREAD))
190 				bc_parse_err(p, BC_ERR_EXEC_REC_READ);
191 			else bc_parse_push(p, BC_INST_READ);
192 
193 			get_token = true;
194 
195 			break;
196 		}
197 
198 		case BC_LEX_OP_ASSIGN:
199 		case BC_LEX_STORE_PUSH:
200 		{
201 			assign = t == BC_LEX_OP_ASSIGN;
202 			inst = assign ? BC_INST_VAR : BC_INST_PUSH_TO_VAR;
203 			dc_parse_mem(p, inst, true, assign);
204 			break;
205 		}
206 
207 		case BC_LEX_LOAD:
208 		case BC_LEX_LOAD_POP:
209 		{
210 			inst = t == BC_LEX_LOAD_POP ? BC_INST_PUSH_VAR : BC_INST_LOAD;
211 			dc_parse_mem(p, inst, true, false);
212 			break;
213 		}
214 
215 		case BC_LEX_REG_STACK_LEVEL:
216 		{
217 			dc_parse_mem(p, BC_INST_REG_STACK_LEN, true, false);
218 			break;
219 		}
220 
221 		case BC_LEX_STORE_IBASE:
222 		case BC_LEX_STORE_OBASE:
223 		case BC_LEX_STORE_SCALE:
224 #if BC_ENABLE_EXTRA_MATH
225 		case BC_LEX_STORE_SEED:
226 #endif // BC_ENABLE_EXTRA_MATH
227 		{
228 			inst = (uchar) (t - BC_LEX_STORE_IBASE + BC_INST_IBASE);
229 			dc_parse_mem(p, inst, false, true);
230 			break;
231 		}
232 
233 		case BC_LEX_ARRAY_LENGTH:
234 		{
235 			// Need to push the array first, based on how length is implemented.
236 			bc_parse_push(p, BC_INST_ARRAY);
237 			dc_parse_register(p, false);
238 
239 			bc_parse_push(p, BC_INST_LENGTH);
240 
241 			get_token = true;
242 
243 			break;
244 		}
245 
246 		default:
247 		{
248 			// All other tokens should be taken care of by the caller, or they
249 			// actually *are* invalid.
250 			bc_parse_err(p, BC_ERR_PARSE_TOKEN);
251 		}
252 	}
253 
254 	if (get_token) bc_lex_next(&p->l);
255 }
256 
257 void dc_parse_expr(BcParse *p, uint8_t flags) {
258 
259 	BcInst inst;
260 	BcLexType t;
261 	bool need_expr, have_expr = false;
262 
263 	need_expr = ((flags & BC_PARSE_NOREAD) != 0);
264 
265 	// dc can just keep parsing forever basically, unlike bc, which has to have
266 	// a whole bunch of complicated nonsense because its language was horribly
267 	// designed.
268 
269 	// While we don't have EOF...
270 	while ((t = p->l.t) != BC_LEX_EOF) {
271 
272 		// Eat newline.
273 		if (t == BC_LEX_NLINE) {
274 			bc_lex_next(&p->l);
275 			continue;
276 		}
277 
278 		// Get the instruction that corresponds to the token.
279 		inst = dc_parse_insts[t];
280 
281 		// If the instruction is invalid, that means we have to do some harder
282 		// parsing. So if not invalid, just push the instruction; otherwise,
283 		// parse the token.
284 		if (inst != BC_INST_INVALID) {
285 			bc_parse_push(p, inst);
286 			bc_lex_next(&p->l);
287 		}
288 		else dc_parse_token(p, t, flags);
289 
290 		have_expr = true;
291 	}
292 
293 	// If we don't have an expression and need one, barf. Otherwise, just push a
294 	// BC_INST_POP_EXEC if we have EOF and BC_PARSE_NOCALL, which dc uses to
295 	// indicate that it is executing a string.
296 	if (BC_ERR(need_expr && !have_expr)) bc_err(BC_ERR_EXEC_READ_EXPR);
297 	else if (p->l.t == BC_LEX_EOF && (flags & BC_PARSE_NOCALL))
298 		bc_parse_push(p, BC_INST_POP_EXEC);
299 }
300 
301 void dc_parse_parse(BcParse *p) {
302 
303 	assert(p != NULL);
304 
305 	BC_SETJMP(exit);
306 
307 	// If we have EOF, someone called this function one too many times.
308 	// Otherwise, parse.
309 	if (BC_ERR(p->l.t == BC_LEX_EOF)) bc_parse_err(p, BC_ERR_PARSE_EOF);
310 	else dc_parse_expr(p, 0);
311 
312 exit:
313 
314 	BC_SIG_MAYLOCK;
315 
316 	// Need to reset if there was an error.
317 	if (BC_SIG_EXC) bc_parse_reset(p);
318 
319 	BC_LONGJMP_CONT;
320 }
321 #endif // DC_ENABLED
322