xref: /freebsd/contrib/bc/src/dc_parse.c (revision 38a52bd3)
1 /*
2  * *****************************************************************************
3  *
4  * SPDX-License-Identifier: BSD-2-Clause
5  *
6  * Copyright (c) 2018-2021 Gavin D. Howard and contributors.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions are met:
10  *
11  * * Redistributions of source code must retain the above copyright notice, this
12  *   list of conditions and the following disclaimer.
13  *
14  * * Redistributions in binary form must reproduce the above copyright notice,
15  *   this list of conditions and the following disclaimer in the documentation
16  *   and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGE.
29  *
30  * *****************************************************************************
31  *
32  * The parser for dc.
33  *
34  */
35 
36 #if DC_ENABLED
37 
38 #include <assert.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <setjmp.h>
42 
43 #include <dc.h>
44 #include <program.h>
45 #include <vm.h>
46 
47 /**
48  * Parses a register. The lexer should have already lexed the true name of the
49  * register, per extended registers and such.
50  * @param p    The parser.
51  * @param var  True if the parser is for a variable, false otherwise.
52  */
53 static void
54 dc_parse_register(BcParse* p, bool var)
55 {
56 	bc_lex_next(&p->l);
57 	if (p->l.t != BC_LEX_NAME) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
58 
59 	bc_parse_pushName(p, p->l.str.v, var);
60 }
61 
62 /**
63  * Parses a dc string.
64  * @param p  The parser.
65  */
66 static inline void
67 dc_parse_string(BcParse* p)
68 {
69 	bc_parse_addString(p);
70 	bc_lex_next(&p->l);
71 }
72 
73 /**
74  * Parses a token that requires a memory operation, like load or store.
75  * @param p      The parser.
76  * @param inst   The instruction to push for the memory operation.
77  * @param name   Whether the load or store is to a variable or array, and not to
78  *               a global.
79  * @param store  True if the operation is a store, false otherwise.
80  */
81 static void
82 dc_parse_mem(BcParse* p, uchar inst, bool name, bool store)
83 {
84 	// Push the instruction.
85 	bc_parse_push(p, inst);
86 
87 	// Parse the register if necessary.
88 	if (name) dc_parse_register(p, inst != BC_INST_ARRAY_ELEM);
89 
90 	// Stores use the bc assign infrastructure, but they need to do a swap
91 	// first.
92 	if (store)
93 	{
94 		bc_parse_push(p, BC_INST_SWAP);
95 		bc_parse_push(p, BC_INST_ASSIGN_NO_VAL);
96 	}
97 
98 	bc_lex_next(&p->l);
99 }
100 
101 /**
102  * Parses a conditional execution instruction.
103  * @param p     The parser.
104  * @param inst  The instruction for the condition.
105  */
106 static void
107 dc_parse_cond(BcParse* p, uchar inst)
108 {
109 	// Push the instruction for the condition and the conditional execution.
110 	bc_parse_push(p, inst);
111 	bc_parse_push(p, BC_INST_EXEC_COND);
112 
113 	// Parse the register.
114 	dc_parse_register(p, true);
115 
116 	bc_lex_next(&p->l);
117 
118 	// If the next token is an else, parse the else.
119 	if (p->l.t == BC_LEX_KW_ELSE)
120 	{
121 		dc_parse_register(p, true);
122 		bc_lex_next(&p->l);
123 	}
124 	// Otherwise, push a marker for no else.
125 	else bc_parse_pushIndex(p, SIZE_MAX);
126 }
127 
128 /**
129  * Parses a token for dc.
130  * @param p      The parser.
131  * @param t      The token to parse.
132  * @param flags  The flags that say what is allowed or not.
133  */
134 static void
135 dc_parse_token(BcParse* p, BcLexType t, uint8_t flags)
136 {
137 	uchar inst;
138 	bool assign, get_token = false;
139 
140 	switch (t)
141 	{
142 		case BC_LEX_OP_REL_EQ:
143 		case BC_LEX_OP_REL_LE:
144 		case BC_LEX_OP_REL_GE:
145 		case BC_LEX_OP_REL_NE:
146 		case BC_LEX_OP_REL_LT:
147 		case BC_LEX_OP_REL_GT:
148 		{
149 			inst = (uchar) (t - BC_LEX_OP_REL_EQ + BC_INST_REL_EQ);
150 			dc_parse_cond(p, inst);
151 			break;
152 		}
153 
154 		case BC_LEX_SCOLON:
155 		case BC_LEX_COLON:
156 		{
157 			dc_parse_mem(p, BC_INST_ARRAY_ELEM, true, t == BC_LEX_COLON);
158 			break;
159 		}
160 
161 		case BC_LEX_STR:
162 		{
163 			dc_parse_string(p);
164 			break;
165 		}
166 
167 		case BC_LEX_NEG:
168 		{
169 			// This tells us whether or not the neg is for a command or at the
170 			// beginning of a number. If it's a command, push it. Otherwise,
171 			// fallthrough and parse the number.
172 			if (dc_lex_negCommand(&p->l))
173 			{
174 				bc_parse_push(p, BC_INST_NEG);
175 				get_token = true;
176 				break;
177 			}
178 
179 			bc_lex_next(&p->l);
180 
181 			// Fallthrough.
182 			BC_FALLTHROUGH
183 		}
184 
185 		case BC_LEX_NUMBER:
186 		{
187 			bc_parse_number(p);
188 
189 			// Push the negative instruction if we fell through from above.
190 			if (t == BC_LEX_NEG) bc_parse_push(p, BC_INST_NEG);
191 			get_token = true;
192 
193 			break;
194 		}
195 
196 		case BC_LEX_KW_READ:
197 		{
198 			// Make sure the read is not recursive.
199 			if (BC_ERR(flags & BC_PARSE_NOREAD))
200 			{
201 				bc_parse_err(p, BC_ERR_EXEC_REC_READ);
202 			}
203 			else bc_parse_push(p, BC_INST_READ);
204 
205 			get_token = true;
206 
207 			break;
208 		}
209 
210 		case BC_LEX_OP_ASSIGN:
211 		case BC_LEX_STORE_PUSH:
212 		{
213 			assign = t == BC_LEX_OP_ASSIGN;
214 			inst = assign ? BC_INST_VAR : BC_INST_PUSH_TO_VAR;
215 			dc_parse_mem(p, inst, true, assign);
216 			break;
217 		}
218 
219 		case BC_LEX_LOAD:
220 		case BC_LEX_LOAD_POP:
221 		{
222 			inst = t == BC_LEX_LOAD_POP ? BC_INST_PUSH_VAR : BC_INST_LOAD;
223 			dc_parse_mem(p, inst, true, false);
224 			break;
225 		}
226 
227 		case BC_LEX_REG_STACK_LEVEL:
228 		{
229 			dc_parse_mem(p, BC_INST_REG_STACK_LEN, true, false);
230 			break;
231 		}
232 
233 		case BC_LEX_STORE_IBASE:
234 		case BC_LEX_STORE_OBASE:
235 		case BC_LEX_STORE_SCALE:
236 #if BC_ENABLE_EXTRA_MATH
237 		case BC_LEX_STORE_SEED:
238 #endif // BC_ENABLE_EXTRA_MATH
239 		{
240 			inst = (uchar) (t - BC_LEX_STORE_IBASE + BC_INST_IBASE);
241 			dc_parse_mem(p, inst, false, true);
242 			break;
243 		}
244 
245 		case BC_LEX_ARRAY_LENGTH:
246 		{
247 			// Need to push the array first, based on how length is implemented.
248 			bc_parse_push(p, BC_INST_ARRAY);
249 			dc_parse_register(p, false);
250 
251 			bc_parse_push(p, BC_INST_LENGTH);
252 
253 			get_token = true;
254 
255 			break;
256 		}
257 
258 		default:
259 		{
260 			// All other tokens should be taken care of by the caller, or they
261 			// actually *are* invalid.
262 			bc_parse_err(p, BC_ERR_PARSE_TOKEN);
263 		}
264 	}
265 
266 	if (get_token) bc_lex_next(&p->l);
267 }
268 
269 void
270 dc_parse_expr(BcParse* p, uint8_t flags)
271 {
272 	BcInst inst;
273 	BcLexType t;
274 	bool need_expr, have_expr = false;
275 
276 	need_expr = ((flags & BC_PARSE_NOREAD) != 0);
277 
278 	// dc can just keep parsing forever basically, unlike bc, which has to have
279 	// a whole bunch of complicated nonsense because its language was horribly
280 	// designed.
281 
282 	// While we don't have EOF...
283 	while ((t = p->l.t) != BC_LEX_EOF)
284 	{
285 		// Eat newline.
286 		if (t == BC_LEX_NLINE)
287 		{
288 			bc_lex_next(&p->l);
289 			continue;
290 		}
291 
292 		// Get the instruction that corresponds to the token.
293 		inst = dc_parse_insts[t];
294 
295 		// If the instruction is invalid, that means we have to do some harder
296 		// parsing. So if not invalid, just push the instruction; otherwise,
297 		// parse the token.
298 		if (inst != BC_INST_INVALID)
299 		{
300 			bc_parse_push(p, inst);
301 			bc_lex_next(&p->l);
302 		}
303 		else dc_parse_token(p, t, flags);
304 
305 		have_expr = true;
306 	}
307 
308 	// If we don't have an expression and need one, barf. Otherwise, just push a
309 	// BC_INST_POP_EXEC if we have EOF and BC_PARSE_NOCALL, which dc uses to
310 	// indicate that it is executing a string.
311 	if (BC_ERR(need_expr && !have_expr)) bc_err(BC_ERR_EXEC_READ_EXPR);
312 	else if (p->l.t == BC_LEX_EOF && (flags & BC_PARSE_NOCALL))
313 	{
314 		bc_parse_push(p, BC_INST_POP_EXEC);
315 	}
316 }
317 
318 void
319 dc_parse_parse(BcParse* p)
320 {
321 	assert(p != NULL);
322 
323 	BC_SETJMP_LOCKED(exit);
324 
325 	// If we have EOF, someone called this function one too many times.
326 	// Otherwise, parse.
327 	if (BC_ERR(p->l.t == BC_LEX_EOF)) bc_parse_err(p, BC_ERR_PARSE_EOF);
328 	else dc_parse_expr(p, 0);
329 
330 exit:
331 
332 	// Need to reset if there was an error.
333 	if (BC_SIG_EXC) bc_parse_reset(p);
334 
335 	BC_LONGJMP_CONT;
336 	BC_SIG_MAYLOCK;
337 }
338 #endif // DC_ENABLED
339