xref: /freebsd/contrib/bc/include/lang.h (revision c03c5b1c)
1 /*
2  * *****************************************************************************
3  *
4  * SPDX-License-Identifier: BSD-2-Clause
5  *
6  * Copyright (c) 2018-2021 Gavin D. Howard and contributors.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions are met:
10  *
11  * * Redistributions of source code must retain the above copyright notice, this
12  *   list of conditions and the following disclaimer.
13  *
14  * * Redistributions in binary form must reproduce the above copyright notice,
15  *   this list of conditions and the following disclaimer in the documentation
16  *   and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGE.
29  *
30  * *****************************************************************************
31  *
32  * Definitions for program data.
33  *
34  */
35 
36 #ifndef BC_LANG_H
37 #define BC_LANG_H
38 
39 #include <stdbool.h>
40 #if BC_C11
41 #include <assert.h>
42 #endif // BC_C11
43 
44 #if BC_C11
45 #include <assert.h>
46 #endif // BC_C11
47 
48 #include <status.h>
49 #include <vector.h>
50 #include <num.h>
51 
52 /// The instructions for bytecode.
53 typedef enum BcInst {
54 
55 #if BC_ENABLED
56 
57 	/// Postfix increment and decrement. Prefix are translated into
58 	/// BC_INST_ONE with either BC_INST_ASSIGN_PLUS or BC_INST_ASSIGN_MINUS.
59 	BC_INST_INC = 0,
60 	BC_INST_DEC,
61 #endif // BC_ENABLED
62 
63 	/// Unary negation.
64 	BC_INST_NEG,
65 
66 	/// Boolean not.
67 	BC_INST_BOOL_NOT,
68 #if BC_ENABLE_EXTRA_MATH
69 	/// Truncation operator.
70 	BC_INST_TRUNC,
71 #endif // BC_ENABLE_EXTRA_MATH
72 
73 	/// These should be self-explanatory.
74 	BC_INST_POWER,
75 	BC_INST_MULTIPLY,
76 	BC_INST_DIVIDE,
77 	BC_INST_MODULUS,
78 	BC_INST_PLUS,
79 	BC_INST_MINUS,
80 
81 #if BC_ENABLE_EXTRA_MATH
82 
83 	/// Places operator.
84 	BC_INST_PLACES,
85 
86 	/// Shift operators.
87 	BC_INST_LSHIFT,
88 	BC_INST_RSHIFT,
89 #endif // BC_ENABLE_EXTRA_MATH
90 
91 	/// Comparison operators.
92 	BC_INST_REL_EQ,
93 	BC_INST_REL_LE,
94 	BC_INST_REL_GE,
95 	BC_INST_REL_NE,
96 	BC_INST_REL_LT,
97 	BC_INST_REL_GT,
98 
99 	/// Boolean or and and.
100 	BC_INST_BOOL_OR,
101 	BC_INST_BOOL_AND,
102 
103 #if BC_ENABLED
104 	/// Same as the normal operators, but assigment. So ^=, *=, /=, etc.
105 	BC_INST_ASSIGN_POWER,
106 	BC_INST_ASSIGN_MULTIPLY,
107 	BC_INST_ASSIGN_DIVIDE,
108 	BC_INST_ASSIGN_MODULUS,
109 	BC_INST_ASSIGN_PLUS,
110 	BC_INST_ASSIGN_MINUS,
111 #if BC_ENABLE_EXTRA_MATH
112 	/// Places and shift assignment operators.
113 	BC_INST_ASSIGN_PLACES,
114 	BC_INST_ASSIGN_LSHIFT,
115 	BC_INST_ASSIGN_RSHIFT,
116 #endif // BC_ENABLE_EXTRA_MATH
117 
118 	/// Normal assignment.
119 	BC_INST_ASSIGN,
120 
121 	/// bc and dc detect when the value from an assignment is not necessary.
122 	/// For example, a plain assignment statement means the value is never used.
123 	/// In those cases, we can get lots of performance back by not even creating
124 	/// a copy at all. In fact, it saves a copy, a push onto the results stack,
125 	/// a pop from the results stack, and a free. Definitely worth it to detect.
126 	BC_INST_ASSIGN_POWER_NO_VAL,
127 	BC_INST_ASSIGN_MULTIPLY_NO_VAL,
128 	BC_INST_ASSIGN_DIVIDE_NO_VAL,
129 	BC_INST_ASSIGN_MODULUS_NO_VAL,
130 	BC_INST_ASSIGN_PLUS_NO_VAL,
131 	BC_INST_ASSIGN_MINUS_NO_VAL,
132 #if BC_ENABLE_EXTRA_MATH
133 	/// Same as above.
134 	BC_INST_ASSIGN_PLACES_NO_VAL,
135 	BC_INST_ASSIGN_LSHIFT_NO_VAL,
136 	BC_INST_ASSIGN_RSHIFT_NO_VAL,
137 #endif // BC_ENABLE_EXTRA_MATH
138 #endif // BC_ENABLED
139 
140 	/// Normal assignment that pushes no value on the stack.
141 	BC_INST_ASSIGN_NO_VAL,
142 
143 	/// Push a constant onto the results stack.
144 	BC_INST_NUM,
145 
146 	/// Push a variable onto the results stack.
147 	BC_INST_VAR,
148 
149 	/// Push an array element onto the results stack.
150 	BC_INST_ARRAY_ELEM,
151 
152 	/// Push an array onto the results stack. This is different from pushing an
153 	/// array *element* onto the results stack; it pushes a reference to the
154 	/// whole array. This is needed in bc for function arguments that are
155 	/// arrays. It is also needed for returning the length of an array.
156 	BC_INST_ARRAY,
157 
158 	/// Push a zero or a one onto the stack. These are special cased because it
159 	/// does help performance, particularly for one since inc/dec operators
160 	/// use it.
161 	BC_INST_ZERO,
162 	BC_INST_ONE,
163 
164 #if BC_ENABLED
165 	/// Push the last printed value onto the stack.
166 	BC_INST_LAST,
167 #endif // BC_ENABLED
168 
169 	/// Push the value of any of the globals onto the stack.
170 	BC_INST_IBASE,
171 	BC_INST_OBASE,
172 	BC_INST_SCALE,
173 
174 #if BC_ENABLE_EXTRA_MATH
175 	/// Push the value of the seed global onto the stack.
176 	BC_INST_SEED,
177 #endif // BC_ENABLE_EXTRA_MATH
178 
179 	/// These are builtin functions.
180 	BC_INST_LENGTH,
181 	BC_INST_SCALE_FUNC,
182 	BC_INST_SQRT,
183 	BC_INST_ABS,
184 
185 #if BC_ENABLE_EXTRA_MATH
186 	/// Another builtin function.
187 	BC_INST_IRAND,
188 #endif // BC_ENABLE_EXTRA_MATH
189 
190 	/// Asciify.
191 	BC_INST_ASCIIFY,
192 
193 	/// Another builtin function.
194 	BC_INST_READ,
195 
196 #if BC_ENABLE_EXTRA_MATH
197 	/// Another builtin function.
198 	BC_INST_RAND,
199 #endif // BC_ENABLE_EXTRA_MATH
200 
201 	/// Return the max for the various globals.
202 	BC_INST_MAXIBASE,
203 	BC_INST_MAXOBASE,
204 	BC_INST_MAXSCALE,
205 #if BC_ENABLE_EXTRA_MATH
206 	/// Return the max value returned by rand().
207 	BC_INST_MAXRAND,
208 #endif // BC_ENABLE_EXTRA_MATH
209 
210 	/// bc line_length() builtin function.
211 	BC_INST_LINE_LENGTH,
212 
213 #if BC_ENABLED
214 
215 	/// bc global_stacks() builtin function.
216 	BC_INST_GLOBAL_STACKS,
217 
218 #endif // BC_ENABLED
219 
220 	/// bc leading_zero() builtin function.
221 	BC_INST_LEADING_ZERO,
222 
223 	/// This is slightly misnamed versus BC_INST_PRINT_POP. Well, it is in bc.
224 	/// dc uses this instruction to print, but not pop. That's valid in dc.
225 	/// However, in bc, it is *never* valid to print without popping. In bc,
226 	/// BC_INST_PRINT_POP is used to indicate when a string should be printed
227 	/// because of a print statement or whether it should be printed raw. The
228 	/// reason for this is because a print statement handles escaped characters.
229 	/// So BC_INST_PRINT_POP is for printing a string from a print statement,
230 	/// BC_INST_PRINT_STR is for printing a string by itself.
231 	///
232 	/// In dc, BC_INST_PRINT_POP prints and pops, and BC_INST_PRINT just prints.
233 	///
234 	/// Oh, and BC_INST_STR pushes a string onto the results stack.
235 	BC_INST_PRINT,
236 	BC_INST_PRINT_POP,
237 	BC_INST_STR,
238 #if BC_ENABLED
239 	BC_INST_PRINT_STR,
240 
241 	/// Jumps unconditionally.
242 	BC_INST_JUMP,
243 
244 	/// Jumps if the top of the results stack is zero (condition failed). It
245 	/// turns out that we only want to jump when conditions fail to "skip" code.
246 	BC_INST_JUMP_ZERO,
247 
248 	/// Call a function.
249 	BC_INST_CALL,
250 
251 	/// Return the top of the stack to the caller.
252 	BC_INST_RET,
253 
254 	/// Return 0 to the caller.
255 	BC_INST_RET0,
256 
257 	/// Special return instruction for void functions.
258 	BC_INST_RET_VOID,
259 
260 	/// Special halt instruction.
261 	BC_INST_HALT,
262 #endif // BC_ENABLED
263 
264 	/// Pop an item off of the results stack.
265 	BC_INST_POP,
266 
267 	/// Swaps the top two items on the results stack.
268 	BC_INST_SWAP,
269 
270 	/// Modular exponentiation.
271 	BC_INST_MODEXP,
272 
273 	/// Do divide and modulus at the same time.
274 	BC_INST_DIVMOD,
275 
276 	/// Turns a number into a string and prints it.
277 	BC_INST_PRINT_STREAM,
278 
279 #if DC_ENABLED
280 
281 	/// dc's return; it pops an executing string off of the stack.
282 	BC_INST_POP_EXEC,
283 
284 	/// Unconditionally execute a string.
285 	BC_INST_EXECUTE,
286 
287 	/// Conditionally execute a string.
288 	BC_INST_EXEC_COND,
289 
290 	/// Prints each item on the results stack, separated by newlines.
291 	BC_INST_PRINT_STACK,
292 
293 	/// Pops everything off of the results stack.
294 	BC_INST_CLEAR_STACK,
295 
296 	/// Pushes the current length of a register stack onto the results stack.
297 	BC_INST_REG_STACK_LEN,
298 
299 	/// Pushes the current length of the results stack onto the results stack.
300 	BC_INST_STACK_LEN,
301 
302 	/// Pushes a copy of the item on the top of the results stack onto the
303 	/// results stack.
304 	BC_INST_DUPLICATE,
305 
306 	/// Copies the value in a register and pushes the copy onto the results
307 	/// stack.
308 	BC_INST_LOAD,
309 
310 	/// Pops an item off of a register stack and pushes it onto the results
311 	/// stack.
312 	BC_INST_PUSH_VAR,
313 
314 	/// Pops an item off of the results stack and pushes it onto a register's
315 	/// stack.
316 	BC_INST_PUSH_TO_VAR,
317 
318 	/// Quit.
319 	BC_INST_QUIT,
320 
321 	/// Quit executing some number of strings.
322 	BC_INST_NQUIT,
323 
324 	/// Push the depth of the execution stack onto the stack.
325 	BC_INST_EXEC_STACK_LEN,
326 
327 #endif // DC_ENABLED
328 
329 	/// Invalid instruction.
330 	BC_INST_INVALID,
331 
332 } BcInst;
333 
334 #if BC_C11
335 static_assert(BC_INST_INVALID <= UCHAR_MAX,
336               "Too many instructions to fit into an unsigned char");
337 #endif // BC_C11
338 
339 /// Used by maps to identify where items are in the array.
340 typedef struct BcId {
341 
342 	/// The name of the item.
343 	char *name;
344 
345 	/// The index into the array where the item is.
346 	size_t idx;
347 
348 } BcId;
349 
350 /// The location of a var, array, or array element.
351 typedef struct BcLoc {
352 
353 	/// The index of the var or array.
354 	size_t loc;
355 
356 	/// The index of the array element. Only used for array elements.
357 	size_t idx;
358 
359 } BcLoc;
360 
361 /// An entry for a constant.
362 typedef struct BcConst {
363 
364 	/// The original string as parsed from the source code.
365 	char *val;
366 
367 	/// The last base that the constant was parsed in.
368 	BcBigDig base;
369 
370 	/// The parsed constant.
371 	BcNum num;
372 
373 } BcConst;
374 
375 /// A function. This is also used in dc, not just bc. The reason is that strings
376 /// are executed in dc, and they are converted to functions in order to be
377 /// executed.
378 typedef struct BcFunc {
379 
380 	/// The bytecode instructions.
381 	BcVec code;
382 
383 #if BC_ENABLED
384 
385 	/// The labels. This is a vector of indices. The index is the index into
386 	/// the bytecode vector where the label is.
387 	BcVec labels;
388 
389 	/// The autos for the function. The first items are the parameters, and the
390 	/// arguments to the parameters must match the types in this vector.
391 	BcVec autos;
392 
393 	/// The number of parameters the function takes.
394 	size_t nparams;
395 
396 #endif // BC_ENABLED
397 
398 	/// The strings encountered in the function.
399 	BcVec strs;
400 
401 	/// The constants encountered in the function.
402 	BcVec consts;
403 
404 	/// The function's name.
405 	const char *name;
406 
407 #if BC_ENABLED
408 	/// True if the function is a void function.
409 	bool voidfn;
410 #endif // BC_ENABLED
411 
412 } BcFunc;
413 
414 /// Types of results that can be pushed onto the results stack.
415 typedef enum BcResultType {
416 
417 	/// Result is a variable.
418 	BC_RESULT_VAR,
419 
420 	/// Result is an array element.
421 	BC_RESULT_ARRAY_ELEM,
422 
423 	/// Result is an array. This is only allowed for function arguments or
424 	/// returning the length of the array.
425 	BC_RESULT_ARRAY,
426 
427 	/// Result is a string.
428 	BC_RESULT_STR,
429 
430 	/// Result is a temporary. This is used for the result of almost all
431 	/// expressions.
432 	BC_RESULT_TEMP,
433 
434 	/// Special casing the two below gave performance improvements.
435 
436 	/// Result is a 0.
437 	BC_RESULT_ZERO,
438 
439 	/// Result is a 1. Useful for inc/dec operators.
440 	BC_RESULT_ONE,
441 
442 #if BC_ENABLED
443 
444 	/// Result is the special "last" variable.
445 	BC_RESULT_LAST,
446 
447 	/// Result is the return value of a void function.
448 	BC_RESULT_VOID,
449 #endif // BC_ENABLED
450 
451 	/// Result is the value of ibase.
452 	BC_RESULT_IBASE,
453 
454 	/// Result is the value of obase.
455 	BC_RESULT_OBASE,
456 
457 	/// Result is the value of scale.
458 	BC_RESULT_SCALE,
459 
460 #if BC_ENABLE_EXTRA_MATH
461 
462 	/// Result is the value of seed.
463 	BC_RESULT_SEED,
464 
465 #endif // BC_ENABLE_EXTRA_MATH
466 
467 } BcResultType;
468 
469 /// A union to store data for various result types.
470 typedef union BcResultData {
471 
472 	/// A number. Strings are stored here too; they are numbers with
473 	/// cap == 0 && num == NULL. The string's index into the strings vector is
474 	/// stored in the scale field. But this is only used for strings stored in
475 	/// variables.
476 	BcNum n;
477 
478 	/// A vector.
479 	BcVec v;
480 
481 	/// A variable, array, or array element reference. This could also be a
482 	/// string if a string is not stored in a variable (dc only).
483 	BcLoc loc;
484 
485 } BcResultData;
486 
487 /// A tagged union for results.
488 typedef struct BcResult {
489 
490 	/// The tag. The type of the result.
491 	BcResultType t;
492 
493 	/// The data. The data for the result.
494 	BcResultData d;
495 
496 } BcResult;
497 
498 /// An instruction pointer. This is how bc knows where in the bytecode vector,
499 /// and which function, the current execution is.
500 typedef struct BcInstPtr {
501 
502 	/// The index of the currently executing function in the fns vector.
503 	size_t func;
504 
505 	/// The index into the bytecode vector of the *next* instruction.
506 	size_t idx;
507 
508 	/// The length of the results vector when this function started executing.
509 	/// This is mostly used for bc where functions should not affect the results
510 	/// of their callers.
511 	size_t len;
512 
513 } BcInstPtr;
514 
515 /// Types of identifiers.
516 typedef enum BcType {
517 
518 	/// Variable.
519 	BC_TYPE_VAR,
520 
521 	/// Array.
522 	BC_TYPE_ARRAY,
523 
524 #if BC_ENABLED
525 
526 	/// Array reference.
527 	BC_TYPE_REF,
528 
529 #endif // BC_ENABLED
530 
531 } BcType;
532 
533 #if BC_ENABLED
534 /// An auto variable in bc.
535 typedef struct BcAuto {
536 
537 	/// The index of the variable in the vars or arrs vectors.
538 	size_t idx;
539 
540 	/// The type of the variable.
541 	BcType type;
542 
543 } BcAuto;
544 #endif // BC_ENABLED
545 
546 /// Forward declaration.
547 struct BcProgram;
548 
549 /**
550  * Initializes a function.
551  * @param f     The function to initialize.
552  * @param name  The name of the function. The string is assumed to be owned by
553  *              some other entity.
554  */
555 void bc_func_init(BcFunc *f, const char* name);
556 
557 /**
558  * Inserts an auto into the function.
559  * @param f     The function to insert into.
560  * @param p     The program. This is to search for the variable or array name.
561  * @param name  The name of the auto to insert.
562  * @param type  The type of the auto.
563  * @param line  The line in the source code where the insert happened. This is
564  *              solely for error reporting.
565  */
566 void bc_func_insert(BcFunc *f, struct BcProgram* p, char* name,
567                     BcType type, size_t line);
568 
569 /**
570  * Resets a function in preparation for it to be reused. This can happen in bc
571  * because it is a dynamic language and functions can be redefined.
572  * @param f  The functio to reset.
573  */
574 void bc_func_reset(BcFunc *f);
575 
576 #ifndef NDEBUG
577 /**
578  * Frees a function. This is a destructor. This is only used in debug builds
579  * because all functions are freed at exit. We free them in debug builds to
580  * check for memory leaks.
581  * @param func  The function to free as a void pointer.
582  */
583 void bc_func_free(void *func);
584 #endif // NDEBUG
585 
586 /**
587  * Initializes an array, which is the array type in bc and dc source code. Since
588  * variables and arrays are both arrays (see the development manual,
589  * manuals/development.md#execution, for more information), the @a nums
590  * parameter tells bc whether to initialize an array of numbers or an array of
591  * arrays of numbers. If the latter, it does a recursive call with nums set to
592  * true.
593  * @param a     The array to initialize.
594  * @param nums  True if the array should be for numbers, false if it should be
595  *              for vectors.
596  */
597 void bc_array_init(BcVec *a, bool nums);
598 
599 /**
600  * Copies an array to another array. This is used to do pass arrays to functions
601  * that do not take references to arrays. The arrays are passed entirely by
602  * value, which means that they need to be copied.
603  * @param d  The destination array.
604  * @param s  The source array.
605  */
606 void bc_array_copy(BcVec *d, const BcVec *s);
607 
608 /**
609  * Frees a string stored in a function. This is a destructor.
610  * @param string  The string to free as a void pointer.
611  */
612 void bc_string_free(void *string);
613 
614 /**
615  * Frees a constant stored in a function. This is a destructor.
616  * @param constant  The constant to free as a void pointer.
617  */
618 void bc_const_free(void *constant);
619 
620 /**
621  * Clears a result. It sets the type to BC_RESULT_TEMP and clears the union by
622  * clearing the BcNum in the union. This is to ensure that bc does not use
623  * uninitialized data.
624  * @param r  The result to clear.
625  */
626 void bc_result_clear(BcResult *r);
627 
628 /**
629  * Copies a result into another. This is done for things like duplicating the
630  * top of the results stack or copying the result of an assignment to put back
631  * on the results stack.
632  * @param d    The destination result.
633  * @param src  The source result.
634  */
635 void bc_result_copy(BcResult *d, BcResult *src);
636 
637 /**
638  * Frees a result. This is a destructor.
639  * @param result  The result to free as a void pointer.
640  */
641 void bc_result_free(void *result);
642 
643 /**
644  * Expands an array to @a len. This can happen because in bc, you do not have to
645  * explicitly initialize elements of an array. If you access an element that is
646  * not initialized, the array is expanded to fit it, and all missing elements
647  * are initialized to 0 if they are numbers, or arrays with one element of 0.
648  * This function does that expansion.
649  * @param a    The array to expand.
650  * @param len  The length to expand to.
651  */
652 void bc_array_expand(BcVec *a, size_t len);
653 
654 /**
655  * Compare two BcId's and return the result. Since they are just comparing the
656  * names in the BcId, I return the result from strcmp() exactly. This is used by
657  * maps in their binary search.
658  * @param e1  The first id.
659  * @param e2  The second id.
660  * @return    The result of strcmp() on the BcId's names.
661  */
662 int bc_id_cmp(const BcId *e1, const BcId *e2);
663 
664 #if BC_ENABLED
665 
666 /**
667  * Returns non-zero if the bytecode instruction i is an assignment instruction.
668  * @param i  The instruction to test.
669  * @return   Non-zero if i is an assignment instruction, zero otherwise.
670  */
671 #define BC_INST_IS_ASSIGN(i) \
672 	((i) == BC_INST_ASSIGN || (i) == BC_INST_ASSIGN_NO_VAL)
673 
674 /**
675  * Returns true if the bytecode instruction @a i requires the value to be
676  * returned for use.
677  * @param i  The instruction to test.
678  * @return   True if @a i requires the value to be returned for use, false
679  *           otherwise.
680  */
681 #define BC_INST_USE_VAL(i) ((i) <= BC_INST_ASSIGN)
682 
683 #else // BC_ENABLED
684 
685 /**
686  * Returns non-zero if the bytecode instruction i is an assignment instruction.
687  * @param i  The instruction to test.
688  * @return   Non-zero if i is an assignment instruction, zero otherwise.
689  */
690 #define BC_INST_IS_ASSIGN(i) ((i) == BC_INST_ASSIGN_NO_VAL)
691 
692 /**
693  * Returns true if the bytecode instruction @a i requires the value to be
694  * returned for use.
695  * @param i  The instruction to test.
696  * @return   True if @a i requires the value to be returned for use, false
697  *           otherwise.
698  */
699 #define BC_INST_USE_VAL(i) (false)
700 
701 #endif // BC_ENABLED
702 
703 #if BC_DEBUG_CODE
704 /// Reference to string names for all of the instructions. For debugging.
705 extern const char* bc_inst_names[];
706 #endif // BC_DEBUG_CODE
707 
708 /// References to the names of the main and read functions.
709 extern const char bc_func_main[];
710 extern const char bc_func_read[];
711 
712 #endif // BC_LANG_H
713