1 /*
2 * eval.c - gawk bytecode interpreter
3 */
4
5 /*
6 * Copyright (C) 1986, 1988, 1989, 1991-2019, 2021,
7 * the Free Software Foundation, Inc.
8 *
9 * This file is part of GAWK, the GNU implementation of the
10 * AWK Programming Language.
11 *
12 * GAWK is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 3 of the License, or
15 * (at your option) any later version.
16 *
17 * GAWK is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
25 */
26
27 #include "awk.h"
28
/* Explicit declarations of the math library routines named below. */
extern double pow(double x, double y);
extern double modf(double x, double *yp);
extern double fmod(double x, double y);
/*
 * Array of frame nodes maintained by push_frame() and pop_frame();
 * those routines store frames starting at index 1.
 */
NODE **fcall_list = NULL;
/* Incremented by push_frame(), decremented by pop_frame(). */
long fcall_count = 0;
int currule = 0;	/* NOTE(review): presumably the rule being executed --- confirm */
IOBUF *curfile = NULL; /* current data file */
bool exiting = false;	/* NOTE(review): presumably set while exiting --- confirm */

/* Pointer to a routine taking an INSTRUCTION pointer and returning int. */
int (*interpret)(INSTRUCTION *);
/* Capacity of the pre_execute[] table below. */
#define MAX_EXEC_HOOKS 10
static int num_exec_hook = 0;
static Func_pre_exec pre_execute[MAX_EXEC_HOOKS];
static Func_post_exec post_execute = NULL;

/* Called from pop_frame() when do_debug is set. */
extern void frame_popped();

int OFSlen;		/* length of OFS; set by set_OFS() */
int ORSlen;		/* length of ORS; set by set_ORS() */
int OFMTidx;		/* index of OFMT in fmt_list; set by set_OFMT() */
int CONVFMTidx;		/* index of CONVFMT in fmt_list; set by set_CONVFMT() */

static NODE *node_Boolean[2];	/* NOTE(review): presumably the false/true value nodes --- confirm */
52
/* This rather ugly macro is for VMS C */
/* C(c) casts its argument to char; it is undefined again after the table. */
#ifdef C
#undef C
#endif
#define C(c) ((char)c)
/*
 * This table is used by the regexp routines to do case independent
 * matching. Basically, every ascii character maps to itself, except
 * uppercase letters map to lower case ones. This table has 256
 * entries, for ISO 8859-1. Note also that if the system this
 * is compiled on doesn't use 7-bit ascii, casetable[] should not be
 * defined to the linker, so gawk should not load.
 *
 * Do NOT make this array static, it is used in several spots, not
 * just in this file.
 *
 * 6/2004:
 * This table is also used for IGNORECASE for == and !=, and index().
 * Although with GLIBC, we could use tolower() everywhere and RE_ICASE
 * for the regex matcher, precomputing this table once gives us a
 * performance improvement.  I also think it's better for portability
 * to non-GLIBC systems.  All the world is not (yet :-) GNU/Linux.
 *
 * The table is indexed by character code; each comment row below names
 * the eight characters whose entries follow it.
 */
#if 'a' == 97   /* it's ascii */
char casetable[] = {
	'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
	'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
	'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
	'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
	/* ' '     '!'     '"'     '#'     '$'     '%'     '&'     ''' */
	'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
	/* '('     ')'     '*'     '+'     ','     '-'     '.'     '/' */
	'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
	/* '0'     '1'     '2'     '3'     '4'     '5'     '6'     '7' */
	'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
	/* '8'     '9'     ':'     ';'     '<'     '='     '>'     '?' */
	'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
	/* '@'     'A'     'B'     'C'     'D'     'E'     'F'     'G' */
	'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
	/* 'H'     'I'     'J'     'K'     'L'     'M'     'N'     'O' */
	'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
	/* 'P'     'Q'     'R'     'S'     'T'     'U'     'V'     'W' */
	'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
	/* 'X'     'Y'     'Z'     '['     '\'     ']'     '^'     '_' */
	'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
	/* '`'     'a'     'b'     'c'     'd'     'e'     'f'     'g' */
	'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
	/* 'h'     'i'     'j'     'k'     'l'     'm'     'n'     'o' */
	'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
	/* 'p'     'q'     'r'     's'     't'     'u'     'v'     'w' */
	'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
	/* 'x'     'y'     'z'     '{'     '|'     '}'     '~' */
	'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',

	/* Latin 1: */
	/*
	 * 4/2019: This is now overridden; in single byte locales
	 * we call load_casetable from main and it fills in the values
	 * based on the current locale. In particular, we want LC_ALL=C
	 * to work correctly for values >= 0200.
	 */
	/* Codes 0200 through 0277 map to themselves. */
	C('\200'), C('\201'), C('\202'), C('\203'), C('\204'), C('\205'), C('\206'), C('\207'),
	C('\210'), C('\211'), C('\212'), C('\213'), C('\214'), C('\215'), C('\216'), C('\217'),
	C('\220'), C('\221'), C('\222'), C('\223'), C('\224'), C('\225'), C('\226'), C('\227'),
	C('\230'), C('\231'), C('\232'), C('\233'), C('\234'), C('\235'), C('\236'), C('\237'),
	C('\240'), C('\241'), C('\242'), C('\243'), C('\244'), C('\245'), C('\246'), C('\247'),
	C('\250'), C('\251'), C('\252'), C('\253'), C('\254'), C('\255'), C('\256'), C('\257'),
	C('\260'), C('\261'), C('\262'), C('\263'), C('\264'), C('\265'), C('\266'), C('\267'),
	C('\270'), C('\271'), C('\272'), C('\273'), C('\274'), C('\275'), C('\276'), C('\277'),
	/*
	 * Codes 0300 through 0337 map to 0340 through 0377, except
	 * 0327 and 0337, which map to themselves.
	 */
	C('\340'), C('\341'), C('\342'), C('\343'), C('\344'), C('\345'), C('\346'), C('\347'),
	C('\350'), C('\351'), C('\352'), C('\353'), C('\354'), C('\355'), C('\356'), C('\357'),
	C('\360'), C('\361'), C('\362'), C('\363'), C('\364'), C('\365'), C('\366'), C('\327'),
	C('\370'), C('\371'), C('\372'), C('\373'), C('\374'), C('\375'), C('\376'), C('\337'),
	/* Codes 0340 through 0377 map to themselves. */
	C('\340'), C('\341'), C('\342'), C('\343'), C('\344'), C('\345'), C('\346'), C('\347'),
	C('\350'), C('\351'), C('\352'), C('\353'), C('\354'), C('\355'), C('\356'), C('\357'),
	C('\360'), C('\361'), C('\362'), C('\363'), C('\364'), C('\365'), C('\366'), C('\367'),
	C('\370'), C('\371'), C('\372'), C('\373'), C('\374'), C('\375'), C('\376'), C('\377'),
};
#elif defined(USE_EBCDIC)
/*
 * EBCDIC version: the rows holding 'A'-'I', 'J'-'R' and 'S'-'Z' carry
 * the codes of the corresponding lower case letters; every other
 * entry maps to itself.
 */
char casetable[] = {
 /*00  NU    SH    SX    EX    PF    HT    LC    DL */
      0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
 /*08              SM    VT    FF    CR    SO    SI */
      0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
 /*10  DE    D1    D2    TM    RS    NL    BS    IL */
      0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
 /*18  CN    EM    CC    C1    FS    GS    RS    US */
      0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
 /*20  DS    SS    FS          BP    LF    EB    EC */
      0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
 /*28              SM    C2    EQ    AK    BL       */
      0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
 /*30              SY          PN    RS    UC    ET */
      0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
 /*38                    C3    D4    NK          SU */
      0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
 /*40  SP                                           */
      0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
 /*48             CENT    .     <     (     +     | */
      0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
 /*50   &                                           */
      0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
 /*58               !     $     *     )     ;     ^ */
      0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
 /*60   -     /                                     */
      0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
 /*68               |     ,     %     _     >     ? */
      0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
 /*70                                               */
      0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
 /*78         `     :     #     @     '     =     " */
      0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
 /*80         a     b     c     d     e     f     g */
      0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
 /*88   h     i           {                         */
      0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
 /*90         j     k     l     m     n     o     p */
      0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
 /*98   q     r           }                         */
      0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
 /*A0         ~     s     t     u     v     w     x */
      0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
 /*A8   y     z                       [             */
      0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
 /*B0                                               */
      0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,
 /*B8                                 ]             */
      0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
 /*C0   {     A     B     C     D     E     F     G */
      0xC0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
 /*C8   H     I                                     */
      0x88, 0x89, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
 /*D0   }     J     K     L     M     N     O     P */
      0xD0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
 /*D8   Q     R                                     */
      0x98, 0x99, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
 /*E0   \           S     T     U     V     W     X */
      0xE0, 0xE1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
 /*E8   Y     Z                                     */
      0xA8, 0xA9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
 /*F0   0     1     2     3     4     5     6     7 */
      0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
 /*F8   8     9                                     */
      0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
};
#else
/* Neither ASCII nor EBCDIC: including this non-file stops the build. */
#include "You lose. You will need a translation table for your character set."
#endif

#undef C
203
/*
 * load_casetable --- for a non-ASCII locale, redo the table
 *
 * Does its work at most once, and does nothing in traditional mode.
 * On non-EBCDIC builds with LC_CTYPE available, entries 0200 through
 * 0377 of casetable[] are recomputed from the <ctype.h> routines:
 * an alphabetic lower case code whose toupper() value differs gets
 * that value, every other code maps to itself.
 */

void
load_casetable(void)
{
#if defined(LC_CTYPE)
	static bool loaded = false;
	int ch;

	if (do_traditional || loaded)
		return;

	loaded = true;

#ifndef USE_EBCDIC
	/* use of isalpha is ok here (see is_alpha in awkgram.y) */
	ch = 0200;
	while (ch <= 0377) {
		bool remap = (isalpha(ch) && islower(ch) && ch != toupper(ch));

		casetable[ch] = remap ? toupper(ch) : ch;
		ch++;
	}
#endif
#endif
}
229
/*
 * This table maps node types to strings for debugging.
 * KEEP IN SYNC WITH awk.h!!!!
 *
 * nodetype2str() indexes this array directly with the NODETYPE value,
 * so the order of the entries must follow the order of that type's
 * values. The final NULL marks the end of the table.
 */

static const char *const nodetypes[] = {
	"Node_illegal",
	"Node_val",
	"Node_regex",
	"Node_dynregex",
	"Node_var",
	"Node_var_array",
	"Node_var_new",
	"Node_param_list",
	"Node_func",
	"Node_ext_func",
	"Node_builtin_func",
	"Node_array_ref",
	"Node_array_tree",
	"Node_array_leaf",
	"Node_dump_array",
	"Node_arrayfor",
	"Node_frame",
	"Node_instruction",
	"Node_final --- this should never appear",
	NULL
};
257
258
/*
 * This table maps Op codes to strings.
 * KEEP IN SYNC WITH awk.h!!!!
 *
 * The table is indexed directly by OPCODE value. `desc' is the name
 * returned by opcode2str(); `operator' is the text returned by
 * op2str(). A NULL `operator' means op2str() reports the opcode as
 * not being an operator or keyword. The final all-NULL entry ends
 * the table.
 */

static struct optypetab {
	char *desc;		/* opcode name */
	char *operator;		/* operator or keyword text, or NULL */
} optypes[] = {
	{ "Op_illegal", NULL },
	{ "Op_times", " * " },
	{ "Op_times_i", " * " },
	{ "Op_quotient", " / " },
	{ "Op_quotient_i", " / " },
	{ "Op_mod", " % " },
	{ "Op_mod_i", " % " },
	{ "Op_plus", " + " },
	{ "Op_plus_i", " + " },
	{ "Op_minus", " - " },
	{ "Op_minus_i", " - " },
	{ "Op_exp", " ^ " },
	{ "Op_exp_i", " ^ " },
	{ "Op_concat", " " },
	{ "Op_line_range", NULL },
	{ "Op_cond_pair", ", " },
	{ "Op_subscript", "[]" },
	{ "Op_sub_array", "[]" },
	{ "Op_preincrement", "++" },
	{ "Op_predecrement", "--" },
	{ "Op_postincrement", "++" },
	{ "Op_postdecrement", "--" },
	{ "Op_unary_minus", "-" },
	{ "Op_unary_plus", "+" },
	{ "Op_field_spec", "$" },
	{ "Op_not", "! " },
	{ "Op_assign", " = " },
	{ "Op_store_var", " = " },
	{ "Op_store_sub", " = " },
	{ "Op_store_field", " = " },
	{ "Op_assign_times", " *= " },
	{ "Op_assign_quotient", " /= " },
	{ "Op_assign_mod", " %= " },
	{ "Op_assign_plus", " += " },
	{ "Op_assign_minus", " -= " },
	{ "Op_assign_exp", " ^= " },
	{ "Op_assign_concat", " " },
	{ "Op_and", " && " },
	{ "Op_and_final", NULL },
	{ "Op_or", " || " },
	{ "Op_or_final", NULL },
	{ "Op_equal", " == " },
	{ "Op_notequal", " != " },
	{ "Op_less", " < " },
	{ "Op_greater", " > " },
	{ "Op_leq", " <= " },
	{ "Op_geq", " >= " },
	{ "Op_match", " ~ " },
	{ "Op_match_rec", NULL },
	{ "Op_nomatch", " !~ " },
	{ "Op_rule", NULL },
	{ "Op_K_case", "case" },
	{ "Op_K_default", "default" },
	{ "Op_K_break", "break" },
	{ "Op_K_continue", "continue" },
	{ "Op_K_print", "print" },
	{ "Op_K_print_rec", "print" },
	{ "Op_K_printf", "printf" },
	{ "Op_K_next", "next" },
	{ "Op_K_exit", "exit" },
	{ "Op_K_return", "return" },
	{ "Op_K_return_from_eval", "return" },
	{ "Op_K_delete", "delete" },
	{ "Op_K_delete_loop", NULL },
	{ "Op_K_getline_redir", "getline" },
	{ "Op_K_getline", "getline" },
	{ "Op_K_nextfile", "nextfile" },
	{ "Op_K_namespace", "@namespace" },
	{ "Op_builtin", NULL },
	{ "Op_sub_builtin", NULL },
	{ "Op_ext_builtin", NULL },
	{ "Op_in_array", " in " },
	{ "Op_func_call", NULL },
	{ "Op_indirect_func_call", NULL },
	{ "Op_push", NULL },
	{ "Op_push_arg", NULL },
	{ "Op_push_arg_untyped", NULL },
	{ "Op_push_i", NULL },
	{ "Op_push_re", NULL },
	{ "Op_push_array", NULL },
	{ "Op_push_param", NULL },
	{ "Op_push_lhs", NULL },
	{ "Op_subscript_lhs", "[]" },
	{ "Op_field_spec_lhs", "$" },
	{ "Op_no_op", NULL },
	{ "Op_pop", NULL },
	{ "Op_jmp", NULL },
	{ "Op_jmp_true", NULL },
	{ "Op_jmp_false", NULL },
	{ "Op_get_record", NULL },
	{ "Op_newfile", NULL },
	{ "Op_arrayfor_init", NULL },
	{ "Op_arrayfor_incr", NULL },
	{ "Op_arrayfor_final", NULL },
	{ "Op_var_update", NULL },
	{ "Op_var_assign", NULL },
	{ "Op_field_assign", NULL },
	{ "Op_subscript_assign", NULL },
	{ "Op_after_beginfile", NULL },
	{ "Op_after_endfile", NULL },
	{ "Op_func", NULL },
	{ "Op_comment", NULL },
	{ "Op_exec_count", NULL },
	{ "Op_breakpoint", NULL },
	{ "Op_lint", NULL },
	{ "Op_lint_plus", NULL },
	{ "Op_atexit", NULL },
	{ "Op_stop", NULL },
	{ "Op_token", NULL },
	{ "Op_symbol", NULL },
	{ "Op_list", NULL },
	{ "Op_K_do", "do" },
	{ "Op_K_for", "for" },
	{ "Op_K_arrayfor", "for" },
	{ "Op_K_while", "while" },
	{ "Op_K_switch", "switch" },
	{ "Op_K_if", "if" },
	{ "Op_K_else", "else" },
	{ "Op_K_function", "function" },
	{ "Op_cond_exp", NULL },
	{ "Op_parens", NULL },
	{ "Op_final --- this should never appear", NULL },
	{ NULL, NULL },
};
392
393 /* nodetype2str --- convert a node type into a printable value */
394
395 const char *
nodetype2str(NODETYPE type)396 nodetype2str(NODETYPE type)
397 {
398 static char buf[40];
399
400 if (type >= Node_illegal && type <= Node_final)
401 return nodetypes[(int) type];
402
403 sprintf(buf, _("unknown nodetype %d"), (int) type);
404 return buf;
405 }
406
407 /* opcode2str --- convert an opcode type into a printable value */
408
409 const char *
opcode2str(OPCODE op)410 opcode2str(OPCODE op)
411 {
412 if (op >= Op_illegal && op < Op_final)
413 return optypes[(int) op].desc;
414 fatal(_("unknown opcode %d"), (int) op);
415 return NULL;
416 }
417
418 /* op2str --- convert an opcode type to corresponding operator or keyword */
419
420 const char *
op2str(OPCODE op)421 op2str(OPCODE op)
422 {
423 if (op >= Op_illegal && op < Op_final) {
424 if (optypes[(int) op].operator != NULL)
425 return optypes[(int) op].operator;
426 else
427 fatal(_("opcode %s not an operator or keyword"),
428 optypes[(int) op].desc);
429 } else
430 fatal(_("unknown opcode %d"), (int) op);
431 return NULL;
432 }
433
434
435 /* flags2str --- make a flags value readable */
436
437 const char *
flags2str(int flagval)438 flags2str(int flagval)
439 {
440 static const struct flagtab values[] = {
441 { MALLOC, "MALLOC" },
442 { STRING, "STRING" },
443 { STRCUR, "STRCUR" },
444 { NUMCUR, "NUMCUR" },
445 { NUMBER, "NUMBER" },
446 { USER_INPUT, "USER_INPUT" },
447 { INTLSTR, "INTLSTR" },
448 { NUMINT, "NUMINT" },
449 { INTIND, "INTIND" },
450 { WSTRCUR, "WSTRCUR" },
451 { MPFN, "MPFN" },
452 { MPZN, "MPZN" },
453 { NO_EXT_SET, "NO_EXT_SET" },
454 { NULL_FIELD, "NULL_FIELD" },
455 { ARRAYMAXED, "ARRAYMAXED" },
456 { HALFHAT, "HALFHAT" },
457 { XARRAY, "XARRAY" },
458 { NUMCONSTSTR, "NUMCONSTSTR" },
459 { REGEX, "REGEX" },
460 { 0, NULL },
461 };
462
463 return genflags2str(flagval, values);
464 }
465
466 /* genflags2str --- general routine to convert a flag value to a string */
467
468 const char *
genflags2str(int flagval,const struct flagtab * tab)469 genflags2str(int flagval, const struct flagtab *tab)
470 {
471 static char buffer[BUFSIZ];
472 char *sp;
473 int i, space_left, space_needed;
474
475 sp = buffer;
476 space_left = BUFSIZ;
477 for (i = 0; tab[i].name != NULL; i++) {
478 if ((flagval & tab[i].val) != 0) {
479 /*
480 * note the trick, we want 1 or 0 for whether we need
481 * the '|' character.
482 */
483 space_needed = (strlen(tab[i].name) + (sp != buffer));
484 if (space_left <= space_needed)
485 fatal(_("buffer overflow in genflags2str"));
486
487 if (sp != buffer) {
488 *sp++ = '|';
489 space_left--;
490 }
491 strcpy(sp, tab[i].name);
492 /* note ordering! */
493 space_left -= strlen(sp);
494 sp += strlen(sp);
495 }
496 }
497
498 *sp = '\0';
499 return buffer;
500 }
501
/*
 * posix_compare --- compare strings using strcoll
 *
 * Compares the string values of s1 and s2 with the locale's collation
 * routine: strcoll() when gawk_mb_cur_max is 1, otherwise wcscoll() on
 * the wide-character forms. The collation routines stop at a NUL, so
 * the strings are compared one NUL-delimited piece at a time; if all
 * pieces compare equal, the string that runs out first is the smaller.
 *
 * Returns a negative, zero or positive value.
 */

static int
posix_compare(NODE *s1, NODE *s2)
{
	/*
	 * NOTE(review): ret is assigned only inside the two branches
	 * below; when __DJGPP__ is defined the second branch is compiled
	 * out, so it looks like ret is returned unset if gawk_mb_cur_max
	 * is not 1 there --- confirm that cannot happen.
	 */
	int ret;

	if (gawk_mb_cur_max == 1) {
		char save1, save2;
		const char *p1, *p2;

		/*
		 * Put a terminator just past the end of each string for
		 * the duration of the comparison; the saved bytes are
		 * put back below.
		 */
		save1 = s1->stptr[s1->stlen];
		s1->stptr[s1->stlen] = '\0';

		save2 = s2->stptr[s2->stlen];
		s2->stptr[s2->stlen] = '\0';

		p1 = s1->stptr;
		p2 = s2->stptr;

		for (;;) {
			size_t len;

			ret = strcoll(p1, p2);
			if (ret != 0)
				break;

			/* step both strings past this piece and its NUL */
			len = strlen(p1);
			p1 += len + 1;
			p2 += len + 1;

			/* s1 used up: equal if s2 is too, else s1 is smaller */
			if (p1 == s1->stptr + s1->stlen + 1) {
				if (p2 != s2->stptr + s2->stlen + 1)
					ret = -1;
				break;
			}
			/* only s2 used up: s1 is larger */
			if (p2 == s2->stptr + s2->stlen + 1) {
				ret = 1;
				break;
			}
		}

		s1->stptr[s1->stlen] = save1;
		s2->stptr[s2->stlen] = save2;
	}
#if ! defined(__DJGPP__)
	else {
		/* Similar logic, using wide characters */
		const wchar_t *p1, *p2;

		(void) force_wstring(s1);
		(void) force_wstring(s2);

		/* no terminator is written here, unlike the branch above */
		p1 = s1->wstptr;
		p2 = s2->wstptr;

		for (;;) {
			size_t len;

			ret = wcscoll(p1, p2);
			if (ret != 0)
				break;

			/* step both strings past this piece and its NUL */
			len = wcslen(p1);
			p1 += len + 1;
			p2 += len + 1;

			/* s1 used up: equal if s2 is too, else s1 is smaller */
			if (p1 == s1->wstptr + s1->wstlen + 1) {
				if (p2 != s2->wstptr + s2->wstlen + 1)
					ret = -1;
				break;
			}
			/* only s2 used up: s1 is larger */
			if (p2 == s2->wstptr + s2->wstlen + 1) {
				ret = 1;
				break;
			}
		}
	}
#endif

	return ret;
}
584
585
586 /* cmp_nodes --- compare two nodes, returning negative, 0, positive */
587
588 int
cmp_nodes(NODE * t1,NODE * t2,bool use_strcmp)589 cmp_nodes(NODE *t1, NODE *t2, bool use_strcmp)
590 {
591 int ret = 0;
592 size_t len1, len2;
593 int l, ldiff;
594
595 if (t1 == t2)
596 return 0;
597
598 (void) fixtype(t1);
599 (void) fixtype(t2);
600
601 if ((t1->flags & NUMBER) != 0 && (t2->flags & NUMBER) != 0)
602 return cmp_numbers(t1, t2);
603
604 (void) force_string(t1);
605 (void) force_string(t2);
606 len1 = t1->stlen;
607 len2 = t2->stlen;
608 ldiff = len1 - len2;
609 if (len1 == 0 || len2 == 0)
610 return ldiff;
611
612 if (do_posix && ! use_strcmp)
613 return posix_compare(t1, t2);
614
615 l = (ldiff <= 0 ? len1 : len2);
616 if (IGNORECASE) {
617 const unsigned char *cp1 = (const unsigned char *) t1->stptr;
618 const unsigned char *cp2 = (const unsigned char *) t2->stptr;
619 char save1 = t1->stptr[t1->stlen];
620 char save2 = t2->stptr[t2->stlen];
621
622
623 if (gawk_mb_cur_max > 1) {
624 t1->stptr[t1->stlen] = t2->stptr[t2->stlen] = '\0';
625 ret = strncasecmpmbs((const unsigned char *) cp1,
626 (const unsigned char *) cp2, l);
627 t1->stptr[t1->stlen] = save1;
628 t2->stptr[t2->stlen] = save2;
629 } else {
630 /* Could use tolower() here; see discussion above. */
631 for (ret = 0; l-- > 0 && ret == 0; cp1++, cp2++)
632 ret = casetable[*cp1] - casetable[*cp2];
633 }
634 } else
635 ret = memcmp(t1->stptr, t2->stptr, l);
636
637 ret = ret == 0 ? ldiff : ret;
638 return ret;
639 }
640
641 /* push_frame --- push a frame NODE onto stack */
642
643 static void
push_frame(NODE * f)644 push_frame(NODE *f)
645 {
646 static long max_fcall;
647
648 /* NB: frame numbering scheme as in GDB. frame_ptr => frame #0. */
649
650 fcall_count++;
651 if (fcall_list == NULL) {
652 max_fcall = 10;
653 emalloc(fcall_list, NODE **, (max_fcall + 1) * sizeof(NODE *), "push_frame");
654 } else if (fcall_count == max_fcall) {
655 max_fcall *= 2;
656 erealloc(fcall_list, NODE **, (max_fcall + 1) * sizeof(NODE *), "push_frame");
657 }
658
659 if (fcall_count > 1)
660 memmove(fcall_list + 2, fcall_list + 1, (fcall_count - 1) * sizeof(NODE *));
661 fcall_list[1] = f;
662 }
663
664
665 /* pop_frame --- pop off a frame NODE*/
666
667 static void
pop_frame()668 pop_frame()
669 {
670 if (fcall_count > 1)
671 memmove(fcall_list + 1, fcall_list + 2, (fcall_count - 1) * sizeof(NODE *));
672 fcall_count--;
673 assert(fcall_count >= 0);
674 if (do_debug)
675 frame_popped();
676 }
677
678
679 /* dump_fcall_stack --- print a backtrace of the awk function calls */
680
681 void
dump_fcall_stack(FILE * fp)682 dump_fcall_stack(FILE *fp)
683 {
684 NODE *f, *func;
685 long i = 0, k = 0;
686
687 if (fcall_count == 0)
688 return;
689 fprintf(fp, _("\n\t# Function Call Stack:\n\n"));
690
691 /* current frame */
692 func = frame_ptr->func_node;
693 fprintf(fp, "\t# %3ld. %s\n", k++, func->vname);
694
695 /* outer frames except main */
696 for (i = 1; i < fcall_count; i++) {
697 f = fcall_list[i];
698 func = f->func_node;
699 fprintf(fp, "\t# %3ld. %s\n", k++, func->vname);
700 }
701
702 fprintf(fp, "\t# %3ld. -- main --\n", k);
703 }
704
705
706 /* set_IGNORECASE --- update IGNORECASE as appropriate */
707
708 void
set_IGNORECASE()709 set_IGNORECASE()
710 {
711 static bool warned = false;
712
713 if ((do_lint_extensions || do_traditional) && ! warned) {
714 warned = true;
715 lintwarn(_("`IGNORECASE' is a gawk extension"));
716 }
717
718 if (do_traditional)
719 IGNORECASE = false;
720 else
721 IGNORECASE = boolval(IGNORECASE_node->var_value);
722 set_RS(); /* set_RS() calls set_FS() if need be, for us */
723 }
724
725 /* set_BINMODE --- set translation mode (OS/2, DOS, others) */
726
727 void
set_BINMODE()728 set_BINMODE()
729 {
730 static bool warned = false;
731 char *p;
732 NODE *v = fixtype(BINMODE_node->var_value);
733
734 if ((do_lint_extensions || do_traditional) && ! warned) {
735 warned = true;
736 lintwarn(_("`BINMODE' is a gawk extension"));
737 }
738 if (do_traditional)
739 BINMODE = TEXT_TRANSLATE;
740 else if ((v->flags & NUMBER) != 0) {
741 BINMODE = get_number_si(v);
742 /* Make sure the value is rational. */
743 if (BINMODE < TEXT_TRANSLATE)
744 BINMODE = TEXT_TRANSLATE;
745 else if (BINMODE > BINMODE_BOTH)
746 BINMODE = BINMODE_BOTH;
747 } else if ((v->flags & STRING) != 0) {
748 p = v->stptr;
749
750 /*
751 * Allow only one of the following:
752 * "0", "1", "2", "3",
753 * "r", "w", "rw", "wr"
754 * ANYTHING ELSE goes to 3. So there.
755 */
756 switch (v->stlen) {
757 case 1:
758 switch (p[0]) {
759 case '0':
760 case '1':
761 case '2':
762 case '3':
763 BINMODE = p[0] - '0';
764 break;
765 case 'r':
766 BINMODE = BINMODE_INPUT;
767 break;
768 case 'w':
769 BINMODE = BINMODE_OUTPUT;
770 break;
771 default:
772 BINMODE = BINMODE_BOTH;
773 goto bad_value;
774 break;
775 }
776 break;
777 case 2:
778 switch (p[0]) {
779 case 'r':
780 BINMODE = BINMODE_BOTH;
781 if (p[1] != 'w')
782 goto bad_value;
783 break;
784 case 'w':
785 BINMODE = BINMODE_BOTH;
786 if (p[1] != 'r')
787 goto bad_value;
788 break;
789 }
790 break;
791 default:
792 bad_value:
793 lintwarn(_("BINMODE value `%s' is invalid, treated as 3"), p);
794 break;
795 }
796 } else
797 BINMODE = 3; /* shouldn't happen */
798 }
799
800 /* set_OFS --- update OFS related variables when OFS assigned to */
801
802 void
set_OFS()803 set_OFS()
804 {
805 static bool first = true;
806 size_t new_ofs_len;
807
808 if (first) /* true when called from init_vars() in main() */
809 first = false;
810 else {
811 /* rebuild $0 using OFS that was current when $0 changed */
812 if (! field0_valid) {
813 get_field(UNLIMITED - 1, NULL);
814 rebuild_record();
815 }
816 }
817
818 /*
819 * Save OFS value for use in building record and in printing.
820 * Can't just have OFS point into the OFS_node since it's
821 * already updated when we come into this routine, and we need
822 * the old value to rebuild the record (see above).
823 */
824 OFS_node->var_value = force_string(OFS_node->var_value);
825 new_ofs_len = OFS_node->var_value->stlen;
826
827 if (OFS == NULL)
828 emalloc(OFS, char *, new_ofs_len + 1, "set_OFS");
829 else if (OFSlen < new_ofs_len)
830 erealloc(OFS, char *, new_ofs_len + 1, "set_OFS");
831
832 memcpy(OFS, OFS_node->var_value->stptr, OFS_node->var_value->stlen);
833 OFSlen = new_ofs_len;
834 OFS[OFSlen] = '\0';
835 }
836
837 /* set_ORS --- update ORS related variables when ORS assigned to */
838
839 void
set_ORS()840 set_ORS()
841 {
842 ORS_node->var_value = force_string(ORS_node->var_value);
843 ORS = ORS_node->var_value->stptr;
844 ORSlen = ORS_node->var_value->stlen;
845 }
846
847 /* fmt_ok --- is the conversion format a valid one? */
848
849 NODE **fmt_list = NULL;
850 static int fmt_ok(NODE *n);
851 static int fmt_index(NODE *n);
852
853 static int
fmt_ok(NODE * n)854 fmt_ok(NODE *n)
855 {
856 NODE *tmp = force_string(n);
857 const char *p = tmp->stptr;
858
859 #if ! defined(PRINTF_HAS_F_FORMAT) || PRINTF_HAS_F_FORMAT != 1
860 static const char float_formats[] = "efgEG";
861 #else
862 static const char float_formats[] = "efgEFG";
863 #endif
864 #if defined(HAVE_LOCALE_H)
865 static const char flags[] = " +-#'";
866 #else
867 static const char flags[] = " +-#";
868 #endif
869
870 // We rely on the caller to zero-terminate n->stptr.
871
872 if (*p++ != '%')
873 return 0;
874 while (*p && strchr(flags, *p) != NULL) /* flags */
875 p++;
876 while (*p && isdigit((unsigned char) *p)) /* width - %*.*g is NOT allowed */
877 p++;
878 if (*p == '\0' || (*p != '.' && ! isdigit((unsigned char) *p)))
879 return 0;
880 if (*p == '.')
881 p++;
882 while (*p && isdigit((unsigned char) *p)) /* precision */
883 p++;
884 if (*p == '\0' || strchr(float_formats, *p) == NULL)
885 return 0;
886 if (*++p != '\0')
887 return 0;
888 return 1;
889 }
890
891 /* fmt_index --- track values of OFMT and CONVFMT to keep semantics correct */
892
893 static int
fmt_index(NODE * n)894 fmt_index(NODE *n)
895 {
896 int ix = 0;
897 static int fmt_num = 4;
898 static int fmt_hiwater = 0;
899 char save;
900
901 if (fmt_list == NULL)
902 emalloc(fmt_list, NODE **, fmt_num*sizeof(*fmt_list), "fmt_index");
903 n = force_string(n);
904
905 save = n->stptr[n->stlen];
906 n->stptr[n->stlen] = '\0';
907
908 while (ix < fmt_hiwater) {
909 if (cmp_nodes(fmt_list[ix], n, true) == 0)
910 return ix;
911 ix++;
912 }
913
914 /* not found */
915 if (do_lint && ! fmt_ok(n))
916 lintwarn(_("bad `%sFMT' specification `%s'"),
917 n == CONVFMT_node->var_value ? "CONV"
918 : n == OFMT_node->var_value ? "O"
919 : "", n->stptr);
920
921 n->stptr[n->stlen] = save;
922
923 if (fmt_hiwater >= fmt_num) {
924 fmt_num *= 2;
925 erealloc(fmt_list, NODE **, fmt_num * sizeof(*fmt_list), "fmt_index");
926 }
927 fmt_list[fmt_hiwater] = dupnode(n);
928 return fmt_hiwater++;
929 }
930
931 /* set_OFMT --- track OFMT correctly */
932
933 void
set_OFMT()934 set_OFMT()
935 {
936 OFMTidx = fmt_index(OFMT_node->var_value);
937 OFMT = fmt_list[OFMTidx]->stptr;
938 }
939
940 /* set_CONVFMT --- track CONVFMT correctly */
941
942 void
set_CONVFMT()943 set_CONVFMT()
944 {
945 CONVFMTidx = fmt_index(CONVFMT_node->var_value);
946 CONVFMT = fmt_list[CONVFMTidx]->stptr;
947 }
948
949 /* set_LINT --- update LINT as appropriate */
950
951 void
set_LINT()952 set_LINT()
953 {
954 #ifndef NO_LINT
955 int old_lint = do_lint;
956 NODE *n = fixtype(LINT_node->var_value);
957
958 /* start with clean defaults */
959 lintfunc = r_warning;
960 do_flags &= ~(DO_LINT_ALL|DO_LINT_INVALID);
961
962 if ((n->flags & STRING) != 0) {
963 const char *lintval;
964 size_t lintlen;
965
966 lintval = n->stptr;
967 lintlen = n->stlen;
968 if (lintlen > 0) {
969 if (lintlen == 7 && strncmp(lintval, "invalid", 7) == 0)
970 do_flags |= DO_LINT_INVALID;
971 else if (lintlen == 6 && strncmp(lintval, "no-ext", 6) == 0)
972 do_flags &= ~DO_LINT_EXTENSIONS;
973 else {
974 do_flags |= DO_LINT_ALL;
975 if (lintlen == 5 && strncmp(lintval, "fatal", 5) == 0)
976 lintfunc = r_fatal;
977 }
978 }
979 } else {
980 if (! is_zero(n))
981 do_flags |= DO_LINT_ALL;
982 }
983
984 /* explicitly use warning() here, in case lintfunc == r_fatal */
985 if (old_lint != do_lint && old_lint && ! do_lint)
986 warning(_("turning off `--lint' due to assignment to `LINT'"));
987
988 /* inform plug-in api of change */
989 update_ext_api();
990 #endif /* ! NO_LINT */
991 }
992
993 /* set_TEXTDOMAIN --- update TEXTDOMAIN variable when TEXTDOMAIN assigned to */
994
995 void
set_TEXTDOMAIN()996 set_TEXTDOMAIN()
997 {
998 NODE *tmp;
999
1000 tmp = TEXTDOMAIN_node->var_value = force_string(TEXTDOMAIN_node->var_value);
1001 TEXTDOMAIN = tmp->stptr;
1002 /*
1003 * Note: don't call textdomain(); this value is for
1004 * the awk program, not for gawk itself.
1005 */
1006 }
1007
1008 /* update_ERRNO_int --- update the value of ERRNO based on argument */
1009
1010 void
update_ERRNO_int(int errcode)1011 update_ERRNO_int(int errcode)
1012 {
1013 const char *cp;
1014
1015 update_PROCINFO_num("errno", errcode);
1016 if (errcode) {
1017 cp = strerror(errcode);
1018 cp = gettext(cp);
1019 } else
1020 cp = "";
1021 unref(ERRNO_node->var_value);
1022 ERRNO_node->var_value = make_string(cp, strlen(cp));
1023 }
1024
1025 /* update_ERRNO_string --- update ERRNO */
1026
1027 void
update_ERRNO_string(const char * string)1028 update_ERRNO_string(const char *string)
1029 {
1030 update_PROCINFO_num("errno", 0);
1031 unref(ERRNO_node->var_value);
1032 size_t len = strlen(string);
1033 #if defined(USE_EBCDIC) && defined(ELIDE_IBM_ERROR_CODE)
1034 // skip over leading IBM error code
1035 // N.B. This code is untested
1036 if (isupper(string[0]) && isupper(string[1])) {
1037 while (*string && *string != ' ')
1038 string++;
1039
1040 while (*string && *string == ' ')
1041 string++;
1042
1043 len = strlen(string);
1044 if (string[len-1] == '.')
1045 len--; // remove the final '.'
1046 }
1047 #endif
1048 ERRNO_node->var_value = make_string(string, len);
1049 }
1050
1051 /* unset_ERRNO --- eliminate the value of ERRNO */
1052
1053 void
unset_ERRNO(void)1054 unset_ERRNO(void)
1055 {
1056 update_PROCINFO_num("errno", 0);
1057 unref(ERRNO_node->var_value);
1058 ERRNO_node->var_value = dupnode(Nnull_string);
1059 }
1060
1061 /* update_NR --- update the value of NR */
1062
1063 void
update_NR()1064 update_NR()
1065 {
1066 #ifdef HAVE_MPFR
1067 if (is_mpg_number(NR_node->var_value))
1068 (void) mpg_update_var(NR_node);
1069 else
1070 #endif
1071 if (NR_node->var_value->numbr != NR) {
1072 unref(NR_node->var_value);
1073 NR_node->var_value = make_number(NR);
1074 }
1075 }
1076
1077 /* update_NF --- update the value of NF */
1078
1079 void
update_NF()1080 update_NF()
1081 {
1082 long l;
1083
1084 l = get_number_si(NF_node->var_value);
1085 if (NF == -1 || l != NF) {
1086 if (NF == -1)
1087 (void) get_field(UNLIMITED - 1, NULL); /* parse record */
1088 unref(NF_node->var_value);
1089 NF_node->var_value = make_number(NF);
1090 }
1091 }
1092
1093 /* update_FNR --- update the value of FNR */
1094
1095 void
update_FNR()1096 update_FNR()
1097 {
1098 #ifdef HAVE_MPFR
1099 if (is_mpg_number(FNR_node->var_value))
1100 (void) mpg_update_var(FNR_node);
1101 else
1102 #endif
1103 if (FNR_node->var_value->numbr != FNR) {
1104 unref(FNR_node->var_value);
1105 FNR_node->var_value = make_number(FNR);
1106 }
1107 }
1108
1109
NODE *frame_ptr;	/* current frame */
/* Topmost occupied location of the run-time stack; see grow_stack(). */
STACK_ITEM *stack_ptr = NULL;
/* Start of the stack's storage; grow_stack() reallocates it. */
STACK_ITEM *stack_bottom;
/* Last slot of the stack's storage; recomputed by grow_stack(). */
STACK_ITEM *stack_top;
/* Number of slots in the stack; doubled by each grow_stack() call. */
static unsigned long STACK_SIZE = 256;	/* initial size of stack */
int max_args = 0;	/* maximum # of arguments to printf, print, sprintf,
			 * or # of array subscripts, or adjacent strings
			 * to be concatenated.
			 */
NODE **args_array = NULL;	/* NOTE(review): presumably sized from max_args --- confirm */
1120
1121 /* grow_stack --- grow the size of runtime stack */
1122
1123 /* N.B. stack_ptr points to the topmost occupied location
1124 * on the stack, not the first free location.
1125 */
1126
1127 STACK_ITEM *
grow_stack()1128 grow_stack()
1129 {
1130 STACK_SIZE *= 2;
1131 erealloc(stack_bottom, STACK_ITEM *, STACK_SIZE * sizeof(STACK_ITEM), "grow_stack");
1132 stack_top = stack_bottom + STACK_SIZE - 1;
1133 stack_ptr = stack_bottom + STACK_SIZE / 2;
1134 return stack_ptr;
1135 }
1136
1137 /*
1138 * r_get_lhs:
1139 * This returns a POINTER to a node pointer (var's value).
1140 * used to store the var's new value.
1141 */
1142
1143 NODE **
r_get_lhs(NODE * n,bool reference)1144 r_get_lhs(NODE *n, bool reference)
1145 {
1146 bool isparam = false;
1147
1148 if (n->type == Node_param_list) {
1149 isparam = true;
1150 n = GET_PARAM(n->param_cnt);
1151 }
1152
1153 switch (n->type) {
1154 case Node_var_array:
1155 fatal(_("attempt to use array `%s' in a scalar context"),
1156 array_vname(n));
1157 case Node_array_ref:
1158 if (n->orig_array->type == Node_var_array)
1159 fatal(_("attempt to use array `%s' in a scalar context"),
1160 array_vname(n));
1161 if (n->orig_array->type != Node_var) {
1162 n->orig_array->type = Node_var;
1163 n->orig_array->var_value = dupnode(Nnull_string);
1164 }
1165 /* fall through */
1166 case Node_var_new:
1167 n->type = Node_var;
1168 n->var_value = dupnode(Nnull_string);
1169 break;
1170
1171 case Node_var:
1172 break;
1173
1174 default:
1175 cant_happen();
1176 }
1177
1178 if (do_lint && reference && var_uninitialized(n))
1179 lintwarn((isparam ?
1180 _("reference to uninitialized argument `%s'") :
1181 _("reference to uninitialized variable `%s'")),
1182 n->vname);
1183 return & n->var_value;
1184 }
1185
1186
1187 /* r_get_field --- get the address of a field node */
1188
1189 NODE **
r_get_field(NODE * n,Func_ptr * assign,bool reference)1190 r_get_field(NODE *n, Func_ptr *assign, bool reference)
1191 {
1192 long field_num;
1193 NODE **lhs;
1194
1195 if (assign)
1196 *assign = NULL;
1197 if (do_lint) {
1198 if ((fixtype(n)->flags & NUMBER) == 0) {
1199 lintwarn(_("attempt to field reference from non-numeric value"));
1200 if (n->stlen == 0)
1201 lintwarn(_("attempt to field reference from null string"));
1202 }
1203 }
1204
1205 (void) force_number(n);
1206 field_num = get_number_si(n);
1207
1208 if (field_num < 0)
1209 fatal(_("attempt to access field %ld"), field_num);
1210
1211 if (field_num == 0 && field0_valid) { /* short circuit */
1212 lhs = &fields_arr[0];
1213 if (assign)
1214 *assign = reset_record;
1215 } else
1216 lhs = get_field(field_num, assign);
1217 if (do_lint && reference && ((*lhs)->flags & NULL_FIELD) != 0)
1218 lintwarn(_("reference to uninitialized field `$%ld'"),
1219 field_num);
1220 return lhs;
1221 }
1222
1223
1224 /*
1225 * calc_exp_posint --- calculate x^n for positive integral n,
1226 * using exponentiation by squaring without recursion.
1227 */
1228
1229 static AWKNUM
calc_exp_posint(AWKNUM x,long n)1230 calc_exp_posint(AWKNUM x, long n)
1231 {
1232 AWKNUM mult = 1;
1233
1234 while (n > 1) {
1235 if ((n % 2) == 1)
1236 mult *= x;
1237 x *= x;
1238 n /= 2;
1239 }
1240 return mult * x;
1241 }
1242
1243 /* calc_exp --- calculate x1^x2 */
1244
1245 AWKNUM
calc_exp(AWKNUM x1,AWKNUM x2)1246 calc_exp(AWKNUM x1, AWKNUM x2)
1247 {
1248 long lx;
1249
1250 if ((lx = x2) == x2) { /* integer exponent */
1251 if (lx == 0)
1252 return 1;
1253 return (lx > 0) ? calc_exp_posint(x1, lx)
1254 : 1.0 / calc_exp_posint(x1, -lx);
1255 }
1256 return (AWKNUM) pow((double) x1, (double) x2);
1257 }
1258
1259
1260 /* setup_frame --- setup new frame for function call */
1261
1262 static INSTRUCTION *
setup_frame(INSTRUCTION * pc)1263 setup_frame(INSTRUCTION *pc)
1264 {
1265 NODE *r = NULL;
1266 NODE *m, *f, *fp;
1267 NODE **sp = NULL;
1268 int pcount, arg_count, i, j;
1269
1270 f = pc->func_body;
1271 pcount = f->param_cnt;
1272 fp = f->fparms;
1273 arg_count = (pc + 1)->expr_count;
1274
1275 if (pcount > 0) {
1276 ezalloc(sp, NODE **, pcount * sizeof(NODE *), "setup_frame");
1277 }
1278
1279 /* check for extra args */
1280 if (arg_count > pcount) {
1281 warning(
1282 _("function `%s' called with more arguments than declared"),
1283 f->vname);
1284 do {
1285 r = POP();
1286 if (r->type == Node_val)
1287 DEREF(r);
1288 } while (--arg_count > pcount);
1289 }
1290
1291 for (i = 0, j = arg_count - 1; i < pcount; i++, j--) {
1292 getnode(r);
1293 memset(r, 0, sizeof(NODE));
1294 sp[i] = r;
1295
1296 if (i >= arg_count) {
1297 /* local variable */
1298 r->type = Node_var_new;
1299 r->vname = fp[i].param;
1300 continue;
1301 }
1302
1303 m = PEEK(j); /* arguments in reverse order on runtime stack */
1304
1305 if (m->type == Node_param_list)
1306 m = GET_PARAM(m->param_cnt);
1307
1308 /* $0 needs to be passed by value to a function */
1309 if (m == fields_arr[0]) {
1310 DEREF(m);
1311 m = dupnode(m);
1312 }
1313
1314 switch (m->type) {
1315 case Node_var_new:
1316 case Node_var_array:
1317 r->type = Node_array_ref;
1318 r->orig_array = r->prev_array = m;
1319 break;
1320
1321 case Node_array_ref:
1322 r->type = Node_array_ref;
1323 r->orig_array = m->orig_array;
1324 r->prev_array = m;
1325 break;
1326
1327 case Node_var:
1328 /* Untyped (Node_var_new) variable as param became a
1329 * scalar during evaluation of expression for a
1330 * subsequent param.
1331 */
1332 r->type = Node_var;
1333 r->var_value = dupnode(Nnull_string);
1334 break;
1335
1336 case Node_val:
1337 r->type = Node_var;
1338 r->var_value = m;
1339 break;
1340
1341 case Node_func:
1342 case Node_builtin_func:
1343 case Node_ext_func:
1344 r->type = Node_var;
1345 r->var_value = make_string(m->vname, strlen(m->vname));
1346 break;
1347
1348 default:
1349 cant_happen();
1350 }
1351 r->vname = fp[i].param;
1352 }
1353
1354 stack_adj(-arg_count); /* adjust stack pointer */
1355
1356 if (pc->opcode == Op_indirect_func_call) {
1357 r = POP(); /* indirect var */
1358 DEREF(r);
1359 }
1360
1361 frame_ptr->vname = source; /* save current source */
1362
1363 if (do_profile || do_debug)
1364 push_frame(frame_ptr);
1365
1366 /* save current frame in stack */
1367 PUSH(frame_ptr);
1368
1369 /* setup new frame */
1370 getnode(frame_ptr);
1371 frame_ptr->type = Node_frame;
1372 frame_ptr->stack = sp;
1373 frame_ptr->prev_frame_size = (stack_ptr - stack_bottom); /* size of the previous stack frame */
1374 frame_ptr->func_node = f;
1375 frame_ptr->vname = NULL;
1376 frame_ptr->reti = pc; /* on return execute pc->nexti */
1377
1378 return f->code_ptr;
1379 }
1380
1381
1382 /* restore_frame --- clean up the stack and update frame */
1383
1384 static INSTRUCTION *
restore_frame(NODE * fp)1385 restore_frame(NODE *fp)
1386 {
1387 NODE *r;
1388 NODE **sp;
1389 int n;
1390 NODE *func;
1391 INSTRUCTION *ri;
1392
1393 func = frame_ptr->func_node;
1394 n = func->param_cnt;
1395 sp = frame_ptr->stack;
1396
1397 for (; n > 0; n--) {
1398 r = *sp++;
1399 if (r->type == Node_var) /* local variable */
1400 DEREF(r->var_value);
1401 else if (r->type == Node_var_array) /* local array */
1402 assoc_clear(r);
1403 freenode(r);
1404 }
1405
1406 if (frame_ptr->stack != NULL)
1407 efree(frame_ptr->stack);
1408 ri = frame_ptr->reti; /* execution in calling frame
1409 * resumes from ri->nexti.
1410 */
1411 freenode(frame_ptr);
1412 if (do_profile || do_debug)
1413 pop_frame();
1414
1415 /* restore frame */
1416 frame_ptr = fp;
1417 /* restore source */
1418 source = fp->vname;
1419 fp->vname = NULL;
1420
1421 return ri->nexti;
1422 }
1423
1424
1425 /* free_arrayfor --- free 'for (var in array)' related data */
1426
1427 static inline void
free_arrayfor(NODE * r)1428 free_arrayfor(NODE *r)
1429 {
1430 if (r->for_list != NULL) {
1431 NODE *n;
1432 size_t num_elems = r->for_list_size;
1433 NODE **list = r->for_list;
1434 while (num_elems > 0) {
1435 n = list[--num_elems];
1436 unref(n);
1437 }
1438 efree(list);
1439 }
1440 freenode(r);
1441 }
1442
1443
1444 /*
1445 * unwind_stack --- pop items off the run-time stack;
1446 * 'n' is the # of items left in the stack.
1447 */
1448
1449 INSTRUCTION *
unwind_stack(long n)1450 unwind_stack(long n)
1451 {
1452 NODE *r;
1453 INSTRUCTION *cp = NULL;
1454 STACK_ITEM *sp;
1455
1456 if (stack_empty())
1457 return NULL;
1458
1459 sp = stack_bottom + n;
1460
1461 if (stack_ptr < sp)
1462 return NULL;
1463
1464 while ((r = POP()) != NULL) {
1465 switch (r->type) {
1466 case Node_frame:
1467 cp = restore_frame(r);
1468 break;
1469 case Node_arrayfor:
1470 free_arrayfor(r);
1471 break;
1472 case Node_val:
1473 DEREF(r);
1474 break;
1475 case Node_instruction:
1476 freenode(r);
1477 break;
1478 default:
1479 /*
1480 * Check `exiting' and don't produce an error for
1481 * cases like:
1482 * func _fn0() { exit }
1483 * BEGIN { ARRAY[_fn0()] }
1484 */
1485 if (in_main_context() && ! exiting)
1486 fatal(_("unwind_stack: unexpected type `%s'"),
1487 nodetype2str(r->type));
1488 /* else
1489 * Node_var_array,
1490 * Node_param_list,
1491 * Node_var (e.g: trying to use scalar for array)
1492 * Node_regex/Node_dynregex
1493 * ?
1494 */
1495 break;
1496 }
1497
1498 if (stack_ptr < sp)
1499 break;
1500 }
1501 return cp;
1502 }
1503
1504
1505 /* pop_fcall --- pop off the innermost frame */
1506 #define pop_fcall() unwind_stack(frame_ptr->prev_frame_size)
1507
1508 /* pop the run-time stack */
1509 #define pop_stack() (void) unwind_stack(0)
1510
1511
1512 static inline bool
eval_condition(NODE * t)1513 eval_condition(NODE *t)
1514 {
1515 if (t == node_Boolean[false])
1516 return false;
1517
1518 if (t == node_Boolean[true])
1519 return true;
1520
1521 return boolval(t);
1522 }
1523
1524 typedef enum {
1525 SCALAR_EQ_NEQ,
1526 SCALAR_RELATIONAL
1527 } scalar_cmp_t;
1528
1529 /* cmp_scalars -- compare two nodes on the stack */
1530
1531 static inline int
cmp_scalars(scalar_cmp_t comparison_type)1532 cmp_scalars(scalar_cmp_t comparison_type)
1533 {
1534 NODE *t1, *t2;
1535 int di;
1536
1537 t2 = POP_SCALAR();
1538 t1 = TOP();
1539 if (t1->type == Node_var_array) {
1540 DEREF(t2);
1541 fatal(_("attempt to use array `%s' in a scalar context"), array_vname(t1));
1542 }
1543 di = cmp_nodes(t1, t2, comparison_type == SCALAR_EQ_NEQ);
1544 DEREF(t1);
1545 DEREF(t2);
1546 return di;
1547 }
1548
1549 /* op_assign --- assignment operators excluding = */
1550
1551 static void
op_assign(OPCODE op)1552 op_assign(OPCODE op)
1553 {
1554 NODE **lhs;
1555 NODE *t1, *t2;
1556 AWKNUM x = 0.0, x1, x2;
1557
1558 lhs = POP_ADDRESS();
1559 t1 = *lhs;
1560 x1 = force_number(t1)->numbr;
1561
1562 t2 = TOP_SCALAR();
1563 x2 = force_number(t2)->numbr;
1564 DEREF(t2);
1565
1566 switch (op) {
1567 case Op_assign_plus:
1568 x = x1 + x2;
1569 break;
1570 case Op_assign_minus:
1571 x = x1 - x2;
1572 break;
1573 case Op_assign_times:
1574 x = x1 * x2;
1575 break;
1576 case Op_assign_quotient:
1577 if (x2 == (AWKNUM) 0) {
1578 decr_sp();
1579 fatal(_("division by zero attempted in `/='"));
1580 }
1581 x = x1 / x2;
1582 break;
1583 case Op_assign_mod:
1584 if (x2 == (AWKNUM) 0) {
1585 decr_sp();
1586 fatal(_("division by zero attempted in `%%='"));
1587 }
1588 #ifdef HAVE_FMOD
1589 x = fmod(x1, x2);
1590 #else /* ! HAVE_FMOD */
1591 (void) modf(x1 / x2, &x);
1592 x = x1 - x2 * x;
1593 #endif /* ! HAVE_FMOD */
1594 break;
1595 case Op_assign_exp:
1596 x = calc_exp((double) x1, (double) x2);
1597 break;
1598 default:
1599 break;
1600 }
1601
1602 if (t1->valref == 1 && t1->flags == (MALLOC|NUMCUR|NUMBER)) {
1603 /* optimization */
1604 t1->numbr = x;
1605 } else {
1606 unref(t1);
1607 t1 = *lhs = make_number(x);
1608 }
1609
1610 UPREF(t1);
1611 REPLACE(t1);
1612 }
1613
1614 /* PUSH_CODE --- push a code onto the runtime stack */
1615
1616 void
PUSH_CODE(INSTRUCTION * cp)1617 PUSH_CODE(INSTRUCTION *cp)
1618 {
1619 NODE *r;
1620 getnode(r);
1621 r->type = Node_instruction;
1622 r->code_ptr = cp;
1623 PUSH(r);
1624 }
1625
1626 /* POP_CODE --- pop a code off the runtime stack */
1627
1628 INSTRUCTION *
POP_CODE()1629 POP_CODE()
1630 {
1631 NODE *r;
1632 INSTRUCTION *cp;
1633 r = POP();
1634 cp = r->code_ptr;
1635 freenode(r);
1636 return cp;
1637 }
1638
1639
1640 /*
1641 * Implementation of BEGINFILE and ENDFILE requires saving an execution
1642 * state and the ability to return to that state. The state is
1643 * defined by the instruction triggering the BEGINFILE/ENDFILE rule, the
1644 * run-time stack, the rule and the source file. The source line is available in
1645 * the instruction and hence is not considered a part of the execution state.
1646 */
1647
1648
1649 typedef struct exec_state {
1650 struct exec_state *next;
1651
1652 INSTRUCTION *cptr; /* either getline (Op_K_getline) or the
1653 * implicit "open-file, read-record" loop (Op_newfile).
1654 */
1655
1656 int rule; /* rule for the INSTRUCTION */
1657
1658 long stack_size; /* For this particular usage, it is sufficient to save
1659 * only the size of the call stack. We do not
1660 * store the actual stack pointer to avoid problems
1661 * in case the stack gets realloc-ed.
1662 */
1663
1664 const char *source; /* source file for the INSTRUCTION */
1665 } EXEC_STATE;
1666
1667 static EXEC_STATE exec_state_stack;
1668
1669 /* push_exec_state --- save an execution state on stack */
1670
1671 static void
push_exec_state(INSTRUCTION * cp,int rule,char * src,STACK_ITEM * sp)1672 push_exec_state(INSTRUCTION *cp, int rule, char *src, STACK_ITEM *sp)
1673 {
1674 EXEC_STATE *es;
1675
1676 emalloc(es, EXEC_STATE *, sizeof(EXEC_STATE), "push_exec_state");
1677 es->rule = rule;
1678 es->cptr = cp;
1679 es->stack_size = (sp - stack_bottom) + 1;
1680 es->source = src;
1681 es->next = exec_state_stack.next;
1682 exec_state_stack.next = es;
1683 }
1684
1685
1686 /* pop_exec_state --- pop one execution state off the stack */
1687
1688 static INSTRUCTION *
pop_exec_state(int * rule,char ** src,long * sz)1689 pop_exec_state(int *rule, char **src, long *sz)
1690 {
1691 INSTRUCTION *cp;
1692 EXEC_STATE *es;
1693
1694 es = exec_state_stack.next;
1695 if (es == NULL)
1696 return NULL;
1697 cp = es->cptr;
1698 if (rule != NULL)
1699 *rule = es->rule;
1700 if (src != NULL)
1701 *src = (char *) es->source;
1702 if (sz != NULL)
1703 *sz = es->stack_size;
1704 exec_state_stack.next = es->next;
1705 efree(es);
1706 return cp;
1707 }
1708
1709
1710 /* register_exec_hook --- add exec hooks in the interpreter. */
1711
1712 int
register_exec_hook(Func_pre_exec preh,Func_post_exec posth)1713 register_exec_hook(Func_pre_exec preh, Func_post_exec posth)
1714 {
1715 int pos = 0;
1716
1717 /*
1718 * multiple post-exec hooks aren't supported. post-exec hook is mainly
1719 * for use by the debugger.
1720 */
1721
1722 if (! preh || (post_execute && posth))
1723 return false;
1724
1725 if (num_exec_hook == MAX_EXEC_HOOKS)
1726 return false;
1727
1728 /*
1729 * Add to the beginning of the array but do not displace the
1730 * debugger hook if it exists.
1731 */
1732 if (num_exec_hook > 0) {
1733 pos = !! do_debug;
1734 if (num_exec_hook > pos)
1735 memmove(pre_execute + pos + 1, pre_execute + pos,
1736 (num_exec_hook - pos) * sizeof (preh));
1737 }
1738 pre_execute[pos] = preh;
1739 num_exec_hook++;
1740
1741 if (posth)
1742 post_execute = posth;
1743
1744 return true;
1745 }
1746
1747
1748 /* interpreter routine when not debugging */
1749 #include "interpret.h"
1750
1751 /* interpreter routine with exec hook(s). Used when debugging and/or with MPFR. */
1752 #define r_interpret h_interpret
1753 #define EXEC_HOOK 1
1754 #include "interpret.h"
1755 #undef EXEC_HOOK
1756 #undef r_interpret
1757
1758
1759 void
init_interpret()1760 init_interpret()
1761 {
1762 long newval;
1763
1764 if ((newval = getenv_long("GAWK_STACKSIZE")) > 0)
1765 STACK_SIZE = newval;
1766
1767 emalloc(stack_bottom, STACK_ITEM *, STACK_SIZE * sizeof(STACK_ITEM), "grow_stack");
1768 stack_ptr = stack_bottom - 1;
1769 stack_top = stack_bottom + STACK_SIZE - 1;
1770
1771 /* initialize frame pointer */
1772 getnode(frame_ptr);
1773 frame_ptr->type = Node_frame;
1774 frame_ptr->stack = NULL;
1775 frame_ptr->func_node = NULL; /* in main */
1776 frame_ptr->vname = NULL;
1777
1778 /* initialize true and false nodes */
1779 node_Boolean[false] = make_number(0.0);
1780 node_Boolean[true] = make_number(1.0);
1781 if (! is_mpg_number(node_Boolean[false])) {
1782 node_Boolean[false]->flags |= NUMINT;
1783 node_Boolean[true]->flags |= NUMINT;
1784 }
1785
1786 /*
1787 * Select the interpreter routine. The version without
1788 * any exec hook support (r_interpret) is faster by about
1789 * 5%, or more depending on the opcodes.
1790 */
1791
1792 if (num_exec_hook > 0)
1793 interpret = h_interpret;
1794 else
1795 interpret = r_interpret;
1796 }
1797
1798