1 /*
2 * awkgram.y --- yacc/bison parser
3 */
4
5 /*
6 * Copyright (C) 1986, 1988, 1989, 1991-2021 the Free Software Foundation, Inc.
7 *
8 * This file is part of GAWK, the GNU implementation of the
9 * AWK Programming Language.
10 *
11 * GAWK is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 3 of the License, or
14 * (at your option) any later version.
15 *
16 * GAWK is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 */
25
26 %{
27 #ifdef GAWKDEBUG
28 #define YYDEBUG 12
29 #endif
30
31 #include "awk.h"
32
33 #if defined(__STDC__) && __STDC__ < 1 /* VMS weirdness, maybe elsewhere */
34 #define signed /**/
35 #endif
36
37 static void yyerror(const char *m, ...) ATTRIBUTE_PRINTF_1;
38 #define YYERROR_IS_DECLARED 1 /* for bison 3.8. sigh. */
39 static void error_ln(int line, const char *m, ...) ATTRIBUTE_PRINTF_2;
40 static void lintwarn_ln(int line, const char *m, ...) ATTRIBUTE_PRINTF_2;
41 static void warning_ln(int line, const char *m, ...) ATTRIBUTE_PRINTF_2;
42 static char *get_src_buf(void);
43 static int yylex(void);
44 int yyparse(void);
45 static INSTRUCTION *snode(INSTRUCTION *subn, INSTRUCTION *op);
46 static char **check_params(char *fname, int pcount, INSTRUCTION *list);
47 static int install_function(char *fname, INSTRUCTION *fi, INSTRUCTION *plist);
48 static NODE *mk_rexp(INSTRUCTION *exp);
49 static void param_sanity(INSTRUCTION *arglist);
50 static int parms_shadow(INSTRUCTION *pc, bool *shadow);
51 #ifndef NO_LINT
52 static int isnoeffect(OPCODE type);
53 #endif
54 static INSTRUCTION *make_assignable(INSTRUCTION *ip);
55 static void dumpintlstr(const char *str, size_t len);
56 static void dumpintlstr2(const char *str1, size_t len1, const char *str2, size_t len2);
57 static bool include_source(INSTRUCTION *file, void **srcfile_p);
58 static bool load_library(INSTRUCTION *file, void **srcfile_p);
59 static void set_namespace(INSTRUCTION *ns, INSTRUCTION *comment);
60 static void next_sourcefile(void);
61 static char *tokexpand(void);
62 static NODE *set_profile_text(NODE *n, const char *str, size_t len);
63 static int check_qualified_special(char *token);
64 static char *qualify_name(const char *name, size_t len);
65 static INSTRUCTION *trailing_comment;
66 static INSTRUCTION *outer_comment;
67 static INSTRUCTION *interblock_comment;
68 static INSTRUCTION *pending_comment;
69 static INSTRUCTION *namespace_chain;
70
71 #ifdef DEBUG_COMMENTS
72 static void
debug_print_comment_s(const char * name,INSTRUCTION * comment,int line)73 debug_print_comment_s(const char *name, INSTRUCTION *comment, int line)
74 {
75 if (comment != NULL)
76 fprintf(stderr, "%d: %s: <%.*s>\n", line, name,
77 (int) (comment->memory->stlen - 1),
78 comment->memory->stptr);
79 }
80 #define debug_print_comment(comment) \
81 debug_print_comment_s(# comment, comment, __LINE__)
82 #endif
83
84 #define instruction(t) bcalloc(t, 1, 0)
85
86 static INSTRUCTION *mk_program(void);
87 static INSTRUCTION *append_rule(INSTRUCTION *pattern, INSTRUCTION *action);
88 static INSTRUCTION *mk_function(INSTRUCTION *fi, INSTRUCTION *def);
89 static INSTRUCTION *mk_condition(INSTRUCTION *cond, INSTRUCTION *ifp, INSTRUCTION *true_branch,
90 INSTRUCTION *elsep, INSTRUCTION *false_branch);
91 static INSTRUCTION *mk_expression_list(INSTRUCTION *list, INSTRUCTION *s1);
92 static INSTRUCTION *mk_for_loop(INSTRUCTION *forp, INSTRUCTION *init, INSTRUCTION *cond,
93 INSTRUCTION *incr, INSTRUCTION *body);
94 static void fix_break_continue(INSTRUCTION *list, INSTRUCTION *b_target, INSTRUCTION *c_target);
95 static INSTRUCTION *mk_binary(INSTRUCTION *s1, INSTRUCTION *s2, INSTRUCTION *op);
96 static INSTRUCTION *mk_boolean(INSTRUCTION *left, INSTRUCTION *right, INSTRUCTION *op);
97 static INSTRUCTION *mk_assignment(INSTRUCTION *lhs, INSTRUCTION *rhs, INSTRUCTION *op);
98 static INSTRUCTION *mk_getline(INSTRUCTION *op, INSTRUCTION *opt_var, INSTRUCTION *redir, int redirtype);
99 static int count_expressions(INSTRUCTION **list, bool isarg);
100 static INSTRUCTION *optimize_assignment(INSTRUCTION *exp);
101 static void add_lint(INSTRUCTION *list, LINTTYPE linttype);
102
103 enum defref { FUNC_DEFINE, FUNC_USE, FUNC_EXT };
104 static void func_use(const char *name, enum defref how);
105 static void check_funcs(void);
106
107 static ssize_t read_one_line(int fd, void *buffer, size_t count);
108 static int one_line_close(int fd);
109 static void merge_comments(INSTRUCTION *c1, INSTRUCTION *c2);
110 static INSTRUCTION *make_braced_statements(INSTRUCTION *lbrace, INSTRUCTION *stmts, INSTRUCTION *rbrace);
111 static void add_sign_to_num(NODE *n, char sign);
112
113 static bool at_seen = false;
114 static bool want_source = false;
115 static bool want_regexp = false; /* lexical scanning kludge */
116 static enum {
117 FUNC_HEADER,
118 FUNC_BODY,
119 DONT_CHECK
120 } want_param_names = DONT_CHECK; /* ditto */
121 static bool in_function; /* parsing kludge */
122 static int rule = 0;
123
124 const char *const ruletab[] = {
125 "?",
126 "BEGIN",
127 "Rule",
128 "END",
129 "BEGINFILE",
130 "ENDFILE",
131 };
132
133 static bool in_print = false; /* lexical scanning kludge for print */
134 static int in_parens = 0; /* lexical scanning kludge for print */
135 static int sub_counter = 0; /* array dimension counter for use in delete */
136 static char *lexptr; /* pointer to next char during parsing */
137 static char *lexend; /* end of buffer */
138 static char *lexptr_begin; /* keep track of where we were for error msgs */
139 static char *lexeme; /* beginning of lexeme for debugging */
140 static bool lexeof; /* seen EOF for current source? */
141 static char *thisline = NULL;
142 static int in_braces = 0; /* count braces for firstline, lastline in an 'action' */
143 static int lastline = 0;
144 static int firstline = 0;
145 static SRCFILE *sourcefile = NULL; /* current program source */
146 static int lasttok = 0;
147 static bool eof_warned = false; /* GLOBAL: want warning for each file */
148 static int break_allowed; /* kludge for break */
149 static int continue_allowed; /* kludge for continue */
150
151 #define END_FILE -1000
152 #define END_SRC -2000
153
154 #define YYDEBUG_LEXER_TEXT (lexeme)
155 static char *tokstart = NULL;
156 static char *tok = NULL;
157 static char *tokend;
158 int errcount = 0;
159
160 extern char *source;
161 extern int sourceline;
162 extern SRCFILE *srcfiles;
163 extern INSTRUCTION *rule_list;
164 extern int max_args;
165 extern NODE **args_array;
166
167 const char awk_namespace[] = "awk";
168 const char *current_namespace = awk_namespace;
169 bool namespace_changed = false;
170
171 static INSTRUCTION *rule_block[sizeof(ruletab)/sizeof(ruletab[0])];
172
173 static INSTRUCTION *ip_rec;
174 static INSTRUCTION *ip_newfile;
175 static INSTRUCTION *ip_atexit = NULL;
176 static INSTRUCTION *ip_end;
177 static INSTRUCTION *ip_endfile;
178 static INSTRUCTION *ip_beginfile;
179 INSTRUCTION *main_beginfile;
180 static bool called_from_eval = false;
181
182 static inline INSTRUCTION *list_create(INSTRUCTION *x);
183 static inline INSTRUCTION *list_append(INSTRUCTION *l, INSTRUCTION *x);
184 static inline INSTRUCTION *list_prepend(INSTRUCTION *l, INSTRUCTION *x);
185 static inline INSTRUCTION *list_merge(INSTRUCTION *l1, INSTRUCTION *l2);
186
187 extern double fmod(double x, double y);
188
189 #define YYSTYPE INSTRUCTION *
190 %}
191
192 %token FUNC_CALL NAME REGEXP FILENAME
193 %token YNUMBER YSTRING TYPED_REGEXP
194 %token RELOP IO_OUT IO_IN
195 %token ASSIGNOP ASSIGN MATCHOP CONCAT_OP
196 %token SUBSCRIPT
197 %token LEX_BEGIN LEX_END LEX_IF LEX_ELSE LEX_RETURN LEX_DELETE
198 %token LEX_SWITCH LEX_CASE LEX_DEFAULT LEX_WHILE LEX_DO LEX_FOR LEX_BREAK LEX_CONTINUE
199 %token LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT LEX_FUNCTION
200 %token LEX_BEGINFILE LEX_ENDFILE
201 %token LEX_GETLINE LEX_NEXTFILE
202 %token LEX_IN
203 %token LEX_AND LEX_OR INCREMENT DECREMENT
204 %token LEX_BUILTIN LEX_LENGTH
205 %token LEX_EOF
206 %token LEX_INCLUDE LEX_EVAL LEX_LOAD LEX_NAMESPACE
207 %token NEWLINE
208
209 /* Lowest to highest */
210 %right ASSIGNOP ASSIGN SLASH_BEFORE_EQUAL
211 %right '?' ':'
212 %left LEX_OR
213 %left LEX_AND
214 %left LEX_GETLINE
215 %nonassoc LEX_IN
216 %left FUNC_CALL LEX_BUILTIN LEX_LENGTH
217 %nonassoc ','
218 %left MATCHOP
219 %nonassoc RELOP '<' '>' IO_IN IO_OUT
220 %left CONCAT_OP
221 %left YSTRING YNUMBER TYPED_REGEXP
222 %left '+' '-'
223 %left '*' '/' '%'
224 %right '!' UNARY
225 %right '^'
226 %left INCREMENT DECREMENT
227 %left '$'
228 %left '(' ')'
229 %%
230
231 program
232 : /* empty */
233 { $$ = NULL; }
234 | program rule
235 {
236 rule = 0;
237 yyerrok;
238 }
239 | program nls
240 {
241 if ($2 != NULL) {
242 if ($1 == NULL)
243 outer_comment = $2;
244 else
245 interblock_comment = $2;
246 }
247 $$ = $1;
248 }
249 | program LEX_EOF
250 {
251 next_sourcefile();
252 }
253 | program error
254 {
255 rule = 0;
256 /*
257 * If errors, give up, don't produce an infinite
258 * stream of syntax error messages.
259 */
260 /* yyerrok; */
261 }
262 ;
263
264 rule
265 : pattern action
266 {
267 (void) append_rule($1, $2);
268 if (pending_comment != NULL) {
269 interblock_comment = pending_comment;
270 pending_comment = NULL;
271 }
272 }
273 | pattern statement_term
274 {
275 if (rule != Rule) {
276 msg(_("%s blocks must have an action part"), ruletab[rule]);
277 errcount++;
278 } else if ($1 == NULL) {
279 msg(_("each rule must have a pattern or an action part"));
280 errcount++;
281 } else { /* pattern rule with non-empty pattern */
282 if ($2 != NULL)
283 list_append($1, $2);
284 (void) append_rule($1, NULL);
285 }
286 }
287 | function_prologue action
288 {
289 in_function = false;
290 (void) mk_function($1, $2);
291 want_param_names = DONT_CHECK;
292 if (pending_comment != NULL) {
293 interblock_comment = pending_comment;
294 pending_comment = NULL;
295 }
296 yyerrok;
297 }
298 | '@' LEX_INCLUDE source statement_term
299 {
300 want_source = false;
301 at_seen = false;
302 if ($3 != NULL && $4 != NULL) {
303 SRCFILE *s = (SRCFILE *) $3;
304 s->comment = $4;
305 }
306 yyerrok;
307 }
308 | '@' LEX_LOAD library statement_term
309 {
310 want_source = false;
311 at_seen = false;
312 if ($3 != NULL && $4 != NULL) {
313 SRCFILE *s = (SRCFILE *) $3;
314 s->comment = $4;
315 }
316 yyerrok;
317 }
318 | '@' LEX_NAMESPACE namespace statement_term
319 {
320 want_source = false;
321 at_seen = false;
322
323 // this frees $3 storage in all cases
324 set_namespace($3, $4);
325
326 yyerrok;
327 }
328 ;
329
330 source
331 : FILENAME
332 {
333 void *srcfile = NULL;
334
335 if (! include_source($1, & srcfile))
336 YYABORT;
337 efree($1->lextok);
338 bcfree($1);
339 $$ = (INSTRUCTION *) srcfile;
340 }
341 | FILENAME error
342 { $$ = NULL; }
343 | error
344 { $$ = NULL; }
345 ;
346
347 library
348 : FILENAME
349 {
350 void *srcfile;
351
352 if (! load_library($1, & srcfile))
353 YYABORT;
354 efree($1->lextok);
355 bcfree($1);
356 $$ = (INSTRUCTION *) srcfile;
357 }
358 | FILENAME error
359 { $$ = NULL; }
360 | error
361 { $$ = NULL; }
362 ;
363
364 namespace
365 : FILENAME
366 { $$ = $1; }
367 | FILENAME error
368 { $$ = NULL; }
369 | error
370 { $$ = NULL; }
371 ;
372
373 pattern
374 : /* empty */
375 {
376 rule = Rule;
377 $$ = NULL;
378 }
379 | exp
380 {
381 rule = Rule;
382 }
383
384 | exp comma exp
385 {
386 INSTRUCTION *tp;
387
388 add_lint($1, LINT_assign_in_cond);
389 add_lint($3, LINT_assign_in_cond);
390
391 tp = instruction(Op_no_op);
392 list_prepend($1, bcalloc(Op_line_range, !!do_pretty_print + 1, 0));
393 $1->nexti->triggered = false;
394 $1->nexti->target_jmp = $3->nexti;
395
396 list_append($1, instruction(Op_cond_pair));
397 $1->lasti->line_range = $1->nexti;
398 $1->lasti->target_jmp = tp;
399
400 list_append($3, instruction(Op_cond_pair));
401 $3->lasti->line_range = $1->nexti;
402 $3->lasti->target_jmp = tp;
403 if (do_pretty_print) {
404 ($1->nexti + 1)->condpair_left = $1->lasti;
405 ($1->nexti + 1)->condpair_right = $3->lasti;
406 }
407 /* Put any comments in front of the range expression */
408 if ($2 != NULL)
409 $$ = list_append(list_merge(list_prepend($1, $2), $3), tp);
410 else
411 $$ = list_append(list_merge($1, $3), tp);
412 rule = Rule;
413 }
414 | LEX_BEGIN
415 {
416 static int begin_seen = 0;
417
418 if (do_lint_old && ++begin_seen == 2)
419 lintwarn_ln($1->source_line,
420 _("old awk does not support multiple `BEGIN' or `END' rules"));
421
422 $1->in_rule = rule = BEGIN;
423 $1->source_file = source;
424 $$ = $1;
425 }
426 | LEX_END
427 {
428 static int end_seen = 0;
429
430 if (do_lint_old && ++end_seen == 2)
431 lintwarn_ln($1->source_line,
432 _("old awk does not support multiple `BEGIN' or `END' rules"));
433
434 $1->in_rule = rule = END;
435 $1->source_file = source;
436 $$ = $1;
437 }
438 | LEX_BEGINFILE
439 {
440 $1->in_rule = rule = BEGINFILE;
441 $1->source_file = source;
442 $$ = $1;
443 }
444 | LEX_ENDFILE
445 {
446 $1->in_rule = rule = ENDFILE;
447 $1->source_file = source;
448 $$ = $1;
449 }
450 ;
451
452 action
453 : l_brace statements r_brace opt_semi opt_nls
454 {
455 INSTRUCTION *ip = make_braced_statements($1, $2, $3);
456
457 if ($3 != NULL && $5 != NULL) {
458 merge_comments($3, $5);
459 pending_comment = $3;
460 } else if ($3 != NULL) {
461 pending_comment = $3;
462 } else if ($5 != NULL) {
463 pending_comment = $5;
464 }
465
466 $$ = ip;
467 }
468 ;
469
470 func_name
471 : NAME
472 | FUNC_CALL
473 {
474 const char *name = $1->lextok;
475 char *qname = qualify_name(name, strlen(name));
476
477 if (qname != name) {
478 efree((void *)name);
479 $1->lextok = qname;
480 }
481 $$ = $1;
482 }
483 | lex_builtin
484 {
485 yyerror(_("`%s' is a built-in function, it cannot be redefined"),
486 tokstart);
487 YYABORT;
488 }
489 | '@' LEX_EVAL
490 {
491 $$ = $2;
492 at_seen = false;
493 }
494 ;
495
496 lex_builtin
497 : LEX_BUILTIN
498 | LEX_LENGTH
499 ;
500
501 function_prologue
502 : LEX_FUNCTION func_name '(' { want_param_names = FUNC_HEADER; } opt_param_list r_paren opt_nls
503 {
504 INSTRUCTION *func_comment = NULL;
505 // Merge any comments found in the parameter list with those
506 // following the function header, associate the whole shebang
507 // with the function as one block comment.
508 if ($5 != NULL && $5->comment != NULL) {
509 if ($7 != NULL) {
510 merge_comments($5->comment, $7);
511 }
512 func_comment = $5->comment;
513 } else if ($7 != NULL) {
514 func_comment = $7;
515 }
516
517 $1->source_file = source;
518 $1->comment = func_comment;
519 if (install_function($2->lextok, $1, $5) < 0)
520 YYABORT;
521 in_function = true;
522 $2->lextok = NULL;
523 bcfree($2);
524 /* $5 already free'd in install_function */
525 $$ = $1;
526 want_param_names = FUNC_BODY;
527 }
528 ;
529
530 regexp
531 /*
532 * In this rule, want_regexp tells yylex that the next thing
533 * is a regexp so it should read up to the closing slash.
534 */
535 : a_slash
536 { want_regexp = true; }
537 REGEXP /* The terminating '/' is consumed by yylex(). */
538 {
539 NODE *n, *exp;
540 char *re;
541 size_t len;
542
543 re = $3->lextok;
544 $3->lextok = NULL;
545 len = strlen(re);
546 if (do_lint) {
547 if (len == 0)
548 lintwarn_ln($3->source_line,
549 _("regexp constant `//' looks like a C++ comment, but is not"));
550 else if (re[0] == '*' && re[len-1] == '*')
551 /* possible C comment */
552 lintwarn_ln($3->source_line,
553 _("regexp constant `/%s/' looks like a C comment, but is not"), re);
554 }
555
556 exp = make_str_node(re, len, ALREADY_MALLOCED);
557 n = make_regnode(Node_regex, exp);
558 if (n == NULL) {
559 unref(exp);
560 YYABORT;
561 }
562 $$ = $3;
563 $$->opcode = Op_match_rec;
564 $$->memory = n;
565 }
566 ;
567
568 typed_regexp
569 : TYPED_REGEXP
570 {
571 char *re;
572 size_t len;
573
574 re = $1->lextok;
575 $1->lextok = NULL;
576 len = strlen(re);
577
578 $$ = $1;
579 $$->opcode = Op_push_re;
580 $$->memory = make_typed_regex(re, len);
581 }
582
583 a_slash
584 : '/'
585 { bcfree($1); }
586 | SLASH_BEFORE_EQUAL
587 ;
588
589 statements
590 : /* empty */
591 { $$ = NULL; }
592 | statements statement
593 {
594 if ($2 == NULL) {
595 $$ = $1;
596 } else {
597 add_lint($2, LINT_no_effect);
598 if ($1 == NULL) {
599 $$ = $2;
600 } else {
601 $$ = list_merge($1, $2);
602 }
603 }
604
605 if (trailing_comment != NULL) {
606 $$ = list_append($$, trailing_comment);
607 trailing_comment = NULL;
608 }
609
610 yyerrok;
611 }
612 | statements error
613 { $$ = NULL; }
614 ;
615
616 statement_term
617 : nls { $$ = $1; }
618 | semi opt_nls { $$ = $2; }
619 ;
620
621 statement
622 : semi opt_nls
623 {
624 if ($2 != NULL) {
625 INSTRUCTION *ip;
626
627 merge_comments($2, NULL);
628 ip = list_create(instruction(Op_no_op));
629 $$ = list_append(ip, $2);
630 } else
631 $$ = NULL;
632 }
633 | l_brace statements r_brace
634 {
635 trailing_comment = $3; // NULL or comment
636 $$ = make_braced_statements($1, $2, $3);
637 }
638 | if_statement
639 {
640 if (do_pretty_print)
641 $$ = list_prepend($1, instruction(Op_exec_count));
642 else
643 $$ = $1;
644 }
645 | LEX_SWITCH '(' exp r_paren opt_nls l_brace case_statements opt_nls r_brace
646 {
647 INSTRUCTION *dflt, *curr = NULL, *cexp, *cstmt;
648 INSTRUCTION *ip, *nextc, *tbreak;
649 const char **case_values = NULL;
650 int maxcount = 128;
651 int case_count = 0;
652 int i;
653
654 tbreak = instruction(Op_no_op);
655 cstmt = list_create(tbreak);
656 cexp = list_create(instruction(Op_pop));
657 dflt = instruction(Op_jmp);
658 dflt->target_jmp = tbreak; /* if no case match and no explicit default */
659
660 if ($7 != NULL) {
661 curr = $7->nexti;
662 bcfree($7); /* Op_list */
663 }
664 /* else
665 curr = NULL; */
666
667 for (; curr != NULL; curr = nextc) {
668 INSTRUCTION *caseexp = curr->case_exp;
669 INSTRUCTION *casestmt = curr->case_stmt;
670
671 nextc = curr->nexti;
672 if (curr->opcode == Op_K_case) {
673 if (caseexp->opcode == Op_push_i) {
674 /* a constant scalar */
675 char *caseval;
676 caseval = force_string(caseexp->memory)->stptr;
677 for (i = 0; i < case_count; i++) {
678 if (strcmp(caseval, case_values[i]) == 0)
679 error_ln(curr->source_line,
680 _("duplicate case values in switch body: %s"), caseval);
681 }
682
683 if (case_values == NULL)
684 emalloc(case_values, const char **, sizeof(char *) * maxcount, "statement");
685 else if (case_count >= maxcount) {
686 maxcount += 128;
687 erealloc(case_values, const char **, sizeof(char*) * maxcount, "statement");
688 }
689 case_values[case_count++] = caseval;
690 } else {
691 /* match a constant regex against switch expression. */
692 (curr + 1)->match_exp = true;
693 }
694 curr->stmt_start = casestmt->nexti;
695 curr->stmt_end = casestmt->lasti;
696 (void) list_prepend(cexp, curr);
697 (void) list_prepend(cexp, caseexp);
698 } else {
699 if (dflt->target_jmp != tbreak)
700 error_ln(curr->source_line,
701 _("duplicate `default' detected in switch body"));
702 else
703 dflt->target_jmp = casestmt->nexti;
704
705 if (do_pretty_print) {
706 curr->stmt_start = casestmt->nexti;
707 curr->stmt_end = casestmt->lasti;
708 (void) list_prepend(cexp, curr);
709 } else
710 bcfree(curr);
711 }
712
713 cstmt = list_merge(casestmt, cstmt);
714 }
715
716 if (case_values != NULL)
717 efree(case_values);
718
719 ip = $3;
720 if (do_pretty_print) {
721 // first merge comments
722 INSTRUCTION *head_comment = NULL;
723
724 if ($5 != NULL && $6 != NULL) {
725 merge_comments($5, $6);
726 head_comment = $5;
727 } else if ($5 != NULL)
728 head_comment = $5;
729 else
730 head_comment = $6;
731
732 $1->comment = head_comment;
733
734 (void) list_prepend(ip, $1);
735 (void) list_prepend(ip, instruction(Op_exec_count));
736 $1->target_break = tbreak;
737 ($1 + 1)->switch_start = cexp->nexti;
738 ($1 + 1)->switch_end = cexp->lasti;
739 ($1 + 1)->switch_end->comment = $9;
740 }
741 /* else
742 $1 is NULL */
743
744 (void) list_append(cexp, dflt);
745 (void) list_merge(ip, cexp);
746 if ($8 != NULL)
747 (void) list_append(cstmt, $8);
748 $$ = list_merge(ip, cstmt);
749
750 break_allowed--;
751 fix_break_continue(ip, tbreak, NULL);
752 }
753 | LEX_WHILE '(' exp r_paren opt_nls statement
754 {
755 /*
756 * -----------------
757 * tc:
758 * cond
759 * -----------------
760 * [Op_jmp_false tb ]
761 * -----------------
762 * body
763 * -----------------
764 * [Op_jmp tc ]
765 * tb:[Op_no_op ]
766 */
767
768 INSTRUCTION *ip, *tbreak, *tcont;
769
770 tbreak = instruction(Op_no_op);
771 add_lint($3, LINT_assign_in_cond);
772 tcont = $3->nexti;
773 ip = list_append($3, instruction(Op_jmp_false));
774 ip->lasti->target_jmp = tbreak;
775
776 if (do_pretty_print) {
777 (void) list_append(ip, instruction(Op_exec_count));
778 $1->target_break = tbreak;
779 $1->target_continue = tcont;
780 ($1 + 1)->while_body = ip->lasti;
781 (void) list_prepend(ip, $1);
782 }
783 /* else
784 $1 is NULL */
785
786 if ($5 != NULL) {
787 if ($6 == NULL)
788 $6 = list_create(instruction(Op_no_op));
789
790 $5->memory->comment_type = BLOCK_COMMENT;
791 $6 = list_prepend($6, $5);
792 }
793
794 if ($6 != NULL)
795 (void) list_merge(ip, $6);
796 (void) list_append(ip, instruction(Op_jmp));
797 ip->lasti->target_jmp = tcont;
798 $$ = list_append(ip, tbreak);
799
800 break_allowed--;
801 continue_allowed--;
802 fix_break_continue(ip, tbreak, tcont);
803 }
804 | LEX_DO opt_nls statement LEX_WHILE '(' exp r_paren opt_nls
805 {
806 /*
807 * -----------------
808 * z:
809 * body
810 * -----------------
811 * tc:
812 * cond
813 * -----------------
814 * [Op_jmp_true | z ]
815 * tb:[Op_no_op ]
816 */
817
818 INSTRUCTION *ip, *tbreak, *tcont;
819
820 tbreak = instruction(Op_no_op);
821 tcont = $6->nexti;
822 add_lint($6, LINT_assign_in_cond);
823 if ($3 != NULL)
824 ip = list_merge($3, $6);
825 else
826 ip = list_prepend($6, instruction(Op_no_op));
827
828 if ($2 != NULL)
829 (void) list_prepend(ip, $2);
830
831 if (do_pretty_print)
832 (void) list_prepend(ip, instruction(Op_exec_count));
833
834 (void) list_append(ip, instruction(Op_jmp_true));
835 ip->lasti->target_jmp = ip->nexti;
836 $$ = list_append(ip, tbreak);
837
838 break_allowed--;
839 continue_allowed--;
840 fix_break_continue(ip, tbreak, tcont);
841
842 if (do_pretty_print) {
843 $1->target_break = tbreak;
844 $1->target_continue = tcont;
845 ($1 + 1)->doloop_cond = tcont;
846 $$ = list_prepend(ip, $1);
847 bcfree($4);
848 if ($8 != NULL)
849 $1->comment = $8;
850 }
851 /* else
852 $1 and $4 are NULLs */
853 }
854 | LEX_FOR '(' NAME LEX_IN simple_variable r_paren opt_nls statement
855 {
856 INSTRUCTION *ip;
857 char *var_name = $3->lextok;
858
859 if ($8 != NULL
860 && $8->lasti->opcode == Op_K_delete
861 && $8->lasti->expr_count == 1
862 && $8->nexti->opcode == Op_push
863 && ($8->nexti->memory->type != Node_var || !($8->nexti->memory->var_update))
864 && strcmp($8->nexti->memory->vname, var_name) == 0
865 ) {
866
867 /*
868 * Efficiency hack. Recognize the special case of
869 *
870 * for (iggy in foo)
871 * delete foo[iggy]
872 *
873 * and treat it as if it were
874 *
875 * delete foo
876 *
877 * Check that the body is a `delete a[i]' statement,
878 * and that both the loop var and array names match.
879 */
880 NODE *arr = NULL;
881
882 ip = $8->nexti->nexti;
883 if ($5->nexti->opcode == Op_push && $5->lasti == $5->nexti)
884 arr = $5->nexti->memory;
885 if (arr != NULL
886 && ip->opcode == Op_no_op
887 && ip->nexti->opcode == Op_push_array
888 && strcmp(ip->nexti->memory->vname, arr->vname) == 0
889 && ip->nexti->nexti == $8->lasti
890 ) {
891 (void) make_assignable($8->nexti);
892 $8->lasti->opcode = Op_K_delete_loop;
893 $8->lasti->expr_count = 0;
894 if ($1 != NULL)
895 bcfree($1);
896 efree(var_name);
897 bcfree($3);
898 bcfree($4);
899 bcfree($5);
900 if ($7 != NULL) {
901 merge_comments($7, NULL);
902 $8 = list_prepend($8, $7);
903 }
904 $$ = $8;
905 } else
906 goto regular_loop;
907 } else {
908 INSTRUCTION *tbreak, *tcont;
909
910 /* [ Op_push_array a ]
911 * [ Op_arrayfor_init | ib ]
912 * ic:[ Op_arrayfor_incr | ib ]
913 * [ Op_var_assign if any ]
914 *
915 * body
916 *
917 * [Op_jmp | ic ]
918 * ib:[Op_arrayfor_final ]
919 */
920 regular_loop:
921 ip = $5;
922 ip->nexti->opcode = Op_push_array;
923
924 tbreak = instruction(Op_arrayfor_final);
925 $4->opcode = Op_arrayfor_incr;
926 $4->array_var = variable($3->source_line, var_name, Node_var);
927 $4->target_jmp = tbreak;
928 tcont = $4;
929 $3->opcode = Op_arrayfor_init;
930 $3->target_jmp = tbreak;
931 (void) list_append(ip, $3);
932
933 if (do_pretty_print) {
934 $1->opcode = Op_K_arrayfor;
935 $1->target_continue = tcont;
936 $1->target_break = tbreak;
937 (void) list_append(ip, $1);
938 }
939 /* else
940 $1 is NULL */
941
942 /* add update_FOO instruction if necessary */
943 if ($4->array_var->type == Node_var && $4->array_var->var_update) {
944 (void) list_append(ip, instruction(Op_var_update));
945 ip->lasti->update_var = $4->array_var->var_update;
946 }
947 (void) list_append(ip, $4);
948
949 /* add set_FOO instruction if necessary */
950 if ($4->array_var->type == Node_var && $4->array_var->var_assign) {
951 (void) list_append(ip, instruction(Op_var_assign));
952 ip->lasti->assign_var = $4->array_var->var_assign;
953 }
954
955 if (do_pretty_print) {
956 (void) list_append(ip, instruction(Op_exec_count));
957 ($1 + 1)->forloop_cond = $4;
958 ($1 + 1)->forloop_body = ip->lasti;
959 }
960
961 if ($7 != NULL)
962 merge_comments($7, NULL);
963
964 if ($8 != NULL) {
965 if ($7 != NULL)
966 $8 = list_prepend($8, $7);
967 (void) list_merge(ip, $8);
968 } else if ($7 != NULL)
969 (void) list_append(ip, $7);
970
971 (void) list_append(ip, instruction(Op_jmp));
972 ip->lasti->target_jmp = $4;
973 $$ = list_append(ip, tbreak);
974 fix_break_continue(ip, tbreak, tcont);
975 }
976
977 break_allowed--;
978 continue_allowed--;
979 }
980 | LEX_FOR '(' opt_simple_stmt semi opt_nls exp semi opt_nls opt_simple_stmt r_paren opt_nls statement
981 {
982 if ($5 != NULL) {
983 merge_comments($5, NULL);
984 $1->comment = $5;
985 }
986 if ($8 != NULL) {
987 merge_comments($8, NULL);
988 if ($1->comment == NULL) {
989 $8->memory->comment_type = FOR_COMMENT;
990 $1->comment = $8;
991 } else
992 $1->comment->comment = $8;
993 }
994 if ($11 != NULL)
995 $12 = list_prepend($12, $11);
996 add_lint($6, LINT_assign_in_cond);
997 $$ = mk_for_loop($1, $3, $6, $9, $12);
998
999 break_allowed--;
1000 continue_allowed--;
1001 }
1002 | LEX_FOR '(' opt_simple_stmt semi opt_nls semi opt_nls opt_simple_stmt r_paren opt_nls statement
1003 {
1004 if ($5 != NULL) {
1005 merge_comments($5, NULL);
1006 $1->comment = $5;
1007 }
1008 if ($7 != NULL) {
1009 merge_comments($7, NULL);
1010 if ($1->comment == NULL) {
1011 $7->memory->comment_type = FOR_COMMENT;
1012 $1->comment = $7;
1013 } else
1014 $1->comment->comment = $7;
1015 }
1016 if ($10 != NULL)
1017 $11 = list_prepend($11, $10);
1018 $$ = mk_for_loop($1, $3, (INSTRUCTION *) NULL, $8, $11);
1019
1020 break_allowed--;
1021 continue_allowed--;
1022 }
1023 | non_compound_stmt
1024 {
1025 if (do_pretty_print)
1026 $$ = list_prepend($1, instruction(Op_exec_count));
1027 else
1028 $$ = $1;
1029 }
1030 ;
1031
1032 non_compound_stmt
1033 : LEX_BREAK statement_term
1034 {
1035 if (! break_allowed)
1036 error_ln($1->source_line,
1037 _("`break' is not allowed outside a loop or switch"));
1038 $1->target_jmp = NULL;
1039 $$ = list_create($1);
1040 if ($2 != NULL)
1041 $$ = list_append($$, $2);
1042 }
1043 | LEX_CONTINUE statement_term
1044 {
1045 if (! continue_allowed)
1046 error_ln($1->source_line,
1047 _("`continue' is not allowed outside a loop"));
1048 $1->target_jmp = NULL;
1049 $$ = list_create($1);
1050 if ($2 != NULL)
1051 $$ = list_append($$, $2);
1052 }
1053 | LEX_NEXT statement_term
1054 {
1055 /* if inside function (rule = 0), resolve context at run-time */
1056 if (rule && rule != Rule)
1057 error_ln($1->source_line,
1058 _("`next' used in %s action"), ruletab[rule]);
1059 $1->target_jmp = ip_rec;
1060 $$ = list_create($1);
1061 if ($2 != NULL)
1062 $$ = list_append($$, $2);
1063 }
1064 | LEX_NEXTFILE statement_term
1065 {
1066 /* if inside function (rule = 0), resolve context at run-time */
1067 if (rule == BEGIN || rule == END || rule == ENDFILE)
1068 error_ln($1->source_line,
1069 _("`nextfile' used in %s action"), ruletab[rule]);
1070
1071 $1->target_newfile = ip_newfile;
1072 $1->target_endfile = ip_endfile;
1073 $$ = list_create($1);
1074 if ($2 != NULL)
1075 $$ = list_append($$, $2);
1076 }
1077 | LEX_EXIT opt_exp statement_term
1078 {
1079 /* Initialize the two possible jump targets, the actual target
1080 * is resolved at run-time.
1081 */
1082 $1->target_end = ip_end; /* first instruction in end_block */
1083 $1->target_atexit = ip_atexit; /* cleanup and go home */
1084
1085 if ($2 == NULL) {
1086 $$ = list_create($1);
1087 (void) list_prepend($$, instruction(Op_push_i));
1088 $$->nexti->memory = dupnode(Nnull_string);
1089 } else
1090 $$ = list_append($2, $1);
1091 if ($3 != NULL)
1092 $$ = list_append($$, $3);
1093 }
1094 | LEX_RETURN
1095 {
1096 if (! in_function)
1097 yyerror(_("`return' used outside function context"));
1098 } opt_fcall_exp statement_term {
1099 if (called_from_eval)
1100 $1->opcode = Op_K_return_from_eval;
1101
1102 if ($3 == NULL) {
1103 $$ = list_create($1);
1104 (void) list_prepend($$, instruction(Op_push_i));
1105 $$->nexti->memory = dupnode(Nnull_string);
1106 } else
1107 $$ = list_append($3, $1);
1108 if ($4 != NULL)
1109 $$ = list_append($$, $4);
1110 }
1111 | simple_stmt statement_term
1112 {
1113 if ($2 != NULL)
1114 $$ = list_append($1, $2);
1115 else
1116 $$ = $1;
1117 }
1118 ;
1119
1120 /*
1121 * A simple_stmt exists to satisfy a constraint in the POSIX
1122 * grammar allowing them to occur as the 1st and 3rd parts
1123 * in a `for (...;...;...)' loop. This is a historical oddity
1124 * inherited from Unix awk, not at all documented in the AK&W
1125 * awk book. We support it, as this was reported as a bug.
1126 * We don't bother to document it though. So there.
1127 */
1128 simple_stmt
1129 : print { in_print = true; in_parens = 0; } print_expression_list output_redir
1130 {
1131 /*
1132 * Optimization: plain `print' has no expression list, so $3 is null.
1133 * If $3 is NULL or is a bytecode list for $0 use Op_K_print_rec,
1134 * which is faster for these two cases.
1135 */
1136
1137 if (do_optimize && $1->opcode == Op_K_print &&
1138 ($3 == NULL
1139 || ($3->lasti->opcode == Op_field_spec
1140 && $3->nexti->nexti->nexti == $3->lasti
1141 && $3->nexti->nexti->opcode == Op_push_i
1142 && $3->nexti->nexti->memory->type == Node_val)
1143 )
1144 ) {
1145 static bool warned = false;
1146 /* -----------------
1147 * output_redir
1148 * [ redirect exp ]
1149 * -----------------
1150 * expression_list
1151 * ------------------
1152 * [Op_K_print_rec | NULL | redir_type | expr_count]
1153 */
1154
1155 if ($3 != NULL) {
1156 NODE *n = $3->nexti->nexti->memory;
1157
1158 if ((n->flags & (STRING|STRCUR)) != 0 || ! is_zero(n))
1159 goto regular_print;
1160
1161 bcfree($3->lasti); /* Op_field_spec */
1162 unref(n); /* Node_val */
1163 bcfree($3->nexti->nexti); /* Op_push_i */
1164 bcfree($3->nexti); /* Op_list */
1165 bcfree($3); /* Op_list */
1166 } else {
1167 if (do_lint && (rule == BEGIN || rule == END) && ! warned) {
1168 warned = true;
1169 lintwarn_ln($1->source_line,
1170 _("plain `print' in BEGIN or END rule should probably be `print \"\"'"));
1171 }
1172 }
1173
1174 $1->expr_count = 0;
1175 $1->opcode = Op_K_print_rec;
1176 if ($4 == NULL) { /* no redircetion */
1177 $1->redir_type = redirect_none;
1178 $$ = list_create($1);
1179 } else {
1180 INSTRUCTION *ip;
1181 ip = $4->nexti;
1182 $1->redir_type = ip->redir_type;
1183 $4->nexti = ip->nexti;
1184 bcfree(ip);
1185 $$ = list_append($4, $1);
1186 }
1187 } else {
1188 /* -----------------
1189 * [ output_redir ]
1190 * [ redirect exp ]
1191 * -----------------
1192 * [ expression_list ]
1193 * ------------------
1194 * [$1 | NULL | redir_type | expr_count]
1195 *
1196 */
1197 regular_print:
1198 if ($4 == NULL) { /* no redirection */
1199 if ($3 == NULL) { /* print/printf without arg */
1200 $1->expr_count = 0;
1201 if ($1->opcode == Op_K_print)
1202 $1->opcode = Op_K_print_rec;
1203 $1->redir_type = redirect_none;
1204 $$ = list_create($1);
1205 } else {
1206 INSTRUCTION *t = $3;
1207 $1->expr_count = count_expressions(&t, false);
1208 $1->redir_type = redirect_none;
1209 $$ = list_append(t, $1);
1210 }
1211 } else {
1212 INSTRUCTION *ip;
1213 ip = $4->nexti;
1214 $1->redir_type = ip->redir_type;
1215 $4->nexti = ip->nexti;
1216 bcfree(ip);
1217 if ($3 == NULL) {
1218 $1->expr_count = 0;
1219 if ($1->opcode == Op_K_print)
1220 $1->opcode = Op_K_print_rec;
1221 $$ = list_append($4, $1);
1222 } else {
1223 INSTRUCTION *t = $3;
1224 $1->expr_count = count_expressions(&t, false);
1225 $$ = list_append(list_merge($4, t), $1);
1226 }
1227 }
1228 }
1229 }
1230
1231 | LEX_DELETE NAME { sub_counter = 0; } delete_subscript_list
1232 {
1233 char *arr = $2->lextok;
1234
1235 $2->opcode = Op_push_array;
1236 $2->memory = variable($2->source_line, arr, Node_var_new);
1237
1238 if (! do_posix && ! do_traditional) {
1239 if ($2->memory == symbol_table)
1240 fatal(_("`delete' is not allowed with SYMTAB"));
1241 else if ($2->memory == func_table)
1242 fatal(_("`delete' is not allowed with FUNCTAB"));
1243 }
1244
1245 if ($4 == NULL) {
1246 /*
1247 * As of September 2012, POSIX has added support
1248 * for `delete array'. See:
1249 * http://austingroupbugs.net/view.php?id=544
1250 *
1251 * Thanks to Nathan Weeks for the initiative.
1252 *
1253 * Thus we no longer warn or check do_posix.
1254 * Also, since BWK awk supports it, we don't have to
1255 * check do_traditional either.
1256 */
1257 $1->expr_count = 0;
1258 $$ = list_append(list_create($2), $1);
1259 } else {
1260 $1->expr_count = sub_counter;
1261 $$ = list_append(list_append($4, $2), $1);
1262 }
1263 }
1264 | LEX_DELETE '(' NAME ')'
1265 /*
1266 * this is for tawk compatibility. maybe the warnings
1267 * should always be done.
1268 */
1269 {
1270 static bool warned = false;
1271 char *arr = $3->lextok;
1272
1273 if (do_lint && ! warned) {
1274 warned = true;
1275 lintwarn_ln($1->source_line,
1276 _("`delete(array)' is a non-portable tawk extension"));
1277 }
1278 if (do_traditional) {
1279 error_ln($1->source_line,
1280 _("`delete(array)' is a non-portable tawk extension"));
1281 }
1282 $3->memory = variable($3->source_line, arr, Node_var_new);
1283 $3->opcode = Op_push_array;
1284 $1->expr_count = 0;
1285 $$ = list_append(list_create($3), $1);
1286
1287 if (! do_posix && ! do_traditional) {
1288 if ($3->memory == symbol_table)
1289 fatal(_("`delete' is not allowed with SYMTAB"));
1290 else if ($3->memory == func_table)
1291 fatal(_("`delete' is not allowed with FUNCTAB"));
1292 }
1293 }
1294 | exp
1295 {
1296 $$ = optimize_assignment($1);
1297 }
1298 ;
1299
1300 opt_simple_stmt
1301 : /* empty */
1302 { $$ = NULL; }
1303 | simple_stmt
1304 { $$ = $1; }
1305 ;
1306
1307 case_statements
1308 : /* empty */
1309 { $$ = NULL; }
1310 | case_statements case_statement
1311 {
1312 if ($1 == NULL)
1313 $$ = list_create($2);
1314 else
1315 $$ = list_prepend($1, $2);
1316 }
1317 | case_statements error
1318 { $$ = NULL; }
1319 ;
1320
1321 case_statement
1322 : LEX_CASE case_value colon opt_nls statements
1323 {
1324 INSTRUCTION *casestmt = $5;
1325 if ($5 == NULL)
1326 casestmt = list_create(instruction(Op_no_op));
1327 if (do_pretty_print)
1328 (void) list_prepend(casestmt, instruction(Op_exec_count));
1329 $1->case_exp = $2;
1330 $1->case_stmt = casestmt;
1331 $1->comment = $4;
1332 bcfree($3);
1333 $$ = $1;
1334 }
1335 | LEX_DEFAULT colon opt_nls statements
1336 {
1337 INSTRUCTION *casestmt = $4;
1338 if ($4 == NULL)
1339 casestmt = list_create(instruction(Op_no_op));
1340 if (do_pretty_print)
1341 (void) list_prepend(casestmt, instruction(Op_exec_count));
1342 bcfree($2);
1343 $1->case_stmt = casestmt;
1344 $1->comment = $3;
1345 $$ = $1;
1346 }
1347 ;
1348
1349 case_value
1350 : YNUMBER
1351 { $$ = $1; }
1352 | '-' YNUMBER %prec UNARY
1353 {
1354 NODE *n = $2->memory;
1355 (void) force_number(n);
1356 negate_num(n);
1357 bcfree($1);
1358 $$ = $2;
1359 }
1360 | '+' YNUMBER %prec UNARY
1361 {
1362 NODE *n = $2->lasti->memory;
1363 bcfree($1);
1364 add_sign_to_num(n, '+');
1365 $$ = $2;
1366 }
1367 | YSTRING
1368 { $$ = $1; }
1369 | regexp
1370 {
1371 if ($1->memory->type == Node_regex)
1372 $1->opcode = Op_push_re;
1373 else
1374 $1->opcode = Op_push;
1375 $$ = $1;
1376 }
1377 | typed_regexp
1378 {
1379 assert(($1->memory->flags & REGEX) == REGEX);
1380 $1->opcode = Op_push_re;
1381 $$ = $1;
1382 }
1383 ;
1384
1385 print
1386 : LEX_PRINT
1387 { $$ = $1; }
1388 | LEX_PRINTF
1389 { $$ = $1; }
1390 ;
1391
1392 /*
1393 * Note: ``print(x)'' is already parsed by the first rule,
1394 * so there is no good in covering it by the second one too.
1395 */
1396 print_expression_list
1397 : opt_expression_list
1398 | '(' expression_list r_paren
1399 {
1400 $$ = $2;
1401 }
1402 ;
1403
1404 output_redir
1405 : /* empty */
1406 {
1407 in_print = false;
1408 in_parens = 0;
1409 $$ = NULL;
1410 }
1411 | IO_OUT { in_print = false; in_parens = 0; } common_exp
1412 {
1413 if ($1->redir_type == redirect_twoway
1414 && $3->lasti->opcode == Op_K_getline_redir
1415 && $3->lasti->redir_type == redirect_twoway)
1416 yyerror(_("multistage two-way pipelines don't work"));
1417 if (do_lint && $1->redir_type == redirect_output && $3->lasti->opcode == Op_concat)
1418 lintwarn(_("concatenation as I/O `>' redirection target is ambiguous"));
1419 $$ = list_prepend($3, $1);
1420 }
1421 ;
1422
1423 if_statement
1424 : LEX_IF '(' exp r_paren opt_nls statement
1425 {
1426 if ($5 != NULL)
1427 $1->comment = $5;
1428 add_lint($3, LINT_assign_in_cond);
1429 $$ = mk_condition($3, $1, $6, NULL, NULL);
1430 }
1431 | LEX_IF '(' exp r_paren opt_nls statement
1432 LEX_ELSE opt_nls statement
1433 {
1434 if ($5 != NULL)
1435 $1->comment = $5;
1436 if ($8 != NULL)
1437 $7->comment = $8;
1438 add_lint($3, LINT_assign_in_cond);
1439 $$ = mk_condition($3, $1, $6, $7, $9);
1440 }
1441 ;
1442
1443 nls
1444 : NEWLINE
1445 {
1446 $$ = $1;
1447 }
1448 | nls NEWLINE
1449 {
1450 if ($1 != NULL && $2 != NULL) {
1451 if ($1->memory->comment_type == EOL_COMMENT) {
1452 assert($2->memory->comment_type == BLOCK_COMMENT);
1453 $1->comment = $2; // chain them
1454 } else {
1455 merge_comments($1, $2);
1456 }
1457
1458 $$ = $1;
1459 } else if ($1 != NULL) {
1460 $$ = $1;
1461 } else if ($2 != NULL) {
1462 $$ = $2;
1463 } else
1464 $$ = NULL;
1465 }
1466 ;
1467
1468 opt_nls
1469 : /* empty */
1470 { $$ = NULL; }
1471 | nls
1472 { $$ = $1; }
1473 ;
1474
1475 input_redir
1476 : /* empty */
1477 { $$ = NULL; }
1478 | '<' simp_exp
1479 {
1480 bcfree($1);
1481 $$ = $2;
1482 }
1483 ;
1484
1485 opt_param_list
1486 : /* empty */
1487 { $$ = NULL; }
1488 | param_list
1489 { $$ = $1; }
1490 ;
1491
1492 param_list
1493 : NAME
1494 {
1495 $1->param_count = 0;
1496 $$ = list_create($1);
1497 }
1498 | param_list comma NAME
1499 {
1500 if ($1 != NULL && $3 != NULL) {
1501 $3->param_count = $1->lasti->param_count + 1;
1502 $$ = list_append($1, $3);
1503 yyerrok;
1504
1505 // newlines are allowed after commas, catch any comments
1506 if ($2 != NULL) {
1507 if ($1->comment != NULL)
1508 merge_comments($1->comment, $2);
1509 else
1510 $1->comment = $2;
1511 }
1512 } else
1513 $$ = NULL;
1514 }
1515 | error
1516 { $$ = NULL; }
1517 | param_list error
1518 { $$ = $1; }
1519 | param_list comma error
1520 { $$ = $1; }
1521 ;
1522
1523 /* optional expression, as in for loop */
1524 opt_exp
1525 : /* empty */
1526 { $$ = NULL; }
1527 | exp
1528 { $$ = $1; }
1529 ;
1530
1531 opt_expression_list
1532 : /* empty */
1533 { $$ = NULL; }
1534 | expression_list
1535 { $$ = $1; }
1536 ;
1537
1538 expression_list
1539 : exp
1540 { $$ = mk_expression_list(NULL, $1); }
1541 | expression_list comma exp
1542 {
1543 if ($2 != NULL)
1544 $1->lasti->comment = $2;
1545 $$ = mk_expression_list($1, $3);
1546 yyerrok;
1547 }
1548 | error
1549 { $$ = NULL; }
1550 | expression_list error
1551 {
1552 /*
1553 * Returning the expression list instead of NULL lets
1554 * snode get a list of arguments that it can count.
1555 */
1556 $$ = $1;
1557 }
1558 | expression_list error exp
1559 {
1560 /* Ditto */
1561 $$ = mk_expression_list($1, $3);
1562 }
1563 | expression_list comma error
1564 {
1565 /* Ditto */
1566 if ($2 != NULL)
1567 $1->lasti->comment = $2;
1568 $$ = $1;
1569 }
1570 ;
1571
1572 opt_fcall_expression_list
1573 : /* empty */
1574 { $$ = NULL; }
1575 | fcall_expression_list
1576 { $$ = $1; }
1577 ;
1578
1579 fcall_expression_list
1580 : fcall_exp
1581 { $$ = mk_expression_list(NULL, $1); }
1582 | fcall_expression_list comma fcall_exp
1583 {
1584 if ($2 != NULL)
1585 $1->lasti->comment = $2;
1586 $$ = mk_expression_list($1, $3);
1587 yyerrok;
1588 }
1589 | error
1590 { $$ = NULL; }
1591 | fcall_expression_list error
1592 {
1593 /*
1594 * Returning the expression list instead of NULL lets
1595 * snode get a list of arguments that it can count.
1596 */
1597 $$ = $1;
1598 }
1599 | fcall_expression_list error fcall_exp
1600 {
1601 /* Ditto */
1602 $$ = mk_expression_list($1, $3);
1603 }
1604 | fcall_expression_list comma error
1605 {
1606 /* Ditto */
1607 if ($2 != NULL)
1608 $1->comment = $2;
1609 $$ = $1;
1610 }
1611 ;
1612
1613 fcall_exp
1614 : exp { $$ = $1; }
1615 | typed_regexp { $$ = list_create($1); }
1616 ;
1617
1618 opt_fcall_exp
1619 : /* empty */
1620 { $$ = NULL; }
1621 | fcall_exp { $$ = $1; }
1622 ;
1623
1624 /* Expressions, not including the comma operator. */
1625 exp
1626 : variable assign_operator exp %prec ASSIGNOP
1627 {
1628 if (do_lint && $3->lasti->opcode == Op_match_rec)
1629 lintwarn_ln($2->source_line,
1630 _("regular expression on right of assignment"));
1631 $$ = mk_assignment($1, $3, $2);
1632 }
1633 | variable ASSIGN typed_regexp %prec ASSIGNOP
1634 {
1635 $$ = mk_assignment($1, list_create($3), $2);
1636 }
1637 | exp LEX_AND exp
1638 { $$ = mk_boolean($1, $3, $2); }
1639 | exp LEX_OR exp
1640 { $$ = mk_boolean($1, $3, $2); }
1641 | exp MATCHOP typed_regexp
1642 {
1643 if ($1->lasti->opcode == Op_match_rec)
1644 warning_ln($2->source_line,
1645 _("regular expression on left of `~' or `!~' operator"));
1646
1647 assert($3->opcode == Op_push_re
1648 && ($3->memory->flags & REGEX) != 0);
1649 /* RHS is @/.../ */
1650 $2->memory = $3->memory;
1651 bcfree($3);
1652 $$ = list_append($1, $2);
1653 }
1654 | exp MATCHOP exp
1655 {
1656 if ($1->lasti->opcode == Op_match_rec)
1657 warning_ln($2->source_line,
1658 _("regular expression on left of `~' or `!~' operator"));
1659
1660 if ($3->lasti == $3->nexti && $3->nexti->opcode == Op_match_rec) {
1661 /* RHS is /.../ */
1662 $2->memory = $3->nexti->memory;
1663 bcfree($3->nexti); /* Op_match_rec */
1664 bcfree($3); /* Op_list */
1665 $$ = list_append($1, $2);
1666 } else {
1667 $2->memory = make_regnode(Node_dynregex, NULL);
1668 $$ = list_append(list_merge($1, $3), $2);
1669 }
1670 }
1671 | exp LEX_IN simple_variable
1672 {
1673 if (do_lint_old)
1674 lintwarn_ln($2->source_line,
1675 _("old awk does not support the keyword `in' except after `for'"));
1676 $3->nexti->opcode = Op_push_array;
1677 $2->opcode = Op_in_array;
1678 $2->expr_count = 1;
1679 $$ = list_append(list_merge($1, $3), $2);
1680 }
1681 | exp a_relop exp %prec RELOP
1682 {
1683 if (do_lint && $3->lasti->opcode == Op_match_rec)
1684 lintwarn_ln($2->source_line,
1685 _("regular expression on right of comparison"));
1686 $$ = list_append(list_merge($1, $3), $2);
1687 }
1688 | exp '?' exp ':' exp
1689 { $$ = mk_condition($1, $2, $3, $4, $5); }
1690 | common_exp
1691 { $$ = $1; }
1692 ;
1693
1694 assign_operator
1695 : ASSIGN
1696 { $$ = $1; }
1697 | ASSIGNOP
1698 { $$ = $1; }
1699 | SLASH_BEFORE_EQUAL ASSIGN /* `/=' */
1700 {
1701 $2->opcode = Op_assign_quotient;
1702 $$ = $2;
1703 }
1704 ;
1705
1706 relop_or_less
1707 : RELOP
1708 { $$ = $1; }
1709 | '<'
1710 { $$ = $1; }
1711 ;
1712
1713 a_relop
1714 : relop_or_less
1715 { $$ = $1; }
1716 | '>'
1717 { $$ = $1; }
1718 ;
1719
1720 common_exp
1721 : simp_exp
1722 { $$ = $1; }
1723 | simp_exp_nc
1724 { $$ = $1; }
1725 | common_exp simp_exp %prec CONCAT_OP
1726 {
1727 int count = 2;
1728 bool is_simple_var = false;
1729
1730 if ($1->lasti->opcode == Op_concat) {
1731 /* multiple (> 2) adjacent strings optimization */
1732 is_simple_var = ($1->lasti->concat_flag & CSVAR) != 0;
1733 count = $1->lasti->expr_count + 1;
1734 $1->lasti->opcode = Op_no_op;
1735 } else {
1736 is_simple_var = ($1->nexti->opcode == Op_push
1737 && $1->lasti == $1->nexti); /* first exp. is a simple
1738 * variable?; kludge for use
1739 * in Op_assign_concat.
1740 */
1741 }
1742
1743 if (do_optimize
1744 && $1->nexti == $1->lasti && $1->nexti->opcode == Op_push_i
1745 && $2->nexti == $2->lasti && $2->nexti->opcode == Op_push_i
1746 ) {
1747 NODE *n1 = $1->nexti->memory;
1748 NODE *n2 = $2->nexti->memory;
1749 size_t nlen;
1750
1751 // 1.5 "" # can't fold this if program mucks with CONVFMT.
1752 // See test #12 in test/posix.awk.
1753 // Also can't fold if one or the other is translatable.
1754 if ((n1->flags & (NUMBER|NUMINT|INTLSTR)) != 0 || (n2->flags & (NUMBER|NUMINT|INTLSTR)) != 0)
1755 goto plain_concat;
1756
1757 n1 = force_string(n1);
1758 n2 = force_string(n2);
1759 nlen = n1->stlen + n2->stlen;
1760 erealloc(n1->stptr, char *, nlen + 1, "constant fold");
1761 memcpy(n1->stptr + n1->stlen, n2->stptr, n2->stlen);
1762 n1->stlen = nlen;
1763 n1->stptr[nlen] = '\0';
1764 n1->flags &= ~(NUMCUR|NUMBER|NUMINT);
1765 n1->flags |= (STRING|STRCUR);
1766 unref(n2);
1767 bcfree($2->nexti);
1768 bcfree($2);
1769 $$ = $1;
1770 } else {
1771 plain_concat:
1772 $$ = list_append(list_merge($1, $2), instruction(Op_concat));
1773 $$->lasti->concat_flag = (is_simple_var ? CSVAR : 0);
1774 $$->lasti->expr_count = count;
1775 if (count > max_args)
1776 max_args = count;
1777 }
1778 }
1779 ;
1780
1781 simp_exp
1782 : non_post_simp_exp
1783 /* Binary operators in order of decreasing precedence. */
1784 | simp_exp '^' simp_exp
1785 { $$ = mk_binary($1, $3, $2); }
1786 | simp_exp '*' simp_exp
1787 { $$ = mk_binary($1, $3, $2); }
1788 | simp_exp '/' simp_exp
1789 { $$ = mk_binary($1, $3, $2); }
1790 | simp_exp '%' simp_exp
1791 { $$ = mk_binary($1, $3, $2); }
1792 | simp_exp '+' simp_exp
1793 { $$ = mk_binary($1, $3, $2); }
1794 | simp_exp '-' simp_exp
1795 { $$ = mk_binary($1, $3, $2); }
1796 | LEX_GETLINE opt_variable input_redir
1797 {
1798 /*
1799 * In BEGINFILE/ENDFILE, allow `getline [var] < file'
1800 */
1801
1802 if ((rule == BEGINFILE || rule == ENDFILE) && $3 == NULL)
1803 error_ln($1->source_line,
1804 _("non-redirected `getline' invalid inside `%s' rule"), ruletab[rule]);
1805 if (do_lint && rule == END && $3 == NULL)
1806 lintwarn_ln($1->source_line,
1807 _("non-redirected `getline' undefined inside END action"));
1808 $$ = mk_getline($1, $2, $3, redirect_input);
1809 }
1810 | variable INCREMENT
1811 {
1812 $2->opcode = Op_postincrement;
1813 $$ = mk_assignment($1, NULL, $2);
1814 }
1815 | variable DECREMENT
1816 {
1817 $2->opcode = Op_postdecrement;
1818 $$ = mk_assignment($1, NULL, $2);
1819 }
1820 | '(' expression_list r_paren LEX_IN simple_variable
1821 {
1822 if (do_lint_old) {
1823 /* first one is warning so that second one comes out if warnings are fatal */
1824 warning_ln($4->source_line,
1825 _("old awk does not support the keyword `in' except after `for'"));
1826 lintwarn_ln($4->source_line,
1827 _("old awk does not support multidimensional arrays"));
1828 }
1829 $5->nexti->opcode = Op_push_array;
1830 $4->opcode = Op_in_array;
1831 if ($2 == NULL) { /* error */
1832 errcount++;
1833 $4->expr_count = 0;
1834 $$ = list_merge($5, $4);
1835 } else {
1836 INSTRUCTION *t = $2;
1837 $4->expr_count = count_expressions(&t, false);
1838 $$ = list_append(list_merge(t, $5), $4);
1839 }
1840 }
1841 ;
1842
1843 /* Expressions containing "| getline" lose the ability to be on the
1844 right-hand side of a concatenation. */
1845 simp_exp_nc
1846 : common_exp IO_IN LEX_GETLINE opt_variable
1847 {
1848 $$ = mk_getline($3, $4, $1, $2->redir_type);
1849 bcfree($2);
1850 }
1851 /* Binary operators in order of decreasing precedence. */
1852 | simp_exp_nc '^' simp_exp
1853 { $$ = mk_binary($1, $3, $2); }
1854 | simp_exp_nc '*' simp_exp
1855 { $$ = mk_binary($1, $3, $2); }
1856 | simp_exp_nc '/' simp_exp
1857 { $$ = mk_binary($1, $3, $2); }
1858 | simp_exp_nc '%' simp_exp
1859 { $$ = mk_binary($1, $3, $2); }
1860 | simp_exp_nc '+' simp_exp
1861 { $$ = mk_binary($1, $3, $2); }
1862 | simp_exp_nc '-' simp_exp
1863 { $$ = mk_binary($1, $3, $2); }
1864 ;
1865
1866 non_post_simp_exp
1867 : regexp
1868 {
1869 $$ = list_create($1);
1870 }
1871 | '!' simp_exp %prec UNARY
1872 {
1873 if ($2->opcode == Op_match_rec) {
1874 $2->opcode = Op_nomatch;
1875 $1->opcode = Op_push_i;
1876 $1->memory = set_profile_text(make_number(0.0), "0", 1);
1877 $$ = list_append(list_append(list_create($1),
1878 instruction(Op_field_spec)), $2);
1879 } else {
1880 if (do_optimize && $2->nexti == $2->lasti
1881 && $2->nexti->opcode == Op_push_i
1882 && ($2->nexti->memory->flags & (MPFN|MPZN|INTLSTR)) == 0
1883 ) {
1884 NODE *n = $2->nexti->memory;
1885 if ((n->flags & STRING) != 0) {
1886 n->numbr = (AWKNUM) (n->stlen == 0);
1887 n->flags &= ~(STRCUR|STRING);
1888 n->flags |= (NUMCUR|NUMBER);
1889 efree(n->stptr);
1890 n->stptr = NULL;
1891 n->stlen = 0;
1892 } else
1893 n->numbr = (AWKNUM) (n->numbr == 0.0);
1894 bcfree($1);
1895 $$ = $2;
1896 } else {
1897 $1->opcode = Op_not;
1898 add_lint($2, LINT_assign_in_cond);
1899 $$ = list_append($2, $1);
1900 }
1901 }
1902 }
1903 | '(' exp r_paren
1904 {
1905 // Always include. Allows us to lint warn on
1906 // print "foo" > "bar" 1
1907 // but not warn on
1908 // print "foo" > ("bar" 1)
1909 $$ = list_append($2, bcalloc(Op_parens, 1, sourceline));
1910 }
1911 | LEX_BUILTIN '(' opt_fcall_expression_list r_paren
1912 {
1913 $$ = snode($3, $1);
1914 if ($$ == NULL)
1915 YYABORT;
1916 }
1917 | LEX_LENGTH '(' opt_fcall_expression_list r_paren
1918 {
1919 $$ = snode($3, $1);
1920 if ($$ == NULL)
1921 YYABORT;
1922 }
1923 | LEX_LENGTH
1924 {
1925 static bool warned = false;
1926
1927 if (do_lint && ! warned) {
1928 warned = true;
1929 lintwarn_ln($1->source_line,
1930 _("call of `length' without parentheses is not portable"));
1931 }
1932 $$ = snode(NULL, $1);
1933 if ($$ == NULL)
1934 YYABORT;
1935 }
1936 | func_call
1937 | variable
1938 | INCREMENT variable
1939 {
1940 $1->opcode = Op_preincrement;
1941 $$ = mk_assignment($2, NULL, $1);
1942 }
1943 | DECREMENT variable
1944 {
1945 $1->opcode = Op_predecrement;
1946 $$ = mk_assignment($2, NULL, $1);
1947 }
1948 | YNUMBER
1949 {
1950 $$ = list_create($1);
1951 }
1952 | YSTRING
1953 {
1954 $$ = list_create($1);
1955 }
1956 | '-' simp_exp %prec UNARY
1957 {
1958 if ($2->lasti->opcode == Op_push_i
1959 && ($2->lasti->memory->flags & STRING) == 0
1960 ) {
1961 NODE *n = $2->lasti->memory;
1962 (void) force_number(n);
1963 negate_num(n);
1964 $$ = $2;
1965 bcfree($1);
1966 } else {
1967 $1->opcode = Op_unary_minus;
1968 $$ = list_append($2, $1);
1969 }
1970 }
1971 | '+' simp_exp %prec UNARY
1972 {
1973 if ($2->lasti->opcode == Op_push_i
1974 && ($2->lasti->memory->flags & STRING) == 0
1975 && ($2->lasti->memory->flags & NUMCONSTSTR) != 0) {
1976 NODE *n = $2->lasti->memory;
1977 add_sign_to_num(n, '+');
1978 $$ = $2;
1979 bcfree($1);
1980 } else {
1981 /*
1982 * was: $$ = $2
1983 * POSIX semantics: force a conversion to numeric type
1984 */
1985 $1->opcode = Op_unary_plus;
1986 $$ = list_append($2, $1);
1987 }
1988 }
1989 ;
1990
1991 func_call
1992 : direct_func_call
1993 {
1994 func_use($1->lasti->func_name, FUNC_USE);
1995 $$ = $1;
1996 }
1997 | '@' direct_func_call
1998 {
1999 /* indirect function call */
2000 INSTRUCTION *f, *t;
2001 char *name;
2002 NODE *indirect_var;
2003 static bool warned = false;
2004 const char *msg = _("indirect function calls are a gawk extension");
2005
2006 if (do_traditional || do_posix)
2007 yyerror("%s", msg);
2008 else if (do_lint_extensions && ! warned) {
2009 warned = true;
2010 lintwarn("%s", msg);
2011 }
2012
2013 f = $2->lasti;
2014 f->opcode = Op_indirect_func_call;
2015 name = estrdup(f->func_name, strlen(f->func_name));
2016 if (is_std_var(name))
2017 yyerror(_("cannot use special variable `%s' for indirect function call"), name);
2018 indirect_var = variable(f->source_line, name, Node_var_new);
2019 t = instruction(Op_push);
2020 t->memory = indirect_var;
2021
2022 /* prepend indirect var instead of appending to arguments (opt_expression_list),
2023 * and pop it off in setup_frame (eval.c) (left to right evaluation order); Test case:
2024 * f = "fun"
2025 * @f(f="real_fun")
2026 */
2027
2028 $$ = list_prepend($2, t);
2029 at_seen = false;
2030 }
2031 ;
2032
2033 direct_func_call
2034 : FUNC_CALL '(' opt_fcall_expression_list r_paren
2035 {
2036 NODE *n;
2037 char *name = $1->func_name;
2038 char *qname = qualify_name(name, strlen(name));
2039
2040 if (qname != name) {
2041 efree((char *) name);
2042 $1->func_name = qname;
2043 }
2044
2045 if (! at_seen) {
2046 n = lookup($1->func_name);
2047 if (n != NULL && n->type != Node_func
2048 && n->type != Node_ext_func) {
2049 error_ln($1->source_line,
2050 _("attempt to use non-function `%s' in function call"),
2051 $1->func_name);
2052 }
2053 }
2054
2055 param_sanity($3);
2056 $1->opcode = Op_func_call;
2057 $1->func_body = NULL;
2058 if ($3 == NULL) { /* no argument or error */
2059 ($1 + 1)->expr_count = 0;
2060 $$ = list_create($1);
2061 } else {
2062 INSTRUCTION *t = $3;
2063 ($1 + 1)->expr_count = count_expressions(&t, true);
2064 $$ = list_append(t, $1);
2065 }
2066 }
2067 ;
2068
2069 opt_variable
2070 : /* empty */
2071 { $$ = NULL; }
2072 | variable
2073 { $$ = $1; }
2074 ;
2075
2076 delete_subscript_list
2077 : /* empty */
2078 { $$ = NULL; }
2079 | delete_subscript SUBSCRIPT
2080 { $$ = $1; }
2081 ;
2082
2083 delete_subscript
2084 : delete_exp_list
2085 { $$ = $1; }
2086 | delete_subscript delete_exp_list
2087 {
2088 $$ = list_merge($1, $2);
2089 }
2090 ;
2091
2092 delete_exp_list
2093 : bracketed_exp_list
2094 {
2095 INSTRUCTION *ip = $1->lasti;
2096 int count = ip->sub_count; /* # of SUBSEP-seperated expressions */
2097 if (count > 1) {
2098 /* change Op_subscript or Op_sub_array to Op_concat */
2099 ip->opcode = Op_concat;
2100 ip->concat_flag = CSUBSEP;
2101 ip->expr_count = count;
2102 } else
2103 ip->opcode = Op_no_op;
2104 sub_counter++; /* count # of dimensions */
2105 $$ = $1;
2106 }
2107 ;
2108
2109 bracketed_exp_list
2110 : '[' expression_list ']'
2111 {
2112 INSTRUCTION *t = $2;
2113 if ($2 == NULL) {
2114 error_ln($3->source_line,
2115 _("invalid subscript expression"));
2116 /* install Null string as subscript. */
2117 t = list_create(instruction(Op_push_i));
2118 t->nexti->memory = dupnode(Nnull_string);
2119 $3->sub_count = 1;
2120 } else
2121 $3->sub_count = count_expressions(&t, false);
2122 $$ = list_append(t, $3);
2123 }
2124 ;
2125
2126 subscript
2127 : bracketed_exp_list
2128 { $$ = $1; }
2129 | subscript bracketed_exp_list
2130 {
2131 $$ = list_merge($1, $2);
2132 }
2133 ;
2134
2135 subscript_list
2136 : subscript SUBSCRIPT
2137 { $$ = $1; }
2138 ;
2139
2140 simple_variable
2141 : NAME
2142 {
2143 $1->opcode = Op_push;
2144 $1->memory = variable($1->source_line, $1->lextok, Node_var_new);
2145 $$ = list_create($1);
2146 }
2147 | NAME subscript_list
2148 {
2149 char *arr = $1->lextok;
2150
2151 $1->memory = variable($1->source_line, arr, Node_var_array);
2152 $1->opcode = Op_push_array;
2153 $$ = list_prepend($2, $1);
2154 }
2155 ;
2156
2157 variable
2158 : simple_variable
2159 {
2160 INSTRUCTION *ip = $1->nexti;
2161 if (ip->opcode == Op_push
2162 && ip->memory->type == Node_var
2163 && ip->memory->var_update
2164 ) {
2165 $$ = list_prepend($1, instruction(Op_var_update));
2166 $$->nexti->update_var = ip->memory->var_update;
2167 } else
2168 $$ = $1;
2169 }
2170 | '$' non_post_simp_exp opt_incdec
2171 {
2172 $$ = list_append($2, $1);
2173 if ($3 != NULL)
2174 mk_assignment($2, NULL, $3);
2175 }
2176 ;
2177
2178 opt_incdec
2179 : INCREMENT
2180 {
2181 $1->opcode = Op_postincrement;
2182 }
2183 | DECREMENT
2184 {
2185 $1->opcode = Op_postdecrement;
2186 }
2187 | /* empty */
2188 { $$ = NULL; }
2189 ;
2190
2191 l_brace
2192 : '{' opt_nls { $$ = $2; }
2193 ;
2194
2195 r_brace
2196 : '}' opt_nls { $$ = $2; yyerrok; }
2197 ;
2198
2199 r_paren
2200 : ')' { yyerrok; }
2201 ;
2202
2203 opt_semi
2204 : /* empty */
2205 { $$ = NULL; }
2206 | semi
2207 ;
2208
2209 semi
2210 : ';' { yyerrok; }
2211 ;
2212
2213 colon
2214 : ':' { $$ = $1; yyerrok; }
2215 ;
2216
2217 comma
2218 : ',' opt_nls { $$ = $2; yyerrok; }
2219 ;
2220 %%
2221
2222 struct token {
2223 const char *operator; /* text to match */
2224 OPCODE value; /* type */
2225 int class; /* lexical class */
2226 unsigned flags; /* # of args. allowed and compatability */
2227 # define ARGS 0xFF /* 0, 1, 2, 3 args allowed (any combination */
2228 # define A(n) (1<<(n))
2229 # define VERSION_MASK 0xFF00 /* old awk is zero */
2230 # define NOT_OLD 0x0100 /* feature not in old awk */
2231 # define NOT_POSIX 0x0200 /* feature not in POSIX */
2232 # define GAWKX 0x0400 /* gawk extension */
2233 # define BREAK 0x0800 /* break allowed inside */
2234 # define CONTINUE 0x1000 /* continue allowed inside */
2235 # define DEBUG_USE 0x2000 /* for use by developers */
2236
2237 NODE *(*ptr)(int); /* function that implements this keyword */
2238 NODE *(*ptr2)(int); /* alternate arbitrary-precision function */
2239 };
2240
2241 #ifdef USE_EBCDIC
2242 /* tokcompare --- lexicographically compare token names for sorting */
2243
2244 static int
tokcompare(const void * l,const void * r)2245 tokcompare(const void *l, const void *r)
2246 {
2247 struct token *lhs, *rhs;
2248
2249 lhs = (struct token *) l;
2250 rhs = (struct token *) r;
2251
2252 return strcmp(lhs->operator, rhs->operator);
2253 }
2254 #endif
2255
2256 /*
2257 * Tokentab is sorted ASCII ascending order, so it can be binary searched.
2258 * See check_special(), which sorts the table on EBCDIC systems.
2259 * Function pointers come from declarations in awk.h.
2260 */
2261
2262 #ifdef HAVE_MPFR
2263 #define MPF(F) do_mpfr_##F
2264 #else
2265 #define MPF(F) 0
2266 #endif
2267
2268 static const struct token tokentab[] = {
2269 {"BEGIN", Op_rule, LEX_BEGIN, 0, 0, 0},
2270 {"BEGINFILE", Op_rule, LEX_BEGINFILE, GAWKX, 0, 0},
2271 {"END", Op_rule, LEX_END, 0, 0, 0},
2272 {"ENDFILE", Op_rule, LEX_ENDFILE, GAWKX, 0, 0},
2273 #ifdef ARRAYDEBUG
2274 {"adump", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|DEBUG_USE, do_adump, 0},
2275 #endif
2276 {"and", Op_builtin, LEX_BUILTIN, GAWKX, do_and, MPF(and)},
2277 {"asort", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3), do_asort, 0},
2278 {"asorti", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3), do_asorti, 0},
2279 {"atan2", Op_builtin, LEX_BUILTIN, NOT_OLD|A(2), do_atan2, MPF(atan2)},
2280 {"bindtextdomain", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_bindtextdomain, 0},
2281 {"break", Op_K_break, LEX_BREAK, 0, 0, 0},
2282 {"case", Op_K_case, LEX_CASE, GAWKX, 0, 0},
2283 {"close", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1)|A(2), do_close, 0},
2284 {"compl", Op_builtin, LEX_BUILTIN, GAWKX|A(1), do_compl, MPF(compl)},
2285 {"continue", Op_K_continue, LEX_CONTINUE, 0, 0, 0},
2286 {"cos", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_cos, MPF(cos)},
2287 {"dcgettext", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3), do_dcgettext, 0},
2288 {"dcngettext", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3)|A(4)|A(5), do_dcngettext, 0},
2289 {"default", Op_K_default, LEX_DEFAULT, GAWKX, 0, 0},
2290 {"delete", Op_K_delete, LEX_DELETE, NOT_OLD, 0, 0},
2291 {"do", Op_K_do, LEX_DO, NOT_OLD|BREAK|CONTINUE, 0, 0},
2292 {"else", Op_K_else, LEX_ELSE, 0, 0, 0},
2293 {"eval", Op_symbol, LEX_EVAL, 0, 0, 0},
2294 {"exit", Op_K_exit, LEX_EXIT, 0, 0, 0},
2295 {"exp", Op_builtin, LEX_BUILTIN, A(1), do_exp, MPF(exp)},
2296 {"fflush", Op_builtin, LEX_BUILTIN, A(0)|A(1), do_fflush, 0},
2297 {"for", Op_K_for, LEX_FOR, BREAK|CONTINUE, 0, 0},
2298 {"func", Op_func, LEX_FUNCTION, NOT_POSIX|NOT_OLD, 0, 0},
2299 {"function", Op_func, LEX_FUNCTION, NOT_OLD, 0, 0},
2300 {"gensub", Op_sub_builtin, LEX_BUILTIN, GAWKX|A(3)|A(4), 0, 0},
2301 {"getline", Op_K_getline_redir, LEX_GETLINE, NOT_OLD, 0, 0},
2302 {"gsub", Op_sub_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), 0, 0},
2303 {"if", Op_K_if, LEX_IF, 0, 0, 0},
2304 {"in", Op_symbol, LEX_IN, 0, 0, 0},
2305 {"include", Op_symbol, LEX_INCLUDE, GAWKX, 0, 0},
2306 {"index", Op_builtin, LEX_BUILTIN, A(2), do_index, 0},
2307 {"int", Op_builtin, LEX_BUILTIN, A(1), do_int, MPF(int)},
2308 #ifdef SUPPLY_INTDIV
2309 {"intdiv0", Op_builtin, LEX_BUILTIN, GAWKX|A(3), do_intdiv, MPF(intdiv)},
2310 #endif
2311 {"isarray", Op_builtin, LEX_BUILTIN, GAWKX|A(1), do_isarray, 0},
2312 {"length", Op_builtin, LEX_LENGTH, A(0)|A(1), do_length, 0},
2313 {"load", Op_symbol, LEX_LOAD, GAWKX, 0, 0},
2314 {"log", Op_builtin, LEX_BUILTIN, A(1), do_log, MPF(log)},
2315 {"lshift", Op_builtin, LEX_BUILTIN, GAWKX|A(2), do_lshift, MPF(lshift)},
2316 {"match", Op_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_match, 0},
2317 {"mktime", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_mktime, 0},
2318 {"namespace", Op_symbol, LEX_NAMESPACE, GAWKX, 0, 0},
2319 {"next", Op_K_next, LEX_NEXT, 0, 0, 0},
2320 {"nextfile", Op_K_nextfile, LEX_NEXTFILE, 0, 0, 0},
2321 {"or", Op_builtin, LEX_BUILTIN, GAWKX, do_or, MPF(or)},
2322 {"patsplit", Op_builtin, LEX_BUILTIN, GAWKX|A(2)|A(3)|A(4), do_patsplit, 0},
2323 {"print", Op_K_print, LEX_PRINT, 0, 0, 0},
2324 {"printf", Op_K_printf, LEX_PRINTF, 0, 0, 0},
2325 {"rand", Op_builtin, LEX_BUILTIN, NOT_OLD|A(0), do_rand, MPF(rand)},
2326 {"return", Op_K_return, LEX_RETURN, NOT_OLD, 0, 0},
2327 {"rshift", Op_builtin, LEX_BUILTIN, GAWKX|A(2), do_rshift, MPF(rshift)},
2328 {"sin", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_sin, MPF(sin)},
2329 {"split", Op_builtin, LEX_BUILTIN, A(2)|A(3)|A(4), do_split, 0},
2330 {"sprintf", Op_builtin, LEX_BUILTIN, 0, do_sprintf, 0},
2331 {"sqrt", Op_builtin, LEX_BUILTIN, A(1), do_sqrt, MPF(sqrt)},
2332 {"srand", Op_builtin, LEX_BUILTIN, NOT_OLD|A(0)|A(1), do_srand, MPF(srand)},
2333 #if defined(GAWKDEBUG) || defined(ARRAYDEBUG) /* || ... */
2334 {"stopme", Op_builtin, LEX_BUILTIN, GAWKX|A(0)|DEBUG_USE, stopme, 0},
2335 #endif
2336 {"strftime", Op_builtin, LEX_BUILTIN, GAWKX|A(0)|A(1)|A(2)|A(3), do_strftime, 0},
2337 {"strtonum", Op_builtin, LEX_BUILTIN, GAWKX|A(1), do_strtonum, MPF(strtonum)},
2338 {"sub", Op_sub_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), 0, 0},
2339 {"substr", Op_builtin, LEX_BUILTIN, A(2)|A(3), do_substr, 0},
2340 {"switch", Op_K_switch, LEX_SWITCH, GAWKX|BREAK, 0, 0},
2341 {"system", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_system, 0},
2342 {"systime", Op_builtin, LEX_BUILTIN, GAWKX|A(0), do_systime, 0},
2343 {"tolower", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_tolower, 0},
2344 {"toupper", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_toupper, 0},
2345 {"typeof", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_typeof, 0},
2346 {"while", Op_K_while, LEX_WHILE, BREAK|CONTINUE, 0, 0},
2347 {"xor", Op_builtin, LEX_BUILTIN, GAWKX, do_xor, MPF(xor)},
2348 };
2349
2350 /* Variable containing the current shift state. */
2351 static mbstate_t cur_mbstate;
2352 /* Ring buffer containing current characters. */
2353 #define MAX_CHAR_IN_RING_BUFFER 8
2354 #define RING_BUFFER_SIZE (MAX_CHAR_IN_RING_BUFFER * MB_LEN_MAX)
2355 static char cur_char_ring[RING_BUFFER_SIZE];
2356 /* Index for ring buffers. */
2357 static int cur_ring_idx;
2358 /* This macro means that last nextc() return a singlebyte character
2359 or 1st byte of a multibyte character. */
2360 #define nextc_is_1stbyte (cur_char_ring[cur_ring_idx] == 1)
2361
2362 /* getfname --- return name of a builtin function (for pretty printing) */
2363
2364 const char *
getfname(NODE * (* fptr)(int),bool prepend_awk)2365 getfname(NODE *(*fptr)(int), bool prepend_awk)
2366 {
2367 int i, j;
2368 static char buf[100];
2369
2370 j = sizeof(tokentab) / sizeof(tokentab[0]);
2371 /* linear search, no other way to do it */
2372 for (i = 0; i < j; i++) {
2373 if (tokentab[i].ptr == fptr || tokentab[i].ptr2 == fptr) {
2374 if (prepend_awk && (tokentab[i].flags & GAWKX) != 0) {
2375 sprintf(buf, "awk::%s", tokentab[i].operator);
2376 return buf;
2377 }
2378 return tokentab[i].operator;
2379 }
2380 }
2381
2382 return NULL;
2383 }
2384
2385 /* negate_num --- negate a number in NODE */
2386
2387 void
negate_num(NODE * n)2388 negate_num(NODE *n)
2389 {
2390 #ifdef HAVE_MPFR
2391 int tval = 0;
2392 #endif
2393
2394 add_sign_to_num(n, '-');
2395
2396 if (! is_mpg_number(n)) {
2397 n->numbr = -n->numbr;
2398 return;
2399 }
2400
2401 #ifdef HAVE_MPFR
2402 if (is_mpg_integer(n)) {
2403 if (! is_zero(n)) {
2404 mpz_neg(n->mpg_i, n->mpg_i);
2405 return;
2406 }
2407
2408 /*
2409 * 0 --> -0 conversion. Requires turning the MPG integer
2410 * into an MPFR float.
2411 */
2412
2413 mpz_clear(n->mpg_i); /* release the integer storage */
2414
2415 /* Convert and fall through. */
2416 tval = mpfr_set_d(n->mpg_numbr, 0.0, ROUND_MODE);
2417 IEEE_FMT(n->mpg_numbr, tval);
2418 n->flags &= ~MPZN;
2419 n->flags |= MPFN;
2420 }
2421
2422 /* mpfr float case */
2423 tval = mpfr_neg(n->mpg_numbr, n->mpg_numbr, ROUND_MODE);
2424 IEEE_FMT(n->mpg_numbr, tval);
2425 #endif
2426 }
2427
2428 /* add_sign_to_num --- make a constant unary plus or minus for profiling */
2429
2430 static void
add_sign_to_num(NODE * n,char sign)2431 add_sign_to_num(NODE *n, char sign)
2432 {
2433 if ((n->flags & NUMCONSTSTR) != 0) {
2434 char *s;
2435
2436 s = n->stptr;
2437 memmove(& s[1], & s[0], n->stlen + 1);
2438 s[0] = sign;
2439 n->stlen++;
2440 }
2441 }
2442
2443 /* print_included_from --- print `Included from ..' file names and locations */
2444
2445 static void
print_included_from()2446 print_included_from()
2447 {
2448 int saveline, line;
2449 SRCFILE *s;
2450
2451 /* suppress current file name, line # from `.. included from ..' msgs */
2452 saveline = sourceline;
2453 sourceline = 0;
2454
2455 for (s = sourcefile; s != NULL && s->stype == SRC_INC; ) {
2456 s = s->next;
2457 if (s == NULL || s->fd <= INVALID_HANDLE)
2458 continue;
2459 line = s->srclines;
2460
2461 /* if last token is NEWLINE, line number is off by 1. */
2462 if (s->lasttok == NEWLINE)
2463 line--;
2464 msg("%s %s:%d%c",
2465 s->prev == sourcefile ? "In file included from"
2466 : " from",
2467 (s->stype == SRC_INC ||
2468 s->stype == SRC_FILE) ? s->src : "cmd. line",
2469 line,
2470 s->stype == SRC_INC ? ',' : ':'
2471 );
2472 }
2473 sourceline = saveline;
2474 }
2475
2476 /* warning_ln --- print a warning message with location */
2477
2478 static void
warning_ln(int line,const char * mesg,...)2479 warning_ln(int line, const char *mesg, ...)
2480 {
2481 va_list args;
2482 int saveline;
2483
2484 saveline = sourceline;
2485 sourceline = line;
2486 print_included_from();
2487 va_start(args, mesg);
2488 err(false, _("warning: "), mesg, args);
2489 va_end(args);
2490 sourceline = saveline;
2491 }
2492
2493 /* lintwarn_ln --- print a lint warning and location */
2494
2495 static void
lintwarn_ln(int line,const char * mesg,...)2496 lintwarn_ln(int line, const char *mesg, ...)
2497 {
2498 va_list args;
2499 int saveline;
2500
2501 saveline = sourceline;
2502 sourceline = line;
2503 print_included_from();
2504 va_start(args, mesg);
2505 if (lintfunc == r_fatal)
2506 err(true, _("fatal: "), mesg, args);
2507 else
2508 err(false, _("warning: "), mesg, args);
2509 va_end(args);
2510 sourceline = saveline;
2511 if (lintfunc == r_fatal)
2512 gawk_exit(EXIT_FATAL);
2513 }
2514
2515 /* error_ln --- print an error message and location */
2516
2517 static void
error_ln(int line,const char * m,...)2518 error_ln(int line, const char *m, ...)
2519 {
2520 va_list args;
2521 int saveline;
2522
2523 saveline = sourceline;
2524 sourceline = line;
2525 print_included_from();
2526 errcount++;
2527 va_start(args, m);
2528 err(false, "error: ", m, args);
2529 va_end(args);
2530 sourceline = saveline;
2531 }
2532
2533 /* yyerror --- print a syntax error message, show where */
2534
2535 static void
yyerror(const char * m,...)2536 yyerror(const char *m, ...)
2537 {
2538 va_list args;
2539 const char *mesg = NULL;
2540 char *bp, *cp;
2541 char *scan;
2542 char *buf;
2543 int count;
2544 static char end_of_file_line[] = "(END OF FILE)";
2545 static char syntax_error[] = "syntax error";
2546 static size_t syn_err_len = sizeof(syntax_error) - 1;
2547 bool generic_error = (strncmp(m, syntax_error, syn_err_len) == 0);
2548
2549 print_included_from();
2550
2551 errcount++;
2552 /* Find the current line in the input file */
2553 if (lexptr && lexeme) {
2554 if (thisline == NULL) {
2555 cp = lexeme;
2556 if (*cp == '\n') {
2557 if (cp > lexptr_begin)
2558 cp--;
2559 mesg = _("unexpected newline or end of string");
2560 }
2561 for (; cp != lexptr_begin && *cp != '\n'; --cp)
2562 continue;
2563 if (*cp == '\n')
2564 cp++;
2565 thisline = cp;
2566 }
2567 /* NL isn't guaranteed */
2568 bp = lexeme;
2569 if (bp < thisline)
2570 bp = thisline + 1;
2571 while (bp < lexend && *bp && *bp != '\n')
2572 bp++;
2573 } else {
2574 thisline = end_of_file_line;
2575 bp = thisline + strlen(thisline);
2576 }
2577
2578 if (lexeof && mesg == NULL && generic_error) {
2579 msg("%s", end_of_file_line);
2580 mesg = _("source files / command-line arguments must contain complete functions or rules");
2581 } else
2582 msg("%.*s", (int) (bp - thisline), thisline);
2583
2584 va_start(args, m);
2585 if (mesg == NULL)
2586 mesg = m;
2587
2588 count = strlen(mesg) + 1;
2589 if (lexptr != NULL)
2590 count += (lexeme - thisline) + 2;
2591 ezalloc(buf, char *, count+1, "yyerror");
2592
2593 bp = buf;
2594
2595 if (lexptr != NULL) {
2596 scan = thisline;
2597 while (scan < lexeme)
2598 if (*scan++ == '\t')
2599 *bp++ = '\t';
2600 else
2601 *bp++ = ' ';
2602 *bp++ = '^';
2603 *bp++ = ' ';
2604 }
2605 strcpy(bp, mesg);
2606 err(false, "", buf, args);
2607 va_end(args);
2608 efree(buf);
2609 }
2610
2611 /* mk_program --- create a single list of instructions */
2612
2613 static INSTRUCTION *
mk_program()2614 mk_program()
2615 {
2616 INSTRUCTION *cp, *tmp;
2617
2618 #define begin_block rule_block[BEGIN]
2619 #define end_block rule_block[END]
2620 #define prog_block rule_block[Rule]
2621 #define beginfile_block rule_block[BEGINFILE]
2622 #define endfile_block rule_block[ENDFILE]
2623
2624 if (end_block == NULL)
2625 end_block = list_create(ip_end);
2626 else
2627 (void) list_prepend(end_block, ip_end);
2628
2629 if (! in_main_context()) {
2630 if (begin_block != NULL && prog_block != NULL)
2631 cp = list_merge(begin_block, prog_block);
2632 else
2633 cp = (begin_block != NULL) ? begin_block : prog_block;
2634
2635 if (cp != NULL)
2636 (void) list_merge(cp, end_block);
2637 else
2638 cp = end_block;
2639
2640 (void) list_append(cp, instruction(Op_stop));
2641 goto out;
2642 }
2643
2644 if (endfile_block == NULL)
2645 endfile_block = list_create(ip_endfile);
2646 else {
2647 ip_rec->has_endfile = true;
2648 (void) list_prepend(endfile_block, ip_endfile);
2649 }
2650
2651 if (beginfile_block == NULL)
2652 beginfile_block = list_create(ip_beginfile);
2653 else
2654 (void) list_prepend(beginfile_block, ip_beginfile);
2655
2656 if (prog_block == NULL) {
2657 if (end_block->nexti == end_block->lasti
2658 && beginfile_block->nexti == beginfile_block->lasti
2659 && endfile_block->nexti == endfile_block->lasti
2660 ) {
2661 /* no pattern-action and (real) end, beginfile or endfile blocks */
2662 bcfree(ip_rec);
2663 bcfree(ip_newfile);
2664 ip_rec = ip_newfile = NULL;
2665
2666 list_append(beginfile_block, instruction(Op_after_beginfile));
2667 (void) list_append(endfile_block, instruction(Op_after_endfile));
2668
2669 if (begin_block == NULL) /* no program at all */
2670 cp = end_block;
2671 else
2672 cp = list_merge(begin_block, end_block);
2673
2674 if (interblock_comment != NULL) {
2675 (void) list_append(cp, interblock_comment);
2676 interblock_comment = NULL;
2677 }
2678
2679 (void) list_append(cp, ip_atexit);
2680 (void) list_append(cp, instruction(Op_stop));
2681
2682 /* append beginfile_block and endfile_block for sole use
2683 * in getline without redirection (Op_K_getline).
2684 */
2685
2686 (void) list_merge(cp, beginfile_block);
2687 (void) list_merge(cp, endfile_block);
2688
2689 if (outer_comment != NULL) {
2690 cp = list_merge(list_create(outer_comment), cp);
2691 outer_comment = NULL;
2692 }
2693
2694 if (interblock_comment != NULL) {
2695 (void) list_append(cp, interblock_comment);
2696 interblock_comment = NULL;
2697 }
2698
2699 goto out;
2700
2701 } else {
2702 /* install a do-nothing prog block */
2703 prog_block = list_create(instruction(Op_no_op));
2704 }
2705 }
2706
2707 (void) list_append(endfile_block, instruction(Op_after_endfile));
2708 (void) list_prepend(prog_block, ip_rec);
2709 (void) list_append(prog_block, instruction(Op_jmp));
2710 prog_block->lasti->target_jmp = ip_rec;
2711
2712 list_append(beginfile_block, instruction(Op_after_beginfile));
2713
2714 cp = list_merge(beginfile_block, prog_block);
2715 (void) list_prepend(cp, ip_newfile);
2716 (void) list_merge(cp, endfile_block);
2717 (void) list_merge(cp, end_block);
2718 if (begin_block != NULL)
2719 cp = list_merge(begin_block, cp);
2720
2721 if (outer_comment != NULL) {
2722 cp = list_merge(list_create(outer_comment), cp);
2723 outer_comment = NULL;
2724 }
2725
2726 if (interblock_comment != NULL) {
2727 (void) list_append(cp, interblock_comment);
2728 interblock_comment = NULL;
2729 }
2730
2731 (void) list_append(cp, ip_atexit);
2732 (void) list_append(cp, instruction(Op_stop));
2733
2734 out:
2735 /* delete the Op_list, not needed */
2736 tmp = cp->nexti;
2737 bcfree(cp);
2738 return tmp;
2739
2740 #undef begin_block
2741 #undef end_block
2742 #undef prog_block
2743 #undef beginfile_block
2744 #undef endfile_block
2745 }
2746
2747 /* parse_program --- read in the program and convert into a list of instructions */
2748
2749 int
parse_program(INSTRUCTION ** pcode,bool from_eval)2750 parse_program(INSTRUCTION **pcode, bool from_eval)
2751 {
2752 int ret;
2753
2754 called_from_eval = from_eval;
2755
2756 /* pre-create non-local jump targets
2757 * ip_end (Op_no_op) -- used as jump target for `exit'
2758 * outside an END block.
2759 */
2760 ip_end = instruction(Op_no_op);
2761
2762 if (! in_main_context())
2763 ip_newfile = ip_rec = ip_atexit = ip_beginfile = ip_endfile = NULL;
2764 else {
2765 ip_endfile = instruction(Op_no_op);
2766 main_beginfile = ip_beginfile = instruction(Op_no_op);
2767 ip_rec = instruction(Op_get_record); /* target for `next', also ip_newfile */
2768 ip_newfile = bcalloc(Op_newfile, 2, 0); /* target for `nextfile' */
2769 ip_newfile->target_jmp = ip_end;
2770 ip_newfile->target_endfile = ip_endfile;
2771 (ip_newfile + 1)->target_get_record = ip_rec;
2772 ip_rec->target_newfile = ip_newfile;
2773 ip_atexit = instruction(Op_atexit); /* target for `exit' in END block */
2774 }
2775
2776 for (sourcefile = srcfiles->next; sourcefile->stype == SRC_EXTLIB;
2777 sourcefile = sourcefile->next)
2778 ;
2779
2780 lexeof = false;
2781 lexptr = NULL;
2782 lasttok = 0;
2783 memset(rule_block, 0, sizeof(rule_block));
2784 errcount = 0;
2785 tok = tokstart != NULL ? tokstart : tokexpand();
2786
2787 ret = yyparse();
2788 *pcode = mk_program();
2789
2790 /* avoid false source indications */
2791 source = NULL;
2792 sourceline = 0;
2793 if (ret == 0) /* avoid spurious warning if parser aborted with YYABORT */
2794 check_funcs();
2795
2796 if (do_posix && ! check_param_names())
2797 errcount++;
2798
2799 if (args_array == NULL)
2800 emalloc(args_array, NODE **, (max_args + 2) * sizeof(NODE *), "parse_program");
2801 else
2802 erealloc(args_array, NODE **, (max_args + 2) * sizeof(NODE *), "parse_program");
2803
2804 return (ret || errcount);
2805 }
2806
2807 /* free_srcfile --- free a SRCFILE struct */
2808
2809 void
free_srcfile(SRCFILE * thisfile)2810 free_srcfile(SRCFILE *thisfile)
2811 {
2812 efree(thisfile->src);
2813 efree(thisfile);
2814 }
2815
2816 /* do_add_srcfile --- add one item to srcfiles */
2817
2818 static SRCFILE *
do_add_srcfile(enum srctype stype,char * src,char * path,SRCFILE * thisfile)2819 do_add_srcfile(enum srctype stype, char *src, char *path, SRCFILE *thisfile)
2820 {
2821 SRCFILE *s;
2822
2823 ezalloc(s, SRCFILE *, sizeof(SRCFILE), "do_add_srcfile");
2824 s->src = estrdup(src, strlen(src));
2825 s->fullpath = path;
2826 s->stype = stype;
2827 s->fd = INVALID_HANDLE;
2828 s->next = thisfile;
2829 s->prev = thisfile->prev;
2830 thisfile->prev->next = s;
2831 thisfile->prev = s;
2832 return s;
2833 }
2834
2835 /* add_srcfile --- add one item to srcfiles after checking if
2836 * a source file exists and not already in list.
2837 */
2838
2839 SRCFILE *
add_srcfile(enum srctype stype,char * src,SRCFILE * thisfile,bool * already_included,int * errcode)2840 add_srcfile(enum srctype stype, char *src, SRCFILE *thisfile, bool *already_included, int *errcode)
2841 {
2842 SRCFILE *s;
2843 struct stat sbuf;
2844 char *path;
2845 int errno_val = 0;
2846
2847 if (already_included)
2848 *already_included = false;
2849 if (errcode)
2850 *errcode = 0;
2851 if (stype == SRC_CMDLINE || stype == SRC_STDIN)
2852 return do_add_srcfile(stype, src, NULL, thisfile);
2853
2854 path = find_source(src, & sbuf, & errno_val, stype == SRC_EXTLIB);
2855 if (path == NULL) {
2856 if (errcode) {
2857 *errcode = errno_val;
2858 return NULL;
2859 }
2860 /* use full messages to ease translation */
2861 fatal(stype != SRC_EXTLIB
2862 ? _("cannot open source file `%s' for reading: %s")
2863 : _("cannot open shared library `%s' for reading: %s"),
2864 src,
2865 errno_val ? strerror(errno_val) : _("reason unknown"));
2866 }
2867
2868 /* N.B. We do not eliminate duplicate SRC_FILE (-f) programs. */
2869 for (s = srcfiles->next; s != srcfiles; s = s->next) {
2870 if ((s->stype == SRC_FILE || s->stype == SRC_INC || s->stype == SRC_EXTLIB) && files_are_same(path, s)) {
2871 if (stype == SRC_INC || stype == SRC_EXTLIB) {
2872 /* eliminate duplicates */
2873 if ((stype == SRC_INC) && (s->stype == SRC_FILE))
2874 fatal(_("cannot include `%s' and use it as a program file"), src);
2875
2876 if (do_lint) {
2877 int line = sourceline;
2878 /* Kludge: the line number may be off for `@include file'.
2879 * Since, this function is also used for '-f file' in main.c,
2880 * sourceline > 1 check ensures that the call is at
2881 * parse time.
2882 */
2883 if (sourceline > 1 && lasttok == NEWLINE)
2884 line--;
2885 lintwarn_ln(line,
2886 stype != SRC_EXTLIB
2887 ? _("already included source file `%s'")
2888 : _("already loaded shared library `%s'"),
2889 src);
2890 }
2891 efree(path);
2892 if (already_included)
2893 *already_included = true;
2894 return NULL;
2895 } else {
2896 /* duplicates are allowed for -f */
2897 if (s->stype == SRC_INC)
2898 fatal(_("cannot include `%s' and use it as a program file"), src);
2899 /* no need to scan for further matches, since
2900 * they must be of homogeneous type */
2901 break;
2902 }
2903 }
2904 }
2905
2906 s = do_add_srcfile(stype, src, path, thisfile);
2907 s->sbuf = sbuf;
2908 s->mtime = sbuf.st_mtime;
2909 return s;
2910 }
2911
2912 /* include_source --- read program from source included using `@include' */
2913
2914 static bool
include_source(INSTRUCTION * file,void ** srcfile_p)2915 include_source(INSTRUCTION *file, void **srcfile_p)
2916 {
2917 SRCFILE *s;
2918 char *src = file->lextok;
2919 int errcode;
2920 bool already_included;
2921
2922 *srcfile_p = NULL;
2923
2924 if (do_traditional || do_posix) {
2925 error_ln(file->source_line, _("@include is a gawk extension"));
2926 return false;
2927 }
2928
2929 if (strlen(src) == 0) {
2930 if (do_lint)
2931 lintwarn_ln(file->source_line, _("empty filename after @include"));
2932 return true;
2933 }
2934
2935 s = add_srcfile(SRC_INC, src, sourcefile, &already_included, &errcode);
2936 if (s == NULL) {
2937 if (already_included)
2938 return true;
2939 error_ln(file->source_line,
2940 _("cannot open source file `%s' for reading: %s"),
2941 src, errcode ? strerror(errcode) : _("reason unknown"));
2942 return false;
2943 }
2944
2945 /* save scanner state for the current sourcefile */
2946 sourcefile->srclines = sourceline;
2947 sourcefile->lexptr = lexptr;
2948 sourcefile->lexend = lexend;
2949 sourcefile->lexptr_begin = lexptr_begin;
2950 sourcefile->lexeme = lexeme;
2951 sourcefile->lasttok = lasttok;
2952 sourcefile->namespace = current_namespace;
2953
2954 /* included file becomes the current source */
2955 sourcefile = s;
2956 lexptr = NULL;
2957 sourceline = 0;
2958 source = NULL;
2959 lasttok = 0;
2960 lexeof = false;
2961 eof_warned = false;
2962 current_namespace = awk_namespace;
2963 *srcfile_p = (void *) s;
2964 return true;
2965 }
2966
2967 /* load_library --- load a shared library */
2968
2969 static bool
load_library(INSTRUCTION * file,void ** srcfile_p)2970 load_library(INSTRUCTION *file, void **srcfile_p)
2971 {
2972 SRCFILE *s;
2973 char *src = file->lextok;
2974 int errcode;
2975 bool already_included;
2976
2977 *srcfile_p = NULL;
2978
2979 if (do_traditional || do_posix) {
2980 error_ln(file->source_line, _("@load is a gawk extension"));
2981 return false;
2982 }
2983
2984
2985 if (strlen(src) == 0) {
2986 if (do_lint)
2987 lintwarn_ln(file->source_line, _("empty filename after @load"));
2988 return true;
2989 }
2990
2991 if (do_pretty_print && ! do_profile) {
2992 // create a fake one, don't try to open the file
2993 s = do_add_srcfile(SRC_EXTLIB, src, src, sourcefile);
2994 } else {
2995 s = add_srcfile(SRC_EXTLIB, src, sourcefile, &already_included, &errcode);
2996 if (s == NULL) {
2997 if (already_included)
2998 return true;
2999 error_ln(file->source_line,
3000 _("cannot open shared library `%s' for reading: %s"),
3001 src, errcode ? strerror(errcode) : _("reason unknown"));
3002 return false;
3003 }
3004
3005 load_ext(s->fullpath);
3006 }
3007
3008 *srcfile_p = (void *) s;
3009 return true;
3010 }
3011
3012 /* next_sourcefile --- read program from the next source in srcfiles */
3013
3014 static void
next_sourcefile()3015 next_sourcefile()
3016 {
3017 static int (*closefunc)(int fd) = NULL;
3018
3019 if (closefunc == NULL) {
3020 char *cp = getenv("AWKREADFUNC");
3021
3022 /* If necessary, one day, test value for different functions. */
3023 if (cp == NULL)
3024 closefunc = close;
3025 else
3026 closefunc = one_line_close;
3027 }
3028
3029 /*
3030 * This won't be true if there's an invalid character in
3031 * the source file or source string (e.g., user typo).
3032 * Previous versions of gawk did not core dump in such a
3033 * case.
3034 *
3035 * assert(lexeof == true);
3036 */
3037
3038 lexeof = false;
3039 eof_warned = false;
3040 sourcefile->srclines = sourceline; /* total no of lines in current file */
3041 if (sourcefile->fd > INVALID_HANDLE) {
3042 if (sourcefile->fd != fileno(stdin)) /* safety */
3043 (*closefunc)(sourcefile->fd);
3044 sourcefile->fd = INVALID_HANDLE;
3045 }
3046 if (sourcefile->buf != NULL) {
3047 efree(sourcefile->buf);
3048 sourcefile->buf = NULL;
3049 sourcefile->lexptr_begin = NULL;
3050 }
3051
3052 while ((sourcefile = sourcefile->next) != NULL) {
3053 if (sourcefile == srcfiles)
3054 return;
3055 if (sourcefile->stype != SRC_EXTLIB)
3056 break;
3057 }
3058
3059 if (sourcefile->lexptr_begin != NULL) {
3060 /* resume reading from already opened file (postponed to process '@include') */
3061 lexptr = sourcefile->lexptr;
3062 lexend = sourcefile->lexend;
3063 lasttok = sourcefile->lasttok;
3064 lexptr_begin = sourcefile->lexptr_begin;
3065 lexeme = sourcefile->lexeme;
3066 sourceline = sourcefile->srclines;
3067 source = sourcefile->src;
3068 set_current_namespace(sourcefile->namespace);
3069 } else {
3070 lexptr = NULL;
3071 sourceline = 0;
3072 source = NULL;
3073 lasttok = 0;
3074 set_current_namespace(awk_namespace);
3075 }
3076 }
3077
3078 /* get_src_buf --- read the next buffer of source program */
3079
3080 static char *
get_src_buf()3081 get_src_buf()
3082 {
3083 int n;
3084 char *scan;
3085 bool newfile;
3086 int savelen;
3087 struct stat sbuf;
3088
3089 /*
3090 * No argument prototype on readfunc on purpose,
3091 * avoids problems with some ancient systems where
3092 * the types of arguments to read() aren't up to date.
3093 */
3094 static ssize_t (*readfunc)(int, void *, size_t) = NULL;
3095
3096 if (readfunc == NULL) {
3097 char *cp = getenv("AWKREADFUNC");
3098
3099 /* If necessary, one day, test value for different functions. */
3100 if (cp == NULL)
3101 /*
3102 * cast is to remove warnings on systems with
3103 * different return types for read.
3104 */
3105 readfunc = ( ssize_t(*)(int, void *, size_t) ) read;
3106 else
3107 readfunc = read_one_line;
3108 }
3109
3110 newfile = false;
3111 if (sourcefile == srcfiles)
3112 return NULL;
3113
3114 if (sourcefile->stype == SRC_CMDLINE) {
3115 if (sourcefile->bufsize == 0) {
3116 sourcefile->bufsize = strlen(sourcefile->src);
3117 lexptr = lexptr_begin = lexeme = sourcefile->src;
3118 lexend = lexptr + sourcefile->bufsize;
3119 sourceline = 1;
3120 if (sourcefile->bufsize == 0) {
3121 /*
3122 * Yet Another Special case:
3123 * gawk '' /path/name
3124 * Sigh.
3125 */
3126 static bool warned = false;
3127
3128 if (do_lint && ! warned) {
3129 warned = true;
3130 lintwarn(_("empty program text on command line"));
3131 }
3132 lexeof = true;
3133 }
3134 } else if (sourcefile->buf == NULL && *(lexptr-1) != '\n') {
3135 /*
3136 * The following goop is to ensure that the source
3137 * ends with a newline and that the entire current
3138 * line is available for error messages.
3139 */
3140 int offset;
3141 char *buf;
3142
3143 offset = lexptr - lexeme;
3144 for (scan = lexeme; scan > lexptr_begin; scan--)
3145 if (*scan == '\n') {
3146 scan++;
3147 break;
3148 }
3149 savelen = lexptr - scan;
3150 emalloc(buf, char *, savelen + 1, "get_src_buf");
3151 memcpy(buf, scan, savelen);
3152 thisline = buf;
3153 lexptr = buf + savelen;
3154 *lexptr = '\n';
3155 lexeme = lexptr - offset;
3156 lexptr_begin = buf;
3157 lexend = lexptr + 1;
3158 sourcefile->buf = buf;
3159 } else
3160 lexeof = true;
3161 return lexptr;
3162 }
3163
3164 if (sourcefile->fd <= INVALID_HANDLE) {
3165 int fd;
3166 int l;
3167
3168 source = sourcefile->src;
3169 if (source == NULL)
3170 return NULL;
3171 fd = srcopen(sourcefile);
3172 if (fd <= INVALID_HANDLE) {
3173 char *in;
3174
3175 /* suppress file name and line no. in error mesg */
3176 in = source;
3177 source = NULL;
3178 error(_("cannot open source file `%s' for reading: %s"),
3179 in, strerror(errno));
3180 errcount++;
3181 lexeof = true;
3182 return sourcefile->src;
3183 }
3184
3185 sourcefile->fd = fd;
3186 l = optimal_bufsize(fd, &sbuf);
3187 /*
3188 * Make sure that something silly like
3189 * AWKBUFSIZE=8 make check
3190 * works ok.
3191 */
3192 #define A_DECENT_BUFFER_SIZE 128
3193 if (l < A_DECENT_BUFFER_SIZE)
3194 l = A_DECENT_BUFFER_SIZE;
3195 #undef A_DECENT_BUFFER_SIZE
3196 sourcefile->bufsize = l;
3197 newfile = true;
3198 emalloc(sourcefile->buf, char *, sourcefile->bufsize, "get_src_buf");
3199 memset(sourcefile->buf, '\0', sourcefile->bufsize); // keep valgrind happy
3200 lexptr = lexptr_begin = lexeme = sourcefile->buf;
3201 savelen = 0;
3202 sourceline = 1;
3203 thisline = NULL;
3204 } else {
3205 /*
3206 * Here, we retain the current source line in the beginning of the buffer.
3207 */
3208 int offset;
3209 for (scan = lexeme; scan > lexptr_begin; scan--)
3210 if (*scan == '\n') {
3211 scan++;
3212 break;
3213 }
3214
3215 savelen = lexptr - scan;
3216 offset = lexptr - lexeme;
3217
3218 if (savelen > 0) {
3219 /*
3220 * Need to make sure we have room left for reading new text;
3221 * grow the buffer (by doubling, an arbitrary choice), if the retained line
3222 * takes up more than a certain percentage (50%, again an arbitrary figure)
3223 * of the available space.
3224 */
3225
3226 if (savelen > sourcefile->bufsize / 2) { /* long line or token */
3227 sourcefile->bufsize *= 2;
3228 erealloc(sourcefile->buf, char *, sourcefile->bufsize, "get_src_buf");
3229 scan = sourcefile->buf + (scan - lexptr_begin);
3230 lexptr_begin = sourcefile->buf;
3231 }
3232
3233 thisline = lexptr_begin;
3234 memmove(thisline, scan, savelen);
3235 lexptr = thisline + savelen;
3236 lexeme = lexptr - offset;
3237 } else {
3238 savelen = 0;
3239 lexptr = lexeme = lexptr_begin;
3240 thisline = NULL;
3241 }
3242 }
3243
3244 n = (*readfunc)(sourcefile->fd, lexptr, sourcefile->bufsize - savelen);
3245 if (n == -1) {
3246 error(_("cannot read source file `%s': %s"),
3247 source, strerror(errno));
3248 errcount++;
3249 lexeof = true;
3250 } else {
3251 lexend = lexptr + n;
3252 if (n == 0) {
3253 static bool warned = false;
3254 if (do_lint && newfile && ! warned) {
3255 warned = true;
3256 sourceline = 0;
3257 lintwarn(_("source file `%s' is empty"), source);
3258 }
3259 lexeof = true;
3260 }
3261 }
3262 return sourcefile->buf;
3263 }
3264
3265 /* tokadd --- add a character to the token buffer */
3266
3267 #define tokadd(x) (*tok++ = (x), tok == tokend ? tokexpand() : tok)
3268
3269 /* tokexpand --- grow the token buffer */
3270
3271 static char *
tokexpand()3272 tokexpand()
3273 {
3274 static size_t toksize;
3275 int tokoffset;
3276
3277 if (tokstart != NULL) {
3278 tokoffset = tok - tokstart;
3279 toksize *= 2;
3280 erealloc(tokstart, char *, toksize, "tokexpand");
3281 tok = tokstart + tokoffset;
3282 } else {
3283 toksize = 60;
3284 emalloc(tokstart, char *, toksize, "tokexpand");
3285 tok = tokstart;
3286 }
3287 tokend = tokstart + toksize;
3288 return tok;
3289 }
3290
3291 /* check_bad_char --- fatal if c isn't allowed in gawk source code */
3292
3293 /*
3294 * The error message was inspired by someone who decided to put
3295 * a physical \0 byte into the source code to see what would
3296 * happen and then filed a bug report about it. Sigh.
3297 */
3298
3299 static void
check_bad_char(int c)3300 check_bad_char(int c)
3301 {
3302 /* allow escapes. needed for autoconf. bleah. */
3303 switch (c) {
3304 case '\a':
3305 case '\b':
3306 case '\f':
3307 case '\n':
3308 case '\r':
3309 case '\t':
3310 return;
3311 default:
3312 break;
3313 }
3314
3315 if (iscntrl(c) && ! isspace(c))
3316 // This is a PEBKAC error, but we'll be nice and not say so.
3317 fatal(_("error: invalid character '\\%03o' in source code"), c & 0xFF);
3318 }
3319
3320 /* nextc --- get the next input character */
3321
3322 // For namespaces, -e chunks must be syntactic units.
3323 #define NO_CONTINUE_SOURCE_STRINGS 1
3324
3325 static int
nextc(bool check_for_bad)3326 nextc(bool check_for_bad)
3327 {
3328 if (gawk_mb_cur_max > 1) {
3329 again:
3330 #ifdef NO_CONTINUE_SOURCE_STRINGS
3331 if (lexeof)
3332 return END_FILE;
3333 #else
3334 if (lexeof) {
3335 if (sourcefile->next == srcfiles)
3336 return END_FILE;
3337 else
3338 next_sourcefile();
3339 }
3340 #endif
3341 if (lexptr == NULL || lexptr >= lexend) {
3342 if (get_src_buf())
3343 goto again;
3344 return END_SRC;
3345 }
3346
3347 /* Update the buffer index. */
3348 cur_ring_idx = (cur_ring_idx == RING_BUFFER_SIZE - 1)? 0 :
3349 cur_ring_idx + 1;
3350
3351 /* Did we already check the current character? */
3352 if (cur_char_ring[cur_ring_idx] == 0) {
3353 /* No, we need to check the next character on the buffer. */
3354 int idx, work_ring_idx = cur_ring_idx;
3355 mbstate_t tmp_state;
3356 size_t mbclen;
3357
3358 for (idx = 0; lexptr + idx < lexend; idx++) {
3359 tmp_state = cur_mbstate;
3360 mbclen = mbrlen(lexptr, idx + 1, &tmp_state);
3361
3362 if (mbclen == 1 || mbclen == (size_t)-1 || mbclen == 0) {
3363 /* It is a singlebyte character, non-complete multibyte
3364 character or EOF. We treat it as a singlebyte
3365 character. */
3366 cur_char_ring[work_ring_idx] = 1;
3367 break;
3368 } else if (mbclen == (size_t)-2) {
3369 /* It is not a complete multibyte character. */
3370 cur_char_ring[work_ring_idx] = idx + 1;
3371 } else {
3372 /* mbclen > 1 */
3373 cur_char_ring[work_ring_idx] = mbclen;
3374 break;
3375 }
3376 work_ring_idx = (work_ring_idx == RING_BUFFER_SIZE - 1)?
3377 0 : work_ring_idx + 1;
3378 }
3379 cur_mbstate = tmp_state;
3380
3381 /* Put a mark on the position on which we write next character. */
3382 work_ring_idx = (work_ring_idx == RING_BUFFER_SIZE - 1)?
3383 0 : work_ring_idx + 1;
3384 cur_char_ring[work_ring_idx] = 0;
3385 }
3386 if (check_for_bad || *lexptr == '\0')
3387 check_bad_char(*lexptr);
3388
3389 return (int) (unsigned char) *lexptr++;
3390 } else {
3391 do {
3392 #ifdef NO_CONTINUE_SOURCE_STRINGS
3393 if (lexeof)
3394 return END_FILE;
3395 #else
3396 if (lexeof) {
3397 if (sourcefile->next == srcfiles)
3398 return END_FILE;
3399 else
3400 next_sourcefile();
3401 }
3402 #endif
3403 if (lexptr && lexptr < lexend) {
3404 if (check_for_bad || *lexptr == '\0')
3405 check_bad_char(*lexptr);
3406 return ((int) (unsigned char) *lexptr++);
3407 }
3408 } while (get_src_buf());
3409 return END_SRC;
3410 }
3411 }
3412 #undef NO_CONTINUE_SOURCE_STRINGS
3413
3414 /* pushback --- push a character back on the input */
3415
3416 static inline void
pushback(void)3417 pushback(void)
3418 {
3419 if (gawk_mb_cur_max > 1)
3420 cur_ring_idx = (cur_ring_idx == 0)? RING_BUFFER_SIZE - 1 :
3421 cur_ring_idx - 1;
3422 (! lexeof && lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr);
3423 }
3424
3425 /*
3426 * get_comment --- collect comment text.
3427 * Flag = EOL_COMMENT for end-of-line comments.
3428 * Flag = BLOCK_COMMENT for self-contained comments.
3429 */
3430
3431 static int
get_comment(enum commenttype flag,INSTRUCTION ** comment_instruction)3432 get_comment(enum commenttype flag, INSTRUCTION **comment_instruction)
3433 {
3434 int c;
3435 int sl;
3436 char *p1;
3437 char *p2;
3438
3439 tok = tokstart;
3440 tokadd('#');
3441 sl = sourceline;
3442
3443 while (true) {
3444 while ((c = nextc(false)) != '\n' && c != END_FILE) {
3445 /* ignore \r characters */
3446 if (c != '\r')
3447 tokadd(c);
3448 }
3449 if (flag == EOL_COMMENT) {
3450 /* comment at end of line. */
3451 if (c == '\n')
3452 tokadd(c);
3453 break;
3454 }
3455 if (c == '\n') {
3456 tokadd(c);
3457 sourceline++;
3458 do {
3459 c = nextc(false);
3460 if (c == '\n') {
3461 sourceline++;
3462 tokadd(c);
3463 }
3464 } while (c != END_FILE && isspace(c));
3465 if (c == END_FILE)
3466 break;
3467 else if (c != '#') {
3468 pushback();
3469 sourceline--;
3470 break;
3471 } else
3472 tokadd(c);
3473 } else
3474 break;
3475 }
3476
3477 /* remove any trailing blank lines (consecutive \n) from comment */
3478 p1 = tok - 1;
3479 p2 = tok - 2;
3480 while (*p1 == '\n' && *p2 == '\n') {
3481 p1--;
3482 p2--;
3483 tok--;
3484 }
3485
3486 (*comment_instruction) = bcalloc(Op_comment, 1, sl);
3487 (*comment_instruction)->source_file = source;
3488 (*comment_instruction)->memory = make_str_node(tokstart, tok - tokstart, 0);
3489 (*comment_instruction)->memory->comment_type = flag;
3490
3491 return c;
3492 }
3493
3494 /* allow_newline --- allow newline after &&, ||, ? and : */
3495
3496 static void
allow_newline(INSTRUCTION ** new_comment)3497 allow_newline(INSTRUCTION **new_comment)
3498 {
3499 int c;
3500
3501 for (;;) {
3502 c = nextc(true);
3503 if (c == END_FILE) {
3504 pushback();
3505 break;
3506 }
3507 if (c == '#') {
3508 if (do_pretty_print && ! do_profile) {
3509 /* collect comment byte code iff doing pretty print but not profiling. */
3510 c = get_comment(EOL_COMMENT, new_comment);
3511 } else {
3512 while ((c = nextc(false)) != '\n' && c != END_FILE)
3513 continue;
3514 }
3515 if (c == END_FILE) {
3516 pushback();
3517 break;
3518 }
3519 }
3520 if (c == '\n')
3521 sourceline++;
3522 if (! isspace(c)) {
3523 pushback();
3524 break;
3525 }
3526 }
3527 }
3528
3529 /* newline_eof --- return newline or EOF as needed and adjust variables */
3530
3531 /*
3532 * This routine used to be a macro, however GCC 4.6.2 warned about
3533 * the result of a computation not being used. Converting to a function
3534 * removes the warnings.
3535 */
3536
3537 static int
newline_eof()3538 newline_eof()
3539 {
3540 /* NB: a newline at end does not start a source line. */
3541 if (lasttok != NEWLINE) {
3542 pushback();
3543 if (do_lint && ! eof_warned) {
3544 lintwarn(_("source file does not end in newline"));
3545 eof_warned = true;
3546 }
3547 sourceline++;
3548 return NEWLINE;
3549 }
3550
3551 sourceline--;
3552 eof_warned = false;
3553 return LEX_EOF;
3554 }
3555
3556 /* yylex --- Read the input and turn it into tokens. */
3557
3558 static int
3559 #ifdef USE_EBCDIC
yylex_ebcdic(void)3560 yylex_ebcdic(void)
3561 #else
3562 yylex(void)
3563 #endif
3564 {
3565 int c;
3566 bool seen_e = false; /* These are for numbers */
3567 bool seen_point = false;
3568 bool esc_seen; /* for literal strings */
3569 int mid;
3570 int base;
3571 static bool did_newline = false;
3572 char *tokkey;
3573 bool inhex = false;
3574 bool intlstr = false;
3575 AWKNUM d;
3576 bool collecting_typed_regexp = false;
3577 static int qm_col_count = 0;
3578
3579 #define GET_INSTRUCTION(op) bcalloc(op, 1, sourceline)
3580
3581 #define NEWLINE_EOF newline_eof()
3582
3583 yylval = (INSTRUCTION *) NULL;
3584 if (lasttok == SUBSCRIPT) {
3585 lasttok = 0;
3586 return SUBSCRIPT;
3587 }
3588
3589 if (lasttok == LEX_EOF) /* error earlier in current source, must give up !! */
3590 return 0;
3591
3592 c = nextc(! want_regexp);
3593 if (c == END_SRC)
3594 return 0;
3595 if (c == END_FILE)
3596 return lasttok = NEWLINE_EOF;
3597 pushback();
3598
3599 #if defined __EMX__
3600 /*
3601 * added for OS/2's extproc feature of cmd.exe
3602 * (like #! in BSD sh)
3603 */
3604 if (strncasecmp(lexptr, "extproc ", 8) == 0) {
3605 while (*lexptr && *lexptr != '\n')
3606 lexptr++;
3607 }
3608 #endif
3609
3610 lexeme = lexptr;
3611 thisline = NULL;
3612
3613 collect_regexp:
3614 if (want_regexp) {
3615 int in_brack = 0; /* count brackets, [[:alnum:]] allowed */
3616 int b_index = -1;
3617 int cur_index = 0;
3618
3619 /*
3620 * Here is what's ok with brackets:
3621 *
3622 * [..[..] []] [^]] [.../...]
3623 * [...\[...] [...\]...] [...\/...]
3624 *
3625 * (Remember that all of the above are inside /.../)
3626 *
3627 * The code for \ handles \[, \] and \/.
3628 *
3629 * Otherwise, track the first open [ position, and if
3630 * an embedded ] occurs, allow it to pass through
3631 * if it's right after the first [ or after [^.
3632 *
3633 * Whew!
3634 */
3635
3636 want_regexp = false;
3637 tok = tokstart;
3638 for (;;) {
3639 c = nextc(false);
3640
3641 cur_index = tok - tokstart;
3642 if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) switch (c) {
3643 case '[':
3644 if (nextc(false) == ':' || in_brack == 0) {
3645 in_brack++;
3646 if (in_brack == 1)
3647 b_index = tok - tokstart;
3648 }
3649 pushback();
3650 break;
3651 case ']':
3652 if (in_brack > 0
3653 && (cur_index == b_index + 1
3654 || (cur_index == b_index + 2 && tok[-1] == '^')))
3655 ; /* do nothing */
3656 else {
3657 in_brack--;
3658 if (in_brack == 0)
3659 b_index = -1;
3660 }
3661 break;
3662 case '\\':
3663 if ((c = nextc(false)) == END_FILE) {
3664 pushback();
3665 yyerror(_("unterminated regexp ends with `\\' at end of file"));
3666 goto end_regexp; /* kludge */
3667 }
3668 if (c == '\r') /* allow MS-DOS files. bleah */
3669 c = nextc(true);
3670 if (c == '\n') {
3671 sourceline++;
3672 continue;
3673 } else {
3674 tokadd('\\');
3675 tokadd(c);
3676 continue;
3677 }
3678 break;
3679 case '/': /* end of the regexp */
3680 if (in_brack > 0)
3681 break;
3682 end_regexp:
3683 yylval = GET_INSTRUCTION(Op_token);
3684 yylval->lextok = estrdup(tokstart, tok - tokstart);
3685 if (do_lint) {
3686 int peek = nextc(true);
3687
3688 pushback();
3689 if (peek == 'i' || peek == 's') {
3690 if (source)
3691 lintwarn(
3692 _("%s: %d: tawk regex modifier `/.../%c' doesn't work in gawk"),
3693 source, sourceline, peek);
3694 else
3695 lintwarn(
3696 _("tawk regex modifier `/.../%c' doesn't work in gawk"),
3697 peek);
3698 }
3699 }
3700 if (collecting_typed_regexp) {
3701 collecting_typed_regexp = false;
3702 lasttok = TYPED_REGEXP;
3703 } else
3704 lasttok = REGEXP;
3705
3706 return lasttok;
3707 case '\n':
3708 pushback();
3709 yyerror(_("unterminated regexp"));
3710 goto end_regexp; /* kludge */
3711 case END_FILE:
3712 pushback();
3713 yyerror(_("unterminated regexp at end of file"));
3714 goto end_regexp; /* kludge */
3715 }
3716 tokadd(c);
3717 }
3718 }
3719 retry:
3720
3721 /* skipping \r is a hack, but windows is just too pervasive. sigh. */
3722 while ((c = nextc(true)) == ' ' || c == '\t' || c == '\r')
3723 continue;
3724
3725 lexeme = lexptr ? lexptr - 1 : lexptr;
3726 thisline = NULL;
3727 tok = tokstart;
3728
3729 if (gawk_mb_cur_max == 1 || nextc_is_1stbyte)
3730 switch (c) {
3731 case END_SRC:
3732 return 0;
3733
3734 case END_FILE:
3735 return lasttok = NEWLINE_EOF;
3736
3737 case '\n':
3738 sourceline++;
3739 return lasttok = NEWLINE;
3740
3741 case '#': /* it's a comment */
3742 yylval = NULL;
3743 if (do_pretty_print && ! do_profile) {
3744 /*
3745 * Collect comment byte code iff doing pretty print
3746 * but not profiling.
3747 */
3748 INSTRUCTION *new_comment;
3749
3750 if (lasttok == NEWLINE || lasttok == 0)
3751 c = get_comment(BLOCK_COMMENT, & new_comment);
3752 else
3753 c = get_comment(EOL_COMMENT, & new_comment);
3754
3755 yylval = new_comment;
3756
3757 if (c == END_FILE) {
3758 pushback();
3759 return lasttok = NEWLINE;
3760 }
3761 } else {
3762 while ((c = nextc(false)) != '\n') {
3763 if (c == END_FILE)
3764 return lasttok = NEWLINE_EOF;
3765 }
3766 }
3767 sourceline++;
3768 return lasttok = NEWLINE;
3769
3770 case '@':
3771 c = nextc(true);
3772 if (c == '/') {
3773 want_regexp = true;
3774 collecting_typed_regexp = true;
3775 goto collect_regexp;
3776 }
3777 pushback();
3778 at_seen = true;
3779 return lasttok = '@';
3780
3781 case '\\':
3782 #ifdef RELAXED_CONTINUATION
3783 /*
3784 * This code purports to allow comments and/or whitespace
3785 * after the `\' at the end of a line used for continuation.
3786 * Use it at your own risk. We think it's a bad idea, which
3787 * is why it's not on by default.
3788 */
3789 yylval = NULL;
3790 if (! do_traditional) {
3791 INSTRUCTION *new_comment;
3792
3793 /* strip trailing white-space and/or comment */
3794 while ((c = nextc(true)) == ' ' || c == '\t' || c == '\r')
3795 continue;
3796 if (c == '#') {
3797 static bool warned = false;
3798
3799 if (do_lint && ! warned) {
3800 warned = true;
3801 lintwarn(
3802 _("use of `\\ #...' line continuation is not portable"));
3803 }
3804 if (do_pretty_print && ! do_profile) {
3805 c = get_comment(EOL_COMMENT, & new_comment);
3806 yylval = new_comment;
3807 return lasttok = c;
3808 } else {
3809 while ((c = nextc(false)) != '\n')
3810 if (c == END_FILE)
3811 break;
3812 }
3813 }
3814 pushback();
3815 }
3816 #endif /* RELAXED_CONTINUATION */
3817 c = nextc(true);
3818 if (c == '\r') /* allow MS-DOS files. bleah */
3819 c = nextc(true);
3820 if (c == '\n') {
3821 sourceline++;
3822 goto retry;
3823 } else {
3824 yyerror(_("backslash not last character on line"));
3825 return lasttok = LEX_EOF;
3826 }
3827 break;
3828
3829 case '?':
3830 qm_col_count++;
3831 // fall through
3832 case ':':
3833 yylval = GET_INSTRUCTION(Op_cond_exp);
3834 if (qm_col_count > 0) {
3835 if (! do_posix) {
3836 INSTRUCTION *new_comment = NULL;
3837 allow_newline(& new_comment);
3838 yylval->comment = new_comment;
3839 }
3840 if (c == ':')
3841 qm_col_count--;
3842 }
3843 return lasttok = c;
3844
3845 /*
3846 * in_parens is undefined unless we are parsing a print
3847 * statement (in_print), but why bother with a check?
3848 */
3849 case ')':
3850 in_parens--;
3851 return lasttok = c;
3852
3853 case '(':
3854 in_parens++;
3855 return lasttok = c;
3856 case '$':
3857 yylval = GET_INSTRUCTION(Op_field_spec);
3858 return lasttok = c;
3859 case '{':
3860 if (++in_braces == 1)
3861 firstline = sourceline;
3862 case ';':
3863 case ',':
3864 case '[':
3865 return lasttok = c;
3866 case ']':
3867 c = nextc(true);
3868 pushback();
3869 if (c == '[') {
3870 if (do_traditional)
3871 fatal(_("multidimensional arrays are a gawk extension"));
3872 if (do_lint_extensions)
3873 lintwarn(_("multidimensional arrays are a gawk extension"));
3874 yylval = GET_INSTRUCTION(Op_sub_array);
3875 lasttok = ']';
3876 } else {
3877 yylval = GET_INSTRUCTION(Op_subscript);
3878 lasttok = SUBSCRIPT; /* end of subscripts */
3879 }
3880 return ']';
3881
3882 case '*':
3883 if ((c = nextc(true)) == '=') {
3884 yylval = GET_INSTRUCTION(Op_assign_times);
3885 return lasttok = ASSIGNOP;
3886 } else if (do_posix) {
3887 pushback();
3888 yylval = GET_INSTRUCTION(Op_times);
3889 return lasttok = '*';
3890 } else if (c == '*') {
3891 /* make ** and **= aliases for ^ and ^= */
3892 static bool did_warn_op = false, did_warn_assgn = false;
3893
3894 if (nextc(true) == '=') {
3895 if (! did_warn_assgn) {
3896 did_warn_assgn = true;
3897 if (do_lint)
3898 lintwarn(_("POSIX does not allow operator `%s'"), "**=");
3899 if (do_lint_old)
3900 lintwarn(_("operator `%s' is not supported in old awk"), "**=");
3901 }
3902 yylval = GET_INSTRUCTION(Op_assign_exp);
3903 return ASSIGNOP;
3904 } else {
3905 pushback();
3906 if (! did_warn_op) {
3907 did_warn_op = true;
3908 if (do_lint)
3909 lintwarn(_("POSIX does not allow operator `%s'"), "**");
3910 if (do_lint_old)
3911 lintwarn(_("operator `%s' is not supported in old awk"), "**");
3912 }
3913 yylval = GET_INSTRUCTION(Op_exp);
3914 return lasttok = '^';
3915 }
3916 }
3917 pushback();
3918 yylval = GET_INSTRUCTION(Op_times);
3919 return lasttok = '*';
3920
3921 case '/':
3922 if (nextc(false) == '=') {
3923 pushback();
3924 return lasttok = SLASH_BEFORE_EQUAL;
3925 }
3926 pushback();
3927 yylval = GET_INSTRUCTION(Op_quotient);
3928 return lasttok = '/';
3929
3930 case '%':
3931 if (nextc(true) == '=') {
3932 yylval = GET_INSTRUCTION(Op_assign_mod);
3933 return lasttok = ASSIGNOP;
3934 }
3935 pushback();
3936 yylval = GET_INSTRUCTION(Op_mod);
3937 return lasttok = '%';
3938
3939 case '^':
3940 {
3941 static bool did_warn_op = false, did_warn_assgn = false;
3942
3943 if (nextc(true) == '=') {
3944 if (do_lint_old && ! did_warn_assgn) {
3945 did_warn_assgn = true;
3946 lintwarn(_("operator `%s' is not supported in old awk"), "^=");
3947 }
3948 yylval = GET_INSTRUCTION(Op_assign_exp);
3949 return lasttok = ASSIGNOP;
3950 }
3951 pushback();
3952 if (do_lint_old && ! did_warn_op) {
3953 did_warn_op = true;
3954 lintwarn(_("operator `%s' is not supported in old awk"), "^");
3955 }
3956 yylval = GET_INSTRUCTION(Op_exp);
3957 return lasttok = '^';
3958 }
3959
3960 case '+':
3961 if ((c = nextc(true)) == '=') {
3962 yylval = GET_INSTRUCTION(Op_assign_plus);
3963 return lasttok = ASSIGNOP;
3964 }
3965 if (c == '+') {
3966 yylval = GET_INSTRUCTION(Op_symbol);
3967 return lasttok = INCREMENT;
3968 }
3969 pushback();
3970 yylval = GET_INSTRUCTION(Op_plus);
3971 return lasttok = '+';
3972
3973 case '!':
3974 if ((c = nextc(true)) == '=') {
3975 yylval = GET_INSTRUCTION(Op_notequal);
3976 return lasttok = RELOP;
3977 }
3978 if (c == '~') {
3979 yylval = GET_INSTRUCTION(Op_nomatch);
3980 return lasttok = MATCHOP;
3981 }
3982 pushback();
3983 yylval = GET_INSTRUCTION(Op_symbol);
3984 return lasttok = '!';
3985
3986 case '<':
3987 if (nextc(true) == '=') {
3988 yylval = GET_INSTRUCTION(Op_leq);
3989 return lasttok = RELOP;
3990 }
3991 yylval = GET_INSTRUCTION(Op_less);
3992 pushback();
3993 return lasttok = '<';
3994
3995 case '=':
3996 if (nextc(true) == '=') {
3997 yylval = GET_INSTRUCTION(Op_equal);
3998 return lasttok = RELOP;
3999 }
4000 yylval = GET_INSTRUCTION(Op_assign);
4001 pushback();
4002 return lasttok = ASSIGN;
4003
4004 case '>':
4005 if ((c = nextc(true)) == '=') {
4006 yylval = GET_INSTRUCTION(Op_geq);
4007 return lasttok = RELOP;
4008 } else if (c == '>') {
4009 yylval = GET_INSTRUCTION(Op_symbol);
4010 yylval->redir_type = redirect_append;
4011 return lasttok = IO_OUT;
4012 }
4013 pushback();
4014 if (in_print && in_parens == 0) {
4015 yylval = GET_INSTRUCTION(Op_symbol);
4016 yylval->redir_type = redirect_output;
4017 return lasttok = IO_OUT;
4018 }
4019 yylval = GET_INSTRUCTION(Op_greater);
4020 return lasttok = '>';
4021
4022 case '~':
4023 yylval = GET_INSTRUCTION(Op_match);
4024 return lasttok = MATCHOP;
4025
4026 case '}':
4027 /*
4028 * Added did newline stuff. Easier than
4029 * hacking the grammar.
4030 */
4031 if (did_newline) {
4032 did_newline = false;
4033 if (--in_braces == 0)
4034 lastline = sourceline;
4035 return lasttok = c;
4036 }
4037 did_newline = true;
4038 --lexptr; /* pick up } next time */
4039 return lasttok = NEWLINE;
4040
4041 case '"':
4042 string:
4043 esc_seen = false;
4044 /*
4045 * Allow any kind of junk in quoted string,
4046 * so pass false to nextc().
4047 */
4048 while ((c = nextc(false)) != '"') {
4049 if (c == '\n') {
4050 pushback();
4051 yyerror(_("unterminated string"));
4052 return lasttok = LEX_EOF;
4053 }
4054 if ((gawk_mb_cur_max == 1 || nextc_is_1stbyte) &&
4055 c == '\\') {
4056 c = nextc(true);
4057 if (c == '\r') /* allow MS-DOS files. bleah */
4058 c = nextc(true);
4059 if (c == '\n') {
4060 if (do_posix)
4061 fatal(_("POSIX does not allow physical newlines in string values"));
4062 else if (do_lint)
4063 lintwarn(_("backslash string continuation is not portable"));
4064 sourceline++;
4065 continue;
4066 }
4067 esc_seen = true;
4068 if (! want_source || c != '"')
4069 tokadd('\\');
4070 }
4071 if (c == END_FILE) {
4072 pushback();
4073 yyerror(_("unterminated string"));
4074 return lasttok = LEX_EOF;
4075 }
4076 tokadd(c);
4077 }
4078 yylval = GET_INSTRUCTION(Op_token);
4079 if (want_source) {
4080 yylval->lextok = estrdup(tokstart, tok - tokstart);
4081 return lasttok = FILENAME;
4082 }
4083
4084 yylval->opcode = Op_push_i;
4085 yylval->memory = make_str_node(tokstart,
4086 tok - tokstart, esc_seen ? SCAN : 0);
4087 if (intlstr) {
4088 yylval->memory->flags |= INTLSTR;
4089 intlstr = false;
4090 if (do_intl)
4091 dumpintlstr(yylval->memory->stptr, yylval->memory->stlen);
4092 }
4093 return lasttok = YSTRING;
4094
4095 case '-':
4096 if ((c = nextc(true)) == '=') {
4097 yylval = GET_INSTRUCTION(Op_assign_minus);
4098 return lasttok = ASSIGNOP;
4099 }
4100 if (c == '-') {
4101 yylval = GET_INSTRUCTION(Op_symbol);
4102 return lasttok = DECREMENT;
4103 }
4104 pushback();
4105 yylval = GET_INSTRUCTION(Op_minus);
4106 return lasttok = '-';
4107
4108 case '.':
4109 c = nextc(true);
4110 pushback();
4111 if (! isdigit(c))
4112 return lasttok = '.';
4113 else
4114 c = '.';
4115 /* FALL THROUGH */
4116 case '0':
4117 case '1':
4118 case '2':
4119 case '3':
4120 case '4':
4121 case '5':
4122 case '6':
4123 case '7':
4124 case '8':
4125 case '9':
4126 /* It's a number */
4127 for (;;) {
4128 bool gotnumber = false;
4129
4130 tokadd(c);
4131 switch (c) {
4132 case 'x':
4133 case 'X':
4134 if (do_traditional)
4135 goto done;
4136 if (tok == tokstart + 2) {
4137 int peek = nextc(true);
4138
4139 if (isxdigit(peek)) {
4140 inhex = true;
4141 pushback(); /* following digit */
4142 } else {
4143 pushback(); /* x or X */
4144 goto done;
4145 }
4146 }
4147 break;
4148 case '.':
4149 /* period ends exponent part of floating point number */
4150 if (seen_point || seen_e) {
4151 gotnumber = true;
4152 break;
4153 }
4154 seen_point = true;
4155 break;
4156 case 'e':
4157 case 'E':
4158 if (inhex)
4159 break;
4160 if (seen_e) {
4161 gotnumber = true;
4162 break;
4163 }
4164 seen_e = true;
4165 if ((c = nextc(true)) == '-' || c == '+') {
4166 int c2 = nextc(true);
4167
4168 if (isdigit(c2)) {
4169 tokadd(c);
4170 tokadd(c2);
4171 } else {
4172 pushback(); /* non-digit after + or - */
4173 pushback(); /* + or - */
4174 pushback(); /* e or E */
4175 }
4176 } else if (! isdigit(c)) {
4177 pushback(); /* character after e or E */
4178 pushback(); /* e or E */
4179 } else {
4180 pushback(); /* digit */
4181 }
4182 break;
4183 case 'a':
4184 case 'A':
4185 case 'b':
4186 case 'B':
4187 case 'c':
4188 case 'C':
4189 case 'D':
4190 case 'd':
4191 case 'f':
4192 case 'F':
4193 if (do_traditional || ! inhex)
4194 goto done;
4195 /* fall through */
4196 case '0':
4197 case '1':
4198 case '2':
4199 case '3':
4200 case '4':
4201 case '5':
4202 case '6':
4203 case '7':
4204 case '8':
4205 case '9':
4206 break;
4207 default:
4208 done:
4209 gotnumber = true;
4210 }
4211 if (gotnumber)
4212 break;
4213 c = nextc(true);
4214 }
4215 pushback();
4216
4217 tokadd('\0');
4218 yylval = GET_INSTRUCTION(Op_push_i);
4219
4220 base = 10;
4221 if (! do_traditional) {
4222 base = get_numbase(tokstart, strlen(tokstart)-1, false);
4223 if (do_lint) {
4224 if (base == 8)
4225 lintwarn("numeric constant `%.*s' treated as octal",
4226 (int) strlen(tokstart)-1, tokstart);
4227 else if (base == 16)
4228 lintwarn("numeric constant `%.*s' treated as hexadecimal",
4229 (int) strlen(tokstart)-1, tokstart);
4230 }
4231 }
4232
4233 #ifdef HAVE_MPFR
4234 if (do_mpfr) {
4235 NODE *r;
4236
4237 if (! seen_point && ! seen_e) {
4238 r = mpg_integer();
4239 mpg_strtoui(r->mpg_i, tokstart, strlen(tokstart), NULL, base);
4240 errno = 0;
4241 } else {
4242 int tval;
4243 r = mpg_float();
4244 tval = mpfr_strtofr(r->mpg_numbr, tokstart, NULL, base, ROUND_MODE);
4245 errno = 0;
4246 IEEE_FMT(r->mpg_numbr, tval);
4247 }
4248 yylval->memory = set_profile_text(r, tokstart, strlen(tokstart)-1);
4249 return lasttok = YNUMBER;
4250 }
4251 #endif
4252 if (base != 10)
4253 d = nondec2awknum(tokstart, strlen(tokstart)-1, NULL);
4254 else
4255 d = atof(tokstart);
4256 yylval->memory = set_profile_text(make_number(d), tokstart, strlen(tokstart) - 1);
4257 if (d <= INT32_MAX && d >= INT32_MIN && d == (int32_t) d)
4258 yylval->memory->flags |= NUMINT;
4259 return lasttok = YNUMBER;
4260
4261 case '&':
4262 if ((c = nextc(true)) == '&') {
4263 yylval = GET_INSTRUCTION(Op_and);
4264 INSTRUCTION *new_comment = NULL;
4265 allow_newline(& new_comment);
4266 yylval->comment = new_comment;
4267
4268 return lasttok = LEX_AND;
4269 }
4270 pushback();
4271 yylval = GET_INSTRUCTION(Op_symbol);
4272 return lasttok = '&';
4273
4274 case '|':
4275 if ((c = nextc(true)) == '|') {
4276 yylval = GET_INSTRUCTION(Op_or);
4277 INSTRUCTION *new_comment = NULL;
4278 allow_newline(& new_comment);
4279 yylval->comment = new_comment;
4280
4281 return lasttok = LEX_OR;
4282 } else if (! do_traditional && c == '&') {
4283 yylval = GET_INSTRUCTION(Op_symbol);
4284 yylval->redir_type = redirect_twoway;
4285
4286 return lasttok = (in_print && in_parens == 0 ? IO_OUT : IO_IN);
4287 }
4288 pushback();
4289 if (in_print && in_parens == 0) {
4290 yylval = GET_INSTRUCTION(Op_symbol);
4291 yylval->redir_type = redirect_pipe;
4292 return lasttok = IO_OUT;
4293 } else {
4294 yylval = GET_INSTRUCTION(Op_symbol);
4295 yylval->redir_type = redirect_pipein;
4296 return lasttok = IO_IN;
4297 }
4298 }
4299
4300 if (! is_letter(c)) {
4301 yyerror(_("invalid char '%c' in expression"), c);
4302 return lasttok = LEX_EOF;
4303 }
4304
4305 /*
4306 * Lots of fog here. Consider:
4307 *
4308 * print "xyzzy"$_"foo"
4309 *
4310 * Without the check for ` lasttok != '$' ', this is parsed as
4311 *
4312 * print "xxyzz" $(_"foo")
4313 *
4314 * With the check, it is "correctly" parsed as three
4315 * string concatenations. Sigh. This seems to be
4316 * "more correct", but this is definitely one of those
4317 * occasions where the interactions are funny.
4318 */
4319 if (! do_traditional && c == '_' && lasttok != '$') {
4320 if ((c = nextc(true)) == '"') {
4321 intlstr = true;
4322 goto string;
4323 }
4324 pushback();
4325 c = '_';
4326 }
4327
4328 /* it's some type of name-type-thing. Find its length. */
4329 tok = tokstart;
4330 while (c != END_FILE && is_identchar(c)) {
4331 tokadd(c);
4332 c = nextc(true);
4333
4334 if (! do_traditional && c == ':') {
4335 int peek = nextc(true);
4336
4337 if (peek == ':') { // saw identifier::
4338 tokadd(c);
4339 tokadd(c);
4340 c = nextc(true);
4341 } else
4342 pushback();
4343 // then continue around the loop, c == ':'
4344 }
4345 }
4346 tokadd('\0');
4347 pushback();
4348
4349 (void) validate_qualified_name(tokstart);
4350
4351 /* See if it is a special token. */
4352 if ((mid = check_qualified_special(tokstart)) >= 0) {
4353 static int warntab[sizeof(tokentab) / sizeof(tokentab[0])];
4354 int class = tokentab[mid].class;
4355
4356 switch (class) {
4357 case LEX_EVAL:
4358 case LEX_INCLUDE:
4359 case LEX_LOAD:
4360 case LEX_NAMESPACE:
4361 if (lasttok != '@')
4362 goto out;
4363 default:
4364 break;
4365 }
4366
4367 /* allow parameter names to shadow the names of gawk extension built-ins */
4368 if ((tokentab[mid].flags & GAWKX) != 0) {
4369 NODE *f;
4370
4371 switch (want_param_names) {
4372 case FUNC_HEADER:
4373 /* in header, defining parameter names */
4374 goto out;
4375 case FUNC_BODY:
4376 /* in body, name must be in symbol table for it to be a parameter */
4377 if ((f = lookup(tokstart)) != NULL) {
4378 if (f->type == Node_builtin_func)
4379 break;
4380 else
4381 goto out;
4382 }
4383 /* else
4384 fall through */
4385 case DONT_CHECK:
4386 /* regular code */
4387 break;
4388 default:
4389 cant_happen();
4390 break;
4391 }
4392 }
4393
4394 if (do_lint) {
4395 if (do_lint_extensions && (tokentab[mid].flags & GAWKX) != 0 && (warntab[mid] & GAWKX) == 0) {
4396 lintwarn(_("`%s' is a gawk extension"),
4397 tokentab[mid].operator);
4398 warntab[mid] |= GAWKX;
4399 }
4400 if ((tokentab[mid].flags & NOT_POSIX) != 0 && (warntab[mid] & NOT_POSIX) == 0) {
4401 lintwarn(_("POSIX does not allow `%s'"),
4402 tokentab[mid].operator);
4403 warntab[mid] |= NOT_POSIX;
4404 }
4405 }
4406 if (do_lint_old && (tokentab[mid].flags & NOT_OLD) != 0
4407 && (warntab[mid] & NOT_OLD) == 0
4408 ) {
4409 lintwarn(_("`%s' is not supported in old awk"),
4410 tokentab[mid].operator);
4411 warntab[mid] |= NOT_OLD;
4412 }
4413
4414 if ((tokentab[mid].flags & BREAK) != 0)
4415 break_allowed++;
4416 if ((tokentab[mid].flags & CONTINUE) != 0)
4417 continue_allowed++;
4418
4419 switch (class) {
4420 case LEX_NAMESPACE:
4421 case LEX_INCLUDE:
4422 case LEX_LOAD:
4423 want_source = true;
4424 break;
4425 case LEX_EVAL:
4426 if (in_main_context())
4427 goto out;
4428 emalloc(tokkey, char *, tok - tokstart + 1, "yylex");
4429 tokkey[0] = '@';
4430 memcpy(tokkey + 1, tokstart, tok - tokstart);
4431 yylval = GET_INSTRUCTION(Op_token);
4432 yylval->lextok = tokkey;
4433 break;
4434
4435 case LEX_FUNCTION:
4436 case LEX_BEGIN:
4437 case LEX_END:
4438 case LEX_BEGINFILE:
4439 case LEX_ENDFILE:
4440 yylval = bcalloc(tokentab[mid].value, 4, sourceline);
4441 break;
4442
4443 case LEX_FOR:
4444 case LEX_WHILE:
4445 case LEX_DO:
4446 case LEX_SWITCH:
4447 if (! do_pretty_print)
4448 return lasttok = class;
4449 /* fall through */
4450 case LEX_CASE:
4451 yylval = bcalloc(tokentab[mid].value, 2, sourceline);
4452 break;
4453
4454 /*
4455 * These must be checked here, due to the LALR nature of the parser,
4456 * the rules for continue and break may not be reduced until after
4457 * a token that increments the xxx_allowed varibles is seen. Bleah.
4458 */
4459 case LEX_CONTINUE:
4460 if (! continue_allowed) {
4461 error_ln(sourceline,
4462 _("`continue' is not allowed outside a loop"));
4463 errcount++;
4464 }
4465 goto make_instruction;
4466
4467 case LEX_BREAK:
4468 if (! break_allowed) {
4469 error_ln(sourceline,
4470 _("`break' is not allowed outside a loop or switch"));
4471 errcount++;
4472 }
4473 goto make_instruction;
4474
4475 default:
4476 make_instruction:
4477 yylval = GET_INSTRUCTION(tokentab[mid].value);
4478 if (class == LEX_BUILTIN || class == LEX_LENGTH)
4479 yylval->builtin_idx = mid;
4480 break;
4481 }
4482 return lasttok = class;
4483 }
4484 out:
4485 if (want_param_names == FUNC_HEADER)
4486 tokkey = estrdup(tokstart, tok - tokstart - 1);
4487 else
4488 tokkey = qualify_name(tokstart, tok - tokstart - 1);
4489
4490 if (*lexptr == '(') {
4491 yylval = bcalloc(Op_token, 2, sourceline);
4492 yylval->lextok = tokkey;
4493 return lasttok = FUNC_CALL;
4494 } else {
4495 static bool goto_warned = false;
4496
4497 yylval = GET_INSTRUCTION(Op_token);
4498 yylval->lextok = tokkey;
4499
4500 #define SMART_ALECK 1
4501 if (SMART_ALECK
4502 && do_lint
4503 && ! goto_warned
4504 && tolower(tokkey[0]) == 'g'
4505 && strcasecmp(tokkey, "goto") == 0) {
4506 goto_warned = true;
4507 lintwarn(_("`goto' considered harmful!"));
4508 }
4509 return lasttok = NAME;
4510 }
4511
4512 #undef GET_INSTRUCTION
4513 #undef NEWLINE_EOF
4514 }
4515
4516 /* It's EBCDIC in a Bison grammar, run for the hills!
4517
4518 Or, convert single-character tokens coming out of yylex() from EBCDIC to
4519 ASCII values on-the-fly so that the parse tables need not be regenerated
4520 for EBCDIC systems. */
4521 #ifdef USE_EBCDIC
4522 static int
yylex(void)4523 yylex(void)
4524 {
4525 static char etoa_xlate[256];
4526 static bool do_etoa_init = true;
4527 int tok;
4528
4529 if (do_etoa_init)
4530 {
4531 for (tok = 0; tok < 256; tok++)
4532 etoa_xlate[tok] = (char) tok;
4533 #ifdef HAVE___ETOA_L
4534 /* IBM helpfully provides this function. */
4535 __etoa_l(etoa_xlate, sizeof(etoa_xlate));
4536 #else
4537 # error "An EBCDIC-to-ASCII translation function is needed for this system"
4538 #endif
4539 do_etoa_init = false;
4540 }
4541
4542 tok = yylex_ebcdic();
4543
4544 if (tok >= 0 && tok <= 0xFF)
4545 tok = etoa_xlate[tok];
4546
4547 return tok;
4548 }
4549 #endif /* USE_EBCDIC */
4550
4551 /* snode --- instructions for builtin functions. Checks for arg. count
4552 and supplies defaults where possible. */
4553
4554 static INSTRUCTION *
snode(INSTRUCTION * subn,INSTRUCTION * r)4555 snode(INSTRUCTION *subn, INSTRUCTION *r)
4556 {
4557 INSTRUCTION *arg;
4558 INSTRUCTION *ip;
4559 NODE *n;
4560 int nexp = 0;
4561 int args_allowed;
4562 int idx = r->builtin_idx;
4563
4564 if (subn != NULL) {
4565 INSTRUCTION *tp;
4566 for (tp = subn->nexti; tp; tp = tp->nexti) {
4567 tp = tp->lasti;
4568 nexp++;
4569 }
4570 assert(nexp > 0);
4571 }
4572
4573 /* check against how many args. are allowed for this builtin */
4574 args_allowed = tokentab[idx].flags & ARGS;
4575 if (args_allowed && (args_allowed & A(nexp)) == 0) {
4576 yyerror(_("%d is invalid as number of arguments for %s"),
4577 nexp, tokentab[idx].operator);
4578 return NULL;
4579 }
4580
4581 /* special processing for sub, gsub and gensub */
4582
4583 if (tokentab[idx].value == Op_sub_builtin) {
4584 const char *operator = tokentab[idx].operator;
4585
4586 r->sub_flags = 0;
4587
4588 arg = subn->nexti; /* first arg list */
4589 (void) mk_rexp(arg);
4590
4591 if (strcmp(operator, "gensub") != 0) {
4592 /* sub and gsub */
4593
4594 if (strcmp(operator, "gsub") == 0)
4595 r->sub_flags |= GSUB;
4596
4597 arg = arg->lasti->nexti; /* 2nd arg list */
4598 if (nexp == 2) {
4599 INSTRUCTION *expr;
4600
4601 expr = list_create(instruction(Op_push_i));
4602 expr->nexti->memory = set_profile_text(make_number(0.0), "0", 1);
4603 (void) mk_expression_list(subn,
4604 list_append(expr, instruction(Op_field_spec)));
4605 }
4606
4607 arg = arg->lasti->nexti; /* third arg list */
4608 ip = arg->lasti;
4609 if (ip->opcode == Op_push_i) {
4610 if (do_lint)
4611 lintwarn(_("%s: string literal as last argument of substitute has no effect"),
4612 operator);
4613 r->sub_flags |= LITERAL;
4614 } else {
4615 if (make_assignable(ip) == NULL)
4616 yyerror(_("%s third parameter is not a changeable object"),
4617 operator);
4618 else
4619 ip->do_reference = true;
4620 }
4621
4622 r->expr_count = count_expressions(&subn, false);
4623 ip = subn->lasti;
4624
4625 (void) list_append(subn, r);
4626
4627 /* add after_assign code */
4628 if (ip->opcode == Op_push_lhs && ip->memory->type == Node_var && ip->memory->var_assign) {
4629 (void) list_append(subn, instruction(Op_var_assign));
4630 subn->lasti->assign_ctxt = Op_sub_builtin;
4631 subn->lasti->assign_var = ip->memory->var_assign;
4632 } else if (ip->opcode == Op_field_spec_lhs) {
4633 (void) list_append(subn, instruction(Op_field_assign));
4634 subn->lasti->assign_ctxt = Op_sub_builtin;
4635 subn->lasti->field_assign = (Func_ptr) 0;
4636 ip->target_assign = subn->lasti;
4637 } else if (ip->opcode == Op_subscript_lhs) {
4638 (void) list_append(subn, instruction(Op_subscript_assign));
4639 subn->lasti->assign_ctxt = Op_sub_builtin;
4640 }
4641
4642 return subn;
4643
4644 } else {
4645 /* gensub */
4646
4647 r->sub_flags |= GENSUB;
4648 if (nexp == 3) {
4649 ip = instruction(Op_push_i);
4650 ip->memory = set_profile_text(make_number(0.0), "0", 1);
4651 (void) mk_expression_list(subn,
4652 list_append(list_create(ip), instruction(Op_field_spec)));
4653 }
4654
4655 r->expr_count = count_expressions(&subn, false);
4656 return list_append(subn, r);
4657 }
4658 }
4659
4660 #ifdef HAVE_MPFR
4661 /* N.B.: If necessary, add special processing for alternate builtin, below */
4662 if (do_mpfr && tokentab[idx].ptr2)
4663 r->builtin = tokentab[idx].ptr2;
4664 else
4665 #endif
4666 r->builtin = tokentab[idx].ptr;
4667
4668 /* special case processing for a few builtins */
4669
4670 if (r->builtin == do_length) {
4671 if (nexp == 0) {
4672 /* no args. Use $0 */
4673
4674 INSTRUCTION *list;
4675 r->expr_count = 1;
4676 list = list_create(r);
4677 (void) list_prepend(list, instruction(Op_field_spec));
4678 (void) list_prepend(list, instruction(Op_push_i));
4679 list->nexti->memory = set_profile_text(make_number(0.0), "0", 1);
4680 return list;
4681 } else {
4682 arg = subn->nexti;
4683 if (arg->nexti == arg->lasti && arg->nexti->opcode == Op_push)
4684 arg->nexti->opcode = Op_push_arg; /* argument may be array */
4685 }
4686 } else if (r->builtin == do_isarray) {
4687 arg = subn->nexti;
4688 if (arg->nexti == arg->lasti && arg->nexti->opcode == Op_push)
4689 arg->nexti->opcode = Op_push_arg_untyped; /* argument may be untyped */
4690 } else if (r->builtin == do_typeof) {
4691 arg = subn->nexti;
4692 if (arg->nexti == arg->lasti && arg->nexti->opcode == Op_push)
4693 arg->nexti->opcode = Op_push_arg_untyped; /* argument may be untyped */
4694 if (nexp == 2) { /* 2nd argument there */
4695 arg = subn->nexti->lasti->nexti; /* 2nd arg list */
4696 ip = arg->lasti;
4697 if (ip->opcode == Op_push)
4698 ip->opcode = Op_push_array;
4699 }
4700 #ifdef SUPPLY_INTDIV
4701 } else if (r->builtin == do_intdiv
4702 #ifdef HAVE_MPFR
4703 || r->builtin == MPF(intdiv)
4704 #endif
4705 ) {
4706 arg = subn->nexti->lasti->nexti->lasti->nexti; /* 3rd arg list */
4707 ip = arg->lasti;
4708 if (ip->opcode == Op_push)
4709 ip->opcode = Op_push_array;
4710 #endif /* SUPPLY_INTDIV */
4711 } else if (r->builtin == do_match) {
4712 static bool warned = false;
4713
4714 arg = subn->nexti->lasti->nexti; /* 2nd arg list */
4715 (void) mk_rexp(arg);
4716
4717 if (nexp == 3) { /* 3rd argument there */
4718 if (do_lint_extensions && ! warned) {
4719 warned = true;
4720 lintwarn(_("match: third argument is a gawk extension"));
4721 }
4722 if (do_traditional) {
4723 yyerror(_("match: third argument is a gawk extension"));
4724 return NULL;
4725 }
4726
4727 arg = arg->lasti->nexti; /* third arg list */
4728 ip = arg->lasti;
4729 if (/*ip == arg->nexti && */ ip->opcode == Op_push)
4730 ip->opcode = Op_push_array;
4731 }
4732 } else if (r->builtin == do_split) {
4733 arg = subn->nexti->lasti->nexti; /* 2nd arg list */
4734 ip = arg->lasti;
4735 if (ip->opcode == Op_push)
4736 ip->opcode = Op_push_array;
4737 if (nexp == 2) {
4738 INSTRUCTION *expr;
4739 expr = list_create(instruction(Op_push));
4740 expr->nexti->memory = FS_node;
4741 (void) mk_expression_list(subn, expr);
4742 }
4743 arg = arg->lasti->nexti;
4744 n = mk_rexp(arg);
4745 if (nexp == 2)
4746 n->re_flags |= FS_DFLT;
4747 if (nexp == 4) {
4748 arg = arg->lasti->nexti;
4749 ip = arg->lasti;
4750 if (ip->opcode == Op_push)
4751 ip->opcode = Op_push_array;
4752 }
4753 } else if (r->builtin == do_patsplit) {
4754 arg = subn->nexti->lasti->nexti; /* 2nd arg list */
4755 ip = arg->lasti;
4756 if (ip->opcode == Op_push)
4757 ip->opcode = Op_push_array;
4758 if (nexp == 2) {
4759 INSTRUCTION *expr;
4760 expr = list_create(instruction(Op_push));
4761 expr->nexti->memory = FPAT_node;
4762 (void) mk_expression_list(subn, expr);
4763 }
4764 arg = arg->lasti->nexti;
4765 n = mk_rexp(arg);
4766 if (nexp == 4) {
4767 arg = arg->lasti->nexti;
4768 ip = arg->lasti;
4769 if (ip->opcode == Op_push)
4770 ip->opcode = Op_push_array;
4771 }
4772 } else if (r->builtin == do_close) {
4773 static bool warned = false;
4774 if (nexp == 2) {
4775 if (do_lint_extensions && ! warned) {
4776 warned = true;
4777 lintwarn(_("close: second argument is a gawk extension"));
4778 }
4779 if (do_traditional) {
4780 yyerror(_("close: second argument is a gawk extension"));
4781 return NULL;
4782 }
4783 }
4784 } else if (do_intl /* --gen-po */
4785 && r->builtin == do_dcgettext /* dcgettext(...) */
4786 && subn->nexti->lasti->opcode == Op_push_i /* 1st arg is constant */
4787 && (subn->nexti->lasti->memory->flags & STRING) != 0) { /* it's a string constant */
4788 /* ala xgettext, dcgettext("some string" ...) dumps the string */
4789 NODE *str = subn->nexti->lasti->memory;
4790
4791 if ((str->flags & INTLSTR) != 0)
4792 warning(_("use of dcgettext(_\"...\") is incorrect: remove leading underscore"));
4793 /* don't dump it, the lexer already did */
4794 else
4795 dumpintlstr(str->stptr, str->stlen);
4796 } else if (do_intl /* --gen-po */
4797 && r->builtin == do_dcngettext /* dcngettext(...) */
4798 && subn->nexti->lasti->opcode == Op_push_i /* 1st arg is constant */
4799 && (subn->nexti->lasti->memory->flags & STRING) != 0 /* it's a string constant */
4800 && subn->nexti->lasti->nexti->lasti->opcode == Op_push_i /* 2nd arg is constant too */
4801 && (subn->nexti->lasti->nexti->lasti->memory->flags & STRING) != 0) { /* it's a string constant */
4802 /* ala xgettext, dcngettext("some string", "some plural" ...) dumps the string */
4803 NODE *str1 = subn->nexti->lasti->memory;
4804 NODE *str2 = subn->nexti->lasti->nexti->lasti->memory;
4805
4806 if (((str1->flags | str2->flags) & INTLSTR) != 0)
4807 warning(_("use of dcngettext(_\"...\") is incorrect: remove leading underscore"));
4808 else
4809 dumpintlstr2(str1->stptr, str1->stlen, str2->stptr, str2->stlen);
4810 } else if (r->builtin == do_asort || r->builtin == do_asorti) {
4811 arg = subn->nexti; /* 1st arg list */
4812 ip = arg->lasti;
4813 if (ip->opcode == Op_push)
4814 ip->opcode = Op_push_array;
4815 if (nexp >= 2) {
4816 arg = ip->nexti;
4817 ip = arg->lasti;
4818 if (ip->opcode == Op_push)
4819 ip->opcode = Op_push_array;
4820 }
4821 }
4822 else if (r->builtin == do_index) {
4823 arg = subn->nexti->lasti->nexti; /* 2nd arg list */
4824 ip = arg->lasti;
4825 if (ip->opcode == Op_match_rec || ip->opcode == Op_push_re)
4826 fatal(_("index: regexp constant as second argument is not allowed"));
4827 }
4828 #ifdef ARRAYDEBUG
4829 else if (r->builtin == do_adump) {
4830 ip = subn->nexti->lasti;
4831 if (ip->opcode == Op_push)
4832 ip->opcode = Op_push_array;
4833 }
4834 #endif
4835
4836 if (subn != NULL) {
4837 r->expr_count = count_expressions(&subn, false);
4838 return list_append(subn, r);
4839 }
4840
4841 r->expr_count = 0;
4842 return list_create(r);
4843 }
4844
4845
4846 /* parms_shadow --- check if parameters shadow globals */
4847
4848 static int
parms_shadow(INSTRUCTION * pc,bool * shadow)4849 parms_shadow(INSTRUCTION *pc, bool *shadow)
4850 {
4851 int pcount, i;
4852 bool ret = false;
4853 NODE *func, *fp;
4854 char *fname;
4855
4856 func = pc->func_body;
4857 fname = func->vname;
4858 fp = func->fparms;
4859
4860 #if 0 /* can't happen, already exited if error ? */
4861 if (fname == NULL || func == NULL) /* error earlier */
4862 return false;
4863 #endif
4864
4865 pcount = func->param_cnt;
4866
4867 if (pcount == 0) /* no args, no problem */
4868 return 0;
4869
4870 source = pc->source_file;
4871 sourceline = pc->source_line;
4872 /*
4873 * Use warning() and not lintwarn() so that can warn
4874 * about all shadowed parameters.
4875 */
4876 for (i = 0; i < pcount; i++) {
4877 if (lookup(fp[i].param) != NULL) {
4878 warning(
4879 _("function `%s': parameter `%s' shadows global variable"),
4880 fname, fp[i].param);
4881 ret = true;
4882 }
4883 }
4884
4885 *shadow |= ret;
4886 return 0;
4887 }
4888
4889 /* valinfo --- dump var info */
4890
4891 void
valinfo(NODE * n,Func_print print_func,FILE * fp)4892 valinfo(NODE *n, Func_print print_func, FILE *fp)
4893 {
4894 if (n == Nnull_string)
4895 print_func(fp, "uninitialized scalar\n");
4896 else if ((n->flags & REGEX) != 0)
4897 print_func(fp, "@/%.*s/\n", n->stlen, n->stptr);
4898 else if ((n->flags & STRING) != 0) {
4899 pp_string_fp(print_func, fp, n->stptr, n->stlen, '"', false);
4900 print_func(fp, "\n");
4901 } else if ((n->flags & NUMBER) != 0) {
4902 #ifdef HAVE_MPFR
4903 if (is_mpg_float(n))
4904 print_func(fp, "%s\n", mpg_fmt("%.17R*g", ROUND_MODE, n->mpg_numbr));
4905 else if (is_mpg_integer(n))
4906 print_func(fp, "%s\n", mpg_fmt("%Zd", n->mpg_i));
4907 else
4908 #endif
4909 print_func(fp, "%.17g\n", n->numbr);
4910 } else
4911 print_func(fp, "?? flags %s\n", flags2str(n->flags));
4912 }
4913
4914
4915 /* dump_vars --- dump the symbol table */
4916
4917 void
dump_vars(const char * fname)4918 dump_vars(const char *fname)
4919 {
4920 FILE *fp;
4921 NODE **vars;
4922
4923 if (fname == NULL)
4924 fp = stderr;
4925 else if (strcmp(fname, "-") == 0)
4926 fp = stdout;
4927 else if ((fp = fopen(fname, "w")) == NULL) {
4928 warning(_("could not open `%s' for writing: %s"), fname, strerror(errno));
4929 warning(_("sending variable list to standard error"));
4930 fp = stderr;
4931 }
4932
4933 vars = variable_list();
4934 print_vars(vars, fprintf, fp);
4935 efree(vars);
4936 if (fp != stdout && fp != stderr && fclose(fp) != 0)
4937 warning(_("%s: close failed: %s"), fname, strerror(errno));
4938 }
4939
4940 /* dump_funcs --- print all functions */
4941
4942 void
dump_funcs()4943 dump_funcs()
4944 {
4945 NODE **funcs;
4946 funcs = function_list(true);
4947 (void) foreach_func(funcs, (int (*)(INSTRUCTION *, void *)) pp_func, (void *) 0);
4948 efree(funcs);
4949 }
4950
4951
4952 /* shadow_funcs --- check all functions for parameters that shadow globals */
4953
4954 void
shadow_funcs()4955 shadow_funcs()
4956 {
4957 static int calls = 0;
4958 bool shadow = false;
4959 NODE **funcs;
4960
4961 if (calls++ != 0)
4962 fatal(_("shadow_funcs() called twice!"));
4963
4964 funcs = function_list(true);
4965 (void) foreach_func(funcs, (int (*)(INSTRUCTION *, void *)) parms_shadow, & shadow);
4966 efree(funcs);
4967
4968 /* End with fatal if the user requested it. */
4969 if (shadow && lintfunc == r_fatal)
4970 lintwarn(_("there were shadowed variables"));
4971 }
4972
4973
4974 /* mk_function --- finalize function definition node; remove parameters
4975 * out of the symbol table.
4976 */
4977
4978 static INSTRUCTION *
mk_function(INSTRUCTION * fi,INSTRUCTION * def)4979 mk_function(INSTRUCTION *fi, INSTRUCTION *def)
4980 {
4981 NODE *thisfunc;
4982
4983 thisfunc = fi->func_body;
4984 assert(thisfunc != NULL);
4985
4986 /* add any pre-function comment to start of action for profile.c */
4987
4988 if (interblock_comment != NULL) {
4989 interblock_comment->source_line = 0;
4990 merge_comments(interblock_comment, fi->comment);
4991 fi->comment = interblock_comment;
4992 interblock_comment = NULL;
4993 }
4994
4995 /*
4996 * Add an implicit return at end;
4997 * also used by 'return' command in debugger
4998 */
4999
5000 (void) list_append(def, instruction(Op_push_i));
5001 def->lasti->memory = dupnode(Nnull_string);
5002 (void) list_append(def, instruction(Op_K_return));
5003
5004 if (trailing_comment != NULL) {
5005 (void) list_append(def, trailing_comment);
5006 trailing_comment = NULL;
5007 }
5008
5009 if (do_pretty_print) {
5010 fi[3].nexti = namespace_chain;
5011 namespace_chain = NULL;
5012 (void) list_prepend(def, instruction(Op_exec_count));
5013 }
5014
5015 /* fi->opcode = Op_func */
5016 (fi + 1)->firsti = def->nexti;
5017 (fi + 1)->lasti = def->lasti;
5018 (fi + 2)->first_line = fi->source_line;
5019 (fi + 2)->last_line = lastline;
5020 fi->nexti = def->nexti;
5021 bcfree(def);
5022
5023 (void) list_append(rule_list, fi + 1); /* debugging */
5024
5025 /* update lint table info */
5026 func_use(thisfunc->vname, FUNC_DEFINE);
5027
5028 /* remove params from symbol table */
5029 remove_params(thisfunc);
5030 return fi;
5031 }
5032
5033 /*
5034 * install_function:
5035 * install function name in the symbol table.
5036 * Extra work, build up and install a list of the parameter names.
5037 */
5038
5039 static int
install_function(char * fname,INSTRUCTION * fi,INSTRUCTION * plist)5040 install_function(char *fname, INSTRUCTION *fi, INSTRUCTION *plist)
5041 {
5042 NODE *r, *f;
5043 int pcount = 0;
5044
5045 r = lookup(fname);
5046 if (r != NULL) {
5047 error_ln(fi->source_line, _("function name `%s' previously defined"), fname);
5048 return -1;
5049 }
5050
5051 if (plist != NULL)
5052 pcount = plist->lasti->param_count + 1;
5053 f = install_symbol(fname, Node_func);
5054 if (f->vname != fname) {
5055 // DON'T free fname, it's done later
5056 fname = f->vname;
5057 }
5058
5059 fi->func_body = f;
5060 f->param_cnt = pcount;
5061 f->code_ptr = fi;
5062 f->fparms = NULL;
5063 if (pcount > 0) {
5064 char **pnames;
5065 pnames = check_params(fname, pcount, plist); /* frees plist */
5066 f->fparms = make_params(pnames, pcount);
5067 efree(pnames);
5068 install_params(f);
5069 }
5070 return 0;
5071 }
5072
5073
5074 /* check_params --- build a list of function parameter names after
5075 * making sure that the names are valid and there are no duplicates.
5076 */
5077
5078 static char **
check_params(char * fname,int pcount,INSTRUCTION * list)5079 check_params(char *fname, int pcount, INSTRUCTION *list)
5080 {
5081 INSTRUCTION *p, *np;
5082 int i, j;
5083 char *name;
5084 char **pnames;
5085
5086 assert(pcount > 0);
5087
5088 emalloc(pnames, char **, pcount * sizeof(char *), "check_params");
5089
5090 for (i = 0, p = list->nexti; p != NULL; i++, p = np) {
5091 np = p->nexti;
5092 name = p->lextok;
5093 p->lextok = NULL;
5094
5095 if (strcmp(name, fname) == 0) {
5096 /* check for function foo(foo) { ... }. bleah. */
5097 error_ln(p->source_line,
5098 _("function `%s': cannot use function name as parameter name"), fname);
5099 } else if (is_std_var(name)) {
5100 error_ln(p->source_line,
5101 _("function `%s': cannot use special variable `%s' as a function parameter"),
5102 fname, name);
5103 } else if (strchr(name, ':') != NULL)
5104 error_ln(p->source_line,
5105 _("function `%s': parameter `%s' cannot contain a namespace"),
5106 fname, name);
5107
5108 /* check for duplicate parameters */
5109 for (j = 0; j < i; j++) {
5110 if (strcmp(name, pnames[j]) == 0) {
5111 error_ln(p->source_line,
5112 _("function `%s': parameter #%d, `%s', duplicates parameter #%d"),
5113 fname, i + 1, name, j + 1);
5114 }
5115 }
5116
5117 pnames[i] = name;
5118 bcfree(p);
5119 }
5120 bcfree(list);
5121
5122 return pnames;
5123 }
5124
5125
5126 #ifdef HASHSIZE
5127 undef HASHSIZE
5128 #endif
5129 #define HASHSIZE 1021
5130
5131 static struct fdesc {
5132 char *name;
5133 short used;
5134 short defined;
5135 short extension;
5136 struct fdesc *next;
5137 } *ftable[HASHSIZE];
5138
5139 /* func_use --- track uses and definitions of functions */
5140
5141 static void
func_use(const char * name,enum defref how)5142 func_use(const char *name, enum defref how)
5143 {
5144 struct fdesc *fp;
5145 int len;
5146 int ind;
5147
5148 len = strlen(name);
5149 ind = hash(name, len, HASHSIZE, NULL);
5150
5151 for (fp = ftable[ind]; fp != NULL; fp = fp->next)
5152 if (strcmp(fp->name, name) == 0)
5153 goto update_value;
5154
5155 /* not in the table, fall through to allocate a new one */
5156
5157 ezalloc(fp, struct fdesc *, sizeof(struct fdesc), "func_use");
5158 emalloc(fp->name, char *, len + 1, "func_use");
5159 strcpy(fp->name, name);
5160 fp->next = ftable[ind];
5161 ftable[ind] = fp;
5162
5163 update_value:
5164 if (how == FUNC_DEFINE)
5165 fp->defined++;
5166 else if (how == FUNC_EXT) {
5167 fp->defined++;
5168 fp->extension++;
5169 } else
5170 fp->used++;
5171 }
5172
5173 /* track_ext_func --- add an extension function to the table */
5174
5175 void
track_ext_func(const char * name)5176 track_ext_func(const char *name)
5177 {
5178 func_use(name, FUNC_EXT);
5179 }
5180
5181 /* check_funcs --- verify functions that are called but not defined */
5182
5183 static void
check_funcs()5184 check_funcs()
5185 {
5186 struct fdesc *fp, *next;
5187 int i;
5188
5189 if (! in_main_context())
5190 goto free_mem;
5191
5192 for (i = 0; i < HASHSIZE; i++) {
5193 for (fp = ftable[i]; fp != NULL; fp = fp->next) {
5194 if (do_lint && ! fp->extension) {
5195 /*
5196 * Making this not a lint check and
5197 * incrementing * errcount breaks old code.
5198 * Sigh.
5199 */
5200 if (fp->defined == 0)
5201 lintwarn(_("function `%s' called but never defined"),
5202 fp->name);
5203
5204 if (fp->used == 0)
5205 lintwarn(_("function `%s' defined but never called directly"),
5206 fp->name);
5207 }
5208 }
5209 }
5210
5211 free_mem:
5212 /* now let's free all the memory */
5213 for (i = 0; i < HASHSIZE; i++) {
5214 for (fp = ftable[i]; fp != NULL; fp = next) {
5215 next = fp->next;
5216 efree(fp->name);
5217 efree(fp);
5218 }
5219 ftable[i] = NULL;
5220 }
5221 }
5222
5223 /* param_sanity --- look for parameters that are regexp constants */
5224
5225 static void
param_sanity(INSTRUCTION * arglist)5226 param_sanity(INSTRUCTION *arglist)
5227 {
5228 INSTRUCTION *argl, *arg;
5229 int i = 1;
5230
5231 if (arglist == NULL)
5232 return;
5233 for (argl = arglist->nexti; argl; ) {
5234 arg = argl->lasti;
5235 if (arg->opcode == Op_match_rec)
5236 warning_ln(arg->source_line,
5237 _("regexp constant for parameter #%d yields boolean value"), i);
5238 argl = arg->nexti;
5239 i++;
5240 }
5241 }
5242
5243 /* variable --- make sure NAME is in the symbol table */
5244
5245 NODE *
variable(int location,char * name,NODETYPE type)5246 variable(int location, char *name, NODETYPE type)
5247 {
5248 NODE *r;
5249
5250 if ((r = lookup(name)) != NULL) {
5251 if (r->type == Node_func || r->type == Node_ext_func )
5252 error_ln(location, _("function `%s' called with space between name and `(',\nor used as a variable or an array"),
5253 r->vname);
5254 } else {
5255 /* not found */
5256 return install_symbol(name, type);
5257 }
5258 efree(name);
5259 return r;
5260 }
5261
5262 /* make_regnode --- make a regular expression node */
5263
5264 NODE *
make_regnode(NODETYPE type,NODE * exp)5265 make_regnode(NODETYPE type, NODE *exp)
5266 {
5267 NODE *n;
5268
5269 assert(type == Node_regex || type == Node_dynregex);
5270 getnode(n);
5271 memset(n, 0, sizeof(NODE));
5272 n->type = type;
5273 n->re_cnt = 1;
5274
5275 if (type == Node_regex) {
5276 n->re_reg[0] = make_regexp(exp->stptr, exp->stlen, false, true, false);
5277 if (n->re_reg[0] == NULL) {
5278 freenode(n);
5279 return NULL;
5280 }
5281 n->re_exp = exp;
5282 n->re_flags = CONSTANT;
5283 }
5284 return n;
5285 }
5286
5287
5288 /* mk_rexp --- make a regular expression constant */
5289
5290 static NODE *
mk_rexp(INSTRUCTION * list)5291 mk_rexp(INSTRUCTION *list)
5292 {
5293 INSTRUCTION *ip;
5294
5295 ip = list->nexti;
5296 if (ip == list->lasti && ip->opcode == Op_match_rec)
5297 ip->opcode = Op_push_re;
5298 else if (ip == list->lasti && ip->opcode == Op_push_re)
5299 ; /* do nothing --- @/.../ */
5300 else {
5301 ip = instruction(Op_push_re);
5302 ip->memory = make_regnode(Node_dynregex, NULL);
5303 ip->nexti = list->lasti->nexti;
5304 list->lasti->nexti = ip;
5305 list->lasti = ip;
5306 }
5307 return ip->memory;
5308 }
5309
5310 #ifndef NO_LINT
5311 /* isnoeffect --- when used as a statement, has no side effects */
5312
5313 static int
isnoeffect(OPCODE type)5314 isnoeffect(OPCODE type)
5315 {
5316 switch (type) {
5317 case Op_times:
5318 case Op_times_i:
5319 case Op_quotient:
5320 case Op_quotient_i:
5321 case Op_mod:
5322 case Op_mod_i:
5323 case Op_plus:
5324 case Op_plus_i:
5325 case Op_minus:
5326 case Op_minus_i:
5327 case Op_subscript:
5328 case Op_concat:
5329 case Op_exp:
5330 case Op_exp_i:
5331 case Op_unary_minus:
5332 case Op_field_spec:
5333 case Op_and_final:
5334 case Op_or_final:
5335 case Op_equal:
5336 case Op_notequal:
5337 case Op_less:
5338 case Op_greater:
5339 case Op_leq:
5340 case Op_geq:
5341 case Op_match:
5342 case Op_nomatch:
5343 case Op_match_rec:
5344 case Op_not:
5345 case Op_in_array:
5346 return true;
5347 // Additional opcodes that can be part of an expression
5348 // that has no effect:
5349 case Op_and:
5350 case Op_or:
5351 case Op_push:
5352 case Op_push_i:
5353 case Op_push_array:
5354 case Op_pop:
5355 case Op_lint_plus:
5356 return true;
5357 default:
5358 break; /* keeps gcc -Wall happy */
5359 }
5360
5361 return false;
5362 }
5363 #endif /* NO_LINT */
5364
5365
5366 /* make_assignable --- make this operand an assignable one if posiible */
5367
5368 static INSTRUCTION *
make_assignable(INSTRUCTION * ip)5369 make_assignable(INSTRUCTION *ip)
5370 {
5371 switch (ip->opcode) {
5372 case Op_push:
5373 ip->opcode = Op_push_lhs;
5374 return ip;
5375 case Op_field_spec:
5376 ip->opcode = Op_field_spec_lhs;
5377 return ip;
5378 case Op_subscript:
5379 ip->opcode = Op_subscript_lhs;
5380 return ip;
5381 default:
5382 break; /* keeps gcc -Wall happy */
5383 }
5384 return NULL;
5385 }
5386
5387 /* stopme --- for debugging */
5388
5389 NODE *
stopme(int nargs ATTRIBUTE_UNUSED)5390 stopme(int nargs ATTRIBUTE_UNUSED)
5391 {
5392 return make_number(0.0);
5393 }
5394
5395 /* dumpintlstr --- write out an initial .po file entry for the string */
5396
5397 static void
dumpintlstr(const char * str,size_t len)5398 dumpintlstr(const char *str, size_t len)
5399 {
5400 char *cp;
5401
5402 /* See the GNU gettext distribution for details on the file format */
5403
5404 if (source != NULL) {
5405 /* ala the gettext sources, remove leading `./'s */
5406 for (cp = source; cp[0] == '.' && cp[1] == '/'; cp += 2)
5407 continue;
5408 printf("#: %s:%d\n", cp, sourceline);
5409 }
5410
5411 printf("msgid ");
5412 pp_string_fp(fprintf, stdout, str, len, '"', true);
5413 putchar('\n');
5414 printf("msgstr \"\"\n\n");
5415 fflush(stdout);
5416 }
5417
5418 /* dumpintlstr2 --- write out an initial .po file entry for the string and its plural */
5419
5420 static void
dumpintlstr2(const char * str1,size_t len1,const char * str2,size_t len2)5421 dumpintlstr2(const char *str1, size_t len1, const char *str2, size_t len2)
5422 {
5423 char *cp;
5424
5425 /* See the GNU gettext distribution for details on the file format */
5426
5427 if (source != NULL) {
5428 /* ala the gettext sources, remove leading `./'s */
5429 for (cp = source; cp[0] == '.' && cp[1] == '/'; cp += 2)
5430 continue;
5431 printf("#: %s:%d\n", cp, sourceline);
5432 }
5433
5434 printf("msgid ");
5435 pp_string_fp(fprintf, stdout, str1, len1, '"', true);
5436 putchar('\n');
5437 printf("msgid_plural ");
5438 pp_string_fp(fprintf, stdout, str2, len2, '"', true);
5439 putchar('\n');
5440 printf("msgstr[0] \"\"\nmsgstr[1] \"\"\n\n");
5441 fflush(stdout);
5442 }
5443
5444 /* mk_binary --- instructions for binary operators */
5445
5446 static INSTRUCTION *
mk_binary(INSTRUCTION * s1,INSTRUCTION * s2,INSTRUCTION * op)5447 mk_binary(INSTRUCTION *s1, INSTRUCTION *s2, INSTRUCTION *op)
5448 {
5449 INSTRUCTION *ip1,*ip2, *lint_plus;
5450 AWKNUM res;
5451
5452 ip2 = s2->nexti;
5453 if (s2->lasti == ip2 && ip2->opcode == Op_push_i) {
5454 /* do any numeric constant folding */
5455 ip1 = s1->nexti;
5456 if (do_optimize
5457 && ip1 == s1->lasti && ip1->opcode == Op_push_i
5458 && (ip1->memory->flags & (MPFN|MPZN|STRCUR|STRING)) == 0
5459 && (ip2->memory->flags & (MPFN|MPZN|STRCUR|STRING)) == 0
5460 ) {
5461 NODE *n1 = ip1->memory, *n2 = ip2->memory;
5462 res = force_number(n1)->numbr;
5463 (void) force_number(n2);
5464 switch (op->opcode) {
5465 case Op_times:
5466 res *= n2->numbr;
5467 break;
5468 case Op_quotient:
5469 if (n2->numbr == 0.0) {
5470 /* don't fatalize, allow parsing rest of the input */
5471 error_ln(op->source_line, _("division by zero attempted"));
5472 goto regular;
5473 }
5474
5475 res /= n2->numbr;
5476 break;
5477 case Op_mod:
5478 if (n2->numbr == 0.0) {
5479 /* don't fatalize, allow parsing rest of the input */
5480 error_ln(op->source_line, _("division by zero attempted in `%%'"));
5481 goto regular;
5482 }
5483 #ifdef HAVE_FMOD
5484 res = fmod(res, n2->numbr);
5485 #else /* ! HAVE_FMOD */
5486 (void) modf(res / n2->numbr, &res);
5487 res = n1->numbr - res * n2->numbr;
5488 #endif /* ! HAVE_FMOD */
5489 break;
5490 case Op_plus:
5491 res += n2->numbr;
5492 break;
5493 case Op_minus:
5494 res -= n2->numbr;
5495 break;
5496 case Op_exp:
5497 res = calc_exp(res, n2->numbr);
5498 break;
5499 default:
5500 goto regular;
5501 }
5502
5503 op->opcode = Op_push_i;
5504 // We don't need to call set_profile_text() here since
5505 // optimizing is disabled when doing pretty printing.
5506 op->memory = make_number(res);
5507 unref(n1);
5508 unref(n2);
5509 bcfree(ip1);
5510 bcfree(ip2);
5511 bcfree(s1);
5512 bcfree(s2);
5513 return list_create(op);
5514 } else {
5515 /* do basic arithmetic optimisation */
5516 /* convert (Op_push_i Node_val) + (Op_plus) to (Op_plus_i Node_val) */
5517 switch (op->opcode) {
5518 case Op_times:
5519 op->opcode = Op_times_i;
5520 break;
5521 case Op_quotient:
5522 op->opcode = Op_quotient_i;
5523 break;
5524 case Op_mod:
5525 op->opcode = Op_mod_i;
5526 break;
5527 case Op_plus:
5528 if (do_lint)
5529 goto regular;
5530 op->opcode = Op_plus_i;
5531 break;
5532 case Op_minus:
5533 op->opcode = Op_minus_i;
5534 break;
5535 case Op_exp:
5536 op->opcode = Op_exp_i;
5537 break;
5538 default:
5539 goto regular;
5540 }
5541
5542 op->memory = ip2->memory;
5543 bcfree(ip2);
5544 bcfree(s2); /* Op_list */
5545 return list_append(s1, op);
5546 }
5547 }
5548
5549 regular:
5550 /* append lists s1, s2 and add `op' bytecode */
5551 (void) list_merge(s1, s2);
5552 if (do_lint && op->opcode == Op_plus) {
5553 lint_plus = instruction(Op_lint_plus);
5554 (void) list_append(s1, lint_plus);
5555 }
5556 return list_append(s1, op);
5557 }
5558
5559 /* mk_boolean --- instructions for boolean and, or */
5560
5561 static INSTRUCTION *
mk_boolean(INSTRUCTION * left,INSTRUCTION * right,INSTRUCTION * op)5562 mk_boolean(INSTRUCTION *left, INSTRUCTION *right, INSTRUCTION *op)
5563 {
5564 INSTRUCTION *tp;
5565 OPCODE opc, final_opc;
5566
5567 opc = op->opcode; /* Op_and or Op_or */
5568 final_opc = (opc == Op_or) ? Op_or_final : Op_and_final;
5569
5570 add_lint(right, LINT_assign_in_cond);
5571
5572 tp = left->lasti;
5573
5574 if (tp->opcode != final_opc) { /* x || y */
5575 list_append(right, instruction(final_opc));
5576 add_lint(left, LINT_assign_in_cond);
5577 (void) list_append(left, op);
5578 left->lasti->target_jmp = right->lasti;
5579
5580 /* NB: target_stmt points to previous Op_and(Op_or) in a chain;
5581 * target_stmt only used in the parser (see below).
5582 */
5583
5584 left->lasti->target_stmt = left->lasti;
5585 right->lasti->target_stmt = left->lasti;
5586 } else { /* optimization for x || y || z || ... */
5587 INSTRUCTION *ip;
5588
5589 op->opcode = final_opc;
5590 (void) list_append(right, op);
5591 op->target_stmt = tp;
5592 tp->opcode = opc;
5593 tp->target_jmp = op;
5594
5595 /* update jump targets */
5596 for (ip = tp->target_stmt; ; ip = ip->target_stmt) {
5597 assert(ip->opcode == opc);
5598 assert(ip->target_jmp == tp);
5599 /* if (ip->opcode == opc && ip->target_jmp == tp) */
5600 ip->target_jmp = op;
5601 if (ip->target_stmt == ip)
5602 break;
5603 }
5604 }
5605
5606 return list_merge(left, right);
5607 }
5608
5609 /* mk_condition --- if-else and conditional */
5610
5611 static INSTRUCTION *
mk_condition(INSTRUCTION * cond,INSTRUCTION * ifp,INSTRUCTION * true_branch,INSTRUCTION * elsep,INSTRUCTION * false_branch)5612 mk_condition(INSTRUCTION *cond, INSTRUCTION *ifp, INSTRUCTION *true_branch,
5613 INSTRUCTION *elsep, INSTRUCTION *false_branch)
5614 {
5615 /*
5616 * ----------------
5617 * cond
5618 * ----------------
5619 * t: [Op_jmp_false f ]
5620 * ----------------
5621 * true_branch
5622 *
5623 * ----------------
5624 * [Op_jmp y]
5625 * ----------------
5626 * f:
5627 * false_branch
5628 * ----------------
5629 * y: [Op_no_op]
5630 * ----------------
5631 */
5632
5633 INSTRUCTION *ip;
5634 bool setup_else_part = true;
5635
5636 if (false_branch == NULL) {
5637 false_branch = list_create(instruction(Op_no_op));
5638 if (elsep == NULL) { /* else { } */
5639 setup_else_part = false;
5640 }
5641 } else {
5642 /* assert(elsep != NULL); */
5643
5644 /* avoid a series of no_op's: if .. else if .. else if .. */
5645 if (false_branch->lasti->opcode != Op_no_op)
5646 (void) list_append(false_branch, instruction(Op_no_op));
5647 }
5648
5649 if (setup_else_part) {
5650 if (do_pretty_print) {
5651 (void) list_prepend(false_branch, elsep);
5652 false_branch->nexti->branch_end = false_branch->lasti;
5653 (void) list_prepend(false_branch, instruction(Op_exec_count));
5654 } else
5655 bcfree(elsep);
5656 }
5657
5658 (void) list_prepend(false_branch, instruction(Op_jmp));
5659 false_branch->nexti->target_jmp = false_branch->lasti;
5660
5661 add_lint(cond, LINT_assign_in_cond);
5662 ip = list_append(cond, instruction(Op_jmp_false));
5663 ip->lasti->target_jmp = false_branch->nexti->nexti;
5664
5665 if (do_pretty_print) {
5666 (void) list_prepend(ip, ifp);
5667 (void) list_append(ip, instruction(Op_exec_count));
5668 ip->nexti->branch_if = ip->lasti;
5669 ip->nexti->branch_else = false_branch->nexti;
5670 } else
5671 bcfree(ifp);
5672
5673 if (true_branch != NULL)
5674 list_merge(ip, true_branch);
5675 return list_merge(ip, false_branch);
5676 }
5677
5678 enum defline { FIRST_LINE, LAST_LINE };
5679
5680 /* find_line -- find the first(last) line in a list of (pattern) instructions */
5681
5682 static int
find_line(INSTRUCTION * pattern,enum defline what)5683 find_line(INSTRUCTION *pattern, enum defline what)
5684 {
5685 INSTRUCTION *ip;
5686 int lineno = 0;
5687
5688 for (ip = pattern->nexti; ip; ip = ip->nexti) {
5689 if (what == LAST_LINE) {
5690 if (ip->source_line > lineno)
5691 lineno = ip->source_line;
5692 } else { /* FIRST_LINE */
5693 if (ip->source_line > 0
5694 && (lineno == 0 || ip->source_line < lineno))
5695 lineno = ip->source_line;
5696 }
5697 if (ip == pattern->lasti)
5698 break;
5699 }
5700 assert(lineno > 0);
5701 return lineno;
5702 }
5703
5704 /* append_rule --- pattern-action instructions */
5705
5706 static INSTRUCTION *
append_rule(INSTRUCTION * pattern,INSTRUCTION * action)5707 append_rule(INSTRUCTION *pattern, INSTRUCTION *action)
5708 {
5709 /*
5710 * ----------------
5711 * pattern
5712 * ----------------
5713 * [Op_jmp_false f ]
5714 * ----------------
5715 * action
5716 * ----------------
5717 * f: [Op_no_op ]
5718 * ----------------
5719 */
5720
5721 INSTRUCTION *rp;
5722 INSTRUCTION *tp;
5723 INSTRUCTION *ip;
5724
5725 if (rule != Rule) {
5726 rp = pattern;
5727 if (do_pretty_print) {
5728 rp[3].nexti = namespace_chain;
5729 namespace_chain = NULL;
5730 (void) list_append(action, instruction(Op_no_op));
5731 }
5732 (rp + 1)->firsti = action->nexti;
5733 (rp + 1)->lasti = action->lasti;
5734 (rp + 2)->first_line = pattern->source_line;
5735 (rp + 2)->last_line = lastline;
5736 ip = list_prepend(action, rp);
5737 if (interblock_comment != NULL) {
5738 ip = list_prepend(ip, interblock_comment);
5739 interblock_comment = NULL;
5740 }
5741 } else {
5742 rp = bcalloc(Op_rule, 4, 0);
5743 rp->in_rule = Rule;
5744 rp->source_file = source;
5745 tp = instruction(Op_no_op);
5746
5747 if (do_pretty_print) {
5748 rp[3].nexti = namespace_chain;
5749 namespace_chain = NULL;
5750 }
5751
5752 if (pattern == NULL) {
5753 /* assert(action != NULL); */
5754 if (do_pretty_print)
5755 (void) list_prepend(action, instruction(Op_exec_count));
5756 (rp + 1)->firsti = action->nexti;
5757 (rp + 1)->lasti = tp;
5758 (rp + 2)->first_line = firstline;
5759 (rp + 2)->last_line = lastline;
5760 rp->source_line = firstline;
5761 ip = list_prepend(list_append(action, tp), rp);
5762 } else {
5763 (void) list_append(pattern, instruction(Op_jmp_false));
5764 pattern->lasti->target_jmp = tp;
5765 (rp + 2)->first_line = find_line(pattern, FIRST_LINE);
5766 rp->source_line = (rp + 2)->first_line;
5767 if (action == NULL) {
5768 (rp + 2)->last_line = find_line(pattern, LAST_LINE);
5769 action = list_create(instruction(Op_K_print_rec));
5770 if (do_pretty_print)
5771 action = list_prepend(action, instruction(Op_exec_count));
5772 } else
5773 (rp + 2)->last_line = lastline;
5774
5775 if (interblock_comment != NULL) { // was after previous action
5776 pattern = list_prepend(pattern, interblock_comment);
5777 interblock_comment = NULL;
5778 }
5779
5780 if (do_pretty_print) {
5781 pattern = list_prepend(pattern, instruction(Op_exec_count));
5782 action = list_prepend(action, instruction(Op_exec_count));
5783 }
5784
5785 (rp + 1)->firsti = action->nexti;
5786 (rp + 1)->lasti = tp;
5787 ip = list_append(
5788 list_merge(list_prepend(pattern, rp),
5789 action),
5790 tp);
5791 }
5792 }
5793
5794 list_append(rule_list, rp + 1);
5795
5796 if (rule_block[rule] == NULL)
5797 rule_block[rule] = ip;
5798 else
5799 (void) list_merge(rule_block[rule], ip);
5800
5801 return rule_block[rule];
5802 }
5803
5804 /* mk_assignment --- assignment bytecodes */
5805
5806 static INSTRUCTION *
mk_assignment(INSTRUCTION * lhs,INSTRUCTION * rhs,INSTRUCTION * op)5807 mk_assignment(INSTRUCTION *lhs, INSTRUCTION *rhs, INSTRUCTION *op)
5808 {
5809 INSTRUCTION *tp;
5810 INSTRUCTION *ip;
5811
5812 tp = lhs->lasti;
5813 switch (tp->opcode) {
5814 case Op_field_spec:
5815 tp->opcode = Op_field_spec_lhs;
5816 break;
5817 case Op_subscript:
5818 tp->opcode = Op_subscript_lhs;
5819 break;
5820 case Op_push:
5821 case Op_push_array:
5822 tp->opcode = Op_push_lhs;
5823 break;
5824 case Op_field_assign:
5825 yyerror(_("cannot assign a value to the result of a field post-increment expression"));
5826 break;
5827 default:
5828 yyerror(_("invalid target of assignment (opcode %s)"),
5829 opcode2str(tp->opcode));
5830 break;
5831 }
5832
5833 tp->do_reference = (op->opcode != Op_assign); /* check for uninitialized reference */
5834
5835 if (rhs != NULL)
5836 ip = list_merge(rhs, lhs);
5837 else
5838 ip = lhs;
5839
5840 (void) list_append(ip, op);
5841
5842 if (tp->opcode == Op_push_lhs
5843 && tp->memory->type == Node_var
5844 && tp->memory->var_assign
5845 ) {
5846 tp->do_reference = false; /* no uninitialized reference checking
5847 * for a special variable.
5848 */
5849 (void) list_append(ip, instruction(Op_var_assign));
5850 ip->lasti->assign_var = tp->memory->var_assign;
5851 } else if (tp->opcode == Op_field_spec_lhs) {
5852 (void) list_append(ip, instruction(Op_field_assign));
5853 ip->lasti->field_assign = (Func_ptr) 0;
5854 tp->target_assign = ip->lasti;
5855 } else if (tp->opcode == Op_subscript_lhs) {
5856 (void) list_append(ip, instruction(Op_subscript_assign));
5857 }
5858
5859 return ip;
5860 }
5861
5862 /* optimize_assignment --- peephole optimization for assignment */
5863
5864 static INSTRUCTION *
optimize_assignment(INSTRUCTION * exp)5865 optimize_assignment(INSTRUCTION *exp)
5866 {
5867 INSTRUCTION *i1, *i2, *i3;
5868
5869 /*
5870 * Optimize assignment statements array[subs] = x; var = x; $n = x;
5871 * string concatenation of the form s = s t.
5872 *
5873 * 1) Array element assignment array[subs] = x:
5874 * Replaces Op_push_array + Op_subscript_lhs + Op_assign + Op_pop
5875 * with single instruction Op_store_sub.
5876 * Limitation: 1 dimension and sub is simple var/value.
5877 *
5878 * 2) Simple variable assignment var = x:
5879 * Replaces Op_push_lhs + Op_assign + Op_pop with Op_store_var.
5880 *
5881 * 3) Field assignment $n = x:
5882 * Replaces Op_field_spec_lhs + Op_assign + Op_field_assign + Op_pop
5883 * with Op_store_field.
5884 *
5885 * 4) Optimization for string concatenation:
5886 * For cases like x = x y, uses realloc to include y in x;
5887 * also eliminates instructions Op_push_lhs and Op_pop.
5888 */
5889
5890 /*
5891 * N.B.: do not append Op_pop instruction to the returned
5892 * instruction list if optimized. None of these
5893 * optimized instructions pushes the r-value of assignment
5894 * onto the runtime stack.
5895 */
5896
5897 i2 = NULL;
5898 i1 = exp->lasti;
5899
5900 if ( i1->opcode != Op_assign
5901 && i1->opcode != Op_field_assign)
5902 return list_append(exp, instruction(Op_pop));
5903
5904 for (i2 = exp->nexti; i2 != i1; i2 = i2->nexti) {
5905 switch (i2->opcode) {
5906 case Op_concat:
5907 if (i2->nexti->opcode == Op_push_lhs /* l.h.s is a simple variable */
5908 && (i2->concat_flag & CSVAR) != 0 /* 1st exp in r.h.s is a simple variable;
5909 * see Op_concat in the grammer above.
5910 */
5911 && i2->nexti->memory == exp->nexti->memory /* and the same as in l.h.s */
5912 && i2->nexti->nexti == i1
5913 && i1->opcode == Op_assign
5914 ) {
5915 /* s = s ... optimization */
5916
5917 /* avoid stuff like x = x (x = y) or x = x gsub(/./, "b", x);
5918 * check for l-value reference to this variable in the r.h.s.
5919 * Also, avoid function calls in general to guard against
5920 * global variable assignment.
5921 */
5922
5923 for (i3 = exp->nexti->nexti; i3 != i2; i3 = i3->nexti) {
5924 if ((i3->opcode == Op_push_lhs && i3->memory == i2->nexti->memory)
5925 || i3->opcode == Op_func_call)
5926 return list_append(exp, instruction(Op_pop)); /* no optimization */
5927 }
5928
5929 /* remove the variable from r.h.s */
5930 i3 = exp->nexti;
5931 exp->nexti = i3->nexti;
5932 bcfree(i3);
5933
5934 if (--i2->expr_count == 1) /* one less expression in Op_concat */
5935 i2->opcode = Op_no_op;
5936
5937 i3 = i2->nexti;
5938 assert(i3->opcode == Op_push_lhs);
5939 i3->opcode = Op_assign_concat; /* change Op_push_lhs to Op_assign_concat */
5940 i3->nexti = NULL;
5941 bcfree(i1); /* Op_assign */
5942 exp->lasti = i3; /* update Op_list */
5943 return exp;
5944 }
5945 break;
5946
5947 case Op_field_spec_lhs:
5948 if (i2->nexti->opcode == Op_assign
5949 && i2->nexti->nexti == i1
5950 && i1->opcode == Op_field_assign
5951 ) {
5952 /* $n = .. */
5953 i2->opcode = Op_store_field;
5954 bcfree(i2->nexti); /* Op_assign */
5955 i2->nexti = NULL;
5956 bcfree(i1); /* Op_field_assign */
5957 exp->lasti = i2; /* update Op_list */
5958 return exp;
5959 }
5960 break;
5961
5962 case Op_push_array:
5963 if (i2->nexti->nexti->opcode == Op_subscript_lhs) {
5964 i3 = i2->nexti->nexti;
5965 if (i3->sub_count == 1
5966 && i3->nexti == i1
5967 && i1->opcode == Op_assign
5968 ) {
5969 /* array[sub] = .. */
5970 i3->opcode = Op_store_sub;
5971 i3->memory = i2->memory;
5972 i3->expr_count = 1; /* sub_count shadows memory,
5973 * so use expr_count instead.
5974 */
5975 i3->nexti = NULL;
5976 i2->opcode = Op_no_op;
5977 bcfree(i1); /* Op_assign */
5978 exp->lasti = i3; /* update Op_list */
5979 return exp;
5980 }
5981 }
5982 break;
5983
5984 case Op_push_lhs:
5985 if (i2->nexti == i1
5986 && i1->opcode == Op_assign
5987 ) {
5988 /* var = .. */
5989 i2->opcode = Op_store_var;
5990 i2->nexti = NULL;
5991 bcfree(i1); /* Op_assign */
5992 exp->lasti = i2; /* update Op_list */
5993
5994 i3 = exp->nexti;
5995 if (i3->opcode == Op_push_i
5996 && (i3->memory->flags & INTLSTR) == 0
5997 && i3->nexti == i2
5998 ) {
5999 /* constant initializer */
6000 i2->initval = i3->memory;
6001 bcfree(i3);
6002 exp->nexti = i2;
6003 } else
6004 i2->initval = NULL;
6005
6006 return exp;
6007 }
6008 break;
6009
6010 default:
6011 break;
6012 }
6013 }
6014
6015 /* no optimization */
6016 return list_append(exp, instruction(Op_pop));
6017 }
6018
6019
6020 /* mk_getline --- make instructions for getline */
6021
6022 static INSTRUCTION *
mk_getline(INSTRUCTION * op,INSTRUCTION * var,INSTRUCTION * redir,int redirtype)6023 mk_getline(INSTRUCTION *op, INSTRUCTION *var, INSTRUCTION *redir, int redirtype)
6024 {
6025 INSTRUCTION *ip;
6026 INSTRUCTION *tp;
6027 INSTRUCTION *asgn = NULL;
6028
6029 /*
6030 * getline [var] < [file]
6031 *
6032 * [ file (simp_exp)]
6033 * [ [ var ] ]
6034 * [ Op_K_getline_redir|NULL|redir_type|into_var]
6035 * [ [var_assign] ]
6036 *
6037 */
6038
6039 if (redir == NULL) {
6040 int sline = op->source_line;
6041 bcfree(op);
6042 op = bcalloc(Op_K_getline, 2, sline);
6043 (op + 1)->target_endfile = ip_endfile;
6044 (op + 1)->target_beginfile = ip_beginfile;
6045 }
6046
6047 if (var != NULL) {
6048 tp = make_assignable(var->lasti);
6049 assert(tp != NULL);
6050
6051 /* check if we need after_assign bytecode */
6052 if (tp->opcode == Op_push_lhs
6053 && tp->memory->type == Node_var
6054 && tp->memory->var_assign
6055 ) {
6056 asgn = instruction(Op_var_assign);
6057 asgn->assign_ctxt = op->opcode;
6058 asgn->assign_var = tp->memory->var_assign;
6059 } else if (tp->opcode == Op_field_spec_lhs) {
6060 asgn = instruction(Op_field_assign);
6061 asgn->assign_ctxt = op->opcode;
6062 asgn->field_assign = (Func_ptr) 0; /* determined at run time */
6063 tp->target_assign = asgn;
6064 } else if (tp->opcode == Op_subscript_lhs) {
6065 asgn = instruction(Op_subscript_assign);
6066 asgn->assign_ctxt = op->opcode;
6067 }
6068
6069 if (redir != NULL) {
6070 ip = list_merge(redir, var);
6071 (void) list_append(ip, op);
6072 } else
6073 ip = list_append(var, op);
6074 } else if (redir != NULL)
6075 ip = list_append(redir, op);
6076 else
6077 ip = list_create(op);
6078 op->into_var = (var != NULL);
6079 op->redir_type = (redir != NULL) ? redirtype : redirect_none;
6080
6081 return (asgn == NULL ? ip : list_append(ip, asgn));
6082 }
6083
6084
6085 /* mk_for_loop --- for loop bytecodes */
6086
6087 static INSTRUCTION *
mk_for_loop(INSTRUCTION * forp,INSTRUCTION * init,INSTRUCTION * cond,INSTRUCTION * incr,INSTRUCTION * body)6088 mk_for_loop(INSTRUCTION *forp, INSTRUCTION *init, INSTRUCTION *cond,
6089 INSTRUCTION *incr, INSTRUCTION *body)
6090 {
6091 /*
6092 * ------------------------
6093 * init (may be NULL)
6094 * ------------------------
6095 * x:
6096 * cond (Op_no_op if NULL)
6097 * ------------------------
6098 * [ Op_jmp_false tb ]
6099 * ------------------------
6100 * body (may be NULL)
6101 * ------------------------
6102 * tc:
6103 * incr (may be NULL)
6104 * [ Op_jmp x ]
6105 * ------------------------
6106 * tb:[ Op_no_op ]
6107 */
6108
6109 INSTRUCTION *ip, *tbreak, *tcont;
6110 INSTRUCTION *jmp;
6111 INSTRUCTION *pp_cond;
6112 INSTRUCTION *ret;
6113
6114 tbreak = instruction(Op_no_op);
6115
6116 if (cond != NULL) {
6117 add_lint(cond, LINT_assign_in_cond);
6118 pp_cond = cond->nexti;
6119 ip = cond;
6120 (void) list_append(ip, instruction(Op_jmp_false));
6121 ip->lasti->target_jmp = tbreak;
6122 } else {
6123 pp_cond = instruction(Op_no_op);
6124 ip = list_create(pp_cond);
6125 }
6126
6127 if (init != NULL)
6128 ip = list_merge(init, ip);
6129
6130 if (do_pretty_print) {
6131 (void) list_append(ip, instruction(Op_exec_count));
6132 (forp + 1)->forloop_cond = pp_cond;
6133 (forp + 1)->forloop_body = ip->lasti;
6134 }
6135
6136 if (body != NULL)
6137 (void) list_merge(ip, body);
6138
6139 jmp = instruction(Op_jmp);
6140 jmp->target_jmp = pp_cond;
6141 if (incr == NULL)
6142 tcont = jmp;
6143 else {
6144 tcont = incr->nexti;
6145 (void) list_merge(ip, incr);
6146 }
6147
6148 (void) list_append(ip, jmp);
6149 ret = list_append(ip, tbreak);
6150 fix_break_continue(ret, tbreak, tcont);
6151
6152 if (do_pretty_print) {
6153 forp->target_break = tbreak;
6154 forp->target_continue = tcont;
6155 ret = list_prepend(ret, forp);
6156 }
6157 /* else
6158 forp is NULL */
6159
6160 return ret;
6161 }
6162
6163 /* add_lint --- add lint warning bytecode if needed */
6164
6165 static void
add_lint(INSTRUCTION * list,LINTTYPE linttype)6166 add_lint(INSTRUCTION *list, LINTTYPE linttype)
6167 {
6168 #ifndef NO_LINT
6169 INSTRUCTION *ip;
6170 bool no_effect = true;
6171
6172 switch (linttype) {
6173 case LINT_assign_in_cond:
6174 ip = list->lasti;
6175 if (ip->opcode == Op_var_assign || ip->opcode == Op_field_assign) {
6176 assert(ip != list->nexti);
6177 for (ip = list->nexti; ip->nexti != list->lasti; ip = ip->nexti)
6178 ;
6179 }
6180
6181 if (ip->opcode == Op_assign || ip->opcode == Op_assign_concat) {
6182 list_append(list, instruction(Op_lint));
6183 list->lasti->lint_type = linttype;
6184 }
6185 break;
6186
6187 case LINT_no_effect:
6188 if (list->lasti->opcode == Op_pop && list->nexti != list->lasti) {
6189 int line = 0;
6190
6191 // Get down to the last instruction ...
6192 for (ip = list->nexti; ip->nexti != list->lasti; ip = ip->nexti) {
6193 // ... along the way track line numbers, we will use the line
6194 // closest to the opcode if that opcode doesn't have one
6195 if (ip->source_line != 0)
6196 line = ip->source_line;
6197
6198 // And check each opcode for no effect
6199 no_effect = no_effect && isnoeffect(ip->opcode);
6200 }
6201
6202 // check the last one also
6203 no_effect = no_effect && isnoeffect(ip->opcode);
6204
6205 // Only if all the traversed opcodes have no effect do we
6206 // produce a warning. This avoids warnings for things like
6207 // a == b && b = c.
6208 if (do_lint) { /* parse-time warning */
6209 if (no_effect) {
6210 if (ip->source_line != 0)
6211 line = ip->source_line;
6212 lintwarn_ln(line, _("statement has no effect"));
6213 }
6214 }
6215
6216 // We no longer place a run-time warning also. One warning
6217 // at parse time is enough.
6218 }
6219 break;
6220
6221 default:
6222 break;
6223 }
6224 #endif
6225 }
6226
6227 /* mk_expression_list --- list of bytecode lists */
6228
6229 static INSTRUCTION *
mk_expression_list(INSTRUCTION * list,INSTRUCTION * s1)6230 mk_expression_list(INSTRUCTION *list, INSTRUCTION *s1)
6231 {
6232 INSTRUCTION *r;
6233
6234 /* we can't just combine all bytecodes, since we need to
6235 * process individual expressions for a few builtins in snode() (-:
6236 */
6237
6238 /* -- list of lists */
6239 /* [Op_list| ... ]------
6240 * |
6241 * [Op_list| ... ] -- |
6242 * ... | |
6243 * ... <------- |
6244 * [Op_list| ... ] -- |
6245 * ... | |
6246 * ... | |
6247 * ... <------- --
6248 */
6249
6250 assert(s1 != NULL && s1->opcode == Op_list);
6251 if (list == NULL) {
6252 list = instruction(Op_list);
6253 list->nexti = s1;
6254 list->lasti = s1->lasti;
6255 return list;
6256 }
6257
6258 /* append expression to the end of the list */
6259
6260 r = list->lasti;
6261 r->nexti = s1;
6262 list->lasti = s1->lasti;
6263 return list;
6264 }
6265
6266 /* count_expressions --- fixup expression_list from mk_expression_list.
6267 * returns no of expressions in list. isarg is true
6268 * for function arguments.
6269 */
6270
6271 static int
count_expressions(INSTRUCTION ** list,bool isarg)6272 count_expressions(INSTRUCTION **list, bool isarg)
6273 {
6274 INSTRUCTION *expr;
6275 INSTRUCTION *r = NULL;
6276 int count = 0;
6277
6278 if (*list == NULL) /* error earlier */
6279 return 0;
6280
6281 for (expr = (*list)->nexti; expr; ) {
6282 INSTRUCTION *t1, *t2;
6283 t1 = expr->nexti;
6284 t2 = expr->lasti;
6285 if (isarg && t1 == t2 && t1->opcode == Op_push)
6286 t1->opcode = Op_push_param;
6287 if (++count == 1)
6288 r = expr;
6289 else
6290 (void) list_merge(r, expr);
6291 expr = t2->nexti;
6292 }
6293
6294 assert(count > 0);
6295 if (! isarg && count > max_args)
6296 max_args = count;
6297 bcfree(*list);
6298 *list = r;
6299 return count;
6300 }
6301
6302 /* fix_break_continue --- fix up break & continue codes in loop bodies */
6303
6304 static void
fix_break_continue(INSTRUCTION * list,INSTRUCTION * b_target,INSTRUCTION * c_target)6305 fix_break_continue(INSTRUCTION *list, INSTRUCTION *b_target, INSTRUCTION *c_target)
6306 {
6307 INSTRUCTION *ip;
6308
6309 list->lasti->nexti = NULL; /* just to make sure */
6310
6311 for (ip = list->nexti; ip != NULL; ip = ip->nexti) {
6312 switch (ip->opcode) {
6313 case Op_K_break:
6314 if (ip->target_jmp == NULL)
6315 ip->target_jmp = b_target;
6316 break;
6317
6318 case Op_K_continue:
6319 if (ip->target_jmp == NULL)
6320 ip->target_jmp = c_target;
6321 break;
6322
6323 default:
6324 /* this is to keep the compiler happy. sheesh. */
6325 break;
6326 }
6327 }
6328 }
6329
6330 static inline INSTRUCTION *
list_create(INSTRUCTION * x)6331 list_create(INSTRUCTION *x)
6332 {
6333 INSTRUCTION *l;
6334
6335 l = instruction(Op_list);
6336 l->nexti = x;
6337 l->lasti = x;
6338 return l;
6339 }
6340
6341 static inline INSTRUCTION *
list_append(INSTRUCTION * l,INSTRUCTION * x)6342 list_append(INSTRUCTION *l, INSTRUCTION *x)
6343 {
6344 #ifdef GAWKDEBUG
6345 if (l->opcode != Op_list)
6346 cant_happen();
6347 #endif
6348 l->lasti->nexti = x;
6349 l->lasti = x;
6350 return l;
6351 }
6352
6353 static inline INSTRUCTION *
list_prepend(INSTRUCTION * l,INSTRUCTION * x)6354 list_prepend(INSTRUCTION *l, INSTRUCTION *x)
6355 {
6356 #ifdef GAWKDEBUG
6357 if (l->opcode != Op_list)
6358 cant_happen();
6359 #endif
6360 x->nexti = l->nexti;
6361 l->nexti = x;
6362 return l;
6363 }
6364
6365 static inline INSTRUCTION *
list_merge(INSTRUCTION * l1,INSTRUCTION * l2)6366 list_merge(INSTRUCTION *l1, INSTRUCTION *l2)
6367 {
6368 #ifdef GAWKDEBUG
6369 if (l1->opcode != Op_list)
6370 cant_happen();
6371 if (l2->opcode != Op_list)
6372 cant_happen();
6373 #endif
6374 l1->lasti->nexti = l2->nexti;
6375 l1->lasti = l2->lasti;
6376 bcfree(l2);
6377 return l1;
6378 }
6379
6380 /* See if name is a special token. */
6381
6382 int
check_special(const char * name)6383 check_special(const char *name)
6384 {
6385 int low, high, mid;
6386 int i;
6387 int non_standard_flags = 0;
6388 #ifdef USE_EBCDIC
6389 static bool did_sort = false;
6390
6391 if (! did_sort) {
6392 qsort((void *) tokentab,
6393 sizeof(tokentab) / sizeof(tokentab[0]),
6394 sizeof(tokentab[0]), tokcompare);
6395 did_sort = true;
6396 }
6397 #endif
6398
6399 if (do_traditional)
6400 non_standard_flags |= GAWKX;
6401 if (do_posix)
6402 non_standard_flags |= NOT_POSIX;
6403
6404 low = 0;
6405 high = (sizeof(tokentab) / sizeof(tokentab[0])) - 1;
6406 while (low <= high) {
6407 mid = (low + high) / 2;
6408 i = *name - tokentab[mid].operator[0];
6409 if (i == 0)
6410 i = strcmp(name, tokentab[mid].operator);
6411
6412 if (i < 0) /* token < mid */
6413 high = mid - 1;
6414 else if (i > 0) /* token > mid */
6415 low = mid + 1;
6416 else {
6417 if ((tokentab[mid].flags & non_standard_flags) != 0)
6418 return -1;
6419 return mid;
6420 }
6421 }
6422 return -1;
6423 }
6424
6425 /*
6426 * This provides a private version of functions that act like VMS's
6427 * variable-length record filesystem, where there was a bug on
6428 * certain source files.
6429 */
6430
6431 static FILE *fp = NULL;
6432
6433 /* read_one_line --- return one input line at a time. mainly for debugging. */
6434
6435 static ssize_t
read_one_line(int fd,void * buffer,size_t count)6436 read_one_line(int fd, void *buffer, size_t count)
6437 {
6438 char buf[BUFSIZ];
6439
6440 /* Minor potential memory leak here. Too bad. */
6441 if (fp == NULL) {
6442 fp = fdopen(fd, "r");
6443 if (fp == NULL) {
6444 fprintf(stderr, "ugh. fdopen: %s\n", strerror(errno));
6445 gawk_exit(EXIT_FAILURE);
6446 }
6447 }
6448
6449 if (fgets(buf, sizeof buf, fp) == NULL)
6450 return 0;
6451
6452 memcpy(buffer, buf, strlen(buf));
6453 return strlen(buf);
6454 }
6455
6456 /* one_line_close --- close the open file being read with read_one_line() */
6457
6458 static int
one_line_close(int fd)6459 one_line_close(int fd)
6460 {
6461 int ret;
6462
6463 if (fp == NULL || fd != fileno(fp))
6464 fatal("debugging read/close screwed up!");
6465
6466 ret = fclose(fp);
6467 fp = NULL;
6468 return ret;
6469 }
6470
6471
6472 /* lookup_builtin --- find a builtin function or return NULL */
6473
6474 builtin_func_t
lookup_builtin(const char * name)6475 lookup_builtin(const char *name)
6476 {
6477 if (strncmp(name, "awk::", 5) == 0)
6478 name += 5;
6479
6480 int mid = check_special(name);
6481
6482 if (mid == -1)
6483 return NULL;
6484
6485 switch (tokentab[mid].class) {
6486 case LEX_BUILTIN:
6487 case LEX_LENGTH:
6488 break;
6489 default:
6490 return NULL;
6491 }
6492
6493 /* And another special case... */
6494 if (tokentab[mid].value == Op_sub_builtin)
6495 return (builtin_func_t) do_sub;
6496
6497 #ifdef HAVE_MPFR
6498 if (do_mpfr && tokentab[mid].ptr2 != NULL)
6499 return tokentab[mid].ptr2;
6500 #endif
6501
6502 return tokentab[mid].ptr;
6503 }
6504
6505 /* install_builtins --- add built-in functions to FUNCTAB */
6506
6507 void
install_builtins(void)6508 install_builtins(void)
6509 {
6510 int i, j;
6511 int flags_that_must_be_clear = DEBUG_USE;
6512
6513 if (do_traditional)
6514 flags_that_must_be_clear |= GAWKX;
6515
6516 if (do_posix)
6517 flags_that_must_be_clear |= NOT_POSIX;
6518
6519
6520 j = sizeof(tokentab) / sizeof(tokentab[0]);
6521 for (i = 0; i < j; i++) {
6522 if ( (tokentab[i].class == LEX_BUILTIN
6523 || tokentab[i].class == LEX_LENGTH)
6524 && (tokentab[i].flags & flags_that_must_be_clear) == 0) {
6525 (void) install_symbol(tokentab[i].operator, Node_builtin_func);
6526 }
6527 }
6528 }
6529
6530 /*
6531 * 9/2014: Gawk cannot use <ctype.h> isalpha or isalnum when
6532 * parsing the program since that can let through non-English
6533 * letters. So, we supply our own. !@#$%^&*()-ing locales!
6534 */
6535
6536 /* is_alpha --- return true if c is an English letter */
6537
6538 /*
6539 * The scene of the murder was grisly to look upon. When the inspector
6540 * arrived, the sergeant turned to him and said, "Another programmer stabbed
6541 * in the back. He never knew what happened."
6542 *
6543 * The inspector replied, "Looks like the MO of isalpha, and his even meaner
6544 * big brother, isalnum. The Locale brothers." The sergeant merely
6545 * shuddered in horror.
6546 */
6547
6548 bool
is_alpha(int c)6549 is_alpha(int c)
6550 {
6551 switch (c) {
6552 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
6553 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
6554 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
6555 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
6556 case 'y': case 'z':
6557 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
6558 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
6559 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
6560 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
6561 case 'Y': case 'Z':
6562 return true;
6563 }
6564 return false;
6565 }
6566
6567 /* is_alnum --- return true for alphanumeric, English only letters */
6568
6569 bool
is_alnum(int c)6570 is_alnum(int c)
6571 {
6572 /* digit test is good for EBCDIC too. so there. */
6573 return (is_alpha(c) || ('0' <= c && c <= '9'));
6574 }
6575
6576
6577 /*
6578 * is_letter --- function to check letters
6579 * isalpha() isn't good enough since it can look at the locale.
6580 * Underscore counts as a letter in awk identifiers
6581 */
6582
6583 bool
is_letter(int c)6584 is_letter(int c)
6585 {
6586 return (is_alpha(c) || c == '_');
6587 }
6588
6589 /* is_identchar --- return true if c can be in an identifier */
6590
6591 bool
is_identchar(int c)6592 is_identchar(int c)
6593 {
6594 return (is_alnum(c) || c == '_');
6595 }
6596
6597 /* set_profile_text --- make a number that can be printed when profiling */
6598
6599 static NODE *
set_profile_text(NODE * n,const char * str,size_t len)6600 set_profile_text(NODE *n, const char *str, size_t len)
6601 {
6602 if (do_pretty_print) {
6603 // two extra bytes: one for NUL termination, and another in
6604 // case we need to add a leading minus sign in add_sign_to_num
6605 emalloc(n->stptr, char *, len + 2, "set_profile_text");
6606 memcpy(n->stptr, str, len);
6607 n->stptr[len] = '\0';
6608 n->stlen = len;
6609 // Set STRCUR and n->stfmt for use when profiling
6610 // (i.e., actually running the program) so that
6611 // force_string() on this item will work ok.
6612 // Thanks and a tip of the hatlo to valgrind.
6613 n->flags |= (NUMCONSTSTR|STRCUR);
6614 n->stfmt = STFMT_UNUSED;
6615 #ifdef HAVE_MPFR
6616 n->strndmode = MPFR_round_mode;
6617 #endif
6618 }
6619
6620 return n;
6621 }
6622
6623 /*
6624 * merge_comments --- merge c2 into c1 and free c2 if successful.
6625 * Allow c2 to be NULL, in which case just merged chained
6626 * comments in c1.
6627 */
6628
6629 static void
merge_comments(INSTRUCTION * c1,INSTRUCTION * c2)6630 merge_comments(INSTRUCTION *c1, INSTRUCTION *c2)
6631 {
6632 assert(c1->opcode == Op_comment);
6633
6634 if (c1->comment == NULL && c2 == NULL) // nothing to do
6635 return;
6636
6637 size_t total = c1->memory->stlen;
6638 if (c1->comment != NULL)
6639 total += 1 /* \n */ + c1->comment->memory->stlen;
6640
6641 if (c2 != NULL) {
6642 assert(c2->opcode == Op_comment);
6643 total += 1 /* \n */ + c2->memory->stlen;
6644 if (c2->comment != NULL)
6645 total += c2->comment->memory->stlen + 1;
6646 }
6647
6648 char *buffer;
6649 emalloc(buffer, char *, total + 1, "merge_comments");
6650
6651 strcpy(buffer, c1->memory->stptr);
6652 if (c1->comment != NULL) {
6653 strcat(buffer, "\n");
6654 strcat(buffer, c1->comment->memory->stptr);
6655 }
6656
6657 if (c2 != NULL) {
6658 strcat(buffer, "\n");
6659 strcat(buffer, c2->memory->stptr);
6660 if (c2->comment != NULL) {
6661 strcat(buffer, "\n");
6662 strcat(buffer, c2->comment->memory->stptr);
6663 }
6664
6665 unref(c2->memory);
6666 if (c2->comment != NULL) {
6667 unref(c2->comment->memory);
6668 bcfree(c2->comment);
6669 c2->comment = NULL;
6670 }
6671 bcfree(c2);
6672 }
6673
6674 c1->memory->comment_type = BLOCK_COMMENT;
6675 free(c1->memory->stptr);
6676 c1->memory->stptr = buffer;
6677 c1->memory->stlen = strlen(buffer);
6678
6679 // now free everything else
6680 if (c1->comment != NULL) {
6681 unref(c1->comment->memory);
6682 bcfree(c1->comment);
6683 c1->comment = NULL;
6684 }
6685 }
6686
6687 /* make_braced_statements --- handle `l_brace statements r_brace' with comments */
6688
6689 static INSTRUCTION *
make_braced_statements(INSTRUCTION * lbrace,INSTRUCTION * stmts,INSTRUCTION * rbrace)6690 make_braced_statements(INSTRUCTION *lbrace, INSTRUCTION *stmts, INSTRUCTION *rbrace)
6691 {
6692 INSTRUCTION *ip;
6693
6694 if (stmts == NULL)
6695 ip = list_create(instruction(Op_no_op));
6696 else
6697 ip = stmts;
6698
6699 if (lbrace != NULL) {
6700 INSTRUCTION *comment2 = lbrace->comment;
6701 if (comment2 != NULL) {
6702 ip = list_prepend(ip, comment2);
6703 lbrace->comment = NULL;
6704 }
6705 ip = list_prepend(ip, lbrace);
6706 }
6707
6708 return ip;
6709 }
6710
6711 /* validate_qualified_name --- make sure that a qualified name is built correctly */
6712
6713 /*
6714 * This routine returns upon first error, no need to produce multiple, possibly
6715 * conflicting / confusing error messages.
6716 */
6717
6718 bool
validate_qualified_name(char * token)6719 validate_qualified_name(char *token)
6720 {
6721 char *cp, *cp2;
6722
6723 // no colon, by definition it's well formed
6724 if ((cp = strchr(token, ':')) == NULL)
6725 return true;
6726
6727 if (do_traditional || do_posix) {
6728 error_ln(sourceline, _("identifier %s: qualified names not allowed in traditional / POSIX mode"), token);
6729 return false;
6730 }
6731
6732 if (cp[1] != ':') { // could happen from command line
6733 error_ln(sourceline, _("identifier %s: namespace separator is two colons, not one"), token);
6734 return false;
6735 }
6736
6737 if (! is_letter(cp[2])) {
6738 error_ln(sourceline,
6739 _("qualified identifier `%s' is badly formed"),
6740 token);
6741 return false;
6742 }
6743
6744 if ((cp2 = strchr(cp+2, ':')) != NULL) {
6745 error_ln(sourceline,
6746 _("identifier `%s': namespace separator can only appear once in a qualified name"),
6747 token);
6748 return false;
6749 }
6750
6751 return true;
6752 }
6753
6754 /* check_qualified_special --- decide if a name is special or not */
6755
6756 static int
check_qualified_special(char * token)6757 check_qualified_special(char *token)
6758 {
6759 char *cp;
6760
6761 if ((cp = strchr(token, ':')) == NULL && current_namespace == awk_namespace)
6762 return check_special(token);
6763
6764 /*
6765 * Now it's more complicated. Here are the rules.
6766 *
6767 * 1. Namespace name cannot be a standard awk reserved word or function.
6768 * 2. Subordinate part of the name cannot be a standard awk reserved word or function.
6769 * 3. If the namespace part is explicitly "awk", return the result of check_special().
6770 * 4. Else return -1 (gawk extensions allowed, we check standard awk in step 2).
6771 */
6772
6773 const struct token *tok;
6774 int i;
6775 if (cp == NULL) { // namespace not awk, but a simple identifier
6776 i = check_special(token);
6777 if (i < 0)
6778 return i;
6779
6780 tok = & tokentab[i];
6781 if ((tok->flags & GAWKX) != 0 && tok->class == LEX_BUILTIN)
6782 return -1;
6783 else
6784 return i;
6785 }
6786
6787 char *ns, *end, *subname;
6788 ns = token;
6789 *(end = cp) = '\0'; // temporarily turn it into standalone string
6790 subname = end + 2;
6791
6792 // First check the namespace part
6793 i = check_special(ns);
6794 if (i >= 0 && (tokentab[i].flags & GAWKX) == 0) {
6795 error_ln(sourceline, _("using reserved identifier `%s' as a namespace is not allowed"), ns);
6796 goto done;
6797 }
6798
6799 // Now check the subordinate part
6800 i = check_special(subname);
6801 if (i >= 0 && (tokentab[i].flags & GAWKX) == 0 && strcmp(ns, awk_namespace) != 0) {
6802 error_ln(sourceline, _("using reserved identifier `%s' as second component of a qualified name is not allowed"), subname);
6803 goto done;
6804 }
6805
6806 if (strcmp(ns, awk_namespace) == 0) {
6807 i = check_special(subname);
6808 if (i >= 0) {
6809 if ((tokentab[i].flags & GAWKX) != 0 && tokentab[i].class == LEX_BUILTIN)
6810 ; // gawk additional builtin function, is ok
6811 else
6812 error_ln(sourceline, _("using reserved identifier `%s' as second component of a qualified name is not allowed"), subname);
6813 }
6814 } else
6815 i = -1;
6816 done:
6817 *end = ':';
6818 return i;
6819 }
6820
6821 /* set_namespace --- change the current namespace */
6822
6823 static void
set_namespace(INSTRUCTION * ns,INSTRUCTION * comment)6824 set_namespace(INSTRUCTION *ns, INSTRUCTION *comment)
6825 {
6826 if (ns == NULL)
6827 return;
6828
6829 if (do_traditional || do_posix) {
6830 error_ln(ns->source_line, _("@namespace is a gawk extension"));
6831 efree(ns->lextok);
6832 bcfree(ns);
6833 return;
6834 }
6835
6836 if (! is_valid_identifier(ns->lextok)) {
6837 error_ln(ns->source_line, _("namespace name `%s' must meet identifier naming rules"), ns->lextok);
6838 efree(ns->lextok);
6839 bcfree(ns);
6840 return;
6841 }
6842
6843 int mid = check_special(ns->lextok);
6844
6845 if (mid >= 0) {
6846 error_ln(ns->source_line, _("using reserved identifier `%s' as a namespace is not allowed"), ns->lextok);
6847 efree(ns->lextok);
6848 bcfree(ns);
6849 return;
6850 }
6851
6852 if (strcmp(ns->lextok, current_namespace) == 0)
6853 ; // nothing to do
6854 else if (strcmp(ns->lextok, awk_namespace) == 0) {
6855 set_current_namespace(awk_namespace);
6856 } else {
6857 set_current_namespace(estrdup(ns->lextok, strlen(ns->lextok)));
6858 }
6859 efree(ns->lextok);
6860
6861 // save info and push on front of list of namespaces seen
6862 INSTRUCTION *new_ns = instruction(Op_K_namespace);
6863 new_ns->comment = comment;
6864 new_ns->ns_name = estrdup(current_namespace, strlen(current_namespace));
6865 new_ns->nexti = namespace_chain;
6866 namespace_chain = new_ns;
6867
6868 ns->lextok = NULL;
6869 bcfree(ns);
6870
6871 namespace_changed = true;
6872
6873 return;
6874 }
6875
6876 /* qualify_name --- put name into namespace */
6877
6878 static char *
qualify_name(const char * name,size_t len)6879 qualify_name(const char *name, size_t len)
6880 {
6881 if (strchr(name, ':') != NULL) // already qualified
6882 return estrdup(name, len);
6883
6884 NODE *p = lookup(name);
6885 if (p != NULL && p->type == Node_param_list)
6886 return estrdup(name, len);
6887
6888 if (current_namespace != awk_namespace && ! is_all_upper(name)) {
6889 size_t length = strlen(current_namespace) + 2 + len + 1;
6890 char *buf;
6891
6892 emalloc(buf, char *, length, "qualify_name");
6893 sprintf(buf, "%s::%s", current_namespace, name);
6894
6895 return buf;
6896 }
6897
6898 return estrdup(name, len);
6899 }
6900